219 files changed, 29683 insertions, 1485 deletions
diff --git a/src/gallium/auxiliary/draw/draw_pipe_aaline.c b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
index d6c16d37b21..eeeed9813fa 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_aaline.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_aaline.c
@@ -398,9 +398,9 @@ aaline_create_texture(struct aaline_stage *aaline)
    texTemp.target = PIPE_TEXTURE_2D;
    texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
    texTemp.last_level = MAX_TEXTURE_LEVEL;
-   texTemp.width[0] = 1 << MAX_TEXTURE_LEVEL;
-   texTemp.height[0] = 1 << MAX_TEXTURE_LEVEL;
-   texTemp.depth[0] = 1;
+   texTemp.width0 = 1 << MAX_TEXTURE_LEVEL;
+   texTemp.height0 = 1 << MAX_TEXTURE_LEVEL;
+   texTemp.depth0 = 1;
    pf_get_block(texTemp.format, &texTemp.block);
 
    aaline->texture = screen->texture_create(screen, &texTemp);
@@ -413,11 +413,11 @@ aaline_create_texture(struct aaline_stage *aaline)
     */
    for (level = 0; level <= MAX_TEXTURE_LEVEL; level++) {
       struct pipe_transfer *transfer;
-      const uint size = aaline->texture->width[level];
+      const uint size = u_minify(aaline->texture->width0, level);
       ubyte *data;
       uint i, j;
 
-      assert(aaline->texture->width[level] == aaline->texture->height[level]);
+      assert(aaline->texture->width0 == aaline->texture->height0);
 
       /* This texture is new, no need to flush. 
        */
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 283502cdf3e..27d89721b10 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -427,9 +427,9 @@ pstip_create_texture(struct pstip_stage *pstip)
    texTemp.target = PIPE_TEXTURE_2D;
    texTemp.format = PIPE_FORMAT_A8_UNORM; /* XXX verify supported by driver! */
    texTemp.last_level = 0;
-   texTemp.width[0] = 32;
-   texTemp.height[0] = 32;
-   texTemp.depth[0] = 1;
+   texTemp.width0 = 32;
+   texTemp.height0 = 32;
+   texTemp.depth0 = 1;
    pf_get_block(texTemp.format, &texTemp.block);
 
    pstip->texture = screen->texture_create(screen, &texTemp);
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index dbb5ac71821..4865a2d8542 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -192,7 +192,8 @@ draw_print_arrays(struct draw_context *draw, uint prim, int start, uint count)
                 prim, start, count);
 
    for (i = 0; i < count; i++) {
-      uint ii, j;
+      uint ii = 0;
+      uint j;
 
       if (draw->pt.user.elts) {
          /* indexed arrays */
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index d3f179ced18..757c4874545 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -346,7 +346,8 @@ vcache_check_run( struct draw_pt_front_end *frontend,
                        vcache->fetch_max,
                        draw_count);
       
-   if (max_index == 0xffffffff ||
+   if (max_index >= DRAW_PIPE_MAX_VERTICES ||
+       fetch_count >= UNDEFINED_VERTEX_ID ||
        fetch_count > draw_count) {
       if (0) debug_printf("fail\n");
       goto fail;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index 2ef4293d4d7..2f973684f67 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -1,6 +1,6 @@
 /**************************************************************************
  *
- * Copyright 2007 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * Copyright 2007-2009 VMware, Inc.
  * All Rights Reserved.
  *
  * Permission is hereby granted, free of charge, to any person obtaining a
@@ -18,7 +18,7 @@
  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
  * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
  * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
- * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * IN NO EVENT SHALL VMWARE AND/OR ITS SUPPLIERS BE LIABLE FOR
  * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
  * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
  * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
@@ -80,11 +80,27 @@ struct fenced_buffer_list
  */
 struct fenced_buffer
 {
+   /* 
+    * Immutable members.
+    */
+
    struct pb_buffer base;
-   
    struct pb_buffer *buffer;
+   struct fenced_buffer_list *list;
+
+   /**
+    * Protected by fenced_buffer_list::mutex
+    */
+   struct list_head head;
 
-   /* FIXME: protect access with mutex */
+   /**
+    * Following members are mutable and protected by this mutex.
+    * 
+    * You may lock this mutex alone, or lock it with fenced_buffer_list::mutex
+    * held, but in order to prevent deadlocks you must never lock 
+    * fenced_buffer_list::mutex with this mutex held.
+    */
+   pipe_mutex mutex;
 
    /**
     * A bitmask of PIPE_BUFFER_USAGE_CPU/GPU_READ/WRITE describing the current
@@ -96,9 +112,6 @@ struct fenced_buffer
    struct pb_validate *vl;
    unsigned validation_flags;
    struct pipe_fence_handle *fence;
-
-   struct list_head head;
-   struct fenced_buffer_list *list;
 };
 
 
@@ -110,15 +123,24 @@ fenced_buffer(struct pb_buffer *buf)
 }
 
 
+/**
+ * Add the buffer to the fenced list.
+ * 
+ * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this 
+ * order before calling this function.
+ * 
+ * Reference count should be incremented before calling this function.
+ */
 static INLINE void
-_fenced_buffer_add(struct fenced_buffer *fenced_buf)
+fenced_buffer_add_locked(struct fenced_buffer_list *fenced_list, 
+                         struct fenced_buffer *fenced_buf)
 {
-   struct fenced_buffer_list *fenced_list = fenced_buf->list;
-
    assert(pipe_is_referenced(&fenced_buf->base.base.reference));
    assert(fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
    assert(fenced_buf->fence);
 
+   /* TODO: Move the reference count increment here */
+   
 #ifdef DEBUG
    LIST_DEL(&fenced_buf->head);
    assert(fenced_list->numUnfenced);
@@ -130,32 +152,16 @@ _fenced_buffer_add(struct fenced_buffer *fenced_buf)
 
 
 /**
- * Actually destroy the buffer.
+ * Remove the buffer from the fenced list.
+ * 
+ * fenced_buffer_list::mutex and fenced_buffer::mutex must be held, in this 
+ * order before calling this function.
+ * 
+ * Reference count should be decremented after calling this function.
  */
 static INLINE void
-_fenced_buffer_destroy(struct fenced_buffer *fenced_buf)
-{
-   struct fenced_buffer_list *fenced_list = fenced_buf->list;
-   
-   assert(!pipe_is_referenced(&fenced_buf->base.base.reference));
-   assert(!fenced_buf->fence);
-#ifdef DEBUG
-   assert(fenced_buf->head.prev);
-   assert(fenced_buf->head.next);
-   LIST_DEL(&fenced_buf->head);
-   assert(fenced_list->numUnfenced);
-   --fenced_list->numUnfenced;
-#else
-   (void)fenced_list;
-#endif
-   pb_reference(&fenced_buf->buffer, NULL);
-   FREE(fenced_buf);
-}
-
-
-static INLINE void
-_fenced_buffer_remove(struct fenced_buffer_list *fenced_list,
-                      struct fenced_buffer *fenced_buf)
+fenced_buffer_remove_locked(struct fenced_buffer_list *fenced_list,
+                            struct fenced_buffer *fenced_buf)
 {
    struct pb_fence_ops *ops = fenced_list->ops;
 
@@ -177,37 +183,53 @@ _fenced_buffer_remove(struct fenced_buffer_list *fenced_list,
    ++fenced_list->numUnfenced;
 #endif
    
-   /**
-    * FIXME!!!
-    */
-
-   if(!pipe_is_referenced(&fenced_buf->base.base.reference))
-      _fenced_buffer_destroy(fenced_buf);
+   /* TODO: Move the reference count decrement and destruction here */
 }
 
 
+/**
+ * Wait for the fence to expire, and remove it from the fenced list.
+ * 
+ * fenced_buffer::mutex must be held. fenced_buffer_list::mutex must not be 
+ * held -- it will
+ */
 static INLINE enum pipe_error
-_fenced_buffer_finish(struct fenced_buffer *fenced_buf)
+fenced_buffer_finish_locked(struct fenced_buffer_list *fenced_list,
+                              struct fenced_buffer *fenced_buf)
 {
-   struct fenced_buffer_list *fenced_list = fenced_buf->list;
    struct pb_fence_ops *ops = fenced_list->ops;
+   enum pipe_error ret = PIPE_ERROR;
 
 #if 0
    debug_warning("waiting for GPU");
 #endif
 
+   assert(pipe_is_referenced(&fenced_buf->base.base.reference));
    assert(fenced_buf->fence);
+
+   /* Acquire the global lock */
+   pipe_mutex_unlock(fenced_buf->mutex);
+   pipe_mutex_lock(fenced_list->mutex);
+   pipe_mutex_lock(fenced_buf->mutex);
+
    if(fenced_buf->fence) {
-      if(ops->fence_finish(ops, fenced_buf->fence, 0) != 0) {
-	 return PIPE_ERROR;
+      if(ops->fence_finish(ops, fenced_buf->fence, 0) == 0) {
+         /* Remove from the fenced list */
+         /* TODO: remove consequents */
+         fenced_buffer_remove_locked(fenced_list, fenced_buf);
+         
+         p_atomic_dec(&fenced_buf->base.base.reference.count);
+         assert(pipe_is_referenced(&fenced_buf->base.base.reference));
+
+         fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE;
+
+         ret = PIPE_OK;
       }
-      /* Remove from the fenced list */
-      /* TODO: remove consequents */
-      _fenced_buffer_remove(fenced_list, fenced_buf);
    }
 
-   fenced_buf->flags &= ~PIPE_BUFFER_USAGE_GPU_READ_WRITE;
-   return PIPE_OK;
+   pipe_mutex_unlock(fenced_list->mutex);
+
+   return ret;
 }
 
 
@@ -215,7 +237,7 @@ _fenced_buffer_finish(struct fenced_buffer *fenced_buf)
  * Free as many fenced buffers from the list head as possible. 
  */
 static void
-_fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list, 
+fenced_buffer_list_check_free_locked(struct fenced_buffer_list *fenced_list, 
                                int wait)
 {
    struct pb_fence_ops *ops = fenced_list->ops;
@@ -228,21 +250,28 @@ _fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
    while(curr != &fenced_list->delayed) {
       fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
 
+      pipe_mutex_lock(fenced_buf->mutex);
+
       if(fenced_buf->fence != prev_fence) {
 	 int signaled;
 	 if (wait)
 	    signaled = ops->fence_finish(ops, fenced_buf->fence, 0);
 	 else
 	    signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
-	 if (signaled != 0)
+	 if (signaled != 0) {
+            pipe_mutex_unlock(fenced_buf->mutex);
 	    break;
+         }
 	 prev_fence = fenced_buf->fence;
       }
       else {
 	 assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
       }
 
-      _fenced_buffer_remove(fenced_list, fenced_buf);
+      fenced_buffer_remove_locked(fenced_list, fenced_buf);
+      pipe_mutex_unlock(fenced_buf->mutex);
+
+      pb_reference((struct pb_buffer **)&fenced_buf, NULL);
 
       curr = next; 
       next = curr->next;
@@ -256,30 +285,25 @@ fenced_buffer_destroy(struct pb_buffer *buf)
    struct fenced_buffer *fenced_buf = fenced_buffer(buf);   
    struct fenced_buffer_list *fenced_list = fenced_buf->list;
 
-   pipe_mutex_lock(fenced_list->mutex);
    assert(!pipe_is_referenced(&fenced_buf->base.base.reference));
-   if (fenced_buf->fence) {
-      struct pb_fence_ops *ops = fenced_list->ops;
-      if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) {
-	 struct list_head *curr, *prev;
-	 curr = &fenced_buf->head;
-	 prev = curr->prev;
-	 do {
-	    fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
-	    assert(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0);
-	    _fenced_buffer_remove(fenced_list, fenced_buf);
-	    curr = prev;
-	    prev = curr->prev;
-	 } while (curr != &fenced_list->delayed);
-      }	  
-      else {
-	 /* delay destruction */
-      }
-   }
-   else {
-      _fenced_buffer_destroy(fenced_buf);
-   }
+   assert(!fenced_buf->fence);
+
+#ifdef DEBUG
+   pipe_mutex_lock(fenced_list->mutex);
+   assert(fenced_buf->head.prev);
+   assert(fenced_buf->head.next);
+   LIST_DEL(&fenced_buf->head);
+   assert(fenced_list->numUnfenced);
+   --fenced_list->numUnfenced;
    pipe_mutex_unlock(fenced_list->mutex);
+#else
+   (void)fenced_list;
+#endif
+
+   pb_reference(&fenced_buf->buffer, NULL);
+
+   pipe_mutex_destroy(fenced_buf->mutex);
+   FREE(fenced_buf);
 }
 
 
@@ -290,24 +314,23 @@ fenced_buffer_map(struct pb_buffer *buf,
    struct fenced_buffer *fenced_buf = fenced_buffer(buf);
    struct fenced_buffer_list *fenced_list = fenced_buf->list;
    struct pb_fence_ops *ops = fenced_list->ops;
-   void *map;
+   void *map = NULL;
+
+   pipe_mutex_lock(fenced_buf->mutex);
 
    assert(!(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE));
    
    /* Serialize writes */
    if((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_WRITE) ||
       ((fenced_buf->flags & PIPE_BUFFER_USAGE_GPU_READ) && (flags & PIPE_BUFFER_USAGE_CPU_WRITE))) {
-      if(flags & PIPE_BUFFER_USAGE_DONTBLOCK) {
+      if((flags & PIPE_BUFFER_USAGE_DONTBLOCK) &&
+          ops->fence_signalled(ops, fenced_buf->fence, 0) == 0) {
          /* Don't wait for the GPU to finish writing */
-         if(ops->fence_signalled(ops, fenced_buf->fence, 0) == 0)
-            _fenced_buffer_remove(fenced_list, fenced_buf);
-         else
-            return NULL;
-      }
-      else {
-         /* Wait for the GPU to finish writing */
-         _fenced_buffer_finish(fenced_buf);
+         goto finish;
       }
+
+      /* Wait for the GPU to finish writing */
+      fenced_buffer_finish_locked(fenced_list, fenced_buf);
    }
 
 #if 0
@@ -324,6 +347,9 @@ fenced_buffer_map(struct pb_buffer *buf,
       fenced_buf->flags |= flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE;
    }
 
+finish:
+   pipe_mutex_unlock(fenced_buf->mutex);
+   
    return map;
 }
 
@@ -332,6 +358,9 @@ static void
 fenced_buffer_unmap(struct pb_buffer *buf)
 {
    struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+   
+   pipe_mutex_lock(fenced_buf->mutex);
+   
    assert(fenced_buf->mapcount);
    if(fenced_buf->mapcount) {
       pb_unmap(fenced_buf->buffer);
@@ -339,6 +368,8 @@ fenced_buffer_unmap(struct pb_buffer *buf)
       if(!fenced_buf->mapcount)
 	 fenced_buf->flags &= ~PIPE_BUFFER_USAGE_CPU_READ_WRITE;
    }
+   
+   pipe_mutex_unlock(fenced_buf->mutex);
 }
 
 
@@ -350,11 +381,14 @@ fenced_buffer_validate(struct pb_buffer *buf,
    struct fenced_buffer *fenced_buf = fenced_buffer(buf);
    enum pipe_error ret;
    
+   pipe_mutex_lock(fenced_buf->mutex);
+
    if(!vl) {
       /* invalidate */
       fenced_buf->vl = NULL;
       fenced_buf->validation_flags = 0;
-      return PIPE_OK;
+      ret = PIPE_OK;
+      goto finish;
    }
    
    assert(flags & PIPE_BUFFER_USAGE_GPU_READ_WRITE);
@@ -362,14 +396,17 @@ fenced_buffer_validate(struct pb_buffer *buf,
    flags &= PIPE_BUFFER_USAGE_GPU_READ_WRITE;
 
    /* Buffer cannot be validated in two different lists */ 
-   if(fenced_buf->vl && fenced_buf->vl != vl)
-      return PIPE_ERROR_RETRY;
+   if(fenced_buf->vl && fenced_buf->vl != vl) {
+      ret = PIPE_ERROR_RETRY;
+      goto finish;
+   }
    
 #if 0
    /* Do not validate if buffer is still mapped */
    if(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE) {
       /* TODO: wait for the thread that mapped the buffer to unmap it */
-      return PIPE_ERROR_RETRY;
+      ret = PIPE_ERROR_RETRY;
+      goto finish;
    }
    /* Final sanity checking */
    assert(!(fenced_buf->flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE));
@@ -379,17 +416,21 @@ fenced_buffer_validate(struct pb_buffer *buf,
    if(fenced_buf->vl == vl &&
       (fenced_buf->validation_flags & flags) == flags) {
       /* Nothing to do -- buffer already validated */
-      return PIPE_OK;
+      ret = PIPE_OK;
+      goto finish;
    }
    
    ret = pb_validate(fenced_buf->buffer, vl, flags);
    if (ret != PIPE_OK)
-      return ret;
+      goto finish;
    
    fenced_buf->vl = vl;
    fenced_buf->validation_flags |= flags;
    
-   return PIPE_OK;
+finish:
+   pipe_mutex_unlock(fenced_buf->mutex);
+
+   return ret;
 }
 
 
@@ -404,29 +445,36 @@ fenced_buffer_fence(struct pb_buffer *buf,
    fenced_buf = fenced_buffer(buf);
    fenced_list = fenced_buf->list;
    ops = fenced_list->ops;
-   
-   if(fence == fenced_buf->fence) {
-      /* Nothing to do */
-      return;
-   }
 
-   assert(fenced_buf->vl);
-   assert(fenced_buf->validation_flags);
-   
    pipe_mutex_lock(fenced_list->mutex);
-   if (fenced_buf->fence)
-      _fenced_buffer_remove(fenced_list, fenced_buf);
-   if (fence) {
-      ops->fence_reference(ops, &fenced_buf->fence, fence);
-      fenced_buf->flags |= fenced_buf->validation_flags;
-      _fenced_buffer_add(fenced_buf);
-   }
-   pipe_mutex_unlock(fenced_list->mutex);
+   pipe_mutex_lock(fenced_buf->mutex);
+
+   assert(pipe_is_referenced(&fenced_buf->base.base.reference));
+
+   if(fence != fenced_buf->fence) {
+      assert(fenced_buf->vl);
+      assert(fenced_buf->validation_flags);
+      
+      if (fenced_buf->fence) {
+         fenced_buffer_remove_locked(fenced_list, fenced_buf);
+         p_atomic_dec(&fenced_buf->base.base.reference.count);
+         assert(pipe_is_referenced(&fenced_buf->base.base.reference));
+      }
+      if (fence) {
+         ops->fence_reference(ops, &fenced_buf->fence, fence);
+         fenced_buf->flags |= fenced_buf->validation_flags;
+         p_atomic_inc(&fenced_buf->base.base.reference.count);
+         fenced_buffer_add_locked(fenced_list, fenced_buf);
+      }
+
+      pb_fence(fenced_buf->buffer, fence);
    
-   pb_fence(fenced_buf->buffer, fence);
+      fenced_buf->vl = NULL;
+      fenced_buf->validation_flags = 0;
+   }
 
-   fenced_buf->vl = NULL;
-   fenced_buf->validation_flags = 0;
+   pipe_mutex_unlock(fenced_buf->mutex);
+   pipe_mutex_unlock(fenced_list->mutex);
 }
 
 
@@ -436,6 +484,7 @@ fenced_buffer_get_base_buffer(struct pb_buffer *buf,
                               pb_size *offset)
 {
    struct fenced_buffer *fenced_buf = fenced_buffer(buf);
+   /* NOTE: accesses immutable members only -- mutex not necessary */
    pb_get_base_buffer(fenced_buf->buffer, base_buf, offset);
 }
 
@@ -475,6 +524,8 @@ fenced_buffer_create(struct fenced_buffer_list *fenced_list,
    buf->buffer = buffer;
    buf->list = fenced_list;
    
+   pipe_mutex_init(buf->mutex);
+
 #ifdef DEBUG
    pipe_mutex_lock(fenced_list->mutex);
    LIST_ADDTAIL(&buf->head, &fenced_list->unfenced);
@@ -516,7 +567,7 @@ fenced_buffer_list_check_free(struct fenced_buffer_list *fenced_list,
                               int wait)
 {
    pipe_mutex_lock(fenced_list->mutex);
-   _fenced_buffer_list_check_free(fenced_list, wait);
+   fenced_buffer_list_check_free_locked(fenced_list, wait);
    pipe_mutex_unlock(fenced_list->mutex);
 }
 
@@ -538,11 +589,13 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
    next = curr->next;
    while(curr != &fenced_list->unfenced) {
       fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
+      pipe_mutex_lock(fenced_buf->mutex);
       assert(!fenced_buf->fence);
       debug_printf("%10p %7u %7u\n",
                    (void *) fenced_buf,
                    fenced_buf->base.base.size,
                    p_atomic_read(&fenced_buf->base.base.reference.count));
+      pipe_mutex_unlock(fenced_buf->mutex);
       curr = next; 
       next = curr->next;
    }
@@ -552,6 +605,7 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
    while(curr != &fenced_list->delayed) {
       int signaled;
       fenced_buf = LIST_ENTRY(struct fenced_buffer, curr, head);
+      pipe_mutex_lock(fenced_buf->mutex);
       signaled = ops->fence_signalled(ops, fenced_buf->fence, 0);
       debug_printf("%10p %7u %7u %10p %s\n",
                    (void *) fenced_buf,
@@ -559,6 +613,7 @@ fenced_buffer_list_dump(struct fenced_buffer_list *fenced_list)
                    p_atomic_read(&fenced_buf->base.base.reference.count),
                    (void *) fenced_buf->fence,
                    signaled == 0 ? "y" : "n");
+      pipe_mutex_unlock(fenced_buf->mutex);
       curr = next; 
       next = curr->next;
    }
@@ -579,8 +634,8 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
 #if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
       sched_yield();
 #endif
-      _fenced_buffer_list_check_free(fenced_list, 1);
       pipe_mutex_lock(fenced_list->mutex);
+      fenced_buffer_list_check_free_locked(fenced_list, 1);
    }
 
 #ifdef DEBUG
@@ -588,6 +643,7 @@ fenced_buffer_list_destroy(struct fenced_buffer_list *fenced_list)
 #endif
       
    pipe_mutex_unlock(fenced_list->mutex);
+   pipe_mutex_destroy(fenced_list->mutex);
    
    fenced_list->ops->destroy(fenced_list->ops);
    
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
index eb492076b7d..080fd4c7310 100644
--- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
+++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
@@ -1129,3 +1129,35 @@ TGSI Instruction Specification
 
   target            Label of target instruction.
 
+
+3  Other tokens
+===============
+
+
+3.1  Declaration Semantic
+-------------------------
+
+
+  Follows Declaration token if Semantic bit is set.
+
+  Since its purpose is to link a shader with other stages of the pipeline,
+  it is valid to follow only those Declaration tokens that declare a register
+  either in INPUT or OUTPUT file.
+
+  SemanticName field contains the semantic name of the register being declared.
+  There is no default value.
+
+  SemanticIndex is an optional subscript that can be used to distinguish
+  different register declarations with the same semantic name. The default value
+  is 0.
+
+  The meanings of the individual semantic names are explained in the following
+  sections.
+
+
+3.1.1  FACE
+
+  Valid only in a fragment shader INPUT declaration.
+
+  FACE.x is negative when the primitive is back facing. FACE.x is positive
+  when the primitive is front facing.
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index 4fa10e2f7e3..9791e58db3b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -419,6 +419,7 @@ tgsi_default_instruction( void )
    instruction.NrTokens = 1;
    instruction.Opcode = TGSI_OPCODE_MOV;
    instruction.Saturate = TGSI_SAT_NONE;
+   instruction.Predicate = 0;
    instruction.NumDstRegs = 1;
    instruction.NumSrcRegs = 1;
    instruction.Padding  = 0;
@@ -428,12 +429,12 @@ tgsi_default_instruction( void )
 }
 
 struct tgsi_instruction
-tgsi_build_instruction(
-   unsigned opcode,
-   unsigned saturate,
-   unsigned num_dst_regs,
-   unsigned num_src_regs,
-   struct tgsi_header *header )
+tgsi_build_instruction(unsigned opcode,
+                       unsigned saturate,
+                       unsigned predicate,
+                       unsigned num_dst_regs,
+                       unsigned num_src_regs,
+                       struct tgsi_header *header)
 {
    struct tgsi_instruction instruction;
 
@@ -445,6 +446,7 @@ tgsi_build_instruction(
    instruction = tgsi_default_instruction();
    instruction.Opcode = opcode;
    instruction.Saturate = saturate;
+   instruction.Predicate = predicate;
    instruction.NumDstRegs = num_dst_regs;
    instruction.NumSrcRegs = num_src_regs;
 
@@ -472,9 +474,9 @@ tgsi_default_full_instruction( void )
    unsigned i;
 
    full_instruction.Instruction = tgsi_default_instruction();
+   full_instruction.InstructionPredicate = tgsi_default_instruction_predicate();
    full_instruction.InstructionExtLabel = tgsi_default_instruction_ext_label();
    full_instruction.InstructionExtTexture = tgsi_default_instruction_ext_texture();
-   full_instruction.InstructionExtPredicate = tgsi_default_instruction_ext_predicate();
    for( i = 0;  i < TGSI_FULL_MAX_DST_REGISTERS; i++ ) {
       full_instruction.FullDstRegisters[i] = tgsi_default_full_dst_register();
    }
@@ -504,14 +506,34 @@ tgsi_build_full_instruction(
    instruction = (struct tgsi_instruction *) &tokens[size];
    size++;
 
-   *instruction = tgsi_build_instruction(
-      full_inst->Instruction.Opcode,
-      full_inst->Instruction.Saturate,
-      full_inst->Instruction.NumDstRegs,
-      full_inst->Instruction.NumSrcRegs,
-      header );
+   *instruction = tgsi_build_instruction(full_inst->Instruction.Opcode,
+                                         full_inst->Instruction.Saturate,
+                                         full_inst->Instruction.Predicate,
+                                         full_inst->Instruction.NumDstRegs,
+                                         full_inst->Instruction.NumSrcRegs,
+                                         header);
    prev_token = (struct tgsi_token  *) instruction;
 
+   if (full_inst->Instruction.Predicate) {
+      struct tgsi_instruction_predicate *instruction_predicate;
+
+      if (maxsize <= size) {
+         return 0;
+      }
+      instruction_predicate = (struct tgsi_instruction_predicate *)&tokens[size];
+      size++;
+
+      *instruction_predicate =
+         tgsi_build_instruction_predicate(full_inst->InstructionPredicate.Index,
+                                          full_inst->InstructionPredicate.Negate,
+                                          full_inst->InstructionPredicate.SwizzleX,
+                                          full_inst->InstructionPredicate.SwizzleY,
+                                          full_inst->InstructionPredicate.SwizzleZ,
+                                          full_inst->InstructionPredicate.SwizzleW,
+                                          instruction,
+                                          header);
+   }
+
    if( tgsi_compare_instruction_ext_label(
          full_inst->InstructionExtLabel,
          tgsi_default_instruction_ext_label() ) ) {
@@ -550,29 +572,6 @@ tgsi_build_full_instruction(
       prev_token = (struct tgsi_token  *) instruction_ext_texture;
    }
 
-   if (tgsi_compare_instruction_ext_predicate(full_inst->InstructionExtPredicate,
-                                              tgsi_default_instruction_ext_predicate())) {
-      struct tgsi_instruction_ext_predicate *instruction_ext_predicate;
-
-      if (maxsize <= size) {
-         return 0;
-      }
-      instruction_ext_predicate = (struct tgsi_instruction_ext_predicate *)&tokens[size];
-      size++;
-
-      *instruction_ext_predicate =
-         tgsi_build_instruction_ext_predicate(full_inst->InstructionExtPredicate.SrcIndex,
-                                              full_inst->InstructionExtPredicate.Negate,
-                                              full_inst->InstructionExtPredicate.SwizzleX,
-                                              full_inst->InstructionExtPredicate.SwizzleY,
-                                              full_inst->InstructionExtPredicate.SwizzleZ,
-                                              full_inst->InstructionExtPredicate.SwizzleW,
-                                              prev_token,
-                                              instruction,
-                                              header);
-      prev_token = (struct tgsi_token *)instruction_ext_predicate;
-   }
-
    for( i = 0;  i <   full_inst->Instruction.NumDstRegs; i++ ) {
       const struct tgsi_full_dst_register *reg = &full_inst->FullDstRegisters[i];
       struct tgsi_dst_register *dst_register;
@@ -746,6 +745,47 @@ tgsi_build_full_instruction(
    return size;
 }
 
+struct tgsi_instruction_predicate
+tgsi_default_instruction_predicate(void)
+{
+   struct tgsi_instruction_predicate instruction_predicate;
+
+   instruction_predicate.SwizzleX = TGSI_SWIZZLE_X;
+   instruction_predicate.SwizzleY = TGSI_SWIZZLE_Y;
+   instruction_predicate.SwizzleZ = TGSI_SWIZZLE_Z;
+   instruction_predicate.SwizzleW = TGSI_SWIZZLE_W;
+   instruction_predicate.Negate = 0;
+   instruction_predicate.Index = 0;
+   instruction_predicate.Padding = 0;
+
+   return instruction_predicate;
+}
+
+struct tgsi_instruction_predicate
+tgsi_build_instruction_predicate(int index,
+                                 unsigned negate,
+                                 unsigned swizzleX,
+                                 unsigned swizzleY,
+                                 unsigned swizzleZ,
+                                 unsigned swizzleW,
+                                 struct tgsi_instruction *instruction,
+                                 struct tgsi_header *header)
+{
+   struct tgsi_instruction_predicate instruction_predicate;
+
+   instruction_predicate = tgsi_default_instruction_predicate();
+   instruction_predicate.SwizzleX = swizzleX;
+   instruction_predicate.SwizzleY = swizzleY;
+   instruction_predicate.SwizzleZ = swizzleZ;
+   instruction_predicate.SwizzleW = swizzleW;
+   instruction_predicate.Negate = negate;
+   instruction_predicate.Index = index;
+
+   instruction_grow(instruction, header);
+
+   return instruction_predicate;
+}
+
 /** test for inequality of 32-bit values pointed to by a and b */
 static INLINE boolean
 compare32(const void *a, const void *b)
@@ -835,60 +875,6 @@ tgsi_build_instruction_ext_texture(
    return instruction_ext_texture;
 }
 
-struct tgsi_instruction_ext_predicate
-tgsi_default_instruction_ext_predicate(void)
-{
-   struct tgsi_instruction_ext_predicate instruction_ext_predicate;
-
-   instruction_ext_predicate.Type = TGSI_INSTRUCTION_EXT_TYPE_PREDICATE;
-   instruction_ext_predicate.SwizzleX = TGSI_SWIZZLE_X;
-   instruction_ext_predicate.SwizzleY = TGSI_SWIZZLE_Y;
-   instruction_ext_predicate.SwizzleZ = TGSI_SWIZZLE_Z;
-   instruction_ext_predicate.SwizzleW = TGSI_SWIZZLE_W;
-   instruction_ext_predicate.Negate = 0;
-   instruction_ext_predicate.SrcIndex = 0;
-   instruction_ext_predicate.Padding = 0;
-   instruction_ext_predicate.Extended = 0;
-
-   return instruction_ext_predicate;
-}
-
-unsigned
-tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a,
-                                       struct tgsi_instruction_ext_predicate b)
-{
-   a.Padding = b.Padding = 0;
-   a.Extended = b.Extended = 0;
-   return compare32(&a, &b);
-}
-
-struct tgsi_instruction_ext_predicate
-tgsi_build_instruction_ext_predicate(unsigned index,
-                                     unsigned negate,
-                                     unsigned swizzleX,
-                                     unsigned swizzleY,
-                                     unsigned swizzleZ,
-                                     unsigned swizzleW,
-                                     struct tgsi_token *prev_token,
-                                     struct tgsi_instruction *instruction,
-                                     struct tgsi_header *header)
-{
-   struct tgsi_instruction_ext_predicate instruction_ext_predicate;
-
-   instruction_ext_predicate = tgsi_default_instruction_ext_predicate();
-   instruction_ext_predicate.SwizzleX = swizzleX;
-   instruction_ext_predicate.SwizzleY = swizzleY;
-   instruction_ext_predicate.SwizzleZ = swizzleZ;
-   instruction_ext_predicate.SwizzleW = swizzleW;
-   instruction_ext_predicate.Negate = negate;
-   instruction_ext_predicate.SrcIndex = index;
-
-   prev_token->Extended = 1;
-   instruction_grow(instruction, header);
-
-   return instruction_ext_predicate;
-}
-
 struct tgsi_src_register
 tgsi_default_src_register( void )
 {
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 669712eb8f9..0fe5f229d37 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -143,6 +143,7 @@ struct tgsi_instruction
 tgsi_build_instruction(
    unsigned opcode,
    unsigned saturate,
+   unsigned predicate,
    unsigned num_dst_regs,
    unsigned num_src_regs,
    struct tgsi_header *header );
@@ -157,6 +158,19 @@ tgsi_build_full_instruction(
    struct tgsi_header *header,
    unsigned maxsize );
 
+struct tgsi_instruction_predicate
+tgsi_default_instruction_predicate(void);
+
+struct tgsi_instruction_predicate
+tgsi_build_instruction_predicate(int index,
+                                 unsigned negate,
+                                 unsigned swizzleX,
+                                 unsigned swizzleY,
+                                 unsigned swizzleZ,
+                                 unsigned swizzleW,
+                                 struct tgsi_instruction *instruction,
+                                 struct tgsi_header *header);
+
 struct tgsi_instruction_ext_label
 tgsi_default_instruction_ext_label( void );
 
@@ -187,24 +201,6 @@ tgsi_build_instruction_ext_texture(
    struct tgsi_instruction *instruction,
    struct tgsi_header *header );
 
-struct tgsi_instruction_ext_predicate
-tgsi_default_instruction_ext_predicate(void);
-
-unsigned
-tgsi_compare_instruction_ext_predicate(struct tgsi_instruction_ext_predicate a,
-                                       struct tgsi_instruction_ext_predicate b);
-
-struct tgsi_instruction_ext_predicate
-tgsi_build_instruction_ext_predicate(unsigned index,
-                                     unsigned negate,
-                                     unsigned swizzleX,
-                                     unsigned swizzleY,
-                                     unsigned swizzleZ,
-                                     unsigned swizzleW,
-                                     struct tgsi_token *prev_token,
-                                     struct tgsi_instruction *instruction,
-                                     struct tgsi_header *header);
-
 struct tgsi_src_register
 tgsi_default_src_register( void );
 
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index ade5e7cbbd2..2bf367ba505 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -62,9 +62,6 @@
 
 #define FAST_MATH 1
 
-/** for tgsi_full_instruction::Flags */
-#define SOA_DEPENDENCY_FLAG 0x1
-
 #define TILE_TOP_LEFT     0
 #define TILE_TOP_RIGHT    1
 #define TILE_BOTTOM_LEFT  2
@@ -340,20 +337,6 @@ tgsi_exec_machine_bind_shader(
             maxInstructions += 10;
          }
 
-         if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
-            uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
-            parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG;
-            /* XXX we only handle SOA dependencies properly for MOV/SWZ
-             * at this time!
-             */
-            if (opcode != TGSI_OPCODE_MOV) {
-               debug_printf("Warning: SOA dependency in instruction"
-                            " is not handled:\n");
-               tgsi_dump_instruction(&parse.FullToken.FullInstruction,
-                                     numInstructions);
-            }
-         }
-
          memcpy(instructions + numInstructions,
                 &parse.FullToken.FullInstruction,
                 sizeof(instructions[0]));
@@ -395,6 +378,7 @@ tgsi_exec_machine_create( void )
 
    mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
    mach->MaxGeometryShaderOutputs = TGSI_MAX_TOTAL_VERTICES;
+   mach->Predicates = &mach->Temps[TGSI_EXEC_TEMP_P0];
 
    /* Setup constants. */
    for( i = 0; i < 4; i++ ) {
@@ -526,7 +510,7 @@ micro_ddy(
    dst->f[0] =
    dst->f[1] =
    dst->f[2] =
-   dst->f[3] = src->f[TILE_TOP_LEFT] - src->f[TILE_BOTTOM_LEFT];
+   dst->f[3] = src->f[TILE_BOTTOM_LEFT] - src->f[TILE_TOP_LEFT];
 }
 
 static void
@@ -604,6 +588,24 @@ micro_exp2(
    dst->f[2] = util_fast_exp2( src->f[2] );
    dst->f[3] = util_fast_exp2( src->f[3] );
 #else
+
+#if DEBUG
+   /* Inf is okay for this instruction, so clamp it to silence assertions. */
+   uint i;
+   union tgsi_exec_channel clamped;
+
+   for (i = 0; i < 4; i++) {
+      if (src->f[i] > 127.99999f) {
+         clamped.f[i] = 127.99999f;
+      } else if (src->f[i] < -126.99999f) {
+         clamped.f[i] = -126.99999f;
+      } else {
+         clamped.f[i] = src->f[i];
+      }
+   }
+   src = &clamped;
+#endif
+
    dst->f[0] = powf( 2.0f, src->f[0] );
    dst->f[1] = powf( 2.0f, src->f[1] );
    dst->f[2] = powf( 2.0f, src->f[2] );
@@ -1202,10 +1204,10 @@ fetch_src_file_channel(
          assert(index->i[1] < TGSI_EXEC_NUM_PREDS);
          assert(index->i[2] < TGSI_EXEC_NUM_PREDS);
          assert(index->i[3] < TGSI_EXEC_NUM_PREDS);
-         chan->u[0] = mach->Addrs[0].xyzw[swizzle].u[0];
-         chan->u[1] = mach->Addrs[0].xyzw[swizzle].u[1];
-         chan->u[2] = mach->Addrs[0].xyzw[swizzle].u[2];
-         chan->u[3] = mach->Addrs[0].xyzw[swizzle].u[3];
+         chan->u[0] = mach->Predicates[0].xyzw[swizzle].u[0];
+         chan->u[1] = mach->Predicates[0].xyzw[swizzle].u[1];
+         chan->u[2] = mach->Predicates[0].xyzw[swizzle].u[2];
+         chan->u[3] = mach->Predicates[0].xyzw[swizzle].u[3];
          break;
 
       case TGSI_FILE_OUTPUT:
@@ -1496,10 +1498,17 @@ store_dest(
       dst = &mach->Addrs[index].xyzw[chan_index];
       break;
 
+   case TGSI_FILE_LOOP:
+      assert(reg->DstRegister.Index == 0);
+      assert(mach->LoopCounterStackTop > 0);
+      assert(chan_index == CHAN_X);
+      dst = &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[chan_index];
+      break;
+
    case TGSI_FILE_PREDICATE:
       index = reg->DstRegister.Index;
       assert(index < TGSI_EXEC_NUM_PREDS);
-      dst = &mach->Addrs[index].xyzw[chan_index];
+      dst = &mach->Predicates[index].xyzw[chan_index];
       break;
 
    default:
@@ -1507,6 +1516,47 @@ store_dest(
       return;
    }
 
+   if (inst->Instruction.Predicate) {
+      uint swizzle;
+      union tgsi_exec_channel *pred;
+
+      switch (chan_index) {
+      case CHAN_X:
+         swizzle = inst->InstructionPredicate.SwizzleX;
+         break;
+      case CHAN_Y:
+         swizzle = inst->InstructionPredicate.SwizzleY;
+         break;
+      case CHAN_Z:
+         swizzle = inst->InstructionPredicate.SwizzleZ;
+         break;
+      case CHAN_W:
+         swizzle = inst->InstructionPredicate.SwizzleW;
+         break;
+      default:
+         assert(0);
+         return;
+      }
+
+      assert(inst->InstructionPredicate.Index == 0);
+
+      pred = &mach->Predicates[inst->InstructionPredicate.Index].xyzw[swizzle];
+
+      if (inst->InstructionPredicate.Negate) {
+         for (i = 0; i < QUAD_SIZE; i++) {
+            if (pred->u[i]) {
+               execmask &= ~(1 << i);
+            }
+         }
+      } else {
+         for (i = 0; i < QUAD_SIZE; i++) {
+            if (!pred->u[i]) {
+               execmask &= ~(1 << i);
+            }
+         }
+      }
+   }
+
    switch (inst->Instruction.Saturate) {
    case TGSI_SAT_NONE:
       for (i = 0; i < QUAD_SIZE; i++)
@@ -1762,6 +1812,64 @@ exec_tex(struct tgsi_exec_machine *mach,
    }
 }
 
+static void
+exec_txd(struct tgsi_exec_machine *mach,
+         const struct tgsi_full_instruction *inst)
+{
+   const uint unit = inst->FullSrcRegisters[3].SrcRegister.Index;
+   union tgsi_exec_channel r[4];
+   uint chan_index;
+
+   /*
+    * XXX: This is fake TXD -- the derivatives are not taken into account, yet.
+    */
+
+   switch (inst->InstructionExtTexture.Texture) {
+   case TGSI_TEXTURE_1D:
+   case TGSI_TEXTURE_SHADOW1D:
+
+      FETCH(&r[0], 0, CHAN_X);
+
+      fetch_texel(mach->Samplers[unit],
+                  &r[0], &ZeroVec, &ZeroVec, 0.0f,  /* S, T, P, BIAS */
+                  &r[0], &r[1], &r[2], &r[3]);      /* R, G, B, A */
+      break;
+
+   case TGSI_TEXTURE_2D:
+   case TGSI_TEXTURE_RECT:
+   case TGSI_TEXTURE_SHADOW2D:
+   case TGSI_TEXTURE_SHADOWRECT:
+
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 0, CHAN_Z);
+
+      fetch_texel(mach->Samplers[unit],
+                  &r[0], &r[1], &r[2], 0.0f,    /* inputs */
+                  &r[0], &r[1], &r[2], &r[3]);  /* outputs */
+      break;
+
+   case TGSI_TEXTURE_3D:
+   case TGSI_TEXTURE_CUBE:
+
+      FETCH(&r[0], 0, CHAN_X);
+      FETCH(&r[1], 0, CHAN_Y);
+      FETCH(&r[2], 0, CHAN_Z);
+
+      fetch_texel(mach->Samplers[unit],
+                  &r[0], &r[1], &r[2], 0.0f,
+                  &r[0], &r[1], &r[2], &r[3]);
+      break;
+
+   default:
+      assert(0);
+   }
+
+   FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+      STORE(&r[chan_index], 0, chan_index);
+   }
+}
+
 
 /**
  * Evaluate a constant-valued coefficient at the position of the
@@ -1831,53 +1939,58 @@ typedef void (* eval_coef_func)(
    unsigned chan );
 
 static void
-exec_declaration(
-   struct tgsi_exec_machine *mach,
-   const struct tgsi_full_declaration *decl )
+exec_declaration(struct tgsi_exec_machine *mach,
+                 const struct tgsi_full_declaration *decl)
 {
-   if( mach->Processor == TGSI_PROCESSOR_FRAGMENT ) {
-      if( decl->Declaration.File == TGSI_FILE_INPUT ) {
-         unsigned first, last, mask;
-         eval_coef_func eval;
+   if (mach->Processor == TGSI_PROCESSOR_FRAGMENT) {
+      if (decl->Declaration.File == TGSI_FILE_INPUT) {
+         uint first, last, mask;
 
          first = decl->DeclarationRange.First;
          last = decl->DeclarationRange.Last;
          mask = decl->Declaration.UsageMask;
 
-         switch( decl->Declaration.Interpolate ) {
-         case TGSI_INTERPOLATE_CONSTANT:
-            eval = eval_constant_coef;
-            break;
+         if (decl->Semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+            assert(decl->Semantic.SemanticIndex == 0);
+            assert(first == last);
+            assert(mask = TGSI_WRITEMASK_XYZW);
 
-         case TGSI_INTERPOLATE_LINEAR:
-            eval = eval_linear_coef;
-            break;
+            mach->Inputs[first] = mach->QuadPos;
+         } else if (decl->Semantic.SemanticName == TGSI_SEMANTIC_FACE) {
+            uint i;
 
-         case TGSI_INTERPOLATE_PERSPECTIVE:
-            eval = eval_perspective_coef;
-            break;
+            assert(decl->Semantic.SemanticIndex == 0);
+            assert(first == last);
 
-         default:
-            assert( 0 );
-            return;
-         }
-
-         if( mask == TGSI_WRITEMASK_XYZW ) {
-            unsigned i, j;
-
-            for( i = first; i <= last; i++ ) {
-               for( j = 0; j < NUM_CHANNELS; j++ ) {
-                  eval( mach, i, j );
-               }
+            for (i = 0; i < QUAD_SIZE; i++) {
+               mach->Inputs[first].xyzw[0].f[i] = mach->Face;
+            }
+         } else {
+            eval_coef_func eval;
+            uint i, j;
+
+            switch (decl->Declaration.Interpolate) {
+            case TGSI_INTERPOLATE_CONSTANT:
+               eval = eval_constant_coef;
+               break;
+
+            case TGSI_INTERPOLATE_LINEAR:
+               eval = eval_linear_coef;
+               break;
+
+            case TGSI_INTERPOLATE_PERSPECTIVE:
+               eval = eval_perspective_coef;
+               break;
+
+            default:
+               assert(0);
+               return;
             }
-         }
-         else {
-            unsigned i, j;
 
-            for( j = 0; j < NUM_CHANNELS; j++ ) {
-               if( mask & (1 << j) ) {
-                  for( i = first; i <= last; i++ ) {
-                     eval( mach, i, j );
+            for (j = 0; j < NUM_CHANNELS; j++) {
+               if (mask & (1 << j)) {
+                  for (i = first; i <= last; i++) {
+                     eval(mach, i, j);
                   }
                }
             }
@@ -1894,6 +2007,7 @@ exec_instruction(
 {
    uint chan_index;
    union tgsi_exec_channel r[10];
+   union tgsi_exec_channel d[8];
 
    (*pc)++;
 
@@ -1902,42 +2016,27 @@ exec_instruction(
    case TGSI_OPCODE_FLR:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
-         micro_flr( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
+         micro_flr(&d[chan_index], &r[0]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
    case TGSI_OPCODE_MOV:
-      if (inst->Flags & SOA_DEPENDENCY_FLAG) {
-         /* Do all fetches into temp regs, then do all stores to avoid
-          * intermediate/accidental clobbering.  This could be done all the
-          * time for MOV but for other instructions we'll need more temps...
-          */
-         FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-            FETCH( &r[chan_index], 0, chan_index );
-         }
-         FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-            STORE( &r[chan_index], 0, chan_index );
-         }
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         FETCH(&d[chan_index], 0, chan_index);
       }
-      else {
-         FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
-            FETCH( &r[0], 0, chan_index );
-            STORE( &r[0], 0, chan_index );
-         }
+      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
    case TGSI_OPCODE_LIT:
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
-         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
-      }
-
       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y ) || IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
          FETCH( &r[0], 0, CHAN_X );
          if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-            micro_max( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-            STORE( &r[0], 0, CHAN_Y );
+            micro_max(&d[CHAN_Y], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
          }
 
          if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
@@ -1948,11 +2047,19 @@ exec_instruction(
             micro_min( &r[2], &r[2], &mach->Temps[TEMP_128_I].xyzw[TEMP_128_C] );
             micro_max( &r[2], &r[2], &mach->Temps[TEMP_M128_I].xyzw[TEMP_M128_C] );
             micro_pow( &r[1], &r[1], &r[2] );
-            micro_lt( &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-            STORE( &r[0], 0, CHAN_Z );
+            micro_lt(&d[CHAN_Z], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
          }
-      }
 
+         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+            STORE(&d[CHAN_Y], 0, CHAN_Y);
+         }
+         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+            STORE(&d[CHAN_Z], 0, CHAN_Z);
+         }
+      }
+      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
+         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
+      }
       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
       }
@@ -2020,14 +2127,13 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_MUL:
-      FOR_EACH_ENABLED_CHANNEL( *inst, chan_index )
-      {
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
          FETCH(&r[0], 0, chan_index);
          FETCH(&r[1], 1, chan_index);
-
-         micro_mul( &r[0], &r[0], &r[1] );
-
-         STORE(&r[0], 0, chan_index);
+         micro_mul(&d[chan_index], &r[0], &r[1]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2035,8 +2141,10 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-         micro_add( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
+         micro_add(&d[chan_index], &r[0], &r[1]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2092,25 +2200,29 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_DST:
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
-         STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X );
-      }
-
       if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
          FETCH( &r[0], 0, CHAN_Y );
          FETCH( &r[1], 1, CHAN_Y);
-         micro_mul( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, CHAN_Y );
+         micro_mul(&d[CHAN_Y], &r[0], &r[1]);
       }
-
       if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-         FETCH( &r[0], 0, CHAN_Z );
-         STORE( &r[0], 0, CHAN_Z );
+         FETCH(&d[CHAN_Z], 0, CHAN_Z);
       }
-
       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
-         FETCH( &r[0], 1, CHAN_W );
-         STORE( &r[0], 0, CHAN_W );
+         FETCH(&d[CHAN_W], 1, CHAN_W);
+      }
+
+      if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+         STORE(&mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_X);
+      }
+      if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+         STORE(&d[CHAN_Y], 0, CHAN_Y);
+      }
+      if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+         STORE(&d[CHAN_Z], 0, CHAN_Z);
+      }
+      if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+         STORE(&d[CHAN_W], 0, CHAN_W);
       }
       break;
 
@@ -2120,9 +2232,10 @@ exec_instruction(
          FETCH(&r[1], 1, chan_index);
 
          /* XXX use micro_min()?? */
-         micro_lt( &r[0], &r[0], &r[1], &r[0], &r[1] );
-
-         STORE(&r[0], 0, chan_index);
+         micro_lt(&d[chan_index], &r[0], &r[1], &r[0], &r[1]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2132,9 +2245,10 @@ exec_instruction(
          FETCH(&r[1], 1, chan_index);
 
          /* XXX use micro_max()?? */
-         micro_lt( &r[0], &r[0], &r[1], &r[1], &r[0] );
-
-         STORE(&r[0], 0, chan_index );
+         micro_lt(&d[chan_index], &r[0], &r[1], &r[1], &r[0] );
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2143,8 +2257,10 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-         micro_lt( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-         STORE( &r[0], 0, chan_index );
+         micro_lt(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2153,9 +2269,10 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-
-         micro_le( &r[0], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-         STORE( &r[0], 0, chan_index );
+         micro_le(&d[chan_index], &r[1], &r[0], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2166,8 +2283,10 @@ exec_instruction(
          FETCH( &r[1], 1, chan_index );
          micro_mul( &r[0], &r[0], &r[1] );
          FETCH( &r[1], 2, chan_index );
-         micro_add( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
+         micro_add(&d[chan_index], &r[0], &r[1]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2175,10 +2294,10 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH(&r[0], 0, chan_index);
          FETCH(&r[1], 1, chan_index);
-
-         micro_sub( &r[0], &r[0], &r[1] );
-
-         STORE(&r[0], 0, chan_index);
+         micro_sub(&d[chan_index], &r[0], &r[1]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2187,12 +2306,12 @@ exec_instruction(
          FETCH(&r[0], 0, chan_index);
          FETCH(&r[1], 1, chan_index);
          FETCH(&r[2], 2, chan_index);
-
          micro_sub( &r[1], &r[1], &r[2] );
          micro_mul( &r[0], &r[0], &r[1] );
-         micro_add( &r[0], &r[0], &r[2] );
-
-         STORE(&r[0], 0, chan_index);
+         micro_add(&d[chan_index], &r[0], &r[2]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2201,8 +2320,10 @@ exec_instruction(
          FETCH(&r[0], 0, chan_index);
          FETCH(&r[1], 1, chan_index);
          FETCH(&r[2], 2, chan_index);
-         micro_lt(&r[0], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
-         STORE(&r[0], 0, chan_index);
+         micro_lt(&d[chan_index], &mach->Temps[TEMP_HALF_I].xyzw[TEMP_HALF_C], &r[2], &r[0], &r[1]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2227,8 +2348,10 @@ exec_instruction(
    case TGSI_OPCODE_FRC:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
-         micro_frc( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
+         micro_frc(&d[chan_index], &r[0]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2238,8 +2361,10 @@ exec_instruction(
          FETCH(&r[1], 1, chan_index);
          micro_max(&r[0], &r[0], &r[1]);
          FETCH(&r[1], 2, chan_index);
-         micro_min(&r[0], &r[0], &r[1]);
-         STORE(&r[0], 0, chan_index);
+         micro_min(&d[chan_index], &r[0], &r[1]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2247,19 +2372,17 @@ exec_instruction(
    case TGSI_OPCODE_ARR:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
-         micro_rnd( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
+         micro_rnd(&d[chan_index], &r[0]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
    case TGSI_OPCODE_EX2:
       FETCH(&r[0], 0, CHAN_X);
 
-#if FAST_MATH
       micro_exp2( &r[0], &r[0] );
-#else
-      micro_pow( &r[0], &mach->Temps[TEMP_2_I].xyzw[TEMP_2_C], &r[0] );
-#endif
 
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          STORE( &r[0], 0, chan_index );
@@ -2295,11 +2418,7 @@ exec_instruction(
       FETCH(&r[4], 1, CHAN_Y);
 
       micro_mul( &r[5], &r[3], &r[4] );
-      micro_sub( &r[2], &r[2], &r[5] );
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_X )) {
-         STORE( &r[2], 0, CHAN_X );
-      }
+      micro_sub(&d[CHAN_X], &r[2], &r[5]);
 
       FETCH(&r[2], 1, CHAN_X);
 
@@ -2308,20 +2427,21 @@ exec_instruction(
       FETCH(&r[5], 0, CHAN_X);
 
       micro_mul( &r[1], &r[1], &r[5] );
-      micro_sub( &r[3], &r[3], &r[1] );
-
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Y )) {
-         STORE( &r[3], 0, CHAN_Y );
-      }
+      micro_sub(&d[CHAN_Y], &r[3], &r[1]);
 
       micro_mul( &r[5], &r[5], &r[4] );
       micro_mul( &r[0], &r[0], &r[2] );
-      micro_sub( &r[5], &r[5], &r[0] );
+      micro_sub(&d[CHAN_Z], &r[5], &r[0]);
 
-      if (IS_CHANNEL_ENABLED( *inst, CHAN_Z )) {
-         STORE( &r[5], 0, CHAN_Z );
+      if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+         STORE(&d[CHAN_X], 0, CHAN_X);
+      }
+      if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+         STORE(&d[CHAN_Y], 0, CHAN_Y);
+      }
+      if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+         STORE(&d[CHAN_Z], 0, CHAN_Z);
       }
-
       if (IS_CHANNEL_ENABLED( *inst, CHAN_W )) {
          STORE( &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], 0, CHAN_W );
       }
@@ -2330,11 +2450,11 @@ exec_instruction(
     case TGSI_OPCODE_ABS:
        FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
           FETCH(&r[0], 0, chan_index);
-
-          micro_abs( &r[0], &r[0] );
-
-          STORE(&r[0], 0, chan_index);
+          micro_abs(&d[chan_index], &r[0]);
        }
+       FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
+      }
        break;
 
    case TGSI_OPCODE_RCC:
@@ -2386,16 +2506,20 @@ exec_instruction(
    case TGSI_OPCODE_DDX:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
-         micro_ddx( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
+         micro_ddx(&d[chan_index], &r[0]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
    case TGSI_OPCODE_DDY:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
-         micro_ddy( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
+         micro_ddy(&d[chan_index], &r[0]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2476,10 +2600,10 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-         micro_eq( &r[0], &r[0], &r[1],
-                   &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C],
-                   &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-         STORE( &r[0], 0, chan_index );
+         micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2493,8 +2617,10 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
-         STORE( &r[0], 0, chan_index );
+         micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2510,8 +2636,10 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-         micro_le( &r[0], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C] );
-         STORE( &r[0], 0, chan_index );
+         micro_le(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2519,8 +2647,10 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-         micro_eq( &r[0], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
-         STORE( &r[0], 0, chan_index );
+         micro_eq(&d[chan_index], &r[0], &r[1], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2550,7 +2680,7 @@ exec_instruction(
       /* src[1] = d[strq]/dx */
       /* src[2] = d[strq]/dy */
       /* src[3] = sampler unit */
-      assert (0);
+      exec_txd(mach, inst);
       break;
 
    case TGSI_OPCODE_TXL:
@@ -2594,13 +2724,8 @@ exec_instruction(
          micro_mul(&r[3], &r[3], &r[1]);
          micro_add(&r[2], &r[2], &r[3]);
          FETCH(&r[3], 0, CHAN_X);
-         micro_add(&r[2], &r[2], &r[3]);
-         if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
-            STORE(&r[2], 0, CHAN_X);
-         }
-         if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
-            STORE(&r[2], 0, CHAN_Z);
-         }
+         micro_add(&d[CHAN_X], &r[2], &r[3]);
+         
       }
       if (IS_CHANNEL_ENABLED(*inst, CHAN_Y) ||
           IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
@@ -2610,13 +2735,20 @@ exec_instruction(
          micro_mul(&r[3], &r[3], &r[1]);
          micro_add(&r[2], &r[2], &r[3]);
          FETCH(&r[3], 0, CHAN_Y);
-         micro_add(&r[2], &r[2], &r[3]);
-         if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
-            STORE(&r[2], 0, CHAN_Y);
-         }
-         if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
-            STORE(&r[2], 0, CHAN_W);
-         }
+         micro_add(&d[CHAN_Y], &r[2], &r[3]);
+         
+      }
+      if (IS_CHANNEL_ENABLED(*inst, CHAN_X)) {
+         STORE(&d[CHAN_X], 0, CHAN_X);
+      }
+      if (IS_CHANNEL_ENABLED(*inst, CHAN_Y)) {
+         STORE(&d[CHAN_Y], 0, CHAN_Y);
+      }
+      if (IS_CHANNEL_ENABLED(*inst, CHAN_Z)) {
+         STORE(&d[CHAN_X], 0, CHAN_Z);
+      }
+      if (IS_CHANNEL_ENABLED(*inst, CHAN_W)) {
+         STORE(&d[CHAN_Y], 0, CHAN_W);
       }
       break;
 
@@ -2701,8 +2833,10 @@ exec_instruction(
    /* TGSI_OPCODE_SGN */
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
-         micro_sgn( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
+         micro_sgn(&d[chan_index], &r[0]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2711,10 +2845,10 @@ exec_instruction(
          FETCH(&r[0], 0, chan_index);
          FETCH(&r[1], 1, chan_index);
          FETCH(&r[2], 2, chan_index);
-
-         micro_lt( &r[0], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2] );
-
-         STORE(&r[0], 0, chan_index);
+         micro_lt(&d[chan_index], &r[0], &mach->Temps[TEMP_0_I].xyzw[TEMP_0_C], &r[1], &r[2]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2889,32 +3023,40 @@ exec_instruction(
    case TGSI_OPCODE_CEIL:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
-         micro_ceil( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
+         micro_ceil(&d[chan_index], &r[0]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
    case TGSI_OPCODE_I2F:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
-         micro_i2f( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
+         micro_i2f(&d[chan_index], &r[0]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
    case TGSI_OPCODE_NOT:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
-         micro_not( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
+         micro_not(&d[chan_index], &r[0]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
    case TGSI_OPCODE_TRUNC:
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
-         micro_trunc( &r[0], &r[0] );
-         STORE( &r[0], 0, chan_index );
+         micro_trunc(&d[chan_index], &r[0]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2922,8 +3064,10 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-         micro_shl( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
+         micro_shl(&d[chan_index], &r[0], &r[1]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2931,8 +3075,10 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-         micro_ishr( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
+         micro_ishr(&d[chan_index], &r[0], &r[1]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2940,8 +3086,10 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-         micro_and( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
+         micro_and(&d[chan_index], &r[0], &r[1]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2949,8 +3097,10 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-         micro_or( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
+         micro_or(&d[chan_index], &r[0], &r[1]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2962,8 +3112,10 @@ exec_instruction(
       FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
          FETCH( &r[0], 0, chan_index );
          FETCH( &r[1], 1, chan_index );
-         micro_xor( &r[0], &r[0], &r[1] );
-         STORE( &r[0], 0, chan_index );
+         micro_xor(&d[chan_index], &r[0], &r[1]);
+      }
+      FOR_EACH_ENABLED_CHANNEL(*inst, chan_index) {
+         STORE(&d[chan_index], 0, chan_index);
       }
       break;
 
@@ -2992,8 +3144,23 @@ exec_instruction(
       for (chan_index = 0; chan_index < 3; chan_index++) {
          FETCH( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[chan_index], 0, chan_index );
       }
-      STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
       ++mach->LoopCounterStackTop;
+      STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 0, CHAN_X);
+      /* update LoopMask */
+      if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
+         mach->LoopMask &= ~0x1;
+      }
+      if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
+         mach->LoopMask &= ~0x2;
+      }
+      if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
+         mach->LoopMask &= ~0x4;
+      }
+      if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
+         mach->LoopMask &= ~0x8;
+      }
+      /* TODO: if mach->LoopMask == 0, jump to end of loop */
+      UPDATE_EXEC_MASK(mach);
       /* fall-through (for now) */
    case TGSI_OPCODE_BGNLOOP:
       /* push LoopMask and ContMasks */
@@ -3007,28 +3174,28 @@ exec_instruction(
 
    case TGSI_OPCODE_ENDFOR:
       assert(mach->LoopCounterStackTop > 0);
-      micro_sub( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 
-                 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X],
-                 &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C] );
+      micro_sub(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 
+                &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y],
+                &mach->Temps[TEMP_1_I].xyzw[TEMP_1_C]);
       /* update LoopMask */
-      if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[0] <= 0) {
+      if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[0] <= 0.0f) {
          mach->LoopMask &= ~0x1;
       }
-      if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[1] <= 0 ) {
+      if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[1] <= 0.0f) {
          mach->LoopMask &= ~0x2;
       }
-      if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[2] <= 0 ) {
+      if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[2] <= 0.0f) {
          mach->LoopMask &= ~0x4;
       }
-      if( mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X].f[3] <= 0 ) {
+      if (mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y].f[3] <= 0.0f) {
          mach->LoopMask &= ~0x8;
       }
-      micro_add( &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 
-                 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Y], 
-                 &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
+      micro_add(&mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 
+                &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_X], 
+                &mach->LoopCounterStack[mach->LoopCounterStackTop - 1].xyzw[CHAN_Z]);
       assert(mach->LoopLabelStackTop > 0);
       inst = mach->Instructions + mach->LoopLabelStack[mach->LoopLabelStackTop - 1];
-      STORE( &mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_Y], 0, CHAN_X );
+      STORE(&mach->LoopCounterStack[mach->LoopCounterStackTop].xyzw[CHAN_X], 0, CHAN_X);
       /* Restore ContMask, but don't pop */
       assert(mach->ContStackTop > 0);
       mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
@@ -3095,7 +3262,28 @@ exec_instruction(
       break;
 
    case TGSI_OPCODE_ENDSUB:
-      /* no-op */
+      /*
+       * XXX: This really should be a no-op. We should never reach this opcode.
+       */
+
+      assert(mach->CallStackTop > 0);
+      mach->CallStackTop--;
+
+      mach->CondStackTop = mach->CallStack[mach->CallStackTop].CondStackTop;
+      mach->CondMask = mach->CondStack[mach->CondStackTop];
+
+      mach->LoopStackTop = mach->CallStack[mach->CallStackTop].LoopStackTop;
+      mach->LoopMask = mach->LoopStack[mach->LoopStackTop];
+
+      mach->ContStackTop = mach->CallStack[mach->CallStackTop].ContStackTop;
+      mach->ContMask = mach->ContStack[mach->ContStackTop];
+
+      assert(mach->FuncStackTop > 0);
+      mach->FuncMask = mach->FuncStack[--mach->FuncStackTop];
+
+      *pc = mach->CallStack[mach->CallStackTop].ReturnAddr;
+
+      UPDATE_EXEC_MASK(mach);
       break;
 
    case TGSI_OPCODE_NOP:
@@ -3106,6 +3294,8 @@ exec_instruction(
    }
 }
 
+#define DEBUG_EXECUTION 0
+
 
 /**
  * Run TGSI interpreter.
@@ -3149,10 +3339,67 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
       exec_declaration( mach, mach->Declarations+i );
    }
 
-   /* execute instructions, until pc is set to -1 */
-   while (pc != -1) {
-      assert(pc < (int) mach->NumInstructions);
-      exec_instruction( mach, mach->Instructions + pc, &pc );
+   {
+#if DEBUG_EXECUTION
+      struct tgsi_exec_vector temps[TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS];
+      struct tgsi_exec_vector outputs[PIPE_MAX_ATTRIBS];
+      uint inst = 1;
+
+      memcpy(temps, mach->Temps, sizeof(temps));
+      memcpy(outputs, mach->Outputs, sizeof(outputs));
+#endif
+
+      /* execute instructions, until pc is set to -1 */
+      while (pc != -1) {
+
+#if DEBUG_EXECUTION
+         uint i;
+
+         tgsi_dump_instruction(&mach->Instructions[pc], inst++);
+#endif
+
+         assert(pc < (int) mach->NumInstructions);
+         exec_instruction(mach, mach->Instructions + pc, &pc);
+
+#if DEBUG_EXECUTION
+         for (i = 0; i < TGSI_EXEC_NUM_TEMPS + TGSI_EXEC_NUM_TEMP_EXTRAS; i++) {
+            if (memcmp(&temps[i], &mach->Temps[i], sizeof(temps[i]))) {
+               uint j;
+
+               memcpy(&temps[i], &mach->Temps[i], sizeof(temps[i]));
+               debug_printf("TEMP[%2u] = ", i);
+               for (j = 0; j < 4; j++) {
+                  if (j > 0) {
+                     debug_printf("           ");
+                  }
+                  debug_printf("(%6f, %6f, %6f, %6f)\n",
+                               temps[i].xyzw[0].f[j],
+                               temps[i].xyzw[1].f[j],
+                               temps[i].xyzw[2].f[j],
+                               temps[i].xyzw[3].f[j]);
+               }
+            }
+         }
+         for (i = 0; i < PIPE_MAX_ATTRIBS; i++) {
+            if (memcmp(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]))) {
+               uint j;
+
+               memcpy(&outputs[i], &mach->Outputs[i], sizeof(outputs[i]));
+               debug_printf("OUT[%2u] =  ", i);
+               for (j = 0; j < 4; j++) {
+                  if (j > 0) {
+                     debug_printf("           ");
+                  }
+                  debug_printf("{%6f, %6f, %6f, %6f}\n",
+                               outputs[i].xyzw[0].f[j],
+                               outputs[i].xyzw[1].f[j],
+                               outputs[i].xyzw[2].f[j],
+                               outputs[i].xyzw[3].f[j]);
+               }
+            }
+         }
+#endif
+      }
    }
 
 #if 0
@@ -3166,5 +3413,10 @@ tgsi_exec_machine_run( struct tgsi_exec_machine *mach )
    }
 #endif
 
+   assert(mach->CondStackTop == 0);
+   assert(mach->LoopStackTop == 0);
+   assert(mach->ContStackTop == 0);
+   assert(mach->CallStackTop == 0);
+
    return ~mach->Temps[TEMP_KILMASK_I].xyzw[TEMP_KILMASK_C].u[0];
 }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index 71fd300cbb0..afaf5c39c48 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -225,6 +225,7 @@ struct tgsi_exec_machine
    struct tgsi_exec_vector       Outputs[TGSI_MAX_TOTAL_VERTICES];
 
    struct tgsi_exec_vector       *Addrs;
+   struct tgsi_exec_vector       *Predicates;
 
    struct tgsi_sampler           **Samplers;
 
@@ -241,6 +242,7 @@ struct tgsi_exec_machine
    /* FRAGMENT processor only. */
    const struct tgsi_interp_coef *InterpCoefs;
    struct tgsi_exec_vector       QuadPos;
+   float                         Face;    /**< +1 if front facing, -1 if back facing */
 
    /* Conditional execution masks */
    uint CondMask;  /**< For IF/ELSE/ENDIF */
@@ -261,7 +263,7 @@ struct tgsi_exec_machine
    uint LoopLabelStack[TGSI_EXEC_MAX_LOOP_NESTING];
    int LoopLabelStackTop;
 
-   /** Loop counter stack (x = count, y = current, z = step) */
+   /** Loop counter stack (x = index, y = counter, z = step) */
    struct tgsi_exec_vector LoopCounterStack[TGSI_EXEC_MAX_LOOP_NESTING];
    int LoopCounterStackTop;
    
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 83f9df1183e..9ca29934528 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -175,6 +175,10 @@ tgsi_parse_token(
       copy_token(&inst->Instruction, &token);
       extended = inst->Instruction.Extended;
 
+      if (inst->Instruction.Predicate) {
+         next_token(ctx, &inst->InstructionPredicate);
+      }
+
       while( extended ) {
          struct tgsi_src_register_ext token;
 
@@ -189,10 +193,6 @@ tgsi_parse_token(
             copy_token(&inst->InstructionExtTexture, &token);
             break;
 
-         case TGSI_INSTRUCTION_EXT_TYPE_PREDICATE:
-            copy_token(&inst->InstructionExtPredicate, &token);
-            break;
-
          default:
             assert( 0 );
          }
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 76f1676d85d..cb4772ade8d 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -80,9 +80,9 @@ struct tgsi_full_immediate
 struct tgsi_full_instruction
 {
    struct tgsi_instruction             Instruction;
+   struct tgsi_instruction_predicate   InstructionPredicate;
    struct tgsi_instruction_ext_label   InstructionExtLabel;
    struct tgsi_instruction_ext_texture InstructionExtTexture;
-   struct tgsi_instruction_ext_predicate InstructionExtPredicate;
    struct tgsi_full_dst_register       FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS];
    struct tgsi_full_src_register       FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS];
    uint Flags;  /**< user-defined usage */
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
index 80e6ff2b795..557c885a8ad 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -47,9 +47,9 @@ union tgsi_any_token {
    struct tgsi_immediate imm;
    union  tgsi_immediate_data imm_data;
    struct tgsi_instruction insn;
+   struct tgsi_instruction_predicate insn_predicate;
    struct tgsi_instruction_ext_label insn_ext_label;
    struct tgsi_instruction_ext_texture insn_ext_texture;
-   struct tgsi_instruction_ext_predicate insn_ext_predicate;
    struct tgsi_src_register src;
    struct tgsi_src_register_ext_mod src_ext_mod;
    struct tgsi_dimension dim;
@@ -72,6 +72,7 @@ struct ureg_tokens {
 #define UREG_MAX_IMMEDIATE 32
 #define UREG_MAX_TEMP 256
 #define UREG_MAX_ADDR 2
+#define UREG_MAX_LOOP 1
 #define UREG_MAX_PRED 1
 
 #define DOMAIN_DECL 0
@@ -123,6 +124,7 @@ struct ureg_program
 
    unsigned nr_addrs;
    unsigned nr_preds;
+   unsigned nr_loops;
    unsigned nr_instructions;
 
    struct ureg_tokens domain[2];
@@ -383,6 +385,7 @@ struct ureg_src ureg_DECL_constant(struct ureg_program *ureg,
       i = ureg->nr_constant_ranges++;
       ureg->constant_range[i].first = index;
       ureg->constant_range[i].last = index;
+      goto out;
    }
 
    /* Collapse all ranges down to one:
@@ -450,6 +453,19 @@ struct ureg_dst ureg_DECL_address( struct ureg_program *ureg )
    return ureg_dst_register( TGSI_FILE_ADDRESS, 0 );
 }
 
+/* Allocate a new loop register.
+ */
+struct ureg_dst
+ureg_DECL_loop(struct ureg_program *ureg)
+{
+   if (ureg->nr_loops < UREG_MAX_LOOP) {
+      return ureg_dst_register(TGSI_FILE_LOOP, ureg->nr_loops++);
+   }
+
+   assert(0);
+   return ureg_dst_register(TGSI_FILE_LOOP, 0);
+}
+
 /* Allocate a new predicate register.
  */
 struct ureg_dst
@@ -694,35 +710,27 @@ ureg_emit_insn(struct ureg_program *ureg,
    validate( opcode, num_dst, num_src );
    
    out = get_tokens( ureg, DOMAIN_INSN, count );
-   out[0].value = 0;
-   out[0].insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
-   out[0].insn.NrTokens = 0;
+   out[0].insn = tgsi_default_instruction();
    out[0].insn.Opcode = opcode;
    out[0].insn.Saturate = saturate;
    out[0].insn.NumDstRegs = num_dst;
    out[0].insn.NumSrcRegs = num_src;
-   out[0].insn.Padding = 0;
 
    result.insn_token = ureg->domain[DOMAIN_INSN].count - count;
+   result.extended_token = result.insn_token;
 
    if (predicate) {
-      out[0].insn.Extended = 1;
-      out[1].insn_ext_predicate = tgsi_default_instruction_ext_predicate();
-      out[1].insn_ext_predicate.Negate = pred_negate;
-      out[1].insn_ext_predicate.SwizzleX = pred_swizzle_x;
-      out[1].insn_ext_predicate.SwizzleY = pred_swizzle_y;
-      out[1].insn_ext_predicate.SwizzleZ = pred_swizzle_z;
-      out[1].insn_ext_predicate.SwizzleW = pred_swizzle_w;
-
-      result.extended_token = result.insn_token + 1;
-   } else {
-      out[0].insn.Extended = 0;
-
-      result.extended_token = result.insn_token;
+      out[0].insn.Predicate = 1;
+      out[1].insn_predicate = tgsi_default_instruction_predicate();
+      out[1].insn_predicate.Negate = pred_negate;
+      out[1].insn_predicate.SwizzleX = pred_swizzle_x;
+      out[1].insn_predicate.SwizzleY = pred_swizzle_y;
+      out[1].insn_predicate.SwizzleZ = pred_swizzle_z;
+      out[1].insn_predicate.SwizzleW = pred_swizzle_w;
    }
 
    ureg->nr_instructions++;
-   
+
    return result;
 }
 
@@ -1065,6 +1073,13 @@ static void emit_decls( struct ureg_program *ureg )
                        0, ureg->nr_addrs );
    }
 
+   if (ureg->nr_loops) {
+      emit_decl_range(ureg,
+                      TGSI_FILE_LOOP,
+                      0,
+                      ureg->nr_loops);
+   }
+
    if (ureg->nr_preds) {
       emit_decl_range(ureg,
                       TGSI_FILE_PREDICATE,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
index e6fad1f0f40..b89f55d5a87 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ureg.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -163,6 +163,9 @@ struct ureg_dst
 ureg_DECL_address( struct ureg_program * );
 
 struct ureg_dst
+ureg_DECL_loop( struct ureg_program * );
+
+struct ureg_dst
 ureg_DECL_predicate(struct ureg_program *);
 
 /* Supply an index to the sampler declaration as this is the hook to
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 50386425995..5372df57352 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -354,9 +354,9 @@ util_blit_pixels_writemask(struct blit_state *ctx,
       texTemp.target = PIPE_TEXTURE_2D;
       texTemp.format = src->format;
       texTemp.last_level = 0;
-      texTemp.width[0] = srcW;
-      texTemp.height[0] = srcH;
-      texTemp.depth[0] = 1;
+      texTemp.width0 = srcW;
+      texTemp.height0 = srcH;
+      texTemp.depth0 = 1;
       pf_get_block(src->format, &texTemp.block);
 
       tex = screen->texture_create(screen, &texTemp);
@@ -389,10 +389,10 @@ util_blit_pixels_writemask(struct blit_state *ctx,
    }
    else {
       pipe_texture_reference(&tex, src->texture);
-      s0 = srcX0 / (float)tex->width[0];
-      s1 = srcX1 / (float)tex->width[0];
-      t0 = srcY0 / (float)tex->height[0];
-      t1 = srcY1 / (float)tex->height[0];
+      s0 = srcX0 / (float)tex->width0;
+      s1 = srcX1 / (float)tex->width0;
+      t0 = srcY0 / (float)tex->height0;
+      t1 = srcY1 / (float)tex->height0;
    }
 
 
@@ -518,13 +518,13 @@ util_blit_pixels_tex(struct blit_state *ctx,
    assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
           filter == PIPE_TEX_MIPFILTER_LINEAR);
 
-   assert(tex->width[0] != 0);
-   assert(tex->height[0] != 0);
+   assert(tex->width0 != 0);
+   assert(tex->height0 != 0);
 
-   s0 = srcX0 / (float)tex->width[0];
-   s1 = srcX1 / (float)tex->width[0];
-   t0 = srcY0 / (float)tex->height[0];
-   t1 = srcY1 / (float)tex->height[0];
+   s0 = srcX0 / (float)tex->width0;
+   s1 = srcX1 / (float)tex->width0;
+   t0 = srcY0 / (float)tex->height0;
+   t1 = srcY1 / (float)tex->height0;
 
    assert(ctx->pipe->screen->is_format_supported(ctx->pipe->screen, dst->format,
                                                  PIPE_TEXTURE_2D,
diff --git a/src/gallium/auxiliary/util/u_cpu_detect.c b/src/gallium/auxiliary/util/u_cpu_detect.c
index facbe69173b..a08241971ca 100644
--- a/src/gallium/auxiliary/util/u_cpu_detect.c
+++ b/src/gallium/auxiliary/util/u_cpu_detect.c
@@ -80,7 +80,7 @@ static int has_cpuid(void);
 #if defined(PIPE_ARCH_X86)
 
 /* The sigill handlers */
-#if defined(PIPE_OS_LINUX) //&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)
+#if defined(PIPE_OS_LINUX) /*&& defined(_POSIX_SOURCE) && defined(X86_FXSR_MAGIC)*/
 static void
 sigill_handler_sse(int signal, struct sigcontext sc)
 {
@@ -240,7 +240,7 @@ check_os_katmai_support(void)
       __asm __volatile ("xorps %xmm0, %xmm0");
 #elif defined(PIPE_CC_MSVC)
       __asm {
-          xorps xmm0, xmm0        // executing SSE instruction
+          xorps xmm0, xmm0        /* executing SSE instruction */
       }
 #else
 #error Unsupported compiler
@@ -283,7 +283,7 @@ check_os_katmai_support(void)
     * and therefore to be safe I'm going to leave this test in here.
     */
    if (util_cpu_caps.has_sse) {
-      //      test_os_katmai_exception_support();
+      /* test_os_katmai_exception_support(); */
    }
 
    /* Restore the original signal handlers.
diff --git a/src/gallium/auxiliary/util/u_format.csv b/src/gallium/auxiliary/util/u_format.csv
index f1bf94f17dd..b9cc2aa716e 100644
--- a/src/gallium/auxiliary/util/u_format.csv
+++ b/src/gallium/auxiliary/util/u_format.csv
@@ -97,3 +97,13 @@ PIPE_FORMAT_B8G8R8A8_SRGB         , arith , 1, 1, u8  , u8  , u8  , u8  , zyxw,
 PIPE_FORMAT_B8G8R8X8_SRGB         , arith , 1, 1, u8  , u8  , u8  , u8  , zyx1, srgb 
 PIPE_FORMAT_X8UB8UG8SR8S_NORM     , arith , 1, 1, sn8 , sn8 , un8 , x8  , 1zyx, rgb
 PIPE_FORMAT_B6UG5SR5S_NORM        , arith , 1, 1, sn5 , sn5 , un6 ,     , xyz1, rgb
+PIPE_FORMAT_YCBCR                 , yuv   , 2, 1, x32 ,     ,     ,     , xyz1, yuv
+PIPE_FORMAT_YCBCR_REV             , yuv   , 2, 1, x32 ,     ,     ,     , xyz1, yuv
+PIPE_FORMAT_DXT1_RGBA             , dxt   , 4, 4, x64 ,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_DXT1_RGB              , dxt   , 4, 4, x64 ,     ,     ,     , xyz1, rgb
+PIPE_FORMAT_DXT3_RGBA             , dxt   , 4, 4, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_DXT5_RGBA             , dxt   , 4, 4, x128,     ,     ,     , xyzw, rgb
+PIPE_FORMAT_DXT1_SRGBA            , dxt   , 4, 4, x64 ,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_DXT1_SRGB             , dxt   , 4, 4, x64 ,     ,     ,     , xyz1, srgb
+PIPE_FORMAT_DXT3_SRGBA            , dxt   , 4, 4, x128,     ,     ,     , xyzw, srgb
+PIPE_FORMAT_DXT5_SRGBA            , dxt   , 4, 4, x128,     ,     ,     , xyzw, srgb
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index aa823aa218b..f67f1e458d4 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -45,6 +45,7 @@
 #include "util/u_draw_quad.h"
 #include "util/u_gen_mipmap.h"
 #include "util/u_simple_shaders.h"
+#include "util/u_math.h"
 
 #include "cso_cache/cso_context.h"
 
@@ -1125,12 +1126,12 @@ make_1d_mipmap(struct gen_mipmap_state *ctx,
       
       srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
                                           PIPE_TRANSFER_READ, 0, 0,
-                                          pt->width[srcLevel],
-                                          pt->height[srcLevel]);
+                                          u_minify(pt->width0, srcLevel),
+                                          u_minify(pt->height0, srcLevel));
       dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
                                           PIPE_TRANSFER_WRITE, 0, 0,
-                                          pt->width[dstLevel],
-                                          pt->height[dstLevel]);
+                                          u_minify(pt->width0, dstLevel),
+                                          u_minify(pt->height0, dstLevel));
 
       srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
       dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
@@ -1168,12 +1169,12 @@ make_2d_mipmap(struct gen_mipmap_state *ctx,
       
       srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
                                           PIPE_TRANSFER_READ, 0, 0,
-                                          pt->width[srcLevel],
-                                          pt->height[srcLevel]);
+                                          u_minify(pt->width0, srcLevel),
+                                          u_minify(pt->height0, srcLevel));
       dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
                                           PIPE_TRANSFER_WRITE, 0, 0,
-                                          pt->width[dstLevel],
-                                          pt->height[dstLevel]);
+                                          u_minify(pt->width0, dstLevel),
+                                          u_minify(pt->height0, dstLevel));
 
       srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
       dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
@@ -1213,12 +1214,12 @@ make_3d_mipmap(struct gen_mipmap_state *ctx,
       
       srcTrans = screen->get_tex_transfer(screen, pt, face, srcLevel, zslice,
                                           PIPE_TRANSFER_READ, 0, 0,
-                                          pt->width[srcLevel],
-                                          pt->height[srcLevel]);
+                                          u_minify(pt->width0, srcLevel),
+                                          u_minify(pt->height0, srcLevel));
       dstTrans = screen->get_tex_transfer(screen, pt, face, dstLevel, zslice,
                                           PIPE_TRANSFER_WRITE, 0, 0,
-                                          pt->width[dstLevel],
-                                          pt->height[dstLevel]);
+                                          u_minify(pt->width0, dstLevel),
+                                          u_minify(pt->height0, dstLevel));
 
       srcMap = (ubyte *) screen->transfer_map(screen, srcTrans);
       dstMap = (ubyte *) screen->transfer_map(screen, dstTrans);
@@ -1575,8 +1576,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
        * Setup framebuffer / dest surface
        */
       fb.cbufs[0] = surf;
-      fb.width = pt->width[dstLevel];
-      fb.height = pt->height[dstLevel];
+      fb.width = u_minify(pt->width0, dstLevel);
+      fb.height = u_minify(pt->height0, dstLevel);
       cso_set_framebuffer(ctx->cso, &fb);
 
       /*
@@ -1597,8 +1598,8 @@ util_gen_mipmap(struct gen_mipmap_state *ctx,
       offset = set_vertex_data(ctx,
                                pt->target,
                                face,
-                               (float) pt->width[dstLevel],
-                               (float) pt->height[dstLevel]);
+                               (float) u_minify(pt->width0, dstLevel),
+                               (float) u_minify(pt->height0, dstLevel));
 
       util_draw_vertex_buffer(ctx->pipe, 
                               ctx->vbuf,
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index 75b075f160d..7e75702701d 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -491,6 +491,43 @@ util_next_power_of_two(unsigned x)
 
 
 /**
+ * Return number of bits set in n.
+ */
+static INLINE unsigned
+util_bitcount(unsigned n)
+{
+#if defined(PIPE_CC_GCC)
+   return __builtin_popcount(n);
+#else
+   /* XXX there are more clever ways of doing this */
+   unsigned bits = 0;
+   while (n) {
+      bits += (n & 1);
+      n = n >> 1;
+   }
+   return bits;
+#endif
+}
+
+
+/**
+ * Reverse byte order of a 32 bit word.
+ */
+static INLINE uint32_t
+util_bswap32(uint32_t n)
+{
+#if defined(PIPE_CC_GCC)
+   return __builtin_bswap32(n);
+#else
+   return (n >> 24) |
+          ((n >> 8) & 0x0000ff00) |
+          ((n << 8) & 0x00ff0000) |
+          (n << 24);
+#endif
+}
+
+
+/**
  * Clamp X to [MIN, MAX].
  * This is a macro to allow float, int, uint, etc. types.
  */
@@ -499,6 +536,9 @@ util_next_power_of_two(unsigned x)
 #define MIN2( A, B )   ( (A)<(B) ? (A) : (B) )
 #define MAX2( A, B )   ( (A)>(B) ? (A) : (B) )
 
+#define MIN3( A, B, C ) MIN2( MIN2( A, B ), C )
+#define MAX3( A, B, C ) MAX2( MAX2( A, B ), C )
+
 
 static INLINE int
 align(int value, int alignment)
@@ -507,9 +547,9 @@ align(int value, int alignment)
 }
 
 static INLINE unsigned
-minify(unsigned value)
+u_minify(unsigned value, unsigned levels)
 {
-    return MAX2(1, value >> 1);
+    return MAX2(1, value >> levels);
 }
 
 #ifndef COPY_4V
diff --git a/src/gallium/auxiliary/util/u_pack_color.h b/src/gallium/auxiliary/util/u_pack_color.h
index eda883b3b92..9dacc6d83db 100644
--- a/src/gallium/auxiliary/util/u_pack_color.h
+++ b/src/gallium/auxiliary/util/u_pack_color.h
@@ -302,7 +302,10 @@ util_unpack_color_ub(enum pipe_format format, const void *src,
 static INLINE void
 util_pack_color(const float rgba[4], enum pipe_format format, void *dest)
 {
-   ubyte r, g, b, a;
+   ubyte r = 0;
+   ubyte g = 0;
+   ubyte b = 0;
+   ubyte a = 0;
 
    if (pf_size_x(format) <= 8) {
       /* format uses 8-bit components or less */
diff --git a/src/gallium/auxiliary/util/u_surface.c b/src/gallium/auxiliary/util/u_surface.c
index 85e443204e3..de8c266db83 100644
--- a/src/gallium/auxiliary/util/u_surface.c
+++ b/src/gallium/auxiliary/util/u_surface.c
@@ -79,9 +79,9 @@ util_create_rgba_surface(struct pipe_screen *screen,
    templ.target = target;
    templ.format = format;
    templ.last_level = 0;
-   templ.width[0] = width;
-   templ.height[0] = height;
-   templ.depth[0] = 1;
+   templ.width0 = width;
+   templ.height0 = height;
+   templ.depth0 = 1;
    pf_get_block(format, &templ.block);
    templ.tex_usage = usage;
 
diff --git a/src/gallium/auxiliary/vl/vl_compositor.c b/src/gallium/auxiliary/vl/vl_compositor.c
index cda6dc134a0..592dd174214 100644
--- a/src/gallium/auxiliary/vl/vl_compositor.c
+++ b/src/gallium/auxiliary/vl/vl_compositor.c
@@ -455,8 +455,8 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    assert(dst_area);
    assert(picture_type == PIPE_MPEG12_PICTURE_TYPE_FRAME);
 
-   compositor->fb_state.width = dst_surface->width[0];
-   compositor->fb_state.height = dst_surface->height[0];
+   compositor->fb_state.width = dst_surface->width0;
+   compositor->fb_state.height = dst_surface->height0;
    compositor->fb_state.cbufs[0] = compositor->pipe->screen->get_tex_surface
    (
       compositor->pipe->screen,
@@ -504,12 +504,12 @@ void vl_compositor_render(struct vl_compositor          *compositor,
    vs_consts->dst_trans.z = 0;
    vs_consts->dst_trans.w = 0;
 
-   vs_consts->src_scale.x = src_area->w / (float)src_surface->width[0];
-   vs_consts->src_scale.y = src_area->h / (float)src_surface->height[0];
+   vs_consts->src_scale.x = src_area->w / (float)src_surface->width0;
+   vs_consts->src_scale.y = src_area->h / (float)src_surface->height0;
    vs_consts->src_scale.z = 1;
    vs_consts->src_scale.w = 1;
-   vs_consts->src_trans.x = src_area->x / (float)src_surface->width[0];
-   vs_consts->src_trans.y = src_area->y / (float)src_surface->height[0];
+   vs_consts->src_trans.x = src_area->x / (float)src_surface->width0;
+   vs_consts->src_trans.y = src_area->y / (float)src_surface->height0;
    vs_consts->src_trans.z = 0;
    vs_consts->src_trans.w = 0;
 
diff --git a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
index c4ba69817f9..1934965995f 100644
--- a/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
+++ b/src/gallium/auxiliary/vl/vl_mpeg12_mc_renderer.c
@@ -681,7 +681,7 @@ xfer_buffers_map(struct vl_mpeg12_mc_renderer *r)
       (
          r->pipe->screen, r->textures.all[i],
          0, 0, 0, PIPE_TRANSFER_WRITE, 0, 0,
-         r->textures.all[i]->width[0], r->textures.all[i]->height[0]
+         r->textures.all[i]->width0, r->textures.all[i]->height0
       );
 
       r->texels[i] = r->pipe->screen->transfer_map(r->pipe->screen, r->tex_transfer[i]);
@@ -835,26 +835,26 @@ init_buffers(struct vl_mpeg12_mc_renderer *r)
    /* TODO: Accomodate HW that can't do this and also for cases when this isn't precise enough */
    template.format = PIPE_FORMAT_R16_SNORM;
    template.last_level = 0;
-   template.width[0] = r->pot_buffers ?
+   template.width0 = r->pot_buffers ?
       util_next_power_of_two(r->picture_width) : r->picture_width;
-   template.height[0] = r->pot_buffers ?
+   template.height0 = r->pot_buffers ?
       util_next_power_of_two(r->picture_height) : r->picture_height;
-   template.depth[0] = 1;
+   template.depth0 = 1;
    pf_get_block(template.format, &template.block);
    template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_DYNAMIC;
 
    r->textures.individual.y = r->pipe->screen->texture_create(r->pipe->screen, &template);
 
    if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_420) {
-      template.width[0] = r->pot_buffers ?
+      template.width0 = r->pot_buffers ?
          util_next_power_of_two(r->picture_width / 2) :
          r->picture_width / 2;
-      template.height[0] = r->pot_buffers ?
+      template.height0 = r->pot_buffers ?
          util_next_power_of_two(r->picture_height / 2) :
          r->picture_height / 2;
    }
    else if (r->chroma_format == PIPE_VIDEO_CHROMA_FORMAT_422)
-      template.height[0] = r->pot_buffers ?
+      template.height0 = r->pot_buffers ?
          util_next_power_of_two(r->picture_height / 2) :
          r->picture_height / 2;
 
@@ -1283,8 +1283,8 @@ flush(struct vl_mpeg12_mc_renderer *r)
       PIPE_BUFFER_USAGE_CPU_WRITE | PIPE_BUFFER_USAGE_DISCARD
    );
 
-   vs_consts->denorm.x = r->surface->width[0];
-   vs_consts->denorm.y = r->surface->height[0];
+   vs_consts->denorm.x = r->surface->width0;
+   vs_consts->denorm.y = r->surface->height0;
 
    pipe_buffer_unmap(r->pipe->screen, r->vs_const_buf.buffer);
 
@@ -1633,8 +1633,8 @@ vl_mpeg12_mc_renderer_render_macroblocks(struct vl_mpeg12_mc_renderer
       renderer->past = past;
       renderer->future = future;
       renderer->fence = fence;
-      renderer->surface_tex_inv_size.x = 1.0f / surface->width[0];
-      renderer->surface_tex_inv_size.y = 1.0f / surface->height[0];
+      renderer->surface_tex_inv_size.x = 1.0f / surface->width0;
+      renderer->surface_tex_inv_size.y = 1.0f / surface->height0;
    }
 
    while (num_macroblocks) {
diff --git a/src/gallium/drivers/cell/ppu/cell_state_emit.c b/src/gallium/drivers/cell/ppu/cell_state_emit.c
index d9bae5b641d..5b87286d4c5 100644
--- a/src/gallium/drivers/cell/ppu/cell_state_emit.c
+++ b/src/gallium/drivers/cell/ppu/cell_state_emit.c
@@ -27,6 +27,7 @@
 
 #include "pipe/p_inlines.h"
 #include "util/u_memory.h"
+#include "util/u_math.h"
 #include "cell_context.h"
 #include "cell_gen_fragment.h"
 #include "cell_state.h"
@@ -299,9 +300,9 @@ cell_emit_state(struct cell_context *cell)
                for (level = 0; level < CELL_MAX_TEXTURE_LEVELS; level++) {
                   texture->start[level] = (ct->mapped +
                                            ct->level_offset[level]);
-                  texture->width[level] = ct->base.width[level];
-                  texture->height[level] = ct->base.height[level];
-                  texture->depth[level] = ct->base.depth[level];
+                  texture->width[level] = u_minify(ct->base.width0, level);
+                  texture->height[level] = u_minify(ct->base.height0, level);
+                  texture->depth[level] = u_minify(ct->base.depth0, level);
                }
                texture->target = ct->base.target;
             }
diff --git a/src/gallium/drivers/cell/ppu/cell_texture.c b/src/gallium/drivers/cell/ppu/cell_texture.c
index ae4c61efb3b..e6b8a870452 100644
--- a/src/gallium/drivers/cell/ppu/cell_texture.c
+++ b/src/gallium/drivers/cell/ppu/cell_texture.c
@@ -49,9 +49,9 @@ cell_texture_layout(struct cell_texture *ct)
 {
    struct pipe_texture *pt = &ct->base;
    unsigned level;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
-   unsigned depth = pt->depth[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
 
    ct->buffer_size = 0;
 
@@ -65,9 +65,6 @@ cell_texture_layout(struct cell_texture *ct)
       w_tile = align(width, TILE_SIZE);
       h_tile = align(height, TILE_SIZE);
 
-      pt->width[level] = width;
-      pt->height[level] = height;
-      pt->depth[level] = depth;
       pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w_tile);  
       pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h_tile);  
 
@@ -83,9 +80,9 @@ cell_texture_layout(struct cell_texture *ct)
 
       ct->buffer_size += size;
 
-      width = minify(width);
-      height = minify(height);
-      depth = minify(depth);
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
    }
 }
 
@@ -276,8 +273,8 @@ cell_get_tex_surface(struct pipe_screen *screen,
       pipe_reference_init(&ps->reference, 1);
       pipe_texture_reference(&ps->texture, pt);
       ps->format = pt->format;
-      ps->width = pt->width[level];
-      ps->height = pt->height[level];
+      ps->width = u_minify(pt->width0, level);
+      ps->height = u_minify(pt->height0, level);
       ps->offset = ct->level_offset[level];
       /* XXX may need to override usage flags (see sp_texture.c) */
       ps->usage = usage;
@@ -386,8 +383,8 @@ cell_transfer_map(struct pipe_screen *screen, struct pipe_transfer *transfer)
    struct pipe_texture *pt = transfer->texture;
    struct cell_texture *ct = cell_texture(pt);
    const uint level = ctrans->base.level;
-   const uint texWidth = pt->width[level];
-   const uint texHeight = pt->height[level];
+   const uint texWidth = u_minify(pt->width0, level);
+   const uint texHeight = u_minify(pt->height0, level);
    const uint stride = ct->stride[level];
    unsigned size;
 
@@ -440,8 +437,8 @@ cell_transfer_unmap(struct pipe_screen *screen,
    struct pipe_texture *pt = transfer->texture;
    struct cell_texture *ct = cell_texture(pt);
    const uint level = ctrans->base.level;
-   const uint texWidth = pt->width[level];
-   const uint texHeight = pt->height[level];
+   const uint texWidth = u_minify(pt->width0, level);
+   const uint texHeight = u_minify(pt->height0, level);
    const uint stride = ct->stride[level];
 
    if (!ct->mapped) {
diff --git a/src/gallium/drivers/i915/i915_debug.c b/src/gallium/drivers/i915/i915_debug.c
index e6640e587b4..c6e6d6fd313 100644
--- a/src/gallium/drivers/i915/i915_debug.c
+++ b/src/gallium/drivers/i915/i915_debug.c
@@ -851,6 +851,7 @@ static boolean i915_debug_packet( struct debug_stream *stream )
       default:
 	 return debug(stream, "", 0);
       }
+      break;
    default:
       assert(0);
       return 0;
diff --git a/src/gallium/drivers/i915/i915_state.c b/src/gallium/drivers/i915/i915_state.c
index 7d48e6e84d5..71f00bc346a 100644
--- a/src/gallium/drivers/i915/i915_state.c
+++ b/src/gallium/drivers/i915/i915_state.c
@@ -58,8 +58,10 @@ translate_wrap_mode(unsigned wrap)
       return TEXCOORDMODE_CLAMP_EDGE;
    case PIPE_TEX_WRAP_CLAMP_TO_BORDER:
       return TEXCOORDMODE_CLAMP_BORDER;
-//   case PIPE_TEX_WRAP_MIRRORED_REPEAT:
-//      return TEXCOORDMODE_MIRROR;
+/*
+   case PIPE_TEX_WRAP_MIRRORED_REPEAT:
+      return TEXCOORDMODE_MIRROR;
+*/
    default:
       return TEXCOORDMODE_WRAP;
    }
diff --git a/src/gallium/drivers/i915/i915_state_sampler.c b/src/gallium/drivers/i915/i915_state_sampler.c
index c5e9084d12e..cbac4175c8f 100644
--- a/src/gallium/drivers/i915/i915_state_sampler.c
+++ b/src/gallium/drivers/i915/i915_state_sampler.c
@@ -231,7 +231,7 @@ i915_update_texture(struct i915_context *i915,
 {
    const struct pipe_texture *pt = &tex->base;
    uint format, pitch;
-   const uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+   const uint width = pt->width0, height = pt->height0, depth = pt->depth0;
    const uint num_levels = pt->last_level;
    unsigned max_lod = num_levels * 4;
    unsigned tiled = MS3_USE_FENCE_REGS;
diff --git a/src/gallium/drivers/i915/i915_texture.c b/src/gallium/drivers/i915/i915_texture.c
index 286c9ace8e5..c7b86dd4c57 100644
--- a/src/gallium/drivers/i915/i915_texture.c
+++ b/src/gallium/drivers/i915/i915_texture.c
@@ -105,10 +105,6 @@ i915_miptree_set_level_info(struct i915_texture *tex,
 
    assert(level < PIPE_MAX_TEXTURE_LEVELS);
 
-   pt->width[level] = w;
-   pt->height[level] = h;
-   pt->depth[level] = d;
-   
    pt->nblocksx[level] = pf_get_nblocksx(&pt->block, w);
    pt->nblocksy[level] = pf_get_nblocksy(&pt->block, h);
 
@@ -168,16 +164,16 @@ i915_scanout_layout(struct i915_texture *tex)
       return FALSE;
 
    i915_miptree_set_level_info(tex, 0, 1,
-                               tex->base.width[0],
-                               tex->base.height[0],
+                               tex->base.width0,
+                               tex->base.height0,
                                1);
    i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
 
-   if (tex->base.width[0] >= 240) {
+   if (tex->base.width0 >= 240) {
       tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
       tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
       tex->hw_tiled = INTEL_TILE_X;
-   } else if (tex->base.width[0] == 64 && tex->base.height[0] == 64) {
+   } else if (tex->base.width0 == 64 && tex->base.height0 == 64) {
       tex->stride = power_of_two(tex->base.nblocksx[0] * pt->block.size);
       tex->total_nblocksy = round_up(tex->base.nblocksy[0], 8);
    } else {
@@ -185,7 +181,7 @@ i915_scanout_layout(struct i915_texture *tex)
    }
 
    debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
-      tex->base.width[0], tex->base.height[0], pt->block.size,
+      tex->base.width0, tex->base.height0, pt->block.size,
       tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy);
 
    return TRUE;
@@ -203,12 +199,12 @@ i915_display_target_layout(struct i915_texture *tex)
       return FALSE;
 
    /* fallback to normal textures for small textures */
-   if (tex->base.width[0] < 240)
+   if (tex->base.width0 < 240)
       return FALSE;
 
    i915_miptree_set_level_info(tex, 0, 1,
-                               tex->base.width[0],
-                               tex->base.height[0],
+                               tex->base.width0,
+                               tex->base.height0,
                                1);
    i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
 
@@ -217,7 +213,7 @@ i915_display_target_layout(struct i915_texture *tex)
    tex->hw_tiled = INTEL_TILE_X;
 
    debug_printf("%s size: %d,%d,%d offset %d,%d (0x%x)\n", __FUNCTION__,
-      tex->base.width[0], tex->base.height[0], pt->block.size,
+      tex->base.width0, tex->base.height0, pt->block.size,
       tex->stride, tex->total_nblocksy, tex->stride * tex->total_nblocksy);
 
    return TRUE;
@@ -228,8 +224,8 @@ i915_miptree_layout_2d(struct i915_texture *tex)
 {
    struct pipe_texture *pt = &tex->base;
    unsigned level;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
    unsigned nblocksx = pt->nblocksx[0];
    unsigned nblocksy = pt->nblocksy[0];
 
@@ -254,8 +250,8 @@ i915_miptree_layout_2d(struct i915_texture *tex)
 
       tex->total_nblocksy += nblocksy;
 
-      width = minify(width);
-      height = minify(height);
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
       nblocksx = pf_get_nblocksx(&pt->block, width);
       nblocksy = pf_get_nblocksy(&pt->block, height);
    }
@@ -267,9 +263,9 @@ i915_miptree_layout_3d(struct i915_texture *tex)
    struct pipe_texture *pt = &tex->base;
    unsigned level;
 
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
-   unsigned depth = pt->depth[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
    unsigned nblocksx = pt->nblocksx[0];
    unsigned nblocksy = pt->nblocksy[0];
    unsigned stack_nblocksy = 0;
@@ -285,36 +281,34 @@ i915_miptree_layout_3d(struct i915_texture *tex)
 
       stack_nblocksy += MAX2(2, nblocksy);
 
-      width = minify(width);
-      height = minify(height);
-      depth = minify(depth);
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
       nblocksx = pf_get_nblocksx(&pt->block, width);
       nblocksy = pf_get_nblocksy(&pt->block, height);
    }
 
    /* Fixup depth image_offsets: 
     */
-   depth = pt->depth[0];
    for (level = 0; level <= pt->last_level; level++) {
       unsigned i;
       for (i = 0; i < depth; i++) 
          i915_miptree_set_image_offset(tex, level, i, 0, i * stack_nblocksy);
 
-      depth = minify(depth);
+      depth = u_minify(depth, 1);
    }
 
    /* Multiply slice size by texture depth for total size.  It's
     * remarkable how wasteful of memory the i915 texture layouts
     * are.  They are largely fixed in the i945.
     */
-   tex->total_nblocksy = stack_nblocksy * pt->depth[0];
+   tex->total_nblocksy = stack_nblocksy * pt->depth0;
 }
 
 static void
 i915_miptree_layout_cube(struct i915_texture *tex)
 {
    struct pipe_texture *pt = &tex->base;
-   unsigned width = pt->width[0], height = pt->height[0];
+   unsigned width = pt->width0, height = pt->height0;
    const unsigned nblocks = pt->nblocksx[0];
    unsigned level;
    unsigned face;
@@ -383,8 +377,8 @@ i945_miptree_layout_2d(struct i915_texture *tex)
    unsigned level;
    unsigned x = 0;
    unsigned y = 0;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
    unsigned nblocksx = pt->nblocksx[0];
    unsigned nblocksy = pt->nblocksy[0];
 
@@ -407,8 +401,8 @@ i945_miptree_layout_2d(struct i915_texture *tex)
     */
    if (pt->last_level > 0) {
       unsigned mip1_nblocksx 
-         = align(pf_get_nblocksx(&pt->block, minify(width)), align_x)
-         + pf_get_nblocksx(&pt->block, minify(minify(width)));
+         = align(pf_get_nblocksx(&pt->block, u_minify(width, 1)), align_x)
+         + pf_get_nblocksx(&pt->block, u_minify(width, 2));
 
       if (mip1_nblocksx > nblocksx)
          tex->stride = mip1_nblocksx * pt->block.size;
@@ -439,8 +433,8 @@ i945_miptree_layout_2d(struct i915_texture *tex)
          y += nblocksy;
       }
 
-      width  = minify(width);
-      height = minify(height);
+      width  = u_minify(width, 1);
+      height = u_minify(height, 1);
       nblocksx = pf_get_nblocksx(&pt->block, width);
       nblocksy = pf_get_nblocksy(&pt->block, height);
    }
@@ -450,9 +444,9 @@ static void
 i945_miptree_layout_3d(struct i915_texture *tex)
 {
    struct pipe_texture *pt = &tex->base;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
-   unsigned depth = pt->depth[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
    unsigned nblocksx = pt->nblocksx[0];
    unsigned nblocksy = pt->nblocksy[0];
    unsigned pack_x_pitch, pack_x_nr;
@@ -495,9 +489,9 @@ i945_miptree_layout_3d(struct i915_texture *tex)
          pack_y_pitch >>= 1;
       }
 
-      width = minify(width);
-      height = minify(height);
-      depth = minify(depth);
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
       nblocksx = pf_get_nblocksx(&pt->block, width);
       nblocksy = pf_get_nblocksy(&pt->block, height);
    }
@@ -511,11 +505,11 @@ i945_miptree_layout_cube(struct i915_texture *tex)
 
    const unsigned nblocks = pt->nblocksx[0];
    unsigned face;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
 
    /*
-   printf("%s %i, %i\n", __FUNCTION__, pt->width[0], pt->height[0]);
+   printf("%s %i, %i\n", __FUNCTION__, pt->width0, pt->height0);
    */
 
    assert(width == height); /* cubemap images are square */
@@ -651,8 +645,8 @@ i915_texture_create(struct pipe_screen *screen,
    pipe_reference_init(&tex->base.reference, 1);
    tex->base.screen = screen;
 
-   tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width[0]);
-   tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height[0]);
+   tex->base.nblocksx[0] = pf_get_nblocksx(&tex->base.block, tex->base.width0);
+   tex->base.nblocksy[0] = pf_get_nblocksy(&tex->base.block, tex->base.height0);
    
    if (is->is_i945) {
       if (!i945_miptree_layout(tex))
@@ -667,7 +661,7 @@ i915_texture_create(struct pipe_screen *screen,
 
 
    /* for scanouts and cursors, cursors arn't scanouts */
-   if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width[0] != 64)
+   if (templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY && templat->width0 != 64)
       buf_usage = INTEL_NEW_SCANOUT;
    else
       buf_usage = INTEL_NEW_TEXTURE;
@@ -710,7 +704,7 @@ i915_texture_blanket(struct pipe_screen * screen,
    /* Only supports one type */
    if (base->target != PIPE_TEXTURE_2D ||
        base->last_level != 0 ||
-       base->depth[0] != 1) {
+       base->depth0 != 1) {
       return NULL;
    }
 
@@ -724,7 +718,7 @@ i915_texture_blanket(struct pipe_screen * screen,
 
    tex->stride = stride[0];
 
-   i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1);
+   i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1);
    i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
 
    pipe_buffer_reference(&tex->buffer, buffer);
@@ -788,8 +782,8 @@ i915_get_tex_surface(struct pipe_screen *screen,
       pipe_reference_init(&ps->reference, 1);
       pipe_texture_reference(&ps->texture, pt);
       ps->format = pt->format;
-      ps->width = pt->width[level];
-      ps->height = pt->height[level];
+      ps->width = u_minify(pt->width0, level);
+      ps->height = u_minify(pt->height0, level);
       ps->offset = offset;
       ps->usage = flags;
    }
@@ -919,7 +913,7 @@ i915_texture_blanket_intel(struct pipe_screen *screen,
    /* Only supports one type */
    if (base->target != PIPE_TEXTURE_2D ||
        base->last_level != 0 ||
-       base->depth[0] != 1) {
+       base->depth0 != 1) {
       return NULL;
    }
 
@@ -933,7 +927,7 @@ i915_texture_blanket_intel(struct pipe_screen *screen,
 
    tex->stride = stride;
 
-   i915_miptree_set_level_info(tex, 0, 1, base->width[0], base->height[0], 1);
+   i915_miptree_set_level_info(tex, 0, 1, base->width0, base->height0, 1);
    i915_miptree_set_image_offset(tex, 0, 0, 0, 0);
 
    tex->buffer = buffer;
diff --git a/src/gallium/drivers/i915/intel_winsys.h b/src/gallium/drivers/i915/intel_winsys.h
index 2c8dc63f3f9..c6bf6e6f7f1 100644
--- a/src/gallium/drivers/i915/intel_winsys.h
+++ b/src/gallium/drivers/i915/intel_winsys.h
@@ -42,21 +42,21 @@ enum intel_buffer_usage
    INTEL_USAGE_2D_TARGET = 0x04,
    INTEL_USAGE_2D_SOURCE = 0x08,
    /* use on vertex */
-   INTEL_USAGE_VERTEX    = 0x10,
+   INTEL_USAGE_VERTEX    = 0x10
 };
 
 enum intel_buffer_type
 {
    INTEL_NEW_TEXTURE,
    INTEL_NEW_SCANOUT, /**< a texture used for scanning out from */
-   INTEL_NEW_VERTEX,
+   INTEL_NEW_VERTEX
 };
 
 enum intel_buffer_tile
 {
    INTEL_TILE_NONE,
    INTEL_TILE_X,
-   INTEL_TILE_Y,
+   INTEL_TILE_Y
 };
 
 struct intel_batchbuffer {
diff --git a/src/gallium/drivers/identity/id_objects.c b/src/gallium/drivers/identity/id_objects.c
index e893e599408..bc9bc7121d5 100644
--- a/src/gallium/drivers/identity/id_objects.c
+++ b/src/gallium/drivers/identity/id_objects.c
@@ -180,3 +180,42 @@ identity_transfer_destroy(struct identity_transfer *id_transfer)
    screen->tex_transfer_destroy(id_transfer->transfer);
    FREE(id_transfer);
 }
+
+struct pipe_video_surface *
+identity_video_surface_create(struct identity_screen *id_screen,
+                              struct pipe_video_surface *video_surface)
+{
+   struct identity_video_surface *id_video_surface;
+
+   if (!video_surface) {
+      goto error;
+   }
+
+   assert(video_surface->screen == id_screen->screen);
+
+   id_video_surface = CALLOC_STRUCT(identity_video_surface);
+   if (!id_video_surface) {
+      goto error;
+   }
+
+   memcpy(&id_video_surface->base,
+          video_surface,
+          sizeof(struct pipe_video_surface));
+
+   pipe_reference_init(&id_video_surface->base.reference, 1);
+   id_video_surface->base.screen = &id_screen->base;
+   id_video_surface->video_surface = video_surface;
+
+   return &id_video_surface->base;
+
+error:
+   pipe_video_surface_reference(&video_surface, NULL);
+   return NULL;
+}
+
+void
+identity_video_surface_destroy(struct identity_video_surface *id_video_surface)
+{
+   pipe_video_surface_reference(&id_video_surface->video_surface, NULL);
+   FREE(id_video_surface);
+}
diff --git a/src/gallium/drivers/identity/id_objects.h b/src/gallium/drivers/identity/id_objects.h
index ce58faa3c7c..77cc7190798 100644
--- a/src/gallium/drivers/identity/id_objects.h
+++ b/src/gallium/drivers/identity/id_objects.h
@@ -31,6 +31,7 @@
 
 #include "pipe/p_compiler.h"
 #include "pipe/p_state.h"
+#include "pipe/p_video_state.h"
 
 #include "id_screen.h"
 
@@ -67,6 +68,14 @@ struct identity_transfer
 };
 
 
+struct identity_video_surface
+{
+   struct pipe_video_surface base;
+
+   struct pipe_video_surface *video_surface;
+};
+
+
 static INLINE struct identity_buffer *
 identity_buffer(struct pipe_buffer *_buffer)
 {
@@ -103,6 +112,15 @@ identity_transfer(struct pipe_transfer *_transfer)
    return (struct identity_transfer *)_transfer;
 }
 
+static INLINE struct identity_video_surface *
+identity_video_surface(struct pipe_video_surface *_video_surface)
+{
+   if (!_video_surface) {
+      return NULL;
+   }
+   (void)identity_screen(_video_surface->screen);
+   return (struct identity_video_surface *)_video_surface;
+}
 
 static INLINE struct pipe_buffer *
 identity_buffer_unwrap(struct pipe_buffer *_buffer)
@@ -165,5 +183,12 @@ identity_transfer_create(struct identity_texture *id_texture,
 void
 identity_transfer_destroy(struct identity_transfer *id_transfer);
 
+struct pipe_video_surface *
+identity_video_surface_create(struct identity_screen *id_screen,
+                              struct pipe_video_surface *video_surface);
+
+void
+identity_video_surface_destroy(struct identity_video_surface *id_video_surface);
+
 
 #endif /* ID_OBJECTS_H */
diff --git a/src/gallium/drivers/identity/id_public.h b/src/gallium/drivers/identity/id_public.h
index cac14cfd604..3d2862eaa01 100644
--- a/src/gallium/drivers/identity/id_public.h
+++ b/src/gallium/drivers/identity/id_public.h
@@ -37,4 +37,4 @@ identity_screen_create(struct pipe_screen *screen);
 struct pipe_context *
 identity_context_create(struct pipe_screen *screen, struct pipe_context *pipe);
 
-#endif /* PT_PUBLIC_H */
+#endif /* ID_PUBLIC_H */
diff --git a/src/gallium/drivers/identity/id_screen.c b/src/gallium/drivers/identity/id_screen.c
index 26439637d08..53eae3ef544 100644
--- a/src/gallium/drivers/identity/id_screen.c
+++ b/src/gallium/drivers/identity/id_screen.c
@@ -379,6 +379,33 @@ identity_screen_buffer_destroy(struct pipe_buffer *_buffer)
    identity_buffer_destroy(identity_buffer(_buffer));
 }
 
+static struct pipe_video_surface *
+identity_screen_video_surface_create(struct pipe_screen *_screen,
+                                     enum pipe_video_chroma_format chroma_format,
+                                     unsigned width,
+                                     unsigned height)
+{
+   struct identity_screen *id_screen = identity_screen(_screen);
+   struct pipe_screen *screen = id_screen->screen;
+   struct pipe_video_surface *result;
+
+   result = screen->video_surface_create(screen,
+                                         chroma_format,
+                                         width,
+                                         height);
+
+   if (result) {
+      return identity_video_surface_create(id_screen, result);
+   }
+   return NULL;
+}
+
+static void
+identity_screen_video_surface_destroy(struct pipe_video_surface *_vsfc)
+{
+   identity_video_surface_destroy(identity_video_surface(_vsfc));
+}
+
 static void
 identity_screen_flush_frontbuffer(struct pipe_screen *_screen,
                                   struct pipe_surface *_surface,
@@ -472,6 +499,12 @@ identity_screen_create(struct pipe_screen *screen)
    if (screen->buffer_unmap)
       id_screen->base.buffer_unmap = identity_screen_buffer_unmap;
    id_screen->base.buffer_destroy = identity_screen_buffer_destroy;
+   if (screen->video_surface_create) {
+      id_screen->base.video_surface_create = identity_screen_video_surface_create;
+   }
+   if (screen->video_surface_destroy) {
+      id_screen->base.video_surface_destroy = identity_screen_video_surface_destroy;
+   }
    id_screen->base.flush_frontbuffer = identity_screen_flush_frontbuffer;
    id_screen->base.fence_reference = identity_screen_fence_reference;
    id_screen->base.fence_signalled = identity_screen_fence_signalled;
diff --git a/src/gallium/drivers/llvmpipe/README b/src/gallium/drivers/llvmpipe/README
index 89d08834a3c..0c3f00fd58f 100644
--- a/src/gallium/drivers/llvmpipe/README
+++ b/src/gallium/drivers/llvmpipe/README
@@ -51,21 +51,22 @@ Requirements
 
  - Linux
  
- - udis86, http://udis86.sourceforge.net/ . Use my repository, which decodes
-   opcodes not yet supported by upstream.
+ - A x86 or amd64 processor.  64bit mode is preferred.
  
-     git clone git://people.freedesktop.org/~jrfonseca/udis86
-     cd udis86
-     ./configure --with-pic
-     make
-     sudo make install
+   Support for sse2 is strongly encouraged.  Support for ssse3, and sse4.1 will
+   yield the most efficient code.  The less features the CPU has the more
+   likely is that you ran into underperforming, buggy, or incomplete code.  
+   
+   See /proc/cpuinfo to know what your CPU supports.
+ 
+ - LLVM 2.5 or greater. LLVM 2.6 is preferred.
  
- - LLVM 2.5. On Debian based distributions do:
+   On Debian based distributions do:
  
      aptitude install llvm-dev
 
-   There is a typo in one of the llvm-dev 2.5 headers, that causes compilation
-   errors in the debug build:
+   There is a typo in one of the llvm 2.5 headers, that may cause compilation
+   errors. To fix it apply the change:
 
      --- /usr/include/llvm-c/Core.h.orig	2009-08-10 15:38:54.000000000 +0100
      +++ /usr/include/llvm-c/Core.h	2009-08-10 15:38:25.000000000 +0100
@@ -79,12 +80,17 @@ Requirements
           #endif
           return reinterpret_cast<T**>(Vals);
  
- - A x86 or amd64 processor with support for sse2, sse3, and sse4.1 SIMD
-   instructions. This is necessary because we emit several SSE intrinsics for
-   convenience. See /proc/cpuinfo to know what your CPU supports.
- 
- - scons
+ - scons (optional)
 
+ - udis86, http://udis86.sourceforge.net/ (optional):
+ 
+     git clone git://udis86.git.sourceforge.net/gitroot/udis86/udis86
+     cd udis86
+     ./autogen.sh
+     ./configure --with-pic
+     make
+     sudo make install
+ 
 
 Building
 ========
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_depth.c b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
index 98ec1cb1b9d..d438c0e63d7 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_depth.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_depth.c
@@ -90,7 +90,7 @@ lp_depth_type(const struct util_format_description *format_desc,
 
    if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_FLOAT) {
       type.floating = TRUE;
-      assert(swizzle = 0);
+      assert(swizzle == 0);
       assert(format_desc->channel[swizzle].size == format_desc->block.bits);
    }
    else if(format_desc->channel[swizzle].type == UTIL_FORMAT_TYPE_UNSIGNED) {
diff --git a/src/gallium/drivers/llvmpipe/lp_bld_sample.c b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
index 4d272bea87e..af70ddc6ab9 100644
--- a/src/gallium/drivers/llvmpipe/lp_bld_sample.c
+++ b/src/gallium/drivers/llvmpipe/lp_bld_sample.c
@@ -59,9 +59,9 @@ lp_sampler_static_state(struct lp_sampler_static_state *state,
 
    state->format            = texture->format;
    state->target            = texture->target;
-   state->pot_width         = util_is_pot(texture->width[0]);
-   state->pot_height        = util_is_pot(texture->height[0]);
-   state->pot_depth         = util_is_pot(texture->depth[0]);
+   state->pot_width         = util_is_pot(texture->width0);
+   state->pot_height        = util_is_pot(texture->height0);
+   state->pot_depth         = util_is_pot(texture->depth0);
 
    state->wrap_s            = sampler->wrap_s;
    state->wrap_t            = sampler->wrap_t;
diff --git a/src/gallium/drivers/llvmpipe/lp_jit.c b/src/gallium/drivers/llvmpipe/lp_jit.c
index 13535dd638e..bce3baec164 100644
--- a/src/gallium/drivers/llvmpipe/lp_jit.c
+++ b/src/gallium/drivers/llvmpipe/lp_jit.c
@@ -154,6 +154,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
 #if 0
    /* For simulating less capable machines */
    util_cpu_caps.has_sse3 = 0;
+   util_cpu_caps.has_ssse3 = 0;
    util_cpu_caps.has_sse4_1 = 0;
 #endif
 
@@ -167,7 +168,7 @@ lp_jit_screen_init(struct llvmpipe_screen *screen)
    if (LLVMCreateJITCompiler(&screen->engine, screen->provider, 1, &error)) {
       _debug_printf("%s\n", error);
       LLVMDisposeMessage(error);
-      abort();
+      assert(0);
    }
 
    screen->target = LLVMGetExecutionEngineTargetData(screen->engine);
diff --git a/src/gallium/drivers/llvmpipe/lp_screen.c b/src/gallium/drivers/llvmpipe/lp_screen.c
index 05189274589..0fb133486aa 100644
--- a/src/gallium/drivers/llvmpipe/lp_screen.c
+++ b/src/gallium/drivers/llvmpipe/lp_screen.c
@@ -27,6 +27,7 @@
 
 
 #include "util/u_memory.h"
+#include "util/u_format.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_screen.h"
 
@@ -131,17 +132,17 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
 {
    struct llvmpipe_screen *screen = llvmpipe_screen(_screen);
    struct llvmpipe_winsys *winsys = screen->winsys;
+   const struct util_format_description *format_desc;
+
+   format_desc = util_format_description(format);
+   if(!format_desc)
+      return FALSE;
 
    assert(target == PIPE_TEXTURE_1D ||
           target == PIPE_TEXTURE_2D ||
           target == PIPE_TEXTURE_3D ||
           target == PIPE_TEXTURE_CUBE);
 
-   if(format == PIPE_FORMAT_Z16_UNORM)
-      return FALSE;
-   if(format == PIPE_FORMAT_S8_UNORM)
-      return FALSE;
-
    switch(format) {
    case PIPE_FORMAT_DXT1_RGB:
    case PIPE_FORMAT_DXT1_RGBA:
@@ -152,8 +153,51 @@ llvmpipe_is_format_supported( struct pipe_screen *_screen,
       break;
    }
 
-   if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET)
-      return winsys->is_displaytarget_format_supported(winsys, format);
+   if(tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+      if(format_desc->block.width != 1 ||
+         format_desc->block.height != 1)
+         return FALSE;
+
+      if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
+         return FALSE;
+
+      if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
+         format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB)
+         return FALSE;
+   }
+
+   if(tex_usage & PIPE_TEXTURE_USAGE_DISPLAY_TARGET) {
+      if(!winsys->is_displaytarget_format_supported(winsys, format))
+         return FALSE;
+   }
+
+   if(tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL) {
+      if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+         return FALSE;
+
+      /* FIXME: Temporary restriction. See lp_state_fs.c. */
+      if(format_desc->block.bits != 32)
+         return FALSE;
+   }
+
+   /* FIXME: Temporary restrictions. See lp_bld_sample_soa.c */
+   if(tex_usage & PIPE_TEXTURE_USAGE_SAMPLER) {
+      if(format_desc->block.width != 1 ||
+         format_desc->block.height != 1)
+         return FALSE;
+
+      if(format_desc->layout != UTIL_FORMAT_LAYOUT_SCALAR &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARITH &&
+         format_desc->layout != UTIL_FORMAT_LAYOUT_ARRAY)
+         return FALSE;
+
+      if(format_desc->colorspace != UTIL_FORMAT_COLORSPACE_RGB &&
+         format_desc->colorspace != UTIL_FORMAT_COLORSPACE_SRGB &&
+         format_desc->colorspace != UTIL_FORMAT_COLORSPACE_ZS)
+         return FALSE;
+   }
 
    return TRUE;
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_state_fs.c b/src/gallium/drivers/llvmpipe/lp_state_fs.c
index 2e9aa9fffe3..ee0f69b2af9 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_fs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_fs.c
@@ -148,6 +148,20 @@ generate_depth(LLVMBuilderRef builder,
    format_desc = util_format_description(key->zsbuf_format);
    assert(format_desc);
 
+   /*
+    * Depths are expected to be between 0 and 1, even if they are stored in
+    * floats. Setting these bits here will ensure that the lp_build_conv() call
+    * below won't try to unnecessarily clamp the incoming values.
+    */
+   if(src_type.floating) {
+      src_type.sign = FALSE;
+      src_type.norm = TRUE;
+   }
+   else {
+      assert(!src_type.sign);
+      assert(src_type.norm);
+   }
+
    /* Pick the depth type. */
    dst_type = lp_depth_type(format_desc, src_type.width*src_type.length);
 
@@ -155,14 +169,11 @@ generate_depth(LLVMBuilderRef builder,
    assert(dst_type.width == src_type.width);
    assert(dst_type.length == src_type.length);
 
-#if 1
-   src = lp_build_clamped_float_to_unsigned_norm(builder,
-                                                 src_type,
-                                                 dst_type.width,
-                                                 src);
-#else
    lp_build_conv(builder, src_type, dst_type, &src, 1, &src, 1);
-#endif
+
+   dst_ptr = LLVMBuildBitCast(builder,
+                              dst_ptr,
+                              LLVMPointerType(lp_build_vec_type(dst_type), 0), "");
 
    lp_build_depth_test(builder,
                        &key->depth,
@@ -611,10 +622,12 @@ generate_fragment(struct llvmpipe_context *lp,
     * Translate the LLVM IR into machine code.
     */
 
+#ifdef DEBUG
    if(LLVMVerifyFunction(variant->function, LLVMPrintMessageAction)) {
       LLVMDumpValue(variant->function);
-      abort();
+      assert(0);
    }
+#endif
 
    LLVMRunFunctionPassManager(screen->pass, variant->function);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_state_sampler.c b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
index c69d90c723a..8333805a3fd 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_sampler.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_sampler.c
@@ -102,8 +102,8 @@ llvmpipe_set_sampler_textures(struct pipe_context *pipe,
       if(tex) {
          struct llvmpipe_texture *lp_tex = llvmpipe_texture(tex);
          struct lp_jit_texture *jit_tex = &llvmpipe->jit_context.textures[i];
-         jit_tex->width = tex->width[0];
-         jit_tex->height = tex->height[0];
+         jit_tex->width = tex->width0;
+         jit_tex->height = tex->height0;
          jit_tex->stride = lp_tex->stride[0];
          if(!lp_tex->dt)
             jit_tex->data = lp_tex->data;
diff --git a/src/gallium/drivers/llvmpipe/lp_state_vs.c b/src/gallium/drivers/llvmpipe/lp_state_vs.c
index 15c30296144..8a761648e7e 100644
--- a/src/gallium/drivers/llvmpipe/lp_state_vs.c
+++ b/src/gallium/drivers/llvmpipe/lp_state_vs.c
@@ -92,5 +92,6 @@ llvmpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
       (struct lp_vertex_shader *)vs;
 
    draw_delete_vertex_shader(llvmpipe->draw, state->draw_data);
+   FREE( (void *)state->shader.tokens );
    FREE( state );
 }
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_cache.c b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
index 773e8482425..c7c4143bc62 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_cache.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_cache.c
@@ -36,6 +36,7 @@
 #include "util/u_memory.h"
 #include "util/u_tile.h"
 #include "util/u_format.h"
+#include "util/u_math.h"
 #include "lp_context.h"
 #include "lp_surface.h"
 #include "lp_texture.h"
@@ -270,8 +271,8 @@ lp_find_cached_tex_tile(struct llvmpipe_tex_tile_cache *tc,
                                      addr.bits.level, 
                                      addr.bits.z, 
                                      PIPE_TRANSFER_READ, 0, 0,
-                                     tc->texture->width[addr.bits.level],
-                                     tc->texture->height[addr.bits.level]);
+                                     u_minify(tc->texture->width0, addr.bits.level),
+                                     u_minify(tc->texture->height0, addr.bits.level));
 
          tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans);
 
diff --git a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
index a1365a045f1..0d01c07fb5e 100644
--- a/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
+++ b/src/gallium/drivers/llvmpipe/lp_tex_sample_c.c
@@ -544,7 +544,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler,
       float dsdy = s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT];
       dsdx = fabsf(dsdx);
       dsdy = fabsf(dsdy);
-      rho = MAX2(dsdx, dsdy) * texture->width[0];
+      rho = MAX2(dsdx, dsdy) * texture->width0;
    }
    if (t) {
       float dtdx = t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT];
@@ -552,7 +552,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler,
       float max;
       dtdx = fabsf(dtdx);
       dtdy = fabsf(dtdy);
-      max = MAX2(dtdx, dtdy) * texture->height[0];
+      max = MAX2(dtdx, dtdy) * texture->height0;
       rho = MAX2(rho, max);
    }
    if (p) {
@@ -561,7 +561,7 @@ compute_lambda(struct tgsi_sampler *tgsi_sampler,
       float max;
       dpdx = fabsf(dpdx);
       dpdy = fabsf(dpdy);
-      max = MAX2(dpdx, dpdy) * texture->depth[0];
+      max = MAX2(dpdx, dpdy) * texture->depth0;
       rho = MAX2(rho, max);
    }
 
@@ -726,9 +726,9 @@ get_texel(const struct tgsi_sampler *tgsi_sampler,
    const struct pipe_texture *texture = samp->texture;
    const struct pipe_sampler_state *sampler = samp->sampler;
 
-   if (x < 0 || x >= (int) texture->width[level] ||
-       y < 0 || y >= (int) texture->height[level] ||
-       z < 0 || z >= (int) texture->depth[level]) {
+   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
+       y < 0 || y >= (int) u_minify(texture->height0, level) ||
+       z < 0 || z >= (int) u_minify(texture->depth0, level)) {
       rgba[0][j] = sampler->border_color[0];
       rgba[1][j] = sampler->border_color[1];
       rgba[2][j] = sampler->border_color[2];
@@ -1093,8 +1093,8 @@ lp_get_samples_2d_common(struct tgsi_sampler *tgsi_sampler,
 
    assert(sampler->normalized_coords);
 
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
 
@@ -1250,9 +1250,9 @@ lp_get_samples_3d(struct tgsi_sampler *tgsi_sampler,
 
    assert(sampler->normalized_coords);
 
-   width = texture->width[level0];
-   height = texture->height[level0];
-   depth = texture->depth[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
+   depth = u_minify(texture->depth0, level0);
 
    assert(width > 0);
    assert(height > 0);
@@ -1394,8 +1394,8 @@ lp_get_samples_rect(struct tgsi_sampler *tgsi_sampler,
    /* texture RECTS cannot be mipmapped */
    assert(level0 == level1);
 
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
 
@@ -1513,8 +1513,8 @@ lp_get_samples(struct tgsi_sampler *tgsi_sampler,
 
    /* Do this elsewhere: 
     */
-   samp->xpot = util_unsigned_logbase2( samp->texture->width[0] );
-   samp->ypot = util_unsigned_logbase2( samp->texture->height[0] );
+   samp->xpot = util_unsigned_logbase2( samp->texture->width0 );
+   samp->ypot = util_unsigned_logbase2( samp->texture->height0 );
 
    /* Try to hook in a faster sampler.  Ultimately we'll have to
     * code-generate these.  Luckily most of this looks like it is
diff --git a/src/gallium/drivers/llvmpipe/lp_texture.c b/src/gallium/drivers/llvmpipe/lp_texture.c
index a00f2495dfc..65d62fd0723 100644
--- a/src/gallium/drivers/llvmpipe/lp_texture.c
+++ b/src/gallium/drivers/llvmpipe/lp_texture.c
@@ -57,9 +57,9 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
 {
    struct pipe_texture *pt = &lpt->base;
    unsigned level;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
-   unsigned depth = pt->depth[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
 
    unsigned buffer_size = 0;
 
@@ -68,9 +68,6 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
    for (level = 0; level <= pt->last_level; level++) {
       unsigned nblocksx, nblocksy;
 
-      pt->width[level] = width;
-      pt->height[level] = height;
-      pt->depth[level] = depth;
       pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);  
       pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);
 
@@ -87,9 +84,9 @@ llvmpipe_texture_layout(struct llvmpipe_screen *screen,
                       ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
                       lpt->stride[level]);
 
-      width  = minify(width);
-      height = minify(height);
-      depth = minify(depth);
+      width  = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
    }
 
    lpt->data = align_malloc(buffer_size, 16);
@@ -104,13 +101,13 @@ llvmpipe_displaytarget_layout(struct llvmpipe_screen *screen,
    struct llvmpipe_winsys *winsys = screen->winsys;
 
    pf_get_block(lpt->base.format, &lpt->base.block);
-   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]);  
-   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]);  
+   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0);  
+   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0);  
 
    lpt->dt = winsys->displaytarget_create(winsys,
                                           lpt->base.format,
-                                          lpt->base.width[0],
-                                          lpt->base.height[0],
+                                          lpt->base.width0,
+                                          lpt->base.height0,
                                           16,
                                           &lpt->stride[0] );
 
@@ -172,7 +169,7 @@ llvmpipe_texture_blanket(struct pipe_screen * screen,
    /* Only supports one type */
    if (base->target != PIPE_TEXTURE_2D ||
        base->last_level != 0 ||
-       base->depth[0] != 1) {
+       base->depth0 != 1) {
       return NULL;
    }
 
@@ -183,8 +180,8 @@ llvmpipe_texture_blanket(struct pipe_screen * screen,
    lpt->base = *base;
    pipe_reference_init(&lpt->base.reference, 1);
    lpt->base.screen = screen;
-   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width[0]);  
-   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height[0]);  
+   lpt->base.nblocksx[0] = pf_get_nblocksx(&lpt->base.block, lpt->base.width0);  
+   lpt->base.nblocksy[0] = pf_get_nblocksy(&lpt->base.block, lpt->base.height0);  
    lpt->stride[0] = stride[0];
 
    pipe_buffer_reference(&lpt->buffer, buffer);
@@ -229,8 +226,8 @@ llvmpipe_get_tex_surface(struct pipe_screen *screen,
       pipe_reference_init(&ps->reference, 1);
       pipe_texture_reference(&ps->texture, pt);
       ps->format = pt->format;
-      ps->width = pt->width[level];
-      ps->height = pt->height[level];
+      ps->width = u_minify(pt->width0, level);
+      ps->height = u_minify(pt->height0, level);
       ps->offset = lpt->level_offset[level];
       ps->usage = usage;
 
diff --git a/src/gallium/drivers/nv04/nv04_fragtex.c b/src/gallium/drivers/nv04/nv04_fragtex.c
index 21f990fd536..0cce71ad1de 100644
--- a/src/gallium/drivers/nv04/nv04_fragtex.c
+++ b/src/gallium/drivers/nv04/nv04_fragtex.c
@@ -57,8 +57,8 @@ nv04_fragtex_build(struct nv04_context *nv04, int unit)
 		| NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ORIGIN_FOH_CORNER
 		| nv04_fragtex_format(pt->format)
 		| ( (pt->last_level + 1) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_MIPMAP_LEVELS_SHIFT )
-		| ( log2i(pt->width[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
-		| ( log2i(pt->height[0]) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
+		| ( log2i(pt->width0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_U_SHIFT )
+		| ( log2i(pt->height0) << NV04_DX5_TEXTURED_TRIANGLE_FORMAT_BASE_SIZE_V_SHIFT )
 		| NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSU_CLAMP_TO_EDGE
 		| NV04_DX5_TEXTURED_TRIANGLE_FORMAT_ADDRESSV_CLAMP_TO_EDGE
 		;
diff --git a/src/gallium/drivers/nv04/nv04_miptree.c b/src/gallium/drivers/nv04/nv04_miptree.c
index 93f752faec9..4fd72c82e62 100644
--- a/src/gallium/drivers/nv04/nv04_miptree.c
+++ b/src/gallium/drivers/nv04/nv04_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
 
 #include "nv04_context.h"
 #include "nv04_screen.h"
@@ -9,31 +10,29 @@ static void
 nv04_miptree_layout(struct nv04_miptree *nv04mt)
 {
 	struct pipe_texture *pt = &nv04mt->base;
-	uint width = pt->width[0], height = pt->height[0];
+	uint width = pt->width0, height = pt->height0;
 	uint offset = 0;
 	int nr_faces, l;
 
 	nr_faces = 1;
 
 	for (l = 0; l <= pt->last_level; l++) {
-		pt->width[l] = width;
-		pt->height[l] = height;
 
 		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
 		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
 		
-		nv04mt->level[l].pitch = pt->width[0];
+		nv04mt->level[l].pitch = pt->width0;
 		nv04mt->level[l].pitch = (nv04mt->level[l].pitch + 63) & ~63;
 
-		width  = MAX2(1, width  >> 1);
-		height = MAX2(1, height >> 1);
+		width  = u_minify(width, 1);
+		height = u_minify(height, 1);
 	}
 
 	for (l = 0; l <= pt->last_level; l++) {
 
 		nv04mt->level[l].image_offset = 
 			CALLOC(nr_faces, sizeof(unsigned));
-		offset += nv04mt->level[l].pitch * pt->height[l];
+		offset += nv04mt->level[l].pitch * u_minify(pt->height0, l);
 	}
 
 	nv04mt->total_size = offset;
@@ -75,7 +74,7 @@ nv04_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
 	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
-	    pt->depth[0] != 1)
+	    pt->depth0 != 1)
 		return NULL;
 
 	mt = CALLOC_STRUCT(nv04_miptree);
@@ -120,8 +119,8 @@ nv04_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	pipe_texture_reference(&ns->base.texture, pt);
 	ns->base.format = pt->format;
-	ns->base.width = pt->width[level];
-	ns->base.height = pt->height[level];
+	ns->base.width = u_minify(pt->width0, level);
+	ns->base.height = u_minify(pt->height0, level);
 	ns->base.usage = flags;
 	pipe_reference_init(&ns->base.reference, 1);
 	ns->base.face = face;
diff --git a/src/gallium/drivers/nv04/nv04_transfer.c b/src/gallium/drivers/nv04/nv04_transfer.c
index 6618660743d..e6456429f4e 100644
--- a/src/gallium/drivers/nv04/nv04_transfer.c
+++ b/src/gallium/drivers/nv04/nv04_transfer.c
@@ -2,6 +2,7 @@
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
 #include "nv04_context.h"
 #include "nv04_screen.h"
@@ -20,9 +21,9 @@ nv04_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width[0] = pt->width[level];
-	template->height[0] = pt->height[level];
-	template->depth[0] = 1;
+	template->width0 = u_minify(pt->width0, level);
+	template->height0 = u_minify(pt->height0, level);
+	template->depth0 = 1;
 	template->block = pt->block;
 	template->nblocksx[0] = pt->nblocksx[level];
 	template->nblocksy[0] = pt->nblocksx[level];
diff --git a/src/gallium/drivers/nv10/nv10_fragtex.c b/src/gallium/drivers/nv10/nv10_fragtex.c
index 27f2f875847..906fdfeeb93 100644
--- a/src/gallium/drivers/nv10/nv10_fragtex.c
+++ b/src/gallium/drivers/nv10/nv10_fragtex.c
@@ -62,9 +62,9 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit)
 
 	txf  = tf->format << 8;
 	txf |= (pt->last_level + 1) << 16;
-	txf |= log2i(pt->width[0]) << 20;
-	txf |= log2i(pt->height[0]) << 24;
-	txf |= log2i(pt->depth[0]) << 28;
+	txf |= log2i(pt->width0) << 20;
+	txf |= log2i(pt->height0) << 24;
+	txf |= log2i(pt->depth0) << 28;
 	txf |= 8;
 
 	switch (pt->target) {
@@ -89,7 +89,7 @@ nv10_fragtex_build(struct nv10_context *nv10, int unit)
 	OUT_RING  (0x40000000); /* enable */
 	OUT_RING  (txs);
 	OUT_RING  (ps->filt | 0x2000 /* magic */);
-	OUT_RING  ((pt->width[0] << 16) | pt->height[0]);
+	OUT_RING  ((pt->width0 << 16) | pt->height0);
 	OUT_RING  (ps->bcol);
 #endif
 }
diff --git a/src/gallium/drivers/nv10/nv10_miptree.c b/src/gallium/drivers/nv10/nv10_miptree.c
index 34e3c2ebd77..b2a6c59b749 100644
--- a/src/gallium/drivers/nv10/nv10_miptree.c
+++ b/src/gallium/drivers/nv10/nv10_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
 
 #include "nv10_context.h"
 #include "nv10_screen.h"
@@ -10,7 +11,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt)
 {
 	struct pipe_texture *pt = &nv10mt->base;
 	boolean swizzled = FALSE;
-	uint width = pt->width[0], height = pt->height[0];
+	uint width = pt->width0, height = pt->height0;
 	uint offset = 0;
 	int nr_faces, l, f;
 
@@ -21,8 +22,7 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt)
 	}
 	
 	for (l = 0; l <= pt->last_level; l++) {
-		pt->width[l] = width;
-		pt->height[l] = height;
+
 		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
 		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
 
@@ -35,15 +35,15 @@ nv10_miptree_layout(struct nv10_miptree *nv10mt)
 		nv10mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
 
-		width  = MAX2(1, width  >> 1);
-		height = MAX2(1, height >> 1);
+		width  = u_minify(width, 1);
+		height = u_minify(height, 1);
 
 	}
 
 	for (f = 0; f < nr_faces; f++) {
 		for (l = 0; l <= pt->last_level; l++) {
 			nv10mt->level[l].image_offset[f] = offset;
-			offset += nv10mt->level[l].pitch * pt->height[l];
+			offset += nv10mt->level[l].pitch * u_minify(pt->height0, l);
 		}
 	}
 
@@ -58,7 +58,7 @@ nv10_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
 	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
-	    pt->depth[0] != 1)
+	    pt->depth0 != 1)
 		return NULL;
 
 	mt = CALLOC_STRUCT(nv10_miptree);
@@ -133,8 +133,8 @@ nv10_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
 		return NULL;
 	pipe_texture_reference(&ns->base.texture, pt);
 	ns->base.format = pt->format;
-	ns->base.width = pt->width[level];
-	ns->base.height = pt->height[level];
+	ns->base.width = u_minify(pt->width0, level);
+	ns->base.height = u_minify(pt->height0, level);
 	ns->base.usage = flags;
 	pipe_reference_init(&ns->base.reference, 1);
 	ns->base.face = face;
diff --git a/src/gallium/drivers/nv10/nv10_transfer.c b/src/gallium/drivers/nv10/nv10_transfer.c
index 8feb85e4bda..ec54297ab01 100644
--- a/src/gallium/drivers/nv10/nv10_transfer.c
+++ b/src/gallium/drivers/nv10/nv10_transfer.c
@@ -2,6 +2,7 @@
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
 #include "nv10_context.h"
 #include "nv10_screen.h"
@@ -20,9 +21,9 @@ nv10_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width[0] = pt->width[level];
-	template->height[0] = pt->height[level];
-	template->depth[0] = 1;
+	template->width0 = u_minify(pt->width0, level);
+	template->height0 = u_minify(pt->height0, level);
+	template->depth0 = 1;
 	template->block = pt->block;
 	template->nblocksx[0] = pt->nblocksx[level];
 	template->nblocksy[0] = pt->nblocksx[level];
diff --git a/src/gallium/drivers/nv20/nv20_fragtex.c b/src/gallium/drivers/nv20/nv20_fragtex.c
index 495a7be9127..2db4a4015a6 100644
--- a/src/gallium/drivers/nv20/nv20_fragtex.c
+++ b/src/gallium/drivers/nv20/nv20_fragtex.c
@@ -62,9 +62,9 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit)
 
 	txf  = tf->format << 8;
 	txf |= (pt->last_level + 1) << 16;
-	txf |= log2i(pt->width[0]) << 20;
-	txf |= log2i(pt->height[0]) << 24;
-	txf |= log2i(pt->depth[0]) << 28;
+	txf |= log2i(pt->width0) << 20;
+	txf |= log2i(pt->height0) << 24;
+	txf |= log2i(pt->depth0) << 28;
 	txf |= 8;
 
 	switch (pt->target) {
@@ -89,7 +89,7 @@ nv20_fragtex_build(struct nv20_context *nv20, int unit)
 	OUT_RING  (0x40000000); /* enable */
 	OUT_RING  (txs);
 	OUT_RING  (ps->filt | 0x2000 /* magic */);
-	OUT_RING  ((pt->width[0] << 16) | pt->height[0]);
+	OUT_RING  ((pt->width0 << 16) | pt->height0);
 	OUT_RING  (ps->bcol);
 #endif
 }
diff --git a/src/gallium/drivers/nv20/nv20_miptree.c b/src/gallium/drivers/nv20/nv20_miptree.c
index 185fbf53e0f..554e28e47dd 100644
--- a/src/gallium/drivers/nv20/nv20_miptree.c
+++ b/src/gallium/drivers/nv20/nv20_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
 
 #include "nv20_context.h"
 #include "nv20_screen.h"
@@ -9,7 +10,7 @@ static void
 nv20_miptree_layout(struct nv20_miptree *nv20mt)
 {
 	struct pipe_texture *pt = &nv20mt->base;
-	uint width = pt->width[0], height = pt->height[0];
+	uint width = pt->width0, height = pt->height0;
 	uint offset = 0;
 	int nr_faces, l, f;
 	uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
@@ -25,21 +26,19 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt)
 	}
 	
 	for (l = 0; l <= pt->last_level; l++) {
-		pt->width[l] = width;
-		pt->height[l] = height;
 		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
 		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
 
 		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
-			nv20mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64);
+			nv20mt->level[l].pitch = align(pt->width0 * pt->block.size, 64);
 		else
-			nv20mt->level[l].pitch = pt->width[l] * pt->block.size;
+			nv20mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size;
 
 		nv20mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
 
-		width  = MAX2(1, width  >> 1);
-		height = MAX2(1, height >> 1);
+		width  = u_minify(width, 1);
+		height = u_minify(height, 1);
 	}
 
 	for (f = 0; f < nr_faces; f++) {
@@ -47,14 +46,14 @@ nv20_miptree_layout(struct nv20_miptree *nv20mt)
 			nv20mt->level[l].image_offset[f] = offset;
 
 			if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) &&
-			    pt->width[l + 1] > 1 && pt->height[l + 1] > 1)
-				offset += align(nv20mt->level[l].pitch * pt->height[l], 64);
+			    u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1)
+				offset += align(nv20mt->level[l].pitch * u_minify(pt->height0, l), 64);
 			else
-				offset += nv20mt->level[l].pitch * pt->height[l];
+				offset += nv20mt->level[l].pitch * u_minify(pt->height0, l);
 		}
 
 		nv20mt->level[l].image_offset[f] = offset;
-		offset += nv20mt->level[l].pitch * pt->height[l];
+		offset += nv20mt->level[l].pitch * u_minify(pt->height0, l);
 	}
 
 	nv20mt->total_size = offset;
@@ -68,7 +67,7 @@ nv20_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
 	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
-	    pt->depth[0] != 1)
+	    pt->depth0 != 1)
 		return NULL;
 
 	mt = CALLOC_STRUCT(nv20_miptree);
@@ -100,8 +99,8 @@ nv20_miptree_create(struct pipe_screen *screen, const struct pipe_texture *pt)
 	mt->base.screen = screen;
 
 	/* Swizzled textures must be POT */
-	if (pt->width[0] & (pt->width[0] - 1) ||
-	    pt->height[0] & (pt->height[0] - 1))
+	if (pt->width0 & (pt->width0 - 1) ||
+	    pt->height0 & (pt->height0 - 1))
 		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
 	else
 	if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
@@ -167,8 +166,8 @@ nv20_miptree_surface_get(struct pipe_screen *screen, struct pipe_texture *pt,
 		return NULL;
 	pipe_texture_reference(&ns->base.texture, pt);
 	ns->base.format = pt->format;
-	ns->base.width = pt->width[level];
-	ns->base.height = pt->height[level];
+	ns->base.width = u_minify(pt->width0, level);
+	ns->base.height = u_minify(pt->height0, level);
 	ns->base.usage = flags;
 	pipe_reference_init(&ns->base.reference, 1);
 	ns->base.face = face;
diff --git a/src/gallium/drivers/nv20/nv20_transfer.c b/src/gallium/drivers/nv20/nv20_transfer.c
index 81b4f1a9177..87b5c14a3c2 100644
--- a/src/gallium/drivers/nv20/nv20_transfer.c
+++ b/src/gallium/drivers/nv20/nv20_transfer.c
@@ -2,6 +2,7 @@
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
 #include "nv20_context.h"
 #include "nv20_screen.h"
@@ -20,9 +21,9 @@ nv20_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width[0] = pt->width[level];
-	template->height[0] = pt->height[level];
-	template->depth[0] = 1;
+	template->width0 = u_minify(pt->width0, level);
+	template->height0 = u_minify(pt->height0, level);
+	template->depth0 = 1;
 	template->block = pt->block;
 	template->nblocksx[0] = pt->nblocksx[level];
 	template->nblocksy[0] = pt->nblocksx[level];
diff --git a/src/gallium/drivers/nv30/nv30_fragprog.c b/src/gallium/drivers/nv30/nv30_fragprog.c
index cc0385426c1..0ce702d6f84 100644
--- a/src/gallium/drivers/nv30/nv30_fragprog.c
+++ b/src/gallium/drivers/nv30/nv30_fragprog.c
@@ -4,6 +4,7 @@
 #include "pipe/p_inlines.h"
 
 #include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_dump.h"
 #include "tgsi/tgsi_parse.h"
 #include "tgsi/tgsi_util.h"
 
@@ -131,7 +132,7 @@ emit_src(struct nv30_fpc *fpc, int pos, struct nv30_sreg src)
 				sizeof(uint32_t) * 4);
 		}
 
-		sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);	
+		sr |= (NV30_FP_REG_TYPE_CONST << NV30_FP_REG_TYPE_SHIFT);
 		break;
 	case NV30SR_NONE:
 		sr |= (NV30_FP_REG_TYPE_INPUT << NV30_FP_REG_TYPE_SHIFT);
@@ -666,7 +667,7 @@ nv30_fragprog_prepare(struct nv30_fpc *fpc)
 		{
 			struct tgsi_full_immediate *imm;
 			float vals[4];
-			
+
 			imm = &p.FullToken.FullImmediate;
 			assert(imm->Immediate.DataType == TGSI_IMM_FLOAT32);
 			assert(fpc->nr_imm < MAX_IMM);
@@ -754,7 +755,7 @@ nv30_fragprog_translate(struct nv30_context *nv30,
 	fp->insn[fpc->inst_offset + 1] = 0x00000000;
 	fp->insn[fpc->inst_offset + 2] = 0x00000000;
 	fp->insn[fpc->inst_offset + 3] = 0x00000000;
-	
+
 	fp->translated = TRUE;
 	fp->on_hw = FALSE;
 out_err:
@@ -838,7 +839,7 @@ nv30_fragprog_validate(struct nv30_context *nv30)
 update_constants:
 	if (fp->nr_consts) {
 		float *map;
-		
+
 		map = pipe_buffer_map(pscreen, constbuf,
 				      PIPE_BUFFER_USAGE_CPU_READ);
 		for (i = 0; i < fp->nr_consts; i++) {
diff --git a/src/gallium/drivers/nv30/nv30_fragtex.c b/src/gallium/drivers/nv30/nv30_fragtex.c
index dca760cae62..b3293ee700d 100644
--- a/src/gallium/drivers/nv30/nv30_fragtex.c
+++ b/src/gallium/drivers/nv30/nv30_fragtex.c
@@ -74,9 +74,9 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
 
 	txf  = tf->format;
 	txf |= ((pt->last_level>0) ? NV34TCL_TX_FORMAT_MIPMAP : 0);
-	txf |= log2i(pt->width[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
-	txf |= log2i(pt->height[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
-	txf |= log2i(pt->depth[0]) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
+	txf |= log2i(pt->width0) << NV34TCL_TX_FORMAT_BASE_SIZE_U_SHIFT;
+	txf |= log2i(pt->height0) << NV34TCL_TX_FORMAT_BASE_SIZE_V_SHIFT;
+	txf |= log2i(pt->depth0) << NV34TCL_TX_FORMAT_BASE_SIZE_W_SHIFT;
 	txf |= NV34TCL_TX_FORMAT_NO_BORDER | 0x10000;
 
 	switch (pt->target) {
@@ -115,8 +115,8 @@ nv30_fragtex_build(struct nv30_context *nv30, int unit)
 	so_data  (so, NV34TCL_TX_ENABLE_ENABLE | ps->en);
 	so_data  (so, txs);
 	so_data  (so, ps->filt | 0x2000 /*voodoo*/);
-	so_data  (so, (pt->width[0] << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
-		       pt->height[0]);
+	so_data  (so, (pt->width0 << NV34TCL_TX_NPOT_SIZE_W_SHIFT) |
+		       pt->height0);
 	so_data  (so, ps->bcol);
 
 	return so;
diff --git a/src/gallium/drivers/nv30/nv30_miptree.c b/src/gallium/drivers/nv30/nv30_miptree.c
index 280696d4503..b4c306d1272 100644
--- a/src/gallium/drivers/nv30/nv30_miptree.c
+++ b/src/gallium/drivers/nv30/nv30_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
 
 #include "nv30_context.h"
 
@@ -8,7 +9,7 @@ static void
 nv30_miptree_layout(struct nv30_miptree *nv30mt)
 {
 	struct pipe_texture *pt = &nv30mt->base;
-	uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+	uint width = pt->width0, height = pt->height0, depth = pt->depth0;
 	uint offset = 0;
 	int nr_faces, l, f;
 	uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
@@ -21,29 +22,26 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt)
 		nr_faces = 6;
 	} else
 	if (pt->target == PIPE_TEXTURE_3D) {
-		nr_faces = pt->depth[0];
+		nr_faces = pt->depth0;
 	} else {
 		nr_faces = 1;
 	}
 
 	for (l = 0; l <= pt->last_level; l++) {
-		pt->width[l] = width;
-		pt->height[l] = height;
-		pt->depth[l] = depth;
 		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
 		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
 
 		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
-			nv30mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64);
+			nv30mt->level[l].pitch = align(pt->width0 * pt->block.size, 64);
 		else
-			nv30mt->level[l].pitch = pt->width[l] * pt->block.size;
+			nv30mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size;
 
 		nv30mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
 
-		width  = MAX2(1, width  >> 1);
-		height = MAX2(1, height >> 1);
-		depth  = MAX2(1, depth  >> 1);
+		width  = u_minify(width, 1);
+		height = u_minify(height, 1);
+		depth  = u_minify(depth, 1);
 	}
 
 	for (f = 0; f < nr_faces; f++) {
@@ -51,14 +49,14 @@ nv30_miptree_layout(struct nv30_miptree *nv30mt)
 			nv30mt->level[l].image_offset[f] = offset;
 
 			if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) &&
-			    pt->width[l + 1] > 1 && pt->height[l + 1] > 1)
-				offset += align(nv30mt->level[l].pitch * pt->height[l], 64);
+			    u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1)
+				offset += align(nv30mt->level[l].pitch * u_minify(pt->height0, l), 64);
 			else
-				offset += nv30mt->level[l].pitch * pt->height[l];
+				offset += nv30mt->level[l].pitch * u_minify(pt->height0, l);
 		}
 
 		nv30mt->level[l].image_offset[f] = offset;
-		offset += nv30mt->level[l].pitch * pt->height[l];
+		offset += nv30mt->level[l].pitch * u_minify(pt->height0, l);
 	}
 
 	nv30mt->total_size = offset;
@@ -79,8 +77,8 @@ nv30_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
 	mt->base.screen = pscreen;
 
 	/* Swizzled textures must be POT */
-	if (pt->width[0] & (pt->width[0] - 1) ||
-	    pt->height[0] & (pt->height[0] - 1))
+	if (pt->width0 & (pt->width0 - 1) ||
+	    pt->height0 & (pt->height0 - 1))
 		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
 	else
 	if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
@@ -134,7 +132,7 @@ nv30_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
 	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
-	    pt->depth[0] != 1)
+	    pt->depth0 != 1)
 		return NULL;
 
 	mt = CALLOC_STRUCT(nv30_miptree);
@@ -182,8 +180,8 @@ nv30_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	pipe_texture_reference(&ns->base.texture, pt);
 	ns->base.format = pt->format;
-	ns->base.width = pt->width[level];
-	ns->base.height = pt->height[level];
+	ns->base.width = u_minify(pt->width0, level);
+	ns->base.height = u_minify(pt->height0, level);
 	ns->base.usage = flags;
 	pipe_reference_init(&ns->base.reference, 1);
 	ns->base.face = face;
diff --git a/src/gallium/drivers/nv30/nv30_screen.c b/src/gallium/drivers/nv30/nv30_screen.c
index 221ae1b5f88..7cd36902eb4 100644
--- a/src/gallium/drivers/nv30/nv30_screen.c
+++ b/src/gallium/drivers/nv30/nv30_screen.c
@@ -116,7 +116,10 @@ nv30_screen_surface_format_supported(struct pipe_screen *pscreen,
 		case PIPE_FORMAT_Z24X8_UNORM:
 			return TRUE;
 		case PIPE_FORMAT_Z16_UNORM:
-			return (front->format == PIPE_FORMAT_R5G6B5_UNORM);
+			if (front) {
+				return (front->format == PIPE_FORMAT_R5G6B5_UNORM);
+			}
+			return TRUE;
 		default:
 			break;
 		}
diff --git a/src/gallium/drivers/nv30/nv30_transfer.c b/src/gallium/drivers/nv30/nv30_transfer.c
index 98011decf7c..5e429b4d85c 100644
--- a/src/gallium/drivers/nv30/nv30_transfer.c
+++ b/src/gallium/drivers/nv30/nv30_transfer.c
@@ -2,6 +2,7 @@
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
 #include "nv30_context.h"
 #include "nv30_screen.h"
@@ -20,9 +21,9 @@ nv30_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width[0] = pt->width[level];
-	template->height[0] = pt->height[level];
-	template->depth[0] = 1;
+	template->width0 = u_minify(pt->width0, level);
+	template->height0 = u_minify(pt->height0, level);
+	template->depth0 = 1;
 	template->block = pt->block;
 	template->nblocksx[0] = pt->nblocksx[level];
 	template->nblocksy[0] = pt->nblocksx[level];
diff --git a/src/gallium/drivers/nv40/nv40_fragtex.c b/src/gallium/drivers/nv40/nv40_fragtex.c
index e2ec57564d1..44abc845969 100644
--- a/src/gallium/drivers/nv40/nv40_fragtex.c
+++ b/src/gallium/drivers/nv40/nv40_fragtex.c
@@ -117,11 +117,11 @@ nv40_fragtex_build(struct nv40_context *nv40, int unit)
 	so_data  (so, NV40TCL_TEX_ENABLE_ENABLE | ps->en);
 	so_data  (so, txs);
 	so_data  (so, ps->filt | tf->sign | 0x2000 /*voodoo*/);
-	so_data  (so, (pt->width[0] << NV40TCL_TEX_SIZE0_W_SHIFT) |
-		       pt->height[0]);
+	so_data  (so, (pt->width0 << NV40TCL_TEX_SIZE0_W_SHIFT) |
+		       pt->height0);
 	so_data  (so, ps->bcol);
 	so_method(so, nv40->screen->curie, NV40TCL_TEX_SIZE1(unit), 1);
-	so_data  (so, (pt->depth[0] << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
+	so_data  (so, (pt->depth0 << NV40TCL_TEX_SIZE1_DEPTH_SHIFT) | txp);
 
 	return so;
 }
diff --git a/src/gallium/drivers/nv40/nv40_miptree.c b/src/gallium/drivers/nv40/nv40_miptree.c
index 465dd3b0693..f73bedff6d8 100644
--- a/src/gallium/drivers/nv40/nv40_miptree.c
+++ b/src/gallium/drivers/nv40/nv40_miptree.c
@@ -1,6 +1,7 @@
 #include "pipe/p_state.h"
 #include "pipe/p_defines.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
 
 #include "nv40_context.h"
 
@@ -8,7 +9,7 @@ static void
 nv40_miptree_layout(struct nv40_miptree *mt)
 {
 	struct pipe_texture *pt = &mt->base;
-	uint width = pt->width[0], height = pt->height[0], depth = pt->depth[0];
+	uint width = pt->width0, height = pt->height0, depth = pt->depth0;
 	uint offset = 0;
 	int nr_faces, l, f;
 	uint wide_pitch = pt->tex_usage & (PIPE_TEXTURE_USAGE_SAMPLER |
@@ -21,29 +22,26 @@ nv40_miptree_layout(struct nv40_miptree *mt)
 		nr_faces = 6;
 	} else
 	if (pt->target == PIPE_TEXTURE_3D) {
-		nr_faces = pt->depth[0];
+		nr_faces = pt->depth0;
 	} else {
 		nr_faces = 1;
 	}
 
 	for (l = 0; l <= pt->last_level; l++) {
-		pt->width[l] = width;
-		pt->height[l] = height;
-		pt->depth[l] = depth;
 		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
 		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
 
 		if (wide_pitch && (pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR))
-			mt->level[l].pitch = align(pt->width[0] * pt->block.size, 64);
+			mt->level[l].pitch = align(pt->width0 * pt->block.size, 64);
 		else
-			mt->level[l].pitch = pt->width[l] * pt->block.size;
+			mt->level[l].pitch = u_minify(pt->width0, l) * pt->block.size;
 
 		mt->level[l].image_offset =
 			CALLOC(nr_faces, sizeof(unsigned));
 
-		width  = MAX2(1, width  >> 1);
-		height = MAX2(1, height >> 1);
-		depth  = MAX2(1, depth  >> 1);
+		width  = u_minify(width, 1);
+		height = u_minify(height, 1);
+		depth  = u_minify(depth, 1);
 	}
 
 	for (f = 0; f < nr_faces; f++) {
@@ -51,14 +49,14 @@ nv40_miptree_layout(struct nv40_miptree *mt)
 			mt->level[l].image_offset[f] = offset;
 
 			if (!(pt->tex_usage & NOUVEAU_TEXTURE_USAGE_LINEAR) &&
-			    pt->width[l + 1] > 1 && pt->height[l + 1] > 1)
-				offset += align(mt->level[l].pitch * pt->height[l], 64);
+			    u_minify(pt->width0, l + 1) > 1 && u_minify(pt->height0, l + 1) > 1)
+				offset += align(mt->level[l].pitch * u_minify(pt->height0, l), 64);
 			else
-				offset += mt->level[l].pitch * pt->height[l];
+				offset += mt->level[l].pitch * u_minify(pt->height0, l);
 		}
 
 		mt->level[l].image_offset[f] = offset;
-		offset += mt->level[l].pitch * pt->height[l];
+		offset += mt->level[l].pitch * u_minify(pt->height0, l);
 	}
 
 	mt->total_size = offset;
@@ -79,8 +77,8 @@ nv40_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *pt)
 	mt->base.screen = pscreen;
 
 	/* Swizzled textures must be POT */
-	if (pt->width[0] & (pt->width[0] - 1) ||
-	    pt->height[0] & (pt->height[0] - 1))
+	if (pt->width0 & (pt->width0 - 1) ||
+	    pt->height0 & (pt->height0 - 1))
 		mt->base.tex_usage |= NOUVEAU_TEXTURE_USAGE_LINEAR;
 	else
 	if (pt->tex_usage & (PIPE_TEXTURE_USAGE_PRIMARY |
@@ -128,7 +126,7 @@ nv40_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
 	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
-	    pt->depth[0] != 1)
+	    pt->depth0 != 1)
 		return NULL;
 
 	mt = CALLOC_STRUCT(nv40_miptree);
@@ -176,8 +174,8 @@ nv40_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	pipe_texture_reference(&ns->base.texture, pt);
 	ns->base.format = pt->format;
-	ns->base.width = pt->width[level];
-	ns->base.height = pt->height[level];
+	ns->base.width = u_minify(pt->width0, level);
+	ns->base.height = u_minify(pt->height0, level);
 	ns->base.usage = flags;
 	pipe_reference_init(&ns->base.reference, 1);
 	ns->base.face = face;
diff --git a/src/gallium/drivers/nv40/nv40_transfer.c b/src/gallium/drivers/nv40/nv40_transfer.c
index 92caee6f382..36e253c96f9 100644
--- a/src/gallium/drivers/nv40/nv40_transfer.c
+++ b/src/gallium/drivers/nv40/nv40_transfer.c
@@ -2,6 +2,7 @@
 #include <pipe/p_defines.h>
 #include <pipe/p_inlines.h>
 #include <util/u_memory.h>
+#include <util/u_math.h>
 #include <nouveau/nouveau_winsys.h>
 #include "nv40_context.h"
 #include "nv40_screen.h"
@@ -20,9 +21,9 @@ nv40_compatible_transfer_tex(struct pipe_texture *pt, unsigned level,
 	memset(template, 0, sizeof(struct pipe_texture));
 	template->target = pt->target;
 	template->format = pt->format;
-	template->width[0] = pt->width[level];
-	template->height[0] = pt->height[level];
-	template->depth[0] = 1;
+	template->width0 = u_minify(pt->width0, level);
+	template->height0 = u_minify(pt->height0, level);
+	template->depth0 = 1;
 	template->block = pt->block;
 	template->nblocksx[0] = pt->nblocksx[level];
 	template->nblocksy[0] = pt->nblocksx[level];
diff --git a/src/gallium/drivers/nv50/nv50_miptree.c b/src/gallium/drivers/nv50/nv50_miptree.c
index 9c20c5cc282..3d58746793f 100644
--- a/src/gallium/drivers/nv50/nv50_miptree.c
+++ b/src/gallium/drivers/nv50/nv50_miptree.c
@@ -61,8 +61,8 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 	struct nouveau_device *dev = nouveau_screen(pscreen)->device;
 	struct nv50_miptree *mt = CALLOC_STRUCT(nv50_miptree);
 	struct pipe_texture *pt = &mt->base.base;
-	unsigned width = tmp->width[0], height = tmp->height[0];
-	unsigned depth = tmp->depth[0], image_alignment;
+	unsigned width = tmp->width0, height = tmp->height0;
+	unsigned depth = tmp->depth0, image_alignment;
 	uint32_t tile_flags;
 	int ret, i, l;
 
@@ -92,9 +92,6 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 	for (l = 0; l <= pt->last_level; l++) {
 		struct nv50_miptree_level *lvl = &mt->level[l];
 
-		pt->width[l] = width;
-		pt->height[l] = height;
-		pt->depth[l] = depth;
 		pt->nblocksx[l] = pf_get_nblocksx(&pt->block, width);
 		pt->nblocksy[l] = pf_get_nblocksy(&pt->block, height);
 
@@ -102,9 +99,9 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 		lvl->pitch = align(pt->nblocksx[l] * pt->block.size, 64);
 		lvl->tile_mode = get_tile_mode(pt->nblocksy[l], depth);
 
-		width = MAX2(1, width >> 1);
-		height = MAX2(1, height >> 1);
-		depth = MAX2(1, depth >> 1);
+		width = u_minify(width, 1);
+		height = u_minify(height, 1);
+		depth = u_minify(depth, 1);
 	}
 
 	image_alignment  = get_tile_height(mt->level[0].tile_mode) * 64;
@@ -122,7 +119,7 @@ nv50_miptree_create(struct pipe_screen *pscreen, const struct pipe_texture *tmp)
 
 			size  = lvl->pitch;
 			size *= align(pt->nblocksy[l], tile_h);
-			size *= align(pt->depth[l], tile_d);
+			size *= align(u_minify(pt->depth0, l), tile_d);
 
 			lvl->image_offset[i] = mt->total_size;
 
@@ -151,7 +148,7 @@ nv50_miptree_blanket(struct pipe_screen *pscreen, const struct pipe_texture *pt,
 
 	/* Only supports 2D, non-mipmapped textures for the moment */
 	if (pt->target != PIPE_TEXTURE_2D || pt->last_level != 0 ||
-	    pt->depth[0] != 1)
+	    pt->depth0 != 1)
 		return NULL;
 
 	mt = CALLOC_STRUCT(nv50_miptree);
@@ -202,8 +199,8 @@ nv50_miptree_surface_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 		return NULL;
 	pipe_texture_reference(&ps->texture, pt);
 	ps->format = pt->format;
-	ps->width = pt->width[level];
-	ps->height = pt->height[level];
+	ps->width = u_minify(pt->width0, level);
+	ps->height = u_minify(pt->height0, level);
 	ps->usage = flags;
 	pipe_reference_init(&ps->reference, 1);
 	ps->face = face;
diff --git a/src/gallium/drivers/nv50/nv50_program.c b/src/gallium/drivers/nv50/nv50_program.c
index bf50982dd16..855079f2939 100644
--- a/src/gallium/drivers/nv50/nv50_program.c
+++ b/src/gallium/drivers/nv50/nv50_program.c
@@ -2914,7 +2914,7 @@ nv50_fp_move_results(struct nv50_pc *pc)
 static void
 nv50_program_fixup_insns(struct nv50_pc *pc)
 {
-	struct nv50_program_exec *e, *prev = NULL, **bra_list;
+	struct nv50_program_exec *e, **bra_list;
 	unsigned i, n, pos;
 
 	bra_list = CALLOC(pc->p->exec_size, sizeof(struct nv50_program_exec *));
@@ -2926,6 +2926,16 @@ nv50_program_fixup_insns(struct nv50_pc *pc)
 		if (e->param.index >= 0 && !e->param.mask)
 			bra_list[n++] = e;
 
+	/* last instruction must be long so it can have the exit bit set */
+	if (!is_long(pc->p->exec_tail))
+		convert_to_long(pc, pc->p->exec_tail);
+	/* set exit bit */
+	pc->p->exec_tail->inst[1] |= 1;
+
+	/* !immd on exit insn simultaneously means !join */
+	assert(!is_immd(pc->p->exec_head));
+	assert(!is_immd(pc->p->exec_tail));
+
 	/* Make sure we don't have any single 32 bit instructions. */
 	for (e = pc->p->exec_head, pos = 0; e; e = e->next) {
 		pos += is_long(e) ? 2 : 1;
@@ -2937,23 +2947,8 @@ nv50_program_fixup_insns(struct nv50_pc *pc)
 			convert_to_long(pc, e);
 			++pos;
 		}
-		if (e->next)
-			prev = e;
 	}
 
-	assert(!is_immd(pc->p->exec_head));
-	assert(!is_immd(pc->p->exec_tail));
-
-	/* last instruction must be long so it can have the end bit set */
-	if (!is_long(pc->p->exec_tail)) {
-		convert_to_long(pc, pc->p->exec_tail);
-		if (prev)
-			convert_to_long(pc, prev);
-	}
-	assert(!(pc->p->exec_tail->inst[1] & 2));
-	/* set the end-bit */
-	pc->p->exec_tail->inst[1] |= 1;
-
 	FREE(bra_list);
 }
 
diff --git a/src/gallium/drivers/nv50/nv50_state_validate.c b/src/gallium/drivers/nv50/nv50_state_validate.c
index 799d2758fee..19b8ef07b5e 100644
--- a/src/gallium/drivers/nv50/nv50_state_validate.c
+++ b/src/gallium/drivers/nv50/nv50_state_validate.c
@@ -285,7 +285,7 @@ nv50_state_validate(struct nv50_context *nv50)
 		so = so_new(33, 0);
 		so_method(so, tesla, NV50TCL_POLYGON_STIPPLE_PATTERN(0), 32);
 		for (i = 0; i < 32; i++)
-			so_data(so, nv50->stipple.stipple[i]);
+			so_data(so, util_bswap32(nv50->stipple.stipple[i]));
 		so_ref(so, &nv50->state.stipple);
 		so_ref(NULL, &so);
 	}
diff --git a/src/gallium/drivers/nv50/nv50_tex.c b/src/gallium/drivers/nv50/nv50_tex.c
index 2813f544770..417d3679422 100644
--- a/src/gallium/drivers/nv50/nv50_tex.c
+++ b/src/gallium/drivers/nv50/nv50_tex.c
@@ -131,9 +131,9 @@ nv50_tex_construct(struct nv50_context *nv50, struct nouveau_stateobj *so,
 		 NOUVEAU_BO_RD, 0, 0);
 	so_data (so, mode);
 	so_data (so, 0x00300000);
-	so_data (so, mt->base.base.width[0] | (1 << 31));
+	so_data (so, mt->base.base.width0 | (1 << 31));
 	so_data (so, (mt->base.base.last_level << 28) |
-		 (mt->base.base.depth[0] << 16) | mt->base.base.height[0]);
+		 (mt->base.base.depth0 << 16) | mt->base.base.height0);
 	so_data (so, 0x03000000);
 	so_data (so, mt->base.base.last_level << 4);
 
diff --git a/src/gallium/drivers/nv50/nv50_transfer.c b/src/gallium/drivers/nv50/nv50_transfer.c
index ea61357aaa6..39d65279fc0 100644
--- a/src/gallium/drivers/nv50/nv50_transfer.c
+++ b/src/gallium/drivers/nv50/nv50_transfer.c
@@ -1,6 +1,7 @@
 
 #include "pipe/p_context.h"
 #include "pipe/p_inlines.h"
+#include "util/u_math.h"
 
 #include "nv50_context.h"
 
@@ -156,9 +157,9 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	tx->base.block = pt->block;
 	if (!pt->nblocksx[level]) {
 		tx->base.nblocksx = pf_get_nblocksx(&pt->block,
-						    pt->width[level]);
+						    u_minify(pt->width0, level));
 		tx->base.nblocksy = pf_get_nblocksy(&pt->block,
-						    pt->height[level]);
+						    u_minify(pt->height0, level));
 	} else {
 		tx->base.nblocksx = pt->nblocksx[level];
 		tx->base.nblocksy = pt->nblocksy[level];
@@ -167,9 +168,9 @@ nv50_transfer_new(struct pipe_screen *pscreen, struct pipe_texture *pt,
 	tx->base.usage = usage;
 
 	tx->level_pitch = lvl->pitch;
-	tx->level_width = mt->base.base.width[level];
-	tx->level_height = mt->base.base.height[level];
-	tx->level_depth = mt->base.base.depth[level];
+	tx->level_width = u_minify(mt->base.base.width0, level);
+	tx->level_height = u_minify(mt->base.base.height0, level);
+	tx->level_depth = u_minify(mt->base.base.depth0, level);
 	tx->level_offset = lvl->image_offset[image];
 	tx->level_tiling = lvl->tile_mode;
 	tx->level_x = pf_get_nblocksx(&tx->base.block, x);
diff --git a/src/gallium/drivers/r300/r300_context.c b/src/gallium/drivers/r300/r300_context.c
index ae23329b83f..769733b6dd9 100644
--- a/src/gallium/drivers/r300/r300_context.c
+++ b/src/gallium/drivers/r300/r300_context.c
@@ -69,6 +69,7 @@ static void r300_destroy_context(struct pipe_context* context)
     FREE(r300->blend_color_state);
     FREE(r300->rs_block);
     FREE(r300->scissor_state);
+    FREE(r300->vertex_info);
     FREE(r300->viewport_state);
     FREE(r300);
 }
@@ -123,15 +124,24 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
 
     r300->context.clear = r300_clear;
 
-    if (r300screen->caps->has_tcl)
-    {
+    if (r300screen->caps->has_tcl) {
         r300->context.draw_arrays = r300_draw_arrays;
         r300->context.draw_elements = r300_draw_elements;
         r300->context.draw_range_elements = r300_draw_range_elements;
-    }
-    else
-    {
-        assert(0);
+    } else {
+        r300->context.draw_arrays = r300_swtcl_draw_arrays;
+        r300->context.draw_elements = r300_draw_elements;
+        r300->context.draw_range_elements = r300_swtcl_draw_range_elements;
+
+        /* Create a Draw. This is used for SW TCL. */
+        r300->draw = draw_create();
+        /* Enable our renderer. */
+        draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300));
+        /* Enable Draw's clipping. */
+        draw_set_driver_clipping(r300->draw, FALSE);
+        /* Force Draw to never do viewport transform, since we can do
+         * transform in hardware, always. */
+        draw_set_viewport_state(r300->draw, &r300_viewport_identity);
     }
 
     r300->context.is_texture_referenced = r300_is_texture_referenced;
@@ -143,18 +153,9 @@ struct pipe_context* r300_create_context(struct pipe_screen* screen,
     r300->blend_color_state = CALLOC_STRUCT(r300_blend_color_state);
     r300->rs_block = CALLOC_STRUCT(r300_rs_block);
     r300->scissor_state = CALLOC_STRUCT(r300_scissor_state);
+    r300->vertex_info = CALLOC_STRUCT(r300_vertex_info);
     r300->viewport_state = CALLOC_STRUCT(r300_viewport_state);
 
-    /* Create a Draw. This is used for vert collation and SW TCL. */
-    r300->draw = draw_create();
-    /* Enable our renderer. */
-    draw_set_rasterize_stage(r300->draw, r300_draw_stage(r300));
-    /* Disable Draw's clipping if TCL is present. */
-    draw_set_driver_clipping(r300->draw, r300_screen(screen)->caps->has_tcl);
-    /* Force Draw to never do viewport transform, since (again) we can do
-     * transform in hardware, always. */
-    draw_set_viewport_state(r300->draw, &r300_viewport_identity);
-
     /* Open up the OQ BO. */
     r300->oqbo = screen->buffer_create(screen, 4096,
             PIPE_BUFFER_USAGE_VERTEX, 4096);
diff --git a/src/gallium/drivers/r300/r300_context.h b/src/gallium/drivers/r300/r300_context.h
index f954ba7f9aa..39c0914cffa 100644
--- a/src/gallium/drivers/r300/r300_context.h
+++ b/src/gallium/drivers/r300/r300_context.h
@@ -92,6 +92,10 @@ struct r300_sampler_state {
     uint32_t filter0;      /* R300_TX_FILTER0: 0x4400 */
     uint32_t filter1;      /* R300_TX_FILTER1: 0x4440 */
     uint32_t border_color; /* R300_TX_BORDER_COLOR: 0x45c0 */
+
+    /* Min/max LOD must be clamped to [0, last_level], thus
+     * it's dependent on a currently bound texture */
+    unsigned min_lod, max_lod;
 };
 
 struct r300_scissor_state {
@@ -219,11 +223,6 @@ struct r300_texture {
 struct r300_vertex_info {
     /* Parent class */
     struct vertex_info vinfo;
-    /* Map of vertex attributes into PVS memory for HW TCL,
-     * or GA memory for SW TCL. */
-    int vs_tab[16];
-    /* Map of rasterizer attributes from GB through RS to US. */
-    int fs_tab[16];
 
     /* R300_VAP_PROG_STREAK_CNTL_[0-7] */
     uint32_t vap_prog_stream_cntl[8];
diff --git a/src/gallium/drivers/r300/r300_emit.c b/src/gallium/drivers/r300/r300_emit.c
index eeb97a2d370..98a39390bf9 100644
--- a/src/gallium/drivers/r300/r300_emit.c
+++ b/src/gallium/drivers/r300/r300_emit.c
@@ -129,7 +129,9 @@ static const float * get_shader_constant(
     struct rc_constant * constant,
     struct r300_constant_buffer * externals)
 {
-    static const float zero[4] = { 0.0, 0.0, 0.0, 0.0 };
+    static float vec[4] = { 0.0, 0.0, 0.0, 1.0 };
+    struct pipe_texture *tex;
+
     switch(constant->Type) {
         case RC_CONSTANT_EXTERNAL:
             return externals->constants[constant->u.External];
@@ -137,11 +139,31 @@ static const float * get_shader_constant(
         case RC_CONSTANT_IMMEDIATE:
             return constant->u.Immediate;
 
+        case RC_CONSTANT_STATE:
+            switch (constant->u.State[0]) {
+                /* Factor for converting rectangle coords to
+                 * normalized coords. Should only show up on non-r500. */
+                case RC_STATE_R300_TEXRECT_FACTOR:
+                    tex = &r300->textures[constant->u.State[1]]->tex;
+                    vec[0] = 1.0 / tex->width0;
+                    vec[1] = 1.0 / tex->height0;
+                    break;
+
+                default:
+                    debug_printf("r300: Implementation error: "
+                        "Unknown RC_CONSTANT type %d\n", constant->u.State[0]);
+            }
+            break;
+
         default:
-            debug_printf("r300: Implementation error: Unhandled constant type %i\n",
-                constant->Type);
-            return zero;
+            debug_printf("r300: Implementation error: "
+                "Unhandled constant type %d\n", constant->Type);
     }
+
+    /* This should either be (0, 0, 0, 1), which should be a relatively safe
+     * RGBA or STRQ value, or it could be one of the RC_CONSTANT_STATE
+     * state factors. */
+    return vec;
 }
 
 /* Convert a normal single-precision float into the 7.16 format
@@ -561,6 +583,8 @@ void r300_emit_texture(struct r300_context* r300,
                        unsigned offset)
 {
     uint32_t filter0 = sampler->filter0;
+    uint32_t format0 = tex->state.format0;
+    unsigned min_level, max_level;
     CS_LOCALS(r300);
 
     /* to emulate 1D textures through 2D ones correctly */
@@ -569,13 +593,20 @@ void r300_emit_texture(struct r300_context* r300,
         filter0 |= R300_TX_WRAP_T(R300_TX_CLAMP_TO_EDGE);
     }
 
+    /* determine min/max levels */
+    /* the MAX_MIP level is the largest (finest) one */
+    max_level = MIN2(sampler->max_lod, tex->tex.last_level);
+    min_level = MIN2(sampler->min_lod, max_level);
+    format0 |= R300_TX_NUM_LEVELS(max_level);
+    filter0 |= R300_TX_MAX_MIP_LEVEL(min_level);
+
     BEGIN_CS(16);
     OUT_CS_REG(R300_TX_FILTER0_0 + (offset * 4), filter0 |
         (offset << 28));
     OUT_CS_REG(R300_TX_FILTER1_0 + (offset * 4), sampler->filter1);
     OUT_CS_REG(R300_TX_BORDER_COLOR_0 + (offset * 4), sampler->border_color);
 
-    OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), tex->state.format0);
+    OUT_CS_REG(R300_TX_FORMAT0_0 + (offset * 4), format0);
     OUT_CS_REG(R300_TX_FORMAT1_0 + (offset * 4), tex->state.format1);
     OUT_CS_REG(R300_TX_FORMAT2_0 + (offset * 4), tex->state.format2);
     OUT_CS_REG_SEQ(R300_TX_OFFSET_0 + (offset * 4), 1);
@@ -690,12 +721,22 @@ void r300_emit_vertex_format_state(struct r300_context* r300)
     END_CS;
 }
 
+
 void r300_emit_vertex_program_code(struct r300_context* r300,
                                    struct r300_vertex_program_code* code)
 {
     int i;
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
     unsigned instruction_count = code->length / 4;
+
+    int vtx_mem_size = r300screen->caps->is_r500 ? 128 : 72;
+    int input_count = MAX2(util_bitcount(code->InputsRead), 1);
+    int output_count = MAX2(util_bitcount(code->OutputsWritten), 1);
+    int temp_count = MAX2(code->num_temporaries, 1);
+    int pvs_num_slots = MIN3(vtx_mem_size / input_count,
+                             vtx_mem_size / output_count, 10);
+    int pvs_num_controllers = MIN2(vtx_mem_size / temp_count, 6);
+
     CS_LOCALS(r300);
 
     if (!r300screen->caps->has_tcl) {
@@ -708,8 +749,7 @@ void r300_emit_vertex_program_code(struct r300_context* r300,
     /* R300_VAP_PVS_CODE_CNTL_0
      * R300_VAP_PVS_CONST_CNTL
      * R300_VAP_PVS_CODE_CNTL_1
-     * See the r5xx docs for instructions on how to use these.
-     * XXX these could be optimized to select better values... */
+     * See the r5xx docs for instructions on how to use these. */
     OUT_CS_REG_SEQ(R300_VAP_PVS_CODE_CNTL_0, 3);
     OUT_CS(R300_PVS_FIRST_INST(0) |
             R300_PVS_XYZW_VALID_INST(instruction_count - 1) |
@@ -722,10 +762,11 @@ void r300_emit_vertex_program_code(struct r300_context* r300,
     for (i = 0; i < code->length; i++)
         OUT_CS(code->body.d[i]);
 
-    OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(10) |
-            R300_PVS_NUM_CNTLRS(5) |
+    OUT_CS_REG(R300_VAP_CNTL, R300_PVS_NUM_SLOTS(pvs_num_slots) |
+            R300_PVS_NUM_CNTLRS(pvs_num_controllers) |
             R300_PVS_NUM_FPUS(r300screen->caps->num_vert_fpus) |
-            R300_PVS_VF_MAX_VTX_NUM(12));
+            R300_PVS_VF_MAX_VTX_NUM(12) |
+            (r300screen->caps->is_r500 ? R500_TCL_STATE_OPTIMIZATION : 0));
     END_CS;
 }
 
@@ -790,13 +831,22 @@ void r300_emit_viewport_state(struct r300_context* r300,
     END_CS;
 }
 
+void r300_emit_texture_count(struct r300_context* r300)
+{
+    CS_LOCALS(r300);
+
+    BEGIN_CS(2);
+    OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1);
+    END_CS;
+
+}
+
 void r300_flush_textures(struct r300_context* r300)
 {
     CS_LOCALS(r300);
 
-    BEGIN_CS(4);
+    BEGIN_CS(2);
     OUT_CS_REG(R300_TX_INVALTAGS, 0);
-    OUT_CS_REG(R300_TX_ENABLE, (1 << r300->texture_count) - 1);
     END_CS;
 }
 
@@ -950,6 +1000,8 @@ validate:
     /* Samplers and textures are tracked separately but emitted together. */
     if (r300->dirty_state &
             (R300_ANY_NEW_SAMPLERS | R300_ANY_NEW_TEXTURES)) {
+        r300_emit_texture_count(r300);
+
         for (i = 0; i < MIN2(r300->sampler_count, r300->texture_count); i++) {
   	    if (r300->dirty_state &
 		((R300_NEW_SAMPLER << i) | (R300_NEW_TEXTURE << i))) {
diff --git a/src/gallium/drivers/r300/r300_emit.h b/src/gallium/drivers/r300/r300_emit.h
index 7c83c5166de..3797d3d332a 100644
--- a/src/gallium/drivers/r300/r300_emit.h
+++ b/src/gallium/drivers/r300/r300_emit.h
@@ -92,6 +92,8 @@ void r300_emit_vertex_shader(struct r300_context* r300,
 void r300_emit_viewport_state(struct r300_context* r300,
                               struct r300_viewport_state* viewport);
 
+void r300_emit_texture_count(struct r300_context* r300);
+
 void r300_flush_textures(struct r300_context* r300);
 
 /* Emit all dirty state. */
diff --git a/src/gallium/drivers/r300/r300_reg.h b/src/gallium/drivers/r300/r300_reg.h
index 8ca785cb587..3a419b24b01 100644
--- a/src/gallium/drivers/r300/r300_reg.h
+++ b/src/gallium/drivers/r300/r300_reg.h
@@ -1293,7 +1293,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #        define R500_RS_INST_TEX_ID(x)                  ((x) << 0)
 #define R500_RS_INST_TEX_CN_WRITE			(1 << 4)
 #define R500_RS_INST_TEX_ADDR_SHIFT			5
-#        define R500_RS_INST_TEX_ADDR(x)                ((x) << 0)
+#        define R500_RS_INST_TEX_ADDR(x)                ((x) << 5)
 #define R500_RS_INST_COL_ID_SHIFT			12
 #        define R500_RS_INST_COL_ID(x)                  ((x) << 12)
 #define R500_RS_INST_COL_CN_NO_WRITE			(0 << 16)
@@ -1463,6 +1463,8 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #	define R300_TX_MIN_FILTER_MIP_NEAREST    (1 << 13)
 #	define R300_TX_MIN_FILTER_MIP_LINEAR     (2 << 13)
 #	define R300_TX_MIN_FILTER_MIP_MASK       (3 << 13)
+#       define R300_TX_MAX_MIP_LEVEL_SHIFT       17
+#       define R300_TX_MAX_MIP_LEVEL_MASK        (0xf << 17)
 #	define R300_TX_MAX_ANISO_1_TO_1          (0 << 21)
 #	define R300_TX_MAX_ANISO_2_TO_1          (1 << 21)
 #	define R300_TX_MAX_ANISO_4_TO_1          (2 << 21)
@@ -1471,6 +1473,7 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #	define R300_TX_MAX_ANISO_MASK            (7 << 21)
 #       define R300_TX_WRAP_S(x)                 ((x) << 0)
 #       define R300_TX_WRAP_T(x)                 ((x) << 3)
+#       define R300_TX_MAX_MIP_LEVEL(x)          ((x) << 17)
 
 #define R300_TX_FILTER1_0                      0x4440
 #	define R300_CHROMA_KEY_MODE_DISABLE    0
@@ -1500,8 +1503,6 @@ USE OR OTHER DEALINGS IN THE SOFTWARE.
 #       define R300_TX_HEIGHTMASK_MASK           (2047 << 11)
 #	define R300_TX_DEPTHMASK_SHIFT           22
 #	define R300_TX_DEPTHMASK_MASK            (0xf << 22)
-#       define R300_TX_MAX_MIP_LEVEL_SHIFT       26
-#       define R300_TX_MAX_MIP_LEVEL_MASK        (0xf << 26)
 #       define R300_TX_SIZE_PROJECTED            (1 << 30)
 #       define R300_TX_PITCH_EN                  (1 << 31)
 #       define R300_TX_WIDTH(x)                  ((x) << 0)
diff --git a/src/gallium/drivers/r300/r300_render.c b/src/gallium/drivers/r300/r300_render.c
index 62e1456ed36..4c5fb405c6a 100644
--- a/src/gallium/drivers/r300/r300_render.c
+++ b/src/gallium/drivers/r300/r300_render.c
@@ -183,8 +183,6 @@ boolean r300_draw_range_elements(struct pipe_context* pipe,
         return FALSE;
     }
 
-    setup_vertex_attributes(r300);
-
     setup_index_buffer(r300, indexBuffer, indexSize);
 
     r300_emit_dirty_state(r300);
@@ -226,8 +224,6 @@ boolean r300_draw_arrays(struct pipe_context* pipe, unsigned mode,
         return FALSE;
     }
 
-    setup_vertex_attributes(r300);
-
     r300_emit_dirty_state(r300);
 
     r300_emit_aos(r300, start);
diff --git a/src/gallium/drivers/r300/r300_screen.h b/src/gallium/drivers/r300/r300_screen.h
index 41df31f670f..1ce5ff3904b 100644
--- a/src/gallium/drivers/r300/r300_screen.h
+++ b/src/gallium/drivers/r300/r300_screen.h
@@ -27,6 +27,8 @@
 
 #include "r300_chipset.h"
 
+struct r300_winsys;
+
 struct r300_screen {
     /* Parent class */
     struct pipe_screen screen;
diff --git a/src/gallium/drivers/r300/r300_state.c b/src/gallium/drivers/r300/r300_state.c
index d1eced61db1..a88d66db247 100644
--- a/src/gallium/drivers/r300/r300_state.c
+++ b/src/gallium/drivers/r300/r300_state.c
@@ -523,6 +523,11 @@ static void*
                                                    state->mag_img_filter,
                                                    state->min_mip_filter);
 
+    /* Unfortunately, r300-r500 don't support floating-point mipmap lods. */
+    /* We must pass these to the emit function to clamp them properly. */
+    sampler->min_lod = MAX2((unsigned)state->min_lod, 0);
+    sampler->max_lod = MAX2((unsigned)ceilf(state->max_lod), 0);
+
     lod_bias = CLAMP((int)(state->lod_bias * 32), -(1 << 9), (1 << 9) - 1);
 
     sampler->filter1 |= lod_bias << R300_LOD_BIAS_SHIFT;
@@ -571,6 +576,7 @@ static void r300_set_sampler_textures(struct pipe_context* pipe,
                                       struct pipe_texture** texture)
 {
     struct r300_context* r300 = r300_context(pipe);
+    boolean is_r500 = r300_screen(r300->context.screen)->caps->is_r500;
     int i;
 
     /* XXX magic num */
@@ -585,6 +591,13 @@ static void r300_set_sampler_textures(struct pipe_context* pipe,
             pipe_texture_reference((struct pipe_texture**)&r300->textures[i],
                 texture[i]);
             r300->dirty_state |= (R300_NEW_TEXTURE << i);
+
+            /* R300-specific - set the texrect factor in a fragment shader */
+            if (!is_r500 && r300->textures[i]->is_npot) {
+                /* XXX It would be nice to re-emit just 1 constant,
+                 * XXX not all of them */
+                r300->dirty_state |= R300_NEW_FRAGMENT_SHADER_CONSTANTS;
+            }
         }
     }
 
@@ -674,6 +687,8 @@ static void r300_set_vertex_buffers(struct pipe_context* pipe,
         draw_flush(r300->draw);
         draw_set_vertex_buffers(r300->draw, count, buffers);
     }
+
+    r300->dirty_state |= R300_NEW_VERTEX_FORMAT;
 }
 
 static void r300_set_vertex_elements(struct pipe_context* pipe,
@@ -706,9 +721,6 @@ static void* r300_create_vs_state(struct pipe_context* pipe,
 
         tgsi_scan_shader(shader->tokens, &vs->info);
 
-        /* Appease Draw. */
-        vs->draw = draw_create_vertex_shader(r300->draw, shader);
-
         return (void*)vs;
     } else {
         return draw_create_vertex_shader(r300->draw, shader);
@@ -719,8 +731,6 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
 {
     struct r300_context* r300 = r300_context(pipe);
 
-    draw_flush(r300->draw);
-
     if (r300_screen(pipe->screen)->caps->has_tcl) {
         struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
 
@@ -731,10 +741,10 @@ static void r300_bind_vs_state(struct pipe_context* pipe, void* shader)
             r300_translate_vertex_shader(r300, vs);
         }
 
-        draw_bind_vertex_shader(r300->draw, vs->draw);
         r300->vs = vs;
         r300->dirty_state |= R300_NEW_VERTEX_SHADER | R300_NEW_VERTEX_SHADER_CONSTANTS;
     } else {
+        draw_flush(r300->draw);
         draw_bind_vertex_shader(r300->draw,
                 (struct draw_vertex_shader*)shader);
     }
@@ -748,7 +758,6 @@ static void r300_delete_vs_state(struct pipe_context* pipe, void* shader)
         struct r300_vertex_shader* vs = (struct r300_vertex_shader*)shader;
 
         rc_constants_destroy(&vs->code.constants);
-        draw_delete_vertex_shader(r300->draw, vs->draw);
         FREE((void*)vs->state.tokens);
         FREE(shader);
     } else {
diff --git a/src/gallium/drivers/r300/r300_state_derived.c b/src/gallium/drivers/r300/r300_state_derived.c
index 131006ca60d..4315780b4fa 100644
--- a/src/gallium/drivers/r300/r300_state_derived.c
+++ b/src/gallium/drivers/r300/r300_state_derived.c
@@ -47,8 +47,8 @@ struct r300_shader_derived_value {
 
 unsigned r300_shader_key_hash(void* key) {
     struct r300_shader_key* shader_key = (struct r300_shader_key*)key;
-    unsigned vs = (unsigned)shader_key->vs;
-    unsigned fs = (unsigned)shader_key->fs;
+    unsigned vs = (intptr_t)shader_key->vs;
+    unsigned fs = (intptr_t)shader_key->fs;
 
     return (vs << 16) | (fs & 0xffff);
 }
@@ -61,88 +61,46 @@ int r300_shader_key_compare(void* key1, void* key2) {
         (shader_key1->fs == shader_key2->fs);
 }
 
-/* Set up the vs_tab and routes. */
-static void r300_vs_tab_routes(struct r300_context* r300,
-                               struct r300_vertex_info* vformat)
+/* Set up the vs_output_tab and routes. */
+static void r300_vs_output_tab_routes(struct r300_context* r300,
+                                      int* vs_output_tab)
 {
-    struct r300_screen* r300screen = r300_screen(r300->context.screen);
-    struct vertex_info* vinfo = &vformat->vinfo;
-    int* tab = vformat->vs_tab;
+    struct vertex_info* vinfo = &r300->vertex_info->vinfo;
     boolean pos = FALSE, psize = FALSE, fog = FALSE;
     int i, texs = 0, cols = 0;
-    struct tgsi_shader_info* info;
-
-    if (r300screen->caps->has_tcl) {
-        /* Use vertex shader to determine required routes. */
-        info = &r300->vs->info;
-    } else {
-        /* Use fragment shader to determine required routes. */
-        info = &r300->fs->info;
-    }
+    struct tgsi_shader_info* info = &r300->fs->info;
 
-    assert(info->num_inputs <= 16);
+    /* XXX One day we should figure out how to handle a different number of
+     * VS outputs and FS inputs, as well as a different number of vertex streams
+     * and VS inputs. It's definitely one of the sources of hardlocks. */
 
-    if (!r300screen->caps->has_tcl || !r300->rs_state->enable_vte)
-    {
-        for (i = 0; i < info->num_inputs; i++) {
-            switch (r300->vs->code.inputs[i]) {
-                case TGSI_SEMANTIC_POSITION:
-                    pos = TRUE;
-                    tab[i] = 0;
-                    break;
-                case TGSI_SEMANTIC_COLOR:
-                    tab[i] = 2 + cols;
-                    cols++;
-                    break;
-                case TGSI_SEMANTIC_PSIZE:
-                    assert(psize == FALSE);
-                    psize = TRUE;
-                    tab[i] = 15;
-                    break;
-                case TGSI_SEMANTIC_FOG:
-                    assert(fog == FALSE);
-                    fog = TRUE;
-                    /* Fall through */
-                case TGSI_SEMANTIC_GENERIC:
-                    tab[i] = 6 + texs;
-                    texs++;
-                    break;
-                default:
-                    debug_printf("r300: Unknown vertex input %d\n",
-                        info->input_semantic_name[i]);
-                    break;
-            }
-        }
-    }
-    else
-    {
-        /* Just copy vert attribs over as-is. */
-        for (i = 0; i < info->num_inputs; i++) {
-            tab[i] = i;
-        }
-
-        for (i = 0; i < info->num_outputs; i++) {
-            switch (info->output_semantic_name[i]) {
-                case TGSI_SEMANTIC_POSITION:
-                    pos = TRUE;
-                    break;
-                case TGSI_SEMANTIC_COLOR:
-                    cols++;
-                    break;
-                case TGSI_SEMANTIC_PSIZE:
-                    psize = TRUE;
-                    break;
-                case TGSI_SEMANTIC_FOG:
-                    fog = TRUE;
-                    /* Fall through */
-                case TGSI_SEMANTIC_GENERIC:
-                    texs++;
-                    break;
-                default:
-                    debug_printf("r300: Unknown vertex output %d\n",
-                        info->output_semantic_name[i]);
-                    break;
-            }
+    for (i = 0; i < info->num_inputs; i++) {
+        switch (info->input_semantic_name[i]) {
+            case TGSI_SEMANTIC_POSITION:
+                pos = TRUE;
+                vs_output_tab[i] = 0;
+                break;
+            case TGSI_SEMANTIC_COLOR:
+                vs_output_tab[i] = 2 + cols;
+                cols++;
+                break;
+            case TGSI_SEMANTIC_PSIZE:
+                assert(psize == FALSE);
+                psize = TRUE;
+                vs_output_tab[i] = 15;
+                break;
+            case TGSI_SEMANTIC_FOG:
+                assert(fog == FALSE);
+                fog = TRUE;
+                /* Fall through */
+            case TGSI_SEMANTIC_GENERIC:
+                vs_output_tab[i] = 6 + texs;
+                texs++;
+                break;
+            default:
+                debug_printf("r300: Unknown vertex input %d\n",
+                    info->input_semantic_name[i]);
+                break;
         }
     }
 
@@ -161,14 +119,13 @@ static void r300_vs_tab_routes(struct r300_context* r300,
 
     /* We need to add vertex position attribute only for SW TCL case,
      * for HW TCL case it could be generated by vertex shader */
-    if (!pos && !r300screen->caps->has_tcl) {
-        debug_printf("r300: Forcing vertex position attribute emit...\n");
+    if (!pos) {
         /* Make room for the position attribute
-         * at the beginning of the tab. */
+         * at the beginning of the vs_output_tab. */
         for (i = 15; i > 0; i--) {
-            tab[i] = tab[i-1];
+            vs_output_tab[i] = vs_output_tab[i-1];
         }
-        tab[0] = 0;
+        vs_output_tab[0] = 0;
     }
 
     /* Position. */
@@ -227,43 +184,78 @@ static void r300_vs_tab_routes(struct r300_context* r300,
 }
 
 /* Update the PSC tables. */
-static void r300_vertex_psc(struct r300_context* r300,
-                            struct r300_vertex_info* vformat)
+static void r300_vertex_psc(struct r300_context* r300)
 {
-    struct r300_screen* r300screen = r300_screen(r300->context.screen);
-    struct vertex_info* vinfo = &vformat->vinfo;
-    int* tab = vformat->vs_tab;
+    struct r300_vertex_info *vformat = r300->vertex_info;
     uint16_t type, swizzle;
     enum pipe_format format;
-    unsigned i, attrib_count;
+    unsigned i;
 
     /* Vertex shaders have no semantics on their inputs,
-     * so PSC should just route stuff based on their info,
+     * so PSC should just route stuff based on the vertex elements,
      * and not on attrib information. */
-    if (r300screen->caps->has_tcl) {
-        attrib_count = r300->vs->info.num_inputs;
-        DBG(r300, DBG_DRAW, "r300: routing %d attribs in psc for vs\n",
-                attrib_count);
-    } else {
-        attrib_count = vinfo->num_attribs;
-        DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
-        for (i = 0; i < attrib_count; i++) {
-            DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
-                   " tab %d\n", vinfo->attrib[i].src_index,
-                   vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
-                   tab[i]);
+    DBG(r300, DBG_DRAW, "r300: vs expects %d attribs, routing %d elements"
+            " in psc\n",
+            r300->vs->info.num_inputs,
+            r300->vertex_element_count);
+
+    for (i = 0; i < r300->vertex_element_count; i++) {
+        format = r300->vertex_element[i].src_format;
+
+        type = r300_translate_vertex_data_type(format) |
+            (i << R300_DST_VEC_LOC_SHIFT);
+        swizzle = r300_translate_vertex_data_swizzle(format);
+
+        if (i % 2) {
+            vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
+            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
+        } else {
+            vformat->vap_prog_stream_cntl[i >> 1] |= type;
+            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
         }
     }
 
+
+    assert(i <= 15);
+
+    /* Set the last vector in the PSC. */
+    if (i) {
+        i -= 1;
+    }
+    vformat->vap_prog_stream_cntl[i >> 1] |=
+        (R300_LAST_VEC << (i & 1 ? 16 : 0));
+}
+
+/* Update the PSC tables for SW TCL, using Draw. */
+static void r300_swtcl_vertex_psc(struct r300_context* r300,
+                                  int* vs_output_tab)
+{
+    struct r300_vertex_info *vformat = r300->vertex_info;
+    struct vertex_info* vinfo = &vformat->vinfo;
+    uint16_t type, swizzle;
+    enum pipe_format format;
+    unsigned i, attrib_count;
+
+    /* For each Draw attribute, route it to the fragment shader according
+     * to the vs_output_tab. */
+    attrib_count = vinfo->num_attribs;
+    DBG(r300, DBG_DRAW, "r300: attrib count: %d\n", attrib_count);
+    for (i = 0; i < attrib_count; i++) {
+        DBG(r300, DBG_DRAW, "r300: attrib: offset %d, interp %d, size %d,"
+               " vs_output_tab %d\n", vinfo->attrib[i].src_index,
+               vinfo->attrib[i].interp_mode, vinfo->attrib[i].emit,
+               vs_output_tab[i]);
+    }
+
     for (i = 0; i < attrib_count; i++) {
         /* Make sure we have a proper destination for our attribute. */
-        assert(tab[i] != -1);
+        assert(vs_output_tab[i] != -1);
 
         format = draw_translate_vinfo_format(vinfo->attrib[i].emit);
 
         /* Obtain the type of data in this attribute. */
         type = r300_translate_vertex_data_type(format) |
-            tab[i] << R300_DST_VEC_LOC_SHIFT;
+            vs_output_tab[i] << R300_DST_VEC_LOC_SHIFT;
 
         /* Obtain the swizzle for this attribute. Note that the default
          * swizzle in the hardware is not XYZW! */
@@ -272,12 +264,10 @@ static void r300_vertex_psc(struct r300_context* r300,
         /* Add the attribute to the PSC table. */
         if (i & 1) {
             vformat->vap_prog_stream_cntl[i >> 1] |= type << 16;
-
             vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 16;
         } else {
-            vformat->vap_prog_stream_cntl[i >> 1] |= type <<  0;
-
-            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle << 0;
+            vformat->vap_prog_stream_cntl[i >> 1] |= type;
+            vformat->vap_prog_stream_cntl_ext[i >> 1] |= swizzle;
         }
     }
 
@@ -289,74 +279,12 @@ static void r300_vertex_psc(struct r300_context* r300,
         (R300_LAST_VEC << (i & 1 ? 16 : 0));
 }
 
-/* Set up the mappings from GB to US, for RS block. */
-static void r300_update_fs_tab(struct r300_context* r300,
-                               struct r300_vertex_info* vformat)
-{
-    struct tgsi_shader_info* info = &r300->fs->info;
-    int i, cols = 0, texs = 0, cols_emitted = 0;
-    int* tab = vformat->fs_tab;
-
-    for (i = 0; i < 16; i++) {
-        tab[i] = -1;
-    }
-
-    assert(info->num_inputs <= 16);
-    for (i = 0; i < info->num_inputs; i++) {
-        switch (info->input_semantic_name[i]) {
-            case TGSI_SEMANTIC_COLOR:
-                tab[i] = INTERP_LINEAR;
-                cols++;
-                break;
-            case TGSI_SEMANTIC_POSITION:
-            case TGSI_SEMANTIC_PSIZE:
-                debug_printf("r300: Implementation error: Can't use "
-                        "pos attribs in fragshader yet!\n");
-                /* Pass through for now */
-            case TGSI_SEMANTIC_FOG:
-            case TGSI_SEMANTIC_GENERIC:
-                tab[i] = INTERP_PERSPECTIVE;
-                break;
-            default:
-                debug_printf("r300: Unknown vertex input %d\n",
-                    info->input_semantic_name[i]);
-                break;
-        }
-    }
-
-    /* Now that we know where everything is... */
-    DBG(r300, DBG_DRAW, "r300: fp input count: %d\n", info->num_inputs);
-    for (i = 0; i < info->num_inputs; i++) {
-        switch (tab[i]) {
-            case INTERP_LINEAR:
-                DBG(r300, DBG_DRAW, "r300: attrib: "
-                        "stack offset %d, color,    tab %d\n",
-                        i, cols_emitted);
-                tab[i] = cols_emitted;
-                cols_emitted++;
-                break;
-            case INTERP_PERSPECTIVE:
-                DBG(r300, DBG_DRAW, "r300: attrib: "
-                        "stack offset %d, texcoord, tab %d\n",
-                        i, cols + texs);
-                tab[i] = cols + texs;
-                texs++;
-                break;
-            case -1:
-                debug_printf("r300: Implementation error: Bad fp interp!\n");
-            default:
-                break;
-        }
-    }
-
-}
-
 /* Set up the RS block. This is the part of the chipset that actually does
  * the rasterization of vertices into fragments. This is also the part of the
  * chipset that locks up if any part of it is even slightly wrong. */
-static void r300_update_rs_block(struct r300_context* r300,
-                                 struct r300_rs_block* rs)
+static void r300_update_rs_block(struct r300_context* r300)
 {
+    struct r300_rs_block* rs = r300->rs_block;
     struct tgsi_shader_info* info = &r300->fs->info;
     int col_count = 0, fp_offset = 0, i, tex_count = 0;
     int rs_tex_comp = 0;
@@ -390,17 +318,18 @@ static void r300_update_rs_block(struct r300_context* r300,
             col_count++;
         }
 
+        for (i = 0; i < col_count; i++) {
+            rs->inst[i] |= R500_RS_INST_COL_ID(i) |
+                R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_offset);
+            fp_offset++;
+        }
+
         for (i = 0; i < tex_count; i++) {
             rs->inst[i] |= R500_RS_INST_TEX_ID(i) |
                 R500_RS_INST_TEX_CN_WRITE | R500_RS_INST_TEX_ADDR(fp_offset);
             fp_offset++;
         }
 
-        for (i = 0; i < col_count; i++) {
-            rs->inst[i] |= R500_RS_INST_COL_ID(i) |
-                R500_RS_INST_COL_CN_WRITE | R500_RS_INST_COL_ADDR(fp_offset);
-            fp_offset++;
-        }
     } else {
         for (i = 0; i < info->num_inputs; i++) {
             switch (info->input_semantic_name[i]) {
@@ -425,8 +354,10 @@ static void r300_update_rs_block(struct r300_context* r300,
             }
         }
 
+        /* Rasterize at least one color, or bad things happen. */
         if (col_count == 0) {
             rs->ip[0] |= R300_RS_COL_FMT(R300_RS_COL_FMT_0001);
+            col_count++;
         }
 
         if (tex_count == 0) {
@@ -437,9 +368,10 @@ static void r300_update_rs_block(struct r300_context* r300,
                 R300_RS_SEL_Q(R300_RS_SEL_K1);
         }
 
-        /* Rasterize at least one color, or bad things happen. */
-        if ((col_count == 0) && (tex_count == 0)) {
-            col_count++;
+        for (i = 0; i < col_count; i++) {
+            rs->inst[i] |= R300_RS_INST_COL_ID(i) |
+                R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset);
+            fp_offset++;
         }
 
         for (i = 0; i < tex_count; i++) {
@@ -447,28 +379,22 @@ static void r300_update_rs_block(struct r300_context* r300,
                 R300_RS_INST_TEX_CN_WRITE | R300_RS_INST_TEX_ADDR(fp_offset);
             fp_offset++;
         }
-
-        for (i = 0; i < col_count; i++) {
-            rs->inst[i] |= R300_RS_INST_COL_ID(i) |
-                R300_RS_INST_COL_CN_WRITE | R300_RS_INST_COL_ADDR(fp_offset);
-            fp_offset++;
-        }
     }
 
     rs->count = (rs_tex_comp) | (col_count << R300_IC_COUNT_SHIFT) |
         R300_HIRES_EN;
 
-    rs->inst_count = MAX2(MAX2(col_count - 1, tex_count - 1), 0);
+    rs->inst_count = MAX3(col_count - 1, tex_count - 1, 0);
 }
 
 /* Update the vertex format. */
 static void r300_update_derived_shader_state(struct r300_context* r300)
 {
     struct r300_screen* r300screen = r300_screen(r300->context.screen);
-    struct r300_vertex_info* vformat;
-    struct r300_rs_block* rs_block;
+    int vs_output_tab[16];
     int i;
 
+
     /*
     struct r300_shader_key* key;
     struct r300_shader_derived_value* value;
@@ -495,27 +421,26 @@ static void r300_update_derived_shader_state(struct r300_context* r300)
             (void*)key, (void*)value);
     } */
 
-    /* XXX This will be refactored ASAP. */
-    vformat = CALLOC_STRUCT(r300_vertex_info);
-    rs_block = CALLOC_STRUCT(r300_rs_block);
+    /* Reset structures */
+    memset(r300->rs_block, 0, sizeof(struct r300_rs_block));
+    memset(r300->vertex_info, 0, sizeof(struct r300_vertex_info));
 
     for (i = 0; i < 16; i++) {
-        vformat->vs_tab[i] = -1;
-        vformat->fs_tab[i] = -1;
+        vs_output_tab[i] = -1;
     }
 
-    r300_vs_tab_routes(r300, vformat);
-    r300_vertex_psc(r300, vformat);
-    r300_update_fs_tab(r300, vformat);
+    /* Update states */
+    r300_vs_output_tab_routes(r300, vs_output_tab);
 
-    r300_update_rs_block(r300, rs_block);
+    if (r300screen->caps->has_tcl) {
+        r300_vertex_psc(r300);
+    } else {
+        r300_swtcl_vertex_psc(r300, vs_output_tab);
+    }
 
-    FREE(r300->vertex_info);
-    FREE(r300->rs_block);
+    r300_update_rs_block(r300);
 
-    r300->vertex_info = vformat;
-    r300->rs_block = rs_block;
-    r300->dirty_state |= (R300_NEW_VERTEX_FORMAT | R300_NEW_RS_BLOCK);
+    r300->dirty_state |= R300_NEW_RS_BLOCK;
 }
 
 static void r300_update_ztop(struct r300_context* r300)
@@ -553,9 +478,9 @@ static void r300_update_ztop(struct r300_context* r300)
 
 void r300_update_derived_state(struct r300_context* r300)
 {
-    /* XXX */
-    if (TRUE || r300->dirty_state &
-        (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER)) {
+    if (r300->dirty_state &
+        (R300_NEW_FRAGMENT_SHADER | R300_NEW_VERTEX_SHADER |
+         R300_NEW_VERTEX_FORMAT)) {
         r300_update_derived_shader_state(r300);
     }
 
diff --git a/src/gallium/drivers/r300/r300_state_invariant.c b/src/gallium/drivers/r300/r300_state_invariant.c
index c07e6ae676d..46d1cb39b54 100644
--- a/src/gallium/drivers/r300/r300_state_invariant.c
+++ b/src/gallium/drivers/r300/r300_state_invariant.c
@@ -84,7 +84,7 @@ void r300_emit_invariant_state(struct r300_context* r300)
     END_CS;
 
     /* XXX unsorted stuff from surface_fill */
-    BEGIN_CS(60 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
+    BEGIN_CS(56 + (caps->has_tcl ? 5 : 0) + (caps->is_r500 ? 4 : 0));
     /* Flush PVS. */
     OUT_CS_REG(R300_VAP_PVS_STATE_FLUSH_REG, 0x0);
 
@@ -135,8 +135,6 @@ void r300_emit_invariant_state(struct r300_context* r300)
     OUT_CS_REG(R300_ZB_DEPTHCLEARVALUE, 0x00000000);
     OUT_CS_REG(R300_ZB_HIZ_OFFSET, 0x00000000);
     OUT_CS_REG(R300_ZB_HIZ_PITCH, 0x00000000);
-    OUT_CS_REG(R300_VAP_VTX_STATE_CNTL, 0x1);
-    OUT_CS_REG(R300_VAP_VSM_VTX_ASSM, 0x405);
     OUT_CS_REG(R300_SE_VTE_CNTL, 0x0000043F);
 
     /* XXX */
diff --git a/src/gallium/drivers/r300/r300_texture.c b/src/gallium/drivers/r300/r300_texture.c
index aea25cf71dd..093a21ebe24 100644
--- a/src/gallium/drivers/r300/r300_texture.c
+++ b/src/gallium/drivers/r300/r300_texture.c
@@ -34,8 +34,8 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
     struct r300_texture_state* state = &tex->state;
     struct pipe_texture *pt = &tex->tex;
 
-    state->format0 = R300_TX_WIDTH((pt->width[0] - 1) & 0x7ff) |
-                     R300_TX_HEIGHT((pt->height[0] - 1) & 0x7ff);
+    state->format0 = R300_TX_WIDTH((pt->width0 - 1) & 0x7ff) |
+                     R300_TX_HEIGHT((pt->height0 - 1) & 0x7ff);
 
     if (tex->is_npot) {
         /* rectangles love this */
@@ -43,8 +43,7 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
         state->format2 = (tex->pitch[0] - 1) & 0x1fff;
     } else {
         /* power of two textures (3D, mipmaps, and no pitch) */
-        state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth[0]) & 0xf) |
-                          R300_TX_NUM_LEVELS(pt->last_level & 0xf);
+        state->format0 |= R300_TX_DEPTH(util_logbase2(pt->depth0) & 0xf);
     }
 
     state->format1 = r300_translate_texformat(pt->format);
@@ -58,17 +57,17 @@ static void r300_setup_texture_state(struct r300_texture* tex, boolean is_r500)
     /* large textures on r500 */
     if (is_r500)
     {
-        if (pt->width[0] > 2048) {
+        if (pt->width0 > 2048) {
             state->format2 |= R500_TXWIDTH_BIT11;
         }
-        if (pt->height[0] > 2048) {
+        if (pt->height0 > 2048) {
             state->format2 |= R500_TXHEIGHT_BIT11;
         }
     }
-    assert(is_r500 || (pt->width[0] <= 2048 && pt->height[0] <= 2048));
+    assert(is_r500 || (pt->width0 <= 2048 && pt->height0 <= 2048));
 
     debug_printf("r300: Set texture state (%dx%d, %d levels)\n",
-		 pt->width[0], pt->height[0], pt->last_level);
+		 pt->width0, pt->height0, pt->last_level);
 }
 
 unsigned r300_texture_get_offset(struct r300_texture* tex, unsigned level,
@@ -106,7 +105,7 @@ unsigned r300_texture_get_stride(struct r300_texture* tex, unsigned level)
         return 0;
     }
 
-    return align(pf_get_stride(&tex->tex.block, tex->tex.width[level]), 32);
+    return align(pf_get_stride(&tex->tex.block, u_minify(tex->tex.width0, level)), 32);
 }
 
 static void r300_setup_miptree(struct r300_texture* tex)
@@ -116,14 +115,8 @@ static void r300_setup_miptree(struct r300_texture* tex)
     int i;
 
     for (i = 0; i <= base->last_level; i++) {
-        if (i > 0) {
-            base->width[i] = minify(base->width[i-1]);
-            base->height[i] = minify(base->height[i-1]);
-            base->depth[i] = minify(base->depth[i-1]);
-        }
-
-        base->nblocksx[i] = pf_get_nblocksx(&base->block, base->width[i]);
-        base->nblocksy[i] = pf_get_nblocksy(&base->block, base->height[i]);
+        base->nblocksx[i] = pf_get_nblocksx(&base->block, u_minify(base->width0, i));
+        base->nblocksy[i] = pf_get_nblocksy(&base->block, u_minify(base->height0, i));
 
         stride = r300_texture_get_stride(tex, i);
         layer_size = stride * base->nblocksy[i];
@@ -131,7 +124,7 @@ static void r300_setup_miptree(struct r300_texture* tex)
         if (base->target == PIPE_TEXTURE_CUBE)
             size = layer_size * 6;
         else
-            size = layer_size * base->depth[i];
+            size = layer_size * u_minify(base->depth0, i);
 
         tex->offset[i] = align(tex->size, 32);
         tex->size = tex->offset[i] + size;
@@ -140,15 +133,15 @@ static void r300_setup_miptree(struct r300_texture* tex)
 
         debug_printf("r300: Texture miptree: Level %d "
                 "(%dx%dx%d px, pitch %d bytes)\n",
-                i, base->width[i], base->height[i], base->depth[i],
-                stride);
+                i, u_minify(base->width0, i), u_minify(base->height0, i),
+                u_minify(base->depth0, i), stride);
     }
 }
 
 static void r300_setup_flags(struct r300_texture* tex)
 {
-    tex->is_npot = !util_is_power_of_two(tex->tex.width[0]) ||
-                   !util_is_power_of_two(tex->tex.height[0]);
+    tex->is_npot = !util_is_power_of_two(tex->tex.width0) ||
+                   !util_is_power_of_two(tex->tex.height0);
 }
 
 /* Create a new texture. */
@@ -208,8 +201,8 @@ static struct pipe_surface* r300_get_tex_surface(struct pipe_screen* screen,
         pipe_reference_init(&surface->reference, 1);
         pipe_texture_reference(&surface->texture, texture);
         surface->format = texture->format;
-        surface->width = texture->width[level];
-        surface->height = texture->height[level];
+        surface->width = u_minify(texture->width0, level);
+        surface->height = u_minify(texture->height0, level);
         surface->offset = offset;
         surface->usage = flags;
         surface->zslice = zslice;
@@ -237,7 +230,7 @@ static struct pipe_texture*
 
     /* Support only 2D textures without mipmaps */
     if (base->target != PIPE_TEXTURE_2D ||
-        base->depth[0] != 1 ||
+        base->depth0 != 1 ||
         base->last_level != 0) {
         return NULL;
     }
@@ -287,9 +280,9 @@ r300_video_surface_create(struct pipe_screen *screen,
     template.target = PIPE_TEXTURE_2D;
     template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
     template.last_level = 0;
-    template.width[0] = util_next_power_of_two(width);
-    template.height[0] = util_next_power_of_two(height);
-    template.depth[0] = 1;
+    template.width0 = util_next_power_of_two(width);
+    template.height0 = util_next_power_of_two(height);
+    template.depth0 = 1;
     pf_get_block(template.format, &template.block);
     template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER |
                          PIPE_TEXTURE_USAGE_RENDER_TARGET;
diff --git a/src/gallium/drivers/r300/r300_vbo.c b/src/gallium/drivers/r300/r300_vbo.c
index a6a159667a3..6ebaf715dc5 100644
--- a/src/gallium/drivers/r300/r300_vbo.c
+++ b/src/gallium/drivers/r300/r300_vbo.c
@@ -34,52 +34,6 @@
 #include "r300_reg.h"
 #include "r300_winsys.h"
 
-static INLINE void setup_vertex_attribute(struct r300_vertex_info *vinfo,
-                                          struct pipe_vertex_element *vert_elem,
-                                          unsigned attr_num)
-{
-    uint16_t hw_fmt1, hw_fmt2;
-
-    hw_fmt1 = r300_translate_vertex_data_type(vert_elem->src_format) |
-        (attr_num << R300_DST_VEC_LOC_SHIFT);
-    hw_fmt2 = r300_translate_vertex_data_swizzle(vert_elem->src_format);
-
-    if (attr_num % 2 == 0)
-    {
-        vinfo->vap_prog_stream_cntl[attr_num >> 1] = hw_fmt1;
-        vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] = hw_fmt2;
-    }
-    else
-    {
-        vinfo->vap_prog_stream_cntl[attr_num >> 1] |= hw_fmt1 << 16;
-        vinfo->vap_prog_stream_cntl_ext[attr_num >> 1] |= hw_fmt2 << 16;
-    }
-}
-
-static void finish_vertex_attribs_setup(struct r300_vertex_info *vinfo,
-                                        unsigned attribs_num)
-{
-    uint32_t last_vec_bit = (attribs_num % 2 == 0) ?
-        (R300_LAST_VEC << 16) : R300_LAST_VEC;
-
-    assert(attribs_num > 0 && attribs_num <= 16);
-    vinfo->vap_prog_stream_cntl[(attribs_num - 1) >> 1] |= last_vec_bit;
-}
-
-void setup_vertex_attributes(struct r300_context *r300)
-{
-    struct pipe_vertex_element *vert_elem;
-    int i;
-
-    for (i = 0; i < r300->vertex_element_count; i++) {
-        vert_elem = &r300->vertex_element[i];
-        setup_vertex_attribute(r300->vertex_info, vert_elem, i);
-    }
-
-    finish_vertex_attribs_setup(r300->vertex_info,
-        r300->vertex_element_count);
-}
-
 static INLINE int get_buffer_offset(struct r300_context *r300,
                                     unsigned int buf_nr,
                                     unsigned int elem_offset)
diff --git a/src/gallium/drivers/r300/r300_vs.h b/src/gallium/drivers/r300/r300_vs.h
index 2a4ce315e32..00b02bf510d 100644
--- a/src/gallium/drivers/r300/r300_vs.h
+++ b/src/gallium/drivers/r300/r300_vs.h
@@ -35,9 +35,6 @@ struct r300_vertex_shader {
     struct pipe_shader_state state;
     struct tgsi_shader_info info;
 
-    /* Fallback shader, because Draw has issues */
-    struct draw_vertex_shader* draw;
-
     /* Has this shader been translated yet? */
     boolean translated;
 
diff --git a/src/gallium/drivers/softpipe/sp_context.c b/src/gallium/drivers/softpipe/sp_context.c
index 0e2fc111636..9f1524cc66d 100644
--- a/src/gallium/drivers/softpipe/sp_context.c
+++ b/src/gallium/drivers/softpipe/sp_context.c
@@ -90,14 +90,15 @@ softpipe_destroy( struct pipe_context *pipe )
    if (softpipe->draw)
       draw_destroy( softpipe->draw );
 
-      softpipe->quad.shade->destroy( softpipe->quad.shade );
-      softpipe->quad.depth_test->destroy( softpipe->quad.depth_test );
-      softpipe->quad.blend->destroy( softpipe->quad.blend );
+   softpipe->quad.shade->destroy( softpipe->quad.shade );
+   softpipe->quad.depth_test->destroy( softpipe->quad.depth_test );
+   softpipe->quad.blend->destroy( softpipe->quad.blend );
 
    for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
       sp_destroy_tile_cache(softpipe->cbuf_cache[i]);
       pipe_surface_reference(&softpipe->framebuffer.cbufs[i], NULL);
    }
+
    sp_destroy_tile_cache(softpipe->zsbuf_cache);
    pipe_surface_reference(&softpipe->framebuffer.zsbuf, NULL);
 
@@ -120,7 +121,7 @@ softpipe_destroy( struct pipe_context *pipe )
  * if (the texture is being used as a framebuffer surface)
  *    return PIPE_REFERENCED_FOR_WRITE
  * else if (the texture is a bound texture source)
- *    return PIPE_REFERENCED_FOR_READ  XXX not done yet
+ *    return PIPE_REFERENCED_FOR_READ
  * else
  *    return PIPE_UNREFERENCED
  */
@@ -132,6 +133,7 @@ softpipe_is_texture_referenced( struct pipe_context *pipe,
    struct softpipe_context *softpipe = softpipe_context( pipe );
    unsigned i;
 
+   /* check if any of the bound drawing surfaces are this texture */
    if (softpipe->dirty_render_cache) {
       for (i = 0; i < softpipe->framebuffer.nr_cbufs; i++) {
          if (softpipe->framebuffer.cbufs[i] && 
@@ -145,7 +147,12 @@ softpipe_is_texture_referenced( struct pipe_context *pipe,
       }
    }
    
-   /* FIXME: we also need to do the same for the texture cache */
+   /* check if any of the tex_cache textures are this texture */
+   for (i = 0; i < PIPE_MAX_SAMPLERS; i++) {
+      if (softpipe->tex_cache[i] &&
+          softpipe->tex_cache[i]->texture == texture)
+         return PIPE_REFERENCED_FOR_READ;
+   }
    
    return PIPE_UNREFERENCED;
 }
@@ -247,9 +254,9 @@ softpipe_create( struct pipe_screen *screen )
 
 
    /* setup quad rendering stages */
-      softpipe->quad.shade = sp_quad_shade_stage(softpipe);
-      softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe);
-      softpipe->quad.blend = sp_quad_blend_stage(softpipe);
+   softpipe->quad.shade = sp_quad_shade_stage(softpipe);
+   softpipe->quad.depth_test = sp_quad_depth_test_stage(softpipe);
+   softpipe->quad.blend = sp_quad_blend_stage(softpipe);
 
 
    /*
@@ -279,7 +286,6 @@ softpipe_create( struct pipe_screen *screen )
    draw_set_render(softpipe->draw, softpipe->vbuf_backend);
 
 
-
    /* plug in AA line/point stages */
    draw_install_aaline_stage(softpipe->draw, &softpipe->pipe);
    draw_install_aapoint_stage(softpipe->draw, &softpipe->pipe);
@@ -295,4 +301,3 @@ softpipe_create( struct pipe_screen *screen )
    softpipe_destroy(&softpipe->pipe);
    return NULL;
 }
-
diff --git a/src/gallium/drivers/softpipe/sp_fs_exec.c b/src/gallium/drivers/softpipe/sp_fs_exec.c
index 4076114d392..a8999ed3479 100644
--- a/src/gallium/drivers/softpipe/sp_fs_exec.c
+++ b/src/gallium/drivers/softpipe/sp_fs_exec.c
@@ -126,7 +126,13 @@ exec_run( const struct sp_fragment_shader *base,
    setup_pos_vector(quad->posCoef, 
                     (float)quad->input.x0, (float)quad->input.y0, 
                     &machine->QuadPos);
-   
+
+   if (quad->input.facing) {
+      machine->Face = -1.0f;
+   } else {
+      machine->Face = 1.0f;
+   }
+
    quad->inout.mask &= tgsi_exec_machine_run( machine );
    if (quad->inout.mask == 0)
       return FALSE;
diff --git a/src/gallium/drivers/softpipe/sp_state_fs.c b/src/gallium/drivers/softpipe/sp_state_fs.c
index bd7306ea67f..1d8142311a6 100644
--- a/src/gallium/drivers/softpipe/sp_state_fs.c
+++ b/src/gallium/drivers/softpipe/sp_state_fs.c
@@ -143,6 +143,7 @@ softpipe_delete_vs_state(struct pipe_context *pipe, void *vs)
    struct sp_vertex_shader *state = (struct sp_vertex_shader *) vs;
 
    draw_delete_vertex_shader(softpipe->draw, state->draw_data);
+   FREE( (void *)state->shader.tokens );
    FREE( state );
 }
 
diff --git a/src/gallium/drivers/softpipe/sp_tex_sample.c b/src/gallium/drivers/softpipe/sp_tex_sample.c
index c22ee86b66c..e26153b1d90 100644
--- a/src/gallium/drivers/softpipe/sp_tex_sample.c
+++ b/src/gallium/drivers/softpipe/sp_tex_sample.c
@@ -521,7 +521,7 @@ compute_lambda_1d(const struct sp_sampler_varient *samp,
    const struct pipe_sampler_state *sampler = samp->sampler;
    float dsdx = fabsf(s[QUAD_BOTTOM_RIGHT] - s[QUAD_BOTTOM_LEFT]);
    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
-   float rho = MAX2(dsdx, dsdy) * texture->width[0];
+   float rho = MAX2(dsdx, dsdy) * texture->width0;
    float lambda;
 
    lambda = util_fast_log2(rho);
@@ -545,8 +545,8 @@ compute_lambda_2d(const struct sp_sampler_varient *samp,
    float dsdy = fabsf(s[QUAD_TOP_LEFT]     - s[QUAD_BOTTOM_LEFT]);
    float dtdx = fabsf(t[QUAD_BOTTOM_RIGHT] - t[QUAD_BOTTOM_LEFT]);
    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
-   float maxx = MAX2(dsdx, dsdy) * texture->width[0];
-   float maxy = MAX2(dtdx, dtdy) * texture->height[0];
+   float maxx = MAX2(dsdx, dsdy) * texture->width0;
+   float maxy = MAX2(dtdx, dtdy) * texture->height0;
    float rho  = MAX2(maxx, maxy);
    float lambda;
 
@@ -573,9 +573,9 @@ compute_lambda_3d(const struct sp_sampler_varient *samp,
    float dtdy = fabsf(t[QUAD_TOP_LEFT]     - t[QUAD_BOTTOM_LEFT]);
    float dpdx = fabsf(p[QUAD_BOTTOM_RIGHT] - p[QUAD_BOTTOM_LEFT]);
    float dpdy = fabsf(p[QUAD_TOP_LEFT]     - p[QUAD_BOTTOM_LEFT]);
-   float maxx = MAX2(dsdx, dsdy) * texture->width[0];
-   float maxy = MAX2(dtdx, dtdy) * texture->height[0];
-   float maxz = MAX2(dpdx, dpdy) * texture->depth[0];
+   float maxx = MAX2(dsdx, dsdy) * texture->width0;
+   float maxy = MAX2(dtdx, dtdy) * texture->height0;
+   float maxz = MAX2(dpdx, dpdy) * texture->depth0;
    float rho, lambda;
 
    rho = MAX2(maxx, maxy);
@@ -644,8 +644,8 @@ get_texel_2d(const struct sp_sampler_varient *samp,
    const struct pipe_texture *texture = samp->texture;
    unsigned level = addr.bits.level;
 
-   if (x < 0 || x >= (int) texture->width[level] ||
-       y < 0 || y >= (int) texture->height[level]) {
+   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
+       y < 0 || y >= (int) u_minify(texture->height0, level)) {
       return samp->sampler->border_color;
    }
    else {
@@ -737,9 +737,9 @@ get_texel_3d(const struct sp_sampler_varient *samp,
    const struct pipe_texture *texture = samp->texture;
    unsigned level = addr.bits.level;
 
-   if (x < 0 || x >= (int) texture->width[level] ||
-       y < 0 || y >= (int) texture->height[level] ||
-       z < 0 || z >= (int) texture->depth[level]) {
+   if (x < 0 || x >= (int) u_minify(texture->width0, level) ||
+       y < 0 || y >= (int) u_minify(texture->height0, level) ||
+       z < 0 || z >= (int) u_minify(texture->depth0, level)) {
       return samp->sampler->border_color;
    }
    else {
@@ -925,7 +925,7 @@ img_filter_1d_nearest(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
+   width = u_minify(texture->width0, level0);
 
    assert(width > 0);
 
@@ -961,8 +961,8 @@ img_filter_2d_nearest(struct tgsi_sampler *tgsi_sampler,
 
 
    level0 = samp->level;
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
    assert(height > 0);
@@ -1008,8 +1008,8 @@ img_filter_cube_nearest(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
    assert(height > 0);
@@ -1046,9 +1046,9 @@ img_filter_3d_nearest(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
-   height = texture->height[level0];
-   depth = texture->depth[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
+   depth = u_minify(texture->depth0, level0);
 
    assert(width > 0);
    assert(height > 0);
@@ -1088,7 +1088,7 @@ img_filter_1d_linear(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
+   width = u_minify(texture->width0, level0);
 
    assert(width > 0);
 
@@ -1127,8 +1127,8 @@ img_filter_2d_linear(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
    assert(height > 0);
@@ -1174,8 +1174,8 @@ img_filter_cube_linear(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
-   height = texture->height[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
 
    assert(width > 0);
    assert(height > 0);
@@ -1221,9 +1221,9 @@ img_filter_3d_linear(struct tgsi_sampler *tgsi_sampler,
    union tex_tile_address addr;
 
    level0 = samp->level;
-   width = texture->width[level0];
-   height = texture->height[level0];
-   depth = texture->depth[level0];
+   width = u_minify(texture->width0, level0);
+   height = u_minify(texture->height0, level0);
+   depth = u_minify(texture->depth0, level0);
 
    addr.value = 0;
    addr.bits.level = level0;
@@ -1778,8 +1778,8 @@ sp_sampler_varient_bind_texture( struct sp_sampler_varient *samp,
 
    samp->texture = texture;
    samp->cache = tex_cache;
-   samp->xpot = util_unsigned_logbase2( texture->width[0] );
-   samp->ypot = util_unsigned_logbase2( texture->height[0] );
+   samp->xpot = util_unsigned_logbase2( texture->width0 );
+   samp->ypot = util_unsigned_logbase2( texture->height0 );
    samp->level = CLAMP((int) sampler->min_lod, 0, (int) texture->last_level);
 }
 
diff --git a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
index 407a22a9f4b..e50a76a73bc 100644
--- a/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
+++ b/src/gallium/drivers/softpipe/sp_tex_tile_cache.c
@@ -35,6 +35,7 @@
 #include "pipe/p_inlines.h"
 #include "util/u_memory.h"
 #include "util/u_tile.h"
+#include "util/u_math.h"
 #include "sp_context.h"
 #include "sp_surface.h"
 #include "sp_texture.h"
@@ -246,9 +247,9 @@ sp_find_cached_tile_tex(struct softpipe_tex_tile_cache *tc,
                                      addr.bits.level, 
                                      addr.bits.z, 
                                      PIPE_TRANSFER_READ, 0, 0,
-                                     tc->texture->width[addr.bits.level],
-                                     tc->texture->height[addr.bits.level]);
-
+                                     u_minify(tc->texture->width0, addr.bits.level),
+                                     u_minify(tc->texture->height0, addr.bits.level));
+         
          tc->tex_trans_map = screen->transfer_map(screen, tc->tex_trans);
 
          tc->tex_face = addr.bits.face;
diff --git a/src/gallium/drivers/softpipe/sp_texture.c b/src/gallium/drivers/softpipe/sp_texture.c
index 7caf2928b4b..ac5f61e46f4 100644
--- a/src/gallium/drivers/softpipe/sp_texture.c
+++ b/src/gallium/drivers/softpipe/sp_texture.c
@@ -52,16 +52,17 @@ softpipe_texture_layout(struct pipe_screen *screen,
 {
    struct pipe_texture *pt = &spt->base;
    unsigned level;
-   unsigned width = pt->width[0];
-   unsigned height = pt->height[0];
-   unsigned depth = pt->depth[0];
+   unsigned width = pt->width0;
+   unsigned height = pt->height0;
+   unsigned depth = pt->depth0;
 
    unsigned buffer_size = 0;
 
+   pt->width0 = width;
+   pt->height0 = height;
+   pt->depth0 = depth;
+
    for (level = 0; level <= pt->last_level; level++) {
-      pt->width[level] = width;
-      pt->height[level] = height;
-      pt->depth[level] = depth;
       pt->nblocksx[level] = pf_get_nblocksx(&pt->block, width);  
       pt->nblocksy[level] = pf_get_nblocksy(&pt->block, height);  
       spt->stride[level] = pt->nblocksx[level]*pt->block.size;
@@ -72,9 +73,9 @@ softpipe_texture_layout(struct pipe_screen *screen,
                       ((pt->target == PIPE_TEXTURE_CUBE) ? 6 : depth) *
                       spt->stride[level]);
 
-      width  = minify(width);
-      height = minify(height);
-      depth = minify(depth);
+      width  = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
    }
 
    spt->buffer = screen->buffer_create(screen, 32,
@@ -96,12 +97,12 @@ softpipe_displaytarget_layout(struct pipe_screen *screen,
                      PIPE_BUFFER_USAGE_GPU_READ_WRITE);
    unsigned tex_usage = spt->base.tex_usage;
 
-   spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]);  
-   spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);  
+   spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width0);  
+   spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height0);  
 
    spt->buffer = screen->surface_buffer_create( screen, 
-                                                spt->base.width[0], 
-                                                spt->base.height[0],
+                                                spt->base.width0, 
+                                                spt->base.height0,
                                                 spt->base.format,
                                                 usage,
                                                 tex_usage,
@@ -126,9 +127,9 @@ softpipe_texture_create(struct pipe_screen *screen,
    pipe_reference_init(&spt->base.reference, 1);
    spt->base.screen = screen;
 
-   spt->pot = (util_is_power_of_two(template->width[0]) &&
-               util_is_power_of_two(template->height[0]) &&
-               util_is_power_of_two(template->depth[0]));
+   spt->pot = (util_is_power_of_two(template->width0) &&
+               util_is_power_of_two(template->height0) &&
+               util_is_power_of_two(template->depth0));
 
    if (spt->base.tex_usage & (PIPE_TEXTURE_USAGE_DISPLAY_TARGET |
                               PIPE_TEXTURE_USAGE_PRIMARY)) {
@@ -163,7 +164,7 @@ softpipe_texture_blanket(struct pipe_screen * screen,
    /* Only supports one type */
    if (base->target != PIPE_TEXTURE_2D ||
        base->last_level != 0 ||
-       base->depth[0] != 1) {
+       base->depth0 != 1) {
       return NULL;
    }
 
@@ -174,8 +175,8 @@ softpipe_texture_blanket(struct pipe_screen * screen,
    spt->base = *base;
    pipe_reference_init(&spt->base.reference, 1);
    spt->base.screen = screen;
-   spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width[0]);  
-   spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height[0]);  
+   spt->base.nblocksx[0] = pf_get_nblocksx(&spt->base.block, spt->base.width0);  
+   spt->base.nblocksy[0] = pf_get_nblocksy(&spt->base.block, spt->base.height0);  
    spt->stride[0] = stride[0];
 
    pipe_buffer_reference(&spt->buffer, buffer);
@@ -213,8 +214,8 @@ softpipe_get_tex_surface(struct pipe_screen *screen,
       pipe_reference_init(&ps->reference, 1);
       pipe_texture_reference(&ps->texture, pt);
       ps->format = pt->format;
-      ps->width = pt->width[level];
-      ps->height = pt->height[level];
+      ps->width = u_minify(pt->width0, level);
+      ps->height = u_minify(pt->height0, level);
       ps->offset = spt->level_offset[level];
       ps->usage = usage;
 
@@ -434,9 +435,9 @@ softpipe_video_surface_create(struct pipe_screen *screen,
    template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
    template.last_level = 0;
    /* vl_mpeg12_mc_renderer expects this when it's initialized with pot_buffers=true */
-   template.width[0] = util_next_power_of_two(width);
-   template.height[0] = util_next_power_of_two(height);
-   template.depth[0] = 1;
+   template.width0 = util_next_power_of_two(width);
+   template.height0 = util_next_power_of_two(height);
+   template.depth0 = 1;
    pf_get_block(template.format, &template.block);
    template.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER | PIPE_TEXTURE_USAGE_RENDER_TARGET;
 
diff --git a/src/gallium/drivers/svga/Makefile b/src/gallium/drivers/svga/Makefile
new file mode 100644
index 00000000000..38b63394e38
--- /dev/null
+++ b/src/gallium/drivers/svga/Makefile
@@ -0,0 +1,57 @@
+TOP = ../../../..
+include $(TOP)/configs/current
+
+LIBNAME = svga
+
+C_SOURCES = \
+	svgadump/svga_shader_dump.c \
+	svgadump/svga_shader_op.c \
+	svgadump/svga_dump.c \
+	svga_cmd.c \
+	svga_context.c \
+	svga_draw.c \
+	svga_draw_arrays.c \
+	svga_draw_elements.c \
+	svga_pipe_blend.c \
+	svga_pipe_blit.c \
+	svga_pipe_clear.c \
+	svga_pipe_constants.c \
+	svga_pipe_depthstencil.c \
+	svga_pipe_draw.c \
+	svga_pipe_flush.c \
+	svga_pipe_fs.c \
+	svga_pipe_misc.c \
+	svga_pipe_query.c \
+	svga_pipe_rasterizer.c \
+	svga_pipe_sampler.c \
+	svga_pipe_vertex.c \
+	svga_pipe_vs.c \
+	svga_screen.c \
+	svga_screen_buffer.c \
+	svga_screen_texture.c \
+	svga_screen_cache.c \
+	svga_state.c \
+	svga_state_need_swtnl.c \
+	svga_state_constants.c \
+	svga_state_framebuffer.c \
+	svga_state_rss.c \
+	svga_state_tss.c \
+	svga_state_vdecl.c \
+	svga_state_fs.c \
+	svga_state_vs.c \
+	svga_swtnl_backend.c \
+	svga_swtnl_draw.c \
+	svga_swtnl_state.c \
+	svga_tgsi.c \
+	svga_tgsi_decl_sm20.c \
+	svga_tgsi_decl_sm30.c \
+	svga_tgsi_insn.c
+
+LIBRARY_INCLUDES = \
+	-I$(TOP)/src/gallium/drivers/svga/include
+
+LIBRARY_DEFINES = \
+	-std=gnu99 -fvisibility=hidden \
+	-DHAVE_STDINT_H -DHAVE_SYS_TYPES_H
+
+include ../../Makefile.template
diff --git a/src/gallium/drivers/svga/SConscript b/src/gallium/drivers/svga/SConscript
new file mode 100644
index 00000000000..737b791ceb0
--- /dev/null
+++ b/src/gallium/drivers/svga/SConscript
@@ -0,0 +1,72 @@
+Import('*')
+
+env = env.Clone()
+
+if env['platform'] in ['linux']:
+	env.Append(CCFLAGS = ['-fvisibility=hidden'])
+
+if env['gcc']:
+	env.Append(CPPDEFINES = [
+		'HAVE_STDINT_H', 
+		'HAVE_SYS_TYPES_H',
+	])
+	
+env.Prepend(CPPPATH = [
+	'include',
+])
+
+env.Append(CPPDEFINES = [
+])
+
+sources = [
+    'svga_cmd.c',
+    'svga_context.c',
+    'svga_draw.c',
+    'svga_draw_arrays.c',
+    'svga_draw_elements.c',
+    'svga_pipe_blend.c',
+    'svga_pipe_blit.c',
+    'svga_pipe_clear.c',
+    'svga_pipe_constants.c',
+    'svga_pipe_depthstencil.c',
+    'svga_pipe_draw.c',
+    'svga_pipe_flush.c',
+    'svga_pipe_fs.c',
+    'svga_pipe_misc.c',
+    'svga_pipe_query.c',
+    'svga_pipe_rasterizer.c',
+    'svga_pipe_sampler.c',
+    'svga_pipe_vertex.c',
+    'svga_pipe_vs.c',
+    'svga_screen.c',
+    'svga_screen_buffer.c',
+    'svga_screen_cache.c',
+    'svga_screen_texture.c',
+    'svga_state.c',
+    'svga_state_constants.c',
+    'svga_state_framebuffer.c',
+    'svga_state_need_swtnl.c',
+    'svga_state_rss.c',
+    'svga_state_tss.c',
+    'svga_state_vdecl.c',
+    'svga_state_fs.c',
+    'svga_state_vs.c',
+    'svga_swtnl_backend.c',
+    'svga_swtnl_draw.c',
+    'svga_swtnl_state.c',
+    'svga_tgsi.c',
+    'svga_tgsi_decl_sm20.c',
+    'svga_tgsi_decl_sm30.c',
+    'svga_tgsi_insn.c',
+    
+    'svgadump/svga_dump.c',
+    'svgadump/svga_shader_dump.c',
+    'svgadump/svga_shader_op.c',
+]
+
+svga = env.ConvenienceLibrary(
+	target = 'svga',
+	source = sources,
+)
+
+Export('svga')
diff --git a/src/gallium/drivers/svga/include/README b/src/gallium/drivers/svga/include/README
new file mode 100644
index 00000000000..a0b8916104e
--- /dev/null
+++ b/src/gallium/drivers/svga/include/README
@@ -0,0 +1,3 @@
+This directory contains the headers from the VMware SVGA Device Developer Kit:
+
+   https://vmware-svga.svn.sourceforge.net/svnroot/vmware-svga/trunk/lib/vmware/
diff --git a/src/gallium/drivers/svga/include/svga3d_caps.h b/src/gallium/drivers/svga/include/svga3d_caps.h
new file mode 100644
index 00000000000..714ce9f45fb
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga3d_caps.h
@@ -0,0 +1,139 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_caps.h --
+ *
+ *       Definitions for SVGA3D hardware capabilities.  Capabilities
+ *       are used to query for optional rendering features during
+ *       driver initialization. The capability data is stored as very
+ *       basic key/value dictionary within the "FIFO register" memory
+ *       area at the beginning of BAR2.
+ *
+ *       Note that these definitions are only for 3D capabilities.
+ *       The SVGA device also has "device capabilities" and "FIFO
+ *       capabilities", which are non-3D-specific and are stored as
+ *       bitfields rather than key/value pairs.
+ */
+
+#ifndef _SVGA3D_CAPS_H_
+#define _SVGA3D_CAPS_H_
+
+#define SVGA_FIFO_3D_CAPS_SIZE   (SVGA_FIFO_3D_CAPS_LAST - \
+                                  SVGA_FIFO_3D_CAPS + 1)
+
+
+/*
+ * SVGA3dCapsRecordType
+ *
+ *    Record types that can be found in the caps block.
+ *    Related record types are grouped together numerically so that
+ *    SVGA3dCaps_FindRecord() can be applied on a range of record
+ *    types.
+ */
+
+typedef enum {
+   SVGA3DCAPS_RECORD_UNKNOWN        = 0,
+   SVGA3DCAPS_RECORD_DEVCAPS_MIN    = 0x100,
+   SVGA3DCAPS_RECORD_DEVCAPS        = 0x100,
+   SVGA3DCAPS_RECORD_DEVCAPS_MAX    = 0x1ff,
+} SVGA3dCapsRecordType;
+
+
+/*
+ * SVGA3dCapsRecordHeader
+ *
+ *    Header field leading each caps block record. Contains the offset (in
+ *    register words, NOT bytes) to the next caps block record (or the end
+ *    of caps block records which will be a zero word) and the record type
+ *    as defined above.
+ */
+
+typedef
+struct SVGA3dCapsRecordHeader {
+   uint32 length;
+   SVGA3dCapsRecordType type;
+}
+SVGA3dCapsRecordHeader;
+
+
+/*
+ * SVGA3dCapsRecord
+ *
+ *    Caps block record; "data" is a placeholder for the actual data structure
+ *    contained within the record; for example a record containing a FOOBAR
+ *    structure would be of size "sizeof(SVGA3dCapsRecordHeader) +
+ *    sizeof(FOOBAR)".
+ */
+
+typedef
+struct SVGA3dCapsRecord {
+   SVGA3dCapsRecordHeader header;
+   uint32 data[1];
+}
+SVGA3dCapsRecord;
+
+
+typedef uint32 SVGA3dCapPair[2];
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3dCaps_FindRecord
+ *
+ *    Finds the record with the highest-valued type within the given range
+ *    in the caps block.
+ *
+ *    Result: pointer to found record, or NULL if not found.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static INLINE SVGA3dCapsRecord *
+SVGA3dCaps_FindRecord(const uint32 *capsBlock,
+                      SVGA3dCapsRecordType recordTypeMin,
+                      SVGA3dCapsRecordType recordTypeMax)
+{
+   SVGA3dCapsRecord *record, *found = NULL;
+   uint32 offset;
+
+   /*
+    * Search linearly through the caps block records for the specified type.
+    */
+   for (offset = 0; capsBlock[offset] != 0; offset += capsBlock[offset]) {
+      record = (SVGA3dCapsRecord *) (capsBlock + offset);
+      if ((record->header.type >= recordTypeMin) &&
+          (record->header.type <= recordTypeMax) &&
+          (!found || (record->header.type > found->header.type))) {
+         found = record;
+      }
+   }
+
+   return found;
+}
+
+
+#endif // _SVGA3D_CAPS_H_
diff --git a/src/gallium/drivers/svga/include/svga3d_reg.h b/src/gallium/drivers/svga/include/svga3d_reg.h
new file mode 100644
index 00000000000..77cb4533100
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga3d_reg.h
@@ -0,0 +1,1793 @@
+/**********************************************************
+ * Copyright 1998-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_reg.h --
+ *
+ *       SVGA 3D hardware definitions
+ */
+
+#ifndef _SVGA3D_REG_H_
+#define _SVGA3D_REG_H_
+
+#include "svga_reg.h"
+
+
+/*
+ * 3D Hardware Version
+ *
+ *   The hardware version is stored in the SVGA_FIFO_3D_HWVERSION fifo
+ *   register.   Is set by the host and read by the guest.  This lets
+ *   us make new guest drivers which are backwards-compatible with old
+ *   SVGA hardware revisions.  It does not let us support old guest
+ *   drivers.  Good enough for now.
+ *
+ */
+
+#define SVGA3D_MAKE_HWVERSION(major, minor)      (((major) << 16) | ((minor) & 0xFF))
+#define SVGA3D_MAJOR_HWVERSION(version)          ((version) >> 16)
+#define SVGA3D_MINOR_HWVERSION(version)          ((version) & 0xFF)
+
+typedef enum {
+   SVGA3D_HWVERSION_WS5_RC1   = SVGA3D_MAKE_HWVERSION(0, 1),
+   SVGA3D_HWVERSION_WS5_RC2   = SVGA3D_MAKE_HWVERSION(0, 2),
+   SVGA3D_HWVERSION_WS51_RC1  = SVGA3D_MAKE_HWVERSION(0, 3),
+   SVGA3D_HWVERSION_WS6_B1    = SVGA3D_MAKE_HWVERSION(1, 1),
+   SVGA3D_HWVERSION_FUSION_11 = SVGA3D_MAKE_HWVERSION(1, 4),
+   SVGA3D_HWVERSION_WS65_B1   = SVGA3D_MAKE_HWVERSION(2, 0),
+   SVGA3D_HWVERSION_CURRENT   = SVGA3D_HWVERSION_WS65_B1,
+} SVGA3dHardwareVersion;
+
+/*
+ * Generic Types
+ */
+
+typedef uint32 SVGA3dBool; /* 32-bit Bool definition */
+#define SVGA3D_NUM_CLIPPLANES                   6
+#define SVGA3D_MAX_SIMULTANEOUS_RENDER_TARGETS  8
+
+
+/*
+ * Surface formats.
+ *
+ * If you modify this list, be sure to keep GLUtil.c in sync. It
+ * includes the internal format definition of each surface in
+ * GLUtil_ConvertSurfaceFormat, and it contains a table of
+ * human-readable names in GLUtil_GetFormatName.
+ */
+
+typedef enum SVGA3dSurfaceFormat {
+   SVGA3D_FORMAT_INVALID = 0,
+
+   SVGA3D_X8R8G8B8       = 1,
+   SVGA3D_A8R8G8B8       = 2,
+
+   SVGA3D_R5G6B5         = 3,
+   SVGA3D_X1R5G5B5       = 4,
+   SVGA3D_A1R5G5B5       = 5,
+   SVGA3D_A4R4G4B4       = 6,
+
+   SVGA3D_Z_D32          = 7,
+   SVGA3D_Z_D16          = 8,
+   SVGA3D_Z_D24S8        = 9,
+   SVGA3D_Z_D15S1        = 10,
+
+   SVGA3D_LUMINANCE8            = 11,
+   SVGA3D_LUMINANCE4_ALPHA4     = 12,
+   SVGA3D_LUMINANCE16           = 13,
+   SVGA3D_LUMINANCE8_ALPHA8     = 14,
+
+   SVGA3D_DXT1           = 15,
+   SVGA3D_DXT2           = 16,
+   SVGA3D_DXT3           = 17,
+   SVGA3D_DXT4           = 18,
+   SVGA3D_DXT5           = 19,
+
+   SVGA3D_BUMPU8V8       = 20,
+   SVGA3D_BUMPL6V5U5     = 21,
+   SVGA3D_BUMPX8L8V8U8   = 22,
+   SVGA3D_BUMPL8V8U8     = 23,
+
+   SVGA3D_ARGB_S10E5     = 24,   /* 16-bit floating-point ARGB */
+   SVGA3D_ARGB_S23E8     = 25,   /* 32-bit floating-point ARGB */
+
+   SVGA3D_A2R10G10B10    = 26,
+
+   /* signed formats */
+   SVGA3D_V8U8           = 27,
+   SVGA3D_Q8W8V8U8       = 28,
+   SVGA3D_CxV8U8         = 29,
+
+   /* mixed formats */
+   SVGA3D_X8L8V8U8       = 30,
+   SVGA3D_A2W10V10U10    = 31,
+
+   SVGA3D_ALPHA8         = 32,
+
+   /* Single- and dual-component floating point formats */
+   SVGA3D_R_S10E5        = 33,
+   SVGA3D_R_S23E8        = 34,
+   SVGA3D_RG_S10E5       = 35,
+   SVGA3D_RG_S23E8       = 36,
+
+   /*
+    * Any surface can be used as a buffer object, but SVGA3D_BUFFER is
+    * the most efficient format to use when creating new surfaces
+    * expressly for index or vertex data.
+    */
+   SVGA3D_BUFFER         = 37,
+
+   SVGA3D_Z_D24X8        = 38,
+
+   SVGA3D_V16U16         = 39,
+
+   SVGA3D_G16R16         = 40,
+   SVGA3D_A16B16G16R16   = 41,
+
+   /* Packed Video formats */
+   SVGA3D_UYVY           = 42,
+   SVGA3D_YUY2           = 43,
+
+   SVGA3D_FORMAT_MAX
+} SVGA3dSurfaceFormat;
+
+typedef uint32 SVGA3dColor; /* a, r, g, b */
+
+/*
+ * These match the D3DFORMAT_OP definitions used by Direct3D. We need
+ * them so that we can query the host for what the supported surface
+ * operations are (when we're using the D3D backend, in particular),
+ * and so we can send those operations to the guest.
+ */
+typedef enum {
+   SVGA3DFORMAT_OP_TEXTURE                               = 0x00000001,
+   SVGA3DFORMAT_OP_VOLUMETEXTURE                         = 0x00000002,
+   SVGA3DFORMAT_OP_CUBETEXTURE                           = 0x00000004,
+   SVGA3DFORMAT_OP_OFFSCREEN_RENDERTARGET                = 0x00000008,
+   SVGA3DFORMAT_OP_SAME_FORMAT_RENDERTARGET              = 0x00000010,
+   SVGA3DFORMAT_OP_ZSTENCIL                              = 0x00000040,
+   SVGA3DFORMAT_OP_ZSTENCIL_WITH_ARBITRARY_COLOR_DEPTH   = 0x00000080,
+
+/*
+ * This format can be used as a render target if the current display mode
+ * is the same depth if the alpha channel is ignored. e.g. if the device
+ * can render to A8R8G8B8 when the display mode is X8R8G8B8, then the
+ * format op list entry for A8R8G8B8 should have this cap.
+ */
+   SVGA3DFORMAT_OP_SAME_FORMAT_UP_TO_ALPHA_RENDERTARGET  = 0x00000100,
+
+/*
+ * This format contains DirectDraw support (including Flip).  This flag
+ * should not to be set on alpha formats.
+ */
+   SVGA3DFORMAT_OP_DISPLAYMODE                           = 0x00000400,
+
+/*
+ * The rasterizer can support some level of Direct3D support in this format
+ * and implies that the driver can create a Context in this mode (for some
+ * render target format).  When this flag is set, the SVGA3DFORMAT_OP_DISPLAYMODE
+ * flag must also be set.
+ */
+   SVGA3DFORMAT_OP_3DACCELERATION                        = 0x00000800,
+
+/*
+ * This is set for a private format when the driver has put the bpp in
+ * the structure.
+ */
+   SVGA3DFORMAT_OP_PIXELSIZE                             = 0x00001000,
+
+/*
+ * Indicates that this format can be converted to any RGB format for which
+ * SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB is specified
+ */
+   SVGA3DFORMAT_OP_CONVERT_TO_ARGB                       = 0x00002000,
+
+/*
+ * Indicates that this format can be used to create offscreen plain surfaces.
+ */
+   SVGA3DFORMAT_OP_OFFSCREENPLAIN                        = 0x00004000,
+
+/*
+ * Indicated that this format can be read as an SRGB texture (meaning that the
+ * sampler will linearize the looked up data)
+ */
+   SVGA3DFORMAT_OP_SRGBREAD                              = 0x00008000,
+
+/*
+ * Indicates that this format can be used in the bumpmap instructions
+ */
+   SVGA3DFORMAT_OP_BUMPMAP                               = 0x00010000,
+
+/*
+ * Indicates that this format can be sampled by the displacement map sampler
+ */
+   SVGA3DFORMAT_OP_DMAP                                  = 0x00020000,
+
+/*
+ * Indicates that this format cannot be used with texture filtering
+ */
+   SVGA3DFORMAT_OP_NOFILTER                              = 0x00040000,
+
+/*
+ * Indicates that format conversions are supported to this RGB format if
+ * SVGA3DFORMAT_OP_CONVERT_TO_ARGB is specified in the source format.
+ */
+   SVGA3DFORMAT_OP_MEMBEROFGROUP_ARGB                    = 0x00080000,
+
+/*
+ * Indicated that this format can be written as an SRGB target (meaning that the
+ * pixel pipe will DE-linearize data on output to format)
+ */
+   SVGA3DFORMAT_OP_SRGBWRITE                             = 0x00100000,
+
+/*
+ * Indicates that this format cannot be used with alpha blending
+ */
+   SVGA3DFORMAT_OP_NOALPHABLEND                          = 0x00200000,
+
+/*
+ * Indicates that the device can auto-generated sublevels for resources
+ * of this format
+ */
+   SVGA3DFORMAT_OP_AUTOGENMIPMAP                         = 0x00400000,
+
+/*
+ * Indicates that this format can be used by vertex texture sampler
+ */
+   SVGA3DFORMAT_OP_VERTEXTEXTURE                         = 0x00800000,
+
+/*
+ * Indicates that this format supports neither texture coordinate wrap
+ * modes, nor mipmapping
+ */
+   SVGA3DFORMAT_OP_NOTEXCOORDWRAPNORMIP                  = 0x01000000
+} SVGA3dFormatOp;
+
+/*
+ * This structure is a conversion of SVGA3DFORMAT_OP_*.
+ * Entries must be located at the same position.
+ */
+typedef union {
+   uint32 value;
+   struct {
+      uint32 texture : 1;
+      uint32 volumeTexture : 1;
+      uint32 cubeTexture : 1;
+      uint32 offscreenRenderTarget : 1;
+      uint32 sameFormatRenderTarget : 1;
+      uint32 unknown1 : 1;
+      uint32 zStencil : 1;
+      uint32 zStencilArbitraryDepth : 1;
+      uint32 sameFormatUpToAlpha : 1;
+      uint32 unknown2 : 1;
+      uint32 displayMode : 1;
+      uint32 acceleration3d : 1;
+      uint32 pixelSize : 1;
+      uint32 convertToARGB : 1;
+      uint32 offscreenPlain : 1;
+      uint32 sRGBRead : 1;
+      uint32 bumpMap : 1;
+      uint32 dmap : 1;
+      uint32 noFilter : 1;
+      uint32 memberOfGroupARGB : 1;
+      uint32 sRGBWrite : 1;
+      uint32 noAlphaBlend : 1;
+      uint32 autoGenMipMap : 1;
+      uint32 vertexTexture : 1;
+      uint32 noTexCoordWrapNorMip : 1;
+   };
+} SVGA3dSurfaceFormatCaps;
+
+/*
+ * SVGA_3D_CMD_SETRENDERSTATE Types.  All value types
+ * must fit in a uint32.
+ */
+
+typedef enum {
+   SVGA3D_RS_INVALID                   = 0,
+   SVGA3D_RS_ZENABLE                   = 1,     /* SVGA3dBool */
+   SVGA3D_RS_ZWRITEENABLE              = 2,     /* SVGA3dBool */
+   SVGA3D_RS_ALPHATESTENABLE           = 3,     /* SVGA3dBool */
+   SVGA3D_RS_DITHERENABLE              = 4,     /* SVGA3dBool */
+   SVGA3D_RS_BLENDENABLE               = 5,     /* SVGA3dBool */
+   SVGA3D_RS_FOGENABLE                 = 6,     /* SVGA3dBool */
+   SVGA3D_RS_SPECULARENABLE            = 7,     /* SVGA3dBool */
+   SVGA3D_RS_STENCILENABLE             = 8,     /* SVGA3dBool */
+   SVGA3D_RS_LIGHTINGENABLE            = 9,     /* SVGA3dBool */
+   SVGA3D_RS_NORMALIZENORMALS          = 10,    /* SVGA3dBool */
+   SVGA3D_RS_POINTSPRITEENABLE         = 11,    /* SVGA3dBool */
+   SVGA3D_RS_POINTSCALEENABLE          = 12,    /* SVGA3dBool */
+   SVGA3D_RS_STENCILREF                = 13,    /* uint32 */
+   SVGA3D_RS_STENCILMASK               = 14,    /* uint32 */
+   SVGA3D_RS_STENCILWRITEMASK          = 15,    /* uint32 */
+   SVGA3D_RS_FOGSTART                  = 16,    /* float */
+   SVGA3D_RS_FOGEND                    = 17,    /* float */
+   SVGA3D_RS_FOGDENSITY                = 18,    /* float */
+   SVGA3D_RS_POINTSIZE                 = 19,    /* float */
+   SVGA3D_RS_POINTSIZEMIN              = 20,    /* float */
+   SVGA3D_RS_POINTSIZEMAX              = 21,    /* float */
+   SVGA3D_RS_POINTSCALE_A              = 22,    /* float */
+   SVGA3D_RS_POINTSCALE_B              = 23,    /* float */
+   SVGA3D_RS_POINTSCALE_C              = 24,    /* float */
+   SVGA3D_RS_FOGCOLOR                  = 25,    /* SVGA3dColor */
+   SVGA3D_RS_AMBIENT                   = 26,    /* SVGA3dColor */
+   SVGA3D_RS_CLIPPLANEENABLE           = 27,    /* SVGA3dClipPlanes */
+   SVGA3D_RS_FOGMODE                   = 28,    /* SVGA3dFogMode */
+   SVGA3D_RS_FILLMODE                  = 29,    /* SVGA3dFillMode */
+   SVGA3D_RS_SHADEMODE                 = 30,    /* SVGA3dShadeMode */
+   SVGA3D_RS_LINEPATTERN               = 31,    /* SVGA3dLinePattern */
+   SVGA3D_RS_SRCBLEND                  = 32,    /* SVGA3dBlendOp */
+   SVGA3D_RS_DSTBLEND                  = 33,    /* SVGA3dBlendOp */
+   SVGA3D_RS_BLENDEQUATION             = 34,    /* SVGA3dBlendEquation */
+   SVGA3D_RS_CULLMODE                  = 35,    /* SVGA3dFace */
+   SVGA3D_RS_ZFUNC                     = 36,    /* SVGA3dCmpFunc */
+   SVGA3D_RS_ALPHAFUNC                 = 37,    /* SVGA3dCmpFunc */
+   SVGA3D_RS_STENCILFUNC               = 38,    /* SVGA3dCmpFunc */
+   SVGA3D_RS_STENCILFAIL               = 39,    /* SVGA3dStencilOp */
+   SVGA3D_RS_STENCILZFAIL              = 40,    /* SVGA3dStencilOp */
+   SVGA3D_RS_STENCILPASS               = 41,    /* SVGA3dStencilOp */
+   SVGA3D_RS_ALPHAREF                  = 42,    /* float (0.0 .. 1.0) */
+   SVGA3D_RS_FRONTWINDING              = 43,    /* SVGA3dFrontWinding */
+   SVGA3D_RS_COORDINATETYPE            = 44,    /* SVGA3dCoordinateType */
+   SVGA3D_RS_ZBIAS                     = 45,    /* float */
+   SVGA3D_RS_RANGEFOGENABLE            = 46,    /* SVGA3dBool */
+   SVGA3D_RS_COLORWRITEENABLE          = 47,    /* SVGA3dColorMask */
+   SVGA3D_RS_VERTEXMATERIALENABLE      = 48,    /* SVGA3dBool */
+   SVGA3D_RS_DIFFUSEMATERIALSOURCE     = 49,    /* SVGA3dVertexMaterial */
+   SVGA3D_RS_SPECULARMATERIALSOURCE    = 50,    /* SVGA3dVertexMaterial */
+   SVGA3D_RS_AMBIENTMATERIALSOURCE     = 51,    /* SVGA3dVertexMaterial */
+   SVGA3D_RS_EMISSIVEMATERIALSOURCE    = 52,    /* SVGA3dVertexMaterial */
+   SVGA3D_RS_TEXTUREFACTOR             = 53,    /* SVGA3dColor */
+   SVGA3D_RS_LOCALVIEWER               = 54,    /* SVGA3dBool */
+   SVGA3D_RS_SCISSORTESTENABLE         = 55,    /* SVGA3dBool */
+   SVGA3D_RS_BLENDCOLOR                = 56,    /* SVGA3dColor */
+   SVGA3D_RS_STENCILENABLE2SIDED       = 57,    /* SVGA3dBool */
+   SVGA3D_RS_CCWSTENCILFUNC            = 58,    /* SVGA3dCmpFunc */
+   SVGA3D_RS_CCWSTENCILFAIL            = 59,    /* SVGA3dStencilOp */
+   SVGA3D_RS_CCWSTENCILZFAIL           = 60,    /* SVGA3dStencilOp */
+   SVGA3D_RS_CCWSTENCILPASS            = 61,    /* SVGA3dStencilOp */
+   SVGA3D_RS_VERTEXBLEND               = 62,    /* SVGA3dVertexBlendFlags */
+   SVGA3D_RS_SLOPESCALEDEPTHBIAS       = 63,    /* float */
+   SVGA3D_RS_DEPTHBIAS                 = 64,    /* float */
+
+
+   /*
+    * Output Gamma Level
+    *
+    * Output gamma effects the gamma curve of colors that are output from the
+    * rendering pipeline.  A value of 1.0 specifies a linear color space. If the
+    * value is <= 0.0, gamma correction is ignored and linear color space is
+    * used.
+    */
+
+   SVGA3D_RS_OUTPUTGAMMA               = 65,    /* float */
+   SVGA3D_RS_ZVISIBLE                  = 66,    /* SVGA3dBool */
+   SVGA3D_RS_LASTPIXEL                 = 67,    /* SVGA3dBool */
+   SVGA3D_RS_CLIPPING                  = 68,    /* SVGA3dBool */
+   SVGA3D_RS_WRAP0                     = 69,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP1                     = 70,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP2                     = 71,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP3                     = 72,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP4                     = 73,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP5                     = 74,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP6                     = 75,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP7                     = 76,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP8                     = 77,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP9                     = 78,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP10                    = 79,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP11                    = 80,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP12                    = 81,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP13                    = 82,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP14                    = 83,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_WRAP15                    = 84,    /* SVGA3dWrapFlags */
+   SVGA3D_RS_MULTISAMPLEANTIALIAS      = 85,    /* SVGA3dBool */
+   SVGA3D_RS_MULTISAMPLEMASK           = 86,    /* uint32 */
+   SVGA3D_RS_INDEXEDVERTEXBLENDENABLE  = 87,    /* SVGA3dBool */
+   SVGA3D_RS_TWEENFACTOR               = 88,    /* float */
+   SVGA3D_RS_ANTIALIASEDLINEENABLE     = 89,    /* SVGA3dBool */
+   SVGA3D_RS_COLORWRITEENABLE1         = 90,    /* SVGA3dColorMask */
+   SVGA3D_RS_COLORWRITEENABLE2         = 91,    /* SVGA3dColorMask */
+   SVGA3D_RS_COLORWRITEENABLE3         = 92,    /* SVGA3dColorMask */
+   SVGA3D_RS_SEPARATEALPHABLENDENABLE  = 93,    /* SVGA3dBool */
+   SVGA3D_RS_SRCBLENDALPHA             = 94,    /* SVGA3dBlendOp */
+   SVGA3D_RS_DSTBLENDALPHA             = 95,    /* SVGA3dBlendOp */
+   SVGA3D_RS_BLENDEQUATIONALPHA        = 96,    /* SVGA3dBlendEquation */
+   SVGA3D_RS_MAX
+} SVGA3dRenderStateName;
+
+typedef enum {
+   SVGA3D_VERTEXMATERIAL_NONE     = 0,    /* Use the value in the current material */
+   SVGA3D_VERTEXMATERIAL_DIFFUSE  = 1,    /* Use the value in the diffuse component */
+   SVGA3D_VERTEXMATERIAL_SPECULAR = 2,    /* Use the value in the specular component */
+} SVGA3dVertexMaterial;
+
+typedef enum {
+   SVGA3D_FILLMODE_INVALID = 0,
+   SVGA3D_FILLMODE_POINT   = 1,
+   SVGA3D_FILLMODE_LINE    = 2,
+   SVGA3D_FILLMODE_FILL    = 3,
+   SVGA3D_FILLMODE_MAX
+} SVGA3dFillModeType;
+
+
+typedef
+union {
+   struct {
+      uint16   mode;       /* SVGA3dFillModeType */
+      uint16   face;       /* SVGA3dFace */
+   };
+   uint32 uintValue;
+} SVGA3dFillMode;
+
+typedef enum {
+   SVGA3D_SHADEMODE_INVALID = 0,
+   SVGA3D_SHADEMODE_FLAT    = 1,
+   SVGA3D_SHADEMODE_SMOOTH  = 2,
+   SVGA3D_SHADEMODE_PHONG   = 3,     /* Not supported */
+   SVGA3D_SHADEMODE_MAX
+} SVGA3dShadeMode;
+
+typedef
+union {
+   struct {
+      uint16 repeat;
+      uint16 pattern;
+   };
+   uint32 uintValue;
+} SVGA3dLinePattern;
+
+typedef enum {
+   SVGA3D_BLENDOP_INVALID            = 0,
+   SVGA3D_BLENDOP_ZERO               = 1,
+   SVGA3D_BLENDOP_ONE                = 2,
+   SVGA3D_BLENDOP_SRCCOLOR           = 3,
+   SVGA3D_BLENDOP_INVSRCCOLOR        = 4,
+   SVGA3D_BLENDOP_SRCALPHA           = 5,
+   SVGA3D_BLENDOP_INVSRCALPHA        = 6,
+   SVGA3D_BLENDOP_DESTALPHA          = 7,
+   SVGA3D_BLENDOP_INVDESTALPHA       = 8,
+   SVGA3D_BLENDOP_DESTCOLOR          = 9,
+   SVGA3D_BLENDOP_INVDESTCOLOR       = 10,
+   SVGA3D_BLENDOP_SRCALPHASAT        = 11,
+   SVGA3D_BLENDOP_BLENDFACTOR        = 12,
+   SVGA3D_BLENDOP_INVBLENDFACTOR     = 13,
+   SVGA3D_BLENDOP_MAX
+} SVGA3dBlendOp;
+
+typedef enum {
+   SVGA3D_BLENDEQ_INVALID            = 0,
+   SVGA3D_BLENDEQ_ADD                = 1,
+   SVGA3D_BLENDEQ_SUBTRACT           = 2,
+   SVGA3D_BLENDEQ_REVSUBTRACT        = 3,
+   SVGA3D_BLENDEQ_MINIMUM            = 4,
+   SVGA3D_BLENDEQ_MAXIMUM            = 5,
+   SVGA3D_BLENDEQ_MAX
+} SVGA3dBlendEquation;
+
+typedef enum {
+   SVGA3D_FRONTWINDING_INVALID = 0,
+   SVGA3D_FRONTWINDING_CW      = 1,
+   SVGA3D_FRONTWINDING_CCW     = 2,
+   SVGA3D_FRONTWINDING_MAX
+} SVGA3dFrontWinding;
+
+typedef enum {
+   SVGA3D_FACE_INVALID  = 0,
+   SVGA3D_FACE_NONE     = 1,
+   SVGA3D_FACE_FRONT    = 2,
+   SVGA3D_FACE_BACK     = 3,
+   SVGA3D_FACE_FRONT_BACK = 4,
+   SVGA3D_FACE_MAX
+} SVGA3dFace;
+
+/*
+ * The order and the values should not be changed
+ */
+
+typedef enum {
+   SVGA3D_CMP_INVALID              = 0,
+   SVGA3D_CMP_NEVER                = 1,
+   SVGA3D_CMP_LESS                 = 2,
+   SVGA3D_CMP_EQUAL                = 3,
+   SVGA3D_CMP_LESSEQUAL            = 4,
+   SVGA3D_CMP_GREATER              = 5,
+   SVGA3D_CMP_NOTEQUAL             = 6,
+   SVGA3D_CMP_GREATEREQUAL         = 7,
+   SVGA3D_CMP_ALWAYS               = 8,
+   SVGA3D_CMP_MAX
+} SVGA3dCmpFunc;
+
+/*
+ * SVGA3D_FOGFUNC_* specifies the fog equation, or PER_VERTEX which allows
+ * the fog factor to be specified in the alpha component of the specular
+ * (a.k.a. secondary) vertex color.
+ */
+typedef enum {
+   SVGA3D_FOGFUNC_INVALID          = 0,
+   SVGA3D_FOGFUNC_EXP              = 1,
+   SVGA3D_FOGFUNC_EXP2             = 2,
+   SVGA3D_FOGFUNC_LINEAR           = 3,
+   SVGA3D_FOGFUNC_PER_VERTEX       = 4
+} SVGA3dFogFunction;
+
+/*
+ * SVGA3D_FOGTYPE_* specifies if fog factors are computed on a per-vertex
+ * or per-pixel basis.
+ */
+typedef enum {
+   SVGA3D_FOGTYPE_INVALID          = 0,
+   SVGA3D_FOGTYPE_VERTEX           = 1,
+   SVGA3D_FOGTYPE_PIXEL            = 2,
+   SVGA3D_FOGTYPE_MAX              = 3
+} SVGA3dFogType;
+
+/*
+ * SVGA3D_FOGBASE_* selects depth or range-based fog. Depth-based fog is
+ * computed using the eye Z value of each pixel (or vertex), whereas range-
+ * based fog is computed using the actual distance (range) to the eye.
+ */
+typedef enum {
+   SVGA3D_FOGBASE_INVALID          = 0,
+   SVGA3D_FOGBASE_DEPTHBASED       = 1,
+   SVGA3D_FOGBASE_RANGEBASED       = 2,
+   SVGA3D_FOGBASE_MAX              = 3
+} SVGA3dFogBase;
+
+typedef enum {
+   SVGA3D_STENCILOP_INVALID        = 0,
+   SVGA3D_STENCILOP_KEEP           = 1,
+   SVGA3D_STENCILOP_ZERO           = 2,
+   SVGA3D_STENCILOP_REPLACE        = 3,
+   SVGA3D_STENCILOP_INCRSAT        = 4,
+   SVGA3D_STENCILOP_DECRSAT        = 5,
+   SVGA3D_STENCILOP_INVERT         = 6,
+   SVGA3D_STENCILOP_INCR           = 7,
+   SVGA3D_STENCILOP_DECR           = 8,
+   SVGA3D_STENCILOP_MAX
+} SVGA3dStencilOp;
+
+typedef enum {
+   SVGA3D_CLIPPLANE_0              = (1 << 0),
+   SVGA3D_CLIPPLANE_1              = (1 << 1),
+   SVGA3D_CLIPPLANE_2              = (1 << 2),
+   SVGA3D_CLIPPLANE_3              = (1 << 3),
+   SVGA3D_CLIPPLANE_4              = (1 << 4),
+   SVGA3D_CLIPPLANE_5              = (1 << 5),
+} SVGA3dClipPlanes;
+
+typedef enum {
+   SVGA3D_CLEAR_COLOR              = 0x1,
+   SVGA3D_CLEAR_DEPTH              = 0x2,
+   SVGA3D_CLEAR_STENCIL            = 0x4
+} SVGA3dClearFlag;
+
+typedef enum {
+   SVGA3D_RT_DEPTH                 = 0,
+   SVGA3D_RT_STENCIL               = 1,
+   SVGA3D_RT_COLOR0                = 2,
+   SVGA3D_RT_COLOR1                = 3,
+   SVGA3D_RT_COLOR2                = 4,
+   SVGA3D_RT_COLOR3                = 5,
+   SVGA3D_RT_COLOR4                = 6,
+   SVGA3D_RT_COLOR5                = 7,
+   SVGA3D_RT_COLOR6                = 8,
+   SVGA3D_RT_COLOR7                = 9,
+   SVGA3D_RT_MAX,
+   SVGA3D_RT_INVALID               = ((uint32)-1),
+} SVGA3dRenderTargetType;
+
+#define SVGA3D_MAX_RT_COLOR (SVGA3D_RT_COLOR7 - SVGA3D_RT_COLOR0 + 1)
+
+typedef
+union {
+   struct {
+      uint32  red   : 1;
+      uint32  green : 1;
+      uint32  blue  : 1;
+      uint32  alpha : 1;
+   };
+   uint32 uintValue;
+} SVGA3dColorMask;
+
+typedef enum {
+   SVGA3D_VBLEND_DISABLE            = 0,
+   SVGA3D_VBLEND_1WEIGHT            = 1,
+   SVGA3D_VBLEND_2WEIGHT            = 2,
+   SVGA3D_VBLEND_3WEIGHT            = 3,
+} SVGA3dVertexBlendFlags;
+
+typedef enum {
+   SVGA3D_WRAPCOORD_0   = 1 << 0,
+   SVGA3D_WRAPCOORD_1   = 1 << 1,
+   SVGA3D_WRAPCOORD_2   = 1 << 2,
+   SVGA3D_WRAPCOORD_3   = 1 << 3,
+   SVGA3D_WRAPCOORD_ALL = 0xF,
+} SVGA3dWrapFlags;
+
+/*
+ * SVGA_3D_CMD_TEXTURESTATE Types.  All value types
+ * must fit in a uint32.
+ */
+
+typedef enum {
+   SVGA3D_TS_INVALID                    = 0,
+   SVGA3D_TS_BIND_TEXTURE               = 1,    /* SVGA3dSurfaceId */
+   SVGA3D_TS_COLOROP                    = 2,    /* SVGA3dTextureCombiner */
+   SVGA3D_TS_COLORARG1                  = 3,    /* SVGA3dTextureArgData */
+   SVGA3D_TS_COLORARG2                  = 4,    /* SVGA3dTextureArgData */
+   SVGA3D_TS_ALPHAOP                    = 5,    /* SVGA3dTextureCombiner */
+   SVGA3D_TS_ALPHAARG1                  = 6,    /* SVGA3dTextureArgData */
+   SVGA3D_TS_ALPHAARG2                  = 7,    /* SVGA3dTextureArgData */
+   SVGA3D_TS_ADDRESSU                   = 8,    /* SVGA3dTextureAddress */
+   SVGA3D_TS_ADDRESSV                   = 9,    /* SVGA3dTextureAddress */
+   SVGA3D_TS_MIPFILTER                  = 10,   /* SVGA3dTextureFilter */
+   SVGA3D_TS_MAGFILTER                  = 11,   /* SVGA3dTextureFilter */
+   SVGA3D_TS_MINFILTER                  = 12,   /* SVGA3dTextureFilter */
+   SVGA3D_TS_BORDERCOLOR                = 13,   /* SVGA3dColor */
+   SVGA3D_TS_TEXCOORDINDEX              = 14,   /* uint32 */
+   SVGA3D_TS_TEXTURETRANSFORMFLAGS      = 15,   /* SVGA3dTexTransformFlags */
+   SVGA3D_TS_TEXCOORDGEN                = 16,   /* SVGA3dTextureCoordGen */
+   SVGA3D_TS_BUMPENVMAT00               = 17,   /* float */
+   SVGA3D_TS_BUMPENVMAT01               = 18,   /* float */
+   SVGA3D_TS_BUMPENVMAT10               = 19,   /* float */
+   SVGA3D_TS_BUMPENVMAT11               = 20,   /* float */
+   SVGA3D_TS_TEXTURE_MIPMAP_LEVEL       = 21,   /* uint32 */
+   SVGA3D_TS_TEXTURE_LOD_BIAS           = 22,   /* float */
+   SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL  = 23,   /* uint32 */
+   SVGA3D_TS_ADDRESSW                   = 24,   /* SVGA3dTextureAddress */
+
+
+   /*
+    * Sampler Gamma Level
+    *
+    * Sampler gamma effects the color of samples taken from the sampler.  A
+    * value of 1.0 will produce linear samples.  If the value is <= 0.0 the
+    * gamma value is ignored and a linear space is used.
+    */
+
+   SVGA3D_TS_GAMMA                      = 25,   /* float */
+   SVGA3D_TS_BUMPENVLSCALE              = 26,   /* float */
+   SVGA3D_TS_BUMPENVLOFFSET             = 27,   /* float */
+   SVGA3D_TS_COLORARG0                  = 28,   /* SVGA3dTextureArgData */
+   SVGA3D_TS_ALPHAARG0                  = 29,   /* SVGA3dTextureArgData */
+   SVGA3D_TS_MAX
+} SVGA3dTextureStateName;
+
+typedef enum {
+   SVGA3D_TC_INVALID                   = 0,
+   SVGA3D_TC_DISABLE                   = 1,
+   SVGA3D_TC_SELECTARG1                = 2,
+   SVGA3D_TC_SELECTARG2                = 3,
+   SVGA3D_TC_MODULATE                  = 4,
+   SVGA3D_TC_ADD                       = 5,
+   SVGA3D_TC_ADDSIGNED                 = 6,
+   SVGA3D_TC_SUBTRACT                  = 7,
+   SVGA3D_TC_BLENDTEXTUREALPHA         = 8,
+   SVGA3D_TC_BLENDDIFFUSEALPHA         = 9,
+   SVGA3D_TC_BLENDCURRENTALPHA         = 10,
+   SVGA3D_TC_BLENDFACTORALPHA          = 11,
+   SVGA3D_TC_MODULATE2X                = 12,
+   SVGA3D_TC_MODULATE4X                = 13,
+   SVGA3D_TC_DSDT                      = 14,
+   SVGA3D_TC_DOTPRODUCT3               = 15,
+   SVGA3D_TC_BLENDTEXTUREALPHAPM       = 16,
+   SVGA3D_TC_ADDSIGNED2X               = 17,
+   SVGA3D_TC_ADDSMOOTH                 = 18,
+   SVGA3D_TC_PREMODULATE               = 19,
+   SVGA3D_TC_MODULATEALPHA_ADDCOLOR    = 20,
+   SVGA3D_TC_MODULATECOLOR_ADDALPHA    = 21,
+   SVGA3D_TC_MODULATEINVALPHA_ADDCOLOR = 22,
+   SVGA3D_TC_MODULATEINVCOLOR_ADDALPHA = 23,
+   SVGA3D_TC_BUMPENVMAPLUMINANCE       = 24,
+   SVGA3D_TC_MULTIPLYADD               = 25,
+   SVGA3D_TC_LERP                      = 26,
+   SVGA3D_TC_MAX
+} SVGA3dTextureCombiner;
+
+#define SVGA3D_TC_CAP_BIT(svga3d_tc_op) (svga3d_tc_op ? (1 << (svga3d_tc_op - 1)) : 0)
+
+typedef enum {
+   SVGA3D_TEX_ADDRESS_INVALID    = 0,
+   SVGA3D_TEX_ADDRESS_WRAP       = 1,
+   SVGA3D_TEX_ADDRESS_MIRROR     = 2,
+   SVGA3D_TEX_ADDRESS_CLAMP      = 3,
+   SVGA3D_TEX_ADDRESS_BORDER     = 4,
+   SVGA3D_TEX_ADDRESS_MIRRORONCE = 5,
+   SVGA3D_TEX_ADDRESS_EDGE       = 6,
+   SVGA3D_TEX_ADDRESS_MAX
+} SVGA3dTextureAddress;
+
+/*
+ * SVGA3D_TEX_FILTER_NONE as the minification filter means mipmapping is
+ * disabled, and the rasterizer should use the magnification filter instead.
+ */
+typedef enum {
+   SVGA3D_TEX_FILTER_NONE           = 0,
+   SVGA3D_TEX_FILTER_NEAREST        = 1,
+   SVGA3D_TEX_FILTER_LINEAR         = 2,
+   SVGA3D_TEX_FILTER_ANISOTROPIC    = 3,
+   SVGA3D_TEX_FILTER_FLATCUBIC      = 4, // Deprecated, not implemented
+   SVGA3D_TEX_FILTER_GAUSSIANCUBIC  = 5, // Deprecated, not implemented
+   SVGA3D_TEX_FILTER_PYRAMIDALQUAD  = 6, // Not currently implemented
+   SVGA3D_TEX_FILTER_GAUSSIANQUAD   = 7, // Not currently implemented
+   SVGA3D_TEX_FILTER_MAX
+} SVGA3dTextureFilter;
+
+typedef enum {
+   SVGA3D_TEX_TRANSFORM_OFF    = 0,
+   SVGA3D_TEX_TRANSFORM_S      = (1 << 0),
+   SVGA3D_TEX_TRANSFORM_T      = (1 << 1),
+   SVGA3D_TEX_TRANSFORM_R      = (1 << 2),
+   SVGA3D_TEX_TRANSFORM_Q      = (1 << 3),
+   SVGA3D_TEX_PROJECTED        = (1 << 15),
+} SVGA3dTexTransformFlags;
+
+typedef enum {
+   SVGA3D_TEXCOORD_GEN_OFF              = 0,
+   SVGA3D_TEXCOORD_GEN_EYE_POSITION     = 1,
+   SVGA3D_TEXCOORD_GEN_EYE_NORMAL       = 2,
+   SVGA3D_TEXCOORD_GEN_REFLECTIONVECTOR = 3,
+   SVGA3D_TEXCOORD_GEN_SPHERE           = 4,
+   SVGA3D_TEXCOORD_GEN_MAX
+} SVGA3dTextureCoordGen;
+
+/*
+ * Texture argument constants for texture combiner
+ */
+typedef enum {
+   SVGA3D_TA_INVALID    = 0,
+   SVGA3D_TA_CONSTANT   = 1,
+   SVGA3D_TA_PREVIOUS   = 2,
+   SVGA3D_TA_DIFFUSE    = 3,
+   SVGA3D_TA_TEXTURE    = 4,
+   SVGA3D_TA_SPECULAR   = 5,
+   SVGA3D_TA_MAX
+} SVGA3dTextureArgData;
+
+#define SVGA3D_TM_MASK_LEN 4
+
+/* Modifiers for texture argument constants defined above. */
+typedef enum {
+   SVGA3D_TM_NONE       = 0,
+   SVGA3D_TM_ALPHA      = (1 << SVGA3D_TM_MASK_LEN),
+   SVGA3D_TM_ONE_MINUS  = (2 << SVGA3D_TM_MASK_LEN),
+} SVGA3dTextureArgModifier;
+
+#define SVGA3D_INVALID_ID         ((uint32)-1)
+#define SVGA3D_MAX_CLIP_PLANES    6
+
+/*
+ * This is the limit to the number of fixed-function texture
+ * transforms and texture coordinates we can support. It does *not*
+ * correspond to the number of texture image units (samplers) we
+ * support!
+ */
+#define SVGA3D_MAX_TEXTURE_COORDS 8
+
+/*
+ * Vertex declarations
+ *
+ * Notes:
+ *
+ * SVGA3D_DECLUSAGE_POSITIONT is for pre-transformed vertices. If you
+ * draw with any POSITIONT vertex arrays, the programmable vertex
+ * pipeline will be implicitly disabled. Drawing will take place as if
+ * no vertex shader was bound.
+ */
+
+typedef enum {
+   SVGA3D_DECLUSAGE_POSITION     = 0,
+   SVGA3D_DECLUSAGE_BLENDWEIGHT,       //  1
+   SVGA3D_DECLUSAGE_BLENDINDICES,      //  2
+   SVGA3D_DECLUSAGE_NORMAL,            //  3
+   SVGA3D_DECLUSAGE_PSIZE,             //  4
+   SVGA3D_DECLUSAGE_TEXCOORD,          //  5
+   SVGA3D_DECLUSAGE_TANGENT,           //  6
+   SVGA3D_DECLUSAGE_BINORMAL,          //  7
+   SVGA3D_DECLUSAGE_TESSFACTOR,        //  8
+   SVGA3D_DECLUSAGE_POSITIONT,         //  9
+   SVGA3D_DECLUSAGE_COLOR,             // 10
+   SVGA3D_DECLUSAGE_FOG,               // 11
+   SVGA3D_DECLUSAGE_DEPTH,             // 12
+   SVGA3D_DECLUSAGE_SAMPLE,            // 13
+   SVGA3D_DECLUSAGE_MAX
+} SVGA3dDeclUsage;
+
+typedef enum {
+   SVGA3D_DECLMETHOD_DEFAULT     = 0,
+   SVGA3D_DECLMETHOD_PARTIALU,
+   SVGA3D_DECLMETHOD_PARTIALV,
+   SVGA3D_DECLMETHOD_CROSSUV,          // Normal
+   SVGA3D_DECLMETHOD_UV,
+   SVGA3D_DECLMETHOD_LOOKUP,           // Lookup a displacement map
+   SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED, // Lookup a pre-sampled displacement map
+} SVGA3dDeclMethod;
+
+typedef enum {
+   SVGA3D_DECLTYPE_FLOAT1        =  0,
+   SVGA3D_DECLTYPE_FLOAT2        =  1,
+   SVGA3D_DECLTYPE_FLOAT3        =  2,
+   SVGA3D_DECLTYPE_FLOAT4        =  3,
+   SVGA3D_DECLTYPE_D3DCOLOR      =  4,
+   SVGA3D_DECLTYPE_UBYTE4        =  5,
+   SVGA3D_DECLTYPE_SHORT2        =  6,
+   SVGA3D_DECLTYPE_SHORT4        =  7,
+   SVGA3D_DECLTYPE_UBYTE4N       =  8,
+   SVGA3D_DECLTYPE_SHORT2N       =  9,
+   SVGA3D_DECLTYPE_SHORT4N       = 10,
+   SVGA3D_DECLTYPE_USHORT2N      = 11,
+   SVGA3D_DECLTYPE_USHORT4N      = 12,
+   SVGA3D_DECLTYPE_UDEC3         = 13,
+   SVGA3D_DECLTYPE_DEC3N         = 14,
+   SVGA3D_DECLTYPE_FLOAT16_2     = 15,
+   SVGA3D_DECLTYPE_FLOAT16_4     = 16,
+   SVGA3D_DECLTYPE_MAX,
+} SVGA3dDeclType;
+
+/*
+ * This structure is used for the divisor for geometry instancing;
+ * it's a direct translation of the Direct3D equivalent.
+ */
+typedef union {
+   struct {
+      /*
+       * For index data, this number represents the number of instances to draw.
+       * For instance data, this number represents the number of
+       * instances/vertex in this stream
+       */
+      uint32 count : 30;
+
+      /*
+       * This is 1 if this is supposed to be the data that is repeated for
+       * every instance.
+       */
+      uint32 indexedData : 1;
+
+      /*
+       * This is 1 if this is supposed to be the per-instance data.
+       */
+      uint32 instanceData : 1;
+   };
+
+   uint32 value;
+} SVGA3dVertexDivisor;
+
+typedef enum {
+   SVGA3D_PRIMITIVE_INVALID                     = 0,
+   SVGA3D_PRIMITIVE_TRIANGLELIST                = 1,
+   SVGA3D_PRIMITIVE_POINTLIST                   = 2,
+   SVGA3D_PRIMITIVE_LINELIST                    = 3,
+   SVGA3D_PRIMITIVE_LINESTRIP                   = 4,
+   SVGA3D_PRIMITIVE_TRIANGLESTRIP               = 5,
+   SVGA3D_PRIMITIVE_TRIANGLEFAN                 = 6,
+   SVGA3D_PRIMITIVE_MAX
+} SVGA3dPrimitiveType;
+
+typedef enum {
+   SVGA3D_COORDINATE_INVALID                   = 0,
+   SVGA3D_COORDINATE_LEFTHANDED                = 1,
+   SVGA3D_COORDINATE_RIGHTHANDED               = 2,
+   SVGA3D_COORDINATE_MAX
+} SVGA3dCoordinateType;
+
+typedef enum {
+   SVGA3D_TRANSFORM_INVALID                     = 0,
+   SVGA3D_TRANSFORM_WORLD                       = 1,
+   SVGA3D_TRANSFORM_VIEW                        = 2,
+   SVGA3D_TRANSFORM_PROJECTION                  = 3,
+   SVGA3D_TRANSFORM_TEXTURE0                    = 4,
+   SVGA3D_TRANSFORM_TEXTURE1                    = 5,
+   SVGA3D_TRANSFORM_TEXTURE2                    = 6,
+   SVGA3D_TRANSFORM_TEXTURE3                    = 7,
+   SVGA3D_TRANSFORM_TEXTURE4                    = 8,
+   SVGA3D_TRANSFORM_TEXTURE5                    = 9,
+   SVGA3D_TRANSFORM_TEXTURE6                    = 10,
+   SVGA3D_TRANSFORM_TEXTURE7                    = 11,
+   SVGA3D_TRANSFORM_WORLD1                      = 12,
+   SVGA3D_TRANSFORM_WORLD2                      = 13,
+   SVGA3D_TRANSFORM_WORLD3                      = 14,
+   SVGA3D_TRANSFORM_MAX
+} SVGA3dTransformType;
+
+typedef enum {
+   SVGA3D_LIGHTTYPE_INVALID                     = 0,
+   SVGA3D_LIGHTTYPE_POINT                       = 1,
+   SVGA3D_LIGHTTYPE_SPOT1                       = 2, /* 1-cone, in degrees */
+   SVGA3D_LIGHTTYPE_SPOT2                       = 3, /* 2-cone, in radians */
+   SVGA3D_LIGHTTYPE_DIRECTIONAL                 = 4,
+   SVGA3D_LIGHTTYPE_MAX
+} SVGA3dLightType;
+
+typedef enum {
+   SVGA3D_CUBEFACE_POSX                         = 0,
+   SVGA3D_CUBEFACE_NEGX                         = 1,
+   SVGA3D_CUBEFACE_POSY                         = 2,
+   SVGA3D_CUBEFACE_NEGY                         = 3,
+   SVGA3D_CUBEFACE_POSZ                         = 4,
+   SVGA3D_CUBEFACE_NEGZ                         = 5,
+} SVGA3dCubeFace;
+
+typedef enum {
+   SVGA3D_SHADERTYPE_COMPILED_DX8               = 0,
+   SVGA3D_SHADERTYPE_VS                         = 1,
+   SVGA3D_SHADERTYPE_PS                         = 2,
+   SVGA3D_SHADERTYPE_MAX
+} SVGA3dShaderType;
+
+typedef enum {
+   SVGA3D_CONST_TYPE_FLOAT                      = 0,
+   SVGA3D_CONST_TYPE_INT                        = 1,
+   SVGA3D_CONST_TYPE_BOOL                       = 2,
+} SVGA3dShaderConstType;
+
+#define SVGA3D_MAX_SURFACE_FACES                6
+
+typedef enum {
+   SVGA3D_STRETCH_BLT_POINT                     = 0,
+   SVGA3D_STRETCH_BLT_LINEAR                    = 1,
+   SVGA3D_STRETCH_BLT_MAX
+} SVGA3dStretchBltMode;
+
+typedef enum {
+   SVGA3D_QUERYTYPE_OCCLUSION                   = 0,
+   SVGA3D_QUERYTYPE_MAX
+} SVGA3dQueryType;
+
+typedef enum {
+   SVGA3D_QUERYSTATE_PENDING     = 0,      /* Waiting on the host (set by guest) */
+   SVGA3D_QUERYSTATE_SUCCEEDED   = 1,      /* Completed successfully (set by host) */
+   SVGA3D_QUERYSTATE_FAILED      = 2,      /* Completed unsuccessfully (set by host) */
+   SVGA3D_QUERYSTATE_NEW         = 3,      /* Never submitted (For guest use only) */
+} SVGA3dQueryState;
+
+typedef enum {
+   SVGA3D_WRITE_HOST_VRAM        = 1,
+   SVGA3D_READ_HOST_VRAM         = 2,
+} SVGA3dTransferType;
+
+/*
+ * The maximum number vertex arrays we're guaranteed to support in
+ * SVGA_3D_CMD_DRAWPRIMITIVES.
+ */
+#define SVGA3D_MAX_VERTEX_ARRAYS   32
+
+/*
+ * Identifiers for commands in the command FIFO.
+ *
+ * IDs between 1000 and 1039 (inclusive) were used by obsolete versions of
+ * the SVGA3D protocol and remain reserved; they should not be used in the
+ * future.
+ *
+ * IDs between 1040 and 1999 (inclusive) are available for use by the
+ * current SVGA3D protocol.
+ *
+ * FIFO clients other than SVGA3D should stay below 1000, or at 2000
+ * and up.
+ */
+
+#define SVGA_3D_CMD_LEGACY_BASE            1000
+#define SVGA_3D_CMD_BASE                   1040
+
+#define SVGA_3D_CMD_SURFACE_DEFINE         SVGA_3D_CMD_BASE + 0
+#define SVGA_3D_CMD_SURFACE_DESTROY        SVGA_3D_CMD_BASE + 1
+#define SVGA_3D_CMD_SURFACE_COPY           SVGA_3D_CMD_BASE + 2
+#define SVGA_3D_CMD_SURFACE_STRETCHBLT     SVGA_3D_CMD_BASE + 3
+#define SVGA_3D_CMD_SURFACE_DMA            SVGA_3D_CMD_BASE + 4
+#define SVGA_3D_CMD_CONTEXT_DEFINE         SVGA_3D_CMD_BASE + 5
+#define SVGA_3D_CMD_CONTEXT_DESTROY        SVGA_3D_CMD_BASE + 6
+#define SVGA_3D_CMD_SETTRANSFORM           SVGA_3D_CMD_BASE + 7
+#define SVGA_3D_CMD_SETZRANGE              SVGA_3D_CMD_BASE + 8
+#define SVGA_3D_CMD_SETRENDERSTATE         SVGA_3D_CMD_BASE + 9
+#define SVGA_3D_CMD_SETRENDERTARGET        SVGA_3D_CMD_BASE + 10
+#define SVGA_3D_CMD_SETTEXTURESTATE        SVGA_3D_CMD_BASE + 11
+#define SVGA_3D_CMD_SETMATERIAL            SVGA_3D_CMD_BASE + 12
+#define SVGA_3D_CMD_SETLIGHTDATA           SVGA_3D_CMD_BASE + 13
+#define SVGA_3D_CMD_SETLIGHTENABLED        SVGA_3D_CMD_BASE + 14
+#define SVGA_3D_CMD_SETVIEWPORT            SVGA_3D_CMD_BASE + 15
+#define SVGA_3D_CMD_SETCLIPPLANE           SVGA_3D_CMD_BASE + 16
+#define SVGA_3D_CMD_CLEAR                  SVGA_3D_CMD_BASE + 17
+#define SVGA_3D_CMD_PRESENT                SVGA_3D_CMD_BASE + 18    // Deprecated
+#define SVGA_3D_CMD_SHADER_DEFINE          SVGA_3D_CMD_BASE + 19
+#define SVGA_3D_CMD_SHADER_DESTROY         SVGA_3D_CMD_BASE + 20
+#define SVGA_3D_CMD_SET_SHADER             SVGA_3D_CMD_BASE + 21
+#define SVGA_3D_CMD_SET_SHADER_CONST       SVGA_3D_CMD_BASE + 22
+#define SVGA_3D_CMD_DRAW_PRIMITIVES        SVGA_3D_CMD_BASE + 23
+#define SVGA_3D_CMD_SETSCISSORRECT         SVGA_3D_CMD_BASE + 24
+#define SVGA_3D_CMD_BEGIN_QUERY            SVGA_3D_CMD_BASE + 25
+#define SVGA_3D_CMD_END_QUERY              SVGA_3D_CMD_BASE + 26
+#define SVGA_3D_CMD_WAIT_FOR_QUERY         SVGA_3D_CMD_BASE + 27
+#define SVGA_3D_CMD_PRESENT_READBACK       SVGA_3D_CMD_BASE + 28    // Deprecated
+#define SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN SVGA_3D_CMD_BASE + 29
+#define SVGA_3D_CMD_MAX                    SVGA_3D_CMD_BASE + 30
+
+#define SVGA_3D_CMD_FUTURE_MAX             2000
+
+/*
+ * Common substructures used in multiple FIFO commands:
+ */
+
+typedef struct {
+   union {
+      struct {
+         uint16  function;       // SVGA3dFogFunction
+         uint8   type;           // SVGA3dFogType
+         uint8   base;           // SVGA3dFogBase
+      };
+      uint32     uintValue;
+   };
+} SVGA3dFogMode;
+
+/*
+ * Uniquely identify one image (a 1D/2D/3D array) from a surface. This
+ * is a surface ID as well as face/mipmap indices.
+ */
+
+typedef
+struct SVGA3dSurfaceImageId {
+   uint32               sid;
+   uint32               face;
+   uint32               mipmap;
+} SVGA3dSurfaceImageId;
+
+typedef
+struct SVGA3dGuestImage {
+   SVGAGuestPtr         ptr;
+
+   /*
+    * A note on interpretation of pitch: This value of pitch is the
+    * number of bytes between vertically adjacent image
+    * blocks. Normally this is the number of bytes between the first
+    * pixel of two adjacent scanlines. With compressed textures,
+    * however, this may represent the number of bytes between
+    * compression blocks rather than between rows of pixels.
+    *
+    * XXX: Compressed textures currently must be tightly packed in guest memory.
+    *
+    * If the image is 1-dimensional, pitch is ignored.
+    *
+    * If 'pitch' is zero, the SVGA3D device calculates a pitch value
+    * assuming each row of blocks is tightly packed.
+    */
+   uint32 pitch;
+} SVGA3dGuestImage;
+
+
+/*
+ * FIFO command format definitions:
+ */
+
+/*
+ * The data size header following cmdNum for every 3d command
+ */
+typedef
+struct {
+   uint32               id;
+   uint32               size;
+} SVGA3dCmdHeader;
+
+/*
+ * A surface is a hierarchy of host VRAM surfaces: 1D, 2D, or 3D, with
+ * optional mipmaps and cube faces.
+ */
+
+typedef
+struct {
+   uint32               width;
+   uint32               height;
+   uint32               depth;
+} SVGA3dSize;
+
+typedef enum {
+   SVGA3D_SURFACE_CUBEMAP              = (1 << 0),
+   SVGA3D_SURFACE_HINT_STATIC          = (1 << 1),
+   SVGA3D_SURFACE_HINT_DYNAMIC         = (1 << 2),
+   SVGA3D_SURFACE_HINT_INDEXBUFFER     = (1 << 3),
+   SVGA3D_SURFACE_HINT_VERTEXBUFFER    = (1 << 4),
+   SVGA3D_SURFACE_HINT_TEXTURE         = (1 << 5),
+   SVGA3D_SURFACE_HINT_RENDERTARGET    = (1 << 6),
+   SVGA3D_SURFACE_HINT_DEPTHSTENCIL    = (1 << 7),
+   SVGA3D_SURFACE_HINT_WRITEONLY       = (1 << 8),
+} SVGA3dSurfaceFlags;
+
+typedef
+struct {
+   uint32               numMipLevels;
+} SVGA3dSurfaceFace;
+
+typedef
+struct {
+   uint32                      sid;
+   SVGA3dSurfaceFlags          surfaceFlags;
+   SVGA3dSurfaceFormat         format;
+   SVGA3dSurfaceFace           face[SVGA3D_MAX_SURFACE_FACES];
+   /*
+    * Followed by an SVGA3dSize structure for each mip level in each face.
+    *
+    * A note on surface sizes: Sizes are always specified in pixels,
+    * even if the true surface size is not a multiple of the minimum
+    * block size of the surface's format. For example, a 3x3x1 DXT1
+    * compressed texture would actually be stored as a 4x4x1 image in
+    * memory.
+    */
+} SVGA3dCmdDefineSurface;       /* SVGA_3D_CMD_SURFACE_DEFINE */
+
+typedef
+struct {
+   uint32               sid;
+} SVGA3dCmdDestroySurface;      /* SVGA_3D_CMD_SURFACE_DESTROY */
+
+typedef
+struct {
+   uint32               cid;
+} SVGA3dCmdDefineContext;       /* SVGA_3D_CMD_CONTEXT_DEFINE */
+
+typedef
+struct {
+   uint32               cid;
+} SVGA3dCmdDestroyContext;      /* SVGA_3D_CMD_CONTEXT_DESTROY */
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dClearFlag      clearFlag;
+   uint32               color;
+   float                depth;
+   uint32               stencil;
+   /* Followed by variable number of SVGA3dRect structures */
+} SVGA3dCmdClear;               /* SVGA_3D_CMD_CLEAR */
+
+typedef
+struct SVGA3dCopyRect {
+   uint32               x;
+   uint32               y;
+   uint32               w;
+   uint32               h;
+   uint32               srcx;
+   uint32               srcy;
+} SVGA3dCopyRect;
+
+typedef
+struct SVGA3dCopyBox {
+   uint32               x;
+   uint32               y;
+   uint32               z;
+   uint32               w;
+   uint32               h;
+   uint32               d;
+   uint32               srcx;
+   uint32               srcy;
+   uint32               srcz;
+} SVGA3dCopyBox;
+
+typedef
+struct {
+   uint32               x;
+   uint32               y;
+   uint32               w;
+   uint32               h;
+} SVGA3dRect;
+
+typedef
+struct {
+   uint32               x;
+   uint32               y;
+   uint32               z;
+   uint32               w;
+   uint32               h;
+   uint32               d;
+} SVGA3dBox;
+
+typedef
+struct {
+   uint32               x;
+   uint32               y;
+   uint32               z;
+} SVGA3dPoint;
+
+typedef
+struct {
+   SVGA3dLightType      type;
+   SVGA3dBool           inWorldSpace;
+   float                diffuse[4];
+   float                specular[4];
+   float                ambient[4];
+   float                position[4];
+   float                direction[4];
+   float                range;
+   float                falloff;
+   float                attenuation0;
+   float                attenuation1;
+   float                attenuation2;
+   float                theta;
+   float                phi;
+} SVGA3dLightData;
+
+typedef
+struct {
+   uint32               sid;
+   /* Followed by variable number of SVGA3dCopyRect structures */
+} SVGA3dCmdPresent;             /* SVGA_3D_CMD_PRESENT */
+
+typedef
+struct {
+   SVGA3dRenderStateName   state;
+   union {
+      uint32               uintValue;
+      float                floatValue;
+   };
+} SVGA3dRenderState;
+
+typedef
+struct {
+   uint32               cid;
+   /* Followed by variable number of SVGA3dRenderState structures */
+} SVGA3dCmdSetRenderState;      /* SVGA_3D_CMD_SETRENDERSTATE */
+
+typedef
+struct {
+   uint32                 cid;
+   SVGA3dRenderTargetType type;
+   SVGA3dSurfaceImageId   target;
+} SVGA3dCmdSetRenderTarget;     /* SVGA_3D_CMD_SETRENDERTARGET */
+
+typedef
+struct {
+   SVGA3dSurfaceImageId  src;
+   SVGA3dSurfaceImageId  dest;
+   /* Followed by variable number of SVGA3dCopyBox structures */
+} SVGA3dCmdSurfaceCopy;               /* SVGA_3D_CMD_SURFACE_COPY */
+
+typedef
+struct {
+   SVGA3dSurfaceImageId  src;
+   SVGA3dSurfaceImageId  dest;
+   SVGA3dBox             boxSrc;
+   SVGA3dBox             boxDest;
+   SVGA3dStretchBltMode  mode;
+} SVGA3dCmdSurfaceStretchBlt;         /* SVGA_3D_CMD_SURFACE_STRETCHBLT */
+
+typedef
+struct {
+   /*
+    * If the discard flag is present in a surface DMA operation, the host may
+    * discard the contents of the current mipmap level and face of the target
+    * surface before applying the surface DMA contents.
+    */
+   uint32 discard : 1;
+
+   /*
+    * If the unsynchronized flag is present, the host may perform this upload
+    * without syncing to pending reads on this surface.
+    */
+   uint32 unsynchronized : 1;
+
+   /*
+    * Guests *MUST* set the reserved bits to 0 before submitting the command
+    * suffix as future flags may occupy these bits.
+    */
+   uint32 reserved : 30;
+} SVGA3dSurfaceDMAFlags;
+
+typedef
+struct {
+   SVGA3dGuestImage      guest;
+   SVGA3dSurfaceImageId  host;
+   SVGA3dTransferType    transfer;
+   /*
+    * Followed by variable number of SVGA3dCopyBox structures. For consistency
+    * in all clipping logic and coordinate translation, we define the
+    * "source" in each copyBox as the guest image and the
+    * "destination" as the host image, regardless of transfer
+    * direction.
+    *
+    * For efficiency, the SVGA3D device is free to copy more data than
+    * specified. For example, it may round copy boxes outwards such
+    * that they lie on particular alignment boundaries.
+    */
+} SVGA3dCmdSurfaceDMA;                /* SVGA_3D_CMD_SURFACE_DMA */
+
+/*
+ * SVGA3dCmdSurfaceDMASuffix --
+ *
+ *    This is a command suffix that will appear after a SurfaceDMA command in
+ *    the FIFO.  It contains some extra information that hosts may use to
+ *    optimize performance or protect the guest.  This suffix exists to preserve
+ *    backwards compatibility while also allowing for new functionality to be
+ *    implemented.
+ */
+
+typedef
+struct {
+   uint32 suffixSize;
+
+   /*
+    * The maximum offset is used to determine the maximum offset from the
+    * guestPtr base address that will be accessed or written to during this
+    * surfaceDMA.  If the suffix is supported, the host will respect this
+    * boundary while performing surface DMAs.
+    *
+    * Defaults to MAX_UINT32
+    */
+   uint32 maximumOffset;
+
+   /*
+    * A set of flags that describes optimizations that the host may perform
+    * while performing this surface DMA operation.  The guest should never rely
+    * on behaviour that is different when these flags are set for correctness.
+    *
+    * Defaults to 0
+    */
+   SVGA3dSurfaceDMAFlags flags;
+} SVGA3dCmdSurfaceDMASuffix;
+
+/*
+ * SVGA_3D_CMD_DRAW_PRIMITIVES --
+ *
+ *   This command is the SVGA3D device's generic drawing entry point.
+ *   It can draw multiple ranges of primitives, optionally using an
+ *   index buffer, using an arbitrary collection of vertex buffers.
+ *
+ *   Each SVGA3dVertexDecl defines a distinct vertex array to bind
+ *   during this draw call. The declarations specify which surface
+ *   the vertex data lives in, what that vertex data is used for,
+ *   and how to interpret it.
+ *
+ *   Each SVGA3dPrimitiveRange defines a collection of primitives
+ *   to render using the same vertex arrays. An index buffer is
+ *   optional.
+ */
+
+typedef
+struct {
+   /*
+    * A range hint is an optional specification for the range of indices
+    * in an SVGA3dArray that will be used. If 'last' is zero, it is assumed
+    * that the entire array will be used.
+    *
+    * These are only hints. The SVGA3D device may use them for
+    * performance optimization if possible, but it's also allowed to
+    * ignore these values.
+    */
+   uint32               first;
+   uint32               last;
+} SVGA3dArrayRangeHint;
+
+typedef
+struct {
+   /*
+    * Define the origin and shape of a vertex or index array. Both
+    * 'offset' and 'stride' are in bytes. The provided surface will be
+    * reinterpreted as a flat array of bytes in the same format used
+    * by surface DMA operations. To avoid unnecessary conversions, the
+    * surface should be created with the SVGA3D_BUFFER format.
+    *
+    * Index 0 in the array starts 'offset' bytes into the surface.
+    * Index 1 begins at byte 'offset + stride', etc. Array indices may
+    * not be negative.
+    */
+   uint32               surfaceId;
+   uint32               offset;
+   uint32               stride;
+} SVGA3dArray;
+
+typedef
+struct {
+   /*
+    * Describe a vertex array's data type, and define how it is to be
+    * used by the fixed function pipeline or the vertex shader. It
+    * isn't useful to have two VertexDecls with the same
+    * VertexArrayIdentity in one draw call.
+    */
+   SVGA3dDeclType       type;
+   SVGA3dDeclMethod     method;
+   SVGA3dDeclUsage      usage;
+   uint32               usageIndex;
+} SVGA3dVertexArrayIdentity;
+
+typedef
+struct {
+   SVGA3dVertexArrayIdentity  identity;
+   SVGA3dArray                array;
+   SVGA3dArrayRangeHint       rangeHint;
+} SVGA3dVertexDecl;
+
+typedef
+struct {
+   /*
+    * Define a group of primitives to render, from sequential indices.
+    *
+    * The value of 'primitiveType' and 'primitiveCount' imply the
+    * total number of vertices that will be rendered.
+    */
+   SVGA3dPrimitiveType  primType;
+   uint32               primitiveCount;
+
+   /*
+    * Optional index buffer. If indexArray.surfaceId is
+    * SVGA3D_INVALID_ID, we render without an index buffer. Rendering
+    * without an index buffer is identical to rendering with an index
+    * buffer containing the sequence [0, 1, 2, 3, ...].
+    *
+    * If an index buffer is in use, indexWidth specifies the width in
+    * bytes of each index value. It must be less than or equal to
+    * indexArray.stride.
+    *
+    * (Currently, the SVGA3D device requires index buffers to be tightly
+    * packed. In other words, indexWidth == indexArray.stride)
+    */
+   SVGA3dArray          indexArray;
+   uint32               indexWidth;
+
+   /*
+    * Optional index bias. This number is added to all indices from
+    * indexArray before they are used as vertex array indices. This
+    * can be used in multiple ways:
+    *
+    *  - When not using an indexArray, this bias can be used to
+    *    specify where in the vertex arrays to begin rendering.
+    *
+    *  - A positive number here is equivalent to increasing the
+    *    offset in each vertex array.
+    *
+    *  - A negative number can be used to render using a small
+    *    vertex array and an index buffer that contains large
+    *    values. This may be used by some applications that
+    *    crop a vertex buffer without modifying their index
+    *    buffer.
+    *
+    * Note that rendering with a negative bias value may be slower and
+    * use more memory than rendering with a positive or zero bias.
+    */
+   int32                indexBias;
+} SVGA3dPrimitiveRange;
+
+typedef
+struct {
+   uint32               cid;
+   uint32               numVertexDecls;
+   uint32               numRanges;
+
+   /*
+    * There are two variable size arrays after the
+    * SVGA3dCmdDrawPrimitives structure. In order,
+    * they are:
+    *
+    * 1. SVGA3dVertexDecl, quantity 'numVertexDecls'
+    * 2. SVGA3dPrimitiveRange, quantity 'numRanges'
+    * 3. Optionally, SVGA3dVertexDivisor, quantity 'numVertexDecls' (contains
+    *    the frequency divisor for this the corresponding vertex decl)
+    */
+} SVGA3dCmdDrawPrimitives;      /* SVGA_3D_CMD_DRAWPRIMITIVES */
+
+typedef
+struct {
+   uint32                   stage;
+   SVGA3dTextureStateName   name;
+   union {
+      uint32                value;
+      float                 floatValue;
+   };
+} SVGA3dTextureState;
+
+typedef
+struct {
+   uint32               cid;
+   /* Followed by variable number of SVGA3dTextureState structures */
+} SVGA3dCmdSetTextureState;      /* SVGA_3D_CMD_SETTEXTURESTATE */
+
+typedef
+struct {
+   uint32                   cid;
+   SVGA3dTransformType      type;
+   float                    matrix[16];
+} SVGA3dCmdSetTransform;          /* SVGA_3D_CMD_SETTRANSFORM */
+
+typedef
+struct {
+   float                min;
+   float                max;
+} SVGA3dZRange;
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dZRange         zRange;
+} SVGA3dCmdSetZRange;             /* SVGA_3D_CMD_SETZRANGE */
+
+typedef
+struct {
+   float                diffuse[4];
+   float                ambient[4];
+   float                specular[4];
+   float                emissive[4];
+   float                shininess;
+} SVGA3dMaterial;
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dFace           face;
+   SVGA3dMaterial       material;
+} SVGA3dCmdSetMaterial;           /* SVGA_3D_CMD_SETMATERIAL */
+
+typedef
+struct {
+   uint32               cid;
+   uint32               index;
+   SVGA3dLightData      data;
+} SVGA3dCmdSetLightData;           /* SVGA_3D_CMD_SETLIGHTDATA */
+
+typedef
+struct {
+   uint32               cid;
+   uint32               index;
+   uint32               enabled;
+} SVGA3dCmdSetLightEnabled;      /* SVGA_3D_CMD_SETLIGHTENABLED */
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dRect           rect;
+} SVGA3dCmdSetViewport;           /* SVGA_3D_CMD_SETVIEWPORT */
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dRect           rect;
+} SVGA3dCmdSetScissorRect;         /* SVGA_3D_CMD_SETSCISSORRECT */
+
+typedef
+struct {
+   uint32               cid;
+   uint32               index;
+   float                plane[4];
+} SVGA3dCmdSetClipPlane;           /* SVGA_3D_CMD_SETCLIPPLANE */
+
+typedef
+struct {
+   uint32               cid;
+   uint32               shid;
+   SVGA3dShaderType     type;
+   /* Followed by variable number of DWORDs for shader bycode */
+} SVGA3dCmdDefineShader;           /* SVGA_3D_CMD_SHADER_DEFINE */
+
+typedef
+struct {
+   uint32               cid;
+   uint32               shid;
+   SVGA3dShaderType     type;
+} SVGA3dCmdDestroyShader;         /* SVGA_3D_CMD_SHADER_DESTROY */
+
+typedef
+struct {
+   uint32                  cid;
+   uint32                  reg;     /* register number */
+   SVGA3dShaderType        type;
+   SVGA3dShaderConstType   ctype;
+   uint32                  values[4];
+} SVGA3dCmdSetShaderConst;        /* SVGA_3D_CMD_SET_SHADER_CONST */
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dShaderType     type;
+   uint32               shid;
+} SVGA3dCmdSetShader;             /* SVGA_3D_CMD_SET_SHADER */
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dQueryType      type;
+} SVGA3dCmdBeginQuery;           /* SVGA_3D_CMD_BEGIN_QUERY */
+
+typedef
+struct {
+   uint32               cid;
+   SVGA3dQueryType      type;
+   SVGAGuestPtr         guestResult;  /* Points to an SVGA3dQueryResult structure */
+} SVGA3dCmdEndQuery;                  /* SVGA_3D_CMD_END_QUERY */
+
+typedef
+struct {
+   uint32               cid;          /* Same parameters passed to END_QUERY */
+   SVGA3dQueryType      type;
+   SVGAGuestPtr         guestResult;
+} SVGA3dCmdWaitForQuery;              /* SVGA_3D_CMD_WAIT_FOR_QUERY */
+
+typedef
+struct {
+   uint32               totalSize;    /* Set by guest before query is ended. */
+   SVGA3dQueryState     state;        /* Set by host or guest. See SVGA3dQueryState. */
+   union {                            /* Set by host on exit from PENDING state */
+      uint32            result32;
+   };
+} SVGA3dQueryResult;
+
+/*
+ * SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN --
+ *
+ *    This is a blit from an SVGA3D surface to a Screen Object. Just
+ *    like GMR-to-screen blits, this blit may be directed at a
+ *    specific screen or to the virtual coordinate space.
+ *
+ *    The blit copies from a rectangular region of an SVGA3D surface
+ *    image to a rectangular region of a screen or screens.
+ *
+ *    This command takes an optional variable-length list of clipping
+ *    rectangles after the body of the command. If no rectangles are
+ *    specified, there is no clipping region. The entire destRect is
+ *    drawn to. If one or more rectangles are included, they describe
+ *    a clipping region. The clip rectangle coordinates are measured
+ *    relative to the top-left corner of destRect.
+ *
+ *    This clipping region serves multiple purposes:
+ *
+ *      - It can be used to perform an irregularly shaped blit more
+ *        efficiently than by issuing many separate blit commands.
+ *
+ *      - It is equivalent to allowing blits with non-integer
+ *        source coordinates. You could blit just one half-pixel
+ *        of a source, for example, by specifying a larger
+ *        destination rectangle than you need, then removing
+ *        part of it using a clip rectangle.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ *
+ * Limitations:
+ *
+ *    - Currently, no backend supports blits from a mipmap or face
+ *      other than the first one.
+ */
+
+typedef
+struct {
+   SVGA3dSurfaceImageId srcImage;
+   SVGASignedRect       srcRect;
+   uint32               destScreenId; /* Screen ID or SVGA_ID_INVALID for virt. coords */
+   SVGASignedRect       destRect;     /* Supports scaling if src/rest different size */
+   /* Clipping: zero or more SVGASignedRects follow */
+} SVGA3dCmdBlitSurfaceToScreen;         /* SVGA_3D_CMD_BLIT_SURFACE_TO_SCREEN */
+
+
+/*
+ * Capability query index.
+ *
+ * Notes:
+ *
+ *   1. SVGA3D_DEVCAP_MAX_TEXTURES reflects the maximum number of
+ *      fixed-function texture units available. Each of these units
+ *      work in both FFP and Shader modes, and they support texture
+ *      transforms and texture coordinates. The host may have additional
+ *      texture image units that are only usable with shaders.
+ *
+ *   2. The BUFFER_FORMAT capabilities are deprecated, and they always
+ *      return TRUE. Even on physical hardware that does not support
+ *      these formats natively, the SVGA3D device will provide an emulation
+ *      which should be invisible to the guest OS.
+ *
+ *      In general, the SVGA3D device should support any operation on
+ *      any surface format, it just may perform some of these
+ *      operations in software depending on the capabilities of the
+ *      available physical hardware.
+ *
+ *      XXX: In the future, we will add capabilities that describe in
+ *      detail what formats are supported in hardware for what kinds
+ *      of operations.
+ */
+
+typedef enum {
+   SVGA3D_DEVCAP_3D                                = 0,
+   SVGA3D_DEVCAP_MAX_LIGHTS                        = 1,
+   SVGA3D_DEVCAP_MAX_TEXTURES                      = 2,  /* See note (1) */
+   SVGA3D_DEVCAP_MAX_CLIP_PLANES                   = 3,
+   SVGA3D_DEVCAP_VERTEX_SHADER_VERSION             = 4,
+   SVGA3D_DEVCAP_VERTEX_SHADER                     = 5,
+   SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION           = 6,
+   SVGA3D_DEVCAP_FRAGMENT_SHADER                   = 7,
+   SVGA3D_DEVCAP_MAX_RENDER_TARGETS                = 8,
+   SVGA3D_DEVCAP_S23E8_TEXTURES                    = 9,
+   SVGA3D_DEVCAP_S10E5_TEXTURES                    = 10,
+   SVGA3D_DEVCAP_MAX_FIXED_VERTEXBLEND             = 11,
+   SVGA3D_DEVCAP_D16_BUFFER_FORMAT                 = 12, /* See note (2) */
+   SVGA3D_DEVCAP_D24S8_BUFFER_FORMAT               = 13, /* See note (2) */
+   SVGA3D_DEVCAP_D24X8_BUFFER_FORMAT               = 14, /* See note (2) */
+   SVGA3D_DEVCAP_QUERY_TYPES                       = 15,
+   SVGA3D_DEVCAP_TEXTURE_GRADIENT_SAMPLING         = 16,
+   SVGA3D_DEVCAP_MAX_POINT_SIZE                    = 17,
+   SVGA3D_DEVCAP_MAX_SHADER_TEXTURES               = 18,
+   SVGA3D_DEVCAP_MAX_TEXTURE_WIDTH                 = 19,
+   SVGA3D_DEVCAP_MAX_TEXTURE_HEIGHT                = 20,
+   SVGA3D_DEVCAP_MAX_VOLUME_EXTENT                 = 21,
+   SVGA3D_DEVCAP_MAX_TEXTURE_REPEAT                = 22,
+   SVGA3D_DEVCAP_MAX_TEXTURE_ASPECT_RATIO          = 23,
+   SVGA3D_DEVCAP_MAX_TEXTURE_ANISOTROPY            = 24,
+   SVGA3D_DEVCAP_MAX_PRIMITIVE_COUNT               = 25,
+   SVGA3D_DEVCAP_MAX_VERTEX_INDEX                  = 26,
+   SVGA3D_DEVCAP_MAX_VERTEX_SHADER_INSTRUCTIONS    = 27,
+   SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_INSTRUCTIONS  = 28,
+   SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEMPS           = 29,
+   SVGA3D_DEVCAP_MAX_FRAGMENT_SHADER_TEMPS         = 30,
+   SVGA3D_DEVCAP_TEXTURE_OPS                       = 31,
+   SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8               = 32,
+   SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8               = 33,
+   SVGA3D_DEVCAP_SURFACEFMT_A2R10G10B10            = 34,
+   SVGA3D_DEVCAP_SURFACEFMT_X1R5G5B5               = 35,
+   SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5               = 36,
+   SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4               = 37,
+   SVGA3D_DEVCAP_SURFACEFMT_R5G6B5                 = 38,
+   SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE16            = 39,
+   SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8_ALPHA8      = 40,
+   SVGA3D_DEVCAP_SURFACEFMT_ALPHA8                 = 41,
+   SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8             = 42,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_D16                  = 43,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8                = 44,
+   SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8                = 45,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT1                   = 46,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT2                   = 47,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT3                   = 48,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT4                   = 49,
+   SVGA3D_DEVCAP_SURFACEFMT_DXT5                   = 50,
+   SVGA3D_DEVCAP_SURFACEFMT_BUMPX8L8V8U8           = 51,
+   SVGA3D_DEVCAP_SURFACEFMT_A2W10V10U10            = 52,
+   SVGA3D_DEVCAP_SURFACEFMT_BUMPU8V8               = 53,
+   SVGA3D_DEVCAP_SURFACEFMT_Q8W8V8U8               = 54,
+   SVGA3D_DEVCAP_SURFACEFMT_CxV8U8                 = 55,
+   SVGA3D_DEVCAP_SURFACEFMT_R_S10E5                = 56,
+   SVGA3D_DEVCAP_SURFACEFMT_R_S23E8                = 57,
+   SVGA3D_DEVCAP_SURFACEFMT_RG_S10E5               = 58,
+   SVGA3D_DEVCAP_SURFACEFMT_RG_S23E8               = 59,
+   SVGA3D_DEVCAP_SURFACEFMT_ARGB_S10E5             = 60,
+   SVGA3D_DEVCAP_SURFACEFMT_ARGB_S23E8             = 61,
+   SVGA3D_DEVCAP_MAX_VERTEX_SHADER_TEXTURES        = 63,
+
+   /*
+    * Note that MAX_SIMULTANEOUS_RENDER_TARGETS is a maximum count of color
+    * render targets.  This does no include the depth or stencil targets.
+    */
+   SVGA3D_DEVCAP_MAX_SIMULTANEOUS_RENDER_TARGETS   = 64,
+
+   SVGA3D_DEVCAP_SURFACEFMT_V16U16                 = 65,
+   SVGA3D_DEVCAP_SURFACEFMT_G16R16                 = 66,
+   SVGA3D_DEVCAP_SURFACEFMT_A16B16G16R16           = 67,
+   SVGA3D_DEVCAP_SURFACEFMT_UYVY                   = 68,
+   SVGA3D_DEVCAP_SURFACEFMT_YUY2                   = 69,
+
+   /*
+    * Don't add new caps into the previous section; the values in this
+    * enumeration must not change. You can put new values right before
+    * SVGA3D_DEVCAP_MAX.
+    */
+   SVGA3D_DEVCAP_MAX                                  /* This must be the last index. */
+} SVGA3dDevCapIndex;
+
+typedef union {
+   Bool   b;
+   uint32 u;
+   int32  i;
+   float  f;
+} SVGA3dDevCapResult;
+
+#endif /* _SVGA3D_REG_H_ */
diff --git a/src/gallium/drivers/svga/include/svga3d_shaderdefs.h b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
new file mode 100644
index 00000000000..2078c4a8a44
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga3d_shaderdefs.h
@@ -0,0 +1,519 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga3d_shaderdefs.h --
+ *
+ * SVGA3D byte code format and limit definitions.
+ *
+ * The format of the byte code directly corresponds to that defined
+ * by Microsoft DirectX SDK 9.0c (file d3d9types.h). The format can
+ * also be extended so that different shader formats can be supported
+ * for example GLSL, ARB vp/fp, NV/ATI shader formats, etc.
+ *
+ */
+
+#ifndef __SVGA3D_SHADER_DEFS__
+#define __SVGA3D_SHADER_DEFS__
+
+/* SVGA3D shader hardware limits. */
+
+#define SVGA3D_INPUTREG_MAX            16
+#define SVGA3D_OUTPUTREG_MAX           12
+#define SVGA3D_VERTEX_SAMPLERREG_MAX   4
+#define SVGA3D_PIXEL_SAMPLERREG_MAX    16
+#define SVGA3D_SAMPLERREG_MAX          (SVGA3D_PIXEL_SAMPLERREG_MAX+\
+                                        SVGA3D_VERTEX_SAMPLERREG_MAX)
+#define SVGA3D_TEMPREG_MAX             32
+#define SVGA3D_CONSTREG_MAX            256
+#define SVGA3D_CONSTINTREG_MAX         16
+#define SVGA3D_CONSTBOOLREG_MAX        16
+#define SVGA3D_ADDRREG_MAX             1
+#define SVGA3D_PREDREG_MAX             1
+
+/* SVGA3D byte code specific limits */
+
+#define SVGA3D_MAX_SRC_REGS      4
+#define SVGA3D_MAX_NESTING_LEVEL 32
+
+/* SVGA3D version information. */
+
+#define SVGA3D_VS_TYPE  0xFFFE
+#define SVGA3D_PS_TYPE  0xFFFF
+
+typedef struct {
+   union {
+      struct {
+         uint32 minor : 8;
+         uint32 major : 8;
+         uint32 type : 16;
+      };
+
+      uint32 value;
+   };
+} SVGA3dShaderVersion;
+
+#define SVGA3D_VS_10 ((SVGA3D_VS_TYPE << 16) | 1 << 8)
+#define SVGA3D_VS_11 (SVGA3D_VS_10 | 1)
+#define SVGA3D_VS_20 ((SVGA3D_VS_TYPE << 16) | 2 << 8)
+#define SVGA3D_VS_30 ((SVGA3D_VS_TYPE << 16) | 3 << 8)
+
+#define SVGA3D_PS_10 ((SVGA3D_PS_TYPE << 16) | 1 << 8)
+#define SVGA3D_PS_11 (SVGA3D_PS_10 | 1)
+#define SVGA3D_PS_12 (SVGA3D_PS_10 | 2)
+#define SVGA3D_PS_13 (SVGA3D_PS_10 | 3)
+#define SVGA3D_PS_14 (SVGA3D_PS_10 | 4)
+#define SVGA3D_PS_20 ((SVGA3D_PS_TYPE << 16) | 2 << 8)
+#define SVGA3D_PS_30 ((SVGA3D_PS_TYPE << 16) | 3 << 8)
+
+/* The *_ENABLED are for backwards compatibility with old drivers */
+typedef enum {
+   SVGA3DPSVERSION_NONE = 0,
+   SVGA3DPSVERSION_ENABLED = 1,
+   SVGA3DPSVERSION_11 = 3,
+   SVGA3DPSVERSION_12 = 5,
+   SVGA3DPSVERSION_13 = 7,
+   SVGA3DPSVERSION_14 = 9,
+   SVGA3DPSVERSION_20 = 11,
+   SVGA3DPSVERSION_30 = 13,
+   SVGA3DPSVERSION_40 = 15,
+   SVGA3DPSVERSION_MAX
+} SVGA3dPixelShaderVersion;
+
+typedef enum {
+   SVGA3DVSVERSION_NONE = 0,
+   SVGA3DVSVERSION_ENABLED = 1,
+   SVGA3DVSVERSION_11 = 3,
+   SVGA3DVSVERSION_20 = 5,
+   SVGA3DVSVERSION_30 = 7,
+   SVGA3DVSVERSION_40 = 9,
+   SVGA3DVSVERSION_MAX
+} SVGA3dVertexShaderVersion;
+
+/* SVGA3D instruction op codes. */
+
+typedef enum {
+   SVGA3DOP_NOP = 0,
+   SVGA3DOP_MOV,
+   SVGA3DOP_ADD,
+   SVGA3DOP_SUB,
+   SVGA3DOP_MAD,
+   SVGA3DOP_MUL,
+   SVGA3DOP_RCP,
+   SVGA3DOP_RSQ,
+   SVGA3DOP_DP3,
+   SVGA3DOP_DP4,
+   SVGA3DOP_MIN,
+   SVGA3DOP_MAX,
+   SVGA3DOP_SLT,
+   SVGA3DOP_SGE,
+   SVGA3DOP_EXP,
+   SVGA3DOP_LOG,
+   SVGA3DOP_LIT,
+   SVGA3DOP_DST,
+   SVGA3DOP_LRP,
+   SVGA3DOP_FRC,
+   SVGA3DOP_M4x4,
+   SVGA3DOP_M4x3,
+   SVGA3DOP_M3x4,
+   SVGA3DOP_M3x3,
+   SVGA3DOP_M3x2,
+   SVGA3DOP_CALL,
+   SVGA3DOP_CALLNZ,
+   SVGA3DOP_LOOP,
+   SVGA3DOP_RET,
+   SVGA3DOP_ENDLOOP,
+   SVGA3DOP_LABEL,
+   SVGA3DOP_DCL,
+   SVGA3DOP_POW,
+   SVGA3DOP_CRS,
+   SVGA3DOP_SGN,
+   SVGA3DOP_ABS,
+   SVGA3DOP_NRM,
+   SVGA3DOP_SINCOS,
+   SVGA3DOP_REP,
+   SVGA3DOP_ENDREP,
+   SVGA3DOP_IF,
+   SVGA3DOP_IFC,
+   SVGA3DOP_ELSE,
+   SVGA3DOP_ENDIF,
+   SVGA3DOP_BREAK,
+   SVGA3DOP_BREAKC,
+   SVGA3DOP_MOVA,
+   SVGA3DOP_DEFB,
+   SVGA3DOP_DEFI,
+   SVGA3DOP_TEXCOORD = 64,
+   SVGA3DOP_TEXKILL,
+   SVGA3DOP_TEX,
+   SVGA3DOP_TEXBEM,
+   SVGA3DOP_TEXBEML,
+   SVGA3DOP_TEXREG2AR,
+   SVGA3DOP_TEXREG2GB = 70,
+   SVGA3DOP_TEXM3x2PAD,
+   SVGA3DOP_TEXM3x2TEX,
+   SVGA3DOP_TEXM3x3PAD,
+   SVGA3DOP_TEXM3x3TEX,
+   SVGA3DOP_RESERVED0,
+   SVGA3DOP_TEXM3x3SPEC,
+   SVGA3DOP_TEXM3x3VSPEC,
+   SVGA3DOP_EXPP,
+   SVGA3DOP_LOGP,
+   SVGA3DOP_CND = 80,
+   SVGA3DOP_DEF,
+   SVGA3DOP_TEXREG2RGB,
+   SVGA3DOP_TEXDP3TEX,
+   SVGA3DOP_TEXM3x2DEPTH,
+   SVGA3DOP_TEXDP3,
+   SVGA3DOP_TEXM3x3,
+   SVGA3DOP_TEXDEPTH,
+   SVGA3DOP_CMP,
+   SVGA3DOP_BEM,
+   SVGA3DOP_DP2ADD = 90,
+   SVGA3DOP_DSX,
+   SVGA3DOP_DSY,
+   SVGA3DOP_TEXLDD,
+   SVGA3DOP_SETP,
+   SVGA3DOP_TEXLDL,
+   SVGA3DOP_BREAKP = 96,
+   SVGA3DOP_LAST_INST,
+   SVGA3DOP_PHASE = 0xFFFD,
+   SVGA3DOP_COMMENT = 0xFFFE,
+   SVGA3DOP_END = 0xFFFF,
+} SVGA3dShaderOpCodeType;
+
+/* SVGA3D operation control/comparison function types */
+
+typedef enum {
+   SVGA3DOPCONT_NONE,
+   SVGA3DOPCONT_PROJECT,   /* Projective texturing */
+   SVGA3DOPCONT_BIAS,      /* Texturing with a LOD bias */
+} SVGA3dShaderOpCodeControlFnType;
+
+typedef enum {
+   SVGA3DOPCOMP_RESERVED0 = 0,
+   SVGA3DOPCOMP_GT,
+   SVGA3DOPCOMP_EQ,
+   SVGA3DOPCOMP_GE,
+   SVGA3DOPCOMP_LT,
+   SVGA3DOPCOMPC_NE,
+   SVGA3DOPCOMP_LE,
+   SVGA3DOPCOMP_RESERVED1
+} SVGA3dShaderOpCodeCompFnType;
+
+/* SVGA3D register types */
+
+typedef enum {
+    SVGA3DREG_TEMP = 0,       /* Temporary register file */
+    SVGA3DREG_INPUT,          /* Input register file */
+    SVGA3DREG_CONST,          /* Constant register file */
+    SVGA3DREG_ADDR,           /* Address register for VS */
+    SVGA3DREG_TEXTURE = 3,    /* Texture register file for PS */
+    SVGA3DREG_RASTOUT,        /* Rasterizer register file */
+    SVGA3DREG_ATTROUT,        /* Attribute output register file */
+    SVGA3DREG_TEXCRDOUT,      /* Texture coordinate output register file */
+    SVGA3DREG_OUTPUT = 6,     /* Output register file for VS 3.0+ */
+    SVGA3DREG_CONSTINT,       /* Constant integer vector register file */
+    SVGA3DREG_COLOROUT,       /* Color output register file */
+    SVGA3DREG_DEPTHOUT,       /* Depth output register file */
+    SVGA3DREG_SAMPLER,        /* Sampler state register file */
+    SVGA3DREG_CONST2,         /* Constant register file 2048 - 4095 */
+    SVGA3DREG_CONST3,         /* Constant register file 4096 - 6143 */
+    SVGA3DREG_CONST4,         /* Constant register file 6144 - 8191 */
+    SVGA3DREG_CONSTBOOL,      /* Constant boolean register file */
+    SVGA3DREG_LOOP,           /* Loop counter register file */
+    SVGA3DREG_TEMPFLOAT16,    /* 16-bit float temp register file */
+    SVGA3DREG_MISCTYPE,       /* Miscellaneous (single) registers */
+    SVGA3DREG_LABEL,          /* Label */
+    SVGA3DREG_PREDICATE,      /* Predicate register */
+} SVGA3dShaderRegType;
+
+/* SVGA3D rasterizer output register types */
+
+typedef enum {
+   SVGA3DRASTOUT_POSITION = 0,
+   SVGA3DRASTOUT_FOG,
+   SVGA3DRASTOUT_PSIZE
+} SVGA3dShaderRastOutRegType;
+
+/* SVGA3D miscellaneous register types */
+
+typedef enum {
+   SVGA3DMISCREG_POSITION = 0,   /* Input position x,y,z,rhw (PS) */
+   SVGA3DMISCREG_FACE            /* Floating point primitive area (PS) */
+} SVGA3DShaderMiscRegType;
+
+/* SVGA3D sampler types */
+
+typedef enum {
+   SVGA3DSAMP_UNKNOWN = 0, /* Uninitialized value */
+   SVGA3DSAMP_2D = 2,      /* dcl_2d s# (for declaring a 2-D texture) */
+   SVGA3DSAMP_CUBE,        /* dcl_cube s# (for declaring a cube texture) */
+   SVGA3DSAMP_VOLUME,      /* dcl_volume s# (for declaring a volume texture) */
+} SVGA3dShaderSamplerType;
+
+/* SVGA3D sampler format classes */
+
+typedef enum {
+   SVGA3DSAMPFORMAT_ARGB,        /* ARGB formats */
+   SVGA3DSAMPFORMAT_V8U8,        /* Sign and normalize (SNORM) V & U */
+   SVGA3DSAMPFORMAT_Q8W8V8U8,    /* SNORM all */
+   SVGA3DSAMPFORMAT_CxV8U8,      /* SNORM V & U, C=SQRT(1-U^2-V^2) */
+   SVGA3DSAMPFORMAT_X8L8V8U8,    /* SNORM V & U */
+   SVGA3DSAMPFORMAT_A2W10V10U10, /* SNORM W, V & U */
+   SVGA3DSAMPFORMAT_DXT_PMA,     /* DXT pre-multiplied alpha */
+   SVGA3DSAMPFORMAT_YUV,         /* YUV video format */
+   SVGA3DSAMPFORMAT_UYVY,        /* UYVY video format */
+   SVGA3DSAMPFORMAT_Rx,          /* R16F/32F */
+   SVGA3DSAMPFORMAT_RxGx,        /* R16FG16F, R32FG32F */
+   SVGA3DSAMPFORMAT_V16U16,      /* SNORM all */
+} SVGA3DShaderSamplerFormatClass;
+
+/* SVGA3D write mask */
+
+#define SVGA3DWRITEMASK_0    1 /* Component 0 (X;Red) */
+#define SVGA3DWRITEMASK_1    2 /* Component 1 (Y;Green) */
+#define SVGA3DWRITEMASK_2    4 /* Component 2 (Z;Blue) */
+#define SVGA3DWRITEMASK_3    8 /* Component 3 (W;Alpha) */
+#define SVGA3DWRITEMASK_ALL 15 /* All components */
+
+/* SVGA3D destination modifiers */
+
+#define SVGA3DDSTMOD_NONE              0 /* nop */
+#define SVGA3DDSTMOD_SATURATE          1 /* clamp to [0, 1] */
+#define SVGA3DDSTMOD_PARTIALPRECISION  2 /* Partial precision hint */
+
+/*
+ * Relevant to multisampling only:
+ * When the pixel center is not covered, sample
+ * attribute or compute gradients/LOD
+ * using multisample "centroid" location.
+ * "Centroid" is some location within the covered
+ * region of the pixel.
+ */
+
+#define SVGA3DDSTMOD_MSAMPCENTROID     4
+
+/* SVGA3D source swizzle */
+
+#define SVGA3DSWIZZLE_REPLICATEX 0x00
+#define SVGA3DSWIZZLE_REPLICATEY 0x55
+#define SVGA3DSWIZZLE_REPLICATEZ 0xAA
+#define SVGA3DSWIZZLE_REPLICATEW 0xFF
+#define SVGA3DSWIZZLE_NONE       0xE4
+#define SVGA3DSWIZZLE_YZXW       0xC9
+#define SVGA3DSWIZZLE_ZXYW       0xD2
+#define SVGA3DSWIZZLE_WXYZ       0x1B
+
+/* SVGA3D source modifiers */
+
+typedef enum {
+    SVGA3DSRCMOD_NONE = 0, /* nop */
+    SVGA3DSRCMOD_NEG,      /* negate */
+    SVGA3DSRCMOD_BIAS,     /* bias */
+    SVGA3DSRCMOD_BIASNEG,  /* bias and negate */
+    SVGA3DSRCMOD_SIGN,     /* sign */
+    SVGA3DSRCMOD_SIGNNEG,  /* sign and negate */
+    SVGA3DSRCMOD_COMP,     /* complement */
+    SVGA3DSRCMOD_X2,       /* x2 */
+    SVGA3DSRCMOD_X2NEG,    /* x2 and negate */
+    SVGA3DSRCMOD_DZ,       /* divide through by z component */
+    SVGA3DSRCMOD_DW,       /* divide through by w component */
+    SVGA3DSRCMOD_ABS,      /* abs() */
+    SVGA3DSRCMOD_ABSNEG,   /* -abs() */
+    SVGA3DSRCMOD_NOT,      /* ! (for predicate register) */
+} SVGA3dShaderSrcModType;
+
+/* SVGA3D instruction token */
+
+typedef struct {
+   union {
+      struct {
+         uint32 comment_op : 16;
+         uint32 comment_size : 16;
+      };
+
+      struct {
+         uint32 op : 16;
+         uint32 control : 3;
+         uint32 reserved2 : 5;
+         uint32 size : 4;
+         uint32 predicated : 1;
+         uint32 reserved1 : 1;
+         uint32 coissue : 1;
+         uint32 reserved0 : 1;
+      };
+
+      uint32 value;
+   };
+} SVGA3dShaderInstToken;
+
+/* SVGA3D destination parameter token */
+
+typedef struct {
+   union {
+      struct {
+         uint32 num : 11;
+         uint32 type_upper : 2;
+         uint32 relAddr : 1;
+         uint32 reserved1 : 2;
+         uint32 mask : 4;
+         uint32 dstMod : 4;
+         uint32 shfScale : 4;
+         uint32 type_lower : 3;
+         uint32 reserved0 : 1;
+      };
+
+      uint32 value;
+   };
+} SVGA3dShaderDestToken;
+
+/* SVGA3D source parameter token */
+
+typedef struct {
+   union {
+      struct {
+         uint32 num : 11;
+         uint32 type_upper : 2;
+         uint32 relAddr : 1;
+         uint32 reserved1 : 2;
+         uint32 swizzle : 8;
+         uint32 srcMod : 4;
+         uint32 type_lower : 3;
+         uint32 reserved0 : 1;
+      };
+
+      uint32 value;
+   };
+} SVGA3dShaderSrcToken;
+
+/* SVGA3DOP_DCL parameter tokens */
+
+typedef struct {
+   union {
+      struct {
+         union {
+            struct {
+               uint32 usage : 5;
+               uint32 reserved1 : 11;
+               uint32 index : 4;
+               uint32 reserved0 : 12;
+            }; /* input / output declaration */
+
+            struct {
+               uint32 reserved3 : 27;
+               uint32 type : 4;
+               uint32 reserved2 : 1;
+            }; /* sampler declaration */
+         };
+
+         SVGA3dShaderDestToken dst;
+      };
+
+      uint32 values[2];
+   };
+} SVGA3DOpDclArgs;
+
+/* SVGA3DOP_DEF parameter tokens */
+
+typedef struct {
+   union {
+      struct {
+         SVGA3dShaderDestToken dst;
+
+         union {
+            float constValues[4];
+            int constIValues[4];
+            Bool constBValue;
+         };
+      };
+
+      uint32 values[5];
+   };
+} SVGA3DOpDefArgs;
+
+/* SVGA3D shader token */
+
+typedef union {
+   uint32 value;
+   SVGA3dShaderInstToken inst;
+   SVGA3dShaderDestToken dest;
+   SVGA3dShaderSrcToken src;
+} SVGA3dShaderToken;
+
+/* SVGA3D shader program */
+
+typedef struct {
+   SVGA3dShaderVersion version;
+   /* SVGA3dShaderToken stream */
+} SVGA3dShaderProgram;
+
+/* SVGA3D version specific register assignments */
+
+static const uint32 SVGA3D_INPUT_REG_POSITION_VS11 = 0;
+static const uint32 SVGA3D_INPUT_REG_PSIZE_VS11 = 1;
+static const uint32 SVGA3D_INPUT_REG_FOG_VS11 = 3;
+static const uint32 SVGA3D_INPUT_REG_FOG_MASK_VS11 = SVGA3DWRITEMASK_3;
+static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_VS11 = 2;
+static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_VS11 = 4;
+
+static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_PS11 = 0;
+static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_PS11 = 2;
+static const uint32 SVGA3D_OUTPUT_REG_DEPTH_PS11 = 0;
+static const uint32 SVGA3D_OUTPUT_REG_COLOR_PS11 = 1;
+
+static const uint32 SVGA3D_INPUT_REG_COLOR_BASE_PS20 = 0;
+static const uint32 SVGA3D_INPUT_REG_COLOR_NUM_PS20 = 2;
+static const uint32 SVGA3D_INPUT_REG_TEXCOORD_BASE_PS20 = 2;
+static const uint32 SVGA3D_INPUT_REG_TEXCOORD_NUM_PS20 = 8;
+static const uint32 SVGA3D_OUTPUT_REG_COLOR_BASE_PS20 = 1;
+static const uint32 SVGA3D_OUTPUT_REG_COLOR_NUM_PS20 = 4;
+static const uint32 SVGA3D_OUTPUT_REG_DEPTH_BASE_PS20 = 0;
+static const uint32 SVGA3D_OUTPUT_REG_DEPTH_NUM_PS20 = 1;
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3dShaderGetRegType --
+ *
+ *      As the register type is split into two non sequential fields,
+ *      this function provides an useful way of accessing the actual
+ *      register type without having to manually concatenate the
+ *      type_upper and type_lower fields.
+ *
+ * Results:
+ *      Returns the register type.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static INLINE SVGA3dShaderRegType
+SVGA3dShaderGetRegType(uint32 token)
+{
+   SVGA3dShaderSrcToken src;
+   src.value = token;
+   return (SVGA3dShaderRegType)(src.type_upper << 3 | src.type_lower);
+}
+
+#endif /* __SVGA3D_SHADER_DEFS__ */
diff --git a/src/gallium/drivers/svga/include/svga_reg.h b/src/gallium/drivers/svga/include/svga_reg.h
new file mode 100644
index 00000000000..1b96c2ec07d
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga_reg.h
@@ -0,0 +1,1346 @@
+/**********************************************************
+ * Copyright 1998-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga_reg.h --
+ *
+ *    Virtual hardware definitions for the VMware SVGA II device.
+ */
+
+#ifndef _SVGA_REG_H_
+#define _SVGA_REG_H_
+
+/*
+ * PCI device IDs.
+ */
+#define PCI_VENDOR_ID_VMWARE            0x15AD
+#define PCI_DEVICE_ID_VMWARE_SVGA2      0x0405
+
+/*
+ * Legal values for the SVGA_REG_CURSOR_ON register in old-fashioned
+ * cursor bypass mode. This is still supported, but no new guest
+ * drivers should use it.
+ */
+#define SVGA_CURSOR_ON_HIDE            0x0   /* Must be 0 to maintain backward compatibility */
+#define SVGA_CURSOR_ON_SHOW            0x1   /* Must be 1 to maintain backward compatibility */
+#define SVGA_CURSOR_ON_REMOVE_FROM_FB  0x2   /* Remove the cursor from the framebuffer because we need to see what's under it */
+#define SVGA_CURSOR_ON_RESTORE_TO_FB   0x3   /* Put the cursor back in the framebuffer so the user can see it */
+
+/*
+ * The maximum framebuffer size that can traced for e.g. guests in VESA mode.
+ * The changeMap in the monitor is proportional to this number. Therefore, we'd
+ * like to keep it as small as possible to reduce monitor overhead (using
+ * SVGA_VRAM_MAX_SIZE for this increases the size of the shared area by over
+ * 4k!).
+ *
+ * NB: For compatibility reasons, this value must be greater than 0xff0000.
+ *     See bug 335072.
+ */
+#define SVGA_FB_MAX_TRACEABLE_SIZE      0x1000000
+
+#define SVGA_MAX_PSEUDOCOLOR_DEPTH      8
+#define SVGA_MAX_PSEUDOCOLORS           (1 << SVGA_MAX_PSEUDOCOLOR_DEPTH)
+#define SVGA_NUM_PALETTE_REGS           (3 * SVGA_MAX_PSEUDOCOLORS)
+
+#define SVGA_MAGIC         0x900000UL
+#define SVGA_MAKE_ID(ver)  (SVGA_MAGIC << 8 | (ver))
+
+/* Version 2 let the address of the frame buffer be unsigned on Win32 */
+#define SVGA_VERSION_2     2
+#define SVGA_ID_2          SVGA_MAKE_ID(SVGA_VERSION_2)
+
+/* Version 1 has new registers starting with SVGA_REG_CAPABILITIES so
+   PALETTE_BASE has moved */
+#define SVGA_VERSION_1     1
+#define SVGA_ID_1          SVGA_MAKE_ID(SVGA_VERSION_1)
+
+/* Version 0 is the initial version */
+#define SVGA_VERSION_0     0
+#define SVGA_ID_0          SVGA_MAKE_ID(SVGA_VERSION_0)
+
+/* "Invalid" value for all SVGA IDs. (Version ID, screen object ID, surface ID...) */
+#define SVGA_ID_INVALID    0xFFFFFFFF
+
+/* Port offsets, relative to BAR0 */
+#define SVGA_INDEX_PORT         0x0
+#define SVGA_VALUE_PORT         0x1
+#define SVGA_BIOS_PORT          0x2
+#define SVGA_IRQSTATUS_PORT     0x8
+
+/*
+ * Interrupt source flags for IRQSTATUS_PORT and IRQMASK.
+ *
+ * Interrupts are only supported when the
+ * SVGA_CAP_IRQMASK capability is present.
+ */
+#define SVGA_IRQFLAG_ANY_FENCE            0x1    /* Any fence was passed */
+#define SVGA_IRQFLAG_FIFO_PROGRESS        0x2    /* Made forward progress in the FIFO */
+#define SVGA_IRQFLAG_FENCE_GOAL           0x4    /* SVGA_FIFO_FENCE_GOAL reached */
+
+/*
+ * Registers
+ */
+
+enum {
+   SVGA_REG_ID = 0,
+   SVGA_REG_ENABLE = 1,
+   SVGA_REG_WIDTH = 2,
+   SVGA_REG_HEIGHT = 3,
+   SVGA_REG_MAX_WIDTH = 4,
+   SVGA_REG_MAX_HEIGHT = 5,
+   SVGA_REG_DEPTH = 6,
+   SVGA_REG_BITS_PER_PIXEL = 7,       /* Current bpp in the guest */
+   SVGA_REG_PSEUDOCOLOR = 8,
+   SVGA_REG_RED_MASK = 9,
+   SVGA_REG_GREEN_MASK = 10,
+   SVGA_REG_BLUE_MASK = 11,
+   SVGA_REG_BYTES_PER_LINE = 12,
+   SVGA_REG_FB_START = 13,            /* (Deprecated) */
+   SVGA_REG_FB_OFFSET = 14,
+   SVGA_REG_VRAM_SIZE = 15,
+   SVGA_REG_FB_SIZE = 16,
+
+   /* ID 0 implementation only had the above registers, then the palette */
+
+   SVGA_REG_CAPABILITIES = 17,
+   SVGA_REG_MEM_START = 18,           /* (Deprecated) */
+   SVGA_REG_MEM_SIZE = 19,
+   SVGA_REG_CONFIG_DONE = 20,         /* Set when memory area configured */
+   SVGA_REG_SYNC = 21,                /* See "FIFO Synchronization Registers" */
+   SVGA_REG_BUSY = 22,                /* See "FIFO Synchronization Registers" */
+   SVGA_REG_GUEST_ID = 23,            /* Set guest OS identifier */
+   SVGA_REG_CURSOR_ID = 24,           /* (Deprecated) */
+   SVGA_REG_CURSOR_X = 25,            /* (Deprecated) */
+   SVGA_REG_CURSOR_Y = 26,            /* (Deprecated) */
+   SVGA_REG_CURSOR_ON = 27,           /* (Deprecated) */
+   SVGA_REG_HOST_BITS_PER_PIXEL = 28, /* (Deprecated) */
+   SVGA_REG_SCRATCH_SIZE = 29,        /* Number of scratch registers */
+   SVGA_REG_MEM_REGS = 30,            /* Number of FIFO registers */
+   SVGA_REG_NUM_DISPLAYS = 31,        /* (Deprecated) */
+   SVGA_REG_PITCHLOCK = 32,           /* Fixed pitch for all modes */
+   SVGA_REG_IRQMASK = 33,             /* Interrupt mask */
+
+   /* Legacy multi-monitor support */
+   SVGA_REG_NUM_GUEST_DISPLAYS = 34,/* Number of guest displays in X/Y direction */
+   SVGA_REG_DISPLAY_ID = 35,        /* Display ID for the following display attributes */
+   SVGA_REG_DISPLAY_IS_PRIMARY = 36,/* Whether this is a primary display */
+   SVGA_REG_DISPLAY_POSITION_X = 37,/* The display position x */
+   SVGA_REG_DISPLAY_POSITION_Y = 38,/* The display position y */
+   SVGA_REG_DISPLAY_WIDTH = 39,     /* The display's width */
+   SVGA_REG_DISPLAY_HEIGHT = 40,    /* The display's height */
+
+   /* See "Guest memory regions" below. */
+   SVGA_REG_GMR_ID = 41,
+   SVGA_REG_GMR_DESCRIPTOR = 42,
+   SVGA_REG_GMR_MAX_IDS = 43,
+   SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH = 44,
+
+   SVGA_REG_TRACES = 45,            /* Enable trace-based updates even when FIFO is on */
+   SVGA_REG_TOP = 46,               /* Must be 1 more than the last register */
+
+   SVGA_PALETTE_BASE = 1024,        /* Base of SVGA color map */
+   /* Next 768 (== 256*3) registers exist for colormap */
+
+   SVGA_SCRATCH_BASE = SVGA_PALETTE_BASE + SVGA_NUM_PALETTE_REGS
+                                    /* Base of scratch registers */
+   /* Next reg[SVGA_REG_SCRATCH_SIZE] registers exist for scratch usage:
+      First 4 are reserved for VESA BIOS Extension; any remaining are for
+      the use of the current SVGA driver. */
+};
+
+
+/*
+ * Guest memory regions (GMRs):
+ *
+ * This is a new memory mapping feature available in SVGA devices
+ * which have the SVGA_CAP_GMR bit set. Previously, there were two
+ * fixed memory regions available with which to share data between the
+ * device and the driver: the FIFO ('MEM') and the framebuffer. GMRs
+ * are our name for an extensible way of providing arbitrary DMA
+ * buffers for use between the driver and the SVGA device. They are a
+ * new alternative to framebuffer memory, usable for both 2D and 3D
+ * graphics operations.
+ *
+ * Since GMR mapping must be done synchronously with guest CPU
+ * execution, we use a new pair of SVGA registers:
+ *
+ *   SVGA_REG_GMR_ID --
+ *
+ *     Read/write.
+ *     This register holds the 32-bit ID (a small positive integer)
+ *     of a GMR to create, delete, or redefine. Writing this register
+ *     has no side-effects.
+ *
+ *   SVGA_REG_GMR_DESCRIPTOR --
+ *
+ *     Write-only.
+ *     Writing this register will create, delete, or redefine the GMR
+ *     specified by the above ID register. If this register is zero,
+ *     the GMR is deleted. Any pointers into this GMR (including those
+ *     currently being processed by FIFO commands) will be
+ *     synchronously invalidated.
+ *
+ *     If this register is nonzero, it must be the physical page
+ *     number (PPN) of a data structure which describes the physical
+ *     layout of the memory region this GMR should describe. The
+ *     descriptor structure will be read synchronously by the SVGA
+ *     device when this register is written. The descriptor need not
+ *     remain allocated for the lifetime of the GMR.
+ *
+ *     The guest driver should write SVGA_REG_GMR_ID first, then
+ *     SVGA_REG_GMR_DESCRIPTOR.
+ *
+ *   SVGA_REG_GMR_MAX_IDS --
+ *
+ *     Read-only.
+ *     The SVGA device may choose to support a maximum number of
+ *     user-defined GMR IDs. This register holds the number of supported
+ *     IDs. (The maximum supported ID plus 1)
+ *
+ *   SVGA_REG_GMR_MAX_DESCRIPTOR_LENGTH --
+ *
+ *     Read-only.
+ *     The SVGA device may choose to put a limit on the total number
+ *     of SVGAGuestMemDescriptor structures it will read when defining
+ *     a single GMR.
+ *
+ * The descriptor structure is an array of SVGAGuestMemDescriptor
+ * structures. Each structure may do one of three things:
+ *
+ *   - Terminate the GMR descriptor list.
+ *     (ppn==0, numPages==0)
+ *
+ *   - Add a PPN or range of PPNs to the GMR's virtual address space.
+ *     (ppn != 0, numPages != 0)
+ *
+ *   - Provide the PPN of the next SVGAGuestMemDescriptor, in order to
+ *     support multi-page GMR descriptor tables without forcing the
+ *     driver to allocate physically contiguous memory.
+ *     (ppn != 0, numPages == 0)
+ *
+ * Note that each physical page of SVGAGuestMemDescriptor structures
+ * can describe at least 2MB of guest memory. If the driver needs to
+ * use more than one page of descriptor structures, it must use one of
+ * its SVGAGuestMemDescriptors to point to an additional page.  The
+ * device will never automatically cross a page boundary.
+ *
+ * Once the driver has described a GMR, it is immediately available
+ * for use via any FIFO command that uses an SVGAGuestPtr structure.
+ * These pointers include a GMR identifier plus an offset into that
+ * GMR.
+ *
+ * The driver must check the SVGA_CAP_GMR bit before using the GMR
+ * registers.
+ */
+
+/*
+ * Special GMR IDs, allowing SVGAGuestPtrs to point to framebuffer
+ * memory as well.  In the future, these IDs could even be used to
+ * allow legacy memory regions to be redefined by the guest as GMRs.
+ *
+ * Using the guest framebuffer (GFB) at BAR1 for general purpose DMA
+ * is being phased out. Please try to use user-defined GMRs whenever
+ * possible.
+ */
+#define SVGA_GMR_NULL         ((uint32) -1)
+#define SVGA_GMR_FRAMEBUFFER  ((uint32) -2)  // Guest Framebuffer (GFB)
+
+typedef
+struct SVGAGuestMemDescriptor {
+   uint32 ppn;
+   uint32 numPages;
+} SVGAGuestMemDescriptor;
+
+typedef
+struct SVGAGuestPtr {
+   uint32 gmrId;
+   uint32 offset;
+} SVGAGuestPtr;
+
+
+/*
+ * SVGAGMRImageFormat --
+ *
+ *    This is a packed representation of the source 2D image format
+ *    for a GMR-to-screen blit. Currently it is defined as an encoding
+ *    of the screen's color depth and bits-per-pixel, however, 16 bits
+ *    are reserved for future use to identify other encodings (such as
+ *    RGBA or higher-precision images).
+ *
+ *    Currently supported formats:
+ *
+ *       bpp depth  Format Name
+ *       --- -----  -----------
+ *        32    24  32-bit BGRX
+ *        24    24  24-bit BGR
+ *        16    16  RGB 5-6-5
+ *        16    15  RGB 5-5-5
+ *
+ */
+
+typedef
+struct SVGAGMRImageFormat {
+   union {
+      struct {
+         uint32 bitsPerPixel : 8;
+         uint32 colorDepth   : 8;
+         uint32 reserved     : 16;  // Must be zero
+      };
+
+      uint32 value;
+   };
+} SVGAGMRImageFormat;
+
+/*
+ * SVGAColorBGRX --
+ *
+ *    A 24-bit color format (BGRX), which does not depend on the
+ *    format of the legacy guest framebuffer (GFB) or the current
+ *    GMRFB state.
+ */
+
+typedef
+struct SVGAColorBGRX {
+   union {
+      struct {
+         uint32 b : 8;
+         uint32 g : 8;
+         uint32 r : 8;
+         uint32 x : 8;  // Unused
+      };
+
+      uint32 value;
+   };
+} SVGAColorBGRX;
+
+
+/*
+ * SVGASignedRect --
+ * SVGASignedPoint --
+ *
+ *    Signed rectangle and point primitives. These are used by the new
+ *    2D primitives for drawing to Screen Objects, which can occupy a
+ *    signed virtual coordinate space.
+ *
+ *    SVGASignedRect specifies a half-open interval: the (left, top)
+ *    pixel is part of the rectangle, but the (right, bottom) pixel is
+ *    not.
+ */
+
+typedef
+struct SVGASignedRect {
+   int32  left;
+   int32  top;
+   int32  right;
+   int32  bottom;
+} SVGASignedRect;
+
+typedef
+struct SVGASignedPoint {
+   int32  x;
+   int32  y;
+} SVGASignedPoint;
+
+
+/*
+ *  Capabilities
+ *
+ *  Note the holes in the bitfield. Missing bits have been deprecated,
+ *  and must not be reused. Those capabilities will never be reported
+ *  by new versions of the SVGA device.
+ */
+
+#define SVGA_CAP_NONE               0x00000000
+#define SVGA_CAP_RECT_COPY          0x00000002
+#define SVGA_CAP_CURSOR             0x00000020
+#define SVGA_CAP_CURSOR_BYPASS      0x00000040   // Legacy (Use Cursor Bypass 3 instead)
+#define SVGA_CAP_CURSOR_BYPASS_2    0x00000080   // Legacy (Use Cursor Bypass 3 instead)
+#define SVGA_CAP_8BIT_EMULATION     0x00000100
+#define SVGA_CAP_ALPHA_CURSOR       0x00000200
+#define SVGA_CAP_3D                 0x00004000
+#define SVGA_CAP_EXTENDED_FIFO      0x00008000
+#define SVGA_CAP_MULTIMON           0x00010000   // Legacy multi-monitor support
+#define SVGA_CAP_PITCHLOCK          0x00020000
+#define SVGA_CAP_IRQMASK            0x00040000
+#define SVGA_CAP_DISPLAY_TOPOLOGY   0x00080000   // Legacy multi-monitor support
+#define SVGA_CAP_GMR                0x00100000
+#define SVGA_CAP_TRACES             0x00200000
+
+
+/*
+ * FIFO register indices.
+ *
+ * The FIFO is a chunk of device memory mapped into guest physmem.  It
+ * is always treated as 32-bit words.
+ *
+ * The guest driver gets to decide how to partition it between
+ * - FIFO registers (there are always at least 4, specifying where the
+ *   following data area is and how much data it contains; there may be
+ *   more registers following these, depending on the FIFO protocol
+ *   version in use)
+ * - FIFO data, written by the guest and slurped out by the VMX.
+ * These indices are 32-bit word offsets into the FIFO.
+ */
+
+enum {
+   /*
+    * Block 1 (basic registers): The originally defined FIFO registers.
+    * These exist and are valid for all versions of the FIFO protocol.
+    */
+
+   SVGA_FIFO_MIN = 0,
+   SVGA_FIFO_MAX,       /* The distance from MIN to MAX must be at least 10K */
+   SVGA_FIFO_NEXT_CMD,
+   SVGA_FIFO_STOP,
+
+   /*
+    * Block 2 (extended registers): Mandatory registers for the extended
+    * FIFO.  These exist if the SVGA caps register includes
+    * SVGA_CAP_EXTENDED_FIFO; some of them are valid only if their
+    * associated capability bit is enabled.
+    *
+    * Note that when originally defined, SVGA_CAP_EXTENDED_FIFO implied
+    * support only for (FIFO registers) CAPABILITIES, FLAGS, and FENCE.
+    * This means that the guest has to test individually (in most cases
+    * using FIFO caps) for the presence of registers after this; the VMX
+    * can define "extended FIFO" to mean whatever it wants, and currently
+    * won't enable it unless there's room for that set and much more.
+    */
+
+   SVGA_FIFO_CAPABILITIES = 4,
+   SVGA_FIFO_FLAGS,
+   // Valid with SVGA_FIFO_CAP_FENCE:
+   SVGA_FIFO_FENCE,
+
+   /*
+    * Block 3a (optional extended registers): Additional registers for the
+    * extended FIFO, whose presence isn't actually implied by
+    * SVGA_CAP_EXTENDED_FIFO; these exist if SVGA_FIFO_MIN is high enough to
+    * leave room for them.
+    *
+    * These in block 3a, the VMX currently considers mandatory for the
+    * extended FIFO.
+    */
+
+   // Valid if exists (i.e. if extended FIFO enabled):
+   SVGA_FIFO_3D_HWVERSION,       /* See SVGA3dHardwareVersion in svga3d_reg.h */
+   // Valid with SVGA_FIFO_CAP_PITCHLOCK:
+   SVGA_FIFO_PITCHLOCK,
+
+   // Valid with SVGA_FIFO_CAP_CURSOR_BYPASS_3:
+   SVGA_FIFO_CURSOR_ON,          /* Cursor bypass 3 show/hide register */
+   SVGA_FIFO_CURSOR_X,           /* Cursor bypass 3 x register */
+   SVGA_FIFO_CURSOR_Y,           /* Cursor bypass 3 y register */
+   SVGA_FIFO_CURSOR_COUNT,       /* Incremented when any of the other 3 change */
+   SVGA_FIFO_CURSOR_LAST_UPDATED,/* Last time the host updated the cursor */
+
+   // Valid with SVGA_FIFO_CAP_RESERVE:
+   SVGA_FIFO_RESERVED,           /* Bytes past NEXT_CMD with real contents */
+
+   /*
+    * Valid with SVGA_FIFO_CAP_SCREEN_OBJECT:
+    *
+    * By default this is SVGA_ID_INVALID, to indicate that the cursor
+    * coordinates are specified relative to the virtual root. If this
+    * is set to a specific screen ID, cursor position is reinterpreted
+    * as a signed offset relative to that screen's origin. This is the
+    * only way to place the cursor on a non-rooted screen.
+    */
+   SVGA_FIFO_CURSOR_SCREEN_ID,
+
+   /*
+    * XXX: The gap here, up until SVGA_FIFO_3D_CAPS, can be used for new
+    * registers, but this must be done carefully and with judicious use of
+    * capability bits, since comparisons based on SVGA_FIFO_MIN aren't
+    * enough to tell you whether the register exists: we've shipped drivers
+    * and products that used SVGA_FIFO_3D_CAPS but didn't know about some of
+    * the earlier ones.  The actual order of introduction was:
+    * - PITCHLOCK
+    * - 3D_CAPS
+    * - CURSOR_* (cursor bypass 3)
+    * - RESERVED
+    * So, code that wants to know whether it can use any of the
+    * aforementioned registers, or anything else added after PITCHLOCK and
+    * before 3D_CAPS, needs to reason about something other than
+    * SVGA_FIFO_MIN.
+    */
+
+   /*
+    * 3D caps block space; valid with 3D hardware version >=
+    * SVGA3D_HWVERSION_WS6_B1.
+    */
+   SVGA_FIFO_3D_CAPS      = 32,
+   SVGA_FIFO_3D_CAPS_LAST = 32 + 255,
+
+   /*
+    * End of VMX's current definition of "extended-FIFO registers".
+    * Registers before here are always enabled/disabled as a block; either
+    * the extended FIFO is enabled and includes all preceding registers, or
+    * it's disabled entirely.
+    *
+    * Block 3b (truly optional extended registers): Additional registers for
+    * the extended FIFO, which the VMX already knows how to enable and
+    * disable with correct granularity.
+    *
+    * Registers after here exist if and only if the guest SVGA driver
+    * sets SVGA_FIFO_MIN high enough to leave room for them.
+    */
+
+   // Valid if register exists:
+   SVGA_FIFO_GUEST_3D_HWVERSION, /* Guest driver's 3D version */
+   SVGA_FIFO_FENCE_GOAL,         /* Matching target for SVGA_IRQFLAG_FENCE_GOAL */
+   SVGA_FIFO_BUSY,               /* See "FIFO Synchronization Registers" */
+
+   /*
+    * Always keep this last.  This defines the maximum number of
+    * registers we know about.  At power-on, this value is placed in
+    * the SVGA_REG_MEM_REGS register, and we expect the guest driver
+    * to allocate this much space in FIFO memory for registers.
+    */
+    SVGA_FIFO_NUM_REGS
+};
+
+
+/*
+ * Definition of registers included in extended FIFO support.
+ *
+ * The guest SVGA driver gets to allocate the FIFO between registers
+ * and data.  It must always allocate at least 4 registers, but old
+ * drivers stopped there.
+ *
+ * The VMX will enable extended FIFO support if and only if the guest
+ * left enough room for all registers defined as part of the mandatory
+ * set for the extended FIFO.
+ *
+ * Note that the guest drivers typically allocate the FIFO only at
+ * initialization time, not at mode switches, so it's likely that the
+ * number of FIFO registers won't change without a reboot.
+ *
+ * All registers less than this value are guaranteed to be present if
+ * svgaUser->fifo.extended is set. Any later registers must be tested
+ * individually for compatibility at each use (in the VMX).
+ *
+ * This value is used only by the VMX, so it can change without
+ * affecting driver compatibility; keep it that way?
+ */
+#define SVGA_FIFO_EXTENDED_MANDATORY_REGS  (SVGA_FIFO_3D_CAPS_LAST + 1)
+
+
+/*
+ * FIFO Synchronization Registers
+ *
+ *  This explains the relationship between the various FIFO
+ *  sync-related registers in IOSpace and in FIFO space.
+ *
+ *  SVGA_REG_SYNC --
+ *
+ *       The SYNC register can be used in two different ways by the guest:
+ *
+ *         1. If the guest wishes to fully sync (drain) the FIFO,
+ *            it will write once to SYNC then poll on the BUSY
+ *            register. The FIFO is sync'ed once BUSY is zero.
+ *
+ *         2. If the guest wants to asynchronously wake up the host,
+ *            it will write once to SYNC without polling on BUSY.
+ *            Ideally it will do this after some new commands have
+ *            been placed in the FIFO, and after reading a zero
+ *            from SVGA_FIFO_BUSY.
+ *
+ *       (1) is the original behaviour that SYNC was designed to
+ *       support.  Originally, a write to SYNC would implicitly
+ *       trigger a read from BUSY. This causes us to synchronously
+ *       process the FIFO.
+ *
+ *       This behaviour has since been changed so that writing SYNC
+ *       will *not* implicitly cause a read from BUSY. Instead, it
+ *       makes a channel call which asynchronously wakes up the MKS
+ *       thread.
+ *
+ *       New guests can use this new behaviour to implement (2)
+ *       efficiently. This lets guests get the host's attention
+ *       without waiting for the MKS to poll, which gives us much
+ *       better CPU utilization on SMP hosts and on UP hosts while
+ *       we're blocked on the host GPU.
+ *
+ *       Old guests shouldn't notice the behaviour change. SYNC was
+ *       never guaranteed to process the entire FIFO, since it was
+ *       bounded to a particular number of CPU cycles. Old guests will
+ *       still loop on the BUSY register until the FIFO is empty.
+ *
+ *       Writing to SYNC currently has the following side-effects:
+ *
+ *         - Sets SVGA_REG_BUSY to TRUE (in the monitor)
+ *         - Asynchronously wakes up the MKS thread for FIFO processing
+ *         - The value written to SYNC is recorded as a "reason", for
+ *           stats purposes.
+ *
+ *       If SVGA_FIFO_BUSY is available, drivers are advised to only
+ *       write to SYNC if SVGA_FIFO_BUSY is FALSE. Drivers should set
+ *       SVGA_FIFO_BUSY to TRUE after writing to SYNC. The MKS will
+ *       eventually set SVGA_FIFO_BUSY on its own, but this approach
+ *       lets the driver avoid sending multiple asynchronous wakeup
+ *       messages to the MKS thread.
+ *
+ *  SVGA_REG_BUSY --
+ *
+ *       This register is set to TRUE when SVGA_REG_SYNC is written,
+ *       and it reads as FALSE when the FIFO has been completely
+ *       drained.
+ *
+ *       Every read from this register causes us to synchronously
+ *       process FIFO commands. There is no guarantee as to how many
+ *       commands each read will process.
+ *
+ *       CPU time spent processing FIFO commands will be billed to
+ *       the guest.
+ *
+ *       New drivers should avoid using this register unless they
+ *       need to guarantee that the FIFO is completely drained. It
+ *       is overkill for performing a sync-to-fence. Older drivers
+ *       will use this register for any type of synchronization.
+ *
+ *  SVGA_FIFO_BUSY --
+ *
+ *       This register is a fast way for the guest driver to check
+ *       whether the FIFO is already being processed. It reads and
+ *       writes at normal RAM speeds, with no monitor intervention.
+ *
+ *       If this register reads as TRUE, the host is guaranteeing that
+ *       any new commands written into the FIFO will be noticed before
+ *       the MKS goes back to sleep.
+ *
+ *       If this register reads as FALSE, no such guarantee can be
+ *       made.
+ *
+ *       The guest should use this register to quickly determine
+ *       whether or not it needs to wake up the host. If the guest
+ *       just wrote a command or group of commands that it would like
+ *       the host to begin processing, it should:
+ *
+ *         1. Read SVGA_FIFO_BUSY. If it reads as TRUE, no further
+ *            action is necessary.
+ *
+ *         2. Write TRUE to SVGA_FIFO_BUSY. This informs future guest
+ *            code that we've already sent a SYNC to the host and we
+ *            don't need to send a duplicate.
+ *
+ *         3. Write a reason to SVGA_REG_SYNC. This will send an
+ *            asynchronous wakeup to the MKS thread.
+ */
+
+
+/*
+ * FIFO Capabilities
+ *
+ *      Fence -- Fence register and command are supported
+ *      Accel Front -- Front buffer only commands are supported
+ *      Pitch Lock -- Pitch lock register is supported
+ *      Video -- SVGA Video overlay units are supported
+ *      Escape -- Escape command is supported
+ *
+ * XXX: Add longer descriptions for each capability, including a list
+ *      of the new features that each capability provides.
+ *
+ * SVGA_FIFO_CAP_SCREEN_OBJECT --
+ *
+ *    Provides dynamic multi-screen rendering, for improved Unity and
+ *    multi-monitor modes. With Screen Object, the guest can
+ *    dynamically create and destroy 'screens', which can represent
+ *    Unity windows or virtual monitors. Screen Object also provides
+ *    strong guarantees that DMA operations happen only when
+ *    guest-initiated. Screen Object deprecates the BAR1 guest
+ *    framebuffer (GFB) and all commands that work only with the GFB.
+ *
+ *    New registers:
+ *       FIFO_CURSOR_SCREEN_ID, VIDEO_DATA_GMRID, VIDEO_DST_SCREEN_ID
+ *
+ *    New 2D commands:
+ *       DEFINE_SCREEN, DESTROY_SCREEN, DEFINE_GMRFB, BLIT_GMRFB_TO_SCREEN,
+ *       BLIT_SCREEN_TO_GMRFB, ANNOTATION_FILL, ANNOTATION_COPY
+ *
+ *    New 3D commands:
+ *       BLIT_SURFACE_TO_SCREEN
+ *
+ *    New guarantees:
+ *
+ *       - The host will not read or write guest memory, including the GFB,
+ *         except when explicitly initiated by a DMA command.
+ *
+ *       - All DMA, including legacy DMA like UPDATE and PRESENT_READBACK,
+ *         is guaranteed to complete before any subsequent FENCEs.
+ *
+ *       - All legacy commands which affect a Screen (UPDATE, PRESENT,
+ *         PRESENT_READBACK) as well as new Screen blit commands will
+ *         all behave consistently as blits, and memory will be read
+ *         or written in FIFO order.
+ *
+ *         For example, if you PRESENT from one SVGA3D surface to multiple
+ *         places on the screen, the data copied will always be from the
+ *         SVGA3D surface at the time the PRESENT was issued in the FIFO.
+ *         This was not necessarily true on devices without Screen Object.
+ *
+ *         This means that on devices that support Screen Object, the
+ *         PRESENT_READBACK command should not be necessary unless you
+ *         actually want to read back the results of 3D rendering into
+ *         system memory. (And for that, the BLIT_SCREEN_TO_GMRFB
+ *         command provides a strict superset of functionality.)
+ *
+ *       - When a screen is resized, either using Screen Object commands or
+ *         legacy multimon registers, its contents are preserved.
+ */
+
+#define SVGA_FIFO_CAP_NONE                  0
+#define SVGA_FIFO_CAP_FENCE             (1<<0)
+#define SVGA_FIFO_CAP_ACCELFRONT        (1<<1)
+#define SVGA_FIFO_CAP_PITCHLOCK         (1<<2)
+#define SVGA_FIFO_CAP_VIDEO             (1<<3)
+#define SVGA_FIFO_CAP_CURSOR_BYPASS_3   (1<<4)
+#define SVGA_FIFO_CAP_ESCAPE            (1<<5)
+#define SVGA_FIFO_CAP_RESERVE           (1<<6)
+#define SVGA_FIFO_CAP_SCREEN_OBJECT     (1<<7)
+
+
+/*
+ * FIFO Flags
+ *
+ *      Accel Front -- Driver should use front buffer only commands
+ */
+
+#define SVGA_FIFO_FLAG_NONE                 0
+#define SVGA_FIFO_FLAG_ACCELFRONT       (1<<0)
+#define SVGA_FIFO_FLAG_RESERVED        (1<<31) // Internal use only
+
+/*
+ * FIFO reservation sentinel value
+ */
+
+#define SVGA_FIFO_RESERVED_UNKNOWN      0xffffffff
+
+
+/*
+ * Video overlay support
+ */
+
+#define SVGA_NUM_OVERLAY_UNITS 32
+
+
+/*
+ * Video capabilities that the guest is currently using
+ */
+
+#define SVGA_VIDEO_FLAG_COLORKEY        0x0001
+
+
+/*
+ * Offsets for the video overlay registers
+ */
+
+enum {
+   SVGA_VIDEO_ENABLED = 0,
+   SVGA_VIDEO_FLAGS,
+   SVGA_VIDEO_DATA_OFFSET,
+   SVGA_VIDEO_FORMAT,
+   SVGA_VIDEO_COLORKEY,
+   SVGA_VIDEO_SIZE,          // Deprecated
+   SVGA_VIDEO_WIDTH,
+   SVGA_VIDEO_HEIGHT,
+   SVGA_VIDEO_SRC_X,
+   SVGA_VIDEO_SRC_Y,
+   SVGA_VIDEO_SRC_WIDTH,
+   SVGA_VIDEO_SRC_HEIGHT,
+   SVGA_VIDEO_DST_X,         // Signed int32
+   SVGA_VIDEO_DST_Y,         // Signed int32
+   SVGA_VIDEO_DST_WIDTH,
+   SVGA_VIDEO_DST_HEIGHT,
+   SVGA_VIDEO_PITCH_1,
+   SVGA_VIDEO_PITCH_2,
+   SVGA_VIDEO_PITCH_3,
+   SVGA_VIDEO_DATA_GMRID,    // Optional, defaults to SVGA_GMR_FRAMEBUFFER
+   SVGA_VIDEO_DST_SCREEN_ID, // Optional, defaults to virtual coords (SVGA_ID_INVALID)
+   SVGA_VIDEO_NUM_REGS
+};
+
+
+/*
+ * SVGA Overlay Units
+ *
+ *      width and height relate to the entire source video frame.
+ *      srcX, srcY, srcWidth and srcHeight represent subset of the source
+ *      video frame to be displayed.
+ */
+
+typedef struct SVGAOverlayUnit {
+   uint32 enabled;
+   uint32 flags;
+   uint32 dataOffset;
+   uint32 format;
+   uint32 colorKey;
+   uint32 size;
+   uint32 width;
+   uint32 height;
+   uint32 srcX;
+   uint32 srcY;
+   uint32 srcWidth;
+   uint32 srcHeight;
+   int32  dstX;
+   int32  dstY;
+   uint32 dstWidth;
+   uint32 dstHeight;
+   uint32 pitches[3];
+   uint32 dataGMRId;
+   uint32 dstScreenId;
+} SVGAOverlayUnit;
+
+
+/*
+ * SVGAScreenObject --
+ *
+ *    This is a new way to represent a guest's multi-monitor screen or
+ *    Unity window. Screen objects are only supported if the
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT capability bit is set.
+ *
+ *    If Screen Objects are supported, they can be used to fully
+ *    replace the functionality provided by the framebuffer registers
+ *    (SVGA_REG_WIDTH, HEIGHT, etc.) and by SVGA_CAP_DISPLAY_TOPOLOGY.
+ *
+ *    The screen object is a struct with guaranteed binary
+ *    compatibility. New flags can be added, and the struct may grow,
+ *    but existing fields must retain their meaning.
+ *
+ */
+
+#define SVGA_SCREEN_HAS_ROOT    (1 << 0)  // Screen is present in the virtual coord space
+#define SVGA_SCREEN_IS_PRIMARY  (1 << 1)  // Guest considers this screen to be 'primary'
+#define SVGA_SCREEN_FULLSCREEN_HINT (1 << 2)   // Guest is running a fullscreen app here
+
+typedef
+struct SVGAScreenObject {
+   uint32 structSize;   // sizeof(SVGAScreenObject)
+   uint32 id;
+   uint32 flags;
+   struct {
+      uint32 width;
+      uint32 height;
+   } size;
+   struct {
+      int32 x;
+      int32 y;
+   } root;              // Only used if SVGA_SCREEN_HAS_ROOT is set.
+} SVGAScreenObject;
+
+
+/*
+ *  Commands in the command FIFO:
+ *
+ *  Command IDs defined below are used for the traditional 2D FIFO
+ *  communication (not all commands are available for all versions of the
+ *  SVGA FIFO protocol).
+ *
+ *  Note the holes in the command ID numbers: These commands have been
+ *  deprecated, and the old IDs must not be reused.
+ *
+ *  Command IDs from 1000 to 1999 are reserved for use by the SVGA3D
+ *  protocol.
+ *
+ *  Each command's parameters are described by the comments and
+ *  structs below.
+ */
+
+typedef enum {
+   SVGA_CMD_INVALID_CMD           = 0,
+   SVGA_CMD_UPDATE                = 1,
+   SVGA_CMD_RECT_COPY             = 3,
+   SVGA_CMD_DEFINE_CURSOR         = 19,
+   SVGA_CMD_DEFINE_ALPHA_CURSOR   = 22,
+   SVGA_CMD_UPDATE_VERBOSE        = 25,
+   SVGA_CMD_FRONT_ROP_FILL        = 29,
+   SVGA_CMD_FENCE                 = 30,
+   SVGA_CMD_ESCAPE                = 33,
+   SVGA_CMD_DEFINE_SCREEN         = 34,
+   SVGA_CMD_DESTROY_SCREEN        = 35,
+   SVGA_CMD_DEFINE_GMRFB          = 36,
+   SVGA_CMD_BLIT_GMRFB_TO_SCREEN  = 37,
+   SVGA_CMD_BLIT_SCREEN_TO_GMRFB  = 38,
+   SVGA_CMD_ANNOTATION_FILL       = 39,
+   SVGA_CMD_ANNOTATION_COPY       = 40,
+   SVGA_CMD_MAX
+} SVGAFifoCmdId;
+
+#define SVGA_CMD_MAX_ARGS           64
+
+
+/*
+ * SVGA_CMD_UPDATE --
+ *
+ *    This is a DMA transfer which copies from the Guest Framebuffer
+ *    (GFB) at BAR1 + SVGA_REG_FB_OFFSET to any screens which
+ *    intersect with the provided virtual rectangle.
+ *
+ *    This command does not support using arbitrary guest memory as a
+ *    data source- it only works with the pre-defined GFB memory.
+ *    This command also does not support signed virtual coordinates.
+ *    If you have defined screens (using SVGA_CMD_DEFINE_SCREEN) with
+ *    negative root x/y coordinates, the negative portion of those
+ *    screens will not be reachable by this command.
+ *
+ *    This command is not necessary when using framebuffer
+ *    traces. Traces are automatically enabled if the SVGA FIFO is
+ *    disabled, and you may explicitly enable/disable traces using
+ *    SVGA_REG_TRACES. With traces enabled, any write to the GFB will
+ *    automatically act as if a subsequent SVGA_CMD_UPDATE was issued.
+ *
+ *    Traces and SVGA_CMD_UPDATE are the only supported ways to render
+ *    pseudocolor screen updates. The newer Screen Object commands
+ *    only support true color formats.
+ *
+ * Availability:
+ *    Always available.
+ */
+
+typedef
+struct {
+   uint32 x;
+   uint32 y;
+   uint32 width;
+   uint32 height;
+} SVGAFifoCmdUpdate;
+
+
+/*
+ * SVGA_CMD_RECT_COPY --
+ *
+ *    Perform a rectangular DMA transfer from one area of the GFB to
+ *    another, and copy the result to any screens which intersect it.
+ *
+ * Availability:
+ *    SVGA_CAP_RECT_COPY
+ */
+
+typedef
+struct {
+   uint32 srcX;
+   uint32 srcY;
+   uint32 destX;
+   uint32 destY;
+   uint32 width;
+   uint32 height;
+} SVGAFifoCmdRectCopy;
+
+
+/*
+ * SVGA_CMD_DEFINE_CURSOR --
+ *
+ *    Provide a new cursor image, as an AND/XOR mask.
+ *
+ *    The recommended way to position the cursor overlay is by using
+ *    the SVGA_FIFO_CURSOR_* registers, supported by the
+ *    SVGA_FIFO_CAP_CURSOR_BYPASS_3 capability.
+ *
+ * Availability:
+ *    SVGA_CAP_CURSOR
+ */
+
+typedef
+struct {
+   uint32 id;             // Reserved, must be zero.
+   uint32 hotspotX;
+   uint32 hotspotY;
+   uint32 width;
+   uint32 height;
+   uint32 andMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
+   uint32 xorMaskDepth;   // Value must be 1 or equal to BITS_PER_PIXEL
+   /*
+    * Followed by scanline data for AND mask, then XOR mask.
+    * Each scanline is padded to a 32-bit boundary.
+   */
+} SVGAFifoCmdDefineCursor;
+
+
+/*
+ * SVGA_CMD_DEFINE_ALPHA_CURSOR --
+ *
+ *    Provide a new cursor image, in 32-bit BGRA format.
+ *
+ *    The recommended way to position the cursor overlay is by using
+ *    the SVGA_FIFO_CURSOR_* registers, supported by the
+ *    SVGA_FIFO_CAP_CURSOR_BYPASS_3 capability.
+ *
+ * Availability:
+ *    SVGA_CAP_ALPHA_CURSOR
+ */
+
+typedef
+struct {
+   uint32 id;             // Reserved, must be zero.
+   uint32 hotspotX;
+   uint32 hotspotY;
+   uint32 width;
+   uint32 height;
+   /* Followed by scanline data */
+} SVGAFifoCmdDefineAlphaCursor;
+
+
+/*
+ * SVGA_CMD_UPDATE_VERBOSE --
+ *
+ *    Just like SVGA_CMD_UPDATE, but also provide a per-rectangle
+ *    'reason' value, an opaque cookie which is used by internal
+ *    debugging tools. Third party drivers should not use this
+ *    command.
+ *
+ * Availability:
+ *    SVGA_CAP_EXTENDED_FIFO
+ */
+
+typedef
+struct {
+   uint32 x;
+   uint32 y;
+   uint32 width;
+   uint32 height;
+   uint32 reason;
+} SVGAFifoCmdUpdateVerbose;
+
+
+/*
+ * SVGA_CMD_FRONT_ROP_FILL --
+ *
+ *    This is a hint which tells the SVGA device that the driver has
+ *    just filled a rectangular region of the GFB with a solid
+ *    color. Instead of reading these pixels from the GFB, the device
+ *    can assume that they all equal 'color'. This is primarily used
+ *    for remote desktop protocols.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_ACCELFRONT
+ */
+
+#define  SVGA_ROP_COPY                    0x03
+
+typedef
+struct {
+   uint32 color;     // In the same format as the GFB
+   uint32 x;
+   uint32 y;
+   uint32 width;
+   uint32 height;
+   uint32 rop;       // Must be SVGA_ROP_COPY
+} SVGAFifoCmdFrontRopFill;
+
+
+/*
+ * SVGA_CMD_FENCE --
+ *
+ *    Insert a synchronization fence.  When the SVGA device reaches
+ *    this command, it will copy the 'fence' value into the
+ *    SVGA_FIFO_FENCE register. It will also compare the fence against
+ *    SVGA_FIFO_FENCE_GOAL. If the fence matches the goal and the
+ *    SVGA_IRQFLAG_FENCE_GOAL interrupt is enabled, the device will
+ *    raise this interrupt.
+ *
+ * Availability:
+ *    SVGA_FIFO_FENCE for this command,
+ *    SVGA_CAP_IRQMASK for SVGA_FIFO_FENCE_GOAL.
+ */
+
+typedef
+struct {
+   uint32 fence;
+} SVGAFifoCmdFence;
+
+
+/*
+ * SVGA_CMD_ESCAPE --
+ *
+ *    Send an extended or vendor-specific variable length command.
+ *    This is used for video overlay, third party plugins, and
+ *    internal debugging tools. See svga_escape.h
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_ESCAPE
+ */
+
+typedef
+struct {
+   uint32 nsid;
+   uint32 size;
+   /* followed by 'size' bytes of data */
+} SVGAFifoCmdEscape;
+
+
+/*
+ * SVGA_CMD_DEFINE_SCREEN --
+ *
+ *    Define or redefine an SVGAScreenObject. See the description of
+ *    SVGAScreenObject above.  The video driver is responsible for
+ *    generating new screen IDs. They should be small positive
+ *    integers. The virtual device will have an implementation
+ *    specific upper limit on the number of screen IDs
+ *    supported. Drivers are responsible for recycling IDs. The first
+ *    valid ID is zero.
+ *
+ *    - Interaction with other registers:
+ *
+ *    For backwards compatibility, when the GFB mode registers (WIDTH,
+ *    HEIGHT, PITCHLOCK, BITS_PER_PIXEL) are modified, the SVGA device
+ *    deletes all screens other than screen #0, and redefines screen
+ *    #0 according to the specified mode. Drivers that use
+ *    SVGA_CMD_DEFINE_SCREEN should destroy or redefine screen #0.
+ *
+ *    If you use screen objects, do not use the legacy multi-mon
+ *    registers (SVGA_REG_NUM_GUEST_DISPLAYS, SVGA_REG_DISPLAY_*).
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGAScreenObject screen;   // Variable-length according to version
+} SVGAFifoCmdDefineScreen;
+
+
+/*
+ * SVGA_CMD_DESTROY_SCREEN --
+ *
+ *    Destroy an SVGAScreenObject. Its ID is immediately available for
+ *    re-use.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   uint32 screenId;
+} SVGAFifoCmdDestroyScreen;
+
+
+/*
+ * SVGA_CMD_DEFINE_GMRFB --
+ *
+ *    This command sets a piece of SVGA device state called the
+ *    Guest Memory Region Framebuffer, or GMRFB. The GMRFB is a
+ *    piece of light-weight state which identifies the location and
+ *    format of an image in guest memory or in BAR1. The GMRFB has
+ *    an arbitrary size, and it doesn't need to match the geometry
+ *    of the GFB or any screen object.
+ *
+ *    The GMRFB can be redefined as often as you like. You could
+ *    always use the same GMRFB, you could redefine it before
+ *    rendering from a different guest screen, or you could even
+ *    redefine it before every blit.
+ *
+ *    There are multiple ways to use this command. The simplest way is
+ *    to use it to move the framebuffer either to elsewhere in the GFB
+ *    (BAR1) memory region, or to a user-defined GMR. This lets a
+ *    driver use a framebuffer allocated entirely out of normal system
+ *    memory, which we encourage.
+ *
+ *    Another way to use this command is to set up a ring buffer of
+ *    updates in GFB memory. If a driver wants to ensure that no
+ *    frames are skipped by the SVGA device, it is important that the
+ *    driver not modify the source data for a blit until the device is
+ *    done processing the command. One efficient way to accomplish
+ *    this is to use a ring of small DMA buffers. Each buffer is used
+ *    for one blit, then we move on to the next buffer in the
+ *    ring. The FENCE mechanism is used to protect each buffer from
+ *    re-use until the device is finished with that buffer's
+ *    corresponding blit.
+ *
+ *    This command does not affect the meaning of SVGA_CMD_UPDATE.
+ *    UPDATEs always occur from the legacy GFB memory area. This
+ *    command has no support for pseudocolor GMRFBs. Currently only
+ *    true-color 15, 16, and 24-bit depths are supported. Future
+ *    devices may expose capabilities for additional framebuffer
+ *    formats.
+ *
+ *    The default GMRFB value is undefined. Drivers must always send
+ *    this command at least once before performing any blit from the
+ *    GMRFB.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGAGuestPtr        ptr;
+   uint32              bytesPerLine;
+   SVGAGMRImageFormat  format;
+} SVGAFifoCmdDefineGMRFB;
+
+
+/*
+ * SVGA_CMD_BLIT_GMRFB_TO_SCREEN --
+ *
+ *    This is a guest-to-host blit. It performs a DMA operation to
+ *    copy a rectangular region of pixels from the current GMRFB to
+ *    one or more Screen Objects.
+ *
+ *    The destination coordinate may be specified relative to a
+ *    screen's origin (if a screen ID is specified) or relative to the
+ *    virtual coordinate system's origin (if the screen ID is
+ *    SVGA_ID_INVALID). The actual destination may span zero or more
+ *    screens, in the case of a virtual destination rect or a rect
+ *    which extends off the edge of the specified screen.
+ *
+ *    This command writes to the screen's "base layer": the underlying
+ *    framebuffer which exists below any cursor or video overlays. No
+ *    action is necessary to explicitly hide or update any overlays
+ *    which exist on top of the updated region.
+ *
+ *    The SVGA device is guaranteed to finish reading from the GMRFB
+ *    by the time any subsequent FENCE commands are reached.
+ *
+ *    This command consumes an annotation. See the
+ *    SVGA_CMD_ANNOTATION_* commands for details.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGASignedPoint  srcOrigin;
+   SVGASignedRect   destRect;
+   uint32           destScreenId;
+} SVGAFifoCmdBlitGMRFBToScreen;
+
+
+/*
+ * SVGA_CMD_BLIT_SCREEN_TO_GMRFB --
+ *
+ *    This is a host-to-guest blit. It performs a DMA operation to
+ *    copy a rectangular region of pixels from a single Screen Object
+ *    back to the current GMRFB.
+ *
+ *    Usage note: This command should be used rarely. It will
+ *    typically be inefficient, but it is necessary for some types of
+ *    synchronization between 3D (GPU) and 2D (CPU) rendering into
+ *    overlapping areas of a screen.
+ *
+ *    The source coordinate is specified relative to a screen's
+ *    origin. The provided screen ID must be valid. If any parameters
+ *    are invalid, the resulting pixel values are undefined.
+ *
+ *    This command reads the screen's "base layer". Overlays like
+ *    video and cursor are not included, but any data which was sent
+ *    using a blit-to-screen primitive will be available, no matter
+ *    whether the data's original source was the GMRFB or the 3D
+ *    acceleration hardware.
+ *
+ *    Note that our guest-to-host blits and host-to-guest blits aren't
+ *    symmetric in their current implementation. While the parameters
+ *    are identical, host-to-guest blits are a lot less featureful.
+ *    They do not support clipping: If the source parameters don't
+ *    fully fit within a screen, the blit fails. They must originate
+ *    from exactly one screen. Virtual coordinates are not directly
+ *    supported.
+ *
+ *    Host-to-guest blits do support the same set of GMRFB formats
+ *    offered by guest-to-host blits.
+ *
+ *    The SVGA device is guaranteed to finish writing to the GMRFB by
+ *    the time any subsequent FENCE commands are reached.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGASignedPoint  destOrigin;
+   SVGASignedRect   srcRect;
+   uint32           srcScreenId;
+} SVGAFifoCmdBlitScreenToGMRFB;
+
+
+/*
+ * SVGA_CMD_ANNOTATION_FILL --
+ *
+ *    This is a blit annotation. This command stores a small piece of
+ *    device state which is consumed by the next blit-to-screen
+ *    command. The state is only cleared by commands which are
+ *    specifically documented as consuming an annotation. Other
+ *    commands (such as ESCAPEs for debugging) may intervene between
+ *    the annotation and its associated blit.
+ *
+ *    This annotation is a promise about the contents of the next
+ *    blit: The video driver is guaranteeing that all pixels in that
+ *    blit will have the same value, specified here as a color in
+ *    SVGAColorBGRX format.
+ *
+ *    The SVGA device can still render the blit correctly even if it
+ *    ignores this annotation, but the annotation may allow it to
+ *    perform the blit more efficiently, for example by ignoring the
+ *    source data and performing a fill in hardware.
+ *
+ *    This annotation is most important for performance when the
+ *    user's display is being remoted over a network connection.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGAColorBGRX  color;
+} SVGAFifoCmdAnnotationFill;
+
+
+/*
+ * SVGA_CMD_ANNOTATION_COPY --
+ *
+ *    This is a blit annotation. See SVGA_CMD_ANNOTATION_FILL for more
+ *    information about annotations.
+ *
+ *    This annotation is a promise about the contents of the next
+ *    blit: The video driver is guaranteeing that all pixels in that
+ *    blit will have the same value as those which already exist at an
+ *    identically-sized region on the same or a different screen.
+ *
+ *    Note that the source pixels for the COPY in this annotation are
+ *    sampled before applying the anqnotation's associated blit. They
+ *    are allowed to overlap with the blit's destination pixels.
+ *
+ *    The copy source rectangle is specified the same way as the blit
+ *    destination: it can be a rectangle which spans zero or more
+ *    screens, specified relative to either a screen or to the virtual
+ *    coordinate system's origin. If the source rectangle includes
+ *    pixels which are not from exactly one screen, the results are
+ *    undefined.
+ *
+ * Availability:
+ *    SVGA_FIFO_CAP_SCREEN_OBJECT
+ */
+
+typedef
+struct {
+   SVGASignedPoint  srcOrigin;
+   uint32           srcScreenId;
+} SVGAFifoCmdAnnotationCopy;
+
+#endif
diff --git a/src/gallium/drivers/svga/include/svga_types.h b/src/gallium/drivers/svga/include/svga_types.h
new file mode 100644
index 00000000000..7fd9bab03a5
--- /dev/null
+++ b/src/gallium/drivers/svga/include/svga_types.h
@@ -0,0 +1,46 @@
+/**********************************************************
+ * Copyright 1998-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef _SVGA_TYPES_H_
+#define _SVGA_TYPES_H_
+
+#include "pipe/p_compiler.h"
+
+typedef int64_t int64;
+typedef uint64_t uint64;
+
+typedef int32_t int32;
+typedef uint32_t uint32;
+
+typedef int16_t int16;
+typedef uint16_t uint16;
+
+typedef int8_t int8;
+typedef uint8_t uint8;
+
+typedef uint8_t Bool;
+
+#endif /* _SVGA_TYPES_H_ */
+
diff --git a/src/gallium/drivers/svga/svga_cmd.c b/src/gallium/drivers/svga/svga_cmd.c
new file mode 100644
index 00000000000..a0da7d7e5d5
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_cmd.c
@@ -0,0 +1,1427 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * svga_cmd.c --
+ *
+ *      Command construction utility for the SVGA3D protocol used by
+ *      the VMware SVGA device, based on the svgautil library.
+ */
+
+#include "svga_winsys.h"
+#include "svga_screen_buffer.h"
+#include "svga_screen_texture.h"
+#include "svga_cmd.h"
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * surface_to_surfaceid --
+ *
+ *      Utility function for surface ids.
+ *      Can handle null surface. Does a surface_reallocation so you need
+ *      to have allocated the fifo space before converting.
+ *
+ * Results:
+ *      id is filld out.
+ *
+ * Side effects:
+ *      One surface relocation is preformed for texture handle.
+ *
+ *----------------------------------------------------------------------
+ */
+
+static INLINE
+void surface_to_surfaceid(struct svga_winsys_context *swc, // IN
+                          struct pipe_surface *surface,    // IN
+                          SVGA3dSurfaceImageId *id,        // OUT
+                          unsigned flags)                  // IN
+{
+   if(surface) {
+      struct svga_surface *s = svga_surface(surface);
+      swc->surface_relocation(swc, &id->sid, s->handle, flags);
+      id->face = s->real_face; /* faces have the same order */
+      id->mipmap = s->real_level;
+   }
+   else {
+      id->sid = SVGA3D_INVALID_ID;
+      id->face = 0;
+      id->mipmap = 0;
+   }
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_FIFOReserve --
+ *
+ *      Reserve space for an SVGA3D FIFO command.
+ *
+ *      The 2D SVGA commands have been around for a while, so they
+ *      have a rather asymmetric structure. The SVGA3D protocol is
+ *      more uniform: each command begins with a header containing the
+ *      command number and the full size.
+ *
+ *      This is a convenience wrapper around SVGA_FIFOReserve. We
+ *      reserve space for the whole command, and write the header.
+ *
+ *      This function must be paired with SVGA_FIFOCommitAll().
+ *
+ * Results:
+ *      Returns a pointer to the space reserved for command-specific
+ *      data. It must be 'cmdSize' bytes long.
+ *
+ * Side effects:
+ *      Begins a FIFO reservation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+void *
+SVGA3D_FIFOReserve(struct svga_winsys_context *swc,
+                   uint32 cmd,       // IN
+                   uint32 cmdSize,   // IN
+                   uint32 nr_relocs) // IN
+{
+   SVGA3dCmdHeader *header;
+
+   header = swc->reserve(swc, sizeof *header + cmdSize, nr_relocs);
+   if(!header)
+      return NULL;
+
+   header->id = cmd;
+   header->size = cmdSize;
+
+   return &header[1];
+}
+
+
+void
+SVGA_FIFOCommitAll(struct svga_winsys_context *swc)
+{
+   swc->commit(swc);
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DefineContext --
+ *
+ *      Create a new context, to be referred to with the provided ID.
+ *
+ *      Context objects encapsulate all render state, and shader
+ *      objects are per-context.
+ *
+ *      Surfaces are not per-context. The same surface can be shared
+ *      between multiple contexts, and surface operations can occur
+ *      without a context.
+ *
+ *      If the provided context ID already existed, it is redefined.
+ *
+ *      Context IDs are arbitrary small non-negative integers,
+ *      global to the entire SVGA device.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DefineContext(struct svga_winsys_context *swc)  // IN
+{
+   SVGA3dCmdDefineContext *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_CONTEXT_DEFINE, sizeof *cmd, 0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+
+   swc->commit(swc);
+   
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DestroyContext --
+ *
+ *      Delete a context created with SVGA3D_DefineContext.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DestroyContext(struct svga_winsys_context *swc)  // IN
+{
+   SVGA3dCmdDestroyContext *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_CONTEXT_DESTROY, sizeof *cmd, 0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   
+   cmd->cid = swc->cid;
+   
+   swc->commit(swc);
+   
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginDefineSurface --
+ *
+ *      Begin a SURFACE_DEFINE command. This reserves space for it in
+ *      the FIFO, and returns pointers to the command's faces and
+ *      mipsizes arrays.
+ *
+ *      This function must be paired with SVGA_FIFOCommitAll().
+ *      The faces and mipSizes arrays are initialized to zero.
+ *
+ *      This creates a "surface" object in the SVGA3D device,
+ *      with the provided surface ID (sid). Surfaces are generic
+ *      containers for host VRAM objects like textures, vertex
+ *      buffers, and depth/stencil buffers.
+ *
+ *      Surfaces are hierarchial:
+ *
+ *        - Surface may have multiple faces (for cube maps)
+ *
+ *          - Each face has a list of mipmap levels
+ *
+ *             - Each mipmap image may have multiple volume
+ *               slices, if the image is three dimensional.
+ *
+ *                - Each slice is a 2D array of 'blocks'
+ *
+ *                   - Each block may be one or more pixels.
+ *                     (Usually 1, more for DXT or YUV formats.)
+ *
+ *      Surfaces are generic host VRAM objects. The SVGA3D device
+ *      may optimize surfaces according to the format they were
+ *      created with, but this format does not limit the ways in
+ *      which the surface may be used. For example, a depth surface
+ *      can be used as a texture, or a floating point image may
+ *      be used as a vertex buffer. Some surface usages may be
+ *      lower performance, due to software emulation, but any
+ *      usage should work with any surface.
+ *
+ *      If 'sid' is already defined, the old surface is deleted
+ *      and this new surface replaces it.
+ *
+ *      Surface IDs are arbitrary small non-negative integers,
+ *      global to the entire SVGA device.
+ *
+ * Results:
+ *      Returns pointers to arrays allocated in the FIFO for 'faces'
+ *      and 'mipSizes'.
+ *
+ * Side effects:
+ *      Begins a FIFO reservation.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginDefineSurface(struct svga_winsys_context *swc,
+                          struct svga_winsys_surface *sid, // IN
+                          SVGA3dSurfaceFlags flags,    // IN
+                          SVGA3dSurfaceFormat format,  // IN
+                          SVGA3dSurfaceFace **faces,   // OUT
+                          SVGA3dSize **mipSizes,       // OUT
+                          uint32 numMipSizes)          // IN
+{
+   SVGA3dCmdDefineSurface *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SURFACE_DEFINE, sizeof *cmd +
+                            sizeof **mipSizes * numMipSizes, 1);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->surface_relocation(swc, &cmd->sid, sid, PIPE_BUFFER_USAGE_GPU_WRITE);
+   cmd->surfaceFlags = flags;
+   cmd->format = format;
+
+   *faces = &cmd->face[0];
+   *mipSizes = (SVGA3dSize*) &cmd[1];
+
+   memset(*faces, 0, sizeof **faces * SVGA3D_MAX_SURFACE_FACES);
+   memset(*mipSizes, 0, sizeof **mipSizes * numMipSizes);
+   
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DefineSurface2D --
+ *
+ *      This is a simplified version of SVGA3D_BeginDefineSurface(),
+ *      which does not support cube maps, mipmaps, or volume textures.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DefineSurface2D(struct svga_winsys_context *swc,    // IN
+                       struct svga_winsys_surface *sid, // IN
+                       uint32 width,                // IN
+                       uint32 height,               // IN
+                       SVGA3dSurfaceFormat format)  // IN
+{
+   SVGA3dSize *mipSizes;
+   SVGA3dSurfaceFace *faces;
+   enum pipe_error ret;
+
+   ret = SVGA3D_BeginDefineSurface(swc,
+                                   sid, 0, format, &faces, &mipSizes, 1);
+   if(ret != PIPE_OK)
+      return ret;
+
+   faces[0].numMipLevels = 1;
+
+   mipSizes[0].width = width;
+   mipSizes[0].height = height;
+   mipSizes[0].depth = 1;
+ 
+   swc->commit(swc);;
+   
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DestroySurface --
+ *
+ *      Release the host VRAM encapsulated by a particular surface ID.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DestroySurface(struct svga_winsys_context *swc,
+                      struct svga_winsys_surface *sid)  // IN
+{
+   SVGA3dCmdDestroySurface *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SURFACE_DESTROY, sizeof *cmd, 1);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   
+   swc->surface_relocation(swc, &cmd->sid, sid, PIPE_BUFFER_USAGE_GPU_READ);
+   swc->commit(swc);;
+   
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginSurfaceDMA--
+ *
+ *      Begin a SURFACE_DMA command. This reserves space for it in
+ *      the FIFO, and returns a pointer to the command's box array.
+ *      This function must be paired with SVGA_FIFOCommitAll().
+ *
+ *      When the SVGA3D device asynchronously processes this FIFO
+ *      command, a DMA operation is performed between host VRAM and
+ *      a generic SVGAGuestPtr. The guest pointer may refer to guest
+ *      VRAM (provided by the SVGA PCI device) or to guest system
+ *      memory that has been set up as a Guest Memory Region (GMR)
+ *      by the SVGA device.
+ *
+ *      The guest's DMA buffer must remain valid (not freed, paged out,
+ *      or overwritten) until the host has finished processing this
+ *      command. The guest can determine that the host has finished
+ *      by using the SVGA device's FIFO Fence mechanism.
+ *
+ *      The guest's image buffer can be an arbitrary size and shape.
+ *      Guest image data is interpreted according to the SVGA3D surface
+ *      format specified when the surface was defined.
+ *
+ *      The caller may optionally define the guest image's pitch.
+ *      guestImage->pitch can either be zero (assume image is tightly
+ *      packed) or it must be the number of bytes between vertically
+ *      adjacent image blocks.
+ *
+ *      The provided copybox list specifies which regions of the source
+ *      image are to be copied, and where they appear on the destination.
+ *
+ *      NOTE: srcx/srcy are always on the guest image and x/y are
+ *      always on the host image, regardless of the actual transfer
+ *      direction!
+ *
+ *      For efficiency, the SVGA3D device is free to copy more data
+ *      than specified. For example, it may round copy boxes outwards
+ *      such that they lie on particular alignment boundaries.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
+                  struct svga_transfer *st,         // IN
+                  SVGA3dTransferType transfer,      // IN
+                  const SVGA3dCopyBox *boxes,       // IN
+                  uint32 numBoxes)                  // IN
+{
+   struct svga_texture *texture = svga_texture(st->base.texture); 
+   SVGA3dCmdSurfaceDMA *cmd;
+   SVGA3dCmdSurfaceDMASuffix *pSuffix;
+   uint32 boxesSize = sizeof *boxes * numBoxes;
+   unsigned region_flags;
+   unsigned surface_flags;
+   
+   if(transfer == SVGA3D_WRITE_HOST_VRAM) {
+      region_flags = PIPE_BUFFER_USAGE_GPU_READ;
+      surface_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+   }
+   else if(transfer == SVGA3D_READ_HOST_VRAM) {
+      region_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+      surface_flags = PIPE_BUFFER_USAGE_GPU_READ;
+   }
+   else {
+      assert(0);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SURFACE_DMA,
+                            sizeof *cmd + boxesSize + sizeof *pSuffix,
+                            2);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->region_relocation(swc, &cmd->guest.ptr, st->hwbuf, 0, region_flags);
+   cmd->guest.pitch = st->base.stride;
+
+   swc->surface_relocation(swc, &cmd->host.sid, texture->handle, surface_flags);
+   cmd->host.face = st->base.face; /* PIPE_TEX_FACE_* and SVGA3D_CUBEFACE_* match */
+   cmd->host.mipmap = st->base.level;
+
+   cmd->transfer = transfer;
+
+   memcpy(&cmd[1], boxes, boxesSize);
+   
+   pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + boxesSize);
+   pSuffix->suffixSize = sizeof *pSuffix;
+   pSuffix->maximumOffset = st->hw_nblocksy*st->base.stride;
+   memset(&pSuffix->flags, 0, sizeof pSuffix->flags);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+enum pipe_error
+SVGA3D_BufferDMA(struct svga_winsys_context *swc,
+                 struct svga_winsys_buffer *guest,
+                 struct svga_winsys_surface *host,
+                 SVGA3dTransferType transfer,      // IN
+                 uint32 size,                      // IN
+                 uint32 offset,                    // IN
+                 SVGA3dSurfaceDMAFlags flags)      // IN
+{
+   SVGA3dCmdSurfaceDMA *cmd;
+   SVGA3dCopyBox *box;
+   SVGA3dCmdSurfaceDMASuffix *pSuffix;
+   unsigned region_flags;
+   unsigned surface_flags;
+   
+   if(transfer == SVGA3D_WRITE_HOST_VRAM) {
+      region_flags = PIPE_BUFFER_USAGE_GPU_READ;
+      surface_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+   }
+   else if(transfer == SVGA3D_READ_HOST_VRAM) {
+      region_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+      surface_flags = PIPE_BUFFER_USAGE_GPU_READ;
+   }
+   else {
+      assert(0);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SURFACE_DMA,
+                            sizeof *cmd + sizeof *box + sizeof *pSuffix,
+                            2);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->region_relocation(swc, &cmd->guest.ptr, guest, 0, region_flags);
+   cmd->guest.pitch = 0;
+
+   swc->surface_relocation(swc, &cmd->host.sid, host, surface_flags);
+   cmd->host.face = 0;
+   cmd->host.mipmap = 0;
+
+   cmd->transfer = transfer;
+
+   box = (SVGA3dCopyBox *)&cmd[1];
+   box->x = offset;
+   box->y = 0;
+   box->z = 0;
+   box->w = size;
+   box->h = 1;
+   box->d = 1;
+   box->srcx = offset;
+   box->srcy = 0;
+   box->srcz = 0;
+   
+   pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + sizeof *box);
+   pSuffix->suffixSize = sizeof *pSuffix;
+   pSuffix->maximumOffset = offset + size;
+   pSuffix->flags = flags;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetRenderTarget --
+ *
+ *      Bind a surface object to a particular render target attachment
+ *      point on the current context. Render target attachment points
+ *      exist for color buffers, a depth buffer, and a stencil buffer.
+ *
+ *      The SVGA3D device is quite lenient about the types of surfaces
+ *      that may be used as render targets. The color buffers must
+ *      all be the same size, but the depth and stencil buffers do not
+ *      have to be the same size as the color buffer. All attachments
+ *      are optional.
+ *
+ *      Some combinations of render target formats may require software
+ *      emulation, depending on the capabilities of the host graphics
+ *      API and graphics hardware.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetRenderTarget(struct svga_winsys_context *swc,
+                       SVGA3dRenderTargetType type,   // IN
+                       struct pipe_surface *surface)  // IN
+{
+   SVGA3dCmdSetRenderTarget *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SETRENDERTARGET, sizeof *cmd, 1);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+
+   cmd->cid = swc->cid;
+
+   cmd->type = type;
+
+   surface_to_surfaceid(swc, surface, &cmd->target, PIPE_BUFFER_USAGE_GPU_WRITE);
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+
+
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DefineShader --
+ *
+ *      Upload the bytecode for a new shader. The bytecode is "SVGA3D
+ *      format", which is theoretically a binary-compatible superset
+ *      of Microsoft's DirectX shader bytecode. In practice, the
+ *      SVGA3D bytecode doesn't yet have any extensions to DirectX's
+ *      bytecode format.
+ *
+ *      The SVGA3D device supports shader models 1.1 through 2.0.
+ *
+ *      The caller chooses a shader ID (small positive integer) by
+ *      which this shader will be identified in future commands. This
+ *      ID is in a namespace which is per-context and per-shader-type.
+ *
+ *      'bytecodeLen' is specified in bytes. It must be a multiple of 4.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DefineShader(struct svga_winsys_context *swc,
+                    uint32 shid,                  // IN
+                    SVGA3dShaderType type,        // IN
+                    const uint32 *bytecode,       // IN
+                    uint32 bytecodeLen)           // IN
+{
+   SVGA3dCmdDefineShader *cmd;
+
+   assert(bytecodeLen % 4 == 0);
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SHADER_DEFINE, sizeof *cmd + bytecodeLen,
+                            0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->shid = shid;
+   cmd->type = type;
+   memcpy(&cmd[1], bytecode, bytecodeLen);
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_DestroyShader --
+ *
+ *      Delete a shader that was created by SVGA3D_DefineShader. If
+ *      the shader was the current vertex or pixel shader for its
+ *      context, rendering results are undefined until a new shader is
+ *      bound.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_DestroyShader(struct svga_winsys_context *swc,
+                     uint32 shid,            // IN
+                     SVGA3dShaderType type)  // IN
+{
+   SVGA3dCmdDestroyShader *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SHADER_DESTROY, sizeof *cmd,
+                            0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->shid = shid;
+   cmd->type = type;
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetShaderConst --
+ *
+ *      Set the value of a shader constant.
+ *
+ *      Shader constants are analogous to uniform variables in GLSL,
+ *      except that they belong to the render context rather than to
+ *      an individual shader.
+ *
+ *      Constants may have one of three types: A 4-vector of floats,
+ *      a 4-vector of integers, or a single boolean flag.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetShaderConst(struct svga_winsys_context *swc,
+                      uint32 reg,                   // IN
+                      SVGA3dShaderType type,        // IN
+                      SVGA3dShaderConstType ctype,  // IN
+                      const void *value)            // IN
+{
+   SVGA3dCmdSetShaderConst *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SET_SHADER_CONST, sizeof *cmd,
+                            0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->reg = reg;
+   cmd->type = type;
+   cmd->ctype = ctype;
+
+   switch (ctype) {
+
+   case SVGA3D_CONST_TYPE_FLOAT:
+   case SVGA3D_CONST_TYPE_INT:
+      memcpy(&cmd->values, value, sizeof cmd->values);
+      break;
+
+   case SVGA3D_CONST_TYPE_BOOL:
+      memset(&cmd->values, 0, sizeof cmd->values);
+      cmd->values[0] = *(uint32*)value;
+      break;
+
+   default:
+      assert(0);
+      break;
+
+   }
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetShader --
+ *
+ *      Switch active shaders. This binds a new vertex or pixel shader
+ *      to the specified context.
+ *
+ *      A shader ID of SVGA3D_INVALID_ID unbinds any shader, switching
+ *      back to the fixed function vertex or pixel pipeline.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetShader(struct svga_winsys_context *swc,
+                 SVGA3dShaderType type,  // IN
+                 uint32 shid)            // IN
+{
+   SVGA3dCmdSetShader *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SET_SHADER, sizeof *cmd,
+                            0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+   
+   cmd->cid = swc->cid;
+   cmd->type = type;
+   cmd->shid = shid;
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginClear --
+ *
+ *      Begin a CLEAR command. This reserves space for it in the FIFO,
+ *      and returns a pointer to the command's rectangle array.  This
+ *      function must be paired with SVGA_FIFOCommitAll().
+ *
+ *      Clear is a rendering operation which fills a list of
+ *      rectangles with constant values on all render target types
+ *      indicated by 'flags'.
+ *
+ *      Clear is not affected by clipping, depth test, or other
+ *      render state which affects the fragment pipeline.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      May write to attached render target surfaces.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginClear(struct svga_winsys_context *swc,
+                  SVGA3dClearFlag flags,  // IN
+                  uint32 color,           // IN
+                  float depth,            // IN
+                  uint32 stencil,         // IN
+                  SVGA3dRect **rects,     // OUT
+                  uint32 numRects)        // IN
+{
+   SVGA3dCmdClear *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_CLEAR, 
+                            sizeof *cmd + sizeof **rects * numRects,
+                            0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->clearFlag = flags;
+   cmd->color = color;
+   cmd->depth = depth;
+   cmd->stencil = stencil;
+   *rects = (SVGA3dRect*) &cmd[1];
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_ClearRect --
+ *
+ *      This is a simplified version of SVGA3D_BeginClear().
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_ClearRect(struct svga_winsys_context *swc,
+                 SVGA3dClearFlag flags,  // IN
+                 uint32 color,           // IN
+                 float depth,            // IN
+                 uint32 stencil,         // IN
+                 uint32 x,               // IN
+                 uint32 y,               // IN
+                 uint32 w,               // IN
+                 uint32 h)               // IN
+{
+   SVGA3dRect *rect;
+   enum pipe_error ret;
+
+   ret = SVGA3D_BeginClear(swc, flags, color, depth, stencil, &rect, 1);
+   if(ret != PIPE_OK)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   memset(rect, 0, sizeof *rect);
+   rect->x = x;
+   rect->y = y;
+   rect->w = w;
+   rect->h = h;
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginDrawPrimitives --
+ *
+ *      Begin a DRAW_PRIMITIVES command. This reserves space for it in
+ *      the FIFO, and returns a pointer to the command's arrays.
+ *      This function must be paired with SVGA_FIFOCommitAll().
+ *
+ *      Drawing commands consist of two variable-length arrays:
+ *      SVGA3dVertexDecl elements declare a set of vertex buffers to
+ *      use while rendering, and SVGA3dPrimitiveRange elements specify
+ *      groups of primitives each with an optional index buffer.
+ *
+ *      The decls and ranges arrays are initialized to zero.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      May write to attached render target surfaces.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc,
+                           SVGA3dVertexDecl **decls,      // OUT
+                           uint32 numVertexDecls,         // IN
+                           SVGA3dPrimitiveRange **ranges, // OUT
+                           uint32 numRanges)              // IN
+{
+   SVGA3dCmdDrawPrimitives *cmd;
+   SVGA3dVertexDecl *declArray;
+   SVGA3dPrimitiveRange *rangeArray;
+   uint32 declSize = sizeof **decls * numVertexDecls;
+   uint32 rangeSize = sizeof **ranges * numRanges;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_DRAW_PRIMITIVES, 
+                            sizeof *cmd + declSize + rangeSize,
+                            numVertexDecls + numRanges);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->numVertexDecls = numVertexDecls;
+   cmd->numRanges = numRanges;
+
+   declArray = (SVGA3dVertexDecl*) &cmd[1];
+   rangeArray = (SVGA3dPrimitiveRange*) &declArray[numVertexDecls];
+
+   memset(declArray, 0, declSize);
+   memset(rangeArray, 0, rangeSize);
+
+   *decls = declArray;
+   *ranges = rangeArray;
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginSurfaceCopy --
+ *
+ *      Begin a SURFACE_COPY command. This reserves space for it in
+ *      the FIFO, and returns a pointer to the command's arrays.  This
+ *      function must be paired with SVGA_FIFOCommitAll().
+ *
+ *      The box array is initialized with zeroes.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Asynchronously copies a list of boxes from surface to surface.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginSurfaceCopy(struct svga_winsys_context *swc,
+                        struct pipe_surface *src,    // IN
+                        struct pipe_surface *dest,   // IN
+                        SVGA3dCopyBox **boxes,       // OUT
+                        uint32 numBoxes)             // IN
+{
+   SVGA3dCmdSurfaceCopy *cmd;
+   uint32 boxesSize = sizeof **boxes * numBoxes;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SURFACE_COPY, sizeof *cmd + boxesSize,
+                            2);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   surface_to_surfaceid(swc, src, &cmd->src, PIPE_BUFFER_USAGE_GPU_READ);
+   surface_to_surfaceid(swc, dest, &cmd->dest, PIPE_BUFFER_USAGE_GPU_WRITE);
+   *boxes = (SVGA3dCopyBox*) &cmd[1];
+
+   memset(*boxes, 0, boxesSize);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SurfaceStretchBlt --
+ *
+ *      Issue a SURFACE_STRETCHBLT command: an asynchronous
+ *      surface-to-surface blit, with scaling.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Asynchronously copies one box from surface to surface.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SurfaceStretchBlt(struct svga_winsys_context *swc,
+                         struct pipe_surface *src,    // IN
+                         struct pipe_surface *dest,   // IN
+                         SVGA3dBox *boxSrc,           // IN
+                         SVGA3dBox *boxDest,          // IN
+                         SVGA3dStretchBltMode mode)   // IN
+{
+   SVGA3dCmdSurfaceStretchBlt *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SURFACE_STRETCHBLT, sizeof *cmd,
+                            2);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   surface_to_surfaceid(swc, src, &cmd->src, PIPE_BUFFER_USAGE_GPU_READ);
+   surface_to_surfaceid(swc, dest, &cmd->dest, PIPE_BUFFER_USAGE_GPU_WRITE);
+   cmd->boxSrc = *boxSrc;
+   cmd->boxDest = *boxDest;
+   cmd->mode = mode;
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetViewport --
+ *
+ *      Set the current context's viewport rectangle. The viewport
+ *      is clipped to the dimensions of the current render target,
+ *      then all rendering is clipped to the viewport.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetViewport(struct svga_winsys_context *swc,
+                   SVGA3dRect *rect)  // IN
+{
+   SVGA3dCmdSetViewport *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SETVIEWPORT, sizeof *cmd,
+                            0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->rect = *rect;
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetScissorRect --
+ *
+ *      Set the current context's scissor rectangle. If scissor
+ *      is enabled then all rendering is clipped to the scissor.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetScissorRect(struct svga_winsys_context *swc,
+                      SVGA3dRect *rect)  // IN
+{
+   SVGA3dCmdSetScissorRect *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SETSCISSORRECT, sizeof *cmd,
+                            0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->rect = *rect;
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetClipPlane --
+ *
+ *      Set one of the current context's clip planes. If the clip
+ *      plane is enabled then all 3d rendering is clipped to against
+ *      the plane.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error SVGA3D_SetClipPlane(struct svga_winsys_context *swc,
+                         uint32 index, const float *plane)
+{
+   SVGA3dCmdSetClipPlane *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SETCLIPPLANE, sizeof *cmd,
+                            0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->index = index;
+   cmd->plane[0] = plane[0];
+   cmd->plane[1] = plane[1];
+   cmd->plane[2] = plane[2];
+   cmd->plane[3] = plane[3];
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_SetZRange --
+ *
+ *      Set the range of the depth buffer to use. 'min' and 'max'
+ *      are values between 0.0 and 1.0.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_SetZRange(struct svga_winsys_context *swc,
+                 float zMin,  // IN
+                 float zMax)  // IN
+{
+   SVGA3dCmdSetZRange *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SETZRANGE, sizeof *cmd,
+                            0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->zRange.min = zMin;
+   cmd->zRange.max = zMax;
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginSetTextureState --
+ *
+ *      Begin a SETTEXTURESTATE command. This reserves space for it in
+ *      the FIFO, and returns a pointer to the command's texture state
+ *      array.  This function must be paired with SVGA_FIFOCommitAll().
+ *
+ *      This command sets rendering state which is per-texture-unit.
+ *
+ *      XXX: Individual texture states need documentation. However,
+ *           they are very similar to the texture states defined by
+ *           Direct3D. The D3D documentation is a good starting point
+ *           for understanding SVGA3D texture states.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginSetTextureState(struct svga_winsys_context *swc,
+                            SVGA3dTextureState **states,  // OUT
+                            uint32 numStates)             // IN
+{
+   SVGA3dCmdSetTextureState *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SETTEXTURESTATE, 
+                            sizeof *cmd + sizeof **states * numStates,
+                            numStates);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   *states = (SVGA3dTextureState*) &cmd[1];
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginSetRenderState --
+ *
+ *      Begin a SETRENDERSTATE command. This reserves space for it in
+ *      the FIFO, and returns a pointer to the command's texture state
+ *      array.  This function must be paired with SVGA_FIFOCommitAll().
+ *
+ *      This command sets rendering state which is global to the context.
+ *
+ *      XXX: Individual render states need documentation. However,
+ *           they are very similar to the render states defined by
+ *           Direct3D. The D3D documentation is a good starting point
+ *           for understanding SVGA3D render states.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      None.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginSetRenderState(struct svga_winsys_context *swc,
+                           SVGA3dRenderState **states,  // OUT
+                           uint32 numStates)            // IN
+{
+   SVGA3dCmdSetRenderState *cmd;
+   
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SETRENDERSTATE, 
+                            sizeof *cmd + sizeof **states * numStates,
+                            0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   *states = (SVGA3dRenderState*) &cmd[1];
+
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_BeginQuery--
+ *
+ *      Issues a SVGA_3D_CMD_BEGIN_QUERY command.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Commits space in the FIFO memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_BeginQuery(struct svga_winsys_context *swc,
+                  SVGA3dQueryType type) // IN
+{
+   SVGA3dCmdBeginQuery *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_BEGIN_QUERY,
+                            sizeof *cmd,
+                            0);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->type = type;
+
+   swc->commit(swc);
+   
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_EndQuery--
+ *
+ *      Issues a SVGA_3D_CMD_END_QUERY command.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Commits space in the FIFO memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_EndQuery(struct svga_winsys_context *swc,
+                SVGA3dQueryType type,              // IN
+                struct svga_winsys_buffer *buffer) // IN/OUT
+{
+   SVGA3dCmdEndQuery *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_END_QUERY, 
+                            sizeof *cmd,
+                            1);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->type = type;
+
+   swc->region_relocation(swc, &cmd->guestResult, buffer, 0,
+                          PIPE_BUFFER_USAGE_GPU_WRITE);
+
+   swc->commit(swc);
+   
+   return PIPE_OK;
+}
+
+
+/*
+ *----------------------------------------------------------------------
+ *
+ * SVGA3D_WaitForQuery--
+ *
+ *      Issues a SVGA_3D_CMD_WAIT_FOR_QUERY command.  This reserves space
+ *      for it in the FIFO.  This doesn't actually wait for the query to
+ *      finish but instead tells the host to start a wait at the driver
+ *      level.  The caller can wait on the status variable in the
+ *      guestPtr memory or send an insert fence instruction after this
+ *      command and wait on the fence.
+ *
+ * Results:
+ *      None.
+ *
+ * Side effects:
+ *      Commits space in the FIFO memory.
+ *
+ *----------------------------------------------------------------------
+ */
+
+enum pipe_error
+SVGA3D_WaitForQuery(struct svga_winsys_context *swc,
+                    SVGA3dQueryType type,              // IN
+                    struct svga_winsys_buffer *buffer) // IN/OUT
+{
+   SVGA3dCmdWaitForQuery *cmd;
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_WAIT_FOR_QUERY, 
+                            sizeof *cmd,
+                            1);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   cmd->cid = swc->cid;
+   cmd->type = type;
+   
+   swc->region_relocation(swc, &cmd->guestResult, buffer, 0,
+                          PIPE_BUFFER_USAGE_GPU_WRITE);
+
+   swc->commit(swc);
+   
+   return PIPE_OK;
+}
diff --git a/src/gallium/drivers/svga/svga_cmd.h b/src/gallium/drivers/svga/svga_cmd.h
new file mode 100644
index 00000000000..80410547690
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_cmd.h
@@ -0,0 +1,235 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/*
+ * svga_cmd.h --
+ *
+ *      Command construction utility for the SVGA3D protocol used by
+ *      the VMware SVGA device, based on the svgautil library.
+ */
+
+#ifndef __SVGA3D_H__
+#define __SVGA3D_H__
+
+
+#include "svga_types.h"
+#include "svga_reg.h"
+#include "svga3d_reg.h"
+
+#include "pipe/p_defines.h"
+
+
+struct pipe_buffer;
+struct pipe_surface;
+struct svga_transfer;
+struct svga_winsys_context;
+struct svga_winsys_buffer;
+struct svga_winsys_surface;
+
+
+/*
+ * SVGA Device Interoperability
+ */
+
+void *
+SVGA3D_FIFOReserve(struct svga_winsys_context *swc, uint32 cmd, uint32 cmdSize, uint32 nr_relocs);
+
+void
+SVGA_FIFOCommitAll(struct svga_winsys_context *swc);
+
+
+/*
+ * Context Management
+ */
+
+enum pipe_error
+SVGA3D_DefineContext(struct svga_winsys_context *swc);
+
+enum pipe_error
+SVGA3D_DestroyContext(struct svga_winsys_context *swc);
+
+
+/*
+ * Surface Management
+ */
+
+enum pipe_error
+SVGA3D_BeginDefineSurface(struct svga_winsys_context *swc,
+                          struct svga_winsys_surface *sid,
+                          SVGA3dSurfaceFlags flags,
+                          SVGA3dSurfaceFormat format,
+                          SVGA3dSurfaceFace **faces,
+                          SVGA3dSize **mipSizes,
+                          uint32 numMipSizes);
+enum pipe_error
+SVGA3D_DefineSurface2D(struct svga_winsys_context *swc,
+                       struct svga_winsys_surface *sid,
+                       uint32 width,
+                       uint32 height,
+                       SVGA3dSurfaceFormat format);
+enum pipe_error
+SVGA3D_DestroySurface(struct svga_winsys_context *swc,
+                      struct svga_winsys_surface *sid);
+
+
+/*
+ * Surface Operations
+ */
+
+enum pipe_error
+SVGA3D_SurfaceDMA(struct svga_winsys_context *swc,
+                  struct svga_transfer *st,
+                  SVGA3dTransferType transfer,
+                  const SVGA3dCopyBox *boxes,
+                  uint32 numBoxes);
+
+enum pipe_error
+SVGA3D_BufferDMA(struct svga_winsys_context *swc,
+                 struct svga_winsys_buffer *guest,
+                 struct svga_winsys_surface *host,
+                 SVGA3dTransferType transfer,
+                 uint32 size,
+                 uint32 offset,
+                 SVGA3dSurfaceDMAFlags flags);
+
+/*
+ * Drawing Operations
+ */
+
+
+enum pipe_error
+SVGA3D_BeginClear(struct svga_winsys_context *swc,
+                  SVGA3dClearFlag flags,
+                  uint32 color, float depth, uint32 stencil,
+                  SVGA3dRect **rects, uint32 numRects);
+
+enum pipe_error
+SVGA3D_ClearRect(struct svga_winsys_context *swc,
+                 SVGA3dClearFlag flags, uint32 color, float depth,
+                 uint32 stencil, uint32 x, uint32 y, uint32 w, uint32 h);
+
+enum pipe_error
+SVGA3D_BeginDrawPrimitives(struct svga_winsys_context *swc,
+                           SVGA3dVertexDecl **decls,
+                           uint32 numVertexDecls,
+                           SVGA3dPrimitiveRange **ranges,
+                           uint32 numRanges);
+
+/*
+ * Blits
+ */
+
+enum pipe_error
+SVGA3D_BeginSurfaceCopy(struct svga_winsys_context *swc,
+                        struct pipe_surface *src,
+                        struct pipe_surface *dest,
+                        SVGA3dCopyBox **boxes, uint32 numBoxes);
+
+
+enum pipe_error
+SVGA3D_SurfaceStretchBlt(struct svga_winsys_context *swc,
+                         struct pipe_surface *src,
+                         struct pipe_surface *dest,
+                         SVGA3dBox *boxSrc, SVGA3dBox *boxDest,
+                         SVGA3dStretchBltMode mode);
+
+/*
+ * Shared FFP/Shader Render State
+ */
+
+enum pipe_error
+SVGA3D_SetRenderTarget(struct svga_winsys_context *swc,
+                       SVGA3dRenderTargetType type,
+                       struct pipe_surface *surface);
+
+enum pipe_error
+SVGA3D_SetZRange(struct svga_winsys_context *swc,
+                 float zMin, float zMax);
+
+enum pipe_error
+SVGA3D_SetViewport(struct svga_winsys_context *swc,
+                   SVGA3dRect *rect);
+
+enum pipe_error
+SVGA3D_SetScissorRect(struct svga_winsys_context *swc,
+                      SVGA3dRect *rect);
+
+enum pipe_error
+SVGA3D_SetClipPlane(struct svga_winsys_context *swc,
+                    uint32 index, const float *plane);
+
+enum pipe_error
+SVGA3D_BeginSetTextureState(struct svga_winsys_context *swc,
+                            SVGA3dTextureState **states,
+                            uint32 numStates);
+
+enum pipe_error
+SVGA3D_BeginSetRenderState(struct svga_winsys_context *swc,
+                           SVGA3dRenderState **states,
+                           uint32 numStates);
+
+
+/*
+ * Shaders
+ */
+
+enum pipe_error
+SVGA3D_DefineShader(struct svga_winsys_context *swc,
+                    uint32 shid, SVGA3dShaderType type,
+                    const uint32 *bytecode, uint32 bytecodeLen);
+
+enum pipe_error
+SVGA3D_DestroyShader(struct svga_winsys_context *swc,
+                     uint32 shid, SVGA3dShaderType type);
+
+enum pipe_error
+SVGA3D_SetShaderConst(struct svga_winsys_context *swc,
+                      uint32 reg, SVGA3dShaderType type,
+                      SVGA3dShaderConstType ctype, const void *value);
+
+enum pipe_error
+SVGA3D_SetShader(struct svga_winsys_context *swc,
+                 SVGA3dShaderType type, uint32 shid);
+
+
+/*
+ * Queries
+ */
+
+enum pipe_error
+SVGA3D_BeginQuery(struct svga_winsys_context *swc,
+                  SVGA3dQueryType type);
+
+enum pipe_error
+SVGA3D_EndQuery(struct svga_winsys_context *swc,
+                SVGA3dQueryType type,
+                struct svga_winsys_buffer *buffer);
+
+enum pipe_error
+SVGA3D_WaitForQuery(struct svga_winsys_context *swc,
+                    SVGA3dQueryType type,
+                    struct svga_winsys_buffer *buffer);
+
+#endif /* __SVGA3D_H__ */
diff --git a/src/gallium/drivers/svga/svga_context.c b/src/gallium/drivers/svga/svga_context.c
new file mode 100644
index 00000000000..73233957f36
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_context.c
@@ -0,0 +1,269 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_screen.h"
+#include "util/u_memory.h"
+#include "util/u_upload_mgr.h"
+
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_screen_texture.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_swtnl.h"
+#include "svga_draw.h"
+#include "svga_debug.h"
+#include "svga_state.h"
+
+
+static void svga_destroy( struct pipe_context *pipe )
+{
+   struct svga_context *svga = svga_context( pipe );
+   unsigned shader;
+
+   svga_cleanup_framebuffer( svga );
+   svga_cleanup_tss_binding( svga );
+
+   svga_hwtnl_destroy( svga->hwtnl );
+
+   svga_cleanup_vertex_state(svga);
+   
+   svga->swc->destroy(svga->swc);
+   
+   svga_destroy_swtnl( svga );
+
+   u_upload_destroy( svga->upload_vb );
+   u_upload_destroy( svga->upload_ib );
+
+   for(shader = 0; shader < PIPE_SHADER_TYPES; ++shader)
+      pipe_buffer_reference( &svga->curr.cb[shader], NULL );
+
+   FREE( svga );
+}
+
+static unsigned int
+svga_is_texture_referenced( struct pipe_context *pipe,
+			    struct pipe_texture *texture,
+			    unsigned face, unsigned level)
+{
+   struct svga_texture *tex = svga_texture(texture);
+   struct svga_screen *ss = svga_screen(pipe->screen);
+
+   /**
+    * The screen does not cache texture writes.
+    */
+
+   if (!tex->handle || ss->sws->surface_is_flushed(ss->sws, tex->handle))
+      return PIPE_UNREFERENCED;
+
+   /**
+    * sws->surface_is_flushed() does not distinguish between read references
+    * and write references. So assume a reference is both.
+    */
+
+   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+}
+
+static unsigned int
+svga_is_buffer_referenced( struct pipe_context *pipe,
+			   struct pipe_buffer *buf)
+
+{
+   struct svga_screen *ss = svga_screen(pipe->screen);
+   struct svga_buffer *sbuf = svga_buffer(buf);
+
+   /**
+    * XXX: Check this.
+    * The screen may cache buffer writes, but when we map, we map out
+    * of those cached writes, so we don't need to set a
+    * PIPE_REFERENCED_FOR_WRITE flag for cached buffers.
+    */
+
+   if (!sbuf->handle || ss->sws->surface_is_flushed(ss->sws, sbuf->handle))
+     return PIPE_UNREFERENCED;
+
+   /**
+    * sws->surface_is_flushed() does not distinguish between read references
+    * and write references. So assume a reference is both,
+    * however, we make an exception for index- and vertex buffers, to avoid
+    * a flush in st_bufferobj_get_subdata, during display list replay.
+    */
+
+   if (sbuf->base.usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_INDEX))
+      return PIPE_REFERENCED_FOR_READ;
+
+   return PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE;
+}
+
+
+struct pipe_context *svga_context_create( struct pipe_screen *screen )
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   struct svga_context *svga = NULL;
+   enum pipe_error ret;
+
+   svga = CALLOC_STRUCT(svga_context);
+   if (svga == NULL)
+      goto error1;
+
+   svga->pipe.winsys = screen->winsys;
+   svga->pipe.screen = screen;
+   svga->pipe.destroy = svga_destroy;
+   svga->pipe.clear = svga_clear;
+
+   svga->pipe.is_texture_referenced = svga_is_texture_referenced;
+   svga->pipe.is_buffer_referenced = svga_is_buffer_referenced;
+
+   svga->swc = svgascreen->sws->context_create(svgascreen->sws);
+   if(!svga->swc)
+      goto error2;
+
+   svga_init_blend_functions(svga);
+   svga_init_blit_functions(svga);
+   svga_init_depth_stencil_functions(svga);
+   svga_init_draw_functions(svga);
+   svga_init_flush_functions(svga);
+   svga_init_misc_functions(svga);
+   svga_init_rasterizer_functions(svga);
+   svga_init_sampler_functions(svga);
+   svga_init_fs_functions(svga);
+   svga_init_vs_functions(svga);
+   svga_init_vertex_functions(svga);
+   svga_init_constbuffer_functions(svga);
+   svga_init_query_functions(svga);
+
+   /* debug */
+   svga->debug.no_swtnl = debug_get_bool_option("SVGA_NO_SWTNL", FALSE);
+   svga->debug.force_swtnl = debug_get_bool_option("SVGA_FORCE_SWTNL", FALSE);
+   svga->debug.use_min_mipmap = debug_get_bool_option("SVGA_USE_MIN_MIPMAP", FALSE);
+   svga->debug.disable_shader = debug_get_num_option("SVGA_DISABLE_SHADER", ~0);
+
+   if (!svga_init_swtnl(svga))
+      goto error3;
+
+   svga->upload_ib = u_upload_create( svga->pipe.screen,
+                                      32 * 1024,
+                                      16,
+                                      PIPE_BUFFER_USAGE_INDEX );
+   if (svga->upload_ib == NULL)
+      goto error4;
+
+   svga->upload_vb = u_upload_create( svga->pipe.screen,
+                                      128 * 1024,
+                                      16,
+                                      PIPE_BUFFER_USAGE_VERTEX );
+   if (svga->upload_vb == NULL)
+      goto error5;
+
+   svga->hwtnl = svga_hwtnl_create( svga,
+                                    svga->upload_ib,
+                                    svga->swc );
+   if (svga->hwtnl == NULL)
+      goto error6;
+
+
+   ret = svga_emit_initial_state( svga );
+   if (ret)
+      goto error7;
+   
+   /* Avoid shortcircuiting state with initial value of zero.
+    */
+   memset(&svga->state.hw_clear, 0xcd, sizeof(svga->state.hw_clear));
+   memset(&svga->state.hw_clear.framebuffer, 0x0, 
+          sizeof(svga->state.hw_clear.framebuffer));
+
+   memset(&svga->state.hw_draw, 0xcd, sizeof(svga->state.hw_draw));
+   memset(&svga->state.hw_draw.views, 0x0, sizeof(svga->state.hw_draw.views));
+   svga->state.hw_draw.num_views = 0;
+
+   svga->dirty = ~0;
+   svga->state.white_fs_id = SVGA3D_INVALID_ID;
+
+   LIST_INITHEAD(&svga->dirty_buffers);
+
+   return &svga->pipe;
+
+error7:
+   svga_hwtnl_destroy( svga->hwtnl );
+error6:
+   u_upload_destroy( svga->upload_vb );
+error5:
+   u_upload_destroy( svga->upload_ib );
+error4:
+   svga_destroy_swtnl(svga);
+error3:
+   svga->swc->destroy(svga->swc);
+error2:
+   FREE(svga);
+error1:
+   return NULL;
+}
+
+
+void svga_context_flush( struct svga_context *svga, 
+                         struct pipe_fence_handle **pfence )
+{
+   struct svga_screen *svgascreen = svga_screen(svga->pipe.screen);
+   
+   /* Unmap upload manager buffers: 
+    */
+   u_upload_flush(svga->upload_vb);
+   u_upload_flush(svga->upload_ib);
+
+   /* Flush screen, to ensure that texture dma uploads are processed
+    * before submitting commands.
+    */
+   svga_screen_flush(svgascreen, NULL);
+   
+   svga_context_flush_buffers(svga);
+
+   /* Flush pending commands to hardware:
+    */
+   svga->swc->flush(svga->swc, pfence);
+
+   if (SVGA_DEBUG & DEBUG_SYNC) {
+      if (pfence && *pfence)
+         svga->pipe.screen->fence_finish( svga->pipe.screen, *pfence, 0);
+   }
+}
+
+
+void svga_hwtnl_flush_retry( struct svga_context *svga )
+{
+   enum pipe_error ret = PIPE_OK;
+
+   ret = svga_hwtnl_flush( svga->hwtnl );
+   if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
+      svga_context_flush( svga, NULL );
+      ret = svga_hwtnl_flush( svga->hwtnl );
+   }
+
+   assert(ret == 0);
+}
+
diff --git a/src/gallium/drivers/svga/svga_context.h b/src/gallium/drivers/svga/svga_context.h
new file mode 100644
index 00000000000..9a3e92fd8d1
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_context.h
@@ -0,0 +1,443 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_CONTEXT_H
+#define SVGA_CONTEXT_H
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_state.h"
+
+#include "util/u_double_list.h"
+
+#include "tgsi/tgsi_scan.h"
+
+
+#define SVGA_TEX_UNITS 8
+
+struct draw_vertex_shader;
+struct svga_shader_result;
+struct SVGACmdMemory;
+struct u_upload_mgr;
+
+
+struct svga_shader
+{
+   const struct tgsi_token *tokens;
+
+   struct tgsi_shader_info info;
+
+   struct svga_shader_result *results;
+
+   unsigned id;
+
+   boolean use_sm30;
+};
+
+struct svga_fragment_shader
+{
+   struct svga_shader base;
+};
+
+struct svga_vertex_shader
+{
+   struct svga_shader base;
+
+   struct draw_vertex_shader *draw_shader;
+};
+
+
+struct svga_cache_context;
+struct svga_tracked_state;
+
+struct svga_blend_state {
+
+   boolean need_white_fragments;
+
+   /* Should be per-render-target:
+    */
+   struct {
+      uint8_t writemask;
+
+      boolean blend_enable;
+      uint8_t srcblend;
+      uint8_t dstblend;
+      uint8_t blendeq;
+      
+      boolean separate_alpha_blend_enable;
+      uint8_t srcblend_alpha;
+      uint8_t dstblend_alpha;
+      uint8_t blendeq_alpha;
+
+   } rt[1];
+};
+
+struct svga_depth_stencil_state {
+   unsigned zfunc:8;
+   unsigned zenable:1;
+   unsigned zwriteenable:1;
+
+   unsigned alphatestenable:1;
+   unsigned alphafunc:8;
+  
+   struct {
+      unsigned enabled:1;
+      unsigned func:8;
+      unsigned fail:8;
+      unsigned zfail:8;
+      unsigned pass:8;
+   } stencil[2];
+   
+   /* SVGA3D has one ref/mask/writemask triple shared between front &
+    * back face stencil.  We really need two:
+    */
+   unsigned stencil_ref:8;
+   unsigned stencil_mask:8;
+   unsigned stencil_writemask:8;
+
+   float    alpharef;
+};
+
+#define SVGA_UNFILLED_DISABLE 0
+#define SVGA_UNFILLED_LINE    1
+#define SVGA_UNFILLED_POINT   2
+
+#define SVGA_PIPELINE_FLAG_POINTS   (1<<PIPE_PRIM_POINTS)
+#define SVGA_PIPELINE_FLAG_LINES    (1<<PIPE_PRIM_LINES)
+#define SVGA_PIPELINE_FLAG_TRIS     (1<<PIPE_PRIM_TRIANGLES)
+
+struct svga_rasterizer_state {
+   struct pipe_rasterizer_state templ; /* needed for draw module */
+
+   unsigned shademode:8;
+   unsigned cullmode:8;
+   unsigned scissortestenable:1;
+   unsigned multisampleantialias:1;
+   unsigned antialiasedlineenable:1;
+   unsigned lastpixel:1;
+
+   unsigned linepattern;
+
+   float slopescaledepthbias;
+   float depthbias;
+   float pointsize;
+   float pointsize_min;
+   float pointsize_max;
+   
+   unsigned hw_unfilled:16;         /* PIPE_POLYGON_MODE_x */
+   unsigned need_pipeline:16;    /* which prims do we need help for? */
+};
+
+struct svga_sampler_state {
+   unsigned mipfilter;
+   unsigned magfilter;
+   unsigned minfilter;
+   unsigned aniso_level;
+   float lod_bias;
+   unsigned addressu;
+   unsigned addressv;
+   unsigned addressw;
+   unsigned bordercolor;
+   unsigned normalized_coords:1;
+   unsigned compare_mode:1;
+   unsigned compare_func:3;
+
+   unsigned min_lod;
+   unsigned view_min_lod;
+   unsigned view_max_lod;
+};
+
+/* Use to calculate differences between state emitted to hardware and
+ * current driver-calculated state.  
+ */
+struct svga_state 
+{
+   const struct svga_blend_state *blend;
+   const struct svga_depth_stencil_state *depth;
+   const struct svga_rasterizer_state *rast;
+   const struct svga_sampler_state *sampler[PIPE_MAX_SAMPLERS];
+
+   struct pipe_texture *texture[PIPE_MAX_SAMPLERS]; /* or texture ID's? */
+   struct svga_fragment_shader *fs;
+   struct svga_vertex_shader *vs;
+
+   struct pipe_vertex_buffer vb[PIPE_MAX_ATTRIBS];
+   struct pipe_vertex_element ve[PIPE_MAX_ATTRIBS];
+   struct pipe_buffer *cb[PIPE_SHADER_TYPES];
+
+   struct pipe_framebuffer_state framebuffer;
+   float depthscale;
+
+   struct pipe_poly_stipple poly_stipple;
+   struct pipe_scissor_state scissor;
+   struct pipe_blend_color blend_color;
+   struct pipe_clip_state clip;
+   struct pipe_viewport_state viewport;
+
+   const unsigned *edgeflags;
+
+   unsigned num_samplers;
+   unsigned num_textures;
+   unsigned num_vertex_elements;
+   unsigned num_vertex_buffers;
+   unsigned reduced_prim;
+
+   struct {
+      unsigned flag_1d;
+      unsigned flag_srgb;
+   } tex_flags;
+
+   boolean any_user_vertex_buffers;
+
+   unsigned zero_stride_vertex_elements;
+   unsigned num_zero_stride_vertex_elements;
+   /* ### maybe dynamically allocate this */
+   float zero_stride_constants[PIPE_MAX_ATTRIBS*4];
+};
+
+#define RS_MAX 97
+#define TS_MAX 30
+#define CB_MAX 256
+
+struct svga_prescale {
+   float translate[4];
+   float scale[4];
+   boolean enabled;
+};
+
+
+/* Updated by calling svga_update_state( SVGA_STATE_HW_VIEWPORT )
+ */
+struct svga_hw_clear_state
+{
+   struct {
+      unsigned x,y,w,h;
+   } viewport;
+
+   struct {
+      float zmin, zmax;
+   } depthrange;
+   
+   struct pipe_framebuffer_state framebuffer;
+   struct svga_prescale prescale;
+};
+
+struct svga_hw_view_state
+{
+   struct pipe_texture *texture;
+   struct svga_sampler_view *v;
+   unsigned min_lod;
+   unsigned max_lod;
+   int dirty;
+};
+
+/* Updated by calling svga_update_state( SVGA_STATE_HW_DRAW )
+ */
+struct svga_hw_draw_state
+{
+   unsigned rs[RS_MAX];
+   unsigned ts[16][TS_MAX];
+   float cb[PIPE_SHADER_TYPES][CB_MAX][4];
+
+   unsigned shader_id[PIPE_SHADER_TYPES];
+   
+   struct svga_shader_result *fs;
+   struct svga_shader_result *vs;
+   struct svga_hw_view_state views[PIPE_MAX_SAMPLERS];
+
+   unsigned num_views;
+};
+
+
+/* Updated by calling svga_update_state( SVGA_STATE_NEED_SWTNL )
+ */
+struct svga_sw_state
+{
+   unsigned ve_format[PIPE_MAX_ATTRIBS]; /* NEW_VELEMENT */
+
+   /* which parts we need */
+   boolean need_swvfetch;
+   boolean need_pipeline;
+   boolean need_swtnl;
+};
+
+
+/* Queue some state updates (like rss) and submit them to hardware in
+ * a single packet.
+ */
+struct svga_hw_queue;
+
+struct svga_query;
+
+struct svga_context
+{
+   struct pipe_context pipe;
+   struct svga_winsys_context *swc;
+
+   struct {
+      boolean no_swtnl;
+      boolean force_swtnl;
+      boolean use_min_mipmap;
+
+      /* incremented for each shader */
+      unsigned shader_id;
+
+      unsigned disable_shader;
+   } debug;
+
+   struct {
+      struct draw_context *draw;
+      struct vbuf_render *backend;
+      unsigned hw_prim;
+      boolean new_vbuf;
+      boolean new_vdecl;
+   } swtnl;
+
+   struct {
+      unsigned dirty[4];
+
+      unsigned texture_timestamp;
+      unsigned next_fs_id;
+      unsigned next_vs_id;
+
+      /* Internally generated shaders:
+       */
+      unsigned white_fs_id;
+
+      /* 
+       */
+      struct svga_sw_state          sw;
+      struct svga_hw_draw_state     hw_draw;
+      struct svga_hw_clear_state    hw_clear;
+   } state;
+
+   struct svga_state curr;      /* state from the state tracker */
+   unsigned dirty;              /* statechanges since last update_state() */
+
+   struct u_upload_mgr *upload_ib;
+   struct u_upload_mgr *upload_vb;
+   struct svga_hwtnl *hwtnl;
+
+   /** The occlusion query currently in progress */
+   struct svga_query *sq;
+
+   /** List of buffers with queued transfers */
+   struct list_head dirty_buffers;
+};
+
+/* A flag for each state_tracker state object:
+ */
+#define SVGA_NEW_BLEND               0x1
+#define SVGA_NEW_DEPTH_STENCIL       0x2
+#define SVGA_NEW_RAST                0x4
+#define SVGA_NEW_SAMPLER             0x8
+#define SVGA_NEW_TEXTURE             0x10
+#define SVGA_NEW_VBUFFER             0x20
+#define SVGA_NEW_VELEMENT            0x40
+#define SVGA_NEW_FS                  0x80
+#define SVGA_NEW_VS                  0x100
+#define SVGA_NEW_FS_CONST_BUFFER     0x200
+#define SVGA_NEW_VS_CONST_BUFFER     0x400
+#define SVGA_NEW_FRAME_BUFFER        0x800
+#define SVGA_NEW_STIPPLE             0x1000
+#define SVGA_NEW_SCISSOR             0x2000
+#define SVGA_NEW_BLEND_COLOR         0x5000
+#define SVGA_NEW_CLIP                0x8000
+#define SVGA_NEW_VIEWPORT            0x10000
+#define SVGA_NEW_PRESCALE            0x20000
+#define SVGA_NEW_REDUCED_PRIMITIVE   0x40000
+#define SVGA_NEW_TEXTURE_BINDING     0x80000
+#define SVGA_NEW_NEED_PIPELINE       0x100000
+#define SVGA_NEW_NEED_SWVFETCH       0x200000
+#define SVGA_NEW_NEED_SWTNL          0x400000
+#define SVGA_NEW_FS_RESULT           0x800000
+#define SVGA_NEW_VS_RESULT           0x1000000
+#define SVGA_NEW_EDGEFLAGS           0x2000000
+#define SVGA_NEW_ZERO_STRIDE         0x4000000
+#define SVGA_NEW_TEXTURE_FLAGS       0x8000000
+
+
+
+
+
+/***********************************************************************
+ * svga_clear.c: 
+ */
+void svga_clear(struct pipe_context *pipe, 
+                unsigned buffers,
+                const float *rgba,
+                double depth,
+                unsigned stencil);
+
+
+/***********************************************************************
+ * svga_screen_texture.c: 
+ */
+void svga_mark_surfaces_dirty(struct svga_context *svga);
+
+
+
+
+void svga_init_state_functions( struct svga_context *svga );
+void svga_init_flush_functions( struct svga_context *svga );
+void svga_init_string_functions( struct svga_context *svga );
+void svga_init_blit_functions(struct svga_context *svga);
+
+void svga_init_blend_functions( struct svga_context *svga );
+void svga_init_depth_stencil_functions( struct svga_context *svga );
+void svga_init_misc_functions( struct svga_context *svga );
+void svga_init_rasterizer_functions( struct svga_context *svga );
+void svga_init_sampler_functions( struct svga_context *svga );
+void svga_init_fs_functions( struct svga_context *svga );
+void svga_init_vs_functions( struct svga_context *svga );
+void svga_init_vertex_functions( struct svga_context *svga );
+void svga_init_constbuffer_functions( struct svga_context *svga );
+void svga_init_draw_functions( struct svga_context *svga );
+void svga_init_query_functions( struct svga_context *svga );
+
+void svga_cleanup_vertex_state( struct svga_context *svga );
+void svga_cleanup_tss_binding( struct svga_context *svga );
+void svga_cleanup_framebuffer( struct svga_context *svga );
+
+void svga_context_flush( struct svga_context *svga,
+                         struct pipe_fence_handle **pfence );
+
+void svga_hwtnl_flush_retry( struct svga_context *svga );
+
+
+/***********************************************************************
+ * Inline conversion functions.  These are better-typed than the
+ * macros used previously:
+ */
+static INLINE struct svga_context *
+svga_context( struct pipe_context *pipe )
+{
+   return (struct svga_context *)pipe;
+}
+
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_debug.h b/src/gallium/drivers/svga/svga_debug.h
new file mode 100644
index 00000000000..b7bb5686ed3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_debug.h
@@ -0,0 +1,74 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_DEBUG_H
+#define SVGA_DEBUG_H
+
+#include "pipe/p_compiler.h"
+#include "util/u_debug.h"
+
+#define DEBUG_DMA      0x1
+#define DEBUG_TGSI     0x4
+#define DEBUG_PIPE     0x8
+#define DEBUG_STATE    0x10
+#define DEBUG_SCREEN   0x20
+#define DEBUG_TEX      0x40
+#define DEBUG_SWTNL    0x80
+#define DEBUG_CONSTS   0x100
+#define DEBUG_VIEWPORT 0x200
+#define DEBUG_VIEWS    0x400
+#define DEBUG_PERF     0x800    /* print something when we hit any slow path operation */
+#define DEBUG_FLUSH    0x1000   /* flush after every draw */
+#define DEBUG_SYNC     0x2000   /* sync after every flush */
+#define DEBUG_QUERY    0x4000
+
+#ifdef DEBUG
+extern int SVGA_DEBUG;
+#define DBSTR(x) x
+#else
+#define SVGA_DEBUG 0
+#define DBSTR(x) ""
+#endif
+
+static INLINE void
+SVGA_DBG( unsigned flag, const char *fmt, ... )
+{
+#ifdef DEBUG 
+    if (SVGA_DEBUG & flag)
+    {
+        va_list args;
+
+        va_start( args, fmt );
+        debug_vprintf( fmt, args );
+        va_end( args );
+    }
+#else
+    (void)flag;
+    (void)fmt;
+#endif
+}
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_draw.c b/src/gallium/drivers/svga/svga_draw.c
new file mode 100644
index 00000000000..1b371cecc61
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw.c
@@ -0,0 +1,370 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+#include "svga_draw.h"
+#include "svga_draw_private.h"
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_cmd.h"
+
+
+struct svga_hwtnl *svga_hwtnl_create( struct svga_context *svga,
+                                      struct u_upload_mgr *upload_ib,
+                                      struct svga_winsys_context *swc )
+{
+   struct svga_hwtnl *hwtnl = CALLOC_STRUCT(svga_hwtnl);
+   if (hwtnl == NULL)
+      goto fail;
+
+   hwtnl->svga = svga;
+   hwtnl->upload_ib = upload_ib;
+   
+   hwtnl->cmd.swc = swc;
+
+   return hwtnl;
+
+fail:
+   return NULL;
+}
+
+void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl )
+{
+   int i, j;
+
+   for (i = 0; i < PIPE_PRIM_MAX; i++) {
+      for (j = 0; j < IDX_CACHE_MAX; j++) {
+         pipe_buffer_reference( &hwtnl->index_cache[i][j].buffer,
+                                NULL );
+      }
+   }
+
+   for (i = 0; i < hwtnl->cmd.vdecl_count; i++)
+      pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[i], NULL);
+
+   for (i = 0; i < hwtnl->cmd.prim_count; i++)
+      pipe_buffer_reference(&hwtnl->cmd.prim_ib[i], NULL);
+      
+
+   FREE(hwtnl);
+}
+
+
+void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl,
+                               boolean flatshade,
+                               boolean flatshade_first )
+{
+   hwtnl->hw_pv = PV_FIRST;
+   hwtnl->api_pv = (flatshade && !flatshade_first) ? PV_LAST : PV_FIRST;
+}                               
+
+void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl,
+                              unsigned mode )
+{
+   hwtnl->api_fillmode = mode;
+}                               
+
+void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl,
+                             unsigned count )
+{
+   unsigned i;
+
+   assert(hwtnl->cmd.prim_count == 0);
+
+   for (i = count; i < hwtnl->cmd.vdecl_count; i++) {
+      pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[i],
+                            NULL);
+   }
+
+   hwtnl->cmd.vdecl_count = count;
+}
+
+
+void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl,
+                          unsigned i,
+                          const SVGA3dVertexDecl *decl,
+                          struct pipe_buffer *vb)
+{
+   assert(hwtnl->cmd.prim_count == 0);
+
+   assert( i < hwtnl->cmd.vdecl_count );
+
+   hwtnl->cmd.vdecl[i] = *decl;
+
+   pipe_buffer_reference(&hwtnl->cmd.vdecl_vb[i],
+                         vb);   
+}
+
+
+
+enum pipe_error
+svga_hwtnl_flush( struct svga_hwtnl *hwtnl )
+{
+   struct svga_winsys_context *swc = hwtnl->cmd.swc;
+   struct svga_context *svga = hwtnl->svga;
+   enum pipe_error ret;
+
+   if (hwtnl->cmd.prim_count) {
+      struct svga_winsys_surface *vb_handle[SVGA3D_INPUTREG_MAX];
+      struct svga_winsys_surface *ib_handle[QSZ];
+      struct svga_winsys_surface *handle;
+      SVGA3dVertexDecl *vdecl;
+      SVGA3dPrimitiveRange *prim;
+      unsigned i;
+
+      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+         handle = svga_buffer_handle(svga, hwtnl->cmd.vdecl_vb[i]);
+         if (handle == NULL)
+            return PIPE_ERROR_OUT_OF_MEMORY;
+
+         vb_handle[i] = handle;
+      }
+
+      for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+         if (hwtnl->cmd.prim_ib[i]) {
+            handle = svga_buffer_handle(svga, hwtnl->cmd.prim_ib[i]);
+            if (handle == NULL)
+               return PIPE_ERROR_OUT_OF_MEMORY;
+         }
+         else
+            handle = NULL;
+
+         ib_handle[i] = handle;
+      }
+
+      ret = SVGA3D_BeginDrawPrimitives(swc, 
+                                       &vdecl, 
+                                       hwtnl->cmd.vdecl_count, 
+                                       &prim, 
+                                       hwtnl->cmd.prim_count);
+      if (ret != PIPE_OK) 
+         return ret;
+
+      
+      memcpy( vdecl,
+              hwtnl->cmd.vdecl,
+              hwtnl->cmd.vdecl_count * sizeof hwtnl->cmd.vdecl[0]);
+
+      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+         /* Given rangeHint is considered to be relative to indexBias, and 
+          * indexBias varies per primitive, we cannot accurately supply an 
+          * rangeHint when emitting more than one primitive per draw command.
+          */
+         if (hwtnl->cmd.prim_count == 1) {
+            vdecl[i].rangeHint.first = hwtnl->cmd.min_index[0];
+            vdecl[i].rangeHint.last = hwtnl->cmd.max_index[0] + 1;
+         }
+         else {
+            vdecl[i].rangeHint.first = 0;
+            vdecl[i].rangeHint.last = 0;
+         }
+
+         swc->surface_relocation(swc,
+                                 &vdecl[i].array.surfaceId,
+                                 vb_handle[i],
+                                 PIPE_BUFFER_USAGE_GPU_READ);
+      }
+
+      memcpy( prim,
+              hwtnl->cmd.prim,
+              hwtnl->cmd.prim_count * sizeof hwtnl->cmd.prim[0]);
+
+      for (i = 0; i < hwtnl->cmd.prim_count; i++) {
+         swc->surface_relocation(swc,
+                                 &prim[i].indexArray.surfaceId,
+                                 ib_handle[i],
+                                 PIPE_BUFFER_USAGE_GPU_READ);
+         pipe_buffer_reference(&hwtnl->cmd.prim_ib[i], NULL);
+      }
+      
+      SVGA_FIFOCommitAll( swc );
+      hwtnl->cmd.prim_count = 0;
+   }
+
+   return PIPE_OK;
+}
+
+
+
+
+
+/***********************************************************************
+ * Internal functions:
+ */
+
+enum pipe_error svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
+                                 const SVGA3dPrimitiveRange *range,
+                                 unsigned min_index,
+                                 unsigned max_index,
+                                 struct pipe_buffer *ib )
+{
+   int ret = PIPE_OK;
+
+#ifdef DEBUG
+   {
+      unsigned i;
+      for (i = 0; i < hwtnl->cmd.vdecl_count; i++) {
+         struct pipe_buffer *vb = hwtnl->cmd.vdecl_vb[i];
+         unsigned size = vb ? vb->size : 0;
+         unsigned offset = hwtnl->cmd.vdecl[i].array.offset;
+         unsigned stride = hwtnl->cmd.vdecl[i].array.stride;
+         unsigned index_bias = range->indexBias;
+         unsigned width;
+
+         assert(vb);
+         assert(size);
+         assert(offset < size);
+         assert(index_bias >= 0);
+         assert(min_index <= max_index);
+         assert(offset + index_bias*stride < size);
+         assert(offset + (index_bias + min_index)*stride < size);
+
+         switch (hwtnl->cmd.vdecl[i].identity.type) {
+         case SVGA3D_DECLTYPE_FLOAT1:
+            width = 4;
+            break;
+         case SVGA3D_DECLTYPE_FLOAT2:
+            width = 4*2;
+            break;
+         case SVGA3D_DECLTYPE_FLOAT3:
+            width = 4*3;
+            break;
+         case SVGA3D_DECLTYPE_FLOAT4:
+            width = 4*4;
+            break;
+         case SVGA3D_DECLTYPE_D3DCOLOR:
+            width = 4;
+            break;
+         case SVGA3D_DECLTYPE_UBYTE4:
+            width = 1*4;
+            break;
+         case SVGA3D_DECLTYPE_SHORT2:
+            width = 2*2;
+            break;
+         case SVGA3D_DECLTYPE_SHORT4:
+            width = 2*4;
+            break;
+         case SVGA3D_DECLTYPE_UBYTE4N:
+            width = 1*4;
+            break;
+         case SVGA3D_DECLTYPE_SHORT2N:
+            width = 2*2;
+            break;
+         case SVGA3D_DECLTYPE_SHORT4N:
+            width = 2*4;
+            break;
+         case SVGA3D_DECLTYPE_USHORT2N:
+            width = 2*2;
+            break;
+         case SVGA3D_DECLTYPE_USHORT4N:
+            width = 2*4;
+            break;
+         case SVGA3D_DECLTYPE_UDEC3:
+            width = 4;
+            break;
+         case SVGA3D_DECLTYPE_DEC3N:
+            width = 4;
+            break;
+         case SVGA3D_DECLTYPE_FLOAT16_2:
+            width = 2*2;
+            break;
+         case SVGA3D_DECLTYPE_FLOAT16_4:
+            width = 2*4;
+            break;
+         default:
+            assert(0);
+            width = 0;
+            break;
+         }
+
+         assert(!stride || width <= stride);
+         assert(offset + (index_bias + max_index)*stride + width <= size);
+      }
+
+      assert(range->indexWidth == range->indexArray.stride);
+
+      if(ib) {
+         unsigned size = ib->size;
+         unsigned offset = range->indexArray.offset;
+         unsigned stride = range->indexArray.stride;
+         unsigned count;
+
+         assert(size);
+         assert(offset < size);
+         assert(stride);
+
+         switch (range->primType) {
+         case SVGA3D_PRIMITIVE_POINTLIST:
+            count = range->primitiveCount;
+            break;
+         case SVGA3D_PRIMITIVE_LINELIST:
+            count = range->primitiveCount * 2;
+            break;
+         case SVGA3D_PRIMITIVE_LINESTRIP:
+            count = range->primitiveCount + 1;
+            break;
+         case SVGA3D_PRIMITIVE_TRIANGLELIST:
+            count = range->primitiveCount * 3;
+            break;
+         case SVGA3D_PRIMITIVE_TRIANGLESTRIP:
+            count = range->primitiveCount + 2;
+            break;
+         case SVGA3D_PRIMITIVE_TRIANGLEFAN:
+            count = range->primitiveCount + 2;
+            break;
+         default:
+            assert(0);
+            count = 0;
+            break;
+         }
+
+         assert(offset + count*stride <= size);
+      }
+   }
+#endif
+
+   if (hwtnl->cmd.prim_count+1 >= QSZ) {
+      ret = svga_hwtnl_flush( hwtnl );
+      if (ret != PIPE_OK)
+         return ret;
+   }
+   
+   /* min/max indices are relative to bias */
+   hwtnl->cmd.min_index[hwtnl->cmd.prim_count] = min_index;
+   hwtnl->cmd.max_index[hwtnl->cmd.prim_count] = max_index;
+
+   hwtnl->cmd.prim[hwtnl->cmd.prim_count] = *range;
+
+   pipe_buffer_reference(&hwtnl->cmd.prim_ib[hwtnl->cmd.prim_count], ib);
+   hwtnl->cmd.prim_count++;
+
+   return ret;
+}
diff --git a/src/gallium/drivers/svga/svga_draw.h b/src/gallium/drivers/svga/svga_draw.h
new file mode 100644
index 00000000000..14553b17b58
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw.h
@@ -0,0 +1,83 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_DRAW_H
+#define SVGA_DRAW_H
+
+#include "pipe/p_compiler.h"
+
+#include "svga_hw_reg.h"
+
+struct svga_hwtnl;
+struct svga_winsys_context;
+struct svga_screen;
+struct svga_context;
+struct pipe_buffer;
+struct u_upload_mgr;
+
+struct svga_hwtnl *svga_hwtnl_create( struct svga_context *svga,
+                                      struct u_upload_mgr *upload_ib,
+                                      struct svga_winsys_context *swc );
+
+void svga_hwtnl_destroy( struct svga_hwtnl *hwtnl );
+
+void svga_hwtnl_set_flatshade( struct svga_hwtnl *hwtnl,
+                               boolean flatshade,
+                               boolean flatshade_first );
+
+void svga_hwtnl_set_unfilled( struct svga_hwtnl *hwtnl,
+                              unsigned mode );
+
+void svga_hwtnl_vdecl( struct svga_hwtnl *hwtnl,
+                       unsigned i,
+                       const SVGA3dVertexDecl *decl,
+                       struct pipe_buffer *vb);
+
+void svga_hwtnl_reset_vdecl( struct svga_hwtnl *hwtnl,
+                             unsigned count );
+
+
+enum pipe_error 
+svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl,
+                        unsigned prim, 
+                        unsigned start, 
+                        unsigned count);
+
+enum pipe_error
+svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl,
+                                struct pipe_buffer *indexBuffer,
+                                unsigned index_size,
+                                unsigned min_index,
+                                unsigned max_index,
+                                unsigned prim, 
+                                unsigned start, 
+                                unsigned count,
+                                unsigned bias );
+
+enum pipe_error
+svga_hwtnl_flush( struct svga_hwtnl *hwtnl );
+
+
+#endif /* SVGA_DRAW_H_ */
diff --git a/src/gallium/drivers/svga/svga_draw_arrays.c b/src/gallium/drivers/svga/svga_draw_arrays.c
new file mode 100644
index 00000000000..75492dffca2
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw_arrays.c
@@ -0,0 +1,297 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_inlines.h"
+#include "util/u_prim.h"
+#include "indices/u_indices.h"
+
+#include "svga_hw_reg.h"
+#include "svga_draw.h"
+#include "svga_draw_private.h"
+#include "svga_context.h"
+
+
+#define DBG 0
+
+
+
+
+static enum pipe_error generate_indices( struct svga_hwtnl *hwtnl,
+                                         unsigned nr,
+                                         unsigned index_size,
+                                         u_generate_func generate,
+                                         struct pipe_buffer **out_buf )
+{
+   struct pipe_screen *screen = hwtnl->svga->pipe.screen;
+   unsigned size = index_size * nr;
+   struct pipe_buffer *dst = NULL;
+   void *dst_map = NULL;
+
+   dst = screen->buffer_create( screen, 32, 
+                                PIPE_BUFFER_USAGE_INDEX |
+                                PIPE_BUFFER_USAGE_CPU_WRITE |
+                                PIPE_BUFFER_USAGE_GPU_READ, 
+                                size );
+   if (dst == NULL)
+      goto fail;
+
+   dst_map = pipe_buffer_map( screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE );
+   if (dst_map == NULL)
+      goto fail;
+
+   generate( nr,
+             dst_map );
+
+   pipe_buffer_unmap( screen, dst );
+
+   *out_buf = dst;
+   return PIPE_OK;
+
+fail:
+   if (dst_map)
+      screen->buffer_unmap( screen, dst );
+
+   if (dst)
+      screen->buffer_destroy( dst );
+
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+static boolean compare( unsigned cached_nr,
+                        unsigned nr,
+                        unsigned type )
+{
+   if (type == U_GENERATE_REUSABLE)
+      return cached_nr >= nr;
+   else
+      return cached_nr == nr;
+}
+
+static enum pipe_error retrieve_or_generate_indices( struct svga_hwtnl *hwtnl,
+                                                     unsigned prim,
+                                                     unsigned gen_type,
+                                                     unsigned gen_nr,
+                                                     unsigned gen_size,
+                                                     u_generate_func generate,
+                                                     struct pipe_buffer **out_buf )
+{
+   enum pipe_error ret = PIPE_OK;
+   int i;
+
+   for (i = 0; i < IDX_CACHE_MAX; i++) {
+      if (hwtnl->index_cache[prim][i].buffer != NULL &&
+          hwtnl->index_cache[prim][i].generate == generate)
+      {
+         if (compare(hwtnl->index_cache[prim][i].gen_nr, gen_nr, gen_type))
+         {
+            pipe_buffer_reference( out_buf,
+                                   hwtnl->index_cache[prim][i].buffer );
+
+            if (DBG) 
+               debug_printf("%s retrieve %d/%d\n", __FUNCTION__, i, gen_nr);
+
+            return PIPE_OK;
+         }
+         else if (gen_type == U_GENERATE_REUSABLE) 
+         {
+            pipe_buffer_reference( &hwtnl->index_cache[prim][i].buffer,
+                                   NULL );
+
+            if (DBG) 
+               debug_printf("%s discard %d/%d\n", __FUNCTION__, 
+                            i, hwtnl->index_cache[prim][i].gen_nr);
+
+            break;
+         }
+      }
+   }
+
+   if (i == IDX_CACHE_MAX)
+   {
+      unsigned smallest = 0;
+      unsigned smallest_size = ~0;
+      
+      for (i = 0; i < IDX_CACHE_MAX && smallest_size; i++) {
+         if (hwtnl->index_cache[prim][i].buffer == NULL)
+         {
+            smallest = i;
+            smallest_size = 0;
+         }
+         else if (hwtnl->index_cache[prim][i].gen_nr < smallest)
+         {
+            smallest = i;
+            smallest_size = hwtnl->index_cache[prim][i].gen_nr;
+         }
+      }
+
+      assert (smallest != IDX_CACHE_MAX);
+
+      pipe_buffer_reference( &hwtnl->index_cache[prim][smallest].buffer,
+                             NULL );
+
+      if (DBG)
+         debug_printf("%s discard smallest %d/%d\n", __FUNCTION__, 
+                      smallest, smallest_size);
+      
+      i = smallest;
+   }
+      
+      
+   ret = generate_indices( hwtnl, 
+                           gen_nr,
+                           gen_size,
+                           generate,
+                           out_buf );
+   if (ret != PIPE_OK)
+      return ret;
+
+
+   hwtnl->index_cache[prim][i].generate = generate;
+   hwtnl->index_cache[prim][i].gen_nr = gen_nr;
+   pipe_buffer_reference( &hwtnl->index_cache[prim][i].buffer,
+                          *out_buf );
+
+   if (DBG)
+      debug_printf("%s cache %d/%d\n", __FUNCTION__, 
+                   i, hwtnl->index_cache[prim][i].gen_nr);
+
+   return PIPE_OK;
+}
+
+
+
+static enum pipe_error
+simple_draw_arrays( struct svga_hwtnl *hwtnl,
+                    unsigned prim, unsigned start, unsigned count )
+{
+   SVGA3dPrimitiveRange range;
+   unsigned hw_prim;
+   unsigned hw_count;
+
+   hw_prim = svga_translate_prim(prim, count, &hw_count);
+   if (hw_count == 0)
+      return PIPE_ERROR_BAD_INPUT;
+      
+   range.primType = hw_prim;
+   range.primitiveCount = hw_count;
+   range.indexArray.surfaceId = SVGA3D_INVALID_ID;
+   range.indexArray.offset = 0;
+   range.indexArray.stride = 0;
+   range.indexWidth = 0;
+   range.indexBias = start;
+
+   /* Min/max index should be calculated prior to applying bias, so we
+    * end up with min_index = 0, max_index = count - 1 and everybody
+    * looking at those numbers knows to adjust them by
+    * range.indexBias.
+    */
+   return svga_hwtnl_prim( hwtnl, &range, 0, count - 1, NULL );
+}
+
+
+
+
+
+
+
+
+
+
+enum pipe_error 
+svga_hwtnl_draw_arrays( struct svga_hwtnl *hwtnl,
+                        unsigned prim, 
+                        unsigned start, 
+                        unsigned count)
+{
+   unsigned gen_prim, gen_size, gen_nr, gen_type;
+   u_generate_func gen_func;
+   enum pipe_error ret = PIPE_OK;
+
+   if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL && 
+       prim >= PIPE_PRIM_TRIANGLES) 
+   {
+      gen_type = u_unfilled_generator( prim,
+                                       start,
+                                       count,
+                                       hwtnl->api_fillmode,
+                                       &gen_prim,
+                                       &gen_size,
+                                       &gen_nr,
+                                       &gen_func );
+   }
+   else {
+      gen_type = u_index_generator( svga_hw_prims,
+                                    prim,
+                                    start,
+                                    count,
+                                    hwtnl->api_pv,
+                                    hwtnl->hw_pv,
+                                    &gen_prim,
+                                    &gen_size,
+                                    &gen_nr,
+                                    &gen_func );
+   }
+
+   if (gen_type == U_GENERATE_LINEAR) {
+      return simple_draw_arrays( hwtnl, gen_prim, start, count );
+   }
+   else {
+      struct pipe_buffer *gen_buf = NULL;
+
+      /* Need to draw as indexed primitive. 
+       * Potentially need to run the gen func to build an index buffer.
+       */
+      ret = retrieve_or_generate_indices( hwtnl,
+                                          prim,
+                                          gen_type,
+                                          gen_nr,
+                                          gen_size,
+                                          gen_func,
+                                          &gen_buf );
+      if (ret)
+         goto done;
+
+      ret = svga_hwtnl_simple_draw_range_elements( hwtnl,
+                                                   gen_buf,
+                                                   gen_size,
+                                                   0,
+                                                   count - 1,
+                                                   gen_prim,
+                                                   0,
+                                                   gen_nr,
+                                                   start );
+      if (ret)
+         goto done;
+
+   done:
+      if (gen_buf)
+         pipe_buffer_reference( &gen_buf, NULL );
+
+      return ret;
+   }
+}
+
diff --git a/src/gallium/drivers/svga/svga_draw_elements.c b/src/gallium/drivers/svga/svga_draw_elements.c
new file mode 100644
index 00000000000..167d8178315
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw_elements.c
@@ -0,0 +1,255 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "util/u_prim.h"
+#include "util/u_upload_mgr.h"
+#include "indices/u_indices.h"
+
+#include "svga_cmd.h"
+#include "svga_draw.h"
+#include "svga_draw_private.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_context.h"
+
+#include "svga_hw_reg.h"
+
+
+static enum pipe_error
+translate_indices( struct svga_hwtnl *hwtnl,
+                   struct pipe_buffer *src,
+                   unsigned offset,
+                   unsigned nr,
+                   unsigned index_size,
+                   u_translate_func translate,
+                   struct pipe_buffer **out_buf )
+{
+   struct pipe_screen *screen = hwtnl->svga->pipe.screen;
+   unsigned size = index_size * nr;
+   const void *src_map = NULL;
+   struct pipe_buffer *dst = NULL;
+   void *dst_map = NULL;
+
+   dst = screen->buffer_create( screen, 32, 
+                                PIPE_BUFFER_USAGE_INDEX |
+                                PIPE_BUFFER_USAGE_CPU_WRITE |
+                                PIPE_BUFFER_USAGE_GPU_READ, 
+                                size );
+   if (dst == NULL)
+      goto fail;
+
+   src_map = pipe_buffer_map( screen, src, PIPE_BUFFER_USAGE_CPU_READ );
+   if (src_map == NULL)
+      goto fail;
+
+   dst_map = pipe_buffer_map( screen, dst, PIPE_BUFFER_USAGE_CPU_WRITE );
+   if (dst_map == NULL)
+      goto fail;
+
+   translate( (const char *)src_map + offset,
+              nr,
+              dst_map );
+
+   pipe_buffer_unmap( screen, src );
+   pipe_buffer_unmap( screen, dst );
+
+   *out_buf = dst;
+   return PIPE_OK;
+
+fail:
+   if (src_map)
+      screen->buffer_unmap( screen, src );
+
+   if (dst_map)
+      screen->buffer_unmap( screen, dst );
+
+   if (dst)
+      screen->buffer_destroy( dst );
+
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+
+
+
+enum pipe_error
+svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
+                                       struct pipe_buffer *index_buffer,
+                                       unsigned index_size,
+                                       unsigned min_index,
+                                       unsigned max_index,
+                                       unsigned prim, 
+                                       unsigned start,
+                                       unsigned count,
+                                       unsigned bias )
+{
+   struct pipe_buffer *upload_buffer = NULL;
+   SVGA3dPrimitiveRange range;
+   unsigned hw_prim;
+   unsigned hw_count;
+   unsigned index_offset = start * index_size;
+   int ret = PIPE_OK;
+
+   hw_prim = svga_translate_prim(prim, count, &hw_count);
+   if (hw_count == 0)
+      goto done;
+
+   if (index_buffer && 
+       svga_buffer_is_user_buffer(index_buffer)) 
+   {
+      assert( index_buffer->size >= index_offset + count * index_size );
+
+      ret = u_upload_buffer( hwtnl->upload_ib,
+                             index_offset,
+                             count * index_size,
+                             index_buffer,
+                             &index_offset,
+                             &upload_buffer );
+      if (ret)
+         goto done;
+
+      /* Don't need to worry about refcounting index_buffer as this is
+       * just a stack variable without a counted reference of its own.
+       * The caller holds the reference.
+       */
+      index_buffer = upload_buffer;
+   }
+
+   range.primType = hw_prim;
+   range.primitiveCount = hw_count;
+   range.indexArray.offset = index_offset;
+   range.indexArray.stride = index_size;
+   range.indexWidth = index_size;
+   range.indexBias = bias;
+      
+   ret = svga_hwtnl_prim( hwtnl, &range, min_index, max_index, index_buffer );
+   if (ret)
+      goto done;
+
+done:
+   if (upload_buffer)
+      pipe_buffer_reference( &upload_buffer, NULL );
+
+   return ret;
+}
+
+
+
+
+enum pipe_error
+svga_hwtnl_draw_range_elements( struct svga_hwtnl *hwtnl,
+                                struct pipe_buffer *index_buffer,
+                                unsigned index_size,
+                                unsigned min_index,
+                                unsigned max_index,
+                                unsigned prim, unsigned start, unsigned count,
+                                unsigned bias)
+{
+   unsigned gen_prim, gen_size, gen_nr, gen_type;
+   u_translate_func gen_func;
+   enum pipe_error ret = PIPE_OK;
+
+   if (hwtnl->api_fillmode != PIPE_POLYGON_MODE_FILL && 
+       prim >= PIPE_PRIM_TRIANGLES) 
+   {
+      gen_type = u_unfilled_translator( prim,
+                                        index_size,
+                                        count,
+                                        hwtnl->api_fillmode,
+                                        &gen_prim,
+                                        &gen_size,
+                                        &gen_nr,
+                                        &gen_func );
+   }
+   else
+   {
+      gen_type = u_index_translator( svga_hw_prims,
+                                     prim,
+                                     index_size,
+                                     count,
+                                     hwtnl->api_pv,
+                                     hwtnl->hw_pv,
+                                     &gen_prim,
+                                     &gen_size,
+                                     &gen_nr,
+                                     &gen_func );
+   }
+
+   
+   if (gen_type == U_TRANSLATE_MEMCPY) {
+      /* No need for translation, just pass through to hardware: 
+       */
+      return svga_hwtnl_simple_draw_range_elements( hwtnl, index_buffer,
+                                                    index_size,
+                                                    min_index,
+                                                    max_index,
+                                                    gen_prim, start, count, bias );
+   }
+   else {
+      struct pipe_buffer *gen_buf = NULL;
+
+      /* Need to allocate a new index buffer and run the translate
+       * func to populate it.  Could potentially cache this translated
+       * index buffer with the original to avoid future
+       * re-translations.  Not much point if we're just accelerating
+       * GL though, as index buffers are typically used only once
+       * there.
+       */
+      ret = translate_indices( hwtnl,
+                               index_buffer,
+                               start * index_size,
+                               gen_nr,
+                               gen_size,
+                               gen_func,
+                               &gen_buf );
+      if (ret)
+         goto done;
+
+      ret = svga_hwtnl_simple_draw_range_elements( hwtnl,
+                                                   gen_buf,
+                                                   gen_size,
+                                                   min_index,
+                                                   max_index,
+                                                   gen_prim,
+                                                   0,
+                                                   gen_nr,
+                                                   bias );
+      if (ret)
+         goto done;
+
+   done:
+      if (gen_buf)
+         pipe_buffer_reference( &gen_buf, NULL );
+
+      return ret;
+   }
+}
+
+
+
+
+
diff --git a/src/gallium/drivers/svga/svga_draw_private.h b/src/gallium/drivers/svga/svga_draw_private.h
new file mode 100644
index 00000000000..9aa40e16642
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_draw_private.h
@@ -0,0 +1,158 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_DRAW_H_
+#define SVGA_DRAW_H_
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+#include "indices/u_indices.h"
+#include "svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+struct svga_context;
+struct u_upload_mgr;
+
+/* Should include polygon?
+ */
+static const unsigned svga_hw_prims = 
+   ((1 << PIPE_PRIM_POINTS) |
+    (1 << PIPE_PRIM_LINES) |
+    (1 << PIPE_PRIM_LINE_STRIP) |
+    (1 << PIPE_PRIM_TRIANGLES) |
+    (1 << PIPE_PRIM_TRIANGLE_STRIP) |
+    (1 << PIPE_PRIM_TRIANGLE_FAN));
+
+
+static INLINE unsigned svga_translate_prim(unsigned mode, 
+                                           unsigned count,
+                                           unsigned *out_count)
+{
+   switch (mode) {
+   case PIPE_PRIM_POINTS:
+      *out_count = count;
+      return SVGA3D_PRIMITIVE_POINTLIST;
+
+   case PIPE_PRIM_LINES:
+      *out_count = count / 2;
+      return SVGA3D_PRIMITIVE_LINELIST; 
+
+   case PIPE_PRIM_LINE_STRIP:
+      *out_count = count - 1;
+      return SVGA3D_PRIMITIVE_LINESTRIP; 
+
+   case PIPE_PRIM_TRIANGLES:
+      *out_count = count / 3;
+      return SVGA3D_PRIMITIVE_TRIANGLELIST; 
+
+   case PIPE_PRIM_TRIANGLE_STRIP:
+      *out_count = count - 2;
+      return SVGA3D_PRIMITIVE_TRIANGLESTRIP; 
+
+   case PIPE_PRIM_TRIANGLE_FAN:
+      *out_count = count - 2;
+      return SVGA3D_PRIMITIVE_TRIANGLEFAN; 
+
+   default:
+      assert(0);
+      *out_count = 0;
+      return 0;
+   }
+}
+
+
+struct index_cache {
+   u_generate_func generate;
+   unsigned gen_nr;
+
+   /* If non-null, this buffer is filled by calling 
+    *   generate(nr, map(buffer))
+    */
+   struct pipe_buffer *buffer;
+};
+
+#define QSZ 32
+
+struct draw_cmd {
+   struct svga_winsys_context *swc;
+
+   SVGA3dVertexDecl vdecl[SVGA3D_INPUTREG_MAX];
+   struct pipe_buffer *vdecl_vb[SVGA3D_INPUTREG_MAX];
+   unsigned vdecl_count;
+
+   SVGA3dPrimitiveRange prim[QSZ];
+   struct pipe_buffer *prim_ib[QSZ];
+   unsigned prim_count;
+   unsigned min_index[QSZ];
+   unsigned max_index[QSZ];
+};
+
+#define IDX_CACHE_MAX  8
+
+struct svga_hwtnl {
+   struct svga_context *svga;
+   struct u_upload_mgr *upload_ib;
+   
+   /* Flatshade information:
+    */
+   unsigned api_pv;
+   unsigned hw_pv;
+   unsigned api_fillmode;
+
+   /* Cache the results of running a particular generate func on each
+    * primitive type.
+    */
+   struct index_cache index_cache[PIPE_PRIM_MAX][IDX_CACHE_MAX];
+
+   /* Try to build the maximal draw command packet before emitting:
+    */
+   struct draw_cmd cmd;
+};
+
+
+
+/***********************************************************************
+ * Internal functions
+ */
+enum pipe_error 
+svga_hwtnl_prim( struct svga_hwtnl *hwtnl,
+                 const SVGA3dPrimitiveRange *range,
+                 unsigned min_index,
+                 unsigned max_index,
+                 struct pipe_buffer *ib );
+
+enum pipe_error
+svga_hwtnl_simple_draw_range_elements( struct svga_hwtnl *hwtnl,
+                                       struct pipe_buffer *indexBuffer,
+                                       unsigned index_size,
+                                       unsigned min_index,
+                                       unsigned max_index,
+                                       unsigned prim, 
+                                       unsigned start,
+                                       unsigned count,
+                                       unsigned bias );
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_hw_reg.h b/src/gallium/drivers/svga/svga_hw_reg.h
new file mode 100644
index 00000000000..183f4b918e0
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_hw_reg.h
@@ -0,0 +1,42 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_HW_REG_H
+#define SVGA_HW_REG_H
+
+#include "pipe/p_compiler.h"
+
+#if defined(PIPE_CC_GCC)
+#ifndef HAVE_STDINT_H
+#define HAVE_STDINT_H
+#endif
+#endif
+
+#include "svga_types.h"
+
+#include "svga3d_reg.h"
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_pipe_blend.c b/src/gallium/drivers/svga/svga_pipe_blend.c
new file mode 100644
index 00000000000..855d228755f
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_blend.c
@@ -0,0 +1,246 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+
+#include "svga_hw_reg.h"
+
+
+static INLINE unsigned
+svga_translate_blend_factor(unsigned factor)
+{
+   switch (factor) {
+   case PIPE_BLENDFACTOR_ZERO:            return SVGA3D_BLENDOP_ZERO;
+   case PIPE_BLENDFACTOR_SRC_ALPHA:       return SVGA3D_BLENDOP_SRCALPHA;
+   case PIPE_BLENDFACTOR_ONE:             return SVGA3D_BLENDOP_ONE;
+   case PIPE_BLENDFACTOR_SRC_COLOR:       return SVGA3D_BLENDOP_SRCCOLOR;
+   case PIPE_BLENDFACTOR_INV_SRC_COLOR:   return SVGA3D_BLENDOP_INVSRCCOLOR;
+   case PIPE_BLENDFACTOR_DST_COLOR:       return SVGA3D_BLENDOP_DESTCOLOR;
+   case PIPE_BLENDFACTOR_INV_DST_COLOR:   return SVGA3D_BLENDOP_INVDESTCOLOR;
+   case PIPE_BLENDFACTOR_INV_SRC_ALPHA:   return SVGA3D_BLENDOP_INVSRCALPHA;
+   case PIPE_BLENDFACTOR_DST_ALPHA:       return SVGA3D_BLENDOP_DESTALPHA;
+   case PIPE_BLENDFACTOR_INV_DST_ALPHA:   return SVGA3D_BLENDOP_INVDESTALPHA;
+   case PIPE_BLENDFACTOR_SRC_ALPHA_SATURATE: return SVGA3D_BLENDOP_SRCALPHASAT;
+   case PIPE_BLENDFACTOR_CONST_COLOR:     return SVGA3D_BLENDOP_BLENDFACTOR;
+   case PIPE_BLENDFACTOR_INV_CONST_COLOR: return SVGA3D_BLENDOP_INVBLENDFACTOR;
+   case PIPE_BLENDFACTOR_CONST_ALPHA:     return SVGA3D_BLENDOP_BLENDFACTOR; /* ? */
+   case PIPE_BLENDFACTOR_INV_CONST_ALPHA: return SVGA3D_BLENDOP_INVBLENDFACTOR; /* ? */
+   default:
+      assert(0);
+      return SVGA3D_BLENDOP_ZERO;
+   }
+}
+
+static INLINE unsigned
+svga_translate_blend_func(unsigned mode)
+{
+   switch (mode) {
+   case PIPE_BLEND_ADD:              return SVGA3D_BLENDEQ_ADD;
+   case PIPE_BLEND_SUBTRACT:         return SVGA3D_BLENDEQ_SUBTRACT;
+   case PIPE_BLEND_REVERSE_SUBTRACT: return SVGA3D_BLENDEQ_REVSUBTRACT;
+   case PIPE_BLEND_MIN:              return SVGA3D_BLENDEQ_MINIMUM;
+   case PIPE_BLEND_MAX:              return SVGA3D_BLENDEQ_MAXIMUM;
+   default:
+      assert(0);
+      return SVGA3D_BLENDEQ_ADD;
+   }
+}
+
+
+static void *
+svga_create_blend_state(struct pipe_context *pipe,
+                        const struct pipe_blend_state *templ)
+{
+   struct svga_blend_state *blend = CALLOC_STRUCT( svga_blend_state );
+   unsigned i;
+
+ 
+   /* Fill in the per-rendertarget blend state.  We currently only
+    * have one rendertarget.
+    */
+   for (i = 0; i < 1; i++) {
+      /* No way to set this in SVGA3D, and no way to correctly implement it on
+       * top of D3D9 API.  Instead we try to simulate with various blend modes.
+       */
+      if (templ->logicop_enable) {
+         switch (templ->logicop_func) {
+         case PIPE_LOGICOP_XOR:
+            blend->need_white_fragments = TRUE;
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_SUBTRACT;
+            break;
+         case PIPE_LOGICOP_CLEAR:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MINIMUM;
+            break;
+         case PIPE_LOGICOP_COPY:
+            blend->rt[i].blend_enable = FALSE;
+            break;
+         case PIPE_LOGICOP_COPY_INVERTED:
+            blend->rt[i].blend_enable   = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_INVSRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_ADD;
+            break;
+         case PIPE_LOGICOP_NOOP:
+            blend->rt[i].blend_enable   = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_DESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_ADD;
+            break;
+         case PIPE_LOGICOP_SET:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ONE;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MAXIMUM;
+            break;
+         case PIPE_LOGICOP_INVERT:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_INVSRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_ZERO;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_ADD;
+            break;
+         case PIPE_LOGICOP_AND:
+            /* Approximate with minimum - works for the 0 & anything case: */
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_SRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_DESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MINIMUM;
+            break;
+         case PIPE_LOGICOP_AND_REVERSE:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_SRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_INVDESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MINIMUM;
+            break;
+         case PIPE_LOGICOP_AND_INVERTED:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_INVSRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_DESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MINIMUM;
+            break;
+         case PIPE_LOGICOP_OR:
+            /* Approximate with maximum - works for the 1 | anything case: */
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_SRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_DESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MAXIMUM;
+            break;
+         case PIPE_LOGICOP_OR_REVERSE:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_SRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_INVDESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MAXIMUM;
+            break;
+         case PIPE_LOGICOP_OR_INVERTED:
+            blend->rt[i].blend_enable = TRUE;
+            blend->rt[i].srcblend       = SVGA3D_BLENDOP_INVSRCCOLOR;
+            blend->rt[i].dstblend       = SVGA3D_BLENDOP_DESTCOLOR;
+            blend->rt[i].blendeq        = SVGA3D_BLENDEQ_MAXIMUM;
+            break;
+         case PIPE_LOGICOP_NAND:
+         case PIPE_LOGICOP_NOR:
+         case PIPE_LOGICOP_EQUIV:
+            /* Fill these in with plausible values */
+            blend->rt[i].blend_enable = FALSE;
+            break;
+         default:
+            assert(0);
+            break;
+         }
+      }
+      else {
+         blend->rt[i].blend_enable   = templ->blend_enable;
+
+         if (templ->blend_enable) {
+            blend->rt[i].srcblend       = svga_translate_blend_factor(templ->rgb_src_factor);
+            blend->rt[i].dstblend       = svga_translate_blend_factor(templ->rgb_dst_factor);
+            blend->rt[i].blendeq        = svga_translate_blend_func(templ->rgb_func);
+            blend->rt[i].srcblend_alpha = svga_translate_blend_factor(templ->alpha_src_factor);
+            blend->rt[i].dstblend_alpha = svga_translate_blend_factor(templ->alpha_dst_factor);
+            blend->rt[i].blendeq_alpha  = svga_translate_blend_func(templ->alpha_func);
+
+            if (blend->rt[i].srcblend_alpha != blend->rt[i].srcblend ||
+                blend->rt[i].dstblend_alpha != blend->rt[i].dstblend ||
+                blend->rt[i].blendeq_alpha  != blend->rt[i].blendeq)
+            {
+               blend->rt[i].separate_alpha_blend_enable = TRUE;
+            }
+         }
+      }
+
+      blend->rt[i].writemask = templ->colormask;
+   }
+
+   return blend;
+}
+
+static void svga_bind_blend_state(struct pipe_context *pipe,
+                                  void *blend)
+{
+   struct svga_context *svga = svga_context(pipe);
+
+   svga->curr.blend = (struct svga_blend_state*)blend;
+   svga->dirty |= SVGA_NEW_BLEND;
+}
+
+
+static void svga_delete_blend_state(struct pipe_context *pipe, void *blend)
+{
+   FREE(blend);
+}
+
+static void svga_set_blend_color( struct pipe_context *pipe,
+                                  const struct pipe_blend_color *blend_color )
+{
+   struct svga_context *svga = svga_context(pipe);
+
+   svga->curr.blend_color = *blend_color;
+
+   svga->dirty |= SVGA_NEW_BLEND;
+}
+
+
+void svga_init_blend_functions( struct svga_context *svga )
+{
+   svga->pipe.create_blend_state = svga_create_blend_state;
+   svga->pipe.bind_blend_state = svga_bind_blend_state;
+   svga->pipe.delete_blend_state = svga_delete_blend_state;
+
+   svga->pipe.set_blend_color = svga_set_blend_color;
+}
+
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_blit.c b/src/gallium/drivers/svga/svga_pipe_blit.c
new file mode 100644
index 00000000000..5a4a8c0f5f1
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_blit.c
@@ -0,0 +1,84 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_screen_texture.h"
+#include "svga_context.h"
+#include "svga_cmd.h"
+
+#define FILE_DEBUG_FLAG DEBUG_BLIT
+
+
+static void svga_surface_copy(struct pipe_context *pipe,
+                              struct pipe_surface *dest,
+                              unsigned destx, unsigned desty,
+                              struct pipe_surface *src,
+                              unsigned srcx, unsigned srcy,
+                              unsigned width, unsigned height)
+{
+   struct svga_context *svga = svga_context(pipe);
+   SVGA3dCopyBox *box;
+   enum pipe_error ret;
+
+   svga_hwtnl_flush_retry( svga );
+
+   ret = SVGA3D_BeginSurfaceCopy(svga->swc,
+                                 src,
+                                 dest,
+                                 &box,
+                                 1);
+   if(ret != PIPE_OK) {
+
+      svga_context_flush(svga, NULL);
+
+      ret = SVGA3D_BeginSurfaceCopy(svga->swc,
+                                    src,
+                                    dest,
+                                    &box,
+                                    1);
+      assert(ret == PIPE_OK);
+   }
+
+   box->x = destx;
+   box->y = desty;
+   box->z = 0;
+   box->w = width;
+   box->h = height;
+   box->d = 1;
+   box->srcx = srcx;
+   box->srcy = srcy;
+   box->srcz = 0;
+
+   SVGA_FIFOCommitAll(svga->swc);
+
+   svga_surface(dest)->dirty = TRUE;
+   svga_propagate_surface(pipe, dest);
+}
+
+
+void
+svga_init_blit_functions(struct svga_context *svga)
+{
+   svga->pipe.surface_copy = svga_surface_copy;
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_clear.c b/src/gallium/drivers/svga/svga_pipe_clear.c
new file mode 100644
index 00000000000..8977d26541c
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_clear.c
@@ -0,0 +1,119 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_defines.h"
+#include "util/u_pack_color.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+
+
+static enum pipe_error
+try_clear(struct svga_context *svga, 
+          unsigned buffers,
+          const float *rgba,
+          double depth,
+          unsigned stencil)
+{
+   int ret = PIPE_OK;
+   SVGA3dRect rect = { 0, 0, 0, 0 };
+   boolean restore_viewport = FALSE;
+   SVGA3dClearFlag flags = 0;
+   struct pipe_framebuffer_state *fb = &svga->curr.framebuffer;
+   unsigned color = 0;
+
+   ret = svga_update_state(svga, SVGA_STATE_HW_CLEAR);
+   if (ret)
+      return ret;
+
+   if ((buffers & PIPE_CLEAR_COLOR) && fb->cbufs[0]) {
+      flags |= SVGA3D_CLEAR_COLOR;
+      util_pack_color(rgba, PIPE_FORMAT_A8R8G8B8_UNORM, &color);
+
+      rect.w = fb->cbufs[0]->width;
+      rect.h = fb->cbufs[0]->height;
+   }
+
+   if ((buffers & PIPE_CLEAR_DEPTHSTENCIL) && fb->zsbuf) {
+      flags |= SVGA3D_CLEAR_DEPTH;
+
+      if (svga->curr.framebuffer.zsbuf->format == PIPE_FORMAT_Z24S8_UNORM)
+         flags |= SVGA3D_CLEAR_STENCIL;
+
+      rect.w = MAX2(rect.w, fb->zsbuf->width);
+      rect.h = MAX2(rect.h, fb->zsbuf->height);
+   }
+
+   if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) {
+      restore_viewport = TRUE;
+      ret = SVGA3D_SetViewport(svga->swc, &rect);
+      if (ret)
+         return ret;
+   }
+
+   ret = SVGA3D_ClearRect(svga->swc, flags, color, depth, stencil,
+                          rect.x, rect.y, rect.w, rect.h);
+   if (ret != PIPE_OK)
+      return ret;
+
+   if (restore_viewport) {
+      memcpy(&rect, &svga->state.hw_clear.viewport, sizeof rect);
+      ret = SVGA3D_SetViewport(svga->swc, &rect);
+   }
+   
+   return ret;
+}
+
+/**
+ * Clear the given surface to the specified value.
+ * No masking, no scissor (clear entire buffer).
+ */
+void
+svga_clear(struct pipe_context *pipe, unsigned buffers, const float *rgba,
+	   double depth, unsigned stencil)
+{
+   struct svga_context *svga = svga_context( pipe );
+   int ret;
+
+   ret = try_clear( svga, buffers, rgba, depth, stencil );
+
+   if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
+      /* Flush command buffer and retry:
+       */
+      svga_context_flush( svga, NULL );
+
+      ret = try_clear( svga, buffers, rgba, depth, stencil );
+   }
+
+   /*
+    * Mark target surfaces as dirty
+    * TODO Mark only cleared surfaces.
+    */
+   svga_mark_surfaces_dirty(svga);
+
+   assert (ret == PIPE_OK);
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_constants.c b/src/gallium/drivers/svga/svga_pipe_constants.c
new file mode 100644
index 00000000000..10e7a121892
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_constants.c
@@ -0,0 +1,74 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_hw_reg.h"
+#include "svga_cmd.h"
+
+/***********************************************************************
+ * Constant buffers 
+ */
+
+struct svga_constbuf 
+{
+   unsigned type;
+   float (*data)[4];
+   unsigned count;
+};
+
+
+
+static void svga_set_constant_buffer(struct pipe_context *pipe,
+                                     uint shader, uint index,
+                                     const struct pipe_constant_buffer *buf)
+{
+   struct svga_context *svga = svga_context(pipe);
+
+   assert(shader < PIPE_SHADER_TYPES);
+   assert(index == 0);
+
+   pipe_buffer_reference( &svga->curr.cb[shader],
+                          buf->buffer );
+
+   if (shader == PIPE_SHADER_FRAGMENT)
+      svga->dirty |= SVGA_NEW_FS_CONST_BUFFER;
+   else
+      svga->dirty |= SVGA_NEW_VS_CONST_BUFFER;
+}
+
+
+
+void svga_init_constbuffer_functions( struct svga_context *svga )
+{
+   svga->pipe.set_constant_buffer = svga_set_constant_buffer;
+}
+
diff --git a/src/gallium/drivers/svga/svga_pipe_depthstencil.c b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
new file mode 100644
index 00000000000..df636c08a05
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_depthstencil.c
@@ -0,0 +1,153 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_hw_reg.h"
+
+
+static INLINE unsigned
+svga_translate_compare_func(unsigned func)
+{
+   switch (func) {
+   case PIPE_FUNC_NEVER:     return SVGA3D_CMP_NEVER;
+   case PIPE_FUNC_LESS:      return SVGA3D_CMP_LESS;
+   case PIPE_FUNC_LEQUAL:    return SVGA3D_CMP_LESSEQUAL;
+   case PIPE_FUNC_GREATER:   return SVGA3D_CMP_GREATER;
+   case PIPE_FUNC_GEQUAL:    return SVGA3D_CMP_GREATEREQUAL;
+   case PIPE_FUNC_NOTEQUAL:  return SVGA3D_CMP_NOTEQUAL;
+   case PIPE_FUNC_EQUAL:     return SVGA3D_CMP_EQUAL;
+   case PIPE_FUNC_ALWAYS:    return SVGA3D_CMP_ALWAYS;
+   default:
+      assert(0);
+      return SVGA3D_CMP_ALWAYS;
+   }
+}
+
+static INLINE unsigned
+svga_translate_stencil_op(unsigned op)
+{
+   switch (op) {
+   case PIPE_STENCIL_OP_KEEP:      return SVGA3D_STENCILOP_KEEP;
+   case PIPE_STENCIL_OP_ZERO:      return SVGA3D_STENCILOP_ZERO;
+   case PIPE_STENCIL_OP_REPLACE:   return SVGA3D_STENCILOP_REPLACE;
+   case PIPE_STENCIL_OP_INCR:      return SVGA3D_STENCILOP_INCR;
+   case PIPE_STENCIL_OP_DECR:      return SVGA3D_STENCILOP_DECR;
+   case PIPE_STENCIL_OP_INCR_WRAP: return SVGA3D_STENCILOP_INCRSAT; /* incorrect? */
+   case PIPE_STENCIL_OP_DECR_WRAP: return SVGA3D_STENCILOP_DECRSAT; /* incorrect? */
+   case PIPE_STENCIL_OP_INVERT:    return SVGA3D_STENCILOP_INVERT;
+   default:
+      assert(0);
+      return SVGA3D_STENCILOP_KEEP;
+   }
+}
+
+
+static void *
+svga_create_depth_stencil_state(struct pipe_context *pipe,
+				const struct pipe_depth_stencil_alpha_state *templ)
+{
+   struct svga_depth_stencil_state *ds = CALLOC_STRUCT( svga_depth_stencil_state );
+
+   /* Don't try to figure out CW/CCW correspondence with
+    * stencil[0]/[1] at this point.  Presumably this can change as
+    * back/front face are modified.
+    */
+   ds->stencil[0].enabled = templ->stencil[0].enabled;
+   if (ds->stencil[0].enabled) {
+      ds->stencil[0].func  = svga_translate_compare_func(templ->stencil[0].func);
+      ds->stencil[0].fail  = svga_translate_stencil_op(templ->stencil[0].fail_op);
+      ds->stencil[0].zfail = svga_translate_stencil_op(templ->stencil[0].zfail_op);
+      ds->stencil[0].pass  = svga_translate_stencil_op(templ->stencil[0].zpass_op);
+      
+      /* SVGA3D has one ref/mask/writemask triple shared between front &
+       * back face stencil.  We really need two:
+       */
+      ds->stencil_ref       = templ->stencil[0].ref_value & 0xff;
+      ds->stencil_mask      = templ->stencil[0].valuemask & 0xff;
+      ds->stencil_writemask = templ->stencil[0].writemask & 0xff;
+   }
+
+
+   ds->stencil[1].enabled = templ->stencil[1].enabled;
+   if (templ->stencil[1].enabled) {
+      ds->stencil[1].func   = svga_translate_compare_func(templ->stencil[1].func);
+      ds->stencil[1].fail   = svga_translate_stencil_op(templ->stencil[1].fail_op);
+      ds->stencil[1].zfail  = svga_translate_stencil_op(templ->stencil[1].zfail_op);
+      ds->stencil[1].pass   = svga_translate_stencil_op(templ->stencil[1].zpass_op);
+
+      ds->stencil_ref       = templ->stencil[1].ref_value & 0xff;
+      ds->stencil_mask      = templ->stencil[1].valuemask & 0xff;
+      ds->stencil_writemask = templ->stencil[1].writemask & 0xff;
+   }
+
+
+   ds->zenable = templ->depth.enabled;
+   if (ds->zenable) {
+      ds->zfunc = svga_translate_compare_func(templ->depth.func);
+      ds->zwriteenable = templ->depth.writemask;
+   }
+
+   ds->alphatestenable = templ->alpha.enabled;
+   if (ds->alphatestenable) {
+      ds->alphafunc = svga_translate_compare_func(templ->alpha.func);
+      ds->alpharef = templ->alpha.ref_value;
+   }
+
+   return ds;
+}
+
+static void svga_bind_depth_stencil_state(struct pipe_context *pipe,
+                                          void *depth_stencil)
+{
+   struct svga_context *svga = svga_context(pipe);
+
+   svga->curr.depth = (const struct svga_depth_stencil_state *)depth_stencil;
+   svga->dirty |= SVGA_NEW_DEPTH_STENCIL;
+}
+
+static void svga_delete_depth_stencil_state(struct pipe_context *pipe,
+                                            void *depth_stencil)
+{
+   FREE(depth_stencil);
+}
+
+
+
+void svga_init_depth_stencil_functions( struct svga_context *svga )
+{
+   svga->pipe.create_depth_stencil_alpha_state = svga_create_depth_stencil_state;
+   svga->pipe.bind_depth_stencil_alpha_state = svga_bind_depth_stencil_state;
+   svga->pipe.delete_depth_stencil_alpha_state = svga_delete_depth_stencil_state;
+}
+
+
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_draw.c b/src/gallium/drivers/svga/svga_pipe_draw.c
new file mode 100644
index 00000000000..71a552862e9
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_draw.c
@@ -0,0 +1,261 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_inlines.h"
+#include "util/u_prim.h"
+#include "util/u_time.h"
+#include "indices/u_indices.h"
+
+#include "svga_hw_reg.h"
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_winsys.h"
+#include "svga_draw.h"
+#include "svga_state.h"
+#include "svga_swtnl.h"
+#include "svga_debug.h"
+
+
+
+static enum pipe_error
+retry_draw_range_elements( struct svga_context *svga,
+                           struct pipe_buffer *index_buffer,
+                           unsigned index_size,
+                           unsigned min_index,
+                           unsigned max_index,
+                           unsigned prim, 
+                           unsigned start, 
+                           unsigned count,
+                           boolean do_retry )
+{
+   enum pipe_error ret = 0;
+
+   svga_hwtnl_set_unfilled( svga->hwtnl,
+                            svga->curr.rast->hw_unfilled );
+
+   svga_hwtnl_set_flatshade( svga->hwtnl,
+                             svga->curr.rast->templ.flatshade,
+                             svga->curr.rast->templ.flatshade_first );
+
+
+   ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
+   if (ret)
+      goto retry;
+
+   ret = svga_hwtnl_draw_range_elements( svga->hwtnl,
+                                         index_buffer, index_size,
+                                         min_index, max_index,
+                                         prim, start, count, 0 );
+   if (ret)
+      goto retry;
+
+   if (svga->curr.any_user_vertex_buffers) {
+      ret = svga_hwtnl_flush( svga->hwtnl );
+      if (ret)
+         goto retry;
+   }
+
+   return PIPE_OK;
+
+retry:
+   svga_context_flush( svga, NULL );
+
+   if (do_retry)
+   {
+      return retry_draw_range_elements( svga,
+                                        index_buffer, index_size,
+                                        min_index, max_index,
+                                        prim, start, count,
+                                        FALSE );
+   }
+
+   return ret;
+}
+
+
+static enum pipe_error
+retry_draw_arrays( struct svga_context *svga,
+                   unsigned prim, 
+                   unsigned start, 
+                   unsigned count,
+                   boolean do_retry )
+{
+   enum pipe_error ret;
+
+   svga_hwtnl_set_unfilled( svga->hwtnl,
+                            svga->curr.rast->hw_unfilled );
+
+   svga_hwtnl_set_flatshade( svga->hwtnl,
+                             svga->curr.rast->templ.flatshade,
+                             svga->curr.rast->templ.flatshade_first );
+
+   ret = svga_update_state( svga, SVGA_STATE_HW_DRAW );
+   if (ret)
+      goto retry;
+
+   ret = svga_hwtnl_draw_arrays( svga->hwtnl, prim,
+                                 start, count );
+   if (ret)
+      goto retry;
+
+   if (svga->curr.any_user_vertex_buffers) {
+      ret = svga_hwtnl_flush( svga->hwtnl );
+      if (ret)
+         goto retry;
+   }
+
+   return 0;
+
+retry:
+   if (ret == PIPE_ERROR_OUT_OF_MEMORY && do_retry) 
+   {
+      svga_context_flush( svga, NULL );
+
+      return retry_draw_arrays( svga,
+                                prim,
+                                start,
+                                count,
+                                FALSE );
+   }
+
+   return ret;
+}
+
+
+
+
+
+static boolean
+svga_draw_range_elements( struct pipe_context *pipe,
+                          struct pipe_buffer *index_buffer,
+                          unsigned index_size,
+                          unsigned min_index,
+                          unsigned max_index,
+                          unsigned prim, unsigned start, unsigned count)
+{
+   struct svga_context *svga = svga_context( pipe );
+   unsigned reduced_prim = u_reduced_prim(prim);
+   enum pipe_error ret = 0;
+
+   if (!u_trim_pipe_prim( prim, &count ))
+      return TRUE;
+
+   /*
+    * Mark currently bound target surfaces as dirty
+    * doesn't really matter if it is done before drawing.
+    *
+    * TODO If we ever normaly return something other then
+    * true we should not mark it as dirty then.
+    */
+   svga_mark_surfaces_dirty(svga_context(pipe));
+
+   if (svga->curr.reduced_prim != reduced_prim) {
+      svga->curr.reduced_prim = reduced_prim;
+      svga->dirty |= SVGA_NEW_REDUCED_PRIMITIVE;
+   }
+   
+   svga_update_state_retry( svga, SVGA_STATE_NEED_SWTNL );
+
+#ifdef DEBUG
+   if (svga->curr.vs->base.id == svga->debug.disable_shader ||
+       svga->curr.fs->base.id == svga->debug.disable_shader)
+      return 0;
+#endif
+
+   if (svga->state.sw.need_swtnl)
+   {
+      ret = svga_swtnl_draw_range_elements( svga, 
+                                            index_buffer, 
+                                            index_size,
+                                            min_index, max_index,
+                                            prim,
+                                            start, count );
+   }
+   else {
+      if (index_buffer) {
+         ret = retry_draw_range_elements( svga,
+                                          index_buffer,
+                                          index_size,
+                                          min_index,
+                                          max_index,
+                                          prim,
+                                          start,
+                                          count,
+                                          TRUE );
+      }
+      else {
+         ret = retry_draw_arrays( svga, 
+                                  prim, 
+                                  start, 
+                                  count,
+                                  TRUE );
+      }
+   }
+
+   if (SVGA_DEBUG & DEBUG_FLUSH) {
+      static unsigned id;
+      debug_printf("%s %d\n", __FUNCTION__, id++);
+      if (id > 1300)
+         util_time_sleep( 2000 );
+
+      svga_hwtnl_flush_retry( svga );
+      svga_context_flush(svga, NULL);
+   }
+
+   return ret == PIPE_OK;
+}
+
+
+static boolean 
+svga_draw_elements( struct pipe_context *pipe,
+                    struct pipe_buffer *index_buffer,
+                    unsigned index_size,
+                    unsigned prim, unsigned start, unsigned count)
+{
+   return svga_draw_range_elements( pipe, index_buffer,
+                                    index_size,
+                                    0, 0xffffffff,
+                                    prim, start, count );
+}
+
+static boolean 
+svga_draw_arrays( struct pipe_context *pipe,
+                  unsigned prim, unsigned start, unsigned count)
+{
+   return svga_draw_range_elements(pipe, NULL, 0, 
+                                   start, start + count - 1, 
+                                   prim, 
+                                   start, count);
+}
+
+
+void svga_init_draw_functions( struct svga_context *svga )
+{
+   svga->pipe.draw_arrays = svga_draw_arrays;
+   svga->pipe.draw_elements = svga_draw_elements;
+   svga->pipe.draw_range_elements = svga_draw_range_elements;
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_flush.c b/src/gallium/drivers/svga/svga_pipe_flush.c
new file mode 100644
index 00000000000..942366de721
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_flush.c
@@ -0,0 +1,68 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_defines.h"
+#include "svga_screen.h"
+#include "svga_screen_texture.h"
+#include "svga_context.h"
+#include "svga_winsys.h"
+#include "svga_draw.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+
+
+
+
+static void svga_flush( struct pipe_context *pipe,
+                        unsigned flags,
+                        struct pipe_fence_handle **fence )
+{
+   struct svga_context *svga = svga_context(pipe);
+   int i;
+
+   /* Emit buffered drawing commands.
+    */
+   svga_hwtnl_flush_retry( svga );
+
+   /* Emit back-copy from render target view to texture.
+    */
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+      if (svga->curr.framebuffer.cbufs[i])
+         svga_propagate_surface(pipe, svga->curr.framebuffer.cbufs[i]);
+   }
+   if (svga->curr.framebuffer.zsbuf)
+      svga_propagate_surface(pipe, svga->curr.framebuffer.zsbuf);
+
+   /* Flush command queue.
+    */
+   svga_context_flush(svga, fence);
+}
+
+
+void svga_init_flush_functions( struct svga_context *svga )
+{
+   svga->pipe.flush = svga_flush;
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_fs.c b/src/gallium/drivers/svga/svga_pipe_fs.c
new file mode 100644
index 00000000000..e3be840d920
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_fs.c
@@ -0,0 +1,124 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_text.h"
+
+#include "svga_screen.h"
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_tgsi.h"
+#include "svga_hw_reg.h"
+#include "svga_cmd.h"
+#include "svga_draw.h"
+#include "svga_debug.h"
+
+
+/***********************************************************************
+ * Fragment shaders 
+ */
+
+static void *
+svga_create_fs_state(struct pipe_context *pipe,
+                     const struct pipe_shader_state *templ)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
+   struct svga_fragment_shader *fs;
+
+   fs = CALLOC_STRUCT(svga_fragment_shader);
+   if (!fs)
+      return NULL;
+
+   fs->base.tokens = tgsi_dup_tokens(templ->tokens);
+
+   /* Collect basic info that we'll need later:
+    */
+   tgsi_scan_shader(fs->base.tokens, &fs->base.info);
+
+   fs->base.id = svga->debug.shader_id++;
+   fs->base.use_sm30 = svgascreen->use_ps30;
+   
+   if (SVGA_DEBUG & DEBUG_TGSI || 0) {
+      debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
+                   __FUNCTION__, fs->base.id,
+                   fs->base.info.num_inputs, fs->base.info.num_outputs);
+   }
+
+   return fs;
+}
+
+static void
+svga_bind_fs_state(struct pipe_context *pipe, void *shader)
+{
+   struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader;
+   struct svga_context *svga = svga_context(pipe);
+
+   svga->curr.fs = fs;
+   svga->dirty |= SVGA_NEW_FS;
+}
+
+static
+void svga_delete_fs_state(struct pipe_context *pipe, void *shader)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_fragment_shader *fs = (struct svga_fragment_shader *) shader;
+   struct svga_shader_result *result, *tmp;
+   enum pipe_error ret;
+
+   svga_hwtnl_flush_retry( svga );
+
+   for (result = fs->base.results; result; result = tmp ) {
+      tmp = result->next;
+
+      ret = SVGA3D_DestroyShader(svga->swc, 
+                                 result->id,
+                                 SVGA3D_SHADERTYPE_PS );
+      if(ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_DestroyShader(svga->swc, 
+                                    result->id,
+                                    SVGA3D_SHADERTYPE_PS );
+         assert(ret == PIPE_OK);
+      }
+
+      svga_destroy_shader_result( result );
+   }
+
+   FREE((void *)fs->base.tokens);
+   FREE(fs);
+}
+
+
+void svga_init_fs_functions( struct svga_context *svga )
+{
+   svga->pipe.create_fs_state = svga_create_fs_state;
+   svga->pipe.bind_fs_state = svga_bind_fs_state;
+   svga->pipe.delete_fs_state = svga_delete_fs_state;
+}
+
diff --git a/src/gallium/drivers/svga/svga_pipe_misc.c b/src/gallium/drivers/svga/svga_pipe_misc.c
new file mode 100644
index 00000000000..58cb1e6e230
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_misc.c
@@ -0,0 +1,187 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "svga_context.h"
+#include "svga_screen_texture.h"
+#include "svga_state.h"
+#include "svga_winsys.h"
+
+#include "svga_hw_reg.h"
+
+
+
+
+static void svga_set_scissor_state( struct pipe_context *pipe,
+                                 const struct pipe_scissor_state *scissor )
+{
+   struct svga_context *svga = svga_context(pipe);
+
+   memcpy( &svga->curr.scissor, scissor, sizeof(*scissor) );
+   svga->dirty |= SVGA_NEW_SCISSOR;
+}
+
+
+static void svga_set_polygon_stipple( struct pipe_context *pipe,
+                                      const struct pipe_poly_stipple *stipple )
+{
+   /* overridden by the draw module */
+}
+
+
+void svga_cleanup_framebuffer(struct svga_context *svga)
+{
+   struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
+   struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+   int i;
+
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+      pipe_surface_reference(&curr->cbufs[i], NULL);
+      pipe_surface_reference(&hw->cbufs[i], NULL);
+   }
+
+   pipe_surface_reference(&curr->zsbuf, NULL);
+   pipe_surface_reference(&hw->zsbuf, NULL);
+}
+
+
+#define DEPTH_BIAS_SCALE_FACTOR_D16    ((float)(1<<15))
+#define DEPTH_BIAS_SCALE_FACTOR_D24S8  ((float)(1<<23))
+#define DEPTH_BIAS_SCALE_FACTOR_D32    ((float)(1<<31))
+
+
+static void svga_set_framebuffer_state(struct pipe_context *pipe,
+				       const struct pipe_framebuffer_state *fb)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct pipe_framebuffer_state *dst = &svga->curr.framebuffer;
+   boolean propagate = FALSE;
+   int i;
+
+   dst->width = fb->width;
+   dst->height = fb->height;
+   dst->nr_cbufs = fb->nr_cbufs;
+
+   /* check if we need to propaget any of the target surfaces */
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+      if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i])
+         if (svga_surface_needs_propagation(dst->cbufs[i]))
+            propagate = TRUE;
+   }
+
+   if (propagate) {
+      /* make sure that drawing calls comes before propagation calls */
+      svga_hwtnl_flush_retry( svga );
+   
+      for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+         if (dst->cbufs[i] && dst->cbufs[i] != fb->cbufs[i])
+            svga_propagate_surface(pipe, dst->cbufs[i]);
+   }
+
+   /* XXX: Actually the virtual hardware may support rendertargets with
+    * different size, depending on the host API and driver, but since we cannot
+    * know that make no such assumption here. */
+   for(i = 0; i < fb->nr_cbufs; ++i) {
+      if (fb->zsbuf && fb->cbufs[i]) {
+         assert(fb->zsbuf->width == fb->cbufs[i]->width); 
+         assert(fb->zsbuf->height == fb->cbufs[i]->height); 
+      }
+   }
+
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++)
+      pipe_surface_reference(&dst->cbufs[i], fb->cbufs[i]);
+   pipe_surface_reference(&dst->zsbuf, fb->zsbuf);
+
+
+   if (svga->curr.framebuffer.zsbuf)
+   {
+      switch (svga->curr.framebuffer.zsbuf->format) {
+      case PIPE_FORMAT_Z16_UNORM:
+         svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D16;
+         break;
+      case PIPE_FORMAT_S8Z24_UNORM:
+      case PIPE_FORMAT_X8Z24_UNORM:
+      case PIPE_FORMAT_Z24S8_UNORM:
+      case PIPE_FORMAT_Z24X8_UNORM:
+         svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D24S8;
+         break;
+      case PIPE_FORMAT_Z32_UNORM:
+         svga->curr.depthscale = 1.0f / DEPTH_BIAS_SCALE_FACTOR_D32;
+         break;
+      case PIPE_FORMAT_Z32_FLOAT:
+         svga->curr.depthscale = 1.0f / ((float)(1<<23));
+         break;
+      default:
+         svga->curr.depthscale = 0.0f;
+         break;
+      }
+   }
+   else {
+      svga->curr.depthscale = 0.0f;
+   }
+
+   svga->dirty |= SVGA_NEW_FRAME_BUFFER;
+}
+
+
+
+static void svga_set_clip_state( struct pipe_context *pipe,
+                                 const struct pipe_clip_state *clip )
+{
+   struct svga_context *svga = svga_context(pipe);
+
+   svga->curr.clip = *clip; /* struct copy */
+
+   svga->dirty |= SVGA_NEW_CLIP;
+}
+
+
+
+/* Called when driver state tracker notices changes to the viewport
+ * matrix:
+ */
+static void svga_set_viewport_state( struct pipe_context *pipe,
+				     const struct pipe_viewport_state *viewport )
+{
+   struct svga_context *svga = svga_context(pipe);
+
+   svga->curr.viewport = *viewport; /* struct copy */
+
+   svga->dirty |= SVGA_NEW_VIEWPORT;
+}
+
+
+
+void svga_init_misc_functions( struct svga_context *svga )
+{
+   svga->pipe.set_scissor_state = svga_set_scissor_state;
+   svga->pipe.set_polygon_stipple = svga_set_polygon_stipple;
+   svga->pipe.set_framebuffer_state = svga_set_framebuffer_state;
+   svga->pipe.set_clip_state = svga_set_clip_state;
+   svga->pipe.set_viewport_state = svga_set_viewport_state;
+}
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_query.c b/src/gallium/drivers/svga/svga_pipe_query.c
new file mode 100644
index 00000000000..01336b0a2c3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_query.c
@@ -0,0 +1,267 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_state.h"
+#include "pipe/p_context.h"
+#include "util/u_memory.h"
+
+#include "svga_cmd.h"
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_draw.h"
+#include "svga_debug.h"
+
+
+/* Fixme: want a public base class for all pipe structs, even if there
+ * isn't much in them.
+ */
+struct pipe_query {
+   int dummy;
+};
+
+struct svga_query {
+   struct pipe_query base;
+   SVGA3dQueryType type;
+   struct svga_winsys_buffer *hwbuf;
+   volatile SVGA3dQueryResult *queryResult;
+   struct pipe_fence_handle *fence;
+};
+
+/***********************************************************************
+ * Inline conversion functions.  These are better-typed than the
+ * macros used previously:
+ */
+static INLINE struct svga_query *
+svga_query( struct pipe_query *q )
+{
+   return (struct svga_query *)q;
+}
+
+static boolean svga_get_query_result(struct pipe_context *pipe, 
+                                     struct pipe_query *q,
+                                     boolean wait,
+                                     uint64_t *result);
+
+static struct pipe_query *svga_create_query( struct pipe_context *pipe,
+                                             unsigned query_type )
+{
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_query *sq;
+
+   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+
+   sq = CALLOC_STRUCT(svga_query);
+   if (!sq)
+      goto no_sq;
+
+   sq->type = SVGA3D_QUERYTYPE_OCCLUSION;
+
+   sq->hwbuf = svga_winsys_buffer_create(svgascreen, 
+                                         1,
+                                         SVGA_BUFFER_USAGE_PINNED,
+                                         sizeof *sq->queryResult);
+   if(!sq->hwbuf)
+      goto no_hwbuf;
+    
+   sq->queryResult = (SVGA3dQueryResult *)sws->buffer_map(sws, 
+                                                          sq->hwbuf, 
+                                                          PIPE_BUFFER_USAGE_CPU_WRITE);
+   if(!sq->queryResult)
+      goto no_query_result;
+
+   sq->queryResult->totalSize = sizeof *sq->queryResult;
+   sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
+
+   /*
+    * We request the buffer to be pinned and assume it is always mapped.
+    * 
+    * The reason is that we don't want to wait for fences when checking the
+    * query status.
+    */
+   sws->buffer_unmap(sws, sq->hwbuf);
+
+   return &sq->base;
+
+no_query_result:
+   sws->buffer_destroy(sws, sq->hwbuf);
+no_hwbuf:
+   FREE(sq);
+no_sq:
+   return NULL;
+}
+
+static void svga_destroy_query(struct pipe_context *pipe,
+                               struct pipe_query *q)
+{
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_query *sq = svga_query( q );
+
+   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+   sws->buffer_destroy(sws, sq->hwbuf);
+   sws->fence_reference(sws, &sq->fence, NULL);
+   FREE(sq);
+}
+
+static void svga_begin_query(struct pipe_context *pipe, 
+                             struct pipe_query *q)
+{
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_context *svga = svga_context( pipe );
+   struct svga_query *sq = svga_query( q );
+   enum pipe_error ret;
+
+   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+   
+   assert(!svga->sq);
+
+   /* Need to flush out buffered drawing commands so that they don't
+    * get counted in the query results.
+    */
+   svga_hwtnl_flush_retry(svga);
+   
+   if(sq->queryResult->state == SVGA3D_QUERYSTATE_PENDING) {
+      /* The application doesn't care for the pending query result. We cannot
+       * let go the existing buffer and just get a new one because its storage
+       * may be reused for other purposes and clobbered by the host when it
+       * determines the query result. So the only option here is to wait for
+       * the existing query's result -- not a big deal, given that no sane
+       * application would do this.
+       */
+      uint64_t result;
+
+      svga_get_query_result(pipe, q, TRUE, &result);
+      
+      assert(sq->queryResult->state != SVGA3D_QUERYSTATE_PENDING);
+   }
+   
+   sq->queryResult->state = SVGA3D_QUERYSTATE_NEW;
+   sws->fence_reference(sws, &sq->fence, NULL);
+
+   ret = SVGA3D_BeginQuery(svga->swc, sq->type);
+   if(ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_BeginQuery(svga->swc, sq->type);
+      assert(ret == PIPE_OK);
+   }
+
+   svga->sq = sq;
+}
+
+static void svga_end_query(struct pipe_context *pipe, 
+                           struct pipe_query *q)
+{
+   struct svga_context *svga = svga_context( pipe );
+   struct svga_query *sq = svga_query( q );
+   enum pipe_error ret;
+
+   SVGA_DBG(DEBUG_QUERY, "%s\n", __FUNCTION__);
+   assert(svga->sq == sq);
+
+   svga_hwtnl_flush_retry(svga);
+   
+   /* Set to PENDING before sending EndQuery. */
+   sq->queryResult->state = SVGA3D_QUERYSTATE_PENDING;
+
+   ret = SVGA3D_EndQuery( svga->swc, sq->type, sq->hwbuf);
+   if(ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = SVGA3D_EndQuery( svga->swc, sq->type, sq->hwbuf);
+      assert(ret == PIPE_OK);
+   }
+   
+   /* TODO: Delay flushing. We don't really need to flush here, just ensure 
+    * that there is one flush before svga_get_query_result attempts to get the
+    * result */
+   svga_context_flush(svga, NULL);
+
+   svga->sq = NULL;
+}
+
+static boolean svga_get_query_result(struct pipe_context *pipe, 
+                                     struct pipe_query *q,
+                                     boolean wait,
+                                     uint64_t *result)
+{
+   struct svga_context *svga = svga_context( pipe );
+   struct svga_screen *svgascreen = svga_screen( pipe->screen );
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_query *sq = svga_query( q );
+   SVGA3dQueryState state;
+   
+   SVGA_DBG(DEBUG_QUERY, "%s wait: %d\n", __FUNCTION__);
+
+   /* The query status won't be updated by the host unless 
+    * SVGA_3D_CMD_WAIT_FOR_QUERY is emitted. Unfortunately this will cause a 
+    * synchronous wait on the host */
+   if(!sq->fence) {
+      enum pipe_error ret;
+
+      ret = SVGA3D_WaitForQuery( svga->swc, sq->type, sq->hwbuf);
+      if(ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_WaitForQuery( svga->swc, sq->type, sq->hwbuf);
+         assert(ret == PIPE_OK);
+      }
+   
+      svga_context_flush(svga, &sq->fence);
+      
+      assert(sq->fence);
+   }
+
+   state = sq->queryResult->state;
+   if(state == SVGA3D_QUERYSTATE_PENDING) {
+      if(!wait)
+         return FALSE;
+   
+      sws->fence_finish(sws, sq->fence, 0);
+      
+      state = sq->queryResult->state;
+   }
+
+   assert(state == SVGA3D_QUERYSTATE_SUCCEEDED || 
+          state == SVGA3D_QUERYSTATE_FAILED);
+   
+   *result = (uint64_t)sq->queryResult->result32;
+
+   SVGA_DBG(DEBUG_QUERY, "%s result %d\n", __FUNCTION__, (unsigned)*result);
+
+   return TRUE;
+}
+
+
+
+void svga_init_query_functions( struct svga_context *svga )
+{
+   svga->pipe.create_query = svga_create_query;
+   svga->pipe.destroy_query = svga_destroy_query;
+   svga->pipe.begin_query = svga_begin_query;
+   svga->pipe.end_query = svga_end_query;
+   svga->pipe.get_query_result = svga_get_query_result;
+}
diff --git a/src/gallium/drivers/svga/svga_pipe_rasterizer.c b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
new file mode 100644
index 00000000000..b03f8eb9cf3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_rasterizer.c
@@ -0,0 +1,250 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+
+#include "svga_hw_reg.h"
+
+/* Hardware frontwinding is always set up as SVGA3D_FRONTWINDING_CW.
+ */
+static SVGA3dFace svga_translate_cullmode( unsigned mode,
+                                           unsigned front_winding )
+{
+   switch (mode) {
+   case PIPE_WINDING_NONE:
+      return SVGA3D_FACE_NONE;
+   case PIPE_WINDING_CCW:
+      return SVGA3D_FACE_BACK;
+   case PIPE_WINDING_CW:
+      return SVGA3D_FACE_FRONT;
+   case PIPE_WINDING_BOTH:
+      return SVGA3D_FACE_FRONT_BACK;
+   default:
+      assert(0);
+      return SVGA3D_FACE_NONE;
+   }
+}
+
+static SVGA3dShadeMode svga_translate_flatshade( unsigned mode )
+{
+   return mode ? SVGA3D_SHADEMODE_FLAT : SVGA3D_SHADEMODE_SMOOTH;
+}
+
+
+static void *
+svga_create_rasterizer_state(struct pipe_context *pipe,
+                             const struct pipe_rasterizer_state *templ)
+{
+   struct svga_rasterizer_state *rast = CALLOC_STRUCT( svga_rasterizer_state );
+   /* need this for draw module. */
+   rast->templ = *templ;
+
+   /* light_twoside          - XXX: need fragment shader varient */
+   /* poly_smooth            - XXX: no fallback available */
+   /* poly_stipple_enable    - draw module */
+   /* point_sprite           - ? */
+   /* point_size_per_vertex  - ? */
+   /* sprite_coord_mode      - ??? */
+   /* bypass_vs_viewport_and_clip        - handled by viewport setup */
+   /* flatshade_first        - handled by index translation */
+   /* gl_rasterization_rules - XXX - viewport code */
+   /* line_width             - draw module */
+   /* fill_cw, fill_ccw      - draw module or index translation */
+
+   rast->shademode = svga_translate_flatshade( templ->flatshade );
+   rast->cullmode = svga_translate_cullmode( templ->cull_mode, 
+                                             templ->front_winding );
+   rast->scissortestenable = templ->scissor;
+   rast->multisampleantialias = templ->multisample;
+   rast->antialiasedlineenable = templ->line_smooth;
+   rast->lastpixel = templ->line_last_pixel;
+   rast->pointsize = templ->point_size;
+   rast->pointsize_min = templ->point_size_min;
+   rast->pointsize_max = templ->point_size_max;
+   rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+
+   /* Use swtnl + decomposition implement these:
+    */
+   if (templ->poly_stipple_enable)
+      rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+
+   if (templ->line_width != 1.0 &&
+       templ->line_width != 0.0)
+      rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES;
+
+   if (templ->line_stipple_enable) {
+      /* LinePattern not implemented on all backends. 
+       */
+      if (0) {
+         SVGA3dLinePattern lp;
+         lp.repeat = templ->line_stipple_factor + 1;
+         lp.pattern = templ->line_stipple_pattern;
+         rast->linepattern = lp.uintValue;
+      }
+      else {
+         rast->need_pipeline |= SVGA_PIPELINE_FLAG_LINES;
+      }
+   } 
+
+   if (templ->point_smooth)
+      rast->need_pipeline |= SVGA_PIPELINE_FLAG_POINTS;
+
+   {
+      boolean offset_cw = templ->offset_cw;
+      boolean offset_ccw = templ->offset_ccw;
+      boolean offset  = 0;
+      int fill_cw = templ->fill_cw;
+      int fill_ccw = templ->fill_ccw;
+      int fill = PIPE_POLYGON_MODE_FILL;
+
+      switch (templ->cull_mode) {
+      case PIPE_WINDING_BOTH:
+         offset = 0;
+         fill = PIPE_POLYGON_MODE_FILL;
+         break;
+
+      case PIPE_WINDING_CW:
+         offset = offset_ccw;
+         fill = fill_ccw;
+         break;
+
+      case PIPE_WINDING_CCW:
+         offset = offset_cw;
+         fill = fill_cw;
+         break;
+
+      case PIPE_WINDING_NONE:
+         if (fill_cw != fill_ccw || offset_cw != offset_ccw) 
+         {
+            /* Always need the draw module to work out different
+             * front/back fill modes:
+             */
+            rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+         }
+         else {
+            offset = offset_ccw;
+            fill = fill_ccw;
+         }
+         break;
+
+      default:
+         assert(0);
+         break;
+      }
+
+      /* Unfilled primitive modes aren't implemented on all virtual
+       * hardware.  We can do some unfilled processing with index
+       * translation, but otherwise need the draw module:
+       */
+      if (fill != PIPE_POLYGON_MODE_FILL &&
+          (templ->flatshade ||
+           templ->light_twoside ||
+           offset ||
+           templ->cull_mode != PIPE_WINDING_NONE)) 
+      {
+         fill = PIPE_POLYGON_MODE_FILL;
+         rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+      }
+
+      /* If we are decomposing to lines, and lines need the pipeline,
+       * then we also need the pipeline for tris.
+       */
+      if (fill == PIPE_POLYGON_MODE_LINE &&
+          (rast->need_pipeline & SVGA_PIPELINE_FLAG_LINES))
+      {
+         fill = PIPE_POLYGON_MODE_FILL;
+         rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+      }
+
+      /* Similarly for points:
+       */
+      if (fill == PIPE_POLYGON_MODE_POINT &&
+          (rast->need_pipeline & SVGA_PIPELINE_FLAG_POINTS))
+      {
+         fill = PIPE_POLYGON_MODE_FILL;
+         rast->need_pipeline |= SVGA_PIPELINE_FLAG_TRIS;
+      }
+
+      if (offset) {
+         rast->slopescaledepthbias = templ->offset_scale;
+         rast->depthbias = templ->offset_units;
+      }
+
+      rast->hw_unfilled = fill;
+   }
+
+
+
+
+   if (rast->need_pipeline & SVGA_PIPELINE_FLAG_TRIS) {
+      /* Turn off stuff which will get done in the draw module:
+       */
+      rast->hw_unfilled = PIPE_POLYGON_MODE_FILL;
+      rast->slopescaledepthbias = 0;
+      rast->depthbias = 0;
+   }
+
+   return rast;
+}
+
+static void svga_bind_rasterizer_state( struct pipe_context *pipe,
+                                        void *state )
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_rasterizer_state *raster = (struct svga_rasterizer_state *)state;
+
+   svga->curr.rast = raster;
+
+   draw_set_rasterizer_state(svga->swtnl.draw, raster ? &raster->templ : NULL);
+   
+   svga->dirty |= SVGA_NEW_RAST;
+}
+
+static void svga_delete_rasterizer_state(struct pipe_context *pipe,
+                                         void *raster)
+{
+   FREE(raster);
+}
+
+
+void svga_init_rasterizer_functions( struct svga_context *svga )
+{
+   svga->pipe.create_rasterizer_state = svga_create_rasterizer_state;
+   svga->pipe.bind_rasterizer_state = svga_bind_rasterizer_state;
+   svga->pipe.delete_rasterizer_state = svga_delete_rasterizer_state;
+}
+
+
+/***********************************************************************
+ * Hardware state update
+ */
+
diff --git a/src/gallium/drivers/svga/svga_pipe_sampler.c b/src/gallium/drivers/svga/svga_pipe_sampler.c
new file mode 100644
index 00000000000..3eeca6b784b
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_sampler.c
@@ -0,0 +1,243 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_pack_color.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "svga_context.h"
+#include "svga_screen_texture.h"
+#include "svga_state.h"
+
+#include "svga_hw_reg.h"
+
+#include "svga_debug.h"
+
+static INLINE unsigned
+translate_wrap_mode(unsigned wrap)
+{
+   switch (wrap) {
+   case PIPE_TEX_WRAP_REPEAT: 
+      return SVGA3D_TEX_ADDRESS_WRAP;
+
+   case PIPE_TEX_WRAP_CLAMP: 
+      return SVGA3D_TEX_ADDRESS_CLAMP;
+
+   case PIPE_TEX_WRAP_CLAMP_TO_EDGE: 
+      /* Unfortunately SVGA3D_TEX_ADDRESS_EDGE not respected by
+       * hardware.
+       */
+      return SVGA3D_TEX_ADDRESS_CLAMP;
+
+   case PIPE_TEX_WRAP_CLAMP_TO_BORDER: 
+      return SVGA3D_TEX_ADDRESS_BORDER;
+
+   case PIPE_TEX_WRAP_MIRROR_REPEAT: 
+      return SVGA3D_TEX_ADDRESS_MIRROR;
+
+   case PIPE_TEX_WRAP_MIRROR_CLAMP:  
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_EDGE:   
+   case PIPE_TEX_WRAP_MIRROR_CLAMP_TO_BORDER: 
+      return SVGA3D_TEX_ADDRESS_MIRRORONCE;
+
+   default:
+      assert(0);
+      return SVGA3D_TEX_ADDRESS_WRAP;
+   }
+}
+
+static INLINE unsigned translate_img_filter( unsigned filter )
+{
+   switch (filter) {
+   case PIPE_TEX_FILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST;
+   case PIPE_TEX_FILTER_LINEAR:  return SVGA3D_TEX_FILTER_LINEAR;
+   case PIPE_TEX_FILTER_ANISO:   return SVGA3D_TEX_FILTER_ANISOTROPIC;
+   default:
+      assert(0);
+      return SVGA3D_TEX_FILTER_NEAREST;
+   }
+}
+
+static INLINE unsigned translate_mip_filter( unsigned filter )
+{
+   switch (filter) {
+   case PIPE_TEX_MIPFILTER_NONE:    return SVGA3D_TEX_FILTER_NONE;
+   case PIPE_TEX_MIPFILTER_NEAREST: return SVGA3D_TEX_FILTER_NEAREST;
+   case PIPE_TEX_MIPFILTER_LINEAR:  return SVGA3D_TEX_FILTER_LINEAR;
+   default:
+      assert(0);
+      return SVGA3D_TEX_FILTER_NONE;
+   }
+}
+
+static void *
+svga_create_sampler_state(struct pipe_context *pipe,
+                          const struct pipe_sampler_state *sampler)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_sampler_state *cso = CALLOC_STRUCT( svga_sampler_state );
+   
+   cso->mipfilter = translate_mip_filter(sampler->min_mip_filter);
+   cso->magfilter = translate_img_filter( sampler->mag_img_filter );
+   cso->minfilter = translate_img_filter( sampler->min_img_filter );
+   cso->aniso_level = MAX2( (unsigned) sampler->max_anisotropy, 1 );
+   cso->lod_bias = sampler->lod_bias;
+   cso->addressu = translate_wrap_mode(sampler->wrap_s);
+   cso->addressv = translate_wrap_mode(sampler->wrap_t);
+   cso->addressw = translate_wrap_mode(sampler->wrap_r);
+   cso->normalized_coords = sampler->normalized_coords;
+   cso->compare_mode = sampler->compare_mode;
+   cso->compare_func = sampler->compare_func;
+
+   {
+      ubyte r = float_to_ubyte(sampler->border_color[0]);
+      ubyte g = float_to_ubyte(sampler->border_color[1]);
+      ubyte b = float_to_ubyte(sampler->border_color[2]);
+      ubyte a = float_to_ubyte(sampler->border_color[3]);
+
+      util_pack_color_ub( r, g, b, a,
+                          PIPE_FORMAT_B8G8R8A8_UNORM,
+                          &cso->bordercolor );
+   }
+
+   /* No SVGA3D support for:
+    *    - min/max LOD clamping
+    */
+   cso->min_lod = 0;
+   cso->view_min_lod = MAX2(sampler->min_lod, 0);
+   cso->view_max_lod = MAX2(sampler->max_lod, 0);
+
+   /* Use min_mipmap */
+   if (svga->debug.use_min_mipmap) {
+      if (cso->view_min_lod == cso->view_max_lod) {
+         cso->min_lod = cso->view_min_lod;
+         cso->view_min_lod = 0;
+         cso->view_max_lod = 1000; /* Just a high number */
+         cso->mipfilter = SVGA3D_TEX_FILTER_NONE;
+      }
+   }
+
+   SVGA_DBG(DEBUG_VIEWS, "min %u, view(min %u, max %u) lod, mipfilter %s\n",
+            cso->min_lod, cso->view_min_lod, cso->view_max_lod,
+            cso->mipfilter == SVGA3D_TEX_FILTER_NONE ? "SVGA3D_TEX_FILTER_NONE" : "SOMETHING");
+
+   return cso;
+}
+
+static void svga_bind_sampler_states(struct pipe_context *pipe,
+                                     unsigned num, void **sampler)
+{
+   struct svga_context *svga = svga_context(pipe);
+   unsigned i;
+
+   assert(num <= PIPE_MAX_SAMPLERS);
+
+   /* Check for no-op */
+   if (num == svga->curr.num_samplers &&
+       !memcmp(svga->curr.sampler, sampler, num * sizeof(void *))) {
+      debug_printf("sampler noop\n");
+      return;
+   }
+
+   for (i = 0; i < num; i++)
+      svga->curr.sampler[i] = sampler[i];
+
+   for (i = num; i < svga->curr.num_samplers; i++)
+      svga->curr.sampler[i] = NULL;
+
+   svga->curr.num_samplers = num;
+   svga->dirty |= SVGA_NEW_SAMPLER;
+}
+
+static void svga_delete_sampler_state(struct pipe_context *pipe,
+                                      void *sampler)
+{
+   FREE(sampler);
+}
+
+
+static void svga_set_sampler_textures(struct pipe_context *pipe,
+                                      unsigned num,
+                                      struct pipe_texture **texture)
+{
+   struct svga_context *svga = svga_context(pipe);
+   unsigned flag_1d = 0;
+   unsigned flag_srgb = 0;
+   uint i;
+
+   assert(num <= PIPE_MAX_SAMPLERS);
+
+   /* Check for no-op */
+   if (num == svga->curr.num_textures &&
+       !memcmp(svga->curr.texture, texture, num * sizeof(struct pipe_texture *))) {
+      if (0) debug_printf("texture noop\n");
+      return;
+   }
+
+   for (i = 0; i < num; i++) {
+      pipe_texture_reference(&svga->curr.texture[i],
+                             texture[i]);
+
+      if (!texture[i])
+         continue;
+
+      if (texture[i]->format == PIPE_FORMAT_A8R8G8B8_SRGB)
+         flag_srgb |= 1 << i;
+
+      if (texture[i]->target == PIPE_TEXTURE_1D)
+         flag_1d |= 1 << i;
+   }
+
+   for (i = num; i < svga->curr.num_textures; i++)
+      pipe_texture_reference(&svga->curr.texture[i],
+                             NULL);
+
+   svga->curr.num_textures = num;
+   svga->dirty |= SVGA_NEW_TEXTURE_BINDING;
+
+   if (flag_srgb != svga->curr.tex_flags.flag_srgb ||
+       flag_1d != svga->curr.tex_flags.flag_1d) 
+   {
+      svga->dirty |= SVGA_NEW_TEXTURE_FLAGS;
+      svga->curr.tex_flags.flag_1d = flag_1d;
+      svga->curr.tex_flags.flag_srgb = flag_srgb;
+   }  
+}
+
+
+
+void svga_init_sampler_functions( struct svga_context *svga )
+{
+   svga->pipe.create_sampler_state = svga_create_sampler_state;
+   svga->pipe.bind_sampler_states = svga_bind_sampler_states;
+   svga->pipe.delete_sampler_state = svga_delete_sampler_state;
+   svga->pipe.set_sampler_textures = svga_set_sampler_textures;
+}
+
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_vertex.c b/src/gallium/drivers/svga/svga_pipe_vertex.c
new file mode 100644
index 00000000000..28e2787e0d3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_vertex.c
@@ -0,0 +1,115 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_winsys.h"
+
+#include "svga_hw_reg.h"
+
+
+static void svga_set_vertex_buffers(struct pipe_context *pipe,
+                                    unsigned count,
+                                    const struct pipe_vertex_buffer *buffers)
+{
+   struct svga_context *svga = svga_context(pipe);
+   unsigned i;
+   boolean any_user_buffer = FALSE;
+
+   /* Check for no change */
+   if (count == svga->curr.num_vertex_buffers &&
+       memcmp(svga->curr.vb, buffers, count * sizeof buffers[0]) == 0)
+      return;
+
+   /* Adjust refcounts */
+   for (i = 0; i < count; i++) {
+      pipe_buffer_reference(&svga->curr.vb[i].buffer, buffers[i].buffer);
+      if (svga_buffer(buffers[i].buffer)->user)
+         any_user_buffer = TRUE;
+   }
+
+   for ( ; i < svga->curr.num_vertex_buffers; i++)
+      pipe_buffer_reference(&svga->curr.vb[i].buffer, NULL);
+
+   /* Copy remaining data */
+   memcpy(svga->curr.vb, buffers, count * sizeof buffers[0]);
+   svga->curr.num_vertex_buffers = count;
+   svga->curr.any_user_vertex_buffers = any_user_buffer;
+
+   svga->dirty |= SVGA_NEW_VBUFFER;
+}
+
+static void svga_set_vertex_elements(struct pipe_context *pipe,
+                                     unsigned count,
+                                     const struct pipe_vertex_element *elements)
+{
+   struct svga_context *svga = svga_context(pipe);
+   unsigned i;
+
+   for (i = 0; i < count; i++)
+      svga->curr.ve[i] = elements[i];
+
+   svga->curr.num_vertex_elements = count;
+   svga->dirty |= SVGA_NEW_VELEMENT;
+}
+
+
+static void svga_set_edgeflags(struct pipe_context *pipe,
+                               const unsigned *bitfield)
+{
+   struct svga_context *svga = svga_context(pipe);
+
+   if (bitfield != NULL || svga->curr.edgeflags != NULL) {
+      svga->curr.edgeflags = bitfield;
+      svga->dirty |= SVGA_NEW_EDGEFLAGS;
+   }
+}
+
+
+void svga_cleanup_vertex_state( struct svga_context *svga )
+{
+   unsigned i;
+   
+   for (i = 0 ; i < svga->curr.num_vertex_buffers; i++)
+      pipe_buffer_reference(&svga->curr.vb[i].buffer, NULL);
+}
+
+
+void svga_init_vertex_functions( struct svga_context *svga )
+{
+   svga->pipe.set_vertex_buffers = svga_set_vertex_buffers;
+   svga->pipe.set_vertex_elements = svga_set_vertex_elements;
+   svga->pipe.set_edgeflags = svga_set_edgeflags;
+}
+
+
diff --git a/src/gallium/drivers/svga/svga_pipe_vs.c b/src/gallium/drivers/svga/svga_pipe_vs.c
new file mode 100644
index 00000000000..e5ffe668c35
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_pipe_vs.c
@@ -0,0 +1,189 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_text.h"
+
+#include "svga_screen.h"
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_tgsi.h"
+#include "svga_hw_reg.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
+
+
+static const struct tgsi_token *substitute_vs( 
+   unsigned shader_id,
+   const struct tgsi_token *old_tokens )
+{
+#if 0
+   if (shader_id == 12) {
+   static struct tgsi_token tokens[300];
+
+   const char *text = 
+      "VERT1.1\n"
+      "DCL IN[0]\n"
+      "DCL IN[1]\n"
+      "DCL IN[2]\n"
+      "DCL OUT[0], POSITION\n"
+      "DCL TEMP[0..4]\n"
+      "IMM FLT32 {     1.0000,     1.0000,     1.0000,     1.0000 }\n"
+      "IMM FLT32 {     0.45,     1.0000,     1.0000,     1.0000 }\n"
+      "IMM FLT32 { 1.297863, 0.039245, 0.035993, 0.035976}\n"
+      "IMM FLT32 { -0.019398, 1.696131, -0.202151, -0.202050  }\n"
+      "IMM FLT32 { 0.051711, -0.348713, -0.979204, -0.978714  }\n"
+      "IMM FLT32 { 0.000000, 0.000003, 139.491577, 141.421356 }\n"
+      "DCL CONST[0..7]\n"
+      "DCL CONST[9..16]\n"
+      "  MOV TEMP[2], IMM[0]\n"
+
+      "  MOV TEMP[2].xyz, IN[2]\n"
+      "  MOV TEMP[2].xyz, IN[0]\n"
+      "  MOV TEMP[2].xyz, IN[1]\n"
+
+      "  MUL TEMP[1], IMM[3], TEMP[2].yyyy\n"
+      "  MAD TEMP[3], IMM[2],  TEMP[2].xxxx, TEMP[1]\n"
+      "  MAD TEMP[1], IMM[4], TEMP[2].zzzz, TEMP[3]\n"
+      "  MAD TEMP[4], IMM[5], TEMP[2].wwww, TEMP[1]\n"
+
+      "  MOV OUT[0], TEMP[4]\n"
+      "  END\n";
+
+   if (!tgsi_text_translate( text,
+                             tokens,
+                             Elements(tokens) ))
+   {
+      assert(0);
+      return NULL;
+   }
+
+   return tokens;
+   }
+#endif
+
+   return old_tokens;
+}
+
+
+/***********************************************************************
+ * Vertex shaders 
+ */
+
+static void *
+svga_create_vs_state(struct pipe_context *pipe,
+                     const struct pipe_shader_state *templ)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_screen *svgascreen = svga_screen(pipe->screen);
+   struct svga_vertex_shader *vs = CALLOC_STRUCT(svga_vertex_shader);
+   if (!vs)
+      return NULL;
+
+   /* substitute a debug shader?
+    */
+   vs->base.tokens = tgsi_dup_tokens(substitute_vs(svga->debug.shader_id,
+                                                   templ->tokens));
+
+
+   /* Collect basic info that we'll need later:
+    */
+   tgsi_scan_shader(vs->base.tokens, &vs->base.info);
+
+   {
+      /* Need to do construct a new template in case we substitued a
+       * debug shader.
+       */
+      struct pipe_shader_state tmp2 = *templ;
+      tmp2.tokens = vs->base.tokens;
+      vs->draw_shader = draw_create_vertex_shader(svga->swtnl.draw, &tmp2);
+   }
+
+   vs->base.id = svga->debug.shader_id++;
+   vs->base.use_sm30 = svgascreen->use_vs30;
+
+   if (SVGA_DEBUG & DEBUG_TGSI || 0) {
+      debug_printf("%s id: %u, inputs: %u, outputs: %u\n",
+                   __FUNCTION__, vs->base.id,
+                   vs->base.info.num_inputs, vs->base.info.num_outputs);
+   }
+
+   return vs;
+}
+
+static void svga_bind_vs_state(struct pipe_context *pipe, void *shader)
+{
+   struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader;
+   struct svga_context *svga = svga_context(pipe);
+
+   svga->curr.vs = vs;
+   svga->dirty |= SVGA_NEW_VS;
+}
+
+
+static void svga_delete_vs_state(struct pipe_context *pipe, void *shader)
+{
+   struct svga_context *svga = svga_context(pipe);
+   struct svga_vertex_shader *vs = (struct svga_vertex_shader *)shader;
+   struct svga_shader_result *result, *tmp;
+   enum pipe_error ret;
+
+   svga_hwtnl_flush_retry( svga );
+
+   draw_delete_vertex_shader(svga->swtnl.draw, vs->draw_shader);
+   
+   for (result = vs->base.results; result; result = tmp ) {
+      tmp = result->next;
+
+      ret = SVGA3D_DestroyShader(svga->swc, 
+                                 result->id,
+                                 SVGA3D_SHADERTYPE_VS );
+      if(ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_DestroyShader(svga->swc, 
+                                    result->id,
+                                    SVGA3D_SHADERTYPE_VS );
+         assert(ret == PIPE_OK);
+      }
+
+      svga_destroy_shader_result( result );
+   }
+
+   FREE((void *)vs->base.tokens);
+   FREE(vs);
+}
+
+
+void svga_init_vs_functions( struct svga_context *svga )
+{
+   svga->pipe.create_vs_state = svga_create_vs_state;
+   svga->pipe.bind_vs_state = svga_bind_vs_state;
+   svga->pipe.delete_vs_state = svga_delete_vs_state;
+}
+
diff --git a/src/gallium/drivers/svga/svga_screen.c b/src/gallium/drivers/svga/svga_screen.c
new file mode 100644
index 00000000000..3afcaffff55
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen.c
@@ -0,0 +1,435 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_memory.h"
+#include "pipe/p_inlines.h"
+#include "util/u_string.h"
+#include "util/u_math.h"
+
+#include "svga_winsys.h"
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_screen_texture.h"
+#include "svga_screen_buffer.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+
+#ifdef DEBUG
+int SVGA_DEBUG = 0;
+
+static const struct debug_named_value svga_debug_flags[] = {
+   { "dma",      DEBUG_DMA },
+   { "tgsi",     DEBUG_TGSI },
+   { "pipe",     DEBUG_PIPE },
+   { "state",    DEBUG_STATE },
+   { "screen",   DEBUG_SCREEN },
+   { "tex",      DEBUG_TEX },
+   { "swtnl",    DEBUG_SWTNL },
+   { "const",    DEBUG_CONSTS },
+   { "viewport", DEBUG_VIEWPORT },
+   { "views",    DEBUG_VIEWS },
+   { "perf",     DEBUG_PERF },
+   { "flush",    DEBUG_FLUSH },
+   { "sync",     DEBUG_SYNC },
+   {NULL, 0}
+};
+#endif
+
+static const char *
+svga_get_vendor( struct pipe_screen *pscreen )
+{
+   return "VMware, Inc.";
+}
+
+
+static const char *
+svga_get_name( struct pipe_screen *pscreen )
+{
+#ifdef DEBUG
+   /* Only return internal details in the DEBUG version:
+    */
+   return "SVGA3D; build: DEBUG; mutex: " PIPE_ATOMIC;
+#else
+   return "SVGA3D; build: RELEASE; ";
+#endif
+}
+
+
+
+
+static float
+svga_get_paramf(struct pipe_screen *screen, int param)
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   SVGA3dDevCapResult result;
+
+   switch (param) {
+   case PIPE_CAP_MAX_LINE_WIDTH:
+      /* fall-through */
+   case PIPE_CAP_MAX_LINE_WIDTH_AA:
+      return 7.0;
+
+   case PIPE_CAP_MAX_POINT_WIDTH:
+      /* fall-through */
+   case PIPE_CAP_MAX_POINT_WIDTH_AA:
+      /* Keep this to a reasonable size to avoid failures in
+       * conform/pntaa.c:
+       */
+      return 80.0;
+
+   case PIPE_CAP_MAX_TEXTURE_ANISOTROPY:
+      return 4.0;
+
+   case PIPE_CAP_MAX_TEXTURE_LOD_BIAS:
+      return 16.0;
+
+   case PIPE_CAP_MAX_TEXTURE_IMAGE_UNITS:
+      return 16;
+   case PIPE_CAP_NPOT_TEXTURES:
+      return 1;
+   case PIPE_CAP_TWO_SIDED_STENCIL:
+      return 1;
+   case PIPE_CAP_GLSL:
+      return svgascreen->use_ps30 && svgascreen->use_vs30;
+   case PIPE_CAP_ANISOTROPIC_FILTER:
+      return 1;
+   case PIPE_CAP_POINT_SPRITE:
+      return 1;
+   case PIPE_CAP_MAX_RENDER_TARGETS:
+      if(!sws->get_cap(sws, SVGA3D_DEVCAP_MAX_RENDER_TARGETS, &result))
+         return 1;
+      if(!result.u)
+         return 1;
+      return MIN2(result.u, PIPE_MAX_COLOR_BUFS);
+   case PIPE_CAP_OCCLUSION_QUERY:
+      return 1;
+   case PIPE_CAP_TEXTURE_SHADOW_MAP:
+      return 1;
+   case PIPE_CAP_MAX_TEXTURE_2D_LEVELS:
+      return SVGA_MAX_TEXTURE_LEVELS;
+   case PIPE_CAP_MAX_TEXTURE_3D_LEVELS:
+      return 8;  /* max 128x128x128 */
+   case PIPE_CAP_MAX_TEXTURE_CUBE_LEVELS:
+      return SVGA_MAX_TEXTURE_LEVELS;
+
+   case PIPE_CAP_TEXTURE_MIRROR_REPEAT: /* req. for GL 1.4 */
+      return 1;
+
+   case PIPE_CAP_BLEND_EQUATION_SEPARATE: /* req. for GL 1.5 */
+      return 1;
+
+   default:
+      return 0;
+   }
+}
+
+
+/* This is a fairly pointless interface
+ */
+static int
+svga_get_param(struct pipe_screen *screen, int param)
+{
+   return (int) svga_get_paramf( screen, param );
+}
+
+
+static INLINE SVGA3dDevCapIndex
+svga_translate_format_cap(enum pipe_format format)
+{
+   switch(format) {
+   
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_A8R8G8B8;
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_X8R8G8B8;
+
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_R5G6B5;
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_A1R5G5B5;
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_A4R4G4B4;
+
+   case PIPE_FORMAT_Z16_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_Z_D16;
+   case PIPE_FORMAT_Z24S8_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_Z_D24S8;
+   case PIPE_FORMAT_Z24X8_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_Z_D24X8;
+
+   case PIPE_FORMAT_A8_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_ALPHA8;
+   case PIPE_FORMAT_L8_UNORM:
+      return SVGA3D_DEVCAP_SURFACEFMT_LUMINANCE8;
+
+   case PIPE_FORMAT_DXT1_RGB:
+   case PIPE_FORMAT_DXT1_RGBA:
+      return SVGA3D_DEVCAP_SURFACEFMT_DXT1;
+   case PIPE_FORMAT_DXT3_RGBA:
+      return SVGA3D_DEVCAP_SURFACEFMT_DXT3;
+   case PIPE_FORMAT_DXT5_RGBA:
+      return SVGA3D_DEVCAP_SURFACEFMT_DXT5;
+
+   default:
+      return SVGA3D_DEVCAP_MAX;
+   }
+}
+
+
+static boolean
+svga_is_format_supported( struct pipe_screen *screen,
+                          enum pipe_format format, 
+                          enum pipe_texture_target target,
+                          unsigned tex_usage, 
+                          unsigned geom_flags )
+{
+   struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+   SVGA3dDevCapIndex index;
+   SVGA3dDevCapResult result;
+   
+   assert(tex_usage);
+
+   /* Override host capabilities */
+   if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) {
+      switch(format) { 
+
+      /* Often unsupported/problematic. This means we end up with the same
+       * visuals for all virtual hardware implementations.
+       */
+      case PIPE_FORMAT_A4R4G4B4_UNORM:
+      case PIPE_FORMAT_A1R5G5B5_UNORM:
+         return FALSE;
+         
+      /* Simulate ability to render into compressed textures */
+      case PIPE_FORMAT_DXT1_RGB:
+      case PIPE_FORMAT_DXT1_RGBA:
+      case PIPE_FORMAT_DXT3_RGBA:
+      case PIPE_FORMAT_DXT5_RGBA:
+         return TRUE;
+
+      default:
+         break;
+      }
+   }
+   
+   /* Try to query the host */
+   index = svga_translate_format_cap(format);
+   if( index < SVGA3D_DEVCAP_MAX && 
+       sws->get_cap(sws, index, &result) )
+   {
+      SVGA3dSurfaceFormatCaps mask;
+      
+      mask.value = 0;
+      if (tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET)
+         mask.offscreenRenderTarget = 1;
+      if (tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL)
+         mask.zStencil = 1;
+      if (tex_usage & PIPE_TEXTURE_USAGE_SAMPLER)
+         mask.texture = 1;
+
+      if ((result.u & mask.value) == mask.value)
+         return TRUE;
+      else
+         return FALSE;
+   }
+
+   /* Use our translate functions directly rather than relying on a
+    * duplicated list of supported formats which is prone to getting
+    * out of sync:
+    */
+   if(tex_usage & (PIPE_TEXTURE_USAGE_RENDER_TARGET | PIPE_TEXTURE_USAGE_DEPTH_STENCIL))
+      return svga_translate_format_render(format) != SVGA3D_FORMAT_INVALID;
+   else
+      return svga_translate_format(format) != SVGA3D_FORMAT_INVALID;
+}
+
+
+static void
+svga_fence_reference(struct pipe_screen *screen,
+                     struct pipe_fence_handle **ptr,
+                     struct pipe_fence_handle *fence)
+{
+   struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+   sws->fence_reference(sws, ptr, fence);
+}
+
+
+static int
+svga_fence_signalled(struct pipe_screen *screen,
+                     struct pipe_fence_handle *fence,
+                     unsigned flag)
+{
+   struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+   return sws->fence_signalled(sws, fence, flag);
+}
+
+
+static int
+svga_fence_finish(struct pipe_screen *screen,
+                  struct pipe_fence_handle *fence,
+                  unsigned flag)
+{
+   struct svga_winsys_screen *sws = svga_screen(screen)->sws;
+   return sws->fence_finish(sws, fence, flag);
+}
+
+
+static void
+svga_destroy_screen( struct pipe_screen *screen )
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   
+   svga_screen_cache_cleanup(svgascreen);
+
+   pipe_mutex_destroy(svgascreen->swc_mutex);
+   pipe_mutex_destroy(svgascreen->tex_mutex);
+
+   svgascreen->swc->destroy(svgascreen->swc);
+   
+   svgascreen->sws->destroy(svgascreen->sws);
+   
+   FREE(svgascreen);
+}
+
+
+/**
+ * Create a new svga_screen object
+ */
+struct pipe_screen *
+svga_screen_create(struct svga_winsys_screen *sws)
+{
+   struct svga_screen *svgascreen;
+   struct pipe_screen *screen;
+   SVGA3dDevCapResult result;
+
+#ifdef DEBUG
+   SVGA_DEBUG = debug_get_flags_option("SVGA_DEBUG", svga_debug_flags, 0 );
+#endif
+
+   svgascreen = CALLOC_STRUCT(svga_screen);
+   if (!svgascreen)
+      goto error1;
+
+   svgascreen->debug.force_level_surface_view =
+      debug_get_bool_option("SVGA_FORCE_LEVEL_SURFACE_VIEW", FALSE);
+   svgascreen->debug.force_surface_view =
+      debug_get_bool_option("SVGA_FORCE_SURFACE_VIEW", FALSE);
+   svgascreen->debug.force_sampler_view =
+      debug_get_bool_option("SVGA_FORCE_SAMPLER_VIEW", FALSE);
+   svgascreen->debug.no_surface_view =
+      debug_get_bool_option("SVGA_NO_SURFACE_VIEW", FALSE);
+   svgascreen->debug.no_sampler_view =
+      debug_get_bool_option("SVGA_NO_SAMPLER_VIEW", FALSE);
+
+   screen = &svgascreen->screen;
+
+   screen->destroy = svga_destroy_screen;
+   screen->get_name = svga_get_name;
+   screen->get_vendor = svga_get_vendor;
+   screen->get_param = svga_get_param;
+   screen->get_paramf = svga_get_paramf;
+   screen->is_format_supported = svga_is_format_supported;
+   screen->fence_reference = svga_fence_reference;
+   screen->fence_signalled = svga_fence_signalled;
+   screen->fence_finish = svga_fence_finish;
+   svgascreen->sws = sws;
+
+   svga_screen_init_texture_functions(screen);
+   svga_screen_init_buffer_functions(screen);
+
+   svgascreen->use_ps30 =
+      sws->get_cap(sws, SVGA3D_DEVCAP_FRAGMENT_SHADER_VERSION, &result) &&
+      result.u >= SVGA3DPSVERSION_30 ? TRUE : FALSE;
+
+   svgascreen->use_vs30 =
+      sws->get_cap(sws, SVGA3D_DEVCAP_VERTEX_SHADER_VERSION, &result) &&
+      result.u >= SVGA3DVSVERSION_30 ? TRUE : FALSE;
+
+#if 1
+   /* Shader model 2.0 is unsupported at the moment. */
+   if(!svgascreen->use_ps30 || !svgascreen->use_vs30)
+      goto error2;
+#else
+   if(debug_get_bool_option("SVGA_NO_SM30", FALSE))
+      svgascreen->use_vs30 = svgascreen->use_ps30 = FALSE;
+#endif
+
+   svgascreen->swc = sws->context_create(sws);
+   if(!svgascreen->swc)
+      goto error2;
+
+   pipe_mutex_init(svgascreen->tex_mutex);
+   pipe_mutex_init(svgascreen->swc_mutex);
+
+   LIST_INITHEAD(&svgascreen->cached_buffers);
+   
+   svga_screen_cache_init(svgascreen);
+
+   return screen;
+error2:
+   FREE(svgascreen);
+error1:
+   return NULL;
+}
+
+void svga_screen_flush( struct svga_screen *svgascreen, 
+                        struct pipe_fence_handle **pfence )
+{
+   struct pipe_fence_handle *fence = NULL;
+
+   SVGA_DBG(DEBUG_PERF, "%s\n", __FUNCTION__);
+   
+   pipe_mutex_lock(svgascreen->swc_mutex);
+   svgascreen->swc->flush(svgascreen->swc, &fence);
+   pipe_mutex_unlock(svgascreen->swc_mutex);
+   
+   svga_screen_cache_flush(svgascreen, fence);
+   
+   if(pfence)
+      *pfence = fence;
+   else
+      svgascreen->sws->fence_reference(svgascreen->sws, &fence, NULL);
+}
+
+struct svga_winsys_screen *
+svga_winsys_screen(struct pipe_screen *screen)
+{
+   return svga_screen(screen)->sws;
+}
+
+#ifdef DEBUG
+struct svga_screen *
+svga_screen(struct pipe_screen *screen)
+{
+   assert(screen);
+   assert(screen->destroy == svga_destroy_screen);
+   return (struct svga_screen *)screen;
+}
+#endif
diff --git a/src/gallium/drivers/svga/svga_screen.h b/src/gallium/drivers/svga/svga_screen.h
new file mode 100644
index 00000000000..b94ca7fc1ca
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen.h
@@ -0,0 +1,95 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_SCREEN_H
+#define SVGA_SCREEN_H
+
+
+#include "pipe/p_screen.h"
+#include "pipe/p_thread.h"
+
+#include "util/u_double_list.h"
+
+#include "svga_screen_cache.h"
+
+
+struct svga_winsys_screen;
+struct svga_winsys_context;
+struct SVGACmdMemory;
+
+#define SVGA_COMBINE_USERBUFFERS 1
+
+/**
+ * Subclass of pipe_screen
+ */
+struct svga_screen
+{
+   struct pipe_screen screen;
+   struct svga_winsys_screen *sws;
+
+   unsigned use_ps30;
+   unsigned use_vs30;
+   
+   struct {
+      boolean force_level_surface_view;
+      boolean force_surface_view;
+      boolean no_surface_view;
+      boolean force_sampler_view;
+      boolean no_sampler_view;
+   } debug;
+
+   /* The screen needs its own context */
+   struct svga_winsys_context *swc;
+   struct SVGACmdMemory *fifo;
+
+   unsigned texture_timestamp;
+   pipe_mutex tex_mutex; 
+   pipe_mutex swc_mutex; /* Protects the use of swc and dirty_buffers */
+   
+   /** 
+    * List of buffers with cached GMR. Ordered from the most recently used to
+    * the least recently used 
+    */
+   struct list_head cached_buffers;
+   
+   struct svga_host_surface_cache cache;
+};
+
+#ifndef DEBUG
+/** cast wrapper */
+static INLINE struct svga_screen *
+svga_screen(struct pipe_screen *pscreen)
+{
+   return (struct svga_screen *) pscreen;
+}
+#else
+struct svga_screen *
+svga_screen(struct pipe_screen *screen);
+#endif
+
+void svga_screen_flush( struct svga_screen *svga_screen, 
+                        struct pipe_fence_handle **pfence );
+
+#endif /* SVGA_SCREEN_H */
diff --git a/src/gallium/drivers/svga/svga_screen_buffer.c b/src/gallium/drivers/svga/svga_screen_buffer.c
new file mode 100644
index 00000000000..3b7811734ed
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_buffer.c
@@ -0,0 +1,820 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_thread.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_debug.h"
+
+
+/**
+ * Vertex and index buffers have to be treated slightly differently from 
+ * regular guest memory regions because the SVGA device sees them as 
+ * surfaces, and the state tracker can create/destroy without the pipe 
+ * driver, therefore we must do the uploads from the vws.
+ */
+static INLINE boolean
+svga_buffer_needs_hw_storage(unsigned usage)
+{
+   return usage & (PIPE_BUFFER_USAGE_VERTEX | PIPE_BUFFER_USAGE_INDEX);
+}
+
+
+static INLINE enum pipe_error
+svga_buffer_create_host_surface(struct svga_screen *ss,
+                                struct svga_buffer *sbuf)
+{
+   if(!sbuf->handle) {
+      sbuf->key.flags = 0;
+      
+      sbuf->key.format = SVGA3D_BUFFER;
+      if(sbuf->base.usage & PIPE_BUFFER_USAGE_VERTEX)
+         sbuf->key.flags |= SVGA3D_SURFACE_HINT_VERTEXBUFFER;
+      if(sbuf->base.usage & PIPE_BUFFER_USAGE_INDEX)
+         sbuf->key.flags |= SVGA3D_SURFACE_HINT_INDEXBUFFER;
+      
+      sbuf->key.size.width = sbuf->base.size;
+      sbuf->key.size.height = 1;
+      sbuf->key.size.depth = 1;
+      
+      sbuf->key.numFaces = 1;
+      sbuf->key.numMipLevels = 1;
+      
+      sbuf->handle = svga_screen_surface_create(ss, &sbuf->key);
+      if(!sbuf->handle)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+   
+      /* Always set the discard flag on the first time the buffer is written
+       * as svga_screen_surface_create might have passed a recycled host
+       * buffer.
+       */
+      sbuf->hw.flags.discard = TRUE;
+
+      SVGA_DBG(DEBUG_DMA, "   grab sid %p sz %d\n", sbuf->handle, sbuf->base.size);
+   }
+   
+   return PIPE_OK;
+}   
+
+
+static INLINE void
+svga_buffer_destroy_host_surface(struct svga_screen *ss,
+                                 struct svga_buffer *sbuf)
+{
+   if(sbuf->handle) {
+      SVGA_DBG(DEBUG_DMA, " ungrab sid %p sz %d\n", sbuf->handle, sbuf->base.size);
+      svga_screen_surface_destroy(ss, &sbuf->key, &sbuf->handle);
+   }
+}   
+
+
+static INLINE void
+svga_buffer_destroy_hw_storage(struct svga_screen *ss, struct svga_buffer *sbuf)
+{
+   struct svga_winsys_screen *sws = ss->sws;
+
+   assert(!sbuf->map.count);
+   assert(sbuf->hw.buf);
+   if(sbuf->hw.buf) {
+      sws->buffer_destroy(sws, sbuf->hw.buf);
+      sbuf->hw.buf = NULL;
+      assert(sbuf->head.prev && sbuf->head.next);
+      LIST_DEL(&sbuf->head);
+#ifdef DEBUG
+      sbuf->head.next = sbuf->head.prev = NULL; 
+#endif
+   }
+}
+
+static INLINE enum pipe_error
+svga_buffer_backup(struct svga_screen *ss, struct svga_buffer *sbuf)
+{
+   if (sbuf->hw.buf && sbuf->hw.num_ranges) {
+      void *src;
+
+      if (!sbuf->swbuf)
+	 sbuf->swbuf = align_malloc(sbuf->base.size, sbuf->base.alignment);
+      if (!sbuf->swbuf)
+	 return PIPE_ERROR_OUT_OF_MEMORY;
+
+      src = ss->sws->buffer_map(ss->sws, sbuf->hw.buf,
+				PIPE_BUFFER_USAGE_CPU_READ);
+      if (!src)
+	 return PIPE_ERROR;
+
+      memcpy(sbuf->swbuf, src, sbuf->base.size);
+      ss->sws->buffer_unmap(ss->sws, sbuf->hw.buf);
+   }
+
+   return PIPE_OK;
+}
+
+/**
+ * Try to make GMR space available by freeing the hardware storage of 
+ * unmapped
+ */
+boolean
+svga_buffer_free_cached_hw_storage(struct svga_screen *ss)
+{
+   struct list_head *curr;
+   struct svga_buffer *sbuf;
+   enum pipe_error ret = PIPE_OK;
+
+   curr = ss->cached_buffers.prev;
+   
+   /* free the least recently used buffer's hw storage which is not mapped */
+   do {
+      if(curr == &ss->cached_buffers)
+         return FALSE;
+
+      sbuf = LIST_ENTRY(struct svga_buffer, curr, head);
+      
+      curr = curr->prev;
+      if (sbuf->map.count == 0)
+	 ret = svga_buffer_backup(ss, sbuf);
+
+   } while(sbuf->map.count != 0 || ret != PIPE_OK);
+   
+   svga_buffer_destroy_hw_storage(ss, sbuf);
+   
+   return TRUE;
+}
+
+struct svga_winsys_buffer *
+svga_winsys_buffer_create( struct svga_screen *ss,
+                           unsigned alignment, 
+                           unsigned usage,
+                           unsigned size )
+{
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_winsys_buffer *buf;
+   
+   /* Just try */
+   buf = sws->buffer_create(sws, alignment, usage, size);
+   if(!buf) {
+
+      SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "flushing screen to find %d bytes GMR\n", 
+               size); 
+      
+      /* Try flushing all pending DMAs */
+      svga_screen_flush(ss, NULL);
+      buf = sws->buffer_create(sws, alignment, usage, size);
+
+      SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "evicting buffers to find %d bytes GMR\n", 
+               size);
+
+      /* Try evicing all buffer storage */
+      while(!buf && svga_buffer_free_cached_hw_storage(ss))
+         buf = sws->buffer_create(sws, alignment, usage, size);
+   }
+   
+   return buf;
+}
+
+
+/**
+ * Allocate DMA'ble storage for the buffer. 
+ * 
+ * Called before mapping a buffer.
+ */
+static INLINE enum pipe_error
+svga_buffer_create_hw_storage(struct svga_screen *ss,
+                              struct svga_buffer *sbuf)
+{
+   if(!sbuf->hw.buf) {
+      unsigned alignment = sbuf->base.alignment;
+      unsigned usage = 0;
+      unsigned size = sbuf->base.size;
+      
+      sbuf->hw.buf = svga_winsys_buffer_create(ss, alignment, usage, size);
+      if(!sbuf->hw.buf)
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      
+      assert(!sbuf->needs_flush);
+      assert(!sbuf->head.prev && !sbuf->head.next);
+      LIST_ADD(&sbuf->head, &ss->cached_buffers);
+   }
+   
+   return PIPE_OK;
+}
+
+
+/**
+ * Variant of SVGA3D_BufferDMA which leaves the copy box temporarily in blank.
+ */
+static enum pipe_error
+svga_buffer_upload_command(struct svga_context *svga,
+                           struct svga_buffer *sbuf)
+{
+   struct svga_winsys_context *swc = svga->swc;
+   struct svga_winsys_buffer *guest = sbuf->hw.buf;
+   struct svga_winsys_surface *host = sbuf->handle;
+   SVGA3dTransferType transfer = SVGA3D_WRITE_HOST_VRAM;
+   SVGA3dSurfaceDMAFlags flags = sbuf->hw.flags;
+   SVGA3dCmdSurfaceDMA *cmd;
+   uint32 numBoxes = sbuf->hw.num_ranges;
+   SVGA3dCopyBox *boxes;
+   SVGA3dCmdSurfaceDMASuffix *pSuffix;
+   unsigned region_flags;
+   unsigned surface_flags;
+   struct pipe_buffer *dummy;
+
+   if(transfer == SVGA3D_WRITE_HOST_VRAM) {
+      region_flags = PIPE_BUFFER_USAGE_GPU_READ;
+      surface_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+   }
+   else if(transfer == SVGA3D_READ_HOST_VRAM) {
+      region_flags = PIPE_BUFFER_USAGE_GPU_WRITE;
+      surface_flags = PIPE_BUFFER_USAGE_GPU_READ;
+   }
+   else {
+      assert(0);
+      return PIPE_ERROR_BAD_INPUT;
+   }
+
+   assert(numBoxes);
+
+   cmd = SVGA3D_FIFOReserve(swc,
+                            SVGA_3D_CMD_SURFACE_DMA,
+                            sizeof *cmd + numBoxes * sizeof *boxes + sizeof *pSuffix,
+                            2);
+   if(!cmd)
+      return PIPE_ERROR_OUT_OF_MEMORY;
+
+   swc->region_relocation(swc, &cmd->guest.ptr, guest, 0, region_flags);
+   cmd->guest.pitch = 0;
+
+   swc->surface_relocation(swc, &cmd->host.sid, host, surface_flags);
+   cmd->host.face = 0;
+   cmd->host.mipmap = 0;
+
+   cmd->transfer = transfer;
+
+   sbuf->hw.boxes = (SVGA3dCopyBox *)&cmd[1];
+   sbuf->hw.svga = svga;
+
+   /* Increment reference count */
+   dummy = NULL;
+   pipe_buffer_reference(&dummy, &sbuf->base);
+
+   pSuffix = (SVGA3dCmdSurfaceDMASuffix *)((uint8_t*)cmd + sizeof *cmd + numBoxes * sizeof *boxes);
+   pSuffix->suffixSize = sizeof *pSuffix;
+   pSuffix->maximumOffset = sbuf->base.size;
+   pSuffix->flags = flags;
+
+   swc->commit(swc);
+
+   return PIPE_OK;
+}
+
+
+/**
+ * Patch up the upload DMA command reserved by svga_buffer_upload_command
+ * with the final ranges.
+ */
+static void
+svga_buffer_upload_flush(struct svga_context *svga,
+                         struct svga_buffer *sbuf)
+{
+   struct svga_screen *ss = svga_screen(svga->pipe.screen);
+   SVGA3dCopyBox *boxes;
+   unsigned i;
+
+   assert(sbuf->handle); 
+   assert(sbuf->hw.buf);
+   assert(sbuf->hw.num_ranges);
+   assert(sbuf->hw.svga == svga);
+   assert(sbuf->hw.boxes);
+   
+   /*
+    * Patch the DMA command with the final copy box.
+    */
+
+   SVGA_DBG(DEBUG_DMA, "dma to sid %p\n", sbuf->handle);
+
+   boxes = sbuf->hw.boxes;
+   for(i = 0; i < sbuf->hw.num_ranges; ++i) {
+      SVGA_DBG(DEBUG_DMA, "  bytes %u - %u\n",
+               sbuf->hw.ranges[i].start, sbuf->hw.ranges[i].end);
+
+      boxes[i].x = sbuf->hw.ranges[i].start;
+      boxes[i].y = 0;
+      boxes[i].z = 0;
+      boxes[i].w = sbuf->hw.ranges[i].end - sbuf->hw.ranges[i].start;
+      boxes[i].h = 1;
+      boxes[i].d = 1;
+      boxes[i].srcx = sbuf->hw.ranges[i].start;
+      boxes[i].srcy = 0;
+      boxes[i].srcz = 0;
+   }
+
+   sbuf->hw.num_ranges = 0;
+   memset(&sbuf->hw.flags, 0, sizeof sbuf->hw.flags);
+
+   assert(sbuf->head.prev && sbuf->head.next);
+   LIST_DEL(&sbuf->head);
+   sbuf->needs_flush = FALSE;
+   /* XXX: do we care about cached_buffers any more ?*/
+   LIST_ADD(&sbuf->head, &ss->cached_buffers);
+
+   sbuf->hw.svga = NULL;
+   sbuf->hw.boxes = NULL;
+
+   /* Decrement reference count */
+   pipe_buffer_reference((struct pipe_buffer **)&sbuf, NULL);
+}
+
+
+/**
+ * Queue a DMA upload of a range of this buffer to the host.
+ *
+ * This function only notes the range down. It doesn't actually emit a DMA
+ * upload command. That only happens when a context tries to refer to this
+ * buffer, and the DMA upload command is added to that context's command buffer.
+ * 
+ * We try to lump as many contiguous DMA transfers together as possible.
+ */
+static void
+svga_buffer_upload_queue(struct svga_buffer *sbuf,
+                         unsigned start,
+                         unsigned end)
+{
+   unsigned i;
+
+   assert(sbuf->hw.buf);
+   assert(end > start);
+   
+   /*
+    * Try to grow one of the ranges.
+    *
+    * Note that it is not this function task to care about overlapping ranges,
+    * as the GMR was already given so it is too late to do anything. Situations
+    * where overlapping ranges may pose a problem should be detected via
+    * pipe_context::is_buffer_referenced and the context that refers to the
+    * buffer should be flushed.
+    */
+
+   for(i = 0; i < sbuf->hw.num_ranges; ++i) {
+      if(start <= sbuf->hw.ranges[i].end && sbuf->hw.ranges[i].start <= end) {
+         sbuf->hw.ranges[i].start = MIN2(sbuf->hw.ranges[i].start, start);
+         sbuf->hw.ranges[i].end   = MAX2(sbuf->hw.ranges[i].end,    end);
+         return;
+      }
+   }
+
+   /*
+    * We cannot add a new range to an existing DMA command, so patch-up the
+    * pending DMA upload and start clean.
+    */
+
+   if(sbuf->needs_flush)
+      svga_buffer_upload_flush(sbuf->hw.svga, sbuf);
+
+   assert(!sbuf->needs_flush);
+   assert(!sbuf->hw.svga);
+   assert(!sbuf->hw.boxes);
+
+   /*
+    * Add a new range.
+    */
+
+   sbuf->hw.ranges[sbuf->hw.num_ranges].start = start;
+   sbuf->hw.ranges[sbuf->hw.num_ranges].end = end;
+   ++sbuf->hw.num_ranges;
+}
+
+
+static void *
+svga_buffer_map_range( struct pipe_screen *screen,
+                       struct pipe_buffer *buf,
+                       unsigned offset, unsigned length,
+                       unsigned usage )
+{
+   struct svga_screen *ss = svga_screen(screen); 
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_buffer *sbuf = svga_buffer( buf );
+   void *map;
+
+   if(sbuf->swbuf) {
+      /* User/malloc buffer */
+      map = sbuf->swbuf;
+   }
+   else {
+      if(!sbuf->hw.buf) {
+         struct svga_winsys_surface *handle = sbuf->handle;
+
+         if(svga_buffer_create_hw_storage(ss, sbuf) != PIPE_OK)
+            return NULL;
+         
+         /* Populate the hardware storage if the host surface pre-existed */
+         if((usage & PIPE_BUFFER_USAGE_CPU_READ) && handle) {
+            SVGA3dSurfaceDMAFlags flags;
+            enum pipe_error ret;
+            struct pipe_fence_handle *fence = NULL;
+            
+            SVGA_DBG(DEBUG_DMA|DEBUG_PERF, "dma from sid %p, bytes %u - %u\n", 
+                     sbuf->handle, 0, sbuf->base.size);
+
+            memset(&flags, 0, sizeof flags);
+            
+            ret = SVGA3D_BufferDMA(ss->swc,
+                                   sbuf->hw.buf,
+                                   sbuf->handle,
+                                   SVGA3D_READ_HOST_VRAM,
+                                   sbuf->base.size,
+                                   0,
+                                   flags);
+            if(ret != PIPE_OK) {
+               ss->swc->flush(ss->swc, NULL);
+               
+               ret = SVGA3D_BufferDMA(ss->swc,
+                                      sbuf->hw.buf,
+                                      sbuf->handle,
+                                      SVGA3D_READ_HOST_VRAM,
+                                      sbuf->base.size,
+                                      0,
+                                      flags);
+               assert(ret == PIPE_OK);
+            }
+            
+            ss->swc->flush(ss->swc, &fence);
+            sws->fence_finish(sws, fence, 0);
+            sws->fence_reference(sws, &fence, NULL);
+         }
+      }
+      else {
+         if((usage & PIPE_BUFFER_USAGE_CPU_READ) && !sbuf->needs_flush) {
+            /* We already had the hardware storage but we would have to issue
+             * a download if we hadn't, so move the buffer to the begginning
+             * of the LRU list.
+             */
+            assert(sbuf->head.prev && sbuf->head.next);
+            LIST_DEL(&sbuf->head);
+            LIST_ADD(&sbuf->head, &ss->cached_buffers);
+         }
+      }
+         
+      map = sws->buffer_map(sws, sbuf->hw.buf, usage);
+   }
+
+   if(map) {
+      pipe_mutex_lock(ss->swc_mutex);
+
+      ++sbuf->map.count;
+
+      if (usage & PIPE_BUFFER_USAGE_CPU_WRITE) {
+         assert(sbuf->map.count <= 1);
+         sbuf->map.writing = TRUE;
+         if (usage & PIPE_BUFFER_USAGE_FLUSH_EXPLICIT)
+            sbuf->map.flush_explicit = TRUE;
+      }
+      
+      pipe_mutex_unlock(ss->swc_mutex);
+   }
+   
+   return map;
+}
+
+static void 
+svga_buffer_flush_mapped_range( struct pipe_screen *screen,
+                                struct pipe_buffer *buf,
+                                unsigned offset, unsigned length)
+{
+   struct svga_buffer *sbuf = svga_buffer( buf );
+   struct svga_screen *ss = svga_screen(screen);
+   
+   pipe_mutex_lock(ss->swc_mutex);
+   assert(sbuf->map.writing);
+   if(sbuf->map.writing) {
+      assert(sbuf->map.flush_explicit);
+      if(sbuf->hw.buf)
+         svga_buffer_upload_queue(sbuf, offset, offset + length);
+   }
+   pipe_mutex_unlock(ss->swc_mutex);
+}
+
+static void 
+svga_buffer_unmap( struct pipe_screen *screen,
+                   struct pipe_buffer *buf)
+{
+   struct svga_screen *ss = svga_screen(screen); 
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_buffer *sbuf = svga_buffer( buf );
+   
+   pipe_mutex_lock(ss->swc_mutex);
+   
+   assert(sbuf->map.count);
+   if(sbuf->map.count)
+      --sbuf->map.count;
+
+   if(sbuf->hw.buf)
+      sws->buffer_unmap(sws, sbuf->hw.buf);
+
+   if(sbuf->map.writing) {
+      if(!sbuf->map.flush_explicit) {
+         /* No mapped range was flushed -- flush the whole buffer */
+         SVGA_DBG(DEBUG_DMA, "flushing the whole buffer\n");
+   
+         if(sbuf->hw.buf)
+            svga_buffer_upload_queue(sbuf, 0, sbuf->base.size);
+      }
+      
+      sbuf->map.writing = FALSE;
+      sbuf->map.flush_explicit = FALSE;
+   }
+
+   pipe_mutex_unlock(ss->swc_mutex);
+}
+
+static void
+svga_buffer_destroy( struct pipe_buffer *buf )
+{
+   struct svga_screen *ss = svga_screen(buf->screen); 
+   struct svga_buffer *sbuf = svga_buffer( buf );
+
+   assert(!p_atomic_read(&buf->reference.count));
+   
+   assert(!sbuf->needs_flush);
+
+   if(sbuf->handle) {
+      SVGA_DBG(DEBUG_DMA, "release sid %p sz %d\n", sbuf->handle, sbuf->base.size);
+      svga_screen_surface_destroy(ss, &sbuf->key, &sbuf->handle);
+   }
+   
+   if(sbuf->hw.buf)
+      svga_buffer_destroy_hw_storage(ss, sbuf);
+   
+   if(sbuf->swbuf && !sbuf->user)
+      align_free(sbuf->swbuf);
+   
+   FREE(sbuf);
+}
+
+static struct pipe_buffer *
+svga_buffer_create(struct pipe_screen *screen,
+                   unsigned alignment,
+                   unsigned usage,
+                   unsigned size)
+{
+   struct svga_screen *ss = svga_screen(screen);
+   struct svga_buffer *sbuf;
+   
+   sbuf = CALLOC_STRUCT(svga_buffer);
+   if(!sbuf)
+      goto error1;
+      
+   sbuf->magic = SVGA_BUFFER_MAGIC;
+   
+   pipe_reference_init(&sbuf->base.reference, 1);
+   sbuf->base.screen = screen;
+   sbuf->base.alignment = alignment;
+   sbuf->base.usage = usage;
+   sbuf->base.size = size;
+
+   if(svga_buffer_needs_hw_storage(usage)) {
+      if(svga_buffer_create_host_surface(ss, sbuf) != PIPE_OK)
+         goto error2;
+   }
+   else {
+      if(alignment < sizeof(void*))
+         alignment = sizeof(void*);
+
+      usage |= PIPE_BUFFER_USAGE_CPU_READ_WRITE;
+      
+      sbuf->swbuf = align_malloc(size, alignment);
+      if(!sbuf->swbuf)
+         goto error2;
+   }
+      
+   return &sbuf->base; 
+
+error2:
+   FREE(sbuf);
+error1:
+   return NULL;
+}
+
+static struct pipe_buffer *
+svga_user_buffer_create(struct pipe_screen *screen,
+                        void *ptr,
+                        unsigned bytes)
+{
+   struct svga_buffer *sbuf;
+   
+   sbuf = CALLOC_STRUCT(svga_buffer);
+   if(!sbuf)
+      goto no_sbuf;
+      
+   sbuf->magic = SVGA_BUFFER_MAGIC;
+   
+   sbuf->swbuf = ptr;
+   sbuf->user = TRUE;
+   
+   pipe_reference_init(&sbuf->base.reference, 1);
+   sbuf->base.screen = screen;
+   sbuf->base.alignment = 1;
+   sbuf->base.usage = 0;
+   sbuf->base.size = bytes;
+   
+   return &sbuf->base; 
+
+no_sbuf:
+   return NULL;
+}
+
+   
+void
+svga_screen_init_buffer_functions(struct pipe_screen *screen)
+{
+   screen->buffer_create = svga_buffer_create;
+   screen->user_buffer_create = svga_user_buffer_create;
+   screen->buffer_map_range = svga_buffer_map_range;
+   screen->buffer_flush_mapped_range = svga_buffer_flush_mapped_range;
+   screen->buffer_unmap = svga_buffer_unmap;
+   screen->buffer_destroy = svga_buffer_destroy;
+}
+
+
+/** 
+ * Copy the contents of the user buffer / malloc buffer to a hardware buffer.
+ */
+static INLINE enum pipe_error
+svga_buffer_update_hw(struct svga_screen *ss, struct svga_buffer *sbuf)
+{
+   if(!sbuf->hw.buf) {
+      enum pipe_error ret;
+      void *map;
+      
+      assert(sbuf->swbuf);
+      if(!sbuf->swbuf)
+         return PIPE_ERROR;
+      
+      ret = svga_buffer_create_hw_storage(ss, sbuf);
+      assert(ret == PIPE_OK);
+      if(ret != PIPE_OK)
+         return ret;
+
+      pipe_mutex_lock(ss->swc_mutex);
+      map = ss->sws->buffer_map(ss->sws, sbuf->hw.buf, PIPE_BUFFER_USAGE_CPU_WRITE);
+      assert(map);
+      if(!map) {
+	 pipe_mutex_unlock(ss->swc_mutex);
+         return PIPE_ERROR_OUT_OF_MEMORY;
+      }
+
+      memcpy(map, sbuf->swbuf, sbuf->base.size);
+      ss->sws->buffer_unmap(ss->sws, sbuf->hw.buf);
+
+      /* This user/malloc buffer is now indistinguishable from a gpu buffer */
+      assert(!sbuf->map.count);
+      if(!sbuf->map.count) {
+         if(sbuf->user)
+            sbuf->user = FALSE;
+         else
+            align_free(sbuf->swbuf);
+         sbuf->swbuf = NULL;
+      }
+      
+      svga_buffer_upload_queue(sbuf, 0, sbuf->base.size);
+   }
+   
+   pipe_mutex_unlock(ss->swc_mutex);
+   return PIPE_OK;
+}
+
+
+struct svga_winsys_surface *
+svga_buffer_handle(struct svga_context *svga,
+                   struct pipe_buffer *buf)
+{
+   struct pipe_screen *screen = svga->pipe.screen;
+   struct svga_screen *ss = svga_screen(screen);
+   struct svga_buffer *sbuf;
+   enum pipe_error ret;
+
+   if(!buf)
+      return NULL;
+
+   sbuf = svga_buffer(buf);
+   
+   assert(!sbuf->map.count);
+   
+   if(!sbuf->handle) {
+      ret = svga_buffer_create_host_surface(ss, sbuf);
+      if(ret != PIPE_OK)
+	 return NULL;
+
+      ret = svga_buffer_update_hw(ss, sbuf);
+      if(ret != PIPE_OK)
+	 return NULL;
+   }
+
+   if(!sbuf->needs_flush && sbuf->hw.num_ranges) {
+      /* Queue the buffer for flushing */
+      ret = svga_buffer_upload_command(svga, sbuf);
+      if(ret != PIPE_OK)
+         /* XXX: Should probably have a richer return value */
+         return NULL;
+
+      assert(sbuf->hw.svga == svga);
+
+      sbuf->needs_flush = TRUE;
+      assert(sbuf->head.prev && sbuf->head.next);
+      LIST_DEL(&sbuf->head);
+      LIST_ADDTAIL(&sbuf->head, &svga->dirty_buffers);
+   }
+
+   return sbuf->handle;
+}
+
+struct pipe_buffer *
+svga_screen_buffer_wrap_surface(struct pipe_screen *screen,
+				enum SVGA3dSurfaceFormat format,
+				struct svga_winsys_surface *srf)
+{
+   struct pipe_buffer *buf;
+   struct svga_buffer *sbuf;
+   struct svga_winsys_screen *sws = svga_winsys_screen(screen);
+
+   buf = svga_buffer_create(screen, 0, SVGA_BUFFER_USAGE_WRAPPED, 0);
+   if (!buf)
+      return NULL;
+
+   sbuf = svga_buffer(buf);
+
+   /*
+    * We are not the creator of this surface and therefore we must not
+    * cache it for reuse. The caching code only caches SVGA3D_BUFFER surfaces
+    * so make sure this isn't one of those.
+    */
+
+   assert(format != SVGA3D_BUFFER);
+   sbuf->key.format = format;
+   sws->surface_reference(sws, &sbuf->handle, srf);
+
+   return buf;
+}
+
+
+struct svga_winsys_surface *
+svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer)
+{
+   struct svga_winsys_screen *sws = svga_winsys_screen(buffer->screen);
+   struct svga_winsys_surface *vsurf = NULL;
+
+   sws->surface_reference(sws, &vsurf, svga_buffer(buffer)->handle);
+   return vsurf;
+}
+
+void
+svga_context_flush_buffers(struct svga_context *svga)
+{
+   struct list_head *curr, *next;
+   struct svga_buffer *sbuf;
+
+   curr = svga->dirty_buffers.next;
+   next = curr->next;
+   while(curr != &svga->dirty_buffers) {
+      sbuf = LIST_ENTRY(struct svga_buffer, curr, head);
+
+      assert(p_atomic_read(&sbuf->base.reference.count) != 0);
+      assert(sbuf->needs_flush);
+      
+      svga_buffer_upload_flush(svga, sbuf);
+
+      curr = next; 
+      next = curr->next;
+   }
+}
diff --git a/src/gallium/drivers/svga/svga_screen_buffer.h b/src/gallium/drivers/svga/svga_screen_buffer.h
new file mode 100644
index 00000000000..5d7af5a7c50
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_buffer.h
@@ -0,0 +1,190 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_BUFFER_H
+#define SVGA_BUFFER_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
+#include "util/u_double_list.h"
+
+#include "svga_screen_cache.h"
+
+
+#define SVGA_BUFFER_MAGIC 0x344f9005
+
+/**
+ * Maximum number of discontiguous ranges
+ */
+#define SVGA_BUFFER_MAX_RANGES 32
+
+
+struct svga_screen;
+struct svga_context;
+struct svga_winsys_buffer;
+struct svga_winsys_surface;
+
+
+struct svga_buffer_range
+{
+   unsigned start;
+   unsigned end;
+};
+
+
+/**
+ * Describe a
+ *
+ * This holds the information to emit a SVGA3dCmdSurfaceDMA.
+ */
+struct svga_buffer_upload
+{
+   /**
+    * Guest memory region.
+    */
+   struct svga_winsys_buffer *buf;
+
+   struct svga_buffer_range ranges[SVGA_BUFFER_MAX_RANGES];
+   unsigned num_ranges;
+
+   SVGA3dSurfaceDMAFlags flags;
+
+   /**
+    * Pointer to the DMA copy box *inside* the command buffer.
+    */
+   SVGA3dCopyBox *boxes;
+
+   /**
+    * Context that has the pending DMA to this buffer.
+    */
+   struct svga_context *svga;
+};
+
+
+/**
+ * SVGA pipe buffer.
+ */
+struct svga_buffer 
+{
+   struct pipe_buffer base;
+
+   /** 
+    * Marker to detect bad casts in runtime.
+    */ 
+   uint32_t magic;
+
+   /**
+    * Regular (non DMA'able) memory.
+    * 
+    * Used for user buffers or for buffers which we know before hand that can
+    * never be used by the virtual hardware directly, such as constant buffers.
+    */
+   void *swbuf;
+   
+   /** 
+    * Whether swbuf was created by the user or not.
+    */
+   boolean user;
+   
+   /**
+    * DMA'ble memory.
+    * 
+    * A piece of GMR memory. It is created when mapping the buffer, and will be
+    * used to upload/download vertex data from the host.
+    */
+   struct svga_buffer_upload hw;
+
+   /**
+    * Creation key for the host surface handle.
+    * 
+    * This structure describes all the host surface characteristics so that it 
+    * can be looked up in cache, since creating a host surface is often a slow
+    * operation.
+    */
+   struct svga_host_surface_cache_key key;
+   
+   /**
+    * Host surface handle.
+    * 
+    * This is a platform independent abstraction for host SID. We create when 
+    * trying to bind
+    */
+   struct svga_winsys_surface *handle;
+   
+   struct {
+      unsigned count;
+      boolean writing;
+      boolean flush_explicit;
+   } map;
+   
+   boolean needs_flush;
+   struct list_head head;
+};
+
+
+static INLINE struct svga_buffer *
+svga_buffer(struct pipe_buffer *buffer)
+{
+   if (buffer) {
+      assert(((struct svga_buffer *)buffer)->magic == SVGA_BUFFER_MAGIC);
+      return (struct svga_buffer *)buffer;
+   }
+   return NULL;
+}
+
+
+/**
+ * Returns TRUE for user buffers.  We may
+ * decide to use an alternate upload path for these buffers.
+ */
+static INLINE boolean 
+svga_buffer_is_user_buffer( struct pipe_buffer *buffer )
+{
+   return svga_buffer(buffer)->user;
+}
+
+
+void
+svga_screen_init_buffer_functions(struct pipe_screen *screen);
+
+struct svga_winsys_surface *
+svga_buffer_handle(struct svga_context *svga,
+                   struct pipe_buffer *buf);
+
+void
+svga_context_flush_buffers(struct svga_context *svga);
+
+boolean
+svga_buffer_free_cached_hw_storage(struct svga_screen *ss);
+
+struct svga_winsys_buffer *
+svga_winsys_buffer_create(struct svga_screen *ss,
+                          unsigned alignment, 
+                          unsigned usage,
+                          unsigned size);
+
+#endif /* SVGA_BUFFER_H */
diff --git a/src/gallium/drivers/svga/svga_screen_cache.c b/src/gallium/drivers/svga/svga_screen_cache.c
new file mode 100644
index 00000000000..7360c1688bb
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_cache.c
@@ -0,0 +1,307 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_memory.h"
+
+#include "svga_debug.h"
+#include "svga_winsys.h"
+#include "svga_screen.h"
+#include "svga_screen_cache.h"
+
+
+#define SVGA_SURFACE_CACHE_ENABLED 1
+
+
+/** 
+ * Compute the bucket for this key. 
+ * 
+ * We simply compute log2(width) for now, but
+ */
+static INLINE unsigned
+svga_screen_cache_bucket(const struct svga_host_surface_cache_key *key)
+{
+   unsigned bucket = 0;
+   unsigned size = key->size.width;
+   
+   while ((size >>= 1))
+      ++bucket;
+   
+   if(key->flags & SVGA3D_SURFACE_HINT_INDEXBUFFER)
+      bucket += 32;
+   
+   assert(bucket < SVGA_HOST_SURFACE_CACHE_BUCKETS);
+   
+   return bucket;
+}
+
+
+static INLINE struct svga_winsys_surface *
+svga_screen_cache_lookup(struct svga_screen *svgascreen,
+                         const struct svga_host_surface_cache_key *key)
+{
+   struct svga_host_surface_cache *cache = &svgascreen->cache;
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_host_surface_cache_entry *entry;
+   struct svga_winsys_surface *handle = NULL;
+   struct list_head *curr, *next;
+   unsigned bucket;
+   unsigned tries = 0;
+
+   bucket = svga_screen_cache_bucket(key);
+
+   pipe_mutex_lock(cache->mutex);
+
+   curr = cache->bucket[bucket].next;
+   next = curr->next;
+   while(curr != &cache->bucket[bucket]) {
+      ++tries;
+      
+      entry = LIST_ENTRY(struct svga_host_surface_cache_entry, curr, bucket_head);
+
+      assert(entry->handle);
+      
+      if(memcmp(&entry->key, key, sizeof *key) == 0 &&
+         sws->fence_signalled( sws, entry->fence, 0 ) == 0) {
+         assert(sws->surface_is_flushed(sws, entry->handle));
+         
+         handle = entry->handle; // Reference is transfered here.
+         entry->handle = NULL;
+         
+         LIST_DEL(&entry->bucket_head);
+
+         LIST_DEL(&entry->head);
+         
+         LIST_ADD(&entry->head, &cache->empty);
+
+         break;
+      }
+
+      curr = next; 
+      next = curr->next;
+   }
+
+   pipe_mutex_unlock(cache->mutex);
+   
+#if 0
+   _debug_printf("%s: cache %s after %u tries\n", __FUNCTION__, handle ? "hit" : "miss", tries);
+#else
+   (void)tries;
+#endif
+   
+   return handle;
+}
+
+
+/*
+ * Transfers a handle reference.
+ */
+                           
+static INLINE void
+svga_screen_cache_add(struct svga_screen *svgascreen,
+                      const struct svga_host_surface_cache_key *key, 
+                      struct svga_winsys_surface **p_handle)
+{
+   struct svga_host_surface_cache *cache = &svgascreen->cache;
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_host_surface_cache_entry *entry = NULL;
+   struct svga_winsys_surface *handle = *p_handle;
+   
+
+   assert(handle);
+   if(!handle)
+      return;
+   
+   *p_handle = NULL;
+   pipe_mutex_lock(cache->mutex);
+   
+   if(!LIST_IS_EMPTY(&cache->empty)) {
+        /* use the first empty entry */
+        entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->empty.next, head);
+        
+        LIST_DEL(&entry->head);
+     }
+   else if(!LIST_IS_EMPTY(&cache->unused)) {
+      /* free the last used buffer and reuse its entry */
+      entry = LIST_ENTRY(struct svga_host_surface_cache_entry, cache->unused.prev, head);
+      SVGA_DBG(DEBUG_DMA, "unref sid %p\n", entry->handle);
+      sws->surface_reference(sws, &entry->handle, NULL);
+
+      LIST_DEL(&entry->bucket_head);
+
+      LIST_DEL(&entry->head);
+   }
+
+   if(entry) {
+      entry->handle = handle;
+      memcpy(&entry->key, key, sizeof entry->key);
+   
+      LIST_ADD(&entry->head, &cache->validated);
+   }
+   else {
+      /* Couldn't cache the buffer -- this really shouldn't happen */
+      SVGA_DBG(DEBUG_DMA, "unref sid %p\n", handle);
+      sws->surface_reference(sws, &handle, NULL);
+   }
+   
+   pipe_mutex_unlock(cache->mutex);
+}
+
+
+/**
+ * Called during the screen flush to move all buffers not in a validate list
+ * into the unused list.
+ */
+void
+svga_screen_cache_flush(struct svga_screen *svgascreen,
+                        struct pipe_fence_handle *fence)
+{
+   struct svga_host_surface_cache *cache = &svgascreen->cache;
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_host_surface_cache_entry *entry;
+   struct list_head *curr, *next;
+   unsigned bucket;
+
+   pipe_mutex_lock(cache->mutex);
+
+   curr = cache->validated.next;
+   next = curr->next;
+   while(curr != &cache->validated) {
+      entry = LIST_ENTRY(struct svga_host_surface_cache_entry, curr, head);
+
+      assert(entry->handle);
+
+      if(sws->surface_is_flushed(sws, entry->handle)) {
+         LIST_DEL(&entry->head);
+         
+         svgascreen->sws->fence_reference(svgascreen->sws, &entry->fence, fence);
+
+         LIST_ADD(&entry->head, &cache->unused);
+
+         bucket = svga_screen_cache_bucket(&entry->key);
+         LIST_ADD(&entry->bucket_head, &cache->bucket[bucket]);
+      }
+
+      curr = next; 
+      next = curr->next;
+   }
+
+   pipe_mutex_unlock(cache->mutex);
+}
+
+
+void
+svga_screen_cache_cleanup(struct svga_screen *svgascreen)
+{
+   struct svga_host_surface_cache *cache = &svgascreen->cache;
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   unsigned i;
+   
+   for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i) {
+      if(cache->entries[i].handle) {
+	 SVGA_DBG(DEBUG_DMA, "unref sid %p\n", cache->entries[i].handle);
+	 sws->surface_reference(sws, &cache->entries[i].handle, NULL);
+      }
+
+      if(cache->entries[i].fence)
+         svgascreen->sws->fence_reference(svgascreen->sws, &cache->entries[i].fence, NULL);
+   }
+   
+   pipe_mutex_destroy(cache->mutex);
+}
+
+
+enum pipe_error
+svga_screen_cache_init(struct svga_screen *svgascreen)
+{
+   struct svga_host_surface_cache *cache = &svgascreen->cache;
+   unsigned i;
+
+   pipe_mutex_init(cache->mutex);
+   
+   for(i = 0; i < SVGA_HOST_SURFACE_CACHE_BUCKETS; ++i)
+      LIST_INITHEAD(&cache->bucket[i]);
+
+   LIST_INITHEAD(&cache->unused);
+   
+   LIST_INITHEAD(&cache->validated);
+   
+   LIST_INITHEAD(&cache->empty);
+   for(i = 0; i < SVGA_HOST_SURFACE_CACHE_SIZE; ++i)
+      LIST_ADDTAIL(&cache->entries[i].head, &cache->empty);
+
+   return PIPE_OK;
+}
+
+                           
+struct svga_winsys_surface *
+svga_screen_surface_create(struct svga_screen *svgascreen,
+                           struct svga_host_surface_cache_key *key)
+{
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_winsys_surface *handle = NULL;
+
+   if (SVGA_SURFACE_CACHE_ENABLED && key->format == SVGA3D_BUFFER) {
+      /* round the buffer size up to the nearest power of two to increase the
+       * probability of cache hits */
+      uint32_t size = 1;
+      while(size < key->size.width)
+         size <<= 1;
+      key->size.width = size;
+      
+      handle = svga_screen_cache_lookup(svgascreen, key);
+      if (handle)
+         SVGA_DBG(DEBUG_DMA, "  reuse sid %p sz %d\n", handle, size);
+   }
+
+   if (!handle) {
+      handle = sws->surface_create(sws,
+                                   key->flags,
+                                   key->format,
+                                   key->size, 
+                                   key->numFaces, 
+                                   key->numMipLevels);
+      if (handle)
+         SVGA_DBG(DEBUG_DMA, "create sid %p sz %d\n", handle, key->size);
+   }
+
+   return handle;
+}
+
+
+void
+svga_screen_surface_destroy(struct svga_screen *svgascreen,
+                            const struct svga_host_surface_cache_key *key,
+                            struct svga_winsys_surface **p_handle)
+{
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   
+   if(SVGA_SURFACE_CACHE_ENABLED && key->format == SVGA3D_BUFFER) {
+      svga_screen_cache_add(svgascreen, key, p_handle);
+   }
+   else {
+      SVGA_DBG(DEBUG_DMA, "unref sid %p\n", *p_handle);
+      sws->surface_reference(sws, p_handle, NULL);
+   }
+}
diff --git a/src/gallium/drivers/svga/svga_screen_cache.h b/src/gallium/drivers/svga/svga_screen_cache.h
new file mode 100644
index 00000000000..1bbe9877688
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_cache.h
@@ -0,0 +1,135 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_SCREEN_CACHE_H_
+#define SVGA_SCREEN_CACHE_H_
+
+
+#include "svga_types.h"
+#include "svga_reg.h"
+#include "svga3d_reg.h"
+
+#include "pipe/p_thread.h"
+
+#include "util/u_double_list.h"
+
+
+/* TODO: Reduce this once we don't allocate an index buffer per draw call */ 
+#define SVGA_HOST_SURFACE_CACHE_SIZE 1024
+
+#define SVGA_HOST_SURFACE_CACHE_BUCKETS 64
+
+
+struct svga_winsys_surface;
+struct svga_screen;
+
+/**
+ * Same as svga_winsys_screen::surface_create.
+ */
+struct svga_host_surface_cache_key
+{
+   SVGA3dSurfaceFlags flags;
+   SVGA3dSurfaceFormat format;
+   SVGA3dSize size;
+   uint32_t numFaces;
+   uint32_t numMipLevels;
+};
+
+
+struct svga_host_surface_cache_entry 
+{
+   /** 
+    * Head for the LRU list, svga_host_surface_cache::unused, and
+    * svga_host_surface_cache::empty
+    */
+   struct list_head head;
+   
+   /** Head for the bucket lists. */
+   struct list_head bucket_head;
+
+   struct svga_host_surface_cache_key key;
+   struct svga_winsys_surface *handle;
+   
+   struct pipe_fence_handle *fence;
+};
+
+
+/**
+ * Cache of the host surfaces.
+ * 
+ * A cache entry can be in the following stages:
+ * 1. empty
+ * 2. holding a buffer in a validate list
+ * 3. holding a flushed buffer (not in any validate list) with an active fence
+ * 4. holding a flushed buffer with an expired fence
+ * 
+ * An entry progresses from 1 -> 2 -> 3 -> 4. When we need an entry to put a 
+ * buffer into we preferencial take from 1, or from the least recentely used 
+ * buffer from 3/4.
+ */
+struct svga_host_surface_cache 
+{
+   pipe_mutex mutex;
+   
+   /* Unused buffers are put in buckets to speed up lookups */
+   struct list_head bucket[SVGA_HOST_SURFACE_CACHE_BUCKETS];
+   
+   /* Entries with unused buffers, ordered from most to least recently used 
+    * (3 and 4) */
+   struct list_head unused;
+   
+   /* Entries with buffers still in validate lists (2) */
+   struct list_head validated;
+   
+   /** Empty entries (1) */
+   struct list_head empty;
+
+   /** The actual storage for the entries */
+   struct svga_host_surface_cache_entry entries[SVGA_HOST_SURFACE_CACHE_SIZE];
+};
+
+
+void
+svga_screen_cache_cleanup(struct svga_screen *svgascreen);
+
+void
+svga_screen_cache_flush(struct svga_screen *svgascreen,
+                        struct pipe_fence_handle *fence);
+
+enum pipe_error
+svga_screen_cache_init(struct svga_screen *svgascreen);
+
+
+struct svga_winsys_surface *
+svga_screen_surface_create(struct svga_screen *svgascreen,
+                           struct svga_host_surface_cache_key *key);
+
+void
+svga_screen_surface_destroy(struct svga_screen *svgascreen,
+                            const struct svga_host_surface_cache_key *key,
+                            struct svga_winsys_surface **handle);
+
+
+#endif /* SVGA_SCREEN_CACHE_H_ */
diff --git a/src/gallium/drivers/svga/svga_screen_texture.c b/src/gallium/drivers/svga/svga_screen_texture.c
new file mode 100644
index 00000000000..fb11b80dcf9
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_texture.c
@@ -0,0 +1,1064 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "svga_cmd.h"
+
+#include "pipe/p_state.h"
+#include "pipe/p_defines.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_thread.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+
+#include "svga_screen.h"
+#include "svga_context.h"
+#include "svga_screen_texture.h"
+#include "svga_screen_buffer.h"
+#include "svga_winsys.h"
+#include "svga_debug.h"
+#include "svga_screen_buffer.h"
+
+#include <util/u_string.h>
+
+
+/* XXX: This isn't a real hardware flag, but just a hack for kernel to
+ * know about primary surfaces. Find a better way to accomplish this.
+ */
+#define SVGA3D_SURFACE_HINT_SCANOUT (1 << 9)
+
+
+/*
+ * Helper function and arrays
+ */
+
+SVGA3dSurfaceFormat
+svga_translate_format(enum pipe_format format)
+{
+   switch(format) {
+   
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+      return SVGA3D_A8R8G8B8;
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+      return SVGA3D_X8R8G8B8;
+
+      /* Required for GL2.1:
+       */
+   case PIPE_FORMAT_A8R8G8B8_SRGB:
+      return SVGA3D_A8R8G8B8;
+
+   case PIPE_FORMAT_R5G6B5_UNORM:
+      return SVGA3D_R5G6B5;
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+      return SVGA3D_A1R5G5B5;
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+      return SVGA3D_A4R4G4B4;
+
+      
+   /* XXX: Doesn't seem to work properly.
+   case PIPE_FORMAT_Z32_UNORM:
+      return SVGA3D_Z_D32;
+    */
+   case PIPE_FORMAT_Z16_UNORM:
+      return SVGA3D_Z_D16;
+   case PIPE_FORMAT_Z24S8_UNORM:
+      return SVGA3D_Z_D24S8;
+   case PIPE_FORMAT_Z24X8_UNORM:
+      return SVGA3D_Z_D24X8;
+
+   case PIPE_FORMAT_A8_UNORM:
+      return SVGA3D_ALPHA8;
+   case PIPE_FORMAT_L8_UNORM:
+      return SVGA3D_LUMINANCE8;
+
+   case PIPE_FORMAT_DXT1_RGB:
+   case PIPE_FORMAT_DXT1_RGBA:
+      return SVGA3D_DXT1;
+   case PIPE_FORMAT_DXT3_RGBA:
+      return SVGA3D_DXT3;
+   case PIPE_FORMAT_DXT5_RGBA:
+      return SVGA3D_DXT5;
+
+   default:
+      return SVGA3D_FORMAT_INVALID;
+   }
+}
+
+
+SVGA3dSurfaceFormat
+svga_translate_format_render(enum pipe_format format)
+{
+   switch(format) { 
+   case PIPE_FORMAT_A8R8G8B8_UNORM:
+   case PIPE_FORMAT_X8R8G8B8_UNORM:
+   case PIPE_FORMAT_A1R5G5B5_UNORM:
+   case PIPE_FORMAT_A4R4G4B4_UNORM:
+   case PIPE_FORMAT_R5G6B5_UNORM:
+   case PIPE_FORMAT_Z24S8_UNORM:
+   case PIPE_FORMAT_Z24X8_UNORM:
+   case PIPE_FORMAT_Z32_UNORM:
+   case PIPE_FORMAT_Z16_UNORM:
+   case PIPE_FORMAT_L8_UNORM:
+      return svga_translate_format(format);
+
+#if 1
+   /* For on host conversion */
+   case PIPE_FORMAT_DXT1_RGB:
+      return SVGA3D_X8R8G8B8;
+   case PIPE_FORMAT_DXT1_RGBA:
+   case PIPE_FORMAT_DXT3_RGBA:
+   case PIPE_FORMAT_DXT5_RGBA:
+      return SVGA3D_A8R8G8B8;
+#endif
+
+   default:
+      return SVGA3D_FORMAT_INVALID;
+   }
+}
+
+
+static INLINE void
+svga_transfer_dma_band(struct svga_transfer *st,
+                       SVGA3dTransferType transfer,
+                       unsigned y, unsigned h, unsigned srcy)
+{
+   struct svga_texture *texture = svga_texture(st->base.texture); 
+   struct svga_screen *screen = svga_screen(texture->base.screen);
+   SVGA3dCopyBox box;
+   enum pipe_error ret;
+   
+   SVGA_DBG(DEBUG_DMA, "dma %s sid %p, face %u, (%u, %u, %u) - (%u, %u, %u), %ubpp\n",
+                transfer == SVGA3D_WRITE_HOST_VRAM ? "to" : "from", 
+                texture->handle,
+                st->base.face,
+                st->base.x,
+                y,
+                st->base.zslice,
+                st->base.x + st->base.width,
+                y + h,
+                st->base.zslice + 1,
+                texture->base.block.size*8/(texture->base.block.width*texture->base.block.height));
+   
+   box.x = st->base.x;
+   box.y = y;
+   box.z = st->base.zslice;
+   box.w = st->base.width;
+   box.h = h;
+   box.d = 1;
+   box.srcx = 0;
+   box.srcy = srcy;
+   box.srcz = 0;
+
+   pipe_mutex_lock(screen->swc_mutex);
+   ret = SVGA3D_SurfaceDMA(screen->swc, st, transfer, &box, 1);
+   if(ret != PIPE_OK) {
+      screen->swc->flush(screen->swc, NULL);
+      ret = SVGA3D_SurfaceDMA(screen->swc, st, transfer, &box, 1);
+      assert(ret == PIPE_OK);
+   }
+   pipe_mutex_unlock(screen->swc_mutex);
+}
+
+
+static INLINE void
+svga_transfer_dma(struct svga_transfer *st,
+                 SVGA3dTransferType transfer)
+{
+   struct svga_texture *texture = svga_texture(st->base.texture); 
+   struct svga_screen *screen = svga_screen(texture->base.screen);
+   struct svga_winsys_screen *sws = screen->sws;
+   struct pipe_fence_handle *fence = NULL;
+   
+   if (transfer == SVGA3D_READ_HOST_VRAM) {
+      SVGA_DBG(DEBUG_PERF, "%s: readback transfer\n", __FUNCTION__);
+   }
+
+
+   if(!st->swbuf) {
+      /* Do the DMA transfer in a single go */
+      
+      svga_transfer_dma_band(st, transfer, st->base.y, st->base.height, 0);
+
+      if(transfer == SVGA3D_READ_HOST_VRAM) {
+         svga_screen_flush(screen, &fence);
+         sws->fence_finish(sws, fence, 0);
+         //sws->fence_reference(sws, &fence, NULL);
+      }
+   }
+   else {
+      unsigned y, h, srcy;
+      h = st->hw_nblocksy * st->base.block.height;
+      srcy = 0;
+      for(y = 0; y < st->base.height; y += h) {
+         unsigned offset, length;
+         void *hw, *sw;
+
+         if (y + h > st->base.height)
+            h = st->base.height - y;
+
+         /* Transfer band must be aligned to pixel block boundaries */
+         assert(y % st->base.block.height == 0);
+         assert(h % st->base.block.height == 0);
+         
+         offset = y * st->base.stride / st->base.block.height;
+         length = h * st->base.stride / st->base.block.height;
+
+         sw = (uint8_t *)st->swbuf + offset;
+         
+         if(transfer == SVGA3D_WRITE_HOST_VRAM) {
+            /* Wait for the previous DMAs to complete */
+            /* TODO: keep one DMA (at half the size) in the background */
+            if(y) {
+               svga_screen_flush(screen, &fence);
+               sws->fence_finish(sws, fence, 0);
+               //sws->fence_reference(sws, &fence, NULL);
+            }
+
+            hw = sws->buffer_map(sws, st->hwbuf, PIPE_BUFFER_USAGE_CPU_WRITE);
+            assert(hw);
+            if(hw) {
+               memcpy(hw, sw, length);
+               sws->buffer_unmap(sws, st->hwbuf);
+            }
+         }
+         
+         svga_transfer_dma_band(st, transfer, y, h, srcy);
+         
+         if(transfer == SVGA3D_READ_HOST_VRAM) {
+            svga_screen_flush(screen, &fence);
+            sws->fence_finish(sws, fence, 0);
+
+            hw = sws->buffer_map(sws, st->hwbuf, PIPE_BUFFER_USAGE_CPU_READ);
+            assert(hw);
+            if(hw) {
+               memcpy(sw, hw, length);
+               sws->buffer_unmap(sws, st->hwbuf);
+            }
+         }
+      }
+   }
+}
+
+
+static struct pipe_texture *
+svga_texture_create(struct pipe_screen *screen,
+                    const struct pipe_texture *templat)
+{
+   struct svga_screen *svgascreen = svga_screen(screen);
+   struct svga_winsys_screen *sws = svgascreen->sws;
+   struct svga_texture *tex = CALLOC_STRUCT(svga_texture);
+   unsigned width, height, depth;
+   SVGA3dSurfaceFlags flags = 0;
+   SVGA3dSurfaceFormat format;
+   SVGA3dSize size;
+   uint32 numFaces;
+   uint32 numMipLevels;
+   unsigned level;
+   
+   if (!tex)
+      goto error1;
+
+   tex->base = *templat;
+   pipe_reference_init(&tex->base.reference, 1);
+   tex->base.screen = screen;
+
+   assert(templat->last_level < SVGA_MAX_TEXTURE_LEVELS);
+   if(templat->last_level >= SVGA_MAX_TEXTURE_LEVELS)
+      goto error2;
+   
+   width = templat->width0;
+   height = templat->height0;
+   depth = templat->depth0;
+   for(level = 0; level <= templat->last_level; ++level) {
+      tex->base.nblocksx[level] = pf_get_nblocksx(&tex->base.block, width);  
+      tex->base.nblocksy[level] = pf_get_nblocksy(&tex->base.block, height);  
+      width = u_minify(width, 1);
+      height = u_minify(height, 1);
+      depth = u_minify(depth, 1);
+   }
+   
+   size.width = templat->width0;
+   size.height = templat->height0;
+   size.depth = templat->depth0;
+   
+   if(templat->target == PIPE_TEXTURE_CUBE) {
+      flags |= SVGA3D_SURFACE_CUBEMAP;
+      numFaces = 6;
+   }
+   else {
+      numFaces = 1;
+   }
+
+   if(templat->tex_usage & PIPE_TEXTURE_USAGE_SAMPLER)
+      flags |= SVGA3D_SURFACE_HINT_TEXTURE;
+
+   if(templat->tex_usage & PIPE_TEXTURE_USAGE_PRIMARY)
+      flags |= SVGA3D_SURFACE_HINT_SCANOUT;
+   
+   /* 
+    * XXX: Never pass the SVGA3D_SURFACE_HINT_RENDERTARGET hint. Mesa cannot
+    * know beforehand whether a texture will be used as a rendertarget or not
+    * and it always requests PIPE_TEXTURE_USAGE_RENDER_TARGET, therefore
+    * passing the SVGA3D_SURFACE_HINT_RENDERTARGET here defeats its purpose.
+    */
+#if 0
+   if((templat->tex_usage & PIPE_TEXTURE_USAGE_RENDER_TARGET) &&
+      !pf_is_compressed(templat->format))
+      flags |= SVGA3D_SURFACE_HINT_RENDERTARGET;
+#endif
+   
+   if(templat->tex_usage & PIPE_TEXTURE_USAGE_DEPTH_STENCIL)
+      flags |= SVGA3D_SURFACE_HINT_DEPTHSTENCIL;
+   
+   numMipLevels = templat->last_level + 1;
+   
+   format = svga_translate_format(templat->format);
+   if(format == SVGA3D_FORMAT_INVALID)
+      goto error2;
+   
+   tex->handle = sws->surface_create(sws, flags, format, size, numFaces, numMipLevels);
+   if (tex->handle)
+      SVGA_DBG(DEBUG_DMA, "create sid %p (texture)\n", tex->handle);
+
+   return &tex->base;
+
+error2:
+   FREE(tex);
+error1:
+   return NULL;
+}
+
+
+static struct pipe_texture *
+svga_texture_blanket(struct pipe_screen * screen,
+                     const struct pipe_texture *base,
+                     const unsigned *stride,
+                     struct pipe_buffer *buffer)
+{
+   struct svga_texture *tex;
+   struct svga_buffer *sbuf = svga_buffer(buffer);
+   struct svga_winsys_screen *sws = svga_winsys_screen(screen);
+   assert(screen);
+
+   /* Only supports one type */
+   if (base->target != PIPE_TEXTURE_2D ||
+       base->last_level != 0 ||
+       base->depth0 != 1) {
+      return NULL;
+   }
+
+   /**
+    * We currently can't do texture blanket on
+    * SVGA3D_BUFFER. Need to blit to a temporary surface?
+    */
+
+   assert(sbuf->handle);
+   if (!sbuf->handle)
+      return NULL;
+
+   if (svga_translate_format(base->format) != sbuf->key.format) {
+      unsigned f1 = svga_translate_format(base->format);
+      unsigned f2 = sbuf->key.format;
+
+      /* It's okay for XRGB and ARGB or depth with/out stencil to get mixed up */
+      if ( !( (f1 == SVGA3D_X8R8G8B8 && f2 == SVGA3D_A8R8G8B8) ||
+              (f1 == SVGA3D_A8R8G8B8 && f2 == SVGA3D_X8R8G8B8) ||
+              (f1 == SVGA3D_Z_D24X8 && f2 == SVGA3D_Z_D24S8) ) ) {
+         debug_printf("%s wrong format %u != %u\n", __FUNCTION__, f1, f2);
+         return NULL;
+      }
+   }
+
+   tex = CALLOC_STRUCT(svga_texture);
+   if (!tex)
+      return NULL;
+
+   tex->base = *base;
+
+   if (sbuf->key.format == 1)
+      tex->base.format = PIPE_FORMAT_X8R8G8B8_UNORM;
+   else if (sbuf->key.format == 2)
+      tex->base.format = PIPE_FORMAT_A8R8G8B8_UNORM;
+
+   pipe_reference_init(&tex->base.reference, 1);
+   tex->base.screen = screen;
+
+   sws->surface_reference(sws, &tex->handle, sbuf->handle);
+
+   return &tex->base;
+}
+
+
+static void
+svga_texture_destroy(struct pipe_texture *pt)
+{
+   struct svga_screen *ss = svga_screen(pt->screen);
+   struct svga_texture *tex = (struct svga_texture *)pt;
+
+   ss->texture_timestamp++;
+
+   svga_sampler_view_reference(&tex->cached_view, NULL);
+
+   /*
+     DBG("%s deleting %p\n", __FUNCTION__, (void *) tex);
+   */
+   SVGA_DBG(DEBUG_DMA, "unref sid %p (texture)\n", tex->handle);
+   ss->sws->surface_reference(ss->sws, &tex->handle, NULL);
+
+   FREE(tex);
+}
+
+
+static void
+svga_texture_copy_handle(struct svga_context *svga,
+                         struct svga_screen *ss,
+                         struct svga_winsys_surface *src_handle,
+                         unsigned src_x, unsigned src_y, unsigned src_z,
+                         unsigned src_level, unsigned src_face,
+                         struct svga_winsys_surface *dst_handle,
+                         unsigned dst_x, unsigned dst_y, unsigned dst_z,
+                         unsigned dst_level, unsigned dst_face,
+                         unsigned width, unsigned height, unsigned depth)
+{
+   struct svga_surface dst, src;
+   enum pipe_error ret;
+   SVGA3dCopyBox box, *boxes;
+
+   assert(svga || ss);
+
+   src.handle = src_handle;
+   src.real_level = src_level;
+   src.real_face = src_face;
+   src.real_zslice = 0;
+
+   dst.handle = dst_handle;
+   dst.real_level = dst_level;
+   dst.real_face = dst_face;
+   dst.real_zslice = 0;
+
+   box.x = dst_x;
+   box.y = dst_y;
+   box.z = dst_z;
+   box.w = width;
+   box.h = height;
+   box.d = depth;
+   box.srcx = src_x;
+   box.srcy = src_y;
+   box.srcz = src_z;
+
+/*
+   SVGA_DBG(DEBUG_VIEWS, "mipcopy src: %p %u (%ux%ux%u), dst: %p %u (%ux%ux%u)\n",
+            src_handle, src_level, src_x, src_y, src_z,
+            dst_handle, dst_level, dst_x, dst_y, dst_z);
+*/
+
+   if (svga) {
+      ret = SVGA3D_BeginSurfaceCopy(svga->swc,
+                                    &src.base,
+                                    &dst.base,
+                                    &boxes, 1);
+      if(ret != PIPE_OK) {
+         svga_context_flush(svga, NULL);
+         ret = SVGA3D_BeginSurfaceCopy(svga->swc,
+                                       &src.base,
+                                       &dst.base,
+                                       &boxes, 1);
+         assert(ret == PIPE_OK);
+      }
+      *boxes = box;
+      SVGA_FIFOCommitAll(svga->swc);
+   } else {
+      pipe_mutex_lock(ss->swc_mutex);
+      ret = SVGA3D_BeginSurfaceCopy(ss->swc,
+                                    &src.base,
+                                    &dst.base,
+                                    &boxes, 1);
+      if(ret != PIPE_OK) {
+         ss->swc->flush(ss->swc, NULL);
+         ret = SVGA3D_BeginSurfaceCopy(ss->swc,
+                                       &src.base,
+                                       &dst.base,
+                                       &boxes, 1);
+         assert(ret == PIPE_OK);
+      }
+      *boxes = box;
+      SVGA_FIFOCommitAll(ss->swc);
+      pipe_mutex_unlock(ss->swc_mutex);
+   }
+}
+
+static struct svga_winsys_surface *
+svga_texture_view_surface(struct pipe_context *pipe,
+                          struct svga_texture *tex,
+                          SVGA3dSurfaceFormat format,
+                          unsigned start_mip,
+                          unsigned num_mip,
+                          int face_pick,
+                          int zslice_pick)
+{
+   struct svga_screen *ss = svga_screen(tex->base.screen);
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_winsys_surface *handle;
+   int i, j;
+   SVGA3dSurfaceFlags flags = 0;
+   SVGA3dSize size;
+   uint32 numFaces;
+   uint32 numMipLevels = num_mip;
+   unsigned z_offset = 0;
+
+   SVGA_DBG(DEBUG_PERF, 
+            "svga: Create surface view: face %d zslice %d mips %d..%d\n",
+            face_pick, zslice_pick, start_mip, start_mip+num_mip-1);
+
+   size.width = u_minify(tex->base.width0, start_mip);
+   size.height = u_minify(tex->base.height0, start_mip);
+   size.depth = zslice_pick < 0 ? u_minify(tex->base.depth0, start_mip) : 1;
+   assert(size.depth == 1);
+   
+   if(tex->base.target == PIPE_TEXTURE_CUBE && face_pick < 0) {
+      flags |= SVGA3D_SURFACE_CUBEMAP;
+      numFaces = 6;
+   } else {
+      numFaces = 1;
+   }
+
+   if(format == SVGA3D_FORMAT_INVALID)
+      return NULL;
+
+   handle = sws->surface_create(sws, flags, format, size, numFaces, numMipLevels);
+
+   if (!handle)
+      return NULL;
+
+   SVGA_DBG(DEBUG_DMA, "create sid %p (texture view)\n", handle);
+
+   if (face_pick < 0)
+      face_pick = 0;
+
+   if (zslice_pick >= 0)
+       z_offset = zslice_pick;
+
+   for (i = 0; i < num_mip; i++) {
+      for (j = 0; j < numFaces; j++) {
+         if(tex->defined[j + face_pick][i + start_mip]) {
+            unsigned depth = zslice_pick < 0 ? u_minify(tex->base.depth0, i + start_mip) : 1;
+            svga_texture_copy_handle(svga_context(pipe), ss,
+                                     tex->handle, 0, 0, z_offset, i + start_mip, j + face_pick,
+                                     handle, 0, 0, 0, i, j,
+                                     u_minify(tex->base.width0, i + start_mip),
+                                     u_minify(tex->base.height0, i + start_mip), depth);
+         }
+      }
+   }
+
+   return handle;
+}
+
+
+static struct pipe_surface *
+svga_get_tex_surface(struct pipe_screen *screen,
+                     struct pipe_texture *pt,
+                     unsigned face, unsigned level, unsigned zslice,
+                     unsigned flags)
+{
+   struct svga_texture *tex = svga_texture(pt);
+   struct svga_surface *s;
+   struct pipe_surface *ps;
+   boolean render = flags & PIPE_BUFFER_USAGE_GPU_WRITE ? TRUE : FALSE;
+   boolean view = FALSE;
+   SVGA3dSurfaceFormat format;
+
+   s = CALLOC_STRUCT(svga_surface);
+   ps = &s->base;
+   if (!ps)
+      return NULL;
+
+   pipe_reference_init(&ps->reference, 1);
+   pipe_texture_reference(&ps->texture, pt);
+   ps->format = pt->format;
+   ps->width = u_minify(pt->width0, level);
+   ps->height = u_minify(pt->height0, level);
+   ps->usage = flags;
+   ps->level = level;
+   ps->face = face;
+   ps->zslice = zslice;
+
+   if (!render)
+      format = svga_translate_format(pt->format);
+   else
+      format = svga_translate_format_render(pt->format);
+
+   assert(format != SVGA3D_FORMAT_INVALID);
+   assert(!(flags & PIPE_BUFFER_USAGE_CPU_READ_WRITE));
+
+
+   if (svga_screen(screen)->debug.force_surface_view)
+      view = TRUE;
+
+   /* Currently only used for compressed textures */
+   if (render && (format != svga_translate_format(pt->format))) {
+      view = TRUE;
+   }
+
+   if (level != 0 && svga_screen(screen)->debug.force_level_surface_view)
+      view = TRUE;
+
+   if (pt->target == PIPE_TEXTURE_3D)
+      view = TRUE;
+
+   if (svga_screen(screen)->debug.no_surface_view)
+      view = FALSE;
+
+   if (view) {
+      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: yes %p, level %u face %u z %u, %p\n",
+               pt, level, face, zslice, ps);
+
+      s->handle = svga_texture_view_surface(NULL, tex, format, level, 1, face, zslice);
+      s->real_face = 0;
+      s->real_level = 0;
+      s->real_zslice = 0;
+   } else {
+      struct svga_winsys_screen *sws = svga_winsys_screen(screen);
+
+      SVGA_DBG(DEBUG_VIEWS, "svga: Surface view: no %p, level %u, face %u, z %u, %p\n",
+               pt, level, face, zslice, ps);
+
+      sws->surface_reference(sws, &s->handle, tex->handle);
+      s->real_face = face;
+      s->real_level = level;
+      s->real_zslice = zslice;
+   }
+
+   return ps;
+}
+
+
+static void
+svga_tex_surface_destroy(struct pipe_surface *surf)
+{
+   struct svga_surface *s = svga_surface(surf);
+   struct svga_screen *ss = svga_screen(surf->texture->screen);
+
+   SVGA_DBG(DEBUG_DMA, "unref sid %p (tex surface)\n", s->handle);
+   ss->sws->surface_reference(ss->sws, &s->handle, NULL);
+   pipe_texture_reference(&surf->texture, NULL);
+   FREE(surf);
+}
+
+
+static INLINE void 
+svga_mark_surface_dirty(struct pipe_surface *surf)
+{
+   struct svga_surface *s = svga_surface(surf);
+
+   if(!s->dirty) {
+      struct svga_texture *tex = svga_texture(surf->texture);
+
+      s->dirty = TRUE;
+
+      if (s->handle == tex->handle)
+         tex->defined[surf->face][surf->level] = TRUE;
+      else {
+         /* this will happen later in svga_propagate_surface */
+      }
+   }
+}
+
+
+void svga_mark_surfaces_dirty(struct svga_context *svga)
+{
+   unsigned i;
+
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+      if (svga->curr.framebuffer.cbufs[i])
+         svga_mark_surface_dirty(svga->curr.framebuffer.cbufs[i]);
+   }
+   if (svga->curr.framebuffer.zsbuf)
+      svga_mark_surface_dirty(svga->curr.framebuffer.zsbuf);
+}
+
+/**
+ * Progagate any changes from surfaces to texture.
+ * pipe is optional context to inline the blit command in.
+ */
+void
+svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf)
+{
+   struct svga_surface *s = svga_surface(surf);
+   struct svga_texture *tex = svga_texture(surf->texture);
+   struct svga_screen *ss = svga_screen(surf->texture->screen);
+
+   if (!s->dirty)
+      return;
+
+   s->dirty = FALSE;
+   ss->texture_timestamp++;
+   tex->view_age[surf->level] = ++(tex->age);
+
+   if (s->handle != tex->handle) {
+      SVGA_DBG(DEBUG_VIEWS, "svga: Surface propagate: tex %p, level %u, from %p\n", tex, surf->level, surf);
+      svga_texture_copy_handle(svga_context(pipe), ss,
+                               s->handle, 0, 0, 0, s->real_level, s->real_face,
+                               tex->handle, 0, 0, surf->zslice, surf->level, surf->face,
+                               u_minify(tex->base.width0, surf->level),
+                               u_minify(tex->base.height0, surf->level), 1);
+      tex->defined[surf->face][surf->level] = TRUE;
+   }
+}
+
+/**
+ * Check if we should call svga_propagate_surface on the surface.
+ */
+extern boolean
+svga_surface_needs_propagation(struct pipe_surface *surf)
+{
+   struct svga_surface *s = svga_surface(surf);
+   struct svga_texture *tex = svga_texture(surf->texture);
+
+   return s->dirty && s->handle != tex->handle;
+}
+
+
+static struct pipe_transfer *
+svga_get_tex_transfer(struct pipe_screen *screen,
+                     struct pipe_texture *texture,
+                     unsigned face, unsigned level, unsigned zslice,
+                     enum pipe_transfer_usage usage, unsigned x, unsigned y,
+                     unsigned w, unsigned h)
+{
+   struct svga_screen *ss = svga_screen(screen);
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_transfer *st;
+
+   /* We can't map texture storage directly */
+   if (usage & PIPE_TRANSFER_MAP_DIRECTLY)
+      return NULL;
+
+   st = CALLOC_STRUCT(svga_transfer);
+   if (!st)
+      return NULL;
+   
+   st->base.format = texture->format;
+   st->base.block = texture->block;
+   st->base.x = x;
+   st->base.y = y;
+   st->base.width = w;
+   st->base.height = h;
+   st->base.nblocksx = pf_get_nblocksx(&texture->block, w);
+   st->base.nblocksy = pf_get_nblocksy(&texture->block, h);
+   st->base.stride = st->base.nblocksx*st->base.block.size;
+   st->base.usage = usage;
+   st->base.face = face;
+   st->base.level = level;
+   st->base.zslice = zslice;
+
+   st->hw_nblocksy = st->base.nblocksy;
+   
+   st->hwbuf = svga_winsys_buffer_create(ss, 
+                                         1, 
+                                         0,
+                                         st->hw_nblocksy*st->base.stride);
+   while(!st->hwbuf && (st->hw_nblocksy /= 2)) {
+      st->hwbuf = svga_winsys_buffer_create(ss, 
+                                            1, 
+                                            0,
+                                            st->hw_nblocksy*st->base.stride);
+   }
+
+   if(!st->hwbuf)
+      goto no_hwbuf;
+
+   if(st->hw_nblocksy < st->base.nblocksy) {
+      /* We couldn't allocate a hardware buffer big enough for the transfer, 
+       * so allocate regular malloc memory instead */
+      debug_printf("%s: failed to allocate %u KB of DMA, splitting into %u x %u KB DMA transfers\n",
+                   __FUNCTION__,
+                   (st->base.nblocksy*st->base.stride + 1023)/1024,
+                   (st->base.nblocksy + st->hw_nblocksy - 1)/st->hw_nblocksy,
+                   (st->hw_nblocksy*st->base.stride + 1023)/1024);
+      st->swbuf = MALLOC(st->base.nblocksy*st->base.stride);
+      if(!st->swbuf)
+         goto no_swbuf;
+   }
+   
+   pipe_texture_reference(&st->base.texture, texture);
+
+   if (usage & PIPE_TRANSFER_READ)
+      svga_transfer_dma(st, SVGA3D_READ_HOST_VRAM);
+
+   return &st->base;
+
+no_swbuf:
+   sws->buffer_destroy(sws, st->hwbuf);
+no_hwbuf:
+   FREE(st);
+   return NULL;
+}
+
+
+static void *
+svga_transfer_map( struct pipe_screen *screen,
+                   struct pipe_transfer *transfer )
+{
+   struct svga_screen *ss = svga_screen(screen);
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_transfer *st = svga_transfer(transfer);
+
+   if(st->swbuf)
+      return st->swbuf;
+   else
+      /* The wait for read transfers already happened when svga_transfer_dma
+       * was called. */
+      return sws->buffer_map(sws, st->hwbuf,
+                             pipe_transfer_buffer_flags(transfer));
+}
+
+
+static void
+svga_transfer_unmap(struct pipe_screen *screen,
+                    struct pipe_transfer *transfer)
+{
+   struct svga_screen *ss = svga_screen(screen);
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_transfer *st = svga_transfer(transfer);
+   
+   if(!st->swbuf)
+      sws->buffer_unmap(sws, st->hwbuf);
+}
+
+
+static void
+svga_tex_transfer_destroy(struct pipe_transfer *transfer)
+{
+   struct svga_texture *tex = svga_texture(transfer->texture);
+   struct svga_screen *ss = svga_screen(transfer->texture->screen);
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_transfer *st = svga_transfer(transfer);
+
+   if (st->base.usage & PIPE_TRANSFER_WRITE) {
+      svga_transfer_dma(st, SVGA3D_WRITE_HOST_VRAM);
+      ss->texture_timestamp++;
+      tex->view_age[transfer->level] = ++(tex->age);
+      tex->defined[transfer->face][transfer->level] = TRUE;
+   }
+
+   pipe_texture_reference(&st->base.texture, NULL);
+   FREE(st->swbuf);
+   sws->buffer_destroy(sws, st->hwbuf);
+   FREE(st);
+}
+
+void
+svga_screen_init_texture_functions(struct pipe_screen *screen)
+{
+   screen->texture_create = svga_texture_create;
+   screen->texture_destroy = svga_texture_destroy;
+   screen->get_tex_surface = svga_get_tex_surface;
+   screen->tex_surface_destroy = svga_tex_surface_destroy;
+   screen->texture_blanket = svga_texture_blanket;
+   screen->get_tex_transfer = svga_get_tex_transfer;
+   screen->transfer_map = svga_transfer_map;
+   screen->transfer_unmap = svga_transfer_unmap;
+   screen->tex_transfer_destroy = svga_tex_transfer_destroy;
+}
+
+/*********************************************************************** 
+ */
+
+struct svga_sampler_view *
+svga_get_tex_sampler_view(struct pipe_context *pipe, struct pipe_texture *pt,
+                          unsigned min_lod, unsigned max_lod)
+{
+   struct svga_screen *ss = svga_screen(pt->screen);
+   struct svga_winsys_screen *sws = ss->sws;
+   struct svga_texture *tex = svga_texture(pt); 
+   struct svga_sampler_view *sv = NULL;
+   SVGA3dSurfaceFormat format = svga_translate_format(pt->format);
+   boolean view = TRUE;
+
+   assert(pt);
+   assert(min_lod >= 0);
+   assert(min_lod <= max_lod);
+   assert(max_lod <= pt->last_level);
+
+
+   /* Is a view needed */
+   {
+      /*
+       * Can't control max lod. For first level views and when we only
+       * look at one level we disable mip filtering to achive the same
+       * results as a view.
+       */
+      if (min_lod == 0 && max_lod >= pt->last_level)
+         view = FALSE;
+
+      if (pf_is_compressed(pt->format) && view) {
+         format = svga_translate_format_render(pt->format);
+      }
+
+      if (ss->debug.no_sampler_view)
+         view = FALSE;
+
+      if (ss->debug.force_sampler_view)
+         view = TRUE;
+   }
+
+   /* First try the cache */
+   if (view) {
+      pipe_mutex_lock(ss->tex_mutex);
+      if (tex->cached_view &&
+          tex->cached_view->min_lod == min_lod &&
+          tex->cached_view->max_lod == max_lod) {
+         svga_sampler_view_reference(&sv, tex->cached_view);
+         pipe_mutex_unlock(ss->tex_mutex);
+         SVGA_DBG(DEBUG_VIEWS, "svga: Sampler view: reuse %p, %u %u, last %u\n",
+                              pt, min_lod, max_lod, pt->last_level);
+         svga_validate_sampler_view(svga_context(pipe), sv);
+         return sv;
+      }
+      pipe_mutex_unlock(ss->tex_mutex);
+   }
+
+   sv = CALLOC_STRUCT(svga_sampler_view);
+   pipe_reference_init(&sv->reference, 1);
+   sv->texture = tex;
+   sv->min_lod = min_lod;
+   sv->max_lod = max_lod;
+
+   /* No view needed just use the whole texture */
+   if (!view) {
+      SVGA_DBG(DEBUG_VIEWS,
+               "svga: Sampler view: no %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n",
+               pt, min_lod, max_lod,
+               max_lod - min_lod + 1,
+               pt->width0,
+               pt->height0,
+               pt->depth0,
+               pt->last_level);
+      sws->surface_reference(sws, &sv->handle, tex->handle);
+      return sv;
+   }
+
+   SVGA_DBG(DEBUG_VIEWS,
+            "svga: Sampler view: yes %p, mips %u..%u, nr %u, size (%ux%ux%u), last %u\n",
+            pt, min_lod, max_lod,
+            max_lod - min_lod + 1,
+            pt->width0,
+            pt->height0,
+            pt->depth0,
+            pt->last_level);
+
+   sv->age = tex->age;
+   sv->handle = svga_texture_view_surface(pipe, tex, format,
+                                          min_lod,
+                                          max_lod - min_lod + 1,
+                                          -1, -1);
+
+   if (!sv->handle) {
+      assert(0);
+      sws->surface_reference(sws, &sv->handle, tex->handle);
+      return sv;
+   }
+
+   pipe_mutex_lock(ss->tex_mutex);
+   svga_sampler_view_reference(&tex->cached_view, sv);
+   pipe_mutex_unlock(ss->tex_mutex);
+
+   return sv;
+}
+
+void
+svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v)
+{
+   struct svga_texture *tex = v->texture;
+   unsigned numFaces;
+   unsigned age = 0;
+   int i, k;
+
+   assert(svga);
+
+   if (v->handle == v->texture->handle)
+      return;
+
+   age = tex->age;
+
+   if(tex->base.target == PIPE_TEXTURE_CUBE)
+      numFaces = 6;
+   else
+      numFaces = 1;
+
+   for (i = v->min_lod; i <= v->max_lod; i++) {
+      for (k = 0; k < numFaces; k++) {
+         if (v->age < tex->view_age[i])
+            svga_texture_copy_handle(svga, NULL,
+                                     tex->handle, 0, 0, 0, i, k,
+                                     v->handle, 0, 0, 0, i - v->min_lod, k,
+                                     u_minify(tex->base.width0, i),
+                                     u_minify(tex->base.height0, i),
+                                     u_minify(tex->base.depth0, i));
+      }
+   }
+
+   v->age = age;
+}
+
+void
+svga_destroy_sampler_view_priv(struct svga_sampler_view *v)
+{
+   struct svga_screen *ss = svga_screen(v->texture->base.screen);
+
+   SVGA_DBG(DEBUG_DMA, "unref sid %p (sampler view)\n", v->handle);
+   ss->sws->surface_reference(ss->sws, &v->handle, NULL);
+
+   FREE(v);
+}
+
+boolean
+svga_screen_buffer_from_texture(struct pipe_texture *texture,
+				struct pipe_buffer **buffer,
+				unsigned *stride)
+{
+   struct svga_texture *stex = svga_texture(texture);
+
+   *buffer = svga_screen_buffer_wrap_surface
+      (texture->screen,
+       svga_translate_format(texture->format),
+       stex->handle);
+
+   *stride = pf_get_nblocksx(&texture->block, texture->width0) *
+      texture->block.size;
+
+   return *buffer != NULL;
+}
+
+
+struct svga_winsys_surface *
+svga_screen_texture_get_winsys_surface(struct pipe_texture *texture)
+{
+   struct svga_winsys_screen *sws = svga_winsys_screen(texture->screen);
+   struct svga_winsys_surface *vsurf = NULL;
+
+   sws->surface_reference(sws, &vsurf, svga_texture(texture)->handle);
+   return vsurf;
+}
diff --git a/src/gallium/drivers/svga/svga_screen_texture.h b/src/gallium/drivers/svga/svga_screen_texture.h
new file mode 100644
index 00000000000..1e6fef59a39
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_screen_texture.h
@@ -0,0 +1,177 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_TEXTURE_H
+#define SVGA_TEXTURE_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
+
+struct pipe_context;
+struct pipe_screen;
+struct svga_context;
+struct svga_winsys_surface;
+enum SVGA3dSurfaceFormat;
+
+
+#define SVGA_MAX_TEXTURE_LEVELS 12 /* 2048x2048 */
+
+
+/**
+ * A sampler's view into a texture
+ *
+ * We currently cache one sampler view on
+ * the texture and in there by holding a reference
+ * from the texture to the sampler view.
+ *
+ * Because of this we can not hold a refernce to the
+ * texture from the sampler view. So the user
+ * of the sampler views must make sure that the
+ * texture has a reference take for as long as
+ * the sampler view is refrenced.
+ *
+ * Just unreferencing the sampler_view before the
+ * texture is enough.
+ */
+struct svga_sampler_view
+{
+   struct pipe_reference reference;
+
+   struct svga_texture *texture;
+
+   int min_lod;
+   int max_lod;
+
+   unsigned age;
+
+   struct svga_winsys_surface *handle;
+};
+
+
+struct svga_texture 
+{
+   struct pipe_texture base;
+
+   struct svga_winsys_surface *handle;
+
+   boolean defined[6][PIPE_MAX_TEXTURE_LEVELS];
+   
+   struct svga_sampler_view *cached_view;
+
+   unsigned view_age[SVGA_MAX_TEXTURE_LEVELS];
+   unsigned age;
+
+   boolean views_modified;
+};
+
+
+struct svga_surface
+{
+   struct pipe_surface base;
+
+   struct svga_winsys_surface *handle;
+
+   unsigned real_face;
+   unsigned real_level;
+   unsigned real_zslice;
+
+   boolean dirty;
+};
+
+
+struct svga_transfer
+{
+   struct pipe_transfer base;
+
+   struct svga_winsys_buffer *hwbuf;
+
+   /* Height of the hardware buffer in pixel blocks */
+   unsigned hw_nblocksy;
+
+   /* Temporary malloc buffer when we can't allocate a hardware buffer
+    * big enough */
+   void *swbuf;
+};
+
+
+static INLINE struct svga_texture *
+svga_texture(struct pipe_texture *texture)
+{
+   return (struct svga_texture *)texture;
+}
+
+static INLINE struct svga_surface *
+svga_surface(struct pipe_surface *surface)
+{
+   assert(surface);
+   return (struct svga_surface *)surface;
+}
+
+static INLINE struct svga_transfer *
+svga_transfer(struct pipe_transfer *transfer)
+{
+   assert(transfer);
+   return (struct svga_transfer *)transfer;
+}
+
+extern struct svga_sampler_view *
+svga_get_tex_sampler_view(struct pipe_context *pipe,
+                          struct pipe_texture *pt,
+                          unsigned min_lod, unsigned max_lod);
+
+void
+svga_validate_sampler_view(struct svga_context *svga, struct svga_sampler_view *v);
+
+void
+svga_destroy_sampler_view_priv(struct svga_sampler_view *v);
+
+static INLINE void
+svga_sampler_view_reference(struct svga_sampler_view **ptr, struct svga_sampler_view *v)
+{
+   struct svga_sampler_view *old = *ptr;
+
+   if (pipe_reference((struct pipe_reference **)ptr, &v->reference))
+      svga_destroy_sampler_view_priv(old);
+}
+
+extern void
+svga_propagate_surface(struct pipe_context *pipe, struct pipe_surface *surf);
+
+extern boolean
+svga_surface_needs_propagation(struct pipe_surface *surf);
+
+extern void
+svga_screen_init_texture_functions(struct pipe_screen *screen);
+
+enum SVGA3dSurfaceFormat
+svga_translate_format(enum pipe_format format);
+
+enum SVGA3dSurfaceFormat
+svga_translate_format_render(enum pipe_format format);
+
+
+#endif /* SVGA_TEXTURE_H */
diff --git a/src/gallium/drivers/svga/svga_state.c b/src/gallium/drivers/svga/svga_state.c
new file mode 100644
index 00000000000..1c21d3acfe3
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state.c
@@ -0,0 +1,278 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "util/u_debug.h"
+#include "pipe/p_defines.h"
+#include "util/u_memory.h"
+#include "draw/draw_context.h"
+
+#include "svga_context.h"
+#include "svga_screen.h"
+#include "svga_state.h"
+#include "svga_draw.h"
+#include "svga_cmd.h"
+#include "svga_hw_reg.h"
+
+/* This is just enough to decide whether we need to use the draw
+ * module (swtnl) or not.
+ */
+static const struct svga_tracked_state *need_swtnl_state[] =
+{
+   &svga_update_need_swvfetch,
+   &svga_update_need_pipeline,
+   &svga_update_need_swtnl,
+   NULL
+};
+
+
+/* Atoms to update hardware state prior to emitting a clear or draw
+ * packet.
+ */
+static const struct svga_tracked_state *hw_clear_state[] =
+{
+   &svga_hw_scissor,
+   &svga_hw_viewport,
+   &svga_hw_framebuffer,
+   NULL
+};
+
+
+/* Atoms to update hardware state prior to emitting a draw packet.
+ */
+static const struct svga_tracked_state *hw_draw_state[] =
+{
+   &svga_hw_update_zero_stride,
+   &svga_hw_fs,
+   &svga_hw_vs,
+   &svga_hw_rss,
+   &svga_hw_tss,
+   &svga_hw_tss_binding,
+   &svga_hw_clip_planes,
+   &svga_hw_vdecl,
+   &svga_hw_fs_parameters,
+   &svga_hw_vs_parameters,
+   NULL
+};
+
+
+static const struct svga_tracked_state *swtnl_draw_state[] =
+{
+   &svga_update_swtnl_draw,
+   &svga_update_swtnl_vdecl,
+   NULL
+};
+
+/* Flattens the graph of state dependencies.  Could swap the positions
+ * of hw_clear_state and need_swtnl_state without breaking anything.
+ */
+static const struct svga_tracked_state **state_levels[] = 
+{
+   need_swtnl_state,
+   hw_clear_state,
+   hw_draw_state,
+   swtnl_draw_state
+};
+
+
+
+static unsigned check_state( unsigned a,
+                             unsigned b )
+{
+   return (a & b);
+}
+
+static void accumulate_state( unsigned *a,
+			      unsigned b )
+{
+   *a |= b;
+}
+
+
+static void xor_states( unsigned *result,
+                        unsigned a,
+                        unsigned b )
+{
+   *result = a ^ b;
+}
+
+
+
+static int update_state( struct svga_context *svga,
+                         const struct svga_tracked_state *atoms[],
+                         unsigned *state )
+{
+   boolean debug = TRUE;
+   enum pipe_error ret = 0;
+   unsigned i;
+
+   ret = svga_hwtnl_flush( svga->hwtnl );
+   if (ret != 0)
+      return ret;
+
+   if (debug) {
+      /* Debug version which enforces various sanity checks on the
+       * state flags which are generated and checked to help ensure
+       * state atoms are ordered correctly in the list.
+       */
+      unsigned examined, prev;      
+
+      examined = 0;
+      prev = *state;
+
+      for (i = 0; atoms[i] != NULL; i++) {	 
+	 unsigned generated;
+
+	 assert(atoms[i]->dirty); 
+	 assert(atoms[i]->update);
+
+	 if (check_state(*state, atoms[i]->dirty)) {
+	    if (0)
+               debug_printf("update: %s\n", atoms[i]->name);
+	    ret = atoms[i]->update( svga, *state );
+            if (ret != 0)
+               return ret;
+	 }
+
+	 /* generated = (prev ^ state)
+	  * if (examined & generated)
+	  *     fail;
+	  */
+	 xor_states(&generated, prev, *state);
+	 if (check_state(examined, generated)) {
+	    debug_printf("state atom %s generated state already examined\n", 
+                         atoms[i]->name);
+	    assert(0);
+	 }
+			 
+	 prev = *state;
+	 accumulate_state(&examined, atoms[i]->dirty);
+      }
+   }
+   else {
+      for (i = 0; atoms[i] != NULL; i++) {	 
+	 if (check_state(*state, atoms[i]->dirty)) {
+	    ret = atoms[i]->update( svga, *state );
+            if (ret != 0)
+               return ret;
+         }
+      }
+   }
+
+   return 0;
+}
+
+
+
+int svga_update_state( struct svga_context *svga,
+                       unsigned max_level )
+{
+   struct svga_screen *screen = svga_screen(svga->pipe.screen);
+   int ret = 0;
+   int i;
+
+   /* Check for updates to bound textures.  This can't be done in an
+    * atom as there is no flag which could provoke this test, and we
+    * cannot create one.
+    */
+   if (svga->state.texture_timestamp != screen->texture_timestamp) {
+      svga->state.texture_timestamp = screen->texture_timestamp;
+      svga->dirty |= SVGA_NEW_TEXTURE;
+   }
+
+   for (i = 0; i <= max_level; i++) {
+      svga->dirty |= svga->state.dirty[i];
+
+      if (svga->dirty) {
+         ret = update_state( svga, 
+                             state_levels[i], 
+                             &svga->dirty );
+         if (ret != 0)
+            return ret;
+
+         svga->state.dirty[i] = 0;
+      }
+   }
+   
+   for (; i < SVGA_STATE_MAX; i++) 
+      svga->state.dirty[i] |= svga->dirty;
+
+   svga->dirty = 0;
+   return 0;
+}
+
+
+
+
+void svga_update_state_retry( struct svga_context *svga,
+                              unsigned max_level )
+{
+   int ret;
+
+   ret = svga_update_state( svga, max_level );
+
+   if (ret == PIPE_ERROR_OUT_OF_MEMORY) {
+      svga_context_flush(svga, NULL);
+      ret = svga_update_state( svga, max_level );
+   }
+
+   assert( ret == 0 );
+}
+
+
+
+#define EMIT_RS(_rs, _count, _name, _value)     \
+do {                                            \
+   _rs[_count].state = _name;                   \
+   _rs[_count].uintValue = _value;              \
+   _count++;                                    \
+} while (0)
+
+
+/* Setup any hardware state which will be constant through the life of
+ * a context.
+ */
+enum pipe_error svga_emit_initial_state( struct svga_context *svga )
+{
+   SVGA3dRenderState *rs;
+   unsigned count = 0;
+   const unsigned COUNT = 2;
+   enum pipe_error ret;
+
+   ret = SVGA3D_BeginSetRenderState( svga->swc, &rs, COUNT );
+   if (ret)
+      return ret;
+
+   /* Always use D3D style coordinate space as this is the only one
+    * which is implemented on all backends.
+    */
+   EMIT_RS(rs, count, SVGA3D_RS_COORDINATETYPE, SVGA3D_COORDINATE_LEFTHANDED );
+   EMIT_RS(rs, count, SVGA3D_RS_FRONTWINDING, SVGA3D_FRONTWINDING_CW );
+   
+   assert( COUNT == count );
+   SVGA_FIFOCommitAll( svga->swc );
+
+   return 0;
+
+}
diff --git a/src/gallium/drivers/svga/svga_state.h b/src/gallium/drivers/svga/svga_state.h
new file mode 100644
index 00000000000..22d5a6d552a
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state.h
@@ -0,0 +1,95 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_STATE_H
+#define SVGA_STATE_H
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+
+struct svga_context;
+
+
+void svga_init_state( struct svga_context *svga );
+void svga_destroy_state( struct svga_context *svga );
+
+
+struct svga_tracked_state {
+   const char *name;
+   unsigned dirty;
+   int (*update)( struct svga_context *svga, unsigned dirty );
+};
+
+/* NEED_SWTNL
+ */
+extern struct svga_tracked_state svga_update_need_swvfetch;
+extern struct svga_tracked_state svga_update_need_pipeline;
+extern struct svga_tracked_state svga_update_need_swtnl;
+
+/* HW_CLEAR
+ */
+extern struct svga_tracked_state svga_hw_viewport;
+extern struct svga_tracked_state svga_hw_scissor;
+extern struct svga_tracked_state svga_hw_framebuffer;
+
+/* HW_DRAW
+ */
+extern struct svga_tracked_state svga_hw_vs;
+extern struct svga_tracked_state svga_hw_fs;
+extern struct svga_tracked_state svga_hw_rss;
+extern struct svga_tracked_state svga_hw_tss;
+extern struct svga_tracked_state svga_hw_tss_binding;
+extern struct svga_tracked_state svga_hw_clip_planes;
+extern struct svga_tracked_state svga_hw_vdecl;
+extern struct svga_tracked_state svga_hw_fs_parameters;
+extern struct svga_tracked_state svga_hw_vs_parameters;
+extern struct svga_tracked_state svga_hw_update_zero_stride;
+
+/* SWTNL_DRAW
+ */
+extern struct svga_tracked_state svga_update_swtnl_draw;
+extern struct svga_tracked_state svga_update_swtnl_vdecl;
+
+/* Bring the hardware fully up-to-date so that we can emit draw
+ * commands.
+ */
+#define SVGA_STATE_NEED_SWTNL        0
+#define SVGA_STATE_HW_CLEAR          1
+#define SVGA_STATE_HW_DRAW           2
+#define SVGA_STATE_SWTNL_DRAW        3
+#define SVGA_STATE_MAX               4
+
+
+enum pipe_error svga_update_state( struct svga_context *svga,
+                                   unsigned level );
+
+void svga_update_state_retry( struct svga_context *svga,
+                              unsigned level );
+
+
+enum pipe_error svga_emit_initial_state( struct svga_context *svga );
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_state_constants.c b/src/gallium/drivers/svga/svga_state_constants.c
new file mode 100644
index 00000000000..209ed282450
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_constants.c
@@ -0,0 +1,239 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+#include "svga_tgsi.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+
+/***********************************************************************
+ * Hardware update 
+ */
+
+/* Convert from PIPE_SHADER_* to SVGA3D_SHADERTYPE_*
+ */
+static int svga_shader_type( int unit )
+{
+   return unit + 1;
+}
+
+
+static int emit_const( struct svga_context *svga,
+                       int unit,
+                       int i,
+                       const float *value )
+{
+   int ret = PIPE_OK;
+
+   if (memcmp(svga->state.hw_draw.cb[unit][i], value, 4 * sizeof(float)) != 0) {
+      if (SVGA_DEBUG & DEBUG_CONSTS)
+         debug_printf("%s %s %d: %f %f %f %f\n",
+                      __FUNCTION__,
+                      unit == PIPE_SHADER_VERTEX ? "VERT" : "FRAG",
+                      i,
+                      value[0],
+                      value[1],
+                      value[2],
+                      value[3]);
+
+      ret = SVGA3D_SetShaderConst( svga->swc, 
+                                   i,
+                                   svga_shader_type(unit),
+                                   SVGA3D_CONST_TYPE_FLOAT,
+                                   value );
+      if (ret)
+         return ret;
+
+      memcpy(svga->state.hw_draw.cb[unit][i], value, 4 * sizeof(float));
+   }
+   
+   return ret;
+}
+
+static int emit_consts( struct svga_context *svga,
+                        int offset,
+                        int unit )
+{
+   struct pipe_screen *screen = svga->pipe.screen;
+   unsigned count;
+   const float (*data)[4] = NULL;
+   unsigned i;
+   int ret = PIPE_OK;
+
+   if (svga->curr.cb[unit] == NULL)
+      goto done;
+
+   count = svga->curr.cb[unit]->size / (4 * sizeof(float));
+
+   data = (const float (*)[4])pipe_buffer_map(screen,
+                                              svga->curr.cb[unit],
+                                              PIPE_BUFFER_USAGE_CPU_READ);
+   if (data == NULL) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto done;
+   }
+
+   for (i = 0; i < count; i++) {
+      ret = emit_const( svga, unit, offset + i, data[i] );
+      if (ret)
+         goto done;
+   }
+
+done:
+   if (data)
+      pipe_buffer_unmap(screen, svga->curr.cb[unit]);
+
+   return ret;
+}
+   
+static int emit_fs_consts( struct svga_context *svga,
+                           unsigned dirty )
+{
+   const struct svga_shader_result *result = svga->state.hw_draw.fs;
+   const struct svga_fs_compile_key *key = &result->key.fkey;
+   int ret = 0;
+
+   ret = emit_consts( svga, 0, PIPE_SHADER_FRAGMENT );
+   if (ret)
+      return ret;
+
+   /* The internally generated fragment shader for xor blending
+    * doesn't have a 'result' struct.  It should be fixed to avoid
+    * this special case, but work around it with a NULL check:
+    */
+   if (result != NULL &&
+       key->num_unnormalized_coords)
+   {
+      unsigned offset = result->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      int i;
+
+      for (i = 0; i < key->num_textures; i++) {
+         if (key->tex[i].unnormalized) {
+            struct pipe_texture *tex = svga->curr.texture[i];
+            float data[4];
+
+            data[0] = 1.0 / (float)tex->width0;
+            data[1] = 1.0 / (float)tex->height0;
+            data[2] = 1.0;
+            data[3] = 1.0;
+
+            ret = emit_const( svga,
+                              PIPE_SHADER_FRAGMENT,
+                              key->tex[i].width_height_idx + offset,
+                              data );
+            if (ret)
+               return ret;
+         }
+      }
+
+      offset += key->num_unnormalized_coords;
+   }
+
+   return 0;
+}
+
+
+struct svga_tracked_state svga_hw_fs_parameters = 
+{
+   "hw fs params",
+   (SVGA_NEW_FS_CONST_BUFFER |
+    SVGA_NEW_FS_RESULT |
+    SVGA_NEW_TEXTURE_BINDING),
+   emit_fs_consts
+};
+
+/***********************************************************************
+ */
+
+static int emit_vs_consts( struct svga_context *svga,
+                           unsigned dirty )
+{
+   const struct svga_shader_result *result = svga->state.hw_draw.vs;
+   const struct svga_vs_compile_key *key = &result->key.vkey;
+   int ret = 0;
+   unsigned offset;
+
+   /* SVGA_NEW_VS_RESULT
+    */
+   if (result == NULL) 
+      return 0;
+
+   /* SVGA_NEW_VS_CONST_BUFFER 
+    */
+   ret = emit_consts( svga, 0, PIPE_SHADER_VERTEX );
+   if (ret)
+      return ret;
+
+   offset = result->shader->info.file_max[TGSI_FILE_CONSTANT] + 1;
+
+   /* SVGA_NEW_VS_RESULT
+    */
+   if (key->need_prescale) {
+      ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++,
+                        svga->state.hw_clear.prescale.scale );
+      if (ret)
+         return ret;
+
+      ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++,
+                        svga->state.hw_clear.prescale.translate );
+      if (ret)
+         return ret;
+   }
+
+   /* SVGA_NEW_ZERO_STRIDE
+    */
+   if (key->zero_stride_vertex_elements) {
+      unsigned i, curr_zero_stride = 0;
+      for (i = 0; i < PIPE_MAX_ATTRIBS; ++i) {
+         if (key->zero_stride_vertex_elements & (1 << i)) {
+            ret = emit_const( svga, PIPE_SHADER_VERTEX, offset++,
+                              svga->curr.zero_stride_constants +
+                              4 * curr_zero_stride );
+            if (ret)
+               return ret;
+            ++curr_zero_stride;
+         }
+      }
+   }
+
+   return 0;
+}
+
+
+struct svga_tracked_state svga_hw_vs_parameters = 
+{
+   "hw vs params",
+   (SVGA_NEW_VS_CONST_BUFFER |
+    SVGA_NEW_ZERO_STRIDE |
+    SVGA_NEW_VS_RESULT),
+   emit_vs_consts
+};
+
diff --git a/src/gallium/drivers/svga/svga_state_framebuffer.c b/src/gallium/drivers/svga/svga_state_framebuffer.c
new file mode 100644
index 00000000000..7d7f93d8e3c
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_framebuffer.c
@@ -0,0 +1,455 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+
+
+/***********************************************************************
+ * Hardware state update
+ */
+
+
+static int emit_framebuffer( struct svga_context *svga,
+                             unsigned dirty )
+{
+   const struct pipe_framebuffer_state *curr = &svga->curr.framebuffer;
+   struct pipe_framebuffer_state *hw = &svga->state.hw_clear.framebuffer;
+   unsigned i;
+   enum pipe_error ret;
+
+   /* XXX: Need shadow state in svga->hw to eliminate redundant
+    * uploads, especially of NULL buffers.
+    */
+   
+   for(i = 0; i < PIPE_MAX_COLOR_BUFS; ++i) {
+      if (curr->cbufs[i] != hw->cbufs[i]) {
+         ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_COLOR0 + i, curr->cbufs[i]);
+         if (ret != PIPE_OK)
+            return ret;
+         
+         pipe_surface_reference(&hw->cbufs[i], curr->cbufs[i]);
+      }
+   }
+
+   
+   if (curr->zsbuf != hw->zsbuf) {
+      ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_DEPTH, curr->zsbuf);
+      if (ret != PIPE_OK)
+         return ret;
+
+      if (curr->zsbuf &&
+          curr->zsbuf->format == PIPE_FORMAT_Z24S8_UNORM) {
+         ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, curr->zsbuf);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+      else {
+         ret = SVGA3D_SetRenderTarget(svga->swc, SVGA3D_RT_STENCIL, NULL);
+         if (ret != PIPE_OK)
+            return ret;
+      }
+      
+      pipe_surface_reference(&hw->zsbuf, curr->zsbuf);
+   }
+
+
+   return 0;
+}
+
+
+struct svga_tracked_state svga_hw_framebuffer = 
+{
+   "hw framebuffer state",
+   SVGA_NEW_FRAME_BUFFER,
+   emit_framebuffer
+};
+
+
+
+
+/*********************************************************************** 
+ */
+
+static int emit_viewport( struct svga_context *svga,
+                          unsigned dirty )
+{
+   const struct pipe_viewport_state *viewport = &svga->curr.viewport;
+   struct svga_prescale prescale;
+   SVGA3dRect rect;
+   /* Not sure if this state is relevant with POSITIONT.  Probably
+    * not, but setting to 0,1 avoids some state pingponging.
+    */
+   float range_min = 0.0;
+   float range_max = 1.0;
+   float flip = -1.0;
+   boolean degenerate = FALSE;
+   enum pipe_error ret;
+
+   float fb_width = svga->curr.framebuffer.width;
+   float fb_height = svga->curr.framebuffer.height;
+
+   memset( &prescale, 0, sizeof(prescale) );
+
+   if (svga->curr.rast->templ.bypass_vs_clip_and_viewport) {
+
+      /* Avoid POSITIONT as it has a non trivial implementation outside the D3D
+       * API. Always generate a vertex shader.
+       */
+      rect.x = 0;
+      rect.y = 0;
+      rect.w = svga->curr.framebuffer.width;
+      rect.h = svga->curr.framebuffer.height;
+
+      prescale.scale[0] = 2.0 / (float)rect.w;
+      prescale.scale[1] = - 2.0 / (float)rect.h;
+      prescale.scale[2] = 1.0;
+      prescale.scale[3] = 1.0;
+      prescale.translate[0] = -1.0f;
+      prescale.translate[1] = 1.0f;
+      prescale.translate[2] = 0;
+      prescale.translate[3] = 0;
+      prescale.enabled = TRUE;
+   } else {
+
+      /* Examine gallium viewport transformation and produce a screen
+       * rectangle and possibly vertex shader pre-transformation to
+       * get the same results.
+       */
+      float fx =        viewport->scale[0] * -1.0 + viewport->translate[0];
+      float fy = flip * viewport->scale[1] * -1.0 + viewport->translate[1];
+      float fw =        viewport->scale[0] * 2; 
+      float fh = flip * viewport->scale[1] * 2; 
+
+      SVGA_DBG(DEBUG_VIEWPORT,
+               "\ninitial %f,%f %fx%f\n",
+               fx,
+               fy,
+               fw,
+               fh);
+
+      prescale.scale[0] = 1.0;
+      prescale.scale[1] = 1.0;
+      prescale.scale[2] = 1.0;
+      prescale.scale[3] = 1.0;
+      prescale.translate[0] = 0;
+      prescale.translate[1] = 0;
+      prescale.translate[2] = 0;
+      prescale.translate[3] = 0;
+      prescale.enabled = TRUE;
+
+
+
+      if (fw < 0) {
+         prescale.scale[0] *= -1.0;
+         prescale.translate[0] += -fw;
+         fw = -fw;
+         fx =        viewport->scale[0] * 1.0 + viewport->translate[0];
+      }
+
+      if (fh < 0) {
+         prescale.scale[1] *= -1.0;
+         prescale.translate[1] += -fh;
+         fh = -fh;
+         fy = flip * viewport->scale[1] * 1.0 + viewport->translate[1];
+      }
+
+      if (fx < 0) {
+         prescale.translate[0] += fx;
+         prescale.scale[0] *= fw / (fw + fx); 
+         fw += fx;
+         fx = 0;
+      }
+
+      if (fy < 0) {
+         prescale.translate[1] += fy;
+         prescale.scale[1] *= fh / (fh + fy); 
+         fh += fy;
+         fy = 0;
+      }
+
+      if (fx + fw > fb_width) {
+         prescale.scale[0] *= fw / (fb_width - fx); 
+         prescale.translate[0] -= fx * (fw / (fb_width - fx));
+         prescale.translate[0] += fx;
+         fw = fb_width - fx;
+         
+      }
+
+      if (fy + fh > fb_height) {
+         prescale.scale[1] *= fh / (fb_height - fy);
+         prescale.translate[1] -= fy * (fh / (fb_height - fy));
+         prescale.translate[1] += fy;
+         fh = fb_height - fy;
+      }
+
+      if (fw < 0 || fh < 0) {
+         fw = fh = fx = fy = 0;
+         degenerate = TRUE;
+         goto out;
+      }
+
+
+      /* D3D viewport is integer space.  Convert fx,fy,etc. to
+       * integers.
+       *
+       * TODO: adjust pretranslate correct for any subpixel error
+       * introduced converting to integers.
+       */
+      rect.x = fx;
+      rect.y = fy;
+      rect.w = fw;
+      rect.h = fh;
+
+      SVGA_DBG(DEBUG_VIEWPORT,
+               "viewport error %f,%f %fx%f\n",
+               fabs((float)rect.x - fx),
+               fabs((float)rect.y - fy),
+               fabs((float)rect.w - fw),
+               fabs((float)rect.h - fh));
+
+      SVGA_DBG(DEBUG_VIEWPORT,
+               "viewport %d,%d %dx%d\n",
+               rect.x,
+               rect.y,
+               rect.w,
+               rect.h);
+      
+
+      /* Finally, to get GL rasterization rules, need to tweak the
+       * screen-space coordinates slightly relative to D3D which is
+       * what hardware implements natively.
+       */
+      if (svga->curr.rast->templ.gl_rasterization_rules) {
+         float adjust_x = 0.0;
+         float adjust_y = 0.0;
+
+         switch (svga->curr.reduced_prim) {
+         case PIPE_PRIM_LINES:
+            adjust_x = -0.5;
+            adjust_y = 0;
+            break;
+         case PIPE_PRIM_POINTS:
+         case PIPE_PRIM_TRIANGLES:
+            adjust_x = -0.375;
+            adjust_y = -0.5;
+            break;
+         }
+
+         prescale.translate[0] += adjust_x;
+         prescale.translate[1] += adjust_y;
+         prescale.translate[2] = 0.5; /* D3D clip space */
+         prescale.scale[2]     = 0.5; /* D3D clip space */
+      }
+
+
+      range_min = viewport->scale[2] * -1.0 + viewport->translate[2];
+      range_max = viewport->scale[2] *  1.0 + viewport->translate[2];
+
+      /* D3D (and by implication SVGA) doesn't like dealing with zmax
+       * less than zmin.  Detect that case, flip the depth range and
+       * invert our z-scale factor to achieve the same effect.
+       */
+      if (range_min > range_max) {
+         float range_tmp;
+         range_tmp = range_min; 
+         range_min = range_max; 
+         range_max = range_tmp;
+         prescale.scale[2]     = -prescale.scale[2];
+      }
+   }
+
+   if (prescale.enabled) {
+      float H[2];
+      float J[2];
+      int i;
+
+      SVGA_DBG(DEBUG_VIEWPORT,
+               "prescale %f,%f %fx%f\n",
+               prescale.translate[0],
+               prescale.translate[1],
+               prescale.scale[0],
+               prescale.scale[1]);
+
+      H[0] = (float)rect.w / 2.0;
+      H[1] = -(float)rect.h / 2.0;
+      J[0] = (float)rect.x + (float)rect.w / 2.0;
+      J[1] = (float)rect.y + (float)rect.h / 2.0;
+
+      SVGA_DBG(DEBUG_VIEWPORT,
+               "H %f,%f\n"
+               "J %fx%f\n",
+               H[0],
+               H[1],
+               J[0],
+               J[1]);
+
+      /* Adjust prescale to take into account the fact that it is
+       * going to be applied prior to the perspective divide and
+       * viewport transformation.
+       * 
+       * Vwin = H(Vc/Vc.w) + J
+       *
+       * We want to tweak Vwin with scale and translation from above,
+       * as in:
+       *
+       * Vwin' = S Vwin + T
+       *
+       * But we can only modify the values at Vc.  Plugging all the
+       * above together, and rearranging, eventually we get:
+       *
+       *   Vwin' = H(Vc'/Vc'.w) + J
+       * where:
+       *   Vc' = SVc + KVc.w
+       *   K = (T + (S-1)J) / H
+       *
+       * Overwrite prescale.translate with values for K:
+       */
+      for (i = 0; i < 2; i++) {
+         prescale.translate[i] = ((prescale.translate[i] +
+                                   (prescale.scale[i] - 1.0) * J[i]) / H[i]);
+      }
+
+      SVGA_DBG(DEBUG_VIEWPORT,
+               "clipspace %f,%f %fx%f\n",
+               prescale.translate[0],
+               prescale.translate[1],
+               prescale.scale[0],
+               prescale.scale[1]);
+   }
+
+out:
+   if (degenerate) {
+      rect.x = 0;
+      rect.y = 0;
+      rect.w = 1;
+      rect.h = 1;
+      prescale.enabled = FALSE;
+   }
+
+   if (memcmp(&rect, &svga->state.hw_clear.viewport, sizeof(rect)) != 0) {
+      ret = SVGA3D_SetViewport(svga->swc, &rect);
+      if(ret != PIPE_OK)
+         return ret;
+
+      memcpy(&svga->state.hw_clear.viewport, &rect, sizeof(rect));
+      assert(sizeof(rect) == sizeof(svga->state.hw_clear.viewport));
+   }
+
+   if (svga->state.hw_clear.depthrange.zmin != range_min ||
+       svga->state.hw_clear.depthrange.zmax != range_max) 
+   {
+      ret = SVGA3D_SetZRange(svga->swc, range_min, range_max );
+      if(ret != PIPE_OK)
+         return ret;
+
+      svga->state.hw_clear.depthrange.zmin = range_min;
+      svga->state.hw_clear.depthrange.zmax = range_max;
+   }
+
+   if (memcmp(&prescale, &svga->state.hw_clear.prescale, sizeof prescale) != 0) {
+      svga->dirty |= SVGA_NEW_PRESCALE;
+      svga->state.hw_clear.prescale = prescale;
+   }
+
+   return 0;
+}
+
+
+struct svga_tracked_state svga_hw_viewport = 
+{
+   "hw viewport state",
+   ( SVGA_NEW_FRAME_BUFFER |
+     SVGA_NEW_VIEWPORT |
+     SVGA_NEW_RAST |
+     SVGA_NEW_REDUCED_PRIMITIVE ),
+   emit_viewport
+};
+
+
+/***********************************************************************
+ * Scissor state
+ */
+static int emit_scissor_rect( struct svga_context *svga,
+                              unsigned dirty )
+{
+   const struct pipe_scissor_state *scissor = &svga->curr.scissor;
+   SVGA3dRect rect;
+
+   rect.x = scissor->minx;
+   rect.y = scissor->miny;
+   rect.w = scissor->maxx - scissor->minx; /* + 1 ?? */
+   rect.h = scissor->maxy - scissor->miny; /* + 1 ?? */
+   
+   return SVGA3D_SetScissorRect(svga->swc, &rect);
+}
+
+
+struct svga_tracked_state svga_hw_scissor = 
+{
+   "hw scissor state",
+   SVGA_NEW_SCISSOR,
+   emit_scissor_rect
+};
+
+
+/***********************************************************************
+ * Userclip state
+ */
+
+static int emit_clip_planes( struct svga_context *svga,
+                             unsigned dirty )
+{
+   unsigned i;
+   enum pipe_error ret;
+
+   /* TODO: just emit directly from svga_set_clip_state()?
+    */
+   for (i = 0; i < svga->curr.clip.nr; i++) {
+      ret = SVGA3D_SetClipPlane( svga->swc,
+                                 i,
+                                 svga->curr.clip.ucp[i] );
+      if(ret != PIPE_OK)
+         return ret;
+   }
+
+   return 0;
+}
+
+
+struct svga_tracked_state svga_hw_clip_planes = 
+{
+   "hw viewport state",
+   SVGA_NEW_CLIP,
+   emit_clip_planes
+};
diff --git a/src/gallium/drivers/svga/svga_state_fs.c b/src/gallium/drivers/svga/svga_state_fs.c
new file mode 100644
index 00000000000..6ec38ed3e45
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_fs.c
@@ -0,0 +1,282 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+#include "svga_tgsi.h"
+
+#include "svga_hw_reg.h"
+
+
+
+static INLINE int compare_fs_keys( const struct svga_fs_compile_key *a,
+                                   const struct svga_fs_compile_key *b )
+{
+   unsigned keysize = svga_fs_key_size( a );
+   return memcmp( a, b, keysize );
+}
+
+
+static struct svga_shader_result *search_fs_key( struct svga_fragment_shader *fs,
+                                                 const struct svga_fs_compile_key *key )
+{
+   struct svga_shader_result *result = fs->base.results;
+
+   assert(key);
+
+   for ( ; result; result = result->next) {
+      if (compare_fs_keys( key, &result->key.fkey ) == 0)
+         return result;
+   }
+   
+   return NULL;
+}
+
+
+static enum pipe_error compile_fs( struct svga_context *svga,
+                                   struct svga_fragment_shader *fs,
+                                   const struct svga_fs_compile_key *key,
+                                   struct svga_shader_result **out_result )
+{
+   struct svga_shader_result *result;
+   enum pipe_error ret;
+
+   result = svga_translate_fragment_program( fs, key );
+   if (result == NULL) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto fail;
+   }
+
+
+   ret = SVGA3D_DefineShader(svga->swc, 
+                             svga->state.next_fs_id,
+                             SVGA3D_SHADERTYPE_PS,
+                             result->tokens, 
+                             result->nr_tokens * sizeof result->tokens[0]);
+   if (ret)
+      goto fail;
+
+   *out_result = result;
+   result->id = svga->state.next_fs_id++;
+   result->next = fs->base.results;
+   fs->base.results = result;
+   return PIPE_OK;
+
+fail:
+   if (result)
+      svga_destroy_shader_result( result );
+   return ret;
+}
+
+/* The blend workaround for simulating logicop xor behaviour requires
+ * that the incoming fragment color be white.  This change achieves
+ * that by hooking up a hard-wired fragment shader that just emits
+ * color 1,1,1,1
+ *   
+ * This is a slightly incomplete solution as it assumes that the
+ * actual bound shader has no other effects beyond generating a
+ * fragment color.  In particular shaders containing TEXKIL and/or
+ * depth-write will not have the correct behaviour, nor will those
+ * expecting to use alphatest.
+ *   
+ * These are avoidable issues, but they are not much worse than the
+ * unavoidable ones associated with this technique, so it's not clear
+ * how much effort should be expended trying to resolve them - the
+ * ultimate result will still not be correct in most cases.
+ *
+ * Shader below was generated with:
+ *   SVGA_DEBUG=tgsi ./mesa/progs/fp/fp-tri white.txt
+ */
+static int emit_white_fs( struct svga_context *svga )
+{
+   int ret;
+
+   /* ps_3_0
+    * def c0, 1.000000, 0.000000, 0.000000, 1.000000
+    * mov oC0, c0.x
+    * end
+    */
+   static const unsigned white_tokens[] = {
+      0xffff0300,
+      0x05000051,
+      0xa00f0000,
+      0x3f800000,
+      0x00000000,
+      0x00000000,
+      0x3f800000,
+      0x02000001,
+      0x800f0800,
+      0xa0000000,
+      0x0000ffff,
+   };
+
+   ret = SVGA3D_DefineShader(svga->swc, 
+                             svga->state.next_fs_id,
+                             SVGA3D_SHADERTYPE_PS,
+                             white_tokens, 
+                             sizeof(white_tokens));
+   if (ret)
+      return ret;
+
+   svga->state.white_fs_id = svga->state.next_fs_id++;
+   return 0;
+}
+
+
+/* SVGA_NEW_TEXTURE_BINDING
+ * SVGA_NEW_RAST
+ * SVGA_NEW_NEED_SWTNL
+ * SVGA_NEW_SAMPLER
+ */
+static int make_fs_key( const struct svga_context *svga,
+                        struct svga_fs_compile_key *key )
+{
+   int i;
+   int idx = 0;
+
+   memset(key, 0, sizeof *key);
+
+   /* Only need fragment shader fixup for twoside lighting if doing
+    * hwtnl.  Otherwise the draw module does the whole job for us.
+    *
+    * SVGA_NEW_SWTNL
+    */
+   if (!svga->state.sw.need_swtnl) {
+      /* SVGA_NEW_RAST
+       */
+      key->light_twoside = svga->curr.rast->templ.light_twoside;
+      key->front_cw = (svga->curr.rast->templ.front_winding == 
+                       PIPE_WINDING_CW);
+   }
+
+   
+   /* XXX: want to limit this to the textures that the shader actually
+    * refers to.
+    *
+    * SVGA_NEW_TEXTURE_BINDING | SVGA_NEW_SAMPLER
+    */
+   for (i = 0; i < svga->curr.num_textures; i++) {
+      if (svga->curr.texture[i]) {
+         assert(svga->curr.sampler[i]);
+         key->tex[i].texture_target = svga->curr.texture[i]->target;
+         if (!svga->curr.sampler[i]->normalized_coords) {
+            key->tex[i].width_height_idx = idx++;
+            key->tex[i].unnormalized = TRUE;
+            ++key->num_unnormalized_coords;
+         }
+      }
+   }
+   key->num_textures = svga->curr.num_textures;
+
+   idx = 0;
+   for (i = 0; i < svga->curr.num_samplers; ++i) {
+      if (svga->curr.sampler[i]) {
+         key->tex[i].compare_mode = svga->curr.sampler[i]->compare_mode;
+         key->tex[i].compare_func = svga->curr.sampler[i]->compare_func;
+      }
+   }
+
+   return 0;
+}
+
+
+
+static int emit_hw_fs( struct svga_context *svga,
+                       unsigned dirty )
+{
+   struct svga_shader_result *result = NULL;
+   unsigned id = SVGA3D_INVALID_ID;
+   int ret = 0;
+
+   /* SVGA_NEW_BLEND
+    */
+   if (svga->curr.blend->need_white_fragments) {
+      if (svga->state.white_fs_id == SVGA3D_INVALID_ID) {
+         ret = emit_white_fs( svga );
+         if (ret)
+            return ret;
+      }
+      id = svga->state.white_fs_id;
+   }
+   else {
+      struct svga_fragment_shader *fs = svga->curr.fs;
+      struct svga_fs_compile_key key;
+
+      /* SVGA_NEW_TEXTURE_BINDING
+       * SVGA_NEW_RAST
+       * SVGA_NEW_NEED_SWTNL
+       * SVGA_NEW_SAMPLER
+       */
+      ret = make_fs_key( svga, &key );
+      if (ret)
+         return ret;
+
+      result = search_fs_key( fs, &key );
+      if (!result) {
+         ret = compile_fs( svga, fs, &key, &result );
+         if (ret)
+            return ret;
+      }
+
+      assert (result);
+      id = result->id;
+   }
+
+   assert(id != SVGA3D_INVALID_ID);
+
+   if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT]) {
+      ret = SVGA3D_SetShader(svga->swc, 
+                             SVGA3D_SHADERTYPE_PS, 
+                             id );
+      if (ret)
+         return ret;
+
+      svga->dirty |= SVGA_NEW_FS_RESULT;
+      svga->state.hw_draw.shader_id[PIPE_SHADER_FRAGMENT] = id;
+      svga->state.hw_draw.fs = result;      
+   }
+
+   return 0;
+}
+
+struct svga_tracked_state svga_hw_fs = 
+{
+   "fragment shader (hwtnl)",
+   (SVGA_NEW_FS |
+    SVGA_NEW_TEXTURE_BINDING |
+    SVGA_NEW_NEED_SWTNL |
+    SVGA_NEW_RAST |
+    SVGA_NEW_SAMPLER |
+    SVGA_NEW_BLEND),
+   emit_hw_fs
+};
+
+
+
diff --git a/src/gallium/drivers/svga/svga_state_need_swtnl.c b/src/gallium/drivers/svga/svga_state_need_swtnl.c
new file mode 100644
index 00000000000..00201b8091d
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_need_swtnl.c
@@ -0,0 +1,200 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_state.h"
+
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_debug.h"
+#include "svga_hw_reg.h"
+
+/***********************************************************************
+ */
+
+static INLINE SVGA3dDeclType 
+svga_translate_vertex_format(enum pipe_format format)
+{
+   switch (format) {
+   case PIPE_FORMAT_R32_FLOAT:            return SVGA3D_DECLTYPE_FLOAT1;
+   case PIPE_FORMAT_R32G32_FLOAT:         return SVGA3D_DECLTYPE_FLOAT2;
+   case PIPE_FORMAT_R32G32B32_FLOAT:      return SVGA3D_DECLTYPE_FLOAT3;
+   case PIPE_FORMAT_R32G32B32A32_FLOAT:   return SVGA3D_DECLTYPE_FLOAT4;
+   case PIPE_FORMAT_B8G8R8A8_UNORM:       return SVGA3D_DECLTYPE_D3DCOLOR;
+   case PIPE_FORMAT_R8G8B8A8_USCALED:     return SVGA3D_DECLTYPE_UBYTE4;
+   case PIPE_FORMAT_R16G16_SSCALED:       return SVGA3D_DECLTYPE_SHORT2;
+   case PIPE_FORMAT_R16G16B16A16_SSCALED: return SVGA3D_DECLTYPE_SHORT4;
+   case PIPE_FORMAT_R8G8B8A8_UNORM:       return SVGA3D_DECLTYPE_UBYTE4N;
+   case PIPE_FORMAT_R16G16_SNORM:         return SVGA3D_DECLTYPE_SHORT2N;
+   case PIPE_FORMAT_R16G16B16A16_SNORM:   return SVGA3D_DECLTYPE_SHORT4N;
+   case PIPE_FORMAT_R16G16_UNORM:         return SVGA3D_DECLTYPE_USHORT2N;
+   case PIPE_FORMAT_R16G16B16A16_UNORM:   return SVGA3D_DECLTYPE_USHORT4N;
+
+   /* These formats don't exist yet:
+    * 
+   case PIPE_FORMAT_R10G10B10_USCALED:    return SVGA3D_DECLTYPE_UDEC3;
+   case PIPE_FORMAT_R10G10B10_SNORM:      return SVGA3D_DECLTYPE_DEC3N;
+   case PIPE_FORMAT_R16G16_FLOAT:         return SVGA3D_DECLTYPE_FLOAT16_2;
+   case PIPE_FORMAT_R16G16B16A16_FLOAT:   return SVGA3D_DECLTYPE_FLOAT16_4;
+   */
+
+   default:
+      /* There are many formats without hardware support.  This case
+       * will be hit regularly, meaning we'll need swvfetch.
+       */
+      return SVGA3D_DECLTYPE_MAX;
+   }
+}
+
+
+static int update_need_swvfetch( struct svga_context *svga,
+                                 unsigned dirty )
+{
+   unsigned i;
+   boolean need_swvfetch = FALSE;
+
+   for (i = 0; i < svga->curr.num_vertex_elements; i++) {
+      svga->state.sw.ve_format[i] = svga_translate_vertex_format(svga->curr.ve[i].src_format);
+      if (svga->state.sw.ve_format[i] == SVGA3D_DECLTYPE_MAX) {
+         need_swvfetch = TRUE;
+         break;
+      }
+   }
+
+   if (need_swvfetch != svga->state.sw.need_swvfetch) {
+      svga->state.sw.need_swvfetch = need_swvfetch;
+      svga->dirty |= SVGA_NEW_NEED_SWVFETCH;
+   }
+   
+   return 0;
+}
+
+struct svga_tracked_state svga_update_need_swvfetch = 
+{
+   "update need_swvfetch",
+   ( SVGA_NEW_VELEMENT ),
+   update_need_swvfetch
+};
+
+
+/*********************************************************************** 
+ */
+
+static int update_need_pipeline( struct svga_context *svga,
+                                 unsigned dirty )
+{
+   
+   boolean need_pipeline = FALSE;
+
+   /* SVGA_NEW_RAST, SVGA_NEW_REDUCED_PRIMITIVE
+    */
+   if (svga->curr.rast->need_pipeline & (1 << svga->curr.reduced_prim)) {
+      SVGA_DBG(DEBUG_SWTNL, "%s: rast need_pipeline (%d) & prim (%x)\n", 
+                 __FUNCTION__,
+                 svga->curr.rast->need_pipeline,
+                 (1 << svga->curr.reduced_prim) );
+      need_pipeline = TRUE;
+   }
+
+   /* SVGA_NEW_EDGEFLAGS
+    */
+   if (svga->curr.rast->hw_unfilled != PIPE_POLYGON_MODE_FILL &&
+       svga->curr.reduced_prim == PIPE_PRIM_TRIANGLES && 
+       svga->curr.edgeflags != NULL) {
+      SVGA_DBG(DEBUG_SWTNL, "%s: edgeflags\n", __FUNCTION__);
+      need_pipeline = TRUE;
+   }
+
+   /* SVGA_NEW_CLIP 
+    */
+   if (!svga->curr.rast->templ.bypass_vs_clip_and_viewport &&
+       svga->curr.clip.nr) {
+      SVGA_DBG(DEBUG_SWTNL, "%s: userclip\n", __FUNCTION__);
+      need_pipeline = TRUE;
+   }
+
+   if (need_pipeline != svga->state.sw.need_pipeline) {
+      svga->state.sw.need_pipeline = need_pipeline;
+      svga->dirty |= SVGA_NEW_NEED_PIPELINE;
+   }
+
+   return 0;
+}
+
+
+struct svga_tracked_state svga_update_need_pipeline = 
+{
+   "need pipeline",
+   (SVGA_NEW_RAST |
+    SVGA_NEW_CLIP |
+    SVGA_NEW_REDUCED_PRIMITIVE),
+   update_need_pipeline
+};
+
+
+/*********************************************************************** 
+ */
+
+static int update_need_swtnl( struct svga_context *svga,
+                              unsigned dirty )
+{
+   boolean need_swtnl;
+
+   if (svga->debug.no_swtnl) {
+      svga->state.sw.need_swvfetch = 0;
+      svga->state.sw.need_pipeline = 0;
+   }
+
+   need_swtnl = (svga->state.sw.need_swvfetch ||
+                 svga->state.sw.need_pipeline);
+
+   if (svga->debug.force_swtnl) {
+      need_swtnl = 1;
+   }
+
+   if (need_swtnl != svga->state.sw.need_swtnl) {
+      SVGA_DBG(DEBUG_SWTNL|DEBUG_PERF,
+               "%s need_swvfetch: %s, need_pipeline %s\n",
+               __FUNCTION__,
+               svga->state.sw.need_swvfetch ? "true" : "false",
+               svga->state.sw.need_pipeline ? "true" : "false");
+
+      svga->state.sw.need_swtnl = need_swtnl;
+      svga->dirty |= SVGA_NEW_NEED_SWTNL;
+      svga->swtnl.new_vdecl = TRUE;
+   }
+  
+   return 0;
+}
+
+
+struct svga_tracked_state svga_update_need_swtnl =
+{
+   "need swtnl",
+   (SVGA_NEW_NEED_PIPELINE |
+    SVGA_NEW_NEED_SWVFETCH),
+   update_need_swtnl
+};
diff --git a/src/gallium/drivers/svga/svga_state_rss.c b/src/gallium/drivers/svga/svga_state_rss.c
new file mode 100644
index 00000000000..8b6803a285a
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_rss.c
@@ -0,0 +1,268 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+
+#include "svga_hw_reg.h"
+
+
+
+struct rs_queue {
+   unsigned rs_count;
+   SVGA3dRenderState rs[SVGA3D_RS_MAX];
+};
+
+
+#define EMIT_RS(svga, value, token, fail)                       \
+do {                                                            \
+   if (svga->state.hw_draw.rs[SVGA3D_RS_##token] != value) {    \
+      svga_queue_rs( &queue, SVGA3D_RS_##token, value );        \
+      svga->state.hw_draw.rs[SVGA3D_RS_##token] = value;        \
+   }                                                            \
+} while (0)
+
+#define EMIT_RS_FLOAT(svga, fvalue, token, fail)                \
+do {                                                            \
+   unsigned value = fui(fvalue);                                \
+   if (svga->state.hw_draw.rs[SVGA3D_RS_##token] != value) {    \
+      svga_queue_rs( &queue, SVGA3D_RS_##token, value );        \
+      svga->state.hw_draw.rs[SVGA3D_RS_##token] = value;        \
+   }                                                            \
+} while (0)
+
+
+static INLINE void
+svga_queue_rs( struct rs_queue *q,
+               unsigned rss,
+               unsigned value )
+{
+   q->rs[q->rs_count].state = rss;
+   q->rs[q->rs_count].uintValue = value;
+   q->rs_count++;
+}
+
+
+/* Compare old and new render states and emit differences between them
+ * to hardware.  Simplest implementation would be to emit the whole of
+ * the "to" state.
+ */
+static int emit_rss( struct svga_context *svga,
+                     unsigned dirty )
+{
+   struct rs_queue queue;
+
+   queue.rs_count = 0;
+
+   if (dirty & SVGA_NEW_BLEND) {
+      const struct svga_blend_state *curr = svga->curr.blend;
+
+      EMIT_RS( svga, curr->rt[0].writemask, COLORWRITEENABLE, fail );
+      EMIT_RS( svga, curr->rt[0].blend_enable, BLENDENABLE, fail );
+
+      if (curr->rt[0].blend_enable) {
+         EMIT_RS( svga, curr->rt[0].srcblend, SRCBLEND, fail );
+         EMIT_RS( svga, curr->rt[0].dstblend, DSTBLEND, fail );
+         EMIT_RS( svga, curr->rt[0].blendeq, BLENDEQUATION, fail );
+
+         EMIT_RS( svga, curr->rt[0].separate_alpha_blend_enable, 
+                  SEPARATEALPHABLENDENABLE, fail );
+
+         if (curr->rt[0].separate_alpha_blend_enable) {
+            EMIT_RS( svga, curr->rt[0].srcblend_alpha, SRCBLENDALPHA, fail );
+            EMIT_RS( svga, curr->rt[0].dstblend_alpha, DSTBLENDALPHA, fail );
+            EMIT_RS( svga, curr->rt[0].blendeq_alpha, BLENDEQUATIONALPHA, fail );
+         }
+      }
+   }
+
+
+   if (dirty & (SVGA_NEW_DEPTH_STENCIL | SVGA_NEW_RAST)) {
+      const struct svga_depth_stencil_state *curr = svga->curr.depth; 
+      const struct svga_rasterizer_state *rast = svga->curr.rast; 
+
+      if (!curr->stencil[0].enabled) 
+      {
+         /* Stencil disabled
+          */
+         EMIT_RS( svga, FALSE, STENCILENABLE, fail );
+         EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail );
+      }
+      else if (curr->stencil[0].enabled && !curr->stencil[1].enabled)
+      {
+         /* Regular stencil
+          */
+         EMIT_RS( svga, TRUE, STENCILENABLE, fail );
+         EMIT_RS( svga, FALSE, STENCILENABLE2SIDED, fail );
+
+         EMIT_RS( svga, curr->stencil[0].func,  STENCILFUNC, fail );
+         EMIT_RS( svga, curr->stencil[0].fail,  STENCILFAIL, fail );
+         EMIT_RS( svga, curr->stencil[0].zfail, STENCILZFAIL, fail );
+         EMIT_RS( svga, curr->stencil[0].pass,  STENCILPASS, fail );
+         
+         EMIT_RS( svga, curr->stencil_ref, STENCILREF, fail );
+         EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail );
+         EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail );
+      }
+      else 
+      {
+         int cw, ccw;
+
+         /* Hardware frontwinding is always CW, so if ours is also CW,
+          * then our definition of front face agrees with hardware.
+          * Otherwise need to flip.
+          */
+         if (rast->templ.front_winding == PIPE_WINDING_CW) {
+            cw = 0;
+            ccw = 1;
+         }
+         else {
+            cw = 1;
+            ccw = 0;
+         }
+
+         /* Twoside stencil
+          */
+         EMIT_RS( svga, TRUE, STENCILENABLE, fail );
+         EMIT_RS( svga, TRUE, STENCILENABLE2SIDED, fail );
+
+         EMIT_RS( svga, curr->stencil[cw].func,  STENCILFUNC, fail );
+         EMIT_RS( svga, curr->stencil[cw].fail,  STENCILFAIL, fail );
+         EMIT_RS( svga, curr->stencil[cw].zfail, STENCILZFAIL, fail );
+         EMIT_RS( svga, curr->stencil[cw].pass,  STENCILPASS, fail );
+
+         EMIT_RS( svga, curr->stencil[ccw].func,  CCWSTENCILFUNC, fail );
+         EMIT_RS( svga, curr->stencil[ccw].fail,  CCWSTENCILFAIL, fail );
+         EMIT_RS( svga, curr->stencil[ccw].zfail, CCWSTENCILZFAIL, fail );
+         EMIT_RS( svga, curr->stencil[ccw].pass,  CCWSTENCILPASS, fail );
+
+         EMIT_RS( svga, curr->stencil_ref, STENCILREF, fail );
+         EMIT_RS( svga, curr->stencil_mask, STENCILMASK, fail );
+         EMIT_RS( svga, curr->stencil_writemask, STENCILWRITEMASK, fail );
+      }
+
+      EMIT_RS( svga, curr->zenable, ZENABLE, fail );
+      if (curr->zenable) {
+         EMIT_RS( svga, curr->zfunc, ZFUNC, fail );
+         EMIT_RS( svga, curr->zwriteenable, ZWRITEENABLE, fail );
+      }
+
+      EMIT_RS( svga, curr->alphatestenable, ALPHATESTENABLE, fail );
+      if (curr->alphatestenable) {
+         EMIT_RS( svga, curr->alphafunc, ALPHAFUNC, fail );
+         EMIT_RS_FLOAT( svga, curr->alpharef, ALPHAREF, fail );
+      }
+   }
+
+
+   if (dirty & SVGA_NEW_RAST)
+   {
+      const struct svga_rasterizer_state *curr = svga->curr.rast; 
+
+      /* Shademode: still need to rearrange index list to move
+       * flat-shading PV first vertex.
+       */
+      EMIT_RS( svga, curr->shademode, SHADEMODE, fail );
+      EMIT_RS( svga, curr->cullmode, CULLMODE, fail );
+      EMIT_RS( svga, curr->scissortestenable, SCISSORTESTENABLE, fail );
+      EMIT_RS( svga, curr->multisampleantialias, MULTISAMPLEANTIALIAS, fail );
+      EMIT_RS( svga, curr->lastpixel, LASTPIXEL, fail );
+      EMIT_RS( svga, curr->linepattern, LINEPATTERN, fail );
+      EMIT_RS_FLOAT( svga, curr->pointsize, POINTSIZE, fail );
+      EMIT_RS_FLOAT( svga, curr->pointsize_min, POINTSIZEMIN, fail );
+      EMIT_RS_FLOAT( svga, curr->pointsize_max, POINTSIZEMAX, fail );
+   }
+
+   if (dirty & (SVGA_NEW_RAST | SVGA_NEW_FRAME_BUFFER | SVGA_NEW_NEED_PIPELINE))
+   {
+      const struct svga_rasterizer_state *curr = svga->curr.rast; 
+      float slope = 0.0;
+      float bias  = 0.0;
+
+      /* Need to modify depth bias according to bound depthbuffer
+       * format.  Don't do hardware depthbias while the software
+       * pipeline is active.
+       */
+      if (!svga->state.sw.need_pipeline &&
+          svga->curr.framebuffer.zsbuf)
+      {
+         slope = curr->slopescaledepthbias;
+         bias  = svga->curr.depthscale * curr->depthbias;
+      }
+
+      EMIT_RS_FLOAT( svga, slope, SLOPESCALEDEPTHBIAS, fail );
+      EMIT_RS_FLOAT( svga, bias, DEPTHBIAS, fail );
+   }
+
+
+   if (queue.rs_count) {
+      SVGA3dRenderState *rs;
+
+      if (SVGA3D_BeginSetRenderState( svga->swc,
+                                      &rs,
+                                      queue.rs_count ) != PIPE_OK)
+         goto fail;
+
+      memcpy( rs,
+              queue.rs,
+              queue.rs_count * sizeof queue.rs[0]);
+      
+      SVGA_FIFOCommitAll( svga->swc );
+   }
+
+   /* Also blend color:
+    */
+
+   return 0;
+
+fail:
+   /* XXX: need to poison cached hardware state on failure to ensure
+    * dirty state gets re-emitted.  Fix this by re-instating partial
+    * FIFOCommit command and only updating cached hw state once the
+    * initial allocation has succeeded.
+    */
+   memset(svga->state.hw_draw.rs, 0xcd, sizeof(svga->state.hw_draw.rs));
+
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+struct svga_tracked_state svga_hw_rss = 
+{
+   "hw rss state",
+
+   (SVGA_NEW_BLEND |
+    SVGA_NEW_DEPTH_STENCIL |
+    SVGA_NEW_RAST |
+    SVGA_NEW_FRAME_BUFFER |
+    SVGA_NEW_NEED_PIPELINE),
+
+   emit_rss
+};
diff --git a/src/gallium/drivers/svga/svga_state_tss.c b/src/gallium/drivers/svga/svga_state_tss.c
new file mode 100644
index 00000000000..b3137945202
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_tss.c
@@ -0,0 +1,279 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+
+#include "svga_screen_texture.h"
+#include "svga_winsys.h"
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+
+#include "svga_hw_reg.h"
+
+
+void svga_cleanup_tss_binding(struct svga_context *svga)
+{
+   int i;
+   unsigned count = MAX2( svga->curr.num_textures,
+                          svga->state.hw_draw.num_views );
+
+   for (i = 0; i < count; i++) {
+      struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
+
+      svga_sampler_view_reference(&view->v, NULL);
+      pipe_texture_reference( &svga->curr.texture[i], NULL );
+      pipe_texture_reference( &view->texture, NULL );
+
+      view->dirty = 1;
+   }
+}
+
+
+static int
+update_tss_binding(struct svga_context *svga, 
+                   unsigned dirty )
+{
+   unsigned i;
+   unsigned count = MAX2( svga->curr.num_textures,
+                          svga->state.hw_draw.num_views );
+   unsigned min_lod;
+   unsigned max_lod;
+
+
+   struct {
+      struct {
+         unsigned unit;
+         struct svga_hw_view_state *view;
+      } bind[PIPE_MAX_SAMPLERS];
+
+      unsigned bind_count;
+   } queue;
+
+   queue.bind_count = 0;
+   
+   for (i = 0; i < count; i++) {
+      const struct svga_sampler_state *s = svga->curr.sampler[i];
+      struct svga_hw_view_state *view = &svga->state.hw_draw.views[i];
+
+      /* get min max lod */
+      if (svga->curr.texture[i]) {
+         min_lod = MAX2(s->view_min_lod, 0);
+         max_lod = MIN2(s->view_max_lod, svga->curr.texture[i]->last_level);
+      } else {
+         min_lod = 0;
+         max_lod = 0;
+      }
+
+      if (view->texture != svga->curr.texture[i] ||
+          view->min_lod != min_lod ||
+          view->max_lod != max_lod) {
+
+         svga_sampler_view_reference(&view->v, NULL);
+         pipe_texture_reference( &view->texture, svga->curr.texture[i] );
+
+         view->dirty = TRUE;
+         view->min_lod = min_lod;
+         view->max_lod = max_lod;
+
+         if (svga->curr.texture[i])
+            view->v = svga_get_tex_sampler_view(&svga->pipe, 
+                                                svga->curr.texture[i], 
+                                                min_lod,
+                                                max_lod);
+      }
+
+      if (view->dirty) {
+         queue.bind[queue.bind_count].unit = i;
+         queue.bind[queue.bind_count].view = view;
+         queue.bind_count++;
+      } 
+      else if (view->v) {
+         svga_validate_sampler_view(svga, view->v);
+      }
+   }
+
+   svga->state.hw_draw.num_views = svga->curr.num_textures;
+
+   if (queue.bind_count) {
+      SVGA3dTextureState *ts;
+
+      if (SVGA3D_BeginSetTextureState( svga->swc,
+                                       &ts,
+                                       queue.bind_count ) != PIPE_OK)
+         goto fail;
+
+      for (i = 0; i < queue.bind_count; i++) {
+         ts[i].stage = queue.bind[i].unit;
+         ts[i].name = SVGA3D_TS_BIND_TEXTURE;
+
+         if (queue.bind[i].view->v) {
+            svga->swc->surface_relocation(svga->swc,
+                                          &ts[i].value,
+                                          queue.bind[i].view->v->handle,
+                                          PIPE_BUFFER_USAGE_GPU_READ);
+         }
+         else {
+            ts[i].value = SVGA3D_INVALID_ID;
+         }
+         
+         queue.bind[i].view->dirty = FALSE;
+      }
+
+      SVGA_FIFOCommitAll( svga->swc );
+   }
+
+   return 0;
+
+fail:
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+struct svga_tracked_state svga_hw_tss_binding = {
+   "texture binding emit",
+   SVGA_NEW_TEXTURE_BINDING |
+   SVGA_NEW_SAMPLER,
+   update_tss_binding
+};
+
+
+/***********************************************************************
+ */
+
+struct ts_queue {
+   unsigned ts_count;
+   SVGA3dTextureState ts[PIPE_MAX_SAMPLERS*SVGA3D_TS_MAX];
+};
+
+
+#define EMIT_TS(svga, unit, val, token, fail)                           \
+do {                                                                    \
+   if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) {        \
+      svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val );           \
+      svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val;            \
+   }                                                                    \
+} while (0)
+
+#define EMIT_TS_FLOAT(svga, unit, fvalue, token, fail)                  \
+do {                                                                    \
+   unsigned val = fui(fvalue);                                          \
+   if (svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] != val) {        \
+      svga_queue_tss( &queue, unit, SVGA3D_TS_##token, val );           \
+      svga->state.hw_draw.ts[unit][SVGA3D_TS_##token] = val;            \
+   }                                                                    \
+} while (0)
+
+
+static INLINE void 
+svga_queue_tss( struct ts_queue *q,
+                unsigned unit,
+                unsigned tss,
+                unsigned value )
+{
+   assert(q->ts_count < sizeof(q->ts)/sizeof(q->ts[0]));
+   q->ts[q->ts_count].stage = unit;
+   q->ts[q->ts_count].name = tss;
+   q->ts[q->ts_count].value = value;
+   q->ts_count++;
+}
+
+
+static int
+update_tss(struct svga_context *svga, 
+           unsigned dirty )
+{
+   unsigned i;
+   struct ts_queue queue;
+
+   queue.ts_count = 0;
+   for (i = 0; i < svga->curr.num_samplers; i++) {
+      if (svga->curr.sampler[i]) {
+         const struct svga_sampler_state *curr = svga->curr.sampler[i];
+
+         EMIT_TS(svga, i, curr->mipfilter, MIPFILTER, fail);
+         EMIT_TS(svga, i, curr->min_lod, TEXTURE_MIPMAP_LEVEL, fail);
+         EMIT_TS(svga, i, curr->magfilter, MAGFILTER, fail);
+         EMIT_TS(svga, i, curr->minfilter, MINFILTER, fail);
+         EMIT_TS(svga, i, curr->aniso_level, TEXTURE_ANISOTROPIC_LEVEL, fail);
+         EMIT_TS_FLOAT(svga, i, curr->lod_bias, TEXTURE_LOD_BIAS, fail);
+         EMIT_TS(svga, i, curr->addressu, ADDRESSU, fail);
+         EMIT_TS(svga, i, curr->addressw, ADDRESSW, fail);
+         EMIT_TS(svga, i, curr->bordercolor, BORDERCOLOR, fail);
+         // TEXCOORDINDEX -- hopefully not needed
+
+         if (svga->curr.tex_flags.flag_1d & (1 << i)) {
+            debug_printf("wrap 1d tex %d\n", i);
+            EMIT_TS(svga, i, SVGA3D_TEX_ADDRESS_WRAP, ADDRESSV, fail);
+         }
+         else
+            EMIT_TS(svga, i, curr->addressv, ADDRESSV, fail);
+
+         if (svga->curr.tex_flags.flag_srgb & (1 << i))
+            EMIT_TS_FLOAT(svga, i, 2.2f, GAMMA, fail);
+         else
+            EMIT_TS_FLOAT(svga, i, 1.0f, GAMMA, fail);
+
+      }
+   }
+ 
+   if (queue.ts_count) {
+      SVGA3dTextureState *ts;
+
+      if (SVGA3D_BeginSetTextureState( svga->swc,
+                                       &ts,
+                                       queue.ts_count ) != PIPE_OK)
+         goto fail;
+
+      memcpy( ts,
+              queue.ts,
+              queue.ts_count * sizeof queue.ts[0]);
+      
+      SVGA_FIFOCommitAll( svga->swc );
+   }
+
+   return 0;
+
+fail:
+   /* XXX: need to poison cached hardware state on failure to ensure
+    * dirty state gets re-emitted.  Fix this by re-instating partial
+    * FIFOCommit command and only updating cached hw state once the
+    * initial allocation has succeeded.
+    */
+   memset(svga->state.hw_draw.ts, 0xcd, sizeof(svga->state.hw_draw.ts));
+
+   return PIPE_ERROR_OUT_OF_MEMORY;
+}
+
+
+struct svga_tracked_state svga_hw_tss = {
+   "texture state emit",
+   (SVGA_NEW_SAMPLER |
+    SVGA_NEW_TEXTURE_FLAGS),
+   update_tss
+};
+
diff --git a/src/gallium/drivers/svga/svga_state_vdecl.c b/src/gallium/drivers/svga/svga_state_vdecl.c
new file mode 100644
index 00000000000..c534308f503
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_vdecl.c
@@ -0,0 +1,182 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "util/u_upload_mgr.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_draw.h"
+#include "svga_tgsi.h"
+#include "svga_screen.h"
+#include "svga_screen_buffer.h"
+
+#include "svga_hw_reg.h"
+
+
+static int
+upload_user_buffers( struct svga_context *svga )
+{
+   enum pipe_error ret = PIPE_OK;
+   int i;
+   int nr;
+
+   if (0) 
+      debug_printf("%s: %d\n", __FUNCTION__, svga->curr.num_vertex_buffers);
+
+   nr = svga->curr.num_vertex_buffers;
+
+   for (i = 0; i < nr; i++) 
+   {
+      if (svga_buffer_is_user_buffer(svga->curr.vb[i].buffer))
+      {
+         struct pipe_buffer *upload_buffer = NULL;
+         unsigned offset = /*svga->curr.vb[i].buffer_offset*/ 0;
+         unsigned size = svga->curr.vb[i].buffer->size /*- offset*/;
+         unsigned upload_offset;
+
+         ret = u_upload_buffer( svga->upload_vb,
+                                offset,
+                                size,
+                                svga->curr.vb[i].buffer,
+                                &upload_offset,
+                                &upload_buffer );
+         if (ret)
+            return ret;
+
+         if (0)
+            debug_printf("%s: %d: orig buf %p upl buf %p ofs %d sz %d\n", 
+                         __FUNCTION__, 
+                         i,
+                         svga->curr.vb[i].buffer,
+                         upload_buffer, upload_offset, size);
+
+         /* Make sure we release the old buffer and end up with the
+          * correct refcount on the uploaded buffer.
+          */
+         pipe_buffer_reference( &svga->curr.vb[i].buffer, NULL );
+         svga->curr.vb[i].buffer = upload_buffer;
+         svga->curr.vb[i].buffer_offset = upload_offset;
+      }
+   }
+
+   if (0)
+      debug_printf("%s: DONE\n", __FUNCTION__);
+
+   return ret;
+}
+
+
+/***********************************************************************
+ */
+
+
+static int emit_hw_vs_vdecl( struct svga_context *svga,
+                             unsigned dirty )
+{
+   const struct pipe_vertex_element *ve = svga->curr.ve;
+   SVGA3dVertexDecl decl;
+   unsigned i;
+
+   assert(svga->curr.num_vertex_elements >=
+          svga->curr.vs->base.info.file_count[TGSI_FILE_INPUT]);
+
+   svga_hwtnl_reset_vdecl( svga->hwtnl, 
+                           svga->curr.num_vertex_elements );
+
+   for (i = 0; i < svga->curr.num_vertex_elements; i++) {
+      const struct pipe_vertex_buffer *vb = &svga->curr.vb[ve[i].vertex_buffer_index];
+      unsigned usage, index;
+
+
+      svga_generate_vdecl_semantics( i, &usage, &index );
+
+      /* SVGA_NEW_VELEMENT
+       */
+      decl.identity.type = svga->state.sw.ve_format[i];
+      decl.identity.method = SVGA3D_DECLMETHOD_DEFAULT;
+      decl.identity.usage = usage;
+      decl.identity.usageIndex = index;
+      decl.array.stride = vb->stride;
+      decl.array.offset = (vb->buffer_offset +
+                           ve[i].src_offset);
+
+      svga_hwtnl_vdecl( svga->hwtnl,
+                        i,
+                        &decl,
+                        vb->buffer );
+   }
+
+   return 0;
+}
+
+
+static int emit_hw_vdecl( struct svga_context *svga,
+                          unsigned dirty )
+{
+   int ret = 0;
+
+   /* SVGA_NEW_NEED_SWTNL
+    */
+   if (svga->state.sw.need_swtnl)
+      return 0; /* Do not emit during swtnl */
+
+   /* If we get to here, we know that we're going to draw.  Upload
+    * userbuffers now and try to combine multiple userbuffers from
+    * multiple draw calls into a single host buffer for performance.
+    */
+   if (svga->curr.any_user_vertex_buffers &&
+       SVGA_COMBINE_USERBUFFERS)
+   {
+      ret = upload_user_buffers( svga );
+      if (ret)
+         return ret;
+
+      svga->curr.any_user_vertex_buffers = FALSE;
+   }
+
+   return emit_hw_vs_vdecl( svga, dirty );
+}
+
+
+struct svga_tracked_state svga_hw_vdecl = 
+{
+   "hw vertex decl state (hwtnl version)",
+   ( SVGA_NEW_NEED_SWTNL |
+     SVGA_NEW_VELEMENT |
+     SVGA_NEW_VBUFFER |
+     SVGA_NEW_RAST |
+     SVGA_NEW_FS |
+     SVGA_NEW_VS ),
+   emit_hw_vdecl
+};
+
+
+
+
+
+
diff --git a/src/gallium/drivers/svga/svga_state_vs.c b/src/gallium/drivers/svga/svga_state_vs.c
new file mode 100644
index 00000000000..a947745732c
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_state_vs.c
@@ -0,0 +1,239 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "pipe/p_inlines.h"
+#include "pipe/p_defines.h"
+#include "util/u_math.h"
+#include "translate/translate.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_cmd.h"
+#include "svga_tgsi.h"
+
+#include "svga_hw_reg.h"
+
+/***********************************************************************
+ */
+
+
+static INLINE int compare_vs_keys( const struct svga_vs_compile_key *a,
+                                   const struct svga_vs_compile_key *b )
+{
+   unsigned keysize = svga_vs_key_size( a );
+   return memcmp( a, b, keysize );
+}
+
+
+static struct svga_shader_result *search_vs_key( struct svga_vertex_shader *vs,
+                                                 const struct svga_vs_compile_key *key )
+{
+   struct svga_shader_result *result = vs->base.results;
+
+   assert(key);
+
+   for ( ; result; result = result->next) {
+      if (compare_vs_keys( key, &result->key.vkey ) == 0)
+         return result;
+   }
+   
+   return NULL;
+}
+
+
+static enum pipe_error compile_vs( struct svga_context *svga,
+                                   struct svga_vertex_shader *vs,
+                                   const struct svga_vs_compile_key *key,
+                                   struct svga_shader_result **out_result )
+{
+   struct svga_shader_result *result;
+   enum pipe_error ret = PIPE_OK;
+
+   result = svga_translate_vertex_program( vs, key );
+   if (result == NULL) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto fail;
+   }
+
+   ret = SVGA3D_DefineShader(svga->swc, 
+                             svga->state.next_vs_id,
+                             SVGA3D_SHADERTYPE_VS,
+                             result->tokens, 
+                             result->nr_tokens * sizeof result->tokens[0]);
+   if (ret)
+      goto fail;
+
+   *out_result = result;
+   result->id = svga->state.next_vs_id++;
+   result->next = vs->base.results;
+   vs->base.results = result;
+   return PIPE_OK;
+
+fail:
+   if (result)
+      svga_destroy_shader_result( result );
+   return ret;
+}
+
+/* SVGA_NEW_PRESCALE, SVGA_NEW_RAST, SVGA_NEW_ZERO_STRIDE
+ */
+static int make_vs_key( struct svga_context *svga,
+                        struct svga_vs_compile_key *key )
+{
+   memset(key, 0, sizeof *key);
+   key->need_prescale = svga->state.hw_clear.prescale.enabled;
+   key->allow_psiz = svga->curr.rast->templ.point_size_per_vertex;
+   key->zero_stride_vertex_elements =
+      svga->curr.zero_stride_vertex_elements;
+   key->num_zero_stride_vertex_elements =
+      svga->curr.num_zero_stride_vertex_elements;
+   return 0;
+}
+
+
+
+static int emit_hw_vs( struct svga_context *svga,
+                       unsigned dirty )
+{
+   struct svga_shader_result *result = NULL;
+   unsigned id = SVGA3D_INVALID_ID;
+   int ret = 0;
+
+   /* SVGA_NEW_NEED_SWTNL */
+   if (!svga->state.sw.need_swtnl) {
+      struct svga_vertex_shader *vs = svga->curr.vs;
+      struct svga_vs_compile_key key;
+
+      ret = make_vs_key( svga, &key );
+      if (ret)
+         return ret;
+
+      result = search_vs_key( vs, &key );
+      if (!result) {
+         ret = compile_vs( svga, vs, &key, &result );
+         if (ret)
+            return ret;
+      }
+
+      assert (result);
+      id = result->id;
+   }
+
+   if (id != svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX]) {
+      ret = SVGA3D_SetShader(svga->swc, 
+                             SVGA3D_SHADERTYPE_VS, 
+                             id );
+      if (ret)
+         return ret;
+
+      svga->dirty |= SVGA_NEW_VS_RESULT;
+      svga->state.hw_draw.shader_id[PIPE_SHADER_VERTEX] = id;
+      svga->state.hw_draw.vs = result;      
+   }
+
+   return 0;
+}
+
+struct svga_tracked_state svga_hw_vs = 
+{
+   "vertex shader (hwtnl)",
+   (SVGA_NEW_VS |
+    SVGA_NEW_PRESCALE |
+    SVGA_NEW_NEED_SWTNL |
+    SVGA_NEW_ZERO_STRIDE),
+   emit_hw_vs
+};
+
+
+/***********************************************************************
+ */
+static int update_zero_stride( struct svga_context *svga,
+                               unsigned dirty )
+{
+   unsigned i;
+
+   svga->curr.zero_stride_vertex_elements = 0;
+   svga->curr.num_zero_stride_vertex_elements = 0;
+
+   for (i = 0; i < svga->curr.num_vertex_elements; i++) {
+      const struct pipe_vertex_element *vel = &svga->curr.ve[i];
+      const struct pipe_vertex_buffer *vbuffer = &svga->curr.vb[
+         vel->vertex_buffer_index];
+      if (vbuffer->stride == 0) {
+         unsigned const_idx =
+            svga->curr.num_zero_stride_vertex_elements;
+         struct translate *translate;
+         struct translate_key key;
+         void *mapped_buffer;
+
+         svga->curr.zero_stride_vertex_elements |= (1 << i);
+         ++svga->curr.num_zero_stride_vertex_elements;
+
+         key.output_stride = 4 * sizeof(float);
+         key.nr_elements = 1;
+         key.element[0].input_format = vel->src_format;
+         key.element[0].output_format = PIPE_FORMAT_R32G32B32A32_FLOAT;
+         key.element[0].input_buffer = vel->vertex_buffer_index;
+         key.element[0].input_offset = vel->src_offset;
+         key.element[0].output_offset = const_idx * 4 * sizeof(float);
+
+         translate_key_sanitize(&key);
+         /* translate_generic_create is technically private but
+          * we don't want to code-generate, just want generic
+          * translation */
+         translate = translate_generic_create(&key);
+
+         assert(vel->src_offset == 0);
+         
+         mapped_buffer = pipe_buffer_map_range(svga->pipe.screen, 
+                                               vbuffer->buffer,
+                                               vel->src_offset,
+                                               pf_get_size(vel->src_format),
+                                               PIPE_BUFFER_USAGE_CPU_READ);
+         translate->set_buffer(translate, vel->vertex_buffer_index,
+                               mapped_buffer,
+                               vbuffer->stride);
+         translate->run(translate, 0, 1,
+                        svga->curr.zero_stride_constants);
+
+         pipe_buffer_unmap(svga->pipe.screen,
+                           vbuffer->buffer);
+         translate->release(translate);
+      }
+   }
+
+   if (svga->curr.num_zero_stride_vertex_elements)
+      svga->dirty |= SVGA_NEW_ZERO_STRIDE;
+
+   return 0;
+}
+
+struct svga_tracked_state svga_hw_update_zero_stride =
+{
+   "update zero_stride",
+   ( SVGA_NEW_VELEMENT |
+     SVGA_NEW_VBUFFER ),
+   update_zero_stride
+};
diff --git a/src/gallium/drivers/svga/svga_swtnl.h b/src/gallium/drivers/svga/svga_swtnl.h
new file mode 100644
index 00000000000..4882f26b170
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl.h
@@ -0,0 +1,52 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_SWTNL_H
+#define SVGA_SWTNL_H
+
+#include "pipe/p_compiler.h"
+
+struct svga_context;
+struct pipe_context;
+struct pipe_buffer;
+struct vbuf_render;
+
+
+boolean svga_init_swtnl( struct svga_context *svga );
+void svga_destroy_swtnl( struct svga_context *svga );
+
+
+enum pipe_error
+svga_swtnl_draw_range_elements(struct svga_context *svga,
+                               struct pipe_buffer *indexBuffer,
+                               unsigned indexSize,
+                               unsigned min_index,
+                               unsigned max_index,
+                               unsigned prim, 
+                               unsigned start, 
+                               unsigned count);
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_swtnl_backend.c b/src/gallium/drivers/svga/svga_swtnl_backend.c
new file mode 100644
index 00000000000..b4f757a47a9
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl_backend.c
@@ -0,0 +1,349 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_vbuf.h"
+#include "draw/draw_context.h"
+#include "draw/draw_vertex.h"
+
+#include "util/u_debug.h"
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "util/u_simple_shaders.h"
+
+#include "svga_context.h"
+#include "svga_state.h"
+#include "svga_swtnl.h"
+
+#include "svga_types.h"
+#include "svga_reg.h"
+#include "svga3d_reg.h"
+#include "svga_draw.h"
+#include "svga_swtnl_private.h"
+
+
+static const struct vertex_info *
+svga_vbuf_render_get_vertex_info( struct vbuf_render *render )
+{
+   struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+   struct svga_context *svga = svga_render->svga;
+
+   svga_swtnl_update_vdecl(svga);
+
+   return &svga_render->vertex_info;
+}
+
+
+static boolean
+svga_vbuf_render_allocate_vertices( struct vbuf_render *render,
+                                    ushort vertex_size,
+                                    ushort nr_vertices )
+{
+   struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+   struct svga_context *svga = svga_render->svga;
+   struct pipe_screen *screen = svga->pipe.screen;
+   size_t size = (size_t)nr_vertices * (size_t)vertex_size;
+   boolean new_vbuf = FALSE;
+   boolean new_ibuf = FALSE;
+
+   if (svga_render->vertex_size != vertex_size)
+      svga->swtnl.new_vdecl = TRUE;
+   svga_render->vertex_size = (size_t)vertex_size;
+
+   if (svga->swtnl.new_vbuf)
+      new_ibuf = new_vbuf = TRUE;
+   svga->swtnl.new_vbuf = FALSE;
+
+   if (svga_render->vbuf_size < svga_render->vbuf_offset + svga_render->vbuf_used + size)
+      new_vbuf = TRUE;
+
+   if (new_vbuf)
+      pipe_buffer_reference(&svga_render->vbuf, NULL);
+   if (new_ibuf)
+      pipe_buffer_reference(&svga_render->ibuf, NULL);
+
+   if (!svga_render->vbuf) {
+      svga_render->vbuf_size = MAX2(size, svga_render->vbuf_alloc_size);
+      svga_render->vbuf = pipe_buffer_create(screen,
+                                             0,
+                                             PIPE_BUFFER_USAGE_VERTEX,
+                                             svga_render->vbuf_size);
+      if(!svga_render->vbuf) {
+         svga_context_flush(svga, NULL);
+         svga_render->vbuf = pipe_buffer_create(screen,
+                                                0,
+                                                PIPE_BUFFER_USAGE_VERTEX,
+                                                svga_render->vbuf_size);
+         assert(svga_render->vbuf);
+      }
+
+      svga->swtnl.new_vdecl = TRUE;
+      svga_render->vbuf_offset = 0;
+   } else {
+      svga_render->vbuf_offset += svga_render->vbuf_used;
+   }
+
+   svga_render->vbuf_used = 0;
+
+   if (svga->swtnl.new_vdecl)
+      svga_render->vdecl_offset = svga_render->vbuf_offset;
+
+   return TRUE;
+}
+
+static void *
+svga_vbuf_render_map_vertices( struct vbuf_render *render )
+{
+   struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+   struct svga_context *svga = svga_render->svga;
+   struct pipe_screen *screen = svga->pipe.screen;
+
+   char *ptr = (char*)pipe_buffer_map(screen,
+                                      svga_render->vbuf,
+                                      PIPE_BUFFER_USAGE_CPU_WRITE | 
+                                      PIPE_BUFFER_USAGE_FLUSH_EXPLICIT);
+   return ptr + svga_render->vbuf_offset;
+}
+
+static void
+svga_vbuf_render_unmap_vertices( struct vbuf_render *render,
+                                 ushort min_index,
+                                 ushort max_index )
+{
+   struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+   struct svga_context *svga = svga_render->svga;
+   struct pipe_screen *screen = svga->pipe.screen;
+   unsigned offset, length;
+   size_t used = svga_render->vertex_size * ((size_t)max_index + 1);
+
+   offset = svga_render->vbuf_offset + svga_render->vertex_size * min_index;
+   length = svga_render->vertex_size * (max_index + 1 - min_index);
+   pipe_buffer_flush_mapped_range(screen, svga_render->vbuf, offset, length);
+   pipe_buffer_unmap(screen, svga_render->vbuf);
+   svga_render->min_index = min_index;
+   svga_render->max_index = max_index;
+   svga_render->vbuf_used = MAX2(svga_render->vbuf_used, used);
+}
+
+static boolean
+svga_vbuf_render_set_primitive( struct vbuf_render *render,
+                                unsigned prim )
+{
+   struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+   svga_render->prim = prim;
+
+   return TRUE;
+}
+
+static void
+svga_vbuf_sumbit_state( struct svga_vbuf_render *svga_render )
+{
+   struct svga_context *svga = svga_render->svga;
+   SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
+   enum pipe_error ret;
+   int i;
+
+   /* if the vdecl or vbuf hasn't changed do nothing */
+   if (!svga->swtnl.new_vdecl)
+      return;
+
+   memcpy(vdecl, svga_render->vdecl, sizeof(vdecl));
+
+   /* flush the hw state */
+   ret = svga_hwtnl_flush(svga->hwtnl);
+   if (ret) {
+      svga_context_flush(svga, NULL);
+      ret = svga_hwtnl_flush(svga->hwtnl);
+      /* if we hit this path we might become synced with hw */
+      svga->swtnl.new_vbuf = TRUE;
+      assert(ret == 0);
+   }
+
+   svga_hwtnl_reset_vdecl(svga->hwtnl, svga_render->vdecl_count);
+
+   for (i = 0; i < svga_render->vdecl_count; i++) {
+      vdecl[i].array.offset += svga_render->vdecl_offset;
+
+      svga_hwtnl_vdecl( svga->hwtnl,
+                        i,
+                        &vdecl[i],
+                        svga_render->vbuf );
+   }
+
+   /* We have already taken care of flatshading, so let the hwtnl
+    * module use whatever is most convenient:
+    */
+   if (svga->state.sw.need_pipeline) {
+      svga_hwtnl_set_flatshade(svga->hwtnl, FALSE, FALSE);
+      svga_hwtnl_set_unfilled(svga->hwtnl, PIPE_POLYGON_MODE_FILL);
+   }
+   else {
+      svga_hwtnl_set_flatshade( svga->hwtnl,
+                                svga->curr.rast->templ.flatshade,
+                                svga->curr.rast->templ.flatshade_first );
+
+      svga_hwtnl_set_unfilled( svga->hwtnl,
+                               svga->curr.rast->hw_unfilled );
+   }
+
+   svga->swtnl.new_vdecl = FALSE;
+}
+
+static void
+svga_vbuf_render_draw_arrays( struct vbuf_render *render,
+                              unsigned start,
+                              uint nr )
+{
+   struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+   struct svga_context *svga = svga_render->svga;
+   unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
+   enum pipe_error ret = 0;
+
+   svga_vbuf_sumbit_state(svga_render);
+
+   /* Need to call update_state() again as the draw module may have
+    * altered some of our state behind our backs.  Testcase:
+    * redbook/polys.c
+    */
+   svga_update_state_retry( svga, SVGA_STATE_HW_DRAW );
+
+   ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
+   if (ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = svga_hwtnl_draw_arrays(svga->hwtnl, svga_render->prim, start + bias, nr);
+      svga->swtnl.new_vbuf = TRUE;
+      assert(ret == PIPE_OK);
+   }
+}
+
+
+static void
+svga_vbuf_render_draw( struct vbuf_render *render,
+                       const ushort *indices,
+                       uint nr_indices)
+{
+   struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+   struct svga_context *svga = svga_render->svga;
+   struct pipe_screen *screen = svga->pipe.screen;
+   unsigned bias = (svga_render->vbuf_offset - svga_render->vdecl_offset) / svga_render->vertex_size;
+   boolean ret;
+   size_t size = 2 * nr_indices;
+
+   assert(( svga_render->vbuf_offset - svga_render->vdecl_offset) % svga_render->vertex_size == 0);
+   
+   if (svga_render->ibuf_size < svga_render->ibuf_offset + size)
+      pipe_buffer_reference(&svga_render->ibuf, NULL);
+
+   if (!svga_render->ibuf) {
+      svga_render->ibuf_size = MAX2(size, svga_render->ibuf_alloc_size);
+      svga_render->ibuf = pipe_buffer_create(screen,
+                                             0,
+                                             PIPE_BUFFER_USAGE_VERTEX,
+                                             svga_render->ibuf_size);
+      svga_render->ibuf_offset = 0;
+   }
+
+   pipe_buffer_write(screen, svga_render->ibuf,
+                     svga_render->ibuf_offset, 2 * nr_indices, indices);
+
+
+   /* off to hardware */
+   svga_vbuf_sumbit_state(svga_render);
+
+   /* Need to call update_state() again as the draw module may have
+    * altered some of our state behind our backs.  Testcase:
+    * redbook/polys.c
+    */
+   svga_update_state_retry( svga, SVGA_STATE_HW_DRAW );
+
+   ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
+                                        svga_render->ibuf,
+                                        2,
+                                        svga_render->min_index,
+                                        svga_render->max_index,
+                                        svga_render->prim,
+                                        svga_render->ibuf_offset / 2, nr_indices, bias);
+   if(ret != PIPE_OK) {
+      svga_context_flush(svga, NULL);
+      ret = svga_hwtnl_draw_range_elements(svga->hwtnl,
+                                           svga_render->ibuf,
+                                           2,
+                                           svga_render->min_index,
+                                           svga_render->max_index,
+                                           svga_render->prim,
+                                           svga_render->ibuf_offset / 2, nr_indices, bias);
+      svga->swtnl.new_vbuf = TRUE;
+      assert(ret == PIPE_OK);
+   }
+
+   svga_render->ibuf_offset += size;
+}
+
+
+static void
+svga_vbuf_render_release_vertices( struct vbuf_render *render )
+{
+
+}
+
+
+static void
+svga_vbuf_render_destroy( struct vbuf_render *render )
+{
+   struct svga_vbuf_render *svga_render = svga_vbuf_render(render);
+
+   pipe_buffer_reference(&svga_render->vbuf, NULL);
+   pipe_buffer_reference(&svga_render->ibuf, NULL);
+   FREE(svga_render);
+}
+
+
+/**
+ * Create a new primitive render.
+ */
+struct vbuf_render *
+svga_vbuf_render_create( struct svga_context *svga )
+{
+   struct svga_vbuf_render *svga_render = CALLOC_STRUCT(svga_vbuf_render);
+
+   svga_render->svga = svga;
+   svga_render->ibuf_size = 0;
+   svga_render->vbuf_size = 0;
+   svga_render->ibuf_alloc_size = 4*1024;
+   svga_render->vbuf_alloc_size = 64*1024;
+   svga_render->base.max_vertex_buffer_bytes = 64*1024/10;
+   svga_render->base.max_indices = 65536;
+   svga_render->base.get_vertex_info = svga_vbuf_render_get_vertex_info;
+   svga_render->base.allocate_vertices = svga_vbuf_render_allocate_vertices;
+   svga_render->base.map_vertices = svga_vbuf_render_map_vertices;
+   svga_render->base.unmap_vertices = svga_vbuf_render_unmap_vertices;
+   svga_render->base.set_primitive = svga_vbuf_render_set_primitive;
+   svga_render->base.draw = svga_vbuf_render_draw;
+   svga_render->base.draw_arrays = svga_vbuf_render_draw_arrays;
+   svga_render->base.release_vertices = svga_vbuf_render_release_vertices;
+   svga_render->base.destroy = svga_vbuf_render_destroy;
+
+   return &svga_render->base;
+}
diff --git a/src/gallium/drivers/svga/svga_swtnl_draw.c b/src/gallium/drivers/svga/svga_swtnl_draw.c
new file mode 100644
index 00000000000..8b14c913f72
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl_draw.c
@@ -0,0 +1,170 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_swtnl.h"
+#include "svga_state.h"
+#include "svga_swtnl_private.h"
+
+
+
+enum pipe_error
+svga_swtnl_draw_range_elements(struct svga_context *svga,
+                               struct pipe_buffer *indexBuffer,
+                               unsigned indexSize,
+                               unsigned min_index,
+                               unsigned max_index,
+                               unsigned prim, unsigned start, unsigned count)
+{
+   struct draw_context *draw = svga->swtnl.draw;
+   unsigned i;
+   const void *map;
+   enum pipe_error ret;
+
+   assert(!svga->dirty);
+   assert(svga->state.sw.need_swtnl);
+   assert(draw);
+
+   ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW);
+   if (ret) {
+      svga_context_flush(svga, NULL);
+      ret = svga_update_state(svga, SVGA_STATE_SWTNL_DRAW);
+      svga->swtnl.new_vbuf = TRUE;
+      assert(ret == PIPE_OK);
+   }
+
+   /*
+    * Map vertex buffers
+    */
+   for (i = 0; i < svga->curr.num_vertex_buffers; i++) {
+      map = pipe_buffer_map(svga->pipe.screen,
+                            svga->curr.vb[i].buffer,
+                            PIPE_BUFFER_USAGE_CPU_READ);
+
+      draw_set_mapped_vertex_buffer(draw, i, map);
+   }
+
+   /* Map index buffer, if present */
+   if (indexBuffer) {
+      map = pipe_buffer_map(svga->pipe.screen, indexBuffer,
+                            PIPE_BUFFER_USAGE_CPU_READ);
+
+      draw_set_mapped_element_buffer_range(draw, 
+                                           indexSize, 
+                                           min_index,
+                                           max_index,
+                                           map);
+   }
+   
+   if (svga->curr.cb[PIPE_SHADER_VERTEX]) {
+      map = pipe_buffer_map(svga->pipe.screen,
+                            svga->curr.cb[PIPE_SHADER_VERTEX],
+                            PIPE_BUFFER_USAGE_CPU_READ);
+      assert(map);
+      draw_set_mapped_constant_buffer(
+         draw, 
+         map,
+         svga->curr.cb[PIPE_SHADER_VERTEX]->size);
+   }
+
+   draw_arrays(svga->swtnl.draw, prim, start, count);
+
+   draw_flush(svga->swtnl.draw);
+
+   /* Ensure the draw module didn't touch this */
+   assert(i == svga->curr.num_vertex_buffers);
+   
+   /*
+    * unmap vertex/index buffers
+    */
+   for (i = 0; i < svga->curr.num_vertex_buffers; i++) {
+      pipe_buffer_unmap(svga->pipe.screen, svga->curr.vb[i].buffer);
+      draw_set_mapped_vertex_buffer(draw, i, NULL);
+   }
+
+   if (indexBuffer) {
+      pipe_buffer_unmap(svga->pipe.screen, indexBuffer);
+      draw_set_mapped_element_buffer(draw, 0, NULL);
+   }
+
+   if (svga->curr.cb[PIPE_SHADER_VERTEX]) {
+      pipe_buffer_unmap(svga->pipe.screen,
+                        svga->curr.cb[PIPE_SHADER_VERTEX]);
+   }
+
+   return ret;
+}
+
+
+
+
+boolean svga_init_swtnl( struct svga_context *svga )
+{
+   svga->swtnl.backend = svga_vbuf_render_create(svga);
+   if(!svga->swtnl.backend)
+      goto fail;
+
+   /*
+    * Create drawing context and plug our rendering stage into it.
+    */
+   svga->swtnl.draw = draw_create();
+   if (svga->swtnl.draw == NULL)
+      goto fail;
+
+
+   draw_set_rasterize_stage(svga->swtnl.draw, 
+                            draw_vbuf_stage( svga->swtnl.draw, svga->swtnl.backend ));
+
+   draw_set_render(svga->swtnl.draw, svga->swtnl.backend);
+
+   draw_install_aaline_stage(svga->swtnl.draw, &svga->pipe);
+   draw_install_aapoint_stage(svga->swtnl.draw, &svga->pipe);
+   draw_install_pstipple_stage(svga->swtnl.draw, &svga->pipe);
+
+   draw_set_driver_clipping(svga->swtnl.draw, debug_get_bool_option("SVGA_SWTNL_FSE", FALSE));
+
+   return TRUE;
+
+fail:
+   if (svga->swtnl.backend)
+      svga->swtnl.backend->destroy( svga->swtnl.backend );
+
+   if (svga->swtnl.draw)
+      draw_destroy( svga->swtnl.draw );
+
+   return FALSE;
+}
+
+
+void svga_destroy_swtnl( struct svga_context *svga )
+{
+   draw_destroy( svga->swtnl.draw );
+}
diff --git a/src/gallium/drivers/svga/svga_swtnl_private.h b/src/gallium/drivers/svga/svga_swtnl_private.h
new file mode 100644
index 00000000000..9bbb42910f5
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl_private.h
@@ -0,0 +1,93 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_SWTNL_PRIVATE_H
+#define SVGA_SWTNL_PRIVATE_H
+
+#include "svga_swtnl.h"
+#include "draw/draw_vertex.h"
+
+#include "svga_types.h"
+#include "svga3d_reg.h"
+
+/**
+ * Primitive renderer for svga.
+ */
+struct svga_vbuf_render {
+   struct vbuf_render base;
+
+   struct svga_context *svga;
+   struct vertex_info vertex_info;
+
+   unsigned vertex_size;
+
+   unsigned prim;
+
+   struct pipe_buffer *vbuf;
+   struct pipe_buffer *ibuf;
+
+   /* current size of buffer */
+   size_t vbuf_size;
+   size_t ibuf_size;
+
+   /* size of that the buffer should be */
+   size_t vbuf_alloc_size;
+   size_t ibuf_alloc_size;
+
+   /* current write place */
+   size_t vbuf_offset;
+   size_t ibuf_offset;
+
+   /* currently used */
+   size_t vbuf_used;
+
+   SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
+   unsigned vdecl_offset;
+   unsigned vdecl_count;
+
+   ushort min_index;
+   ushort max_index;
+};
+
+/**
+ * Basically a cast wrapper.
+ */
+static INLINE struct svga_vbuf_render *
+svga_vbuf_render( struct vbuf_render *render )
+{
+   assert(render);
+   return (struct svga_vbuf_render *)render;
+}
+
+
+struct vbuf_render *
+svga_vbuf_render_create( struct svga_context *svga );
+
+
+int
+svga_swtnl_update_vdecl( struct svga_context *svga );
+
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_swtnl_state.c b/src/gallium/drivers/svga/svga_swtnl_state.c
new file mode 100644
index 00000000000..16163121131
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_swtnl_state.c
@@ -0,0 +1,242 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#include "draw/draw_context.h"
+#include "draw/draw_vbuf.h"
+#include "pipe/p_inlines.h"
+#include "pipe/p_state.h"
+#include "util/u_memory.h"
+
+#include "svga_context.h"
+#include "svga_swtnl.h"
+#include "svga_state.h"
+
+#include "svga_swtnl_private.h"
+
+
+#define SVGA_POINT_ADJ_X -0.375
+#define SVGA_POINT_ADJ_Y -0.5
+
+#define SVGA_LINE_ADJ_X -0.5
+#define SVGA_LINE_ADJ_Y -0.5
+
+#define SVGA_TRIANGLE_ADJ_X -0.375
+#define SVGA_TRIANGLE_ADJ_Y -0.5
+
+
+static void set_draw_viewport( struct svga_context *svga )
+{
+   struct pipe_viewport_state vp = svga->curr.viewport;
+   float adjx = 0;
+   float adjy = 0;
+
+   switch (svga->curr.reduced_prim) {
+   case PIPE_PRIM_POINTS:
+      adjx = SVGA_POINT_ADJ_X;
+      adjy = SVGA_POINT_ADJ_Y;
+      break;
+   case PIPE_PRIM_LINES:
+      /* XXX: This is to compensate for the fact that wide lines are
+       * going to be drawn with triangles, but we're not catching all
+       * cases where that will happen.
+       */
+      if (svga->curr.rast->templ.line_width > 1.0) 
+      {
+         adjx = SVGA_LINE_ADJ_X + 0.175;
+         adjy = SVGA_LINE_ADJ_Y - 0.175;
+      }
+      else {
+         adjx = SVGA_LINE_ADJ_X;
+         adjy = SVGA_LINE_ADJ_Y;
+      }
+      break;
+   case PIPE_PRIM_TRIANGLES:
+      adjx += SVGA_TRIANGLE_ADJ_X;
+      adjy += SVGA_TRIANGLE_ADJ_Y;
+      break;
+   }
+
+   vp.translate[0] += adjx;
+   vp.translate[1] += adjy;
+
+   draw_set_viewport_state(svga->swtnl.draw, &vp);
+}
+
+static int update_swtnl_draw( struct svga_context *svga,
+                              unsigned dirty )
+{
+   draw_flush( svga->swtnl.draw );
+
+   if (dirty & SVGA_NEW_VS) 
+      draw_bind_vertex_shader(svga->swtnl.draw,
+                              svga->curr.vs->draw_shader);
+
+   if (dirty & SVGA_NEW_VBUFFER)
+      draw_set_vertex_buffers(svga->swtnl.draw, 
+                              svga->curr.num_vertex_buffers, 
+                              svga->curr.vb);
+
+   if (dirty & SVGA_NEW_VELEMENT)
+      draw_set_vertex_elements(svga->swtnl.draw, 
+                               svga->curr.num_vertex_elements, 
+                               svga->curr.ve );
+
+   if (dirty & SVGA_NEW_CLIP)
+      draw_set_clip_state(svga->swtnl.draw, 
+                          &svga->curr.clip);
+
+   if (dirty & (SVGA_NEW_VIEWPORT |
+                SVGA_NEW_REDUCED_PRIMITIVE | 
+                SVGA_NEW_RAST))
+      set_draw_viewport( svga );
+
+   if (dirty & SVGA_NEW_RAST)
+      draw_set_rasterizer_state(svga->swtnl.draw,
+                                &svga->curr.rast->templ);
+
+   if (dirty & SVGA_NEW_FRAME_BUFFER)
+      draw_set_mrd(svga->swtnl.draw, 
+                   svga->curr.depthscale);
+
+   if (dirty & SVGA_NEW_EDGEFLAGS)
+      draw_set_edgeflags( svga->swtnl.draw, 
+                          svga->curr.edgeflags );
+
+   return 0;
+}
+
+
+struct svga_tracked_state svga_update_swtnl_draw =
+{
+   "update draw module state",
+   (SVGA_NEW_VS |
+    SVGA_NEW_VBUFFER |
+    SVGA_NEW_VELEMENT |
+    SVGA_NEW_CLIP |
+    SVGA_NEW_VIEWPORT |
+    SVGA_NEW_RAST |
+    SVGA_NEW_FRAME_BUFFER |
+    SVGA_NEW_REDUCED_PRIMITIVE |
+    SVGA_NEW_EDGEFLAGS),
+   update_swtnl_draw
+};
+
+
+int svga_swtnl_update_vdecl( struct svga_context *svga )
+{
+   struct svga_vbuf_render *svga_render = svga_vbuf_render(svga->swtnl.backend);
+   struct draw_context *draw = svga->swtnl.draw;
+   struct vertex_info *vinfo = &svga_render->vertex_info;
+   SVGA3dVertexDecl vdecl[PIPE_MAX_ATTRIBS];
+   const enum interp_mode colorInterp =
+      svga->curr.rast->templ.flatshade ? INTERP_CONSTANT : INTERP_LINEAR;
+   const struct svga_fragment_shader *fs = svga->curr.fs;
+   int offset = 0;
+   int nr_decls = 0;
+   int src, i;
+
+   memset(vinfo, 0, sizeof(*vinfo));
+   memset(vdecl, 0, sizeof(vdecl));
+
+   /* always add position */
+   src = draw_find_vs_output(draw, TGSI_SEMANTIC_POSITION, 0);
+   draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_LINEAR, src);
+   vinfo->attrib[0].emit = EMIT_4F;
+   vdecl[0].array.offset = offset;
+   vdecl[0].identity.type = SVGA3D_DECLTYPE_FLOAT4;
+   vdecl[0].identity.usage = SVGA3D_DECLUSAGE_POSITIONT;
+   vdecl[0].identity.usageIndex = 0;
+   offset += 16;
+   nr_decls++;
+
+   for (i = 0; i < fs->base.info.num_inputs; i++) {
+      unsigned name = fs->base.info.input_semantic_name[i];
+      unsigned index = fs->base.info.input_semantic_index[i];
+      src = draw_find_vs_output(draw, name, index);
+      vdecl[nr_decls].array.offset = offset;
+      vdecl[nr_decls].identity.usageIndex = fs->base.info.input_semantic_index[i];
+
+      switch (name) {
+      case TGSI_SEMANTIC_COLOR:
+         draw_emit_vertex_attr(vinfo, EMIT_4F, colorInterp, src);
+         vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_COLOR;
+         vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4;
+         offset += 16;
+         nr_decls++;
+         break;
+      case TGSI_SEMANTIC_GENERIC:
+         draw_emit_vertex_attr(vinfo, EMIT_4F, INTERP_PERSPECTIVE, src);
+         vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD;
+         vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT4;
+         vdecl[nr_decls].identity.usageIndex += 1;
+         offset += 16;
+         nr_decls++;
+         break;
+      case TGSI_SEMANTIC_FOG:
+         draw_emit_vertex_attr(vinfo, EMIT_1F, INTERP_PERSPECTIVE, src);
+         vdecl[nr_decls].identity.usage = SVGA3D_DECLUSAGE_TEXCOORD;
+         vdecl[nr_decls].identity.type = SVGA3D_DECLTYPE_FLOAT1;
+         assert(vdecl[nr_decls].identity.usageIndex == 0);
+         offset += 4;
+         nr_decls++;
+         break;
+      case TGSI_SEMANTIC_POSITION:
+         /* generated internally, not a vertex shader output */
+         break;
+      default:
+         assert(0);
+      }
+   }
+
+   draw_compute_vertex_size(vinfo);
+
+   svga_render->vdecl_count = nr_decls;
+   for (i = 0; i < svga_render->vdecl_count; i++)
+      vdecl[i].array.stride = offset;
+
+   if (memcmp(svga_render->vdecl, vdecl, sizeof(vdecl)) == 0)
+      return 0;
+
+   memcpy(svga_render->vdecl, vdecl, sizeof(vdecl));
+   svga->swtnl.new_vdecl = TRUE;
+
+   return 0;
+}
+
+
+static int update_swtnl_vdecl( struct svga_context *svga,
+                               unsigned dirty )
+{
+   return svga_swtnl_update_vdecl( svga );
+}
+
+
+struct svga_tracked_state svga_update_swtnl_vdecl =
+{
+   "update draw module vdecl",
+   (SVGA_NEW_VS |
+    SVGA_NEW_FS),
+   update_swtnl_vdecl
+};
diff --git a/src/gallium/drivers/svga/svga_tgsi.c b/src/gallium/drivers/svga/svga_tgsi.c
new file mode 100644
index 00000000000..81eea1a145a
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi.c
@@ -0,0 +1,266 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+#include "pipe/p_defines.h"
+#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_dump.h"
+#include "tgsi/tgsi_scan.h"
+#include "util/u_memory.h"
+
+#include "svgadump/svga_shader_dump.h"
+
+#include "svga_context.h"
+#include "svga_tgsi.h"
+#include "svga_tgsi_emit.h"
+#include "svga_debug.h"
+
+#include "svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+
+/* Sinkhole used only in error conditions.
+ */
+static char err_buf[128];
+
+#if 0
+static void svga_destroy_shader_emitter( struct svga_shader_emitter *emit )
+{
+   if (emit->buf != err_buf)
+      FREE(emit->buf);
+}
+#endif
+
+
+static boolean svga_shader_expand( struct svga_shader_emitter *emit )
+{
+   char *new_buf;
+   unsigned newsize = emit->size * 2;
+
+   if(emit->buf != err_buf)
+      new_buf = REALLOC(emit->buf, emit->size, newsize);
+   else
+      new_buf = NULL;
+
+   if (new_buf == NULL) {
+      emit->ptr = err_buf;
+      emit->buf = err_buf;
+      emit->size = sizeof(err_buf);
+      return FALSE;
+   }
+
+   emit->size = newsize;
+   emit->ptr = new_buf + (emit->ptr - emit->buf);
+   emit->buf = new_buf;
+   return TRUE;
+}   
+
+static INLINE boolean reserve(  struct svga_shader_emitter *emit,
+                                unsigned nr_dwords )
+{
+   if (emit->ptr - emit->buf + nr_dwords * sizeof(unsigned) >= emit->size) {
+      if (!svga_shader_expand( emit ))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+boolean svga_shader_emit_dword( struct svga_shader_emitter *emit,
+                                unsigned dword )
+{
+   if (!reserve(emit, 1))
+      return FALSE;
+
+   *(unsigned *)emit->ptr = dword;
+   emit->ptr += sizeof dword;
+   return TRUE;
+}
+
+boolean svga_shader_emit_dwords( struct svga_shader_emitter *emit,
+                                 const unsigned *dwords,
+                                 unsigned nr )
+{
+   if (!reserve(emit, nr))
+      return FALSE;
+
+   memcpy( emit->ptr, dwords, nr * sizeof *dwords );
+   emit->ptr += nr * sizeof *dwords;
+   return TRUE;
+}
+
+boolean svga_shader_emit_opcode( struct svga_shader_emitter *emit,
+                                 unsigned opcode )
+{
+   SVGA3dShaderInstToken *here;
+
+   if (!reserve(emit, 1))
+      return FALSE;
+
+   here = (SVGA3dShaderInstToken *)emit->ptr;
+   here->value = opcode;
+
+   if (emit->insn_offset) {
+      SVGA3dShaderInstToken *prev = (SVGA3dShaderInstToken *)(emit->buf + 
+                                                              emit->insn_offset);
+      prev->size = (here - prev) - 1;
+   }
+   
+   emit->insn_offset = emit->ptr - emit->buf;
+   emit->ptr += sizeof(unsigned);
+   return TRUE;
+}
+
+#define SVGA3D_PS_2X (SVGA3D_PS_20 | 1)
+#define SVGA3D_VS_2X (SVGA3D_VS_20 | 1)
+
+static boolean svga_shader_emit_header( struct svga_shader_emitter *emit )
+{
+   SVGA3dShaderVersion header;
+
+   memset( &header, 0, sizeof header );
+
+   switch (emit->unit) {
+   case PIPE_SHADER_FRAGMENT:
+      header.value = emit->use_sm30 ? SVGA3D_PS_30 : SVGA3D_PS_2X;
+      break;
+   case PIPE_SHADER_VERTEX:
+      header.value = emit->use_sm30 ? SVGA3D_VS_30 : SVGA3D_VS_2X;
+      break;
+   }
+ 
+   return svga_shader_emit_dword( emit, header.value );
+}
+
+
+
+
+
+/* Parse TGSI shader and translate to SVGA/DX9 serialized
+ * representation.  
+ *
+ * In this function SVGA shader is emitted to an in-memory buffer that
+ * can be dynamically grown.  Once we've finished and know how large
+ * it is, it will be copied to a hardware buffer for upload.
+ */
+static struct svga_shader_result *
+svga_tgsi_translate( const struct svga_shader *shader,
+                     union svga_compile_key key,
+                     unsigned unit )
+{
+   struct svga_shader_result *result = NULL;
+   struct svga_shader_emitter emit;
+   int ret = 0;
+
+   memset(&emit, 0, sizeof(emit));
+
+   emit.use_sm30 = shader->use_sm30;
+   emit.size = 1024;
+   emit.buf = MALLOC(emit.size);
+   if (emit.buf == NULL) {
+      ret = PIPE_ERROR_OUT_OF_MEMORY;
+      goto fail;
+   }
+
+   emit.ptr = emit.buf;
+   emit.unit = unit;
+   emit.key = key;
+
+   tgsi_scan_shader( shader->tokens, &emit.info);
+
+   emit.imm_start = emit.info.file_max[TGSI_FILE_CONSTANT] + 1;
+   
+   if (unit == PIPE_SHADER_FRAGMENT)
+      emit.imm_start += key.fkey.num_unnormalized_coords;
+
+   if (unit == PIPE_SHADER_VERTEX) {
+      emit.imm_start += key.vkey.need_prescale ? 2 : 0;
+      emit.imm_start += key.vkey.num_zero_stride_vertex_elements;
+   }
+
+   emit.nr_hw_const = (emit.imm_start + emit.info.file_max[TGSI_FILE_IMMEDIATE] + 1);
+
+   emit.nr_hw_temp = emit.info.file_max[TGSI_FILE_TEMPORARY] + 1;
+   emit.in_main_func = TRUE;
+
+   if (!svga_shader_emit_header( &emit ))
+      goto fail;
+
+   if (!svga_shader_emit_instructions( &emit, shader->tokens ))
+      goto fail;
+   
+   result = CALLOC_STRUCT(svga_shader_result);
+   if (result == NULL)
+      goto fail;
+
+   result->shader = shader;
+   result->tokens = (const unsigned *)emit.buf;
+   result->nr_tokens = (emit.ptr - emit.buf) / sizeof(unsigned);
+   memcpy(&result->key, &key, sizeof key);
+
+   return result;
+
+fail:
+   FREE(result);
+   FREE(emit.buf);
+   return NULL;
+}
+
+
+
+
+struct svga_shader_result *
+svga_translate_fragment_program( const struct svga_fragment_shader *fs,
+                                 const struct svga_fs_compile_key *fkey )
+{
+   union svga_compile_key key;
+   memcpy(&key.fkey, fkey, sizeof *fkey);
+
+   return svga_tgsi_translate( &fs->base, 
+                               key,
+                               PIPE_SHADER_FRAGMENT );
+}
+
+struct svga_shader_result *
+svga_translate_vertex_program( const struct svga_vertex_shader *vs,
+                               const struct svga_vs_compile_key *vkey )
+{
+   union svga_compile_key key;
+   memcpy(&key.vkey, vkey, sizeof *vkey);
+
+   return svga_tgsi_translate( &vs->base, 
+                               key,
+                               PIPE_SHADER_VERTEX );
+}
+
+
+void svga_destroy_shader_result( struct svga_shader_result *result )
+{
+   FREE((unsigned *)result->tokens);
+   FREE(result);
+}
+
diff --git a/src/gallium/drivers/svga/svga_tgsi.h b/src/gallium/drivers/svga/svga_tgsi.h
new file mode 100644
index 00000000000..896c90a89ae
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi.h
@@ -0,0 +1,139 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_TGSI_H
+#define SVGA_TGSI_H
+
+#include "pipe/p_state.h"
+
+#include "svga_hw_reg.h"
+
+struct svga_fragment_shader;
+struct svga_vertex_shader;
+struct svga_shader;
+struct tgsi_shader_info;
+struct tgsi_token;
+
+
+struct svga_vs_compile_key
+{
+   ubyte need_prescale:1;
+   ubyte allow_psiz:1;
+   unsigned zero_stride_vertex_elements;
+   ubyte num_zero_stride_vertex_elements:6;
+};
+
+struct svga_fs_compile_key
+{
+   boolean light_twoside:1;
+   boolean front_cw:1;
+   ubyte num_textures;
+   ubyte num_unnormalized_coords;
+   struct {
+      ubyte compare_mode       : 1;
+      ubyte compare_func       : 3;
+      ubyte unnormalized       : 1;
+
+      ubyte width_height_idx   : 7;
+
+      ubyte texture_target;
+   } tex[PIPE_MAX_SAMPLERS];
+};
+
+union svga_compile_key {
+   struct svga_vs_compile_key vkey;
+   struct svga_fs_compile_key fkey;
+};
+
+struct svga_shader_result
+{
+   const struct svga_shader *shader;
+
+   /* Parameters used to generate this compilation result:
+    */
+   union svga_compile_key key;
+
+   /* Compiled shader tokens:
+    */
+   const unsigned *tokens;
+   unsigned nr_tokens;
+
+   /* SVGA Shader ID:
+    */
+   unsigned id;
+   
+   /* Next compilation result:
+    */
+   struct svga_shader_result *next;
+};
+
+
+/* TGSI doesn't provide use with VS input semantics (they're actually
+ * pretty meaningless), so we just generate some plausible ones here.
+ * This is called both from within the TGSI translator and when
+ * building vdecls to ensure they match up.
+ *
+ * The real use of this information is matching vertex elements to
+ * fragment shader inputs in the case where vertex shader is disabled.
+ */
+static INLINE void svga_generate_vdecl_semantics( unsigned idx,
+                                                  unsigned *usage,
+                                                  unsigned *usage_index )
+{
+   if (idx == 0) {
+      *usage = SVGA3D_DECLUSAGE_POSITION;
+      *usage_index = 0;
+   }
+   else {
+      *usage = SVGA3D_DECLUSAGE_TEXCOORD;
+      *usage_index = idx - 1;
+   }
+}
+
+
+
+static INLINE unsigned svga_vs_key_size( const struct svga_vs_compile_key *key )
+{
+   return sizeof *key;
+}
+
+static INLINE unsigned svga_fs_key_size( const struct svga_fs_compile_key *key )
+{
+   return (const char *)&key->tex[key->num_textures].texture_target -
+      (const char *)key;
+}
+
+struct svga_shader_result *
+svga_translate_fragment_program( const struct svga_fragment_shader *fs,
+                                 const struct svga_fs_compile_key *fkey );
+
+struct svga_shader_result *
+svga_translate_vertex_program( const struct svga_vertex_shader *fs,
+                               const struct svga_vs_compile_key *vkey );
+
+
+void svga_destroy_shader_result( struct svga_shader_result *result );
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c
new file mode 100644
index 00000000000..54457082a06
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm20.c
@@ -0,0 +1,280 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+
+#include "svga_tgsi_emit.h"
+#include "svga_context.h"
+
+
+
+
+static boolean ps20_input( struct svga_shader_emitter *emit,
+                           struct tgsi_declaration_semantic semantic,
+                           unsigned idx )
+{
+   struct src_register reg;
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+
+   switch (semantic.SemanticName) {
+   case TGSI_SEMANTIC_POSITION:
+      /* Special case:
+       */
+      reg = src_register( SVGA3DREG_MISCTYPE, 
+                          SVGA3DMISCREG_POSITION );
+      break;
+   case TGSI_SEMANTIC_COLOR:
+      reg = src_register( SVGA3DREG_INPUT, 
+                          semantic.SemanticIndex );
+      break;
+   case TGSI_SEMANTIC_FOG:
+      assert(semantic.SemanticIndex == 0);
+      reg = src_register( SVGA3DREG_TEXTURE, 0 );
+      break;
+   case TGSI_SEMANTIC_GENERIC:
+      reg = src_register( SVGA3DREG_TEXTURE,
+                          semantic.SemanticIndex + 1 );
+      break;
+   default:
+      assert(0);
+      return TRUE;
+   }
+
+   emit->input_map[idx] = reg;
+
+   dcl.dst = dst( reg );
+
+   dcl.usage = 0;
+   dcl.index = 0;
+
+   dcl.values[0] |= 1<<31;
+
+   return  (emit_instruction(emit, opcode) &&
+            svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+
+static boolean ps20_output( struct svga_shader_emitter *emit,
+                            struct tgsi_declaration_semantic semantic,
+                            unsigned idx )
+{
+   SVGA3dShaderDestToken reg;
+
+   switch (semantic.SemanticName) {
+   case TGSI_SEMANTIC_COLOR:
+      if (semantic.SemanticIndex < PIPE_MAX_COLOR_BUFS) {
+         unsigned cbuf = semantic.SemanticIndex;
+
+         emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                               emit->nr_hw_temp++ );
+         emit->temp_col[cbuf] = emit->output_map[idx];
+         emit->true_col[cbuf] = dst_register( SVGA3DREG_COLOROUT, 
+                                              semantic.SemanticIndex );
+      }
+      else {
+         assert(0);
+         reg = dst_register( SVGA3DREG_COLOROUT, 0 );
+      }
+      break;
+   case TGSI_SEMANTIC_POSITION:
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                            emit->nr_hw_temp++ );
+      emit->temp_pos = emit->output_map[idx];
+      emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT, 
+                                     semantic.SemanticIndex );
+      break;
+   default:
+      assert(0);
+      reg = dst_register( SVGA3DREG_COLOROUT, 0 );
+      break;
+   }
+
+   return TRUE;
+}
+
+
+static boolean vs20_input( struct svga_shader_emitter *emit,
+                           struct tgsi_declaration_semantic semantic,
+                           unsigned idx )
+{
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+
+   emit->input_map[idx] = src_register( SVGA3DREG_INPUT, idx );
+   dcl.dst = dst_register( SVGA3DREG_INPUT, idx );
+
+   assert(dcl.dst.reserved0);
+
+   /* Mesa doesn't provide use with VS input semantics (they're
+    * actually pretty meaningless), so we just generate some plausible
+    * ones here.  This has to match what we declare in the vdecl code
+    * in svga_pipe_vertex.c.
+    */
+   if (idx == 0) {
+      dcl.usage = SVGA3D_DECLUSAGE_POSITION;
+      dcl.index = 0;
+   }
+   else {
+      dcl.usage = SVGA3D_DECLUSAGE_TEXCOORD;
+      dcl.index = idx - 1;
+   }
+
+   dcl.values[0] |= 1<<31;
+
+   return  (emit_instruction(emit, opcode) &&
+            svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+
+static boolean vs20_output( struct svga_shader_emitter *emit,
+                         struct tgsi_declaration_semantic semantic,
+                         unsigned idx )
+{
+   /* Don't emit dcl instruction for vs20 inputs
+    */
+
+   /* Just build the register map table: 
+    */
+   switch (semantic.SemanticName) {
+   case TGSI_SEMANTIC_POSITION:
+      assert(semantic.SemanticIndex == 0);
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                            emit->nr_hw_temp++ );
+      emit->temp_pos = emit->output_map[idx];
+      emit->true_pos = dst_register( SVGA3DREG_RASTOUT, 
+                                     SVGA3DRASTOUT_POSITION);
+      break;
+   case TGSI_SEMANTIC_PSIZE:
+      assert(semantic.SemanticIndex == 0);
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                            emit->nr_hw_temp++ );
+      emit->temp_psiz = emit->output_map[idx];
+      emit->true_psiz = dst_register( SVGA3DREG_RASTOUT, 
+                                      SVGA3DRASTOUT_PSIZE );
+      break;
+   case TGSI_SEMANTIC_FOG:
+      assert(semantic.SemanticIndex == 0);
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT, 0 );
+      break;
+   case TGSI_SEMANTIC_COLOR:
+      /* oD0 */
+      emit->output_map[idx] = dst_register( SVGA3DREG_ATTROUT,
+                                            semantic.SemanticIndex );
+      break;
+   case TGSI_SEMANTIC_GENERIC:
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEXCRDOUT,
+                                            semantic.SemanticIndex + 1 );
+      break;
+   default:
+      assert(0);
+      emit->output_map[idx] = dst_register(  SVGA3DREG_TEMP, 0 );
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+static boolean ps20_sampler( struct svga_shader_emitter *emit,
+                          struct tgsi_declaration_semantic semantic,
+                          unsigned idx )
+{
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+
+   dcl.dst = dst_register( SVGA3DREG_SAMPLER, idx );
+   dcl.type = svga_tgsi_sampler_type( emit, idx );
+
+   return  (emit_instruction(emit, opcode) &&
+            svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+
+boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit,
+                             const struct tgsi_full_declaration *decl )
+{
+   unsigned first = decl->DeclarationRange.First;
+   unsigned last = decl->DeclarationRange.Last;
+   unsigned semantic = 0;
+   unsigned semantic_idx = 0;
+   unsigned idx;
+   
+   if (decl->Declaration.Semantic) {
+      semantic = decl->Semantic.SemanticName;
+      semantic_idx = decl->Semantic.SemanticIndex;
+   }
+
+   for( idx = first; idx <= last; idx++ ) {
+      boolean ok;
+
+      switch (decl->Declaration.File) {
+      case TGSI_FILE_SAMPLER:
+         assert (emit->unit == PIPE_SHADER_FRAGMENT);
+         ok = ps20_sampler( emit, decl->Semantic, idx );
+         break;
+
+      case TGSI_FILE_INPUT:
+         if (emit->unit == PIPE_SHADER_VERTEX)
+            ok = vs20_input( emit, decl->Semantic, idx );
+         else
+            ok = ps20_input( emit, decl->Semantic, idx );
+         break;
+
+      case TGSI_FILE_OUTPUT:
+         if (emit->unit == PIPE_SHADER_VERTEX)
+            ok = vs20_output( emit, decl->Semantic, idx );
+         else
+            ok = ps20_output( emit, decl->Semantic, idx );
+         break;
+
+      default:
+         /* don't need to declare other vars */
+         ok = TRUE;
+      }
+
+      if (!ok)
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+
diff --git a/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
new file mode 100644
index 00000000000..08e7dfb117c
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_decl_sm30.c
@@ -0,0 +1,385 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+
+#include "svga_tgsi_emit.h"
+#include "svga_context.h"
+
+static boolean translate_vs_ps_semantic( struct tgsi_declaration_semantic semantic,
+                                         unsigned *usage,
+                                         unsigned *idx )
+{
+   switch (semantic.SemanticName) {
+   case TGSI_SEMANTIC_POSITION:  
+      *idx = semantic.SemanticIndex;
+      *usage = SVGA3D_DECLUSAGE_POSITION;
+      break;
+   case TGSI_SEMANTIC_COLOR:     
+
+      *idx = semantic.SemanticIndex;
+      *usage = SVGA3D_DECLUSAGE_COLOR;
+      break;
+   case TGSI_SEMANTIC_BCOLOR:
+      *idx = semantic.SemanticIndex + 2; /* sharing with COLOR */
+      *usage = SVGA3D_DECLUSAGE_COLOR;
+      break;
+   case TGSI_SEMANTIC_FOG:       
+      *idx = 0;
+      assert(semantic.SemanticIndex == 0);
+      *usage = SVGA3D_DECLUSAGE_TEXCOORD;
+      break;
+   case TGSI_SEMANTIC_PSIZE:     
+      *idx = semantic.SemanticIndex;
+      *usage = SVGA3D_DECLUSAGE_PSIZE;
+      break;
+   case TGSI_SEMANTIC_GENERIC:   
+      *idx = semantic.SemanticIndex + 1; /* texcoord[0] is reserved for fog */
+      *usage = SVGA3D_DECLUSAGE_TEXCOORD;
+      break;
+   case TGSI_SEMANTIC_NORMAL:    
+      *idx = semantic.SemanticIndex;
+      *usage = SVGA3D_DECLUSAGE_NORMAL;
+      break;
+   default:
+      assert(0);
+      *usage = SVGA3D_DECLUSAGE_TEXCOORD;
+      *idx = 0;
+      return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+static boolean emit_decl( struct svga_shader_emitter *emit,
+                          SVGA3dShaderDestToken reg,
+                          unsigned usage, 
+                          unsigned index )
+{
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+
+   dcl.dst = reg;
+   dcl.usage = usage;
+   dcl.index = index;
+   dcl.values[0] |= 1<<31;
+
+   return  (emit_instruction(emit, opcode) &&
+            svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+static boolean emit_vface_decl( struct svga_shader_emitter *emit )
+{
+   if (!emit->emitted_vface) {
+      SVGA3dShaderDestToken reg =
+         dst_register( SVGA3DREG_MISCTYPE,
+                       SVGA3DMISCREG_FACE );
+
+      if (!emit_decl( emit, reg, 0, 0 ))
+         return FALSE;
+
+      emit->emitted_vface = TRUE;
+   }
+   return TRUE;
+}
+
+static boolean ps30_input( struct svga_shader_emitter *emit,
+                           struct tgsi_declaration_semantic semantic,
+                           unsigned idx )
+{
+   unsigned usage, index;
+   SVGA3dShaderDestToken reg;
+
+   if (semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+      emit->input_map[idx] = src_register( SVGA3DREG_MISCTYPE,
+                                           SVGA3DMISCREG_POSITION );
+
+      emit->input_map[idx].base.swizzle = TRANSLATE_SWIZZLE( TGSI_SWIZZLE_X,
+                                                             TGSI_SWIZZLE_Y,
+                                                             TGSI_SWIZZLE_Y,
+                                                             TGSI_SWIZZLE_Y );
+
+      reg = writemask( dst(emit->input_map[idx]),
+                       TGSI_WRITEMASK_XY );
+
+      return emit_decl( emit, reg, 0, 0 );
+   }
+   else if (emit->key.fkey.light_twoside &&
+            (semantic.SemanticName == TGSI_SEMANTIC_COLOR)) {
+
+      if (!translate_vs_ps_semantic( semantic, &usage, &index ))
+         return FALSE;
+
+      emit->internal_color_idx[emit->internal_color_count] = idx;
+      emit->input_map[idx] = src_register( SVGA3DREG_INPUT, emit->ps30_input_count );
+      emit->ps30_input_count++;
+      emit->internal_color_count++;
+
+      reg = dst( emit->input_map[idx] );
+
+      if (!emit_decl( emit, reg, usage, index ))
+         return FALSE;
+
+      semantic.SemanticName = TGSI_SEMANTIC_BCOLOR;
+      if (!translate_vs_ps_semantic( semantic, &usage, &index ))
+         return FALSE;
+
+      reg = dst_register( SVGA3DREG_INPUT, emit->ps30_input_count++ );
+
+      if (!emit_decl( emit, reg, usage, index ))
+         return FALSE;
+
+      if (!emit_vface_decl( emit ))
+         return FALSE;
+
+      return TRUE;
+   }
+   else if (semantic.SemanticName == TGSI_SEMANTIC_FACE) {
+      if (!emit_vface_decl( emit ))
+         return FALSE;
+      emit->emit_frontface = TRUE;
+      emit->internal_frontface_idx = idx;
+      return TRUE;
+   }
+   else {
+
+      if (!translate_vs_ps_semantic( semantic, &usage, &index ))
+         return FALSE;
+
+      emit->input_map[idx] = src_register( SVGA3DREG_INPUT, emit->ps30_input_count++ );
+      reg = dst( emit->input_map[idx] );
+
+      return emit_decl( emit, reg, usage, index );
+   }
+
+}
+
+
+/* PS output registers are the same as 2.0
+ */
+static boolean ps30_output( struct svga_shader_emitter *emit,
+                            struct tgsi_declaration_semantic semantic,
+                            unsigned idx )
+{
+   SVGA3dShaderDestToken reg;
+
+   switch (semantic.SemanticName) {
+   case TGSI_SEMANTIC_COLOR:
+      emit->output_map[idx] = dst_register( SVGA3DREG_COLOROUT, 
+                                            semantic.SemanticIndex );
+      break;
+   case TGSI_SEMANTIC_POSITION:
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                            emit->nr_hw_temp++ );
+      emit->temp_pos = emit->output_map[idx];
+      emit->true_pos = dst_register( SVGA3DREG_DEPTHOUT, 
+                                     semantic.SemanticIndex );
+      break;
+   default:
+      assert(0);
+      reg = dst_register( SVGA3DREG_COLOROUT, 0 );
+      break;
+   }
+
+   return TRUE;
+}
+
+
+/* We still make up the input semantics the same as in 2.0
+ */
+static boolean vs30_input( struct svga_shader_emitter *emit,
+                           struct tgsi_declaration_semantic semantic,
+                           unsigned idx )
+{
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+   unsigned usage, index;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+
+   if (emit->key.vkey.zero_stride_vertex_elements & (1 << idx)) {
+      unsigned i;
+      unsigned offset = 0;
+      unsigned start_idx = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      /* adjust for prescale constants */
+      start_idx += emit->key.vkey.need_prescale ? 2 : 0;
+      /* compute the offset from the start of zero stride constants */
+      for (i = 0; i < PIPE_MAX_ATTRIBS && i < idx; ++i) {
+         if (emit->key.vkey.zero_stride_vertex_elements & (1<<i))
+            ++offset;
+      }
+      emit->input_map[idx] = src_register( SVGA3DREG_CONST,
+                                           start_idx + offset );
+   } else {
+      emit->input_map[idx] = src_register( SVGA3DREG_INPUT, idx );
+      dcl.dst = dst_register( SVGA3DREG_INPUT, idx );
+
+      assert(dcl.dst.reserved0);
+
+      svga_generate_vdecl_semantics( idx, &usage, &index );
+
+      dcl.usage = usage;
+      dcl.index = index;
+      dcl.values[0] |= 1<<31;
+
+      return  (emit_instruction(emit, opcode) &&
+               svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+   }
+   return TRUE;
+}
+
+/* VS3.0 outputs have proper declarations and semantic info for
+ * matching against PS inputs.
+ */
+static boolean vs30_output( struct svga_shader_emitter *emit,
+                         struct tgsi_declaration_semantic semantic,
+                         unsigned idx )
+{
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+   unsigned usage, index;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+
+   if (!translate_vs_ps_semantic( semantic, &usage, &index ))
+      return FALSE;
+
+   dcl.dst = dst_register( SVGA3DREG_OUTPUT, idx );
+   dcl.usage = usage;
+   dcl.index = index;
+   dcl.values[0] |= 1<<31;
+
+   if (semantic.SemanticName == TGSI_SEMANTIC_POSITION) {
+      assert(idx == 0);
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                            emit->nr_hw_temp++ );
+      emit->temp_pos = emit->output_map[idx];
+      emit->true_pos = dcl.dst;
+   }
+   else if (semantic.SemanticName == TGSI_SEMANTIC_PSIZE) {
+      emit->output_map[idx] = dst_register( SVGA3DREG_TEMP,
+                                            emit->nr_hw_temp++ );
+      emit->temp_psiz = emit->output_map[idx];
+
+      /* This has the effect of not declaring psiz (below) and not 
+       * emitting the final MOV to true_psiz in the postamble.
+       */
+      if (!emit->key.vkey.allow_psiz)
+         return TRUE;
+
+      emit->true_psiz = dcl.dst;
+   }
+   else {
+      emit->output_map[idx] = dcl.dst;
+   }
+
+
+   return  (emit_instruction(emit, opcode) &&
+            svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+static boolean ps30_sampler( struct svga_shader_emitter *emit,
+                          struct tgsi_declaration_semantic semantic,
+                          unsigned idx )
+{
+   SVGA3DOpDclArgs dcl;
+   SVGA3dShaderInstToken opcode;
+
+   opcode = inst_token( SVGA3DOP_DCL );
+   dcl.values[0] = 0;
+   dcl.values[1] = 0;
+
+   dcl.dst = dst_register( SVGA3DREG_SAMPLER, idx );
+   dcl.type = svga_tgsi_sampler_type( emit, idx );
+   dcl.values[0] |= 1<<31;
+
+   return  (emit_instruction(emit, opcode) &&
+            svga_shader_emit_dwords( emit, dcl.values, Elements(dcl.values)));
+}
+
+
+boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit,
+                             const struct tgsi_full_declaration *decl )
+{
+   unsigned first = decl->DeclarationRange.First;
+   unsigned last = decl->DeclarationRange.Last;
+   unsigned semantic = 0;
+   unsigned semantic_idx = 0;
+   unsigned idx;
+
+   if (decl->Declaration.Semantic) {
+      semantic = decl->Semantic.SemanticName;
+      semantic_idx = decl->Semantic.SemanticIndex;
+   }
+
+   for( idx = first; idx <= last; idx++ ) {
+      boolean ok;
+
+      switch (decl->Declaration.File) {
+      case TGSI_FILE_SAMPLER:
+         assert (emit->unit == PIPE_SHADER_FRAGMENT);
+         ok = ps30_sampler( emit, decl->Semantic, idx );
+         break;
+
+      case TGSI_FILE_INPUT:
+         if (emit->unit == PIPE_SHADER_VERTEX)
+            ok = vs30_input( emit, decl->Semantic, idx );
+         else
+            ok = ps30_input( emit, decl->Semantic, idx );
+         break;
+
+      case TGSI_FILE_OUTPUT:
+         if (emit->unit == PIPE_SHADER_VERTEX)
+            ok = vs30_output( emit, decl->Semantic, idx );
+         else
+            ok = ps30_output( emit, decl->Semantic, idx );
+         break;
+
+      default:
+         /* don't need to declare other vars */
+         ok = TRUE;
+      }
+
+      if (!ok)
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+
diff --git a/src/gallium/drivers/svga/svga_tgsi_emit.h b/src/gallium/drivers/svga/svga_tgsi_emit.h
new file mode 100644
index 00000000000..2557824293e
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_emit.h
@@ -0,0 +1,345 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_TGSI_EMIT_H
+#define SVGA_TGSI_EMIT_H
+
+#include "tgsi/tgsi_scan.h"
+#include "svga_hw_reg.h"
+#include "svga_tgsi.h"
+#include "svga3d_shaderdefs.h"
+
+struct src_register
+{
+   SVGA3dShaderSrcToken base;
+   SVGA3dShaderSrcToken indirect;
+};
+
+
+struct svga_arl_consts {
+   int number;
+   int idx;
+   int swizzle;
+   int arl_num;
+};
+
+/* Internal functions:
+ */
+
+struct svga_shader_emitter
+{
+   boolean use_sm30;
+   
+   unsigned size;
+   char *buf;
+   char *ptr;
+
+   union svga_compile_key key;
+   struct tgsi_shader_info info;
+   int unit;
+
+   int imm_start;
+
+   int nr_hw_const;
+   int nr_hw_temp;
+   
+   int insn_offset;
+
+   int internal_temp_count;
+   int internal_imm_count;
+
+   int internal_color_idx[2]; /* diffuse, specular */
+   int internal_color_count;
+
+   boolean emitted_vface;
+   boolean emit_frontface;
+   int internal_frontface_idx;
+
+   int ps30_input_count;
+
+   boolean in_main_func;
+
+   boolean created_zero_immediate;
+   int zero_immediate_idx;
+
+   boolean created_loop_const;
+   int loop_const_idx;
+
+   boolean created_sincos_consts;
+   int sincos_consts_idx;
+
+   unsigned label[32];
+   unsigned nr_labels;
+
+   struct src_register input_map[PIPE_MAX_ATTRIBS];
+   SVGA3dShaderDestToken output_map[PIPE_MAX_ATTRIBS];
+
+   struct src_register imm_0055;
+   SVGA3dShaderDestToken temp_pos;
+   SVGA3dShaderDestToken true_pos;
+
+   SVGA3dShaderDestToken temp_col[PIPE_MAX_COLOR_BUFS];
+   SVGA3dShaderDestToken true_col[PIPE_MAX_COLOR_BUFS];
+
+   SVGA3dShaderDestToken temp_psiz;
+   SVGA3dShaderDestToken true_psiz;
+
+   struct svga_arl_consts arl_consts[12];
+   int num_arl_consts;
+   int current_arl;
+};
+
+
+boolean svga_shader_emit_dword( struct svga_shader_emitter *emit,
+                                unsigned dword );
+
+boolean svga_shader_emit_dwords( struct svga_shader_emitter *emit,
+                                 const unsigned *dwords,
+                                 unsigned nr );
+
+boolean svga_shader_emit_opcode( struct svga_shader_emitter *emit,
+                                 unsigned opcode );
+
+boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
+                                       const struct tgsi_token *tokens );
+
+boolean svga_translate_decl_sm20( struct svga_shader_emitter *emit,
+                               const struct tgsi_full_declaration *decl );
+
+boolean svga_translate_decl_sm30( struct svga_shader_emitter *emit,
+                               const struct tgsi_full_declaration *decl );
+
+
+static INLINE boolean emit_dst( struct svga_shader_emitter *emit,
+                         SVGA3dShaderDestToken dest )
+{
+   assert(dest.reserved0);
+   return svga_shader_emit_dword( emit, dest.value );
+}
+
+static INLINE boolean emit_src( struct svga_shader_emitter *emit,
+                         const struct src_register src )
+{
+   if (src.base.relAddr) {
+      assert(src.base.reserved0);
+      assert(src.indirect.reserved0);
+      return (svga_shader_emit_dword( emit, src.base.value ) &&
+              svga_shader_emit_dword( emit, src.indirect.value ));
+   }
+   else {
+      assert(src.base.reserved0);
+      return svga_shader_emit_dword( emit, src.base.value );
+   }
+}
+
+
+static INLINE boolean emit_instruction( struct svga_shader_emitter *emit,
+                                 SVGA3dShaderInstToken opcode )
+{
+   return svga_shader_emit_opcode( emit, opcode.value );
+}
+
+
+static INLINE boolean emit_op1( struct svga_shader_emitter *emit,
+                         SVGA3dShaderInstToken inst,
+                         SVGA3dShaderDestToken dest,
+                         struct src_register src0 )
+{
+   return (emit_instruction( emit, inst ) &&
+           emit_dst( emit, dest ) &&
+           emit_src( emit, src0 ));
+}
+
+static INLINE boolean emit_op2( struct svga_shader_emitter *emit,
+                     SVGA3dShaderInstToken inst,
+                     SVGA3dShaderDestToken dest,
+                     struct src_register src0,
+                     struct src_register src1 )
+{
+   return (emit_instruction( emit, inst ) &&
+           emit_dst( emit, dest ) &&
+           emit_src( emit, src0 ) &&
+           emit_src( emit, src1 ));
+}
+
+static INLINE boolean emit_op3( struct svga_shader_emitter *emit,
+                         SVGA3dShaderInstToken inst,
+                         SVGA3dShaderDestToken dest,
+                         struct src_register src0,
+                         struct src_register src1,
+                         struct src_register src2 )
+{
+   return (emit_instruction( emit, inst ) &&
+           emit_dst( emit, dest ) &&
+           emit_src( emit, src0 ) &&
+           emit_src( emit, src1 ) &&
+           emit_src( emit, src2 ));
+}
+
+
+#define TRANSLATE_SWIZZLE(x,y,z,w)  ((x) | ((y) << 2) | ((z) << 4) | ((w) << 6))
+#define SWIZZLE_XYZW  \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_W)
+#define SWIZZLE_XXXX  \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_X,TGSI_SWIZZLE_X,TGSI_SWIZZLE_X,TGSI_SWIZZLE_X)
+#define SWIZZLE_YYYY  \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y,TGSI_SWIZZLE_Y)
+#define SWIZZLE_ZZZZ  \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z,TGSI_SWIZZLE_Z)
+#define SWIZZLE_WWWW  \
+ TRANSLATE_SWIZZLE(TGSI_SWIZZLE_W,TGSI_SWIZZLE_W,TGSI_SWIZZLE_W,TGSI_SWIZZLE_W)
+
+
+
+static INLINE SVGA3dShaderInstToken
+inst_token( unsigned opcode )
+{
+   SVGA3dShaderInstToken inst;
+
+   inst.value = 0;
+   inst.op = opcode;
+
+   return inst;
+}
+
+static INLINE SVGA3dShaderDestToken 
+dst_register( unsigned file,
+              int number )
+{
+   SVGA3dShaderDestToken dest;
+
+   dest.value = 0;
+   dest.num = number;
+   dest.type_upper = file >> 3;
+   dest.relAddr = 0;
+   dest.reserved1 = 0;
+   dest.mask = 0xf;
+   dest.dstMod = 0;
+   dest.shfScale = 0;
+   dest.type_lower = file & 0x7;
+   dest.reserved0 = 1;          /* is_reg */
+   
+   return dest;
+}
+
+static INLINE SVGA3dShaderDestToken
+writemask( SVGA3dShaderDestToken dest,
+           unsigned mask )
+{
+   dest.mask &= mask;
+   return dest;
+}
+
+
+static INLINE SVGA3dShaderSrcToken 
+src_token( unsigned file, int number )
+{
+   SVGA3dShaderSrcToken src;
+
+   src.value = 0;
+   src.num = number;
+   src.type_upper = file >> 3;
+   src.relAddr = 0;
+   src.reserved1 = 0;
+   src.swizzle = SWIZZLE_XYZW;
+   src.srcMod = 0;
+   src.type_lower = file & 0x7;
+   src.reserved0 = 1;           /* is_reg */
+
+   return src;
+}
+
+
+static INLINE struct src_register 
+absolute( struct src_register src )
+{
+   src.base.srcMod = SVGA3DSRCMOD_ABS;
+
+   return src;
+}
+
+
+static INLINE struct src_register 
+negate( struct src_register src )
+{
+   switch (src.base.srcMod) {
+   case SVGA3DSRCMOD_ABS:
+      src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
+      break;
+   case SVGA3DSRCMOD_ABSNEG:
+      src.base.srcMod = SVGA3DSRCMOD_ABS;
+      break;
+   case SVGA3DSRCMOD_NEG:
+      src.base.srcMod = SVGA3DSRCMOD_NONE;
+      break;
+   case SVGA3DSRCMOD_NONE:
+      src.base.srcMod = SVGA3DSRCMOD_NEG;
+      break;
+   }
+   return src;
+}
+
+
+static INLINE struct src_register 
+src_register( unsigned file, int number )
+{
+   struct src_register src;
+   
+   src.base = src_token( file, number );
+   src.indirect.value = 0;
+
+   return src;
+}
+
+static INLINE SVGA3dShaderDestToken dst( struct src_register src )
+{
+   return dst_register( SVGA3dShaderGetRegType( src.base.value ),
+                        src.base.num );
+}
+
+static INLINE struct src_register src( SVGA3dShaderDestToken dst )
+{
+   return src_register( SVGA3dShaderGetRegType( dst.value ),
+                        dst.num );
+}
+
+static INLINE ubyte svga_tgsi_sampler_type( struct svga_shader_emitter *emit,
+                                            int idx )
+{
+   switch (emit->key.fkey.tex[idx].texture_target) {
+   case PIPE_TEXTURE_1D:
+      return SVGA3DSAMP_2D;
+   case PIPE_TEXTURE_2D:
+      return SVGA3DSAMP_2D;
+   case PIPE_TEXTURE_3D:
+      return SVGA3DSAMP_VOLUME;
+   case PIPE_TEXTURE_CUBE:
+      return SVGA3DSAMP_CUBE;
+   }
+
+   return SVGA3DSAMP_UNKNOWN;
+}
+
+#endif
diff --git a/src/gallium/drivers/svga/svga_tgsi_insn.c b/src/gallium/drivers/svga/svga_tgsi_insn.c
new file mode 100644
index 00000000000..ea409b7e165
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_tgsi_insn.c
@@ -0,0 +1,2716 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_shader_tokens.h"
+#include "tgsi/tgsi_parse.h"
+#include "util/u_memory.h"
+
+#include "svga_tgsi_emit.h"
+#include "svga_context.h"
+
+
+static boolean emit_vs_postamble( struct svga_shader_emitter *emit );
+static boolean emit_ps_postamble( struct svga_shader_emitter *emit );
+
+
+
+ 
+static unsigned
+translate_opcode(
+   uint opcode )
+{
+   switch (opcode) {
+   case TGSI_OPCODE_ABS:        return SVGA3DOP_ABS;
+   case TGSI_OPCODE_ADD:        return SVGA3DOP_ADD;
+   case TGSI_OPCODE_BREAKC:     return SVGA3DOP_BREAKC;
+   case TGSI_OPCODE_DDX:        return SVGA3DOP_DSX;
+   case TGSI_OPCODE_DDY:        return SVGA3DOP_DSY;
+   case TGSI_OPCODE_DP2A:       return SVGA3DOP_DP2ADD;
+   case TGSI_OPCODE_DP3:        return SVGA3DOP_DP3;
+   case TGSI_OPCODE_DP4:        return SVGA3DOP_DP4;
+   case TGSI_OPCODE_ENDFOR:     return SVGA3DOP_ENDLOOP;
+   case TGSI_OPCODE_FRC:        return SVGA3DOP_FRC;
+   case TGSI_OPCODE_BGNFOR:     return SVGA3DOP_LOOP;
+   case TGSI_OPCODE_MAD:        return SVGA3DOP_MAD;
+   case TGSI_OPCODE_MAX:        return SVGA3DOP_MAX;
+   case TGSI_OPCODE_MIN:        return SVGA3DOP_MIN;
+   case TGSI_OPCODE_MOV:        return SVGA3DOP_MOV;
+   case TGSI_OPCODE_MUL:        return SVGA3DOP_MUL;
+   case TGSI_OPCODE_NOP:        return SVGA3DOP_NOP;
+   case TGSI_OPCODE_NRM4:       return SVGA3DOP_NRM;
+   case TGSI_OPCODE_SSG:        return SVGA3DOP_SGN;
+   default:
+      debug_printf("Unkown opcode %u\n", opcode);
+      assert( 0 );
+      return SVGA3DOP_LAST_INST;
+   }
+}
+
+
+static unsigned translate_file( unsigned file )
+{
+   switch (file) {
+   case TGSI_FILE_TEMPORARY: return SVGA3DREG_TEMP;
+   case TGSI_FILE_INPUT:     return SVGA3DREG_INPUT;
+   case TGSI_FILE_OUTPUT:    return SVGA3DREG_OUTPUT; /* VS3.0+ only */
+   case TGSI_FILE_IMMEDIATE: return SVGA3DREG_CONST;
+   case TGSI_FILE_CONSTANT:  return SVGA3DREG_CONST;
+   case TGSI_FILE_SAMPLER:   return SVGA3DREG_SAMPLER;
+   case TGSI_FILE_ADDRESS:   return SVGA3DREG_ADDR;
+   default:
+      assert( 0 );
+      return SVGA3DREG_TEMP;
+   }
+}
+
+
+
+
+
+
+static SVGA3dShaderDestToken 
+translate_dst_register( struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn,
+                        unsigned idx )
+{
+   const struct tgsi_full_dst_register *reg = &insn->FullDstRegisters[idx];
+   SVGA3dShaderDestToken dest;
+
+   switch (reg->DstRegister.File) {
+   case TGSI_FILE_OUTPUT:
+      /* Output registers encode semantic information in their name.
+       * Need to lookup a table built at decl time:
+       */
+      dest = emit->output_map[reg->DstRegister.Index];
+      break;
+
+   default:
+      dest = dst_register( translate_file( reg->DstRegister.File ),
+                           reg->DstRegister.Index );
+      break;
+   }
+
+   dest.mask = reg->DstRegister.WriteMask;
+
+   if (insn->Instruction.Saturate) 
+      dest.dstMod = SVGA3DDSTMOD_SATURATE;
+
+   return dest;
+}
+
+
+static struct src_register 
+swizzle( struct src_register src,
+         int x,
+         int y,
+         int z,
+         int w )
+{
+   x = (src.base.swizzle >> (x * 2)) & 0x3;
+   y = (src.base.swizzle >> (y * 2)) & 0x3;
+   z = (src.base.swizzle >> (z * 2)) & 0x3;
+   w = (src.base.swizzle >> (w * 2)) & 0x3;
+
+   src.base.swizzle = TRANSLATE_SWIZZLE(x,y,z,w);
+
+   return src;
+}
+
+static struct src_register
+scalar( struct src_register src,
+        int comp )
+{
+   return swizzle( src, comp, comp, comp, comp );
+}
+
+static INLINE boolean
+svga_arl_needs_adjustment( const struct svga_shader_emitter *emit )
+{
+   int i;
+
+   for (i = 0; i < emit->num_arl_consts; ++i) {
+      if (emit->arl_consts[i].arl_num == emit->current_arl)
+         return TRUE;
+   }
+   return FALSE;
+}
+
+static INLINE int
+svga_arl_adjustment( const struct svga_shader_emitter *emit )
+{
+   int i;
+
+   for (i = 0; i < emit->num_arl_consts; ++i) {
+      if (emit->arl_consts[i].arl_num == emit->current_arl)
+         return emit->arl_consts[i].number;
+   }
+   return 0;
+}
+
+static struct src_register 
+translate_src_register( const struct svga_shader_emitter *emit,
+                        const struct tgsi_full_src_register *reg )
+{
+   struct src_register src;
+
+   switch (reg->SrcRegister.File) {
+   case TGSI_FILE_INPUT:
+      /* Input registers are referred to by their semantic name rather
+       * than by index.  Use the mapping build up from the decls:
+       */
+      src = emit->input_map[reg->SrcRegister.Index];
+      break;
+       
+   case TGSI_FILE_IMMEDIATE:
+      /* Immediates are appended after TGSI constants in the D3D
+       * constant buffer.
+       */
+      src = src_register( translate_file( reg->SrcRegister.File ),
+                          reg->SrcRegister.Index + 
+                          emit->imm_start );
+      break;
+
+   default:
+      src = src_register( translate_file( reg->SrcRegister.File ),
+                          reg->SrcRegister.Index );
+
+      break;
+   }
+
+   /* Indirect addressing (for coninstant buffer lookups only)
+    */
+   if (reg->SrcRegister.Indirect)
+   {
+      /* we shift the offset towards the minimum */
+      if (svga_arl_needs_adjustment( emit )) {
+         src.base.num -= svga_arl_adjustment( emit );
+      }
+      src.base.relAddr = 1;
+
+      /* Not really sure what should go in the second token:
+       */
+      src.indirect = src_token( SVGA3DREG_ADDR,
+                                reg->SrcRegisterInd.Index );
+
+      src.indirect.swizzle = SWIZZLE_XXXX;
+   }
+
+   src = swizzle( src,
+                  reg->SrcRegister.SwizzleX,
+                  reg->SrcRegister.SwizzleY,
+                  reg->SrcRegister.SwizzleZ,
+                  reg->SrcRegister.SwizzleW );
+
+   /* src.mod isn't a bitfield, unfortunately:
+    * See tgsi_util_get_full_src_register_sign_mode for implementation details.
+    */
+   if (reg->SrcRegisterExtMod.Absolute) {
+      if (reg->SrcRegisterExtMod.Negate)
+         src.base.srcMod = SVGA3DSRCMOD_ABSNEG;
+      else
+         src.base.srcMod = SVGA3DSRCMOD_ABS;
+   }
+   else {
+      if (reg->SrcRegister.Negate != reg->SrcRegisterExtMod.Negate)
+         src.base.srcMod = SVGA3DSRCMOD_NEG;
+      else
+         src.base.srcMod = SVGA3DSRCMOD_NONE;
+   }
+
+   return src;
+}
+
+
+/*
+ * Get a temporary register, return -1 if none available
+ */
+static INLINE SVGA3dShaderDestToken 
+get_temp( struct svga_shader_emitter *emit )
+{
+   int i = emit->nr_hw_temp + emit->internal_temp_count++;
+
+   return dst_register( SVGA3DREG_TEMP, i );
+}
+
+/* Release a single temp.  Currently only effective if it was the last
+ * allocated temp, otherwise release will be delayed until the next
+ * call to reset_temp_regs().
+ */
+static INLINE void 
+release_temp( struct svga_shader_emitter *emit,
+              SVGA3dShaderDestToken temp )
+{
+   if (temp.num == emit->internal_temp_count - 1)
+      emit->internal_temp_count--;
+}
+
+static void reset_temp_regs( struct svga_shader_emitter *emit )
+{
+   emit->internal_temp_count = 0;
+}
+   
+
+static boolean submit_op0( struct svga_shader_emitter *emit,
+                           SVGA3dShaderInstToken inst,
+                           SVGA3dShaderDestToken dest )
+{
+   return (emit_instruction( emit, inst ) && 
+           emit_dst( emit, dest ));
+}
+
+static boolean submit_op1( struct svga_shader_emitter *emit,
+                           SVGA3dShaderInstToken inst,
+                           SVGA3dShaderDestToken dest,
+                           struct src_register src0 )
+{
+   return emit_op1( emit, inst, dest, src0 );
+}
+
+
+/* SVGA shaders may not refer to >1 constant register in a single
+ * instruction.  This function checks for that usage and inserts a
+ * move to temporary if detected.
+ *
+ * The same applies to input registers -- at most a single input
+ * register may be read by any instruction.
+ */
+static boolean submit_op2( struct svga_shader_emitter *emit,
+                           SVGA3dShaderInstToken inst,
+                           SVGA3dShaderDestToken dest,
+                           struct src_register src0,
+                           struct src_register src1 )
+{
+   SVGA3dShaderDestToken temp;
+   SVGA3dShaderRegType type0, type1;
+   boolean need_temp = FALSE;
+
+   temp.value = 0;
+   type0 = SVGA3dShaderGetRegType( src0.base.value );
+   type1 = SVGA3dShaderGetRegType( src1.base.value );
+
+   if (type0 == SVGA3DREG_CONST &&
+       type1 == SVGA3DREG_CONST &&
+       src0.base.num != src1.base.num)
+      need_temp = TRUE;
+
+   if (type0 == SVGA3DREG_INPUT &&
+       type1 == SVGA3DREG_INPUT &&
+       src0.base.num != src1.base.num)
+      need_temp = TRUE;
+
+   if (need_temp)
+   {
+      temp = get_temp( emit );
+
+      if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 ))
+         return FALSE;
+
+      src0 = src( temp );
+   }
+
+   if (!emit_op2( emit, inst, dest, src0, src1 ))
+      return FALSE;
+
+   if (need_temp)
+      release_temp( emit, temp );
+
+   return TRUE;
+}
+
+
+/* SVGA shaders may not refer to >1 constant register in a single
+ * instruction.  This function checks for that usage and inserts a
+ * move to temporary if detected.
+ */
+static boolean submit_op3( struct svga_shader_emitter *emit,
+                           SVGA3dShaderInstToken inst,
+                           SVGA3dShaderDestToken dest,
+                           struct src_register src0,
+                           struct src_register src1,
+                           struct src_register src2 )
+{
+   SVGA3dShaderDestToken temp0;
+   SVGA3dShaderDestToken temp1;
+   boolean need_temp0 = FALSE;
+   boolean need_temp1 = FALSE;
+   SVGA3dShaderRegType type0, type1, type2;
+
+   temp0.value = 0;
+   temp1.value = 0;
+   type0 = SVGA3dShaderGetRegType( src0.base.value );
+   type1 = SVGA3dShaderGetRegType( src1.base.value );
+   type2 = SVGA3dShaderGetRegType( src2.base.value );
+
+   if (inst.op != SVGA3DOP_SINCOS) {
+      if (type0 == SVGA3DREG_CONST &&
+          ((type1 == SVGA3DREG_CONST && src0.base.num != src1.base.num) ||
+           (type2 == SVGA3DREG_CONST && src0.base.num != src2.base.num)))
+         need_temp0 = TRUE;
+
+      if (type1 == SVGA3DREG_CONST &&
+          (type2 == SVGA3DREG_CONST && src1.base.num != src2.base.num))
+         need_temp1 = TRUE;
+   }
+
+   if (type0 == SVGA3DREG_INPUT &&
+       ((type1 == SVGA3DREG_INPUT && src0.base.num != src1.base.num) ||
+        (type2 == SVGA3DREG_INPUT && src0.base.num != src2.base.num)))
+      need_temp0 = TRUE;
+
+   if (type1 == SVGA3DREG_INPUT &&
+       (type2 == SVGA3DREG_INPUT && src1.base.num != src2.base.num))
+      need_temp1 = TRUE;
+
+   if (need_temp0)
+   {
+      temp0 = get_temp( emit );
+ 
+      if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp0, src0 ))
+         return FALSE;
+         
+      src0 = src( temp0 );
+   }
+
+   if (need_temp1)
+   {
+      temp1 = get_temp( emit );
+
+      if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), temp1, src1 ))
+         return FALSE;
+
+      src1 = src( temp1 );
+   }
+
+   if (!emit_op3( emit, inst, dest, src0, src1, src2 ))
+      return FALSE;
+
+   if (need_temp1)
+      release_temp( emit, temp1 );
+   if (need_temp0)
+      release_temp( emit, temp0 );
+   return TRUE;
+}
+
+
+static boolean emit_def_const( struct svga_shader_emitter *emit,
+                               SVGA3dShaderConstType type,
+                               unsigned idx,
+                               float a,
+                               float b,
+                               float c,
+                               float d )
+{
+   SVGA3DOpDefArgs def;
+   SVGA3dShaderInstToken opcode;
+
+   switch (type) {
+   case SVGA3D_CONST_TYPE_FLOAT:
+      opcode = inst_token( SVGA3DOP_DEF );
+      def.dst = dst_register( SVGA3DREG_CONST, idx );
+      def.constValues[0] = a;
+      def.constValues[1] = b;
+      def.constValues[2] = c;
+      def.constValues[3] = d;
+      break;
+   case SVGA3D_CONST_TYPE_INT:
+      opcode = inst_token( SVGA3DOP_DEFI );
+      def.dst = dst_register( SVGA3DREG_CONSTINT, idx );
+      def.constIValues[0] = (int)a;
+      def.constIValues[1] = (int)b;
+      def.constIValues[2] = (int)c;
+      def.constIValues[3] = (int)d;
+      break;
+   default:
+      assert(0);
+      break;
+   }
+
+   if (!emit_instruction(emit, opcode) ||
+       !svga_shader_emit_dwords( emit, def.values, Elements(def.values)))
+      return FALSE;
+
+   return TRUE;
+}
+
+static INLINE boolean
+create_zero_immediate( struct svga_shader_emitter *emit )
+{
+   unsigned idx = emit->nr_hw_const++;
+
+   if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
+                        idx, 0, 0, 0, 1 ))
+      return FALSE;
+
+   emit->zero_immediate_idx = idx;
+   emit->created_zero_immediate = TRUE;
+
+   return TRUE;
+}
+
+static INLINE boolean
+create_loop_const( struct svga_shader_emitter *emit )
+{
+   unsigned idx = emit->nr_hw_const++;
+
+   if (!emit_def_const( emit, SVGA3D_CONST_TYPE_INT, idx,
+                        255, /* iteration count */
+                        0, /* initial value */
+                        1, /* step size */
+                        0 /* not used, must be 0 */))
+      return FALSE;
+
+   emit->loop_const_idx = idx;
+   emit->created_loop_const = TRUE;
+
+   return TRUE;
+}
+
+static INLINE boolean
+create_sincos_consts( struct svga_shader_emitter *emit )
+{
+   unsigned idx = emit->nr_hw_const++;
+
+   if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
+                        -1.5500992e-006f,
+                        -2.1701389e-005f,
+                        0.0026041667f,
+                        0.00026041668f ))
+      return FALSE;
+
+   emit->sincos_consts_idx = idx;
+   idx = emit->nr_hw_const++;
+
+   if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
+                        -0.020833334f,
+                        -0.12500000f,
+                        1.0f,
+                        0.50000000f ))
+      return FALSE;
+
+   emit->created_sincos_consts = TRUE;
+
+   return TRUE;
+}
+
+static INLINE boolean
+create_arl_consts( struct svga_shader_emitter *emit )
+{
+   int i;
+
+   for (i = 0; i < emit->num_arl_consts; i += 4) {
+      int j;
+      unsigned idx = emit->nr_hw_const++;
+      float vals[4];
+      for (j = 0; j < 4 && (j + i) < emit->num_arl_consts; ++j) {
+         vals[j] = emit->arl_consts[i + j].number;
+         emit->arl_consts[i + j].idx = idx;
+         switch (j) {
+         case 0:
+            emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_X;
+            break;
+         case 1:
+            emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Y;
+            break;
+         case 2:
+            emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_Z;
+            break;
+         case 3:
+            emit->arl_consts[i + 0].swizzle = TGSI_SWIZZLE_W;
+            break;
+         }
+      }
+      while (j < 4)
+         vals[j++] = 0;
+
+      if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT, idx,
+                           vals[0], vals[1],
+                           vals[2], vals[3]))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+static INLINE struct src_register
+get_vface( struct svga_shader_emitter *emit )
+{
+   assert(emit->emitted_vface);
+   return src_register(SVGA3DREG_MISCTYPE, 
+                       SVGA3DMISCREG_FACE);
+}
+
+/* returns {0, 0, 0, 1} immediate */
+static INLINE struct src_register
+get_zero_immediate( struct svga_shader_emitter *emit )
+{
+   assert(emit->created_zero_immediate);
+   assert(emit->zero_immediate_idx >= 0);
+   return src_register( SVGA3DREG_CONST,
+                        emit->zero_immediate_idx );
+}
+
+/* returns the loop const */
+static INLINE struct src_register
+get_loop_const( struct svga_shader_emitter *emit )
+{
+   assert(emit->created_loop_const);
+   assert(emit->loop_const_idx >= 0);
+   return src_register( SVGA3DREG_CONSTINT,
+                        emit->loop_const_idx );
+}
+
+/* returns a sincos const */
+static INLINE struct src_register
+get_sincos_const( struct svga_shader_emitter *emit,
+                  unsigned index )
+{
+   assert(emit->created_sincos_consts);
+   assert(emit->sincos_consts_idx >= 0);
+   assert(index == 0 || index == 1);
+   return src_register( SVGA3DREG_CONST,
+                        emit->sincos_consts_idx + index );
+}
+
+static INLINE struct src_register
+get_fake_arl_const( struct svga_shader_emitter *emit )
+{
+   struct src_register reg;
+   int idx = 0, swizzle = 0, i;
+
+   for (i = 0; i < emit->num_arl_consts; ++ i) {
+      if (emit->arl_consts[i].arl_num == emit->current_arl) {
+         idx = emit->arl_consts[i].idx;
+         swizzle = emit->arl_consts[i].swizzle;
+      }
+   }
+
+   reg = src_register( SVGA3DREG_CONST, idx );
+   return scalar(reg, swizzle);
+}
+
+static INLINE struct src_register
+get_tex_dimensions( struct svga_shader_emitter *emit, int sampler_num )
+{
+   int idx;
+   struct src_register reg;
+
+   /* the width/height indexes start right after constants */
+   idx = emit->key.fkey.tex[sampler_num].width_height_idx +
+         emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
+
+   reg = src_register( SVGA3DREG_CONST, idx );
+   return reg;
+}
+
+static boolean emit_fake_arl(struct svga_shader_emitter *emit,
+                             const struct tgsi_full_instruction *insn)
+{
+   const struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   struct src_register src1 = get_fake_arl_const( emit );
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   SVGA3dShaderDestToken tmp = get_temp( emit );
+
+   if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), tmp, src0))
+      return FALSE;
+
+   if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), tmp, src( tmp ),
+                    src1))
+      return FALSE;
+
+   /* replicate the original swizzle */
+   src1 = src(tmp);
+   src1.base.swizzle = src0.base.swizzle;
+
+   return submit_op1( emit, inst_token( SVGA3DOP_MOVA ),
+                      dst, src1 );
+}
+
+static boolean emit_if(struct svga_shader_emitter *emit,
+                       const struct tgsi_full_instruction *insn)
+{
+   const struct src_register src = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   struct src_register zero = get_zero_immediate( emit );
+   SVGA3dShaderInstToken if_token = inst_token( SVGA3DOP_IFC );
+
+   if_token.control = SVGA3DOPCOMPC_NE;
+   zero = scalar(zero, TGSI_SWIZZLE_X);
+
+   return (emit_instruction( emit, if_token ) &&
+           emit_src( emit, src ) &&
+           emit_src( emit, zero ) );
+}
+
+static boolean emit_endif(struct svga_shader_emitter *emit,
+                       const struct tgsi_full_instruction *insn)
+{
+   return (emit_instruction( emit,
+                             inst_token( SVGA3DOP_ENDIF )));
+}
+
+static boolean emit_else(struct svga_shader_emitter *emit,
+                         const struct tgsi_full_instruction *insn)
+{
+   return (emit_instruction( emit,
+                             inst_token( SVGA3DOP_ELSE )));
+}
+
+/* Translate the following TGSI FLR instruction.
+ *    FLR  DST, SRC
+ * To the following SVGA3D instruction sequence.
+ *    FRC  TMP, SRC
+ *    SUB  DST, SRC, TMP
+ */
+static boolean emit_floor(struct svga_shader_emitter *emit,
+                          const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   SVGA3dShaderDestToken temp = get_temp( emit );
+
+   /* FRC  TMP, SRC */
+   if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ), temp, src0 ))
+      return FALSE;
+
+   /* SUB  DST, SRC, TMP */
+   if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst, src0,
+                    negate( src( temp ) ) ))
+      return FALSE;
+
+   return TRUE;
+}
+
+
+/* Translate the following TGSI CMP instruction.
+ *    CMP  DST, SRC0, SRC1, SRC2
+ * To the following SVGA3D instruction sequence.
+ *    CMP  DST, SRC0, SRC2, SRC1
+ */
+static boolean emit_cmp(struct svga_shader_emitter *emit,
+                          const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   const struct src_register src1 = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   const struct src_register src2 = translate_src_register(
+      emit, &insn->FullSrcRegisters[2] );
+
+   /* CMP  DST, SRC0, SRC2, SRC1 */
+   return submit_op3( emit, inst_token( SVGA3DOP_CMP ), dst, src0, src2, src1);
+}
+
+
+
+/* Translate the following TGSI DIV instruction.
+ *    DIV  DST.xy, SRC0, SRC1
+ * To the following SVGA3D instruction sequence.
+ *    RCP  TMP.x, SRC1.xxxx
+ *    RCP  TMP.y, SRC1.yyyy
+ *    MUL  DST.xy, SRC0, TMP
+ */
+static boolean emit_div(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   const struct src_register src1 = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   SVGA3dShaderDestToken temp = get_temp( emit );
+   int i;
+
+   /* For each enabled element, perform a RCP instruction.  Note that
+    * RCP is scalar in SVGA3D:
+    */
+   for (i = 0; i < 4; i++) {
+      unsigned channel = 1 << i;
+      if (dst.mask & channel) {
+         /* RCP  TMP.?, SRC1.???? */
+         if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ), 
+                          writemask(temp, channel), 
+                          scalar(src1, i) ))
+            return FALSE;
+      }
+   }
+
+   /* Then multiply them out with a single mul:
+    *
+    * MUL  DST, SRC0, TMP
+    */
+   if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst, src0,
+                    src( temp ) ))
+      return FALSE;
+
+   return TRUE;
+}
+
+/* Translate the following TGSI DP2 instruction.
+ *    DP2  DST, SRC1, SRC2
+ * To the following SVGA3D instruction sequence.
+ *    MUL  TMP, SRC1, SRC2
+ *    ADD  DST, TMP.xxxx, TMP.yyyy
+ */
+static boolean emit_dp2(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   const struct src_register src1 = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   SVGA3dShaderDestToken temp = get_temp( emit );
+   struct src_register temp_src0, temp_src1;
+
+   /* MUL  TMP, SRC1, SRC2 */
+   if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), temp, src0, src1 ))
+      return FALSE;
+
+   temp_src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
+   temp_src1 = scalar(src( temp ), TGSI_SWIZZLE_Y);
+
+   /* ADD  DST, TMP.xxxx, TMP.yyyy */
+   if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
+                    temp_src0, temp_src1 ))
+      return FALSE;
+
+   return TRUE;
+}
+
+
+/* Translate the following TGSI DPH instruction.
+ *    DPH  DST, SRC1, SRC2
+ * To the following SVGA3D instruction sequence.
+ *    DP3  TMP, SRC1, SRC2
+ *    ADD  DST, TMP, SRC2.wwww
+ */
+static boolean emit_dph(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   struct src_register src1 = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   SVGA3dShaderDestToken temp = get_temp( emit );
+
+   /* DP3  TMP, SRC1, SRC2 */
+   if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src1 ))
+      return FALSE;
+
+   src1 = scalar(src1, TGSI_SWIZZLE_W);
+
+   /* ADD  DST, TMP, SRC2.wwww */
+   if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
+                    src( temp ), src1 ))
+      return FALSE;
+
+   return TRUE;
+}
+
+/* Translate the following TGSI DST instruction.
+ *    NRM  DST, SRC
+ * To the following SVGA3D instruction sequence.
+ *    DP3  TMP, SRC, SRC
+ *    RSQ  TMP, TMP
+ *    MUL  DST, SRC, TMP
+ */
+static boolean emit_nrm(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   SVGA3dShaderDestToken temp = get_temp( emit );
+
+   /* DP3  TMP, SRC, SRC */
+   if (!submit_op2( emit, inst_token( SVGA3DOP_DP3 ), temp, src0, src0 ))
+      return FALSE;
+
+   /* RSQ  TMP, TMP */
+   if (!submit_op1( emit, inst_token( SVGA3DOP_RSQ ), temp, src( temp )))
+      return FALSE;
+
+   /* MUL  DST, SRC, TMP */
+   if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), dst,
+                    src0, src( temp )))
+      return FALSE;
+
+   return TRUE;
+
+}
+
+static boolean do_emit_sincos(struct svga_shader_emitter *emit,
+                              SVGA3dShaderDestToken dst,
+                              struct src_register src0)
+{
+   src0 = scalar(src0, TGSI_SWIZZLE_X);
+
+   if (emit->use_sm30) {
+      return submit_op1( emit, inst_token( SVGA3DOP_SINCOS ),
+                         dst, src0 );
+   } else {
+      struct src_register const1 = get_sincos_const( emit, 0 );
+      struct src_register const2 = get_sincos_const( emit, 1 );
+
+      return submit_op3( emit, inst_token( SVGA3DOP_SINCOS ),
+                         dst, src0, const1, const2 );
+   }
+}
+
+static boolean emit_sincos(struct svga_shader_emitter *emit,
+                           const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   SVGA3dShaderDestToken temp = get_temp( emit );
+
+   /* SCS TMP SRC */
+   if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_XY), src0 ))
+      return FALSE;
+
+   /* MOV DST TMP */
+   if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src( temp ) ))
+      return FALSE;
+
+   return TRUE;
+}
+
+/*
+ * SCS TMP SRC
+ * MOV DST TMP.yyyy
+ */
+static boolean emit_sin(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   SVGA3dShaderDestToken temp = get_temp( emit );
+
+   /* SCS TMP SRC */
+   if (!do_emit_sincos(emit, writemask(temp, TGSI_WRITEMASK_Y), src0))
+      return FALSE;
+
+   src0 = scalar(src( temp ), TGSI_SWIZZLE_Y);
+
+   /* MOV DST TMP.yyyy */
+   if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
+      return FALSE;
+
+   return TRUE;
+}
+
+/*
+ * SCS TMP SRC
+ * MOV DST TMP.xxxx
+ */
+static boolean emit_cos(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   SVGA3dShaderDestToken temp = get_temp( emit );
+
+   /* SCS TMP SRC */
+   if (!do_emit_sincos( emit, writemask(temp, TGSI_WRITEMASK_X), src0 ))
+      return FALSE;
+
+   src0 = scalar(src( temp ), TGSI_SWIZZLE_X);
+
+   /* MOV DST TMP.xxxx */
+   if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src0 ))
+      return FALSE;
+
+   return TRUE;
+}
+
+
+/*
+ * ADD DST SRC0, negate(SRC0)
+ */
+static boolean emit_sub(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   struct src_register src1 = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+
+   src1 = negate(src1);
+
+   if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ), dst,
+                    src0, src1 ))
+      return FALSE;
+
+   return TRUE;
+}
+
+
+static boolean emit_kil(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderInstToken inst;
+   const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[0];
+   struct src_register src0;
+
+   inst = inst_token( SVGA3DOP_TEXKILL );
+   src0 = translate_src_register( emit, reg );
+
+   if (reg->SrcRegisterExtMod.Absolute ||
+       reg->SrcRegister.Negate != reg->SrcRegisterExtMod.Negate ||
+       reg->SrcRegister.Indirect ||
+       reg->SrcRegister.SwizzleX != 0 ||
+       reg->SrcRegister.SwizzleY != 1 ||
+       reg->SrcRegister.SwizzleZ != 2 ||
+       reg->SrcRegister.File != TGSI_FILE_TEMPORARY)
+   {
+      SVGA3dShaderDestToken temp = get_temp( emit );
+
+      submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp, src0 );
+      src0 = src( temp );
+   }
+
+   return submit_op0( emit, inst, dst(src0) );
+}
+
+
+/* mesa state tracker always emits kilp as an unconditional
+ * kil */
+static boolean emit_kilp(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderInstToken inst;
+   SVGA3dShaderDestToken temp;
+   struct src_register one = get_zero_immediate( emit );
+
+   inst = inst_token( SVGA3DOP_TEXKILL );
+   one = scalar( one, TGSI_SWIZZLE_W );
+
+   /* texkill doesn't allow negation on the operand so lets move
+    * negation of {1} to a temp register */
+   temp = get_temp( emit );
+   if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), temp,
+                    negate( one ) ))
+      return FALSE;
+
+   return submit_op0( emit, inst, temp );
+}
+
+/* Implement conditionals by initializing destination reg to 'fail',
+ * then set predicate reg with UFOP_SETP, then move 'pass' to dest
+ * based on predicate reg.
+ *
+ * SETP src0, cmp, src1  -- do this first to avoid aliasing problems.
+ * MOV dst, fail
+ * MOV dst, pass, p0 
+ */
+static boolean
+emit_conditional(struct svga_shader_emitter *emit,
+                 unsigned compare_func,
+                 SVGA3dShaderDestToken dst,
+                 struct src_register src0,
+                 struct src_register src1,
+                 struct src_register pass,
+                 struct src_register fail)
+{
+   SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
+   SVGA3dShaderInstToken setp_token, mov_token;
+   setp_token = inst_token( SVGA3DOP_SETP );
+
+   switch (compare_func) {
+   case PIPE_FUNC_NEVER:
+      return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                         dst, fail );
+      break;
+   case PIPE_FUNC_LESS:
+      setp_token.control = SVGA3DOPCOMP_LT;
+      break;
+   case PIPE_FUNC_EQUAL:
+      setp_token.control = SVGA3DOPCOMP_EQ;
+      break;
+   case PIPE_FUNC_LEQUAL:
+      setp_token.control = SVGA3DOPCOMP_LE;
+      break;
+   case PIPE_FUNC_GREATER:
+      setp_token.control = SVGA3DOPCOMP_GT;
+      break;
+   case PIPE_FUNC_NOTEQUAL:
+      setp_token.control = SVGA3DOPCOMPC_NE;
+      break;
+   case PIPE_FUNC_GEQUAL:
+      setp_token.control = SVGA3DOPCOMP_GE;
+      break;
+   case PIPE_FUNC_ALWAYS:
+      return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                         dst, pass );
+      break;
+   }
+
+   /* SETP src0, COMPOP, src1 */
+   if (!submit_op2( emit, setp_token, pred_reg,
+                    src0, src1 ))
+      return FALSE;
+
+   mov_token = inst_token( SVGA3DOP_MOV );
+
+   /* MOV dst, fail */
+   if (!submit_op1( emit, mov_token, dst,
+                    fail ))
+      return FALSE;
+
+   /* MOV dst, pass (predicated)
+    *
+    * Note that the predicate reg (and possible modifiers) is passed
+    * as the first source argument.
+    */
+   mov_token.predicated = 1;
+   if (!submit_op2( emit, mov_token, dst,
+                    src( pred_reg ), pass ))
+      return FALSE;
+
+   return TRUE;
+}
+
+
+static boolean
+emit_select(struct svga_shader_emitter *emit,
+            unsigned compare_func,
+            SVGA3dShaderDestToken dst,
+            struct src_register src0,
+            struct src_register src1 )
+{
+   /* There are some SVGA instructions which implement some selects
+    * directly, but they are only available in the vertex shader.
+    */
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      switch (compare_func) {
+      case PIPE_FUNC_GEQUAL:
+         return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src0, src1 );
+      case PIPE_FUNC_LEQUAL:
+         return submit_op2( emit, inst_token( SVGA3DOP_SGE ), dst, src1, src0 );
+      case PIPE_FUNC_GREATER:
+         return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src1, src0 );
+      case PIPE_FUNC_LESS:
+         return submit_op2( emit, inst_token( SVGA3DOP_SLT ), dst, src0, src1 );
+      default:
+         break;
+      }
+   }
+
+
+   /* Otherwise, need to use the setp approach:
+    */
+   {
+      struct src_register one, zero;
+      /* zero immediate is 0,0,0,1 */
+      zero = get_zero_immediate( emit );
+      one  = scalar( zero, TGSI_SWIZZLE_W );
+      zero = scalar( zero, TGSI_SWIZZLE_X );
+
+      return emit_conditional(
+         emit,
+         compare_func,
+         dst,
+         src0,
+         src1,
+         one, zero);
+   }
+}
+
+
+static boolean emit_select_op(struct svga_shader_emitter *emit,
+                              unsigned compare,
+                              const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   struct src_register src1 = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+      
+   return emit_select( emit, compare, dst, src0, src1 );
+}
+
+
+/* Translate texture instructions to SVGA3D representation.
+ */
+static boolean emit_tex2(struct svga_shader_emitter *emit,
+                         const struct tgsi_full_instruction *insn,
+                         SVGA3dShaderDestToken dst )
+{
+   SVGA3dShaderInstToken inst;
+   struct src_register src0;
+   struct src_register src1;
+
+   inst.value = 0;
+   inst.op = SVGA3DOP_TEX;
+
+   switch (insn->Instruction.Opcode) {
+   case TGSI_OPCODE_TEX:
+      break;
+   case TGSI_OPCODE_TXP:
+      inst.control = SVGA3DOPCONT_PROJECT;
+      break;
+   case TGSI_OPCODE_TXB:
+      inst.control = SVGA3DOPCONT_BIAS;
+      break;
+   default:
+      assert(0);
+      return FALSE;
+   }
+
+   src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   src1 = translate_src_register( emit, &insn->FullSrcRegisters[1] );
+
+   if (emit->key.fkey.tex[src1.base.num].unnormalized) {
+      struct src_register wh = get_tex_dimensions( emit, src1.base.num );
+      SVGA3dShaderDestToken tmp = get_temp( emit );
+
+      /* MUL  tmp, SRC0, WH */
+      if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
+                       tmp, src0, wh ))
+         return FALSE;
+      src0 = src( tmp );
+   }
+
+   return submit_op2( emit, inst, dst, src0, src1 );
+}
+
+
+
+
+/* Translate texture instructions to SVGA3D representation.
+ */
+static boolean emit_tex3(struct svga_shader_emitter *emit,
+                         const struct tgsi_full_instruction *insn,
+                         SVGA3dShaderDestToken dst )
+{
+   SVGA3dShaderInstToken inst;
+   struct src_register src0;
+   struct src_register src1;
+   struct src_register src2;
+
+   inst.value = 0;
+
+   switch (insn->Instruction.Opcode) {
+   case TGSI_OPCODE_TXD: 
+      inst.op = SVGA3DOP_TEXLDD;
+      break;
+   case TGSI_OPCODE_TXL:
+      inst.op = SVGA3DOP_TEXLDL;
+      break;
+   }
+
+   src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   src1 = translate_src_register( emit, &insn->FullSrcRegisters[1] );
+   src2 = translate_src_register( emit, &insn->FullSrcRegisters[2] );
+
+   return submit_op3( emit, inst, dst, src0, src1, src2 );
+}
+
+
+static boolean emit_tex(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderDestToken dst = 
+      translate_dst_register( emit, insn, 0 );
+   struct src_register src0 =
+      translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   struct src_register src1 =
+      translate_src_register( emit, &insn->FullSrcRegisters[1] );
+
+   SVGA3dShaderDestToken tex_result;
+
+   /* check for shadow samplers */
+   boolean compare = (emit->key.fkey.tex[src1.base.num].compare_mode ==
+                      PIPE_TEX_COMPARE_R_TO_TEXTURE);
+
+
+   /* If doing compare processing, need to put this value into a
+    * temporary so it can be used as a source later on.
+    */
+   if (compare ||
+       (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) ) {
+      tex_result = get_temp( emit );
+   }
+   else {
+      tex_result = dst;
+   }
+
+   switch(insn->Instruction.Opcode) {
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXP:
+      if (!emit_tex2( emit, insn, tex_result ))
+         return FALSE;
+      break;
+   case TGSI_OPCODE_TXL:
+   case TGSI_OPCODE_TXD:
+      if (!emit_tex3( emit, insn, tex_result ))
+         return FALSE;
+      break;
+   default:
+      assert(0);
+   }
+
+
+   if (compare) {
+      SVGA3dShaderDestToken src0_zdivw = get_temp( emit );
+      struct src_register tex_src_x = scalar(src(tex_result), TGSI_SWIZZLE_Y);
+      struct src_register one =
+         scalar( get_zero_immediate( emit ), TGSI_SWIZZLE_W );
+
+      /* Divide texcoord R by Q */
+      if (!submit_op1( emit, inst_token( SVGA3DOP_RCP ),
+                       src0_zdivw,
+                       scalar(src0, TGSI_SWIZZLE_W) ))
+         return FALSE;
+
+      if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
+                       src0_zdivw,
+                       scalar(src0, TGSI_SWIZZLE_Z),
+                       src(src0_zdivw) ))
+         return FALSE;
+
+      if (!emit_select(
+             emit,
+             emit->key.fkey.tex[src1.base.num].compare_func,
+             dst,
+             src(src0_zdivw),
+             tex_src_x))
+         return FALSE;
+
+      return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                         writemask( dst, TGSI_WRITEMASK_W),
+                         one );
+   }
+   else if (!emit->use_sm30 && dst.mask != TGSI_WRITEMASK_XYZW) 
+   {
+      if (!emit_op1( emit, inst_token( SVGA3DOP_MOV ), dst, src(tex_result) ))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+static boolean emit_bgnloop2( struct svga_shader_emitter *emit,
+                              const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_LOOP );
+   struct src_register loop_reg = src_register( SVGA3DREG_LOOP, 0 );
+   struct src_register const_int = get_loop_const( emit );
+
+   return (emit_instruction( emit, inst ) &&
+           emit_src( emit, loop_reg ) &&
+           emit_src( emit, const_int ) );
+}
+
+static boolean emit_endloop2( struct svga_shader_emitter *emit,
+                              const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_ENDLOOP );
+   return emit_instruction( emit, inst );
+}
+
+static boolean emit_brk( struct svga_shader_emitter *emit,
+                         const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderInstToken inst = inst_token( SVGA3DOP_BREAK );
+   return emit_instruction( emit, inst );
+}
+
+static boolean emit_scalar_op1( struct svga_shader_emitter *emit,
+                                unsigned opcode,
+                                const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderInstToken inst;
+   SVGA3dShaderDestToken dst;
+   struct src_register src;
+
+   inst = inst_token( opcode );
+   dst = translate_dst_register( emit, insn, 0 );
+   src = translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   src = scalar( src, TGSI_SWIZZLE_X );
+
+   return submit_op1( emit, inst, dst, src );
+}
+
+
+static boolean emit_simple_instruction(struct svga_shader_emitter *emit,
+                                       unsigned opcode,
+                                       const struct tgsi_full_instruction *insn )
+{
+   const struct tgsi_full_src_register *src = insn->FullSrcRegisters;
+   SVGA3dShaderInstToken inst;
+   SVGA3dShaderDestToken dst;
+
+   inst = inst_token( opcode );
+   dst = translate_dst_register( emit, insn, 0 );
+
+   switch (insn->Instruction.NumSrcRegs) {
+   case 0:
+      return submit_op0( emit, inst, dst );
+   case 1:
+      return submit_op1( emit, inst, dst,
+                         translate_src_register( emit, &src[0] ));
+   case 2:
+      return submit_op2( emit, inst, dst,
+                         translate_src_register( emit, &src[0] ),
+                         translate_src_register( emit, &src[1] ) );
+   case 3:
+      return submit_op3( emit, inst, dst,
+                         translate_src_register( emit, &src[0] ),
+                         translate_src_register( emit, &src[1] ),
+                         translate_src_register( emit, &src[2] ) );
+   default:
+      assert(0);
+      return FALSE;
+   }
+}
+
+static boolean emit_arl(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   ++emit->current_arl;
+   if (svga_arl_needs_adjustment( emit )) {
+      return emit_fake_arl( emit, insn );
+   } else {
+      /* no need to adjust, just emit straight arl */
+      return emit_simple_instruction(emit, SVGA3DOP_MOVA, insn);
+   }
+}
+
+static boolean alias_src_dst( struct src_register src,
+                              SVGA3dShaderDestToken dst )
+{
+   if (src.base.num != dst.num)
+      return FALSE;
+
+   if (SVGA3dShaderGetRegType(dst.value) != 
+       SVGA3dShaderGetRegType(src.base.value))
+      return FALSE;
+
+   return TRUE;
+}
+
+static boolean emit_pow(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   struct src_register src1 = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   boolean need_tmp = FALSE;
+   
+   /* POW can only output to a temporary */
+   if (insn->FullDstRegisters[0].DstRegister.File != TGSI_FILE_TEMPORARY)
+      need_tmp = TRUE;
+   
+   /* POW src1 must not be the same register as dst */
+   if (alias_src_dst( src1, dst ))
+      need_tmp = TRUE;
+
+   /* it's a scalar op */
+   src0 = scalar( src0, TGSI_SWIZZLE_X );
+   src1 = scalar( src1, TGSI_SWIZZLE_X );
+
+   if (need_tmp) {
+      SVGA3dShaderDestToken tmp = writemask(get_temp( emit ), TGSI_WRITEMASK_X );
+
+      if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), tmp, src0, src1))
+         return FALSE;
+
+      return submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, scalar(src(tmp), 0) );
+   } 
+   else {
+      return submit_op2(emit, inst_token( SVGA3DOP_POW ), dst, src0, src1);
+   }
+}
+
+static boolean emit_xpd(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   const struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   const struct src_register src1 = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   boolean need_dst_tmp = FALSE;
+
+   /* XPD can only output to a temporary */
+   if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP) 
+      need_dst_tmp = TRUE;
+
+   /* The dst reg must not be the same as src0 or src1*/
+   if (alias_src_dst(src0, dst) ||
+       alias_src_dst(src1, dst))
+      need_dst_tmp = TRUE;
+
+   if (need_dst_tmp) {
+      SVGA3dShaderDestToken tmp = get_temp( emit );
+
+      /* Obey DX9 restrictions on mask:
+       */
+      tmp.mask = dst.mask & TGSI_WRITEMASK_XYZ;
+
+      if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), tmp, src0, src1))
+         return FALSE;
+
+      if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
+         return FALSE;
+   } 
+   else {
+      if (!submit_op2(emit, inst_token( SVGA3DOP_CRS ), dst, src0, src1))
+         return FALSE;
+   }
+
+   /* Need to emit 1.0 to dst.w?
+    */
+   if (dst.mask & TGSI_WRITEMASK_W) {
+      struct src_register zero = get_zero_immediate( emit );
+
+      if (!submit_op1(emit, 
+                      inst_token( SVGA3DOP_MOV ), 
+                      writemask(dst, TGSI_WRITEMASK_W),
+                      zero))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+static boolean emit_lrp(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   SVGA3dShaderDestToken tmp;
+   const struct src_register src0 = translate_src_register(
+      emit, &insn->FullSrcRegisters[0] );
+   const struct src_register src1 = translate_src_register(
+      emit, &insn->FullSrcRegisters[1] );
+   const struct src_register src2 = translate_src_register(
+      emit, &insn->FullSrcRegisters[2] );
+   boolean need_dst_tmp = FALSE;
+
+   /* The dst reg must not be the same as src0 or src2 */
+   if (alias_src_dst(src0, dst) ||
+       alias_src_dst(src2, dst))
+      need_dst_tmp = TRUE;
+
+   if (need_dst_tmp) {
+      tmp = get_temp( emit );
+      tmp.mask = dst.mask;
+   }
+   else {
+      tmp = dst;
+   }
+
+   if (!submit_op3(emit, inst_token( SVGA3DOP_LRP ), tmp, src0, src1, src2))
+      return FALSE;
+
+   if (need_dst_tmp) {
+      if (!submit_op1(emit, inst_token( SVGA3DOP_MOV ), dst, src( tmp )))
+         return FALSE;      
+   } 
+
+   return TRUE;
+}
+
+
+static boolean emit_dst_insn(struct svga_shader_emitter *emit,
+                             const struct tgsi_full_instruction *insn )
+{
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      /* SVGA/DX9 has a DST instruction, but only for vertex shaders:
+       */
+      return emit_simple_instruction(emit, SVGA3DOP_DST, insn);
+   }
+   else {
+
+      /* result[0] = 1    * 1;
+       * result[1] = a[1] * b[1];
+       * result[2] = a[2] * 1;
+       * result[3] = 1    * b[3];
+       */
+
+      SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+      SVGA3dShaderDestToken tmp;
+      const struct src_register src0 = translate_src_register(
+         emit, &insn->FullSrcRegisters[0] );
+      const struct src_register src1 = translate_src_register(
+         emit, &insn->FullSrcRegisters[1] );
+      struct src_register zero = get_zero_immediate( emit );
+      boolean need_tmp = FALSE;
+
+      if (SVGA3dShaderGetRegType(dst.value) != SVGA3DREG_TEMP ||
+          alias_src_dst(src0, dst) ||
+          alias_src_dst(src1, dst))
+         need_tmp = TRUE;
+
+      if (need_tmp) {
+         tmp = get_temp( emit );
+      }
+      else {
+         tmp = dst;
+      }
+
+      /* tmp.xw = 1.0
+       */
+      if (tmp.mask & TGSI_WRITEMASK_XW) {
+         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 
+                          writemask(tmp, TGSI_WRITEMASK_XW ),
+                          scalar( zero, 3 )))
+            return FALSE;
+      }
+      
+      /* tmp.yz = src0
+       */
+      if (tmp.mask & TGSI_WRITEMASK_YZ) {
+         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 
+                          writemask(tmp, TGSI_WRITEMASK_YZ ),
+                          src0))
+            return FALSE;
+      }
+
+      /* tmp.yw = tmp * src1
+       */
+      if (tmp.mask & TGSI_WRITEMASK_YW) {
+         if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ), 
+                          writemask(tmp, TGSI_WRITEMASK_YW ),
+                          src(tmp),
+                          src1))
+            return FALSE;
+      }
+
+      /* dst = tmp
+       */
+      if (need_tmp) {
+         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 
+                          dst,
+                          src(tmp)))
+            return FALSE;
+      }      
+   }
+   
+   return TRUE;
+}
+
+
+static boolean emit_exp(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register src0 =
+      translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   struct src_register zero = get_zero_immediate( emit );
+   SVGA3dShaderDestToken fraction;
+
+   if (dst.mask & TGSI_WRITEMASK_Y)
+      fraction = dst;
+   else if (dst.mask & TGSI_WRITEMASK_X)
+      fraction = get_temp( emit );
+
+   /* If y is being written, fill it with src0 - floor(src0).
+    */
+   if (dst.mask & TGSI_WRITEMASK_XY) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
+                       writemask( fraction, TGSI_WRITEMASK_Y ),
+                       src0 ))
+         return FALSE;
+   }
+
+   /* If x is being written, fill it with 2 ^ floor(src0).
+    */
+   if (dst.mask & TGSI_WRITEMASK_X) {
+      if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
+                       writemask( dst, dst.mask & TGSI_WRITEMASK_X ),
+                       src0,
+                       scalar( negate( src( fraction ) ), TGSI_SWIZZLE_Y ) ) )
+         return FALSE;
+
+      if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
+                       writemask( dst, dst.mask & TGSI_WRITEMASK_X ),
+                       scalar( src( dst ), TGSI_SWIZZLE_X ) ) )
+         return FALSE;
+
+      if (!(dst.mask & TGSI_WRITEMASK_Y))
+         release_temp( emit, fraction );
+   }
+
+   /* If z is being written, fill it with 2 ^ src0 (partial precision).
+    */
+   if (dst.mask & TGSI_WRITEMASK_Z) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_EXPP ),
+                       writemask( dst, dst.mask & TGSI_WRITEMASK_Z ),
+                       src0 ) )
+         return FALSE;
+   }
+
+   /* If w is being written, fill it with one.
+    */
+   if (dst.mask & TGSI_WRITEMASK_W) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                       writemask(dst, TGSI_WRITEMASK_W),
+                       scalar( zero, TGSI_SWIZZLE_W ) ))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+static boolean emit_lit(struct svga_shader_emitter *emit,
+                             const struct tgsi_full_instruction *insn )
+{
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      /* SVGA/DX9 has a LIT instruction, but only for vertex shaders:
+       */
+      return emit_simple_instruction(emit, SVGA3DOP_LIT, insn);
+   }
+   else {
+
+      /* D3D vs. GL semantics can be fairly easily accomodated by
+       * variations on this sequence.
+       *
+       * GL:
+       *   tmp.y = src.x
+       *   tmp.z = pow(src.y,src.w)
+       *   p0 = src0.xxxx > 0
+       *   result = zero.wxxw
+       *   (p0) result.yz = tmp
+       *
+       * D3D:
+       *   tmp.y = src.x
+       *   tmp.z = pow(src.y,src.w)
+       *   p0 = src0.xxyy > 0
+       *   result = zero.wxxw
+       *   (p0) result.yz = tmp
+       *
+       * Will implement the GL version for now.
+       */
+
+      SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+      SVGA3dShaderDestToken tmp = get_temp( emit );
+      const struct src_register src0 = translate_src_register(
+         emit, &insn->FullSrcRegisters[0] );
+      struct src_register zero = get_zero_immediate( emit );
+
+      /* tmp = pow(src.y, src.w)
+       */
+      if (dst.mask & TGSI_WRITEMASK_Z) {
+         if (!submit_op2(emit, inst_token( SVGA3DOP_POW ), 
+                         tmp, 
+                         scalar(src0, 1), 
+                         scalar(src0, 3)))
+            return FALSE;
+      }
+
+      /* tmp.y = src.x
+       */
+      if (dst.mask & TGSI_WRITEMASK_Y) {
+         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), 
+                          writemask(tmp, TGSI_WRITEMASK_Y ),
+                          scalar(src0, 0)))
+            return FALSE;
+      }
+      
+      /* Can't quite do this with emit conditional due to the extra
+       * writemask on the predicated mov:
+       */
+      {
+         SVGA3dShaderDestToken pred_reg = dst_register( SVGA3DREG_PREDICATE, 0 );
+         SVGA3dShaderInstToken setp_token, mov_token;
+         struct src_register predsrc;
+
+         setp_token = inst_token( SVGA3DOP_SETP );
+         mov_token = inst_token( SVGA3DOP_MOV );
+
+         setp_token.control = SVGA3DOPCOMP_GT;
+
+         /* D3D vs GL semantics:
+          */
+         if (0)
+            predsrc = swizzle(src0, 0, 0, 1, 1); /* D3D */
+         else
+            predsrc = swizzle(src0, 0, 0, 0, 0); /* GL */
+
+         /* SETP src0.xxyy, GT, {0}.x */
+         if (!submit_op2( emit, setp_token, pred_reg,
+                          predsrc, 
+                          swizzle(zero, 0, 0, 0, 0) ))
+            return FALSE;
+         
+         /* MOV dst, fail */
+         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), dst,
+                          swizzle(zero, 3, 0, 0, 3 )))
+             return FALSE;
+
+         /* MOV dst.yz, tmp (predicated)
+          *
+          * Note that the predicate reg (and possible modifiers) is passed
+          * as the first source argument.
+          */
+         if (dst.mask & TGSI_WRITEMASK_YZ) {
+            mov_token.predicated = 1;
+            if (!submit_op2( emit, mov_token,
+                             writemask(dst, TGSI_WRITEMASK_YZ),
+                             src( pred_reg ), src( tmp ) ))
+               return FALSE;
+         }
+      }
+   }
+
+   return TRUE;
+}
+
+
+
+
+static boolean emit_ex2( struct svga_shader_emitter *emit,
+                         const struct tgsi_full_instruction *insn )
+{
+   SVGA3dShaderInstToken inst;
+   SVGA3dShaderDestToken dst;
+   struct src_register src0;
+
+   inst = inst_token( SVGA3DOP_EXP );
+   dst = translate_dst_register( emit, insn, 0 );
+   src0 = translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   src0 = scalar( src0, TGSI_SWIZZLE_X );
+
+   if (dst.mask != TGSI_WRITEMASK_XYZW) {
+      SVGA3dShaderDestToken tmp = get_temp( emit );
+
+      if (!submit_op1( emit, inst, tmp, src0 ))
+         return FALSE;
+
+      return submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                         dst,
+                         scalar( src( tmp ), TGSI_SWIZZLE_X ) );
+   }
+
+   return submit_op1( emit, inst, dst, src0 );
+}
+
+
+static boolean emit_log(struct svga_shader_emitter *emit,
+                        const struct tgsi_full_instruction *insn)
+{
+   SVGA3dShaderDestToken dst = translate_dst_register( emit, insn, 0 );
+   struct src_register src0 =
+      translate_src_register( emit, &insn->FullSrcRegisters[0] );
+   struct src_register zero = get_zero_immediate( emit );
+   SVGA3dShaderDestToken abs_tmp;
+   struct src_register abs_src0;
+   SVGA3dShaderDestToken log2_abs;
+
+   if (dst.mask & TGSI_WRITEMASK_Z)
+      log2_abs = dst;
+   else if (dst.mask & TGSI_WRITEMASK_XY)
+      log2_abs = get_temp( emit );
+
+   /* If z is being written, fill it with log2( abs( src0 ) ).
+    */
+   if (dst.mask & TGSI_WRITEMASK_XYZ) {
+      if (!src0.base.srcMod || src0.base.srcMod == SVGA3DSRCMOD_ABS)
+         abs_src0 = src0;
+      else {
+         abs_tmp = get_temp( emit );
+
+         if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                          abs_tmp,
+                          src0 ) )
+            return FALSE;
+
+         abs_src0 = src( abs_tmp );
+      }
+
+      abs_src0 = absolute( scalar( abs_src0, TGSI_SWIZZLE_X ) );
+
+      if (!submit_op1( emit, inst_token( SVGA3DOP_LOG ),
+                       writemask( log2_abs, TGSI_WRITEMASK_Z ),
+                       abs_src0 ) )
+         return FALSE;
+   }
+
+   if (dst.mask & TGSI_WRITEMASK_XY) {
+      SVGA3dShaderDestToken floor_log2;
+
+      if (dst.mask & TGSI_WRITEMASK_X)
+         floor_log2 = dst;
+      else
+         floor_log2 = get_temp( emit );
+
+      /* If x is being written, fill it with floor( log2( abs( src0 ) ) ).
+       */
+      if (!submit_op1( emit, inst_token( SVGA3DOP_FRC ),
+                       writemask( floor_log2, TGSI_WRITEMASK_X ),
+                       scalar( src( log2_abs ), TGSI_SWIZZLE_Z ) ) )
+         return FALSE;
+
+      if (!submit_op2( emit, inst_token( SVGA3DOP_ADD ),
+                       writemask( floor_log2, TGSI_WRITEMASK_X ),
+                       scalar( src( log2_abs ), TGSI_SWIZZLE_Z ),
+                       negate( src( floor_log2 ) ) ) )
+         return FALSE;
+
+      /* If y is being written, fill it with
+       * abs ( src0 ) / ( 2 ^ floor( log2( abs( src0 ) ) ) ).
+       */
+      if (dst.mask & TGSI_WRITEMASK_Y) {
+         if (!submit_op1( emit, inst_token( SVGA3DOP_EXP ),
+                          writemask( dst, TGSI_WRITEMASK_Y ),
+                          negate( scalar( src( floor_log2 ),
+                                          TGSI_SWIZZLE_X ) ) ) )
+            return FALSE;
+
+         if (!submit_op2( emit, inst_token( SVGA3DOP_MUL ),
+                          writemask( dst, TGSI_WRITEMASK_Y ),
+                          src( dst ),
+                          abs_src0 ) )
+            return FALSE;
+      }
+
+      if (!(dst.mask & TGSI_WRITEMASK_X))
+         release_temp( emit, floor_log2 );
+
+      if (!(dst.mask & TGSI_WRITEMASK_Z))
+         release_temp( emit, log2_abs );
+   }
+
+   if (dst.mask & TGSI_WRITEMASK_XYZ && src0.base.srcMod &&
+       src0.base.srcMod != SVGA3DSRCMOD_ABS)
+      release_temp( emit, abs_tmp );
+
+   /* If w is being written, fill it with one.
+    */
+   if (dst.mask & TGSI_WRITEMASK_W) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ),
+                       writemask(dst, TGSI_WRITEMASK_W),
+                       scalar( zero, TGSI_SWIZZLE_W ) ))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+
+static boolean emit_bgnsub( struct svga_shader_emitter *emit,
+                           unsigned position,
+                           const struct tgsi_full_instruction *insn )
+{
+   unsigned i;
+
+   /* Note that we've finished the main function and are now emitting
+    * subroutines.  This affects how we terminate the generated
+    * shader.
+    */
+   emit->in_main_func = FALSE;
+   
+   for (i = 0; i < emit->nr_labels; i++) {
+      if (emit->label[i] == position) {
+         return (emit_instruction( emit, inst_token( SVGA3DOP_RET ) ) &&
+                 emit_instruction( emit, inst_token( SVGA3DOP_LABEL ) ) &&
+                 emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
+      }
+   }
+
+   assert(0);
+   return TRUE;
+}
+
+static boolean emit_call( struct svga_shader_emitter *emit,
+                           const struct tgsi_full_instruction *insn )
+{
+   unsigned position = insn->InstructionExtLabel.Label;
+   unsigned i;
+   
+   for (i = 0; i < emit->nr_labels; i++) {
+      if (emit->label[i] == position) 
+         break;
+   }
+
+   if (emit->nr_labels == Elements(emit->label))
+      return FALSE;
+
+   if (i == emit->nr_labels) {
+      emit->label[i] = position;
+      emit->nr_labels++;
+   }
+
+   return (emit_instruction( emit, inst_token( SVGA3DOP_CALL ) ) &&
+           emit_src( emit, src_register( SVGA3DREG_LABEL, i )));
+}
+
+
+static boolean emit_end( struct svga_shader_emitter *emit )
+{
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      return emit_vs_postamble( emit );
+   }
+   else {
+      return emit_ps_postamble( emit );
+   }
+}
+
+
+
+static boolean svga_emit_instruction( struct svga_shader_emitter *emit,
+                                      unsigned position,
+                                      const struct tgsi_full_instruction *insn )
+{
+   switch (insn->Instruction.Opcode) {
+
+   case TGSI_OPCODE_ARL:
+      return emit_arl( emit, insn );
+
+   case TGSI_OPCODE_TEX:
+   case TGSI_OPCODE_TXB:
+   case TGSI_OPCODE_TXP:
+   case TGSI_OPCODE_TXL:
+   case TGSI_OPCODE_TXD:
+      return emit_tex( emit, insn );
+
+   case TGSI_OPCODE_BGNSUB:
+      return emit_bgnsub( emit, position, insn );
+
+   case TGSI_OPCODE_ENDSUB:
+      return TRUE;
+
+   case TGSI_OPCODE_CAL:
+      return emit_call( emit, insn );
+
+   case TGSI_OPCODE_FLR:
+   case TGSI_OPCODE_TRUNC:        /* should be TRUNC, not FLR */
+      return emit_floor( emit, insn );
+
+   case TGSI_OPCODE_CMP:
+      return emit_cmp( emit, insn );
+
+   case TGSI_OPCODE_DIV:
+      return emit_div( emit, insn );
+
+   case TGSI_OPCODE_DP2:
+      return emit_dp2( emit, insn );
+
+   case TGSI_OPCODE_DPH:
+      return emit_dph( emit, insn );
+
+   case TGSI_OPCODE_NRM:
+      return emit_nrm( emit, insn );
+
+   case TGSI_OPCODE_COS:
+      return emit_cos( emit, insn );
+
+   case TGSI_OPCODE_SIN:
+      return emit_sin( emit, insn );
+
+   case TGSI_OPCODE_SCS:
+      return emit_sincos( emit, insn );
+
+   case TGSI_OPCODE_END:
+      /* TGSI always finishes the main func with an END */
+      return emit_end( emit );
+
+   case TGSI_OPCODE_KIL:
+      return emit_kil( emit, insn );
+
+      /* Selection opcodes.  The underlying language is fairly
+       * non-orthogonal about these.
+       */
+   case TGSI_OPCODE_SEQ:
+      return emit_select_op( emit, PIPE_FUNC_EQUAL, insn );
+
+   case TGSI_OPCODE_SNE:
+      return emit_select_op( emit, PIPE_FUNC_NOTEQUAL, insn );
+
+   case TGSI_OPCODE_SGT:
+      return emit_select_op( emit, PIPE_FUNC_GREATER, insn );
+
+   case TGSI_OPCODE_SGE:
+      return emit_select_op( emit, PIPE_FUNC_GEQUAL, insn );
+
+   case TGSI_OPCODE_SLT:
+      return emit_select_op( emit, PIPE_FUNC_LESS, insn );
+
+   case TGSI_OPCODE_SLE:
+      return emit_select_op( emit, PIPE_FUNC_LEQUAL, insn );
+
+   case TGSI_OPCODE_SUB:
+      return emit_sub( emit, insn );
+
+   case TGSI_OPCODE_POW:
+      return emit_pow( emit, insn );
+
+   case TGSI_OPCODE_EX2:
+      return emit_ex2( emit, insn );
+
+   case TGSI_OPCODE_EXP:
+      return emit_exp( emit, insn );
+
+   case TGSI_OPCODE_LOG:
+      return emit_log( emit, insn );
+
+   case TGSI_OPCODE_LG2:
+      return emit_scalar_op1( emit, SVGA3DOP_LOG, insn );
+
+   case TGSI_OPCODE_RSQ:
+      return emit_scalar_op1( emit, SVGA3DOP_RSQ, insn );
+
+   case TGSI_OPCODE_RCP:
+      return emit_scalar_op1( emit, SVGA3DOP_RCP, insn );
+
+   case TGSI_OPCODE_CONT:
+   case TGSI_OPCODE_RET:
+      /* This is a noop -- we tell mesa that we can't support RET
+       * within a function (early return), so this will always be
+       * followed by an ENDSUB.
+       */
+      return TRUE;
+
+      /* These aren't actually used by any of the frontends we care
+       * about:
+       */
+   case TGSI_OPCODE_CLAMP:
+   case TGSI_OPCODE_ROUND:
+   case TGSI_OPCODE_AND:
+   case TGSI_OPCODE_OR:
+   case TGSI_OPCODE_I2F:
+   case TGSI_OPCODE_NOT:
+   case TGSI_OPCODE_SHL:
+   case TGSI_OPCODE_SHR:
+   case TGSI_OPCODE_XOR:
+      return FALSE;
+
+   case TGSI_OPCODE_IF:
+      return emit_if( emit, insn );
+   case TGSI_OPCODE_ELSE:
+      return emit_else( emit, insn );
+   case TGSI_OPCODE_ENDIF:
+      return emit_endif( emit, insn );
+
+   case TGSI_OPCODE_BGNLOOP:
+      return emit_bgnloop2( emit, insn );
+   case TGSI_OPCODE_ENDLOOP:
+      return emit_endloop2( emit, insn );
+   case TGSI_OPCODE_BRK:
+      return emit_brk( emit, insn );
+
+   case TGSI_OPCODE_XPD:
+      return emit_xpd( emit, insn );
+
+   case TGSI_OPCODE_KILP:
+      return emit_kilp( emit, insn );
+
+   case TGSI_OPCODE_DST:
+      return emit_dst_insn( emit, insn );
+
+   case TGSI_OPCODE_LIT:
+      return emit_lit( emit, insn );
+
+   case TGSI_OPCODE_LRP:
+      return emit_lrp( emit, insn );
+
+   default: {
+      unsigned opcode = translate_opcode(insn->Instruction.Opcode);
+
+      if (opcode == SVGA3DOP_LAST_INST)
+         return FALSE;
+
+      if (!emit_simple_instruction( emit, opcode, insn ))
+         return FALSE;
+   }
+   }
+
+   return TRUE;
+}
+
+
+static boolean svga_emit_immediate( struct svga_shader_emitter *emit,
+                                    struct tgsi_full_immediate *imm)
+{
+   static const float id[4] = {0,0,0,1};
+   float value[4];
+   unsigned i;
+
+   assert(1 <= imm->Immediate.NrTokens && imm->Immediate.NrTokens <= 5);
+   for (i = 0; i < imm->Immediate.NrTokens - 1; i++)
+      value[i] = imm->u[i].Float;
+
+   for ( ; i < 4; i++ )
+      value[i] = id[i];
+
+   return emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
+                          emit->imm_start + emit->internal_imm_count++,
+                          value[0], value[1], value[2], value[3]);
+}
+
+static boolean make_immediate( struct svga_shader_emitter *emit,
+                               float a,
+                               float b,
+                               float c,
+                               float d,
+                               struct src_register *out )
+{
+   unsigned idx = emit->nr_hw_const++;
+
+   if (!emit_def_const( emit, SVGA3D_CONST_TYPE_FLOAT,
+                        idx, a, b, c, d ))
+      return FALSE;
+
+   *out = src_register( SVGA3DREG_CONST, idx );
+
+   return TRUE;
+}
+
+static boolean emit_vs_preamble( struct svga_shader_emitter *emit )
+{
+   if (!emit->key.vkey.need_prescale) {
+      if (!make_immediate( emit, 0, 0, .5, .5,
+                           &emit->imm_0055))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+static boolean emit_ps_preamble( struct svga_shader_emitter *emit )
+{
+   unsigned i;
+
+   /* For SM20, need to initialize the temporaries we're using to hold
+    * color outputs to some value.  Shaders which don't set all of
+    * these values are likely to be rejected by the DX9 runtime.
+    */
+   if (!emit->use_sm30) {
+      struct src_register zero = get_zero_immediate( emit );
+      for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+         if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
+            
+            if (!submit_op1( emit,
+                             inst_token(SVGA3DOP_MOV),
+                             emit->temp_col[i],
+                             zero ))
+               return FALSE;
+         }
+      }
+   }
+   
+   return TRUE;
+}
+
+static boolean emit_ps_postamble( struct svga_shader_emitter *emit )
+{
+   unsigned i;
+
+   /* PS oDepth is incredibly fragile and it's very hard to catch the
+    * types of usage that break it during shader emit.  Easier just to
+    * redirect the main program to a temporary and then only touch
+    * oDepth with a hand-crafted MOV below.
+    */
+   if (SVGA3dShaderGetRegType(emit->true_pos.value) != 0) {
+
+      if (!submit_op1( emit,
+                       inst_token(SVGA3DOP_MOV),
+                       emit->true_pos,
+                       scalar(src(emit->temp_pos), TGSI_SWIZZLE_Z) ))
+         return FALSE;
+   }
+
+   /* Similarly for SM20 color outputs...  Luckily SM30 isn't so
+    * fragile.
+    */
+   for (i = 0; i < PIPE_MAX_COLOR_BUFS; i++) {
+      if (SVGA3dShaderGetRegType(emit->true_col[i].value) != 0) {
+
+         if (!submit_op1( emit,
+                          inst_token(SVGA3DOP_MOV),
+                          emit->true_col[i],
+                          src(emit->temp_col[i]) ))
+            return FALSE;
+      }
+   }
+
+   return TRUE;
+}
+
+static boolean emit_vs_postamble( struct svga_shader_emitter *emit )
+{
+   /* PSIZ output is incredibly fragile and it's very hard to catch
+    * the types of usage that break it during shader emit.  Easier
+    * just to redirect the main program to a temporary and then only
+    * touch PSIZ with a hand-crafted MOV below.
+    */
+   if (SVGA3dShaderGetRegType(emit->true_psiz.value) != 0) {
+      
+      if (!submit_op1( emit,
+                       inst_token(SVGA3DOP_MOV),
+                       emit->true_psiz,
+                       scalar(src(emit->temp_psiz), TGSI_SWIZZLE_X) ))
+         return FALSE;
+   }
+
+   /* Need to perform various manipulations on vertex position to cope
+    * with the different GL and D3D clip spaces.
+    */
+   if (emit->key.vkey.need_prescale) {
+      SVGA3dShaderDestToken temp_pos = emit->temp_pos;
+      SVGA3dShaderDestToken pos = emit->true_pos;
+      unsigned offset = emit->info.file_max[TGSI_FILE_CONSTANT] + 1;
+      struct src_register prescale_scale = src_register( SVGA3DREG_CONST, 
+                                                         offset + 0 ); 
+      struct src_register prescale_trans = src_register( SVGA3DREG_CONST, 
+                                                         offset + 1 ); 
+
+      /* MUL temp_pos.xyz,    temp_pos,      prescale.scale
+       * MAD result.position, temp_pos.wwww, prescale.trans, temp_pos
+       *   --> Note that prescale.trans.w == 0
+       */
+      if (!submit_op2( emit, 
+                       inst_token(SVGA3DOP_MUL), 
+                       writemask(temp_pos, TGSI_WRITEMASK_XYZ), 
+                       src(temp_pos),
+                       prescale_scale ))
+         return FALSE;
+
+      if (!submit_op3( emit, 
+                       inst_token(SVGA3DOP_MAD), 
+                       pos, 
+                       swizzle(src(temp_pos), 3, 3, 3, 3),
+                       prescale_trans,
+                       src(temp_pos)))
+         return FALSE;
+   }
+   else {
+      SVGA3dShaderDestToken temp_pos = emit->temp_pos;
+      SVGA3dShaderDestToken pos = emit->true_pos;
+      struct src_register imm_0055 = emit->imm_0055;
+
+      /* Adjust GL clipping coordinate space to hardware (D3D-style):
+       *
+       * DP4 temp_pos.z, {0,0,.5,.5}, temp_pos
+       * MOV result.position, temp_pos 
+       */
+      if (!submit_op2( emit, 
+                       inst_token(SVGA3DOP_DP4), 
+                       writemask(temp_pos, TGSI_WRITEMASK_Z), 
+                       imm_0055, 
+                       src(temp_pos) ))
+         return FALSE;
+
+      if (!submit_op1( emit,
+                       inst_token(SVGA3DOP_MOV),
+                       pos,
+                       src(temp_pos) ))
+         return FALSE;
+   }
+
+   return TRUE;
+}
+
+/*
+  0: IF VFACE :4
+  1:   COLOR = FrontColor;
+  2: ELSE
+  3:   COLOR = BackColor;
+  4: ENDIF
+ */
+static boolean emit_light_twoside( struct svga_shader_emitter *emit )
+{
+   struct src_register vface, zero;
+   struct src_register front[2];
+   struct src_register back[2];
+   SVGA3dShaderDestToken color[2];
+   int count =  emit->internal_color_count;
+   int i;
+   SVGA3dShaderInstToken if_token;
+
+   if (count == 0)
+      return TRUE;
+
+   vface = get_vface( emit );
+   zero = get_zero_immediate( emit );
+
+   /* Can't use get_temp() to allocate the color reg as such
+    * temporaries will be reclaimed after each instruction by the call
+    * to reset_temp_regs().
+    */
+   for (i = 0; i < count; i++) {
+      color[i] = dst_register( SVGA3DREG_TEMP, 
+                               emit->nr_hw_temp++ );
+
+      front[i] = emit->input_map[emit->internal_color_idx[i]];
+
+      /* Back is always the next input:
+       */
+      back[i] = front[i];
+      back[i].base.num = front[i].base.num + 1;
+
+      /* Reassign the input_map to the actual front-face color:
+       */
+      emit->input_map[emit->internal_color_idx[i]] = src(color[i]);
+   }
+   
+   if_token = inst_token( SVGA3DOP_IFC );
+
+   if (emit->key.fkey.front_cw)
+      if_token.control = SVGA3DOPCOMP_GT;
+   else
+      if_token.control = SVGA3DOPCOMP_LT;
+
+   zero = scalar(zero, TGSI_SWIZZLE_X);
+
+   if (!(emit_instruction( emit, if_token ) &&
+         emit_src( emit, vface ) &&
+         emit_src( emit, zero ) ))
+      return FALSE;
+
+   for (i = 0; i < count; i++) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], front[i] ))
+         return FALSE;
+   }
+
+   if (!(emit_instruction( emit, inst_token( SVGA3DOP_ELSE))))
+      return FALSE;
+   
+   for (i = 0; i < count; i++) {
+      if (!submit_op1( emit, inst_token( SVGA3DOP_MOV ), color[i], back[i] ))
+         return FALSE;
+   }
+
+   if (!emit_instruction( emit, inst_token( SVGA3DOP_ENDIF ) ))
+      return FALSE;
+
+   return TRUE;
+}
+
+/*
+  0: SETP_GT TEMP, VFACE, 0
+  where TEMP is a fake frontface register
+ */
+static boolean emit_frontface( struct svga_shader_emitter *emit )
+{
+   struct src_register vface, zero;
+   SVGA3dShaderDestToken temp;
+   struct src_register pass, fail;
+
+   vface = get_vface( emit );
+   zero = get_zero_immediate( emit );
+
+   /* Can't use get_temp() to allocate the fake frontface reg as such
+    * temporaries will be reclaimed after each instruction by the call
+    * to reset_temp_regs().
+    */
+   temp = dst_register( SVGA3DREG_TEMP,
+                        emit->nr_hw_temp++ );
+
+   if (emit->key.fkey.front_cw) {
+      pass = scalar( zero, TGSI_SWIZZLE_W );
+      fail = scalar( zero, TGSI_SWIZZLE_X );
+   } else {
+      pass = scalar( zero, TGSI_SWIZZLE_X );
+      fail = scalar( zero, TGSI_SWIZZLE_W );
+   }
+
+   if (!emit_conditional(emit, PIPE_FUNC_GREATER,
+                         temp, vface, scalar( zero, TGSI_SWIZZLE_X ),
+                         pass, fail))
+      return FALSE;
+
+   /* Reassign the input_map to the actual front-face color:
+    */
+   emit->input_map[emit->internal_frontface_idx] = src(temp);
+
+   return TRUE;
+}
+
+static INLINE boolean
+needs_to_create_zero( struct svga_shader_emitter *emit )
+{
+   int i;
+
+   if (emit->unit == PIPE_SHADER_FRAGMENT) {
+      if (!emit->use_sm30)
+         return TRUE;
+
+      if (emit->key.fkey.light_twoside)
+         return TRUE;
+
+      if (emit->emit_frontface)
+         return TRUE;
+
+      if (emit->info.opcode_count[TGSI_OPCODE_DST] >= 1 ||
+          emit->info.opcode_count[TGSI_OPCODE_LIT] >= 1)
+         return TRUE;
+   }
+
+   if (emit->info.opcode_count[TGSI_OPCODE_IF] >= 1 ||
+       emit->info.opcode_count[TGSI_OPCODE_SGE] >= 1 ||
+       emit->info.opcode_count[TGSI_OPCODE_SGT] >= 1 ||
+       emit->info.opcode_count[TGSI_OPCODE_SLE] >= 1 ||
+       emit->info.opcode_count[TGSI_OPCODE_SLT] >= 1 ||
+       emit->info.opcode_count[TGSI_OPCODE_SNE] >= 1 ||
+       emit->info.opcode_count[TGSI_OPCODE_SEQ] >= 1 ||
+       emit->info.opcode_count[TGSI_OPCODE_EXP] >= 1 ||
+       emit->info.opcode_count[TGSI_OPCODE_LOG] >= 1 ||
+       emit->info.opcode_count[TGSI_OPCODE_XPD] >= 1 ||
+       emit->info.opcode_count[TGSI_OPCODE_KILP] >= 1)
+      return TRUE;
+
+   for (i = 0; i < emit->key.fkey.num_textures; i++) {
+      if (emit->key.fkey.tex[i].compare_mode == PIPE_TEX_COMPARE_R_TO_TEXTURE)
+         return TRUE;
+   }
+
+   return FALSE;
+}
+
+static INLINE boolean
+needs_to_create_loop_const( struct svga_shader_emitter *emit )
+{
+   return (emit->info.opcode_count[TGSI_OPCODE_BGNLOOP] >= 1);
+}
+
+static INLINE boolean
+needs_to_create_sincos_consts( struct svga_shader_emitter *emit )
+{
+   return !emit->use_sm30 && (emit->info.opcode_count[TGSI_OPCODE_SIN] >= 1 ||
+                              emit->info.opcode_count[TGSI_OPCODE_COS] >= 1 ||
+                              emit->info.opcode_count[TGSI_OPCODE_SCS] >= 1);
+}
+
+static INLINE boolean
+needs_to_create_arl_consts( struct svga_shader_emitter *emit )
+{
+   return (emit->num_arl_consts > 0);
+}
+
+static INLINE boolean
+pre_parse_add_indirect( struct svga_shader_emitter *emit,
+                        int num, int current_arl)
+{
+   int i;
+   assert(num < 0);
+
+   for (i = 0; i < emit->num_arl_consts; ++i) {
+      if (emit->arl_consts[i].arl_num == current_arl)
+         break;
+   }
+   /* new entry */
+   if (emit->num_arl_consts == i) {
+      ++emit->num_arl_consts;
+   }
+   emit->arl_consts[i].number = (emit->arl_consts[i].number > num) ?
+                                num :
+                                emit->arl_consts[i].number;
+   emit->arl_consts[i].arl_num = current_arl;
+   return TRUE;
+}
+
+static boolean
+pre_parse_instruction( struct svga_shader_emitter *emit,
+                       const struct tgsi_full_instruction *insn,
+                       int current_arl)
+{
+   if (insn->FullSrcRegisters[0].SrcRegister.Indirect &&
+       insn->FullSrcRegisters[0].SrcRegisterInd.File == TGSI_FILE_ADDRESS) {
+      const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[0];
+      if (reg->SrcRegister.Index < 0) {
+         pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl);
+      }
+   }
+
+   if (insn->FullSrcRegisters[1].SrcRegister.Indirect &&
+       insn->FullSrcRegisters[1].SrcRegisterInd.File == TGSI_FILE_ADDRESS) {
+      const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[1];
+      if (reg->SrcRegister.Index < 0) {
+         pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl);
+      }
+   }
+
+   if (insn->FullSrcRegisters[2].SrcRegister.Indirect &&
+       insn->FullSrcRegisters[2].SrcRegisterInd.File == TGSI_FILE_ADDRESS) {
+      const struct tgsi_full_src_register *reg = &insn->FullSrcRegisters[2];
+      if (reg->SrcRegister.Index < 0) {
+         pre_parse_add_indirect(emit, reg->SrcRegister.Index, current_arl);
+      }
+   }
+
+   return TRUE;
+}
+
+static boolean
+pre_parse_tokens( struct svga_shader_emitter *emit,
+                  const struct tgsi_token *tokens )
+{
+   struct tgsi_parse_context parse;
+   int current_arl = 0;
+
+   tgsi_parse_init( &parse, tokens );
+
+   while (!tgsi_parse_end_of_tokens( &parse )) {
+      tgsi_parse_token( &parse );
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         break;
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if (parse.FullToken.FullInstruction.Instruction.Opcode ==
+             TGSI_OPCODE_ARL) {
+            ++current_arl;
+         }
+         if (!pre_parse_instruction( emit, &parse.FullToken.FullInstruction,
+                                     current_arl ))
+            return FALSE;
+         break;
+      default:
+         break;
+      }
+
+   }
+   return TRUE;
+}
+
+static boolean svga_shader_emit_helpers( struct svga_shader_emitter *emit )
+
+{
+   if (needs_to_create_zero( emit )) {
+      create_zero_immediate( emit );
+   }
+   if (needs_to_create_loop_const( emit )) {
+      create_loop_const( emit );
+   }
+   if (needs_to_create_sincos_consts( emit )) {
+      create_sincos_consts( emit );
+   }
+   if (needs_to_create_arl_consts( emit )) {
+      create_arl_consts( emit );
+   }
+
+   if (emit->unit == PIPE_SHADER_FRAGMENT) {
+      if (!emit_ps_preamble( emit ))
+         return FALSE;
+
+      if (emit->key.fkey.light_twoside) {
+         if (!emit_light_twoside( emit ))
+            return FALSE;
+      }
+      if (emit->emit_frontface) {
+         if (!emit_frontface( emit ))
+            return FALSE;
+      }
+   }
+
+   return TRUE;
+}
+
+boolean svga_shader_emit_instructions( struct svga_shader_emitter *emit,
+                                       const struct tgsi_token *tokens )
+{
+   struct tgsi_parse_context parse;
+   boolean ret = TRUE;
+   boolean helpers_emitted = FALSE;
+   unsigned line_nr = 0;
+
+   tgsi_parse_init( &parse, tokens );
+   emit->internal_imm_count = 0;
+
+   if (emit->unit == PIPE_SHADER_VERTEX) {
+      ret = emit_vs_preamble( emit );
+      if (!ret)
+         goto done;
+   }
+
+   pre_parse_tokens(emit, tokens);
+
+   while (!tgsi_parse_end_of_tokens( &parse )) {
+      tgsi_parse_token( &parse );
+
+      switch (parse.FullToken.Token.Type) {
+      case TGSI_TOKEN_TYPE_IMMEDIATE:
+         ret = svga_emit_immediate( emit, &parse.FullToken.FullImmediate );
+         if (!ret)
+            goto done;
+         break;
+
+      case TGSI_TOKEN_TYPE_DECLARATION:
+         if (emit->use_sm30)
+            ret = svga_translate_decl_sm30( emit, &parse.FullToken.FullDeclaration );
+         else
+            ret = svga_translate_decl_sm20( emit, &parse.FullToken.FullDeclaration );
+         if (!ret)
+            goto done;
+         break;
+         
+      case TGSI_TOKEN_TYPE_INSTRUCTION:
+         if (!helpers_emitted) {
+            if (!svga_shader_emit_helpers( emit ))
+               goto done;
+            helpers_emitted = TRUE;
+         }
+         ret = svga_emit_instruction( emit, 
+                                      line_nr++,
+                                      &parse.FullToken.FullInstruction );
+         if (!ret)
+            goto done;
+         break;
+      default:
+         break;
+      }
+      
+      reset_temp_regs( emit );
+   }
+
+   /* Need to terminate the current subroutine.  Note that the
+    * hardware doesn't tolerate shaders without sub-routines
+    * terminating with RET+END.
+    */
+   if (!emit->in_main_func) {
+      ret = emit_instruction( emit, inst_token( SVGA3DOP_RET ) );
+      if (!ret)
+         goto done;
+   }
+
+   /* Need to terminate the whole shader:
+    */
+   ret = emit_instruction( emit, inst_token( SVGA3DOP_END ) );
+   if (!ret)
+      goto done;
+
+done:
+   assert(ret);
+   tgsi_parse_free( &parse );
+   return ret;
+}
+
diff --git a/src/gallium/drivers/svga/svga_winsys.h b/src/gallium/drivers/svga/svga_winsys.h
new file mode 100644
index 00000000000..59f299c1858
--- /dev/null
+++ b/src/gallium/drivers/svga/svga_winsys.h
@@ -0,0 +1,299 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * VMware SVGA specific winsys interface.
+ * 
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ * 
+ * Documentation taken from the VMware SVGA DDK.
+ */
+
+#ifndef SVGA_WINSYS_H_
+#define SVGA_WINSYS_H_
+
+
+#include "svga_types.h"
+#include "svga_reg.h"
+#include "svga3d_reg.h"
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_defines.h"
+
+
+struct svga_winsys_screen;
+struct svga_winsys_buffer;
+struct pipe_screen;
+struct pipe_context;
+struct pipe_fence_handle;
+struct pipe_texture;
+struct svga_region;
+
+
+#define SVGA_BUFFER_USAGE_PINNED  (PIPE_BUFFER_USAGE_CUSTOM << 0)
+#define SVGA_BUFFER_USAGE_WRAPPED (PIPE_BUFFER_USAGE_CUSTOM << 1)
+
+
+/** Opaque surface handle */
+struct svga_winsys_surface;
+
+/** Opaque buffer handle */
+struct svga_winsys_handle;
+
+
+/**
+ * SVGA per-context winsys interface.
+ */
+struct svga_winsys_context
+{
+   void
+   (*destroy)(struct svga_winsys_context *swc);
+
+   void *       
+   (*reserve)(struct svga_winsys_context *swc, 
+	      uint32_t nr_bytes, uint32_t nr_relocs );
+   
+   /**
+    * Emit a relocation for a host surface.
+    * 
+    * @param flags PIPE_BUFFER_USAGE_GPU_READ/WRITE
+    * 
+    * NOTE: Order of this call does matter. It should be the same order
+    * as relocations appear in the command buffer.
+    */
+   void
+   (*surface_relocation)(struct svga_winsys_context *swc, 
+	                 uint32 *sid, 
+	                 struct svga_winsys_surface *surface,
+	                 unsigned flags);
+   
+   /**
+    * Emit a relocation for a guest memory region.
+    * 
+    * @param flags PIPE_BUFFER_USAGE_GPU_READ/WRITE
+    * 
+    * NOTE: Order of this call does matter. It should be the same order
+    * as relocations appear in the command buffer.
+    */
+   void
+   (*region_relocation)(struct svga_winsys_context *swc, 
+	                struct SVGAGuestPtr *ptr, 
+	                struct svga_winsys_buffer *buffer,
+	                uint32 offset,
+                        unsigned flags);
+
+   void
+   (*commit)(struct svga_winsys_context *swc);
+   
+   enum pipe_error
+   (*flush)(struct svga_winsys_context *swc, 
+	    struct pipe_fence_handle **pfence);
+
+   /** 
+    * Context ID used to fill in the commands
+    * 
+    * Context IDs are arbitrary small non-negative integers,
+    * global to the entire SVGA device.
+    */
+   uint32 cid;
+};
+
+
+/**
+ * SVGA per-screen winsys interface.
+ */
+struct svga_winsys_screen
+{
+   void
+   (*destroy)(struct svga_winsys_screen *sws);
+   
+   boolean
+   (*get_cap)(struct svga_winsys_screen *sws,
+              SVGA3dDevCapIndex index,
+              SVGA3dDevCapResult *result);
+   
+   /**
+    * Create a new context.
+    *
+    * Context objects encapsulate all render state, and shader
+    * objects are per-context.
+    *
+    * Surfaces are not per-context. The same surface can be shared
+    * between multiple contexts, and surface operations can occur
+    * without a context.
+    */
+   struct svga_winsys_context *
+   (*context_create)(struct svga_winsys_screen *sws);
+   
+   
+   /**
+    * This creates a "surface" object in the SVGA3D device,
+    * and returns the surface ID (sid). Surfaces are generic
+    * containers for host VRAM objects like textures, vertex
+    * buffers, and depth/stencil buffers.
+    *
+    * Surfaces are hierarchial:
+    *
+    * - Surface may have multiple faces (for cube maps)
+    *
+    * - Each face has a list of mipmap levels
+    *
+    * - Each mipmap image may have multiple volume
+    *   slices, if the image is three dimensional.
+    *
+    * - Each slice is a 2D array of 'blocks'
+    *
+    * - Each block may be one or more pixels.
+    *   (Usually 1, more for DXT or YUV formats.)
+    *
+    * Surfaces are generic host VRAM objects. The SVGA3D device
+    * may optimize surfaces according to the format they were
+    * created with, but this format does not limit the ways in
+    * which the surface may be used. For example, a depth surface
+    * can be used as a texture, or a floating point image may
+    * be used as a vertex buffer. Some surface usages may be
+    * lower performance, due to software emulation, but any
+    * usage should work with any surface.
+    */
+   struct svga_winsys_surface *
+   (*surface_create)(struct svga_winsys_screen *sws,
+                     SVGA3dSurfaceFlags flags,
+                     SVGA3dSurfaceFormat format,
+                     SVGA3dSize size,
+                     uint32 numFaces,
+                     uint32 numMipLevels);
+
+   /**
+    * Whether this surface is sitting in a validate list
+    */
+   boolean
+   (*surface_is_flushed)(struct svga_winsys_screen *sws,
+                         struct svga_winsys_surface *surface);
+
+   /**
+    * Reference a SVGA3D surface object. This allows sharing of a
+    * surface between different objects.
+    */
+   void 
+   (*surface_reference)(struct svga_winsys_screen *sws,
+			struct svga_winsys_surface **pdst,
+			struct svga_winsys_surface *src);
+
+   /**
+    * Buffer management. Buffer attributes are mostly fixed over its lifetime.
+    *
+    * Remember that gallium gets to choose the interface it needs, and the
+    * window systems must then implement that interface (rather than the
+    * other way around...).
+    *
+    * usage is a bitmask of PIPE_BUFFER_USAGE_PIXEL/VERTEX/INDEX/CONSTANT. This
+    * usage argument is only an optimization hint, not a guarantee, therefore 
+    * proper behavior must be observed in all circumstances.
+    *
+    * alignment indicates the client's alignment requirements, eg for
+    * SSE instructions.
+    */
+   struct svga_winsys_buffer *
+   (*buffer_create)( struct svga_winsys_screen *sws, 
+	             unsigned alignment, 
+	             unsigned usage,
+	             unsigned size );
+
+   /** 
+    * Map the entire data store of a buffer object into the client's address.
+    * flags is a bitmask of:
+    * - PIPE_BUFFER_USAGE_CPU_READ/WRITE
+    * - PIPE_BUFFER_USAGE_DONTBLOCK
+    * - PIPE_BUFFER_USAGE_UNSYNCHRONIZED
+    */
+   void *
+   (*buffer_map)( struct svga_winsys_screen *sws, 
+	          struct svga_winsys_buffer *buf,
+		  unsigned usage );
+   
+   void 
+   (*buffer_unmap)( struct svga_winsys_screen *sws, 
+                    struct svga_winsys_buffer *buf );
+
+   void 
+   (*buffer_destroy)( struct svga_winsys_screen *sws,
+	              struct svga_winsys_buffer *buf );
+
+
+   /**
+    * Reference a fence object.
+    */
+   void
+   (*fence_reference)( struct svga_winsys_screen *sws,
+                       struct pipe_fence_handle **pdst,
+                       struct pipe_fence_handle *src );
+
+   /**
+    * Checks whether the fence has been signalled.
+    * \param flags  driver-specific meaning
+    * \return zero on success.
+    */
+   int (*fence_signalled)( struct svga_winsys_screen *sws,
+                           struct pipe_fence_handle *fence,
+                           unsigned flag );
+
+   /**
+    * Wait for the fence to finish.
+    * \param flags  driver-specific meaning
+    * \return zero on success.
+    */
+   int (*fence_finish)( struct svga_winsys_screen *sws,
+                        struct pipe_fence_handle *fence,
+                        unsigned flag );
+
+};
+
+
+struct pipe_context *
+svga_context_create(struct pipe_screen *screen);
+
+struct pipe_screen *
+svga_screen_create(struct svga_winsys_screen *sws);
+
+struct svga_winsys_screen *
+svga_winsys_screen(struct pipe_screen *screen);
+
+struct pipe_buffer *
+svga_screen_buffer_wrap_surface(struct pipe_screen *screen,
+				enum SVGA3dSurfaceFormat format,
+				struct svga_winsys_surface *srf);
+
+struct svga_winsys_surface *
+svga_screen_texture_get_winsys_surface(struct pipe_texture *texture);
+struct svga_winsys_surface *
+svga_screen_buffer_get_winsys_surface(struct pipe_buffer *buffer);
+
+boolean
+svga_screen_buffer_from_texture(struct pipe_texture *texture,
+				struct pipe_buffer **buffer,
+				unsigned *stride);
+
+#endif /* SVGA_WINSYS_H_ */
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.c b/src/gallium/drivers/svga/svgadump/svga_dump.c
new file mode 100644
index 00000000000..910afa25287
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.c
@@ -0,0 +1,1736 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * Dump SVGA commands.
+ *
+ * Generated automatically from svga3d_reg.h by svga_dump.py.
+ */
+
+#include "svga_types.h"
+#include "svga_shader_dump.h"
+#include "svga3d_reg.h"
+
+#include "util/u_debug.h"
+#include "svga_dump.h"
+
+static void
+dump_SVGA3dVertexDecl(const SVGA3dVertexDecl *cmd)
+{
+   switch((*cmd).identity.type) {
+   case SVGA3D_DECLTYPE_FLOAT1:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT1\n");
+      break;
+   case SVGA3D_DECLTYPE_FLOAT2:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT2\n");
+      break;
+   case SVGA3D_DECLTYPE_FLOAT3:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT3\n");
+      break;
+   case SVGA3D_DECLTYPE_FLOAT4:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT4\n");
+      break;
+   case SVGA3D_DECLTYPE_D3DCOLOR:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_D3DCOLOR\n");
+      break;
+   case SVGA3D_DECLTYPE_UBYTE4:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4\n");
+      break;
+   case SVGA3D_DECLTYPE_SHORT2:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2\n");
+      break;
+   case SVGA3D_DECLTYPE_SHORT4:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4\n");
+      break;
+   case SVGA3D_DECLTYPE_UBYTE4N:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UBYTE4N\n");
+      break;
+   case SVGA3D_DECLTYPE_SHORT2N:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT2N\n");
+      break;
+   case SVGA3D_DECLTYPE_SHORT4N:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_SHORT4N\n");
+      break;
+   case SVGA3D_DECLTYPE_USHORT2N:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT2N\n");
+      break;
+   case SVGA3D_DECLTYPE_USHORT4N:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_USHORT4N\n");
+      break;
+   case SVGA3D_DECLTYPE_UDEC3:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_UDEC3\n");
+      break;
+   case SVGA3D_DECLTYPE_DEC3N:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_DEC3N\n");
+      break;
+   case SVGA3D_DECLTYPE_FLOAT16_2:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_2\n");
+      break;
+   case SVGA3D_DECLTYPE_FLOAT16_4:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_FLOAT16_4\n");
+      break;
+   case SVGA3D_DECLTYPE_MAX:
+      debug_printf("\t\t.identity.type = SVGA3D_DECLTYPE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.identity.type = %i\n", (*cmd).identity.type);
+      break;
+   }
+   switch((*cmd).identity.method) {
+   case SVGA3D_DECLMETHOD_DEFAULT:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_DEFAULT\n");
+      break;
+   case SVGA3D_DECLMETHOD_PARTIALU:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALU\n");
+      break;
+   case SVGA3D_DECLMETHOD_PARTIALV:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_PARTIALV\n");
+      break;
+   case SVGA3D_DECLMETHOD_CROSSUV:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_CROSSUV\n");
+      break;
+   case SVGA3D_DECLMETHOD_UV:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_UV\n");
+      break;
+   case SVGA3D_DECLMETHOD_LOOKUP:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUP\n");
+      break;
+   case SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED:
+      debug_printf("\t\t.identity.method = SVGA3D_DECLMETHOD_LOOKUPPRESAMPLED\n");
+      break;
+   default:
+      debug_printf("\t\t.identity.method = %i\n", (*cmd).identity.method);
+      break;
+   }
+   switch((*cmd).identity.usage) {
+   case SVGA3D_DECLUSAGE_POSITION:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITION\n");
+      break;
+   case SVGA3D_DECLUSAGE_BLENDWEIGHT:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDWEIGHT\n");
+      break;
+   case SVGA3D_DECLUSAGE_BLENDINDICES:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BLENDINDICES\n");
+      break;
+   case SVGA3D_DECLUSAGE_NORMAL:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_NORMAL\n");
+      break;
+   case SVGA3D_DECLUSAGE_PSIZE:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_PSIZE\n");
+      break;
+   case SVGA3D_DECLUSAGE_TEXCOORD:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TEXCOORD\n");
+      break;
+   case SVGA3D_DECLUSAGE_TANGENT:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TANGENT\n");
+      break;
+   case SVGA3D_DECLUSAGE_BINORMAL:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_BINORMAL\n");
+      break;
+   case SVGA3D_DECLUSAGE_TESSFACTOR:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_TESSFACTOR\n");
+      break;
+   case SVGA3D_DECLUSAGE_POSITIONT:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_POSITIONT\n");
+      break;
+   case SVGA3D_DECLUSAGE_COLOR:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_COLOR\n");
+      break;
+   case SVGA3D_DECLUSAGE_FOG:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_FOG\n");
+      break;
+   case SVGA3D_DECLUSAGE_DEPTH:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_DEPTH\n");
+      break;
+   case SVGA3D_DECLUSAGE_SAMPLE:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_SAMPLE\n");
+      break;
+   case SVGA3D_DECLUSAGE_MAX:
+      debug_printf("\t\t.identity.usage = SVGA3D_DECLUSAGE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.identity.usage = %i\n", (*cmd).identity.usage);
+      break;
+   }
+   debug_printf("\t\t.identity.usageIndex = %u\n", (*cmd).identity.usageIndex);
+   debug_printf("\t\t.array.surfaceId = %u\n", (*cmd).array.surfaceId);
+   debug_printf("\t\t.array.offset = %u\n", (*cmd).array.offset);
+   debug_printf("\t\t.array.stride = %u\n", (*cmd).array.stride);
+   debug_printf("\t\t.rangeHint.first = %u\n", (*cmd).rangeHint.first);
+   debug_printf("\t\t.rangeHint.last = %u\n", (*cmd).rangeHint.last);
+}
+
+static void
+dump_SVGA3dTextureState(const SVGA3dTextureState *cmd)
+{
+   debug_printf("\t\t.stage = %u\n", (*cmd).stage);
+   switch((*cmd).name) {
+   case SVGA3D_TS_INVALID:
+      debug_printf("\t\t.name = SVGA3D_TS_INVALID\n");
+      break;
+   case SVGA3D_TS_BIND_TEXTURE:
+      debug_printf("\t\t.name = SVGA3D_TS_BIND_TEXTURE\n");
+      break;
+   case SVGA3D_TS_COLOROP:
+      debug_printf("\t\t.name = SVGA3D_TS_COLOROP\n");
+      break;
+   case SVGA3D_TS_COLORARG1:
+      debug_printf("\t\t.name = SVGA3D_TS_COLORARG1\n");
+      break;
+   case SVGA3D_TS_COLORARG2:
+      debug_printf("\t\t.name = SVGA3D_TS_COLORARG2\n");
+      break;
+   case SVGA3D_TS_ALPHAOP:
+      debug_printf("\t\t.name = SVGA3D_TS_ALPHAOP\n");
+      break;
+   case SVGA3D_TS_ALPHAARG1:
+      debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG1\n");
+      break;
+   case SVGA3D_TS_ALPHAARG2:
+      debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG2\n");
+      break;
+   case SVGA3D_TS_ADDRESSU:
+      debug_printf("\t\t.name = SVGA3D_TS_ADDRESSU\n");
+      break;
+   case SVGA3D_TS_ADDRESSV:
+      debug_printf("\t\t.name = SVGA3D_TS_ADDRESSV\n");
+      break;
+   case SVGA3D_TS_MIPFILTER:
+      debug_printf("\t\t.name = SVGA3D_TS_MIPFILTER\n");
+      break;
+   case SVGA3D_TS_MAGFILTER:
+      debug_printf("\t\t.name = SVGA3D_TS_MAGFILTER\n");
+      break;
+   case SVGA3D_TS_MINFILTER:
+      debug_printf("\t\t.name = SVGA3D_TS_MINFILTER\n");
+      break;
+   case SVGA3D_TS_BORDERCOLOR:
+      debug_printf("\t\t.name = SVGA3D_TS_BORDERCOLOR\n");
+      break;
+   case SVGA3D_TS_TEXCOORDINDEX:
+      debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDINDEX\n");
+      break;
+   case SVGA3D_TS_TEXTURETRANSFORMFLAGS:
+      debug_printf("\t\t.name = SVGA3D_TS_TEXTURETRANSFORMFLAGS\n");
+      break;
+   case SVGA3D_TS_TEXCOORDGEN:
+      debug_printf("\t\t.name = SVGA3D_TS_TEXCOORDGEN\n");
+      break;
+   case SVGA3D_TS_BUMPENVMAT00:
+      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT00\n");
+      break;
+   case SVGA3D_TS_BUMPENVMAT01:
+      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT01\n");
+      break;
+   case SVGA3D_TS_BUMPENVMAT10:
+      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT10\n");
+      break;
+   case SVGA3D_TS_BUMPENVMAT11:
+      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVMAT11\n");
+      break;
+   case SVGA3D_TS_TEXTURE_MIPMAP_LEVEL:
+      debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_MIPMAP_LEVEL\n");
+      break;
+   case SVGA3D_TS_TEXTURE_LOD_BIAS:
+      debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_LOD_BIAS\n");
+      break;
+   case SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL:
+      debug_printf("\t\t.name = SVGA3D_TS_TEXTURE_ANISOTROPIC_LEVEL\n");
+      break;
+   case SVGA3D_TS_ADDRESSW:
+      debug_printf("\t\t.name = SVGA3D_TS_ADDRESSW\n");
+      break;
+   case SVGA3D_TS_GAMMA:
+      debug_printf("\t\t.name = SVGA3D_TS_GAMMA\n");
+      break;
+   case SVGA3D_TS_BUMPENVLSCALE:
+      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLSCALE\n");
+      break;
+   case SVGA3D_TS_BUMPENVLOFFSET:
+      debug_printf("\t\t.name = SVGA3D_TS_BUMPENVLOFFSET\n");
+      break;
+   case SVGA3D_TS_COLORARG0:
+      debug_printf("\t\t.name = SVGA3D_TS_COLORARG0\n");
+      break;
+   case SVGA3D_TS_ALPHAARG0:
+      debug_printf("\t\t.name = SVGA3D_TS_ALPHAARG0\n");
+      break;
+   case SVGA3D_TS_MAX:
+      debug_printf("\t\t.name = SVGA3D_TS_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.name = %i\n", (*cmd).name);
+      break;
+   }
+   debug_printf("\t\t.value = %u\n", (*cmd).value);
+   debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue);
+}
+
+static void
+dump_SVGA3dCopyBox(const SVGA3dCopyBox *cmd)
+{
+   debug_printf("\t\t.x = %u\n", (*cmd).x);
+   debug_printf("\t\t.y = %u\n", (*cmd).y);
+   debug_printf("\t\t.z = %u\n", (*cmd).z);
+   debug_printf("\t\t.w = %u\n", (*cmd).w);
+   debug_printf("\t\t.h = %u\n", (*cmd).h);
+   debug_printf("\t\t.d = %u\n", (*cmd).d);
+   debug_printf("\t\t.srcx = %u\n", (*cmd).srcx);
+   debug_printf("\t\t.srcy = %u\n", (*cmd).srcy);
+   debug_printf("\t\t.srcz = %u\n", (*cmd).srcz);
+}
+
+static void
+dump_SVGA3dCmdSetClipPlane(const SVGA3dCmdSetClipPlane *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   debug_printf("\t\t.index = %u\n", (*cmd).index);
+   debug_printf("\t\t.plane[0] = %f\n", (*cmd).plane[0]);
+   debug_printf("\t\t.plane[1] = %f\n", (*cmd).plane[1]);
+   debug_printf("\t\t.plane[2] = %f\n", (*cmd).plane[2]);
+   debug_printf("\t\t.plane[3] = %f\n", (*cmd).plane[3]);
+}
+
+static void
+dump_SVGA3dCmdWaitForQuery(const SVGA3dCmdWaitForQuery *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   switch((*cmd).type) {
+   case SVGA3D_QUERYTYPE_OCCLUSION:
+      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
+      break;
+   case SVGA3D_QUERYTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      break;
+   }
+   debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId);
+   debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset);
+}
+
+static void
+dump_SVGA3dCmdSetRenderTarget(const SVGA3dCmdSetRenderTarget *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   switch((*cmd).type) {
+   case SVGA3D_RT_DEPTH:
+      debug_printf("\t\t.type = SVGA3D_RT_DEPTH\n");
+      break;
+   case SVGA3D_RT_STENCIL:
+      debug_printf("\t\t.type = SVGA3D_RT_STENCIL\n");
+      break;
+   default:
+      debug_printf("\t\t.type = SVGA3D_RT_COLOR%u\n", (*cmd).type - SVGA3D_RT_COLOR0);
+      break;
+   }
+   debug_printf("\t\t.target.sid = %u\n", (*cmd).target.sid);
+   debug_printf("\t\t.target.face = %u\n", (*cmd).target.face);
+   debug_printf("\t\t.target.mipmap = %u\n", (*cmd).target.mipmap);
+}
+
+static void
+dump_SVGA3dCmdSetTextureState(const SVGA3dCmdSetTextureState *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+}
+
+static void
+dump_SVGA3dCmdSurfaceCopy(const SVGA3dCmdSurfaceCopy *cmd)
+{
+   debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid);
+   debug_printf("\t\t.src.face = %u\n", (*cmd).src.face);
+   debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap);
+   debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid);
+   debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face);
+   debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap);
+}
+
+static void
+dump_SVGA3dCmdSetMaterial(const SVGA3dCmdSetMaterial *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   switch((*cmd).face) {
+   case SVGA3D_FACE_INVALID:
+      debug_printf("\t\t.face = SVGA3D_FACE_INVALID\n");
+      break;
+   case SVGA3D_FACE_NONE:
+      debug_printf("\t\t.face = SVGA3D_FACE_NONE\n");
+      break;
+   case SVGA3D_FACE_FRONT:
+      debug_printf("\t\t.face = SVGA3D_FACE_FRONT\n");
+      break;
+   case SVGA3D_FACE_BACK:
+      debug_printf("\t\t.face = SVGA3D_FACE_BACK\n");
+      break;
+   case SVGA3D_FACE_FRONT_BACK:
+      debug_printf("\t\t.face = SVGA3D_FACE_FRONT_BACK\n");
+      break;
+   case SVGA3D_FACE_MAX:
+      debug_printf("\t\t.face = SVGA3D_FACE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.face = %i\n", (*cmd).face);
+      break;
+   }
+   debug_printf("\t\t.material.diffuse[0] = %f\n", (*cmd).material.diffuse[0]);
+   debug_printf("\t\t.material.diffuse[1] = %f\n", (*cmd).material.diffuse[1]);
+   debug_printf("\t\t.material.diffuse[2] = %f\n", (*cmd).material.diffuse[2]);
+   debug_printf("\t\t.material.diffuse[3] = %f\n", (*cmd).material.diffuse[3]);
+   debug_printf("\t\t.material.ambient[0] = %f\n", (*cmd).material.ambient[0]);
+   debug_printf("\t\t.material.ambient[1] = %f\n", (*cmd).material.ambient[1]);
+   debug_printf("\t\t.material.ambient[2] = %f\n", (*cmd).material.ambient[2]);
+   debug_printf("\t\t.material.ambient[3] = %f\n", (*cmd).material.ambient[3]);
+   debug_printf("\t\t.material.specular[0] = %f\n", (*cmd).material.specular[0]);
+   debug_printf("\t\t.material.specular[1] = %f\n", (*cmd).material.specular[1]);
+   debug_printf("\t\t.material.specular[2] = %f\n", (*cmd).material.specular[2]);
+   debug_printf("\t\t.material.specular[3] = %f\n", (*cmd).material.specular[3]);
+   debug_printf("\t\t.material.emissive[0] = %f\n", (*cmd).material.emissive[0]);
+   debug_printf("\t\t.material.emissive[1] = %f\n", (*cmd).material.emissive[1]);
+   debug_printf("\t\t.material.emissive[2] = %f\n", (*cmd).material.emissive[2]);
+   debug_printf("\t\t.material.emissive[3] = %f\n", (*cmd).material.emissive[3]);
+   debug_printf("\t\t.material.shininess = %f\n", (*cmd).material.shininess);
+}
+
+static void
+dump_SVGA3dCmdSetLightData(const SVGA3dCmdSetLightData *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   debug_printf("\t\t.index = %u\n", (*cmd).index);
+   switch((*cmd).data.type) {
+   case SVGA3D_LIGHTTYPE_INVALID:
+      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_INVALID\n");
+      break;
+   case SVGA3D_LIGHTTYPE_POINT:
+      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_POINT\n");
+      break;
+   case SVGA3D_LIGHTTYPE_SPOT1:
+      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT1\n");
+      break;
+   case SVGA3D_LIGHTTYPE_SPOT2:
+      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_SPOT2\n");
+      break;
+   case SVGA3D_LIGHTTYPE_DIRECTIONAL:
+      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_DIRECTIONAL\n");
+      break;
+   case SVGA3D_LIGHTTYPE_MAX:
+      debug_printf("\t\t.data.type = SVGA3D_LIGHTTYPE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.data.type = %i\n", (*cmd).data.type);
+      break;
+   }
+   debug_printf("\t\t.data.inWorldSpace = %u\n", (*cmd).data.inWorldSpace);
+   debug_printf("\t\t.data.diffuse[0] = %f\n", (*cmd).data.diffuse[0]);
+   debug_printf("\t\t.data.diffuse[1] = %f\n", (*cmd).data.diffuse[1]);
+   debug_printf("\t\t.data.diffuse[2] = %f\n", (*cmd).data.diffuse[2]);
+   debug_printf("\t\t.data.diffuse[3] = %f\n", (*cmd).data.diffuse[3]);
+   debug_printf("\t\t.data.specular[0] = %f\n", (*cmd).data.specular[0]);
+   debug_printf("\t\t.data.specular[1] = %f\n", (*cmd).data.specular[1]);
+   debug_printf("\t\t.data.specular[2] = %f\n", (*cmd).data.specular[2]);
+   debug_printf("\t\t.data.specular[3] = %f\n", (*cmd).data.specular[3]);
+   debug_printf("\t\t.data.ambient[0] = %f\n", (*cmd).data.ambient[0]);
+   debug_printf("\t\t.data.ambient[1] = %f\n", (*cmd).data.ambient[1]);
+   debug_printf("\t\t.data.ambient[2] = %f\n", (*cmd).data.ambient[2]);
+   debug_printf("\t\t.data.ambient[3] = %f\n", (*cmd).data.ambient[3]);
+   debug_printf("\t\t.data.position[0] = %f\n", (*cmd).data.position[0]);
+   debug_printf("\t\t.data.position[1] = %f\n", (*cmd).data.position[1]);
+   debug_printf("\t\t.data.position[2] = %f\n", (*cmd).data.position[2]);
+   debug_printf("\t\t.data.position[3] = %f\n", (*cmd).data.position[3]);
+   debug_printf("\t\t.data.direction[0] = %f\n", (*cmd).data.direction[0]);
+   debug_printf("\t\t.data.direction[1] = %f\n", (*cmd).data.direction[1]);
+   debug_printf("\t\t.data.direction[2] = %f\n", (*cmd).data.direction[2]);
+   debug_printf("\t\t.data.direction[3] = %f\n", (*cmd).data.direction[3]);
+   debug_printf("\t\t.data.range = %f\n", (*cmd).data.range);
+   debug_printf("\t\t.data.falloff = %f\n", (*cmd).data.falloff);
+   debug_printf("\t\t.data.attenuation0 = %f\n", (*cmd).data.attenuation0);
+   debug_printf("\t\t.data.attenuation1 = %f\n", (*cmd).data.attenuation1);
+   debug_printf("\t\t.data.attenuation2 = %f\n", (*cmd).data.attenuation2);
+   debug_printf("\t\t.data.theta = %f\n", (*cmd).data.theta);
+   debug_printf("\t\t.data.phi = %f\n", (*cmd).data.phi);
+}
+
+static void
+dump_SVGA3dCmdSetViewport(const SVGA3dCmdSetViewport *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x);
+   debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y);
+   debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w);
+   debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h);
+}
+
+static void
+dump_SVGA3dCmdSetScissorRect(const SVGA3dCmdSetScissorRect *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   debug_printf("\t\t.rect.x = %u\n", (*cmd).rect.x);
+   debug_printf("\t\t.rect.y = %u\n", (*cmd).rect.y);
+   debug_printf("\t\t.rect.w = %u\n", (*cmd).rect.w);
+   debug_printf("\t\t.rect.h = %u\n", (*cmd).rect.h);
+}
+
+static void
+dump_SVGA3dCopyRect(const SVGA3dCopyRect *cmd)
+{
+   debug_printf("\t\t.x = %u\n", (*cmd).x);
+   debug_printf("\t\t.y = %u\n", (*cmd).y);
+   debug_printf("\t\t.w = %u\n", (*cmd).w);
+   debug_printf("\t\t.h = %u\n", (*cmd).h);
+   debug_printf("\t\t.srcx = %u\n", (*cmd).srcx);
+   debug_printf("\t\t.srcy = %u\n", (*cmd).srcy);
+}
+
+static void
+dump_SVGA3dCmdSetShader(const SVGA3dCmdSetShader *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   switch((*cmd).type) {
+   case SVGA3D_SHADERTYPE_COMPILED_DX8:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+      break;
+   case SVGA3D_SHADERTYPE_VS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+      break;
+   case SVGA3D_SHADERTYPE_PS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+      break;
+   case SVGA3D_SHADERTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      break;
+   }
+   debug_printf("\t\t.shid = %u\n", (*cmd).shid);
+}
+
+static void
+dump_SVGA3dCmdEndQuery(const SVGA3dCmdEndQuery *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   switch((*cmd).type) {
+   case SVGA3D_QUERYTYPE_OCCLUSION:
+      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
+      break;
+   case SVGA3D_QUERYTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      break;
+   }
+   debug_printf("\t\t.guestResult.gmrId = %u\n", (*cmd).guestResult.gmrId);
+   debug_printf("\t\t.guestResult.offset = %u\n", (*cmd).guestResult.offset);
+}
+
+static void
+dump_SVGA3dSize(const SVGA3dSize *cmd)
+{
+   debug_printf("\t\t.width = %u\n", (*cmd).width);
+   debug_printf("\t\t.height = %u\n", (*cmd).height);
+   debug_printf("\t\t.depth = %u\n", (*cmd).depth);
+}
+
+static void
+dump_SVGA3dCmdDestroySurface(const SVGA3dCmdDestroySurface *cmd)
+{
+   debug_printf("\t\t.sid = %u\n", (*cmd).sid);
+}
+
+static void
+dump_SVGA3dCmdDefineContext(const SVGA3dCmdDefineContext *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+}
+
+static void
+dump_SVGA3dRect(const SVGA3dRect *cmd)
+{
+   debug_printf("\t\t.x = %u\n", (*cmd).x);
+   debug_printf("\t\t.y = %u\n", (*cmd).y);
+   debug_printf("\t\t.w = %u\n", (*cmd).w);
+   debug_printf("\t\t.h = %u\n", (*cmd).h);
+}
+
+static void
+dump_SVGA3dCmdBeginQuery(const SVGA3dCmdBeginQuery *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   switch((*cmd).type) {
+   case SVGA3D_QUERYTYPE_OCCLUSION:
+      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_OCCLUSION\n");
+      break;
+   case SVGA3D_QUERYTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_QUERYTYPE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      break;
+   }
+}
+
+static void
+dump_SVGA3dRenderState(const SVGA3dRenderState *cmd)
+{
+   switch((*cmd).state) {
+   case SVGA3D_RS_INVALID:
+      debug_printf("\t\t.state = SVGA3D_RS_INVALID\n");
+      break;
+   case SVGA3D_RS_ZENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_ZENABLE\n");
+      break;
+   case SVGA3D_RS_ZWRITEENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_ZWRITEENABLE\n");
+      break;
+   case SVGA3D_RS_ALPHATESTENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_ALPHATESTENABLE\n");
+      break;
+   case SVGA3D_RS_DITHERENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_DITHERENABLE\n");
+      break;
+   case SVGA3D_RS_BLENDENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_BLENDENABLE\n");
+      break;
+   case SVGA3D_RS_FOGENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_FOGENABLE\n");
+      break;
+   case SVGA3D_RS_SPECULARENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_SPECULARENABLE\n");
+      break;
+   case SVGA3D_RS_STENCILENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE\n");
+      break;
+   case SVGA3D_RS_LIGHTINGENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_LIGHTINGENABLE\n");
+      break;
+   case SVGA3D_RS_NORMALIZENORMALS:
+      debug_printf("\t\t.state = SVGA3D_RS_NORMALIZENORMALS\n");
+      break;
+   case SVGA3D_RS_POINTSPRITEENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSPRITEENABLE\n");
+      break;
+   case SVGA3D_RS_POINTSCALEENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSCALEENABLE\n");
+      break;
+   case SVGA3D_RS_STENCILREF:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILREF\n");
+      break;
+   case SVGA3D_RS_STENCILMASK:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILMASK\n");
+      break;
+   case SVGA3D_RS_STENCILWRITEMASK:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILWRITEMASK\n");
+      break;
+   case SVGA3D_RS_FOGSTART:
+      debug_printf("\t\t.state = SVGA3D_RS_FOGSTART\n");
+      break;
+   case SVGA3D_RS_FOGEND:
+      debug_printf("\t\t.state = SVGA3D_RS_FOGEND\n");
+      break;
+   case SVGA3D_RS_FOGDENSITY:
+      debug_printf("\t\t.state = SVGA3D_RS_FOGDENSITY\n");
+      break;
+   case SVGA3D_RS_POINTSIZE:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSIZE\n");
+      break;
+   case SVGA3D_RS_POINTSIZEMIN:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMIN\n");
+      break;
+   case SVGA3D_RS_POINTSIZEMAX:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSIZEMAX\n");
+      break;
+   case SVGA3D_RS_POINTSCALE_A:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_A\n");
+      break;
+   case SVGA3D_RS_POINTSCALE_B:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_B\n");
+      break;
+   case SVGA3D_RS_POINTSCALE_C:
+      debug_printf("\t\t.state = SVGA3D_RS_POINTSCALE_C\n");
+      break;
+   case SVGA3D_RS_FOGCOLOR:
+      debug_printf("\t\t.state = SVGA3D_RS_FOGCOLOR\n");
+      break;
+   case SVGA3D_RS_AMBIENT:
+      debug_printf("\t\t.state = SVGA3D_RS_AMBIENT\n");
+      break;
+   case SVGA3D_RS_CLIPPLANEENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_CLIPPLANEENABLE\n");
+      break;
+   case SVGA3D_RS_FOGMODE:
+      debug_printf("\t\t.state = SVGA3D_RS_FOGMODE\n");
+      break;
+   case SVGA3D_RS_FILLMODE:
+      debug_printf("\t\t.state = SVGA3D_RS_FILLMODE\n");
+      break;
+   case SVGA3D_RS_SHADEMODE:
+      debug_printf("\t\t.state = SVGA3D_RS_SHADEMODE\n");
+      break;
+   case SVGA3D_RS_LINEPATTERN:
+      debug_printf("\t\t.state = SVGA3D_RS_LINEPATTERN\n");
+      break;
+   case SVGA3D_RS_SRCBLEND:
+      debug_printf("\t\t.state = SVGA3D_RS_SRCBLEND\n");
+      break;
+   case SVGA3D_RS_DSTBLEND:
+      debug_printf("\t\t.state = SVGA3D_RS_DSTBLEND\n");
+      break;
+   case SVGA3D_RS_BLENDEQUATION:
+      debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATION\n");
+      break;
+   case SVGA3D_RS_CULLMODE:
+      debug_printf("\t\t.state = SVGA3D_RS_CULLMODE\n");
+      break;
+   case SVGA3D_RS_ZFUNC:
+      debug_printf("\t\t.state = SVGA3D_RS_ZFUNC\n");
+      break;
+   case SVGA3D_RS_ALPHAFUNC:
+      debug_printf("\t\t.state = SVGA3D_RS_ALPHAFUNC\n");
+      break;
+   case SVGA3D_RS_STENCILFUNC:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILFUNC\n");
+      break;
+   case SVGA3D_RS_STENCILFAIL:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILFAIL\n");
+      break;
+   case SVGA3D_RS_STENCILZFAIL:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILZFAIL\n");
+      break;
+   case SVGA3D_RS_STENCILPASS:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILPASS\n");
+      break;
+   case SVGA3D_RS_ALPHAREF:
+      debug_printf("\t\t.state = SVGA3D_RS_ALPHAREF\n");
+      break;
+   case SVGA3D_RS_FRONTWINDING:
+      debug_printf("\t\t.state = SVGA3D_RS_FRONTWINDING\n");
+      break;
+   case SVGA3D_RS_COORDINATETYPE:
+      debug_printf("\t\t.state = SVGA3D_RS_COORDINATETYPE\n");
+      break;
+   case SVGA3D_RS_ZBIAS:
+      debug_printf("\t\t.state = SVGA3D_RS_ZBIAS\n");
+      break;
+   case SVGA3D_RS_RANGEFOGENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_RANGEFOGENABLE\n");
+      break;
+   case SVGA3D_RS_COLORWRITEENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE\n");
+      break;
+   case SVGA3D_RS_VERTEXMATERIALENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_VERTEXMATERIALENABLE\n");
+      break;
+   case SVGA3D_RS_DIFFUSEMATERIALSOURCE:
+      debug_printf("\t\t.state = SVGA3D_RS_DIFFUSEMATERIALSOURCE\n");
+      break;
+   case SVGA3D_RS_SPECULARMATERIALSOURCE:
+      debug_printf("\t\t.state = SVGA3D_RS_SPECULARMATERIALSOURCE\n");
+      break;
+   case SVGA3D_RS_AMBIENTMATERIALSOURCE:
+      debug_printf("\t\t.state = SVGA3D_RS_AMBIENTMATERIALSOURCE\n");
+      break;
+   case SVGA3D_RS_EMISSIVEMATERIALSOURCE:
+      debug_printf("\t\t.state = SVGA3D_RS_EMISSIVEMATERIALSOURCE\n");
+      break;
+   case SVGA3D_RS_TEXTUREFACTOR:
+      debug_printf("\t\t.state = SVGA3D_RS_TEXTUREFACTOR\n");
+      break;
+   case SVGA3D_RS_LOCALVIEWER:
+      debug_printf("\t\t.state = SVGA3D_RS_LOCALVIEWER\n");
+      break;
+   case SVGA3D_RS_SCISSORTESTENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_SCISSORTESTENABLE\n");
+      break;
+   case SVGA3D_RS_BLENDCOLOR:
+      debug_printf("\t\t.state = SVGA3D_RS_BLENDCOLOR\n");
+      break;
+   case SVGA3D_RS_STENCILENABLE2SIDED:
+      debug_printf("\t\t.state = SVGA3D_RS_STENCILENABLE2SIDED\n");
+      break;
+   case SVGA3D_RS_CCWSTENCILFUNC:
+      debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFUNC\n");
+      break;
+   case SVGA3D_RS_CCWSTENCILFAIL:
+      debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILFAIL\n");
+      break;
+   case SVGA3D_RS_CCWSTENCILZFAIL:
+      debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILZFAIL\n");
+      break;
+   case SVGA3D_RS_CCWSTENCILPASS:
+      debug_printf("\t\t.state = SVGA3D_RS_CCWSTENCILPASS\n");
+      break;
+   case SVGA3D_RS_VERTEXBLEND:
+      debug_printf("\t\t.state = SVGA3D_RS_VERTEXBLEND\n");
+      break;
+   case SVGA3D_RS_SLOPESCALEDEPTHBIAS:
+      debug_printf("\t\t.state = SVGA3D_RS_SLOPESCALEDEPTHBIAS\n");
+      break;
+   case SVGA3D_RS_DEPTHBIAS:
+      debug_printf("\t\t.state = SVGA3D_RS_DEPTHBIAS\n");
+      break;
+   case SVGA3D_RS_OUTPUTGAMMA:
+      debug_printf("\t\t.state = SVGA3D_RS_OUTPUTGAMMA\n");
+      break;
+   case SVGA3D_RS_ZVISIBLE:
+      debug_printf("\t\t.state = SVGA3D_RS_ZVISIBLE\n");
+      break;
+   case SVGA3D_RS_LASTPIXEL:
+      debug_printf("\t\t.state = SVGA3D_RS_LASTPIXEL\n");
+      break;
+   case SVGA3D_RS_CLIPPING:
+      debug_printf("\t\t.state = SVGA3D_RS_CLIPPING\n");
+      break;
+   case SVGA3D_RS_WRAP0:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP0\n");
+      break;
+   case SVGA3D_RS_WRAP1:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP1\n");
+      break;
+   case SVGA3D_RS_WRAP2:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP2\n");
+      break;
+   case SVGA3D_RS_WRAP3:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP3\n");
+      break;
+   case SVGA3D_RS_WRAP4:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP4\n");
+      break;
+   case SVGA3D_RS_WRAP5:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP5\n");
+      break;
+   case SVGA3D_RS_WRAP6:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP6\n");
+      break;
+   case SVGA3D_RS_WRAP7:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP7\n");
+      break;
+   case SVGA3D_RS_WRAP8:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP8\n");
+      break;
+   case SVGA3D_RS_WRAP9:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP9\n");
+      break;
+   case SVGA3D_RS_WRAP10:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP10\n");
+      break;
+   case SVGA3D_RS_WRAP11:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP11\n");
+      break;
+   case SVGA3D_RS_WRAP12:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP12\n");
+      break;
+   case SVGA3D_RS_WRAP13:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP13\n");
+      break;
+   case SVGA3D_RS_WRAP14:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP14\n");
+      break;
+   case SVGA3D_RS_WRAP15:
+      debug_printf("\t\t.state = SVGA3D_RS_WRAP15\n");
+      break;
+   case SVGA3D_RS_MULTISAMPLEANTIALIAS:
+      debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEANTIALIAS\n");
+      break;
+   case SVGA3D_RS_MULTISAMPLEMASK:
+      debug_printf("\t\t.state = SVGA3D_RS_MULTISAMPLEMASK\n");
+      break;
+   case SVGA3D_RS_INDEXEDVERTEXBLENDENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_INDEXEDVERTEXBLENDENABLE\n");
+      break;
+   case SVGA3D_RS_TWEENFACTOR:
+      debug_printf("\t\t.state = SVGA3D_RS_TWEENFACTOR\n");
+      break;
+   case SVGA3D_RS_ANTIALIASEDLINEENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_ANTIALIASEDLINEENABLE\n");
+      break;
+   case SVGA3D_RS_COLORWRITEENABLE1:
+      debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE1\n");
+      break;
+   case SVGA3D_RS_COLORWRITEENABLE2:
+      debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE2\n");
+      break;
+   case SVGA3D_RS_COLORWRITEENABLE3:
+      debug_printf("\t\t.state = SVGA3D_RS_COLORWRITEENABLE3\n");
+      break;
+   case SVGA3D_RS_SEPARATEALPHABLENDENABLE:
+      debug_printf("\t\t.state = SVGA3D_RS_SEPARATEALPHABLENDENABLE\n");
+      break;
+   case SVGA3D_RS_SRCBLENDALPHA:
+      debug_printf("\t\t.state = SVGA3D_RS_SRCBLENDALPHA\n");
+      break;
+   case SVGA3D_RS_DSTBLENDALPHA:
+      debug_printf("\t\t.state = SVGA3D_RS_DSTBLENDALPHA\n");
+      break;
+   case SVGA3D_RS_BLENDEQUATIONALPHA:
+      debug_printf("\t\t.state = SVGA3D_RS_BLENDEQUATIONALPHA\n");
+      break;
+   case SVGA3D_RS_MAX:
+      debug_printf("\t\t.state = SVGA3D_RS_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.state = %i\n", (*cmd).state);
+      break;
+   }
+   debug_printf("\t\t.uintValue = %u\n", (*cmd).uintValue);
+   debug_printf("\t\t.floatValue = %f\n", (*cmd).floatValue);
+}
+
+static void
+dump_SVGA3dVertexDivisor(const SVGA3dVertexDivisor *cmd)
+{
+   debug_printf("\t\t.value = %u\n", (*cmd).value);
+   debug_printf("\t\t.count = %u\n", (*cmd).count);
+   debug_printf("\t\t.indexedData = %u\n", (*cmd).indexedData);
+   debug_printf("\t\t.instanceData = %u\n", (*cmd).instanceData);
+}
+
+static void
+dump_SVGA3dCmdDefineShader(const SVGA3dCmdDefineShader *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   debug_printf("\t\t.shid = %u\n", (*cmd).shid);
+   switch((*cmd).type) {
+   case SVGA3D_SHADERTYPE_COMPILED_DX8:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+      break;
+   case SVGA3D_SHADERTYPE_VS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+      break;
+   case SVGA3D_SHADERTYPE_PS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+      break;
+   case SVGA3D_SHADERTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      break;
+   }
+}
+
+static void
+dump_SVGA3dCmdSetShaderConst(const SVGA3dCmdSetShaderConst *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   debug_printf("\t\t.reg = %u\n", (*cmd).reg);
+   switch((*cmd).type) {
+   case SVGA3D_SHADERTYPE_COMPILED_DX8:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+      break;
+   case SVGA3D_SHADERTYPE_VS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+      break;
+   case SVGA3D_SHADERTYPE_PS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+      break;
+   case SVGA3D_SHADERTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      break;
+   }
+   switch((*cmd).ctype) {
+   case SVGA3D_CONST_TYPE_FLOAT:
+      debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_FLOAT\n");
+      debug_printf("\t\t.values[0] = %f\n", *(const float *)&(*cmd).values[0]);
+      debug_printf("\t\t.values[1] = %f\n", *(const float *)&(*cmd).values[1]);
+      debug_printf("\t\t.values[2] = %f\n", *(const float *)&(*cmd).values[2]);
+      debug_printf("\t\t.values[3] = %f\n", *(const float *)&(*cmd).values[3]);
+      break;
+   case SVGA3D_CONST_TYPE_INT:
+      debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_INT\n");
+      debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]);
+      debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]);
+      debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]);
+      debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]);
+      break;
+   case SVGA3D_CONST_TYPE_BOOL:
+      debug_printf("\t\t.ctype = SVGA3D_CONST_TYPE_BOOL\n");
+      debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]);
+      debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]);
+      debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]);
+      debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]);
+      break;
+   default:
+      debug_printf("\t\t.ctype = %i\n", (*cmd).ctype);
+      debug_printf("\t\t.values[0] = %u\n", (*cmd).values[0]);
+      debug_printf("\t\t.values[1] = %u\n", (*cmd).values[1]);
+      debug_printf("\t\t.values[2] = %u\n", (*cmd).values[2]);
+      debug_printf("\t\t.values[3] = %u\n", (*cmd).values[3]);
+      break;
+   }
+}
+
+static void
+dump_SVGA3dCmdSetZRange(const SVGA3dCmdSetZRange *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   debug_printf("\t\t.zRange.min = %f\n", (*cmd).zRange.min);
+   debug_printf("\t\t.zRange.max = %f\n", (*cmd).zRange.max);
+}
+
+static void
+dump_SVGA3dCmdDrawPrimitives(const SVGA3dCmdDrawPrimitives *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   debug_printf("\t\t.numVertexDecls = %u\n", (*cmd).numVertexDecls);
+   debug_printf("\t\t.numRanges = %u\n", (*cmd).numRanges);
+}
+
+static void
+dump_SVGA3dCmdSetLightEnabled(const SVGA3dCmdSetLightEnabled *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   debug_printf("\t\t.index = %u\n", (*cmd).index);
+   debug_printf("\t\t.enabled = %u\n", (*cmd).enabled);
+}
+
+static void
+dump_SVGA3dPrimitiveRange(const SVGA3dPrimitiveRange *cmd)
+{
+   switch((*cmd).primType) {
+   case SVGA3D_PRIMITIVE_INVALID:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_INVALID\n");
+      break;
+   case SVGA3D_PRIMITIVE_TRIANGLELIST:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLELIST\n");
+      break;
+   case SVGA3D_PRIMITIVE_POINTLIST:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_POINTLIST\n");
+      break;
+   case SVGA3D_PRIMITIVE_LINELIST:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINELIST\n");
+      break;
+   case SVGA3D_PRIMITIVE_LINESTRIP:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_LINESTRIP\n");
+      break;
+   case SVGA3D_PRIMITIVE_TRIANGLESTRIP:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLESTRIP\n");
+      break;
+   case SVGA3D_PRIMITIVE_TRIANGLEFAN:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_TRIANGLEFAN\n");
+      break;
+   case SVGA3D_PRIMITIVE_MAX:
+      debug_printf("\t\t.primType = SVGA3D_PRIMITIVE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.primType = %i\n", (*cmd).primType);
+      break;
+   }
+   debug_printf("\t\t.primitiveCount = %u\n", (*cmd).primitiveCount);
+   debug_printf("\t\t.indexArray.surfaceId = %u\n", (*cmd).indexArray.surfaceId);
+   debug_printf("\t\t.indexArray.offset = %u\n", (*cmd).indexArray.offset);
+   debug_printf("\t\t.indexArray.stride = %u\n", (*cmd).indexArray.stride);
+   debug_printf("\t\t.indexWidth = %u\n", (*cmd).indexWidth);
+   debug_printf("\t\t.indexBias = %i\n", (*cmd).indexBias);
+}
+
+static void
+dump_SVGA3dCmdPresent(const SVGA3dCmdPresent *cmd)
+{
+   debug_printf("\t\t.sid = %u\n", (*cmd).sid);
+}
+
+static void
+dump_SVGA3dCmdSetRenderState(const SVGA3dCmdSetRenderState *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+}
+
+static void
+dump_SVGA3dCmdSurfaceStretchBlt(const SVGA3dCmdSurfaceStretchBlt *cmd)
+{
+   debug_printf("\t\t.src.sid = %u\n", (*cmd).src.sid);
+   debug_printf("\t\t.src.face = %u\n", (*cmd).src.face);
+   debug_printf("\t\t.src.mipmap = %u\n", (*cmd).src.mipmap);
+   debug_printf("\t\t.dest.sid = %u\n", (*cmd).dest.sid);
+   debug_printf("\t\t.dest.face = %u\n", (*cmd).dest.face);
+   debug_printf("\t\t.dest.mipmap = %u\n", (*cmd).dest.mipmap);
+   debug_printf("\t\t.boxSrc.x = %u\n", (*cmd).boxSrc.x);
+   debug_printf("\t\t.boxSrc.y = %u\n", (*cmd).boxSrc.y);
+   debug_printf("\t\t.boxSrc.z = %u\n", (*cmd).boxSrc.z);
+   debug_printf("\t\t.boxSrc.w = %u\n", (*cmd).boxSrc.w);
+   debug_printf("\t\t.boxSrc.h = %u\n", (*cmd).boxSrc.h);
+   debug_printf("\t\t.boxSrc.d = %u\n", (*cmd).boxSrc.d);
+   debug_printf("\t\t.boxDest.x = %u\n", (*cmd).boxDest.x);
+   debug_printf("\t\t.boxDest.y = %u\n", (*cmd).boxDest.y);
+   debug_printf("\t\t.boxDest.z = %u\n", (*cmd).boxDest.z);
+   debug_printf("\t\t.boxDest.w = %u\n", (*cmd).boxDest.w);
+   debug_printf("\t\t.boxDest.h = %u\n", (*cmd).boxDest.h);
+   debug_printf("\t\t.boxDest.d = %u\n", (*cmd).boxDest.d);
+   switch((*cmd).mode) {
+   case SVGA3D_STRETCH_BLT_POINT:
+      debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_POINT\n");
+      break;
+   case SVGA3D_STRETCH_BLT_LINEAR:
+      debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_LINEAR\n");
+      break;
+   case SVGA3D_STRETCH_BLT_MAX:
+      debug_printf("\t\t.mode = SVGA3D_STRETCH_BLT_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.mode = %i\n", (*cmd).mode);
+      break;
+   }
+}
+
+static void
+dump_SVGA3dCmdSurfaceDMA(const SVGA3dCmdSurfaceDMA *cmd)
+{
+   debug_printf("\t\t.guest.ptr.gmrId = %u\n", (*cmd).guest.ptr.gmrId);
+   debug_printf("\t\t.guest.ptr.offset = %u\n", (*cmd).guest.ptr.offset);
+   debug_printf("\t\t.guest.pitch = %u\n", (*cmd).guest.pitch);
+   debug_printf("\t\t.host.sid = %u\n", (*cmd).host.sid);
+   debug_printf("\t\t.host.face = %u\n", (*cmd).host.face);
+   debug_printf("\t\t.host.mipmap = %u\n", (*cmd).host.mipmap);
+   switch((*cmd).transfer) {
+   case SVGA3D_WRITE_HOST_VRAM:
+      debug_printf("\t\t.transfer = SVGA3D_WRITE_HOST_VRAM\n");
+      break;
+   case SVGA3D_READ_HOST_VRAM:
+      debug_printf("\t\t.transfer = SVGA3D_READ_HOST_VRAM\n");
+      break;
+   default:
+      debug_printf("\t\t.transfer = %i\n", (*cmd).transfer);
+      break;
+   }
+}
+
+static void
+dump_SVGA3dCmdSurfaceDMASuffix(const SVGA3dCmdSurfaceDMASuffix *cmd)
+{
+   debug_printf("\t\t.suffixSize = %u\n", (*cmd).suffixSize);
+   debug_printf("\t\t.maximumOffset = %u\n", (*cmd).maximumOffset);
+   debug_printf("\t\t.flags.discard = %u\n", (*cmd).flags.discard);
+   debug_printf("\t\t.flags.unsynchronized = %u\n", (*cmd).flags.unsynchronized);
+}
+
+static void
+dump_SVGA3dCmdSetTransform(const SVGA3dCmdSetTransform *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   switch((*cmd).type) {
+   case SVGA3D_TRANSFORM_INVALID:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_INVALID\n");
+      break;
+   case SVGA3D_TRANSFORM_WORLD:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD\n");
+      break;
+   case SVGA3D_TRANSFORM_VIEW:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_VIEW\n");
+      break;
+   case SVGA3D_TRANSFORM_PROJECTION:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_PROJECTION\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE0:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE0\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE1:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE1\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE2:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE2\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE3:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE3\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE4:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE4\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE5:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE5\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE6:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE6\n");
+      break;
+   case SVGA3D_TRANSFORM_TEXTURE7:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_TEXTURE7\n");
+      break;
+   case SVGA3D_TRANSFORM_WORLD1:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD1\n");
+      break;
+   case SVGA3D_TRANSFORM_WORLD2:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD2\n");
+      break;
+   case SVGA3D_TRANSFORM_WORLD3:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_WORLD3\n");
+      break;
+   case SVGA3D_TRANSFORM_MAX:
+      debug_printf("\t\t.type = SVGA3D_TRANSFORM_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      break;
+   }
+   debug_printf("\t\t.matrix[0] = %f\n", (*cmd).matrix[0]);
+   debug_printf("\t\t.matrix[1] = %f\n", (*cmd).matrix[1]);
+   debug_printf("\t\t.matrix[2] = %f\n", (*cmd).matrix[2]);
+   debug_printf("\t\t.matrix[3] = %f\n", (*cmd).matrix[3]);
+   debug_printf("\t\t.matrix[4] = %f\n", (*cmd).matrix[4]);
+   debug_printf("\t\t.matrix[5] = %f\n", (*cmd).matrix[5]);
+   debug_printf("\t\t.matrix[6] = %f\n", (*cmd).matrix[6]);
+   debug_printf("\t\t.matrix[7] = %f\n", (*cmd).matrix[7]);
+   debug_printf("\t\t.matrix[8] = %f\n", (*cmd).matrix[8]);
+   debug_printf("\t\t.matrix[9] = %f\n", (*cmd).matrix[9]);
+   debug_printf("\t\t.matrix[10] = %f\n", (*cmd).matrix[10]);
+   debug_printf("\t\t.matrix[11] = %f\n", (*cmd).matrix[11]);
+   debug_printf("\t\t.matrix[12] = %f\n", (*cmd).matrix[12]);
+   debug_printf("\t\t.matrix[13] = %f\n", (*cmd).matrix[13]);
+   debug_printf("\t\t.matrix[14] = %f\n", (*cmd).matrix[14]);
+   debug_printf("\t\t.matrix[15] = %f\n", (*cmd).matrix[15]);
+}
+
+static void
+dump_SVGA3dCmdDestroyShader(const SVGA3dCmdDestroyShader *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   debug_printf("\t\t.shid = %u\n", (*cmd).shid);
+   switch((*cmd).type) {
+   case SVGA3D_SHADERTYPE_COMPILED_DX8:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_COMPILED_DX8\n");
+      break;
+   case SVGA3D_SHADERTYPE_VS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_VS\n");
+      break;
+   case SVGA3D_SHADERTYPE_PS:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_PS\n");
+      break;
+   case SVGA3D_SHADERTYPE_MAX:
+      debug_printf("\t\t.type = SVGA3D_SHADERTYPE_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.type = %i\n", (*cmd).type);
+      break;
+   }
+}
+
+static void
+dump_SVGA3dCmdDestroyContext(const SVGA3dCmdDestroyContext *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+}
+
+static void
+dump_SVGA3dCmdClear(const SVGA3dCmdClear *cmd)
+{
+   debug_printf("\t\t.cid = %u\n", (*cmd).cid);
+   switch((*cmd).clearFlag) {
+   case SVGA3D_CLEAR_COLOR:
+      debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_COLOR\n");
+      break;
+   case SVGA3D_CLEAR_DEPTH:
+      debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_DEPTH\n");
+      break;
+   case SVGA3D_CLEAR_STENCIL:
+      debug_printf("\t\t.clearFlag = SVGA3D_CLEAR_STENCIL\n");
+      break;
+   default:
+      debug_printf("\t\t.clearFlag = %i\n", (*cmd).clearFlag);
+      break;
+   }
+   debug_printf("\t\t.color = %u\n", (*cmd).color);
+   debug_printf("\t\t.depth = %f\n", (*cmd).depth);
+   debug_printf("\t\t.stencil = %u\n", (*cmd).stencil);
+}
+
+static void
+dump_SVGA3dCmdDefineSurface(const SVGA3dCmdDefineSurface *cmd)
+{
+   debug_printf("\t\t.sid = %u\n", (*cmd).sid);
+   switch((*cmd).surfaceFlags) {
+   case SVGA3D_SURFACE_CUBEMAP:
+      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_CUBEMAP\n");
+      break;
+   case SVGA3D_SURFACE_HINT_STATIC:
+      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_STATIC\n");
+      break;
+   case SVGA3D_SURFACE_HINT_DYNAMIC:
+      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_DYNAMIC\n");
+      break;
+   case SVGA3D_SURFACE_HINT_INDEXBUFFER:
+      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_INDEXBUFFER\n");
+      break;
+   case SVGA3D_SURFACE_HINT_VERTEXBUFFER:
+      debug_printf("\t\t.surfaceFlags = SVGA3D_SURFACE_HINT_VERTEXBUFFER\n");
+      break;
+   default:
+      debug_printf("\t\t.surfaceFlags = %i\n", (*cmd).surfaceFlags);
+      break;
+   }
+   switch((*cmd).format) {
+   case SVGA3D_FORMAT_INVALID:
+      debug_printf("\t\t.format = SVGA3D_FORMAT_INVALID\n");
+      break;
+   case SVGA3D_X8R8G8B8:
+      debug_printf("\t\t.format = SVGA3D_X8R8G8B8\n");
+      break;
+   case SVGA3D_A8R8G8B8:
+      debug_printf("\t\t.format = SVGA3D_A8R8G8B8\n");
+      break;
+   case SVGA3D_R5G6B5:
+      debug_printf("\t\t.format = SVGA3D_R5G6B5\n");
+      break;
+   case SVGA3D_X1R5G5B5:
+      debug_printf("\t\t.format = SVGA3D_X1R5G5B5\n");
+      break;
+   case SVGA3D_A1R5G5B5:
+      debug_printf("\t\t.format = SVGA3D_A1R5G5B5\n");
+      break;
+   case SVGA3D_A4R4G4B4:
+      debug_printf("\t\t.format = SVGA3D_A4R4G4B4\n");
+      break;
+   case SVGA3D_Z_D32:
+      debug_printf("\t\t.format = SVGA3D_Z_D32\n");
+      break;
+   case SVGA3D_Z_D16:
+      debug_printf("\t\t.format = SVGA3D_Z_D16\n");
+      break;
+   case SVGA3D_Z_D24S8:
+      debug_printf("\t\t.format = SVGA3D_Z_D24S8\n");
+      break;
+   case SVGA3D_Z_D15S1:
+      debug_printf("\t\t.format = SVGA3D_Z_D15S1\n");
+      break;
+   case SVGA3D_LUMINANCE8:
+      debug_printf("\t\t.format = SVGA3D_LUMINANCE8\n");
+      break;
+   case SVGA3D_LUMINANCE4_ALPHA4:
+      debug_printf("\t\t.format = SVGA3D_LUMINANCE4_ALPHA4\n");
+      break;
+   case SVGA3D_LUMINANCE16:
+      debug_printf("\t\t.format = SVGA3D_LUMINANCE16\n");
+      break;
+   case SVGA3D_LUMINANCE8_ALPHA8:
+      debug_printf("\t\t.format = SVGA3D_LUMINANCE8_ALPHA8\n");
+      break;
+   case SVGA3D_DXT1:
+      debug_printf("\t\t.format = SVGA3D_DXT1\n");
+      break;
+   case SVGA3D_DXT2:
+      debug_printf("\t\t.format = SVGA3D_DXT2\n");
+      break;
+   case SVGA3D_DXT3:
+      debug_printf("\t\t.format = SVGA3D_DXT3\n");
+      break;
+   case SVGA3D_DXT4:
+      debug_printf("\t\t.format = SVGA3D_DXT4\n");
+      break;
+   case SVGA3D_DXT5:
+      debug_printf("\t\t.format = SVGA3D_DXT5\n");
+      break;
+   case SVGA3D_BUMPU8V8:
+      debug_printf("\t\t.format = SVGA3D_BUMPU8V8\n");
+      break;
+   case SVGA3D_BUMPL6V5U5:
+      debug_printf("\t\t.format = SVGA3D_BUMPL6V5U5\n");
+      break;
+   case SVGA3D_BUMPX8L8V8U8:
+      debug_printf("\t\t.format = SVGA3D_BUMPX8L8V8U8\n");
+      break;
+   case SVGA3D_BUMPL8V8U8:
+      debug_printf("\t\t.format = SVGA3D_BUMPL8V8U8\n");
+      break;
+   case SVGA3D_ARGB_S10E5:
+      debug_printf("\t\t.format = SVGA3D_ARGB_S10E5\n");
+      break;
+   case SVGA3D_ARGB_S23E8:
+      debug_printf("\t\t.format = SVGA3D_ARGB_S23E8\n");
+      break;
+   case SVGA3D_A2R10G10B10:
+      debug_printf("\t\t.format = SVGA3D_A2R10G10B10\n");
+      break;
+   case SVGA3D_V8U8:
+      debug_printf("\t\t.format = SVGA3D_V8U8\n");
+      break;
+   case SVGA3D_Q8W8V8U8:
+      debug_printf("\t\t.format = SVGA3D_Q8W8V8U8\n");
+      break;
+   case SVGA3D_CxV8U8:
+      debug_printf("\t\t.format = SVGA3D_CxV8U8\n");
+      break;
+   case SVGA3D_X8L8V8U8:
+      debug_printf("\t\t.format = SVGA3D_X8L8V8U8\n");
+      break;
+   case SVGA3D_A2W10V10U10:
+      debug_printf("\t\t.format = SVGA3D_A2W10V10U10\n");
+      break;
+   case SVGA3D_ALPHA8:
+      debug_printf("\t\t.format = SVGA3D_ALPHA8\n");
+      break;
+   case SVGA3D_R_S10E5:
+      debug_printf("\t\t.format = SVGA3D_R_S10E5\n");
+      break;
+   case SVGA3D_R_S23E8:
+      debug_printf("\t\t.format = SVGA3D_R_S23E8\n");
+      break;
+   case SVGA3D_RG_S10E5:
+      debug_printf("\t\t.format = SVGA3D_RG_S10E5\n");
+      break;
+   case SVGA3D_RG_S23E8:
+      debug_printf("\t\t.format = SVGA3D_RG_S23E8\n");
+      break;
+   case SVGA3D_BUFFER:
+      debug_printf("\t\t.format = SVGA3D_BUFFER\n");
+      break;
+   case SVGA3D_Z_D24X8:
+      debug_printf("\t\t.format = SVGA3D_Z_D24X8\n");
+      break;
+   case SVGA3D_FORMAT_MAX:
+      debug_printf("\t\t.format = SVGA3D_FORMAT_MAX\n");
+      break;
+   default:
+      debug_printf("\t\t.format = %i\n", (*cmd).format);
+      break;
+   }
+   debug_printf("\t\t.face[0].numMipLevels = %u\n", (*cmd).face[0].numMipLevels);
+   debug_printf("\t\t.face[1].numMipLevels = %u\n", (*cmd).face[1].numMipLevels);
+   debug_printf("\t\t.face[2].numMipLevels = %u\n", (*cmd).face[2].numMipLevels);
+   debug_printf("\t\t.face[3].numMipLevels = %u\n", (*cmd).face[3].numMipLevels);
+   debug_printf("\t\t.face[4].numMipLevels = %u\n", (*cmd).face[4].numMipLevels);
+   debug_printf("\t\t.face[5].numMipLevels = %u\n", (*cmd).face[5].numMipLevels);
+}
+
+
+void            
+svga_dump_commands(const void *commands, uint32_t size)
+{
+   const uint8_t *next = commands;
+   const uint8_t *last = next + size;
+   
+   assert(size % sizeof(uint32_t) == 0);
+   
+   while(next < last) {
+      const uint32_t cmd_id = *(const uint32_t *)next;
+
+      if(SVGA_3D_CMD_BASE <= cmd_id && cmd_id < SVGA_3D_CMD_MAX) {
+         const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next;
+         const uint8_t *body = (const uint8_t *)&header[1];
+
+         next = (const uint8_t *)body + header->size;
+         if(next > last)
+            break;
+
+         switch(cmd_id) {
+         case SVGA_3D_CMD_SURFACE_DEFINE:
+            debug_printf("\tSVGA_3D_CMD_SURFACE_DEFINE\n");
+            {
+               const SVGA3dCmdDefineSurface *cmd = (const SVGA3dCmdDefineSurface *)body;
+               dump_SVGA3dCmdDefineSurface(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dSize) <= next) {
+                  dump_SVGA3dSize((const SVGA3dSize *)body);
+                  body += sizeof(SVGA3dSize);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_SURFACE_DESTROY:
+            debug_printf("\tSVGA_3D_CMD_SURFACE_DESTROY\n");
+            {
+               const SVGA3dCmdDestroySurface *cmd = (const SVGA3dCmdDestroySurface *)body;
+               dump_SVGA3dCmdDestroySurface(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SURFACE_COPY:
+            debug_printf("\tSVGA_3D_CMD_SURFACE_COPY\n");
+            {
+               const SVGA3dCmdSurfaceCopy *cmd = (const SVGA3dCmdSurfaceCopy *)body;
+               dump_SVGA3dCmdSurfaceCopy(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dCopyBox) <= next) {
+                  dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
+                  body += sizeof(SVGA3dCopyBox);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_SURFACE_STRETCHBLT:
+            debug_printf("\tSVGA_3D_CMD_SURFACE_STRETCHBLT\n");
+            {
+               const SVGA3dCmdSurfaceStretchBlt *cmd = (const SVGA3dCmdSurfaceStretchBlt *)body;
+               dump_SVGA3dCmdSurfaceStretchBlt(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SURFACE_DMA:
+            debug_printf("\tSVGA_3D_CMD_SURFACE_DMA\n");
+            {
+               const SVGA3dCmdSurfaceDMA *cmd = (const SVGA3dCmdSurfaceDMA *)body;
+               dump_SVGA3dCmdSurfaceDMA(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dCopyBox) <= next) {
+                  dump_SVGA3dCopyBox((const SVGA3dCopyBox *)body);
+                  body += sizeof(SVGA3dCopyBox);
+               }
+               while(body + sizeof(SVGA3dCmdSurfaceDMASuffix) <= next) {
+                  dump_SVGA3dCmdSurfaceDMASuffix((const SVGA3dCmdSurfaceDMASuffix *)body);
+                  body += sizeof(SVGA3dCmdSurfaceDMASuffix);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_CONTEXT_DEFINE:
+            debug_printf("\tSVGA_3D_CMD_CONTEXT_DEFINE\n");
+            {
+               const SVGA3dCmdDefineContext *cmd = (const SVGA3dCmdDefineContext *)body;
+               dump_SVGA3dCmdDefineContext(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_CONTEXT_DESTROY:
+            debug_printf("\tSVGA_3D_CMD_CONTEXT_DESTROY\n");
+            {
+               const SVGA3dCmdDestroyContext *cmd = (const SVGA3dCmdDestroyContext *)body;
+               dump_SVGA3dCmdDestroyContext(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETTRANSFORM:
+            debug_printf("\tSVGA_3D_CMD_SETTRANSFORM\n");
+            {
+               const SVGA3dCmdSetTransform *cmd = (const SVGA3dCmdSetTransform *)body;
+               dump_SVGA3dCmdSetTransform(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETZRANGE:
+            debug_printf("\tSVGA_3D_CMD_SETZRANGE\n");
+            {
+               const SVGA3dCmdSetZRange *cmd = (const SVGA3dCmdSetZRange *)body;
+               dump_SVGA3dCmdSetZRange(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETRENDERSTATE:
+            debug_printf("\tSVGA_3D_CMD_SETRENDERSTATE\n");
+            {
+               const SVGA3dCmdSetRenderState *cmd = (const SVGA3dCmdSetRenderState *)body;
+               dump_SVGA3dCmdSetRenderState(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dRenderState) <= next) {
+                  dump_SVGA3dRenderState((const SVGA3dRenderState *)body);
+                  body += sizeof(SVGA3dRenderState);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_SETRENDERTARGET:
+            debug_printf("\tSVGA_3D_CMD_SETRENDERTARGET\n");
+            {
+               const SVGA3dCmdSetRenderTarget *cmd = (const SVGA3dCmdSetRenderTarget *)body;
+               dump_SVGA3dCmdSetRenderTarget(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETTEXTURESTATE:
+            debug_printf("\tSVGA_3D_CMD_SETTEXTURESTATE\n");
+            {
+               const SVGA3dCmdSetTextureState *cmd = (const SVGA3dCmdSetTextureState *)body;
+               dump_SVGA3dCmdSetTextureState(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dTextureState) <= next) {
+                  dump_SVGA3dTextureState((const SVGA3dTextureState *)body);
+                  body += sizeof(SVGA3dTextureState);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_SETMATERIAL:
+            debug_printf("\tSVGA_3D_CMD_SETMATERIAL\n");
+            {
+               const SVGA3dCmdSetMaterial *cmd = (const SVGA3dCmdSetMaterial *)body;
+               dump_SVGA3dCmdSetMaterial(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETLIGHTDATA:
+            debug_printf("\tSVGA_3D_CMD_SETLIGHTDATA\n");
+            {
+               const SVGA3dCmdSetLightData *cmd = (const SVGA3dCmdSetLightData *)body;
+               dump_SVGA3dCmdSetLightData(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETLIGHTENABLED:
+            debug_printf("\tSVGA_3D_CMD_SETLIGHTENABLED\n");
+            {
+               const SVGA3dCmdSetLightEnabled *cmd = (const SVGA3dCmdSetLightEnabled *)body;
+               dump_SVGA3dCmdSetLightEnabled(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETVIEWPORT:
+            debug_printf("\tSVGA_3D_CMD_SETVIEWPORT\n");
+            {
+               const SVGA3dCmdSetViewport *cmd = (const SVGA3dCmdSetViewport *)body;
+               dump_SVGA3dCmdSetViewport(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SETCLIPPLANE:
+            debug_printf("\tSVGA_3D_CMD_SETCLIPPLANE\n");
+            {
+               const SVGA3dCmdSetClipPlane *cmd = (const SVGA3dCmdSetClipPlane *)body;
+               dump_SVGA3dCmdSetClipPlane(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_CLEAR:
+            debug_printf("\tSVGA_3D_CMD_CLEAR\n");
+            {
+               const SVGA3dCmdClear *cmd = (const SVGA3dCmdClear *)body;
+               dump_SVGA3dCmdClear(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dRect) <= next) {
+                  dump_SVGA3dRect((const SVGA3dRect *)body);
+                  body += sizeof(SVGA3dRect);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_PRESENT:
+            debug_printf("\tSVGA_3D_CMD_PRESENT\n");
+            {
+               const SVGA3dCmdPresent *cmd = (const SVGA3dCmdPresent *)body;
+               dump_SVGA3dCmdPresent(cmd);
+               body = (const uint8_t *)&cmd[1];
+               while(body + sizeof(SVGA3dCopyRect) <= next) {
+                  dump_SVGA3dCopyRect((const SVGA3dCopyRect *)body);
+                  body += sizeof(SVGA3dCopyRect);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_SHADER_DEFINE:
+            debug_printf("\tSVGA_3D_CMD_SHADER_DEFINE\n");
+            {
+               const SVGA3dCmdDefineShader *cmd = (const SVGA3dCmdDefineShader *)body;
+               dump_SVGA3dCmdDefineShader(cmd);
+               body = (const uint8_t *)&cmd[1];
+               svga_shader_dump((const uint32_t *)body, 
+                            (unsigned)(next - body)/sizeof(uint32_t),
+                            FALSE );
+               body = next;
+            }
+            break;
+         case SVGA_3D_CMD_SHADER_DESTROY:
+            debug_printf("\tSVGA_3D_CMD_SHADER_DESTROY\n");
+            {
+               const SVGA3dCmdDestroyShader *cmd = (const SVGA3dCmdDestroyShader *)body;
+               dump_SVGA3dCmdDestroyShader(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SET_SHADER:
+            debug_printf("\tSVGA_3D_CMD_SET_SHADER\n");
+            {
+               const SVGA3dCmdSetShader *cmd = (const SVGA3dCmdSetShader *)body;
+               dump_SVGA3dCmdSetShader(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_SET_SHADER_CONST:
+            debug_printf("\tSVGA_3D_CMD_SET_SHADER_CONST\n");
+            {
+               const SVGA3dCmdSetShaderConst *cmd = (const SVGA3dCmdSetShaderConst *)body;
+               dump_SVGA3dCmdSetShaderConst(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_DRAW_PRIMITIVES:
+            debug_printf("\tSVGA_3D_CMD_DRAW_PRIMITIVES\n");
+            {
+               const SVGA3dCmdDrawPrimitives *cmd = (const SVGA3dCmdDrawPrimitives *)body;
+               unsigned i, j;
+               dump_SVGA3dCmdDrawPrimitives(cmd);
+               body = (const uint8_t *)&cmd[1];
+               for(i = 0; i < cmd->numVertexDecls; ++i) {
+                  dump_SVGA3dVertexDecl((const SVGA3dVertexDecl *)body);
+                  body += sizeof(SVGA3dVertexDecl);
+               }
+               for(j = 0; j < cmd->numRanges; ++j) {
+                  dump_SVGA3dPrimitiveRange((const SVGA3dPrimitiveRange *)body);
+                  body += sizeof(SVGA3dPrimitiveRange);
+               }
+               while(body + sizeof(SVGA3dVertexDivisor) <= next) {
+                  dump_SVGA3dVertexDivisor((const SVGA3dVertexDivisor *)body);
+                  body += sizeof(SVGA3dVertexDivisor);
+               }
+            }
+            break;
+         case SVGA_3D_CMD_SETSCISSORRECT:
+            debug_printf("\tSVGA_3D_CMD_SETSCISSORRECT\n");
+            {
+               const SVGA3dCmdSetScissorRect *cmd = (const SVGA3dCmdSetScissorRect *)body;
+               dump_SVGA3dCmdSetScissorRect(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_BEGIN_QUERY:
+            debug_printf("\tSVGA_3D_CMD_BEGIN_QUERY\n");
+            {
+               const SVGA3dCmdBeginQuery *cmd = (const SVGA3dCmdBeginQuery *)body;
+               dump_SVGA3dCmdBeginQuery(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_END_QUERY:
+            debug_printf("\tSVGA_3D_CMD_END_QUERY\n");
+            {
+               const SVGA3dCmdEndQuery *cmd = (const SVGA3dCmdEndQuery *)body;
+               dump_SVGA3dCmdEndQuery(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         case SVGA_3D_CMD_WAIT_FOR_QUERY:
+            debug_printf("\tSVGA_3D_CMD_WAIT_FOR_QUERY\n");
+            {
+               const SVGA3dCmdWaitForQuery *cmd = (const SVGA3dCmdWaitForQuery *)body;
+               dump_SVGA3dCmdWaitForQuery(cmd);
+               body = (const uint8_t *)&cmd[1];
+            }
+            break;
+         default:
+            debug_printf("\t0x%08x\n", cmd_id);
+            break;
+         }
+
+         while(body + sizeof(uint32_t) <= next) {
+            debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+            body += sizeof(uint32_t);
+         }
+         while(body + sizeof(uint32_t) <= next)
+            debug_printf("\t\t0x%02x\n", *body++);
+      }
+      else if(cmd_id == SVGA_CMD_FENCE) {
+         debug_printf("\tSVGA_CMD_FENCE\n");
+         debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]);
+         next += 2*sizeof(uint32_t);
+      }
+      else {
+         debug_printf("\t0x%08x\n", cmd_id);
+         next += sizeof(uint32_t);
+      }
+   }
+}
+
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.h b/src/gallium/drivers/svga/svgadump/svga_dump.h
new file mode 100644
index 00000000000..69a87020875
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.h
@@ -0,0 +1,34 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+#ifndef SVGA_DUMP_H_
+#define SVGA_DUMP_H_
+
+#include "pipe/p_compiler.h"
+
+void
+svga_dump_commands(const void *commands, uint32_t size);
+
+#endif /* SVGA_DUMP_H_ */
diff --git a/src/gallium/drivers/svga/svgadump/svga_dump.py b/src/gallium/drivers/svga/svgadump/svga_dump.py
new file mode 100755
index 00000000000..288e753296e
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_dump.py
@@ -0,0 +1,329 @@
+#!/usr/bin/env python
+'''
+Generates dumper for the SVGA 3D command stream using pygccxml.
+
+Jose Fonseca <jfonseca@vmware.com>
+'''
+
+copyright = '''
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+ '''
+
+import os
+import sys
+
+from pygccxml import parser
+from pygccxml import declarations
+
+from pygccxml.declarations import algorithm
+from pygccxml.declarations import decl_visitor
+from pygccxml.declarations import type_traits
+from pygccxml.declarations import type_visitor
+
+
+enums = True
+
+
+class decl_dumper_t(decl_visitor.decl_visitor_t):
+
+    def __init__(self, instance = '', decl = None):
+        decl_visitor.decl_visitor_t.__init__(self)
+        self._instance = instance
+        self.decl = decl
+
+    def clone(self):
+        return decl_dumper_t(self._instance, self.decl)
+
+    def visit_class(self):
+        class_ = self.decl
+        assert self.decl.class_type in ('struct', 'union')
+
+        for variable in class_.variables():
+            if variable.name != '':
+                #print 'variable = %r' % variable.name
+                dump_type(self._instance + '.' + variable.name, variable.type)
+
+    def visit_enumeration(self):
+        if enums:
+            print '   switch(%s) {' % ("(*cmd)" + self._instance,)
+            for name, value in self.decl.values:
+                print '   case %s:' % (name,)
+                print '      debug_printf("\\t\\t%s = %s\\n");' % (self._instance, name)
+                print '      break;'
+            print '   default:'
+            print '      debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance)
+            print '      break;'
+            print '   }'
+        else:
+            print '   debug_printf("\\t\\t%s = %%i\\n", %s);' % (self._instance, "(*cmd)" + self._instance)
+
+
+def dump_decl(instance, decl):
+    dumper = decl_dumper_t(instance, decl)
+    algorithm.apply_visitor(dumper, decl)
+
+
+class type_dumper_t(type_visitor.type_visitor_t):
+
+    def __init__(self, instance, type_):
+        type_visitor.type_visitor_t.__init__(self)
+        self.instance = instance
+        self.type = type_
+
+    def clone(self):
+        return type_dumper_t(self.instance, self.type)
+
+    def visit_char(self):
+        self.print_instance('%i')
+        
+    def visit_unsigned_char(self):
+        self.print_instance('%u')
+
+    def visit_signed_char(self):
+        self.print_instance('%i')
+    
+    def visit_wchar(self):
+        self.print_instance('%i')
+        
+    def visit_short_int(self):
+        self.print_instance('%i')
+        
+    def visit_short_unsigned_int(self):
+        self.print_instance('%u')
+        
+    def visit_bool(self):
+        self.print_instance('%i')
+        
+    def visit_int(self):
+        self.print_instance('%i')
+        
+    def visit_unsigned_int(self):
+        self.print_instance('%u')
+        
+    def visit_long_int(self):
+        self.print_instance('%li')
+        
+    def visit_long_unsigned_int(self):
+        self.print_instance('%lu')
+        
+    def visit_long_long_int(self):
+        self.print_instance('%lli')
+        
+    def visit_long_long_unsigned_int(self):
+        self.print_instance('%llu')
+        
+    def visit_float(self):
+        self.print_instance('%f')
+        
+    def visit_double(self):
+        self.print_instance('%f')
+        
+    def visit_array(self):
+        for i in range(type_traits.array_size(self.type)):
+            dump_type(self.instance + '[%i]' % i, type_traits.base_type(self.type))
+
+    def visit_pointer(self):
+        self.print_instance('%p')
+
+    def visit_declarated(self):
+        #print 'decl = %r' % self.type.decl_string
+        decl = type_traits.remove_declarated(self.type)
+        dump_decl(self.instance, decl)
+
+    def print_instance(self, format):
+        print '   debug_printf("\\t\\t%s = %s\\n", %s);' % (self.instance, format, "(*cmd)" + self.instance)
+
+
+def dump_type(instance, type_):
+    type_ = type_traits.remove_alias(type_)
+    visitor = type_dumper_t(instance, type_)
+    algorithm.apply_visitor(visitor, type_)
+
+
+def dump_struct(decls, class_):
+    print 'static void'
+    print 'dump_%s(const %s *cmd)' % (class_.name, class_.name)
+    print '{'
+    dump_decl('', class_)
+    print '}'
+    print ''
+
+
+cmds = [
+    ('SVGA_3D_CMD_SURFACE_DEFINE', 'SVGA3dCmdDefineSurface', (), 'SVGA3dSize'),
+    ('SVGA_3D_CMD_SURFACE_DESTROY', 'SVGA3dCmdDestroySurface', (), None),
+    ('SVGA_3D_CMD_SURFACE_COPY', 'SVGA3dCmdSurfaceCopy', (), 'SVGA3dCopyBox'),
+    ('SVGA_3D_CMD_SURFACE_STRETCHBLT', 'SVGA3dCmdSurfaceStretchBlt', (), None),
+    ('SVGA_3D_CMD_SURFACE_DMA', 'SVGA3dCmdSurfaceDMA', (), 'SVGA3dCopyBox'),
+    ('SVGA_3D_CMD_CONTEXT_DEFINE', 'SVGA3dCmdDefineContext', (), None),
+    ('SVGA_3D_CMD_CONTEXT_DESTROY', 'SVGA3dCmdDestroyContext', (), None),
+    ('SVGA_3D_CMD_SETTRANSFORM', 'SVGA3dCmdSetTransform', (), None),
+    ('SVGA_3D_CMD_SETZRANGE', 'SVGA3dCmdSetZRange', (), None),
+    ('SVGA_3D_CMD_SETRENDERSTATE', 'SVGA3dCmdSetRenderState', (), 'SVGA3dRenderState'),
+    ('SVGA_3D_CMD_SETRENDERTARGET', 'SVGA3dCmdSetRenderTarget', (), None),
+    ('SVGA_3D_CMD_SETTEXTURESTATE', 'SVGA3dCmdSetTextureState', (), 'SVGA3dTextureState'),
+    ('SVGA_3D_CMD_SETMATERIAL', 'SVGA3dCmdSetMaterial', (), None),
+    ('SVGA_3D_CMD_SETLIGHTDATA', 'SVGA3dCmdSetLightData', (), None),
+    ('SVGA_3D_CMD_SETLIGHTENABLED', 'SVGA3dCmdSetLightEnabled', (), None),
+    ('SVGA_3D_CMD_SETVIEWPORT', 'SVGA3dCmdSetViewport', (), None),
+    ('SVGA_3D_CMD_SETCLIPPLANE', 'SVGA3dCmdSetClipPlane', (), None),
+    ('SVGA_3D_CMD_CLEAR', 'SVGA3dCmdClear', (), 'SVGA3dRect'),
+    ('SVGA_3D_CMD_PRESENT', 'SVGA3dCmdPresent', (), 'SVGA3dCopyRect'),
+    ('SVGA_3D_CMD_SHADER_DEFINE', 'SVGA3dCmdDefineShader', (), None),
+    ('SVGA_3D_CMD_SHADER_DESTROY', 'SVGA3dCmdDestroyShader', (), None),
+    ('SVGA_3D_CMD_SET_SHADER', 'SVGA3dCmdSetShader', (), None),
+    ('SVGA_3D_CMD_SET_SHADER_CONST', 'SVGA3dCmdSetShaderConst', (), None),
+    ('SVGA_3D_CMD_DRAW_PRIMITIVES', 'SVGA3dCmdDrawPrimitives', (('SVGA3dVertexDecl', 'numVertexDecls'), ('SVGA3dPrimitiveRange', 'numRanges')), 'SVGA3dVertexDivisor'),
+    ('SVGA_3D_CMD_SETSCISSORRECT', 'SVGA3dCmdSetScissorRect', (), None),
+    ('SVGA_3D_CMD_BEGIN_QUERY', 'SVGA3dCmdBeginQuery', (), None),
+    ('SVGA_3D_CMD_END_QUERY', 'SVGA3dCmdEndQuery', (), None),
+    ('SVGA_3D_CMD_WAIT_FOR_QUERY', 'SVGA3dCmdWaitForQuery', (), None),
+    #('SVGA_3D_CMD_PRESENT_READBACK', None, (), None),
+]
+
+def dump_cmds():
+    print r'''
+void            
+svga_dump_commands(const void *commands, uint32_t size)
+{
+   const uint8_t *next = commands;
+   const uint8_t *last = next + size;
+   
+   assert(size % sizeof(uint32_t) == 0);
+   
+   while(next < last) {
+      const uint32_t cmd_id = *(const uint32_t *)next;
+
+      if(SVGA_3D_CMD_BASE <= cmd_id && cmd_id < SVGA_3D_CMD_MAX) {
+         const SVGA3dCmdHeader *header = (const SVGA3dCmdHeader *)next;
+         const uint8_t *body = (const uint8_t *)&header[1];
+
+         next = (const uint8_t *)body + header->size;
+         if(next > last)
+            break;
+'''
+
+    print '         switch(cmd_id) {'
+    indexes = 'ijklmn'
+    for id, header, body, footer in cmds:
+        print '         case %s:' % id
+        print '            debug_printf("\\t%s\\n");' % id
+        print '            {'
+        print '               const %s *cmd = (const %s *)body;' % (header, header)
+        if len(body):
+            print '               unsigned ' + ', '.join(indexes[:len(body)]) + ';'
+        print '               dump_%s(cmd);' % header
+        print '               body = (const uint8_t *)&cmd[1];'
+        for i in range(len(body)):
+            struct, count = body[i]
+            idx = indexes[i]
+            print '               for(%s = 0; %s < cmd->%s; ++%s) {' % (idx, idx, count, idx)
+            print '                  dump_%s((const %s *)body);' % (struct, struct)
+            print '                  body += sizeof(%s);' % struct
+            print '               }'
+        if footer is not None:
+            print '               while(body + sizeof(%s) <= next) {' % footer
+            print '                  dump_%s((const %s *)body);' % (footer, footer)
+            print '                  body += sizeof(%s);' % footer
+            print '               }'
+        if id == 'SVGA_3D_CMD_SHADER_DEFINE':
+            print '               sh_svga_dump((const uint32_t *)body, (unsigned)(next - body)/sizeof(uint32_t));'
+            print '               body = next;'
+        print '            }'
+        print '            break;'
+    print '         default:'
+    print '            debug_printf("\\t0x%08x\\n", cmd_id);'
+    print '            break;'
+    print '         }'
+            
+    print r'''
+         while(body + sizeof(uint32_t) <= next) {
+            debug_printf("\t\t0x%08x\n", *(const uint32_t *)body);
+            body += sizeof(uint32_t);
+         }
+         while(body + sizeof(uint32_t) <= next)
+            debug_printf("\t\t0x%02x\n", *body++);
+      }
+      else if(cmd_id == SVGA_CMD_FENCE) {
+         debug_printf("\tSVGA_CMD_FENCE\n");
+         debug_printf("\t\t0x%08x\n", ((const uint32_t *)next)[1]);
+         next += 2*sizeof(uint32_t);
+      }
+      else {
+         debug_printf("\t0x%08x\n", cmd_id);
+         next += sizeof(uint32_t);
+      }
+   }
+}
+'''
+
+def main():
+    print copyright.strip()
+    print
+    print '/**'
+    print ' * @file'
+    print ' * Dump SVGA commands.'
+    print ' *'
+    print ' * Generated automatically from svga3d_reg.h by svga_dump.py.'
+    print ' */'
+    print
+    print '#include "svga_types.h"'
+    print '#include "svga_shader_dump.h"'
+    print '#include "svga3d_reg.h"'
+    print
+    print '#include "pipe/p_debug.h"'
+    print '#include "svga_dump.h"'
+    print
+
+    config = parser.config_t(
+        include_paths = ['include'],
+        compiler = 'gcc',
+    )
+
+    headers = [
+        'include/svga_types.h', 
+        'include/svga3d_reg.h', 
+    ]
+
+    decls = parser.parse(headers, config, parser.COMPILATION_MODE.ALL_AT_ONCE)
+    global_ns = declarations.get_global_namespace(decls)
+
+    names = set()
+    for id, header, body, footer in cmds:
+        names.add(header)
+        for struct, count in body:
+            names.add(struct)
+        if footer is not None:
+            names.add(footer)
+
+    for class_ in global_ns.classes(lambda decl: decl.name in names):
+        dump_struct(decls, class_)
+
+    dump_cmds()
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader.h b/src/gallium/drivers/svga/svgadump/svga_shader.h
new file mode 100644
index 00000000000..9217af2dd99
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader.h
@@ -0,0 +1,220 @@
+/**********************************************************
+ * Copyright 2007-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Definitions
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef ST_SHADER_SVGA_H
+#define ST_SHADER_SVGA_H
+
+#include "pipe/p_compiler.h"
+
+struct sh_op
+{
+   unsigned opcode:16;
+   unsigned control:8;
+   unsigned length:4;
+   unsigned predicated:1;
+   unsigned unused:1;
+   unsigned coissue:1;
+   unsigned is_reg:1;
+};
+
+struct sh_reg
+{
+   unsigned number:11;
+   unsigned type_hi:2;
+   unsigned relative:1;
+   unsigned unused:14;
+   unsigned type_lo:3;
+   unsigned is_reg:1;
+};
+
+static INLINE unsigned
+sh_reg_type( struct sh_reg reg )
+{
+   return reg.type_lo | (reg.type_hi << 3);
+}
+
+struct sh_cdata
+{
+   float xyzw[4];
+};
+
+struct sh_def
+{
+   struct sh_op op;
+   struct sh_reg reg;
+   struct sh_cdata cdata;
+};
+
+struct sh_defb
+{
+   struct sh_op op;
+   struct sh_reg reg;
+   uint data;
+};
+
+struct sh_idata
+{
+   int xyzw[4];
+};
+
+struct sh_defi
+{
+   struct sh_op op;
+   struct sh_reg reg;
+   struct sh_idata idata;
+};
+
+#define PS_TEXTURETYPE_UNKNOWN   SVGA3DSAMP_UNKNOWN
+#define PS_TEXTURETYPE_2D        SVGA3DSAMP_2D
+#define PS_TEXTURETYPE_CUBE      SVGA3DSAMP_CUBE
+#define PS_TEXTURETYPE_VOLUME    SVGA3DSAMP_VOLUME
+
+struct ps_sampleinfo
+{
+   unsigned unused:27;
+   unsigned texture_type:4;
+   unsigned is_reg:1;
+};
+
+struct vs_semantic
+{
+   unsigned usage:5;
+   unsigned unused1:11;
+   unsigned usage_index:4;
+   unsigned unused2:12;
+};
+
+struct sh_dstreg
+{
+   unsigned number:11;
+   unsigned type_hi:2;
+   unsigned relative:1;
+   unsigned unused:2;
+   unsigned write_mask:4;
+   unsigned modifier:4;
+   unsigned shift_scale:4;
+   unsigned type_lo:3;
+   unsigned is_reg:1;
+};
+
+static INLINE unsigned
+sh_dstreg_type( struct sh_dstreg reg )
+{
+   return reg.type_lo | (reg.type_hi << 3);
+}
+
+struct sh_dcl
+{
+   struct sh_op op;
+   union {
+      struct {
+         struct ps_sampleinfo sampleinfo;
+      } ps;
+      struct {
+         struct vs_semantic semantic;
+      } vs;
+   } u;
+   struct sh_dstreg reg;
+};
+
+
+struct sh_srcreg
+{
+   unsigned number:11;
+   unsigned type_hi:2;
+   unsigned relative:1;
+   unsigned unused:2;
+   unsigned swizzle_x:2;
+   unsigned swizzle_y:2;
+   unsigned swizzle_z:2;
+   unsigned swizzle_w:2;
+   unsigned modifier:4;
+   unsigned type_lo:3;
+   unsigned is_reg:1;
+};
+
+static INLINE unsigned
+sh_srcreg_type( struct sh_srcreg reg )
+{
+   return reg.type_lo | (reg.type_hi << 3);
+}
+
+struct sh_dstop
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+};
+
+struct sh_srcop
+{
+   struct sh_op op;
+   struct sh_srcreg src;
+};
+
+struct sh_src2op
+{
+   struct sh_op op;
+   struct sh_srcreg src0;
+   struct sh_srcreg src1;
+};
+
+struct sh_unaryop
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+   struct sh_srcreg src;
+};
+
+struct sh_binaryop
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+   struct sh_srcreg src0;
+   struct sh_srcreg src1;
+};
+
+struct sh_trinaryop
+{
+   struct sh_op op;
+   struct sh_dstreg dst;
+   struct sh_srcreg src0;
+   struct sh_srcreg src1;
+   struct sh_srcreg src2;
+};
+
+struct sh_comment
+{
+   unsigned opcode:16;
+   unsigned size:16;
+};
+
+#endif /* ST_SHADER_SVGA_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.c b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
new file mode 100644
index 00000000000..b0e7fdf378a
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.c
@@ -0,0 +1,654 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Dump Facilities
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#include "svga_shader.h"
+#include "svga_shader_dump.h"
+#include "svga_shader_op.h"
+#include "util/u_debug.h"
+
+#include "../svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+struct dump_info
+{
+   SVGA3dShaderVersion version;
+   boolean is_ps;
+};
+
+static void dump_op( struct sh_op op, const char *mnemonic )
+{
+   assert( op.predicated == 0 );
+   assert( op.is_reg == 0 );
+
+   if (op.coissue)
+      debug_printf( "+" );
+   debug_printf( "%s", mnemonic );
+   switch (op.control) {
+   case 0:
+      break;
+   case SVGA3DOPCONT_PROJECT:
+      debug_printf( "p" );
+      break;
+   case SVGA3DOPCONT_BIAS:
+      debug_printf( "b" );
+      break;
+   default:
+      assert( 0 );
+   }
+}
+
+
+static void dump_comp_op( struct sh_op op, const char *mnemonic )
+{
+   assert( op.is_reg == 0 );
+
+   if (op.coissue)
+      debug_printf( "+" );
+   debug_printf( "%s", mnemonic );
+   switch (op.control) {
+   case SVGA3DOPCOMP_RESERVED0:
+      break;
+   case SVGA3DOPCOMP_GT:
+      debug_printf("_gt");
+      break;
+   case SVGA3DOPCOMP_EQ:
+      debug_printf("_eq");
+      break;
+   case SVGA3DOPCOMP_GE:
+      debug_printf("_ge");
+      break;
+   case SVGA3DOPCOMP_LT:
+      debug_printf("_lt");
+      break;
+   case SVGA3DOPCOMPC_NE:
+      debug_printf("_ne");
+      break;
+   case SVGA3DOPCOMP_LE:
+      debug_printf("_le");
+      break;
+   case SVGA3DOPCOMP_RESERVED1:
+   default:
+      assert( 0 );
+   }
+}
+
+
+static void dump_reg( struct sh_reg reg, struct sh_srcreg *indreg, const struct dump_info *di )
+{
+   assert( sh_reg_type( reg ) == SVGA3DREG_CONST || reg.relative == 0 );
+   assert( reg.is_reg == 1 );
+
+   switch (sh_reg_type( reg )) {
+   case SVGA3DREG_TEMP:
+      debug_printf( "r%u", reg.number );
+      break;
+
+   case SVGA3DREG_INPUT:
+      debug_printf( "v%u", reg.number );
+      break;
+
+   case SVGA3DREG_CONST:
+      if (reg.relative) {
+         if (sh_srcreg_type( *indreg ) == SVGA3DREG_LOOP)
+            debug_printf( "c[aL+%u]", reg.number );
+         else
+            debug_printf( "c[a%u.x+%u]", indreg->number, reg.number );
+      }
+      else
+         debug_printf( "c%u", reg.number );
+      break;
+
+   case SVGA3DREG_ADDR:    /* VS */
+   /* SVGA3DREG_TEXTURE */ /* PS */
+      if (di->is_ps)
+         debug_printf( "t%u", reg.number );
+      else
+         debug_printf( "a%u", reg.number );
+      break;
+
+   case SVGA3DREG_RASTOUT:
+      switch (reg.number) {
+      case 0 /*POSITION*/:
+         debug_printf( "oPos" );
+         break;
+      case 1 /*FOG*/:
+         debug_printf( "oFog" );
+         break;
+      case 2 /*POINT_SIZE*/:
+         debug_printf( "oPts" );
+         break;
+      default:
+         assert( 0 );
+         debug_printf( "???" );
+      }
+      break;
+
+   case SVGA3DREG_ATTROUT:
+      assert( reg.number < 2 );
+      debug_printf( "oD%u", reg.number );
+      break;
+
+   case SVGA3DREG_TEXCRDOUT:
+   /* SVGA3DREG_OUTPUT */
+      debug_printf( "oT%u", reg.number );
+      break;
+
+   case SVGA3DREG_COLOROUT:
+      debug_printf( "oC%u", reg.number );
+      break;
+
+   case SVGA3DREG_DEPTHOUT:
+      debug_printf( "oD%u", reg.number );
+      break;
+
+   case SVGA3DREG_SAMPLER:
+      debug_printf( "s%u", reg.number );
+      break;
+
+   case SVGA3DREG_CONSTBOOL:
+      assert( !reg.relative );
+      debug_printf( "b%u", reg.number );
+      break;
+
+   case SVGA3DREG_CONSTINT:
+      assert( !reg.relative );
+      debug_printf( "i%u", reg.number );
+      break;
+
+   case SVGA3DREG_LOOP:
+      assert( reg.number == 0 );
+      debug_printf( "aL" );
+      break;
+
+   case SVGA3DREG_MISCTYPE:
+      switch (reg.number) {
+      case SVGA3DMISCREG_POSITION:
+         debug_printf( "vPos" );
+         break;
+      case SVGA3DMISCREG_FACE:
+         debug_printf( "vFace" );
+         break;
+      default:
+         assert(0);
+         break;
+      }
+      break;
+
+   case SVGA3DREG_LABEL:
+      debug_printf( "l%u", reg.number );
+      break;
+
+   case SVGA3DREG_PREDICATE:
+      debug_printf( "p%u", reg.number );
+      break;
+
+
+   default:
+      assert( 0 );
+      debug_printf( "???" );
+   }
+}
+
+static void dump_cdata( struct sh_cdata cdata )
+{
+   debug_printf( "%f, %f, %f, %f", cdata.xyzw[0], cdata.xyzw[1], cdata.xyzw[2], cdata.xyzw[3] );
+}
+
+static void dump_idata( struct sh_idata idata )
+{
+   debug_printf( "%d, %d, %d, %d", idata.xyzw[0], idata.xyzw[1], idata.xyzw[2], idata.xyzw[3] );
+}
+
+static void dump_bdata( boolean bdata )
+{
+   debug_printf( bdata ? "TRUE" : "FALSE" );
+}
+
+static void dump_sampleinfo( struct ps_sampleinfo sampleinfo )
+{
+   switch (sampleinfo.texture_type) {
+   case SVGA3DSAMP_2D:
+      debug_printf( "_2d" );
+      break;
+   case SVGA3DSAMP_CUBE:
+      debug_printf( "_cube" );
+      break;
+   case SVGA3DSAMP_VOLUME:
+      debug_printf( "_volume" );
+      break;
+   default:
+      assert( 0 );
+   }
+}
+
+
+static void dump_usageinfo( struct vs_semantic semantic )
+{
+   switch (semantic.usage) {
+   case SVGA3D_DECLUSAGE_POSITION:
+      debug_printf("_position" );
+      break;
+   case SVGA3D_DECLUSAGE_BLENDWEIGHT:
+      debug_printf("_blendweight" );
+      break;
+   case SVGA3D_DECLUSAGE_BLENDINDICES:
+      debug_printf("_blendindices" );
+      break;
+   case SVGA3D_DECLUSAGE_NORMAL:
+      debug_printf("_normal" );
+      break;
+   case SVGA3D_DECLUSAGE_PSIZE:
+      debug_printf("_psize" );
+      break;
+   case SVGA3D_DECLUSAGE_TEXCOORD:
+      debug_printf("_texcoord");
+      break;
+   case SVGA3D_DECLUSAGE_TANGENT:
+      debug_printf("_tangent" );
+      break;
+   case SVGA3D_DECLUSAGE_BINORMAL:
+      debug_printf("_binormal" );
+      break;
+   case SVGA3D_DECLUSAGE_TESSFACTOR:
+      debug_printf("_tessfactor" );
+      break;
+   case SVGA3D_DECLUSAGE_POSITIONT:
+      debug_printf("_positiont" );
+      break;
+   case SVGA3D_DECLUSAGE_COLOR:
+      debug_printf("_color" );
+      break;
+   case SVGA3D_DECLUSAGE_FOG:
+      debug_printf("_fog" );
+      break;
+   case SVGA3D_DECLUSAGE_DEPTH:
+      debug_printf("_depth" );
+      break;
+   case SVGA3D_DECLUSAGE_SAMPLE:
+      debug_printf("_sample");
+      break;
+   default:
+      assert( 0 );
+      return;
+   }
+
+   if (semantic.usage_index != 0) {
+      debug_printf("%d", semantic.usage_index );
+   }
+}
+
+static void dump_dstreg( struct sh_dstreg dstreg, const struct dump_info *di )
+{
+   union {
+      struct sh_reg reg;
+      struct sh_dstreg dstreg;
+   } u;
+
+   assert( (dstreg.modifier & (SVGA3DDSTMOD_SATURATE | SVGA3DDSTMOD_PARTIALPRECISION)) == dstreg.modifier );
+
+   if (dstreg.modifier & SVGA3DDSTMOD_SATURATE)
+      debug_printf( "_sat" );
+   if (dstreg.modifier & SVGA3DDSTMOD_PARTIALPRECISION)
+      debug_printf( "_pp" );
+   switch (dstreg.shift_scale) {
+   case 0:
+      break;
+   case 1:
+      debug_printf( "_x2" );
+      break;
+   case 2:
+      debug_printf( "_x4" );
+      break;
+   case 3:
+      debug_printf( "_x8" );
+      break;
+   case 13:
+      debug_printf( "_d8" );
+      break;
+   case 14:
+      debug_printf( "_d4" );
+      break;
+   case 15:
+      debug_printf( "_d2" );
+      break;
+   default:
+      assert( 0 );
+   }
+   debug_printf( " " );
+
+   u.dstreg = dstreg;
+   dump_reg( u.reg, NULL, di );
+   if (dstreg.write_mask != SVGA3DWRITEMASK_ALL) {
+      debug_printf( "." );
+      if (dstreg.write_mask & SVGA3DWRITEMASK_0)
+         debug_printf( "x" );
+      if (dstreg.write_mask & SVGA3DWRITEMASK_1)
+         debug_printf( "y" );
+      if (dstreg.write_mask & SVGA3DWRITEMASK_2)
+         debug_printf( "z" );
+      if (dstreg.write_mask & SVGA3DWRITEMASK_3)
+         debug_printf( "w" );
+   }
+}
+
+static void dump_srcreg( struct sh_srcreg srcreg, struct sh_srcreg *indreg, const struct dump_info *di )
+{
+   union {
+      struct sh_reg reg;
+      struct sh_srcreg srcreg;
+   } u;
+
+   switch (srcreg.modifier) {
+   case SVGA3DSRCMOD_NEG:
+   case SVGA3DSRCMOD_BIASNEG:
+   case SVGA3DSRCMOD_SIGNNEG:
+   case SVGA3DSRCMOD_X2NEG:
+      debug_printf( "-" );
+      break;
+   case SVGA3DSRCMOD_ABS:
+      debug_printf( "|" );
+      break;
+   case SVGA3DSRCMOD_ABSNEG:
+      debug_printf( "-|" );
+      break;
+   case SVGA3DSRCMOD_COMP:
+      debug_printf( "1-" );
+      break;
+   case SVGA3DSRCMOD_NOT:
+      debug_printf( "!" );
+   }
+
+   u.srcreg = srcreg;
+   dump_reg( u.reg, indreg, di );
+   switch (srcreg.modifier) {
+   case SVGA3DSRCMOD_NONE:
+   case SVGA3DSRCMOD_NEG:
+   case SVGA3DSRCMOD_COMP:
+   case SVGA3DSRCMOD_NOT:
+      break;
+   case SVGA3DSRCMOD_ABS:
+   case SVGA3DSRCMOD_ABSNEG:
+      debug_printf( "|" );
+      break;
+   case SVGA3DSRCMOD_BIAS:
+   case SVGA3DSRCMOD_BIASNEG:
+      debug_printf( "_bias" );
+      break;
+   case SVGA3DSRCMOD_SIGN:
+   case SVGA3DSRCMOD_SIGNNEG:
+      debug_printf( "_bx2" );
+      break;
+   case SVGA3DSRCMOD_X2:
+   case SVGA3DSRCMOD_X2NEG:
+      debug_printf( "_x2" );
+      break;
+   case SVGA3DSRCMOD_DZ:
+      debug_printf( "_dz" );
+      break;
+   case SVGA3DSRCMOD_DW:
+      debug_printf( "_dw" );
+      break;
+   default:
+      assert( 0 );
+   }
+   if (srcreg.swizzle_x != 0 || srcreg.swizzle_y != 1 || srcreg.swizzle_z != 2 || srcreg.swizzle_w != 3) {
+      debug_printf( "." );
+      if (srcreg.swizzle_x == srcreg.swizzle_y && srcreg.swizzle_y == srcreg.swizzle_z && srcreg.swizzle_z == srcreg.swizzle_w) {
+         debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
+      }
+      else {
+         debug_printf( "%c", "xyzw"[srcreg.swizzle_x] );
+         debug_printf( "%c", "xyzw"[srcreg.swizzle_y] );
+         debug_printf( "%c", "xyzw"[srcreg.swizzle_z] );
+         debug_printf( "%c", "xyzw"[srcreg.swizzle_w] );
+      }
+   }
+}
+
+void
+svga_shader_dump(
+   const unsigned *assem,
+   unsigned dwords,
+   unsigned do_binary )
+{
+   const unsigned *start = assem;
+   boolean finished = FALSE;
+   struct dump_info di;
+   unsigned i;
+
+   if (do_binary) {
+      for (i = 0; i < dwords; i++) 
+         debug_printf("  0x%08x,\n", assem[i]);
+      
+      debug_printf("\n\n");
+   }
+
+   di.version.value = *assem++;
+   di.is_ps = (di.version.type == SVGA3D_PS_TYPE);
+
+   debug_printf(
+      "%s_%u_%u\n",
+      di.is_ps ? "ps" : "vs",
+      di.version.major,
+      di.version.minor );
+
+   while (!finished) {
+      struct sh_op op = *(struct sh_op *) assem;
+
+      if (assem - start >= dwords) {
+         debug_printf("... ran off end of buffer\n");
+         assert(0);
+         return;
+      }
+
+      switch (op.opcode) {
+      case SVGA3DOP_DCL:
+         {
+            struct sh_dcl dcl = *(struct sh_dcl *) assem;
+
+            debug_printf( "dcl" );
+            if (sh_dstreg_type( dcl.reg ) == SVGA3DREG_SAMPLER)
+               dump_sampleinfo( dcl.u.ps.sampleinfo );
+            else if (di.is_ps) {
+               if (di.version.major == 3 && 
+                   sh_dstreg_type( dcl.reg ) != SVGA3DREG_MISCTYPE)
+                  dump_usageinfo( dcl.u.vs.semantic );
+            }
+            else
+               dump_usageinfo( dcl.u.vs.semantic );
+            dump_dstreg( dcl.reg, &di );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_dcl ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_DEFB:
+         {
+            struct sh_defb defb = *(struct sh_defb *) assem;
+
+            debug_printf( "defb " );
+            dump_reg( defb.reg, NULL, &di );
+            debug_printf( ", " );
+            dump_bdata( defb.data );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_defb ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_DEFI:
+         {
+            struct sh_defi defi = *(struct sh_defi *) assem;
+
+            debug_printf( "defi " );
+            dump_reg( defi.reg, NULL, &di );
+            debug_printf( ", " );
+            dump_idata( defi.idata );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_defi ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_TEXCOORD:
+         assert( di.is_ps );
+         dump_op( op, "texcoord" );
+         if (0) {
+            struct sh_dstop dstop = *(struct sh_dstop *) assem;
+            dump_dstreg( dstop.dst, &di );
+            assem += sizeof( struct sh_dstop ) / sizeof( unsigned );
+         }
+         else {
+            struct sh_unaryop unaryop = *(struct sh_unaryop *) assem;
+            dump_dstreg( unaryop.dst, &di );
+            debug_printf( ", " );
+            dump_srcreg( unaryop.src, NULL, &di );
+            assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
+         }
+         debug_printf( "\n" );
+         break;
+
+      case SVGA3DOP_TEX:
+         assert( di.is_ps );
+         if (0) {
+            dump_op( op, "tex" );
+            if (0) {
+               struct sh_dstop dstop = *(struct sh_dstop *) assem;
+
+               dump_dstreg( dstop.dst, &di );
+               assem += sizeof( struct sh_dstop ) / sizeof( unsigned );
+            }
+            else {
+               struct sh_unaryop unaryop = *(struct sh_unaryop *) assem;
+
+               dump_dstreg( unaryop.dst, &di );
+               debug_printf( ", " );
+               dump_srcreg( unaryop.src, NULL, &di );
+               assem += sizeof( struct sh_unaryop ) / sizeof( unsigned );
+            }
+         }
+         else {
+            struct sh_binaryop binaryop = *(struct sh_binaryop *) assem;
+
+            dump_op( op, "texld" );
+            dump_dstreg( binaryop.dst, &di );
+            debug_printf( ", " );
+            dump_srcreg( binaryop.src0, NULL, &di );
+            debug_printf( ", " );
+            dump_srcreg( binaryop.src1, NULL, &di );
+            assem += sizeof( struct sh_binaryop ) / sizeof( unsigned );
+         }
+         debug_printf( "\n" );
+         break;
+
+      case SVGA3DOP_DEF:
+         {
+            struct sh_def def = *(struct sh_def *) assem;
+
+            debug_printf( "def " );
+            dump_reg( def.reg, NULL, &di );
+            debug_printf( ", " );
+            dump_cdata( def.cdata );
+            debug_printf( "\n" );
+            assem += sizeof( struct sh_def ) / sizeof( unsigned );
+         }
+         break;
+
+      case SVGA3DOP_PHASE:
+         debug_printf( "phase\n" );
+         assem += sizeof( struct sh_op ) / sizeof( unsigned );
+         break;
+
+      case SVGA3DOP_COMMENT:
+         {
+            struct sh_comment comment = *(struct sh_comment *)assem;
+
+            /* Ignore comment contents. */
+            assem += sizeof(struct sh_comment) / sizeof(unsigned) + comment.size;
+         }
+         break;
+
+      case SVGA3DOP_RET:
+         debug_printf( "ret\n" );
+         assem += sizeof( struct sh_op ) / sizeof( unsigned );
+         break;
+
+      case SVGA3DOP_END:
+         debug_printf( "end\n" );
+         finished = TRUE;
+         break;
+
+      default:
+         {
+            const struct sh_opcode_info *info = svga_opcode_info( op.opcode );
+            uint i;
+            uint num_src = info->num_src + op.predicated;
+            boolean not_first_arg = FALSE;
+
+            assert( info->num_dst <= 1 );
+
+            if (op.opcode == SVGA3DOP_SINCOS && di.version.major < 3)
+               num_src += 2;
+
+            dump_comp_op( op, info->mnemonic );
+            assem += sizeof( struct sh_op ) / sizeof( unsigned );
+
+            if (info->num_dst > 0) {
+               struct sh_dstreg dstreg = *(struct sh_dstreg *) assem;
+
+               dump_dstreg( dstreg, &di );
+               assem += sizeof( struct sh_dstreg ) / sizeof( unsigned );
+               not_first_arg = TRUE;
+            }
+
+            for (i = 0; i < num_src; i++) {
+               struct sh_srcreg srcreg;
+               struct sh_srcreg indreg;
+
+               srcreg = *(struct sh_srcreg *) assem;
+               assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
+               if (srcreg.relative && !di.is_ps && di.version.major >= 2) {
+                  indreg = *(struct sh_srcreg *) assem;
+                  assem += sizeof( struct sh_srcreg ) / sizeof( unsigned );
+               }
+
+               if (not_first_arg)
+                  debug_printf( ", " );
+               else
+                  debug_printf( " " );
+               dump_srcreg( srcreg, &indreg, &di );
+               not_first_arg = TRUE;
+            }
+
+            debug_printf( "\n" );
+         }
+      }
+   }
+}
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_dump.h b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
new file mode 100644
index 00000000000..a2657acb2f1
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_dump.h
@@ -0,0 +1,42 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Dump Facilities
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef SVGA_SHADER_DUMP_H
+#define SVGA_SHADER_DUMP_H
+
+void
+svga_shader_dump(
+   const unsigned *assem,
+   unsigned dwords,
+   unsigned do_binary );
+
+#endif /* SVGA_SHADER_DUMP_H */
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.c b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
new file mode 100644
index 00000000000..8343bfdaab4
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.c
@@ -0,0 +1,168 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Opcode Info
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#include "util/u_debug.h"
+#include "svga_shader_op.h"
+
+#include "../svga_hw_reg.h"
+#include "svga3d_shaderdefs.h"
+
+#define SVGA3DOP_INVALID SVGA3DOP_END
+#define TGSI_OPCODE_INVALID TGSI_OPCODE_LAST
+
+static struct sh_opcode_info opcode_info[] =
+{
+   { "nop",          0, 0, SVGA3DOP_NOP          },
+   { "mov",          1, 1, SVGA3DOP_MOV,         },
+   { "add",          1, 2, SVGA3DOP_ADD,         },
+   { "sub",          1, 2, SVGA3DOP_SUB,         },
+   { "mad",          1, 3, SVGA3DOP_MAD,         },
+   { "mul",          1, 2, SVGA3DOP_MUL,         },
+   { "rcp",          1, 1, SVGA3DOP_RCP,         },
+   { "rsq",          1, 1, SVGA3DOP_RSQ,         },
+   { "dp3",          1, 2, SVGA3DOP_DP3,         },
+   { "dp4",          1, 2, SVGA3DOP_DP4,         },
+   { "min",          1, 2, SVGA3DOP_MIN,         },
+   { "max",          1, 2, SVGA3DOP_MAX,         },
+   { "slt",          1, 2, SVGA3DOP_SLT,         },
+   { "sge",          1, 2, SVGA3DOP_SGE,         },
+   { "exp",          1, 1, SVGA3DOP_EXP,         },
+   { "log",          1, 1, SVGA3DOP_LOG,         },
+   { "lit",          1, 1, SVGA3DOP_LIT,         },
+   { "dst",          1, 2, SVGA3DOP_DST,         },
+   { "lrp",          1, 3, SVGA3DOP_LRP,         },
+   { "frc",          1, 1, SVGA3DOP_FRC,         },
+   { "m4x4",         1, 2, SVGA3DOP_M4x4,        },
+   { "m4x3",         1, 2, SVGA3DOP_M4x3,        },
+   { "m3x4",         1, 2, SVGA3DOP_M3x4,        },
+   { "m3x3",         1, 2, SVGA3DOP_M3x3,        },
+   { "m3x2",         1, 2, SVGA3DOP_M3x2,        },
+   { "call",         0, 1, SVGA3DOP_CALL,        },
+   { "callnz",       0, 2, SVGA3DOP_CALLNZ,      },
+   { "loop",         0, 2, SVGA3DOP_LOOP,        },
+   { "ret",          0, 0, SVGA3DOP_RET,         },
+   { "endloop",      0, 0, SVGA3DOP_ENDLOOP,     },
+   { "label",        0, 1, SVGA3DOP_LABEL,       },
+   { "dcl",          0, 0, SVGA3DOP_DCL,         },
+   { "pow",          1, 2, SVGA3DOP_POW,         },
+   { "crs",          1, 2, SVGA3DOP_CRS,         },
+   { "sgn",          1, 3, SVGA3DOP_SGN,         },
+   { "abs",          1, 1, SVGA3DOP_ABS,         },
+   { "nrm",          1, 1, SVGA3DOP_NRM,         }, /* 3-componenet normalization */
+   { "sincos",       1, 1, SVGA3DOP_SINCOS,      },
+   { "rep",          0, 1, SVGA3DOP_REP,         },
+   { "endrep",       0, 0, SVGA3DOP_ENDREP,      },
+   { "if",           0, 1, SVGA3DOP_IF,          },
+   { "ifc",          0, 2, SVGA3DOP_IFC,         },
+   { "else",         0, 0, SVGA3DOP_ELSE,        },
+   { "endif",        0, 0, SVGA3DOP_ENDIF,       },
+   { "break",        0, 0, SVGA3DOP_BREAK,       },
+   { "breakc",       0, 0, SVGA3DOP_BREAKC,      },
+   { "mova",         1, 1, SVGA3DOP_MOVA,        },
+   { "defb",         0, 0, SVGA3DOP_DEFB,        },
+   { "defi",         0, 0, SVGA3DOP_DEFI,        },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "???",          0, 0, SVGA3DOP_INVALID,     },
+   { "texcoord",     0, 0, SVGA3DOP_TEXCOORD,    },
+   { "texkill",      1, 0, SVGA3DOP_TEXKILL,     },
+   { "tex",          0, 0, SVGA3DOP_TEX,         },
+   { "texbem",       1, 1, SVGA3DOP_TEXBEM,      },
+   { "texbeml",      1, 1, SVGA3DOP_TEXBEML,     },
+   { "texreg2ar",    1, 1, SVGA3DOP_TEXREG2AR,   },
+   { "texreg2gb",    1, 1, SVGA3DOP_TEXREG2GB,   },
+   { "texm3x2pad",   1, 1, SVGA3DOP_TEXM3x2PAD,  },
+   { "texm3x2tex",   1, 1, SVGA3DOP_TEXM3x2TEX,  },
+   { "texm3x3pad",   1, 1, SVGA3DOP_TEXM3x3PAD,  },
+   { "texm3x3tex",   1, 1, SVGA3DOP_TEXM3x3TEX,  },
+   { "reserved0",    0, 0, SVGA3DOP_RESERVED0,   },
+   { "texm3x3spec",  1, 2, SVGA3DOP_TEXM3x3SPEC, },
+   { "texm3x3vspec", 1, 1, SVGA3DOP_TEXM3x3VSPEC,},
+   { "expp",         1, 1, SVGA3DOP_EXPP,        },
+   { "logp",         1, 1, SVGA3DOP_LOGP,        },
+   { "cnd",          1, 3, SVGA3DOP_CND,         },
+   { "def",          0, 0, SVGA3DOP_DEF,         },
+   { "texreg2rgb",   1, 1, SVGA3DOP_TEXREG2RGB,  },
+   { "texdp3tex",    1, 1, SVGA3DOP_TEXDP3TEX,   },
+   { "texm3x2depth", 1, 1, SVGA3DOP_TEXM3x2DEPTH,},
+   { "texdp3",       1, 1, SVGA3DOP_TEXDP3,      },
+   { "texm3x3",      1, 1, SVGA3DOP_TEXM3x3,     },
+   { "texdepth",     1, 0, SVGA3DOP_TEXDEPTH,    },
+   { "cmp",          1, 3, SVGA3DOP_CMP,         },
+   { "bem",          1, 2, SVGA3DOP_BEM,         },
+   { "dp2add",       1, 3, SVGA3DOP_DP2ADD,      },
+   { "dsx",          1, 1, SVGA3DOP_INVALID,     },
+   { "dsy",          1, 1, SVGA3DOP_INVALID,     },
+   { "texldd",       1, 1, SVGA3DOP_INVALID,     },
+   { "setp",         1, 2, SVGA3DOP_SETP,        },
+   { "texldl",       1, 1, SVGA3DOP_INVALID,     },
+   { "breakp",       1, 1, SVGA3DOP_INVALID,     },
+};
+
+const struct sh_opcode_info *svga_opcode_info( uint op )
+{
+   struct sh_opcode_info *info;
+
+   if (op >= sizeof( opcode_info ) / sizeof( opcode_info[0] )) {
+      /* The opcode is either PHASE, COMMENT, END or out of range.
+       */
+      assert( 0 );
+      return NULL;
+   }
+
+   info = &opcode_info[op];
+
+   if (info->svga_opcode == SVGA3DOP_INVALID) {
+      /* No valid information. Please provide number of dst/src registers.
+       */
+      assert( 0 );
+      return NULL;
+   }
+
+   /* Sanity check.
+    */
+   assert( op == info->svga_opcode );
+
+   return info;
+}
diff --git a/src/gallium/drivers/svga/svgadump/svga_shader_op.h b/src/gallium/drivers/svga/svgadump/svga_shader_op.h
new file mode 100644
index 00000000000..e558de02c53
--- /dev/null
+++ b/src/gallium/drivers/svga/svgadump/svga_shader_op.h
@@ -0,0 +1,46 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA Shader Token Opcode Info
+ * 
+ * @author Michal Krol <michal@vmware.com>
+ */
+
+#ifndef SVGA_SHADER_OP_H
+#define SVGA_SHADER_OP_H
+
+struct sh_opcode_info
+{
+   const char *mnemonic;
+   unsigned num_dst:8;
+   unsigned num_src:8;
+   unsigned svga_opcode:16;
+};
+
+const struct sh_opcode_info *svga_opcode_info( unsigned op );
+
+#endif /* SVGA_SHADER_OP_H */
diff --git a/src/gallium/drivers/trace/tr_dump_state.c b/src/gallium/drivers/trace/tr_dump_state.c
index bcf6751af4f..6d582092941 100644
--- a/src/gallium/drivers/trace/tr_dump_state.c
+++ b/src/gallium/drivers/trace/tr_dump_state.c
@@ -83,15 +83,15 @@ void trace_dump_template(const struct pipe_texture *templat)
    trace_dump_member(format, templat, format);
 
    trace_dump_member_begin("width");
-   trace_dump_array(uint, templat->width, 1);
+   trace_dump_uint(templat->width0);
    trace_dump_member_end();
 
    trace_dump_member_begin("height");
-   trace_dump_array(uint, templat->height, 1);
+   trace_dump_uint(templat->height0);
    trace_dump_member_end();
 
    trace_dump_member_begin("depth");
-   trace_dump_array(uint, templat->depth, 1);
+   trace_dump_uint(templat->depth0);
    trace_dump_member_end();
 
    trace_dump_member_begin("block");
diff --git a/src/gallium/drivers/trace/tr_rbug.c b/src/gallium/drivers/trace/tr_rbug.c
index 81e0a6f3b00..b59458c0e37 100644
--- a/src/gallium/drivers/trace/tr_rbug.c
+++ b/src/gallium/drivers/trace/tr_rbug.c
@@ -200,9 +200,9 @@ trace_rbug_texture_info(struct trace_rbug *tr_rbug, struct rbug_header *header,
    t = tr_tex->texture;
    rbug_send_texture_info_reply(tr_rbug->con, serial,
                                t->target, t->format,
-                               t->width, t->last_level + 1,
-                               t->height, t->last_level + 1,
-                               t->depth, t->last_level + 1,
+                               &t->width0, 1,
+                               &t->height0, 1,
+                               &t->depth0, 1,
                                t->block.width, t->block.height, t->block.size,
                                t->last_level,
                                t->nr_samples,
diff --git a/src/gallium/include/pipe/p_shader_tokens.h b/src/gallium/include/pipe/p_shader_tokens.h
index c4e05a55269..8cf3942cc7a 100644
--- a/src/gallium/include/pipe/p_shader_tokens.h
+++ b/src/gallium/include/pipe/p_shader_tokens.h
@@ -295,6 +295,8 @@ union tgsi_immediate_data
  * respectively. For a given operation code, those numbers are fixed and are
  * present here only for convenience.
  *
+ * If Predicate is TRUE, tgsi_instruction_predicate token immediately follows.
+ *
  * If Extended is TRUE, it is now executed.
  *
  * Saturate controls how are final results in destination registers modified.
@@ -308,7 +310,8 @@ struct tgsi_instruction
    unsigned Saturate   : 2;  /* TGSI_SAT_ */
    unsigned NumDstRegs : 2;  /* UINT */
    unsigned NumSrcRegs : 4;  /* UINT */
-   unsigned Padding    : 3;
+   unsigned Predicate  : 1;  /* BOOL */
+   unsigned Padding    : 2;
    unsigned Extended   : 1;  /* BOOL */
 };
 
@@ -325,7 +328,6 @@ struct tgsi_instruction
 
 #define TGSI_INSTRUCTION_EXT_TYPE_LABEL     1
 #define TGSI_INSTRUCTION_EXT_TYPE_TEXTURE   2
-#define TGSI_INSTRUCTION_EXT_TYPE_PREDICATE 3
 
 struct tgsi_instruction_ext
 {
@@ -341,9 +343,6 @@ struct tgsi_instruction_ext
  * If tgsi_instruction_ext::Type is TGSI_INSTRUCTION_EXT_TYPE_TEXTURE, it
  * should be cast to tgsi_instruction_ext_texture.
  * 
- * If tgsi_instruction_ext::Type is TGSI_INSTRUCTION_EXT_TYPE_PREDICATE, it
- * should be cast to tgsi_instruction_ext_predicate.
- * 
  * If tgsi_instruction_ext::Extended is TRUE, another tgsi_instruction_ext
  * follows.
  */
@@ -384,17 +383,15 @@ struct tgsi_instruction_ext_texture
  * For SM3, the following constraint applies.
  *   - Swizzle is either set to identity or replicate.
  */
-struct tgsi_instruction_ext_predicate
+struct tgsi_instruction_predicate
 {
-   unsigned Type     : 4;  /* TGSI_INSTRUCTION_EXT_TYPE_PREDICATE */
+   int      Index    : 16; /* SINT */
    unsigned SwizzleX : 2;  /* TGSI_SWIZZLE_x */
    unsigned SwizzleY : 2;  /* TGSI_SWIZZLE_x */
    unsigned SwizzleZ : 2;  /* TGSI_SWIZZLE_x */
    unsigned SwizzleW : 2;  /* TGSI_SWIZZLE_x */
    unsigned Negate   : 1;  /* BOOL */
-   unsigned SrcIndex : 8;  /* UINT */
-   unsigned Padding  : 10;
-   unsigned Extended : 1;  /* BOOL */
+   unsigned Padding  : 7;
 };
 
 /**
diff --git a/src/gallium/include/pipe/p_state.h b/src/gallium/include/pipe/p_state.h
index 3c9f0f076f3..12433ad4c24 100644
--- a/src/gallium/include/pipe/p_state.h
+++ b/src/gallium/include/pipe/p_state.h
@@ -350,9 +350,9 @@ struct pipe_texture
    enum pipe_texture_target target; /**< PIPE_TEXTURE_x */
    enum pipe_format format;         /**< PIPE_FORMAT_x */
 
-   unsigned width[PIPE_MAX_TEXTURE_LEVELS];
-   unsigned height[PIPE_MAX_TEXTURE_LEVELS];
-   unsigned depth[PIPE_MAX_TEXTURE_LEVELS];
+   unsigned width0;
+   unsigned height0;
+   unsigned depth0;
 
    struct pipe_format_block block;
    unsigned nblocksx[PIPE_MAX_TEXTURE_LEVELS]; /**< allocated width in blocks */
diff --git a/src/gallium/state_trackers/dri/dri_drawable.c b/src/gallium/state_trackers/dri/dri_drawable.c
index 5625ff53cfd..45a6059ea83 100644
--- a/src/gallium/state_trackers/dri/dri_drawable.c
+++ b/src/gallium/state_trackers/dri/dri_drawable.c
@@ -46,7 +46,7 @@
 
 #include "util/u_memory.h"
 #include "util/u_rect.h"
-
+ 
 static struct pipe_surface *
 dri_surface_from_handle(struct drm_api *api,
 			struct pipe_screen *screen,
@@ -62,10 +62,10 @@ dri_surface_from_handle(struct drm_api *api,
    templat.tex_usage |= PIPE_TEXTURE_USAGE_RENDER_TARGET;
    templat.target = PIPE_TEXTURE_2D;
    templat.last_level = 0;
-   templat.depth[0] = 1;
+   templat.depth0 = 1;
    templat.format = format;
-   templat.width[0] = width;
-   templat.height[0] = height;
+   templat.width0 = width;
+   templat.height0 = height;
    pf_get_block(templat.format, &templat.block);
 
    texture = api->texture_from_shared_handle(api, screen, &templat,
diff --git a/src/gallium/state_trackers/egl/egl_surface.c b/src/gallium/state_trackers/egl/egl_surface.c
index 91615abebee..ddd9b04cd48 100644
--- a/src/gallium/state_trackers/egl/egl_surface.c
+++ b/src/gallium/state_trackers/egl/egl_surface.c
@@ -114,10 +114,10 @@ drm_create_texture(_EGLDisplay *dpy,
 	templat.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY;
 	templat.target = PIPE_TEXTURE_2D;
 	templat.last_level = 0;
-	templat.depth[0] = 1;
+	templat.depth0 = 1;
 	templat.format = PIPE_FORMAT_A8R8G8B8_UNORM;
-	templat.width[0] = w;
-	templat.height[0] = h;
+	templat.width0 = w;
+	templat.height0 = h;
 	pf_get_block(templat.format, &templat.block);
 
 	texture = screen->texture_create(dev->screen,
diff --git a/src/gallium/state_trackers/egl/egl_tracker.c b/src/gallium/state_trackers/egl/egl_tracker.c
index 5811c20f03e..745803c7eb0 100644
--- a/src/gallium/state_trackers/egl/egl_tracker.c
+++ b/src/gallium/state_trackers/egl/egl_tracker.c
@@ -88,11 +88,11 @@ drm_get_device_id(struct drm_device *device)
 	}
 
 	ret = fgets(path, sizeof( path ), file);
+	fclose(file);
 	if (!ret)
 		return;
 
 	sscanf(path, "%x", &device->deviceID);
-	fclose(file);
 }
 
 static void
diff --git a/src/gallium/state_trackers/python/p_device.i b/src/gallium/state_trackers/python/p_device.i
index f16fe5b0ff7..a83bcc71a1a 100644
--- a/src/gallium/state_trackers/python/p_device.i
+++ b/src/gallium/state_trackers/python/p_device.i
@@ -113,9 +113,9 @@ struct st_device {
       memset(&templat, 0, sizeof(templat));
       templat.format = format;
       pf_get_block(templat.format, &templat.block);
-      templat.width[0] = width;
-      templat.height[0] = height;
-      templat.depth[0] = depth;
+      templat.width0 = width;
+      templat.height0 = height;
+      templat.depth0 = depth;
       templat.last_level = last_level;
       templat.target = target;
       templat.tex_usage = tex_usage;
diff --git a/src/gallium/state_trackers/python/p_texture.i b/src/gallium/state_trackers/python/p_texture.i
index 1d513abf3c7..5416b872f53 100644
--- a/src/gallium/state_trackers/python/p_texture.i
+++ b/src/gallium/state_trackers/python/p_texture.i
@@ -59,15 +59,15 @@
    }
    
    unsigned get_width(unsigned level=0) {
-      return $self->width[level];
+      return u_minify($self->width0, level);
    }
    
    unsigned get_height(unsigned level=0) {
-      return $self->height[level];
+      return u_minify($self->height0, level);
    }
    
    unsigned get_depth(unsigned level=0) {
-      return $self->depth[level];
+      return u_minify($self->depth0, level);
    }
    
    unsigned get_nblocksx(unsigned level=0) {
@@ -88,7 +88,7 @@
          SWIG_exception(SWIG_ValueError, "face out of bounds");
       if(level > $self->last_level)
          SWIG_exception(SWIG_ValueError, "level out of bounds");
-      if(zslice >= $self->depth[level])
+      if(zslice >= u_minify($self->depth0, level))
          SWIG_exception(SWIG_ValueError, "zslice out of bounds");
       
       surface = CALLOC_STRUCT(st_surface);
@@ -375,13 +375,13 @@ struct st_surface
    static unsigned
    st_surface_width_get(struct st_surface *surface)
    {
-      return surface->texture->width[surface->level];
+      return u_minify(surface->texture->width0, surface->level);
    }
    
    static unsigned
    st_surface_height_get(struct st_surface *surface)
    {
-      return surface->texture->height[surface->level];
+      return u_minify(surface->texture->height0, surface->level);
    }
 
    static unsigned
diff --git a/src/gallium/state_trackers/python/retrace/interpreter.py b/src/gallium/state_trackers/python/retrace/interpreter.py
index 348f2e43683..d0bcb690a9b 100755
--- a/src/gallium/state_trackers/python/retrace/interpreter.py
+++ b/src/gallium/state_trackers/python/retrace/interpreter.py
@@ -279,9 +279,9 @@ class Screen(Object):
     def texture_create(self, templat):
         return self.real.texture_create(
             format = templat.format,
-            width = templat.width[0],
-            height = templat.height[0],
-            depth = templat.depth[0],
+            width = templat.width0,
+            height = templat.height0,
+            depth = templat.depth0,
             last_level = templat.last_level,
             target = templat.target,
             tex_usage = templat.tex_usage,
diff --git a/src/gallium/state_trackers/python/st_device.c b/src/gallium/state_trackers/python/st_device.c
index ea7d18738f6..a791113abac 100644
--- a/src/gallium/state_trackers/python/st_device.c
+++ b/src/gallium/state_trackers/python/st_device.c
@@ -252,9 +252,9 @@ st_context_create(struct st_device *st_dev)
       templat.block.size = 4;
       templat.block.width = 1;
       templat.block.height = 1;
-      templat.width[0] = 1;
-      templat.height[0] = 1;
-      templat.depth[0] = 1;
+      templat.width0 = 1;
+      templat.height0 = 1;
+      templat.depth0 = 1;
       templat.last_level = 0;
    
       st_ctx->default_texture = screen->texture_create( screen, &templat );
@@ -264,8 +264,8 @@ st_context_create(struct st_device *st_dev)
                                              0, 0, 0,
                                              PIPE_TRANSFER_WRITE,
                                              0, 0,
-                                             st_ctx->default_texture->width[0],
-                                             st_ctx->default_texture->height[0]);
+                                             st_ctx->default_texture->width0,
+                                             st_ctx->default_texture->height0);
          if (transfer) {
             uint32_t *map;
             map = (uint32_t *) screen->transfer_map(screen, transfer);
diff --git a/src/gallium/state_trackers/python/st_sample.c b/src/gallium/state_trackers/python/st_sample.c
index 53a01891e12..6fee90afdaf 100644
--- a/src/gallium/state_trackers/python/st_sample.c
+++ b/src/gallium/state_trackers/python/st_sample.c
@@ -528,8 +528,8 @@ st_sample_surface(struct st_surface *surface, float *rgba)
 {
    struct pipe_texture *texture = surface->texture;
    struct pipe_screen *screen = texture->screen;
-   unsigned width = texture->width[surface->level];
-   unsigned height = texture->height[surface->level];
+   unsigned width = u_minify(texture->width0, surface->level);
+   unsigned height = u_minify(texture->height0, surface->level);
    uint rgba_stride = width * 4;
    struct pipe_transfer *transfer;
    void *raw;
diff --git a/src/gallium/state_trackers/vega/api_filters.c b/src/gallium/state_trackers/vega/api_filters.c
index 862cbb03c43..faf396d0877 100644
--- a/src/gallium/state_trackers/vega/api_filters.c
+++ b/src/gallium/state_trackers/vega/api_filters.c
@@ -68,9 +68,9 @@ static INLINE struct pipe_texture *create_texture_1d(struct vg_context *ctx,
    templ.target = PIPE_TEXTURE_1D;
    templ.format = PIPE_FORMAT_A8R8G8B8_UNORM;
    templ.last_level = 0;
-   templ.width[0] = color_data_len;
-   templ.height[0] = 1;
-   templ.depth[0] = 1;
+   templ.width0 = color_data_len;
+   templ.height0 = 1;
+   templ.depth0 = 1;
    pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
@@ -81,7 +81,7 @@ static INLINE struct pipe_texture *create_texture_1d(struct vg_context *ctx,
          screen->get_tex_transfer(screen, tex,
                                   0, 0, 0,
                                   PIPE_TRANSFER_READ_WRITE ,
-                                  0, 0, tex->width[0], tex->height[0]);
+                                  0, 0, tex->width0, tex->height0);
       void *map = screen->transfer_map(screen, transfer);
       memcpy(map, color_data, sizeof(VGint)*color_data_len);
       screen->transfer_unmap(screen, transfer);
diff --git a/src/gallium/state_trackers/vega/arc.c b/src/gallium/state_trackers/vega/arc.c
index e74c7f03345..8b04d21ea76 100644
--- a/src/gallium/state_trackers/vega/arc.c
+++ b/src/gallium/state_trackers/vega/arc.c
@@ -528,6 +528,7 @@ static INLINE int num_beziers_needed(struct arc *arc)
    double threshold = 0.05;
    VGboolean found = VG_FALSE;
    int n = 1;
+   int i;
    double min_eta, max_eta;
 
    min_eta = MIN2(arc->eta1, arc->eta2);
@@ -538,7 +539,7 @@ static INLINE int num_beziers_needed(struct arc *arc)
       if (d_eta <= 0.5 * M_PI) {
          double eta_b = min_eta;
          found = VG_TRUE;
-         for (int i = 0; found && (i < n); ++i) {
+         for (i = 0; found && (i < n); ++i) {
             double etaA = eta_b;
             eta_b += d_eta;
             found = (estimate_error(arc, etaA, eta_b) <= threshold);
@@ -554,6 +555,7 @@ static void arc_to_beziers(struct arc *arc,
                            struct arc_cb cb,
                            struct matrix *matrix)
 {
+   int i;
    int n = 1;
    double d_eta, eta_b, cos_eta_b,
       sin_eta_b, a_cos_eta_b, b_sin_eta_b, a_sin_eta_b,
@@ -607,7 +609,7 @@ static void arc_to_beziers(struct arc *arc,
    t     = tan(0.5 * d_eta);
    alpha = sin(d_eta) * (sqrt(4 + 3 * t * t) - 1) / 3;
 
-   for (int i = 0; i < n; ++i) {
+   for (i = 0; i < n; ++i) {
       struct bezier bezier;
       double xA    = x_b;
       double yA    = y_b;
diff --git a/src/gallium/state_trackers/vega/bezier.c b/src/gallium/state_trackers/vega/bezier.c
index 39a7ade0161..0d5504004cc 100644
--- a/src/gallium/state_trackers/vega/bezier.c
+++ b/src/gallium/state_trackers/vega/bezier.c
@@ -255,7 +255,9 @@ static enum shift_result good_offset(const struct bezier *b1,
    const float max_dist_line = threshold*offset*offset;
    const float max_dist_normal = threshold*offset;
    const float spacing = 0.25;
-   for (float i = spacing; i < 0.99; i += spacing) {
+   float i;
+
+   for (i = spacing; i < 0.99; i += spacing) {
       float p1[2],p2[2], d, l;
       float normal[2];
       bezier_point_at(b1, i, p1);
@@ -330,6 +332,7 @@ static enum shift_result shift(const struct bezier *orig,
                                struct bezier *shifted,
                                float offset, float threshold)
 {
+   int i;
    int map[4];
    VGboolean p1_p2_equal = (orig->x1 == orig->x2 && orig->y1 == orig->y2);
    VGboolean p2_p3_equal = (orig->x2 == orig->x3 && orig->y2 == orig->y3);
@@ -404,7 +407,7 @@ static enum shift_result shift(const struct bezier *orig,
    points_shifted[0][0] = points[0][0] + offset * prev_normal[0];
    points_shifted[0][1] = points[0][1] + offset * prev_normal[1];
 
-   for (int i = 1; i < np - 1; ++i) {
+   for (i = 1; i < np - 1; ++i) {
       float normal_sum[2], r;
       float next_normal[2];
       compute_pt_normal(points[i], points[i + 1], next_normal);
diff --git a/src/gallium/state_trackers/vega/image.c b/src/gallium/state_trackers/vega/image.c
index 9a722980d52..4684a5727dd 100644
--- a/src/gallium/state_trackers/vega/image.c
+++ b/src/gallium/state_trackers/vega/image.c
@@ -93,8 +93,8 @@ static void vg_copy_texture(struct vg_context *ctx,
    dst_loc[3] = height;
    dst_bounds[0] = 0.f;
    dst_bounds[1] = 0.f;
-   dst_bounds[2] = dst->width[0];
-   dst_bounds[3] = dst->height[0];
+   dst_bounds[2] = dst->width0;
+   dst_bounds[3] = dst->height0;
 
    src_loc[0] = sx;
    src_loc[1] = sy;
@@ -102,8 +102,8 @@ static void vg_copy_texture(struct vg_context *ctx,
    src_loc[3] = height;
    src_bounds[0] = 0.f;
    src_bounds[1] = 0.f;
-   src_bounds[2] = src->width[0];
-   src_bounds[3] = src->height[0];
+   src_bounds[2] = src->width0;
+   src_bounds[3] = src->height0;
 
    vg_bound_rect(src_loc, src_bounds, src_shift);
    vg_bound_rect(dst_loc, dst_bounds, dst_shift);
@@ -272,9 +272,9 @@ struct vg_image * image_create(VGImageFormat format,
    pt.format = pformat;
    pf_get_block(pformat, &pt.block);
    pt.last_level = 0;
-   pt.width[0] = width;
-   pt.height[0] = height;
-   pt.depth[0] = 1;
+   pt.width0 = width;
+   pt.height0 = height;
+   pt.depth0 = 1;
    pt.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
    newtex = screen->texture_create(screen, &pt);
@@ -414,7 +414,7 @@ void image_sub_data(struct vg_image *image,
    { /* upload color_data */
       struct pipe_transfer *transfer = screen->get_tex_transfer(
          screen, texture, 0, 0, 0,
-         PIPE_TRANSFER_WRITE, 0, 0, texture->width[0], texture->height[0]);
+         PIPE_TRANSFER_WRITE, 0, 0, texture->width0, texture->height0);
       src += (dataStride * yoffset);
       for (i = 0; i < height; i++) {
          _vega_unpack_float_span_rgba(ctx, width, xoffset, src, dataFormat, temp);
diff --git a/src/gallium/state_trackers/vega/mask.c b/src/gallium/state_trackers/vega/mask.c
index 24650a37d50..b84103fdbac 100644
--- a/src/gallium/state_trackers/vega/mask.c
+++ b/src/gallium/state_trackers/vega/mask.c
@@ -426,7 +426,7 @@ static void mask_using_texture(struct pipe_texture *texture,
    if (!surface)
       return;
    if (!intersect_rectangles(surface->width, surface->height,
-                             texture->width[0], texture->height[0],
+                             texture->width0, texture->height0,
                              x, y, width, height,
                              offsets, loc))
       return;
@@ -493,9 +493,9 @@ struct vg_mask_layer * mask_layer_create(VGint width, VGint height)
       pt.format = PIPE_FORMAT_A8R8G8B8_UNORM;
       pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &pt.block);
       pt.last_level = 0;
-      pt.width[0] = width;
-      pt.height[0] = height;
-      pt.depth[0] = 1;
+      pt.width0 = width;
+      pt.height0 = height;
+      pt.depth0 = 1;
       pt.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
       pt.compressed = 0;
 
@@ -607,8 +607,8 @@ void mask_render_to(struct path *path,
    struct vg_mask_layer *temp_layer;
    VGint width, height;
 
-   width = fb_buffers->alpha_mask->width[0];
-   height = fb_buffers->alpha_mask->width[0];
+   width = fb_buffers->alpha_mask->width0;
+   height = fb_buffers->alpha_mask->width0;
 
    temp_layer = mask_layer_create(width, height);
 
diff --git a/src/gallium/state_trackers/vega/paint.c b/src/gallium/state_trackers/vega/paint.c
index 04a6ba9cdcd..e8ca7d9e89b 100644
--- a/src/gallium/state_trackers/vega/paint.c
+++ b/src/gallium/state_trackers/vega/paint.c
@@ -151,9 +151,9 @@ static INLINE struct pipe_texture *create_gradient_texture(struct vg_paint *p)
    templ.target = PIPE_TEXTURE_1D;
    templ.format = PIPE_FORMAT_A8R8G8B8_UNORM;
    templ.last_level = 0;
-   templ.width[0] = 1024;
-   templ.height[0] = 1;
-   templ.depth[0] = 1;
+   templ.width0 = 1024;
+   templ.height0 = 1;
+   templ.depth0 = 1;
    pf_get_block(PIPE_FORMAT_A8R8G8B8_UNORM, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
@@ -328,8 +328,8 @@ static INLINE void  paint_pattern_buffer(struct vg_paint *paint, void *buffer)
 
    map[4] = 0.f;
    map[5] = 1.f;
-   map[6] = paint->pattern.texture->width[0];
-   map[7] = paint->pattern.texture->height[0];
+   map[6] = paint->pattern.texture->width0;
+   map[7] = paint->pattern.texture->height0;
    {
       struct matrix mat;
       memcpy(&mat, &ctx->state.vg.fill_paint_to_user_matrix,
diff --git a/src/gallium/state_trackers/vega/renderer.c b/src/gallium/state_trackers/vega/renderer.c
index 396c88aa3d2..9085ed1bfe0 100644
--- a/src/gallium/state_trackers/vega/renderer.c
+++ b/src/gallium/state_trackers/vega/renderer.c
@@ -230,13 +230,13 @@ void renderer_draw_texture(struct renderer *r,
    struct pipe_buffer *buf;
    VGfloat s0, t0, s1, t1;
 
-   assert(tex->width[0] != 0);
-   assert(tex->height[0] != 0);
+   assert(tex->width0 != 0);
+   assert(tex->height0 != 0);
 
-   s0 = x1offset / tex->width[0];
-   s1 = x2offset / tex->width[0];
-   t0 = y1offset / tex->height[0];
-   t1 = y2offset / tex->height[0];
+   s0 = x1offset / tex->width0;
+   s1 = x2offset / tex->width0;
+   t0 = y1offset / tex->height0;
+   t1 = y2offset / tex->height0;
 
    cso_save_vertex_shader(r->cso);
    /* shaders */
@@ -276,10 +276,10 @@ void renderer_copy_texture(struct renderer *ctx,
    struct pipe_framebuffer_state fb;
    float s0, t0, s1, t1;
 
-   assert(src->width[0] != 0);
-   assert(src->height[0] != 0);
-   assert(dst->width[0] != 0);
-   assert(dst->height[0] != 0);
+   assert(src->width0 != 0);
+   assert(src->height0 != 0);
+   assert(dst->width0 != 0);
+   assert(dst->height0 != 0);
 
 #if 0
    debug_printf("copy texture [%f, %f, %f, %f], [%f, %f, %f, %f]\n",
@@ -287,10 +287,10 @@ void renderer_copy_texture(struct renderer *ctx,
 #endif
 
 #if 1
-   s0 = sx1 / src->width[0];
-   s1 = sx2 / src->width[0];
-   t0 = sy1 / src->height[0];
-   t1 = sy2 / src->height[0];
+   s0 = sx1 / src->width0;
+   s1 = sx2 / src->width0;
+   t0 = sy1 / src->height0;
+   t1 = sy2 / src->height0;
 #else
    s0 = 0;
    s1 = 1;
@@ -445,9 +445,9 @@ void renderer_copy_surface(struct renderer *ctx,
    texTemp.target = PIPE_TEXTURE_2D;
    texTemp.format = src->format;
    texTemp.last_level = 0;
-   texTemp.width[0] = srcW;
-   texTemp.height[0] = srcH;
-   texTemp.depth[0] = 1;
+   texTemp.width0 = srcW;
+   texTemp.height0 = srcH;
+   texTemp.depth0 = 1;
    pf_get_block(src->format, &texTemp.block);
 
    tex = screen->texture_create(screen, &texTemp);
@@ -570,13 +570,13 @@ void renderer_texture_quad(struct renderer *r,
    struct pipe_buffer *buf;
    VGfloat s0, t0, s1, t1;
 
-   assert(tex->width[0] != 0);
-   assert(tex->height[0] != 0);
+   assert(tex->width0 != 0);
+   assert(tex->height0 != 0);
 
-   s0 = x1offset / tex->width[0];
-   s1 = x2offset / tex->width[0];
-   t0 = y1offset / tex->height[0];
-   t1 = y2offset / tex->height[0];
+   s0 = x1offset / tex->width0;
+   s1 = x2offset / tex->width0;
+   t0 = y1offset / tex->height0;
+   t1 = y2offset / tex->height0;
 
    cso_save_vertex_shader(r->cso);
    /* shaders */
diff --git a/src/gallium/state_trackers/vega/vg_context.c b/src/gallium/state_trackers/vega/vg_context.c
index e0ff02f3a99..00d23f5c227 100644
--- a/src/gallium/state_trackers/vega/vg_context.c
+++ b/src/gallium/state_trackers/vega/vg_context.c
@@ -231,6 +231,8 @@ static void update_clip_state(struct vg_context *ctx)
    if (state->scissoring) {
       struct pipe_blend_state *blend = &ctx->state.g3d.blend;
       struct pipe_framebuffer_state *fb = &ctx->state.g3d.fb;
+      int i;
+
       dsa->depth.writemask = 1;/*glDepthMask(TRUE);*/
       dsa->depth.func = PIPE_FUNC_ALWAYS;
       dsa->depth.enabled = 1;
@@ -254,7 +256,7 @@ static void update_clip_state(struct vg_context *ctx)
       cso_set_blend(ctx->cso_context, blend);
 
       /* enable scissoring */
-      for (int i = 0; i < state->scissor_rects_num; ++i) {
+      for (i = 0; i < state->scissor_rects_num; ++i) {
          const float x      = state->scissor_rects[i * 4 + 0].f;
          const float y      = state->scissor_rects[i * 4 + 1].f;
          const float width  = state->scissor_rects[i * 4 + 2].f;
diff --git a/src/gallium/state_trackers/vega/vg_tracker.c b/src/gallium/state_trackers/vega/vg_tracker.c
index c4da01e52cc..d28463dd1b8 100644
--- a/src/gallium/state_trackers/vega/vg_tracker.c
+++ b/src/gallium/state_trackers/vega/vg_tracker.c
@@ -51,9 +51,9 @@ create_texture(struct pipe_context *pipe, enum pipe_format format,
 
    templ.target = PIPE_TEXTURE_2D;
    pf_get_block(templ.format, &templ.block);
-   templ.width[0] = width;
-   templ.height[0] = height;
-   templ.depth[0] = 1;
+   templ.width0 = width;
+   templ.height0 = height;
+   templ.depth0 = 1;
    templ.last_level = 0;
 
    if (pf_get_component_bits(format, PIPE_FORMAT_COMP_S)) {
diff --git a/src/gallium/state_trackers/wgl/stw_winsys.h b/src/gallium/state_trackers/wgl/stw_winsys.h
index 1ead47d6e63..1de6e906d0d 100644
--- a/src/gallium/state_trackers/wgl/stw_winsys.h
+++ b/src/gallium/state_trackers/wgl/stw_winsys.h
@@ -73,7 +73,7 @@ struct stw_winsys
                           HANDLE hSharedSurface);
 
    /**
-    * Open a shared surface (optional).
+    * Close a shared surface (optional).
     */
    void
    (*shared_surface_close)(struct pipe_screen *screen,
diff --git a/src/gallium/state_trackers/xorg/xorg_composite.c b/src/gallium/state_trackers/xorg/xorg_composite.c
index 93a3e1b8cf0..69a83b15234 100644
--- a/src/gallium/state_trackers/xorg/xorg_composite.c
+++ b/src/gallium/state_trackers/xorg/xorg_composite.c
@@ -151,8 +151,11 @@ render_filter_to_gallium(int xrender_filter, int *out_filter)
    case PictFilterBest:
       *out_filter = PIPE_TEX_FILTER_LINEAR;
       break;
+   case PictFilterConvolution:
+      *out_filter = PIPE_TEX_FILTER_NEAREST;
+      return FALSE;
    default:
-      debug_printf("Unkown xrender filter");
+      debug_printf("Unknown xrender filter\n");
       *out_filter = PIPE_TEX_FILTER_NEAREST;
       return FALSE;
    }
@@ -237,7 +240,7 @@ picture_format_fixups(struct exa_pixmap_priv *pSrc, PicturePtr pSrcPicture, bool
 
    if (pSrc->picture_format == pSrcPicture->format) {
       if (pSrc->picture_format == PICT_a8)
-         return mask ? FS_MASK_LUMINANCE : FS_MASK_LUMINANCE;
+         return mask ? FS_MASK_LUMINANCE : FS_SRC_LUMINANCE;
       return 0;
    }
 
@@ -436,8 +439,8 @@ setup_fs_constant_buffer(struct exa_context *exa)
 static void
 setup_constant_buffers(struct exa_context *exa, struct exa_pixmap_priv *pDst)
 {
-   int width = pDst->tex->width[0];
-   int height = pDst->tex->height[0];
+   int width = pDst->tex->width0;
+   int height = pDst->tex->height0;
 
    setup_vs_constant_buffer(exa, width, height);
    setup_fs_constant_buffer(exa);
@@ -564,6 +567,8 @@ boolean xorg_solid_bind_state(struct exa_context *exa,
    renderer_bind_viewport(exa->renderer, pixmap);
    renderer_bind_rasterizer(exa->renderer);
    bind_blend_state(exa, PictOpSrc, NULL, NULL, NULL);
+   cso_set_samplers(exa->renderer->cso, 0, NULL);
+   cso_set_sampler_textures(exa->renderer->cso, 0, NULL);
    setup_constant_buffers(exa, pixmap);
 
    shader = xorg_shaders_get(exa->renderer->shaders, vs_traits, fs_traits);
diff --git a/src/gallium/state_trackers/xorg/xorg_crtc.c b/src/gallium/state_trackers/xorg/xorg_crtc.c
index 85b9162d4cf..c4751724c99 100644
--- a/src/gallium/state_trackers/xorg/xorg_crtc.c
+++ b/src/gallium/state_trackers/xorg/xorg_crtc.c
@@ -187,10 +187,10 @@ crtc_load_cursor_argb(xf86CrtcPtr crtc, CARD32 * image)
 	templat.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY;
 	templat.target = PIPE_TEXTURE_2D;
 	templat.last_level = 0;
-	templat.depth[0] = 1;
+	templat.depth0 = 1;
 	templat.format = PIPE_FORMAT_A8R8G8B8_UNORM;
-	templat.width[0] = 64;
-	templat.height[0] = 64;
+	templat.width0 = 64;
+	templat.height0 = 64;
 	pf_get_block(templat.format, &templat.block);
 
 	crtcp->cursor_tex = ms->screen->texture_create(ms->screen,
diff --git a/src/gallium/state_trackers/xorg/xorg_dri2.c b/src/gallium/state_trackers/xorg/xorg_dri2.c
index c41a7cd639d..406e0afff4b 100644
--- a/src/gallium/state_trackers/xorg/xorg_dri2.c
+++ b/src/gallium/state_trackers/xorg/xorg_dri2.c
@@ -42,6 +42,12 @@
 
 #include "util/u_rect.h"
 
+/* Make all the #if cases in the code esier to read */
+/* XXX can it be set to 1? */
+#ifndef DRI2INFOREC_VERSION
+#define DRI2INFOREC_VERSION 0
+#endif
+
 typedef struct {
     PixmapPtr pPixmap;
     struct pipe_texture *tex;
@@ -79,7 +85,7 @@ driDoCreateBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int format)
     case DRI2BufferFrontLeft:
 	break;
     case DRI2BufferStencil:
-#if defined(DRI2INFOREC_VERSION) && DRI2INFOREC_VERSION > 2
+#if DRI2INFOREC_VERSION >= 3
     case DRI2BufferDepthStencil:
 #else
     /* Works on old X servers because sanity checking is for the weak */
@@ -103,9 +109,9 @@ driDoCreateBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int format)
 		template.format = ms->ds_depth_bits_last ?
 		    PIPE_FORMAT_S8Z24_UNORM : PIPE_FORMAT_Z24S8_UNORM;
 	    pf_get_block(template.format, &template.block);
-	    template.width[0] = pDraw->width;
-	    template.height[0] = pDraw->height;
-	    template.depth[0] = 1;
+	    template.width0 = pDraw->width;
+	    template.height0 = pDraw->height;
+	    template.depth0 = 1;
 	    template.last_level = 0;
 	    template.tex_usage = PIPE_TEXTURE_USAGE_DEPTH_STENCIL |
 		PIPE_TEXTURE_USAGE_DISPLAY_TARGET;
@@ -121,9 +127,12 @@ driDoCreateBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int format)
     }
 
     if (!tex) {
+	/* First call to make sure we have a pixmap private */
 	exaMoveInPixmap(private->pPixmap);
 	xorg_exa_set_shared_usage(private->pPixmap);
 	pScreen->ModifyPixmapHeader(private->pPixmap, 0, 0, 0, 0, 0, NULL);
+	/* Second call to make sure texture has valid contents */
+	exaMoveInPixmap(private->pPixmap);
 	tex = xorg_exa_get_texture(private->pPixmap);
     }
 
@@ -137,6 +146,11 @@ driDoCreateBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer, unsigned int format)
     buffer->cpp = 4;
     buffer->driverPrivate = private;
     buffer->flags = 0; /* not tiled */
+#if DRI2INFOREC_VERSION == 2
+    ((DRI2Buffer2Ptr)buffer)->format = 0;
+#elif DRI2INFOREC_VERSION >= 3
+    buffer->format = 0;
+#endif
     private->tex = tex;
 
     return TRUE;
@@ -157,12 +171,12 @@ driDoDestroyBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer)
     (*pScreen->DestroyPixmap)(private->pPixmap);
 }
 
-#if defined(DRI2INFOREC_VERSION) && DRI2INFOREC_VERSION > 2
+#if DRI2INFOREC_VERSION >= 2
 
-static DRI2BufferPtr
+static DRI2Buffer2Ptr
 driCreateBuffer(DrawablePtr pDraw, unsigned int attachment, unsigned int format)
 {
-    DRI2BufferPtr buffer;
+    DRI2Buffer2Ptr buffer;
     BufferPrivatePtr private;
 
     buffer = xcalloc(1, sizeof *buffer);
@@ -177,7 +191,8 @@ driCreateBuffer(DrawablePtr pDraw, unsigned int attachment, unsigned int format)
     buffer->attachment = attachment;
     buffer->driverPrivate = private;
 
-    if (driDoCreateBuffer(pDraw, buffer, format))
+    /* So far it is safe to downcast a DRI2Buffer2Ptr to DRI2BufferPtr */
+    if (driDoCreateBuffer(pDraw, (DRI2BufferPtr)buffer, format))
 	return buffer;
 
     xfree(private);
@@ -187,15 +202,16 @@ fail:
 }
 
 static void
-driDestroyBuffer(DrawablePtr pDraw, DRI2BufferPtr buffer)
+driDestroyBuffer(DrawablePtr pDraw, DRI2Buffer2Ptr buffer)
 {
-    driDoDestroyBuffer(pDraw, buffer);
+    /* So far it is safe to downcast a DRI2Buffer2Ptr to DRI2BufferPtr */
+    driDoDestroyBuffer(pDraw, (DRI2BufferPtr)buffer);
 
     xfree(buffer->driverPrivate);
     xfree(buffer);
 }
 
-#else /* DRI2INFOREC_VERSION <= 2 */
+#else /* DRI2INFOREC_VERSION < 2 */
 
 static DRI2BufferPtr
 driCreateBuffers(DrawablePtr pDraw, unsigned int *attachments, int count)
@@ -245,7 +261,7 @@ driDestroyBuffers(DrawablePtr pDraw, DRI2BufferPtr buffers, int count)
     }
 }
 
-#endif /* DRI2INFOREC_VERSION */
+#endif /* DRI2INFOREC_VERSION >= 2 */
 
 static void
 driCopyRegion(DrawablePtr pDraw, RegionPtr pRegion,
@@ -260,6 +276,7 @@ driCopyRegion(DrawablePtr pDraw, RegionPtr pRegion,
     PixmapPtr dst_pixmap;
     GCPtr gc;
     RegionPtr copy_clip;
+    Bool save_accel;
 
     /*
      * In driCreateBuffers we dewrap windows into the
@@ -325,8 +342,11 @@ driCopyRegion(DrawablePtr pDraw, RegionPtr pRegion,
 	}
     }
 
+    save_accel = ms->exa->accel;
+    ms->exa->accel = TRUE;
     (*gc->ops->CopyArea)(&src_pixmap->drawable, &dst_pixmap->drawable, gc,
 			 0, 0, pDraw->width, pDraw->height, 0, 0);
+    ms->exa->accel = save_accel;
 
     FreeScratchGC(gc);
 
@@ -342,7 +362,7 @@ driScreenInit(ScreenPtr pScreen)
     modesettingPtr ms = modesettingPTR(pScrn);
     DRI2InfoRec dri2info;
 
-#if defined(DRI2INFOREC_VERSION)
+#if DRI2INFOREC_VERSION >= 2
     dri2info.version = DRI2INFOREC_VERSION;
 #else
     dri2info.version = 1;
@@ -352,7 +372,7 @@ driScreenInit(ScreenPtr pScreen)
     dri2info.driverName = pScrn->driverName;
     dri2info.deviceName = "/dev/dri/card0"; /* FIXME */
 
-#if defined(DRI2INFOREC_VERSION) && DRI2INFOREC_VERSION > 2
+#if DRI2INFOREC_VERSION >= 2
     dri2info.CreateBuffer = driCreateBuffer;
     dri2info.DestroyBuffer = driDestroyBuffer;
 #else
diff --git a/src/gallium/state_trackers/xorg/xorg_driver.c b/src/gallium/state_trackers/xorg/xorg_driver.c
index 26cf2dd7723..d949167adce 100644
--- a/src/gallium/state_trackers/xorg/xorg_driver.c
+++ b/src/gallium/state_trackers/xorg/xorg_driver.c
@@ -75,10 +75,12 @@ static Bool PreInit(ScrnInfoPtr pScrn, int flags);
 typedef enum
 {
     OPTION_SW_CURSOR,
+    OPTION_2D_ACCEL,
 } modesettingOpts;
 
 static const OptionInfoRec Options[] = {
     {OPTION_SW_CURSOR, "SWcursor", OPTV_BOOLEAN, {0}, FALSE},
+    {OPTION_2D_ACCEL, "2DAccel", OPTV_BOOLEAN, {0}, FALSE},
     {-1, NULL, OPTV_NONE, {0}, FALSE}
 };
 
@@ -609,7 +611,8 @@ ScreenInit(int scrnIndex, ScreenPtr pScreen, int argc, char **argv)
 
     xf86SetBlackWhitePixels(pScreen);
 
-    ms->exa = xorg_exa_init(pScrn);
+    ms->exa = xorg_exa_init(pScrn, xf86ReturnOptValBool(ms->Options,
+                                                        OPTION_2D_ACCEL, TRUE));
     ms->debug_fallback = debug_get_bool_option("XORG_DEBUG_FALLBACK", TRUE);
 
     xorg_init_video(pScreen);
diff --git a/src/gallium/state_trackers/xorg/xorg_exa.c b/src/gallium/state_trackers/xorg/xorg_exa.c
index 6fa274eb0ab..a68a626fa48 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa.c
+++ b/src/gallium/state_trackers/xorg/xorg_exa.c
@@ -46,7 +46,6 @@
 #include "util/u_rect.h"
 
 #define DEBUG_PRINT 0
-#define ACCEL_ENABLED TRUE
 
 /*
  * Helper functions
@@ -170,15 +169,7 @@ xorg_exa_common_done(struct exa_context *exa)
 static void
 ExaWaitMarker(ScreenPtr pScreen, int marker)
 {
-   ScrnInfoPtr pScrn = xf86Screens[pScreen->myNum];
-   modesettingPtr ms = modesettingPTR(pScrn);
-   struct exa_context *exa = ms->exa;
-
-#if 0
-   xorg_exa_flush(exa, PIPE_FLUSH_RENDER_CACHE, NULL);
-#else
-   xorg_exa_finish(exa);
-#endif
+   /* Nothing to do, handled in the PrepareAccess hook */
 }
 
 static int
@@ -239,6 +230,11 @@ ExaUploadToScreen(PixmapPtr pPix, int x, int y, int w, int h, char *src,
     if (!priv || !priv->tex)
 	return FALSE;
 
+    /* make sure that any pending operations are flushed to hardware */
+    if (exa->pipe->is_texture_referenced(exa->pipe, priv->tex, 0, 0) &
+	(PIPE_REFERENCED_FOR_READ | PIPE_REFERENCED_FOR_WRITE))
+	xorg_exa_flush(exa, 0, NULL);
+
     transfer = exa->scrn->get_tex_transfer(exa->scrn, priv->tex, 0, 0, 0,
 					   PIPE_TRANSFER_WRITE, x, y, w, h);
     if (!transfer)
@@ -288,7 +284,7 @@ ExaPrepareAccess(PixmapPtr pPix, int index)
 					PIPE_TRANSFER_MAP_DIRECTLY |
 #endif
 					PIPE_TRANSFER_READ_WRITE,
-					0, 0, priv->tex->width[0], priv->tex->height[0]);
+					0, 0, priv->tex->width0, priv->tex->height0);
 	if (!priv->map_transfer)
 #ifdef EXA_MIXED_PIXMAPS
 	    return FALSE;
@@ -384,7 +380,7 @@ ExaPrepareSolid(PixmapPtr pPixmap, int alu, Pixel planeMask, Pixel fg)
 	XORG_FALLBACK("format %s", pf_name(priv->tex->format));
     }
 
-    return ACCEL_ENABLED && xorg_solid_bind_state(exa, priv, fg);
+    return exa->accel && xorg_solid_bind_state(exa, priv, fg);
 }
 
 static void
@@ -443,7 +439,7 @@ ExaPrepareCopy(PixmapPtr pSrcPixmap, PixmapPtr pDstPixmap, int xdir,
     exa->copy.src = src_priv;
     exa->copy.dst = priv;
 
-    return ACCEL_ENABLED;
+    return exa->accel;
 }
 
 static void
@@ -572,7 +568,7 @@ ExaPrepareComposite(int op, PicturePtr pSrcPicture,
                        render_format_name(pMaskPicture->format));
    }
 
-   return ACCEL_ENABLED &&
+   return exa->accel &&
           xorg_composite_bind_state(exa, op, pSrcPicture, pMaskPicture,
                                     pDstPicture,
                                     pSrc ? exaGetPixmapDriverPrivate(pSrc) : NULL,
@@ -605,6 +601,9 @@ ExaCheckComposite(int op,
 		  PicturePtr pSrcPicture, PicturePtr pMaskPicture,
 		  PicturePtr pDstPicture)
 {
+   ScrnInfoPtr pScrn = xf86Screens[pDstPicture->pDrawable->pScreen->myNum];
+   modesettingPtr ms = modesettingPTR(pScrn);
+   struct exa_context *exa = ms->exa;
    boolean accelerated = xorg_composite_accelerated(op,
                                                     pSrcPicture,
                                                     pMaskPicture,
@@ -613,7 +612,7 @@ ExaCheckComposite(int op,
    debug_printf("ExaCheckComposite(%d, %p, %p, %p) = %d\n",
                 op, pSrcPicture, pMaskPicture, pDstPicture, accelerated);
 #endif
-   return ACCEL_ENABLED && accelerated;
+   return exa->accel && accelerated;
 }
 
 static void *
@@ -751,10 +750,11 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height,
 			     bitsPerPixel, devKind, NULL);
 
     /* Deal with screen resize */
-    if (!priv->tex ||
-        (priv->tex->width[0] != width ||
-         priv->tex->height[0] != height ||
-         priv->tex_flags != priv->flags)) {
+    if ((exa->accel || priv->flags) &&
+        (!priv->tex ||
+         (priv->tex->width0 != width ||
+          priv->tex->height0 != height ||
+          priv->tex_flags != priv->flags))) {
 	struct pipe_texture *texture = NULL;
 	struct pipe_texture template;
 
@@ -762,9 +762,9 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height,
 	template.target = PIPE_TEXTURE_2D;
 	exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &priv->picture_format);
 	pf_get_block(template.format, &template.block);
-	template.width[0] = width;
-	template.height[0] = height;
-	template.depth[0] = 1;
+	template.width0 = width;
+	template.height0 = height;
+	template.depth0 = 1;
 	template.last_level = 0;
 	template.tex_usage = PIPE_TEXTURE_USAGE_RENDER_TARGET | priv->flags;
 	priv->tex_flags = priv->flags;
@@ -779,12 +779,12 @@ ExaModifyPixmapHeader(PixmapPtr pPixmap, int width, int height,
 	    src_surf = xorg_gpu_surface(exa->pipe->screen, priv);
         if (exa->pipe->surface_copy) {
             exa->pipe->surface_copy(exa->pipe, dst_surf, 0, 0, src_surf,
-                        0, 0, min(width, texture->width[0]),
-                        min(height, texture->height[0]));
+                        0, 0, min(width, texture->width0),
+                        min(height, texture->height0));
         } else {
             util_surface_copy(exa->pipe, FALSE, dst_surf, 0, 0, src_surf,
-                        0, 0, min(width, texture->width[0]),
-                        min(height, texture->height[0]));
+                        0, 0, min(width, texture->width0),
+                        min(height, texture->height0));
         }
 	    exa->scrn->tex_surface_destroy(dst_surf);
 	    exa->scrn->tex_surface_destroy(src_surf);
@@ -817,8 +817,8 @@ xorg_exa_set_texture(PixmapPtr pPixmap, struct  pipe_texture *tex)
     if (!priv)
 	return FALSE;
 
-    if (pPixmap->drawable.width != tex->width[0] ||
-	pPixmap->drawable.height != tex->height[0])
+    if (pPixmap->drawable.width != tex->width0 ||
+	pPixmap->drawable.height != tex->height0)
 	return FALSE;
 
     pipe_texture_reference(&priv->tex, tex);
@@ -841,9 +841,9 @@ xorg_exa_create_root_texture(ScrnInfoPtr pScrn,
     template.target = PIPE_TEXTURE_2D;
     exa_get_pipe_format(depth, &template.format, &bitsPerPixel, &dummy);
     pf_get_block(template.format, &template.block);
-    template.width[0] = width;
-    template.height[0] = height;
-    template.depth[0] = 1;
+    template.width0 = width;
+    template.height0 = height;
+    template.depth0 = 1;
     template.last_level = 0;
     template.tex_usage |= PIPE_TEXTURE_USAGE_RENDER_TARGET;
     template.tex_usage |= PIPE_TEXTURE_USAGE_PRIMARY;
@@ -869,7 +869,7 @@ xorg_exa_close(ScrnInfoPtr pScrn)
 }
 
 void *
-xorg_exa_init(ScrnInfoPtr pScrn)
+xorg_exa_init(ScrnInfoPtr pScrn, Bool accel)
 {
    modesettingPtr ms = modesettingPTR(pScrn);
    struct exa_context *exa;
@@ -934,6 +934,7 @@ xorg_exa_init(ScrnInfoPtr pScrn)
    ms->ctx = exa->pipe;
 
    exa->renderer = renderer_create(exa->pipe);
+   exa->accel = accel;
 
    return (void *)exa;
 
diff --git a/src/gallium/state_trackers/xorg/xorg_exa.h b/src/gallium/state_trackers/xorg/xorg_exa.h
index 7f4aebb9c38..15cc29d6620 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa.h
+++ b/src/gallium/state_trackers/xorg/xorg_exa.h
@@ -24,6 +24,8 @@ struct exa_context
    float solid_color[4];
    boolean has_solid_color;
 
+   boolean accel;
+
    /* float[9] projective matrix bound to pictures */
    struct {
       float    src[9];
diff --git a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c
index 3bf64b6331d..52b97af1635 100644
--- a/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c
+++ b/src/gallium/state_trackers/xorg/xorg_exa_tgsi.c
@@ -47,22 +47,22 @@ static void
 print_fs_traits(int fs_traits)
 {
    const char *strings[] = {
-      "FS_COMPOSITE",       // = 1 << 0,
-      "FS_MASK",            // = 1 << 1,
-      "FS_SOLID_FILL",      // = 1 << 2,
-      "FS_LINGRAD_FILL",    // = 1 << 3,
-      "FS_RADGRAD_FILL",    // = 1 << 4,
-      "FS_CA_FULL",         // = 1 << 5, /* src.rgba * mask.rgba */
-      "FS_CA_SRCALPHA",     // = 1 << 6, /* src.aaaa * mask.rgba */
-      "FS_YUV",             // = 1 << 7,
-      "FS_SRC_REPEAT_NONE", // = 1 << 8,
-      "FS_MASK_REPEAT_NONE",// = 1 << 9,
-      "FS_SRC_SWIZZLE_RGB", // = 1 << 10,
-      "FS_MASK_SWIZZLE_RGB",// = 1 << 11,
-      "FS_SRC_SET_ALPHA",   // = 1 << 12,
-      "FS_MASK_SET_ALPHA",  // = 1 << 13,
-      "FS_SRC_LUMINANCE",   // = 1 << 14,
-      "FS_MASK_LUMINANCE",  // = 1 << 15,
+      "FS_COMPOSITE",       /* = 1 << 0 */
+      "FS_MASK",            /* = 1 << 1 */
+      "FS_SOLID_FILL",      /* = 1 << 2 */
+      "FS_LINGRAD_FILL",    /* = 1 << 3 */
+      "FS_RADGRAD_FILL",    /* = 1 << 4 */
+      "FS_CA_FULL",         /* = 1 << 5 - src.rgba * mask.rgba */
+      "FS_CA_SRCALPHA",     /* = 1 << 6 - src.aaaa * mask.rgba */
+      "FS_YUV",             /* = 1 << 7 */
+      "FS_SRC_REPEAT_NONE", /* = 1 << 8 */
+      "FS_MASK_REPEAT_NONE",/* = 1 << 9 */
+      "FS_SRC_SWIZZLE_RGB", /* = 1 << 10 */
+      "FS_MASK_SWIZZLE_RGB",/* = 1 << 11 */
+      "FS_SRC_SET_ALPHA",   /* = 1 << 12 */
+      "FS_MASK_SET_ALPHA",  /* = 1 << 13 */
+      "FS_SRC_LUMINANCE",   /* = 1 << 14 */
+      "FS_MASK_LUMINANCE",  /* = 1 << 15 */
    };
    int i, k;
    debug_printf("%s: ", __func__);
diff --git a/src/gallium/state_trackers/xorg/xorg_renderer.c b/src/gallium/state_trackers/xorg/xorg_renderer.c
index 723605312c5..418a8dd88bd 100644
--- a/src/gallium/state_trackers/xorg/xorg_renderer.c
+++ b/src/gallium/state_trackers/xorg/xorg_renderer.c
@@ -167,14 +167,14 @@ add_vertex_data1(struct xorg_renderer *r,
       map_point(src_matrix, pt3[0], pt3[1], &pt3[0], &pt3[1]);
    }
 
-   s0 =  pt0[0] / src->width[0];
-   s1 =  pt1[0] / src->width[0];
-   s2 =  pt2[0] / src->width[0];
-   s3 =  pt3[0] / src->width[0];
-   t0 =  pt0[1] / src->height[0];
-   t1 =  pt1[1] / src->height[0];
-   t2 =  pt2[1] / src->height[0];
-   t3 =  pt3[1] / src->height[0];
+   s0 =  pt0[0] / src->width0;
+   s1 =  pt1[0] / src->width0;
+   s2 =  pt2[0] / src->width0;
+   s3 =  pt3[0] / src->width0;
+   t0 =  pt0[1] / src->height0;
+   t1 =  pt1[1] / src->height0;
+   t2 =  pt2[1] / src->height0;
+   t3 =  pt3[1] / src->height0;
 
    /* 1st vertex */
    add_vertex_1tex(r, dstX, dstY, s0, t0);
@@ -262,15 +262,15 @@ add_vertex_data2(struct xorg_renderer *r,
       map_point(mask_matrix, mpt1[0], mpt1[1], &mpt1[0], &mpt1[1]);
    }
 
-   src_s0 = spt0[0] / src->width[0];
-   src_t0 = spt0[1] / src->height[0];
-   src_s1 = spt1[0] / src->width[0];
-   src_t1 = spt1[1] / src->height[0];
+   src_s0 = spt0[0] / src->width0;
+   src_t0 = spt0[1] / src->height0;
+   src_s1 = spt1[0] / src->width0;
+   src_t1 = spt1[1] / src->height0;
 
-   mask_s0 = mpt0[0] / mask->width[0];
-   mask_t0 = mpt0[1] / mask->height[0];
-   mask_s1 = mpt1[0] / mask->width[0];
-   mask_t1 = mpt1[1] / mask->height[0];
+   mask_s0 = mpt0[0] / mask->width0;
+   mask_t0 = mpt0[1] / mask->height0;
+   mask_s1 = mpt1[0] / mask->width0;
+   mask_t1 = mpt1[1] / mask->height0;
 
    /* 1st vertex */
    add_vertex_2tex(r, dstX, dstY,
@@ -300,10 +300,10 @@ setup_vertex_data_yuv(struct xorg_renderer *r,
    spt1[0] = srcX + srcW;
    spt1[1] = srcY + srcH;
 
-   s0 = spt0[0] / tex[0]->width[0];
-   t0 = spt0[1] / tex[0]->height[0];
-   s1 = spt1[0] / tex[0]->width[0];
-   t1 = spt1[1] / tex[0]->height[0];
+   s0 = spt0[0] / tex[0]->width0;
+   t0 = spt0[1] / tex[0]->height0;
+   s1 = spt1[0] / tex[0]->width0;
+   t1 = spt1[1] / tex[0]->height0;
 
    /* 1st vertex */
    add_vertex_1tex(r, dstX, dstY, s0, t0);
@@ -387,8 +387,8 @@ void renderer_bind_framebuffer(struct xorg_renderer *r,
    struct pipe_surface *surface = xorg_gpu_surface(r->pipe->screen, priv);
    memset(&state, 0, sizeof(struct pipe_framebuffer_state));
 
-   state.width  = priv->tex->width[0];
-   state.height = priv->tex->height[0];
+   state.width  = priv->tex->width0;
+   state.height = priv->tex->height0;
 
    state.nr_cbufs = 1;
    state.cbufs[0] = surface;
@@ -407,8 +407,8 @@ void renderer_bind_framebuffer(struct xorg_renderer *r,
 void renderer_bind_viewport(struct xorg_renderer *r,
                             struct exa_pixmap_priv *dst)
 {
-   int width = dst->tex->width[0];
-   int height = dst->tex->height[0];
+   int width = dst->tex->width0;
+   int height = dst->tex->height0;
 
    /*debug_printf("Bind viewport (%d, %d)\n", width, height);*/
 
@@ -584,16 +584,16 @@ static void renderer_copy_texture(struct xorg_renderer *r,
    float s0, t0, s1, t1;
    struct xorg_shader shader;
 
-   assert(src->width[0] != 0);
-   assert(src->height[0] != 0);
-   assert(dst->width[0] != 0);
-   assert(dst->height[0] != 0);
+   assert(src->width0 != 0);
+   assert(src->height0 != 0);
+   assert(dst->width0 != 0);
+   assert(dst->height0 != 0);
 
 #if 1
-   s0 = sx1 / src->width[0];
-   s1 = sx2 / src->width[0];
-   t0 = sy1 / src->height[0];
-   t1 = sy2 / src->height[0];
+   s0 = sx1 / src->width0;
+   s1 = sx2 / src->width0;
+   t0 = sy1 / src->height0;
+   t1 = sy2 / src->height0;
 #else
    s0 = 0;
    s1 = 1;
@@ -730,9 +730,9 @@ create_sampler_texture(struct xorg_renderer *r,
    templ.target = PIPE_TEXTURE_2D;
    templ.format = format;
    templ.last_level = 0;
-   templ.width[0] = src->width[0];
-   templ.height[0] = src->height[0];
-   templ.depth[0] = 1;
+   templ.width0 = src->width0;
+   templ.height0 = src->height0;
+   templ.depth0 = 1;
    pf_get_block(format, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
@@ -754,13 +754,13 @@ create_sampler_texture(struct xorg_renderer *r,
                 ps_tex, /* dest */
                 0, 0, /* destx/y */
                 ps_read,
-                0, 0, src->width[0], src->height[0]);
+                0, 0, src->width0, src->height0);
       } else {
           util_surface_copy(pipe, FALSE,
                 ps_tex, /* dest */
                 0, 0, /* destx/y */
                 ps_read,
-                0, 0, src->width[0], src->height[0]);
+                0, 0, src->width0, src->height0);
       }
       pipe_surface_reference(&ps_read, NULL);
       pipe_surface_reference(&ps_tex, NULL);
@@ -791,8 +791,8 @@ void renderer_copy_pixmap(struct xorg_renderer *r,
    dst_loc[3] = height;
    dst_bounds[0] = 0.f;
    dst_bounds[1] = 0.f;
-   dst_bounds[2] = dst->width[0];
-   dst_bounds[3] = dst->height[0];
+   dst_bounds[2] = dst->width0;
+   dst_bounds[3] = dst->height0;
 
    src_loc[0] = sx;
    src_loc[1] = sy;
@@ -800,8 +800,8 @@ void renderer_copy_pixmap(struct xorg_renderer *r,
    src_loc[3] = height;
    src_bounds[0] = 0.f;
    src_bounds[1] = 0.f;
-   src_bounds[2] = src->width[0];
-   src_bounds[3] = src->height[0];
+   src_bounds[2] = src->width0;
+   src_bounds[3] = src->height0;
 
    bound_rect(src_loc, src_bounds, src_shift);
    bound_rect(dst_loc, dst_bounds, dst_shift);
diff --git a/src/gallium/state_trackers/xorg/xorg_tracker.h b/src/gallium/state_trackers/xorg/xorg_tracker.h
index 6130cf6621f..20c9259c7bc 100644
--- a/src/gallium/state_trackers/xorg/xorg_tracker.h
+++ b/src/gallium/state_trackers/xorg/xorg_tracker.h
@@ -131,7 +131,7 @@ xorg_exa_create_root_texture(ScrnInfoPtr pScrn,
 			     int depth, int bpp);
 
 void *
-xorg_exa_init(ScrnInfoPtr pScrn);
+xorg_exa_init(ScrnInfoPtr pScrn, Bool accel);
 
 void
 xorg_exa_close(ScrnInfoPtr pScrn);
diff --git a/src/gallium/state_trackers/xorg/xorg_xv.c b/src/gallium/state_trackers/xorg/xorg_xv.c
index 2b935c0f737..bb515a0f49b 100644
--- a/src/gallium/state_trackers/xorg/xorg_xv.c
+++ b/src/gallium/state_trackers/xorg/xorg_xv.c
@@ -73,10 +73,11 @@ static XF86VideoEncodingRec DummyEncoding[1] = {
    }
 };
 
-#define NUM_IMAGES 2
+#define NUM_IMAGES 3
 static XF86ImageRec Images[NUM_IMAGES] = {
    XVIMAGE_UYVY,
    XVIMAGE_YUY2,
+   XVIMAGE_YV12,
 };
 
 struct xorg_xv_port_priv {
@@ -166,9 +167,9 @@ create_component_texture(struct pipe_context *pipe,
    templ.target = PIPE_TEXTURE_2D;
    templ.format = PIPE_FORMAT_L8_UNORM;
    templ.last_level = 0;
-   templ.width[0] = width;
-   templ.height[0] = height;
-   templ.depth[0] = 1;
+   templ.width0 = width;
+   templ.height0 = height;
+   templ.depth0 = 1;
    pf_get_block(PIPE_FORMAT_L8_UNORM, &templ.block);
    templ.tex_usage = PIPE_TEXTURE_USAGE_SAMPLER;
 
@@ -182,18 +183,18 @@ check_yuv_textures(struct xorg_xv_port_priv *priv,  int width, int height)
 {
    struct pipe_texture **dst = priv->yuv[priv->current_set];
    if (!dst[0] ||
-       dst[0]->width[0] != width ||
-       dst[0]->height[0] != height) {
+       dst[0]->width0 != width ||
+       dst[0]->height0 != height) {
       pipe_texture_reference(&dst[0], NULL);
    }
    if (!dst[1] ||
-       dst[1]->width[0] != width ||
-       dst[1]->height[0] != height) {
+       dst[1]->width0 != width ||
+       dst[1]->height0 != height) {
       pipe_texture_reference(&dst[1], NULL);
    }
    if (!dst[2] ||
-       dst[2]->width[0] != width ||
-       dst[2]->height[0] != height) {
+       dst[2]->width0 != width ||
+       dst[2]->height0 != height) {
       pipe_texture_reference(&dst[2], NULL);
    }
 
@@ -256,7 +257,7 @@ copy_packed_data(ScrnInfoPtr pScrn,
    switch (id) {
    case FOURCC_YV12: {
       for (i = 0; i < w; ++i) {
-         for (j = 0; i < h; ++j) {
+         for (j = 0; j < h; ++j) {
             /*XXX use src? */
             y1  = buf[j*w + i];
             u   = buf[(j/2) * (w/2) + i/2 + y_array_size];
@@ -320,8 +321,8 @@ copy_packed_data(ScrnInfoPtr pScrn,
 static void
 setup_vs_video_constants(struct xorg_renderer *r, struct exa_pixmap_priv *dst)
 {
-   int width = dst->tex->width[0];
-   int height = dst->tex->height[0];
+   int width = dst->tex->width0;
+   int height = dst->tex->height0;
    const int param_bytes = 8 * sizeof(float);
    float vs_consts[8] = {
       2.f/width, 2.f/height, 1, 1,
@@ -446,6 +447,11 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id,
    int x, y, w, h;
    struct exa_pixmap_priv *dst = exaGetPixmapDriverPrivate(pPixmap);
 
+   if (dst && !dst->tex) {
+	xorg_exa_set_shared_usage(pPixmap);
+	pScrn->pScreen->ModifyPixmapHeader(pPixmap, 0, 0, 0, 0, 0, NULL);
+   }
+
    if (!dst || !dst->tex)
       XORG_FALLBACK("Xv destination %s", !dst ? "!dst" : "!dst->tex");
 
@@ -469,6 +475,9 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id,
    setup_vs_video_constants(pPriv->r, dst);
    setup_fs_video_constants(pPriv->r, hdtv);
 
+   exaMoveInPixmap(pPixmap);
+   DamageDamageRegion(&pPixmap->drawable, dstRegion);
+
    while (nbox--) {
       int box_x1 = pbox->x1;
       int box_y1 = pbox->y1;
@@ -476,8 +485,8 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id,
       int box_y2 = pbox->y2;
       float diff_x = (float)src_w / (float)dst_w;
       float diff_y = (float)src_h / (float)dst_h;
-      int offset_x = box_x1 - dstX;
-      int offset_y = box_y1 - dstY;
+      int offset_x = box_x1 - dstX + pPixmap->screen_x;
+      int offset_y = box_y1 - dstY + pPixmap->screen_y;
       int offset_w;
       int offset_h;
 
@@ -495,7 +504,7 @@ display_video(ScrnInfoPtr pScrn, struct xorg_xv_port_priv *pPriv, int id,
 
       pbox++;
    }
-   DamageDamageRegion(&pPixmap->drawable, dstRegion);
+   DamageRegionProcessPending(&pPixmap->drawable);
 
    return TRUE;
 }
@@ -537,6 +546,7 @@ put_image(ScrnInfoPtr pScrn,
    switch (id) {
    case FOURCC_UYVY:
    case FOURCC_YUY2:
+   case FOURCC_YV12:
    default:
       srcPitch = width << 1;
       break;
@@ -585,6 +595,7 @@ query_image_attributes(ScrnInfoPtr pScrn,
    switch (id) {
    case FOURCC_UYVY:
    case FOURCC_YUY2:
+   case FOURCC_YV12:
    default:
       size = *w << 1;
       if (pitches)
diff --git a/src/gallium/state_trackers/xorg/xvmc/surface.c b/src/gallium/state_trackers/xorg/xvmc/surface.c
index bf9038f356e..8cb73f48970 100644
--- a/src/gallium/state_trackers/xorg/xvmc/surface.c
+++ b/src/gallium/state_trackers/xorg/xvmc/surface.c
@@ -103,9 +103,9 @@ CreateOrResizeBackBuffer(struct pipe_video_context *vpipe, unsigned int width, u
    /* XXX: Needs to match the drawable's format? */
    template.format = PIPE_FORMAT_X8R8G8B8_UNORM;
    template.last_level = 0;
-   template.width[0] = width;
-   template.height[0] = height;
-   template.depth[0] = 1;
+   template.width0 = width;
+   template.height0 = height;
+   template.depth0 = 1;
    pf_get_block(template.format, &template.block);
    template.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET;
 
diff --git a/src/gallium/winsys/drm/SConscript b/src/gallium/winsys/drm/SConscript
index a9e9f2682a7..9f7b383d2d3 100644
--- a/src/gallium/winsys/drm/SConscript
+++ b/src/gallium/winsys/drm/SConscript
@@ -48,6 +48,11 @@ if env['dri']:
 	#	$(INSTALL) -d $(DRI_DRIVER_INSTALL_DIR)
 	#	$(INSTALL) -m 755 $(LIBNAME) $(DRI_DRIVER_INSTALL_DIR)
 
+	if 'vmware' in env['winsys']:
+		SConscript([
+			'vmware/SConscript',
+		])
+
 	if 'intel' in env['winsys']:
 		SConscript([
 			'intel/SConscript',
diff --git a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
index 317dc44d22f..d4978613247 100644
--- a/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
+++ b/src/gallium/winsys/drm/nouveau/drm/nouveau_drm_api.c
@@ -24,10 +24,10 @@ dri_surface_from_handle(struct drm_api *api, struct pipe_screen *pscreen,
 	tmpl.tex_usage = PIPE_TEXTURE_USAGE_PRIMARY;
 	tmpl.target = PIPE_TEXTURE_2D;
 	tmpl.last_level = 0;
-	tmpl.depth[0] = 1;
+	tmpl.depth0 = 1;
 	tmpl.format = format;
-	tmpl.width[0] = width;
-	tmpl.height[0] = height;
+	tmpl.width0 = width;
+	tmpl.height0 = height;
 	pf_get_block(tmpl.format, &tmpl.block);
 
 	pt = api->texture_from_shared_handle(api, pscreen, &tmpl,
diff --git a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
index 81cd9dc4fb1..74afffc9cfa 100644
--- a/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
+++ b/src/gallium/winsys/drm/radeon/core/radeon_buffer.c
@@ -317,9 +317,9 @@ struct pipe_surface *radeon_surface_from_handle(struct radeon_context *radeon_co
     memset(&tmpl, 0, sizeof(tmpl));
     tmpl.tex_usage = PIPE_TEXTURE_USAGE_DISPLAY_TARGET;
     tmpl.target = PIPE_TEXTURE_2D;
-    tmpl.width[0] = w;
-    tmpl.height[0] = h;
-    tmpl.depth[0] = 1;
+    tmpl.width0 = w;
+    tmpl.height0 = h;
+    tmpl.depth0 = 1;
     tmpl.format = format;
     pf_get_block(tmpl.format, &tmpl.block);
     tmpl.nblocksx[0] = pf_get_nblocksx(&tmpl.block, w);
diff --git a/src/gallium/winsys/drm/vmware/Makefile b/src/gallium/winsys/drm/vmware/Makefile
new file mode 100644
index 00000000000..2ae6dead5c1
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/Makefile
@@ -0,0 +1,12 @@
+# src/gallium/winsys/drm/vmware/Makefile
+TOP = ../../../../..
+include $(TOP)/configs/current
+
+SUBDIRS = core $(GALLIUM_STATE_TRACKERS_DIRS)
+
+default install clean:
+	@for dir in $(SUBDIRS) ; do \
+		if [ -d $$dir ] ; then \
+			(cd $$dir && $(MAKE) $@) || exit 1; \
+		fi \
+	done
diff --git a/src/gallium/winsys/drm/vmware/SConscript b/src/gallium/winsys/drm/vmware/SConscript
new file mode 100644
index 00000000000..06e6d5be9ca
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/SConscript
@@ -0,0 +1,11 @@
+Import('*')
+
+SConscript(['core/SConscript',])
+
+if 'mesa' in env['statetrackers']:
+
+    SConscript(['dri/SConscript'])
+
+if 'xorg' in env['statetrackers']:
+
+    SConscript(['xorg/SConscript'])
diff --git a/src/gallium/winsys/drm/vmware/core/Makefile b/src/gallium/winsys/drm/vmware/core/Makefile
new file mode 100644
index 00000000000..a52957c1a5b
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/Makefile
@@ -0,0 +1,35 @@
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = svgadrm
+
+C_SOURCES = \
+        vmw_buffer.c \
+        vmw_context.c  \
+        vmw_fence.c  \
+        vmw_screen.c  \
+        vmw_screen_dri.c  \
+        vmw_screen_ioctl.c  \
+        vmw_screen_pools.c  \
+        vmw_screen_svga.c  \
+        vmw_surface.c
+
+LIBRARY_INCLUDES = \
+       -I$(TOP)/src/gallium/drivers/svga \
+       -I$(TOP)/src/gallium/drivers/svga/include \
+       -I$(GALLIUM)/src/mesa/drivers/dri/common \
+       -I$(GALLIUM)/include \
+       -I$(GALLIUM)/include/GL/internal \
+       -I$(GALLIUM)/src/mesa \
+       -I$(GALLIUM)/src/mesa/main \
+       -I$(GALLIUM)/src/mesa/glapi \
+       -I$(GALLIUM)/src/egl/main \
+       -I$(GALLIUM)/src/egl/drivers/dri \
+       $(shell pkg-config libdrm --cflags-only-I)
+
+LIBRARY_DEFINES = \
+       -std=gnu99 -fvisibility=hidden \
+       -DHAVE_STDINT_H -D_FILE_OFFSET_BITS=64 \
+       $(shell pkg-config libdrm --cflags-only-other)
+
+include ../../../../Makefile.template
diff --git a/src/gallium/winsys/drm/vmware/core/SConscript b/src/gallium/winsys/drm/vmware/core/SConscript
new file mode 100644
index 00000000000..edaf9458bee
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/SConscript
@@ -0,0 +1,39 @@
+Import('*')
+
+env = env.Clone()
+
+if env['gcc']:
+	env.Append(CCFLAGS = ['-fvisibility=hidden'])
+	env.Append(CPPDEFINES = [
+		'HAVE_STDINT_H', 
+		'HAVE_SYS_TYPES_H',
+                '-D_FILE_OFFSET_BITS=64',
+	])
+	
+env.Prepend(CPPPATH = [
+	'include',
+        '#/src/gallium/drivers/svga',
+        '#/src/gallium/drivers/svga/include',
+])
+
+env.Append(CPPDEFINES = [
+])
+
+sources = [
+        'vmw_buffer.c',
+        'vmw_context.c',
+        'vmw_fence.c',
+        'vmw_screen.c',
+        'vmw_screen_dri.c',
+        'vmw_screen_ioctl.c',
+        'vmw_screen_pools.c',
+        'vmw_screen_svga.c',
+        'vmw_surface.c',
+]
+
+svgadrm = env.ConvenienceLibrary(
+	target = 'svgadrm',
+	source = sources,
+)
+
+Export('svgadrm')
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_buffer.c b/src/gallium/winsys/drm/vmware/core/vmw_buffer.c
new file mode 100644
index 00000000000..b812fb59d39
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_buffer.c
@@ -0,0 +1,274 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * SVGA buffer manager for Guest Memory Regions (GMRs).
+ * 
+ * GMRs are used for pixel and vertex data upload/download to/from the virtual
+ * SVGA hardware. There is a limited number of GMRs available, and 
+ * creating/destroying them is also a slow operation so we must suballocate 
+ * them.
+ * 
+ * This file implements a pipebuffer library's buffer manager, so that we can
+ * use pipepbuffer's suballocation, fencing, and debugging facilities with GMRs. 
+ * 
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#include "svga_cmd.h"
+
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "pipebuffer/pb_buffer.h"
+#include "pipebuffer/pb_bufmgr.h"
+
+#include "svga_winsys.h"
+
+#include "vmw_screen.h"
+#include "vmw_buffer.h"
+
+
+struct vmw_gmr_bufmgr;
+
+
+struct vmw_gmr_buffer
+{
+   struct pb_buffer base;
+   
+   struct vmw_gmr_bufmgr *mgr;
+   
+   struct vmw_region *region;
+   void *map;
+   
+#ifdef DEBUG
+   struct pipe_fence_handle *last_fence;
+#endif
+};
+
+
+extern const struct pb_vtbl vmw_gmr_buffer_vtbl;
+
+
+static INLINE struct vmw_gmr_buffer *
+vmw_gmr_buffer(struct pb_buffer *buf)
+{
+   assert(buf);
+   assert(buf->vtbl == &vmw_gmr_buffer_vtbl);
+   return (struct vmw_gmr_buffer *)buf;
+}
+
+
+struct vmw_gmr_bufmgr
+{
+   struct pb_manager base;
+   
+   struct vmw_winsys_screen *vws;
+};
+
+
+static INLINE struct vmw_gmr_bufmgr *
+vmw_gmr_bufmgr(struct pb_manager *mgr)
+{
+   assert(mgr);
+   return (struct vmw_gmr_bufmgr *)mgr;
+}
+
+
+static void
+vmw_gmr_buffer_destroy(struct pb_buffer *_buf)
+{
+   struct vmw_gmr_buffer *buf = vmw_gmr_buffer(_buf);
+
+#ifdef DEBUG
+   if(buf->last_fence) {
+      struct svga_winsys_screen *sws = &buf->mgr->vws->base;
+      assert(sws->fence_signalled(sws, buf->last_fence, 0) == 0);
+   }
+#endif
+
+   vmw_ioctl_region_unmap(buf->region);
+   
+   vmw_ioctl_region_destroy(buf->region);
+
+   FREE(buf);
+}
+
+
+static void *
+vmw_gmr_buffer_map(struct pb_buffer *_buf,
+               unsigned flags)
+{
+   struct vmw_gmr_buffer *buf = vmw_gmr_buffer(_buf);
+   return buf->map;
+}
+
+
+static void
+vmw_gmr_buffer_unmap(struct pb_buffer *_buf)
+{
+   /* Do nothing */
+   (void)_buf;
+}
+
+
+static void
+vmw_gmr_buffer_get_base_buffer(struct pb_buffer *buf,
+                           struct pb_buffer **base_buf,
+                           unsigned *offset)
+{
+   *base_buf = buf;
+   *offset = 0;
+}
+
+
+static enum pipe_error
+vmw_gmr_buffer_validate( struct pb_buffer *_buf, 
+                         struct pb_validate *vl,
+                         unsigned flags )
+{
+   /* Always pinned */
+   return PIPE_OK;
+}
+
+
+static void
+vmw_gmr_buffer_fence( struct pb_buffer *_buf, 
+                      struct pipe_fence_handle *fence )
+{
+   /* We don't need to do anything, as the pipebuffer library
+    * will take care of delaying the destruction of fenced buffers */  
+#ifdef DEBUG
+   struct vmw_gmr_buffer *buf = vmw_gmr_buffer(_buf);
+   if(fence)
+      buf->last_fence = fence;
+#endif
+}
+
+
+const struct pb_vtbl vmw_gmr_buffer_vtbl = {
+   vmw_gmr_buffer_destroy,
+   vmw_gmr_buffer_map,
+   vmw_gmr_buffer_unmap,
+   vmw_gmr_buffer_validate,
+   vmw_gmr_buffer_fence,
+   vmw_gmr_buffer_get_base_buffer
+};
+
+
+static struct pb_buffer *
+vmw_gmr_bufmgr_create_buffer(struct pb_manager *_mgr,
+                         pb_size size,
+                         const struct pb_desc *desc) 
+{
+   struct vmw_gmr_bufmgr *mgr = vmw_gmr_bufmgr(_mgr);
+   struct vmw_winsys_screen *vws = mgr->vws;
+   struct vmw_gmr_buffer *buf;
+   
+   buf = CALLOC_STRUCT(vmw_gmr_buffer);
+   if(!buf)
+      goto error1;
+
+   pipe_reference_init(&buf->base.base.reference, 1);
+   buf->base.base.alignment = desc->alignment;
+   buf->base.base.usage = desc->usage;
+   buf->base.base.size = size;
+   buf->base.vtbl = &vmw_gmr_buffer_vtbl;
+   buf->mgr = mgr;
+
+   buf->region = vmw_ioctl_region_create(vws, size);
+   if(!buf->region)
+      goto error2;
+	 
+   buf->map = vmw_ioctl_region_map(buf->region);
+   if(!buf->map)
+      goto error3;
+
+   return &buf->base;
+
+error3:
+   vmw_ioctl_region_destroy(buf->region);
+error2:
+   FREE(buf);
+error1:
+   return NULL;
+}
+
+
+static void
+vmw_gmr_bufmgr_flush(struct pb_manager *mgr) 
+{
+   /* No-op */
+}
+
+
+static void
+vmw_gmr_bufmgr_destroy(struct pb_manager *_mgr) 
+{
+   struct vmw_gmr_bufmgr *mgr = vmw_gmr_bufmgr(_mgr);
+   FREE(mgr);
+}
+
+
+struct pb_manager *
+vmw_gmr_bufmgr_create(struct vmw_winsys_screen *vws) 
+{
+   struct vmw_gmr_bufmgr *mgr;
+   
+   mgr = CALLOC_STRUCT(vmw_gmr_bufmgr);
+   if(!mgr)
+      return NULL;
+
+   mgr->base.destroy = vmw_gmr_bufmgr_destroy;
+   mgr->base.create_buffer = vmw_gmr_bufmgr_create_buffer;
+   mgr->base.flush = vmw_gmr_bufmgr_flush;
+   
+   mgr->vws = vws;
+   
+   return &mgr->base;
+}
+
+
+boolean
+vmw_gmr_bufmgr_region_ptr(struct pb_buffer *buf, 
+                          struct SVGAGuestPtr *ptr)
+{
+   struct pb_buffer *base_buf;
+   unsigned offset = 0;
+   struct vmw_gmr_buffer *gmr_buf;
+   
+   pb_get_base_buffer( buf, &base_buf, &offset );
+   
+   gmr_buf = vmw_gmr_buffer(base_buf);
+   if(!gmr_buf)
+      return FALSE;
+   
+   *ptr = vmw_ioctl_region_ptr(gmr_buf->region);
+   
+   ptr->offset += offset;
+   
+   return TRUE;
+}
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_buffer.h b/src/gallium/winsys/drm/vmware/core/vmw_buffer.h
new file mode 100644
index 00000000000..634bdcabd26
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_buffer.h
@@ -0,0 +1,65 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#ifndef VMW_BUFFER_H_
+#define VMW_BUFFER_H_
+
+
+#include "pipe/p_compiler.h"
+
+struct SVGAGuestPtr;
+struct pb_buffer;
+struct pb_manager;
+struct svga_winsys_buffer;
+struct svga_winsys_surface;
+struct vmw_winsys_screen;
+
+
+static INLINE struct pb_buffer *
+vmw_pb_buffer(struct svga_winsys_buffer *buffer)
+{
+   assert(buffer);
+   return (struct pb_buffer *)buffer;
+}
+
+
+static INLINE struct svga_winsys_buffer *
+vmw_svga_winsys_buffer(struct pb_buffer *buffer)
+{
+   assert(buffer);
+   return (struct svga_winsys_buffer *)buffer;
+}
+
+
+struct pb_manager *
+vmw_gmr_bufmgr_create(struct vmw_winsys_screen *vws);
+
+boolean
+vmw_gmr_bufmgr_region_ptr(struct pb_buffer *buf, 
+                          struct SVGAGuestPtr *ptr);
+
+
+#endif /* VMW_BUFFER_H_ */
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_context.c b/src/gallium/winsys/drm/vmware/core/vmw_context.c
new file mode 100644
index 00000000000..b6997588de4
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_context.c
@@ -0,0 +1,297 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "svga_cmd.h"
+
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+#include "util/u_debug_stack.h"
+#include "pipebuffer/pb_buffer.h"
+#include "pipebuffer/pb_validate.h"
+
+#include "svga_winsys.h"
+#include "vmw_context.h"
+#include "vmw_screen.h"
+#include "vmw_buffer.h"
+#include "vmw_surface.h"
+#include "vmw_fence.h"
+
+#define VMW_COMMAND_SIZE (64*1024)
+#define VMW_SURFACE_RELOCS (1024)
+
+#define VMW_MUST_FLUSH_STACK 8
+
+struct vmw_svga_winsys_context
+{
+   struct svga_winsys_context base;
+
+   struct vmw_winsys_screen *vws;
+
+#ifdef DEBUG
+   boolean must_flush;
+   struct debug_stack_frame must_flush_stack[VMW_MUST_FLUSH_STACK];
+#endif
+
+   struct {
+      uint8_t buffer[VMW_COMMAND_SIZE];
+      uint32_t size;
+      uint32_t used;
+      uint32_t reserved;
+   } command;
+
+   struct {
+      struct vmw_svga_winsys_surface *handles[VMW_SURFACE_RELOCS];
+      uint32_t size;
+      uint32_t used;
+      uint32_t staged;
+      uint32_t reserved;
+   } surface;
+
+   struct pb_validate *validate;
+
+   uint32_t last_fence;
+};
+
+
+static INLINE struct vmw_svga_winsys_context *
+vmw_svga_winsys_context(struct svga_winsys_context *swc)
+{
+   assert(swc);
+   return (struct vmw_svga_winsys_context *)swc;
+}
+
+
+static enum pipe_error
+vmw_swc_flush(struct svga_winsys_context *swc,
+              struct pipe_fence_handle **pfence)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct pipe_fence_handle *fence = NULL;
+   unsigned i;
+   enum pipe_error ret;
+
+   ret = pb_validate_validate(vswc->validate);
+   assert(ret == PIPE_OK);
+   if(ret == PIPE_OK) {
+
+      if (vswc->command.used)
+         vmw_ioctl_command(vswc->vws,
+                           vswc->command.buffer,
+                           vswc->command.used,
+                           &vswc->last_fence);
+
+      fence = vmw_pipe_fence(vswc->last_fence);
+
+      pb_validate_fence(vswc->validate, fence);
+   }
+
+   vswc->command.used = 0;
+   vswc->command.reserved = 0;
+
+   for(i = 0; i < vswc->surface.used + vswc->surface.staged; ++i) {
+      struct vmw_svga_winsys_surface *vsurf =
+	 vswc->surface.handles[i];
+      p_atomic_dec(&vsurf->validated);
+      vmw_svga_winsys_surface_reference(&vswc->surface.handles[i], NULL);
+   }
+
+   vswc->surface.used = 0;
+   vswc->surface.reserved = 0;
+
+#ifdef DEBUG
+   vswc->must_flush = FALSE;
+#endif
+
+   if(pfence)
+      *pfence = fence;
+
+   return ret;
+}
+
+
+static void *
+vmw_swc_reserve(struct svga_winsys_context *swc,
+                uint32_t nr_bytes, uint32_t nr_relocs )
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+
+#ifdef DEBUG
+   /* Check if somebody forgot to check the previous failure */
+   if(vswc->must_flush) {
+      debug_printf("Forgot to flush:\n");
+      debug_backtrace_dump(vswc->must_flush_stack, VMW_MUST_FLUSH_STACK);
+      assert(!vswc->must_flush);
+   }
+#endif
+
+   assert(nr_bytes <= vswc->command.size);
+   if(nr_bytes > vswc->command.size)
+      return NULL;
+
+   if(vswc->command.used + nr_bytes > vswc->command.size ||
+      vswc->surface.used + nr_relocs > vswc->surface.size) {
+#ifdef DEBUG
+      vswc->must_flush = TRUE;
+      debug_backtrace_capture(vswc->must_flush_stack, 1,
+                              VMW_MUST_FLUSH_STACK);
+#endif
+      return NULL;
+   }
+
+   assert(vswc->command.used + nr_bytes <= vswc->command.size);
+   assert(vswc->surface.used + nr_relocs <= vswc->surface.size);
+
+   vswc->command.reserved = nr_bytes;
+   vswc->surface.reserved = nr_relocs;
+   vswc->surface.staged = 0;
+
+   return vswc->command.buffer + vswc->command.used;
+}
+
+
+static void
+vmw_swc_surface_relocation(struct svga_winsys_context *swc,
+                           uint32 *where,
+                           struct svga_winsys_surface *surface,
+                           unsigned flags)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct vmw_svga_winsys_surface *vsurf;
+
+   if(!surface) {
+      *where = SVGA3D_INVALID_ID;
+      return;
+   }
+
+   assert(vswc->surface.staged < vswc->surface.reserved);
+
+   vsurf = vmw_svga_winsys_surface(surface);
+
+   *where = vsurf->sid;
+
+   vmw_svga_winsys_surface_reference(&vswc->surface.handles[vswc->surface.used + vswc->surface.staged], vsurf);
+   p_atomic_inc(&vsurf->validated);
+   ++vswc->surface.staged;
+}
+
+
+static void
+vmw_swc_region_relocation(struct svga_winsys_context *swc,
+                          struct SVGAGuestPtr *where,
+                          struct svga_winsys_buffer *buffer,
+                          uint32 offset,
+                          unsigned flags)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   struct SVGAGuestPtr ptr;
+   struct pb_buffer *buf = vmw_pb_buffer(buffer);
+   enum pipe_error ret;
+
+   if(!vmw_gmr_bufmgr_region_ptr(buf, &ptr))
+      assert(0);
+
+   ptr.offset += offset;
+
+   *where = ptr;
+
+   ret = pb_validate_add_buffer(vswc->validate, buf, flags);
+   /* TODO: Update pipebuffer to reserve buffers and not fail here */
+   assert(ret == PIPE_OK);
+}
+
+
+static void
+vmw_swc_commit(struct svga_winsys_context *swc)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+
+   assert(vswc->command.reserved);
+   assert(vswc->command.used + vswc->command.reserved <= vswc->command.size);
+   vswc->command.used += vswc->command.reserved;
+   vswc->command.reserved = 0;
+
+   assert(vswc->surface.staged <= vswc->surface.reserved);
+   assert(vswc->surface.used + vswc->surface.staged <= vswc->surface.size);
+   vswc->surface.used += vswc->surface.staged;
+   vswc->surface.staged = 0;
+   vswc->surface.reserved = 0;
+}
+
+
+static void
+vmw_swc_destroy(struct svga_winsys_context *swc)
+{
+   struct vmw_svga_winsys_context *vswc = vmw_svga_winsys_context(swc);
+   unsigned i;
+   for(i = 0; i < vswc->surface.used; ++i) {
+      p_atomic_dec(&vswc->surface.handles[i]->validated);
+      vmw_svga_winsys_surface_reference(&vswc->surface.handles[i], NULL);
+   }
+   pb_validate_destroy(vswc->validate);
+   vmw_ioctl_context_destroy(vswc->vws, swc->cid);
+   FREE(vswc);
+}
+
+
+struct svga_winsys_context *
+vmw_svga_winsys_context_create(struct svga_winsys_screen *sws)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+   struct vmw_svga_winsys_context *vswc;
+
+   vswc = CALLOC_STRUCT(vmw_svga_winsys_context);
+   if(!vswc)
+      return NULL;
+
+   vswc->base.destroy = vmw_swc_destroy;
+   vswc->base.reserve = vmw_swc_reserve;
+   vswc->base.surface_relocation = vmw_swc_surface_relocation;
+   vswc->base.region_relocation = vmw_swc_region_relocation;
+   vswc->base.commit = vmw_swc_commit;
+   vswc->base.flush = vmw_swc_flush;
+
+   vswc->base.cid = vmw_ioctl_context_create(vws);
+
+   vswc->vws = vws;
+
+   vswc->command.size = VMW_COMMAND_SIZE;
+   vswc->surface.size = VMW_SURFACE_RELOCS;
+
+   vswc->validate = pb_validate_create();
+   if(!vswc->validate) {
+      FREE(vswc);
+      return NULL;
+   }
+
+   return &vswc->base;
+}
+
+
+struct pipe_context *
+vmw_svga_context_create(struct pipe_screen *screen)
+{
+   return svga_context_create(screen);
+}
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_context.h b/src/gallium/winsys/drm/vmware/core/vmw_context.h
new file mode 100644
index 00000000000..305ce9b5bec
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_context.h
@@ -0,0 +1,59 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef VMW_CONTEXT_H_
+#define VMW_CONTEXT_H_
+
+#include "pipe/p_compiler.h"
+
+struct svga_winsys_screen;
+struct svga_winsys_context;
+struct pipe_context;
+struct pipe_screen;
+
+#define VMW_DEBUG 0
+
+#if VMW_DEBUG
+#define vmw_printf debug_printf
+#define VMW_FUNC  debug_printf("%s\n", __FUNCTION__)
+#else
+#define VMW_FUNC
+#define vmw_printf(...)
+#endif
+
+
+struct svga_winsys_context *
+vmw_svga_winsys_context_create(struct svga_winsys_screen *sws);
+
+struct pipe_context *
+vmw_svga_context_create(struct pipe_screen *screen);
+
+
+#endif /* VMW_CONTEXT_H_ */
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_fence.c b/src/gallium/winsys/drm/vmware/core/vmw_fence.c
new file mode 100644
index 00000000000..873dd51166c
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_fence.c
@@ -0,0 +1,108 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "util/u_memory.h"
+#include "pipebuffer/pb_buffer_fenced.h"
+
+#include "vmw_screen.h"
+#include "vmw_fence.h"
+
+
+
+struct vmw_fence_ops 
+{
+   struct pb_fence_ops base;
+
+   struct vmw_winsys_screen *vws;
+};
+
+
+static INLINE struct vmw_fence_ops *
+vmw_fence_ops(struct pb_fence_ops *ops)
+{
+   assert(ops);
+   return (struct vmw_fence_ops *)ops;
+}
+
+
+static void
+vmw_fence_ops_fence_reference(struct pb_fence_ops *ops,
+                              struct pipe_fence_handle **ptr,
+                              struct pipe_fence_handle *fence)
+{
+   *ptr = fence;
+}
+
+
+static int
+vmw_fence_ops_fence_signalled(struct pb_fence_ops *ops,
+                              struct pipe_fence_handle *fence,
+                              unsigned flag)
+{
+   struct vmw_winsys_screen *vws = vmw_fence_ops(ops)->vws;
+   (void)flag;
+   return vmw_ioctl_fence_signalled(vws, vmw_fence(fence));
+}
+
+
+static int
+vmw_fence_ops_fence_finish(struct pb_fence_ops *ops,
+                           struct pipe_fence_handle *fence,
+                           unsigned flag)
+{
+   struct vmw_winsys_screen *vws = vmw_fence_ops(ops)->vws;
+   (void)flag;
+   return vmw_ioctl_fence_finish(vws, vmw_fence(fence));
+}
+
+
+static void
+vmw_fence_ops_destroy(struct pb_fence_ops *ops)
+{
+   FREE(ops);
+}
+
+
+struct pb_fence_ops *
+vmw_fence_ops_create(struct vmw_winsys_screen *vws) 
+{
+   struct vmw_fence_ops *ops;
+
+   ops = CALLOC_STRUCT(vmw_fence_ops);
+   if(!ops)
+      return NULL;
+
+   ops->base.destroy = &vmw_fence_ops_destroy;
+   ops->base.fence_reference = &vmw_fence_ops_fence_reference;
+   ops->base.fence_signalled = &vmw_fence_ops_fence_signalled;
+   ops->base.fence_finish = &vmw_fence_ops_fence_finish;
+
+   ops->vws = vws;
+
+   return &ops->base;
+}
+
+
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_fence.h b/src/gallium/winsys/drm/vmware/core/vmw_fence.h
new file mode 100644
index 00000000000..5357b4f61de
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_fence.h
@@ -0,0 +1,59 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#ifndef VMW_FENCE_H_
+#define VMW_FENCE_H_
+
+
+#include "pipe/p_compiler.h"
+
+
+struct pipe_fence_handle;
+struct pb_fence_ops;
+struct vmw_winsys_screen;
+
+
+/** Cast from a pipe_fence_handle pointer into a SVGA fence */
+static INLINE uint32_t
+vmw_fence( struct pipe_fence_handle *fence )
+{
+   return (uint32_t)(uintptr_t)fence;
+}
+
+
+/** Cast from a SVGA fence number to pipe_fence_handle pointer */
+static INLINE struct pipe_fence_handle *
+vmw_pipe_fence( uint32_t fence )
+{
+   return (struct pipe_fence_handle *)(uintptr_t)fence;
+}
+
+
+struct pb_fence_ops *
+vmw_fence_ops_create(struct vmw_winsys_screen *vws); 
+
+
+#endif /* VMW_FENCE_H_ */
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen.c b/src/gallium/winsys/drm/vmware/core/vmw_screen.c
new file mode 100644
index 00000000000..911eec5e254
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen.c
@@ -0,0 +1,74 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "vmw_screen.h"
+
+#include "vmw_context.h"
+
+#include "util/u_memory.h"
+#include "pipe/p_compiler.h"
+
+
+/* Called from vmw_drm_create_screen(), creates and initializes the
+ * vmw_winsys_screen structure, which is the main entity in this
+ * module.
+ */
+struct vmw_winsys_screen *
+vmw_winsys_create( int fd )
+{
+   struct vmw_winsys_screen *vws = CALLOC_STRUCT(vmw_winsys_screen);
+   if (!vws)
+      goto out_no_vws;
+
+   vws->ioctl.drm_fd = fd;
+
+   if (!vmw_ioctl_init(vws))
+      goto out_no_ioctl;
+
+   if(!vmw_pools_init(vws))
+      goto out_no_pools;
+
+   if (!vmw_winsys_screen_init_svga(vws))
+      goto out_no_svga;
+
+   return vws;
+out_no_svga:
+   vmw_pools_cleanup(vws);
+out_no_pools:
+   vmw_ioctl_cleanup(vws);
+out_no_ioctl:
+   FREE(vws);
+out_no_vws:
+   return NULL;
+}
+
+void
+vmw_winsys_destroy(struct vmw_winsys_screen *vws)
+{
+   vmw_pools_cleanup(vws);
+   vmw_ioctl_cleanup(vws);
+   FREE(vws);
+}
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen.h b/src/gallium/winsys/drm/vmware/core/vmw_screen.h
new file mode 100644
index 00000000000..a875107370c
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen.h
@@ -0,0 +1,134 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * Common definitions for the VMware SVGA winsys.
+ *
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef VMW_SCREEN_H_
+#define VMW_SCREEN_H_
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
+
+#include "svga_winsys.h"
+
+struct pb_manager;
+struct vmw_region;
+
+
+struct vmw_winsys_screen
+{
+   struct svga_winsys_screen base;
+
+   struct {
+      volatile uint32_t *fifo_map;
+      uint64_t last_fence;
+      int drm_fd;
+   } ioctl;
+
+   struct {
+      struct pb_manager *gmr;
+      struct pb_manager *gmr_mm;
+      struct pb_manager *gmr_fenced;
+   } pools;
+};
+
+
+static INLINE struct vmw_winsys_screen *
+vmw_winsys_screen(struct svga_winsys_screen *base)
+{
+   return (struct vmw_winsys_screen *)base;
+}
+
+/*  */
+uint32
+vmw_ioctl_context_create(struct vmw_winsys_screen *vws);
+
+void
+vmw_ioctl_context_destroy(struct vmw_winsys_screen *vws,
+                          uint32 cid);
+
+uint32
+vmw_ioctl_surface_create(struct vmw_winsys_screen *vws,
+                              SVGA3dSurfaceFlags flags,
+                              SVGA3dSurfaceFormat format,
+                              SVGA3dSize size,
+                              uint32 numFaces,
+                              uint32 numMipLevels);
+
+void
+vmw_ioctl_surface_destroy(struct vmw_winsys_screen *vws,
+                          uint32 sid);
+
+void
+vmw_ioctl_command(struct vmw_winsys_screen *vws,
+                       void *commands,
+                       uint32_t size,
+                       uint32_t *fence);
+
+struct vmw_region *
+vmw_ioctl_region_create(struct vmw_winsys_screen *vws, uint32_t size);
+
+void
+vmw_ioctl_region_destroy(struct vmw_region *region);
+
+struct SVGAGuestPtr
+vmw_ioctl_region_ptr(struct vmw_region *region);
+
+void *
+vmw_ioctl_region_map(struct vmw_region *region);
+void
+vmw_ioctl_region_unmap(struct vmw_region *region);
+
+
+int
+vmw_ioctl_fence_finish(struct vmw_winsys_screen *vws,
+                       uint32_t fence);
+
+int
+vmw_ioctl_fence_signalled(struct vmw_winsys_screen *vws,
+                          uint32_t fence);
+
+
+/* Initialize parts of vmw_winsys_screen at startup:
+ */
+boolean vmw_ioctl_init(struct vmw_winsys_screen *vws);
+boolean vmw_pools_init(struct vmw_winsys_screen *vws);
+boolean vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws);
+
+void vmw_ioctl_cleanup(struct vmw_winsys_screen *vws);
+void vmw_pools_cleanup(struct vmw_winsys_screen *vws);
+
+struct vmw_winsys_screen *vmw_winsys_create(int fd);
+void vmw_winsys_destroy(struct vmw_winsys_screen *sws);
+
+
+#endif /* VMW_SCREEN_H_ */
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c
new file mode 100644
index 00000000000..5995eee34ba
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_dri.c
@@ -0,0 +1,371 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_inlines.h"
+#include "util/u_memory.h"
+#include "vmw_screen.h"
+
+#include "trace/tr_drm.h"
+
+#include "vmw_screen.h"
+#include "vmw_surface.h"
+#include "vmw_fence.h"
+#include "vmw_context.h"
+
+#include <state_tracker/dri1_api.h>
+#include <state_tracker/drm_api.h>
+#include <vmwgfx_drm.h>
+#include <xf86drm.h>
+
+#include <stdio.h>
+
+static struct dri1_api dri1_api_hooks;
+static struct dri1_api_version ddx_required = { 0, 1, 0 };
+static struct dri1_api_version ddx_compat = { 0, 0, 0 };
+static struct dri1_api_version dri_required = { 4, 0, 0 };
+static struct dri1_api_version dri_compat = { 4, 0, 0 };
+static struct dri1_api_version drm_required = { 0, 1, 0 };
+static struct dri1_api_version drm_compat = { 0, 0, 0 };
+
+static boolean
+vmw_dri1_check_version(const struct dri1_api_version *cur,
+		       const struct dri1_api_version *required,
+		       const struct dri1_api_version *compat,
+		       const char component[])
+{
+   if (cur->major > required->major && cur->major <= compat->major)
+      return TRUE;
+   if (cur->major == required->major && cur->minor >= required->minor)
+      return TRUE;
+
+   fprintf(stderr, "%s version failure.\n", component);
+   fprintf(stderr, "%s version is %d.%d.%d and this driver can only work\n"
+	   "with versions %d.%d.x through %d.x.x.\n",
+	   component,
+	   cur->major,
+	   cur->minor,
+	   cur->patch_level, required->major, required->minor, compat->major);
+   return FALSE;
+}
+
+/* This is actually the entrypoint to the entire driver, called by the
+ * libGL (or EGL, or ...) code via the drm_api_hooks table at the
+ * bottom of the file.
+ */
+static struct pipe_screen *
+vmw_drm_create_screen(struct drm_api *drm_api,
+                      int fd,
+                      struct drm_create_screen_arg *arg)
+{
+   struct vmw_winsys_screen *vws;
+   struct pipe_screen *screen;
+   struct dri1_create_screen_arg *dri1;
+
+   if (arg != NULL) {
+      switch (arg->mode) {
+      case DRM_CREATE_NORMAL:
+	 break;
+      case DRM_CREATE_DRI1:
+	 dri1 = (struct dri1_create_screen_arg *)arg;
+	 if (!vmw_dri1_check_version(&dri1->ddx_version, &ddx_required,
+				     &ddx_compat, "ddx - driver api"))
+	    return NULL;
+	 if (!vmw_dri1_check_version(&dri1->dri_version, &dri_required,
+				     &dri_compat, "dri info"))
+	    return NULL;
+	 if (!vmw_dri1_check_version(&dri1->drm_version, &drm_required,
+				     &drm_compat, "vmwgfx drm driver"))
+	    return NULL;
+	 dri1->api = &dri1_api_hooks;
+	 break;
+      default:
+	 return NULL;
+      }
+   }
+
+   vws = vmw_winsys_create( fd );
+   if (!vws)
+      goto out_no_vws;
+
+   screen = svga_screen_create( &vws->base );
+   if (!screen)
+      goto out_no_screen;
+
+   return screen;
+
+   /* Failure cases:
+    */
+out_no_screen:
+   vmw_winsys_destroy( vws );
+
+out_no_vws:
+   return NULL;
+}
+
+static INLINE boolean
+vmw_dri1_intersect_src_bbox(struct drm_clip_rect *dst,
+			    int dst_x,
+			    int dst_y,
+			    const struct drm_clip_rect *src,
+			    const struct drm_clip_rect *bbox)
+{
+   int xy1;
+   int xy2;
+
+   xy1 = ((int)src->x1 > (int)bbox->x1 + dst_x) ? src->x1 :
+      (int)bbox->x1 + dst_x;
+   xy2 = ((int)src->x2 < (int)bbox->x2 + dst_x) ? src->x2 :
+      (int)bbox->x2 + dst_x;
+   if (xy1 >= xy2 || xy1 < 0)
+      return FALSE;
+
+   dst->x1 = xy1;
+   dst->x2 = xy2;
+
+   xy1 = ((int)src->y1 > (int)bbox->y1 + dst_y) ? src->y1 :
+      (int)bbox->y1 + dst_y;
+   xy2 = ((int)src->y2 < (int)bbox->y2 + dst_y) ? src->y2 :
+      (int)bbox->y2 + dst_y;
+   if (xy1 >= xy2 || xy1 < 0)
+      return FALSE;
+
+   dst->y1 = xy1;
+   dst->y2 = xy2;
+   return TRUE;
+}
+
+/**
+ * No fancy get-surface-from-sarea stuff here.
+ * Just use the present blit.
+ */
+
+static void
+vmw_dri1_present_locked(struct pipe_context *locked_pipe,
+			struct pipe_surface *surf,
+			const struct drm_clip_rect *rect,
+			unsigned int num_clip,
+			int x_draw, int y_draw,
+			const struct drm_clip_rect *bbox,
+			struct pipe_fence_handle **p_fence)
+{
+   struct svga_winsys_surface *srf =
+      svga_screen_texture_get_winsys_surface(surf->texture);
+   struct vmw_svga_winsys_surface *vsrf = vmw_svga_winsys_surface(srf);
+   struct vmw_winsys_screen *vws =
+      vmw_winsys_screen(svga_winsys_screen(locked_pipe->screen));
+   struct drm_clip_rect clip;
+   int i;
+   struct
+   {
+      SVGA3dCmdHeader header;
+      SVGA3dCmdPresent body;
+      SVGA3dCopyRect rect;
+   } cmd;
+   boolean visible = FALSE;
+   uint32_t fence_seq = 0;
+
+   VMW_FUNC;
+   cmd.header.id = SVGA_3D_CMD_PRESENT;
+   cmd.header.size = sizeof cmd.body + sizeof cmd.rect;
+   cmd.body.sid = vsrf->sid;
+
+   for (i = 0; i < num_clip; ++i) {
+      if (!vmw_dri1_intersect_src_bbox(&clip, x_draw, y_draw, rect++, bbox))
+	 continue;
+
+      cmd.rect.x = clip.x1;
+      cmd.rect.y = clip.y1;
+      cmd.rect.w = clip.x2 - clip.x1;
+      cmd.rect.h = clip.y2 - clip.y1;
+      cmd.rect.srcx = (int)clip.x1 - x_draw;
+      cmd.rect.srcy = (int)clip.y1 - y_draw;
+
+      vmw_printf("%s: Clip %d x %d y %d w %d h %d srcx %d srcy %d\n",
+		   __FUNCTION__,
+		   i,
+		   cmd.rect.x,
+		   cmd.rect.y,
+		   cmd.rect.w, cmd.rect.h, cmd.rect.srcx, cmd.rect.srcy);
+
+      vmw_ioctl_command(vws, &cmd, sizeof cmd.header + cmd.header.size,
+                        &fence_seq);
+      visible = TRUE;
+   }
+
+   *p_fence = (visible) ? vmw_pipe_fence(fence_seq) : NULL;
+   vmw_svga_winsys_surface_reference(&vsrf, NULL);
+}
+
+/**
+ * FIXME: We'd probably want to cache these buffers in the
+ * screen, based on handle.
+ */
+
+static struct pipe_buffer *
+vmw_drm_buffer_from_handle(struct drm_api *drm_api,
+                           struct pipe_screen *screen,
+			   const char *name,
+			   unsigned handle)
+{
+    struct vmw_svga_winsys_surface *vsrf;
+    struct svga_winsys_surface *ssrf;
+    struct vmw_winsys_screen *vws =
+	vmw_winsys_screen(svga_winsys_screen(screen));
+    struct pipe_buffer *buf;
+    union drm_vmw_surface_reference_arg arg;
+    struct drm_vmw_surface_arg *req = &arg.req;
+    struct drm_vmw_surface_create_req *rep = &arg.rep;
+    int ret;
+    int i;
+
+    /**
+     * The vmware device specific handle is the hardware SID.
+     * FIXME: We probably want to move this to the ioctl implementations.
+     */
+
+    memset(&arg, 0, sizeof(arg));
+    req->sid = handle;
+
+    ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_REF_SURFACE,
+			      &arg, sizeof(arg));
+
+    if (ret) {
+	fprintf(stderr, "Failed referencing shared surface. SID %d.\n"
+		"Error %d (%s).\n",
+		handle, ret, strerror(-ret));
+	return NULL;
+    }
+
+    if (rep->mip_levels[0] != 1) {
+	fprintf(stderr, "Incorrect number of mipmap levels on shared surface."
+		" SID %d, levels %d\n",
+		handle, rep->mip_levels[0]);
+	goto out_mip;
+    }
+
+    for (i=1; i < DRM_VMW_MAX_SURFACE_FACES; ++i) {
+	if (rep->mip_levels[i] != 0) {
+	    fprintf(stderr, "Incorrect number of faces levels on shared surface."
+		    " SID %d, face %d present.\n",
+		    handle, i);
+	    goto out_mip;
+	}
+    }
+
+    vsrf = CALLOC_STRUCT(vmw_svga_winsys_surface);
+    if (!vsrf)
+	goto out_mip;
+
+    pipe_reference_init(&vsrf->refcnt, 1);
+    p_atomic_set(&vsrf->validated, 0);
+    vsrf->sid = handle;
+    ssrf = svga_winsys_surface(vsrf);
+    buf = svga_screen_buffer_wrap_surface(screen, rep->format, ssrf);
+    if (!buf)
+	vmw_svga_winsys_surface_reference(&vsrf, NULL);
+
+    return buf;
+  out_mip:
+    vmw_ioctl_surface_destroy(vws, handle);
+    return NULL;
+}
+
+static struct pipe_texture *
+vmw_drm_texture_from_handle(struct drm_api *drm_api,
+			    struct pipe_screen *screen,
+			    struct pipe_texture *templat,
+			    const char *name,
+			    unsigned stride,
+			    unsigned handle)
+{
+    struct pipe_buffer *buffer;
+    buffer = vmw_drm_buffer_from_handle(drm_api, screen, name, handle);
+
+    if (!buffer)
+	return NULL;
+
+    return screen->texture_blanket(screen, templat, &stride, buffer);
+}
+
+static boolean
+vmw_drm_handle_from_buffer(struct drm_api *drm_api,
+                           struct pipe_screen *screen,
+			   struct pipe_buffer *buffer,
+			   unsigned *handle)
+{
+    struct svga_winsys_surface *surface =
+	svga_screen_buffer_get_winsys_surface(buffer);
+    struct vmw_svga_winsys_surface *vsrf;
+
+    if (!surface)
+	return FALSE;
+
+    vsrf = vmw_svga_winsys_surface(surface);
+    *handle = vsrf->sid;
+    vmw_svga_winsys_surface_reference(&vsrf, NULL);
+    return TRUE;
+}
+
+static boolean
+vmw_drm_handle_from_texture(struct drm_api *drm_api,
+			    struct pipe_screen *screen,
+			    struct pipe_texture *texture,
+			    unsigned *stride,
+			    unsigned *handle)
+{
+    struct pipe_buffer *buffer;
+
+    if (!svga_screen_buffer_from_texture(texture, &buffer, stride))
+	return FALSE;
+
+    return vmw_drm_handle_from_buffer(drm_api, screen, buffer, handle);
+}
+
+static struct pipe_context*
+vmw_drm_create_context(struct drm_api *drm_api,
+                       struct pipe_screen *screen)
+{
+   return vmw_svga_context_create(screen);
+}
+
+static struct dri1_api dri1_api_hooks = {
+   .front_srf_locked = NULL,
+   .present_locked = vmw_dri1_present_locked
+};
+
+static struct drm_api vmw_drm_api_hooks = {
+   .create_screen = vmw_drm_create_screen,
+   .create_context = vmw_drm_create_context,
+   .texture_from_shared_handle = vmw_drm_texture_from_handle,
+   .shared_handle_from_texture = vmw_drm_handle_from_texture,
+   .local_handle_from_texture = vmw_drm_handle_from_texture,
+};
+
+struct drm_api* drm_api_create()
+{
+   return trace_drm_create(&vmw_drm_api_hooks);
+}
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c
new file mode 100644
index 00000000000..51e455f9254
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_ioctl.c
@@ -0,0 +1,504 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ *
+ * Wrappers for DRM ioctl functionlaity used by the rest of the vmw
+ * drm winsys.
+ *
+ * Based on svgaicd_escape.c
+ */
+
+
+#include "svga_cmd.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+#include "svgadump/svga_dump.h"
+#include "vmw_screen.h"
+#include "vmw_context.h"
+#include "xf86drm.h"
+#include "vmwgfx_drm.h"
+
+#include <sys/mman.h>
+#include <errno.h>
+#include <unistd.h>
+
+struct vmw_region
+{
+   SVGAGuestPtr ptr;
+   uint32_t handle;
+   uint64_t map_handle;
+   void *data;
+   uint32_t map_count;
+   int drm_fd;
+   uint32_t size;
+};
+
+static void
+vmw_check_last_cmd(struct vmw_winsys_screen *vws)
+{
+   static uint32_t buffer[16384];
+   struct drm_vmw_fifo_debug_arg arg;
+   int ret;
+
+   return;
+   memset(&arg, 0, sizeof(arg));
+   arg.debug_buffer = (unsigned long)buffer;
+   arg.debug_buffer_size = 65536;
+
+   ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_FIFO_DEBUG,
+			     &arg, sizeof(arg));
+
+   if (ret) {
+      debug_printf("%s Ioctl error: \"%s\".\n", __FUNCTION__, strerror(-ret));
+      return;
+   }
+
+   if (arg.did_not_fit) {
+      debug_printf("%s Command did not fit completely.\n", __FUNCTION__);
+   }
+
+   svga_dump_commands(buffer, arg.used_size);
+}
+
+static void
+vmw_ioctl_fifo_unmap(struct vmw_winsys_screen *vws, void *mapping)
+{
+   VMW_FUNC;
+   (void)munmap(mapping, getpagesize());
+}
+
+
+static void *
+vmw_ioctl_fifo_map(struct vmw_winsys_screen *vws,
+                   uint32_t fifo_offset )
+{
+   void *map;
+
+   VMW_FUNC;
+
+   map = mmap(NULL, getpagesize(), PROT_READ, MAP_SHARED,
+	      vws->ioctl.drm_fd, fifo_offset);
+
+   if (map == MAP_FAILED) {
+      debug_printf("Map failed %s\n", strerror(errno));
+      return NULL;
+   }
+
+   vmw_printf("Fifo (min) is 0x%08x\n", ((uint32_t *) map)[SVGA_FIFO_MIN]);
+
+   return map;
+}
+
+uint32
+vmw_ioctl_context_create(struct vmw_winsys_screen *vws)
+{
+   struct drm_vmw_context_arg c_arg;
+   int ret;
+
+   VMW_FUNC;
+
+   ret = drmCommandRead(vws->ioctl.drm_fd, DRM_VMW_CREATE_CONTEXT,
+			&c_arg, sizeof(c_arg));
+
+   if (ret)
+      return -1;
+
+   vmw_check_last_cmd(vws);
+   vmw_printf("Context id is %d\n", c_arg.cid);
+
+   return c_arg.cid;
+}
+
+void
+vmw_ioctl_context_destroy(struct vmw_winsys_screen *vws, uint32 cid)
+{
+   struct drm_vmw_context_arg c_arg;
+
+   VMW_FUNC;
+
+   memset(&c_arg, 0, sizeof(c_arg));
+   c_arg.cid = cid;
+
+   (void)drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_UNREF_CONTEXT,
+			 &c_arg, sizeof(c_arg));
+
+   vmw_check_last_cmd(vws);
+}
+
+uint32
+vmw_ioctl_surface_create(struct vmw_winsys_screen *vws,
+			      SVGA3dSurfaceFlags flags,
+			      SVGA3dSurfaceFormat format,
+			      SVGA3dSize size,
+			      uint32_t numFaces, uint32_t numMipLevels)
+{
+   union drm_vmw_surface_create_arg s_arg;
+   struct drm_vmw_surface_create_req *req = &s_arg.req;
+   struct drm_vmw_surface_arg *rep = &s_arg.rep;
+   struct drm_vmw_size sizes[DRM_VMW_MAX_SURFACE_FACES*
+			     DRM_VMW_MAX_MIP_LEVELS];
+   struct drm_vmw_size *cur_size;
+   uint32_t iFace;
+   uint32_t iMipLevel;
+   int ret;
+
+   vmw_printf("%s flags %d format %d\n", __FUNCTION__, flags, format);
+
+   memset(&s_arg, 0, sizeof(s_arg));
+   req->flags = (uint32_t) flags;
+   req->format = (uint32_t) format;
+   req->shareable = 1;
+
+   assert(numFaces * numMipLevels < DRM_VMW_MAX_SURFACE_FACES*
+	  DRM_VMW_MAX_MIP_LEVELS);
+   cur_size = sizes;
+   for (iFace = 0; iFace < numFaces; ++iFace) {
+      SVGA3dSize mipSize = size;
+
+      req->mip_levels[iFace] = numMipLevels;
+      for (iMipLevel = 0; iMipLevel < numMipLevels; ++iMipLevel) {
+	 cur_size->width = mipSize.width;
+	 cur_size->height = mipSize.height;
+	 cur_size->depth = mipSize.depth;
+	 mipSize.width = MAX2(mipSize.width >> 1, 1);
+	 mipSize.height = MAX2(mipSize.height >> 1, 1);
+	 mipSize.depth = MAX2(mipSize.depth >> 1, 1);
+	 cur_size++;
+      }
+   }
+   for (iFace = numFaces; iFace < SVGA3D_MAX_SURFACE_FACES; ++iFace) {
+      req->mip_levels[iFace] = 0;
+   }
+
+   req->size_addr = (unsigned long)&sizes;
+
+   ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_CREATE_SURFACE,
+			     &s_arg, sizeof(s_arg));
+
+   if (ret)
+      return -1;
+
+   vmw_printf("Surface id is %d\n", rep->sid);
+   vmw_check_last_cmd(vws);
+
+   return rep->sid;
+}
+
+void
+vmw_ioctl_surface_destroy(struct vmw_winsys_screen *vws, uint32 sid)
+{
+   struct drm_vmw_surface_arg s_arg;
+
+   VMW_FUNC;
+
+   memset(&s_arg, 0, sizeof(s_arg));
+   s_arg.sid = sid;
+
+   (void)drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_UNREF_SURFACE,
+			 &s_arg, sizeof(s_arg));
+   vmw_check_last_cmd(vws);
+
+}
+
+void
+vmw_ioctl_command(struct vmw_winsys_screen *vws, void *commands, uint32_t size,
+		       uint32_t * pfence)
+{
+   struct drm_vmw_execbuf_arg arg;
+   struct drm_vmw_fence_rep rep;
+   int ret;
+
+#ifdef DEBUG
+   {
+      static boolean firsttime = TRUE;
+      static boolean debug = FALSE;
+      static boolean skip = FALSE;
+      if (firsttime) {
+         debug = debug_get_bool_option("SVGA_DUMP_CMD", FALSE);
+         skip = debug_get_bool_option("SVGA_SKIP_CMD", FALSE);
+      }
+      if (debug) {
+         VMW_FUNC;
+         svga_dump_commands(commands, size);
+      }
+      firsttime = FALSE;
+      if (skip) {
+         size = 0;
+      }
+   }
+#endif
+
+   memset(&arg, 0, sizeof(arg));
+   memset(&rep, 0, sizeof(rep));
+
+   rep.error = -EFAULT;
+   arg.fence_rep = (unsigned long)&rep;
+   arg.commands = (unsigned long)commands;
+   arg.command_size = size;
+
+   do {
+       ret = drmCommandWrite(vws->ioctl.drm_fd, DRM_VMW_EXECBUF, &arg, sizeof(arg));
+   } while(ret == -ERESTART);
+   if (ret) {
+      debug_printf("%s error %s.\n", __FUNCTION__, strerror(-ret));
+   }
+   if (rep.error) {
+
+      /*
+       * Kernel has synced and put the last fence sequence in the FIFO
+       * register.
+       */
+
+      if (rep.error == -EFAULT)
+	 rep.fence_seq = vws->ioctl.fifo_map[SVGA_FIFO_FENCE];
+
+      debug_printf("%s Fence error %s.\n", __FUNCTION__,
+		   strerror(-rep.error));
+   }
+
+   vws->ioctl.last_fence = rep.fence_seq;
+
+   if (pfence)
+      *pfence = rep.fence_seq;
+   vmw_check_last_cmd(vws);
+
+}
+
+
+struct vmw_region *
+vmw_ioctl_region_create(struct vmw_winsys_screen *vws, uint32_t size)
+{
+   struct vmw_region *region;
+   union drm_vmw_alloc_dmabuf_arg arg;
+   struct drm_vmw_alloc_dmabuf_req *req = &arg.req;
+   struct drm_vmw_dmabuf_rep *rep = &arg.rep;
+   int ret;
+
+   vmw_printf("%s: size = %u\n", __FUNCTION__, size);
+
+   region = CALLOC_STRUCT(vmw_region);
+   if (!region)
+      goto out_err1;
+
+   memset(&arg, 0, sizeof(arg));
+   req->size = size;
+   do {
+      ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_ALLOC_DMABUF, &arg,
+				sizeof(arg));
+   } while (ret == -ERESTART);
+
+   if (ret) {
+      debug_printf("IOCTL failed %d: %s\n", ret, strerror(-ret));
+      goto out_err1;
+   }
+
+   region->ptr.gmrId = rep->cur_gmr_id;
+   region->ptr.offset = rep->cur_gmr_offset;
+   region->data = NULL;
+   region->handle = rep->handle;
+   region->map_handle = rep->map_handle;
+   region->map_count = 0;
+   region->size = size;
+   region->drm_fd = vws->ioctl.drm_fd;
+
+   vmw_printf("   gmrId = %u, offset = %u\n",
+              region->ptr.gmrId, region->ptr.offset);
+
+   return region;
+
+ out_err1:
+   FREE(region);
+   return NULL;
+}
+
+void
+vmw_ioctl_region_destroy(struct vmw_region *region)
+{
+   struct drm_vmw_unref_dmabuf_arg arg;
+
+   vmw_printf("%s: gmrId = %u, offset = %u\n", __FUNCTION__,
+              region->ptr.gmrId, region->ptr.offset);
+
+   if (region->data) {
+      munmap(region->data, region->size);
+      region->data = NULL;
+   }
+
+   memset(&arg, 0, sizeof(arg));
+   arg.handle = region->handle;
+   drmCommandWrite(region->drm_fd, DRM_VMW_UNREF_DMABUF, &arg, sizeof(arg));
+
+   FREE(region);
+}
+
+SVGAGuestPtr
+vmw_ioctl_region_ptr(struct vmw_region *region)
+{
+   return region->ptr;
+}
+
+void *
+vmw_ioctl_region_map(struct vmw_region *region)
+{
+   void *map;
+
+   vmw_printf("%s: gmrId = %u, offset = %u\n", __FUNCTION__,
+              region->ptr.gmrId, region->ptr.offset);
+
+   if (region->data == NULL) {
+      map = mmap(NULL, region->size, PROT_READ | PROT_WRITE, MAP_SHARED,
+		 region->drm_fd, region->map_handle);
+      if (map == MAP_FAILED) {
+	 debug_printf("%s: Map failed.\n", __FUNCTION__);
+	 return NULL;
+      }
+
+      region->data = map;
+   }
+
+   ++region->map_count;
+
+   return region->data;
+}
+
+void
+vmw_ioctl_region_unmap(struct vmw_region *region)
+{
+   vmw_printf("%s: gmrId = %u, offset = %u\n", __FUNCTION__,
+              region->ptr.gmrId, region->ptr.offset);
+   --region->map_count;
+}
+
+
+int
+vmw_ioctl_fence_signalled(struct vmw_winsys_screen *vws,
+                          uint32_t fence)
+{
+   uint32_t expected;
+   uint32_t current;
+   
+   assert(fence);
+   if(!fence)
+      return 0;
+   
+   expected = fence;
+   current = vws->ioctl.fifo_map[SVGA_FIFO_FENCE];
+   
+   if ((int32)(current - expected) >= 0)
+      return 0; /* fence passed */
+   else
+      return -1;
+}
+
+
+static void
+vmw_ioctl_sync(struct vmw_winsys_screen *vws, 
+		    uint32_t fence)
+{
+   uint32_t cur_fence;
+   struct drm_vmw_fence_wait_arg arg;
+   int ret;
+
+   vmw_printf("%s: fence = %lu\n", __FUNCTION__,
+              (unsigned long)fence);
+
+   cur_fence = vws->ioctl.fifo_map[SVGA_FIFO_FENCE];
+   vmw_printf("%s: Fence id read is 0x%08x\n", __FUNCTION__,
+              (unsigned int)cur_fence);
+
+   if ((cur_fence - fence) < (1 << 24))
+      return;
+
+   memset(&arg, 0, sizeof(arg));
+   arg.sequence = fence;
+
+   do {
+       ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_FENCE_WAIT, &arg,
+				 sizeof(arg));
+   } while (ret == -ERESTART);
+}
+
+
+int
+vmw_ioctl_fence_finish(struct vmw_winsys_screen *vws,
+                       uint32_t fence)
+{
+   assert(fence);
+   
+   if(fence) {
+      if(vmw_ioctl_fence_signalled(vws, fence) != 0) {
+         vmw_ioctl_sync(vws, fence);
+      }
+   }
+   
+   return 0;
+}
+
+
+boolean
+vmw_ioctl_init(struct vmw_winsys_screen *vws)
+{
+   struct drm_vmw_getparam_arg gp_arg;
+   int ret;
+
+   VMW_FUNC;
+
+   memset(&gp_arg, 0, sizeof(gp_arg));
+   gp_arg.param = DRM_VMW_PARAM_FIFO_OFFSET;
+   ret = drmCommandWriteRead(vws->ioctl.drm_fd, DRM_VMW_GET_PARAM,
+			     &gp_arg, sizeof(gp_arg));
+
+   if (ret) {
+      debug_printf("GET_PARAM on %d returned %d: %s\n",
+		   vws->ioctl.drm_fd, ret, strerror(-ret));
+      goto out_err1;
+   }
+
+   vmw_printf("Offset to map is 0x%08llx\n",
+              (unsigned long long)gp_arg.value);
+
+   vws->ioctl.fifo_map = vmw_ioctl_fifo_map(vws, gp_arg.value);
+   if (vws->ioctl.fifo_map == NULL)
+      goto out_err1;
+
+   vmw_printf("%s OK\n", __FUNCTION__);
+   return TRUE;
+
+ out_err1:
+   debug_printf("%s Failed\n", __FUNCTION__);
+   return FALSE;
+}
+
+
+
+void
+vmw_ioctl_cleanup(struct vmw_winsys_screen *vws)
+{
+   VMW_FUNC;
+
+   vmw_ioctl_fifo_unmap(vws, (void *)vws->ioctl.fifo_map);
+}
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c
new file mode 100644
index 00000000000..b1c24b0cb6a
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_pools.c
@@ -0,0 +1,79 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "vmw_screen.h"
+
+#include "vmw_buffer.h"
+#include "vmw_fence.h"
+
+#include "pipebuffer/pb_buffer.h"
+#include "pipebuffer/pb_bufmgr.h"
+
+void
+vmw_pools_cleanup(struct vmw_winsys_screen *vws)
+{
+   if(vws->pools.gmr_fenced)
+      vws->pools.gmr_fenced->destroy(vws->pools.gmr_fenced);
+
+   /* gmr_mm pool is already destroyed above */
+
+   if(vws->pools.gmr)
+      vws->pools.gmr->destroy(vws->pools.gmr);
+}
+
+
+boolean
+vmw_pools_init(struct vmw_winsys_screen *vws)
+{
+   vws->pools.gmr = vmw_gmr_bufmgr_create(vws);
+   if(!vws->pools.gmr)
+      goto error;
+
+   vws->pools.gmr_mm = mm_bufmgr_create(vws->pools.gmr,
+                                        16*1024*1024,
+                                        12 /* 4096 alignment */);
+   if(!vws->pools.gmr_mm)
+      goto error;
+
+   vws->pools.gmr_fenced = fenced_bufmgr_create(
+      vws->pools.gmr_mm,
+      vmw_fence_ops_create(vws));
+
+#ifdef DEBUG
+   vws->pools.gmr_fenced = pb_debug_manager_create(vws->pools.gmr_fenced,
+						   4096,
+						   4096);
+#endif
+   if(!vws->pools.gmr_fenced)
+      goto error;
+
+   return TRUE;
+
+error:
+   vmw_pools_cleanup(vws);
+   return FALSE;
+}
+
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c b/src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c
new file mode 100644
index 00000000000..d7d008859b3
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_screen_svga.c
@@ -0,0 +1,295 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * This file implements the SVGA interface into this winsys, defined
+ * in drivers/svga/svga_winsys.h.
+ *
+ * @author Keith Whitwell
+ * @author Jose Fonseca
+ */
+
+
+#include "svga_cmd.h"
+#include "svga3d_caps.h"
+
+#include "pipe/p_inlines.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
+#include "pipebuffer/pb_buffer.h"
+#include "pipebuffer/pb_bufmgr.h"
+#include "svga_winsys.h"
+#include "vmw_context.h"
+#include "vmw_screen.h"
+#include "vmw_surface.h"
+#include "vmw_buffer.h"
+#include "vmw_fence.h"
+
+
+static struct svga_winsys_buffer *
+vmw_svga_winsys_buffer_create(struct svga_winsys_screen *sws,
+                              unsigned alignment,
+                              unsigned usage,
+                              unsigned size)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+   struct pb_desc desc;
+   struct pb_manager *provider;
+   struct pb_buffer *buffer;
+
+   memset(&desc, 0, sizeof desc);
+   desc.alignment = alignment;
+   desc.usage = usage;
+
+   provider = vws->pools.gmr_fenced;
+
+   assert(provider);
+   buffer = provider->create_buffer(provider, size, &desc);
+   if(!buffer)
+      return NULL;
+
+   return vmw_svga_winsys_buffer(buffer);
+}
+
+
+static void *
+vmw_svga_winsys_buffer_map(struct svga_winsys_screen *sws,
+                           struct svga_winsys_buffer *buf,
+                           unsigned flags)
+{
+   (void)sws;
+   return pb_map(vmw_pb_buffer(buf), flags);
+}
+
+
+static void
+vmw_svga_winsys_buffer_unmap(struct svga_winsys_screen *sws,
+                             struct svga_winsys_buffer *buf)
+{
+   (void)sws;
+   pb_unmap(vmw_pb_buffer(buf));
+}
+
+
+static void
+vmw_svga_winsys_buffer_destroy(struct svga_winsys_screen *sws,
+                               struct svga_winsys_buffer *buf)
+{
+   struct pb_buffer *pbuf = vmw_pb_buffer(buf);
+   (void)sws;
+   pb_reference(&pbuf, NULL);
+}
+
+
+static void
+vmw_svga_winsys_fence_reference(struct svga_winsys_screen *sws,
+                                struct pipe_fence_handle **pdst,
+                                struct pipe_fence_handle *src)
+{
+   (void)sws;
+   *pdst = src;
+}
+
+
+static int
+vmw_svga_winsys_fence_signalled(struct svga_winsys_screen *sws,
+                                struct pipe_fence_handle *fence,
+                                unsigned flag)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+   (void)flag;
+   return vmw_ioctl_fence_signalled(vws, vmw_fence(fence));
+}
+
+
+static int
+vmw_svga_winsys_fence_finish(struct svga_winsys_screen *sws,
+                             struct pipe_fence_handle *fence,
+                             unsigned flag)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+   (void)flag;
+   return vmw_ioctl_fence_finish(vws, vmw_fence(fence));
+}
+
+
+
+static struct svga_winsys_surface *
+vmw_svga_winsys_surface_create(struct svga_winsys_screen *sws,
+                               SVGA3dSurfaceFlags flags,
+                               SVGA3dSurfaceFormat format,
+                               SVGA3dSize size,
+                               uint32 numFaces,
+                               uint32 numMipLevels)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+   struct vmw_svga_winsys_surface *surface;
+
+   surface = CALLOC_STRUCT(vmw_svga_winsys_surface);
+   if(!surface)
+      goto no_surface;
+
+   pipe_reference_init(&surface->refcnt, 1);
+   p_atomic_set(&surface->validated, 0);
+   surface->screen = vws;
+   surface->sid = vmw_ioctl_surface_create(vws,
+                                           flags, format, size,
+                                           numFaces, numMipLevels);
+   if(surface->sid == SVGA3D_INVALID_ID)
+      goto no_sid;
+
+   return svga_winsys_surface(surface);
+
+no_sid:
+   FREE(surface);
+no_surface:
+   return NULL;
+}
+
+
+static boolean
+vmw_svga_winsys_surface_is_flushed(struct svga_winsys_screen *sws,
+                                   struct svga_winsys_surface *surface)
+{
+   struct vmw_svga_winsys_surface *vsurf = vmw_svga_winsys_surface(surface);
+   return (p_atomic_read(&vsurf->validated) == 0);
+}
+
+
+static void
+vmw_svga_winsys_surface_ref(struct svga_winsys_screen *sws,
+			    struct svga_winsys_surface **pDst,
+			    struct svga_winsys_surface *src)
+{
+   struct vmw_svga_winsys_surface *d_vsurf = vmw_svga_winsys_surface(*pDst);
+   struct vmw_svga_winsys_surface *s_vsurf = vmw_svga_winsys_surface(src);
+
+   vmw_svga_winsys_surface_reference(&d_vsurf, s_vsurf);
+   *pDst = svga_winsys_surface(d_vsurf);
+}
+
+
+static void
+vmw_svga_winsys_destroy(struct svga_winsys_screen *sws)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+
+   vmw_winsys_destroy(vws);
+}
+
+
+static boolean
+vmw_svga_winsys_get_cap(struct svga_winsys_screen *sws,
+                        SVGA3dDevCapIndex index,
+                        SVGA3dDevCapResult *result)
+{
+   struct vmw_winsys_screen *vws = vmw_winsys_screen(sws);
+   const uint32 *capsBlock;
+   const SVGA3dCapsRecord *capsRecord = NULL;
+   uint32 offset;
+   const SVGA3dCapPair *capArray;
+   int numCaps, first, last;
+
+   if(!vws->ioctl.fifo_map)
+      return FALSE;
+
+   if(vws->ioctl.fifo_map[SVGA_FIFO_3D_HWVERSION] < SVGA3D_HWVERSION_WS6_B1)
+      return FALSE;
+
+   /*
+    * Search linearly through the caps block records for the specified type.
+    */
+   capsBlock = (const uint32 *)&vws->ioctl.fifo_map[SVGA_FIFO_3D_CAPS];
+   for (offset = 0; capsBlock[offset] != 0; offset += capsBlock[offset]) {
+      const SVGA3dCapsRecord *record;
+      assert(offset < SVGA_FIFO_3D_CAPS_SIZE);
+      record = (const SVGA3dCapsRecord *) (capsBlock + offset);
+      if ((record->header.type >= SVGA3DCAPS_RECORD_DEVCAPS_MIN) &&
+          (record->header.type <= SVGA3DCAPS_RECORD_DEVCAPS_MAX) &&
+          (!capsRecord || (record->header.type > capsRecord->header.type))) {
+         capsRecord = record;
+      }
+   }
+
+   if(!capsRecord)
+      return FALSE;
+
+   /*
+    * Calculate the number of caps from the size of the record.
+    */
+   capArray = (const SVGA3dCapPair *) capsRecord->data;
+   numCaps = (int) ((capsRecord->header.length * sizeof(uint32) -
+                     sizeof capsRecord->header) / (2 * sizeof(uint32)));
+
+   /*
+    * Binary-search for the cap with the specified index.
+    */
+   for (first = 0, last = numCaps - 1; first <= last; ) {
+      int mid = (first + last) / 2;
+
+      if ((SVGA3dDevCapIndex) capArray[mid][0] == index) {
+         /*
+          * Found it.
+          */
+         result->u = capArray[mid][1];
+         return TRUE;
+      }
+
+      /*
+       * Divide and conquer.
+       */
+      if ((SVGA3dDevCapIndex) capArray[mid][0] > index) {
+         last = mid - 1;
+      } else {
+         first = mid + 1;
+      }
+   }
+
+   return FALSE;
+}
+
+
+boolean
+vmw_winsys_screen_init_svga(struct vmw_winsys_screen *vws)
+{
+   vws->base.destroy = vmw_svga_winsys_destroy;
+   vws->base.get_cap = vmw_svga_winsys_get_cap;
+   vws->base.context_create = vmw_svga_winsys_context_create;
+   vws->base.surface_create = vmw_svga_winsys_surface_create;
+   vws->base.surface_is_flushed = vmw_svga_winsys_surface_is_flushed;
+   vws->base.surface_reference = vmw_svga_winsys_surface_ref;
+   vws->base.buffer_create = vmw_svga_winsys_buffer_create;
+   vws->base.buffer_map = vmw_svga_winsys_buffer_map;
+   vws->base.buffer_unmap = vmw_svga_winsys_buffer_unmap;
+   vws->base.buffer_destroy = vmw_svga_winsys_buffer_destroy;
+   vws->base.fence_reference = vmw_svga_winsys_fence_reference;
+   vws->base.fence_signalled = vmw_svga_winsys_fence_signalled;
+   vws->base.fence_finish = vmw_svga_winsys_fence_finish;
+
+   return TRUE;
+}
+
+
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_surface.c b/src/gallium/winsys/drm/vmware/core/vmw_surface.c
new file mode 100644
index 00000000000..c19e556df9f
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_surface.c
@@ -0,0 +1,59 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+
+#include "svga_cmd.h"
+#include "util/u_debug.h"
+#include "util/u_memory.h"
+
+#include "vmw_surface.h"
+#include "vmw_screen.h"
+
+void
+vmw_svga_winsys_surface_reference(struct vmw_svga_winsys_surface **pdst,
+                                  struct vmw_svga_winsys_surface *src)
+{
+   struct pipe_reference *src_ref;
+   struct pipe_reference *dst_ref;
+   struct vmw_svga_winsys_surface *dst = *pdst;
+   
+   if(pdst == NULL || *pdst == src)
+      return;
+   
+   src_ref = src ? &src->refcnt : NULL;
+   dst_ref = dst ? &dst->refcnt : NULL;
+
+   if (pipe_reference(&dst_ref, src_ref)) {
+      vmw_ioctl_surface_destroy(dst->screen, dst->sid);
+#ifdef DEBUG
+      /* to detect dangling pointers */
+      assert(p_atomic_read(&dst->validated) == 0);
+      dst->sid = SVGA3D_INVALID_ID;
+#endif
+      FREE(dst);
+   }
+
+   *pdst = src;
+}
diff --git a/src/gallium/winsys/drm/vmware/core/vmw_surface.h b/src/gallium/winsys/drm/vmware/core/vmw_surface.h
new file mode 100644
index 00000000000..340cc1532e0
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmw_surface.h
@@ -0,0 +1,79 @@
+/**********************************************************
+ * Copyright 2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * Surfaces for VMware SVGA winsys.
+ * 
+ * @author Jose Fonseca <jfonseca@vmware.com>
+ */
+
+
+#ifndef VMW_SURFACE_H_
+#define VMW_SURFACE_H_
+
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_atomic.h"
+#include "pipe/p_refcnt.h"
+
+#define VMW_MAX_PRESENTS 3
+
+
+
+struct vmw_svga_winsys_surface
+{
+   struct pipe_atomic validated;
+   struct pipe_reference refcnt;
+
+   struct vmw_winsys_screen *screen;
+   uint32_t sid;
+
+   /* FIXME: make this thread safe */
+   unsigned next_present_no;
+   uint32_t present_fences[VMW_MAX_PRESENTS];
+};
+
+
+static INLINE struct svga_winsys_surface *
+svga_winsys_surface(struct vmw_svga_winsys_surface *surf)
+{
+   assert(!surf || surf->sid != SVGA3D_INVALID_ID);
+   return (struct svga_winsys_surface *)surf;
+}
+
+
+static INLINE struct vmw_svga_winsys_surface *
+vmw_svga_winsys_surface(struct svga_winsys_surface *surf)
+{
+   return (struct vmw_svga_winsys_surface *)surf;
+}
+
+
+void
+vmw_svga_winsys_surface_reference(struct vmw_svga_winsys_surface **pdst,
+                                  struct vmw_svga_winsys_surface *src);
+
+#endif /* VMW_SURFACE_H_ */
diff --git a/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h b/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h
new file mode 100644
index 00000000000..6705dd42897
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/core/vmwgfx_drm.h
@@ -0,0 +1,442 @@
+/**************************************************************************
+ *
+ * Copyright © 2009 VMware, Inc., Palo Alto, CA., USA
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
+ * THE COPYRIGHT HOLDERS, AUTHORS AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM,
+ * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
+ * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
+ * USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef _VMWGFX_DRM_H_
+#define _VMWGFX_DRM_H_
+
+#define DRM_VMW_MAX_SURFACE_FACES 6
+#define DRM_VMW_MAX_MIP_LEVELS 24
+
+#define DRM_VMW_EXT_NAME_LEN 128
+
+#define DRM_VMW_GET_PARAM            1
+#define DRM_VMW_EXTENSION            2
+#define DRM_VMW_CREATE_CONTEXT       3
+#define DRM_VMW_UNREF_CONTEXT        4
+#define DRM_VMW_CREATE_SURFACE       5
+#define DRM_VMW_UNREF_SURFACE        6
+#define DRM_VMW_REF_SURFACE          7
+#define DRM_VMW_EXECBUF              8
+#define DRM_VMW_ALLOC_DMABUF         9
+#define DRM_VMW_UNREF_DMABUF         10
+#define DRM_VMW_FIFO_DEBUG           11
+#define DRM_VMW_FENCE_WAIT           12
+
+
+/*************************************************************************/
+/**
+ * DRM_VMW_GET_PARAM - get device information.
+ *
+ * Currently we support only one parameter:
+ *
+ * DRM_VMW_PARAM_FIFO_OFFSET:
+ * Offset to use to map the first page of the FIFO read-only.
+ * The fifo is mapped using the mmap() system call on the drm device.
+ */
+
+#define DRM_VMW_PARAM_FIFO_OFFSET    0
+
+/**
+ * struct drm_vmw_getparam_arg
+ *
+ * @value: Returned value. //Out
+ * @param: Parameter to query. //In.
+ *
+ * Argument to the DRM_VMW_GET_PARAM Ioctl.
+ */
+
+struct drm_vmw_getparam_arg {
+	uint64_t value;
+	uint32_t param;
+	uint32_t pad64;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_EXTENSION - Query device extensions.
+ */
+
+/**
+ * struct drm_vmw_extension_rep
+ *
+ * @exists: The queried extension exists.
+ * @driver_ioctl_offset: Ioctl number of the first ioctl in the extension.
+ * @driver_sarea_offset: Offset to any space in the DRI SAREA
+ * used by the extension.
+ * @major: Major version number of the extension.
+ * @minor: Minor version number of the extension.
+ * @pl: Patch level version number of the extension.
+ *
+ * Output argument to the DRM_VMW_EXTENSION Ioctl.
+ */
+
+struct drm_vmw_extension_rep {
+	int32_t exists;
+	uint32_t driver_ioctl_offset;
+	uint32_t driver_sarea_offset;
+	uint32_t major;
+	uint32_t minor;
+	uint32_t pl;
+	uint32_t pad64;
+};
+
+/**
+ * union drm_vmw_extension_arg
+ *
+ * @extension - Ascii name of the extension to be queried. //In
+ * @rep - Reply as defined above. //Out
+ *
+ * Argument to the DRM_VMW_EXTENSION Ioctl.
+ */
+
+union drm_vmw_extension_arg {
+	char extension[DRM_VMW_EXT_NAME_LEN];
+	struct drm_vmw_extension_rep rep;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_CREATE_CONTEXT - Create a host context.
+ *
+ * Allocates a device unique context id, and queues a create context command
+ * for the host. Does not wait for host completion.
+ */
+
+/**
+ * struct drm_vmw_context_arg
+ *
+ * @cid: Device unique context ID.
+ *
+ * Output argument to the DRM_VMW_CREATE_CONTEXT Ioctl.
+ * Input argument to the DRM_VMW_UNREF_CONTEXT Ioctl.
+ */
+
+struct drm_vmw_context_arg {
+	int32_t cid;
+	uint32_t pad64;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_UNREF_CONTEXT - Create a host context.
+ *
+ * Frees a global context id, and queues a destroy host command for the host.
+ * Does not wait for host completion. The context ID can be used directly
+ * in the command stream and shows up as the same context ID on the host.
+ */
+
+/*************************************************************************/
+/**
+ * DRM_VMW_CREATE_SURFACE - Create a host suface.
+ *
+ * Allocates a device unique surface id, and queues a create surface command
+ * for the host. Does not wait for host completion. The surface ID can be
+ * used directly in the command stream and shows up as the same surface
+ * ID on the host.
+ */
+
+/**
+ * struct drm_wmv_surface_create_req
+ *
+ * @flags: Surface flags as understood by the host.
+ * @format: Surface format as understood by the host.
+ * @mip_levels: Number of mip levels for each face.
+ * An unused face should have 0 encoded.
+ * @size_addr: Address of a user-space array of sruct drm_vmw_size
+ * cast to an uint64_t for 32-64 bit compatibility.
+ * The size of the array should equal the total number of mipmap levels.
+ * @shareable: Boolean whether other clients (as identified by file descriptors)
+ * may reference this surface.
+ *
+ * Input data to the DRM_VMW_CREATE_SURFACE Ioctl.
+ * Output data from the DRM_VMW_REF_SURFACE Ioctl.
+ */
+
+struct drm_vmw_surface_create_req {
+	uint32_t flags;
+	uint32_t format;
+	uint32_t mip_levels[DRM_VMW_MAX_SURFACE_FACES];
+	uint64_t size_addr;
+	int32_t shareable;
+	uint32_t pad64;
+};
+
+/**
+ * struct drm_wmv_surface_arg
+ *
+ * @sid: Surface id of created surface or surface to destroy or reference.
+ *
+ * Output data from the DRM_VMW_CREATE_SURFACE Ioctl.
+ * Input argument to the DRM_VMW_UNREF_SURFACE Ioctl.
+ * Input argument to the DRM_VMW_REF_SURFACE Ioctl.
+ */
+
+struct drm_vmw_surface_arg {
+	int32_t sid;
+	uint32_t pad64;
+};
+
+/**
+ * struct drm_vmw_size ioctl.
+ *
+ * @width - mip level width
+ * @height - mip level height
+ * @depth - mip level depth
+ *
+ * Description of a mip level.
+ * Input data to the DRM_WMW_CREATE_SURFACE Ioctl.
+ */
+
+struct drm_vmw_size {
+	uint32_t width;
+	uint32_t height;
+	uint32_t depth;
+	uint32_t pad64;
+};
+
+/**
+ * union drm_vmw_surface_create_arg
+ *
+ * @rep: Output data as described above.
+ * @req: Input data as described above.
+ *
+ * Argument to the DRM_VMW_CREATE_SURFACE Ioctl.
+ */
+
+union drm_vmw_surface_create_arg {
+	struct drm_vmw_surface_arg rep;
+	struct drm_vmw_surface_create_req req;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_REF_SURFACE - Reference a host surface.
+ *
+ * Puts a reference on a host surface with a give sid, as previously
+ * returned by the DRM_VMW_CREATE_SURFACE ioctl.
+ * A reference will make sure the surface isn't destroyed while we hold
+ * it and will allow the calling client to use the surface ID in the command
+ * stream.
+ *
+ * On successful return, the Ioctl returns the surface information given
+ * in the DRM_VMW_CREATE_SURFACE ioctl.
+ */
+
+/**
+ * union drm_vmw_surface_reference_arg
+ *
+ * @rep: Output data as described above.
+ * @req: Input data as described above.
+ *
+ * Argument to the DRM_VMW_REF_SURFACE Ioctl.
+ */
+
+union drm_vmw_surface_reference_arg {
+	struct drm_vmw_surface_create_req rep;
+	struct drm_vmw_surface_arg req;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_UNREF_SURFACE - Unreference a host surface.
+ *
+ * Clear a reference previously put on a host surface.
+ * When all references are gone, including the one implicitly placed
+ * on creation,
+ * a destroy surface command will be queued for the host.
+ * Does not wait for completion.
+ */
+
+/*************************************************************************/
+/**
+ * DRM_VMW_EXECBUF
+ *
+ * Submit a command buffer for execution on the host, and return a
+ * fence sequence that when signaled, indicates that the command buffer has
+ * executed.
+ */
+
+/**
+ * struct drm_vmw_execbuf_arg
+ *
+ * @commands: User-space address of a command buffer cast to an uint64_t.
+ * @command-size: Size in bytes of the command buffer.
+ * @fence_rep: User-space address of a struct drm_vmw_fence_rep cast to an
+ * uint64_t.
+ *
+ * Argument to the DRM_VMW_EXECBUF Ioctl.
+ */
+
+struct drm_vmw_execbuf_arg {
+	uint64_t commands;
+	uint32_t command_size;
+	uint32_t pad64;
+	uint64_t fence_rep;
+};
+
+/**
+ * struct drm_vmw_fence_rep
+ *
+ * @fence_seq: Fence sequence associated with a command submission.
+ * @error: This member should've been set to -EFAULT on submission.
+ * The following actions should be take on completion:
+ * error == -EFAULT: Fence communication failed. The host is synchronized.
+ * Use the last fence id read from the FIFO fence register.
+ * error != 0 && error != -EFAULT:
+ * Fence submission failed. The host is synchronized. Use the fence_seq member.
+ * error == 0: All is OK, The host may not be synchronized.
+ * Use the fence_seq member.
+ *
+ * Input / Output data to the DRM_VMW_EXECBUF Ioctl.
+ */
+
+struct drm_vmw_fence_rep {
+	uint64_t fence_seq;
+	int32_t error;
+	uint32_t pad64;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_ALLOC_DMABUF
+ *
+ * Allocate a DMA buffer that is visible also to the host.
+ * NOTE: The buffer is
+ * identified by a handle and an offset, which are private to the guest, but
+ * useable in the command stream. The guest kernel may translate these
+ * and patch up the command stream accordingly. In the future, the offset may
+ * be zero at all times, or it may disappear from the interface before it is
+ * fixed.
+ *
+ * The DMA buffer may stay user-space mapped in the guest at all times,
+ * and is thus suitable for sub-allocation.
+ *
+ * DMA buffers are mapped using the mmap() syscall on the drm device.
+ */
+
+/**
+ * struct drm_vmw_alloc_dmabuf_req
+ *
+ * @size: Required minimum size of the buffer.
+ *
+ * Input data to the DRM_VMW_ALLOC_DMABUF Ioctl.
+ */
+
+struct drm_vmw_alloc_dmabuf_req {
+	uint32_t size;
+	uint32_t pad64;
+};
+
+/**
+ * struct drm_vmw_dmabuf_rep
+ *
+ * @map_handle: Offset to use in the mmap() call used to map the buffer.
+ * @handle: Handle unique to this buffer. Used for unreferencing.
+ * @cur_gmr_id: GMR id to use in the command stream when this buffer is
+ * referenced. See not above.
+ * @cur_gmr_offset: Offset to use in the command stream when this buffer is
+ * referenced. See note above.
+ *
+ * Output data from the DRM_VMW_ALLOC_DMABUF Ioctl.
+ */
+
+struct drm_vmw_dmabuf_rep {
+	uint64_t map_handle;
+	uint32_t handle;
+	uint32_t cur_gmr_id;
+	uint32_t cur_gmr_offset;
+	uint32_t pad64;
+};
+
+/**
+ * union drm_vmw_dmabuf_arg
+ *
+ * @req: Input data as described above.
+ * @rep: Output data as described above.
+ *
+ * Argument to the DRM_VMW_ALLOC_DMABUF Ioctl.
+ */
+
+union drm_vmw_alloc_dmabuf_arg {
+	struct drm_vmw_alloc_dmabuf_req req;
+	struct drm_vmw_dmabuf_rep rep;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_UNREF_DMABUF - Free a DMA buffer.
+ *
+ */
+
+/**
+ * struct drm_vmw_unref_dmabuf_arg
+ *
+ * @handle: Handle indicating what buffer to free. Obtained from the
+ * DRM_VMW_ALLOC_DMABUF Ioctl.
+ *
+ * Argument to the DRM_VMW_UNREF_DMABUF Ioctl.
+ */
+
+struct drm_vmw_unref_dmabuf_arg {
+	uint32_t handle;
+	uint32_t pad64;
+};
+
+/*************************************************************************/
+/**
+ * DRM_VMW_FIFO_DEBUG - Get last FIFO submission.
+ *
+ * This IOCTL copies the last FIFO submission directly out of the FIFO buffer.
+ */
+
+/**
+ * struct drm_vmw_fifo_debug_arg
+ *
+ * @debug_buffer: User space address of a debug_buffer cast to an uint64_t //In
+ * @debug_buffer_size: Size in bytes of debug buffer //In
+ * @used_size: Number of bytes copied to the buffer // Out
+ * @did_not_fit: Boolean indicating that the fifo contents did not fit. //Out
+ *
+ * Argument to the DRM_VMW_FIFO_DEBUG Ioctl.
+ */
+
+struct drm_vmw_fifo_debug_arg {
+	uint64_t debug_buffer;
+	uint32_t debug_buffer_size;
+	uint32_t used_size;
+	int32_t did_not_fit;
+	uint32_t pad64;
+};
+
+struct drm_vmw_fence_wait_arg {
+	uint64_t sequence;
+	uint64_t kernel_cookie;
+	int32_t cookie_valid;
+	int32_t pad64;
+};
+
+#endif
diff --git a/src/gallium/winsys/drm/vmware/dri/Makefile b/src/gallium/winsys/drm/vmware/dri/Makefile
new file mode 100644
index 00000000000..8a39e23da6d
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/dri/Makefile
@@ -0,0 +1,18 @@
+
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = vmwgfx_dri.so
+
+PIPE_DRIVERS = \
+	$(TOP)/src/gallium/state_trackers/dri/libdridrm.a \
+	$(TOP)/src/gallium/winsys/drm/vmware/core/libsvgadrm.a \
+	$(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/drivers/svga/libsvga.a
+
+C_SOURCES = \
+	$(COMMON_GALLIUM_SOURCES)
+
+include ../../Makefile.template
+
+symlinks:
diff --git a/src/gallium/winsys/drm/vmware/dri/SConscript b/src/gallium/winsys/drm/vmware/dri/SConscript
new file mode 100644
index 00000000000..1019f577a5f
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/dri/SConscript
@@ -0,0 +1,62 @@
+import os
+import os.path
+
+Import('*')
+
+if env['platform'] == 'linux':
+
+   if env['dri']:
+      env = env.Clone()
+
+      sources = [
+        '#/src/mesa/drivers/dri/common/utils.c',
+        '#/src/mesa/drivers/dri/common/vblank.c',
+        '#/src/mesa/drivers/dri/common/dri_util.c',
+        '#/src/mesa/drivers/dri/common/xmlconfig.c',
+         ]
+   
+      
+      env.ParseConfig('pkg-config --cflags --libs libdrm')
+      
+      env.Prepend(CPPPATH = [
+            '#/src/mesa/state_tracker',
+            '#/src/mesa/drivers/dri/common',
+            '#/src/mesa/main',
+            '#/src/mesa/glapi',
+            '#/src/mesa',
+            '#/include',
+            '#/src/gallium/drivers/svga',
+            '#/src/gallium/drivers/svga/include',
+            ])
+      
+      env.Append(CPPDEFINES = [
+            'HAVE_STDINT_H', 
+            'HAVE_SYS_TYPES_H',
+            ])
+
+      env.Append(CFLAGS = [
+            '-std=gnu99',
+            '-D_FILE_OFFSET_BITS=64',
+            ])
+      
+      env.Prepend(LIBPATH = [
+            ])
+      
+      env.Prepend(LIBS = [
+            trace,
+            st_dri,
+            svgadrm,
+            svga,
+            mesa,
+            auxiliaries,
+            ])
+      
+      # TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
+      env.LoadableModule(
+         target ='vmwgfx_dri.so',
+         source = sources,
+         LIBS = env['LIBS'],
+         SHLIBPREFIX = '',
+         )
+      
+
diff --git a/src/gallium/winsys/drm/vmware/egl/Makefile b/src/gallium/winsys/drm/vmware/egl/Makefile
new file mode 100644
index 00000000000..8e2980c318c
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/egl/Makefile
@@ -0,0 +1,18 @@
+
+TOP = ../../../../../..
+include $(TOP)/configs/current
+
+LIBNAME = EGL_svga.so
+
+PIPE_DRIVERS = \
+	$(TOP)/src/gallium/state_trackers/egl/libegldrm.a \
+	$(TOP)/src/gallium/winsys/drm/vmware/core/libsvgadrm.a \
+	$(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/drivers/svga/libsvga.a
+
+C_SOURCES = \
+	$(COMMON_GALLIUM_SOURCES)
+
+include ../../Makefile.template
+
+symlinks:
diff --git a/src/gallium/winsys/drm/vmware/xorg/Makefile b/src/gallium/winsys/drm/vmware/xorg/Makefile
new file mode 100644
index 00000000000..e152263256a
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/xorg/Makefile
@@ -0,0 +1,54 @@
+TARGET     = vmwgfx_drv.so
+CFILES     = $(wildcard ./*.c)
+OBJECTS    = $(patsubst ./%.c,./%.o,$(CFILES))
+TOP        = ../../../../../..
+
+include $(TOP)/configs/current
+
+INCLUDES = \
+	$(shell pkg-config --cflags-only-I pixman-1 xorg-server libdrm xproto) \
+	-I../gem \
+	-I$(TOP)/src/gallium/include \
+	-I$(TOP)/src/gallium/drivers \
+	-I$(TOP)/src/gallium/auxiliary \
+	-I$(TOP)/src/gallium
+
+LIBS = \
+	$(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a \
+	$(TOP)/src/gallium/winsys/drm/vmware/core/libsvgadrm.a \
+	$(TOP)/src/gallium/drivers/trace/libtrace.a \
+	$(TOP)/src/gallium/drivers/svga/libsvga.a \
+	$(GALLIUM_AUXILIARIES)
+
+DRIVER_DEFINES = \
+	-DHAVE_CONFIG_H
+
+
+#############################################
+
+
+
+all default: $(TARGET)
+
+$(TARGET): $(OBJECTS) Makefile $(TOP)/src/gallium/state_trackers/xorg/libxorgtracker.a $(LIBS)
+	$(TOP)/bin/mklib -noprefix -o $@ \
+	$(OBJECTS) $(LIBS) $(shell pkg-config --libs libdrm) -ldrm_intel
+
+clean:
+	rm -rf $(OBJECTS) $(TARGET)
+
+install:
+	$(INSTALL) -d $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR)
+	$(MINSTALL) -m 755 $(TARGET) $(DESTDIR)/$(XORG_DRIVER_INSTALL_DIR)
+
+
+##############################################
+
+
+.c.o:
+	$(CC) -c $(CFLAGS) $(INCLUDES) $(DRIVER_DEFINES) $< -o $@
+
+
+##############################################
+
+.PHONY	= all clean install
diff --git a/src/gallium/winsys/drm/vmware/xorg/SConscript b/src/gallium/winsys/drm/vmware/xorg/SConscript
new file mode 100644
index 00000000000..ff7b2ed34ed
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/xorg/SConscript
@@ -0,0 +1,54 @@
+import os.path
+
+Import('*')
+
+if env['platform'] == 'linux':
+
+	env = env.Clone()
+
+	env.ParseConfig('pkg-config --cflags --libs libdrm xorg-server')
+
+	env.Prepend(CPPPATH = [
+		'#/include',
+		'#/src/gallium',
+		'#/src/mesa',
+		'#/src/gallium/drivers/svga',
+		'#/src/gallium/drivers/svga/include',
+	])
+
+	env.Append(CPPDEFINES = [
+	])
+
+	if env['gcc']:
+		env.Append(CPPDEFINES = [
+			'HAVE_STDINT_H',
+			'HAVE_SYS_TYPES_H',
+		])
+
+	env.Append(CFLAGS = [
+		'-std=gnu99',
+		'-D_FILE_OFFSET_BITS=64',
+	])
+
+	env.Prepend(LIBPATH = [
+	])
+
+	env.Prepend(LIBS = [
+		trace,
+		st_xorg,
+		svgadrm,
+		svga,
+                auxiliaries,
+	])
+
+	sources = [
+		'vmw_xorg.c',
+	]
+
+	# TODO: write a wrapper function http://www.scons.org/wiki/WrapperFunctions
+	env.LoadableModule(
+		target ='vmwgfx_drv.so',
+		source = sources,
+		LIBS = env['LIBS'],
+		SHLIBPREFIX = '',
+	)
diff --git a/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c b/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c
new file mode 100644
index 00000000000..3acc110ae79
--- /dev/null
+++ b/src/gallium/winsys/drm/vmware/xorg/vmw_xorg.c
@@ -0,0 +1,150 @@
+/**********************************************************
+ * Copyright 2008-2009 VMware, Inc.  All rights reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person
+ * obtaining a copy of this software and associated documentation
+ * files (the "Software"), to deal in the Software without
+ * restriction, including without limitation the rights to use, copy,
+ * modify, merge, publish, distribute, sublicense, and/or sell copies
+ * of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be
+ * included in all copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+ * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+ * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
+ * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
+ * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
+ * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ *
+ **********************************************************/
+
+/**
+ * @file
+ * Glue file for Xorg State Tracker.
+ *
+ * @author Alan Hourihane <alanh@tungstengraphics.com>
+ * @author Jakob Bornecrantz <wallbraker@gmail.com>
+ */
+
+#include "state_trackers/xorg/xorg_winsys.h"
+
+static void vmw_xorg_identify(int flags);
+static Bool vmw_xorg_pci_probe(DriverPtr driver,
+			       int entity_num,
+			       struct pci_device *device,
+			       intptr_t match_data);
+
+static const struct pci_id_match vmw_xorg_device_match[] = {
+    {0x15ad, PCI_MATCH_ANY, PCI_MATCH_ANY, PCI_MATCH_ANY, 0, 0, 0},
+    {0, 0, 0, 0, 0, 0, 0},
+};
+
+static SymTabRec vmw_xorg_chipsets[] = {
+    {PCI_MATCH_ANY, "VMware SVGA Device"},
+    {-1, NULL}
+};
+
+static PciChipsets vmw_xorg_pci_devices[] = {
+    {PCI_MATCH_ANY, PCI_MATCH_ANY, NULL},
+    {-1, -1, NULL}
+};
+
+static XF86ModuleVersionInfo vmw_xorg_version = {
+    "vmwgfx",
+    MODULEVENDORSTRING,
+    MODINFOSTRING1,
+    MODINFOSTRING2,
+    XORG_VERSION_CURRENT,
+    0, 1, 0, /* major, minor, patch */
+    ABI_CLASS_VIDEODRV,
+    ABI_VIDEODRV_VERSION,
+    MOD_CLASS_VIDEODRV,
+    {0, 0, 0, 0}
+};
+
+/*
+ * Xorg driver exported structures
+ */
+
+_X_EXPORT DriverRec vmwgfx = {
+    1,
+    "vmwgfx",
+    vmw_xorg_identify,
+    NULL,
+    xorg_tracker_available_options,
+    NULL,
+    0,
+    NULL,
+    vmw_xorg_device_match,
+    vmw_xorg_pci_probe
+};
+
+static MODULESETUPPROTO(vmw_xorg_setup);
+
+_X_EXPORT XF86ModuleData vmwgfxModuleData = {
+    &vmw_xorg_version,
+    vmw_xorg_setup,
+    NULL
+};
+
+/*
+ * Xorg driver functions
+ */
+
+static pointer
+vmw_xorg_setup(pointer module, pointer opts, int *errmaj, int *errmin)
+{
+    static Bool setupDone = 0;
+
+    /* This module should be loaded only once, but check to be sure.
+     */
+    if (!setupDone) {
+	setupDone = 1;
+	xf86AddDriver(&vmwgfx, module, HaveDriverFuncs);
+
+	/*
+	 * The return value must be non-NULL on success even though there
+	 * is no TearDownProc.
+	 */
+	return (pointer) 1;
+    } else {
+	if (errmaj)
+	    *errmaj = LDR_ONCEONLY;
+	return NULL;
+    }
+}
+
+static void
+vmw_xorg_identify(int flags)
+{
+    xf86PrintChipsets("vmwgfx", "Driver for VMware SVGA device",
+		      vmw_xorg_chipsets);
+}
+
+static Bool
+vmw_xorg_pci_probe(DriverPtr driver,
+	  int entity_num, struct pci_device *device, intptr_t match_data)
+{
+    ScrnInfoPtr scrn = NULL;
+    EntityInfoPtr entity;
+
+    scrn = xf86ConfigPciEntity(scrn, 0, entity_num, vmw_xorg_pci_devices,
+			       NULL, NULL, NULL, NULL, NULL);
+    if (scrn != NULL) {
+	scrn->driverVersion = 1;
+	scrn->driverName = "vmwgfx";
+	scrn->name = "vmwgfx";
+	scrn->Probe = NULL;
+
+	entity = xf86GetEntityInfo(entity_num);
+
+	/* Use all the functions from the xorg tracker */
+	xorg_tracker_set_functions(scrn);
+    }
+    return scrn != NULL;
+}
diff --git a/src/gallium/winsys/egl_xlib/egl_xlib.c b/src/gallium/winsys/egl_xlib/egl_xlib.c
index d02f8250478..599973ce127 100644
--- a/src/gallium/winsys/egl_xlib/egl_xlib.c
+++ b/src/gallium/winsys/egl_xlib/egl_xlib.c
@@ -41,6 +41,7 @@
 #include "pipe/p_state.h"
 #include "pipe/internal/p_winsys_screen.h"
 #include "util/u_memory.h"
+#include "util/u_math.h"
 #include "softpipe/sp_winsys.h"
 #include "softpipe/sp_texture.h"
 
@@ -138,17 +139,6 @@ lookup_context(_EGLContext *ctx)
 }
 
 
-static unsigned int
-bitcount(unsigned int n)
-{
-   unsigned int bits;
-   for (bits = 0; n > 0; n = n >> 1) {
-      bits += (n & 1);
-   }
-   return bits;
-}
-
-
 /**
  * Create the EGLConfigs.  (one per X visual)
  */
@@ -174,9 +164,9 @@ create_configs(struct xlib_egl_display *xdpy, _EGLDisplay *disp)
    for (i = 0; i < num_visuals; i++) {
       _EGLConfig *config = calloc(1, sizeof(_EGLConfig));
       int id = i + 1;
-      int rbits = bitcount(visInfo[i].red_mask);
-      int gbits = bitcount(visInfo[i].green_mask);
-      int bbits = bitcount(visInfo[i].blue_mask);
+      int rbits = util_bitcount(visInfo[i].red_mask);
+      int gbits = util_bitcount(visInfo[i].green_mask);
+      int bbits = util_bitcount(visInfo[i].blue_mask);
       int abits = bbits == 8 ? 8 : 0;
       int zbits = 24;
       int sbits = 8;