summary refs log tree commit diff
path: root/src/gallium/auxiliary
diff options
context:
space:
mode:
author Michal Krol <michal@vmware.com> 2009-09-04 15:38:43 +0200
committer Michal Krol <michal@vmware.com> 2009-09-04 15:38:43 +0200
commit ecd8d638c423b71c2e09713f60274ccb389f9be1 (patch)
tree bf911cd1349bb91e1804b46b5594620e3bead4ab /src/gallium/auxiliary
parent c2670f621e6ea338cef2a955cf15cea2ad0525f8 (diff)
parent 4ba872b49bf9412a4b285169381255978d2ccec5 (diff)
download mesa-glsl-pp-rework-1.tar.gz
Merge branch 'master' into glsl-pp-rework-1 (glsl-pp-rework-1)
Diffstat (limited to 'src/gallium/auxiliary')
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_cache.c4
-rw-r--r--src/gallium/auxiliary/cso_cache/cso_context.c4
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe.c100
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_pstipple.c5
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_vbuf.c15
-rw-r--r--src/gallium/auxiliary/draw/draw_pipe_wide_point.c52
-rw-r--r--src/gallium/auxiliary/draw/draw_private.h5
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_pt.h1
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_decompose.h65
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_elts.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_util.c25
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache.c37
-rw-r--r--src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h73
-rw-r--r--src/gallium/auxiliary/draw/draw_vs.c23
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_aos.c17
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_exec.c3
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_llvm.c2
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_ppc.c1
-rw-r--r--src/gallium/auxiliary/draw/draw_vs_sse.c31
-rw-r--r--src/gallium/auxiliary/gallivm/instructionssoa.cpp4
-rw-r--r--src/gallium/auxiliary/gallivm/tgsitollvm.cpp84
-rw-r--r--src/gallium/auxiliary/indices/u_indices.c2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer.h14
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c6
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c8
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr.h20
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c8
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c30
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c2
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c24
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c8
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c20
-rw-r--r--src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c36
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_ppc.c6
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.c9
-rw-r--r--src/gallium/auxiliary/rtasm/rtasm_x86sse.h1
-rw-r--r--src/gallium/auxiliary/tgsi/Makefile1
-rw-r--r--src/gallium/auxiliary/tgsi/SConscript1
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt42
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.c59
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_build.h3
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump.c94
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_dump_c.c8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.c189
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_exec.h36
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.c258
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_info.h7
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h173
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.c25
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_parse.h8
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ppc.c34
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sanity.c29
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.c678
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_sse2.h28
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_text.c25
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.c860
-rw-r--r--src/gallium/auxiliary/tgsi/tgsi_ureg.h481
-rw-r--r--src/gallium/auxiliary/trace/trace_drm.h165
-rw-r--r--src/gallium/auxiliary/util/u_blit.c82
-rw-r--r--src/gallium/auxiliary/util/u_cache.c10
-rw-r--r--src/gallium/auxiliary/util/u_debug.c8
-rw-r--r--src/gallium/auxiliary/util/u_gen_mipmap.c6
-rw-r--r--src/gallium/auxiliary/util/u_handle_table.c13
-rw-r--r--src/gallium/auxiliary/util/u_hash_table.c22
-rw-r--r--src/gallium/auxiliary/util/u_keymap.c12
-rw-r--r--src/gallium/auxiliary/util/u_math.h75
-rw-r--r--src/gallium/auxiliary/util/u_memory.h8
-rw-r--r--src/gallium/auxiliary/util/u_mm.c22
-rw-r--r--src/gallium/auxiliary/util/u_mm.h2
-rw-r--r--src/gallium/auxiliary/util/u_prim.h2
-rw-r--r--src/gallium/auxiliary/util/u_rect.c8
-rw-r--r--src/gallium/auxiliary/util/u_rect.h4
-rw-r--r--src/gallium/auxiliary/util/u_simple_screen.c3
-rw-r--r--src/gallium/auxiliary/util/u_simple_shaders.c318
-rw-r--r--src/gallium/auxiliary/util/u_snprintf.c2
-rw-r--r--src/gallium/auxiliary/util/u_surface.h17
-rw-r--r--src/gallium/auxiliary/util/u_tile.c34
-rw-r--r--src/gallium/auxiliary/util/u_time.c12
-rw-r--r--src/gallium/auxiliary/util/u_time.h6
-rw-r--r--src/gallium/auxiliary/util/u_timed_winsys.c3
-rw-r--r--src/gallium/auxiliary/util/u_upload_mgr.c4
83 files changed, 3254 insertions, 1376 deletions
diff --git a/src/gallium/auxiliary/cso_cache/cso_cache.c b/src/gallium/auxiliary/cso_cache/cso_cache.c
index 0bc77a57287..e6dce3f0e5b 100644
--- a/src/gallium/auxiliary/cso_cache/cso_cache.c
+++ b/src/gallium/auxiliary/cso_cache/cso_cache.c
@@ -361,6 +361,10 @@ void cso_for_each_state(struct cso_cache *sc, enum cso_cache_type type,
void cso_cache_delete(struct cso_cache *sc)
{
assert(sc);
+
+ if (!sc)
+ return;
+
/* delete driver data */
cso_for_each_state(sc, CSO_BLEND, delete_blend_state, 0);
cso_for_each_state(sc, CSO_DEPTH_STENCIL_ALPHA, delete_depth_stencil_state, 0);
diff --git a/src/gallium/auxiliary/cso_cache/cso_context.c b/src/gallium/auxiliary/cso_cache/cso_context.c
index f388bf5d951..36c882acb75 100644
--- a/src/gallium/auxiliary/cso_cache/cso_context.c
+++ b/src/gallium/auxiliary/cso_cache/cso_context.c
@@ -55,14 +55,14 @@ struct cso_context {
void *samplers[PIPE_MAX_SAMPLERS];
unsigned nr_samplers;
- void *samplers_saved[PIPE_MAX_SAMPLERS];
unsigned nr_samplers_saved;
+ void *samplers_saved[PIPE_MAX_SAMPLERS];
struct pipe_texture *textures[PIPE_MAX_SAMPLERS];
uint nr_textures;
- struct pipe_texture *textures_saved[PIPE_MAX_SAMPLERS];
uint nr_textures_saved;
+ struct pipe_texture *textures_saved[PIPE_MAX_SAMPLERS];
/** Current and saved state.
* The saved state is used as a 1-deep stack.
diff --git a/src/gallium/auxiliary/draw/draw_pipe.c b/src/gallium/auxiliary/draw/draw_pipe.c
index 3cde9d36d3a..1c6d657297c 100644
--- a/src/gallium/auxiliary/draw/draw_pipe.c
+++ b/src/gallium/auxiliary/draw/draw_pipe.c
@@ -158,6 +158,60 @@ static void do_triangle( struct draw_context *draw,
+#define QUAD(i0,i1,i2,i3) \
+ do_triangle( draw, \
+ ( DRAW_PIPE_RESET_STIPPLE | \
+ DRAW_PIPE_EDGE_FLAG_0 | \
+ DRAW_PIPE_EDGE_FLAG_2 ), \
+ verts + stride * elts[i0], \
+ verts + stride * elts[i1], \
+ verts + stride * elts[i3]); \
+ do_triangle( draw, \
+ ( DRAW_PIPE_EDGE_FLAG_0 | \
+ DRAW_PIPE_EDGE_FLAG_1 ), \
+ verts + stride * elts[i1], \
+ verts + stride * elts[i2], \
+ verts + stride * elts[i3])
+
+#define TRIANGLE(flags,i0,i1,i2) \
+ do_triangle( draw, \
+ elts[i0], /* flags */ \
+ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * elts[i1], \
+ verts + stride * elts[i2])
+
+#define LINE(flags,i0,i1) \
+ do_line( draw, \
+ elts[i0], \
+ verts + stride * (elts[i0] & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * elts[i1])
+
+#define POINT(i0) \
+ do_point( draw, \
+ verts + stride * elts[i0] )
+
+#define FUNC pipe_run
+#define ARGS \
+ struct draw_context *draw, \
+ unsigned prim, \
+ struct vertex_header *vertices, \
+ unsigned stride, \
+ const ushort *elts
+
+#define LOCAL_VARS \
+ char *verts = (char *)vertices; \
+ boolean flatfirst = (draw->rasterizer->flatshade && \
+ draw->rasterizer->flatshade_first); \
+ unsigned i; \
+ ushort flags
+
+#define FLUSH
+
+#include "draw_pt_decompose.h"
+#undef ARGS
+#undef LOCAL_VARS
+
+
/* Code to run the pipeline on a fairly arbitary collection of vertices.
*
@@ -178,34 +232,12 @@ void draw_pipeline_run( struct draw_context *draw,
unsigned count )
{
char *verts = (char *)vertices;
- unsigned i;
draw->pipeline.verts = verts;
draw->pipeline.vertex_stride = stride;
draw->pipeline.vertex_count = vertex_count;
- switch (prim) {
- case PIPE_PRIM_POINTS:
- for (i = 0; i < count; i++)
- do_point( draw,
- verts + stride * elts[i] );
- break;
- case PIPE_PRIM_LINES:
- for (i = 0; i+1 < count; i += 2)
- do_line( draw,
- elts[i+0], /* flags */
- verts + stride * (elts[i+0] & ~DRAW_PIPE_FLAG_MASK),
- verts + stride * elts[i+1]);
- break;
- case PIPE_PRIM_TRIANGLES:
- for (i = 0; i+2 < count; i += 3)
- do_triangle( draw,
- elts[i+0], /* flags */
- verts + stride * (elts[i+0] & ~DRAW_PIPE_FLAG_MASK),
- verts + stride * elts[i+1],
- verts + stride * elts[i+2]);
- break;
- }
+ pipe_run(draw, prim, vertices, stride, elts, count);
draw->pipeline.verts = NULL;
draw->pipeline.vertex_count = 0;
@@ -216,22 +248,22 @@ void draw_pipeline_run( struct draw_context *draw,
( DRAW_PIPE_RESET_STIPPLE | \
DRAW_PIPE_EDGE_FLAG_0 | \
DRAW_PIPE_EDGE_FLAG_2 ), \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1), \
+ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i1), \
verts + stride * (i3)); \
- do_triangle( draw, \
- ( DRAW_PIPE_EDGE_FLAG_0 | \
- DRAW_PIPE_EDGE_FLAG_1 ), \
- verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i2), \
- verts + stride * (i3))
+ do_triangle( draw, \
+ ( DRAW_PIPE_EDGE_FLAG_0 | \
+ DRAW_PIPE_EDGE_FLAG_1 ), \
+ verts + stride * ((i1) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i2), \
+ verts + stride * (i3))
#define TRIANGLE(flags,i0,i1,i2) \
do_triangle( draw, \
flags, /* flags */ \
- verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
- verts + stride * (i1), \
- verts + stride * (i2))
+ verts + stride * ((i0) & ~DRAW_PIPE_FLAG_MASK), \
+ verts + stride * (i1), \
+ verts + stride * (i2))
#define LINE(flags,i0,i1) \
do_line( draw, \
diff --git a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
index 30a6d2919d9..283502cdf3e 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_pstipple.c
@@ -256,7 +256,10 @@ pstip_transform_inst(struct tgsi_transform_context *ctx,
uint size = 4;
immed = tgsi_default_full_immediate();
immed.Immediate.NrTokens = 1 + size; /* one for the token itself */
- immed.u.Pointer = (void *) value;
+ immed.u[0].Float = value[0];
+ immed.u[1].Float = value[1];
+ immed.u[2].Float = value[2];
+ immed.u[3].Float = value[3];
ctx->emit_immediate(ctx, &immed);
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
index a5d840b96ea..1a5269c0de9 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_vbuf.c
@@ -159,8 +159,19 @@ vbuf_tri( struct draw_stage *stage,
check_space( vbuf, 3 );
- for (i = 0; i < 3; i++) {
- vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
+ if (vbuf->stage.draw->rasterizer->flatshade_first) {
+ /* Put provoking vertex in position expected by the driver.
+ * Emit last provoking vertex in first pos.
+ * Swap verts 0 & 1 to preserve polygon winding.
+ */
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[2] );
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[0] );
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[1] );
+ }
+ else {
+ for (i = 0; i < 3; i++) {
+ vbuf->indices[vbuf->nr_indices++] = emit_vertex( vbuf, prim->v[i] );
+ }
}
}
diff --git a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
index d84bab9eaae..7d76a7dbf39 100644
--- a/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
+++ b/src/gallium/auxiliary/draw/draw_pipe_wide_point.c
@@ -28,6 +28,30 @@
/* Authors: Keith Whitwell <keith@tungstengraphics.com>
*/
+/**
+ * Notes on wide points and sprite mode:
+ *
+ * In wide point/sprite mode we effectively need to convert each incoming
+ * vertex into four outgoing vertices specifying the corners of a quad.
+ * Since we don't (yet) have geometry shaders, we have to handle this here
+ * in the draw module.
+ *
+ * For sprites, it also means that this is where we have to handle texcoords
+ * for the vertices of the quad. OpenGL's GL_COORD_REPLACE state specifies
+ * if/how enabled texcoords are automatically generated for sprites. We pass
+ * that info through gallium in the pipe_rasterizer_state::sprite_coord_mode
+ * array.
+ *
+ * Additionally, GLSL's gl_PointCoord fragment attribute has to be handled
+ * here as well. This is basically an additional texture/generic attribute
+ * that varies .x from 0 to 1 horizontally across the point and varies .y
+ * vertically from 0 to 1 down the sprite.
+ *
+ * With geometry shaders, the state tracker could create a GS to do
+ * most/all of this.
+ */
+
+
#include "util/u_math.h"
#include "util/u_memory.h"
#include "pipe/p_defines.h"
@@ -52,7 +76,7 @@ struct widepoint_stage {
int psize_slot;
- int point_coord_fs_input; /**< input for pointcoord (and fog) */
+ int point_coord_fs_input; /**< input for pointcoord */
};
@@ -64,8 +88,6 @@ widepoint_stage( struct draw_stage *stage )
}
-
-
/**
* Set the vertex texcoords for sprite mode.
* Coords may be left untouched or set to a right-side-up or upside-down
@@ -89,10 +111,12 @@ static void set_texcoords(const struct widepoint_stage *wide,
}
if (wide->point_coord_fs_input >= 0) {
- /* put gl_PointCoord into extra vertex output's zw components */
- uint k = wide->stage.draw->extra_vp_outputs.slot;
- v->data[k][2] = tc[0];
- v->data[k][3] = tc[1];
+ /* put gl_PointCoord into the extra vertex slot */
+ uint slot = wide->stage.draw->extra_vp_outputs.slot;
+ v->data[slot][0] = tc[0];
+ v->data[slot][1] = tc[1];
+ v->data[slot][2] = 0.0F;
+ v->data[slot][3] = 1.0F;
}
}
@@ -181,6 +205,16 @@ static void widepoint_point( struct draw_stage *stage,
}
+static int
+find_pntc_input_attrib(struct draw_context *draw)
+{
+ /* Scan the fragment program's input decls to find the pointcoord
+ * attribute. The xy components will store the point coord.
+ */
+ return 0; /* XXX fix this */
+}
+
+
static void widepoint_first_point( struct draw_stage *stage,
struct prim_header *header )
{
@@ -219,8 +253,8 @@ static void widepoint_first_point( struct draw_stage *stage,
}
wide->num_texcoords = j;
- /* find fragment shader PointCoord/Fog input */
- wide->point_coord_fs_input = 0; /* XXX fix this! */
+ /* find fragment shader PointCoord input */
+ wide->point_coord_fs_input = find_pntc_input_attrib(draw);
/* setup extra vp output (point coord implemented as a texcoord) */
draw->extra_vp_outputs.semantic_name = TGSI_SEMANTIC_GENERIC;
diff --git a/src/gallium/auxiliary/draw/draw_private.h b/src/gallium/auxiliary/draw/draw_private.h
index 81e4eae401c..41fcb16a0a5 100644
--- a/src/gallium/auxiliary/draw/draw_private.h
+++ b/src/gallium/auxiliary/draw/draw_private.h
@@ -44,7 +44,6 @@
#include "pipe/p_state.h"
#include "pipe/p_defines.h"
-#include "tgsi/tgsi_exec.h"
#include "tgsi/tgsi_scan.h"
@@ -55,6 +54,8 @@ struct draw_vertex_shader;
struct draw_context;
struct draw_stage;
struct vbuf_render;
+struct tgsi_exec_machine;
+struct tgsi_sampler;
/**
@@ -185,7 +186,7 @@ struct draw_context
uint position_output;
/** TGSI program interpreter runtime state */
- struct tgsi_exec_machine machine;
+ struct tgsi_exec_machine *machine;
uint num_samplers;
struct tgsi_sampler **samplers;
diff --git a/src/gallium/auxiliary/draw/draw_pt.c b/src/gallium/auxiliary/draw/draw_pt.c
index 9ea0cbe5990..dbb5ac71821 100644
--- a/src/gallium/auxiliary/draw/draw_pt.c
+++ b/src/gallium/auxiliary/draw/draw_pt.c
@@ -36,6 +36,7 @@
#include "draw/draw_vs.h"
#include "tgsi/tgsi_dump.h"
#include "util/u_math.h"
+#include "util/u_prim.h"
static unsigned trim( unsigned count, unsigned first, unsigned incr )
{
@@ -278,7 +279,7 @@ void
draw_arrays(struct draw_context *draw, unsigned prim,
unsigned start, unsigned count)
{
- unsigned reduced_prim = draw_pt_reduced_prim(prim);
+ unsigned reduced_prim = u_reduced_prim(prim);
if (reduced_prim != draw->reduced_prim) {
draw_do_flush( draw, DRAW_FLUSH_STATE_CHANGE );
draw->reduced_prim = reduced_prim;
diff --git a/src/gallium/auxiliary/draw/draw_pt.h b/src/gallium/auxiliary/draw/draw_pt.h
index 8ef0ea8011f..7a17a9fb6b2 100644
--- a/src/gallium/auxiliary/draw/draw_pt.h
+++ b/src/gallium/auxiliary/draw/draw_pt.h
@@ -228,7 +228,6 @@ void draw_pt_post_vs_destroy( struct pt_post_vs *pvs );
* Utils:
*/
void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr);
-unsigned draw_pt_reduced_prim(unsigned prim);
#endif
diff --git a/src/gallium/auxiliary/draw/draw_pt_decompose.h b/src/gallium/auxiliary/draw/draw_pt_decompose.h
index 3fb06956878..4ca5b520204 100644
--- a/src/gallium/auxiliary/draw/draw_pt_decompose.h
+++ b/src/gallium/auxiliary/draw/draw_pt_decompose.h
@@ -47,10 +47,19 @@ static void FUNC( ARGS,
case PIPE_PRIM_TRIANGLES:
for (i = 0; i+2 < count; i += 3) {
- TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 0),
- (i + 1),
- (i + 2 ));
+ if (flatfirst) {
+ /* put provoking vertex in last pos for clipper */
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 1),
+ (i + 2),
+ (i + 0 ));
+ }
+ else {
+ TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
+ (i + 0),
+ (i + 1),
+ (i + 2 ));
+ }
}
break;
@@ -58,9 +67,9 @@ static void FUNC( ARGS,
if (flatfirst) {
for (i = 0; i+2 < count; i++) {
TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 0),
(i + 1 + (i&1)),
- (i + 2 - (i&1)));
+ (i + 2 - (i&1)),
+ (i + 0) );
}
}
else {
@@ -78,9 +87,9 @@ static void FUNC( ARGS,
if (flatfirst) {
for (i = 0; i+2 < count; i++) {
TRIANGLE( DRAW_PIPE_RESET_STIPPLE | DRAW_PIPE_EDGE_FLAG_ALL,
- (i + 1),
(i + 2),
- (0 ));
+ 0,
+ (i + 1) );
}
}
else {
@@ -96,20 +105,40 @@ static void FUNC( ARGS,
case PIPE_PRIM_QUADS:
- for (i = 0; i+3 < count; i += 4) {
- QUAD( (i + 0),
- (i + 1),
- (i + 2),
- (i + 3));
+ if (flatfirst) {
+ for (i = 0; i+3 < count; i += 4) {
+ QUAD( (i + 1),
+ (i + 2),
+ (i + 3),
+ (i + 0) );
+ }
+ }
+ else {
+ for (i = 0; i+3 < count; i += 4) {
+ QUAD( (i + 0),
+ (i + 1),
+ (i + 2),
+ (i + 3));
+ }
}
break;
case PIPE_PRIM_QUAD_STRIP:
- for (i = 0; i+3 < count; i += 2) {
- QUAD( (i + 2),
- (i + 0),
- (i + 1),
- (i + 3));
+ if (flatfirst) {
+ for (i = 0; i+3 < count; i += 2) {
+ QUAD( (i + 1),
+ (i + 3),
+ (i + 2),
+ (i + 0) );
+ }
+ }
+ else {
+ for (i = 0; i+3 < count; i += 2) {
+ QUAD( (i + 2),
+ (i + 0),
+ (i + 1),
+ (i + 3));
+ }
}
break;
diff --git a/src/gallium/auxiliary/draw/draw_pt_elts.c b/src/gallium/auxiliary/draw/draw_pt_elts.c
index b7780fb5073..88f4d9f495a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_elts.c
+++ b/src/gallium/auxiliary/draw/draw_pt_elts.c
@@ -54,7 +54,8 @@ static unsigned elt_ubyte( const void *elts, unsigned idx )
static unsigned elt_vert( const void *elts, unsigned idx )
{
- return (const ubyte *)elts - (const ubyte *)NULL + idx;
+ /* unsigned index is packed in the pointer */
+ return (unsigned)(uintptr_t)elts + idx;
}
pt_elt_func draw_pt_elt_func( struct draw_context *draw )
diff --git a/src/gallium/auxiliary/draw/draw_pt_util.c b/src/gallium/auxiliary/draw/draw_pt_util.c
index 3bc7939c556..b61fa291436 100644
--- a/src/gallium/auxiliary/draw/draw_pt_util.c
+++ b/src/gallium/auxiliary/draw/draw_pt_util.c
@@ -75,28 +75,3 @@ void draw_pt_split_prim(unsigned prim, unsigned *first, unsigned *incr)
break;
}
}
-
-
-unsigned draw_pt_reduced_prim(unsigned prim)
-{
- switch (prim) {
- case PIPE_PRIM_POINTS:
- return PIPE_PRIM_POINTS;
- case PIPE_PRIM_LINES:
- case PIPE_PRIM_LINE_STRIP:
- case PIPE_PRIM_LINE_LOOP:
- return PIPE_PRIM_LINES;
- case PIPE_PRIM_TRIANGLES:
- case PIPE_PRIM_TRIANGLE_STRIP:
- case PIPE_PRIM_TRIANGLE_FAN:
- case PIPE_PRIM_POLYGON:
- case PIPE_PRIM_QUADS:
- case PIPE_PRIM_QUAD_STRIP:
- return PIPE_PRIM_TRIANGLES;
- default:
- assert(0);
- return PIPE_PRIM_POINTS;
- }
-}
-
-
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache.c b/src/gallium/auxiliary/draw/draw_pt_vcache.c
index 5d268a22264..1a0527be63a 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache.c
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache.c
@@ -31,6 +31,7 @@
*/
#include "util/u_memory.h"
+#include "util/u_prim.h"
#include "draw/draw_context.h"
#include "draw/draw_private.h"
#include "draw/draw_pt.h"
@@ -193,16 +194,30 @@ vcache_ef_quad( struct vcache_frontend *vcache,
unsigned i2,
unsigned i3 )
{
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_RESET_STIPPLE |
- DRAW_PIPE_EDGE_FLAG_0 |
- DRAW_PIPE_EDGE_FLAG_2 ),
- i0, i1, i3 );
-
- vcache_triangle_flags( vcache,
- ( DRAW_PIPE_EDGE_FLAG_0 |
- DRAW_PIPE_EDGE_FLAG_1 ),
- i1, i2, i3 );
+ if (vcache->draw->rasterizer->flatshade_first) {
+ vcache_triangle_flags( vcache,
+ ( DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1 ),
+ i0, i1, i2 );
+
+ vcache_triangle_flags( vcache,
+ ( DRAW_PIPE_EDGE_FLAG_2 |
+ DRAW_PIPE_EDGE_FLAG_1 ),
+ i0, i2, i3 );
+ }
+ else {
+ vcache_triangle_flags( vcache,
+ ( DRAW_PIPE_RESET_STIPPLE |
+ DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_2 ),
+ i0, i1, i3 );
+
+ vcache_triangle_flags( vcache,
+ ( DRAW_PIPE_EDGE_FLAG_0 |
+ DRAW_PIPE_EDGE_FLAG_1 ),
+ i1, i2, i3 );
+ }
}
/* At least for now, we're back to using a template include file for
@@ -453,7 +468,7 @@ vcache_prepare( struct draw_pt_front_end *frontend,
}
vcache->input_prim = prim;
- vcache->output_prim = draw_pt_reduced_prim(prim);
+ vcache->output_prim = u_reduced_prim(prim);
vcache->middle = middle;
vcache->opt = opt;
diff --git a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
index ec05bbeab40..62822a3d562 100644
--- a/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
+++ b/src/gallium/auxiliary/draw/draw_pt_vcache_tmp.h
@@ -118,21 +118,39 @@ static void FUNC( struct draw_pt_front_end *frontend,
case PIPE_PRIM_QUADS:
for (i = 0; i+3 < count; i += 4) {
- QUAD( vcache,
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 2),
- get_elt(elts, i + 3));
+ if (flatfirst) {
+ QUAD( vcache,
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2),
+ get_elt(elts, i + 3) );
+ }
+ else {
+ QUAD( vcache,
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2),
+ get_elt(elts, i + 3) );
+ }
}
break;
case PIPE_PRIM_QUAD_STRIP:
for (i = 0; i+3 < count; i += 2) {
- QUAD( vcache,
- get_elt(elts, i + 2),
- get_elt(elts, i + 0),
- get_elt(elts, i + 1),
- get_elt(elts, i + 3));
+ if (flatfirst) {
+ QUAD( vcache,
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 3),
+ get_elt(elts, i + 2) );
+ }
+ else {
+ QUAD( vcache,
+ get_elt(elts, i + 2),
+ get_elt(elts, i + 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 3) );
+ }
}
break;
@@ -144,19 +162,38 @@ static void FUNC( struct draw_pt_front_end *frontend,
const ushort edge_first = DRAW_PIPE_EDGE_FLAG_2;
const ushort edge_middle = DRAW_PIPE_EDGE_FLAG_0;
const ushort edge_last = DRAW_PIPE_EDGE_FLAG_1;
+ ushort edge_next, edge_finish;
- flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+ if (flatfirst) {
+ flags = DRAW_PIPE_RESET_STIPPLE | edge_middle | edge_last;
+ edge_next = edge_last;
+ edge_finish = edge_first;
+ }
+ else {
+ flags = DRAW_PIPE_RESET_STIPPLE | edge_first | edge_middle;
+ edge_next = edge_middle;
+ edge_finish = edge_last;
+ }
- for (i = 0; i+2 < count; i++, flags = edge_middle) {
+ for (i = 0; i+2 < count; i++, flags = edge_next) {
if (i + 3 == count)
- flags |= edge_last;
+ flags |= edge_finish;
- TRIANGLE( vcache,
- flags,
- get_elt(elts, i + 1),
- get_elt(elts, i + 2),
- get_elt(elts, 0));
+ if (flatfirst) {
+ TRIANGLE( vcache,
+ flags,
+ get_elt(elts, 0),
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2) );
+ }
+ else {
+ TRIANGLE( vcache,
+ flags,
+ get_elt(elts, i + 1),
+ get_elt(elts, i + 2),
+ get_elt(elts, 0));
+ }
}
}
break;
diff --git a/src/gallium/auxiliary/draw/draw_vs.c b/src/gallium/auxiliary/draw/draw_vs.c
index c057cd67fde..790e89ed820 100644
--- a/src/gallium/auxiliary/draw/draw_vs.c
+++ b/src/gallium/auxiliary/draw/draw_vs.c
@@ -43,6 +43,8 @@
#include "translate/translate.h"
#include "translate/translate_cache.h"
+#include "tgsi/tgsi_exec.h"
+
@@ -146,16 +148,8 @@ draw_delete_vertex_shader(struct draw_context *draw,
boolean
draw_vs_init( struct draw_context *draw )
{
- tgsi_exec_machine_init(&draw->vs.machine);
-
- /* FIXME: give this machine thing a proper constructor:
- */
- draw->vs.machine.Inputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
- if (!draw->vs.machine.Inputs)
- return FALSE;
-
- draw->vs.machine.Outputs = align_malloc(PIPE_MAX_ATTRIBS * sizeof(struct tgsi_exec_vector), 16);
- if (!draw->vs.machine.Outputs)
+ draw->vs.machine = tgsi_exec_machine_create();
+ if (!draw->vs.machine)
return FALSE;
draw->vs.emit_cache = translate_cache_create();
@@ -178,12 +172,6 @@ draw_vs_init( struct draw_context *draw )
void
draw_vs_destroy( struct draw_context *draw )
{
- if (draw->vs.machine.Inputs)
- align_free(draw->vs.machine.Inputs);
-
- if (draw->vs.machine.Outputs)
- align_free(draw->vs.machine.Outputs);
-
if (draw->vs.fetch_cache)
translate_cache_destroy(draw->vs.fetch_cache);
@@ -196,8 +184,7 @@ draw_vs_destroy( struct draw_context *draw )
if (draw->vs.aligned_constant_storage)
align_free((void*)draw->vs.aligned_constant_storage);
- tgsi_exec_machine_free_data(&draw->vs.machine);
-
+ tgsi_exec_machine_destroy(draw->vs.machine);
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_aos.c b/src/gallium/auxiliary/draw/draw_vs_aos.c
index 9e37a26c1e2..62e04a65f30 100644
--- a/src/gallium/auxiliary/draw/draw_vs_aos.c
+++ b/src/gallium/auxiliary/draw/draw_vs_aos.c
@@ -1758,24 +1758,24 @@ emit_instruction( struct aos_compilation *cp,
case TGSI_OPCODE_SUB:
return emit_SUB(cp, inst);
- case TGSI_OPCODE_LERP:
+ case TGSI_OPCODE_LRP:
// return emit_LERP(cp, inst);
return FALSE;
- case TGSI_OPCODE_FRAC:
+ case TGSI_OPCODE_FRC:
return emit_FRC(cp, inst);
case TGSI_OPCODE_CLAMP:
// return emit_CLAMP(cp, inst);
return FALSE;
- case TGSI_OPCODE_FLOOR:
+ case TGSI_OPCODE_FLR:
return emit_FLR(cp, inst);
case TGSI_OPCODE_ROUND:
return emit_RND(cp, inst);
- case TGSI_OPCODE_EXPBASE2:
+ case TGSI_OPCODE_EX2:
#if FAST_MATH
return emit_EXPBASE2(cp, inst);
#elif 0
@@ -1787,13 +1787,13 @@ emit_instruction( struct aos_compilation *cp,
return FALSE;
#endif
- case TGSI_OPCODE_LOGBASE2:
+ case TGSI_OPCODE_LG2:
return emit_LG2(cp, inst);
- case TGSI_OPCODE_POWER:
+ case TGSI_OPCODE_POW:
return emit_POW(cp, inst);
- case TGSI_OPCODE_CROSSPRODUCT:
+ case TGSI_OPCODE_XPD:
return emit_XPD(cp, inst);
case TGSI_OPCODE_ABS:
@@ -1891,8 +1891,9 @@ static boolean note_immediate( struct aos_compilation *cp,
unsigned pos = cp->num_immediates++;
unsigned j;
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
for (j = 0; j < imm->Immediate.NrTokens - 1; j++) {
- cp->vaos->machine->immediate[pos][j] = imm->u.ImmediateFloat32[j].Float;
+ cp->vaos->machine->immediate[pos][j] = imm->u[j].Float;
}
return TRUE;
diff --git a/src/gallium/auxiliary/draw/draw_vs_exec.c b/src/gallium/auxiliary/draw/draw_vs_exec.c
index f2368dde5c2..41cc8026131 100644
--- a/src/gallium/auxiliary/draw/draw_vs_exec.c
+++ b/src/gallium/auxiliary/draw/draw_vs_exec.c
@@ -41,6 +41,7 @@
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_scan.h"
+#include "tgsi/tgsi_exec.h"
struct exec_vertex_shader {
@@ -201,7 +202,7 @@ draw_create_vs_exec(struct draw_context *draw,
vs->base.run_linear = vs_exec_run_linear;
vs->base.delete = vs_exec_delete;
vs->base.create_varient = draw_vs_varient_generic;
- vs->machine = &draw->vs.machine;
+ vs->machine = draw->vs.machine;
return &vs->base;
}
diff --git a/src/gallium/auxiliary/draw/draw_vs_llvm.c b/src/gallium/auxiliary/draw/draw_vs_llvm.c
index 727977bc3af..b3535c0e48e 100644
--- a/src/gallium/auxiliary/draw/draw_vs_llvm.c
+++ b/src/gallium/auxiliary/draw/draw_vs_llvm.c
@@ -119,7 +119,7 @@ draw_create_vs_llvm(struct draw_context *draw,
vs->base.create_varient = draw_vs_varient_generic;
vs->base.run_linear = vs_llvm_run_linear;
vs->base.delete = vs_llvm_delete;
- vs->machine = &draw->vs.machine;
+ vs->machine = draw->vs.machine;
{
struct gallivm_ir *ir = gallivm_ir_new(GALLIVM_VS);
diff --git a/src/gallium/auxiliary/draw/draw_vs_ppc.c b/src/gallium/auxiliary/draw/draw_vs_ppc.c
index d35db57d571..ad184bd696d 100644
--- a/src/gallium/auxiliary/draw/draw_vs_ppc.c
+++ b/src/gallium/auxiliary/draw/draw_vs_ppc.c
@@ -48,6 +48,7 @@
#include "rtasm/rtasm_ppc.h"
#include "tgsi/tgsi_ppc.h"
#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_exec.h"
diff --git a/src/gallium/auxiliary/draw/draw_vs_sse.c b/src/gallium/auxiliary/draw/draw_vs_sse.c
index 77ba5152f9f..702051387ac 100644
--- a/src/gallium/auxiliary/draw/draw_vs_sse.c
+++ b/src/gallium/auxiliary/draw/draw_vs_sse.c
@@ -48,27 +48,16 @@
#include "rtasm/rtasm_x86sse.h"
#include "tgsi/tgsi_sse2.h"
#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_exec.h"
#define SSE_MAX_VERTICES 4
-typedef void (PIPE_CDECL *codegen_function) (
- const struct tgsi_exec_vector *input, /* 1 */
- struct tgsi_exec_vector *output, /* 2 */
- float (*constant)[4], /* 3 */
- struct tgsi_exec_vector *temporary, /* 4 */
- float (*immediates)[4], /* 5 */
- const float (*aos_input)[4], /* 6 */
- uint num_inputs, /* 7 */
- uint input_stride, /* 8 */
- float (*aos_output)[4], /* 9 */
- uint num_outputs, /* 10 */
- uint output_stride ); /* 11 */
struct draw_sse_vertex_shader {
struct draw_vertex_shader base;
struct x86_function sse2_program;
- codegen_function func;
+ tgsi_sse2_vs_func func;
struct tgsi_exec_machine *machine;
};
@@ -78,6 +67,10 @@ static void
vs_sse_prepare( struct draw_vertex_shader *base,
struct draw_context *draw )
{
+ struct draw_sse_vertex_shader *shader = (struct draw_sse_vertex_shader *)base;
+ struct tgsi_exec_machine *machine = shader->machine;
+
+ machine->Samplers = draw->vs.samplers;
}
@@ -118,11 +111,9 @@ vs_sse_run_linear( struct draw_vertex_shader *base,
/* run compiled shader
*/
- shader->func(machine->Inputs,
- machine->Outputs,
- (float (*)[4])constants,
- machine->Temps,
- (float (*)[4])shader->base.immediates,
+ shader->func(machine,
+ constants,
+ shader->base.immediates,
input,
base->info.num_inputs,
input_stride,
@@ -184,7 +175,7 @@ draw_create_vs_sse(struct draw_context *draw,
vs->base.immediates = align_malloc(TGSI_EXEC_NUM_IMMEDIATES * 4 *
sizeof(float), 16);
- vs->machine = &draw->vs.machine;
+ vs->machine = draw->vs.machine;
x86_init_func( &vs->sse2_program );
@@ -194,7 +185,7 @@ draw_create_vs_sse(struct draw_context *draw,
TRUE ))
goto fail;
- vs->func = (codegen_function) x86_get_func( &vs->sse2_program );
+ vs->func = (tgsi_sse2_vs_func) x86_get_func( &vs->sse2_program );
if (!vs->func) {
goto fail;
}
diff --git a/src/gallium/auxiliary/gallivm/instructionssoa.cpp b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
index 2d2af3085e6..721b7d2d833 100644
--- a/src/gallium/auxiliary/gallivm/instructionssoa.cpp
+++ b/src/gallium/auxiliary/gallivm/instructionssoa.cpp
@@ -128,7 +128,7 @@ void InstructionsSoa::createFunctionMap()
m_functionsMap[TGSI_OPCODE_DP4] = "dp4";
m_functionsMap[TGSI_OPCODE_MIN] = "min";
m_functionsMap[TGSI_OPCODE_MAX] = "max";
- m_functionsMap[TGSI_OPCODE_POWER] = "pow";
+ m_functionsMap[TGSI_OPCODE_POW] = "pow";
m_functionsMap[TGSI_OPCODE_LIT] = "lit";
m_functionsMap[TGSI_OPCODE_RSQ] = "rsq";
m_functionsMap[TGSI_OPCODE_SLT] = "slt";
@@ -311,7 +311,7 @@ std::vector<llvm::Value*> InstructionsSoa::mul(const std::vector<llvm::Value*> i
std::vector<llvm::Value*> InstructionsSoa::pow(const std::vector<llvm::Value*> in1,
const std::vector<llvm::Value*> in2)
{
- llvm::Function *func = function(TGSI_OPCODE_POWER);
+ llvm::Function *func = function(TGSI_OPCODE_POW);
return callBuiltin(func, in1, in2);
}
diff --git a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
index 5b08200d142..bf84401e112 100644
--- a/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
+++ b/src/gallium/auxiliary/gallivm/tgsitollvm.cpp
@@ -160,10 +160,11 @@ translate_immediate(Storage *storage,
{
float vec[4];
int i;
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) {
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
- vec[i] = imm->u.ImmediateFloat32[i].Float;
+ vec[i] = imm->u[i].Float;
break;
default:
assert(0);
@@ -179,10 +180,11 @@ translate_immediateir(StorageSoa *storage,
{
float vec[4];
int i;
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
for (i = 0; i < imm->Immediate.NrTokens - 1; ++i) {
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
- vec[i] = imm->u.ImmediateFloat32[i].Float;
+ vec[i] = imm->u[i].Float;
break;
default:
assert(0);
@@ -336,7 +338,7 @@ translate_instruction(llvm::Module *module,
out = instr->sub(inputs[0], inputs[1]);
}
break;
- case TGSI_OPCODE_LERP: {
+ case TGSI_OPCODE_LRP: {
out = instr->lerp(inputs[0], inputs[1], inputs[2]);
}
break;
@@ -348,17 +350,11 @@ translate_instruction(llvm::Module *module,
out = instr->cnd0(inputs[0], inputs[1], inputs[2]);
}
break;
- case TGSI_OPCODE_DOT2ADD: {
+ case TGSI_OPCODE_DP2A: {
out = instr->dot2add(inputs[0], inputs[1], inputs[2]);
}
break;
- case TGSI_OPCODE_INDEX:
- break;
- case TGSI_OPCODE_NEGATE: {
- out = instr->neg(inputs[0]);
- }
- break;
- case TGSI_OPCODE_FRAC: {
+ case TGSI_OPCODE_FRC: {
out = instr->frc(inputs[0]);
}
break;
@@ -366,30 +362,28 @@ translate_instruction(llvm::Module *module,
out = instr->clamp(inputs[0]);
}
break;
- case TGSI_OPCODE_FLOOR: {
+ case TGSI_OPCODE_FLR: {
out = instr->floor(inputs[0]);
}
break;
case TGSI_OPCODE_ROUND:
break;
- case TGSI_OPCODE_EXPBASE2: {
+ case TGSI_OPCODE_EX2: {
out = instr->ex2(inputs[0]);
}
break;
- case TGSI_OPCODE_LOGBASE2: {
+ case TGSI_OPCODE_LG2: {
out = instr->lg2(inputs[0]);
}
break;
- case TGSI_OPCODE_POWER: {
+ case TGSI_OPCODE_POW: {
out = instr->pow(inputs[0], inputs[1]);
}
break;
- case TGSI_OPCODE_CROSSPRODUCT: {
+ case TGSI_OPCODE_XPD: {
out = instr->cross(inputs[0], inputs[1]);
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- break;
case TGSI_OPCODE_ABS: {
out = instr->abs(inputs[0]);
}
@@ -522,7 +516,7 @@ translate_instruction(llvm::Module *module,
return; //just update the state
}
break;
- case TGSI_OPCODE_LOOP:
+ case TGSI_OPCODE_BGNFOR:
break;
case TGSI_OPCODE_REP:
break;
@@ -538,7 +532,7 @@ translate_instruction(llvm::Module *module,
return; //just update the state
}
break;
- case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_ENDFOR:
break;
case TGSI_OPCODE_ENDREP:
break;
@@ -580,7 +574,7 @@ translate_instruction(llvm::Module *module,
break;
case TGSI_OPCODE_ENDPRIM:
break;
- case TGSI_OPCODE_BGNLOOP2: {
+ case TGSI_OPCODE_BGNLOOP: {
instr->beginLoop();
storage->setCurrentBlock(instr->currentBlock());
return;
@@ -593,7 +587,7 @@ translate_instruction(llvm::Module *module,
return;
}
break;
- case TGSI_OPCODE_ENDLOOP2: {
+ case TGSI_OPCODE_ENDLOOP: {
instr->endLoop();
storage->setCurrentBlock(instr->currentBlock());
return;
@@ -617,14 +611,6 @@ translate_instruction(llvm::Module *module,
break;
case TGSI_OPCODE_NOP:
break;
- case TGSI_OPCODE_M4X3:
- break;
- case TGSI_OPCODE_M3X4:
- break;
- case TGSI_OPCODE_M3X3:
- break;
- case TGSI_OPCODE_M3X2:
- break;
case TGSI_OPCODE_CALLNZ:
break;
case TGSI_OPCODE_IFC:
@@ -778,44 +764,38 @@ translate_instructionir(llvm::Module *module,
out = instr->sub(inputs[0], inputs[1]);
}
break;
- case TGSI_OPCODE_LERP: {
+ case TGSI_OPCODE_LRP: {
}
break;
case TGSI_OPCODE_CND:
break;
case TGSI_OPCODE_CND0:
break;
- case TGSI_OPCODE_DOT2ADD:
+ case TGSI_OPCODE_DP2A:
break;
- case TGSI_OPCODE_INDEX:
- break;
- case TGSI_OPCODE_NEGATE:
- break;
- case TGSI_OPCODE_FRAC: {
+ case TGSI_OPCODE_FRC: {
}
break;
case TGSI_OPCODE_CLAMP:
break;
- case TGSI_OPCODE_FLOOR: {
+ case TGSI_OPCODE_FLR: {
}
break;
case TGSI_OPCODE_ROUND:
break;
- case TGSI_OPCODE_EXPBASE2: {
+ case TGSI_OPCODE_EX2: {
}
break;
- case TGSI_OPCODE_LOGBASE2: {
+ case TGSI_OPCODE_LG2: {
}
break;
- case TGSI_OPCODE_POWER: {
+ case TGSI_OPCODE_POW: {
out = instr->pow(inputs[0], inputs[1]);
}
break;
- case TGSI_OPCODE_CROSSPRODUCT: {
+ case TGSI_OPCODE_XPD: {
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- break;
case TGSI_OPCODE_ABS: {
out = instr->abs(inputs[0]);
}
@@ -910,7 +890,7 @@ translate_instructionir(llvm::Module *module,
case TGSI_OPCODE_IF: {
}
break;
- case TGSI_OPCODE_LOOP:
+ case TGSI_OPCODE_BGNFOR:
break;
case TGSI_OPCODE_REP:
break;
@@ -920,7 +900,7 @@ translate_instructionir(llvm::Module *module,
case TGSI_OPCODE_ENDIF: {
}
break;
- case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_ENDFOR:
break;
case TGSI_OPCODE_ENDREP:
break;
@@ -961,13 +941,13 @@ translate_instructionir(llvm::Module *module,
break;
case TGSI_OPCODE_ENDPRIM:
break;
- case TGSI_OPCODE_BGNLOOP2: {
+ case TGSI_OPCODE_BGNLOOP: {
}
break;
case TGSI_OPCODE_BGNSUB: {
}
break;
- case TGSI_OPCODE_ENDLOOP2: {
+ case TGSI_OPCODE_ENDLOOP: {
}
break;
case TGSI_OPCODE_ENDSUB: {
@@ -983,14 +963,6 @@ translate_instructionir(llvm::Module *module,
break;
case TGSI_OPCODE_NOP:
break;
- case TGSI_OPCODE_M4X3:
- break;
- case TGSI_OPCODE_M3X4:
- break;
- case TGSI_OPCODE_M3X3:
- break;
- case TGSI_OPCODE_M3X2:
- break;
case TGSI_OPCODE_NRM4:
break;
case TGSI_OPCODE_CALLNZ:
diff --git a/src/gallium/auxiliary/indices/u_indices.c b/src/gallium/auxiliary/indices/u_indices.c
index 0cf7d88653c..03d7453f303 100644
--- a/src/gallium/auxiliary/indices/u_indices.c
+++ b/src/gallium/auxiliary/indices/u_indices.c
@@ -244,7 +244,7 @@ int u_index_generator( unsigned hw_mask,
default:
assert(0);
- *out_generate = generate[out_idx][in_pv][out_pv][prim];
+ *out_generate = generate[out_idx][in_pv][out_pv][PIPE_PRIM_POINTS];
*out_prim = PIPE_PRIM_POINTS;
*out_nr = nr;
return U_TRANSLATE_ERROR;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer.h b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
index 92b6fd00564..2590546cb4a 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer.h
@@ -72,6 +72,12 @@ struct pb_desc
/**
+ * Size. Regular (32bit) unsigned for now.
+ */
+typedef unsigned pb_size;
+
+
+/**
* Base class for all pb_* buffers.
*/
struct pb_buffer
@@ -126,7 +132,7 @@ struct pb_vtbl
*/
void (*get_base_buffer)( struct pb_buffer *buf,
struct pb_buffer **base_buf,
- unsigned *offset );
+ pb_size *offset );
};
@@ -177,7 +183,7 @@ pb_unmap(struct pb_buffer *buf)
static INLINE void
pb_get_base_buffer( struct pb_buffer *buf,
struct pb_buffer **base_buf,
- unsigned *offset )
+ pb_size *offset )
{
assert(buf);
if(!buf) {
@@ -241,7 +247,7 @@ pb_reference(struct pb_buffer **dst,
* the requested or not.
*/
static INLINE boolean
-pb_check_alignment(size_t requested, size_t provided)
+pb_check_alignment(pb_size requested, pb_size provided)
{
if(!requested)
return TRUE;
@@ -269,7 +275,7 @@ pb_check_usage(unsigned requested, unsigned provided)
* hardware.
*/
struct pb_buffer *
-pb_malloc_buffer_create(size_t size,
+pb_malloc_buffer_create(pb_size size,
const struct pb_desc *desc);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
index 4698efa69cf..109ac7c9d63 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_fenced.c
@@ -65,11 +65,11 @@ struct fenced_buffer_list
struct pb_fence_ops *ops;
- size_t numDelayed;
+ pb_size numDelayed;
struct list_head delayed;
#ifdef DEBUG
- size_t numUnfenced;
+ pb_size numUnfenced;
struct list_head unfenced;
#endif
};
@@ -433,7 +433,7 @@ fenced_buffer_fence(struct pb_buffer *buf,
static void
fenced_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
- unsigned *offset)
+ pb_size *offset)
{
struct fenced_buffer *fenced_buf = fenced_buffer(buf);
pb_get_base_buffer(fenced_buf->buffer, base_buf, offset);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
index 689fd74771f..6bdce5fcb06 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_buffer_malloc.c
@@ -53,6 +53,8 @@ static INLINE struct malloc_buffer *
malloc_buffer(struct pb_buffer *buf)
{
assert(buf);
+ if (!buf)
+ return NULL;
assert(buf->vtbl == &malloc_buffer_vtbl);
return (struct malloc_buffer *)buf;
}
@@ -102,7 +104,7 @@ malloc_buffer_fence(struct pb_buffer *buf,
static void
malloc_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
- unsigned *offset)
+ pb_size *offset)
{
*base_buf = buf;
*offset = 0;
@@ -121,7 +123,7 @@ malloc_buffer_vtbl = {
struct pb_buffer *
-pb_malloc_buffer_create(size_t size,
+pb_malloc_buffer_create(pb_size size,
const struct pb_desc *desc)
{
struct malloc_buffer *buf;
@@ -150,7 +152,7 @@ pb_malloc_buffer_create(size_t size,
static struct pb_buffer *
pb_malloc_bufmgr_create_buffer(struct pb_manager *mgr,
- size_t size,
+ pb_size size,
const struct pb_desc *desc)
{
return pb_malloc_buffer_create(size, desc);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
index 74077f82774..39ab8e722c1 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr.h
@@ -73,7 +73,7 @@ struct pb_manager
struct pb_buffer *
(*create_buffer)( struct pb_manager *mgr,
- size_t size,
+ pb_size size,
const struct pb_desc *desc);
/**
@@ -106,7 +106,7 @@ pb_malloc_bufmgr_create(void);
*/
struct pb_manager *
pool_bufmgr_create(struct pb_manager *provider,
- size_t n, size_t size,
+ pb_size n, pb_size size,
const struct pb_desc *desc);
@@ -119,7 +119,7 @@ pool_bufmgr_create(struct pb_manager *provider,
*/
struct pb_manager *
mm_bufmgr_create(struct pb_manager *provider,
- size_t size, size_t align2);
+ pb_size size, pb_size align2);
/**
* Same as mm_bufmgr_create.
@@ -128,7 +128,7 @@ mm_bufmgr_create(struct pb_manager *provider,
*/
struct pb_manager *
mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
- size_t size, size_t align2);
+ pb_size size, pb_size align2);
/**
@@ -136,8 +136,8 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
*/
struct pb_manager *
pb_slab_manager_create(struct pb_manager *provider,
- size_t bufSize,
- size_t slabSize,
+ pb_size bufSize,
+ pb_size slabSize,
const struct pb_desc *desc);
/**
@@ -146,9 +146,9 @@ pb_slab_manager_create(struct pb_manager *provider,
*/
struct pb_manager *
pb_slab_range_manager_create(struct pb_manager *provider,
- size_t minBufSize,
- size_t maxBufSize,
- size_t slabSize,
+ pb_size minBufSize,
+ pb_size maxBufSize,
+ pb_size slabSize,
const struct pb_desc *desc);
@@ -204,7 +204,7 @@ pb_ondemand_manager_create(struct pb_manager *provider);
*/
struct pb_manager *
pb_debug_manager_create(struct pb_manager *provider,
- size_t underflow_size, size_t overflow_size);
+ pb_size underflow_size, pb_size overflow_size);
#ifdef __cplusplus
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
index db67d46c561..f60c836f18b 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_alt.c
@@ -60,7 +60,7 @@ pb_alt_manager(struct pb_manager *mgr)
static struct pb_buffer *
pb_alt_manager_create_buffer(struct pb_manager *_mgr,
- size_t size,
+ pb_size size,
const struct pb_desc *desc)
{
struct pb_alt_manager *mgr = pb_alt_manager(_mgr);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
index 35358430b43..57d1ede45a4 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_cache.c
@@ -81,7 +81,7 @@ struct pb_cache_manager
pipe_mutex mutex;
struct list_head delayed;
- size_t numDelayed;
+ pb_size numDelayed;
};
@@ -204,7 +204,7 @@ pb_cache_buffer_fence(struct pb_buffer *_buf,
static void
pb_cache_buffer_get_base_buffer(struct pb_buffer *_buf,
struct pb_buffer **base_buf,
- unsigned *offset)
+ pb_size *offset)
{
struct pb_cache_buffer *buf = pb_cache_buffer(_buf);
pb_get_base_buffer(buf->buffer, base_buf, offset);
@@ -224,7 +224,7 @@ pb_cache_buffer_vtbl = {
static INLINE boolean
pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
- size_t size,
+ pb_size size,
const struct pb_desc *desc)
{
if(buf->base.base.size < size)
@@ -246,7 +246,7 @@ pb_cache_is_buffer_compat(struct pb_cache_buffer *buf,
static struct pb_buffer *
pb_cache_manager_create_buffer(struct pb_manager *_mgr,
- size_t size,
+ pb_size size,
const struct pb_desc *desc)
{
struct pb_cache_manager *mgr = pb_cache_manager(_mgr);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
index cedf745bdac..1b4df28c707 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_debug.c
@@ -72,8 +72,8 @@ struct pb_debug_buffer
struct pb_buffer *buffer;
struct pb_debug_manager *mgr;
- size_t underflow_size;
- size_t overflow_size;
+ pb_size underflow_size;
+ pb_size overflow_size;
struct debug_stack_frame create_backtrace[PB_DEBUG_CREATE_BACKTRACE];
@@ -91,8 +91,8 @@ struct pb_debug_manager
struct pb_manager *provider;
- size_t underflow_size;
- size_t overflow_size;
+ pb_size underflow_size;
+ pb_size overflow_size;
pipe_mutex mutex;
struct list_head list;
@@ -124,9 +124,9 @@ static const uint8_t random_pattern[32] = {
static INLINE void
-fill_random_pattern(uint8_t *dst, size_t size)
+fill_random_pattern(uint8_t *dst, pb_size size)
{
- size_t i = 0;
+ pb_size i = 0;
while(size--) {
*dst++ = random_pattern[i++];
i &= sizeof(random_pattern) - 1;
@@ -135,11 +135,11 @@ fill_random_pattern(uint8_t *dst, size_t size)
static INLINE boolean
-check_random_pattern(const uint8_t *dst, size_t size,
- size_t *min_ofs, size_t *max_ofs)
+check_random_pattern(const uint8_t *dst, pb_size size,
+ pb_size *min_ofs, pb_size *max_ofs)
{
boolean result = TRUE;
- size_t i;
+ pb_size i;
*min_ofs = size;
*max_ofs = 0;
for(i = 0; i < size; ++i) {
@@ -183,7 +183,7 @@ pb_debug_buffer_check(struct pb_debug_buffer *buf)
assert(map);
if(map) {
boolean underflow, overflow;
- size_t min_ofs, max_ofs;
+ pb_size min_ofs, max_ofs;
underflow = !check_random_pattern(map, buf->underflow_size,
&min_ofs, &max_ofs);
@@ -287,7 +287,7 @@ pb_debug_buffer_unmap(struct pb_buffer *_buf)
static void
pb_debug_buffer_get_base_buffer(struct pb_buffer *_buf,
struct pb_buffer **base_buf,
- unsigned *offset)
+ pb_size *offset)
{
struct pb_debug_buffer *buf = pb_debug_buffer(_buf);
pb_get_base_buffer(buf->buffer, base_buf, offset);
@@ -363,13 +363,13 @@ pb_debug_manager_dump(struct pb_debug_manager *mgr)
static struct pb_buffer *
pb_debug_manager_create_buffer(struct pb_manager *_mgr,
- size_t size,
+ pb_size size,
const struct pb_desc *desc)
{
struct pb_debug_manager *mgr = pb_debug_manager(_mgr);
struct pb_debug_buffer *buf;
struct pb_desc real_desc;
- size_t real_size;
+ pb_size real_size;
buf = CALLOC_STRUCT(pb_debug_buffer);
if(!buf)
@@ -455,7 +455,7 @@ pb_debug_manager_destroy(struct pb_manager *_mgr)
struct pb_manager *
pb_debug_manager_create(struct pb_manager *provider,
- size_t underflow_size, size_t overflow_size)
+ pb_size underflow_size, pb_size overflow_size)
{
struct pb_debug_manager *mgr;
@@ -485,7 +485,7 @@ pb_debug_manager_create(struct pb_manager *provider,
struct pb_manager *
pb_debug_manager_create(struct pb_manager *provider,
- size_t underflow_size, size_t overflow_size)
+ pb_size underflow_size, pb_size overflow_size)
{
return provider;
}
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
index 144db5669b6..97dd1427fda 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_fenced.c
@@ -62,7 +62,7 @@ fenced_pb_manager(struct pb_manager *mgr)
static struct pb_buffer *
fenced_bufmgr_create_buffer(struct pb_manager *mgr,
- size_t size,
+ pb_size size,
const struct pb_desc *desc)
{
struct fenced_pb_manager *fenced_mgr = fenced_pb_manager(mgr);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
index d8decba49df..6400fc5b0a3 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_mm.c
@@ -55,10 +55,10 @@ struct mm_pb_manager
pipe_mutex mutex;
- size_t size;
+ pb_size size;
struct mem_block *heap;
- size_t align2;
+ pb_size align2;
struct pb_buffer *buffer;
void *map;
@@ -148,7 +148,7 @@ mm_buffer_fence(struct pb_buffer *buf,
static void
mm_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
- unsigned *offset)
+ pb_size *offset)
{
struct mm_buffer *mm_buf = mm_buffer(buf);
struct mm_pb_manager *mm = mm_buf->mgr;
@@ -170,15 +170,15 @@ mm_buffer_vtbl = {
static struct pb_buffer *
mm_bufmgr_create_buffer(struct pb_manager *mgr,
- size_t size,
+ pb_size size,
const struct pb_desc *desc)
{
struct mm_pb_manager *mm = mm_pb_manager(mgr);
struct mm_buffer *mm_buf;
/* We don't handle alignments larger then the one initially setup */
- assert(pb_check_alignment(desc->alignment, 1 << mm->align2));
- if(!pb_check_alignment(desc->alignment, 1 << mm->align2))
+ assert(pb_check_alignment(desc->alignment, (pb_size)1 << mm->align2));
+ if(!pb_check_alignment(desc->alignment, (pb_size)1 << mm->align2))
return NULL;
pipe_mutex_lock(mm->mutex);
@@ -198,7 +198,7 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr,
mm_buf->mgr = mm;
- mm_buf->block = u_mmAllocMem(mm->heap, size, mm->align2, 0);
+ mm_buf->block = u_mmAllocMem(mm->heap, (int)size, (int)mm->align2, 0);
if(!mm_buf->block) {
#if 0
debug_printf("warning: heap full\n");
@@ -210,8 +210,8 @@ mm_bufmgr_create_buffer(struct pb_manager *mgr,
}
/* Some sanity checks */
- assert(0 <= (unsigned)mm_buf->block->ofs && (unsigned)mm_buf->block->ofs < mm->size);
- assert(size <= (unsigned)mm_buf->block->size && (unsigned)mm_buf->block->ofs + (unsigned)mm_buf->block->size <= mm->size);
+ assert(0 <= (pb_size)mm_buf->block->ofs && (pb_size)mm_buf->block->ofs < mm->size);
+ assert(size <= (pb_size)mm_buf->block->size && (pb_size)mm_buf->block->ofs + (pb_size)mm_buf->block->size <= mm->size);
pipe_mutex_unlock(mm->mutex);
return SUPER(mm_buf);
@@ -245,7 +245,7 @@ mm_bufmgr_destroy(struct pb_manager *mgr)
struct pb_manager *
mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
- size_t size, size_t align2)
+ pb_size size, pb_size align2)
{
struct mm_pb_manager *mm;
@@ -273,7 +273,7 @@ mm_bufmgr_create_from_buffer(struct pb_buffer *buffer,
if(!mm->map)
goto failure;
- mm->heap = u_mmInit(0, size);
+ mm->heap = u_mmInit(0, (int)size);
if (!mm->heap)
goto failure;
@@ -292,7 +292,7 @@ if(mm->heap)
struct pb_manager *
mm_bufmgr_create(struct pb_manager *provider,
- size_t size, size_t align2)
+ pb_size size, pb_size align2)
{
struct pb_buffer *buffer;
struct pb_manager *mgr;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
index 4f7e6b1c4df..cb32d251367 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_ondemand.c
@@ -55,7 +55,7 @@ struct pb_ondemand_buffer
/** Real buffer */
struct pb_buffer *buffer;
- size_t size;
+ pb_size size;
struct pb_desc desc;
};
@@ -74,6 +74,8 @@ static INLINE struct pb_ondemand_buffer *
pb_ondemand_buffer(struct pb_buffer *buf)
{
assert(buf);
+ if (!buf)
+ return NULL;
assert(buf->vtbl == &pb_ondemand_buffer_vtbl);
return (struct pb_ondemand_buffer *)buf;
}
@@ -204,7 +206,7 @@ pb_ondemand_buffer_fence(struct pb_buffer *_buf,
static void
pb_ondemand_buffer_get_base_buffer(struct pb_buffer *_buf,
struct pb_buffer **base_buf,
- unsigned *offset)
+ pb_size *offset)
{
struct pb_ondemand_buffer *buf = pb_ondemand_buffer(_buf);
@@ -232,7 +234,7 @@ pb_ondemand_buffer_vtbl = {
static struct pb_buffer *
pb_ondemand_manager_create_buffer(struct pb_manager *_mgr,
- size_t size,
+ pb_size size,
const struct pb_desc *desc)
{
struct pb_ondemand_manager *mgr = pb_ondemand_manager(_mgr);
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
index 07fd1a22d93..7fd65ed2261 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_pool.c
@@ -58,11 +58,11 @@ struct pool_pb_manager
pipe_mutex mutex;
- size_t bufSize;
- size_t bufAlign;
+ pb_size bufSize;
+ pb_size bufAlign;
- size_t numFree;
- size_t numTot;
+ pb_size numFree;
+ pb_size numTot;
struct list_head free;
@@ -89,7 +89,7 @@ struct pool_buffer
struct list_head head;
- size_t start;
+ pb_size start;
};
@@ -162,7 +162,7 @@ pool_buffer_fence(struct pb_buffer *buf,
static void
pool_buffer_get_base_buffer(struct pb_buffer *buf,
struct pb_buffer **base_buf,
- unsigned *offset)
+ pb_size *offset)
{
struct pool_buffer *pool_buf = pool_buffer(buf);
struct pool_pb_manager *pool = pool_buf->mgr;
@@ -184,7 +184,7 @@ pool_buffer_vtbl = {
static struct pb_buffer *
pool_bufmgr_create_buffer(struct pb_manager *mgr,
- size_t size,
+ pb_size size,
const struct pb_desc *desc)
{
struct pool_pb_manager *pool = pool_pb_manager(mgr);
@@ -251,13 +251,13 @@ pool_bufmgr_destroy(struct pb_manager *mgr)
struct pb_manager *
pool_bufmgr_create(struct pb_manager *provider,
- size_t numBufs,
- size_t bufSize,
+ pb_size numBufs,
+ pb_size bufSize,
const struct pb_desc *desc)
{
struct pool_pb_manager *pool;
struct pool_buffer *pool_buf;
- size_t i;
+ pb_size i;
if(!provider)
return NULL;
diff --git a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
index 724aaadb436..e7352e90db9 100644
--- a/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
+++ b/src/gallium/auxiliary/pipebuffer/pb_bufmgr_slab.c
@@ -68,7 +68,7 @@ struct pb_slab_buffer
unsigned mapCount;
/** Offset relative to the start of the slab buffer. */
- size_t start;
+ pb_size start;
/** Use when validating, to signal that all mappings are finished */
/* TODO: Actually validation does not reach this stage yet */
@@ -83,8 +83,8 @@ struct pb_slab
{
struct list_head head;
struct list_head freeBuffers;
- size_t numBuffers;
- size_t numFree;
+ pb_size numBuffers;
+ pb_size numFree;
struct pb_slab_buffer *buffers;
struct pb_slab_manager *mgr;
@@ -108,10 +108,10 @@ struct pb_slab_manager
struct pb_manager *provider;
/** Size of the buffers we hand on downstream */
- size_t bufSize;
+ pb_size bufSize;
/** Size of the buffers we request upstream */
- size_t slabSize;
+ pb_size slabSize;
/**
* Alignment, usage to be used to allocate the slab buffers.
@@ -150,14 +150,14 @@ struct pb_slab_range_manager
struct pb_manager *provider;
- size_t minBufSize;
- size_t maxBufSize;
+ pb_size minBufSize;
+ pb_size maxBufSize;
/** @sa pb_slab_manager::desc */
struct pb_desc desc;
unsigned numBuckets;
- size_t *bucketSizes;
+ pb_size *bucketSizes;
/** Array of pb_slab_manager, one for each bucket size */
struct pb_manager **buckets;
@@ -270,7 +270,7 @@ pb_slab_buffer_fence(struct pb_buffer *_buf,
static void
pb_slab_buffer_get_base_buffer(struct pb_buffer *_buf,
struct pb_buffer **base_buf,
- unsigned *offset)
+ pb_size *offset)
{
struct pb_slab_buffer *buf = pb_slab_buffer(_buf);
pb_get_base_buffer(buf->slab->bo, base_buf, offset);
@@ -369,7 +369,7 @@ out_err0:
static struct pb_buffer *
pb_slab_manager_create_buffer(struct pb_manager *_mgr,
- size_t size,
+ pb_size size,
const struct pb_desc *desc)
{
struct pb_slab_manager *mgr = pb_slab_manager(_mgr);
@@ -450,8 +450,8 @@ pb_slab_manager_destroy(struct pb_manager *_mgr)
struct pb_manager *
pb_slab_manager_create(struct pb_manager *provider,
- size_t bufSize,
- size_t slabSize,
+ pb_size bufSize,
+ pb_size slabSize,
const struct pb_desc *desc)
{
struct pb_slab_manager *mgr;
@@ -479,11 +479,11 @@ pb_slab_manager_create(struct pb_manager *provider,
static struct pb_buffer *
pb_slab_range_manager_create_buffer(struct pb_manager *_mgr,
- size_t size,
+ pb_size size,
const struct pb_desc *desc)
{
struct pb_slab_range_manager *mgr = pb_slab_range_manager(_mgr);
- size_t bufSize;
+ pb_size bufSize;
unsigned i;
bufSize = mgr->minBufSize;
@@ -527,13 +527,13 @@ pb_slab_range_manager_destroy(struct pb_manager *_mgr)
struct pb_manager *
pb_slab_range_manager_create(struct pb_manager *provider,
- size_t minBufSize,
- size_t maxBufSize,
- size_t slabSize,
+ pb_size minBufSize,
+ pb_size maxBufSize,
+ pb_size slabSize,
const struct pb_desc *desc)
{
struct pb_slab_range_manager *mgr;
- size_t bufSize;
+ pb_size bufSize;
unsigned i;
if(!provider)
diff --git a/src/gallium/auxiliary/rtasm/rtasm_ppc.c b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
index e3586482db4..ef4b306cb67 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_ppc.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_ppc.c
@@ -168,7 +168,7 @@ ppc_allocate_register(struct ppc_function *p)
{
unsigned i;
for (i = 0; i < PPC_NUM_REGS; i++) {
- const uint64_t mask = 1 << i;
+ const uint32_t mask = 1 << i;
if ((p->reg_used & mask) == 0) {
p->reg_used |= mask;
return i;
@@ -200,7 +200,7 @@ ppc_allocate_fp_register(struct ppc_function *p)
{
unsigned i;
for (i = 0; i < PPC_NUM_FP_REGS; i++) {
- const uint64_t mask = 1 << i;
+ const uint32_t mask = 1 << i;
if ((p->fp_used & mask) == 0) {
p->fp_used |= mask;
return i;
@@ -232,7 +232,7 @@ ppc_allocate_vec_register(struct ppc_function *p)
{
unsigned i;
for (i = 0; i < PPC_NUM_VEC_REGS; i++) {
- const uint64_t mask = 1 << i;
+ const uint32_t mask = 1 << i;
if ((p->vec_used & mask) == 0) {
p->vec_used |= mask;
return i;
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
index 57fcf6de2ab..1acf3c373eb 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.c
@@ -993,6 +993,15 @@ void sse_pmovmskb( struct x86_function *p,
emit_modrm(p, dst, src);
}
+void sse_movmskps( struct x86_function *p,
+ struct x86_reg dst,
+ struct x86_reg src)
+{
+ DUMP_RR( dst, src );
+ emit_2ub(p, X86_TWOB, 0x50);
+ emit_modrm(p, dst, src);
+}
+
/***********************************************************************
* SSE2 instructions
*/
diff --git a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
index 1b5eaaca850..731a6517968 100644
--- a/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
+++ b/src/gallium/auxiliary/rtasm/rtasm_x86sse.h
@@ -223,6 +223,7 @@ void sse_unpckhps( struct x86_function *p, struct x86_reg dst, struct x86_reg sr
void sse_unpcklps( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void sse_pmovmskb( struct x86_function *p, struct x86_reg dest, struct x86_reg src );
void sse2_punpcklbw( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
+void sse_movmskps( struct x86_function *p, struct x86_reg dst, struct x86_reg src);
void x86_add( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
void x86_and( struct x86_function *p, struct x86_reg dst, struct x86_reg src );
diff --git a/src/gallium/auxiliary/tgsi/Makefile b/src/gallium/auxiliary/tgsi/Makefile
index b4900e8dbaa..5f0a580b096 100644
--- a/src/gallium/auxiliary/tgsi/Makefile
+++ b/src/gallium/auxiliary/tgsi/Makefile
@@ -16,6 +16,7 @@ C_SOURCES = \
tgsi_sse2.c \
tgsi_text.c \
tgsi_transform.c \
+ tgsi_ureg.c \
tgsi_util.c
include ../../Makefile.template
diff --git a/src/gallium/auxiliary/tgsi/SConscript b/src/gallium/auxiliary/tgsi/SConscript
index 8200cce42f5..b6bc2924f06 100644
--- a/src/gallium/auxiliary/tgsi/SConscript
+++ b/src/gallium/auxiliary/tgsi/SConscript
@@ -16,6 +16,7 @@ tgsi = env.ConvenienceLibrary(
'tgsi_sse2.c',
'tgsi_text.c',
'tgsi_transform.c',
+ 'tgsi_ureg.c',
'tgsi_util.c',
])
diff --git a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
index a3f4947c734..e63813d6936 100644
--- a/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
+++ b/src/gallium/auxiliary/tgsi/tgsi-instruction-set.txt
@@ -665,9 +665,18 @@ TGSI Instruction Specification
TBD
-1.9.8 LOOP - Loop
+1.9.8 BGNFOR - Begin a For-Loop
- TBD
+ dst.x = floor(src.x)
+ dst.y = floor(src.y)
+ dst.z = floor(src.z)
+
+ if (dst.y <= 0)
+ pc = [matching ENDFOR] + 1
+ endif
+
+ Note: The destination must be a loop register.
+ The source must be a constant register.
1.9.9 REP - Repeat
@@ -685,9 +694,16 @@ TGSI Instruction Specification
TBD
-1.9.12 ENDLOOP - End Loop
+1.9.12 ENDFOR - End a For-Loop
- TBD
+ dst.x = dst.x + dst.z
+ dst.y = dst.y - 1.0
+
+ if (dst.y > 0)
+ pc = [matching BGNFOR instruction] + 1
+ endif
+
+ Note: The destination must be a loop register.
1.9.13 ENDREP - End Repeat
@@ -840,7 +856,7 @@ TGSI Instruction Specification
----------
-1.13.1 BGNLOOP2 - Begin Loop
+1.13.1 BGNLOOP - Begin a Loop
TBD
@@ -850,7 +866,7 @@ TGSI Instruction Specification
TBD
-1.13.3 ENDLOOP2 - End Loop
+1.13.3 ENDLOOP - End a Loop
TBD
@@ -1015,12 +1031,12 @@ TGSI Instruction Specification
1.18.1 EXPP - Approximate Exponential Base 2
- Alias for EXP.
+ Use EXP. See also 1.19.3.
1.18.2 LOGP - Logarithm Base 2
- Alias for LG2.
+ Use LOG. See also 1.19.4.
1.19 vs_2_0
@@ -1037,6 +1053,16 @@ TGSI Instruction Specification
Alias for ARR.
+1.19.3 EXPP - Approximate Exponential Base 2
+
+ Use EX2.
+
+
+1.19.4 LOGP - Logarithm Base 2
+
+ Use LG2.
+
+
2 Explanation of symbols used
==============================
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.c b/src/gallium/auxiliary/tgsi/tgsi_build.c
index a1891a140ac..e0cfc54420e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.c
@@ -139,8 +139,8 @@ tgsi_build_declaration(
{
struct tgsi_declaration declaration;
- assert( file <= TGSI_FILE_IMMEDIATE );
- assert( interpolate <= TGSI_INTERPOLATE_PERSPECTIVE );
+ assert( file < TGSI_FILE_COUNT );
+ assert( interpolate < TGSI_INTERPOLATE_COUNT );
declaration = tgsi_default_declaration();
declaration.File = file;
@@ -335,7 +335,10 @@ tgsi_default_full_immediate( void )
struct tgsi_full_immediate fullimm;
fullimm.Immediate = tgsi_default_immediate();
- fullimm.u.Pointer = (void *) 0;
+ fullimm.u[0].Float = 0.0f;
+ fullimm.u[1].Float = 0.0f;
+ fullimm.u[2].Float = 0.0f;
+ fullimm.u[3].Float = 0.0f;
return fullimm;
}
@@ -352,19 +355,19 @@ immediate_grow(
header_bodysize_grow( header );
}
-struct tgsi_immediate_float32
+union tgsi_immediate_data
tgsi_build_immediate_float32(
float value,
struct tgsi_immediate *immediate,
struct tgsi_header *header )
{
- struct tgsi_immediate_float32 immediate_float32;
+ union tgsi_immediate_data immediate_data;
- immediate_float32.Float = value;
+ immediate_data.Float = value;
immediate_grow( immediate, header );
- return immediate_float32;
+ return immediate_data;
}
unsigned
@@ -384,16 +387,18 @@ tgsi_build_full_immediate(
*immediate = tgsi_build_immediate( header );
+ assert( full_imm->Immediate.NrTokens <= 4 + 1 );
+
for( i = 0; i < full_imm->Immediate.NrTokens - 1; i++ ) {
- struct tgsi_immediate_float32 *if32;
+ union tgsi_immediate_data *data;
if( maxsize <= size )
return 0;
- if32 = (struct tgsi_immediate_float32 *) &tokens[size];
+ data = (union tgsi_immediate_data *) &tokens[size];
size++;
- *if32 = tgsi_build_immediate_float32(
- full_imm->u.ImmediateFloat32[i].Float,
+ *data = tgsi_build_immediate_float32(
+ full_imm->u[i].Float,
immediate,
header );
}
@@ -477,6 +482,8 @@ tgsi_default_full_instruction( void )
full_instruction.FullSrcRegisters[i] = tgsi_default_full_src_register();
}
+ full_instruction.Flags = 0x0;
+
return full_instruction;
}
@@ -584,6 +591,7 @@ tgsi_build_full_instruction(
*dst_register = tgsi_build_dst_register(
reg->DstRegister.File,
reg->DstRegister.WriteMask,
+ reg->DstRegister.Indirect,
reg->DstRegister.Index,
instruction,
header );
@@ -631,6 +639,28 @@ tgsi_build_full_instruction(
header );
prev_token = (struct tgsi_token *) dst_register_ext_modulate;
}
+
+ if( reg->DstRegister.Indirect ) {
+ struct tgsi_src_register *ind;
+
+ if( maxsize <= size )
+ return 0;
+ ind = (struct tgsi_src_register *) &tokens[size];
+ size++;
+
+ *ind = tgsi_build_src_register(
+ reg->DstRegisterInd.File,
+ reg->DstRegisterInd.SwizzleX,
+ reg->DstRegisterInd.SwizzleY,
+ reg->DstRegisterInd.SwizzleZ,
+ reg->DstRegisterInd.SwizzleW,
+ reg->DstRegisterInd.Negate,
+ reg->DstRegisterInd.Indirect,
+ reg->DstRegisterInd.Dimension,
+ reg->DstRegisterInd.Index,
+ instruction,
+ header );
+ }
}
for( i = 0; i < full_inst->Instruction.NumSrcRegs; i++ ) {
@@ -973,7 +1003,7 @@ tgsi_build_src_register(
{
struct tgsi_src_register src_register;
- assert( file <= TGSI_FILE_IMMEDIATE );
+ assert( file < TGSI_FILE_COUNT );
assert( swizzle_x <= TGSI_SWIZZLE_W );
assert( swizzle_y <= TGSI_SWIZZLE_W );
assert( swizzle_z <= TGSI_SWIZZLE_W );
@@ -1194,13 +1224,14 @@ struct tgsi_dst_register
tgsi_build_dst_register(
unsigned file,
unsigned mask,
+ unsigned indirect,
int index,
struct tgsi_instruction *instruction,
struct tgsi_header *header )
{
struct tgsi_dst_register dst_register;
- assert( file <= TGSI_FILE_IMMEDIATE );
+ assert( file < TGSI_FILE_COUNT );
assert( mask <= TGSI_WRITEMASK_XYZW );
assert( index >= -32768 && index <= 32767 );
@@ -1208,6 +1239,7 @@ tgsi_build_dst_register(
dst_register.File = file;
dst_register.WriteMask = mask;
dst_register.Index = index;
+ dst_register.Indirect = indirect;
instruction_grow( instruction, header );
@@ -1220,6 +1252,7 @@ tgsi_default_full_dst_register( void )
struct tgsi_full_dst_register full_dst_register;
full_dst_register.DstRegister = tgsi_default_dst_register();
+ full_dst_register.DstRegisterInd = tgsi_default_src_register();
full_dst_register.DstRegisterExtConcode =
tgsi_default_dst_register_ext_concode();
full_dst_register.DstRegisterExtModulate =
diff --git a/src/gallium/auxiliary/tgsi/tgsi_build.h b/src/gallium/auxiliary/tgsi/tgsi_build.h
index 0fd6fabd83d..17d977b0597 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_build.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_build.h
@@ -119,7 +119,7 @@ tgsi_build_immediate(
struct tgsi_full_immediate
tgsi_default_full_immediate( void );
-struct tgsi_immediate_float32
+union tgsi_immediate_data
tgsi_build_immediate_float32(
float value,
struct tgsi_immediate *immediate,
@@ -289,6 +289,7 @@ struct tgsi_dst_register
tgsi_build_dst_register(
unsigned file,
unsigned mask,
+ unsigned indirect,
int index,
struct tgsi_instruction *instruction,
struct tgsi_header *header );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump.c b/src/gallium/auxiliary/tgsi/tgsi_dump.c
index a784b7cc3c1..05b07a3a73e 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump.c
@@ -27,16 +27,25 @@
#include "util/u_debug.h"
#include "util/u_string.h"
+#include "util/u_math.h"
+#include "util/u_memory.h"
#include "tgsi_dump.h"
#include "tgsi_info.h"
#include "tgsi_iterate.h"
+
+/** Number of spaces to indent for IF/LOOP/etc */
+static const int indent_spaces = 3;
+
+
struct dump_ctx
{
struct tgsi_iterate_context iter;
uint instno;
+ uint indentation;
+
void (*printf)(struct dump_ctx *ctx, const char *format, ...);
};
@@ -80,7 +89,7 @@ static const char *processor_type_names[] =
"GEOM"
};
-static const char *file_names[] =
+static const char *file_names[TGSI_FILE_COUNT] =
{
"NULL",
"CONST",
@@ -89,7 +98,8 @@ static const char *file_names[] =
"TEMP",
"SAMP",
"ADDR",
- "IMM"
+ "IMM",
+ "LOOP"
};
static const char *interpolate_names[] =
@@ -107,7 +117,8 @@ static const char *semantic_names[] =
"FOG",
"PSIZE",
"GENERIC",
- "NORMAL"
+ "NORMAL",
+ "FACE"
};
static const char *immediate_type_names[] =
@@ -223,6 +234,9 @@ iter_declaration(
{
struct dump_ctx *ctx = (struct dump_ctx *)iter;
+ assert(Elements(semantic_names) == TGSI_SEMANTIC_COUNT);
+ assert(Elements(interpolate_names) == TGSI_INTERPOLATE_COUNT);
+
TXT( "DCL " );
_dump_register(
@@ -289,10 +303,12 @@ iter_immediate(
ENM( imm->Immediate.DataType, immediate_type_names );
TXT( " { " );
+
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
for (i = 0; i < imm->Immediate.NrTokens - 1; i++) {
switch (imm->Immediate.DataType) {
case TGSI_IMM_FLOAT32:
- FLT( imm->u.ImmediateFloat32[i].Float );
+ FLT( imm->u[i].Float );
break;
default:
assert( 0 );
@@ -319,6 +335,14 @@ tgsi_dump_immediate(
iter_immediate( &ctx.iter, (struct tgsi_full_immediate *)imm );
}
+static void
+indent(struct dump_ctx *ctx)
+{
+ uint i;
+ for (i = 0; i < ctx->indentation; i++)
+ TXT(" ");
+}
+
static boolean
iter_instruction(
struct tgsi_iterate_context *iter,
@@ -332,6 +356,15 @@ iter_instruction(
INSTID( instno );
TXT( ": " );
+
+ /* update indentation */
+ if (inst->Instruction.Opcode == TGSI_OPCODE_ENDIF ||
+ inst->Instruction.Opcode == TGSI_OPCODE_ENDFOR ||
+ inst->Instruction.Opcode == TGSI_OPCODE_ENDLOOP) {
+ ctx->indentation -= indent_spaces;
+ }
+ indent(ctx);
+
TXT( tgsi_get_opcode_info( inst->Instruction.Opcode )->mnemonic );
switch (inst->Instruction.Saturate) {
@@ -354,11 +387,22 @@ iter_instruction(
CHR( ',' );
CHR( ' ' );
- _dump_register(
- ctx,
- dst->DstRegister.File,
- dst->DstRegister.Index,
- dst->DstRegister.Index );
+ if (dst->DstRegister.Indirect) {
+ _dump_register_ind(
+ ctx,
+ dst->DstRegister.File,
+ dst->DstRegister.Index,
+ dst->DstRegisterInd.File,
+ dst->DstRegisterInd.Index,
+ dst->DstRegisterInd.SwizzleX );
+ }
+ else {
+ _dump_register(
+ ctx,
+ dst->DstRegister.File,
+ dst->DstRegister.Index,
+ dst->DstRegister.Index );
+ }
ENM( dst->DstRegisterExtModulate.Modulate, modulate_names );
_dump_writemask( ctx, dst->DstRegister.WriteMask );
@@ -453,14 +497,22 @@ iter_instruction(
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_IF:
case TGSI_OPCODE_ELSE:
- case TGSI_OPCODE_BGNLOOP2:
- case TGSI_OPCODE_ENDLOOP2:
+ case TGSI_OPCODE_BGNLOOP:
+ case TGSI_OPCODE_ENDLOOP:
case TGSI_OPCODE_CAL:
TXT( " :" );
UID( inst->InstructionExtLabel.Label );
break;
}
+ /* update indentation */
+ if (inst->Instruction.Opcode == TGSI_OPCODE_IF ||
+ inst->Instruction.Opcode == TGSI_OPCODE_ELSE ||
+ inst->Instruction.Opcode == TGSI_OPCODE_BGNFOR ||
+ inst->Instruction.Opcode == TGSI_OPCODE_BGNLOOP) {
+ ctx->indentation += indent_spaces;
+ }
+
EOL();
return TRUE;
@@ -475,6 +527,7 @@ tgsi_dump_instruction(
ctx.instno = instno;
ctx.printf = dump_ctx_printf;
+ ctx.indentation = 0;
iter_instruction( &ctx.iter, (struct tgsi_full_instruction *)inst );
}
@@ -507,6 +560,7 @@ tgsi_dump(
ctx.instno = 0;
ctx.printf = dump_ctx_printf;
+ ctx.indentation = 0;
tgsi_iterate_shader( tokens, &ctx.iter );
}
@@ -516,7 +570,7 @@ struct str_dump_ctx
struct dump_ctx base;
char *str;
char *ptr;
- size_t left;
+ int left;
};
static void
@@ -525,13 +579,20 @@ str_dump_ctx_printf(struct dump_ctx *ctx, const char *format, ...)
struct str_dump_ctx *sctx = (struct str_dump_ctx *)ctx;
if(sctx->left > 1) {
- size_t written;
+ int written;
va_list ap;
va_start(ap, format);
written = util_vsnprintf(sctx->ptr, sctx->left, format, ap);
va_end(ap);
- sctx->ptr += written;
- sctx->left -= written;
+
+ /* Some complicated logic needed to handle the return value of
+ * vsnprintf:
+ */
+ if (written > 0) {
+ written = MIN2(sctx->left, written);
+ sctx->ptr += written;
+ sctx->left -= written;
+ }
}
}
@@ -552,11 +613,12 @@ tgsi_dump_str(
ctx.base.instno = 0;
ctx.base.printf = &str_dump_ctx_printf;
+ ctx.base.indentation = 0;
ctx.str = str;
ctx.str[0] = 0;
ctx.ptr = str;
- ctx.left = size;
+ ctx.left = (int)size;
tgsi_iterate_shader( tokens, &ctx.base.iter );
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
index 3dc61c48ca3..4a9c02b1413 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_dump_c.c
@@ -69,7 +69,7 @@ static const char *TGSI_TOKEN_TYPES[] =
"TOKEN_TYPE_INSTRUCTION"
};
-static const char *TGSI_FILES[] =
+static const char *TGSI_FILES[TGSI_FILE_COUNT] =
{
"FILE_NULL",
"FILE_CONSTANT",
@@ -78,7 +78,8 @@ static const char *TGSI_FILES[] =
"FILE_TEMPORARY",
"FILE_SAMPLER",
"FILE_ADDRESS",
- "FILE_IMMEDIATE"
+ "FILE_IMMEDIATE",
+ "FILE_LOOP"
};
static const char *TGSI_INTERPOLATES[] =
@@ -283,12 +284,13 @@ dump_immediate_verbose(
UIX( imm->Immediate.Padding );
}
+ assert( imm->Immediate.NrTokens <= 4 + 1 );
for( i = 0; i < imm->Immediate.NrTokens - 1; i++ ) {
EOL();
switch( imm->Immediate.DataType ) {
case TGSI_IMM_FLOAT32:
TXT( "\nFloat: " );
- FLT( imm->u.ImmediateFloat32[i].Float );
+ FLT( imm->u[i].Float );
break;
default:
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.c b/src/gallium/auxiliary/tgsi/tgsi_exec.c
index aba7a3f9374..711e86d6edf 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.c
@@ -62,6 +62,9 @@
#define FAST_MATH 1
+/** for tgsi_full_instruction::Flags */
+#define SOA_DEPENDENCY_FLAG 0x1
+
#define TILE_TOP_LEFT 0
#define TILE_TOP_RIGHT 1
#define TILE_BOTTOM_LEFT 2
@@ -182,7 +185,7 @@ print_temp(const struct tgsi_exec_machine *mach, uint index)
* MOV t3, t2;
* The second instruction will have the wrong value for t0 if executed as-is.
*/
-static boolean
+boolean
tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst)
{
uint i, chan;
@@ -301,14 +304,14 @@ tgsi_exec_machine_bind_shader(
case TGSI_TOKEN_TYPE_IMMEDIATE:
{
uint size = parse.FullToken.FullImmediate.Immediate.NrTokens - 1;
- assert( size % 4 == 0 );
- assert( mach->ImmLimit + size / 4 <= TGSI_EXEC_NUM_IMMEDIATES );
+ assert( size <= 4 );
+ assert( mach->ImmLimit + 1 <= TGSI_EXEC_NUM_IMMEDIATES );
for( i = 0; i < size; i++ ) {
- mach->Imms[mach->ImmLimit + i / 4][i % 4] =
- parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+ mach->Imms[mach->ImmLimit][i] =
+ parse.FullToken.FullImmediate.u[i].Float;
}
- mach->ImmLimit += size / 4;
+ mach->ImmLimit += 1;
}
break;
@@ -328,19 +331,24 @@ tgsi_exec_machine_bind_shader(
* sizeof(struct tgsi_full_instruction));
maxInstructions += 10;
}
- memcpy(instructions + numInstructions,
- &parse.FullToken.FullInstruction,
- sizeof(instructions[0]));
-#if 0
if (tgsi_check_soa_dependencies(&parse.FullToken.FullInstruction)) {
- debug_printf("SOA dependency in instruction:\n");
- tgsi_dump_instruction(&parse.FullToken.FullInstruction,
- numInstructions);
+ uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
+ parse.FullToken.FullInstruction.Flags = SOA_DEPENDENCY_FLAG;
+ /* XXX we only handle SOA dependencies properly for MOV/SWZ
+ * at this time!
+ */
+ if (opcode != TGSI_OPCODE_MOV && opcode != TGSI_OPCODE_SWZ) {
+ debug_printf("Warning: SOA dependency in instruction"
+ " is not handled:\n");
+ tgsi_dump_instruction(&parse.FullToken.FullInstruction,
+ numInstructions);
+ }
}
-#else
- (void) tgsi_check_soa_dependencies;
-#endif
+
+ memcpy(instructions + numInstructions,
+ &parse.FullToken.FullInstruction,
+ sizeof(instructions[0]));
numInstructions++;
break;
@@ -365,13 +373,18 @@ tgsi_exec_machine_bind_shader(
}
-void
-tgsi_exec_machine_init(
- struct tgsi_exec_machine *mach )
+struct tgsi_exec_machine *
+tgsi_exec_machine_create( void )
{
+ struct tgsi_exec_machine *mach;
uint i;
- mach->Temps = (struct tgsi_exec_vector *) tgsi_align_128bit( mach->_Temps);
+ mach = align_malloc( sizeof *mach, 16 );
+ if (!mach)
+ goto fail;
+
+ memset(mach, 0, sizeof(*mach));
+
mach->Addrs = &mach->Temps[TGSI_EXEC_TEMP_ADDR];
/* Setup constants. */
@@ -393,22 +406,24 @@ tgsi_exec_machine_init(
(void) print_chan;
(void) print_temp;
#endif
+
+ return mach;
+
+fail:
+ align_free(mach);
+ return NULL;
}
void
-tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach)
+tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach)
{
- if (mach->Instructions) {
+ if (mach) {
FREE(mach->Instructions);
- mach->Instructions = NULL;
- mach->NumInstructions = 0;
- }
- if (mach->Declarations) {
FREE(mach->Declarations);
- mach->Declarations = NULL;
- mach->NumDeclarations = 0;
}
+
+ align_free(mach);
}
@@ -1395,28 +1410,69 @@ store_dest(
union tgsi_exec_channel null;
union tgsi_exec_channel *dst;
uint execmask = mach->ExecMask;
+ int offset = 0; /* indirection offset */
+ int index;
#ifdef DEBUG
check_inf_or_nan(chan);
#endif
+ /* There is an extra source register that indirectly subscripts
+ * a register file. The direct index now becomes an offset
+ * that is being added to the indirect register.
+ *
+ * file[ind[2].x+1],
+ * where:
+ * ind = DstRegisterInd.File
+ * [2] = DstRegisterInd.Index
+ * .x = DstRegisterInd.SwizzleX
+ */
+ if (reg->DstRegister.Indirect) {
+ union tgsi_exec_channel index;
+ union tgsi_exec_channel indir_index;
+ uint swizzle;
+
+ /* which address register (always zero for now) */
+ index.i[0] =
+ index.i[1] =
+ index.i[2] =
+ index.i[3] = reg->DstRegisterInd.Index;
+
+ /* get current value of address register[swizzle] */
+ swizzle = tgsi_util_get_src_register_swizzle( &reg->DstRegisterInd, CHAN_X );
+
+ /* fetch values from the address/indirection register */
+ fetch_src_file_channel(
+ mach,
+ reg->DstRegisterInd.File,
+ swizzle,
+ &index,
+ &indir_index );
+
+ /* save indirection offset */
+ offset = (int) indir_index.f[0];
+ }
+
switch (reg->DstRegister.File) {
case TGSI_FILE_NULL:
dst = &null;
break;
case TGSI_FILE_OUTPUT:
- dst = &mach->Outputs[mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
- + reg->DstRegister.Index].xyzw[chan_index];
+ index = mach->Temps[TEMP_OUTPUT_I].xyzw[TEMP_OUTPUT_C].u[0]
+ + reg->DstRegister.Index;
+ dst = &mach->Outputs[offset + index].xyzw[chan_index];
break;
case TGSI_FILE_TEMPORARY:
- assert( reg->DstRegister.Index < TGSI_EXEC_NUM_TEMPS );
- dst = &mach->Temps[reg->DstRegister.Index].xyzw[chan_index];
+ index = reg->DstRegister.Index;
+ assert( index < TGSI_EXEC_NUM_TEMPS );
+ dst = &mach->Temps[offset + index].xyzw[chan_index];
break;
case TGSI_FILE_ADDRESS:
- dst = &mach->Addrs[reg->DstRegister.Index].xyzw[chan_index];
+ index = reg->DstRegister.Index;
+ dst = &mach->Addrs[index].xyzw[chan_index];
break;
default:
@@ -1966,8 +2022,7 @@ exec_instruction(
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
+ case TGSI_OPCODE_FLR:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
micro_flr( &r[0], &r[0] );
@@ -1977,9 +2032,23 @@ exec_instruction(
case TGSI_OPCODE_MOV:
case TGSI_OPCODE_SWZ:
- FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
- FETCH( &r[0], 0, chan_index );
- STORE( &r[0], 0, chan_index );
+ if (inst->Flags & SOA_DEPENDENCY_FLAG) {
+ /* Do all fetches into temp regs, then do all stores to avoid
+ * intermediate/accidental clobbering. This could be done all the
+ * time for MOV but for other instructions we'll need more temps...
+ */
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[chan_index], 0, chan_index );
+ }
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ STORE( &r[chan_index], 0, chan_index );
+ }
+ }
+ else {
+ FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
+ FETCH( &r[0], 0, chan_index );
+ STORE( &r[0], 0, chan_index );
+ }
}
break;
@@ -2236,8 +2305,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
+ case TGSI_OPCODE_LRP:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
FETCH(&r[1], 1, chan_index);
@@ -2271,8 +2339,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
+ case TGSI_OPCODE_DP2A:
FETCH( &r[0], 0, CHAN_X );
FETCH( &r[1], 1, CHAN_X );
micro_mul( &r[0], &r[0], &r[1] );
@@ -2290,18 +2357,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_INDEX:
- /* XXX: considered for removal */
- assert (0);
- break;
-
- case TGSI_OPCODE_NEGATE:
- /* XXX: considered for removal */
- assert (0);
- break;
-
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
+ case TGSI_OPCODE_FRC:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( &r[0], 0, chan_index );
micro_frc( &r[0], &r[0] );
@@ -2329,8 +2385,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
+ case TGSI_OPCODE_EX2:
FETCH(&r[0], 0, CHAN_X);
#if FAST_MATH
@@ -2344,8 +2399,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
+ case TGSI_OPCODE_LG2:
FETCH( &r[0], 0, CHAN_X );
micro_lg2( &r[0], &r[0] );
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -2353,8 +2407,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
+ case TGSI_OPCODE_POW:
FETCH(&r[0], 0, CHAN_X);
FETCH(&r[1], 1, CHAN_X);
@@ -2365,8 +2418,7 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_CROSSPRODUCT:
- /* TGSI_OPCODE_XPD */
+ case TGSI_OPCODE_XPD:
FETCH(&r[0], 0, CHAN_Y);
FETCH(&r[1], 1, CHAN_Z);
@@ -2408,11 +2460,6 @@ exec_instruction(
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- /* XXX: considered for removal */
- assert (0);
- break;
-
case TGSI_OPCODE_ABS:
FOR_EACH_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH(&r[0], 0, chan_index);
@@ -3056,9 +3103,9 @@ exec_instruction(
mach->Primitives[mach->Temps[TEMP_PRIMITIVE_I].xyzw[TEMP_PRIMITIVE_C].u[0]] = 0;
break;
- case TGSI_OPCODE_LOOP:
+ case TGSI_OPCODE_BGNFOR:
/* fall-through (for now) */
- case TGSI_OPCODE_BGNLOOP2:
+ case TGSI_OPCODE_BGNLOOP:
/* push LoopMask and ContMasks */
assert(mach->LoopStackTop < TGSI_EXEC_MAX_LOOP_NESTING);
mach->LoopStack[mach->LoopStackTop++] = mach->LoopMask;
@@ -3066,9 +3113,9 @@ exec_instruction(
mach->ContStack[mach->ContStackTop++] = mach->ContMask;
break;
- case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_ENDFOR:
/* fall-through (for now at least) */
- case TGSI_OPCODE_ENDLOOP2:
+ case TGSI_OPCODE_ENDLOOP:
/* Restore ContMask, but don't pop */
assert(mach->ContStackTop > 0);
mach->ContMask = mach->ContStack[mach->ContStackTop - 1];
diff --git a/src/gallium/auxiliary/tgsi/tgsi_exec.h b/src/gallium/auxiliary/tgsi/tgsi_exec.h
index da22baad3ef..fd9ef6f35df 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_exec.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_exec.h
@@ -29,6 +29,7 @@
#define TGSI_EXEC_H
#include "pipe/p_compiler.h"
+#include "pipe/p_state.h"
#if defined __cplusplus
extern "C" {
@@ -94,7 +95,6 @@ struct tgsi_exec_labels
#define TGSI_EXEC_NUM_TEMPS 128
-#define TGSI_EXEC_NUM_TEMP_EXTRAS 6
#define TGSI_EXEC_NUM_IMMEDIATES 256
/*
@@ -162,9 +162,14 @@ struct tgsi_exec_labels
#define TGSI_EXEC_MASK_I (TGSI_EXEC_NUM_TEMPS + 3)
#define TGSI_EXEC_MASK_C 2
+/* 4 register buffer for various purposes */
#define TGSI_EXEC_TEMP_R0 (TGSI_EXEC_NUM_TEMPS + 4)
+#define TGSI_EXEC_NUM_TEMP_R 4
+
+#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 8)
+#define TGSI_EXEC_NUM_ADDRS 1
+#define TGSI_EXEC_NUM_TEMP_EXTRAS 9
-#define TGSI_EXEC_TEMP_ADDR (TGSI_EXEC_NUM_TEMPS + 5)
#define TGSI_EXEC_MAX_COND_NESTING 20
@@ -187,24 +192,21 @@ struct tgsi_exec_labels
struct tgsi_exec_machine
{
/* Total = program temporaries + internal temporaries
- * + 1 padding to align to 16 bytes
*/
- struct tgsi_exec_vector _Temps[TGSI_EXEC_NUM_TEMPS +
- TGSI_EXEC_NUM_TEMP_EXTRAS + 1];
+ struct tgsi_exec_vector Temps[TGSI_EXEC_NUM_TEMPS +
+ TGSI_EXEC_NUM_TEMP_EXTRAS];
+
+ float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
+
+ struct tgsi_exec_vector Inputs[PIPE_MAX_ATTRIBS];
+ struct tgsi_exec_vector Outputs[PIPE_MAX_ATTRIBS];
- /*
- * This will point to _Temps after aligning to 16B boundary.
- */
- struct tgsi_exec_vector *Temps;
struct tgsi_exec_vector *Addrs;
struct tgsi_sampler **Samplers;
- float Imms[TGSI_EXEC_NUM_IMMEDIATES][4];
unsigned ImmLimit;
const float (*Consts)[4];
- struct tgsi_exec_vector *Inputs;
- struct tgsi_exec_vector *Outputs;
const struct tgsi_token *Tokens; /**< Declarations, instructions */
unsigned Processor; /**< TGSI_PROCESSOR_x */
@@ -251,9 +253,11 @@ struct tgsi_exec_machine
struct tgsi_exec_labels Labels;
};
+struct tgsi_exec_machine *
+tgsi_exec_machine_create( void );
+
void
-tgsi_exec_machine_init(
- struct tgsi_exec_machine *mach );
+tgsi_exec_machine_destroy(struct tgsi_exec_machine *mach);
void
@@ -272,6 +276,10 @@ void
tgsi_exec_machine_free_data(struct tgsi_exec_machine *mach);
+boolean
+tgsi_check_soa_dependencies(const struct tgsi_full_instruction *inst);
+
+
static INLINE void
tgsi_set_kill_mask(struct tgsi_exec_machine *mach, unsigned mask)
{
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.c b/src/gallium/auxiliary/tgsi/tgsi_info.c
index 37f2b66d1f6..ccf4b205ffb 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.c
@@ -26,136 +26,156 @@
**************************************************************************/
#include "util/u_debug.h"
+#include "util/u_memory.h"
#include "tgsi_info.h"
static const struct tgsi_opcode_info opcode_info[TGSI_OPCODE_LAST] =
{
- { 1, 1, 0, 0, "ARL", NULL, NULL },
- { 1, 1, 0, 0, "MOV", NULL, NULL },
- { 1, 1, 0, 0, "LIT", NULL, NULL },
- { 1, 1, 0, 0, "RCP", "RECIP", NULL },
- { 1, 1, 0, 0, "RSQ", "RECIPSQRT", NULL },
- { 1, 1, 0, 0, "EXP", "EXPP", NULL },
- { 1, 1, 0, 0, "LOG", NULL, NULL },
- { 1, 2, 0, 0, "MUL", NULL, NULL },
- { 1, 2, 0, 0, "ADD", NULL, NULL },
- { 1, 2, 0, 0, "DP3", "DOT3", NULL },
- { 1, 2, 0, 0, "DP4", "DOT4", NULL },
- { 1, 2, 0, 0, "DST", NULL, NULL },
- { 1, 2, 0, 0, "MIN", NULL, NULL },
- { 1, 2, 0, 0, "MAX", NULL, NULL },
- { 1, 2, 0, 0, "SLT", "SETLT", NULL },
- { 1, 2, 0, 0, "SGE", "SETGE", NULL },
- { 1, 3, 0, 0, "MAD", "MADD", NULL },
- { 1, 2, 0, 0, "SUB", NULL, NULL },
- { 1, 3, 0, 0, "LRP", "LERP", NULL },
- { 1, 3, 0, 0, "CND", NULL, NULL },
- { 1, 3, 0, 0, "CND0", NULL, NULL },
- { 1, 3, 0, 0, "DP2A", "DP2ADD", "DOT2ADD" },
- { 1, 2, 0, 0, "INDEX", NULL, NULL },
- { 1, 1, 0, 0, "NEGATE", NULL, NULL },
- { 1, 1, 0, 0, "FRC", "FRAC", NULL },
- { 1, 3, 0, 0, "CLAMP", NULL, NULL },
- { 1, 1, 0, 0, "FLR", "FLOOR", NULL },
- { 1, 1, 0, 0, "ROUND", NULL, NULL },
- { 1, 1, 0, 0, "EX2", "EXPBASE2", NULL },
- { 1, 1, 0, 0, "LG2", "LOGBASE2", "LOGP" },
- { 1, 2, 0, 0, "POW", "POWER", NULL },
- { 1, 2, 0, 0, "XPD", "CRS", "CROSSPRODUCT" },
- { 1, 2, 0, 0, "M4X4", "MULTIPLYMATRIX", NULL },
- { 1, 1, 0, 0, "ABS", NULL, NULL },
- { 1, 1, 0, 0, "RCC", NULL, NULL },
- { 1, 2, 0, 0, "DPH", NULL, NULL },
- { 1, 1, 0, 0, "COS", NULL, NULL },
- { 1, 1, 0, 0, "DDX", "DSX", NULL },
- { 1, 1, 0, 0, "DDY", "DSY", NULL },
- { 0, 0, 0, 0, "KILP", NULL, NULL },
- { 1, 1, 0, 0, "PK2H", NULL, NULL },
- { 1, 1, 0, 0, "PK2US", NULL, NULL },
- { 1, 1, 0, 0, "PK4B", NULL, NULL },
- { 1, 1, 0, 0, "PK4UB", NULL, NULL },
- { 1, 2, 0, 0, "RFL", NULL, NULL },
- { 1, 2, 0, 0, "SEQ", NULL, NULL },
- { 1, 2, 0, 0, "SFL", NULL, NULL },
- { 1, 2, 0, 0, "SGT", NULL, NULL },
- { 1, 1, 0, 0, "SIN", NULL, NULL },
- { 1, 2, 0, 0, "SLE", NULL, NULL },
- { 1, 2, 0, 0, "SNE", NULL, NULL },
- { 1, 2, 0, 0, "STR", NULL, NULL },
- { 1, 2, 1, 0, "TEX", "TEXLD", NULL },
- { 1, 4, 1, 0, "TXD", "TEXLDD", NULL },
- { 1, 2, 1, 0, "TXP", NULL, NULL },
- { 1, 1, 0, 0, "UP2H", NULL, NULL },
- { 1, 1, 0, 0, "UP2US", NULL, NULL },
- { 1, 1, 0, 0, "UP4B", NULL, NULL },
- { 1, 1, 0, 0, "UP4UB", NULL, NULL },
- { 1, 3, 0, 0, "X2D", NULL, NULL },
- { 1, 1, 0, 0, "ARA", NULL, NULL },
- { 1, 1, 0, 0, "ARR", "MOVA", NULL },
- { 0, 1, 0, 0, "BRA", NULL, NULL },
- { 0, 0, 0, 1, "CAL", "CALL", NULL },
- { 0, 0, 0, 0, "RET", NULL, NULL },
- { 1, 1, 0, 0, "SGN", "SSG", NULL },
- { 1, 3, 0, 0, "CMP", NULL, NULL },
- { 1, 1, 0, 0, "SCS", "SINCOS", NULL },
- { 1, 2, 1, 0, "TXB", "TEXLDB", NULL },
- { 1, 1, 0, 0, "NRM", NULL, NULL },
- { 1, 2, 0, 0, "DIV", NULL, NULL },
- { 1, 2, 0, 0, "DP2", NULL, NULL },
- { 1, 2, 1, 0, "TXL", NULL, NULL },
- { 0, 0, 0, 0, "BRK", "BREAK", NULL },
- { 0, 1, 0, 1, "IF", NULL, NULL },
- { 0, 0, 0, 0, "LOOP", NULL, NULL },
- { 0, 1, 0, 0, "REP", NULL, NULL },
- { 0, 0, 0, 1, "ELSE", NULL, NULL },
- { 0, 0, 0, 0, "ENDIF", NULL, NULL },
- { 0, 0, 0, 0, "ENDLOOP", NULL, NULL },
- { 0, 0, 0, 0, "ENDREP", NULL, NULL },
- { 0, 1, 0, 0, "PUSHA", NULL, NULL },
- { 1, 0, 0, 0, "POPA", NULL, NULL },
- { 1, 1, 0, 0, "CEIL", NULL, NULL },
- { 1, 1, 0, 0, "I2F", NULL, NULL },
- { 1, 1, 0, 0, "NOT", NULL, NULL },
- { 1, 1, 0, 0, "INT", "TRUNC", NULL },
- { 1, 2, 0, 0, "SHL", NULL, NULL },
- { 1, 2, 0, 0, "SHR", NULL, NULL },
- { 1, 2, 0, 0, "AND", NULL, NULL },
- { 1, 2, 0, 0, "OR", NULL, NULL },
- { 1, 2, 0, 0, "MOD", NULL, NULL },
- { 1, 2, 0, 0, "XOR", NULL, NULL },
- { 1, 3, 0, 0, "SAD", NULL, NULL },
- { 1, 2, 1, 0, "TXF", NULL, NULL },
- { 1, 2, 1, 0, "TXQ", NULL, NULL },
- { 0, 0, 0, 0, "CONT", NULL, NULL },
- { 0, 0, 0, 0, "EMIT", NULL, NULL },
- { 0, 0, 0, 0, "ENDPRIM", NULL, NULL },
- { 0, 0, 0, 1, "BGNLOOP2", NULL, NULL },
- { 0, 0, 0, 0, "BGNSUB", NULL, NULL },
- { 0, 0, 0, 1, "ENDLOOP2", NULL, NULL },
- { 0, 0, 0, 0, "ENDSUB", NULL, NULL },
- { 1, 1, 0, 0, "NOISE1", NULL, NULL },
- { 1, 1, 0, 0, "NOISE2", NULL, NULL },
- { 1, 1, 0, 0, "NOISE3", NULL, NULL },
- { 1, 1, 0, 0, "NOISE4", NULL, NULL },
- { 0, 0, 0, 0, "NOP", NULL, NULL },
- { 1, 2, 0, 0, "M4X3", NULL, NULL },
- { 1, 2, 0, 0, "M3X4", NULL, NULL },
- { 1, 2, 0, 0, "M3X3", NULL, NULL },
- { 1, 2, 0, 0, "M3X2", NULL, NULL },
- { 1, 1, 0, 0, "NRM4", NULL, NULL },
- { 0, 1, 0, 0, "CALLNZ", NULL, NULL },
- { 0, 1, 0, 0, "IFC", NULL, NULL },
- { 0, 1, 0, 0, "BREAKC", NULL, NULL },
- { 0, 1, 0, 0, "KIL", "TEXKILL", NULL },
- { 0, 0, 0, 0, "END", NULL, NULL },
- { 1, 1, 0, 0, "SWZ", NULL, NULL }
+ { 1, 1, 0, 0, "ARL", TGSI_OPCODE_ARL },
+ { 1, 1, 0, 0, "MOV", TGSI_OPCODE_MOV },
+ { 1, 1, 0, 0, "LIT", TGSI_OPCODE_LIT },
+ { 1, 1, 0, 0, "RCP", TGSI_OPCODE_RCP },
+ { 1, 1, 0, 0, "RSQ", TGSI_OPCODE_RSQ },
+ { 1, 1, 0, 0, "EXP", TGSI_OPCODE_EXP },
+ { 1, 1, 0, 0, "LOG", TGSI_OPCODE_LOG },
+ { 1, 2, 0, 0, "MUL", TGSI_OPCODE_MUL },
+ { 1, 2, 0, 0, "ADD", TGSI_OPCODE_ADD },
+ { 1, 2, 0, 0, "DP3", TGSI_OPCODE_DP3 },
+ { 1, 2, 0, 0, "DP4", TGSI_OPCODE_DP4 },
+ { 1, 2, 0, 0, "DST", TGSI_OPCODE_DST },
+ { 1, 2, 0, 0, "MIN", TGSI_OPCODE_MIN },
+ { 1, 2, 0, 0, "MAX", TGSI_OPCODE_MAX },
+ { 1, 2, 0, 0, "SLT", TGSI_OPCODE_SLT },
+ { 1, 2, 0, 0, "SGE", TGSI_OPCODE_SGE },
+ { 1, 3, 0, 0, "MAD", TGSI_OPCODE_MAD },
+ { 1, 2, 0, 0, "SUB", TGSI_OPCODE_SUB },
+ { 1, 3, 0, 0, "LRP", TGSI_OPCODE_LRP },
+ { 1, 3, 0, 0, "CND", TGSI_OPCODE_CND },
+ { 1, 3, 0, 0, "CND0", TGSI_OPCODE_CND0 },
+ { 1, 3, 0, 0, "DP2A", TGSI_OPCODE_DP2A },
+ { 0, 0, 0, 0, "", 22 }, /* removed */
+ { 0, 0, 0, 0, "", 23 }, /* removed */
+ { 1, 1, 0, 0, "FRC", TGSI_OPCODE_FRC },
+ { 1, 3, 0, 0, "CLAMP", TGSI_OPCODE_CLAMP },
+ { 1, 1, 0, 0, "FLR", TGSI_OPCODE_FLR },
+ { 1, 1, 0, 0, "ROUND", TGSI_OPCODE_ROUND },
+ { 1, 1, 0, 0, "EX2", TGSI_OPCODE_EX2 },
+ { 1, 1, 0, 0, "LG2", TGSI_OPCODE_LG2 },
+ { 1, 2, 0, 0, "POW", TGSI_OPCODE_POW },
+ { 1, 2, 0, 0, "XPD", TGSI_OPCODE_XPD },
+ { 0, 0, 0, 0, "", 32 }, /* removed */
+ { 1, 1, 0, 0, "ABS", TGSI_OPCODE_ABS },
+ { 1, 1, 0, 0, "RCC", TGSI_OPCODE_RCC },
+ { 1, 2, 0, 0, "DPH", TGSI_OPCODE_DPH },
+ { 1, 1, 0, 0, "COS", TGSI_OPCODE_COS },
+ { 1, 1, 0, 0, "DDX", TGSI_OPCODE_DDX },
+ { 1, 1, 0, 0, "DDY", TGSI_OPCODE_DDY },
+ { 0, 0, 0, 0, "KILP", TGSI_OPCODE_KILP },
+ { 1, 1, 0, 0, "PK2H", TGSI_OPCODE_PK2H },
+ { 1, 1, 0, 0, "PK2US", TGSI_OPCODE_PK2US },
+ { 1, 1, 0, 0, "PK4B", TGSI_OPCODE_PK4B },
+ { 1, 1, 0, 0, "PK4UB", TGSI_OPCODE_PK4UB },
+ { 1, 2, 0, 0, "RFL", TGSI_OPCODE_RFL },
+ { 1, 2, 0, 0, "SEQ", TGSI_OPCODE_SEQ },
+ { 1, 2, 0, 0, "SFL", TGSI_OPCODE_SFL },
+ { 1, 2, 0, 0, "SGT", TGSI_OPCODE_SGT },
+ { 1, 1, 0, 0, "SIN", TGSI_OPCODE_SIN },
+ { 1, 2, 0, 0, "SLE", TGSI_OPCODE_SLE },
+ { 1, 2, 0, 0, "SNE", TGSI_OPCODE_SNE },
+ { 1, 2, 0, 0, "STR", TGSI_OPCODE_STR },
+ { 1, 2, 1, 0, "TEX", TGSI_OPCODE_TEX },
+ { 1, 4, 1, 0, "TXD", TGSI_OPCODE_TXD },
+ { 1, 2, 1, 0, "TXP", TGSI_OPCODE_TXP },
+ { 1, 1, 0, 0, "UP2H", TGSI_OPCODE_UP2H },
+ { 1, 1, 0, 0, "UP2US", TGSI_OPCODE_UP2US },
+ { 1, 1, 0, 0, "UP4B", TGSI_OPCODE_UP4B },
+ { 1, 1, 0, 0, "UP4UB", TGSI_OPCODE_UP4UB },
+ { 1, 3, 0, 0, "X2D", TGSI_OPCODE_X2D },
+ { 1, 1, 0, 0, "ARA", TGSI_OPCODE_ARA },
+ { 1, 1, 0, 0, "ARR", TGSI_OPCODE_ARR },
+ { 0, 1, 0, 0, "BRA", TGSI_OPCODE_BRA },
+ { 0, 0, 0, 1, "CAL", TGSI_OPCODE_CAL },
+ { 0, 0, 0, 0, "RET", TGSI_OPCODE_RET },
+ { 1, 1, 0, 0, "SSG", TGSI_OPCODE_SSG },
+ { 1, 3, 0, 0, "CMP", TGSI_OPCODE_CMP },
+ { 1, 1, 0, 0, "SCS", TGSI_OPCODE_SCS },
+ { 1, 2, 1, 0, "TXB", TGSI_OPCODE_TXB },
+ { 1, 1, 0, 0, "NRM", TGSI_OPCODE_NRM },
+ { 1, 2, 0, 0, "DIV", TGSI_OPCODE_DIV },
+ { 1, 2, 0, 0, "DP2", TGSI_OPCODE_DP2 },
+ { 1, 2, 1, 0, "TXL", TGSI_OPCODE_TXL },
+ { 0, 0, 0, 0, "BRK", TGSI_OPCODE_BRK },
+ { 0, 1, 0, 1, "IF", TGSI_OPCODE_IF },
+ { 1, 1, 0, 0, "BGNFOR", TGSI_OPCODE_BGNFOR },
+ { 0, 1, 0, 0, "REP", TGSI_OPCODE_REP },
+ { 0, 0, 0, 1, "ELSE", TGSI_OPCODE_ELSE },
+ { 0, 0, 0, 0, "ENDIF", TGSI_OPCODE_ENDIF },
+ { 1, 0, 0, 0, "ENDFOR", TGSI_OPCODE_ENDFOR },
+ { 0, 0, 0, 0, "ENDREP", TGSI_OPCODE_ENDREP },
+ { 0, 1, 0, 0, "PUSHA", TGSI_OPCODE_PUSHA },
+ { 1, 0, 0, 0, "POPA", TGSI_OPCODE_POPA },
+ { 1, 1, 0, 0, "CEIL", TGSI_OPCODE_CEIL },
+ { 1, 1, 0, 0, "I2F", TGSI_OPCODE_I2F },
+ { 1, 1, 0, 0, "NOT", TGSI_OPCODE_NOT },
+ { 1, 1, 0, 0, "TRUNC", TGSI_OPCODE_TRUNC },
+ { 1, 2, 0, 0, "SHL", TGSI_OPCODE_SHL },
+ { 1, 2, 0, 0, "SHR", TGSI_OPCODE_SHR },
+ { 1, 2, 0, 0, "AND", TGSI_OPCODE_AND },
+ { 1, 2, 0, 0, "OR", TGSI_OPCODE_OR },
+ { 1, 2, 0, 0, "MOD", TGSI_OPCODE_MOD },
+ { 1, 2, 0, 0, "XOR", TGSI_OPCODE_XOR },
+ { 1, 3, 0, 0, "SAD", TGSI_OPCODE_SAD },
+ { 1, 2, 1, 0, "TXF", TGSI_OPCODE_TXF },
+ { 1, 2, 1, 0, "TXQ", TGSI_OPCODE_TXQ },
+ { 0, 0, 0, 0, "CONT", TGSI_OPCODE_CONT },
+ { 0, 0, 0, 0, "EMIT", TGSI_OPCODE_EMIT },
+ { 0, 0, 0, 0, "ENDPRIM", TGSI_OPCODE_ENDPRIM },
+ { 0, 0, 0, 1, "BGNLOOP", TGSI_OPCODE_BGNLOOP },
+ { 0, 0, 0, 0, "BGNSUB", TGSI_OPCODE_BGNSUB },
+ { 0, 0, 0, 1, "ENDLOOP", TGSI_OPCODE_ENDLOOP },
+ { 0, 0, 0, 0, "ENDSUB", TGSI_OPCODE_ENDSUB },
+ { 1, 1, 0, 0, "NOISE1", TGSI_OPCODE_NOISE1 },
+ { 1, 1, 0, 0, "NOISE2", TGSI_OPCODE_NOISE2 },
+ { 1, 1, 0, 0, "NOISE3", TGSI_OPCODE_NOISE3 },
+ { 1, 1, 0, 0, "NOISE4", TGSI_OPCODE_NOISE4 },
+ { 0, 0, 0, 0, "NOP", TGSI_OPCODE_NOP },
+ { 0, 0, 0, 0, "", 108 }, /* removed */
+ { 0, 0, 0, 0, "", 109 }, /* removed */
+ { 0, 0, 0, 0, "", 110 }, /* removed */
+ { 0, 0, 0, 0, "", 111 }, /* removed */
+ { 1, 1, 0, 0, "NRM4", TGSI_OPCODE_NRM4 },
+ { 0, 1, 0, 0, "CALLNZ", TGSI_OPCODE_CALLNZ },
+ { 0, 1, 0, 0, "IFC", TGSI_OPCODE_IFC },
+ { 0, 1, 0, 0, "BREAKC", TGSI_OPCODE_BREAKC },
+ { 0, 1, 0, 0, "KIL", TGSI_OPCODE_KIL },
+ { 0, 0, 0, 0, "END", TGSI_OPCODE_END },
+ { 1, 1, 0, 0, "SWZ", TGSI_OPCODE_SWZ }
};
const struct tgsi_opcode_info *
tgsi_get_opcode_info( uint opcode )
{
+ static boolean firsttime = 1;
+
+ if (firsttime) {
+ unsigned i;
+ firsttime = 0;
+ for (i = 0; i < Elements(opcode_info); i++)
+ assert(opcode_info[i].opcode == i);
+ }
+
if (opcode < TGSI_OPCODE_LAST)
return &opcode_info[opcode];
+
assert( 0 );
return NULL;
}
+
+
+const char *
+tgsi_get_opcode_name( uint opcode )
+{
+ const struct tgsi_opcode_info *info = tgsi_get_opcode_info(opcode);
+ return info->mnemonic;
+}
+
diff --git a/src/gallium/auxiliary/tgsi/tgsi_info.h b/src/gallium/auxiliary/tgsi/tgsi_info.h
index 077e25acd7f..b2375c69710 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_info.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_info.h
@@ -41,13 +41,16 @@ struct tgsi_opcode_info
boolean is_tex;
boolean is_branch;
const char *mnemonic;
- const char *alt_mnemonic1;
- const char *alt_mnemonic2;
+ uint opcode;
};
const struct tgsi_opcode_info *
tgsi_get_opcode_info( uint opcode );
+const char *
+tgsi_get_opcode_name( uint opcode );
+
+
#if defined __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
new file mode 100644
index 00000000000..ed594a3e2c7
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_opcode_tmp.h
@@ -0,0 +1,173 @@
+/**************************************************************************
+ *
+ * Copyright 2008 Tungsten Graphics, Inc., Cedar Park, Texas.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL TUNGSTEN GRAPHICS AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+#ifndef OP12_TEX
+#define OP12_TEX(a) OP12(a)
+#endif
+
+#ifndef OP14_TEX
+#define OP14_TEX(a) OP14(a)
+#endif
+
+#ifndef OP00_LBL
+#define OP00_LBL(a) OP00(a)
+#endif
+
+#ifndef OP01_LBL
+#define OP01_LBL(a) OP01(a)
+#endif
+
+OP11(ARL)
+OP11(MOV)
+OP11(LIT)
+OP11(RCP)
+OP11(RSQ)
+OP11(EXP)
+OP11(LOG)
+OP12(MUL)
+OP12(ADD)
+OP12(DP3)
+OP12(DP4)
+OP12(DST)
+OP12(MIN)
+OP12(MAX)
+OP12(SLT)
+OP12(SGE)
+OP13(MAD)
+OP12(SUB)
+OP13(LRP)
+OP13(CND)
+OP13(CND0)
+OP13(DP2A)
+OP11(FRC)
+OP13(CLAMP)
+OP11(FLR)
+OP11(ROUND)
+OP11(EX2)
+OP11(LG2)
+OP12(POW)
+OP12(XPD)
+OP11(ABS)
+OP11(RCC)
+OP12(DPH)
+OP11(COS)
+OP11(DDX)
+OP11(DDY)
+OP00(KILP)
+OP11(PK2H)
+OP11(PK2US)
+OP11(PK4B)
+OP11(PK4UB)
+OP12(RFL)
+OP12(SEQ)
+OP12(SFL)
+OP12(SGT)
+OP11(SIN)
+OP12(SLE)
+OP12(SNE)
+OP12(STR)
+OP12_TEX(TEX)
+OP14_TEX(TXD)
+OP12_TEX(TXP)
+OP11(UP2H)
+OP11(UP2US)
+OP11(UP4B)
+OP11(UP4UB)
+OP13(X2D)
+OP11(ARA)
+OP11(ARR)
+OP01(BRA)
+OP00_LBL(CAL)
+OP00(RET)
+OP11(SSG)
+OP13(CMP)
+OP11(SCS)
+OP12_TEX(TXB)
+OP11(NRM)
+OP12(DIV)
+OP12(DP2)
+OP12_TEX(TXL)
+OP00(BRK)
+OP01_LBL(IF)
+OP11(BGNFOR)
+OP01(REP)
+OP00_LBL(ELSE)
+OP00(ENDIF)
+OP10(ENDFOR)
+OP00(ENDREP)
+OP01(PUSHA)
+OP10(POPA)
+OP11(CEIL)
+OP11(I2F)
+OP11(NOT)
+OP11(TRUNC)
+OP12(SHL)
+OP12(SHR)
+OP12(AND)
+OP12(OR)
+OP12(MOD)
+OP12(XOR)
+OP13(SAD)
+OP12_TEX(TXF)
+OP12_TEX(TXQ)
+OP00(CONT)
+OP00(EMIT)
+OP00(ENDPRIM)
+OP00_LBL(BGNLOOP)
+OP00(BGNSUB)
+OP00_LBL(ENDLOOP)
+OP00(ENDSUB)
+OP11(NOISE1)
+OP11(NOISE2)
+OP11(NOISE3)
+OP11(NOISE4)
+OP00(NOP)
+OP11(NRM4)
+OP01(CALLNZ)
+OP01(IFC)
+OP01(BREAKC)
+OP01(KIL)
+OP00(END)
+OP11(SWZ)
+
+
+#undef OP00
+#undef OP01
+#undef OP10
+#undef OP11
+#undef OP12
+#undef OP13
+
+#ifdef OP14
+#undef OP14
+#endif
+
+#undef OP00_LBL
+#undef OP01_LBL
+
+#undef OP12_TEX
+#undef OP14_TEX
+
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.c b/src/gallium/auxiliary/tgsi/tgsi_parse.c
index 0081f74ffc4..4870f82b6bd 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.c
@@ -42,9 +42,6 @@ void
tgsi_full_token_free(
union tgsi_full_token *full_token )
{
- if( full_token->Token.Type == TGSI_TOKEN_TYPE_IMMEDIATE ) {
- FREE( (void *) full_token->FullImmediate.u.Pointer );
- }
}
unsigned
@@ -156,14 +153,8 @@ tgsi_parse_token(
case TGSI_IMM_FLOAT32:
{
uint imm_count = imm->Immediate.NrTokens - 1;
- struct tgsi_immediate_float32 *data;
-
- data = (struct tgsi_immediate_float32 *) MALLOC(sizeof(struct tgsi_immediate_float32) * imm_count);
- if (data) {
- for (i = 0; i < imm_count; i++) {
- next_token(ctx, &data[i]);
- }
- imm->u.ImmediateFloat32 = data;
+ for (i = 0; i < imm_count; i++) {
+ next_token(ctx, &imm->u[i]);
}
}
break;
@@ -219,7 +210,6 @@ tgsi_parse_token(
/*
* No support for indirect or multi-dimensional addressing.
*/
- assert( !inst->FullDstRegisters[i].DstRegister.Indirect );
assert( !inst->FullDstRegisters[i].DstRegister.Dimension );
extended = inst->FullDstRegisters[i].DstRegister.Extended;
@@ -246,6 +236,17 @@ tgsi_parse_token(
extended = token.Extended;
}
+
+ if( inst->FullDstRegisters[i].DstRegister.Indirect ) {
+ next_token( ctx, &inst->FullDstRegisters[i].DstRegisterInd );
+
+ /*
+ * The indirect register itself must not be indirect, multi-dimensional or extended.
+ */
+ assert( !inst->FullDstRegisters[i].DstRegisterInd.Indirect );
+ assert( !inst->FullDstRegisters[i].DstRegisterInd.Dimension );
+ assert( !inst->FullDstRegisters[i].DstRegisterInd.Extended );
+ }
}
assert( inst->Instruction.NumSrcRegs <= TGSI_FULL_MAX_SRC_REGISTERS );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_parse.h b/src/gallium/auxiliary/tgsi/tgsi_parse.h
index 054350712d8..a26ee5ba862 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_parse.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_parse.h
@@ -48,6 +48,7 @@ struct tgsi_full_header
struct tgsi_full_dst_register
{
struct tgsi_dst_register DstRegister;
+ struct tgsi_src_register DstRegisterInd;
struct tgsi_dst_register_ext_concode DstRegisterExtConcode;
struct tgsi_dst_register_ext_modulate DstRegisterExtModulate;
};
@@ -72,11 +73,7 @@ struct tgsi_full_declaration
struct tgsi_full_immediate
{
struct tgsi_immediate Immediate;
- union
- {
- const void *Pointer;
- const struct tgsi_immediate_float32 *ImmediateFloat32;
- } u;
+ union tgsi_immediate_data u[4];
};
#define TGSI_FULL_MAX_DST_REGISTERS 2
@@ -90,6 +87,7 @@ struct tgsi_full_instruction
struct tgsi_instruction_ext_texture InstructionExtTexture;
struct tgsi_full_dst_register FullDstRegisters[TGSI_FULL_MAX_DST_REGISTERS];
struct tgsi_full_src_register FullSrcRegisters[TGSI_FULL_MAX_SRC_REGISTERS];
+ uint Flags; /**< user-defined usage */
};
union tgsi_full_token
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ppc.c b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
index 0c64ae57131..4b1c7d4e01b 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_ppc.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_ppc.c
@@ -38,6 +38,7 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_sse.h"
+#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_dump.h"
@@ -619,17 +620,17 @@ emit_unaryop(struct gen_context *gen, struct tgsi_full_instruction *inst)
ppc_vandc(gen->f, v1, v0, bit31_vec); /* v1 = v0 & ~bit31 */
}
break;
- case TGSI_OPCODE_FLOOR:
+ case TGSI_OPCODE_FLR:
ppc_vrfim(gen->f, v1, v0); /* v1 = floor(v0) */
break;
- case TGSI_OPCODE_FRAC:
+ case TGSI_OPCODE_FRC:
ppc_vrfim(gen->f, v1, v0); /* tmp = floor(v0) */
ppc_vsubfp(gen->f, v1, v0, v1); /* v1 = v0 - v1 */
break;
- case TGSI_OPCODE_EXPBASE2:
+ case TGSI_OPCODE_EX2:
ppc_vexptefp(gen->f, v1, v0); /* v1 = 2^v0 */
break;
- case TGSI_OPCODE_LOGBASE2:
+ case TGSI_OPCODE_LG2:
/* XXX this may be broken! */
ppc_vlogefp(gen->f, v1, v0); /* v1 = log2(v0) */
break;
@@ -1107,14 +1108,23 @@ static int
emit_instruction(struct gen_context *gen,
struct tgsi_full_instruction *inst)
{
+
+ /* we don't handle saturation/clamping yet */
+ if (inst->Instruction.Saturate != TGSI_SAT_NONE)
+ return 0;
+
+ /* bail out: handling SOA dependencies would need extra temps */
+ if (tgsi_check_soa_dependencies(inst))
+ return FALSE;
+
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_MOV:
case TGSI_OPCODE_SWZ:
case TGSI_OPCODE_ABS:
- case TGSI_OPCODE_FLOOR:
- case TGSI_OPCODE_FRAC:
- case TGSI_OPCODE_EXPBASE2:
- case TGSI_OPCODE_LOGBASE2:
+ case TGSI_OPCODE_FLR:
+ case TGSI_OPCODE_FRC:
+ case TGSI_OPCODE_EX2:
+ case TGSI_OPCODE_LG2:
emit_unaryop(gen, inst);
break;
case TGSI_OPCODE_RSQ:
@@ -1317,8 +1327,10 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
ok = emit_instruction(&gen, &parse.FullToken.FullInstruction);
if (!ok) {
- debug_printf("failed to translate tgsi opcode %d to PPC (%s)\n",
- parse.FullToken.FullInstruction.Instruction.Opcode,
+ uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
+ debug_printf("failed to translate tgsi opcode %d (%s) to PPC (%s)\n",
+ opcode,
+ tgsi_get_opcode_name(opcode),
parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
"vertex shader" : "fragment shader");
}
@@ -1333,7 +1345,7 @@ tgsi_emit_ppc(const struct tgsi_token *tokens,
assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
for (i = 0; i < size; i++) {
immediates[num_immediates][i] =
- parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+ parse.FullToken.FullImmediate.u[i].Float;
}
num_immediates++;
}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sanity.c b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
index 6f1f5c2b4b0..4fe8553c423 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sanity.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sanity.c
@@ -131,7 +131,7 @@ is_register_used(
return (ctx->regs_used[file][index / BITS_IN_REG_FLAG] & (1 << (index % BITS_IN_REG_FLAG))) ? TRUE : FALSE;
}
-static const char *file_names[] =
+static const char *file_names[TGSI_FILE_COUNT] =
{
"NULL",
"CONST",
@@ -140,7 +140,8 @@ static const char *file_names[] =
"TEMP",
"SAMP",
"ADDR",
- "IMM"
+ "IMM",
+ "LOOP"
};
static boolean
@@ -234,9 +235,29 @@ iter_instruction(
index,
"indirect",
FALSE );
- if (file != TGSI_FILE_ADDRESS || index != 0)
- report_warning( ctx, "Indirect register not ADDR[0]" );
+ if (!(file == TGSI_FILE_ADDRESS || file == TGSI_FILE_LOOP) || index != 0) {
+ report_warning(ctx, "Indirect register neither ADDR[0] nor LOOP[0]");
+ }
+ }
+ }
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_BGNFOR:
+ case TGSI_OPCODE_ENDFOR:
+ if (inst->FullDstRegisters[0].DstRegister.File != TGSI_FILE_LOOP ||
+ inst->FullDstRegisters[0].DstRegister.Index != 0) {
+ report_error(ctx, "Destination register must be LOOP[0]");
+ }
+ break;
+ }
+
+ switch (inst->Instruction.Opcode) {
+ case TGSI_OPCODE_BGNFOR:
+ if (inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_CONSTANT &&
+ inst->FullSrcRegisters[0].SrcRegister.File != TGSI_FILE_IMMEDIATE) {
+ report_error(ctx, "Source register file must be either CONST or IMM");
}
+ break;
}
ctx->num_instructions++;
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.c b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
index ba2bfdef062..46f2387c158 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.c
@@ -32,9 +32,11 @@
#include "util/u_debug.h"
#include "pipe/p_shader_tokens.h"
#include "util/u_math.h"
+#include "util/u_memory.h"
#if defined(PIPE_ARCH_SSE)
#include "util/u_sse.h"
#endif
+#include "tgsi/tgsi_info.h"
#include "tgsi/tgsi_parse.h"
#include "tgsi/tgsi_util.h"
#include "tgsi_exec.h"
@@ -100,37 +102,55 @@ get_const_base( void )
{
return x86_make_reg(
file_REG32,
- reg_CX );
+ reg_AX );
}
static struct x86_reg
-get_input_base( void )
+get_machine_base( void )
{
return x86_make_reg(
file_REG32,
- reg_AX );
+ reg_CX );
+}
+
+static struct x86_reg
+get_input_base( void )
+{
+ return x86_make_disp(
+ get_machine_base(),
+ Offset(struct tgsi_exec_machine, Inputs) );
}
static struct x86_reg
get_output_base( void )
{
- return x86_make_reg(
- file_REG32,
- reg_DX );
+ return x86_make_disp(
+ get_machine_base(),
+ Offset(struct tgsi_exec_machine, Outputs) );
}
static struct x86_reg
get_temp_base( void )
{
+ return x86_make_disp(
+ get_machine_base(),
+ Offset(struct tgsi_exec_machine, Temps) );
+}
+
+static struct x86_reg
+get_coef_base( void )
+{
return x86_make_reg(
file_REG32,
reg_BX );
}
static struct x86_reg
-get_coef_base( void )
+get_sampler_base( void )
{
- return get_output_base();
+ return x86_make_reg(
+ file_REG32,
+ reg_DI );
}
static struct x86_reg
@@ -138,7 +158,7 @@ get_immediate_base( void )
{
return x86_make_reg(
file_REG32,
- reg_DI );
+ reg_DX );
}
@@ -168,6 +188,15 @@ get_const(
}
static struct x86_reg
+get_sampler_ptr(
+ unsigned unit )
+{
+ return x86_make_disp(
+ get_sampler_base(),
+ unit * sizeof( struct tgsi_sampler * ) );
+}
+
+static struct x86_reg
get_input(
unsigned vec,
unsigned chan )
@@ -241,12 +270,14 @@ emit_const(
/* 'vec' is the offset from the address register's value.
* We're loading CONST[ADDR+vec] into an xmm register.
*/
- struct x86_reg r0 = get_input_base();
- struct x86_reg r1 = get_output_base();
+ struct x86_reg r0 = get_immediate_base();
+ struct x86_reg r1 = get_coef_base();
uint i;
assert( indirectFile == TGSI_FILE_ADDRESS );
assert( indirectIndex == 0 );
+ assert( r0.mod == mod_REG );
+ assert( r1.mod == mod_REG );
x86_push( func, r0 );
x86_push( func, r1 );
@@ -520,24 +551,15 @@ emit_coef_dady(
* that the stack pointer is 16 byte aligned, as expected.
*/
static void
-emit_func_call_dst(
+emit_func_call(
struct x86_function *func,
- unsigned xmm_save,
- unsigned xmm_dst,
+ unsigned xmm_save_mask,
+ const struct x86_reg *arg,
+ unsigned nr_args,
void (PIPE_CDECL *code)() )
{
struct x86_reg ecx = x86_make_reg( file_REG32, reg_CX );
unsigned i, n;
- unsigned xmm_mask;
-
- /* Bitmask of the xmm registers to save */
- xmm_mask = (1 << xmm_save) - 1;
- xmm_mask &= ~(1 << xmm_dst);
-
- sse_movaps(
- func,
- get_temp( TEMP_R0, 0 ),
- make_xmm( xmm_dst ) );
x86_push(
func,
@@ -549,8 +571,10 @@ emit_func_call_dst(
func,
x86_make_reg( file_REG32, reg_DX) );
+ /* Store XMM regs to the stack
+ */
for(i = 0, n = 0; i < 8; ++i)
- if(xmm_mask & (1 << i))
+ if(xmm_save_mask & (1 << i))
++n;
x86_sub_imm(
@@ -559,26 +583,42 @@ emit_func_call_dst(
n*16);
for(i = 0, n = 0; i < 8; ++i)
- if(xmm_mask & (1 << i)) {
+ if(xmm_save_mask & (1 << i)) {
sse_movups(
func,
x86_make_disp( x86_make_reg( file_REG32, reg_SP ), n*16 ),
make_xmm( i ) );
++n;
}
+
+ for (i = 0; i < nr_args; i++) {
+ /* Load the address of the buffer we use for passing arguments and
+ * receiving results:
+ */
+ x86_lea(
+ func,
+ ecx,
+ arg[i] );
- x86_lea(
- func,
- ecx,
- get_temp( TEMP_R0, 0 ) );
-
- x86_push( func, ecx );
+ /* Push the address as a function argument; the call itself is
+ * emitted once all arguments have been pushed.
+ */
+ x86_push( func, ecx );
+ }
+
x86_mov_reg_imm( func, ecx, (unsigned long) code );
x86_call( func, ecx );
- x86_pop(func, ecx );
-
+
+ /* Pop the arguments (or just add an immediate to esp)
+ */
+ for (i = 0; i < nr_args; i++) {
+ x86_pop(func, ecx );
+ }
+
+ /* Pop the saved XMM regs:
+ */
for(i = 0, n = 0; i < 8; ++i)
- if(xmm_mask & (1 << i)) {
+ if(xmm_save_mask & (1 << i)) {
sse_movups(
func,
make_xmm( i ),
@@ -602,34 +642,86 @@ emit_func_call_dst(
x86_pop(
func,
x86_make_reg( file_REG32, reg_AX) );
+}
+
+static void
+emit_func_call_dst_src1(
+ struct x86_function *func,
+ unsigned xmm_save,
+ unsigned xmm_dst,
+ unsigned xmm_src0,
+ void (PIPE_CDECL *code)() )
+{
+ struct x86_reg store = get_temp( TEMP_R0, 0 );
+ unsigned xmm_mask = ((1 << xmm_save) - 1) & ~(1 << xmm_dst);
+
+ /* Store our input parameters (in xmm regs) to the buffer we use
+ * for passing arguments. We will pass a pointer to this buffer as
+ * the actual function argument.
+ */
+ sse_movaps(
+ func,
+ store,
+ make_xmm( xmm_src0 ) );
+
+ emit_func_call( func,
+ xmm_mask,
+ &store,
+ 1,
+ code );
sse_movaps(
func,
make_xmm( xmm_dst ),
- get_temp( TEMP_R0, 0 ) );
+ store );
}
+
static void
-emit_func_call_dst_src(
+emit_func_call_dst_src2(
struct x86_function *func,
unsigned xmm_save,
unsigned xmm_dst,
- unsigned xmm_src,
+ unsigned xmm_src0,
+ unsigned xmm_src1,
void (PIPE_CDECL *code)() )
{
+ struct x86_reg store = get_temp( TEMP_R0, 0 );
+ unsigned xmm_mask = ((1 << xmm_save) - 1) & ~(1 << xmm_dst);
+
+ /* Store two inputs to parameter buffer.
+ */
sse_movaps(
func,
- get_temp( TEMP_R0, 1 ),
- make_xmm( xmm_src ) );
+ store,
+ make_xmm( xmm_src0 ) );
- emit_func_call_dst(
+ sse_movaps(
func,
- xmm_save,
- xmm_dst,
- code );
+ x86_make_disp( store, 4 * sizeof(float) ),
+ make_xmm( xmm_src1 ) );
+
+
+ /* Emit the call
+ */
+ emit_func_call( func,
+ xmm_mask,
+ &store,
+ 1,
+ code );
+
+ /* Retrieve the results:
+ */
+ sse_movaps(
+ func,
+ make_xmm( xmm_dst ),
+ store );
}
+
+
+
#if defined(PIPE_ARCH_SSE)
/*
@@ -782,10 +874,11 @@ emit_cos(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
cos4f );
}
@@ -812,10 +905,11 @@ emit_ex2(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
ex24f );
}
@@ -857,10 +951,11 @@ emit_flr(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
flr4f );
}
@@ -880,10 +975,11 @@ emit_frc(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
frc4f );
}
@@ -910,10 +1006,11 @@ emit_lg2(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
lg24f );
}
@@ -975,13 +1072,15 @@ emit_pow(
struct x86_function *func,
unsigned xmm_save,
unsigned xmm_dst,
- unsigned xmm_src )
+ unsigned xmm_src0,
+ unsigned xmm_src1 )
{
- emit_func_call_dst_src(
+ emit_func_call_dst_src2(
func,
xmm_save,
xmm_dst,
- xmm_src,
+ xmm_src0,
+ xmm_src1,
pow4f );
}
@@ -1017,10 +1116,11 @@ emit_rnd(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
rnd4f );
}
@@ -1099,10 +1199,11 @@ emit_sgn(
unsigned xmm_save,
unsigned xmm_dst )
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
sgn4f );
}
@@ -1121,10 +1222,11 @@ emit_sin (struct x86_function *func,
unsigned xmm_save,
unsigned xmm_dst)
{
- emit_func_call_dst(
+ emit_func_call_dst_src1(
func,
xmm_save,
xmm_dst,
+ xmm_dst,
sin4f );
}
@@ -1140,6 +1242,12 @@ emit_sub(
make_xmm( xmm_src ) );
}
+
+
+
+
+
+
/**
* Register fetch.
*/
@@ -1298,20 +1406,164 @@ emit_store(
#define STORE( FUNC, INST, XMM, INDEX, CHAN )\
emit_store( FUNC, XMM, &(INST).FullDstRegisters[INDEX], &(INST), CHAN )
+
+static void PIPE_CDECL
+fetch_texel( struct tgsi_sampler **sampler,
+ float *store )
+{
+#if 0
+ uint j;
+
+ debug_printf("%s sampler: %p (%p) store: %p\n",
+ __FUNCTION__,
+ sampler, *sampler,
+ store );
+
+ debug_printf("lodbias %f\n", store[12]);
+
+ for (j = 0; j < 4; j++)
+ debug_printf("sample %d texcoord %f %f\n",
+ j,
+ store[0+j],
+ store[4+j]);
+#endif
+
+ {
+ float rgba[NUM_CHANNELS][QUAD_SIZE];
+ (*sampler)->get_samples(*sampler,
+ &store[0],
+ &store[4],
+ &store[8],
+ 0.0f, /*store[12], lodbias */
+ rgba);
+
+ memcpy( store, rgba, 16 * sizeof(float));
+ }
+
+#if 0
+ for (j = 0; j < 4; j++)
+ debug_printf("sample %d result %f %f %f %f\n",
+ j,
+ store[0+j],
+ store[4+j],
+ store[8+j],
+ store[12+j]);
+#endif
+}
+
/**
* High-level instruction translators.
*/
static void
+emit_tex( struct x86_function *func,
+ const struct tgsi_full_instruction *inst,
+ boolean lodbias,
+ boolean projected)
+{
+ const uint unit = inst->FullSrcRegisters[1].SrcRegister.Index;
+ struct x86_reg args[2];
+ unsigned count;
+ unsigned i;
+
+ switch (inst->InstructionExtTexture.Texture) {
+ case TGSI_TEXTURE_1D:
+ count = 1;
+ break;
+ case TGSI_TEXTURE_2D:
+ case TGSI_TEXTURE_RECT:
+ count = 2;
+ break;
+ case TGSI_TEXTURE_SHADOW1D:
+ case TGSI_TEXTURE_SHADOW2D:
+ case TGSI_TEXTURE_SHADOWRECT:
+ case TGSI_TEXTURE_3D:
+ case TGSI_TEXTURE_CUBE:
+ count = 3;
+ break;
+ default:
+ assert(0);
+ return;
+ }
+
+ if (lodbias) {
+ FETCH( func, *inst, 3, 0, 3 );
+ }
+ else {
+ emit_tempf(
+ func,
+ 3,
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C );
+
+ }
+
+ /* Store lodbias whether enabled or not.  Note that fetch_texel currently
+ * ignores this slot and always passes 0.0f as the lodbias.
+ */
+ sse_movaps( func,
+ get_temp( TEMP_R0, 3 ),
+ make_xmm( 3 ) );
+
+
+ if (projected) {
+ FETCH( func, *inst, 3, 0, 3 );
+
+ emit_rcp( func, 3, 3 );
+ }
+
+ for (i = 0; i < count; i++) {
+ FETCH( func, *inst, i, 0, i );
+
+ if (projected) {
+ sse_mulps(
+ func,
+ make_xmm( i ),
+ make_xmm( 3 ) );
+ }
+
+ /* Store in the argument buffer:
+ */
+ sse_movaps(
+ func,
+ get_temp( TEMP_R0, i ),
+ make_xmm( i ) );
+ }
+
+ args[0] = get_temp( TEMP_R0, 0 );
+ args[1] = get_sampler_ptr( unit );
+
+
+ emit_func_call( func,
+ 0,
+ args,
+ Elements(args),
+ fetch_texel );
+
+ /* If all four channels are enabled, could use a pointer to
+ * dst[0].x instead of TEMP_R0 for store?
+ */
+ FOR_EACH_DST0_ENABLED_CHANNEL( *inst, i ) {
+
+ sse_movaps(
+ func,
+ make_xmm( 0 ),
+ get_temp( TEMP_R0, i ) );
+
+ STORE( func, *inst, 0, 0, i );
+ }
+}
+
+
+static void
emit_kil(
struct x86_function *func,
const struct tgsi_full_src_register *reg )
{
unsigned uniquemask;
- unsigned registers[4];
- unsigned nextregister = 0;
- unsigned firstchan = ~0;
+ unsigned unique_count = 0;
unsigned chan_index;
+ unsigned i;
/* This mask stores component bits that were already tested. Note that
* we test if the value is less than zero, so 1.0 and 0.0 need not to be
@@ -1331,18 +1583,11 @@ emit_kil(
uniquemask |= 1 << swizzle;
/* allocate register */
- registers[chan_index] = nextregister;
emit_fetch(
func,
- nextregister,
+ unique_count++,
reg,
chan_index );
- nextregister++;
-
- /* mark the first channel used */
- if( firstchan == ~0 ) {
- firstchan = chan_index;
- }
}
}
@@ -1353,32 +1598,32 @@ emit_kil(
func,
x86_make_reg( file_REG32, reg_DX ) );
- FOR_EACH_CHANNEL( chan_index ) {
- if( uniquemask & (1 << chan_index) ) {
- sse_cmpps(
+ for (i = 0 ; i < unique_count; i++ ) {
+ struct x86_reg dataXMM = make_xmm(i);
+
+ sse_cmpps(
+ func,
+ dataXMM,
+ get_temp(
+ TGSI_EXEC_TEMP_00000000_I,
+ TGSI_EXEC_TEMP_00000000_C ),
+ cc_LessThan );
+
+ if( i == 0 ) {
+ sse_movmskps(
func,
- make_xmm( registers[chan_index] ),
- get_temp(
- TGSI_EXEC_TEMP_00000000_I,
- TGSI_EXEC_TEMP_00000000_C ),
- cc_LessThan );
-
- if( chan_index == firstchan ) {
- sse_pmovmskb(
- func,
- x86_make_reg( file_REG32, reg_AX ),
- make_xmm( registers[chan_index] ) );
- }
- else {
- sse_pmovmskb(
- func,
- x86_make_reg( file_REG32, reg_DX ),
- make_xmm( registers[chan_index] ) );
- x86_or(
- func,
- x86_make_reg( file_REG32, reg_AX ),
- x86_make_reg( file_REG32, reg_DX ) );
- }
+ x86_make_reg( file_REG32, reg_AX ),
+ dataXMM );
+ }
+ else {
+ sse_movmskps(
+ func,
+ x86_make_reg( file_REG32, reg_DX ),
+ dataXMM );
+ x86_or(
+ func,
+ x86_make_reg( file_REG32, reg_AX ),
+ x86_make_reg( file_REG32, reg_DX ) );
}
}
@@ -1502,6 +1747,14 @@ emit_instruction(
if (indirect_temp_reference(inst))
return FALSE;
+ /* we don't handle saturation/clamping yet */
+ if (inst->Instruction.Saturate != TGSI_SAT_NONE)
+ return FALSE;
+
+ /* bail out: handling SOA dependencies would need extra temps */
+ if (tgsi_check_soa_dependencies(inst))
+ return FALSE;
+
switch (inst->Instruction.Opcode) {
case TGSI_OPCODE_ARL:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -1573,7 +1826,7 @@ emit_instruction(
get_temp(
TGSI_EXEC_TEMP_MINUS_128_I,
TGSI_EXEC_TEMP_MINUS_128_C ) );
- emit_pow( func, 3, 1, 2 );
+ emit_pow( func, 3, 1, 1, 2 );
FETCH( func, *inst, 0, 0, CHAN_X );
sse_xorps(
func,
@@ -1820,8 +2073,7 @@ emit_instruction(
}
break;
- case TGSI_OPCODE_LERP:
- /* TGSI_OPCODE_LRP */
+ case TGSI_OPCODE_LRP:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
FETCH( func, *inst, 1, 1, chan_index );
@@ -1841,8 +2093,7 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_DOT2ADD:
- /* TGSI_OPCODE_DP2A */
+ case TGSI_OPCODE_DP2A:
FETCH( func, *inst, 0, 0, CHAN_X ); /* xmm0 = src[0].x */
FETCH( func, *inst, 1, 1, CHAN_X ); /* xmm1 = src[1].x */
emit_mul( func, 0, 1 ); /* xmm0 = xmm0 * xmm1 */
@@ -1857,16 +2108,7 @@ emit_instruction(
}
break;
- case TGSI_OPCODE_INDEX:
- return 0;
- break;
-
- case TGSI_OPCODE_NEGATE:
- return 0;
- break;
-
- case TGSI_OPCODE_FRAC:
- /* TGSI_OPCODE_FRC */
+ case TGSI_OPCODE_FRC:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
emit_frc( func, 0, 0 );
@@ -1878,8 +2120,7 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_FLOOR:
- /* TGSI_OPCODE_FLR */
+ case TGSI_OPCODE_FLR:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
emit_flr( func, 0, 0 );
@@ -1895,8 +2136,7 @@ emit_instruction(
}
break;
- case TGSI_OPCODE_EXPBASE2:
- /* TGSI_OPCODE_EX2 */
+ case TGSI_OPCODE_EX2:
FETCH( func, *inst, 0, 0, CHAN_X );
emit_ex2( func, 0, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -1904,8 +2144,7 @@ emit_instruction(
}
break;
- case TGSI_OPCODE_LOGBASE2:
- /* TGSI_OPCODE_LG2 */
+ case TGSI_OPCODE_LG2:
FETCH( func, *inst, 0, 0, CHAN_X );
emit_lg2( func, 0, 0 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
@@ -1913,18 +2152,16 @@ emit_instruction(
}
break;
- case TGSI_OPCODE_POWER:
- /* TGSI_OPCODE_POW */
+ case TGSI_OPCODE_POW:
FETCH( func, *inst, 0, 0, CHAN_X );
FETCH( func, *inst, 1, 1, CHAN_X );
- emit_pow( func, 0, 0, 1 );
+ emit_pow( func, 0, 0, 0, 1 );
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
STORE( func, *inst, 0, 0, chan_index );
}
break;
- case TGSI_OPCODE_CROSSPRODUCT:
- /* TGSI_OPCODE_XPD */
+ case TGSI_OPCODE_XPD:
if( IS_DST0_CHANNEL_ENABLED( *inst, CHAN_X ) ||
IS_DST0_CHANNEL_ENABLED( *inst, CHAN_Y ) ) {
FETCH( func, *inst, 1, 1, CHAN_Z );
@@ -1970,10 +2207,6 @@ emit_instruction(
}
break;
- case TGSI_OPCODE_MULTIPLYMATRIX:
- return 0;
- break;
-
case TGSI_OPCODE_ABS:
FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
FETCH( func, *inst, 0, 0, chan_index );
@@ -2086,21 +2319,7 @@ emit_instruction(
break;
case TGSI_OPCODE_TEX:
- if (0) {
- /* Disable dummy texture code:
- */
- emit_tempf(
- func,
- 0,
- TEMP_ONE_I,
- TEMP_ONE_C );
- FOR_EACH_DST0_ENABLED_CHANNEL( *inst, chan_index ) {
- STORE( func, *inst, 0, 0, chan_index );
- }
- }
- else {
- return 0;
- }
+ emit_tex( func, inst, FALSE, FALSE );
break;
case TGSI_OPCODE_TXD:
@@ -2198,7 +2417,7 @@ emit_instruction(
break;
case TGSI_OPCODE_TXB:
- return 0;
+ emit_tex( func, inst, TRUE, FALSE );
break;
case TGSI_OPCODE_NRM:
@@ -2306,9 +2525,13 @@ emit_instruction(
break;
case TGSI_OPCODE_TXL:
- return 0;
+ emit_tex( func, inst, TRUE, FALSE );
break;
+ case TGSI_OPCODE_TXP:
+ emit_tex( func, inst, FALSE, TRUE );
+ break;
+
case TGSI_OPCODE_BRK:
return 0;
break;
@@ -2317,7 +2540,7 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_LOOP:
+ case TGSI_OPCODE_BGNFOR:
return 0;
break;
@@ -2333,7 +2556,7 @@ emit_instruction(
return 0;
break;
- case TGSI_OPCODE_ENDLOOP:
+ case TGSI_OPCODE_ENDFOR:
return 0;
break;
@@ -2488,7 +2711,7 @@ emit_declaration(
static void aos_to_soa( struct x86_function *func,
uint arg_aos,
- uint arg_soa,
+ uint arg_machine,
uint arg_num,
uint arg_stride )
{
@@ -2503,7 +2726,10 @@ static void aos_to_soa( struct x86_function *func,
x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
x86_mov( func, aos_input, x86_fn_arg( func, arg_aos ) );
- x86_mov( func, soa_input, x86_fn_arg( func, arg_soa ) );
+ x86_mov( func, soa_input, x86_fn_arg( func, arg_machine ) );
+ x86_lea( func, soa_input,
+ x86_make_disp( soa_input,
+ Offset(struct tgsi_exec_machine, Inputs) ) );
x86_mov( func, num_inputs, x86_fn_arg( func, arg_num ) );
x86_mov( func, stride, x86_fn_arg( func, arg_stride ) );
@@ -2545,28 +2771,30 @@ static void aos_to_soa( struct x86_function *func,
x86_jcc( func, cc_NE, inner_loop );
/* Restore EBX */
- x86_pop( func, aos_input );
+ x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
}
-static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num, uint stride )
+static void soa_to_aos( struct x86_function *func,
+ uint arg_aos,
+ uint arg_machine,
+ uint arg_num,
+ uint arg_stride )
{
- struct x86_reg soa_output;
- struct x86_reg aos_output;
- struct x86_reg num_outputs;
- struct x86_reg temp;
+ struct x86_reg soa_output = x86_make_reg( file_REG32, reg_AX );
+ struct x86_reg aos_output = x86_make_reg( file_REG32, reg_BX );
+ struct x86_reg num_outputs = x86_make_reg( file_REG32, reg_CX );
+ struct x86_reg temp = x86_make_reg( file_REG32, reg_DX );
int inner_loop;
- soa_output = x86_make_reg( file_REG32, reg_AX );
- aos_output = x86_make_reg( file_REG32, reg_BX );
- num_outputs = x86_make_reg( file_REG32, reg_CX );
- temp = x86_make_reg( file_REG32, reg_DX );
-
/* Save EBX */
- x86_push( func, aos_output );
+ x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
- x86_mov( func, soa_output, x86_fn_arg( func, soa ) );
- x86_mov( func, aos_output, x86_fn_arg( func, aos ) );
- x86_mov( func, num_outputs, x86_fn_arg( func, num ) );
+ x86_mov( func, aos_output, x86_fn_arg( func, arg_aos ) );
+ x86_mov( func, soa_output, x86_fn_arg( func, arg_machine ) );
+ x86_lea( func, soa_output,
+ x86_make_disp( soa_output,
+ Offset(struct tgsi_exec_machine, Outputs) ) );
+ x86_mov( func, num_outputs, x86_fn_arg( func, arg_num ) );
/* do */
inner_loop = x86_get_label( func );
@@ -2583,7 +2811,7 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
sse_unpcklps( func, make_xmm( 3 ), make_xmm( 4 ) );
sse_unpckhps( func, make_xmm( 5 ), make_xmm( 4 ) );
- x86_mov( func, temp, x86_fn_arg( func, stride ) );
+ x86_mov( func, temp, x86_fn_arg( func, arg_stride ) );
x86_push( func, aos_output );
sse_movlps( func, x86_make_disp( aos_output, 0 ), make_xmm( 0 ) );
sse_movlps( func, x86_make_disp( aos_output, 8 ), make_xmm( 3 ) );
@@ -2607,20 +2835,13 @@ static void soa_to_aos( struct x86_function *func, uint aos, uint soa, uint num,
x86_jcc( func, cc_NE, inner_loop );
/* Restore EBX */
- x86_pop( func, aos_output );
+ x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
}
/**
* Translate a TGSI vertex/fragment shader to SSE2 code.
* Slightly different things are done for vertex vs. fragment shaders.
*
- * Note that fragment shaders are responsible for interpolating shader
- * inputs. Because on x86 we have only 4 GP registers, and here we
- * have 5 shader arguments (input, output, const, temp and coef), the
- * code is split into two phases -- DECLARATION and INSTRUCTION phase.
- * GP register holding the output argument is aliased with the coeff
- * argument, as outputs are not needed in the DECLARATION phase.
- *
* \param tokens the TGSI input shader
* \param func the output SSE code/function
* \param immediates buffer to place immediates, later passed to SSE func
@@ -2634,7 +2855,6 @@ tgsi_emit_sse2(
boolean do_swizzles )
{
struct tgsi_parse_context parse;
- boolean instruction_phase = FALSE;
unsigned ok = 1;
uint num_immediates = 0;
@@ -2646,74 +2866,48 @@ tgsi_emit_sse2(
/* Can't just use EDI, EBX without save/restoring them:
*/
- x86_push(
- func,
- get_immediate_base() );
-
- x86_push(
- func,
- get_temp_base() );
-
+ x86_push( func, x86_make_reg( file_REG32, reg_BX ) );
+ x86_push( func, x86_make_reg( file_REG32, reg_DI ) );
/*
* Different function args for vertex/fragment shaders:
*/
- if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
- /* DECLARATION phase, do not load output argument. */
- x86_mov(
- func,
- get_input_base(),
- x86_fn_arg( func, 1 ) );
- /* skipping outputs argument here */
- x86_mov(
- func,
- get_const_base(),
- x86_fn_arg( func, 3 ) );
- x86_mov(
- func,
- get_temp_base(),
- x86_fn_arg( func, 4 ) );
- x86_mov(
- func,
- get_coef_base(),
- x86_fn_arg( func, 5 ) );
- x86_mov(
- func,
- get_immediate_base(),
- x86_fn_arg( func, 6 ) );
- }
- else {
- assert(parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX);
-
+ if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
if (do_swizzles)
aos_to_soa( func,
- 6, /* aos_input */
- 1, /* machine->input */
- 7, /* num_inputs */
- 8 ); /* input_stride */
+ 4, /* aos_input */
+ 1, /* machine */
+ 5, /* num_inputs */
+ 6 ); /* input_stride */
+ }
+ x86_mov(
+ func,
+ get_machine_base(),
+ x86_fn_arg( func, 1 ) );
+ x86_mov(
+ func,
+ get_const_base(),
+ x86_fn_arg( func, 2 ) );
+ x86_mov(
+ func,
+ get_immediate_base(),
+ x86_fn_arg( func, 3 ) );
+
+ if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
x86_mov(
- func,
- get_input_base(),
- x86_fn_arg( func, 1 ) );
- x86_mov(
- func,
- get_output_base(),
- x86_fn_arg( func, 2 ) );
- x86_mov(
- func,
- get_const_base(),
- x86_fn_arg( func, 3 ) );
- x86_mov(
- func,
- get_temp_base(),
- x86_fn_arg( func, 4 ) );
- x86_mov(
- func,
- get_immediate_base(),
- x86_fn_arg( func, 5 ) );
+ func,
+ get_coef_base(),
+ x86_fn_arg( func, 4 ) );
}
+ x86_mov(
+ func,
+ get_sampler_base(),
+ x86_make_disp( get_machine_base(),
+ Offset( struct tgsi_exec_machine, Samplers ) ) );
+
+
while( !tgsi_parse_end_of_tokens( &parse ) && ok ) {
tgsi_parse_token( &parse );
@@ -2727,24 +2921,15 @@ tgsi_emit_sse2(
break;
case TGSI_TOKEN_TYPE_INSTRUCTION:
- if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_FRAGMENT) {
- if( !instruction_phase ) {
- /* INSTRUCTION phase, overwrite coeff with output. */
- instruction_phase = TRUE;
- x86_mov(
- func,
- get_output_base(),
- x86_fn_arg( func, 2 ) );
- }
- }
-
ok = emit_instruction(
func,
&parse.FullToken.FullInstruction );
if (!ok) {
- debug_printf("failed to translate tgsi opcode %d to SSE (%s)\n",
- parse.FullToken.FullInstruction.Instruction.Opcode,
+ uint opcode = parse.FullToken.FullInstruction.Instruction.Opcode;
+ debug_printf("failed to translate tgsi opcode %d (%s) to SSE (%s)\n",
+ opcode,
+ tgsi_get_opcode_name(opcode),
parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX ?
"vertex shader" : "fragment shader");
}
@@ -2759,7 +2944,7 @@ tgsi_emit_sse2(
assert(num_immediates < TGSI_EXEC_NUM_IMMEDIATES);
for( i = 0; i < size; i++ ) {
immediates[num_immediates][i] =
- parse.FullToken.FullImmediate.u.ImmediateFloat32[i].Float;
+ parse.FullToken.FullImmediate.u[i].Float;
}
#if 0
debug_printf("SSE FS immediate[%d] = %f %f %f %f\n",
@@ -2781,18 +2966,17 @@ tgsi_emit_sse2(
if (parse.FullHeader.Processor.Processor == TGSI_PROCESSOR_VERTEX) {
if (do_swizzles)
- soa_to_aos( func, 9, 2, 10, 11 );
+ soa_to_aos( func,
+ 7, /* aos_output */
+ 1, /* machine */
+ 8, /* num_outputs */
+ 9 ); /* output_stride */
}
/* Can't just use EBX, EDI without save/restoring them:
*/
- x86_pop(
- func,
- get_temp_base() );
-
- x86_pop(
- func,
- get_immediate_base() );
+ x86_pop( func, x86_make_reg( file_REG32, reg_DI ) );
+ x86_pop( func, x86_make_reg( file_REG32, reg_BX ) );
emit_ret( func );
diff --git a/src/gallium/auxiliary/tgsi/tgsi_sse2.h b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
index af838b2a25b..d81ee3d00ec 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_sse2.h
+++ b/src/gallium/auxiliary/tgsi/tgsi_sse2.h
@@ -34,6 +34,7 @@ extern "C" {
struct tgsi_token;
struct x86_function;
+struct tgsi_interp_coef;
unsigned
tgsi_emit_sse2(
@@ -42,6 +43,33 @@ tgsi_emit_sse2(
float (*immediates)[4],
boolean do_swizzles );
+
+/* This is the function prototype generated when do_swizzles is false
+ * -- effectively for fragment shaders.
+ */
+typedef void (PIPE_CDECL *tgsi_sse2_fs_function) (
+ struct tgsi_exec_machine *machine, /* 1 */
+ const float (*constant)[4], /* 2 */
+ const float (*immediate)[4], /* 3 */
+ const struct tgsi_interp_coef *coef /* 4 */
+ );
+
+
+/* This is the function prototype generated when do_swizzles is true
+ * -- effectively for vertex shaders.
+ */
+typedef void (PIPE_CDECL *tgsi_sse2_vs_func) (
+ struct tgsi_exec_machine *machine, /* 1 */
+ const float (*constant)[4], /* 2 */
+ const float (*immediate)[4], /* 3 */
+ const float (*aos_input)[4], /* 4 */
+ uint num_inputs, /* 5 */
+ uint input_stride, /* 6 */
+ float (*aos_output)[4], /* 7 */
+ uint num_outputs, /* 8 */
+ uint output_stride ); /* 9 */
+
+
#if defined __cplusplus
}
#endif
diff --git a/src/gallium/auxiliary/tgsi/tgsi_text.c b/src/gallium/auxiliary/tgsi/tgsi_text.c
index a40fcab2126..d438450b1e4 100644
--- a/src/gallium/auxiliary/tgsi/tgsi_text.c
+++ b/src/gallium/auxiliary/tgsi/tgsi_text.c
@@ -26,6 +26,7 @@
**************************************************************************/
#include "util/u_debug.h"
+#include "util/u_memory.h"
#include "tgsi_text.h"
#include "tgsi_build.h"
#include "tgsi_info.h"
@@ -230,7 +231,8 @@ static const char *file_names[TGSI_FILE_COUNT] =
"TEMP",
"SAMP",
"ADDR",
- "IMM"
+ "IMM",
+ "LOOP"
};
static boolean
@@ -788,16 +790,6 @@ match_inst_mnemonic(const char **pcur,
if (str_match_no_case(pcur, info->mnemonic)) {
return TRUE;
}
- if (info->alt_mnemonic1) {
- if (str_match_no_case(pcur, info->alt_mnemonic1)) {
- return TRUE;
- }
- if (info->alt_mnemonic2) {
- if (str_match_no_case(pcur, info->alt_mnemonic2)) {
- return TRUE;
- }
- }
- }
return FALSE;
}
@@ -927,7 +919,8 @@ static const char *semantic_names[TGSI_SEMANTIC_COUNT] =
"FOG",
"PSIZE",
"GENERIC",
- "NORMAL"
+ "NORMAL",
+ "FACE"
};
static const char *interpolate_names[TGSI_INTERPOLATE_COUNT] =
@@ -947,6 +940,9 @@ static boolean parse_declaration( struct translate_ctx *ctx )
const char *cur;
uint advance;
+ assert(Elements(semantic_names) == TGSI_SEMANTIC_COUNT);
+ assert(Elements(interpolate_names) == TGSI_INTERPOLATE_COUNT);
+
if (!eat_white( &ctx->cur )) {
report_error( ctx, "Syntax error" );
return FALSE;
@@ -1086,7 +1082,10 @@ static boolean parse_immediate( struct translate_ctx *ctx )
imm = tgsi_default_full_immediate();
imm.Immediate.NrTokens += 4;
imm.Immediate.DataType = TGSI_IMM_FLOAT32;
- imm.u.Pointer = values;
+ imm.u[0].Float = values[0];
+ imm.u[1].Float = values[1];
+ imm.u[2].Float = values[2];
+ imm.u[3].Float = values[3];
advance = tgsi_build_full_immediate(
&imm,
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.c b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
new file mode 100644
index 00000000000..c0a0627e0b2
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.c
@@ -0,0 +1,860 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+
+#include "pipe/p_context.h"
+#include "pipe/p_state.h"
+#include "tgsi/tgsi_ureg.h"
+#include "tgsi/tgsi_dump.h"
+#include "util/u_memory.h"
+#include "util/u_math.h"
+
+/* One slot of the token stream under construction.  Each member is a
+ * different token layout that may occupy a slot; `value` addresses the
+ * slot as a plain unsigned so the emitters in this file can clear it
+ * (value = 0) before filling in individual fields.
+ */
+union tgsi_any_token {
+ struct tgsi_version version;
+ struct tgsi_header header;
+ struct tgsi_processor processor;
+ struct tgsi_token token;
+ struct tgsi_declaration decl;
+ struct tgsi_declaration_range decl_range;
+ struct tgsi_declaration_semantic decl_semantic;
+ struct tgsi_immediate imm;
+ union tgsi_immediate_data imm_data;
+ struct tgsi_instruction insn;
+ struct tgsi_instruction_ext_nv insn_ext_nv;
+ struct tgsi_instruction_ext_label insn_ext_label;
+ struct tgsi_instruction_ext_texture insn_ext_texture;
+ struct tgsi_instruction_ext_predicate insn_ext_predicate;
+ struct tgsi_src_register src;
+ struct tgsi_src_register_ext_swz src_ext_swz;
+ struct tgsi_src_register_ext_mod src_ext_mod;
+ struct tgsi_dimension dim;
+ struct tgsi_dst_register dst;
+ struct tgsi_dst_register_ext_concode dst_ext_code;
+ struct tgsi_dst_register_ext_modulate dst_ext_mod;
+ struct tgsi_dst_register_ext_predicate dst_ext_pred;
+ unsigned value;
+};
+
+
+/* Growable array of tokens.  After a failure `tokens` points at the static
+ * error_tokens scratch array instead of heap storage.
+ */
+struct ureg_tokens {
+ union tgsi_any_token *tokens; /* storage (heap, NULL before first use, or error_tokens) */
+ unsigned size; /* capacity in tokens; tokens_expand() sets it to (1 << order) */
+ unsigned order; /* exponent used to compute size */
+ unsigned count; /* number of tokens handed out so far */
+};
+
+#define UREG_MAX_INPUT PIPE_MAX_ATTRIBS
+#define UREG_MAX_OUTPUT PIPE_MAX_ATTRIBS
+#define UREG_MAX_IMMEDIATE 32
+#define UREG_MAX_TEMP 256
+
+#define DOMAIN_DECL 0
+#define DOMAIN_INSN 1
+
+/* Builder state for one shader: the declarations requested so far plus
+ * two token buffers, one per DOMAIN_ value.  Declarations are only
+ * recorded here; the tokens for them are generated by emit_decls() when
+ * the program is finalized.
+ */
+struct ureg_program
+{
+ unsigned processor; /* TGSI_PROCESSOR_ value passed to ureg_create() */
+ struct pipe_context *pipe; /* not referenced by the functions in tgsi_ureg.c */
+
+ /* Inputs, identified by (semantic_name, semantic_index); the array
+ * position is the input register index.
+ */
+ struct {
+ unsigned semantic_name;
+ unsigned semantic_index;
+ unsigned interp;
+ } input[UREG_MAX_INPUT];
+ unsigned nr_inputs;
+
+ /* Outputs, same scheme as inputs but without an interpolation mode. */
+ struct {
+ unsigned semantic_name;
+ unsigned semantic_index;
+ } output[UREG_MAX_OUTPUT];
+ unsigned nr_outputs;
+
+ /* Immediates; `nr` is how many of the four components of `v` are in
+ * use, so later requests can share or extend an existing immediate.
+ */
+ struct {
+ float v[4];
+ unsigned nr;
+ } immediate[UREG_MAX_IMMEDIATE];
+ unsigned nr_immediates;
+
+ /* One bit per temporary below UREG_MAX_TEMP: set while allocated. */
+ unsigned temps_active[UREG_MAX_TEMP / 32];
+ unsigned nr_temps; /* one more than the highest temporary index handed out */
+
+ unsigned nr_constants;
+ unsigned nr_samplers;
+ unsigned nr_instructions; /* incremented by ureg_emit_insn() */
+
+ /* domain[DOMAIN_DECL] holds header + declarations (and finally the
+ * whole shader); domain[DOMAIN_INSN] collects instruction tokens.
+ */
+ struct ureg_tokens domain[2];
+};
+
+static union tgsi_any_token error_tokens[32];
+
+/* Put a token buffer into the error state.
+ *
+ * Any heap storage the buffer still owns is released first (previously it
+ * was simply dropped, leaking it), then the buffer is redirected to the
+ * shared static error_tokens scratch array so that later emitters have
+ * somewhere harmless to write.  The count is reset to zero.
+ */
+static void tokens_error( struct ureg_tokens *tokens )
+{
+   if (tokens->tokens && tokens->tokens != error_tokens)
+      FREE(tokens->tokens);
+
+   tokens->tokens = error_tokens;
+   tokens->size = Elements(error_tokens);
+   tokens->count = 0;
+}
+
+
+/* Grow a token buffer until it can hold `count` more tokens.
+ *
+ * The capacity is always a power of two (1 << order).  Byte sizes are
+ * computed from the element type rather than from sizeof(unsigned), so
+ * the allocation stays correct whatever the size of union tgsi_any_token.
+ *
+ * On failure -- the buffer is already in the error state, or the
+ * reallocation returns NULL -- the buffer is switched to error_tokens.
+ *
+ * NOTE(review): whether the previous storage is released when REALLOC
+ * fails depends on the REALLOC implementation in use; confirm before
+ * adding any cleanup of the old pointer here.
+ */
+static void tokens_expand( struct ureg_tokens *tokens,
+                           unsigned count )
+{
+   const unsigned elem_size = sizeof tokens->tokens[0];
+   unsigned old_size = tokens->size * elem_size;
+
+   if (tokens->tokens == error_tokens)
+      goto fail;
+
+   while (tokens->count + count > tokens->size) {
+      tokens->size = (1 << ++tokens->order);
+   }
+
+   tokens->tokens = REALLOC(tokens->tokens,
+                            old_size,
+                            tokens->size * elem_size);
+   if (tokens->tokens == NULL)
+      goto fail;
+
+   return;
+
+fail:
+   tokens_error(tokens);
+}
+
+/* Flag the whole program as broken by forcing the first token buffer
+ * (domain[0]) into the error state; ureg_finalize() checks for that
+ * state and returns NULL.
+ */
+static void set_bad( struct ureg_program *ureg )
+{
+ tokens_error(&ureg->domain[0]);
+}
+
+
+
+/* Reserve `count` consecutive tokens at the end of the given domain's
+ * buffer, growing it if necessary, and return a pointer to the first one.
+ * The pointer is only valid until the next reservation in that domain.
+ */
+static union tgsi_any_token *get_tokens( struct ureg_program *ureg,
+                                         unsigned domain,
+                                         unsigned count )
+{
+   struct ureg_tokens *buf = &ureg->domain[domain];
+   unsigned first;
+
+   if (buf->size < buf->count + count)
+      tokens_expand(buf, count);
+
+   /* Sample the count only now: a failed expand resets it. */
+   first = buf->count;
+   buf->count = first + count;
+
+   return &buf->tokens[first];
+}
+
+
+/* Return a pointer to token number `nr` of the given domain.  When the
+ * domain is in the error state the first scratch token is returned
+ * instead, whatever `nr` is.
+ */
+static union tgsi_any_token *retrieve_token( struct ureg_program *ureg,
+                                             unsigned domain,
+                                             unsigned nr )
+{
+   struct ureg_tokens *buf = &ureg->domain[domain];
+
+   return (buf->tokens == error_tokens) ? &error_tokens[0]
+                                        : &buf->tokens[nr];
+}
+
+
+
+/* Build a destination operand for register `index` of `file` with the
+ * default modifiers: all four components written, no indirection, no
+ * saturation.
+ */
+static INLINE struct ureg_dst
+ureg_dst_register( unsigned file,
+                   unsigned index )
+{
+   struct ureg_dst reg;
+
+   /* Which register: */
+   reg.File  = file;
+   reg.Index = index;
+
+   /* Default modifiers: */
+   reg.WriteMask = TGSI_WRITEMASK_XYZW;
+   reg.Saturate  = 0;
+   reg.Indirect  = 0;
+
+   /* Keep the padding bits defined: */
+   reg.Pad1 = 0;
+   reg.Pad2 = 0;
+
+   return reg;
+}
+
+/* Build a source operand for register `index` of `file` with the
+ * default modifiers: identity swizzle, no indirection, no absolute
+ * value and no negation.
+ */
+static INLINE struct ureg_src
+ureg_src_register( unsigned file,
+                   unsigned index )
+{
+   struct ureg_src reg;
+
+   /* Which register: */
+   reg.File  = file;
+   reg.Index = index;
+
+   /* Identity swizzle: */
+   reg.SwizzleX = TGSI_SWIZZLE_X;
+   reg.SwizzleY = TGSI_SWIZZLE_Y;
+   reg.SwizzleZ = TGSI_SWIZZLE_Z;
+   reg.SwizzleW = TGSI_SWIZZLE_W;
+
+   /* No modifiers: */
+   reg.Indirect = 0;
+   reg.Absolute = 0;
+   reg.Negate   = 0;
+   reg.Pad      = 0;
+
+   return reg;
+}
+
+
+
+
+/* Look up the input with the given semantic name/index, declaring it
+ * with `interp_mode` if it does not exist yet, and return it as a source
+ * operand.  If the input table is full the program is marked bad and
+ * the operand returned carries the (out of range) next index.
+ */
+static struct ureg_src
+ureg_DECL_input( struct ureg_program *ureg,
+                 unsigned name,
+                 unsigned index,
+                 unsigned interp_mode )
+{
+   unsigned slot;
+
+   /* Already declared?  Then just refer to it again. */
+   for (slot = 0; slot < ureg->nr_inputs; slot++) {
+      if (ureg->input[slot].semantic_name == name &&
+          ureg->input[slot].semantic_index == index)
+         return ureg_src_register( TGSI_FILE_INPUT, slot );
+   }
+
+   /* Not found: slot == nr_inputs, the first unused entry. */
+   if (ureg->nr_inputs >= UREG_MAX_INPUT) {
+      set_bad( ureg );
+   }
+   else {
+      ureg->input[slot].semantic_name = name;
+      ureg->input[slot].semantic_index = index;
+      ureg->input[slot].interp = interp_mode;
+      ureg->nr_inputs++;
+   }
+
+   return ureg_src_register( TGSI_FILE_INPUT, slot );
+}
+
+
+
+/* Declare (or look up) a fragment shader input with the given semantic
+ * and interpolation mode.  Thin wrapper around ureg_DECL_input().
+ */
+struct ureg_src
+ureg_DECL_fs_input( struct ureg_program *ureg,
+ unsigned name,
+ unsigned index,
+ unsigned interp )
+{
+ return ureg_DECL_input( ureg, name, index, interp );
+}
+
+
+/* Declare (or look up) a vertex shader input with the given semantic.
+ * Vertex inputs carry no interpolation choice, so
+ * TGSI_INTERPOLATE_CONSTANT is always passed to ureg_DECL_input().
+ */
+struct ureg_src
+ureg_DECL_vs_input( struct ureg_program *ureg,
+ unsigned name,
+ unsigned index )
+{
+ return ureg_DECL_input( ureg, name, index, TGSI_INTERPOLATE_CONSTANT );
+}
+
+
+/* Look up the output with the given semantic name/index, declaring it
+ * if it does not exist yet, and return it as a destination operand.  If
+ * the output table is full the program is marked bad and the operand
+ * returned carries the (out of range) next index.
+ */
+struct ureg_dst
+ureg_DECL_output( struct ureg_program *ureg,
+                  unsigned name,
+                  unsigned index )
+{
+   unsigned slot;
+
+   /* Already declared?  Then just refer to it again. */
+   for (slot = 0; slot < ureg->nr_outputs; slot++) {
+      if (ureg->output[slot].semantic_name == name &&
+          ureg->output[slot].semantic_index == index)
+         return ureg_dst_register( TGSI_FILE_OUTPUT, slot );
+   }
+
+   /* Not found: slot == nr_outputs, the first unused entry. */
+   if (ureg->nr_outputs >= UREG_MAX_OUTPUT) {
+      set_bad( ureg );
+   }
+   else {
+      ureg->output[slot].semantic_name = name;
+      ureg->output[slot].semantic_index = index;
+      ureg->nr_outputs++;
+   }
+
+   return ureg_dst_register( TGSI_FILE_OUTPUT, slot );
+}
+
+
+/* Returns a new constant register.  Keep track of which have been
+ * referred to so that we can emit decls later.
+ *
+ * There is nothing in this code to bind this constant to any tracked
+ * value or manage any constant_buffer contents -- that's the
+ * responsibility of the calling code.
+ *
+ * Each call hands out the next index; nr_constants is not bounded here.
+ */
+struct ureg_src ureg_DECL_constant(struct ureg_program *ureg )
+{
+ return ureg_src_register( TGSI_FILE_CONSTANT, ureg->nr_constants++ );
+}
+
+
+/* Allocate a new temporary.  Temporaries greater than UREG_MAX_TEMP
+ * are legal, but will not be released.
+ *
+ * The lowest free slot in the temps_active bitmap is reused when there
+ * is one; otherwise a fresh index is taken from nr_temps.  nr_temps is
+ * kept at least one above the highest index returned.
+ */
+struct ureg_dst ureg_DECL_temporary( struct ureg_program *ureg )
+{
+   unsigned i;
+
+   for (i = 0; i < UREG_MAX_TEMP; i += 32) {
+      int bit = ffs(~ureg->temps_active[i/32]);
+      if (bit != 0) {
+         i += bit - 1;
+         goto out;
+      }
+   }
+
+   /* No reusable temps, so allocate a new one:
+    */
+   i = ureg->nr_temps++;
+
+out:
+   /* Use an unsigned one: shifting a signed 1 into bit 31 is undefined,
+    * and that bit is needed for every 32nd temporary.
+    */
+   if (i < UREG_MAX_TEMP)
+      ureg->temps_active[i/32] |= 1u << (i % 32);
+
+   if (i >= ureg->nr_temps)
+      ureg->nr_temps = i + 1;
+
+   return ureg_dst_register( TGSI_FILE_TEMPORARY, i );
+}
+
+
+/* Mark a temporary as free so that ureg_DECL_temporary() can hand it
+ * out again.  Operands that are not temporaries, or whose index lies
+ * outside the tracked bitmap, are ignored.
+ */
+void ureg_release_temporary( struct ureg_program *ureg,
+                             struct ureg_dst tmp )
+{
+   if (tmp.File != TGSI_FILE_TEMPORARY)
+      return;
+
+   /* Index is a signed bitfield, so negative values must be rejected
+    * explicitly; they would otherwise pass the upper-bound test and
+    * produce an out-of-range array index or a negative shift count.
+    */
+   if (tmp.Index >= 0 && tmp.Index < UREG_MAX_TEMP) {
+      const unsigned idx = (unsigned) tmp.Index;
+
+      ureg->temps_active[idx / 32] &= ~(1u << (idx % 32));
+   }
+}
+
+
+/* Allocate a new sampler.
+ *
+ * Each call hands out the next sampler index; nr_samplers is not
+ * bounded here.
+ */
+struct ureg_src ureg_DECL_sampler( struct ureg_program *ureg )
+{
+ return ureg_src_register( TGSI_FILE_SAMPLER, ureg->nr_samplers++ );
+}
+
+
+
+
+/* Try to express the `nr` values in `v` using the immediate `v2`, which
+ * currently has *nr2 of its four components in use.
+ *
+ * Values already present in v2 are shared; missing ones are appended
+ * while free components remain.  On success TRUE is returned, *nr2 is
+ * updated to the new component count and *swizzle holds, two bits per
+ * requested value, the component of v2 where that value lives.
+ *
+ * On failure FALSE is returned and *nr2 is left unchanged, so a
+ * half-finished expansion is never committed.  *swizzle is cleared on
+ * entry, so bits from an earlier failed attempt cannot leak into the
+ * result of a later one.
+ */
+static int match_or_expand_immediate( const float *v,
+                                      unsigned nr,
+                                      float *v2,
+                                      unsigned *nr2,
+                                      unsigned *swizzle )
+{
+   unsigned used = *nr2;
+   unsigned i, j;
+
+   *swizzle = 0;
+
+   for (i = 0; i < nr; i++) {
+      boolean found = FALSE;
+
+      for (j = 0; j < used && !found; j++) {
+         if (v[i] == v2[j]) {
+            *swizzle |= j << (i * 2);
+            found = TRUE;
+         }
+      }
+
+      if (!found) {
+         if (used >= 4)
+            return FALSE;
+
+         /* Written past the committed count; only becomes part of the
+          * immediate if the whole request succeeds.
+          */
+         v2[used] = v[i];
+         *swizzle |= used << (i * 2);
+         used++;
+      }
+   }
+
+   /* Everything fitted: commit the expansion. */
+   *nr2 = used;
+   return TRUE;
+}
+
+
+
+
+/* Return a source operand that reads the `nr` float values in `v` from
+ * an immediate.
+ *
+ * Existing immediates are tried first, sharing components that already
+ * hold the requested values and filling unused components where
+ * possible; otherwise a new immediate is started.  The returned operand
+ * is swizzled so that its first `nr` components read the requested
+ * values.  If no immediate can hold the values the program is marked
+ * bad.
+ */
+struct ureg_src ureg_DECL_immediate( struct ureg_program *ureg,
+                                     const float *v,
+                                     unsigned nr )
+{
+   unsigned i;
+   unsigned swizzle = 0;
+
+   /* Could do a first pass where we examine all existing immediates
+    * without expanding.
+    */
+
+   for (i = 0; i < ureg->nr_immediates; i++) {
+      /* Start every attempt from a clean swizzle: a candidate that
+       * fails part-way must not leave bits behind for the next one.
+       */
+      swizzle = 0;
+      if (match_or_expand_immediate( v,
+                                     nr,
+                                     ureg->immediate[i].v,
+                                     &ureg->immediate[i].nr,
+                                     &swizzle ))
+         goto out;
+   }
+
+   if (ureg->nr_immediates < UREG_MAX_IMMEDIATE) {
+      i = ureg->nr_immediates++;
+      swizzle = 0;
+      if (match_or_expand_immediate( v,
+                                     nr,
+                                     ureg->immediate[i].v,
+                                     &ureg->immediate[i].nr,
+                                     &swizzle ))
+         goto out;
+   }
+
+   set_bad( ureg );
+
+out:
+   return ureg_swizzle( ureg_src_register( TGSI_FILE_IMMEDIATE, i ),
+                        (swizzle >> 0) & 0x3,
+                        (swizzle >> 2) & 0x3,
+                        (swizzle >> 4) & 0x3,
+                        (swizzle >> 6) & 0x3);
+}
+
+
+/* Append the tokens for one source operand to the instruction stream:
+ * the register token itself, then an extra modifier token when Absolute
+ * is set, then an ADDR[0].xxxx register token when Indirect is set.
+ *
+ * NOTE(review): src.Negate is not copied into any emitted token here,
+ * so a negated ureg_src appears to be emitted un-negated -- confirm
+ * against the token layout in p_shader_tokens.h.
+ */
+void
+ureg_emit_src( struct ureg_program *ureg,
+               struct ureg_src src )
+{
+   union tgsi_any_token *out;
+   unsigned size = 1;
+   unsigned n = 0;
+
+   /* Work out how many tokens are needed before reserving them. */
+   if (src.Absolute)
+      size++;
+   if (src.Indirect)
+      size++;
+
+   out = get_tokens( ureg, DOMAIN_INSN, size );
+
+   /* The register itself: */
+   out[n].value = 0;
+   out[n].src.File = src.File;
+   out[n].src.SwizzleX = src.SwizzleX;
+   out[n].src.SwizzleY = src.SwizzleY;
+   out[n].src.SwizzleZ = src.SwizzleZ;
+   out[n].src.SwizzleW = src.SwizzleW;
+   out[n].src.Indirect = src.Indirect;
+   out[n].src.Index = src.Index;
+   n++;
+
+   /* Optional absolute-value modifier: */
+   if (src.Absolute) {
+      out[n].value = 0;
+      out[n].src_ext_mod.Absolute = 1;
+      n++;
+   }
+
+   /* Optional relative addressing, always through ADDR[0].x: */
+   if (src.Indirect) {
+      out[n].value = 0;
+      out[n].src.File = TGSI_FILE_ADDRESS;
+      out[n].src.SwizzleX = TGSI_SWIZZLE_X;
+      out[n].src.SwizzleY = TGSI_SWIZZLE_X;
+      out[n].src.SwizzleZ = TGSI_SWIZZLE_X;
+      out[n].src.SwizzleW = TGSI_SWIZZLE_X;
+      out[n].src.Indirect = 0;
+      out[n].src.Index = 0;
+      n++;
+   }
+
+   assert(n == size);
+}
+
+
+/* Append the tokens for one destination operand to the instruction
+ * stream: the register token itself, followed by an ADDR[0].xxxx
+ * register token when Indirect is set.
+ */
+void
+ureg_emit_dst( struct ureg_program *ureg,
+               struct ureg_dst dst )
+{
+   union tgsi_any_token *out;
+   unsigned size = 1;
+   unsigned n = 0;
+
+   if (dst.Indirect)
+      size++;
+
+   out = get_tokens( ureg, DOMAIN_INSN, size );
+
+   /* The register itself: */
+   out[n].value = 0;
+   out[n].dst.File = dst.File;
+   out[n].dst.WriteMask = dst.WriteMask;
+   out[n].dst.Indirect = dst.Indirect;
+   out[n].dst.Index = dst.Index;
+   n++;
+
+   /* Optional relative addressing, always through ADDR[0].x: */
+   if (dst.Indirect) {
+      out[n].value = 0;
+      out[n].src.File = TGSI_FILE_ADDRESS;
+      out[n].src.SwizzleX = TGSI_SWIZZLE_X;
+      out[n].src.SwizzleY = TGSI_SWIZZLE_X;
+      out[n].src.SwizzleZ = TGSI_SWIZZLE_X;
+      out[n].src.SwizzleW = TGSI_SWIZZLE_X;
+      out[n].src.Indirect = 0;
+      out[n].src.Index = 0;
+      n++;
+   }
+
+   assert(n == size);
+}
+
+
+
+/* Append an instruction token and count the instruction.
+ *
+ * NrTokens is left at zero; ureg_fixup_insn_size() fills it in once the
+ * operands have been emitted.  Returns the position of the instruction
+ * token within the instruction stream, for use with the fixup and
+ * extension helpers.
+ */
+unsigned
+ureg_emit_insn(struct ureg_program *ureg,
+               unsigned opcode,
+               boolean saturate,
+               unsigned num_dst,
+               unsigned num_src )
+{
+   union tgsi_any_token *tok = get_tokens( ureg, DOMAIN_INSN, 1 );
+
+   tok->value = 0;
+   tok->insn.Type = TGSI_TOKEN_TYPE_INSTRUCTION;
+   tok->insn.NrTokens = 0;
+   tok->insn.Opcode = opcode;
+   tok->insn.Saturate = saturate;
+   tok->insn.NumDstRegs = num_dst;
+   tok->insn.NumSrcRegs = num_src;
+   tok->insn.Padding = 0;
+   tok->insn.Extended = 0;
+
+   ureg->nr_instructions++;
+
+   /* The token just reserved is the last one in the stream. */
+   return ureg->domain[DOMAIN_INSN].count - 1;
+}
+
+
+/* Append a label extension token to the instruction whose token sits at
+ * position `insn_token`, and mark that instruction as extended.  The
+ * position of the new label token is stored in *label_token so that it
+ * can be patched later with ureg_fixup_label().
+ */
+void
+ureg_emit_label(struct ureg_program *ureg,
+                unsigned insn_token,
+                unsigned *label_token )
+{
+   union tgsi_any_token *ext;
+   union tgsi_any_token *owner;
+
+   /* Reserve first: growing the buffer may move it, so the instruction
+    * token must be looked up afterwards.
+    */
+   ext = get_tokens( ureg, DOMAIN_INSN, 1 );
+   owner = retrieve_token( ureg, DOMAIN_INSN, insn_token );
+
+   owner->insn.Extended = 1;
+
+   ext->value = 0;
+   ext->insn_ext_label.Type = TGSI_INSTRUCTION_EXT_TYPE_LABEL;
+
+   *label_token = ureg->domain[DOMAIN_INSN].count - 1;
+}
+
+/* Will return a number which can be used in a label to point to the
+ * next instruction to be emitted.
+ *
+ * This is simply the count of instructions emitted so far, which
+ * ureg_emit_insn() increments once per instruction.
+ */
+unsigned
+ureg_get_instruction_number( struct ureg_program *ureg )
+{
+ return ureg->nr_instructions;
+}
+
+/* Patch a given label (expressed as a token number) to point to a
+ * given instruction (expressed as an instruction number).
+ *
+ * `label_token` is the value stored by ureg_emit_label().  When the
+ * instruction stream is in the error state retrieve_token() hands back
+ * a scratch token instead of the real one.
+ */
+void
+ureg_fixup_label(struct ureg_program *ureg,
+ unsigned label_token,
+ unsigned instruction_number )
+{
+ union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, label_token );
+
+ assert(out->insn_ext_label.Type == TGSI_INSTRUCTION_EXT_TYPE_LABEL);
+ out->insn_ext_label.Label = instruction_number;
+}
+
+
+/* Append a texture extension token carrying the texture `target` to the
+ * instruction whose token sits at position `insn_token`, and mark that
+ * instruction as extended.
+ */
+void
+ureg_emit_texture(struct ureg_program *ureg,
+                  unsigned insn_token,
+                  unsigned target )
+{
+   union tgsi_any_token *ext;
+   union tgsi_any_token *owner;
+
+   /* Reserve first: growing the buffer may move it, so the instruction
+    * token must be looked up afterwards.
+    */
+   ext = get_tokens( ureg, DOMAIN_INSN, 1 );
+   owner = retrieve_token( ureg, DOMAIN_INSN, insn_token );
+
+   owner->insn.Extended = 1;
+
+   ext->value = 0;
+   ext->insn_ext_texture.Type = TGSI_INSTRUCTION_EXT_TYPE_TEXTURE;
+   ext->insn_ext_texture.Texture = target;
+}
+
+
+/* Close an instruction: store in its NrTokens field the number of
+ * tokens that were appended after the instruction token at position
+ * `insn` (the instruction token itself is not counted).  Must be called
+ * after all of the instruction's extension and operand tokens have been
+ * emitted and before the next instruction is started.
+ */
+void
+ureg_fixup_insn_size(struct ureg_program *ureg,
+ unsigned insn )
+{
+ union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_INSN, insn );
+
+ assert(out->insn.Type == TGSI_TOKEN_TYPE_INSTRUCTION);
+ out->insn.NrTokens = ureg->domain[DOMAIN_INSN].count - insn - 1;
+}
+
+
+/* Emit a complete instruction with an opcode chosen at run time:
+ * the instruction token, then `nr_dst` destination operands, then
+ * `nr_src` source operands.  The saturate flag is taken from the first
+ * destination, if there is one.
+ */
+void
+ureg_insn(struct ureg_program *ureg,
+          unsigned opcode,
+          const struct ureg_dst *dst,
+          unsigned nr_dst,
+          const struct ureg_src *src,
+          unsigned nr_src )
+{
+   unsigned insn_token;
+   unsigned d, s;
+   boolean saturate;
+
+   if (nr_dst)
+      saturate = dst[0].Saturate;
+   else
+      saturate = FALSE;
+
+   insn_token = ureg_emit_insn( ureg, opcode, saturate, nr_dst, nr_src );
+
+   for (d = 0; d < nr_dst; d++)
+      ureg_emit_dst( ureg, dst[d] );
+
+   for (s = 0; s < nr_src; s++)
+      ureg_emit_src( ureg, src[s] );
+
+   ureg_fixup_insn_size( ureg, insn_token );
+}
+
+
+
+/* Append a three-token declaration for a single register that carries a
+ * semantic: declaration token, range token (First == Last == index) and
+ * semantic token.
+ */
+static void emit_decl( struct ureg_program *ureg,
+                       unsigned file,
+                       unsigned index,
+                       unsigned semantic_name,
+                       unsigned semantic_index,
+                       unsigned interp )
+{
+   union tgsi_any_token *tok = get_tokens( ureg, DOMAIN_DECL, 3 );
+   union tgsi_any_token *decl = &tok[0];
+   union tgsi_any_token *range = &tok[1];
+   union tgsi_any_token *sem = &tok[2];
+
+   decl->value = 0;
+   decl->decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+   decl->decl.NrTokens = 3;
+   decl->decl.File = file;
+   decl->decl.UsageMask = TGSI_WRITEMASK_XYZW; /* FIXME! */
+   decl->decl.Interpolate = interp;
+   decl->decl.Semantic = 1;
+
+   range->value = 0;
+   range->decl_range.First =
+      range->decl_range.Last = index;
+
+   sem->value = 0;
+   sem->decl_semantic.SemanticName = semantic_name;
+   sem->decl_semantic.SemanticIndex = semantic_index;
+}
+
+
+/* Append a two-token declaration, without semantic, covering the
+ * `count` registers of `file` starting at `first`.  Callers only use
+ * this with a non-zero count.
+ */
+static void emit_decl_range( struct ureg_program *ureg,
+                             unsigned file,
+                             unsigned first,
+                             unsigned count )
+{
+   union tgsi_any_token *tok = get_tokens( ureg, DOMAIN_DECL, 2 );
+   union tgsi_any_token *decl = &tok[0];
+   union tgsi_any_token *range = &tok[1];
+
+   decl->value = 0;
+   decl->decl.Type = TGSI_TOKEN_TYPE_DECLARATION;
+   decl->decl.NrTokens = 2;
+   decl->decl.File = file;
+   decl->decl.UsageMask = 0xf;
+   decl->decl.Interpolate = TGSI_INTERPOLATE_CONSTANT;
+   decl->decl.Semantic = 0;
+
+   range->value = 0;
+   range->decl_range.First = first;
+   range->decl_range.Last = first + count - 1;
+}
+
+/* Append a five-token float immediate: the immediate token followed by
+ * the four values read from `v`.  All four components are always
+ * written, whether or not the immediate uses them all.
+ */
+static void emit_immediate( struct ureg_program *ureg,
+                            const float *v )
+{
+   union tgsi_any_token *tok = get_tokens( ureg, DOMAIN_DECL, 5 );
+   unsigned chan;
+
+   tok[0].value = 0;
+   tok[0].imm.Type = TGSI_TOKEN_TYPE_IMMEDIATE;
+   tok[0].imm.NrTokens = 5;
+   tok[0].imm.DataType = TGSI_IMM_FLOAT32;
+   tok[0].imm.Padding = 0;
+   tok[0].imm.Extended = 0;
+
+   /* Data tokens follow the header token directly. */
+   for (chan = 0; chan < 4; chan++)
+      tok[1 + chan].imm_data.Float = v[chan];
+}
+
+
+
+
+/* Turn everything recorded by the ureg_DECL_*() functions into
+ * declaration tokens, in this order: inputs, outputs, samplers,
+ * constants, temporaries, immediates.  Register files with nothing
+ * declared are left out.
+ */
+static void emit_decls( struct ureg_program *ureg )
+{
+   unsigned idx;
+
+   /* Inputs and outputs get one declaration each, with semantic. */
+   for (idx = 0; idx < ureg->nr_inputs; idx++) {
+      emit_decl( ureg,
+                 TGSI_FILE_INPUT,
+                 idx,
+                 ureg->input[idx].semantic_name,
+                 ureg->input[idx].semantic_index,
+                 ureg->input[idx].interp );
+   }
+
+   for (idx = 0; idx < ureg->nr_outputs; idx++) {
+      emit_decl( ureg,
+                 TGSI_FILE_OUTPUT,
+                 idx,
+                 ureg->output[idx].semantic_name,
+                 ureg->output[idx].semantic_index,
+                 TGSI_INTERPOLATE_CONSTANT );
+   }
+
+   /* The remaining files are declared as one range starting at zero. */
+   if (ureg->nr_samplers != 0) {
+      emit_decl_range( ureg,
+                       TGSI_FILE_SAMPLER,
+                       0, ureg->nr_samplers );
+   }
+
+   if (ureg->nr_constants != 0) {
+      emit_decl_range( ureg,
+                       TGSI_FILE_CONSTANT,
+                       0, ureg->nr_constants );
+   }
+
+   if (ureg->nr_temps != 0) {
+      emit_decl_range( ureg,
+                       TGSI_FILE_TEMPORARY,
+                       0, ureg->nr_temps );
+   }
+
+   for (idx = 0; idx < ureg->nr_immediates; idx++) {
+      emit_immediate( ureg,
+                      ureg->immediate[idx].v );
+   }
+}
+
+/* Append the instruction tokens onto the declarations to build a
+ * contiguous stream suitable to send to the driver.
+ *
+ * Nothing is copied when the declaration buffer is (or has just gone)
+ * into the error state: the destination is then the fixed-size
+ * error_tokens scratch array, which is generally too small for the
+ * instruction stream, and ureg_finalize() rejects the program anyway.
+ */
+static void copy_instructions( struct ureg_program *ureg )
+{
+   unsigned nr_tokens = ureg->domain[DOMAIN_INSN].count;
+   union tgsi_any_token *out = get_tokens( ureg,
+                                           DOMAIN_DECL,
+                                           nr_tokens );
+
+   /* Check after get_tokens(): the reservation itself may have failed. */
+   if (ureg->domain[DOMAIN_DECL].tokens == error_tokens)
+      return;
+
+   /* No instructions: the source buffer may never have been allocated. */
+   if (nr_tokens == 0)
+      return;
+
+   memcpy(out,
+          ureg->domain[DOMAIN_INSN].tokens,
+          nr_tokens * sizeof out[0] );
+}
+
+
+/* Patch the header token (token 1 of the declaration stream, written by
+ * emit_header()) with the final body size: every token in the stream
+ * except the three leading version/header/processor tokens.
+ */
+static void
+fixup_header_size(struct ureg_program *ureg )
+{
+ union tgsi_any_token *out = retrieve_token( ureg, DOMAIN_DECL, 1 );
+
+ out->header.BodySize = ureg->domain[DOMAIN_DECL].count - 3;
+}
+
+
+/* Start the declaration stream with its three leading tokens: version
+ * (1.1), header and processor type.  BodySize is written as zero here
+ * and patched by fixup_header_size() once the stream is complete.
+ */
+static void
+emit_header( struct ureg_program *ureg )
+{
+ union tgsi_any_token *out = get_tokens( ureg, DOMAIN_DECL, 3 );
+
+ out[0].version.MajorVersion = 1;
+ out[0].version.MinorVersion = 1;
+ out[0].version.Padding = 0;
+
+ out[1].header.HeaderSize = 2;
+ out[1].header.BodySize = 0;
+
+ out[2].processor.Processor = ureg->processor;
+ out[2].processor.Padding = 0;
+}
+
+
+/* Assemble the final token stream: header, declarations, then the
+ * instructions collected so far.
+ *
+ * Returns a pointer to the first token, which remains owned by the
+ * program, or NULL (after asserting) if anything went wrong while the
+ * shader was being built.
+ */
+const struct tgsi_token *ureg_finalize( struct ureg_program *ureg )
+{
+   const struct tgsi_token *result;
+   boolean failed;
+
+   emit_header( ureg );
+   emit_decls( ureg );
+   copy_instructions( ureg );
+   fixup_header_size( ureg );
+
+   /* Either buffer ending up in the error state spoils the shader. */
+   failed = (ureg->domain[0].tokens == error_tokens ||
+             ureg->domain[1].tokens == error_tokens);
+
+   if (failed) {
+      debug_printf("%s: error in generated shader\n", __FUNCTION__);
+      assert(0);
+      return NULL;
+   }
+
+   result = &ureg->domain[DOMAIN_DECL].tokens[0].token;
+
+   /* Flip to 1 to dump every generated shader. */
+   if (0) {
+      debug_printf("%s: emitted shader %d tokens:\n", __FUNCTION__,
+                   ureg->domain[DOMAIN_DECL].count);
+      tgsi_dump( result, 0 );
+   }
+
+   return result;
+}
+
+
+/* Finalize the program and hand the tokens to the driver, creating a
+ * vertex shader state when the program's processor is
+ * TGSI_PROCESSOR_VERTEX and a fragment shader state otherwise.
+ *
+ * Returns the driver's state object, or NULL if the program could not
+ * be finalized.
+ */
+void *ureg_create_shader( struct ureg_program *ureg,
+                          struct pipe_context *pipe )
+{
+   struct pipe_shader_state state;
+
+   state.tokens = ureg_finalize(ureg);
+   if (state.tokens == NULL)
+      return NULL;
+
+   return (ureg->processor == TGSI_PROCESSOR_VERTEX)
+      ? pipe->create_vs_state( pipe, &state )
+      : pipe->create_fs_state( pipe, &state );
+}
+
+
+
+
+/* Allocate a zero-initialized program builder for the given
+ * TGSI_PROCESSOR_ type.  Returns NULL if the allocation fails.
+ */
+struct ureg_program *ureg_create( unsigned processor )
+{
+   struct ureg_program *ureg = CALLOC_STRUCT( ureg_program );
+
+   if (ureg != NULL)
+      ureg->processor = processor;
+
+   return ureg;
+}
+
+
+/* Free a program builder together with its token buffers.  Buffers that
+ * were never allocated, or that point at the static error_tokens array,
+ * are skipped.
+ */
+void ureg_destroy( struct ureg_program *ureg )
+{
+   unsigned d;
+
+   for (d = 0; d < Elements(ureg->domain); d++) {
+      union tgsi_any_token *buf = ureg->domain[d].tokens;
+
+      if (buf == NULL || buf == error_tokens)
+         continue;
+
+      FREE(buf);
+   }
+
+   FREE(ureg);
+}
diff --git a/src/gallium/auxiliary/tgsi/tgsi_ureg.h b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
new file mode 100644
index 00000000000..8836a1ea0eb
--- /dev/null
+++ b/src/gallium/auxiliary/tgsi/tgsi_ureg.h
@@ -0,0 +1,481 @@
+/**************************************************************************
+ *
+ * Copyright 2009 VMware, Inc.
+ * All Rights Reserved.
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a
+ * copy of this software and associated documentation files (the
+ * "Software"), to deal in the Software without restriction, including
+ * without limitation the rights to use, copy, modify, merge, publish,
+ * distribute, sub license, and/or sell copies of the Software, and to
+ * permit persons to whom the Software is furnished to do so, subject to
+ * the following conditions:
+ *
+ * The above copyright notice and this permission notice (including the
+ * next paragraph) shall be included in all copies or substantial portions
+ * of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
+ * OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+ * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT.
+ * IN NO EVENT SHALL VMWARE, INC AND/OR ITS SUPPLIERS BE LIABLE FOR
+ * ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+ * TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+ * SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
+ *
+ **************************************************************************/
+
+#ifndef TGSI_UREG_H
+#define TGSI_UREG_H
+
+#include "pipe/p_compiler.h"
+#include "pipe/p_shader_tokens.h"
+
+struct ureg_program;
+
+/* Almost a tgsi_src_register, but we need to pull in the Absolute
+ * flag from the _ext token. Indirect flag always implies ADDR[0].
+ *
+ * Passed by value; ureg_emit_src() expands it into one or more tokens.
+ */
+struct ureg_src
+{
+ unsigned File : 4; /* TGSI_FILE_ */
+ unsigned SwizzleX : 2; /* TGSI_SWIZZLE_ */
+ unsigned SwizzleY : 2; /* TGSI_SWIZZLE_ */
+ unsigned SwizzleZ : 2; /* TGSI_SWIZZLE_ */
+ unsigned SwizzleW : 2; /* TGSI_SWIZZLE_ */
+ unsigned Pad : 1; /* BOOL */
+ unsigned Indirect : 1; /* BOOL */
+ unsigned Absolute : 1; /* BOOL */
+ int Index : 16; /* SINT */
+ unsigned Negate : 1; /* BOOL */
+};
+
+/* Very similar to a tgsi_dst_register, removing unsupported fields
+ * and adding a Saturate flag. It's easier to push saturate into the
+ * destination register than to try and create a _SAT variant of each
+ * instruction function.
+ *
+ * Passed by value; the instruction emitters read Saturate from the
+ * destination and ureg_emit_dst() expands the rest into tokens.
+ */
+struct ureg_dst
+{
+ unsigned File : 4; /* TGSI_FILE_ */
+ unsigned WriteMask : 4; /* TGSI_WRITEMASK_ */
+ unsigned Indirect : 1; /* BOOL */
+ unsigned Saturate : 1; /* BOOL */
+ int Index : 16; /* SINT */
+ unsigned Pad1 : 5;
+ unsigned Pad2 : 1; /* BOOL */
+};
+
+struct pipe_context;
+
+struct ureg_program *
+ureg_create( unsigned processor );
+
+const struct tgsi_token *
+ureg_finalize( struct ureg_program * );
+
+void *
+ureg_create_shader( struct ureg_program *,
+ struct pipe_context *pipe );
+
+void
+ureg_destroy( struct ureg_program * );
+
+
+/***********************************************************************
+ * Convenience routine:
+ *
+ * Create the driver shader state from the program and then destroy the
+ * program, whether or not the shader could be created.  Returns
+ * whatever ureg_create_shader() returned.
+ */
+static INLINE void *
+ureg_create_shader_and_destroy( struct ureg_program *p,
+ struct pipe_context *pipe )
+{
+ void *result = ureg_create_shader( p, pipe );
+ ureg_destroy( p );
+ return result;
+}
+
+
+
+/***********************************************************************
+ * Build shader declarations:
+ */
+
+struct ureg_src
+ureg_DECL_fs_input( struct ureg_program *,
+ unsigned semantic_name,
+ unsigned semantic_index,
+ unsigned interp_mode );
+
+struct ureg_src
+ureg_DECL_vs_input( struct ureg_program *,
+ unsigned semantic_name,
+ unsigned semantic_index );
+
+struct ureg_dst
+ureg_DECL_output( struct ureg_program *,
+ unsigned semantic_name,
+ unsigned semantic_index );
+
+struct ureg_src
+ureg_DECL_immediate( struct ureg_program *,
+ const float *v,
+ unsigned nr );
+
+struct ureg_src
+ureg_DECL_constant( struct ureg_program * );
+
+struct ureg_dst
+ureg_DECL_temporary( struct ureg_program * );
+
+void
+ureg_release_temporary( struct ureg_program *ureg,
+ struct ureg_dst tmp );
+
+struct ureg_src
+ureg_DECL_sampler( struct ureg_program * );
+
+
+/* Convenience wrappers around ureg_DECL_immediate() taking one to four
+ * floats as separate arguments.  Each packs its arguments into a local
+ * array and passes it on with the matching count.
+ */
+
+/* Four-component immediate (a, b, c, d). */
+static INLINE struct ureg_src
+ureg_DECL_immediate4f( struct ureg_program *ureg,
+ float a, float b,
+ float c, float d)
+{
+ float v[4];
+ v[0] = a;
+ v[1] = b;
+ v[2] = c;
+ v[3] = d;
+ return ureg_DECL_immediate( ureg, v, 4 );
+}
+
+/* Three-component immediate (a, b, c). */
+static INLINE struct ureg_src
+ureg_DECL_immediate3f( struct ureg_program *ureg,
+ float a, float b,
+ float c)
+{
+ float v[3];
+ v[0] = a;
+ v[1] = b;
+ v[2] = c;
+ return ureg_DECL_immediate( ureg, v, 3 );
+}
+
+/* Two-component immediate (a, b). */
+static INLINE struct ureg_src
+ureg_DECL_immediate2f( struct ureg_program *ureg,
+ float a, float b)
+{
+ float v[2];
+ v[0] = a;
+ v[1] = b;
+ return ureg_DECL_immediate( ureg, v, 2 );
+}
+
+/* Single-value immediate (a). */
+static INLINE struct ureg_src
+ureg_DECL_immediate1f( struct ureg_program *ureg,
+ float a)
+{
+ float v[1];
+ v[0] = a;
+ return ureg_DECL_immediate( ureg, v, 1 );
+}
+
+/***********************************************************************
+ * Functions for patching up labels
+ */
+
+
+/* Will return a number which can be used in a label to point to the
+ * next instruction to be emitted.
+ */
+unsigned
+ureg_get_instruction_number( struct ureg_program *ureg );
+
+
+/* Patch a given label (expressed as a token number) to point to a
+ * given instruction (expressed as an instruction number).
+ *
+ * Labels are obtained from instruction emitters, eg ureg_CAL().
+ * Instruction numbers are obtained from ureg_get_instruction_number(),
+ * above.
+ */
+void
+ureg_fixup_label(struct ureg_program *ureg,
+ unsigned label_token,
+ unsigned instruction_number );
+
+
+/* Generic instruction emitter.  Use it if you need to pass the opcode as
+ * a parameter, rather than using the ureg_OP() variants below.
+ */
+void
+ureg_insn(struct ureg_program *ureg,
+ unsigned opcode,
+ const struct ureg_dst *dst,
+ unsigned nr_dst,
+ const struct ureg_src *src,
+ unsigned nr_src );
+
+
+/***********************************************************************
+ * Internal instruction helpers, don't call these directly:
+ */
+
+unsigned
+ureg_emit_insn(struct ureg_program *ureg,
+ unsigned opcode,
+ boolean saturate,
+ unsigned num_dst,
+ unsigned num_src );
+
+void
+ureg_emit_label(struct ureg_program *ureg,
+ unsigned insn_token,
+ unsigned *label_token );
+
+void
+ureg_emit_texture(struct ureg_program *ureg,
+ unsigned insn_token,
+ unsigned target );
+
+void
+ureg_emit_dst( struct ureg_program *ureg,
+ struct ureg_dst dst );
+
+void
+ureg_emit_src( struct ureg_program *ureg,
+ struct ureg_src src );
+
+void
+ureg_fixup_insn_size(struct ureg_program *ureg,
+ unsigned insn );
+
+
+#define OP00( op ) \
+static INLINE void ureg_##op( struct ureg_program *ureg ) \
+{ \
+ unsigned opcode = TGSI_OPCODE_##op; \
+ unsigned insn = ureg_emit_insn( ureg, opcode, FALSE, 0, 0 ); \
+ ureg_fixup_insn_size( ureg, insn ); \
+}
+
+#define OP01( op ) \
+static INLINE void ureg_##op( struct ureg_program *ureg, \
+ struct ureg_src src ) \
+{ \
+ unsigned opcode = TGSI_OPCODE_##op; \
+ unsigned insn = ureg_emit_insn( ureg, opcode, FALSE, 0, 1 ); \
+ ureg_emit_src( ureg, src ); \
+ ureg_fixup_insn_size( ureg, insn ); \
+}
+
+#define OP00_LBL( op ) \
+static INLINE void ureg_##op( struct ureg_program *ureg, \
+ unsigned *label_token ) \
+{ \
+ unsigned opcode = TGSI_OPCODE_##op; \
+ unsigned insn = ureg_emit_insn( ureg, opcode, FALSE, 0, 0 ); \
+ ureg_emit_label( ureg, insn, label_token ); \
+ ureg_fixup_insn_size( ureg, insn ); \
+}
+
+#define OP01_LBL( op ) /* generates ureg_OP( ureg, src, label_token ): one src operand plus a label */ \
+static INLINE void ureg_##op( struct ureg_program *ureg, \
+ struct ureg_src src, \
+ unsigned *label_token ) \
+{ \
+ unsigned opcode = TGSI_OPCODE_##op; \
+ unsigned insn = ureg_emit_insn( ureg, opcode, FALSE, 0, 1 ); \
+ ureg_emit_label( ureg, insn, label_token ); /* label is emitted before the src operand */ \
+ ureg_emit_src( ureg, src ); \
+ ureg_fixup_insn_size( ureg, insn ); \
+}
+
+#define OP10( op ) /* generates ureg_OP( ureg, dst ): one dst, no src operands */ \
+static INLINE void ureg_##op( struct ureg_program *ureg, \
+ struct ureg_dst dst ) \
+{ \
+ unsigned opcode = TGSI_OPCODE_##op; \
+ unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 0 ); /* saturate flag is taken from the dst register */ \
+ ureg_emit_dst( ureg, dst ); \
+ ureg_fixup_insn_size( ureg, insn ); \
+}
+
+
+#define OP11( op ) /* generates ureg_OP( ureg, dst, src ): one dst, one src operand */ \
+static INLINE void ureg_##op( struct ureg_program *ureg, \
+ struct ureg_dst dst, \
+ struct ureg_src src ) \
+{ \
+ unsigned opcode = TGSI_OPCODE_##op; \
+ unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 1 ); /* saturate flag is taken from the dst register */ \
+ ureg_emit_dst( ureg, dst ); /* dst operand first, then the src */ \
+ ureg_emit_src( ureg, src ); \
+ ureg_fixup_insn_size( ureg, insn ); \
+}
+
+#define OP12( op ) /* generates ureg_OP( ureg, dst, src0, src1 ): one dst, two src operands */ \
+static INLINE void ureg_##op( struct ureg_program *ureg, \
+ struct ureg_dst dst, \
+ struct ureg_src src0, \
+ struct ureg_src src1 ) \
+{ \
+ unsigned opcode = TGSI_OPCODE_##op; \
+ unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 2 ); /* saturate flag is taken from the dst register */ \
+ ureg_emit_dst( ureg, dst ); /* dst operand first, then the srcs in argument order */ \
+ ureg_emit_src( ureg, src0 ); \
+ ureg_emit_src( ureg, src1 ); \
+ ureg_fixup_insn_size( ureg, insn ); \
+}
+
+#define OP12_TEX( op ) /* generates ureg_OP( ureg, dst, target, src0, src1 ): one dst, two srcs, plus a texture target */ \
+static INLINE void ureg_##op( struct ureg_program *ureg, \
+ struct ureg_dst dst, \
+ unsigned target, \
+ struct ureg_src src0, \
+ struct ureg_src src1 ) \
+{ \
+ unsigned opcode = TGSI_OPCODE_##op; \
+ unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 2 ); \
+ ureg_emit_texture( ureg, insn, target ); /* texture target is emitted before any operand */ \
+ ureg_emit_dst( ureg, dst ); \
+ ureg_emit_src( ureg, src0 ); \
+ ureg_emit_src( ureg, src1 ); \
+ ureg_fixup_insn_size( ureg, insn ); \
+}
+
+#define OP13( op ) /* generates ureg_OP( ureg, dst, src0, src1, src2 ): one dst, three src operands */ \
+static INLINE void ureg_##op( struct ureg_program *ureg, \
+ struct ureg_dst dst, \
+ struct ureg_src src0, \
+ struct ureg_src src1, \
+ struct ureg_src src2 ) \
+{ \
+ unsigned opcode = TGSI_OPCODE_##op; \
+ unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 3 ); /* saturate flag is taken from the dst register */ \
+ ureg_emit_dst( ureg, dst ); /* dst operand first, then the srcs in argument order */ \
+ ureg_emit_src( ureg, src0 ); \
+ ureg_emit_src( ureg, src1 ); \
+ ureg_emit_src( ureg, src2 ); \
+ ureg_fixup_insn_size( ureg, insn ); \
+}
+
+#define OP14_TEX( op ) /* generates ureg_OP( ureg, dst, target, src0..src3 ): one dst, four srcs, plus a texture target */ \
+static INLINE void ureg_##op( struct ureg_program *ureg, \
+ struct ureg_dst dst, \
+ unsigned target, \
+ struct ureg_src src0, \
+ struct ureg_src src1, \
+ struct ureg_src src2, \
+ struct ureg_src src3 ) \
+{ \
+ unsigned opcode = TGSI_OPCODE_##op; \
+ unsigned insn = ureg_emit_insn( ureg, opcode, dst.Saturate, 1, 4 ); \
+ ureg_emit_texture( ureg, insn, target ); /* texture target is emitted before any operand */ \
+ ureg_emit_dst( ureg, dst ); \
+ ureg_emit_src( ureg, src0 ); \
+ ureg_emit_src( ureg, src1 ); \
+ ureg_emit_src( ureg, src2 ); \
+ ureg_emit_src( ureg, src3 ); \
+ ureg_fixup_insn_size( ureg, insn ); \
+}
+
+
+/* Use a template include to generate a correctly-typed ureg_OP()
+ * function for each TGSI opcode:
+ */
+#include "tgsi_opcode_tmp.h"
+
+
+/***********************************************************************
+ * Inline helpers for manipulating register structs:
+ */
+static INLINE struct ureg_src
+ureg_negate( struct ureg_src reg ) /* reg is passed by value: a modified copy is returned */
+{
+ reg.Negate ^= 1; /* toggle rather than set, so negating twice cancels out */
+ return reg;
+}
+
+static INLINE struct ureg_src
+ureg_abs( struct ureg_src reg ) /* reg is passed by value: a modified copy is returned */
+{
+ reg.Absolute = 1;
+ reg.Negate = 0; /* also clear Negate, so an already-negated operand does not stay negated */
+ return reg;
+}
+
+static INLINE struct ureg_src
+ureg_swizzle( struct ureg_src reg, /* applies a swizzle on top of the one reg already carries */
+ int x, int y, int z, int w ) /* presumably each in 0..3 -- not range-checked here, TODO confirm callers */
+{
+ unsigned swz = ( (reg.SwizzleX << 0) | /* pack the current swizzle, 2 bits per channel */
+ (reg.SwizzleY << 2) |
+ (reg.SwizzleZ << 4) |
+ (reg.SwizzleW << 6));
+
+ reg.SwizzleX = (swz >> (x*2)) & 0x3; /* new X = whatever the old swizzle had in slot x */
+ reg.SwizzleY = (swz >> (y*2)) & 0x3; /* likewise for the remaining channels */
+ reg.SwizzleZ = (swz >> (z*2)) & 0x3;
+ reg.SwizzleW = (swz >> (w*2)) & 0x3;
+ return reg;
+}
+
+static INLINE struct ureg_src
+ureg_scalar( struct ureg_src reg, int x ) /* replicate one channel of reg into all four */
+{
+ return ureg_swizzle(reg, x, x, x, x); /* goes through ureg_swizzle, so x is relative to reg's existing swizzle */
+}
+
+static INLINE struct ureg_dst
+ureg_writemask( struct ureg_dst reg, /* passed by value: a modified copy is returned */
+ unsigned writemask )
+{
+ reg.WriteMask &= writemask; /* AND, not assignment: can only remove channels, never re-enable one */
+ return reg;
+}
+
+static INLINE struct ureg_dst
+ureg_saturate( struct ureg_dst reg ) /* passed by value: a modified copy is returned */
+{
+ reg.Saturate = 1; /* the OPxx() macros forward this flag to ureg_emit_insn() as dst.Saturate */
+ return reg;
+}
+
+static INLINE struct ureg_dst
+ureg_dst( struct ureg_src src ) /* build a dst register naming the same File/Index as src */
+{
+ struct ureg_dst dst;
+
+ dst.File = src.File;
+ dst.WriteMask = TGSI_WRITEMASK_XYZW; /* always starts with all four channels enabled */
+ dst.Indirect = src.Indirect;
+ dst.Saturate = 0;
+ dst.Index = src.Index;
+ dst.Pad1 = 0; /* padding fields are zeroed explicitly so every field of dst is initialised */
+ dst.Pad2 = 0;
+
+ return dst; /* src's swizzle, Negate and Absolute are not carried over */
+}
+
+static INLINE struct ureg_src
+ureg_src( struct ureg_dst dst ) /* build a src register naming the same File/Index as dst */
+{
+ struct ureg_src src;
+
+ src.File = dst.File;
+ src.SwizzleX = TGSI_SWIZZLE_X; /* identity swizzle: each channel reads itself */
+ src.SwizzleY = TGSI_SWIZZLE_Y;
+ src.SwizzleZ = TGSI_SWIZZLE_Z;
+ src.SwizzleW = TGSI_SWIZZLE_W;
+ src.Pad = 0;
+ src.Indirect = dst.Indirect;
+ src.Absolute = 0; /* no source modifiers on the result */
+ src.Index = dst.Index;
+ src.Negate = 0;
+
+ return src; /* dst's WriteMask and Saturate are not carried over */
+}
+
+
+
+#endif
diff --git a/src/gallium/auxiliary/trace/trace_drm.h b/src/gallium/auxiliary/trace/trace_drm.h
deleted file mode 100644
index 892bd9860c6..00000000000
--- a/src/gallium/auxiliary/trace/trace_drm.h
+++ /dev/null
@@ -1,165 +0,0 @@
-/*
- * Copyright 2009 Jakob Bornecrantz <jakob@vmware.com>
- * Corbin Simpson <MostAwesomeDude@gmail.com>
- *
- * Permission is hereby granted, free of charge, to any person obtaining a
- * copy of this software and associated documentation files (the "Software"),
- * to deal in the Software without restriction, including without limitation
- * on the rights to use, copy, modify, merge, publish, distribute, sub
- * license, and/or sell copies of the Software, and to permit persons to whom
- * the Software is furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice (including the next
- * paragraph) shall be included in all copies or substantial portions of the
- * Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
- * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
- * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
- * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
- * USE OR OTHER DEALINGS IN THE SOFTWARE. */
-
-#ifndef TRACE_DRM_H
-#define TRACE_DRM_H
-
-#include "state_tracker/drm_api.h"
-
-#include "trace/tr_buffer.h"
-#include "trace/tr_context.h"
-#include "trace/tr_screen.h"
-#include "trace/tr_texture.h"
-
-struct drm_api hooks;
-
-static struct pipe_screen *
-trace_drm_create_screen(int fd, struct drm_create_screen_arg *arg)
-{
- struct pipe_screen *screen;
-
- if (arg && arg->mode != DRM_CREATE_NORMAL)
- return NULL;
-
- screen = hooks.create_screen(fd, arg);
-
- return trace_screen_create(screen);
-};
-
-static struct pipe_context *
-trace_drm_create_context(struct pipe_screen *_screen)
-{
- struct pipe_screen *screen;
- struct pipe_context *pipe;
-
- if (trace_enabled())
- screen = trace_screen(_screen)->screen;
- else
- screen = _screen;
-
- pipe = hooks.create_context(screen);
-
- if (trace_enabled())
- pipe = trace_context_create(_screen, pipe);
-
- return pipe;
-};
-
-static boolean
-trace_drm_buffer_from_texture(struct pipe_texture *_texture,
- struct pipe_buffer **_buffer,
- unsigned *stride)
-{
- struct pipe_texture *texture;
- struct pipe_buffer *buffer = NULL;
- boolean result;
-
- if (trace_enabled())
- texture = trace_texture(_texture)->texture;
- else
- texture = _texture;
-
- result = hooks.buffer_from_texture(texture, &buffer, stride);
-
- if (result && _buffer)
- buffer = trace_buffer_create(trace_screen(texture->screen), buffer);
-
- if (_buffer)
- *_buffer = buffer;
- else
- pipe_buffer_reference(&buffer, NULL);
-
- return result;
-}
-
-static struct pipe_buffer *
-trace_drm_buffer_from_handle(struct pipe_screen *_screen,
- const char *name,
- unsigned handle)
-{
- struct pipe_screen *screen;
- struct pipe_buffer *result;
-
- if (trace_enabled())
- screen = trace_screen(_screen)->screen;
- else
- screen = _screen;
-
- result = hooks.buffer_from_handle(screen, name, handle);
-
- if (trace_enabled())
- result = trace_buffer_create(trace_screen(_screen), result);
-
- return result;
-}
-
-static boolean
-trace_drm_handle_from_buffer(struct pipe_screen *_screen,
- struct pipe_buffer *_buffer,
- unsigned *handle)
-{
- struct pipe_screen *screen;
- struct pipe_buffer *buffer;
-
- if (trace_enabled()) {
- screen = trace_screen(_screen)->screen;
- buffer = trace_buffer(_buffer)->buffer;
- } else {
- screen = _screen;
- buffer = _buffer;
- }
-
- return hooks.handle_from_buffer(screen, buffer, handle);
-}
-
-static boolean
-trace_drm_global_handle_from_buffer(struct pipe_screen *_screen,
- struct pipe_buffer *_buffer,
- unsigned *handle)
-{
- struct pipe_screen *screen;
- struct pipe_buffer *buffer;
-
- if (trace_enabled()) {
- screen = trace_screen(_screen)->screen;
- buffer = trace_buffer(_buffer)->buffer;
- } else {
- screen = _screen;
- buffer = _buffer;
- }
-
- return hooks.global_handle_from_buffer(screen, buffer, handle);
-}
-
-struct drm_api drm_api_hooks =
-{
- .create_screen = trace_drm_create_screen,
- .create_context = trace_drm_create_context,
-
- .buffer_from_texture = trace_drm_buffer_from_texture,
- .buffer_from_handle = trace_drm_buffer_from_handle,
- .handle_from_buffer = trace_drm_handle_from_buffer,
- .global_handle_from_buffer = trace_drm_global_handle_from_buffer,
-};
-
-#endif /* TRACE_DRM_H */
diff --git a/src/gallium/auxiliary/util/u_blit.c b/src/gallium/auxiliary/util/u_blit.c
index 414cf910254..cda6dbd46d7 100644
--- a/src/gallium/auxiliary/util/u_blit.c
+++ b/src/gallium/auxiliary/util/u_blit.c
@@ -45,6 +45,7 @@
#include "util/u_math.h"
#include "util/u_memory.h"
#include "util/u_simple_shaders.h"
+#include "util/u_surface.h"
#include "cso_cache/cso_context.h"
@@ -155,7 +156,11 @@ util_destroy_blit(struct blit_state *ctx)
}
-static unsigned get_next_slot( struct blit_state *ctx )
+/**
+ * Get offset of next free slot in vertex buffer for quad vertices.
+ */
+static unsigned
+get_next_slot( struct blit_state *ctx )
{
const unsigned max_slots = 4096 / sizeof ctx->vertices;
@@ -173,7 +178,6 @@ static unsigned get_next_slot( struct blit_state *ctx )
}
-
/**
* Setup vertex data for the textured quad we'll draw.
* Note: y=0=top
@@ -260,9 +264,38 @@ setup_vertex_data_tex(struct blit_state *ctx,
return offset;
}
+
+
+/**
+ * Test whether two rectangles intersect.
+ *
+ * Each rectangle is given by two corners, (X0, Y0) and (X1, Y1). The
+ * corners may be passed in either order: MIN2/MAX2 are applied to every
+ * coordinate pair, so flipped regions are handled too.
+ *
+ * The comparisons are strict, so rectangles that merely share an edge
+ * coordinate (e.g. srcX1 == dstX0) are reported as overlapping.
+ *
+ * \return TRUE if two regions overlap, FALSE otherwise
+ */
+static boolean
+regions_overlap(int srcX0, int srcY0,
+ int srcX1, int srcY1,
+ int dstX0, int dstY0,
+ int dstX1, int dstY1)
+{
+ if (MAX2(srcX0, srcX1) < MIN2(dstX0, dstX1))
+ return FALSE; /* src completely left of dst */
+
+ if (MAX2(dstX0, dstX1) < MIN2(srcX0, srcX1))
+ return FALSE; /* dst completely left of src */
+
+ if (MAX2(srcY0, srcY1) < MIN2(dstY0, dstY1))
+ return FALSE; /* src completely above dst */
+
+ if (MAX2(dstY0, dstY1) < MIN2(srcY0, srcY1))
+ return FALSE; /* dst completely above src */
+
+ return TRUE; /* some overlap */
+}
+
+
/**
* Copy pixel block from src surface to dst surface.
* Overlapping regions are acceptable.
+ * Flipping and stretching are supported.
+ * XXX what about clipping???
* XXX need some control over blitting Z and/or stencil.
*/
void
@@ -285,10 +318,41 @@ util_blit_pixels(struct blit_state *ctx,
const int srcLeft = MIN2(srcX0, srcX1);
const int srcTop = MIN2(srcY0, srcY1);
unsigned offset;
+ boolean overlap;
assert(filter == PIPE_TEX_MIPFILTER_NEAREST ||
filter == PIPE_TEX_MIPFILTER_LINEAR);
+ assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE_2D,
+ PIPE_TEXTURE_USAGE_SAMPLER, 0));
+ assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
+ PIPE_TEXTURE_USAGE_RENDER_TARGET, 0));
+
+ /* do the regions overlap? */
+ overlap = util_same_surface(src, dst) &&
+ regions_overlap(srcX0, srcY0, srcX1, srcY1,
+ dstX0, dstY0, dstX1, dstY1);
+
+ /*
+ * Check for simple case: no format conversion, no flipping, no stretching,
+ * no overlapping.
+ * Filter mode should not matter since there's no stretching.
+ */
+ if (dst->format == src->format &&
+ srcX0 < srcX1 &&
+ dstX0 < dstX1 &&
+ srcY0 < srcY1 &&
+ dstY0 < dstY1 &&
+ (dstX1 - dstX0) == (srcX1 - srcX0) &&
+ (dstY1 - dstY0) == (srcY1 - srcY0) &&
+ !overlap) {
+ pipe->surface_copy(pipe,
+ dst, dstX0, dstY0, /* dest */
+ src, srcX0, srcY0, /* src */
+ srcW, srcH); /* size */
+ return;
+ }
+
if (srcLeft != srcX0) {
/* left-right flip */
int tmp = dstX0;
@@ -303,20 +367,6 @@ util_blit_pixels(struct blit_state *ctx,
dstY1 = tmp;
}
- assert(screen->is_format_supported(screen, src->format, PIPE_TEXTURE_2D,
- PIPE_TEXTURE_USAGE_SAMPLER, 0));
- assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
- PIPE_TEXTURE_USAGE_SAMPLER, 0));
-
- if(dst->format == src->format && (dstX1 - dstX0) == srcW && (dstY1 - dstY0) == srcH) {
- /* FIXME: this will most surely fail for overlapping rectangles */
- pipe->surface_copy(pipe,
- dst, dstX0, dstY0, /* dest */
- src, srcX0, srcY0, /* src */
- srcW, srcH); /* size */
- return;
- }
-
assert(screen->is_format_supported(screen, dst->format, PIPE_TEXTURE_2D,
PIPE_TEXTURE_USAGE_RENDER_TARGET, 0));
diff --git a/src/gallium/auxiliary/util/u_cache.c b/src/gallium/auxiliary/util/u_cache.c
index 41cd38171fa..47c16b1c927 100644
--- a/src/gallium/auxiliary/util/u_cache.c
+++ b/src/gallium/auxiliary/util/u_cache.c
@@ -137,6 +137,8 @@ util_cache_set(struct util_cache *cache,
struct util_cache_entry *entry;
assert(cache);
+ if (!cache)
+ return;
entry = util_cache_entry_get(cache, key);
util_cache_entry_destroy(cache, entry);
@@ -158,6 +160,8 @@ util_cache_get(struct util_cache *cache,
struct util_cache_entry *entry;
assert(cache);
+ if (!cache)
+ return NULL;
entry = util_cache_entry_get(cache, key);
if(!entry->key && !entry->value)
@@ -176,7 +180,9 @@ util_cache_clear(struct util_cache *cache)
uint32_t i;
assert(cache);
-
+ if (!cache)
+ return;
+
for(i = 0; i < cache->size; ++i)
util_cache_entry_destroy(cache, &cache->entries[i]);
}
@@ -186,6 +192,8 @@ void
util_cache_destroy(struct util_cache *cache)
{
assert(cache);
+ if (!cache)
+ return;
#ifdef DEBUG
if(cache->count >= 20*cache->size) {
diff --git a/src/gallium/auxiliary/util/u_debug.c b/src/gallium/auxiliary/util/u_debug.c
index 18597ef8395..96d400c839b 100644
--- a/src/gallium/auxiliary/util/u_debug.c
+++ b/src/gallium/auxiliary/util/u_debug.c
@@ -97,10 +97,8 @@ void _debug_vprintf(const char *format, va_list ap)
buf[0] = '\0';
}
#elif defined(PIPE_SUBSYSTEM_WINDOWS_USER)
- /* EngDebugPrint does not handle float point arguments, so we need to use
- * our own vsnprintf implementation. It is also very slow, so buffer until
- * we find a newline. */
- static char buf[512 + 1] = {'\0'};
+ /* OutputDebugStringA can be very slow, so buffer until we find a newline. */
+ static char buf[4096] = {'\0'};
size_t len = strlen(buf);
int ret = util_vsnprintf(buf + len, sizeof(buf) - len, format, ap);
if(ret > (int)(sizeof(buf) - len - 1) || util_strchr(buf + len, '\n')) {
@@ -145,11 +143,9 @@ void _debug_vprintf(const char *format, va_list ap)
#elif defined(PIPE_SUBSYSTEM_WINDOWS_MINIPORT)
/* TODO */
#else /* !PIPE_SUBSYSTEM_WINDOWS */
-#ifdef DEBUG
fflush(stdout);
vfprintf(stderr, format, ap);
#endif
-#endif
}
diff --git a/src/gallium/auxiliary/util/u_gen_mipmap.c b/src/gallium/auxiliary/util/u_gen_mipmap.c
index 6fa13a8ce11..edc37561ab1 100644
--- a/src/gallium/auxiliary/util/u_gen_mipmap.c
+++ b/src/gallium/auxiliary/util/u_gen_mipmap.c
@@ -46,10 +46,6 @@
#include "util/u_gen_mipmap.h"
#include "util/u_simple_shaders.h"
-#include "tgsi/tgsi_build.h"
-#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_parse.h"
-
#include "cso_cache/cso_context.h"
@@ -1385,7 +1381,7 @@ set_vertex_data(struct gen_mipmap_state *ctx,
* Not +/-1 to avoid cube face selection ambiguity near the edges,
* though that can still sometimes happen with this scale factor...
*/
- const float scale = 0.9999;
+ const float scale = 0.9999f;
const float sc = (2.0f * st[i][0] - 1.0f) * scale;
const float tc = (2.0f * st[i][1] - 1.0f) * scale;
diff --git a/src/gallium/auxiliary/util/u_handle_table.c b/src/gallium/auxiliary/util/u_handle_table.c
index 6da7353e259..3703718a621 100644
--- a/src/gallium/auxiliary/util/u_handle_table.c
+++ b/src/gallium/auxiliary/util/u_handle_table.c
@@ -87,6 +87,8 @@ handle_table_set_destroy(struct handle_table *ht,
void (*destroy)(void *object))
{
assert(ht);
+ if (!ht)
+ return;
ht->destroy = destroy;
}
@@ -155,7 +157,7 @@ handle_table_add(struct handle_table *ht,
assert(ht);
assert(object);
- if(!object)
+ if(!object || !ht)
return 0;
/* linear search for an empty handle */
@@ -193,7 +195,7 @@ handle_table_set(struct handle_table *ht,
assert(ht);
assert(handle);
- if(!handle)
+ if(!handle || !ht)
return 0;
assert(object);
@@ -222,7 +224,7 @@ handle_table_get(struct handle_table *ht,
assert(ht);
assert(handle);
- if(!handle || handle > ht->size)
+ if(!handle || !ht || handle > ht->size)
return NULL;
object = ht->objects[handle - 1];
@@ -240,7 +242,7 @@ handle_table_remove(struct handle_table *ht,
assert(ht);
assert(handle);
- if(!handle || handle > ht->size)
+ if(!handle || !ht || handle > ht->size)
return;
index = handle - 1;
@@ -283,6 +285,9 @@ handle_table_destroy(struct handle_table *ht)
unsigned index;
assert(ht);
+ if (!ht)
+ return;
+
if(ht->destroy)
for(index = 0; index < ht->size; ++index)
handle_table_clear(ht, index);
diff --git a/src/gallium/auxiliary/util/u_hash_table.c b/src/gallium/auxiliary/util/u_hash_table.c
index 2f83e318e44..8c2a8f454cc 100644
--- a/src/gallium/auxiliary/util/u_hash_table.c
+++ b/src/gallium/auxiliary/util/u_hash_table.c
@@ -148,6 +148,8 @@ hash_table_set(struct hash_table *ht,
struct cso_hash_iter iter;
assert(ht);
+ if (!ht)
+ return PIPE_ERROR_BAD_INPUT;
key_hash = ht->hash(key);
@@ -183,6 +185,8 @@ hash_table_get(struct hash_table *ht,
struct hash_table_item *item;
assert(ht);
+ if (!ht)
+ return NULL;
key_hash = ht->hash(key);
@@ -203,6 +207,8 @@ hash_table_remove(struct hash_table *ht,
struct hash_table_item *item;
assert(ht);
+ if (!ht)
+ return;
key_hash = ht->hash(key);
@@ -225,7 +231,9 @@ hash_table_clear(struct hash_table *ht)
struct hash_table_item *item;
assert(ht);
-
+ if (!ht)
+ return;
+
iter = cso_hash_first_node(ht->cso);
while (!cso_hash_iter_is_null(iter)) {
item = (struct hash_table_item *)cso_hash_take(ht->cso, cso_hash_iter_key(iter));
@@ -243,9 +251,11 @@ hash_table_foreach(struct hash_table *ht,
struct cso_hash_iter iter;
struct hash_table_item *item;
enum pipe_error result;
-
+
assert(ht);
-
+ if (!ht)
+ return PIPE_ERROR_BAD_INPUT;
+
iter = cso_hash_first_node(ht->cso);
while (!cso_hash_iter_is_null(iter)) {
item = (struct hash_table_item *)cso_hash_iter_data(iter);
@@ -264,9 +274,11 @@ hash_table_destroy(struct hash_table *ht)
{
struct cso_hash_iter iter;
struct hash_table_item *item;
-
+
assert(ht);
-
+ if (!ht)
+ return;
+
iter = cso_hash_first_node(ht->cso);
while (!cso_hash_iter_is_null(iter)) {
item = (struct hash_table_item *)cso_hash_iter_data(iter);
diff --git a/src/gallium/auxiliary/util/u_keymap.c b/src/gallium/auxiliary/util/u_keymap.c
index 3f70809efdc..508a2ee0634 100644
--- a/src/gallium/auxiliary/util/u_keymap.c
+++ b/src/gallium/auxiliary/util/u_keymap.c
@@ -194,6 +194,8 @@ util_keymap_insert(struct keymap *map, const void *key,
struct cso_hash_iter iter;
assert(map);
+ if (!map)
+ return FALSE;
key_hash = hash(key, map->key_size);
@@ -234,6 +236,8 @@ util_keymap_lookup(const struct keymap *map, const void *key)
struct keymap_item *item;
assert(map);
+ if (!map)
+ return NULL;
key_hash = hash(key, map->key_size);
@@ -258,6 +262,8 @@ util_keymap_remove(struct keymap *map, const void *key, void *user)
struct keymap_item *item;
assert(map);
+ if (!map)
+ return;
key_hash = hash(key, map->key_size);
@@ -267,6 +273,8 @@ util_keymap_remove(struct keymap *map, const void *key, void *user)
item = hash_table_item(iter);
assert(item);
+ if (!item)
+ return;
map->delete_func(map, item->key, item->value, user);
FREE(item->key);
FREE(item);
@@ -288,7 +296,9 @@ util_keymap_remove_all(struct keymap *map, void *user)
struct keymap_item *item;
assert(map);
-
+ if (!map)
+ return;
+
iter = cso_hash_first_node(map->cso);
while (!cso_hash_iter_is_null(iter)) {
item = (struct keymap_item *)
diff --git a/src/gallium/auxiliary/util/u_math.h b/src/gallium/auxiliary/util/u_math.h
index e5003af01d8..57410e78b02 100644
--- a/src/gallium/auxiliary/util/u_math.h
+++ b/src/gallium/auxiliary/util/u_math.h
@@ -53,11 +53,11 @@ __inline double ceil(double val)
{
double ceil_val;
- if((val - (long) val) == 0) {
+ if ((val - (long) val) == 0) {
ceil_val = val;
}
else {
- if(val > 0) {
+ if (val > 0) {
ceil_val = (long) val + 1;
}
else {
@@ -73,11 +73,11 @@ __inline double floor(double val)
{
double floor_val;
- if((val - (long) val) == 0) {
+ if ((val - (long) val) == 0) {
floor_val = val;
}
else {
- if(val > 0) {
+ if (val > 0) {
floor_val = (long) val;
}
else {
@@ -189,7 +189,10 @@ static INLINE double log2( double x )
extern float pow2_table[POW2_TABLE_SIZE];
-
+/**
+ * Initialize math module. This should be called before using any
+ * other functions in this module.
+ */
extern void
util_init_math(void);
@@ -216,23 +219,24 @@ util_fast_exp2(float x)
int32_t ipart;
float fpart, mpart;
union fi epart;
-
+
if(x > 129.00000f)
return 3.402823466e+38f;
-
- if(x < -126.99999f)
+
+ if (x < -126.99999f)
return 0.0f;
ipart = (int32_t) x;
fpart = x - (float) ipart;
-
+
/* same as
* epart.f = (float) (1 << ipart)
- * but faster and without integer overflow for ipart > 31 */
+ * but faster and without integer overflow for ipart > 31
+ */
epart.i = (ipart + 127 ) << 23;
-
+
mpart = pow2_table[POW2_TABLE_OFFSET + (int)(fpart * POW2_TABLE_SCALE)];
-
+
return epart.f * mpart;
}
@@ -254,6 +258,9 @@ util_fast_exp(float x)
extern float log2_table[LOG2_TABLE_SIZE];
+/**
+ * Fast approximation to log2(x).
+ */
static INLINE float
util_fast_log2(float x)
{
@@ -267,6 +274,9 @@ util_fast_log2(float x)
}
+/**
+ * Fast approximation to x^y.
+ */
static INLINE float
util_fast_pow(float x, float y)
{
@@ -274,7 +284,6 @@ util_fast_pow(float x, float y)
}
-
/**
* Floor(x), returned as int.
*/
@@ -284,8 +293,8 @@ util_ifloor(float f)
int ai, bi;
double af, bf;
union fi u;
- af = (3 << 22) + 0.5 + (double)f;
- bf = (3 << 22) + 0.5 - (double)f;
+ af = (3 << 22) + 0.5 + (double) f;
+ bf = (3 << 22) + 0.5 - (double) f;
u.f = (float) af; ai = u.i;
u.f = (float) bf; bi = u.i;
return (ai - bi) >> 1;
@@ -305,9 +314,9 @@ util_iround(float f)
#elif defined(PIPE_CC_MSVC) && defined(PIPE_ARCH_X86)
int r;
_asm {
- fld f
- fistp r
- }
+ fld f
+ fistp r
+ }
return r;
#else
if (f >= 0.0f)
@@ -340,7 +349,7 @@ static INLINE
unsigned long ffs( unsigned long u )
{
unsigned long i;
- if(_BitScanForward(&i, u))
+ if (_BitScanForward(&i, u))
return i + 1;
else
return 0;
@@ -351,7 +360,7 @@ unsigned ffs( unsigned u )
{
unsigned i;
- if( u == 0 ) {
+ if (u == 0) {
return 0;
}
@@ -378,7 +387,10 @@ fui( float f )
}
-
+/**
+ * Convert ubyte to float in [0, 1].
+ * XXX a 256-entry lookup table would be slightly faster.
+ */
static INLINE float
ubyte_to_float(ubyte ub)
{
@@ -409,7 +421,23 @@ float_to_ubyte(float f)
}
+/**
+ * Calc log base 2, rounded down.
+ *
+ * The result is the bit index of the highest set bit of n, e.g.
+ * 1 -> 0, 2 -> 1, 3 -> 1, 4 -> 2.
+ * Note that n == 0 also returns 0, the same as n == 1.
+ */
+static INLINE unsigned
+util_logbase2(unsigned n)
+{
+ unsigned log2 = 0;
+ while (n >>= 1) /* count how many times n can be halved before it reaches zero */
+ ++log2;
+ return log2;
+}
+
+/**
+ * Clamp X to [MIN, MAX].
+ * This is a macro to allow float, int, uint, etc. types.
+ */
#define CLAMP( X, MIN, MAX ) ( (X)<(MIN) ? (MIN) : ((X)>(MAX) ? (MAX) : (X)) )
#define MIN2( A, B ) ( (A)<(B) ? (A) : (B) )
@@ -422,6 +450,11 @@ align(int value, int alignment)
return (value + alignment - 1) & ~(alignment - 1);
}
+static INLINE unsigned
+minify(unsigned value) /* halve value (rounding down), but never return less than 1 */
+{
+ return MAX2(1, value >> 1); /* an input of 0 or 1 yields 1 */
+}
#ifndef COPY_4V
#define COPY_4V( DST, SRC ) \
diff --git a/src/gallium/auxiliary/util/u_memory.h b/src/gallium/auxiliary/util/u_memory.h
index 0b18d043adb..c3f8c918338 100644
--- a/src/gallium/auxiliary/util/u_memory.h
+++ b/src/gallium/auxiliary/util/u_memory.h
@@ -100,8 +100,14 @@ ExFreePool(void *P);
#define MALLOC( SIZE ) malloc( SIZE )
#define CALLOC( COUNT, SIZE ) calloc( COUNT, SIZE )
#define FREE( PTR ) free( PTR )
-#define REALLOC( OLDPTR, OLDSIZE, NEWSIZE ) realloc( OLDPTR, NEWSIZE )
+static INLINE void * /* function backing the REALLOC() macro on this platform branch */
+_REALLOC( void *old_ptr, unsigned old_size, unsigned new_size )
+{
+ (void) old_size; /* realloc() does not need the old size; the cast marks it as deliberately unused */
+ return realloc(old_ptr, new_size);
+}
+#define REALLOC( a, b, c ) _REALLOC( a, b, c ) /* keeps the three-argument REALLOC( ptr, old_size, new_size ) form */
#endif
diff --git a/src/gallium/auxiliary/util/u_mm.c b/src/gallium/auxiliary/util/u_mm.c
index 151a480d34d..4b75d4ba1d0 100644
--- a/src/gallium/auxiliary/util/u_mm.c
+++ b/src/gallium/auxiliary/util/u_mm.c
@@ -33,30 +33,32 @@
void
u_mmDumpMemInfo(const struct mem_block *heap)
{
- debug_printf("Memory heap %p:\n", (void *)heap);
+ debug_printf("Memory heap %p:\n", (void *) heap);
if (heap == 0) {
debug_printf(" heap == 0\n");
- } else {
+ }
+ else {
const struct mem_block *p;
- for(p = heap->next; p != heap; p = p->next) {
- debug_printf(" Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size,
- p->free ? 'F':'.',
- p->reserved ? 'R':'.');
+ for (p = heap->next; p != heap; p = p->next) {
+ debug_printf(" Offset:%08x, Size:%08x, %c%c\n", p->ofs, p->size,
+ p->free ? 'F':'.',
+ p->reserved ? 'R':'.');
}
debug_printf("\nFree list:\n");
- for(p = heap->next_free; p != heap; p = p->next_free) {
- debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n",p->ofs,p->size,
- p->free ? 'F':'.',
- p->reserved ? 'R':'.');
+ for (p = heap->next_free; p != heap; p = p->next_free) {
+ debug_printf(" FREE Offset:%08x, Size:%08x, %c%c\n", p->ofs, p->size,
+ p->free ? 'F':'.',
+ p->reserved ? 'R':'.');
}
}
debug_printf("End of memory blocks\n");
}
+
struct mem_block *
u_mmInit(int ofs, int size)
{
diff --git a/src/gallium/auxiliary/util/u_mm.h b/src/gallium/auxiliary/util/u_mm.h
index ce20e487635..6b158aae6e4 100644
--- a/src/gallium/auxiliary/util/u_mm.h
+++ b/src/gallium/auxiliary/util/u_mm.h
@@ -84,7 +84,7 @@ extern struct mem_block *u_mmFindBlock(struct mem_block *heap, int start);
extern void u_mmDestroy(struct mem_block *mmInit);
/**
- * For debuging purpose.
+ * For debugging purposes.
*/
extern void u_mmDumpMemInfo(const struct mem_block *mmInit);
diff --git a/src/gallium/auxiliary/util/u_prim.h b/src/gallium/auxiliary/util/u_prim.h
index d7c3995dbf0..a9b533eea70 100644
--- a/src/gallium/auxiliary/util/u_prim.h
+++ b/src/gallium/auxiliary/util/u_prim.h
@@ -119,7 +119,7 @@ static INLINE boolean u_trim_pipe_prim( unsigned pipe_prim, unsigned *nr )
}
-static INLINE boolean u_reduced_prim( unsigned pipe_prim )
+static INLINE unsigned u_reduced_prim( unsigned pipe_prim )
{
switch (pipe_prim) {
case PIPE_PRIM_POINTS:
diff --git a/src/gallium/auxiliary/util/u_rect.c b/src/gallium/auxiliary/util/u_rect.c
index 74259d453b1..9866b6fc8a0 100644
--- a/src/gallium/auxiliary/util/u_rect.c
+++ b/src/gallium/auxiliary/util/u_rect.c
@@ -43,7 +43,7 @@
* src_pitch may be negative to do vertical flip of pixels from source.
*/
void
-pipe_copy_rect(ubyte * dst,
+util_copy_rect(ubyte * dst,
const struct pipe_format_block *block,
unsigned dst_stride,
unsigned dst_x,
@@ -91,7 +91,7 @@ pipe_copy_rect(ubyte * dst,
}
void
-pipe_fill_rect(ubyte * dst,
+util_fill_rect(ubyte * dst,
const struct pipe_format_block *block,
unsigned dst_stride,
unsigned dst_x,
@@ -204,7 +204,7 @@ util_surface_copy(struct pipe_context *pipe,
if (src_map && dst_map) {
/* If do_flip, invert src_y position and pass negative src stride */
- pipe_copy_rect(dst_map,
+ util_copy_rect(dst_map,
&dst_trans->block,
dst_trans->stride,
0, 0,
@@ -263,7 +263,7 @@ util_surface_fill(struct pipe_context *pipe,
case 1:
case 2:
case 4:
- pipe_fill_rect(dst_map, &dst_trans->block, dst_trans->stride,
+ util_fill_rect(dst_map, &dst_trans->block, dst_trans->stride,
0, 0, width, height, value);
break;
case 8:
diff --git a/src/gallium/auxiliary/util/u_rect.h b/src/gallium/auxiliary/util/u_rect.h
index 59e842e16d1..daa50834d36 100644
--- a/src/gallium/auxiliary/util/u_rect.h
+++ b/src/gallium/auxiliary/util/u_rect.h
@@ -42,13 +42,13 @@ struct pipe_surface;
extern void
-pipe_copy_rect(ubyte * dst, const struct pipe_format_block *block,
+util_copy_rect(ubyte * dst, const struct pipe_format_block *block,
unsigned dst_stride, unsigned dst_x, unsigned dst_y,
unsigned width, unsigned height, const ubyte * src,
int src_stride, unsigned src_x, int src_y);
extern void
-pipe_fill_rect(ubyte * dst, const struct pipe_format_block *block,
+util_fill_rect(ubyte * dst, const struct pipe_format_block *block,
unsigned dst_stride, unsigned dst_x, unsigned dst_y,
unsigned width, unsigned height, uint32_t value);
diff --git a/src/gallium/auxiliary/util/u_simple_screen.c b/src/gallium/auxiliary/util/u_simple_screen.c
index 8114b53cd0d..f01296b40fc 100644
--- a/src/gallium/auxiliary/util/u_simple_screen.c
+++ b/src/gallium/auxiliary/util/u_simple_screen.c
@@ -65,12 +65,13 @@ pass_surface_buffer_create(struct pipe_screen *screen,
unsigned width, unsigned height,
enum pipe_format format,
unsigned usage,
+ unsigned tex_usage,
unsigned *stride)
{
struct pipe_buffer *buffer =
screen->winsys->surface_buffer_create(screen->winsys,
width, height,
- format, usage, stride);
+ format, usage, tex_usage, stride);
buffer->screen = screen;
diff --git a/src/gallium/auxiliary/util/u_simple_shaders.c b/src/gallium/auxiliary/util/u_simple_shaders.c
index e519c354d25..d54a1d8c746 100644
--- a/src/gallium/auxiliary/util/u_simple_shaders.c
+++ b/src/gallium/auxiliary/util/u_simple_shaders.c
@@ -42,9 +42,7 @@
#include "util/u_memory.h"
#include "util/u_simple_shaders.h"
-#include "tgsi/tgsi_build.h"
-#include "tgsi/tgsi_dump.h"
-#include "tgsi/tgsi_parse.h"
+#include "tgsi/tgsi_ureg.h"
@@ -58,93 +56,31 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe,
const uint *semantic_indexes)
{
- struct pipe_shader_state shader;
- struct tgsi_token tokens[100];
- struct tgsi_header *header;
- struct tgsi_processor *processor;
- struct tgsi_full_declaration decl;
- struct tgsi_full_instruction inst;
- const uint procType = TGSI_PROCESSOR_VERTEX;
- uint ti, i;
+ struct ureg_program *ureg;
+ uint i;
- /* shader header
- */
- *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
+ ureg = ureg_create( TGSI_PROCESSOR_VERTEX );
+ if (ureg == NULL)
+ return NULL;
- header = (struct tgsi_header *) &tokens[1];
- *header = tgsi_build_header();
-
- processor = (struct tgsi_processor *) &tokens[2];
- *processor = tgsi_build_processor( procType, header );
-
- ti = 3;
-
- /* declare inputs */
- for (i = 0; i < num_attribs; i++) {
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_INPUT;
-
- decl.Declaration.Semantic = 1;
- decl.Semantic.SemanticName = semantic_names[i];
- decl.Semantic.SemanticIndex = semantic_indexes[i];
-
- decl.DeclarationRange.First =
- decl.DeclarationRange.Last = i;
- ti += tgsi_build_full_declaration(&decl,
- &tokens[ti],
- header,
- Elements(tokens) - ti);
- }
-
- /* declare outputs */
for (i = 0; i < num_attribs; i++) {
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_OUTPUT;
- decl.Declaration.Semantic = 1;
- decl.Semantic.SemanticName = semantic_names[i];
- decl.Semantic.SemanticIndex = semantic_indexes[i];
- decl.DeclarationRange.First =
- decl.DeclarationRange.Last = i;
- ti += tgsi_build_full_declaration(&decl,
- &tokens[ti],
- header,
- Elements(tokens) - ti);
+ struct ureg_src src;
+ struct ureg_dst dst;
+
+ src = ureg_DECL_vs_input( ureg,
+ semantic_names[i],
+ semantic_indexes[i]);
+
+ dst = ureg_DECL_output( ureg,
+ semantic_names[i],
+ semantic_indexes[i]);
+
+ ureg_MOV( ureg, dst, src );
}
- /* emit MOV instructions */
- for (i = 0; i < num_attribs; i++) {
- /* MOVE out[i], in[i]; */
- inst = tgsi_default_full_instruction();
- inst.Instruction.Opcode = TGSI_OPCODE_MOV;
- inst.Instruction.NumDstRegs = 1;
- inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
- inst.FullDstRegisters[0].DstRegister.Index = i;
- inst.Instruction.NumSrcRegs = 1;
- inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
- inst.FullSrcRegisters[0].SrcRegister.Index = i;
- ti += tgsi_build_full_instruction(&inst,
- &tokens[ti],
- header,
- Elements(tokens) - ti );
- }
-
- /* END instruction */
- inst = tgsi_default_full_instruction();
- inst.Instruction.Opcode = TGSI_OPCODE_END;
- inst.Instruction.NumDstRegs = 0;
- inst.Instruction.NumSrcRegs = 0;
- ti += tgsi_build_full_instruction(&inst,
- &tokens[ti],
- header,
- Elements(tokens) - ti );
-
-#if 0 /*debug*/
- tgsi_dump(tokens, 0);
-#endif
-
- shader.tokens = tokens;
+ ureg_END( ureg );
- return pipe->create_vs_state(pipe, &shader);
+ return ureg_create_shader_and_destroy( ureg, pipe );
}
@@ -158,99 +94,29 @@ util_make_vertex_passthrough_shader(struct pipe_context *pipe,
void *
util_make_fragment_tex_shader(struct pipe_context *pipe)
{
- struct pipe_shader_state shader;
- struct tgsi_token tokens[100];
- struct tgsi_header *header;
- struct tgsi_processor *processor;
- struct tgsi_full_declaration decl;
- struct tgsi_full_instruction inst;
- const uint procType = TGSI_PROCESSOR_FRAGMENT;
- uint ti;
-
- /* shader header
- */
- *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
-
- header = (struct tgsi_header *) &tokens[1];
- *header = tgsi_build_header();
-
- processor = (struct tgsi_processor *) &tokens[2];
- *processor = tgsi_build_processor( procType, header );
-
- ti = 3;
-
- /* declare TEX[0] input */
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_INPUT;
- /* XXX this could be linear... */
- decl.Declaration.Interpolate = TGSI_INTERPOLATE_PERSPECTIVE;
- decl.Declaration.Semantic = 1;
- decl.Semantic.SemanticName = TGSI_SEMANTIC_GENERIC;
- decl.Semantic.SemanticIndex = 0;
- decl.DeclarationRange.First =
- decl.DeclarationRange.Last = 0;
- ti += tgsi_build_full_declaration(&decl,
- &tokens[ti],
- header,
- Elements(tokens) - ti);
-
- /* declare color[0] output */
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_OUTPUT;
- decl.Declaration.Semantic = 1;
- decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR;
- decl.Semantic.SemanticIndex = 0;
- decl.DeclarationRange.First =
- decl.DeclarationRange.Last = 0;
- ti += tgsi_build_full_declaration(&decl,
- &tokens[ti],
- header,
- Elements(tokens) - ti);
-
- /* declare sampler */
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_SAMPLER;
- decl.DeclarationRange.First =
- decl.DeclarationRange.Last = 0;
- ti += tgsi_build_full_declaration(&decl,
- &tokens[ti],
- header,
- Elements(tokens) - ti);
-
- /* TEX instruction */
- inst = tgsi_default_full_instruction();
- inst.Instruction.Opcode = TGSI_OPCODE_TEX;
- inst.Instruction.NumDstRegs = 1;
- inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
- inst.FullDstRegisters[0].DstRegister.Index = 0;
- inst.Instruction.NumSrcRegs = 2;
- inst.InstructionExtTexture.Texture = TGSI_TEXTURE_2D;
- inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
- inst.FullSrcRegisters[0].SrcRegister.Index = 0;
- inst.FullSrcRegisters[1].SrcRegister.File = TGSI_FILE_SAMPLER;
- inst.FullSrcRegisters[1].SrcRegister.Index = 0;
- ti += tgsi_build_full_instruction(&inst,
- &tokens[ti],
- header,
- Elements(tokens) - ti );
-
- /* END instruction */
- inst = tgsi_default_full_instruction();
- inst.Instruction.Opcode = TGSI_OPCODE_END;
- inst.Instruction.NumDstRegs = 0;
- inst.Instruction.NumSrcRegs = 0;
- ti += tgsi_build_full_instruction(&inst,
- &tokens[ti],
- header,
- Elements(tokens) - ti );
-
-#if 0 /*debug*/
- tgsi_dump(tokens, 0);
-#endif
-
- shader.tokens = tokens;
-
- return pipe->create_fs_state(pipe, &shader);
+ struct ureg_program *ureg;
+ struct ureg_src sampler;
+ struct ureg_src tex;
+ struct ureg_dst out;
+
+ ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
+ if (ureg == NULL)
+ return NULL;
+
+ sampler = ureg_DECL_sampler( ureg );
+
+ tex = ureg_DECL_fs_input( ureg,
+ TGSI_SEMANTIC_GENERIC, 0,
+ TGSI_INTERPOLATE_PERSPECTIVE );
+
+ out = ureg_DECL_output( ureg,
+ TGSI_SEMANTIC_COLOR,
+ 0 );
+
+ ureg_TEX( ureg, out, TGSI_TEXTURE_2D, tex, sampler );
+ ureg_END( ureg );
+
+ return ureg_create_shader_and_destroy( ureg, pipe );
}
@@ -263,87 +129,23 @@ util_make_fragment_tex_shader(struct pipe_context *pipe)
void *
util_make_fragment_passthrough_shader(struct pipe_context *pipe)
{
- struct pipe_shader_state shader;
- struct tgsi_token tokens[40];
- struct tgsi_header *header;
- struct tgsi_processor *processor;
- struct tgsi_full_declaration decl;
- struct tgsi_full_instruction inst;
- const uint procType = TGSI_PROCESSOR_FRAGMENT;
- uint ti;
-
- /* shader header
- */
- *(struct tgsi_version *) &tokens[0] = tgsi_build_version();
-
- header = (struct tgsi_header *) &tokens[1];
- *header = tgsi_build_header();
-
- processor = (struct tgsi_processor *) &tokens[2];
- *processor = tgsi_build_processor( procType, header );
-
- ti = 3;
-
- /* declare input */
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_INPUT;
- decl.Declaration.Semantic = 1;
- decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR;
- decl.Semantic.SemanticIndex = 0;
- decl.DeclarationRange.First =
- decl.DeclarationRange.Last = 0;
- ti += tgsi_build_full_declaration(&decl,
- &tokens[ti],
- header,
- Elements(tokens) - ti);
-
- /* declare output */
- decl = tgsi_default_full_declaration();
- decl.Declaration.File = TGSI_FILE_OUTPUT;
- decl.Declaration.Semantic = 1;
- decl.Semantic.SemanticName = TGSI_SEMANTIC_COLOR;
- decl.Semantic.SemanticIndex = 0;
- decl.DeclarationRange.First =
- decl.DeclarationRange.Last = 0;
- ti += tgsi_build_full_declaration(&decl,
- &tokens[ti],
- header,
- Elements(tokens) - ti);
-
-
- /* MOVE out[0], in[0]; */
- inst = tgsi_default_full_instruction();
- inst.Instruction.Opcode = TGSI_OPCODE_MOV;
- inst.Instruction.NumDstRegs = 1;
- inst.FullDstRegisters[0].DstRegister.File = TGSI_FILE_OUTPUT;
- inst.FullDstRegisters[0].DstRegister.Index = 0;
- inst.Instruction.NumSrcRegs = 1;
- inst.FullSrcRegisters[0].SrcRegister.File = TGSI_FILE_INPUT;
- inst.FullSrcRegisters[0].SrcRegister.Index = 0;
- ti += tgsi_build_full_instruction(&inst,
- &tokens[ti],
- header,
- Elements(tokens) - ti );
-
- /* END instruction */
- inst = tgsi_default_full_instruction();
- inst.Instruction.Opcode = TGSI_OPCODE_END;
- inst.Instruction.NumDstRegs = 0;
- inst.Instruction.NumSrcRegs = 0;
- ti += tgsi_build_full_instruction(&inst,
- &tokens[ti],
- header,
- Elements(tokens) - ti );
-
- assert(ti < Elements(tokens));
-
-#if 0 /*debug*/
- tgsi_dump(tokens, 0);
-#endif
-
- shader.tokens = tokens;
-
- return pipe->create_fs_state(pipe, &shader);
+ struct ureg_program *ureg;
+ struct ureg_src src;
+ struct ureg_dst dst;
+
+ ureg = ureg_create( TGSI_PROCESSOR_FRAGMENT );
+ if (ureg == NULL)
+ return NULL;
+
+ src = ureg_DECL_fs_input( ureg, TGSI_SEMANTIC_COLOR, 0,
+ TGSI_INTERPOLATE_PERSPECTIVE );
+
+ dst = ureg_DECL_output( ureg, TGSI_SEMANTIC_COLOR, 0 );
+
+ ureg_MOV( ureg, dst, src );
+ ureg_END( ureg );
+
+ return ureg_create_shader_and_destroy( ureg, pipe );
}
diff --git a/src/gallium/auxiliary/util/u_snprintf.c b/src/gallium/auxiliary/util/u_snprintf.c
index 0d54299b28a..65bdd0df2b7 100644
--- a/src/gallium/auxiliary/util/u_snprintf.c
+++ b/src/gallium/auxiliary/util/u_snprintf.c
@@ -829,7 +829,7 @@ util_vsnprintf(char *str, size_t size, const char *format, va_list args)
break;
default:
intptr = va_arg(args, int *);
- *intptr = len;
+ *intptr = (int)len;
break;
}
break;
diff --git a/src/gallium/auxiliary/util/u_surface.h b/src/gallium/auxiliary/util/u_surface.h
index a5b73cfc20a..ce84ed7ad06 100644
--- a/src/gallium/auxiliary/util/u_surface.h
+++ b/src/gallium/auxiliary/util/u_surface.h
@@ -37,6 +37,23 @@ struct pipe_texture;
struct pipe_surface;
+/**
+ * Are s1 and s2 the same surface?
+ * Surfaces are basically views into textures so check if the two surfaces
+ * name the same part of the same texture.
+ */
+static INLINE boolean
+util_same_surface(const struct pipe_surface *s1, const struct pipe_surface *s2)
+{
+ return (s1->texture == s2->texture &&
+ s1->face == s2->face &&
+ s1->level == s2->level &&
+ s1->zslice == s2->zslice);
+}
+
+
+
+
extern boolean
util_create_rgba_surface(struct pipe_screen *screen,
uint width, uint height,
diff --git a/src/gallium/auxiliary/util/u_tile.c b/src/gallium/auxiliary/util/u_tile.c
index 9747a55cbfa..9e76cfbb053 100644
--- a/src/gallium/auxiliary/util/u_tile.c
+++ b/src/gallium/auxiliary/util/u_tile.c
@@ -62,7 +62,7 @@ pipe_get_tile_raw(struct pipe_transfer *pt,
if(!src)
return;
- pipe_copy_rect(dst, &pt->block, dst_stride, 0, 0, w, h, src, pt->stride, x, y);
+ util_copy_rect(dst, &pt->block, dst_stride, 0, 0, w, h, src, pt->stride, x, y);
screen->transfer_unmap(screen, pt);
}
@@ -90,7 +90,7 @@ pipe_put_tile_raw(struct pipe_transfer *pt,
if(!dst)
return;
- pipe_copy_rect(dst, &pt->block, pt->stride, x, y, w, h, src, src_stride, 0, 0);
+ util_copy_rect(dst, &pt->block, pt->stride, x, y, w, h, src, src_stride, 0, 0);
screen->transfer_unmap(screen, pt);
}
@@ -153,7 +153,7 @@ a8r8g8b8_put_tile_rgba(unsigned *dst,
}
-/*** PIPE_FORMAT_A8R8G8B8_UNORM ***/
+/*** PIPE_FORMAT_X8R8G8B8_UNORM ***/
static void
x8r8g8b8_get_tile_rgba(const unsigned *src,
@@ -1202,6 +1202,19 @@ pipe_put_tile_z(struct pipe_transfer *pt,
}
break;
case PIPE_FORMAT_S8Z24_UNORM:
+ {
+ uint *pDest = (uint *) (map + y * pt->stride + x*4);
+ assert(pt->usage == PIPE_TRANSFER_READ_WRITE);
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ /* convert 32-bit Z to 24-bit Z, preserve stencil */
+ pDest[j] = (pDest[j] & 0xff000000) | ptrc[j] >> 8;
+ }
+ pDest += pt->stride/4;
+ ptrc += srcStride;
+ }
+ }
+ break;
case PIPE_FORMAT_X8Z24_UNORM:
{
uint *pDest = (uint *) (map + y * pt->stride + x*4);
@@ -1216,13 +1229,26 @@ pipe_put_tile_z(struct pipe_transfer *pt,
}
break;
case PIPE_FORMAT_Z24S8_UNORM:
+ {
+ uint *pDest = (uint *) (map + y * pt->stride + x*4);
+ assert(pt->usage == PIPE_TRANSFER_READ_WRITE);
+ for (i = 0; i < h; i++) {
+ for (j = 0; j < w; j++) {
+ /* convert 32-bit Z to 24-bit Z, preserve stencil */
+ pDest[j] = (pDest[j] & 0xff) | (ptrc[j] & 0xffffff00);
+ }
+ pDest += pt->stride/4;
+ ptrc += srcStride;
+ }
+ }
+ break;
case PIPE_FORMAT_Z24X8_UNORM:
{
uint *pDest = (uint *) (map + y * pt->stride + x*4);
for (i = 0; i < h; i++) {
for (j = 0; j < w; j++) {
/* convert 32-bit Z to 24-bit Z (0 stencil) */
- pDest[j] = ptrc[j] << 8;
+ pDest[j] = ptrc[j] & 0xffffff00;
}
pDest += pt->stride/4;
ptrc += srcStride;
diff --git a/src/gallium/auxiliary/util/u_time.c b/src/gallium/auxiliary/util/u_time.c
index 5268cbf79ce..c16cdd0b226 100644
--- a/src/gallium/auxiliary/util/u_time.c
+++ b/src/gallium/auxiliary/util/u_time.c
@@ -35,7 +35,7 @@
#include "pipe/p_config.h"
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
#include <sys/time.h>
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
#include <windows.h>
@@ -77,7 +77,7 @@ util_time_get_frequency(void)
void
util_time_get(struct util_time *t)
{
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
gettimeofday(&t->tv, NULL);
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY)
LONGLONG temp;
@@ -102,7 +102,7 @@ util_time_add(const struct util_time *t1,
int64_t usecs,
struct util_time *t2)
{
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
t2->tv.tv_sec = t1->tv.tv_sec + usecs / 1000000;
t2->tv.tv_usec = t1->tv.tv_usec + usecs % 1000000;
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
@@ -124,7 +124,7 @@ int64_t
util_time_diff(const struct util_time *t1,
const struct util_time *t2)
{
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
return (t2->tv.tv_usec - t1->tv.tv_usec) +
(t2->tv.tv_sec - t1->tv.tv_sec)*1000000;
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
@@ -144,7 +144,7 @@ util_time_micros( void )
util_time_get(&t1);
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
return t1.tv.tv_usec + t1.tv.tv_sec*1000000LL;
#elif defined(PIPE_SUBSYSTEM_WINDOWS_DISPLAY) || defined(PIPE_SUBSYSTEM_WINDOWS_USER) || defined(PIPE_SUBSYSTEM_WINDOWS_CE)
util_time_get_frequency();
@@ -166,7 +166,7 @@ static INLINE int
util_time_compare(const struct util_time *t1,
const struct util_time *t2)
{
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
if (t1->tv.tv_sec < t2->tv.tv_sec)
return -1;
else if(t1->tv.tv_sec > t2->tv.tv_sec)
diff --git a/src/gallium/auxiliary/util/u_time.h b/src/gallium/auxiliary/util/u_time.h
index 6bca6077a2a..7a5c54d9b23 100644
--- a/src/gallium/auxiliary/util/u_time.h
+++ b/src/gallium/auxiliary/util/u_time.h
@@ -38,7 +38,7 @@
#include "pipe/p_config.h"
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
#include <time.h> /* timeval */
#include <unistd.h> /* usleep */
#endif
@@ -58,7 +58,7 @@ extern "C" {
*/
struct util_time
{
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
struct timeval tv;
#else
int64_t counter;
@@ -89,7 +89,7 @@ util_time_timeout(const struct util_time *start,
const struct util_time *end,
const struct util_time *curr);
-#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS)
+#if defined(PIPE_OS_LINUX) || defined(PIPE_OS_BSD) || defined(PIPE_OS_SOLARIS) || defined(PIPE_OS_APPLE)
#define util_time_sleep usleep
#else
void
diff --git a/src/gallium/auxiliary/util/u_timed_winsys.c b/src/gallium/auxiliary/util/u_timed_winsys.c
index 77b2a3a1c87..178acdca4df 100644
--- a/src/gallium/auxiliary/util/u_timed_winsys.c
+++ b/src/gallium/auxiliary/util/u_timed_winsys.c
@@ -212,13 +212,14 @@ timed_surface_buffer_create(struct pipe_winsys *winsys,
unsigned width, unsigned height,
enum pipe_format format,
unsigned usage,
+ unsigned tex_usage,
unsigned *stride)
{
struct pipe_winsys *backend = timed_winsys(winsys)->backend;
uint64_t start = time_start();
struct pipe_buffer *ret = backend->surface_buffer_create( backend, width, height,
- format, usage, stride );
+ format, usage, tex_usage, stride );
time_finish(winsys, start, 7, __FUNCTION__);
diff --git a/src/gallium/auxiliary/util/u_upload_mgr.c b/src/gallium/auxiliary/util/u_upload_mgr.c
index 2eb98068c86..c90425f3e54 100644
--- a/src/gallium/auxiliary/util/u_upload_mgr.c
+++ b/src/gallium/auxiliary/util/u_upload_mgr.c
@@ -83,7 +83,9 @@ my_buffer_write(struct pipe_screen *screen,
assert(dirty_size >= size);
assert(size);
- map = pipe_buffer_map_range(screen, buf, offset, size, PIPE_BUFFER_USAGE_CPU_WRITE);
+ map = pipe_buffer_map_range(screen, buf, offset, size,
+ PIPE_BUFFER_USAGE_CPU_WRITE |
+ PIPE_BUFFER_USAGE_FLUSH_EXPLICIT);
if (map == NULL)
return PIPE_ERROR_OUT_OF_MEMORY;