summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorZack Rusin <zackr@vmware.com>2013-11-20 18:50:00 -0500
committerZack Rusin <zackr@vmware.com>2013-11-20 18:50:00 -0500
commite8b9a19fb074295f4bf06a18afa77b8249ee9643 (patch)
treebdab127e62bc023676810ac49846de3cfdbbd7e4
parentac854f29d67a524066ec9921791a132c33b5831f (diff)
downloadmesa-llvmpipe-rast-64.tar.gz
llvmpipe: enable 32 bit sse paths in the triangle setupllvmpipe-rast-64
we can use the 32 bit sse paths if the fb is smaller than the largest bounding box is known to not cause an overflow and triangle fits within the box.
-rw-r--r--src/gallium/drivers/llvmpipe/lp_setup_tri.c35
1 files changed, 23 insertions, 12 deletions
diff --git a/src/gallium/drivers/llvmpipe/lp_setup_tri.c b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
index 0062ebbf61d..1507a5c8a86 100644
--- a/src/gallium/drivers/llvmpipe/lp_setup_tri.c
+++ b/src/gallium/drivers/llvmpipe/lp_setup_tri.c
@@ -44,9 +44,6 @@
#define NUM_CHANNELS 4
-/* TODO */
-#undef PIPE_ARCH_SSE
-
#if defined(PIPE_ARCH_SSE)
#include <emmintrin.h>
#endif
@@ -381,7 +378,10 @@ do_triangle_ccw(struct lp_setup_context *setup,
plane = GET_PLANES(tri);
#if defined(PIPE_ARCH_SSE)
- {
+ if (setup->fb.width <= MAX_FIXED_LENGTH32 &&
+ setup->fb.height <= MAX_FIXED_LENGTH32 &&
+ (bbox.x1 - bbox.x0) <= MAX_FIXED_LENGTH32 &&
+ (bbox.y1 - bbox.y0) <= MAX_FIXED_LENGTH32) {
__m128i vertx, verty;
__m128i shufx, shufy;
__m128i dcdx, dcdy, c;
@@ -393,9 +393,12 @@ do_triangle_ccw(struct lp_setup_context *setup,
__m128i c_inc_mask, c_inc;
__m128i eo, p0, p1, p2;
__m128i zero = _mm_setzero_si128();
+ PIPE_ALIGN_VAR(16) int32_t temp_vec[4];
- vertx = _mm_loadu_si128((__m128i *)position->x); /* vertex x coords */
- verty = _mm_loadu_si128((__m128i *)position->y); /* vertex y coords */
+ vertx = _mm_setr_epi32((int32_t)position->x[0], (int32_t)position->x[1],
+ (int32_t)position->x[2], (int32_t)position->x[3]);
+ verty = _mm_setr_epi32((int32_t)position->y[0], (int32_t)position->y[1],
+ (int32_t)position->y[2], (int32_t)position->y[3]);
shufx = _mm_shuffle_epi32(vertx, _MM_SHUFFLE(3,0,2,1));
shufy = _mm_shuffle_epi32(verty, _MM_SHUFFLE(3,0,2,1));
@@ -439,11 +442,20 @@ do_triangle_ccw(struct lp_setup_context *setup,
transpose4_epi32(&c, &dcdx, &dcdy, &eo,
&p0, &p1, &p2, &unused);
- _mm_store_si128((__m128i *)&plane[0], p0);
- _mm_store_si128((__m128i *)&plane[1], p1);
- _mm_store_si128((__m128i *)&plane[2], p2);
- }
-#else
+#define STORE_PLANE(plane, vec) do { \
+ _mm_store_si128((__m128i *)&temp_vec, vec); \
+ plane.c = (int64_t)temp_vec[0]; \
+ plane.dcdx = temp_vec[1]; \
+ plane.dcdy = temp_vec[2]; \
+ plane.eo = temp_vec[3]; \
+ } while(0)
+
+ STORE_PLANE(plane[0], p0);
+ STORE_PLANE(plane[1], p1);
+ STORE_PLANE(plane[2], p2);
+#undef STORE_PLANE
+ } else
+#endif
{
int i;
plane[0].dcdy = position->dx01;
@@ -496,7 +508,6 @@ do_triangle_ccw(struct lp_setup_context *setup,
if (plane[i].dcdy > 0) plane[i].eo += plane[i].dcdy;
}
}
-#endif
if (0) {
debug_printf("p0: %"PRIx64"/%08x/%08x/%"PRIx64"\n",