author | rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-01-11 21:33:14 +0000 |
---|---|---|
committer | rth <rth@138bc75d-0d04-0410-961f-82ee72b054a4> | 2005-01-11 21:33:14 +0000 |
commit | ad2c46cf58ec0f1ec0328005b016ba8159c34530 (patch) | |
tree | 41dfb70d2dcb0969d9a1eb218ace89a7d8b7331b /gcc/config/i386/xmmintrin.h | |
parent | a633f77e84acdcbfdd803d817eaf744012080639 (diff) | |
download | gcc-ad2c46cf58ec0f1ec0328005b016ba8159c34530.tar.gz |
PR target/13366
* config/i386/i386.h (enum ix86_builtins): Move ...
* config/i386/i386.c: ... here.
(IX86_BUILTIN_MOVDDUP, IX86_BUILTIN_MMX_ZERO, IX86_BUILTIN_PEXTRW,
IX86_BUILTIN_PINSRW, IX86_BUILTIN_LOADAPS, IX86_BUILTIN_LOADSS,
IX86_BUILTIN_STORESS, IX86_BUILTIN_SSE_ZERO, IX86_BUILTIN_PEXTRW128,
IX86_BUILTIN_PINSRW128, IX86_BUILTIN_LOADAPD, IX86_BUILTIN_LOADSD,
IX86_BUILTIN_STOREAPD, IX86_BUILTIN_STORESD, IX86_BUILTIN_STOREHPD,
IX86_BUILTIN_STORELPD, IX86_BUILTIN_SETPD1, IX86_BUILTIN_SETPD,
IX86_BUILTIN_CLRPD, IX86_BUILTIN_LOADPD1, IX86_BUILTIN_LOADRPD,
IX86_BUILTIN_STOREPD1, IX86_BUILTIN_STORERPD, IX86_BUILTIN_LOADDQA,
IX86_BUILTIN_STOREDQA, IX86_BUILTIN_CLRTI,
IX86_BUILTIN_LOADDDUP): Remove.
(IX86_BUILTIN_VEC_INIT_V2SI, IX86_BUILTIN_VEC_INIT_V4HI,
IX86_BUILTIN_VEC_INIT_V8QI, IX86_BUILTIN_VEC_EXT_V2DF,
IX86_BUILTIN_VEC_EXT_V2DI, IX86_BUILTIN_VEC_EXT_V4SF,
IX86_BUILTIN_VEC_EXT_V8HI, IX86_BUILTIN_VEC_EXT_V4HI,
IX86_BUILTIN_VEC_SET_V8HI, IX86_BUILTIN_VEC_SET_V4HI): New.
(ix86_init_builtins): Make static.
(ix86_init_mmx_sse_builtins): Update for changed builtins.
(ix86_expand_binop_builtin): Only use ix86_fixup_binary_operands
if all the modes match. Otherwise, fake it.
(get_element_number, ix86_expand_vec_init_builtin,
ix86_expand_vec_ext_builtin, ix86_expand_vec_set_builtin): New.
(ix86_expand_builtin): Make static. Update for changed builtins.
(ix86_expand_vector_move_misalign): Use sse2_loadlpd with zero
operand instead of sse2_loadsd. Cast sse1 fallback to V4SFmode.
(ix86_expand_vector_init_duplicate): New.
(ix86_expand_vector_init_low_nonzero): New.
(ix86_expand_vector_init_one_var, ix86_expand_vector_init_general):
Split out from ix86_expand_vector_init; handle integer modes.
(ix86_expand_vector_init): Use them.
(ix86_expand_vector_set, ix86_expand_vector_extract): New.
* config/i386/i386-protos.h: Update.
* config/i386/predicates.md (reg_or_0_operand): New.
* config/i386/mmx.md (mov<MMXMODEI>_internal): Add 'r' variants.
(movv2sf_internal): Likewise. And a splitter to match them all.
(vec_dupv2sf, mmx_concatv2sf, vec_setv2sf, vec_extractv2sf,
vec_initv2sf, vec_dupv4hi, vec_dupv2si, mmx_concatv2si, vec_setv2si,
vec_extractv2si, vec_initv2si, vec_setv4hi, vec_extractv4hi,
vec_initv4hi, vec_setv8qi, vec_extractv8qi, vec_initv8qi): New.
(mmx_pinsrw): Fix operand ordering.
* config/i386/sse.md (movv4sf splitter): Use direct pattern,
rather than sse_loadss expander.
(movv2df splitter): Similarly.
(sse_loadss, sse_loadlss): Remove.
(vec_dupv4sf, sse_concatv2sf, sse_concatv4sf, vec_extractv4sf_0): New.
(vec_setv4sf, vec_setv2df): Use ix86_expand_vector_set.
(vec_extractv4sf, vec_extractv2df): Use ix86_expand_vector_extract.
(sse3_movddup): Rename with '*'.
(sse3_movddup splitter): Use gen_rtx_REG instead of gen_lowpart.
(sse2_loadsd): Remove.
(vec_dupv2df_sse3): Rename from sse3_loadddup.
(vec_dupv2df, vec_concatv2df_sse3, vec_concatv2df): New.
(sse2_pinsrw): Fix argument ordering.
(sse2_loadld, sse2_loadq): Add sse1 alternatives.
(sse2_stored): Remove 'r' destination.
(vec_dupv4si, vec_dupv2di, sse2_concatv2si, sse1_concatv2si,
vec_concatv4si_1, vec_concatv2di, vec_setv2di, vec_extractv2di,
vec_initv2di, vec_setv4si, vec_extractv4si, vec_initv4si,
vec_setv8hi, vec_extractv8hi, vec_initv8hi, vec_setv16qi,
vec_extractv16qi, vec_initv16qi): New.
* config/i386/emmintrin.h (__m128i, __m128d): Use typedef, not define.
(_mm_set_sd, _mm_set1_pd, _mm_setzero_pd, _mm_set_epi64x,
_mm_set_epi32, _mm_set_epi16, _mm_set_epi8, _mm_setzero_si128): Use
constructor form.
(_mm_load_pd, _mm_store_pd): Use plain dereference.
(_mm_load_si128, _mm_store_si128): Likewise.
(_mm_load1_pd): Use _mm_set1_pd.
(_mm_load_sd): Use _mm_set_sd.
(_mm_store_sd, _mm_storeh_pd): Use __builtin_ia32_vec_ext_v2df.
(_mm_store1_pd, _mm_storer_pd): Use _mm_store_pd.
(_mm_set_epi64): Use _mm_set_epi64x.
(_mm_set1_epi64x, _mm_set1_epi64, _mm_set1_epi32, _mm_set1_epi16,
_mm_set1_epi8, _mm_setr_epi64, _mm_setr_epi32, _mm_setr_epi16,
_mm_setr_epi8): Use _mm_set_foo form.
(_mm_loadl_epi64, _mm_movpi64_epi64, _mm_move_epi64): Use _mm_set_epi64.
(_mm_storel_epi64, _mm_movepi64_pi64): Use __builtin_ia32_vec_ext_v2di.
(_mm_extract_epi16): Use __builtin_ia32_vec_ext_v8hi.
(_mm_insert_epi16): Use __builtin_ia32_vec_set_v8hi.
* config/i386/mmintrin.h (_mm_setzero_si64): Use plain cast.
(_mm_set_pi32): Use __builtin_ia32_vec_init_v2si.
(_mm_set_pi16): Use __builtin_ia32_vec_init_v4hi.
(_mm_set_pi8): Use __builtin_ia32_vec_init_v8qi.
(_mm_set1_pi16, _mm_set1_pi8): Use _mm_set_piN variant.
* config/i386/pmmintrin.h (_mm_loaddup_pd): Use _mm_load1_pd.
(_mm_movedup_pd): Use _mm_shuffle_pd.
* config/i386/xmmintrin.h (_mm_setzero_ps, _mm_set_ss,
_mm_set1_ps, _mm_set_ps, _mm_setr_ps): Use constructor form.
(_mm_cvtpi16_ps, _mm_cvtpu16_ps, _mm_cvtpi8_ps, _mm_cvtpu8_ps,
_mm_cvtps_pi8, _mm_cvtpi32x2_ps): Avoid __builtin_ia32_mmx_zero;
use _mm_setzero_ps.
(_mm_load_ss, _mm_load1_ps): Use _mm_set* form.
(_mm_load_ps, _mm_loadr_ps): Use raw dereference.
(_mm_store_ss): Use __builtin_ia32_vec_ext_v4sf.
(_mm_store_ps): Use raw dereference.
(_mm_store1_ps): Use _mm_storeu_ps.
(_mm_storer_ps): Use _mm_store_ps.
(_mm_extract_pi16): Use __builtin_ia32_vec_ext_v4hi.
(_mm_insert_pi16): Use __builtin_ia32_vec_set_v4hi.
git-svn-id: svn+ssh://gcc.gnu.org/svn/gcc/trunk@93199 138bc75d-0d04-0410-961f-82ee72b054a4
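The "constructor form" used throughout the log is GCC's generic vector initializer syntax: instead of routing scalars through builtins such as __builtin_ia32_loadss and __builtin_ia32_shufps, the intrinsics now write the vector value directly and let the back end choose the instructions. A minimal stand-alone sketch of the idea follows; it is not part of the patch, and the type and function names are illustrative stand-ins for the header's own __v4sf and _mm_set1_ps.

#include <stdio.h>
#include <string.h>

/* Illustration only: a 16-byte float vector, standing in for __v4sf.  */
typedef float v4 __attribute__ ((__vector_size__ (16)));

/* Old style: load the scalar through a builtin, then broadcast it with a
   shuffle builtin.  New style (this patch): build the vector with an
   initializer and let the compiler pick the instructions.  */
static v4
broadcast (float f)
{
  return (v4){ f, f, f, f };
}

int
main (void)
{
  v4 v = broadcast (2.5f);
  float out[4];

  memcpy (out, &v, sizeof out);   /* copy the lanes out for printing */
  printf ("%g %g %g %g\n", out[0], out[1], out[2], out[3]);
  return 0;
}

The extract direction is the mirror image: as the hunks below show, _mm_store_ss and _mm_extract_pi16 now go through the new __builtin_ia32_vec_ext_v4sf and __builtin_ia32_vec_ext_v4hi builtins rather than storess and pextrw.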
Diffstat (limited to 'gcc/config/i386/xmmintrin.h')
-rw-r--r-- | gcc/config/i386/xmmintrin.h | 146 |
1 files changed, 69 insertions, 77 deletions
diff --git a/gcc/config/i386/xmmintrin.h b/gcc/config/i386/xmmintrin.h
index 6ef302e8259..6c56973f0db 100644
--- a/gcc/config/i386/xmmintrin.h
+++ b/gcc/config/i386/xmmintrin.h
@@ -86,6 +86,13 @@ enum _mm_hint
 #define _MM_FLUSH_ZERO_ON 0x8000
 #define _MM_FLUSH_ZERO_OFF 0x0000
 
+/* Create a vector of zeros.  */
+static __inline __m128
+_mm_setzero_ps (void)
+{
+  return (__m128){ 0.0f, 0.0f, 0.0f, 0.0f };
+}
+
 /* Perform the respective operation on the lower SPFP (single-precision
    floating-point) values of A and B; the upper three SPFP values are
    passed through from A.  */
@@ -590,15 +597,14 @@ _mm_cvtpi16_ps (__m64 __A)
   /* This comparison against zero gives us a mask that can be used to
      fill in the missing sign bits in the unpack operations below, so
      that we get signed values after unpacking.  */
-  __sign = (__v4hi) __builtin_ia32_mmx_zero ();
-  __sign = __builtin_ia32_pcmpgtw (__sign, (__v4hi)__A);
+  __sign = __builtin_ia32_pcmpgtw ((__v4hi)0LL, (__v4hi)__A);
 
   /* Convert the four words to doublewords.  */
   __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __sign);
   __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __sign);
 
   /* Convert the doublewords to floating point two at a time.  */
-  __r = (__v4sf) __builtin_ia32_setzerops ();
+  __r = (__v4sf) _mm_setzero_ps ();
   __r = __builtin_ia32_cvtpi2ps (__r, __hisi);
   __r = __builtin_ia32_movlhps (__r, __r);
   __r = __builtin_ia32_cvtpi2ps (__r, __losi);
@@ -610,16 +616,15 @@ _mm_cvtpi16_ps (__m64 __A)
 static __inline __m128
 _mm_cvtpu16_ps (__m64 __A)
 {
-  __v4hi __zero = (__v4hi) __builtin_ia32_mmx_zero ();
   __v2si __hisi, __losi;
   __v4sf __r;
 
   /* Convert the four words to doublewords.  */
-  __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, __zero);
-  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, __zero);
+  __hisi = (__v2si) __builtin_ia32_punpckhwd ((__v4hi)__A, (__v4hi)0LL);
+  __losi = (__v2si) __builtin_ia32_punpcklwd ((__v4hi)__A, (__v4hi)0LL);
 
   /* Convert the doublewords to floating point two at a time.  */
-  __r = (__v4sf) __builtin_ia32_setzerops ();
+  __r = (__v4sf) _mm_setzero_ps ();
   __r = __builtin_ia32_cvtpi2ps (__r, __hisi);
   __r = __builtin_ia32_movlhps (__r, __r);
   __r = __builtin_ia32_cvtpi2ps (__r, __losi);
@@ -636,8 +641,7 @@ _mm_cvtpi8_ps (__m64 __A)
   /* This comparison against zero gives us a mask that can be used to
      fill in the missing sign bits in the unpack operations below, so
      that we get signed values after unpacking.  */
-  __sign = (__v8qi) __builtin_ia32_mmx_zero ();
-  __sign = __builtin_ia32_pcmpgtb (__sign, (__v8qi)__A);
+  __sign = __builtin_ia32_pcmpgtb ((__v8qi)0LL, (__v8qi)__A);
 
   /* Convert the four low bytes to words.  */
   __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __sign);
@@ -649,8 +653,7 @@ _mm_cvtpi8_ps (__m64 __A)
 static __inline __m128
 _mm_cvtpu8_ps(__m64 __A)
 {
-  __v8qi __zero = (__v8qi) __builtin_ia32_mmx_zero ();
-  __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, __zero);
+  __A = (__m64) __builtin_ia32_punpcklbw ((__v8qi)__A, (__v8qi)0LL);
   return _mm_cvtpu16_ps(__A);
 }
 
@@ -658,7 +661,7 @@ _mm_cvtpu8_ps(__m64 __A)
 static __inline __m128
 _mm_cvtpi32x2_ps(__m64 __A, __m64 __B)
 {
-  __v4sf __zero = (__v4sf) __builtin_ia32_setzerops ();
+  __v4sf __zero = (__v4sf) _mm_setzero_ps ();
   __v4sf __sfa = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__A);
   __v4sf __sfb = __builtin_ia32_cvtpi2ps (__zero, (__v2si)__B);
   return (__m128) __builtin_ia32_movlhps (__sfa, __sfb);
@@ -680,8 +683,7 @@ static __inline __m64
 _mm_cvtps_pi8(__m128 __A)
 {
   __v4hi __tmp = (__v4hi) _mm_cvtps_pi16 (__A);
-  __v4hi __zero = (__v4hi) __builtin_ia32_mmx_zero ();
-  return (__m64) __builtin_ia32_packsswb (__tmp, __zero);
+  return (__m64) __builtin_ia32_packsswb (__tmp, (__v4hi)0LL);
 }
 
 /* Selects four specific SPFP values from A and B based on MASK.  */
@@ -826,19 +828,38 @@ _MM_SET_FLUSH_ZERO_MODE (unsigned int __mode)
   _mm_setcsr((_mm_getcsr() & ~_MM_FLUSH_ZERO_MASK) | __mode);
 }
 
+/* Create a vector with element 0 as F and the rest zero.  */
+static __inline __m128
+_mm_set_ss (float __F)
+{
+  return (__m128)(__v4sf){ __F, 0, 0, 0 };
+}
+
+/* Create a vector with all four elements equal to F.  */
+static __inline __m128
+_mm_set1_ps (float __F)
+{
+  return (__m128)(__v4sf){ __F, __F, __F, __F };
+}
+
+static __inline __m128
+_mm_set_ps1 (float __F)
+{
+  return _mm_set1_ps (__F);
+}
+
 /* Create a vector with element 0 as *P and the rest zero.  */
 static __inline __m128
 _mm_load_ss (float const *__P)
 {
-  return (__m128) __builtin_ia32_loadss (__P);
+  return _mm_set_ss (*__P);
 }
 
 /* Create a vector with all four elements equal to *P.  */
 static __inline __m128
 _mm_load1_ps (float const *__P)
 {
-  __v4sf __tmp = __builtin_ia32_loadss (__P);
-  return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,0,0,0));
+  return _mm_set1_ps (*__P);
 }
 
 static __inline __m128
@@ -851,7 +872,7 @@ _mm_load_ps1 (float const *__P)
 static __inline __m128
 _mm_load_ps (float const *__P)
 {
-  return (__m128) __builtin_ia32_loadaps (__P);
+  return (__m128) *(__v4sf *)__P;
 }
 
 /* Load four SPFP values from P.  The address need not be 16-byte aligned.  */
@@ -865,86 +886,58 @@ _mm_loadu_ps (float const *__P)
 static __inline __m128
 _mm_loadr_ps (float const *__P)
 {
-  __v4sf __tmp = __builtin_ia32_loadaps (__P);
+  __v4sf __tmp = *(__v4sf *)__P;
   return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,1,2,3));
 }
 
-/* Create a vector with element 0 as F and the rest zero.  */
-static __inline __m128
-_mm_set_ss (float __F)
-{
-  return (__m128) __builtin_ia32_loadss (&__F);
-}
-
-/* Create a vector with all four elements equal to F.  */
-static __inline __m128
-_mm_set1_ps (float __F)
-{
-  __v4sf __tmp = __builtin_ia32_loadss (&__F);
-  return (__m128) __builtin_ia32_shufps (__tmp, __tmp, _MM_SHUFFLE (0,0,0,0));
-}
-
-static __inline __m128
-_mm_set_ps1 (float __F)
-{
-  return _mm_set1_ps (__F);
-}
-
 /* Create the vector [Z Y X W].  */
 static __inline __m128
 _mm_set_ps (const float __Z, const float __Y, const float __X, const float __W)
 {
-  return (__v4sf) {__W, __X, __Y, __Z};
+  return (__m128)(__v4sf){ __W, __X, __Y, __Z };
 }
 
 /* Create the vector [W X Y Z].  */
 static __inline __m128
 _mm_setr_ps (float __Z, float __Y, float __X, float __W)
 {
-  return _mm_set_ps (__W, __X, __Y, __Z);
-}
-
-/* Create a vector of zeros.  */
-static __inline __m128
-_mm_setzero_ps (void)
-{
-  return (__m128) __builtin_ia32_setzerops ();
+  return (__m128)(__v4sf){ __Z, __Y, __X, __W };
 }
 
 /* Stores the lower SPFP value.  */
 static __inline void
 _mm_store_ss (float *__P, __m128 __A)
 {
-  __builtin_ia32_storess (__P, (__v4sf)__A);
+  *__P = __builtin_ia32_vec_ext_v4sf ((__v4sf)__A, 0);
 }
 
-/* Store the lower SPFP value across four words.  */
+/* Store four SPFP values.  The address must be 16-byte aligned.  */
 static __inline void
-_mm_store1_ps (float *__P, __m128 __A)
+_mm_store_ps (float *__P, __m128 __A)
 {
-  __v4sf __va = (__v4sf)__A;
-  __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0));
-  __builtin_ia32_storeaps (__P, __tmp);
+  *(__v4sf *)__P = (__v4sf)__A;
 }
 
+/* Store four SPFP values.  The address need not be 16-byte aligned.  */
 static __inline void
-_mm_store_ps1 (float *__P, __m128 __A)
+_mm_storeu_ps (float *__P, __m128 __A)
 {
-  _mm_store1_ps (__P, __A);
+  __builtin_ia32_storeups (__P, (__v4sf)__A);
 }
 
-/* Store four SPFP values.  The address must be 16-byte aligned.  */
+/* Store the lower SPFP value across four words.  */
 static __inline void
-_mm_store_ps (float *__P, __m128 __A)
+_mm_store1_ps (float *__P, __m128 __A)
 {
-  __builtin_ia32_storeaps (__P, (__v4sf)__A);
+  __v4sf __va = (__v4sf)__A;
+  __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,0,0,0));
+  _mm_storeu_ps (__P, __tmp);
 }
 
-/* Store four SPFP values.  The address need not be 16-byte aligned.  */
 static __inline void
-_mm_storeu_ps (float *__P, __m128 __A)
+_mm_store_ps1 (float *__P, __m128 __A)
 {
-  __builtin_ia32_storeups (__P, (__v4sf)__A);
+  _mm_store1_ps (__P, __A);
 }
 
 /* Store four SPFP values in reverse order.  The address must be aligned.  */
@@ -953,7 +946,7 @@ _mm_storer_ps (float *__P, __m128 __A)
 {
   __v4sf __va = (__v4sf)__A;
   __v4sf __tmp = __builtin_ia32_shufps (__va, __va, _MM_SHUFFLE (0,1,2,3));
-  __builtin_ia32_storeaps (__P, __tmp);
+  _mm_store_ps (__P, __tmp);
 }
 
 /* Sets the low SPFP value of A from the low value of B.  */
@@ -965,40 +958,39 @@ _mm_move_ss (__m128 __A, __m128 __B)
 
 /* Extracts one of the four words of A.  The selector N must be immediate.  */
 #if 0
-static __inline int
-_mm_extract_pi16 (__m64 __A, int __N)
+static __inline int __attribute__((__always_inline__))
+_mm_extract_pi16 (__m64 const __A, int const __N)
 {
-  return __builtin_ia32_pextrw ((__v4hi)__A, __N);
+  return __builtin_ia32_vec_ext_v4hi ((__v4hi)__A, __N);
 }
 
-static __inline int
-_m_pextrw (__m64 __A, int __N)
+static __inline int __attribute__((__always_inline__))
+_m_pextrw (__m64 const __A, int const __N)
 {
   return _mm_extract_pi16 (__A, __N);
 }
 #else
-#define _mm_extract_pi16(A, N) \
-  __builtin_ia32_pextrw ((__v4hi)(A), (N))
+#define _mm_extract_pi16(A, N) __builtin_ia32_vec_ext_v4hi ((__v4hi)(A), (N))
 #define _m_pextrw(A, N) _mm_extract_pi16((A), (N))
 #endif
 
/* Inserts word D into one of four words of A.  The selector N must be
   immediate.  */
 #if 0
-static __inline __m64
-_mm_insert_pi16 (__m64 __A, int __D, int __N)
+static __inline __m64 __attribute__((__always_inline__))
+_mm_insert_pi16 (__m64 const __A, int const __D, int const __N)
 {
-  return (__m64)__builtin_ia32_pinsrw ((__v4hi)__A, __D, __N);
+  return (__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)__A, __D, __N);
 }
 
-static __inline __m64
-_m_pinsrw (__m64 __A, int __D, int __N)
+static __inline __m64 __attribute__((__always_inline__))
+_m_pinsrw (__m64 const __A, int const __D, int const __N)
 {
   return _mm_insert_pi16 (__A, __D, __N);
 }
 #else
 #define _mm_insert_pi16(A, D, N) \
-  ((__m64) __builtin_ia32_pinsrw ((__v4hi)(A), (D), (N)))
+  ((__m64) __builtin_ia32_vec_set_v4hi ((__v4hi)(A), (D), (N)))
 #define _m_pinsrw(A, D, N) _mm_insert_pi16((A), (D), (N))
 #endif
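A related simplification visible in the hunks above is the removal of __builtin_ia32_mmx_zero (): a zeroed MMX vector is now spelled as a cast of a 64-bit zero, as in (__v4hi)0LL. A small stand-alone sketch of that idiom, using GCC's vector extensions; the typedef is an illustrative stand-in for the header's __v4hi, not the header's own definition.

#include <stdio.h>
#include <string.h>

/* Illustration only: an 8-byte vector of four shorts.  */
typedef short v4hi __attribute__ ((__vector_size__ (8)));

int
main (void)
{
  v4hi zero = (v4hi)0LL;   /* same-size integer-to-vector cast: all lanes zero */
  short out[4];

  memcpy (out, &zero, sizeof out);   /* copy the lanes out for printing */
  printf ("%d %d %d %d\n", out[0], out[1], out[2], out[3]);
  return 0;
}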