summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author: H.J. Lu <hjl.tools@gmail.com> 2015-12-19 06:51:44 -0800
committer: H.J. Lu <hjl.tools@gmail.com> 2016-02-12 05:08:29 -0800
commit: ba5e18d8d98a3beb3d51da03f4721f5576fcb997 (patch)
tree: 683731b55aaa500b3a601a274cd7f6bf37da0dfd
parent: cea7085941f85780f1b32c3a218f2009f2f44ab2 (diff)
download: gcc-hjl/pr68991/gcc-5-branch.tar.gz
Add vector_memory_operand and "Bm" constraint (branch: hjl/pr68991/gcc-5-branch)
SSE vector arithmetic and logic instructions only accept aligned memory operands. This patch adds vector_memory_operand and the "Bm" constraint for aligned SSE memory operands. They are applied to the SSE plusminus and any_logic patterns.

gcc/

    PR target/68991
    * config/i386/constraints.md (Bm): New constraint.
    * config/i386/predicates.md (vector_memory_operand): New predicate.
    * config/i386/sse.md: Replace xm with xBm in plusminus and any_logic patterns.

gcc/testsuite/

    PR target/68991
    * g++.dg/pr68991-1.C: New test.
    * g++.dg/pr68991-2.C: Likewise.
-rw-r--r-- gcc/config/i386/constraints.md | 5
-rw-r--r-- gcc/config/i386/predicates.md | 7
-rw-r--r-- gcc/config/i386/sse.md | 18
-rw-r--r-- gcc/testsuite/g++.dg/pr68991-1.C | 191
-rw-r--r-- gcc/testsuite/g++.dg/pr68991-2.C | 191
5 files changed, 403 insertions, 9 deletions
diff --git a/gcc/config/i386/constraints.md b/gcc/config/i386/constraints.md
index 00a84a03885..cf8053dda67 100644
--- a/gcc/config/i386/constraints.md
+++ b/gcc/config/i386/constraints.md
@@ -146,11 +146,16 @@
"@internal Lower SSE register when avoiding REX prefix and all SSE registers otherwise.")
;; We use the B prefix to denote any number of internal operands:
+;; m Vector memory operand
;; s Sibcall memory operand, not valid for TARGET_X32
;; w Call memory operand, not valid for TARGET_X32
;; z Constant call address operand.
;; C SSE constant operand.
+(define_constraint "Bm"
+ "@internal Vector memory operand."
+ (match_operand 0 "vector_memory_operand"))
+
(define_constraint "Bs"
"@internal Sibcall memory operand."
(and (not (match_test "TARGET_X32"))
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index f0c999cb182..b47aca0df64 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -937,6 +937,13 @@
return false;
})
+; Return true when OP is operand acceptable for vector memory operand.
+; Only AVX can have misaligned memory operand.
+(define_predicate "vector_memory_operand"
+ (and (match_operand 0 "memory_operand")
+ (ior (match_test "TARGET_AVX")
+ (match_test "MEM_ALIGN (op) >= GET_MODE_ALIGNMENT (mode)"))))
+
; Return true when OP is operand acceptable for standard SSE move.
(define_predicate "vector_move_operand"
(ior (match_operand 0 "nonimmediate_operand")
diff --git a/gcc/config/i386/sse.md b/gcc/config/i386/sse.md
index 2b374089dfa..7ea82edefe8 100644
--- a/gcc/config/i386/sse.md
+++ b/gcc/config/i386/sse.md
@@ -1618,7 +1618,7 @@
[(set (match_operand:VF 0 "register_operand" "=x,v")
(plusminus:VF
(match_operand:VF 1 "<round_nimm_predicate>" "<comm>0,v")
- (match_operand:VF 2 "<round_nimm_predicate>" "xm,<round_constraint>")))]
+ (match_operand:VF 2 "<round_nimm_predicate>" "xBm,<round_constraint>")))]
"TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands) && <mask_mode512bit_condition> && <round_mode512bit_condition>"
"@
<plusminus_mnemonic><ssemodesuffix>\t{%2, %0|%0, %2}
@@ -1633,7 +1633,7 @@
(vec_merge:VF_128
(plusminus:VF_128
(match_operand:VF_128 1 "register_operand" "0,v")
- (match_operand:VF_128 2 "nonimmediate_operand" "xm,<round_constraint>"))
+ (match_operand:VF_128 2 "nonimmediate_operand" "xBm,<round_constraint>"))
(match_dup 1)
(const_int 1)))]
"TARGET_SSE"
@@ -2248,7 +2248,7 @@
(vec_concat:V2SF
(plusminus:SF
(vec_select:SF
- (match_operand:V4SF 2 "nonimmediate_operand" "xm,xm")
+ (match_operand:V4SF 2 "nonimmediate_operand" "xBm,xm")
(parallel [(const_int 0)]))
(vec_select:SF (match_dup 2) (parallel [(const_int 1)])))
(plusminus:SF
@@ -2819,7 +2819,7 @@
[(set (match_operand:VF_128_256 0 "register_operand" "=x,v")
(any_logic:VF_128_256
(match_operand:VF_128_256 1 "nonimmediate_operand" "%0,v")
- (match_operand:VF_128_256 2 "nonimmediate_operand" "xm,vm")))]
+ (match_operand:VF_128_256 2 "nonimmediate_operand" "xBm,vm")))]
"TARGET_SSE && <mask_avx512vl_condition>
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
@@ -3068,7 +3068,7 @@
[(set (match_operand:TF 0 "register_operand" "=x,x")
(any_logic:TF
(match_operand:TF 1 "nonimmediate_operand" "%0,x")
- (match_operand:TF 2 "nonimmediate_operand" "xm,xm")))]
+ (match_operand:TF 2 "nonimmediate_operand" "xBm,xm")))]
"TARGET_SSE
&& ix86_binary_operator_ok (<CODE>, TFmode, operands)"
{
@@ -9242,7 +9242,7 @@
[(set (match_operand:VI_AVX2 0 "register_operand" "=x,v")
(plusminus:VI_AVX2
(match_operand:VI_AVX2 1 "nonimmediate_operand" "<comm>0,v")
- (match_operand:VI_AVX2 2 "nonimmediate_operand" "xm,vm")))]
+ (match_operand:VI_AVX2 2 "nonimmediate_operand" "xBm,vm")))]
"TARGET_SSE2
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
@@ -9295,7 +9295,7 @@
[(set (match_operand:VI12_AVX2 0 "register_operand" "=x,v")
(sat_plusminus:VI12_AVX2
(match_operand:VI12_AVX2 1 "nonimmediate_operand" "<comm>0,v")
- (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xm,vm")))]
+ (match_operand:VI12_AVX2 2 "nonimmediate_operand" "xBm,vm")))]
"TARGET_SSE2 && <mask_mode512bit_condition> && <mask_avx512bw_condition>
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
"@
@@ -10950,7 +10950,7 @@
[(set (match_operand:VI48_AVX_AVX512F 0 "register_operand" "=x,v")
(any_logic:VI48_AVX_AVX512F
(match_operand:VI48_AVX_AVX512F 1 "nonimmediate_operand" "%0,v")
- (match_operand:VI48_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
+ (match_operand:VI48_AVX_AVX512F 2 "nonimmediate_operand" "xBm,vm")))]
"TARGET_SSE && <mask_mode512bit_condition>
&& ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
@@ -11046,7 +11046,7 @@
[(set (match_operand:VI12_AVX_AVX512F 0 "register_operand" "=x,v")
(any_logic: VI12_AVX_AVX512F
(match_operand:VI12_AVX_AVX512F 1 "nonimmediate_operand" "%0,v")
- (match_operand:VI12_AVX_AVX512F 2 "nonimmediate_operand" "xm,vm")))]
+ (match_operand:VI12_AVX_AVX512F 2 "nonimmediate_operand" "xBm,vm")))]
"TARGET_SSE && ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)"
{
static char buf[64];
diff --git a/gcc/testsuite/g++.dg/pr68991-1.C b/gcc/testsuite/g++.dg/pr68991-1.C
new file mode 100644
index 00000000000..744d13c7915
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr68991-1.C
@@ -0,0 +1,191 @@
+// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-options "-std=c++11 -O3 -msse2 -mno-avx -fno-exceptions -fno-rtti -fdump-rtl-final" }
+
+typedef unsigned int size_type;
+
+#define _GLIBCXX_BITSET_BITS_PER_WORD (__CHAR_BIT__ * __SIZEOF_INT__)
+#define _GLIBCXX_BITSET_WORDS(__n) \
+ ((__n) / _GLIBCXX_BITSET_BITS_PER_WORD + \
+ ((__n) % _GLIBCXX_BITSET_BITS_PER_WORD == 0 ? 0 : 1))
+
+namespace std
+{
+ template<size_type _Nw>
+ struct _Base_bitset
+ {
+ typedef unsigned int _WordT;
+ _WordT _M_w[_Nw];
+
+ _WordT&
+ _M_hiword()
+ { return _M_w[_Nw - 1]; }
+
+ void
+ _M_do_and(const _Base_bitset<_Nw>& __x)
+ {
+ for (size_type __i = 0; __i < _Nw; __i++)
+ _M_w[__i] &= __x._M_w[__i];
+ }
+
+ void
+ _M_do_flip()
+ {
+ for (size_type __i = 0; __i < _Nw; __i++)
+ _M_w[__i] = ~_M_w[__i];
+ }
+
+ bool
+ _M_is_equal(const _Base_bitset<_Nw>& __x) const
+ {
+ for (size_type __i = 0; __i < _Nw; ++__i)
+ if (_M_w[__i] != __x._M_w[__i])
+ return false;
+ return true;
+ }
+
+ bool
+ _M_is_any() const
+ {
+ for (size_type __i = 0; __i < _Nw; __i++)
+ if (_M_w[__i] != static_cast<_WordT>(0))
+ return true;
+ return false;
+ }
+ };
+
+ template<size_type _Extrabits>
+ struct _Sanitize
+ {
+ typedef unsigned int _WordT;
+
+ static void
+ _S_do_sanitize(_WordT& __val)
+ { __val &= ~((~static_cast<_WordT>(0)) << _Extrabits); }
+ };
+
+ template<size_type _Nb>
+ class bitset
+ : private _Base_bitset<_GLIBCXX_BITSET_WORDS(_Nb)>
+ {
+ private:
+ typedef _Base_bitset<_GLIBCXX_BITSET_WORDS(_Nb)> _Base;
+ typedef unsigned int _WordT;
+
+ void
+ _M_do_sanitize()
+ {
+ typedef _Sanitize<_Nb % _GLIBCXX_BITSET_BITS_PER_WORD> __sanitize_type;
+ __sanitize_type::_S_do_sanitize(this->_M_hiword());
+ }
+
+ public:
+ class reference
+ {
+ friend class bitset;
+
+ _WordT* _M_wp;
+ size_type _M_bpos;
+
+ public:
+ reference&
+ flip()
+ {
+ *_M_wp ^= _Base::_S_maskbit(_M_bpos);
+ return *this;
+ }
+ };
+
+ bitset<_Nb>&
+ operator&=(const bitset<_Nb>& __rhs)
+ {
+ this->_M_do_and(__rhs);
+ return *this;
+ }
+
+ bitset<_Nb>&
+ flip()
+ {
+ this->_M_do_flip();
+ this->_M_do_sanitize();
+ return *this;
+ }
+
+ bitset<_Nb>
+ operator~() const
+ { return bitset<_Nb>(*this).flip(); }
+
+ bool
+ operator==(const bitset<_Nb>& __rhs) const
+ { return this->_M_is_equal(__rhs); }
+
+ bool
+ any() const
+ { return this->_M_is_any(); }
+ };
+
+ template<size_type _Nb>
+ inline bitset<_Nb>
+ operator&(const bitset<_Nb>& __x, const bitset<_Nb>& __y)
+ {
+ bitset<_Nb> __result(__x);
+ __result &= __y;
+ return __result;
+ }
+}
+template<typename T>
+class ArrayRef {
+public:
+ typedef const T *iterator;
+
+private:
+ const T *Data;
+ size_type Length;
+
+public:
+ iterator begin() const { return Data; }
+ iterator end() const { return Data + Length; }
+};
+
+const unsigned MAX_SUBTARGET_FEATURES = 128;
+class FeatureBitset : public std::bitset<MAX_SUBTARGET_FEATURES> {
+};
+
+struct SubtargetFeatureKV {
+ FeatureBitset Value;
+ FeatureBitset Implies;
+};
+
+struct SubtargetInfoKV {
+ const void *Value;
+};
+class SubtargetFeatures {
+public:
+ FeatureBitset ToggleFeature(FeatureBitset Bits,
+ const SubtargetFeatureKV *,
+ ArrayRef<SubtargetFeatureKV> FeatureTable);
+};
+
+static
+void ClearImpliedBits(FeatureBitset &Bits,
+ const SubtargetFeatureKV *FeatureEntry,
+ ArrayRef<SubtargetFeatureKV> FeatureTable) {
+ for (auto &FE : FeatureTable) {
+ if ((FE.Implies & FeatureEntry->Value).any()) {
+ Bits &= ~FE.Value;
+ ClearImpliedBits(Bits, &FE, FeatureTable);
+ }
+ }
+}
+
+FeatureBitset
+SubtargetFeatures::ToggleFeature(FeatureBitset Bits,
+ const SubtargetFeatureKV *FeatureEntry,
+ ArrayRef<SubtargetFeatureKV> FeatureTable) {
+ if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
+ Bits &= ~FeatureEntry->Value;
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
+ }
+ return Bits;
+}
+
+// { dg-final { scan-rtl-dump-not "S16 A32\[^\n\]*\\\*xorv4si3" "final" } }
diff --git a/gcc/testsuite/g++.dg/pr68991-2.C b/gcc/testsuite/g++.dg/pr68991-2.C
new file mode 100644
index 00000000000..a3c59f02c57
--- /dev/null
+++ b/gcc/testsuite/g++.dg/pr68991-2.C
@@ -0,0 +1,191 @@
+// { dg-do compile { target i?86-*-* x86_64-*-* } }
+// { dg-options "-std=c++11 -O3 -msse2 -mno-avx -fno-exceptions -fno-rtti -fdump-rtl-final" }
+
+typedef unsigned int size_type;
+
+#define _GLIBCXX_BITSET_BITS_PER_WORD (__CHAR_BIT__ * __SIZEOF_INT__)
+#define _GLIBCXX_BITSET_WORDS(__n) \
+ ((__n) / _GLIBCXX_BITSET_BITS_PER_WORD + \
+ ((__n) % _GLIBCXX_BITSET_BITS_PER_WORD == 0 ? 0 : 1))
+
+namespace std
+{
+ template<size_type _Nw>
+ struct _Base_bitset
+ {
+ typedef unsigned int _WordT;
+ _WordT _M_w[_Nw];
+
+ _WordT&
+ _M_hiword()
+ { return _M_w[_Nw - 1]; }
+
+ void
+ _M_do_and(const _Base_bitset<_Nw>& __x)
+ {
+ for (size_type __i = 0; __i < _Nw; __i++)
+ _M_w[__i] += __x._M_w[__i];
+ }
+
+ void
+ _M_do_flip()
+ {
+ for (size_type __i = 0; __i < _Nw; __i++)
+ _M_w[__i] = ~_M_w[__i];
+ }
+
+ bool
+ _M_is_equal(const _Base_bitset<_Nw>& __x) const
+ {
+ for (size_type __i = 0; __i < _Nw; ++__i)
+ if (_M_w[__i] != __x._M_w[__i])
+ return false;
+ return true;
+ }
+
+ bool
+ _M_is_any() const
+ {
+ for (size_type __i = 0; __i < _Nw; __i++)
+ if (_M_w[__i] != static_cast<_WordT>(0))
+ return true;
+ return false;
+ }
+ };
+
+ template<size_type _Extrabits>
+ struct _Sanitize
+ {
+ typedef unsigned int _WordT;
+
+ static void
+ _S_do_sanitize(_WordT& __val)
+ { __val &= ~((~static_cast<_WordT>(0)) << _Extrabits); }
+ };
+
+ template<size_type _Nb>
+ class bitset
+ : private _Base_bitset<_GLIBCXX_BITSET_WORDS(_Nb)>
+ {
+ private:
+ typedef _Base_bitset<_GLIBCXX_BITSET_WORDS(_Nb)> _Base;
+ typedef unsigned int _WordT;
+
+ void
+ _M_do_sanitize()
+ {
+ typedef _Sanitize<_Nb % _GLIBCXX_BITSET_BITS_PER_WORD> __sanitize_type;
+ __sanitize_type::_S_do_sanitize(this->_M_hiword());
+ }
+
+ public:
+ class reference
+ {
+ friend class bitset;
+
+ _WordT* _M_wp;
+ size_type _M_bpos;
+
+ public:
+ reference&
+ flip()
+ {
+ *_M_wp ^= _Base::_S_maskbit(_M_bpos);
+ return *this;
+ }
+ };
+
+ bitset<_Nb>&
+ operator&=(const bitset<_Nb>& __rhs)
+ {
+ this->_M_do_and(__rhs);
+ return *this;
+ }
+
+ bitset<_Nb>&
+ flip()
+ {
+ this->_M_do_flip();
+ this->_M_do_sanitize();
+ return *this;
+ }
+
+ bitset<_Nb>
+ operator~() const
+ { return bitset<_Nb>(*this).flip(); }
+
+ bool
+ operator==(const bitset<_Nb>& __rhs) const
+ { return this->_M_is_equal(__rhs); }
+
+ bool
+ any() const
+ { return this->_M_is_any(); }
+ };
+
+ template<size_type _Nb>
+ inline bitset<_Nb>
+ operator&(const bitset<_Nb>& __x, const bitset<_Nb>& __y)
+ {
+ bitset<_Nb> __result(__x);
+ __result &= __y;
+ return __result;
+ }
+}
+template<typename T>
+class ArrayRef {
+public:
+ typedef const T *iterator;
+
+private:
+ const T *Data;
+ size_type Length;
+
+public:
+ iterator begin() const { return Data; }
+ iterator end() const { return Data + Length; }
+};
+
+const unsigned MAX_SUBTARGET_FEATURES = 128;
+class FeatureBitset : public std::bitset<MAX_SUBTARGET_FEATURES> {
+};
+
+struct SubtargetFeatureKV {
+ FeatureBitset Value;
+ FeatureBitset Implies;
+};
+
+struct SubtargetInfoKV {
+ const void *Value;
+};
+class SubtargetFeatures {
+public:
+ FeatureBitset ToggleFeature(FeatureBitset Bits,
+ const SubtargetFeatureKV *,
+ ArrayRef<SubtargetFeatureKV> FeatureTable);
+};
+
+static
+void ClearImpliedBits(FeatureBitset &Bits,
+ const SubtargetFeatureKV *FeatureEntry,
+ ArrayRef<SubtargetFeatureKV> FeatureTable) {
+ for (auto &FE : FeatureTable) {
+ if ((FE.Implies & FeatureEntry->Value).any()) {
+ Bits &= ~FE.Value;
+ ClearImpliedBits(Bits, &FE, FeatureTable);
+ }
+ }
+}
+
+FeatureBitset
+SubtargetFeatures::ToggleFeature(FeatureBitset Bits,
+ const SubtargetFeatureKV *FeatureEntry,
+ ArrayRef<SubtargetFeatureKV> FeatureTable) {
+ if ((Bits & FeatureEntry->Value) == FeatureEntry->Value) {
+ Bits &= ~FeatureEntry->Value;
+ ClearImpliedBits(Bits, FeatureEntry, FeatureTable);
+ }
+ return Bits;
+}
+
+// { dg-final { scan-rtl-dump-not "S16 A32\[^\n\]*\\\*addv4si3" "final" } }