diff options
Diffstat (limited to 'gcc/config/rs6000')
-rw-r--r-- | gcc/config/rs6000/altivec.md | 4 | ||||
-rw-r--r-- | gcc/config/rs6000/crypto.md | 13 | ||||
-rw-r--r-- | gcc/config/rs6000/htm.md | 294 | ||||
-rw-r--r-- | gcc/config/rs6000/htmxlintrin.h | 3 | ||||
-rw-r--r-- | gcc/config/rs6000/predicates.md | 10 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000-builtin.def | 107 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 289 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.h | 8 | ||||
-rw-r--r-- | gcc/config/rs6000/rs6000.opt | 22 | ||||
-rw-r--r-- | gcc/config/rs6000/t-rs6000 | 1 | ||||
-rw-r--r-- | gcc/config/rs6000/vsx.md | 2 |
11 files changed, 367 insertions, 386 deletions
diff --git a/gcc/config/rs6000/altivec.md b/gcc/config/rs6000/altivec.md index 38314f93b8..f7684026b2 100644 --- a/gcc/config/rs6000/altivec.md +++ b/gcc/config/rs6000/altivec.md @@ -2455,7 +2455,7 @@ } }) -(define_insn "*altivec_lvx_<mode>_internal" +(define_insn "altivec_lvx_<mode>_internal" [(parallel [(set (match_operand:VM2 0 "register_operand" "=v") (match_operand:VM2 1 "memory_operand" "Z")) @@ -2478,7 +2478,7 @@ } }) -(define_insn "*altivec_stvx_<mode>_internal" +(define_insn "altivec_stvx_<mode>_internal" [(parallel [(set (match_operand:VM2 0 "memory_operand" "=Z") (match_operand:VM2 1 "register_operand" "v")) diff --git a/gcc/config/rs6000/crypto.md b/gcc/config/rs6000/crypto.md index 2e4ead2444..6a2cfc60f4 100644 --- a/gcc/config/rs6000/crypto.md +++ b/gcc/config/rs6000/crypto.md @@ -18,6 +18,15 @@ ;; along with GCC; see the file COPYING3. If not see ;; <http://www.gnu.org/licenses/>. +;; NOTE: Although this file contains all the instructions from +;; section 5.11 of ISA 2.07, only those in sections 5.11.1 and +;; 5.11.2 are in Category:Vector.Crypto. Those are the only +;; ones controlled by -m[no-]crypto. + +;; FIXME: The builtin names for the instructions in this file +;; are likely to be deprecated in favor of other names to be +;; agreed upon with the XL compilers and LLVM. + (define_c_enum "unspec" [UNSPEC_VCIPHER UNSPEC_VNCIPHER @@ -65,7 +74,7 @@ (unspec:CR_mode [(match_operand:CR_mode 1 "register_operand" "v") (match_operand:CR_mode 2 "register_operand" "v")] UNSPEC_VPMSUM))] - "TARGET_CRYPTO" + "TARGET_P8_VECTOR" "vpmsum<CR_char> %0,%1,%2" [(set_attr "type" "crypto")]) @@ -76,7 +85,7 @@ (match_operand:CR_mode 2 "register_operand" "v") (match_operand:CR_mode 3 "register_operand" "v")] UNSPEC_VPERMXOR))] - "TARGET_CRYPTO" + "TARGET_P8_VECTOR" "vpermxor %0,%1,%2,%3" [(set_attr "type" "crypto")]) diff --git a/gcc/config/rs6000/htm.md b/gcc/config/rs6000/htm.md index 79fb740521..dbfd0db596 100644 --- a/gcc/config/rs6000/htm.md +++ b/gcc/config/rs6000/htm.md @@ -32,197 +32,52 @@ (define_c_enum "unspecv" [UNSPECV_HTM_TABORT - UNSPECV_HTM_TABORTDC - UNSPECV_HTM_TABORTDCI - UNSPECV_HTM_TABORTWC - UNSPECV_HTM_TABORTWCI + UNSPECV_HTM_TABORTXC + UNSPECV_HTM_TABORTXCI UNSPECV_HTM_TBEGIN UNSPECV_HTM_TCHECK UNSPECV_HTM_TEND UNSPECV_HTM_TRECHKPT UNSPECV_HTM_TRECLAIM UNSPECV_HTM_TSR + UNSPECV_HTM_TTEST UNSPECV_HTM_MFSPR UNSPECV_HTM_MTSPR ]) -(define_expand "tabort" - [(set (match_dup 2) - (unspec_volatile:CC [(match_operand:SI 1 "int_reg_operand" "")] - UNSPECV_HTM_TABORT)) - (set (match_dup 3) - (eq:SI (match_dup 2) - (const_int 0))) - (set (match_operand:SI 0 "int_reg_operand" "") - (xor:SI (match_dup 3) - (const_int 1)))] - "TARGET_HTM" -{ - operands[2] = gen_rtx_REG (CCmode, CR0_REGNO); - operands[3] = gen_reg_rtx (SImode); -}) - -(define_insn "*tabort_internal" +(define_insn "tabort" [(set (match_operand:CC 1 "cc_reg_operand" "=x") - (unspec_volatile:CC [(match_operand:SI 0 "int_reg_operand" "r")] + (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")] UNSPECV_HTM_TABORT))] "TARGET_HTM" "tabort. %0" [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_expand "tabortdc" - [(set (match_dup 4) - (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n") - (match_operand:SI 2 "gpc_reg_operand" "r") - (match_operand:SI 3 "gpc_reg_operand" "r")] - UNSPECV_HTM_TABORTDC)) - (set (match_dup 5) - (eq:SI (match_dup 4) - (const_int 0))) - (set (match_operand:SI 0 "int_reg_operand" "") - (xor:SI (match_dup 5) - (const_int 1)))] - "TARGET_HTM" -{ - operands[4] = gen_rtx_REG (CCmode, CR0_REGNO); - operands[5] = gen_reg_rtx (SImode); -}) - -(define_insn "*tabortdc_internal" +(define_insn "tabort<wd>c" [(set (match_operand:CC 3 "cc_reg_operand" "=x") (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") - (match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "gpc_reg_operand" "r")] - UNSPECV_HTM_TABORTDC))] + (match_operand:GPR 1 "gpc_reg_operand" "r") + (match_operand:GPR 2 "gpc_reg_operand" "r")] + UNSPECV_HTM_TABORTXC))] "TARGET_HTM" - "tabortdc. %0,%1,%2" + "tabort<wd>c. %0,%1,%2" [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_expand "tabortdci" - [(set (match_dup 4) - (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n") - (match_operand:SI 2 "gpc_reg_operand" "r") - (match_operand 3 "s5bit_cint_operand" "n")] - UNSPECV_HTM_TABORTDCI)) - (set (match_dup 5) - (eq:SI (match_dup 4) - (const_int 0))) - (set (match_operand:SI 0 "int_reg_operand" "") - (xor:SI (match_dup 5) - (const_int 1)))] - "TARGET_HTM" -{ - operands[4] = gen_rtx_REG (CCmode, CR0_REGNO); - operands[5] = gen_reg_rtx (SImode); -}) - -(define_insn "*tabortdci_internal" +(define_insn "tabort<wd>ci" [(set (match_operand:CC 3 "cc_reg_operand" "=x") (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") - (match_operand:SI 1 "gpc_reg_operand" "r") + (match_operand:GPR 1 "gpc_reg_operand" "r") (match_operand 2 "s5bit_cint_operand" "n")] - UNSPECV_HTM_TABORTDCI))] - "TARGET_HTM" - "tabortdci. %0,%1,%2" - [(set_attr "type" "htm") - (set_attr "length" "4")]) - -(define_expand "tabortwc" - [(set (match_dup 4) - (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n") - (match_operand:SI 2 "gpc_reg_operand" "r") - (match_operand:SI 3 "gpc_reg_operand" "r")] - UNSPECV_HTM_TABORTWC)) - (set (match_dup 5) - (eq:SI (match_dup 4) - (const_int 0))) - (set (match_operand:SI 0 "int_reg_operand" "") - (xor:SI (match_dup 5) - (const_int 1)))] - "TARGET_HTM" -{ - operands[4] = gen_rtx_REG (CCmode, CR0_REGNO); - operands[5] = gen_reg_rtx (SImode); -}) - -(define_insn "*tabortwc_internal" - [(set (match_operand:CC 3 "cc_reg_operand" "=x") - (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") - (match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand:SI 2 "gpc_reg_operand" "r")] - UNSPECV_HTM_TABORTWC))] + UNSPECV_HTM_TABORTXCI))] "TARGET_HTM" - "tabortwc. %0,%1,%2" + "tabort<wd>ci. %0,%1,%2" [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_expand "tabortwci" - [(set (match_dup 4) - (unspec_volatile:CC [(match_operand 1 "u5bit_cint_operand" "n") - (match_operand:SI 2 "gpc_reg_operand" "r") - (match_operand 3 "s5bit_cint_operand" "n")] - UNSPECV_HTM_TABORTWCI)) - (set (match_dup 5) - (eq:SI (match_dup 4) - (const_int 0))) - (set (match_operand:SI 0 "int_reg_operand" "") - (xor:SI (match_dup 5) - (const_int 1)))] - "TARGET_HTM" -{ - operands[4] = gen_rtx_REG (CCmode, CR0_REGNO); - operands[5] = gen_reg_rtx (SImode); -}) - -(define_expand "ttest" - [(set (match_dup 1) - (unspec_volatile:CC [(const_int 0) - (reg:SI 0) - (const_int 0)] - UNSPECV_HTM_TABORTWCI)) - (set (subreg:CC (match_dup 2) 0) (match_dup 1)) - (set (match_dup 3) (lshiftrt:SI (match_dup 2) (const_int 28))) - (set (match_operand:SI 0 "int_reg_operand" "") - (and:SI (match_dup 3) - (const_int 15)))] - "TARGET_HTM" -{ - operands[1] = gen_rtx_REG (CCmode, CR0_REGNO); - operands[2] = gen_reg_rtx (SImode); - operands[3] = gen_reg_rtx (SImode); -}) - -(define_insn "*tabortwci_internal" - [(set (match_operand:CC 3 "cc_reg_operand" "=x") - (unspec_volatile:CC [(match_operand 0 "u5bit_cint_operand" "n") - (match_operand:SI 1 "gpc_reg_operand" "r") - (match_operand 2 "s5bit_cint_operand" "n")] - UNSPECV_HTM_TABORTWCI))] - "TARGET_HTM" - "tabortwci. %0,%1,%2" - [(set_attr "type" "htm") - (set_attr "length" "4")]) - -(define_expand "tbegin" - [(set (match_dup 2) - (unspec_volatile:CC [(match_operand 1 "const_0_to_1_operand" "n")] - UNSPECV_HTM_TBEGIN)) - (set (match_dup 3) - (eq:SI (match_dup 2) - (const_int 0))) - (set (match_operand:SI 0 "int_reg_operand" "") - (xor:SI (match_dup 3) - (const_int 1)))] - "TARGET_HTM" -{ - operands[2] = gen_rtx_REG (CCmode, CR0_REGNO); - operands[3] = gen_reg_rtx (SImode); -}) - -(define_insn "*tbegin_internal" +(define_insn "tbegin" [(set (match_operand:CC 1 "cc_reg_operand" "=x") (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] UNSPECV_HTM_TBEGIN))] @@ -231,48 +86,16 @@ [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_expand "tcheck" - [(set (match_dup 2) - (unspec_volatile:CC [(match_operand 1 "u3bit_cint_operand" "n")] - UNSPECV_HTM_TCHECK)) - (set (match_dup 3) - (eq:SI (match_dup 2) - (const_int 0))) - (set (match_operand:SI 0 "int_reg_operand" "") - (xor:SI (match_dup 3) - (const_int 1)))] - "TARGET_HTM" -{ - operands[2] = gen_rtx_REG (CCmode, CR0_REGNO); - operands[3] = gen_reg_rtx (SImode); -}) - -(define_insn "*tcheck_internal" - [(set (match_operand:CC 1 "cc_reg_operand" "=x") - (unspec_volatile:CC [(match_operand 0 "u3bit_cint_operand" "n")] +(define_insn "tcheck" + [(set (match_operand:CC 0 "cc_reg_operand" "=y") + (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TCHECK))] "TARGET_HTM" "tcheck %0" [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_expand "tend" - [(set (match_dup 2) - (unspec_volatile:CC [(match_operand 1 "const_0_to_1_operand" "n")] - UNSPECV_HTM_TEND)) - (set (match_dup 3) - (eq:SI (match_dup 2) - (const_int 0))) - (set (match_operand:SI 0 "int_reg_operand" "") - (xor:SI (match_dup 3) - (const_int 1)))] - "TARGET_HTM" -{ - operands[2] = gen_rtx_REG (CCmode, CR0_REGNO); - operands[3] = gen_reg_rtx (SImode); -}) - -(define_insn "*tend_internal" +(define_insn "tend" [(set (match_operand:CC 1 "cc_reg_operand" "=x") (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] UNSPECV_HTM_TEND))] @@ -281,23 +104,7 @@ [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_expand "trechkpt" - [(set (match_dup 1) - (unspec_volatile:CC [(const_int 0)] - UNSPECV_HTM_TRECHKPT)) - (set (match_dup 2) - (eq:SI (match_dup 1) - (const_int 0))) - (set (match_operand:SI 0 "int_reg_operand" "") - (xor:SI (match_dup 2) - (const_int 1)))] - "TARGET_HTM" -{ - operands[1] = gen_rtx_REG (CCmode, CR0_REGNO); - operands[2] = gen_reg_rtx (SImode); -}) - -(define_insn "*trechkpt_internal" +(define_insn "trechkpt" [(set (match_operand:CC 0 "cc_reg_operand" "=x") (unspec_volatile:CC [(const_int 0)] UNSPECV_HTM_TRECHKPT))] @@ -306,23 +113,7 @@ [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_expand "treclaim" - [(set (match_dup 2) - (unspec_volatile:CC [(match_operand:SI 1 "gpc_reg_operand" "r")] - UNSPECV_HTM_TRECLAIM)) - (set (match_dup 3) - (eq:SI (match_dup 2) - (const_int 0))) - (set (match_operand:SI 0 "int_reg_operand" "") - (xor:SI (match_dup 3) - (const_int 1)))] - "TARGET_HTM" -{ - operands[2] = gen_rtx_REG (CCmode, CR0_REGNO); - operands[3] = gen_reg_rtx (SImode); -}) - -(define_insn "*treclaim_internal" +(define_insn "treclaim" [(set (match_operand:CC 1 "cc_reg_operand" "=x") (unspec_volatile:CC [(match_operand:SI 0 "gpc_reg_operand" "r")] UNSPECV_HTM_TRECLAIM))] @@ -331,23 +122,7 @@ [(set_attr "type" "htm") (set_attr "length" "4")]) -(define_expand "tsr" - [(set (match_dup 2) - (unspec_volatile:CC [(match_operand 1 "const_0_to_1_operand" "n")] - UNSPECV_HTM_TSR)) - (set (match_dup 3) - (eq:SI (match_dup 2) - (const_int 0))) - (set (match_operand:SI 0 "int_reg_operand" "") - (xor:SI (match_dup 3) - (const_int 1)))] - "TARGET_HTM" -{ - operands[2] = gen_rtx_REG (CCmode, CR0_REGNO); - operands[3] = gen_reg_rtx (SImode); -}) - -(define_insn "*tsr_internal" +(define_insn "tsr" [(set (match_operand:CC 1 "cc_reg_operand" "=x") (unspec_volatile:CC [(match_operand 0 "const_0_to_1_operand" "n")] UNSPECV_HTM_TSR))] @@ -356,21 +131,30 @@ [(set_attr "type" "htm") (set_attr "length" "4")]) +(define_insn "ttest" + [(set (match_operand:CC 0 "cc_reg_operand" "=x") + (unspec_volatile:CC [(const_int 0)] + UNSPECV_HTM_TTEST))] + "TARGET_HTM" + "tabortwci. 0,1,0" + [(set_attr "type" "htm") + (set_attr "length" "4")]) + (define_insn "htm_mfspr_<mode>" - [(set (match_operand:P 0 "gpc_reg_operand" "=r") - (unspec_volatile:P [(match_operand 1 "u10bit_cint_operand" "n") - (match_operand:P 2 "htm_spr_reg_operand" "")] - UNSPECV_HTM_MFSPR))] + [(set (match_operand:GPR 0 "gpc_reg_operand" "=r") + (unspec_volatile:GPR [(match_operand 1 "u10bit_cint_operand" "n") + (match_operand:GPR 2 "htm_spr_reg_operand" "")] + UNSPECV_HTM_MFSPR))] "TARGET_HTM" "mfspr %0,%1"; [(set_attr "type" "htm") (set_attr "length" "4")]) (define_insn "htm_mtspr_<mode>" - [(set (match_operand:P 2 "htm_spr_reg_operand" "") - (unspec_volatile:P [(match_operand:P 0 "gpc_reg_operand" "r") - (match_operand 1 "u10bit_cint_operand" "n")] - UNSPECV_HTM_MTSPR))] + [(set (match_operand:GPR 2 "htm_spr_reg_operand" "") + (unspec_volatile:GPR [(match_operand:GPR 0 "gpc_reg_operand" "r") + (match_operand 1 "u10bit_cint_operand" "n")] + UNSPECV_HTM_MTSPR))] "TARGET_HTM" "mtspr %1,%0"; [(set_attr "type" "htm") diff --git a/gcc/config/rs6000/htmxlintrin.h b/gcc/config/rs6000/htmxlintrin.h index bf7c4172b4..a10771cd4f 100644 --- a/gcc/config/rs6000/htmxlintrin.h +++ b/gcc/config/rs6000/htmxlintrin.h @@ -81,7 +81,8 @@ extern __inline long __attribute__ ((__gnu_inline__, __always_inline__, __artificial__)) __TM_end (void) { - if (__builtin_expect (__builtin_tend (0), 1)) + unsigned char status = _HTM_STATE (__builtin_tend (0)); + if (__builtin_expect (status, _HTM_TRANSACTIONAL)) return 1; return 0; } diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 3c8dfe6032..887263c5d0 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -41,7 +41,7 @@ if (!REG_P (op)) return 0; - if (REGNO (op) > LAST_VIRTUAL_REGISTER) + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) return 1; return ALTIVEC_REGNO_P (REGNO (op)); @@ -57,7 +57,7 @@ if (!REG_P (op)) return 0; - if (REGNO (op) > LAST_VIRTUAL_REGISTER) + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) return 1; return VSX_REGNO_P (REGNO (op)); @@ -74,7 +74,7 @@ if (!REG_P (op)) return 0; - if (REGNO (op) > LAST_VIRTUAL_REGISTER) + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) return 1; return VFLOAT_REGNO_P (REGNO (op)); @@ -91,7 +91,7 @@ if (!REG_P (op)) return 0; - if (REGNO (op) > LAST_VIRTUAL_REGISTER) + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) return 1; return VINT_REGNO_P (REGNO (op)); @@ -108,7 +108,7 @@ if (!REG_P (op)) return 0; - if (REGNO (op) > LAST_VIRTUAL_REGISTER) + if (REGNO (op) >= FIRST_PSEUDO_REGISTER) return 1; return VLOGICAL_REGNO_P (REGNO (op)); diff --git a/gcc/config/rs6000/rs6000-builtin.def b/gcc/config/rs6000/rs6000-builtin.def index b3354977ea..7b79efcedb 100644 --- a/gcc/config/rs6000/rs6000-builtin.def +++ b/gcc/config/rs6000/rs6000-builtin.def @@ -392,6 +392,14 @@ | RS6000_BTC_BINARY), \ CODE_FOR_ ## ICODE) /* ICODE */ +#define BU_CRYPTO_2A(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_BINARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + #define BU_CRYPTO_3(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_crypto_" NAME, /* NAME */ \ @@ -400,6 +408,14 @@ | RS6000_BTC_TERNARY), \ CODE_FOR_ ## ICODE) /* ICODE */ +#define BU_CRYPTO_3A(ENUM, NAME, ATTR, ICODE) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_ ## ATTR /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_ ## ICODE) /* ICODE */ + #define BU_CRYPTO_OVERLOAD_1(ENUM, NAME) \ RS6000_BUILTIN_1 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_crypto_" NAME, /* NAME */ \ @@ -408,10 +424,10 @@ | RS6000_BTC_UNARY), \ CODE_FOR_nothing) /* ICODE */ -#define BU_CRYPTO_OVERLOAD_2(ENUM, NAME) \ +#define BU_CRYPTO_OVERLOAD_2A(ENUM, NAME) \ RS6000_BUILTIN_2 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_crypto_" NAME, /* NAME */ \ - RS6000_BTM_CRYPTO, /* MASK */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ (RS6000_BTC_OVERLOADED /* ATTR */ \ | RS6000_BTC_BINARY), \ CODE_FOR_nothing) /* ICODE */ @@ -424,6 +440,14 @@ | RS6000_BTC_TERNARY), \ CODE_FOR_nothing) /* ICODE */ +#define BU_CRYPTO_OVERLOAD_3A(ENUM, NAME) \ + RS6000_BUILTIN_3 (CRYPTO_BUILTIN_ ## ENUM, /* ENUM */ \ + "__builtin_crypto_" NAME, /* NAME */ \ + RS6000_BTM_P8_VECTOR, /* MASK */ \ + (RS6000_BTC_OVERLOADED /* ATTR */ \ + | RS6000_BTC_TERNARY), \ + CODE_FOR_nothing) /* ICODE */ + /* HTM convenience macros. */ #define BU_HTM_0(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \ @@ -456,21 +480,12 @@ | RS6000_BTC_TERNARY), \ CODE_FOR_ ## ICODE) /* ICODE */ -#define BU_HTM_SPR0(ENUM, NAME, ATTR, ICODE) \ - RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \ - "__builtin_" NAME, /* NAME */ \ - RS6000_BTM_HTM, /* MASK */ \ - (RS6000_BTC_ ## ATTR /* ATTR */ \ - | RS6000_BTC_SPR), \ - CODE_FOR_ ## ICODE) /* ICODE */ - -#define BU_HTM_SPR1(ENUM, NAME, ATTR, ICODE) \ +#define BU_HTM_V1(ENUM, NAME, ATTR, ICODE) \ RS6000_BUILTIN_H (HTM_BUILTIN_ ## ENUM, /* ENUM */ \ "__builtin_" NAME, /* NAME */ \ RS6000_BTM_HTM, /* MASK */ \ (RS6000_BTC_ ## ATTR /* ATTR */ \ | RS6000_BTC_UNARY \ - | RS6000_BTC_SPR \ | RS6000_BTC_VOID), \ CODE_FOR_ ## ICODE) /* ICODE */ @@ -1611,52 +1626,52 @@ BU_CRYPTO_2 (VCIPHER, "vcipher", CONST, crypto_vcipher) BU_CRYPTO_2 (VCIPHERLAST, "vcipherlast", CONST, crypto_vcipherlast) BU_CRYPTO_2 (VNCIPHER, "vncipher", CONST, crypto_vncipher) BU_CRYPTO_2 (VNCIPHERLAST, "vncipherlast", CONST, crypto_vncipherlast) -BU_CRYPTO_2 (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb) -BU_CRYPTO_2 (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh) -BU_CRYPTO_2 (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw) -BU_CRYPTO_2 (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd) +BU_CRYPTO_2A (VPMSUMB, "vpmsumb", CONST, crypto_vpmsumb) +BU_CRYPTO_2A (VPMSUMH, "vpmsumh", CONST, crypto_vpmsumh) +BU_CRYPTO_2A (VPMSUMW, "vpmsumw", CONST, crypto_vpmsumw) +BU_CRYPTO_2A (VPMSUMD, "vpmsumd", CONST, crypto_vpmsumd) /* 3 argument crypto functions. */ -BU_CRYPTO_3 (VPERMXOR_V2DI, "vpermxor_v2di", CONST, crypto_vpermxor_v2di) -BU_CRYPTO_3 (VPERMXOR_V4SI, "vpermxor_v4si", CONST, crypto_vpermxor_v4si) -BU_CRYPTO_3 (VPERMXOR_V8HI, "vpermxor_v8hi", CONST, crypto_vpermxor_v8hi) -BU_CRYPTO_3 (VPERMXOR_V16QI, "vpermxor_v16qi", CONST, crypto_vpermxor_v16qi) +BU_CRYPTO_3A (VPERMXOR_V2DI, "vpermxor_v2di", CONST, crypto_vpermxor_v2di) +BU_CRYPTO_3A (VPERMXOR_V4SI, "vpermxor_v4si", CONST, crypto_vpermxor_v4si) +BU_CRYPTO_3A (VPERMXOR_V8HI, "vpermxor_v8hi", CONST, crypto_vpermxor_v8hi) +BU_CRYPTO_3A (VPERMXOR_V16QI, "vpermxor_v16qi", CONST, crypto_vpermxor_v16qi) BU_CRYPTO_3 (VSHASIGMAW, "vshasigmaw", CONST, crypto_vshasigmaw) BU_CRYPTO_3 (VSHASIGMAD, "vshasigmad", CONST, crypto_vshasigmad) /* 2 argument crypto overloaded functions. */ -BU_CRYPTO_OVERLOAD_2 (VPMSUM, "vpmsum") +BU_CRYPTO_OVERLOAD_2A (VPMSUM, "vpmsum") /* 3 argument crypto overloaded functions. */ -BU_CRYPTO_OVERLOAD_3 (VPERMXOR, "vpermxor") +BU_CRYPTO_OVERLOAD_3A (VPERMXOR, "vpermxor") BU_CRYPTO_OVERLOAD_3 (VSHASIGMA, "vshasigma") /* HTM functions. */ -BU_HTM_1 (TABORT, "tabort", MISC, tabort) -BU_HTM_3 (TABORTDC, "tabortdc", MISC, tabortdc) -BU_HTM_3 (TABORTDCI, "tabortdci", MISC, tabortdci) -BU_HTM_3 (TABORTWC, "tabortwc", MISC, tabortwc) -BU_HTM_3 (TABORTWCI, "tabortwci", MISC, tabortwci) -BU_HTM_1 (TBEGIN, "tbegin", MISC, tbegin) -BU_HTM_1 (TCHECK, "tcheck", MISC, tcheck) -BU_HTM_1 (TEND, "tend", MISC, tend) -BU_HTM_0 (TENDALL, "tendall", MISC, tend) -BU_HTM_0 (TRECHKPT, "trechkpt", MISC, trechkpt) -BU_HTM_1 (TRECLAIM, "treclaim", MISC, treclaim) -BU_HTM_0 (TRESUME, "tresume", MISC, tsr) -BU_HTM_0 (TSUSPEND, "tsuspend", MISC, tsr) -BU_HTM_1 (TSR, "tsr", MISC, tsr) -BU_HTM_0 (TTEST, "ttest", MISC, ttest) - -BU_HTM_SPR0 (GET_TFHAR, "get_tfhar", MISC, nothing) -BU_HTM_SPR1 (SET_TFHAR, "set_tfhar", MISC, nothing) -BU_HTM_SPR0 (GET_TFIAR, "get_tfiar", MISC, nothing) -BU_HTM_SPR1 (SET_TFIAR, "set_tfiar", MISC, nothing) -BU_HTM_SPR0 (GET_TEXASR, "get_texasr", MISC, nothing) -BU_HTM_SPR1 (SET_TEXASR, "set_texasr", MISC, nothing) -BU_HTM_SPR0 (GET_TEXASRU, "get_texasru", MISC, nothing) -BU_HTM_SPR1 (SET_TEXASRU, "set_texasru", MISC, nothing) +BU_HTM_1 (TABORT, "tabort", CR, tabort) +BU_HTM_3 (TABORTDC, "tabortdc", CR, tabortdc) +BU_HTM_3 (TABORTDCI, "tabortdci", CR, tabortdci) +BU_HTM_3 (TABORTWC, "tabortwc", CR, tabortwc) +BU_HTM_3 (TABORTWCI, "tabortwci", CR, tabortwci) +BU_HTM_1 (TBEGIN, "tbegin", CR, tbegin) +BU_HTM_0 (TCHECK, "tcheck", CR, tcheck) +BU_HTM_1 (TEND, "tend", CR, tend) +BU_HTM_0 (TENDALL, "tendall", CR, tend) +BU_HTM_0 (TRECHKPT, "trechkpt", CR, trechkpt) +BU_HTM_1 (TRECLAIM, "treclaim", CR, treclaim) +BU_HTM_0 (TRESUME, "tresume", CR, tsr) +BU_HTM_0 (TSUSPEND, "tsuspend", CR, tsr) +BU_HTM_1 (TSR, "tsr", CR, tsr) +BU_HTM_0 (TTEST, "ttest", CR, ttest) + +BU_HTM_0 (GET_TFHAR, "get_tfhar", SPR, nothing) +BU_HTM_V1 (SET_TFHAR, "set_tfhar", SPR, nothing) +BU_HTM_0 (GET_TFIAR, "get_tfiar", SPR, nothing) +BU_HTM_V1 (SET_TFIAR, "set_tfiar", SPR, nothing) +BU_HTM_0 (GET_TEXASR, "get_texasr", SPR, nothing) +BU_HTM_V1 (SET_TEXASR, "set_texasr", SPR, nothing) +BU_HTM_0 (GET_TEXASRU, "get_texasru", SPR, nothing) +BU_HTM_V1 (SET_TEXASRU, "set_texasru", SPR, nothing) /* 3 argument paired floating point builtins. */ diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 58ea3d106a..97c5842f49 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -4251,6 +4251,22 @@ rs6000_option_override_internal (bool global_init_p) } } + /* Determine when unaligned vector accesses are permitted, and when + they are preferred over masked Altivec loads. Note that if + TARGET_ALLOW_MOVMISALIGN has been disabled by the user, then + TARGET_EFFICIENT_UNALIGNED_VSX must be as well. The converse is + not true. */ + if (TARGET_EFFICIENT_UNALIGNED_VSX == -1) { + if (TARGET_VSX && rs6000_cpu == PROCESSOR_POWER8 + && TARGET_ALLOW_MOVMISALIGN != 0) + TARGET_EFFICIENT_UNALIGNED_VSX = 1; + else + TARGET_EFFICIENT_UNALIGNED_VSX = 0; + } + + if (TARGET_ALLOW_MOVMISALIGN == -1 && rs6000_cpu == PROCESSOR_POWER8) + TARGET_ALLOW_MOVMISALIGN = 1; + /* Set the builtin mask of the various options used that could affect which builtins were used. In the past we used target_flags, but we've run out of bits, and some options like SPE and PAIRED are no longer in @@ -4299,7 +4315,9 @@ rs6000_option_override (void) static tree rs6000_builtin_mask_for_load (void) { - if (TARGET_ALTIVEC || TARGET_VSX) + /* Don't use lvsl/vperm for P8 and similarly efficient machines. */ + if ((TARGET_ALTIVEC && !TARGET_VSX) + || (TARGET_VSX && !TARGET_EFFICIENT_UNALIGNED_VSX)) return altivec_builtin_mask_for_load; else return 0; @@ -4378,6 +4396,9 @@ rs6000_builtin_support_vector_misalignment (machine_mode mode, { if (TARGET_VSX) { + if (TARGET_EFFICIENT_UNALIGNED_VSX) + return true; + /* Return if movmisalign pattern is not supported for this mode. */ if (optab_handler (movmisalign_optab, mode) == CODE_FOR_nothing) return false; @@ -4441,6 +4462,9 @@ rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, return 3; case unaligned_load: + if (TARGET_EFFICIENT_UNALIGNED_VSX) + return 1; + if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) { elements = TYPE_VECTOR_SUBPARTS (vectype); @@ -4476,6 +4500,9 @@ rs6000_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost, return 2; case unaligned_store: + if (TARGET_EFFICIENT_UNALIGNED_VSX) + return 1; + if (TARGET_VSX && TARGET_ALLOW_MOVMISALIGN) { elements = TYPE_VECTOR_SUBPARTS (vectype); @@ -8371,6 +8398,11 @@ rs6000_emit_le_vsx_store (rtx dest, rtx source, machine_mode mode) { rtx tmp, permute_src, permute_tmp; + /* This should never be called during or after reload, because it does + not re-permute the source register. It is intended only for use + during expand. */ + gcc_assert (!reload_in_progress && !lra_in_progress && !reload_completed); + /* Use V2DImode to do swaps of types with 128-bit scalare parts (TImode, V1TImode). */ if (mode == TImode || mode == V1TImode) @@ -12655,9 +12687,9 @@ static inline enum insn_code rs6000_htm_spr_icode (bool nonvoid) { if (nonvoid) - return (TARGET_64BIT) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si; + return (TARGET_POWERPC64) ? CODE_FOR_htm_mfspr_di : CODE_FOR_htm_mfspr_si; else - return (TARGET_64BIT) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si; + return (TARGET_POWERPC64) ? CODE_FOR_htm_mtspr_di : CODE_FOR_htm_mtspr_si; } /* Expand the HTM builtin in EXP and store the result in TARGET. @@ -12671,7 +12703,17 @@ htm_expand_builtin (tree exp, rtx target, bool * expandedp) const struct builtin_description *d; size_t i; - *expandedp = false; + *expandedp = true; + + if (!TARGET_POWERPC64 + && (fcode == HTM_BUILTIN_TABORTDC + || fcode == HTM_BUILTIN_TABORTDCI)) + { + size_t uns_fcode = (size_t)fcode; + const char *name = rs6000_builtin_info[uns_fcode].name; + error ("builtin %s is only valid in 64-bit mode", name); + return const0_rtx; + } /* Expand the HTM builtins. */ d = bdesc_htm; @@ -12684,26 +12726,29 @@ htm_expand_builtin (tree exp, rtx target, bool * expandedp) call_expr_arg_iterator iter; unsigned attr = rs6000_builtin_info[fcode].attr; enum insn_code icode = d->icode; + const struct insn_operand_data *insn_op; + bool uses_spr = (attr & RS6000_BTC_SPR); + rtx cr = NULL_RTX; - if (attr & RS6000_BTC_SPR) + if (uses_spr) icode = rs6000_htm_spr_icode (nonvoid); + insn_op = &insn_data[icode].operand[0]; if (nonvoid) { - machine_mode tmode = insn_data[icode].operand[0].mode; + machine_mode tmode = (uses_spr) ? insn_op->mode : SImode; if (!target || GET_MODE (target) != tmode - || !(*insn_data[icode].operand[0].predicate) (target, tmode)) + || (uses_spr && !(*insn_op->predicate) (target, tmode))) target = gen_reg_rtx (tmode); - op[nopnds++] = target; + if (uses_spr) + op[nopnds++] = target; } FOR_EACH_CALL_EXPR_ARG (arg, iter, exp) { - const struct insn_operand_data *insn_op; - if (arg == error_mark_node || nopnds >= MAX_HTM_OPERANDS) - return NULL_RTX; + return const0_rtx; insn_op = &insn_data[icode].operand[nopnds]; @@ -12750,10 +12795,17 @@ htm_expand_builtin (tree exp, rtx target, bool * expandedp) /* If this builtin accesses SPRs, then pass in the appropriate SPR number and SPR regno as the last two operands. */ - if (attr & RS6000_BTC_SPR) + if (uses_spr) { - op[nopnds++] = gen_rtx_CONST_INT (Pmode, htm_spr_num (fcode)); - op[nopnds++] = gen_rtx_REG (Pmode, htm_spr_regno (fcode)); + machine_mode mode = (TARGET_POWERPC64) ? DImode : SImode; + op[nopnds++] = gen_rtx_CONST_INT (mode, htm_spr_num (fcode)); + op[nopnds++] = gen_rtx_REG (mode, htm_spr_regno (fcode)); + } + /* If this builtin accesses a CR, then pass in a scratch + CR as the last operand. */ + else if (attr & RS6000_BTC_CR) + { cr = gen_reg_rtx (CCmode); + op[nopnds++] = cr; } #ifdef ENABLE_CHECKING @@ -12766,7 +12818,7 @@ htm_expand_builtin (tree exp, rtx target, bool * expandedp) expected_nopnds = 3; if (!(attr & RS6000_BTC_VOID)) expected_nopnds += 1; - if (attr & RS6000_BTC_SPR) + if (uses_spr) expected_nopnds += 2; gcc_assert (nopnds == expected_nopnds && nopnds <= MAX_HTM_OPERANDS); @@ -12793,12 +12845,41 @@ htm_expand_builtin (tree exp, rtx target, bool * expandedp) return NULL_RTX; emit_insn (pat); - *expandedp = true; + if (attr & RS6000_BTC_CR) + { + if (fcode == HTM_BUILTIN_TBEGIN) + { + /* Emit code to set TARGET to true or false depending on + whether the tbegin. instruction successfully or failed + to start a transaction. We do this by placing the 1's + complement of CR's EQ bit into TARGET. */ + rtx scratch = gen_reg_rtx (SImode); + emit_insn (gen_rtx_SET (VOIDmode, scratch, + gen_rtx_EQ (SImode, cr, + const0_rtx))); + emit_insn (gen_rtx_SET (VOIDmode, target, + gen_rtx_XOR (SImode, scratch, + GEN_INT (1)))); + } + else + { + /* Emit code to copy the 4-bit condition register field + CR into the least significant end of register TARGET. */ + rtx scratch1 = gen_reg_rtx (SImode); + rtx scratch2 = gen_reg_rtx (SImode); + rtx subreg = simplify_gen_subreg (CCmode, scratch1, SImode, 0); + emit_insn (gen_movcc (subreg, cr)); + emit_insn (gen_lshrsi3 (scratch2, scratch1, GEN_INT (28))); + emit_insn (gen_andsi3 (target, scratch2, GEN_INT (0xf))); + } + } + if (nonvoid) return target; return const0_rtx; } + *expandedp = false; return NULL_RTX; } @@ -15287,8 +15368,31 @@ htm_init_builtins (void) bool void_func = (attr & RS6000_BTC_VOID); int attr_args = (attr & RS6000_BTC_TYPE_MASK); int nopnds = 0; - tree argtype = (attr & RS6000_BTC_SPR) ? long_unsigned_type_node - : unsigned_type_node; + tree gpr_type_node; + tree rettype; + tree argtype; + + if (TARGET_32BIT && TARGET_POWERPC64) + gpr_type_node = long_long_unsigned_type_node; + else + gpr_type_node = long_unsigned_type_node; + + if (attr & RS6000_BTC_SPR) + { + rettype = gpr_type_node; + argtype = gpr_type_node; + } + else if (d->code == HTM_BUILTIN_TABORTDC + || d->code == HTM_BUILTIN_TABORTDCI) + { + rettype = unsigned_type_node; + argtype = gpr_type_node; + } + else + { + rettype = unsigned_type_node; + argtype = unsigned_type_node; + } if ((mask & builtin_mask) != mask) { @@ -15305,7 +15409,7 @@ htm_init_builtins (void) continue; } - op[nopnds++] = (void_func) ? void_type_node : argtype; + op[nopnds++] = (void_func) ? void_type_node : rettype; if (attr_args == RS6000_BTC_UNARY) op[nopnds++] = argtype; @@ -22768,7 +22872,7 @@ output_probe_stack_range (rtx reg1, rtx reg2) static rtx rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val, - rtx reg2, rtx rreg, rtx split_reg) + rtx reg2, rtx rreg) { rtx real, temp; @@ -22859,11 +22963,6 @@ rs6000_frame_related (rtx insn, rtx reg, HOST_WIDE_INT val, } } - /* If a store insn has been split into multiple insns, the - true source register is given by split_reg. */ - if (split_reg != NULL_RTX) - real = gen_rtx_SET (VOIDmode, SET_DEST (real), split_reg); - RTX_FRAME_RELATED_P (insn) = 1; add_reg_note (insn, REG_FRAME_RELATED_EXPR, real); @@ -22971,7 +23070,7 @@ emit_frame_save (rtx frame_reg, machine_mode mode, reg = gen_rtx_REG (mode, regno); insn = emit_insn (gen_frame_store (reg, frame_reg, offset)); return rs6000_frame_related (insn, frame_reg, frame_reg_to_sp, - NULL_RTX, NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX); } /* Emit an offset memory reference suitable for a frame store, while @@ -23551,7 +23650,7 @@ rs6000_emit_prologue (void) insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, - treg, GEN_INT (-info->total_size), NULL_RTX); + treg, GEN_INT (-info->total_size)); sp_off = frame_off = info->total_size; } @@ -23636,7 +23735,7 @@ rs6000_emit_prologue (void) insn = emit_move_insn (mem, reg); rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, - NULL_RTX, NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX); END_USE (0); } } @@ -23692,7 +23791,7 @@ rs6000_emit_prologue (void) info->lr_save_offset, DFmode, sel); rs6000_frame_related (insn, ptr_reg, sp_off, - NULL_RTX, NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX); if (lr) END_USE (0); } @@ -23771,7 +23870,7 @@ rs6000_emit_prologue (void) SAVRES_SAVE | SAVRES_GPR); rs6000_frame_related (insn, spe_save_area_ptr, sp_off - save_off, - NULL_RTX, NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX); } /* Move the static chain pointer back. */ @@ -23821,7 +23920,7 @@ rs6000_emit_prologue (void) info->lr_save_offset + ptr_off, reg_mode, sel); rs6000_frame_related (insn, ptr_reg, sp_off - ptr_off, - NULL_RTX, NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX); if (lr) END_USE (0); } @@ -23837,7 +23936,7 @@ rs6000_emit_prologue (void) info->gp_save_offset + frame_off + reg_size * i); insn = emit_insn (gen_rtx_PARALLEL (VOIDmode, p)); rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, - NULL_RTX, NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX); } else if (!WORLD_SAVE_P (info)) { @@ -24160,7 +24259,7 @@ rs6000_emit_prologue (void) info->altivec_save_offset + ptr_off, 0, V4SImode, SAVRES_SAVE | SAVRES_VR); rs6000_frame_related (insn, scratch_reg, sp_off - ptr_off, - NULL_RTX, NULL_RTX, NULL_RTX); + NULL_RTX, NULL_RTX); if (REGNO (frame_reg_rtx) == REGNO (scratch_reg)) { /* The oddity mentioned above clobbered our frame reg. */ @@ -24176,7 +24275,7 @@ rs6000_emit_prologue (void) for (i = info->first_altivec_reg_save; i <= LAST_ALTIVEC_REGNO; ++i) if (info->vrsave_mask & ALTIVEC_REG_BIT (i)) { - rtx areg, savereg, mem, split_reg; + rtx areg, savereg, mem; int offset; offset = (info->altivec_save_offset + frame_off @@ -24192,20 +24291,13 @@ rs6000_emit_prologue (void) mem = gen_frame_mem (V4SImode, gen_rtx_PLUS (Pmode, frame_reg_rtx, areg)); - insn = emit_move_insn (mem, savereg); - - /* When we split a VSX store into two insns, we need to make - sure the DWARF info knows which register we are storing. - Pass it in to be used on the appropriate note. */ - if (!BYTES_BIG_ENDIAN - && GET_CODE (PATTERN (insn)) == SET - && GET_CODE (SET_SRC (PATTERN (insn))) == VEC_SELECT) - split_reg = savereg; - else - split_reg = NULL_RTX; + /* Rather than emitting a generic move, force use of the stvx + instruction, which we always want. In particular we don't + want xxpermdi/stxvd2x for little endian. */ + insn = emit_insn (gen_altivec_stvx_v4si_internal (mem, savereg)); rs6000_frame_related (insn, frame_reg_rtx, sp_off - frame_off, - areg, GEN_INT (offset), split_reg); + areg, GEN_INT (offset)); } } @@ -24847,7 +24939,10 @@ rs6000_emit_epilogue (int sibcall) mem = gen_frame_mem (V4SImode, addr); reg = gen_rtx_REG (V4SImode, i); - emit_move_insn (reg, mem); + /* Rather than emitting a generic move, force use of the + lvx instruction, which we always want. In particular + we don't want lxvd2x/xxpermdi for little endian. */ + (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem)); } } @@ -25050,7 +25145,10 @@ rs6000_emit_epilogue (int sibcall) mem = gen_frame_mem (V4SImode, addr); reg = gen_rtx_REG (V4SImode, i); - emit_move_insn (reg, mem); + /* Rather than emitting a generic move, force use of the + lvx instruction, which we always want. In particular + we don't want lxvd2x/xxpermdi for little endian. */ + (void) emit_insn (gen_altivec_lvx_v4si_internal (reg, mem)); } } @@ -30564,7 +30662,7 @@ rs6000_rtx_costs (rtx x, int code, int outer_code, int opno ATTRIBUTE_UNUSED, *total = COSTS_N_INSNS (2); return true; } - else if (mode == Pmode) + else { *total = COSTS_N_INSNS (3); return false; @@ -32194,10 +32292,11 @@ static struct rs6000_opt_mask const rs6000_opt_masks[] = { "quad-memory", OPTION_MASK_QUAD_MEMORY, false, true }, { "quad-memory-atomic", OPTION_MASK_QUAD_MEMORY_ATOMIC, false, true }, { "recip-precision", OPTION_MASK_RECIP_PRECISION, false, true }, + { "save-toc-indirect", OPTION_MASK_SAVE_TOC_INDIRECT, false, true }, { "string", OPTION_MASK_STRING, false, true }, { "update", OPTION_MASK_NO_UPDATE, true , true }, - { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, false }, - { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, false }, + { "upper-regs-df", OPTION_MASK_UPPER_REGS_DF, false, true }, + { "upper-regs-sf", OPTION_MASK_UPPER_REGS_SF, false, true }, { "vsx", OPTION_MASK_VSX, false, true }, { "vsx-timode", OPTION_MASK_VSX_TIMODE, false, true }, #ifdef OPTION_MASK_64BIT @@ -32270,6 +32369,42 @@ static struct rs6000_opt_var const rs6000_opt_vars[] = { "longcall", offsetof (struct gcc_options, x_rs6000_default_long_calls), offsetof (struct cl_target_option, x_rs6000_default_long_calls), }, + { "optimize-swaps", + offsetof (struct gcc_options, x_rs6000_optimize_swaps), + offsetof (struct cl_target_option, x_rs6000_optimize_swaps), }, + { "allow-movmisalign", + offsetof (struct gcc_options, x_TARGET_ALLOW_MOVMISALIGN), + offsetof (struct cl_target_option, x_TARGET_ALLOW_MOVMISALIGN), }, + { "allow-df-permute", + offsetof (struct gcc_options, x_TARGET_ALLOW_DF_PERMUTE), + offsetof (struct cl_target_option, x_TARGET_ALLOW_DF_PERMUTE), }, + { "sched-groups", + offsetof (struct gcc_options, x_TARGET_SCHED_GROUPS), + offsetof (struct cl_target_option, x_TARGET_SCHED_GROUPS), }, + { "always-hint", + offsetof (struct gcc_options, x_TARGET_ALWAYS_HINT), + offsetof (struct cl_target_option, x_TARGET_ALWAYS_HINT), }, + { "align-branch-targets", + offsetof (struct gcc_options, x_TARGET_ALIGN_BRANCH_TARGETS), + offsetof (struct cl_target_option, x_TARGET_ALIGN_BRANCH_TARGETS), }, + { "vectorize-builtins", + offsetof (struct gcc_options, x_TARGET_VECTORIZE_BUILTINS), + offsetof (struct cl_target_option, x_TARGET_VECTORIZE_BUILTINS), }, + { "tls-markers", + offsetof (struct gcc_options, x_tls_markers), + offsetof (struct cl_target_option, x_tls_markers), }, + { "sched-prolog", + offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG), + offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), }, + { "sched-epilog", + offsetof (struct gcc_options, x_TARGET_SCHED_PROLOG), + offsetof (struct cl_target_option, x_TARGET_SCHED_PROLOG), }, + { "gen-cell-microcode", + offsetof (struct gcc_options, x_rs6000_gen_cell_microcode), + offsetof (struct cl_target_option, x_rs6000_gen_cell_microcode), }, + { "warn-cell-microcode", + offsetof (struct gcc_options, x_rs6000_warn_cell_microcode), + offsetof (struct cl_target_option, x_rs6000_warn_cell_microcode), }, }; /* Inner function to handle attribute((target("..."))) and #pragma GCC target @@ -32343,9 +32478,15 @@ rs6000_inner_target_options (tree args, bool attr_p) rs6000_isa_flags_explicit |= mask; /* VSX needs altivec, so -mvsx automagically sets - altivec. */ - if (mask == OPTION_MASK_VSX && !invert) - mask |= OPTION_MASK_ALTIVEC; + altivec and disables -mavoid-indexed-addresses. */ + if (!invert) + { + if (mask == OPTION_MASK_VSX) + { + mask |= OPTION_MASK_ALTIVEC; + TARGET_AVOID_XFORM = 0; + } + } if (rs6000_opt_masks[i].invert) invert = !invert; @@ -32366,6 +32507,7 @@ rs6000_inner_target_options (tree args, bool attr_p) size_t j = rs6000_opt_vars[i].global_offset; *((int *) ((char *)&global_options + j)) = !invert; error_p = false; + not_valid_p = false; break; } } @@ -34218,7 +34360,8 @@ rtx_is_swappable_p (rtx op, unsigned int *special) order-dependent element, so additional fixup code would be needed to make those work. Vector set and non-immediate-form vector splat are element-order sensitive. A few of these - cases might be workable with special handling if required. */ + cases might be workable with special handling if required. + Adding cost modeling would be appropriate in some cases. */ int val = XINT (op, 1); switch (val) { @@ -34257,12 +34400,6 @@ rtx_is_swappable_p (rtx op, unsigned int *special) case UNSPEC_VUPKLPX: case UNSPEC_VUPKLS_V4SF: case UNSPEC_VUPKLU_V4SF: - /* The following could be handled as an idiom with XXSPLTW. - These place a scalar in BE element zero, but the XXSPLTW - will currently expect it in BE element 2 in a swapped - region. When one of these feeds an XXSPLTW with no other - defs/uses either way, we can avoid the lane change for - XXSPLTW and things will be correct. TBD. */ case UNSPEC_VSX_CVDPSPN: case UNSPEC_VSX_CVSPDP: case UNSPEC_VSX_CVSPDPN: @@ -34353,6 +34490,36 @@ insn_is_swappable_p (swap_web_entry *insn_entry, rtx insn, return 0; } + /* A convert to single precision can be left as is provided that + all of its uses are in xxspltw instructions that splat BE element + zero. */ + if (GET_CODE (body) == SET + && GET_CODE (SET_SRC (body)) == UNSPEC + && XINT (SET_SRC (body), 1) == UNSPEC_VSX_CVDPSPN) + { + df_ref def; + struct df_insn_info *insn_info = DF_INSN_INFO_GET (insn); + + FOR_EACH_INSN_INFO_DEF (def, insn_info) + { + struct df_link *link = DF_REF_CHAIN (def); + if (!link) + return 0; + + for (; link; link = link->next) { + rtx use_insn = DF_REF_INSN (link->ref); + rtx use_body = PATTERN (use_insn); + if (GET_CODE (use_body) != SET + || GET_CODE (SET_SRC (use_body)) != UNSPEC + || XINT (SET_SRC (use_body), 1) != UNSPEC_VSX_XXSPLTW + || XEXP (XEXP (SET_SRC (use_body), 0), 1) != const0_rtx) + return 0; + } + } + + return 1; + } + /* Otherwise check the operands for vector lane violations. */ return rtx_is_swappable_p (body, special); } diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h index ef6bb2f7a5..653c2c94e1 100644 --- a/gcc/config/rs6000/rs6000.h +++ b/gcc/config/rs6000/rs6000.h @@ -891,7 +891,8 @@ enum data_align { align_abi, align_opt, align_both }; || (((MODE) == SFmode || (MODE) == DFmode || (MODE) == TFmode \ || (MODE) == SDmode || (MODE) == DDmode || (MODE) == TDmode) \ && (ALIGN) < 32) \ - || (VECTOR_MODE_P ((MODE)) && (((int)(ALIGN)) < VECTOR_ALIGN (MODE)))) + || (!TARGET_EFFICIENT_UNALIGNED_VSX \ + && (VECTOR_MODE_P ((MODE)) && (((int)(ALIGN)) < VECTOR_ALIGN (MODE))))) /* Standard register usage. */ @@ -2573,9 +2574,8 @@ extern int frame_pointer_needed; /* Miscellaneous information. */ #define RS6000_BTC_SPR 0x01000000 /* function references SPRs. */ #define RS6000_BTC_VOID 0x02000000 /* function has no return value. */ -#define RS6000_BTC_OVERLOADED 0x04000000 /* function is overloaded. */ -#define RS6000_BTC_32BIT 0x08000000 /* function references SPRs. */ -#define RS6000_BTC_64BIT 0x10000000 /* function references SPRs. */ +#define RS6000_BTC_CR 0x04000000 /* function references a CR. */ +#define RS6000_BTC_OVERLOADED 0x08000000 /* function is overloaded. */ #define RS6000_BTC_MISC_MASK 0x1f000000 /* Mask of the misc info. */ /* Convenience macros to document the instruction type. */ diff --git a/gcc/config/rs6000/rs6000.opt b/gcc/config/rs6000/rs6000.opt index e69910f656..79d01d4965 100644 --- a/gcc/config/rs6000/rs6000.opt +++ b/gcc/config/rs6000/rs6000.opt @@ -204,31 +204,35 @@ mvsx-scalar-memory Target Undocumented Report Alias(mupper-regs-df) mvsx-align-128 -Target Undocumented Report Var(TARGET_VSX_ALIGN_128) +Target Undocumented Report Var(TARGET_VSX_ALIGN_128) Save ; If -mvsx, set alignment to 128 bits instead of 32/64 mallow-movmisalign -Target Undocumented Var(TARGET_ALLOW_MOVMISALIGN) Init(-1) +Target Undocumented Var(TARGET_ALLOW_MOVMISALIGN) Init(-1) Save ; Allow/disallow the movmisalign in DF/DI vectors +mefficient-unaligned-vector +Target Undocumented Report Var(TARGET_EFFICIENT_UNALIGNED_VSX) Init(-1) Save +; Consider unaligned VSX accesses to be efficient/inefficient + mallow-df-permute -Target Undocumented Var(TARGET_ALLOW_DF_PERMUTE) +Target Undocumented Var(TARGET_ALLOW_DF_PERMUTE) Save ; Allow/disallow permutation of DF/DI vectors msched-groups -Target Undocumented Report Var(TARGET_SCHED_GROUPS) Init(-1) +Target Undocumented Report Var(TARGET_SCHED_GROUPS) Init(-1) Save ; Explicitly set/unset whether rs6000_sched_groups is set malways-hint -Target Undocumented Report Var(TARGET_ALWAYS_HINT) Init(-1) +Target Undocumented Report Var(TARGET_ALWAYS_HINT) Init(-1) Save ; Explicitly set/unset whether rs6000_always_hint is set malign-branch-targets -Target Undocumented Report Var(TARGET_ALIGN_BRANCH_TARGETS) Init(-1) +Target Undocumented Report Var(TARGET_ALIGN_BRANCH_TARGETS) Init(-1) Save ; Explicitly set/unset whether rs6000_align_branch_targets is set mvectorize-builtins -Target Undocumented Report Var(TARGET_VECTORIZE_BUILTINS) Init(-1) +Target Undocumented Report Var(TARGET_VECTORIZE_BUILTINS) Init(-1) Save ; Explicitly control whether we vectorize the builtins or not. mno-update @@ -538,7 +542,7 @@ Target Report Var(TARGET_POINTERS_TO_NESTED_FUNCTIONS) Init(1) Save Use/do not use r11 to hold the static link in calls to functions via pointers. msave-toc-indirect -Target Report Var(TARGET_SAVE_TOC_INDIRECT) Save +Target Report Mask(SAVE_TOC_INDIRECT) Var(rs6000_isa_flags) Control whether we save the TOC in the prologue for indirect calls or generate the save inline mvsx-timode @@ -559,7 +563,7 @@ Use/do not use vector and scalar instructions added in ISA 2.07. mcrypto Target Report Mask(CRYPTO) Var(rs6000_isa_flags) -Use ISA 2.07 crypto instructions +Use ISA 2.07 Category:Vector.AES and Category:Vector.SHA2 instructions mdirect-move Target Report Mask(DIRECT_MOVE) Var(rs6000_isa_flags) diff --git a/gcc/config/rs6000/t-rs6000 b/gcc/config/rs6000/t-rs6000 index 796d7d8ab2..1fe5a53ff8 100644 --- a/gcc/config/rs6000/t-rs6000 +++ b/gcc/config/rs6000/t-rs6000 @@ -19,6 +19,7 @@ # <http://www.gnu.org/licenses/>. TM_H += $(srcdir)/config/rs6000/rs6000-builtin.def +TM_H += $(srcdir)/config/rs6000/rs6000-cpus.def rs6000-c.o: $(srcdir)/config/rs6000/rs6000-c.c $(COMPILE) $< diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 2988c72864..d7a235e913 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -1863,7 +1863,7 @@ (define_insn_and_split "vsx_extract_v4sf" [(set (match_operand:SF 0 "vsx_register_operand" "=f,f") (vec_select:SF - (match_operand:V4SF 1 "vsx_register_operand" "<VSa>,<VSa>") + (match_operand:V4SF 1 "vsx_register_operand" "wa,wa") (parallel [(match_operand:QI 2 "u5bit_cint_operand" "O,i")]))) (clobber (match_scratch:V4SF 3 "=X,0"))] "VECTOR_UNIT_VSX_P (V4SFmode)" |