diff options
author | Michael Meissner <meissner@linux.vnet.ibm.com> | 2013-07-31 20:04:07 +0000 |
---|---|---|
committer | Michael Meissner <meissner@gcc.gnu.org> | 2013-07-31 20:04:07 +0000 |
commit | d86e633abc4aeb1be6c443b3dc3593629491a838 (patch) | |
tree | 8fca80180a443821ea64db1f6fcab18e36937b55 | |
parent | b26e3fc277ea869668b129563f9276caace5e5d9 (diff) | |
download | gcc-d86e633abc4aeb1be6c443b3dc3593629491a838.tar.gz |
predicates.md (fusion_gpr_addis): New predicates to support power8 load fusion.
[gcc]
2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
* config/rs6000/predicates.md (fusion_gpr_addis): New predicates
to support power8 load fusion.
(fusion_gpr_mem_load): Likewise.
* config/rs6000/rs6000-modes.def (PTImode): Update a comment.
* config/rs6000/rs6000-protos.h (fusion_gpr_load_p): New
declarations for power8 load fusion.
(emit_fusion_gpr_load): Likewise.
* config/rs6000/rs6000.c (rs6000_option_override_internal): If
tuning for power8, turn on fusion mode by default. Turn on sign
extending fusion mode if normal fusion mode is on, and we are
optimizing for speed at -O3 or higher.
(fusion_gpr_load_p): New function, return true if we can fuse an
addis instruction with a dependent load to a GPR.
(emit_fusion_gpr_load): Emit the instructions for power8 load
fusion to GPRs.
* config/rs6000/vsx.md (VSX_M2): New iterator for fusion
peepholes.
(VSX load fusion peepholes): New peepholes to fuse together an
addi instruction with a VSX load instruction.
* config/rs6000/rs6000.md (GPR load fusion peepholes): New
peepholes to fuse an addis instruction with a load to a GPR base
register. If we are supporting sign extending fusions, convert
sign extending loads to zero extending loads and add an explicit
sign extension.
[gcc/testsuite]
2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com>
* gcc.target/powerpc/fusion.c: New file, test power8 fusion
support.
From-SVN: r201385
-rw-r--r-- | gcc/ChangeLog | 32 |
-rw-r--r-- | gcc/config/rs6000/predicates.md | 88 |
-rw-r--r-- | gcc/config/rs6000/rs6000-modes.def | 4 |
-rw-r--r-- | gcc/config/rs6000/rs6000-protos.h | 2 |
-rw-r--r-- | gcc/config/rs6000/rs6000.c | 279 |
-rw-r--r-- | gcc/config/rs6000/rs6000.md | 107 |
-rw-r--r-- | gcc/config/rs6000/vsx.md | 32 |
-rw-r--r-- | gcc/testsuite/ChangeLog | 5 |
-rw-r--r-- | gcc/testsuite/gcc.target/powerpc/fusion.c | 23 |
9 files changed, 571 insertions, 1 deletion
diff --git a/gcc/ChangeLog b/gcc/ChangeLog index e343f2c64a3..f2d25f06ef3 100644 --- a/gcc/ChangeLog +++ b/gcc/ChangeLog @@ -1,3 +1,35 @@ +2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com> + + * config/rs6000/predicates.md (fusion_gpr_addis): New predicates + to support power8 load fusion. + (fusion_gpr_mem_load): Likewise. + + * config/rs6000/rs6000-modes.def (PTImode): Update a comment. + + * config/rs6000/rs6000-protos.h (fusion_gpr_load_p): New + declarations for power8 load fusion. + (emit_fusion_gpr_load): Likewise. + + * config/rs6000/rs6000.c (rs6000_option_override_internal): If + tuning for power8, turn on fusion mode by default. Turn on sign + extending fusion mode if normal fusion mode is on, and we are at + -O2 or -O3. + (fusion_gpr_load_p): New function, return true if we can fuse an + addis instruction with a dependent load to a GPR. + (emit_fusion_gpr_load): Emit the instructions for power8 load + fusion to GPRs. + + * config/rs6000/vsx.md (VSX_M2): New iterator for fusion + peepholes. + (VSX load fusion peepholes): New peepholes to fuse together an + addi instruction with a VSX load instruction. + + * config/rs6000/rs6000.md (GPR load fusion peepholes): New + peepholes to fuse an addis instruction with a load to a GPR base + register. If we are supporting sign extending fusions, convert + sign extending loads to zero extending loads and add an explicit + sign extension. + 2013-07-31 Sofiane Naci <sofiane.naci@arm.com> * config.gcc (arm*-*-*): Add aarch-common.o to extra_objs. Add diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md index 18912f15a4a..09013c3d553 100644 --- a/gcc/config/rs6000/predicates.md +++ b/gcc/config/rs6000/predicates.md @@ -1702,3 +1702,91 @@ return GET_CODE (op) == UNSPEC && XINT (op, 1) == UNSPEC_TOCREL; }) + +;; Match the first insn (addis) in fusing the combination of addis and loads to +;; GPR registers on power8. 
+(define_predicate "fusion_gpr_addis" + (match_code "const_int,high,plus") +{ + HOST_WIDE_INT value; + rtx int_const; + + if (GET_CODE (op) == HIGH) + return 1; + + if (CONST_INT_P (op)) + int_const = op; + + else if (GET_CODE (op) == PLUS + && base_reg_operand (XEXP (op, 0), Pmode) + && CONST_INT_P (XEXP (op, 1))) + int_const = XEXP (op, 1); + + else + return 0; + + /* Power8 currently will only do the fusion if the top 11 bits of the addis + value are all 1's or 0's. */ + value = INTVAL (int_const); + if ((value & (HOST_WIDE_INT)0xffff) != 0) + return 0; + + if ((value & (HOST_WIDE_INT)0xffff0000) == 0) + return 0; + + return (IN_RANGE (value >> 16, -32, 31)); +}) + +;; Match the second insn (lbz, lhz, lwz, ld) in fusing the combination of addis +;; and loads to GPR registers on power8. +(define_predicate "fusion_gpr_mem_load" + (match_code "mem") +{ + rtx addr; + + if (!MEM_P (op)) + return 0; + + switch (mode) + { + case QImode: + case HImode: + case SImode: + break; + + case DImode: + if (!TARGET_POWERPC64) + return 0; + break; + + default: + return 0; + } + + addr = XEXP (op, 0); + if (GET_CODE (addr) == PLUS) + { + rtx base = XEXP (addr, 0); + rtx offset = XEXP (addr, 1); + + return (base_reg_operand (base, GET_MODE (base)) + && satisfies_constraint_I (offset)); + } + + else if (GET_CODE (addr) == LO_SUM) + { + rtx base = XEXP (addr, 0); + rtx offset = XEXP (addr, 1); + + if (!base_reg_operand (base, GET_MODE (base))) + return 0; + + else if (TARGET_XCOFF || (TARGET_ELF && TARGET_POWERPC64)) + return small_toc_ref (offset, GET_MODE (offset)); + + else if (TARGET_ELF && !TARGET_POWERPC64) + return CONSTANT_P (offset); + } + + return 0; +}) diff --git a/gcc/config/rs6000/rs6000-modes.def b/gcc/config/rs6000/rs6000-modes.def index 54548be7038..5124e1665d4 100644 --- a/gcc/config/rs6000/rs6000-modes.def +++ b/gcc/config/rs6000/rs6000-modes.def @@ -42,5 +42,7 @@ VECTOR_MODES (FLOAT, 8); /* V4HF V2SF */ VECTOR_MODES (FLOAT, 16); /* V8HF V4SF V2DF */ VECTOR_MODES 
(FLOAT, 32); /* V16HF V8SF V4DF */ -/* Replacement for TImode that only is allowed in GPRs. */ +/* Replacement for TImode that only is allowed in GPRs. We also use PTImode + for quad memory atomic operations to force getting an even/odd register + combination. */ PARTIAL_INT_MODE (TI); diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h index 3a7b37a8270..410042bdcc1 100644 --- a/gcc/config/rs6000/rs6000-protos.h +++ b/gcc/config/rs6000/rs6000-protos.h @@ -73,6 +73,8 @@ extern int mems_ok_for_quad_peep (rtx, rtx); extern bool gpr_or_gpr_p (rtx, rtx); extern bool direct_move_p (rtx, rtx); extern bool quad_load_store_p (rtx, rtx); +extern bool fusion_gpr_load_p (rtx, rtx, rtx, rtx, rtx); +extern const char *emit_fusion_gpr_load (rtx, rtx, rtx, rtx); extern enum reg_class (*rs6000_preferred_reload_class_ptr) (rtx, enum reg_class); extern enum reg_class (*rs6000_secondary_reload_class_ptr) (enum reg_class, diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c index 8b939d8e826..10a036c1249 100644 --- a/gcc/config/rs6000/rs6000.c +++ b/gcc/config/rs6000/rs6000.c @@ -3074,6 +3074,21 @@ rs6000_option_override_internal (bool global_init_p) rs6000_isa_flags &= ~OPTION_MASK_QUAD_MEMORY; } + /* Enable power8 fusion if we are tuning for power8, even if we aren't + generating power8 instructions. */ + if (!(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION)) + rs6000_isa_flags |= (processor_target_table[tune_index].target_enable + & OPTION_MASK_P8_FUSION); + + /* Power8 does not fuse sign extended loads with the addis. If we are + optimizing at high levels for speed, convert a sign extended load into a + zero extending load, and an explicit sign extension. 
*/ + if (TARGET_P8_FUSION + && !(rs6000_isa_flags_explicit & OPTION_MASK_P8_FUSION_SIGN) + && optimize_function_for_speed_p (cfun) + && optimize >= 3) + rs6000_isa_flags |= OPTION_MASK_P8_FUSION_SIGN; + if (TARGET_DEBUG_REG || TARGET_DEBUG_TARGET) rs6000_print_isa_options (stderr, 0, "after defaults", rs6000_isa_flags); @@ -30419,6 +30434,270 @@ rs6000_split_logical (rtx operands[3], } +/* Return true if the peephole2 can combine a load involving a combination of + an addis instruction and a load with an offset that can be fused together on + a power8. */ + +bool +fusion_gpr_load_p (rtx addis_reg, /* reg. to hold high value. */ + rtx addis_value, /* high value loaded. */ + rtx target, /* reg. that is loaded. */ + rtx mem, /* memory to load. */ + rtx insn) /* insn for looking up reg notes or + NULL_RTX if this is a peephole2. */ +{ + rtx addr; + rtx base_reg; + + /* Validate arguments. */ + if (!base_reg_operand (addis_reg, GET_MODE (addis_reg))) + return false; + + if (!base_reg_operand (target, GET_MODE (target))) + return false; + + if (!fusion_gpr_addis (addis_value, GET_MODE (addis_value))) + return false; + + if (!fusion_gpr_mem_load (mem, GET_MODE (mem))) + return false; + + /* Validate that the register used to load the high value is either the + register being loaded, or we can safely replace its use in a peephole. + + If this is a peephole2, we assume that there are 2 instructions in the + peephole (addis and load), so we want to check if the target register was + not used and the register to hold the addis result is dead after the + peephole. */ + if (REGNO (addis_reg) != REGNO (target)) + { + if (reg_mentioned_p (target, mem)) + return false; + + if (insn) + { + if (!find_reg_note (insn, REG_DEAD, addis_reg)) + return false; + } + else + { + if (!peep2_reg_dead_p (2, addis_reg)) + return false; + } + } + + /* Validate that the value being loaded in the addis is used in the load. */ + addr = XEXP (mem, 0); /* either PLUS or LO_SUM. 
*/ + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + return false; + + base_reg = XEXP (addr, 0); + return REGNO (addis_reg) == REGNO (base_reg); +} + +/* Return a string to fuse an addis instruction with a gpr load to the same + register that we loaded up the addis instruction. The code is complicated, + so we call output_asm_insn directly, and just return "". */ + +const char * +emit_fusion_gpr_load (rtx addis_reg, rtx addis_value, rtx target, rtx mem) +{ + rtx fuse_ops[10]; + rtx addr; + rtx load_offset; + const char *addis_str = NULL; + const char *load_str = NULL; + const char *mode_name = NULL; + char insn_template[80]; + enum machine_mode mode = GET_MODE (mem); + const char *comment_str = ASM_COMMENT_START; + + if (*comment_str == ' ') + comment_str++; + + if (!MEM_P (mem)) + gcc_unreachable (); + + addr = XEXP (mem, 0); + if (GET_CODE (addr) != PLUS && GET_CODE (addr) != LO_SUM) + gcc_unreachable (); + + load_offset = XEXP (addr, 1); + + /* Now emit the load instruction to the same register. */ + switch (mode) + { + case QImode: + mode_name = "char"; + load_str = "lbz"; + break; + + case HImode: + mode_name = "short"; + load_str = "lhz"; + break; + + case SImode: + mode_name = "int"; + load_str = "lwz"; + break; + + case DImode: + if (TARGET_POWERPC64) + { + mode_name = "long"; + load_str = "ld"; + } + break; + + default: + break; + } + + if (!load_str) + gcc_unreachable (); + + /* Emit the addis instruction. 
*/ + fuse_ops[0] = target; + fuse_ops[1] = addis_reg; + if (satisfies_constraint_L (addis_value)) + { + fuse_ops[2] = addis_value; + addis_str = "lis %0,%v2"; + } + + else if (GET_CODE (addis_value) == PLUS) + { + rtx op0 = XEXP (addis_value, 0); + rtx op1 = XEXP (addis_value, 1); + + if (REG_P (op0) && CONST_INT_P (op1) + && satisfies_constraint_L (op1)) + { + fuse_ops[2] = op0; + fuse_ops[3] = op1; + addis_str = "addis %0,%2,%v3"; + } + } + + else if (GET_CODE (addis_value) == HIGH) + { + rtx value = XEXP (addis_value, 0); + if (GET_CODE (value) == UNSPEC && XINT (value, 1) == UNSPEC_TOCREL) + { + fuse_ops[2] = XVECEXP (value, 0, 0); /* symbol ref. */ + fuse_ops[3] = XVECEXP (value, 0, 1); /* TOC register. */ + if (TARGET_ELF) + addis_str = "addis %0,%3,%2@toc@ha"; + + else if (TARGET_XCOFF) + addis_str = "addis %0,%2@u(%3)"; + + else + gcc_unreachable (); + } + + else if (GET_CODE (value) == PLUS) + { + rtx op0 = XEXP (value, 0); + rtx op1 = XEXP (value, 1); + + if (GET_CODE (op0) == UNSPEC + && XINT (op0, 1) == UNSPEC_TOCREL + && CONST_INT_P (op1)) + { + fuse_ops[2] = XVECEXP (op0, 0, 0); /* symbol ref. */ + fuse_ops[3] = XVECEXP (op0, 0, 1); /* TOC register. 
*/ + fuse_ops[4] = op1; + if (TARGET_ELF) + addis_str = "addis %0,%3,%2+%4@toc@ha"; + + else if (TARGET_XCOFF) + addis_str = "addis %0,%2+%4@u(%3)"; + + else + gcc_unreachable (); + } + } + + else if (satisfies_constraint_L (value)) + { + fuse_ops[2] = value; + addis_str = "lis %0,%v2"; + } + + else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (value)) + { + fuse_ops[2] = value; + addis_str = "lis %0,%2@ha"; + } + } + + if (!addis_str) + fatal_insn ("Could not generate addis value for fusion", addis_value); + + sprintf (insn_template, "%s\t\t%s gpr load fusion, type %s, addis reg %%1", + addis_str, comment_str, mode_name); + output_asm_insn (insn_template, fuse_ops); + + if (CONST_INT_P (load_offset) && satisfies_constraint_I (load_offset)) + { + sprintf (insn_template, "%s %%0,%%1(%%0)", load_str); + fuse_ops[1] = load_offset; + output_asm_insn (insn_template, fuse_ops); + } + + else if (GET_CODE (load_offset) == UNSPEC + && XINT (load_offset, 1) == UNSPEC_TOCREL) + { + if (TARGET_ELF) + sprintf (insn_template, "%s %%0,%%1@toc@l(%%0)", load_str); + + else if (TARGET_XCOFF) + sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str); + + else + gcc_unreachable (); + + fuse_ops[1] = XVECEXP (load_offset, 0, 0); + output_asm_insn (insn_template, fuse_ops); + } + + else if (GET_CODE (load_offset) == PLUS + && GET_CODE (XEXP (load_offset, 0)) == UNSPEC + && XINT (XEXP (load_offset, 0), 1) == UNSPEC_TOCREL + && CONST_INT_P (XEXP (load_offset, 1))) + { + rtx tocrel_unspec = XEXP (load_offset, 0); + if (TARGET_ELF) + sprintf (insn_template, "%s %%0,%%1+%%2@toc@l(%%0)", load_str); + + else if (TARGET_XCOFF) + sprintf (insn_template, "%s %%0,%%1+%%2@l(%%0)", load_str); + + else + gcc_unreachable (); + + fuse_ops[1] = XVECEXP (tocrel_unspec, 0, 0); + fuse_ops[2] = XEXP (load_offset, 1); + output_asm_insn (insn_template, fuse_ops); + } + + else if (TARGET_ELF && !TARGET_POWERPC64 && CONSTANT_P (load_offset)) + { + sprintf (insn_template, "%s %%0,%%1@l(%%0)", load_str); + + 
fuse_ops[1] = load_offset; + output_asm_insn (insn_template, fuse_ops); + } + + else + fatal_insn ("Unable to generate load offset for fusion", load_offset); + + return ""; +} + + struct gcc_target targetm = TARGET_INITIALIZER; #include "gt-rs6000.h" diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md index 064a51da608..98c43df3d81 100644 --- a/gcc/config/rs6000/rs6000.md +++ b/gcc/config/rs6000/rs6000.md @@ -15771,6 +15771,113 @@ }) +;; Power8 fusion support for fusing an addis instruction with a D-form load of +;; a GPR. The addis instruction must be adjacent to the load, and use the same +;; register that is being loaded. The fused ops must be physically adjacent. + +;; GPR fusion for single word integer types + +(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "fusion_gpr_addis" "")) + (set (match_operand:INT1 2 "base_reg_operand" "") + (match_operand:INT1 3 "fusion_gpr_mem_load" ""))] + "TARGET_P8_FUSION + && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3], + insn)" +{ + return emit_fusion_gpr_load (operands[0], operands[1], operands[2], + operands[3]); +} + [(set_attr "type" "load") + (set_attr "length" "8")]) + +(define_peephole + [(set (match_operand:DI 0 "base_reg_operand" "") + (match_operand:DI 1 "fusion_gpr_addis" "")) + (set (match_operand:DI 2 "base_reg_operand" "") + (zero_extend:DI (match_operand:QHSI 3 "fusion_gpr_mem_load" "")))] + "TARGET_P8_FUSION && TARGET_POWERPC64 + && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3], + insn)" +{ + return emit_fusion_gpr_load (operands[0], operands[1], operands[2], + operands[3]); +} + [(set_attr "type" "load") + (set_attr "length" "8")]) + +;; Power8 does not fuse a sign extending load, so convert the sign extending +;; load into a zero extending load, and do an explicit sign extension. Don't +;; do this if we are trying to optimize for space. 
Do this as a peephole2 to +;; allow final rtl optimizations and scheduling to move the sign extend. +(define_peephole2 + [(set (match_operand:DI 0 "base_reg_operand" "") + (match_operand:DI 1 "fusion_gpr_addis" "")) + (set (match_operand:DI 2 "base_reg_operand" "") + (sign_extend:DI (match_operand:HSI 3 "fusion_gpr_mem_load" "")))] + "TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN && TARGET_POWERPC64 + && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3], + NULL_RTX)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 4) (match_dup 3)) + (set (match_dup 2) (sign_extend:DI (match_dup 4)))] +{ + unsigned int offset + = (BYTES_BIG_ENDIAN ? 8 - GET_MODE_SIZE (<MODE>mode) : 0); + + operands[4] = simplify_subreg (<MODE>mode, operands[2], DImode, + offset); +}) + +(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "fusion_gpr_addis" "")) + (set (match_operand:SI 2 "base_reg_operand" "") + (zero_extend:SI (match_operand:QHI 3 "fusion_gpr_mem_load" "")))] + "TARGET_P8_FUSION + && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3], + insn)" +{ + return emit_fusion_gpr_load (operands[0], operands[1], operands[2], + operands[3]); +} + [(set_attr "type" "load") + (set_attr "length" "8")]) + +(define_peephole2 + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "fusion_gpr_addis" "")) + (set (match_operand:SI 2 "base_reg_operand" "") + (sign_extend:SI (match_operand:HI 3 "fusion_gpr_mem_load" "")))] + "TARGET_P8_FUSION && TARGET_P8_FUSION_SIGN + && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3], + NULL_RTX)" + [(set (match_dup 0) (match_dup 1)) + (set (match_dup 4) (match_dup 3)) + (set (match_dup 2) (sign_extend:SI (match_dup 4)))] +{ + unsigned int offset = (BYTES_BIG_ENDIAN ? 
2 : 0); + + operands[4] = simplify_subreg (HImode, operands[2], SImode, offset); +}) + +(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "fusion_gpr_addis" "")) + (set (match_operand:HI 2 "base_reg_operand" "") + (zero_extend:HI (match_operand:QI 3 "fusion_gpr_mem_load" "")))] + "TARGET_P8_FUSION + && fusion_gpr_load_p (operands[0], operands[1], operands[2], operands[3], + insn)" +{ + return emit_fusion_gpr_load (operands[0], operands[1], operands[2], + operands[3]); +} + [(set_attr "type" "load") + (set_attr "length" "8")]) + + (include "sync.md") (include "vector.md") diff --git a/gcc/config/rs6000/vsx.md b/gcc/config/rs6000/vsx.md index 5e6f397031c..11d6b8bb4d0 100644 --- a/gcc/config/rs6000/vsx.md +++ b/gcc/config/rs6000/vsx.md @@ -40,6 +40,14 @@ ;; it to use gprs as well as vsx registers. (define_mode_iterator VSX_M [V16QI V8HI V4SI V2DI V4SF V2DF]) +(define_mode_iterator VSX_M2 [V16QI + V8HI + V4SI + V2DI + V4SF + V2DF + (TI "TARGET_VSX_TIMODE")]) + ;; Map into the appropriate load/store name based on the type (define_mode_attr VSm [(V16QI "vw4") (V8HI "vw4") @@ -1446,3 +1454,27 @@ }" [(set_attr "length" "20") (set_attr "type" "veccomplex")]) + + +;; Power8 Vector fusion. The fused ops must be physically adjacent. 
+(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "short_cint_operand" "")) + (set (match_operand:VSX_M2 2 "vsx_register_operand" "") + (mem:VSX_M2 (plus:P (match_dup 0) + (match_operand:P 3 "int_reg_operand" ""))))] + "TARGET_P8_FUSION" + "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3" + [(set_attr "length" "8") + (set_attr "type" "vecload")]) + +(define_peephole + [(set (match_operand:P 0 "base_reg_operand" "") + (match_operand:P 1 "short_cint_operand" "")) + (set (match_operand:VSX_M2 2 "vsx_register_operand" "") + (mem:VSX_M2 (plus:P (match_operand:P 3 "int_reg_operand" "") + (match_dup 0))))] + "TARGET_P8_FUSION" + "li %0,%1\t\t\t# vector load fusion\;lx<VSX_M2:VSm>x %x2,%0,%3" + [(set_attr "length" "8") + (set_attr "type" "vecload")]) diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog index 6a02f1b70e4..846c9664fbd 100644 --- a/gcc/testsuite/ChangeLog +++ b/gcc/testsuite/ChangeLog @@ -1,3 +1,8 @@ +2013-07-31 Michael Meissner <meissner@linux.vnet.ibm.com> + + * gcc.target/powerpc/fusion.c: New file, test power8 fusion + support. 
+ 2013-07-31 Richard Sandiford <rdsandiford@googlemail.com> * gcc.target/mips/mips.exp (mips-dg-options): Test for mabicalls diff --git a/gcc/testsuite/gcc.target/powerpc/fusion.c b/gcc/testsuite/gcc.target/powerpc/fusion.c new file mode 100644 index 00000000000..3bea1c9f5a8 --- /dev/null +++ b/gcc/testsuite/gcc.target/powerpc/fusion.c @@ -0,0 +1,23 @@ +/* { dg-do compile { target { powerpc*-*-* } } } */ +/* { dg-skip-if "" { powerpc*-*-darwin* } { "*" } { "" } } */ +/* { dg-require-effective-target powerpc_p8vector_ok } */ +/* { dg-options "-mcpu=power7 -mtune=power8 -O3" } */ + +#define LARGE 0x12345 + +int fusion_uchar (unsigned char *p){ return p[LARGE]; } +int fusion_schar (signed char *p){ return p[LARGE]; } +int fusion_ushort (unsigned short *p){ return p[LARGE]; } +int fusion_short (short *p){ return p[LARGE]; } +int fusion_int (int *p){ return p[LARGE]; } +unsigned fusion_uns (unsigned *p){ return p[LARGE]; } + +vector double fusion_vector (vector double *p) { return p[2]; } + +/* { dg-final { scan-assembler-times "gpr load fusion" 6 } } */ +/* { dg-final { scan-assembler-times "vector load fusion" 1 } } */ +/* { dg-final { scan-assembler-times "lbz" 2 } } */ +/* { dg-final { scan-assembler-times "extsb" 1 } } */ +/* { dg-final { scan-assembler-times "lhz" 2 } } */ +/* { dg-final { scan-assembler-times "extsh" 1 } } */ +/* { dg-final { scan-assembler-times "lwz" 2 } } */ |