;; AArch64 ldp/stp peephole optimizations.
;; Copyright (C) 2014 Free Software Foundation, Inc.
;; Contributed by ARM Ltd.
;;
;; This file is part of GCC.
;;
;; GCC is free software; you can redistribute it and/or modify it
;; under the terms of the GNU General Public License as published by
;; the Free Software Foundation; either version 3, or (at your option)
;; any later version.
;;
;; GCC is distributed in the hope that it will be useful, but
;; WITHOUT ANY WARRANTY; without even the implied warranty of
;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
;; General Public License for more details.
;;
;; You should have received a copy of the GNU General Public License
;; along with GCC; see the file COPYING3.  If not see
;; <http://www.gnu.org/licenses/>.

;; Two consecutive integer (GPI) loads -> one parallel of both loads.
;; The preparation code orders the two accesses by ascending address
;; offset before the replacement is generated.
(define_peephole2
  [(set (match_operand:GPI 0 "register_operand" "")
        (match_operand:GPI 1 "aarch64_mem_pair_operand" ""))
   (set (match_operand:GPI 2 "register_operand" "")
        (match_operand:GPI 3 "memory_operand" ""))]
  "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
{
  rtx base, offset_1, offset_2, tmp;

  /* Operands 1 and 3 are the two memory sources.  BASE is only an
     out-parameter here; its value is not used afterwards.  */
  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);

  /* If the first access has the higher offset, exchange the two
     (register, memory) pairs so the lower offset comes first.
     NOTE(review): INTVAL assumes both offsets are CONST_INTs; this is
     presumably guaranteed by the insn condition -- confirm.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      tmp = operands[0];
      operands[0] = operands[2];
      operands[2] = tmp;
      tmp = operands[1];
      operands[1] = operands[3];
      operands[3] = tmp;
    }
})

;; Two consecutive integer (GPI) stores -> one parallel of both stores.
;; The stored value may be a register or zero (aarch64_reg_or_zero).
(define_peephole2
  [(set (match_operand:GPI 0 "aarch64_mem_pair_operand" "")
        (match_operand:GPI 1 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPI 2 "memory_operand" "")
        (match_operand:GPI 3 "aarch64_reg_or_zero" ""))]
  "aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
{
  rtx base, offset_1, offset_2, tmp;

  /* For stores the memory operands are the destinations, 0 and 2.  */
  extract_base_offset_in_addr (operands[0], &base, &offset_1);
  extract_base_offset_in_addr (operands[2], &base, &offset_2);

  /* Order the two stores by ascending offset.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      tmp = operands[0];
      operands[0] = operands[2];
      operands[2] = tmp;
      tmp = operands[1];
      operands[1] = operands[3];
      operands[3] = tmp;
    }
})

;; Two consecutive floating-point (GPF) loads -> one parallel of both
;; loads, ordered by ascending offset.
(define_peephole2
  [(set (match_operand:GPF 0 "register_operand" "")
        (match_operand:GPF 1 "aarch64_mem_pair_operand" ""))
   (set (match_operand:GPF 2 "register_operand" "")
        (match_operand:GPF 3 "memory_operand" ""))]
  "aarch64_operands_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
{
  rtx base, offset_1, offset_2, tmp;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);

  /* Order the two loads by ascending offset.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      tmp = operands[0];
      operands[0] = operands[2];
      operands[2] = tmp;
      tmp = operands[1];
      operands[1] = operands[3];
      operands[3] = tmp;
    }
})

;; Two consecutive floating-point (GPF) stores -> one parallel of both
;; stores, ordered by ascending offset.
(define_peephole2
  [(set (match_operand:GPF 0 "aarch64_mem_pair_operand" "")
        (match_operand:GPF 1 "register_operand" ""))
   (set (match_operand:GPF 2 "memory_operand" "")
        (match_operand:GPF 3 "register_operand" ""))]
  "aarch64_operands_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(parallel [(set (match_dup 0) (match_dup 1))
              (set (match_dup 2) (match_dup 3))])]
{
  rtx base, offset_1, offset_2, tmp;

  extract_base_offset_in_addr (operands[0], &base, &offset_1);
  extract_base_offset_in_addr (operands[2], &base, &offset_2);

  /* Order the two stores by ascending offset.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      tmp = operands[0];
      operands[0] = operands[2];
      operands[2] = tmp;
      tmp = operands[1];
      operands[1] = operands[3];
      operands[3] = tmp;
    }
})

;; Handle sign/zero extended consecutive load/store.
;; Pair of SImode loads, each sign-extended into a DImode register.
(define_peephole2
  [(set (match_operand:DI 0 "register_operand" "")
        (sign_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "")))
   (set (match_operand:DI 2 "register_operand" "")
        (sign_extend:DI (match_operand:SI 3 "memory_operand" "")))]
  "aarch64_operands_ok_for_ldpstp (operands, true, SImode)"
  [(parallel [(set (match_dup 0) (sign_extend:DI (match_dup 1)))
              (set (match_dup 2) (sign_extend:DI (match_dup 3)))])]
{
  rtx addr_base, first_off, second_off;

  extract_base_offset_in_addr (operands[1], &addr_base, &first_off);
  extract_base_offset_in_addr (operands[3], &addr_base, &second_off);

  /* The access at the lower offset must come first; exchange the two
     (destination, source) pairs when they arrive the other way round.  */
  if (INTVAL (second_off) < INTVAL (first_off))
    {
      rtx saved_mem = operands[1];
      rtx saved_reg = operands[0];

      operands[0] = operands[2];
      operands[2] = saved_reg;
      operands[1] = operands[3];
      operands[3] = saved_mem;
    }
})

;; Pair of SImode loads, each zero-extended into a DImode register.
(define_peephole2
  [(set (match_operand:DI 0 "register_operand" "")
        (zero_extend:DI (match_operand:SI 1 "aarch64_mem_pair_operand" "")))
   (set (match_operand:DI 2 "register_operand" "")
        (zero_extend:DI (match_operand:SI 3 "memory_operand" "")))]
  "aarch64_operands_ok_for_ldpstp (operands, true, SImode)"
  [(parallel [(set (match_dup 0) (zero_extend:DI (match_dup 1)))
              (set (match_dup 2) (zero_extend:DI (match_dup 3)))])]
{
  rtx addr_base, first_off, second_off;

  extract_base_offset_in_addr (operands[1], &addr_base, &first_off);
  extract_base_offset_in_addr (operands[3], &addr_base, &second_off);

  /* Same ordering rule as the sign-extending pattern above.  */
  if (INTVAL (second_off) < INTVAL (first_off))
    {
      rtx saved_mem = operands[1];
      rtx saved_reg = operands[0];

      operands[0] = operands[2];
      operands[2] = saved_reg;
      operands[1] = operands[3];
      operands[3] = saved_mem;
    }
})

;; Handle consecutive load/store whose offset is out of the range
;; supported by ldp/ldpsw/stp.  We first adjust the offset in a scratch
;; register, then merge the accesses into ldp/ldpsw/stp using the
;; adjusted offset.
;; Four consecutive integer (GPI) loads.  Operand 8 is a DImode scratch
;; register; the replacement insns are emitted by
;; aarch64_gen_adjusted_ldpstp, so the RTL template is only a placeholder.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPI 0 "register_operand" "")
        (match_operand:GPI 1 "memory_operand" ""))
   (set (match_operand:GPI 2 "register_operand" "")
        (match_operand:GPI 3 "memory_operand" ""))
   (set (match_operand:GPI 4 "register_operand" "")
        (match_operand:GPI 5 "memory_operand" ""))
   (set (match_operand:GPI 6 "register_operand" "")
        (match_operand:GPI 7 "memory_operand" ""))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(const_int 0)]
{
  rtx base, offset_1, offset_2, tmp;

  /* Only the first two memory operands are compared to decide the
     direction of the whole sequence.  */
  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);

  /* Descending offsets: reverse the order of all four
     (register, memory) pairs.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      tmp = operands[0];
      operands[0] = operands[6];
      operands[6] = tmp;
      tmp = operands[1];
      operands[1] = operands[7];
      operands[7] = tmp;
      tmp = operands[2];
      operands[2] = operands[4];
      operands[4] = tmp;
      tmp = operands[3];
      operands[3] = operands[5];
      operands[5] = tmp;
    }

  if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN))
    DONE;
  else
    FAIL;
})

;; Four consecutive floating-point (GPF) loads; same scheme as the GPI
;; load pattern.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPF 0 "register_operand" "")
        (match_operand:GPF 1 "memory_operand" ""))
   (set (match_operand:GPF 2 "register_operand" "")
        (match_operand:GPF 3 "memory_operand" ""))
   (set (match_operand:GPF 4 "register_operand" "")
        (match_operand:GPF 5 "memory_operand" ""))
   (set (match_operand:GPF 6 "register_operand" "")
        (match_operand:GPF 7 "memory_operand" ""))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, <MODE>mode)"
  [(const_int 0)]
{
  rtx base, offset_1, offset_2, tmp;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);

  /* Descending offsets: reverse the order of all four pairs.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      tmp = operands[0];
      operands[0] = operands[6];
      operands[6] = tmp;
      tmp = operands[1];
      operands[1] = operands[7];
      operands[7] = tmp;
      tmp = operands[2];
      operands[2] = operands[4];
      operands[4] = tmp;
      tmp = operands[3];
      operands[3] = operands[5];
      operands[5] = tmp;
    }

  if (aarch64_gen_adjusted_ldpstp (operands, true, <MODE>mode, UNKNOWN))
    DONE;
  else
    FAIL;
})

;; Four consecutive SImode loads, each sign-extended to DImode.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:DI 0 "register_operand" "")
        (sign_extend:DI (match_operand:SI 1 "memory_operand" "")))
   (set (match_operand:DI 2 "register_operand" "")
        (sign_extend:DI (match_operand:SI 3 "memory_operand" "")))
   (set (match_operand:DI 4 "register_operand" "")
        (sign_extend:DI (match_operand:SI 5 "memory_operand" "")))
   (set (match_operand:DI 6 "register_operand" "")
        (sign_extend:DI (match_operand:SI 7 "memory_operand" "")))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
  [(const_int 0)]
{
  rtx base, offset_1, offset_2, tmp;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);

  /* Descending offsets: reverse the order of all four pairs.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      tmp = operands[0];
      operands[0] = operands[6];
      operands[6] = tmp;
      tmp = operands[1];
      operands[1] = operands[7];
      operands[7] = tmp;
      tmp = operands[2];
      operands[2] = operands[4];
      operands[4] = tmp;
      tmp = operands[3];
      operands[3] = operands[5];
      operands[5] = tmp;
    }

  /* The memory mode is SImode; the extension code is passed separately.  */
  if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, SIGN_EXTEND))
    DONE;
  else
    FAIL;
})

;; Four consecutive SImode loads, each zero-extended to DImode.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:DI 0 "register_operand" "")
        (zero_extend:DI (match_operand:SI 1 "memory_operand" "")))
   (set (match_operand:DI 2 "register_operand" "")
        (zero_extend:DI (match_operand:SI 3 "memory_operand" "")))
   (set (match_operand:DI 4 "register_operand" "")
        (zero_extend:DI (match_operand:SI 5 "memory_operand" "")))
   (set (match_operand:DI 6 "register_operand" "")
        (zero_extend:DI (match_operand:SI 7 "memory_operand" "")))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, true, SImode)"
  [(const_int 0)]
{
  rtx base, offset_1, offset_2, tmp;

  extract_base_offset_in_addr (operands[1], &base, &offset_1);
  extract_base_offset_in_addr (operands[3], &base, &offset_2);

  /* Descending offsets: reverse the order of all four pairs.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      tmp = operands[0];
      operands[0] = operands[6];
      operands[6] = tmp;
      tmp = operands[1];
      operands[1] = operands[7];
      operands[7] = tmp;
      tmp = operands[2];
      operands[2] = operands[4];
      operands[4] = tmp;
      tmp = operands[3];
      operands[3] = operands[5];
      operands[5] = tmp;
    }

  if (aarch64_gen_adjusted_ldpstp (operands, true, SImode, ZERO_EXTEND))
    DONE;
  else
    FAIL;
})

;; Four consecutive integer (GPI) stores.  For stores the memory
;; operands are the destinations (0, 2, 4, 6).
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPI 0 "memory_operand" "")
        (match_operand:GPI 1 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPI 2 "memory_operand" "")
        (match_operand:GPI 3 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPI 4 "memory_operand" "")
        (match_operand:GPI 5 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPI 6 "memory_operand" "")
        (match_operand:GPI 7 "aarch64_reg_or_zero" ""))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(const_int 0)]
{
  rtx base, offset_1, offset_2, tmp;

  extract_base_offset_in_addr (operands[0], &base, &offset_1);
  extract_base_offset_in_addr (operands[2], &base, &offset_2);

  /* Descending offsets: reverse the order of all four pairs.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      tmp = operands[0];
      operands[0] = operands[6];
      operands[6] = tmp;
      tmp = operands[1];
      operands[1] = operands[7];
      operands[7] = tmp;
      tmp = operands[2];
      operands[2] = operands[4];
      operands[4] = tmp;
      tmp = operands[3];
      operands[3] = operands[5];
      operands[5] = tmp;
    }

  if (aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN))
    DONE;
  else
    FAIL;
})

;; Four consecutive floating-point (GPF) stores; same scheme as the GPI
;; store pattern.
(define_peephole2
  [(match_scratch:DI 8 "r")
   (set (match_operand:GPF 0 "memory_operand" "")
        (match_operand:GPF 1 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPF 2 "memory_operand" "")
        (match_operand:GPF 3 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPF 4 "memory_operand" "")
        (match_operand:GPF 5 "aarch64_reg_or_zero" ""))
   (set (match_operand:GPF 6 "memory_operand" "")
        (match_operand:GPF 7 "aarch64_reg_or_zero" ""))
   (match_dup 8)]
  "aarch64_operands_adjust_ok_for_ldpstp (operands, false, <MODE>mode)"
  [(const_int 0)]
{
  rtx base, offset_1, offset_2, tmp;

  extract_base_offset_in_addr (operands[0], &base, &offset_1);
  extract_base_offset_in_addr (operands[2], &base, &offset_2);

  /* Descending offsets: reverse the order of all four pairs.  */
  if (INTVAL (offset_1) > INTVAL (offset_2))
    {
      tmp = operands[0];
      operands[0] = operands[6];
      operands[6] = tmp;
      tmp = operands[1];
      operands[1] = operands[7];
      operands[7] = tmp;
      tmp = operands[2];
      operands[2] = operands[4];
      operands[4] = tmp;
      tmp = operands[3];
      operands[3] = operands[5];
      operands[5] = tmp;
    }

  if (aarch64_gen_adjusted_ldpstp (operands, false, <MODE>mode, UNKNOWN))
    DONE;
  else
    FAIL;
})