/* Definitions of target machine for GNU compiler.
   Copyright (C) 1999, 2000 Free Software Foundation, Inc.
   Contributed by James E. Wilson and David Mosberger.

This file is part of GNU CC.

GNU CC is free software; you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation; either version 2, or (at your option)
any later version.

GNU CC is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with GNU CC; see the file COPYING.  If not, write to
the Free Software Foundation, 59 Temple Place - Suite 330,
Boston, MA 02111-1307, USA.  */

#include "config.h"
#include "system.h"
#include "rtl.h"
#include "tree.h"
#include "tm_p.h"
#include "regs.h"
#include "hard-reg-set.h"
#include "real.h"
#include "insn-config.h"
#include "conditions.h"
#include "insn-flags.h"
#include "output.h"
#include "insn-attr.h"
#include "flags.h"
#include "recog.h"
#include "expr.h"
#include "obstack.h"
#include "except.h"
#include "function.h"
#include "ggc.h"
#include "basic-block.h"
#include "toplev.h"

/* This is used for communication between ASM_OUTPUT_LABEL and
   ASM_OUTPUT_LABELREF.  */
int ia64_asm_output_label = 0;

/* Define the information needed to generate branch and scc insns.  This is
   stored from the compare operation.  */
struct rtx_def * ia64_compare_op0;
struct rtx_def * ia64_compare_op1;

/* Register number where ar.pfs was saved in the prologue, or zero
   if it was not saved.  */
int ia64_arpfs_regno;

/* Register number where rp was saved in the prologue, or zero if it was
   not saved.  */
int ia64_rp_regno;

/* Register number where frame pointer was saved in the prologue, or zero
   if it was not saved.  */
int ia64_fp_regno;

/* Number of input and local registers used.  This is needed for the
   .regstk directive, and also for debugging info.  */
int ia64_input_regs;
int ia64_local_regs;

/* If true, then we must emit a .regstk directive.  */
int ia64_need_regstk;

/* Register names for ia64_expand_prologue.  */
static const char * const ia64_reg_numbers[96] =
{ "r32", "r33", "r34", "r35", "r36", "r37", "r38", "r39",
  "r40", "r41", "r42", "r43", "r44", "r45", "r46", "r47",
  "r48", "r49", "r50", "r51", "r52", "r53", "r54", "r55",
  "r56", "r57", "r58", "r59", "r60", "r61", "r62", "r63",
  "r64", "r65", "r66", "r67", "r68", "r69", "r70", "r71",
  "r72", "r73", "r74", "r75", "r76", "r77", "r78", "r79",
  "r80", "r81", "r82", "r83", "r84", "r85", "r86", "r87",
  "r88", "r89", "r90", "r91", "r92", "r93", "r94", "r95",
  "r96", "r97", "r98", "r99", "r100","r101","r102","r103",
  "r104","r105","r106","r107","r108","r109","r110","r111",
  "r112","r113","r114","r115","r116","r117","r118","r119",
  "r120","r121","r122","r123","r124","r125","r126","r127"};

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_input_reg_names[8] =
{ "in0",  "in1",  "in2",  "in3",  "in4",  "in5",  "in6",  "in7" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_local_reg_names[80] =
{ "loc0", "loc1", "loc2", "loc3", "loc4", "loc5", "loc6", "loc7",
  "loc8", "loc9", "loc10","loc11","loc12","loc13","loc14","loc15",
  "loc16","loc17","loc18","loc19","loc20","loc21","loc22","loc23",
  "loc24","loc25","loc26","loc27","loc28","loc29","loc30","loc31",
  "loc32","loc33","loc34","loc35","loc36","loc37","loc38","loc39",
  "loc40","loc41","loc42","loc43","loc44","loc45","loc46","loc47",
  "loc48","loc49","loc50","loc51","loc52","loc53","loc54","loc55",
  "loc56","loc57","loc58","loc59","loc60","loc61","loc62","loc63",
  "loc64","loc65","loc66","loc67","loc68","loc69","loc70","loc71",
  "loc72","loc73","loc74","loc75","loc76","loc77","loc78","loc79" };

/* ??? These strings could be shared with REGISTER_NAMES.  */
static const char * const ia64_output_reg_names[8] =
{ "out0", "out1", "out2", "out3", "out4", "out5", "out6", "out7" };

/* String used with the -mfixed-range= option.  */
const char *ia64_fixed_range_string;

/* Variables which are this size or smaller are put in the sdata/sbss
   sections.  */
unsigned int ia64_section_threshold;

static enum machine_mode hfa_element_mode PARAMS ((tree, int));
static void fix_range PARAMS ((const char *));
static void ia64_add_gc_roots PARAMS ((void));
static void ia64_init_machine_status PARAMS ((struct function *));
static void ia64_mark_machine_status PARAMS ((struct function *));
static void emit_insn_group_barriers PARAMS ((rtx));
static void emit_predicate_relation_info PARAMS ((rtx));
static int process_set PARAMS ((FILE *, rtx));
static rtx ia64_expand_compare_and_swap PARAMS ((enum insn_code, tree,
						 rtx, int));
static rtx ia64_expand_binop_builtin PARAMS ((enum insn_code, tree, rtx));

/* Return 1 if OP is a valid operand for the MEM of a CALL insn.  */

int
call_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (mode != GET_MODE (op))
    return 0;

  return (GET_CODE (op) == SYMBOL_REF || GET_CODE (op) == REG
	  || (GET_CODE (op) == SUBREG && GET_CODE (XEXP (op, 0)) == REG));
}

/* Return 1 if OP refers to a symbol in the sdata section.  */

int
sdata_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      if (GET_CODE (XEXP (op, 0)) != PLUS
	  || GET_CODE (XEXP (XEXP (op, 0), 0)) != SYMBOL_REF)
	break;
      op = XEXP (XEXP (op, 0), 0);
      /* FALLTHRU */

    case SYMBOL_REF:
      if (CONSTANT_POOL_ADDRESS_P (op))
	return GET_MODE_SIZE (get_pool_mode (op)) <= ia64_section_threshold;
      else
	return XSTR (op, 0)[0] == SDATA_NAME_FLAG_CHAR;

    default:
      break;
    }

  return 0;
}

/* Return 1 if OP refers to a symbol, and is appropriate for a GOT load.  */

int
got_symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
      op = XEXP (op, 0);
      if (GET_CODE (op) != PLUS)
	return 0;
      if (GET_CODE (XEXP (op, 0)) != SYMBOL_REF)
	return 0;
      op = XEXP (op, 1);
      if (GET_CODE (op) != CONST_INT)
	return 0;

      return 1;

      /* Ok if we're not using GOT entries at all.  */
      if (TARGET_NO_PIC || TARGET_AUTO_PIC)
	return 1;

      /* "Ok" while emitting rtl, since otherwise we won't be provided with
	 the entire offset during emission, which makes it very hard to
	 split the offset into high and low parts.  */
      if (rtx_equal_function_value_matters)
	return 1;

      /* Force the low 14 bits of the constant to zero so that we do not
	 use up so many GOT entries.  */
      return (INTVAL (op) & 0x3fff) == 0;

    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}
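/* For illustration (a sketch, not from the original sources): a
   "symbol + offset" operand reaches got_symbolic_operand above as
       (const (plus (symbol_ref "x") (const_int 8)))
   which is why the CONST case unwraps one level with XEXP (op, 0)
   before inspecting the operands of the PLUS.  */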
/* Return 1 if OP refers to a symbol.  */

int
symbolic_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  switch (GET_CODE (op))
    {
    case CONST:
    case SYMBOL_REF:
    case LABEL_REF:
      return 1;

    default:
      break;
    }
  return 0;
}

/* Return 1 if OP refers to a function.  */

int
function_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  if (GET_CODE (op) == SYMBOL_REF && SYMBOL_REF_FLAG (op))
    return 1;
  else
    return 0;
}

/* Return 1 if OP is setjmp or a similar function.  */

/* ??? This is an unsatisfying solution.  Should rethink.  */

int
setjmp_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  const char *name;
  int retval = 0;

  if (GET_CODE (op) != SYMBOL_REF)
    return 0;

  name = XSTR (op, 0);

  /* The following code is borrowed from special_function_p in calls.c.  */

  /* Disregard prefix _, __ or __x.  */
  if (name[0] == '_')
    {
      if (name[1] == '_' && name[2] == 'x')
	name += 3;
      else if (name[1] == '_')
	name += 2;
      else
	name += 1;
    }

  if (name[0] == 's')
    {
      retval
	= ((name[1] == 'e'
	    && (! strcmp (name, "setjmp")
		|| ! strcmp (name, "setjmp_syscall")))
	   || (name[1] == 'i'
	       && ! strcmp (name, "sigsetjmp"))
	   || (name[1] == 'a'
	       && ! strcmp (name, "savectx")));
    }
  else if ((name[0] == 'q' && name[1] == 's'
	    && ! strcmp (name, "qsetjmp"))
	   || (name[0] == 'v' && name[1] == 'f'
	       && ! strcmp (name, "vfork")))
    retval = 1;

  return retval;
}

/* Return 1 if OP is a general operand, but when pic exclude symbolic
   operands.  */

/* ??? If we drop no-pic support, can delete SYMBOL_REF, CONST, and
   LABEL_REF from PREDICATE_CODES.  */

int
move_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! TARGET_NO_PIC && symbolic_operand (op, mode))
    return 0;

  return general_operand (op, mode);
}

/* Return 1 if OP is a register operand, or the constant zero.  */

int
reg_or_0_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return (op == const0_rtx || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 6 bit immediate operand.  */

int
reg_or_6bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit immediate operand.  */

int
reg_or_8bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or an 8 bit adjusted immediate
   operand.  */

int
reg_or_8bit_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or is valid for both an 8 bit
   immediate and an 8 bit adjusted immediate operand.  This is necessary
   because when we emit a compare, we don't know what the condition will be,
   so we need the union of the immediates accepted by GT and LT.  */

int
reg_or_8bit_and_adjusted_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_K (INTVAL (op))
	   && CONST_OK_FOR_L (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}
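/* A concrete illustration of the comment above (not from the original
   sources): a test such as "x > C" may later be rewritten as
   "x >= C + 1", so a compare immediate is only safe when both C and its
   adjusted neighbor are representable; hence the requirement that both
   the K and L tests pass in reg_or_8bit_and_adjusted_operand.  */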
/* Return 1 if OP is a register operand, or a 14 bit immediate operand.  */

int
reg_or_14bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_I (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}

/* Return 1 if OP is a register operand, or a 22 bit immediate operand.  */

int
reg_or_22bit_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_J (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}

/* Return 1 if OP is a 6 bit immediate operand.  */

int
shift_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT && CONST_OK_FOR_M (INTVAL (op)))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 5 bit immediate operand.  */

int
shift_32bit_count_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return ((GET_CODE (op) == CONST_INT
	   && (INTVAL (op) >= 0 && INTVAL (op) < 32))
	  || GET_CODE (op) == CONSTANT_P_RTX);
}

/* Return 1 if OP is a 2, 4, 8, or 16 immediate operand.  */

int
shladd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == 2 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a -16, -8, -4, -1, 1, 4, 8, or 16 immediate operand.  */

int
fetchadd_operand (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  return (GET_CODE (op) == CONST_INT
	  && (INTVAL (op) == -16 || INTVAL (op) == -8
	      || INTVAL (op) == -4 || INTVAL (op) == -1
	      || INTVAL (op) == 1 || INTVAL (op) == 4
	      || INTVAL (op) == 8 || INTVAL (op) == 16));
}

/* Return 1 if OP is a floating-point constant zero, one, or a register.  */

int
reg_or_fp01_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  return ((GET_CODE (op) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (op))
	  || GET_CODE (op) == CONSTANT_P_RTX
	  || register_operand (op, mode));
}

/* Like nonimmediate_operand, but don't allow MEMs that try to use a
   POST_MODIFY with a REG as displacement.  */

int
destination_operand (op, mode)
     rtx op;
     enum machine_mode mode;
{
  if (! nonimmediate_operand (op, mode))
    return 0;
  if (GET_CODE (op) == MEM
      && GET_CODE (XEXP (op, 0)) == POST_MODIFY
      && GET_CODE (XEXP (XEXP (XEXP (op, 0), 1), 1)) == REG)
    return 0;
  return 1;
}

/* Return 1 if this is a comparison operator, which accepts a normal 8-bit
   signed immediate operand.  */

int
normal_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == EQ || code == NE
	      || code == GT || code == LE || code == GTU || code == LEU));
}

/* Return 1 if this is a comparison operator, which accepts an adjusted 8-bit
   signed immediate operand.  */

int
adjusted_comparison_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((mode == VOIDmode || GET_MODE (op) == mode)
	  && (code == LT || code == GE || code == LTU || code == GEU));
}

/* Return 1 if OP is a call returning an HFA.  It is known to be a PARALLEL
   and the first section has already been tested.  */

int
call_multiple_values_operation (op, mode)
     rtx op;
     enum machine_mode mode ATTRIBUTE_UNUSED;
{
  int count = XVECLEN (op, 0) - 2;
  int i;
  unsigned int dest_regno;

  /* Perform a quick check so we don't blow up below.  */
  if (count <= 1
      || GET_CODE (XVECEXP (op, 0, 0)) != SET
      || GET_CODE (SET_DEST (XVECEXP (op, 0, 0))) != REG
      || GET_CODE (SET_SRC (XVECEXP (op, 0, 0))) != CALL)
    return 0;

  dest_regno = REGNO (SET_DEST (XVECEXP (op, 0, 0)));

  for (i = 1; i < count; i++)
    {
      rtx elt = XVECEXP (op, 0, i + 2);

      if (GET_CODE (elt) != SET
	  || GET_CODE (SET_SRC (elt)) != CALL
	  || GET_CODE (SET_DEST (elt)) != REG
	  || REGNO (SET_DEST (elt)) != dest_regno + i)
	return 0;
    }

  return 1;
}

/* Return 1 if this operator is valid for predication.  */

int
predicate_operator (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  enum rtx_code code = GET_CODE (op);
  return ((GET_MODE (op) == mode || mode == VOIDmode)
	  && (code == EQ || code == NE));
}

/* Return 1 if this is the ar.lc register.  */

int
ar_lc_reg_operand (op, mode)
     register rtx op;
     enum machine_mode mode;
{
  return (GET_MODE (op) == DImode
	  && (mode == DImode || mode == VOIDmode)
	  && GET_CODE (op) == REG
	  && REGNO (op) == AR_LC_REGNUM);
}

/* Return 1 if the operands of a move are ok.  */

int
ia64_move_ok (dst, src)
     rtx dst, src;
{
  /* If we're under init_recog_no_volatile, we'll not be able to use
     memory_operand.  So check the code directly and don't worry about
     the validity of the underlying address, which should have been
     checked elsewhere anyway.  */
  if (GET_CODE (dst) != MEM)
    return 1;
  if (GET_CODE (src) == MEM)
    return 0;
  if (register_operand (src, VOIDmode))
    return 1;

  /* Otherwise, this must be a constant, and it must be either 0, 0.0,
     or 1.0.  */
  if (INTEGRAL_MODE_P (GET_MODE (dst)))
    return src == const0_rtx;
  else
    return GET_CODE (src) == CONST_DOUBLE && CONST_DOUBLE_OK_FOR_G (src);
}

/* Expand a symbolic constant load.  */

/* ??? Should generalize this, so that we can also support 32 bit
   pointers.  */

void
ia64_expand_load_address (dest, src)
     rtx dest, src;
{
  rtx temp;

  /* The destination could be a MEM during initial rtl generation,
     which isn't a valid destination for the PIC load address patterns.  */
  if (! register_operand (dest, DImode))
    temp = gen_reg_rtx (DImode);
  else
    temp = dest;

  if (TARGET_AUTO_PIC)
    emit_insn (gen_load_gprel64 (temp, src));
  else if (GET_CODE (src) == SYMBOL_REF && SYMBOL_REF_FLAG (src))
    emit_insn (gen_load_fptr (temp, src));
  else if (sdata_symbolic_operand (src, DImode))
    emit_insn (gen_load_gprel (temp, src));
  else if (GET_CODE (src) == CONST
	   && GET_CODE (XEXP (src, 0)) == PLUS
	   && GET_CODE (XEXP (XEXP (src, 0), 1)) == CONST_INT
	   && (INTVAL (XEXP (XEXP (src, 0), 1)) & 0x1fff) != 0)
    {
      rtx subtarget = no_new_pseudos ? temp : gen_reg_rtx (DImode);
      rtx sym = XEXP (XEXP (src, 0), 0);
      HOST_WIDE_INT ofs, hi, lo;

      /* Split the offset into a sign extended 14-bit low part
	 and a complementary high part.  */
      ofs = INTVAL (XEXP (XEXP (src, 0), 1));
      lo = ((ofs & 0x3fff) ^ 0x2000) - 0x2000;
      hi = ofs - lo;

      emit_insn (gen_load_symptr (subtarget, plus_constant (sym, hi)));
      emit_insn (gen_adddi3 (temp, subtarget, GEN_INT (lo)));
    }
  else
    emit_insn (gen_load_symptr (temp, src));

  if (temp != dest)
    emit_move_insn (dest, temp);
}
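/* A worked example of the split in ia64_expand_load_address (illustration
   only): for ofs = 0x12345 the low 14 bits are 0x2345, which sign-extend
   to lo = ((0x2345 ^ 0x2000) - 0x2000) = -0x1cbb, giving
   hi = 0x12345 - (-0x1cbb) = 0x14000.  lo fits in the signed 14-bit
   immediate of an add, and hi has its low 14 bits clear.  */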
/* Begin the assembly file.  */

void
ia64_file_start (f)
     FILE *f;
{
  unsigned int rs, re;
  int out_state;

  rs = 1;
  out_state = 0;
  while (1)
    {
      while (rs < 64 && call_used_regs[PR_REG (rs)])
	rs++;
      if (rs >= 64)
	break;
      for (re = rs + 1; re < 64 && ! call_used_regs[PR_REG (re)]; re++)
	continue;
      if (out_state == 0)
	{
	  fputs ("\t.pred.safe_across_calls ", f);
	  out_state = 1;
	}
      else
	fputc (',', f);
      if (re == rs + 1)
	fprintf (f, "p%u", rs);
      else
	fprintf (f, "p%u-p%u", rs, re - 1);
      rs = re + 1;
    }
  if (out_state)
    fputc ('\n', f);
}

/* Structure to be filled in by ia64_compute_frame_size with register
   save masks and offsets for the current function.  */

struct ia64_frame_info
{
  long total_size;	/* # bytes that the entire frame takes up.  */
  long var_size;	/* # bytes that variables take up.  */
  long args_size;	/* # bytes that outgoing arguments take up.  */
  long pretend_size;	/* # bytes that stdarg arguments take up.  */
  long pretend_pad_size; /* # bytes padding to align stdarg args.  */
  long extra_size;	/* # bytes of extra gunk.  */
  long gr_size;		/* # bytes needed to store general regs.  */
  long fr_size;		/* # bytes needed to store FP regs.  */
  long fr_pad_size;	/* # bytes needed to align FP save area.  */
  long pr_size;		/* # bytes needed to store predicate regs.  */
  long br_size;		/* # bytes needed to store branch regs.  */
  long ar_size;		/* # bytes needed to store AR regs.  */
  HARD_REG_SET mask;	/* mask of saved registers.  */
  int initialized;	/* != 0 if frame size already calculated.  */
};

/* Current frame information calculated by compute_frame_size.  */
struct ia64_frame_info current_frame_info;

/* Helper function for INITIAL_ELIMINATION_OFFSET.  Return the offset from
   the frame pointer where b0 is saved.  */

int
ia64_rap_fp_offset ()
{
  return - current_frame_info.br_size;
}

/* Returns the number of bytes offset between the frame pointer and the stack
   pointer for the current function.  SIZE is the number of bytes of space
   needed for local variables.  */

unsigned int
ia64_compute_frame_size (size)
     int size;
{
  int total_size;
  int extra_size;
  int gr_size = 0;
  int fr_size = 0;
  int fr_pad_size = 0;
  int pr_size = 0;
  int br_size = 0;
  int ar_size = 0;
  int pretend_pad_size = 0;
  int tmp;
  int regno;
  HARD_REG_SET mask;

  /* Reload used to round the frame size to STACK_BOUNDARY.  Now we do it
     here.  */
  size = IA64_STACK_ALIGN (size);

  CLEAR_HARD_REG_SET (mask);

  /* Calculate space needed for general registers.  */
  /* We never need to save any of the stacked registers, which are regs
     32 to 127.  */
  for (regno = GR_REG (0); regno <= GR_REG (31); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	gr_size += 8;
      }

  /* Allocate space to save/restore the unat from.  */
  if (gr_size != 0
      || current_function_varargs || current_function_stdarg)
    gr_size += 8;

  /* Calculate space needed for FP registers.  */
  for (regno = FR_REG (0); regno <= FR_REG (127); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	fr_size += 16;
      }

  /* Calculate space needed for predicate registers.  */
  for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	pr_size = 8;
      }

  /* Calculate space needed for branch registers.  */
  for (regno = BR_REG (0); regno <= BR_REG (7); regno++)
    if (regs_ever_live[regno] && ! call_used_regs[regno])
      {
	SET_HARD_REG_BIT (mask, regno);
	br_size += 8;
      }

  /* The FR save area needs to be 16-byte aligned.  */
  if (fr_size)
    {
      tmp = (size + fr_size + br_size);
      fr_pad_size = IA64_STACK_ALIGN (tmp) - tmp;
    }
  else
    fr_pad_size = 0;

  /* AR.LC, for reasons unexplained, is call saved.  */
  if (regs_ever_live[AR_LC_REGNUM])
    {
      SET_HARD_REG_BIT (mask, AR_LC_REGNUM);
      ar_size = 8;
    }

  /* If we have an odd number of words of pretend arguments written to the
     stack, then the FR save area will be unaligned.  We pad below this area
     to keep things 16 byte aligned.  This needs to be kept distinct, to
     avoid confusing it with padding added below the GR save area, which
     does not affect the FR area alignment.  */
  pretend_pad_size = current_function_pretend_args_size % 16;

  /* The 16 bytes is for the scratch area.  */
  tmp = (size + gr_size + fr_pad_size + fr_size + pr_size + br_size + ar_size
	 + current_function_outgoing_args_size + 16);
  tmp += (current_function_pretend_args_size
	  ? current_function_pretend_args_size - 16
	  : 0)
	 + pretend_pad_size;
  total_size = IA64_STACK_ALIGN (tmp);
  extra_size = total_size - tmp + 16;

  /* If this is a leaf routine, and if there is no stack space needed for
     register saves, then don't allocate the 16 byte scratch area.  */
  if (total_size == 16 && current_function_is_leaf)
    {
      total_size = 0;
      extra_size = 0;
    }

  current_frame_info.total_size = total_size;
  current_frame_info.var_size = size;
  current_frame_info.args_size = current_function_outgoing_args_size;
  current_frame_info.pretend_size
    = (current_function_pretend_args_size
       ? current_function_pretend_args_size - 16
       : 0);
  current_frame_info.pretend_pad_size = pretend_pad_size;
  current_frame_info.extra_size = extra_size;
  current_frame_info.gr_size = gr_size;
  current_frame_info.fr_size = fr_size;
  current_frame_info.fr_pad_size = fr_pad_size;
  current_frame_info.pr_size = pr_size;
  current_frame_info.br_size = br_size;
  current_frame_info.ar_size = ar_size;
  COPY_HARD_REG_SET (current_frame_info.mask, mask);
  current_frame_info.initialized = reload_completed;

  return total_size;
}

void
save_restore_insns (save_p)
     int save_p;
{
  rtx insn;

  if (current_frame_info.gr_size + current_frame_info.fr_size
      + current_frame_info.br_size + current_frame_info.pr_size
      + current_frame_info.ar_size)
    {
      rtx tmp_reg = gen_rtx_REG (DImode, GR_REG (2));
      rtx tmp_post_inc = gen_rtx_POST_INC (DImode, tmp_reg);
      rtx tmp2_reg = gen_rtx_REG (DImode, GR_REG (3));
      int offset = (current_frame_info.total_size
		    - (current_frame_info.gr_size
		       + current_frame_info.fr_size
		       + current_frame_info.fr_pad_size
		       + current_frame_info.br_size
		       + current_frame_info.pr_size
		       + current_frame_info.ar_size
		       + current_frame_info.var_size
		       + current_frame_info.pretend_size
		       + current_frame_info.pretend_pad_size));
      rtx offset_rtx;
      int regno;

      /* If there is a frame pointer, then we use it instead of the stack
	 pointer, so that the stack pointer does not need to be valid when
	 the epilogue starts.  See EXIT_IGNORE_STACK.  */
      if (frame_pointer_needed)
	offset = offset - current_frame_info.total_size;

      if (CONST_OK_FOR_I (offset))
	offset_rtx = GEN_INT (offset);
      else
	{
	  offset_rtx = tmp_reg;
	  insn = emit_insn (gen_movdi (tmp_reg, GEN_INT (offset)));
	  if (save_p)
	    RTX_FRAME_RELATED_P (insn) = 1;
	}
      insn = emit_insn (gen_adddi3 (tmp_reg,
				    (frame_pointer_needed ? frame_pointer_rtx
				     : stack_pointer_rtx),
				    offset_rtx));
      if (save_p)
	RTX_FRAME_RELATED_P (insn) = 1;

      /* If one is used, we save/restore all of them.  */
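      /* Added note (an assumption based on the single 8-byte slot reserved
	 in pr_size above): the pr_spill/pr_restore patterns transfer all 64
	 predicate registers through one general register at once, which is
	 why the loop below stops after the first saved predicate bit it
	 finds.  */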
      for (regno = PR_REG (0); regno <= PR_REG (63); regno++)
	if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	  {
	    rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);
	    if (save_p)
	      {
		insn = emit_insn (gen_pr_spill (tmp2_reg));
		RTX_FRAME_RELATED_P (insn) = 1;
		insn = emit_insn (gen_movdi (mem, tmp2_reg));
		RTX_FRAME_RELATED_P (insn) = 1;
	      }
	    else
	      {
		insn = emit_insn (gen_movdi (tmp2_reg, mem));
		insn = emit_insn (gen_pr_restore (tmp2_reg));
	      }
	    break;
	  }

      /* Must save/restore ar.unat if any GR is spilled/restored.  */
      if (current_frame_info.gr_size != 0
	  || current_function_varargs || current_function_stdarg)
	{
	  rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);
	  if (save_p)
	    {
	      insn = emit_insn (gen_unat_spill (tmp2_reg));
	      if (save_p)
		RTX_FRAME_RELATED_P (insn) = 1;
	      insn = emit_insn (gen_movdi (mem, tmp2_reg));
	      if (save_p)
		RTX_FRAME_RELATED_P (insn) = 1;
	    }
	  else
	    {
	      insn = emit_insn (gen_movdi (tmp2_reg, mem));
	      if (save_p)
		RTX_FRAME_RELATED_P (insn) = 1;
	      /* The restore happens after the last ld8.fill instruction.  */
	    }
	}

      for (regno = GR_REG (0); regno <= GR_REG (127); regno++)
	if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	  {
	    rtx mem = gen_rtx_MEM (DImode, tmp_post_inc);
	    if (save_p)
	      insn = emit_insn (gen_gr_spill (mem,
					      gen_rtx_REG (DImode, regno)));
	    else
	      insn = emit_insn (gen_gr_restore (gen_rtx_REG (DImode, regno),
						mem));
	    if (save_p)
	      RTX_FRAME_RELATED_P (insn) = 1;
	  }

      /* Now restore the unat register if necessary.  */
      if ((current_frame_info.gr_size != 0
	   || current_function_varargs || current_function_stdarg)
	  && ! save_p)
	emit_insn (gen_unat_restore (tmp2_reg));

      for (regno = FR_REG (0); regno <= FR_REG (127); regno++)
	if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	  {
	    rtx mem = gen_rtx_MEM (XFmode, tmp_post_inc);
	    if (save_p)
	      insn = emit_insn (gen_fr_spill (mem,
					      gen_rtx_REG (XFmode, regno)));
	    else
	      insn = emit_insn (gen_fr_restore (gen_rtx_REG (XFmode, regno),
						mem));
	    if (save_p)
	      RTX_FRAME_RELATED_P (insn) = 1;
	  }

      for (regno = BR_REG (0); regno <= BR_REG (7); regno++)
	if (TEST_HARD_REG_BIT (current_frame_info.mask, regno))
	  {
	    rtx src, dest;

	    if (save_p)
	      {
		src = gen_rtx_REG (DImode, regno);
		dest = gen_rtx_MEM (DImode, tmp_post_inc);
	      }
	    else
	      {
		src = gen_rtx_MEM (DImode, tmp_post_inc);
		dest = gen_rtx_REG (DImode, regno);
	      }

	    insn = emit_insn (gen_movdi (tmp2_reg, src));
	    if (save_p)
	      RTX_FRAME_RELATED_P (insn) = 1;
	    insn = emit_insn (gen_movdi (dest, tmp2_reg));
	    if (save_p)
	      RTX_FRAME_RELATED_P (insn) = 1;
	  }

      if (TEST_HARD_REG_BIT (current_frame_info.mask, AR_LC_REGNUM))
	{
	  rtx src, dest;

	  if (save_p)
	    {
	      src = gen_rtx_REG (DImode, AR_LC_REGNUM);
	      dest = gen_rtx_MEM (DImode, tmp_post_inc);
	    }
	  else
	    {
	      src = gen_rtx_MEM (DImode, tmp_post_inc);
	      dest = gen_rtx_REG (DImode, AR_LC_REGNUM);
	    }

	  insn = emit_insn (gen_movdi (tmp2_reg, src));
	  if (save_p)
	    RTX_FRAME_RELATED_P (insn) = 1;
	  insn = emit_insn (gen_movdi (dest, tmp2_reg));
	  if (save_p)
	    RTX_FRAME_RELATED_P (insn) = 1;
	}
    }
}

/* Called after register allocation to add any instructions needed for the
   prologue.  Using a prologue insn is favored compared to putting all of
   the instructions in the FUNCTION_PROLOGUE macro, since it allows the
   scheduler to intermix instructions with the saves of the caller saved
   registers.  In some cases, it might be necessary to emit a barrier
   instruction as the last insn to prevent such scheduling.

   Also any insns generated here should have RTX_FRAME_RELATED_P(insn) = 1
   so that the debug info generation code can handle them properly.  */

/* ??? Get inefficient code when the frame size is larger than can fit in an
   adds instruction.  */
/* ??? If this is a leaf function, then fp/rp/ar.pfs should be put in the
   low 32 regs.  */

/* ??? Should not reserve a local register for rp/ar.pfs.  Should instead
   check to see if any local registers are unused, and if so, allocate them
   to rp/ar.pfs in that order.  Not sure what to do about fp, we may still
   need to reserve a local register for it.  */

void
ia64_expand_prologue ()
{
  rtx insn, offset;
  int i, locals, inputs, outputs, rotates;
  int frame_size = ia64_compute_frame_size (get_frame_size ());
  int epilogue_p;
  edge e;

  /* If there is no epilogue, then we don't need some prologue insns.  We
     need to avoid emitting the dead prologue insns, because flow will
     complain about them.  */
  if (optimize)
    {
      for (e = EXIT_BLOCK_PTR->pred; e ; e = e->pred_next)
	if ((e->flags & EDGE_FAKE) == 0
	    && (e->flags & EDGE_FALLTHRU) != 0)
	  break;
      epilogue_p = (e != NULL);
    }
  else
    epilogue_p = 1;

  /* Find the highest local register used.  */
  /* We have only 80 local registers, because we reserve 8 for the inputs
     and 8 for the outputs.  */
  for (i = LOC_REG (79); i >= LOC_REG (0); i--)
    if (regs_ever_live[i])
      break;
  locals = i - LOC_REG (0) + 1;

  /* Likewise for inputs.  */
  for (i = IN_REG (7); i >= IN_REG (0); i--)
    if (regs_ever_live[i])
      break;
  inputs = i - IN_REG (0) + 1;

#if 0
  /* If the function was declared with syscall_linkage, then we may need to
     preserve all declared input registers, even if they weren't used.
     Currently, syscall_linkage does not have this effect.  */
  if (lookup_attribute ("syscall_linkage",
			TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl))))
    inputs = MAX (inputs, current_function_args_info.words);
#endif

  /* Likewise for outputs.  */
  for (i = OUT_REG (7); i >= OUT_REG (0); i--)
    if (regs_ever_live[i])
      break;
  outputs = i - OUT_REG (0) + 1;

  /* When -p profiling, we need one output register for the mcount argument.
     Likewise for -a profiling for the bb_init_func argument.  For -ax
     profiling, we need two output registers for the two bb_init_trace_func
     arguments.  */
  if (profile_flag || profile_block_flag == 1)
    outputs = MAX (outputs, 1);
  else if (profile_block_flag == 2)
    outputs = MAX (outputs, 2);

  /* No rotating register support as yet.  */
  rotates = 0;

  /* Allocate two extra locals for saving/restoring rp and ar.pfs.  Also
     allocate one local for use as the frame pointer if frame_pointer_needed
     is true.  */
  /* ??? If this is a leaf function, then we aren't using one of these local
     registers for the RP anymore.  */
  locals += 2 + frame_pointer_needed;

  /* Save these values in global registers for debugging info.  */
  ia64_input_regs = inputs;
  ia64_local_regs = locals;

  /* Set the local, input, and output register names.  We need to do this
     for GNU libc, which creates crti.S/crtn.S by splitting initfini.c in
     half.  If we use in/loc/out register names, then we get assembler
     errors in crtn.S because there is no alloc insn or regstk directive in
     there.  We give in/loc/out names to unused registers, to make invalid
     uses of them easy to spot.  */
  if (! TARGET_REG_NAMES)
    {
      for (i = 0; i < 8; i++)
	{
	  if (i < inputs)
	    reg_names[IN_REG (i)] = ia64_reg_numbers[i];
	  else
	    reg_names[IN_REG (i)] = ia64_input_reg_names[i];
	}
      for (i = 0; i < 80; i++)
	{
	  if (i < locals)
	    reg_names[LOC_REG (i)] = ia64_reg_numbers[inputs + i];
	  else
	    reg_names[LOC_REG (i)] = ia64_local_reg_names[i];
	}
      for (i = 0; i < 8; i++)
	{
	  if (i < outputs)
	    reg_names[OUT_REG (i)] = ia64_reg_numbers[inputs + locals + i];
	  else
	    reg_names[OUT_REG (i)] = ia64_output_reg_names[i];
	}
    }

  /* Set the frame pointer register name now that it is known, and the
     local register names are known.  */
  if (frame_pointer_needed)
    {
      reg_names[FRAME_POINTER_REGNUM] = reg_names[LOC_REG (locals - 3)];
      ia64_fp_regno = LOC_REG (inputs + locals - 3);
    }
  else
    ia64_fp_regno = 0;

  /* We don't need an alloc instruction if this is a leaf function, and the
     locals and outputs are both zero sized.  Since we have already
     allocated two locals for rp and ar.pfs, we check for two locals.  */
  /* Leaf functions can use output registers as call-clobbered
     temporaries.  */
  if (locals == 2 && outputs == 0 && current_function_is_leaf)
    {
      /* If there is no alloc, but there are input registers used, then we
	 need a .regstk directive.  */
      if (TARGET_REG_NAMES)
	ia64_need_regstk = 1;
      else
	ia64_need_regstk = 0;

      ia64_arpfs_regno = 0;
      ia64_rp_regno = 0;
    }
  else
    {
      ia64_need_regstk = 0;
      ia64_arpfs_regno = LOC_REG (locals - 1);

      insn = emit_insn (gen_alloc (gen_rtx_REG (DImode, ia64_arpfs_regno),
				   GEN_INT (inputs), GEN_INT (locals),
				   GEN_INT (outputs), GEN_INT (rotates)));
      RTX_FRAME_RELATED_P (insn) = 1;

      /* Emit a save of BR_REG (0) if we call other functions.  Do this even
	 if this function doesn't return, as EH depends on this to be able
	 to unwind the stack.  */
      if (! current_function_is_leaf)
	{
	  rtx ia64_rp_reg;

	  ia64_rp_regno = LOC_REG (locals - 2);
	  ia64_rp_reg = gen_rtx_REG (DImode, ia64_rp_regno);
	  insn = emit_move_insn (ia64_rp_reg,
				 gen_rtx_REG (DImode, BR_REG (0)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	  if (! epilogue_p)
	    {
	      /* If we don't have an epilogue, then the return value
		 doesn't appear to be needed and the above store will
		 appear dead and will elicit a warning from flow.  */
	      emit_insn (gen_rtx_USE (VOIDmode, ia64_rp_reg));
	    }

	  /* Fix up the return address placeholder.  */
	  if (regs_ever_live[RETURN_ADDRESS_POINTER_REGNUM])
	    XINT (return_address_pointer_rtx, 0) = ia64_rp_regno;
	}
      else
	ia64_rp_regno = 0;
    }

  /* Set up frame pointer and stack pointer.  */
  if (frame_pointer_needed)
    {
      insn = emit_insn (gen_movdi (hard_frame_pointer_rtx,
				   stack_pointer_rtx));
      RTX_FRAME_RELATED_P (insn) = 1;
    }
  if (frame_size != 0)
    {
      if (CONST_OK_FOR_I (-frame_size))
	offset = GEN_INT (-frame_size);
      else
	{
	  /* ??? We use r2 to tell process_set that this is a stack pointer
	     decrement.  See also ia64_expand_epilogue.  */
	  offset = gen_rtx_REG (DImode, GR_REG (2));
	  insn = emit_insn (gen_movdi (offset, GEN_INT (-frame_size)));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}

      /* If there is a frame pointer, then we need to make the stack pointer
	 decrement depend on the frame pointer, so that the stack pointer
	 update won't be moved past fp-relative stores to the frame.  */
      if (frame_pointer_needed)
	insn = emit_insn (gen_prologue_allocate_stack
			  (stack_pointer_rtx, stack_pointer_rtx,
			   offset, hard_frame_pointer_rtx));
      else
	insn = emit_insn (gen_adddi3 (stack_pointer_rtx, stack_pointer_rtx,
				      offset));
      RTX_FRAME_RELATED_P (insn) = 1;
    }

  /* Save registers to frame.  */
  save_restore_insns (1);
}
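/* A hypothetical example of what the prologue above arranges (a sketch;
   exact registers depend on the function): with two inputs, one real
   local and one output, locals becomes 3 after reserving slots for rp
   and ar.pfs, so the emitted code would resemble
       alloc loc2 = ar.pfs, 2, 3, 1, 0
       mov loc1 = b0
   with ar.pfs saved in LOC_REG (locals - 1) and the return pointer in
   LOC_REG (locals - 2).  */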
/* Called after register allocation to add any instructions needed for the
   epilogue.  Using an epilogue insn is favored compared to putting all of
   the instructions in the FUNCTION_EPILOGUE macro, since it allows the
   scheduler to intermix instructions with the saves of the caller saved
   registers.  In some cases, it might be necessary to emit a barrier
   instruction as the last insn to prevent such scheduling.  */

void
ia64_expand_epilogue ()
{
  rtx insn;

  /* Restore registers from frame.  */
  save_restore_insns (0);

  /* ??? The gen_epilogue_deallocate_stack call below does not work.  This
     is mainly because there is no fp+offset addressing mode, so most loads
     from the frame do not actually use the frame pointer; they use a pseudo
     computed from the frame pointer.  The same problem exists with the
     stack pointer when there is no frame pointer.  I think this can be
     fixed only by making the dependency analysis code in sched smarter, so
     that it recognizes references to the frame, and makes succeeding stack
     pointer updates anti-dependent on them.  */
  emit_insn (gen_blockage ());

  if (cfun->machine->ia64_eh_epilogue_sp == NULL_RTX)
    {
      if (frame_pointer_needed)
	{
	  /* If there is a frame pointer, then we need to make the stack
	     pointer restore depend on the frame pointer, so that the stack
	     pointer restore won't be moved up past fp-relative loads from
	     the frame.  */
	  insn = emit_insn (gen_epilogue_deallocate_stack
			    (stack_pointer_rtx, hard_frame_pointer_rtx));
	  RTX_FRAME_RELATED_P (insn) = 1;
	}
      else
	{
	  int frame_size = current_frame_info.total_size;
	  rtx offset;

	  if (frame_size != 0)
	    {
	      if (CONST_OK_FOR_I (frame_size))
		offset = GEN_INT (frame_size);
	      else
		{
		  /* ??? We use r3 to tell process_set that this is a stack
		     pointer increment.  See also ia64_expand_prologue.  */
		  offset = gen_rtx_REG (DImode, GR_REG (3));
		  emit_insn (gen_movdi (offset, GEN_INT (frame_size)));
		}

	      insn = emit_insn (gen_adddi3 (stack_pointer_rtx,
					    stack_pointer_rtx, offset));
	      RTX_FRAME_RELATED_P (insn) = 1;
	    }
	}
    }
  /* Return via eh_epilogue, so we already have our new stack pointer.  */
  else
    emit_insn (gen_movdi (stack_pointer_rtx,
			  cfun->machine->ia64_eh_epilogue_sp));

  if (ia64_arpfs_regno)
    emit_insn (gen_pfs_restore (gen_rtx_REG (DImode, ia64_arpfs_regno)));

  if (ia64_rp_regno)
    emit_move_insn (gen_rtx_REG (DImode, BR_REG (0)),
		    gen_rtx_REG (DImode, ia64_rp_regno));

  if (cfun->machine->ia64_eh_epilogue_bsp != NULL_RTX)
    {
      /* We have to restore the bsp.  */
      emit_insn (gen_set_bsp (cfun->machine->ia64_eh_epilogue_bsp));
    }

  emit_jump_insn (gen_return_internal (gen_rtx_REG (DImode, BR_REG (0))));
}

/* Emit the function prologue.  */

void
ia64_function_prologue (file, size)
     FILE *file;
     int size ATTRIBUTE_UNUSED;
{
  rtx insn;

  if (ia64_need_regstk)
    fprintf (file, "\t.regstk %d, 0, 0, 0\n", ia64_input_regs);

  if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))
    return;

  /* Emit the .prologue directive.  In order to do this, we need to find
     where the stack pointer is moved to a GR, if it is, and mark it.  */
  for (insn = get_insns (); insn != NULL_RTX; insn = NEXT_INSN (insn))
    {
      if (RTX_FRAME_RELATED_P (insn) && GET_CODE (insn) == INSN)
	{
	  rtx pat = PATTERN (insn);
	  if (GET_CODE (pat) == SET)
	    {
	      rtx dest = SET_DEST (pat);
	      rtx src = SET_SRC (pat);
	      if (GET_CODE (src) == REG
		  && REGNO (src) == STACK_POINTER_REGNUM
		  && GET_CODE (dest) == REG)
		{
		  int reg = REGNO (dest);
		  if (REGNO (dest) == FRAME_POINTER_REGNUM)
		    reg = ia64_fp_regno;
		  fprintf (file, "\t.prologue 0x2, %d\n", reg);
		  break;
		}
	    }
	}
    }

  if (insn == NULL_RTX)
    fprintf (file, "\t.prologue\n");
}

/* Emit the .body directive at the scheduled end of the prologue.  */

void
ia64_output_end_prologue (file)
     FILE *file;
{
  if (!flag_unwind_tables && (!flag_exceptions || exceptions_via_longjmp))
    return;

  fputs ("\t.body\n", file);
}

/* Emit the function epilogue.  */

void
ia64_function_epilogue (file, size)
     FILE *file ATTRIBUTE_UNUSED;
     int size ATTRIBUTE_UNUSED;
{
}

/* Return 1 if br.ret can do all the work required to return from a
   function.  */
int
ia64_direct_return ()
{
  return (reload_completed && ! frame_pointer_needed
	  && ia64_compute_frame_size (get_frame_size ()) == 0);
}

/* Do any needed setup for a variadic function.  CUM has not been updated
   for the last named argument which has type TYPE and mode MODE.  */

void
ia64_setup_incoming_varargs (cum, int_mode, type, pretend_size, second_time)
     CUMULATIVE_ARGS cum;
     int int_mode ATTRIBUTE_UNUSED;
     tree type ATTRIBUTE_UNUSED;
     int * pretend_size;
     int second_time;
{
  /* If this is a stdarg function, then don't save the current argument.  */
  int offset = ! current_function_varargs;

  if (cum.words < MAX_ARGUMENT_SLOTS)
    {
      if (! second_time)
	{
	  int i;
	  int first_reg = GR_ARG_FIRST + cum.words + offset;
	  rtx reg1 = gen_reg_rtx (Pmode);
	  rtx mem1 = gen_rtx_MEM (DImode, reg1);

	  /* We must emit st8.spill insns instead of st8 because we might
	     be saving non-argument registers, and non-argument registers
	     might not contain valid values.  */
	  emit_move_insn (reg1, virtual_incoming_args_rtx);
	  for (i = first_reg; i < GR_ARG_FIRST + 8; i++)
	    {
	      emit_insn (gen_gr_spill (mem1, gen_rtx_REG (DImode, i)));
	      emit_insn (gen_adddi3 (reg1, reg1, GEN_INT (8)));
	    }
	}
      *pretend_size = ((MAX_ARGUMENT_SLOTS - cum.words - offset)
		       * UNITS_PER_WORD);
    }
}

/* Check whether TYPE is a homogeneous floating point aggregate.  If
   it is, return the mode of the floating point type that appears
   in all leafs.  If it is not, return VOIDmode.

   An aggregate is a homogeneous floating point aggregate if all
   fields/elements in it have the same floating point type (e.g.,
   SFmode).  128-bit quad-precision floats are excluded.  */

static enum machine_mode
hfa_element_mode (type, nested)
     tree type;
     int nested;
{
  enum machine_mode element_mode = VOIDmode;
  enum machine_mode mode;
  enum tree_code code = TREE_CODE (type);
  int know_element_mode = 0;
  tree t;

  switch (code)
    {
    case VOID_TYPE:	case INTEGER_TYPE:	case ENUMERAL_TYPE:
    case BOOLEAN_TYPE:	case CHAR_TYPE:		case POINTER_TYPE:
    case OFFSET_TYPE:	case REFERENCE_TYPE:	case METHOD_TYPE:
    case FILE_TYPE:	case SET_TYPE:		case LANG_TYPE:
    case FUNCTION_TYPE:
      return VOIDmode;

      /* Fortran complex types are supposed to be HFAs, so we need to handle
	 gcc's COMPLEX_TYPEs as HFAs.  We need to exclude the integral
	 complex types though.  */
    case COMPLEX_TYPE:
      if (GET_MODE_CLASS (TYPE_MODE (type)) == MODE_COMPLEX_FLOAT)
	return mode_for_size (GET_MODE_UNIT_SIZE (TYPE_MODE (type))
			      * BITS_PER_UNIT, MODE_FLOAT, 0);
      else
	return VOIDmode;

    case REAL_TYPE:
      /* We want to return VOIDmode for raw REAL_TYPEs, but the actual mode
	 if this is contained within an aggregate.  */
      if (nested)
	return TYPE_MODE (type);
      else
	return VOIDmode;

    case ARRAY_TYPE:
      return TYPE_MODE (TREE_TYPE (type));

    case RECORD_TYPE:
    case UNION_TYPE:
    case QUAL_UNION_TYPE:
      for (t = TYPE_FIELDS (type); t; t = TREE_CHAIN (t))
	{
	  if (TREE_CODE (t) != FIELD_DECL)
	    continue;

	  mode = hfa_element_mode (TREE_TYPE (t), 1);
	  if (know_element_mode)
	    {
	      if (mode != element_mode)
		return VOIDmode;
	    }
	  else if (GET_MODE_CLASS (mode) != MODE_FLOAT)
	    return VOIDmode;
	  else
	    {
	      know_element_mode = 1;
	      element_mode = mode;
	    }
	}
      return element_mode;

    default:
      /* If we reach here, we probably have some front-end specific type
	 that the backend doesn't know about.  This can happen via the
	 aggregate_value_p call in init_function_start.  All we can do is
	 ignore unknown tree types.  */
      return VOIDmode;
    }

  return VOIDmode;
}
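/* Example (illustration only): struct { float x, y, z; } is an HFA whose
   element mode is SFmode, while struct { float x; double y; } is not an
   HFA, because its leaf fields have different floating-point modes.  */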
/* Return rtx for register where argument is passed, or zero if it is
   passed on the stack.  */

/* ??? 128-bit quad-precision floats are always passed in general
   registers.  */

rtx
ia64_function_arg (cum, mode, type, named, incoming)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
     int incoming;
{
  int basereg = (incoming ? GR_ARG_FIRST : AR_ARG_FIRST);
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* Integer and float arguments larger than 8 bytes start at the next even
     boundary.  Aggregates larger than 8 bytes start at the next even
     boundary if the aggregate has 16 byte alignment.  Net effect is that
     types with alignment greater than 8 start at the next even boundary.  */
  /* ??? The ABI does not specify how to handle aggregates with alignment
     from 9 to 15 bytes, or greater than 16.  We handle them all as if they
     had 16 byte alignment.  Such aggregates can occur only if gcc
     extensions are used.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      rtx loc[16];
      int i = 0;
      int fp_regs = cum->fp_regs;
      int int_regs = cum->words + offset;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument
	 slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));
	   i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode,
						   (FR_ARG_FIRST + fp_regs)),
				      GEN_INT (offset));
	  /* ??? Padding for XFmode type?  */
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      /* If no prototype, then the whole thing must go in GR regs.  */
      if (! cum->prototype)
	offset = 0;
      /* If this is an SFmode aggregate, then we might have some left over
	 that needs to go in GR regs.  */
      else if (byte_size != offset)
	int_regs += offset / UNITS_PER_WORD;

      /* Fill in the GR regs.  We must use DImode here, not the hfa mode.  */

      for (; offset < byte_size && int_regs < MAX_ARGUMENT_SLOTS; i++)
	{
	  enum machine_mode gr_mode = DImode;

	  /* If we have an odd 4 byte hunk because we ran out of FR regs,
	     then this goes in a GR reg left adjusted/little endian, right
	     adjusted/big endian.  */
	  /* ??? Currently this is handled wrong, because 4-byte hunks are
	     always right adjusted/little endian.  */
	  if (offset & 0x4)
	    gr_mode = SImode;
	  /* If we have an even 4 byte hunk because the aggregate is a
	     multiple of 4 bytes in size, then this goes in a GR reg right
	     adjusted/little endian.  */
	  else if (byte_size - offset == 4)
	    gr_mode = SImode;

	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (gr_mode,
						   (basereg + int_regs)),
				      GEN_INT (offset));
	  offset += GET_MODE_SIZE (gr_mode);
	  int_regs++;
	}

      /* If we ended up using just one location, just return that one
	 loc.  */
      if (i == 1)
	return XEXP (loc[0], 0);
      else
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This
     can happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    return gen_rtx_REG (mode, basereg + cum->words + offset);

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	return gen_rtx_REG (mode, basereg + cum->words + offset);
      else
	return gen_rtx_REG (mode, FR_ARG_FIRST + cum->fp_regs);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    {
      rtx fp_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode,
						   (FR_ARG_FIRST
						    + cum->fp_regs)),
				      const0_rtx);
      rtx gr_reg = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (mode,
						   (basereg + cum->words
						    + offset)),
				      const0_rtx);

      return gen_rtx_PARALLEL (mode, gen_rtvec (2, fp_reg, gr_reg));
    }
}

/* Return number of words, at the beginning of the argument, that must be
   put in registers.  0 if the argument is entirely in registers or entirely
   in memory.  */

int
ia64_function_arg_partial_nregs (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named ATTRIBUTE_UNUSED;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  /* If all argument slots are used, then it must go on the stack.  */
  if (cum->words + offset >= MAX_ARGUMENT_SLOTS)
    return 0;

  /* It doesn't matter whether the argument goes in FR or GR regs.  If it
     fits within the 8 argument slots, then it goes entirely in registers.
     If it extends past the last argument slot, then the rest goes on the
     stack.  */

  if (words + cum->words + offset <= MAX_ARGUMENT_SLOTS)
    return 0;

  return MAX_ARGUMENT_SLOTS - cum->words - offset;
}
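/* Worked example for the function above (hypothetical values): a 40-byte
   aggregate (words = 5) arriving with cum->words = 6 and no alignment
   padding extends past the 8 argument slots, so the return value is
   8 - 6 = 2: two words travel in registers and the remaining three go
   on the stack.  */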
/* Update CUM to point after this argument.  This is patterned after
   ia64_function_arg.  */

void
ia64_function_arg_advance (cum, mode, type, named)
     CUMULATIVE_ARGS *cum;
     enum machine_mode mode;
     tree type;
     int named;
{
  int words = (((mode == BLKmode ? int_size_in_bytes (type)
		 : GET_MODE_SIZE (mode)) + UNITS_PER_WORD - 1)
	       / UNITS_PER_WORD);
  int offset = 0;
  enum machine_mode hfa_mode = VOIDmode;

  /* If all arg slots are already full, then there is nothing to do.  */
  if (cum->words >= MAX_ARGUMENT_SLOTS)
    return;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if ((type ? (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
       : (words > 1))
      && (cum->words & 1))
    offset = 1;

  cum->words += words + offset;

  /* Check for and handle homogeneous FP aggregates.  */
  if (type)
    hfa_mode = hfa_element_mode (type, 0);

  /* Unnamed prototyped hfas are passed as usual.  Named prototyped hfas
     and unprototyped hfas are passed specially.  */
  if (hfa_mode != VOIDmode && (! cum->prototype || named))
    {
      int fp_regs = cum->fp_regs;
      /* This is the original value of cum->words + offset.  */
      int int_regs = cum->words - words;
      int hfa_size = GET_MODE_SIZE (hfa_mode);
      int byte_size;
      int args_byte_size;

      /* If prototyped, pass it in FR regs then GR regs.
	 If not prototyped, pass it in both FR and GR regs.

	 If this is an SFmode aggregate, then it is possible to run out of
	 FR regs while GR regs are still left.  In that case, we pass the
	 remaining part in the GR regs.  */

      /* Fill the FP regs.  We do this always.  We stop if we reach the end
	 of the argument, the last FP register, or the last argument
	 slot.  */

      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (type) : GET_MODE_SIZE (mode));
      args_byte_size = int_regs * UNITS_PER_WORD;
      offset = 0;
      for (; (offset < byte_size && fp_regs < MAX_ARGUMENT_SLOTS
	      && args_byte_size < (MAX_ARGUMENT_SLOTS * UNITS_PER_WORD));)
	{
	  /* ??? Padding for XFmode type?  */
	  offset += hfa_size;
	  args_byte_size += hfa_size;
	  fp_regs++;
	}

      cum->fp_regs = fp_regs;
    }

  /* Integral and aggregates go in general registers.  If we have run out of
     FR registers, then FP values must also go in general registers.  This
     can happen when we have a SFmode HFA.  */
  else if (! FLOAT_MODE_P (mode) || cum->fp_regs == MAX_ARGUMENT_SLOTS)
    return;

  /* If there is a prototype, then FP values go in a FR register when
     named, and in a GR register when unnamed.  */
  else if (cum->prototype)
    {
      if (! named)
	return;
      else
	/* ??? Complex types should not reach here.  */
	cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);
    }
  /* If there is no prototype, then FP values go in both FR and GR
     registers.  */
  else
    /* ??? Complex types should not reach here.  */
    cum->fp_regs += (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT ? 2 : 1);

  return;
}

/* Implement va_start.  */

void
ia64_va_start (stdarg_p, valist, nextarg)
     int stdarg_p;
     tree valist;
     rtx nextarg;
{
  int arg_words;
  int ofs;

  arg_words = current_function_args_info.words;

  if (stdarg_p)
    ofs = 0;
  else
    ofs = (arg_words >= MAX_ARGUMENT_SLOTS ? -UNITS_PER_WORD : 0);

  nextarg = plus_constant (nextarg, ofs);
  std_expand_builtin_va_start (1, valist, nextarg);
}

/* Implement va_arg.  */

rtx
ia64_va_arg (valist, type)
     tree valist, type;
{
  HOST_WIDE_INT size;
  tree t;

  /* Arguments with alignment larger than 8 bytes start at the next even
     boundary.  */
  if (TYPE_ALIGN (type) > 8 * BITS_PER_UNIT)
    {
      t = build (PLUS_EXPR, TREE_TYPE (valist), valist,
		 build_int_2 (2 * UNITS_PER_WORD - 1, 0));
      t = build (BIT_AND_EXPR, TREE_TYPE (t), t,
		 build_int_2 (-2 * UNITS_PER_WORD, -1));
      t = build (MODIFY_EXPR, TREE_TYPE (valist), valist, t);
      TREE_SIDE_EFFECTS (t) = 1;
      expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
    }

  return std_expand_builtin_va_arg (valist, type);
}

/* Return 1 if function return value returned in memory.  Return 0 if it is
   in a register.  */

int
ia64_return_in_memory (valtype)
     tree valtype;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;
  int byte_size;

  mode = TYPE_MODE (valtype);
  byte_size = ((mode == BLKmode)
	       ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));

  /* HFAs with up to 8 elements are returned in the FP argument
     registers.  */

  hfa_mode = hfa_element_mode (valtype, 0);
  if (hfa_mode != VOIDmode)
    {
      int hfa_size = GET_MODE_SIZE (hfa_mode);

      /* ??? Padding for XFmode type?  */
      if (byte_size / hfa_size > MAX_ARGUMENT_SLOTS)
	return 1;
      else
	return 0;
    }

  else if (byte_size > UNITS_PER_WORD * MAX_INT_RETURN_SLOTS)
    return 1;
  else
    return 0;
}
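/* Worked example for ia64_return_in_memory (illustration only): a struct
   of ten floats is an HFA with hfa_size = 4 and byte_size = 40, so
   byte_size / hfa_size = 10 exceeds MAX_ARGUMENT_SLOTS (8) and the value
   is returned in memory; a struct of eight floats would instead come
   back in the FP argument registers, starting at FR_ARG_FIRST.  */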
/* Return rtx for register that holds the function return value.  */

rtx
ia64_function_value (valtype, func)
     tree valtype;
     tree func ATTRIBUTE_UNUSED;
{
  enum machine_mode mode;
  enum machine_mode hfa_mode;

  mode = TYPE_MODE (valtype);
  hfa_mode = hfa_element_mode (valtype, 0);

  if (hfa_mode != VOIDmode)
    {
      rtx loc[8];
      int i;
      int hfa_size;
      int byte_size;
      int offset;

      hfa_size = GET_MODE_SIZE (hfa_mode);
      byte_size = ((mode == BLKmode)
		   ? int_size_in_bytes (valtype) : GET_MODE_SIZE (mode));
      offset = 0;
      for (i = 0; offset < byte_size; i++)
	{
	  loc[i] = gen_rtx_EXPR_LIST (VOIDmode,
				      gen_rtx_REG (hfa_mode,
						   FR_ARG_FIRST + i),
				      GEN_INT (offset));
	  /* ??? Padding for XFmode type?  */
	  offset += hfa_size;
	}

      if (i == 1)
	return XEXP (loc[0], 0);
      else
	return gen_rtx_PARALLEL (mode, gen_rtvec_v (i, loc));
    }
  else if (FLOAT_TYPE_P (valtype))
    return gen_rtx_REG (mode, FR_ARG_FIRST);
  else
    return gen_rtx_REG (mode, GR_RET_FIRST);
}

/* Print a memory address as an operand to reference that memory
   location.  */

/* ??? Do we need this?  It gets used only for 'a' operands.  We could
   perhaps also call this from ia64_print_operand for memory addresses.  */

void
ia64_print_operand_address (stream, address)
     FILE * stream ATTRIBUTE_UNUSED;
     rtx address ATTRIBUTE_UNUSED;
{
}

/* Print an operand to an assembler instruction.
   B	Workarounds for hardware bugs.
   C	Swap and print a comparison operator.
   D	Print an FP comparison operator.
   E	Print 32 - constant, for SImode shifts as extract.
   F	A floating point constant 0.0 emitted as f0, or 1.0 emitted as f1,
	or a floating point register emitted normally.
   I	Invert a predicate register by adding 1.
   J	Select the proper predicate register for a condition.
   j	Select the inverse predicate register for a condition.
   O	Append .acq for volatile load.
   P	Postincrement of a MEM.
   Q	Append .rel for volatile store.
   S	Shift amount for shladd instruction.
   T	Print an 8-bit sign extended number (K) as a 32-bit unsigned number
	for Intel assembler.
   U	Print an 8-bit sign extended number (K) as a 64-bit unsigned number
	for Intel assembler.
   r	Print register name, or constant 0 as r0.  HP compatibility for
	Linux kernel.  */
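/* Example of the `+' code below (illustration only): with
   REG_BR_PROB_BASE = 10000, a REG_BR_PROB note of 500 selects ".spnt",
   3000 selects ".dpnt", 6000 selects ".dptk", and 9500 selects ".sptk",
   matching the top/bottom-10%-static heuristic described in the code.  */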
void
ia64_print_operand (file, x, code)
     FILE * file;
     rtx x;
     int code;
{
  const char *str;

  switch (code)
    {
    case 0:
      /* Handled below.  */
      break;

    case 'B':
      if (TARGET_A_STEP)
	fputs (" ;; nop 0 ;; nop 0 ;;", file);
      return;

    case 'C':
      {
	enum rtx_code c = swap_condition (GET_CODE (x));
	fputs (GET_RTX_NAME (c), file);
	return;
      }

    case 'D':
      switch (GET_CODE (x))
	{
	case NE:
	  str = "neq";
	  break;
	case UNORDERED:
	  str = "unord";
	  break;
	case ORDERED:
	  str = "ord";
	  break;
	default:
	  str = GET_RTX_NAME (GET_CODE (x));
	  break;
	}
      fputs (str, file);
      return;

    case 'E':
      fprintf (file, HOST_WIDE_INT_PRINT_DEC, 32 - INTVAL (x));
      return;

    case 'F':
      if (x == CONST0_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (0)];
      else if (x == CONST1_RTX (GET_MODE (x)))
	str = reg_names [FR_REG (1)];
      else if (GET_CODE (x) == REG)
	str = reg_names [REGNO (x)];
      else
	abort ();
      fputs (str, file);
      return;

    case 'I':
      fputs (reg_names [REGNO (x) + 1], file);
      return;

    case 'J':
    case 'j':
      {
	unsigned int regno = REGNO (XEXP (x, 0));
	if (GET_CODE (x) == EQ)
	  regno += 1;
	if (code == 'j')
	  regno ^= 1;
	fputs (reg_names [regno], file);
      }
      return;

    case 'O':
      if (MEM_VOLATILE_P (x))
	fputs(".acq", file);
      return;

    case 'P':
      {
	HOST_WIDE_INT value;

	switch (GET_CODE (XEXP (x, 0)))
	  {
	  default:
	    return;

	  case POST_MODIFY:
	    x = XEXP (XEXP (XEXP (x, 0), 1), 1);
	    if (GET_CODE (x) == CONST_INT)
	      value = INTVAL (x);
	    else if (GET_CODE (x) == REG)
	      {
		fprintf (file, ", %s", reg_names[REGNO (x)]);
		return;
	      }
	    else
	      abort ();
	    break;

	  case POST_INC:
	    value = GET_MODE_SIZE (GET_MODE (x));

	    /* ??? This is for ldf.fill and stf.spill which use XFmode, but
	       which actually need 16 byte increments.  Perhaps we can
	       change them to use TFmode instead.  Or don't use
	       POST_DEC/POST_INC for them.  */
	    if (value == 12)
	      value = 16;
	    break;

	  case POST_DEC:
	    value = - (HOST_WIDE_INT) GET_MODE_SIZE (GET_MODE (x));
	    if (value == -12)
	      value = -16;
	    break;
	  }

	putc (',', file);
	putc (' ', file);
	fprintf (file, HOST_WIDE_INT_PRINT_DEC, value);
	return;
      }

    case 'Q':
      if (MEM_VOLATILE_P (x))
	fputs(".rel", file);
      return;

    case 'S':
      fprintf (file, "%d", exact_log2 (INTVAL (x)));
      return;

    case 'T':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  fprintf (file, "0x%x", (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'U':
      if (! TARGET_GNU_AS && GET_CODE (x) == CONST_INT)
	{
	  const char *prefix = "0x";
	  if (INTVAL (x) & 0x80000000)
	    {
	      fprintf (file, "0xffffffff");
	      prefix = "";
	    }
	  fprintf (file, "%s%x", prefix, (int) INTVAL (x) & 0xffffffff);
	  return;
	}
      break;

    case 'r':
      /* If this operand is the constant zero, write it as zero.  */
      if (GET_CODE (x) == REG)
	fputs (reg_names[REGNO (x)], file);
      else if (x == CONST0_RTX (GET_MODE (x)))
	fputs ("r0", file);
      else
	output_operand_lossage ("invalid %%r value");
      return;

    case '+':
      {
	const char *which;

	/* For conditional branches, returns or calls, substitute
	   sptk, dptk, dpnt, or spnt for %s.  */
	x = find_reg_note (current_output_insn, REG_BR_PROB, 0);
	if (x)
	  {
	    int pred_val = INTVAL (XEXP (x, 0));

	    /* Guess top and bottom 10% statically predicted.  */
	    if (pred_val < REG_BR_PROB_BASE / 10)
	      which = ".spnt";
	    else if (pred_val < REG_BR_PROB_BASE / 2)
	      which = ".dpnt";
	    else if (pred_val < REG_BR_PROB_BASE * 9 / 10)
	      which = ".dptk";
	    else
	      which = ".sptk";
	  }
	else if (GET_CODE (current_output_insn) == CALL_INSN)
	  which = ".sptk";
	else
	  which = ".dptk";

	fputs (which, file);
	return;
      }

    case ',':
      x = current_insn_predicate;
      if (x)
	{
	  unsigned int regno = REGNO (XEXP (x, 0));
	  if (GET_CODE (x) == EQ)
	    regno += 1;
	  fprintf (file, "(%s) ", reg_names [regno]);
	}
      return;

    default:
      output_operand_lossage ("ia64_print_operand: unknown code");
      return;
    }

  switch (GET_CODE (x))
    {
      /* This happens for the spill/restore instructions.  */
    case POST_INC:
    case POST_DEC:
    case POST_MODIFY:
      x = XEXP (x, 0);
      /* ... fall through ...  */

    case REG:
      fputs (reg_names [REGNO (x)], file);
      break;

    case MEM:
      {
	rtx addr = XEXP (x, 0);
	if (GET_RTX_CLASS (GET_CODE (addr)) == 'a')
	  addr = XEXP (addr, 0);
	fprintf (file, "[%s]", reg_names [REGNO (addr)]);
	break;
      }

    default:
      output_addr_const (file, x);
      break;
    }

  return;
}

/* Calculate the cost of moving data from a register in class FROM to
   one in class TO.  */

int
ia64_register_move_cost (from, to)
     enum reg_class from, to;
{
  int from_hard, to_hard;
  int from_gr, to_gr;

  from_hard = (from == BR_REGS || from == AR_M_REGS || from == AR_I_REGS);
  to_hard = (to == BR_REGS || to == AR_M_REGS || to == AR_I_REGS);
  from_gr = (from == GENERAL_REGS);
  to_gr = (to == GENERAL_REGS);

  if (from_hard && to_hard)
    return 8;
  else if ((from_hard && !to_gr) || (!from_gr && to_hard))
    return 6;

  return 2;
}

/* This function returns the register class required for a secondary
   register when copying between one of the registers in CLASS, and X,
   using MODE.  A return value of NO_REGS means that no secondary register
   is required.  */

enum reg_class
ia64_secondary_reload_class (class, mode, x)
     enum reg_class class;
     enum machine_mode mode ATTRIBUTE_UNUSED;
     rtx x;
{
  int regno = -1;

  if (GET_CODE (x) == REG || GET_CODE (x) == SUBREG)
    regno = true_regnum (x);

  /* ??? This is required because of a bad gcse/cse/global interaction.
     We end up with two pseudos with overlapping lifetimes both of which
     are equiv to the same constant, and both which need to be in BR_REGS.
     This results in a BR_REGS to BR_REGS copy which doesn't exist.  To
     reproduce, return NO_REGS here, and compile divdi3 in libgcc2.c.
     This seems to be a cse bug.  cse_basic_block_end changes depending on
     the path length, which means the qty_first_reg check in make_regs_eqv
     can give different answers at different times.  */
  /* ??? At some point I'll probably need a reload_indi pattern to handle
     this.  */
  if (class == BR_REGS && BR_REGNO_P (regno))
    return GR_REGS;

  /* This is needed if a pseudo used as a call_operand gets spilled to a
     stack slot.  */
  if (class == BR_REGS && GET_CODE (x) == MEM)
    return GR_REGS;

  /* This can happen when a paradoxical subreg is an operand to the muldi3
     pattern.  */
  /* ??? This shouldn't be necessary after instruction scheduling is
     enabled, because paradoxical subregs are not accepted by
     register_operand when INSN_SCHEDULING is defined.  Or alternatively,
     stop the paradoxical subreg stupidity in the *_operand functions in
     recog.c.  */
  if (class == FR_REGS
      && GET_CODE (x) == MEM
      && (GET_MODE (x) == SImode || GET_MODE (x) == HImode
	  || GET_MODE (x) == QImode))
    return GR_REGS;

  /* This can happen because of the ior/and/etc patterns that accept FP
     registers as operands.  If the third operand is a constant, then it
     needs to be reloaded into a FP register.  */
  if (class == FR_REGS && GET_CODE (x) == CONST_INT)
    return GR_REGS;

  /* ??? This happens if we cse/gcse a CCmode value across a call, and the
     function has a nonlocal goto.  This is because global does not
     allocate call crossing pseudos to hard registers when
     current_function_has_nonlocal_goto is true.  This is relatively common
     for C++ programs that use exceptions.  To reproduce, return NO_REGS
     and compile libstdc++.  */
  if (class == PR_REGS && GET_CODE (x) == MEM)
    return GR_REGS;

  return NO_REGS;
}

/* Emit text to declare externally defined variables and functions, because
   the Intel assembler does not support undefined externals.  */
*/ void ia64_asm_output_external (file, decl, name) FILE *file; tree decl; const char *name; { int save_referenced; /* GNU as does not need anything here. */ if (TARGET_GNU_AS) return; /* ??? The Intel assembler creates a reference that needs to be satisfied by the linker when we do this, so we need to be careful not to do this for builtin functions which have no library equivalent. Unfortunately, we can't tell here whether or not a function will actually be called by expand_expr, so we pull in library functions even if we may not need them later. */ if (! strcmp (name, "__builtin_next_arg") || ! strcmp (name, "alloca") || ! strcmp (name, "__builtin_constant_p") || ! strcmp (name, "__builtin_args_info")) return; /* assemble_name will set TREE_SYMBOL_REFERENCED, so we must save and restore it. */ save_referenced = TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)); if (TREE_CODE (decl) == FUNCTION_DECL) { fprintf (file, "\t%s\t ", TYPE_ASM_OP); assemble_name (file, name); putc (',', file); fprintf (file, TYPE_OPERAND_FMT, "function"); putc ('\n', file); } ASM_GLOBALIZE_LABEL (file, name); TREE_SYMBOL_REFERENCED (DECL_ASSEMBLER_NAME (decl)) = save_referenced; } /* Parse the -mfixed-range= option string. */ static void fix_range (const_str) const char *const_str; { int i, first, last; char *str, *dash, *comma; /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and REG2 are either register names or register numbers. The effect of this option is to mark the registers in the range from REG1 to REG2 as ``fixed'' so they won't be used by the compiler. This is used, e.g., to ensure that kernel mode code doesn't use f32-f127. */ i = strlen (const_str); str = (char *) alloca (i + 1); memcpy (str, const_str, i + 1); while (1) { dash = strchr (str, '-'); if (!dash) { warning ("value of -mfixed-range must have form REG1-REG2"); return; } *dash = '\0'; comma = strchr (dash + 1, ','); if (comma) *comma = '\0'; first = decode_reg_name (str); if (first < 0) { warning ("unknown register name: %s", str); return; } last = decode_reg_name (dash + 1); if (last < 0) { warning ("unknown register name: %s", dash + 1); return; } *dash = '-'; if (first > last) { warning ("%s-%s is an empty range", str, dash + 1); return; } for (i = first; i <= last; ++i) fixed_regs[i] = call_used_regs[i] = 1; if (!comma) break; *comma = ','; str = comma + 1; } } /* Called to register all of our global variables with the garbage collector. */ static void ia64_add_gc_roots () { ggc_add_rtx_root (&ia64_compare_op0, 1); ggc_add_rtx_root (&ia64_compare_op1, 1); } static void ia64_init_machine_status (p) struct function *p; { p->machine = (struct machine_function *) xcalloc (1, sizeof (struct machine_function)); /* Reset from the previous function's potential modifications. */ XINT (return_address_pointer_rtx, 0) = RETURN_ADDRESS_POINTER_REGNUM; } static void ia64_mark_machine_status (p) struct function *p; { ggc_mark_rtx (p->machine->ia64_eh_epilogue_sp); ggc_mark_rtx (p->machine->ia64_eh_epilogue_bsp); } /* Handle TARGET_OPTIONS switches. */ void ia64_override_options () { if (TARGET_AUTO_PIC) target_flags |= MASK_CONST_GP; if (ia64_fixed_range_string) fix_range (ia64_fixed_range_string); ia64_section_threshold = g_switch_set ? g_switch_value : IA64_DEFAULT_GVALUE; init_machine_status = ia64_init_machine_status; mark_machine_status = ia64_mark_machine_status; ia64_add_gc_roots (); } /* The following collection of routines emit instruction group stop bits as necessary to avoid dependencies.
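   As a concrete example of what these routines guard against, consider

       add r4 = r5, r6
       ;;
       ld8 r7 = [r4]

   The load reads r4, which the add writes; without the `;;' stop bit
   both instructions would sit in one instruction group, and the
   read-after-write would violate the architecture's dependency rules.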
*/ /* Need to track some additional registers as far as serialization is concerned so we can properly handle br.call and br.ret. We could make these registers visible to gcc, but since these registers are never explicitly used in gcc generated code, it seems wasteful to do so (plus it would make the call and return patterns needlessly complex). */ #define REG_GP (GR_REG (1)) #define REG_RP (BR_REG (0)) #define REG_AR_CFM (FIRST_PSEUDO_REGISTER + 1) /* This is used for volatile asms which may require a stop bit immediately before and after them. */ #define REG_VOLATILE (FIRST_PSEUDO_REGISTER + 2) #define NUM_REGS (FIRST_PSEUDO_REGISTER + 3) /* For each register, we keep track of how many times it has been written in the current instruction group. If a register is written unconditionally (no qualifying predicate), WRITE_COUNT is set to 2 and FIRST_PRED is ignored. If a register is written when its qualifying predicate P is true, we set WRITE_COUNT to 1 and FIRST_PRED to P. Later on, the same register may be written again by the complement of P (P+1 if P is even, P-1 otherwise) and when this happens, WRITE_COUNT gets set to 2. The result of this is that whenever an insn attempts to write a register whose WRITE_COUNT is two, we need to issue an insn group barrier first. */ struct reg_write_state { char write_count; char written_by_fp; /* Was register written by a floating-point insn? */ short first_pred; /* 0 means ``no predicate'' */ }; /* Cumulative info for the current instruction group. */ struct reg_write_state rws_sum[NUM_REGS]; /* Info for the current instruction. This gets copied to rws_sum after a stop bit is emitted. */ struct reg_write_state rws_insn[NUM_REGS]; /* Misc flags needed to compute RAW/WAW dependencies while we are traversing RTL for one instruction. */ struct reg_flags { unsigned int is_write : 1; /* Is register being written? */ unsigned int is_fp : 1; /* Is register used as part of an fp op? */ unsigned int is_branch : 1; /* Is register used as part of a branch? */ }; static void rws_update PARAMS ((struct reg_write_state *, int, struct reg_flags, int)); static int rws_access_reg PARAMS ((int, struct reg_flags, int)); static int rtx_needs_barrier PARAMS ((rtx, struct reg_flags, int)); /* Update *RWS for REGNO, which is being written by the current instruction, with predicate PRED, and associated register flags in FLAGS. */ static void rws_update (rws, regno, flags, pred) struct reg_write_state *rws; int regno; struct reg_flags flags; int pred; { rws[regno].write_count += pred ? 1 : 2; rws[regno].written_by_fp |= flags.is_fp; rws[regno].first_pred = pred; } /* Handle an access to register REGNO of type FLAGS using predicate register PRED. Update rws_insn and rws_sum arrays. Return 1 if this access creates a dependency with an earlier instruction in the same group. */ static int rws_access_reg (regno, flags, pred) int regno; struct reg_flags flags; int pred; { int need_barrier = 0; int is_predicate_reg; if (regno >= NUM_REGS) abort (); if (flags.is_write) { int write_count; /* One insn writes same reg multiple times? */ if (rws_insn[regno].write_count > 0) abort (); /* Update info for current instruction. */ rws_update (rws_insn, regno, flags, pred); /* ??? This is necessary because predicate regs require two hard registers. However, this should be using HARD_REGNO_NREGS so that it works for all multi-reg hard registers, instead of only for predicate registers.
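   As a reference for the WRITE_COUNT protocol described above, here is
   a plain-C sketch of a single entry's transitions; the names are
   hypothetical and the sketch is not part of this file (PRED of zero
   means an unconditional write):

       struct example_state { char count; short pred; };

       static int
       example_record_write (s, pred)
            struct example_state *s;
            int pred;
       {
         int barrier = (s->count == 2
                        || (s->count == 1 && (s->pred ^ 1) != pred));
         s->count += pred ? 1 : 2;
         if (s->count > 2)
           s->count = 2;
         s->pred = pred;
         return barrier;
       }

   The `^ 1' is the complement test: predicates come in even/odd pairs,
   and two writes under complementary predicates together count as one
   unconditional write.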
*/ is_predicate_reg = REGNO_REG_CLASS (regno) == PR_REGS; if (is_predicate_reg) rws_update (rws_insn, regno + 1, flags, pred); /* ??? Likewise. */ write_count = rws_sum[regno].write_count; if (is_predicate_reg) write_count = MAX (write_count, rws_sum[regno + 1].write_count); switch (write_count) { case 0: /* The register has not been written yet. */ rws_update (rws_sum, regno, flags, pred); if (is_predicate_reg) rws_update (rws_sum, regno + 1, flags, pred); break; case 1: /* The register has been written via a predicate. If this is not a complementary predicate, then we need a barrier. */ /* ??? This assumes that P and P+1 are always complementary predicates for P even. */ if ((rws_sum[regno].first_pred ^ 1) != pred) need_barrier = 1; rws_update (rws_sum, regno, flags, pred); if (is_predicate_reg) rws_update (rws_sum, regno + 1, flags, pred); break; case 2: /* The register has been unconditionally written already. We need a barrier. */ need_barrier = 1; break; default: abort (); } } else { if (flags.is_branch) { /* Branches have several RAW exceptions that allow us to avoid barriers. */ if (REGNO_REG_CLASS (regno) == BR_REGS || regno == AR_PFS_REGNUM) /* RAW dependencies on branch regs are permissible as long as the writer is a non-branch instruction. Since we never generate code that uses a branch register written by a branch instruction, handling this case is easy. */ return 0; if (REGNO_REG_CLASS (regno) == PR_REGS && ! rws_sum[regno].written_by_fp) /* The predicates of a branch are available within the same insn group as long as the predicate was written by something other than a floating-point instruction. */ return 0; } switch (rws_sum[regno].write_count) { case 0: /* The register has not been written yet. */ break; case 1: /* The register has been written via a predicate. If this is not a complementary predicate, then we need a barrier. */ /* ??? This assumes that P and P+1 are always complementary predicates for P even. */ if ((rws_sum[regno].first_pred ^ 1) != pred) need_barrier = 1; break; case 2: /* The register has been unconditionally written already. We need a barrier. */ need_barrier = 1; break; default: abort (); } } return need_barrier; } /* Handle an access to rtx X of type FLAGS using predicate register PRED. Return 1 if this access creates a dependency with an earlier instruction in the same group. */ static int rtx_needs_barrier (x, flags, pred) rtx x; struct reg_flags flags; int pred; { int i, j; int is_complemented = 0; int need_barrier = 0; const char *format_ptr; struct reg_flags new_flags; rtx src, dst; rtx cond = 0; if (! x) return 0; new_flags = flags; switch (GET_CODE (x)) { case SET: src = SET_SRC (x); switch (GET_CODE (src)) { case CALL: /* We don't need to worry about the result registers that get written by a subroutine call. */ need_barrier = rtx_needs_barrier (src, flags, pred); return need_barrier; case IF_THEN_ELSE: if (SET_DEST (x) == pc_rtx) { /* X is a conditional branch. */ /* ??? This seems redundant, as the caller sets this bit for all JUMP_INSNs. */ new_flags.is_branch = 1; need_barrier = rtx_needs_barrier (src, new_flags, pred); return need_barrier; } else { /* X is a conditional move. */ cond = XEXP (src, 0); if (GET_CODE (cond) == EQ) is_complemented = 1; cond = XEXP (cond, 0); if (GET_CODE (cond) != REG || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS) abort (); if (XEXP (src, 1) == SET_DEST (x) || XEXP (src, 2) == SET_DEST (x)) { /* X is a conditional move that conditionally writes the destination. */ /* We need another complement in this case.
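   For example, (set (reg r4) (if_then_else (ne (reg p6) 0) (reg r5)
   (reg r4))), i.e. "(p6) mov r4 = r5", only stores to r4 when p6 is
   true, so p6 is the predicate to record.  If instead the THEN arm is
   r4 itself, the store only takes effect when p6 is false, and the
   complementary predicate p7 must be recorded; that extra flip is
   what is computed just below.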
*/ if (XEXP (src, 1) == SET_DEST (x)) is_complemented = ! is_complemented; pred = REGNO (cond); if (is_complemented) ++pred; } /* ??? If this is a conditional write to the dest, then this instruction does not actually read one source. This probably doesn't matter, because that source is also the dest. */ /* ??? Multiple writes to predicate registers are allowed if they are all AND type compares, or if they are all OR type compares. We do not generate such instructions currently. */ } /* ... fall through ... */ default: if (GET_RTX_CLASS (GET_CODE (src)) == '<' && GET_MODE_CLASS (GET_MODE (XEXP (src, 0))) == MODE_FLOAT) /* Set new_flags.is_fp to 1 so that we know we're dealing with a floating point comparison when processing the destination of the SET. */ new_flags.is_fp = 1; break; } need_barrier = rtx_needs_barrier (src, flags, pred); /* This instruction unconditionally uses a predicate register. */ if (cond) need_barrier |= rws_access_reg (REGNO (cond), flags, 0); dst = SET_DEST (x); if (GET_CODE (dst) == ZERO_EXTRACT) { need_barrier |= rtx_needs_barrier (XEXP (dst, 1), flags, pred); need_barrier |= rtx_needs_barrier (XEXP (dst, 2), flags, pred); dst = XEXP (dst, 0); } new_flags.is_write = 1; need_barrier |= rtx_needs_barrier (dst, new_flags, pred); break; case CALL: new_flags.is_write = 0; /* ??? Why is this here? It seems unnecessary. */ need_barrier |= rws_access_reg (REG_GP, new_flags, pred); need_barrier |= rws_access_reg (AR_EC_REGNUM, new_flags, pred); /* Avoid multiple register writes, in case this is a pattern with multiple CALL rtx. This avoids an abort in rws_access_reg. */ /* ??? This assumes that no rtx other than CALL/RETURN sets REG_AR_CFM, and that we don't have predicated calls/returns. */ if (! rws_insn[REG_AR_CFM].write_count) { new_flags.is_write = 1; need_barrier |= rws_access_reg (REG_RP, new_flags, pred); need_barrier |= rws_access_reg (AR_PFS_REGNUM, new_flags, pred); need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred); } break; case COND_EXEC: /* X is a predicated instruction. */ cond = COND_EXEC_TEST (x); if (pred) abort (); need_barrier = rtx_needs_barrier (cond, flags, 0); if (GET_CODE (cond) == EQ) is_complemented = 1; cond = XEXP (cond, 0); if (GET_CODE (cond) != REG || REGNO_REG_CLASS (REGNO (cond)) != PR_REGS) abort (); pred = REGNO (cond); if (is_complemented) ++pred; need_barrier |= rtx_needs_barrier (COND_EXEC_CODE (x), flags, pred); return need_barrier; case CLOBBER: #if 0 case USE: /* We must handle USE here in case it occurs within a PARALLEL. For instance, the mov ar.pfs= instruction has a USE which requires a barrier between it and an immediately preceding alloc. */ #endif /* Clobber & use are for earlier compiler phases only. */ break; case ASM_OPERANDS: case ASM_INPUT: /* We always emit stop bits for traditional asms. We emit stop bits for volatile extended asms if TARGET_VOL_ASM_STOP is true. */ if (GET_CODE (x) != ASM_OPERANDS || (MEM_VOLATILE_P (x) && TARGET_VOL_ASM_STOP)) { /* Avoid writing the register multiple times if we have multiple asm outputs. This avoids an abort in rws_access_reg. */ if (! rws_insn[REG_VOLATILE].write_count) { new_flags.is_write = 1; rws_access_reg (REG_VOLATILE, new_flags, pred); } return 1; } /* For all ASM_OPERANDS, we must traverse the vector of input operands. We cannot just fall through here since then we would be confused by the ASM_INPUT rtx inside ASM_OPERANDS, which does not indicate a traditional asm, unlike its normal usage.
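   In source terms, a traditional asm such as

       asm ("mf");

   is a bare ASM_INPUT and always gets stop bits, while an extended

       asm volatile ("mf" : : : "memory");

   is an ASM_OPERANDS and gets them only when TARGET_VOL_ASM_STOP is
   set.  Extended asms carry ASM_INPUT rtxs internally for their
   operands, which is why a generic fall-through walk would
   misclassify them as traditional asms.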
*/ for (i = ASM_OPERANDS_INPUT_LENGTH (x) - 1; i >= 0; --i) if (rtx_needs_barrier (ASM_OPERANDS_INPUT (x, i), flags, pred)) need_barrier = 1; break; case PARALLEL: for (i = XVECLEN (x, 0) - 1; i >= 0; --i) if (rtx_needs_barrier (XVECEXP (x, 0, i), flags, pred)) need_barrier = 1; break; case SUBREG: x = SUBREG_REG (x); /* FALLTHRU */ case REG: need_barrier = rws_access_reg (REGNO (x), flags, pred); break; case MEM: /* Find the regs used in memory address computation. */ new_flags.is_write = 0; need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); break; case CONST_INT: case CONST_DOUBLE: case SYMBOL_REF: case LABEL_REF: case CONST: break; /* Operators with side-effects. */ case POST_INC: case POST_DEC: if (GET_CODE (XEXP (x, 0)) != REG) abort (); new_flags.is_write = 0; need_barrier = rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred); new_flags.is_write = 1; need_barrier |= rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred); break; case POST_MODIFY: if (GET_CODE (XEXP (x, 0)) != REG) abort (); new_flags.is_write = 0; need_barrier = rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred); need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); new_flags.is_write = 1; need_barrier |= rws_access_reg (REGNO (XEXP (x, 0)), new_flags, pred); break; /* Handle common unary and binary ops for efficiency. */ case COMPARE: case PLUS: case MINUS: case MULT: case DIV: case MOD: case UDIV: case UMOD: case AND: case IOR: case XOR: case ASHIFT: case ROTATE: case ASHIFTRT: case LSHIFTRT: case ROTATERT: case SMIN: case SMAX: case UMIN: case UMAX: case NE: case EQ: case GE: case GT: case LE: case LT: case GEU: case GTU: case LEU: case LTU: need_barrier = rtx_needs_barrier (XEXP (x, 0), new_flags, pred); need_barrier |= rtx_needs_barrier (XEXP (x, 1), new_flags, pred); break; case NEG: case NOT: case SIGN_EXTEND: case ZERO_EXTEND: case TRUNCATE: case FLOAT_EXTEND: case FLOAT_TRUNCATE: case FLOAT: case FIX: case UNSIGNED_FLOAT: case UNSIGNED_FIX: case ABS: case SQRT: case FFS: need_barrier = rtx_needs_barrier (XEXP (x, 0), flags, pred); break; case UNSPEC: switch (XINT (x, 1)) { /* ??? For the st8.spill/ld8.fill instructions, we can ignore unat dependencies as long as we don't have both a spill and fill in the same instruction group. We need to check for that. */ case 1: /* st8.spill */ case 2: /* ld8.fill */ case 3: /* stf.spill */ case 4: /* ldf.fill */ case 8: /* popcnt */ need_barrier = rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); break; case 5: /* mov =pr */ /* This reads all predicate registers. */ for (i = PR_REG (1); i < PR_REG (64); i++) need_barrier |= rws_access_reg (i, flags, pred); break; case 6: case 7: abort (); /* ??? Should track unat reads and writes. */ case 9: /* mov =ar.unat */ case 10: /* mov ar.unat= */ break; case 11: /* mov ar.ccv= */ break; case 12: /* mf */ break; case 13: /* cmpxchg_acq */ break; case 14: /* val_compare_and_swap */ break; case 15: /* lock_release */ break; case 16: /* lock_test_and_set */ break; case 17: /* _and_fetch */ break; case 18: /* fetch_and_ */ break; case 19: /* fetchadd_acq */ break; case 20: /* mov = ar.bsp */ break; case 21: /* flushrs */ break; default: abort (); } break; case UNSPEC_VOLATILE: switch (XINT (x, 1)) { case 0: /* alloc */ /* Alloc must always be the first instruction. Currently, we only emit it at the function start, so we don't need to worry about emitting a stop bit before it.
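   A typical instance is the prologue's

       alloc r35 = ar.pfs, 2, 1, 0, 0

   (register and frame-size operands made up), which reads ar.pfs and
   writes the current frame marker; the read of AR_PFS_REGNUM and the
   write of REG_AR_CFM recorded below mirror exactly those two effects.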
*/ need_barrier = rws_access_reg (AR_PFS_REGNUM, flags, pred); new_flags.is_write = 1; need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred); return need_barrier; case 1: /* blockage */ case 2: /* insn group barrier */ return 0; case 3: /* flush_cache */ return rtx_needs_barrier (XVECEXP (x, 0, 0), flags, pred); case 4: /* mov ar.pfs= */ new_flags.is_write = 1; need_barrier = rws_access_reg (AR_PFS_REGNUM, new_flags, pred); break; case 5: /* set_bsp */ need_barrier = 1; break; case 6: /* mov pr= */ /* This writes all predicate registers. */ new_flags.is_write = 1; /* We need to skip by two, because rws_access_reg always writes to two predicate registers at a time. */ /* ??? Strictly speaking, we shouldn't be counting writes to pr0. */ for (i = PR_REG (0); i < PR_REG (64); i += 2) need_barrier |= rws_access_reg (i, new_flags, pred); break; case 7: /* pred.rel.mutex */ return 0; default: abort (); } break; case RETURN: new_flags.is_write = 0; need_barrier = rws_access_reg (REG_RP, flags, pred); need_barrier |= rws_access_reg (AR_PFS_REGNUM, flags, pred); new_flags.is_write = 1; need_barrier |= rws_access_reg (AR_EC_REGNUM, new_flags, pred); need_barrier |= rws_access_reg (REG_AR_CFM, new_flags, pred); break; default: format_ptr = GET_RTX_FORMAT (GET_CODE (x)); for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--) switch (format_ptr[i]) { case '0': /* unused field */ case 'i': /* integer */ case 'n': /* note */ case 'w': /* wide integer */ case 's': /* pointer to string */ case 'S': /* optional pointer to string */ break; case 'e': if (rtx_needs_barrier (XEXP (x, i), flags, pred)) need_barrier = 1; break; case 'E': for (j = XVECLEN (x, i) - 1; j >= 0; --j) if (rtx_needs_barrier (XVECEXP (x, i, j), flags, pred)) need_barrier = 1; break; default: abort (); } } return need_barrier; } /* INSNS is a chain of instructions. Scan the chain, and insert stop bits as necessary to eliminate dependencies. */ static void emit_insn_group_barriers (insns) rtx insns; { rtx insn, prev_insn; memset (rws_sum, 0, sizeof (rws_sum)); prev_insn = 0; for (insn = insns; insn; insn = NEXT_INSN (insn)) { int need_barrier = 0; struct reg_flags flags; memset (&flags, 0, sizeof (flags)); switch (GET_CODE (insn)) { case NOTE: break; case CALL_INSN: flags.is_branch = 1; memset (rws_insn, 0, sizeof (rws_insn)); need_barrier = rtx_needs_barrier (PATTERN (insn), flags, 0); if (need_barrier) { /* PREV_INSN null can happen if the very first insn is a volatile asm. */ if (prev_insn) emit_insn_after (gen_insn_group_barrier (), prev_insn); memcpy (rws_sum, rws_insn, sizeof (rws_sum)); } /* A call must end a group, otherwise the assembler might pack it in with a following branch and then the function return goes to the wrong place. Do this unconditionally for unconditional calls, simply because it (1) looks nicer and (2) keeps the data structures more accurate for the insns following the call.
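   Concretely: the return address deposited by br.call points at the
   bundle following the call, so if the assembler were free to pack a
   later instruction in behind the call, that instruction would be
   skipped on return.  A stop bit directly after the call,

       br.call.sptk.many b0 = foo
       ;;

   rules that packing out (`foo' is illustrative).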
*/ need_barrier = 1; if (GET_CODE (PATTERN (insn)) == COND_EXEC) { rtx next_insn = insn; do next_insn = next_nonnote_insn (next_insn); while (next_insn && GET_CODE (next_insn) == INSN && (GET_CODE (PATTERN (next_insn)) == USE || GET_CODE (PATTERN (next_insn)) == CLOBBER)); if (next_insn && GET_CODE (next_insn) != JUMP_INSN) need_barrier = 0; } if (need_barrier) { emit_insn_after (gen_insn_group_barrier (), insn); memset (rws_sum, 0, sizeof (rws_sum)); prev_insn = NULL_RTX; } break; case JUMP_INSN: flags.is_branch = 1; /* FALLTHRU */ case INSN: if (GET_CODE (PATTERN (insn)) == USE) /* Don't care about USE "insns"---those are used to indicate to the optimizer that it shouldn't get rid of certain operations. */ break; else { rtx pat = PATTERN (insn); /* We play dependency tricks with the epilogue in order to get proper schedules. Undo this for dv analysis. */ if (INSN_CODE (insn) == CODE_FOR_epilogue_deallocate_stack) pat = XVECEXP (pat, 0, 0); /* ??? Similarly, the pattern we use for br.cloop confuses the code above. The second element of the vector is representative. */ else if (INSN_CODE (insn) == CODE_FOR_doloop_end_internal) pat = XVECEXP (pat, 0, 1); memset (rws_insn, 0, sizeof (rws_insn)); need_barrier |= rtx_needs_barrier (pat, flags, 0); /* Check to see if the previous instruction was a volatile asm. */ if (! need_barrier) need_barrier = rws_access_reg (REG_VOLATILE, flags, 0); if (need_barrier) { /* PREV_INSN null can happen if the very first insn is a volatile asm. */ if (prev_insn) emit_insn_after (gen_insn_group_barrier (), prev_insn); memcpy (rws_sum, rws_insn, sizeof (rws_sum)); } prev_insn = insn; } break; case BARRIER: /* A barrier doesn't imply an instruction group boundary. */ break; case CODE_LABEL: /* Leave prev_insn alone so the barrier gets generated in front of the label, if one is needed. */ break; default: abort (); } } } /* Emit pseudo-ops for the assembler to describe predicate relations. At present this assumes that we only consider predicate pairs to be mutex, and that the assembler can deduce proper values from straight-line code. */ static void emit_predicate_relation_info (insns) rtx insns; { int i; /* Make sure the CFG and global_live_at_start are correct. */ find_basic_blocks (insns, max_reg_num (), NULL); life_analysis (insns, NULL, 0); for (i = n_basic_blocks - 1; i >= 0; --i) { basic_block bb = BASIC_BLOCK (i); int r; rtx head = bb->head; /* We only need such notes at code labels. */ if (GET_CODE (head) != CODE_LABEL) continue; if (GET_CODE (NEXT_INSN (head)) == NOTE && NOTE_LINE_NUMBER (NEXT_INSN (head)) == NOTE_INSN_BASIC_BLOCK) head = NEXT_INSN (head); for (r = PR_REG (0); r < PR_REG (64); r += 2) if (REGNO_REG_SET_P (bb->global_live_at_start, r)) { rtx p = gen_rtx_REG (CCmode, r); rtx n = emit_insn_after (gen_pred_rel_mutex (p), head); if (head == bb->end) bb->end = n; head = n; } } } /* Perform machine dependent operations on the rtl chain INSNS. */ void ia64_reorg (insns) rtx insns; { /* If optimizing, we'll have split before scheduling. */ if (optimize == 0) split_all_insns (0); emit_predicate_relation_info (insns); emit_insn_group_barriers (insns); } /* Return true if REGNO is used by the epilogue. */ int ia64_epilogue_uses (regno) int regno; { /* When a function makes a call through a function descriptor, we will write a (potentially) new value to "gp". After returning from such a call, we need to make sure the function restores the original gp-value, even if the function itself does not use the gp anymore. 
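   The sequence in question looks roughly like

       ld8 r14 = [r8], 8        // entry point from the descriptor
       ;;
       ld8 r1 = [r8]            // callee's gp
       mov b6 = r14
       br.call.sptk.many b0 = b6
       ;;
       mov r1 = r32             // restore our own gp (r32: a saved copy)

   (registers illustrative).  Marking r1 live at function exits here
   keeps flow analysis from deleting that final restore as dead code.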
*/ if (regno == R_GR (1) && TARGET_CONST_GP && !(TARGET_AUTO_PIC || TARGET_NO_PIC)) return 1; /* For functions defined with the syscall_linkage attribute, all input registers are marked as live at all function exits. This prevents the register allocator from using the input registers, which in turn makes it possible to restart a system call after an interrupt without having to save/restore the input registers. */ if (IN_REGNO_P (regno) && (regno < IN_REG (current_function_args_info.words)) && lookup_attribute ("syscall_linkage", TYPE_ATTRIBUTES (TREE_TYPE (current_function_decl)))) return 1; /* Conditional return patterns can't represent the use of `b0' as the return address, so we force the value live this way. */ if (regno == R_BR (0)) return 1; if (regno == AR_LC_REGNUM) return 1; return 0; } /* Return true if IDENTIFIER is a valid attribute for TYPE. */ int ia64_valid_type_attribute (type, attributes, identifier, args) tree type; tree attributes ATTRIBUTE_UNUSED; tree identifier; tree args; { /* We only support an attribute for function calls. */ if (TREE_CODE (type) != FUNCTION_TYPE && TREE_CODE (type) != METHOD_TYPE) return 0; /* The "syscall_linkage" attribute says the callee is a system call entry point. This affects ia64_epilogue_uses. */ if (is_attribute_p ("syscall_linkage", identifier)) return args == NULL_TREE; return 0; } /* For ia64, SYMBOL_REF_FLAG set means that it is a function. We add @ to the name if this goes in small data/bss. We can only put a variable in small data/bss if it is defined in this module or a module that we are statically linked with. We can't check the second condition, but TREE_STATIC gives us the first one. */ /* ??? If we had IPA, we could check the second condition. We could support programmer added section attributes if the variable is not defined in this module. */ /* ??? See the v850 port for a cleaner way to do this. */ /* ??? We could also support own long data here. Generating movl/add/ld8 instead of addl,ld8/ld8. This makes the code bigger, but should make the code faster because there is one less load. This also includes incomplete types which can't go in sdata/sbss. */ /* ??? See select_section. We must put short own readonly variables in sdata/sbss instead of the more natural rodata, because we can't perform the DECL_READONLY_SECTION test here. */ extern struct obstack * saveable_obstack; void ia64_encode_section_info (decl) tree decl; { const char *symbol_str; if (TREE_CODE (decl) == FUNCTION_DECL) { SYMBOL_REF_FLAG (XEXP (DECL_RTL (decl), 0)) = 1; return; } /* Careful not to prod global register variables. */ if (TREE_CODE (decl) != VAR_DECL || GET_CODE (DECL_RTL (decl)) != MEM || GET_CODE (XEXP (DECL_RTL (decl), 0)) != SYMBOL_REF) return; symbol_str = XSTR (XEXP (DECL_RTL (decl), 0), 0); /* We assume that -fpic is used only to create a shared library (dso). With -fpic, no global data can ever be sdata. Without -fpic, global common uninitialized data can never be sdata, since it can unify with a real definition in a dso. */ /* ??? Actually, we can put globals in sdata, as long as we don't use gprel to access them. The linker may then be able to do linker relaxation to optimize references to them. Currently sdata implies use of gprel. */ if (! TARGET_NO_SDATA && TREE_STATIC (decl) && ! (DECL_ONE_ONLY (decl) || DECL_WEAK (decl)) && ! 
(TREE_PUBLIC (decl) && (flag_pic || (DECL_COMMON (decl) && (DECL_INITIAL (decl) == 0 || DECL_INITIAL (decl) == error_mark_node)))) /* Either the variable must be declared without a section attribute, or the section must be sdata or sbss. */ && (DECL_SECTION_NAME (decl) == 0 || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), ".sdata") || ! strcmp (TREE_STRING_POINTER (DECL_SECTION_NAME (decl)), ".sbss"))) { int size = int_size_in_bytes (TREE_TYPE (decl)); /* If the variable has already been defined in the output file, then it is too late to put it in sdata if it wasn't put there in the first place. The test is here rather than above, because if it is already in sdata, then it can stay there. */ if (TREE_ASM_WRITTEN (decl)) ; /* If this is an incomplete type with size 0, then we can't put it in sdata because it might be too big when completed. */ else if (size > 0 && size <= ia64_section_threshold && symbol_str[0] != SDATA_NAME_FLAG_CHAR) { int len = strlen (symbol_str); char *newstr; if (ggc_p) newstr = ggc_alloc_string (NULL, len + 1); else newstr = obstack_alloc (saveable_obstack, len + 2); *newstr = SDATA_NAME_FLAG_CHAR; memcpy (newstr + 1, symbol_str, len + 1); XSTR (XEXP (DECL_RTL (decl), 0), 0) = newstr; } } /* This decl is marked as being in small data/bss but it shouldn't be; one likely explanation for this is that the decl has been moved into a different section from the one it was in when ENCODE_SECTION_INFO was first called. Remove the '@'. */ else if (symbol_str[0] == SDATA_NAME_FLAG_CHAR) { if (ggc_p) XSTR (XEXP (DECL_RTL (decl), 0), 0) = ggc_alloc_string (symbol_str + 1, -1); else XSTR (XEXP (DECL_RTL (decl), 0), 0) = symbol_str + 1; } } /* Output assembly directives for prologue regions. */ static int spill_offset; static int sp_offset; static int spill_offset_emitted = 1; static rtx tmp_reg = NULL_RTX; static int tmp_saved = -1; /* This function processes a SET pattern looking for specific patterns which result in emitting an assembly directive required for unwinding. */ static int process_set (asm_out_file, pat) FILE *asm_out_file; rtx pat; { rtx src = SET_SRC (pat); rtx dest = SET_DEST (pat); static rtx frame_reg = NULL_RTX; static int frame_size = 0; /* Look for the ALLOC insn. reg = alloc .... */ if (GET_CODE (src) == UNSPEC_VOLATILE && XINT (src, 1) == 0 && GET_CODE (dest) == REG && GR_REGNO_P (REGNO (dest))) { /* Assume this is a stack allocate insn. */ fprintf (asm_out_file, "\t.save ar.pfs, r%d\n", REGNO (dest) + ia64_input_regs); return 1; } /* Look for SP = .... */ if (GET_CODE (dest) == REG && REGNO (dest) == STACK_POINTER_REGNUM) { if (GET_CODE (src) == PLUS) { rtx op0 = XEXP (src, 0); rtx op1 = XEXP (src, 1); if (op0 == dest && GET_CODE (op1) == CONST_INT) { if (INTVAL (op1) < 0) { fputs ("\t.fframe ", asm_out_file); fprintf (asm_out_file, HOST_WIDE_INT_PRINT_DEC, -INTVAL (op1)); fputc ('\n', asm_out_file); frame_size = INTVAL (op1); } else fprintf (asm_out_file, "\t.restore sp\n"); } else if (op0 == dest && GET_CODE (op1) == REG) { /* ia64_expand_prologue uses r2 for stack pointer decrements, ia64_expand_epilogue uses r3 for stack pointer increments. */ if (REGNO (op1) == GR_REG (2)) { fprintf (asm_out_file, "\t.vframe r%d\n", REGNO (op1)); frame_size = 0; } else if (REGNO (op1) == GR_REG (3)) fprintf (asm_out_file, "\t.restore sp\n"); else abort (); } else abort (); } else if (GET_CODE (src) == REG && REGNO (src) == FRAME_POINTER_REGNUM) fprintf (asm_out_file, "\t.restore sp\n"); else abort (); return 1; } /* Look for a frame offset.
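   That is, a set of the form

       (set (reg r2) (plus (reg fp) (const_int 16)))

   records where the spill area will start.  The .spill directive
   itself is deliberately deferred (via spill_offset_emitted) until the
   first real spill store is seen, because saves of non-spill data off
   the same register may still adjust the offset.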
*/ if (GET_CODE (dest) == REG) { if (GET_CODE (src) == PLUS) { rtx op0 = XEXP (src, 0); rtx op1 = XEXP (src, 1); if (GET_CODE (op0) == REG && REGNO (op0) == FRAME_POINTER_REGNUM && GET_CODE (op1) == CONST_INT) { sp_offset = -frame_size + INTVAL (op1); spill_offset = INTVAL (op1); spill_offset_emitted = 0; frame_reg = dest; /* We delay issuing the spill offset since we might be saving non-spill things off this register, thus adjusting its offset before a spill is seen. */ return 1; } } } /* Register move we need to look at. */ if (GET_CODE (dest) == REG && GET_CODE (src) == REG) { int regno = REGNO (src); if (BR_REGNO_P (regno)) { /* Saving return address pointer. */ if (regno == BR_REG (0)) { fprintf (asm_out_file, "\t.save rp, r%d\n", REGNO (dest) + ia64_input_regs); return 1; } /* If it's br1 to br5, we copy them to temp regs, then save the temp reg to memory next. */ if (regno >= BR_REG (1) && regno <= BR_REG (5)) { tmp_reg = dest; tmp_saved = regno; return 1; } } } /* Search for special reg moves. */ if (GET_CODE (dest) == REG && GET_CODE (src) == UNSPEC) { int unspec_code = XINT (src, 1); /* Copied to a temp register, save it until we see the temp register stored. */ if (unspec_code == 5 || unspec_code == 9) { tmp_reg = dest; tmp_saved = unspec_code; return 1; } } if (GET_CODE (dest) == MEM && GET_CODE (XEXP (dest, 0)) == POST_INC && GET_CODE (XEXP (XEXP (dest, 0), 0)) == REG) { int spill_unspec = 0; /* We adjust the spill_offset early, so we don't miss it later. */ spill_offset += 8; sp_offset += 8; if (GET_CODE (src) == UNSPEC) { spill_unspec = XINT (src, 1); /* 1 and 3 are unspecs for the GR and FR spills. */ if (spill_unspec != 1 && spill_unspec != 3) spill_unspec = 0; } /* ST8 or st8.spill insn. */ if ((GET_CODE (src) == REG) || spill_unspec != 0) { int regno; if (spill_unspec != 0) { regno = REGNO (XVECEXP (src, 0, 0)); if (!spill_offset_emitted) { fprintf (asm_out_file, "\t.spill %d\n", (-(spill_offset - 8) + 16)); spill_offset_emitted = 1; } } else regno = REGNO (src); if (GR_REGNO_P (regno)) { if (regno >= GR_REG (4) && regno <= GR_REG (7)) fprintf (asm_out_file, "\t.save.g 0x%x\n", 1 << (regno - GR_REG (4))); else if (tmp_reg != NULL_RTX && regno == REGNO (tmp_reg)) { /* We saved a special reg to a temp reg, and now we're dumping it to memory. */ tmp_reg = NULL_RTX; /* Unspec code 9 means ar.unat was saved. */ if (tmp_saved == 9) fprintf (asm_out_file, "\t.savesp ar.unat, %d\n", (sp_offset - 8)); else if (tmp_saved == 5) fprintf (asm_out_file, "\t.savesp pr, %d\n", (sp_offset - 8)); else if (tmp_saved >= BR_REG (1) && tmp_saved <= BR_REG (5)) { /* BR regs are saved this way too. */ fprintf (asm_out_file, "\t.save.b 0x%x\n", 1 << (tmp_saved - BR_REG (1))); } } else return 0; } if (FR_REGNO_P (regno)) { if (regno >= FR_REG (2) && regno <= FR_REG (5)) fprintf (asm_out_file, "\t.save.f 0x%x\n", 1 << (regno - FR_REG (2))); else if (regno >= FR_REG (16) && regno <= FR_REG (31)) fprintf (asm_out_file, "\t.save.gf 0x0, 0x%x\n", 1 << (regno - FR_REG (12))); else return 0; } return 1; } } return 0; } /* This function looks at a single insn and emits any directives required to unwind this insn.
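   A worked example of the masks computed above: spilling the
   callee-saved registers r4 and r6 emits

       .save.g 0x1
       .save.g 0x4

   since bit N of the .save.g mask stands for r(4+N); likewise .save.f
   uses bit N for f(2+N), and .save.b uses bit N for b(1+N).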
*/ void process_for_unwind_directive (asm_out_file, insn) FILE *asm_out_file; rtx insn; { if ((flag_unwind_tables || (flag_exceptions && !exceptions_via_longjmp)) && RTX_FRAME_RELATED_P (insn)) { rtx pat = PATTERN (insn); switch (GET_CODE (pat)) { case SET: process_set (asm_out_file, pat); break; case PARALLEL: { int par_index; int limit = XVECLEN (pat, 0); for (par_index = 0; par_index < limit; par_index++) { rtx x = XVECEXP (pat, 0, par_index); if (GET_CODE (x) == SET) process_set (asm_out_file, x); } break; } default: abort (); } } } #define def_builtin(name, type, code) \ builtin_function ((name), (type), (code), BUILT_IN_MD, NULL_PTR) struct builtin_description { enum insn_code icode; const char *name; enum ia64_builtins code; enum rtx_code comparison; unsigned int flag; }; /* All 32 bit intrinsics that take 2 arguments. */ static struct builtin_description bdesc_2argsi[] = { { CODE_FOR_fetch_and_add_si, "__sync_fetch_and_add_si", IA64_BUILTIN_FETCH_AND_ADD_SI, 0, 0 }, { CODE_FOR_fetch_and_sub_si, "__sync_fetch_and_sub_si", IA64_BUILTIN_FETCH_AND_SUB_SI, 0, 0 }, { CODE_FOR_fetch_and_or_si, "__sync_fetch_and_or_si", IA64_BUILTIN_FETCH_AND_OR_SI, 0, 0 }, { CODE_FOR_fetch_and_and_si, "__sync_fetch_and_and_si", IA64_BUILTIN_FETCH_AND_AND_SI, 0, 0 }, { CODE_FOR_fetch_and_xor_si, "__sync_fetch_and_xor_si", IA64_BUILTIN_FETCH_AND_XOR_SI, 0, 0 }, { CODE_FOR_fetch_and_nand_si, "__sync_fetch_and_nand_si", IA64_BUILTIN_FETCH_AND_NAND_SI, 0, 0 }, { CODE_FOR_add_and_fetch_si, "__sync_add_and_fetch_si", IA64_BUILTIN_ADD_AND_FETCH_SI, 0, 0 }, { CODE_FOR_sub_and_fetch_si, "__sync_sub_and_fetch_si", IA64_BUILTIN_SUB_AND_FETCH_SI, 0, 0 }, { CODE_FOR_or_and_fetch_si, "__sync_or_and_fetch_si", IA64_BUILTIN_OR_AND_FETCH_SI, 0, 0 }, { CODE_FOR_and_and_fetch_si, "__sync_and_and_fetch_si", IA64_BUILTIN_AND_AND_FETCH_SI, 0, 0 }, { CODE_FOR_xor_and_fetch_si, "__sync_xor_and_fetch_si", IA64_BUILTIN_XOR_AND_FETCH_SI, 0, 0 }, { CODE_FOR_nand_and_fetch_si, "__sync_nand_and_fetch_si", IA64_BUILTIN_NAND_AND_FETCH_SI, 0, 0 } }; /* All 64 bit intrinsics that take 2 arguments. 
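   These mirror the 32 bit table above.  In user code the two families
   look like, e.g.,

       int  icnt;  long lcnt;
       int  iold = __sync_fetch_and_add_si (&icnt, 1);
       long lold = __sync_fetch_and_add_di (&lcnt, 1L);

   where each fetch_and form returns the value the object held before
   the operation, and the _and_fetch forms return the updated value
   instead.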
*/ static struct builtin_description bdesc_2argdi[] = { { CODE_FOR_fetch_and_add_di, "__sync_fetch_and_add_di", IA64_BUILTIN_FETCH_AND_ADD_DI, 0, 0 }, { CODE_FOR_fetch_and_sub_di, "__sync_fetch_and_sub_di", IA64_BUILTIN_FETCH_AND_SUB_DI, 0, 0 }, { CODE_FOR_fetch_and_or_di, "__sync_fetch_and_or_di", IA64_BUILTIN_FETCH_AND_OR_DI, 0, 0 }, { CODE_FOR_fetch_and_and_di, "__sync_fetch_and_and_di", IA64_BUILTIN_FETCH_AND_AND_DI, 0, 0 }, { CODE_FOR_fetch_and_xor_di, "__sync_fetch_and_xor_di", IA64_BUILTIN_FETCH_AND_XOR_DI, 0, 0 }, { CODE_FOR_fetch_and_nand_di, "__sync_fetch_and_nand_di", IA64_BUILTIN_FETCH_AND_NAND_DI, 0, 0 }, { CODE_FOR_add_and_fetch_di, "__sync_add_and_fetch_di", IA64_BUILTIN_ADD_AND_FETCH_DI, 0, 0 }, { CODE_FOR_sub_and_fetch_di, "__sync_sub_and_fetch_di", IA64_BUILTIN_SUB_AND_FETCH_DI, 0, 0 }, { CODE_FOR_or_and_fetch_di, "__sync_or_and_fetch_di", IA64_BUILTIN_OR_AND_FETCH_DI, 0, 0 }, { CODE_FOR_and_and_fetch_di, "__sync_and_and_fetch_di", IA64_BUILTIN_AND_AND_FETCH_DI, 0, 0 }, { CODE_FOR_xor_and_fetch_di, "__sync_xor_and_fetch_di", IA64_BUILTIN_XOR_AND_FETCH_DI, 0, 0 }, { CODE_FOR_nand_and_fetch_di, "__sync_nand_and_fetch_di", IA64_BUILTIN_NAND_AND_FETCH_DI, 0, 0 } }; void ia64_init_builtins () { size_t i; tree psi_type_node = build_pointer_type (integer_type_node); tree pdi_type_node = build_pointer_type (long_integer_type_node); tree endlink = tree_cons (NULL_TREE, void_type_node, NULL_TREE); /* __sync_val_compare_and_swap_si, __sync_bool_compare_and_swap_si */ tree si_ftype_psi_si_si = build_function_type (integer_type_node, tree_cons (NULL_TREE, psi_type_node, tree_cons (NULL_TREE, integer_type_node, tree_cons (NULL_TREE, integer_type_node, endlink)))); /* __sync_val_compare_and_swap_di, __sync_bool_compare_and_swap_di */ tree di_ftype_pdi_di_di = build_function_type (long_integer_type_node, tree_cons (NULL_TREE, pdi_type_node, tree_cons (NULL_TREE, long_integer_type_node, tree_cons (NULL_TREE, long_integer_type_node, endlink)))); /* __sync_synchronize */ tree void_ftype_void = build_function_type (void_type_node, endlink); /* __sync_lock_test_and_set_si */ tree si_ftype_psi_si = build_function_type (integer_type_node, tree_cons (NULL_TREE, psi_type_node, tree_cons (NULL_TREE, integer_type_node, endlink))); /* __sync_lock_test_and_set_di */ tree di_ftype_pdi_di = build_function_type (long_integer_type_node, tree_cons (NULL_TREE, pdi_type_node, tree_cons (NULL_TREE, long_integer_type_node, endlink))); /* __sync_lock_release_si */ tree void_ftype_psi = build_function_type (void_type_node, tree_cons (NULL_TREE, psi_type_node, endlink)); /* __sync_lock_release_di */ tree void_ftype_pdi = build_function_type (void_type_node, tree_cons (NULL_TREE, pdi_type_node, endlink)); def_builtin ("__sync_val_compare_and_swap_si", si_ftype_psi_si_si, IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI); def_builtin ("__sync_val_compare_and_swap_di", di_ftype_pdi_di_di, IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI); def_builtin ("__sync_bool_compare_and_swap_si", si_ftype_psi_si_si, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI); def_builtin ("__sync_bool_compare_and_swap_di", di_ftype_pdi_di_di, IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI); def_builtin ("__sync_synchronize", void_ftype_void, IA64_BUILTIN_SYNCHRONIZE); def_builtin ("__sync_lock_test_and_set_si", si_ftype_psi_si, IA64_BUILTIN_LOCK_TEST_AND_SET_SI); def_builtin ("__sync_lock_test_and_set_di", di_ftype_pdi_di, IA64_BUILTIN_LOCK_TEST_AND_SET_DI); def_builtin ("__sync_lock_release_si", void_ftype_psi, IA64_BUILTIN_LOCK_RELEASE_SI); def_builtin 
("__sync_lock_release_di", void_ftype_pdi, IA64_BUILTIN_LOCK_RELEASE_DI); def_builtin ("__builtin_ia64_bsp", build_function_type (ptr_type_node, endlink), IA64_BUILTIN_BSP); def_builtin ("__builtin_ia64_flushrs", build_function_type (void_type_node, endlink), IA64_BUILTIN_FLUSHRS); /* Add all builtins that are operations on two args. */ for (i = 0; i < sizeof(bdesc_2argsi) / sizeof *bdesc_2argsi; i++) def_builtin (bdesc_2argsi[i].name, si_ftype_psi_si, bdesc_2argsi[i].code); for (i = 0; i < sizeof(bdesc_2argdi) / sizeof *bdesc_2argdi; i++) def_builtin (bdesc_2argdi[i].name, si_ftype_psi_si, bdesc_2argdi[i].code); } /* Expand fetch_and_op intrinsics. The basic code sequence is: mf ldsz return = [ptr]; tmp = return; do { oldval = tmp; ar.ccv = tmp; tmp = value; cmpxchgsz.acq tmp = [ptr], tmp cmpxchgsz.acq tmp = [ptr], tmp } while (tmp != oldval) */ void ia64_expand_fetch_and_op (code, mode, operands) enum fetchop_code code; enum machine_mode mode; rtx operands[]; { rtx oldval, newlabel; rtx tmp_reg = gen_rtx_REG (mode, GR_REG(0)); rtx mfreg = gen_rtx_MEM (BLKmode, tmp_reg); RTX_UNCHANGING_P (mfreg) = 1; emit_insn (gen_mf (mfreg)); tmp_reg = gen_reg_rtx (mode); oldval = gen_reg_rtx (mode); if (mode == SImode) { emit_insn (gen_movsi (operands[0], operands[1])); emit_insn (gen_movsi (tmp_reg, operands[0])); } else { emit_insn (gen_movdi (operands[0], operands[1])); emit_insn (gen_movdi (tmp_reg, operands[0])); } newlabel = gen_label_rtx (); emit_label (newlabel); if (mode == SImode) { emit_insn (gen_movsi (oldval, tmp_reg)); emit_insn (gen_ccv_restore_si (tmp_reg)); } else { emit_insn (gen_movdi (oldval, tmp_reg)); emit_insn (gen_ccv_restore_di (tmp_reg)); } /* Perform the specific operation. */ switch (code) { case IA64_ADD_OP: { rtx reg; if (GET_CODE (operands[2]) == CONST_INT) reg = gen_reg_rtx (mode); else reg = operands[2]; if (mode == SImode) { if (reg != operands[2]) emit_insn (gen_movsi (reg, operands[2])); emit_insn (gen_addsi3 (tmp_reg, tmp_reg, reg)); } else { if (reg != operands[2]) emit_insn (gen_movdi (reg, operands[2])); emit_insn (gen_adddi3 (tmp_reg, tmp_reg, reg)); } break; } case IA64_SUB_OP: if (mode == SImode) emit_insn (gen_subsi3 (tmp_reg, tmp_reg, operands[2])); else emit_insn (gen_subdi3 (tmp_reg, tmp_reg, operands[2])); break; case IA64_OR_OP: emit_insn (gen_iordi3 (tmp_reg, tmp_reg, operands[2])); break; case IA64_AND_OP: emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2])); break; case IA64_XOR_OP: emit_insn (gen_xordi3 (tmp_reg, tmp_reg, operands[2])); break; case IA64_NAND_OP: emit_insn (gen_anddi3 (tmp_reg, tmp_reg, operands[2])); if (mode == SImode) emit_insn (gen_one_cmplsi2 (tmp_reg, operands[0])); else emit_insn (gen_one_cmpldi2 (tmp_reg, operands[0])); break; default: break; } if (mode == SImode) emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], tmp_reg)); else emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], tmp_reg)); emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel); } /* Expand op_and_fetch intrinsics. 
The basic code sequence is: mf ldsz tmp = [ptr]; do { oldval = tmp; ar.ccv = tmp; ret = tmp <op> value; cmpxchgsz.acq tmp = [ptr], ret } while (tmp != oldval) */ void ia64_expand_op_and_fetch (code, mode, operands) enum fetchop_code code; enum machine_mode mode; rtx operands[]; { rtx oldval, newlabel; rtx tmp_reg, tmp2_reg = gen_rtx_REG (mode, GR_REG(0)); rtx mfreg = gen_rtx_MEM (BLKmode, tmp2_reg); RTX_UNCHANGING_P (mfreg) = 1; emit_insn (gen_mf (mfreg)); tmp_reg = gen_reg_rtx (mode); if (mode == SImode) emit_insn (gen_movsi (tmp_reg, operands[1])); else emit_insn (gen_movdi (tmp_reg, operands[1])); newlabel = gen_label_rtx (); emit_label (newlabel); oldval = gen_reg_rtx (mode); if (mode == SImode) { emit_insn (gen_movsi (oldval, tmp_reg)); emit_insn (gen_ccv_restore_si (tmp_reg)); } else { emit_insn (gen_movdi (oldval, tmp_reg)); emit_insn (gen_ccv_restore_di (tmp_reg)); } /* Perform the specific operation. */ switch (code) { case IA64_ADD_OP: if (mode == SImode) emit_insn (gen_addsi3 (operands[0], tmp_reg, operands[2])); else emit_insn (gen_adddi3 (operands[0], tmp_reg, operands[2])); break; case IA64_SUB_OP: if (mode == SImode) emit_insn (gen_subsi3 (operands[0], tmp_reg, operands[2])); else emit_insn (gen_subdi3 (operands[0], tmp_reg, operands[2])); break; case IA64_OR_OP: emit_insn (gen_iordi3 (operands[0], tmp_reg, operands[2])); break; case IA64_AND_OP: emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2])); break; case IA64_XOR_OP: emit_insn (gen_xordi3 (operands[0], tmp_reg, operands[2])); break; case IA64_NAND_OP: emit_insn (gen_anddi3 (operands[0], tmp_reg, operands[2])); if (mode == SImode) emit_insn (gen_one_cmplsi2 (operands[0], operands[0])); else emit_insn (gen_one_cmpldi2 (operands[0], operands[0])); break; default: break; } if (mode == SImode) emit_insn (gen_cmpxchg_acq_si (tmp_reg, operands[1], operands[0])); else emit_insn (gen_cmpxchg_acq_di (tmp_reg, operands[1], operands[0])); emit_cmp_and_jump_insns (tmp_reg, oldval, NE, 0, mode, 1, 0, newlabel); } /* Expand val_ and bool_compare_and_swap. For val_ we want: ar.ccv = oldval mf cmpxchgsz.acq ret = [ptr], newval, ar.ccv return ret For bool_ it's the same except return ret == oldval. */ static rtx ia64_expand_compare_and_swap (icode, arglist, target, boolcode) enum insn_code icode; tree arglist; rtx target; int boolcode; { tree arg0, arg1, arg2; rtx op0, op1, op2, pat; enum machine_mode tmode, mode0, mode1, mode2; arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); arg2 = TREE_VALUE (TREE_CHAIN (TREE_CHAIN (arglist))); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); op2 = expand_expr (arg2, NULL_RTX, VOIDmode, 0); tmode = insn_data[icode].operand[0].mode; mode0 = insn_data[icode].operand[1].mode; mode1 = insn_data[icode].operand[2].mode; mode2 = insn_data[icode].operand[3].mode; op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); RTX_UNCHANGING_P (op0) = 1; if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) op1 = copy_to_mode_reg (mode1, op1); if (! (*insn_data[icode].operand[3].predicate) (op2, mode2)) op2 = copy_to_mode_reg (mode2, op2); if (target == 0 || GET_MODE (target) != tmode || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); pat = GEN_FCN (icode) (target, op0, op1, op2); if (! 
pat) return 0; emit_insn (pat); if (boolcode) { if (tmode == SImode) { emit_insn (gen_cmpsi (target, op1)); emit_insn (gen_seq (gen_lowpart (DImode, target))); } else { emit_insn (gen_cmpdi (target, op1)); emit_insn (gen_seq (target)); } } return target; } /* Expand all intrinsics that take 2 arguments. */ static rtx ia64_expand_binop_builtin (icode, arglist, target) enum insn_code icode; tree arglist; rtx target; { rtx pat; tree arg0 = TREE_VALUE (arglist); tree arg1 = TREE_VALUE (TREE_CHAIN (arglist)); rtx op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); rtx op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); enum machine_mode tmode = insn_data[icode].operand[0].mode; enum machine_mode mode0 = insn_data[icode].operand[1].mode; enum machine_mode mode1 = insn_data[icode].operand[2].mode; if (! target || GET_MODE (target) != tmode || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) op1 = copy_to_mode_reg (mode1, op1); pat = GEN_FCN (icode) (target, op0, op1); if (! pat) return 0; emit_insn (pat); return target; } rtx ia64_expand_builtin (exp, target, subtarget, mode, ignore) tree exp; rtx target; rtx subtarget ATTRIBUTE_UNUSED; enum machine_mode mode ATTRIBUTE_UNUSED; int ignore ATTRIBUTE_UNUSED; { rtx op0, op1, pat; rtx tmp_reg; tree arg0, arg1; tree arglist = TREE_OPERAND (exp, 1); tree fndecl = TREE_OPERAND (TREE_OPERAND (exp, 0), 0); int fcode = DECL_FUNCTION_CODE (fndecl); enum machine_mode tmode, mode0, mode1; enum insn_code icode; size_t i; struct builtin_description *d; switch (fcode) { case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_SI: return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si, arglist, target, 1); case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_SI: return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_si, arglist, target, 0); case IA64_BUILTIN_BOOL_COMPARE_AND_SWAP_DI: return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di, arglist, target, 1); case IA64_BUILTIN_VAL_COMPARE_AND_SWAP_DI: return ia64_expand_compare_and_swap (CODE_FOR_val_compare_and_swap_di, arglist, target, 0); case IA64_BUILTIN_SYNCHRONIZE: /* Pass a volatile memory operand. */ tmp_reg = gen_rtx_REG (DImode, GR_REG(0)); target = gen_rtx_MEM (BLKmode, tmp_reg); emit_insn (gen_mf (target)); return const0_rtx; case IA64_BUILTIN_LOCK_TEST_AND_SET_SI: icode = CODE_FOR_lock_test_and_set_si; arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); tmode = insn_data[icode].operand[0].mode; mode0 = insn_data[icode].operand[1].mode; mode1 = insn_data[icode].operand[2].mode; op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); RTX_UNCHANGING_P (op0) = 1; if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) op1 = copy_to_mode_reg (mode1, op1); if (target == 0 || GET_MODE (target) != tmode || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); pat = GEN_FCN (icode) (target, op0, op1); if (! 
pat) return 0; emit_insn (pat); return target; case IA64_BUILTIN_LOCK_TEST_AND_SET_DI: icode = CODE_FOR_lock_test_and_set_di; arg0 = TREE_VALUE (arglist); arg1 = TREE_VALUE (TREE_CHAIN (arglist)); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); op1 = expand_expr (arg1, NULL_RTX, VOIDmode, 0); tmode = insn_data[icode].operand[0].mode; mode0 = insn_data[icode].operand[1].mode; mode1 = insn_data[icode].operand[2].mode; op0 = gen_rtx_MEM (mode0, copy_to_mode_reg (Pmode, op0)); RTX_UNCHANGING_P (op0) = 1; if (! (*insn_data[icode].operand[2].predicate) (op1, mode1)) op1 = copy_to_mode_reg (mode1, op1); if (target == 0 || GET_MODE (target) != tmode || ! (*insn_data[icode].operand[0].predicate) (target, tmode)) target = gen_reg_rtx (tmode); pat = GEN_FCN (icode) (target, op0, op1); if (! pat) return 0; emit_insn (pat); return target; case IA64_BUILTIN_LOCK_RELEASE_SI: arg0 = TREE_VALUE (arglist); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); op0 = gen_rtx_MEM (SImode, copy_to_mode_reg (Pmode, op0)); MEM_VOLATILE_P (op0) = 1; emit_insn (gen_movsi (op0, GEN_INT(0))); return const0_rtx; case IA64_BUILTIN_LOCK_RELEASE_DI: arg0 = TREE_VALUE (arglist); op0 = expand_expr (arg0, NULL_RTX, VOIDmode, 0); op0 = gen_rtx_MEM (DImode, copy_to_mode_reg (Pmode, op0)); MEM_VOLATILE_P (op0) = 1; emit_insn (gen_movdi (op0, GEN_INT(0))); return const0_rtx; case IA64_BUILTIN_BSP: { rtx reg = gen_reg_rtx (DImode); emit_insn (gen_bsp_value (reg)); return reg; } case IA64_BUILTIN_FLUSHRS: emit_insn (gen_flushrs ()); return const0_rtx; default: break; } /* Expand all 32 bit intrinsics that take 2 arguments. */ for (i=0, d = bdesc_2argsi; i < sizeof (bdesc_2argsi) / sizeof *d; i++, d++) if (d->code == fcode) return ia64_expand_binop_builtin (d->icode, arglist, target); /* Expand all 64 bit intrinsics that take 2 arguments. */ for (i=0, d = bdesc_2argdi; i < sizeof (bdesc_2argdi) / sizeof *d; i++, d++) if (d->code == fcode) return ia64_expand_binop_builtin (d->icode, arglist, target); return 0; }