/* Sign extension elimination optimization for GNU compiler. Copyright (C) 2005, 2006, 2007, 2008 Free Software Foundation, Inc. Contributed by Leehod Baruch This file is part of GCC. GCC is free software; you can redistribute it and/or modify it under the terms of the GNU General Public License as published by the Free -Software Foundation; either version 3, or (at your option) any later version. GCC is distributed in the hope that it will be useful, but WITHOUT ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for more details. You should have received a copy of the GNU General Public License along with GCC; see the file COPYING3. If not see . Problem description: -------------------- In order to support 32bit computations on a 64bit machine, sign extension instructions are generated to ensure the correctness of the computation. A possible policy (as currently implemented) is to generate a sign extension right after each 32bit computation. Depending on the instruction set of the architecture, some of these sign extension instructions may be redundant. There are two cases in which the extension may be redundant: Case1: The instruction that uses the 64bit operands that are sign extended has a dual mode that works with 32bit operands. For example: int32 a, b; a = .... --> a = .... a = sign extend a --> b = .... --> b = .... b = sign extend a --> --> cmpd a, b --> cmpw a, b //half word compare Case2: The instruction that defines the 64bit operand (which is later sign extended) has a dual mode that defines and sign-extends simultaneously a 32bit operand. For example: int32 a; ld a --> lwa a // load half word and sign extend a = sign extend a --> --> return a --> return a General idea for solution: -------------------------- First, try to merge the sign extension with the instruction that defines the source of the extension and (separately) with the instructions that uses the extended result. By doing this, both cases of redundancies (as described above) will be eliminated. Then, use partial redundancy elimination to place the non redundant ones at optimal placements. Implementation by example: -------------------------- Note: The instruction stream is not changed till the last phase. Phase 0: Initial code, as currently generated by gcc. def1 def3 se1 def2 se3 | \ | / | | \ | / | | \ | / | | \ | / | | \ | / | | \|/ | use1 use2 use3 use4 def1 + se1: set ((reg:SI 10) (..def1rhs..)) set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) def2: set ((reg:DI 100) (const_int 7)) def3 + se3: set ((reg:SI 20) (..def3rhs..)) set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) use1: set ((reg:CC...) (compare:CC (reg:DI 100) (...))) use2, use3, use4: set ((...) (reg:DI 100)) Phase 1: Propagate extensions to uses. def1 def3 se1 def2 se3 | \ | / | | \ | / | | \ | / | | \ | / | | \ | / | | \|/ | se se se use1 use2 use3 se use4 From here, all of the subregs are lowpart ! def1, def2, def3: No change. use1: set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) set ((reg:CC...) (compare:CC (reg:DI 100) (...))) use2, use3, use4: set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) set ((...) (reg:DI 100)) Phase 2: Merge and eliminate locally redundant extensions. *def1 def2 *def3 [se removed] se se3 | \ | / | | \ | / | | \ | / | | \ | / | | \ | / | | \|/ | [se removed] se se *use1 use2 use3 [se removed] use4 The instructions that were changed at this phase are marked with asterisk. *def1: Merge failed. Remove the sign extension instruction, modify def1 and insert a move instruction to assure to correctness of the code. set ((subreg:SI (reg:DI 100)) (..def1rhs..)) set ((reg:SI 10) (subreg:SI (reg:DI 100))) def2 + se: There is no need for merge. Def2 is not changed but a sign extension instruction is created. set ((reg:DI 100) (const_int 7)) set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) *def3 + se3: Merge succeeded. set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) set ((reg:SI 20) (reg:DI 100)) set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) (The extension instruction is the original one). *use1: Merge succeeded. Remove the sign extension instruction. set ((reg:CC...) (compare:CC (subreg:SI (reg:DI 100)) (...))) use2, use3: Merge failed. No change. use4: The extension is locally redundant, therefore it is eliminated at this point. Phase 3: Eliminate globally redundant extensions. Following the LCM output: def1 def2 def3 se se3 | \ | / | | \ | / | | se | / | | \ | / | | \ | / | | \|/ | [ses removed] use1 use2 use3 use4 se: set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) se3: set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) Phase 4: Commit changes to the insn stream. def1 def3 *def1 def2 *def3 se1 def2 se3 [se removed] [se removed] | \ | / | | \ | / | | \ | / | ------> | \ | / | | \ | / | ------> | se | / | | \ | / | | \ | / | | \ | / | | \ | / | | \|/ | | \|/ | use1 use2 use3 *use1 use2 use3 use4 use4 The instructions that were changed during the whole optimization are marked with asterisk. The result: def1 + se1: [ set ((reg:SI 10) (..def1rhs..)) ] - Deleted [ set ((reg:DI 100) (sign_extend:DI (reg:SI 10))) ] - Deleted set ((subreg:SI (reg:DI 100)) (..def3rhs..)) - Inserted set ((reg:SI 10) (subreg:SI (reg:DI 100))) - Inserted def2: set ((reg:DI 100) (const_int 7)) - No change def3 + se3: [ set ((reg:SI 20) (..def3rhs..)) ] - Deleted [ set ((reg:DI 100) (sign_extend:DI (reg:SI 20))) ] - Deleted set ((reg:DI 100) (sign_extend:DI (..def3rhs..))) - Inserted set ((reg:SI 20) (reg:DI 100)) - Inserted use1: [ set ((reg:CC...) (compare:CC (reg:DI 100) (...))) ] - Deleted set ((reg:CC...) - Inserted (compare:CC (subreg:SI (reg:DI 100)) (...))) use2, use3, use4: set ((...) (reg:DI 100)) - No change se: - Inserted set ((reg:DI 100) (sign_extend:DI ((subreg:SI (reg:DI 100))))) Note: Most of the simple move instructions that were inserted will be trivially dead and therefore eliminated. The implementation outline: --------------------------- Some definitions: A web is RELEVANT if at the end of phase 1, his leader's relevancy is {ZERO, SIGN}_EXTENDED_DEF. The source_mode of the web is the source_mode of his leader. A definition is a candidate for the optimization if it is part of a RELEVANT web and his local source_mode is not narrower then the source_mode of its web. A use is a candidate for the optimization if it is part of a RELEVANT web. A simple explicit extension is a single set instruction that extends a register (or a subregister) to a register (or subregister). A complex explicit extension is an explicit extension instruction that is not simple. A def extension is a simple explicit extension that is also a candidate for the optimization. This extension is part of the instruction stream, it is not generated by this optimization. A use extension is a simple explicit extension that is generated and stored for candidate use during this optimization. It is not emitted to the instruction stream till the last phase of the optimization. A reference is an instruction that satisfy at least on of these criteria: - It contains a definition with EXTENDED_DEF relevancy in a RELEVANT web. - It is followed by a def extension. - It contains a candidate use. Phase 1: Propagate extensions to uses. In this phase, we find candidate extensions for the optimization and we generate (but not emit) proper extensions "right before the uses". a. Build a DF object. b. Traverse over all the instructions that contains a definition and set their local relevancy and local source_mode like this: - If the instruction is a simple explicit extension instruction, mark it as {ZERO, SIGN}_EXTENDED_DEF according to the extension type and mark its source_mode to be the mode of the quantity that is been extended. - Otherwise, If the instruction has an implicit extension, which means that its high part is an extension of its low part, or if it is a complicated explicit extension, mark it as EXTENDED_DEF and set its source_mode to be the narrowest mode that is been extended in the instruction. c. Traverse over all the instructions that contains a use and set their local relevancy to RELEVANT_USE (except for few corner cases). d. Produce the web. During union of two entries, update the relevancy and source_mode of the leader. There are two major guide lines for this update: - If one of the entries is NOT_RELEVANT, mark the leader NOT_RELEVANT. - If one is ZERO_EXTENDED_DEF and the other is SIGN_EXTENDED_DEF (or vice versa) mark the leader as NOT_RELEVANT. We don't handle this kind of mixed webs. (For more details about this update process, see see_update_leader_extra_info ()). e. Generate uses extensions according to the relevancy and source_mode of the webs. Phase 2: Merge and eliminate locally redundant extensions. In this phase, we try to merge def extensions and use extensions with their references, and eliminate redundant extensions in the same basic block. Traverse over all the references. Do this in basic block number and luid number forward order. For each reference do: a. Peephole optimization - try to merge it with all its def extensions and use extensions in the following order: - Try to merge only the def extensions, one by one. - Try to merge only the use extensions, one by one. - Try to merge any couple of use extensions simultaneously. - Try to merge any def extension with one or two uses extensions simultaneously. b. Handle each EXTENDED_DEF in it as if it was already merged with an extension. During the merge process we save the following data for each register in each basic block: a. The first instruction that defines the register in the basic block. b. The last instruction that defines the register in the basic block. c. The first extension of this register before the first instruction that defines it in the basic block. c. The first extension of this register after the last instruction that defines it in the basic block. This data will help us eliminate (or more precisely, not generate) locally redundant extensions, and will be useful in the next stage. While merging extensions with their reference there are 4 possible situations: a. A use extension was merged with the reference: Delete the extension instruction and save the merged reference for phase 4. (For details, see see_use_extension_merged ()) b. A use extension failed to be merged with the reference: If there is already such an extension in the same basic block and it is not dead at this point, delete the unmerged extension (it is locally redundant), otherwise properly update the above basic block data. (For details, see see_merge_one_use_extension ()) c. A def extension was merged with the reference: Mark this extension as a merged_def extension and properly update the above basic block data. (For details, see see_merge_one_def_extension ()) d. A def extension failed to be merged with the reference: Replace the definition of the NARROWmode register in the reference with the proper subreg of WIDEmode register and save the result as a merged reference. Also, properly update the the above basic block data. (For details, see see_def_extension_not_merged ()) Phase 3: Eliminate globally redundant extensions. In this phase, we set the bit vectors input of the edge based LCM using the recorded data on the registers in each basic block. We also save pointers for all the anticipatable and available occurrences of the relevant extensions. Then we run the LCM. a. Initialize the comp, antloc, kill bit vectors to zero and the transp bit vector to ones. b. Traverse over all the references. Do this in basic block number and luid number forward order. For each reference: - Go over all its use extensions. For each such extension - If it is not dead from the beginning of the basic block SET the antloc bit of the current extension in the current basic block bits. If it is not dead till the end of the basic block SET the comp bit of the current extension in the current basic block bits. - Go over all its def extensions that were merged with it. For each such extension - If it is not dead till the end of the basic block SET the comp bit of the current extension in the current basic block bits. RESET the proper transp and kill bits. - Go over all its def extensions that were not merged with it. For each such extension - RESET the transp bit and SET the kill bit of the current extension in the current basic block bits. c. Run the edge based LCM. Phase 4: Commit changes to the insn stream. This is the only phase that actually changes the instruction stream. Up to this point the optimization could be aborted at any time. Here we insert extensions at their best placements and delete the redundant ones according to the output of the LCM. We also replace some of the instructions according to the second phase merges results. a. Use the pre_delete_map (from the output of the LCM) in order to delete redundant extensions. This will prevent them from been emitted in the first place. b. Insert extensions on edges where needed according to pre_insert_map and edge_list (from the output of the LCM). c. For each reference do- - Emit all the uses extensions that were not deleted until now, right before the reference. - Delete all the merged and unmerged def extensions from the instruction stream. - Replace the reference with the merged one, if exist. The implementation consists of four data structures: - Data structure I Purpose: To handle the relevancy of the uses, definitions and webs. Relevant structures: web_entry (from df.h), see_entry_extra_info. Details: This is a disjoint-set data structure. Most of its functions are implemented in web.c. Each definition and use in the code are elements. A web_entry structure is allocated for each element to hold the element's relevancy and source_mode. The union rules are defined in see_update_leader_extra_info (). - Data structure II Purpose: To store references and their extensions (uses and defs) and to enable traverse over these references according to basic block order. Relevant structure: see_ref_s. Details: This data structure consists of an array of splay trees. One splay tree for each basic block. The splay tree nodes are references and the keys are the luids of the references. A see_ref_s structure is allocated for each reference. It holds the reference itself, its def and uses extensions and later the merged version of the reference. Using this data structure we can traverse over all the references of a basic block and their extensions in forward order. - Data structure III. Purpose: To store local properties of registers for each basic block. This data will later help us build the LCM sbitmap_vectors input. Relevant structure: see_register_properties. Details: This data structure consists of an array of hash tables. One hash for each basic block. The hash node are a register properties and the keys are the numbers of the registers. A see_register_properties structure is allocated for each register that we might be interested in its properties. Using this data structure we can easily find the properties of a register in a specific basic block. This is necessary for locally redundancy elimination and for setting up the LCM input. - Data structure IV. Purpose: To store the extensions that are candidate for PRE and their anticipatable and available occurrences. Relevant structure: see_occr, see_pre_extension_expr. Details: This data structure is a hash tables. Its nodes are the extensions that are candidate for PRE. A see_pre_extension_expr structure is allocated for each candidate extension. It holds a copy of the extension and a linked list of all the anticipatable and available occurrences of it. We use this data structure when we read the output of the LCM. */ #include "config.h" #include "system.h" #include "coretypes.h" #include "tm.h" #include "obstack.h" #include "rtl.h" #include "output.h" #include "df.h" #include "insn-config.h" #include "recog.h" #include "expr.h" #include "splay-tree.h" #include "hashtab.h" #include "regs.h" #include "timevar.h" #include "tree-pass.h" #include "dce.h" /* Used to classify defs and uses according to relevancy. */ enum entry_type { NOT_RELEVANT, SIGN_EXTENDED_DEF, ZERO_EXTENDED_DEF, EXTENDED_DEF, RELEVANT_USE }; /* Used to classify extensions in relevant webs. */ enum extension_type { DEF_EXTENSION, EXPLICIT_DEF_EXTENSION, IMPLICIT_DEF_EXTENSION, USE_EXTENSION }; /* Global data structures and flags. */ /* This structure will be assigned for each web_entry structure (defined in df.h). It is placed in the extra_info field of a web_entry and holds the relevancy and source mode of the web_entry. */ struct see_entry_extra_info { /* The relevancy of the ref. */ enum entry_type relevancy; /* The relevancy of the ref. This field is updated only once - when this structure is created. */ enum entry_type local_relevancy; /* The source register mode. */ enum machine_mode source_mode; /* This field is used only if the relevancy is ZERO/SIGN_EXTENDED_DEF. It is updated only once when this structure is created. */ enum machine_mode local_source_mode; /* This field is used only if the relevancy is EXTENDED_DEF. It holds the narrowest mode that is sign extended. */ enum machine_mode source_mode_signed; /* This field is used only if the relevancy is EXTENDED_DEF. It holds the narrowest mode that is zero extended. */ enum machine_mode source_mode_unsigned; }; /* There is one such structure for every reference. It stores the reference itself as well as its extensions (uses and definitions). Used as the value in splay_tree see_bb_splay_ar[]. */ struct see_ref_s { /* The luid of the insn. */ unsigned int luid; /* The insn of the ref. */ rtx insn; /* The merged insn that was formed from the reference's insn and extensions. If all merges failed, it remains NULL. */ rtx merged_insn; /* The def extensions of the reference that were not merged with it. */ htab_t unmerged_def_se_hash; /* The def extensions of the reference that were merged with it. Implicit extensions of the reference will be stored here too. */ htab_t merged_def_se_hash; /* The uses extensions of reference. */ htab_t use_se_hash; }; /* There is one such structure for every relevant extended register in a specific basic block. This data will help us build the LCM sbitmap_vectors input. */ struct see_register_properties { /* The register number. */ unsigned int regno; /* The last luid of the reference that defines this register in this basic block. */ int last_def; /* The luid of the reference that has the first extension of this register that appears before any definition in this basic block. */ int first_se_before_any_def; /* The luid of the reference that has the first extension of this register that appears after the last definition in this basic block. */ int first_se_after_last_def; }; /* Occurrence of an expression. There must be at most one available occurrence and at most one anticipatable occurrence per basic block. */ struct see_occr { /* Next occurrence of this expression. */ struct see_occr *next; /* The insn that computes the expression. */ rtx insn; int block_num; }; /* There is one such structure for every relevant extension expression. It holds a copy of this extension instruction as well as a linked lists of pointers to all the antic and avail occurrences of it. */ struct see_pre_extension_expr { /* A copy of the extension instruction. */ rtx se_insn; /* Index in the available expression bitmaps. */ int bitmap_index; /* List of anticipatable occurrences in basic blocks in the function. An "anticipatable occurrence" is the first occurrence in the basic block, the operands are not modified in the basic block prior to the occurrence and the output is not used between the start of the block and the occurrence. */ struct see_occr *antic_occr; /* List of available occurrence in basic blocks in the function. An "available occurrence" is the last occurrence in the basic block and the operands are not modified by following statements in the basic block [including this insn]. */ struct see_occr *avail_occr; }; /* Helper structure for the note_uses and see_replace_src functions. */ struct see_replace_data { rtx from; rtx to; }; /* Helper structure for the note_uses and see_mentioned_reg functions. */ struct see_mentioned_reg_data { rtx reg; bool mentioned; }; /* An array of web_entries. The i'th definition in the df object is associated with def_entry[i] */ static struct web_entry *def_entry = NULL; /* An array of web_entries. The i'th use in the df object is associated with use_entry[i] */ static struct web_entry *use_entry = NULL; /* Array of splay_trees. see_bb_splay_ar[i] refers to the splay tree of the i'th basic block. The splay tree will hold see_ref_s structures. The key is the luid of the insn. This way we can traverse over the references of each basic block in forward or backward order. */ static splay_tree *see_bb_splay_ar = NULL; /* Array of hashes. see_bb_hash_ar[i] refers to the hash of the i'th basic block. The hash will hold see_register_properties structure. The key is regno. */ static htab_t *see_bb_hash_ar = NULL; /* Hash table that holds a copy of all the extensions. The key is the right hand side of the se_insn field. */ static htab_t see_pre_extension_hash = NULL; /* Local LCM properties of expressions. */ /* Nonzero for expressions that are transparent in the block. */ static sbitmap *transp = NULL; /* Nonzero for expressions that are computed (available) in the block. */ static sbitmap *comp = NULL; /* Nonzero for expressions that are locally anticipatable in the block. */ static sbitmap *antloc = NULL; /* Nonzero for expressions that are locally killed in the block. */ static sbitmap *ae_kill = NULL; /* Nonzero for expressions which should be inserted on a specific edge. */ static sbitmap *pre_insert_map = NULL; /* Nonzero for expressions which should be deleted in a specific block. */ static sbitmap *pre_delete_map = NULL; /* Contains the edge_list returned by pre_edge_lcm. */ static struct edge_list *edge_list = NULL; /* Records the last basic block at the beginning of the optimization. */ static int last_bb; /* Records the number of uses at the beginning of the optimization. */ static unsigned int uses_num; /* Records the number of definitions at the beginning of the optimization. */ static unsigned int defs_num; #define ENTRY_EI(ENTRY) ((struct see_entry_extra_info *) (ENTRY)->extra_info) /* Functions implementation. */ /* Verifies that EXTENSION's pattern is this: set (reg/subreg reg1) (sign/zero_extend:WIDEmode (reg/subreg reg2)) If it doesn't have the expected pattern return NULL. Otherwise, if RETURN_DEST_REG is set, return reg1 else return reg2. */ static rtx see_get_extension_reg (rtx extension, bool return_dest_reg) { rtx set, rhs, lhs; rtx reg1 = NULL; rtx reg2 = NULL; /* Parallel pattern for extension not supported for the moment. */ if (GET_CODE (PATTERN (extension)) == PARALLEL) return NULL; set = single_set (extension); if (!set) return NULL; lhs = SET_DEST (set); rhs = SET_SRC (set); if (REG_P (lhs)) reg1 = lhs; else if (REG_P (SUBREG_REG (lhs))) reg1 = SUBREG_REG (lhs); else return NULL; if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND) return NULL; rhs = XEXP (rhs, 0); if (REG_P (rhs)) reg2 = rhs; else if (REG_P (SUBREG_REG (rhs))) reg2 = SUBREG_REG (rhs); else return NULL; if (return_dest_reg) return reg1; return reg2; } /* Verifies that EXTENSION's pattern is this: set (reg/subreg reg1) (sign/zero_extend: (...expr...) If it doesn't have the expected pattern return UNKNOWN. Otherwise, set SOURCE_MODE to be the mode of the extended expr and return the rtx code of the extension. */ static enum entry_type see_get_extension_data (rtx extension, enum machine_mode *source_mode) { rtx rhs, lhs, set; if (!extension || !INSN_P (extension)) return NOT_RELEVANT; /* Parallel pattern for extension not supported for the moment. */ if (GET_CODE (PATTERN (extension)) == PARALLEL) return NOT_RELEVANT; set = single_set (extension); if (!set) return NOT_RELEVANT; rhs = SET_SRC (set); lhs = SET_DEST (set); /* Don't handle extensions to something other then register or subregister. */ if (!REG_P (lhs) && GET_CODE (lhs) != SUBREG) return NOT_RELEVANT; if (GET_CODE (rhs) != SIGN_EXTEND && GET_CODE (rhs) != ZERO_EXTEND) return NOT_RELEVANT; if (!REG_P (XEXP (rhs, 0)) && !(GET_CODE (XEXP (rhs, 0)) == SUBREG && REG_P (SUBREG_REG (XEXP (rhs, 0))))) return NOT_RELEVANT; *source_mode = GET_MODE (XEXP (rhs, 0)); if (GET_CODE (rhs) == SIGN_EXTEND) return SIGN_EXTENDED_DEF; return ZERO_EXTENDED_DEF; } /* Generate instruction with the pattern: set ((reg r) (sign/zero_extend (subreg:mode (reg r)))) (the register r on both sides of the set is the same register). And recognize it. If the recognition failed, this is very bad, return NULL (This will abort the entire optimization). Otherwise, return the generated instruction. */ static rtx see_gen_normalized_extension (rtx reg, enum entry_type extension_code, enum machine_mode mode) { rtx subreg, insn; rtx extension = NULL; if (!reg || !REG_P (reg) || (extension_code != SIGN_EXTENDED_DEF && extension_code != ZERO_EXTENDED_DEF)) return NULL; subreg = gen_lowpart_SUBREG (mode, reg); if (extension_code == SIGN_EXTENDED_DEF) extension = gen_rtx_SIGN_EXTEND (GET_MODE (reg), subreg); else extension = gen_rtx_ZERO_EXTEND (GET_MODE (reg), subreg); start_sequence (); emit_insn (gen_rtx_SET (VOIDmode, reg, extension)); insn = get_insns (); end_sequence (); if (insn_invalid_p (insn)) /* Recognition failed, this is very bad for this optimization. Abort the optimization. */ return NULL; return insn; } /* Hashes and splay_trees related functions implementation. */ /* Helper functions for the pre_extension hash. This kind of hash will hold see_pre_extension_expr structures. The key is the right hand side of the se_insn field. Note that the se_insn is an expression that looks like: set ((reg:WIDEmode r1) (sign_extend:WIDEmode (subreg:NARROWmode (reg:WIDEmode r2)))) */ /* Return TRUE if P1 has the same value in its rhs as P2. Otherwise, return FALSE. P1 and P2 are see_pre_extension_expr structures. */ static int eq_descriptor_pre_extension (const void *p1, const void *p2) { const struct see_pre_extension_expr *const extension1 = (const struct see_pre_extension_expr *) p1; const struct see_pre_extension_expr *const extension2 = (const struct see_pre_extension_expr *) p2; rtx set1 = single_set (extension1->se_insn); rtx set2 = single_set (extension2->se_insn); rtx rhs1, rhs2; gcc_assert (set1 && set2); rhs1 = SET_SRC (set1); rhs2 = SET_SRC (set2); return rtx_equal_p (rhs1, rhs2); } /* P is a see_pre_extension_expr struct, use the RHS of the se_insn field. Note that the RHS is an expression that looks like this: (sign_extend:WIDEmode (subreg:NARROWmode (reg:WIDEmode r))) */ static hashval_t hash_descriptor_pre_extension (const void *p) { const struct see_pre_extension_expr *const extension = (const struct see_pre_extension_expr *) p; rtx set = single_set (extension->se_insn); rtx rhs; gcc_assert (set); rhs = SET_SRC (set); return hash_rtx (rhs, GET_MODE (rhs), 0, NULL, 0); } /* Free the allocated memory of the current see_pre_extension_expr struct. It frees the two linked list of the occurrences structures. */ static void hash_del_pre_extension (void *p) { struct see_pre_extension_expr *const extension = (struct see_pre_extension_expr *) p; struct see_occr *curr_occr = extension->antic_occr; struct see_occr *next_occr = NULL; /* Free the linked list of the anticipatable occurrences. */ while (curr_occr) { next_occr = curr_occr->next; free (curr_occr); curr_occr = next_occr; } /* Free the linked list of the available occurrences. */ curr_occr = extension->avail_occr; while (curr_occr) { next_occr = curr_occr->next; free (curr_occr); curr_occr = next_occr; } /* Free the see_pre_extension_expr structure itself. */ free (extension); } /* Helper functions for the register_properties hash. This kind of hash will hold see_register_properties structures. The value of the key is the regno field of the structure. */ /* Return TRUE if P1 has the same value in the regno field as P2. Otherwise, return FALSE. Where P1 and P2 are see_register_properties structures. */ static int eq_descriptor_properties (const void *p1, const void *p2) { const struct see_register_properties *const curr_prop1 = (const struct see_register_properties *) p1; const struct see_register_properties *const curr_prop2 = (const struct see_register_properties *) p2; return curr_prop1->regno == curr_prop2->regno; } /* P is a see_register_properties struct, use the register number in the regno field. */ static hashval_t hash_descriptor_properties (const void *p) { const struct see_register_properties *const curr_prop = (const struct see_register_properties *) p; return curr_prop->regno; } /* Free the allocated memory of the current see_register_properties struct. */ static void hash_del_properties (void *p) { struct see_register_properties *const curr_prop = (struct see_register_properties *) p; free (curr_prop); } /* Helper functions for an extension hash. This kind of hash will hold insns that look like: set ((reg:WIDEmode r1) (sign_extend:WIDEmode (subreg:NARROWmode (reg:WIDEmode r2)))) or set ((reg:WIDEmode r1) (sign_extend:WIDEmode (reg:NARROWmode r2))) The value of the key is (REGNO (reg:WIDEmode r1)) It is possible to search this hash in two ways: 1. By a register rtx. The Value that is been compared to the keys is the REGNO of it. 2. By an insn with the above pattern. The Value that is been compared to the keys is the REGNO of the reg on the lhs. */ /* Return TRUE if P1 has the same value as P2. Otherwise, return FALSE. Where P1 is an insn and P2 is an insn or a register. */ static int eq_descriptor_extension (const void *p1, const void *p2) { const_rtx const insn = (const_rtx) p1; const_rtx const element = (const_rtx) p2; rtx set1 = single_set (insn); rtx dest_reg1; rtx set2 = NULL; const_rtx dest_reg2 = NULL; gcc_assert (set1 && element && (REG_P (element) || INSN_P (element))); dest_reg1 = SET_DEST (set1); if (INSN_P (element)) { set2 = single_set (element); dest_reg2 = SET_DEST (set2); } else dest_reg2 = element; return REGNO (dest_reg1) == REGNO (dest_reg2); } /* If P is an insn, use the register number of its lhs otherwise, P is a register, use its number. */ static hashval_t hash_descriptor_extension (const void *p) { const_rtx const r = (const_rtx) p; rtx set, lhs; if (r && REG_P (r)) return REGNO (r); gcc_assert (r && INSN_P (r)); set = single_set (r); gcc_assert (set); lhs = SET_DEST (set); return REGNO (lhs); } /* Helper function for a see_bb_splay_ar[i] splay tree. It frees all the allocated memory of a struct see_ref_s pointer. VALUE is the value of a splay tree node. */ static void see_free_ref_s (splay_tree_value value) { struct see_ref_s *ref_s = (struct see_ref_s *)value; if (ref_s->unmerged_def_se_hash) htab_delete (ref_s->unmerged_def_se_hash); if (ref_s->merged_def_se_hash) htab_delete (ref_s->merged_def_se_hash); if (ref_s->use_se_hash) htab_delete (ref_s->use_se_hash); free (ref_s); } /* Rest of the implementation. */ /* Search the extension hash for a suitable entry for EXTENSION. TYPE is the type of EXTENSION (USE_EXTENSION or DEF_EXTENSION). If TYPE is DEF_EXTENSION we need to normalize EXTENSION before searching the extension hash. If a suitable entry was found, return the slot. Otherwise, store EXTENSION in the hash and return NULL. */ static struct see_pre_extension_expr * see_seek_pre_extension_expr (rtx extension, enum extension_type type) { struct see_pre_extension_expr **slot_pre_exp, temp_pre_exp; rtx dest_extension_reg = see_get_extension_reg (extension, 1); enum entry_type extension_code; enum machine_mode source_extension_mode; if (type == DEF_EXTENSION) { extension_code = see_get_extension_data (extension, &source_extension_mode); gcc_assert (extension_code != NOT_RELEVANT); extension = see_gen_normalized_extension (dest_extension_reg, extension_code, source_extension_mode); } temp_pre_exp.se_insn = extension; slot_pre_exp = (struct see_pre_extension_expr **) htab_find_slot (see_pre_extension_hash, &temp_pre_exp, INSERT); if (*slot_pre_exp == NULL) /* This is the first time this extension instruction is encountered. Store it in the hash. */ { (*slot_pre_exp) = XNEW (struct see_pre_extension_expr); (*slot_pre_exp)->se_insn = extension; (*slot_pre_exp)->bitmap_index = (htab_elements (see_pre_extension_hash) - 1); (*slot_pre_exp)->antic_occr = NULL; (*slot_pre_exp)->avail_occr = NULL; return NULL; } return *slot_pre_exp; } /* This function defines how to update the extra_info of the web_entry. FIRST is the pointer of the extra_info of the first web_entry. SECOND is the pointer of the extra_info of the second web_entry. The first web_entry will be the predecessor (leader) of the second web_entry after the union. Return true if FIRST and SECOND points to the same web entry structure and nothing is done. Otherwise, return false. */ static bool see_update_leader_extra_info (struct web_entry *first, struct web_entry *second) { struct see_entry_extra_info *first_ei, *second_ei; first = unionfind_root (first); second = unionfind_root (second); if (unionfind_union (first, second)) return true; first_ei = (struct see_entry_extra_info *) first->extra_info; second_ei = (struct see_entry_extra_info *) second->extra_info; gcc_assert (first_ei && second_ei); if (second_ei->relevancy == NOT_RELEVANT) { first_ei->relevancy = NOT_RELEVANT; return false; } switch (first_ei->relevancy) { case NOT_RELEVANT: break; case RELEVANT_USE: switch (second_ei->relevancy) { case RELEVANT_USE: break; case EXTENDED_DEF: first_ei->relevancy = second_ei->relevancy; first_ei->source_mode_signed = second_ei->source_mode_signed; first_ei->source_mode_unsigned = second_ei->source_mode_unsigned; break; case SIGN_EXTENDED_DEF: case ZERO_EXTENDED_DEF: first_ei->relevancy = second_ei->relevancy; first_ei->source_mode = second_ei->source_mode; break; default: gcc_unreachable (); } break; case SIGN_EXTENDED_DEF: switch (second_ei->relevancy) { case SIGN_EXTENDED_DEF: /* The mode of the root should be the wider one in this case. */ first_ei->source_mode = (first_ei->source_mode > second_ei->source_mode) ? first_ei->source_mode : second_ei->source_mode; break; case RELEVANT_USE: break; case ZERO_EXTENDED_DEF: /* Don't mix webs with zero extend and sign extend. */ first_ei->relevancy = NOT_RELEVANT; break; case EXTENDED_DEF: if (second_ei->source_mode_signed == MAX_MACHINE_MODE) first_ei->relevancy = NOT_RELEVANT; else /* The mode of the root should be the wider one in this case. */ first_ei->source_mode = (first_ei->source_mode > second_ei->source_mode_signed) ? first_ei->source_mode : second_ei->source_mode_signed; break; default: gcc_unreachable (); } break; /* This case is similar to the previous one, with little changes. */ case ZERO_EXTENDED_DEF: switch (second_ei->relevancy) { case SIGN_EXTENDED_DEF: /* Don't mix webs with zero extend and sign extend. */ first_ei->relevancy = NOT_RELEVANT; break; case RELEVANT_USE: break; case ZERO_EXTENDED_DEF: /* The mode of the root should be the wider one in this case. */ first_ei->source_mode = (first_ei->source_mode > second_ei->source_mode) ? first_ei->source_mode : second_ei->source_mode; break; case EXTENDED_DEF: if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE) first_ei->relevancy = NOT_RELEVANT; else /* The mode of the root should be the wider one in this case. */ first_ei->source_mode = (first_ei->source_mode > second_ei->source_mode_unsigned) ? first_ei->source_mode : second_ei->source_mode_unsigned; break; default: gcc_unreachable (); } break; case EXTENDED_DEF: if (first_ei->source_mode_signed != MAX_MACHINE_MODE && first_ei->source_mode_unsigned != MAX_MACHINE_MODE) { switch (second_ei->relevancy) { case SIGN_EXTENDED_DEF: first_ei->relevancy = SIGN_EXTENDED_DEF; first_ei->source_mode = (first_ei->source_mode_signed > second_ei->source_mode) ? first_ei->source_mode_signed : second_ei->source_mode; break; case RELEVANT_USE: break; case ZERO_EXTENDED_DEF: first_ei->relevancy = ZERO_EXTENDED_DEF; first_ei->source_mode = (first_ei->source_mode_unsigned > second_ei->source_mode) ? first_ei->source_mode_unsigned : second_ei->source_mode; break; case EXTENDED_DEF: if (second_ei->source_mode_unsigned != MAX_MACHINE_MODE) first_ei->source_mode_unsigned = (first_ei->source_mode_unsigned > second_ei->source_mode_unsigned) ? first_ei->source_mode_unsigned : second_ei->source_mode_unsigned; if (second_ei->source_mode_signed != MAX_MACHINE_MODE) first_ei->source_mode_signed = (first_ei->source_mode_signed > second_ei->source_mode_signed) ? first_ei->source_mode_signed : second_ei->source_mode_signed; break; default: gcc_unreachable (); } } else if (first_ei->source_mode_signed == MAX_MACHINE_MODE) { gcc_assert (first_ei->source_mode_unsigned != MAX_MACHINE_MODE); switch (second_ei->relevancy) { case SIGN_EXTENDED_DEF: first_ei->relevancy = NOT_RELEVANT; break; case RELEVANT_USE: break; case ZERO_EXTENDED_DEF: first_ei->relevancy = ZERO_EXTENDED_DEF; first_ei->source_mode = (first_ei->source_mode_unsigned > second_ei->source_mode) ? first_ei->source_mode_unsigned : second_ei->source_mode; break; case EXTENDED_DEF: if (second_ei->source_mode_unsigned == MAX_MACHINE_MODE) first_ei->relevancy = NOT_RELEVANT; else first_ei->source_mode_unsigned = (first_ei->source_mode_unsigned > second_ei->source_mode_unsigned) ? first_ei->source_mode_unsigned : second_ei->source_mode_unsigned; break; default: gcc_unreachable (); } } else { gcc_assert (first_ei->source_mode_unsigned == MAX_MACHINE_MODE); gcc_assert (first_ei->source_mode_signed != MAX_MACHINE_MODE); switch (second_ei->relevancy) { case SIGN_EXTENDED_DEF: first_ei->relevancy = SIGN_EXTENDED_DEF; first_ei->source_mode = (first_ei->source_mode_signed > second_ei->source_mode) ? first_ei->source_mode_signed : second_ei->source_mode; break; case RELEVANT_USE: break; case ZERO_EXTENDED_DEF: first_ei->relevancy = NOT_RELEVANT; break; case EXTENDED_DEF: if (second_ei->source_mode_signed == MAX_MACHINE_MODE) first_ei->relevancy = NOT_RELEVANT; else first_ei->source_mode_signed = (first_ei->source_mode_signed > second_ei->source_mode_signed) ? first_ei->source_mode_signed : second_ei->source_mode_signed; break; default: gcc_unreachable (); } } break; default: /* Unknown pattern type. */ gcc_unreachable (); } return false; } /* Free global data structures. */ static void see_free_data_structures (void) { int i; unsigned int j; /* Free the bitmap vectors. */ if (transp) { sbitmap_vector_free (transp); transp = NULL; sbitmap_vector_free (comp); comp = NULL; sbitmap_vector_free (antloc); antloc = NULL; sbitmap_vector_free (ae_kill); ae_kill = NULL; } if (pre_insert_map) { sbitmap_vector_free (pre_insert_map); pre_insert_map = NULL; } if (pre_delete_map) { sbitmap_vector_free (pre_delete_map); pre_delete_map = NULL; } if (edge_list) { free_edge_list (edge_list); edge_list = NULL; } /* Free the extension hash. */ htab_delete (see_pre_extension_hash); /* Free the array of hashes. */ for (i = 0; i < last_bb; i++) if (see_bb_hash_ar[i]) htab_delete (see_bb_hash_ar[i]); free (see_bb_hash_ar); /* Free the array of splay trees. */ for (i = 0; i < last_bb; i++) if (see_bb_splay_ar[i]) splay_tree_delete (see_bb_splay_ar[i]); free (see_bb_splay_ar); /* Free the array of web entries and their extra info field. */ for (j = 0; j < defs_num; j++) free (def_entry[j].extra_info); free (def_entry); for (j = 0; j < uses_num; j++) free (use_entry[j].extra_info); free (use_entry); } /* Initialize global data structures and variables. */ static void see_initialize_data_structures (void) { unsigned int max_reg = max_reg_num (); unsigned int i; /* Build the df object. */ df_set_flags (DF_EQ_NOTES); df_chain_add_problem (DF_DU_CHAIN + DF_UD_CHAIN); df_analyze (); df_set_flags (DF_DEFER_INSN_RESCAN); if (dump_file) df_dump (dump_file); /* Record the last basic block at the beginning of the optimization. */ last_bb = last_basic_block; /* Record the number of uses and defs at the beginning of the optimization. */ uses_num = 0; defs_num = 0; for (i = 0; i < max_reg; i++) { uses_num += DF_REG_USE_COUNT (i) + DF_REG_EQ_USE_COUNT (i); defs_num += DF_REG_DEF_COUNT (i); } /* Allocate web entries array for the union-find data structure. */ def_entry = XCNEWVEC (struct web_entry, defs_num); use_entry = XCNEWVEC (struct web_entry, uses_num); /* Allocate an array of splay trees. One splay tree for each basic block. */ see_bb_splay_ar = XCNEWVEC (splay_tree, last_bb); /* Allocate an array of hashes. One hash for each basic block. */ see_bb_hash_ar = XCNEWVEC (htab_t, last_bb); /* Allocate the extension hash. It will hold the extensions that we want to PRE. */ see_pre_extension_hash = htab_create (10, hash_descriptor_pre_extension, eq_descriptor_pre_extension, hash_del_pre_extension); } /* Function called by note_uses to check if a register is used in a subexpressions. X is a pointer to the subexpression and DATA is a pointer to a see_mentioned_reg_data structure that contains the register to look for and a place for the result. */ static void see_mentioned_reg (rtx *x, void *data) { struct see_mentioned_reg_data *d = (struct see_mentioned_reg_data *) data; if (reg_mentioned_p (d->reg, *x)) d->mentioned = true; } /* We don't want to merge a use extension with a reference if the extended register is used only in a simple move instruction. We also don't want to merge a def extension with a reference if the source register of the extension is defined only in a simple move in the reference. REF is the reference instruction. EXTENSION is the use extension or def extension instruction. TYPE is the type of the extension (use or def). Return true if the reference is complicated enough, so we would like to merge it with the extension. Otherwise, return false. */ static bool see_want_to_be_merged_with_extension (rtx ref, rtx extension, enum extension_type type) { rtx pat; rtx dest_extension_reg = see_get_extension_reg (extension, 1); rtx source_extension_reg = see_get_extension_reg (extension, 0); enum rtx_code code; struct see_mentioned_reg_data d; int i; pat = PATTERN (ref); code = GET_CODE (pat); if (code == PARALLEL) { for (i = 0; i < XVECLEN (pat, 0); i++) { rtx sub = XVECEXP (pat, 0, i); if (GET_CODE (sub) == SET && (REG_P (SET_DEST (sub)) || (GET_CODE (SET_DEST (sub)) == SUBREG && REG_P (SUBREG_REG (SET_DEST (sub))))) && (REG_P (SET_SRC (sub)) || (GET_CODE (SET_SRC (sub)) == SUBREG && REG_P (SUBREG_REG (SET_SRC (sub)))))) { /* This is a simple move SET. */ if (type == DEF_EXTENSION && reg_mentioned_p (source_extension_reg, SET_DEST (sub))) return false; } else { /* This is not a simple move SET. Check if it uses the source of the extension. */ if (type == USE_EXTENSION) { d.reg = dest_extension_reg; d.mentioned = false; note_uses (&sub, see_mentioned_reg, &d); if (d.mentioned) return true; } } } if (type == USE_EXTENSION) return false; } else { if (code == SET && (REG_P (SET_DEST (pat)) || (GET_CODE (SET_DEST (pat)) == SUBREG && REG_P (SUBREG_REG (SET_DEST (pat))))) && (REG_P (SET_SRC (pat)) || (GET_CODE (SET_SRC (pat)) == SUBREG && REG_P (SUBREG_REG (SET_SRC (pat)))))) /* This is a simple move SET. */ return false; } return true; } /* Print the register number of the current see_register_properties structure. This is a subroutine of see_main called via htab_traverse. SLOT contains the current see_register_properties structure pointer. */ static int see_print_register_properties (void **slot, void *b ATTRIBUTE_UNUSED) { const struct see_register_properties *const prop = (const struct see_register_properties *) *slot; gcc_assert (prop); fprintf (dump_file, "Property found for register %d\n", prop->regno); return 1; } /* Print the extension instruction of the current see_register_properties structure. This is a subroutine of see_main called via htab_traverse. SLOT contains the current see_pre_extension_expr structure pointer. */ static int see_print_pre_extension_expr (void **slot, void *b ATTRIBUTE_UNUSED) { const struct see_pre_extension_expr *const pre_extension = (const struct see_pre_extension_expr *) *slot; gcc_assert (pre_extension && pre_extension->se_insn && INSN_P (pre_extension->se_insn)); fprintf (dump_file, "Index %d for:\n", pre_extension->bitmap_index); print_rtl_single (dump_file, pre_extension->se_insn); return 1; } /* Phase 4 implementation: Commit changes to the insn stream. */ /* Delete the merged def extension. This is a subroutine of see_commit_ref_changes called via htab_traverse. SLOT contains the current def extension instruction. B is the see_ref_s structure pointer. */ static int see_delete_merged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED) { rtx def_se = (rtx) *slot; if (dump_file) { fprintf (dump_file, "Deleting merged def extension:\n"); print_rtl_single (dump_file, def_se); } if (INSN_DELETED_P (def_se)) /* This def extension is an implicit one. No need to delete it since it is not in the insn stream. */ return 1; delete_insn (def_se); return 1; } /* Delete the unmerged def extension. This is a subroutine of see_commit_ref_changes called via htab_traverse. SLOT contains the current def extension instruction. B is the see_ref_s structure pointer. */ static int see_delete_unmerged_def_extension (void **slot, void *b ATTRIBUTE_UNUSED) { rtx def_se = (rtx) *slot; if (dump_file) { fprintf (dump_file, "Deleting unmerged def extension:\n"); print_rtl_single (dump_file, def_se); } delete_insn (def_se); return 1; } /* Emit the non-redundant use extension to the instruction stream. This is a subroutine of see_commit_ref_changes called via htab_traverse. SLOT contains the current use extension instruction. B is the see_ref_s structure pointer. */ static int see_emit_use_extension (void **slot, void *b) { rtx use_se = (rtx) *slot; struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; if (INSN_DELETED_P (use_se)) /* This use extension was previously removed according to the lcm output. */ return 1; if (dump_file) { fprintf (dump_file, "Inserting use extension:\n"); print_rtl_single (dump_file, use_se); } add_insn_before (use_se, curr_ref_s->insn, NULL); return 1; } /* For each relevant reference: a. Emit the non-redundant use extensions. b. Delete the def extensions. c. Replace the original reference with the merged one (if exists) and add the move instructions that were generated. This is a subroutine of see_commit_changes called via splay_tree_foreach. STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a see_ref_s structure. */ static int see_commit_ref_changes (splay_tree_node stn, void *data ATTRIBUTE_UNUSED) { htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash; htab_t unmerged_def_se_hash = ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash; htab_t merged_def_se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash; rtx ref = ((struct see_ref_s *) (stn->value))->insn; rtx merged_ref = ((struct see_ref_s *) (stn->value))->merged_insn; /* Emit the non-redundant use extensions. */ if (use_se_hash) htab_traverse_noresize (use_se_hash, see_emit_use_extension, (PTR) (stn->value)); /* Delete the def extensions. */ if (unmerged_def_se_hash) htab_traverse (unmerged_def_se_hash, see_delete_unmerged_def_extension, (PTR) (stn->value)); if (merged_def_se_hash) htab_traverse (merged_def_se_hash, see_delete_merged_def_extension, (PTR) (stn->value)); /* Replace the original reference with the merged one (if exists) and add the move instructions that were generated. */ if (merged_ref && !INSN_DELETED_P (ref)) { if (dump_file) { fprintf (dump_file, "Replacing orig reference:\n"); print_rtl_single (dump_file, ref); fprintf (dump_file, "With merged reference:\n"); print_rtl_single (dump_file, merged_ref); } emit_insn_after (merged_ref, ref); delete_insn (ref); } /* Continue to the next reference. */ return 0; } /* Insert partially redundant expressions on edges to make the expressions fully redundant. INDEX_MAP is a mapping of an index to an expression. Return true if an instruction was inserted on an edge. Otherwise, return false. */ static bool see_pre_insert_extensions (struct see_pre_extension_expr **index_map) { int num_edges = NUM_EDGES (edge_list); int set_size = pre_insert_map[0]->size; size_t pre_extension_num = htab_elements (see_pre_extension_hash); int did_insert = 0; int e; int i; int j; for (e = 0; e < num_edges; e++) { int indx; basic_block bb = INDEX_EDGE_PRED_BB (edge_list, e); for (i = indx = 0; i < set_size; i++, indx += SBITMAP_ELT_BITS) { SBITMAP_ELT_TYPE insert = pre_insert_map[e]->elms[i]; for (j = indx; insert && j < (int) pre_extension_num; j++, insert >>= 1) if (insert & 1) { struct see_pre_extension_expr *expr = index_map[j]; int idx = expr->bitmap_index; rtx se_insn = NULL; edge eg = INDEX_EDGE (edge_list, e); start_sequence (); emit_insn (copy_insn (PATTERN (expr->se_insn))); se_insn = get_insns (); end_sequence (); if (eg->flags & EDGE_ABNORMAL) { rtx new_insn = NULL; new_insn = insert_insn_end_bb_new (se_insn, bb); gcc_assert (new_insn && INSN_P (new_insn)); if (dump_file) { fprintf (dump_file, "PRE: end of bb %d, insn %d, ", bb->index, INSN_UID (new_insn)); fprintf (dump_file, "inserting expression %d\n", idx); } } else { insert_insn_on_edge (se_insn, eg); if (dump_file) { fprintf (dump_file, "PRE: edge (%d,%d), ", bb->index, INDEX_EDGE_SUCC_BB (edge_list, e)->index); fprintf (dump_file, "inserting expression %d\n", idx); } } did_insert = true; } } } return did_insert; } /* Since all the redundant extensions must be anticipatable, they must be a use extensions. Mark them as deleted. This will prevent them from been emitted in the first place. This is a subroutine of see_commit_changes called via htab_traverse. SLOT contains the current see_pre_extension_expr structure pointer. */ static int see_pre_delete_extension (void **slot, void *b ATTRIBUTE_UNUSED) { struct see_pre_extension_expr *const expr = (struct see_pre_extension_expr *) *slot; struct see_occr *occr; int indx = expr->bitmap_index; for (occr = expr->antic_occr; occr != NULL; occr = occr->next) { if (TEST_BIT (pre_delete_map[occr->block_num], indx)) { /* Mark as deleted. */ INSN_DELETED_P (occr->insn) = 1; if (dump_file) { fprintf (dump_file,"Redundant extension deleted:\n"); print_rtl_single (dump_file, occr->insn); } } } return 1; } /* Create the index_map mapping of an index to an expression. This is a subroutine of see_commit_changes called via htab_traverse. SLOT contains the current see_pre_extension_expr structure pointer. B a pointer to see_pre_extension_expr structure pointer. */ static int see_map_extension (void **slot, void *b) { struct see_pre_extension_expr *const expr = (struct see_pre_extension_expr *) *slot; struct see_pre_extension_expr **const index_map = (struct see_pre_extension_expr **) b; index_map[expr->bitmap_index] = expr; return 1; } /* Phase 4 top level function. In this phase we finally change the instruction stream. Here we insert extensions at their best placements and delete the redundant ones according to the output of the LCM. We also replace some of the instructions according to phase 2 merges results. */ static void see_commit_changes (void) { struct see_pre_extension_expr **index_map; size_t pre_extension_num = htab_elements (see_pre_extension_hash); bool did_insert = false; int i; index_map = XCNEWVEC (struct see_pre_extension_expr *, pre_extension_num); if (dump_file) fprintf (dump_file, "* Phase 4: Commit changes to the insn stream. *\n"); /* Produce a mapping of all the pre_extensions. */ htab_traverse (see_pre_extension_hash, see_map_extension, (PTR) index_map); /* Delete redundant extension. This will prevent them from been emitted in the first place. */ htab_traverse (see_pre_extension_hash, see_pre_delete_extension, NULL); /* Insert extensions on edges, according to the LCM result. */ did_insert = see_pre_insert_extensions (index_map); if (did_insert) commit_edge_insertions (); /* Commit the rest of the changes. */ for (i = 0; i < last_bb; i++) { if (see_bb_splay_ar[i]) { /* Traverse over all the references in the basic block in forward order. */ splay_tree_foreach (see_bb_splay_ar[i], see_commit_ref_changes, NULL); } } free (index_map); } /* Phase 3 implementation: Eliminate globally redundant extensions. */ /* Analyze the properties of a merged def extension for the LCM and record avail occurrences. This is a subroutine of see_analyze_ref_local_prop called via htab_traverse. SLOT contains the current def extension instruction. B is the see_ref_s structure pointer. */ static int see_analyze_merged_def_local_prop (void **slot, void *b) { rtx def_se = (rtx) *slot; struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; rtx ref = curr_ref_s->insn; struct see_pre_extension_expr *extension_expr; int indx; int bb_num = BLOCK_NUM (ref); htab_t curr_bb_hash; struct see_register_properties *curr_prop, **slot_prop; struct see_register_properties temp_prop; rtx dest_extension_reg = see_get_extension_reg (def_se, 1); struct see_occr *curr_occr = NULL; struct see_occr *tmp_occr = NULL; extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION); /* The extension_expr must be found. */ gcc_assert (extension_expr); curr_bb_hash = see_bb_hash_ar[bb_num]; gcc_assert (curr_bb_hash); temp_prop.regno = REGNO (dest_extension_reg); slot_prop = (struct see_register_properties **) htab_find_slot (curr_bb_hash, &temp_prop, INSERT); curr_prop = *slot_prop; gcc_assert (curr_prop); indx = extension_expr->bitmap_index; /* Reset the transparency bit. */ RESET_BIT (transp[bb_num], indx); /* Reset the killed bit. */ RESET_BIT (ae_kill[bb_num], indx); if (curr_prop->first_se_after_last_def == DF_INSN_LUID (ref)) { /* Set the available bit. */ SET_BIT (comp[bb_num], indx); /* Record the available occurrence. */ curr_occr = XNEW (struct see_occr); curr_occr->next = NULL; curr_occr->insn = def_se; curr_occr->block_num = bb_num; tmp_occr = extension_expr->avail_occr; if (!tmp_occr) extension_expr->avail_occr = curr_occr; else { while (tmp_occr->next) tmp_occr = tmp_occr->next; tmp_occr->next = curr_occr; } } return 1; } /* Analyze the properties of a unmerged def extension for the LCM. This is a subroutine of see_analyze_ref_local_prop called via htab_traverse. SLOT contains the current def extension instruction. B is the see_ref_s structure pointer. */ static int see_analyze_unmerged_def_local_prop (void **slot, void *b) { rtx def_se = (rtx) *slot; struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; rtx ref = curr_ref_s->insn; struct see_pre_extension_expr *extension_expr; int indx; int bb_num = BLOCK_NUM (ref); htab_t curr_bb_hash; struct see_register_properties *curr_prop, **slot_prop; struct see_register_properties temp_prop; rtx dest_extension_reg = see_get_extension_reg (def_se, 1); extension_expr = see_seek_pre_extension_expr (def_se, DEF_EXTENSION); /* The extension_expr must be found. */ gcc_assert (extension_expr); curr_bb_hash = see_bb_hash_ar[bb_num]; gcc_assert (curr_bb_hash); temp_prop.regno = REGNO (dest_extension_reg); slot_prop = (struct see_register_properties **) htab_find_slot (curr_bb_hash, &temp_prop, INSERT); curr_prop = *slot_prop; gcc_assert (curr_prop); indx = extension_expr->bitmap_index; /* Reset the transparency bit. */ RESET_BIT (transp[bb_num], indx); /* Set the killed bit. */ SET_BIT (ae_kill[bb_num], indx); return 1; } /* Analyze the properties of a use extension for the LCM and record any and avail occurrences. This is a subroutine of see_analyze_ref_local_prop called via htab_traverse. SLOT contains the current use extension instruction. B is the see_ref_s structure pointer. */ static int see_analyze_use_local_prop (void **slot, void *b) { struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; rtx use_se = (rtx) *slot; rtx ref = curr_ref_s->insn; rtx dest_extension_reg = see_get_extension_reg (use_se, 1); struct see_pre_extension_expr *extension_expr; struct see_register_properties *curr_prop, **slot_prop; struct see_register_properties temp_prop; struct see_occr *curr_occr = NULL; struct see_occr *tmp_occr = NULL; htab_t curr_bb_hash; int indx; int bb_num = BLOCK_NUM (ref); extension_expr = see_seek_pre_extension_expr (use_se, USE_EXTENSION); /* The extension_expr must be found. */ gcc_assert (extension_expr); curr_bb_hash = see_bb_hash_ar[bb_num]; gcc_assert (curr_bb_hash); temp_prop.regno = REGNO (dest_extension_reg); slot_prop = (struct see_register_properties **) htab_find_slot (curr_bb_hash, &temp_prop, INSERT); curr_prop = *slot_prop; gcc_assert (curr_prop); indx = extension_expr->bitmap_index; if (curr_prop->first_se_before_any_def == DF_INSN_LUID (ref)) { /* Set the anticipatable bit. */ SET_BIT (antloc[bb_num], indx); /* Record the anticipatable occurrence. */ curr_occr = XNEW (struct see_occr); curr_occr->next = NULL; curr_occr->insn = use_se; curr_occr->block_num = bb_num; tmp_occr = extension_expr->antic_occr; if (!tmp_occr) extension_expr->antic_occr = curr_occr; else { while (tmp_occr->next) tmp_occr = tmp_occr->next; tmp_occr->next = curr_occr; } if (curr_prop->last_def < 0) { /* Set the available bit. */ SET_BIT (comp[bb_num], indx); /* Record the available occurrence. */ curr_occr = XNEW (struct see_occr); curr_occr->next = NULL; curr_occr->insn = use_se; curr_occr->block_num = bb_num; tmp_occr = extension_expr->avail_occr; if (!tmp_occr) extension_expr->avail_occr = curr_occr; else { while (tmp_occr->next) tmp_occr = tmp_occr->next; tmp_occr->next = curr_occr; } } /* Note: there is no need to reset the killed bit since it must be zero at this point. */ } else if (curr_prop->first_se_after_last_def == DF_INSN_LUID (ref)) { /* Set the available bit. */ SET_BIT (comp[bb_num], indx); /* Reset the killed bit. */ RESET_BIT (ae_kill[bb_num], indx); /* Record the available occurrence. */ curr_occr = XNEW (struct see_occr); curr_occr->next = NULL; curr_occr->insn = use_se; curr_occr->block_num = bb_num; tmp_occr = extension_expr->avail_occr; if (!tmp_occr) extension_expr->avail_occr = curr_occr; else { while (tmp_occr->next) tmp_occr = tmp_occr->next; tmp_occr->next = curr_occr; } } return 1; } /* Here we traverse over all the merged and unmerged extensions of the reference and analyze their properties for the LCM. This is a subroutine of see_execute_LCM called via splay_tree_foreach. STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a see_ref_s structure. */ static int see_analyze_ref_local_prop (splay_tree_node stn, void *data ATTRIBUTE_UNUSED) { htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash; htab_t unmerged_def_se_hash = ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash; htab_t merged_def_se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash; /* Analyze use extensions that were not merged with the reference. */ if (use_se_hash) htab_traverse_noresize (use_se_hash, see_analyze_use_local_prop, (PTR) (stn->value)); /* Analyze def extensions that were not merged with the reference. */ if (unmerged_def_se_hash) htab_traverse (unmerged_def_se_hash, see_analyze_unmerged_def_local_prop, (PTR) (stn->value)); /* Analyze def extensions that were merged with the reference. */ if (merged_def_se_hash) htab_traverse (merged_def_se_hash, see_analyze_merged_def_local_prop, (PTR) (stn->value)); /* Continue to the next definition. */ return 0; } /* Phase 3 top level function. In this phase, we set the input bit vectors of the LCM according to data gathered in phase 2. Then we run the edge based LCM. */ static void see_execute_LCM (void) { size_t pre_extension_num = htab_elements (see_pre_extension_hash); int i = 0; if (dump_file) fprintf (dump_file, "* Phase 3: Eliminate globally redundant extensions. *\n"); /* Initialize the global sbitmap vectors. */ transp = sbitmap_vector_alloc (last_bb, pre_extension_num); comp = sbitmap_vector_alloc (last_bb, pre_extension_num); antloc = sbitmap_vector_alloc (last_bb, pre_extension_num); ae_kill = sbitmap_vector_alloc (last_bb, pre_extension_num); sbitmap_vector_ones (transp, last_bb); sbitmap_vector_zero (comp, last_bb); sbitmap_vector_zero (antloc, last_bb); sbitmap_vector_zero (ae_kill, last_bb); /* Traverse over all the splay trees of the basic blocks. */ for (i = 0; i < last_bb; i++) { if (see_bb_splay_ar[i]) { /* Traverse over all the references in the basic block in forward order. */ splay_tree_foreach (see_bb_splay_ar[i], see_analyze_ref_local_prop, NULL); } } /* Add fake exit edges before running the lcm. */ add_noreturn_fake_exit_edges (); /* Run the LCM. */ edge_list = pre_edge_lcm (pre_extension_num, transp, comp, antloc, ae_kill, &pre_insert_map, &pre_delete_map); /* Remove the fake edges. */ remove_fake_exit_edges (); } /* Phase 2 implementation: Merge and eliminate locally redundant extensions. */ /* In this function we set the register properties for the register that is defined and extended in the reference. The properties are defined in see_register_properties structure which is allocated per basic block and per register. Later the extension is inserted into the see_pre_extension_hash for the next phase of the optimization. This is a subroutine of see_handle_extensions_for_one_ref called via htab_traverse. SLOT contains the current def extension instruction. B is the see_ref_s structure pointer. */ static int see_set_prop_merged_def (void **slot, void *b) { rtx def_se = (rtx) *slot; struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; rtx insn = curr_ref_s->insn; rtx dest_extension_reg = see_get_extension_reg (def_se, 1); htab_t curr_bb_hash; struct see_register_properties *curr_prop = NULL; struct see_register_properties **slot_prop; struct see_register_properties temp_prop; int ref_luid = DF_INSN_LUID (insn); curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)]; if (!curr_bb_hash) { /* The hash doesn't exist yet. Create it. */ curr_bb_hash = htab_create (10, hash_descriptor_properties, eq_descriptor_properties, hash_del_properties); see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash; } /* Find the right register properties in the right basic block. */ temp_prop.regno = REGNO (dest_extension_reg); slot_prop = (struct see_register_properties **) htab_find_slot (curr_bb_hash, &temp_prop, INSERT); if (slot_prop && *slot_prop != NULL) { /* Property already exists. */ curr_prop = *slot_prop; gcc_assert (curr_prop->regno == REGNO (dest_extension_reg)); curr_prop->last_def = ref_luid; curr_prop->first_se_after_last_def = ref_luid; } else { /* Property doesn't exist yet. */ curr_prop = XNEW (struct see_register_properties); curr_prop->regno = REGNO (dest_extension_reg); curr_prop->last_def = ref_luid; curr_prop->first_se_before_any_def = -1; curr_prop->first_se_after_last_def = ref_luid; *slot_prop = curr_prop; } /* Insert the def_se into see_pre_extension_hash if it isn't already there. */ see_seek_pre_extension_expr (def_se, DEF_EXTENSION); return 1; } /* In this function we set the register properties for the register that is defined but not extended in the reference. The properties are defined in see_register_properties structure which is allocated per basic block and per register. Later the extension is inserted into the see_pre_extension_hash for the next phase of the optimization. This is a subroutine of see_handle_extensions_for_one_ref called via htab_traverse. SLOT contains the current def extension instruction. B is the see_ref_s structure pointer. */ static int see_set_prop_unmerged_def (void **slot, void *b) { rtx def_se = (rtx) *slot; struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; rtx insn = curr_ref_s->insn; rtx dest_extension_reg = see_get_extension_reg (def_se, 1); htab_t curr_bb_hash; struct see_register_properties *curr_prop = NULL; struct see_register_properties **slot_prop; struct see_register_properties temp_prop; int ref_luid = DF_INSN_LUID (insn); curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)]; if (!curr_bb_hash) { /* The hash doesn't exist yet. Create it. */ curr_bb_hash = htab_create (10, hash_descriptor_properties, eq_descriptor_properties, hash_del_properties); see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash; } /* Find the right register properties in the right basic block. */ temp_prop.regno = REGNO (dest_extension_reg); slot_prop = (struct see_register_properties **) htab_find_slot (curr_bb_hash, &temp_prop, INSERT); if (slot_prop && *slot_prop != NULL) { /* Property already exists. */ curr_prop = *slot_prop; gcc_assert (curr_prop->regno == REGNO (dest_extension_reg)); curr_prop->last_def = ref_luid; curr_prop->first_se_after_last_def = -1; } else { /* Property doesn't exist yet. */ curr_prop = XNEW (struct see_register_properties); curr_prop->regno = REGNO (dest_extension_reg); curr_prop->last_def = ref_luid; curr_prop->first_se_before_any_def = -1; curr_prop->first_se_after_last_def = -1; *slot_prop = curr_prop; } /* Insert the def_se into see_pre_extension_hash if it isn't already there. */ see_seek_pre_extension_expr (def_se, DEF_EXTENSION); return 1; } /* In this function we set the register properties for the register that is used in the reference. The properties are defined in see_register_properties structure which is allocated per basic block and per register. When a redundant use extension is found it is removed from the hash of the reference. If the extension is non redundant it is inserted into the see_pre_extension_hash for the next phase of the optimization. This is a subroutine of see_handle_extensions_for_one_ref called via htab_traverse. SLOT contains the current use extension instruction. B is the see_ref_s structure pointer. */ static int see_set_prop_unmerged_use (void **slot, void *b) { rtx use_se = (rtx) *slot; struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; rtx insn = curr_ref_s->insn; rtx dest_extension_reg = see_get_extension_reg (use_se, 1); htab_t curr_bb_hash; struct see_register_properties *curr_prop = NULL; struct see_register_properties **slot_prop; struct see_register_properties temp_prop; bool locally_redundant = false; int ref_luid = DF_INSN_LUID (insn); curr_bb_hash = see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)]; if (!curr_bb_hash) { /* The hash doesn't exist yet. Create it. */ curr_bb_hash = htab_create (10, hash_descriptor_properties, eq_descriptor_properties, hash_del_properties); see_bb_hash_ar[BLOCK_NUM (curr_ref_s->insn)] = curr_bb_hash; } /* Find the right register properties in the right basic block. */ temp_prop.regno = REGNO (dest_extension_reg); slot_prop = (struct see_register_properties **) htab_find_slot (curr_bb_hash, &temp_prop, INSERT); if (slot_prop && *slot_prop != NULL) { /* Property already exists. */ curr_prop = *slot_prop; gcc_assert (curr_prop->regno == REGNO (dest_extension_reg)); if (curr_prop->last_def < 0 && curr_prop->first_se_before_any_def < 0) curr_prop->first_se_before_any_def = ref_luid; else if (curr_prop->last_def < 0 && curr_prop->first_se_before_any_def >= 0) { /* In this case the extension is locally redundant. */ htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot); locally_redundant = true; } else if (curr_prop->last_def >= 0 && curr_prop->first_se_after_last_def < 0) curr_prop->first_se_after_last_def = ref_luid; else if (curr_prop->last_def >= 0 && curr_prop->first_se_after_last_def >= 0) { /* In this case the extension is locally redundant. */ htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot); locally_redundant = true; } else gcc_unreachable (); } else { /* Property doesn't exist yet. Create a new one. */ curr_prop = XNEW (struct see_register_properties); curr_prop->regno = REGNO (dest_extension_reg); curr_prop->last_def = -1; curr_prop->first_se_before_any_def = ref_luid; curr_prop->first_se_after_last_def = -1; *slot_prop = curr_prop; } /* Insert the use_se into see_pre_extension_hash if it isn't already there. */ if (!locally_redundant) see_seek_pre_extension_expr (use_se, USE_EXTENSION); if (locally_redundant && dump_file) { fprintf (dump_file, "Locally redundant extension:\n"); print_rtl_single (dump_file, use_se); } return 1; } /* Print an extension instruction. This is a subroutine of see_handle_extensions_for_one_ref called via htab_traverse. SLOT contains the extension instruction. */ static int see_print_one_extension (void **slot, void *b ATTRIBUTE_UNUSED) { rtx def_se = (rtx) *slot; gcc_assert (def_se && INSN_P (def_se)); print_rtl_single (dump_file, def_se); return 1; } /* Function called by note_uses to replace used subexpressions. X is a pointer to the subexpression and DATA is a pointer to a see_replace_data structure that contains the data for the replacement. */ static void see_replace_src (rtx *x, void *data) { struct see_replace_data *d = (struct see_replace_data *) data; *x = replace_rtx (*x, d->from, d->to); } static rtx see_copy_insn (rtx insn) { rtx pat = copy_insn (PATTERN (insn)), ret; if (NONJUMP_INSN_P (insn)) ret = make_insn_raw (pat); else if (JUMP_P (insn)) ret = make_jump_insn_raw (pat); else if (CALL_P (insn)) { start_sequence (); ret = emit_call_insn (pat); end_sequence (); if (CALL_INSN_FUNCTION_USAGE (insn)) CALL_INSN_FUNCTION_USAGE (ret) = copy_rtx (CALL_INSN_FUNCTION_USAGE (insn)); SIBLING_CALL_P (ret) = SIBLING_CALL_P (insn); RTL_CONST_CALL_P (ret) = RTL_CONST_CALL_P (insn); RTL_PURE_CALL_P (ret) = RTL_PURE_CALL_P (insn); RTL_LOOPING_CONST_OR_PURE_CALL_P (ret) = RTL_LOOPING_CONST_OR_PURE_CALL_P (insn); } else gcc_unreachable (); if (REG_NOTES (insn)) REG_NOTES (ret) = copy_rtx (REG_NOTES (insn)); INSN_LOCATOR (ret) = INSN_LOCATOR (insn); RTX_FRAME_RELATED_P (ret) = RTX_FRAME_RELATED_P (insn); PREV_INSN (ret) = NULL_RTX; NEXT_INSN (ret) = NULL_RTX; return ret; } /* At this point the pattern is expected to be: ref: set (dest_reg) (rhs) def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg)) The merge of these two instructions didn't succeed. We try to generate the pattern: set (subreg (dest_extension_reg)) (rhs) We do this in 4 steps: a. Replace every use of dest_reg with a new pseudo register. b. Replace every instance of dest_reg with the subreg. c. Replace every use of the new pseudo register back to dest_reg. d. Try to recognize and simplify. If the manipulation failed, leave the original ref but try to generate and recognize a simple move instruction: set (subreg (dest_extension_reg)) (dest_reg) This move instruction will be emitted right after the ref to the instruction stream and assure the correctness of the code after def_se will be removed. CURR_REF_S is the current reference. DEF_SE is the extension that couldn't be merged. */ static void see_def_extension_not_merged (struct see_ref_s *curr_ref_s, rtx def_se) { struct see_replace_data d; /* If the original insn was already merged with an extension before, take the merged one. */ rtx ref = curr_ref_s->merged_insn ? curr_ref_s->merged_insn : curr_ref_s->insn; rtx merged_ref_next = curr_ref_s->merged_insn ? NEXT_INSN (curr_ref_s->merged_insn) : NULL_RTX; rtx ref_copy = see_copy_insn (ref); rtx source_extension_reg = see_get_extension_reg (def_se, 0); rtx dest_extension_reg = see_get_extension_reg (def_se, 1); rtx set, rhs; rtx dest_reg, dest_real_reg; rtx new_pseudo_reg, subreg; enum machine_mode source_extension_mode = GET_MODE (source_extension_reg); enum machine_mode dest_mode; set = single_set (def_se); gcc_assert (set); rhs = SET_SRC (set); gcc_assert (GET_CODE (rhs) == SIGN_EXTEND || GET_CODE (rhs) == ZERO_EXTEND); dest_reg = XEXP (rhs, 0); gcc_assert (REG_P (dest_reg) || (GET_CODE (dest_reg) == SUBREG && REG_P (SUBREG_REG (dest_reg)))); dest_real_reg = REG_P (dest_reg) ? dest_reg : SUBREG_REG (dest_reg); dest_mode = GET_MODE (dest_reg); subreg = gen_lowpart_SUBREG (dest_mode, dest_extension_reg); new_pseudo_reg = gen_reg_rtx (source_extension_mode); /* Step a: Replace every use of dest_real_reg with a new pseudo register. */ d.from = dest_real_reg; d.to = new_pseudo_reg; note_uses (&PATTERN (ref_copy), see_replace_src, &d); /* Step b: Replace every instance of dest_reg with the subreg. */ ref_copy = replace_rtx (ref_copy, dest_reg, copy_rtx (subreg)); /* Step c: Replace every use of the new pseudo register back to dest_real_reg. */ d.from = new_pseudo_reg; d.to = dest_real_reg; note_uses (&PATTERN (ref_copy), see_replace_src, &d); if (rtx_equal_p (PATTERN (ref), PATTERN (ref_copy)) || insn_invalid_p (ref_copy)) { /* The manipulation failed. */ df_insn_delete (NULL, INSN_UID (ref_copy)); /* Create a new copy. */ ref_copy = see_copy_insn (ref); if (curr_ref_s->merged_insn) df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn)); /* Create a simple move instruction that will replace the def_se. */ start_sequence (); emit_insn (ref_copy); emit_move_insn (subreg, dest_reg); if (merged_ref_next != NULL_RTX) emit_insn (merged_ref_next); curr_ref_s->merged_insn = get_insns (); end_sequence (); if (dump_file) { fprintf (dump_file, "Following def merge failure a move "); fprintf (dump_file, "insn was added after the ref.\n"); fprintf (dump_file, "Original ref:\n"); print_rtl_single (dump_file, ref); fprintf (dump_file, "Move insn that was added:\n"); print_rtl_single (dump_file, NEXT_INSN (curr_ref_s->merged_insn)); } return; } /* The manipulation succeeded. Store the new manipulated reference. */ /* It is possible for dest_reg to appear multiple times in ref_copy. In this case, ref_copy now has invalid sharing. Copying solves the problem. We don't use copy_rtx as an optimization for the common case (no sharing). We can't just use copy_rtx_if_shared since it does nothing on INSNs. Another possible solution would be to make validate_replace_rtx_1 public and use it instead of replace_rtx. */ reset_used_flags (PATTERN (ref_copy)); reset_used_flags (REG_NOTES (ref_copy)); PATTERN (ref_copy) = copy_rtx_if_shared (PATTERN (ref_copy)); REG_NOTES (ref_copy) = copy_rtx_if_shared (REG_NOTES (ref_copy)); /* Try to simplify the new manipulated insn. */ validate_simplify_insn (ref_copy); if (curr_ref_s->merged_insn) df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn)); /* Create a simple move instruction to assure the correctness of the code. */ start_sequence (); emit_insn (ref_copy); emit_move_insn (dest_reg, subreg); if (merged_ref_next != NULL_RTX) emit_insn (merged_ref_next); curr_ref_s->merged_insn = get_insns (); end_sequence (); if (dump_file) { fprintf (dump_file, "Following merge failure the ref was transformed!\n"); fprintf (dump_file, "Original ref:\n"); print_rtl_single (dump_file, ref); fprintf (dump_file, "Transformed ref:\n"); print_rtl_single (dump_file, curr_ref_s->merged_insn); fprintf (dump_file, "Move insn that was added:\n"); print_rtl_single (dump_file, NEXT_INSN (curr_ref_s->merged_insn)); } } /* Merge the reference instruction (ref) with the current use extension. use_se extends a NARROWmode register to a WIDEmode register. ref uses the WIDEmode register. The pattern we try to merge is this: use_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg)) ref: use (dest_extension_reg) where dest_extension_reg and source_extension_reg can be subregs. The merge is done by generating, simplifying and recognizing the pattern: use (sign/zero_extend (source_extension_reg)) If ref is too simple (according to see_want_to_be_merged_with_extension ()) we don't try to merge it with use_se and we continue as if the merge failed. This is a subroutine of see_handle_extensions_for_one_ref called via htab_traverse. SLOT contains the current use extension instruction. B is the see_ref_s structure pointer. */ static int see_merge_one_use_extension (void **slot, void *b) { struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; rtx use_se = (rtx) *slot; rtx ref = curr_ref_s->merged_insn ? curr_ref_s->merged_insn : curr_ref_s->insn; rtx merged_ref_next = curr_ref_s->merged_insn ? NEXT_INSN (curr_ref_s->merged_insn) : NULL_RTX; rtx ref_copy = see_copy_insn (ref); rtx extension_set = single_set (use_se); rtx extension_rhs = NULL; rtx dest_extension_reg = see_get_extension_reg (use_se, 1); rtx note = NULL; rtx simplified_note = NULL; gcc_assert (use_se && curr_ref_s && extension_set); extension_rhs = SET_SRC (extension_set); /* In REG_EQUIV and REG_EQUAL notes that mention the register we need to replace the uses of the dest_extension_reg with the rhs of the extension instruction. This is necessary since there might not be an extension in the path between the definition and the note when this optimization is over. */ note = find_reg_equal_equiv_note (ref_copy); if (note) { simplified_note = simplify_replace_rtx (XEXP (note, 0), dest_extension_reg, extension_rhs); if (rtx_equal_p (XEXP (note, 0), simplified_note)) /* Replacement failed. Remove the note. */ remove_note (ref_copy, note); else set_unique_reg_note (ref_copy, REG_NOTE_KIND (note), simplified_note); } if (!see_want_to_be_merged_with_extension (ref, use_se, USE_EXTENSION)) { /* The use in the reference is too simple. Don't try to merge. */ if (dump_file) { fprintf (dump_file, "Use merge skipped!\n"); fprintf (dump_file, "Original instructions:\n"); print_rtl_single (dump_file, use_se); print_rtl_single (dump_file, ref); } df_insn_delete (NULL, INSN_UID (ref_copy)); /* Don't remove the current use_se from the use_se_hash and continue to the next extension. */ return 1; } validate_replace_src_group (dest_extension_reg, extension_rhs, ref_copy); if (!num_changes_pending ()) /* In this case this is not a real use (the only use is/was in the notes list). Remove the use extension from the hash. This will prevent it from been emitted in the first place. */ { if (dump_file) { fprintf (dump_file, "Use extension not necessary before:\n"); print_rtl_single (dump_file, ref); } htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot); if (curr_ref_s->merged_insn) df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn)); if (merged_ref_next != NULL_RTX) { start_sequence (); emit_insn (ref_copy); emit_insn (merged_ref_next); curr_ref_s->merged_insn = get_insns (); end_sequence (); } else curr_ref_s->merged_insn = ref_copy; return 1; } if (!apply_change_group ()) { /* The merge failed. */ if (dump_file) { fprintf (dump_file, "Use merge failed!\n"); fprintf (dump_file, "Original instructions:\n"); print_rtl_single (dump_file, use_se); print_rtl_single (dump_file, ref); } df_insn_delete (NULL, INSN_UID (ref_copy)); /* Don't remove the current use_se from the use_se_hash and continue to the next extension. */ return 1; } /* The merge succeeded! */ /* Try to simplify the new merged insn. */ validate_simplify_insn (ref_copy); if (curr_ref_s->merged_insn) df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn)); if (merged_ref_next != NULL_RTX) { start_sequence (); emit_insn (ref_copy); emit_insn (merged_ref_next); curr_ref_s->merged_insn = get_insns (); end_sequence (); } else curr_ref_s->merged_insn = ref_copy; if (dump_file) { fprintf (dump_file, "Use merge succeeded!\n"); fprintf (dump_file, "Original instructions:\n"); print_rtl_single (dump_file, use_se); print_rtl_single (dump_file, ref); fprintf (dump_file, "Merged instruction:\n"); print_rtl_single (dump_file, curr_ref_s->merged_insn); } /* Remove the current use_se from the use_se_hash. This will prevent it from been emitted in the first place. */ htab_clear_slot (curr_ref_s->use_se_hash, (PTR *)slot); return 1; } /* Merge the reference instruction (ref) with the extension that follows it in the same basic block (def_se). ref sets a NARROWmode register and def_se extends it to WIDEmode register. The pattern we try to merge is this: ref: set (dest_reg) (rhs) def_se: set (dest_extension_reg) (sign/zero_extend (source_extension_reg)) where dest_reg and source_extension_reg can both be subregs (together) and (REGNO (dest_reg) == REGNO (source_extension_reg)) The merge is done by generating, simplifying and recognizing the pattern: set (dest_extension_reg) (sign/zero_extend (rhs)) If ref is a parallel instruction we just replace the relevant set in it. If ref is too simple (according to see_want_to_be_merged_with_extension ()) we don't try to merge it with def_se and we continue as if the merge failed. This is a subroutine of see_handle_extensions_for_one_ref called via htab_traverse. SLOT contains the current def extension instruction. B is the see_ref_s structure pointer. */ static int see_merge_one_def_extension (void **slot, void *b) { struct see_ref_s *curr_ref_s = (struct see_ref_s *) b; rtx def_se = (rtx) *slot; /* If the original insn was already merged with an extension before, take the merged one. */ rtx ref = curr_ref_s->merged_insn ? curr_ref_s->merged_insn : curr_ref_s->insn; rtx merged_ref_next = curr_ref_s->merged_insn ? NEXT_INSN (curr_ref_s->merged_insn) : NULL_RTX; rtx ref_copy = see_copy_insn (ref); rtx new_set = NULL; rtx source_extension_reg = see_get_extension_reg (def_se, 0); rtx dest_extension_reg = see_get_extension_reg (def_se, 1); rtx *rtx_slot, subreg; rtx temp_extension = NULL; rtx simplified_temp_extension = NULL; rtx *pat; enum rtx_code code; enum entry_type extension_code; enum machine_mode source_extension_mode; enum machine_mode source_mode = VOIDmode; enum machine_mode dest_extension_mode; bool merge_success = false; int i; gcc_assert (def_se && INSN_P (def_se) && curr_ref_s && ref && INSN_P (ref)); if (!see_want_to_be_merged_with_extension (ref, def_se, DEF_EXTENSION)) { /* The definition in the reference is too simple. Don't try to merge. */ if (dump_file) { fprintf (dump_file, "Def merge skipped!\n"); fprintf (dump_file, "Original instructions:\n"); print_rtl_single (dump_file, ref); print_rtl_single (dump_file, def_se); } df_insn_delete (NULL, INSN_UID (ref_copy)); see_def_extension_not_merged (curr_ref_s, def_se); /* Continue to the next extension. */ return 1; } extension_code = see_get_extension_data (def_se, &source_mode); /* Try to merge and simplify the extension. */ source_extension_mode = GET_MODE (source_extension_reg); dest_extension_mode = GET_MODE (dest_extension_reg); pat = &PATTERN (ref_copy); code = GET_CODE (*pat); if (code == PARALLEL) { bool need_to_apply_change = false; for (i = 0; i < XVECLEN (*pat, 0); i++) { rtx *sub = &XVECEXP (*pat, 0, i); if (GET_CODE (*sub) == SET && GET_MODE (SET_SRC (*sub)) != VOIDmode && GET_MODE (SET_DEST (*sub)) == source_mode && ((REG_P (SET_DEST (*sub)) && REGNO (SET_DEST (*sub)) == REGNO (source_extension_reg)) || (GET_CODE (SET_DEST (*sub)) == SUBREG && REG_P (SUBREG_REG (SET_DEST (*sub))) && (REGNO (SUBREG_REG (SET_DEST (*sub))) == REGNO (source_extension_reg))))) { rtx orig_src = SET_SRC (*sub); if (extension_code == SIGN_EXTENDED_DEF) temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode, orig_src); else temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode, orig_src); simplified_temp_extension = simplify_rtx (temp_extension); temp_extension = (simplified_temp_extension) ? simplified_temp_extension : temp_extension; new_set = gen_rtx_SET (VOIDmode, dest_extension_reg, temp_extension); validate_change (ref_copy, sub, new_set, 1); need_to_apply_change = true; } } if (need_to_apply_change) if (apply_change_group ()) merge_success = true; } else if (code == SET && GET_MODE (SET_SRC (*pat)) != VOIDmode && GET_MODE (SET_DEST (*pat)) == source_mode && ((REG_P (SET_DEST (*pat)) && REGNO (SET_DEST (*pat)) == REGNO (source_extension_reg)) || (GET_CODE (SET_DEST (*pat)) == SUBREG && REG_P (SUBREG_REG (SET_DEST (*pat))) && (REGNO (SUBREG_REG (SET_DEST (*pat))) == REGNO (source_extension_reg))))) { rtx orig_src = SET_SRC (*pat); if (extension_code == SIGN_EXTENDED_DEF) temp_extension = gen_rtx_SIGN_EXTEND (dest_extension_mode, orig_src); else temp_extension = gen_rtx_ZERO_EXTEND (dest_extension_mode, orig_src); simplified_temp_extension = simplify_rtx (temp_extension); temp_extension = (simplified_temp_extension) ? simplified_temp_extension : temp_extension; new_set = gen_rtx_SET (VOIDmode, dest_extension_reg, temp_extension); if (validate_change (ref_copy, pat, new_set, 0)) merge_success = true; } if (!merge_success) { /* The merge failed. */ if (dump_file) { fprintf (dump_file, "Def merge failed!\n"); fprintf (dump_file, "Original instructions:\n"); print_rtl_single (dump_file, ref); print_rtl_single (dump_file, def_se); } df_insn_delete (NULL, INSN_UID (ref_copy)); see_def_extension_not_merged (curr_ref_s, def_se); /* Continue to the next extension. */ return 1; } /* The merge succeeded! */ if (curr_ref_s->merged_insn) df_insn_delete (NULL, INSN_UID (curr_ref_s->merged_insn)); /* Create a simple move instruction to assure the correctness of the code. */ subreg = gen_lowpart_SUBREG (source_extension_mode, dest_extension_reg); start_sequence (); emit_insn (ref_copy); emit_move_insn (source_extension_reg, subreg); if (merged_ref_next != NULL_RTX) emit_insn (merged_ref_next); curr_ref_s->merged_insn = get_insns (); end_sequence (); if (dump_file) { fprintf (dump_file, "Def merge succeeded!\n"); fprintf (dump_file, "Original instructions:\n"); print_rtl_single (dump_file, ref); print_rtl_single (dump_file, def_se); fprintf (dump_file, "Merged instruction:\n"); print_rtl_single (dump_file, curr_ref_s->merged_insn); fprintf (dump_file, "Move instruction that was added:\n"); print_rtl_single (dump_file, NEXT_INSN (curr_ref_s->merged_insn)); } /* Remove the current def_se from the unmerged_def_se_hash and insert it to the merged_def_se_hash. */ htab_clear_slot (curr_ref_s->unmerged_def_se_hash, (PTR *)slot); if (!curr_ref_s->merged_def_se_hash) curr_ref_s->merged_def_se_hash = htab_create (10, hash_descriptor_extension, eq_descriptor_extension, NULL); rtx_slot = (rtx *) htab_find_slot (curr_ref_s->merged_def_se_hash, dest_extension_reg, INSERT); gcc_assert (*rtx_slot == NULL); *rtx_slot = def_se; return 1; } /* Try to eliminate extensions in this order: a. Try to merge only the def extensions, one by one. b. Try to merge only the use extensions, one by one. TODO: Try to merge any couple of use extensions simultaneously. Try to merge any def extension with one or two uses extensions simultaneously. After all the merges are done, update the register properties for the basic block and eliminate locally redundant use extensions. This is a subroutine of see_merge_and_eliminate_extensions called via splay_tree_foreach. STN is the current node in the see_bb_splay_ar[i] splay tree. It holds a see_ref_s structure. */ static int see_handle_extensions_for_one_ref (splay_tree_node stn, void *data ATTRIBUTE_UNUSED) { htab_t use_se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash; htab_t unmerged_def_se_hash = ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash; htab_t merged_def_se_hash; rtx ref = ((struct see_ref_s *) (stn->value))->insn; if (dump_file) { fprintf (dump_file, "Handling ref:\n"); print_rtl_single (dump_file, ref); } /* a. Try to eliminate only def extensions, one by one. */ if (unmerged_def_se_hash) htab_traverse_noresize (unmerged_def_se_hash, see_merge_one_def_extension, (PTR) (stn->value)); if (use_se_hash) /* b. Try to eliminate only use extensions, one by one. */ htab_traverse_noresize (use_se_hash, see_merge_one_use_extension, (PTR) (stn->value)); merged_def_se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash; if (dump_file) { fprintf (dump_file, "The hashes of the current reference:\n"); if (unmerged_def_se_hash) { fprintf (dump_file, "unmerged_def_se_hash:\n"); htab_traverse (unmerged_def_se_hash, see_print_one_extension, NULL); } if (merged_def_se_hash) { fprintf (dump_file, "merged_def_se_hash:\n"); htab_traverse (merged_def_se_hash, see_print_one_extension, NULL); } if (use_se_hash) { fprintf (dump_file, "use_se_hash:\n"); htab_traverse (use_se_hash, see_print_one_extension, NULL); } } /* Now that all the merges are done, update the register properties of the basic block and eliminate locally redundant extensions. It is important that we first traverse the use extensions hash and afterwards the def extensions hashes. */ if (use_se_hash) htab_traverse_noresize (use_se_hash, see_set_prop_unmerged_use, (PTR) (stn->value)); if (unmerged_def_se_hash) htab_traverse (unmerged_def_se_hash, see_set_prop_unmerged_def, (PTR) (stn->value)); if (merged_def_se_hash) htab_traverse (merged_def_se_hash, see_set_prop_merged_def, (PTR) (stn->value)); /* Continue to the next definition. */ return 0; } /* Phase 2 top level function. In this phase, we try to merge def extensions and use extensions with their references, and eliminate redundant extensions in the same basic block. We also gather information for the next phases. */ static void see_merge_and_eliminate_extensions (void) { int i = 0; if (dump_file) fprintf (dump_file, "* Phase 2: Merge and eliminate locally redundant extensions. *\n"); /* Traverse over all the splay trees of the basic blocks. */ for (i = 0; i < last_bb; i++) { if (see_bb_splay_ar[i]) { if (dump_file) fprintf (dump_file, "Handling references for bb %d\n", i); /* Traverse over all the references in the basic block in forward order. */ splay_tree_foreach (see_bb_splay_ar[i], see_handle_extensions_for_one_ref, NULL); } } } /* Phase 1 implementation: Propagate extensions to uses. */ /* Insert REF_INSN into the splay tree of its basic block. SE_INSN is the extension to store in the proper hash according to TYPE. Return true if everything went well. Otherwise, return false (this will cause the optimization to be aborted). */ static bool see_store_reference_and_extension (rtx ref_insn, rtx se_insn, enum extension_type type) { rtx *rtx_slot; int curr_bb_num; splay_tree_node stn = NULL; htab_t se_hash = NULL; struct see_ref_s *ref_s = NULL; /* Check the arguments. */ gcc_assert (ref_insn && se_insn); if (!see_bb_splay_ar) return false; curr_bb_num = BLOCK_NUM (ref_insn); gcc_assert (curr_bb_num < last_bb && curr_bb_num >= 0); /* Insert the reference to the splay tree of its basic block. */ if (!see_bb_splay_ar[curr_bb_num]) /* The splay tree for this block doesn't exist yet, create it. */ see_bb_splay_ar[curr_bb_num] = splay_tree_new (splay_tree_compare_ints, NULL, see_free_ref_s); else /* Splay tree already exists, check if the current reference is already in it. */ { stn = splay_tree_lookup (see_bb_splay_ar[curr_bb_num], DF_INSN_LUID (ref_insn)); if (stn) switch (type) { case EXPLICIT_DEF_EXTENSION: se_hash = ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash; if (!se_hash) { se_hash = htab_create (10, hash_descriptor_extension, eq_descriptor_extension, NULL); ((struct see_ref_s *) (stn->value))->unmerged_def_se_hash = se_hash; } break; case IMPLICIT_DEF_EXTENSION: se_hash = ((struct see_ref_s *) (stn->value))->merged_def_se_hash; if (!se_hash) { se_hash = htab_create (10, hash_descriptor_extension, eq_descriptor_extension, NULL); ((struct see_ref_s *) (stn->value))->merged_def_se_hash = se_hash; } break; case USE_EXTENSION: se_hash = ((struct see_ref_s *) (stn->value))->use_se_hash; if (!se_hash) { se_hash = htab_create (10, hash_descriptor_extension, eq_descriptor_extension, NULL); ((struct see_ref_s *) (stn->value))->use_se_hash = se_hash; } break; default: gcc_unreachable (); } } /* Initialize a new see_ref_s structure and insert it to the splay tree. */ if (!stn) { ref_s = XNEW (struct see_ref_s); ref_s->luid = DF_INSN_LUID (ref_insn); ref_s->insn = ref_insn; ref_s->merged_insn = NULL; /* Initialize the hashes. */ switch (type) { case EXPLICIT_DEF_EXTENSION: ref_s->unmerged_def_se_hash = htab_create (10, hash_descriptor_extension, eq_descriptor_extension, NULL); se_hash = ref_s->unmerged_def_se_hash; ref_s->merged_def_se_hash = NULL; ref_s->use_se_hash = NULL; break; case IMPLICIT_DEF_EXTENSION: ref_s->merged_def_se_hash = htab_create (10, hash_descriptor_extension, eq_descriptor_extension, NULL); se_hash = ref_s->merged_def_se_hash; ref_s->unmerged_def_se_hash = NULL; ref_s->use_se_hash = NULL; break; case USE_EXTENSION: ref_s->use_se_hash = htab_create (10, hash_descriptor_extension, eq_descriptor_extension, NULL); se_hash = ref_s->use_se_hash; ref_s->unmerged_def_se_hash = NULL; ref_s->merged_def_se_hash = NULL; break; default: gcc_unreachable (); } } /* Insert the new extension instruction into the correct se_hash of the current reference. */ rtx_slot = (rtx *) htab_find_slot (se_hash, se_insn, INSERT); if (*rtx_slot != NULL) { gcc_assert (type == USE_EXTENSION); gcc_assert (rtx_equal_p (PATTERN (*rtx_slot), PATTERN (se_insn))); } else *rtx_slot = se_insn; /* If this is a new reference, insert it into the splay_tree. */ if (!stn) splay_tree_insert (see_bb_splay_ar[curr_bb_num], DF_INSN_LUID (ref_insn), (splay_tree_value) ref_s); return true; } /* Go over all the defs, for each relevant definition (defined below) store its instruction as a reference. A definition is relevant if its root has ((entry_type == SIGN_EXTENDED_DEF) || (entry_type == ZERO_EXTENDED_DEF)) and his source_mode is not narrower then the roots source_mode. Return the number of relevant defs or negative number if something bad had happened and the optimization should be aborted. */ static int see_handle_relevant_defs (struct df_ref *ref, rtx insn) { struct web_entry *root_entry = NULL; rtx se_insn = NULL; enum entry_type extension_code; rtx reg = DF_REF_REAL_REG (ref); rtx ref_insn = NULL; unsigned int i = DF_REF_ID (ref); root_entry = unionfind_root (&def_entry[DF_REF_ID (ref)]); if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF) /* The current web is not relevant. Continue to the next def. */ return 0; if (root_entry->reg) /* It isn't possible to have two different register for the same web. */ gcc_assert (rtx_equal_p (root_entry->reg, reg)); else root_entry->reg = reg; /* The current definition is an EXTENDED_DEF or a definition that its source_mode is narrower then its web's source_mode. This means that we need to generate the implicit extension explicitly and store it in the current reference's merged_def_se_hash. */ if (ENTRY_EI (&def_entry[i])->local_relevancy == EXTENDED_DEF || (ENTRY_EI (&def_entry[i])->local_source_mode < ENTRY_EI (root_entry)->source_mode)) { if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF) extension_code = SIGN_EXTENDED_DEF; else extension_code = ZERO_EXTENDED_DEF; se_insn = see_gen_normalized_extension (reg, extension_code, ENTRY_EI (root_entry)->source_mode); /* This is a dummy extension, mark it as deleted. */ INSN_DELETED_P (se_insn) = 1; if (!see_store_reference_and_extension (insn, se_insn, IMPLICIT_DEF_EXTENSION)) /* Something bad happened. Abort the optimization. */ return -1; return 1; } ref_insn = PREV_INSN (insn); gcc_assert (BLOCK_NUM (ref_insn) == BLOCK_NUM (insn)); if (!see_store_reference_and_extension (ref_insn, insn, EXPLICIT_DEF_EXTENSION)) /* Something bad happened. Abort the optimization. */ return -1; return 0; } /* Go over all the uses, for each use in relevant web store its instruction as a reference and generate an extension before it. Return the number of relevant uses or negative number if something bad had happened and the optimization should be aborted. */ static int see_handle_relevant_uses (struct df_ref *ref, rtx insn) { struct web_entry *root_entry = NULL; rtx se_insn = NULL; enum entry_type extension_code; rtx reg = DF_REF_REAL_REG (ref); root_entry = unionfind_root (&use_entry[DF_REF_ID (ref)]); if (ENTRY_EI (root_entry)->relevancy != SIGN_EXTENDED_DEF && ENTRY_EI (root_entry)->relevancy != ZERO_EXTENDED_DEF) /* The current web is not relevant. Continue to the next use. */ return 0; if (root_entry->reg) /* It isn't possible to have two different register for the same web. */ gcc_assert (rtx_equal_p (root_entry->reg, reg)); else root_entry->reg = reg; /* Generate the use extension. */ if (ENTRY_EI (root_entry)->relevancy == SIGN_EXTENDED_DEF) extension_code = SIGN_EXTENDED_DEF; else extension_code = ZERO_EXTENDED_DEF; se_insn = see_gen_normalized_extension (reg, extension_code, ENTRY_EI (root_entry)->source_mode); if (!se_insn) /* This is very bad, abort the transformation. */ return -1; if (!see_store_reference_and_extension (insn, se_insn, USE_EXTENSION)) /* Something bad happened. Abort the optimization. */ return -1; return 1; } static int see_handle_relevant_refs (void) { int num_relevant_refs = 0; basic_block bb; FOR_ALL_BB (bb) { rtx insn; FOR_BB_INSNS (bb, insn) { unsigned int uid = INSN_UID (insn); if (INSN_P (insn)) { struct df_ref **use_rec; struct df_ref **def_rec; for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++) { struct df_ref *use = *use_rec; int result = see_handle_relevant_uses (use, insn); if (result == -1) return -1; num_relevant_refs += result; } for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++) { struct df_ref *use = *use_rec; int result = see_handle_relevant_uses (use, insn); if (result == -1) return -1; num_relevant_refs += result; } for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++) { struct df_ref *def = *def_rec; int result = see_handle_relevant_defs (def, insn); if (result == -1) return -1; num_relevant_refs += result; } } } } return num_relevant_refs; } /* Initialized the use_entry field for REF in INSN at INDEX with ET. */ static void see_update_uses_relevancy (rtx insn, struct df_ref *ref, enum entry_type et, unsigned int index) { struct see_entry_extra_info *curr_entry_extra_info; if (dump_file) { rtx reg = DF_REF_REAL_REG (ref); fprintf (dump_file, "u%i insn %i reg %i ", index, (insn ? INSN_UID (insn) : -1), REGNO (reg)); if (et == NOT_RELEVANT) fprintf (dump_file, "NOT RELEVANT \n"); else fprintf (dump_file, "RELEVANT USE \n"); } DF_REF_ID (ref) = index; curr_entry_extra_info = XNEW (struct see_entry_extra_info); curr_entry_extra_info->relevancy = et; curr_entry_extra_info->local_relevancy = et; use_entry[index].extra_info = curr_entry_extra_info; use_entry[index].reg = NULL; use_entry[index].pred = NULL; } /* A definition in a candidate for this optimization only if its pattern is recognized as relevant in this function. INSN is the instruction to be recognized. - If this is the pattern of a common sign extension after definition: PREV_INSN (INSN): def (reg:NARROWmode r) INSN: set ((reg:WIDEmode r') (sign_extend:WIDEmode (reg:NARROWmode r))) return SIGN_EXTENDED_DEF and set SOURCE_MODE to NARROWmode. - If this is the pattern of a common zero extension after definition: PREV_INSN (INSN): def (reg:NARROWmode r) INSN: set ((reg:WIDEmode r') (zero_extend:WIDEmode (reg:NARROWmode r))) return ZERO_EXTENDED_DEF and set SOURCE_MODE to NARROWmode. - Otherwise, For the pattern: INSN: set ((reg:WIDEmode r) (sign_extend:WIDEmode (...expr...))) return EXTENDED_DEF and set SOURCE_MODE to the mode of expr. For the pattern: INSN: set ((reg:WIDEmode r) (zero_extend:WIDEmode (...expr...))) return EXTENDED_DEF and set SOURCE_MODE_UNSIGNED to the mode of expr. For the pattern: INSN: set ((reg:WIDEmode r) (CONST_INT (...))) return EXTENDED_DEF and set SOURCE_MODE(_UNSIGNED) to the narrowest mode that is implicitly sign(zero) extended to WIDEmode in the INSN. - FIXME: Extensions that are not adjacent to their definition and EXTENDED_DEF that is part of a PARALLEL instruction are not handled. These restriction can be relaxed. */ static enum entry_type see_analyze_one_def (rtx insn, enum machine_mode *source_mode, enum machine_mode *source_mode_unsigned) { enum entry_type extension_code; rtx rhs = NULL; rtx lhs = NULL; rtx set = NULL; rtx source_register = NULL; rtx prev_insn = NULL; rtx next_insn = NULL; enum machine_mode mode; enum machine_mode next_source_mode; HOST_WIDE_INT val = 0; HOST_WIDE_INT val2 = 0; int i = 0; *source_mode = MAX_MACHINE_MODE; *source_mode_unsigned = MAX_MACHINE_MODE; extension_code = see_get_extension_data (insn, source_mode); switch (extension_code) { case SIGN_EXTENDED_DEF: case ZERO_EXTENDED_DEF: source_register = see_get_extension_reg (insn, 0); /* FIXME: This restriction can be relaxed. The only thing that is important is that the reference would be inside the same basic block as the extension. */ prev_insn = PREV_INSN (insn); if (!prev_insn || !INSN_P (prev_insn)) return NOT_RELEVANT; if (!reg_set_between_p (source_register, PREV_INSN (prev_insn), insn)) return NOT_RELEVANT; /* If we can't use copy_rtx on the reference it can't be a reference. */ if (GET_CODE (PATTERN (prev_insn)) == PARALLEL && asm_noperands (PATTERN (prev_insn)) >= 0) return NOT_RELEVANT; /* Now, check if this extension is a reference itself. If so, it is not relevant. Handling this extension as relevant would make things much more complicated. */ next_insn = NEXT_INSN (insn); if (next_insn && INSN_P (next_insn) && (see_get_extension_data (next_insn, &next_source_mode) != NOT_RELEVANT)) { rtx curr_dest_register = see_get_extension_reg (insn, 1); rtx next_source_register = see_get_extension_reg (next_insn, 0); if (REGNO (curr_dest_register) == REGNO (next_source_register)) return NOT_RELEVANT; } return extension_code; case NOT_RELEVANT: /* This may still be an EXTENDED_DEF. */ /* FIXME: This restriction can be relaxed. It is possible to handle PARALLEL insns too. */ set = single_set (insn); if (!set) return NOT_RELEVANT; rhs = SET_SRC (set); lhs = SET_DEST (set); /* Don't handle extensions to something other then register or subregister. */ if (!REG_P (lhs) && GET_CODE (lhs) != SUBREG) return NOT_RELEVANT; switch (GET_CODE (rhs)) { case SIGN_EXTEND: *source_mode = GET_MODE (XEXP (rhs, 0)); *source_mode_unsigned = MAX_MACHINE_MODE; return EXTENDED_DEF; case ZERO_EXTEND: *source_mode = MAX_MACHINE_MODE; *source_mode_unsigned = GET_MODE (XEXP (rhs, 0)); return EXTENDED_DEF; case CONST_INT: val = INTVAL (rhs); /* Find the narrowest mode, val could fit into. */ for (mode = GET_CLASS_NARROWEST_MODE (MODE_INT), i = 0; GET_MODE_BITSIZE (mode) < BITS_PER_WORD; mode = GET_MODE_WIDER_MODE (mode), i++) { val2 = trunc_int_for_mode (val, mode); if (val2 == val && *source_mode == MAX_MACHINE_MODE) *source_mode = mode; if (val == (val & (HOST_WIDE_INT)GET_MODE_MASK (mode)) && *source_mode_unsigned == MAX_MACHINE_MODE) *source_mode_unsigned = mode; if (*source_mode != MAX_MACHINE_MODE && *source_mode_unsigned !=MAX_MACHINE_MODE) return EXTENDED_DEF; } if (*source_mode != MAX_MACHINE_MODE || *source_mode_unsigned !=MAX_MACHINE_MODE) return EXTENDED_DEF; return NOT_RELEVANT; default: return NOT_RELEVANT; } default: gcc_unreachable (); } } /* Initialized the def_entry field for REF in INSN at INDEX with ET. */ static void see_update_defs_relevancy (rtx insn, struct df_ref *ref, enum entry_type et, enum machine_mode source_mode, enum machine_mode source_mode_unsigned, unsigned int index) { struct see_entry_extra_info *curr_entry_extra_info = XNEW (struct see_entry_extra_info); curr_entry_extra_info->relevancy = et; curr_entry_extra_info->local_relevancy = et; DF_REF_ID (ref) = index; if (et != EXTENDED_DEF) { curr_entry_extra_info->source_mode = source_mode; curr_entry_extra_info->local_source_mode = source_mode; } else { curr_entry_extra_info->source_mode_signed = source_mode; curr_entry_extra_info->source_mode_unsigned = source_mode_unsigned; } def_entry[index].extra_info = curr_entry_extra_info; def_entry[index].reg = NULL; def_entry[index].pred = NULL; if (dump_file) { rtx reg = DF_REF_REAL_REG (ref); if (et == NOT_RELEVANT) { fprintf (dump_file, "d%i insn %i reg %i ", index, (insn ? INSN_UID (insn) : -1), REGNO (reg)); fprintf (dump_file, "NOT RELEVANT \n"); } else { fprintf (dump_file, "d%i insn %i reg %i ", index, INSN_UID (insn), REGNO (reg)); fprintf (dump_file, "RELEVANT - "); switch (et) { case SIGN_EXTENDED_DEF : fprintf (dump_file, "SIGN_EXTENDED_DEF, source_mode = %s\n", GET_MODE_NAME (source_mode)); break; case ZERO_EXTENDED_DEF : fprintf (dump_file, "ZERO_EXTENDED_DEF, source_mode = %s\n", GET_MODE_NAME (source_mode)); break; case EXTENDED_DEF : fprintf (dump_file, "EXTENDED_DEF, "); if (source_mode != MAX_MACHINE_MODE && source_mode_unsigned != MAX_MACHINE_MODE) { fprintf (dump_file, "positive const, "); fprintf (dump_file, "source_mode_signed = %s, ", GET_MODE_NAME (source_mode)); fprintf (dump_file, "source_mode_unsigned = %s\n", GET_MODE_NAME (source_mode_unsigned)); } else if (source_mode != MAX_MACHINE_MODE) fprintf (dump_file, "source_mode_signed = %s\n", GET_MODE_NAME (source_mode)); else fprintf (dump_file, "source_mode_unsigned = %s\n", GET_MODE_NAME (source_mode_unsigned)); break; default : gcc_unreachable (); } } } } /* Updates the relevancy of all the uses and all defs. The information of the u'th use is stored in use_entry[u] and the information of the d'th definition is stored in def_entry[d]. Currently all the uses are relevant for the optimization except for uses that are in LIBCALL or RETVAL instructions. */ static void see_update_relevancy (void) { unsigned int d = 0; unsigned int u = 0; enum entry_type et; enum machine_mode source_mode; enum machine_mode source_mode_unsigned; basic_block bb; if (!def_entry) return; FOR_ALL_BB (bb) { struct df_ref **use_rec; struct df_ref **def_rec; rtx insn; FOR_BB_INSNS (bb, insn) { unsigned int uid = INSN_UID (insn); if (INSN_P (insn)) { et = RELEVANT_USE; for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++) { struct df_ref *use = *use_rec; see_update_uses_relevancy (insn, use, et, u); u++; } for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++) { struct df_ref *use = *use_rec; see_update_uses_relevancy (insn, use, et, u); u++; } et = see_analyze_one_def (insn, &source_mode, &source_mode_unsigned); for (def_rec = DF_INSN_UID_DEFS (uid); *def_rec; def_rec++) { struct df_ref *def = *def_rec; see_update_defs_relevancy (insn, def, et, source_mode, source_mode_unsigned, d); d++; } } } for (use_rec = df_get_artificial_uses (bb->index); *use_rec; use_rec++) { struct df_ref *use = *use_rec; see_update_uses_relevancy (NULL, use, NOT_RELEVANT, u); u++; } for (def_rec = df_get_artificial_defs (bb->index); *def_rec; def_rec++) { struct df_ref *def = *def_rec; see_update_defs_relevancy (NULL, def, NOT_RELEVANT, MAX_MACHINE_MODE, MAX_MACHINE_MODE, d); d++; } } } /* Phase 1 top level function. In this phase the relevancy of all the definitions and uses are checked, later the webs are produces and the extensions are generated. These extensions are not emitted yet into the insns stream. returns true if at list one relevant web was found and there were no problems, otherwise return false. */ static bool see_propagate_extensions_to_uses (void) { int num_relevant_refs; basic_block bb; if (dump_file) fprintf (dump_file, "* Phase 1: Propagate extensions to uses. *\n"); /* Update the relevancy of references using the DF object. */ see_update_relevancy (); /* Produce the webs and update the extra_info of the root. In general, a web is relevant if all its definitions and uses are relevant and there is at least one definition that was marked as SIGN_EXTENDED_DEF or ZERO_EXTENDED_DEF. */ FOR_ALL_BB (bb) { rtx insn; struct df_ref **use_rec; FOR_BB_INSNS (bb, insn) { unsigned int uid = INSN_UID (insn); if (INSN_P (insn)) { for (use_rec = DF_INSN_UID_USES (uid); *use_rec; use_rec++) { struct df_ref *use = *use_rec; union_defs (use, def_entry, use_entry, see_update_leader_extra_info); } for (use_rec = DF_INSN_UID_EQ_USES (uid); *use_rec; use_rec++) { struct df_ref *use = *use_rec; union_defs (use, def_entry, use_entry, see_update_leader_extra_info); } } } for (use_rec = df_get_artificial_uses (bb->index); *use_rec; use_rec++) { struct df_ref *use = *use_rec; union_defs (use, def_entry, use_entry, see_update_leader_extra_info); } } /* Generate use extensions for references and insert these references to see_bb_splay_ar data structure. */ num_relevant_refs = see_handle_relevant_refs (); return num_relevant_refs > 0; } /* Main entry point for the sign extension elimination optimization. */ static void see_main (void) { bool cont = false; int i = 0; /* Initialize global data structures. */ see_initialize_data_structures (); /* Phase 1: Propagate extensions to uses. */ cont = see_propagate_extensions_to_uses (); if (cont) { init_recog (); /* Phase 2: Merge and eliminate locally redundant extensions. */ see_merge_and_eliminate_extensions (); /* Phase 3: Eliminate globally redundant extensions. */ see_execute_LCM (); /* Phase 4: Commit changes to the insn stream. */ see_commit_changes (); if (dump_file) { /* For debug purpose only. */ fprintf (dump_file, "see_pre_extension_hash:\n"); htab_traverse (see_pre_extension_hash, see_print_pre_extension_expr, NULL); for (i = 0; i < last_bb; i++) { if (see_bb_hash_ar[i]) /* Traverse over all the references in the basic block in forward order. */ { fprintf (dump_file, "Searching register properties in bb %d\n", i); htab_traverse (see_bb_hash_ar[i], see_print_register_properties, NULL); } } } } /* Free global data structures. */ see_free_data_structures (); } static bool gate_handle_see (void) { return optimize > 1 && flag_see; } static unsigned int rest_of_handle_see (void) { see_main (); df_clear_flags (DF_DEFER_INSN_RESCAN); df_process_deferred_rescans (); run_fast_dce (); return 0; } struct rtl_opt_pass pass_see = { { RTL_PASS, "see", /* name */ gate_handle_see, /* gate */ rest_of_handle_see, /* execute */ NULL, /* sub */ NULL, /* next */ 0, /* static_pass_number */ TV_SEE, /* tv_id */ 0, /* properties_required */ 0, /* properties_provided */ 0, /* properties_destroyed */ 0, /* todo_flags_start */ TODO_df_verify | TODO_df_finish | TODO_verify_rtl_sharing | TODO_dump_func /* todo_flags_finish */ } };