author     Richard Sandiford <richard.sandiford@linaro.org>  2017-10-23 19:56:19 +0100
committer  Richard Sandiford <richard.sandiford@linaro.org>  2017-10-23 19:56:19 +0100
commit     7bef5b82e4109778a0988d20e19e1ed29dadd835 (patch)
tree       f5c594a5206e2b23c95741c1338fc1d11acffd25 /gcc
parent     246229fdf9230ca040aa990a3fbb42698f30ae5f (diff)
parent     b11bf8d85f574c56cab353544b50396c18ab9b93 (diff)
Merge trunk into sve
Diffstat (limited to 'gcc')
-rw-r--r--  gcc/ChangeLog | 627
-rw-r--r--  gcc/DATESTAMP | 2
-rw-r--r--  gcc/Makefile.in | 4
-rw-r--r--  gcc/alias.c | 270
-rw-r--r--  gcc/asan.c | 6
-rw-r--r--  gcc/brig/brigfrontend/brig-basic-inst-handler.cc | 9
-rw-r--r--  gcc/brig/brigfrontend/brig-code-entry-handler.cc | 4
-rw-r--r--  gcc/brig/brigfrontend/brig-util.h | 2
-rw-r--r--  gcc/builtins.c | 3
-rw-r--r--  gcc/c-family/ChangeLog | 18
-rw-r--r--  gcc/c-family/c-attribs.c | 9
-rw-r--r--  gcc/c-family/c-common.c | 158
-rw-r--r--  gcc/c-family/c-common.h | 3
-rw-r--r--  gcc/c-family/c-opts.c | 16
-rw-r--r--  gcc/c/ChangeLog | 14
-rw-r--r--  gcc/c/c-decl.c | 3
-rw-r--r--  gcc/c/c-parser.c | 25
-rw-r--r--  gcc/c/c-parser.h | 3
-rw-r--r--  gcc/calls.c | 84
-rw-r--r--  gcc/cfgcleanup.c | 1
-rw-r--r--  gcc/cfgexpand.c | 16
-rw-r--r--  gcc/cfgloop.c | 9
-rw-r--r--  gcc/cgraphunit.c | 149
-rw-r--r--  gcc/combine.c | 19
-rw-r--r--  gcc/common.opt | 4
-rw-r--r--  gcc/config.gcc | 9
-rw-r--r--  gcc/config/aarch64/aarch64-builtins.c | 2
-rw-r--r--  gcc/config/aarch64/aarch64-c.c | 2
-rw-r--r--  gcc/config/aarch64/aarch64-protos.h | 2
-rw-r--r--  gcc/config/aarch64/aarch64.c | 58
-rw-r--r--  gcc/config/aarch64/aarch64.h | 5
-rw-r--r--  gcc/config/aarch64/cortex-a57-fma-steering.c | 2
-rw-r--r--  gcc/config/aarch64/driver-aarch64.c | 2
-rw-r--r--  gcc/config/alpha/alpha.c | 14
-rw-r--r--  gcc/config/alpha/alpha.h | 7
-rw-r--r--  gcc/config/alpha/driver-alpha.c | 2
-rw-r--r--  gcc/config/arc/arc-c.c | 2
-rw-r--r--  gcc/config/arc/arc.c | 2
-rw-r--r--  gcc/config/arc/arc.h | 12
-rw-r--r--  gcc/config/arc/driver-arc.c | 2
-rw-r--r--  gcc/config/arm/aarch-common.c | 2
-rw-r--r--  gcc/config/arm/arm-builtins.c | 2
-rw-r--r--  gcc/config/arm/arm-c.c | 2
-rw-r--r--  gcc/config/arm/arm.c | 6
-rw-r--r--  gcc/config/arm/arm.h | 8
-rw-r--r--  gcc/config/arm/driver-arm.c | 2
-rw-r--r--  gcc/config/avr/avr-c.c | 2
-rw-r--r--  gcc/config/avr/avr-devices.c | 2
-rw-r--r--  gcc/config/avr/avr-log.c | 2
-rw-r--r--  gcc/config/avr/avr-protos.h | 1
-rw-r--r--  gcc/config/avr/avr.c | 17
-rw-r--r--  gcc/config/avr/avr.h | 2
-rw-r--r--  gcc/config/avr/avr.md | 5
-rw-r--r--  gcc/config/avr/driver-avr.c | 2
-rw-r--r--  gcc/config/avr/gen-avr-mmcu-specs.c | 2
-rw-r--r--  gcc/config/bfin/bfin.c | 2
-rw-r--r--  gcc/config/bfin/bfin.h | 6
-rw-r--r--  gcc/config/c6x/c6x.c | 2
-rw-r--r--  gcc/config/c6x/c6x.h | 1
-rw-r--r--  gcc/config/cr16/cr16-protos.h | 1
-rw-r--r--  gcc/config/cr16/cr16.c | 10
-rw-r--r--  gcc/config/cr16/cr16.h | 4
-rw-r--r--  gcc/config/cris/cris.c | 2
-rw-r--r--  gcc/config/cris/cris.h | 4
-rw-r--r--  gcc/config/darwin-c.c | 8
-rw-r--r--  gcc/config/darwin.c | 2
-rw-r--r--  gcc/config/epiphany/epiphany.c | 13
-rw-r--r--  gcc/config/epiphany/epiphany.h | 6
-rw-r--r--  gcc/config/epiphany/mode-switch-use.c | 2
-rw-r--r--  gcc/config/epiphany/resolve-sw-modes.c | 2
-rw-r--r--  gcc/config/fr30/fr30.c | 2
-rw-r--r--  gcc/config/fr30/fr30.h | 9
-rw-r--r--  gcc/config/frv/frv.c | 2
-rw-r--r--  gcc/config/frv/frv.h | 8
-rw-r--r--  gcc/config/ft32/ft32.c | 2
-rw-r--r--  gcc/config/ft32/ft32.h | 4
-rw-r--r--  gcc/config/h8300/h8300-protos.h | 1
-rw-r--r--  gcc/config/h8300/h8300.c | 17
-rw-r--r--  gcc/config/h8300/h8300.h | 20
-rw-r--r--  gcc/config/i386/djgpp.c | 2
-rw-r--r--  gcc/config/i386/driver-i386.c | 2
-rw-r--r--  gcc/config/i386/driver-mingw32.c | 2
-rw-r--r--  gcc/config/i386/host-cygwin.c | 2
-rw-r--r--  gcc/config/i386/host-i386-darwin.c | 2
-rw-r--r--  gcc/config/i386/host-mingw32.c | 2
-rw-r--r--  gcc/config/i386/i386-builtin.def | 72
-rw-r--r--  gcc/config/i386/i386-c.c | 2
-rw-r--r--  gcc/config/i386/i386-protos.h | 21
-rw-r--r--  gcc/config/i386/i386.c | 3976
-rw-r--r--  gcc/config/i386/i386.h | 16
-rw-r--r--  gcc/config/i386/i386.md | 301
-rw-r--r--  gcc/config/i386/ia32intrin.h | 12
-rw-r--r--  gcc/config/i386/intelmic-mkoffload.c | 2
-rw-r--r--  gcc/config/i386/msformat-c.c | 2
-rw-r--r--  gcc/config/i386/predicates.md | 17
-rw-r--r--  gcc/config/i386/t-i386 | 16
-rw-r--r--  gcc/config/i386/winnt-cxx.c | 2
-rw-r--r--  gcc/config/i386/winnt-stubs.c | 2
-rw-r--r--  gcc/config/i386/winnt.c | 2
-rw-r--r--  gcc/config/i386/x86-tune-costs.h | 2083
-rw-r--r--  gcc/config/i386/x86-tune-sched-atom.c | 246
-rw-r--r--  gcc/config/i386/x86-tune-sched-bd.c | 824
-rw-r--r--  gcc/config/i386/x86-tune-sched-core.c | 257
-rw-r--r--  gcc/config/i386/x86-tune-sched.c | 629
-rw-r--r--  gcc/config/i386/x86-tune.def | 32
-rw-r--r--  gcc/config/ia64/ia64-c.c | 2
-rw-r--r--  gcc/config/ia64/ia64.c | 2
-rw-r--r--  gcc/config/ia64/ia64.h | 4
-rw-r--r--  gcc/config/iq2000/iq2000.c | 14
-rw-r--r--  gcc/config/iq2000/iq2000.h | 3
-rw-r--r--  gcc/config/lm32/lm32.c | 14
-rw-r--r--  gcc/config/lm32/lm32.h | 2
-rw-r--r--  gcc/config/m32c/m32c-pragma.c | 2
-rw-r--r--  gcc/config/m32c/m32c-protos.h | 2
-rw-r--r--  gcc/config/m32c/m32c.c | 6
-rw-r--r--  gcc/config/m32c/m32c.h | 1
-rw-r--r--  gcc/config/m32r/m32r.c | 15
-rw-r--r--  gcc/config/m32r/m32r.h | 9
-rw-r--r--  gcc/config/m68k/m68k-protos.h | 1
-rw-r--r--  gcc/config/m68k/m68k.c | 13
-rw-r--r--  gcc/config/m68k/m68k.h | 5
-rw-r--r--  gcc/config/mcore/mcore.c | 2
-rw-r--r--  gcc/config/mcore/mcore.h | 6
-rw-r--r--  gcc/config/microblaze/microblaze-c.c | 2
-rw-r--r--  gcc/config/microblaze/microblaze.c | 15
-rw-r--r--  gcc/config/microblaze/microblaze.h | 4
-rw-r--r--  gcc/config/mips/driver-native.c | 2
-rw-r--r--  gcc/config/mips/frame-header-opt.c | 2
-rw-r--r--  gcc/config/mips/mips.c | 18
-rw-r--r--  gcc/config/mips/mips.h | 8
-rw-r--r--  gcc/config/mmix/mmix-protos.h | 1
-rw-r--r--  gcc/config/mmix/mmix.c | 12
-rw-r--r--  gcc/config/mmix/mmix.h | 3
-rw-r--r--  gcc/config/mn10300/mn10300.c | 2
-rw-r--r--  gcc/config/mn10300/mn10300.h | 7
-rw-r--r--  gcc/config/moxie/moxie.c | 2
-rw-r--r--  gcc/config/moxie/moxie.h | 4
-rw-r--r--  gcc/config/msp430/driver-msp430.c | 2
-rw-r--r--  gcc/config/msp430/msp430-c.c | 2
-rw-r--r--  gcc/config/msp430/msp430.c | 2
-rw-r--r--  gcc/config/msp430/msp430.h | 1
-rw-r--r--  gcc/config/nds32/nds32-cost.c | 2
-rw-r--r--  gcc/config/nds32/nds32-fp-as-gp.c | 2
-rw-r--r--  gcc/config/nds32/nds32-intrinsic.c | 2
-rw-r--r--  gcc/config/nds32/nds32-isr.c | 2
-rw-r--r--  gcc/config/nds32/nds32-md-auxiliary.c | 2
-rw-r--r--  gcc/config/nds32/nds32-memory-manipulation.c | 2
-rw-r--r--  gcc/config/nds32/nds32-pipelines-auxiliary.c | 2
-rw-r--r--  gcc/config/nds32/nds32-predicates.c | 2
-rw-r--r--  gcc/config/nds32/nds32.c | 2
-rw-r--r--  gcc/config/nds32/nds32.h | 2
-rw-r--r--  gcc/config/nios2/nios2.c | 2
-rw-r--r--  gcc/config/nios2/nios2.h | 1
-rw-r--r--  gcc/config/nvptx/mkoffload.c | 2
-rw-r--r--  gcc/config/nvptx/nvptx.c | 2
-rw-r--r--  gcc/config/nvptx/nvptx.h | 1
-rw-r--r--  gcc/config/pa/pa.c | 25
-rw-r--r--  gcc/config/pa/pa.h | 11
-rw-r--r--  gcc/config/pdp11/pdp11-protos.h | 1
-rw-r--r--  gcc/config/pdp11/pdp11.c | 11
-rw-r--r--  gcc/config/pdp11/pdp11.h | 11
-rw-r--r--  gcc/config/powerpcspe/aix.h | 15
-rw-r--r--  gcc/config/powerpcspe/darwin.h | 12
-rw-r--r--  gcc/config/powerpcspe/driver-powerpcspe.c | 2
-rw-r--r--  gcc/config/powerpcspe/host-darwin.c | 2
-rw-r--r--  gcc/config/powerpcspe/host-ppc64-darwin.c | 2
-rw-r--r--  gcc/config/powerpcspe/powerpcspe-c.c | 2
-rw-r--r--  gcc/config/powerpcspe/powerpcspe-linux.c | 2
-rw-r--r--  gcc/config/powerpcspe/powerpcspe.c | 15
-rw-r--r--  gcc/config/powerpcspe/powerpcspe.h | 19
-rw-r--r--  gcc/config/riscv/riscv-builtins.c | 2
-rw-r--r--  gcc/config/riscv/riscv-c.c | 2
-rw-r--r--  gcc/config/riscv/riscv.c | 2
-rw-r--r--  gcc/config/riscv/riscv.h | 2
-rw-r--r--  gcc/config/rl78/rl78-c.c | 2
-rw-r--r--  gcc/config/rl78/rl78.c | 2
-rw-r--r--  gcc/config/rl78/rl78.h | 1
-rw-r--r--  gcc/config/rs6000/aix.h | 15
-rw-r--r--  gcc/config/rs6000/amo.h | 304
-rw-r--r--  gcc/config/rs6000/darwin.h | 12
-rw-r--r--  gcc/config/rs6000/driver-rs6000.c | 2
-rw-r--r--  gcc/config/rs6000/host-darwin.c | 2
-rw-r--r--  gcc/config/rs6000/host-ppc64-darwin.c | 2
-rw-r--r--  gcc/config/rs6000/predicates.md | 28
-rw-r--r--  gcc/config/rs6000/rs6000-c.c | 2
-rw-r--r--  gcc/config/rs6000/rs6000-linux.c | 2
-rw-r--r--  gcc/config/rs6000/rs6000-p8swap.c | 2
-rw-r--r--  gcc/config/rs6000/rs6000-protos.h | 1
-rw-r--r--  gcc/config/rs6000/rs6000-string.c | 2
-rw-r--r--  gcc/config/rs6000/rs6000.c | 120
-rw-r--r--  gcc/config/rs6000/rs6000.h | 21
-rw-r--r--  gcc/config/rs6000/rs6000.md | 67
-rw-r--r--  gcc/config/rx/rx.c | 2
-rw-r--r--  gcc/config/rx/rx.h | 1
-rw-r--r--  gcc/config/s390/driver-native.c | 2
-rw-r--r--  gcc/config/s390/s390-c.c | 2
-rw-r--r--  gcc/config/s390/s390.c | 2
-rw-r--r--  gcc/config/s390/s390.h | 3
-rw-r--r--  gcc/config/sh/sh-c.c | 2
-rw-r--r--  gcc/config/sh/sh-mem.cc | 2
-rw-r--r--  gcc/config/sh/sh.c | 2
-rw-r--r--  gcc/config/sh/sh.h | 7
-rw-r--r--  gcc/config/sh/sh_optimize_sett_clrt.cc | 2
-rw-r--r--  gcc/config/sh/sh_treg_combine.cc | 2
-rw-r--r--  gcc/config/sparc/driver-sparc.c | 2
-rw-r--r--  gcc/config/sparc/sparc-c.c | 2
-rw-r--r--  gcc/config/sparc/sparc.c | 7
-rw-r--r--  gcc/config/sparc/sparc.h | 6
-rw-r--r--  gcc/config/spu/spu-c.c | 2
-rw-r--r--  gcc/config/spu/spu.c | 2
-rw-r--r--  gcc/config/spu/spu.h | 2
-rw-r--r--  gcc/config/stormy16/stormy16-protos.h | 1
-rw-r--r--  gcc/config/stormy16/stormy16.c | 10
-rw-r--r--  gcc/config/stormy16/stormy16.h | 4
-rw-r--r--  gcc/config/tilegx/mul-tables.c | 2
-rw-r--r--  gcc/config/tilegx/tilegx-c.c | 2
-rw-r--r--  gcc/config/tilegx/tilegx.c | 2
-rw-r--r--  gcc/config/tilegx/tilegx.h | 1
-rw-r--r--  gcc/config/tilepro/mul-tables.c | 2
-rw-r--r--  gcc/config/tilepro/tilepro-c.c | 2
-rw-r--r--  gcc/config/tilepro/tilepro.c | 2
-rw-r--r--  gcc/config/tilepro/tilepro.h | 1
-rw-r--r--  gcc/config/v850/v850-c.c | 2
-rw-r--r--  gcc/config/v850/v850.c | 2
-rw-r--r--  gcc/config/v850/v850.h | 7
-rw-r--r--  gcc/config/vax/elf.h | 4
-rw-r--r--  gcc/config/vax/vax.c | 17
-rw-r--r--  gcc/config/vax/vax.h | 6
-rw-r--r--  gcc/config/visium/visium.c | 2
-rw-r--r--  gcc/config/visium/visium.h | 11
-rw-r--r--  gcc/config/vms/vms-c.c | 6
-rw-r--r--  gcc/config/vms/vms-f.c | 2
-rw-r--r--  gcc/config/vms/vms.c | 2
-rw-r--r--  gcc/config/xtensa/xtensa.c | 16
-rw-r--r--  gcc/config/xtensa/xtensa.h | 4
-rw-r--r--  gcc/coretypes.h | 2
-rw-r--r--  gcc/cp/ChangeLog | 86
-rw-r--r--  gcc/cp/call.c | 13
-rw-r--r--  gcc/cp/constexpr.c | 20
-rw-r--r--  gcc/cp/constraint.cc | 7
-rw-r--r--  gcc/cp/cp-tree.h | 34
-rw-r--r--  gcc/cp/cvt.c | 18
-rw-r--r--  gcc/cp/decl.c | 2
-rw-r--r--  gcc/cp/decl2.c | 54
-rw-r--r--  gcc/cp/expr.c | 78
-rw-r--r--  gcc/cp/lambda.c | 130
-rw-r--r--  gcc/cp/name-lookup.c | 46
-rw-r--r--  gcc/cp/parser.c | 216
-rw-r--r--  gcc/cp/pt.c | 68
-rw-r--r--  gcc/cp/semantics.c | 22
-rw-r--r--  gcc/cp/typeck.c | 38
-rw-r--r--  gcc/cse.c | 12
-rw-r--r--  gcc/diagnostic-color.c | 28
-rw-r--r--  gcc/doc/extend.texi | 202
-rw-r--r--  gcc/doc/generic.texi | 18
-rw-r--r--  gcc/doc/invoke.texi | 5
-rw-r--r--  gcc/doc/md.texi | 2
-rw-r--r--  gcc/doc/poly-int.texi | 33
-rw-r--r--  gcc/doc/rtl.texi | 39
-rw-r--r--  gcc/doc/tm.texi | 23
-rw-r--r--  gcc/doc/tm.texi.in | 13
-rw-r--r--  gcc/dse.c | 35
-rw-r--r--  gcc/dwarf2cfi.c | 71
-rw-r--r--  gcc/dwarf2out.c | 2
-rw-r--r--  gcc/emit-rtl.c | 91
-rw-r--r--  gcc/emit-rtl.h | 1
-rw-r--r--  gcc/expmed.c | 20
-rw-r--r--  gcc/expr.c | 129
-rw-r--r--  gcc/expr.h | 4
-rw-r--r--  gcc/fold-const.c | 403
-rw-r--r--  gcc/fold-const.h | 2
-rw-r--r--  gcc/fortran/ChangeLog | 5
-rw-r--r--  gcc/fortran/cpp.c | 4
-rw-r--r--  gcc/function.c | 53
-rw-r--r--  gcc/genattrtab.c | 2
-rw-r--r--  gcc/genautomata.c | 2
-rw-r--r--  gcc/genconditions.c | 2
-rw-r--r--  gcc/genemit.c | 2
-rw-r--r--  gcc/genextract.c | 2
-rw-r--r--  gcc/genmodes.c | 2
-rw-r--r--  gcc/genopinit.c | 2
-rw-r--r--  gcc/genoutput.c | 2
-rw-r--r--  gcc/genpeep.c | 2
-rw-r--r--  gcc/genpreds.c | 2
-rw-r--r--  gcc/genrecog.c | 19
-rw-r--r--  gcc/gimple-expr.c | 10
-rw-r--r--  gcc/gimple-fold.c | 64
-rw-r--r--  gcc/gimplify.c | 22
-rw-r--r--  gcc/go/ChangeLog | 5
-rw-r--r--  gcc/go/go-system.h | 6
-rw-r--r--  gcc/go/gofrontend/MERGE | 2
-rw-r--r--  gcc/go/gofrontend/import.cc | 14
-rw-r--r--  gcc/graphite-isl-ast-to-gimple.c | 101
-rw-r--r--  gcc/graphite-scop-detection.c | 77
-rw-r--r--  gcc/graphite-sese-to-poly.c | 41
-rw-r--r--  gcc/haifa-sched.c | 9
-rw-r--r--  gcc/hooks.c | 6
-rw-r--r--  gcc/hooks.h | 2
-rw-r--r--  gcc/ifcvt.c | 4
-rw-r--r--  gcc/incpath.c | 76
-rw-r--r--  gcc/incpath.h | 17
-rw-r--r--  gcc/internal-fn.c | 3
-rw-r--r--  gcc/internal-fn.h | 3
-rw-r--r--  gcc/ipa-utils.h | 17
-rw-r--r--  gcc/ira-conflicts.c | 2
-rw-r--r--  gcc/ira.c | 19
-rw-r--r--  gcc/lower-subreg.c | 4
-rw-r--r--  gcc/lra-constraints.c | 26
-rw-r--r--  gcc/lra-eliminations.c | 30
-rw-r--r--  gcc/lto/ChangeLog | 6
-rw-r--r--  gcc/lto/lto.c | 4
-rw-r--r--  gcc/machmode.h | 2
-rw-r--r--  gcc/match.pd | 65
-rw-r--r--  gcc/omp-expand.c | 2
-rw-r--r--  gcc/omp-general.c | 6
-rw-r--r--  gcc/omp-low.c | 21
-rw-r--r--  gcc/opts.c | 8
-rw-r--r--  gcc/opts.h | 2
-rw-r--r--  gcc/passes.c | 4
-rw-r--r--  gcc/poly-int.h | 281
-rw-r--r--  gcc/pretty-print.c | 707
-rw-r--r--  gcc/pretty-print.h | 24
-rw-r--r--  gcc/print-rtl.c | 14
-rw-r--r--  gcc/profile-count.c | 25
-rw-r--r--  gcc/profile-count.h | 92
-rw-r--r--  gcc/recog.c | 10
-rw-r--r--  gcc/regcprop.c | 6
-rw-r--r--  gcc/reginfo.c | 4
-rw-r--r--  gcc/reload.c | 25
-rw-r--r--  gcc/reload1.c | 30
-rw-r--r--  gcc/rtl-tests.c | 57
-rw-r--r--  gcc/rtl.c | 2
-rw-r--r--  gcc/rtl.def | 1
-rw-r--r--  gcc/rtl.h | 107
-rw-r--r--  gcc/rtlanal.c | 94
-rw-r--r--  gcc/sanopt.c | 2
-rw-r--r--  gcc/sbitmap.c | 118
-rw-r--r--  gcc/selftest-run-tests.c | 2
-rw-r--r--  gcc/selftest.h | 2
-rw-r--r--  gcc/sese.c | 5
-rw-r--r--  gcc/sese.h | 2
-rw-r--r--  gcc/simplify-rtx.c | 165
-rw-r--r--  gcc/stor-layout.c | 8
-rw-r--r--  gcc/system.h | 3
-rw-r--r--  gcc/target.def | 14
-rw-r--r--  gcc/target.h | 22
-rw-r--r--  gcc/targhooks.c | 3
-rw-r--r--  gcc/testsuite/ChangeLog | 287
-rw-r--r--  gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors.c | 4
-rw-r--r--  gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors2.c | 3
-rw-r--r--  gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors3.c | 3
-rw-r--r--  gcc/testsuite/c-c++-common/cilk-plus/AN/pr61191.c | 3
-rw-r--r--  gcc/testsuite/c-c++-common/gomp/pr63326.c | 22
-rw-r--r--  gcc/testsuite/c-c++-common/missing-close-symbol.c | 2
-rw-r--r--  gcc/testsuite/c-c++-common/missing-symbol.c | 35
-rw-r--r--  gcc/testsuite/c-c++-common/ubsan/attrib-5.c | 11
-rw-r--r--  gcc/testsuite/c-c++-common/ubsan/ptr-overflow-sanitization-1.c | 6
-rw-r--r--  gcc/testsuite/g++.dg/concepts/req6.C | 2
-rw-r--r--  gcc/testsuite/g++.dg/cpp/string-3.C | 9
-rw-r--r--  gcc/testsuite/g++.dg/cpp0x/error1.C | 11
-rw-r--r--  gcc/testsuite/g++.dg/cpp0x/pr67625.C | 12
-rw-r--r--  gcc/testsuite/g++.dg/cpp0x/pr70338.C | 17
-rw-r--r--  gcc/testsuite/g++.dg/cpp0x/pr70887.C | 31
-rw-r--r--  gcc/testsuite/g++.dg/cpp1y/auto-fn40.C | 37
-rw-r--r--  gcc/testsuite/g++.dg/cpp1y/digit-sep-neg.C | 4
-rw-r--r--  gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep2.C | 18
-rw-r--r--  gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice5.C | 2
-rw-r--r--  gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice6.C | 13
-rw-r--r--  gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice7.C | 15
-rw-r--r--  gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice8.C | 16
-rw-r--r--  gcc/testsuite/g++.dg/cpp1y/pr65202.C | 4
-rw-r--r--  gcc/testsuite/g++.dg/cpp1y/pr71875.C | 24
-rw-r--r--  gcc/testsuite/g++.dg/cpp1y/pr77786.C | 21
-rw-r--r--  gcc/testsuite/g++.dg/cpp1y/pr78523.C | 12
-rw-r--r--  gcc/testsuite/g++.dg/cpp1y/pr80194.C | 17
-rw-r--r--  gcc/testsuite/g++.dg/cpp1z/class-deduction44.C | 5
-rw-r--r--  gcc/testsuite/g++.dg/ext/attr-ifunc-1.C | 34
-rw-r--r--  gcc/testsuite/g++.dg/ext/attr-ifunc-2.C | 12
-rw-r--r--  gcc/testsuite/g++.dg/ext/attr-ifunc-3.C | 23
-rw-r--r--  gcc/testsuite/g++.dg/ext/attr-ifunc-4.C | 12
-rw-r--r--  gcc/testsuite/g++.dg/ext/attr-ifunc-5.C | 45
-rw-r--r--  gcc/testsuite/g++.dg/gomp/pr77578.C | 31
-rw-r--r--  gcc/testsuite/g++.dg/lookup/extern-c-hidden.C | 4
-rw-r--r--  gcc/testsuite/g++.dg/lookup/extern-c-redecl.C | 2
-rw-r--r--  gcc/testsuite/g++.dg/lookup/extern-c-redecl6.C | 25
-rw-r--r--  gcc/testsuite/g++.dg/lto/pr82414_0.C | 13
-rw-r--r--  gcc/testsuite/g++.dg/missing-symbol-2.C | 58
-rw-r--r--  gcc/testsuite/g++.dg/opt/pr70100.C | 21
-rw-r--r--  gcc/testsuite/g++.dg/opt/pr82159-2.C | 65
-rw-r--r--  gcc/testsuite/g++.dg/other/do1.C | 4
-rw-r--r--  gcc/testsuite/g++.dg/other/pr68252.C | 5
-rw-r--r--  gcc/testsuite/g++.dg/parse/error11.C | 2
-rw-r--r--  gcc/testsuite/g++.dg/parse/pragma2.C | 4
-rw-r--r--  gcc/testsuite/g++.dg/template/crash108.C | 2
-rw-r--r--  gcc/testsuite/g++.dg/template/error11.C | 2
-rw-r--r--  gcc/testsuite/g++.dg/ubsan/pr82353.C | 60
-rw-r--r--  gcc/testsuite/g++.old-deja/g++.abi/vtable2.C | 2
-rw-r--r--  gcc/testsuite/g++.old-deja/g++.other/using9.C | 2
-rw-r--r--  gcc/testsuite/g++.old-deja/g++.pt/crash3.C | 4
-rw-r--r--  gcc/testsuite/gcc.c-torture/execute/20040709-3.c | 5
-rw-r--r--  gcc/testsuite/gcc.c-torture/execute/pr80421.c | 121
-rw-r--r--  gcc/testsuite/gcc.c-torture/execute/pr82524.c | 37
-rw-r--r--  gcc/testsuite/gcc.dg/Wstrict-overflow-7.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/compat/struct-layout-1_generate.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/graphite/fuse-1.c | 10
-rw-r--r--  gcc/testsuite/gcc.dg/graphite/fuse-2.c | 8
-rw-r--r--  gcc/testsuite/gcc.dg/graphite/id-30.c | 16
-rw-r--r--  gcc/testsuite/gcc.dg/graphite/pr69728.c | 10
-rw-r--r--  gcc/testsuite/gcc.dg/graphite/pr82451.c | 21
-rw-r--r--  gcc/testsuite/gcc.dg/ipa/inlinehint-4.c | 4
-rw-r--r--  gcc/testsuite/gcc.dg/missing-symbol-2.c | 71
-rw-r--r--  gcc/testsuite/gcc.dg/missing-symbol-3.c | 50
-rw-r--r--  gcc/testsuite/gcc.dg/noncompile/940112-1.c | 4
-rw-r--r--  gcc/testsuite/gcc.dg/noncompile/971104-1.c | 4
-rw-r--r--  gcc/testsuite/gcc.dg/plugin/plugin.exp | 7
-rw-r--r--  gcc/testsuite/gcc.dg/plugin/poly-int-01_plugin.c | 21
-rw-r--r--  gcc/testsuite/gcc.dg/plugin/poly-int-02_plugin.c | 18
-rw-r--r--  gcc/testsuite/gcc.dg/plugin/poly-int-03_plugin.c | 18
-rw-r--r--  gcc/testsuite/gcc.dg/plugin/poly-int-04_plugin.c | 18
-rw-r--r--  gcc/testsuite/gcc.dg/plugin/poly-int-05_plugin.c | 18
-rw-r--r--  gcc/testsuite/gcc.dg/plugin/poly-int-06_plugin.c | 26
-rw-r--r--  gcc/testsuite/gcc.dg/plugin/poly-int-07_plugin.c | 18
-rw-r--r--  gcc/testsuite/gcc.dg/plugin/poly-int-test-1.c | 8
-rw-r--r--  gcc/testsuite/gcc.dg/plugin/poly-int-tests.h (renamed from gcc/poly-int.cc) | 489
-rw-r--r--  gcc/testsuite/gcc.dg/pr81854.c | 29
-rw-r--r--  gcc/testsuite/gcc.dg/pragma-diag-3.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/predict-13.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/predict-8.c | 4
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/ldist-25.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/ldist-28.c | 16
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/ldist-29.c | 17
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/ldist-30.c | 16
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/ldist-31.c | 19
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/ldist-33.c | 21
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/ldist-7.c | 2
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/pr82472.c | 24
-rw-r--r--  gcc/testsuite/gcc.dg/tree-ssa/pr82498.c | 53
-rw-r--r--  gcc/testsuite/gcc.dg/ubsan/pr82498.c | 159
-rw-r--r--  gcc/testsuite/gcc.dg/vect/pr78558.c | 44
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/pr71727-2.c | 2
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/pr81422.C | 15
-rw-r--r--  gcc/testsuite/gcc.target/aarch64/sve_loop_add_4.c | 4
-rw-r--r--  gcc/testsuite/gcc.target/i386/387-ficom-1.c | 41
-rw-r--r--  gcc/testsuite/gcc.target/i386/387-ficom-2.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/i386/asm-mem.c | 59
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr82483-1.c | 44
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr82483-2.c | 9
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr82498-1.c | 52
-rw-r--r--  gcc/testsuite/gcc.target/i386/pr82498-2.c | 46
-rw-r--r--  gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-char.c | 86
-rw-r--r--  gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-double.c | 51
-rw-r--r--  gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-float.c | 51
-rw-r--r--  gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int.c | 86
-rw-r--r--  gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-longlong.c | 86
-rw-r--r--  gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-short.c | 87
-rw-r--r--  gcc/testsuite/gcc.target/powerpc/fold-vec-mult-int128-p8.c | 7
-rw-r--r--  gcc/testsuite/gcc.target/powerpc/fold-vec-mult-int128-p9.c | 7
-rw-r--r--  gcc/testsuite/gfortran.dg/graphite/id-27.f90 | 40
-rw-r--r--  gcc/testsuite/gfortran.dg/graphite/id-28.f90 | 15
-rw-r--r--  gcc/testsuite/gfortran.dg/graphite/interchange-3.f90 | 2
-rw-r--r--  gcc/testsuite/gfortran.dg/graphite/pr14741.f90 | 2
-rw-r--r--  gcc/testsuite/gfortran.dg/graphite/pr82451.f | 39
-rw-r--r--  gcc/testsuite/lib/target-supports.exp | 4
-rw-r--r--  gcc/testsuite/obj-c++.dg/exceptions-6.mm | 6
-rw-r--r--  gcc/testsuite/obj-c++.dg/pr48187.mm | 8
-rw-r--r--  gcc/testsuite/objc.dg/exceptions-6.m | 4
-rw-r--r--  gcc/tree-affine.c | 7
-rw-r--r--  gcc/tree-cfg.c | 10
-rw-r--r--  gcc/tree-chrec.c | 3
-rw-r--r--  gcc/tree-chrec.h | 5
-rw-r--r--  gcc/tree-core.h | 3
-rw-r--r--  gcc/tree-data-ref.c | 37
-rw-r--r--  gcc/tree-data-ref.h | 4
-rw-r--r--  gcc/tree-dfa.c | 55
-rw-r--r--  gcc/tree-dfa.h | 8
-rw-r--r--  gcc/tree-eh.c | 2
-rw-r--r--  gcc/tree-loop-distribution.c | 805
-rw-r--r--  gcc/tree-pretty-print.c | 9
-rw-r--r--  gcc/tree-scalar-evolution.c | 378
-rw-r--r--  gcc/tree-scalar-evolution.h | 4
-rw-r--r--  gcc/tree-ssa-address.c | 6
-rw-r--r--  gcc/tree-ssa-alias.c | 48
-rw-r--r--  gcc/tree-ssa-alias.h | 2
-rw-r--r--  gcc/tree-ssa-ccp.c | 4
-rw-r--r--  gcc/tree-ssa-dse.c | 10
-rw-r--r--  gcc/tree-ssa-loop-ivopts.c | 10
-rw-r--r--  gcc/tree-ssa-loop-niter.c | 3
-rw-r--r--  gcc/tree-ssa-loop-prefetch.c | 3
-rw-r--r--  gcc/tree-ssa-sccvn.c | 94
-rw-r--r--  gcc/tree-ssa-structalias.c | 50
-rw-r--r--  gcc/tree-ssa-uninit.c | 2
-rw-r--r--  gcc/tree-ssa.c | 7
-rw-r--r--  gcc/tree-vect-data-refs.c | 59
-rw-r--r--  gcc/tree-vect-generic.c | 69
-rw-r--r--  gcc/tree-vect-loop-manip.c | 50
-rw-r--r--  gcc/tree-vect-loop.c | 2
-rw-r--r--  gcc/tree-vect-slp.c | 14
-rw-r--r--  gcc/tree-vect-stmts.c | 31
-rw-r--r--  gcc/tree-vectorizer.c | 4
-rw-r--r--  gcc/tree-vrp.c | 37
-rw-r--r--  gcc/tree.c | 239
-rw-r--r--  gcc/tree.def | 7
-rw-r--r--  gcc/tree.h | 69
-rw-r--r--  gcc/ubsan.c | 2
-rw-r--r--  gcc/valtrack.c | 4
-rw-r--r--  gcc/var-tracking.c | 16
-rw-r--r--  gcc/varasm.c | 3
-rw-r--r--  gcc/wide-int-print.cc | 34
-rw-r--r--  gcc/wide-int.cc | 53
-rw-r--r--  gcc/wide-int.h | 69
511 files changed, 13573 insertions, 8090 deletions
diff --git a/gcc/ChangeLog b/gcc/ChangeLog
index 351b1f01379..cb9f1a392aa 100644
--- a/gcc/ChangeLog
+++ b/gcc/ChangeLog
@@ -1,3 +1,615 @@
+2017-10-13 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/82524
+ * config/i386/i386.md (addqi_ext_1, andqi_ext_1,
+ *andqi_ext_1_cc, *<code>qi_ext_1, *xorqi_ext_1_cc): Change
+ =Q constraints to +Q and add a check to the insn condition
+ that operands[0] and operands[1] are equal.
+ (*addqi_ext_2, *andqi_ext_2, *<code>qi_ext_2): Change
+ =Q constraints to +Q and add a check to the insn condition
+ that operands[0] is equal to either operands[1] or operands[2].
+
+ PR target/82498
+ * fold-const.c (fold_binary_loc) <bit_rotate>: Code cleanups:
+ instead of handling MINUS_EXPR twice (once for each argument),
+ canonicalize the operand order and handle it just once; use rtype
+ where possible. Handle (A << B) | (A >> (-B & (Z - 1))).
+
+ PR target/82498
+ * config/i386/ia32intrin.h (__rold, __rord, __rolq, __rorq): Allow
+ any values of __C while still being pattern recognizable as a simple
+ rotate instruction.
+
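 [For illustration only, not code from the patch: the two PR 82498 entries above concern the masked-rotate idiom. The following hedged sketch assumes a 32-bit operand and a hypothetical helper name rotl32; it shows the (A << B) | (A >> (-B & (Z - 1))) shape that the bit_rotate folding can now recognize as a single rotate instruction.]

	/* Rotate X left by N bits.  Masking both shift counts with 31
	   keeps the expression well defined for any N (including 0 and
	   values >= 32), and GCC can match this shape as a single
	   rotate instruction.  */
	static inline unsigned int
	rotl32 (unsigned int x, unsigned int n)
	{
	  return (x << (n & 31)) | (x >> (-n & 31));
	}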
+2017-10-13 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/82451
+ Revert
+ 2017-10-02 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/82355
+ * graphite-isl-ast-to-gimple.c (build_iv_mapping): Also build
+ a mapping for the enclosing loop but avoid generating one for
+ the loop tree root.
+ (copy_bb_and_scalar_dependences): Remove premature codegen
+ error on PHIs in blocks duplicated into multiple places.
+ * graphite-scop-detection.c
+ (scop_detection::stmt_has_simple_data_refs_p): For a loop not
+ in the region use it as loop and nest to analyze the DR in.
+ (try_generate_gimple_bb): Likewise.
+ * graphite-sese-to-poly.c (extract_affine_chrec): Adjust.
+ (add_loop_constraints): For blocks in a loop not in the region
+ create a dimension with a single iteration.
+ * sese.h (gbb_loop_at_index): Remove assert.
+
+ * cfgloop.c (loop_preheader_edge): For the loop tree root
+ return the single successor of the entry block.
+ * graphite-isl-ast-to-gimple.c (graphite_regenerate_ast_isl):
+ Reset the SCEV hashtable and niters.
+ * graphite-scop-detection.c
+ (scop_detection::graphite_can_represent_scev): Add SCOP parameter,
+ assert that we only have POLYNOMIAL_CHREC that vary in loops
+ contained in the region.
+ (scop_detection::graphite_can_represent_expr): Adjust.
+ (scop_detection::stmt_has_simple_data_refs_p): For loops
+ not in the region set loop to NULL. The nest is now the
+ entry edge to the region.
+ (try_generate_gimple_bb): Likewise.
+ * sese.c (scalar_evolution_in_region): Adjust for
+ instantiate_scev change.
+ * tree-data-ref.h (graphite_find_data_references_in_stmt):
+ Make nest parameter the edge into the region.
+ (create_data_ref): Likewise.
+ * tree-data-ref.c (dr_analyze_indices): Make nest parameter an
+ entry edge into a region and adjust instantiate_scev calls.
+ (create_data_ref): Likewise.
+ (graphite_find_data_references_in_stmt): Likewise.
+ (find_data_references_in_stmt): Pass the loop preheader edge
+ from the nest argument.
+ * tree-scalar-evolution.h (instantiate_scev): Make instantiate_below
+ parameter the edge into the region.
+ (instantiate_parameters): Use the loop preheader edge as entry.
+ * tree-scalar-evolution.c (analyze_scalar_evolution): Handle
+ NULL loop.
+ (get_instantiated_value_entry): Make instantiate_below parameter
+ the edge into the region.
+ (instantiate_scev_name): Likewise. Adjust dominance checks,
+ when we cannot use loop-based instantiation instantiate by
+ walking use-def chains.
+ (instantiate_scev_poly): Adjust.
+ (instantiate_scev_binary): Likewise.
+ (instantiate_scev_convert): Likewise.
+ (instantiate_scev_not): Likewise.
+ (instantiate_array_ref): Remove.
+ (instantiate_scev_3): Likewise.
+ (instantiate_scev_2): Likewise.
+ (instantiate_scev_1): Likewise.
+ (instantiate_scev_r): Do not blindly handle N-operand trees.
+ Do not instantiate array-refs. Handle all constants and invariants.
+ (instantiate_scev): Make instantiate_below parameter
+ the edge into the region.
+ (resolve_mixers): Use the loop preheader edge for the region
+ parameter to instantiate_scev_r.
+ * tree-ssa-loop-prefetch.c (determine_loop_nest_reuse): Adjust.
+
+2017-10-13 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/82525
+ * graphite-isl-ast-to-gimple.c
+ (translate_isl_ast_to_gimple::widest_int_from_isl_expr_int): Split
+ out from ...
+ (translate_isl_ast_to_gimple::gcc_expression_from_isl_expr_int): Here.
+ Fail code generation when we cannot represent the isl integer.
+ (binary_op_to_tree): Elide modulo operations that are no-ops
+ in the type we code generate. Remove now superfluous code
+ generation errors.
+
+2017-10-13 Richard Biener <rguenther@suse.de>
+
+ * graphite-scop-detection.c (loop_ivs_can_be_represented): Remove.
+ (scop_detection::harmful_loop_in_region): Remove premature
+ IV type restriction.
+ (scop_detection::graphite_can_represent_scev): We can handle
+ pointer IVs just fine.
+
+2017-10-13 Alan Modra <amodra@gmail.com>
+
+ * doc/extend.texi (Extended Asm <Clobbers>): Rename to
+ "Clobbers and Scratch Registers". Add paragraph on
+ alternative to clobbers for scratch registers and OpenBLAS
+ example.
+
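 [A minimal sketch of the technique the entry above documents; this is not the actual texi example (which uses OpenBLAS), and the names ctz32, result, and scratch are assumptions. Instead of naming a fixed register in the clobber list, an extended asm can declare an extra early-clobbered output so the register allocator chooses the scratch register; the example below is x86-specific.]

	/* Count trailing zeros of X, yielding 32 for X == 0.  The second
	   "=&r" output asks the compiler to pick a scratch register for
	   the fallback constant instead of clobbering a named one.  */
	static inline unsigned int
	ctz32 (unsigned int x)
	{
	  unsigned int result, scratch;
	  __asm__ ("bsf %2, %0\n\t"
	           "mov $32, %1\n\t"
	           "cmovz %1, %0"
	           : "=&r" (result), "=&r" (scratch)
	           : "rm" (x)
	           : "cc");
	  return result;
	}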
+2017-10-13 Alan Modra <amodra@gmail.com>
+
+ * doc/extend.texi (Clobbers): Correct vax example. Delete old
+ example of a memory input for a string of known length. Move
+ commentary out of table. Add a number of new examples
+ covering array memory inputs.
+
+2017-10-12 Martin Liska <mliska@suse.cz>
+
+ PR tree-optimization/82493
+ * sbitmap.c (bitmap_bit_in_range_p): Fix the implementation.
+ (test_range_functions): New function.
+ (sbitmap_c_tests): Likewise.
+ * selftest-run-tests.c (selftest::run_tests): Run new tests.
+ * selftest.h (sbitmap_c_tests): New function.
+
+ * tree-ssa-dse.c (live_bytes_read): Fix thinko.
+
+2017-10-12 Michael Meissner <meissner@linux.vnet.ibm.com>
+
+ * config/rs6000/amo.h: Fix spacing issue.
+
+2017-10-12 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/82498
+ * config/i386/i386.md (*ashl<mode>3_mask_1,
+ *<shift_insn><mode>3_mask_1, *<rotate_insn><mode>3_mask_1,
+ *<btsc><mode>_mask_1, *btr<mode>_mask_1): New define_insn_and_split
+ patterns.
+
+2017-10-12 Jan Hubicka <hubicka@ucw.cz>
+
+ * profile-count.h (safe_scale_64bit): Fix GCC4.x path.
+ (profile_probability): Set max_probability
+ to (uint32_t) 1 << (n_bits - 2) and update accessors to avoid overflows
+ in temporaries.
+ * profile-count.c (profile_probability::differs_from_p): Do not
+ rely on max_probability == 10000.
+
+2017-10-12 Jeff Law <law@redhat.com>
+
+ * tree-ssa-dse.c (valid_ao_ref_for_dse): Reject ao_refs with
+ negative offsets.
+
+2017-10-12 Martin Sebor <msebor@redhat.com>
+
+ PR other/82301
+ PR c/82435
+ * cgraphunit.c (maybe_diag_incompatible_alias): New function.
+ (handle_alias_pairs): Call it.
+ * common.opt (-Wattribute-alias): New option.
+ * doc/extend.texi (ifunc attribute): Discuss C++ specifics.
+ * doc/invoke.texi (-Wattribute-alias): Document.
+
+2017-10-12 Vladimir Makarov <vmakarov@redhat.com>
+
+ Revert
+ 2017-10-11 Vladimir Makarov <vmakarov@redhat.com>
+ PR sanitizer/82353
+ * lra.c (collect_non_operand_hard_regs): Don't ignore operator
+ locations.
+ * lra-lives.c (bb_killed_pseudos, bb_gen_pseudos): Move up.
+ (make_hard_regno_born, make_hard_regno_dead): Update
+ bb_killed_pseudos and bb_gen_pseudos.
+
+2017-10-12 Jan Hubicka <hubicka@ucw.cz>
+
+ * x86-tune-sched.c (ix86_adjust_cost): Fix Zen support.
+
+2017-10-12 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/alpha/alpha.c (alpha_split_conditional_move):
+ Use std::swap instead of manually swapping.
+ (alpha_stdarg_optimize_hook): Ditto.
+ (alpha_canonicalize_comparison): Ditto.
+
+2017-10-12 Bin Cheng <bin.cheng@arm.com>
+
+ * tree-loop-distribution.c (struct builtin_info): New struct.
+ (struct partition): Refactor fields into struct builtin_info.
+ (partition_free): Free struct builtin_info.
+ (build_size_arg_loc, build_addr_arg_loc): Delete.
+ (generate_memset_builtin, generate_memcpy_builtin): Get memory range
+ information from struct builtin_info.
+ (find_single_drs): New function refactored from classify_partition.
+ Also moved builtin validity checks to this function.
+ (compute_access_range, alloc_builtin): New functions.
+ (classify_builtin_st, classify_builtin_ldst): New functions.
+ (classify_partition): Refactor code into functions find_single_drs,
+ classify_builtin_st and classify_builtin_ldst.
+ (distribute_loop): Don't do runtime alias check when distributing
+ loop nest.
+ (find_seed_stmts_for_distribution): New function.
+ (pass_loop_distribution::execute): Refactor code finding seed
+ stmts into above function. Support distribution for the innermost
+ two-level loop nest. Adjust dump information.
+
+2017-10-12 Bin Cheng <bin.cheng@arm.com>
+
+ * tree-loop-distribution.c: Adjust the general comment.
+ (NUM_PARTITION_THRESHOLD): New macro.
+ (ssa_name_has_uses_outside_loop_p): Support loop nest distribution.
+ (classify_partition): Skip builtin pattern of loop nest's inner loop.
+ (merge_dep_scc_partitions): New parameter ignore_alias_p and use it
+ in call to build_partition_graph.
+ (finalize_partitions): New parameter. Make loop distribution more
+ conservative by fusing more partitions.
+ (distribute_loop): Don't do runtime alias check in case of loop nest
+ distribution.
+ (find_seed_stmts_for_distribution): New function.
+ (prepare_perfect_loop_nest): New function.
+ (pass_loop_distribution::execute): Refactor code finding seed stmts
+ and loop nest into above functions. Support loop nest distribution.
+ Adjust dump information accordingly.
+
+2017-10-12 Bin Cheng <bin.cheng@arm.com>
+
+ * tree-loop-distribution.c (break_alias_scc_partitions): Add comment
+ and set PTYPE_SEQUENTIAL for merged partition.
+
+2017-10-12 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/69728
+ Revert
+ 2017-09-19 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/69728
+ * graphite-sese-to-poly.c (schedule_error): New global.
+ (add_loop_schedule): Handle empty domain by failing the
+ schedule.
+ (build_original_schedule): Handle schedule_error.
+
+ * graphite-sese-to-poly.c (add_loop_schedule): Handle empty
+ domain by returning an unchanged schedule.
+
+2017-10-12 Jakub Jelinek <jakub@redhat.com>
+
+ * genrecog.c (validate_pattern): For VEC_SELECT verify that
+ CONST_INT selectors are 0 to GET_MODE_NUNITS (imode) - 1.
+
+2017-10-12 Aldy Hernandez <aldyh@redhat.com>
+
+ * Makefile.in (TAGS): Merge all the *.def files into one pattern.
+ Handle params.def.
+
+2017-10-12 Jakub Jelinek <jakub@redhat.com>
+
+ PR c++/82159
+ * expr.c (store_field): Don't optimize away bitsize == 0 store
+ from CALL_EXPR with addressable return type.
+
+2017-10-11 Segher Boessenkool <segher@kernel.crashing.org>
+
+ * config/rs6000/rs6000.h (TARGET_ISEL64): Delete.
+ * config/rs6000/rs6000.md (sel): Delete mode attribute.
+ (mov<mode>cc, isel_signed_<mode>, isel_unsigned_<mode>,
+ *isel_reversed_signed_<mode>, *isel_reversed_unsigned_<mode>): Use
+ TARGET_ISEL instead of TARGET_ISEL<sel>.
+
+2017-10-11 David Edelsohn <dje.gcc@gmail.com>
+
+ * config/rs6000/rs6000.c
+ (rs6000_xcoff_asm_output_aligned_decl_common): Test for NULL decl.
+
+2017-10-11 Segher Boessenkool <segher@kernel.crashing.org>
+
+ * config/rs6000/predicates.md (zero_constant, all_ones_constant):
+ Move up in file.
+ (reg_or_cint_operand): Fix comment.
+ (reg_or_zero_operand): New predicate.
+ * config/rs6000/rs6000-protos.h (output_isel): Delete.
+ * config/rs6000/rs6000.c (output_isel): Delete.
+ * config/rs6000/rs6000.md (isel_signed_<mode>): Use reg_or_zero_operand
+ instead of reg_or_cint_operand. Output instruction directly (not via
+ output_isel).
+ (isel_unsigned_<mode>): Ditto.
+ (*isel_reversed_signed_<mode>): Use reg_or_zero_operand instead of
+ gpc_reg_operand. Add an instruction alternative for this. Output
+ instruction directly.
+ (*isel_reversed_unsigned_<mode>): Ditto.
+
+2017-10-11 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/i386.c (ix86_canonicalize_comparison): New function.
+ (TARGET_CANONICALIZE_COMPARISON): Define.
+
+2017-10-11 Qing Zhao <qing.zhao@oracle.com>
+
+ PR target/81422
+ * config/aarch64/aarch64.c (aarch64_load_symref_appropriately):
+ Check whether the dest is REG before adding REG_EQUIV note.
+
+2017-10-11 Vladimir Makarov <vmakarov@redhat.com>
+
+ PR sanitizer/82353
+ * lra.c (collect_non_operand_hard_regs): Don't ignore operator
+ locations.
+ * lra-lives.c (bb_killed_pseudos, bb_gen_pseudos): Move up.
+ (make_hard_regno_born, make_hard_regno_dead): Update
+ bb_killed_pseudos and bb_gen_pseudos.
+
+2017-10-11 Nathan Sidwell <nathan@acm.org>
+
+ * incpath.h (enum incpath_kind): Name enum, prefix values.
+ (add_path, add_cpp_dir_path, get_added_cpp_dirs): Use incpath_kind.
+ * incpath.c (heads, tails): Use INC_MAX.
+ (add_env_var_paths, add_standard_paths): Use incpath_kind.
+ (merge_include_chains, split_quote_chain,
+ register_include_chains): Update incpath_kind names.
+ (add_cpp_dir_path, add_path, get_added_cpp_dirs): Use incpath_kind.
+ * config/darwin-c.c (add_system_framework_path): Update incpath_kind
+ names.
+ (add_framework_path, darwin_register_objc_includes): Likewise.
+ * config/vms/vms-c.c (vms_c_register_includes): Likewise.
+
+2017-10-11 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/i386.md (*cmp<X87MODEF:mode>_<SWI24:mode>_i387):
+ Do not use float_operator operator predicate.
+ (*cmp<X87MODEF:mode>_<SWI24:mode>_cc_i387): Ditto.
+ * config/i386/predicates.md (float_operator): Remove predicate.
+
+2017-10-11 Uros Bizjak <ubizjak@gmail.com>
+
+ * config/i386/i386.md (*jcc<mode>_0_i387): Remove insn pattern.
+ (*jccxf_i387): Ditto.
+ (*jcc<mode>_i387): Ditto.
+ (*jccu<mode>_i387): Ditto.
+ (*jcc<X87MODEF:mode>_<SWI24:mode>_i387): Ditto.
+ (*jcc_*_i387 splitters): Remove.
+ * config/i386/i386-protos.h (ix86_split_fp_branch): Remove prototype.
+ * config/i386/i386.c (ix86_split_fp_branch): Remove.
+ * config/i386/predicates.md (ix86_swapped_fp_comparison_operator):
+ Remove predicate.
+
+2017-10-11 Jan Hubicka <hubicka@ucw.cz>
+
+ * profile-count.h (slow_safe_scale_64bit): New function.
+ (safe_scale_64bit): New inline.
+ (profile_count::max_safe_multiplier): Remove; use safe_scale_64bit.
+ * profile-count.c: Include wide-int.h
+ (slow_safe_scale_64bit): New.
+
+2017-10-11 Nathan Sidwell <nathan@acm.org>
+
+ * tree.h (DECL_ASSEMBLER_NAME_SET_P): Don't check
+ HAS_DECL_ASSEMBLER_NAME_P.
+ * gimple-expr.c (gimple_decl_printable_name): Check
+ HAS_DECL_ASSEMBLER_NAME_P too.
+ * ipa-utils.h (type_in_anonymous_namespace_p): Check
+ DECL_ASSEMBLER_NAME_SET_P of TYPE_NAME.
+ (odr_type_p): No need to assert TYPE_NAME is a TYPE_DECL.
+ * passes.c (rest_of_decl_compilation): Check
+ HAS_DECL_ASSEMBLER_NAME_P too.
+ * recog.c (verify_changes): Likewise.
+ * tree-pretty-print.c (dump_decl_name): Likewise.
+ * tree-ssa-structalias.c (alias_get_name): Likewise. Reimplement.
+
+ * tree.h (DECL_ASSEMBLER_NAME_RAW): New.
+ (SET_DECL_ASSEMBLER_NAME): Use it.
+ (DECL_ASSEMBLER_NAME_SET_P): Likewise.
+ (COPY_DECL_ASSEMBLER_NAME): Likewise.
+ * tree.c (decl_assembler_name): Use DECL_ASSEMBLER_NAME_RAW.
+
+2017-10-11 Jan Hubicka <hubicka@ucw.cz>
+
+ * config.gcc (i386, x86_64): Add extra objects.
+ * i386/i386-protos.h (ix86_rip_relative_addr_p): Declare.
+ (ix86_min_insn_size): Declare.
+ (ix86_issue_rate): Declare.
+ (ix86_adjust_cost): Declare.
+ (ia32_multipass_dfa_lookahead): Declare.
+ (ix86_macro_fusion_p): Declare.
+ (ix86_macro_fusion_pair_p): Declare.
+ (ix86_bd_has_dispatch): Declare.
+ (ix86_bd_do_dispatch): Declare.
+ (ix86_core2i7_init_hooks): Declare.
+ (ix86_atom_sched_reorder): Declare.
+ * i386/i386.c: Move all CPU cost tables to x86-tune-costs.h.
+ (COSTS_N_BYTES): Move to x86-tune-costs.h.
+ (DUMMY_STRINGOP_ALGS): Move to x86-tune-costs.h.
+ (rip_relative_addr_p): Rename to ...
+ (ix86_rip_relative_addr_p): ... this one; export.
+ (memory_address_length): Update.
+ (ix86_issue_rate): Move to x86-tune-sched.c.
+ (ix86_flags_dependent): Move to x86-tune-sched.c.
+ (ix86_agi_dependent): Move to x86-tune-sched.c.
+ (exact_dependency_1): Move to x86-tune-sched.c.
+ (exact_store_load_dependency): Move to x86-tune-sched.c.
+ (ix86_adjust_cost): Move to x86-tune-sched.c.
+ (ia32_multipass_dfa_lookahead): Move to x86-tune-sched.c.
+ (ix86_macro_fusion_p): Move to x86-tune-sched.c.
+ (ix86_macro_fusion_pair_p): Move to x86-tune-sched.c.
+ (do_reorder_for_imul): Move to x86-tune-sched-atom.c.
+ (swap_top_of_ready_list): Move to x86-tune-sched-atom.c.
+ (ix86_sched_reorder): Move to x86-tune-sched-atom.c.
+ (core2i7_first_cycle_multipass_init): Move to x86-tune-sched-core.c.
+ (core2i7_dfa_post_advance_cycle): Move to x86-tune-sched-core.c.
+ (min_insn_size): Rename to ...
+ (ix86_min_insn_size): ... this one; export.
+ (core2i7_first_cycle_multipass_begin): Move to x86-tune-sched-core.c.
+ (core2i7_first_cycle_multipass_issue): Move to x86-tune-sched-core.c.
+ (core2i7_first_cycle_multipass_backtrack): Move to
+ x86-tune-sched-core.c.
+ (core2i7_first_cycle_multipass_end): Move to x86-tune-sched-core.c.
+ (core2i7_first_cycle_multipass_fini): Move to x86-tune-sched-core.c.
+ (ix86_sched_init_global): Break up logic to ix86_core2i7_init_hooks.
+ (ix86_avoid_jump_mispredicts): Update.
+ (TARGET_SCHED_DISPATCH): Move to x86-tune-sched-bd.c.
+ (TARGET_SCHED_DISPATCH_DO): Move to x86-tune-sched-bd.c.
+ (TARGET_SCHED_REORDER): Move to x86-tune-sched-bd.c.
+ (DISPATCH_WINDOW_SIZE): Move to x86-tune-sched-bd.c.
+ (MAX_DISPATCH_WINDOWS): Move to x86-tune-sched-bd.c.
+ (MAX_INSN): Move to x86-tune-sched-bd.c.
+ (MAX_IMM): Move to x86-tune-sched-bd.c.
+ (MAX_IMM_SIZE): Move to x86-tune-sched-bd.c.
+ (MAX_IMM_32): Move to x86-tune-sched-bd.c.
+ (MAX_IMM_64): Move to x86-tune-sched-bd.c.
+ (MAX_LOAD): Move to x86-tune-sched-bd.c.
+ (MAX_STORE): Move to x86-tune-sched-bd.c.
+ (BIG): Move to x86-tune-sched-bd.c.
+ (enum dispatch_group): Move to x86-tune-sched-bd.c.
+ (enum insn_path): Move to x86-tune-sched-bd.c.
+ (get_mem_group): Move to x86-tune-sched-bd.c.
+ (is_cmp): Move to x86-tune-sched-bd.c.
+ (dispatch_violation): Move to x86-tune-sched-bd.c.
+ (is_branch): Move to x86-tune-sched-bd.c.
+ (is_prefetch): Move to x86-tune-sched-bd.c.
+ (init_window): Move to x86-tune-sched-bd.c.
+ (allocate_window): Move to x86-tune-sched-bd.c.
+ (init_dispatch_sched): Move to x86-tune-sched-bd.c.
+ (is_end_basic_block): Move to x86-tune-sched-bd.c.
+ (process_end_window): Move to x86-tune-sched-bd.c.
+ (allocate_next_window): Move to x86-tune-sched-bd.c.
+ (find_constant): Move to x86-tune-sched-bd.c.
+ (get_num_immediates): Move to x86-tune-sched-bd.c.
+ (has_immediate): Move to x86-tune-sched-bd.c.
+ (get_insn_path): Move to x86-tune-sched-bd.c.
+ (get_insn_group): Move to x86-tune-sched-bd.c.
+ (count_num_restricted): Move to x86-tune-sched-bd.c.
+ (fits_dispatch_window): Move to x86-tune-sched-bd.c.
+ (add_insn_window): Move to x86-tune-sched-bd.c.
+ (add_to_dispatch_window): Move to x86-tune-sched-bd.c.
+ (debug_dispatch_window_file): Move to x86-tune-sched-bd.c.
+ (debug_dispatch_window): Move to x86-tune-sched-bd.c.
+ (debug_insn_dispatch_info_file): Move to x86-tune-sched-bd.c.
+ (debug_ready_dispatch): Move to x86-tune-sched-bd.c.
+ (do_dispatch): Move to x86-tune-sched-bd.c.
+ (has_dispatch): Move to x86-tune-sched-bd.c.
+ * i386/t-i386: Add new object files.
+ * i386/x86-tune-costs.h: New file.
+ * i386/x86-tune-sched-atom.c: New file.
+ * i386/x86-tune-sched-bd.c: New file.
+ * i386/x86-tune-sched-core.c: New file.
+ * i386/x86-tune-sched.c: New file.
+
+2017-10-11 Liu Hao <lh_mouse@126.com>
+
+ * pretty-print.c [_WIN32] (colorize_init): Remove. Use
+ the generic version below instead.
+ (should_colorize): Recognize Windows consoles as terminals
+ for MinGW targets.
+ * pretty-print.c [__MINGW32__] (write_all): New function.
+ [__MINGW32__] (find_esc_head): Likewise.
+ [__MINGW32__] (find_esc_terminator): Likewise.
+ [__MINGW32__] (eat_esc_sequence): Likewise.
+ [__MINGW32__] (mingw_ansi_fputs): New function that handles
+ ANSI escape codes.
+ (pp_write_text_to_stream): Use mingw_ansi_fputs instead of fputs
+ for MinGW targets.
+
+2017-10-11 Richard Biener <rguenther@suse.de>
+
+ * tree-ssa-loop-niter.c (infer_loop_bounds_from_pointer_arith):
+ Properly call analyze_scalar_evolution with the loop of the stmt.
+
+2017-10-11 Richard Biener <rguenther@suse.de>
+
+ * tree.def (POLYNOMIAL_CHREC): Remove CHREC_VARIABLE tree operand.
+ * tree-core.h (tree_base): Add chrec_var union member.
+ * tree.h (CHREC_VAR): Remove.
+ (CHREC_LEFT, CHREC_RIGHT, CHREC_VARIABLE): Adjust.
+ * tree-chrec.h (build_polynomial_chrec): Adjust.
+ * tree-chrec.c (reset_evolution_in_loop): Use build_polynomial_chrec.
+ * tree-pretty-print.c (dump_generic_node): Use CHREC_VARIABLE.
+
+2017-10-11 Marc Glisse <marc.glisse@inria.fr>
+
+ * fold-const.c (fold_binary_loc) [X +- Y CMP X]: Move ...
+ * match.pd: ... here.
+ ((T) X == (T) Y): Relax condition.
+
+2017-10-11 Bin Cheng <bin.cheng@arm.com>
+
+ PR tree-optimization/82472
+ * tree-loop-distribution.c (sort_partitions_by_post_order): Refine
+ comment.
+ (break_alias_scc_partitions): Update postorder number.
+
+2017-10-11 Martin Liska <mliska@suse.cz>
+
+ PR sanitizer/82490
+ * opts.c (parse_no_sanitize_attribute): Do not use error_value
+ variable.
+ * opts.h (parse_no_sanitize_attribute): Remove last argument.
+
+2017-10-11 Martin Liska <mliska@suse.cz>
+
+ * print-rtl.c (print_insn): Move declaration of idbuf
+ to same scope as name.
+
+2017-10-11 Martin Liska <mliska@suse.cz>
+
+ Revert r253637:
+
+ PR sanitizer/82484
+ * sanopt.c (sanitize_rewrite_addressable_params): Do not handle
+ volatile arguments.
+
+2017-10-11 Martin Liska <mliska@suse.cz>
+
+ PR sanitizer/82484
+ * sanopt.c (sanitize_rewrite_addressable_params): Do not handle
+ volatile arguments.
+
+2017-10-11 Adhemerval Zanella <adhemerval.zanella@linaro.org>
+
+ * config.gcc (default_gnu_indirect_function): Default to yes for
+ arm*-*-linux* with glibc.
+
+2017-10-11 Richard Biener <rguenther@suse.de>
+
+ * tree-scalar-evolution.c (get_scalar_evolution): Handle
+ default-defs and types we do not want to analyze.
+ (interpret_loop_phi): Replace unreachable code with an assert.
+ (compute_scalar_evolution_in_loop): Remove and inline ...
+ (analyze_scalar_evolution_1): ... here, replacing condition with
+ what makes the intent clearer. Remove handling of cases
+ get_scalar_evolution now handles.
+
+2017-10-10 Jim Wilson <wilson@tuliptree.org>
+
+ PR rtl-optimization/81434
+ * haifa-sched.c (prune_ready_list): Init min_cost_group to 0. Update
+ comment for main loop. In sched_group_found if, also add checks for
+ pass and min_cost_group.
+
+2017-10-10 Segher Boessenkool <segher@kernel.crashing.org>
+
+ * config/rs6000/rs6000.c (TARGET_INSN_COST): New.
+ (rs6000_insn_cost): New function.
+ * config/rs6000/rs6000.md (cost): New attribute.
+
+2017-10-10 Jakub Jelinek <jakub@redhat.com>
+ H.J. Lu <hongjiu.lu@intel.com>
+
+ PR target/79565
+ PR target/82483
+ * config/i386/i386.c (ix86_init_mmx_sse_builtins): Add
+ OPTION_MASK_ISA_MMX for __builtin_ia32_maskmovq,
+ __builtin_ia32_vec_ext_v4hi and __builtin_ia32_vec_set_v4hi.
+ (ix86_expand_builtin): Treat OPTION_MASK_ISA_MMX similarly
+ to OPTION_MASK_ISA_AVX512VL - builtins that have both
+ OPTION_MASK_ISA_MMX and some other bit set require both
+ mmx and the ISAs without the mmx bit.
+ * config/i386/i386-builtin.def (__builtin_ia32_cvtps2pi,
+ __builtin_ia32_cvttps2pi, __builtin_ia32_cvtpi2ps,
+ __builtin_ia32_pavgb, __builtin_ia32_pavgw, __builtin_ia32_pmulhuw,
+ __builtin_ia32_pmaxub, __builtin_ia32_pmaxsw, __builtin_ia32_pminub,
+ __builtin_ia32_pminsw, __builtin_ia32_psadbw, __builtin_ia32_pmovmskb,
+ __builtin_ia32_pshufw, __builtin_ia32_cvtpd2pi,
+ __builtin_ia32_cvttpd2pi, __builtin_ia32_cvtpi2pd,
+ __builtin_ia32_pmuludq, __builtin_ia32_pabsb, __builtin_ia32_pabsw,
+ __builtin_ia32_pabsd, __builtin_ia32_phaddw, __builtin_ia32_phaddd,
+ __builtin_ia32_phaddsw, __builtin_ia32_phsubw, __builtin_ia32_phsubd,
+ __builtin_ia32_phsubsw, __builtin_ia32_pmaddubsw,
+ __builtin_ia32_pmulhrsw, __builtin_ia32_pshufb, __builtin_ia32_psignb,
+ __builtin_ia32_psignw, __builtin_ia32_psignd, __builtin_ia32_movntq,
+ __builtin_ia32_paddq, __builtin_ia32_psubq, __builtin_ia32_palignr):
+ Add OPTION_MASK_ISA_MMX.
+
+2017-10-10 Andreas Tobler <andreast@gcc.gnu.org>
+
+ * config.gcc (armv7*-*-freebsd*): New target.
+ (armv6*-*-freebsd*): Remove obsolete TARGET_FREEBSD_ARMv6 define.
+
+2017-10-10 Jan Hubicka <hubicka@ucw.cz>
+
+ * x86-tune.def (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI,
+ X86_TUNE_ADJUST_UNROLL, X86_TUNE_ONE_IF_CONV_INSN): Move to right
+ spot in the file.
+
2017-10-10 Richard Sandiford <richard.sandiford@linaro.org>
* wide-int.h (wide_int_ref_storage): Make host_dependent_precision
@@ -586,7 +1198,8 @@
(znver1_cost): Set scalar reassoc width to 4 and vector to 3 and 6
for int and fp.
(atom_cost): Set reassociation width to 2.
- (slm_cost, generic_cost): Set fp reassociation width to 2 and 1 otherwise.
+ (slm_cost, generic_cost): Set fp reassociation width
+ to 2 and 1 otherwise.
(intel_cost): Set fp reassociation width to 4 and 1 otherwise.
(core_cost): Set fp reassociation width to 4 and vector to 2.
(ix86_reassociation_width): Rewrite using cost table; special case
@@ -852,7 +1465,7 @@
(class dom_opt_dom_walker): Initialize m_dummy_cond member in the
class ctor.
(pass_dominator:execute): Build the dummy_cond here and pass it
- to the dom_opt_dom_walker ctor.
+ to the dom_opt_dom_walker ctor.
(test_for_singularity): New function.
2017-09-30 Krister Walfridsson <krister.walfridsson@gmail.com>
@@ -1297,7 +1910,7 @@
* rs6000.md (allocate_stack): Handle -fstack-clash-protection.
(probe_stack_range<P:mode>): Operand 0 is now early-clobbered.
Add additional operand and pass it to output_probe_stack_range.
-
+
2017-09-25 Bin Cheng <bin.cheng@arm.com>
PR tree-optimization/82163
@@ -1729,7 +2342,7 @@
2017-09-22 Sergey Shalnov <sergey.shalnov@intel.com>
- * config/i386/sse.md ("mov<mode>_internal"): Use <sseinsnmode>
+ * config/i386/sse.md ("mov<mode>_internal"): Use <sseinsnmode>
mode attribute for TARGET_AVX512VL.
2017-09-21 Sergey Shalnov <sergey.shalnov@intel.com>
@@ -2008,9 +2621,9 @@
(ix86_expand_prologue): Dump stack clash info as needed.
Call ix86_adjust_stack_and_probe_stack_clash as needed.
- * function.c (dump_stack_clash_frame_info): New function.
- * function.h (dump_stack_clash_frame_info): Prototype.
- (enum stack_clash_probes): New enum.
+ * function.c (dump_stack_clash_frame_info): New function.
+ * function.h (dump_stack_clash_frame_info): Prototype.
+ (enum stack_clash_probes): New enum.
* config/alpha/alpha.c (alpha_expand_prologue): Also check
flag_stack_clash_protection.
diff --git a/gcc/DATESTAMP b/gcc/DATESTAMP
index 915b0fd713c..96354492ed7 100644
--- a/gcc/DATESTAMP
+++ b/gcc/DATESTAMP
@@ -1 +1 @@
-20171010
+20171013
diff --git a/gcc/Makefile.in b/gcc/Makefile.in
index 640674d939c..0f7110d227a 100644
--- a/gcc/Makefile.in
+++ b/gcc/Makefile.in
@@ -1413,7 +1413,6 @@ OBJS = \
opts-global.o \
passes.o \
plugin.o \
- poly-int.o \
postreload-gcse.o \
postreload.o \
predict.o \
@@ -4137,8 +4136,7 @@ TAGS: lang.tags
../include/*.h ../libiberty/*.c \
../libcpp/*.c ../libcpp/include/*.h \
--language=none --regex="/\(char\|unsigned int\|int\|bool\|void\|HOST_WIDE_INT\|enum [A-Za-z_0-9]+\) [*]?\([A-Za-z_0-9]+\)/\2/" common.opt \
- --language=none --regex="/\(DEF_RTL_EXPR\|DEFTREECODE\|DEFGSCODE\).*(\([A-Za-z_0-9]+\)/\2/" rtl.def tree.def gimple.def \
- --language=none --regex="/DEFTIMEVAR (\([A-Za-z_0-9]+\)/\1/" timevar.def \
+ --language=none --regex="/\(DEF_RTL_EXPR\|DEFTREECODE\|DEFGSCODE\|DEFTIMEVAR\|DEFPARAM\|DEFPARAMENUM5\)[ ]?(\([A-Za-z_0-9]+\)/\2/" rtl.def tree.def gimple.def timevar.def params.def \
; \
etags --include TAGS.sub $$incs)
diff --git a/gcc/alias.c b/gcc/alias.c
index 9070edb6074..f288299ec32 100644
--- a/gcc/alias.c
+++ b/gcc/alias.c
@@ -175,105 +175,12 @@ static struct {
unsigned long long num_disambiguated;
} alias_stats;
-/* Represents the size of a memory reference during alias analysis.
- There are three possibilities:
-
- (1) the size needs to be treated as completely unknown
- (2) the size is known exactly and no alignment is applied to the address
- (3) the size is known exactly but an alignment is applied to the address
-
- (3) is used for aligned addresses of the form (and X (const_int -N)),
- which can subtract something in the range [0, N) from the original
- address X. We handle this by subtracting N - 1 from X and adding N - 1
- to the size, so that the range spans all possible bytes. */
-class mem_alias_size {
-public:
- /* Return an unknown size (case (1) above). */
- static mem_alias_size unknown () { return poly_int64 (0); }
-
- /* Return an exact size (case (2) above). */
- static mem_alias_size exact (poly_int64 size) { return size; }
-
- /* Return a worst-case size after alignment (case (3) above).
- SIZE includes the maximum adjustment applied by the alignment. */
- static mem_alias_size aligned (poly_int64 size) { return -size; }
-
- /* Return the size of memory reference X. */
- static mem_alias_size mem (const_rtx x) { return MEM_SIZE (x); }
-
- static mem_alias_size mode (machine_mode m);
-
- /* Return true if the exact size of the memory is known. */
- bool exact_p () const { return may_gt (m_value, 0); }
- bool exact_p (poly_int64 *) const;
-
- /* Return true if an upper bound on the memory size is known;
- i.e. not case (1) above. */
- bool max_size_known_p () const { return may_ne (m_value, 0); }
- bool max_size_known_p (poly_int64 *) const;
-
- /* Return true if the size is subject to alignment. */
- bool aligned_p () const { return may_lt (m_value, 0); }
-
-private:
- mem_alias_size (poly_int64 value) : m_value (value) {}
-
- poly_int64 m_value;
-};
-
-/* Return the size of mode M. */
-
-inline mem_alias_size
-mem_alias_size::mode (machine_mode m)
-{
- return poly_int64 (GET_MODE_SIZE (m));
-}
-
-/* Return true if the exact memory size is known, storing it in *RES if so. */
-
-inline bool
-mem_alias_size::exact_p (poly_int64 *res) const
-{
- if (!exact_p ())
- return false;
- *res = m_value;
- return true;
-}
-
-/* Return true if an upper bound on the memory size is known,
- storing it in *RES if so. */
-
-inline bool
-mem_alias_size::max_size_known_p (poly_int64 *res) const
-{
- if (!max_size_known_p ())
- return false;
- *res = may_lt (m_value, 0) ? -m_value : m_value;
- return true;
-}
-
-/* Align X to POW2 bytes, where POW2 is known to be a power of two. */
-
-inline mem_alias_size
-align_to (mem_alias_size x, uint64_t pow2)
-{
- poly_int64 value;
- if (x.max_size_known_p (&value))
- return mem_alias_size::aligned (value + (pow2 - 1));
- return mem_alias_size::unknown ();
-}
-
-/* Return true if X might be greater than Y bytes. */
-
-inline bool
-alias_may_gt (mem_alias_size x, poly_int64 y)
-{
- poly_int64 value;
- return !x.max_size_known_p (&value) || may_gt (value, y);
-}
/* Set up all info needed to perform alias analysis on memory references. */
+/* Returns the size in bytes of the mode of X. */
+#define SIZE_FOR_MODE(X) (GET_MODE_SIZE (GET_MODE (X)))
+
/* Cap the number of passes we make over the insns propagating alias
information through set chains.
??? 10 is a completely arbitrary choice. This should be based on the
@@ -437,13 +344,7 @@ ao_ref_from_mem (ao_ref *ref, const_rtx mem)
/* The MEM may extend into adjacent fields, so adjust max_size if
necessary. */
if (ref->max_size_known_p ())
- {
- if (must_lt (ref->max_size, ref->size))
- ref->max_size = ref->size;
- else if (!ordered_p (ref->max_size, ref->size))
- /* max_size is no longer known. */
- ref->max_size = -1;
- }
+ ref->max_size = upper_bound (ref->max_size, ref->size);
/* If MEM_OFFSET and MEM_SIZE might get us outside of the base object of
the MEM_EXPR punt. This happens for STRICT_ALIGNMENT targets a lot. */
@@ -451,7 +352,7 @@ ao_ref_from_mem (ao_ref *ref, const_rtx mem)
&& (may_lt (ref->offset, 0)
|| (DECL_P (ref->base)
&& (DECL_SIZE (ref->base) == NULL_TREE
- || !poly_tree_p (DECL_SIZE (ref->base))
+ || !poly_int_tree_p (DECL_SIZE (ref->base))
|| may_lt (wi::to_poly_offset (DECL_SIZE (ref->base)),
ref->offset + ref->size)))))
return false;
@@ -1446,6 +1347,7 @@ static rtx
find_base_value (rtx src)
{
unsigned int regno;
+ scalar_int_mode int_mode;
#if defined (FIND_BASE_TERM)
/* Try machine-dependent ways to find the base term. */
@@ -1572,7 +1474,8 @@ find_base_value (rtx src)
address modes depending on the address space. */
if (!target_default_pointer_address_modes_p ())
break;
- if (is_narrower_int_mode (GET_MODE (src), Pmode))
+ if (!is_a <scalar_int_mode> (GET_MODE (src), &int_mode)
+ || GET_MODE_PRECISION (int_mode) < GET_MODE_PRECISION (Pmode))
break;
/* Fall through. */
case HIGH:
@@ -1978,6 +1881,7 @@ find_base_term (rtx x)
cselib_val *val;
struct elt_loc_list *l, *f;
rtx ret;
+ scalar_int_mode int_mode;
#if defined (FIND_BASE_TERM)
/* Try machine-dependent ways to find the base term. */
@@ -1995,7 +1899,8 @@ find_base_term (rtx x)
address modes depending on the address space. */
if (!target_default_pointer_address_modes_p ())
return 0;
- if (is_narrower_int_mode (GET_MODE (x), Pmode))
+ if (!is_a <scalar_int_mode> (GET_MODE (x), &int_mode)
+ || GET_MODE_PRECISION (int_mode) < GET_MODE_PRECISION (Pmode))
return 0;
/* Fall through. */
case HIGH:
@@ -2384,52 +2289,55 @@ get_addr (rtx x)
return x;
}
-/* Return the address of the (N_REFS + 1)th memory reference to ADDR
- where SIZE is the size in bytes of the memory reference. If ADDR
- is not modified by the memory reference then ADDR is returned. */
+/* Return the address of the (N_REFS + 1)th memory reference to ADDR
+ where SIZE is the size in bytes of the memory reference. If ADDR
+ is not modified by the memory reference then ADDR is returned. */
static rtx
-addr_side_effect_eval (rtx addr, mem_alias_size size, int n_refs)
+addr_side_effect_eval (rtx addr, poly_int64 size, int n_refs)
{
- int count;
+ poly_int64 offset = 0;
switch (GET_CODE (addr))
{
case PRE_INC:
- count = (n_refs + 1);
+ offset = (n_refs + 1) * size;
break;
case PRE_DEC:
- count = -(n_refs + 1);
+ offset = -(n_refs + 1) * size;
break;
case POST_INC:
- count = n_refs;
+ offset = n_refs * size;
break;
case POST_DEC:
- count = -n_refs;
+ offset = -n_refs * size;
break;
default:
return addr;
}
- poly_int64 value;
- /* Can only automodify a pointer to a known memory size. */
- if (!size.exact_p (&value))
- gcc_unreachable ();
- addr = plus_constant (GET_MODE (addr), XEXP (addr, 0), value * count);
+ addr = plus_constant (GET_MODE (addr), XEXP (addr, 0), offset);
addr = canon_rtx (addr);
return addr;
}
/* Return TRUE if an object X sized at XSIZE bytes and another object
- Y sized at YSIZE bytes, starting C bytes after X, may overlap. */
+ Y sized at YSIZE bytes, starting C bytes after X, may overlap. If
+ any of the sizes is zero, assume an overlap, otherwise use the
+ absolute value of the sizes as the actual sizes. */
static inline bool
-offset_overlap_p (poly_int64 c, mem_alias_size xsize, mem_alias_size ysize)
+offset_overlap_p (poly_int64 c, poly_int64 xsize, poly_int64 ysize)
{
- return ((may_ge (c, 0) && alias_may_gt (xsize, c))
- || (may_lt (c, 0) && alias_may_gt (ysize, -c)));
+ if (known_zero (xsize) || known_zero (ysize))
+ return true;
+
+ if (may_ge (c, 0))
+ return may_gt (may_lt (xsize, 0) ? -xsize : xsize, c);
+ else
+ return may_gt (may_lt (ysize, 0) ? -ysize : ysize, -c);
}
/* Return one if X and Y (memory addresses) reference the
@@ -2443,6 +2351,14 @@ offset_overlap_p (poly_int64 c, mem_alias_size xsize, mem_alias_size ysize)
similarly YSIZE is the size in bytes for Y.
Expect that canon_rtx has been already called for X and Y.
+ If XSIZE or YSIZE is zero, we do not know the amount of memory being
+ referenced (the reference was BLKmode), so make the most pessimistic
+ assumptions.
+
+ If XSIZE or YSIZE is negative, we may access memory outside the object
+ being referenced as a side effect. This can happen when using AND to
+ align memory references, as is done on the Alpha.
+
Nice to notice that varying addresses cannot conflict with fp if no
local variables had their addresses taken, but that's too hard now.
@@ -2451,8 +2367,7 @@ offset_overlap_p (poly_int64 c, mem_alias_size xsize, mem_alias_size ysize)
If that is fixed the TBAA hack for union type-punning can be removed. */
static int
-memrefs_conflict_p (mem_alias_size xsize, rtx x,
- mem_alias_size ysize, rtx y,
+memrefs_conflict_p (poly_int64 xsize, rtx x, poly_int64 ysize, rtx y,
poly_int64 c)
{
if (GET_CODE (x) == VALUE)
@@ -2498,13 +2413,13 @@ memrefs_conflict_p (mem_alias_size xsize, rtx x,
else if (GET_CODE (x) == LO_SUM)
x = XEXP (x, 1);
else
- x = addr_side_effect_eval (x, xsize, 0);
+ x = addr_side_effect_eval (x, may_lt (xsize, 0) ? -xsize : xsize, 0);
if (GET_CODE (y) == HIGH)
y = XEXP (y, 0);
else if (GET_CODE (y) == LO_SUM)
y = XEXP (y, 1);
else
- y = addr_side_effect_eval (y, ysize, 0);
+ y = addr_side_effect_eval (y, may_lt (ysize, 0) ? -ysize : ysize, 0);
if (GET_CODE (x) == SYMBOL_REF && GET_CODE (y) == SYMBOL_REF)
{
@@ -2517,7 +2432,7 @@ memrefs_conflict_p (mem_alias_size xsize, rtx x,
through alignment adjustments (i.e., that have negative
sizes), because we can't know how far they are from each
other. */
- if (xsize.aligned_p () || ysize.aligned_p ())
+ if (may_lt (xsize, 0) || may_lt (ysize, 0))
return -1;
/* If decls are different or we know by offsets that there is no overlap,
we win. */
@@ -2548,6 +2463,7 @@ memrefs_conflict_p (mem_alias_size xsize, rtx x,
else if (x1 == y)
return memrefs_conflict_p (xsize, x0, ysize, const0_rtx, c);
+ poly_int64 cx1, cy1;
if (GET_CODE (y) == PLUS)
{
/* The fact that Y is canonicalized means that this
@@ -2564,22 +2480,21 @@ memrefs_conflict_p (mem_alias_size xsize, rtx x,
return memrefs_conflict_p (xsize, x0, ysize, y0, c);
if (rtx_equal_for_memref_p (x0, y0))
return memrefs_conflict_p (xsize, x1, ysize, y1, c);
- if (CONST_INT_P (x1))
+ if (poly_int_rtx_p (x1, &cx1))
{
- if (CONST_INT_P (y1))
+ if (poly_int_rtx_p (y1, &cy1))
return memrefs_conflict_p (xsize, x0, ysize, y0,
- c - INTVAL (x1) + INTVAL (y1));
+ c - cx1 + cy1);
else
- return memrefs_conflict_p (xsize, x0, ysize, y,
- c - INTVAL (x1));
+ return memrefs_conflict_p (xsize, x0, ysize, y, c - cx1);
}
- else if (CONST_INT_P (y1))
- return memrefs_conflict_p (xsize, x, ysize, y0, c + INTVAL (y1));
+ else if (poly_int_rtx_p (y1, &cy1))
+ return memrefs_conflict_p (xsize, x, ysize, y0, c + cy1);
return -1;
}
- else if (CONST_INT_P (x1))
- return memrefs_conflict_p (xsize, x0, ysize, y, c - INTVAL (x1));
+ else if (poly_int_rtx_p (x1, &cx1))
+ return memrefs_conflict_p (xsize, x0, ysize, y, c - cx1);
}
else if (GET_CODE (y) == PLUS)
{
@@ -2593,8 +2508,9 @@ memrefs_conflict_p (mem_alias_size xsize, rtx x,
if (x == y1)
return memrefs_conflict_p (xsize, const0_rtx, ysize, y0, c);
- if (CONST_INT_P (y1))
- return memrefs_conflict_p (xsize, x, ysize, y0, c + INTVAL (y1));
+ poly_int64 cy1;
+ if (poly_int_rtx_p (y1, &cy1))
+ return memrefs_conflict_p (xsize, x, ysize, y0, c + cy1);
else
return -1;
}
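
poly_int_rtx_p generalizes the CONST_INT_P tests it replaces: it also accepts the runtime-invariant constants that SVE offsets expand to. A hedged sketch (assumes a two-coefficient poly_int target):

poly_int64 val;
if (poly_int_rtx_p (GEN_INT (16), &val))
  ;  /* Succeeds with val == 16, exactly as INTVAL would give.  */
/* An rtx representing "16 + 16x" (x counts vector chunks beyond the
   minimum length) also succeeds, with val.coeffs[0] == 16 and
   val.coeffs[1] == 16, so the recursion can fold such offsets into C
   instead of giving up.  */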
@@ -2618,17 +2534,12 @@ memrefs_conflict_p (mem_alias_size xsize, rtx x,
return offset_overlap_p (c, xsize, ysize);
/* Can't properly adjust our sizes. */
- poly_int64 new_xsize, new_ysize, new_c;
if (!CONST_INT_P (x1)
- || !xsize.exact_p (&new_xsize)
- || !ysize.exact_p (&new_ysize)
- || !multiple_p (new_xsize, INTVAL (x1), &new_xsize)
- || !multiple_p (new_ysize, INTVAL (x1), &new_ysize)
- || !multiple_p (c, INTVAL (x1), &new_c))
+ || !can_div_trunc_p (xsize, INTVAL (x1), &xsize)
+ || !can_div_trunc_p (ysize, INTVAL (x1), &ysize)
+ || !can_div_trunc_p (c, INTVAL (x1), &c))
return -1;
- return memrefs_conflict_p (mem_alias_size::exact (new_xsize), x0,
- mem_alias_size::exact (new_ysize), y0,
- new_c);
+ return memrefs_conflict_p (xsize, x0, ysize, y0, c);
}
default:
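
In the MULT case above, can_div_trunc_p succeeds only when the truncating division has a result that is itself a poly_int for every runtime value; a worked sketch (again assuming two coefficients):

poly_int64 q;
can_div_trunc_p (poly_int64 (8, 8), 4, &q);  /* true: q == 2 + 2x.  */
can_div_trunc_p (poly_int64 (4, 2), 4, &q);  /* false: (4 + 2x) / 4
   is 1, 1, 2, 2, 3, ... as x grows -- not linear in x -- so the
   caller above answers -1 rather than mis-scaling the sizes.  */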
@@ -2645,11 +2556,14 @@ memrefs_conflict_p (mem_alias_size xsize, rtx x,
if (GET_CODE (x) == AND && CONST_INT_P (XEXP (x, 1)))
{
HOST_WIDE_INT sc = INTVAL (XEXP (x, 1));
- unsigned HOST_WIDE_INT align = -sc;
- if (sc < 0 && pow2_or_zerop (align))
+ unsigned HOST_WIDE_INT uc = sc;
+ if (sc < 0 && pow2_or_zerop (-uc))
{
- xsize = align_to (xsize, align);
- c += align - 1;
+ if (may_gt (xsize, 0))
+ xsize = -xsize;
+ if (maybe_nonzero (xsize))
+ xsize += sc + 1;
+ c -= sc + 1;
return memrefs_conflict_p (xsize, canon_rtx (XEXP (x, 0)),
ysize, y, c);
}
@@ -2657,11 +2571,14 @@ memrefs_conflict_p (mem_alias_size xsize, rtx x,
if (GET_CODE (y) == AND && CONST_INT_P (XEXP (y, 1)))
{
HOST_WIDE_INT sc = INTVAL (XEXP (y, 1));
- unsigned HOST_WIDE_INT align = -sc;
- if (sc < 0 && pow2_or_zerop (align))
+ unsigned HOST_WIDE_INT uc = sc;
+ if (sc < 0 && pow2_or_zerop (-uc))
{
- ysize = align_to (ysize, align);
- c -= align - 1;
+ if (may_gt (ysize, 0))
+ ysize = -ysize;
+ if (maybe_nonzero (ysize))
+ ysize += sc + 1;
+ c += sc + 1;
return memrefs_conflict_p (xsize, x,
ysize, canon_rtx (XEXP (y, 0)), c);
}
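
The sc + 1 arithmetic is easier to follow with numbers; a worked example of the Y case above (editorial, not in the patch):

/* (and y (const_int -16)) aligns Y down to a 16-byte boundary.
   With sc == -16 and an incoming ysize of 4:
     ysize = -4;         negative now marks "alignment-adjusted"
     ysize += sc + 1;    ysize == -4 + -15 == -19
     c += sc + 1;        C drops by the worst-case 15-byte slop
   so the 4-byte access is treated as possibly touching any of 19
   bytes starting up to 15 bytes below the original address.  */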
@@ -2669,9 +2586,10 @@ memrefs_conflict_p (mem_alias_size xsize, rtx x,
if (CONSTANT_P (x))
{
- if (CONST_INT_P (x) && CONST_INT_P (y))
+ poly_int64 cx, cy;
+ if (poly_int_rtx_p (x, &cx) && poly_int_rtx_p (y, &cy))
{
- c += (INTVAL (y) - INTVAL (x));
+ c += cy - cx;
return offset_overlap_p (c, xsize, ysize);
}
@@ -2688,6 +2606,15 @@ memrefs_conflict_p (mem_alias_size xsize, rtx x,
return memrefs_conflict_p (xsize, x, ysize,
canon_rtx (XEXP (y, 0)), c);
+ /* Assume a potential overlap for symbolic addresses that went
+ through alignment adjustments (i.e., that have negative
+ sizes), because we can't know how far they are from each
+ other. */
+ if (CONSTANT_P (y))
+ return (may_lt (xsize, 0)
+ || may_lt (ysize, 0)
+ || offset_overlap_p (c, xsize, ysize));
+
return -1;
}
@@ -2844,8 +2771,7 @@ nonoverlapping_memrefs_p (const_rtx x, const_rtx y, bool loop_invariant)
|| (moffsetx_known_p && moffsety_known_p
&& MEM_SIZE_KNOWN_P (x) && MEM_SIZE_KNOWN_P (y)
&& !offset_overlap_p (moffsety - moffsetx,
- mem_alias_size::mem (x),
- mem_alias_size::mem (y)));
+ MEM_SIZE (x), MEM_SIZE (y)));
/* With invalid code we can end up storing into the constant pool.
Bail out to avoid ICEing when creating RTL for this.
@@ -2891,12 +2817,10 @@ nonoverlapping_memrefs_p (const_rtx x, const_rtx y, bool loop_invariant)
we can avoid overlap is if we can deduce that they are nonoverlapping
pieces of that decl, which is very rare. */
basex = MEM_P (rtlx) ? XEXP (rtlx, 0) : rtlx;
- if (GET_CODE (basex) == PLUS && CONST_INT_P (XEXP (basex, 1)))
- offsetx = INTVAL (XEXP (basex, 1)), basex = XEXP (basex, 0);
+ basex = strip_offset_and_add (basex, &offsetx);
basey = MEM_P (rtly) ? XEXP (rtly, 0) : rtly;
- if (GET_CODE (basey) == PLUS && CONST_INT_P (XEXP (basey, 1)))
- offsety = INTVAL (XEXP (basey, 1)), basey = XEXP (basey, 0);
+ basey = strip_offset_and_add (basey, &offsety);
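
strip_offset_and_add subsumes the open-coded PLUS/CONST_INT match and also copes with poly_int offsets; a hedged sketch of its effect:

/* Given basex == (plus (symbol_ref "a") (const_int 12)) and
   offsetx == 0 on entry, afterwards basex == (symbol_ref "a") and
   offsetx == 12; the stripped offset is added to the existing value.
   An rtx with no recognizable constant offset comes back unchanged,
   leaving offsetx untouched.  */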
/* If the bases are different, we know they do not overlap if both
are constants or if one is a constant and the other a pointer into the
@@ -2919,10 +2843,10 @@ nonoverlapping_memrefs_p (const_rtx x, const_rtx y, bool loop_invariant)
sizex = (!MEM_P (rtlx) ? poly_int64 (GET_MODE_SIZE (GET_MODE (rtlx)))
: MEM_SIZE_KNOWN_P (rtlx) ? MEM_SIZE (rtlx)
- : poly_int64 (-1));
+ : -1);
sizey = (!MEM_P (rtly) ? poly_int64 (GET_MODE_SIZE (GET_MODE (rtly)))
: MEM_SIZE_KNOWN_P (rtly) ? MEM_SIZE (rtly)
- : poly_int64 (-1));
+ : -1);
/* If we have an offset for either memref, it can update the values computed
above. */
@@ -3020,9 +2944,8 @@ true_dependence_1 (const_rtx mem, machine_mode mem_mode, rtx mem_addr,
if (!mem_canonicalized)
mem_addr = canon_rtx (true_mem_addr);
- if ((ret = memrefs_conflict_p (mem_alias_size::mode (mem_mode), mem_addr,
- mem_alias_size::mode (GET_MODE (x)),
- x_addr, 0)) != -1)
+ if ((ret = memrefs_conflict_p (GET_MODE_SIZE (mem_mode), mem_addr,
+ SIZE_FOR_MODE (x), x_addr, 0)) != -1)
return ret;
if (mems_in_disjoint_alias_sets_p (x, mem))
@@ -3133,9 +3056,8 @@ write_dependence_p (const_rtx mem,
if (!mem_canonicalized)
mem_addr = canon_rtx (true_mem_addr);
- if ((ret = memrefs_conflict_p (mem_alias_size::mode (GET_MODE (mem)),
- mem_addr, mem_alias_size::mode (x_mode),
- x_addr, 0)) != -1)
+ if ((ret = memrefs_conflict_p (SIZE_FOR_MODE (mem), mem_addr,
+ GET_MODE_SIZE (x_mode), x_addr, 0)) != -1)
return ret;
if (nonoverlapping_memrefs_p (x, mem, false))
@@ -3466,7 +3388,7 @@ init_alias_analysis (void)
&& GET_CODE (src) == PLUS
&& REG_P (XEXP (src, 0))
&& (t = get_reg_known_value (REGNO (XEXP (src, 0))))
- && poly_int_const_p (XEXP (src, 1), &offset))
+ && poly_int_rtx_p (XEXP (src, 1), &offset))
{
t = plus_constant (GET_MODE (src), t, offset);
set_reg_known_value (regno, t);
diff --git a/gcc/asan.c b/gcc/asan.c
index 2c9db291e34..779aa78976d 100644
--- a/gcc/asan.c
+++ b/gcc/asan.c
@@ -2093,12 +2093,12 @@ instrument_derefs (gimple_stmt_iterator *iter, tree t,
if (VAR_P (inner) && DECL_HARD_REGISTER (inner))
return;
- poly_int64 size;
+ poly_int64 decl_size;
if (VAR_P (inner)
&& offset == NULL_TREE
&& DECL_SIZE (inner)
- && poly_tree_p (DECL_SIZE (inner), &size)
- && known_subrange_p (bitpos, bitsize, 0, size))
+ && poly_int_tree_p (DECL_SIZE (inner), &decl_size)
+ && known_subrange_p (bitpos, bitsize, 0, decl_size))
{
if (DECL_THREAD_LOCAL_P (inner))
return;
diff --git a/gcc/brig/brigfrontend/brig-basic-inst-handler.cc b/gcc/brig/brigfrontend/brig-basic-inst-handler.cc
index ef82f2c108f..63ea41d16d9 100644
--- a/gcc/brig/brigfrontend/brig-basic-inst-handler.cc
+++ b/gcc/brig/brigfrontend/brig-basic-inst-handler.cc
@@ -97,7 +97,6 @@ brig_basic_inst_handler::build_shuffle (tree arith_type,
output elements can originate from any input element. */
vec<constructor_elt, va_gc> *mask_offset_vals = NULL;
- /* BRIG doesn't support variable-length vectors. */
unsigned int element_count = gccbrig_type_vector_subparts (arith_type);
vec<constructor_elt, va_gc> *input_mask_vals = NULL;
@@ -158,7 +157,6 @@ brig_basic_inst_handler::build_unpack (tree_stl_vec &operands)
vec<constructor_elt, va_gc> *input_mask_vals = NULL;
vec<constructor_elt, va_gc> *and_mask_vals = NULL;
- /* BRIG doesn't support variable-length vectors. */
size_t element_count
= gccbrig_type_vector_subparts (TREE_TYPE (operands[0]));
tree vec_type = build_vector_type (element_type, element_count);
@@ -215,7 +213,6 @@ brig_basic_inst_handler::build_pack (tree_stl_vec &operands)
TODO: Reuse this for implementing 'bitinsert'
without a builtin call. */
- /* BRIG doesn't support variable-length vectors. */
size_t ecount = gccbrig_type_vector_subparts (TREE_TYPE (operands[0]));
size_t vecsize = int_size_in_bytes (TREE_TYPE (operands[0])) * BITS_PER_UNIT;
tree wide_type = build_nonstandard_integer_type (vecsize, 1);
@@ -278,9 +275,9 @@ brig_basic_inst_handler::build_unpack_lo_or_hi (BrigOpcode16_t brig_opcode,
{
tree element_type = get_unsigned_int_type (TREE_TYPE (arith_type));
tree mask_vec_type
- = build_vector_type (element_type, TYPE_VECTOR_SUBPARTS (arith_type));
+ = build_vector_type (element_type,
+ gccbrig_type_vector_subparts (arith_type));
- /* BRIG doesn't support variable-length vectors. */
size_t element_count = gccbrig_type_vector_subparts (arith_type);
vec<constructor_elt, va_gc> *input_mask_vals = NULL;
@@ -604,7 +601,6 @@ brig_basic_inst_handler::operator () (const BrigBase *base)
}
size_t promoted_type_size = int_size_in_bytes (promoted_type) * 8;
- /* BRIG doesn't support variable-length vectors. */
size_t element_count = gccbrig_type_vector_subparts (arith_type);
for (size_t i = 0; i < element_count; ++i)
{
@@ -713,7 +709,6 @@ brig_basic_inst_handler::build_lower_element_broadcast (tree vec_operand)
tree element_type = TREE_TYPE (TREE_TYPE (vec_operand));
size_t esize = 8 * int_size_in_bytes (element_type);
- /* BRIG doesn't support variable-length vectors. */
size_t element_count
= gccbrig_type_vector_subparts (TREE_TYPE (vec_operand));
tree mask_inner_type = build_nonstandard_integer_type (esize, 1);
diff --git a/gcc/brig/brigfrontend/brig-code-entry-handler.cc b/gcc/brig/brigfrontend/brig-code-entry-handler.cc
index 2b4aa57659f..d84e71b980e 100644
--- a/gcc/brig/brigfrontend/brig-code-entry-handler.cc
+++ b/gcc/brig/brigfrontend/brig-code-entry-handler.cc
@@ -641,7 +641,6 @@ brig_code_entry_handler::get_tree_cst_for_hsa_operand
{
/* In case of vector type elements (or sole vectors),
create a vector ctor. */
- /* BRIG doesn't support variable-length vectors. */
size_t element_count
= gccbrig_type_vector_subparts (tree_element_type);
if (bytes_left < scalar_element_size * element_count)
@@ -847,7 +846,7 @@ brig_code_entry_handler::get_comparison_result_type (tree source_type)
size_t element_size = int_size_in_bytes (TREE_TYPE (source_type));
return build_vector_type
(build_nonstandard_boolean_type (element_size * BITS_PER_UNIT),
- TYPE_VECTOR_SUBPARTS (source_type));
+ gccbrig_type_vector_subparts (source_type));
}
else
return gccbrig_tree_type_for_hsa_type (BRIG_TYPE_B1);
@@ -952,7 +951,6 @@ brig_code_entry_handler::expand_or_call_builtin (BrigOpcode16_t brig_opcode,
tree_stl_vec result_elements;
- /* BRIG doesn't support variable-length vectors. */
size_t element_count = gccbrig_type_vector_subparts (arith_type);
for (size_t i = 0; i < element_count; ++i)
{
diff --git a/gcc/brig/brigfrontend/brig-util.h b/gcc/brig/brigfrontend/brig-util.h
index f96de4b6aa1..9b0c3e91066 100644
--- a/gcc/brig/brigfrontend/brig-util.h
+++ b/gcc/brig/brigfrontend/brig-util.h
@@ -77,7 +77,7 @@ bool gccbrig_might_be_host_defined_var_p (const BrigDirectiveVariable *brigVar);
bool hsa_type_packed_p (BrigType16_t type);
/* Return the number of elements in a VECTOR_TYPE. BRIG does not support
- variable-sized vectors. */
+ variable-length vectors. */
inline unsigned HOST_WIDE_INT
gccbrig_type_vector_subparts (const_tree type)
{
diff --git a/gcc/builtins.c b/gcc/builtins.c
index cb34129be62..c50d7f43f76 100644
--- a/gcc/builtins.c
+++ b/gcc/builtins.c
@@ -388,7 +388,6 @@ get_object_alignment_2 (tree exp, unsigned int *alignp,
*alignp = align;
*bitposp = bitpos.coeffs[0] & (align - 1);
-
return known_alignment;
}
@@ -910,7 +909,7 @@ expand_builtin_setjmp_receiver (rtx receiver_label)
to the underlying register (fp in this case) that makes
the original assignment true.
So the following insn will actually be decrementing fp by
- STARTING_FRAME_OFFSET. */
+ TARGET_STARTING_FRAME_OFFSET. */
emit_move_insn (virtual_stack_vars_rtx, hard_frame_pointer_rtx);
/* Restoring the frame pointer also modifies the hard frame pointer.
diff --git a/gcc/c-family/ChangeLog b/gcc/c-family/ChangeLog
index b63673035f4..ee6fc87dd6f 100644
--- a/gcc/c-family/ChangeLog
+++ b/gcc/c-family/ChangeLog
@@ -1,3 +1,21 @@
+2017-10-12 David Malcolm <dmalcolm@redhat.com>
+
+ * c-common.c (enum missing_token_insertion_kind): New enum.
+ (get_missing_token_insertion_kind): New function.
+ (maybe_suggest_missing_token_insertion): New function.
+ * c-common.h (maybe_suggest_missing_token_insertion): New decl.
+
+2017-10-11 Nathan Sidwell <nathan@acm.org>
+
+ * c-opts.c (add_prefixed_path): Change chain to incpath_kind.
+ (c_common_handle_option): Update incpath_kind names.
+
+2017-10-11 Martin Liska <mliska@suse.cz>
+
+ PR sanitizer/82490
+ * c-attribs.c (handle_no_sanitize_attribute): Report the
+ Wattributes warning directly.
+
2017-10-10 Richard Sandiford <richard.sandiford@linaro.org>
* c-ada-spec.c (dump_generic_ada_node): Use wi::to_wide when
diff --git a/gcc/c-family/c-attribs.c b/gcc/c-family/c-attribs.c
index 4e6754fba20..bd8ca306c2d 100644
--- a/gcc/c-family/c-attribs.c
+++ b/gcc/c-family/c-attribs.c
@@ -613,15 +613,8 @@ handle_no_sanitize_attribute (tree *node, tree name, tree args, int,
return NULL_TREE;
}
- char *error_value = NULL;
char *string = ASTRDUP (TREE_STRING_POINTER (id));
- unsigned int flags = parse_no_sanitize_attribute (string, &error_value);
-
- if (error_value)
- {
- error ("wrong argument: \"%s\"", error_value);
- return NULL_TREE;
- }
+ unsigned int flags = parse_no_sanitize_attribute (string);
add_no_sanitize_value (*node, flags);
diff --git a/gcc/c-family/c-common.c b/gcc/c-family/c-common.c
index bcf8e12621a..1f2bf646e76 100644
--- a/gcc/c-family/c-common.c
+++ b/gcc/c-family/c-common.c
@@ -7956,6 +7956,164 @@ c_flt_eval_method (bool maybe_c11_only_p)
return c_ts18661_flt_eval_method ();
}
+/* An enum for get_missing_token_insertion_kind for describing the best
+ place to insert a missing token, if there is one. */
+
+enum missing_token_insertion_kind
+{
+ MTIK_IMPOSSIBLE,
+ MTIK_INSERT_BEFORE_NEXT,
+ MTIK_INSERT_AFTER_PREV
+};
+
+/* Given a missing token of TYPE, determine if it is reasonable to
+ emit a fix-it hint suggesting the insertion of the token, and,
+ if so, where the token should be inserted relative to other tokens.
+
+ It only makes sense to do this for values of TYPE that are symbols.
+
+ Some symbols should go before the next token, e.g. in:
+ if flag)
+ we want to insert the missing '(' immediately before "flag",
+ giving:
+ if (flag)
+ rather than:
+ if( flag)
+ These use MTIK_INSERT_BEFORE_NEXT.
+
+ Other symbols should go after the previous token, e.g. in:
+ if (flag
+ do_something ();
+ we want to insert the missing ')' immediately after the "flag",
+ giving:
+ if (flag)
+ do_something ();
+ rather than:
+ if (flag
+ )do_something ();
+ These use MTIK_INSERT_AFTER_PREV. */
+
+static enum missing_token_insertion_kind
+get_missing_token_insertion_kind (enum cpp_ttype type)
+{
+ switch (type)
+ {
+ /* Insert missing "opening" brackets immediately
+ before the next token. */
+ case CPP_OPEN_SQUARE:
+ case CPP_OPEN_PAREN:
+ return MTIK_INSERT_BEFORE_NEXT;
+
+ /* Insert other missing symbols immediately after
+ the previous token. */
+ case CPP_CLOSE_PAREN:
+ case CPP_CLOSE_SQUARE:
+ case CPP_SEMICOLON:
+ case CPP_COMMA:
+ case CPP_COLON:
+ return MTIK_INSERT_AFTER_PREV;
+
+ /* Other kinds of token don't get fix-it hints. */
+ default:
+ return MTIK_IMPOSSIBLE;
+ }
+}
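
A quick illustration of the classification (hypothetical calls, not part of the patch):

get_missing_token_insertion_kind (CPP_OPEN_PAREN); /* MTIK_INSERT_BEFORE_NEXT */
get_missing_token_insertion_kind (CPP_SEMICOLON);  /* MTIK_INSERT_AFTER_PREV */
get_missing_token_insertion_kind (CPP_NAME);       /* MTIK_IMPOSSIBLE: no
   fix-it hint is attempted for identifiers or other non-symbol tokens.  */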
+
+/* Given RICHLOC, a location for a diagnostic describing a missing token
+ of kind TOKEN_TYPE, potentially add a fix-it hint suggesting the
+ insertion of the token.
+
+ The location of the attempted fix-it hint depends on TOKEN_TYPE:
+ it will either be:
+ (a) immediately after PREV_TOKEN_LOC, or
+
+ (b) immediately before the primary location within RICHLOC (taken to
+ be that of the token following where the token was expected).
+
+ If we manage to add a fix-it hint, then the location of the
+ fix-it hint is likely to be more useful as the primary location
+ of the diagnostic than that of the following token, so we swap
+ these locations.
+
+ For example, given this bogus code:
+ 123456789012345678901234567890
+ 1 | int missing_semicolon (void)
+ 2 | {
+ 3 | return 42
+ 4 | }
+
+ we will emit:
+
+ "expected ';' before '}'"
+
+ RICHLOC's primary location is at the closing brace, so before "swapping"
+ we would emit the error at line 4 column 1:
+
+ 123456789012345678901234567890
+ 3 | return 42 |< fix-it hint emitted for this line
+ | ; |
+ 4 | } |< "expected ';' before '}'" emitted at this line
+ | ^ |
+
+ It's more useful for the location of the diagnostic to be at the
+ fix-it hint, so we swap the locations: the primary location moves
+ to the fix-it hint and the old primary location is inserted as a
+ secondary location, giving this, with the error at line 3
+ column 12:
+
+ 123456789012345678901234567890
+ 3 | return 42 |< "expected ';' before '}'" emitted at this line,
+ | ^ | with fix-it hint
+ | ; |
+ 4 | } |< secondary range emitted here
+ | ~ |. */
+
+void
+maybe_suggest_missing_token_insertion (rich_location *richloc,
+ enum cpp_ttype token_type,
+ location_t prev_token_loc)
+{
+ gcc_assert (richloc);
+
+ enum missing_token_insertion_kind mtik
+ = get_missing_token_insertion_kind (token_type);
+
+ switch (mtik)
+ {
+ default:
+ gcc_unreachable ();
+ break;
+
+ case MTIK_IMPOSSIBLE:
+ return;
+
+ case MTIK_INSERT_BEFORE_NEXT:
+ /* Attempt to add the fix-it hint before the primary location
+ of RICHLOC. */
+ richloc->add_fixit_insert_before (cpp_type2name (token_type, 0));
+ break;
+
+ case MTIK_INSERT_AFTER_PREV:
+ /* Attempt to add the fix-it hint after PREV_TOKEN_LOC. */
+ richloc->add_fixit_insert_after (prev_token_loc,
+ cpp_type2name (token_type, 0));
+ break;
+ }
+
+ /* If we were successful, use the fix-it hint's location as the
+ primary location within RICHLOC, adding the old primary location
+ back as a secondary location. */
+ if (!richloc->seen_impossible_fixit_p ())
+ {
+ fixit_hint *hint = richloc->get_last_fixit_hint ();
+ location_t hint_loc = hint->get_start_loc ();
+ location_t old_loc = richloc->get_loc ();
+
+ richloc->set_range (line_table, 0, hint_loc, true);
+ richloc->add_range (old_loc, false);
+ }
+}
+
#if CHECKING_P
namespace selftest {
diff --git a/gcc/c-family/c-common.h b/gcc/c-family/c-common.h
index da6a0be9200..7e1877e8d16 100644
--- a/gcc/c-family/c-common.h
+++ b/gcc/c-family/c-common.h
@@ -1550,6 +1550,9 @@ extern int c_flt_eval_method (bool ts18661_p);
extern void add_no_sanitize_value (tree node, unsigned int flags);
extern void maybe_add_include_fixit (rich_location *, const char *);
+extern void maybe_suggest_missing_token_insertion (rich_location *richloc,
+ enum cpp_ttype token_type,
+ location_t prev_token_loc);
#if CHECKING_P
namespace selftest {
diff --git a/gcc/c-family/c-opts.c b/gcc/c-family/c-opts.c
index 3662aa37be6..6bd535532d3 100644
--- a/gcc/c-family/c-opts.c
+++ b/gcc/c-family/c-opts.c
@@ -118,7 +118,7 @@ static void set_std_c11 (int);
static void check_deps_environment_vars (void);
static void handle_deferred_opts (void);
static void sanitize_cpp_opts (void);
-static void add_prefixed_path (const char *, size_t);
+static void add_prefixed_path (const char *, incpath_kind);
static void push_command_line_include (void);
static void cb_file_change (cpp_reader *, const line_map_ordinary *);
static void cb_dir_change (cpp_reader *, const char *);
@@ -316,7 +316,7 @@ c_common_handle_option (size_t scode, const char *arg, int value,
case OPT_I:
if (strcmp (arg, "-"))
- add_path (xstrdup (arg), BRACKET, 0, true);
+ add_path (xstrdup (arg), INC_BRACKET, 0, true);
else
{
if (quote_chain_split)
@@ -550,7 +550,7 @@ c_common_handle_option (size_t scode, const char *arg, int value,
break;
case OPT_idirafter:
- add_path (xstrdup (arg), AFTER, 0, true);
+ add_path (xstrdup (arg), INC_AFTER, 0, true);
break;
case OPT_imacros:
@@ -567,7 +567,7 @@ c_common_handle_option (size_t scode, const char *arg, int value,
break;
case OPT_iquote:
- add_path (xstrdup (arg), QUOTE, 0, true);
+ add_path (xstrdup (arg), INC_QUOTE, 0, true);
break;
case OPT_isysroot:
@@ -575,15 +575,15 @@ c_common_handle_option (size_t scode, const char *arg, int value,
break;
case OPT_isystem:
- add_path (xstrdup (arg), SYSTEM, 0, true);
+ add_path (xstrdup (arg), INC_SYSTEM, 0, true);
break;
case OPT_iwithprefix:
- add_prefixed_path (arg, SYSTEM);
+ add_prefixed_path (arg, INC_SYSTEM);
break;
case OPT_iwithprefixbefore:
- add_prefixed_path (arg, BRACKET);
+ add_prefixed_path (arg, INC_BRACKET);
break;
case OPT_lang_asm:
@@ -1326,7 +1326,7 @@ sanitize_cpp_opts (void)
/* Add include path with a prefix at the front of its name. */
static void
-add_prefixed_path (const char *suffix, size_t chain)
+add_prefixed_path (const char *suffix, incpath_kind chain)
{
char *path;
const char *prefix;
diff --git a/gcc/c/ChangeLog b/gcc/c/ChangeLog
index 822d0227ee8..1f697f17f99 100644
--- a/gcc/c/ChangeLog
+++ b/gcc/c/ChangeLog
@@ -1,3 +1,17 @@
+2017-10-12 David Malcolm <dmalcolm@redhat.com>
+
+ * c-parser.c (c_parser_require): Add "type_is_unique" param and
+ use it to guard calls to maybe_suggest_missing_token_insertion.
+ (c_parser_parms_list_declarator): Override default value of new
+ "type_is_unique" param to c_parser_require.
+ (c_parser_asm_statement): Likewise.
+ * c-parser.h (c_parser_require): Add "type_is_unique" param,
+ defaulting to true.
+
+2017-10-11 Nathan Sidwell <nathan@acm.org>
+
+ * c-decl.c (grokdeclarator): Check HAS_DECL_ASSEMBLER_NAME_P too.
+
2017-10-10 Richard Sandiford <richard.sandiford@linaro.org>
* c-parser.c (c_parser_cilk_clause_vectorlength): Use wi::to_wide when
diff --git a/gcc/c/c-decl.c b/gcc/c/c-decl.c
index 724d193f01f..26b34ab3e50 100644
--- a/gcc/c/c-decl.c
+++ b/gcc/c/c-decl.c
@@ -7011,7 +7011,8 @@ grokdeclarator (const struct c_declarator *declarator,
/* This is the earliest point at which we might know the assembler
name of a variable. Thus, if it's known before this, die horribly. */
- gcc_assert (!DECL_ASSEMBLER_NAME_SET_P (decl));
+ gcc_assert (!HAS_DECL_ASSEMBLER_NAME_P (decl)
+ || !DECL_ASSEMBLER_NAME_SET_P (decl));
if (warn_cxx_compat
&& VAR_P (decl)
diff --git a/gcc/c/c-parser.c b/gcc/c/c-parser.c
index a622e2a89c9..6b843247911 100644
--- a/gcc/c/c-parser.c
+++ b/gcc/c/c-parser.c
@@ -1041,13 +1041,21 @@ get_matching_symbol (enum cpp_ttype type)
If MATCHING_LOCATION is not UNKNOWN_LOCATION, then highlight it
within any error as the location of an "opening" token matching
the close token TYPE (e.g. the location of the '(' when TYPE is
- CPP_CLOSE_PAREN). */
+ CPP_CLOSE_PAREN).
+
+ If TYPE_IS_UNIQUE is true (the default) then msgid describes exactly
+ one type (e.g. "expected %<)%>") and thus it may be reasonable to
+ attempt to generate a fix-it hint for the problem.
+ Otherwise msgid describes multiple token types (e.g.
+ "expected %<;%>, %<,%> or %<)%>"), and thus we shouldn't attempt to
+ generate a fix-it hint. */
bool
c_parser_require (c_parser *parser,
enum cpp_ttype type,
const char *msgid,
- location_t matching_location)
+ location_t matching_location,
+ bool type_is_unique)
{
if (c_parser_next_token_is (parser, type))
{
@@ -1059,6 +1067,13 @@ c_parser_require (c_parser *parser,
location_t next_token_loc = c_parser_peek_token (parser)->location;
gcc_rich_location richloc (next_token_loc);
+ /* Potentially supply a fix-it hint, suggesting to add the
+ missing token immediately after the *previous* token.
+ This may move the primary location within richloc. */
+ if (!parser->error && type_is_unique)
+ maybe_suggest_missing_token_insertion (&richloc, type,
+ parser->last_token_location);
+
/* If matching_location != UNKNOWN_LOCATION, highlight it.
Attempt to consolidate diagnostics by printing it as a
secondary range within the main diagnostic. */
@@ -3975,7 +3990,8 @@ c_parser_parms_list_declarator (c_parser *parser, tree attrs, tree expr)
return get_parm_info (false, expr);
}
if (!c_parser_require (parser, CPP_COMMA,
- "expected %<;%>, %<,%> or %<)%>"))
+ "expected %<;%>, %<,%> or %<)%>",
+ UNKNOWN_LOCATION, false))
{
c_parser_skip_until_found (parser, CPP_CLOSE_PAREN, NULL);
return NULL;
@@ -6429,7 +6445,8 @@ c_parser_asm_statement (c_parser *parser)
if (!c_parser_require (parser, CPP_COLON,
is_goto
? G_("expected %<:%>")
- : G_("expected %<:%> or %<)%>")))
+ : G_("expected %<:%> or %<)%>"),
+ UNKNOWN_LOCATION, is_goto))
goto error_close_paren;
/* Once past any colon, we're no longer a simple asm. */
diff --git a/gcc/c/c-parser.h b/gcc/c/c-parser.h
index 01a7b724081..21e40541ce6 100644
--- a/gcc/c/c-parser.h
+++ b/gcc/c/c-parser.h
@@ -137,7 +137,8 @@ extern c_token * c_parser_peek_2nd_token (c_parser *parser);
extern c_token * c_parser_peek_nth_token (c_parser *parser, unsigned int n);
extern bool c_parser_require (c_parser *parser, enum cpp_ttype type,
const char *msgid,
- location_t matching_location = UNKNOWN_LOCATION);
+ location_t matching_location = UNKNOWN_LOCATION,
+ bool type_is_unique = true);
extern bool c_parser_error (c_parser *parser, const char *gmsgid);
extern void c_parser_consume_token (c_parser *parser);
extern void c_parser_skip_until_found (c_parser *parser, enum cpp_ttype type,
diff --git a/gcc/calls.c b/gcc/calls.c
index 064235ed15c..ff9724358c5 100644
--- a/gcc/calls.c
+++ b/gcc/calls.c
@@ -1593,7 +1593,7 @@ initialize_argument_information (int num_actuals ATTRIBUTE_UNUSED,
cumulative_args_t args_so_far,
int reg_parm_stack_space,
rtx *old_stack_level,
- poly_int64 *old_pending_adj,
+ poly_int64_pod *old_pending_adj,
int *must_preallocate, int *ecf_flags,
bool *may_tailcall, bool call_from_thunk_p)
{
@@ -2207,10 +2207,14 @@ compute_argument_addresses (struct arg_data *args, rtx argblock, int num_actuals
if (argblock)
{
rtx arg_reg = argblock;
- int i, arg_offset = 0;
+ int i;
+ poly_int64 arg_offset = 0;
if (GET_CODE (argblock) == PLUS)
- arg_reg = XEXP (argblock, 0), arg_offset = INTVAL (XEXP (argblock, 1));
+ {
+ arg_reg = XEXP (argblock, 0);
+ arg_offset = rtx_to_poly_int64 (XEXP (argblock, 1));
+ }
for (i = 0; i < num_actuals; i++)
{
@@ -2252,12 +2256,16 @@ compute_argument_addresses (struct arg_data *args, rtx argblock, int num_actuals
}
align = BITS_PER_UNIT;
boundary = args[i].locate.boundary;
+ poly_int64 offset_val;
if (args[i].locate.where_pad != PAD_DOWNWARD)
align = boundary;
- else if (CONST_INT_P (offset))
+ else if (poly_int_rtx_p (offset, &offset_val))
{
- align = INTVAL (offset) * BITS_PER_UNIT | boundary;
- align = least_bit_hwi (align);
+ align = least_bit_hwi (boundary);
+ unsigned int offset_align
+ = known_alignment (offset_val) * BITS_PER_UNIT;
+ if (offset_align != 0)
+ align = MIN (align, offset_align);
}
set_mem_align (args[i].stack, align);
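
The rewritten clamp is worth checking with concrete numbers (editorial sketch):

/* Worked example: boundary == 64 bits, pad-down offset == 12 bytes.
   least_bit_hwi (64) == 64; known_alignment (12) == 4 bytes, giving
   offset_align == 32 bits, so align == MIN (64, 32) == 32.  For a
   poly_int offset such as 12 + 8x, known_alignment is the largest
   power of two dividing every possible value -- here again 4 -- so
   the clamp stays correct for all runtime vector lengths.  */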
@@ -2401,7 +2409,7 @@ internal_arg_pointer_based_exp (const_rtx rtl, bool toplevel)
return NULL_RTX;
poly_int64 offset;
- if (GET_CODE (rtl) == PLUS && poly_int_const_p (XEXP (rtl, 1), &offset))
+ if (GET_CODE (rtl) == PLUS && poly_int_rtx_p (XEXP (rtl, 1), &offset))
{
rtx val = internal_arg_pointer_based_exp (XEXP (rtl, 0), toplevel);
if (val == NULL_RTX || val == pc_rtx)
@@ -2454,7 +2462,7 @@ mem_might_overlap_already_clobbered_arg_p (rtx addr, poly_uint64 size)
val = internal_arg_pointer_based_exp (addr, true);
if (val == NULL_RTX)
return false;
- else if (!poly_int_const_p (val, &i))
+ else if (!poly_int_rtx_p (val, &i))
return true;
if (known_zero (size))
@@ -2530,7 +2538,10 @@ load_register_parameters (struct arg_data *args, int num_actuals,
}
else if (TYPE_MODE (TREE_TYPE (args[i].tree_value)) == BLKmode)
{
+ /* Variable-sized parameters should be described by a
+ PARALLEL instead. */
const_size = int_size_in_bytes (TREE_TYPE (args[i].tree_value));
+ gcc_assert (const_size >= 0);
nregs = (const_size + (UNITS_PER_WORD - 1)) / UNITS_PER_WORD;
size = const_size;
}
@@ -2701,12 +2712,12 @@ combine_pending_stack_adjustment_and_call (poly_int64_pod *adjustment_out,
-UNADJUSTED_ALIGNMENT modulo the PREFERRED_UNIT_STACK_BOUNDARY. */
/* Begin by trying to pop all the bytes. */
- unsigned HOST_WIDE_INT tmp_alignment;
+ unsigned HOST_WIDE_INT tmp_misalignment;
if (!known_misalignment (pending_stack_adjust,
preferred_unit_stack_boundary,
- &tmp_alignment))
+ &tmp_misalignment))
return false;
- unadjusted_alignment -= tmp_alignment;
+ unadjusted_alignment -= tmp_misalignment;
adjustment = pending_stack_adjust;
/* Push enough additional bytes that the stack will be aligned
after the arguments are pushed. */
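
known_misalignment is the poly_int-safe replacement for a plain modulo; a sketch of when it succeeds:

/* Sketch: pending_stack_adjust == 20, boundary == 16:
     known_misalignment (20, 16, &m)  -> true, m == 4.
   pending_stack_adjust == 16 + 16x (an SVE-style poly_int):
     misalignment is 0 for every x    -> true, m == 0.
   pending_stack_adjust == 16 + 8x:
     depends on the runtime x         -> false, so the caller gives
     up on combining the adjustment with the call.  */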
@@ -2828,7 +2839,7 @@ shift_return_value (machine_mode mode, bool left_p, rtx value)
machine_mode value_mode = GET_MODE (value);
poly_int64 shift = GET_MODE_BITSIZE (value_mode) - GET_MODE_BITSIZE (mode);
- if (must_eq (shift, 0))
+ if (known_zero (shift))
return false;
/* Use ashr rather than lshr for right shifts. This is for the benefit
@@ -3199,7 +3210,7 @@ expand_call (tree exp, rtx target, int ignore)
}
#else /* not PCC_STATIC_STRUCT_RETURN */
{
- if (!poly_tree_p (TYPE_SIZE_UNIT (rettype), &struct_value_size))
+ if (!poly_int_tree_p (TYPE_SIZE_UNIT (rettype), &struct_value_size))
struct_value_size = -1;
/* Even if it is semantically safe to use the target as the return
@@ -4221,8 +4232,8 @@ expand_call (tree exp, rtx target, int ignore)
funtype, 1);
gcc_assert (GET_MODE (target) == pmode);
- poly_int64 offset = subreg_lowpart_offset (TYPE_MODE (type),
- GET_MODE (target));
+ poly_uint64 offset = subreg_lowpart_offset (TYPE_MODE (type),
+ GET_MODE (target));
target = gen_rtx_SUBREG (TYPE_MODE (type), target, offset);
SUBREG_PROMOTED_VAR_P (target) = 1;
SUBREG_PROMOTED_SET (target, unsignedp);
@@ -5319,7 +5330,10 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
/* stack_slot is negative, but we want to index stack_usage_map
with positive values. */
if (GET_CODE (XEXP (arg->stack_slot, 0)) == PLUS)
- upper_bound = -INTVAL (XEXP (XEXP (arg->stack_slot, 0), 1)) + 1;
+ {
+ rtx offset = XEXP (XEXP (arg->stack_slot, 0), 1);
+ upper_bound = -rtx_to_poly_int64 (offset) + 1;
+ }
else
upper_bound = 0;
@@ -5328,7 +5342,10 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
else
{
if (GET_CODE (XEXP (arg->stack_slot, 0)) == PLUS)
- lower_bound = INTVAL (XEXP (XEXP (arg->stack_slot, 0), 1));
+ {
+ rtx offset = XEXP (XEXP (arg->stack_slot, 0), 1);
+ lower_bound = rtx_to_poly_int64 (offset);
+ }
else
lower_bound = 0;
@@ -5444,7 +5461,6 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
;
else if (arg->mode != BLKmode)
{
- poly_int64 size;
unsigned int parm_align;
/* Argument is a scalar, not entirely passed in registers.
@@ -5457,12 +5473,12 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
Note that in C the default argument promotions
will prevent such mismatches. */
- size = GET_MODE_SIZE (arg->mode);
+ poly_int64 size = GET_MODE_SIZE (arg->mode);
/* Compute how much space the push instruction will push.
On many machines, pushing a byte will advance the stack
pointer by a halfword. */
#ifdef PUSH_ROUNDING
- size = PUSH_ROUNDING (MACRO_INT (size));
+ size = PUSH_ROUNDING (size);
#endif
used = size;
@@ -5470,9 +5486,9 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
round up to a multiple of the alignment for arguments. */
if (targetm.calls.function_arg_padding (arg->mode, TREE_TYPE (pval))
!= PAD_NONE)
- /* At the moment we don't support ABIs for which the padding isn't
- known at compile time. In principle it should be easy to add
- though. */
+ /* At the moment we don't (need to) support ABIs for which the
+ padding isn't known at compile time. In principle it should
+ be easy to add though. */
used = force_align_up (size, PARM_BOUNDARY / BITS_PER_UNIT);
/* Compute the alignment of the pushed argument. */
@@ -5481,9 +5497,9 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
== PAD_DOWNWARD)
{
poly_int64 pad = used - size;
- unsigned int pad_align_bytes = known_alignment (pad);
- if (pad_align_bytes != 0)
- parm_align = MIN (parm_align, pad_align_bytes * BITS_PER_UNIT);
+ unsigned int pad_align = known_alignment (pad) * BITS_PER_UNIT;
+ if (pad_align != 0)
+ parm_align = MIN (parm_align, pad_align);
}
/* This isn't already where we want it on the stack, so put it there.
@@ -5542,10 +5558,10 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
parm_align = BITS_PER_UNIT;
else
{
- unsigned int excess_align_bytes = known_alignment (excess);
- if (excess_align_bytes)
- parm_align = MIN (parm_align,
- excess_align_bytes * BITS_PER_UNIT);
+ unsigned int excess_align
+ = known_alignment (excess) * BITS_PER_UNIT;
+ if (excess_align != 0)
+ parm_align = MIN (parm_align, excess_align);
}
}
@@ -5560,7 +5576,7 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
|| (GET_CODE (XEXP (x, 0)) == PLUS
&& XEXP (XEXP (x, 0), 0) ==
crtl->args.internal_arg_pointer
- && poly_int_const_p (XEXP (XEXP (x, 0), 1), &i)))
+ && poly_int_rtx_p (XEXP (XEXP (x, 0), 1), &i)))
{
/* arg.locate doesn't contain the pretend_args_size offset,
it's part of argblock. Ensure we don't count it in I. */
@@ -5572,7 +5588,7 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
/* expand_call should ensure this. */
gcc_assert (!arg->locate.offset.var
&& arg->locate.size.var == 0);
- poly_int64 const_size = rtx_to_poly_int64 (size_rtx);
+ poly_int64 size_val = rtx_to_poly_int64 (size_rtx);
if (must_eq (arg->locate.offset.constant, i))
{
@@ -5581,11 +5597,11 @@ store_one_arg (struct arg_data *arg, rtx argblock, int flags,
they aren't really at the same location. Check for
this by making sure that the incoming size is the
same as the outgoing size. */
- if (may_ne (arg->locate.size.constant, const_size))
+ if (may_ne (arg->locate.size.constant, size_val))
sibcall_failure = 1;
}
else if (maybe_in_range_p (arg->locate.offset.constant,
- i, const_size))
+ i, size_val))
sibcall_failure = 1;
/* Use arg->locate.size.constant instead of const_size
because we only care about the part of the argument
diff --git a/gcc/cfgcleanup.c b/gcc/cfgcleanup.c
index 788f25cf327..25327d545ba 100644
--- a/gcc/cfgcleanup.c
+++ b/gcc/cfgcleanup.c
@@ -904,6 +904,7 @@ merge_memattrs (rtx x, rtx y)
set_mem_size (y, MEM_SIZE (x));
else
{
+ /* The sizes aren't ordered, so we can't merge them. */
clear_mem_size (x);
clear_mem_size (y);
}
diff --git a/gcc/cfgexpand.c b/gcc/cfgexpand.c
index cff13390dd3..07b5c9df4c6 100644
--- a/gcc/cfgexpand.c
+++ b/gcc/cfgexpand.c
@@ -1134,6 +1134,7 @@ expand_stack_vars (bool (*pred) (size_t), struct stack_vars_data *data)
= alloc_stack_frame_space (stack_vars[i].size
+ ASAN_RED_ZONE_SIZE,
MAX (alignb, ASAN_RED_ZONE_SIZE));
+
data->asan_vec.safe_push (prev_offset);
/* Allocating a constant amount of space from a constant
starting offset must give a constant result. */
@@ -1507,7 +1508,7 @@ defer_stack_allocation (tree var, bool toplevel)
/* Whether the variable is small enough for immediate allocation not to be
a problem with regard to the frame size. */
bool smallish
- = (poly_tree_p (size_unit, &size)
+ = (poly_int_tree_p (size_unit, &size)
&& (estimated_poly_value (size)
< PARAM_VALUE (PARAM_MIN_SIZE_FOR_STACK_SHARING)));
@@ -1647,7 +1648,7 @@ expand_one_var (tree var, bool toplevel, bool really_expand)
if (really_expand)
expand_one_register_var (origvar);
}
- else if (!poly_tree_p (DECL_SIZE_UNIT (var), &size)
+ else if (!poly_int_tree_p (DECL_SIZE_UNIT (var), &size)
|| !valid_constant_size_p (DECL_SIZE_UNIT (var)))
{
/* Reject variables which cover more than half of the address-space. */
@@ -2032,10 +2033,7 @@ expand_used_vars (void)
/* Compute the phase of the stack frame for this function. */
{
int align = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
- /* At present we only support frame layouts in which the misalignment
- of STARTING_FRAME_OFFSET is known at compile time. */
- int off = force_get_misalignment (poly_int64 (STARTING_FRAME_OFFSET),
- align);
+ int off = targetm.starting_frame_offset () % align;
frame_phase = off ? align - off : 0;
}
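
The hook-based form keeps the old arithmetic; a worked example:

/* E.g. PREFERRED_STACK_BOUNDARY == 128 gives align == 16; a target
   whose starting_frame_offset hook returns 8 yields off == 8 and
   frame_phase == 16 - 8 == 8, while the common zero offset gives
   frame_phase == 0.  */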
@@ -4411,7 +4409,7 @@ expand_debug_expr (tree exp)
op1 = expand_debug_expr (TREE_OPERAND (exp, 1));
poly_int64 offset;
- if (!op1 || !poly_int_const_p (op1, &offset))
+ if (!op1 || !poly_int_rtx_p (op1, &offset))
return NULL;
op0 = plus_constant (inner_mode, op0, offset);
@@ -4518,7 +4516,7 @@ expand_debug_expr (tree exp)
/* Bitfield. */
mode1 = smallest_int_mode_for_size (bitsize);
poly_int64 bytepos = bits_to_bytes_round_down (bitpos);
- if (may_ne (bytepos, 0))
+ if (maybe_nonzero (bytepos))
{
op0 = adjust_address_nv (op0, mode1, bytepos);
bitpos = num_trailing_bits (bitpos);
@@ -4924,7 +4922,7 @@ expand_debug_expr (tree exp)
op1 = expand_debug_expr (TREE_OPERAND (TREE_OPERAND (exp, 0),
1));
poly_int64 offset;
- if (!op1 || !poly_int_const_p (op1, &offset))
+ if (!op1 || !poly_int_rtx_p (op1, &offset))
return NULL;
return plus_constant (mode, op0, offset);
diff --git a/gcc/cfgloop.c b/gcc/cfgloop.c
index 6911426787b..c3bd9c05013 100644
--- a/gcc/cfgloop.c
+++ b/gcc/cfgloop.c
@@ -1713,12 +1713,19 @@ loop_preheader_edge (const struct loop *loop)
edge e;
edge_iterator ei;
- gcc_assert (loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS));
+ gcc_assert (loops_state_satisfies_p (LOOPS_HAVE_PREHEADERS)
+ && ! loops_state_satisfies_p (LOOPS_MAY_HAVE_MULTIPLE_LATCHES));
FOR_EACH_EDGE (e, ei, loop->header->preds)
if (e->src != loop->latch)
break;
+ if (! e)
+ {
+ gcc_assert (! loop_outer (loop));
+ return single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun));
+ }
+
return e;
}
diff --git a/gcc/cgraphunit.c b/gcc/cgraphunit.c
index 8c1acf770b9..9385dc825ab 100644
--- a/gcc/cgraphunit.c
+++ b/gcc/cgraphunit.c
@@ -1296,6 +1296,93 @@ analyze_functions (bool first_time)
input_location = saved_loc;
}
+/* Check declaration of the type of ALIAS for compatibility with its TARGET
+ (which may be an ifunc resolver) and issue a diagnostic when they are
+ not compatible according to language rules (plus a C++ extension for
+ non-static member functions). */
+
+static void
+maybe_diag_incompatible_alias (tree alias, tree target)
+{
+ tree altype = TREE_TYPE (alias);
+ tree targtype = TREE_TYPE (target);
+
+ bool ifunc = lookup_attribute ("ifunc", DECL_ATTRIBUTES (alias));
+ tree funcptr = altype;
+
+ if (ifunc)
+ {
+ /* Handle attribute ifunc first. */
+ if (TREE_CODE (altype) == METHOD_TYPE)
+ {
+ /* Set FUNCPTR to the type of the alias target. If the type
+ is a non-static member function of class C, construct a type
+ of an ordinary function taking C* as the first argument,
+ followed by the member function argument list, and use it
+ instead to check for incompatibility. This conversion is
+ not defined by the language but an extension provided by
+ G++. */
+
+ tree rettype = TREE_TYPE (altype);
+ tree args = TYPE_ARG_TYPES (altype);
+ altype = build_function_type (rettype, args);
+ funcptr = altype;
+ }
+
+ targtype = TREE_TYPE (targtype);
+
+ if (POINTER_TYPE_P (targtype))
+ {
+ targtype = TREE_TYPE (targtype);
+
+ /* Only issue Wattribute-alias for conversions to void* with
+ -Wextra. */
+ if (VOID_TYPE_P (targtype) && !extra_warnings)
+ return;
+
+ /* Proceed to handle incompatible ifunc resolvers below. */
+ }
+ else
+ {
+ funcptr = build_pointer_type (funcptr);
+
+ error_at (DECL_SOURCE_LOCATION (target),
+ "%<ifunc%> resolver for %qD must return %qT",
+ alias, funcptr);
+ inform (DECL_SOURCE_LOCATION (alias),
+ "resolver indirect function declared here");
+ return;
+ }
+ }
+
+ if ((!FUNC_OR_METHOD_TYPE_P (targtype)
+ || (prototype_p (altype)
+ && prototype_p (targtype)
+ && !types_compatible_p (altype, targtype))))
+ {
+ /* Warn for incompatibilities. Avoid warning for functions
+ without a prototype to make it possible to declare aliases
+ without knowing the exact type, as libstdc++ does. */
+ if (ifunc)
+ {
+ funcptr = build_pointer_type (funcptr);
+
+ if (warning_at (DECL_SOURCE_LOCATION (target),
+ OPT_Wattribute_alias,
+ "%<ifunc%> resolver for %qD should return %qT",
+ alias, funcptr))
+ inform (DECL_SOURCE_LOCATION (alias),
+ "resolver indirect function declared here");
+ }
+ else if (warning_at (DECL_SOURCE_LOCATION (alias),
+ OPT_Wattribute_alias,
+ "%qD alias between functions of incompatible "
+ "types %qT and %qT", alias, altype, targtype))
+ inform (DECL_SOURCE_LOCATION (target),
+ "aliased declaration here");
+ }
+}
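
Hypothetical user code exercising the two ifunc paths above (illustrative only; names invented):

void impl (void) { }

/* Error path: the resolver does not return a pointer type, so the
   code above reports "'ifunc' resolver for 'bad' must return
   'void (*)(void)'".  */
int bad_resolver (void);
void bad (void) __attribute__ ((ifunc ("bad_resolver")));

/* Clean path: the resolver returns a matching function pointer.  */
static void (*good_resolver (void)) (void) { return impl; }
void good (void) __attribute__ ((ifunc ("good_resolver")));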
+
/* Translate the ugly representation of aliases as alias pairs into nice
representation in callgraph. We don't handle all cases yet,
unfortunately. */
@@ -1305,7 +1392,7 @@ handle_alias_pairs (void)
{
alias_pair *p;
unsigned i;
-
+
for (i = 0; alias_pairs && alias_pairs->iterate (i, &p);)
{
symtab_node *target_node = symtab_node::get_for_asmname (p->target);
@@ -1352,65 +1439,7 @@ handle_alias_pairs (void)
if (TREE_CODE (p->decl) == FUNCTION_DECL
&& target_node && is_a <cgraph_node *> (target_node))
{
- tree t1 = TREE_TYPE (p->decl);
- tree t2 = TREE_TYPE (target_node->decl);
-
- if (lookup_attribute ("ifunc", DECL_ATTRIBUTES (p->decl)))
- {
- t2 = TREE_TYPE (t2);
- if (POINTER_TYPE_P (t2))
- {
- t2 = TREE_TYPE (t2);
- if (!FUNC_OR_METHOD_TYPE_P (t2))
- {
- if (warning_at (DECL_SOURCE_LOCATION (p->decl),
- OPT_Wattributes,
- "%q+D %<ifunc%> resolver should return "
- "a function pointer",
- p->decl))
- inform (DECL_SOURCE_LOCATION (target_node->decl),
- "resolver declaration here");
-
- t2 = NULL_TREE;
- }
- }
- else
- {
- /* Deal with static member function pointers. */
- if (TREE_CODE (t2) == RECORD_TYPE
- && TYPE_FIELDS (t2)
- && TREE_CODE (TREE_TYPE (TYPE_FIELDS (t2))) == POINTER_TYPE
- && (TREE_CODE (TREE_TYPE (TREE_TYPE (TYPE_FIELDS (t2))))
- == METHOD_TYPE))
- t2 = TREE_TYPE (TREE_TYPE (TYPE_FIELDS (t2)));
- else
- {
- error ("%q+D %<ifunc%> resolver must return a function "
- "pointer",
- p->decl);
- inform (DECL_SOURCE_LOCATION (target_node->decl),
- "resolver declaration here");
-
- t2 = NULL_TREE;
- }
- }
- }
-
- if (t2
- && (!FUNC_OR_METHOD_TYPE_P (t2)
- || (prototype_p (t1)
- && prototype_p (t2)
- && !types_compatible_p (t1, t2))))
- {
- /* Warn for incompatibilities. Avoid warning for functions
- without a prototype to make it possible to declare aliases
- without knowing the exact type, as libstdc++ does. */
- if (warning_at (DECL_SOURCE_LOCATION (p->decl), OPT_Wattributes,
- "%q+D alias between functions of incompatible "
- "types %qT and %qT", p->decl, t1, t2))
- inform (DECL_SOURCE_LOCATION (target_node->decl),
- "aliased declaration here");
- }
+ maybe_diag_incompatible_alias (p->decl, target_node->decl);
cgraph_node *src_node = cgraph_node::get (p->decl);
if (src_node && src_node->definition)
diff --git a/gcc/combine.c b/gcc/combine.c
index 41a347a744d..ff0cb2a7c62 100644
--- a/gcc/combine.c
+++ b/gcc/combine.c
@@ -7903,7 +7903,7 @@ make_extraction (machine_mode mode, rtx inner, HOST_WIDE_INT pos,
return a new hard register. */
if (pos || in_dest)
{
- poly_int64 offset
+ poly_uint64 offset
= subreg_offset_from_lsb (tmode, inner_mode, pos);
/* Avoid creating invalid subregs, for example when
@@ -9857,13 +9857,9 @@ rtx_equal_for_field_assignment_p (rtx x, rtx y, bool widen_x)
return 0;
if (BYTES_BIG_ENDIAN != WORDS_BIG_ENDIAN)
return 0;
- /* For big endian, adjust the memory offset. */
- if (BYTES_BIG_ENDIAN)
- x = adjust_address_nv (x, GET_MODE (y),
- -subreg_lowpart_offset (GET_MODE (x),
- GET_MODE (y)));
- else
- x = adjust_address_nv (x, GET_MODE (y), 0);
+ x = adjust_address_nv (x, GET_MODE (y),
+ byte_lowpart_offset (GET_MODE (y),
+ GET_MODE (x)));
}
if (x == y || rtx_equal_p (x, y))
@@ -11768,8 +11764,11 @@ change_zero_ext (rtx pat)
if (BITS_BIG_ENDIAN)
start = GET_MODE_PRECISION (inner_mode) - size - start;
- x = simplify_gen_binary (LSHIFTRT, inner_mode, XEXP (x, 0),
- gen_int_shift_amount (inner_mode, start));
+ if (start != 0)
+ x = gen_rtx_LSHIFTRT (inner_mode, XEXP (x, 0),
+ gen_int_shift_amount (inner_mode, start));
+ else
+ x = XEXP (x, 0);
if (mode != inner_mode)
x = gen_lowpart_SUBREG (mode, x);
}
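
The explicit start != 0 test is needed because gen_rtx_LSHIFTRT, unlike the simplify_gen_binary call it replaces, performs no simplification (an editorial note):

/* simplify_gen_binary would fold (lshiftrt x 0) to X itself, but it
   may also rewrite the expression into some other simplified shape;
   building the rtx directly keeps the exact form the caller wants,
   at the cost of special-casing the trivial zero shift by hand.  */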
diff --git a/gcc/common.opt b/gcc/common.opt
index ce8194b58fa..c95da640174 100644
--- a/gcc/common.opt
+++ b/gcc/common.opt
@@ -562,6 +562,10 @@ Wattributes
Common Var(warn_attributes) Init(1) Warning
Warn about inappropriate attribute usage.
+Wattribute-alias
+Common Var(warn_attribute_alias) Init(1) Warning
+Warn about type safety and similar errors in attribute alias and related.
+
Wcast-align
Common Var(warn_cast_align) Warning
Warn about pointer casts which increase alignment.
diff --git a/gcc/config.gcc b/gcc/config.gcc
index ee92f70904a..22702396a9f 100644
--- a/gcc/config.gcc
+++ b/gcc/config.gcc
@@ -360,6 +360,7 @@ i[34567]86-*-*)
cpu_type=i386
c_target_objs="i386-c.o"
cxx_target_objs="i386-c.o"
+ extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o"
extra_options="${extra_options} fused-madd.opt"
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
@@ -384,6 +385,7 @@ x86_64-*-*)
c_target_objs="i386-c.o"
cxx_target_objs="i386-c.o"
extra_options="${extra_options} fused-madd.opt"
+ extra_objs="x86-tune-sched.o x86-tune-sched-bd.o x86-tune-sched-atom.o x86-tune-sched-core.o"
extra_headers="cpuid.h mmintrin.h mm3dnow.h xmmintrin.h emmintrin.h
pmmintrin.h tmmintrin.h ammintrin.h smmintrin.h
nmmintrin.h bmmintrin.h fma4intrin.h wmmintrin.h
@@ -1094,11 +1096,14 @@ arm*-*-freebsd*) # ARM FreeBSD EABI
case $target in
armv6*-*-freebsd*)
target_cpu_cname="arm1176jzf-s"
- tm_defines="${tm_defines} TARGET_FREEBSD_ARMv6=1"
if test $fbsd_major -ge 11; then
tm_defines="${tm_defines} TARGET_FREEBSD_ARM_HARD_FLOAT=1"
fi
;;
+ armv7*-*-freebsd*)
+ target_cpu_cname="generic-armv7-a"
+ tm_defines="${tm_defines} TARGET_FREEBSD_ARM_HARD_FLOAT=1"
+ ;;
*)
target_cpu_cname="arm9"
;;
@@ -3105,7 +3110,7 @@ case ${target} in
;;
*-*-linux*)
case ${target} in
- aarch64*-* | i[34567]86-* | powerpc*-* | s390*-* | sparc*-* | x86_64-*)
+ aarch64*-* | arm*-* | i[34567]86-* | powerpc*-* | s390*-* | sparc*-* | x86_64-*)
default_gnu_indirect_function=yes
;;
esac
diff --git a/gcc/config/aarch64/aarch64-builtins.c b/gcc/config/aarch64/aarch64-builtins.c
index 5b9ce46f7c1..8ca4cfc299f 100644
--- a/gcc/config/aarch64/aarch64-builtins.c
+++ b/gcc/config/aarch64/aarch64-builtins.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/aarch64/aarch64-c.c b/gcc/config/aarch64/aarch64-c.c
index 1106b89f932..d7f42b3d5ab 100644
--- a/gcc/config/aarch64/aarch64-c.c
+++ b/gcc/config/aarch64/aarch64-c.c
@@ -17,7 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/aarch64/aarch64-protos.h b/gcc/config/aarch64/aarch64-protos.h
index a51f78a8f19..705675f6d91 100644
--- a/gcc/config/aarch64/aarch64-protos.h
+++ b/gcc/config/aarch64/aarch64-protos.h
@@ -469,7 +469,7 @@ void aarch64_simd_emit_reg_reg_move (rtx *, machine_mode, unsigned int);
rtx aarch64_simd_expand_builtin (int, tree, rtx);
void aarch64_simd_lane_bounds (rtx, HOST_WIDE_INT, HOST_WIDE_INT, const_tree);
-rtx endian_lane_rtx (machine_mode, poly_uint64);
+rtx endian_lane_rtx (machine_mode, unsigned int);
void aarch64_split_128bit_move (rtx, rtx);
diff --git a/gcc/config/aarch64/aarch64.c b/gcc/config/aarch64/aarch64.c
index fc7209db4c0..a1894a79dd7 100644
--- a/gcc/config/aarch64/aarch64.c
+++ b/gcc/config/aarch64/aarch64.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#define INCLUDE_STRING
@@ -1249,6 +1249,7 @@ aarch64_hard_regno_nregs (unsigned regno, machine_mode mode)
return CEIL (size, UNITS_PER_VREG);
case PR_REGS:
case PR_LO_REGS:
+ case PR_HI_REGS:
return 1;
default:
size = constant_lower_bound (GET_MODE_SIZE (mode));
@@ -1693,7 +1694,8 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
tp = gen_lowpart (mode, tp);
emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, x0)));
- set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
+ if (REG_P (dest))
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
return;
}
@@ -1727,7 +1729,8 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
}
emit_insn (gen_rtx_SET (dest, gen_rtx_PLUS (mode, tp, tmp_reg)));
- set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
+ if (REG_P (dest))
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
return;
}
@@ -1768,7 +1771,8 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
gcc_unreachable ();
}
- set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
+ if (REG_P (dest))
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
return;
}
@@ -1797,7 +1801,8 @@ aarch64_load_symref_appropriately (rtx dest, rtx imm,
emit_insn (gen_tlsie_tiny_sidi (dest, imm, tp));
}
- set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
+ if (REG_P (dest))
+ set_unique_reg_note (get_last_insn (), REG_EQUIV, imm);
return;
}
@@ -2041,7 +2046,7 @@ bool
aarch64_sve_cnt_immediate_p (rtx x)
{
poly_int64 value;
- return poly_int_const_p (x, &value) && aarch64_sve_cnt_immediate_p (value);
+ return poly_int_rtx_p (x, &value) && aarch64_sve_cnt_immediate_p (value);
}
/* Return the asm string for an instruction with a CNT-like vector size
@@ -2118,7 +2123,7 @@ bool
aarch64_sve_addvl_addpl_immediate_p (rtx x)
{
poly_int64 value;
- return (poly_int_const_p (x, &value)
+ return (poly_int_rtx_p (x, &value)
&& aarch64_sve_addvl_addpl_immediate_p (value));
}
@@ -2159,7 +2164,7 @@ aarch64_sve_inc_dec_immediate_p (rtx x, int *factor_out,
poly_int64 value;
if (!const_vec_duplicate_p (x, &elt)
- || !poly_int_const_p (elt, &value))
+ || !poly_int_rtx_p (elt, &value))
return false;
unsigned int nelts_per_vq = 128 / GET_MODE_UNIT_BITSIZE (GET_MODE (x));
@@ -2643,7 +2648,7 @@ int
aarch64_add_offset_temporaries (rtx x)
{
poly_int64 offset;
- if (!poly_int_const_p (x, &offset))
+ if (!poly_int_rtx_p (x, &offset))
return -1;
return aarch64_offset_temporaries (true, offset);
}
@@ -5316,7 +5321,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
if (! strict_p
&& REG_P (op0)
&& virt_or_elim_regno_p (REGNO (op0))
- && poly_int_const_p (op1, &offset))
+ && poly_int_rtx_p (op1, &offset))
{
info->type = ADDRESS_REG_IMM;
info->base = op0;
@@ -5328,7 +5333,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
if (may_ne (GET_MODE_SIZE (mode), 0)
&& aarch64_base_register_rtx_p (op0, strict_p)
- && poly_int_const_p (op1, &offset))
+ && poly_int_rtx_p (op1, &offset))
{
info->type = ADDRESS_REG_IMM;
info->base = op0;
@@ -5436,7 +5441,7 @@ aarch64_classify_address (struct aarch64_address_info *info,
info->type = ADDRESS_REG_WB;
info->base = XEXP (x, 0);
if (GET_CODE (XEXP (x, 1)) == PLUS
- && poly_int_const_p (XEXP (XEXP (x, 1), 1), &offset)
+ && poly_int_rtx_p (XEXP (XEXP (x, 1), 1), &offset)
&& rtx_equal_p (XEXP (XEXP (x, 1), 0), info->base)
&& aarch64_base_register_rtx_p (info->base, strict_p))
{
@@ -6753,7 +6758,7 @@ aarch64_regno_regclass (unsigned regno)
return FP_LO_REGNUM_P (regno) ? FP_LO_REGS : FP_REGS;
if (PR_REGNUM_P (regno))
- return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_REGS;
+ return PR_LO_REGNUM_P (regno) ? PR_LO_REGS : PR_HI_REGS;
return NO_REGS;
}
@@ -7169,6 +7174,7 @@ aarch64_class_max_nregs (reg_class_t regclass, machine_mode mode)
case STACK_REG:
case PR_REGS:
case PR_LO_REGS:
+ case PR_HI_REGS:
return 1;
case NO_REGS:
@@ -11567,7 +11573,7 @@ aarch64_legitimate_constant_p (machine_mode mode, rtx x)
temporary cannot be rematerialized (they can't be forced to memory
and also aren't legitimate constants). */
poly_int64 offset;
- if (poly_int_const_p (x, &offset))
+ if (poly_int_rtx_p (x, &offset))
return aarch64_offset_temporaries (false, offset) <= 1;
/* Treat symbols as constants. Avoid TLS symbols as they are complex,
@@ -13200,7 +13206,7 @@ aarch64_simd_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
of mode MODE, and return the result as an SImode rtx. */
rtx
-endian_lane_rtx (machine_mode mode, poly_uint64 n)
+endian_lane_rtx (machine_mode mode, unsigned int n)
{
return gen_int_mode (ENDIAN_LANE_N (GET_MODE_NUNITS (mode), n), SImode);
}
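
The mapping itself is unchanged; assuming the usual ENDIAN_LANE_N definition it works as follows (editorial sketch):

/* ENDIAN_LANE_N (NUNITS, N) is N on little-endian targets and
   NUNITS - 1 - N on big-endian ones, so for V4SImode lane 0 maps to
   lane 3 under big-endian, matching the architectural register
   layout.  */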
@@ -17018,6 +17024,25 @@ aarch64_sched_can_speculate_insn (rtx_insn *insn)
}
}
+/* Implement TARGET_COMPUTE_PRESSURE_CLASSES. */
+
+static int
+aarch64_compute_pressure_classes (reg_class *classes)
+{
+ int i = 0;
+ classes[i++] = GENERAL_REGS;
+ classes[i++] = FP_REGS;
+ /* PR_REGS isn't a useful pressure class because many predicate pseudo
+ registers need to go in PR_LO_REGS at some point during their
+ lifetime. Splitting it into two halves has the effect of making
+ all predicates count against PR_LO_REGS, so that we try whenever
+ possible to restrict the number of live predicates to 8. This
+ greatly reduces the amount of spilling in certain loops. */
+ classes[i++] = PR_LO_REGS;
+ classes[i++] = PR_HI_REGS;
+ return i;
+}
+
/* Target-specific selftests. */
#if CHECKING_P
@@ -17492,6 +17517,9 @@ aarch64_libgcc_floating_mode_supported_p
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT aarch64_constant_alignment
+#undef TARGET_COMPUTE_PRESSURE_CLASSES
+#define TARGET_COMPUTE_PRESSURE_CLASSES aarch64_compute_pressure_classes
+
#if CHECKING_P
#undef TARGET_RUN_TARGET_SELFTESTS
#define TARGET_RUN_TARGET_SELFTESTS selftest::aarch64_run_selftests
diff --git a/gcc/config/aarch64/aarch64.h b/gcc/config/aarch64/aarch64.h
index 778d81b263b..6b9ffae6823 100644
--- a/gcc/config/aarch64/aarch64.h
+++ b/gcc/config/aarch64/aarch64.h
@@ -469,6 +469,7 @@ enum reg_class
FP_REGS,
POINTER_AND_FP_REGS,
PR_LO_REGS,
+ PR_HI_REGS,
PR_REGS,
ALL_REGS,
LIM_REG_CLASSES /* Last */
@@ -487,6 +488,7 @@ enum reg_class
"FP_REGS", \
"POINTER_AND_FP_REGS", \
"PR_LO_REGS", \
+ "PR_HI_REGS", \
"PR_REGS", \
"ALL_REGS" \
}
@@ -502,6 +504,7 @@ enum reg_class
{ 0x00000000, 0xffffffff, 0x00000000 }, /* FP_REGS */ \
{ 0xffffffff, 0xffffffff, 0x00000003 }, /* POINTER_AND_FP_REGS */\
{ 0x00000000, 0x00000000, 0x000007f8 }, /* PR_LO_REGS */ \
+ { 0x00000000, 0x00000000, 0x0007f800 }, /* PR_HI_REGS */ \
{ 0x00000000, 0x00000000, 0x0007fff8 }, /* PR_REGS */ \
{ 0xffffffff, 0xffffffff, 0x000fffff } /* ALL_REGS */ \
}
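
Reading the third 32-bit word of the masks shows the split (a derived note, not in the patch):

/* PR_LO_REGS 0x000007f8 covers bits 3-10 (the eight low predicate
   registers) and PR_HI_REGS 0x0007f800 covers bits 11-18 (the eight
   high ones); 0x000007f8 | 0x0007f800 == 0x0007fff8 == PR_REGS, so
   the two new classes exactly partition PR_REGS.  */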
@@ -565,8 +568,6 @@ extern enum aarch64_processor aarch64_tune;
#define FRAME_GROWS_DOWNWARD 1
-#define STARTING_FRAME_OFFSET 0
-
#define ACCUMULATE_OUTGOING_ARGS 1
#define FIRST_PARM_OFFSET(FNDECL) 0
diff --git a/gcc/config/aarch64/cortex-a57-fma-steering.c b/gcc/config/aarch64/cortex-a57-fma-steering.c
index 5b726f4be2f..a07fc9c39f4 100644
--- a/gcc/config/aarch64/cortex-a57-fma-steering.c
+++ b/gcc/config/aarch64/cortex-a57-fma-steering.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#define INCLUDE_LIST
diff --git a/gcc/config/aarch64/driver-aarch64.c b/gcc/config/aarch64/driver-aarch64.c
index d857f7274be..dedd482f395 100644
--- a/gcc/config/aarch64/driver-aarch64.c
+++ b/gcc/config/aarch64/driver-aarch64.c
@@ -17,7 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#define INCLUDE_STRING
diff --git a/gcc/config/alpha/alpha.c b/gcc/config/alpha/alpha.c
index 6bdd5bd6433..b8a4e28869d 100644
--- a/gcc/config/alpha/alpha.c
+++ b/gcc/config/alpha/alpha.c
@@ -19,7 +19,7 @@ along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -2912,8 +2912,8 @@ alpha_split_conditional_move (enum rtx_code code, rtx dest, rtx cond,
|| (code == GE || code == GT))
{
code = reverse_condition (code);
- diff = t, t = f, f = diff;
- diff = t - f;
+ std::swap (t, f);
+ diff = -diff;
}
subtarget = target = dest;
@@ -6080,10 +6080,8 @@ alpha_stdarg_optimize_hook (struct stdarg_info *si, const gimple *stmt)
else if (code2 == COMPONENT_REF
&& (code1 == MINUS_EXPR || code1 == PLUS_EXPR))
{
- gimple *tem = arg1_stmt;
+ std::swap (arg1_stmt, arg2_stmt);
code2 = code1;
- arg1_stmt = arg2_stmt;
- arg2_stmt = tem;
}
else
goto escapes;
@@ -9833,9 +9831,7 @@ alpha_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
&& (*code == GE || *code == GT || *code == GEU || *code == GTU)
&& (REG_P (*op1) || *op1 == const0_rtx))
{
- rtx tem = *op0;
- *op0 = *op1;
- *op1 = tem;
+ std::swap (*op0, *op1);
*code = (int)swap_condition ((enum rtx_code)*code);
}
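
All three hunks above are behavior-preserving cleanups. For the conditional-move case: with t = 5, f = 2, diff = 3, the old sequence used diff as the swap temporary and then recomputed diff = t - f = 2 - 5 = -3, which is exactly what std::swap (t, f) followed by diff = -diff produces.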
diff --git a/gcc/config/alpha/alpha.h b/gcc/config/alpha/alpha.h
index a2878c64e94..257c1de2422 100644
--- a/gcc/config/alpha/alpha.h
+++ b/gcc/config/alpha/alpha.h
@@ -493,13 +493,6 @@ enum reg_class {
goes at a more negative offset in the frame. */
/* #define FRAME_GROWS_DOWNWARD 0 */
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-
-#define STARTING_FRAME_OFFSET 0
-
/* If we generate an insn to push BYTES bytes,
this says how many the stack pointer really advances by.
On Alpha, don't define this because there are no push insns. */
diff --git a/gcc/config/alpha/driver-alpha.c b/gcc/config/alpha/driver-alpha.c
index 6eec89959ac..f2dba39712c 100644
--- a/gcc/config/alpha/driver-alpha.c
+++ b/gcc/config/alpha/driver-alpha.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/arc/arc-c.c b/gcc/config/arc/arc-c.c
index 44ff338c269..7f4a4deec0b 100644
--- a/gcc/config/arc/arc-c.c
+++ b/gcc/config/arc/arc-c.c
@@ -17,6 +17,8 @@
<http://www.gnu.org/licenses/>.
*/
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/arc/arc.c b/gcc/config/arc/arc.c
index b11a260db2a..0a68db8e889 100644
--- a/gcc/config/arc/arc.c
+++ b/gcc/config/arc/arc.c
@@ -28,7 +28,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/arc/arc.h b/gcc/config/arc/arc.h
index 0831480e2e1..0481bc64f5d 100644
--- a/gcc/config/arc/arc.h
+++ b/gcc/config/arc/arc.h
@@ -658,12 +658,6 @@ extern enum reg_class arc_regno_reg_class[];
goes at a more negative offset in the frame. */
#define FRAME_GROWS_DOWNWARD 1
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-#define STARTING_FRAME_OFFSET 0
-
/* Offset from the stack pointer register to the first location at which
outgoing arguments are placed. */
#define STACK_POINTER_OFFSET (0)
@@ -1297,7 +1291,8 @@ do { \
do \
{ \
if (GET_CODE (PATTERN (JUMPTABLE)) == ADDR_DIFF_VEC \
- && ((GET_MODE_SIZE (MACRO_MODE (GET_MODE (PATTERN (JUMPTABLE)))) \
+ && ((GET_MODE_SIZE (as_a <scalar_int_mode> \
+ (GET_MODE (PATTERN (JUMPTABLE)))) \
* XVECLEN (PATTERN (JUMPTABLE), 1) + 1) \
& 2)) \
arc_toggle_unalign (); \
@@ -1408,7 +1403,8 @@ do { \
: SImode)
#define ADDR_VEC_ALIGN(VEC_INSN) \
- (exact_log2 (GET_MODE_SIZE (MACRO_MODE (GET_MODE (PATTERN (VEC_INSN))))))
+ (exact_log2 (GET_MODE_SIZE (as_a <scalar_int_mode> \
+ (GET_MODE (PATTERN (VEC_INSN))))))
#undef ASM_OUTPUT_BEFORE_CASE_LABEL
#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE) \
ASM_OUTPUT_ALIGN ((FILE), ADDR_VEC_ALIGN (TABLE));
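
as_a <scalar_int_mode> is a checked downcast: in checking builds it asserts the mode really is a scalar integer mode before its size is taken. A minimal usage sketch, with illustrative variable names:

    machine_mode m = GET_MODE (PATTERN (vec_insn));
    scalar_int_mode int_m = as_a <scalar_int_mode> (m);  /* checked */
    unsigned int size = GET_MODE_SIZE (int_m);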
diff --git a/gcc/config/arc/driver-arc.c b/gcc/config/arc/driver-arc.c
index 0e13878ff73..6d571d1bf58 100644
--- a/gcc/config/arc/driver-arc.c
+++ b/gcc/config/arc/driver-arc.c
@@ -18,6 +18,8 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/arm/aarch-common.c b/gcc/config/arm/aarch-common.c
index 9241dcfedb7..34751aff8a3 100644
--- a/gcc/config/arm/aarch-common.c
+++ b/gcc/config/arm/aarch-common.c
@@ -21,7 +21,7 @@
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/arm/arm-builtins.c b/gcc/config/arm/arm-builtins.c
index a8fb6441842..692496d49d5 100644
--- a/gcc/config/arm/arm-builtins.c
+++ b/gcc/config/arm/arm-builtins.c
@@ -17,7 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/arm/arm-c.c b/gcc/config/arm/arm-c.c
index 04fe78b9e0a..b2e9af6c45d 100644
--- a/gcc/config/arm/arm-c.c
+++ b/gcc/config/arm/arm-c.c
@@ -16,7 +16,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/arm/arm.c b/gcc/config/arm/arm.c
index f5fa223e563..ce3aaeb04e0 100644
--- a/gcc/config/arm/arm.c
+++ b/gcc/config/arm/arm.c
@@ -20,7 +20,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -19764,8 +19764,8 @@ arm_output_function_prologue (FILE *f)
if (IS_CMSE_ENTRY (func_type))
asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
- asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
- crtl->args.size,
+ asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
+ (HOST_WIDE_INT) crtl->args.size,
crtl->args.pretend_args_size,
(HOST_WIDE_INT) get_frame_size ());
diff --git a/gcc/config/arm/arm.h b/gcc/config/arm/arm.h
index a3ca800f7a5..336db4b042d 100644
--- a/gcc/config/arm/arm.h
+++ b/gcc/config/arm/arm.h
@@ -1248,15 +1248,9 @@ enum reg_class
couldn't convert a direct call into an indirect one. */
#define CALLER_INTERWORKING_SLOT_SIZE \
(TARGET_CALLER_INTERWORKING \
- && crtl->outgoing_args_size != 0 \
+ && maybe_nonzero (crtl->outgoing_args_size) \
? UNITS_PER_WORD : 0)
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-#define STARTING_FRAME_OFFSET 0
-
/* If we generate an insn to push BYTES bytes,
this says how many the stack pointer really advances by. */
/* The push insns do not do this rounding implicitly.
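
crtl->outgoing_args_size is evidently a poly_int64 on this branch, so a plain != 0 comparison is deliberately rejected once the value can carry a runtime component; maybe_nonzero (X) is this tree's spelling of "X is not known to be zero" (later trees spell it maybe_ne (X, 0)). Sketch:

    poly_int64 size = crtl->outgoing_args_size;
    if (maybe_nonzero (size))
      /* reserve the interworking slot */;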
diff --git a/gcc/config/arm/driver-arm.c b/gcc/config/arm/driver-arm.c
index 4e77509195d..a7b8182618a 100644
--- a/gcc/config/arm/driver-arm.c
+++ b/gcc/config/arm/driver-arm.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/avr/avr-c.c b/gcc/config/avr/avr-c.c
index 3ce32587d6e..aaab8e65eaa 100644
--- a/gcc/config/avr/avr-c.c
+++ b/gcc/config/avr/avr-c.c
@@ -19,7 +19,7 @@
/* Not included in avr.c since this requires C front end. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/avr/avr-devices.c b/gcc/config/avr/avr-devices.c
index f10e5845346..07f1976085d 100644
--- a/gcc/config/avr/avr-devices.c
+++ b/gcc/config/avr/avr-devices.c
@@ -18,7 +18,7 @@
<http://www.gnu.org/licenses/>. */
#ifndef IN_GEN_AVR_MMCU_TEXI
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/avr/avr-log.c b/gcc/config/avr/avr-log.c
index b6d719bbe08..e3a821a4844 100644
--- a/gcc/config/avr/avr-log.c
+++ b/gcc/config/avr/avr-log.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/avr/avr-protos.h b/gcc/config/avr/avr-protos.h
index b5691e754e8..089e2099874 100644
--- a/gcc/config/avr/avr-protos.h
+++ b/gcc/config/avr/avr-protos.h
@@ -84,7 +84,6 @@ extern void avr_expand_prologue (void);
extern void avr_expand_epilogue (bool);
extern bool avr_emit_movmemhi (rtx*);
extern int avr_epilogue_uses (int regno);
-extern int avr_starting_frame_offset (void);
extern void avr_output_addr_vec (rtx_insn*, rtx);
extern const char *avr_out_sbxx_branch (rtx_insn *insn, rtx operands[]);
diff --git a/gcc/config/avr/avr.c b/gcc/config/avr/avr.c
index 45c1a8781c2..cf12c78ae6a 100644
--- a/gcc/config/avr/avr.c
+++ b/gcc/config/avr/avr.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -1156,11 +1156,11 @@ avr_outgoing_args_size (void)
}
-/* Implement `STARTING_FRAME_OFFSET'. */
+/* Implement TARGET_STARTING_FRAME_OFFSET. */
/* This is the offset from the frame pointer register to the first stack slot
that contains a variable living in the frame. */
-int
+static HOST_WIDE_INT
avr_starting_frame_offset (void)
{
return 1 + avr_outgoing_args_size ();
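
Worked example for the hook above: AVR locals start one byte past the space reserved for outgoing arguments, so with 4 bytes of outgoing-argument space the first local sits at offset 1 + 4 = 5 from the frame pointer. Generic code now reaches the value through the hook rather than the macro, as the avr.md hunk later in this diff does:

    HOST_WIDE_INT off = targetm.starting_frame_offset ();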
@@ -1317,8 +1317,8 @@ avr_build_builtin_va_list (void)
/* Implement `TARGET_BUILTIN_SETJMP_FRAME_VALUE'. */
/* Actual start of frame is virtual_stack_vars_rtx this is offset from
- frame pointer by +STARTING_FRAME_OFFSET.
- Using saved frame = virtual_stack_vars_rtx - STARTING_FRAME_OFFSET
+ frame pointer by +TARGET_STARTING_FRAME_OFFSET.
+ Using saved frame = virtual_stack_vars_rtx - TARGET_STARTING_FRAME_OFFSET
avoids creating add/sub of offset in nonlocal goto and setjmp. */
static rtx
@@ -1326,7 +1326,7 @@ avr_builtin_setjmp_frame_value (void)
{
rtx xval = gen_reg_rtx (Pmode);
emit_insn (gen_subhi3 (xval, virtual_stack_vars_rtx,
- gen_int_mode (STARTING_FRAME_OFFSET, Pmode)));
+ gen_int_mode (avr_starting_frame_offset (), Pmode)));
return xval;
}
@@ -2045,7 +2045,7 @@ avr_asm_function_end_prologue (FILE *file)
avr_outgoing_args_size());
fprintf (file, "/* frame size = " HOST_WIDE_INT_PRINT_DEC " */\n",
- get_frame_size());
+ (HOST_WIDE_INT) get_frame_size());
if (!cfun->machine->gasisr.yes)
{
@@ -14792,6 +14792,9 @@ avr_fold_builtin (tree fndecl, int n_args ATTRIBUTE_UNUSED, tree *arg,
#undef TARGET_LEGITIMATE_COMBINED_INSN
#define TARGET_LEGITIMATE_COMBINED_INSN avr_legitimate_combined_insn
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET avr_starting_frame_offset
+
struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/avr/avr.h b/gcc/config/avr/avr.h
index 6d00dbd5d6e..2272df4afcc 100644
--- a/gcc/config/avr/avr.h
+++ b/gcc/config/avr/avr.h
@@ -284,8 +284,6 @@ enum reg_class {
#define STACK_GROWS_DOWNWARD 1
-#define STARTING_FRAME_OFFSET avr_starting_frame_offset()
-
#define STACK_POINTER_OFFSET 1
#define FIRST_PARM_OFFSET(FUNDECL) 0
diff --git a/gcc/config/avr/avr.md b/gcc/config/avr/avr.md
index 436f036704a..14f9298f305 100644
--- a/gcc/config/avr/avr.md
+++ b/gcc/config/avr/avr.md
@@ -334,10 +334,9 @@
(unspec_volatile:HI [(const_int 0)] UNSPECV_GOTO_RECEIVER))]
""
{
+ rtx offset = gen_int_mode (targetm.starting_frame_offset (), Pmode);
emit_move_insn (virtual_stack_vars_rtx,
- gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx,
- gen_int_mode (STARTING_FRAME_OFFSET,
- Pmode)));
+ gen_rtx_PLUS (Pmode, hard_frame_pointer_rtx, offset));
/* ; This might change the hard frame pointer in ways that aren't
; apparent to early optimization passes, so force a clobber. */
emit_clobber (hard_frame_pointer_rtx);
diff --git a/gcc/config/avr/driver-avr.c b/gcc/config/avr/driver-avr.c
index 2e2520d2462..fe17e2df151 100644
--- a/gcc/config/avr/driver-avr.c
+++ b/gcc/config/avr/driver-avr.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/avr/gen-avr-mmcu-specs.c b/gcc/config/avr/gen-avr-mmcu-specs.c
index 9e7fd5aaeaf..e09f3e46502 100644
--- a/gcc/config/avr/gen-avr-mmcu-specs.c
+++ b/gcc/config/avr/gen-avr-mmcu-specs.c
@@ -21,7 +21,7 @@
#include <stdio.h>
#include <string.h>
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
diff --git a/gcc/config/bfin/bfin.c b/gcc/config/bfin/bfin.c
index d7cfe10aad6..b45a15481b2 100644
--- a/gcc/config/bfin/bfin.c
+++ b/gcc/config/bfin/bfin.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/bfin/bfin.h b/gcc/config/bfin/bfin.h
index 787600f3308..e15a05d8784 100644
--- a/gcc/config/bfin/bfin.h
+++ b/gcc/config/bfin/bfin.h
@@ -252,12 +252,6 @@ extern const char *bfin_library_id_string;
it. */
#define FIRST_PARM_OFFSET(DECL) 0
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-#define STARTING_FRAME_OFFSET 0
-
/* Register to use for pushing function arguments. */
#define STACK_POINTER_REGNUM REG_P6
diff --git a/gcc/config/c6x/c6x.c b/gcc/config/c6x/c6x.c
index 1f767fcadf0..ab67577d0a6 100644
--- a/gcc/config/c6x/c6x.c
+++ b/gcc/config/c6x/c6x.c
@@ -19,7 +19,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/c6x/c6x.h b/gcc/config/c6x/c6x.h
index c8c407369d3..233ac13fc8c 100644
--- a/gcc/config/c6x/c6x.h
+++ b/gcc/config/c6x/c6x.h
@@ -298,7 +298,6 @@ enum reg_class
#define STACK_POINTER_OFFSET 4
/* Likewise for AP (which is the incoming stack pointer). */
#define FIRST_PARM_OFFSET(fundecl) 4
-#define STARTING_FRAME_OFFSET 0
#define FRAME_GROWS_DOWNWARD 1
#define STACK_GROWS_DOWNWARD 1
diff --git a/gcc/config/cr16/cr16-protos.h b/gcc/config/cr16/cr16-protos.h
index 4872c0b8b2e..886669092c5 100644
--- a/gcc/config/cr16/cr16-protos.h
+++ b/gcc/config/cr16/cr16-protos.h
@@ -94,5 +94,6 @@ extern const char *cr16_emit_logical_di (rtx *, enum rtx_code);
/* Handling the "interrupt" attribute. */
extern int cr16_interrupt_function_p (void);
extern bool cr16_is_data_model (enum data_model_type);
+extern poly_int64 cr16_push_rounding (poly_int64);
#endif /* Not GCC_CR16_PROTOS_H. */
diff --git a/gcc/config/cr16/cr16.c b/gcc/config/cr16/cr16.c
index d7573fea274..3569c30a7dc 100644
--- a/gcc/config/cr16/cr16.c
+++ b/gcc/config/cr16/cr16.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -2215,6 +2215,14 @@ cr16_emit_logical_di (rtx *operands, enum rtx_code code)
return "";
}
+/* Implement PUSH_ROUNDING. */
+
+poly_int64
+cr16_push_rounding (poly_int64 bytes)
+{
+ return (bytes + 1) & ~1;
+}
+
/* Initialize 'targetm' variable which contains pointers to functions
and data relating to the target machine. */
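
The out-of-line helper keeps the old macro's rounding, now over poly_int64 so generic code can pass polynomial push sizes: amounts round up to a multiple of 2 bytes, e.g.

    cr16_push_rounding (1) == (1 + 1) & ~1 == 2
    cr16_push_rounding (4) == (4 + 1) & ~1 == 4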
diff --git a/gcc/config/cr16/cr16.h b/gcc/config/cr16/cr16.h
index 143a1dbfda3..1757467d165 100644
--- a/gcc/config/cr16/cr16.h
+++ b/gcc/config/cr16/cr16.h
@@ -349,8 +349,6 @@ enum reg_class
/* Stack layout and calling conventions. */
#define STACK_GROWS_DOWNWARD 1
-#define STARTING_FRAME_OFFSET 0
-
#define STACK_POINTER_REGNUM 15
#define FRAME_POINTER_REGNUM 13
@@ -383,7 +381,7 @@ enum reg_class
#define PUSH_ARGS 1
-#define PUSH_ROUNDING(BYTES) (((BYTES) + 1) & ~1)
+#define PUSH_ROUNDING(BYTES) cr16_push_rounding (BYTES)
#ifndef CUMULATIVE_ARGS
struct cumulative_args
diff --git a/gcc/config/cris/cris.c b/gcc/config/cris/cris.c
index 860acdcd23d..a31e74358e1 100644
--- a/gcc/config/cris/cris.c
+++ b/gcc/config/cris/cris.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/cris/cris.h b/gcc/config/cris/cris.h
index 570e2d9defe..f9149c717a7 100644
--- a/gcc/config/cris/cris.h
+++ b/gcc/config/cris/cris.h
@@ -589,10 +589,6 @@ enum reg_class
#define STACK_GROWS_DOWNWARD 1
#define FRAME_GROWS_DOWNWARD 1
-/* It seems to be indicated in the code (at least 2.1) that this is
- better a constant, and best 0. */
-#define STARTING_FRAME_OFFSET 0
-
#define FIRST_PARM_OFFSET(FNDECL) 0
#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) \
diff --git a/gcc/config/darwin-c.c b/gcc/config/darwin-c.c
index 157c2fd597d..91f08a0dcee 100644
--- a/gcc/config/darwin-c.c
+++ b/gcc/config/darwin-c.c
@@ -433,7 +433,7 @@ add_system_framework_path (char *path)
p->construct = framework_construct_pathname;
using_frameworks = 1;
- add_cpp_dir_path (p, SYSTEM);
+ add_cpp_dir_path (p, INC_SYSTEM);
}
/* Add PATH to the bracket includes. PATH must be malloc-ed and
@@ -451,7 +451,7 @@ add_framework_path (char *path)
p->construct = framework_construct_pathname;
using_frameworks = 1;
- add_cpp_dir_path (p, BRACKET);
+ add_cpp_dir_path (p, INC_BRACKET);
}
static const char *framework_defaults [] =
@@ -488,7 +488,7 @@ darwin_register_objc_includes (const char *sysroot, const char *iprefix,
{
str = concat (iprefix, fname + len, NULL);
/* FIXME: wrap the headers for C++ awareness. */
- add_path (str, SYSTEM, /*c++aware=*/false, false);
+ add_path (str, INC_SYSTEM, /*c++aware=*/false, false);
}
/* Should this directory start with the sysroot? */
@@ -497,7 +497,7 @@ darwin_register_objc_includes (const char *sysroot, const char *iprefix,
else
str = update_path (fname, "");
- add_path (str, SYSTEM, /*c++aware=*/false, false);
+ add_path (str, INC_SYSTEM, /*c++aware=*/false, false);
}
}
diff --git a/gcc/config/darwin.c b/gcc/config/darwin.c
index 84254ebd291..7d3b754fd3e 100644
--- a/gcc/config/darwin.c
+++ b/gcc/config/darwin.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/epiphany/epiphany.c b/gcc/config/epiphany/epiphany.c
index 1dc63cdee64..c99440924a4 100644
--- a/gcc/config/epiphany/epiphany.c
+++ b/gcc/config/epiphany/epiphany.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -178,6 +178,9 @@ static rtx_insn *frame_insn (rtx);
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT epiphany_constant_alignment
+
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET epiphany_starting_frame_offset
bool
epiphany_is_interrupt_p (tree decl)
@@ -3030,4 +3033,12 @@ epiphany_constant_alignment (const_tree exp, HOST_WIDE_INT align)
return align;
}
+/* Implement TARGET_STARTING_FRAME_OFFSET. */
+
+static HOST_WIDE_INT
+epiphany_starting_frame_offset (void)
+{
+ return epiphany_stack_offset;
+}
+
struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/epiphany/epiphany.h b/gcc/config/epiphany/epiphany.h
index 290ea20a583..c0767b4642a 100644
--- a/gcc/config/epiphany/epiphany.h
+++ b/gcc/config/epiphany/epiphany.h
@@ -447,12 +447,6 @@ typedef struct GTY (()) machine_function
goes at a more negative offset in the frame. */
#define FRAME_GROWS_DOWNWARD 1
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-#define STARTING_FRAME_OFFSET epiphany_stack_offset
-
/* Offset from the stack pointer register to the first location at which
outgoing arguments are placed. */
#define STACK_POINTER_OFFSET epiphany_stack_offset
diff --git a/gcc/config/epiphany/mode-switch-use.c b/gcc/config/epiphany/mode-switch-use.c
index 74d806a16cb..d607ead56b0 100644
--- a/gcc/config/epiphany/mode-switch-use.c
+++ b/gcc/config/epiphany/mode-switch-use.c
@@ -19,7 +19,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/epiphany/resolve-sw-modes.c b/gcc/config/epiphany/resolve-sw-modes.c
index c409cacf693..36e4672e77b 100644
--- a/gcc/config/epiphany/resolve-sw-modes.c
+++ b/gcc/config/epiphany/resolve-sw-modes.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/fr30/fr30.c b/gcc/config/fr30/fr30.c
index 23f2a0fa5a4..920e65aaa29 100644
--- a/gcc/config/fr30/fr30.c
+++ b/gcc/config/fr30/fr30.c
@@ -20,7 +20,7 @@
/*{{{ Includes */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/fr30/fr30.h b/gcc/config/fr30/fr30.h
index c4485096f0f..8bcabf59299 100644
--- a/gcc/config/fr30/fr30.h
+++ b/gcc/config/fr30/fr30.h
@@ -345,15 +345,6 @@ enum reg_class
are at negative offsets from the frame pointer. */
#define FRAME_GROWS_DOWNWARD 1
-/* Offset from the frame pointer to the first local variable slot to be
- allocated.
-
- If `FRAME_GROWS_DOWNWARD', find the next slot's offset by subtracting the
- first slot's length from `STARTING_FRAME_OFFSET'. Otherwise, it is found by
- adding the length of the first slot to the value `STARTING_FRAME_OFFSET'. */
-/* #define STARTING_FRAME_OFFSET -4 */
-#define STARTING_FRAME_OFFSET 0
-
/* Offset from the stack pointer register to the first location at which
outgoing arguments are placed. If not specified, the default value of zero
is used. This is the proper value for most machines.
diff --git a/gcc/config/frv/frv.c b/gcc/config/frv/frv.c
index 10112e7cd3a..b644a63d48e 100644
--- a/gcc/config/frv/frv.c
+++ b/gcc/config/frv/frv.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/frv/frv.h b/gcc/config/frv/frv.h
index 44a73e08f90..7403e1ae621 100644
--- a/gcc/config/frv/frv.h
+++ b/gcc/config/frv/frv.h
@@ -966,14 +966,6 @@ typedef struct frv_stack {
are at negative offsets from the frame pointer. */
#define FRAME_GROWS_DOWNWARD 1
-/* Offset from the frame pointer to the first local variable slot to be
- allocated.
-
- If `FRAME_GROWS_DOWNWARD', find the next slot's offset by subtracting the
- first slot's length from `STARTING_FRAME_OFFSET'. Otherwise, it is found by
- adding the length of the first slot to the value `STARTING_FRAME_OFFSET'. */
-#define STARTING_FRAME_OFFSET 0
-
/* Offset from the stack pointer register to the first location at which
outgoing arguments are placed. If not specified, the default value of zero
is used. This is the proper value for most machines.
diff --git a/gcc/config/ft32/ft32.c b/gcc/config/ft32/ft32.c
index e9dbdda2b3e..99e93821b3a 100644
--- a/gcc/config/ft32/ft32.c
+++ b/gcc/config/ft32/ft32.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/ft32/ft32.h b/gcc/config/ft32/ft32.h
index 32aa32095ea..d52bb9af17c 100644
--- a/gcc/config/ft32/ft32.h
+++ b/gcc/config/ft32/ft32.h
@@ -233,10 +233,6 @@ enum reg_class
pointer to a smaller address. */
#define STACK_GROWS_DOWNWARD 1
-/* Offset from the frame pointer to the first local variable slot to
- be allocated. */
-#define STARTING_FRAME_OFFSET 0
-
/* Offset from the argument pointer register to the first argument's
address. On some machines it may depend on the data type of the
function. */
diff --git a/gcc/config/h8300/h8300-protos.h b/gcc/config/h8300/h8300-protos.h
index d6adebbbe94..1d4a574c7b9 100644
--- a/gcc/config/h8300/h8300-protos.h
+++ b/gcc/config/h8300/h8300-protos.h
@@ -112,5 +112,6 @@ extern bool h8sx_mergeable_memrefs_p (rtx, rtx);
extern bool h8sx_emit_movmd (rtx, rtx, rtx, HOST_WIDE_INT);
extern void h8300_swap_into_er6 (rtx);
extern void h8300_swap_out_of_er6 (rtx);
+extern poly_int64 h8300_push_rounding (poly_int64);
#endif /* ! GCC_H8300_PROTOS_H */
diff --git a/gcc/config/h8300/h8300.c b/gcc/config/h8300/h8300.c
index 0cab39b85ff..b47fee90f6f 100644
--- a/gcc/config/h8300/h8300.c
+++ b/gcc/config/h8300/h8300.c
@@ -19,7 +19,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -6044,6 +6044,21 @@ h8300_trampoline_init (rtx m_tramp, tree fndecl, rtx cxt)
emit_move_insn (mem, tem);
}
}
+
+/* Implement PUSH_ROUNDING.
+
+ On the H8/300, @-sp really pushes a byte if you ask it to - but that's
+ dangerous, so we claim that it always pushes a word, then we catch
+ the mov.b rx,@-sp and turn it into a mov.w rx,@-sp on output.
+
+ On the H8/300H, we simplify TARGET_QUICKCALL by setting this to 4
+ and doing a similar thing. */
+
+poly_int64
+h8300_push_rounding (poly_int64 bytes)
+{
+ return ((bytes + PARM_BOUNDARY / 8 - 1) & (-PARM_BOUNDARY / 8));
+}
/* Initialize the GCC target structure. */
#undef TARGET_ATTRIBUTE_TABLE
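
Same pattern as the cr16 change: the PUSH_ROUNDING body moves behind a poly_int64 function. With PARM_BOUNDARY == 16 (plain H8/300, per the comment above) the expression rounds to 2-byte units:

    (3 + 16/8 - 1) & (-16 / 8)  ==  4 & -2  ==  4
    (1 + 16/8 - 1) & (-16 / 8)  ==  2 & -2  ==  2

and with PARM_BOUNDARY == 32 (the H8/300H case) it rounds to 4-byte units instead.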
diff --git a/gcc/config/h8300/h8300.h b/gcc/config/h8300/h8300.h
index 8ff38605a23..a57e7b543e5 100644
--- a/gcc/config/h8300/h8300.h
+++ b/gcc/config/h8300/h8300.h
@@ -352,25 +352,7 @@ enum reg_class {
#define FRAME_GROWS_DOWNWARD 1
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-
-#define STARTING_FRAME_OFFSET 0
-
-/* If we generate an insn to push BYTES bytes,
- this says how many the stack pointer really advances by.
-
- On the H8/300, @-sp really pushes a byte if you ask it to - but that's
- dangerous, so we claim that it always pushes a word, then we catch
- the mov.b rx,@-sp and turn it into a mov.w rx,@-sp on output.
-
- On the H8/300H, we simplify TARGET_QUICKCALL by setting this to 4
- and doing a similar thing. */
-
-#define PUSH_ROUNDING(BYTES) \
- (((BYTES) + PARM_BOUNDARY / 8 - 1) & -PARM_BOUNDARY / 8)
+#define PUSH_ROUNDING(BYTES) h8300_push_rounding (BYTES)
/* Offset of first parameter from the argument pointer register value. */
/* Is equal to the size of the saved fp + pc, even if an fp isn't
diff --git a/gcc/config/i386/djgpp.c b/gcc/config/i386/djgpp.c
index ed6f926277f..54c11537875 100644
--- a/gcc/config/i386/djgpp.c
+++ b/gcc/config/i386/djgpp.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/i386/driver-i386.c b/gcc/config/i386/driver-i386.c
index 460a2d690df..4e7fda68281 100644
--- a/gcc/config/i386/driver-i386.c
+++ b/gcc/config/i386/driver-i386.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/i386/driver-mingw32.c b/gcc/config/i386/driver-mingw32.c
index b70363ad26a..bc3cacd3e25 100644
--- a/gcc/config/i386/driver-mingw32.c
+++ b/gcc/config/i386/driver-mingw32.c
@@ -17,6 +17,8 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
/* When defined, force the use (if non null) or not (otherwise) of CLI
diff --git a/gcc/config/i386/host-cygwin.c b/gcc/config/i386/host-cygwin.c
index 71c1130ef6e..abe6b52873c 100644
--- a/gcc/config/i386/host-cygwin.c
+++ b/gcc/config/i386/host-cygwin.c
@@ -17,7 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/i386/host-i386-darwin.c b/gcc/config/i386/host-i386-darwin.c
index c78d86f07c8..d360380c35f 100644
--- a/gcc/config/i386/host-i386-darwin.c
+++ b/gcc/config/i386/host-i386-darwin.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/i386/host-mingw32.c b/gcc/config/i386/host-mingw32.c
index f272bbef544..5d2c608e113 100644
--- a/gcc/config/i386/host-mingw32.c
+++ b/gcc/config/i386/host-mingw32.c
@@ -17,7 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/i386/i386-builtin.def b/gcc/config/i386/i386-builtin.def
index 7ff1bb1a7e0..0d5d5b74675 100644
--- a/gcc/config/i386/i386-builtin.def
+++ b/gcc/config/i386/i386-builtin.def
@@ -137,7 +137,7 @@ BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_sse_storelps, "__builtin_ia32_storelps", IX
/* SSE or 3DNow!A */
BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_sfence, "__builtin_ia32_sfence", IX86_BUILTIN_SFENCE, UNKNOWN, (int) VOID_FTYPE_VOID)
-BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A | OPTION_MASK_ISA_MMX, CODE_FOR_sse_movntq, "__builtin_ia32_movntq", IX86_BUILTIN_MOVNTQ, UNKNOWN, (int) VOID_FTYPE_PULONGLONG_ULONGLONG)
/* SSE2 */
BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_lfence, "__builtin_ia32_lfence", IX86_BUILTIN_LFENCE, UNKNOWN, (int) VOID_FTYPE_VOID)
@@ -505,10 +505,10 @@ BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_sqrtv4sf2, "__builtin_ia32_sqrtps_nr", IX86
BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_sse_rsqrtv4sf2, "__builtin_ia32_rsqrtps", IX86_BUILTIN_RSQRTPS, UNKNOWN, (int) V4SF_FTYPE_V4SF)
BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_rsqrtv4sf2, "__builtin_ia32_rsqrtps_nr", IX86_BUILTIN_RSQRTPS_NR, UNKNOWN, (int) V4SF_FTYPE_V4SF)
BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_sse_rcpv4sf2, "__builtin_ia32_rcpps", IX86_BUILTIN_RCPPS, UNKNOWN, (int) V4SF_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX, CODE_FOR_sse_cvtps2pi, "__builtin_ia32_cvtps2pi", IX86_BUILTIN_CVTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF)
BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtss2si, "__builtin_ia32_cvtss2si", IX86_BUILTIN_CVTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF)
BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtss2siq, "__builtin_ia32_cvtss2si64", IX86_BUILTIN_CVTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF)
-BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX, CODE_FOR_sse_cvttps2pi, "__builtin_ia32_cvttps2pi", IX86_BUILTIN_CVTTPS2PI, UNKNOWN, (int) V2SI_FTYPE_V4SF)
BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvttss2si, "__builtin_ia32_cvttss2si", IX86_BUILTIN_CVTTSS2SI, UNKNOWN, (int) INT_FTYPE_V4SF)
BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvttss2siq, "__builtin_ia32_cvttss2si64", IX86_BUILTIN_CVTTSS2SI64, UNKNOWN, (int) INT64_FTYPE_V4SF)
@@ -562,7 +562,7 @@ BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_sse_movlhps_exp, "__builtin_ia32_movlhps",
BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_highv4sf, "__builtin_ia32_unpckhps", IX86_BUILTIN_UNPCKHPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_vec_interleave_lowv4sf, "__builtin_ia32_unpcklps", IX86_BUILTIN_UNPCKLPS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V4SF)
-BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_MMX, CODE_FOR_sse_cvtpi2ps, "__builtin_ia32_cvtpi2ps", IX86_BUILTIN_CVTPI2PS, UNKNOWN, (int) V4SF_FTYPE_V4SF_V2SI)
BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_sse_cvtsi2ss, "__builtin_ia32_cvtsi2ss", IX86_BUILTIN_CVTSI2SS, UNKNOWN, (int) V4SF_FTYPE_V4SF_SI)
BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_64BIT, CODE_FOR_sse_cvtsi2ssq, "__builtin_ia32_cvtsi642ss", IX86_BUILTIN_CVTSI642SS, UNKNOWN, V4SF_FTYPE_V4SF_DI)
@@ -576,19 +576,19 @@ BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_abstf2, 0, IX86_BUILTIN_FABSQ, UNKNOWN, (in
BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_copysigntf3, 0, IX86_BUILTIN_COPYSIGNQ, UNKNOWN, (int) FLOAT128_FTYPE_FLOAT128_FLOAT128)
/* SSE MMX or 3Dnow!A */
-BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A | OPTION_MASK_ISA_MMX, CODE_FOR_mmx_uavgv8qi3, "__builtin_ia32_pavgb", IX86_BUILTIN_PAVGB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A | OPTION_MASK_ISA_MMX, CODE_FOR_mmx_uavgv4hi3, "__builtin_ia32_pavgw", IX86_BUILTIN_PAVGW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A | OPTION_MASK_ISA_MMX, CODE_FOR_mmx_umulv4hi3_highpart, "__builtin_ia32_pmulhuw", IX86_BUILTIN_PMULHUW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A | OPTION_MASK_ISA_MMX, CODE_FOR_mmx_umaxv8qi3, "__builtin_ia32_pmaxub", IX86_BUILTIN_PMAXUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A | OPTION_MASK_ISA_MMX, CODE_FOR_mmx_smaxv4hi3, "__builtin_ia32_pmaxsw", IX86_BUILTIN_PMAXSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A | OPTION_MASK_ISA_MMX, CODE_FOR_mmx_uminv8qi3, "__builtin_ia32_pminub", IX86_BUILTIN_PMINUB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A | OPTION_MASK_ISA_MMX, CODE_FOR_mmx_sminv4hi3, "__builtin_ia32_pminsw", IX86_BUILTIN_PMINSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
-BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI)
-BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A | OPTION_MASK_ISA_MMX, CODE_FOR_mmx_psadbw, "__builtin_ia32_psadbw", IX86_BUILTIN_PSADBW, UNKNOWN, (int) V1DI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A | OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pmovmskb, "__builtin_ia32_pmovmskb", IX86_BUILTIN_PMOVMSKB, UNKNOWN, (int) INT_FTYPE_V8QI)
-BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT)
+BDESC (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A | OPTION_MASK_ISA_MMX, CODE_FOR_mmx_pshufw, "__builtin_ia32_pshufw", IX86_BUILTIN_PSHUFW, UNKNOWN, (int) V4HI_FTYPE_V4HI_INT)
/* SSE2 */
BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_shufpd, "__builtin_ia32_shufpd", IX86_BUILTIN_SHUFPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
@@ -600,12 +600,12 @@ BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtdq2pd, "__builtin_ia32_cvtdq2pd",
BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_floatv4siv4sf2, "__builtin_ia32_cvtdq2ps", IX86_BUILTIN_CVTDQ2PS, UNKNOWN, (int) V4SF_FTYPE_V4SI)
BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2dq, "__builtin_ia32_cvtpd2dq", IX86_BUILTIN_CVTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF)
-BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF)
+BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, CODE_FOR_sse2_cvtpd2pi, "__builtin_ia32_cvtpd2pi", IX86_BUILTIN_CVTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF)
BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpd2ps, "__builtin_ia32_cvtpd2ps", IX86_BUILTIN_CVTPD2PS, UNKNOWN, (int) V4SF_FTYPE_V2DF)
BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2dq, "__builtin_ia32_cvttpd2dq", IX86_BUILTIN_CVTTPD2DQ, UNKNOWN, (int) V4SI_FTYPE_V2DF)
-BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF)
+BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, CODE_FOR_sse2_cvttpd2pi, "__builtin_ia32_cvttpd2pi", IX86_BUILTIN_CVTTPD2PI, UNKNOWN, (int) V2SI_FTYPE_V2DF)
-BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI)
+BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, CODE_FOR_sse2_cvtpi2pd, "__builtin_ia32_cvtpi2pd", IX86_BUILTIN_CVTPI2PD, UNKNOWN, (int) V2DF_FTYPE_V2SI)
BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvtsd2si, "__builtin_ia32_cvtsd2si", IX86_BUILTIN_CVTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF)
BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_cvttsd2si, "__builtin_ia32_cvttsd2si", IX86_BUILTIN_CVTTSD2SI, UNKNOWN, (int) INT_FTYPE_V2DF)
@@ -721,7 +721,7 @@ BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_packuswb, "__builtin_ia32_packuswb128
BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_umulv8hi3_highpart, "__builtin_ia32_pmulhuw128", IX86_BUILTIN_PMULHUW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_psadbw, "__builtin_ia32_psadbw128", IX86_BUILTIN_PSADBW128, UNKNOWN, (int) V2DI_FTYPE_V16QI_V16QI)
-BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, CODE_FOR_sse2_umulv1siv1di3, "__builtin_ia32_pmuludq", IX86_BUILTIN_PMULUDQ, UNKNOWN, (int) V1DI_FTYPE_V2SI_V2SI)
BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_vec_widen_umult_even_v4si, "__builtin_ia32_pmuludq128", IX86_BUILTIN_PMULUDQ128, UNKNOWN, (int) V2DI_FTYPE_V4SI_V4SI)
BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_pmaddwd, "__builtin_ia32_pmaddwd128", IX86_BUILTIN_PMADDWD128, UNKNOWN, (int) V4SI_FTYPE_V8HI_V8HI)
@@ -761,8 +761,8 @@ BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_sse2_vmsqrtv2df2, "__builtin_ia32_sqrtsd",
BDESC (OPTION_MASK_ISA_SSE, CODE_FOR_sse2_movq128, "__builtin_ia32_movq128", IX86_BUILTIN_MOVQ128, UNKNOWN, (int) V2DI_FTYPE_V2DI)
/* SSE2 MMX */
-BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI)
-BDESC (OPTION_MASK_ISA_SSE2, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI)
+BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, CODE_FOR_mmx_addv1di3, "__builtin_ia32_paddq", IX86_BUILTIN_PADDQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI)
+BDESC (OPTION_MASK_ISA_SSE2 | OPTION_MASK_ISA_MMX, CODE_FOR_mmx_subv1di3, "__builtin_ia32_psubq", IX86_BUILTIN_PSUBQ, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI)
/* SSE3 */
BDESC (OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_movshdup, "__builtin_ia32_movshdup", IX86_BUILTIN_MOVSHDUP, UNKNOWN, (int) V4SF_FTYPE_V4SF)
@@ -777,40 +777,40 @@ BDESC (OPTION_MASK_ISA_SSE3, CODE_FOR_sse3_hsubv2df3, "__builtin_ia32_hsubpd", I
/* SSSE3 */
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_absv16qi2, "__builtin_ia32_pabsb128", IX86_BUILTIN_PABSB128, UNKNOWN, (int) V16QI_FTYPE_V16QI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_absv8qi2, "__builtin_ia32_pabsb", IX86_BUILTIN_PABSB, UNKNOWN, (int) V8QI_FTYPE_V8QI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_absv8hi2, "__builtin_ia32_pabsw128", IX86_BUILTIN_PABSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_absv4hi2, "__builtin_ia32_pabsw", IX86_BUILTIN_PABSW, UNKNOWN, (int) V4HI_FTYPE_V4HI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_absv4si2, "__builtin_ia32_pabsd128", IX86_BUILTIN_PABSD128, UNKNOWN, (int) V4SI_FTYPE_V4SI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_absv2si2, "__builtin_ia32_pabsd", IX86_BUILTIN_PABSD, UNKNOWN, (int) V2SI_FTYPE_V2SI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv8hi3, "__builtin_ia32_phaddw128", IX86_BUILTIN_PHADDW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_ssse3_phaddwv4hi3, "__builtin_ia32_phaddw", IX86_BUILTIN_PHADDW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv4si3, "__builtin_ia32_phaddd128", IX86_BUILTIN_PHADDD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_ssse3_phadddv2si3, "__builtin_ia32_phaddd", IX86_BUILTIN_PHADDD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv8hi3, "__builtin_ia32_phaddsw128", IX86_BUILTIN_PHADDSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_ssse3_phaddswv4hi3, "__builtin_ia32_phaddsw", IX86_BUILTIN_PHADDSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv8hi3, "__builtin_ia32_phsubw128", IX86_BUILTIN_PHSUBW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_ssse3_phsubwv4hi3, "__builtin_ia32_phsubw", IX86_BUILTIN_PHSUBW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv4si3, "__builtin_ia32_phsubd128", IX86_BUILTIN_PHSUBD128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_ssse3_phsubdv2si3, "__builtin_ia32_phsubd", IX86_BUILTIN_PHSUBD, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv8hi3, "__builtin_ia32_phsubsw128", IX86_BUILTIN_PHSUBSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_ssse3_phsubswv4hi3, "__builtin_ia32_phsubsw", IX86_BUILTIN_PHSUBSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw128, "__builtin_ia32_pmaddubsw128", IX86_BUILTIN_PMADDUBSW128, UNKNOWN, (int) V8HI_FTYPE_V16QI_V16QI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_ssse3_pmaddubsw, "__builtin_ia32_pmaddubsw", IX86_BUILTIN_PMADDUBSW, UNKNOWN, (int) V4HI_FTYPE_V8QI_V8QI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv8hi3, "__builtin_ia32_pmulhrsw128", IX86_BUILTIN_PMULHRSW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_ssse3_pmulhrswv4hi3, "__builtin_ia32_pmulhrsw", IX86_BUILTIN_PMULHRSW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv16qi3, "__builtin_ia32_pshufb128", IX86_BUILTIN_PSHUFB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_ssse3_pshufbv8qi3, "__builtin_ia32_pshufb", IX86_BUILTIN_PSHUFB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv16qi3, "__builtin_ia32_psignb128", IX86_BUILTIN_PSIGNB128, UNKNOWN, (int) V16QI_FTYPE_V16QI_V16QI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_ssse3_psignv8qi3, "__builtin_ia32_psignb", IX86_BUILTIN_PSIGNB, UNKNOWN, (int) V8QI_FTYPE_V8QI_V8QI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv8hi3, "__builtin_ia32_psignw128", IX86_BUILTIN_PSIGNW128, UNKNOWN, (int) V8HI_FTYPE_V8HI_V8HI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_ssse3_psignv4hi3, "__builtin_ia32_psignw", IX86_BUILTIN_PSIGNW, UNKNOWN, (int) V4HI_FTYPE_V4HI_V4HI)
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv4si3, "__builtin_ia32_psignd128", IX86_BUILTIN_PSIGND128, UNKNOWN, (int) V4SI_FTYPE_V4SI_V4SI)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_ssse3_psignv2si3, "__builtin_ia32_psignd", IX86_BUILTIN_PSIGND, UNKNOWN, (int) V2SI_FTYPE_V2SI_V2SI)
/* SSSE3. */
BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrti, "__builtin_ia32_palignr128", IX86_BUILTIN_PALIGNR128, UNKNOWN, (int) V2DI_FTYPE_V2DI_V2DI_INT_CONVERT)
-BDESC (OPTION_MASK_ISA_SSSE3, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT)
+BDESC (OPTION_MASK_ISA_SSSE3 | OPTION_MASK_ISA_MMX, CODE_FOR_ssse3_palignrdi, "__builtin_ia32_palignr", IX86_BUILTIN_PALIGNR, UNKNOWN, (int) V1DI_FTYPE_V1DI_V1DI_INT_CONVERT)
/* SSE4.1 */
BDESC (OPTION_MASK_ISA_SSE4_1, CODE_FOR_sse4_1_blendpd, "__builtin_ia32_blendpd", IX86_BUILTIN_BLENDPD, UNKNOWN, (int) V2DF_FTYPE_V2DF_V2DF_INT)
diff --git a/gcc/config/i386/i386-c.c b/gcc/config/i386/i386-c.c
index 488926cab6d..9bed360c43b 100644
--- a/gcc/config/i386/i386-c.c
+++ b/gcc/config/i386/i386-c.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/i386/i386-protos.h b/gcc/config/i386/i386-protos.h
index fbe9f271434..a5d7a6c75bb 100644
--- a/gcc/config/i386/i386-protos.h
+++ b/gcc/config/i386/i386-protos.h
@@ -27,6 +27,7 @@ extern bool ix86_handle_option (struct gcc_options *opts,
extern bool ix86_target_stack_probe (void);
extern bool ix86_can_use_return_insn_p (void);
extern void ix86_setup_frame_addresses (void);
+extern bool ix86_rip_relative_addr_p (struct ix86_address *parts);
extern HOST_WIDE_INT ix86_initial_elimination_offset (int, int);
extern void ix86_expand_prologue (void);
@@ -165,9 +166,6 @@ extern void ix86_asm_output_function_label (FILE *, const char *, tree);
extern void ix86_call_abi_override (const_tree);
extern int ix86_reg_parm_stack_space (const_tree);
-extern void ix86_split_fp_branch (enum rtx_code code, rtx, rtx,
- rtx, rtx, rtx);
-
extern bool ix86_libc_has_function (enum function_class fn_class);
extern void x86_order_regs_for_local_alloc (void);
@@ -314,6 +312,23 @@ extern enum attr_cpu ix86_schedule;
extern const char * ix86_output_call_insn (rtx_insn *insn, rtx call_op);
extern bool ix86_operands_ok_for_move_multiple (rtx *operands, bool load,
machine_mode mode);
+extern int ix86_min_insn_size (rtx_insn *);
+
+extern int ix86_issue_rate (void);
+extern int ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn,
+ int cost, unsigned int);
+extern int ia32_multipass_dfa_lookahead (void);
+extern bool ix86_macro_fusion_p (void);
+extern bool ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp);
+
+extern bool ix86_bd_has_dispatch (rtx_insn *insn, int action);
+extern void ix86_bd_do_dispatch (rtx_insn *insn, int mode);
+
+extern void ix86_core2i7_init_hooks (void);
+
+extern int ix86_atom_sched_reorder (FILE *, int, rtx_insn **, int *, int);
+
+extern poly_int64 ix86_push_rounding (poly_int64);
#ifdef RTX_CODE
/* Target data for multipass lookahead scheduling.
diff --git a/gcc/config/i386/i386.c b/gcc/config/i386/i386.c
index f8a07689719..619b13b3d09 100644
--- a/gcc/config/i386/i386.c
+++ b/gcc/config/i386/i386.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -94,6 +94,8 @@ along with GCC; see the file COPYING3. If not see
/* This file should be included last. */
#include "target-def.h"
+#include "x86-tune-costs.h"
+
static rtx legitimize_dllimport_symbol (rtx, bool);
static rtx legitimize_pe_coff_extern_decl (rtx, bool);
static rtx legitimize_pe_coff_symbol (rtx, bool);
@@ -113,2094 +115,12 @@ static bool ix86_function_naked (const_tree);
: (mode) == DImode ? 3 \
: 4)
-/* Processor costs (relative to an add) */
-/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
-#define COSTS_N_BYTES(N) ((N) * 2)
-
-#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
-
-static stringop_algs ix86_size_memcpy[2] = {
- {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
- {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
-static stringop_algs ix86_size_memset[2] = {
- {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
- {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
-
-const
-struct processor_costs ix86_size_cost = {/* costs for tuning for size */
- COSTS_N_BYTES (2), /* cost of an add instruction */
- COSTS_N_BYTES (3), /* cost of a lea instruction */
- COSTS_N_BYTES (2), /* variable shift costs */
- COSTS_N_BYTES (3), /* constant shift costs */
- {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
- COSTS_N_BYTES (3), /* HI */
- COSTS_N_BYTES (3), /* SI */
- COSTS_N_BYTES (3), /* DI */
- COSTS_N_BYTES (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
- COSTS_N_BYTES (3), /* HI */
- COSTS_N_BYTES (3), /* SI */
- COSTS_N_BYTES (3), /* DI */
- COSTS_N_BYTES (5)}, /* other */
- COSTS_N_BYTES (3), /* cost of movsx */
- COSTS_N_BYTES (3), /* cost of movzx */
- 0, /* "large" insn */
- 2, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
- {2, 2, 2}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 2, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 2}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {2, 2, 2}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 3, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
- in SImode and DImode */
- {3, 3}, /* cost of storing MMX registers
- in SImode and DImode */
- 3, /* cost of moving SSE register */
- {3, 3, 3}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {3, 3, 3}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- 0, /* size of l1 cache */
- 0, /* size of l2 cache */
- 0, /* size of prefetch block */
- 0, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
- COSTS_N_BYTES (2), /* cost of FMUL instruction. */
- COSTS_N_BYTES (2), /* cost of FDIV instruction. */
- COSTS_N_BYTES (2), /* cost of FABS instruction. */
- COSTS_N_BYTES (2), /* cost of FCHS instruction. */
- COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- ix86_size_memcpy,
- ix86_size_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 1, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 1, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-/* Processor costs (relative to an add) */
-static stringop_algs i386_memcpy[2] = {
- {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
- DUMMY_STRINGOP_ALGS};
-static stringop_algs i386_memset[2] = {
- {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
- DUMMY_STRINGOP_ALGS};
-
-static const
-struct processor_costs i386_cost = { /* 386 specific costs */
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (3), /* variable shift costs */
- COSTS_N_INSNS (2), /* constant shift costs */
- {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
- COSTS_N_INSNS (6), /* HI */
- COSTS_N_INSNS (6), /* SI */
- COSTS_N_INSNS (6), /* DI */
- COSTS_N_INSNS (6)}, /* other */
- COSTS_N_INSNS (1), /* cost of multiply per each bit set */
- {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (23), /* HI */
- COSTS_N_INSNS (23), /* SI */
- COSTS_N_INSNS (23), /* DI */
- COSTS_N_INSNS (23)}, /* other */
- COSTS_N_INSNS (3), /* cost of movsx */
- COSTS_N_INSNS (2), /* cost of movzx */
- 15, /* "large" insn */
- 3, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {2, 4, 2}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {8, 8, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {8, 8, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 8, 16}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 8, 16}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- 0, /* size of l1 cache */
- 0, /* size of l2 cache */
- 0, /* size of prefetch block */
- 0, /* number of parallel prefetches */
- 1, /* Branch cost */
- COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (27), /* cost of FMUL instruction. */
- COSTS_N_INSNS (88), /* cost of FDIV instruction. */
- COSTS_N_INSNS (22), /* cost of FABS instruction. */
- COSTS_N_INSNS (24), /* cost of FCHS instruction. */
- COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- i386_memcpy,
- i386_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-static stringop_algs i486_memcpy[2] = {
- {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
- DUMMY_STRINGOP_ALGS};
-static stringop_algs i486_memset[2] = {
- {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
- DUMMY_STRINGOP_ALGS};
-
-static const
-struct processor_costs i486_cost = { /* 486 specific costs */
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (3), /* variable shift costs */
- COSTS_N_INSNS (2), /* constant shift costs */
- {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
- COSTS_N_INSNS (12), /* HI */
- COSTS_N_INSNS (12), /* SI */
- COSTS_N_INSNS (12), /* DI */
- COSTS_N_INSNS (12)}, /* other */
- 1, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (40), /* HI */
- COSTS_N_INSNS (40), /* SI */
- COSTS_N_INSNS (40), /* DI */
- COSTS_N_INSNS (40)}, /* other */
- COSTS_N_INSNS (3), /* cost of movsx */
- COSTS_N_INSNS (2), /* cost of movzx */
- 15, /* "large" insn */
- 3, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {2, 4, 2}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {8, 8, 8}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {8, 8, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 8, 16}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 8, 16}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
-  4,				/* size of l1 cache.  The 486 has an 8kB
-				   cache shared for code and data, so 4kB
-				   is not really precise.  */
- 4, /* size of l2 cache */
- 0, /* size of prefetch block */
- 0, /* number of parallel prefetches */
- 1, /* Branch cost */
- COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (16), /* cost of FMUL instruction. */
- COSTS_N_INSNS (73), /* cost of FDIV instruction. */
- COSTS_N_INSNS (3), /* cost of FABS instruction. */
- COSTS_N_INSNS (3), /* cost of FCHS instruction. */
- COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- i486_memcpy,
- i486_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-static stringop_algs pentium_memcpy[2] = {
- {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- DUMMY_STRINGOP_ALGS};
-static stringop_algs pentium_memset[2] = {
- {libcall, {{-1, rep_prefix_4_byte, false}}},
- DUMMY_STRINGOP_ALGS};
-
-static const
-struct processor_costs pentium_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (4), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
- COSTS_N_INSNS (11), /* HI */
- COSTS_N_INSNS (11), /* SI */
- COSTS_N_INSNS (11), /* DI */
- COSTS_N_INSNS (11)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (25), /* HI */
- COSTS_N_INSNS (25), /* SI */
- COSTS_N_INSNS (25), /* DI */
- COSTS_N_INSNS (25)}, /* other */
- COSTS_N_INSNS (3), /* cost of movsx */
- COSTS_N_INSNS (2), /* cost of movzx */
- 8, /* "large" insn */
- 6, /* MOVE_RATIO */
- 6, /* cost for loading QImode using movzbl */
- {2, 4, 2}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 8, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 8, 16}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 8, 16}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- 8, /* size of l1 cache. */
- 8, /* size of l2 cache */
- 0, /* size of prefetch block */
- 0, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (3), /* cost of FMUL instruction. */
- COSTS_N_INSNS (39), /* cost of FDIV instruction. */
- COSTS_N_INSNS (1), /* cost of FABS instruction. */
- COSTS_N_INSNS (1), /* cost of FCHS instruction. */
- COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- pentium_memcpy,
- pentium_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-static const
-struct processor_costs lakemont_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
- COSTS_N_INSNS (11), /* HI */
- COSTS_N_INSNS (11), /* SI */
- COSTS_N_INSNS (11), /* DI */
- COSTS_N_INSNS (11)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (25), /* HI */
- COSTS_N_INSNS (25), /* SI */
- COSTS_N_INSNS (25), /* DI */
- COSTS_N_INSNS (25)}, /* other */
- COSTS_N_INSNS (3), /* cost of movsx */
- COSTS_N_INSNS (2), /* cost of movzx */
- 8, /* "large" insn */
- 17, /* MOVE_RATIO */
- 6, /* cost for loading QImode using movzbl */
- {2, 4, 2}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 4, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 8, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 8, 16}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 8, 16}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- 8, /* size of l1 cache. */
- 8, /* size of l2 cache */
- 0, /* size of prefetch block */
- 0, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (3), /* cost of FMUL instruction. */
- COSTS_N_INSNS (39), /* cost of FDIV instruction. */
- COSTS_N_INSNS (1), /* cost of FABS instruction. */
- COSTS_N_INSNS (1), /* cost of FCHS instruction. */
- COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- pentium_memcpy,
- pentium_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
-   (we ensure the alignment).  For small blocks the inline loop is still a
-   noticeable win; for bigger blocks either rep movsl or rep movsb is the
-   way to go.  Rep movsb apparently has a more expensive startup time in
-   the CPU, but after 4K the difference is down in the noise.  */
-static stringop_algs pentiumpro_memcpy[2] = {
- {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
- {8192, rep_prefix_4_byte, false},
- {-1, rep_prefix_1_byte, false}}},
- DUMMY_STRINGOP_ALGS};
-static stringop_algs pentiumpro_memset[2] = {
- {rep_prefix_4_byte, {{1024, unrolled_loop, false},
- {8192, rep_prefix_4_byte, false},
- {-1, libcall, false}}},
- DUMMY_STRINGOP_ALGS};
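Reading the 32-bit pentiumpro_memcpy entry above through that lens (values straight from the initializer; illustrative only):

/* pentiumpro_memcpy, 32-bit entry:
     size <= 128   -> loop
     size <= 1024  -> unrolled_loop
     size <= 8192  -> rep_prefix_4_byte
     larger        -> rep_prefix_1_byte  (the -1 catch-all)
     size unknown  -> rep_prefix_4_byte  (the leading field)
   The 64-bit slot is DUMMY_STRINGOP_ALGS: PentiumPro never executes
   64-bit code, so that entry is never consulted.  */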
-static const
-struct processor_costs pentiumpro_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (4), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (4)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (17), /* HI */
- COSTS_N_INSNS (17), /* SI */
- COSTS_N_INSNS (17), /* DI */
- COSTS_N_INSNS (17)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 6, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 2, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {2, 2}, /* cost of loading MMX registers
- in SImode and DImode */
- {2, 2}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {2, 2, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {2, 2, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- 8, /* size of l1 cache. */
- 256, /* size of l2 cache */
- 32, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (5), /* cost of FMUL instruction. */
- COSTS_N_INSNS (56), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- pentiumpro_memcpy,
- pentiumpro_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-static stringop_algs geode_memcpy[2] = {
- {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- DUMMY_STRINGOP_ALGS};
-static stringop_algs geode_memset[2] = {
- {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- DUMMY_STRINGOP_ALGS};
-static const
-struct processor_costs geode_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (2), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (7), /* SI */
- COSTS_N_INSNS (7), /* DI */
- COSTS_N_INSNS (7)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (23), /* HI */
- COSTS_N_INSNS (39), /* SI */
- COSTS_N_INSNS (39), /* DI */
- COSTS_N_INSNS (39)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 4, /* MOVE_RATIO */
- 1, /* cost for loading QImode using movzbl */
- {1, 1, 1}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {1, 1, 1}, /* cost of storing integer registers */
- 1, /* cost of reg,reg fld/fst */
- {1, 1, 1}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 6, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
-
- 2, /* cost of moving MMX register */
- {2, 2}, /* cost of loading MMX registers
- in SImode and DImode */
- {2, 2}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {2, 2, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {2, 2, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- 64, /* size of l1 cache. */
- 128, /* size of l2 cache. */
- 32, /* size of prefetch block */
- 1, /* number of parallel prefetches */
- 1, /* Branch cost */
- COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (11), /* cost of FMUL instruction. */
- COSTS_N_INSNS (47), /* cost of FDIV instruction. */
- COSTS_N_INSNS (1), /* cost of FABS instruction. */
- COSTS_N_INSNS (1), /* cost of FCHS instruction. */
- COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- geode_memcpy,
- geode_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-static stringop_algs k6_memcpy[2] = {
- {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- DUMMY_STRINGOP_ALGS};
-static stringop_algs k6_memset[2] = {
- {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- DUMMY_STRINGOP_ALGS};
-static const
-struct processor_costs k6_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (3), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (3), /* DI */
- COSTS_N_INSNS (3)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (18), /* HI */
- COSTS_N_INSNS (18), /* SI */
- COSTS_N_INSNS (18), /* DI */
- COSTS_N_INSNS (18)}, /* other */
- COSTS_N_INSNS (2), /* cost of movsx */
- COSTS_N_INSNS (2), /* cost of movzx */
- 8, /* "large" insn */
- 4, /* MOVE_RATIO */
- 3, /* cost for loading QImode using movzbl */
- {4, 5, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 3, 2}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {6, 6, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 4}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {2, 2}, /* cost of loading MMX registers
- in SImode and DImode */
- {2, 2}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {2, 2, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {2, 2, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 6, /* MMX or SSE register to integer */
- 32, /* size of l1 cache. */
-  32,				/* size of l2 cache.  Some models
-				   have an integrated l2 cache, but
-				   optimizing for K6 is not important
-				   enough to worry about that.  */
- 32, /* size of prefetch block */
- 1, /* number of parallel prefetches */
- 1, /* Branch cost */
- COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (2), /* cost of FMUL instruction. */
- COSTS_N_INSNS (56), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- k6_memcpy,
- k6_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-/* For some reason, Athlon deals better with the REP prefix (relative to
-   loops) than K8 does.  Alignment becomes important after 8 bytes for
-   memcpy and 128 bytes for memset.  */
-static stringop_algs athlon_memcpy[2] = {
- {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- DUMMY_STRINGOP_ALGS};
-static stringop_algs athlon_memset[2] = {
- {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- DUMMY_STRINGOP_ALGS};
-static const
-struct processor_costs athlon_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
- COSTS_N_INSNS (5), /* HI */
- COSTS_N_INSNS (5), /* SI */
- COSTS_N_INSNS (5), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {3, 4, 3}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {3, 4, 3}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 4}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 6}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 5}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
- 64, /* size of l1 cache. */
- 256, /* size of l2 cache. */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 5, /* Branch cost */
- COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (4), /* cost of FMUL instruction. */
- COSTS_N_INSNS (24), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- athlon_memcpy,
- athlon_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-/* K8 has an optimized REP instruction for medium-sized blocks, but for
-   very small blocks it is better to use a loop.  For large blocks, a
-   libcall can do nontemporal accesses and beat inlining considerably.  */
-static stringop_algs k8_memcpy[2] = {
- {libcall, {{6, loop, false}, {14, unrolled_loop, false},
- {-1, rep_prefix_4_byte, false}}},
- {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-static stringop_algs k8_memset[2] = {
- {libcall, {{8, loop, false}, {24, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{48, unrolled_loop, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static const
-struct processor_costs k8_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {3, 4, 3}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {3, 4, 3}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 3, 6}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 5}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
- 64, /* size of l1 cache. */
- 512, /* size of l2 cache. */
- 64, /* size of prefetch block */
-  /* New AMD processors never drop prefetches; if they cannot be performed
-     immediately, they are queued.  We set the number of simultaneous
-     prefetches to a large constant to reflect this (leaving the number of
-     prefetches entirely unlimited is probably a bad idea, as their
-     execution also takes some time).  */
- 100, /* number of parallel prefetches */
- 3, /* Branch cost */
- COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (4), /* cost of FMUL instruction. */
- COSTS_N_INSNS (19), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- k8_memcpy,
- k8_memset,
- 4, /* scalar_stmt_cost. */
- 2, /* scalar load_cost. */
- 2, /* scalar_store_cost. */
- 5, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 2, /* vec_align_load_cost. */
- 3, /* vec_unalign_load_cost. */
- 3, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 2, /* cond_not_taken_branch_cost. */
-};
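The trailing block of each of these structs (scalar_stmt_cost through cond_not_taken_branch_cost) feeds the vectorizer's cost model.  A back-of-the-envelope use of the K8 numbers above, illustrative only; the real hook (ix86_builtin_vectorization_cost) also distinguishes aligned from misaligned accesses and charges prologue/epilogue overhead:

/* K8, four single-precision adds with their loads:
     scalar: 4 * scalar_stmt_cost + 4 * scalar_load_cost
           = 4 * 4 + 4 * 2 = 24
     vector: 1 * vec_stmt_cost + 1 * vec_align_load_cost
           = 5 + 2 = 7
   so straight-line arithmetic looks strongly in favor of vectorizing
   here, before branch and layout-conversion costs enter the picture.  */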
-
-/* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
-   for very small blocks it is better to use a loop.  For large blocks, a
-   libcall can do nontemporal accesses and beat inlining considerably.  */
-static stringop_algs amdfam10_memcpy[2] = {
- {libcall, {{6, loop, false}, {14, unrolled_loop, false},
- {-1, rep_prefix_4_byte, false}}},
- {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-static stringop_algs amdfam10_memset[2] = {
- {libcall, {{8, loop, false}, {24, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-struct processor_costs amdfam10_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {3, 4, 3}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {3, 4, 3}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 3}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 5}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- /* On K8:
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10:
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
- 64, /* size of l1 cache. */
- 512, /* size of l2 cache. */
- 64, /* size of prefetch block */
-  /* New AMD processors never drop prefetches; if they cannot be performed
-     immediately, they are queued.  We set the number of simultaneous
-     prefetches to a large constant to reflect this (leaving the number of
-     prefetches entirely unlimited is probably a bad idea, as their
-     execution also takes some time).  */
- 100, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (4), /* cost of FMUL instruction. */
- COSTS_N_INSNS (19), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- amdfam10_memcpy,
- amdfam10_memset,
- 4, /* scalar_stmt_cost. */
- 2, /* scalar load_cost. */
- 2, /* scalar_store_cost. */
- 6, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 2, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 2, /* vec_store_cost. */
- 2, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-/* BDVER1 has an optimized REP instruction for medium-sized blocks, but
-   for very small blocks it is better to use a loop.  For large blocks, a
-   libcall can do nontemporal accesses and beat inlining considerably.  */
-static stringop_algs bdver1_memcpy[2] = {
- {libcall, {{6, loop, false}, {14, unrolled_loop, false},
- {-1, rep_prefix_4_byte, false}}},
- {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-static stringop_algs bdver1_memset[2] = {
- {libcall, {{8, loop, false}, {24, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-
-const struct processor_costs bdver1_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (4), /* SI */
- COSTS_N_INSNS (6), /* DI */
- COSTS_N_INSNS (6)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {5, 5, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {5, 5, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 4}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 4}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 4}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 2, /* MMX or SSE register to integer */
- /* On K8:
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10:
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
- 16, /* size of l1 cache. */
- 2048, /* size of l2 cache. */
- 64, /* size of prefetch block */
-  /* New AMD processors never drop prefetches; if they cannot be performed
-     immediately, they are queued.  We set the number of simultaneous
-     prefetches to a large constant to reflect this (leaving the number of
-     prefetches entirely unlimited is probably a bad idea, as their
-     execution also takes some time).  */
- 100, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (6), /* cost of FMUL instruction. */
- COSTS_N_INSNS (42), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
- 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- bdver1_memcpy,
- bdver1_memset,
- 6, /* scalar_stmt_cost. */
- 4, /* scalar load_cost. */
- 4, /* scalar_store_cost. */
- 6, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 4, /* vec_align_load_cost. */
- 4, /* vec_unalign_load_cost. */
- 4, /* vec_store_cost. */
- 4, /* cond_taken_branch_cost. */
- 2, /* cond_not_taken_branch_cost. */
-};
-
-/* BDVER2 has an optimized REP instruction for medium-sized blocks, but
-   for very small blocks it is better to use a loop.  For large blocks, a
-   libcall can do nontemporal accesses and beat inlining considerably.  */
-
-static stringop_algs bdver2_memcpy[2] = {
- {libcall, {{6, loop, false}, {14, unrolled_loop, false},
- {-1, rep_prefix_4_byte, false}}},
- {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-static stringop_algs bdver2_memset[2] = {
- {libcall, {{8, loop, false}, {24, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-
-const struct processor_costs bdver2_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (4), /* SI */
- COSTS_N_INSNS (6), /* DI */
- COSTS_N_INSNS (6)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {5, 5, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {5, 5, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 4}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 4}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 4}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 2, /* MMX or SSE register to integer */
- /* On K8:
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10:
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
- 16, /* size of l1 cache. */
- 2048, /* size of l2 cache. */
- 64, /* size of prefetch block */
-  /* New AMD processors never drop prefetches; if they cannot be performed
-     immediately, they are queued.  We set the number of simultaneous
-     prefetches to a large constant to reflect this (leaving the number of
-     prefetches entirely unlimited is probably a bad idea, as their
-     execution also takes some time).  */
- 100, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (6), /* cost of FMUL instruction. */
- COSTS_N_INSNS (42), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
- 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- bdver2_memcpy,
- bdver2_memset,
- 6, /* scalar_stmt_cost. */
- 4, /* scalar load_cost. */
- 4, /* scalar_store_cost. */
- 6, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 4, /* vec_align_load_cost. */
- 4, /* vec_unalign_load_cost. */
- 4, /* vec_store_cost. */
- 4, /* cond_taken_branch_cost. */
- 2, /* cond_not_taken_branch_cost. */
-};
-
-
-/* BDVER3 has an optimized REP instruction for medium-sized blocks, but
-   for very small blocks it is better to use a loop.  For large blocks, a
-   libcall can do nontemporal accesses and beat inlining considerably.  */
-static stringop_algs bdver3_memcpy[2] = {
- {libcall, {{6, loop, false}, {14, unrolled_loop, false},
- {-1, rep_prefix_4_byte, false}}},
- {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-static stringop_algs bdver3_memset[2] = {
- {libcall, {{8, loop, false}, {24, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-struct processor_costs bdver3_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (4), /* SI */
- COSTS_N_INSNS (6), /* DI */
- COSTS_N_INSNS (6)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {5, 5, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {5, 5, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 4}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 4}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 4}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 2, /* MMX or SSE register to integer */
- 16, /* size of l1 cache. */
- 2048, /* size of l2 cache. */
- 64, /* size of prefetch block */
-  /* New AMD processors never drop prefetches; if they cannot be performed
-     immediately, they are queued.  We set the number of simultaneous
-     prefetches to a large constant to reflect this (leaving the number of
-     prefetches entirely unlimited is probably a bad idea, as their
-     execution also takes some time).  */
- 100, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (6), /* cost of FMUL instruction. */
- COSTS_N_INSNS (42), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
- 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- bdver3_memcpy,
- bdver3_memset,
- 6, /* scalar_stmt_cost. */
- 4, /* scalar load_cost. */
- 4, /* scalar_store_cost. */
- 6, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 4, /* vec_align_load_cost. */
- 4, /* vec_unalign_load_cost. */
- 4, /* vec_store_cost. */
- 4, /* cond_taken_branch_cost. */
- 2, /* cond_not_taken_branch_cost. */
-};
-
-/* BDVER4 has an optimized REP instruction for medium-sized blocks, but
-   for very small blocks it is better to use a loop.  For large blocks, a
-   libcall can do nontemporal accesses and beat inlining considerably.  */
-static stringop_algs bdver4_memcpy[2] = {
- {libcall, {{6, loop, false}, {14, unrolled_loop, false},
- {-1, rep_prefix_4_byte, false}}},
- {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-static stringop_algs bdver4_memset[2] = {
- {libcall, {{8, loop, false}, {24, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-struct processor_costs bdver4_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (4), /* SI */
- COSTS_N_INSNS (6), /* DI */
- COSTS_N_INSNS (6)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {5, 5, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {5, 5, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {4, 4}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 4}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 4}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 2, /* MMX or SSE register to integer */
- 16, /* size of l1 cache. */
- 2048, /* size of l2 cache. */
- 64, /* size of prefetch block */
-  /* New AMD processors never drop prefetches; if they cannot be performed
-     immediately, they are queued.  We set the number of simultaneous
-     prefetches to a large constant to reflect this (leaving the number of
-     prefetches entirely unlimited is probably a bad idea, as their
-     execution also takes some time).  */
- 100, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (6), /* cost of FMUL instruction. */
- COSTS_N_INSNS (42), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
- 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- bdver4_memcpy,
- bdver4_memset,
- 6, /* scalar_stmt_cost. */
- 4, /* scalar load_cost. */
- 4, /* scalar_store_cost. */
- 6, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 4, /* vec_align_load_cost. */
- 4, /* vec_unalign_load_cost. */
- 4, /* vec_store_cost. */
- 4, /* cond_taken_branch_cost. */
- 2, /* cond_not_taken_branch_cost. */
-};
-
-
-/* ZNVER1 has an optimized REP instruction for medium-sized blocks, but
-   for very small blocks it is better to use a loop.  For large blocks, a
-   libcall can do nontemporal accesses and beat inlining considerably.  */
-static stringop_algs znver1_memcpy[2] = {
- {libcall, {{6, loop, false}, {14, unrolled_loop, false},
- {-1, rep_prefix_4_byte, false}}},
- {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-static stringop_algs znver1_memset[2] = {
- {libcall, {{8, loop, false}, {24, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-struct processor_costs znver1_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction. */
- COSTS_N_INSNS (1), /* cost of a lea instruction. */
- COSTS_N_INSNS (1), /* variable shift costs. */
- COSTS_N_INSNS (1), /* constant shift costs. */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
- COSTS_N_INSNS (3), /* HI. */
- COSTS_N_INSNS (3), /* SI. */
- COSTS_N_INSNS (4), /* DI. */
- COSTS_N_INSNS (4)}, /* other. */
- 0, /* cost of multiply per each bit
- set. */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
- COSTS_N_INSNS (35), /* HI. */
- COSTS_N_INSNS (51), /* SI. */
- COSTS_N_INSNS (83), /* DI. */
- COSTS_N_INSNS (83)}, /* other. */
- COSTS_N_INSNS (1), /* cost of movsx. */
- COSTS_N_INSNS (1), /* cost of movzx. */
- 8, /* "large" insn. */
- 9, /* MOVE_RATIO. */
- 4, /* cost for loading QImode using
- movzbl. */
- {5, 5, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer
- registers. */
- 2, /* cost of reg,reg fld/fst. */
- {5, 5, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode. */
- {4, 4, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode. */
- 2, /* cost of moving MMX register. */
- {4, 4}, /* cost of loading MMX registers
- in SImode and DImode. */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode. */
- 2, /* cost of moving SSE register. */
- {4, 4, 4}, /* cost of loading SSE registers
- in SImode, DImode and TImode. */
- {4, 4, 4}, /* cost of storing SSE registers
- in SImode, DImode and TImode. */
- 2, /* MMX or SSE register to integer. */
- 32, /* size of l1 cache. */
- 512, /* size of l2 cache. */
- 64, /* size of prefetch block. */
-  /* New AMD processors never drop prefetches; if they cannot be performed
-     immediately, they are queued.  We set the number of simultaneous
-     prefetches to a large constant to reflect this (leaving the number of
-     prefetches entirely unlimited is probably a bad idea, as their
-     execution also takes some time).  */
- 100, /* number of parallel prefetches. */
- 3, /* Branch cost. */
- COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (6), /* cost of FMUL instruction. */
- COSTS_N_INSNS (42), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
-  /* Zen can execute 4 integer operations per cycle.  FP operations
-     take 3 cycles and it can execute 2 integer additions and 2
-     multiplications, so reassociation may make sense up to a width of 6.
-     SPEC2k6 benchmarks suggest that 4 works better than 6, probably due
-     to register pressure.
-
-     Integer vector operations are handled by the FP unit and execute 3
-     vector plus/minus operations per cycle but only one multiply.  This
-     is adjusted in ix86_reassociation_width.  */
- 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
- znver1_memcpy,
- znver1_memset,
- 6, /* scalar_stmt_cost. */
- 4, /* scalar load_cost. */
- 4, /* scalar_store_cost. */
- 6, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 4, /* vec_align_load_cost. */
- 4, /* vec_unalign_load_cost. */
- 4, /* vec_store_cost. */
- 4, /* cond_taken_branch_cost. */
- 2, /* cond_not_taken_branch_cost. */
-};
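To make the reassociation-width comment above concrete (an illustrative sketch: the width numbers come from the comment, the tree shapes are hypothetical, and FP reassociation like this is only legal under -ffast-math): with a width of 1 an eight-term sum is a chain of seven dependent adds, while a width of 4 lets the reassociation pass balance it into independent subtrees with a critical path of three adds.

/* Serial form: seven dependent additions, critical path ~7 add
   latencies.  */
static double
sum_serial (const double *a)
{
  return ((((((a[0] + a[1]) + a[2]) + a[3]) + a[4]) + a[5]) + a[6]) + a[7];
}

/* Reassociated for width 4: four independent pairs, then a balanced
   reduction; critical path of 3 additions.  */
static double
sum_width4 (const double *a)
{
  double t0 = a[0] + a[1], t1 = a[2] + a[3];
  double t2 = a[4] + a[5], t3 = a[6] + a[7];
  return (t0 + t1) + (t2 + t3);
}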
-
-/* BTVER1 has an optimized REP instruction for medium-sized blocks, but
-   for very small blocks it is better to use a loop.  For large blocks, a
-   libcall can do nontemporal accesses and beat inlining considerably.  */
-static stringop_algs btver1_memcpy[2] = {
- {libcall, {{6, loop, false}, {14, unrolled_loop, false},
- {-1, rep_prefix_4_byte, false}}},
- {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-static stringop_algs btver1_memset[2] = {
- {libcall, {{8, loop, false}, {24, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-const struct processor_costs btver1_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {3, 4, 3}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {3, 4, 3}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 3}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 5}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- /* On K8:
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10:
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
- 32, /* size of l1 cache. */
- 512, /* size of l2 cache. */
- 64, /* size of prefetch block */
- 100, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (4), /* cost of FMUL instruction. */
- COSTS_N_INSNS (19), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- btver1_memcpy,
- btver1_memset,
- 4, /* scalar_stmt_cost. */
- 2, /* scalar load_cost. */
- 2, /* scalar_store_cost. */
- 6, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 2, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 2, /* vec_store_cost. */
- 2, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-static stringop_algs btver2_memcpy[2] = {
- {libcall, {{6, loop, false}, {14, unrolled_loop, false},
- {-1, rep_prefix_4_byte, false}}},
- {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-static stringop_algs btver2_memset[2] = {
- {libcall, {{8, loop, false}, {24, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-const struct processor_costs btver2_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (2), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (5)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (35), /* HI */
- COSTS_N_INSNS (51), /* SI */
- COSTS_N_INSNS (83), /* DI */
- COSTS_N_INSNS (83)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 9, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {3, 4, 3}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {3, 4, 3}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {4, 4, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {3, 3}, /* cost of loading MMX registers
- in SImode and DImode */
- {4, 4}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {4, 4, 3}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {4, 4, 5}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 3, /* MMX or SSE register to integer */
- /* On K8:
- MOVD reg64, xmmreg Double FSTORE 4
- MOVD reg32, xmmreg Double FSTORE 4
- On AMDFAM10:
- MOVD reg64, xmmreg Double FADD 3
- 1/1 1/1
- MOVD reg32, xmmreg Double FADD 3
- 1/1 1/1 */
- 32, /* size of l1 cache. */
- 2048, /* size of l2 cache. */
- 64, /* size of prefetch block */
- 100, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (4), /* cost of FMUL instruction. */
- COSTS_N_INSNS (19), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- btver2_memcpy,
- btver2_memset,
- 4, /* scalar_stmt_cost. */
- 2, /* scalar load_cost. */
- 2, /* scalar_store_cost. */
- 6, /* vec_stmt_cost. */
- 0, /* vec_to_scalar_cost. */
- 2, /* scalar_to_vec_cost. */
- 2, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 2, /* vec_store_cost. */
- 2, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-static stringop_algs pentium4_memcpy[2] = {
- {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
- DUMMY_STRINGOP_ALGS};
-static stringop_algs pentium4_memset[2] = {
- {libcall, {{6, loop_1_byte, false}, {48, loop, false},
- {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- DUMMY_STRINGOP_ALGS};
-
-static const
-struct processor_costs pentium4_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (3), /* cost of a lea instruction */
- COSTS_N_INSNS (4), /* variable shift costs */
- COSTS_N_INSNS (4), /* constant shift costs */
- {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
- COSTS_N_INSNS (15), /* HI */
- COSTS_N_INSNS (15), /* SI */
- COSTS_N_INSNS (15), /* DI */
- COSTS_N_INSNS (15)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (56), /* HI */
- COSTS_N_INSNS (56), /* SI */
- COSTS_N_INSNS (56), /* DI */
- COSTS_N_INSNS (56)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 16, /* "large" insn */
- 6, /* MOVE_RATIO */
- 2, /* cost for loading QImode using movzbl */
- {4, 5, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {2, 3, 2}, /* cost of storing integer registers */
- 2, /* cost of reg,reg fld/fst */
- {2, 2, 6}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 6}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {2, 2}, /* cost of loading MMX registers
- in SImode and DImode */
- {2, 2}, /* cost of storing MMX registers
- in SImode and DImode */
- 12, /* cost of moving SSE register */
- {12, 12, 12}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {2, 2, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 10, /* MMX or SSE register to integer */
- 8, /* size of l1 cache. */
- 256, /* size of l2 cache. */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 2, /* Branch cost */
- COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (7), /* cost of FMUL instruction. */
- COSTS_N_INSNS (43), /* cost of FDIV instruction. */
- COSTS_N_INSNS (2), /* cost of FABS instruction. */
- COSTS_N_INSNS (2), /* cost of FCHS instruction. */
- COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- pentium4_memcpy,
- pentium4_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-static stringop_algs nocona_memcpy[2] = {
- {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
- {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
- {100000, unrolled_loop, false}, {-1, libcall, false}}}};
-
-static stringop_algs nocona_memset[2] = {
- {libcall, {{6, loop_1_byte, false}, {48, loop, false},
- {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{24, loop, false}, {64, unrolled_loop, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-
-static const
-struct processor_costs nocona_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1), /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
- COSTS_N_INSNS (10), /* HI */
- COSTS_N_INSNS (10), /* SI */
- COSTS_N_INSNS (10), /* DI */
- COSTS_N_INSNS (10)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (66), /* HI */
- COSTS_N_INSNS (66), /* SI */
- COSTS_N_INSNS (66), /* DI */
- COSTS_N_INSNS (66)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 16, /* "large" insn */
- 17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 3, /* cost of reg,reg fld/fst */
- {12, 12, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {4, 4, 4}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 6, /* cost of moving MMX register */
- {12, 12}, /* cost of loading MMX registers
- in SImode and DImode */
- {12, 12}, /* cost of storing MMX registers
- in SImode and DImode */
- 6, /* cost of moving SSE register */
- {12, 12, 12}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {12, 12, 12}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 8, /* MMX or SSE register to integer */
- 8, /* size of l1 cache. */
- 1024, /* size of l2 cache. */
- 64, /* size of prefetch block */
- 8, /* number of parallel prefetches */
- 1, /* Branch cost */
- COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (8), /* cost of FMUL instruction. */
- COSTS_N_INSNS (40), /* cost of FDIV instruction. */
- COSTS_N_INSNS (3), /* cost of FABS instruction. */
- COSTS_N_INSNS (3), /* cost of FCHS instruction. */
- COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
- 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- nocona_memcpy,
- nocona_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-static stringop_algs atom_memcpy[2] = {
- {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
- {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static stringop_algs atom_memset[2] = {
- {libcall, {{8, loop, false}, {15, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{24, loop, false}, {32, unrolled_loop, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static const
-struct processor_costs atom_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {8, 8, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {8, 8, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
- 32, /* size of l1 cache. */
- 256, /* size of l2 cache. */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 3, /* Branch cost */
- COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (8), /* cost of FMUL instruction. */
- COSTS_N_INSNS (20), /* cost of FDIV instruction. */
- COSTS_N_INSNS (8), /* cost of FABS instruction. */
- COSTS_N_INSNS (8), /* cost of FCHS instruction. */
- COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
- 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
- atom_memcpy,
- atom_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-static stringop_algs slm_memcpy[2] = {
- {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
- {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static stringop_algs slm_memset[2] = {
- {libcall, {{8, loop, false}, {15, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{24, loop, false}, {32, unrolled_loop, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static const
-struct processor_costs slm_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (3), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {8, 8, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {8, 8, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
- 32, /* size of l1 cache. */
- 256, /* size of l2 cache. */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 3, /* Branch cost */
- COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (8), /* cost of FMUL instruction. */
- COSTS_N_INSNS (20), /* cost of FDIV instruction. */
- COSTS_N_INSNS (8), /* cost of FABS instruction. */
- COSTS_N_INSNS (8), /* cost of FCHS instruction. */
- COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
- 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- slm_memcpy,
- slm_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 4, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-static stringop_algs intel_memcpy[2] = {
- {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
- {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static stringop_algs intel_memset[2] = {
- {libcall, {{8, loop, false}, {15, unrolled_loop, false},
- {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
- {libcall, {{24, loop, false}, {32, unrolled_loop, false},
- {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
-static const
-struct processor_costs intel_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (3), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {8, 8, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {8, 8, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
- 32, /* size of l1 cache. */
- 256, /* size of l2 cache. */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- 3, /* Branch cost */
- COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (8), /* cost of FMUL instruction. */
- COSTS_N_INSNS (20), /* cost of FDIV instruction. */
- COSTS_N_INSNS (8), /* cost of FABS instruction. */
- COSTS_N_INSNS (8), /* cost of FCHS instruction. */
- COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
- 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- intel_memcpy,
- intel_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 4, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-/* Generic should produce code tuned for Core-i7 (and newer chips)
- and btver1 (and newer chips). */
-
-static stringop_algs generic_memcpy[2] = {
- {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
- {-1, libcall, false}}},
- {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-static stringop_algs generic_memset[2] = {
- {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
- {-1, libcall, false}}},
- {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
- {-1, libcall, false}}}};
-static const
-struct processor_costs generic_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- /* On all chips taken into consideration, lea is 2 cycles or more.  With
- this cost, however, our current implementation of synth_mult results in
- the use of unnecessary temporary registers, causing regressions on
- several SPECfp benchmarks. */
- COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {8, 8, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {8, 8, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
- 32, /* size of l1 cache. */
- 512, /* size of l2 cache. */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- /* Benchmarks show large regressions on the K8 sixtrack benchmark when
- this value is increased to the perhaps more appropriate value of 5. */
- 3, /* Branch cost */
- COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (8), /* cost of FMUL instruction. */
- COSTS_N_INSNS (20), /* cost of FDIV instruction. */
- COSTS_N_INSNS (8), /* cost of FABS instruction. */
- COSTS_N_INSNS (8), /* cost of FCHS instruction. */
- COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
- 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
- generic_memcpy,
- generic_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
-/* core_cost should produce code tuned for the Core family of CPUs. */
-static stringop_algs core_memcpy[2] = {
- {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
- {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
- {-1, libcall, false}}}};
-static stringop_algs core_memset[2] = {
- {libcall, {{6, loop_1_byte, true},
- {24, loop, true},
- {8192, rep_prefix_4_byte, true},
- {-1, libcall, false}}},
- {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
- {-1, libcall, false}}}};
-
-static const
-struct processor_costs core_cost = {
- COSTS_N_INSNS (1), /* cost of an add instruction */
- /* On all chips taken into consideration, lea is 2 cycles or more.  With
- this cost, however, our current implementation of synth_mult results in
- the use of unnecessary temporary registers, causing regressions on
- several SPECfp benchmarks. */
- COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
- COSTS_N_INSNS (1), /* variable shift costs */
- COSTS_N_INSNS (1), /* constant shift costs */
- {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
- COSTS_N_INSNS (4), /* HI */
- COSTS_N_INSNS (3), /* SI */
- COSTS_N_INSNS (4), /* DI */
- COSTS_N_INSNS (2)}, /* other */
- 0, /* cost of multiply per each bit set */
- {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
- COSTS_N_INSNS (26), /* HI */
- COSTS_N_INSNS (42), /* SI */
- COSTS_N_INSNS (74), /* DI */
- COSTS_N_INSNS (74)}, /* other */
- COSTS_N_INSNS (1), /* cost of movsx */
- COSTS_N_INSNS (1), /* cost of movzx */
- 8, /* "large" insn */
- 17, /* MOVE_RATIO */
- 4, /* cost for loading QImode using movzbl */
- {4, 4, 4}, /* cost of loading integer registers
- in QImode, HImode and SImode.
- Relative to reg-reg move (2). */
- {4, 4, 4}, /* cost of storing integer registers */
- 4, /* cost of reg,reg fld/fst */
- {12, 12, 12}, /* cost of loading fp registers
- in SFmode, DFmode and XFmode */
- {6, 6, 8}, /* cost of storing fp registers
- in SFmode, DFmode and XFmode */
- 2, /* cost of moving MMX register */
- {8, 8}, /* cost of loading MMX registers
- in SImode and DImode */
- {8, 8}, /* cost of storing MMX registers
- in SImode and DImode */
- 2, /* cost of moving SSE register */
- {8, 8, 8}, /* cost of loading SSE registers
- in SImode, DImode and TImode */
- {8, 8, 8}, /* cost of storing SSE registers
- in SImode, DImode and TImode */
- 5, /* MMX or SSE register to integer */
- 64, /* size of l1 cache. */
- 512, /* size of l2 cache. */
- 64, /* size of prefetch block */
- 6, /* number of parallel prefetches */
- /* FIXME: perhaps a more appropriate value is 5. */
- 3, /* Branch cost */
- COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
- COSTS_N_INSNS (8), /* cost of FMUL instruction. */
- COSTS_N_INSNS (20), /* cost of FDIV instruction. */
- COSTS_N_INSNS (8), /* cost of FABS instruction. */
- COSTS_N_INSNS (8), /* cost of FCHS instruction. */
- COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
- 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
- core_memcpy,
- core_memset,
- 1, /* scalar_stmt_cost. */
- 1, /* scalar load_cost. */
- 1, /* scalar_store_cost. */
- 1, /* vec_stmt_cost. */
- 1, /* vec_to_scalar_cost. */
- 1, /* scalar_to_vec_cost. */
- 1, /* vec_align_load_cost. */
- 2, /* vec_unalign_load_cost. */
- 1, /* vec_store_cost. */
- 3, /* cond_taken_branch_cost. */
- 1, /* cond_not_taken_branch_cost. */
-};
-
/* Set by -mtune. */
-const struct processor_costs *ix86_tune_cost = &pentium_cost;
+const struct processor_costs *ix86_tune_cost = NULL;
/* Set by -mtune or -Os. */
-const struct processor_costs *ix86_cost = &pentium_cost;
+const struct processor_costs *ix86_cost = NULL;
/* Processor feature/optimization bitmasks. */
#define m_386 (1U<<PROCESSOR_I386)
@@ -6874,6 +4794,30 @@ ix86_conditional_register_usage (void)
fixed_regs[i] = call_used_regs[i] = 1, reg_names[i] = "";
}
+/* Canonicalize a comparison from one we don't have to one we do have. */
+
+static void
+ix86_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
+ bool op0_preserve_value)
+{
+ /* The order of operands in x87 ficom compare is forced by combine in
+ simplify_comparison () function. Float operator is treated as RTX_OBJ
+ with a precedence over other operators and is always put in the first
+ place. Swap condition and operands to match ficom instruction. */
+ if (!op0_preserve_value
+ && GET_CODE (*op0) == FLOAT && MEM_P (XEXP (*op0, 0)) && REG_P (*op1))
+ {
+ enum rtx_code scode = swap_condition ((enum rtx_code) *code);
+
+ /* We are called only for compares that are split to SAHF instruction.
+ Ensure that we have setcc/jcc insn for the swapped condition. */
+ if (ix86_fp_compare_code_to_integer (scode) != UNKNOWN)
+ {
+ std::swap (*op0, *op1);
+ *code = (int) scode;
+ }
+ }
+}
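+
+/* Illustrative example (hypothetical RTL, not from the machine
+ description): a compare such as
+ (lt (float:DF (mem:SI addr)) (reg:DF x))
+ is canonicalized by the hook above into
+ (gt (reg:DF x) (float:DF (mem:SI addr)))
+ so that the operand order matches the ficom instruction. */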
/* Save the current options */
@@ -24341,31 +22285,6 @@ ix86_expand_branch (enum rtx_code code, rtx op0, rtx op1, rtx label)
}
}
-/* Split branch based on floating point condition. */
-void
-ix86_split_fp_branch (enum rtx_code code, rtx op1, rtx op2,
- rtx target1, rtx target2, rtx tmp)
-{
- rtx condition;
- rtx_insn *i;
-
- if (target2 != pc_rtx)
- {
- std::swap (target1, target2);
- code = reverse_condition_maybe_unordered (code);
- }
-
- condition = ix86_expand_fp_compare (code, op1, op2,
- tmp);
-
- i = emit_jump_insn (gen_rtx_SET
- (pc_rtx,
- gen_rtx_IF_THEN_ELSE (VOIDmode,
- condition, target1, target2)));
- if (split_branch_probability.initialized_p ())
- add_reg_br_prob_note (i, split_branch_probability);
-}
-
void
ix86_expand_setcc (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
@@ -30047,8 +27966,8 @@ ix86_get_modrm_for_rop (rtx_insn *insn, rtx *operands, int noperands,
/* Check whether x86 address PARTS is a pc-relative address. */
-static bool
-rip_relative_addr_p (struct ix86_address *parts)
+bool
+ix86_rip_relative_addr_p (struct ix86_address *parts)
{
rtx base, index, disp;
@@ -30152,7 +28071,7 @@ memory_address_length (rtx addr, bool lea)
else if (disp && !base && !index)
{
len += 4;
- if (!rip_relative_addr_p (&parts))
+ if (!ix86_rip_relative_addr_p (&parts))
len++;
}
else
@@ -30334,773 +28253,6 @@ ix86_attr_length_vex_default (rtx_insn *insn, bool has_0f_opcode,
return 2 + 1;
}
-/* Return the maximum number of instructions a cpu can issue. */
-
-static int
-ix86_issue_rate (void)
-{
- switch (ix86_tune)
- {
- case PROCESSOR_PENTIUM:
- case PROCESSOR_LAKEMONT:
- case PROCESSOR_BONNELL:
- case PROCESSOR_SILVERMONT:
- case PROCESSOR_KNL:
- case PROCESSOR_KNM:
- case PROCESSOR_INTEL:
- case PROCESSOR_K6:
- case PROCESSOR_BTVER2:
- case PROCESSOR_PENTIUM4:
- case PROCESSOR_NOCONA:
- return 2;
-
- case PROCESSOR_PENTIUMPRO:
- case PROCESSOR_ATHLON:
- case PROCESSOR_K8:
- case PROCESSOR_AMDFAM10:
- case PROCESSOR_GENERIC:
- case PROCESSOR_BTVER1:
- return 3;
-
- case PROCESSOR_BDVER1:
- case PROCESSOR_BDVER2:
- case PROCESSOR_BDVER3:
- case PROCESSOR_BDVER4:
- case PROCESSOR_ZNVER1:
- case PROCESSOR_CORE2:
- case PROCESSOR_NEHALEM:
- case PROCESSOR_SANDYBRIDGE:
- case PROCESSOR_HASWELL:
- return 4;
-
- default:
- return 1;
- }
-}
-
-/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
- by DEP_INSN and nothing else set by DEP_INSN. */
-
-static bool
-ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
-{
- rtx set, set2;
-
- /* Simplify the test for uninteresting insns. */
- if (insn_type != TYPE_SETCC
- && insn_type != TYPE_ICMOV
- && insn_type != TYPE_FCMOV
- && insn_type != TYPE_IBR)
- return false;
-
- if ((set = single_set (dep_insn)) != 0)
- {
- set = SET_DEST (set);
- set2 = NULL_RTX;
- }
- else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
- && XVECLEN (PATTERN (dep_insn), 0) == 2
- && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
- && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
- {
- set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
- set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
- }
- else
- return false;
-
- if (!REG_P (set) || REGNO (set) != FLAGS_REG)
- return false;
-
- /* This test is true if the dependent insn reads the flags but
- not any other potentially set register. */
- if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
- return false;
-
- if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
- return false;
-
- return true;
-}
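-
-/* A minimal illustration (hypothetical insns):
- dep_insn: (set (reg:CCZ flags) (compare (reg:SI a) (reg:SI b)))
- insn: a jcc reading (reg:CCZ flags)
- makes this return true; ix86_adjust_cost below uses that to model
- compare/jump pairing on Pentium by dropping the cost to 0. */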
-
-/* Return true iff USE_INSN has a memory address with operands set by
- SET_INSN. */
-
-bool
-ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
-{
- int i;
- extract_insn_cached (use_insn);
- for (i = recog_data.n_operands - 1; i >= 0; --i)
- if (MEM_P (recog_data.operand[i]))
- {
- rtx addr = XEXP (recog_data.operand[i], 0);
- if (modified_in_p (addr, set_insn) != 0)
- {
- /* No AGI stall if SET_INSN is a push or pop and USE_INSN
- has SP based memory (unless index reg is modified in a pop). */
- rtx set = single_set (set_insn);
- if (set
- && (push_operand (SET_DEST (set), GET_MODE (SET_DEST (set)))
- || pop_operand (SET_SRC (set), GET_MODE (SET_SRC (set)))))
- {
- struct ix86_address parts;
- if (ix86_decompose_address (addr, &parts)
- && parts.base == stack_pointer_rtx
- && (parts.index == NULL_RTX
- || MEM_P (SET_DEST (set))
- || !modified_in_p (parts.index, set_insn)))
- return false;
- }
- return true;
- }
- return false;
- }
- return false;
-}
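-
-/* Illustration (hypothetical insns): with
- set_insn: (set (reg:SI bx) ...)
- use_insn: (set (reg:SI ax) (mem:SI (plus:SI (reg:SI bx) (const_int 4))))
- the address of use_insn depends on set_insn, so this returns true
- (an AGI stall), unless the push/pop special case above applies. */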
-
-/* Helper function for exact_store_load_dependency.
- Return true if addr is found in insn. */
-static bool
-exact_dependency_1 (rtx addr, rtx insn)
-{
- enum rtx_code code;
- const char *format_ptr;
- int i, j;
-
- code = GET_CODE (insn);
- switch (code)
- {
- case MEM:
- if (rtx_equal_p (addr, insn))
- return true;
- break;
- case REG:
- CASE_CONST_ANY:
- case SYMBOL_REF:
- case CODE_LABEL:
- case PC:
- case CC0:
- case EXPR_LIST:
- return false;
- default:
- break;
- }
-
- format_ptr = GET_RTX_FORMAT (code);
- for (i = 0; i < GET_RTX_LENGTH (code); i++)
- {
- switch (*format_ptr++)
- {
- case 'e':
- if (exact_dependency_1 (addr, XEXP (insn, i)))
- return true;
- break;
- case 'E':
- for (j = 0; j < XVECLEN (insn, i); j++)
- if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
- return true;
- break;
- }
- }
- return false;
-}
-
-/* Return true if there exists exact dependency for store & load, i.e.
- the same memory address is used in them. */
-static bool
-exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
-{
- rtx set1, set2;
-
- set1 = single_set (store);
- if (!set1)
- return false;
- if (!MEM_P (SET_DEST (set1)))
- return false;
- set2 = single_set (load);
- if (!set2)
- return false;
- if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
- return true;
- return false;
-}
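-
-/* For instance (hypothetical), a store (set (mem:HI addr) (reg:HI x))
- followed by a load (set (reg:HI y) (mem:HI addr)) with the identical
- address RTX is an exact store/load dependency and returns true. */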
-
-static int
-ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
- unsigned int)
-{
- enum attr_type insn_type, dep_insn_type;
- enum attr_memory memory;
- rtx set, set2;
- int dep_insn_code_number;
-
- /* Anti and output dependencies have zero cost on all CPUs. */
- if (dep_type != 0)
- return 0;
-
- dep_insn_code_number = recog_memoized (dep_insn);
-
- /* If we can't recognize the insns, we can't really do anything. */
- if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
- return cost;
-
- insn_type = get_attr_type (insn);
- dep_insn_type = get_attr_type (dep_insn);
-
- switch (ix86_tune)
- {
- case PROCESSOR_PENTIUM:
- case PROCESSOR_LAKEMONT:
- /* Address Generation Interlock adds a cycle of latency. */
- if (insn_type == TYPE_LEA)
- {
- rtx addr = PATTERN (insn);
-
- if (GET_CODE (addr) == PARALLEL)
- addr = XVECEXP (addr, 0, 0);
-
- gcc_assert (GET_CODE (addr) == SET);
-
- addr = SET_SRC (addr);
- if (modified_in_p (addr, dep_insn))
- cost += 1;
- }
- else if (ix86_agi_dependent (dep_insn, insn))
- cost += 1;
-
- /* ??? Compares pair with jump/setcc. */
- if (ix86_flags_dependent (insn, dep_insn, insn_type))
- cost = 0;
-
- /* Floating point stores require value to be ready one cycle earlier. */
- if (insn_type == TYPE_FMOV
- && get_attr_memory (insn) == MEMORY_STORE
- && !ix86_agi_dependent (dep_insn, insn))
- cost += 1;
- break;
-
- case PROCESSOR_PENTIUMPRO:
- /* INT->FP conversion is expensive. */
- if (get_attr_fp_int_src (dep_insn))
- cost += 5;
-
- /* There is one cycle extra latency between an FP op and a store. */
- if (insn_type == TYPE_FMOV
- && (set = single_set (dep_insn)) != NULL_RTX
- && (set2 = single_set (insn)) != NULL_RTX
- && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
- && MEM_P (SET_DEST (set2)))
- cost += 1;
-
- memory = get_attr_memory (insn);
-
- /* Show the ability of the reorder buffer to hide the latency of a load
- by executing it in parallel with the previous instruction when the
- previous instruction is not needed to compute the address. */
- if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependent (dep_insn, insn))
- {
- /* Claim moves take one cycle, as the core can issue one load at
- a time and the next load can start a cycle later. */
- if (dep_insn_type == TYPE_IMOV
- || dep_insn_type == TYPE_FMOV)
- cost = 1;
- else if (cost > 1)
- cost--;
- }
- break;
-
- case PROCESSOR_K6:
- /* The esp dependency is resolved before
- the instruction is really finished. */
- if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
- && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
- return 1;
-
- /* INT->FP conversion is expensive. */
- if (get_attr_fp_int_src (dep_insn))
- cost += 5;
-
- memory = get_attr_memory (insn);
-
- /* Show the ability of the reorder buffer to hide the latency of a load
- by executing it in parallel with the previous instruction when the
- previous instruction is not needed to compute the address. */
- if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependent (dep_insn, insn))
- {
- /* Claim moves take one cycle, as the core can issue one load at
- a time and the next load can start a cycle later. */
- if (dep_insn_type == TYPE_IMOV
- || dep_insn_type == TYPE_FMOV)
- cost = 1;
- else if (cost > 2)
- cost -= 2;
- else
- cost = 1;
- }
- break;
-
- case PROCESSOR_AMDFAM10:
- case PROCESSOR_BDVER1:
- case PROCESSOR_BDVER2:
- case PROCESSOR_BDVER3:
- case PROCESSOR_BDVER4:
- case PROCESSOR_ZNVER1:
- case PROCESSOR_BTVER1:
- case PROCESSOR_BTVER2:
- case PROCESSOR_GENERIC:
- /* The stack engine allows push&pop instructions to execute in parallel. */
- if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
- && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
- return 0;
- /* FALLTHRU */
-
- case PROCESSOR_ATHLON:
- case PROCESSOR_K8:
- memory = get_attr_memory (insn);
-
- /* Show the ability of the reorder buffer to hide the latency of a load
- by executing it in parallel with the previous instruction when the
- previous instruction is not needed to compute the address. */
- if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependent (dep_insn, insn))
- {
- enum attr_unit unit = get_attr_unit (insn);
- int loadcost = 3;
-
- /* Because of the difference between the length of integer and
- floating unit pipeline preparation stages, the memory operands
- for floating point are cheaper.
-
- ??? For Athlon the difference is most probably 2. */
- if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
- loadcost = 3;
- else
- loadcost = TARGET_ATHLON ? 2 : 0;
-
- if (cost >= loadcost)
- cost -= loadcost;
- else
- cost = 0;
- }
- break;
-
- case PROCESSOR_CORE2:
- case PROCESSOR_NEHALEM:
- case PROCESSOR_SANDYBRIDGE:
- case PROCESSOR_HASWELL:
- /* The stack engine allows push&pop instructions to execute in parallel. */
- if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
- && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
- return 0;
-
- memory = get_attr_memory (insn);
-
- /* Show the ability of the reorder buffer to hide the latency of a load
- by executing it in parallel with the previous instruction when the
- previous instruction is not needed to compute the address. */
- if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- && !ix86_agi_dependent (dep_insn, insn))
- {
- if (cost >= 4)
- cost -= 4;
- else
- cost = 0;
- }
- break;
-
- case PROCESSOR_SILVERMONT:
- case PROCESSOR_KNL:
- case PROCESSOR_KNM:
- case PROCESSOR_INTEL:
- if (!reload_completed)
- return cost;
-
- /* Increase cost of integer loads. */
- memory = get_attr_memory (dep_insn);
- if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
- {
- enum attr_unit unit = get_attr_unit (dep_insn);
- if (unit == UNIT_INTEGER && cost == 1)
- {
- if (memory == MEMORY_LOAD)
- cost = 3;
- else
- {
- /* Increase the cost of ld/st for short int types only
- because of the store forwarding issue. */
- rtx set = single_set (dep_insn);
- if (set && (GET_MODE (SET_DEST (set)) == QImode
- || GET_MODE (SET_DEST (set)) == HImode))
- {
- /* Increase cost of store/load insn if exact
- dependence exists and it is load insn. */
- enum attr_memory insn_memory = get_attr_memory (insn);
- if (insn_memory == MEMORY_LOAD
- && exact_store_load_dependency (dep_insn, insn))
- cost = 3;
- }
- }
- }
- }
-
- default:
- break;
- }
-
- return cost;
-}
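-
-/* Example of the Pentium AGI handling above (hypothetical pair): if
- DEP_INSN sets %ebx and INSN is a lea whose address uses %ebx, the
- dependence cost is increased by one cycle. */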
-
-/* How many alternative schedules to try. This should be as wide as the
- scheduling freedom in the DFA, but no wider. Making this value too
- large results in extra work for the scheduler. */
-
-static int
-ia32_multipass_dfa_lookahead (void)
-{
- /* Generally, we want haifa-sched:max_issue() to look ahead as far
- as many instructions can be executed on a cycle, i.e.,
- issue_rate. */
- if (reload_completed)
- return ix86_issue_rate ();
- /* Don't use lookahead for pre-reload schedule to save compile time. */
- return 0;
-}
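-
-/* E.g. under -mtune=haswell the post-reload lookahead is 4 (the issue
- rate returned above), while the pre-reload scheduler uses none. */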
-
-/* Return true if target platform supports macro-fusion. */
-
-static bool
-ix86_macro_fusion_p ()
-{
- return TARGET_FUSE_CMP_AND_BRANCH;
-}
-
-/* Check whether the current microarchitecture supports macro fusion
- for insn pair "CONDGEN + CONDJMP". Refer to
- "Intel Architectures Optimization Reference Manual". */
-
-static bool
-ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
-{
- rtx src, dest;
- enum rtx_code ccode;
- rtx compare_set = NULL_RTX, test_if, cond;
- rtx alu_set = NULL_RTX, addr = NULL_RTX;
-
- if (!any_condjump_p (condjmp))
- return false;
-
- unsigned int condreg1, condreg2;
- rtx cc_reg_1;
- ix86_fixed_condition_code_regs (&condreg1, &condreg2);
- cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
- if (!reg_referenced_p (cc_reg_1, PATTERN (condjmp))
- || !condgen
- || !modified_in_p (cc_reg_1, condgen))
- return false;
-
- if (get_attr_type (condgen) != TYPE_TEST
- && get_attr_type (condgen) != TYPE_ICMP
- && get_attr_type (condgen) != TYPE_INCDEC
- && get_attr_type (condgen) != TYPE_ALU)
- return false;
-
- compare_set = single_set (condgen);
- if (compare_set == NULL_RTX
- && !TARGET_FUSE_ALU_AND_BRANCH)
- return false;
-
- if (compare_set == NULL_RTX)
- {
- int i;
- rtx pat = PATTERN (condgen);
- for (i = 0; i < XVECLEN (pat, 0); i++)
- if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
- {
- rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
- if (GET_CODE (set_src) == COMPARE)
- compare_set = XVECEXP (pat, 0, i);
- else
- alu_set = XVECEXP (pat, 0, i);
- }
- }
- if (compare_set == NULL_RTX)
- return false;
- src = SET_SRC (compare_set);
- if (GET_CODE (src) != COMPARE)
- return false;
-
- /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
- supported. */
- if ((MEM_P (XEXP (src, 0))
- && CONST_INT_P (XEXP (src, 1)))
- || (MEM_P (XEXP (src, 1))
- && CONST_INT_P (XEXP (src, 0))))
- return false;
-
- /* No fusion for RIP-relative address. */
- if (MEM_P (XEXP (src, 0)))
- addr = XEXP (XEXP (src, 0), 0);
- else if (MEM_P (XEXP (src, 1)))
- addr = XEXP (XEXP (src, 1), 0);
-
- if (addr) {
- ix86_address parts;
- int ok = ix86_decompose_address (addr, &parts);
- gcc_assert (ok);
-
- if (rip_relative_addr_p (&parts))
- return false;
- }
-
- test_if = SET_SRC (pc_set (condjmp));
- cond = XEXP (test_if, 0);
- ccode = GET_CODE (cond);
- /* Check whether the conditional jump uses the Sign or Overflow flags. */
- if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
- && (ccode == GE
- || ccode == GT
- || ccode == LE
- || ccode == LT))
- return false;
-
- /* Return true for TYPE_TEST and TYPE_ICMP. */
- if (get_attr_type (condgen) == TYPE_TEST
- || get_attr_type (condgen) == TYPE_ICMP)
- return true;
-
- /* The following handles the case of macro-fusion for alu + jmp. */
- if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
- return false;
-
- /* No fusion for alu op with memory destination operand. */
- dest = SET_DEST (alu_set);
- if (MEM_P (dest))
- return false;
-
- /* Macro-fusion for inc/dec + unsigned conditional jump is not
- supported. */
- if (get_attr_type (condgen) == TYPE_INCDEC
- && (ccode == GEU
- || ccode == GTU
- || ccode == LEU
- || ccode == LTU))
- return false;
-
- return true;
-}
-
-/* Try to reorder the ready list to take advantage of Atom pipelined IMUL
- execution. It is applied if
- (1) an IMUL instruction is at the top of the list;
- (2) there is exactly one producer of an independent IMUL instruction
- in the ready list.
- Return the index of the IMUL producer if it was found and -1 otherwise. */
-static int
-do_reorder_for_imul (rtx_insn **ready, int n_ready)
-{
- rtx_insn *insn;
- rtx set, insn1, insn2;
- sd_iterator_def sd_it;
- dep_t dep;
- int index = -1;
- int i;
-
- if (!TARGET_BONNELL)
- return index;
-
- /* Check that IMUL instruction is on the top of ready list. */
- insn = ready[n_ready - 1];
- set = single_set (insn);
- if (!set)
- return index;
- if (!(GET_CODE (SET_SRC (set)) == MULT
- && GET_MODE (SET_SRC (set)) == SImode))
- return index;
-
- /* Search for producer of independent IMUL instruction. */
- for (i = n_ready - 2; i >= 0; i--)
- {
- insn = ready[i];
- if (!NONDEBUG_INSN_P (insn))
- continue;
- /* Skip IMUL instruction. */
- insn2 = PATTERN (insn);
- if (GET_CODE (insn2) == PARALLEL)
- insn2 = XVECEXP (insn2, 0, 0);
- if (GET_CODE (insn2) == SET
- && GET_CODE (SET_SRC (insn2)) == MULT
- && GET_MODE (SET_SRC (insn2)) == SImode)
- continue;
-
- FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
- {
- rtx con;
- con = DEP_CON (dep);
- if (!NONDEBUG_INSN_P (con))
- continue;
- insn1 = PATTERN (con);
- if (GET_CODE (insn1) == PARALLEL)
- insn1 = XVECEXP (insn1, 0, 0);
-
- if (GET_CODE (insn1) == SET
- && GET_CODE (SET_SRC (insn1)) == MULT
- && GET_MODE (SET_SRC (insn1)) == SImode)
- {
- sd_iterator_def sd_it1;
- dep_t dep1;
- /* Check if there is no other dependee for IMUL. */
- index = i;
- FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
- {
- rtx pro;
- pro = DEP_PRO (dep1);
- if (!NONDEBUG_INSN_P (pro))
- continue;
- if (pro != insn)
- index = -1;
- }
- if (index >= 0)
- break;
- }
- }
- if (index >= 0)
- break;
- }
- return index;
-}
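-
-/* Illustration (hypothetical ready list, top element last): given
- { ..., P, IMUL0 } where P is the sole producer of another, independent
- IMUL, the index of P is returned so that ix86_sched_reorder below can
- move P to the top, keeping Atom's pipelined IMUL busy. */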
-
-/* Try to find the best candidate at the top of the ready list if two insns
- have the same priority - the candidate is best if its dependees were
- scheduled earlier. Applied for Silvermont (and Intel) tuning only.
- Return true if the top 2 insns must be interchanged. */
-static bool
-swap_top_of_ready_list (rtx_insn **ready, int n_ready)
-{
- rtx_insn *top = ready[n_ready - 1];
- rtx_insn *next = ready[n_ready - 2];
- rtx set;
- sd_iterator_def sd_it;
- dep_t dep;
- int clock1 = -1;
- int clock2 = -1;
- #define INSN_TICK(INSN) (HID (INSN)->tick)
-
- if (!TARGET_SILVERMONT && !TARGET_INTEL)
- return false;
-
- if (!NONDEBUG_INSN_P (top))
- return false;
- if (!NONJUMP_INSN_P (top))
- return false;
- if (!NONDEBUG_INSN_P (next))
- return false;
- if (!NONJUMP_INSN_P (next))
- return false;
- set = single_set (top);
- if (!set)
- return false;
- set = single_set (next);
- if (!set)
- return false;
-
- if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
- {
- if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
- return false;
- /* Determine the winner more precisely. */
- FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
- {
- rtx pro;
- pro = DEP_PRO (dep);
- if (!NONDEBUG_INSN_P (pro))
- continue;
- if (INSN_TICK (pro) > clock1)
- clock1 = INSN_TICK (pro);
- }
- FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
- {
- rtx pro;
- pro = DEP_PRO (dep);
- if (!NONDEBUG_INSN_P (pro))
- continue;
- if (INSN_TICK (pro) > clock2)
- clock2 = INSN_TICK (pro);
- }
-
- if (clock1 == clock2)
- {
- /* Determine the winner - a load must win. */
- enum attr_memory memory1, memory2;
- memory1 = get_attr_memory (top);
- memory2 = get_attr_memory (next);
- if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
- return true;
- }
- return (bool) (clock2 < clock1);
- }
- return false;
- #undef INSN_TICK
-}
-
-/* Perform possible reordering of the ready list for Atom/Silvermont only.
- Return the issue rate. */
-static int
-ix86_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
- int *pn_ready, int clock_var)
-{
- int issue_rate = -1;
- int n_ready = *pn_ready;
- int i;
- rtx_insn *insn;
- int index = -1;
-
- /* Set up issue rate. */
- issue_rate = ix86_issue_rate ();
-
- /* Do reordering for BONNELL/SILVERMONT only. */
- if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
- return issue_rate;
-
- /* Nothing to do if ready list contains only 1 instruction. */
- if (n_ready <= 1)
- return issue_rate;
-
- /* Do reordering for the post-reload scheduler only. */
- if (!reload_completed)
- return issue_rate;
-
- if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
- {
- if (sched_verbose > 1)
- fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
- INSN_UID (ready[index]));
-
- /* Put IMUL producer (ready[index]) at the top of ready list. */
- insn = ready[index];
- for (i = index; i < n_ready - 1; i++)
- ready[i] = ready[i + 1];
- ready[n_ready - 1] = insn;
- return issue_rate;
- }
-
- /* Skip selective scheduling since HID is not populated in it. */
- if (clock_var != 0
- && !sel_sched_p ()
- && swap_top_of_ready_list (ready, n_ready))
- {
- if (sched_verbose > 1)
- fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
- INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
- /* Swap 2 top elements of ready list. */
- insn = ready[n_ready - 1];
- ready[n_ready - 1] = ready[n_ready - 2];
- ready[n_ready - 2] = insn;
- }
- return issue_rate;
-}
static bool
ix86_class_likely_spilled_p (reg_class_t);
@@ -31326,204 +28478,6 @@ ix86_adjust_priority (rtx_insn *insn, int priority)
return priority;
}
-/* Model decoder of Core 2/i7.
- Below hooks for multipass scheduling (see haifa-sched.c:max_issue)
- track the instruction fetch block boundaries and make sure that long
- (9+ bytes) instructions are assigned to D0. */
-
-/* Maximum length of an insn that can be handled by
- a secondary decoder unit. '8' for Core 2/i7. */
-static int core2i7_secondary_decoder_max_insn_size;
-
-/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
- '16' for Core 2/i7. */
-static int core2i7_ifetch_block_size;
-
-/* Maximum number of instructions decoder can handle per cycle.
- '6' for Core 2/i7. */
-static int core2i7_ifetch_block_max_insns;
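-
-/* For example, with the documented Core 2/i7 values (16-byte ifetch
- block, at most 6 insns, 8-byte secondary decoder limit), two 8-byte
- insns fill a fetch block, and a 9-byte insn can only be issued as the
- first insn of a cycle. */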
-
-typedef struct ix86_first_cycle_multipass_data_ *
- ix86_first_cycle_multipass_data_t;
-typedef const struct ix86_first_cycle_multipass_data_ *
- const_ix86_first_cycle_multipass_data_t;
-
-/* A variable to store target state across calls to max_issue within
- one cycle. */
-static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
- *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
-
-/* Initialize DATA. */
-static void
-core2i7_first_cycle_multipass_init (void *_data)
-{
- ix86_first_cycle_multipass_data_t data
- = (ix86_first_cycle_multipass_data_t) _data;
-
- data->ifetch_block_len = 0;
- data->ifetch_block_n_insns = 0;
- data->ready_try_change = NULL;
- data->ready_try_change_size = 0;
-}
-
-/* Advancing the cycle; reset ifetch block counts. */
-static void
-core2i7_dfa_post_advance_cycle (void)
-{
- ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
-
- gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
-
- data->ifetch_block_len = 0;
- data->ifetch_block_n_insns = 0;
-}
-
-static int min_insn_size (rtx_insn *);
-
-/* Filter out insns from ready_try that the core will not be able to issue
- on current cycle due to decoder. */
-static void
-core2i7_first_cycle_multipass_filter_ready_try
-(const_ix86_first_cycle_multipass_data_t data,
- signed char *ready_try, int n_ready, bool first_cycle_insn_p)
-{
- while (n_ready--)
- {
- rtx_insn *insn;
- int insn_size;
-
- if (ready_try[n_ready])
- continue;
-
- insn = get_ready_element (n_ready);
- insn_size = min_insn_size (insn);
-
- if (/* If this insn is too long for a secondary decoder ... */
- (!first_cycle_insn_p
- && insn_size > core2i7_secondary_decoder_max_insn_size)
- /* ... or it would not fit into the ifetch block ... */
- || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
- /* ... or the decoder is full already ... */
- || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
- /* ... mask the insn out. */
- {
- ready_try[n_ready] = 1;
-
- if (data->ready_try_change)
- bitmap_set_bit (data->ready_try_change, n_ready);
- }
- }
-}
-
-/* Prepare for a new round of multipass lookahead scheduling. */
-static void
-core2i7_first_cycle_multipass_begin (void *_data,
- signed char *ready_try, int n_ready,
- bool first_cycle_insn_p)
-{
- ix86_first_cycle_multipass_data_t data
- = (ix86_first_cycle_multipass_data_t) _data;
- const_ix86_first_cycle_multipass_data_t prev_data
- = ix86_first_cycle_multipass_data;
-
- /* Restore the state from the end of the previous round. */
- data->ifetch_block_len = prev_data->ifetch_block_len;
- data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
-
- /* Filter instructions that cannot be issued on current cycle due to
- decoder restrictions. */
- core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
- first_cycle_insn_p);
-}
-
-/* INSN is being issued in current solution. Account for its impact on
- the decoder model. */
-static void
-core2i7_first_cycle_multipass_issue (void *_data,
- signed char *ready_try, int n_ready,
- rtx_insn *insn, const void *_prev_data)
-{
- ix86_first_cycle_multipass_data_t data
- = (ix86_first_cycle_multipass_data_t) _data;
- const_ix86_first_cycle_multipass_data_t prev_data
- = (const_ix86_first_cycle_multipass_data_t) _prev_data;
-
- int insn_size = min_insn_size (insn);
-
- data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
- data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
- gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
- && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
-
- /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
- if (!data->ready_try_change)
- {
- data->ready_try_change = sbitmap_alloc (n_ready);
- data->ready_try_change_size = n_ready;
- }
- else if (data->ready_try_change_size < n_ready)
- {
- data->ready_try_change = sbitmap_resize (data->ready_try_change,
- n_ready, 0);
- data->ready_try_change_size = n_ready;
- }
- bitmap_clear (data->ready_try_change);
-
- /* Filter out insns from ready_try that the core will not be able to issue
- on current cycle due to decoder. */
- core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
- false);
-}
-
-/* Revert the effect on ready_try. */
-static void
-core2i7_first_cycle_multipass_backtrack (const void *_data,
- signed char *ready_try,
- int n_ready ATTRIBUTE_UNUSED)
-{
- const_ix86_first_cycle_multipass_data_t data
- = (const_ix86_first_cycle_multipass_data_t) _data;
- unsigned int i = 0;
- sbitmap_iterator sbi;
-
- gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
- EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
- {
- ready_try[i] = 0;
- }
-}
-
-/* Save the result of multipass lookahead scheduling for the next round. */
-static void
-core2i7_first_cycle_multipass_end (const void *_data)
-{
- const_ix86_first_cycle_multipass_data_t data
- = (const_ix86_first_cycle_multipass_data_t) _data;
- ix86_first_cycle_multipass_data_t next_data
- = ix86_first_cycle_multipass_data;
-
- if (data != NULL)
- {
- next_data->ifetch_block_len = data->ifetch_block_len;
- next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
- }
-}
-
-/* Deallocate target data. */
-static void
-core2i7_first_cycle_multipass_fini (void *_data)
-{
- ix86_first_cycle_multipass_data_t data
- = (ix86_first_cycle_multipass_data_t) _data;
-
- if (data->ready_try_change)
- {
- sbitmap_free (data->ready_try_change);
- data->ready_try_change = NULL;
- data->ready_try_change_size = 0;
- }
-}
-
/* Prepare for scheduling pass. */
static void
ix86_sched_init_global (FILE *, int, int)
@@ -31541,25 +28495,7 @@ ix86_sched_init_global (FILE *, int, int)
to save compile time. */
if (reload_completed)
{
- targetm.sched.dfa_post_advance_cycle
- = core2i7_dfa_post_advance_cycle;
- targetm.sched.first_cycle_multipass_init
- = core2i7_first_cycle_multipass_init;
- targetm.sched.first_cycle_multipass_begin
- = core2i7_first_cycle_multipass_begin;
- targetm.sched.first_cycle_multipass_issue
- = core2i7_first_cycle_multipass_issue;
- targetm.sched.first_cycle_multipass_backtrack
- = core2i7_first_cycle_multipass_backtrack;
- targetm.sched.first_cycle_multipass_end
- = core2i7_first_cycle_multipass_end;
- targetm.sched.first_cycle_multipass_fini
- = core2i7_first_cycle_multipass_fini;
-
- /* Set decoder parameters. */
- core2i7_secondary_decoder_max_insn_size = 8;
- core2i7_ifetch_block_size = 16;
- core2i7_ifetch_block_max_insns = 6;
+ ix86_core2i7_init_hooks ();
break;
}
/* Fall through. */
@@ -32995,7 +29931,9 @@ ix86_init_mmx_sse_builtins (void)
UNSIGNED_FTYPE_VOID, IX86_BUILTIN_STMXCSR);
/* SSE or 3DNow!A */
- def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
+ def_builtin (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
+ /* As it uses V4HImode, we have to require -mmmx too. */
+ | OPTION_MASK_ISA_MMX,
"__builtin_ia32_maskmovq", VOID_FTYPE_V8QI_V8QI_PCHAR,
IX86_BUILTIN_MASKMOVQ);
@@ -33433,7 +30371,9 @@ ix86_init_mmx_sse_builtins (void)
def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_ext_v8hi",
HI_FTYPE_V8HI_INT, IX86_BUILTIN_VEC_EXT_V8HI);
- def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
+ /* As it uses V4HImode, we have to require -mmmx too. */
+ | OPTION_MASK_ISA_MMX,
"__builtin_ia32_vec_ext_v4hi",
HI_FTYPE_V4HI_INT, IX86_BUILTIN_VEC_EXT_V4HI);
@@ -33457,7 +30397,9 @@ ix86_init_mmx_sse_builtins (void)
def_builtin_const (OPTION_MASK_ISA_SSE2, "__builtin_ia32_vec_set_v8hi",
V8HI_FTYPE_V8HI_HI_INT, IX86_BUILTIN_VEC_SET_V8HI);
- def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A,
+ def_builtin_const (OPTION_MASK_ISA_SSE | OPTION_MASK_ISA_3DNOW_A
+ /* As it uses V4HImode, we have to require -mmmx too. */
+ | OPTION_MASK_ISA_MMX,
"__builtin_ia32_vec_set_v4hi",
V4HI_FTYPE_V4HI_HI_INT, IX86_BUILTIN_VEC_SET_V4HI);
@@ -37921,18 +34863,23 @@ ix86_expand_builtin (tree exp, rtx target, rtx subtarget,
Originally the builtin was not created if it wasn't applicable to the
current ISA based on the command line switches. With function specific
options, we need to check in the context of the function making the call
- whether it is supported. Treat AVX512VL specially. For other flags,
+ whether it is supported. Treat AVX512VL and MMX specially. For other flags,
if isa includes more than one ISA bit, treat those as requiring any
of them. For AVX512VL, require both AVX512VL and the non-AVX512VL
- ISAs. Similarly for 64BIT, but we shouldn't be building such builtins
+ ISAs. Likewise for MMX, require both MMX and the non-MMX ISAs.
+ Similarly for 64BIT, but we shouldn't be building such builtins
at all, -m64 is a whole TU option. */
if (((ix86_builtins_isa[fcode].isa
- & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT))
+ & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX
+ | OPTION_MASK_ISA_64BIT))
&& !(ix86_builtins_isa[fcode].isa
- & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_64BIT)
+ & ~(OPTION_MASK_ISA_AVX512VL | OPTION_MASK_ISA_MMX
+ | OPTION_MASK_ISA_64BIT)
& ix86_isa_flags))
|| ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_AVX512VL)
&& !(ix86_isa_flags & OPTION_MASK_ISA_AVX512VL))
+ || ((ix86_builtins_isa[fcode].isa & OPTION_MASK_ISA_MMX)
+ && !(ix86_isa_flags & OPTION_MASK_ISA_MMX))
|| (ix86_builtins_isa[fcode].isa2
&& !(ix86_builtins_isa[fcode].isa2 & ix86_isa_flags2)))
{
@@ -43124,8 +40071,8 @@ x86_function_profiler (FILE *file, int labelno ATTRIBUTE_UNUSED)
address sizes. This is enough to eliminate unnecessary padding in
99% of cases. */
-static int
-min_insn_size (rtx_insn *insn)
+int
+ix86_min_insn_size (rtx_insn *insn)
{
int l = 0, len;
@@ -43234,13 +40181,13 @@ ix86_avoid_jump_mispredicts (void)
njumps--, isjump = true;
else
isjump = false;
- nbytes -= min_insn_size (start);
+ nbytes -= ix86_min_insn_size (start);
}
}
continue;
}
- min_size = min_insn_size (insn);
+ min_size = ix86_min_insn_size (insn);
nbytes += min_size;
if (dump_file)
fprintf (dump_file, "Insn %i estimated to %i bytes\n",
@@ -43259,7 +40206,7 @@ ix86_avoid_jump_mispredicts (void)
njumps--, isjump = true;
else
isjump = false;
- nbytes -= min_insn_size (start);
+ nbytes -= ix86_min_insn_size (start);
}
gcc_assert (njumps >= 0);
if (dump_file)
@@ -43268,7 +40215,7 @@ ix86_avoid_jump_mispredicts (void)
if (njumps == 3 && isjump && nbytes < 16)
{
- int padsize = 15 - nbytes + min_insn_size (insn);
+ int padsize = 15 - nbytes + ix86_min_insn_size (insn);
if (dump_file)
fprintf (dump_file, "Padding insn %i by %i bytes!\n",
@@ -51029,806 +47976,19 @@ ix86_enum_va_list (int idx, const char **pname, tree *ptree)
}
#undef TARGET_SCHED_DISPATCH
-#define TARGET_SCHED_DISPATCH has_dispatch
+#define TARGET_SCHED_DISPATCH ix86_bd_has_dispatch
#undef TARGET_SCHED_DISPATCH_DO
-#define TARGET_SCHED_DISPATCH_DO do_dispatch
+#define TARGET_SCHED_DISPATCH_DO ix86_bd_do_dispatch
#undef TARGET_SCHED_REASSOCIATION_WIDTH
#define TARGET_SCHED_REASSOCIATION_WIDTH ix86_reassociation_width
#undef TARGET_SCHED_REORDER
-#define TARGET_SCHED_REORDER ix86_sched_reorder
+#define TARGET_SCHED_REORDER ix86_atom_sched_reorder
#undef TARGET_SCHED_ADJUST_PRIORITY
#define TARGET_SCHED_ADJUST_PRIORITY ix86_adjust_priority
#undef TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK
#define TARGET_SCHED_DEPENDENCIES_EVALUATION_HOOK \
ix86_dependencies_evaluation_hook
-/* The size of the dispatch window is the total number of bytes of
- object code allowed in a window. */
-#define DISPATCH_WINDOW_SIZE 16
-
-/* Number of dispatch windows considered for scheduling. */
-#define MAX_DISPATCH_WINDOWS 3
-
-/* Maximum number of instructions in a window. */
-#define MAX_INSN 4
-
-/* Maximum number of immediate operands in a window. */
-#define MAX_IMM 4
-
-/* Maximum number of immediate bits allowed in a window. */
-#define MAX_IMM_SIZE 128
-
-/* Maximum number of 32 bit immediates allowed in a window. */
-#define MAX_IMM_32 4
-
-/* Maximum number of 64 bit immediates allowed in a window. */
-#define MAX_IMM_64 2
-
-/* Maximum total of loads or prefetches allowed in a window. */
-#define MAX_LOAD 2
-
-/* Maximum total of stores allowed in a window. */
-#define MAX_STORE 1
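-
-/* As an illustration of the limits above: one 16-byte dispatch window
- may hold e.g. four 4-byte insns, of which at most two are loads or
- prefetches and at most one is a store. */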
-
-#undef BIG
-#define BIG 100
-
-
-/* Dispatch groups. Instructions that affect the mix in a dispatch window. */
-enum dispatch_group {
- disp_no_group = 0,
- disp_load,
- disp_store,
- disp_load_store,
- disp_prefetch,
- disp_imm,
- disp_imm_32,
- disp_imm_64,
- disp_branch,
- disp_cmp,
- disp_jcc,
- disp_last
-};
-
-/* Number of allowable groups in a dispatch window. It is an array
- indexed by the dispatch_group enum. 100 is used as a big number,
- because the number of these kinds of operations does not have any
- effect on the dispatch window, but we need them for other reasons
- in the table. */
-static unsigned int num_allowable_groups[disp_last] = {
- 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
-};
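/* Editor's note (sketch, not part of the patch): read together with the
   MAX_* limits above, the table says one window may hold 2 loads, 1
   store, 1 load-store, 2 prefetches, 4 immediates (at most 4 32-bit or
   2 64-bit ones) and 1 branch; disp_cmp and disp_jcc get BIG because
   their count never limits a window, they are only tracked.  */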
-
-char group_name[disp_last + 1][16] = {
- "disp_no_group", "disp_load", "disp_store", "disp_load_store",
- "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
- "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
-};
-
-/* Instruction path. */
-enum insn_path {
- no_path = 0,
- path_single, /* Single micro op. */
- path_double, /* Double micro op. */
- path_multi, /* Instructions with more than 2 micro ops. */
- last_path
-};
-
-/* sched_insn_info defines a window to the instructions scheduled in
- the basic block. It contains a pointer to the insn_info table and
- the instruction scheduled.
-
- Windows are allocated for each basic block and are linked
- together. */
-typedef struct sched_insn_info_s {
- rtx insn;
- enum dispatch_group group;
- enum insn_path path;
- int byte_len;
- int imm_bytes;
-} sched_insn_info;
-
-/* Linked list of dispatch windows. This is a two-way list of
- dispatch windows of a basic block. It contains information about
- the number of uops in the window and the total number of
- instructions and of bytes in the object code for this dispatch
- window. */
-typedef struct dispatch_windows_s {
- int num_insn; /* Number of insns in the window. */
- int num_uops; /* Number of uops in the window. */
- int window_size; /* Number of bytes in the window. */
- int window_num; /* Window number, either 0 or 1. */
- int num_imm; /* Number of immediates in the window. */
- int num_imm_32; /* Number of 32 bit immediates in the window. */
- int num_imm_64; /* Number of 64 bit immediates in the window. */
- int imm_size; /* Total bytes of immediates in the window. */
- int num_loads; /* Total memory loads in the window. */
- int num_stores; /* Total memory stores in the window. */
- int violation; /* Violation exists in window. */
- sched_insn_info *window; /* Pointer to the window. */
- struct dispatch_windows_s *next;
- struct dispatch_windows_s *prev;
-} dispatch_windows;
-
-/* Immediate values used in an insn. */
-typedef struct imm_info_s
- {
- int imm;
- int imm32;
- int imm64;
- } imm_info;
-
-static dispatch_windows *dispatch_window_list;
-static dispatch_windows *dispatch_window_list1;
-
-/* Get dispatch group of insn. */
-
-static enum dispatch_group
-get_mem_group (rtx_insn *insn)
-{
- enum attr_memory memory;
-
- if (INSN_CODE (insn) < 0)
- return disp_no_group;
- memory = get_attr_memory (insn);
- if (memory == MEMORY_STORE)
- return disp_store;
-
- if (memory == MEMORY_LOAD)
- return disp_load;
-
- if (memory == MEMORY_BOTH)
- return disp_load_store;
-
- return disp_no_group;
-}
-
-/* Return true if insn is a compare instruction. */
-
-static bool
-is_cmp (rtx_insn *insn)
-{
- enum attr_type type;
-
- type = get_attr_type (insn);
- return (type == TYPE_TEST
- || type == TYPE_ICMP
- || type == TYPE_FCMP
- || GET_CODE (PATTERN (insn)) == COMPARE);
-}
-
-/* Return true if a dispatch violation was encountered. */
-
-static bool
-dispatch_violation (void)
-{
- if (dispatch_window_list->next)
- return dispatch_window_list->next->violation;
- return dispatch_window_list->violation;
-}
-
-/* Return true if insn is a branch instruction. */
-
-static bool
-is_branch (rtx_insn *insn)
-{
- return (CALL_P (insn) || JUMP_P (insn));
-}
-
-/* Return true if insn is a prefetch instruction. */
-
-static bool
-is_prefetch (rtx_insn *insn)
-{
- return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
-}
-
-/* This function initializes a dispatch window and the list container holding a
- pointer to the window. */
-
-static void
-init_window (int window_num)
-{
- int i;
- dispatch_windows *new_list;
-
- if (window_num == 0)
- new_list = dispatch_window_list;
- else
- new_list = dispatch_window_list1;
-
- new_list->num_insn = 0;
- new_list->num_uops = 0;
- new_list->window_size = 0;
- new_list->next = NULL;
- new_list->prev = NULL;
- new_list->window_num = window_num;
- new_list->num_imm = 0;
- new_list->num_imm_32 = 0;
- new_list->num_imm_64 = 0;
- new_list->imm_size = 0;
- new_list->num_loads = 0;
- new_list->num_stores = 0;
- new_list->violation = false;
-
- for (i = 0; i < MAX_INSN; i++)
- {
- new_list->window[i].insn = NULL;
- new_list->window[i].group = disp_no_group;
- new_list->window[i].path = no_path;
- new_list->window[i].byte_len = 0;
- new_list->window[i].imm_bytes = 0;
- }
- return;
-}
-
-/* This function allocates and initializes a dispatch window and the
- list container holding a pointer to the window. */
-
-static dispatch_windows *
-allocate_window (void)
-{
- dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
- new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
-
- return new_list;
-}
-
-/* This routine initializes the dispatch scheduling information. It
- initiates building dispatch scheduler tables and constructs the
- first dispatch window. */
-
-static void
-init_dispatch_sched (void)
-{
- /* Allocate a dispatch list and a window. */
- dispatch_window_list = allocate_window ();
- dispatch_window_list1 = allocate_window ();
- init_window (0);
- init_window (1);
-}
-
-/* This function returns true if a branch is detected. End of a basic block
- does not have to be a branch, but here we assume only branches end a
- window. */
-
-static bool
-is_end_basic_block (enum dispatch_group group)
-{
- return group == disp_branch;
-}
-
-/* This function is called when the end of a window processing is reached. */
-
-static void
-process_end_window (void)
-{
- gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
- if (dispatch_window_list->next)
- {
- gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
- gcc_assert (dispatch_window_list->window_size
- + dispatch_window_list1->window_size <= 48);
- init_window (1);
- }
- init_window (0);
-}
-
-/* Allocates a new dispatch window and adds it to WINDOW_LIST.
- WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
- for 48 bytes of instructions. Note that these windows are not dispatch
- windows whose sizes are DISPATCH_WINDOW_SIZE. */
-
-static dispatch_windows *
-allocate_next_window (int window_num)
-{
- if (window_num == 0)
- {
- if (dispatch_window_list->next)
- init_window (1);
- init_window (0);
- return dispatch_window_list;
- }
-
- dispatch_window_list->next = dispatch_window_list1;
- dispatch_window_list1->prev = dispatch_window_list;
-
- return dispatch_window_list1;
-}
-
-/* Compute number of immediate operands of an instruction. */
-
-static void
-find_constant (rtx in_rtx, imm_info *imm_values)
-{
- if (INSN_P (in_rtx))
- in_rtx = PATTERN (in_rtx);
- subrtx_iterator::array_type array;
- FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
- if (const_rtx x = *iter)
- switch (GET_CODE (x))
- {
- case CONST:
- case SYMBOL_REF:
- case CONST_INT:
- (imm_values->imm)++;
- if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
- (imm_values->imm32)++;
- else
- (imm_values->imm64)++;
- break;
-
- case CONST_DOUBLE:
- case CONST_WIDE_INT:
- (imm_values->imm)++;
- (imm_values->imm64)++;
- break;
-
- case CODE_LABEL:
- if (LABEL_KIND (x) == LABEL_NORMAL)
- {
- (imm_values->imm)++;
- (imm_values->imm32)++;
- }
- break;
-
- default:
- break;
- }
-}
-
-/* Return total size of immediate operands of an instruction along with number
- of corresponding immediate operands. It initializes its parameters to zero
- before calling FIND_CONSTANT.
- INSN is the input instruction. IMM is the total of immediates.
- IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
- bit immediates. */
-
-static int
-get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
-{
- imm_info imm_values = {0, 0, 0};
-
- find_constant (insn, &imm_values);
- *imm = imm_values.imm;
- *imm32 = imm_values.imm32;
- *imm64 = imm_values.imm64;
- return imm_values.imm32 * 4 + imm_values.imm64 * 8;
-}
-
-/* This function indicates whether an instruction has an immediate
- operand. */
-
-static bool
-has_immediate (rtx_insn *insn)
-{
- int num_imm_operand;
- int num_imm32_operand;
- int num_imm64_operand;
-
- if (insn)
- return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
- &num_imm64_operand);
- return false;
-}
-
-/* Return single or double path for instructions. */
-
-static enum insn_path
-get_insn_path (rtx_insn *insn)
-{
- enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
-
- if ((int)path == 0)
- return path_single;
-
- if ((int)path == 1)
- return path_double;
-
- return path_multi;
-}
-
-/* Return insn dispatch group. */
-
-static enum dispatch_group
-get_insn_group (rtx_insn *insn)
-{
- enum dispatch_group group = get_mem_group (insn);
- if (group)
- return group;
-
- if (is_branch (insn))
- return disp_branch;
-
- if (is_cmp (insn))
- return disp_cmp;
-
- if (has_immediate (insn))
- return disp_imm;
-
- if (is_prefetch (insn))
- return disp_prefetch;
-
- return disp_no_group;
-}
-
-/* Count number of GROUP restricted instructions in a dispatch
- window WINDOW_LIST. */
-
-static int
-count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
-{
- enum dispatch_group group = get_insn_group (insn);
- int imm_size;
- int num_imm_operand;
- int num_imm32_operand;
- int num_imm64_operand;
-
- if (group == disp_no_group)
- return 0;
-
- if (group == disp_imm)
- {
- imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
- &num_imm64_operand);
- if (window_list->imm_size + imm_size > MAX_IMM_SIZE
- || num_imm_operand + window_list->num_imm > MAX_IMM
- || (num_imm32_operand > 0
- && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
- || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
- || (num_imm64_operand > 0
- && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
- || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
- || (window_list->imm_size + imm_size == MAX_IMM_SIZE
- && num_imm64_operand > 0
- && ((window_list->num_imm_64 > 0
- && window_list->num_insn >= 2)
- || window_list->num_insn >= 3)))
- return BIG;
-
- return 1;
- }
-
- if ((group == disp_load_store
- && (window_list->num_loads >= MAX_LOAD
- || window_list->num_stores >= MAX_STORE))
- || ((group == disp_load
- || group == disp_prefetch)
- && window_list->num_loads >= MAX_LOAD)
- || (group == disp_store
- && window_list->num_stores >= MAX_STORE))
- return BIG;
-
- return 1;
-}
-
-/* This function returns true if insn satisfies dispatch rules on the
- last window scheduled. */
-
-static bool
-fits_dispatch_window (rtx_insn *insn)
-{
- dispatch_windows *window_list = dispatch_window_list;
- dispatch_windows *window_list_next = dispatch_window_list->next;
- unsigned int num_restrict;
- enum dispatch_group group = get_insn_group (insn);
- enum insn_path path = get_insn_path (insn);
- int sum;
-
- /* Make disp_cmp and disp_jcc get scheduled at the latest. These
- instructions should be given the lowest priority in the
- scheduling process in the Haifa scheduler to make sure they will be
- scheduled in the same dispatch window as the reference to them. */
- if (group == disp_jcc || group == disp_cmp)
- return false;
-
- /* Check nonrestricted. */
- if (group == disp_no_group || group == disp_branch)
- return true;
-
- /* Get last dispatch window. */
- if (window_list_next)
- window_list = window_list_next;
-
- if (window_list->window_num == 1)
- {
- sum = window_list->prev->window_size + window_list->window_size;
-
- if (sum == 32
- || (min_insn_size (insn) + sum) >= 48)
- /* Window 1 is full. Go for next window. */
- return true;
- }
-
- num_restrict = count_num_restricted (insn, window_list);
-
- if (num_restrict > num_allowable_groups[group])
- return false;
-
- /* See if it fits in the first window. */
- if (window_list->window_num == 0)
- {
- /* The first window should have only single- and double-path
- uops. */
- if (path == path_double
- && (window_list->num_uops + 2) > MAX_INSN)
- return false;
- else if (path != path_single)
- return false;
- }
- return true;
-}
-
-/* Add an instruction INSN with NUM_UOPS micro-operations to the
- dispatch window WINDOW_LIST. */
-
-static void
-add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
-{
- int byte_len = min_insn_size (insn);
- int num_insn = window_list->num_insn;
- int imm_size;
- sched_insn_info *window = window_list->window;
- enum dispatch_group group = get_insn_group (insn);
- enum insn_path path = get_insn_path (insn);
- int num_imm_operand;
- int num_imm32_operand;
- int num_imm64_operand;
-
- if (!window_list->violation && group != disp_cmp
- && !fits_dispatch_window (insn))
- window_list->violation = true;
-
- imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
- &num_imm64_operand);
-
- /* Initialize window with new instruction. */
- window[num_insn].insn = insn;
- window[num_insn].byte_len = byte_len;
- window[num_insn].group = group;
- window[num_insn].path = path;
- window[num_insn].imm_bytes = imm_size;
-
- window_list->window_size += byte_len;
- window_list->num_insn = num_insn + 1;
- window_list->num_uops = window_list->num_uops + num_uops;
- window_list->imm_size += imm_size;
- window_list->num_imm += num_imm_operand;
- window_list->num_imm_32 += num_imm32_operand;
- window_list->num_imm_64 += num_imm64_operand;
-
- if (group == disp_store)
- window_list->num_stores += 1;
- else if (group == disp_load
- || group == disp_prefetch)
- window_list->num_loads += 1;
- else if (group == disp_load_store)
- {
- window_list->num_stores += 1;
- window_list->num_loads += 1;
- }
-}
-
-/* Adds a scheduled instruction, INSN, to the current dispatch window.
- If the total bytes of instructions or the number of instructions in
- the window exceeds the allowable limit, it allocates a new window. */
-
-static void
-add_to_dispatch_window (rtx_insn *insn)
-{
- int byte_len;
- dispatch_windows *window_list;
- dispatch_windows *next_list;
- dispatch_windows *window0_list;
- enum insn_path path;
- enum dispatch_group insn_group;
- bool insn_fits;
- int num_insn;
- int num_uops;
- int window_num;
- int insn_num_uops;
- int sum;
-
- if (INSN_CODE (insn) < 0)
- return;
-
- byte_len = min_insn_size (insn);
- window_list = dispatch_window_list;
- next_list = window_list->next;
- path = get_insn_path (insn);
- insn_group = get_insn_group (insn);
-
- /* Get the last dispatch window. */
- if (next_list)
- window_list = dispatch_window_list->next;
-
- if (path == path_single)
- insn_num_uops = 1;
- else if (path == path_double)
- insn_num_uops = 2;
- else
- insn_num_uops = (int) path;
-
- /* If the current window is full, get a new window.
- Window number zero is full if MAX_INSN uops are scheduled in it.
- Window number one is full if window zero's bytes plus window
- one's bytes reach 32, if the bytes of the new instruction added
- to the total make it greater than 48, or if it already has MAX_INSN
- instructions in it. */
- num_insn = window_list->num_insn;
- num_uops = window_list->num_uops;
- window_num = window_list->window_num;
- insn_fits = fits_dispatch_window (insn);
-
- if (num_insn >= MAX_INSN
- || num_uops + insn_num_uops > MAX_INSN
- || !(insn_fits))
- {
- window_num = ~window_num & 1;
- window_list = allocate_next_window (window_num);
- }
-
- if (window_num == 0)
- {
- add_insn_window (insn, window_list, insn_num_uops);
- if (window_list->num_insn >= MAX_INSN
- && insn_group == disp_branch)
- {
- process_end_window ();
- return;
- }
- }
- else if (window_num == 1)
- {
- window0_list = window_list->prev;
- sum = window0_list->window_size + window_list->window_size;
- if (sum == 32
- || (byte_len + sum) >= 48)
- {
- process_end_window ();
- window_list = dispatch_window_list;
- }
-
- add_insn_window (insn, window_list, insn_num_uops);
- }
- else
- gcc_unreachable ();
-
- if (is_end_basic_block (insn_group))
- {
- /* End of basic block is reached; do end-basic-block processing. */
- process_end_window ();
- return;
- }
-}
-
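/* Editor's walk-through (sketch, not part of the patch), using the
   constants defined above (MAX_INSN == 4, two windows covering at most
   48 bytes): feeding five single-uop, 4-byte insns through
   add_to_dispatch_window, insns 1-4 land in window 0, which then has
   num_insn == MAX_INSN; insn 5 therefore flips window_num to 1 and
   allocate_next_window links window 1 behind window 0.  Window 1 is
   later closed by process_end_window once the two windows' combined
   window_size reaches 32, or would pass 48 with the next insn.  */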
-/* Print the dispatch window, WINDOW_NUM, to FILE. */
-
-DEBUG_FUNCTION static void
-debug_dispatch_window_file (FILE *file, int window_num)
-{
- dispatch_windows *list;
- int i;
-
- if (window_num == 0)
- list = dispatch_window_list;
- else
- list = dispatch_window_list1;
-
- fprintf (file, "Window #%d:\n", list->window_num);
- fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
- list->num_insn, list->num_uops, list->window_size);
- fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
- list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
-
- fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
- list->num_stores);
- fprintf (file, " insn info:\n");
-
- for (i = 0; i < MAX_INSN; i++)
- {
- if (!list->window[i].insn)
- break;
- fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
- i, group_name[list->window[i].group],
- i, (void *)list->window[i].insn,
- i, list->window[i].path,
- i, list->window[i].byte_len,
- i, list->window[i].imm_bytes);
- }
-}
-
-/* Print to stdout a dispatch window. */
-
-DEBUG_FUNCTION void
-debug_dispatch_window (int window_num)
-{
- debug_dispatch_window_file (stdout, window_num);
-}
-
-/* Print INSN dispatch information to FILE. */
-
-DEBUG_FUNCTION static void
-debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
-{
- int byte_len;
- enum insn_path path;
- enum dispatch_group group;
- int imm_size;
- int num_imm_operand;
- int num_imm32_operand;
- int num_imm64_operand;
-
- if (INSN_CODE (insn) < 0)
- return;
-
- byte_len = min_insn_size (insn);
- path = get_insn_path (insn);
- group = get_insn_group (insn);
- imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
- &num_imm64_operand);
-
- fprintf (file, " insn info:\n");
- fprintf (file, " group = %s, path = %d, byte_len = %d\n",
- group_name[group], path, byte_len);
- fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
- num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
-}
-
-/* Print to stdout the status of the ready list with respect to
- dispatch windows. */
-
-DEBUG_FUNCTION void
-debug_ready_dispatch (void)
-{
- int i;
- int no_ready = number_in_ready ();
-
- fprintf (stdout, "Number of ready: %d\n", no_ready);
-
- for (i = 0; i < no_ready; i++)
- debug_insn_dispatch_info_file (stdout, get_ready_element (i));
-}
-
-/* This routine is the driver of the dispatch scheduler. */
-
-static void
-do_dispatch (rtx_insn *insn, int mode)
-{
- if (mode == DISPATCH_INIT)
- init_dispatch_sched ();
- else if (mode == ADD_TO_DISPATCH_WINDOW)
- add_to_dispatch_window (insn);
-}
-
-/* Return TRUE if Dispatch Scheduling is supported. */
-
-static bool
-has_dispatch (rtx_insn *insn, int action)
-{
- /* Current implementation of the dispatch scheduler models Bulldozer only. */
- if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3
- || TARGET_BDVER4) && flag_dispatch_scheduler)
- switch (action)
- {
- default:
- return false;
-
- case IS_DISPATCH_ON:
- return true;
-
- case IS_CMP:
- return is_cmp (insn);
-
- case DISPATCH_VIOLATION:
- return dispatch_violation ();
-
- case FITS_DISPATCH_WINDOW:
- return fits_dispatch_window (insn);
- }
-
- return false;
-}
/* Implementation of reassociation_width target hook used by
reassoc phase to identify parallelism level in reassociated
@@ -52862,6 +49022,19 @@ ix86_excess_precision (enum excess_precision_type type)
return FLT_EVAL_METHOD_UNPREDICTABLE;
}
+/* Implement PUSH_ROUNDING. On 386, we have a pushw instruction that
+ decrements by exactly 2 no matter what the position was; there is no pushb.
+
+ But as the CIE data alignment factor on this arch is -4 for 32-bit targets
+ and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
+ are a multiple of 4 for 32-bit targets and 8 for 64-bit targets. */
+
+poly_int64
+ix86_push_rounding (poly_int64 bytes)
+{
+ return ROUND_UP (bytes, UNITS_PER_WORD);
+}
+
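/* Editor's illustration (not part of the patch): the rounding rule
   ix86_push_rounding implements, with a local ROUND_UP that mirrors
   GCC's "round X up to a multiple of Y" semantics for the power-of-two
   word sizes involved.  */
#include <assert.h>
#define ROUND_UP(x, y) (((x) + (y) - 1) & ~((y) - 1))

int
main (void)
{
  assert (ROUND_UP (2, 4) == 4);  /* pushw, 32-bit target: 4-byte adjust */
  assert (ROUND_UP (2, 8) == 8);  /* pushw, 64-bit target: 8-byte adjust */
  assert (ROUND_UP (8, 8) == 8);  /* already aligned: unchanged */
  return 0;
}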
/* Target-specific selftests. */
#if CHECKING_P
@@ -53521,6 +49694,9 @@ ix86_run_selftests (void)
#undef TARGET_CONDITIONAL_REGISTER_USAGE
#define TARGET_CONDITIONAL_REGISTER_USAGE ix86_conditional_register_usage
+#undef TARGET_CANONICALIZE_COMPARISON
+#define TARGET_CANONICALIZE_COMPARISON ix86_canonicalize_comparison
+
#undef TARGET_LOOP_UNROLL_ADJUST
#define TARGET_LOOP_UNROLL_ADJUST ix86_loop_unroll_adjust
diff --git a/gcc/config/i386/i386.h b/gcc/config/i386/i386.h
index ef88d89cae2..1c796ef392d 100644
--- a/gcc/config/i386/i386.h
+++ b/gcc/config/i386/i386.h
@@ -1519,21 +1519,7 @@ enum reg_class
goes at a more negative offset in the frame. */
#define FRAME_GROWS_DOWNWARD 1
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-#define STARTING_FRAME_OFFSET 0
-
-/* If we generate an insn to push BYTES bytes, this says how many the stack
- pointer really advances by. On 386, we have a pushw instruction that
- decrements by exactly 2 no matter what the position was; there is no pushb.
-
- But as the CIE data alignment factor on this arch is -4 for 32-bit targets
- and -8 for 64-bit targets, we need to make sure all stack pointer adjustments
- are a multiple of 4 for 32-bit targets and 8 for 64-bit targets. */
-
-#define PUSH_ROUNDING(BYTES) ROUND_UP ((BYTES), UNITS_PER_WORD)
+#define PUSH_ROUNDING(BYTES) ix86_push_rounding (BYTES)
/* If defined, the maximum amount of space required for outgoing arguments
will be computed and placed into the variable `crtl->outgoing_args_size'.
diff --git a/gcc/config/i386/i386.md b/gcc/config/i386/i386.md
index 1a794c0ae06..3413b90028f 100644
--- a/gcc/config/i386/i386.md
+++ b/gcc/config/i386/i386.md
@@ -1612,8 +1612,8 @@
(unspec:HI
[(compare:CCFP
(match_operand:X87MODEF 1 "register_operand" "f")
- (match_operator:X87MODEF 3 "float_operator"
- [(match_operand:SWI24 2 "memory_operand" "m")]))]
+ (float:X87MODEF
+ (match_operand:SWI24 2 "memory_operand" "m")))]
UNSPEC_FNSTSW))]
"TARGET_80387
&& (TARGET_USE_<SWI24:MODE>MODE_FIOP
@@ -1628,8 +1628,8 @@
[(set (reg:CCFP FLAGS_REG)
(compare:CCFP
(match_operand:X87MODEF 1 "register_operand" "f")
- (match_operator:X87MODEF 3 "float_operator"
- [(match_operand:SWI24 2 "memory_operand" "m")])))
+ (float:X87MODEF
+ (match_operand:SWI24 2 "memory_operand" "m"))))
(clobber (match_operand:HI 0 "register_operand" "=a"))]
"TARGET_80387 && TARGET_SAHF && !TARGET_CMOVE
&& (TARGET_USE_<SWI24:MODE>MODE_FIOP
@@ -1640,7 +1640,7 @@
(unspec:HI
[(compare:CCFP
(match_dup 1)
- (match_op_dup 3 [(match_dup 2)]))]
+ (float:X87MODEF (match_dup 2)))]
UNSPEC_FNSTSW))
(set (reg:CC FLAGS_REG)
(unspec:CC [(match_dup 0)] UNSPEC_SAHF))]
@@ -6264,7 +6264,7 @@
(set_attr "mode" "<MODE>")])
(define_insn "addqi_ext_1"
- [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
(const_int 8)
(const_int 8))
(subreg:SI
@@ -6275,7 +6275,8 @@
(const_int 8)) 0)
(match_operand:QI 2 "general_operand" "QnBc,m")) 0))
(clobber (reg:CC FLAGS_REG))]
- ""
+ "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
+ rtx_equal_p (operands[0], operands[1])"
{
switch (get_attr_type (insn))
{
@@ -6300,7 +6301,7 @@
(set_attr "mode" "QI")])
(define_insn "*addqi_ext_2"
- [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
(const_int 8)
(const_int 8))
(subreg:SI
@@ -6314,7 +6315,9 @@
(const_int 8)
(const_int 8)) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
- ""
+ "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
+ rtx_equal_p (operands[0], operands[1])
+ || rtx_equal_p (operands[0], operands[2])"
"add{b}\t{%h2, %h0|%h0, %h2}"
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
@@ -8998,7 +9001,7 @@
(set_attr "mode" "QI")])
(define_insn "andqi_ext_1"
- [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
(const_int 8)
(const_int 8))
(subreg:SI
@@ -9009,7 +9012,8 @@
(const_int 8)) 0)
(match_operand:QI 2 "general_operand" "QnBc,m")) 0))
(clobber (reg:CC FLAGS_REG))]
- ""
+ "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
+ rtx_equal_p (operands[0], operands[1])"
"and{b}\t{%2, %h0|%h0, %2}"
[(set_attr "isa" "*,nox64")
(set_attr "type" "alu")
@@ -9027,7 +9031,7 @@
(const_int 8)) 0)
(match_operand:QI 2 "general_operand" "QnBc,m"))
(const_int 0)))
- (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
(const_int 8)
(const_int 8))
(subreg:SI
@@ -9037,14 +9041,16 @@
(const_int 8)
(const_int 8)) 0)
(match_dup 2)) 0))]
- "ix86_match_ccmode (insn, CCNOmode)"
+ "ix86_match_ccmode (insn, CCNOmode)
+ /* FIXME: without this LRA can't reload this pattern, see PR82524. */
+ && rtx_equal_p (operands[0], operands[1])"
"and{b}\t{%2, %h0|%h0, %2}"
[(set_attr "isa" "*,nox64")
(set_attr "type" "alu")
(set_attr "mode" "QI")])
(define_insn "*andqi_ext_2"
- [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
(const_int 8)
(const_int 8))
(subreg:SI
@@ -9058,7 +9064,9 @@
(const_int 8)
(const_int 8)) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
- ""
+ "/* FIXME: without this LRA can't reload this pattern, see PR82524. */
+ rtx_equal_p (operands[0], operands[1])
+ || rtx_equal_p (operands[0], operands[2])"
"and{b}\t{%h2, %h0|%h0, %h2}"
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
@@ -9431,7 +9439,7 @@
(set_attr "mode" "<MODE>")])
(define_insn "*<code>qi_ext_1"
- [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
(const_int 8)
(const_int 8))
(subreg:SI
@@ -9442,14 +9450,16 @@
(const_int 8)) 0)
(match_operand:QI 2 "general_operand" "QnBc,m")) 0))
(clobber (reg:CC FLAGS_REG))]
- "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ /* FIXME: without this LRA can't reload this pattern, see PR82524. */
+ && rtx_equal_p (operands[0], operands[1])"
"<logic>{b}\t{%2, %h0|%h0, %2}"
[(set_attr "isa" "*,nox64")
(set_attr "type" "alu")
(set_attr "mode" "QI")])
(define_insn "*<code>qi_ext_2"
- [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q")
+ [(set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q")
(const_int 8)
(const_int 8))
(subreg:SI
@@ -9463,7 +9473,10 @@
(const_int 8)
(const_int 8)) 0)) 0))
(clobber (reg:CC FLAGS_REG))]
- "!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun)"
+ "(!TARGET_PARTIAL_REG_STALL || optimize_function_for_size_p (cfun))
+ /* FIXME: without this LRA can't reload this pattern, see PR82524. */
+ && (rtx_equal_p (operands[0], operands[1])
+ || rtx_equal_p (operands[0], operands[2]))"
"<logic>{b}\t{%h2, %h0|%h0, %h2}"
[(set_attr "type" "alu")
(set_attr "mode" "QI")])
@@ -9552,7 +9565,7 @@
(const_int 8)) 0)
(match_operand:QI 2 "general_operand" "QnBc,m"))
(const_int 0)))
- (set (zero_extract:SI (match_operand 0 "ext_register_operand" "=Q,Q")
+ (set (zero_extract:SI (match_operand 0 "ext_register_operand" "+Q,Q")
(const_int 8)
(const_int 8))
(subreg:SI
@@ -9562,7 +9575,9 @@
(const_int 8)
(const_int 8)) 0)
(match_dup 2)) 0))]
- "ix86_match_ccmode (insn, CCNOmode)"
+ "ix86_match_ccmode (insn, CCNOmode)
+ /* FIXME: without this LRA can't reload this pattern, see PR82524. */
+ && rtx_equal_p (operands[0], operands[1])"
"xor{b}\t{%2, %h0|%h0, %2}"
[(set_attr "isa" "*,nox64")
(set_attr "type" "alu")
@@ -10228,6 +10243,26 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[2] = gen_lowpart (QImode, operands[2]);")
+(define_insn_and_split "*ashl<mode>3_mask_1"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand")
+ (ashift:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand")
+ (and:QI
+ (match_operand:QI 2 "register_operand")
+ (match_operand:QI 3 "const_int_operand"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (ASHIFT, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (ashift:SWI48 (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])])
+
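/* Editor's sketch (not part of the patch; shl_masked is a hypothetical
   example): the source-level shape the new *_mask_1 patterns match.
   x86 shift instructions already truncate the count to the operand
   width, so a mask that keeps exactly the low bits is redundant and the
   split emits a plain shift by %cl.  The any_shiftrt and any_rotate
   variants below apply the same rule.  */
unsigned int
shl_masked (unsigned int x, unsigned int c)
{
  return x << (c & 31);  /* the (and:QI ... 31) is GET_MODE_BITSIZE-1 */
}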
(define_insn "*bmi2_ashl<mode>3_1"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(ashift:SWI48 (match_operand:SWI48 1 "nonimmediate_operand" "rm")
@@ -10728,6 +10763,26 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[2] = gen_lowpart (QImode, operands[2]);")
+(define_insn_and_split "*<shift_insn><mode>3_mask_1"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand")
+ (any_shiftrt:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand")
+ (and:QI
+ (match_operand:QI 2 "register_operand")
+ (match_operand:QI 3 "const_int_operand"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (any_shiftrt:SWI48 (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])])
+
(define_insn_and_split "*<shift_insn><mode>3_doubleword"
[(set (match_operand:DWI 0 "register_operand" "=&r")
(any_shiftrt:DWI (match_operand:DWI 1 "register_operand" "0")
@@ -11187,6 +11242,26 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[2] = gen_lowpart (QImode, operands[2]);")
+(define_insn_and_split "*<rotate_insn><mode>3_mask_1"
+ [(set (match_operand:SWI48 0 "nonimmediate_operand")
+ (any_rotate:SWI48
+ (match_operand:SWI48 1 "nonimmediate_operand")
+ (and:QI
+ (match_operand:QI 2 "register_operand")
+ (match_operand:QI 3 "const_int_operand"))))
+ (clobber (reg:CC FLAGS_REG))]
+ "ix86_binary_operator_ok (<CODE>, <MODE>mode, operands)
+ && (INTVAL (operands[3]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (any_rotate:SWI48 (match_dup 1)
+ (match_dup 2)))
+ (clobber (reg:CC FLAGS_REG))])])
+
;; Implement rotation using two double-precision
;; shift instructions and a scratch register.
@@ -11494,6 +11569,30 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[1] = gen_lowpart (QImode, operands[1]);")
+(define_insn_and_split "*<btsc><mode>_mask_1"
+ [(set (match_operand:SWI48 0 "register_operand")
+ (any_or:SWI48
+ (ashift:SWI48
+ (const_int 1)
+ (and:QI
+ (match_operand:QI 1 "register_operand")
+ (match_operand:QI 2 "const_int_operand")))
+ (match_operand:SWI48 3 "register_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_BT
+ && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (any_or:SWI48
+ (ashift:SWI48 (const_int 1)
+ (match_dup 1))
+ (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])])
+
(define_insn "*btr<mode>"
[(set (match_operand:SWI48 0 "register_operand" "=r")
(and:SWI48
@@ -11535,6 +11634,30 @@
(clobber (reg:CC FLAGS_REG))])]
"operands[1] = gen_lowpart (QImode, operands[1]);")
+(define_insn_and_split "*btr<mode>_mask_1"
+ [(set (match_operand:SWI48 0 "register_operand")
+ (and:SWI48
+ (rotate:SWI48
+ (const_int -2)
+ (and:QI
+ (match_operand:QI 1 "register_operand")
+ (match_operand:QI 2 "const_int_operand")))
+ (match_operand:SWI48 3 "register_operand")))
+ (clobber (reg:CC FLAGS_REG))]
+ "TARGET_USE_BT
+ && (INTVAL (operands[2]) & (GET_MODE_BITSIZE (<MODE>mode)-1))
+ == GET_MODE_BITSIZE (<MODE>mode)-1
+ && can_create_pseudo_p ()"
+ "#"
+ "&& 1"
+ [(parallel
+ [(set (match_dup 0)
+ (and:SWI48
+ (rotate:SWI48 (const_int -2)
+ (match_dup 1))
+ (match_dup 3)))
+ (clobber (reg:CC FLAGS_REG))])])
+
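/* Editor's sketch (not part of the patch; set_bit/clear_bit are
   hypothetical examples): the idioms the bt-family *_mask_1 patterns
   recognize when TARGET_USE_BT.  Rotating the constant -2 (~1) left by
   C yields ~(1 << C), which is why the btr pattern matches a rotate of
   -2 rather than a literal complement.  */
unsigned int
set_bit (unsigned int y, unsigned int c)
{
  return y | (1u << (c & 31));   /* any_or form: bts/btc */
}

unsigned int
clear_bit (unsigned int y, unsigned int c)
{
  return y & ~(1u << (c & 31));  /* canonicalized to (and (rotate -2 C) y): btr */
}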
;; These instructions are never faster than the corresponding
;; and/ior/xor operations when using immediate operand, so with
;; 32-bit there's no point. But in 64-bit, we can't hold the
@@ -12032,142 +12155,6 @@
if (! ix86_comparison_operator (operands[0], VOIDmode))
FAIL;
})
-
-;; Define combination compare-and-branch fp compare instructions to help
-;; combine.
-
-(define_insn "*jcc<mode>_0_i387"
- [(set (pc)
- (if_then_else (match_operator:CCFP 0 "ix86_fp_comparison_operator"
- [(match_operand:X87MODEF 1 "register_operand" "f")
- (match_operand:X87MODEF 2 "const0_operand")])
- (label_ref (match_operand 3))
- (pc)))
- (clobber (reg:CCFP FPSR_REG))
- (clobber (reg:CCFP FLAGS_REG))
- (clobber (match_scratch:HI 4 "=a"))]
- "TARGET_80387 && !TARGET_CMOVE"
- "#")
-
-(define_insn "*jccxf_i387"
- [(set (pc)
- (if_then_else (match_operator:CCFP 0 "ix86_fp_comparison_operator"
- [(match_operand:XF 1 "register_operand" "f")
- (match_operand:XF 2 "register_operand" "f")])
- (label_ref (match_operand 3))
- (pc)))
- (clobber (reg:CCFP FPSR_REG))
- (clobber (reg:CCFP FLAGS_REG))
- (clobber (match_scratch:HI 4 "=a"))]
- "TARGET_80387 && !TARGET_CMOVE"
- "#")
-
-(define_insn "*jcc<mode>_i387"
- [(set (pc)
- (if_then_else (match_operator:CCFP 0 "ix86_fp_comparison_operator"
- [(match_operand:MODEF 1 "register_operand" "f")
- (match_operand:MODEF 2 "nonimmediate_operand" "fm")])
- (label_ref (match_operand 3))
- (pc)))
- (clobber (reg:CCFP FPSR_REG))
- (clobber (reg:CCFP FLAGS_REG))
- (clobber (match_scratch:HI 4 "=a"))]
- "TARGET_80387 && !TARGET_CMOVE"
- "#")
-
-(define_insn "*jccu<mode>_i387"
- [(set (pc)
- (if_then_else (match_operator:CCFPU 0 "ix86_fp_comparison_operator"
- [(match_operand:X87MODEF 1 "register_operand" "f")
- (match_operand:X87MODEF 2 "register_operand" "f")])
- (label_ref (match_operand 3))
- (pc)))
- (clobber (reg:CCFP FPSR_REG))
- (clobber (reg:CCFP FLAGS_REG))
- (clobber (match_scratch:HI 4 "=a"))]
- "TARGET_80387 && !TARGET_CMOVE"
- "#")
-
-(define_split
- [(set (pc)
- (if_then_else (match_operator 0 "ix86_fp_comparison_operator"
- [(match_operand:X87MODEF 1 "register_operand")
- (match_operand:X87MODEF 2 "nonimmediate_operand")])
- (match_operand 3)
- (match_operand 4)))
- (clobber (reg:CCFP FPSR_REG))
- (clobber (reg:CCFP FLAGS_REG))]
- "TARGET_80387 && !TARGET_CMOVE
- && reload_completed"
- [(const_int 0)]
-{
- ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2],
- operands[3], operands[4], NULL_RTX);
- DONE;
-})
-
-(define_split
- [(set (pc)
- (if_then_else (match_operator 0 "ix86_fp_comparison_operator"
- [(match_operand:X87MODEF 1 "register_operand")
- (match_operand:X87MODEF 2 "general_operand")])
- (match_operand 3)
- (match_operand 4)))
- (clobber (reg:CCFP FPSR_REG))
- (clobber (reg:CCFP FLAGS_REG))
- (clobber (match_scratch:HI 5))]
- "TARGET_80387 && !TARGET_CMOVE
- && reload_completed"
- [(const_int 0)]
-{
- ix86_split_fp_branch (GET_CODE (operands[0]), operands[1], operands[2],
- operands[3], operands[4], operands[5]);
- DONE;
-})
-
-;; The order of operands in *jcc<fp>_<int>_i387 is forced by combine in
-;; the simplify_comparison () function. The float operator is treated as
-;; RTX_OBJ with precedence over other operators and is always put in the
-;; first place. Swap the condition and operands to match the ficom
-;; instruction.
-
-(define_insn "*jcc<X87MODEF:mode>_<SWI24:mode>_i387"
- [(set (pc)
- (if_then_else
- (match_operator:CCFP 0 "ix86_swapped_fp_comparison_operator"
- [(match_operator:X87MODEF 1 "float_operator"
- [(match_operand:SWI24 2 "nonimmediate_operand" "m")])
- (match_operand:X87MODEF 3 "register_operand" "f")])
- (label_ref (match_operand 4))
- (pc)))
- (clobber (reg:CCFP FPSR_REG))
- (clobber (reg:CCFP FLAGS_REG))
- (clobber (match_scratch:HI 5 "=a"))]
- "TARGET_80387 && !TARGET_CMOVE
- && (TARGET_USE_<SWI24:MODE>MODE_FIOP
- || optimize_function_for_size_p (cfun))"
- "#")
-
-(define_split
- [(set (pc)
- (if_then_else
- (match_operator:CCFP 0 "ix86_swapped_fp_comparison_operator"
- [(match_operator:X87MODEF 1 "float_operator"
- [(match_operand:SWI24 2 "memory_operand")])
- (match_operand:X87MODEF 3 "register_operand")])
- (match_operand 4)
- (match_operand 5)))
- (clobber (reg:CCFP FPSR_REG))
- (clobber (reg:CCFP FLAGS_REG))
- (clobber (match_scratch:HI 6))]
- "TARGET_80387 && !TARGET_CMOVE
- && reload_completed"
- [(const_int 0)]
-{
- ix86_split_fp_branch (swap_condition (GET_CODE (operands[0])), operands[3],
- gen_rtx_FLOAT (GET_MODE (operands[1]), operands[2]),
- operands[4], operands[5], operands[6]);
- DONE;
-})
;; Unconditional and other jump instructions
diff --git a/gcc/config/i386/ia32intrin.h b/gcc/config/i386/ia32intrin.h
index 5f954fce85e..1f4e484d55b 100644
--- a/gcc/config/i386/ia32intrin.h
+++ b/gcc/config/i386/ia32intrin.h
@@ -147,7 +147,8 @@ extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rold (unsigned int __X, int __C)
{
- return (__X << __C) | (__X >> (32 - __C));
+ __C &= 31;
+ return (__X << __C) | (__X >> (-__C & 31));
}
/* 8bit ror */
@@ -171,7 +172,8 @@ extern __inline unsigned int
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rord (unsigned int __X, int __C)
{
- return (__X >> __C) | (__X << (32 - __C));
+ __C &= 31;
+ return (__X >> __C) | (__X << (-__C & 31));
}
/* Pause */
@@ -239,7 +241,8 @@ extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rolq (unsigned long long __X, int __C)
{
- return (__X << __C) | (__X >> (64 - __C));
+ __C &= 63;
+ return (__X << __C) | (__X >> (-__C & 63));
}
/* 64bit ror */
@@ -247,7 +250,8 @@ extern __inline unsigned long long
__attribute__((__gnu_inline__, __always_inline__, __artificial__))
__rorq (unsigned long long __X, int __C)
{
- return (__X >> __C) | (__X << (64 - __C));
+ __C &= 63;
+ return (__X >> __C) | (__X << (-__C & 63));
}
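/* Editor's note (not part of the patch): the masking removes undefined
   behavior.  With the old definitions a rotate count of 0 made the
   complementary shift equal to the full type width (e.g. __X >> 32 in
   __rold), which is undefined in C.  After "__C &= 31;" both shifts in
   (__X << __C) | (__X >> (-__C & 31)) have counts in [0, 31], so e.g.
   __rold (0x80000001u, 0) yields 0x80000001 and __rold (0x80000001u, 33)
   equals __rold (0x80000001u, 1) == 3.  */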
/* Read flags register */
diff --git a/gcc/config/i386/intelmic-mkoffload.c b/gcc/config/i386/intelmic-mkoffload.c
index bc0490f2ea6..0b7c9548d19 100644
--- a/gcc/config/i386/intelmic-mkoffload.c
+++ b/gcc/config/i386/intelmic-mkoffload.c
@@ -20,7 +20,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include <libgen.h>
diff --git a/gcc/config/i386/msformat-c.c b/gcc/config/i386/msformat-c.c
index 6ce823fc001..6006d9b0017 100644
--- a/gcc/config/i386/msformat-c.c
+++ b/gcc/config/i386/msformat-c.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/i386/predicates.md b/gcc/config/i386/predicates.md
index 4e023afb110..0917fad15d4 100644
--- a/gcc/config/i386/predicates.md
+++ b/gcc/config/i386/predicates.md
@@ -1387,19 +1387,6 @@
(match_operand 0 "comparison_operator")
(match_operand 0 "ix86_trivial_fp_comparison_operator")))
-;; Same as above, but for swapped comparison used in *jcc<fp>_<int>_i387.
-(define_predicate "ix86_swapped_fp_comparison_operator"
- (match_operand 0 "comparison_operator")
-{
- enum rtx_code code = GET_CODE (op);
- bool ret;
-
- PUT_CODE (op, swap_condition (code));
- ret = ix86_fp_comparison_operator (op, mode);
- PUT_CODE (op, code);
- return ret;
-})
-
;; Nearly general operand, but accept any const_double, since we wish
;; to be able to drop them into memory rather than have them get pulled
;; into registers.
@@ -1423,10 +1410,6 @@
(define_predicate "plusminuslogic_operator"
(match_code "plus,minus,and,ior,xor"))
-;; Return true if this is a float extend operation.
-(define_predicate "float_operator"
- (match_code "float"))
-
;; Return true for ARITHMETIC_P.
(define_predicate "arith_or_logical_operator"
(match_code "plus,mult,and,ior,xor,smin,smax,umin,umax,compare,minus,div,
diff --git a/gcc/config/i386/t-i386 b/gcc/config/i386/t-i386
index 0a8524bfbe2..8411a9680ff 100644
--- a/gcc/config/i386/t-i386
+++ b/gcc/config/i386/t-i386
@@ -24,6 +24,22 @@ i386-c.o: $(srcdir)/config/i386/i386-c.c
$(COMPILE) $<
$(POSTCOMPILE)
+x86-tune-sched.o: $(srcdir)/config/i386/x86-tune-sched.c
+ $(COMPILE) $<
+ $(POSTCOMPILE)
+
+x86-tune-sched-bd.o: $(srcdir)/config/i386/x86-tune-sched-bd.c
+ $(COMPILE) $<
+ $(POSTCOMPILE)
+
+x86-tune-sched-atom.o: $(srcdir)/config/i386/x86-tune-sched-atom.c
+ $(COMPILE) $<
+ $(POSTCOMPILE)
+
+x86-tune-sched-core.o: $(srcdir)/config/i386/x86-tune-sched-core.c
+ $(COMPILE) $<
+ $(POSTCOMPILE)
+
i386.o: i386-builtin-types.inc
i386-builtin-types.inc: s-i386-bt ; @true
diff --git a/gcc/config/i386/winnt-cxx.c b/gcc/config/i386/winnt-cxx.c
index 542e4fca1f9..532d9c993ba 100644
--- a/gcc/config/i386/winnt-cxx.c
+++ b/gcc/config/i386/winnt-cxx.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/i386/winnt-stubs.c b/gcc/config/i386/winnt-stubs.c
index 6fb6f56b9f8..52a2bbf613e 100644
--- a/gcc/config/i386/winnt-stubs.c
+++ b/gcc/config/i386/winnt-stubs.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/i386/winnt.c b/gcc/config/i386/winnt.c
index cf6e49c597e..e690d2b907d 100644
--- a/gcc/config/i386/winnt.c
+++ b/gcc/config/i386/winnt.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/i386/x86-tune-costs.h b/gcc/config/i386/x86-tune-costs.h
new file mode 100644
index 00000000000..d27072c0901
--- /dev/null
+++ b/gcc/config/i386/x86-tune-costs.h
@@ -0,0 +1,2083 @@
+
+/* Processor costs (relative to an add) */
+/* We assume COSTS_N_INSNS is defined as (N)*4 and an addition is 2 bytes. */
+#define COSTS_N_BYTES(N) ((N) * 2)
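/* Editor's note (not part of the patch): the factor of 2 lines the two
   scales up: a 2-byte add costs COSTS_N_BYTES (2) == 4, the same value
   as COSTS_N_INSNS (1), so size costs stay comparable with speed costs
   where the two meet.  */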
+
+#define DUMMY_STRINGOP_ALGS {libcall, {{-1, libcall, false}}}
+
+static stringop_algs ix86_size_memcpy[2] = {
+ {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
+ {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
+static stringop_algs ix86_size_memset[2] = {
+ {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
+ {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}}};
+
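/* Editor's note (not part of the patch): per struct stringop_algs in
   i386.h, each initializer reads { alg for unknown size, { {max byte
   count, alg, noalign}, ... } }, with max == -1 terminating the list,
   and the two-element arrays are indexed by TARGET_64BIT.  So
   ix86_size_memcpy/ix86_size_memset above say: when tuning for size,
   use rep movsb (rep_prefix_1_byte) for blocks of every size on both
   32-bit and 64-bit targets.  */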
+const
+struct processor_costs ix86_size_cost = {/* costs for tuning for size */
+ COSTS_N_BYTES (2), /* cost of an add instruction */
+ COSTS_N_BYTES (3), /* cost of a lea instruction */
+ COSTS_N_BYTES (2), /* variable shift costs */
+ COSTS_N_BYTES (3), /* constant shift costs */
+ {COSTS_N_BYTES (3), /* cost of starting multiply for QI */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_BYTES (3), /* cost of a divide/mod for QI */
+ COSTS_N_BYTES (3), /* HI */
+ COSTS_N_BYTES (3), /* SI */
+ COSTS_N_BYTES (3), /* DI */
+ COSTS_N_BYTES (5)}, /* other */
+ COSTS_N_BYTES (3), /* cost of movsx */
+ COSTS_N_BYTES (3), /* cost of movzx */
+ 0, /* "large" insn */
+ 2, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {2, 2, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 2}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {2, 2, 2}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 3, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {3, 3}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 3, /* cost of moving SSE register */
+ {3, 3, 3}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {3, 3, 3}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of l1 cache */
+ 0, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_BYTES (2), /* cost of FADD and FSUB insns. */
+ COSTS_N_BYTES (2), /* cost of FMUL instruction. */
+ COSTS_N_BYTES (2), /* cost of FDIV instruction. */
+ COSTS_N_BYTES (2), /* cost of FABS instruction. */
+ COSTS_N_BYTES (2), /* cost of FCHS instruction. */
+ COSTS_N_BYTES (2), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ ix86_size_memcpy,
+ ix86_size_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 1, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 1, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+/* Processor costs (relative to an add) */
+static stringop_algs i386_memcpy[2] = {
+ {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
+ DUMMY_STRINGOP_ALGS};
+static stringop_algs i386_memset[2] = {
+ {rep_prefix_1_byte, {{-1, rep_prefix_1_byte, false}}},
+ DUMMY_STRINGOP_ALGS};
+
+static const
+struct processor_costs i386_cost = { /* 386 specific costs */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (3), /* variable shift costs */
+ COSTS_N_INSNS (2), /* constant shift costs */
+ {COSTS_N_INSNS (6), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (6), /* HI */
+ COSTS_N_INSNS (6), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
+ COSTS_N_INSNS (1), /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (23), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (23), /* SI */
+ COSTS_N_INSNS (23), /* DI */
+ COSTS_N_INSNS (23)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 15, /* "large" insn */
+ 3, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 0, /* size of l1 cache */
+ 0, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (23), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (27), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (88), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (22), /* cost of FABS instruction. */
+ COSTS_N_INSNS (24), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (122), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ i386_memcpy,
+ i386_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static stringop_algs i486_memcpy[2] = {
+ {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
+ DUMMY_STRINGOP_ALGS};
+static stringop_algs i486_memset[2] = {
+ {rep_prefix_4_byte, {{-1, rep_prefix_4_byte, false}}},
+ DUMMY_STRINGOP_ALGS};
+
+static const
+struct processor_costs i486_cost = { /* 486 specific costs */
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (3), /* variable shift costs */
+ COSTS_N_INSNS (2), /* constant shift costs */
+ {COSTS_N_INSNS (12), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (12), /* HI */
+ COSTS_N_INSNS (12), /* SI */
+ COSTS_N_INSNS (12), /* DI */
+ COSTS_N_INSNS (12)}, /* other */
+ 1, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (40), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (40), /* HI */
+ COSTS_N_INSNS (40), /* SI */
+ COSTS_N_INSNS (40), /* DI */
+ COSTS_N_INSNS (40)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 15, /* "large" insn */
+ 3, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {8, 8, 8}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {8, 8, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 4, /* size of l1 cache. 486 has 8kB cache
+ shared for code and data, so 4kB is
+ not really precise. */
+ 4, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (16), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (73), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (3), /* cost of FABS instruction. */
+ COSTS_N_INSNS (3), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (83), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ i486_memcpy,
+ i486_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static stringop_algs pentium_memcpy[2] = {
+ {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ DUMMY_STRINGOP_ALGS};
+static stringop_algs pentium_memset[2] = {
+ {libcall, {{-1, rep_prefix_4_byte, false}}},
+ DUMMY_STRINGOP_ALGS};
+
+static const
+struct processor_costs pentium_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (11), /* SI */
+ COSTS_N_INSNS (11), /* DI */
+ COSTS_N_INSNS (11)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (25), /* HI */
+ COSTS_N_INSNS (25), /* SI */
+ COSTS_N_INSNS (25), /* DI */
+ COSTS_N_INSNS (25)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 8, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 6, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 8, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 8, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (3), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (39), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ pentium_memcpy,
+ pentium_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static const
+struct processor_costs lakemont_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (11), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (11), /* HI */
+ COSTS_N_INSNS (11), /* SI */
+ COSTS_N_INSNS (11), /* DI */
+ COSTS_N_INSNS (11)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (25), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (25), /* HI */
+ COSTS_N_INSNS (25), /* SI */
+ COSTS_N_INSNS (25), /* DI */
+ COSTS_N_INSNS (25)}, /* other */
+ COSTS_N_INSNS (3), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 6, /* cost for loading QImode using movzbl */
+ {2, 4, 2}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 4, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 8, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 8, 16}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 8, 16}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 8, /* size of l2 cache */
+ 0, /* size of prefetch block */
+ 0, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (3), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (39), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (70), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ pentium_memcpy,
+ pentium_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+/* PentiumPro has optimized rep instructions for blocks aligned by 8 bytes
+ (we ensure the alignment). For small blocks the inline loop is still a
+ noticeable win; for bigger blocks either rep movsl or rep movsb is the
+ way to go. Rep movsb apparently has a more expensive startup time in the
+ CPU, but after 4K the difference is down in the noise. */
+static stringop_algs pentiumpro_memcpy[2] = {
+ {rep_prefix_4_byte, {{128, loop, false}, {1024, unrolled_loop, false},
+ {8192, rep_prefix_4_byte, false},
+ {-1, rep_prefix_1_byte, false}}},
+ DUMMY_STRINGOP_ALGS};
+static stringop_algs pentiumpro_memset[2] = {
+ {rep_prefix_4_byte, {{1024, unrolled_loop, false},
+ {8192, rep_prefix_4_byte, false},
+ {-1, libcall, false}}},
+ DUMMY_STRINGOP_ALGS};
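+
+/* A minimal sketch, for illustration only, of how one of these
+ stringop_algs tables reads: scan the size buckets in order and take the
+ first whose MAX covers the block, with max == -1 as the catch-all. The
+ helper name is hypothetical; the real chooser (decide_alg) additionally
+ weighs the TARGET_INLINE_* flags and whether the size is known at
+ compile time. */
+static enum stringop_alg
+stringop_alg_for_size_sketch (const stringop_algs *algs, HOST_WIDE_INT size)
+{
+  for (int i = 0; i < MAX_STRINGOP_ALGS; i++)
+    if (algs->size[i].max == -1 || size <= algs->size[i].max)
+      return algs->size[i].alg;
+  return algs->unknown_size; /* Algorithm for unknown/huge sizes. */
+}
+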
+static const
+struct processor_costs pentiumpro_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (4)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (17), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (17), /* HI */
+ COSTS_N_INSNS (17), /* SI */
+ COSTS_N_INSNS (17), /* DI */
+ COSTS_N_INSNS (17)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 2, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {2, 2, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 256, /* size of l2 cache */
+ 32, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (3), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (5), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (56), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ pentiumpro_memcpy,
+ pentiumpro_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static stringop_algs geode_memcpy[2] = {
+ {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ DUMMY_STRINGOP_ALGS};
+static stringop_algs geode_memset[2] = {
+ {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ DUMMY_STRINGOP_ALGS};
+static const
+struct processor_costs geode_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (2), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (7), /* SI */
+ COSTS_N_INSNS (7), /* DI */
+ COSTS_N_INSNS (7)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (15), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (23), /* HI */
+ COSTS_N_INSNS (39), /* SI */
+ COSTS_N_INSNS (39), /* DI */
+ COSTS_N_INSNS (39)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 4, /* MOVE_RATIO */
+ 1, /* cost for loading QImode using movzbl */
+ {1, 1, 1}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {1, 1, 1}, /* cost of storing integer registers */
+ 1, /* cost of reg,reg fld/fst */
+ {1, 1, 1}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 6, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {2, 2, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ 64, /* size of l1 cache. */
+ 128, /* size of l2 cache. */
+ 32, /* size of prefetch block */
+ 1, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (11), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (47), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (1), /* cost of FABS instruction. */
+ COSTS_N_INSNS (1), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (54), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ geode_memcpy,
+ geode_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static stringop_algs k6_memcpy[2] = {
+ {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ DUMMY_STRINGOP_ALGS};
+static stringop_algs k6_memset[2] = {
+ {libcall, {{256, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ DUMMY_STRINGOP_ALGS};
+static const
+struct processor_costs k6_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (3), /* DI */
+ COSTS_N_INSNS (3)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (18), /* HI */
+ COSTS_N_INSNS (18), /* SI */
+ COSTS_N_INSNS (18), /* DI */
+ COSTS_N_INSNS (18)}, /* other */
+ COSTS_N_INSNS (2), /* cost of movsx */
+ COSTS_N_INSNS (2), /* cost of movzx */
+ 8, /* "large" insn */
+ 4, /* MOVE_RATIO */
+ 3, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {6, 6, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {2, 2, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 6, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 32, /* size of l2 cache. Some models
+ have an integrated l2 cache, but
+ optimizing for k6 is not important
+ enough to worry about it. */
+ 32, /* size of prefetch block */
+ 1, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (2), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (2), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (56), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (56), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ k6_memcpy,
+ k6_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+/* For some reason, Athlon deals better with the REP prefix (relative to
+ loops) than K8 does. Alignment becomes important after 8 bytes for memcpy
+ and 128 bytes for memset; the noalign field of the table entries is
+ illustrated in the sketch after these tables. */
+static stringop_algs athlon_memcpy[2] = {
+ {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ DUMMY_STRINGOP_ALGS};
+static stringop_algs athlon_memset[2] = {
+ {libcall, {{2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ DUMMY_STRINGOP_ALGS};
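+
+/* The third field of each {max, alg, noalign} entry is the noalign flag.
+ As a hypothetical illustration (not a tuned table), a variant that marks
+ copies of at most 8 bytes as alignment-insensitive, per the note above,
+ could be written: */
+static stringop_algs athlon_memcpy_noalign_sketch[2] = {
+  {libcall, {{8, loop, true}, {2048, rep_prefix_4_byte, false},
+             {-1, libcall, false}}},
+  DUMMY_STRINGOP_ALGS};
+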
+static const
+struct processor_costs athlon_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (5), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (5), /* HI */
+ COSTS_N_INSNS (5), /* SI */
+ COSTS_N_INSNS (5), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 5, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (24), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ athlon_memcpy,
+ athlon_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+/* K8 has an optimized REP instruction for medium-sized blocks, but for very
+ small blocks it is better to use a loop. For large blocks, a libcall can
+ do nontemporal accesses and beat inline expansion considerably. The
+ 32-bit/64-bit indexing of the two tables below is sketched after them. */
+static stringop_algs k8_memcpy[2] = {
+ {libcall, {{6, loop, false}, {14, unrolled_loop, false},
+ {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+static stringop_algs k8_memset[2] = {
+ {libcall, {{8, loop, false}, {24, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{48, unrolled_loop, false},
+ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
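+
+/* Each memcpy/memset table above has two stringop_algs entries; the second
+ is evidently the 64-bit variant, since 32-bit-only chips fill it with
+ DUMMY_STRINGOP_ALGS. A minimal sketch of the indexing (hypothetical
+ helper), assuming that convention: */
+static inline const stringop_algs *
+stringop_table_for_mode_sketch (const stringop_algs table[2])
+{
+  return &table[TARGET_64BIT ? 1 : 0];
+}
+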
+static const
+struct processor_costs k8_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 3, 6}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (it is probably not a good idea to
+ leave the number of prefetches completely unlimited, as their execution
+ also takes some time). How this figure is exported as a --param default
+ is sketched after this table. */
+ 100, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (19), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ k8_memcpy,
+ k8_memset,
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar_load_cost. */
+ 2, /* scalar_store_cost. */
+ 5, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 3, /* vec_unalign_load_cost. */
+ 3, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 2, /* cond_not_taken_branch_cost. */
+};
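+
+/* The cache sizes and prefetch count above are not read directly at
+ expansion time; they are exported as --param defaults during option
+ override. A simplified sketch of that plumbing (hypothetical helper,
+ modeled on ix86_option_override_internal): */
+static void
+export_tuning_params_sketch (struct gcc_options *opts,
+                             struct gcc_options *opts_set,
+                             const struct processor_costs *cost)
+{
+  maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
+                         cost->simultaneous_prefetches,
+                         opts->x_param_values, opts_set->x_param_values);
+  maybe_set_param_value (PARAM_L1_CACHE_SIZE, cost->l1_cache_size,
+                         opts->x_param_values, opts_set->x_param_values);
+  maybe_set_param_value (PARAM_L2_CACHE_SIZE, cost->l2_cache_size,
+                         opts->x_param_values, opts_set->x_param_values);
+}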
+
+/* AMDFAM10 has an optimized REP instruction for medium-sized blocks, but
+ for very small blocks it is better to use a loop. For large blocks, a
+ libcall can do nontemporal accesses and beat inline expansion
+ considerably. */
+static stringop_algs amdfam10_memcpy[2] = {
+ {libcall, {{6, loop, false}, {14, unrolled_loop, false},
+ {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+static stringop_algs amdfam10_memset[2] = {
+ {libcall, {{8, loop, false}, {24, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+struct processor_costs amdfam10_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 3}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3
+ 1/1 1/1
+ MOVD reg32, xmmreg Double FADD 3
+ 1/1 1/1 */
+ 64, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (it is probably not a good idea to
+ leave the number of prefetches completely unlimited, as their execution
+ also takes some time). */
+ 100, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (19), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ amdfam10_memcpy,
+ amdfam10_memset,
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar_load_cost. */
+ 2, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 2, /* vec_store_cost. */
+ 2, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+/* BDVER1 has an optimized REP instruction for medium-sized blocks, but for
+ very small blocks it is better to use a loop. For large blocks, a libcall
+ can do nontemporal accesses and beat inline expansion considerably. */
+static stringop_algs bdver1_memcpy[2] = {
+ {libcall, {{6, loop, false}, {14, unrolled_loop, false},
+ {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+static stringop_algs bdver1_memset[2] = {
+ {libcall, {{8, loop, false}, {24, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+
+const struct processor_costs bdver1_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {5, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {5, 5, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 4}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 4}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 2, /* MMX or SSE register to integer */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3
+ 1/1 1/1
+ MOVD reg32, xmmreg Double FADD 3
+ 1/1 1/1 */
+ 16, /* size of l1 cache. */
+ 2048, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (it is probably not a good idea to
+ leave the number of prefetches completely unlimited, as their execution
+ also takes some time). */
+ 100, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (6), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (42), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
+ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ bdver1_memcpy,
+ bdver1_memset,
+ 6, /* scalar_stmt_cost. */
+ 4, /* scalar_load_cost. */
+ 4, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 4, /* vec_align_load_cost. */
+ 4, /* vec_unalign_load_cost. */
+ 4, /* vec_store_cost. */
+ 4, /* cond_taken_branch_cost. */
+ 2, /* cond_not_taken_branch_cost. */
+};
+
+/* BDVER2 has an optimized REP instruction for medium-sized blocks, but for
+ very small blocks it is better to use a loop. For large blocks, a libcall
+ can do nontemporal accesses and beat inline expansion considerably. */
+
+static stringop_algs bdver2_memcpy[2] = {
+ {libcall, {{6, loop, false}, {14, unrolled_loop, false},
+ {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+static stringop_algs bdver2_memset[2] = {
+ {libcall, {{8, loop, false}, {24, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+
+const struct processor_costs bdver2_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {5, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {5, 5, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 4}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 4}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 2, /* MMX or SSE register to integer */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3
+ 1/1 1/1
+ MOVD reg32, xmmreg Double FADD 3
+ 1/1 1/1 */
+ 16, /* size of l1 cache. */
+ 2048, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (it is probably not a good idea to
+ leave the number of prefetches completely unlimited, as their execution
+ also takes some time). */
+ 100, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (6), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (42), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
+ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ bdver2_memcpy,
+ bdver2_memset,
+ 6, /* scalar_stmt_cost. */
+ 4, /* scalar_load_cost. */
+ 4, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 4, /* vec_align_load_cost. */
+ 4, /* vec_unalign_load_cost. */
+ 4, /* vec_store_cost. */
+ 4, /* cond_taken_branch_cost. */
+ 2, /* cond_not_taken_branch_cost. */
+};
+
+/* BDVER3 has an optimized REP instruction for medium-sized blocks, but for
+ very small blocks it is better to use a loop. For large blocks, a libcall
+ can do nontemporal accesses and beat inline expansion considerably. */
+static stringop_algs bdver3_memcpy[2] = {
+ {libcall, {{6, loop, false}, {14, unrolled_loop, false},
+ {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+static stringop_algs bdver3_memset[2] = {
+ {libcall, {{8, loop, false}, {24, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+struct processor_costs bdver3_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {5, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {5, 5, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 4}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 4}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 2, /* MMX or SSE register to integer */
+ 16, /* size of l1 cache. */
+ 2048, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (it is probably not a good idea to
+ leave the number of prefetches completely unlimited, as their execution
+ also takes some time). */
+ 100, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (6), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (42), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
+ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ bdver3_memcpy,
+ bdver3_memset,
+ 6, /* scalar_stmt_cost. */
+ 4, /* scalar_load_cost. */
+ 4, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 4, /* vec_align_load_cost. */
+ 4, /* vec_unalign_load_cost. */
+ 4, /* vec_store_cost. */
+ 4, /* cond_taken_branch_cost. */
+ 2, /* cond_not_taken_branch_cost. */
+};
+
+/* BDVER4 has an optimized REP instruction for medium-sized blocks, but for
+ very small blocks it is better to use a loop. For large blocks, a libcall
+ can do nontemporal accesses and beat inline expansion considerably. */
+static stringop_algs bdver4_memcpy[2] = {
+ {libcall, {{6, loop, false}, {14, unrolled_loop, false},
+ {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+static stringop_algs bdver4_memset[2] = {
+ {libcall, {{8, loop, false}, {24, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+struct processor_costs bdver4_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (4), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (4), /* SI */
+ COSTS_N_INSNS (6), /* DI */
+ COSTS_N_INSNS (6)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {5, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {5, 5, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 4}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 4}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 2, /* MMX or SSE register to integer */
+ 16, /* size of l1 cache. */
+ 2048, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (it is probably not a good idea to
+ leave the number of prefetches completely unlimited, as their execution
+ also takes some time). */
+ 100, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (6), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (42), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
+ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ bdver4_memcpy,
+ bdver4_memset,
+ 6, /* scalar_stmt_cost. */
+ 4, /* scalar_load_cost. */
+ 4, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 4, /* vec_align_load_cost. */
+ 4, /* vec_unalign_load_cost. */
+ 4, /* vec_store_cost. */
+ 4, /* cond_taken_branch_cost. */
+ 2, /* cond_not_taken_branch_cost. */
+};
+
+/* ZNVER1 has an optimized REP instruction for medium-sized blocks, but for
+ very small blocks it is better to use a loop. For large blocks, a libcall
+ can do nontemporal accesses and beat inline expansion considerably. */
+static stringop_algs znver1_memcpy[2] = {
+ {libcall, {{6, loop, false}, {14, unrolled_loop, false},
+ {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+static stringop_algs znver1_memset[2] = {
+ {libcall, {{8, loop, false}, {24, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+struct processor_costs znver1_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction. */
+ COSTS_N_INSNS (1), /* cost of a lea instruction. */
+ COSTS_N_INSNS (1), /* variable shift costs. */
+ COSTS_N_INSNS (1), /* constant shift costs. */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI. */
+ COSTS_N_INSNS (3), /* HI. */
+ COSTS_N_INSNS (3), /* SI. */
+ COSTS_N_INSNS (4), /* DI. */
+ COSTS_N_INSNS (4)}, /* other. */
+ 0, /* cost of multiply per each bit
+ set. */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI. */
+ COSTS_N_INSNS (35), /* HI. */
+ COSTS_N_INSNS (51), /* SI. */
+ COSTS_N_INSNS (83), /* DI. */
+ COSTS_N_INSNS (83)}, /* other. */
+ COSTS_N_INSNS (1), /* cost of movsx. */
+ COSTS_N_INSNS (1), /* cost of movzx. */
+ 8, /* "large" insn. */
+ 9, /* MOVE_RATIO. */
+ 4, /* cost for loading QImode using
+ movzbl. */
+ {5, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer
+ registers. */
+ 2, /* cost of reg,reg fld/fst. */
+ {5, 5, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode. */
+ {4, 4, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode. */
+ 2, /* cost of moving MMX register. */
+ {4, 4}, /* cost of loading MMX registers
+ in SImode and DImode. */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode. */
+ 2, /* cost of moving SSE register. */
+ {4, 4, 4}, /* cost of loading SSE registers
+ in SImode, DImode and TImode. */
+ {4, 4, 4}, /* cost of storing SSE registers
+ in SImode, DImode and TImode. */
+ 2, /* MMX or SSE register to integer. */
+ 32, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block. */
+ /* New AMD processors never drop prefetches; if they cannot be performed
+ immediately, they are queued. We set the number of simultaneous prefetches
+ to a large constant to reflect this (it is probably not a good idea to
+ leave the number of prefetches completely unlimited, as their execution
+ also takes some time). */
+ 100, /* number of parallel prefetches. */
+ 3, /* Branch cost. */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (6), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (42), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (52), /* cost of FSQRT instruction. */
+ /* Zen can execute 4 integer operations per cycle. FP operations take 3
+ cycles, and it can execute 2 integer additions and 2 multiplications,
+ so reassociation may make sense up to a width of 6. SPEC2k6 benchmarks
+ suggest that 4 works better than 6, probably due to register pressure.
+
+ Integer vector operations are handled by the FP unit and execute 3 vector
+ plus/minus operations per cycle but only one multiply. This is adjusted
+ in ix86_reassociation_width; a sketch follows this table. */
+ 4, 4, 3, 6, /* reassoc int, fp, vec_int, vec_fp. */
+ znver1_memcpy,
+ znver1_memset,
+ 6, /* scalar_stmt_cost. */
+ 4, /* scalar_load_cost. */
+ 4, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 4, /* vec_align_load_cost. */
+ 4, /* vec_unalign_load_cost. */
+ 4, /* vec_store_cost. */
+ 4, /* cond_taken_branch_cost. */
+ 2, /* cond_not_taken_branch_cost. */
+};
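+
+/* A sketch of how the four reassoc widths in these tables could feed the
+ TARGET_SCHED_REASSOCIATION_WIDTH hook; the helper name is hypothetical,
+ and the real ix86_reassociation_width applies further adjustments,
+ including the vector-multiply restriction noted in the comment above. */
+static int
+reassoc_width_sketch (const struct processor_costs *cost,
+                      bool fp_p, machine_mode mode)
+{
+  if (VECTOR_MODE_P (mode))
+    return fp_p ? cost->reassoc_vec_fp : cost->reassoc_vec_int;
+  return fp_p ? cost->reassoc_fp : cost->reassoc_int;
+}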
+
+/* BTVER1 has an optimized REP instruction for medium-sized blocks, but for
+ very small blocks it is better to use a loop. For large blocks, a libcall
+ can do nontemporal accesses and beat inline expansion considerably. */
+static stringop_algs btver1_memcpy[2] = {
+ {libcall, {{6, loop, false}, {14, unrolled_loop, false},
+ {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+static stringop_algs btver1_memset[2] = {
+ {libcall, {{8, loop, false}, {24, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+const struct processor_costs btver1_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 3}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3
+ 1/1 1/1
+ MOVD reg32, xmmreg Double FADD 3
+ 1/1 1/1 */
+ 32, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 100, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (19), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ btver1_memcpy,
+ btver1_memset,
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar_load_cost. */
+ 2, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 2, /* vec_store_cost. */
+ 2, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static stringop_algs btver2_memcpy[2] = {
+ {libcall, {{6, loop, false}, {14, unrolled_loop, false},
+ {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{16, loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+static stringop_algs btver2_memset[2] = {
+ {libcall, {{8, loop, false}, {24, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{48, unrolled_loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+const struct processor_costs btver2_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (2), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (5)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (19), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (35), /* HI */
+ COSTS_N_INSNS (51), /* SI */
+ COSTS_N_INSNS (83), /* DI */
+ COSTS_N_INSNS (83)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 9, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {3, 4, 3}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {3, 4, 3}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {4, 4, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {3, 3}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {4, 4}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {4, 4, 3}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {4, 4, 5}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 3, /* MMX or SSE register to integer */
+ /* On K8:
+ MOVD reg64, xmmreg Double FSTORE 4
+ MOVD reg32, xmmreg Double FSTORE 4
+ On AMDFAM10:
+ MOVD reg64, xmmreg Double FADD 3
+ 1/1 1/1
+ MOVD reg32, xmmreg Double FADD 3
+ 1/1 1/1 */
+ 32, /* size of l1 cache. */
+ 2048, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 100, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (4), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (4), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (19), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (35), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ btver2_memcpy,
+ btver2_memset,
+ 4, /* scalar_stmt_cost. */
+ 2, /* scalar_load_cost. */
+ 2, /* scalar_store_cost. */
+ 6, /* vec_stmt_cost. */
+ 0, /* vec_to_scalar_cost. */
+ 2, /* scalar_to_vec_cost. */
+ 2, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 2, /* vec_store_cost. */
+ 2, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static stringop_algs pentium4_memcpy[2] = {
+ {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
+ DUMMY_STRINGOP_ALGS};
+static stringop_algs pentium4_memset[2] = {
+ {libcall, {{6, loop_1_byte, false}, {48, loop, false},
+ {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ DUMMY_STRINGOP_ALGS};
+
+static const
+struct processor_costs pentium4_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (3), /* cost of a lea instruction */
+ COSTS_N_INSNS (4), /* variable shift costs */
+ COSTS_N_INSNS (4), /* constant shift costs */
+ {COSTS_N_INSNS (15), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (15), /* HI */
+ COSTS_N_INSNS (15), /* SI */
+ COSTS_N_INSNS (15), /* DI */
+ COSTS_N_INSNS (15)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (56), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (56), /* HI */
+ COSTS_N_INSNS (56), /* SI */
+ COSTS_N_INSNS (56), /* DI */
+ COSTS_N_INSNS (56)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 16, /* "large" insn */
+ 6, /* MOVE_RATIO */
+ 2, /* cost for loading QImode using movzbl */
+ {4, 5, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {2, 3, 2}, /* cost of storing integer registers */
+ 2, /* cost of reg,reg fld/fst */
+ {2, 2, 6}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 6}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {2, 2}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {2, 2}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 12, /* cost of moving SSE register */
+ {12, 12, 12}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {2, 2, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 10, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 2, /* Branch cost */
+ COSTS_N_INSNS (5), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (7), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (43), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (2), /* cost of FABS instruction. */
+ COSTS_N_INSNS (2), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (43), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ pentium4_memcpy,
+ pentium4_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static stringop_algs nocona_memcpy[2] = {
+ {libcall, {{12, loop_1_byte, false}, {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{32, loop, false}, {20000, rep_prefix_8_byte, false},
+ {100000, unrolled_loop, false}, {-1, libcall, false}}}};
+
+static stringop_algs nocona_memset[2] = {
+ {libcall, {{6, loop_1_byte, false}, {48, loop, false},
+ {20480, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{24, loop, false}, {64, unrolled_loop, false},
+ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
+
+static const
+struct processor_costs nocona_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1), /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (10), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (10), /* HI */
+ COSTS_N_INSNS (10), /* SI */
+ COSTS_N_INSNS (10), /* DI */
+ COSTS_N_INSNS (10)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (66), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (66), /* HI */
+ COSTS_N_INSNS (66), /* SI */
+ COSTS_N_INSNS (66), /* DI */
+ COSTS_N_INSNS (66)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 16, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 3, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {4, 4, 4}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 6, /* cost of moving MMX register */
+ {12, 12}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {12, 12}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 6, /* cost of moving SSE register */
+ {12, 12, 12}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {12, 12, 12}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 8, /* MMX or SSE register to integer */
+ 8, /* size of l1 cache. */
+ 1024, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 8, /* number of parallel prefetches */
+ 1, /* Branch cost */
+ COSTS_N_INSNS (6), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (40), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (3), /* cost of FABS instruction. */
+ COSTS_N_INSNS (3), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (44), /* cost of FSQRT instruction. */
+ 1, 1, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ nocona_memcpy,
+ nocona_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static stringop_algs atom_memcpy[2] = {
+ {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
+ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
+static stringop_algs atom_memset[2] = {
+ {libcall, {{8, loop, false}, {15, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{24, loop, false}, {32, unrolled_loop, false},
+ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
+static const
+struct processor_costs atom_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ 2, 2, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ atom_memcpy,
+ atom_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static stringop_algs slm_memcpy[2] = {
+ {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
+ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
+static stringop_algs slm_memset[2] = {
+ {libcall, {{8, loop, false}, {15, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{24, loop, false}, {32, unrolled_loop, false},
+ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
+static const
+struct processor_costs slm_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ slm_memcpy,
+ slm_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar_load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 4, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+static stringop_algs intel_memcpy[2] = {
+ {libcall, {{11, loop, false}, {-1, rep_prefix_4_byte, false}}},
+ {libcall, {{32, loop, false}, {64, rep_prefix_4_byte, false},
+ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
+static stringop_algs intel_memset[2] = {
+ {libcall, {{8, loop, false}, {15, unrolled_loop, false},
+ {2048, rep_prefix_4_byte, false}, {-1, libcall, false}}},
+ {libcall, {{24, loop, false}, {32, unrolled_loop, false},
+ {8192, rep_prefix_8_byte, false}, {-1, libcall, false}}}};
+static const
+struct processor_costs intel_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (3), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 256, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ 1, 4, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ intel_memcpy,
+ intel_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 4, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+/* Generic should produce code tuned for Core-i7 (and newer chips)
+ and btver1 (and newer chips). */
+
+static stringop_algs generic_memcpy[2] = {
+ {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
+ {-1, libcall, false}}},
+ {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+static stringop_algs generic_memset[2] = {
+ {libcall, {{32, loop, false}, {8192, rep_prefix_4_byte, false},
+ {-1, libcall, false}}},
+ {libcall, {{32, loop, false}, {8192, rep_prefix_8_byte, false},
+ {-1, libcall, false}}}};
+static const
+struct processor_costs generic_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ /* On all chips taken into consideration lea takes 2 cycles or more.
+ Modelling that cost, however, makes our current implementation of
+ synth_mult use unnecessary temporary registers, causing a regression
+ on several SPECfp benchmarks. */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 32, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ /* Benchmarks show large regressions on the K8 sixtrack benchmark when this
+ value is increased to the perhaps more appropriate value of 5. */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ 1, 2, 1, 1, /* reassoc int, fp, vec_int, vec_fp. */
+ generic_memcpy,
+ generic_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
+
+/* core_cost should produce code tuned for the Core family of CPUs. */
+static stringop_algs core_memcpy[2] = {
+ {libcall, {{1024, rep_prefix_4_byte, true}, {-1, libcall, false}}},
+ {libcall, {{24, loop, true}, {128, rep_prefix_8_byte, true},
+ {-1, libcall, false}}}};
+static stringop_algs core_memset[2] = {
+ {libcall, {{6, loop_1_byte, true},
+ {24, loop, true},
+ {8192, rep_prefix_4_byte, true},
+ {-1, libcall, false}}},
+ {libcall, {{24, loop, true}, {512, rep_prefix_8_byte, true},
+ {-1, libcall, false}}}};
+
+static const
+struct processor_costs core_cost = {
+ COSTS_N_INSNS (1), /* cost of an add instruction */
+ /* On all chips taken into consideration lea takes 2 cycles or more.
+ Modelling that cost, however, makes our current implementation of
+ synth_mult use unnecessary temporary registers, causing a regression
+ on several SPECfp benchmarks. */
+ COSTS_N_INSNS (1) + 1, /* cost of a lea instruction */
+ COSTS_N_INSNS (1), /* variable shift costs */
+ COSTS_N_INSNS (1), /* constant shift costs */
+ {COSTS_N_INSNS (3), /* cost of starting multiply for QI */
+ COSTS_N_INSNS (4), /* HI */
+ COSTS_N_INSNS (3), /* SI */
+ COSTS_N_INSNS (4), /* DI */
+ COSTS_N_INSNS (2)}, /* other */
+ 0, /* cost of multiply per each bit set */
+ {COSTS_N_INSNS (18), /* cost of a divide/mod for QI */
+ COSTS_N_INSNS (26), /* HI */
+ COSTS_N_INSNS (42), /* SI */
+ COSTS_N_INSNS (74), /* DI */
+ COSTS_N_INSNS (74)}, /* other */
+ COSTS_N_INSNS (1), /* cost of movsx */
+ COSTS_N_INSNS (1), /* cost of movzx */
+ 8, /* "large" insn */
+ 17, /* MOVE_RATIO */
+ 4, /* cost for loading QImode using movzbl */
+ {4, 4, 4}, /* cost of loading integer registers
+ in QImode, HImode and SImode.
+ Relative to reg-reg move (2). */
+ {4, 4, 4}, /* cost of storing integer registers */
+ 4, /* cost of reg,reg fld/fst */
+ {12, 12, 12}, /* cost of loading fp registers
+ in SFmode, DFmode and XFmode */
+ {6, 6, 8}, /* cost of storing fp registers
+ in SFmode, DFmode and XFmode */
+ 2, /* cost of moving MMX register */
+ {8, 8}, /* cost of loading MMX registers
+ in SImode and DImode */
+ {8, 8}, /* cost of storing MMX registers
+ in SImode and DImode */
+ 2, /* cost of moving SSE register */
+ {8, 8, 8}, /* cost of loading SSE registers
+ in SImode, DImode and TImode */
+ {8, 8, 8}, /* cost of storing SSE registers
+ in SImode, DImode and TImode */
+ 5, /* MMX or SSE register to integer */
+ 64, /* size of l1 cache. */
+ 512, /* size of l2 cache. */
+ 64, /* size of prefetch block */
+ 6, /* number of parallel prefetches */
+ /* FIXME: perhaps a more appropriate value is 5. */
+ 3, /* Branch cost */
+ COSTS_N_INSNS (8), /* cost of FADD and FSUB insns. */
+ COSTS_N_INSNS (8), /* cost of FMUL instruction. */
+ COSTS_N_INSNS (20), /* cost of FDIV instruction. */
+ COSTS_N_INSNS (8), /* cost of FABS instruction. */
+ COSTS_N_INSNS (8), /* cost of FCHS instruction. */
+ COSTS_N_INSNS (40), /* cost of FSQRT instruction. */
+ 1, 4, 2, 2, /* reassoc int, fp, vec_int, vec_fp. */
+ core_memcpy,
+ core_memset,
+ 1, /* scalar_stmt_cost. */
+ 1, /* scalar load_cost. */
+ 1, /* scalar_store_cost. */
+ 1, /* vec_stmt_cost. */
+ 1, /* vec_to_scalar_cost. */
+ 1, /* scalar_to_vec_cost. */
+ 1, /* vec_align_load_cost. */
+ 2, /* vec_unalign_load_cost. */
+ 1, /* vec_store_cost. */
+ 3, /* cond_taken_branch_cost. */
+ 1, /* cond_not_taken_branch_cost. */
+};
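+
+/* Reader's note (not from the patch): COSTS_N_INSNS (N) expands to
+ (N) * 4 (see rtl.h), so the tables above express costs in quarters
+ of an instruction; "COSTS_N_INSNS (1) + 1" therefore rates lea as
+ slightly more expensive than a plain add without rounding all the
+ way up to two instructions. */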
+
diff --git a/gcc/config/i386/x86-tune-sched-atom.c b/gcc/config/i386/x86-tune-sched-atom.c
new file mode 100644
index 00000000000..9e9f253d5c3
--- /dev/null
+++ b/gcc/config/i386/x86-tune-sched-atom.c
@@ -0,0 +1,246 @@
+/* Scheduler hooks for IA-32 which implement atom+ specific logic.
+ Copyright (C) 1988-2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "cfghooks.h"
+#include "tm_p.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "target.h"
+#include "rtl-iter.h"
+#include "regset.h"
+#include "sched-int.h"
+
+/* Try to reorder the ready list to take advantage of Atom pipelined IMUL
+ execution. It is applied if
+ (1) an IMUL instruction is at the top of the list;
+ (2) there is exactly one producer of an independent IMUL instruction
+ in the ready list.
+ Return the index of the IMUL producer if it was found and -1 otherwise. */
+static int
+do_reorder_for_imul (rtx_insn **ready, int n_ready)
+{
+ rtx_insn *insn;
+ rtx set, insn1, insn2;
+ sd_iterator_def sd_it;
+ dep_t dep;
+ int index = -1;
+ int i;
+
+ if (!TARGET_BONNELL)
+ return index;
+
+ /* Check that an IMUL instruction is at the top of the ready list. */
+ insn = ready[n_ready - 1];
+ set = single_set (insn);
+ if (!set)
+ return index;
+ if (!(GET_CODE (SET_SRC (set)) == MULT
+ && GET_MODE (SET_SRC (set)) == SImode))
+ return index;
+
+ /* Search for producer of independent IMUL instruction. */
+ for (i = n_ready - 2; i >= 0; i--)
+ {
+ insn = ready[i];
+ if (!NONDEBUG_INSN_P (insn))
+ continue;
+ /* Skip IMUL instruction. */
+ insn2 = PATTERN (insn);
+ if (GET_CODE (insn2) == PARALLEL)
+ insn2 = XVECEXP (insn2, 0, 0);
+ if (GET_CODE (insn2) == SET
+ && GET_CODE (SET_SRC (insn2)) == MULT
+ && GET_MODE (SET_SRC (insn2)) == SImode)
+ continue;
+
+ FOR_EACH_DEP (insn, SD_LIST_FORW, sd_it, dep)
+ {
+ rtx con;
+ con = DEP_CON (dep);
+ if (!NONDEBUG_INSN_P (con))
+ continue;
+ insn1 = PATTERN (con);
+ if (GET_CODE (insn1) == PARALLEL)
+ insn1 = XVECEXP (insn1, 0, 0);
+
+ if (GET_CODE (insn1) == SET
+ && GET_CODE (SET_SRC (insn1)) == MULT
+ && GET_MODE (SET_SRC (insn1)) == SImode)
+ {
+ sd_iterator_def sd_it1;
+ dep_t dep1;
+ /* Check that there is no other producer for the IMUL. */
+ index = i;
+ FOR_EACH_DEP (con, SD_LIST_BACK, sd_it1, dep1)
+ {
+ rtx pro;
+ pro = DEP_PRO (dep1);
+ if (!NONDEBUG_INSN_P (pro))
+ continue;
+ if (pro != insn)
+ index = -1;
+ }
+ if (index >= 0)
+ break;
+ }
+ }
+ if (index >= 0)
+ break;
+ }
+ return index;
+}
+
+/* Try to find the best candidate for the top of the ready list when two
+ insns have the same priority - the better candidate is the one whose
+ producers were scheduled earlier. Applied for Silvermont only.
+ Return true if the top 2 insns must be interchanged. */
+static bool
+swap_top_of_ready_list (rtx_insn **ready, int n_ready)
+{
+ rtx_insn *top = ready[n_ready - 1];
+ rtx_insn *next = ready[n_ready - 2];
+ rtx set;
+ sd_iterator_def sd_it;
+ dep_t dep;
+ int clock1 = -1;
+ int clock2 = -1;
+ #define INSN_TICK(INSN) (HID (INSN)->tick)
+
+ if (!TARGET_SILVERMONT && !TARGET_INTEL)
+ return false;
+
+ if (!NONDEBUG_INSN_P (top))
+ return false;
+ if (!NONJUMP_INSN_P (top))
+ return false;
+ if (!NONDEBUG_INSN_P (next))
+ return false;
+ if (!NONJUMP_INSN_P (next))
+ return false;
+ set = single_set (top);
+ if (!set)
+ return false;
+ set = single_set (next);
+ if (!set)
+ return false;
+
+ if (INSN_PRIORITY_KNOWN (top) && INSN_PRIORITY_KNOWN (next))
+ {
+ if (INSN_PRIORITY (top) != INSN_PRIORITY (next))
+ return false;
+ /* Determine the winner more precisely. */
+ FOR_EACH_DEP (top, SD_LIST_RES_BACK, sd_it, dep)
+ {
+ rtx pro;
+ pro = DEP_PRO (dep);
+ if (!NONDEBUG_INSN_P (pro))
+ continue;
+ if (INSN_TICK (pro) > clock1)
+ clock1 = INSN_TICK (pro);
+ }
+ FOR_EACH_DEP (next, SD_LIST_RES_BACK, sd_it, dep)
+ {
+ rtx pro;
+ pro = DEP_PRO (dep);
+ if (!NONDEBUG_INSN_P (pro))
+ continue;
+ if (INSN_TICK (pro) > clock2)
+ clock2 = INSN_TICK (pro);
+ }
+
+ if (clock1 == clock2)
+ {
+ /* Determine the winner - a load must win. */
+ enum attr_memory memory1, memory2;
+ memory1 = get_attr_memory (top);
+ memory2 = get_attr_memory (next);
+ if (memory2 == MEMORY_LOAD && memory1 != MEMORY_LOAD)
+ return true;
+ }
+ return (bool) (clock2 < clock1);
+ }
+ return false;
+ #undef INSN_TICK
+}
+
+/* Perform possible reordering of the ready list for Atom/Silvermont only.
+ Return the issue rate. */
+int
+ix86_atom_sched_reorder (FILE *dump, int sched_verbose, rtx_insn **ready,
+ int *pn_ready, int clock_var)
+{
+ int issue_rate = -1;
+ int n_ready = *pn_ready;
+ int i;
+ rtx_insn *insn;
+ int index = -1;
+
+ /* Set up issue rate. */
+ issue_rate = ix86_issue_rate ();
+
+ /* Do reordering for BONNELL/SILVERMONT only. */
+ if (!TARGET_BONNELL && !TARGET_SILVERMONT && !TARGET_INTEL)
+ return issue_rate;
+
+ /* Nothing to do if ready list contains only 1 instruction. */
+ if (n_ready <= 1)
+ return issue_rate;
+
+ /* Do reordering for the post-reload scheduler only. */
+ if (!reload_completed)
+ return issue_rate;
+
+ if ((index = do_reorder_for_imul (ready, n_ready)) >= 0)
+ {
+ if (sched_verbose > 1)
+ fprintf (dump, ";;\tatom sched_reorder: put %d insn on top\n",
+ INSN_UID (ready[index]));
+
+ /* Put IMUL producer (ready[index]) at the top of ready list. */
+ insn = ready[index];
+ for (i = index; i < n_ready - 1; i++)
+ ready[i] = ready[i + 1];
+ ready[n_ready - 1] = insn;
+ return issue_rate;
+ }
+
+ /* Skip selective scheduling since HID is not populated in it. */
+ if (clock_var != 0
+ && !sel_sched_p ()
+ && swap_top_of_ready_list (ready, n_ready))
+ {
+ if (sched_verbose > 1)
+ fprintf (dump, ";;\tslm sched_reorder: swap %d and %d insns\n",
+ INSN_UID (ready[n_ready - 1]), INSN_UID (ready[n_ready - 2]));
+ /* Swap 2 top elements of ready list. */
+ insn = ready[n_ready - 1];
+ ready[n_ready - 1] = ready[n_ready - 2];
+ ready[n_ready - 2] = insn;
+ }
+ return issue_rate;
+}
diff --git a/gcc/config/i386/x86-tune-sched-bd.c b/gcc/config/i386/x86-tune-sched-bd.c
new file mode 100644
index 00000000000..0045c631bbc
--- /dev/null
+++ b/gcc/config/i386/x86-tune-sched-bd.c
@@ -0,0 +1,824 @@
+/* Scheduler hooks for IA-32 which implement bdver1-4 specific logic.
+ Copyright (C) 1988-2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "cfghooks.h"
+#include "tm_p.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "target.h"
+#include "rtl-iter.h"
+#include "regset.h"
+#include "sched-int.h"
+
+/* The size of the dispatch window is the total number of bytes of
+ object code allowed in a window. */
+#define DISPATCH_WINDOW_SIZE 16
+
+/* Number of dispatch windows considered for scheduling. */
+#define MAX_DISPATCH_WINDOWS 3
+
+/* Maximum number of instructions in a window. */
+#define MAX_INSN 4
+
+/* Maximum number of immediate operands in a window. */
+#define MAX_IMM 4
+
+/* Maximum number of immediate bits allowed in a window. */
+#define MAX_IMM_SIZE 128
+
+/* Maximum number of 32 bit immediates allowed in a window. */
+#define MAX_IMM_32 4
+
+/* Maximum number of 64 bit immediates allowed in a window. */
+#define MAX_IMM_64 2
+
+/* Maximum total of loads or prefetches allowed in a window. */
+#define MAX_LOAD 2
+
+/* Maximum total of stores allowed in a window. */
+#define MAX_STORE 1
+
+#undef BIG
+#define BIG 100
+
+
+/* Dispatch groups. Instructions that affect the mix in a dispatch window. */
+enum dispatch_group {
+ disp_no_group = 0,
+ disp_load,
+ disp_store,
+ disp_load_store,
+ disp_prefetch,
+ disp_imm,
+ disp_imm_32,
+ disp_imm_64,
+ disp_branch,
+ disp_cmp,
+ disp_jcc,
+ disp_last
+};
+
+/* Number of allowable groups in a dispatch window. It is an array
+ indexed by the dispatch_group enum. 100 is used as a big number,
+ because the number of these kinds of operations does not have any
+ effect on the dispatch window, but we need them for other reasons in
+ the table. */
+static unsigned int num_allowable_groups[disp_last] = {
+ 0, 2, 1, 1, 2, 4, 4, 2, 1, BIG, BIG
+};
+
+char group_name[disp_last + 1][16] = {
+ "disp_no_group", "disp_load", "disp_store", "disp_load_store",
+ "disp_prefetch", "disp_imm", "disp_imm_32", "disp_imm_64",
+ "disp_branch", "disp_cmp", "disp_jcc", "disp_last"
+};
+
+/* Instruction path. */
+enum insn_path {
+ no_path = 0,
+ path_single, /* Single micro op. */
+ path_double, /* Double micro op. */
+ path_multi, /* Instructions with more than 2 micro ops. */
+ last_path
+};
+
+/* sched_insn_info describes one slot of a dispatch window: the
+ instruction scheduled into it together with its dispatch group,
+ decode path and size information.
+
+ Windows are allocated for each basic block and are linked
+ together. */
+typedef struct sched_insn_info_s {
+ rtx insn;
+ enum dispatch_group group;
+ enum insn_path path;
+ int byte_len;
+ int imm_bytes;
+} sched_insn_info;
+
+/* Linked list of dispatch windows. This is a two-way list of
+ dispatch windows of a basic block. It contains information about
+ the number of uops in the window and the total number of
+ instructions and of bytes in the object code for this dispatch
+ window. */
+typedef struct dispatch_windows_s {
+ int num_insn; /* Number of insn in the window. */
+ int num_uops; /* Number of uops in the window. */
+ int window_size; /* Number of bytes in the window. */
+ int window_num; /* Window number, either 0 or 1. */
+ int num_imm; /* Number of immediates in an insn. */
+ int num_imm_32; /* Number of 32 bit immediates in an insn. */
+ int num_imm_64; /* Number of 64 bit immediates in an insn. */
+ int imm_size; /* Total immediates in the window. */
+ int num_loads; /* Total memory loads in the window. */
+ int num_stores; /* Total memory stores in the window. */
+ int violation; /* Violation exists in window. */
+ sched_insn_info *window; /* Pointer to the window. */
+ struct dispatch_windows_s *next;
+ struct dispatch_windows_s *prev;
+} dispatch_windows;
+
+/* Immediate values used in an insn. */
+typedef struct imm_info_s
+ {
+ int imm;
+ int imm32;
+ int imm64;
+ } imm_info;
+
+static dispatch_windows *dispatch_window_list;
+static dispatch_windows *dispatch_window_list1;
+
+/* Get dispatch group of insn. */
+
+static enum dispatch_group
+get_mem_group (rtx_insn *insn)
+{
+ enum attr_memory memory;
+
+ if (INSN_CODE (insn) < 0)
+ return disp_no_group;
+ memory = get_attr_memory (insn);
+ if (memory == MEMORY_STORE)
+ return disp_store;
+
+ if (memory == MEMORY_LOAD)
+ return disp_load;
+
+ if (memory == MEMORY_BOTH)
+ return disp_load_store;
+
+ return disp_no_group;
+}
+
+/* Return true if insn is a compare instruction. */
+
+static bool
+is_cmp (rtx_insn *insn)
+{
+ enum attr_type type;
+
+ type = get_attr_type (insn);
+ return (type == TYPE_TEST
+ || type == TYPE_ICMP
+ || type == TYPE_FCMP
+ || GET_CODE (PATTERN (insn)) == COMPARE);
+}
+
+/* Return true if a dispatch violation was encountered. */
+
+static bool
+dispatch_violation (void)
+{
+ if (dispatch_window_list->next)
+ return dispatch_window_list->next->violation;
+ return dispatch_window_list->violation;
+}
+
+/* Return true if insn is a branch instruction. */
+
+static bool
+is_branch (rtx_insn *insn)
+{
+ return (CALL_P (insn) || JUMP_P (insn));
+}
+
+/* Return true if insn is a prefetch instruction. */
+
+static bool
+is_prefetch (rtx_insn *insn)
+{
+ return NONJUMP_INSN_P (insn) && GET_CODE (PATTERN (insn)) == PREFETCH;
+}
+
+/* This function initializes a dispatch window and the list container holding a
+ pointer to the window. */
+
+static void
+init_window (int window_num)
+{
+ int i;
+ dispatch_windows *new_list;
+
+ if (window_num == 0)
+ new_list = dispatch_window_list;
+ else
+ new_list = dispatch_window_list1;
+
+ new_list->num_insn = 0;
+ new_list->num_uops = 0;
+ new_list->window_size = 0;
+ new_list->next = NULL;
+ new_list->prev = NULL;
+ new_list->window_num = window_num;
+ new_list->num_imm = 0;
+ new_list->num_imm_32 = 0;
+ new_list->num_imm_64 = 0;
+ new_list->imm_size = 0;
+ new_list->num_loads = 0;
+ new_list->num_stores = 0;
+ new_list->violation = false;
+
+ for (i = 0; i < MAX_INSN; i++)
+ {
+ new_list->window[i].insn = NULL;
+ new_list->window[i].group = disp_no_group;
+ new_list->window[i].path = no_path;
+ new_list->window[i].byte_len = 0;
+ new_list->window[i].imm_bytes = 0;
+ }
+ return;
+}
+
+/* This function allocates and initializes a dispatch window and the
+ list container holding a pointer to the window. */
+
+static dispatch_windows *
+allocate_window (void)
+{
+ dispatch_windows *new_list = XNEW (struct dispatch_windows_s);
+ new_list->window = XNEWVEC (struct sched_insn_info_s, MAX_INSN + 1);
+
+ return new_list;
+}
+
+/* This routine initializes the dispatch scheduling information. It
+ initiates building dispatch scheduler tables and constructs the
+ first dispatch window. */
+
+static void
+init_dispatch_sched (void)
+{
+ /* Allocate a dispatch list and a window. */
+ dispatch_window_list = allocate_window ();
+ dispatch_window_list1 = allocate_window ();
+ init_window (0);
+ init_window (1);
+}
+
+/* This function returns true if a branch is detected. End of a basic block
+ does not have to be a branch, but here we assume only branches end a
+ window. */
+
+static bool
+is_end_basic_block (enum dispatch_group group)
+{
+ return group == disp_branch;
+}
+
+/* This function is called when the end of a window processing is reached. */
+
+static void
+process_end_window (void)
+{
+ gcc_assert (dispatch_window_list->num_insn <= MAX_INSN);
+ if (dispatch_window_list->next)
+ {
+ gcc_assert (dispatch_window_list1->num_insn <= MAX_INSN);
+ gcc_assert (dispatch_window_list->window_size
+ + dispatch_window_list1->window_size <= 48);
+ init_window (1);
+ }
+ init_window (0);
+}
+
+/* Allocates a new dispatch window and adds it to WINDOW_LIST.
+ WINDOW_NUM is either 0 or 1. A maximum of two windows are generated
+ for 48 bytes of instructions. Note that these windows are not dispatch
+ windows of size DISPATCH_WINDOW_SIZE. */
+
+static dispatch_windows *
+allocate_next_window (int window_num)
+{
+ if (window_num == 0)
+ {
+ if (dispatch_window_list->next)
+ init_window (1);
+ init_window (0);
+ return dispatch_window_list;
+ }
+
+ dispatch_window_list->next = dispatch_window_list1;
+ dispatch_window_list1->prev = dispatch_window_list;
+
+ return dispatch_window_list1;
+}
+
+/* Compute number of immediate operands of an instruction. */
+
+static void
+find_constant (rtx in_rtx, imm_info *imm_values)
+{
+ if (INSN_P (in_rtx))
+ in_rtx = PATTERN (in_rtx);
+ subrtx_iterator::array_type array;
+ FOR_EACH_SUBRTX (iter, array, in_rtx, ALL)
+ if (const_rtx x = *iter)
+ switch (GET_CODE (x))
+ {
+ case CONST:
+ case SYMBOL_REF:
+ case CONST_INT:
+ (imm_values->imm)++;
+ if (x86_64_immediate_operand (CONST_CAST_RTX (x), SImode))
+ (imm_values->imm32)++;
+ else
+ (imm_values->imm64)++;
+ break;
+
+ case CONST_DOUBLE:
+ case CONST_WIDE_INT:
+ (imm_values->imm)++;
+ (imm_values->imm64)++;
+ break;
+
+ case CODE_LABEL:
+ if (LABEL_KIND (x) == LABEL_NORMAL)
+ {
+ (imm_values->imm)++;
+ (imm_values->imm32)++;
+ }
+ break;
+
+ default:
+ break;
+ }
+}
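+
+/* Worked example (illustrative, not part of the patch): for
+ (set (reg:SI r) (plus:SI (reg:SI r) (const_int 42)))
+ the walk above finds one CONST_INT that satisfies
+ x86_64_immediate_operand in SImode, yielding imm = 1, imm32 = 1,
+ imm64 = 0. */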
+
+/* Return the total size of the immediate operands of an instruction along
+ with the number of corresponding immediate operands. It initializes its
+ parameters to zero before calling FIND_CONSTANT.
+ INSN is the input instruction. IMM is the total number of immediates.
+ IMM32 is the number of 32 bit immediates. IMM64 is the number of 64
+ bit immediates. */
+
+static int
+get_num_immediates (rtx_insn *insn, int *imm, int *imm32, int *imm64)
+{
+ imm_info imm_values = {0, 0, 0};
+
+ find_constant (insn, &imm_values);
+ *imm = imm_values.imm;
+ *imm32 = imm_values.imm32;
+ *imm64 = imm_values.imm64;
+ return imm_values.imm32 * 4 + imm_values.imm64 * 8;
+}
+
+/* Return true if INSN has at least one immediate operand. */
+
+static bool
+has_immediate (rtx_insn *insn)
+{
+ int num_imm_operand;
+ int num_imm32_operand;
+ int num_imm64_operand;
+
+ if (insn)
+ return get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
+ &num_imm64_operand);
+ return false;
+}
+
+/* Return the decode path (single, double or multi) for an instruction. */
+
+static enum insn_path
+get_insn_path (rtx_insn *insn)
+{
+ enum attr_amdfam10_decode path = get_attr_amdfam10_decode (insn);
+
+ if ((int)path == 0)
+ return path_single;
+
+ if ((int)path == 1)
+ return path_double;
+
+ return path_multi;
+}
+
+/* Return insn dispatch group. */
+
+static enum dispatch_group
+get_insn_group (rtx_insn *insn)
+{
+ enum dispatch_group group = get_mem_group (insn);
+ if (group)
+ return group;
+
+ if (is_branch (insn))
+ return disp_branch;
+
+ if (is_cmp (insn))
+ return disp_cmp;
+
+ if (has_immediate (insn))
+ return disp_imm;
+
+ if (is_prefetch (insn))
+ return disp_prefetch;
+
+ return disp_no_group;
+}
+
+/* Count number of GROUP restricted instructions in a dispatch
+ window WINDOW_LIST. */
+
+static int
+count_num_restricted (rtx_insn *insn, dispatch_windows *window_list)
+{
+ enum dispatch_group group = get_insn_group (insn);
+ int imm_size;
+ int num_imm_operand;
+ int num_imm32_operand;
+ int num_imm64_operand;
+
+ if (group == disp_no_group)
+ return 0;
+
+ if (group == disp_imm)
+ {
+ imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
+ &num_imm64_operand);
+ if (window_list->imm_size + imm_size > MAX_IMM_SIZE
+ || num_imm_operand + window_list->num_imm > MAX_IMM
+ || (num_imm32_operand > 0
+ && (window_list->num_imm_32 + num_imm32_operand > MAX_IMM_32
+ || window_list->num_imm_64 * 2 + num_imm32_operand > MAX_IMM_32))
+ || (num_imm64_operand > 0
+ && (window_list->num_imm_64 + num_imm64_operand > MAX_IMM_64
+ || window_list->num_imm_32 + num_imm64_operand * 2 > MAX_IMM_32))
+ || (window_list->imm_size + imm_size == MAX_IMM_SIZE
+ && num_imm64_operand > 0
+ && ((window_list->num_imm_64 > 0
+ && window_list->num_insn >= 2)
+ || window_list->num_insn >= 3)))
+ return BIG;
+
+ return 1;
+ }
+
+ if ((group == disp_load_store
+ && (window_list->num_loads >= MAX_LOAD
+ || window_list->num_stores >= MAX_STORE))
+ || ((group == disp_load
+ || group == disp_prefetch)
+ && window_list->num_loads >= MAX_LOAD)
+ || (group == disp_store
+ && window_list->num_stores >= MAX_STORE))
+ return BIG;
+
+ return 1;
+}
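+
+/* Worked example (illustrative): MAX_IMM_32 is 4 and a 64-bit
+ immediate occupies two 32-bit slots, so a window that already holds
+ one 64-bit immediate rejects an insn carrying three more 32-bit
+ immediates (1 * 2 + 3 > 4) by scoring it BIG. */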
+
+/* This function returns true if insn satisfies dispatch rules on the
+ last window scheduled. */
+
+static bool
+fits_dispatch_window (rtx_insn *insn)
+{
+ dispatch_windows *window_list = dispatch_window_list;
+ dispatch_windows *window_list_next = dispatch_window_list->next;
+ unsigned int num_restrict;
+ enum dispatch_group group = get_insn_group (insn);
+ enum insn_path path = get_insn_path (insn);
+ int sum;
+
+ /* Make disp_cmp and disp_jcc get scheduled as late as possible. These
+ instructions should be given the lowest priority in the
+ scheduling process in the Haifa scheduler to make sure they will be
+ scheduled in the same dispatch window as the reference to them. */
+ if (group == disp_jcc || group == disp_cmp)
+ return false;
+
+ /* Check nonrestricted. */
+ if (group == disp_no_group || group == disp_branch)
+ return true;
+
+ /* Get last dispatch window. */
+ if (window_list_next)
+ window_list = window_list_next;
+
+ if (window_list->window_num == 1)
+ {
+ sum = window_list->prev->window_size + window_list->window_size;
+
+ if (sum == 32
+ || (ix86_min_insn_size (insn) + sum) >= 48)
+ /* Window 1 is full. Go for next window. */
+ return true;
+ }
+
+ num_restrict = count_num_restricted (insn, window_list);
+
+ if (num_restrict > num_allowable_groups[group])
+ return false;
+
+ /* See if it fits in the first window. */
+ if (window_list->window_num == 0)
+ {
+ /* The first window should have only single and double path
+ uops. */
+ if (path == path_double
+ && (window_list->num_uops + 2) > MAX_INSN)
+ return false;
+ else if (path != path_single)
+ return false;
+ }
+ return true;
+}
+
+/* Add an instruction INSN with NUM_UOPS micro-operations to the
+ dispatch window WINDOW_LIST. */
+
+static void
+add_insn_window (rtx_insn *insn, dispatch_windows *window_list, int num_uops)
+{
+ int byte_len = ix86_min_insn_size (insn);
+ int num_insn = window_list->num_insn;
+ int imm_size;
+ sched_insn_info *window = window_list->window;
+ enum dispatch_group group = get_insn_group (insn);
+ enum insn_path path = get_insn_path (insn);
+ int num_imm_operand;
+ int num_imm32_operand;
+ int num_imm64_operand;
+
+ if (!window_list->violation && group != disp_cmp
+ && !fits_dispatch_window (insn))
+ window_list->violation = true;
+
+ imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
+ &num_imm64_operand);
+
+ /* Initialize window with new instruction. */
+ window[num_insn].insn = insn;
+ window[num_insn].byte_len = byte_len;
+ window[num_insn].group = group;
+ window[num_insn].path = path;
+ window[num_insn].imm_bytes = imm_size;
+
+ window_list->window_size += byte_len;
+ window_list->num_insn = num_insn + 1;
+ window_list->num_uops = window_list->num_uops + num_uops;
+ window_list->imm_size += imm_size;
+ window_list->num_imm += num_imm_operand;
+ window_list->num_imm_32 += num_imm32_operand;
+ window_list->num_imm_64 += num_imm64_operand;
+
+ if (group == disp_store)
+ window_list->num_stores += 1;
+ else if (group == disp_load
+ || group == disp_prefetch)
+ window_list->num_loads += 1;
+ else if (group == disp_load_store)
+ {
+ window_list->num_stores += 1;
+ window_list->num_loads += 1;
+ }
+}
+
+/* Adds a scheduled instruction, INSN, to the current dispatch window.
+ If the total bytes of instructions or the number of instructions in
+ the window exceed the allowable limits, it allocates a new window. */
+
+static void
+add_to_dispatch_window (rtx_insn *insn)
+{
+ int byte_len;
+ dispatch_windows *window_list;
+ dispatch_windows *next_list;
+ dispatch_windows *window0_list;
+ enum insn_path path;
+ enum dispatch_group insn_group;
+ bool insn_fits;
+ int num_insn;
+ int num_uops;
+ int window_num;
+ int insn_num_uops;
+ int sum;
+
+ if (INSN_CODE (insn) < 0)
+ return;
+
+ byte_len = ix86_min_insn_size (insn);
+ window_list = dispatch_window_list;
+ next_list = window_list->next;
+ path = get_insn_path (insn);
+ insn_group = get_insn_group (insn);
+
+ /* Get the last dispatch window. */
+ if (next_list)
+ window_list = dispatch_window_list->next;
+
+ if (path == path_single)
+ insn_num_uops = 1;
+ else if (path == path_double)
+ insn_num_uops = 2;
+ else
+ insn_num_uops = (int) path;
+
+ /* If the current window is full, get a new window.
+ Window number zero is full if MAX_INSN uops are scheduled in it.
+ Window number one is full if window zero's bytes plus window
+ one's bytes reach 32, or if the bytes of the new instruction added
+ to the total make it greater than 48, or if it already has MAX_INSN
+ instructions in it. */
+ num_insn = window_list->num_insn;
+ num_uops = window_list->num_uops;
+ window_num = window_list->window_num;
+ insn_fits = fits_dispatch_window (insn);
+
+ if (num_insn >= MAX_INSN
+ || num_uops + insn_num_uops > MAX_INSN
+ || !(insn_fits))
+ {
+ window_num = ~window_num & 1;
+ window_list = allocate_next_window (window_num);
+ }
+
+ if (window_num == 0)
+ {
+ add_insn_window (insn, window_list, insn_num_uops);
+ if (window_list->num_insn >= MAX_INSN
+ && insn_group == disp_branch)
+ {
+ process_end_window ();
+ return;
+ }
+ }
+ else if (window_num == 1)
+ {
+ window0_list = window_list->prev;
+ sum = window0_list->window_size + window_list->window_size;
+ if (sum == 32
+ || (byte_len + sum) >= 48)
+ {
+ process_end_window ();
+ window_list = dispatch_window_list;
+ }
+
+ add_insn_window (insn, window_list, insn_num_uops);
+ }
+ else
+ gcc_unreachable ();
+
+ if (is_end_basic_block (insn_group))
+ {
+ /* End of basic block is reached; do end-basic-block processing. */
+ process_end_window ();
+ return;
+ }
+}
+
+/* Print the dispatch window, WINDOW_NUM, to FILE. */
+
+DEBUG_FUNCTION static void
+debug_dispatch_window_file (FILE *file, int window_num)
+{
+ dispatch_windows *list;
+ int i;
+
+ if (window_num == 0)
+ list = dispatch_window_list;
+ else
+ list = dispatch_window_list1;
+
+ fprintf (file, "Window #%d:\n", list->window_num);
+ fprintf (file, " num_insn = %d, num_uops = %d, window_size = %d\n",
+ list->num_insn, list->num_uops, list->window_size);
+ fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
+ list->num_imm, list->num_imm_32, list->num_imm_64, list->imm_size);
+
+ fprintf (file, " num_loads = %d, num_stores = %d\n", list->num_loads,
+ list->num_stores);
+ fprintf (file, " insn info:\n");
+
+ for (i = 0; i < MAX_INSN; i++)
+ {
+ if (!list->window[i].insn)
+ break;
+ fprintf (file, " group[%d] = %s, insn[%d] = %p, path[%d] = %d byte_len[%d] = %d, imm_bytes[%d] = %d\n",
+ i, group_name[list->window[i].group],
+ i, (void *)list->window[i].insn,
+ i, list->window[i].path,
+ i, list->window[i].byte_len,
+ i, list->window[i].imm_bytes);
+ }
+}
+
+/* Print to stdout a dispatch window. */
+
+DEBUG_FUNCTION void
+debug_dispatch_window (int window_num)
+{
+ debug_dispatch_window_file (stdout, window_num);
+}
+
+/* Print INSN dispatch information to FILE. */
+
+DEBUG_FUNCTION static void
+debug_insn_dispatch_info_file (FILE *file, rtx_insn *insn)
+{
+ int byte_len;
+ enum insn_path path;
+ enum dispatch_group group;
+ int imm_size;
+ int num_imm_operand;
+ int num_imm32_operand;
+ int num_imm64_operand;
+
+ if (INSN_CODE (insn) < 0)
+ return;
+
+ byte_len = ix86_min_insn_size (insn);
+ path = get_insn_path (insn);
+ group = get_insn_group (insn);
+ imm_size = get_num_immediates (insn, &num_imm_operand, &num_imm32_operand,
+ &num_imm64_operand);
+
+ fprintf (file, " insn info:\n");
+ fprintf (file, " group = %s, path = %d, byte_len = %d\n",
+ group_name[group], path, byte_len);
+ fprintf (file, " num_imm = %d, num_imm_32 = %d, num_imm_64 = %d, imm_size = %d\n",
+ num_imm_operand, num_imm32_operand, num_imm64_operand, imm_size);
+}
+
+/* Print to stdout the status of the ready list with respect to
+ dispatch windows. */
+
+DEBUG_FUNCTION void
+debug_ready_dispatch (void)
+{
+ int i;
+ int no_ready = number_in_ready ();
+
+ fprintf (stdout, "Number of ready: %d\n", no_ready);
+
+ for (i = 0; i < no_ready; i++)
+ debug_insn_dispatch_info_file (stdout, get_ready_element (i));
+}
+
+/* This routine is the driver of the dispatch scheduler. */
+
+void
+ix86_bd_do_dispatch (rtx_insn *insn, int mode)
+{
+ if (mode == DISPATCH_INIT)
+ init_dispatch_sched ();
+ else if (mode == ADD_TO_DISPATCH_WINDOW)
+ add_to_dispatch_window (insn);
+}
+
+/* Return TRUE if Dispatch Scheduling is supported. */
+
+bool
+ix86_bd_has_dispatch (rtx_insn *insn, int action)
+{
+ /* The current implementation of the dispatch scheduler models Bulldozer only. */
+ if ((TARGET_BDVER1 || TARGET_BDVER2 || TARGET_BDVER3
+ || TARGET_BDVER4) && flag_dispatch_scheduler)
+ switch (action)
+ {
+ default:
+ return false;
+
+ case IS_DISPATCH_ON:
+ return true;
+
+ case IS_CMP:
+ return is_cmp (insn);
+
+ case DISPATCH_VIOLATION:
+ return dispatch_violation ();
+
+ case FITS_DISPATCH_WINDOW:
+ return fits_dispatch_window (insn);
+ }
+
+ return false;
+}
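+
+/* Illustrative sketch (not part of this patch) of how a caller could
+ drive the dispatch scheduler: initialize once, then feed each
+ scheduled insn into the current window. The wrapper name is made up;
+ DISPATCH_INIT and ADD_TO_DISPATCH_WINDOW are the modes used above. */
+#if 0 /* sketch only */
+static void
+sketch_dispatch_block (rtx_insn *head, rtx_insn *tail)
+{
+  ix86_bd_do_dispatch (NULL, DISPATCH_INIT);
+  for (rtx_insn *insn = head; insn != tail; insn = NEXT_INSN (insn))
+    if (NONDEBUG_INSN_P (insn))
+      ix86_bd_do_dispatch (insn, ADD_TO_DISPATCH_WINDOW);
+}
+#endif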
diff --git a/gcc/config/i386/x86-tune-sched-core.c b/gcc/config/i386/x86-tune-sched-core.c
new file mode 100644
index 00000000000..70e8556a1db
--- /dev/null
+++ b/gcc/config/i386/x86-tune-sched-core.c
@@ -0,0 +1,257 @@
+/* Scheduler hooks for IA-32 which implement Core 2/i7 specific logic.
+ Copyright (C) 1988-2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "cfghooks.h"
+#include "tm_p.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "target.h"
+#include "rtl-iter.h"
+#include "regset.h"
+#include "sched-int.h"
+
+
+/* Model the decoder of Core 2/i7.
+ The hooks below for multipass scheduling (see haifa-sched.c:max_issue)
+ track the instruction fetch block boundaries and make sure that long
+ (9+ bytes) instructions are assigned to D0. */
+
+/* Maximum length of an insn that can be handled by
+ a secondary decoder unit. '8' for Core 2/i7. */
+static int core2i7_secondary_decoder_max_insn_size;
+
+/* Ifetch block size, i.e., number of bytes decoder reads per cycle.
+ '16' for Core 2/i7. */
+static int core2i7_ifetch_block_size;
+
+/* Maximum number of instructions decoder can handle per cycle.
+ '6' for Core 2/i7. */
+static int core2i7_ifetch_block_max_insns;
+
+typedef struct ix86_first_cycle_multipass_data_ *
+ ix86_first_cycle_multipass_data_t;
+typedef const struct ix86_first_cycle_multipass_data_ *
+ const_ix86_first_cycle_multipass_data_t;
+
+/* A variable to store target state across calls to max_issue within
+ one cycle. */
+static struct ix86_first_cycle_multipass_data_ _ix86_first_cycle_multipass_data,
+ *ix86_first_cycle_multipass_data = &_ix86_first_cycle_multipass_data;
+
+/* Initialize DATA. */
+static void
+core2i7_first_cycle_multipass_init (void *_data)
+{
+ ix86_first_cycle_multipass_data_t data
+ = (ix86_first_cycle_multipass_data_t) _data;
+
+ data->ifetch_block_len = 0;
+ data->ifetch_block_n_insns = 0;
+ data->ready_try_change = NULL;
+ data->ready_try_change_size = 0;
+}
+
+/* Advancing the cycle; reset ifetch block counts. */
+static void
+core2i7_dfa_post_advance_cycle (void)
+{
+ ix86_first_cycle_multipass_data_t data = ix86_first_cycle_multipass_data;
+
+ gcc_assert (data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
+
+ data->ifetch_block_len = 0;
+ data->ifetch_block_n_insns = 0;
+}
+
+/* Filter out insns from ready_try that the core will not be able to issue
+ on current cycle due to decoder. */
+static void
+core2i7_first_cycle_multipass_filter_ready_try
+(const_ix86_first_cycle_multipass_data_t data,
+ signed char *ready_try, int n_ready, bool first_cycle_insn_p)
+{
+ while (n_ready--)
+ {
+ rtx_insn *insn;
+ int insn_size;
+
+ if (ready_try[n_ready])
+ continue;
+
+ insn = get_ready_element (n_ready);
+ insn_size = ix86_min_insn_size (insn);
+
+ if (/* If this is too long an insn for a secondary decoder ... */
+ (!first_cycle_insn_p
+ && insn_size > core2i7_secondary_decoder_max_insn_size)
+ /* ... or it would not fit into the ifetch block ... */
+ || data->ifetch_block_len + insn_size > core2i7_ifetch_block_size
+ /* ... or the decoder is full already ... */
+ || data->ifetch_block_n_insns + 1 > core2i7_ifetch_block_max_insns)
+ /* ... mask the insn out. */
+ {
+ ready_try[n_ready] = 1;
+
+ if (data->ready_try_change)
+ bitmap_set_bit (data->ready_try_change, n_ready);
+ }
+ }
+}
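+
+/* Worked example (illustrative): with the Core 2/i7 parameters set in
+ ix86_core2i7_init_hooks below (16-byte ifetch block, at most 6 insns
+ per cycle, 8-byte secondary decoder limit), a 7-byte insn is masked
+ out when 10 bytes have already been fetched this cycle, since
+ 10 + 7 > 16; and any insn of 9+ bytes is masked out unless it is the
+ first insn of the cycle. */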
+
+/* Prepare for a new round of multipass lookahead scheduling. */
+static void
+core2i7_first_cycle_multipass_begin (void *_data,
+ signed char *ready_try, int n_ready,
+ bool first_cycle_insn_p)
+{
+ ix86_first_cycle_multipass_data_t data
+ = (ix86_first_cycle_multipass_data_t) _data;
+ const_ix86_first_cycle_multipass_data_t prev_data
+ = ix86_first_cycle_multipass_data;
+
+ /* Restore the state from the end of the previous round. */
+ data->ifetch_block_len = prev_data->ifetch_block_len;
+ data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns;
+
+ /* Filter instructions that cannot be issued on current cycle due to
+ decoder restrictions. */
+ core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
+ first_cycle_insn_p);
+}
+
+/* INSN is being issued in current solution. Account for its impact on
+ the decoder model. */
+static void
+core2i7_first_cycle_multipass_issue (void *_data,
+ signed char *ready_try, int n_ready,
+ rtx_insn *insn, const void *_prev_data)
+{
+ ix86_first_cycle_multipass_data_t data
+ = (ix86_first_cycle_multipass_data_t) _data;
+ const_ix86_first_cycle_multipass_data_t prev_data
+ = (const_ix86_first_cycle_multipass_data_t) _prev_data;
+
+ int insn_size = ix86_min_insn_size (insn);
+
+ data->ifetch_block_len = prev_data->ifetch_block_len + insn_size;
+ data->ifetch_block_n_insns = prev_data->ifetch_block_n_insns + 1;
+ gcc_assert (data->ifetch_block_len <= core2i7_ifetch_block_size
+ && data->ifetch_block_n_insns <= core2i7_ifetch_block_max_insns);
+
+ /* Allocate or resize the bitmap for storing INSN's effect on ready_try. */
+ if (!data->ready_try_change)
+ {
+ data->ready_try_change = sbitmap_alloc (n_ready);
+ data->ready_try_change_size = n_ready;
+ }
+ else if (data->ready_try_change_size < n_ready)
+ {
+ data->ready_try_change = sbitmap_resize (data->ready_try_change,
+ n_ready, 0);
+ data->ready_try_change_size = n_ready;
+ }
+ bitmap_clear (data->ready_try_change);
+
+ /* Filter out insns from ready_try that the core will not be able to issue
+ on current cycle due to decoder. */
+ core2i7_first_cycle_multipass_filter_ready_try (data, ready_try, n_ready,
+ false);
+}
+
+/* Revert the effect on ready_try. */
+static void
+core2i7_first_cycle_multipass_backtrack (const void *_data,
+ signed char *ready_try,
+ int n_ready ATTRIBUTE_UNUSED)
+{
+ const_ix86_first_cycle_multipass_data_t data
+ = (const_ix86_first_cycle_multipass_data_t) _data;
+ unsigned int i = 0;
+ sbitmap_iterator sbi;
+
+ gcc_assert (bitmap_last_set_bit (data->ready_try_change) < n_ready);
+ EXECUTE_IF_SET_IN_BITMAP (data->ready_try_change, 0, i, sbi)
+ {
+ ready_try[i] = 0;
+ }
+}
+
+/* Save the result of multipass lookahead scheduling for the next round. */
+static void
+core2i7_first_cycle_multipass_end (const void *_data)
+{
+ const_ix86_first_cycle_multipass_data_t data
+ = (const_ix86_first_cycle_multipass_data_t) _data;
+ ix86_first_cycle_multipass_data_t next_data
+ = ix86_first_cycle_multipass_data;
+
+ if (data != NULL)
+ {
+ next_data->ifetch_block_len = data->ifetch_block_len;
+ next_data->ifetch_block_n_insns = data->ifetch_block_n_insns;
+ }
+}
+
+/* Deallocate target data. */
+static void
+core2i7_first_cycle_multipass_fini (void *_data)
+{
+ ix86_first_cycle_multipass_data_t data
+ = (ix86_first_cycle_multipass_data_t) _data;
+
+ if (data->ready_try_change)
+ {
+ sbitmap_free (data->ready_try_change);
+ data->ready_try_change = NULL;
+ data->ready_try_change_size = 0;
+ }
+}
+
+void
+ix86_core2i7_init_hooks (void)
+{
+ targetm.sched.dfa_post_advance_cycle
+ = core2i7_dfa_post_advance_cycle;
+ targetm.sched.first_cycle_multipass_init
+ = core2i7_first_cycle_multipass_init;
+ targetm.sched.first_cycle_multipass_begin
+ = core2i7_first_cycle_multipass_begin;
+ targetm.sched.first_cycle_multipass_issue
+ = core2i7_first_cycle_multipass_issue;
+ targetm.sched.first_cycle_multipass_backtrack
+ = core2i7_first_cycle_multipass_backtrack;
+ targetm.sched.first_cycle_multipass_end
+ = core2i7_first_cycle_multipass_end;
+ targetm.sched.first_cycle_multipass_fini
+ = core2i7_first_cycle_multipass_fini;
+
+ /* Set decoder parameters. */
+ core2i7_secondary_decoder_max_insn_size = 8;
+ core2i7_ifetch_block_size = 16;
+ core2i7_ifetch_block_max_insns = 6;
+}
diff --git a/gcc/config/i386/x86-tune-sched.c b/gcc/config/i386/x86-tune-sched.c
new file mode 100644
index 00000000000..d6ef5af7e00
--- /dev/null
+++ b/gcc/config/i386/x86-tune-sched.c
@@ -0,0 +1,629 @@
+/* Scheduler hooks for IA-32 which implement CPU specific logic.
+ Copyright (C) 1988-2017 Free Software Foundation, Inc.
+
+This file is part of GCC.
+
+GCC is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License as published by
+the Free Software Foundation; either version 3, or (at your option)
+any later version.
+
+GCC is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+GNU General Public License for more details.
+
+You should have received a copy of the GNU General Public License
+along with GCC; see the file COPYING3. If not see
+<http://www.gnu.org/licenses/>. */
+
+#define IN_TARGET_CODE 1
+
+#include "config.h"
+#include "system.h"
+#include "coretypes.h"
+#include "backend.h"
+#include "rtl.h"
+#include "tree.h"
+#include "cfghooks.h"
+#include "tm_p.h"
+#include "insn-config.h"
+#include "insn-attr.h"
+#include "recog.h"
+#include "target.h"
+
+/* Return the maximum number of instructions a cpu can issue. */
+
+int
+ix86_issue_rate (void)
+{
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM:
+ case PROCESSOR_LAKEMONT:
+ case PROCESSOR_BONNELL:
+ case PROCESSOR_SILVERMONT:
+ case PROCESSOR_KNL:
+ case PROCESSOR_KNM:
+ case PROCESSOR_INTEL:
+ case PROCESSOR_K6:
+ case PROCESSOR_BTVER2:
+ case PROCESSOR_PENTIUM4:
+ case PROCESSOR_NOCONA:
+ return 2;
+
+ case PROCESSOR_PENTIUMPRO:
+ case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
+ case PROCESSOR_AMDFAM10:
+ case PROCESSOR_GENERIC:
+ case PROCESSOR_BTVER1:
+ return 3;
+
+ case PROCESSOR_BDVER1:
+ case PROCESSOR_BDVER2:
+ case PROCESSOR_BDVER3:
+ case PROCESSOR_BDVER4:
+ case PROCESSOR_ZNVER1:
+ case PROCESSOR_CORE2:
+ case PROCESSOR_NEHALEM:
+ case PROCESSOR_SANDYBRIDGE:
+ case PROCESSOR_HASWELL:
+ return 4;
+
+ default:
+ return 1;
+ }
+}
+
+/* Return true iff USE_INSN has a memory address with operands set by
+ SET_INSN. */
+
+bool
+ix86_agi_dependent (rtx_insn *set_insn, rtx_insn *use_insn)
+{
+ int i;
+ extract_insn_cached (use_insn);
+ for (i = recog_data.n_operands - 1; i >= 0; --i)
+ if (MEM_P (recog_data.operand[i]))
+ {
+ rtx addr = XEXP (recog_data.operand[i], 0);
+ if (modified_in_p (addr, set_insn) != 0)
+ {
+ /* No AGI stall if SET_INSN is a push or pop and USE_INSN
+ has SP based memory (unless index reg is modified in a pop). */
+ rtx set = single_set (set_insn);
+ if (set
+ && (push_operand (SET_DEST (set), GET_MODE (SET_DEST (set)))
+ || pop_operand (SET_SRC (set), GET_MODE (SET_SRC (set)))))
+ {
+ struct ix86_address parts;
+ if (ix86_decompose_address (addr, &parts)
+ && parts.base == stack_pointer_rtx
+ && (parts.index == NULL_RTX
+ || MEM_P (SET_DEST (set))
+ || !modified_in_p (parts.index, set_insn)))
+ return false;
+ }
+ return true;
+ }
+ return false;
+ }
+ return false;
+}
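+
+/* Illustrative example (hypothetical insns): on Pentium,
+
+ mov %ebx, %eax ; SET_INSN writes %eax
+ mov (%eax), %ecx ; USE_INSN's address uses %eax
+
+ is AGI dependent, while a push or pop feeding an %esp-based access
+ falls under the exemption coded above. */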
+
+/* A subroutine of ix86_adjust_cost -- return TRUE iff INSN reads flags set
+ by DEP_INSN and nothing else set by DEP_INSN. */
+
+static bool
+ix86_flags_dependent (rtx_insn *insn, rtx_insn *dep_insn, enum attr_type insn_type)
+{
+ rtx set, set2;
+
+ /* Simplify the test for uninteresting insns. */
+ if (insn_type != TYPE_SETCC
+ && insn_type != TYPE_ICMOV
+ && insn_type != TYPE_FCMOV
+ && insn_type != TYPE_IBR)
+ return false;
+
+ if ((set = single_set (dep_insn)) != 0)
+ {
+ set = SET_DEST (set);
+ set2 = NULL_RTX;
+ }
+ else if (GET_CODE (PATTERN (dep_insn)) == PARALLEL
+ && XVECLEN (PATTERN (dep_insn), 0) == 2
+ && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 0)) == SET
+ && GET_CODE (XVECEXP (PATTERN (dep_insn), 0, 1)) == SET)
+ {
+ set = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 0));
+ set2 = SET_DEST (XVECEXP (PATTERN (dep_insn), 0, 1));
+ }
+ else
+ return false;
+
+ if (!REG_P (set) || REGNO (set) != FLAGS_REG)
+ return false;
+
+ /* This test is true if the dependent insn reads the flags but
+ not any other potentially set register. */
+ if (!reg_overlap_mentioned_p (set, PATTERN (insn)))
+ return false;
+
+ if (set2 && reg_overlap_mentioned_p (set2, PATTERN (insn)))
+ return false;
+
+ return true;
+}
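+
+/* Illustrative example (schematic RTL): for
+ dep_insn: (set (reg:CC flags) (compare ...))
+ insn: (set (pc) (if_then_else (lt (reg:CC flags) ...) ...))
+ this returns true, which lets the Pentium case of ix86_adjust_cost
+ treat the compare/jump pair as cost 0. */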
+
+/* Helper function for exact_store_load_dependency.
+ Return true if addr is found in insn. */
+static bool
+exact_dependency_1 (rtx addr, rtx insn)
+{
+ enum rtx_code code;
+ const char *format_ptr;
+ int i, j;
+
+ code = GET_CODE (insn);
+ switch (code)
+ {
+ case MEM:
+ if (rtx_equal_p (addr, insn))
+ return true;
+ break;
+ case REG:
+ CASE_CONST_ANY:
+ case SYMBOL_REF:
+ case CODE_LABEL:
+ case PC:
+ case CC0:
+ case EXPR_LIST:
+ return false;
+ default:
+ break;
+ }
+
+ format_ptr = GET_RTX_FORMAT (code);
+ for (i = 0; i < GET_RTX_LENGTH (code); i++)
+ {
+ switch (*format_ptr++)
+ {
+ case 'e':
+ if (exact_dependency_1 (addr, XEXP (insn, i)))
+ return true;
+ break;
+ case 'E':
+ for (j = 0; j < XVECLEN (insn, i); j++)
+ if (exact_dependency_1 (addr, XVECEXP (insn, i, j)))
+ return true;
+ break;
+ }
+ }
+ return false;
+}
+
+/* Return true if there exists an exact dependency between store and load,
+ i.e. the same memory address is used in both. */
+static bool
+exact_store_load_dependency (rtx_insn *store, rtx_insn *load)
+{
+ rtx set1, set2;
+
+ set1 = single_set (store);
+ if (!set1)
+ return false;
+ if (!MEM_P (SET_DEST (set1)))
+ return false;
+ set2 = single_set (load);
+ if (!set2)
+ return false;
+ if (exact_dependency_1 (SET_DEST (set1), SET_SRC (set2)))
+ return true;
+ return false;
+}
+
+
+/* This function corrects the value of COST (latency) based on the relationship
+ between INSN and DEP_INSN through a dependence of type DEP_TYPE, and strength
+ DW. It should return the new value.
+
+ On x86 CPUs this is most commonly used to model the fact that values of
+ registers used to compute the address of a memory operand need to be ready
+ earlier than values of registers used in the actual operation. */
+
+int
+ix86_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
+ unsigned int)
+{
+ enum attr_type insn_type, dep_insn_type;
+ enum attr_memory memory;
+ rtx set, set2;
+ int dep_insn_code_number;
+
+ /* Anti and output dependencies have zero cost on all CPUs. */
+ if (dep_type != 0)
+ return 0;
+
+ dep_insn_code_number = recog_memoized (dep_insn);
+
+ /* If we can't recognize the insns, we can't really do anything. */
+ if (dep_insn_code_number < 0 || recog_memoized (insn) < 0)
+ return cost;
+
+ insn_type = get_attr_type (insn);
+ dep_insn_type = get_attr_type (dep_insn);
+
+ switch (ix86_tune)
+ {
+ case PROCESSOR_PENTIUM:
+ case PROCESSOR_LAKEMONT:
+ /* Address Generation Interlock adds a cycle of latency. */
+ if (insn_type == TYPE_LEA)
+ {
+ rtx addr = PATTERN (insn);
+
+ if (GET_CODE (addr) == PARALLEL)
+ addr = XVECEXP (addr, 0, 0);
+
+ gcc_assert (GET_CODE (addr) == SET);
+
+ addr = SET_SRC (addr);
+ if (modified_in_p (addr, dep_insn))
+ cost += 1;
+ }
+ else if (ix86_agi_dependent (dep_insn, insn))
+ cost += 1;
+
+ /* ??? Compares pair with jump/setcc. */
+ if (ix86_flags_dependent (insn, dep_insn, insn_type))
+ cost = 0;
+
+ /* Floating point stores require value to be ready one cycle earlier. */
+ if (insn_type == TYPE_FMOV
+ && get_attr_memory (insn) == MEMORY_STORE
+ && !ix86_agi_dependent (dep_insn, insn))
+ cost += 1;
+ break;
+
+ case PROCESSOR_PENTIUMPRO:
+ /* INT->FP conversion is expensive. */
+ if (get_attr_fp_int_src (dep_insn))
+ cost += 5;
+
+ /* There is one cycle extra latency between an FP op and a store. */
+ if (insn_type == TYPE_FMOV
+ && (set = single_set (dep_insn)) != NULL_RTX
+ && (set2 = single_set (insn)) != NULL_RTX
+ && rtx_equal_p (SET_DEST (set), SET_SRC (set2))
+ && MEM_P (SET_DEST (set2)))
+ cost += 1;
+
+ memory = get_attr_memory (insn);
+
+ /* Show the ability of the reorder buffer to hide the latency of a load
+ by executing it in parallel with the previous instruction when the
+ previous instruction is not needed to compute the address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (dep_insn, insn))
+ {
+ /* Claim moves take one cycle, as the core can issue one load
+ at a time and the next load can start a cycle later. */
+ if (dep_insn_type == TYPE_IMOV
+ || dep_insn_type == TYPE_FMOV)
+ cost = 1;
+ else if (cost > 1)
+ cost--;
+ }
+ break;
+
+ case PROCESSOR_K6:
+ /* The esp dependency is resolved before
+ the instruction is really finished. */
+ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
+ && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
+ return 1;
+
+ /* INT->FP conversion is expensive. */
+ if (get_attr_fp_int_src (dep_insn))
+ cost += 5;
+
+ memory = get_attr_memory (insn);
+
+ /* Show the ability of the reorder buffer to hide the latency of a load
+ by executing it in parallel with the previous instruction when the
+ previous instruction is not needed to compute the address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (dep_insn, insn))
+ {
+ /* Claim moves take one cycle, as the core can issue one load
+ at a time and the next load can start a cycle later. */
+ if (dep_insn_type == TYPE_IMOV
+ || dep_insn_type == TYPE_FMOV)
+ cost = 1;
+ else if (cost > 2)
+ cost -= 2;
+ else
+ cost = 1;
+ }
+ break;
+
+ case PROCESSOR_AMDFAM10:
+ case PROCESSOR_BDVER1:
+ case PROCESSOR_BDVER2:
+ case PROCESSOR_BDVER3:
+ case PROCESSOR_BDVER4:
+ case PROCESSOR_BTVER1:
+ case PROCESSOR_BTVER2:
+ case PROCESSOR_GENERIC:
+ /* The stack engine allows push&pop instructions to execute in parallel. */
+ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
+ && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
+ return 0;
+ /* FALLTHRU */
+
+ case PROCESSOR_ATHLON:
+ case PROCESSOR_K8:
+ memory = get_attr_memory (insn);
+
+ /* Show the ability of the reorder buffer to hide the latency of a load
+ by executing it in parallel with the previous instruction when the
+ previous instruction is not needed to compute the address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (dep_insn, insn))
+ {
+ enum attr_unit unit = get_attr_unit (insn);
+ int loadcost = 3;
+
+ /* Because of the difference between the length of integer and
+ floating unit pipeline preparation stages, the memory operands
+ for floating point are cheaper.
+
+ ??? For Athlon the difference is most probably 2. */
+ if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
+ loadcost = 3;
+ else
+ loadcost = TARGET_ATHLON ? 2 : 0;
+
+ if (cost >= loadcost)
+ cost -= loadcost;
+ else
+ cost = 0;
+ }
+ break;
+
+ case PROCESSOR_ZNVER1:
+ /* The stack engine allows push&pop instructions to execute in parallel. */
+ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
+ && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
+ return 0;
+
+ memory = get_attr_memory (insn);
+
+ /* Show the ability of the reorder buffer to hide the latency of a load
+ by executing it in parallel with the previous instruction, provided
+ the previous instruction is not needed to compute the address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (dep_insn, insn))
+ {
+ enum attr_unit unit = get_attr_unit (insn);
+ int loadcost;
+
+ if (unit == UNIT_INTEGER || unit == UNIT_UNKNOWN)
+ loadcost = 4;
+ else
+ loadcost = 7;
+
+ if (cost >= loadcost)
+ cost -= loadcost;
+ else
+ cost = 0;
+ }
+ break;
+
+ case PROCESSOR_CORE2:
+ case PROCESSOR_NEHALEM:
+ case PROCESSOR_SANDYBRIDGE:
+ case PROCESSOR_HASWELL:
+ /* The stack engine allows push&pop instructions to execute in parallel. */
+ if ((insn_type == TYPE_PUSH || insn_type == TYPE_POP)
+ && (dep_insn_type == TYPE_PUSH || dep_insn_type == TYPE_POP))
+ return 0;
+
+ memory = get_attr_memory (insn);
+
+ /* Show the ability of the reorder buffer to hide the latency of a load
+ by executing it in parallel with the previous instruction, provided
+ the previous instruction is not needed to compute the address. */
+ if ((memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ && !ix86_agi_dependent (dep_insn, insn))
+ {
+ if (cost >= 4)
+ cost -= 4;
+ else
+ cost = 0;
+ }
+ break;
+
+ case PROCESSOR_SILVERMONT:
+ case PROCESSOR_KNL:
+ case PROCESSOR_KNM:
+ case PROCESSOR_INTEL:
+ if (!reload_completed)
+ return cost;
+
+ /* Increase cost of integer loads. */
+ memory = get_attr_memory (dep_insn);
+ if (memory == MEMORY_LOAD || memory == MEMORY_BOTH)
+ {
+ enum attr_unit unit = get_attr_unit (dep_insn);
+ if (unit == UNIT_INTEGER && cost == 1)
+ {
+ if (memory == MEMORY_LOAD)
+ cost = 3;
+ else
+ {
+ /* Increase the cost of ld/st for short int types only, because
+ of a store-forwarding issue. */
+ rtx set = single_set (dep_insn);
+ if (set && (GET_MODE (SET_DEST (set)) == QImode
+ || GET_MODE (SET_DEST (set)) == HImode))
+ {
+ /* Increase the cost of the store/load insn if an exact
+ dependence exists and the dependent insn is a load. */
+ enum attr_memory insn_memory = get_attr_memory (insn);
+ if (insn_memory == MEMORY_LOAD
+ && exact_store_load_dependency (dep_insn, insn))
+ cost = 3;
+ }
+ }
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return cost;
+}
+
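For orientation, ix86_adjust_cost is reached through the target vector rather
than called directly. A minimal sketch of the registration, assuming the
usual hook idiom used elsewhere in i386.c (the registration lines themselves
are outside this hunk):

    #undef TARGET_SCHED_ADJUST_COST
    #define TARGET_SCHED_ADJUST_COST ix86_adjust_cost

The haifa scheduler then invokes targetm.sched.adjust_cost on each dependence
edge and uses the returned value in place of the raw latency.
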
+/* How many alternative schedules to try. This should be as wide as the
+ scheduling freedom in the DFA, but no wider. Making this value too
+ large results in extra work for the scheduler. */
+
+int
+ia32_multipass_dfa_lookahead (void)
+{
+ /* Generally, we want haifa-sched:max_issue() to look ahead as far
+ as the number of instructions that can be executed in one cycle,
+ i.e., issue_rate. */
+ if (reload_completed)
+ return ix86_issue_rate ();
+ /* Don't use lookahead for the pre-reload schedule, to save compile time. */
+ return 0;
+}
+
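The lookahead value is exported the same way; a sketch under the same
assumption about the surrounding registration code:

    #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
    #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
      ia32_multipass_dfa_lookahead

so haifa-sched's max_issue sees issue_rate after reload and 0 before it.
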
+/* Return true if target platform supports macro-fusion. */
+
+bool
+ix86_macro_fusion_p ()
+{
+ return TARGET_FUSE_CMP_AND_BRANCH;
+}
+
+/* Check whether the current microarchitecture supports macro fusion
+ for the insn pair "CONDGEN + CONDJMP". Refer to the
+ "Intel Architectures Optimization Reference Manual". */
+
+bool
+ix86_macro_fusion_pair_p (rtx_insn *condgen, rtx_insn *condjmp)
+{
+ rtx src, dest;
+ enum rtx_code ccode;
+ rtx compare_set = NULL_RTX, test_if, cond;
+ rtx alu_set = NULL_RTX, addr = NULL_RTX;
+
+ if (!any_condjump_p (condjmp))
+ return false;
+
+ unsigned int condreg1, condreg2;
+ rtx cc_reg_1;
+ targetm.fixed_condition_code_regs (&condreg1, &condreg2);
+ cc_reg_1 = gen_rtx_REG (CCmode, condreg1);
+ if (!reg_referenced_p (cc_reg_1, PATTERN (condjmp))
+ || !condgen
+ || !modified_in_p (cc_reg_1, condgen))
+ return false;
+
+ if (get_attr_type (condgen) != TYPE_TEST
+ && get_attr_type (condgen) != TYPE_ICMP
+ && get_attr_type (condgen) != TYPE_INCDEC
+ && get_attr_type (condgen) != TYPE_ALU)
+ return false;
+
+ compare_set = single_set (condgen);
+ if (compare_set == NULL_RTX
+ && !TARGET_FUSE_ALU_AND_BRANCH)
+ return false;
+
+ if (compare_set == NULL_RTX)
+ {
+ int i;
+ rtx pat = PATTERN (condgen);
+ for (i = 0; i < XVECLEN (pat, 0); i++)
+ if (GET_CODE (XVECEXP (pat, 0, i)) == SET)
+ {
+ rtx set_src = SET_SRC (XVECEXP (pat, 0, i));
+ if (GET_CODE (set_src) == COMPARE)
+ compare_set = XVECEXP (pat, 0, i);
+ else
+ alu_set = XVECEXP (pat, 0, i);
+ }
+ }
+ if (compare_set == NULL_RTX)
+ return false;
+ src = SET_SRC (compare_set);
+ if (GET_CODE (src) != COMPARE)
+ return false;
+
+ /* Macro-fusion for cmp/test MEM-IMM + conditional jmp is not
+ supported. */
+ if ((MEM_P (XEXP (src, 0))
+ && CONST_INT_P (XEXP (src, 1)))
+ || (MEM_P (XEXP (src, 1))
+ && CONST_INT_P (XEXP (src, 0))))
+ return false;
+
+ /* No fusion for RIP-relative addresses. */
+ if (MEM_P (XEXP (src, 0)))
+ addr = XEXP (XEXP (src, 0), 0);
+ else if (MEM_P (XEXP (src, 1)))
+ addr = XEXP (XEXP (src, 1), 0);
+
+ if (addr)
+ {
+ ix86_address parts;
+ int ok = ix86_decompose_address (addr, &parts);
+ gcc_assert (ok);
+
+ if (ix86_rip_relative_addr_p (&parts))
+ return false;
+ }
+
+ test_if = SET_SRC (pc_set (condjmp));
+ cond = XEXP (test_if, 0);
+ ccode = GET_CODE (cond);
+ /* Check whether the conditional jump uses the Sign or Overflow flags. */
+ if (!TARGET_FUSE_CMP_AND_BRANCH_SOFLAGS
+ && (ccode == GE
+ || ccode == GT
+ || ccode == LE
+ || ccode == LT))
+ return false;
+
+ /* Return true for TYPE_TEST and TYPE_ICMP. */
+ if (get_attr_type (condgen) == TYPE_TEST
+ || get_attr_type (condgen) == TYPE_ICMP)
+ return true;
+
+ /* The following handles the macro-fusion case for alu + jmp. */
+ if (!TARGET_FUSE_ALU_AND_BRANCH || !alu_set)
+ return false;
+
+ /* No fusion for an alu op with a memory destination operand. */
+ dest = SET_DEST (alu_set);
+ if (MEM_P (dest))
+ return false;
+
+ /* Macro-fusion for inc/dec + unsigned conditional jump is not
+ supported. */
+ if (get_attr_type (condgen) == TYPE_INCDEC
+ && (ccode == GEU
+ || ccode == GTU
+ || ccode == LEU
+ || ccode == LTU))
+ return false;
+
+ return true;
+}
+
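To make the accept/reject logic concrete, an illustrative sketch (labels and
symbols below are placeholders, not taken from the patch):

    /* Fusion candidate: register compare, flags consumed by the
       immediately following conditional jump.

           cmpl  %esi, %edi
           jne   .L3

       Rejected by the checks above: a constant compared against
       memory (the MEM-IMM test), here also a RIP-relative address.

           cmpl  $1, x(%rip)
           jne   .L3                                               */
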
diff --git a/gcc/config/i386/x86-tune.def b/gcc/config/i386/x86-tune.def
index 63f69b4b503..9d01761eff9 100644
--- a/gcc/config/i386/x86-tune.def
+++ b/gcc/config/i386/x86-tune.def
@@ -284,6 +284,22 @@ DEF_TUNE (X86_TUNE_USE_BT, "use_bt",
m_CORE_ALL | m_BONNELL | m_SILVERMONT | m_KNL | m_KNM | m_INTEL
| m_LAKEMONT | m_AMD_MULTIPLE | m_GENERIC)
+/* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency
+ for bit-manipulation instructions. */
+DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
+ m_SANDYBRIDGE | m_HASWELL | m_GENERIC)
+
+/* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based
+ on hardware capabilities. Bdver3 hardware has a loop buffer which makes
+ unrolling small loops less important. For such architectures we adjust
+ the unroll factor so that the unrolled loop fits the loop buffer. */
+DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
+
+/* X86_TUNE_ONE_IF_CONV_INSN: Restrict the number of cmov insns in an
+ if-converted sequence to one. */
+DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
+ m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GENERIC)
+
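DEF_TUNE entries are read back through the ix86_tune_features[] array. A
sketch of the accessor pattern, assumed to mirror the existing TARGET_* tune
macros in i386.h rather than quoted from this patch:

    #define TARGET_ONE_IF_CONV_INSN \
      ix86_tune_features[X86_TUNE_ONE_IF_CONV_INSN]

Moving the three entries between sections of x86-tune.def therefore changes
only where they are documented, not how they are tested.
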
/*****************************************************************************/
/* 387 instruction selection tuning */
/*****************************************************************************/
@@ -503,11 +519,6 @@ DEF_TUNE (X86_TUNE_NOT_VECTORMODE, "not_vectormode", m_K6)
DEF_TUNE (X86_TUNE_AVOID_VECTOR_DECODE, "avoid_vector_decode",
m_K8)
-/* X86_TUNE_AVOID_FALSE_DEP_FOR_BMI: Avoid false dependency
- for bit-manipulation instructions. */
-DEF_TUNE (X86_TUNE_AVOID_FALSE_DEP_FOR_BMI, "avoid_false_dep_for_bmi",
- m_SANDYBRIDGE | m_HASWELL | m_GENERIC)
-
/*****************************************************************************/
/* This never worked well before. */
/*****************************************************************************/
@@ -525,14 +536,3 @@ DEF_TUNE (X86_TUNE_QIMODE_MATH, "qimode_math", ~0U)
arithmetic to 32bit via PROMOTE_MODE macro. This code generation scheme
is usually used for RISC targets. */
DEF_TUNE (X86_TUNE_PROMOTE_QI_REGS, "promote_qi_regs", 0U)
-
-/* X86_TUNE_ADJUST_UNROLL: This enables adjusting the unroll factor based
- on hardware capabilities. Bdver3 hardware has a loop buffer which makes
- unrolling small loop less important. For, such architectures we adjust
- the unroll factor so that the unrolled loop fits the loop buffer. */
-DEF_TUNE (X86_TUNE_ADJUST_UNROLL, "adjust_unroll_factor", m_BDVER3 | m_BDVER4)
-
-/* X86_TUNE_ONE_IF_CONV_INSNS: Restrict a number of cmov insns in
- if-converted sequence to one. */
-DEF_TUNE (X86_TUNE_ONE_IF_CONV_INSN, "one_if_conv_insn",
- m_SILVERMONT | m_KNL | m_KNM | m_INTEL | m_CORE_ALL | m_GENERIC)
diff --git a/gcc/config/ia64/ia64-c.c b/gcc/config/ia64/ia64-c.c
index e42ff3bdda7..37791f3e71a 100644
--- a/gcc/config/ia64/ia64-c.c
+++ b/gcc/config/ia64/ia64-c.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/ia64/ia64.c b/gcc/config/ia64/ia64.c
index 6eb8b64b064..8a4597e954c 100644
--- a/gcc/config/ia64/ia64.c
+++ b/gcc/config/ia64/ia64.c
@@ -19,7 +19,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/ia64/ia64.h b/gcc/config/ia64/ia64.h
index 4a9313e1d49..e7073d1cf20 100644
--- a/gcc/config/ia64/ia64.h
+++ b/gcc/config/ia64/ia64.h
@@ -779,10 +779,6 @@ enum reg_class
are at negative offsets from the frame pointer. */
#define FRAME_GROWS_DOWNWARD 0
-/* Offset from the frame pointer to the first local variable slot to
- be allocated. */
-#define STARTING_FRAME_OFFSET 0
-
/* Offset from the stack pointer register to the first location at which
outgoing arguments are placed. If not specified, the default value of zero
is used. This is the proper value for most machines. */
diff --git a/gcc/config/iq2000/iq2000.c b/gcc/config/iq2000/iq2000.c
index b26482b5407..ee107cbd81f 100644
--- a/gcc/config/iq2000/iq2000.c
+++ b/gcc/config/iq2000/iq2000.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -183,6 +183,7 @@ static bool iq2000_print_operand_punct_valid_p (unsigned char code);
static bool iq2000_hard_regno_mode_ok (unsigned int, machine_mode);
static bool iq2000_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT iq2000_constant_alignment (const_tree, HOST_WIDE_INT);
+static HOST_WIDE_INT iq2000_starting_frame_offset (void);
#undef TARGET_INIT_BUILTINS
#define TARGET_INIT_BUILTINS iq2000_init_builtins
@@ -270,6 +271,9 @@ static HOST_WIDE_INT iq2000_constant_alignment (const_tree, HOST_WIDE_INT);
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT iq2000_constant_alignment
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET iq2000_starting_frame_offset
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Return nonzero if we split the address into high and low parts. */
@@ -3548,4 +3552,12 @@ iq2000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
return align;
}
+/* Implement TARGET_STARTING_FRAME_OFFSET. */
+
+static HOST_WIDE_INT
+iq2000_starting_frame_offset (void)
+{
+ return crtl->outgoing_args_size;
+}
+
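Every STARTING_FRAME_OFFSET conversion in this patch follows the shape above:
the macro's expression moves into a static function registered as
TARGET_STARTING_FRAME_OFFSET, and generic code reaches it through the target
vector. A sketch of the consuming side, assuming the hook spelling introduced
here:

    HOST_WIDE_INT off = targetm.starting_frame_offset ();

which for iq2000 now evaluates crtl->outgoing_args_size.
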
#include "gt-iq2000.h"
diff --git a/gcc/config/iq2000/iq2000.h b/gcc/config/iq2000/iq2000.h
index 58c3597fd78..72122e27577 100644
--- a/gcc/config/iq2000/iq2000.h
+++ b/gcc/config/iq2000/iq2000.h
@@ -215,9 +215,6 @@ enum reg_class
#define FRAME_GROWS_DOWNWARD 0
-#define STARTING_FRAME_OFFSET \
- (crtl->outgoing_args_size)
-
/* Use the default value zero. */
/* #define STACK_POINTER_OFFSET 0 */
diff --git a/gcc/config/lm32/lm32.c b/gcc/config/lm32/lm32.c
index 448da99e756..9677087cf0f 100644
--- a/gcc/config/lm32/lm32.c
+++ b/gcc/config/lm32/lm32.c
@@ -19,7 +19,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -81,6 +81,7 @@ static void lm32_function_arg_advance (cumulative_args_t cum,
const_tree type, bool named);
static bool lm32_hard_regno_mode_ok (unsigned int, machine_mode);
static bool lm32_modes_tieable_p (machine_mode, machine_mode);
+static HOST_WIDE_INT lm32_starting_frame_offset (void);
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE lm32_option_override
@@ -118,6 +119,9 @@ static bool lm32_modes_tieable_p (machine_mode, machine_mode);
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET lm32_starting_frame_offset
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Current frame information calculated by lm32_compute_frame_size. */
@@ -1251,3 +1255,11 @@ lm32_modes_tieable_p (machine_mode mode1, machine_mode mode2)
&& GET_MODE_SIZE (mode1) <= UNITS_PER_WORD
&& GET_MODE_SIZE (mode2) <= UNITS_PER_WORD);
}
+
+/* Implement TARGET_STARTING_FRAME_OFFSET. */
+
+static HOST_WIDE_INT
+lm32_starting_frame_offset (void)
+{
+ return UNITS_PER_WORD;
+}
diff --git a/gcc/config/lm32/lm32.h b/gcc/config/lm32/lm32.h
index 605bd68ca7c..8f2035455ae 100644
--- a/gcc/config/lm32/lm32.h
+++ b/gcc/config/lm32/lm32.h
@@ -204,8 +204,6 @@ enum reg_class
#define STACK_POINTER_OFFSET (UNITS_PER_WORD)
-#define STARTING_FRAME_OFFSET (UNITS_PER_WORD)
-
#define FIRST_PARM_OFFSET(FNDECL) (UNITS_PER_WORD)
#define STACK_POINTER_REGNUM SP_REGNUM
diff --git a/gcc/config/m32c/m32c-pragma.c b/gcc/config/m32c/m32c-pragma.c
index 0da26c97303..c44e59c4aa6 100644
--- a/gcc/config/m32c/m32c-pragma.c
+++ b/gcc/config/m32c/m32c-pragma.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/m32c/m32c-protos.h b/gcc/config/m32c/m32c-protos.h
index 8647a208420..f68d3b3185d 100644
--- a/gcc/config/m32c/m32c-protos.h
+++ b/gcc/config/m32c/m32c-protos.h
@@ -29,7 +29,7 @@ void m32c_init_expanders (void);
int m32c_initial_elimination_offset (int, int);
void m32c_output_reg_pop (FILE *, int);
void m32c_output_reg_push (FILE *, int);
-unsigned int m32c_push_rounding (int);
+poly_int64 m32c_push_rounding (poly_int64);
void m32c_register_pragmas (void);
void m32c_note_pragma_address (const char *, unsigned);
int m32c_regno_ok_for_base_p (int);
diff --git a/gcc/config/m32c/m32c.c b/gcc/config/m32c/m32c.c
index d8a62cc727a..edad8b6e2e8 100644
--- a/gcc/config/m32c/m32c.c
+++ b/gcc/config/m32c/m32c.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -1290,8 +1290,8 @@ m32c_initial_elimination_offset (int from, int to)
/* Implements PUSH_ROUNDING. The R8C and M16C have byte stacks, the
M32C has word stacks. */
-unsigned int
-m32c_push_rounding (int n)
+poly_int64
+m32c_push_rounding (poly_int64 n)
{
if (TARGET_R8C || TARGET_M16C)
return n;
diff --git a/gcc/config/m32c/m32c.h b/gcc/config/m32c/m32c.h
index 594288b76e9..4bf67fb9c67 100644
--- a/gcc/config/m32c/m32c.h
+++ b/gcc/config/m32c/m32c.h
@@ -421,7 +421,6 @@ enum reg_class
#define STACK_PUSH_CODE PRE_DEC
#define FRAME_GROWS_DOWNWARD 1
-#define STARTING_FRAME_OFFSET 0
#define FIRST_PARM_OFFSET(F) 0
#define RETURN_ADDR_RTX(COUNT,FA) m32c_return_addr_rtx (COUNT)
diff --git a/gcc/config/m32r/m32r.c b/gcc/config/m32r/m32r.c
index 10c1c143974..1226860732b 100644
--- a/gcc/config/m32r/m32r.c
+++ b/gcc/config/m32r/m32r.c
@@ -17,7 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -106,6 +106,7 @@ static bool m32r_legitimate_constant_p (machine_mode, rtx);
static bool m32r_attribute_identifier (const_tree);
static bool m32r_hard_regno_mode_ok (unsigned int, machine_mode);
static bool m32r_modes_tieable_p (machine_mode, machine_mode);
+static HOST_WIDE_INT m32r_starting_frame_offset (void);
/* M32R specific attributes. */
@@ -222,6 +223,9 @@ static const struct attribute_spec m32r_attribute_table[] =
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET m32r_starting_frame_offset
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Called by m32r_option_override to initialize various things. */
@@ -2961,3 +2965,12 @@ m32r_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
&& CONST_INT_P (XEXP (XEXP (x, 0), 1))
&& UINTVAL (XEXP (XEXP (x, 0), 1)) > 32767);
}
+
+/* Implement TARGET_STARTING_FRAME_OFFSET. The frame pointer points at
+ the same place as the stack pointer, except if alloca has been called. */
+
+static HOST_WIDE_INT
+m32r_starting_frame_offset (void)
+{
+ return M32R_STACK_ALIGN (crtl->outgoing_args_size);
+}
diff --git a/gcc/config/m32r/m32r.h b/gcc/config/m32r/m32r.h
index 436877456d5..0fa4c5f24e1 100644
--- a/gcc/config/m32r/m32r.h
+++ b/gcc/config/m32r/m32r.h
@@ -492,15 +492,6 @@ extern enum reg_class m32r_regno_reg_class[FIRST_PSEUDO_REGISTER];
pointer to a smaller address. */
#define STACK_GROWS_DOWNWARD 1
-/* Offset from frame pointer to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-/* The frame pointer points at the same place as the stack pointer, except if
- alloca has been called. */
-#define STARTING_FRAME_OFFSET \
- M32R_STACK_ALIGN (MACRO_INT (crtl->outgoing_args_size))
-
/* Offset from the stack pointer register to the first location at which
outgoing arguments are placed. */
#define STACK_POINTER_OFFSET 0
diff --git a/gcc/config/m68k/m68k-protos.h b/gcc/config/m68k/m68k-protos.h
index cdd6798c660..3c285a98fe2 100644
--- a/gcc/config/m68k/m68k-protos.h
+++ b/gcc/config/m68k/m68k-protos.h
@@ -99,3 +99,4 @@ extern void init_68881_table (void);
extern rtx m68k_legitimize_call_address (rtx);
extern rtx m68k_legitimize_sibcall_address (rtx);
extern int m68k_hard_regno_rename_ok(unsigned int, unsigned int);
+extern poly_int64 m68k_push_rounding (poly_int64);
diff --git a/gcc/config/m68k/m68k.c b/gcc/config/m68k/m68k.c
index d1607dce14e..1d03eae6a34 100644
--- a/gcc/config/m68k/m68k.c
+++ b/gcc/config/m68k/m68k.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -6612,4 +6612,15 @@ m68k_excess_precision (enum excess_precision_type type)
return FLT_EVAL_METHOD_UNPREDICTABLE;
}
+/* Implement PUSH_ROUNDING. On the 680x0, sp@- in a byte insn really pushes
+ a word. On the ColdFire, sp@- in a byte insn pushes just a byte. */
+
+poly_int64
+m68k_push_rounding (poly_int64 bytes)
+{
+ if (TARGET_COLDFIRE)
+ return bytes;
+ return (bytes + 1) & ~1;
+}
+
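A quick worked example of the new function: on the 680x0,
m68k_push_rounding (1) returns (1 + 1) & ~1 = 2, reflecting that a byte push
really moves sp by a word, while on ColdFire it returns 1 unchanged; these
are exactly the values the deleted PUSH_ROUNDING macro produced.
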
#include "gt-m68k.h"
diff --git a/gcc/config/m68k/m68k.h b/gcc/config/m68k/m68k.h
index 2fefc7251ba..8eb898e6eba 100644
--- a/gcc/config/m68k/m68k.h
+++ b/gcc/config/m68k/m68k.h
@@ -467,11 +467,8 @@ extern enum reg_class regno_reg_class[];
#define STACK_GROWS_DOWNWARD 1
#define FRAME_GROWS_DOWNWARD 1
-#define STARTING_FRAME_OFFSET 0
-/* On the 680x0, sp@- in a byte insn really pushes a word.
- On the ColdFire, sp@- in a byte insn pushes just a byte. */
-#define PUSH_ROUNDING(BYTES) (TARGET_COLDFIRE ? BYTES : ((BYTES) + 1) & ~1)
+#define PUSH_ROUNDING(BYTES) m68k_push_rounding (BYTES)
#define FIRST_PARM_OFFSET(FNDECL) 8
diff --git a/gcc/config/mcore/mcore.c b/gcc/config/mcore/mcore.c
index 7e6e69125e7..67238c8360b 100644
--- a/gcc/config/mcore/mcore.c
+++ b/gcc/config/mcore/mcore.c
@@ -17,7 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/mcore/mcore.h b/gcc/config/mcore/mcore.h
index f411186e567..50d087c1cd5 100644
--- a/gcc/config/mcore/mcore.h
+++ b/gcc/config/mcore/mcore.h
@@ -390,12 +390,6 @@ extern const enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
makes the stack pointer a smaller address. */
#define STACK_GROWS_DOWNWARD 1
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-#define STARTING_FRAME_OFFSET 0
-
/* If defined, the maximum amount of space required for outgoing arguments
will be computed and placed into the variable
`crtl->outgoing_args_size'. No space will be pushed
diff --git a/gcc/config/microblaze/microblaze-c.c b/gcc/config/microblaze/microblaze-c.c
index ee90b25a4c9..6dd4be6cc11 100644
--- a/gcc/config/microblaze/microblaze-c.c
+++ b/gcc/config/microblaze/microblaze-c.c
@@ -19,7 +19,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/microblaze/microblaze.c b/gcc/config/microblaze/microblaze.c
index d48ca7cad81..f1ea042c88f 100644
--- a/gcc/config/microblaze/microblaze.c
+++ b/gcc/config/microblaze/microblaze.c
@@ -19,7 +19,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -2728,7 +2728,7 @@ microblaze_function_prologue (FILE * file)
STACK_POINTER_REGNUM]), fsiz,
reg_names[MB_ABI_SUB_RETURN_ADDR_REGNUM + GP_REG_FIRST],
current_frame_info.var_size, current_frame_info.num_gp,
- crtl->outgoing_args_size);
+ (int) crtl->outgoing_args_size);
fprintf (file, "\t.mask\t0x%08lx\n", current_frame_info.mask);
}
}
@@ -3812,6 +3812,14 @@ microblaze_constant_alignment (const_tree exp, HOST_WIDE_INT align)
return MAX (align, BITS_PER_WORD);
return align;
}
+
+/* Implement TARGET_STARTING_FRAME_OFFSET. */
+
+static HOST_WIDE_INT
+microblaze_starting_frame_offset (void)
+{
+ return (crtl->outgoing_args_size + FIRST_PARM_OFFSET(FNDECL));
+}
#undef TARGET_ENCODE_SECTION_INFO
#define TARGET_ENCODE_SECTION_INFO microblaze_encode_section_info
@@ -3919,6 +3927,9 @@ microblaze_constant_alignment (const_tree exp, HOST_WIDE_INT align)
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT microblaze_constant_alignment
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET microblaze_starting_frame_offset
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-microblaze.h"
diff --git a/gcc/config/microblaze/microblaze.h b/gcc/config/microblaze/microblaze.h
index 2fc7fc50a15..59cc1cc2e37 100644
--- a/gcc/config/microblaze/microblaze.h
+++ b/gcc/config/microblaze/microblaze.h
@@ -402,10 +402,6 @@ extern enum reg_class microblaze_regno_to_class[];
#define STACK_GROWS_DOWNWARD 1
-/* Changed the starting frame offset to including the new link stuff */
-#define STARTING_FRAME_OFFSET \
- (crtl->outgoing_args_size + FIRST_PARM_OFFSET(FNDECL))
-
/* The return address for the current frame is in r31 if this is a leaf
function. Otherwise, it is on the stack. It is at a variable offset
from sp/fp/ap, so we define a fake hard register rap which is a
diff --git a/gcc/config/mips/driver-native.c b/gcc/config/mips/driver-native.c
index 18fab958398..48952160bed 100644
--- a/gcc/config/mips/driver-native.c
+++ b/gcc/config/mips/driver-native.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/mips/frame-header-opt.c b/gcc/config/mips/frame-header-opt.c
index f0f867d4409..dba96db051e 100644
--- a/gcc/config/mips/frame-header-opt.c
+++ b/gcc/config/mips/frame-header-opt.c
@@ -23,7 +23,7 @@ along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/mips/mips.c b/gcc/config/mips/mips.c
index dc6ddec5d10..fd2beee36ff 100644
--- a/gcc/config/mips/mips.c
+++ b/gcc/config/mips/mips.c
@@ -21,7 +21,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -10957,7 +10957,7 @@ mips_compute_frame_info (void)
if we know that none of the called functions will use this space.
But if the target-independent frame size is nonzero, we have already
- committed to allocating these in STARTING_FRAME_OFFSET for
+ committed to allocating these in TARGET_STARTING_FRAME_OFFSET for
!FRAME_GROWS_DOWNWARD. */
if ((size == 0 || FRAME_GROWS_DOWNWARD)
@@ -22337,6 +22337,17 @@ mips_constant_alignment (const_tree exp, HOST_WIDE_INT align)
return MAX (align, BITS_PER_WORD);
return align;
}
+
+/* Implement TARGET_STARTING_FRAME_OFFSET. See mips_compute_frame_info
+ for details about the frame layout. */
+
+static HOST_WIDE_INT
+mips_starting_frame_offset (void)
+{
+ if (FRAME_GROWS_DOWNWARD)
+ return 0;
+ return crtl->outgoing_args_size + MIPS_GP_SAVE_AREA_SIZE;
+}
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -22638,6 +22649,9 @@ mips_constant_alignment (const_tree exp, HOST_WIDE_INT align)
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT mips_constant_alignment
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET mips_starting_frame_offset
+
struct gcc_target targetm = TARGET_INITIALIZER;
#include "gt-mips.h"
diff --git a/gcc/config/mips/mips.h b/gcc/config/mips/mips.h
index a2d8c7c1350..550d283158e 100644
--- a/gcc/config/mips/mips.h
+++ b/gcc/config/mips/mips.h
@@ -2298,14 +2298,6 @@ enum reg_class
#define MIPS_GP_SAVE_AREA_SIZE \
(TARGET_CALL_CLOBBERED_GP ? MIPS_STACK_ALIGN (UNITS_PER_WORD) : 0)
-/* The offset of the first local variable from the frame pointer. See
- mips_compute_frame_info for details about the frame layout. */
-
-#define STARTING_FRAME_OFFSET \
- (FRAME_GROWS_DOWNWARD \
- ? 0 \
- : crtl->outgoing_args_size + MIPS_GP_SAVE_AREA_SIZE)
-
#define RETURN_ADDR_RTX mips_return_addr
/* Mask off the MIPS16 ISA bit in unwind addresses.
diff --git a/gcc/config/mmix/mmix-protos.h b/gcc/config/mmix/mmix-protos.h
index 6c38924f479..4e3a7520765 100644
--- a/gcc/config/mmix/mmix-protos.h
+++ b/gcc/config/mmix/mmix-protos.h
@@ -21,7 +21,6 @@ along with GCC; see the file COPYING3. If not see
extern void mmix_init_expanders (void);
extern int mmix_eh_return_data_regno (int);
extern int mmix_initial_elimination_offset (int, int);
-extern int mmix_starting_frame_offset (void);
extern int mmix_function_arg_regno_p (int, int);
extern void mmix_function_profiler (FILE *, int);
extern int mmix_reversible_cc_mode (machine_mode);
diff --git a/gcc/config/mmix/mmix.c b/gcc/config/mmix/mmix.c
index ea5769ebe2b..038c394705b 100644
--- a/gcc/config/mmix/mmix.c
+++ b/gcc/config/mmix/mmix.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -172,6 +172,7 @@ static bool mmix_print_operand_punct_valid_p (unsigned char);
static void mmix_conditional_register_usage (void);
static HOST_WIDE_INT mmix_static_rtx_alignment (machine_mode);
static HOST_WIDE_INT mmix_constant_alignment (const_tree, HOST_WIDE_INT);
+static HOST_WIDE_INT mmix_starting_frame_offset (void);
/* Target structure macros. Listed by node. See `Using and Porting GCC'
for a general description. */
@@ -291,6 +292,9 @@ static HOST_WIDE_INT mmix_constant_alignment (const_tree, HOST_WIDE_INT);
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT mmix_constant_alignment
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET mmix_starting_frame_offset
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Functions that are expansions for target macros.
@@ -511,9 +515,9 @@ mmix_dynamic_chain_address (rtx frame)
return plus_constant (Pmode, frame, -8);
}
-/* STARTING_FRAME_OFFSET. */
+/* Implement TARGET_STARTING_FRAME_OFFSET. */
-int
+static HOST_WIDE_INT
mmix_starting_frame_offset (void)
{
/* The old frame pointer is in the slot below the new one, so
@@ -579,7 +583,7 @@ mmix_initial_elimination_offset (int fromreg, int toreg)
counted; the others go on the register stack.
The frame-pointer is counted too if it is what is eliminated, as we
- need to balance the offset for it from STARTING_FRAME_OFFSET.
+ need to balance the offset for it from TARGET_STARTING_FRAME_OFFSET.
Also add in the slot for the register stack pointer we save if we
have a landing pad.
diff --git a/gcc/config/mmix/mmix.h b/gcc/config/mmix/mmix.h
index 7d1e83c28e0..5dafe2dbf98 100644
--- a/gcc/config/mmix/mmix.h
+++ b/gcc/config/mmix/mmix.h
@@ -437,9 +437,6 @@ enum reg_class
#define STACK_GROWS_DOWNWARD 1
#define FRAME_GROWS_DOWNWARD 1
-#define STARTING_FRAME_OFFSET \
- mmix_starting_frame_offset ()
-
#define FIRST_PARM_OFFSET(FUNDECL) 0
#define DYNAMIC_CHAIN_ADDRESS(FRAMEADDR) \
diff --git a/gcc/config/mn10300/mn10300.c b/gcc/config/mn10300/mn10300.c
index e537f874466..705f577c0fc 100644
--- a/gcc/config/mn10300/mn10300.c
+++ b/gcc/config/mn10300/mn10300.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/mn10300/mn10300.h b/gcc/config/mn10300/mn10300.h
index 1078b45c30a..b074540f7b1 100644
--- a/gcc/config/mn10300/mn10300.h
+++ b/gcc/config/mn10300/mn10300.h
@@ -392,13 +392,6 @@ enum reg_class
#define FRAME_GROWS_DOWNWARD 1
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-
-#define STARTING_FRAME_OFFSET 0
-
/* Offset of first parameter from the argument pointer register value. */
/* Is equal to the size of the saved fp + pc, even if an fp isn't
saved since the value is used before we know. */
diff --git a/gcc/config/moxie/moxie.c b/gcc/config/moxie/moxie.c
index d0731f63e34..4c9c5818fc9 100644
--- a/gcc/config/moxie/moxie.c
+++ b/gcc/config/moxie/moxie.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/moxie/moxie.h b/gcc/config/moxie/moxie.h
index 29442fb47e8..3742967d01c 100644
--- a/gcc/config/moxie/moxie.h
+++ b/gcc/config/moxie/moxie.h
@@ -226,10 +226,6 @@ enum reg_class
pointer to a smaller address. */
#define STACK_GROWS_DOWNWARD 1
-/* Offset from the frame pointer to the first local variable slot to
- be allocated. */
-#define STARTING_FRAME_OFFSET 0
-
/* Define this if the above stack space is to be considered part of the
space allocated by the caller. */
#define OUTGOING_REG_PARM_STACK_SPACE(FNTYPE) 1
diff --git a/gcc/config/msp430/driver-msp430.c b/gcc/config/msp430/driver-msp430.c
index bbd993241e5..fa1d90b13a4 100644
--- a/gcc/config/msp430/driver-msp430.c
+++ b/gcc/config/msp430/driver-msp430.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/msp430/msp430-c.c b/gcc/config/msp430/msp430-c.c
index 3c4436b78b3..7293fb6606f 100644
--- a/gcc/config/msp430/msp430-c.c
+++ b/gcc/config/msp430/msp430-c.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/msp430/msp430.c b/gcc/config/msp430/msp430.c
index 7805f834d80..8b025c755bf 100644
--- a/gcc/config/msp430/msp430.c
+++ b/gcc/config/msp430/msp430.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/msp430/msp430.h b/gcc/config/msp430/msp430.h
index 5a997670700..d4227259148 100644
--- a/gcc/config/msp430/msp430.h
+++ b/gcc/config/msp430/msp430.h
@@ -196,7 +196,6 @@ extern const char * msp430_select_hwmult_lib (int, const char **);
#define WORD_REGISTER_OPERATIONS 1
#define MOVE_MAX 8
-#define STARTING_FRAME_OFFSET 0
#define INCOMING_RETURN_ADDR_RTX \
msp430_incoming_return_addr_rtx ()
diff --git a/gcc/config/nds32/nds32-cost.c b/gcc/config/nds32/nds32-cost.c
index ae54c36bba2..8f7c316766b 100644
--- a/gcc/config/nds32/nds32-cost.c
+++ b/gcc/config/nds32/nds32-cost.c
@@ -20,7 +20,7 @@
/* ------------------------------------------------------------------------ */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/nds32/nds32-fp-as-gp.c b/gcc/config/nds32/nds32-fp-as-gp.c
index 6260f118bf3..743179b6313 100644
--- a/gcc/config/nds32/nds32-fp-as-gp.c
+++ b/gcc/config/nds32/nds32-fp-as-gp.c
@@ -20,7 +20,7 @@
/* ------------------------------------------------------------------------ */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/nds32/nds32-intrinsic.c b/gcc/config/nds32/nds32-intrinsic.c
index afe36449012..064a8c1731d 100644
--- a/gcc/config/nds32/nds32-intrinsic.c
+++ b/gcc/config/nds32/nds32-intrinsic.c
@@ -20,7 +20,7 @@
/* ------------------------------------------------------------------------ */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/nds32/nds32-isr.c b/gcc/config/nds32/nds32-isr.c
index 1bc42c2f7d8..698315e1fc4 100644
--- a/gcc/config/nds32/nds32-isr.c
+++ b/gcc/config/nds32/nds32-isr.c
@@ -20,7 +20,7 @@
/* ------------------------------------------------------------------------ */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/nds32/nds32-md-auxiliary.c b/gcc/config/nds32/nds32-md-auxiliary.c
index 764075b36ac..b4041df6b66 100644
--- a/gcc/config/nds32/nds32-md-auxiliary.c
+++ b/gcc/config/nds32/nds32-md-auxiliary.c
@@ -21,7 +21,7 @@
/* ------------------------------------------------------------------------ */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/nds32/nds32-memory-manipulation.c b/gcc/config/nds32/nds32-memory-manipulation.c
index b1888b95bd7..5503dba8036 100644
--- a/gcc/config/nds32/nds32-memory-manipulation.c
+++ b/gcc/config/nds32/nds32-memory-manipulation.c
@@ -21,7 +21,7 @@
/* ------------------------------------------------------------------------ */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/nds32/nds32-pipelines-auxiliary.c b/gcc/config/nds32/nds32-pipelines-auxiliary.c
index 4f4ddf37228..603ca7e69bf 100644
--- a/gcc/config/nds32/nds32-pipelines-auxiliary.c
+++ b/gcc/config/nds32/nds32-pipelines-auxiliary.c
@@ -21,7 +21,7 @@
/* ------------------------------------------------------------------------ */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/nds32/nds32-predicates.c b/gcc/config/nds32/nds32-predicates.c
index 79865d5811c..44b10119a15 100644
--- a/gcc/config/nds32/nds32-predicates.c
+++ b/gcc/config/nds32/nds32-predicates.c
@@ -20,7 +20,7 @@
/* ------------------------------------------------------------------------ */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/nds32/nds32.c b/gcc/config/nds32/nds32.c
index bb1581cf9f8..add64ee4a80 100644
--- a/gcc/config/nds32/nds32.c
+++ b/gcc/config/nds32/nds32.c
@@ -20,7 +20,7 @@
/* ------------------------------------------------------------------------ */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/nds32/nds32.h b/gcc/config/nds32/nds32.h
index d33237c004c..81522b848c8 100644
--- a/gcc/config/nds32/nds32.h
+++ b/gcc/config/nds32/nds32.h
@@ -681,8 +681,6 @@ enum reg_class
#define FRAME_GROWS_DOWNWARD 1
-#define STARTING_FRAME_OFFSET 0
-
#define STACK_POINTER_OFFSET 0
#define FIRST_PARM_OFFSET(fundecl) \
diff --git a/gcc/config/nios2/nios2.c b/gcc/config/nios2/nios2.c
index 36ae35eb815..2a23886da48 100644
--- a/gcc/config/nios2/nios2.c
+++ b/gcc/config/nios2/nios2.c
@@ -20,7 +20,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/nios2/nios2.h b/gcc/config/nios2/nios2.h
index 10bebfbcdd3..420543e4f46 100644
--- a/gcc/config/nios2/nios2.h
+++ b/gcc/config/nios2/nios2.h
@@ -252,7 +252,6 @@ enum reg_class
/* Stack layout. */
#define STACK_GROWS_DOWNWARD 1
-#define STARTING_FRAME_OFFSET 0
#define FIRST_PARM_OFFSET(FUNDECL) 0
/* Before the prologue, RA lives in r31. */
diff --git a/gcc/config/nvptx/mkoffload.c b/gcc/config/nvptx/mkoffload.c
index 45aa53e2d50..560ea23e72b 100644
--- a/gcc/config/nvptx/mkoffload.c
+++ b/gcc/config/nvptx/mkoffload.c
@@ -27,7 +27,7 @@
This is not a complete assembler. We presume the source is well
formed from the compiler and can die horribly if it is not. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/nvptx/nvptx.c b/gcc/config/nvptx/nvptx.c
index dc848d723c1..8ad404ccd13 100644
--- a/gcc/config/nvptx/nvptx.c
+++ b/gcc/config/nvptx/nvptx.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include <sstream>
diff --git a/gcc/config/nvptx/nvptx.h b/gcc/config/nvptx/nvptx.h
index d02e8541b6b..f81b9e2ae69 100644
--- a/gcc/config/nvptx/nvptx.h
+++ b/gcc/config/nvptx/nvptx.h
@@ -122,7 +122,6 @@ enum reg_class { NO_REGS, ALL_REGS, LIM_REG_CLASSES };
/* Stack and Calling. */
-#define STARTING_FRAME_OFFSET 0
#define FRAME_GROWS_DOWNWARD 0
#define STACK_GROWS_DOWNWARD 1
diff --git a/gcc/config/pa/pa.c b/gcc/config/pa/pa.c
index ef9eb33463d..279d261320b 100644
--- a/gcc/config/pa/pa.c
+++ b/gcc/config/pa/pa.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -205,6 +205,7 @@ static unsigned int pa_hard_regno_nregs (unsigned int, machine_mode);
static bool pa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool pa_modes_tieable_p (machine_mode, machine_mode);
static bool pa_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
+static HOST_WIDE_INT pa_starting_frame_offset (void);
/* The following extra sections are only used for SOM. */
static GTY(()) section *som_readonly_data_section;
@@ -424,6 +425,9 @@ static size_t n_deferred_plabels = 0;
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET pa_starting_frame_offset
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Parse the -mfixed-range= option string. */
@@ -3781,11 +3785,11 @@ pa_compute_frame_size (poly_int64 size, int *fregs_live)
size = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
/* Space for previous frame pointer + filler. If any frame is
- allocated, we need to add in the STARTING_FRAME_OFFSET. We
+ allocated, we need to add in the TARGET_STARTING_FRAME_OFFSET. We
waste some space here for the sake of HP compatibility. The
first slot is only used when the frame pointer is needed. */
if (size || frame_pointer_needed)
- size += STARTING_FRAME_OFFSET;
+ size += pa_starting_frame_offset ();
/* If the current function calls __builtin_eh_return, then we need
to allocate stack space for registers that will hold data for
@@ -3920,7 +3924,7 @@ pa_expand_prologue (void)
and must be changed in tandem with this code. */
local_fsize = (size + UNITS_PER_WORD - 1) & ~(UNITS_PER_WORD - 1);
if (local_fsize || frame_pointer_needed)
- local_fsize += STARTING_FRAME_OFFSET;
+ local_fsize += pa_starting_frame_offset ();
actual_fsize = pa_compute_frame_size (size, &save_fregs);
if (flag_stack_usage_info)
@@ -10811,4 +10815,17 @@ pa_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
return PA_HARD_REGNO_MODE_OK (regno, mode);
}
+/* Implement TARGET_STARTING_FRAME_OFFSET.
+
+ On the 32-bit ports, we reserve one slot for the previous frame
+ pointer and one fill slot. The fill slot is for compatibility
+ with HP compiled programs. On the 64-bit ports, we reserve one
+ slot for the previous frame pointer. */
+
+static HOST_WIDE_INT
+pa_starting_frame_offset (void)
+{
+ return 8;
+}
+
#include "gt-pa.h"
diff --git a/gcc/config/pa/pa.h b/gcc/config/pa/pa.h
index c4f7d0a66e3..9ed929a301e 100644
--- a/gcc/config/pa/pa.h
+++ b/gcc/config/pa/pa.h
@@ -494,17 +494,6 @@ extern rtx hppa_pic_save_rtx (void);
goes at a more negative offset in the frame. */
#define FRAME_GROWS_DOWNWARD 0
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated.
-
- On the 32-bit ports, we reserve one slot for the previous frame
- pointer and one fill slot. The fill slot is for compatibility
- with HP compiled programs. On the 64-bit ports, we reserve one
- slot for the previous frame pointer. */
-#define STARTING_FRAME_OFFSET 8
-
/* Define STACK_ALIGNMENT_NEEDED to zero to disable final alignment
of the stack. The default is to align it to STACK_BOUNDARY. */
#define STACK_ALIGNMENT_NEEDED 0
diff --git a/gcc/config/pdp11/pdp11-protos.h b/gcc/config/pdp11/pdp11-protos.h
index f6c7d1a7f1b..467b0d87a0a 100644
--- a/gcc/config/pdp11/pdp11-protos.h
+++ b/gcc/config/pdp11/pdp11-protos.h
@@ -44,3 +44,4 @@ extern void pdp11_asm_output_var (FILE *, const char *, int, int, bool);
extern void pdp11_expand_prologue (void);
extern void pdp11_expand_epilogue (void);
extern int pdp11_branch_cost (void);
+extern poly_int64 pdp11_push_rounding (poly_int64);
diff --git a/gcc/config/pdp11/pdp11.c b/gcc/config/pdp11/pdp11.c
index 5397d61d2d4..1367a072b1b 100644
--- a/gcc/config/pdp11/pdp11.c
+++ b/gcc/config/pdp11/pdp11.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -1977,4 +1977,13 @@ pdp11_modes_tieable_p (machine_mode, machine_mode)
return false;
}
+/* Implement PUSH_ROUNDING. On the pdp11, the stack is on an even
+ boundary. */
+
+poly_int64
+pdp11_push_rounding (poly_int64 bytes)
+{
+ return (bytes + 1) & ~1;
+}
+
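Likewise for pdp11_push_rounding just above: bytes rounds up to the next even
value, so pdp11_push_rounding (3) == 4 while pdp11_push_rounding (4) == 4.
The poly_int64 signature matches the new generic PUSH_ROUNDING plumbing; the
arithmetic itself is unchanged from the deleted macro.
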
struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/pdp11/pdp11.h b/gcc/config/pdp11/pdp11.h
index 3c89327c983..5ccec18e91a 100644
--- a/gcc/config/pdp11/pdp11.h
+++ b/gcc/config/pdp11/pdp11.h
@@ -257,16 +257,7 @@ enum reg_class { NO_REGS, MUL_REGS, GENERAL_REGS, LOAD_FPU_REGS, NO_LOAD_FPU_REG
*/
#define FRAME_GROWS_DOWNWARD 1
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-#define STARTING_FRAME_OFFSET 0
-
-/* If we generate an insn to push BYTES bytes,
- this says how many the stack pointer really advances by.
- On the pdp11, the stack is on an even boundary */
-#define PUSH_ROUNDING(BYTES) ((BYTES + 1) & ~1)
+#define PUSH_ROUNDING(BYTES) pdp11_push_rounding (BYTES)
/* current_first_parm_offset stores the # of registers pushed on the
stack */
diff --git a/gcc/config/powerpcspe/aix.h b/gcc/config/powerpcspe/aix.h
index 6c91a90cc00..dc7fc44b044 100644
--- a/gcc/config/powerpcspe/aix.h
+++ b/gcc/config/powerpcspe/aix.h
@@ -54,13 +54,11 @@
sizes of the fixed area and the parameter area must be a multiple of
STACK_BOUNDARY. */
-#undef STARTING_FRAME_OFFSET
-#define STARTING_FRAME_OFFSET \
- (FRAME_GROWS_DOWNWARD \
- ? 0 \
- : (cfun->calls_alloca \
- ? RS6000_ALIGN (crtl->outgoing_args_size + RS6000_SAVE_AREA, 16) \
- : (RS6000_ALIGN (crtl->outgoing_args_size, 16) + RS6000_SAVE_AREA)))
+#undef RS6000_STARTING_FRAME_OFFSET
+#define RS6000_STARTING_FRAME_OFFSET \
+ (cfun->calls_alloca \
+ ? RS6000_ALIGN (crtl->outgoing_args_size + RS6000_SAVE_AREA, 16) \
+ : (RS6000_ALIGN (crtl->outgoing_args_size, 16) + RS6000_SAVE_AREA))
/* Offset from the stack pointer register to an item dynamically
allocated on the stack, e.g., by `alloca'.
@@ -73,7 +71,8 @@
`emit-rtl.c'). */
#undef STACK_DYNAMIC_OFFSET
#define STACK_DYNAMIC_OFFSET(FUNDECL) \
- RS6000_ALIGN (crtl->outgoing_args_size + STACK_POINTER_OFFSET, 16)
+ RS6000_ALIGN (crtl->outgoing_args_size.to_constant () \
+ + STACK_POINTER_OFFSET, 16)
#undef TARGET_IEEEQUAD
#define TARGET_IEEEQUAD 0
diff --git a/gcc/config/powerpcspe/darwin.h b/gcc/config/powerpcspe/darwin.h
index 51474ae00c7..a454049c130 100644
--- a/gcc/config/powerpcspe/darwin.h
+++ b/gcc/config/powerpcspe/darwin.h
@@ -148,16 +148,14 @@ extern int darwin_emit_branch_islands;
/* Pad the outgoing args area to 16 bytes instead of the usual 8. */
-#undef STARTING_FRAME_OFFSET
-#define STARTING_FRAME_OFFSET \
- (FRAME_GROWS_DOWNWARD \
- ? 0 \
- : (RS6000_ALIGN (crtl->outgoing_args_size, 16) \
- + RS6000_SAVE_AREA))
+#undef RS6000_STARTING_FRAME_OFFSET
+#define RS6000_STARTING_FRAME_OFFSET \
+ (RS6000_ALIGN (crtl->outgoing_args_size, 16) \
+ + RS6000_SAVE_AREA)
#undef STACK_DYNAMIC_OFFSET
#define STACK_DYNAMIC_OFFSET(FUNDECL) \
- (RS6000_ALIGN (crtl->outgoing_args_size, 16) \
+ (RS6000_ALIGN (crtl->outgoing_args_size.to_constant (), 16) \
+ (STACK_POINTER_OFFSET))
/* Darwin uses a function call if everything needs to be saved/restored. */
diff --git a/gcc/config/powerpcspe/driver-powerpcspe.c b/gcc/config/powerpcspe/driver-powerpcspe.c
index 8215abbd625..f3edb861138 100644
--- a/gcc/config/powerpcspe/driver-powerpcspe.c
+++ b/gcc/config/powerpcspe/driver-powerpcspe.c
@@ -17,6 +17,8 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/powerpcspe/host-darwin.c b/gcc/config/powerpcspe/host-darwin.c
index da90e2f6452..ecafe08962f 100644
--- a/gcc/config/powerpcspe/host-darwin.c
+++ b/gcc/config/powerpcspe/host-darwin.c
@@ -17,6 +17,8 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/powerpcspe/host-ppc64-darwin.c b/gcc/config/powerpcspe/host-ppc64-darwin.c
index 381fe1673ae..8f995148b24 100644
--- a/gcc/config/powerpcspe/host-ppc64-darwin.c
+++ b/gcc/config/powerpcspe/host-ppc64-darwin.c
@@ -17,6 +17,8 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/powerpcspe/powerpcspe-c.c b/gcc/config/powerpcspe/powerpcspe-c.c
index 661480fd479..6020d0c15b2 100644
--- a/gcc/config/powerpcspe/powerpcspe-c.c
+++ b/gcc/config/powerpcspe/powerpcspe-c.c
@@ -20,6 +20,8 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/powerpcspe/powerpcspe-linux.c b/gcc/config/powerpcspe/powerpcspe-linux.c
index 4a8d9fad6ea..f9965b660b1 100644
--- a/gcc/config/powerpcspe/powerpcspe-linux.c
+++ b/gcc/config/powerpcspe/powerpcspe-linux.c
@@ -17,6 +17,8 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/powerpcspe/powerpcspe.c b/gcc/config/powerpcspe/powerpcspe.c
index 528f62da71d..6da9f59148d 100644
--- a/gcc/config/powerpcspe/powerpcspe.c
+++ b/gcc/config/powerpcspe/powerpcspe.c
@@ -18,6 +18,8 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
@@ -1987,6 +1989,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
+
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
/* Processor table. */
@@ -43770,6 +43775,16 @@ rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
return MAX (align, BITS_PER_WORD);
return align;
}
+
+/* Implement TARGET_STARTING_FRAME_OFFSET. */
+
+static HOST_WIDE_INT
+rs6000_starting_frame_offset (void)
+{
+ if (FRAME_GROWS_DOWNWARD)
+ return 0;
+ return RS6000_STARTING_FRAME_OFFSET;
+}
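The rs6000 conversion splits the old macro in two: the FRAME_GROWS_DOWNWARD
test lives in the hook, and each subtarget header now defines only the
non-growing case as RS6000_STARTING_FRAME_OFFSET. For example, an
alloca-using function on AIX still gets
RS6000_ALIGN (crtl->outgoing_args_size + RS6000_SAVE_AREA, 16), exactly as
before, while any FRAME_GROWS_DOWNWARD configuration short-circuits to 0 in
rs6000_starting_frame_offset.
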
struct gcc_target targetm = TARGET_INITIALIZER;
diff --git a/gcc/config/powerpcspe/powerpcspe.h b/gcc/config/powerpcspe/powerpcspe.h
index 8381785067c..45edc35f39a 100644
--- a/gcc/config/powerpcspe/powerpcspe.h
+++ b/gcc/config/powerpcspe/powerpcspe.h
@@ -1648,15 +1648,13 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
sizes of the fixed area and the parameter area must be a multiple of
STACK_BOUNDARY. */
-#define STARTING_FRAME_OFFSET \
- (FRAME_GROWS_DOWNWARD \
- ? 0 \
- : (cfun->calls_alloca \
- ? (RS6000_ALIGN (crtl->outgoing_args_size + RS6000_SAVE_AREA, \
- (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8 )) \
- : (RS6000_ALIGN (crtl->outgoing_args_size, \
- (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8) \
- + RS6000_SAVE_AREA)))
+#define RS6000_STARTING_FRAME_OFFSET \
+ (cfun->calls_alloca \
+ ? (RS6000_ALIGN (crtl->outgoing_args_size + RS6000_SAVE_AREA, \
+ (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8 )) \
+ : (RS6000_ALIGN (crtl->outgoing_args_size, \
+ (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8) \
+ + RS6000_SAVE_AREA))
/* Offset from the stack pointer register to an item dynamically
allocated on the stack, e.g., by `alloca'.
@@ -1668,7 +1666,8 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
This value must be a multiple of STACK_BOUNDARY (hard coded in
`emit-rtl.c'). */
#define STACK_DYNAMIC_OFFSET(FUNDECL) \
- RS6000_ALIGN (crtl->outgoing_args_size + STACK_POINTER_OFFSET, \
+ RS6000_ALIGN (crtl->outgoing_args_size.to_constant () \
+ + STACK_POINTER_OFFSET, \
(TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8)
/* If we generate an insn to push BYTES bytes,
diff --git a/gcc/config/riscv/riscv-builtins.c b/gcc/config/riscv/riscv-builtins.c
index 1311fee6f70..39d2316e9ef 100644
--- a/gcc/config/riscv/riscv-builtins.c
+++ b/gcc/config/riscv/riscv-builtins.c
@@ -18,6 +18,8 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/riscv/riscv-c.c b/gcc/config/riscv/riscv-c.c
index 64e7cf877af..2a045ec026b 100644
--- a/gcc/config/riscv/riscv-c.c
+++ b/gcc/config/riscv/riscv-c.c
@@ -18,6 +18,8 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/riscv/riscv.c b/gcc/config/riscv/riscv.c
index c34468e018d..06106f22b8b 100644
--- a/gcc/config/riscv/riscv.c
+++ b/gcc/config/riscv/riscv.c
@@ -19,6 +19,8 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/riscv/riscv.h b/gcc/config/riscv/riscv.h
index c31d2cc9957..e53555efe82 100644
--- a/gcc/config/riscv/riscv.h
+++ b/gcc/config/riscv/riscv.h
@@ -446,8 +446,6 @@ enum reg_class
#define FRAME_GROWS_DOWNWARD 1
-#define STARTING_FRAME_OFFSET 0
-
#define RETURN_ADDR_RTX riscv_return_addr
#define ELIMINABLE_REGS \
diff --git a/gcc/config/rl78/rl78-c.c b/gcc/config/rl78/rl78-c.c
index bb1e8db3a53..59409c8aa24 100644
--- a/gcc/config/rl78/rl78-c.c
+++ b/gcc/config/rl78/rl78-c.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/rl78/rl78.c b/gcc/config/rl78/rl78.c
index 2455599c51d..ce66866ef84 100644
--- a/gcc/config/rl78/rl78.c
+++ b/gcc/config/rl78/rl78.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/rl78/rl78.h b/gcc/config/rl78/rl78.h
index 30dfee89e6b..e6b64098653 100644
--- a/gcc/config/rl78/rl78.h
+++ b/gcc/config/rl78/rl78.h
@@ -149,7 +149,6 @@
#define HAS_LONG_UNCOND_BRANCH 0
#define MOVE_MAX 2
-#define STARTING_FRAME_OFFSET 0
#define ADDR_SPACE_NEAR 1
#define ADDR_SPACE_FAR 2
diff --git a/gcc/config/rs6000/aix.h b/gcc/config/rs6000/aix.h
index 6c91a90cc00..36c4a522b4f 100644
--- a/gcc/config/rs6000/aix.h
+++ b/gcc/config/rs6000/aix.h
@@ -54,13 +54,11 @@
sizes of the fixed area and the parameter area must be a multiple of
STACK_BOUNDARY. */
-#undef STARTING_FRAME_OFFSET
-#define STARTING_FRAME_OFFSET \
- (FRAME_GROWS_DOWNWARD \
- ? 0 \
- : (cfun->calls_alloca \
- ? RS6000_ALIGN (crtl->outgoing_args_size + RS6000_SAVE_AREA, 16) \
- : (RS6000_ALIGN (crtl->outgoing_args_size, 16) + RS6000_SAVE_AREA)))
+#undef RS6000_STARTING_FRAME_OFFSET
+#define RS6000_STARTING_FRAME_OFFSET \
+ (cfun->calls_alloca \
+ ? RS6000_ALIGN (crtl->outgoing_args_size + RS6000_SAVE_AREA, 16) \
+ : (RS6000_ALIGN (crtl->outgoing_args_size, 16) + RS6000_SAVE_AREA))
/* Offset from the stack pointer register to an item dynamically
allocated on the stack, e.g., by `alloca'.
@@ -73,7 +71,8 @@
`emit-rtl.c'). */
#undef STACK_DYNAMIC_OFFSET
#define STACK_DYNAMIC_OFFSET(FUNDECL) \
- RS6000_ALIGN (crtl->outgoing_args_size + STACK_POINTER_OFFSET, 16)
+ RS6000_ALIGN (crtl->outgoing_args_size.to_constant () \
+ + STACK_POINTER_OFFSET, 16)
#undef TARGET_IEEEQUAD
#define TARGET_IEEEQUAD 0
diff --git a/gcc/config/rs6000/amo.h b/gcc/config/rs6000/amo.h
index d77bf649fcb..d83e035da05 100644
--- a/gcc/config/rs6000/amo.h
+++ b/gcc/config/rs6000/amo.h
@@ -1,152 +1,152 @@
- /* Power ISA 3.0 atomic memory operation include file.
- Copyright (C) 2017 Free Software Foundation, Inc.
- Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>.
-
- This file is part of GCC.
-
- GCC is free software; you can redistribute it and/or modify it
- under the terms of the GNU General Public License as published
- by the Free Software Foundation; either version 3, or (at your
- option) any later version.
-
- GCC is distributed in the hope that it will be useful, but WITHOUT
- ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
- or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
- License for more details.
-
- Under Section 7 of GPL version 3, you are granted additional
- permissions described in the GCC Runtime Library Exception, version
- 3.1, as published by the Free Software Foundation.
-
- You should have received a copy of the GNU General Public License and
- a copy of the GCC Runtime Library Exception along with this program;
- see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
- <http://www.gnu.org/licenses/>. */
-
- #ifndef _AMO_H
- #define _AMO_H
-
- #if !defined(_ARCH_PWR9) || !defined(_ARCH_PPC64)
- #error "The atomic memory operations require Power 64-bit ISA 3.0"
-
- #else
- #include <stdint.h>
-
- /* Enumeration of the LWAT/LDAT sub-opcodes. */
- enum _AMO_LD {
- _AMO_LD_ADD = 0x00, /* Fetch and Add. */
- _AMO_LD_XOR = 0x01, /* Fetch and Xor. */
- _AMO_LD_IOR = 0x02, /* Fetch and Ior. */
- _AMO_LD_AND = 0x03, /* Fetch and And. */
- _AMO_LD_UMAX = 0x04, /* Fetch and Unsigned Maximum. */
- _AMO_LD_SMAX = 0x05, /* Fetch and Signed Maximum. */
- _AMO_LD_UMIN = 0x06, /* Fetch and Unsigned Minimum. */
- _AMO_LD_SMIN = 0x07, /* Fetch and Signed Minimum. */
- _AMO_LD_SWAP = 0x08, /* Swap. */
- _AMO_LD_CS_NE = 0x10, /* Compare and Swap Not Equal. */
- _AMO_LD_INC_BOUNDED = 0x18, /* Fetch and Increment Bounded. */
- _AMO_LD_INC_EQUAL = 0x19, /* Fetch and Increment Equal. */
- _AMO_LD_DEC_BOUNDED = 0x1A /* Fetch and Decrement Bounded. */
- };
-
- /* Implementation of the simple LWAT/LDAT operations that take one register and
- modify one word or double-word of memory and return the value that was
- previously in the memory location.
-
- The LWAT/LDAT opcode requires the address to be a single register, and that
- points to a suitably aligned memory location. Asm volatile is used to
- prevent the optimizer from moving the operation. */
-
- #define _AMO_LD_SIMPLE(NAME, TYPE, OPCODE, FC) \
- static __inline__ TYPE \
- NAME (TYPE *_PTR, TYPE _VALUE) \
- { \
- unsigned __int128 _TMP; \
- TYPE _RET; \
- __asm__ volatile ("mr %L1,%3\n" \
- "\t" OPCODE " %1,%P0,%4\n" \
- "\tmr %2,%1\n" \
- : "+Q" (_PTR[0]), "=&r" (_TMP), "=r" (_RET) \
- : "r" (_VALUE), "n" (FC)); \
- return _RET; \
- }
-
- _AMO_LD_SIMPLE (amo_lwat_add, uint32_t, "lwat", _AMO_LD_ADD)
- _AMO_LD_SIMPLE (amo_lwat_xor, uint32_t, "lwat", _AMO_LD_XOR)
- _AMO_LD_SIMPLE (amo_lwat_ior, uint32_t, "lwat", _AMO_LD_IOR)
- _AMO_LD_SIMPLE (amo_lwat_and, uint32_t, "lwat", _AMO_LD_AND)
- _AMO_LD_SIMPLE (amo_lwat_umax, uint32_t, "lwat", _AMO_LD_UMAX)
- _AMO_LD_SIMPLE (amo_lwat_umin, uint32_t, "lwat", _AMO_LD_UMIN)
- _AMO_LD_SIMPLE (amo_lwat_swap, uint32_t, "lwat", _AMO_LD_SWAP)
-
- _AMO_LD_SIMPLE (amo_lwat_sadd, int32_t, "lwat", _AMO_LD_ADD)
- _AMO_LD_SIMPLE (amo_lwat_smax, int32_t, "lwat", _AMO_LD_SMAX)
- _AMO_LD_SIMPLE (amo_lwat_smin, int32_t, "lwat", _AMO_LD_SMIN)
- _AMO_LD_SIMPLE (amo_lwat_sswap, int32_t, "lwat", _AMO_LD_SWAP)
-
- _AMO_LD_SIMPLE (amo_ldat_add, uint64_t, "ldat", _AMO_LD_ADD)
- _AMO_LD_SIMPLE (amo_ldat_xor, uint64_t, "ldat", _AMO_LD_XOR)
- _AMO_LD_SIMPLE (amo_ldat_ior, uint64_t, "ldat", _AMO_LD_IOR)
- _AMO_LD_SIMPLE (amo_ldat_and, uint64_t, "ldat", _AMO_LD_AND)
- _AMO_LD_SIMPLE (amo_ldat_umax, uint64_t, "ldat", _AMO_LD_UMAX)
- _AMO_LD_SIMPLE (amo_ldat_umin, uint64_t, "ldat", _AMO_LD_UMIN)
- _AMO_LD_SIMPLE (amo_ldat_swap, uint64_t, "ldat", _AMO_LD_SWAP)
-
- _AMO_LD_SIMPLE (amo_ldat_sadd, int64_t, "ldat", _AMO_LD_ADD)
- _AMO_LD_SIMPLE (amo_ldat_smax, int64_t, "ldat", _AMO_LD_SMAX)
- _AMO_LD_SIMPLE (amo_ldat_smin, int64_t, "ldat", _AMO_LD_SMIN)
- _AMO_LD_SIMPLE (amo_ldat_sswap, int64_t, "ldat", _AMO_LD_SWAP)
-
- /* Enumeration of the STWAT/STDAT sub-opcodes. */
- enum _AMO_ST {
- _AMO_ST_ADD = 0x00, /* Store Add. */
- _AMO_ST_XOR = 0x01, /* Store Xor. */
- _AMO_ST_IOR = 0x02, /* Store Ior. */
- _AMO_ST_AND = 0x03, /* Store And. */
- _AMO_ST_UMAX = 0x04, /* Store Unsigned Maximum. */
- _AMO_ST_SMAX = 0x05, /* Store Signed Maximum. */
- _AMO_ST_UMIN = 0x06, /* Store Unsigned Minimum. */
- _AMO_ST_SMIN = 0x07, /* Store Signed Minimum. */
- _AMO_ST_TWIN = 0x18 /* Store Twin. */
- };
-
- /* Implementation of the simple STWAT/STDAT operations that take one register
- and modify one word or double-word of memory. No value is returned.
-
- The STWAT/STDAT opcode requires the address to be a single register, and
- that points to a suitably aligned memory location. Asm volatile is used to
- prevent the optimizer from moving the operation. */
-
- #define _AMO_ST_SIMPLE(NAME, TYPE, OPCODE, FC) \
- static __inline__ void \
- NAME (TYPE *_PTR, TYPE _VALUE) \
- { \
- __asm__ volatile (OPCODE " %1,%P0,%2" \
- : "+Q" (_PTR[0]) \
- : "r" (_VALUE), "n" (FC)); \
- return; \
- }
-
- _AMO_ST_SIMPLE (amo_stwat_add, uint32_t, "stwat", _AMO_ST_ADD)
- _AMO_ST_SIMPLE (amo_stwat_xor, uint32_t, "stwat", _AMO_ST_XOR)
- _AMO_ST_SIMPLE (amo_stwat_ior, uint32_t, "stwat", _AMO_ST_IOR)
- _AMO_ST_SIMPLE (amo_stwat_and, uint32_t, "stwat", _AMO_ST_AND)
- _AMO_ST_SIMPLE (amo_stwat_umax, uint32_t, "stwat", _AMO_ST_UMAX)
- _AMO_ST_SIMPLE (amo_stwat_umin, uint32_t, "stwat", _AMO_ST_UMIN)
-
- _AMO_ST_SIMPLE (amo_stwat_sadd, int32_t, "stwat", _AMO_ST_ADD)
- _AMO_ST_SIMPLE (amo_stwat_smax, int32_t, "stwat", _AMO_ST_SMAX)
- _AMO_ST_SIMPLE (amo_stwat_smin, int32_t, "stwat", _AMO_ST_SMIN)
-
- _AMO_ST_SIMPLE (amo_stdat_add, uint64_t, "stdat", _AMO_ST_ADD)
- _AMO_ST_SIMPLE (amo_stdat_xor, uint64_t, "stdat", _AMO_ST_XOR)
- _AMO_ST_SIMPLE (amo_stdat_ior, uint64_t, "stdat", _AMO_ST_IOR)
- _AMO_ST_SIMPLE (amo_stdat_and, uint64_t, "stdat", _AMO_ST_AND)
- _AMO_ST_SIMPLE (amo_stdat_umax, uint64_t, "stdat", _AMO_ST_UMAX)
- _AMO_ST_SIMPLE (amo_stdat_umin, uint64_t, "stdat", _AMO_ST_UMIN)
-
- _AMO_ST_SIMPLE (amo_stdat_sadd, int64_t, "stdat", _AMO_ST_ADD)
- _AMO_ST_SIMPLE (amo_stdat_smax, int64_t, "stdat", _AMO_ST_SMAX)
- _AMO_ST_SIMPLE (amo_stdat_smin, int64_t, "stdat", _AMO_ST_SMIN)
- #endif /* _ARCH_PWR9 && _ARCH_PPC64. */
- #endif /* _POWERPC_AMO_H. */
+/* Power ISA 3.0 atomic memory operation include file.
+ Copyright (C) 2017 Free Software Foundation, Inc.
+ Contributed by Michael Meissner <meissner@linux.vnet.ibm.com>.
+
+ This file is part of GCC.
+
+ GCC is free software; you can redistribute it and/or modify it
+ under the terms of the GNU General Public License as published
+ by the Free Software Foundation; either version 3, or (at your
+ option) any later version.
+
+ GCC is distributed in the hope that it will be useful, but WITHOUT
+ ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
+ or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
+ License for more details.
+
+ Under Section 7 of GPL version 3, you are granted additional
+ permissions described in the GCC Runtime Library Exception, version
+ 3.1, as published by the Free Software Foundation.
+
+ You should have received a copy of the GNU General Public License and
+ a copy of the GCC Runtime Library Exception along with this program;
+ see the files COPYING3 and COPYING.RUNTIME respectively. If not, see
+ <http://www.gnu.org/licenses/>. */
+
+#ifndef _AMO_H
+#define _AMO_H
+
+#if !defined(_ARCH_PWR9) || !defined(_ARCH_PPC64)
+#error "The atomic memory operations require Power 64-bit ISA 3.0"
+
+#else
+#include <stdint.h>
+
+/* Enumeration of the LWAT/LDAT sub-opcodes. */
+enum _AMO_LD {
+ _AMO_LD_ADD = 0x00, /* Fetch and Add. */
+ _AMO_LD_XOR = 0x01, /* Fetch and Xor. */
+ _AMO_LD_IOR = 0x02, /* Fetch and Ior. */
+ _AMO_LD_AND = 0x03, /* Fetch and And. */
+ _AMO_LD_UMAX = 0x04, /* Fetch and Unsigned Maximum. */
+ _AMO_LD_SMAX = 0x05, /* Fetch and Signed Maximum. */
+ _AMO_LD_UMIN = 0x06, /* Fetch and Unsigned Minimum. */
+ _AMO_LD_SMIN = 0x07, /* Fetch and Signed Minimum. */
+ _AMO_LD_SWAP = 0x08, /* Swap. */
+ _AMO_LD_CS_NE = 0x10, /* Compare and Swap Not Equal. */
+ _AMO_LD_INC_BOUNDED = 0x18, /* Fetch and Increment Bounded. */
+ _AMO_LD_INC_EQUAL = 0x19, /* Fetch and Increment Equal. */
+ _AMO_LD_DEC_BOUNDED = 0x1A /* Fetch and Decrement Bounded. */
+};
+
+/* Implementation of the simple LWAT/LDAT operations that take one register and
+ modify one word or double-word of memory and return the value that was
+ previously in the memory location.
+
+ The LWAT/LDAT opcode requires the address to be a single register, and that
+ points to a suitably aligned memory location. Asm volatile is used to
+ prevent the optimizer from moving the operation. */
+
+#define _AMO_LD_SIMPLE(NAME, TYPE, OPCODE, FC) \
+static __inline__ TYPE \
+NAME (TYPE *_PTR, TYPE _VALUE) \
+{ \
+ unsigned __int128 _TMP; \
+ TYPE _RET; \
+ __asm__ volatile ("mr %L1,%3\n" \
+ "\t" OPCODE " %1,%P0,%4\n" \
+ "\tmr %2,%1\n" \
+ : "+Q" (_PTR[0]), "=&r" (_TMP), "=r" (_RET) \
+ : "r" (_VALUE), "n" (FC)); \
+ return _RET; \
+}
+
+_AMO_LD_SIMPLE (amo_lwat_add, uint32_t, "lwat", _AMO_LD_ADD)
+_AMO_LD_SIMPLE (amo_lwat_xor, uint32_t, "lwat", _AMO_LD_XOR)
+_AMO_LD_SIMPLE (amo_lwat_ior, uint32_t, "lwat", _AMO_LD_IOR)
+_AMO_LD_SIMPLE (amo_lwat_and, uint32_t, "lwat", _AMO_LD_AND)
+_AMO_LD_SIMPLE (amo_lwat_umax, uint32_t, "lwat", _AMO_LD_UMAX)
+_AMO_LD_SIMPLE (amo_lwat_umin, uint32_t, "lwat", _AMO_LD_UMIN)
+_AMO_LD_SIMPLE (amo_lwat_swap, uint32_t, "lwat", _AMO_LD_SWAP)
+
+_AMO_LD_SIMPLE (amo_lwat_sadd, int32_t, "lwat", _AMO_LD_ADD)
+_AMO_LD_SIMPLE (amo_lwat_smax, int32_t, "lwat", _AMO_LD_SMAX)
+_AMO_LD_SIMPLE (amo_lwat_smin, int32_t, "lwat", _AMO_LD_SMIN)
+_AMO_LD_SIMPLE (amo_lwat_sswap, int32_t, "lwat", _AMO_LD_SWAP)
+
+_AMO_LD_SIMPLE (amo_ldat_add, uint64_t, "ldat", _AMO_LD_ADD)
+_AMO_LD_SIMPLE (amo_ldat_xor, uint64_t, "ldat", _AMO_LD_XOR)
+_AMO_LD_SIMPLE (amo_ldat_ior, uint64_t, "ldat", _AMO_LD_IOR)
+_AMO_LD_SIMPLE (amo_ldat_and, uint64_t, "ldat", _AMO_LD_AND)
+_AMO_LD_SIMPLE (amo_ldat_umax, uint64_t, "ldat", _AMO_LD_UMAX)
+_AMO_LD_SIMPLE (amo_ldat_umin, uint64_t, "ldat", _AMO_LD_UMIN)
+_AMO_LD_SIMPLE (amo_ldat_swap, uint64_t, "ldat", _AMO_LD_SWAP)
+
+_AMO_LD_SIMPLE (amo_ldat_sadd, int64_t, "ldat", _AMO_LD_ADD)
+_AMO_LD_SIMPLE (amo_ldat_smax, int64_t, "ldat", _AMO_LD_SMAX)
+_AMO_LD_SIMPLE (amo_ldat_smin, int64_t, "ldat", _AMO_LD_SMIN)
+_AMO_LD_SIMPLE (amo_ldat_sswap, int64_t, "ldat", _AMO_LD_SWAP)
+
+/* Enumeration of the STWAT/STDAT sub-opcodes. */
+enum _AMO_ST {
+ _AMO_ST_ADD = 0x00, /* Store Add. */
+ _AMO_ST_XOR = 0x01, /* Store Xor. */
+ _AMO_ST_IOR = 0x02, /* Store Ior. */
+ _AMO_ST_AND = 0x03, /* Store And. */
+ _AMO_ST_UMAX = 0x04, /* Store Unsigned Maximum. */
+ _AMO_ST_SMAX = 0x05, /* Store Signed Maximum. */
+ _AMO_ST_UMIN = 0x06, /* Store Unsigned Minimum. */
+ _AMO_ST_SMIN = 0x07, /* Store Signed Minimum. */
+ _AMO_ST_TWIN = 0x18 /* Store Twin. */
+};
+
+/* Implementation of the simple STWAT/STDAT operations that take one register
+ and modify one word or double-word of memory. No value is returned.
+
+ The STWAT/STDAT opcode requires the address to be a single register, and
+ that points to a suitably aligned memory location. Asm volatile is used to
+ prevent the optimizer from moving the operation. */
+
+#define _AMO_ST_SIMPLE(NAME, TYPE, OPCODE, FC) \
+static __inline__ void \
+NAME (TYPE *_PTR, TYPE _VALUE) \
+{ \
+ __asm__ volatile (OPCODE " %1,%P0,%2" \
+ : "+Q" (_PTR[0]) \
+ : "r" (_VALUE), "n" (FC)); \
+ return; \
+}
+
+_AMO_ST_SIMPLE (amo_stwat_add, uint32_t, "stwat", _AMO_ST_ADD)
+_AMO_ST_SIMPLE (amo_stwat_xor, uint32_t, "stwat", _AMO_ST_XOR)
+_AMO_ST_SIMPLE (amo_stwat_ior, uint32_t, "stwat", _AMO_ST_IOR)
+_AMO_ST_SIMPLE (amo_stwat_and, uint32_t, "stwat", _AMO_ST_AND)
+_AMO_ST_SIMPLE (amo_stwat_umax, uint32_t, "stwat", _AMO_ST_UMAX)
+_AMO_ST_SIMPLE (amo_stwat_umin, uint32_t, "stwat", _AMO_ST_UMIN)
+
+_AMO_ST_SIMPLE (amo_stwat_sadd, int32_t, "stwat", _AMO_ST_ADD)
+_AMO_ST_SIMPLE (amo_stwat_smax, int32_t, "stwat", _AMO_ST_SMAX)
+_AMO_ST_SIMPLE (amo_stwat_smin, int32_t, "stwat", _AMO_ST_SMIN)
+
+_AMO_ST_SIMPLE (amo_stdat_add, uint64_t, "stdat", _AMO_ST_ADD)
+_AMO_ST_SIMPLE (amo_stdat_xor, uint64_t, "stdat", _AMO_ST_XOR)
+_AMO_ST_SIMPLE (amo_stdat_ior, uint64_t, "stdat", _AMO_ST_IOR)
+_AMO_ST_SIMPLE (amo_stdat_and, uint64_t, "stdat", _AMO_ST_AND)
+_AMO_ST_SIMPLE (amo_stdat_umax, uint64_t, "stdat", _AMO_ST_UMAX)
+_AMO_ST_SIMPLE (amo_stdat_umin, uint64_t, "stdat", _AMO_ST_UMIN)
+
+_AMO_ST_SIMPLE (amo_stdat_sadd, int64_t, "stdat", _AMO_ST_ADD)
+_AMO_ST_SIMPLE (amo_stdat_smax, int64_t, "stdat", _AMO_ST_SMAX)
+_AMO_ST_SIMPLE (amo_stdat_smin, int64_t, "stdat", _AMO_ST_SMIN)
+#endif /* _ARCH_PWR9 && _ARCH_PPC64. */
+#endif /* _POWERPC_AMO_H. */
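A hedged usage sketch, not part of the patch: on a compiler targeting 64-bit POWER9 (so that _ARCH_PWR9 and _ARCH_PPC64 are both defined), the wrappers generated above expand to single lwat/ldat instructions. The hypothetical bump_counter below uses amo_ldat_add, which the macro expansion defines to return the value previously in memory.

#include <stdint.h>
#include <amo.h>

uint64_t
bump_counter (uint64_t *counter)
{
  /* Atomically add 1 to *COUNTER; the old value is returned.  */
  return amo_ldat_add (counter, 1);
}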
diff --git a/gcc/config/rs6000/darwin.h b/gcc/config/rs6000/darwin.h
index aeb4762387f..2c3b04c613b 100644
--- a/gcc/config/rs6000/darwin.h
+++ b/gcc/config/rs6000/darwin.h
@@ -148,16 +148,14 @@ extern int darwin_emit_branch_islands;
/* Pad the outgoing args area to 16 bytes instead of the usual 8. */
-#undef STARTING_FRAME_OFFSET
-#define STARTING_FRAME_OFFSET \
- (FRAME_GROWS_DOWNWARD \
- ? 0 \
- : (RS6000_ALIGN (crtl->outgoing_args_size, 16) \
- + RS6000_SAVE_AREA))
+#undef RS6000_STARTING_FRAME_OFFSET
+#define RS6000_STARTING_FRAME_OFFSET \
+ (RS6000_ALIGN (crtl->outgoing_args_size, 16) \
+ + RS6000_SAVE_AREA)
#undef STACK_DYNAMIC_OFFSET
#define STACK_DYNAMIC_OFFSET(FUNDECL) \
- (RS6000_ALIGN (crtl->outgoing_args_size, 16) \
+ (RS6000_ALIGN (crtl->outgoing_args_size.to_constant (), 16) \
+ (STACK_POINTER_OFFSET))
/* Darwin uses a function call if everything needs to be saved/restored. */
diff --git a/gcc/config/rs6000/driver-rs6000.c b/gcc/config/rs6000/driver-rs6000.c
index 860208f9ffb..f3edb861138 100644
--- a/gcc/config/rs6000/driver-rs6000.c
+++ b/gcc/config/rs6000/driver-rs6000.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/rs6000/host-darwin.c b/gcc/config/rs6000/host-darwin.c
index 272d38e9748..ecafe08962f 100644
--- a/gcc/config/rs6000/host-darwin.c
+++ b/gcc/config/rs6000/host-darwin.c
@@ -17,7 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/rs6000/host-ppc64-darwin.c b/gcc/config/rs6000/host-ppc64-darwin.c
index bd674fae42f..8f995148b24 100644
--- a/gcc/config/rs6000/host-ppc64-darwin.c
+++ b/gcc/config/rs6000/host-ppc64-darwin.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/rs6000/predicates.md b/gcc/config/rs6000/predicates.md
index 237b4323b4c..569158f4c35 100644
--- a/gcc/config/rs6000/predicates.md
+++ b/gcc/config/rs6000/predicates.md
@@ -199,6 +199,16 @@
return CA_REGNO_P (REGNO (op));
})
+;; Return 1 if operand is constant zero (scalars and vectors).
+(define_predicate "zero_constant"
+ (and (match_code "const_int,const_double,const_wide_int,const_vector")
+ (match_test "op == CONST0_RTX (mode)")))
+
+;; Return 1 if operand is constant -1 (scalars and vectors).
+(define_predicate "all_ones_constant"
+ (and (match_code "const_int,const_double,const_wide_int,const_vector")
+ (match_test "op == CONSTM1_RTX (mode) && !FLOAT_MODE_P (mode)")))
+
;; Return 1 if op is a signed 5-bit constant integer.
(define_predicate "s5bit_cint_operand"
(and (match_code "const_int")
@@ -543,12 +553,16 @@
(match_operand 0 "u_short_cint_operand")
(match_operand 0 "gpc_reg_operand")))
-;; Return 1 if op is any constant integer
-;; or non-special register.
+;; Return 1 if op is any constant integer or a non-special register.
(define_predicate "reg_or_cint_operand"
(ior (match_code "const_int")
(match_operand 0 "gpc_reg_operand")))
+;; Return 1 if op is constant zero or a non-special register.
+(define_predicate "reg_or_zero_operand"
+ (ior (match_operand 0 "zero_constant")
+ (match_operand 0 "gpc_reg_operand")))
+
;; Return 1 if op is a constant integer valid for addition with addis, addi.
(define_predicate "add_cint_operand"
(and (match_code "const_int")
@@ -744,16 +758,6 @@
(and (match_test "easy_altivec_constant (op, mode)")
(match_test "vspltis_shifted (op) != 0")))))
-;; Return 1 if operand is constant zero (scalars and vectors).
-(define_predicate "zero_constant"
- (and (match_code "const_int,const_double,const_wide_int,const_vector")
- (match_test "op == CONST0_RTX (mode)")))
-
-;; Return 1 if operand is constant -1 (scalars and vectors).
-(define_predicate "all_ones_constant"
- (and (match_code "const_int,const_double,const_wide_int,const_vector")
- (match_test "op == CONSTM1_RTX (mode) && !FLOAT_MODE_P (mode)")))
-
;; Return 1 if operand is a vector int register or is either a vector constant
;; of all 0 bits or a vector constant of all 1 bits.
(define_predicate "vector_int_reg_or_same_bit"
diff --git a/gcc/config/rs6000/rs6000-c.c b/gcc/config/rs6000/rs6000-c.c
index deec97e81df..5c4b7664430 100644
--- a/gcc/config/rs6000/rs6000-c.c
+++ b/gcc/config/rs6000/rs6000-c.c
@@ -20,7 +20,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/rs6000/rs6000-linux.c b/gcc/config/rs6000/rs6000-linux.c
index 0a17aff5440..f9965b660b1 100644
--- a/gcc/config/rs6000/rs6000-linux.c
+++ b/gcc/config/rs6000/rs6000-linux.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/rs6000/rs6000-p8swap.c b/gcc/config/rs6000/rs6000-p8swap.c
index e1324b72c25..7d9ab878625 100644
--- a/gcc/config/rs6000/rs6000-p8swap.c
+++ b/gcc/config/rs6000/rs6000-p8swap.c
@@ -18,6 +18,8 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/rs6000/rs6000-protos.h b/gcc/config/rs6000/rs6000-protos.h
index c6be5b1ef59..db0e692739c 100644
--- a/gcc/config/rs6000/rs6000-protos.h
+++ b/gcc/config/rs6000/rs6000-protos.h
@@ -209,7 +209,6 @@ extern void rs6000_emit_epilogue (int);
extern void rs6000_expand_split_stack_prologue (void);
extern void rs6000_split_stack_space_check (rtx, rtx);
extern void rs6000_emit_eh_reg_restore (rtx, rtx);
-extern const char * output_isel (rtx *);
extern void rs6000_call_aix (rtx, rtx, rtx, rtx);
extern void rs6000_sibcall_aix (rtx, rtx, rtx, rtx);
extern void rs6000_aix_asm_output_dwarf_table_ref (char *);
diff --git a/gcc/config/rs6000/rs6000-string.c b/gcc/config/rs6000/rs6000-string.c
index 8c2a93e3d91..5812b4a6f50 100644
--- a/gcc/config/rs6000/rs6000-string.c
+++ b/gcc/config/rs6000/rs6000-string.c
@@ -18,6 +18,8 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/rs6000/rs6000.c b/gcc/config/rs6000/rs6000.c
index d94ebde00e4..e02b0863dbf 100644
--- a/gcc/config/rs6000/rs6000.c
+++ b/gcc/config/rs6000/rs6000.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -1730,6 +1730,8 @@ static const struct attribute_spec rs6000_attribute_table[] =
#define TARGET_RTX_COSTS rs6000_rtx_costs
#undef TARGET_ADDRESS_COST
#define TARGET_ADDRESS_COST hook_int_rtx_mode_as_bool_0
+#undef TARGET_INSN_COST
+#define TARGET_INSN_COST rs6000_insn_cost
#undef TARGET_INIT_DWARF_REG_SIZES_EXTRA
#define TARGET_INIT_DWARF_REG_SIZES_EXTRA rs6000_init_dwarf_reg_sizes_extra
@@ -1958,6 +1960,9 @@ static const struct attribute_spec rs6000_attribute_table[] =
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT rs6000_constant_alignment
+
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET rs6000_starting_frame_offset
/* Processor table. */
@@ -23255,24 +23260,6 @@ rs6000_emit_int_cmove (rtx dest, rtx op, rtx true_cond, rtx false_cond)
return 1;
}
-const char *
-output_isel (rtx *operands)
-{
- enum rtx_code code;
-
- code = GET_CODE (operands[1]);
-
- if (code == GE || code == GEU || code == LE || code == LEU || code == NE)
- {
- gcc_assert (GET_CODE (operands[2]) == REG
- && GET_CODE (operands[3]) == REG);
- PUT_CODE (operands[1], reverse_condition (code));
- return "isel %0,%3,%2,%j1";
- }
-
- return "isel %0,%2,%3,%j1";
-}
-
void
rs6000_emit_minmax (rtx dest, enum rtx_code code, rtx op0, rtx op1)
{
@@ -34393,7 +34380,8 @@ rs6000_xcoff_asm_output_aligned_decl_common (FILE *stream,
size, align2);
#ifdef HAVE_GAS_HIDDEN
- fputs (rs6000_xcoff_visibility (decl), stream);
+ if (decl != NULL)
+ fputs (rs6000_xcoff_visibility (decl), stream);
#endif
putc ('\n', stream);
}
@@ -34938,6 +34926,88 @@ rs6000_debug_rtx_costs (rtx x, machine_mode mode, int outer_code,
return ret;
}
+/* Implement TARGET_INSN_COST. Return the insn's length when optimizing
+   for size; otherwise estimate its execution cost from the insn type
+   and the processor cost table. */
+
+static int
+rs6000_insn_cost (rtx_insn *insn, bool speed)
+{
+ if (recog_memoized (insn) < 0)
+ return 0;
+
+ if (!speed)
+ return get_attr_length (insn);
+
+ int cost = get_attr_cost (insn);
+ if (cost > 0)
+ return cost;
+
+ int n = get_attr_length (insn) / 4;
+ enum attr_type type = get_attr_type (insn);
+
+ switch (type)
+ {
+ case TYPE_LOAD:
+ case TYPE_FPLOAD:
+ case TYPE_VECLOAD:
+ cost = COSTS_N_INSNS (n + 1);
+ break;
+
+ case TYPE_MUL:
+ switch (get_attr_size (insn))
+ {
+ case SIZE_8:
+ cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const9;
+ break;
+ case SIZE_16:
+ cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi_const;
+ break;
+ case SIZE_32:
+ cost = COSTS_N_INSNS (n - 1) + rs6000_cost->mulsi;
+ break;
+ case SIZE_64:
+ cost = COSTS_N_INSNS (n - 1) + rs6000_cost->muldi;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+ case TYPE_DIV:
+ switch (get_attr_size (insn))
+ {
+ case SIZE_32:
+ cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divsi;
+ break;
+ case SIZE_64:
+ cost = COSTS_N_INSNS (n - 1) + rs6000_cost->divdi;
+ break;
+ default:
+ gcc_unreachable ();
+ }
+ break;
+
+ case TYPE_FP:
+ cost = n * rs6000_cost->fp;
+ break;
+ case TYPE_DMUL:
+ cost = n * rs6000_cost->dmul;
+ break;
+ case TYPE_SDIV:
+ cost = n * rs6000_cost->sdiv;
+ break;
+ case TYPE_DDIV:
+ cost = n * rs6000_cost->ddiv;
+ break;
+
+ case TYPE_SYNC:
+ case TYPE_LOAD_L:
+ cost = COSTS_N_INSNS (n + 2);
+ break;
+
+ default:
+ cost = COSTS_N_INSNS (n);
+ }
+
+ return cost;
+}
+
/* Debug form of ADDRESS_COST that is selected if -mdebug=cost. */
static int
@@ -39433,6 +39503,16 @@ rs6000_constant_alignment (const_tree exp, HOST_WIDE_INT align)
return MAX (align, BITS_PER_WORD);
return align;
}
+
+/* Implement TARGET_STARTING_FRAME_OFFSET. */
+
+static HOST_WIDE_INT
+rs6000_starting_frame_offset (void)
+{
+ if (FRAME_GROWS_DOWNWARD)
+ return 0;
+ return RS6000_STARTING_FRAME_OFFSET;
+}
struct gcc_target targetm = TARGET_INITIALIZER;
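A minimal sketch, not from this patch, of how an RTL pass can consult the new hook; cheaper_p is a hypothetical helper. Because rs6000_insn_cost returns the insn length for !speed, the same interface serves both size and speed comparisons, and the "cost" attribute added in rs6000.md further below lets individual patterns override the type-based estimate.

static bool
cheaper_p (rtx_insn *a, rtx_insn *b, bool speed)
{
  /* Compare two candidate insns using the target's cost estimate.  */
  return targetm.insn_cost (a, speed) < targetm.insn_cost (b, speed);
}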
diff --git a/gcc/config/rs6000/rs6000.h b/gcc/config/rs6000/rs6000.h
index 1dfc3bf49ac..5c7c817b152 100644
--- a/gcc/config/rs6000/rs6000.h
+++ b/gcc/config/rs6000/rs6000.h
@@ -565,8 +565,6 @@ extern int rs6000_vector_align[];
#define TARGET_ALTIVEC_ABI rs6000_altivec_abi
#define TARGET_LDBRX (TARGET_POPCNTD || rs6000_cpu == PROCESSOR_CELL)
-#define TARGET_ISEL64 (TARGET_ISEL && TARGET_POWERPC64)
-
/* ISA 2.01 allowed FCFID to be done in 32-bit, previously it was 64-bit only.
Enable 32-bit fcfid's on any of the switches for newer ISA machines or
XILINX. */
@@ -1552,16 +1550,13 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
sizes of the fixed area and the parameter area must be a multiple of
STACK_BOUNDARY. */
-#define STARTING_FRAME_OFFSET \
- (FRAME_GROWS_DOWNWARD \
- ? 0 \
- : (cfun->calls_alloca \
- ? (RS6000_ALIGN (MACRO_INT (crtl->outgoing_args_size) \
- + RS6000_SAVE_AREA, \
- (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8 )) \
- : (RS6000_ALIGN (MACRO_INT (crtl->outgoing_args_size), \
- (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8) \
- + RS6000_SAVE_AREA)))
+#define RS6000_STARTING_FRAME_OFFSET \
+ (cfun->calls_alloca \
+ ? (RS6000_ALIGN (crtl->outgoing_args_size + RS6000_SAVE_AREA, \
+ (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8 )) \
+ : (RS6000_ALIGN (crtl->outgoing_args_size, \
+ (TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8) \
+ + RS6000_SAVE_AREA))
/* Offset from the stack pointer register to an item dynamically
allocated on the stack, e.g., by `alloca'.
@@ -1573,7 +1568,7 @@ extern enum reg_class rs6000_constraints[RS6000_CONSTRAINT_MAX];
This value must be a multiple of STACK_BOUNDARY (hard coded in
`emit-rtl.c'). */
#define STACK_DYNAMIC_OFFSET(FUNDECL) \
- RS6000_ALIGN (MACRO_INT (crtl->outgoing_args_size) \
+ RS6000_ALIGN (crtl->outgoing_args_size.to_constant () \
+ STACK_POINTER_OFFSET, \
(TARGET_ALTIVEC || TARGET_VSX) ? 16 : 8)
diff --git a/gcc/config/rs6000/rs6000.md b/gcc/config/rs6000/rs6000.md
index 97a75a2291c..aad382ced33 100644
--- a/gcc/config/rs6000/rs6000.md
+++ b/gcc/config/rs6000/rs6000.md
@@ -193,6 +193,10 @@
;; This is used for insert, mul and others as necessary.
(define_attr "size" "8,16,32,64,128" (const_string "32"))
+;; What is the insn_cost for this insn? The target hook can still override
+;; this. When optimizing for size, the "length" attribute is used instead.
+(define_attr "cost" "" (const_int 0))
+
;; Is this instruction record form ("dot", signed compare to 0, writing CR0)?
;; This is used for add, logical, shift, exts, mul.
(define_attr "dot" "no,yes" (const_string "no"))
@@ -574,9 +578,6 @@
; DImode bits
(define_mode_attr dbits [(QI "56") (HI "48") (SI "32")])
-;; ISEL/ISEL64 target selection
-(define_mode_attr sel [(SI "") (DI "64")])
-
;; Bitmask for shift instructions
(define_mode_attr hH [(SI "h") (DI "H")])
@@ -4911,7 +4912,7 @@
(if_then_else:GPR (match_operand 1 "comparison_operator" "")
(match_operand:GPR 2 "gpc_reg_operand" "")
(match_operand:GPR 3 "gpc_reg_operand" "")))]
- "TARGET_ISEL<sel>"
+ "TARGET_ISEL"
"
{
if (rs6000_emit_cmove (operands[0], operands[1], operands[2], operands[3]))
@@ -4934,13 +4935,11 @@
(match_operator 1 "scc_comparison_operator"
[(match_operand:CC 4 "cc_reg_operand" "y,y")
(const_int 0)])
- (match_operand:GPR 2 "reg_or_cint_operand" "O,b")
+ (match_operand:GPR 2 "reg_or_zero_operand" "O,b")
(match_operand:GPR 3 "gpc_reg_operand" "r,r")))]
- "TARGET_ISEL<sel>"
- "*
-{ return output_isel (operands); }"
- [(set_attr "type" "isel")
- (set_attr "length" "4")])
+ "TARGET_ISEL"
+ "isel %0,%2,%3,%j1"
+ [(set_attr "type" "isel")])
(define_insn "isel_unsigned_<mode>"
[(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
@@ -4948,45 +4947,45 @@
(match_operator 1 "scc_comparison_operator"
[(match_operand:CCUNS 4 "cc_reg_operand" "y,y")
(const_int 0)])
- (match_operand:GPR 2 "reg_or_cint_operand" "O,b")
+ (match_operand:GPR 2 "reg_or_zero_operand" "O,b")
(match_operand:GPR 3 "gpc_reg_operand" "r,r")))]
- "TARGET_ISEL<sel>"
- "*
-{ return output_isel (operands); }"
- [(set_attr "type" "isel")
- (set_attr "length" "4")])
+ "TARGET_ISEL"
+ "isel %0,%2,%3,%j1"
+ [(set_attr "type" "isel")])
;; These patterns can be useful for combine; they let combine know that
;; isel can handle reversed comparisons so long as the operands are
;; registers.
(define_insn "*isel_reversed_signed_<mode>"
- [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
(if_then_else:GPR
(match_operator 1 "scc_rev_comparison_operator"
- [(match_operand:CC 4 "cc_reg_operand" "y")
+ [(match_operand:CC 4 "cc_reg_operand" "y,y")
(const_int 0)])
- (match_operand:GPR 2 "gpc_reg_operand" "b")
- (match_operand:GPR 3 "gpc_reg_operand" "b")))]
- "TARGET_ISEL<sel>"
- "*
-{ return output_isel (operands); }"
- [(set_attr "type" "isel")
- (set_attr "length" "4")])
+ (match_operand:GPR 2 "gpc_reg_operand" "r,r")
+ (match_operand:GPR 3 "reg_or_zero_operand" "O,b")))]
+ "TARGET_ISEL"
+{
+ PUT_CODE (operands[1], reverse_condition (GET_CODE (operands[1])));
+ return "isel %0,%3,%2,%j1";
+}
+ [(set_attr "type" "isel")])
(define_insn "*isel_reversed_unsigned_<mode>"
- [(set (match_operand:GPR 0 "gpc_reg_operand" "=r")
+ [(set (match_operand:GPR 0 "gpc_reg_operand" "=r,r")
(if_then_else:GPR
(match_operator 1 "scc_rev_comparison_operator"
- [(match_operand:CCUNS 4 "cc_reg_operand" "y")
+ [(match_operand:CCUNS 4 "cc_reg_operand" "y,y")
(const_int 0)])
- (match_operand:GPR 2 "gpc_reg_operand" "b")
- (match_operand:GPR 3 "gpc_reg_operand" "b")))]
- "TARGET_ISEL<sel>"
- "*
-{ return output_isel (operands); }"
- [(set_attr "type" "isel")
- (set_attr "length" "4")])
+ (match_operand:GPR 2 "gpc_reg_operand" "r,r")
+ (match_operand:GPR 3 "reg_or_zero_operand" "O,b")))]
+ "TARGET_ISEL"
+{
+ PUT_CODE (operands[1], reverse_condition (GET_CODE (operands[1])));
+ return "isel %0,%3,%2,%j1";
+}
+ [(set_attr "type" "isel")])
;; Floating point conditional move
(define_expand "mov<mode>cc"
diff --git a/gcc/config/rx/rx.c b/gcc/config/rx/rx.c
index ff586bbb827..86a47082283 100644
--- a/gcc/config/rx/rx.c
+++ b/gcc/config/rx/rx.c
@@ -22,7 +22,7 @@
* Re-enable memory-to-memory copies and fix up reload. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/rx/rx.h b/gcc/config/rx/rx.h
index 4bc43c2ef5c..850033c1ee7 100644
--- a/gcc/config/rx/rx.h
+++ b/gcc/config/rx/rx.h
@@ -169,7 +169,6 @@
#define HAS_LONG_UNCOND_BRANCH 0
#define MOVE_MAX 4
-#define STARTING_FRAME_OFFSET 0
#define HAVE_PRE_DECREMENT 1
#define HAVE_POST_INCREMENT 1
diff --git a/gcc/config/s390/driver-native.c b/gcc/config/s390/driver-native.c
index b5c89dff36e..40b59238042 100644
--- a/gcc/config/s390/driver-native.c
+++ b/gcc/config/s390/driver-native.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/s390/s390-c.c b/gcc/config/s390/s390-c.c
index 4fb7653f2b6..0a2733c8d62 100644
--- a/gcc/config/s390/s390-c.c
+++ b/gcc/config/s390/s390-c.c
@@ -27,7 +27,7 @@
included anymore - a mechanism supposed to avoid adding frontend -
backend dependencies. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/s390/s390.c b/gcc/config/s390/s390.c
index 2f4063b0ad3..2258148c573 100644
--- a/gcc/config/s390/s390.c
+++ b/gcc/config/s390/s390.c
@@ -20,7 +20,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/s390/s390.h b/gcc/config/s390/s390.h
index 52eee948bdf..15d33906023 100644
--- a/gcc/config/s390/s390.h
+++ b/gcc/config/s390/s390.h
@@ -590,9 +590,6 @@ extern const enum reg_class regclass_map[FIRST_PSEUDO_REGISTER];
/* Offset from stack-pointer to first location of outgoing args. */
#define STACK_POINTER_OFFSET (TARGET_64BIT ? 160 : 96)
-/* Offset within stack frame to start allocating local variables at. */
-#define STARTING_FRAME_OFFSET 0
-
/* Offset from the stack pointer register to an item dynamically
allocated on the stack, e.g., by `alloca'. */
#define STACK_DYNAMIC_OFFSET(FUNDECL) \
diff --git a/gcc/config/sh/sh-c.c b/gcc/config/sh/sh-c.c
index 6902f1d4510..ca74f0fb78d 100644
--- a/gcc/config/sh/sh-c.c
+++ b/gcc/config/sh/sh-c.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/sh/sh-mem.cc b/gcc/config/sh/sh-mem.cc
index 8fce9799921..1a7650d25ae 100644
--- a/gcc/config/sh/sh-mem.cc
+++ b/gcc/config/sh/sh-mem.cc
@@ -17,6 +17,8 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/sh/sh.c b/gcc/config/sh/sh.c
index 693a5a3457c..6e5a5e49b4d 100644
--- a/gcc/config/sh/sh.c
+++ b/gcc/config/sh/sh.c
@@ -21,7 +21,7 @@ along with GCC; see the file COPYING3. If not see
#include <sstream>
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#define INCLUDE_VECTOR
diff --git a/gcc/config/sh/sh.h b/gcc/config/sh/sh.h
index fd156d2fa82..1c345d659e2 100644
--- a/gcc/config/sh/sh.h
+++ b/gcc/config/sh/sh.h
@@ -469,7 +469,8 @@ extern const sh_atomic_model& selected_atomic_model (void);
((GET_MODE_CLASS (TYPE_MODE (TYPE)) == MODE_COMPLEX_INT \
|| GET_MODE_CLASS (TYPE_MODE (TYPE)) == MODE_COMPLEX_FLOAT) \
? (unsigned) MIN (BIGGEST_ALIGNMENT, \
- GET_MODE_BITSIZE (MACRO_MODE (TYPE_MODE (TYPE)))) \
+ GET_MODE_BITSIZE (as_a <fixed_size_mode> \
+ (TYPE_MODE (TYPE)))) \
: (unsigned) DATA_ALIGNMENT(TYPE, ALIGN))
/* Make arrays of chars word-aligned for the same reasons. */
@@ -1110,10 +1111,6 @@ extern enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER];
are at negative offsets from the frame pointer. */
#define FRAME_GROWS_DOWNWARD 1
-/* Offset from the frame pointer to the first local variable slot to
- be allocated. */
-#define STARTING_FRAME_OFFSET 0
-
/* If we generate an insn to push BYTES bytes,
this says how many the stack pointer really advances by. */
/* Don't define PUSH_ROUNDING, since the hardware doesn't do this.
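A sketch of the idiom introduced above, assuming the SVE branch's poly-int machinery: complex modes are always fixed-size, so the checked as_a <fixed_size_mode> cast lets GET_MODE_BITSIZE yield a plain scalar where the alignment macro needs one. The helper name is hypothetical.

unsigned int
complex_type_bits (tree type)
{
  /* Valid only for types whose mode is known to be fixed-size.  */
  fixed_size_mode m = as_a <fixed_size_mode> (TYPE_MODE (type));
  return GET_MODE_BITSIZE (m);
}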
diff --git a/gcc/config/sh/sh_optimize_sett_clrt.cc b/gcc/config/sh/sh_optimize_sett_clrt.cc
index 449261e86de..bfb9aba1f7b 100644
--- a/gcc/config/sh/sh_optimize_sett_clrt.cc
+++ b/gcc/config/sh/sh_optimize_sett_clrt.cc
@@ -17,6 +17,8 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#include "system.h"
#include "coretypes.h"
diff --git a/gcc/config/sh/sh_treg_combine.cc b/gcc/config/sh/sh_treg_combine.cc
index cb3a7a85d17..7a7b02d4a1d 100644
--- a/gcc/config/sh/sh_treg_combine.cc
+++ b/gcc/config/sh/sh_treg_combine.cc
@@ -18,6 +18,8 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
+#define IN_TARGET_CODE 1
+
#include "config.h"
#define INCLUDE_ALGORITHM
#define INCLUDE_LIST
diff --git a/gcc/config/sparc/driver-sparc.c b/gcc/config/sparc/driver-sparc.c
index b87b182e01f..9b811a871fa 100644
--- a/gcc/config/sparc/driver-sparc.c
+++ b/gcc/config/sparc/driver-sparc.c
@@ -17,7 +17,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/sparc/sparc-c.c b/gcc/config/sparc/sparc-c.c
index cfeef6f26fb..f8124bf96ef 100644
--- a/gcc/config/sparc/sparc-c.c
+++ b/gcc/config/sparc/sparc-c.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/sparc/sparc.c b/gcc/config/sparc/sparc.c
index 880c6d9627b..4bcd0610158 100644
--- a/gcc/config/sparc/sparc.c
+++ b/gcc/config/sparc/sparc.c
@@ -20,7 +20,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -5278,8 +5278,9 @@ sparc_compute_frame_size (HOST_WIDE_INT size, int leaf_function)
frame_size = apparent_frame_size = 0;
else
{
- /* We subtract STARTING_FRAME_OFFSET, remember it's negative. */
- apparent_frame_size = ROUND_UP (size - STARTING_FRAME_OFFSET, 8);
+ /* We subtract TARGET_STARTING_FRAME_OFFSET; remember that it's negative. */
+ apparent_frame_size
+ = ROUND_UP (size - targetm.starting_frame_offset (), 8);
apparent_frame_size += n_global_fp_regs * 4;
/* We need to add the size of the outgoing argument area. */
diff --git a/gcc/config/sparc/sparc.h b/gcc/config/sparc/sparc.h
index 9ac741f569b..a0b56126fd5 100644
--- a/gcc/config/sparc/sparc.h
+++ b/gcc/config/sparc/sparc.h
@@ -1043,12 +1043,6 @@ extern char leaf_reg_remap[];
goes at a more negative offset in the frame. */
#define FRAME_GROWS_DOWNWARD 1
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-#define STARTING_FRAME_OFFSET 0
-
/* Offset of first parameter from the argument pointer register value.
!v9: This is 64 for the ins and locals, plus 4 for the struct-return reg
even if this function isn't going to use it.
diff --git a/gcc/config/spu/spu-c.c b/gcc/config/spu/spu-c.c
index 98533da123d..4f78bb744a5 100644
--- a/gcc/config/spu/spu-c.c
+++ b/gcc/config/spu/spu-c.c
@@ -14,7 +14,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/spu/spu.c b/gcc/config/spu/spu.c
index edec3eaea23..e792650184b 100644
--- a/gcc/config/spu/spu.c
+++ b/gcc/config/spu/spu.c
@@ -14,7 +14,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/spu/spu.h b/gcc/config/spu/spu.h
index de4aa99f276..cf99b88b6ac 100644
--- a/gcc/config/spu/spu.h
+++ b/gcc/config/spu/spu.h
@@ -222,8 +222,6 @@ targetm.resolve_overloaded_builtin = spu_resolve_overloaded_builtin; \
#define FRAME_GROWS_DOWNWARD 1
-#define STARTING_FRAME_OFFSET (0)
-
#define STACK_POINTER_OFFSET 32
#define FIRST_PARM_OFFSET(FNDECL) (0)
diff --git a/gcc/config/stormy16/stormy16-protos.h b/gcc/config/stormy16/stormy16-protos.h
index 38be8d2adfd..c4ce1145c21 100644
--- a/gcc/config/stormy16/stormy16-protos.h
+++ b/gcc/config/stormy16/stormy16-protos.h
@@ -28,6 +28,7 @@ extern int direct_return (void);
extern int xstormy16_interrupt_function_p (void);
extern int xstormy16_epilogue_uses (int);
extern void xstormy16_function_profiler (void);
+extern poly_int64 xstormy16_push_rounding (poly_int64);
#if defined (TREE_CODE)
extern void xstormy16_asm_output_aligned_common (FILE *, tree, const char *,
diff --git a/gcc/config/stormy16/stormy16.c b/gcc/config/stormy16/stormy16.c
index fa403db95ba..805064a23c2 100644
--- a/gcc/config/stormy16/stormy16.c
+++ b/gcc/config/stormy16/stormy16.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -2635,6 +2635,14 @@ xstormy16_modes_tieable_p (machine_mode mode1, machine_mode mode2)
{
return mode1 != BImode && mode2 != BImode;
}
+
+/* Implement PUSH_ROUNDING. */
+
+poly_int64
+xstormy16_push_rounding (poly_int64 bytes)
+{
+ return (bytes + 1) & ~1;
+}
#undef TARGET_ASM_ALIGNED_HI_OP
#define TARGET_ASM_ALIGNED_HI_OP "\t.hword\t"
diff --git a/gcc/config/stormy16/stormy16.h b/gcc/config/stormy16/stormy16.h
index 31d6156cc26..094a2f08e43 100644
--- a/gcc/config/stormy16/stormy16.h
+++ b/gcc/config/stormy16/stormy16.h
@@ -216,8 +216,6 @@ enum reg_class
#define ARGS_GROW_DOWNWARD 1
-#define STARTING_FRAME_OFFSET 0
-
#define FIRST_PARM_OFFSET(FUNDECL) 0
#define RETURN_ADDR_RTX(COUNT, FRAMEADDR) \
@@ -257,7 +255,7 @@ enum reg_class
/* Passing Function Arguments on the Stack. */
-#define PUSH_ROUNDING(BYTES) (((BYTES) + 1) & ~1)
+#define PUSH_ROUNDING(BYTES) xstormy16_push_rounding (BYTES)
/* Function Arguments in Registers. */
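A quick illustrative check of the rounding semantics, using a plain-integer stand-in for xstormy16_push_rounding (the real function takes and returns poly_int64): push sizes are padded up to a 2-byte multiple, so moving the formula out of the macro changes the signature but not the behaviour.

#include <assert.h>

static long
round_push (long bytes)
{
  /* Same formula as the old PUSH_ROUNDING macro.  */
  return (bytes + 1) & ~1;
}

int
main (void)
{
  assert (round_push (1) == 2);
  assert (round_push (2) == 2);
  assert (round_push (7) == 8);
  return 0;
}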
diff --git a/gcc/config/tilegx/mul-tables.c b/gcc/config/tilegx/mul-tables.c
index da971beb124..877df275e33 100644
--- a/gcc/config/tilegx/mul-tables.c
+++ b/gcc/config/tilegx/mul-tables.c
@@ -21,7 +21,7 @@
/* Note this file is auto-generated from gen-mul-tables.cc.
Make any required changes there. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/tilegx/tilegx-c.c b/gcc/config/tilegx/tilegx-c.c
index bb5fb7b937e..ba63fdef8c6 100644
--- a/gcc/config/tilegx/tilegx-c.c
+++ b/gcc/config/tilegx/tilegx-c.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/tilegx/tilegx.c b/gcc/config/tilegx/tilegx.c
index f7df451f9ae..140cce14ec8 100644
--- a/gcc/config/tilegx/tilegx.c
+++ b/gcc/config/tilegx/tilegx.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/tilegx/tilegx.h b/gcc/config/tilegx/tilegx.h
index 066c34d2155..ea3c1b308c7 100644
--- a/gcc/config/tilegx/tilegx.h
+++ b/gcc/config/tilegx/tilegx.h
@@ -242,7 +242,6 @@ enum reg_class
#define STACK_GROWS_DOWNWARD 1
#define FRAME_GROWS_DOWNWARD 1
-#define STARTING_FRAME_OFFSET 0
#define DYNAMIC_CHAIN_ADDRESS(FRAME) \
plus_constant (Pmode, (FRAME), UNITS_PER_WORD)
diff --git a/gcc/config/tilepro/mul-tables.c b/gcc/config/tilepro/mul-tables.c
index 88f98498edd..32e6ec0a97c 100644
--- a/gcc/config/tilepro/mul-tables.c
+++ b/gcc/config/tilepro/mul-tables.c
@@ -21,7 +21,7 @@
/* Note this file is auto-generated from gen-mul-tables.cc.
Make any required changes there. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/tilepro/tilepro-c.c b/gcc/config/tilepro/tilepro-c.c
index 2935f9dca47..13e8056628f 100644
--- a/gcc/config/tilepro/tilepro-c.c
+++ b/gcc/config/tilepro/tilepro-c.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/tilepro/tilepro.c b/gcc/config/tilepro/tilepro.c
index 310f26f1c2d..ae71f752690 100644
--- a/gcc/config/tilepro/tilepro.c
+++ b/gcc/config/tilepro/tilepro.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/tilepro/tilepro.h b/gcc/config/tilepro/tilepro.h
index 5ab8be49be0..3aa6dc8cd67 100644
--- a/gcc/config/tilepro/tilepro.h
+++ b/gcc/config/tilepro/tilepro.h
@@ -205,7 +205,6 @@ enum reg_class
#define STACK_GROWS_DOWNWARD 1
#define FRAME_GROWS_DOWNWARD 1
-#define STARTING_FRAME_OFFSET 0
#define DYNAMIC_CHAIN_ADDRESS(FRAME) \
plus_constant (Pmode, (FRAME), UNITS_PER_WORD)
diff --git a/gcc/config/v850/v850-c.c b/gcc/config/v850/v850-c.c
index 2a96216b4ad..145eef510ef 100644
--- a/gcc/config/v850/v850-c.c
+++ b/gcc/config/v850/v850-c.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/v850/v850.c b/gcc/config/v850/v850.c
index e7898bc37ec..dbcb5a4e03b 100644
--- a/gcc/config/v850/v850.c
+++ b/gcc/config/v850/v850.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/v850/v850.h b/gcc/config/v850/v850.h
index da096f708b4..5eb2e8828fa 100644
--- a/gcc/config/v850/v850.h
+++ b/gcc/config/v850/v850.h
@@ -398,13 +398,6 @@ enum reg_class
#define FRAME_GROWS_DOWNWARD 1
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-
-#define STARTING_FRAME_OFFSET 0
-
/* Offset of first parameter from the argument pointer register value. */
/* Is equal to the size of the saved fp + pc, even if an fp isn't
saved since the value is used before we know. */
diff --git a/gcc/config/vax/elf.h b/gcc/config/vax/elf.h
index 1ff1814cc1a..e699c12943c 100644
--- a/gcc/config/vax/elf.h
+++ b/gcc/config/vax/elf.h
@@ -66,10 +66,6 @@ along with GCC; see the file COPYING3. If not see
16))
-/* Reserve the top of the stack for exception handler stackadj value. */
-#undef STARTING_FRAME_OFFSET
-#define STARTING_FRAME_OFFSET -4
-
/* The VAX wants no space between the case instruction and the jump table. */
#undef ASM_OUTPUT_BEFORE_CASE_LABEL
#define ASM_OUTPUT_BEFORE_CASE_LABEL(FILE, PREFIX, NUM, TABLE)
diff --git a/gcc/config/vax/vax.c b/gcc/config/vax/vax.c
index 2af3edd0746..59b3f28fa10 100644
--- a/gcc/config/vax/vax.c
+++ b/gcc/config/vax/vax.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -64,6 +64,7 @@ static void vax_asm_trampoline_template (FILE *);
static void vax_trampoline_init (rtx, tree, rtx);
static poly_int64 vax_return_pops_args (tree, tree, poly_int64);
static bool vax_mode_dependent_address_p (const_rtx, addr_space_t);
+static HOST_WIDE_INT vax_starting_frame_offset (void);
/* Initialize the GCC target structure. */
#undef TARGET_ASM_ALIGNED_HI_OP
@@ -122,6 +123,9 @@ static bool vax_mode_dependent_address_p (const_rtx, addr_space_t);
#undef TARGET_OPTION_OVERRIDE
#define TARGET_OPTION_OVERRIDE vax_option_override
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET vax_starting_frame_offset
+
struct gcc_target targetm = TARGET_INITIALIZER;
/* Set global variables as needed for the options enabled. */
@@ -210,7 +214,7 @@ vax_expand_prologue (void)
/* Allocate the local stack frame. */
size = get_frame_size ();
- size -= STARTING_FRAME_OFFSET;
+ size -= vax_starting_frame_offset ();
emit_insn (gen_addsi3 (stack_pointer_rtx,
stack_pointer_rtx, GEN_INT (-size)));
@@ -2181,3 +2185,12 @@ vax_function_arg_advance (cumulative_args_t cum_v, machine_mode mode,
? (GET_MODE_SIZE (mode) + 3) & ~3
: (int_size_in_bytes (type) + 3) & ~3);
}
+
+/* Implement TARGET_STARTING_FRAME_OFFSET. */
+
+static HOST_WIDE_INT
+vax_starting_frame_offset (void)
+{
+ /* On ELF targets, reserve the top of the stack for exception handler
+ stackadj value. */
+ return TARGET_ELF ? -4 : 0;
+}
+
diff --git a/gcc/config/vax/vax.h b/gcc/config/vax/vax.h
index 04a865cfa64..990140df869 100644
--- a/gcc/config/vax/vax.h
+++ b/gcc/config/vax/vax.h
@@ -228,12 +228,6 @@ enum reg_class { NO_REGS, ALL_REGS, LIM_REG_CLASSES };
goes at a more negative offset in the frame. */
#define FRAME_GROWS_DOWNWARD 1
-/* Offset within stack frame to start allocating local variables at.
- If FRAME_GROWS_DOWNWARD, this is the offset to the END of the
- first local allocated. Otherwise, it is the offset to the BEGINNING
- of the first local allocated. */
-#define STARTING_FRAME_OFFSET 0
-
/* Given an rtx for the address of a frame,
return an rtx for the address of the word in the frame
that holds the dynamic chain--the previous frame's address. */
diff --git a/gcc/config/visium/visium.c b/gcc/config/visium/visium.c
index 2f9d1783f82..e028dc479d3 100644
--- a/gcc/config/visium/visium.c
+++ b/gcc/config/visium/visium.c
@@ -18,7 +18,7 @@
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/visium/visium.h b/gcc/config/visium/visium.h
index 3cc0bc41280..3b229f1a1e6 100644
--- a/gcc/config/visium/visium.h
+++ b/gcc/config/visium/visium.h
@@ -727,17 +727,6 @@ enum reg_class
pointer to a smaller address. */
#define STACK_GROWS_DOWNWARD 1
-/* `STARTING_FRAME_OFFSET'
-
- Offset from the frame pointer to the first local variable slot to
- be allocated.
-
- If `FRAME_GROWS_DOWNWARD', find the next slot's offset by
- subtracting the first slot's length from `STARTING_FRAME_OFFSET'.
- Otherwise, it is found by adding the length of the first slot to
- the value `STARTING_FRAME_OFFSET'. */
-#define STARTING_FRAME_OFFSET 0
-
/* `FIRST_PARM_OFFSET (FUNDECL)'
Offset from the argument pointer register to the first argument's
diff --git a/gcc/config/vms/vms-c.c b/gcc/config/vms/vms-c.c
index 83c4c00a8a3..f0c9fdc95e8 100644
--- a/gcc/config/vms/vms-c.c
+++ b/gcc/config/vms/vms-c.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -420,7 +420,7 @@ vms_c_register_includes (const char *sysroot,
if (!stdinc)
return;
- for (dir = get_added_cpp_dirs (SYSTEM); dir != NULL; dir = dir->next)
+ for (dir = get_added_cpp_dirs (INC_SYSTEM); dir != NULL; dir = dir->next)
{
const char * const *lib;
for (lib = vms_std_modules; *lib != NULL; lib++)
@@ -443,7 +443,7 @@ vms_c_register_includes (const char *sysroot,
p->sysp = 1;
p->construct = vms_construct_include_filename;
p->user_supplied_p = 0;
- add_cpp_dir_path (p, SYSTEM);
+ add_cpp_dir_path (p, INC_SYSTEM);
}
else
free (path);
diff --git a/gcc/config/vms/vms-f.c b/gcc/config/vms/vms-f.c
index bb6757137c3..bde19480f3b 100644
--- a/gcc/config/vms/vms-f.c
+++ b/gcc/config/vms/vms-f.c
@@ -17,7 +17,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/vms/vms.c b/gcc/config/vms/vms.c
index aaeccfffdc8..f17097cb478 100644
--- a/gcc/config/vms/vms.c
+++ b/gcc/config/vms/vms.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
diff --git a/gcc/config/xtensa/xtensa.c b/gcc/config/xtensa/xtensa.c
index bf7aa4ca98c..70b3d8447a8 100644
--- a/gcc/config/xtensa/xtensa.c
+++ b/gcc/config/xtensa/xtensa.c
@@ -18,7 +18,7 @@ You should have received a copy of the GNU General Public License
along with GCC; see the file COPYING3. If not see
<http://www.gnu.org/licenses/>. */
-#define TARGET_C_FILE 1
+#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
@@ -184,6 +184,7 @@ static unsigned int xtensa_hard_regno_nregs (unsigned int, machine_mode);
static bool xtensa_hard_regno_mode_ok (unsigned int, machine_mode);
static bool xtensa_modes_tieable_p (machine_mode, machine_mode);
static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT);
+static HOST_WIDE_INT xtensa_starting_frame_offset (void);
@@ -323,6 +324,9 @@ static HOST_WIDE_INT xtensa_constant_alignment (const_tree, HOST_WIDE_INT);
#undef TARGET_CONSTANT_ALIGNMENT
#define TARGET_CONSTANT_ALIGNMENT xtensa_constant_alignment
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET xtensa_starting_frame_offset
+
struct gcc_target targetm = TARGET_INITIALIZER;
@@ -4401,4 +4405,14 @@ xtensa_constant_alignment (const_tree exp, HOST_WIDE_INT align)
return align;
}
+/* Implement TARGET_STARTING_FRAME_OFFSET. */
+
+static HOST_WIDE_INT
+xtensa_starting_frame_offset (void)
+{
+ if (FRAME_GROWS_DOWNWARD)
+ return 0;
+ return crtl->outgoing_args_size;
+}
+
#include "gt-xtensa.h"
diff --git a/gcc/config/xtensa/xtensa.h b/gcc/config/xtensa/xtensa.h
index f9216391e25..b4cf53708b3 100644
--- a/gcc/config/xtensa/xtensa.h
+++ b/gcc/config/xtensa/xtensa.h
@@ -432,10 +432,6 @@ enum reg_class
#define FRAME_GROWS_DOWNWARD flag_stack_protect
-/* Offset within stack frame to start allocating local variables at. */
-#define STARTING_FRAME_OFFSET \
- (FRAME_GROWS_DOWNWARD ? 0 : crtl->outgoing_args_size)
-
/* The ARG_POINTER and FRAME_POINTER are not real Xtensa registers, so
they are eliminated to either the stack pointer or hard frame pointer. */
#define ELIMINABLE_REGS \
diff --git a/gcc/coretypes.h b/gcc/coretypes.h
index 2c74796102a..f4c76ff2aab 100644
--- a/gcc/coretypes.h
+++ b/gcc/coretypes.h
@@ -404,7 +404,7 @@ typedef unsigned char uchar;
conversion operator going from the former to the latter. We also
allow this for gencondmd.c for all targets, so that we can treat
machine_modes as enums without causing build failures. */
-#if (defined (TARGET_C_FILE) \
+#if (defined (IN_TARGET_CODE) \
&& (defined (USE_ENUM_MODES) || NUM_POLY_INT_COEFFS == 1))
#define POLY_INT_CONVERSION 1
#else
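A sketch of what the renamed guard controls, assuming a target with NUM_POLY_INT_COEFFS == 1: in a file that defines IN_TARGET_CODE, POLY_INT_CONVERSION is 1 and poly-int values convert implicitly back to their scalar type, so existing target code keeps compiling unchanged.

#define IN_TARGET_CODE 1
#include "config.h"
#include "system.h"
#include "coretypes.h"

HOST_WIDE_INT
scalar_of (poly_int64 x)
{
  /* Implicit conversion; ill-formed where POLY_INT_CONVERSION is 0.  */
  return x;
}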
diff --git a/gcc/cp/ChangeLog b/gcc/cp/ChangeLog
index 0d69bda285d..3abf79440cc 100644
--- a/gcc/cp/ChangeLog
+++ b/gcc/cp/ChangeLog
@@ -1,3 +1,89 @@
+2017-10-12 Nathan Sidwell <nathan@acm.org>
+
+ * cp-tree.h (cp_expr): Add const operator * and operator->
+ accessors.
+ (cp_tree_node_structure_enum): Delete TS_CP_BINDING,
+ TS_CP_WRAPPER, LAST_TS_CP_ENUM.
+
+2017-10-12 David Malcolm <dmalcolm@redhat.com>
+
+ * parser.c (get_required_cpp_ttype): New function.
+ (cp_parser_error_1): Call it, using the result to call
+ maybe_suggest_missing_token_insertion.
+
+2017-10-12 David Malcolm <dmalcolm@redhat.com>
+
+ * parser.c (get_matching_symbol): Move to before...
+ (cp_parser_error): Split out into...
+ (cp_parser_error_1): ...this new function, merging in content
+ from...
+ (cp_parser_required_error): ...here. Eliminate partial duplicate
+ of body of cp_parser_error in favor of a call to the new
+ cp_parser_error_1 helper function.
+
+2017-10-11 Nathan Sidwell <nathan@acm.org>
+
+ * decl2.c (struct mangled_decl_hash): Use DECL_ASSEMBLER_NAME_RAW.
+ (record_mangling): Likewise.
+
+2017-10-10 Nathan Sidwell <nathan@acm.org>
+
+ * name-lookup.c (extern_c_fns): Rename to ...
+ (extern_c_decls): ... here.
+ (check_extern_c_conflict, extern_c_linkage_bindings): Update.
+ (do_pushdecl): Check extern-c fns and vars.
+
+ * cp-tree.h (default_hash_traits <lang_identifier *>): Delete
+ specialization.
+
+ * decl2.c (struct mangled_decl_hash): New hash traits.
+ (mangled_decls): Make hash_table<mangled_decl_hash>.
+ (generate_mangling_alias, record_mangling): Adjust.
+
+2017-10-10 Jason Merrill <jason@redhat.com>
+
+ More delayed lambda capture fixes.
+ * call.c (add_function_candidate): Use build_address.
+ (build_op_call_1): Call mark_lvalue_use early.
+ (build_over_call): Handle error from build_this.
+ * constexpr.c (cxx_bind_parameters_in_call): Use build_address.
+ (cxx_eval_increment_expression): Don't use rvalue().
+ * cvt.c (convert_to_void): Use mark_discarded_use.
+ * expr.c (mark_use): Handle PARM_DECL, NON_DEPENDENT_EXPR. Fix
+ reference handling. Don't copy the expression.
+ (mark_discarded_use): New.
+ * lambda.c (insert_capture_proxy): Add some sanity checking.
+ (maybe_add_lambda_conv_op): Set cp_unevaluated_operand.
+ * pt.c (register_local_specialization): Add sanity check.
+ * semantics.c (process_outer_var_ref): Fix check for existing proxy.
+ * typeck.c (cp_build_addr_expr_1): Handle error from
+ mark_lvalue_use.
+ (cp_build_modify_expr): Call mark_lvalue_use_nonread, handle error
+ from rvalue.
+
+ Handle generic lambda capture in dependent expressions.
+ * lambda.c (need_generic_capture, dependent_capture_r)
+ (do_dependent_capture): New.
+ * pt.c (processing_nonlambda_template): Use need_generic_capture.
+ * semantics.c (maybe_cleanup_point_expr)
+ (maybe_cleanup_point_expr_void, finish_goto_stmt)
+ (maybe_convert_cond): Call do_dependent_capture.
+ * typeck.c (build_static_cast): Remove dependent capture handling.
+
+ * typeck.c (condition_conversion): Assert !processing_template_decl.
+ * semantics.c (finish_omp_clauses): Don't
+ fold_build_cleanup_point_expr if processing_template_decl.
+ (outer_var_p): A temporary can't be from an outer scope.
+ * pt.c (type_dependent_expression_p): Fix dependency checking of
+ functions without DECL_TEMPLATE_INFO.
+ (instantiate_decl): Use lss_copy.
+ * constexpr.c (is_valid_constexpr_fn): Fix lambdas before C++17.
+
+ * typeck.c (check_return_expr): Check non-dependent conversion in
+ templates.
+ * constraint.cc (check_function_concept): Don't complain about an
+ empty concept if seen_error.
+
2017-10-10 Richard Sandiford <richard.sandiford@linaro.org>
* cvt.c (ignore_overflows): Use wi::to_wide when
diff --git a/gcc/cp/call.c b/gcc/cp/call.c
index df156e3b0b4..8794210be0a 100644
--- a/gcc/cp/call.c
+++ b/gcc/cp/call.c
@@ -2160,7 +2160,10 @@ add_function_candidate (struct z_candidate **candidates,
else
{
parmtype = build_pointer_type (parmtype);
- arg = build_this (arg);
+ /* We don't use build_this here because we don't want to
+ capture the object argument until we've chosen a
+ non-static member function. */
+ arg = build_address (arg);
argtype = lvalue_type (arg);
}
}
@@ -4446,14 +4449,17 @@ build_op_call_1 (tree obj, vec<tree, va_gc> **args, tsubst_flags_t complain)
{
struct z_candidate *candidates = 0, *cand;
tree fns, convs, first_mem_arg = NULL_TREE;
- tree type = TREE_TYPE (obj);
bool any_viable_p;
tree result = NULL_TREE;
void *p;
+ obj = mark_lvalue_use (obj);
+
if (error_operand_p (obj))
return error_mark_node;
+ tree type = TREE_TYPE (obj);
+
obj = prep_operand (obj);
if (TYPE_PTRMEMFUNC_P (type))
@@ -7772,6 +7778,9 @@ build_over_call (struct z_candidate *cand, int flags, tsubst_flags_t complain)
tree converted_arg;
tree base_binfo;
+ if (arg == error_mark_node)
+ return error_mark_node;
+
if (convs[i]->bad_p)
{
if (complain & tf_error)
diff --git a/gcc/cp/constexpr.c b/gcc/cp/constexpr.c
index 77c18716bc9..1aa529eb8dc 100644
--- a/gcc/cp/constexpr.c
+++ b/gcc/cp/constexpr.c
@@ -196,7 +196,14 @@ is_valid_constexpr_fn (tree fun, bool complain)
}
}
- if (!DECL_CONSTRUCTOR_P (fun))
+ if (LAMBDA_TYPE_P (CP_DECL_CONTEXT (fun)) && cxx_dialect < cxx17)
+ {
+ ret = false;
+ if (complain)
+ inform (DECL_SOURCE_LOCATION (fun),
+ "lambdas are implicitly constexpr only in C++17 and later");
+ }
+ else if (!DECL_CONSTRUCTOR_P (fun))
{
tree rettype = TREE_TYPE (TREE_TYPE (fun));
if (!literal_type_p (rettype))
@@ -1261,7 +1268,10 @@ cxx_bind_parameters_in_call (const constexpr_ctx *ctx, tree t,
&& is_dummy_object (x))
{
x = ctx->object;
- x = cp_build_addr_expr (x, tf_warning_or_error);
+ /* We don't use cp_build_addr_expr here because we don't want to
+ capture the object argument until we've chosen a non-static member
+ function. */
+ x = build_address (x);
}
bool lval = false;
arg = cxx_eval_constant_expression (ctx, x, lval,
@@ -3636,9 +3646,9 @@ cxx_eval_increment_expression (const constexpr_ctx *ctx, tree t,
non_constant_p, overflow_p);
/* The operand as an rvalue. */
- tree val = rvalue (op);
- val = cxx_eval_constant_expression (ctx, val, false,
- non_constant_p, overflow_p);
+ tree val
+ = cxx_eval_constant_expression (ctx, op, false,
+ non_constant_p, overflow_p);
/* Don't VERIFY_CONSTANT if this might be dealing with a pointer to
a local array in a constexpr function. */
bool ptr = POINTER_TYPE_P (TREE_TYPE (val));
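
A minimal sketch (illustrative, not part of the patch) of the case the new
is_valid_constexpr_fn check targets; the note text comes from the hunk above:

    /* With -std=c++14 the static_assert below is rejected, now with the
       note "lambdas are implicitly constexpr only in C++17 and later";
       with -std=c++17 it compiles, since the closure's operator() is
       implicitly constexpr.  */
    constexpr int twice (int n)
    {
      auto doubler = [] (int x) { return 2 * x; };
      return doubler (n);
    }
    static_assert (twice (21) == 42, "needs an implicitly constexpr lambda");
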
diff --git a/gcc/cp/constraint.cc b/gcc/cp/constraint.cc
index 64a8ea926d2..8b49455a526 100644
--- a/gcc/cp/constraint.cc
+++ b/gcc/cp/constraint.cc
@@ -2504,7 +2504,12 @@ check_function_concept (tree fn)
{
location_t loc = DECL_SOURCE_LOCATION (fn);
if (TREE_CODE (body) == STATEMENT_LIST && !STATEMENT_LIST_HEAD (body))
- error_at (loc, "definition of concept %qD is empty", fn);
+ {
+ if (seen_error ())
+ /* The definition was probably erroneous, not empty. */;
+ else
+ error_at (loc, "definition of concept %qD is empty", fn);
+ }
else
error_at (loc, "definition of concept %qD has multiple statements", fn);
}
diff --git a/gcc/cp/cp-tree.h b/gcc/cp/cp-tree.h
index b29e4e0be02..dc98dd881c5 100644
--- a/gcc/cp/cp-tree.h
+++ b/gcc/cp/cp-tree.h
@@ -65,7 +65,9 @@ public:
/* Implicit conversions to tree. */
operator tree () const { return m_value; }
tree & operator* () { return m_value; }
+ tree operator* () const { return m_value; }
tree & operator-> () { return m_value; }
+ tree operator-> () const { return m_value; }
tree get_value () const { return m_value; }
location_t get_location () const { return m_loc; }
@@ -572,30 +574,6 @@ identifier_p (tree t)
return NULL;
}
-/* Hash trait specialization for lang_identifiers. This allows
- PCH-safe maps keyed by DECL_NAME. If it wasn't for PCH, we could
- just use a regular tree key. */
-
-template <>
-struct default_hash_traits <lang_identifier *>
- : pointer_hash <tree_node>
-{
- /* Use a regular tree as the type, to make using the hash table
- simpler. We'll get dynamic type checking with the hash function
- itself. */
- GTY((skip)) typedef tree value_type;
- GTY((skip)) typedef tree compare_type;
-
- static hashval_t hash (const value_type id)
- {
- return IDENTIFIER_HASH_VALUE (id);
- }
-
- /* Nothing is deletable. Everything is insertable. */
- static bool is_deleted (value_type) { return false; }
- static void remove (value_type) { gcc_unreachable (); }
-};
-
#define LANG_IDENTIFIER_CAST(NODE) \
((struct lang_identifier*)IDENTIFIER_NODE_CHECK (NODE))
@@ -1491,11 +1469,9 @@ enum cp_tree_node_structure_enum {
TS_CP_IDENTIFIER,
TS_CP_TPI,
TS_CP_PTRMEM,
- TS_CP_BINDING,
TS_CP_OVERLOAD,
TS_CP_BASELINK,
TS_CP_TEMPLATE_DECL,
- TS_CP_WRAPPER,
TS_CP_DEFAULT_ARG,
TS_CP_DEFERRED_NOEXCEPT,
TS_CP_STATIC_ASSERT,
@@ -1504,8 +1480,7 @@ enum cp_tree_node_structure_enum {
TS_CP_LAMBDA_EXPR,
TS_CP_TEMPLATE_INFO,
TS_CP_CONSTRAINT_INFO,
- TS_CP_USERDEF_LITERAL,
- LAST_TS_CP_ENUM
+ TS_CP_USERDEF_LITERAL
};
/* The resulting tree type. */
@@ -6270,6 +6245,7 @@ extern tree mark_rvalue_use (tree,
extern tree mark_lvalue_use (tree);
extern tree mark_lvalue_use_nonread (tree);
extern tree mark_type_use (tree);
+extern tree mark_discarded_use (tree);
extern void mark_exp_read (tree);
/* friend.c */
@@ -6432,6 +6408,7 @@ extern tree lookup_template_variable (tree, tree);
extern int uses_template_parms (tree);
extern bool uses_template_parms_level (tree, int);
extern bool in_template_function (void);
+extern bool need_generic_capture (void);
extern bool processing_nonlambda_template (void);
extern tree instantiate_class_template (tree);
extern tree instantiate_template (tree, tree, tsubst_flags_t);
@@ -6833,6 +6810,7 @@ extern tree current_nonlambda_function (void);
extern tree nonlambda_method_basetype (void);
extern tree current_nonlambda_scope (void);
extern bool generic_lambda_fn_p (tree);
+extern tree do_dependent_capture (tree, bool = false);
extern bool lambda_fn_in_template_p (tree);
extern void maybe_add_lambda_conv_op (tree);
extern bool is_lambda_ignored_entity (tree);
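
For reference, a sketch (illustrative, not part of the patch) of what the
new const accessors on cp_expr permit; "peek" is a made-up name:

    /* The const overloads return the tree by value, so a const cp_expr
       can now be dereferenced.  */
    tree
    peek (const cp_expr &e)
    {
      return *e;  /* uses the new "tree operator* () const" */
    }
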
diff --git a/gcc/cp/cvt.c b/gcc/cp/cvt.c
index d82293b421d..c0d0a600562 100644
--- a/gcc/cp/cvt.c
+++ b/gcc/cp/cvt.c
@@ -1055,24 +1055,10 @@ convert_to_void (tree expr, impl_conv_void implicit, tsubst_flags_t complain)
|| TREE_TYPE (expr) == error_mark_node)
return error_mark_node;
+ expr = mark_discarded_use (expr);
if (implicit == ICV_CAST)
+ /* An explicit cast to void avoids all -Wunused-but-set* warnings. */
mark_exp_read (expr);
- else
- {
- tree exprv = expr;
-
- while (TREE_CODE (exprv) == COMPOUND_EXPR)
- exprv = TREE_OPERAND (exprv, 1);
- if (DECL_P (exprv)
- || handled_component_p (exprv)
- || INDIRECT_REF_P (exprv))
- /* Expr is not being 'used' here, otherwise we whould have
- called mark_{rl}value_use use here, which would have in turn
- called mark_exp_read. Rather, we call mark_exp_read directly
- to avoid some warnings when
- -Wunused-but-set-{variable,parameter} is in effect. */
- mark_exp_read (exprv);
- }
if (!TREE_TYPE (expr))
return expr;
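
The rewritten convert_to_void entry preserves the behavior described in the
new comment; a sketch (illustrative, not part of the patch):

    void f ()
    {
      int x = 0;
      x = 1;     /* with no later read, -Wunused-but-set-variable warns */
      (void) x;  /* an explicit cast to void counts as a read (the
                    ICV_CAST path calls mark_exp_read), so no warning */
    }
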
diff --git a/gcc/cp/decl.c b/gcc/cp/decl.c
index 372d888dd83..5d3f39e1f59 100644
--- a/gcc/cp/decl.c
+++ b/gcc/cp/decl.c
@@ -6867,6 +6867,8 @@ cp_finish_decl (tree decl, tree init, bool init_const_expr_p,
DECL_INITIAL (decl) = NULL_TREE;
}
+ init = do_dependent_capture (init);
+
/* Generally, initializers in templates are expanded when the
template is instantiated. But, if DECL is a variable constant
then it can be used in future constant expressions, so its value
diff --git a/gcc/cp/decl2.c b/gcc/cp/decl2.c
index 1cbd11dac45..bc509623b36 100644
--- a/gcc/cp/decl2.c
+++ b/gcc/cp/decl2.c
@@ -102,9 +102,35 @@ static GTY(()) vec<tree, va_gc> *no_linkage_decls;
is to be an alias for the former if the former is defined. */
static GTY(()) vec<tree, va_gc> *mangling_aliases;
-/* A hash table of mangled names to decls. Used to figure out if we
- need compatibility aliases. */
-static GTY(()) hash_map<lang_identifier *, tree> *mangled_decls;
+/* Hash traits for declarations. Hashes single decls via
+ DECL_ASSEMBLER_NAME_RAW. */
+
+struct mangled_decl_hash : ggc_remove <tree>
+{
+ typedef tree value_type; /* A DECL. */
+ typedef tree compare_type; /* An identifier. */
+
+ static hashval_t hash (const value_type decl)
+ {
+ return IDENTIFIER_HASH_VALUE (DECL_ASSEMBLER_NAME_RAW (decl));
+ }
+ static bool equal (const value_type existing, compare_type candidate)
+ {
+ tree name = DECL_ASSEMBLER_NAME_RAW (existing);
+ return candidate == name;
+ }
+
+ static inline void mark_empty (value_type &p) {p = NULL_TREE;}
+ static inline bool is_empty (value_type p) {return !p;}
+
+ /* Nothing is deletable. Everything is insertable. */
+ static bool is_deleted (value_type) { return false; }
+ static void mark_deleted (value_type) { gcc_unreachable (); }
+};
+
+/* A hash table of decls keyed by mangled name. Used to figure out if
+ we need compatibility aliases. */
+static GTY(()) hash_table<mangled_decl_hash> *mangled_decls;
/* Nonzero if we're done parsing and into end-of-file activities. */
@@ -4304,12 +4330,13 @@ generate_mangling_alias (tree decl, tree id2)
return;
}
- bool existed;
- tree *slot = &mangled_decls->get_or_insert (id2, &existed);
+ tree *slot
+ = mangled_decls->find_slot_with_hash (id2, IDENTIFIER_HASH_VALUE (id2),
+ INSERT);
/* If there's a declaration already using this mangled name,
don't create a compatibility alias that conflicts. */
- if (existed)
+ if (*slot)
return;
tree alias = make_alias_for (decl, id2);
@@ -4369,24 +4396,25 @@ void
record_mangling (tree decl, bool need_warning)
{
if (!mangled_decls)
- mangled_decls = hash_map<lang_identifier *, tree>::create_ggc (499);
+ mangled_decls = hash_table<mangled_decl_hash>::create_ggc (499);
gcc_checking_assert (DECL_ASSEMBLER_NAME_SET_P (decl));
- tree id = DECL_ASSEMBLER_NAME (decl);
- bool existed;
- tree *slot = &mangled_decls->get_or_insert (id, &existed);
+ tree id = DECL_ASSEMBLER_NAME_RAW (decl);
+ tree *slot
+ = mangled_decls->find_slot_with_hash (id, IDENTIFIER_HASH_VALUE (id),
+ INSERT);
/* If this is already an alias, remove the alias, because the real
decl takes precedence. */
- if (existed && DECL_ARTIFICIAL (*slot) && DECL_IGNORED_P (*slot))
+ if (*slot && DECL_ARTIFICIAL (*slot) && DECL_IGNORED_P (*slot))
if (symtab_node *n = symtab_node::get (*slot))
if (n->cpp_implicit_alias)
{
n->remove ();
- existed = false;
+ *slot = NULL_TREE;
}
- if (!existed)
+ if (!*slot)
*slot = decl;
else if (need_warning)
{
diff --git a/gcc/cp/expr.c b/gcc/cp/expr.c
index f5c8e801918..23e30cf789c 100644
--- a/gcc/cp/expr.c
+++ b/gcc/cp/expr.c
@@ -96,16 +96,21 @@ mark_use (tree expr, bool rvalue_p, bool read_p,
{
#define RECUR(t) mark_use ((t), rvalue_p, read_p, loc, reject_builtin)
+ if (expr == NULL_TREE || expr == error_mark_node)
+ return expr;
+
if (reject_builtin && reject_gcc_builtin (expr, loc))
return error_mark_node;
if (read_p)
mark_exp_read (expr);
+ tree oexpr = expr;
bool recurse_op[3] = { false, false, false };
switch (TREE_CODE (expr))
{
case VAR_DECL:
+ case PARM_DECL:
if (outer_automatic_var_p (expr)
&& decl_constant_var_p (expr))
{
@@ -119,10 +124,13 @@ mark_use (tree expr, bool rvalue_p, bool read_p,
}
}
expr = process_outer_var_ref (expr, tf_warning_or_error, true);
- expr = convert_from_reference (expr);
+ if (!(TREE_TYPE (oexpr)
+ && TREE_CODE (TREE_TYPE (oexpr)) == REFERENCE_TYPE))
+ expr = convert_from_reference (expr);
}
break;
case COMPONENT_REF:
+ case NON_DEPENDENT_EXPR:
recurse_op[0] = true;
break;
case COMPOUND_EXPR:
@@ -140,35 +148,23 @@ mark_use (tree expr, bool rvalue_p, bool read_p,
tree ref = TREE_OPERAND (expr, 0);
tree r = mark_rvalue_use (ref, loc, reject_builtin);
if (r != ref)
- {
- expr = copy_node (expr);
- TREE_OPERAND (expr, 0) = r;
- }
+ expr = convert_from_reference (r);
}
break;
default:
break;
}
- bool changed = false;
- tree ops[3];
for (int i = 0; i < 3; ++i)
if (recurse_op[i])
{
tree op = TREE_OPERAND (expr, i);
- ops[i] = RECUR (op);
- if (ops[i] != op)
- changed = true;
+ op = RECUR (op);
+ if (op == error_mark_node)
+ return error_mark_node;
+ TREE_OPERAND (expr, i) = op;
}
- if (changed)
- {
- expr = copy_node (expr);
- for (int i = 0; i < 3; ++i)
- if (recurse_op[i])
- TREE_OPERAND (expr, i) = ops[i];
- }
-
return expr;
#undef RECUR
}
@@ -187,6 +183,52 @@ mark_rvalue_use (tree e,
return mark_use (e, true, true, loc, reject_builtin);
}
+/* Called when expr appears as a discarded-value expression. */
+
+tree
+mark_discarded_use (tree expr)
+{
+ /* The lvalue-to-rvalue conversion (7.1) is applied if and only if the
+ expression is a glvalue of volatile-qualified type and it is one of the
+ following:
+ * ( expression ), where expression is one of these expressions,
+ * id-expression (8.1.4),
+ * subscripting (8.2.1),
+ * class member access (8.2.5),
+ * indirection (8.3.1),
+ * pointer-to-member operation (8.5),
+ * conditional expression (8.16) where both the second and the third
+ operands are one of these expressions, or
+ * comma expression (8.19) where the right operand is one of these
+ expressions. */
+ if (expr == NULL_TREE)
+ return expr;
+
+ switch (TREE_CODE (expr))
+ {
+ case COND_EXPR:
+ TREE_OPERAND (expr, 2) = mark_discarded_use (TREE_OPERAND (expr, 2));
+ gcc_fallthrough ();
+ case COMPOUND_EXPR:
+ TREE_OPERAND (expr, 1) = mark_discarded_use (TREE_OPERAND (expr, 1));
+ return expr;
+
+ case COMPONENT_REF:
+ case ARRAY_REF:
+ case INDIRECT_REF:
+ case MEMBER_REF:
+ break;
+ default:
+ if (DECL_P (expr))
+ break;
+ else
+ return expr;
+ }
+
+ /* Like mark_rvalue_use, but don't reject built-ins. */
+ return mark_use (expr, true, true, input_location, false);
+}
+
/* Called whenever an expression is used in an lvalue context. */
tree
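
A sketch (illustrative, not part of the patch) of the expression forms
enumerated in the new mark_discarded_use comment; for a volatile glvalue,
each discarded use below still implies a load:

    volatile int v;
    void g (volatile int *p)
    {
      v;           /* id-expression */
      *p;          /* indirection */
      (v);         /* parenthesized id-expression */
      p ? v : *p;  /* conditional: both arms qualify */
      v = 1, v;    /* comma: the right operand qualifies */
    }
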
diff --git a/gcc/cp/lambda.c b/gcc/cp/lambda.c
index 78bd89782aa..76f2f29578f 100644
--- a/gcc/cp/lambda.c
+++ b/gcc/cp/lambda.c
@@ -297,7 +297,17 @@ void
insert_capture_proxy (tree var)
{
if (is_normal_capture_proxy (var))
- register_local_specialization (var, DECL_CAPTURED_VARIABLE (var));
+ {
+ tree cap = DECL_CAPTURED_VARIABLE (var);
+ if (CHECKING_P)
+ {
+ gcc_assert (!is_normal_capture_proxy (cap));
+ tree old = retrieve_local_specialization (cap);
+ if (old)
+ gcc_assert (DECL_CONTEXT (old) != DECL_CONTEXT (var));
+ }
+ register_local_specialization (var, cap);
+ }
/* Put the capture proxy in the extra body block so that it won't clash
with a later local variable. */
@@ -977,6 +987,121 @@ generic_lambda_fn_p (tree callop)
&& PRIMARY_TEMPLATE_P (DECL_TI_TEMPLATE (callop)));
}
+/* Returns true iff we need to consider default capture for an enclosing
+ generic lambda. */
+
+bool
+need_generic_capture (void)
+{
+ if (!processing_template_decl)
+ return false;
+
+ tree outer_closure = NULL_TREE;
+ for (tree t = current_class_type; t;
+ t = decl_type_context (TYPE_MAIN_DECL (t)))
+ {
+ tree lam = CLASSTYPE_LAMBDA_EXPR (t);
+ if (!lam || LAMBDA_EXPR_DEFAULT_CAPTURE_MODE (lam) == CPLD_NONE)
+ /* No default capture. */
+ break;
+ outer_closure = t;
+ }
+
+ if (!outer_closure)
+ /* No lambda. */
+ return false;
+ else if (dependent_type_p (outer_closure))
+ /* The enclosing context isn't instantiated. */
+ return false;
+ else
+ return true;
+}
+
+/* A lambda-expression...is said to implicitly capture the entity...if the
+ compound-statement...names the entity in a potentially-evaluated
+ expression where the enclosing full-expression depends on a generic lambda
+ parameter declared within the reaching scope of the lambda-expression. */
+
+static tree
+dependent_capture_r (tree *tp, int *walk_subtrees, void *data)
+{
+ hash_set<tree> *pset = (hash_set<tree> *)data;
+
+ if (TYPE_P (*tp))
+ *walk_subtrees = 0;
+
+ if (outer_automatic_var_p (*tp))
+ {
+ tree t = process_outer_var_ref (*tp, tf_warning_or_error, /*force*/true);
+ if (t != *tp
+ && TREE_CODE (TREE_TYPE (t)) == REFERENCE_TYPE
+ && TREE_CODE (TREE_TYPE (*tp)) != REFERENCE_TYPE)
+ t = convert_from_reference (t);
+ *tp = t;
+ }
+
+ if (pset->add (*tp))
+ *walk_subtrees = 0;
+
+ switch (TREE_CODE (*tp))
+ {
+ /* Don't walk into unevaluated context or another lambda. */
+ case SIZEOF_EXPR:
+ case ALIGNOF_EXPR:
+ case TYPEID_EXPR:
+ case NOEXCEPT_EXPR:
+ case LAMBDA_EXPR:
+ *walk_subtrees = 0;
+ break;
+
+ /* Don't walk into statements whose subexpressions we already
+ handled. */
+ case TRY_BLOCK:
+ case EH_SPEC_BLOCK:
+ case HANDLER:
+ case IF_STMT:
+ case FOR_STMT:
+ case RANGE_FOR_STMT:
+ case WHILE_STMT:
+ case DO_STMT:
+ case SWITCH_STMT:
+ case STATEMENT_LIST:
+ case RETURN_EXPR:
+ *walk_subtrees = 0;
+ break;
+
+ case DECL_EXPR:
+ {
+ tree decl = DECL_EXPR_DECL (*tp);
+ if (VAR_P (decl))
+ {
+ /* walk_tree_1 won't step in here. */
+ cp_walk_tree (&DECL_INITIAL (decl),
+ dependent_capture_r, pset, NULL);
+ *walk_subtrees = 0;
+ }
+ }
+ break;
+
+ default:
+ break;
+ }
+
+ return NULL_TREE;
+}
+
+tree
+do_dependent_capture (tree expr, bool force)
+{
+ if (!need_generic_capture ()
+ || (!force && !instantiation_dependent_expression_p (expr)))
+ return expr;
+
+ hash_set<tree> pset;
+ cp_walk_tree (&expr, dependent_capture_r, &pset, NULL);
+ return expr;
+}
+
/* If the closure TYPE has a static op(), also add a conversion to function
pointer. */
@@ -1073,7 +1198,10 @@ maybe_add_lambda_conv_op (tree type)
if (generic_lambda_p)
{
+ /* Avoid capturing variables in this context. */
+ ++cp_unevaluated_operand;
tree a = forward_parm (tgt);
+ --cp_unevaluated_operand;
CALL_EXPR_ARG (call, ix) = a;
if (decltype_call)
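
A sketch (illustrative, not part of the patch) of the situation
need_generic_capture and do_dependent_capture handle, per the standard
wording quoted above dependent_capture_r:

    int f ()
    {
      int i = 42;
      /* "x + i" depends on the generic parameter x, so i must still be
         implicitly captured by the [=] default even though the enclosing
         full-expression is dependent at parse time.  */
      auto lam = [=] (auto x) { return x + i; };
      return lam (0);
    }
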
diff --git a/gcc/cp/name-lookup.c b/gcc/cp/name-lookup.c
index ae30cf96b91..b1b4ebbb7de 100644
--- a/gcc/cp/name-lookup.c
+++ b/gcc/cp/name-lookup.c
@@ -2511,13 +2511,13 @@ update_binding (cp_binding_level *level, cxx_binding *binding, tree *slot,
return decl;
}
-/* Table of identifiers to extern C functions (or LISTS thereof). */
+/* Table of identifiers to extern C declarations (or LISTS thereof). */
-static GTY(()) hash_table<named_decl_hash> *extern_c_fns;
+static GTY(()) hash_table<named_decl_hash> *extern_c_decls;
-/* DECL has C linkage. If we have an existing instance, make sure it
- has the same exception specification [7.5, 7.6]. If there's no
- instance, add DECL to the map. */
+/* DECL has C linkage. If we have an existing instance, make sure the
+ new one is compatible. Make sure it has the same exception
+ specification [7.5, 7.6]. Add DECL to the map. */
static void
check_extern_c_conflict (tree decl)
@@ -2526,10 +2526,10 @@ check_extern_c_conflict (tree decl)
if (DECL_ARTIFICIAL (decl) || DECL_IN_SYSTEM_HEADER (decl))
return;
- if (!extern_c_fns)
- extern_c_fns = hash_table<named_decl_hash>::create_ggc (127);
+ if (!extern_c_decls)
+ extern_c_decls = hash_table<named_decl_hash>::create_ggc (127);
- tree *slot = extern_c_fns
+ tree *slot = extern_c_decls
->find_slot_with_hash (DECL_NAME (decl),
IDENTIFIER_HASH_VALUE (DECL_NAME (decl)), INSERT);
if (tree old = *slot)
@@ -2543,9 +2543,10 @@ check_extern_c_conflict (tree decl)
about a (possible) mismatch, when inserting the decl. */
else if (!decls_match (decl, old))
mismatch = 1;
- else if (!comp_except_specs (TYPE_RAISES_EXCEPTIONS (TREE_TYPE (old)),
- TYPE_RAISES_EXCEPTIONS (TREE_TYPE (decl)),
- ce_normal))
+ else if (TREE_CODE (decl) == FUNCTION_DECL
+ && !comp_except_specs (TYPE_RAISES_EXCEPTIONS (TREE_TYPE (old)),
+ TYPE_RAISES_EXCEPTIONS (TREE_TYPE (decl)),
+ ce_normal))
mismatch = -1;
else if (DECL_ASSEMBLER_NAME_SET_P (old))
SET_DECL_ASSEMBLER_NAME (decl, DECL_ASSEMBLER_NAME (old));
@@ -2553,12 +2554,12 @@ check_extern_c_conflict (tree decl)
if (mismatch)
{
pedwarn (input_location, 0,
- "declaration of %q#D with C language linkage", decl);
- pedwarn (DECL_SOURCE_LOCATION (old), 0,
- "conflicts with previous declaration %q#D", old);
+ "conflicting C language linkage declaration %q#D", decl);
+ inform (DECL_SOURCE_LOCATION (old),
+ "previous declaration %q#D", old);
if (mismatch < 0)
- pedwarn (input_location, 0,
- "due to different exception specifications");
+ inform (input_location,
+ "due to different exception specifications");
}
else
{
@@ -2587,8 +2588,8 @@ check_extern_c_conflict (tree decl)
tree
c_linkage_bindings (tree name)
{
- if (extern_c_fns)
- if (tree *slot = extern_c_fns
+ if (extern_c_decls)
+ if (tree *slot = extern_c_decls
->find_slot_with_hash (name, IDENTIFIER_HASH_VALUE (name), NO_INSERT))
{
tree result = *slot;
@@ -3030,9 +3031,8 @@ do_pushdecl (tree decl, bool is_friend)
else
*slot = head;
}
- if (TREE_CODE (match) == FUNCTION_DECL
- && DECL_EXTERN_C_P (match))
- /* We need to check and register the fn now. */
+ if (DECL_EXTERN_C_P (match))
+ /* We need to check and register the decl now. */
check_extern_c_conflict (match);
}
return match;
@@ -3113,7 +3113,9 @@ do_pushdecl (tree decl, bool is_friend)
}
else if (VAR_P (decl))
maybe_register_incomplete_var (decl);
- else if (TREE_CODE (decl) == FUNCTION_DECL && DECL_EXTERN_C_P (decl))
+
+ if ((VAR_P (decl) || TREE_CODE (decl) == FUNCTION_DECL)
+ && DECL_EXTERN_C_P (decl))
check_extern_c_conflict (decl);
}
else
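
With do_pushdecl now checking variables as well as functions, a sketch
(illustrative, not part of the patch) of what the reworded diagnostic
covers:

    extern "C" int counter;
    namespace ns
    {
      extern "C" long counter;  /* "conflicting C language linkage
                                   declaration", with an inform note at
                                   the previous declaration; previously
                                   only functions were checked */
    }
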
diff --git a/gcc/cp/parser.c b/gcc/cp/parser.c
index 7883c64f33f..810e2b7f72e 100644
--- a/gcc/cp/parser.c
+++ b/gcc/cp/parser.c
@@ -2770,51 +2770,159 @@ cp_lexer_peek_conflict_marker (cp_lexer *lexer, enum cpp_ttype tok1_kind,
return true;
}
-/* If not parsing tentatively, issue a diagnostic of the form
+/* Get a description of the matching symbol to TOKEN_DESC e.g. "(" for
+ RT_CLOSE_PAREN. */
+
+static const char *
+get_matching_symbol (required_token token_desc)
+{
+ switch (token_desc)
+ {
+ default:
+ gcc_unreachable ();
+ return "";
+ case RT_CLOSE_BRACE:
+ return "{";
+ case RT_CLOSE_PAREN:
+ return "(";
+ }
+}
+
+/* Attempt to convert TOKEN_DESC from a required_token to an
+ enum cpp_ttype, returning CPP_EOF if there is no good conversion. */
+
+static enum cpp_ttype
+get_required_cpp_ttype (required_token token_desc)
+{
+ switch (token_desc)
+ {
+ case RT_SEMICOLON:
+ return CPP_SEMICOLON;
+ case RT_OPEN_PAREN:
+ return CPP_OPEN_PAREN;
+ case RT_CLOSE_BRACE:
+ return CPP_CLOSE_BRACE;
+ case RT_OPEN_BRACE:
+ return CPP_OPEN_BRACE;
+ case RT_CLOSE_SQUARE:
+ return CPP_CLOSE_SQUARE;
+ case RT_OPEN_SQUARE:
+ return CPP_OPEN_SQUARE;
+ case RT_COMMA:
+ return CPP_COMMA;
+ case RT_COLON:
+ return CPP_COLON;
+ case RT_CLOSE_PAREN:
+ return CPP_CLOSE_PAREN;
+
+ default:
+ /* Use CPP_EOF as a "no completions possible" code. */
+ return CPP_EOF;
+ }
+}
+
+
+/* Subroutine of cp_parser_error and cp_parser_required_error.
+
+ Issue a diagnostic of the form
FILE:LINE: MESSAGE before TOKEN
where TOKEN is the next token in the input stream. MESSAGE
(specified by the caller) is usually of the form "expected
- OTHER-TOKEN". */
+ OTHER-TOKEN".
+
+ This bypasses the check for tentative parsing, and potentially
+ adds material needed by cp_parser_required_error.
+
+ If MISSING_TOKEN_DESC is not RT_NONE, then potentially add fix-it hints
+ suggesting insertion of the missing token.
+
+ Additionally, if MATCHING_LOCATION is not UNKNOWN_LOCATION, then we
+ have an unmatched symbol at MATCHING_LOCATION; highlight this secondary
+ location. */
static void
-cp_parser_error (cp_parser* parser, const char* gmsgid)
+cp_parser_error_1 (cp_parser* parser, const char* gmsgid,
+ required_token missing_token_desc,
+ location_t matching_location)
{
- if (!cp_parser_simulate_error (parser))
+ cp_token *token = cp_lexer_peek_token (parser->lexer);
+ /* This diagnostic makes more sense if it is tagged to the line
+ of the token we just peeked at. */
+ cp_lexer_set_source_position_from_token (token);
+
+ if (token->type == CPP_PRAGMA)
{
- cp_token *token = cp_lexer_peek_token (parser->lexer);
- /* This diagnostic makes more sense if it is tagged to the line
- of the token we just peeked at. */
- cp_lexer_set_source_position_from_token (token);
+ error_at (token->location,
+ "%<#pragma%> is not allowed here");
+ cp_parser_skip_to_pragma_eol (parser, token);
+ return;
+ }
- if (token->type == CPP_PRAGMA)
+ /* If this is actually a conflict marker, report it as such. */
+ if (token->type == CPP_LSHIFT
+ || token->type == CPP_RSHIFT
+ || token->type == CPP_EQ_EQ)
+ {
+ location_t loc;
+ if (cp_lexer_peek_conflict_marker (parser->lexer, token->type, &loc))
{
- error_at (token->location,
- "%<#pragma%> is not allowed here");
- cp_parser_skip_to_pragma_eol (parser, token);
+ error_at (loc, "version control conflict marker in file");
return;
}
+ }
- /* If this is actually a conflict marker, report it as such. */
- if (token->type == CPP_LSHIFT
- || token->type == CPP_RSHIFT
- || token->type == CPP_EQ_EQ)
- {
- location_t loc;
- if (cp_lexer_peek_conflict_marker (parser->lexer, token->type, &loc))
- {
- error_at (loc, "version control conflict marker in file");
- return;
- }
- }
+ gcc_rich_location richloc (input_location);
+
+ bool added_matching_location = false;
- rich_location richloc (line_table, input_location);
- c_parse_error (gmsgid,
- /* Because c_parser_error does not understand
- CPP_KEYWORD, keywords are treated like
- identifiers. */
- (token->type == CPP_KEYWORD ? CPP_NAME : token->type),
- token->u.value, token->flags, &richloc);
+ if (missing_token_desc != RT_NONE)
+ {
+ /* Potentially supply a fix-it hint, suggesting to add the
+ missing token immediately after the *previous* token.
+ This may move the primary location within richloc. */
+ enum cpp_ttype ttype = get_required_cpp_ttype (missing_token_desc);
+ location_t prev_token_loc
+ = cp_lexer_previous_token (parser->lexer)->location;
+ maybe_suggest_missing_token_insertion (&richloc, ttype, prev_token_loc);
+
+ /* If matching_location != UNKNOWN_LOCATION, highlight it.
+ Attempt to consolidate diagnostics by printing it as a
+ secondary range within the main diagnostic. */
+ if (matching_location != UNKNOWN_LOCATION)
+ added_matching_location
+ = richloc.add_location_if_nearby (matching_location);
}
+
+ /* Actually emit the error. */
+ c_parse_error (gmsgid,
+ /* Because c_parser_error does not understand
+ CPP_KEYWORD, keywords are treated like
+ identifiers. */
+ (token->type == CPP_KEYWORD ? CPP_NAME : token->type),
+ token->u.value, token->flags, &richloc);
+
+ if (missing_token_desc != RT_NONE)
+ {
+ /* If we weren't able to consolidate matching_location, then
+ print it as a secondary diagnostic. */
+ if (matching_location != UNKNOWN_LOCATION
+ && !added_matching_location)
+ inform (matching_location, "to match this %qs",
+ get_matching_symbol (missing_token_desc));
+ }
+}
+
+/* If not parsing tentatively, issue a diagnostic of the form
+ FILE:LINE: MESSAGE before TOKEN
+ where TOKEN is the next token in the input stream. MESSAGE
+ (specified by the caller) is usually of the form "expected
+ OTHER-TOKEN". */
+
+static void
+cp_parser_error (cp_parser* parser, const char* gmsgid)
+{
+ if (!cp_parser_simulate_error (parser))
+ cp_parser_error_1 (parser, gmsgid, RT_NONE, UNKNOWN_LOCATION);
}
/* Issue an error about name-lookup failing. NAME is the
@@ -11873,6 +11981,8 @@ cp_convert_range_for (tree statement, tree range_decl, tree range_expr,
tree iter_type, begin_expr, end_expr;
tree condition, expression;
+ range_expr = mark_lvalue_use (range_expr);
+
if (range_decl == error_mark_node || range_expr == error_mark_node)
/* If an error happened previously do nothing or else a lot of
unhelpful errors would be issued. */
@@ -28079,24 +28189,6 @@ cp_parser_friend_p (const cp_decl_specifier_seq *decl_specifiers)
return decl_spec_seq_has_spec_p (decl_specifiers, ds_friend);
}
-/* Get a description of the matching symbol to TOKEN_DESC e.g. "(" for
- RT_CLOSE_PAREN. */
-
-static const char *
-get_matching_symbol (required_token token_desc)
-{
- switch (token_desc)
- {
- default:
- gcc_unreachable ();
- return "";
- case RT_CLOSE_BRACE:
- return "{";
- case RT_CLOSE_PAREN:
- return "(";
- }
-}
-
/* Issue an error message indicating that TOKEN_DESC was expected.
If KEYWORD is true, it indicated this function is called by
cp_parser_require_keword and the required token can only be
@@ -28274,31 +28366,7 @@ cp_parser_required_error (cp_parser *parser,
}
if (gmsgid)
- {
- /* Emulate rest of cp_parser_error. */
- cp_token *token = cp_lexer_peek_token (parser->lexer);
- cp_lexer_set_source_position_from_token (token);
-
- gcc_rich_location richloc (input_location);
-
- /* If matching_location != UNKNOWN_LOCATION, highlight it.
- Attempt to consolidate diagnostics by printing it as a
- secondary range within the main diagnostic. */
- bool added_matching_location = false;
- if (matching_location != UNKNOWN_LOCATION)
- added_matching_location
- = richloc.add_location_if_nearby (matching_location);
-
- c_parse_error (gmsgid,
- (token->type == CPP_KEYWORD ? CPP_NAME : token->type),
- token->u.value, token->flags, &richloc);
-
- /* If we weren't able to consolidate matching_location, then
- print it as a secondary diagnostic. */
- if (matching_location != UNKNOWN_LOCATION && !added_matching_location)
- inform (matching_location, "to match this %qs",
- get_matching_symbol (token_desc));
- }
+ cp_parser_error_1 (parser, gmsgid, token_desc, matching_location);
}
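
A sketch (illustrative, not part of the patch) of the diagnostics the
shared cp_parser_error_1 path can now emit for a required token:

    void f ()
    {
      if (1         /* matching_location: the unmatched '(' */
        return;     /* error: expected ')' before 'return'; a fix-it hint
                       may suggest inserting ')' after '1', and a "to
                       match this '('" note or secondary range points
                       back at the open paren */
    }
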
diff --git a/gcc/cp/pt.c b/gcc/cp/pt.c
index 52fc4d6a222..ba52f3b57a6 100644
--- a/gcc/cp/pt.c
+++ b/gcc/cp/pt.c
@@ -1895,6 +1895,7 @@ reregister_specialization (tree spec, tree tinfo, tree new_spec)
void
register_local_specialization (tree spec, tree tmpl)
{
+ gcc_assert (tmpl != spec);
local_specializations->put (tmpl, spec);
}
@@ -9494,30 +9495,14 @@ in_template_function (void)
return ret;
}
-/* Returns true iff we are currently within a template other than a generic
- lambda. We test this by finding the outermost closure type and checking
- whether it is dependent. */
+/* Returns true iff we are currently within a template other than a
+ default-capturing generic lambda, so we don't need to worry about semantic
+ processing. */
bool
processing_nonlambda_template (void)
{
- if (!processing_template_decl)
- return false;
-
- tree outer_closure = NULL_TREE;
- for (tree t = current_class_type; t;
- t = decl_type_context (TYPE_MAIN_DECL (t)))
- {
- if (LAMBDA_TYPE_P (t))
- outer_closure = t;
- else
- break;
- }
-
- if (outer_closure)
- return dependent_type_p (outer_closure);
- else
- return true;
+ return processing_template_decl && !need_generic_capture ();
}
/* Returns true if T depends on any template parameter with level LEVEL. */
@@ -23224,15 +23209,9 @@ instantiate_decl (tree d, bool defer_ok, bool expl_inst_class_mem_p)
synthesize_method (d);
else if (TREE_CODE (d) == FUNCTION_DECL)
{
- hash_map<tree, tree> *saved_local_specializations;
- tree block = NULL_TREE;
-
- /* Save away the current list, in case we are instantiating one
- template from within the body of another. */
- saved_local_specializations = local_specializations;
-
/* Set up the list of local specializations. */
- local_specializations = new hash_map<tree, tree>;
+ local_specialization_stack lss (push_to_top ? lss_blank : lss_copy);
+ tree block = NULL_TREE;
/* Set up context. */
if (DECL_OMP_DECLARE_REDUCTION_P (code_pattern)
@@ -23271,10 +23250,6 @@ instantiate_decl (tree d, bool defer_ok, bool expl_inst_class_mem_p)
= DECL_STRUCT_FUNCTION (code_pattern)->language->infinite_loop;
}
- /* We don't need the local specializations any more. */
- delete local_specializations;
- local_specializations = saved_local_specializations;
-
/* Finish the function. */
if (DECL_OMP_DECLARE_REDUCTION_P (code_pattern)
&& TREE_CODE (DECL_CONTEXT (code_pattern)) == FUNCTION_DECL)
@@ -24307,21 +24282,22 @@ type_dependent_expression_p (tree expression)
&& (any_dependent_template_arguments_p
(INNERMOST_TEMPLATE_ARGS (DECL_TI_ARGS (expression)))))
return true;
+ }
- /* Otherwise, if the decl isn't from a dependent scope, it can't be
- type-dependent. Checking this is important for functions with auto
- return type, which looks like a dependent type. */
- if (TREE_CODE (expression) == FUNCTION_DECL
- && (!DECL_CLASS_SCOPE_P (expression)
- || !dependent_type_p (DECL_CONTEXT (expression)))
- && (!DECL_FRIEND_CONTEXT (expression)
- || !dependent_type_p (DECL_FRIEND_CONTEXT (expression)))
- && !DECL_LOCAL_FUNCTION_P (expression))
- {
- gcc_assert (!dependent_type_p (TREE_TYPE (expression))
- || undeduced_auto_decl (expression));
- return false;
- }
+ /* Otherwise, if the function decl isn't from a dependent scope, it can't be
+ type-dependent. Checking this is important for functions with auto return
+ type, which looks like a dependent type. */
+ if (TREE_CODE (expression) == FUNCTION_DECL
+ && !(DECL_CLASS_SCOPE_P (expression)
+ && dependent_type_p (DECL_CONTEXT (expression)))
+ && !(DECL_FRIEND_P (expression)
+ && (!DECL_FRIEND_CONTEXT (expression)
+ || dependent_type_p (DECL_FRIEND_CONTEXT (expression))))
+ && !DECL_LOCAL_FUNCTION_P (expression))
+ {
+ gcc_assert (!dependent_type_p (TREE_TYPE (expression))
+ || undeduced_auto_decl (expression));
+ return false;
}
/* Always dependent, on the number of arguments if nothing else. */
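
A sketch (illustrative, not part of the patch) of why the scope check
matters for functions with auto return type:

    auto f () { return 42; }  /* TREE_TYPE looks dependent while the
                                 return type is undeduced, but f is not
                                 declared in a dependent scope, so it must
                                 not be treated as type-dependent */
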
diff --git a/gcc/cp/semantics.c b/gcc/cp/semantics.c
index 77c71e71bcf..a512664e396 100644
--- a/gcc/cp/semantics.c
+++ b/gcc/cp/semantics.c
@@ -410,6 +410,8 @@ maybe_cleanup_point_expr (tree expr)
{
if (!processing_template_decl && stmts_are_full_exprs_p ())
expr = fold_build_cleanup_point_expr (TREE_TYPE (expr), expr);
+ else
+ expr = do_dependent_capture (expr);
return expr;
}
@@ -423,6 +425,8 @@ maybe_cleanup_point_expr_void (tree expr)
{
if (!processing_template_decl && stmts_are_full_exprs_p ())
expr = fold_build_cleanup_point_expr (void_type_node, expr);
+ else
+ expr = do_dependent_capture (expr);
return expr;
}
@@ -629,6 +633,8 @@ finish_goto_stmt (tree destination)
= fold_build_cleanup_point_expr (TREE_TYPE (destination),
destination);
}
+ else
+ destination = do_dependent_capture (destination);
}
check_goto (destination);
@@ -650,7 +656,7 @@ maybe_convert_cond (tree cond)
/* Wait until we instantiate templates before doing conversion. */
if (processing_template_decl)
- return cond;
+ return do_dependent_capture (cond);
if (warn_sequence_point)
verify_sequence_points (cond);
@@ -3265,6 +3271,8 @@ outer_var_p (tree decl)
{
return ((VAR_P (decl) || TREE_CODE (decl) == PARM_DECL)
&& DECL_FUNCTION_SCOPE_P (decl)
+ /* Don't get confused by temporaries. */
+ && DECL_NAME (decl)
&& (DECL_CONTEXT (decl) != current_function_decl
|| parsing_nsdmi ()));
}
@@ -3312,8 +3320,12 @@ process_outer_var_ref (tree decl, tsubst_flags_t complain, bool force_use)
if (containing_function && LAMBDA_FUNCTION_P (containing_function))
{
/* Check whether we've already built a proxy. */
- tree d = retrieve_local_specialization (decl);
- if (d && is_capture_proxy (d))
+ tree var = decl;
+ while (is_normal_capture_proxy (var))
+ var = DECL_CAPTURED_VARIABLE (var);
+ tree d = retrieve_local_specialization (var);
+
+ if (d && d != decl && is_capture_proxy (d))
{
if (DECL_CONTEXT (d) == containing_function)
/* We already have an inner proxy. */
@@ -6213,8 +6225,8 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
"positive");
t = integer_one_node;
}
+ t = fold_build_cleanup_point_expr (TREE_TYPE (t), t);
}
- t = fold_build_cleanup_point_expr (TREE_TYPE (t), t);
}
OMP_CLAUSE_OPERAND (c, 1) = t;
}
@@ -7095,8 +7107,8 @@ finish_omp_clauses (tree clauses, enum c_omp_region_type ort)
"integral constant");
remove = true;
}
+ t = fold_build_cleanup_point_expr (TREE_TYPE (t), t);
}
- t = fold_build_cleanup_point_expr (TREE_TYPE (t), t);
}
/* Update list item. */
diff --git a/gcc/cp/typeck.c b/gcc/cp/typeck.c
index 6fe5199a00e..d87ee62ad1a 100644
--- a/gcc/cp/typeck.c
+++ b/gcc/cp/typeck.c
@@ -5606,8 +5606,9 @@ tree
condition_conversion (tree expr)
{
tree t;
- if (processing_template_decl)
- return expr;
+ /* Anything that might happen in a template should go through
+ maybe_convert_cond. */
+ gcc_assert (!processing_template_decl);
t = perform_implicit_conversion_flags (boolean_type_node, expr,
tf_warning_or_error, LOOKUP_NORMAL);
t = fold_build_cleanup_point_expr (boolean_type_node, t);
@@ -5656,6 +5657,9 @@ cp_build_addr_expr_1 (tree arg, bool strict_lvalue, tsubst_flags_t complain)
return error_mark_node;
arg = mark_lvalue_use (arg);
+ if (error_operand_p (arg))
+ return error_mark_node;
+
argtype = lvalue_type (arg);
gcc_assert (!(identifier_p (arg) && IDENTIFIER_ANY_OP_P (arg)));
@@ -7061,11 +7065,7 @@ build_static_cast (tree type, tree oexpr, tsubst_flags_t complain)
if (dependent)
{
tmpl:
- expr = oexpr;
- if (dependent)
- /* Handle generic lambda capture. */
- expr = mark_lvalue_use (expr);
- expr = build_min (STATIC_CAST_EXPR, type, expr);
+ expr = build_min (STATIC_CAST_EXPR, type, oexpr);
/* We don't know if it will or will not have side effects. */
TREE_SIDE_EFFECTS (expr) = 1;
return convert_from_reference (expr);
@@ -7704,6 +7704,8 @@ tree
cp_build_modify_expr (location_t loc, tree lhs, enum tree_code modifycode,
tree rhs, tsubst_flags_t complain)
{
+ lhs = mark_lvalue_use_nonread (lhs);
+
tree result = NULL_TREE;
tree newrhs = rhs;
tree lhstype = TREE_TYPE (lhs);
@@ -7926,6 +7928,8 @@ cp_build_modify_expr (location_t loc, tree lhs, enum tree_code modifycode,
operator. -- end note ] */
lhs = cp_stabilize_reference (lhs);
rhs = rvalue (rhs);
+ if (rhs == error_mark_node)
+ return error_mark_node;
rhs = stabilize_expr (rhs, &init);
newrhs = cp_build_binary_op (loc, modifycode, lhs, rhs, complain);
if (newrhs == error_mark_node)
@@ -8960,10 +8964,14 @@ check_return_expr (tree retval, bool *no_warning)
if (check_for_bare_parameter_packs (retval))
return error_mark_node;
- if (WILDCARD_TYPE_P (TREE_TYPE (DECL_RESULT (current_function_decl)))
+ /* If one of the types might be void, we can't tell whether we're
+ returning a value. */
+ if ((WILDCARD_TYPE_P (TREE_TYPE (DECL_RESULT (current_function_decl)))
+ && !current_function_auto_return_pattern)
|| (retval != NULL_TREE
- && type_dependent_expression_p (retval)))
- return retval;
+ && (TREE_TYPE (retval) == NULL_TREE
+ || WILDCARD_TYPE_P (TREE_TYPE (retval)))))
+ goto dependent;
}
functype = TREE_TYPE (TREE_TYPE (current_function_decl));
@@ -9101,11 +9109,13 @@ check_return_expr (tree retval, bool *no_warning)
warning (OPT_Weffc__, "%<operator=%> should return a reference to %<*this%>");
}
- if (processing_template_decl)
+ if (dependent_type_p (functype)
+ || type_dependent_expression_p (retval))
{
+ dependent:
/* We should not have changed the return value. */
gcc_assert (retval == saved_retval);
- return retval;
+ return do_dependent_capture (retval, /*force*/true);
}
/* The fabled Named Return Value optimization, as per [class.copy]/15:
@@ -9129,6 +9139,7 @@ check_return_expr (tree retval, bool *no_warning)
named_return_value_okay_p =
(retval != NULL_TREE
+ && !processing_template_decl
/* Must be a local, automatic variable. */
&& VAR_P (retval)
&& DECL_CONTEXT (retval) == current_function_decl
@@ -9225,6 +9236,9 @@ check_return_expr (tree retval, bool *no_warning)
build_zero_cst (TREE_TYPE (retval)));
}
+ if (processing_template_decl)
+ return saved_retval;
+
/* Actually copy the value returned into the appropriate location. */
if (retval && retval != result)
retval = build2 (INIT_EXPR, TREE_TYPE (result), result, retval);
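
A sketch (illustrative, not part of the patch) of the earlier checking the
check_return_expr change enables:

    template <typename T>
    int f (T)
    {
      return "oops";  /* neither the function type nor the operand is
                         dependent, so the bad conversion from
                         'const char *' to 'int' can be diagnosed without
                         instantiating f */
    }
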
diff --git a/gcc/cse.c b/gcc/cse.c
index 629365039a8..e3c0710215d 100644
--- a/gcc/cse.c
+++ b/gcc/cse.c
@@ -561,7 +561,7 @@ static struct table_elt *insert (rtx, struct table_elt *, unsigned,
static void merge_equiv_classes (struct table_elt *, struct table_elt *);
static void invalidate (rtx, machine_mode);
static void remove_invalid_refs (unsigned int);
-static void remove_invalid_subreg_refs (unsigned int, poly_int64,
+static void remove_invalid_subreg_refs (unsigned int, poly_uint64,
machine_mode);
static void rehash_using_reg (rtx);
static void invalidate_memory (void);
@@ -1994,7 +1994,7 @@ remove_invalid_refs (unsigned int regno)
/* Likewise for a subreg with subreg_reg REGNO, subreg_byte OFFSET,
and mode MODE. */
static void
-remove_invalid_subreg_refs (unsigned int regno, poly_int64 offset,
+remove_invalid_subreg_refs (unsigned int regno, poly_uint64 offset,
machine_mode mode)
{
unsigned int i;
@@ -3593,10 +3593,10 @@ fold_rtx (rtx x, rtx_insn *insn)
case MINUS:
/* If we have (MINUS Y C), see if Y is known to be (PLUS Z C2).
If so, produce (PLUS Z C2-C). */
- if (const_arg1 != 0 && poly_int_const_p (const_arg1, &xval))
+ if (const_arg1 != 0 && poly_int_rtx_p (const_arg1, &xval))
{
rtx y = lookup_as_function (XEXP (x, 0), PLUS);
- if (y && poly_int_const_p (XEXP (y, 1), &yval))
+ if (y && poly_int_rtx_p (XEXP (y, 1), &yval))
return fold_rtx (plus_constant (mode, copy_rtx (XEXP (y, 0)),
yval - xval),
NULL);
@@ -3809,7 +3809,7 @@ equiv_constant (rtx x)
/* If we didn't and if doing so makes sense, see if we previously
assigned a constant value to the enclosing word mode SUBREG. */
if (must_lt (GET_MODE_SIZE (mode), UNITS_PER_WORD)
- && must_gt (GET_MODE_SIZE (imode), UNITS_PER_WORD))
+ && must_lt (UNITS_PER_WORD, GET_MODE_SIZE (imode)))
{
poly_int64 byte = (SUBREG_BYTE (x)
- subreg_lowpart_offset (mode, word_mode));
@@ -6015,7 +6015,7 @@ cse_insn (rtx_insn *insn)
new_src = elt->exp;
else
{
- poly_int64 byte
+ poly_uint64 byte
= subreg_lowpart_offset (new_mode, GET_MODE (dest));
new_src = simplify_gen_subreg (new_mode, elt->exp,
GET_MODE (dest), byte);
diff --git a/gcc/diagnostic-color.c b/gcc/diagnostic-color.c
index 6adb872146b..b8cf6f2c045 100644
--- a/gcc/diagnostic-color.c
+++ b/gcc/diagnostic-color.c
@@ -20,6 +20,10 @@
#include "system.h"
#include "diagnostic-color.h"
+#ifdef __MINGW32__
+# include <windows.h>
+#endif
+
/* Select Graphic Rendition (SGR, "\33[...m") strings. */
/* Also Erase in Line (EL) to Right ("\33[K") by default. */
/* Why have EL to Right after SGR?
@@ -275,23 +279,28 @@ parse_gcc_colors (void)
return true;
}
-#if defined(_WIN32)
-bool
-colorize_init (diagnostic_color_rule_t)
-{
- return false;
-}
-#else
-
/* Return true if we should use color when in auto mode, false otherwise. */
static bool
should_colorize (void)
{
+#ifdef __MINGW32__
+ /* For consistency reasons, one should check the handle returned by
+ _get_osfhandle(_fileno(stderr)) because the function
+ pp_write_text_to_stream() in pretty-print.c calls fputs() on
+ that stream. However, the code below for non-Windows doesn't seem
+ to care about it either... */
+ HANDLE h;
+ DWORD m;
+
+ h = GetStdHandle (STD_ERROR_HANDLE);
+ return (h != INVALID_HANDLE_VALUE) && (h != NULL)
+ && GetConsoleMode (h, &m);
+#else
char const *t = getenv ("TERM");
return t && strcmp (t, "dumb") != 0 && isatty (STDERR_FILENO);
+#endif
}
-
bool
colorize_init (diagnostic_color_rule_t rule)
{
@@ -310,4 +319,3 @@ colorize_init (diagnostic_color_rule_t rule)
gcc_unreachable ();
}
}
-#endif
diff --git a/gcc/doc/extend.texi b/gcc/doc/extend.texi
index 0014490e869..d9b7a540cbd 100644
--- a/gcc/doc/extend.texi
+++ b/gcc/doc/extend.texi
@@ -2801,7 +2801,7 @@ void *my_memcpy (void *dst, const void *src, size_t len)
static void * (*resolve_memcpy (void))(void *, const void *, size_t)
@{
- return my_memcpy; // we'll just always select this routine
+ return my_memcpy; // we will just always select this routine
@}
@end smallexample
@@ -2814,15 +2814,56 @@ extern void *memcpy (void *, const void *, size_t);
@end smallexample
@noindent
-allowing the user to call this as a regular function, unaware of the
-implementation. Finally, the indirect function needs to be defined in
-the same translation unit as the resolver function:
+allowing the user to call @code{memcpy} as a regular function, unaware of
+the actual implementation. Finally, the indirect function needs to be
+defined in the same translation unit as the resolver function:
@smallexample
void *memcpy (void *, const void *, size_t)
__attribute__ ((ifunc ("resolve_memcpy")));
@end smallexample
+In C++, the @code{ifunc} attribute takes a string that is the mangled name
+of the resolver function. A C++ resolver for a non-static member function
+of class @code{C} should be declared to return a pointer to a non-member
+function taking a pointer to @code{C} as the first argument, followed by
+the same arguments as the implementation function. G++ checks
+the signatures of the two functions and issues
+a @option{-Wattribute-alias} warning for mismatches. To suppress a warning
+for the necessary cast from a pointer to the implementation member function
+to the type of the corresponding non-member function, use
+the @option{-Wno-pmf-conversions} option. For example:
+
+@smallexample
+class S
+@{
+private:
+ int debug_impl (int);
+ int optimized_impl (int);
+
+ typedef int Func (S*, int);
+
+ static Func* resolver ();
+public:
+
+ int interface (int);
+@};
+
+int S::debug_impl (int) @{ /* @r{@dots{}} */ @}
+int S::optimized_impl (int) @{ /* @r{@dots{}} */ @}
+
+S::Func* S::resolver ()
+@{
+ int (S::*pimpl) (int)
+ = getenv ("DEBUG") ? &S::debug_impl : &S::optimized_impl;
+
+ // Cast triggers a -Wpmf-conversions warning.
+ return reinterpret_cast<Func*>(pimpl);
+@}
+
+int S::interface (int) __attribute__ ((ifunc ("_ZN1S8resolverEv")));
+@end smallexample
+
Indirect functions cannot be weak. Binutils version 2.20.1 or higher
and GNU C Library version 2.11.1 are required to use this feature.
@@ -8081,7 +8122,7 @@ A comma-separated list of C expressions read by the instructions in the
@item Clobbers
A comma-separated list of registers or other values changed by the
@var{AssemblerTemplate}, beyond those listed as outputs.
-An empty list is permitted. @xref{Clobbers}.
+An empty list is permitted. @xref{Clobbers and Scratch Registers}.
@item GotoLabels
When you are using the @code{goto} form of @code{asm}, this section contains
@@ -8441,7 +8482,7 @@ The enclosing parentheses are a required part of the syntax.
When the compiler selects the registers to use to
represent the output operands, it does not use any of the clobbered registers
-(@pxref{Clobbers}).
+(@pxref{Clobbers and Scratch Registers}).
Output operand expressions must be lvalues. The compiler cannot check whether
the operands have data types that are reasonable for the instruction being
@@ -8677,7 +8718,8 @@ as input. The enclosing parentheses are a required part of the syntax.
@end table
When the compiler selects the registers to use to represent the input
-operands, it does not use any of the clobbered registers (@pxref{Clobbers}).
+operands, it does not use any of the clobbered registers
+(@pxref{Clobbers and Scratch Registers}).
If there are no output operands but there are input operands, place two
consecutive colons where the output operands would go:
@@ -8728,9 +8770,10 @@ asm ("cmoveq %1, %2, %[result]"
: "r" (test), "r" (new), "[result]" (old));
@end example
-@anchor{Clobbers}
-@subsubsection Clobbers
+@anchor{Clobbers and Scratch Registers}
+@subsubsection Clobbers and Scratch Registers
@cindex @code{asm} clobbers
+@cindex @code{asm} scratch registers
While the compiler is aware of changes to entries listed in the output
operands, the inline @code{asm} code may modify more than just the outputs. For
@@ -8761,7 +8804,7 @@ registers:
asm volatile ("movc3 %0, %1, %2"
: /* No outputs. */
: "g" (from), "g" (to), "g" (count)
- : "r0", "r1", "r2", "r3", "r4", "r5");
+ : "r0", "r1", "r2", "r3", "r4", "r5", "memory");
@end example
Also, there are two special clobber arguments:
@@ -8792,14 +8835,141 @@ Note that this clobber does not prevent the @emph{processor} from doing
speculative reads past the @code{asm} statement. To prevent that, you need
processor-specific fence instructions.
-Flushing registers to memory has performance implications and may be an issue
-for time-sensitive code. You can use a trick to avoid this if the size of
-the memory being accessed is known at compile time. For example, if accessing
-ten bytes of a string, use a memory input like:
+@end table
-@code{@{"m"( (@{ struct @{ char x[10]; @} *p = (void *)ptr ; *p; @}) )@}}.
+Flushing registers to memory has performance implications and may be
+an issue for time-sensitive code. You can provide better information
+to GCC to avoid this, as shown in the following examples. At a
+minimum, aliasing rules allow GCC to know what memory @emph{doesn't}
+need to be flushed.
-@end table
+Here is a fictitious sum of squares instruction that takes two
+pointers to floating point values in memory and produces a floating
+point register output.
+Notice that @code{x} and @code{y} both appear twice in the @code{asm}
+parameters, once to specify memory accessed, and once to specify a
+base register used by the @code{asm}. You won't normally be wasting a
+register by doing this, as GCC can use the same register for both
+purposes. However, it would be foolish to use both @code{%1} and
+@code{%3} for @code{x} in this @code{asm} and expect them to be the
+same. In fact, @code{%3} may well not be a register. It might be a
+symbolic memory reference to the object pointed to by @code{x}.
+
+@smallexample
+asm ("sumsq %0, %1, %2"
+ : "+f" (result)
+ : "r" (x), "r" (y), "m" (*x), "m" (*y));
+@end smallexample
+
+Here is a fictitious @code{*z++ = *x++ * *y++} instruction.
+Notice that the @code{x}, @code{y} and @code{z} pointer registers
+must be specified as input/output because the @code{asm} modifies
+them.
+
+@smallexample
+asm ("vecmul %0, %1, %2"
+ : "+r" (z), "+r" (x), "+r" (y), "=m" (*z)
+ : "m" (*x), "m" (*y));
+@end smallexample
+
+Here is an x86 example where the string memory argument is of unknown
+length.
+
+@smallexample
+asm("repne scasb"
+ : "=c" (count), "+D" (p)
+ : "m" (*(const char (*)[]) p), "0" (-1), "a" (0));
+@end smallexample
+
+If you know the above will only be reading a ten-byte array, then you
+could instead use a memory input like:
+@code{"m" (*(const char (*)[10]) p)}.
+
+Here is an example of a PowerPC vector scale implemented in assembly,
+complete with vector and condition code clobbers, and some initialized
+offset registers that are unchanged by the @code{asm}.
+
+@smallexample
+void
+dscal (size_t n, double *x, double alpha)
+@{
+ asm ("/* lots of asm here */"
+ : "+m" (*(double (*)[n]) x), "+&r" (n), "+b" (x)
+ : "d" (alpha), "b" (32), "b" (48), "b" (64),
+ "b" (80), "b" (96), "b" (112)
+ : "cr0",
+ "vs32","vs33","vs34","vs35","vs36","vs37","vs38","vs39",
+ "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47");
+@}
+@end smallexample
+
+Rather than allocating fixed registers via clobbers to provide scratch
+registers for an @code{asm} statement, an alternative is to define a
+variable and make it an early-clobber output as with @code{a2} and
+@code{a3} in the example below. This gives the compiler register
+allocator more freedom. You can also define a variable and make it an
+output tied to an input as with @code{a0} and @code{a1}, tied
+respectively to @code{ap} and @code{lda}. Of course, with tied
+outputs your @code{asm} can't use the input value after modifying the
+output register since they are one and the same register. What's
+more, if you omit the early-clobber on the output, it is possible that
+GCC might allocate the same register to another of the inputs if GCC
+could prove they had the same value on entry to the @code{asm}. This
+is why @code{a1} has an early-clobber. Its tied input, @code{lda},
+might conceivably be known to have the value 16 and, without an
+early-clobber, share the same register as @code{%11}. On the other
+hand, @code{ap} can't be the same as any of the other inputs, so an
+early-clobber on @code{a0} is not needed. It is also not desirable in
+this case. An early-clobber on @code{a0} would cause GCC to allocate
+a separate register for the @code{"m" (*(const double (*)[]) ap)}
+input. Note that tying an input to an output is the way to set up an
+initialized temporary register modified by an @code{asm} statement.
+An input not tied to an output is assumed by GCC to be unchanged; for
+example, @code{"b" (16)} below sets up @code{%11} to 16, and GCC might
+use that register in following code if the value 16 happened to be
+needed. You can even use a normal @code{asm} output for a scratch if
+all inputs that might share the same register are consumed before the
+scratch is used. The VSX registers clobbered by the @code{asm}
+statement could have used this technique except for GCC's limit on the
+number of @code{asm} parameters.
+
+@smallexample
+static void
+dgemv_kernel_4x4 (long n, const double *ap, long lda,
+ const double *x, double *y, double alpha)
+@{
+ double *a0;
+ double *a1;
+ double *a2;
+ double *a3;
+
+ __asm__
+ (
+ /* lots of asm here */
+ "#n=%1 ap=%8=%12 lda=%13 x=%7=%10 y=%0=%2 alpha=%9 o16=%11\n"
+ "#a0=%3 a1=%4 a2=%5 a3=%6"
+ :
+ "+m" (*(double (*)[n]) y),
+ "+&r" (n), // 1
+ "+b" (y), // 2
+ "=b" (a0), // 3
+ "=&b" (a1), // 4
+ "=&b" (a2), // 5
+ "=&b" (a3) // 6
+ :
+ "m" (*(const double (*)[n]) x),
+ "m" (*(const double (*)[]) ap),
+ "d" (alpha), // 9
+ "r" (x), // 10
+ "b" (16), // 11
+ "3" (ap), // 12
+ "4" (lda) // 13
+ :
+ "cr0",
+ "vs32","vs33","vs34","vs35","vs36","vs37",
+ "vs40","vs41","vs42","vs43","vs44","vs45","vs46","vs47"
+ );
+@}
+@end smallexample
@anchor{GotoLabels}
@subsubsection Goto Labels
diff --git a/gcc/doc/generic.texi b/gcc/doc/generic.texi
index a5c6d2174c3..5c39f453e7f 100644
--- a/gcc/doc/generic.texi
+++ b/gcc/doc/generic.texi
@@ -1037,6 +1037,7 @@ As this example indicates, the operands are zero-indexed.
@tindex COMPLEX_CST
@tindex VECTOR_CST
@tindex VEC_DUPLICATE_CST
+@tindex VEC_SERIES_CST
@tindex STRING_CST
@tindex POLY_INT_CST
@findex TREE_STRING_LENGTH
@@ -1099,6 +1100,16 @@ instead. The scalar element value is given by
@code{VEC_DUPLICATE_CST_ELT} and has the same restrictions as the
element of a @code{VECTOR_CST}.
+@item VEC_SERIES_CST
+These nodes represent a vector constant in which element @var{i}
+has the value @samp{@var{base} + @var{i} * @var{step}}, for some
+constant @var{base} and @var{step}. The value of @var{base} is
+given by @code{VEC_SERIES_CST_BASE} and the value of @var{step} is
+given by @code{VEC_SERIES_CST_STEP}.
+
+These nodes are restricted to integral types, in order to avoid
+specifying the rounding behavior for floating-point types.
+
@item STRING_CST
These nodes represent string-constants. The @code{TREE_STRING_LENGTH}
returns the length of the string, as an @code{int}. The
@@ -1125,7 +1136,8 @@ coefficients, with the first coefficient being the constant term and
the others being multipliers that are applied to the runtime parameters.
@code{POLY_INT_CST_ELT (@var{x}, @var{i})} references coefficient number
-@var{i} of @code{POLY_INT_CST} node @var{x}.
+@var{i} of @code{POLY_INT_CST} node @var{x}. Each coefficient is an
+@code{INTEGER_CST}.
@end table
@@ -1736,7 +1748,9 @@ element is equal to that operand.
This node represents a vector formed from a scalar base and step,
given as the first and second operands respectively. Element @var{i}
of the result is equal to @samp{@var{base} + @var{i}*@var{step}}.
-The elements must not have floating-point type.
+
+This node is restricted to integral types, in order to avoid
+specifying the rounding behavior for floating-point types.
@item VEC_LSHIFT_EXPR
@itemx VEC_RSHIFT_EXPR
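
A worked reading (illustrative) of the new VEC_SERIES_CST node:

    element i = base + i * step
    base = 1, step = 2, 4 elements  ==>  { 1, 3, 5, 7 }
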
diff --git a/gcc/doc/invoke.texi b/gcc/doc/invoke.texi
index 629fbf56bb3..9bf1a17ebfb 100644
--- a/gcc/doc/invoke.texi
+++ b/gcc/doc/invoke.texi
@@ -5402,6 +5402,11 @@ pointers. This warning level may give a larger number of
false positives and is deactivated by default.
@end table
+@item -Wattribute-alias
+Warn about declarations using the @code{alias} and similar attributes whose
+target is incompatible with the type of the alias. @xref{Function Attributes,
+,Declaring Attributes of Functions}.
+
@item -Wbool-compare
@opindex Wno-bool-compare
@opindex Wbool-compare
diff --git a/gcc/doc/md.texi b/gcc/doc/md.texi
index f8454877735..0aa8ec8812c 100644
--- a/gcc/doc/md.texi
+++ b/gcc/doc/md.texi
@@ -4917,7 +4917,7 @@ This pattern is not allowed to @code{FAIL}.
@cindex @code{vec_series@var{m}} instruction pattern
@item @samp{vec_series@var{m}}
Initialize vector output operand 0 so that element @var{i} is equal to
-operand 1 plus @var{i} times operand 2. In other words, return a linear
+operand 1 plus @var{i} times operand 2. In other words, create a linear
series whose base value is operand 1 and whose step is operand 2.
The vector output has mode @var{m} and the scalar inputs have the mode
diff --git a/gcc/doc/poly-int.texi b/gcc/doc/poly-int.texi
index 25a9c057d2e..851a86a431c 100644
--- a/gcc/doc/poly-int.texi
+++ b/gcc/doc/poly-int.texi
@@ -468,11 +468,12 @@ known size.
As well as the core comparisons
(@pxref{Comparison functions for @code{poly_int}}), @code{poly_int} provides
utilities for various kinds of range check. In each case the range
-is represented by a start position and a length rather than a start
+is represented by a start position and a size rather than a start
position and an end position; this is because the former is used
much more often than the latter in GCC@. Also, the sizes can be
-1 (or all ones for unsigned sizes) to indicate a range with a known
-start position but an unknown size.
+start position but an unknown size. All other sizes must be nonnegative.
+A range of size 0 does not contain anything or overlap anything.
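+
+For example, a range with a start position of 16 and a size of 4
+covers just the positions 16 to 19 inclusive, while a range with a
+start position of 16 and a size of -1 has a known start but no known
+end.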
@table @samp
@item known_size_p (@var{size})
@@ -508,6 +509,19 @@ in the same type as @var{pos} and @var{size}. The function returns false
if adding @var{size} to @var{pos} makes conceptual sense but could overflow.
@end table
+There is also a @code{poly_int} version of the @code{IN_RANGE} macro:
+
+@table @samp
+@item coeffs_in_range_p (@var{x}, @var{lower}, @var{upper})
+Return true if every coefficient of @var{x} is in the inclusive range
+[@var{lower}, @var{upper}]. This function can be useful when testing
+whether an operation would cause the values of coefficients to
+overflow.
+
+Note that the function does not indicate whether @var{x} itself is in the
+given range. @var{x} can be either a constant or a @code{poly_int}.
+@end table
+
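+For example, if @var{x} is a two-coefficient @code{poly_int} with the
+value @samp{4 + 1 * @var{p}}, then @samp{coeffs_in_range_p (@var{x}, 0, 3)}
+returns false, since the coefficient 4 lies outside the range [0, 3].
+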
@node Sorting @code{poly_int}s
@subsection Sorting @code{poly_int}s
@@ -626,6 +640,8 @@ wi::sub (@var{p1}, @var{p2}, @var{sign}, &@var{overflow})
wi::mul (@var{p1}, @var{c2})
wi::mul (@var{c1}, @var{p1})
wi::mul (@var{p1}, @var{c2}, @var{sign}, &@var{overflow})
+
+wi::lshift (@var{p1}, @var{c2})
@end smallexample
These routines just check whether overflow occurs on any individual
@@ -866,6 +882,19 @@ Forcibly convert each coefficient of @samp{poly_int<@var{N}, @var{T}>}
out of range. Return the result as a
@samp{poly_int<@var{N}, @code{unsigned HOST_WIDE_INT}>}.
+@item wi::shwi (@var{value}, @var{precision})
+Return a @code{poly_int} with the same value as @var{value}, but with
+the coefficients converted from @code{HOST_WIDE_INT} to @code{wide_int}.
+@var{precision} specifies the precision of the @code{wide_int} coefficients;
+if this is wider than a @code{HOST_WIDE_INT}, the coefficients of
+@var{value} will be sign-extended to fit.
+
+@item wi::uhwi (@var{value}, @var{precision})
+Like @code{wi::shwi}, except that @var{value} has coefficients of
+type @code{unsigned HOST_WIDE_INT}. If @var{precision} is wider than
+a @code{HOST_WIDE_INT}, the coefficients of @var{value} will be
+zero-extended to fit.
+
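+As a minimal sketch, assuming a target with two coefficients per
+@code{poly_int}:
+
+@smallexample
+poly_int64 value (-1, 2);
+/* Convert to 128-bit wide_int coefficients; -1 is sign-extended.  */
+poly_wide_int wide_value = wi::shwi (value, 128);
+@end smallexample
+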
@item wi::sext (@var{value}, @var{precision})
Return a @code{poly_int} of the same type as @var{value}, sign-extending
every coefficient from the low @var{precision} bits. This in effect
diff --git a/gcc/doc/rtl.texi b/gcc/doc/rtl.texi
index bb594c76cf3..2156daf64a8 100644
--- a/gcc/doc/rtl.texi
+++ b/gcc/doc/rtl.texi
@@ -343,16 +343,13 @@ stored in the operand. You would do this based on the expression code of
the containing expression. That is also how you would know how many
operands there are.
-For example, if @var{x} is an @code{unspec} expression, you know that it has
-two operands which can be correctly accessed as @code{XVEC (@var{x}, 0)}
-and @code{XINT (@var{x}, 1)}. If you did @code{XINT (@var{x}, 0)}, you
-would get the address of the vector operand but cast as an integer;
-that might occasionally be useful, but it would be cleaner to write
-@code{(int) XVEC (@var{x}, 0)}. @code{XEXP (@var{x}, 1)} would also
-compile without error, and would return the second, integer operand cast as
-an expression pointer, which would probably result in a crash when
-accessed. Nothing stops you from writing @code{XEXP (@var{x}, 28)} either,
-but this will access memory past the end of the expression with
+For example, if @var{x} is an @code{int_list} expression, you know that it has
+two operands which can be correctly accessed as @code{XINT (@var{x}, 0)}
+and @code{XEXP (@var{x}, 1)}. Incorrect accesses like
+@code{XEXP (@var{x}, 0)} and @code{XINT (@var{x}, 1)} would compile,
+but would trigger an internal compiler error when rtl checking is enabled.
+Nothing stops you from writing @code{XEXP (@var{x}, 28)} either, but
+this will access memory past the end of the expression with
unpredictable results.
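+
+As a minimal sketch, a walk over a chain of @code{int_list} nodes,
+assuming the usual convention that the second operand links to the
+next node, would look like:
+
+@smallexample
+for (rtx node = list; node; node = XEXP (node, 1))
+  process (XINT (node, 0));  /* process is a hypothetical callback.  */
+@end smallexample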
Access to operands which are vectors is more complicated. You can use the
@@ -1726,6 +1723,15 @@ is accessed with the macro @code{CONST_FIXED_VALUE}. The high part of
data is accessed with @code{CONST_FIXED_VALUE_HIGH}; the low part is
accessed with @code{CONST_FIXED_VALUE_LOW}.
+@findex const_poly_int
+@item (const_poly_int:@var{m} [@var{c0} @var{c1} @dots{}])
+Represents a @code{poly_int}-style polynomial integer with coefficients
+@var{c0}, @var{c1}, @dots{}. The coefficients are @code{wide_int}-based
+integers rather than rtxes. @code{CONST_POLY_INT_COEFFS} gives the
+values of individual coefficients (which is mostly only useful in
+low-level routines) and @code{const_poly_int_value} gives the full
+@code{poly_int} value.
+
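+For example, on a target with two coefficients per value, a constant
+equal to 16 plus 16 times the runtime invariant might appear in RTL
+dumps as:
+
+@smallexample
+(const_poly_int:DI [16 16])
+@end smallexample
+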
@findex const_vector
@item (const_vector:@var{m} [@var{x0} @var{x1} @dots{}])
Represents a vector constant. The square brackets stand for the vector
@@ -1740,15 +1746,6 @@ Individual elements in a vector constant are accessed with the macro
where @var{v} is the vector constant and @var{n} is the element
desired.
-@findex const_param
-@item (const_param:@var{m} @var{n})
-Represents a target-specific parameter whose value is a runtime invariant
-but whose value is not known at compile time. @var{n} is a 0-based identifier
-for the parameter. If @var{n} + 1 is less than @code{NUM_POLY_INT_COEFFS},
-the parameter is a nonnegative integer that is multiplied by coefficient
-@var{n} + 1 in a @code{poly_int} value. Other identifiers have no defined
-meaning; the target can use them for any purpose and with any mode.
-
@findex const_string
@item (const_string @var{str})
Represents a constant string with value @var{str}. Currently this is
@@ -1904,11 +1901,11 @@ If @code{FRAME_GROWS_DOWNWARD} is defined to a nonzero value, this points
to immediately above the first variable on the stack. Otherwise, it points
to the first variable on the stack.
-@cindex @code{STARTING_FRAME_OFFSET} and virtual registers
+@cindex @code{TARGET_STARTING_FRAME_OFFSET} and virtual registers
@cindex @code{FRAME_POINTER_REGNUM} and virtual registers
@code{VIRTUAL_STACK_VARS_REGNUM} is replaced with the sum of the
register given by @code{FRAME_POINTER_REGNUM} and the value
-@code{STARTING_FRAME_OFFSET}.
+@code{TARGET_STARTING_FRAME_OFFSET}.
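+
+For example, if the hook returns -16, a use of this virtual register
+is replaced with @samp{(plus (reg @var{fp}) (const_int -16))}, where
+@var{fp} is the register given by @code{FRAME_POINTER_REGNUM}.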
@findex VIRTUAL_STACK_DYNAMIC_REGNUM
@item VIRTUAL_STACK_DYNAMIC_REGNUM
diff --git a/gcc/doc/tm.texi b/gcc/doc/tm.texi
index 81a0f304dcd..a6bdb8ff277 100644
--- a/gcc/doc/tm.texi
+++ b/gcc/doc/tm.texi
@@ -2976,22 +2976,19 @@ Define this macro if successive arguments to a function occupy decreasing
addresses on the stack.
@end defmac
-@defmac STARTING_FRAME_OFFSET
-Offset from the frame pointer to the first local variable slot to be allocated.
-
-If @code{FRAME_GROWS_DOWNWARD}, find the next slot's offset by
-subtracting the first slot's length from @code{STARTING_FRAME_OFFSET}.
-Otherwise, it is found by adding the length of the first slot to the
-value @code{STARTING_FRAME_OFFSET}.
-@c i'm not sure if the above is still correct.. had to change it to get
-@c rid of an overfull. --mew 2feb93
-@end defmac
+@deftypefn {Target Hook} HOST_WIDE_INT TARGET_STARTING_FRAME_OFFSET (void)
+This hook returns the offset from the frame pointer to the first local
+variable slot to be allocated. If @code{FRAME_GROWS_DOWNWARD}, it is the
+offset to the @emph{end} of the first slot allocated; otherwise it is the
+offset to the @emph{beginning} of the first slot allocated. The default
+implementation returns 0.
+@end deftypefn
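+
+As a minimal sketch, a hypothetical port that allocates locals
+starting 16 bytes below the frame pointer could define:
+
+@smallexample
+static HOST_WIDE_INT
+sample_starting_frame_offset (void)
+@{
+  return -16;
+@}
+
+#undef TARGET_STARTING_FRAME_OFFSET
+#define TARGET_STARTING_FRAME_OFFSET sample_starting_frame_offset
+@end smallexample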
@defmac STACK_ALIGNMENT_NEEDED
Define to zero to disable final alignment of the stack during reload.
The nonzero default for this macro is suitable for most ports.
-On ports where @code{STARTING_FRAME_OFFSET} is nonzero or where there
+On ports where @code{TARGET_STARTING_FRAME_OFFSET} is nonzero or where there
is a register save block following the local block that doesn't require
alignment to @code{STACK_BOUNDARY}, it may be beneficial to disable
stack alignment and do it in the backend.
@@ -5856,8 +5853,8 @@ transformations even in absence of specialized @acronym{SIMD} hardware.
@end deftypefn
@deftypefn {Target Hook} void TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES (vector_sizes *@var{sizes})
-If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} isn't
-the only one that's worth considering, this hook should add all suitable
+If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not
+the only one that is worth considering, this hook should add all suitable
vector sizes to @var{sizes}, in order of decreasing preference. The first
one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.
diff --git a/gcc/doc/tm.texi.in b/gcc/doc/tm.texi.in
index ae51e05f32e..b5e2771a831 100644
--- a/gcc/doc/tm.texi.in
+++ b/gcc/doc/tm.texi.in
@@ -2413,22 +2413,13 @@ Define this macro if successive arguments to a function occupy decreasing
addresses on the stack.
@end defmac
-@defmac STARTING_FRAME_OFFSET
-Offset from the frame pointer to the first local variable slot to be allocated.
-
-If @code{FRAME_GROWS_DOWNWARD}, find the next slot's offset by
-subtracting the first slot's length from @code{STARTING_FRAME_OFFSET}.
-Otherwise, it is found by adding the length of the first slot to the
-value @code{STARTING_FRAME_OFFSET}.
-@c i'm not sure if the above is still correct.. had to change it to get
-@c rid of an overfull. --mew 2feb93
-@end defmac
+@hook TARGET_STARTING_FRAME_OFFSET
@defmac STACK_ALIGNMENT_NEEDED
Define to zero to disable final alignment of the stack during reload.
The nonzero default for this macro is suitable for most ports.
-On ports where @code{STARTING_FRAME_OFFSET} is nonzero or where there
+On ports where @code{TARGET_STARTING_FRAME_OFFSET} is nonzero or where there
is a register save block following the local block that doesn't require
alignment to @code{STACK_BOUNDARY}, it may be beneficial to disable
stack alignment and do it in the backend.
diff --git a/gcc/dse.c b/gcc/dse.c
index 7bbcfdaae38..90ec76a36f7 100644
--- a/gcc/dse.c
+++ b/gcc/dse.c
@@ -950,7 +950,7 @@ static void
set_usage_bits (group_info *group, poly_int64 offset, poly_int64 width,
tree expr)
{
- /* Non-constant starts and ends act as global kills, so there's no point
+ /* Non-constant offsets and widths act as global kills, so there's no point
trying to use them to derive global DSE candidates. */
HOST_WIDE_INT i, const_offset, const_width;
bool expr_escapes = can_escape (expr);
@@ -1092,7 +1092,7 @@ const_or_frame_p (rtx x)
static bool
canon_address (rtx mem,
int *group_id,
- HOST_WIDE_INT *offset,
+ poly_int64 *offset,
cselib_val **base)
{
machine_mode address_mode = get_address_mode (mem);
@@ -1159,12 +1159,7 @@ canon_address (rtx mem,
if (GET_CODE (address) == CONST)
address = XEXP (address, 0);
- if (GET_CODE (address) == PLUS
- && CONST_INT_P (XEXP (address, 1)))
- {
- *offset = INTVAL (XEXP (address, 1));
- address = XEXP (address, 0);
- }
+ address = strip_offset_and_add (address, offset);
if (ADDR_SPACE_GENERIC_P (MEM_ADDR_SPACE (mem))
&& const_or_frame_p (address))
@@ -1172,8 +1167,11 @@ canon_address (rtx mem,
group_info *group = get_group_info (address);
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, " gid=%d offset=%d \n",
- group->id, (int)*offset);
+ {
+ fprintf (dump_file, " gid=%d offset=", group->id);
+ print_dec (*offset, dump_file);
+ fprintf (dump_file, "\n");
+ }
*base = NULL;
*group_id = group->id;
return true;
@@ -1190,8 +1188,12 @@ canon_address (rtx mem,
return false;
}
if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, " varying cselib base=%u:%u offset = %d\n",
- (*base)->uid, (*base)->hash, (int)*offset);
+ {
+ fprintf (dump_file, " varying cselib base=%u:%u offset = ",
+ (*base)->uid, (*base)->hash);
+ print_dec (*offset, dump_file);
+ fprintf (dump_file, "\n");
+ }
return true;
}
@@ -1294,6 +1296,9 @@ all_positions_needed_p (store_info *s_info, poly_int64 start,
return s_info->positions_needed.large.count == 0;
}
+ /* Otherwise, if START and WIDTH are non-constant, we're asking about
+ a non-constant region of a constant-sized store. We can't say for
+ sure that all positions are needed. */
HOST_WIDE_INT const_start, const_width;
if (!start.is_constant (&const_start)
|| !width.is_constant (&const_width))
@@ -1395,7 +1400,7 @@ record_store (rtx body, bb_info_t bb_info)
if (MEM_VOLATILE_P (mem))
insn_info->cannot_delete = true;
- if (!canon_address (mem, &group_id, &offset.coeffs[0], &base))
+ if (!canon_address (mem, &group_id, &offset, &base))
{
clear_rhs_from_active_local_stores ();
return 0;
@@ -1698,7 +1703,7 @@ find_shift_sequence (poly_int64 access_size,
e.g. at -Os, even when no actual shift will be needed. */
if (store_info->const_rhs)
{
- poly_int64 byte = subreg_lowpart_offset (new_mode, store_mode);
+ poly_uint64 byte = subreg_lowpart_offset (new_mode, store_mode);
rtx ret = simplify_subreg (new_mode, store_info->const_rhs,
store_mode, byte);
if (ret && CONSTANT_P (ret))
@@ -2067,7 +2072,7 @@ check_mem_read_rtx (rtx *loc, bb_info_t bb_info)
if (MEM_READONLY_P (mem))
return;
- if (!canon_address (mem, &group_id, &offset.coeffs[0], &base))
+ if (!canon_address (mem, &group_id, &offset, &base))
{
if (dump_file && (dump_flags & TDF_DETAILS))
fprintf (dump_file, " adding wild read, canon_address failure.\n");
diff --git a/gcc/dwarf2cfi.c b/gcc/dwarf2cfi.c
index e9a40f01d50..82e6571964b 100644
--- a/gcc/dwarf2cfi.c
+++ b/gcc/dwarf2cfi.c
@@ -261,10 +261,10 @@ struct init_one_dwarf_reg_state
use for the size entry to initialize, and INIT_STATE is the communication
datastructure conveying what we're doing to our caller. */
-static void
-init_one_dwarf_reg_size (int regno, machine_mode regmode,
- rtx table, machine_mode slotmode,
- init_one_dwarf_reg_state *init_state)
+static
+void init_one_dwarf_reg_size (int regno, machine_mode regmode,
+ rtx table, machine_mode slotmode,
+ init_one_dwarf_reg_state *init_state)
{
const unsigned int dnum = DWARF_FRAME_REGNUM (regno);
const unsigned int rnum = DWARF2_FRAME_REG_OUT (dnum, 1);
@@ -1135,21 +1135,11 @@ dwarf2out_frame_debug_def_cfa (rtx pat)
{
memset (cur_cfa, 0, sizeof (*cur_cfa));
- poly_int64 offset;
- if (GET_CODE (pat) == PLUS && poly_int_const_p (XEXP (pat, 1), &offset))
- {
- cur_cfa->offset = offset;
- pat = XEXP (pat, 0);
- }
+ pat = strip_offset (pat, &cur_cfa->offset);
if (MEM_P (pat))
{
cur_cfa->indirect = 1;
- pat = XEXP (pat, 0);
- if (GET_CODE (pat) == PLUS && poly_int_const_p (XEXP (pat, 1), &offset))
- {
- cur_cfa->base_offset = offset;
- pat = XEXP (pat, 0);
- }
+ pat = strip_offset (XEXP (pat, 0), &cur_cfa->base_offset);
}
/* ??? If this fails, we could be calling into the _loc functions to
define a full expression. So far no port does that. */
@@ -1678,14 +1668,14 @@ dwarf2out_frame_debug_expr (rtx expr)
{
/* Rule 2 */
/* Adjusting SP. */
- if (GET_CODE (XEXP (src, 1)) == REG)
+ if (REG_P (XEXP (src, 1)))
{
gcc_assert (dwf_regno (XEXP (src, 1))
== cur_trace->cfa_temp.reg);
offset = cur_trace->cfa_temp.offset;
}
- else
- offset = rtx_to_poly_int64 (XEXP (src, 1));
+ else if (!poly_int_rtx_p (XEXP (src, 1), &offset))
+ gcc_unreachable ();
if (XEXP (src, 0) == hard_frame_pointer_rtx)
{
@@ -1728,7 +1718,7 @@ dwarf2out_frame_debug_expr (rtx expr)
/* Rule 4 */
if (REG_P (XEXP (src, 0))
&& dwf_regno (XEXP (src, 0)) == cur_cfa->reg
- && poly_int_const_p (XEXP (src, 1), &offset))
+ && poly_int_rtx_p (XEXP (src, 1), &offset))
{
/* Setting a temporary CFA register that will be copied
into the FP later on. */
@@ -1754,12 +1744,10 @@ dwarf2out_frame_debug_expr (rtx expr)
}
/* Rule 9 */
- else if (GET_CODE (src) == LO_SUM)
- {
- cur_trace->cfa_temp.reg = dwf_regno (dest);
- cur_trace->cfa_temp.offset
- = rtx_to_poly_int64 (XEXP (src, 1));
- }
+ else if (GET_CODE (src) == LO_SUM
+ && poly_int_rtx_p (XEXP (src, 1),
+ &cur_trace->cfa_temp.offset))
+ cur_trace->cfa_temp.reg = dwf_regno (dest);
else
gcc_unreachable ();
}
@@ -1767,8 +1755,9 @@ dwarf2out_frame_debug_expr (rtx expr)
/* Rule 6 */
case CONST_INT:
+ case POLY_INT_CST:
cur_trace->cfa_temp.reg = dwf_regno (dest);
- cur_trace->cfa_temp.offset = INTVAL (src);
+ cur_trace->cfa_temp.offset = rtx_to_poly_int64 (src);
break;
/* Rule 7 */
@@ -1780,6 +1769,8 @@ dwarf2out_frame_debug_expr (rtx expr)
cur_trace->cfa_temp.reg = dwf_regno (dest);
if (!can_ior_p (cur_trace->cfa_temp.offset, INTVAL (XEXP (src, 1)),
&cur_trace->cfa_temp.offset))
+ /* The target shouldn't generate this kind of CFI note if we
+ can't represent it. */
gcc_unreachable ();
break;
@@ -2915,7 +2906,6 @@ initial_return_save (rtx rtl)
{
unsigned int reg = INVALID_REGNUM;
poly_int64 offset = 0;
- rtx base;
switch (GET_CODE (rtl))
{
@@ -2926,9 +2916,28 @@ initial_return_save (rtx rtl)
case MEM:
/* RA is on the stack. */
- base = strip_offset (XEXP (rtl, 0), &offset);
- gcc_assert (REG_P (base));
- gcc_assert (REGNO (base) == STACK_POINTER_REGNUM);
+ rtl = XEXP (rtl, 0);
+ switch (GET_CODE (rtl))
+ {
+ case REG:
+ gcc_assert (REGNO (rtl) == STACK_POINTER_REGNUM);
+ offset = 0;
+ break;
+
+ case PLUS:
+ gcc_assert (REGNO (XEXP (rtl, 0)) == STACK_POINTER_REGNUM);
+ offset = rtx_to_poly_int64 (XEXP (rtl, 1));
+ break;
+
+ case MINUS:
+ gcc_assert (REGNO (XEXP (rtl, 0)) == STACK_POINTER_REGNUM);
+ offset = -rtx_to_poly_int64 (XEXP (rtl, 1));
+ break;
+
+ default:
+ gcc_unreachable ();
+ }
+
break;
case PLUS:
diff --git a/gcc/dwarf2out.c b/gcc/dwarf2out.c
index 6a752d2101d..82b46169a18 100644
--- a/gcc/dwarf2out.c
+++ b/gcc/dwarf2out.c
@@ -13222,7 +13222,7 @@ int_shift_loc_descriptor (HOST_WIDE_INT i, int shift)
return ret;
}
-/* Return a location descriptor that designates a constant. */
+/* Return a location descriptor that designates constant POLY_I. */
static dw_loc_descr_ref
int_loc_descriptor (poly_int64 poly_i)
diff --git a/gcc/emit-rtl.c b/gcc/emit-rtl.c
index c0f2ea1d328..28e6dd85e0d 100644
--- a/gcc/emit-rtl.c
+++ b/gcc/emit-rtl.c
@@ -150,7 +150,7 @@ static GTY ((cache)) hash_table<const_wide_int_hasher> *const_wide_int_htab;
struct const_poly_int_hasher : ggc_cache_ptr_hash<rtx_def>
{
- typedef poly_wide_int_ref compare_type;
+ typedef std::pair<machine_mode, poly_wide_int_ref> compare_type;
static hashval_t hash (rtx x);
static bool equal (rtx x, const compare_type &y);
@@ -266,29 +266,27 @@ const_wide_int_hasher::equal (rtx x, rtx y)
}
#endif
-/* Returns a hash code for X (which is a really a CONST_INT). */
+/* Returns a hash code for CONST_POLY_INT X. */
hashval_t
const_poly_int_hasher::hash (rtx x)
{
inchash::hash h;
+ h.add_int (GET_MODE (x));
for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
h.add_wide_int (CONST_POLY_INT_COEFFS (x)[i]);
return h.end ();
}
-/* Returns nonzero if the value represented by X (which is really a
- CONST_INT) is the same as that given by Y (which is really a
- HOST_WIDE_INT *). */
+/* Returns nonzero if CONST_POLY_INT X is an rtx representation of Y. */
bool
const_poly_int_hasher::equal (rtx x, const compare_type &y)
{
- if (CONST_POLY_INT_COEFFS (x).get_precision ()
- != y.coeffs[0].get_precision ())
+ if (GET_MODE (x) != y.first)
return false;
for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
- if (CONST_POLY_INT_COEFFS (x)[i] != y.coeffs[i])
+ if (CONST_POLY_INT_COEFFS (x)[i] != y.second.coeffs[i])
return false;
return true;
}
@@ -773,15 +771,24 @@ immed_wide_int_const (const poly_wide_int_ref &c, machine_mode mode)
gcc_assert (prec <= c.coeffs[0].get_precision ());
poly_wide_int newc = poly_wide_int::from (c, prec, SIGNED);
+ /* See whether we already have an rtx for this constant. */
inchash::hash h;
+ h.add_int (mode);
for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
h.add_wide_int (newc.coeffs[i]);
- rtx *slot = const_poly_int_htab->find_slot_with_hash (newc, h.end (),
- INSERT);
+ const_poly_int_hasher::compare_type typed_value (mode, newc);
+ rtx *slot = const_poly_int_htab->find_slot_with_hash (typed_value,
+ h.end (), INSERT);
rtx x = *slot;
if (x)
return x;
+ /* Create a new rtx. There's a choice to be made here between installing
+ the actual mode of the rtx or leaving it as VOIDmode (for consistency
+ with CONST_INT). In practice the handling of the codes is different
+ enough that we get no benefit from using VOIDmode, and various places
+ assume that VOIDmode implies CONST_INT. Using the real mode seems like
+ the right long-term direction anyway. */
typedef trailing_wide_ints<NUM_POLY_INT_COEFFS> twi;
size_t extra_size = twi::extra_size (prec);
x = rtx_alloc_v (CONST_POLY_INT,
@@ -1015,7 +1022,7 @@ validate_subreg (machine_mode omode, machine_mode imode,
{
/* It is invalid for the target to pick a register size for a mode
that isn't ordered wrt the size of that mode. */
- poly_uint64 block_size = ordered_max (isize, regsize);
+ poly_uint64 block_size = ordered_min (isize, regsize);
unsigned int start_reg;
poly_uint64 offset_within_reg;
if (!can_div_trunc_p (offset, block_size, &start_reg, &offset_within_reg)
@@ -1028,7 +1035,7 @@ validate_subreg (machine_mode omode, machine_mode imode,
}
rtx
-gen_rtx_SUBREG (machine_mode mode, rtx reg, poly_int64 offset)
+gen_rtx_SUBREG (machine_mode mode, rtx reg, poly_uint64 offset)
{
gcc_assert (validate_subreg (mode, GET_MODE (reg), reg, offset));
return gen_rtx_raw_SUBREG (mode, reg, offset);
@@ -1145,7 +1152,7 @@ byte_lowpart_offset (machine_mode outer_mode,
poly_int64
subreg_memory_offset (machine_mode outer_mode, machine_mode inner_mode,
- poly_int64 offset)
+ poly_uint64 offset)
{
if (paradoxical_subreg_p (outer_mode, inner_mode))
{
@@ -1628,12 +1635,12 @@ gen_lowpart_common (machine_mode mode, rtx x)
rtx
gen_highpart (machine_mode mode, rtx x)
{
- poly_int64 msize = GET_MODE_SIZE (mode);
+ poly_uint64 msize = GET_MODE_SIZE (mode);
rtx result;
/* This case loses if X is a subreg. To catch bugs early,
complain if an invalid MODE is used even in other cases. */
- gcc_assert (must_le (msize, UNITS_PER_WORD)
+ gcc_assert (must_le (msize, (unsigned int) UNITS_PER_WORD)
|| must_eq (msize, GET_MODE_UNIT_SIZE (GET_MODE (x))));
result = simplify_gen_subreg (mode, x, GET_MODE (x),
@@ -1669,8 +1676,8 @@ gen_highpart_mode (machine_mode outermode, machine_mode innermode, rtx exp)
/* Return the SUBREG_BYTE for a lowpart subreg whose outer mode has
OUTER_BYTES bytes and whose inner mode has INNER_BYTES bytes. */
-poly_int64
-subreg_size_lowpart_offset (poly_int64 outer_bytes, poly_int64 inner_bytes)
+poly_uint64
+subreg_size_lowpart_offset (poly_uint64 outer_bytes, poly_uint64 inner_bytes)
{
gcc_checking_assert (ordered_p (outer_bytes, inner_bytes));
if (may_gt (outer_bytes, inner_bytes))
@@ -1688,8 +1695,8 @@ subreg_size_lowpart_offset (poly_int64 outer_bytes, poly_int64 inner_bytes)
/* Return the SUBREG_BYTE for a highpart subreg whose outer mode has
OUTER_BYTES bytes and whose inner mode has INNER_BYTES bytes. */
-poly_int64
-subreg_size_highpart_offset (poly_int64 outer_bytes, poly_int64 inner_bytes)
+poly_uint64
+subreg_size_highpart_offset (poly_uint64 outer_bytes, poly_uint64 inner_bytes)
{
gcc_assert (must_ge (inner_bytes, outer_bytes));
@@ -1719,15 +1726,6 @@ subreg_lowpart_p (const_rtx x)
GET_MODE (SUBREG_REG (x))),
SUBREG_BYTE (x));
}
-
-/* Given that a subreg has outer mode OUTERMODE and inner mode INNERMODE,
- return the smaller of the two modes if they are different sizes,
- otherwise return the outer mode. */
-machine_mode
-narrower_subreg_mode (machine_mode outermode, machine_mode innermode)
-{
- return paradoxical_subreg_p (outermode, innermode) ? innermode : outermode;
-}
/* Return subword OFFSET of operand OP.
The word number, OFFSET, is interpreted as the word number starting
@@ -1755,7 +1753,7 @@ narrower_subreg_mode (machine_mode outermode, machine_mode innermode)
*/
rtx
-operand_subword (rtx op, poly_int64 offset, int validate_address,
+operand_subword (rtx op, poly_uint64 offset, int validate_address,
machine_mode mode)
{
if (mode == VOIDmode)
@@ -1804,7 +1802,7 @@ operand_subword (rtx op, poly_int64 offset, int validate_address,
MODE is the mode of OP, in case it is CONST_INT. */
rtx
-operand_subword_force (rtx op, poly_int64 offset, machine_mode mode)
+operand_subword_force (rtx op, poly_uint64 offset, machine_mode mode)
{
rtx result = operand_subword (op, offset, 1, mode);
@@ -1906,12 +1904,13 @@ get_mem_align_offset (rtx mem, unsigned int align)
tree byte_offset = component_ref_field_offset (expr);
tree bit_offset = DECL_FIELD_BIT_OFFSET (field);
+ poly_uint64 suboffset;
if (!byte_offset
- || !tree_fits_uhwi_p (byte_offset)
+ || !poly_int_tree_p (byte_offset, &suboffset)
|| !tree_fits_uhwi_p (bit_offset))
return -1;
- offset += tree_to_uhwi (byte_offset);
+ offset += suboffset;
offset += tree_to_uhwi (bit_offset) / BITS_PER_UNIT;
if (inner == NULL_TREE)
@@ -2140,10 +2139,9 @@ set_mem_attributes_minus_bitpos (rtx ref, tree t, int objectp,
{
attrs.expr = t2;
attrs.offset_known_p = false;
- if (tree_fits_uhwi_p (off_tree))
+ if (poly_int_tree_p (off_tree, &attrs.offset))
{
attrs.offset_known_p = true;
- attrs.offset = tree_to_uhwi (off_tree);
apply_bitpos = bitpos;
}
}
@@ -2171,7 +2169,7 @@ set_mem_attributes_minus_bitpos (rtx ref, tree t, int objectp,
}
poly_uint64 const_size;
- if (poly_tree_p (new_size, &const_size))
+ if (poly_int_tree_p (new_size, &const_size))
{
attrs.size_known_p = true;
attrs.size = const_size;
@@ -2620,7 +2618,6 @@ widen_memory_access (rtx memref, machine_mode mode, poly_int64 offset)
while (attrs.expr)
{
- poly_uint64 decl_size;
if (TREE_CODE (attrs.expr) == COMPONENT_REF)
{
tree field = TREE_OPERAND (attrs.expr, 1);
@@ -2634,29 +2631,29 @@ widen_memory_access (rtx memref, machine_mode mode, poly_int64 offset)
/* Is the field at least as large as the access? If so, ok,
otherwise strip back to the containing structure. */
- poly_uint64 field_size;
- if (poly_tree_p (DECL_SIZE_UNIT (field), &field_size)
- && must_ge (field_size, size)
+ if (poly_int_tree_p (DECL_SIZE_UNIT (field))
+ && must_ge (wi::to_poly_offset (DECL_SIZE_UNIT (field)), size)
&& must_ge (attrs.offset, 0))
break;
- poly_uint64 offset_val;
- if (!poly_tree_p (offset, &offset_val))
+ poly_uint64 suboffset;
+ if (!poly_int_tree_p (offset, &suboffset))
{
attrs.expr = NULL_TREE;
break;
}
attrs.expr = TREE_OPERAND (attrs.expr, 0);
- attrs.offset += offset_val;
+ attrs.offset += suboffset;
attrs.offset += (tree_to_uhwi (DECL_FIELD_BIT_OFFSET (field))
/ BITS_PER_UNIT);
}
/* Similarly for the decl. */
else if (DECL_P (attrs.expr)
&& DECL_SIZE_UNIT (attrs.expr)
- && poly_tree_p (DECL_SIZE_UNIT (attrs.expr), &decl_size)
- && must_ge (decl_size, size)
+ && poly_int_tree_p (DECL_SIZE_UNIT (attrs.expr))
+ && must_ge (wi::to_poly_offset (DECL_SIZE_UNIT (attrs.expr)),
+ size)
&& must_ge (attrs.offset, 0))
break;
else
@@ -2731,10 +2728,7 @@ set_mem_attrs_for_spill (rtx mem)
with perhaps the plus missing for offset = 0. */
addr = XEXP (mem, 0);
attrs.offset_known_p = true;
- attrs.offset = 0;
- if (GET_CODE (addr) == PLUS
- && CONST_INT_P (XEXP (addr, 1)))
- attrs.offset = INTVAL (XEXP (addr, 1));
+ strip_offset (addr, &attrs.offset);
set_mem_attrs (mem, &attrs);
MEM_NOTRAP_P (mem) = 1;
@@ -6643,5 +6637,6 @@ rtl_data::init_stack_alignment ()
stack_alignment_estimated = 0;
preferred_stack_boundary = STACK_BOUNDARY;
}
+
#include "gt-emit-rtl.h"
diff --git a/gcc/emit-rtl.h b/gcc/emit-rtl.h
index c537ee0f309..ac1f5ce1ede 100644
--- a/gcc/emit-rtl.h
+++ b/gcc/emit-rtl.h
@@ -369,7 +369,6 @@ extern void set_reg_attrs_for_parm (rtx, rtx);
extern void set_reg_attrs_for_decl_rtl (tree t, rtx x);
extern void adjust_reg_mode (rtx, machine_mode);
extern int mem_expr_equal_p (const_tree, const_tree);
-extern scalar_int_mode get_shift_amount_mode (machine_mode);
extern rtx gen_int_shift_amount (machine_mode, poly_int64);
extern bool need_atomic_barrier_p (enum memmodel, bool);
diff --git a/gcc/expmed.c b/gcc/expmed.c
index 92e45194e8a..e99ff2e3dde 100644
--- a/gcc/expmed.c
+++ b/gcc/expmed.c
@@ -920,7 +920,7 @@ store_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
is not allowed.
The mode must be fixed-size, since insertions into variable-sized
- objects must be handled before calling this function. */
+ objects are meant to be handled before calling this function. */
fixed_size_mode value_mode = as_a <fixed_size_mode> (GET_MODE (value));
if (value_mode == VOIDmode)
value_mode = smallest_int_mode_for_size (nwords * BITS_PER_WORD);
@@ -1861,8 +1861,9 @@ extract_integral_bit_field (rtx op0, opt_scalar_int_mode op0_mode,
/* Indicate for flow that the entire target reg is being set. */
emit_clobber (target);
- /* The mode must be fixed-size, since extractions from variable-sized
- objects must be handled before calling this function. */
+ /* The mode must be fixed-size, since extract_bit_field_1 handles
+ extractions from variable-sized objects before calling this
+ function. */
unsigned int target_size
= GET_MODE_SIZE (GET_MODE (target)).to_constant ();
last = get_last_insn ();
@@ -2050,9 +2051,9 @@ extract_bit_field (rtx str_rtx, poly_uint64 bitsize, poly_uint64 bitnum,
machine_mode mode1;
/* Handle -fstrict-volatile-bitfields in the cases where it applies. */
- if (may_ne (GET_MODE_BITSIZE (GET_MODE (str_rtx)), 0))
+ if (maybe_nonzero (GET_MODE_BITSIZE (GET_MODE (str_rtx))))
mode1 = GET_MODE (str_rtx);
- else if (target && may_ne (GET_MODE_BITSIZE (GET_MODE (target)), 0))
+ else if (target && maybe_nonzero (GET_MODE_BITSIZE (GET_MODE (target))))
mode1 = GET_MODE (target);
else
mode1 = tmode;
@@ -2352,7 +2353,7 @@ extract_low_bits (machine_mode mode, machine_mode src_mode, rtx src)
/* simplify_gen_subreg can't be used here, as if simplify_subreg
fails, it will happily create (subreg (symbol_ref)) or similar
invalid SUBREGs. */
- poly_int64 byte = subreg_lowpart_offset (mode, src_mode);
+ poly_uint64 byte = subreg_lowpart_offset (mode, src_mode);
rtx ret = simplify_subreg (mode, src, src_mode, byte);
if (ret)
return ret;
@@ -5277,10 +5278,6 @@ make_tree (tree type, rtx x)
return t;
- case CONST_POLY_INT:
- /* FIXME */
- gcc_unreachable ();
-
case CONST_VECTOR:
{
int units = CONST_VECTOR_NUNITS (x);
@@ -5375,6 +5372,9 @@ make_tree (tree type, rtx x)
/* fall through. */
default:
+ if (CONST_POLY_INT_P (x))
+ return wide_int_to_tree (type, const_poly_int_value (x));
+
t = build_decl (RTL_LOCATION (x), VAR_DECL, NULL_TREE, type);
/* If TYPE is a POINTER_TYPE, we might need to convert X from
diff --git a/gcc/expr.c b/gcc/expr.c
index 03cb881f08a..8e290dedda7 100644
--- a/gcc/expr.c
+++ b/gcc/expr.c
@@ -680,15 +680,6 @@ convert_modes (machine_mode mode, machine_mode oldmode, rtx x, int unsignedp)
return immed_wide_int_const (w, int_mode);
}
- if (CONST_POLY_INT_P (x)
- && is_int_mode (mode, &int_mode))
- {
- poly_wide_int val = poly_wide_int::from (const_poly_int_value (x),
- GET_MODE_PRECISION (int_mode),
- unsignedp ? UNSIGNED : SIGNED);
- return immed_wide_int_const (val, int_mode);
- }
-
/* We can do this with a gen_lowpart if both desired and current modes
are integer, and this is either a constant integer, a register, or a
non-volatile MEM. */
@@ -697,6 +688,7 @@ convert_modes (machine_mode mode, machine_mode oldmode, rtx x, int unsignedp)
&& is_int_mode (oldmode, &int_oldmode)
&& GET_MODE_PRECISION (int_mode) <= GET_MODE_PRECISION (int_oldmode)
&& ((MEM_P (x) && !MEM_VOLATILE_P (x) && direct_load[(int) int_mode])
+ || CONST_POLY_INT_P (x)
|| (REG_P (x)
&& (!HARD_REGISTER_P (x)
|| targetm.hard_regno_mode_ok (REGNO (x), int_mode))
@@ -2205,10 +2197,10 @@ emit_group_load_1 (rtx *tmps, rtx dst, rtx orig_src, tree type,
tmps[i] = src;
else if (GET_CODE (src) == CONCAT)
{
- poly_uint64 slen = GET_MODE_SIZE (GET_MODE (src));
- poly_uint64 slen0 = GET_MODE_SIZE (GET_MODE (XEXP (src, 0)));
+ poly_int64 slen = GET_MODE_SIZE (GET_MODE (src));
+ poly_int64 slen0 = GET_MODE_SIZE (GET_MODE (XEXP (src, 0)));
unsigned int elt;
- poly_uint64 subpos;
+ poly_int64 subpos;
if (can_div_trunc_p (bytepos, slen0, &elt, &subpos)
&& must_le (subpos + bytelen, slen0))
@@ -2245,7 +2237,7 @@ emit_group_load_1 (rtx *tmps, rtx dst, rtx orig_src, tree type,
else if (VECTOR_MODE_P (GET_MODE (dst))
&& REG_P (src))
{
- poly_uint16 slen = GET_MODE_SIZE (GET_MODE (src));
+ poly_uint64 slen = GET_MODE_SIZE (GET_MODE (src));
rtx mem;
mem = assign_stack_temp (GET_MODE (src), slen);
@@ -2975,7 +2967,7 @@ clear_storage_hints (rtx object, rtx size, enum block_op_methods method,
just move a zero. Otherwise, do this a piece at a time. */
poly_int64 size_val;
if (mode != BLKmode
- && poly_int_const_p (size, &size_val)
+ && poly_int_rtx_p (size, &size_val)
&& must_eq (size_val, GET_MODE_SIZE (mode)))
{
rtx zero = CONST0_RTX (mode);
@@ -3359,17 +3351,16 @@ emit_move_resolve_push (machine_mode mode, rtx x)
poly_int64 adjust = GET_MODE_SIZE (mode);
#ifdef PUSH_ROUNDING
- adjust = PUSH_ROUNDING (MACRO_INT (adjust));
+ adjust = PUSH_ROUNDING (adjust);
#endif
if (code == PRE_DEC || code == POST_DEC)
adjust = -adjust;
else if (code == PRE_MODIFY || code == POST_MODIFY)
{
rtx expr = XEXP (XEXP (x, 0), 1);
- poly_int64 val;
gcc_assert (GET_CODE (expr) == PLUS || GET_CODE (expr) == MINUS);
- val = rtx_to_poly_int64 (XEXP (expr, 1));
+ poly_int64 val = rtx_to_poly_int64 (XEXP (expr, 1));
if (GET_CODE (expr) == MINUS)
val = -val;
gcc_assert (must_eq (adjust, val) || must_eq (adjust, -val));
@@ -3414,11 +3405,11 @@ emit_move_complex_push (machine_mode mode, rtx x, rtx y)
bool imag_first;
#ifdef PUSH_ROUNDING
- unsigned int submodesize = GET_MODE_SIZE (submode);
+ poly_int64 submodesize = GET_MODE_SIZE (submode);
/* In case we output to the stack, but the size is smaller than the
machine can push exactly, we need to use move instructions. */
- if (PUSH_ROUNDING (submodesize) != submodesize)
+ if (may_ne (PUSH_ROUNDING (submodesize), submodesize))
{
x = emit_move_resolve_push (mode, x);
return emit_move_insn (x, y);
@@ -3914,7 +3905,7 @@ push_block (rtx size, poly_int64 extra, int below)
else
{
poly_int64 csize;
- if (poly_int_const_p (size, &csize))
+ if (poly_int_rtx_p (size, &csize))
temp = plus_constant (Pmode, virtual_outgoing_args_rtx,
-csize - (below ? 0 : extra));
else if (maybe_nonzero (extra) && !below)
@@ -4129,11 +4120,11 @@ static void
emit_single_push_insn_1 (machine_mode mode, rtx x, tree type)
{
rtx dest_addr;
- poly_int64 rounded_size = PUSH_ROUNDING (MACRO_INT (GET_MODE_SIZE (mode)));
+ poly_int64 rounded_size = PUSH_ROUNDING (GET_MODE_SIZE (mode));
rtx dest;
enum insn_code icode;
- stack_pointer_delta += PUSH_ROUNDING (MACRO_INT (GET_MODE_SIZE (mode)));
+ stack_pointer_delta += PUSH_ROUNDING (GET_MODE_SIZE (mode));
/* If there is push pattern, use it. Otherwise try old way of throwing
MEM representing push operation to move expander. */
icode = optab_handler (push_optab, mode);
@@ -4365,9 +4356,9 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
and such small pushes do rounding that causes trouble. */
&& ((!targetm.slow_unaligned_access (word_mode, align))
|| align >= BIGGEST_ALIGNMENT
- || (PUSH_ROUNDING (align / BITS_PER_UNIT)
- == (align / BITS_PER_UNIT)))
- && (HOST_WIDE_INT) PUSH_ROUNDING (INTVAL (size)) == INTVAL (size))
+ || must_eq (PUSH_ROUNDING (align / BITS_PER_UNIT),
+ align / BITS_PER_UNIT))
+ && must_eq (PUSH_ROUNDING (INTVAL (size)), INTVAL (size)))
{
/* Push padding now if padding above and stack grows down,
or if padding below and stack grows up.
@@ -4408,7 +4399,7 @@ emit_push_insn (rtx x, machine_mode mode, tree type, rtx size,
temp = push_block (size, extra, where_pad == PAD_DOWNWARD);
extra = 0;
}
- else if (poly_int_const_p (args_so_far, &offset))
+ else if (poly_int_rtx_p (args_so_far, &offset))
temp = memory_address (BLKmode,
plus_constant (Pmode, args_addr,
offset + skip));
@@ -4829,8 +4820,8 @@ optimize_bitfield_assignment_op (poly_uint64 pbitsize,
*BITSTART and *BITEND. */
void
-get_bit_range (poly_uint64 *bitstart, poly_uint64 *bitend, tree exp,
- poly_int64 *bitpos, tree *offset)
+get_bit_range (poly_uint64_pod *bitstart, poly_uint64_pod *bitend, tree exp,
+ poly_int64_pod *bitpos, tree *offset)
{
poly_int64 bitoffset;
tree field, repr;
@@ -4871,8 +4862,8 @@ get_bit_range (poly_uint64 *bitstart, poly_uint64 *bitend, tree exp,
repr are the same by construction if they are not constants,
see finish_bitfield_layout. */
poly_uint64 field_offset, repr_offset;
- if (poly_tree_p (DECL_FIELD_OFFSET (field), &field_offset)
- && poly_tree_p (DECL_FIELD_OFFSET (repr), &repr_offset))
+ if (poly_int_tree_p (DECL_FIELD_OFFSET (field), &field_offset)
+ && poly_int_tree_p (DECL_FIELD_OFFSET (repr), &repr_offset))
bitoffset = (field_offset - repr_offset) * BITS_PER_UNIT;
else
bitoffset = 0;
@@ -5134,12 +5125,12 @@ expand_assignment (tree to, tree from, bool nontemporal)
unsigned short inner_bitsize = GET_MODE_UNIT_BITSIZE (to_mode);
if (COMPLEX_MODE_P (TYPE_MODE (TREE_TYPE (from)))
&& known_zero (bitpos)
- && must_eq (bitsize, inner_bitsize * 2))
+ && must_eq (bitsize, mode_bitsize))
result = store_expr (from, to_rtx, false, nontemporal, reversep);
else if (must_eq (bitsize, inner_bitsize)
&& (known_zero (bitpos)
|| must_eq (bitpos, inner_bitsize)))
- result = store_expr (from, XEXP (to_rtx, may_ne (bitpos, 0)),
+ result = store_expr (from, XEXP (to_rtx, maybe_nonzero (bitpos)),
false, nontemporal, reversep);
else if (must_le (bitpos + bitsize, inner_bitsize))
result = store_field (XEXP (to_rtx, 0), bitsize, bitpos,
@@ -6122,12 +6113,12 @@ store_constructor_field (rtx target, poly_uint64 bitsize, poly_int64 bitpos,
alias_set_type alias_set, bool reverse)
{
poly_int64 bytepos;
- poly_int64 bytesize;
+ poly_uint64 bytesize;
if (TREE_CODE (exp) == CONSTRUCTOR
/* We can only call store_constructor recursively if the size and
bit position are on a byte boundary. */
&& multiple_p (bitpos, BITS_PER_UNIT, &bytepos)
- && known_nonzero (bitsize)
+ && maybe_nonzero (bitsize)
&& multiple_p (bitsize, BITS_PER_UNIT, &bytesize)
/* If we have a nonzero bitpos for a register target, then we just
let store_field do the bitfield handling. This is unlikely to
@@ -6191,8 +6182,7 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
{
tree type = TREE_TYPE (exp);
HOST_WIDE_INT exp_size = int_size_in_bytes (type);
- poly_int64 bitregion_end
- = maybe_nonzero (size) ? size * BITS_PER_UNIT - 1 : 0;
+ poly_int64 bitregion_end = must_gt (size, 0) ? size * BITS_PER_UNIT - 1 : 0;
switch (TREE_CODE (type))
{
@@ -6236,8 +6226,9 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
the whole structure first. Don't do this if TARGET is a
register whose mode size isn't equal to SIZE since
clear_storage can't handle this case. */
- else if ((((int) CONSTRUCTOR_NELTS (exp) != fields_length (type))
- || mostly_zeros_p (exp))
+ else if (known_size_p (size)
+ && (((int) CONSTRUCTOR_NELTS (exp) != fields_length (type))
+ || mostly_zeros_p (exp))
&& (!REG_P (target)
|| must_eq (GET_MODE_SIZE (GET_MODE (target)), size)))
{
@@ -6423,7 +6414,7 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
need_to_clear = 1;
}
- if (need_to_clear && maybe_nonzero (size))
+ if (need_to_clear && may_gt (size, 0))
{
if (REG_P (target))
emit_move_insn (target, CONST0_RTX (GET_MODE (target)));
@@ -6453,7 +6444,7 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
mode = TYPE_MODE (elttype);
if (mode != BLKmode)
bitsize = GET_MODE_BITSIZE (mode);
- else if (!poly_tree_p (TYPE_SIZE (elttype), &bitsize))
+ else if (!poly_int_tree_p (TYPE_SIZE (elttype), &bitsize))
bitsize = -1;
if (index != NULL_TREE && TREE_CODE (index) == RANGE_EXPR)
@@ -6709,7 +6700,7 @@ store_constructor (tree exp, rtx target, int cleared, poly_int64 size,
|| 4 * zero_count >= 3 * count);
}
- if (need_to_clear && maybe_nonzero (size) && !vector)
+ if (need_to_clear && may_gt (size, 0) && !vector)
{
if (REG_P (target))
emit_move_insn (target, CONST0_RTX (mode));
@@ -6812,8 +6803,11 @@ store_field (rtx target, poly_int64 bitsize, poly_int64 bitpos,
return const0_rtx;
/* If we have nothing to store, do nothing unless the expression has
- side-effects. */
- if (known_zero (bitsize))
+ side-effects. Don't do that for zero sized addressable lhs of
+ calls. */
+ if (known_zero (bitsize)
+ && (!TREE_ADDRESSABLE (TREE_TYPE (exp))
+ || TREE_CODE (exp) != CALL_EXPR))
return expand_expr (exp, const0_rtx, VOIDmode, EXPAND_NORMAL);
if (GET_CODE (target) == CONCAT)
@@ -6828,7 +6822,7 @@ store_field (rtx target, poly_int64 bitsize, poly_int64 bitpos,
is a bit field, we cannot use addressing to access it.
Use bit-field techniques or SUBREG to store in it. */
- poly_uint64 type_bitsize;
+ poly_int64 decl_bitsize;
if (mode == VOIDmode
|| (mode != BLKmode && ! direct_store[(int) mode]
&& GET_MODE_CLASS (mode) != MODE_COMPLEX_INT
@@ -6849,8 +6843,8 @@ store_field (rtx target, poly_int64 bitsize, poly_int64 bitpos,
RHS isn't the same size as the bitfield, we must use bitfield
operations. */
|| (known_size_p (bitsize)
- && poly_tree_p (TYPE_SIZE (TREE_TYPE (exp)))
- && may_ne (wi::to_poly_wide (TYPE_SIZE (TREE_TYPE (exp))), bitsize)
+ && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (exp)))
+ && may_ne (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (exp))), bitsize)
/* Except for initialization of full bytes from a CONSTRUCTOR, which
we will handle specially below. */
&& !(TREE_CODE (exp) == CONSTRUCTOR
@@ -6867,9 +6861,9 @@ store_field (rtx target, poly_int64 bitsize, poly_int64 bitpos,
|| TREE_CODE (exp) != COMPONENT_REF
|| !multiple_p (bitsize, BITS_PER_UNIT)
|| !multiple_p (bitpos, BITS_PER_UNIT)
- || !poly_tree_p (DECL_SIZE (TREE_OPERAND (exp, 1)))
- || may_ne (wi::to_poly_wide (DECL_SIZE (TREE_OPERAND (exp, 1))),
- bitsize)))
+ || !poly_int_tree_p (DECL_SIZE (TREE_OPERAND (exp, 1)),
+ &decl_bitsize)
+ || may_ne (decl_bitsize, bitsize)))
/* If we are expanding a MEM_REF of a non-BLKmode non-addressable
decl we must use bitfield operations. */
|| (known_size_p (bitsize)
@@ -6906,8 +6900,9 @@ store_field (rtx target, poly_int64 bitsize, poly_int64 bitpos,
temp = expand_normal (exp);
- /* We don't support variable-sized BLKmode bitfields, which would
- imply variable-sized scalar integers. */
+ /* We don't support variable-sized BLKmode bitfields, since our
+ handling of BLKmode is bound up with the ability to break
+ things into words. */
gcc_assert (mode != BLKmode || bitsize.is_constant ());
/* Handle calls that return values in multiple non-contiguous locations.
@@ -7050,8 +7045,8 @@ store_field (rtx target, poly_int64 bitsize, poly_int64 bitpos,
this case, but the address of the object can be found. */
tree
-get_inner_reference (tree exp, poly_int64 *pbitsize,
- poly_int64 *pbitpos, tree *poffset,
+get_inner_reference (tree exp, poly_int64_pod *pbitsize,
+ poly_int64_pod *pbitpos, tree *poffset,
machine_mode *pmode, int *punsignedp,
int *preversep, int *pvolatilep)
{
@@ -7139,7 +7134,7 @@ get_inner_reference (tree exp, poly_int64 *pbitsize,
break;
offset = size_binop (PLUS_EXPR, offset, this_offset);
- bit_offset += wi::to_offset (DECL_FIELD_BIT_OFFSET (field));
+ bit_offset += wi::to_poly_offset (DECL_FIELD_BIT_OFFSET (field));
/* ??? Right now we don't do anything with DECL_OFFSET_ALIGN. */
}
@@ -7207,7 +7202,7 @@ get_inner_reference (tree exp, poly_int64 *pbitsize,
/* If OFFSET is constant, see if we can return the whole thing as a
constant bit position. Make sure to handle overflow during
this conversion. */
- if (poly_tree_p (offset))
+ if (poly_int_tree_p (offset))
{
poly_offset_int tem = wi::sext (wi::to_poly_offset (offset),
TYPE_PRECISION (sizetype));
@@ -9199,8 +9194,8 @@ expand_expr_real_2 (sepops ops, rtx target, machine_mode tmode,
>= GET_MODE_BITSIZE (word_mode)))
{
rtx_insn *seq, *seq_old;
- poly_int64 high_off = subreg_highpart_offset (word_mode,
- int_mode);
+ poly_uint64 high_off = subreg_highpart_offset (word_mode,
+ int_mode);
bool extend_unsigned
= TYPE_UNSIGNED (TREE_TYPE (gimple_assign_rhs1 (def)));
rtx low = lowpart_subreg (word_mode, op0, int_mode);
@@ -10249,7 +10244,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
poly_uint64 type_size;
if (known_zero (offset)
&& !reverse
- && poly_tree_p (TYPE_SIZE (type), &type_size)
+ && poly_int_tree_p (TYPE_SIZE (type), &type_size)
&& must_eq (GET_MODE_BITSIZE (DECL_MODE (base)), type_size))
return expand_expr (build1 (VIEW_CONVERT_EXPR, type, base),
target, tmode, modifier);
@@ -10623,7 +10618,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
if (known_zero (bitpos)
&& must_eq (bitsize,
GET_MODE_BITSIZE (GET_MODE (XEXP (op0, 0))))
- && must_ne (bitsize, 0))
+ && maybe_nonzero (bitsize))
{
op0 = XEXP (op0, 0);
mode2 = GET_MODE (op0);
@@ -10632,8 +10627,8 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
GET_MODE_BITSIZE (GET_MODE (XEXP (op0, 0))))
&& must_eq (bitsize,
GET_MODE_BITSIZE (GET_MODE (XEXP (op0, 1))))
- && must_ne (bitpos, 0)
- && must_ne (bitsize, 0))
+ && maybe_nonzero (bitpos)
+ && maybe_nonzero (bitsize))
{
op0 = XEXP (op0, 1);
bitpos = 0;
@@ -10688,7 +10683,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
/* See the comment in expand_assignment for the rationale. */
if (mode1 != VOIDmode
- && may_ne (bitpos, 0)
+ && maybe_nonzero (bitpos)
&& may_gt (bitsize, 0)
&& multiple_p (bitpos, BITS_PER_UNIT, &bytepos)
&& multiple_p (bitpos, bitsize)
@@ -10706,7 +10701,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
/* If OFFSET is making OP0 more aligned than BIGGEST_ALIGNMENT,
record its alignment as BIGGEST_ALIGNMENT. */
if (MEM_P (op0)
- && must_eq (bitpos, 0)
+ && known_zero (bitpos)
&& offset != 0
&& is_aligning_offset (offset, tem))
set_mem_align (op0, BIGGEST_ALIGNMENT);
@@ -10761,8 +10756,8 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
we must use bitfield operations. */
|| (known_size_p (bitsize)
&& TYPE_SIZE (TREE_TYPE (exp))
- && poly_tree_p (TYPE_SIZE (TREE_TYPE (exp)))
- && may_ne (wi::to_poly_wide (TYPE_SIZE (TREE_TYPE (exp))),
+ && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (exp)))
+ && may_ne (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (exp))),
bitsize)))
{
machine_mode ext_mode = mode;
@@ -10957,7 +10952,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
/* If we are converting to BLKmode, try to avoid an intermediate
temporary by fetching an inner memory reference. */
if (mode == BLKmode
- && poly_tree_p (TYPE_SIZE (type))
+ && poly_int_tree_p (TYPE_SIZE (type))
&& TYPE_MODE (TREE_TYPE (treeop0)) != BLKmode
&& handled_component_p (treeop0))
{
@@ -10975,7 +10970,7 @@ expand_expr_real_1 (tree exp, rtx target, machine_mode tmode,
&& multiple_p (bitpos, BITS_PER_UNIT, &bytepos)
&& !reversep
&& known_size_p (bitsize)
- && must_eq (wi::to_poly_wide (TYPE_SIZE (type)), bitsize))
+ && must_eq (wi::to_poly_offset (TYPE_SIZE (type)), bitsize))
{
/* See the normal_inner_ref case for the rationale. */
orig_op0
diff --git a/gcc/expr.h b/gcc/expr.h
index 4aa3220e476..105c30e6551 100644
--- a/gcc/expr.h
+++ b/gcc/expr.h
@@ -240,8 +240,8 @@ extern bool emit_push_insn (rtx, machine_mode, tree, rtx, unsigned int,
int, rtx, poly_int64, rtx, rtx, int, rtx, bool);
/* Extract the accessible bit-range from a COMPONENT_REF. */
-extern void get_bit_range (poly_uint64 *, poly_uint64 *, tree, poly_int64 *,
- tree *);
+extern void get_bit_range (poly_uint64_pod *, poly_uint64_pod *, tree,
+ poly_int64_pod *, tree *);
/* Expand an assignment that stores the value of FROM into TO. */
extern void expand_assignment (tree, tree, bool);
diff --git a/gcc/fold-const.c b/gcc/fold-const.c
index 5d35a104532..72e4f2d4c96 100644
--- a/gcc/fold-const.c
+++ b/gcc/fold-const.c
@@ -984,13 +984,10 @@ int_binop_types_match_p (enum tree_code code, const_tree type1, const_tree type2
&& TYPE_MODE (type1) == TYPE_MODE (type2);
}
-
-/* Combine two integer constants PARG1 and PARG2 under operation CODE
- to produce a new constant. Return NULL_TREE if we don't know how
- to evaluate CODE at compile-time. */
+/* Subroutine of int_const_binop_1 that handles two INTEGER_CSTs. */
static tree
-int_const_binop_1 (enum tree_code code, const_tree parg1, const_tree parg2,
+int_const_binop_2 (enum tree_code code, const_tree parg1, const_tree parg2,
int overflowable)
{
wide_int res;
@@ -1138,58 +1135,84 @@ int_const_binop_1 (enum tree_code code, const_tree parg1, const_tree parg2,
return t;
}
-tree
-int_const_binop (enum tree_code code, const_tree arg1, const_tree arg2)
+/* Combine two integer constants ARG1 and ARG2 under operation CODE
+ to produce a new constant. Return NULL_TREE if we don't know how
+ to evaluate CODE at compile-time. */
+
+static tree
+int_const_binop_1 (enum tree_code code, const_tree arg1, const_tree arg2,
+ int overflowable)
{
if (TREE_CODE (arg1) == INTEGER_CST && TREE_CODE (arg2) == INTEGER_CST)
- return int_const_binop_1 (code, arg1, arg2, 1);
+ return int_const_binop_2 (code, arg1, arg2, overflowable);
gcc_assert (NUM_POLY_INT_COEFFS != 1);
- if (poly_tree_p (arg1) && poly_tree_p (arg2))
+ if (poly_int_tree_p (arg1) && poly_int_tree_p (arg2))
{
poly_wide_int res;
bool overflow;
tree type = TREE_TYPE (arg1);
+ signop sign = TYPE_SIGN (type);
switch (code)
{
case PLUS_EXPR:
res = wi::add (wi::to_poly_wide (arg1),
- wi::to_poly_wide (arg2),
- TYPE_SIGN (type), &overflow);
- if (!overflow)
- return wide_int_to_tree (type, res);
+ wi::to_poly_wide (arg2), sign, &overflow);
break;
case MINUS_EXPR:
res = wi::sub (wi::to_poly_wide (arg1),
- wi::to_poly_wide (arg2),
- TYPE_SIGN (type), &overflow);
- if (!overflow)
- return wide_int_to_tree (type, res);
+ wi::to_poly_wide (arg2), sign, &overflow);
break;
case MULT_EXPR:
if (TREE_CODE (arg2) == INTEGER_CST)
- res = wi::mul (wi::to_poly_wide (arg1), wi::to_wide (arg2),
- TYPE_SIGN (type), &overflow);
+ res = wi::mul (wi::to_poly_wide (arg1),
+ wi::to_wide (arg2), sign, &overflow);
else if (TREE_CODE (arg1) == INTEGER_CST)
- res = wi::mul (wi::to_poly_wide (arg2), wi::to_wide (arg1),
- TYPE_SIGN (type), &overflow);
+ res = wi::mul (wi::to_poly_wide (arg2),
+ wi::to_wide (arg1), sign, &overflow);
else
- break;
- if (!overflow)
- return wide_int_to_tree (type, res);
+ return NULL_TREE;
break;
- default:
+ case LSHIFT_EXPR:
+ if (TREE_CODE (arg2) == INTEGER_CST)
+ res = wi::to_poly_wide (arg1) << wi::to_wide (arg2);
+ else
+ return NULL_TREE;
break;
+
+ case BIT_IOR_EXPR:
+ if (TREE_CODE (arg2) != INTEGER_CST
+ || !can_ior_p (wi::to_poly_wide (arg1), wi::to_wide (arg2),
+ &res))
+ return NULL_TREE;
+ break;
+
+ default:
+ return NULL_TREE;
}
+ return force_fit_type (type, res, overflowable,
+ (((sign == SIGNED || overflowable == -1)
+ && overflow)
+ | TREE_OVERFLOW (arg1) | TREE_OVERFLOW (arg2)));
}
return NULL_TREE;
}
+tree
+int_const_binop (enum tree_code code, const_tree arg1, const_tree arg2)
+{
+ return int_const_binop_1 (code, arg1, arg2, 1);
+}
+
+/* Return true if EXP is a VEC_DUPLICATE_CST or a VEC_SERIES_CST,
+ and if so express it as a linear series in *BASE_OUT and *STEP_OUT.
+ The step will be zero for VEC_DUPLICATE_CST. */
+
static bool
vec_series_equivalent_p (const_tree exp, tree *base_out, tree *step_out)
{
@@ -1223,7 +1246,7 @@ const_binop (enum tree_code code, tree arg1, tree arg2)
STRIP_NOPS (arg1);
STRIP_NOPS (arg2);
- if (poly_tree_p (arg1) && poly_tree_p (arg2))
+ if (poly_int_tree_p (arg1) && poly_int_tree_p (arg2))
{
if (code == POINTER_PLUS_EXPR)
return int_const_binop (PLUS_EXPR,
@@ -1788,10 +1811,7 @@ const_unop (enum tree_code code, tree type, tree arg0)
if (TREE_CODE (arg0) == INTEGER_CST)
return fold_not_const (arg0, type);
else if (POLY_INT_CST_P (arg0))
- {
- poly_wide_int res = -poly_int_cst_value (arg0) - 1;
- return wide_int_to_tree (type, res);
- }
+ return wide_int_to_tree (type, -poly_int_cst_value (arg0));
/* Perform BIT_NOT_EXPR on each element individually. */
else if (TREE_CODE (arg0) == VECTOR_CST)
{
@@ -1945,8 +1965,8 @@ size_binop_loc (location_t loc, enum tree_code code, tree arg0, tree arg1)
gcc_assert (int_binop_types_match_p (code, TREE_TYPE (arg0),
TREE_TYPE (arg1)));
- /* Handle the special case of two integer constants faster. */
- if (TREE_CODE (arg0) == INTEGER_CST && TREE_CODE (arg1) == INTEGER_CST)
+ /* Handle the special case of two poly_int constants faster. */
+ if (poly_int_tree_p (arg0) && poly_int_tree_p (arg1))
{
/* And some specific cases even faster than that. */
if (code == PLUS_EXPR)
@@ -1970,7 +1990,9 @@ size_binop_loc (location_t loc, enum tree_code code, tree arg0, tree arg1)
/* Handle general case of two integer constants. For sizetype
constant calculations we always want to know about overflow,
even in the unsigned case. */
- return int_const_binop_1 (code, arg0, arg1, -1);
+ tree res = int_const_binop_1 (code, arg0, arg1, -1);
+ if (res != NULL_TREE)
+ return res;
}
return fold_build2_loc (loc, code, type, arg0, arg1);
@@ -2294,18 +2316,19 @@ fold_convert_const_fixed_from_real (tree type, const_tree arg1)
static tree
fold_convert_const (enum tree_code code, tree type, tree arg1)
{
- if (TREE_TYPE (arg1) == type)
+ tree arg_type = TREE_TYPE (arg1);
+ if (arg_type == type)
return arg1;
/* We can't widen types, since the runtime value could overflow the
original type before being extended to the new type. */
if (POLY_INT_CST_P (arg1)
&& (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type))
- && TYPE_PRECISION (type) <= TYPE_PRECISION (TREE_TYPE (arg1)))
+ && TYPE_PRECISION (type) <= TYPE_PRECISION (arg_type))
return build_poly_int_cst (type,
poly_wide_int::from (poly_int_cst_value (arg1),
TYPE_PRECISION (type),
- TYPE_SIGN (type)));
+ TYPE_SIGN (arg_type)));
if (POINTER_TYPE_P (type) || INTEGRAL_TYPE_P (type)
|| TREE_CODE (type) == OFFSET_TYPE)
@@ -8462,7 +8485,7 @@ pointer_may_wrap_p (tree base, tree offset, poly_int64 bitpos)
int precision = TYPE_PRECISION (TREE_TYPE (base));
if (offset == NULL_TREE)
wi_offset = wi::zero (precision);
- else if (!poly_tree_p (offset) || TREE_OVERFLOW (offset))
+ else if (!poly_int_tree_p (offset) || TREE_OVERFLOW (offset))
return true;
else
wi_offset = wi::to_poly_wide (offset);
@@ -8474,26 +8497,26 @@ pointer_may_wrap_p (tree base, tree offset, poly_int64 bitpos)
if (overflow)
return true;
- poly_uint64 total_hwi;
- if (!total.to_uhwi (&total_hwi))
+ poly_uint64 total_hwi, size;
+ if (!total.to_uhwi (&total_hwi)
+ || !poly_int_tree_p (TYPE_SIZE_UNIT (TREE_TYPE (TREE_TYPE (base))),
+ &size)
+ || known_zero (size))
return true;
- HOST_WIDE_INT size = int_size_in_bytes (TREE_TYPE (TREE_TYPE (base)));
- if (size <= 0)
- return true;
+ if (must_le (total_hwi, size))
+ return false;
/* We can do slightly better for SIZE if we have an ADDR_EXPR of an
array. */
- if (TREE_CODE (base) == ADDR_EXPR)
- {
- HOST_WIDE_INT base_size;
-
- base_size = int_size_in_bytes (TREE_TYPE (TREE_OPERAND (base, 0)));
- if (base_size > 0 && size < base_size)
- size = base_size;
- }
+ if (TREE_CODE (base) == ADDR_EXPR
+ && poly_int_tree_p (TYPE_SIZE_UNIT (TREE_TYPE (TREE_OPERAND (base, 0))),
+ &size)
+ && maybe_nonzero (size)
+ && must_le (total_hwi, size))
+ return false;
- return may_gt (total_hwi, (unsigned HOST_WIDE_INT) size);
+ return true;
}
/* Return a positive integer when the symbol DECL is known to have
@@ -8591,7 +8614,7 @@ fold_comparison (location_t loc, enum tree_code code, tree type,
else
offset0 = size_binop (PLUS_EXPR, offset0,
TREE_OPERAND (arg0, 1));
- if (poly_tree_p (offset0))
+ if (poly_int_tree_p (offset0))
{
poly_offset_int tem = wi::sext (wi::to_poly_offset (offset0),
TYPE_PRECISION (sizetype));
@@ -8634,7 +8657,7 @@ fold_comparison (location_t loc, enum tree_code code, tree type,
else
offset1 = size_binop (PLUS_EXPR, offset1,
TREE_OPERAND (arg1, 1));
- if (poly_tree_p (offset1))
+ if (poly_int_tree_p (offset1))
{
poly_offset_int tem = wi::sext (wi::to_poly_offset (offset1),
TYPE_PRECISION (sizetype));
@@ -9697,7 +9720,10 @@ fold_binary_loc (location_t loc,
/* (A << C1) + (A >> C2) if A is unsigned and C1+C2 is the size of A
is a rotate of A by C1 bits. */
/* (A << B) + (A >> (Z - B)) if A is unsigned and Z is the size of A
- is a rotate of A by B bits. */
+ is a rotate of A by B bits.
+ Similarly for (A << B) | (A >> (-B & C3)) where C3 is Z-1,
+ though in this case CODE must be | and not + or ^, otherwise
+ it doesn't return A when B is 0. */
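+ /* For instance, with B == 0 the | form gives (A << 0) | (A >> 0),
+ which is A, whereas + would give A + A and ^ would give 0. */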
{
enum tree_code code0, code1;
tree rtype;
@@ -9715,25 +9741,32 @@ fold_binary_loc (location_t loc,
== GET_MODE_UNIT_PRECISION (TYPE_MODE (rtype))))
{
tree tree01, tree11;
+ tree orig_tree01, orig_tree11;
enum tree_code code01, code11;
- tree01 = TREE_OPERAND (arg0, 1);
- tree11 = TREE_OPERAND (arg1, 1);
+ tree01 = orig_tree01 = TREE_OPERAND (arg0, 1);
+ tree11 = orig_tree11 = TREE_OPERAND (arg1, 1);
STRIP_NOPS (tree01);
STRIP_NOPS (tree11);
code01 = TREE_CODE (tree01);
code11 = TREE_CODE (tree11);
+ if (code11 != MINUS_EXPR
+ && (code01 == MINUS_EXPR || code01 == BIT_AND_EXPR))
+ {
+ std::swap (code0, code1);
+ std::swap (code01, code11);
+ std::swap (tree01, tree11);
+ std::swap (orig_tree01, orig_tree11);
+ }
if (code01 == INTEGER_CST
&& code11 == INTEGER_CST
&& (wi::to_widest (tree01) + wi::to_widest (tree11)
- == element_precision (TREE_TYPE (TREE_OPERAND (arg0, 0)))))
+ == element_precision (rtype)))
{
tem = build2_loc (loc, LROTATE_EXPR,
- TREE_TYPE (TREE_OPERAND (arg0, 0)),
- TREE_OPERAND (arg0, 0),
+ rtype, TREE_OPERAND (arg0, 0),
code0 == LSHIFT_EXPR
- ? TREE_OPERAND (arg0, 1)
- : TREE_OPERAND (arg1, 1));
+ ? orig_tree01 : orig_tree11);
return fold_convert_loc (loc, type, tem);
}
else if (code11 == MINUS_EXPR)
@@ -9745,39 +9778,37 @@ fold_binary_loc (location_t loc,
STRIP_NOPS (tree111);
if (TREE_CODE (tree110) == INTEGER_CST
&& 0 == compare_tree_int (tree110,
- element_precision
- (TREE_TYPE (TREE_OPERAND
- (arg0, 0))))
+ element_precision (rtype))
&& operand_equal_p (tree01, tree111, 0))
- return
- fold_convert_loc (loc, type,
- build2 ((code0 == LSHIFT_EXPR
- ? LROTATE_EXPR
- : RROTATE_EXPR),
- TREE_TYPE (TREE_OPERAND (arg0, 0)),
- TREE_OPERAND (arg0, 0),
- TREE_OPERAND (arg0, 1)));
+ {
+ tem = build2_loc (loc, (code0 == LSHIFT_EXPR
+ ? LROTATE_EXPR : RROTATE_EXPR),
+ rtype, TREE_OPERAND (arg0, 0),
+ orig_tree01);
+ return fold_convert_loc (loc, type, tem);
+ }
}
- else if (code01 == MINUS_EXPR)
+ else if (code == BIT_IOR_EXPR
+ && code11 == BIT_AND_EXPR
+ && pow2p_hwi (element_precision (rtype)))
{
- tree tree010, tree011;
- tree010 = TREE_OPERAND (tree01, 0);
- tree011 = TREE_OPERAND (tree01, 1);
- STRIP_NOPS (tree010);
- STRIP_NOPS (tree011);
- if (TREE_CODE (tree010) == INTEGER_CST
- && 0 == compare_tree_int (tree010,
- element_precision
- (TREE_TYPE (TREE_OPERAND
- (arg0, 0))))
- && operand_equal_p (tree11, tree011, 0))
- return fold_convert_loc
- (loc, type,
- build2 ((code0 != LSHIFT_EXPR
- ? LROTATE_EXPR
- : RROTATE_EXPR),
- TREE_TYPE (TREE_OPERAND (arg0, 0)),
- TREE_OPERAND (arg0, 0), TREE_OPERAND (arg1, 1)));
+ tree tree110, tree111;
+ tree110 = TREE_OPERAND (tree11, 0);
+ tree111 = TREE_OPERAND (tree11, 1);
+ STRIP_NOPS (tree110);
+ STRIP_NOPS (tree111);
+ if (TREE_CODE (tree110) == NEGATE_EXPR
+ && TREE_CODE (tree111) == INTEGER_CST
+ && 0 == compare_tree_int (tree111,
+ element_precision (rtype) - 1)
+ && operand_equal_p (tree01, TREE_OPERAND (tree110, 0), 0))
+ {
+ tem = build2_loc (loc, (code0 == LSHIFT_EXPR
+ ? LROTATE_EXPR : RROTATE_EXPR),
+ rtype, TREE_OPERAND (arg0, 0),
+ orig_tree01);
+ return fold_convert_loc (loc, type, tem);
+ }
}
}
}
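
Note: this hunk teaches the folder the portable rotate idiom named in the updated comment. A standalone illustration (not from the patch) of why the combining code must be |: at a shift count of 0 both halves equal x, and only | returns x, whereas + would yield 2*x and ^ would yield 0:

    #include <stdint.h>

    /* Valid for 0 <= n <= 31; (-n & 31) is 32 - n for nonzero n and 0
       for n == 0, so the expression stays free of out-of-range shifts.  */
    static uint32_t
    rotl32 (uint32_t x, unsigned int n)
    {
      return (x << n) | (x >> (-n & 31));
    }
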
@@ -10770,40 +10801,6 @@ fold_binary_loc (location_t loc,
&& code == NE_EXPR)
return non_lvalue_loc (loc, fold_convert_loc (loc, type, arg0));
- /* Transform comparisons of the form X +- Y CMP X to Y CMP 0. */
- if ((TREE_CODE (arg0) == PLUS_EXPR
- || TREE_CODE (arg0) == POINTER_PLUS_EXPR
- || TREE_CODE (arg0) == MINUS_EXPR)
- && operand_equal_p (tree_strip_nop_conversions (TREE_OPERAND (arg0,
- 0)),
- arg1, 0)
- && (INTEGRAL_TYPE_P (TREE_TYPE (arg0))
- || POINTER_TYPE_P (TREE_TYPE (arg0))))
- {
- tree val = TREE_OPERAND (arg0, 1);
- val = fold_build2_loc (loc, code, type, val,
- build_int_cst (TREE_TYPE (val), 0));
- return omit_two_operands_loc (loc, type, val,
- TREE_OPERAND (arg0, 0), arg1);
- }
-
- /* Transform comparisons of the form X CMP X +- Y to Y CMP 0. */
- if ((TREE_CODE (arg1) == PLUS_EXPR
- || TREE_CODE (arg1) == POINTER_PLUS_EXPR
- || TREE_CODE (arg1) == MINUS_EXPR)
- && operand_equal_p (tree_strip_nop_conversions (TREE_OPERAND (arg1,
- 0)),
- arg0, 0)
- && (INTEGRAL_TYPE_P (TREE_TYPE (arg1))
- || POINTER_TYPE_P (TREE_TYPE (arg1))))
- {
- tree val = TREE_OPERAND (arg1, 1);
- val = fold_build2_loc (loc, code, type, val,
- build_int_cst (TREE_TYPE (val), 0));
- return omit_two_operands_loc (loc, type, val,
- TREE_OPERAND (arg1, 0), arg0);
- }
-
/* If this is an EQ or NE comparison with zero and ARG0 is
(1 << foo) & bar, convert it to (bar >> foo) & 1. Both require
two operations, but the latter can be done in one less insn
@@ -11186,130 +11183,38 @@ fold_binary_loc (location_t loc,
/* Transform comparisons of the form X +- C CMP X. */
if ((TREE_CODE (arg0) == PLUS_EXPR || TREE_CODE (arg0) == MINUS_EXPR)
&& operand_equal_p (TREE_OPERAND (arg0, 0), arg1, 0)
- && ((TREE_CODE (TREE_OPERAND (arg0, 1)) == REAL_CST
- && !HONOR_SNANS (arg0))
- || (TREE_CODE (TREE_OPERAND (arg0, 1)) == INTEGER_CST
- && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (arg1)))))
+ && TREE_CODE (TREE_OPERAND (arg0, 1)) == REAL_CST
+ && !HONOR_SNANS (arg0))
{
tree arg01 = TREE_OPERAND (arg0, 1);
enum tree_code code0 = TREE_CODE (arg0);
- int is_positive;
-
- if (TREE_CODE (arg01) == REAL_CST)
- is_positive = REAL_VALUE_NEGATIVE (TREE_REAL_CST (arg01)) ? -1 : 1;
- else
- is_positive = tree_int_cst_sgn (arg01);
+ int is_positive = REAL_VALUE_NEGATIVE (TREE_REAL_CST (arg01)) ? -1 : 1;
/* (X - c) > X becomes false. */
if (code == GT_EXPR
&& ((code0 == MINUS_EXPR && is_positive >= 0)
|| (code0 == PLUS_EXPR && is_positive <= 0)))
- {
- if (TREE_CODE (arg01) == INTEGER_CST
- && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (arg1)))
- fold_overflow_warning (("assuming signed overflow does not "
- "occur when assuming that (X - c) > X "
- "is always false"),
- WARN_STRICT_OVERFLOW_ALL);
- return constant_boolean_node (0, type);
- }
+ return constant_boolean_node (0, type);
/* Likewise (X + c) < X becomes false. */
if (code == LT_EXPR
&& ((code0 == PLUS_EXPR && is_positive >= 0)
|| (code0 == MINUS_EXPR && is_positive <= 0)))
- {
- if (TREE_CODE (arg01) == INTEGER_CST
- && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (arg1)))
- fold_overflow_warning (("assuming signed overflow does not "
- "occur when assuming that "
- "(X + c) < X is always false"),
- WARN_STRICT_OVERFLOW_ALL);
- return constant_boolean_node (0, type);
- }
+ return constant_boolean_node (0, type);
/* Convert (X - c) <= X to true. */
if (!HONOR_NANS (arg1)
&& code == LE_EXPR
&& ((code0 == MINUS_EXPR && is_positive >= 0)
|| (code0 == PLUS_EXPR && is_positive <= 0)))
- {
- if (TREE_CODE (arg01) == INTEGER_CST
- && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (arg1)))
- fold_overflow_warning (("assuming signed overflow does not "
- "occur when assuming that "
- "(X - c) <= X is always true"),
- WARN_STRICT_OVERFLOW_ALL);
- return constant_boolean_node (1, type);
- }
+ return constant_boolean_node (1, type);
/* Convert (X + c) >= X to true. */
if (!HONOR_NANS (arg1)
&& code == GE_EXPR
&& ((code0 == PLUS_EXPR && is_positive >= 0)
|| (code0 == MINUS_EXPR && is_positive <= 0)))
- {
- if (TREE_CODE (arg01) == INTEGER_CST
- && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (arg1)))
- fold_overflow_warning (("assuming signed overflow does not "
- "occur when assuming that "
- "(X + c) >= X is always true"),
- WARN_STRICT_OVERFLOW_ALL);
- return constant_boolean_node (1, type);
- }
-
- if (TREE_CODE (arg01) == INTEGER_CST)
- {
- /* Convert X + c > X and X - c < X to true for integers. */
- if (code == GT_EXPR
- && ((code0 == PLUS_EXPR && is_positive > 0)
- || (code0 == MINUS_EXPR && is_positive < 0)))
- {
- if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (arg1)))
- fold_overflow_warning (("assuming signed overflow does "
- "not occur when assuming that "
- "(X + c) > X is always true"),
- WARN_STRICT_OVERFLOW_ALL);
- return constant_boolean_node (1, type);
- }
-
- if (code == LT_EXPR
- && ((code0 == MINUS_EXPR && is_positive > 0)
- || (code0 == PLUS_EXPR && is_positive < 0)))
- {
- if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (arg1)))
- fold_overflow_warning (("assuming signed overflow does "
- "not occur when assuming that "
- "(X - c) < X is always true"),
- WARN_STRICT_OVERFLOW_ALL);
- return constant_boolean_node (1, type);
- }
-
- /* Convert X + c <= X and X - c >= X to false for integers. */
- if (code == LE_EXPR
- && ((code0 == PLUS_EXPR && is_positive > 0)
- || (code0 == MINUS_EXPR && is_positive < 0)))
- {
- if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (arg1)))
- fold_overflow_warning (("assuming signed overflow does "
- "not occur when assuming that "
- "(X + c) <= X is always false"),
- WARN_STRICT_OVERFLOW_ALL);
- return constant_boolean_node (0, type);
- }
-
- if (code == GE_EXPR
- && ((code0 == MINUS_EXPR && is_positive > 0)
- || (code0 == PLUS_EXPR && is_positive < 0)))
- {
- if (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (arg1)))
- fold_overflow_warning (("assuming signed overflow does "
- "not occur when assuming that "
- "(X - c) >= X is always false"),
- WARN_STRICT_OVERFLOW_ALL);
- return constant_boolean_node (0, type);
- }
- }
+ return constant_boolean_node (1, type);
}
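
Note: after this hunk only the REAL_CST cases remain; the INTEGER_CST variants and their strict-overflow warnings are deleted here, presumably because the match-and-simplify framework now covers them (this listing does not show where). As a reminder of why the integer fold needs undefined signed overflow (illustrative only):

    #include <limits.h>
    #include <stdbool.h>

    /* Under wrapping signed arithmetic (-fwrapv), x + 1 > x is false at
       x == INT_MAX, so "(X + c) > X -> true" only holds when signed
       overflow is undefined; the float form needs !HONOR_SNANS instead.  */
    static bool
    plus_one_gt (int x)
    {
      return x + 1 > x;
    }
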
/* If we are comparing an ABS_EXPR with a constant, we can
@@ -12903,7 +12808,7 @@ multiple_of_p (tree type, const_tree top, const_tree bottom)
/* fall through */
default:
- if (POLY_INT_CST_P (top) && poly_tree_p (bottom))
+ if (POLY_INT_CST_P (top) && poly_int_tree_p (bottom))
return multiple_p (wi::to_poly_widest (top),
wi::to_poly_widest (bottom));
@@ -13963,16 +13868,6 @@ fold_negate_const (tree arg0, tree type)
switch (TREE_CODE (arg0))
{
- case INTEGER_CST:
- {
- bool overflow;
- wide_int val = wi::neg (wi::to_wide (arg0), &overflow);
- t = force_fit_type (type, val, 1,
- (overflow && ! TYPE_UNSIGNED (type))
- || TREE_OVERFLOW (arg0));
- break;
- }
-
case REAL_CST:
t = build_real (type, real_value_negate (&TREE_REAL_CST (arg0)));
break;
@@ -13991,13 +13886,14 @@ fold_negate_const (tree arg0, tree type)
}
default:
- if (POLY_INT_CST_P (arg0))
+ if (poly_int_tree_p (arg0))
{
bool overflow;
- poly_wide_int res = wi::neg (poly_int_cst_value (arg0), &overflow);
- if (!TYPE_UNSIGNED (type) && overflow)
- return NULL_TREE;
- return wide_int_to_tree (type, res);
+ poly_wide_int res = wi::neg (wi::to_poly_wide (arg0), &overflow);
+ t = force_fit_type (type, res, 1,
+ (overflow && ! TYPE_UNSIGNED (type))
+ || TREE_OVERFLOW (arg0));
+ break;
}
gcc_unreachable ();
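
Note the behavioral change in the default arm: the old poly-int path returned NULL_TREE on signed overflow, while the merged code mirrors the deleted INTEGER_CST arm and lets force_fit_type record the overflow on the result instead. The one overflowing input, for reference (illustrative only):

    #include <stdbool.h>
    #include <stdint.h>

    /* Negating the most negative two's-complement value is the single
       case whose result does not fit the type.  */
    static bool
    negation_overflows (int64_t x)
    {
      return x == INT64_MIN;
    }
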
@@ -14156,9 +14052,9 @@ fold_relational_const (enum tree_code code, tree type, tree op0, tree op1)
tree elem0 = VECTOR_CST_ELT (op0, i);
tree elem1 = VECTOR_CST_ELT (op1, i);
tree tmp = fold_relational_const (code, type, elem0, elem1);
- if (tmp == NULL_TREE || TREE_CODE (tmp) != INTEGER_CST)
+ if (tmp == NULL_TREE)
return NULL_TREE;
- if (wi::to_wide (tmp) == 0)
+ if (integer_zerop (tmp))
return constant_boolean_node (false, type);
}
return constant_boolean_node (true, type);
@@ -14323,7 +14219,7 @@ fold_indirect_ref_1 (location_t loc, tree type, tree op0)
}
if (TREE_CODE (sub) == POINTER_PLUS_EXPR
- && poly_tree_p (TREE_OPERAND (sub, 1), &const_op01))
+ && poly_int_tree_p (TREE_OPERAND (sub, 1), &const_op01))
{
tree op00 = TREE_OPERAND (sub, 0);
tree op01 = TREE_OPERAND (sub, 1);
@@ -14355,7 +14251,8 @@ fold_indirect_ref_1 (location_t loc, tree type, tree op0)
else if (TREE_CODE (op00type) == COMPLEX_TYPE
&& type == TREE_TYPE (op00type))
{
- if (equal_tree_size (TYPE_SIZE_UNIT (type), const_op01))
+ if (must_eq (wi::to_poly_offset (TYPE_SIZE_UNIT (type)),
+ const_op01))
return fold_build1_loc (loc, IMAGPART_EXPR, type, op00);
}
/* ((foo *)&fooarray)[1] => fooarray[1] */
@@ -14594,7 +14491,7 @@ round_down_loc (location_t loc, tree value, int divisor)
static tree
split_address_to_core_and_offset (tree exp,
- poly_int64 *pbitpos, tree *poffset)
+ poly_int64_pod *pbitpos, tree *poffset)
{
tree core;
machine_mode mode;
@@ -14615,16 +14512,14 @@ split_address_to_core_and_offset (tree exp,
STRIP_NOPS (core);
*pbitpos = 0;
*poffset = TREE_OPERAND (exp, 1);
- if (TREE_CODE (*poffset) == INTEGER_CST)
+ if (poly_int_tree_p (*poffset))
{
- offset_int tem = wi::sext (wi::to_offset (*poffset),
- TYPE_PRECISION (TREE_TYPE (*poffset)));
+ poly_offset_int tem
+ = wi::sext (wi::to_poly_offset (*poffset),
+ TYPE_PRECISION (TREE_TYPE (*poffset)));
tem <<= LOG2_BITS_PER_UNIT;
- if (wi::fits_shwi_p (tem))
- {
- *pbitpos = tem.to_shwi ();
- *poffset = NULL_TREE;
- }
+ if (tem.to_shwi (pbitpos))
+ *poffset = NULL_TREE;
}
}
else
@@ -14641,7 +14536,7 @@ split_address_to_core_and_offset (tree exp,
otherwise. If they do, E1 - E2 is stored in *DIFF. */
bool
-ptr_difference_const (tree e1, tree e2, poly_int64 *diff)
+ptr_difference_const (tree e1, tree e2, poly_int64_pod *diff)
{
tree core1, core2;
poly_int64 bitpos1, bitpos2;
diff --git a/gcc/fold-const.h b/gcc/fold-const.h
index 0d6a028c4f6..b777606b6e9 100644
--- a/gcc/fold-const.h
+++ b/gcc/fold-const.h
@@ -122,7 +122,7 @@ extern tree div_if_zero_remainder (const_tree, const_tree);
extern bool tree_swap_operands_p (const_tree, const_tree);
extern enum tree_code swap_tree_comparison (enum tree_code);
-extern bool ptr_difference_const (tree, tree, poly_int64 *);
+extern bool ptr_difference_const (tree, tree, poly_int64_pod *);
extern enum tree_code invert_tree_comparison (enum tree_code, bool);
extern bool tree_unary_nonzero_warnv_p (enum tree_code, tree, tree, bool *);
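
A note on the recurring poly_int64 -> poly_int64_pod signature changes: the _pod variant is the plain-old-data flavour without constructors, which keeps out-parameters and statically initialized data trivially constructible. A generic sketch of that split, with made-up types rather than GCC's poly-int.h classes:

    // Made-up types illustrating the pod/non-pod split.
    struct fake_poly_pod
    {
      long coeffs[2];
    };

    struct fake_poly : fake_poly_pod
    {
      fake_poly (long a = 0, long b = 0) { coeffs[0] = a; coeffs[1] = b; }
    };

    // Out-parameters take the POD base, so callers can pass either type.
    static bool
    get_offset_sketch (fake_poly_pod *out)
    {
      out->coeffs[0] = 4;
      out->coeffs[1] = 0;
      return true;
    }
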
diff --git a/gcc/fortran/ChangeLog b/gcc/fortran/ChangeLog
index 926bee73529..a37d16e51fa 100644
--- a/gcc/fortran/ChangeLog
+++ b/gcc/fortran/ChangeLog
@@ -1,3 +1,8 @@
+2017-10-11 Nathan Sidwell <nathan@acm.org>
+
+ * cpp.c (gfc_cpp_add_include_path): Update incpath_e names.
+ (gfc_cpp_add_include_path_after): Likewise.
+
2017-10-10 Richard Sandiford <richard.sandiford@linaro.org>
* target-memory.c (gfc_interpret_logical): Use wi::to_wide when
diff --git a/gcc/fortran/cpp.c b/gcc/fortran/cpp.c
index 4d1b56a0045..af8a69ca3f7 100644
--- a/gcc/fortran/cpp.c
+++ b/gcc/fortran/cpp.c
@@ -683,14 +683,14 @@ gfc_cpp_add_include_path (char *path, bool user_supplied)
include path. Fortran does not define any system include paths. */
int cxx_aware = 0;
- add_path (path, BRACKET, cxx_aware, user_supplied);
+ add_path (path, INC_BRACKET, cxx_aware, user_supplied);
}
void
gfc_cpp_add_include_path_after (char *path, bool user_supplied)
{
int cxx_aware = 0;
- add_path (path, AFTER, cxx_aware, user_supplied);
+ add_path (path, INC_AFTER, cxx_aware, user_supplied);
}
void
diff --git a/gcc/function.c b/gcc/function.c
index 274d2732323..1c94329c063 100644
--- a/gcc/function.c
+++ b/gcc/function.c
@@ -240,13 +240,12 @@ frame_offset_overflow (poly_int64 offset, tree func)
/* Leave room for the fixed part of the frame. */
- 64 * UNITS_PER_WORD);
- for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
- if (size.coeffs[i] > limit)
- {
- error_at (DECL_SOURCE_LOCATION (func),
- "total size of local objects too large");
- return true;
- }
+ if (!coeffs_in_range_p (size, 0U, limit))
+ {
+ error_at (DECL_SOURCE_LOCATION (func),
+ "total size of local objects too large");
+ return true;
+ }
return false;
}
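
Note: the coeffs_in_range_p call is a drop-in for the deleted loop; a poly_int lies within a range only if every coefficient does. The same test over a plain array (hypothetical helper, not the poly-int.h template):

    #include <stdbool.h>

    static bool
    coeffs_in_range_sketch (const unsigned long *coeffs, int n,
                            unsigned long lower, unsigned long upper)
    {
      /* Equivalent to the removed loop: reject if any coefficient falls
         outside [lower, upper].  */
      for (int i = 0; i < n; ++i)
        if (coeffs[i] < lower || coeffs[i] > upper)
          return false;
      return true;
    }
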
@@ -289,7 +288,7 @@ get_stack_local_alignment (tree type, machine_mode mode)
static bool
try_fit_stack_local (poly_int64 start, poly_int64 length,
poly_int64 size, unsigned int alignment,
- poly_int64 *poffset)
+ poly_int64_pod *poffset)
{
poly_int64 this_frame_offset;
int frame_off, frame_alignment, frame_phase;
@@ -297,10 +296,7 @@ try_fit_stack_local (poly_int64 start, poly_int64 length,
/* Calculate how many bytes the start of local variables is off from
stack alignment. */
frame_alignment = PREFERRED_STACK_BOUNDARY / BITS_PER_UNIT;
- /* At present we only support frame layouts in which the misalignment
- of STARTING_FRAME_OFFSET is known at compile time. */
- frame_off = force_get_misalignment (poly_int64 (STARTING_FRAME_OFFSET),
- frame_alignment);
+ frame_off = targetm.starting_frame_offset () % frame_alignment;
frame_phase = frame_off ? frame_alignment - frame_off : 0;
/* Round the frame offset to the specified alignment. */
@@ -496,7 +492,12 @@ assign_stack_local_1 (machine_mode mode, poly_int64 size,
use the least significant bytes of those that are allocated. */
if (mode != BLKmode)
{
- gcc_checking_assert (must_le (GET_MODE_SIZE (mode), size));
+ /* The slot size can sometimes be smaller than the mode size;
+ e.g. the rs6000 port allocates slots with a vector mode
+ that have the size of only one element. However, the slot
+ size must always be ordered wrt the mode size, in the
+ same way as for a subreg. */
+ gcc_checking_assert (ordered_p (GET_MODE_SIZE (mode), size));
if (BYTES_BIG_ENDIAN && may_lt (GET_MODE_SIZE (mode), size))
bigend_correction = size - GET_MODE_SIZE (mode);
}
@@ -507,7 +508,7 @@ assign_stack_local_1 (machine_mode mode, poly_int64 size,
addr = plus_constant (Pmode, frame_pointer_rtx,
trunc_int_for_mode
(slot_offset + bigend_correction
- + STARTING_FRAME_OFFSET, Pmode));
+ + targetm.starting_frame_offset (), Pmode));
else
addr = plus_constant (Pmode, virtual_stack_vars_rtx,
trunc_int_for_mode
@@ -1366,11 +1367,11 @@ initial_value_entry (int i, rtx *hreg, rtx *preg)
routines. They contain the offsets of the virtual registers from their
respective hard registers. */
-static int in_arg_offset;
+static poly_int64 in_arg_offset;
static poly_int64 var_offset;
static poly_int64 dynamic_offset;
static poly_int64 out_arg_offset;
-static int cfa_offset;
+static poly_int64 cfa_offset;
/* In most machines, the stack pointer register is equivalent to the bottom
of the stack. */
@@ -1406,7 +1407,7 @@ static int cfa_offset;
: 0) + (STACK_POINTER_OFFSET))
#else
#define STACK_DYNAMIC_OFFSET(FNDECL) \
- ((ACCUMULATE_OUTGOING_ARGS ? poly_int64 (crtl->outgoing_args_size) : 0) \
+ ((ACCUMULATE_OUTGOING_ARGS ? crtl->outgoing_args_size : poly_int64 (0)) \
+ (STACK_POINTER_OFFSET))
#endif
#endif
@@ -1417,7 +1418,7 @@ static int cfa_offset;
offset indirectly through the pointer. Otherwise, return 0. */
static rtx
-instantiate_new_reg (rtx x, poly_int64 *poffset)
+instantiate_new_reg (rtx x, poly_int64_pod *poffset)
{
rtx new_rtx;
poly_int64 offset;
@@ -1598,14 +1599,15 @@ instantiate_virtual_regs_in_insn (rtx_insn *insn)
/* Handle a plus involving a virtual register by determining if the
operands remain valid if they're modified in place. */
+ poly_int64 delta;
if (GET_CODE (SET_SRC (set)) == PLUS
&& recog_data.n_operands >= 3
&& recog_data.operand_loc[1] == &XEXP (SET_SRC (set), 0)
&& recog_data.operand_loc[2] == &XEXP (SET_SRC (set), 1)
- && CONST_INT_P (recog_data.operand[2])
+ && poly_int_rtx_p (recog_data.operand[2], &delta)
&& (new_rtx = instantiate_new_reg (recog_data.operand[1], &offset)))
{
- offset += INTVAL (recog_data.operand[2]);
+ offset += delta;
/* If the sum is zero, then replace with a plain move. */
if (known_zero (offset)
@@ -1935,7 +1937,7 @@ instantiate_virtual_regs (void)
/* Compute the offsets to use for this function. */
in_arg_offset = FIRST_PARM_OFFSET (current_function_decl);
- var_offset = STARTING_FRAME_OFFSET;
+ var_offset = targetm.starting_frame_offset ();
dynamic_offset = STACK_DYNAMIC_OFFSET (current_function_decl);
out_arg_offset = STACK_POINTER_OFFSET;
#ifdef FRAME_POINTER_CFA_OFFSET
@@ -2718,12 +2720,15 @@ assign_parm_find_stack_rtl (tree parm, struct assign_parm_data_one *data)
is TARGET_FUNCTION_ARG_BOUNDARY. If we're using slot_offset, we're
intentionally forcing upward padding. Otherwise we have to come
up with a guess at the alignment based on OFFSET_RTX. */
+ poly_int64 offset;
if (data->locate.where_pad != PAD_DOWNWARD || data->entry_parm)
align = boundary;
- else if (CONST_INT_P (offset_rtx))
+ else if (poly_int_rtx_p (offset_rtx, &offset))
{
- align = INTVAL (offset_rtx) * BITS_PER_UNIT | boundary;
- align = least_bit_hwi (align);
+ align = least_bit_hwi (boundary);
+ unsigned int offset_align = known_alignment (offset) * BITS_PER_UNIT;
+ if (offset_align != 0)
+ align = MIN (align, offset_align);
}
set_mem_align (stack_parm, align);
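
Note: known_alignment generalizes the old least_bit_hwi (INTVAL (...)) computation to runtime offsets; it yields the largest power of two guaranteed to divide every value the poly_int can take, or 0 when there is no guarantee, hence the offset_align != 0 check above. For a compile-time constant this degenerates to the least set bit (sketch):

    /* Constant case: the least set bit is the largest power of two
       dividing the value; 0 in means 0 out, i.e. no guarantee.  */
    static unsigned long
    known_alignment_sketch (unsigned long offset)
    {
      return offset & -offset;
    }
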
diff --git a/gcc/genattrtab.c b/gcc/genattrtab.c
index 60305cd613d..463782411de 100644
--- a/gcc/genattrtab.c
+++ b/gcc/genattrtab.c
@@ -5073,7 +5073,7 @@ write_header (FILE *outf)
fprintf (outf, "/* Generated automatically by the program `genattrtab'\n"
" from the machine description file `md'. */\n\n");
- fprintf (outf, "#define TARGET_C_FILE 1\n");
+ fprintf (outf, "#define IN_TARGET_CODE 1\n");
fprintf (outf, "#include \"config.h\"\n");
fprintf (outf, "#include \"system.h\"\n");
fprintf (outf, "#include \"coretypes.h\"\n");
diff --git a/gcc/genautomata.c b/gcc/genautomata.c
index 2d1808c404b..aed10a34e42 100644
--- a/gcc/genautomata.c
+++ b/gcc/genautomata.c
@@ -9641,7 +9641,7 @@ main (int argc, const char **argv)
{
puts ("/* Generated automatically by the program `genautomata'\n"
" from the machine description file `md'. */\n\n"
- "#define TARGET_C_FILE 1\n"
+ "#define IN_TARGET_CODE 1\n"
"#include \"config.h\"\n"
"#include \"system.h\"\n"
"#include \"coretypes.h\"\n"
diff --git a/gcc/genconditions.c b/gcc/genconditions.c
index 07bcb7e3235..178026a3ecb 100644
--- a/gcc/genconditions.c
+++ b/gcc/genconditions.c
@@ -50,7 +50,7 @@ write_header (void)
/* Generated automatically by the program `genconditions' from the target\n\
machine description file. */\n\
\n\
-#define TARGET_C_FILE 1\n\
+#define IN_TARGET_CODE 1\n\
#include \"bconfig.h\"\n\
#define INCLUDE_STRING\n\
#include \"system.h\"\n\
diff --git a/gcc/genemit.c b/gcc/genemit.c
index 877d0afa4b8..708da272215 100644
--- a/gcc/genemit.c
+++ b/gcc/genemit.c
@@ -770,7 +770,7 @@ main (int argc, const char **argv)
printf ("/* Generated automatically by the program `genemit'\n\
from the machine description file `md'. */\n\n");
- printf ("#define TARGET_C_FILE 1\n");
+ printf ("#define IN_TARGET_CODE 1\n");
printf ("#include \"config.h\"\n");
printf ("#include \"system.h\"\n");
printf ("#include \"coretypes.h\"\n");
diff --git a/gcc/genextract.c b/gcc/genextract.c
index 2430c3f0643..258d234d272 100644
--- a/gcc/genextract.c
+++ b/gcc/genextract.c
@@ -343,7 +343,7 @@ print_header (void)
/* Generated automatically by the program `genextract'\n\
from the machine description file `md'. */\n\
\n\
-#define TARGET_C_FILE 1\n\
+#define IN_TARGET_CODE 1\n\
#include \"config.h\"\n\
#include \"system.h\"\n\
#include \"coretypes.h\"\n\
diff --git a/gcc/genmodes.c b/gcc/genmodes.c
index 2df8299a8bb..9a5ed03b6ec 100644
--- a/gcc/genmodes.c
+++ b/gcc/genmodes.c
@@ -1762,7 +1762,7 @@ emit_mode_adjustments (void)
{
case MODE_COMPLEX_INT:
case MODE_COMPLEX_FLOAT:
- printf (" mode_size[E_%smode] = 2 * ps;\n", m->name);
+ printf (" mode_size[E_%smode] = 2*s;\n", m->name);
printf (" mode_unit_size[E_%smode] = s;\n", m->name);
printf (" mode_base_align[E_%smode] = s & (~s + 1);\n",
m->name);
diff --git a/gcc/genopinit.c b/gcc/genopinit.c
index 807653b084c..79a564199e8 100644
--- a/gcc/genopinit.c
+++ b/gcc/genopinit.c
@@ -253,7 +253,7 @@ main (int argc, const char **argv)
"#endif\n");
fprintf (s_file,
- "#define TARGET_C_FILE 1\n"
+ "#define IN_TARGET_CODE 1\n"
"#include \"config.h\"\n"
"#include \"system.h\"\n"
"#include \"coretypes.h\"\n"
diff --git a/gcc/genoutput.c b/gcc/genoutput.c
index ece3262f669..1deb9b08e06 100644
--- a/gcc/genoutput.c
+++ b/gcc/genoutput.c
@@ -203,7 +203,7 @@ output_prologue (void)
printf ("/* Generated automatically by the program `genoutput'\n\
from the machine description file `md'. */\n\n");
- printf ("#define TARGET_C_FILE 1\n");
+ printf ("#define IN_TARGET_CODE 1\n");
printf ("#include \"config.h\"\n");
printf ("#include \"system.h\"\n");
printf ("#include \"coretypes.h\"\n");
diff --git a/gcc/genpeep.c b/gcc/genpeep.c
index 0e5ca08440e..20239544bdb 100644
--- a/gcc/genpeep.c
+++ b/gcc/genpeep.c
@@ -358,7 +358,7 @@ main (int argc, const char **argv)
printf ("/* Generated automatically by the program `genpeep'\n\
from the machine description file `md'. */\n\n");
- printf ("#define TARGET_C_FILE 1\n");
+ printf ("#define IN_TARGET_CODE 1\n");
printf ("#include \"config.h\"\n");
printf ("#include \"system.h\"\n");
printf ("#include \"coretypes.h\"\n");
diff --git a/gcc/genpreds.c b/gcc/genpreds.c
index 48c5d40491d..200869f1787 100644
--- a/gcc/genpreds.c
+++ b/gcc/genpreds.c
@@ -1558,7 +1558,7 @@ write_insn_preds_c (void)
md_reader_ptr->get_top_level_filename ());
puts ("\
-#define TARGET_C_FILE 1\n\
+#define IN_TARGET_CODE 1\n\
#include \"config.h\"\n\
#include \"system.h\"\n\
#include \"coretypes.h\"\n\
diff --git a/gcc/genrecog.c b/gcc/genrecog.c
index 77c7e2e9e5c..695383badab 100644
--- a/gcc/genrecog.c
+++ b/gcc/genrecog.c
@@ -747,6 +747,7 @@ validate_pattern (rtx pattern, md_rtx_info *info, rtx set, int set_code)
if (GET_CODE (XEXP (pattern, 1)) == PARALLEL)
{
int expected = 1;
+ unsigned int nelems;
if (VECTOR_MODE_P (mode)
&& !GET_MODE_NUNITS (mode).is_constant (&expected))
error_at (info->loc,
@@ -756,6 +757,21 @@ validate_pattern (rtx pattern, md_rtx_info *info, rtx set, int set_code)
error_at (info->loc,
"vec_select parallel with %d elements, expected %d",
XVECLEN (XEXP (pattern, 1), 0), expected);
+ else if (VECTOR_MODE_P (imode)
+ && GET_MODE_NUNITS (imode).is_constant (&nelems))
+ {
+ int i;
+ for (i = 0; i < expected; ++i)
+ if (CONST_INT_P (XVECEXP (XEXP (pattern, 1), 0, i))
+ && (UINTVAL (XVECEXP (XEXP (pattern, 1), 0, i))
+ >= nelems))
+ error_at (info->loc,
+ "out of bounds selector %u in vec_select, "
+ "expected at most %u",
+ (unsigned)
+ UINTVAL (XVECEXP (XEXP (pattern, 1), 0, i)),
+ nelems - 1);
+ }
}
if (imode != VOIDmode && !VECTOR_MODE_P (imode))
error_at (info->loc, "%smode of first vec_select operand is not a "
@@ -3461,7 +3477,6 @@ safe_predicate_mode (const struct pred_data *pred, machine_mode mode)
&& (pred->codes[CONST_INT]
|| pred->codes[CONST_DOUBLE]
|| pred->codes[CONST_WIDE_INT]
- || pred->codes[CONST_POLY_INT]
|| pred->codes[LABEL_REF]))
return false;
@@ -4231,7 +4246,7 @@ write_header (void)
/* Generated automatically by the program `genrecog' from the target\n\
machine description file. */\n\
\n\
-#define TARGET_C_FILE 1\n\
+#define IN_TARGET_CODE 1\n\
\n\
#include \"config.h\"\n\
#include \"system.h\"\n\
diff --git a/gcc/gimple-expr.c b/gcc/gimple-expr.c
index 89fdd428da2..00444a54c2c 100644
--- a/gcc/gimple-expr.c
+++ b/gcc/gimple-expr.c
@@ -337,9 +337,8 @@ gimple_decl_printable_name (tree decl, int verbosity)
if (!DECL_NAME (decl))
return NULL;
- if (DECL_ASSEMBLER_NAME_SET_P (decl))
+ if (HAS_DECL_ASSEMBLER_NAME_P (decl) && DECL_ASSEMBLER_NAME_SET_P (decl))
{
- const char *str, *mangled_str;
int dmgl_opts = DMGL_NO_OPTS;
if (verbosity >= 2)
@@ -352,9 +351,10 @@ gimple_decl_printable_name (tree decl, int verbosity)
dmgl_opts |= DMGL_PARAMS;
}
- mangled_str = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
- str = cplus_demangle_v3 (mangled_str, dmgl_opts);
- return (str) ? str : mangled_str;
+ const char *mangled_str
+ = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME_RAW (decl));
+ const char *str = cplus_demangle_v3 (mangled_str, dmgl_opts);
+ return str ? str : mangled_str;
}
return IDENTIFIER_POINTER (DECL_NAME (decl));
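
Note: the hunk keeps the same demangling fallback but reads the raw assembler name and first checks that the decl can carry one at all. The libiberty call it relies on can be exercised standalone, assuming libiberty's headers and library are installed (link with -liberty):

    #include <demangle.h>
    #include <stdio.h>

    int
    main (void)
    {
      const char *mangled = "_Z3fooi";
      /* cplus_demangle_v3 returns a malloc'd string or NULL on failure;
         fall back to the mangled name exactly as the code above does.  */
      const char *str = cplus_demangle_v3 (mangled, DMGL_NO_OPTS);
      puts (str ? str : mangled);
      return 0;
    }
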
diff --git a/gcc/gimple-fold.c b/gcc/gimple-fold.c
index d7a788b677b..0a460a693c7 100644
--- a/gcc/gimple-fold.c
+++ b/gcc/gimple-fold.c
@@ -850,7 +850,7 @@ gimple_fold_builtin_memory_op (gimple_stmt_iterator *gsi,
&dest_offset);
if (dest_base == NULL)
dest_base = destvar;
- if (!poly_tree_p (len, &maxsize))
+ if (!poly_int_tree_p (len, &maxsize))
maxsize = -1;
if (SSA_VAR_P (src_base)
&& SSA_VAR_P (dest_base))
@@ -6164,7 +6164,7 @@ gimple_fold_stmt_to_constant (gimple *stmt, tree (*valueize) (tree))
is not explicitly available, but it is known to be zero
such as 'static const int a;'. */
static tree
-get_base_constructor (tree base, poly_int64 *bit_offset,
+get_base_constructor (tree base, poly_int64_pod *bit_offset,
tree (*valueize)(tree))
{
poly_int64 bit_offset2, size, max_size;
@@ -6358,27 +6358,31 @@ fold_nonarray_ctor_reference (tree type, tree ctor,
return build_zero_cst (type);
}
-/* CTOR is value initializing memory, fold reference of type TYPE and size SIZE
- to the memory at bit OFFSET. */
+/* CTOR is value initializing memory, fold reference of type TYPE and
+ size POLY_SIZE to the memory at bit POLY_OFFSET. */
tree
-fold_ctor_reference (tree type, tree ctor, poly_uint64 offset,
- poly_uint64 size, tree from_decl)
+fold_ctor_reference (tree type, tree ctor, poly_uint64 poly_offset,
+ poly_uint64 poly_size, tree from_decl)
{
tree ret;
/* We found the field with exact match. */
if (useless_type_conversion_p (type, TREE_TYPE (ctor))
- && known_zero (offset))
+ && known_zero (poly_offset))
return canonicalize_constructor_val (unshare_expr (ctor), from_decl);
+ /* The remaining optimizations need a constant size and offset. */
+ unsigned HOST_WIDE_INT size, offset;
+ if (!poly_size.is_constant (&size) || !poly_offset.is_constant (&offset))
+ return NULL_TREE;
+
/* We are at the end of walk, see if we can view convert the
result. */
- if (!AGGREGATE_TYPE_P (TREE_TYPE (ctor))
- && known_zero (offset)
+ if (!AGGREGATE_TYPE_P (TREE_TYPE (ctor)) && !offset
/* VIEW_CONVERT_EXPR is defined only for matching sizes. */
- && equal_tree_size (TYPE_SIZE (type), size)
- && equal_tree_size (TYPE_SIZE (TREE_TYPE (ctor)), size))
+ && !compare_tree_int (TYPE_SIZE (type), size)
+ && !compare_tree_int (TYPE_SIZE (TREE_TYPE (ctor)), size))
{
ret = canonicalize_constructor_val (unshare_expr (ctor), from_decl);
if (ret)
@@ -6391,30 +6395,28 @@ fold_ctor_reference (tree type, tree ctor, poly_uint64 offset,
}
/* For constants and byte-aligned/sized reads try to go through
native_encode/interpret. */
- HOST_WIDE_INT byte_offset, byte_size;
if (CONSTANT_CLASS_P (ctor)
&& BITS_PER_UNIT == 8
- && constant_multiple_p (offset, BITS_PER_UNIT, &byte_offset)
- && constant_multiple_p (size, BITS_PER_UNIT, &byte_size)
- && byte_size * BITS_PER_UNIT <= MAX_BITSIZE_MODE_ANY_MODE)
+ && offset % BITS_PER_UNIT == 0
+ && size % BITS_PER_UNIT == 0
+ && size <= MAX_BITSIZE_MODE_ANY_MODE)
{
unsigned char buf[MAX_BITSIZE_MODE_ANY_MODE / BITS_PER_UNIT];
- int len = native_encode_expr (ctor, buf, byte_size, byte_offset);
+ int len = native_encode_expr (ctor, buf, size / BITS_PER_UNIT,
+ offset / BITS_PER_UNIT);
if (len > 0)
return native_interpret_expr (type, buf, len);
}
- unsigned HOST_WIDE_INT const_offset, const_size;
- if (TREE_CODE (ctor) == CONSTRUCTOR
- && offset.is_constant (&const_offset)
- && size.is_constant (&const_size))
+ if (TREE_CODE (ctor) == CONSTRUCTOR)
{
+
if (TREE_CODE (TREE_TYPE (ctor)) == ARRAY_TYPE
|| TREE_CODE (TREE_TYPE (ctor)) == VECTOR_TYPE)
- return fold_array_ctor_reference (type, ctor, const_offset,
- const_size, from_decl);
+ return fold_array_ctor_reference (type, ctor, offset, size,
+ from_decl);
else
- return fold_nonarray_ctor_reference (type, ctor, const_offset,
- const_size, from_decl);
+ return fold_nonarray_ctor_reference (type, ctor, offset, size,
+ from_decl);
}
return NULL_TREE;
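
Note: the byte-aligned path folds a load from a constant by serializing it with native_encode_expr and reinterpreting the bytes in the result type with native_interpret_expr. The round trip is morally a memcpy at a byte offset (generic illustration, not the GCC API):

    #include <stdint.h>
    #include <string.h>

    /* Read a float out of the encoded bytes of a constant object at a
       byte-aligned offset, as the encode/interpret pair does for trees.  */
    static float
    read_float_at (const uint8_t *ctor_bytes, size_t byte_offset)
    {
      float value;
      memcpy (&value, ctor_bytes + byte_offset, sizeof value);
      return value;
    }
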
@@ -6454,23 +6456,23 @@ fold_const_aggregate_ref_1 (tree t, tree (*valueize) (tree))
if (TREE_CODE (TREE_OPERAND (t, 1)) == SSA_NAME
&& valueize
&& (idx = (*valueize) (TREE_OPERAND (t, 1)))
- && TREE_CODE (idx) == INTEGER_CST)
+ && poly_int_tree_p (idx))
{
tree low_bound, unit_size;
/* If the resulting bit-offset is constant, track it. */
if ((low_bound = array_ref_low_bound (t),
- TREE_CODE (low_bound) == INTEGER_CST)
+ poly_int_tree_p (low_bound))
&& (unit_size = array_ref_element_size (t),
tree_fits_uhwi_p (unit_size)))
{
- offset_int woffset
- = wi::sext (wi::to_offset (idx) - wi::to_offset (low_bound),
+ poly_offset_int woffset
+ = wi::sext (wi::to_poly_offset (idx)
+ - wi::to_poly_offset (low_bound),
TYPE_PRECISION (TREE_TYPE (idx)));
- if (wi::fits_shwi_p (woffset))
+ if (woffset.to_shwi (&offset))
{
- offset = woffset.to_shwi ();
/* TODO: This code seems wrong, multiply then check
to see if it fits. */
offset *= tree_to_uhwi (unit_size);
@@ -7063,7 +7065,7 @@ gimple_build_vector_from_val (gimple_seq *seq, location_t loc, tree type,
tree op)
{
if (!TYPE_VECTOR_SUBPARTS (type).is_constant ()
- && CONSTANT_CLASS_P (op))
+ && !CONSTANT_CLASS_P (op))
return gimple_build (seq, loc, VEC_DUPLICATE_EXPR, type, op);
tree res, vec = build_vector_from_val (type, op);
diff --git a/gcc/gimplify.c b/gcc/gimplify.c
index 1a2ae47e350..8a5c380027c 100644
--- a/gcc/gimplify.c
+++ b/gcc/gimplify.c
@@ -702,7 +702,7 @@ gimple_add_tmp_var_fn (struct function *fn, tree tmp)
not be true at this point. Force the use of a constant upper bound in
this case. */
poly_uint64 size;
- if (!poly_tree_p (DECL_SIZE_UNIT (tmp), &size))
+ if (!poly_int_tree_p (DECL_SIZE_UNIT (tmp), &size))
force_constant_size (tmp);
DECL_CONTEXT (tmp) = fn->decl;
@@ -722,7 +722,7 @@ gimple_add_tmp_var (tree tmp)
not be true at this point. Force the use of a constant upper bound in
this case. */
poly_uint64 size;
- if (!poly_tree_p (DECL_SIZE_UNIT (tmp), &size))
+ if (!poly_int_tree_p (DECL_SIZE_UNIT (tmp), &size))
force_constant_size (tmp);
DECL_CONTEXT (tmp) = current_function_decl;
@@ -3030,7 +3030,7 @@ maybe_with_size_expr (tree *expr_p)
/* If the size isn't known or is a constant, we have nothing to do. */
size = TYPE_SIZE_UNIT (type);
- if (!size || poly_tree_p (size))
+ if (!size || poly_int_tree_p (size))
return;
/* Otherwise, make a WITH_SIZE_EXPR. */
@@ -7888,7 +7888,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
base = TREE_OPERAND (base, 0);
gcc_assert (base == decl
&& (offset == NULL_TREE
- || TREE_CODE (offset) == INTEGER_CST));
+ || poly_int_tree_p (offset)));
splay_tree_node n
= splay_tree_lookup (ctx->variables, (splay_tree_key)decl);
@@ -7969,7 +7969,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
n->value |= GOVD_SEEN;
poly_offset_int o1, o2;
if (offset)
- o1 = wi::to_offset (offset);
+ o1 = wi::to_poly_offset (offset);
else
o1 = 0;
if (maybe_nonzero (bitpos))
@@ -8028,7 +8028,7 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
if (scp)
continue;
gcc_assert (offset == NULL_TREE
- || TREE_CODE (offset) == INTEGER_CST);
+ || poly_int_tree_p (offset));
tree d1 = OMP_CLAUSE_DECL (*sc);
tree d2 = OMP_CLAUSE_DECL (c);
while (TREE_CODE (d1) == ARRAY_REF)
@@ -8058,15 +8058,13 @@ gimplify_scan_omp_clauses (tree *list_p, gimple_seq *pre_p,
break;
}
if (offset2)
- o2 = wi::to_offset (offset2);
+ o2 = wi::to_poly_offset (offset2);
else
o2 = 0;
o2 += bits_to_bytes_round_down (bitpos2);
- int order = compare_sizes_for_sort (o1, o2);
- if (order < 0
- || (order == 0
- && compare_sizes_for_sort (bitpos,
- bitpos2) < 0))
+ if (may_lt (o1, o2)
+ || (must_eq (o1, o2)
+ && may_lt (bitpos, bitpos2)))
{
if (ptr)
scp = sc;
diff --git a/gcc/go/ChangeLog b/gcc/go/ChangeLog
index 3918fa814d7..1c0ef932914 100644
--- a/gcc/go/ChangeLog
+++ b/gcc/go/ChangeLog
@@ -1,3 +1,8 @@
+2017-10-11 Tony Reix <tony.reix@atos.net>
+
+ * go-system.h (__STDC_FORMAT_MACROS): Define before including any
+ system header files, as is done in ../system.h.
+
2017-10-05 Ian Lance Taylor <iant@golang.org>
* Make-lang.in (GO_OBJS): Add go/names.o.
diff --git a/gcc/go/go-system.h b/gcc/go/go-system.h
index 90185435c1e..b1c67c3cd73 100644
--- a/gcc/go/go-system.h
+++ b/gcc/go/go-system.h
@@ -22,6 +22,12 @@
#include "config.h"
+/* Define this so that inttypes.h defines the PRI?64 macros even
+ when compiling with a C++ compiler. Define it here so in the
+ event inttypes.h gets pulled in by another header it is already
+ defined. */
+#define __STDC_FORMAT_MACROS
+
// These must be included before the #poison declarations in system.h.
#include <algorithm>
diff --git a/gcc/go/gofrontend/MERGE b/gcc/go/gofrontend/MERGE
index acb1d9584a3..418e1274fdf 100644
--- a/gcc/go/gofrontend/MERGE
+++ b/gcc/go/gofrontend/MERGE
@@ -1,4 +1,4 @@
-adc6eb826f156d0980f0ad9f9efc5c919ec4905e
+44132970e4b6c1186036bf8eda8982fb6e905d6f
The first line of this file holds the git revision number of the last
merge done from the gofrontend repository.
diff --git a/gcc/go/gofrontend/import.cc b/gcc/go/gofrontend/import.cc
index 20b077f7f99..2a3ea83ca78 100644
--- a/gcc/go/gofrontend/import.cc
+++ b/gcc/go/gofrontend/import.cc
@@ -756,13 +756,6 @@ Import::read_type()
this->require_c_string(" ");
- bool is_alias = false;
- if (this->match_c_string("= "))
- {
- stream->advance(2);
- is_alias = true;
- }
-
// The package name may follow. This is the name of the package in
// the package clause of that package. The type name will include
// the pkgpath, which may be different.
@@ -775,6 +768,13 @@ Import::read_type()
this->require_c_string(" ");
}
+ bool is_alias = false;
+ if (this->match_c_string("= "))
+ {
+ stream->advance(2);
+ is_alias = true;
+ }
+
// Declare the type in the appropriate package. If we haven't seen
// it before, mark it as invisible. We declare it before we read
// the actual definition of the type, since the definition may refer
diff --git a/gcc/graphite-isl-ast-to-gimple.c b/gcc/graphite-isl-ast-to-gimple.c
index 4939b2140cb..e7d95e22110 100644
--- a/gcc/graphite-isl-ast-to-gimple.c
+++ b/gcc/graphite-isl-ast-to-gimple.c
@@ -177,6 +177,7 @@ class translate_isl_ast_to_gimple
tree gcc_expression_from_isl_ast_expr_id (tree type,
__isl_keep isl_ast_expr *expr_id,
ivs_params &ip);
+ widest_int widest_int_from_isl_expr_int (__isl_keep isl_ast_expr *expr);
tree gcc_expression_from_isl_expr_int (tree type,
__isl_take isl_ast_expr *expr);
tree gcc_expression_from_isl_expr_op (tree type,
@@ -266,29 +267,46 @@ gcc_expression_from_isl_ast_expr_id (tree type,
return fold_convert (type, *val);
}
-/* Converts an isl_ast_expr_int expression E to a GCC expression tree of
- type TYPE. */
+/* Converts an isl_ast_expr_int expression E to a widest_int.
+ Raises a code generation error when the constant doesn't fit. */
-tree translate_isl_ast_to_gimple::
-gcc_expression_from_isl_expr_int (tree type, __isl_take isl_ast_expr *expr)
+widest_int translate_isl_ast_to_gimple::
+widest_int_from_isl_expr_int (__isl_keep isl_ast_expr *expr)
{
gcc_assert (isl_ast_expr_get_type (expr) == isl_ast_expr_int);
isl_val *val = isl_ast_expr_get_val (expr);
size_t n = isl_val_n_abs_num_chunks (val, sizeof (HOST_WIDE_INT));
HOST_WIDE_INT *chunks = XALLOCAVEC (HOST_WIDE_INT, n);
- tree res;
- if (isl_val_get_abs_num_chunks (val, sizeof (HOST_WIDE_INT), chunks) == -1)
- res = NULL_TREE;
- else
+ if (n > WIDE_INT_MAX_ELTS
+ || isl_val_get_abs_num_chunks (val, sizeof (HOST_WIDE_INT), chunks) == -1)
{
- widest_int wi = widest_int::from_array (chunks, n, true);
- if (isl_val_is_neg (val))
- wi = -wi;
- res = wide_int_to_tree (type, wi);
+ isl_val_free (val);
+ set_codegen_error ();
+ return 0;
}
+ widest_int wi = widest_int::from_array (chunks, n, true);
+ if (isl_val_is_neg (val))
+ wi = -wi;
isl_val_free (val);
+ return wi;
+}
+
+/* Converts an isl_ast_expr_int expression E to a GCC expression tree of
+ type TYPE. Raises a code generation error when the constant doesn't fit. */
+
+tree translate_isl_ast_to_gimple::
+gcc_expression_from_isl_expr_int (tree type, __isl_take isl_ast_expr *expr)
+{
+ widest_int wi = widest_int_from_isl_expr_int (expr);
isl_ast_expr_free (expr);
- return res;
+ if (codegen_error_p ())
+ return NULL_TREE;
+ if (wi::min_precision (wi, TYPE_SIGN (type)) > TYPE_PRECISION (type))
+ {
+ set_codegen_error ();
+ return NULL_TREE;
+ }
+ return wide_int_to_tree (type, wi);
}
/* Converts a binary isl_ast_expr_op expression E to a GCC expression tree of
@@ -297,14 +315,25 @@ gcc_expression_from_isl_expr_int (tree type, __isl_take isl_ast_expr *expr)
tree translate_isl_ast_to_gimple::
binary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, ivs_params &ip)
{
+ enum isl_ast_op_type expr_type = isl_ast_expr_get_op_type (expr);
isl_ast_expr *arg_expr = isl_ast_expr_get_op_arg (expr, 0);
tree tree_lhs_expr = gcc_expression_from_isl_expression (type, arg_expr, ip);
arg_expr = isl_ast_expr_get_op_arg (expr, 1);
- tree tree_rhs_expr = gcc_expression_from_isl_expression (type, arg_expr, ip);
-
- enum isl_ast_op_type expr_type = isl_ast_expr_get_op_type (expr);
isl_ast_expr_free (expr);
+ /* From our constraint generation we may get modulo operations that
+ we cannot represent explicitly but that are no-ops for TYPE.
+ Elide those. */
+ if (expr_type == isl_ast_op_pdiv_r
+ && isl_ast_expr_get_type (arg_expr) == isl_ast_expr_int
+ && (wi::exact_log2 (widest_int_from_isl_expr_int (arg_expr))
+ >= TYPE_PRECISION (type)))
+ {
+ isl_ast_expr_free (arg_expr);
+ return tree_lhs_expr;
+ }
+
+ tree tree_rhs_expr = gcc_expression_from_isl_expression (type, arg_expr, ip);
if (codegen_error_p ())
return NULL_TREE;
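
Note: the elision above fires when the isl modulus is a power of two at least as wide as TYPE; reducing modulo 2^k cannot change a value that already fits in k bits. Concretely (illustrative only):

    #include <stdint.h>

    /* x already fits in 32 bits, so x % 2^32 == x; this is the case that
       wi::exact_log2 (...) >= TYPE_PRECISION (type) detects.  */
    static uint32_t
    mod_noop (uint32_t x)
    {
      const uint64_t m = (uint64_t) 1 << 32;
      return (uint32_t) ((uint64_t) x % m);
    }
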
@@ -320,44 +349,16 @@ binary_op_to_tree (tree type, __isl_take isl_ast_expr *expr, ivs_params &ip)
return fold_build2 (MULT_EXPR, type, tree_lhs_expr, tree_rhs_expr);
case isl_ast_op_div:
- /* As isl operates on arbitrary precision numbers, we may end up with
- division by 2^64 that is folded to 0. */
- if (integer_zerop (tree_rhs_expr))
- {
- set_codegen_error ();
- return NULL_TREE;
- }
return fold_build2 (EXACT_DIV_EXPR, type, tree_lhs_expr, tree_rhs_expr);
case isl_ast_op_pdiv_q:
- /* As isl operates on arbitrary precision numbers, we may end up with
- division by 2^64 that is folded to 0. */
- if (integer_zerop (tree_rhs_expr))
- {
- set_codegen_error ();
- return NULL_TREE;
- }
return fold_build2 (TRUNC_DIV_EXPR, type, tree_lhs_expr, tree_rhs_expr);
case isl_ast_op_zdiv_r:
case isl_ast_op_pdiv_r:
- /* As isl operates on arbitrary precision numbers, we may end up with
- division by 2^64 that is folded to 0. */
- if (integer_zerop (tree_rhs_expr))
- {
- set_codegen_error ();
- return NULL_TREE;
- }
return fold_build2 (TRUNC_MOD_EXPR, type, tree_lhs_expr, tree_rhs_expr);
case isl_ast_op_fdiv_q:
- /* As isl operates on arbitrary precision numbers, we may end up with
- division by 2^64 that is folded to 0. */
- if (integer_zerop (tree_rhs_expr))
- {
- set_codegen_error ();
- return NULL_TREE;
- }
return fold_build2 (FLOOR_DIV_EXPR, type, tree_lhs_expr, tree_rhs_expr);
case isl_ast_op_and:
@@ -750,10 +751,8 @@ build_iv_mapping (vec<tree> iv_map, gimple_poly_bb_p gbb,
if (codegen_error_p ())
t = integer_zero_node;
- loop_p old_loop = gbb_loop_at_index (gbb, region, i - 2);
- /* Record sth only for real loops. */
- if (loop_in_sese_p (old_loop, region))
- iv_map[old_loop->num] = t;
+ loop_p old_loop = gbb_loop_at_index (gbb, region, i - 1);
+ iv_map[old_loop->num] = t;
}
}
@@ -1571,6 +1570,12 @@ graphite_regenerate_ast_isl (scop_p scop)
update_ssa (TODO_update_ssa);
checking_verify_ssa (true, true);
rewrite_into_loop_closed_ssa (NULL, 0);
+ /* We analyzed evolutions of all SCOPs during SCOP detection
+ which cached evolutions. Now we've introduced PHIs for
+ liveouts which causes those cached solutions to be invalid
+ for code-generation purposes given we'd insert references
+ to SSA names not dominating their new use. */
+ scev_reset ();
}
if (t.codegen_error_p ())
diff --git a/gcc/graphite-scop-detection.c b/gcc/graphite-scop-detection.c
index e133ab55439..f9d69247b0c 100644
--- a/gcc/graphite-scop-detection.c
+++ b/gcc/graphite-scop-detection.c
@@ -254,28 +254,6 @@ dot_cfg ()
scops.release ();
}
-/* Can all ivs be represented by a signed integer?
- As isl might generate negative values in its expressions, signed loop ivs
- are required in the backend. */
-
-static bool
-loop_ivs_can_be_represented (loop_p loop)
-{
- unsigned type_long_long = TYPE_PRECISION (long_long_integer_type_node);
- for (gphi_iterator psi = gsi_start_phis (loop->header); !gsi_end_p (psi);
- gsi_next (&psi))
- {
- gphi *phi = psi.phi ();
- tree res = PHI_RESULT (phi);
- tree type = TREE_TYPE (res);
-
- if (TYPE_UNSIGNED (type) && TYPE_PRECISION (type) >= type_long_long)
- return false;
- }
-
- return true;
-}
-
/* Returns a COND_EXPR statement when BB has a single predecessor, the
edge between BB and its predecessor is not a loop exit edge, and
the last statement of the single predecessor is a COND_EXPR. */
@@ -403,7 +381,7 @@ public:
Something like "i * n" or "n * m" is not allowed. */
- static bool graphite_can_represent_scev (tree scev);
+ static bool graphite_can_represent_scev (sese_l scop, tree scev);
/* Return true when EXPR can be represented in the polyhedral model.
@@ -822,13 +800,6 @@ scop_detection::harmful_loop_in_region (sese_l scop) const
return true;
}
- if (! loop_ivs_can_be_represented (loop))
- {
- DEBUG_PRINT (dp << "[scop-detection-fail] loop_" << loop->num
- << "IV cannot be represented.\n");
- return true;
- }
-
/* Check if all loop nests have at least one data reference.
??? This check is expensive and loops premature at this point.
If important to retain we can pre-compute this for all innermost
@@ -963,32 +934,24 @@ scop_detection::graphite_can_represent_init (tree e)
Something like "i * n" or "n * m" is not allowed. */
bool
-scop_detection::graphite_can_represent_scev (tree scev)
+scop_detection::graphite_can_represent_scev (sese_l scop, tree scev)
{
if (chrec_contains_undetermined (scev))
return false;
- /* We disable the handling of pointer types, because it's currently not
- supported by Graphite with the isl AST generator. SSA_NAME nodes are
- the only nodes, which are disabled in case they are pointers to object
- types, but this can be changed. */
-
- if (POINTER_TYPE_P (TREE_TYPE (scev)) && TREE_CODE (scev) == SSA_NAME)
- return false;
-
switch (TREE_CODE (scev))
{
case NEGATE_EXPR:
case BIT_NOT_EXPR:
CASE_CONVERT:
case NON_LVALUE_EXPR:
- return graphite_can_represent_scev (TREE_OPERAND (scev, 0));
+ return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0));
case PLUS_EXPR:
case POINTER_PLUS_EXPR:
case MINUS_EXPR:
- return graphite_can_represent_scev (TREE_OPERAND (scev, 0))
- && graphite_can_represent_scev (TREE_OPERAND (scev, 1));
+ return graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0))
+ && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 1));
case MULT_EXPR:
return !CONVERT_EXPR_CODE_P (TREE_CODE (TREE_OPERAND (scev, 0)))
@@ -996,18 +959,20 @@ scop_detection::graphite_can_represent_scev (tree scev)
&& !(chrec_contains_symbols (TREE_OPERAND (scev, 0))
&& chrec_contains_symbols (TREE_OPERAND (scev, 1)))
&& graphite_can_represent_init (scev)
- && graphite_can_represent_scev (TREE_OPERAND (scev, 0))
- && graphite_can_represent_scev (TREE_OPERAND (scev, 1));
+ && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 0))
+ && graphite_can_represent_scev (scop, TREE_OPERAND (scev, 1));
case POLYNOMIAL_CHREC:
/* Check for constant strides. With a non constant stride of
'n' we would have a value of 'iv * n'. Also check that the
initial value can represented: for example 'n * m' cannot be
represented. */
+ gcc_assert (loop_in_sese_p (get_loop (cfun,
+ CHREC_VARIABLE (scev)), scop));
if (!evolution_function_right_is_integer_cst (scev)
|| !graphite_can_represent_init (scev))
return false;
- return graphite_can_represent_scev (CHREC_LEFT (scev));
+ return graphite_can_represent_scev (scop, CHREC_LEFT (scev));
default:
break;
@@ -1031,7 +996,7 @@ scop_detection::graphite_can_represent_expr (sese_l scop, loop_p loop,
tree expr)
{
tree scev = scalar_evolution_in_region (scop, loop, expr);
- return graphite_can_represent_scev (scev);
+ return graphite_can_represent_scev (scop, scev);
}
/* Return true if the data references of STMT can be represented by Graphite.
@@ -1040,12 +1005,15 @@ scop_detection::graphite_can_represent_expr (sese_l scop, loop_p loop,
bool
scop_detection::stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt)
{
- loop_p nest;
+ edge nest;
loop_p loop = loop_containing_stmt (stmt);
if (!loop_in_sese_p (loop, scop))
- nest = loop;
+ {
+ nest = scop.entry;
+ loop = NULL;
+ }
else
- nest = outermost_loop_in_sese (scop, gimple_bb (stmt));
+ nest = loop_preheader_edge (outermost_loop_in_sese (scop, gimple_bb (stmt)));
auto_vec<data_reference_p> drs;
if (! graphite_find_data_references_in_stmt (nest, loop, stmt, &drs))
@@ -1056,7 +1024,7 @@ scop_detection::stmt_has_simple_data_refs_p (sese_l scop, gimple *stmt)
FOR_EACH_VEC_ELT (drs, j, dr)
{
for (unsigned i = 0; i < DR_NUM_DIMENSIONS (dr); ++i)
- if (! graphite_can_represent_scev (DR_ACCESS_FN (dr, i)))
+ if (! graphite_can_represent_scev (scop, DR_ACCESS_FN (dr, i)))
return false;
}
@@ -1415,12 +1383,15 @@ try_generate_gimple_bb (scop_p scop, basic_block bb)
vec<scalar_use> reads = vNULL;
sese_l region = scop->scop_info->region;
- loop_p nest;
+ edge nest;
loop_p loop = bb->loop_father;
if (!loop_in_sese_p (loop, region))
- nest = loop;
+ {
+ nest = region.entry;
+ loop = NULL;
+ }
else
- nest = outermost_loop_in_sese (region, bb);
+ nest = loop_preheader_edge (outermost_loop_in_sese (region, bb));
for (gimple_stmt_iterator gsi = gsi_start_bb (bb); !gsi_end_p (gsi);
gsi_next (&gsi))
diff --git a/gcc/graphite-sese-to-poly.c b/gcc/graphite-sese-to-poly.c
index 0e6824bbd45..ed6cbeccca1 100644
--- a/gcc/graphite-sese-to-poly.c
+++ b/gcc/graphite-sese-to-poly.c
@@ -86,7 +86,7 @@ extract_affine_chrec (scop_p s, tree e, __isl_take isl_space *space)
isl_pw_aff *lhs = extract_affine (s, CHREC_LEFT (e), isl_space_copy (space));
isl_pw_aff *rhs = extract_affine (s, CHREC_RIGHT (e), isl_space_copy (space));
isl_local_space *ls = isl_local_space_from_space (space);
- unsigned pos = sese_loop_depth (s->scop_info->region, get_chrec_loop (e));
+ unsigned pos = sese_loop_depth (s->scop_info->region, get_chrec_loop (e)) - 1;
isl_aff *loop = isl_aff_set_coefficient_si
(isl_aff_zero_on_domain (ls), isl_dim_in, pos, 1);
isl_pw_aff *l = isl_pw_aff_from_aff (loop);
@@ -763,10 +763,10 @@ add_loop_constraints (scop_p scop, __isl_take isl_set *domain, loop_p loop,
return domain;
const sese_l &region = scop->scop_info->region;
if (!loop_in_sese_p (loop, region))
- ;
- else
- /* Recursion all the way up to the context loop. */
- domain = add_loop_constraints (scop, domain, loop_outer (loop), context);
+ return domain;
+
+ /* Recursion all the way up to the context loop. */
+ domain = add_loop_constraints (scop, domain, loop_outer (loop), context);
/* Then, build constraints over the loop in post-order: outer to inner. */
@@ -777,21 +777,6 @@ add_loop_constraints (scop_p scop, __isl_take isl_set *domain, loop_p loop,
domain = add_iter_domain_dimension (domain, loop, scop);
isl_space *space = isl_set_get_space (domain);
- if (!loop_in_sese_p (loop, region))
- {
- /* 0 == loop_i */
- isl_local_space *ls = isl_local_space_from_space (space);
- isl_constraint *c = isl_equality_alloc (ls);
- c = isl_constraint_set_coefficient_si (c, isl_dim_set, loop_index, 1);
- if (dump_file)
- {
- fprintf (dump_file, "[sese-to-poly] adding constraint to the domain: ");
- print_isl_constraint (dump_file, c);
- }
- domain = isl_set_add_constraint (domain, c);
- return domain;
- }
-
/* 0 <= loop_i */
isl_local_space *ls = isl_local_space_from_space (isl_space_copy (space));
isl_constraint *c = isl_inequality_alloc (ls);
@@ -1066,8 +1051,6 @@ outer_projection_mupa (__isl_take isl_union_set *set, int n)
return isl_multi_union_pw_aff_from_union_pw_multi_aff (data.res);
}
-static bool schedule_error;
-
/* Embed SCHEDULE in the constraints of the LOOP domain. */
static isl_schedule *
@@ -1082,11 +1065,9 @@ add_loop_schedule (__isl_take isl_schedule *schedule, loop_p loop,
return empty < 0 ? isl_schedule_free (schedule) : schedule;
isl_union_set *domain = isl_schedule_get_domain (schedule);
- /* We cannot apply an empty domain to pbbs in this loop so fail.
- ??? Somehow drop pbbs in the loop instead. */
+ /* We cannot apply an empty domain to pbbs in this loop so return early. */
if (isl_union_set_is_empty (domain))
{
- schedule_error = true;
isl_union_set_free (domain);
return schedule;
}
@@ -1216,8 +1197,6 @@ build_schedule_loop_nest (scop_p scop, int *index, loop_p context_loop)
static bool
build_original_schedule (scop_p scop)
{
- schedule_error = false;
-
int i = 0;
int n = scop->pbbs.length ();
while (i < n)
@@ -1232,14 +1211,6 @@ build_original_schedule (scop_p scop)
scop->original_schedule = add_in_sequence (scop->original_schedule, s);
}
- if (schedule_error)
- {
- if (dump_file)
- fprintf (dump_file, "[sese-to-poly] failed to build "
- "original schedule\n");
- return false;
- }
-
if (dump_file)
{
fprintf (dump_file, "[sese-to-poly] original schedule:\n");
diff --git a/gcc/haifa-sched.c b/gcc/haifa-sched.c
index e7014cbb8b3..d6dab57101b 100644
--- a/gcc/haifa-sched.c
+++ b/gcc/haifa-sched.c
@@ -6303,7 +6303,7 @@ prune_ready_list (state_t temp_state, bool first_cycle_insn_p,
{
int i, pass;
bool sched_group_found = false;
- int min_cost_group = 1;
+ int min_cost_group = 0;
if (sched_fusion)
return;
@@ -6319,8 +6319,8 @@ prune_ready_list (state_t temp_state, bool first_cycle_insn_p,
}
/* Make two passes if there's a SCHED_GROUP_P insn; make sure to handle
- such an insn first and note its cost, then schedule all other insns
- for one cycle later. */
+ such an insn first and note its cost. If at least one SCHED_GROUP_P insn
+ gets queued, then all other insns get queued for one cycle later. */
for (pass = sched_group_found ? 0 : 1; pass < 2; )
{
int n = ready.n_ready;
@@ -6333,7 +6333,8 @@ prune_ready_list (state_t temp_state, bool first_cycle_insn_p,
if (DEBUG_INSN_P (insn))
continue;
- if (sched_group_found && !SCHED_GROUP_P (insn))
+ if (sched_group_found && !SCHED_GROUP_P (insn)
+ && ((pass == 0) || (min_cost_group >= 1)))
{
if (pass == 0)
continue;
diff --git a/gcc/hooks.c b/gcc/hooks.c
index c0123a8b57a..e69b7a03c2c 100644
--- a/gcc/hooks.c
+++ b/gcc/hooks.c
@@ -259,6 +259,12 @@ hook_uint_void_0 (void)
return 0;
}
+HOST_WIDE_INT
+hook_hwi_void_0 (void)
+{
+ return 0;
+}
+
void
hook_void_tree (tree)
{
diff --git a/gcc/hooks.h b/gcc/hooks.h
index 5de979ca1b8..a211a637307 100644
--- a/gcc/hooks.h
+++ b/gcc/hooks.h
@@ -98,6 +98,8 @@ extern int hook_int_rtx_bool_0 (rtx, bool);
extern int hook_int_rtx_mode_as_bool_0 (rtx, machine_mode, addr_space_t,
bool);
+extern HOST_WIDE_INT hook_hwi_void_0 (void);
+
extern tree hook_tree_const_tree_null (const_tree);
extern tree hook_tree_void_null (void);
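
Note: hook_hwi_void_0 joins the family of trivial default hooks; given the new targetm.starting_frame_offset () calls in function.c above, it presumably serves as that hook's zero-returning default. The pattern, reduced to a standalone sketch:

    /* Targets override a function pointer whose safe default returns 0.  */
    typedef long sketch_hwi;

    static sketch_hwi
    sketch_hook_hwi_void_0 (void)
    {
      return 0;
    }

    struct sketch_target_hooks
    {
      sketch_hwi (*starting_frame_offset) (void);
    };

    static struct sketch_target_hooks sketch_targetm
      = { sketch_hook_hwi_void_0 };
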
diff --git a/gcc/ifcvt.c b/gcc/ifcvt.c
index 4362d0206b9..47169270d6b 100644
--- a/gcc/ifcvt.c
+++ b/gcc/ifcvt.c
@@ -1724,8 +1724,8 @@ noce_emit_cmove (struct noce_if_info *if_info, rtx x, enum rtx_code code,
{
rtx reg_vtrue = SUBREG_REG (vtrue);
rtx reg_vfalse = SUBREG_REG (vfalse);
- poly_int64 byte_vtrue = SUBREG_BYTE (vtrue);
- poly_int64 byte_vfalse = SUBREG_BYTE (vfalse);
+ poly_uint64 byte_vtrue = SUBREG_BYTE (vtrue);
+ poly_uint64 byte_vfalse = SUBREG_BYTE (vfalse);
rtx promoted_target;
if (GET_MODE (reg_vtrue) != GET_MODE (reg_vfalse)
diff --git a/gcc/incpath.c b/gcc/incpath.c
index 47942e2547d..a2ee69f428c 100644
--- a/gcc/incpath.c
+++ b/gcc/incpath.c
@@ -46,7 +46,7 @@
static const char dir_separator_str[] = { DIR_SEPARATOR, 0 };
-static void add_env_var_paths (const char *, int);
+static void add_env_var_paths (const char *, incpath_kind);
static void add_standard_paths (const char *, const char *, const char *, int);
static void free_path (struct cpp_dir *, int);
static void merge_include_chains (const char *, cpp_reader *, int);
@@ -56,8 +56,9 @@ static struct cpp_dir *remove_duplicates (cpp_reader *, struct cpp_dir *,
struct cpp_dir *, int);
/* Include chains heads and tails. */
-static struct cpp_dir *heads[4];
-static struct cpp_dir *tails[4];
+static struct cpp_dir *heads[INC_MAX];
+static struct cpp_dir *tails[INC_MAX];
+
static bool quote_ignores_source_dir;
enum { REASON_QUIET = 0, REASON_NOENT, REASON_DUP, REASON_DUP_SYS };
@@ -92,7 +93,7 @@ free_path (struct cpp_dir *path, int reason)
/* Read ENV_VAR for a PATH_SEPARATOR-separated list of file names; and
append all the names to the search path CHAIN. */
static void
-add_env_var_paths (const char *env_var, int chain)
+add_env_var_paths (const char *env_var, incpath_kind chain)
{
char *p, *q, *path;
@@ -116,7 +117,7 @@ add_env_var_paths (const char *env_var, int chain)
path[q - p] = '\0';
}
- add_path (path, chain, chain == SYSTEM, false);
+ add_path (path, chain, chain == INC_SYSTEM, false);
}
}
@@ -159,7 +160,7 @@ add_standard_paths (const char *sysroot, const char *iprefix,
str = reconcat (str, str, dir_separator_str,
imultiarch, NULL);
}
- add_path (str, SYSTEM, p->cxx_aware, false);
+ add_path (str, INC_SYSTEM, p->cxx_aware, false);
}
}
}
@@ -225,7 +226,7 @@ add_standard_paths (const char *sysroot, const char *iprefix,
str = reconcat (str, str, dir_separator_str, imultiarch, NULL);
}
- add_path (str, SYSTEM, p->cxx_aware, false);
+ add_path (str, INC_SYSTEM, p->cxx_aware, false);
}
}
}
@@ -349,29 +350,32 @@ merge_include_chains (const char *sysroot, cpp_reader *pfile, int verbose)
/* Add the sysroot to user-supplied paths starting with "=". */
if (sysroot)
{
- add_sysroot_to_chain (sysroot, QUOTE);
- add_sysroot_to_chain (sysroot, BRACKET);
- add_sysroot_to_chain (sysroot, SYSTEM);
- add_sysroot_to_chain (sysroot, AFTER);
+ add_sysroot_to_chain (sysroot, INC_QUOTE);
+ add_sysroot_to_chain (sysroot, INC_BRACKET);
+ add_sysroot_to_chain (sysroot, INC_SYSTEM);
+ add_sysroot_to_chain (sysroot, INC_AFTER);
}
/* Join the SYSTEM and AFTER chains. Remove duplicates in the
resulting SYSTEM chain. */
- if (heads[SYSTEM])
- tails[SYSTEM]->next = heads[AFTER];
+ if (heads[INC_SYSTEM])
+ tails[INC_SYSTEM]->next = heads[INC_AFTER];
else
- heads[SYSTEM] = heads[AFTER];
- heads[SYSTEM] = remove_duplicates (pfile, heads[SYSTEM], 0, 0, verbose);
+ heads[INC_SYSTEM] = heads[INC_AFTER];
+ heads[INC_SYSTEM]
+ = remove_duplicates (pfile, heads[INC_SYSTEM], 0, 0, verbose);
/* Remove duplicates from BRACKET that are in itself or SYSTEM, and
join it to SYSTEM. */
- heads[BRACKET] = remove_duplicates (pfile, heads[BRACKET], heads[SYSTEM],
- heads[SYSTEM], verbose);
+ heads[INC_BRACKET]
+ = remove_duplicates (pfile, heads[INC_BRACKET], heads[INC_SYSTEM],
+ heads[INC_SYSTEM], verbose);
/* Remove duplicates from QUOTE that are in itself or SYSTEM, and
join it to BRACKET. */
- heads[QUOTE] = remove_duplicates (pfile, heads[QUOTE], heads[SYSTEM],
- heads[BRACKET], verbose);
+ heads[INC_QUOTE]
+ = remove_duplicates (pfile, heads[INC_QUOTE], heads[INC_SYSTEM],
+ heads[INC_BRACKET], verbose);
/* If verbose, print the list of dirs to search. */
if (verbose)
@@ -379,9 +383,9 @@ merge_include_chains (const char *sysroot, cpp_reader *pfile, int verbose)
struct cpp_dir *p;
fprintf (stderr, _("#include \"...\" search starts here:\n"));
- for (p = heads[QUOTE];; p = p->next)
+ for (p = heads[INC_QUOTE];; p = p->next)
{
- if (p == heads[BRACKET])
+ if (p == heads[INC_BRACKET])
fprintf (stderr, _("#include <...> search starts here:\n"));
if (!p)
break;
@@ -398,14 +402,14 @@ merge_include_chains (const char *sysroot, cpp_reader *pfile, int verbose)
void
split_quote_chain (void)
{
- if (heads[QUOTE])
- free_path (heads[QUOTE], REASON_QUIET);
- if (tails[QUOTE])
- free_path (tails[QUOTE], REASON_QUIET);
- heads[QUOTE] = heads[BRACKET];
- tails[QUOTE] = tails[BRACKET];
- heads[BRACKET] = NULL;
- tails[BRACKET] = NULL;
+ if (heads[INC_QUOTE])
+ free_path (heads[INC_QUOTE], REASON_QUIET);
+ if (tails[INC_QUOTE])
+ free_path (tails[INC_QUOTE], REASON_QUIET);
+ heads[INC_QUOTE] = heads[INC_BRACKET];
+ tails[INC_QUOTE] = tails[INC_BRACKET];
+ heads[INC_BRACKET] = NULL;
+ tails[INC_BRACKET] = NULL;
/* This is NOT redundant. */
quote_ignores_source_dir = true;
}
@@ -413,7 +417,7 @@ split_quote_chain (void)
/* Add P to the chain specified by CHAIN. */
void
-add_cpp_dir_path (cpp_dir *p, int chain)
+add_cpp_dir_path (cpp_dir *p, incpath_kind chain)
{
if (tails[chain])
tails[chain]->next = p;
@@ -425,7 +429,7 @@ add_cpp_dir_path (cpp_dir *p, int chain)
/* Add PATH to the include chain CHAIN. PATH must be malloc-ed and
NUL-terminated. */
void
-add_path (char *path, int chain, int cxx_aware, bool user_supplied_p)
+add_path (char *path, incpath_kind chain, int cxx_aware, bool user_supplied_p)
{
cpp_dir *p;
@@ -450,7 +454,7 @@ add_path (char *path, int chain, int cxx_aware, bool user_supplied_p)
#ifndef INO_T_EQ
p->canonical_name = lrealpath (path);
#endif
- if (chain == SYSTEM || chain == AFTER)
+ if (chain == INC_SYSTEM || chain == INC_AFTER)
p->sysp = 1 + !cxx_aware;
else
p->sysp = 0;
@@ -480,8 +484,8 @@ register_include_chains (cpp_reader *pfile, const char *sysroot,
/* CPATH and language-dependent environment variables may add to the
include chain. */
- add_env_var_paths ("CPATH", BRACKET);
- add_env_var_paths (lang_env_vars[idx], SYSTEM);
+ add_env_var_paths ("CPATH", INC_BRACKET);
+ add_env_var_paths (lang_env_vars[idx], INC_SYSTEM);
target_c_incpath.extra_pre_includes (sysroot, iprefix, stdinc);
@@ -493,14 +497,14 @@ register_include_chains (cpp_reader *pfile, const char *sysroot,
merge_include_chains (sysroot, pfile, verbose);
- cpp_set_include_chains (pfile, heads[QUOTE], heads[BRACKET],
+ cpp_set_include_chains (pfile, heads[INC_QUOTE], heads[INC_BRACKET],
quote_ignores_source_dir);
}
/* Return the current chain of cpp dirs. */
struct cpp_dir *
-get_added_cpp_dirs (int chain)
+get_added_cpp_dirs (incpath_kind chain)
{
return heads[chain];
}
diff --git a/gcc/incpath.h b/gcc/incpath.h
index 39a29cdd47e..32c3dceb78b 100644
--- a/gcc/incpath.h
+++ b/gcc/incpath.h
@@ -18,13 +18,22 @@
#ifndef GCC_INCPATH_H
#define GCC_INCPATH_H
+/* Various fragments of include path. */
+enum incpath_kind {
+ INC_QUOTE = 0, /* include "foo" */
+ INC_BRACKET, /* include <foo> */
+ INC_SYSTEM, /* sysinclude */
+ INC_AFTER, /* post-sysinclude */
+ INC_MAX
+};
+
extern void split_quote_chain (void);
-extern void add_path (char *, int, int, bool);
+extern void add_path (char *, incpath_kind, int, bool);
extern void register_include_chains (cpp_reader *, const char *,
const char *, const char *,
int, int, int);
-extern void add_cpp_dir_path (struct cpp_dir *, int);
-extern struct cpp_dir *get_added_cpp_dirs (int);
+extern void add_cpp_dir_path (struct cpp_dir *, incpath_kind);
+extern struct cpp_dir *get_added_cpp_dirs (incpath_kind);
struct target_c_incpath_s {
/* Do extra includes processing. STDINC is false iff -nostdinc was given. */
@@ -34,6 +43,4 @@ struct target_c_incpath_s {
extern struct target_c_incpath_s target_c_incpath;
-enum { QUOTE = 0, BRACKET, SYSTEM, AFTER };
-
#endif /* GCC_INCPATH_H */
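A minimal sketch of the retyped interface above, assuming a hypothetical caller
that registers a vendor directory as a C++-aware system include. add_path
requires a malloc-ed, NUL-terminated string and takes ownership of it; the
typed chain argument means a stray integer no longer compiles:

    /* Hypothetical caller of the new typed interface; the directory
       name is purely illustrative.  */
    char *dir = xstrdup ("/opt/vendor/include");
    add_path (dir, INC_SYSTEM, /*cxx_aware=*/1, /*user_supplied_p=*/false);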
diff --git a/gcc/internal-fn.c b/gcc/internal-fn.c
index eb3ac418b83..4264bb81fe1 100644
--- a/gcc/internal-fn.c
+++ b/gcc/internal-fn.c
@@ -76,6 +76,7 @@ init_internal_fns ()
/* Create static initializers for the information returned by
direct_internal_fn. */
+
#define NOT_VECTORIZABLE false, false, 0
#define VECTORIZABLE true, false, 0
#define VECTORIZABLE_COND true, true, 0
@@ -91,8 +92,8 @@ init_internal_fns ()
#define scatter_store_direct { 3, 3, NOT_VECTORIZABLE }
#define unary_direct { 0, 0, VECTORIZABLE }
#define binary_direct { 0, 0, VECTORIZABLE }
-#define ternary_direct { 0, 0, VECTORIZABLE }
#define cond_binary_direct { 1, 1, VECTORIZABLE_COND }
+#define ternary_direct { 0, 0, VECTORIZABLE }
#define cond_ternary_direct { 1, 1, VECTORIZABLE_COND }
#define while_direct { 0, 2, NOT_VECTORIZABLE }
#define clastb_direct { 2, 2, NOT_VECTORIZABLE }
diff --git a/gcc/internal-fn.h b/gcc/internal-fn.h
index e25287d5ea1..fff1cb4204a 100644
--- a/gcc/internal-fn.h
+++ b/gcc/internal-fn.h
@@ -230,7 +230,8 @@ extern void expand_internal_call (gcall *);
extern void expand_internal_call (internal_fn, gcall *);
extern void expand_PHI (internal_fn, gcall *);
-extern bool vectorized_internal_fn_supported_p (internal_fn, machine_mode);
extern internal_fn get_gather_scatter_internal_fn (bool, tree, tree, bool);
+extern bool vectorized_internal_fn_supported_p (internal_fn, machine_mode);
+
#endif
diff --git a/gcc/ipa-utils.h b/gcc/ipa-utils.h
index f061c84c8a9..2affbd66d13 100644
--- a/gcc/ipa-utils.h
+++ b/gcc/ipa-utils.h
@@ -217,11 +217,11 @@ type_in_anonymous_namespace_p (const_tree t)
{
/* C++ FE uses magic <anon> as assembler names of anonymous types.
Verify that this matches type_in_anonymous_namespace_p. */
- gcc_checking_assert (!in_lto_p || !DECL_ASSEMBLER_NAME_SET_P (t)
- || !strcmp
- ("<anon>",
- IDENTIFIER_POINTER
- (DECL_ASSEMBLER_NAME (TYPE_NAME (t)))));
+ gcc_checking_assert (!in_lto_p
+ || !DECL_ASSEMBLER_NAME_SET_P (TYPE_NAME (t))
+ || !strcmp ("<anon>",
+ IDENTIFIER_POINTER
+ (DECL_ASSEMBLER_NAME (TYPE_NAME (t)))));
return true;
}
return false;
@@ -245,14 +245,13 @@ odr_type_p (const_tree t)
if (type_in_anonymous_namespace_p (t))
return true;
- if (TYPE_NAME (t) && TREE_CODE (TYPE_NAME (t)) == TYPE_DECL
- && DECL_ASSEMBLER_NAME_SET_P (TYPE_NAME (t)))
+ if (TYPE_NAME (t) && DECL_ASSEMBLER_NAME_SET_P (TYPE_NAME (t)))
{
/* C++ FE uses magic <anon> as assembler names of anonymous types.
Verify that this matches type_in_anonymous_namespace_p. */
gcc_checking_assert (strcmp ("<anon>",
- IDENTIFIER_POINTER
- (DECL_ASSEMBLER_NAME (TYPE_NAME (t)))));
+ IDENTIFIER_POINTER
+ (DECL_ASSEMBLER_NAME (TYPE_NAME (t)))));
return true;
}
return false;
diff --git a/gcc/ira-conflicts.c b/gcc/ira-conflicts.c
index 32e653dc8ee..61c03e2ddfd 100644
--- a/gcc/ira-conflicts.c
+++ b/gcc/ira-conflicts.c
@@ -228,6 +228,8 @@ go_through_subreg (rtx x, int *offset)
SUBREG_BYTE (x), GET_MODE (x));
else if (!can_div_trunc_p (SUBREG_BYTE (x),
REGMODE_NATURAL_SIZE (GET_MODE (x)), offset))
+ /* Checked by validate_subreg. We must know at compile time which
+ inner hard registers are being accessed. */
gcc_unreachable ();
return reg;
}
diff --git a/gcc/ira.c b/gcc/ira.c
index 5d3086cfa14..4345f7595d0 100644
--- a/gcc/ira.c
+++ b/gcc/ira.c
@@ -4048,16 +4048,10 @@ static bool
get_subreg_tracking_sizes (rtx x, HOST_WIDE_INT *outer_size,
HOST_WIDE_INT *inner_size, HOST_WIDE_INT *start)
{
- HOST_WIDE_INT tmp_outer_size, tmp_inner_size, tmp_start;
rtx reg = regno_reg_rtx[REGNO (SUBREG_REG (x))];
- if (!GET_MODE_SIZE (GET_MODE (reg)).is_constant (&tmp_inner_size)
- || !GET_MODE_SIZE (GET_MODE (x)).is_constant (&tmp_outer_size)
- || !SUBREG_BYTE (x).is_constant (&tmp_start))
- return false;
- *outer_size = tmp_outer_size;
- *inner_size = tmp_inner_size;
- *start = tmp_start;
- return true;
+ return (GET_MODE_SIZE (GET_MODE (x)).is_constant (outer_size)
+ && GET_MODE_SIZE (GET_MODE (reg)).is_constant (inner_size)
+ && SUBREG_BYTE (x).is_constant (start));
}
/* Init LIVE_SUBREGS[ALLOCNUM] and LIVE_SUBREGS_USED[ALLOCNUM] for
@@ -4182,12 +4176,7 @@ build_insn_chain (void)
size is not known; in those cases we need to be
conservative and treat the definition as a partial
definition of the full register rather than a full
- definition of a specific part of the register.
-
- ??? We might be able to do better for polynomial
- sizes, e.g. for pairs of variable-length vectors
- where only one half of the pair is being
- accessed. */
+ definition of a specific part of the register. */
if (GET_CODE (reg) == SUBREG
&& !DF_REF_FLAGS_IS_SET (def, DF_REF_ZERO_EXTRACT)
&& get_subreg_tracking_sizes (reg, &outer_size,
diff --git a/gcc/lower-subreg.c b/gcc/lower-subreg.c
index e36d6765d4d..dd853d799bc 100644
--- a/gcc/lower-subreg.c
+++ b/gcc/lower-subreg.c
@@ -610,13 +610,13 @@ decompose_register (unsigned int regno)
/* Get a SUBREG of a CONCATN. */
static rtx
-simplify_subreg_concatn (machine_mode outermode, rtx op, poly_int64 orig_byte)
+simplify_subreg_concatn (machine_mode outermode, rtx op, poly_uint64 orig_byte)
{
unsigned int outer_size, outer_words, inner_size, inner_words;
machine_mode innermode, partmode;
rtx part;
unsigned int final_offset;
- HOST_WIDE_INT byte;
+ unsigned int byte;
innermode = GET_MODE (op);
if (!interesting_mode_p (outermode, &outer_size, &outer_words)
diff --git a/gcc/lra-constraints.c b/gcc/lra-constraints.c
index e7f3a3d9c5b..ff192733955 100644
--- a/gcc/lra-constraints.c
+++ b/gcc/lra-constraints.c
@@ -1672,9 +1672,7 @@ simplify_operand_subreg (int nop, machine_mode reg_mode)
{
enum reg_class rclass;
- subreg_shape shape (innermode, SUBREG_BYTE (operand), mode);
- if (REG_P (reg)
- || hard_reg_set_empty_p (simplifiable_subregs (shape)))
+ if (REG_P (reg))
/* There is a big probability that we will get the same class
for the new pseudo and we will get the same insn which
means infinite looping. So spill the new pseudo. */
@@ -3103,7 +3101,8 @@ static bool
equiv_address_substitution (struct address_info *ad)
{
rtx base_reg, new_base_reg, index_reg, new_index_reg, *base_term, *index_term;
- HOST_WIDE_INT disp, scale;
+ poly_int64 disp;
+ HOST_WIDE_INT scale;
bool change_p;
base_term = strip_subreg (ad->base_term);
@@ -3134,6 +3133,7 @@ equiv_address_substitution (struct address_info *ad)
}
if (base_reg != new_base_reg)
{
+ poly_int64 offset;
if (REG_P (new_base_reg))
{
*base_term = new_base_reg;
@@ -3141,10 +3141,10 @@ equiv_address_substitution (struct address_info *ad)
}
else if (GET_CODE (new_base_reg) == PLUS
&& REG_P (XEXP (new_base_reg, 0))
- && CONST_INT_P (XEXP (new_base_reg, 1))
+ && poly_int_rtx_p (XEXP (new_base_reg, 1), &offset)
&& can_add_disp_p (ad))
{
- disp += INTVAL (XEXP (new_base_reg, 1));
+ disp += offset;
*base_term = XEXP (new_base_reg, 0);
change_p = true;
}
@@ -3153,6 +3153,7 @@ equiv_address_substitution (struct address_info *ad)
}
if (index_reg != new_index_reg)
{
+ poly_int64 offset;
if (REG_P (new_index_reg))
{
*index_term = new_index_reg;
@@ -3160,16 +3161,16 @@ equiv_address_substitution (struct address_info *ad)
}
else if (GET_CODE (new_index_reg) == PLUS
&& REG_P (XEXP (new_index_reg, 0))
- && CONST_INT_P (XEXP (new_index_reg, 1))
+ && poly_int_rtx_p (XEXP (new_index_reg, 1), &offset)
&& can_add_disp_p (ad)
&& (scale = get_index_scale (ad)))
{
- disp += INTVAL (XEXP (new_index_reg, 1)) * scale;
+ disp += offset * scale;
*index_term = XEXP (new_index_reg, 0);
change_p = true;
}
}
- if (disp != 0)
+ if (maybe_nonzero (disp))
{
if (ad->disp != NULL)
*ad->disp = plus_constant (GET_MODE (*ad->inner), *ad->disp, disp);
@@ -3420,7 +3421,7 @@ process_address_1 (int nop, bool check_only_p,
gcc_assert (ad.disp == ad.disp_term);
poly_int64 orig_offset;
rtx offset1, offset2;
- if (poly_int_const_p (*ad.disp, &orig_offset)
+ if (poly_int_rtx_p (*ad.disp, &orig_offset)
&& targetm.legitimize_address_displacement (&offset1, &offset2,
orig_offset,
ad.mode))
@@ -3641,9 +3642,10 @@ emit_inc (enum reg_class new_rclass, rtx in, rtx value, poly_int64 inc_amount)
register. */
if (plus_p)
{
- if (CONST_INT_P (inc))
+ poly_int64 offset;
+ if (poly_int_rtx_p (inc, &offset))
emit_insn (gen_add2_insn (result,
- gen_int_mode (-INTVAL (inc),
+ gen_int_mode (-offset,
GET_MODE (result))));
else
emit_insn (gen_sub2_insn (result, inc));
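The lra-constraints.c hunks above share one conversion pattern: each
CONST_INT_P/INTVAL pair becomes poly_int_rtx_p, which also accepts
CONST_POLY_INT rtxes and stores the possibly variable value in a poly_int64.
A sketch of the idiom, using a hypothetical helper name:

    /* Hypothetical helper showing the recurring conversion: accept both
       CONST_INT and CONST_POLY_INT displacements via one predicate.  */
    static bool
    add_displacement (rtx x, poly_int64 *disp)
    {
      poly_int64 offset;
      if (!poly_int_rtx_p (x, &offset))
        return false;     /* Not a compile-time displacement.  */
      *disp += offset;    /* Poly-aware addition.  */
      return true;
    }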
diff --git a/gcc/lra-eliminations.c b/gcc/lra-eliminations.c
index 1e48dfad4b1..b958190c2c7 100644
--- a/gcc/lra-eliminations.c
+++ b/gcc/lra-eliminations.c
@@ -197,6 +197,7 @@ static rtx
form_sum (rtx x, rtx y)
{
machine_mode mode = GET_MODE (x);
+ poly_int64 offset;
if (mode == VOIDmode)
mode = GET_MODE (y);
@@ -204,11 +205,10 @@ form_sum (rtx x, rtx y)
if (mode == VOIDmode)
mode = Pmode;
- poly_int64 c;
- if (poly_int_const_p (x, &c))
- return plus_constant (mode, y, c);
- else if (poly_int_const_p (y, &c))
- return plus_constant (mode, x, c);
+ if (poly_int_rtx_p (x, &offset))
+ return plus_constant (mode, y, offset);
+ else if (poly_int_rtx_p (y, &offset))
+ return plus_constant (mode, x, offset);
else if (CONSTANT_P (x))
std::swap (x, y);
@@ -406,7 +406,7 @@ lra_eliminate_regs_1 (rtx_insn *insn, rtx x, machine_mode mem_mode,
? ep->offset - ep->previous_offset : ep->offset);
if (full_p && insn != NULL_RTX && ep->to_rtx == stack_pointer_rtx)
offset -= lra_get_insn_recog_data (insn)->sp_offset;
- if (poly_int_const_p (XEXP (x, 1), &curr_offset)
+ if (poly_int_rtx_p (XEXP (x, 1), &curr_offset)
&& must_eq (curr_offset, -offset))
return to;
else
@@ -732,7 +732,7 @@ mark_not_eliminable (rtx x, machine_mode mem_mode)
struct lra_elim_table *ep;
int i, j;
const char *fmt;
- poly_int64 offset;
+ poly_int64 offset = 0;
switch (code)
{
@@ -746,21 +746,21 @@ mark_not_eliminable (rtx x, machine_mode mem_mode)
&& ((code != PRE_MODIFY && code != POST_MODIFY)
|| (GET_CODE (XEXP (x, 1)) == PLUS
&& XEXP (x, 0) == XEXP (XEXP (x, 1), 0)
- && CONST_INT_P (XEXP (XEXP (x, 1), 1)))))
+ && poly_int_rtx_p (XEXP (XEXP (x, 1), 1), &offset))))
{
poly_int64 size = GET_MODE_SIZE (mem_mode);
#ifdef PUSH_ROUNDING
/* If more bytes than MEM_MODE are pushed, account for
them. */
- size = PUSH_ROUNDING (MACRO_INT (size));
+ size = PUSH_ROUNDING (size);
#endif
if (code == PRE_DEC || code == POST_DEC)
curr_sp_change -= size;
else if (code == PRE_INC || code == POST_INC)
curr_sp_change += size;
else if (code == PRE_MODIFY || code == POST_MODIFY)
- curr_sp_change += INTVAL (XEXP (XEXP (x, 1), 1));
+ curr_sp_change += offset;
}
else if (REG_P (XEXP (x, 0))
&& REGNO (XEXP (x, 0)) >= FIRST_PSEUDO_REGISTER)
@@ -810,7 +810,7 @@ mark_not_eliminable (rtx x, machine_mode mem_mode)
if (SET_DEST (x) == stack_pointer_rtx
&& GET_CODE (SET_SRC (x)) == PLUS
&& XEXP (SET_SRC (x), 0) == SET_DEST (x)
- && poly_int_const_p (XEXP (SET_SRC (x), 1), &offset))
+ && poly_int_rtx_p (XEXP (SET_SRC (x), 1), &offset))
{
curr_sp_change += offset;
return;
@@ -881,7 +881,7 @@ remove_reg_equal_offset_note (rtx_insn *insn, rtx what, poly_int64 *offset_out)
if (REG_NOTE_KIND (link) == REG_EQUAL
&& GET_CODE (XEXP (link, 0)) == PLUS
&& XEXP (XEXP (link, 0), 0) == what
- && poly_int_const_p (XEXP (XEXP (link, 0), 1), offset_out))
+ && poly_int_rtx_p (XEXP (XEXP (link, 0), 1), offset_out))
{
*link_loc = XEXP (link, 1);
return true;
@@ -1004,7 +1004,7 @@ eliminate_regs_in_insn (rtx_insn *insn, bool replace_p, bool first_p,
if (GET_CODE (SET_SRC (old_set)) == PLUS)
plus_src = SET_SRC (old_set);
/* First see if the source is of the form (plus (...) CST). */
- if (plus_src && poly_int_const_p (XEXP (plus_src, 1), &offset))
+ if (plus_src && poly_int_rtx_p (XEXP (plus_src, 1), &offset))
plus_cst_src = plus_src;
/* Check that the first operand of the PLUS is a hard reg or
the lowpart subreg of one. */
@@ -1357,13 +1357,13 @@ init_elimination (void)
if (NONDEBUG_INSN_P (insn))
{
mark_not_eliminable (PATTERN (insn), VOIDmode);
- if (may_ne (curr_sp_change, 0)
+ if (maybe_nonzero (curr_sp_change)
&& find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX))
stop_to_sp_elimination_p = true;
}
}
if (! frame_pointer_needed
- && (may_ne (curr_sp_change, 0) || stop_to_sp_elimination_p)
+ && (maybe_nonzero (curr_sp_change) || stop_to_sp_elimination_p)
&& bb->succs && bb->succs->length () != 0)
for (ep = reg_eliminate; ep < &reg_eliminate[NUM_ELIMINABLE_REGS]; ep++)
if (ep->to == STACK_POINTER_REGNUM)
diff --git a/gcc/lto/ChangeLog b/gcc/lto/ChangeLog
index fef0f9cc96d..3e6b00bc487 100644
--- a/gcc/lto/ChangeLog
+++ b/gcc/lto/ChangeLog
@@ -1,3 +1,9 @@
+2017-10-11 Nathan Sidwell <nathan@acm.org>
+
+ * lto.c (mentions_vars_p_decl_with_vis): Use
+ DECL_ASSEMBLER_NAME_RAW.
+ (lto_fixup_prevailing_decls): Likewise.
+
2017-10-10 Richard Sandiford <richard.sandiford@linaro.org>
* lto.c (compare_tree_sccs_1): Use wi::to_wide when
diff --git a/gcc/lto/lto.c b/gcc/lto/lto.c
index 1b9b2496f82..688c5c8eea3 100644
--- a/gcc/lto/lto.c
+++ b/gcc/lto/lto.c
@@ -591,7 +591,7 @@ mentions_vars_p_decl_with_vis (tree t)
return true;
/* The ordinary accessor macro has side effects; use the raw accessor here. */
- CHECK_NO_VAR (t->decl_with_vis.assembler_name);
+ CHECK_NO_VAR (DECL_ASSEMBLER_NAME_RAW (t));
return false;
}
@@ -2557,7 +2557,7 @@ lto_fixup_prevailing_decls (tree t)
}
if (CODE_CONTAINS_STRUCT (code, TS_DECL_WITH_VIS))
{
- LTO_NO_PREVAIL (t->decl_with_vis.assembler_name);
+ LTO_NO_PREVAIL (DECL_ASSEMBLER_NAME_RAW (t));
}
if (CODE_CONTAINS_STRUCT (code, TS_DECL_NON_COMMON))
{
diff --git a/gcc/machmode.h b/gcc/machmode.h
index c773026c8f3..04c1e877e1c 100644
--- a/gcc/machmode.h
+++ b/gcc/machmode.h
@@ -78,7 +78,7 @@ struct mode_traits<machine_mode>
/* Always treat machine modes as fixed-size while compiling code specific
to targets that have no variable-size modes. */
-#if defined (TARGET_C_FILE) && NUM_POLY_INT_COEFFS == 1
+#if defined (IN_TARGET_CODE) && NUM_POLY_INT_COEFFS == 1
#define ONLY_FIXED_SIZE_MODES 1
#else
#define ONLY_FIXED_SIZE_MODES 0
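For context, a target-specific source file opts in by defining the renamed
macro before including any GCC header; a hypothetical sketch of the top of
such a file:

    /* Hypothetical target source: declare this translation unit as
       target code so that machmode.h may treat all modes as fixed-size
       when NUM_POLY_INT_COEFFS == 1.  */
    #define IN_TARGET_CODE 1

    #include "config.h"
    #include "system.h"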
diff --git a/gcc/match.pd b/gcc/match.pd
index fc17d8a1510..11c04dba77d 100644
--- a/gcc/match.pd
+++ b/gcc/match.pd
@@ -1291,6 +1291,44 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
|| TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))))
(op @1 @0))))
+/* X + Y < Y is the same as X < 0 when there is no overflow. */
+(for op (lt le gt ge)
+ (simplify
+ (op:c (plus:c@2 @0 @1) @1)
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+ && (CONSTANT_CLASS_P (@0) || single_use (@2)))
+ (op @0 { build_zero_cst (TREE_TYPE (@0)); }))))
+/* For equality, this is also true with wrapping overflow. */
+(for op (eq ne)
+ (simplify
+ (op:c (nop_convert@3 (plus:c@2 @0 (convert1? @1))) (convert2? @1))
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+ || TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
+ && (CONSTANT_CLASS_P (@0) || (single_use (@2) && single_use (@3)))
+ && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@2))
+ && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@1)))
+ (op @0 { build_zero_cst (TREE_TYPE (@0)); })))
+ (simplify
+ (op:c (nop_convert@3 (pointer_plus@2 (convert1? @0) @1)) (convert2? @0))
+ (if (tree_nop_conversion_p (TREE_TYPE (@2), TREE_TYPE (@0))
+ && tree_nop_conversion_p (TREE_TYPE (@3), TREE_TYPE (@0))
+ && (CONSTANT_CLASS_P (@1) || (single_use (@2) && single_use (@3))))
+ (op @1 { build_zero_cst (TREE_TYPE (@1)); }))))
+
+/* X - Y < X is the same as Y > 0 when there is no overflow.
+ For equality, this is also true with wrapping overflow. */
+(for op (simple_comparison)
+ (simplify
+ (op:c @0 (minus@2 @0 @1))
+ (if (ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
+ && (TYPE_OVERFLOW_UNDEFINED (TREE_TYPE (@0))
+ || ((op == EQ_EXPR || op == NE_EXPR)
+ && TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0))))
+ && (CONSTANT_CLASS_P (@1) || single_use (@2)))
+ (op @1 { build_zero_cst (TREE_TYPE (@1)); }))))
+
/* Transform:
* (X / Y) == 0 -> X < Y if X, Y are unsigned.
* (X / Y) != 0 -> X >= Y, if X, Y are unsigned.
@@ -1533,22 +1571,14 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
CONSTANT_CLASS_P@2)
/* If one of the types wraps, use that one. */
(if (!ANY_INTEGRAL_TYPE_P (type) || TYPE_OVERFLOW_WRAPS (type))
- (with { tree cst = fold_unary (VIEW_CONVERT_EXPR, type, @1);
- if (cst)
- cst = const_binop (outer_op == PLUS_EXPR
- ? inner_op : neg_inner_op,
- type, @2, cst); }
- (if (cst)
- (outer_op (view_convert @0) { cst; })))
+ (if (outer_op == PLUS_EXPR)
+ (plus (view_convert @0) (inner_op @2 (view_convert @1)))
+ (minus (view_convert @0) (neg_inner_op @2 (view_convert @1))))
(if (!ANY_INTEGRAL_TYPE_P (TREE_TYPE (@0))
|| TYPE_OVERFLOW_WRAPS (TREE_TYPE (@0)))
- (with { tree cst = fold_unary (VIEW_CONVERT_EXPR, TREE_TYPE (@0), @2);
- if (cst)
- cst = const_binop (outer_op == PLUS_EXPR
- ? inner_op : neg_inner_op,
- TREE_TYPE (@0), cst, @1); }
- (if (cst)
- (view_convert (outer_op @0 { cst; }))))
+ (if (outer_op == PLUS_EXPR)
+ (view_convert (plus @0 (inner_op (view_convert @2) @1)))
+ (view_convert (minus @0 (neg_inner_op (view_convert @2) @1))))
/* If the constant operation overflows we cannot do the transform
directly as we would introduce undefined overflow, for example
with (a - 1) + INT_MIN. */
@@ -3146,7 +3176,8 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
(op (abs @0) zerop@1)
(op @0 @1)))
-/* From fold_sign_changed_comparison and fold_widened_comparison. */
+/* From fold_sign_changed_comparison and fold_widened_comparison.
+ FIXME: the lack of symmetry is disturbing. */
(for cmp (simple_comparison)
(simplify
(cmp (convert@0 @00) (convert?@1 @10))
@@ -3159,11 +3190,11 @@ DEFINE_INT_AND_FLOAT_ROUND_FN (RINT)
&& single_use (@0))
(if (TYPE_PRECISION (TREE_TYPE (@00)) == TYPE_PRECISION (TREE_TYPE (@0))
&& (TREE_CODE (@10) == INTEGER_CST
- || (@1 != @10 && types_match (TREE_TYPE (@10), TREE_TYPE (@00))))
+ || @1 != @10)
&& (TYPE_UNSIGNED (TREE_TYPE (@00)) == TYPE_UNSIGNED (TREE_TYPE (@0))
|| cmp == NE_EXPR
|| cmp == EQ_EXPR)
- && (POINTER_TYPE_P (TREE_TYPE (@00)) == POINTER_TYPE_P (TREE_TYPE (@0))))
+ && !POINTER_TYPE_P (TREE_TYPE (@00)))
/* ??? The special-casing of INTEGER_CST conversion was in the original
code and here to avoid a spurious overflow flag on the resulting
constant which fold_convert produces. */
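A concrete instance of the new "X + Y < Y" rule added above, assuming signed
arithmetic where overflow is undefined:

    /* Illustration only: with undefined signed overflow, the addition
       cancels and the comparison folds to a sign test on A.  */
    int
    cmp (int a, int b)
    {
      return a + b < b;   /* Simplifies to a < 0.  */
    }

The eq/ne forms also fire under -fwrapv, since x + y == y still implies
x == 0 when addition wraps.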
diff --git a/gcc/omp-expand.c b/gcc/omp-expand.c
index 49e82bac0ac..130814e68f2 100644
--- a/gcc/omp-expand.c
+++ b/gcc/omp-expand.c
@@ -4611,7 +4611,7 @@ expand_omp_simd (struct omp_region *region, struct omp_for_data *fd)
{
poly_uint64 val;
safelen = OMP_CLAUSE_SAFELEN_EXPR (safelen);
- if (!poly_tree_p (safelen, &val))
+ if (!poly_int_tree_p (safelen, &val))
safelen_int = 0;
else
safelen_int = MIN (constant_lower_bound (val), INT_MAX);
diff --git a/gcc/omp-general.c b/gcc/omp-general.c
index c5f3f940d65..fcc1148246f 100644
--- a/gcc/omp-general.c
+++ b/gcc/omp-general.c
@@ -437,10 +437,10 @@ omp_max_vf (void)
targetm.vectorize.autovectorize_vector_sizes (&sizes);
if (!sizes.is_empty ())
{
- poly_uint64 vs = 0;
+ poly_uint64 vf = 0;
for (unsigned int i = 0; i < sizes.length (); ++i)
- vs = ordered_max (vs, sizes[i]);
- return vs;
+ vf = ordered_max (vf, sizes[i]);
+ return vf;
}
machine_mode vqimode = targetm.vectorize.preferred_simd_mode (QImode);
diff --git a/gcc/omp-low.c b/gcc/omp-low.c
index ef665cc2199..ec838c5a175 100644
--- a/gcc/omp-low.c
+++ b/gcc/omp-low.c
@@ -3516,11 +3516,11 @@ lower_rec_simd_input_clauses (tree new_var, omp_context *ctx,
if (c)
{
poly_uint64 safe_len;
- if (!poly_tree_p (OMP_CLAUSE_SAFELEN_EXPR (c), &safe_len)
- || maybe_zero (safe_len))
+ if (!poly_int_tree_p (OMP_CLAUSE_SAFELEN_EXPR (c), &safe_len)
+ || may_lt (safe_len, 1U))
sctx->max_vf = 1;
- else if (may_lt (safe_len, sctx->max_vf))
- sctx->max_vf = safe_len;
+ else
+ sctx->max_vf = lower_bound (sctx->max_vf, safe_len);
}
}
if (may_gt (sctx->max_vf, 1U))
@@ -4676,7 +4676,7 @@ lower_rec_input_clauses (tree clauses, gimple_seq *ilist, gimple_seq *dlist,
OMP_CLAUSE_SAFELEN);
poly_uint64 safe_len;
if (c == NULL_TREE
- || (poly_tree_p (OMP_CLAUSE_SAFELEN_EXPR (c), &safe_len)
+ || (poly_int_tree_p (OMP_CLAUSE_SAFELEN_EXPR (c), &safe_len)
&& may_gt (safe_len, sctx.max_vf)))
{
c = build_omp_clause (UNKNOWN_LOCATION, OMP_CLAUSE_SAFELEN);
@@ -4934,7 +4934,7 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
gimple_seq after_join = NULL;
tree init_code = NULL_TREE, fini_code = NULL_TREE,
setup_code = NULL_TREE, teardown_code = NULL_TREE;
- HOST_WIDE_INT offset = 0;
+ unsigned offset = 0;
for (tree c = clauses; c; c = OMP_CLAUSE_CHAIN (c))
if (OMP_CLAUSE_CODE (c) == OMP_CLAUSE_REDUCTION)
@@ -5073,13 +5073,14 @@ lower_oacc_reductions (location_t loc, tree clauses, tree level, bool inner,
v1 = v2 = v3 = var;
/* Determine position in reduction buffer, which may be used
- by target. */
- machine_mode mode = TYPE_MODE (TREE_TYPE (var));
+ by target. The parser has ensured that this is not a
+ variable-sized type. */
+ fixed_size_mode mode
+ = as_a <fixed_size_mode> (TYPE_MODE (TREE_TYPE (var)));
unsigned align = GET_MODE_ALIGNMENT (mode) / BITS_PER_UNIT;
offset = (offset + align - 1) & ~(align - 1);
tree off = build_int_cst (sizetype, offset);
- /* The offset must be a compile-time constant. */
- offset += GET_MODE_SIZE (mode).to_constant ();
+ offset += GET_MODE_SIZE (mode);
if (!init_code)
{
diff --git a/gcc/opts.c b/gcc/opts.c
index 5aa5d066dbe..adf3d89851d 100644
--- a/gcc/opts.c
+++ b/gcc/opts.c
@@ -1700,11 +1700,10 @@ parse_sanitizer_options (const char *p, location_t loc, int scode,
}
/* Parse string values of no_sanitize attribute passed in VALUE.
- Values are separated with comma. Wrong argument is stored to
- WRONG_ARGUMENT variable. */
+ Values are separated with comma. */
unsigned int
-parse_no_sanitize_attribute (char *value, char **wrong_argument)
+parse_no_sanitize_attribute (char *value)
{
unsigned int flags = 0;
unsigned int i;
@@ -1722,7 +1721,8 @@ parse_no_sanitize_attribute (char *value, char **wrong_argument)
}
if (sanitizer_opts[i].name == NULL)
- *wrong_argument = q;
+ warning (OPT_Wattributes,
+ "%<%s%> attribute directive ignored", q);
q = strtok (NULL, ",");
}
diff --git a/gcc/opts.h b/gcc/opts.h
index 2774e2c8b40..10938615725 100644
--- a/gcc/opts.h
+++ b/gcc/opts.h
@@ -390,7 +390,7 @@ extern void handle_common_deferred_options (void);
unsigned int parse_sanitizer_options (const char *, location_t, int,
unsigned int, int, bool);
-unsigned int parse_no_sanitize_attribute (char *value, char **wrong_argument);
+unsigned int parse_no_sanitize_attribute (char *value);
extern bool common_handle_option (struct gcc_options *opts,
struct gcc_options *opts_set,
const struct cl_decoded_option *decoded,
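The string this function parses comes from the no_sanitize attribute; a
minimal sketch of user code that exercises the relocated warning, assuming
"bogus" is not a recognized sanitizer name:

    /* The attribute argument is a comma-separated list; the unknown
       entry "bogus" now gets the -Wattributes warning directly from
       parse_no_sanitize_attribute.  */
    __attribute__ ((no_sanitize ("address,bogus")))
    void f (void)
    {
    }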
diff --git a/gcc/passes.c b/gcc/passes.c
index 2c9add84c1d..65568e052fc 100644
--- a/gcc/passes.c
+++ b/gcc/passes.c
@@ -197,7 +197,9 @@ rest_of_decl_compilation (tree decl,
/* Can't defer this, because it needs to happen before any
later function definitions are processed. */
- if (DECL_ASSEMBLER_NAME_SET_P (decl) && DECL_REGISTER (decl))
+ if (HAS_DECL_ASSEMBLER_NAME_P (decl)
+ && DECL_ASSEMBLER_NAME_SET_P (decl)
+ && DECL_REGISTER (decl))
make_decl_rtl (decl);
/* Forward declarations for nested functions are not "external",
diff --git a/gcc/poly-int.h b/gcc/poly-int.h
index d925dac5226..61cf3213db8 100644
--- a/gcc/poly-int.h
+++ b/gcc/poly-int.h
@@ -20,7 +20,11 @@ along with GCC; see the file COPYING3. If not see
/* This file provides a representation of sizes and offsets whose exact
values depend on certain runtime properties. The motivating example
is the Arm SVE ISA, in which the number of vector elements is only
- known at runtime. See doc/poly-int.texi for more details. */
+ known at runtime. See doc/poly-int.texi for more details.
+
+ Tests for poly-int.h are located in testsuite/gcc.dg/plugin,
+ since they are too expensive (in terms of binary size) to be
+ included as selftests. */
#ifndef HAVE_POLY_INT_H
#define HAVE_POLY_INT_H
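For orientation, a poly_int with N coefficients represents
c0 + c1*x1 + ... + c(N-1)*x(N-1), where each xi is a nonnegative runtime
invariant. A minimal sketch, assuming NUM_POLY_INT_COEFFS == 2 as for
AArch64 SVE:

    /* Hypothetical example: an SVE vector holds 16 + 16*x bytes for
       some runtime value x >= 0; comparisons must say how they treat
       the indeterminate.  */
    poly_uint64 bytes (16, 16);
    bool a = may_eq (bytes, 16U);    /* True: x could be zero.  */
    bool b = must_eq (bytes, 16U);   /* False: x might be nonzero.  */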
@@ -40,15 +44,15 @@ template<unsigned int N, typename T> class poly_int;
Any fixed-width integer should be promoted to wide_int if possible
and lead to an error otherwise.
- - poly_Coeff_traits<T>::int_type is the type to which an integer
+ - poly_coeff_traits<T>::int_type is the type to which an integer
literal should be cast before comparing it with T.
- poly_coeff_traits<T>::precision is the number of bits that T can hold.
- poly_coeff_traits<T>::signedness is:
- 0 if T1 is unsigned
- 1 if T1 is signed
- -1 if T1 has no inherent sign (as for wide_int).
+ 0 if T is unsigned
+ 1 if T is signed
+ -1 if T has no inherent sign (as for wide_int).
- poly_coeff_traits<T>::max_value, if defined, is the maximum value of T.
@@ -87,9 +91,9 @@ struct poly_coeff_traits<T, wi::CONST_PRECISION>
{
typedef WI_UNARY_RESULT (T) result;
typedef int int_type;
+ /* These types are always signed. */
static const int signedness = 1;
static const int precision = wi::int_traits<T>::precision;
- /* These types are always signed. */
static const int rank = precision * 2 / CHAR_BIT;
};
@@ -281,12 +285,12 @@ struct poly_result<T1, T2, 2>
/* Enforce that T1 is non-polynomial and provide the result type
for a binary operation on T1 and poly_int<N, C2>. */
-#define CONST_POLY_RESULT(N, C1, C2) poly_int<N, CONST_POLY_COEFF (C1, C2)>
+#define CONST_POLY_RESULT(N, T1, C2) poly_int<N, CONST_POLY_COEFF (T1, C2)>
/* Enforce that T1 and T2 are non-polynomial and provide the result type
for a binary operation on T1 and T2. */
-#define CONST_CONST_RESULT(N, C1, C2) \
- POLY_POLY_COEFF (typename if_nonpoly<C1>::t, typename if_nonpoly<C2>::t)
+#define CONST_CONST_RESULT(N, T1, T2) \
+ POLY_POLY_COEFF (typename if_nonpoly<T1>::t, typename if_nonpoly<T2>::t)
/* The type to which a coefficient of type C1 should be cast before
using it in a binary operation with a coefficient of type C2. */
@@ -323,11 +327,7 @@ struct poly_result<T1, T2, 2>
: (void) ((RES).coeffs[I].~C (), new (&(RES).coeffs[I]) C (VALUE)))
/* A base POD class for polynomial integers. The polynomial has N
- coefficients of type C.
-
- Most of these functions are ALWAYS_INLINE to speed up compilers
- built at -O0. The functions are heavily used and not interesting
- as function calls even in debug builds. */
+ coefficients of type C. */
template<unsigned int N, typename C>
class poly_int_pod
{
@@ -379,7 +379,7 @@ public:
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE poly_int_pod<N, C>&
+inline poly_int_pod<N, C>&
poly_int_pod<N, C>::operator = (const poly_int_pod<N, Ca> &a)
{
POLY_SET_COEFF (C, *this, 0, a.coeffs[0]);
@@ -391,7 +391,7 @@ poly_int_pod<N, C>::operator = (const poly_int_pod<N, Ca> &a)
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE typename if_nonpoly<Ca, poly_int_pod<N, C> >::t &
+inline typename if_nonpoly<Ca, poly_int_pod<N, C> >::t &
poly_int_pod<N, C>::operator = (const Ca &a)
{
POLY_SET_COEFF (C, *this, 0, a);
@@ -403,7 +403,7 @@ poly_int_pod<N, C>::operator = (const Ca &a)
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE poly_int_pod<N, C>&
+inline poly_int_pod<N, C>&
poly_int_pod<N, C>::operator += (const poly_int_pod<N, Ca> &a)
{
this->coeffs[0] += a.coeffs[0];
@@ -415,7 +415,7 @@ poly_int_pod<N, C>::operator += (const poly_int_pod<N, Ca> &a)
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE typename if_nonpoly<Ca, poly_int_pod<N, C> >::t &
+inline typename if_nonpoly<Ca, poly_int_pod<N, C> >::t &
poly_int_pod<N, C>::operator += (const Ca &a)
{
this->coeffs[0] += a;
@@ -424,7 +424,7 @@ poly_int_pod<N, C>::operator += (const Ca &a)
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE poly_int_pod<N, C>&
+inline poly_int_pod<N, C>&
poly_int_pod<N, C>::operator -= (const poly_int_pod<N, Ca> &a)
{
this->coeffs[0] -= a.coeffs[0];
@@ -436,7 +436,7 @@ poly_int_pod<N, C>::operator -= (const poly_int_pod<N, Ca> &a)
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE typename if_nonpoly<Ca, poly_int_pod<N, C> >::t &
+inline typename if_nonpoly<Ca, poly_int_pod<N, C> >::t &
poly_int_pod<N, C>::operator -= (const Ca &a)
{
this->coeffs[0] -= a;
@@ -445,7 +445,7 @@ poly_int_pod<N, C>::operator -= (const Ca &a)
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE typename if_nonpoly<Ca, poly_int_pod<N, C> >::t &
+inline typename if_nonpoly<Ca, poly_int_pod<N, C> >::t &
poly_int_pod<N, C>::operator *= (const Ca &a)
{
this->coeffs[0] *= a;
@@ -456,7 +456,7 @@ poly_int_pod<N, C>::operator *= (const Ca &a)
}
template<unsigned int N, typename C>
-ALWAYS_INLINE poly_int_pod<N, C>&
+inline poly_int_pod<N, C>&
poly_int_pod<N, C>::operator <<= (unsigned int a)
{
POLY_SET_COEFF (C, *this, 0, this->coeffs[0] << a);
@@ -469,7 +469,7 @@ poly_int_pod<N, C>::operator <<= (unsigned int a)
/* Return true if the polynomial value is a compile-time constant. */
template<unsigned int N, typename C>
-ALWAYS_INLINE bool
+inline bool
poly_int_pod<N, C>::is_constant () const
{
if (N >= 2)
@@ -484,7 +484,7 @@ poly_int_pod<N, C>::is_constant () const
template<unsigned int N, typename C>
template<typename T>
-ALWAYS_INLINE typename if_lossless<T, C, bool>::t
+inline typename if_lossless<T, C, bool>::t
poly_int_pod<N, C>::is_constant (T *const_value) const
{
if (is_constant ())
@@ -502,14 +502,14 @@ poly_int_pod<N, C>::is_constant (T *const_value) const
explaining why we know the value is constant in that context. */
template<unsigned int N, typename C>
-ALWAYS_INLINE C
+inline C
poly_int_pod<N, C>::to_constant () const
{
gcc_checking_assert (is_constant ());
return this->coeffs[0];
}
-/* Convert X to a wide-int-based polynomial in which each coefficient
+/* Convert X to a wide_int-based polynomial in which each coefficient
has BITSIZE bits. If X's coefficients are smaller than BITSIZE,
extend them according to SGN. */
@@ -525,7 +525,7 @@ poly_int_pod<N, C>::from (const poly_int_pod<N, Ca> &a,
return r;
}
-/* Convert X to a fixed-wide-int-based polynomial, extending according
+/* Convert X to a fixed_wide_int-based polynomial, extending according
to SGN. */
template<unsigned int N, typename C>
@@ -539,9 +539,9 @@ poly_int_pod<N, C>::from (const poly_int_pod<N, Ca> &a, signop sgn)
return r;
}
-/* Return true if the coefficients of this wide-int-based polynomial can
- be represented as signed HOST_WIDE_INTs without loss of precision.
- Store the HOST_WIDE_INT representation in *R if so. */
+/* Return true if the coefficients of this generic_wide_int-based
+ polynomial can be represented as signed HOST_WIDE_INTs without loss
+ of precision. Store the HOST_WIDE_INT representation in *R if so. */
template<unsigned int N, typename C>
inline bool
@@ -555,9 +555,10 @@ poly_int_pod<N, C>::to_shwi (poly_int_pod<N, HOST_WIDE_INT> *r) const
return true;
}
-/* Return true if the coefficients of this wide-int-based polynomial can
- be represented as unsigned HOST_WIDE_INTs without loss of precision.
- Store the unsigned HOST_WIDE_INT representation in *R if so. */
+/* Return true if the coefficients of this generic_wide_int-based
+ polynomial can be represented as unsigned HOST_WIDE_INTs without
+ loss of precision. Store the unsigned HOST_WIDE_INT representation
+ in *R if so. */
template<unsigned int N, typename C>
inline bool
@@ -571,7 +572,7 @@ poly_int_pod<N, C>::to_uhwi (poly_int_pod<N, unsigned HOST_WIDE_INT> *r) const
return true;
}
-/* Force a wide-int based constant to HOST_WIDE_INT precision,
+/* Force a generic_wide_int-based constant to HOST_WIDE_INT precision,
truncating if necessary. */
template<unsigned int N, typename C>
@@ -584,7 +585,7 @@ poly_int_pod<N, C>::force_shwi () const
return r;
}
-/* Force a wide-int based constant to unsigned HOST_WIDE_INT precision,
+/* Force a generic_wide_int-based constant to unsigned HOST_WIDE_INT precision,
truncating if necessary. */
template<unsigned int N, typename C>
@@ -601,7 +602,7 @@ poly_int_pod<N, C>::force_uhwi () const
/* Provide a conversion operator to constants. */
template<unsigned int N, typename C>
-ALWAYS_INLINE
+inline
poly_int_pod<N, C>::operator C () const
{
gcc_checking_assert (this->is_constant ());
@@ -615,7 +616,7 @@ template<unsigned int N, typename C>
class poly_int : public poly_int_pod<N, C>
{
public:
- ALWAYS_INLINE poly_int () {}
+ poly_int () {}
template<typename Ca>
poly_int (const poly_int<N, Ca> &);
@@ -689,7 +690,7 @@ poly_int<N, C>::poly_int (const C0 &c0, const C1 &c1)
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE poly_int<N, C>&
+inline poly_int<N, C>&
poly_int<N, C>::operator = (const poly_int_pod<N, Ca> &a)
{
this->coeffs[0] = a.coeffs[0];
@@ -701,7 +702,7 @@ poly_int<N, C>::operator = (const poly_int_pod<N, Ca> &a)
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE typename if_nonpoly<Ca, poly_int<N, C> >::t &
+inline typename if_nonpoly<Ca, poly_int<N, C> >::t &
poly_int<N, C>::operator = (const Ca &a)
{
this->coeffs[0] = a;
@@ -713,7 +714,7 @@ poly_int<N, C>::operator = (const Ca &a)
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE poly_int<N, C>&
+inline poly_int<N, C>&
poly_int<N, C>::operator += (const poly_int_pod<N, Ca> &a)
{
this->coeffs[0] += a.coeffs[0];
@@ -725,7 +726,7 @@ poly_int<N, C>::operator += (const poly_int_pod<N, Ca> &a)
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE typename if_nonpoly<Ca, poly_int<N, C> >::t &
+inline typename if_nonpoly<Ca, poly_int<N, C> >::t &
poly_int<N, C>::operator += (const Ca &a)
{
this->coeffs[0] += a;
@@ -734,7 +735,7 @@ poly_int<N, C>::operator += (const Ca &a)
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE poly_int<N, C>&
+inline poly_int<N, C>&
poly_int<N, C>::operator -= (const poly_int_pod<N, Ca> &a)
{
this->coeffs[0] -= a.coeffs[0];
@@ -746,7 +747,7 @@ poly_int<N, C>::operator -= (const poly_int_pod<N, Ca> &a)
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE typename if_nonpoly<Ca, poly_int<N, C> >::t &
+inline typename if_nonpoly<Ca, poly_int<N, C> >::t &
poly_int<N, C>::operator -= (const Ca &a)
{
this->coeffs[0] -= a;
@@ -755,7 +756,7 @@ poly_int<N, C>::operator -= (const Ca &a)
template<unsigned int N, typename C>
template<typename Ca>
-ALWAYS_INLINE typename if_nonpoly<Ca, poly_int<N, C> >::t &
+inline typename if_nonpoly<Ca, poly_int<N, C> >::t &
poly_int<N, C>::operator *= (const Ca &a)
{
this->coeffs[0] *= a;
@@ -766,7 +767,7 @@ poly_int<N, C>::operator *= (const Ca &a)
}
template<unsigned int N, typename C>
-ALWAYS_INLINE poly_int<N, C>&
+inline poly_int<N, C>&
poly_int<N, C>::operator <<= (unsigned int a)
{
this->coeffs[0] = this->coeffs[0] << a;
@@ -776,10 +777,31 @@ poly_int<N, C>::operator <<= (unsigned int a)
return *this;
}
+/* Return true if every coefficient of A is in the inclusive range [B, C]. */
+
+template<typename Ca, typename Cb, typename Cc>
+inline typename if_nonpoly<Ca, bool>::t
+coeffs_in_range_p (const Ca &a, const Cb &b, const Cc &c)
+{
+ return a >= b && a <= c;
+}
+
+template<unsigned int N, typename Ca, typename Cb, typename Cc>
+inline typename if_nonpoly<Ca, bool>::t
+coeffs_in_range_p (const poly_int_pod<N, Ca> &a, const Cb &b, const Cc &c)
+{
+ for (unsigned int i = 0; i < N; i++)
+ if (a.coeffs[i] < b || a.coeffs[i] > c)
+ return false;
+ return true;
+}
+
namespace wi {
+/* Poly version of wi::shwi, with the same interface. */
+
template<unsigned int N>
inline poly_int<N, hwi_with_prec>
-shwi (const poly_int<N, HOST_WIDE_INT> &a, unsigned int precision)
+shwi (const poly_int_pod<N, HOST_WIDE_INT> &a, unsigned int precision)
{
poly_int<N, hwi_with_prec> r;
for (unsigned int i = 0; i < N; i++)
@@ -787,9 +809,11 @@ shwi (const poly_int<N, HOST_WIDE_INT> &a, unsigned int precision)
return r;
}
+/* Poly version of wi::uhwi, with the same interface. */
+
template<unsigned int N>
inline poly_int<N, hwi_with_prec>
-uhwi (const poly_int<N, unsigned HOST_WIDE_INT> &a, unsigned int precision)
+uhwi (const poly_int_pod<N, unsigned HOST_WIDE_INT> &a, unsigned int precision)
{
poly_int<N, hwi_with_prec> r;
for (unsigned int i = 0; i < N; i++)
@@ -810,7 +834,7 @@ sext (const poly_int_pod<N, Ca> &a, unsigned int precision)
return r;
}
-/* Poly version of wi::uext, with the same interface. */
+/* Poly version of wi::zext, with the same interface. */
template<unsigned int N, typename Ca>
inline POLY_POLY_RESULT (N, Ca, Ca)
@@ -825,7 +849,7 @@ zext (const poly_int_pod<N, Ca> &a, unsigned int precision)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE POLY_POLY_RESULT (N, Ca, Cb)
+inline POLY_POLY_RESULT (N, Ca, Cb)
operator + (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
{
typedef POLY_CAST (Ca, Cb) NCa;
@@ -839,7 +863,7 @@ operator + (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE POLY_CONST_RESULT (N, Ca, Cb)
+inline POLY_CONST_RESULT (N, Ca, Cb)
operator + (const poly_int_pod<N, Ca> &a, const Cb &b)
{
typedef POLY_CAST (Ca, Cb) NCa;
@@ -853,7 +877,7 @@ operator + (const poly_int_pod<N, Ca> &a, const Cb &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE CONST_POLY_RESULT (N, Ca, Cb)
+inline CONST_POLY_RESULT (N, Ca, Cb)
operator + (const Ca &a, const poly_int_pod<N, Cb> &b)
{
typedef POLY_CAST (Cb, Ca) NCb;
@@ -926,7 +950,7 @@ add (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b,
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE POLY_POLY_RESULT (N, Ca, Cb)
+inline POLY_POLY_RESULT (N, Ca, Cb)
operator - (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
{
typedef POLY_CAST (Ca, Cb) NCa;
@@ -940,7 +964,7 @@ operator - (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE POLY_CONST_RESULT (N, Ca, Cb)
+inline POLY_CONST_RESULT (N, Ca, Cb)
operator - (const poly_int_pod<N, Ca> &a, const Cb &b)
{
typedef POLY_CAST (Ca, Cb) NCa;
@@ -954,7 +978,7 @@ operator - (const poly_int_pod<N, Ca> &a, const Cb &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE CONST_POLY_RESULT (N, Ca, Cb)
+inline CONST_POLY_RESULT (N, Ca, Cb)
operator - (const Ca &a, const poly_int_pod<N, Cb> &b)
{
typedef POLY_CAST (Cb, Ca) NCb;
@@ -982,7 +1006,7 @@ sub (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE poly_int<N, WI_BINARY_RESULT (Ca, Cb)>
+inline poly_int<N, WI_BINARY_RESULT (Ca, Cb)>
sub (const poly_int_pod<N, Ca> &a, const Cb &b)
{
typedef WI_BINARY_RESULT (Ca, Cb) C;
@@ -995,7 +1019,7 @@ sub (const poly_int_pod<N, Ca> &a, const Cb &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE poly_int<N, WI_BINARY_RESULT (Ca, Cb)>
+inline poly_int<N, WI_BINARY_RESULT (Ca, Cb)>
sub (const Ca &a, const poly_int_pod<N, Cb> &b)
{
typedef WI_BINARY_RESULT (Ca, Cb) C;
@@ -1027,7 +1051,7 @@ sub (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b,
}
template<unsigned int N, typename Ca>
-ALWAYS_INLINE POLY_POLY_RESULT (N, Ca, Ca)
+inline POLY_POLY_RESULT (N, Ca, Ca)
operator - (const poly_int_pod<N, Ca> &a)
{
typedef POLY_CAST (Ca, Ca) NCa;
@@ -1071,8 +1095,17 @@ neg (const poly_int_pod<N, Ca> &a, bool *overflow)
}
}
+template<unsigned int N, typename Ca>
+inline POLY_POLY_RESULT (N, Ca, Ca)
+operator ~ (const poly_int_pod<N, Ca> &a)
+{
+ if (N >= 2)
+ return -1 - a;
+ return ~a.coeffs[0];
+}
+
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE POLY_CONST_RESULT (N, Ca, Cb)
+inline POLY_CONST_RESULT (N, Ca, Cb)
operator * (const poly_int_pod<N, Ca> &a, const Cb &b)
{
typedef POLY_CAST (Ca, Cb) NCa;
@@ -1086,7 +1119,7 @@ operator * (const poly_int_pod<N, Ca> &a, const Cb &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE CONST_POLY_RESULT (N, Ca, Cb)
+inline CONST_POLY_RESULT (N, Ca, Cb)
operator * (const Ca &a, const poly_int_pod<N, Cb> &b)
{
typedef POLY_CAST (Ca, Cb) NCa;
@@ -1114,7 +1147,7 @@ mul (const poly_int_pod<N, Ca> &a, const Cb &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE poly_int<N, WI_BINARY_RESULT (Ca, Cb)>
+inline poly_int<N, WI_BINARY_RESULT (Ca, Cb)>
mul (const Ca &a, const poly_int_pod<N, Cb> &b)
{
typedef WI_BINARY_RESULT (Ca, Cb) C;
@@ -1142,9 +1175,9 @@ mul (const poly_int_pod<N, Ca> &a, const Cb &b,
}
}
-template<unsigned int N, typename Ca>
-ALWAYS_INLINE POLY_POLY_RESULT (N, Ca, Ca)
-operator << (const poly_int_pod<N, Ca> &a, unsigned int b)
+template<unsigned int N, typename Ca, typename Cb>
+inline POLY_POLY_RESULT (N, Ca, Ca)
+operator << (const poly_int_pod<N, Ca> &a, const Cb &b)
{
typedef POLY_CAST (Ca, Ca) NCa;
typedef POLY_POLY_COEFF (Ca, Ca) C;
@@ -1156,6 +1189,21 @@ operator << (const poly_int_pod<N, Ca> &a, unsigned int b)
return r;
}
+namespace wi {
+/* Poly version of wi::lshift, with the same interface. */
+
+template<unsigned int N, typename Ca, typename Cb>
+inline poly_int<N, WI_BINARY_RESULT (Ca, Ca)>
+lshift (const poly_int_pod<N, Ca> &a, const Cb &b)
+{
+ typedef WI_BINARY_RESULT (Ca, Ca) C;
+ poly_int<N, C> r;
+ for (unsigned int i = 0; i < N; i++)
+ POLY_SET_COEFF (C, r, i, wi::lshift (a.coeffs[i], b));
+ return r;
+}
+}
+
/* Return true if a0 + a1 * x might equal b0 + b1 * x for some nonnegative
integer x. */
@@ -1171,8 +1219,9 @@ may_eq_2 (const Ca &a0, const Ca &a1, const Cb &b0, const Cb &b1)
We need to test whether that's a valid value of x.
(b0 - a0) and (a1 - b1) must not have opposite signs
and the result must be integral. */
- return ((a1 < b1 ? b0 <= a0 : b0 >= a0)
- && (b0 - a0) % (a1 - b1) == 0);
+ return (a1 < b1
+ ? b0 <= a0 && (a0 - b0) % (b1 - a1) == 0
+ : b0 >= a0 && (b0 - a0) % (a1 - b1) == 0);
return a0 == b0;
}
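A worked instance of the rewritten test: for a = 2 + 6x and b = 14 + 2x we
have a1 > b1, so the second arm requires b0 >= a0 (14 >= 2) and
(b0 - a0) % (a1 - b1) == 0 (12 % 4 == 0). Both hold, and x = 3 indeed makes
both sides equal 20, so may_eq_2 returns true.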
@@ -1189,18 +1238,17 @@ may_eq_2 (const Ca &a0, const Ca &a1, const Cb &b)
We need to test whether that's a valid value of x.
(b - a0) and a1 must not have opposite signs and the
- result must be integral. For the latter test we use
- "a0 - b" rather than "b - a0" in order to cope with
- cases in which a0 is a wide_int. */
- return ((a1 < 0 ? b <= a0 : b >= a0)
- && (a0 - b) % a1 == 0);
+ result must be integral. */
+ return (a1 < 0
+ ? b <= a0 && (a0 - b) % a1 == 0
+ : b >= a0 && (b - a0) % a1 == 0);
return a0 == b;
}
/* Return true if A might equal B for some indeterminate values. */
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE bool
+inline bool
may_eq (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
{
STATIC_ASSERT (N <= 2);
@@ -1210,7 +1258,7 @@ may_eq (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE typename if_nonpoly<Cb, bool>::t
+inline typename if_nonpoly<Cb, bool>::t
may_eq (const poly_int_pod<N, Ca> &a, const Cb &b)
{
STATIC_ASSERT (N <= 2);
@@ -1220,7 +1268,7 @@ may_eq (const poly_int_pod<N, Ca> &a, const Cb &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE typename if_nonpoly<Ca, bool>::t
+inline typename if_nonpoly<Ca, bool>::t
may_eq (const Ca &a, const poly_int_pod<N, Cb> &b)
{
STATIC_ASSERT (N <= 2);
@@ -1230,7 +1278,7 @@ may_eq (const Ca &a, const poly_int_pod<N, Cb> &b)
}
template<typename Ca, typename Cb>
-ALWAYS_INLINE typename if_nonpoly2<Ca, Cb, bool>::t
+inline typename if_nonpoly2<Ca, Cb, bool>::t
may_eq (const Ca &a, const Cb &b)
{
return a == b;
@@ -1239,7 +1287,7 @@ may_eq (const Ca &a, const Cb &b)
/* Return true if A might not equal B for some indeterminate values. */
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE bool
+inline bool
may_ne (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
{
if (N >= 2)
@@ -1250,7 +1298,7 @@ may_ne (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE typename if_nonpoly<Cb, bool>::t
+inline typename if_nonpoly<Cb, bool>::t
may_ne (const poly_int_pod<N, Ca> &a, const Cb &b)
{
if (N >= 2)
@@ -1261,7 +1309,7 @@ may_ne (const poly_int_pod<N, Ca> &a, const Cb &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE typename if_nonpoly<Ca, bool>::t
+inline typename if_nonpoly<Ca, bool>::t
may_ne (const Ca &a, const poly_int_pod<N, Cb> &b)
{
if (N >= 2)
@@ -1272,7 +1320,7 @@ may_ne (const Ca &a, const poly_int_pod<N, Cb> &b)
}
template<typename Ca, typename Cb>
-ALWAYS_INLINE typename if_nonpoly2<Ca, Cb, bool>::t
+inline typename if_nonpoly2<Ca, Cb, bool>::t
may_ne (const Ca &a, const Cb &b)
{
return a != b;
@@ -1287,7 +1335,7 @@ may_ne (const Ca &a, const Cb &b)
/* Return true if A is known to be zero. */
template<typename T>
-ALWAYS_INLINE bool
+inline bool
known_zero (const T &a)
{
typedef POLY_INT_TYPE (T) int_type;
@@ -1297,7 +1345,7 @@ known_zero (const T &a)
/* Return true if A is known to be nonzero. */
template<typename T>
-ALWAYS_INLINE bool
+inline bool
known_nonzero (const T &a)
{
typedef POLY_INT_TYPE (T) int_type;
@@ -1313,7 +1361,7 @@ known_nonzero (const T &a)
/* Return true if A is known to be equal to 1. */
template<typename T>
-ALWAYS_INLINE bool
+inline bool
known_one (const T &a)
{
typedef POLY_INT_TYPE (T) int_type;
@@ -1323,7 +1371,7 @@ known_one (const T &a)
/* Return true if A is known to be all ones. */
template<typename T>
-ALWAYS_INLINE bool
+inline bool
known_all_ones (const T &a)
{
typedef POLY_INT_TYPE (T) int_type;
@@ -1334,7 +1382,7 @@ known_all_ones (const T &a)
indeterminate values. */
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE bool
+inline bool
may_le (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
{
if (N >= 2)
@@ -1345,7 +1393,7 @@ may_le (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE typename if_nonpoly<Cb, bool>::t
+inline typename if_nonpoly<Cb, bool>::t
may_le (const poly_int_pod<N, Ca> &a, const Cb &b)
{
if (N >= 2)
@@ -1356,7 +1404,7 @@ may_le (const poly_int_pod<N, Ca> &a, const Cb &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE typename if_nonpoly<Ca, bool>::t
+inline typename if_nonpoly<Ca, bool>::t
may_le (const Ca &a, const poly_int_pod<N, Cb> &b)
{
if (N >= 2)
@@ -1367,7 +1415,7 @@ may_le (const Ca &a, const poly_int_pod<N, Cb> &b)
}
template<typename Ca, typename Cb>
-ALWAYS_INLINE typename if_nonpoly2<Ca, Cb, bool>::t
+inline typename if_nonpoly2<Ca, Cb, bool>::t
may_le (const Ca &a, const Cb &b)
{
return a <= b;
@@ -1376,7 +1424,7 @@ may_le (const Ca &a, const Cb &b)
/* Return true if A might be less than B for some indeterminate values. */
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE bool
+inline bool
may_lt (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
{
if (N >= 2)
@@ -1387,7 +1435,7 @@ may_lt (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE typename if_nonpoly<Cb, bool>::t
+inline typename if_nonpoly<Cb, bool>::t
may_lt (const poly_int_pod<N, Ca> &a, const Cb &b)
{
if (N >= 2)
@@ -1398,7 +1446,7 @@ may_lt (const poly_int_pod<N, Ca> &a, const Cb &b)
}
template<unsigned int N, typename Ca, typename Cb>
-ALWAYS_INLINE typename if_nonpoly<Ca, bool>::t
+inline typename if_nonpoly<Ca, bool>::t
may_lt (const Ca &a, const poly_int_pod<N, Cb> &b)
{
if (N >= 2)
@@ -1409,7 +1457,7 @@ may_lt (const Ca &a, const poly_int_pod<N, Cb> &b)
}
template<typename Ca, typename Cb>
-ALWAYS_INLINE typename if_nonpoly2<Ca, Cb, bool>::t
+inline typename if_nonpoly2<Ca, Cb, bool>::t
may_lt (const Ca &a, const Cb &b)
{
return a < b;
@@ -1602,9 +1650,9 @@ lower_bound (const Ca &a, const Cb &b)
return a < b ? a : b;
}
-/* Return a value that is known to be no less than A and B, both of
- which are known to be nonnegative. This will be the least upper
- bound for some indeterminate values but not necessarily for all. */
+/* Return a value that is known to be no less than A and B. This will
+ be the least upper bound for some indeterminate values but not
+ necessarily for all. */
template<unsigned int N, typename Ca, typename Cb>
inline POLY_CONST_RESULT (N, Ca, Cb)
@@ -1781,7 +1829,7 @@ can_align_down (const poly_int_pod<N, Ca> &value, Cb align,
return true;
}
-/* Return true if we can align A and B to the smallest multiples of
+/* Return true if we can align A and B up to the smallest multiples of
ALIGN that are >= A and B respectively, and if doing so gives the
same value. */
@@ -1798,7 +1846,7 @@ known_equal_after_align_up (const poly_int_pod<N, Ca> &a,
&& must_eq (aligned_a, aligned_b));
}
-/* Return true if we can align A and B to the largest multiples of
+/* Return true if we can align A and B down to the largest multiples of
ALIGN that are <= A and B respectively, and if doing so gives the
same value. */
@@ -2185,8 +2233,9 @@ exact_div (const poly_int_pod<N, Ca> &a, const poly_int_pod<N, Cb> &b)
C r = NCa (a.coeffs[0]) / NCb (b.coeffs[0]);
for (unsigned int i = 1; i < N; ++i)
gcc_checking_assert (b.coeffs[i] == int_type (0)
- || (a.coeffs[i] % b.coeffs[i] == 0
- && NCa (a.coeffs[i]) / NCb (b.coeffs[i]) == r));
+ ? a.coeffs[i] == int_type (0)
+ : (a.coeffs[i] % b.coeffs[i] == 0
+ && NCa (a.coeffs[i]) / NCb (b.coeffs[i]) == r));
return r;
}
@@ -2235,44 +2284,44 @@ can_div_trunc_p (const poly_int_pod<N, Ca> &a,
ordered wrt zero, there can be no two coefficients of the same value
that have opposite signs. This means that:
- |a| = |a0| + |a1 * x1| + |a2 * x2| + ...
- |b| = |b0| + |b1 * x1| + |b2 * x2| + ...
+ |a| = |a0| + |a1 * x1| + |a2 * x2| + ...
+ |b| = |b0| + |b1 * x1| + |b2 * x2| + ...
The Q we've just calculated guarantees:
- |b0 * Q| <= |a0|
+ |b0 * Q| <= |a0|
|a0 - b0 * Q| < |b0|
and so:
- (2) |b * Q| <= |a|
+ (2) |b * Q| <= |a|
is satisfied if:
- |bi * xi * Q| <= |ai * xi|
+ |bi * xi * Q| <= |ai * xi|
for each i in [1, N]. This is trivially true when xi is zero.
When it isn't we need:
- (2') |bi * Q| <= |ai|
+ (2') |bi * Q| <= |ai|
r is calculated as:
- r = r0 + r1 * x1 + r2 * x2 + ...
+ r = r0 + r1 * x1 + r2 * x2 + ...
where ri = ai - bi * Q
Restricting to ordered a and b also guarantees that no two ris
have opposite signs, so we have:
- |r| = |r0| + |r1 * x1| + |r2 * x2| + ...
+ |r| = |r0| + |r1 * x1| + |r2 * x2| + ...
We know from the calculation of Q that |r0| < |b0|, so:
- (3) |r| < |b|
+ (3) |r| < |b|
is satisfied if:
- (3') |ai - bi * Q| <= |bi|
+ (3') |ai - bi * Q| <= |bi|
for each i in [1, N]. */
bool rem_p = NCa (a.coeffs[0]) % NCb (b.coeffs[0]) != 0;
@@ -2443,8 +2492,8 @@ struct poly_span_traits
/* The only case a change in type is needed is this one, in which the
subtraction would give a HOST_WIDE_INT-based result if done on poly_ints
and adding a zero size would give an unsigned HOST_WIDE_INT-based
- result. Since we know must_ge (Pos, Start), it is safe to base
- Pos - Start on unsigned HOST_WIDE_INT. */
+ result. Since we know must_ge (Pos, Start), it is safe to treat
+ Pos - Start as an unsigned HOST_WIDE_INT. */
template<typename T1, typename T2, typename T3>
struct poly_span_traits<T1, T2, T3, HOST_WIDE_INT, unsigned HOST_WIDE_INT>
{
@@ -2467,7 +2516,7 @@ struct poly_span_traits<T1, T2, T3, HOST_WIDE_INT, unsigned HOST_WIDE_INT>
open-ended. */
template<typename T1, typename T2, typename T3>
-static inline bool
+inline bool
maybe_in_range_p (const T1 &val, const T2 &pos, const T3 &size)
{
typedef poly_span_traits<T1, T2, T3> span;
@@ -2489,7 +2538,7 @@ maybe_in_range_p (const T1 &val, const T2 &pos, const T3 &size)
open-ended. */
template<typename T1, typename T2, typename T3>
-static inline bool
+inline bool
known_in_range_p (const T1 &val, const T2 &pos, const T3 &size)
{
typedef poly_span_traits<T1, T2, T3> span;
@@ -2503,7 +2552,7 @@ known_in_range_p (const T1 &val, const T2 &pos, const T3 &size)
case the range is open-ended. */
template<typename T1, typename T2, typename T3, typename T4>
-static inline bool
+inline bool
ranges_may_overlap_p (const T1 &pos1, const T2 &size1,
const T3 &pos2, const T4 &size2)
{
@@ -2519,7 +2568,7 @@ ranges_may_overlap_p (const T1 &pos1, const T2 &size1,
in which case the range is open-ended. */
template<typename T1, typename T2, typename T3, typename T4>
-static inline bool
+inline bool
ranges_must_overlap_p (const T1 &pos1, const T2 &size1,
const T3 &pos2, const T4 &size2)
{
@@ -2548,7 +2597,7 @@ ranges_must_overlap_p (const T1 &pos1, const T2 &size1,
in which case the range is open-ended. */
template<typename T1, typename T2, typename T3, typename T4>
-static inline bool
+inline bool
known_subrange_p (const T1 &pos1, const T2 &size1,
const T3 &pos2, const T4 &size2)
{
@@ -2570,7 +2619,7 @@ known_subrange_p (const T1 &pos1, const T2 &size1,
range open-ended. */
template<typename T>
-static inline typename if_nonpoly<T, bool>::t
+inline typename if_nonpoly<T, bool>::t
endpoint_representable_p (const T &pos, const T &size)
{
return (!known_size_p (size)
@@ -2578,7 +2627,7 @@ endpoint_representable_p (const T &pos, const T &size)
}
template<unsigned int N, typename C>
-static inline bool
+inline bool
endpoint_representable_p (const poly_int_pod<N, C> &pos,
const poly_int_pod<N, C> &size)
{
diff --git a/gcc/pretty-print.c b/gcc/pretty-print.c
index a755283fd58..86124d85d42 100644
--- a/gcc/pretty-print.c
+++ b/gcc/pretty-print.c
@@ -30,6 +30,666 @@ along with GCC; see the file COPYING3. If not see
#include <iconv.h>
#endif
+#ifdef __MINGW32__
+
+/* Replacement for fputs() that handles ANSI escape codes on Windows NT.
+ Contributed by: Liu Hao (lh_mouse at 126 dot com)
+
+   XXX: This file is compiled into libcommon.a, which must be self-contained.
+   It seems these functions can be put nowhere else. */
+
+#include <io.h>
+#define WIN32_LEAN_AND_MEAN 1
+#include <windows.h>
+
+/* Write all bytes in [s,s+n) into the specified stream.
+ Errors are ignored. */
+static void
+write_all (HANDLE h, const char *s, size_t n)
+{
+ size_t rem = n;
+ DWORD step;
+
+ while (rem != 0)
+ {
+ if (rem <= UINT_MAX)
+ step = rem;
+ else
+ step = UINT_MAX;
+ if (!WriteFile (h, s + n - rem, step, &step, NULL))
+ break;
+ rem -= step;
+ }
+}
+
+/* Find the beginning of an escape sequence.
+ There are two cases:
+ 1. If the sequence begins with an ESC character (0x1B) and a second
+ character X in [0x40,0x5F], returns X and stores a pointer to
+ the third character into *head.
+ 2. If the sequence begins with a character X in [0x80,0x9F], returns
+ (X-0x40) and stores a pointer to the second character into *head.
+   Stores the length of the escape prefix, in bytes, in *prefix_len
+   (2 in case 1, 1 in case 2).
+ Returns 0 if no such sequence can be found. */
+static int
+find_esc_head (int *prefix_len, const char **head, const char *str)
+{
+ int c;
+ const char *r = str;
+ int escaped = 0;
+
+ for (;;)
+ {
+ c = (unsigned char) *r;
+ if (c == 0)
+ {
+ /* Not found. */
+ return 0;
+ }
+ if (escaped && 0x40 <= c && c <= 0x5F)
+ {
+ /* Found (case 1). */
+ *prefix_len = 2;
+ *head = r + 1;
+ return c;
+ }
+ if (0x80 <= c && c <= 0x9F)
+ {
+ /* Found (case 2). */
+ *prefix_len = 1;
+ *head = r + 1;
+ return c - 0x40;
+ }
+ ++r;
+ escaped = c == 0x1B;
+ }
+}
+
+/* Find the terminator of an escape sequence.
+ str should be the value stored in *head by a previous successful
+ call to find_esc_head().
+ Returns 0 if no such sequence can be found. */
+static int
+find_esc_terminator (const char **term, const char *str)
+{
+ int c;
+ const char *r = str;
+
+ for (;;)
+ {
+ c = (unsigned char) *r;
+ if (c == 0)
+ {
+ /* Not found. */
+ return 0;
+ }
+ if (0x40 <= c && c <= 0x7E)
+ {
+ /* Found. */
+ *term = r;
+ return c;
+ }
+ ++r;
+ }
+}
+
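For illustration (not part of the patch), the two helpers split the common
SGR sequence "\033[1;31m" (bold red) like this:

    \033  [   1 ; 3 1   m
     |    |   \_____/   |
     |    |      |      +-- find_esc_terminator stores a pointer to 'm'
     |    |      +--------- parameter bytes, [esc_head, esc_term)
     |    +---------------- find_esc_head returns '[', sets *head to the
     |                      byte after it and *prefix_len to 2
     +--------------------- ESC (0x1B), case 1 above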
+/* Handle a sequence of codes. Sequences that are invalid, reserved,
+ unrecognized or unimplemented are ignored silently.
+   There isn't much we can do because of the lameness of Windows consoles. */
+static void
+eat_esc_sequence (HANDLE h, int esc_code,
+ const char *esc_head, const char *esc_term)
+{
+ /* Numbers in an escape sequence cannot be negative, because
+ a minus sign in the middle of it would have terminated it. */
+ long n1, n2;
+ char *eptr, *delim;
+ CONSOLE_SCREEN_BUFFER_INFO sb;
+ COORD cr;
+ /* ED and EL parameters. */
+ DWORD cnt, step;
+ long rows;
+ /* SGR parameters. */
+ WORD attrib_add, attrib_rm;
+ const char *param;
+
+ switch (MAKEWORD (esc_code, *esc_term))
+ {
+ /* ESC [ n1 'A'
+     Move the cursor up by n1 lines. */
+ case MAKEWORD ('[', 'A'):
+ if (esc_head == esc_term)
+ n1 = 1;
+ else
+ {
+ n1 = strtol (esc_head, &eptr, 10);
+ if (eptr != esc_term)
+ break;
+ }
+
+ if (GetConsoleScreenBufferInfo (h, &sb))
+ {
+ cr = sb.dwCursorPosition;
+ /* Stop at the topmost boundary. */
+ if (cr.Y > n1)
+ cr.Y -= n1;
+ else
+ cr.Y = 0;
+ SetConsoleCursorPosition (h, cr);
+ }
+ break;
+
+ /* ESC [ n1 'B'
+     Move the cursor down by n1 lines. */
+ case MAKEWORD ('[', 'B'):
+ if (esc_head == esc_term)
+ n1 = 1;
+ else
+ {
+ n1 = strtol (esc_head, &eptr, 10);
+ if (eptr != esc_term)
+ break;
+ }
+
+ if (GetConsoleScreenBufferInfo (h, &sb))
+ {
+ cr = sb.dwCursorPosition;
+ /* Stop at the bottommost boundary. */
+ if (sb.dwSize.Y - cr.Y > n1)
+ cr.Y += n1;
+ else
+ cr.Y = sb.dwSize.Y;
+ SetConsoleCursorPosition (h, cr);
+ }
+ break;
+
+ /* ESC [ n1 'C'
+ Move the cursor right by n1 characters. */
+ case MAKEWORD ('[', 'C'):
+ if (esc_head == esc_term)
+ n1 = 1;
+ else
+ {
+ n1 = strtol (esc_head, &eptr, 10);
+ if (eptr != esc_term)
+ break;
+ }
+
+ if (GetConsoleScreenBufferInfo (h, &sb))
+ {
+ cr = sb.dwCursorPosition;
+ /* Stop at the rightmost boundary. */
+ if (sb.dwSize.X - cr.X > n1)
+ cr.X += n1;
+ else
+ cr.X = sb.dwSize.X;
+ SetConsoleCursorPosition (h, cr);
+ }
+ break;
+
+ /* ESC [ n1 'D'
+ Move the cursor left by n1 characters. */
+ case MAKEWORD ('[', 'D'):
+ if (esc_head == esc_term)
+ n1 = 1;
+ else
+ {
+ n1 = strtol (esc_head, &eptr, 10);
+ if (eptr != esc_term)
+ break;
+ }
+
+ if (GetConsoleScreenBufferInfo (h, &sb))
+ {
+ cr = sb.dwCursorPosition;
+ /* Stop at the leftmost boundary. */
+ if (cr.X > n1)
+ cr.X -= n1;
+ else
+ cr.X = 0;
+ SetConsoleCursorPosition (h, cr);
+ }
+ break;
+
+ /* ESC [ n1 'E'
+ Move the cursor to the beginning of the n1-th line downwards. */
+ case MAKEWORD ('[', 'E'):
+ if (esc_head == esc_term)
+ n1 = 1;
+ else
+ {
+ n1 = strtol (esc_head, &eptr, 10);
+ if (eptr != esc_term)
+ break;
+ }
+
+ if (GetConsoleScreenBufferInfo (h, &sb))
+ {
+ cr = sb.dwCursorPosition;
+ cr.X = 0;
+ /* Stop at the bottommost boundary. */
+ if (sb.dwSize.Y - cr.Y > n1)
+ cr.Y += n1;
+ else
+ cr.Y = sb.dwSize.Y;
+ SetConsoleCursorPosition (h, cr);
+ }
+ break;
+
+ /* ESC [ n1 'F'
+ Move the cursor to the beginning of the n1-th line upwards. */
+ case MAKEWORD ('[', 'F'):
+ if (esc_head == esc_term)
+ n1 = 1;
+ else
+ {
+ n1 = strtol (esc_head, &eptr, 10);
+ if (eptr != esc_term)
+ break;
+ }
+
+ if (GetConsoleScreenBufferInfo (h, &sb))
+ {
+ cr = sb.dwCursorPosition;
+ cr.X = 0;
+ /* Stop at the topmost boundary. */
+ if (cr.Y > n1)
+ cr.Y -= n1;
+ else
+ cr.Y = 0;
+ SetConsoleCursorPosition (h, cr);
+ }
+ break;
+
+ /* ESC [ n1 'G'
+ Move the cursor to the (1-based) n1-th column. */
+ case MAKEWORD ('[', 'G'):
+ if (esc_head == esc_term)
+ n1 = 1;
+ else
+ {
+ n1 = strtol (esc_head, &eptr, 10);
+ if (eptr != esc_term)
+ break;
+ }
+
+ if (GetConsoleScreenBufferInfo (h, &sb))
+ {
+ cr = sb.dwCursorPosition;
+ n1 -= 1;
+ /* Stop at the leftmost or rightmost boundary. */
+ if (n1 < 0)
+ cr.X = 0;
+ else if (n1 > sb.dwSize.X)
+ cr.X = sb.dwSize.X;
+ else
+ cr.X = n1;
+ SetConsoleCursorPosition (h, cr);
+ }
+ break;
+
+ /* ESC [ n1 ';' n2 'H'
+ ESC [ n1 ';' n2 'f'
+ Move the cursor to the (1-based) n1-th row and
+ (also 1-based) n2-th column. */
+ case MAKEWORD ('[', 'H'):
+ case MAKEWORD ('[', 'f'):
+ if (esc_head == esc_term)
+ {
+ /* Both parameters are omitted and set to 1 by default. */
+ n1 = 1;
+ n2 = 1;
+ }
+ else if (!(delim = (char *) memchr (esc_head, ';',
+ esc_term - esc_head)))
+ {
+ /* Only the first parameter is given. The second one is
+ set to 1 by default. */
+ n1 = strtol (esc_head, &eptr, 10);
+ if (eptr != esc_term)
+ break;
+ n2 = 1;
+ }
+ else
+ {
+ /* Both parameters are given. The first one shall be
+ terminated by the semicolon. */
+ n1 = strtol (esc_head, &eptr, 10);
+ if (eptr != delim)
+ break;
+ n2 = strtol (delim + 1, &eptr, 10);
+ if (eptr != esc_term)
+ break;
+ }
+
+ if (GetConsoleScreenBufferInfo (h, &sb))
+ {
+ cr = sb.dwCursorPosition;
+ n1 -= 1;
+ n2 -= 1;
+ /* The cursor position shall be relative to the view coord of
+ the console window, which is usually smaller than the actual
+ buffer. FWIW, the 'appropriate' solution will be shrinking
+ the buffer to match the size of the console window,
+ destroying scrollback in the process. */
+ n1 += sb.srWindow.Top;
+ n2 += sb.srWindow.Left;
+ /* Stop at the topmost or bottommost boundary. */
+ if (n1 < 0)
+ cr.Y = 0;
+ else if (n1 > sb.dwSize.Y)
+ cr.Y = sb.dwSize.Y;
+ else
+ cr.Y = n1;
+ /* Stop at the leftmost or rightmost boundary. */
+ if (n2 < 0)
+ cr.X = 0;
+ else if (n2 > sb.dwSize.X)
+ cr.X = sb.dwSize.X;
+ else
+ cr.X = n2;
+ SetConsoleCursorPosition (h, cr);
+ }
+ break;
+
+ /* ESC [ n1 'J'
+ Erase display. */
+ case MAKEWORD ('[', 'J'):
+ if (esc_head == esc_term)
+ /* This is one of the very few codes whose parameters have
+ a default value of zero. */
+ n1 = 0;
+ else
+ {
+ n1 = strtol (esc_head, &eptr, 10);
+ if (eptr != esc_term)
+ break;
+ }
+
+ if (GetConsoleScreenBufferInfo (h, &sb))
+ {
+ /* The cursor is not necessarily in the console window, which
+ makes the behavior of this code harder to define. */
+ switch (n1)
+ {
+ case 0:
+ /* If the cursor is in or above the window, erase from
+ it to the bottom of the window; otherwise, do nothing. */
+ cr = sb.dwCursorPosition;
+ cnt = sb.dwSize.X - sb.dwCursorPosition.X;
+ rows = sb.srWindow.Bottom - sb.dwCursorPosition.Y;
+ break;
+ case 1:
+ /* If the cursor is in or under the window, erase from
+ it to the top of the window; otherwise, do nothing. */
+ cr.X = 0;
+ cr.Y = sb.srWindow.Top;
+ cnt = sb.dwCursorPosition.X + 1;
+ rows = sb.dwCursorPosition.Y - sb.srWindow.Top;
+ break;
+ case 2:
+ /* Erase the entire window. */
+ cr.X = sb.srWindow.Left;
+ cr.Y = sb.srWindow.Top;
+ cnt = 0;
+ rows = sb.srWindow.Bottom - sb.srWindow.Top + 1;
+ break;
+ default:
+ /* Erase the entire buffer. */
+ cr.X = 0;
+ cr.Y = 0;
+ cnt = 0;
+ rows = sb.dwSize.Y;
+ break;
+ }
+ if (rows < 0)
+ break;
+ cnt += rows * sb.dwSize.X;
+ FillConsoleOutputCharacterW (h, L' ', cnt, cr, &step);
+ FillConsoleOutputAttribute (h, sb.wAttributes, cnt, cr, &step);
+ }
+ break;
+
+ /* ESC [ n1 'K'
+ Erase line. */
+ case MAKEWORD ('[', 'K'):
+ if (esc_head == esc_term)
+ /* This is one of the very few codes whose parameters have
+ a default value of zero. */
+ n1 = 0;
+ else
+ {
+ n1 = strtol (esc_head, &eptr, 10);
+ if (eptr != esc_term)
+ break;
+ }
+
+ if (GetConsoleScreenBufferInfo (h, &sb))
+ {
+ switch (n1)
+ {
+ case 0:
+ /* Erase from the cursor to the end. */
+ cr = sb.dwCursorPosition;
+ cnt = sb.dwSize.X - sb.dwCursorPosition.X;
+ break;
+ case 1:
+ /* Erase from the cursor to the beginning. */
+ cr = sb.dwCursorPosition;
+ cr.X = 0;
+ cnt = sb.dwCursorPosition.X + 1;
+ break;
+ default:
+ /* Erase the entire line. */
+ cr = sb.dwCursorPosition;
+ cr.X = 0;
+ cnt = sb.dwSize.X;
+ break;
+ }
+ FillConsoleOutputCharacterW (h, L' ', cnt, cr, &step);
+ FillConsoleOutputAttribute (h, sb.wAttributes, cnt, cr, &step);
+ }
+ break;
+
+ /* ESC [ n1 ';' n2 'm'
+ Set SGR parameters. Zero or more parameters will follow. */
+ case MAKEWORD ('[', 'm'):
+ attrib_add = 0;
+ attrib_rm = 0;
+ if (esc_head == esc_term)
+ {
+ /* When no parameter is given, reset the console. */
+ attrib_add |= (FOREGROUND_RED | FOREGROUND_GREEN
+ | FOREGROUND_BLUE);
+ attrib_rm = -1; /* Removes everything. */
+ goto sgr_set_it;
+ }
+ param = esc_head;
+ do
+ {
+ /* Parse a parameter. */
+ n1 = strtol (param, &eptr, 10);
+ if (*eptr != ';' && eptr != esc_term)
+ goto sgr_set_it;
+
+ switch (n1)
+ {
+ case 0:
+ /* Reset. */
+ attrib_add |= (FOREGROUND_RED | FOREGROUND_GREEN
+ | FOREGROUND_BLUE);
+ attrib_rm = -1; /* Removes everything. */
+ break;
+ case 1:
+ /* Bold. */
+ attrib_add |= FOREGROUND_INTENSITY;
+ break;
+ case 4:
+ /* Underline. */
+ attrib_add |= COMMON_LVB_UNDERSCORE;
+ break;
+ case 5:
+ /* Blink. */
+ /* XXX: It is not BLINKING at all! */
+ attrib_add |= BACKGROUND_INTENSITY;
+ break;
+ case 7:
+ /* Reverse. */
+ attrib_add |= COMMON_LVB_REVERSE_VIDEO;
+ break;
+ case 22:
+ /* No bold. */
+ attrib_add &= ~FOREGROUND_INTENSITY;
+ attrib_rm |= FOREGROUND_INTENSITY;
+ break;
+ case 24:
+ /* No underline. */
+ attrib_add &= ~COMMON_LVB_UNDERSCORE;
+ attrib_rm |= COMMON_LVB_UNDERSCORE;
+ break;
+ case 25:
+ /* No blink. */
+ /* XXX: It is not BLINKING at all! */
+ attrib_add &= ~BACKGROUND_INTENSITY;
+ attrib_rm |= BACKGROUND_INTENSITY;
+ break;
+ case 27:
+ /* No reverse. */
+ attrib_add &= ~COMMON_LVB_REVERSE_VIDEO;
+ attrib_rm |= COMMON_LVB_REVERSE_VIDEO;
+ break;
+ case 30:
+ case 31:
+ case 32:
+ case 33:
+ case 34:
+ case 35:
+ case 36:
+ case 37:
+ /* Foreground color. */
+ attrib_add &= ~(FOREGROUND_RED | FOREGROUND_GREEN
+ | FOREGROUND_BLUE);
+ n1 -= 30;
+ if (n1 & 1)
+ attrib_add |= FOREGROUND_RED;
+ if (n1 & 2)
+ attrib_add |= FOREGROUND_GREEN;
+ if (n1 & 4)
+ attrib_add |= FOREGROUND_BLUE;
+ attrib_rm |= (FOREGROUND_RED | FOREGROUND_GREEN
+ | FOREGROUND_BLUE);
+ break;
+ case 38:
+ /* Reserved for extended foreground color.
+ Don't know how to handle parameters remaining.
+ Bail out. */
+ goto sgr_set_it;
+ case 39:
+ /* Reset foreground color. */
+ /* Set to grey. */
+ attrib_add |= (FOREGROUND_RED | FOREGROUND_GREEN
+ | FOREGROUND_BLUE);
+ attrib_rm |= (FOREGROUND_RED | FOREGROUND_GREEN
+ | FOREGROUND_BLUE);
+ break;
+ case 40:
+ case 41:
+ case 42:
+ case 43:
+ case 44:
+ case 45:
+ case 46:
+ case 47:
+ /* Background color. */
+ attrib_add &= ~(BACKGROUND_RED | BACKGROUND_GREEN
+ | BACKGROUND_BLUE);
+ n1 -= 40;
+ if (n1 & 1)
+ attrib_add |= BACKGROUND_RED;
+ if (n1 & 2)
+ attrib_add |= BACKGROUND_GREEN;
+ if (n1 & 4)
+ attrib_add |= BACKGROUND_BLUE;
+ attrib_rm |= (BACKGROUND_RED | BACKGROUND_GREEN
+ | BACKGROUND_BLUE);
+ break;
+ case 48:
+ /* Reserved for extended background color.
+ Don't know how to handle parameters remaining.
+ Bail out. */
+ goto sgr_set_it;
+ case 49:
+ /* Reset background color. */
+ /* Set to black. */
+ attrib_add &= ~(BACKGROUND_RED | BACKGROUND_GREEN
+ | BACKGROUND_BLUE);
+ attrib_rm |= (BACKGROUND_RED | BACKGROUND_GREEN
+ | BACKGROUND_BLUE);
+ break;
+ }
+
+ /* Prepare the next parameter. */
+ param = eptr + 1;
+ }
+ while (param != esc_term);
+
+sgr_set_it:
+ /* 0xFFFF removes everything. If it is not the case,
+ care must be taken to preserve old attributes. */
+ if (attrib_rm != 0xFFFF && GetConsoleScreenBufferInfo (h, &sb))
+ {
+ attrib_add |= sb.wAttributes & ~attrib_rm;
+ }
+ SetConsoleTextAttribute (h, attrib_add);
+ break;
+ }
+}
+
+int
+mingw_ansi_fputs (const char *str, FILE *fp)
+{
+ const char *read = str;
+ HANDLE h;
+ DWORD mode;
+ int esc_code, prefix_len;
+ const char *esc_head, *esc_term;
+
+ h = (HANDLE) _get_osfhandle (_fileno (fp));
+ if (h == INVALID_HANDLE_VALUE)
+ return EOF;
+
+ /* Don't mess up stdio functions with Windows APIs. */
+ fflush (fp);
+
+ if (GetConsoleMode (h, &mode))
+ /* If it is a console, translate ANSI escape codes as needed. */
+ for (;;)
+ {
+ if ((esc_code = find_esc_head (&prefix_len, &esc_head, read)) == 0)
+ {
+ /* Write all remaining characters, then exit. */
+ write_all (h, read, strlen (read));
+ break;
+ }
+ if (find_esc_terminator (&esc_term, esc_head) == 0)
+ /* Ignore incomplete escape sequences at the moment.
+ FIXME: The escape state shall be cached for further calls
+ to this function. */
+ break;
+ write_all (h, read, esc_head - prefix_len - read);
+ eat_esc_sequence (h, esc_code, esc_head, esc_term);
+ read = esc_term + 1;
+ }
+ else
+ /* If it is not a console, write everything as-is. */
+ write_all (h, read, strlen (read));
+
+ return 1;
+}
+
+#endif /* __MINGW32__ */
+
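A minimal usage sketch (hypothetical caller, not part of the patch): the
function is a drop-in replacement for fputs, so colorized output can be
emitted directly:

    #ifdef __MINGW32__
      /* "\033[1;31m" selects bold red via the SGR handling above;
         "\033[0m" resets the attributes.  */
      mingw_ansi_fputs ("\033[1;31merror:\033[0m something failed\n", stderr);
    #endif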
static void pp_quoted_string (pretty_printer *, const char *, size_t = -1);
/* Overwrite the given location/range within this text_info's rich_location.
@@ -135,12 +795,40 @@ pp_clear_state (pretty_printer *pp)
pp_indentation (pp) = 0;
}
+/* Print X to PP in decimal. */
+template<unsigned int N, typename T>
+void
+pp_wide_integer (pretty_printer *pp, const poly_int_pod<N, T> &x)
+{
+ if (x.is_constant ())
+ pp_wide_integer (pp, x.coeffs[0]);
+ else
+ {
+ pp_left_bracket (pp);
+ for (unsigned int i = 0; i < N; ++i)
+ {
+ if (i != 0)
+ pp_comma (pp);
+ pp_wide_integer (pp, x.coeffs[i]);
+ }
+ pp_right_bracket (pp);
+ }
+}
+
+template void pp_wide_integer (pretty_printer *, const poly_uint16_pod &);
+template void pp_wide_integer (pretty_printer *, const poly_int64_pod &);
+template void pp_wide_integer (pretty_printer *, const poly_uint64_pod &);
+
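For illustration, a sketch against GCC's internal pretty-printer API
(assuming NUM_POLY_INT_COEFFS == 2, as on the SVE branch, and that
pp_comma emits a bare ','):

    pretty_printer pp;
    pp_wide_integer (&pp, poly_int64 (2, 1));  /* prints "[2,1]" */
    pp_wide_integer (&pp, poly_int64 (3, 0));  /* is_constant (): prints "3" */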
/* Flush the formatted text of PRETTY-PRINTER onto the attached stream. */
void
pp_write_text_to_stream (pretty_printer *pp)
{
const char *text = pp_formatted_text (pp);
+#ifdef __MINGW32__
+ mingw_ansi_fputs (text, pp_buffer (pp)->stream);
+#else
fputs (text, pp_buffer (pp)->stream);
+#endif
pp_clear_output_area (pp);
}
@@ -939,25 +1627,6 @@ pp_printf (pretty_printer *pp, const char *msg, ...)
va_end (ap);
}
-/* Output polynomial integer X to PP, as decimal. */
-
-template<unsigned int N, typename T>
-void
-pp_poly_int (pretty_printer *pp, const poly_int_pod<N, T> &x)
-{
- if (x.is_constant ())
- pp_printf (pp, "%wd", (HOST_WIDE_INT) x.coeffs[0]);
- else
- {
- pp_printf (pp, "[");
- for (unsigned int i = 0; i < N; ++i)
- pp_printf (pp, "%wd%c", (HOST_WIDE_INT) x.coeffs[i],
- i == N - 1 ? ']' : ',');
- }
-}
-
-template void pp_poly_int (pretty_printer *, const poly_uint16_pod &);
-template void pp_poly_int (pretty_printer *, const poly_int64_pod &);
/* Output MESSAGE verbatim into BUFFER. */
void
diff --git a/gcc/pretty-print.h b/gcc/pretty-print.h
index 7d2bfe8db62..8822179033d 100644
--- a/gcc/pretty-print.h
+++ b/gcc/pretty-print.h
@@ -368,9 +368,6 @@ extern void pp_separate_with (pretty_printer *, char);
extern void pp_printf (pretty_printer *, const char *, ...)
ATTRIBUTE_GCC_PPDIAG(2,3);
-template<unsigned int N, typename T>
-extern void pp_poly_int (pretty_printer *, const poly_int_pod<N, T> &);
-
extern void pp_verbatim (pretty_printer *, const char *, ...)
ATTRIBUTE_GCC_PPDIAG(2,3);
extern void pp_flush (pretty_printer *);
@@ -402,6 +399,8 @@ extern const char *identifier_to_locale (const char *);
extern void *(*identifier_to_locale_alloc) (size_t);
extern void (*identifier_to_locale_free) (void *);
+/* Print I to PP in decimal. */
+
inline void
pp_wide_integer (pretty_printer *pp, HOST_WIDE_INT i)
{
@@ -409,23 +408,6 @@ pp_wide_integer (pretty_printer *pp, HOST_WIDE_INT i)
}
template<unsigned int N, typename T>
-void
-pp_wide_integer (pretty_printer *pp, poly_int_pod<N, T> x)
-{
- T const_x;
- if (x.is_constant (&const_x))
- pp_wide_integer (pp, const_x);
- else
- {
- pp_left_bracket (pp);
- for (unsigned int i = 0; i < N; ++i)
- {
- if (i != 0)
- pp_comma (pp);
- pp_wide_integer (pp, x.coeffs[i]);
- }
- pp_right_bracket (pp);
- }
-}
+void pp_wide_integer (pretty_printer *pp, const poly_int_pod<N, T> &);
#endif /* GCC_PRETTY_PRINT_H */
diff --git a/gcc/print-rtl.c b/gcc/print-rtl.c
index 360e0754896..a72f9cda188 100644
--- a/gcc/print-rtl.c
+++ b/gcc/print-rtl.c
@@ -190,10 +190,10 @@ print_poly_int (FILE *file, poly_int64 x)
fprintf (file, HOST_WIDE_INT_PRINT_DEC, const_x);
else
{
- fprintf (file, "[");
- for (int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
- fprintf (file, HOST_WIDE_INT_PRINT_DEC "%c",
- x.coeffs[i], i == NUM_POLY_INT_COEFFS - 1 ? ']' : ',');
+ fprintf (file, "[" HOST_WIDE_INT_PRINT_DEC, x.coeffs[0]);
+ for (int i = 1; i < NUM_POLY_INT_COEFFS; ++i)
+ fprintf (file, ", " HOST_WIDE_INT_PRINT_DEC, x.coeffs[i]);
+ fprintf (file, "]");
}
}
@@ -1614,7 +1614,7 @@ print_value (pretty_printer *pp, const_rtx x, int verbose)
pp_wide_int (pp, CONST_POLY_INT_COEFFS (x)[0], SIGNED);
for (unsigned int i = 1; i < NUM_POLY_INT_COEFFS; ++i)
{
- pp_comma (pp);
+ pp_string (pp, ", ");
pp_wide_int (pp, CONST_POLY_INT_COEFFS (x)[i], SIGNED);
}
pp_right_bracket (pp);
@@ -1667,7 +1667,7 @@ print_value (pretty_printer *pp, const_rtx x, int verbose)
case SUBREG:
print_value (pp, SUBREG_REG (x), verbose);
pp_printf (pp, "#");
- pp_poly_int (pp, SUBREG_BYTE (x));
+ pp_wide_integer (pp, SUBREG_BYTE (x));
break;
case SCRATCH:
case CC0:
@@ -1845,11 +1845,11 @@ print_insn (pretty_printer *pp, const rtx_insn *x, int verbose)
case DEBUG_INSN:
{
const char *name = "?";
+ char idbuf[32];
if (DECL_P (INSN_VAR_LOCATION_DECL (x)))
{
tree id = DECL_NAME (INSN_VAR_LOCATION_DECL (x));
- char idbuf[32];
if (id)
name = IDENTIFIER_POINTER (id);
else if (TREE_CODE (INSN_VAR_LOCATION_DECL (x))
diff --git a/gcc/profile-count.c b/gcc/profile-count.c
index 4d22428a195..44ceaed2d66 100644
--- a/gcc/profile-count.c
+++ b/gcc/profile-count.c
@@ -30,6 +30,7 @@ along with GCC; see the file COPYING3. If not see
#include "gimple.h"
#include "data-streamer.h"
#include "cgraph.h"
+#include "wide-int.h"
/* Dump THIS to F. */
@@ -146,12 +147,12 @@ profile_probability::differs_from_p (profile_probability other) const
{
if (!initialized_p () || !other.initialized_p ())
return false;
- if ((uint64_t)m_val - (uint64_t)other.m_val < 10
- || (uint64_t)other.m_val - (uint64_t)m_val < 10)
+ if ((uint64_t)m_val - (uint64_t)other.m_val < max_probability / 1000
+      || (uint64_t)other.m_val - (uint64_t)m_val < max_probability / 1000)
return false;
if (!other.m_val)
return true;
- int64_t ratio = m_val * 100 / other.m_val;
+ int64_t ratio = (int64_t)m_val * 100 / other.m_val;
return ratio < 99 || ratio > 101;
}
@@ -194,3 +195,21 @@ profile_probability::stream_out (struct lto_output_stream *ob)
streamer_write_uhwi_stream (ob, m_val);
streamer_write_uhwi_stream (ob, m_quality);
}
+
+/* Compute RES = (a*b + c/2)/c, capping the result, and return false
+   if the computation overflowed. */
+
+bool
+slow_safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res)
+{
+ FIXED_WIDE_INT (128) tmp = a;
+ bool overflow;
+ tmp = wi::udiv_floor (wi::umul (tmp, b, &overflow) + (c / 2), c);
+ gcc_checking_assert (!overflow);
+ if (wi::fits_uhwi_p (tmp))
+ {
+ *res = tmp.to_uhwi ();
+ return true;
+ }
+ *res = (uint64_t) -1;
+ return false;
+}
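FIXED_WIDE_INT (128) is GCC's own wide-integer machinery.  On hosts that
provide __int128, the same computation can be sketched standalone (an
illustrative analogue, not the patch's code):

    #include <stdbool.h>
    #include <stdint.h>

    /* *RES = (A * B + C/2) / C using a 128-bit intermediate, capped at
       UINT64_MAX; returns false when the true result does not fit.  */
    static bool
    scale_with_int128 (uint64_t a, uint64_t b, uint64_t c, uint64_t *res)
    {
      unsigned __int128 tmp = ((unsigned __int128) a * b + c / 2) / c;
      if (tmp <= UINT64_MAX)
        {
          *res = (uint64_t) tmp;
          return true;
        }
      *res = (uint64_t) -1;  /* cap, as above */
      return false;
    }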
diff --git a/gcc/profile-count.h b/gcc/profile-count.h
index 8fd22b8b68a..4546e199f24 100644
--- a/gcc/profile-count.h
+++ b/gcc/profile-count.h
@@ -43,6 +43,38 @@ enum profile_quality {
#define RDIV(X,Y) (((X) + (Y) / 2) / (Y))
+bool slow_safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res);
+
+/* Compute RES = (a*b + c/2)/c, capping the result, and return false
+   if the computation overflowed. */
+
+inline bool
+safe_scale_64bit (uint64_t a, uint64_t b, uint64_t c, uint64_t *res)
+{
+#if (GCC_VERSION >= 5000)
+ uint64_t tmp;
+ if (!__builtin_mul_overflow (a, b, &tmp)
+ && !__builtin_add_overflow (tmp, c/2, &tmp))
+ {
+ *res = tmp / c;
+ return true;
+ }
+ if (c == 1)
+ {
+ *res = (uint64_t) -1;
+ return false;
+ }
+#else
+ if (a < ((uint64_t)1 << 31)
+ && b < ((uint64_t)1 << 31)
+ && c < ((uint64_t)1 << 31))
+ {
+ *res = (a * b + (c / 2)) / c;
+ return true;
+ }
+#endif
+ return slow_safe_scale_64bit (a, b, c, res);
+}
+
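Usage sketch (illustrative values): scaling a count of three billion by
2/3 stays within 64 bits, so the __builtin_mul_overflow fast path applies:

    uint64_t scaled;
    bool ok = safe_scale_64bit (3000000000ull, 2, 3, &scaled);
    /* ok == true, scaled == (3000000000 * 2 + 1) / 3 == 2000000000.  */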
/* Data type to hold probabilities. It implements fixed point arithmetics
with capping so probability is always in range [0,1] and scaling requiring
values greater than 1 needs to be represented otherwise.
@@ -82,12 +114,12 @@ enum profile_quality {
class GTY((user)) profile_probability
{
- /* For now use values in range 0...REG_BR_PROB_BASE. Later we can use full
- precision of 30 bits available. */
-
static const int n_bits = 30;
- static const uint32_t max_probability = REG_BR_PROB_BASE;
- static const uint32_t uninitialized_probability = ((uint32_t) 1 << n_bits) - 1;
+ /* We can technically use ((uint32_t) 1 << (n_bits - 1)) - 2 but that
+ will lead to harder multiplication sequences. */
+ static const uint32_t max_probability = (uint32_t) 1 << (n_bits - 2);
+ static const uint32_t uninitialized_probability
+ = ((uint32_t) 1 << (n_bits - 1)) - 1;
uint32_t m_val : 30;
enum profile_quality m_quality : 2;
@@ -171,7 +203,7 @@ public:
/* Return true if value can be trusted. */
bool reliable_p () const
{
- return initialized_p ();
+ return m_quality >= profile_adjusted;
}
/* Conversion from and to REG_BR_PROB_BASE integer fixpoint arithmetics.
@@ -180,14 +212,14 @@ public:
{
profile_probability ret;
gcc_checking_assert (v >= 0 && v <= REG_BR_PROB_BASE);
- ret.m_val = RDIV (v * max_probability, REG_BR_PROB_BASE);
+ ret.m_val = RDIV (v * (uint64_t) max_probability, REG_BR_PROB_BASE);
ret.m_quality = profile_guessed;
return ret;
}
int to_reg_br_prob_base () const
{
gcc_checking_assert (initialized_p ());
- return RDIV (m_val * REG_BR_PROB_BASE, max_probability);
+ return RDIV (m_val * (uint64_t) REG_BR_PROB_BASE, max_probability);
}
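The (uint64_t) casts added above are load-bearing: with max_probability
now 1 << 28, the 32-bit product v * max_probability wraps for legal
inputs.  In fact 10000 * 2^28 equals 625 * 2^32, so every conversion
would collapse to zero.  A standalone illustration:

    #include <stdint.h>
    #include <stdio.h>

    int
    main (void)
    {
      const uint32_t max_probability = (uint32_t) 1 << 28;
      int v = 10000;  /* REG_BR_PROB_BASE */
      uint32_t wrapped = v * max_probability;          /* wraps to 0 */
      uint64_t exact = (uint64_t) v * max_probability; /* 2684354560000 */
      printf ("%u vs %llu\n", wrapped, (unsigned long long) exact);
      return 0;
    }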
/* Conversion to and from RTL representation of profile probabilities. */
@@ -216,7 +248,12 @@ public:
if (val1 > val2)
ret.m_val = max_probability;
else
- ret.m_val = RDIV (val1 * max_probability, val2);
+ {
+ uint64_t tmp;
+ safe_scale_64bit (val1, max_probability, val2, &tmp);
+ gcc_checking_assert (tmp <= max_probability);
+ ret.m_val = tmp;
+ }
ret.m_quality = profile_precise;
return ret;
}
@@ -413,8 +450,9 @@ public:
if (!initialized_p ())
return profile_probability::uninitialized ();
profile_probability ret;
- ret.m_val = MIN (RDIV (m_val * num, den),
- max_probability);
+ uint64_t tmp;
+ safe_scale_64bit (m_val, num, den, &tmp);
+ ret.m_val = MIN (tmp, max_probability);
ret.m_quality = MIN (m_quality, profile_adjusted);
return ret;
}
@@ -452,7 +490,7 @@ public:
if (m_val == uninitialized_probability)
return m_quality == profile_guessed;
else
- return m_val <= REG_BR_PROB_BASE;
+ return m_val <= max_probability;
}
  /* Comparisons are three-state and conservative. False is returned if
@@ -535,11 +573,6 @@ class GTY(()) profile_count
uint64_t m_val : n_bits;
enum profile_quality m_quality : 2;
-
- /* Assume numbers smaller than this to multiply. This is set to make
- testsuite pass, in future we may implement precise multiplication in higer
- rangers. */
- static const uint64_t max_safe_multiplier = 131072;
public:
  /* Used for counters which are expected never to be executed. */
@@ -595,7 +628,7 @@ public:
/* Return true if value can be trusted. */
bool reliable_p () const
{
- return initialized_p ();
+ return m_quality >= profile_adjusted;
}
/* When merging basic blocks, the two different profile counts are unified.
@@ -756,8 +789,10 @@ public:
if (!initialized_p ())
return profile_count::uninitialized ();
profile_count ret;
- ret.m_val = RDIV (m_val * prob.m_val,
- profile_probability::max_probability);
+ uint64_t tmp;
+ safe_scale_64bit (m_val, prob.m_val, profile_probability::max_probability,
+ &tmp);
+ ret.m_val = tmp;
ret.m_quality = MIN (m_quality, prob.m_quality);
return ret;
}
@@ -769,11 +804,11 @@ public:
if (!initialized_p ())
return profile_count::uninitialized ();
profile_count ret;
+ uint64_t tmp;
+
gcc_checking_assert (num >= 0 && den > 0);
- /* FIXME: shrink wrapping violates this sanity check. */
- gcc_checking_assert ((num <= REG_BR_PROB_BASE
- || den <= REG_BR_PROB_BASE) || 1);
- ret.m_val = RDIV (m_val * num, den);
+ safe_scale_64bit (m_val, num, den, &tmp);
+ ret.m_val = MIN (tmp, max_count);
ret.m_quality = MIN (m_quality, profile_adjusted);
return ret;
}
@@ -790,12 +825,9 @@ public:
return *this;
profile_count ret;
- /* Take care for overflows! */
- if (num.m_val < max_safe_multiplier || m_val < max_safe_multiplier)
- ret.m_val = RDIV (m_val * num.m_val, den.m_val);
- else
- ret.m_val = RDIV (m_val * RDIV (num.m_val * max_safe_multiplier,
- den.m_val), max_safe_multiplier);
+ uint64_t val;
+ safe_scale_64bit (m_val, num.m_val, den.m_val, &val);
+ ret.m_val = MIN (val, max_count);
ret.m_quality = MIN (m_quality, profile_adjusted);
return ret;
}
diff --git a/gcc/recog.c b/gcc/recog.c
index d7f2ee9e691..0ac16b9b87f 100644
--- a/gcc/recog.c
+++ b/gcc/recog.c
@@ -408,6 +408,7 @@ verify_changes (int num)
&& REG_P (changes[i].old)
&& asm_noperands (PATTERN (object)) > 0
&& REG_EXPR (changes[i].old) != NULL_TREE
+ && HAS_DECL_ASSEMBLER_NAME_P (REG_EXPR (changes[i].old))
&& DECL_ASSEMBLER_NAME_SET_P (REG_EXPR (changes[i].old))
&& DECL_REGISTER (REG_EXPR (changes[i].old)))
{
@@ -1282,7 +1283,7 @@ push_operand (rtx op, machine_mode mode)
if (GET_CODE (op) != PRE_MODIFY
|| GET_CODE (XEXP (op, 1)) != PLUS
|| XEXP (XEXP (op, 1), 0) != XEXP (op, 0)
- || !poly_int_const_p (XEXP (XEXP (op, 1), 1), &offset)
+ || !poly_int_rtx_p (XEXP (XEXP (op, 1), 1), &offset)
|| (STACK_GROWS_DOWNWARD
? may_ne (offset, -rounded_size)
: may_ne (offset, rounded_size)))
@@ -1377,10 +1378,9 @@ indirect_operand (rtx op, machine_mode mode)
address is if OFFSET is zero and the address already is an operand
or if the address is (plus Y (const_int -OFFSET)) and Y is an
operand. */
- rtx addr = XEXP (SUBREG_REG (op), 0);
poly_int64 offset;
- addr = strip_offset (addr, &offset);
- return (must_eq (offset + SUBREG_BYTE (op), 0)
+ rtx addr = strip_offset (XEXP (SUBREG_REG (op), 0), &offset);
+ return (known_zero (offset + SUBREG_BYTE (op))
&& general_operand (addr, Pmode));
}
@@ -1967,7 +1967,7 @@ offsettable_address_addr_space_p (int strictp, machine_mode mode, rtx y,
Clearly that depends on the situation in which it's being used.
However, the current situation in which we test 0xffffffff is
less than ideal. Caveat user. */
- if (must_eq (mode_sz, 0))
+ if (known_zero (mode_sz))
mode_sz = BIGGEST_ALIGNMENT / BITS_PER_UNIT;
/* If the expression contains a constant term,
diff --git a/gcc/regcprop.c b/gcc/regcprop.c
index f5e38f0d276..55f4ea36a7d 100644
--- a/gcc/regcprop.c
+++ b/gcc/regcprop.c
@@ -406,12 +406,12 @@ maybe_mode_change (machine_mode orig_mode, machine_mode copy_mode,
{
int copy_nregs = hard_regno_nregs (copy_regno, copy_mode);
int use_nregs = hard_regno_nregs (copy_regno, new_mode);
- poly_int64 bytes_per_reg;
+ poly_uint64 bytes_per_reg;
if (!can_div_trunc_p (GET_MODE_SIZE (copy_mode),
copy_nregs, &bytes_per_reg))
return NULL_RTX;
- poly_int64 copy_offset = bytes_per_reg * (copy_nregs - use_nregs);
- poly_int64 offset
+ poly_uint64 copy_offset = bytes_per_reg * (copy_nregs - use_nregs);
+ poly_uint64 offset
= subreg_size_lowpart_offset (GET_MODE_SIZE (new_mode) + copy_offset,
GET_MODE_SIZE (orig_mode));
regno += subreg_regno_offset (regno, orig_mode, offset, new_mode);
diff --git a/gcc/reginfo.c b/gcc/reginfo.c
index 65c2b42c4f7..847305ebe61 100644
--- a/gcc/reginfo.c
+++ b/gcc/reginfo.c
@@ -633,8 +633,8 @@ choose_hard_reg_mode (unsigned int regno ATTRIBUTE_UNUSED,
held in REGNO. If none, we look for the largest floating-point mode.
If we still didn't find a valid mode, try CCmode.
- The tests use may_gt rather than must_gt because we want N V4SFs to
- win over plain V4SF even though N might be 1. */
+ The tests use may_gt rather than must_gt because we want (for example)
+ N V4SFs to win over plain V4SF even though N might be 1. */
FOR_EACH_MODE_IN_CLASS (mode, MODE_INT)
if (hard_regno_nregs (regno, mode) == nregs
&& targetm.hard_regno_mode_ok (regno, mode)
diff --git a/gcc/reload.c b/gcc/reload.c
index 25fd84e0fea..c09a9c6a3f8 100644
--- a/gcc/reload.c
+++ b/gcc/reload.c
@@ -813,22 +813,21 @@ find_reusable_reload (rtx *p_in, rtx out, enum reg_class rclass,
/* Return true if:
- (a) (subreg:OUTER_MODE (reg:INNER_MODE INNER_REGNO) ...)
- represents a word or subword subreg of a multiword value; and
+ (a) (subreg:OUTER_MODE REG ...) represents a word or subword subreg
+ of a multiword value; and
- (b) the number of words in INNER_MODE does not match the number of
- registers in (reg:INNER_MODE INNER_REGNO). */
+ (b) the number of *words* in REG does not match the number of *registers*
+ in REG. */
static bool
-complex_word_subreg_p (machine_mode outer_mode, machine_mode inner_mode,
- unsigned int inner_regno)
+complex_word_subreg_p (machine_mode outer_mode, rtx reg)
{
+ machine_mode inner_mode = GET_MODE (reg);
+ poly_uint64 reg_words = REG_NREGS (reg) * UNITS_PER_WORD;
return (must_le (GET_MODE_SIZE (outer_mode), UNITS_PER_WORD)
&& may_gt (GET_MODE_SIZE (inner_mode), UNITS_PER_WORD)
- && may_ne (aligned_upper_bound (GET_MODE_SIZE (inner_mode),
- UNITS_PER_WORD),
- hard_regno_nregs (inner_regno, inner_mode)
- * UNITS_PER_WORD));
+ && !known_equal_after_align_up (GET_MODE_SIZE (inner_mode),
+ reg_words, UNITS_PER_WORD));
}
/* Return true if X is a SUBREG that will need reloading of its SUBREG_REG
@@ -862,8 +861,7 @@ reload_inner_reg_of_subreg (rtx x, machine_mode mode, bool output)
INNER is larger than a word and the number of registers in INNER is
not the same as the number of words in INNER, then INNER will need
reloading (with an in-out reload). */
- return (output
- && complex_word_subreg_p (mode, GET_MODE (inner), REGNO (inner)));
+ return output && complex_word_subreg_p (mode, inner);
}
/* Return nonzero if IN can be reloaded into REGNO with mode MODE without
@@ -1082,8 +1080,7 @@ push_reload (rtx in, rtx out, rtx *inloc, rtx *outloc,
/* The case where out is nonzero
is handled differently in the following statement. */
&& (out == 0 || subreg_lowpart_p (in))
- && (complex_word_subreg_p (inmode, GET_MODE (SUBREG_REG (in)),
- REGNO (SUBREG_REG (in)))
+ && (complex_word_subreg_p (inmode, SUBREG_REG (in))
|| !targetm.hard_regno_mode_ok (subreg_regno (in), inmode)))
|| (secondary_reload_class (1, rclass, inmode, in) != NO_REGS
&& (secondary_reload_class (1, rclass, GET_MODE (SUBREG_REG (in)),
diff --git a/gcc/reload1.c b/gcc/reload1.c
index 680e7be56cb..2ec09c4a7cc 100644
--- a/gcc/reload1.c
+++ b/gcc/reload1.c
@@ -200,7 +200,7 @@ static int last_spill_reg;
static rtx spill_stack_slot[FIRST_PSEUDO_REGISTER];
/* Width allocated so far for that stack slot. */
-static poly_int64 spill_stack_slot_width[FIRST_PSEUDO_REGISTER];
+static poly_uint64_pod spill_stack_slot_width[FIRST_PSEUDO_REGISTER];
/* Record which pseudos needed to be spilled. */
static regset_head spilled_pseudos;
@@ -261,13 +261,13 @@ struct elim_table
{
int from; /* Register number to be eliminated. */
int to; /* Register number used as replacement. */
- poly_int64 initial_offset; /* Initial difference between values. */
+ poly_int64_pod initial_offset; /* Initial difference between values. */
int can_eliminate; /* Nonzero if this elimination can be done. */
int can_eliminate_previous; /* Value returned by TARGET_CAN_ELIMINATE
target hook in previous scan over insns
made by reload. */
- poly_int64 offset; /* Current offset between the two regs. */
- poly_int64 previous_offset; /* Offset at end of previous insn. */
+ poly_int64_pod offset; /* Current offset between the two regs. */
+ poly_int64_pod previous_offset; /* Offset at end of previous insn. */
int ref_outside_mem; /* "to" has been referenced outside a MEM. */
rtx from_rtx; /* REG rtx for the register to be eliminated.
We cannot simply compare the number since
@@ -313,7 +313,7 @@ static int num_eliminable_invariants;
static int first_label_num;
static char *offsets_known_at;
-static poly_int64 (*offsets_at)[NUM_ELIMINABLE_REGS];
+static poly_int64_pod (*offsets_at)[NUM_ELIMINABLE_REGS];
vec<reg_equivs_t, va_gc> *reg_equivs;
@@ -963,7 +963,7 @@ reload (rtx_insn *first, int global)
then repeat the elimination bookkeeping. We don't
realign when there is no stack, as that will cause a
stack frame when none is needed should
- STARTING_FRAME_OFFSET not be already aligned to
+ TARGET_STARTING_FRAME_OFFSET not be already aligned to
STACK_BOUNDARY. */
assign_stack_local (BLKmode, 0, crtl->stack_alignment_needed);
}
@@ -2142,10 +2142,10 @@ alter_reg (int i, int from_reg, bool dont_share_p)
{
rtx x = NULL_RTX;
machine_mode mode = GET_MODE (regno_reg_rtx[i]);
- poly_int64 inherent_size = GET_MODE_SIZE (mode);
+ poly_uint64 inherent_size = GET_MODE_SIZE (mode);
unsigned int inherent_align = GET_MODE_ALIGNMENT (mode);
machine_mode wider_mode = wider_subreg_mode (mode, reg_max_ref_mode[i]);
- poly_int64 total_size = GET_MODE_SIZE (wider_mode);
+ poly_uint64 total_size = GET_MODE_SIZE (wider_mode);
/* ??? Seems strange to derive the minimum alignment from the size,
but that's the traditional behavior. For polynomial-size modes,
the natural extension is to use the minimum possible size. */
@@ -2178,8 +2178,11 @@ alter_reg (int i, int from_reg, bool dont_share_p)
{
rtx stack_slot;
- /* No known place to spill from => no slot to reuse. */
+ /* The sizes are taken from a subreg operation, which guarantees
+ that they're ordered. */
gcc_checking_assert (ordered_p (total_size, inherent_size));
+
+ /* No known place to spill from => no slot to reuse. */
x = assign_stack_local (mode, total_size,
min_align > inherent_align
|| may_gt (total_size, inherent_size)
@@ -2234,8 +2237,11 @@ alter_reg (int i, int from_reg, bool dont_share_p)
min_align = MEM_ALIGN (spill_stack_slot[from_reg]);
}
- /* Make a slot with that size. */
+ /* The sizes are taken from a subreg operation, which guarantees
+ that they're ordered. */
gcc_checking_assert (ordered_p (total_size, inherent_size));
+
+ /* Make a slot with that size. */
x = assign_stack_local (mode, total_size,
min_align > inherent_align
|| may_gt (total_size, inherent_size)
@@ -3011,7 +3017,7 @@ elimination_effects (rtx x, machine_mode mem_mode)
/* If more bytes than MEM_MODE are pushed, account for them. */
#ifdef PUSH_ROUNDING
if (ep->to_rtx == stack_pointer_rtx)
- size = PUSH_ROUNDING (MACRO_INT (size));
+ size = PUSH_ROUNDING (size);
#endif
if (code == PRE_DEC || code == POST_DEC)
ep->offset += size;
@@ -4108,7 +4114,7 @@ init_eliminable_invariants (rtx_insn *first, bool do_subregs)
/* Allocate the tables used to store offset information at labels. */
offsets_known_at = XNEWVEC (char, num_labels);
- offsets_at = (poly_int64 (*)[NUM_ELIMINABLE_REGS])
+ offsets_at = (poly_int64_pod (*)[NUM_ELIMINABLE_REGS])
xmalloc (num_labels * NUM_ELIMINABLE_REGS * sizeof (poly_int64));
/* Look for REG_EQUIV notes; record what each pseudo is equivalent
diff --git a/gcc/rtl-tests.c b/gcc/rtl-tests.c
index 54bd6a37e7f..98dd34b297c 100644
--- a/gcc/rtl-tests.c
+++ b/gcc/rtl-tests.c
@@ -228,6 +228,62 @@ test_uncond_jump ()
jump_insn);
}
+template<unsigned int N>
+struct const_poly_int_tests
+{
+ static void run ();
+};
+
+template<>
+struct const_poly_int_tests<1>
+{
+ static void run () {}
+};
+
+/* Test various CONST_POLY_INT properties. */
+
+template<unsigned int N>
+void
+const_poly_int_tests<N>::run ()
+{
+ rtx x1 = gen_int_mode (poly_int64 (1, 1), QImode);
+ rtx x255 = gen_int_mode (poly_int64 (1, 255), QImode);
+
+ /* Test that constants are unique. */
+ ASSERT_EQ (x1, gen_int_mode (poly_int64 (1, 1), QImode));
+ ASSERT_NE (x1, gen_int_mode (poly_int64 (1, 1), HImode));
+ ASSERT_NE (x1, x255);
+
+ /* Test const_poly_int_value. */
+ ASSERT_MUST_EQ (const_poly_int_value (x1), poly_int64 (1, 1));
+ ASSERT_MUST_EQ (const_poly_int_value (x255), poly_int64 (1, -1));
+
+ /* Test rtx_to_poly_int64. */
+ ASSERT_MUST_EQ (rtx_to_poly_int64 (x1), poly_int64 (1, 1));
+ ASSERT_MUST_EQ (rtx_to_poly_int64 (x255), poly_int64 (1, -1));
+ ASSERT_MAY_NE (rtx_to_poly_int64 (x255), poly_int64 (1, 255));
+
+ /* Test plus_constant of a symbol. */
+ rtx symbol = gen_rtx_SYMBOL_REF (Pmode, "foo");
+ rtx offset1 = gen_int_mode (poly_int64 (9, 11), Pmode);
+ rtx sum1 = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, symbol, offset1));
+ ASSERT_RTX_EQ (plus_constant (Pmode, symbol, poly_int64 (9, 11)), sum1);
+
+ /* Test plus_constant of a CONST. */
+ rtx offset2 = gen_int_mode (poly_int64 (12, 20), Pmode);
+ rtx sum2 = gen_rtx_CONST (Pmode, gen_rtx_PLUS (Pmode, symbol, offset2));
+ ASSERT_RTX_EQ (plus_constant (Pmode, sum1, poly_int64 (3, 9)), sum2);
+
+ /* Test a cancelling plus_constant. */
+ ASSERT_EQ (plus_constant (Pmode, sum2, poly_int64 (-12, -20)), symbol);
+
+ /* Test plus_constant on integer constants. */
+ ASSERT_EQ (plus_constant (QImode, const1_rtx, poly_int64 (4, -2)),
+ gen_int_mode (poly_int64 (5, -2), QImode));
+ ASSERT_EQ (plus_constant (QImode, x1, poly_int64 (4, -2)),
+ gen_int_mode (poly_int64 (5, -1), QImode));
+}
+
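The x255 assertions above rely on gen_int_mode sign-extending each
coefficient to the width of QImode, so a coefficient of 255 is
canonicalized to -1; that is also why the MAY_NE comparison against
poly_int64 (1, 255) holds.  In standalone terms (conversion behavior as
implemented by GCC):

    int8_t coeff = (int8_t) 255;  /* QImode is 8 bits: 255 becomes -1 */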
/* Run all of the selftests within this file. */
void
@@ -238,6 +294,7 @@ rtl_tests_c_tests ()
test_dumping_rtx_reuse ();
test_single_set ();
test_uncond_jump ();
+ const_poly_int_tests<NUM_POLY_INT_COEFFS>::run ();
/* Purge state. */
set_first_insn (NULL);
diff --git a/gcc/rtl.c b/gcc/rtl.c
index cb7256e6855..3b2728be8b5 100644
--- a/gcc/rtl.c
+++ b/gcc/rtl.c
@@ -265,7 +265,7 @@ shared_const_p (const_rtx orig)
poly_int64 offset;
return (GET_CODE (XEXP (orig, 0)) == PLUS
&& GET_CODE (XEXP (XEXP (orig, 0), 0)) == SYMBOL_REF
- && poly_int_const_p (XEXP (XEXP (orig, 0), 1), &offset));
+ && poly_int_rtx_p (XEXP (XEXP (orig, 0), 1), &offset));
}
diff --git a/gcc/rtl.def b/gcc/rtl.def
index 640919bd375..83bcfcaadca 100644
--- a/gcc/rtl.def
+++ b/gcc/rtl.def
@@ -348,6 +348,7 @@ DEF_RTL_EXPR(CONST_INT, "const_int", "w", RTX_CONST_OBJ)
/* numeric integer constant */
DEF_RTL_EXPR(CONST_WIDE_INT, "const_wide_int", "", RTX_CONST_OBJ)
+/* An rtx representation of a poly_wide_int. */
DEF_RTL_EXPR(CONST_POLY_INT, "const_poly_int", "", RTX_CONST_OBJ)
/* fixed-point constant */
diff --git a/gcc/rtl.h b/gcc/rtl.h
index f01ee29c277..ec5cf314a9e 100644
--- a/gcc/rtl.h
+++ b/gcc/rtl.h
@@ -1887,6 +1887,8 @@ set_regno_raw (rtx x, unsigned int regno, unsigned int nregs)
#define CONST_WIDE_INT_NUNITS(RTX) CWI_GET_NUM_ELEM (RTX)
#define CONST_WIDE_INT_ELT(RTX, N) CWI_ELT (RTX, N)
+/* For a CONST_POLY_INT, CONST_POLY_INT_COEFFS gives access to the
+ individual coefficients, in the form of a trailing_wide_ints structure. */
#define CONST_POLY_INT_COEFFS(RTX) \
(RTL_FLAG_CHECK1("CONST_POLY_INT_COEFFS", (RTX), \
CONST_POLY_INT)->u.cpi.coeffs)
@@ -1922,9 +1924,6 @@ set_regno_raw (rtx x, unsigned int regno, unsigned int nregs)
#define SUBREG_REG(RTX) XCEXP (RTX, 0, SUBREG)
#define SUBREG_BYTE(RTX) XCSUBREG (RTX, 1, SUBREG)
-/* The number of the parameter in a CONST_PARAM. */
-#define CONST_PARAM_ID(RTX) XCUINT (RTX, 0, CONST_PARAM)
-
/* in rtlanal.c */
/* Return the right cost to give to an operation
to make the cost of the corresponding register-to-register instruction
@@ -1999,7 +1998,7 @@ struct subreg_shape {
subreg_shape (machine_mode, poly_uint16, machine_mode);
bool operator == (const subreg_shape &) const;
bool operator != (const subreg_shape &) const;
- HOST_WIDE_INT unique_id () const;
+ unsigned HOST_WIDE_INT unique_id () const;
machine_mode inner_mode;
poly_uint16 offset;
@@ -2032,7 +2031,7 @@ subreg_shape::operator != (const subreg_shape &other) const
current mode is anywhere near being 65536 bytes in size, so the
id comfortably fits in an int. */
-inline HOST_WIDE_INT
+inline unsigned HOST_WIDE_INT
subreg_shape::unique_id () const
{
{ STATIC_ASSERT (MAX_MACHINE_MODE <= 256); }
@@ -2212,9 +2211,17 @@ wi::max_value (machine_mode mode, signop sgn)
return max_value (GET_MODE_PRECISION (as_a <scalar_mode> (mode)), sgn);
}
+namespace wi
+{
+ typedef poly_int<NUM_POLY_INT_COEFFS,
+ generic_wide_int <wide_int_ref_storage <false, false> > >
+ rtx_to_poly_wide_ref;
+ rtx_to_poly_wide_ref to_poly_wide (const_rtx, machine_mode);
+}
+
/* Return the value of a CONST_POLY_INT in its native precision. */
-inline poly_int<NUM_POLY_INT_COEFFS, WIDE_INT_REF_FOR (wide_int)>
+inline wi::rtx_to_poly_wide_ref
const_poly_int_value (const_rtx x)
{
poly_int<NUM_POLY_INT_COEFFS, WIDE_INT_REF_FOR (wide_int)> res;
@@ -2223,6 +2230,26 @@ const_poly_int_value (const_rtx x)
return res;
}
+/* Return true if X is a scalar integer or a CONST_POLY_INT. The value
+ can then be extracted using wi::to_poly_wide. */
+
+inline bool
+poly_int_rtx_p (const_rtx x)
+{
+ return CONST_SCALAR_INT_P (x) || CONST_POLY_INT_P (x);
+}
+
+/* Access X (which satisfies poly_int_rtx_p) as a poly_wide_int.
+ MODE is the mode of X. */
+
+inline wi::rtx_to_poly_wide_ref
+wi::to_poly_wide (const_rtx x, machine_mode mode)
+{
+ if (CONST_POLY_INT_P (x))
+ return const_poly_int_value (x);
+ return rtx_mode_t (const_cast<rtx> (x), mode);
+}
+
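A caller-side sketch (GCC-internal API; process_value is a hypothetical
consumer): the predicate form of poly_int_rtx_p guards wi::to_poly_wide:

    if (poly_int_rtx_p (x))
      {
        /* MODE must be the mode of X, so that CONST_INT and
           CONST_WIDE_INT are read at the right precision.  */
        wi::rtx_to_poly_wide_ref value = wi::to_poly_wide (x, mode);
        process_value (value);
      }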
/* Return the value of X as a poly_int64. */
inline poly_int64
@@ -2243,7 +2270,7 @@ rtx_to_poly_int64 (const_rtx x)
otherwise leave it unmodified. */
inline bool
-poly_int_const_p (const_rtx x, poly_int64 *res)
+poly_int_rtx_p (const_rtx x, poly_int64_pod *res)
{
if (CONST_INT_P (x))
{
@@ -2253,11 +2280,10 @@ poly_int_const_p (const_rtx x, poly_int64 *res)
if (CONST_POLY_INT_P (x))
{
for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
- {
- if (!wi::fits_shwi_p (CONST_POLY_INT_COEFFS (x)[i]))
- return false;
- res->coeffs[i] = CONST_POLY_INT_COEFFS (x)[i].to_shwi ();
- }
+ if (!wi::fits_shwi_p (CONST_POLY_INT_COEFFS (x)[i]))
+ return false;
+ for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
+ res->coeffs[i] = CONST_POLY_INT_COEFFS (x)[i].to_shwi ();
return true;
}
return false;
@@ -2268,10 +2294,10 @@ extern int rtx_cost (rtx, machine_mode, enum rtx_code, int, bool);
extern int address_cost (rtx, machine_mode, addr_space_t, bool);
extern void get_full_rtx_cost (rtx, machine_mode, enum rtx_code, int,
struct full_rtx_costs *);
-extern poly_int64 subreg_lsb (const_rtx);
-extern poly_int64 subreg_lsb_1 (machine_mode, machine_mode, poly_int64);
-extern poly_int64 subreg_size_offset_from_lsb (poly_int64, poly_int64,
- poly_int64);
+extern poly_uint64 subreg_lsb (const_rtx);
+extern poly_uint64 subreg_lsb_1 (machine_mode, machine_mode, poly_uint64);
+extern poly_uint64 subreg_size_offset_from_lsb (poly_uint64, poly_uint64,
+ poly_uint64);
extern bool read_modify_subreg_p (const_rtx);
/* Return the subreg byte offset for a subreg whose outer mode is
@@ -2280,22 +2306,22 @@ extern bool read_modify_subreg_p (const_rtx);
the inner value. This is the inverse of subreg_lsb_1 (which converts
byte offsets to bit shifts). */
-inline poly_int64
+inline poly_uint64
subreg_offset_from_lsb (machine_mode outer_mode,
machine_mode inner_mode,
- poly_int64 lsb_shift)
+ poly_uint64 lsb_shift)
{
return subreg_size_offset_from_lsb (GET_MODE_SIZE (outer_mode),
GET_MODE_SIZE (inner_mode), lsb_shift);
}
-extern unsigned int subreg_regno_offset (unsigned int, machine_mode,
- poly_int64, machine_mode);
+extern unsigned int subreg_regno_offset (unsigned int, machine_mode,
+ poly_uint64, machine_mode);
extern bool subreg_offset_representable_p (unsigned int, machine_mode,
- poly_int64, machine_mode);
+ poly_uint64, machine_mode);
extern unsigned int subreg_regno (const_rtx);
extern int simplify_subreg_regno (unsigned int, machine_mode,
- poly_int64, machine_mode);
+ poly_uint64, machine_mode);
extern unsigned int subreg_nregs (const_rtx);
extern unsigned int subreg_nregs_with_regno (unsigned int, const_rtx);
extern unsigned HOST_WIDE_INT nonzero_bits (const_rtx, machine_mode);
@@ -2991,13 +3017,12 @@ extern rtx gen_lowpart_if_possible (machine_mode, rtx);
/* In emit-rtl.c */
extern rtx gen_highpart (machine_mode, rtx);
extern rtx gen_highpart_mode (machine_mode, machine_mode, rtx);
-extern rtx operand_subword (rtx, poly_int64, int, machine_mode);
+extern rtx operand_subword (rtx, poly_uint64, int, machine_mode);
/* In emit-rtl.c */
-extern rtx operand_subword_force (rtx, poly_int64, machine_mode);
-extern machine_mode narrower_subreg_mode (machine_mode, machine_mode);
+extern rtx operand_subword_force (rtx, poly_uint64, machine_mode);
extern int subreg_lowpart_p (const_rtx);
-extern poly_int64 subreg_size_lowpart_offset (poly_int64, poly_int64);
+extern poly_uint64 subreg_size_lowpart_offset (poly_uint64, poly_uint64);
/* Return true if a subreg of mode OUTERMODE would only access part of
an inner register with mode INNERMODE. The other bits of the inner
@@ -3054,7 +3079,7 @@ paradoxical_subreg_p (const_rtx x)
/* Return the SUBREG_BYTE for an OUTERMODE lowpart of an INNERMODE value. */
-inline poly_int64
+inline poly_uint64
subreg_lowpart_offset (machine_mode outermode, machine_mode innermode)
{
return subreg_size_lowpart_offset (GET_MODE_SIZE (outermode),
@@ -3062,6 +3087,16 @@ subreg_lowpart_offset (machine_mode outermode, machine_mode innermode)
}
/* Given that a subreg has outer mode OUTERMODE and inner mode INNERMODE,
+ return the smaller of the two modes if they are different sizes,
+ otherwise return the outer mode. */
+
+inline machine_mode
+narrower_subreg_mode (machine_mode outermode, machine_mode innermode)
+{
+ return paradoxical_subreg_p (outermode, innermode) ? innermode : outermode;
+}
+
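Behavior sketch (GCC-internal API): a normal subreg keeps its outer mode,
a paradoxical one keeps its inner mode, so the result is always the
narrower of the two:

    gcc_checking_assert (narrower_subreg_mode (SImode, DImode) == SImode);
    gcc_checking_assert (narrower_subreg_mode (DImode, SImode) == SImode);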
+/* Given that a subreg has outer mode OUTERMODE and inner mode INNERMODE,
return the mode that is big enough to hold both the outer and inner
values. Prefer the outer mode in the event of a tie. */
@@ -3079,11 +3114,11 @@ wider_subreg_mode (const_rtx x)
return wider_subreg_mode (GET_MODE (x), GET_MODE (SUBREG_REG (x)));
}
-extern poly_int64 subreg_size_highpart_offset (poly_int64, poly_int64);
+extern poly_uint64 subreg_size_highpart_offset (poly_uint64, poly_uint64);
/* Return the SUBREG_BYTE for an OUTERMODE highpart of an INNERMODE value. */
-inline poly_int64
+inline poly_uint64
subreg_highpart_offset (machine_mode outermode, machine_mode innermode)
{
return subreg_size_highpart_offset (GET_MODE_SIZE (outermode),
@@ -3092,7 +3127,7 @@ subreg_highpart_offset (machine_mode outermode, machine_mode innermode)
extern poly_int64 byte_lowpart_offset (machine_mode, machine_mode);
extern poly_int64 subreg_memory_offset (machine_mode, machine_mode,
- poly_int64);
+ poly_uint64);
extern poly_int64 subreg_memory_offset (const_rtx);
extern rtx make_safe_from (rtx, rtx);
extern rtx convert_memory_address_addr_space_1 (scalar_int_mode, rtx,
@@ -3245,8 +3280,8 @@ extern rtx simplify_gen_ternary (enum rtx_code, machine_mode,
machine_mode, rtx, rtx, rtx);
extern rtx simplify_gen_relational (enum rtx_code, machine_mode,
machine_mode, rtx, rtx);
-extern rtx simplify_subreg (machine_mode, rtx, machine_mode, poly_int64);
-extern rtx simplify_gen_subreg (machine_mode, rtx, machine_mode, poly_int64);
+extern rtx simplify_subreg (machine_mode, rtx, machine_mode, poly_uint64);
+extern rtx simplify_gen_subreg (machine_mode, rtx, machine_mode, poly_uint64);
extern rtx lowpart_subreg (machine_mode, rtx, machine_mode);
extern rtx simplify_replace_fn_rtx (rtx, const_rtx,
rtx (*fn) (rtx, const_rtx, void *), void *);
@@ -3303,7 +3338,7 @@ extern HOST_WIDE_INT get_integer_term (const_rtx);
extern rtx get_related_value (const_rtx);
extern bool offset_within_block_p (const_rtx, HOST_WIDE_INT);
extern void split_const (rtx, rtx *, rtx *);
-extern rtx strip_offset (rtx, poly_int64 *);
+extern rtx strip_offset (rtx, poly_int64_pod *);
extern poly_int64 get_args_size (const_rtx);
extern bool unsigned_reg_p (rtx);
extern int reg_mentioned_p (const_rtx, const_rtx);
@@ -3434,7 +3469,7 @@ struct subreg_info
};
extern void subreg_get_info (unsigned int, machine_mode,
- poly_int64, machine_mode,
+ poly_uint64, machine_mode,
struct subreg_info *);
/* lists.c */
@@ -3673,7 +3708,7 @@ extern rtx gen_rtx_CONST_VECTOR (machine_mode, rtvec);
extern void set_mode_and_regno (rtx, machine_mode, unsigned int);
extern rtx gen_raw_REG (machine_mode, unsigned int);
extern rtx gen_rtx_REG (machine_mode, unsigned int);
-extern rtx gen_rtx_SUBREG (machine_mode, rtx, poly_int64);
+extern rtx gen_rtx_SUBREG (machine_mode, rtx, poly_uint64);
extern rtx gen_rtx_MEM (machine_mode, rtx);
extern rtx gen_rtx_VAR_LOCATION (machine_mode, tree, rtx,
enum var_init_status);
@@ -4244,7 +4279,7 @@ load_extend_op (machine_mode mode)
and return the base. Return X otherwise. */
inline rtx
-strip_offset_and_add (rtx x, poly_int64 *offset)
+strip_offset_and_add (rtx x, poly_int64_pod *offset)
{
if (GET_CODE (x) == PLUS)
{
diff --git a/gcc/rtlanal.c b/gcc/rtlanal.c
index 86aa383794a..3c297eb501f 100644
--- a/gcc/rtlanal.c
+++ b/gcc/rtlanal.c
@@ -494,8 +494,10 @@ rtx_addr_can_trap_p_1 (const_rtx x, poly_int64 offset, poly_int64 size,
if (may_lt (offset, 0))
return 1;
+ if (known_zero (offset))
+ return 0;
if (!known_size_p (size))
- return maybe_nonzero (offset);
+ return 1;
/* If the size of the access or of the symbol is unknown,
assume the worst. */
@@ -507,7 +509,7 @@ rtx_addr_can_trap_p_1 (const_rtx x, poly_int64 offset, poly_int64 size,
decl_size = -1;
else if (DECL_P (decl) && DECL_SIZE_UNIT (decl))
{
- if (!poly_tree_p (DECL_SIZE_UNIT (decl), &decl_size))
+ if (!poly_int_tree_p (DECL_SIZE_UNIT (decl), &decl_size))
decl_size = -1;
}
else if (TREE_CODE (decl) == STRING_CST)
@@ -517,9 +519,7 @@ rtx_addr_can_trap_p_1 (const_rtx x, poly_int64 offset, poly_int64 size,
else
decl_size = -1;
- return (may_le (decl_size, 0)
- ? maybe_nonzero (offset)
- : may_gt (offset + size, decl_size));
+ return !known_subrange_p (offset, size, 0, decl_size);
}
return 0;
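A worked instance of the new test (illustrative values): a 4-byte access
at offset 8 into a 12-byte decl lies within the decl, while an access of
unknown size can never be proven in range:

    [offset, offset + size) = [8, 12)  within  [0, decl_size) = [0, 12)
      => known_subrange_p (8, 4, 0, 12) => return 0 (cannot trap)
    size unknown (-1, so known_size_p is false)
      => known_subrange_p is false      => return 1 (may trap)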
@@ -550,12 +550,12 @@ rtx_addr_can_trap_p_1 (const_rtx x, poly_int64 offset, poly_int64 size,
{
if (FRAME_GROWS_DOWNWARD)
{
- high_bound = STARTING_FRAME_OFFSET;
+ high_bound = targetm.starting_frame_offset ();
low_bound = high_bound - get_frame_size ();
}
else
{
- low_bound = STARTING_FRAME_OFFSET;
+ low_bound = targetm.starting_frame_offset ();
high_bound = low_bound + get_frame_size ();
}
}
@@ -686,7 +686,7 @@ rtx_addr_can_trap_p_1 (const_rtx x, poly_int64 offset, poly_int64 size,
int
rtx_addr_can_trap_p (const_rtx x)
{
- return rtx_addr_can_trap_p_1 (x, 0, 0, BLKmode, false);
+ return rtx_addr_can_trap_p_1 (x, 0, -1, BLKmode, false);
}
/* Return true if X contains a MEM subrtx. */
@@ -921,7 +921,7 @@ split_const (rtx x, rtx *base_out, rtx *offset_out)
to a new rtx). Return the Y and store the offset in *OFFSET_OUT. */
rtx
-strip_offset (rtx x, poly_int64 *offset_out)
+strip_offset (rtx x, poly_int64_pod *offset_out)
{
rtx base = const0_rtx;
rtx test = x;
@@ -932,7 +932,7 @@ strip_offset (rtx x, poly_int64 *offset_out)
base = XEXP (test, 0);
test = XEXP (test, 1);
}
- if (poly_int_const_p (test, offset_out))
+ if (poly_int_rtx_p (test, offset_out))
return base;
*offset_out = 0;
return x;
@@ -2816,7 +2816,7 @@ may_trap_p_1 (const_rtx x, unsigned flags)
code_changed
|| !MEM_NOTRAP_P (x))
{
- poly_int64 size = MEM_SIZE_KNOWN_P (x) ? MEM_SIZE (x) : 0;
+ poly_int64 size = MEM_SIZE_KNOWN_P (x) ? MEM_SIZE (x) : -1;
return rtx_addr_can_trap_p_1 (XEXP (x, 0), 0, size,
GET_MODE (x), code_changed);
}
@@ -3577,12 +3577,12 @@ loc_mentioned_in_p (rtx *loc, const_rtx in)
and SUBREG_BYTE, return the bit offset where the subreg begins
(counting from the least significant bit of the operand). */
-poly_int64
+poly_uint64
subreg_lsb_1 (machine_mode outer_mode,
machine_mode inner_mode,
- poly_int64 subreg_byte)
+ poly_uint64 subreg_byte)
{
- poly_int64 subreg_end, trailing_bytes, byte_pos;
+ poly_uint64 subreg_end, trailing_bytes, byte_pos;
/* A paradoxical subreg begins at bit position 0. */
if (paradoxical_subreg_p (outer_mode, inner_mode))
@@ -3598,13 +3598,14 @@ subreg_lsb_1 (machine_mode outer_mode,
{
      /* When bytes and words have opposite endianness, we must be able
to split offsets into words and bytes at compile time. */
- poly_int64 leading_word_part
+ poly_uint64 leading_word_part
= force_align_down (subreg_byte, UNITS_PER_WORD);
- poly_int64 trailing_word_part
+ poly_uint64 trailing_word_part
= force_align_down (trailing_bytes, UNITS_PER_WORD);
/* If the subreg crosses a word boundary ensure that
it also begins and ends on a word boundary. */
- gcc_assert (must_le (subreg_end - leading_word_part, UNITS_PER_WORD)
+ gcc_assert (must_le (subreg_end - leading_word_part,
+ (unsigned int) UNITS_PER_WORD)
|| (must_eq (leading_word_part, subreg_byte)
&& must_eq (trailing_word_part, trailing_bytes)));
if (WORDS_BIG_ENDIAN)
@@ -3619,7 +3620,7 @@ subreg_lsb_1 (machine_mode outer_mode,
/* Given a subreg X, return the bit offset where the subreg begins
(counting from the least significant bit of the reg). */
-poly_int64
+poly_uint64
subreg_lsb (const_rtx x)
{
return subreg_lsb_1 (GET_MODE (x), GET_MODE (SUBREG_REG (x)),
@@ -3632,20 +3633,20 @@ subreg_lsb (const_rtx x)
lsb of the inner value. This is the inverse of the calculation
performed by subreg_lsb_1 (which converts byte offsets to bit shifts). */
-poly_int64
-subreg_size_offset_from_lsb (poly_int64 outer_bytes, poly_int64 inner_bytes,
- poly_int64 lsb_shift)
+poly_uint64
+subreg_size_offset_from_lsb (poly_uint64 outer_bytes, poly_uint64 inner_bytes,
+ poly_uint64 lsb_shift)
{
/* A paradoxical subreg begins at bit position 0. */
gcc_checking_assert (ordered_p (outer_bytes, inner_bytes));
if (may_gt (outer_bytes, inner_bytes))
{
- gcc_checking_assert (must_eq (lsb_shift, 0));
+ gcc_checking_assert (known_zero (lsb_shift));
return 0;
}
- poly_int64 lower_bytes = exact_div (lsb_shift, BITS_PER_UNIT);
- poly_int64 upper_bytes = inner_bytes - (lower_bytes + outer_bytes);
+ poly_uint64 lower_bytes = exact_div (lsb_shift, BITS_PER_UNIT);
+ poly_uint64 upper_bytes = inner_bytes - (lower_bytes + outer_bytes);
if (WORDS_BIG_ENDIAN && BYTES_BIG_ENDIAN)
return upper_bytes;
else if (!WORDS_BIG_ENDIAN && !BYTES_BIG_ENDIAN)
@@ -3654,10 +3655,10 @@ subreg_size_offset_from_lsb (poly_int64 outer_bytes, poly_int64 inner_bytes,
{
      /* When bytes and words have opposite endianness, we must be able
to split offsets into words and bytes at compile time. */
- poly_int64 lower_word_part = force_align_down (lower_bytes,
- UNITS_PER_WORD);
- poly_int64 upper_word_part = force_align_down (upper_bytes,
- UNITS_PER_WORD);
+ poly_uint64 lower_word_part = force_align_down (lower_bytes,
+ UNITS_PER_WORD);
+ poly_uint64 upper_word_part = force_align_down (upper_bytes,
+ UNITS_PER_WORD);
if (WORDS_BIG_ENDIAN)
return upper_word_part + (lower_bytes - lower_word_part);
else
@@ -3686,15 +3687,15 @@ subreg_size_offset_from_lsb (poly_int64 outer_bytes, poly_int64 inner_bytes,
void
subreg_get_info (unsigned int xregno, machine_mode xmode,
- poly_int64 offset, machine_mode ymode,
+ poly_uint64 offset, machine_mode ymode,
struct subreg_info *info)
{
unsigned int nregs_xmode, nregs_ymode;
gcc_assert (xregno < FIRST_PSEUDO_REGISTER);
- poly_int64 xsize = GET_MODE_SIZE (xmode);
- poly_int64 ysize = GET_MODE_SIZE (ymode);
+ poly_uint64 xsize = GET_MODE_SIZE (xmode);
+ poly_uint64 ysize = GET_MODE_SIZE (ymode);
bool rknown = false;
@@ -3705,7 +3706,7 @@ subreg_get_info (unsigned int xregno, machine_mode xmode,
if (HARD_REGNO_NREGS_HAS_PADDING (xregno, xmode))
{
/* As a consequence, we must be dealing with a constant number of
- scalars, and thus a constant offset. */
+ scalars, and thus a constant offset and number of units. */
HOST_WIDE_INT coffset = offset.to_constant ();
HOST_WIDE_INT cysize = ysize.to_constant ();
nregs_xmode = HARD_REGNO_NREGS_WITH_PADDING (xregno, xmode);
@@ -3739,8 +3740,11 @@ subreg_get_info (unsigned int xregno, machine_mode xmode,
nregs_ymode = hard_regno_nregs (xregno, ymode);
- /* Paradoxical subregs are otherwise valid. */
+ /* Subreg sizes must be ordered, so that we can tell whether they are
+ partial, paradoxical or complete. */
gcc_checking_assert (ordered_p (xsize, ysize));
+
+ /* Paradoxical subregs are otherwise valid. */
if (!rknown && known_zero (offset) && may_gt (ysize, xsize))
{
info->representable_p = true;
@@ -3763,7 +3767,7 @@ subreg_get_info (unsigned int xregno, machine_mode xmode,
/* If registers store different numbers of bits in the different
modes, we cannot generally form this subreg. */
- poly_int64 regsize_xmode, regsize_ymode;
+ poly_uint64 regsize_xmode, regsize_ymode;
if (!HARD_REGNO_NREGS_HAS_PADDING (xregno, xmode)
&& !HARD_REGNO_NREGS_HAS_PADDING (xregno, ymode)
&& multiple_p (xsize, nregs_xmode, &regsize_xmode)
@@ -3776,6 +3780,8 @@ subreg_get_info (unsigned int xregno, machine_mode xmode,
info->representable_p = false;
if (!can_div_away_from_zero_p (ysize, regsize_xmode, &info->nregs)
|| !can_div_trunc_p (offset, regsize_xmode, &info->offset))
+ /* Checked by validate_subreg. We must know at compile time
+ which inner registers are being accessed. */
gcc_unreachable ();
return;
}
@@ -3786,6 +3792,8 @@ subreg_get_info (unsigned int xregno, machine_mode xmode,
info->representable_p = false;
info->nregs = nregs_ymode;
if (!can_div_trunc_p (offset, regsize_xmode, &info->offset))
+ /* Checked by validate_subreg. We must know at compile time
+ which inner registers are being accessed. */
gcc_unreachable ();
return;
}
@@ -3834,14 +3842,16 @@ subreg_get_info (unsigned int xregno, machine_mode xmode,
be exact, otherwise we don't know how to verify the constraint.
These conditions may be relaxed but subreg_regno_offset would
need to be redesigned. */
- poly_int64 bytes_per_block = exact_div (xsize, num_blocks);
+ poly_uint64 bytes_per_block = exact_div (xsize, num_blocks);
/* Get the number of the first block that contains the subreg and the byte
offset of the subreg from the start of that block. */
unsigned int block_number;
- poly_int64 subblock_offset;
+ poly_uint64 subblock_offset;
if (!can_div_trunc_p (offset, bytes_per_block, &block_number,
&subblock_offset))
+ /* Checked by validate_subreg. We must know at compile time which
+ inner registers are being accessed. */
gcc_unreachable ();
if (!rknown)
@@ -3876,7 +3886,7 @@ subreg_get_info (unsigned int xregno, machine_mode xmode,
RETURN - The regno offset which would be used. */
unsigned int
subreg_regno_offset (unsigned int xregno, machine_mode xmode,
- poly_int64 offset, machine_mode ymode)
+ poly_uint64 offset, machine_mode ymode)
{
struct subreg_info info;
subreg_get_info (xregno, xmode, offset, ymode, &info);
@@ -3892,7 +3902,7 @@ subreg_regno_offset (unsigned int xregno, machine_mode xmode,
RETURN - Whether the offset is representable. */
bool
subreg_offset_representable_p (unsigned int xregno, machine_mode xmode,
- poly_int64 offset, machine_mode ymode)
+ poly_uint64 offset, machine_mode ymode)
{
struct subreg_info info;
subreg_get_info (xregno, xmode, offset, ymode, &info);
@@ -3909,7 +3919,7 @@ subreg_offset_representable_p (unsigned int xregno, machine_mode xmode,
int
simplify_subreg_regno (unsigned int xregno, machine_mode xmode,
- poly_int64 offset, machine_mode ymode)
+ poly_uint64 offset, machine_mode ymode)
{
struct subreg_info info;
unsigned int yregno;
@@ -4509,8 +4519,10 @@ nonzero_bits1 (const_rtx x, scalar_int_mode mode, const_rtx known_x,
stack to be momentarily aligned only to that amount,
so we pick the least alignment. */
if (x == stack_pointer_rtx && PUSH_ARGS)
- alignment = MIN ((unsigned HOST_WIDE_INT) PUSH_ROUNDING (1),
- alignment);
+ {
+ poly_uint64 rounded_1 = PUSH_ROUNDING (poly_int64 (1));
+ alignment = MIN (known_alignment (rounded_1), alignment);
+ }
#endif
nonzero &= ~(alignment - 1);
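
To make the mixed-endianness splitting in subreg_lsb_1 concrete, here is a
minimal self-contained sketch of the byte-position calculation for constant
offsets. The 4-byte UNITS_PER_WORD, the align_down helper and the
words_be/bytes_be flags are illustrative assumptions of this sketch; the GCC
routine above additionally handles poly_uint64 offsets.

#include <assert.h>

#define UNITS_PER_WORD 4u
#define BITS_PER_UNIT 8u

static unsigned int
align_down (unsigned int x, unsigned int align)
{
  return x - x % align;
}

/* Bit position of the lsb of an OUTER_BYTES-wide subreg at SUBREG_BYTE
   within an INNER_BYTES-wide value, for the four endianness settings.  */
static unsigned int
sketch_subreg_lsb (unsigned int inner_bytes, unsigned int outer_bytes,
                   unsigned int subreg_byte, int words_be, int bytes_be)
{
  unsigned int subreg_end = subreg_byte + outer_bytes;
  unsigned int trailing_bytes = inner_bytes - subreg_end;
  unsigned int byte_pos;
  if (words_be && bytes_be)
    byte_pos = trailing_bytes;
  else if (!words_be && !bytes_be)
    byte_pos = subreg_byte;
  else
    {
      /* Mixed endianness: split the offset into whole words plus bytes
         within one word; a subreg crossing a word boundary must begin
         and end on one, as the assertion above requires.  */
      unsigned int leading_word_part = align_down (subreg_byte, UNITS_PER_WORD);
      unsigned int trailing_word_part
        = align_down (trailing_bytes, UNITS_PER_WORD);
      assert (subreg_end - leading_word_part <= UNITS_PER_WORD
              || (leading_word_part == subreg_byte
                  && trailing_word_part == trailing_bytes));
      if (words_be)
        byte_pos = trailing_word_part + (subreg_byte - leading_word_part);
      else
        byte_pos = leading_word_part + (trailing_bytes - trailing_word_part);
    }
  return byte_pos * BITS_PER_UNIT;
}

For example, sketch_subreg_lsb (8, 4, 4, 0, 0) is 32: on a fully
little-endian target, the subreg at byte 4 of an 8-byte value holds
bits 32-63.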
diff --git a/gcc/sanopt.c b/gcc/sanopt.c
index eb115f59693..d726c5e65e9 100644
--- a/gcc/sanopt.c
+++ b/gcc/sanopt.c
@@ -493,7 +493,7 @@ maybe_optimize_ubsan_ptr_ifn (sanopt_ctx *ctx, gimple *stmt)
gcc_assert (!DECL_REGISTER (base));
offset_int expr_offset = bitpos / BITS_PER_UNIT;
offset_int total_offset = expr_offset + cur_offset;
- if (may_ne (total_offset, wi::sext (total_offset, POINTER_SIZE)))
+ if (total_offset != wi::sext (total_offset, POINTER_SIZE))
{
record_ubsan_ptr_check_stmt (ctx, stmt, ptr, cur_offset);
return false;
diff --git a/gcc/sbitmap.c b/gcc/sbitmap.c
index 4bf13a11a1d..baef4d05f0d 100644
--- a/gcc/sbitmap.c
+++ b/gcc/sbitmap.c
@@ -21,6 +21,7 @@ along with GCC; see the file COPYING3. If not see
#include "system.h"
#include "coretypes.h"
#include "sbitmap.h"
+#include "selftest.h"
typedef SBITMAP_ELT_TYPE *sbitmap_ptr;
typedef const SBITMAP_ELT_TYPE *const_sbitmap_ptr;
@@ -322,29 +323,22 @@ bitmap_set_range (sbitmap bmap, unsigned int start, unsigned int count)
bool
bitmap_bit_in_range_p (const_sbitmap bmap, unsigned int start, unsigned int end)
{
+ gcc_checking_assert (start <= end);
unsigned int start_word = start / SBITMAP_ELT_BITS;
unsigned int start_bitno = start % SBITMAP_ELT_BITS;
- /* Testing within a word, starting at the beginning of a word. */
- if (start_bitno == 0 && (end - start) < SBITMAP_ELT_BITS)
- {
- SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << (end - start)) - 1;
- return (bmap->elms[start_word] & mask) != 0;
- }
-
unsigned int end_word = end / SBITMAP_ELT_BITS;
unsigned int end_bitno = end % SBITMAP_ELT_BITS;
- /* Testing starts somewhere in the middle of a word. Test up to the
- end of the word or the end of the requested region, whichever comes
- first. */
+ /* Check the beginning of the first word if START is not word-aligned. */
if (start_bitno != 0)
{
- unsigned int nbits = ((start_word == end_word)
- ? end_bitno - start_bitno
- : SBITMAP_ELT_BITS - start_bitno);
- SBITMAP_ELT_TYPE mask = ((SBITMAP_ELT_TYPE)1 << nbits) - 1;
- mask <<= start_bitno;
+ SBITMAP_ELT_TYPE high_mask = ~(SBITMAP_ELT_TYPE)0;
+ if (start_word == end_word && end_bitno + 1 < SBITMAP_ELT_BITS)
+ high_mask = ((SBITMAP_ELT_TYPE)1 << (end_bitno + 1)) - 1;
+
+ SBITMAP_ELT_TYPE low_mask = ((SBITMAP_ELT_TYPE)1 << start_bitno) - 1;
+ SBITMAP_ELT_TYPE mask = high_mask - low_mask;
if (bmap->elms[start_word] & mask)
return true;
start_word++;
@@ -364,8 +358,9 @@ bitmap_bit_in_range_p (const_sbitmap bmap, unsigned int start, unsigned int end)
}
/* Now handle residuals in the last word. */
- SBITMAP_ELT_TYPE mask
- = ((SBITMAP_ELT_TYPE)1 << (SBITMAP_ELT_BITS - end_bitno)) - 1;
+ SBITMAP_ELT_TYPE mask = ~(SBITMAP_ELT_TYPE)0;
+ if (end_bitno + 1 < SBITMAP_ELT_BITS)
+ mask = ((SBITMAP_ELT_TYPE)1 << (end_bitno + 1)) - 1;
return (bmap->elms[start_word] & mask) != 0;
}
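
The rewritten function treats END as inclusive and builds each word's mask by
subtracting the mask of bits below START from the mask of bits up to and
including END. A minimal sketch of that arithmetic for a single word,
assuming a fixed 64-bit SBITMAP_ELT_TYPE:

#include <stdint.h>

/* Mask selecting bits START..END (inclusive, both < 64) of one word.  */
static uint64_t
range_mask_in_word (unsigned int start, unsigned int end)
{
  uint64_t high_mask = ~(uint64_t) 0;
  if (end + 1 < 64)
    high_mask = ((uint64_t) 1 << (end + 1)) - 1;
  uint64_t low_mask = ((uint64_t) 1 << start) - 1;
  return high_mask - low_mask;
}

range_mask_in_word (3, 5) is 0x38, and range_mask_in_word (0, 63) is the
all-ones word; the end + 1 < 64 guard exists to avoid shifting by the full
word width.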
@@ -821,3 +816,92 @@ dump_bitmap_vector (FILE *file, const char *title, const char *subtitle,
fprintf (file, "\n");
}
+
+#if CHECKING_P
+
+namespace selftest {
+
+/* Selftests for sbitmaps. */
+
+
+/* Verify range functions for sbitmap. */
+
+static void
+test_range_functions ()
+{
+ sbitmap s = sbitmap_alloc (1024);
+ bitmap_clear (s);
+
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 512, 1023));
+ bitmap_set_bit (s, 100);
+
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 512, 1023));
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 0, 99));
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 101, 1023));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 1, 100));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 64, 100));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 100, 100));
+ ASSERT_TRUE (bitmap_bit_p (s, 100));
+
+ s = sbitmap_alloc (64);
+ bitmap_clear (s);
+ bitmap_set_bit (s, 63);
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 0, 63));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 1, 63));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 63, 63));
+ ASSERT_TRUE (bitmap_bit_p (s, 63));
+
+ s = sbitmap_alloc (1024);
+ bitmap_clear (s);
+ bitmap_set_bit (s, 128);
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 0, 127));
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 129, 1023));
+
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 0, 128));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 1, 128));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 128, 255));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 128, 254));
+ ASSERT_TRUE (bitmap_bit_p (s, 128));
+
+ bitmap_clear (s);
+ bitmap_set_bit (s, 8);
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 0, 8));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 0, 12));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 0, 63));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 0, 127));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 0, 512));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 8, 8));
+ ASSERT_TRUE (bitmap_bit_p (s, 8));
+
+ bitmap_clear (s);
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 0, 0));
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 0, 8));
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 0, 63));
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 1, 63));
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 0, 256));
+
+ bitmap_set_bit (s, 0);
+ bitmap_set_bit (s, 16);
+ bitmap_set_bit (s, 32);
+ bitmap_set_bit (s, 48);
+ bitmap_set_bit (s, 64);
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 0, 0));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 1, 16));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 48, 63));
+ ASSERT_TRUE (bitmap_bit_in_range_p (s, 64, 64));
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 1, 15));
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 17, 31));
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 49, 63));
+ ASSERT_FALSE (bitmap_bit_in_range_p (s, 65, 1023));
+}
+
+/* Run all of the selftests within this file. */
+
+void
+sbitmap_c_tests ()
+{
+ test_range_functions ();
+}
+
+} // namespace selftest
+#endif /* CHECKING_P */
diff --git a/gcc/selftest-run-tests.c b/gcc/selftest-run-tests.c
index 8c5e9ad19b3..80ae8f9799b 100644
--- a/gcc/selftest-run-tests.c
+++ b/gcc/selftest-run-tests.c
@@ -56,13 +56,13 @@ selftest::run_tests ()
/* Low-level data structures. */
bitmap_c_tests ();
+ sbitmap_c_tests ();
et_forest_c_tests ();
hash_map_tests_c_tests ();
hash_set_tests_c_tests ();
vec_c_tests ();
pretty_print_c_tests ();
wide_int_cc_tests ();
- poly_int_cc_tests ();
ggc_tests_c_tests ();
sreal_c_tests ();
fibonacci_heap_c_tests ();
diff --git a/gcc/selftest.h b/gcc/selftest.h
index a9f40a4b915..c5135b0cb60 100644
--- a/gcc/selftest.h
+++ b/gcc/selftest.h
@@ -171,6 +171,7 @@ extern const char *path_to_selftest_files;
/* Declarations for specific families of tests (by source file), in
alphabetical order. */
extern void bitmap_c_tests ();
+extern void sbitmap_c_tests ();
extern void diagnostic_c_tests ();
extern void diagnostic_show_locus_c_tests ();
extern void edit_context_c_tests ();
@@ -183,7 +184,6 @@ extern void ggc_tests_c_tests ();
extern void hash_map_tests_c_tests ();
extern void hash_set_tests_c_tests ();
extern void input_c_tests ();
-extern void poly_int_cc_tests ();
extern void pretty_print_c_tests ();
extern void read_rtl_function_c_tests ();
extern void rtl_tests_c_tests ();
diff --git a/gcc/sese.c b/gcc/sese.c
index d6702ada5f4..8aa8015290d 100644
--- a/gcc/sese.c
+++ b/gcc/sese.c
@@ -461,7 +461,6 @@ scalar_evolution_in_region (const sese_l &region, loop_p loop, tree t)
{
gimple *def;
struct loop *def_loop;
- basic_block before = region.entry->src;
/* SCOP parameters. */
if (TREE_CODE (t) == SSA_NAME
@@ -472,7 +471,7 @@ scalar_evolution_in_region (const sese_l &region, loop_p loop, tree t)
|| loop_in_sese_p (loop, region))
/* FIXME: we would need to instantiate SCEV to work on a region, and be more
flexible wrt. memory loads that may be invariant in the region. */
- return instantiate_scev (before, loop,
+ return instantiate_scev (region.entry, loop,
analyze_scalar_evolution (loop, t));
def = SSA_NAME_DEF_STMT (t);
@@ -494,7 +493,7 @@ scalar_evolution_in_region (const sese_l &region, loop_p loop, tree t)
if (has_vdefs)
return chrec_dont_know;
- return instantiate_scev (before, loop, t);
+ return instantiate_scev (region.entry, loop, t);
}
/* Return true if BB is empty, contains only DEBUG_INSNs. */
diff --git a/gcc/sese.h b/gcc/sese.h
index 190deeda8af..faefd806d9d 100644
--- a/gcc/sese.h
+++ b/gcc/sese.h
@@ -334,6 +334,8 @@ gbb_loop_at_index (gimple_poly_bb_p gbb, sese_l &region, int index)
while (--depth > index)
loop = loop_outer (loop);
+ gcc_assert (loop_in_sese_p (loop, region));
+
return loop;
}
diff --git a/gcc/simplify-rtx.c b/gcc/simplify-rtx.c
index c4cc44ea5dd..b1b4767d8c4 100644
--- a/gcc/simplify-rtx.c
+++ b/gcc/simplify-rtx.c
@@ -308,7 +308,7 @@ delegitimize_mem_from_attrs (rtx x)
case IMAGPART_EXPR:
case VIEW_CONVERT_EXPR:
{
- poly_int64 bitsize, bitpos, bytepos;
+ poly_int64 bitsize, bitpos, bytepos, toffset_val = 0;
tree toffset;
int unsignedp, reversep, volatilep = 0;
@@ -317,14 +317,10 @@ delegitimize_mem_from_attrs (rtx x)
&unsignedp, &reversep, &volatilep);
if (may_ne (bitsize, GET_MODE_BITSIZE (mode))
|| !multiple_p (bitpos, BITS_PER_UNIT, &bytepos)
- || (toffset && !tree_fits_shwi_p (toffset)))
+ || (toffset && !poly_int_tree_p (toffset, &toffset_val)))
decl = NULL;
else
- {
- offset += bytepos;
- if (toffset)
- offset += tree_to_shwi (toffset);
- }
+ offset += bytepos + toffset_val;
break;
}
}
@@ -2032,6 +2028,26 @@ simplify_const_unary_operation (enum rtx_code code, machine_mode mode,
}
}
+ /* Handle polynomial integers. */
+ else if (CONST_POLY_INT_P (op))
+ {
+ poly_wide_int result;
+ switch (code)
+ {
+ case NEG:
+ result = -const_poly_int_value (op);
+ break;
+
+ case NOT:
+ result = ~const_poly_int_value (op);
+ break;
+
+ default:
+ return NULL_RTX;
+ }
+ return immed_wide_int_const (result, mode);
+ }
+
return NULL_RTX;
}
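
A CONST_POLY_INT stands for a value a0 + a1*x whose x (for AArch64 SVE,
roughly the number of 128-bit blocks beyond the minimum vector length) is
unknown at compile time, so NEG and NOT above act coefficient-wise. A
two-coefficient model, purely illustrative and unrelated to GCC's
poly_wide_int representation:

typedef struct { long a0, a1; } poly2;

/* -(a0 + a1*x) = (-a0) + (-a1)*x.  */
static poly2 poly2_neg (poly2 v) { return (poly2) { -v.a0, -v.a1 }; }

/* ~v = -v - 1, so ~(a0 + a1*x) = (-a0 - 1) + (-a1)*x.  */
static poly2 poly2_not (poly2 v) { return (poly2) { -v.a0 - 1, -v.a1 }; }

This matches the selftests further down: NOT of the HImode value (30, 24)
is (-31, -24).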
@@ -2249,12 +2265,12 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
if ((GET_CODE (op0) == CONST
|| GET_CODE (op0) == SYMBOL_REF
|| GET_CODE (op0) == LABEL_REF)
- && poly_int_const_p (op1, &offset))
+ && poly_int_rtx_p (op1, &offset))
return plus_constant (mode, op0, offset);
else if ((GET_CODE (op1) == CONST
|| GET_CODE (op1) == SYMBOL_REF
|| GET_CODE (op1) == LABEL_REF)
- && poly_int_const_p (op0, &offset))
+ && poly_int_rtx_p (op0, &offset))
return plus_constant (mode, op1, offset);
/* See if this is something like X * C - X or vice versa or
@@ -2528,7 +2544,7 @@ simplify_binary_operation_1 (enum rtx_code code, machine_mode mode,
if ((GET_CODE (op0) == CONST
|| GET_CODE (op0) == SYMBOL_REF
|| GET_CODE (op0) == LABEL_REF)
- && poly_int_const_p (op1, &offset))
+ && poly_int_rtx_p (op1, &offset))
return plus_constant (mode, op0, trunc_int_for_mode (-offset, mode));
/* Don't let a relocatable value get a negative coeff. */
@@ -4337,6 +4353,57 @@ simplify_const_binary_operation (enum rtx_code code, machine_mode mode,
return immed_wide_int_const (result, int_mode);
}
+ /* Handle polynomial integers. */
+ if (NUM_POLY_INT_COEFFS > 1
+ && is_a <scalar_int_mode> (mode, &int_mode)
+ && poly_int_rtx_p (op0)
+ && poly_int_rtx_p (op1))
+ {
+ poly_wide_int result;
+ switch (code)
+ {
+ case PLUS:
+ result = wi::to_poly_wide (op0, mode) + wi::to_poly_wide (op1, mode);
+ break;
+
+ case MINUS:
+ result = wi::to_poly_wide (op0, mode) - wi::to_poly_wide (op1, mode);
+ break;
+
+ case MULT:
+ if (CONST_SCALAR_INT_P (op1))
+ result = wi::to_poly_wide (op0, mode) * rtx_mode_t (op1, mode);
+ else
+ return NULL_RTX;
+ break;
+
+ case ASHIFT:
+ if (CONST_SCALAR_INT_P (op1))
+ {
+ wide_int shift = rtx_mode_t (op1, mode);
+ if (SHIFT_COUNT_TRUNCATED)
+ shift = wi::umod_trunc (shift, GET_MODE_PRECISION (int_mode));
+ else if (wi::geu_p (shift, GET_MODE_PRECISION (int_mode)))
+ return NULL_RTX;
+ result = wi::to_poly_wide (op0, mode) << shift;
+ }
+ else
+ return NULL_RTX;
+ break;
+
+ case IOR:
+ if (!CONST_SCALAR_INT_P (op1)
+ || !can_ior_p (wi::to_poly_wide (op0, mode),
+ rtx_mode_t (op1, mode), &result))
+ return NULL_RTX;
+ break;
+
+ default:
+ return NULL_RTX;
+ }
+ return immed_wide_int_const (result, int_mode);
+ }
+
return NULL_RTX;
}
@@ -4592,8 +4659,8 @@ simplify_plus_minus (enum rtx_code code, machine_mode mode, rtx op0,
trivial CONST expressions we handle later. */
if (GET_CODE (tem) == CONST
&& GET_CODE (XEXP (tem, 0)) == ncode
- && rtx_equal_p (XEXP (XEXP (tem, 0), 0), lhs)
- && rtx_equal_p (XEXP (XEXP (tem, 0), 1), rhs))
+ && XEXP (XEXP (tem, 0), 0) == lhs
+ && XEXP (XEXP (tem, 0), 1) == rhs)
break;
lneg &= rneg;
if (GET_CODE (tem) == NEG)
@@ -6124,7 +6191,7 @@ simplify_immed_subreg (fixed_size_mode outermode, rtx op,
Return 0 if no simplifications are possible. */
rtx
simplify_subreg (machine_mode outermode, rtx op,
- machine_mode innermode, poly_int64 byte)
+ machine_mode innermode, poly_uint64 byte)
{
/* Little bit of sanity checking. */
gcc_assert (innermode != VOIDmode);
@@ -6135,15 +6202,15 @@ simplify_subreg (machine_mode outermode, rtx op,
gcc_assert (GET_MODE (op) == innermode
|| GET_MODE (op) == VOIDmode);
- poly_int64 outersize = GET_MODE_SIZE (outermode);
+ poly_uint64 outersize = GET_MODE_SIZE (outermode);
if (!multiple_p (byte, outersize))
return NULL_RTX;
- poly_int64 innersize = GET_MODE_SIZE (innermode);
+ poly_uint64 innersize = GET_MODE_SIZE (innermode);
if (may_ge (byte, innersize))
return NULL_RTX;
- if (outermode == innermode && must_eq (byte, 0))
+ if (outermode == innermode && known_zero (byte))
return op;
if (multiple_p (byte, GET_MODE_UNIT_SIZE (innermode)))
@@ -6171,7 +6238,7 @@ simplify_subreg (machine_mode outermode, rtx op,
that apply to general modes and offsets should be handled here
before calling simplify_immed_subreg. */
fixed_size_mode fs_outermode, fs_innermode;
- HOST_WIDE_INT cbyte;
+ unsigned HOST_WIDE_INT cbyte;
if (is_a <fixed_size_mode> (outermode, &fs_outermode)
&& is_a <fixed_size_mode> (innermode, &fs_innermode)
&& byte.is_constant (&cbyte))
@@ -6185,7 +6252,7 @@ simplify_subreg (machine_mode outermode, rtx op,
if (GET_CODE (op) == SUBREG)
{
machine_mode innermostmode = GET_MODE (SUBREG_REG (op));
- poly_int64 innermostsize = GET_MODE_SIZE (innermostmode);
+ poly_uint64 innermostsize = GET_MODE_SIZE (innermostmode);
rtx newx;
if (outermode == innermostmode
@@ -6205,7 +6272,7 @@ simplify_subreg (machine_mode outermode, rtx op,
{
/* Bail out in case resulting subreg would be incorrect. */
if (may_lt (final_offset, 0)
- || may_ge (final_offset, innermostsize)
+ || may_ge (poly_uint64 (final_offset), innermostsize)
|| !multiple_p (final_offset, outersize))
return NULL_RTX;
}
@@ -6290,13 +6357,13 @@ simplify_subreg (machine_mode outermode, rtx op,
if (GET_CODE (op) == CONCAT
|| GET_CODE (op) == VEC_CONCAT)
{
- poly_int64 final_offset;
+ poly_uint64 final_offset;
rtx part, res;
machine_mode part_mode = GET_MODE (XEXP (op, 0));
if (part_mode == VOIDmode)
part_mode = GET_MODE_INNER (GET_MODE (op));
- poly_int64 part_size = GET_MODE_SIZE (part_mode);
+ poly_uint64 part_size = GET_MODE_SIZE (part_mode);
if (must_lt (byte, part_size))
{
part = XEXP (op, 0);
@@ -6328,7 +6395,7 @@ simplify_subreg (machine_mode outermode, rtx op,
it extracts higher bits than the ZERO_EXTEND's source bits. */
if (GET_CODE (op) == ZERO_EXTEND && SCALAR_INT_MODE_P (innermode))
{
- poly_int64 bitpos = subreg_lsb_1 (outermode, innermode, byte);
+ poly_uint64 bitpos = subreg_lsb_1 (outermode, innermode, byte);
if (must_ge (bitpos, GET_MODE_PRECISION (GET_MODE (XEXP (op, 0)))))
return CONST0_RTX (outermode);
}
@@ -6366,7 +6433,7 @@ simplify_subreg (machine_mode outermode, rtx op,
rtx
simplify_gen_subreg (machine_mode outermode, rtx op,
- machine_mode innermode, poly_int64 byte)
+ machine_mode innermode, poly_uint64 byte)
{
rtx newx;
@@ -6566,7 +6633,7 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg)
}
/* Test a scalar subreg of a VEC_DUPLICATE. */
- poly_int64 offset = subreg_lowpart_offset (inner_mode, mode);
+ poly_uint64 offset = subreg_lowpart_offset (inner_mode, mode);
ASSERT_RTX_EQ (scalar_reg,
simplify_gen_subreg (inner_mode, duplicate,
mode, offset));
@@ -6586,7 +6653,7 @@ test_vector_ops_duplicate (machine_mode mode, rtx scalar_reg)
duplicate, vec_par));
/* Test a vector subreg of a VEC_DUPLICATE. */
- poly_int64 offset = subreg_lowpart_offset (narrower_mode, mode);
+ poly_uint64 offset = subreg_lowpart_offset (narrower_mode, mode);
ASSERT_RTX_EQ (narrower_duplicate,
simplify_gen_subreg (narrower_mode, duplicate,
mode, offset));
@@ -6666,12 +6733,60 @@ test_vector_ops ()
}
}
+template<unsigned int N>
+struct simplify_const_poly_int_tests
+{
+ static void run ();
+};
+
+template<>
+struct simplify_const_poly_int_tests<1>
+{
+ static void run () {}
+};
+
+/* Test various CONST_POLY_INT properties. */
+
+template<unsigned int N>
+void
+simplify_const_poly_int_tests<N>::run ()
+{
+ rtx x1 = gen_int_mode (poly_int64 (1, 1), QImode);
+ rtx x2 = gen_int_mode (poly_int64 (-80, 127), QImode);
+ rtx x3 = gen_int_mode (poly_int64 (-79, -128), QImode);
+ rtx x4 = gen_int_mode (poly_int64 (5, 4), QImode);
+ rtx x5 = gen_int_mode (poly_int64 (30, 24), QImode);
+ rtx x6 = gen_int_mode (poly_int64 (20, 16), QImode);
+ rtx x7 = gen_int_mode (poly_int64 (7, 4), QImode);
+ rtx x8 = gen_int_mode (poly_int64 (30, 24), HImode);
+ rtx x9 = gen_int_mode (poly_int64 (-30, -24), HImode);
+ rtx x10 = gen_int_mode (poly_int64 (-31, -24), HImode);
+ rtx two = GEN_INT (2);
+ rtx six = GEN_INT (6);
+ poly_uint64 offset = subreg_lowpart_offset (QImode, HImode);
+
+ /* These tests only try limited operation combinations. Fuller arithmetic
+ testing is done directly on poly_ints. */
+ ASSERT_EQ (simplify_unary_operation (NEG, HImode, x8, HImode), x9);
+ ASSERT_EQ (simplify_unary_operation (NOT, HImode, x8, HImode), x10);
+ ASSERT_EQ (simplify_unary_operation (TRUNCATE, QImode, x8, HImode), x5);
+ ASSERT_EQ (simplify_binary_operation (PLUS, QImode, x1, x2), x3);
+ ASSERT_EQ (simplify_binary_operation (MINUS, QImode, x3, x1), x2);
+ ASSERT_EQ (simplify_binary_operation (MULT, QImode, x4, six), x5);
+ ASSERT_EQ (simplify_binary_operation (MULT, QImode, six, x4), x5);
+ ASSERT_EQ (simplify_binary_operation (ASHIFT, QImode, x4, two), x6);
+ ASSERT_EQ (simplify_binary_operation (IOR, QImode, x4, two), x7);
+ ASSERT_EQ (simplify_subreg (HImode, x5, QImode, 0), x8);
+ ASSERT_EQ (simplify_subreg (QImode, x8, HImode, offset), x5);
+}
+
/* Run all of the selftests within this file. */
void
simplify_rtx_c_tests ()
{
test_vector_ops ();
+ simplify_const_poly_int_tests<NUM_POLY_INT_COEFFS>::run ();
}
} // namespace selftest
diff --git a/gcc/stor-layout.c b/gcc/stor-layout.c
index a8ee39f889d..95f0afa994d 100644
--- a/gcc/stor-layout.c
+++ b/gcc/stor-layout.c
@@ -302,7 +302,7 @@ mode_for_size (poly_uint64 size, enum mode_class mclass, int limit)
machine_mode mode;
int i;
- if (limit && may_gt (size, MAX_FIXED_MODE_SIZE))
+ if (limit && may_gt (size, (unsigned int) MAX_FIXED_MODE_SIZE))
return opt_machine_mode ();
/* Get the first mode which has this size, in the specified class. */
@@ -555,8 +555,8 @@ mode_for_array (tree elem_type, tree size)
return TYPE_MODE (elem_type);
limit_p = true;
- if (poly_tree_p (size, &int_size)
- && poly_tree_p (elem_size, &int_elem_size)
+ if (poly_int_tree_p (size, &int_size)
+ && poly_int_tree_p (elem_size, &int_elem_size)
&& may_ne (int_elem_size, 0U)
&& constant_multiple_p (int_size, int_elem_size, &num_elems))
{
@@ -1778,7 +1778,7 @@ compute_record_mode (tree type)
poly_uint64 type_size;
if (TREE_CODE (type) == RECORD_TYPE
&& mode != VOIDmode
- && poly_tree_p (TYPE_SIZE (type), &type_size)
+ && poly_int_tree_p (TYPE_SIZE (type), &type_size)
&& must_eq (GET_MODE_BITSIZE (mode), type_size))
;
else
diff --git a/gcc/system.h b/gcc/system.h
index f0664e93fc8..01bc134d1cc 100644
--- a/gcc/system.h
+++ b/gcc/system.h
@@ -915,7 +915,8 @@ extern void fancy_abort (const char *, int, const char *)
MODES_TIEABLE_P FUNCTION_ARG_PADDING SLOW_UNALIGNED_ACCESS \
HARD_REGNO_NREGS SECONDARY_MEMORY_NEEDED_MODE \
SECONDARY_MEMORY_NEEDED CANNOT_CHANGE_MODE_CLASS \
- TRULY_NOOP_TRUNCATION FUNCTION_ARG_OFFSET CONSTANT_ALIGNMENT
+ TRULY_NOOP_TRUNCATION FUNCTION_ARG_OFFSET CONSTANT_ALIGNMENT \
+ STARTING_FRAME_OFFSET
/* Target macros only used for code built for the target, that have
moved to libgcc-tm.h or have never been present elsewhere. */
diff --git a/gcc/target.def b/gcc/target.def
index c99be708078..3129c3f3210 100644
--- a/gcc/target.def
+++ b/gcc/target.def
@@ -1880,8 +1880,8 @@ transformations even in absence of specialized @acronym{SIMD} hardware.",
after processing the preferred one derived from preferred_simd_mode. */
DEFHOOK
(autovectorize_vector_sizes,
- "If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} isn't\n\
-the only one that's worth considering, this hook should add all suitable\n\
+ "If the mode returned by @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE} is not\n\
+the only one that is worth considering, this hook should add all suitable\n\
vector sizes to @var{sizes}, in order of decreasing preference. The first\n\
one should be the size of @code{TARGET_VECTORIZE_PREFERRED_SIMD_MODE}.\n\
\n\
@@ -5762,6 +5762,16 @@ five otherwise. This is best for most machines.",
unsigned int, (void),
default_case_values_threshold)
+DEFHOOK
+(starting_frame_offset,
+ "This hook returns the offset from the frame pointer to the first local\n\
+variable slot to be allocated. If @code{FRAME_GROWS_DOWNWARD}, it is the\n\
+offset to the @emph{end} of the first slot allocated, otherwise it is the\n\
+offset to the @emph{beginning} of the first slot allocated. The default\n\
+implementation returns 0.",
+ HOST_WIDE_INT, (void),
+ hook_hwi_void_0)
+
/* Optional callback to advise the target to compute the frame layout. */
DEFHOOK
(compute_frame_layout,
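
A port overrides the new hook in the usual way, by redefining the generated
TARGET_STARTING_FRAME_OFFSET macro before its gcc_target initializer. The
function body and the -16 offset below are illustrative assumptions, not
taken from any in-tree port:

/* In a hypothetical port's <cpu>.c: the first local slot ends 16 bytes
   below the frame pointer on this frame-grows-downward target.  */
static HOST_WIDE_INT
example_starting_frame_offset (void)
{
  return -16;
}

#undef TARGET_STARTING_FRAME_OFFSET
#define TARGET_STARTING_FRAME_OFFSET example_starting_frame_offset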
diff --git a/gcc/target.h b/gcc/target.h
index 0c0fc1d45a1..1b8decd1b49 100644
--- a/gcc/target.h
+++ b/gcc/target.h
@@ -216,6 +216,17 @@ typedef auto_vec<poly_uint64, 8> auto_vector_sizes;
extern struct gcc_target targetm;
+/* Return the mode that should be used to hold a scalar shift amount
+ when shifting values of the given mode. */
+/* ??? This could in principle be generated automatically from the .md
+ shift patterns, but for now word_mode should be universally OK. */
+
+inline scalar_int_mode
+get_shift_amount_mode (machine_mode)
+{
+ return word_mode;
+}
+
/* Return an estimate of the runtime value of X, for use in things
like cost calculations or profiling frequencies. Note that this
function should never be used in situations where the actual
@@ -231,17 +242,6 @@ estimated_poly_value (poly_int64 x)
return targetm.estimated_poly_value (x);
}
-/* Return the mode that should be used to hold a scalar shift amount
- when shifting values of the given mode. */
-/* ??? This could in principle be generated automatically from the .md
- shift patterns, but for now word_mode should be universally OK. */
-
-inline scalar_int_mode
-get_shift_amount_mode (machine_mode)
-{
- return word_mode;
-}
-
#ifdef GCC_TM_H
#ifndef CUMULATIVE_ARGS_MAGIC
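
get_shift_amount_mode gives the mode in which shift counts should be
generated when shifting a value of a given mode. A hedged usage sketch;
gen_shift_amount is a hypothetical helper, not a GCC function:

/* Build a constant shift amount for shifting values of MODE, using the
   mode the target expects for shift counts (word_mode for now).  */
static rtx
gen_shift_amount (machine_mode mode, HOST_WIDE_INT amount)
{
  scalar_int_mode amount_mode = get_shift_amount_mode (mode);
  return gen_int_mode (amount, amount_mode);
}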
diff --git a/gcc/targhooks.c b/gcc/targhooks.c
index 473224d37b9..1251e452ff5 100644
--- a/gcc/targhooks.c
+++ b/gcc/targhooks.c
@@ -1821,7 +1821,8 @@ default_class_max_nregs (reg_class_t rclass ATTRIBUTE_UNUSED,
return (unsigned char) CLASS_MAX_NREGS ((enum reg_class) rclass,
MACRO_MODE (mode));
#else
- /* The target must override this if some modes have nonconstant size. */
+ /* Targets with variable-sized modes must provide their own definition
+ of this hook. */
unsigned int size = GET_MODE_SIZE (mode).to_constant ();
return (size + UNITS_PER_WORD - 1) / UNITS_PER_WORD;
#endif
diff --git a/gcc/testsuite/ChangeLog b/gcc/testsuite/ChangeLog
index ae985df7792..566864c2183 100644
--- a/gcc/testsuite/ChangeLog
+++ b/gcc/testsuite/ChangeLog
@@ -1,3 +1,282 @@
+2017-10-13 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/82524
+ * gcc.c-torture/execute/pr82524.c: New test.
+
+ PR target/82498
+ * gcc.dg/tree-ssa/pr82498.c: New test.
+
+ PR target/82498
+ * gcc.dg/ubsan/pr82498.c: New test.
+
+2017-10-13 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/82451
+ * gcc.dg/graphite/pr82451.c: New testcase.
+ * gfortran.dg/graphite/id-27.f90: Likewise.
+ * gfortran.dg/graphite/pr82451.f: Likewise.
+
+2017-10-13 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/82525
+ * gcc.dg/graphite/id-30.c: New testcase.
+ * gfortran.dg/graphite/id-28.f90: Likewise.
+
+2017-10-13 Alan Modra <amodra@gmail.com>
+
+ * gcc.target/i386/asm-mem.c: New test.
+
+2017-10-12 Jakub Jelinek <jakub@redhat.com>
+
+ PR target/82498
+ * gcc.target/i386/pr82498-1.c: New test.
+ * gcc.target/i386/pr82498-2.c: New test.
+
+2017-10-12 Jan Hubicka <hubicka@ucw.cz>
+
+ * gcc.dg/predict-13.c: Update template for probability change.
+ * gcc.dg/predict-8.c: Likewise.
+
+2017-10-12 David Malcolm <dmalcolm@redhat.com>
+
+ * c-c++-common/cilk-plus/AN/parser_errors.c: Update expected
+ output to reflect changes to reported locations of missing
+ symbols.
+ * c-c++-common/cilk-plus/AN/parser_errors2.c: Likewise.
+ * c-c++-common/cilk-plus/AN/parser_errors3.c: Likewise.
+ * c-c++-common/cilk-plus/AN/pr61191.c: Likewise.
+ * c-c++-common/gomp/pr63326.c: Likewise.
+ * c-c++-common/missing-close-symbol.c: Likewise, also update for
+ new fix-it hints.
+ * c-c++-common/missing-symbol.c: Likewise, also add test coverage
+ for missing colon in ternary operator.
+ * g++.dg/cpp1y/digit-sep-neg.C: Likewise.
+ * g++.dg/cpp1y/pr65202.C: Likewise.
+ * g++.dg/missing-symbol-2.C: New test case.
+ * g++.dg/other/do1.C: Update expected output to reflect
+ changes to reported locations of missing symbols.
+ * g++.dg/parse/error11.C: Likewise.
+ * g++.dg/template/error11.C: Likewise.
+ * gcc.dg/missing-symbol-2.c: New test case.
+ * gcc.dg/missing-symbol-3.c: New test case.
+ * gcc.dg/noncompile/940112-1.c: Update expected output to reflect
+ changes to reported locations of missing symbols.
+ * gcc.dg/noncompile/971104-1.c: Likewise.
+ * obj-c++.dg/exceptions-6.mm: Likewise.
+ * obj-c++.dg/pr48187.mm: Likewise.
+ * objc.dg/exceptions-6.m: Likewise.
+
+2017-10-12 Martin Sebor <msebor@redhat.com>
+
+ PR other/82301
+ PR c/82435
+ * g++.dg/ext/attr-ifunc-1.C: Update.
+ * g++.dg/ext/attr-ifunc-2.C: Same.
+ * g++.dg/ext/attr-ifunc-3.C: Same.
+ * g++.dg/ext/attr-ifunc-4.C: Same.
+ * g++.dg/ext/attr-ifunc-5.C: Same.
+ * g++.dg/ext/attr-ifunc-6.C: New test.
+ * g++.old-deja/g++.abi/vtable2.C: Update.
+ * gcc.dg/attr-ifunc-6.c: New test.
+ * gcc.dg/attr-ifunc-7.c: New test.
+ * gcc.dg/pr81854.c: Update.
+ * lib/target-supports.exp: Update.
+
+2017-10-12 David Malcolm <dmalcolm@redhat.com>
+
+ * g++.dg/parse/pragma2.C: Update to reflect reinstatement of the
+ "#pragma is not allowed here" error.
+
+2017-10-12 Bin Cheng <bin.cheng@arm.com>
+
+ * gcc.dg/tree-ssa/ldist-28.c: New test.
+ * gcc.dg/tree-ssa/ldist-29.c: New test.
+ * gcc.dg/tree-ssa/ldist-30.c: New test.
+ * gcc.dg/tree-ssa/ldist-31.c: New test.
+
+2017-10-12 Bin Cheng <bin.cheng@arm.com>
+
+ * gcc.dg/tree-ssa/ldist-7.c: Adjust test string.
+ * gcc.dg/tree-ssa/ldist-16.c: Ditto.
+ * gcc.dg/tree-ssa/ldist-25.c: Ditto.
+ * gcc.dg/tree-ssa/ldist-33.c: New test.
+
+2017-10-12 Richard Biener <rguenther@suse.de>
+
+ PR tree-optimization/69728
+ * gcc.dg/graphite/pr69728.c: Adjust to reflect we can handle
+ the loop now. Remove unrelated undefined behavior.
+
+2017-10-12 Jakub Jelinek <jakub@redhat.com>
+
+ PR c++/82159
+ * g++.dg/opt/pr82159-2.C: New test.
+
+ PR target/82353
+ * gcc.target/i386/i386.exp (tests): Revert the '.C' extension change.
+ * gcc.target/i386/pr82353.C: Moved to ...
+ * g++.dg/ubsan/pr82353.C: ... here. Restrict to i?86/x86_64 && lp64.
+
+2017-10-11 Uros Bizjak <ubizjak@gmail.com>
+
+ * gcc.target/i386/387-ficom-2.c: New test.
+
+2017-10-11 Jakub Jelinek <jakub@redhat.com>
+
+ PR middle-end/80421
+ * gcc.c-torture/execute/pr80421.c: New test.
+
+ PR tree-optimization/78558
+ * gcc.dg/vect/pr78558.c: New test.
+
+ PR c++/82414
+ * g++.dg/lto/pr82414_0.C: New test.
+
+ PR c++/78523
+ * g++.dg/cpp1y/pr78523.C: New test.
+
+ PR c++/80194
+ * g++.dg/cpp1y/pr80194.C: New test.
+
+2017-10-11 Qing Zhao <qing.zhao@oracle.com>
+
+ PR target/81422
+ * gcc.target/aarch64/pr81422.C: New test.
+
+2017-10-11 Vladimir Makarov <vmakarov@redhat.com>
+
+ PR sanitizer/82353
+ * gcc.target/i386/i386.exp (tests): Permit '.C' extension.
+ * gcc.target/i386/pr82353.C: New.
+
+2017-10-11 Uros Bizjak <ubizjak@gmail.com>
+
+ * gcc.target/i386/387-ficom-1.c: New test.
+
+2017-10-11 Jeff Law <law@redhat.com>
+
+ * gcc.dg/struct-layout-1_generate.c (generate_fields): Fix typo in
+ address computation of end of complex_attrib_array_types.
+
+2017-10-11 Marc Glisse <marc.glisse@inria.fr>
+
+ * gcc.dg/Wstrict-overflow-7.c: Xfail.
+ * gcc.dg/pragma-diag-3.c: Likewise.
+
+2017-10-11 Bin Cheng <bin.cheng@arm.com>
+
+ PR tree-optimization/82472
+ * gcc.dg/tree-ssa/pr82472.c: New test.
+
+2017-10-11 Martin Liska <mliska@suse.cz>
+
+ PR sanitizer/82490
+ * c-c++-common/ubsan/attrib-5.c: New test.
+
+2017-10-11 Martin Liska <mliska@suse.cz>
+
+ Revert r253637:
+
+ PR sanitizer/82484
+ * gcc.dg/asan/pr82484.c: New test.
+
+2017-10-11 Martin Liska <mliska@suse.cz>
+
+ PR sanitizer/82484
+ * gcc.dg/asan/pr82484.c: New test.
+
+2017-10-11 Martin Liska <mliska@suse.cz>
+
+ * c-c++-common/ubsan/ptr-overflow-sanitization-1.c: Scan
+ optimized dump rather than assembly.
+
+2017-10-11 Nathan Sidwell <nathan@acm.org>
+
+ * g++.dg/cpp/string-3.C: Fix dg-final.
+
+2017-10-11 Paolo Carlini <paolo.carlini@oracle.com>
+
+ PR c++/80412
+ * g++.dg/cpp1z/class-deduction44.C: New.
+
+2017-10-11 Paolo Carlini <paolo.carlini@oracle.com>
+
+ PR c++/82230
+ * g++.dg/cpp1y/lambda-generic-ice8.C: New.
+
+2017-10-11 Paolo Carlini <paolo.carlini@oracle.com>
+
+ PR c++/81299
+ * g++.dg/cpp1y/lambda-generic-ice7.C: New.
+
+2017-10-10 Nathan Sidwell <nathan@acm.org>
+
+ * g++.dg/lookup/extern-c-redecl6.C: New.
+ * g++.dg/lookup/extern-c-hidden.C: Adjust diagnostics.
+ * g++.dg/lookup/extern-c-redecl.C: Likewise.
+ * g++.old-deja/g++.other/using9.C: Likewise.
+
+2017-10-10 Paolo Carlini <paolo.carlini@oracle.com>
+
+ PR c++/78006
+ * g++.dg/cpp1y/auto-fn40.C: New.
+
+2017-10-10 Paolo Carlini <paolo.carlini@oracle.com>
+
+ PR c++/81032
+ * g++.dg/cpp1y/lambda-generic-ice6.C: New.
+
+2017-10-10 Jakub Jelinek <jakub@redhat.com>
+
+ PR rtl-optimization/68205
+ * gcc.c-torture/execute/20040709-3.c: New test.
+
+ PR c++/67625
+ * g++.dg/cpp0x/pr67625.C: New test.
+
+ PR middle-end/70887
+ * g++.dg/cpp0x/pr70887.C: New test.
+
+ PR c++/70338
+ * g++.dg/cpp0x/pr70338.C: New test.
+
+ PR c++/77786
+ * g++.dg/cpp1y/pr77786.C: New test.
+
+ PR c++/71875
+ * g++.dg/cpp1y/pr71875.C: New test.
+
+ PR c++/77578
+ * g++.dg/gomp/pr77578.C: New test.
+
+ PR middle-end/70100
+ * g++.dg/opt/pr70100.C: New test.
+
+ PR c++/68252
+ * g++.dg/other/pr68252.C: New test.
+
+ PR target/79565
+ PR target/82483
+ * gcc.target/i386/pr82483-1.c: New test.
+ * gcc.target/i386/pr82483-2.c: New test.
+
+2017-10-10 Will Schmidt <will_schmidt@vnet.ibm.com>
+
+ * gcc.target/powerpc/fold-vec-mult-int128-p8.c: Update options.
+ * gcc.target/powerpc/fold-vec-mult-int128-p9.c: Update expected
+ instruction list.
+
+2017-10-10 Nathan Sidwell <nathan@acm.org>
+
+ PR preprocessor/82506
+ * g++.dg/cpp/string-3.C: New.
+
+2017-10-10 Will Schmidt <will_schmidt@vnet.ibm.com>
+
+ * gcc.target/powerpc/fold-vec-splat-16.c: New.
+ * gcc.target/powerpc/fold-vec-splat-32.c: New.
+ * gcc.target/powerpc/fold-vec-splat-8.c: New.
+
2017-10-10 Thomas Koenig <tkoenig@gcc.gnu.org>
PR libfortran/82233
@@ -928,7 +1207,7 @@
2017-09-22 Sergey Shalnov <sergey.shalnov@intel.com>
- * gcc.target/i386/avx512f-constant-set.c: New test.
+ * gcc.target/i386/avx512f-constant-set.c: New test.
2017-09-21 Sergey Shalnov <sergey.shalnov@intel.com>
@@ -2497,7 +2776,7 @@
2017-08-23 Richard Biener <rguenther@suse.de>
- PR target/81921
+ PR target/81921
* gcc.target/i386/pr81921.c: New testcase.
2017-08-23 Daniel Santos <daniel.santos@pobox.com>
@@ -2578,8 +2857,8 @@
2017-08-22 Yvan Roux <yvan.roux@linaro.org>
- PR c++/80287
- * g++.dg/pr80287.C: New test.
+ PR c++/80287
+ * g++.dg/pr80287.C: New test.
2017-08-22 Richard Biener <rguenther@suse.de>
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors.c b/gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors.c
index 18816e0ec6f..fd4fe5419b6 100644
--- a/gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors.c
+++ b/gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors.c
@@ -7,5 +7,5 @@ int main (void)
array2[:] = array2[: ; /* { dg-error "expected ']'" } */
- return 0;
-} /* { dg-error "expected ';' before" "" { target c } } */
+ return 0; /* { dg-error "expected ';' before" "" { target c } } */
+}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors2.c b/gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors2.c
index 2bb91343a79..d003d7cc2bb 100644
--- a/gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors2.c
+++ b/gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors2.c
@@ -7,6 +7,7 @@ int main (void)
array2[:] = array2[1:2:] ; /* { dg-error "expected expression before" "" { target c } } */
/* { dg-error "expected primary-expression before" "" { target c++ } .-1 } */
+ /* { dg-error "expected ';' before" "" { target c } .-2 } */
- return 0; /* { dg-error "expected ';' before" "" { target c } } */
+ return 0;
}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors3.c b/gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors3.c
index 9270007050e..14256e9579e 100644
--- a/gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors3.c
+++ b/gcc/testsuite/c-c++-common/cilk-plus/AN/parser_errors3.c
@@ -7,6 +7,7 @@ int main (void)
array2[:] = array2[1: :] ; /* { dg-error "expected expression before" "" { target c } } */
/* { dg-error "expected primary-expression before" "" { target c++ } .-1 } */
+ /* { dg-error "expected ';' before" "" { target c } .-2 } */
- return 0; /* { dg-error "expected ';' before" "" { target c } } */
+ return 0;
}
diff --git a/gcc/testsuite/c-c++-common/cilk-plus/AN/pr61191.c b/gcc/testsuite/c-c++-common/cilk-plus/AN/pr61191.c
index a9a9d6601bc..8c32ad9a267 100644
--- a/gcc/testsuite/c-c++-common/cilk-plus/AN/pr61191.c
+++ b/gcc/testsuite/c-c++-common/cilk-plus/AN/pr61191.c
@@ -7,4 +7,5 @@ double f(double * A, double * B)
return __sec_reduce_add((B[0:500])(; /* { dg-error "called object" "" { target c } } */
/* { dg-error "expected expression before ';' token" "" { target c } .-1 } */
/* { dg-error "expected primary-expression before ';' token" "" { target c++ } .-2 } */
-} /* { dg-error "expected" "" { target c } } */
+/* { dg-error "expected" "" { target c } .-3 } */
+}
diff --git a/gcc/testsuite/c-c++-common/gomp/pr63326.c b/gcc/testsuite/c-c++-common/gomp/pr63326.c
index e319f497011..3e627237c43 100644
--- a/gcc/testsuite/c-c++-common/gomp/pr63326.c
+++ b/gcc/testsuite/c-c++-common/gomp/pr63326.c
@@ -156,34 +156,34 @@ f4 (int x)
{
do
#pragma omp barrier /* { dg-error "may only be used in compound statements" } */
- while (0);
+ while (0); /* { dg-error "before" "" { target c++ } } */
} /* { dg-error "before" "" { target c++ } } */
{
do
#pragma omp flush /* { dg-error "may only be used in compound statements" } */
- while (0);
+ while (0); /* { dg-error "before" "" { target c++ } } */
} /* { dg-error "before" "" { target c++ } } */
{
do
#pragma omp taskwait /* { dg-error "may only be used in compound statements" } */
- while (0);
+ while (0); /* { dg-error "before" "" { target c++ } } */
} /* { dg-error "before" "" { target c++ } } */
{
do
#pragma omp taskyield /* { dg-error "may only be used in compound statements" } */
- while (0);
+ while (0); /* { dg-error "before" "" { target c++ } } */
} /* { dg-error "before" "" { target c++ } } */
#pragma omp parallel
{
do
#pragma omp cancel parallel /* { dg-error "may only be used in compound statements" } */
- while (0);
+ while (0); /* { dg-error "before" "" { target c++ } } */
} /* { dg-error "before" "" { target c++ } } */
#pragma omp parallel
{
do
#pragma omp cancellation point parallel /* { dg-error "may only be used in compound statements" } */
- while (0);
+ while (0); /* { dg-error "before" "" { target c++ } } */
} /* { dg-error "before" "" { target c++ } } */
#pragma omp for ordered(1)
for (i = 0; i < 16; i++)
@@ -191,28 +191,28 @@ f4 (int x)
{
do
#pragma omp ordered depend(source) /* { dg-error "may only be used in compound statements" } */
- while (0);
+ while (0); /* { dg-error "before" "" { target c++ } } */
} /* { dg-error "before" "" { target c++ } } */
{
do
#pragma omp ordered depend(sink: i-1) /* { dg-error "may only be used in compound statements" } */
- while (0);
+ while (0); /* { dg-error "before" "" { target c++ } } */
} /* { dg-error "before" "" { target c++ } } */
}
{
do
#pragma omp target enter data map(to:i) /* { dg-error "may only be used in compound statements" } */
- while (0);
+ while (0); /* { dg-error "before" "" { target c++ } } */
} /* { dg-error "before" "" { target c++ } } */
{
do
#pragma omp target update to(i) /* { dg-error "may only be used in compound statements" } */
- while (0);
+ while (0); /* { dg-error "before" "" { target c++ } } */
} /* { dg-error "before" "" { target c++ } } */
{
do
#pragma omp target exit data map(from:i) /* { dg-error "may only be used in compound statements" } */
- while (0);
+ while (0); /* { dg-error "before" "" { target c++ } } */
} /* { dg-error "before" "" { target c++ } } */
}
diff --git a/gcc/testsuite/c-c++-common/missing-close-symbol.c b/gcc/testsuite/c-c++-common/missing-close-symbol.c
index 85b96f28ef8..abeb83748c1 100644
--- a/gcc/testsuite/c-c++-common/missing-close-symbol.c
+++ b/gcc/testsuite/c-c++-common/missing-close-symbol.c
@@ -12,6 +12,7 @@ void test_static_assert_same_line (void)
/* { dg-begin-multiline-output "" }
_Static_assert(sizeof(int) >= sizeof(char), "msg";
~ ^
+ )
{ dg-end-multiline-output "" } */
}
@@ -25,6 +26,7 @@ void test_static_assert_different_line (void)
/* { dg-begin-multiline-output "" }
"msg";
^
+ )
{ dg-end-multiline-output "" } */
/* { dg-begin-multiline-output "" }
_Static_assert(sizeof(int) >= sizeof(char),
diff --git a/gcc/testsuite/c-c++-common/missing-symbol.c b/gcc/testsuite/c-c++-common/missing-symbol.c
index 33a501b9988..326b9faad7a 100644
--- a/gcc/testsuite/c-c++-common/missing-symbol.c
+++ b/gcc/testsuite/c-c++-common/missing-symbol.c
@@ -5,15 +5,14 @@ extern int bar (void);
int missing_close_paren_in_switch (int i)
{
- switch (i /* { dg-message "10: to match this '\\('" } */
- { /* { dg-error "5: expected '\\)' before '.' token" } */
- /* { dg-begin-multiline-output "" }
- {
- ^
- { dg-end-multiline-output "" } */
+ switch (i /* { dg-error "12: expected '\\)' before '.' token" } */
+ {
/* { dg-begin-multiline-output "" }
switch (i
- ^
+ ~ ^
+ )
+ {
+ ~
{ dg-end-multiline-output "" } */
case 0:
@@ -30,21 +29,33 @@ int missing_close_paren_in_switch (int i)
void missing_close_paren_in_if (void)
{
if (foo () /* { dg-line start_of_if } */
- && bar ()
- { /* { dg-error "5: expected '\\)' before '.' token" } */
+ && bar () /* { dg-error "16: expected '\\)' before '.' token" } */
+ {
/* { dg-begin-multiline-output "" }
+ && bar ()
+ ^
+ )
{
- ^
+ ~
{ dg-end-multiline-output "" } */
/* { dg-message "6: to match this '\\('" "" { target *-*-* } start_of_if } */
/* { dg-begin-multiline-output "" }
if (foo ()
^
- { dg-end-multiline-output "" } */
+ { dg-end-multiline-output "" } */
}
-
} /* { dg-error "1: expected" } */
/* { dg-begin-multiline-output "" }
}
^
{ dg-end-multiline-output "" } */
+
+int missing_colon_in_ternary (int flag)
+{
+ return flag ? 42 0; /* { dg-error "expected ':' before numeric constant" } */
+ /* { dg-begin-multiline-output "" }
+ return flag ? 42 0;
+ ^~
+ :
+ { dg-end-multiline-output "" } */
+}
diff --git a/gcc/testsuite/c-c++-common/ubsan/attrib-5.c b/gcc/testsuite/c-c++-common/ubsan/attrib-5.c
new file mode 100644
index 00000000000..fee1df1c433
--- /dev/null
+++ b/gcc/testsuite/c-c++-common/ubsan/attrib-5.c
@@ -0,0 +1,11 @@
+/* { dg-do compile } */
+/* { dg-options "-fsanitize=undefined" } */
+
+__attribute__((no_sanitize("foobar")))
+static void
+float_cast2 (void)
+{ /* { dg-warning "attribute directive ignored" } */
+ volatile double d = 300;
+ volatile signed char c;
+ c = d;
+}
diff --git a/gcc/testsuite/c-c++-common/ubsan/ptr-overflow-sanitization-1.c b/gcc/testsuite/c-c++-common/ubsan/ptr-overflow-sanitization-1.c
index 42c14523764..c12c7df252b 100644
--- a/gcc/testsuite/c-c++-common/ubsan/ptr-overflow-sanitization-1.c
+++ b/gcc/testsuite/c-c++-common/ubsan/ptr-overflow-sanitization-1.c
@@ -1,5 +1,4 @@
-/* { dg-require-effective-target lp64 } */
-/* { dg-options "-O -fsanitize=pointer-overflow" } */
+/* { dg-options "-O -fsanitize=pointer-overflow -fdump-tree-optimized" } */
/* { dg-skip-if "" { *-*-* } "-flto" } */
#define SMAX __PTRDIFF_MAX__
@@ -76,5 +75,4 @@ void negative_to_negative (char *ptr)
p2 += 5;
}
-
-/* { dg-final { scan-assembler-times "call\\s+__ubsan_handle_pointer_overflow" 17 } } */
+/* { dg-final { scan-tree-dump-times "__ubsan_handle_pointer_overflow" 17 "optimized" } } */
diff --git a/gcc/testsuite/g++.dg/concepts/req6.C b/gcc/testsuite/g++.dg/concepts/req6.C
index 670fd542f6f..50fa3b4dadd 100644
--- a/gcc/testsuite/g++.dg/concepts/req6.C
+++ b/gcc/testsuite/g++.dg/concepts/req6.C
@@ -4,7 +4,7 @@ struct X { };
int operator==(X, X) { return 0; }
template<typename T>
- concept bool C1() { return X(); }
+ concept bool C1() { return X(); } // { dg-error "bool" }
template<C1 T>
void h(T) { } // OK until used.
diff --git a/gcc/testsuite/g++.dg/cpp/string-3.C b/gcc/testsuite/g++.dg/cpp/string-3.C
new file mode 100644
index 00000000000..ed9c42ce557
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp/string-3.C
@@ -0,0 +1,9 @@
+// PR c++/82506
+// { dg-do preprocess { target c++11 } }
+
+#define STRINGIZE(A) #A
+
+BEGIN STRINGIZE(R"(
+)") END
+
+// { dg-final { scan-file string-3.i "BEGIN \"R\\\\\"\\(\\\\n\\)\\\\\"\"\n END" } }
diff --git a/gcc/testsuite/g++.dg/cpp0x/error1.C b/gcc/testsuite/g++.dg/cpp0x/error1.C
index 33557f2f80b..115d800bb35 100644
--- a/gcc/testsuite/g++.dg/cpp0x/error1.C
+++ b/gcc/testsuite/g++.dg/cpp0x/error1.C
@@ -1,10 +1,17 @@
// PR c++/34395
// { dg-do compile { target c++11 } }
-template<int... N> void foo (int... x[N]) // { dg-message "int \\\[N\\\]\\.\\.\\. x" }
+void f(...);
+template<int... N> void foo (int... x[N]) // { dg-message "declared here" }
{
struct A
{
- A () { x; } // { dg-error "use of parameter from containing function" }
+ A () { f(x...); } // { dg-error "use of parameter from containing function" }
};
}
+
+int main()
+{
+ int ar[4];
+ foo<4>(ar);
+}
diff --git a/gcc/testsuite/g++.dg/cpp0x/pr67625.C b/gcc/testsuite/g++.dg/cpp0x/pr67625.C
new file mode 100644
index 00000000000..bcff5af5831
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/pr67625.C
@@ -0,0 +1,12 @@
+// PR c++/67625
+// { dg-do compile { target c++11 } }
+
+constexpr unsigned short
+bswap16 (unsigned short x)
+{
+ return __builtin_bswap16 (x);
+}
+constexpr int a = bswap16 (1);
+enum { b = a };
+enum { c = __builtin_bswap16 (1) };
+enum { d = bswap16 (1) };
diff --git a/gcc/testsuite/g++.dg/cpp0x/pr70338.C b/gcc/testsuite/g++.dg/cpp0x/pr70338.C
new file mode 100644
index 00000000000..156cb917080
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/pr70338.C
@@ -0,0 +1,17 @@
+// PR c++/70338
+// { dg-do compile { target c++11 } }
+// { dg-options "-g" }
+
+template<typename T>
+void
+foo (int x)
+{
+ T a[x];
+ auto b = [&]() { for (auto &c: a) c = 0.; };
+}
+
+int
+main ()
+{
+ foo<double> (3);
+}
diff --git a/gcc/testsuite/g++.dg/cpp0x/pr70887.C b/gcc/testsuite/g++.dg/cpp0x/pr70887.C
new file mode 100644
index 00000000000..f5b31b22900
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp0x/pr70887.C
@@ -0,0 +1,31 @@
+// PR middle-end/70887
+// { dg-do compile { target { { i?86-*-* x86_64-*-* } && c++11 } } }
+// { dg-options "-O2 -msse2" }
+
+#include <x86intrin.h>
+
+enum R { S };
+template <R> struct C { static constexpr int value = 10; };
+template <typename R, template <R> class T, R... r>
+struct A {
+ template <int, R...> struct B;
+ template <int N, R M, R... O>
+ struct B<N, M, O...> {
+ static constexpr int d = T<M>::value;
+ static __m128i generate()
+ {
+ __attribute__((__vector_size__(16))) long long
+ a = generate(),
+ b = _mm_bslli_si128 (a, 1),
+ c = _mm_bsrli_si128 (_mm_set1_epi32(d), 12);
+ return _mm_or_si128 (b, c);
+ }
+ };
+ A () { B<0, r...>::generate(); }
+};
+
+int
+main () {
+ using RI = A<R, C, S>;
+ RI ri;
+}
diff --git a/gcc/testsuite/g++.dg/cpp1y/auto-fn40.C b/gcc/testsuite/g++.dg/cpp1y/auto-fn40.C
new file mode 100644
index 00000000000..e7f1bd44064
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/auto-fn40.C
@@ -0,0 +1,37 @@
+// PR c++/78006
+// { dg-do compile { target c++14 } }
+
+template<typename T> T&& declval() noexcept;
+
+template<typename... _Tp>
+ struct common_type;
+
+template<typename _Tp>
+ struct common_type<_Tp>
+ { typedef _Tp type; };
+
+template<typename _Tp, typename _Up>
+ struct common_type<_Tp, _Up>
+ { typedef decltype(true ? declval<_Tp>() : declval<_Up>()) type; };
+
+template<typename _Tp, typename _Up, typename... _Vp>
+ struct common_type<_Tp, _Up, _Vp...>
+ {
+ typedef typename
+ common_type<typename common_type<_Tp, _Up>::type, _Vp...>::type type;
+ };
+
+template<typename... _Tp>
+ using common_type_t = typename common_type<_Tp...>::type;
+
+template <typename... TFs>
+auto x(TFs&&... fs)
+{
+ using rt = common_type_t<decltype(fs(0))...>;
+ return [](auto) -> rt { };
+}
+
+int main()
+{
+ x([](int){})(0);
+}
diff --git a/gcc/testsuite/g++.dg/cpp1y/digit-sep-neg.C b/gcc/testsuite/g++.dg/cpp1y/digit-sep-neg.C
index 833fab7c50b..727e74e2e10 100644
--- a/gcc/testsuite/g++.dg/cpp1y/digit-sep-neg.C
+++ b/gcc/testsuite/g++.dg/cpp1y/digit-sep-neg.C
@@ -26,5 +26,5 @@ main()
}
// { dg-error "exponent has no digits" "exponent has no digits" { target *-*-* } 21 }
-// { dg-error "expected ';' before" "expected ';' before" { target *-*-* } 14 }
-// { dg-error "expected ';' before" "expected ';' before" { target *-*-* } 25 }
+// { dg-error "expected ';' before" "expected ';' before" { target *-*-* } 13 }
+// { dg-error "expected ';' before" "expected ';' before" { target *-*-* } 24 }
diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep2.C b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep2.C
new file mode 100644
index 00000000000..91e3804cb0b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-dep2.C
@@ -0,0 +1,18 @@
+// { dg-do compile { target c++14 } }
+
+struct A { void operator()(int) const {} };
+
+template <class T>
+void f()
+{
+ constexpr A a {};
+
+ [=](auto b) {
+ a(b);
+ }(42);
+}
+
+int main()
+{
+ f<int>();
+}
diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice5.C b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice5.C
index 473e412cb9d..88b7d1a05a1 100644
--- a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice5.C
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice5.C
@@ -12,7 +12,7 @@ using Void = void;
template<typename F,typename A>
auto
-bar(F f, A a) -> decltype( ( f(a) , 0 ) ) // { dg-error "no match" }
+bar(F f, A a) -> decltype( ( f(a) , 0 ) ) // { dg-message "" }
{ return {}; }
diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice6.C b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice6.C
new file mode 100644
index 00000000000..6851afc860e
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice6.C
@@ -0,0 +1,13 @@
+// PR c++/81032
+// { dg-do compile { target c++14 } }
+
+template<typename T> constexpr void foo(T t)
+{
+ constexpr int i = t; // { dg-error "constant" }
+ [=](auto){ return i; }(0);
+}
+
+void bar()
+{
+ foo(0);
+}
diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice7.C b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice7.C
new file mode 100644
index 00000000000..fa0fe1ddaf9
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice7.C
@@ -0,0 +1,15 @@
+// PR c++/81299
+// { dg-do compile { target c++14 } }
+// { dg-options "-Wall" }
+
+struct function_t {
+ template <typename ...Xs>
+ void operator()(Xs&& ...) const { }
+};
+constexpr function_t function{};
+
+int main() {
+ constexpr auto fun = ::function;
+ auto call = [=](auto ...x) { fun(x...); };
+ call();
+}
diff --git a/gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice8.C b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice8.C
new file mode 100644
index 00000000000..a39ce44115d
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/lambda-generic-ice8.C
@@ -0,0 +1,16 @@
+// PR c++/82230
+// { dg-do compile { target c++14 } }
+
+template <class>
+ struct c
+ {
+ template <class>
+ void f()
+ {
+ [](auto) { auto x = [] {}; }(0);
+ }
+};
+int main()
+{
+ c<int>{}.f<int>();
+}
diff --git a/gcc/testsuite/g++.dg/cpp1y/pr65202.C b/gcc/testsuite/g++.dg/cpp1y/pr65202.C
index 602b264b302..7ce4895a134 100644
--- a/gcc/testsuite/g++.dg/cpp1y/pr65202.C
+++ b/gcc/testsuite/g++.dg/cpp1y/pr65202.C
@@ -22,5 +22,5 @@ struct bar;
int main()
{
foo<ns::bar> f;
- adl::swap(f, f)
-} // { dg-error "" }
+ adl::swap(f, f) // { dg-error "expected ';'" }
+} // { dg-error "expected '.'" "expected end of namespace" }
diff --git a/gcc/testsuite/g++.dg/cpp1y/pr71875.C b/gcc/testsuite/g++.dg/cpp1y/pr71875.C
new file mode 100644
index 00000000000..4d317966cea
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/pr71875.C
@@ -0,0 +1,24 @@
+// PR c++/71875
+// { dg-do link { target c++14 } }
+
+template <typename T>
+constexpr bool IsMatrix = false;
+
+template<typename TElem>
+class Matrix {};
+
+template <typename TElem>
+constexpr bool IsMatrix<Matrix<TElem>> = true;
+
+template<typename TNestVec>
+class RowVecExpMatrix;
+
+template <typename TNestVec>
+constexpr bool IsMatrix<RowVecExpMatrix<TNestVec>> = true;
+
+int
+main ()
+{
+ static_assert (IsMatrix<RowVecExpMatrix<Matrix<int>>>, "Matrix check error");
+ static_assert (IsMatrix<Matrix<int>>, "Input type is not a matrix");
+}
diff --git a/gcc/testsuite/g++.dg/cpp1y/pr77786.C b/gcc/testsuite/g++.dg/cpp1y/pr77786.C
new file mode 100644
index 00000000000..e242228335c
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/pr77786.C
@@ -0,0 +1,21 @@
+// PR c++/77786
+// { dg-do compile { target c++14 } }
+
+#include <vector>
+
+template<int N>
+void
+foo (std::vector<int> a)
+{
+ auto const a_size = a.size();
+ auto bar = [&](auto y) -> void { int a_size_2 = a_size; };
+ double x = 0.0;
+ bar (x);
+}
+
+int
+main ()
+{
+ std::vector<int> a(1);
+ foo<1>(a);
+}
diff --git a/gcc/testsuite/g++.dg/cpp1y/pr78523.C b/gcc/testsuite/g++.dg/cpp1y/pr78523.C
new file mode 100644
index 00000000000..31e0cc886fa
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/pr78523.C
@@ -0,0 +1,12 @@
+// PR c++/78523
+// { dg-do compile { target c++14 } }
+
+int bar ();
+
+void
+foo ()
+{
+ const int t = bar ();
+ auto f = [=] (auto x) { return t; };
+ f (0);
+}
diff --git a/gcc/testsuite/g++.dg/cpp1y/pr80194.C b/gcc/testsuite/g++.dg/cpp1y/pr80194.C
new file mode 100644
index 00000000000..2a892c3cf37
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1y/pr80194.C
@@ -0,0 +1,17 @@
+// PR c++/80194
+// { dg-do compile { target c++14 } }
+
+int fn1 ();
+
+template <class Fn>
+void
+fn2 (Fn &&fn)
+{
+ fn (42);
+}
+
+void fn2 ()
+{
+ auto const x = fn1 ();
+ fn2 ([&](auto) { x; });
+}
diff --git a/gcc/testsuite/g++.dg/cpp1z/class-deduction44.C b/gcc/testsuite/g++.dg/cpp1z/class-deduction44.C
new file mode 100644
index 00000000000..15711971f51
--- /dev/null
+++ b/gcc/testsuite/g++.dg/cpp1z/class-deduction44.C
@@ -0,0 +1,5 @@
+// PR c++/80412
+// { dg-options -std=c++17 }
+
+template <typename> struct A;
+template <typename> struct B : A < B { , // { dg-error "" }
diff --git a/gcc/testsuite/g++.dg/ext/attr-ifunc-1.C b/gcc/testsuite/g++.dg/ext/attr-ifunc-1.C
index 2c7bba12959..4a29e8bb4d6 100644
--- a/gcc/testsuite/g++.dg/ext/attr-ifunc-1.C
+++ b/gcc/testsuite/g++.dg/ext/attr-ifunc-1.C
@@ -4,26 +4,33 @@
struct Klass
{
+ int a[4];
+
int implementation ();
int magic ();
- typedef int (Klass::*MemFuncPtr)();
+ /* An ifunc resolver must return a pointer to an ordinary (non-member)
+ function. To make it possible to use ifunc with member functions,
+ the resolver must convert a member function pointer to an ordinary
+ function pointer (slicing off the high word). */
+ typedef int Func (Klass*);
- static MemFuncPtr resolver ();
+ static Func* resolver ();
};
-Klass::MemFuncPtr p = &Klass::implementation;
-
-int Klass::implementation (void)
+int Klass::implementation ()
{
__builtin_printf ("'ere I am JH\n");
- return 1234;
+ return a[0] + a[1] + a[2] + a[3];
}
-
-Klass::MemFuncPtr Klass::resolver (void)
+Klass::Func* Klass::resolver (void)
{
- return &Klass::implementation;
+ /* GCC guarantees this conversion to be safe and the resulting pointer
+ usable to call the member function using ordinary (i.e., non-member)
+ function call syntax. */
+
+ return reinterpret_cast<Func*>(&Klass::implementation);
}
int f (void) __attribute__ ((ifunc ("foo")));
@@ -32,11 +39,16 @@ typedef int (F)(void);
extern "C" F* foo () { return 0; }
-int Klass::magic (void) __attribute__ ((ifunc ("_ZN5Klass8resolverEv")));
+int Klass::magic () __attribute__ ((ifunc ("_ZN5Klass8resolverEv")));
int main ()
{
Klass obj;
- return !(obj.magic () == 1234);
+ obj.a[0] = 1;
+ obj.a[1] = 2;
+ obj.a[2] = 3;
+ obj.a[3] = 4;
+
+ return !(obj.magic () == 10);
}
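
The comments introduced above rely on a documented GNU extension: with
-Wno-pmf-conversions, a bound pointer-to-member function can be cast to an
ordinary function pointer whose first argument is the object. A minimal
standalone sketch of the resolver pattern (hypothetical names, not taken from
this patch):

    struct Widget
    {
      int impl ();                  /* the real implementation */
      typedef int Func (Widget *);  /* ordinary function; "this" is explicit */
      static Func *resolver ();
    };

    Widget::Func *
    Widget::resolver ()
    {
      /* GNU extension: extract the plain function pointer from the
         pointer-to-member; compile with -Wno-pmf-conversions.  */
      return reinterpret_cast<Widget::Func *> (&Widget::impl);
    }

    int
    call (Widget *w)
    {
      return Widget::resolver () (w);  /* behaves like w->impl () */
    }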
diff --git a/gcc/testsuite/g++.dg/ext/attr-ifunc-2.C b/gcc/testsuite/g++.dg/ext/attr-ifunc-2.C
index 1fc940bb7dd..e5be3d29aba 100644
--- a/gcc/testsuite/g++.dg/ext/attr-ifunc-2.C
+++ b/gcc/testsuite/g++.dg/ext/attr-ifunc-2.C
@@ -9,9 +9,9 @@ struct Klass
int implementation ();
int magic ();
- typedef int (Klass::*MemFuncPtr)();
+ typedef int Func (Klass*);
- static MemFuncPtr resolver ();
+ static Func* resolver ();
};
int Klass::implementation (void)
@@ -20,9 +20,13 @@ int Klass::implementation (void)
return 0;
}
-Klass::MemFuncPtr Klass::resolver (void)
+Klass::Func* Klass::resolver (void)
{
- return &Klass::implementation;
+ /* GCC guarantees this conversion to be safe and the resulting pointer
+ usable to call the member function using ordinary (i.e., non-member)
+ function call syntax. */
+
+ return reinterpret_cast<Func*>(&Klass::implementation);
}
int Klass::magic (void) __attribute__ ((ifunc ("_ZN5Klass8resolverEv")));
diff --git a/gcc/testsuite/g++.dg/ext/attr-ifunc-3.C b/gcc/testsuite/g++.dg/ext/attr-ifunc-3.C
index 04206a126e8..6d494244331 100644
--- a/gcc/testsuite/g++.dg/ext/attr-ifunc-3.C
+++ b/gcc/testsuite/g++.dg/ext/attr-ifunc-3.C
@@ -6,23 +6,29 @@
struct Klass
{
+ int a[4];
+
int implementation ();
int magic ();
- typedef int (Klass::*MemFuncPtr)();
+ typedef int Func (Klass*);
- static MemFuncPtr resolver ();
+ static Func* resolver ();
};
int Klass::implementation (void)
{
printf ("'ere I am JH\n");
- return 0;
+ return a[0] + a[1] + a[2] + a[3];
}
-Klass::MemFuncPtr Klass::resolver (void)
+Klass::Func* Klass::resolver ()
{
- return &Klass::implementation;
+ /* GCC guarantees this conversion to be safe and the resulting pointer
+ usable to call the member function using ordinary (i.e., non-member)
+ function call syntax. */
+
+ return reinterpret_cast<Func*>(&Klass::implementation);
}
int Klass::magic (void) __attribute__ ((ifunc ("_ZN5Klass8resolverEv")));
@@ -36,5 +42,10 @@ int main ()
{
Klass obj;
- return Foo (obj, &Klass::magic) != 0;
+ obj.a[0] = 1;
+ obj.a[1] = 2;
+ obj.a[2] = 3;
+ obj.a[3] = 4;
+
+ return Foo (obj, &Klass::magic) != 10;
}
diff --git a/gcc/testsuite/g++.dg/ext/attr-ifunc-4.C b/gcc/testsuite/g++.dg/ext/attr-ifunc-4.C
index 3127193147e..f71dc3b9ba9 100644
--- a/gcc/testsuite/g++.dg/ext/attr-ifunc-4.C
+++ b/gcc/testsuite/g++.dg/ext/attr-ifunc-4.C
@@ -14,9 +14,9 @@ struct Klassier : Klass
int implementation ();
int magic ();
- typedef int (Klassier::*MemFuncPtr)();
+ typedef int Func (Klass*);
- static MemFuncPtr resolver ();
+ static Func* resolver ();
};
int Klassier::implementation (void)
@@ -25,9 +25,13 @@ int Klassier::implementation (void)
return 0;
}
-Klassier::MemFuncPtr Klassier::resolver (void)
+Klassier::Func* Klassier::resolver ()
{
- return &Klassier::implementation;
+ /* GCC guarantees this conversion to be safe and the resulting pointer
+ usable to call the member function using ordinary (i.e., non-member)
+ function call syntax. */
+
+ return reinterpret_cast<Func*>(&Klassier::implementation);
}
int Klassier::magic (void) __attribute__ ((ifunc ("_ZN8Klassier8resolverEv")));
diff --git a/gcc/testsuite/g++.dg/ext/attr-ifunc-5.C b/gcc/testsuite/g++.dg/ext/attr-ifunc-5.C
index 05855dd20c0..fd8bcff79b7 100644
--- a/gcc/testsuite/g++.dg/ext/attr-ifunc-5.C
+++ b/gcc/testsuite/g++.dg/ext/attr-ifunc-5.C
@@ -1,15 +1,21 @@
// PR c/81854 - weak alias of an incompatible symbol accepted
// { dg-do compile }
 // { dg-require-ifunc "" }
+// { dg-options "-Wextra -Wno-pmf-conversions" }
struct Klass
{
int implementation ();
- const char* magic ();
+ int good_magic ();
+ int iffy_magic ();
+ const char* bad_magic ();
+ typedef int (Func)(Klass*);
typedef int (Klass::*MemFuncPtr)();
- static MemFuncPtr resolver ();
+ static Func* good_resolver ();
+ static void* iffy_resolver ();
+ static MemFuncPtr bad_resolver ();
};
int Klass::implementation (void)
@@ -17,13 +23,42 @@ int Klass::implementation (void)
return 0;
}
-const char* __attribute__ ((ifunc ("_ZN5Klass8resolverEv")))
- Klass::magic (); // { dg-warning "alias between functions of incompatible types" }
+// Verify no warning for the expected/compatible declaration.
+int __attribute__ ((ifunc ("_ZN5Klass13good_resolverEv")))
+Klass::good_magic ();
+
+Klass::Func*
+Klass::good_resolver (void)
+{
+ MemFuncPtr mfp = &Klass::implementation;
+
+ return reinterpret_cast<Func*>(mfp);
+}
+
+
+// Verify a warning for the unsafe declaration.
+
+int __attribute__ ((ifunc ("_ZN5Klass13iffy_resolverEv")))
+Klass::iffy_magic (); // { dg-message "resolver indirect function declared here" }
+
+void*
+Klass::iffy_resolver (void) // { dg-warning ".ifunc. resolver for .int Klass::iffy_magic\\(\\). should return .int \\(\\*\\)\\(Klass\\*\\)." }
+{
+ MemFuncPtr mfp = &Klass::implementation;
+
+ return reinterpret_cast<void*>(mfp);
+}
+
+
+// Verify an error for an incompatible declaration.
+
+const char* __attribute__ ((ifunc ("_ZN5Klass12bad_resolverEv")))
+Klass::bad_magic (); // { dg-message "resolver indirect function declared here" }
Klass::MemFuncPtr
-Klass::resolver (void) // { dg-message "aliased declaration here" }
+Klass::bad_resolver (void) // { dg-error ".ifunc. resolver for .const char\\* Klass::bad_magic\\(\\). must return .const char\\* \\(\\*\\)\\(Klass\\*\\)." }
{
return &Klass::implementation;
}
diff --git a/gcc/testsuite/g++.dg/gomp/pr77578.C b/gcc/testsuite/g++.dg/gomp/pr77578.C
new file mode 100644
index 00000000000..d92fddf970b
--- /dev/null
+++ b/gcc/testsuite/g++.dg/gomp/pr77578.C
@@ -0,0 +1,31 @@
+// PR c++/77578
+// { dg-do compile }
+
+template <typename T>
+class A
+{
+};
+
+template <typename T>
+struct B
+{
+};
+
+template <typename T>
+struct B <A <T> >
+{
+ typedef A <T> C;
+ typedef typename C::D D;
+
+ template <typename U>
+ static void
+ foo (const D x, const D y)
+ {
+ U u;
+ {
+ #pragma omp parallel for
+ for (u.bar().y() = x.y(); u.bar().y() <= y.y(); u.bar().y()++) // { dg-error "expected" }
+ ;
+ }
+ }
+};
diff --git a/gcc/testsuite/g++.dg/lookup/extern-c-hidden.C b/gcc/testsuite/g++.dg/lookup/extern-c-hidden.C
index a03dea02376..80593dba735 100644
--- a/gcc/testsuite/g++.dg/lookup/extern-c-hidden.C
+++ b/gcc/testsuite/g++.dg/lookup/extern-c-hidden.C
@@ -1,11 +1,11 @@
 // Make sure unhiding an extern "C" declaration still checks it is compatible
-extern "C" float fabsf (float); // { dg-error "conflicts with previous declaration" }
+extern "C" float fabsf (float); // { dg-message "previous declaration" }
namespace Bob
{
extern "C" float fabsf (float, float); // { dg-error "C language" }
- extern "C" double fabs (double, double); // { dg-error "conflicts with previous declaration" }
+ extern "C" double fabs (double, double); // { dg-message "previous declaration" }
}
extern "C" double fabs (double); // { dg-error "C language" }
diff --git a/gcc/testsuite/g++.dg/lookup/extern-c-redecl.C b/gcc/testsuite/g++.dg/lookup/extern-c-redecl.C
index 3e901cc7759..fd49868ee4e 100644
--- a/gcc/testsuite/g++.dg/lookup/extern-c-redecl.C
+++ b/gcc/testsuite/g++.dg/lookup/extern-c-redecl.C
@@ -3,7 +3,7 @@
// { dg-do compile }
namespace A {
- extern "C" void foo_func () throw(); // { dg-error "conflicts" }
+ extern "C" void foo_func () throw(); // { dg-message "previous" }
}
// next line should trigger an error because
// it conflicts with previous declaration of foo_func (), due to
diff --git a/gcc/testsuite/g++.dg/lookup/extern-c-redecl6.C b/gcc/testsuite/g++.dg/lookup/extern-c-redecl6.C
new file mode 100644
index 00000000000..b4537d64a26
--- /dev/null
+++ b/gcc/testsuite/g++.dg/lookup/extern-c-redecl6.C
@@ -0,0 +1,25 @@
+extern "C" {
+ int i; // { dg-message "previous" }
+ float f; // { dg-message "previous" }
+ void fn (); // { dg-message "previous" }
+ int ai1[1]; // { dg-message "previous" }
+ extern int ai[];
+
+ namespace OK
+ {
+ int i;
+ float f;
+ void fn ();
+ extern int ai1[];
+ int ai[2];
+ }
+
+ namespace BAD
+ {
+ long i; // { dg-error "C language linkage" }
+ double f; // { dg-error "C language linkage" }
+ int fn (); // { dg-error "C language linkage" }
+ int ai1[2]; // { dg-error "C language linkage" }
+ }
+}
+
diff --git a/gcc/testsuite/g++.dg/lto/pr82414_0.C b/gcc/testsuite/g++.dg/lto/pr82414_0.C
new file mode 100644
index 00000000000..29753718b54
--- /dev/null
+++ b/gcc/testsuite/g++.dg/lto/pr82414_0.C
@@ -0,0 +1,13 @@
+// PR c++/82414
+// { dg-lto-do link }
+// { dg-lto-options { { -flto -g } } }
+
+typedef __attribute__ ((__aligned__ (16))) struct S { __extension__ unsigned long long Part[2]; } T; // bogus warning "violates one definition rule"
+
+int
+main ()
+{
+ T tf;
+ asm volatile ("" : : "g" (__alignof__(tf)), "g" (__alignof__ (struct S)), "g" (__alignof__ (T)));
+ return 0;
+}
diff --git a/gcc/testsuite/g++.dg/missing-symbol-2.C b/gcc/testsuite/g++.dg/missing-symbol-2.C
new file mode 100644
index 00000000000..4a119f8e9ad
--- /dev/null
+++ b/gcc/testsuite/g++.dg/missing-symbol-2.C
@@ -0,0 +1,58 @@
+/* { dg-options "-fdiagnostics-show-caret" } */
+
+extern int foo (void);
+
+void missing_open_paren (void)
+{
+ if foo ()) /* { dg-error "expected '\\(' before 'foo'" } */
+ {
+ }
+ /* { dg-begin-multiline-output "" }
+ if foo ())
+ ^~~
+ (
+ { dg-end-multiline-output "" } */
+}
+
+
+void missing_close_square (void)
+{
+ const char test [42; /* { dg-error "22: expected ']' before ';' token" } */
+ /* { dg-begin-multiline-output "" }
+ const char test [42;
+ ^
+ ]
+ { dg-end-multiline-output "" } */
+}
+
+int missing_semicolon (void)
+{
+ return 42 /* { dg-error "expected ';'" } */
+}
+/* { dg-begin-multiline-output "" }
+ return 42
+ ^
+ ;
+ }
+ ~
+ { dg-end-multiline-output "" } */
+
+
+int missing_colon_in_switch (int val)
+{
+ switch (val)
+ {
+ case 42 /* { dg-error "expected ':' before 'return'" } */
+ return 42;
+ /* { dg-begin-multiline-output "" }
+ case 42
+ ^
+ :
+ return 42;
+ ~~~~~~
+ { dg-end-multiline-output "" } */
+
+ default:
+ return val;
+ }
+}
diff --git a/gcc/testsuite/g++.dg/opt/pr70100.C b/gcc/testsuite/g++.dg/opt/pr70100.C
new file mode 100644
index 00000000000..3f612cba3fb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr70100.C
@@ -0,0 +1,21 @@
+// PR middle-end/70100
+// { dg-do compile { target c++11 } }
+// { dg-options "-O0" }
+
+void
+bar (int)
+{
+}
+
+template <typename ... Args>
+void
+foo (Args && ... args)
+{
+ [&] { [&] { bar(args...); }; };
+}
+
+int
+main ()
+{
+ foo (2);
+}
diff --git a/gcc/testsuite/g++.dg/opt/pr82159-2.C b/gcc/testsuite/g++.dg/opt/pr82159-2.C
new file mode 100644
index 00000000000..f153c29ddac
--- /dev/null
+++ b/gcc/testsuite/g++.dg/opt/pr82159-2.C
@@ -0,0 +1,65 @@
+// PR c++/82159
+// { dg-do compile }
+// { dg-options "" }
+
+template <typename T> struct D { T e; };
+struct F : D<int[0]> {
+ F(const F &);
+};
+struct G : F {
+ template <class T> G operator-(T);
+};
+template <class T> struct I {
+ typedef typename T::template J<I> ak;
+};
+template <class T> struct K { typename I<T>::ak an; };
+struct H {
+ G l;
+};
+struct C {
+ ~C();
+};
+template <class T> struct M : T {
+ template <typename U, typename V> M(U, V);
+ H h;
+ virtual void foo() { T::bar(&h); }
+};
+template <int, typename> class A;
+template <class> struct B {
+ typedef int BT;
+ struct BC {};
+ template <class T> struct BD {
+ G g;
+ BD(BT, T n) : g(n.l - 0) {}
+ };
+ B(BT, BC);
+};
+template <typename> struct O;
+template <int T, typename U>
+struct O<B<A<T, U> > > : public B<A<T, U> >::BC {};
+struct L : B<A<2, double> > {
+ struct P : C {
+ void bar(H *x) {
+ BT a;
+ BD<H>(a, *x);
+ }
+ };
+ template <typename U, typename V> L(U x, V n) : B(x, n) {}
+ int ll;
+ virtual int baz() { M<P>(this, ll); }
+};
+template <typename> class Q {
+ O<B<A<2, double> > > q;
+ virtual L baz() { L(0, q); }
+};
+template <template <class> class T> struct R {
+ R() { T<int>(); }
+};
+struct S {
+ template <class> class J : R<Q> {};
+};
+void foo() { K<S> c; }
+
+int main() {
+ return 0;
+}
diff --git a/gcc/testsuite/g++.dg/other/do1.C b/gcc/testsuite/g++.dg/other/do1.C
index b3a9daf9056..db65e7de301 100644
--- a/gcc/testsuite/g++.dg/other/do1.C
+++ b/gcc/testsuite/g++.dg/other/do1.C
@@ -7,7 +7,7 @@
void init ()
{
- do { } while (0)
- obj = 0; // { dg-error "expected|not declared" }
+ do { } while (0) // { dg-error "expected ';'" }
+ obj = 0; // { dg-error "not declared" }
}
diff --git a/gcc/testsuite/g++.dg/other/pr68252.C b/gcc/testsuite/g++.dg/other/pr68252.C
new file mode 100644
index 00000000000..5460d819780
--- /dev/null
+++ b/gcc/testsuite/g++.dg/other/pr68252.C
@@ -0,0 +1,5 @@
+// PR c++/68252
+
+struct Test {
+ static const int foo = (1 << sizeof (int)) * -3;
+};
diff --git a/gcc/testsuite/g++.dg/parse/error11.C b/gcc/testsuite/g++.dg/parse/error11.C
index d118c19deb8..1a49d6edb12 100644
--- a/gcc/testsuite/g++.dg/parse/error11.C
+++ b/gcc/testsuite/g++.dg/parse/error11.C
@@ -52,7 +52,7 @@ void func(void)
Foo[:B> k1; // { dg-bogus "cannot begin|alternate spelling" "smart error should not be triggered here" }
// { dg-error "6:missing template arguments before" "template" { target *-*-* } 51 }
// { dg-error "9:expected primary-expression before ':' token" "primary" { target *-*-* } 51 }
-// { dg-error "9:expected '\]' before ':' token" "backslash" { target *-*-* } 51 }
+// { dg-error "8:expected '\]' before ':' token" "backslash" { target *-*-* } 51 }
// { dg-error "6:missing template arguments before" "template" { target *-*-* } 52 }
// { dg-error "7:expected primary-expression before ':' token" "primary" { target *-*-* } 52 }
// { dg-error "7:expected '\]' before ':' token" "backslash" { target *-*-* } 52 }
diff --git a/gcc/testsuite/g++.dg/parse/pragma2.C b/gcc/testsuite/g++.dg/parse/pragma2.C
index 3dc5fc17788..c5616ff74f5 100644
--- a/gcc/testsuite/g++.dg/parse/pragma2.C
+++ b/gcc/testsuite/g++.dg/parse/pragma2.C
@@ -4,5 +4,5 @@
// does not.
int f(int x,
#pragma interface // { dg-error "not allowed here" }
- // { dg-bogus "expected identifier" "" { xfail *-*-* } .-1 }
- int y);
+ // The parser gets confused and issues an error on the next line.
+ int y); // { dg-bogus "" "" { xfail *-*-* } }
diff --git a/gcc/testsuite/g++.dg/template/crash108.C b/gcc/testsuite/g++.dg/template/crash108.C
index 221d80ee5f1..9bcabc6009b 100644
--- a/gcc/testsuite/g++.dg/template/crash108.C
+++ b/gcc/testsuite/g++.dg/template/crash108.C
@@ -1,5 +1,5 @@
// PR c++/50861
-template<class T> struct A {A(int b=k(0));}; // { dg-error "parameter|arguments" }
+template<class T> struct A {A(int b=k(0));}; // { dg-error "parameter|argument" }
 void f(int k){A<int> a;} // { dg-message "declared" }
// { dg-message "note" "note" { target *-*-* } 3 }
diff --git a/gcc/testsuite/g++.dg/template/error11.C b/gcc/testsuite/g++.dg/template/error11.C
index 3a469fd1a8c..16402988a87 100644
--- a/gcc/testsuite/g++.dg/template/error11.C
+++ b/gcc/testsuite/g++.dg/template/error11.C
@@ -1,4 +1,4 @@
// PR c++/12132
inline template <int> void foo () {} // { dg-error "<" }
-void abort (); // { dg-error ";" }
+void abort (); // { dg-error ";" "" { target *-*-* } .-1 }
diff --git a/gcc/testsuite/g++.dg/ubsan/pr82353.C b/gcc/testsuite/g++.dg/ubsan/pr82353.C
new file mode 100644
index 00000000000..a967cefa9cb
--- /dev/null
+++ b/gcc/testsuite/g++.dg/ubsan/pr82353.C
@@ -0,0 +1,60 @@
+/* { dg-do compile { target { { i?86-*-* x86_64-*-* } && lp64 } } } */
+/* { dg-options "-O2 -std=c++11 -fsanitize=undefined -fno-sanitize-recover=undefined -w -fdump-rtl-reload" } */
+
+extern unsigned long tf_2_var_1, tf_2_var_21;
+extern bool tf_2_var_2, tf_2_var_24, tf_2_var_6, tf_2_var_5;
+extern unsigned char tf_2_var_16, tf_2_var_31;
+extern short tf_2_var_69;
+extern unsigned tf_2_var_233;
+struct tf_2_struct_1 {
+ short member_1_0 : 27;
+ long member_1_1 : 10;
+};
+struct a {
+ int member_2_0 : 5;
+};
+struct tf_2_struct_3 {
+ static tf_2_struct_1 member_3_0;
+};
+struct tf_2_struct_4 {
+ static unsigned member_4_0;
+ a member_4_1;
+};
+struct tf_2_struct_5 {
+ tf_2_struct_1 member_5_2;
+ tf_2_struct_4 member_5_4;
+};
+struct tf_2_struct_6 {
+ tf_2_struct_5 member_6_2;
+ short member_6_4;
+} extern tf_2_struct_obj_2;
+extern tf_2_struct_3 tf_2_struct_obj_8;
+tf_2_struct_1 a;
+tf_2_struct_5 b;
+tf_2_struct_1 tf_2_struct_3::member_3_0;
+unsigned tf_2_struct_4::member_4_0;
+void tf_2_init() {
+ a.member_1_1 = tf_2_struct_obj_2.member_6_2.member_5_2.member_1_1 = 5;
+}
+void tf_2_foo() {
+ int c = tf_2_struct_obj_2.member_6_2.member_5_4.member_4_1.member_2_0 -
+ -~tf_2_struct_obj_2.member_6_4 * char(90284000534361);
+ tf_2_struct_obj_8.member_3_0.member_1_0 =
+ tf_2_var_24 >
+ tf_2_var_21 * a.member_1_0 * tf_2_var_2 - tf_2_var_5 % a.member_1_1;
+ if ((~(tf_2_var_31 * tf_2_var_6) &&
+ -~tf_2_struct_obj_2.member_6_4 * 90284000534361) %
+ ~tf_2_var_31 * tf_2_var_6)
+ b.member_5_2.member_1_0 << tf_2_var_16 << tf_2_var_1;
+ tf_2_var_233 = -~tf_2_struct_obj_2.member_6_4 * char(90284000534361);
+ int d(tf_2_struct_obj_2.member_6_4);
+ if (b.member_5_2.member_1_0)
+ b.member_5_2.member_1_1 = c;
+ bool e(~-~tf_2_struct_obj_2.member_6_4);
+ a.member_1_1 % e;
+ if (tf_2_var_5 / tf_2_struct_obj_2.member_6_2.member_5_2.member_1_1)
+ b.member_5_4.member_4_0 = tf_2_var_21 * a.member_1_0 * tf_2_var_2;
+ tf_2_var_69 = tf_2_var_6;
+}
+
+/* { dg-final { scan-rtl-dump-not "Inserting rematerialization insn" "reload" } } */
diff --git a/gcc/testsuite/g++.old-deja/g++.abi/vtable2.C b/gcc/testsuite/g++.old-deja/g++.abi/vtable2.C
index 2c88a95800b..96533e09218 100644
--- a/gcc/testsuite/g++.old-deja/g++.abi/vtable2.C
+++ b/gcc/testsuite/g++.old-deja/g++.abi/vtable2.C
@@ -1,5 +1,5 @@
// { dg-do run }
-// { dg-options "-Wno-attributes -fno-strict-aliasing" }
+// { dg-options "-Wno-attribute-alias -fno-strict-aliasing" }
// Origin: Mark Mitchell <mark@codesourcery.com>
#if defined (__GXX_ABI_VERSION) && __GXX_ABI_VERSION >= 100
diff --git a/gcc/testsuite/g++.old-deja/g++.other/using9.C b/gcc/testsuite/g++.old-deja/g++.other/using9.C
index 0e34156d8f6..c79f993fd2b 100644
--- a/gcc/testsuite/g++.old-deja/g++.other/using9.C
+++ b/gcc/testsuite/g++.old-deja/g++.other/using9.C
@@ -13,7 +13,7 @@ struct x {};
using ::x;
using ::a;
-extern "C" void foo (); // { dg-error "previous declaration" }
+extern "C" void foo (); // { dg-message "previous declaration" }
namespace {
extern "C" int foo (); // { dg-error "C.*linkage" }
diff --git a/gcc/testsuite/g++.old-deja/g++.pt/crash3.C b/gcc/testsuite/g++.old-deja/g++.pt/crash3.C
index 160cbe541a1..e5b3f25b530 100644
--- a/gcc/testsuite/g++.old-deja/g++.pt/crash3.C
+++ b/gcc/testsuite/g++.old-deja/g++.pt/crash3.C
@@ -6,11 +6,11 @@ public:
CVector<int> f() const
{
CVector<int> v();
- return v;
+ return v; // { dg-error "convert" }
}
CVector<long> g() const
{
CVector<long> v();
- return v;
+ return v; // { dg-error "convert" }
}
};
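
The two dg-error "convert" directives above stem from the most vexing parse:
inside f and g, "CVector<int> v ();" declares a function named v rather than a
default-initialized object, so the return statement tries to convert a function
pointer to CVector<int>. A minimal sketch of the same trap (hypothetical
names):

    struct T {};

    T f ()
    {
      T v ();    /* declares a function v returning T, not an object */
      return v;  /* error: cannot convert 'T (*)()' to 'T' */
    }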
diff --git a/gcc/testsuite/gcc.c-torture/execute/20040709-3.c b/gcc/testsuite/gcc.c-torture/execute/20040709-3.c
new file mode 100644
index 00000000000..e6622c6e257
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/20040709-3.c
@@ -0,0 +1,5 @@
+/* PR rtl-optimization/68205 */
+/* { dg-require-effective-target int32plus } */
+/* { dg-additional-options "-fno-common" } */
+
+#include "20040709-2.c"
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr80421.c b/gcc/testsuite/gcc.c-torture/execute/pr80421.c
new file mode 100644
index 00000000000..b13ab5fc121
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr80421.c
@@ -0,0 +1,121 @@
+/* PR middle-end/80421 */
+
+__attribute__ ((noinline, noclone)) void
+baz (const char *t, ...)
+{
+ asm volatile (""::"r" (t):"memory");
+ if (*t == 'T')
+ __builtin_abort ();
+}
+
+unsigned int
+foo (char x)
+{
+ baz ("x %c\n", x);
+ switch (x)
+ {
+ default:
+ baz ("case default\n");
+ if (x == 'D' || x == 'I')
+ baz ("This should never be reached.\n");
+ return 0;
+ case 'D':
+ baz ("case 'D'\n");
+ return 0;
+ case 'I':
+ baz ("case 'I'\n");
+ return 0;
+ }
+}
+
+void
+bar (void)
+{
+ int a = 2;
+ int b = 5;
+ char c[] = {
+ 2, 4, 1, 2, 5, 5, 2, 4, 4, 0, 0, 0, 0, 0, 0, 3, 4, 4, 2, 4,
+ 1, 2, 5, 5, 2, 4, 1, 0, 0, 0, 2, 4, 4, 3, 4, 3, 3, 5, 1, 3,
+ 5, 5, 2, 4, 4, 2, 4, 1, 3, 5, 3, 3, 5, 1, 3, 5, 1, 2, 4, 4,
+ 2, 4, 2, 3, 5, 1, 3, 5, 1, 3, 5, 5, 2, 4, 1, 2, 4, 2, 3, 5,
+ 3, 3, 5, 1, 3, 5, 5, 2, 4, 1, 2, 4, 1, 3, 5, 3, 3, 5, 1, 3,
+ 5, 5, 2, 4, 4, 2, 4, 1, 3, 5, 3, 3, 5, 1, 3, 5, 1, 2, 4, 1,
+ 2, 4, 2, 3, 5, 1, 3, 5, 1, 3, 5, 1, 2, 4, 1, 2, 4, 1, 3, 5,
+ 1, 3, 5, 1, 3, 5, 1, 2, 4, 4, 2, 4, 1, 3, 5, 1, 3, 5, 1, 3,
+ 5, 5, 2, 4, 4, 2, 4, 2, 3, 5, 3, 3, 5, 1, 3, 5, 5, 2, 4, 4,
+ 2, 4, 1, 3, 5, 3, 3, 5, 1, 3, 5, 1, 2, 5, 5, 2, 4, 2, 3, 5,
+ 1, 3, 4, 1, 3, 5, 1, 2, 5, 5, 2, 4, 1, 2, 5, 1, 3, 5, 3, 3,
+ 5, 1, 2, 5, 5, 2, 4, 2, 2, 5, 1, 3, 5, 3, 3, 5, 1, 2, 5, 1,
+ 2, 4, 1, 2, 5, 2, 3, 5, 1, 3, 5, 1, 2, 5, 1, 2, 4, 2, 2, 5,
+ 1, 3, 5, 1, 3, 5, 1, 2, 5, 5, 2, 4, 2, 2, 5, 2, 3, 5, 3, 3,
+ 5, 1, 2, 5, 5, 2, 4, 2, 2, 5, 2, 3, 5, 3, 3, 5, 1, 2, 5, 5,
+ 2, 4, 2, 2, 5, 1, 3, 5, 3, 3, 5, 1, 2, 5, 5, 2, 4, 2, 2, 5,
+ 1, 3, 5, 3, 3, 5, 1, 2, 5, 1, 2, 4, 1, 2, 5, 2, 3, 5, 1, 3,
+ 5, 1, 2, 5, 5, 2, 4, 2, 2, 5, 2, 3, 5, 3, 3, 5, 1, 2, 5, 5,
+ 2, 4, 1, 2, 5, 1, 3, 5, 3, 3, 5, 1, 2, 5, 5, 2, 4, 2, 2, 5,
+ 1, 3, 5, 3, 3, 5, 1, 2, 5, 5, 2, 4, 2, 2, 5, 1, 3, 5, 3, 3,
+ 5, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+ };
+ char *f = c + 390;
+ int i, j, e, g, h;
+ char k, l;
+ i = 26;
+ j = 25;
+ k = l = 'M';
+ h = 2;
+ while (i > 0)
+ {
+ int x = i - a;
+ x = x > 0 ? x : 0;
+ x = j - x;
+ g = x * 3 + h;
+ switch (f[g])
+ {
+ case 1:
+ --i;
+ --j;
+ h = 2;
+ f -= b * 3;
+ k = 'M';
+ break;
+ case 2:
+ --i;
+ h = 0;
+ f -= b * 3;
+ k = 'I';
+ break;
+ case 3:
+ --i;
+ h = 2;
+ f -= b * 3;
+ k = 'I';
+ break;
+ case 4:
+ --j;
+ h = 1;
+ k = 'D';
+ break;
+ case 5:
+ --j;
+ h = 2;
+ k = 'D';
+ break;
+ }
+ if (k == l)
+ ++e;
+ else
+ {
+ foo (l);
+ l = k;
+ }
+ }
+}
+
+int
+main ()
+{
+ char l = 'D';
+ foo (l);
+ bar ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.c-torture/execute/pr82524.c b/gcc/testsuite/gcc.c-torture/execute/pr82524.c
new file mode 100644
index 00000000000..07ac4b61916
--- /dev/null
+++ b/gcc/testsuite/gcc.c-torture/execute/pr82524.c
@@ -0,0 +1,37 @@
+/* PR target/82524 */
+
+struct S { unsigned char b, g, r, a; };
+union U { struct S c; unsigned v; };
+
+static inline unsigned char
+foo (unsigned char a, unsigned char b)
+{
+ return ((a + 1) * b) >> 8;
+}
+
+__attribute__((noinline, noclone)) unsigned
+bar (union U *x, union U *y)
+{
+ union U z;
+ unsigned char v = x->c.a;
+ unsigned char w = foo (y->c.a, 255 - v);
+ z.c.r = foo (x->c.r, v) + foo (y->c.r, w);
+ z.c.g = foo (x->c.g, v) + foo (y->c.g, w);
+ z.c.b = foo (x->c.b, v) + foo (y->c.b, w);
+ z.c.a = 0;
+ return z.v;
+}
+
+int
+main ()
+{
+ union U a, b, c;
+ if ((unsigned char) ~0 != 255 || sizeof (unsigned) != 4)
+ return 0;
+ a.c = (struct S) { 255, 255, 255, 0 };
+ b.c = (struct S) { 255, 255, 255, 255 };
+ c.v = bar (&a, &b);
+ if (c.c.b != 255 || c.c.g != 255 || c.c.r != 255 || c.c.a != 0)
+ __builtin_abort ();
+ return 0;
+}
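
The helper foo above computes ((a + 1) * b) >> 8, a fixed-point approximation
of b * a / 255 in which scaling by (a + 1) makes a full-scale alpha reproduce
the input exactly:

    foo (255, 255) = ((255 + 1) * 255) >> 8 = 65280 >> 8 = 255

which is why main expects every colour channel of the blended result to be 255
and the alpha channel to stay 0.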
diff --git a/gcc/testsuite/gcc.dg/Wstrict-overflow-7.c b/gcc/testsuite/gcc.dg/Wstrict-overflow-7.c
index 5bf7b6005ca..401cbc3c987 100644
--- a/gcc/testsuite/gcc.dg/Wstrict-overflow-7.c
+++ b/gcc/testsuite/gcc.dg/Wstrict-overflow-7.c
@@ -6,5 +6,5 @@
int
foo (int i)
{
- return i + 10 > i; /* { dg-warning "assuming signed overflow does not occur" "correct warning" } */
+ return i + 10 > i; /* { dg-warning "assuming signed overflow does not occur" "correct warning" { xfail *-*-* } } */
}
diff --git a/gcc/testsuite/gcc.dg/compat/struct-layout-1_generate.c b/gcc/testsuite/gcc.dg/compat/struct-layout-1_generate.c
index 80c7355a50e..75e902cd1f4 100644
--- a/gcc/testsuite/gcc.dg/compat/struct-layout-1_generate.c
+++ b/gcc/testsuite/gcc.dg/compat/struct-layout-1_generate.c
@@ -1893,7 +1893,7 @@ generate_fields (enum FEATURE features, struct entry *e, struct entry *parent,
|| (e[n].type >= &attrib_array_types[0]
&& e[n].type < &attrib_array_types[NAATYPES2])
|| (e[n].type >= &complex_attrib_array_types[0]
- && e[n].type < &complex_attrib_array_types[NAATYPES2])
+ && e[n].type < &complex_attrib_array_types[NCAATYPES2])
|| (e[n].type >= &aligned_bitfld_types[0]
&& e[n].type < &aligned_bitfld_types[n_aligned_bitfld_types])))
e[n].attrib = NULL;
diff --git a/gcc/testsuite/gcc.dg/graphite/fuse-1.c b/gcc/testsuite/gcc.dg/graphite/fuse-1.c
index e3bf7bcf99b..204d3b20703 100644
--- a/gcc/testsuite/gcc.dg/graphite/fuse-1.c
+++ b/gcc/testsuite/gcc.dg/graphite/fuse-1.c
@@ -1,15 +1,15 @@
/* Check that the two loops are fused and that we manage to fold the two xor
operations. */
-/* { dg-options "-O2 -floop-nest-optimize -fdump-tree-forwprop4-all -fdump-tree-graphite-all" } */
+/* { dg-options "-O2 -floop-nest-optimize -fdump-tree-forwprop-all -fdump-tree-graphite-all" } */
/* Make sure we fuse the loops like this:
AST generated by isl:
for (int c0 = 0; c0 <= 99; c0 += 1) {
- S_3(0, c0);
- S_6(0, c0);
- S_9(0, c0);
+ S_3(c0);
+ S_6(c0);
+ S_9(c0);
} */
-/* { dg-final { scan-tree-dump-times "AST generated by isl:.*for \\(int c0 = 0; c0 <= 99; c0 \\+= 1\\) \\{.*S_.*\\(0, c0\\);.*S_.*\\(0, c0\\);.*S_.*\\(0, c0\\);.*\\}" 1 "graphite" } } */
+/* { dg-final { scan-tree-dump-times "AST generated by isl:.*for \\(int c0 = 0; c0 <= 99; c0 \\+= 1\\) \\{.*S_.*\\(c0\\);.*S_.*\\(c0\\);.*S_.*\\(c0\\);.*\\}" 1 "graphite" } } */
/* Check that after fusing the loops, the scalar computation is also fused. */
/* { dg-final { scan-tree-dump-times "gimple_simplified to\[^\\n\]*\\^ 12" 1 "forwprop4" } } */
diff --git a/gcc/testsuite/gcc.dg/graphite/fuse-2.c b/gcc/testsuite/gcc.dg/graphite/fuse-2.c
index dc0a9b2b61c..f4cea4360d9 100644
--- a/gcc/testsuite/gcc.dg/graphite/fuse-2.c
+++ b/gcc/testsuite/gcc.dg/graphite/fuse-2.c
@@ -3,13 +3,13 @@
/* Make sure we fuse the loops like this:
AST generated by isl:
for (int c0 = 0; c0 <= 99; c0 += 1) {
- S_3(0, c0);
- S_6(0, c0);
- S_9(0, c0);
+ S_3(c0);
+ S_6(c0);
+ S_9(c0);
}
*/
-/* { dg-final { scan-tree-dump-times "AST generated by isl:.*for \\(int c0 = 0; c0 <= 99; c0 \\+= 1\\) \\{.*S_.*\\(0, c0\\);.*S_.*\\(0, c0\\);.*S_.*\\(0, c0\\);.*\\}" 1 "graphite" } } */
+/* { dg-final { scan-tree-dump-times "AST generated by isl:.*for \\(int c0 = 0; c0 <= 99; c0 \\+= 1\\) \\{.*S_.*\\(c0\\);.*S_.*\\(c0\\);.*S_.*\\(c0\\);.*\\}" 1 "graphite" } } */
#define MAX 100
int A[MAX], B[MAX], C[MAX];
diff --git a/gcc/testsuite/gcc.dg/graphite/id-30.c b/gcc/testsuite/gcc.dg/graphite/id-30.c
new file mode 100644
index 00000000000..f8144cec4f2
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/id-30.c
@@ -0,0 +1,16 @@
+/* The modulo constraints we generate for the niter expression
+   (unsigned long)ubound - (unsigned long)lbound
+   end up with a modulo that we cannot represent in the expression
+   type we are using (int64_t), so we run into the codegen error
+   where ISL generates a modulo/divide by something that doesn't fit
+   the type we code-generate with.  Verify we properly elide those.  */
+
+void foo (double *a, long int lbound0, long int ubound0,
+ long int lbound1, long int ubound1, long int stride1)
+{
+ if (lbound0 < ubound0)
+ for (long int i = lbound0; i <= ubound0; ++i)
+ if (lbound1 < ubound1)
+ for (long int j = lbound1; j <= ubound1; ++j)
+ a[i*stride1 + j] = 0.;
+}
diff --git a/gcc/testsuite/gcc.dg/graphite/pr69728.c b/gcc/testsuite/gcc.dg/graphite/pr69728.c
index 35ea5bd15bb..e8cd7bec0a1 100644
--- a/gcc/testsuite/gcc.dg/graphite/pr69728.c
+++ b/gcc/testsuite/gcc.dg/graphite/pr69728.c
@@ -1,7 +1,7 @@
/* { dg-do compile } */
-/* { dg-options "-O3 -floop-nest-optimize" } */
+/* { dg-options "-O3 -floop-nest-optimize -fdump-tree-graphite-details" } */
-int a[1];
+int a[9];
int b, c, d, e;
void
fn1 ()
@@ -19,3 +19,9 @@ fn1 ()
}
}
}
+
+/* At the moment only ISL figures that if (d) is always true. We've
+ run into scheduling issues before here, not being able to handle
+ empty domains. */
+
+/* { dg-final { scan-tree-dump "loop nest optimized" "graphite" } } */
diff --git a/gcc/testsuite/gcc.dg/graphite/pr82451.c b/gcc/testsuite/gcc.dg/graphite/pr82451.c
new file mode 100644
index 00000000000..802b931fddd
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/graphite/pr82451.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O -floop-parallelize-all" } */
+
+static int a[];
+int b[1];
+int c;
+static void
+d (int *f, int *g)
+{
+ int e;
+ for (e = 0; e < 2; e++)
+ g[e] = 1;
+ for (e = 0; e < 2; e++)
+ g[e] = f[e] + f[e + 1];
+}
+void
+h ()
+{
+ for (;; c += 8)
+ d (&a[c], b);
+}
diff --git a/gcc/testsuite/gcc.dg/ipa/inlinehint-4.c b/gcc/testsuite/gcc.dg/ipa/inlinehint-4.c
index 441a0c70855..71b16f80be2 100644
--- a/gcc/testsuite/gcc.dg/ipa/inlinehint-4.c
+++ b/gcc/testsuite/gcc.dg/ipa/inlinehint-4.c
@@ -35,5 +35,5 @@ test (int i)
lookup (9 * i);
}
/* { dg-final { scan-ipa-dump "Wrapper penalty" "inline" } } */
-/* { dg-final { scan-ipa-dump-not "Inlining lookup_slow to lookup" "inline" } } */
-/* { dg-final { scan-ipa-dump "Inlining lookup to test" "inline" } } */
+/* { dg-final { scan-ipa-dump-not "Inlined lookup_slow into lookup" "inline" } } */
+/* { dg-final { scan-ipa-dump "Inlined lookup into test" "inline" } } */
diff --git a/gcc/testsuite/gcc.dg/missing-symbol-2.c b/gcc/testsuite/gcc.dg/missing-symbol-2.c
new file mode 100644
index 00000000000..7ee795dfcc5
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/missing-symbol-2.c
@@ -0,0 +1,71 @@
+/* { dg-options "-fdiagnostics-show-caret -Wno-switch-unreachable" } */
+
+extern int foo (void);
+
+void missing_open_paren (void)
+{
+ if foo ()) /* { dg-line missing_open_paren } */
+ {
+ }
+ /* { dg-error "expected '\\(' before 'foo'" "" { target c } missing_open_paren } */
+ /* { dg-begin-multiline-output "" }
+ if foo ())
+ ^~~
+ (
+ { dg-end-multiline-output "" } */
+ /* { dg-error "expected statement before '\\)' token" "" { target c } missing_open_paren } */
+ /* { dg-begin-multiline-output "" }
+ if foo ())
+ ^
+ { dg-end-multiline-output "" } */
+}
+
+void missing_close_square (void)
+{
+ const char test [42; /* { dg-error "22: expected ']' before ';' token" } */
+ /* { dg-begin-multiline-output "" }
+ const char test [42;
+ ^
+ ]
+ { dg-end-multiline-output "" } */
+}
+
+int missing_semicolon (void)
+{
+ return 42 /* { dg-error "expected ';'" } */
+}
+/* { dg-begin-multiline-output "" }
+ return 42
+ ^
+ ;
+ }
+ ~
+ { dg-end-multiline-output "" } */
+
+
+/* We don't offer a fix-it hint for this case in C, as it could be
+   a colon or an ellipsis.
+   TODO: we could be smarter about error-recovery here; given the
+   following "return", perhaps we could assume a missing colon.  */
+
+int missing_colon_in_switch (int val)
+{
+ switch (val)
+ {
+ case 42
+ return 42; /* { dg-error "expected ':' or '...' before 'return'" } */
+ /* { dg-begin-multiline-output "" }
+ return 42;
+ ^~~~~~
+ { dg-end-multiline-output "" } */
+
+ default:
+ return val;
+ }
+}
+
+/* { dg-begin-multiline-output "" }
+ int dummy;
+ ^~~
+ { dg-end-multiline-output "" } */
+int dummy;/* { dg-error "expected declaration or statement at end of input" "" { target c } } */
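
The new test above leans on the dg-line directive: naming a source line once
lets several diagnostics elsewhere in the file refer back to it. A minimal
sketch of the idiom (hypothetical test, mirroring the missing_open_paren case
above):

    extern int foo (void);

    void f (void)
    {
      if foo ()) /* { dg-line broken_if } */
        ;
      /* Both diagnostics key off the named line above.  */
      /* { dg-error "expected '\\(' before 'foo'" "" { target c } broken_if } */
      /* { dg-error "expected statement before '\\)' token" "" { target c } broken_if } */
    }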
diff --git a/gcc/testsuite/gcc.dg/missing-symbol-3.c b/gcc/testsuite/gcc.dg/missing-symbol-3.c
new file mode 100644
index 00000000000..e2d00dfa03f
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/missing-symbol-3.c
@@ -0,0 +1,50 @@
+/* { dg-options "-fdiagnostics-show-caret" } */
+
+/* A sequence of bogus _Static_assert declarations.
+ We can offer fix-it hints for some of these, but not all. */
+
+void test_static_assert_1 (void)
+{
+ _Static_assert sizeof(int) >= sizeof(char); /* { dg-error "expected '\\(' before 'sizeof'" } */
+ /* { dg-begin-multiline-output "" }
+ _Static_assert sizeof(int) >= sizeof(char);
+ ^~~~~~
+ (
+ { dg-end-multiline-output "" } */
+}
+
+void test_static_assert_2 (void)
+{
+ _Static_assert(sizeof(int) >= sizeof(char); /* { dg-error "expected ',' before ';' token" } */
+ /* { dg-begin-multiline-output "" }
+ _Static_assert(sizeof(int) >= sizeof(char);
+ ^
+ ,
+ { dg-end-multiline-output "" } */
+}
+
+void test_static_assert_3 (void)
+{
+ _Static_assert(sizeof(int) >= sizeof(char),; /* { dg-error "expected string literal before ';' token" } */
+ /* { dg-begin-multiline-output "" }
+ _Static_assert(sizeof(int) >= sizeof(char),;
+ ^
+ { dg-end-multiline-output "" } */
+}
+
+void test_static_assert_4 (void)
+{
+ _Static_assert(sizeof(int) >= sizeof(char), "msg"; /* { dg-error "expected '\\)' before ';' token" } */
+ /* { dg-begin-multiline-output "" }
+ _Static_assert(sizeof(int) >= sizeof(char), "msg";
+ ~ ^
+ )
+ { dg-end-multiline-output "" } */
+}
+
+/* The final one is correct. */
+
+void test_static_assert_5 (void)
+{
+ _Static_assert(sizeof(int) >= sizeof(char), "msg");
+}
diff --git a/gcc/testsuite/gcc.dg/noncompile/940112-1.c b/gcc/testsuite/gcc.dg/noncompile/940112-1.c
index bb5e0f66c85..0a9e07dcaf9 100644
--- a/gcc/testsuite/gcc.dg/noncompile/940112-1.c
+++ b/gcc/testsuite/gcc.dg/noncompile/940112-1.c
@@ -3,5 +3,5 @@ f (int x)
{
double e = 1;
e = 1;
- return (e)
-} /* { dg-error "parse error|syntax error|expected" } */
+ return (e) /* { dg-error "parse error|syntax error|expected" } */
+}
diff --git a/gcc/testsuite/gcc.dg/noncompile/971104-1.c b/gcc/testsuite/gcc.dg/noncompile/971104-1.c
index 39e00c60fc2..4a04dad7747 100644
--- a/gcc/testsuite/gcc.dg/noncompile/971104-1.c
+++ b/gcc/testsuite/gcc.dg/noncompile/971104-1.c
@@ -27,6 +27,6 @@ static void up(int sem){
printf("%s had processes sleeping on it!\n",
({ "MUTEX ", "BARB_SEM 1", "BARB_SEM 2", "CUST_SEM 1",
"CUST_SEM 2", "WAIT_SEM 1", "WAIT_SEM 2", "WAIT_SEM 3",
- "WAIT_SEM 4"} /* { dg-error "parse error|syntax error|expected" } */
- [( sb.sem_num )]) ); /* { dg-error "expected" } */
+ "WAIT_SEM 4"} /* { dg-error "expected" } */
+ [( sb.sem_num )]) );
}
diff --git a/gcc/testsuite/gcc.dg/plugin/plugin.exp b/gcc/testsuite/gcc.dg/plugin/plugin.exp
index c7a3b4dbf2f..8859138113a 100644
--- a/gcc/testsuite/gcc.dg/plugin/plugin.exp
+++ b/gcc/testsuite/gcc.dg/plugin/plugin.exp
@@ -62,6 +62,13 @@ set plugin_test_list [list \
{ start_unit_plugin.c start_unit-test-1.c } \
{ finish_unit_plugin.c finish_unit-test-1.c } \
{ wide-int_plugin.c wide-int-test-1.c } \
+ { poly-int-01_plugin.c poly-int-test-1.c } \
+ { poly-int-02_plugin.c poly-int-test-1.c } \
+ { poly-int-03_plugin.c poly-int-test-1.c } \
+ { poly-int-04_plugin.c poly-int-test-1.c } \
+ { poly-int-05_plugin.c poly-int-test-1.c } \
+ { poly-int-06_plugin.c poly-int-test-1.c } \
+ { poly-int-07_plugin.c poly-int-test-1.c } \
{ diagnostic_plugin_test_show_locus.c \
diagnostic-test-show-locus-bw.c \
diagnostic-test-show-locus-color.c \
diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-01_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-01_plugin.c
new file mode 100644
index 00000000000..099c9d94c42
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/poly-int-01_plugin.c
@@ -0,0 +1,21 @@
+/* Not worth spending time optimizing this. */
+/* { dg-options "-O0" } */
+
+#include "config.h"
+#include "gcc-plugin.h"
+#include "system.h"
+#include "coretypes.h"
+#include "poly-int-tests.h"
+
+int plugin_is_GPL_compatible;
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+ struct plugin_gcc_version *version)
+{
+ test_helper ();
+ test_poly_coeff_traits ();
+ test_nonpoly ();
+ test_endpoint_representable ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-02_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-02_plugin.c
new file mode 100644
index 00000000000..bf103acba8b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/poly-int-02_plugin.c
@@ -0,0 +1,18 @@
+/* Not worth spending time optimizing this. */
+/* { dg-options "-O0" } */
+
+#include "config.h"
+#include "gcc-plugin.h"
+#include "system.h"
+#include "coretypes.h"
+#include "poly-int-tests.h"
+
+int plugin_is_GPL_compatible;
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+ struct plugin_gcc_version *version)
+{
+ test_num_coeffs_core<1> ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-03_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-03_plugin.c
new file mode 100644
index 00000000000..0c08ead8b75
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/poly-int-03_plugin.c
@@ -0,0 +1,18 @@
+/* Not worth spending time optimizing this. */
+/* { dg-options "-O0" } */
+
+#include "config.h"
+#include "gcc-plugin.h"
+#include "system.h"
+#include "coretypes.h"
+#include "poly-int-tests.h"
+
+int plugin_is_GPL_compatible;
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+ struct plugin_gcc_version *version)
+{
+ test_num_coeffs_extra<1> ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-04_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-04_plugin.c
new file mode 100644
index 00000000000..8b0a5f91fb4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/poly-int-04_plugin.c
@@ -0,0 +1,18 @@
+/* Not worth spending time optimizing this. */
+/* { dg-options "-O0" } */
+
+#include "config.h"
+#include "gcc-plugin.h"
+#include "system.h"
+#include "coretypes.h"
+#include "poly-int-tests.h"
+
+int plugin_is_GPL_compatible;
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+ struct plugin_gcc_version *version)
+{
+ test_num_coeffs_core<2> ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-05_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-05_plugin.c
new file mode 100644
index 00000000000..62493118fe4
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/poly-int-05_plugin.c
@@ -0,0 +1,18 @@
+/* Not worth spending time optimizing this. */
+/* { dg-options "-O0" } */
+
+#include "config.h"
+#include "gcc-plugin.h"
+#include "system.h"
+#include "coretypes.h"
+#include "poly-int-tests.h"
+
+int plugin_is_GPL_compatible;
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+ struct plugin_gcc_version *version)
+{
+ test_num_coeffs_extra<2> ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-06_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-06_plugin.c
new file mode 100644
index 00000000000..ee4308c26bf
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/poly-int-06_plugin.c
@@ -0,0 +1,26 @@
+/* Not worth spending time optimizing this. */
+/* { dg-options "-O0" } */
+
+#include "config.h"
+#include "gcc-plugin.h"
+#include "system.h"
+#include "coretypes.h"
+#include "poly-int-tests.h"
+
+int plugin_is_GPL_compatible;
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+ struct plugin_gcc_version *version)
+{
+ test_signed_2<int> ();
+ test_signed_2<HOST_WIDE_INT> ();
+ test_signed_2<offset_int> ();
+ test_signed_2<widest_int> ();
+
+ test_ordered_2<unsigned short> ();
+ test_ordered_2<unsigned int> ();
+ test_ordered_2<unsigned HOST_WIDE_INT> ();
+
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-07_plugin.c b/gcc/testsuite/gcc.dg/plugin/poly-int-07_plugin.c
new file mode 100644
index 00000000000..e3203d9f3e1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/poly-int-07_plugin.c
@@ -0,0 +1,18 @@
+/* Not worth spending time optimizing this. */
+/* { dg-options "-O" } */
+
+#include "config.h"
+#include "gcc-plugin.h"
+#include "system.h"
+#include "coretypes.h"
+#include "poly-int-tests.h"
+
+int plugin_is_GPL_compatible;
+
+int
+plugin_init (struct plugin_name_args *plugin_info,
+ struct plugin_gcc_version *version)
+{
+ test_num_coeffs_core<3> ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/plugin/poly-int-test-1.c b/gcc/testsuite/gcc.dg/plugin/poly-int-test-1.c
new file mode 100644
index 00000000000..fe284d59433
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/plugin/poly-int-test-1.c
@@ -0,0 +1,8 @@
+/* { dg-do compile } */
+/* { dg-options "-O" } */
+
+int
+main (int argc, char **argv)
+{
+ return 0;
+}
diff --git a/gcc/poly-int.cc b/gcc/testsuite/gcc.dg/plugin/poly-int-tests.h
index 6252e01816f..9409ec7bc0f 100644
--- a/gcc/poly-int.cc
+++ b/gcc/testsuite/gcc.dg/plugin/poly-int-tests.h
@@ -1,50 +1,14 @@
-/* Polynomial integer classes.
- Copyright (C) 2017 Free Software Foundation, Inc.
+/* This file contains templated tests that are then instantiated in
+ multiple plugin tests, in order to reduce the size of each test. */
-This file is part of GCC.
-
-GCC is free software; you can redistribute it and/or modify it
-under the terms of the GNU General Public License as published by the
-Free Software Foundation; either version 3, or (at your option) any
-later version.
-
-GCC is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
-FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
-for more details.
-
-You should have received a copy of the GNU General Public License
-along with GCC; see the file COPYING3. If not see
-<http://www.gnu.org/licenses/>. */
-
-#include "config.h"
-#include "system.h"
-#include "coretypes.h"
-#include "selftest.h"
-
-#if CHECKING_P
-
-#if 1
-#undef ASSERT_FALSE
#define ASSERT_FALSE(X) gcc_assert (!(X))
-
-#undef ASSERT_TRUE
#define ASSERT_TRUE(X) gcc_assert (X)
-
-#undef ASSERT_EQ
#define ASSERT_EQ(X, Y) gcc_assert ((X) == (Y))
-
-#undef ASSERT_MUST_EQ
#define ASSERT_MUST_EQ(X, Y) gcc_assert (must_eq (X, Y))
-
-#undef ASSERT_MAY_NE
#define ASSERT_MAY_NE(X, Y) gcc_assert (may_ne (X, Y))
-#endif
-namespace selftest {
-
-/* make (X) converts int X into T, using an arbitrary precision for
- wide_int. It passes other types of X through as-is. */
+/* make (X) converts an X of type int into T, using an arbitrary
+ precision for wide_int. It passes other types of X through as-is. */
template<typename T>
struct coeff_helper
{
@@ -72,7 +36,7 @@ struct poly_helper
template<typename T>
template<typename T1, typename T2, typename T3>
inline T
-poly_helper <T>::make (const T1 &a, const T2 &b, const T3 &c)
+poly_helper<T>::make (const T1 &a, const T2 &b, const T3 &c)
{
T res;
res = coeff_helper<C>::make (a);
@@ -128,6 +92,10 @@ test_poly_coeff_traits ()
ASSERT_EQ (poly_coeff_traits<unsigned HOST_WIDE_INT>::signedness, 0);
ASSERT_EQ (poly_coeff_traits<unsigned HOST_WIDE_INT>::max_value,
HOST_WIDE_INT_M1U);
+
+ ASSERT_EQ (poly_coeff_traits<wide_int>::signedness, -1);
+ ASSERT_EQ (poly_coeff_traits<offset_int>::signedness, 1);
+ ASSERT_EQ (poly_coeff_traits<widest_int>::signedness, 1);
}
/* Test poly_int_traits. */
@@ -138,16 +106,16 @@ test_poly_int_traits ()
{
/* Check the properties of poly_int_traits<C>. */
ASSERT_FALSE (poly_int_traits<C>::is_poly);
- ASSERT_EQ (1, poly_int_traits<C>::num_coeffs);
+ ASSERT_EQ (poly_int_traits<C>::num_coeffs, 1);
ASSERT_EQ ((C *) 0 - (typename poly_int_traits<C>::coeff_type *) 0, 0);
/* Check the properties of poly_int_traits<T>. */
ASSERT_TRUE (poly_int_traits<T>::is_poly);
- ASSERT_EQ (N, poly_int_traits<T>::num_coeffs);
+ ASSERT_EQ (poly_int_traits<T>::num_coeffs, N);
ASSERT_EQ ((C *) 0 - (typename poly_int_traits<T>::coeff_type *) 0, 0);
}
-/* Test handling of constants. */
+/* Test the handling of constants. */
template<unsigned int N, typename C, typename T>
static void
@@ -157,7 +125,7 @@ test_constants ()
T zero, one, two;
poly_int<N, unsigned char> two_uc = 2;
- /* Test operator= on C. */
+ /* Test operator = on C. */
zero = ch::make (0);
one = ch::make (1);
two = ch::make (2);
@@ -298,7 +266,6 @@ test_to_constant ()
{
typedef poly_helper<T> ph;
- /* Test to_constant. */
ASSERT_TRUE (ph::make (1, 0, 0).to_constant () == 1);
ASSERT_TRUE (ph::make (111, 0, 0).to_constant () == 111);
}
@@ -397,7 +364,7 @@ test_multiplication ()
ph::make (777, 666, 555));
}
-/* Test shift left. */
+/* Test shift left, both via operators and wi::. */
template<unsigned int N, typename C, typename T>
static void
@@ -405,8 +372,13 @@ test_shift_left ()
{
typedef poly_helper<T> ph;
+ /* Test <<. */
ASSERT_MUST_EQ (ph::make (1, 20, 300) << 4,
ph::make (16, 320, 4800));
+
+ /* Test wi::lshift. */
+ ASSERT_MUST_EQ (wi::lshift (ph::make (9, 15, 50), 3),
+ ph::make (72, 120, 400));
}
/* Test may_ne. */
@@ -554,7 +526,7 @@ test_can_align_up ()
typedef coeff_helper<C> ch;
typedef poly_helper<T> ph;
- T aligned;
+ T aligned;
ASSERT_TRUE (can_align_up (ph::make (41, 32, 16), 16, &aligned));
ASSERT_MUST_EQ (aligned, ph::make (48, 32, 16));
ASSERT_EQ (can_align_up (ph::make (15, 64, 8), 16, &aligned), N <= 2);
@@ -861,6 +833,114 @@ test_can_ior_p ()
ASSERT_MUST_EQ (ior, ch::make (0xc5));
}
+/* Test may_eq for poly_int<2, C>. */
+
+template<typename C>
+static void
+test_may_eq_2 ()
+{
+ typedef poly_int<2, C> T;
+
+ /* Test may_eq (T, C). */
+ ASSERT_TRUE (may_eq (T (1, 4), 41));
+ ASSERT_FALSE (may_eq (T (1, 4), 42));
+ ASSERT_FALSE (may_eq (T (1, 4), 40));
+ ASSERT_TRUE (may_eq (T (1, 4), 1));
+ ASSERT_FALSE (may_eq (T (1, 4), 0));
+ ASSERT_FALSE (may_eq (T (1, 4), 2));
+
+ /* Test may_eq (C, T). */
+ ASSERT_TRUE (may_eq (20, T (5, 3)));
+ ASSERT_FALSE (may_eq (21, T (5, 3)));
+ ASSERT_FALSE (may_eq (19, T (5, 3)));
+ ASSERT_TRUE (may_eq (5, T (5, 3)));
+ ASSERT_FALSE (may_eq (2, T (5, 3)));
+ ASSERT_FALSE (may_eq (6, T (5, 3)));
+
+ /* Test may_eq (T, T). */
+ ASSERT_TRUE (may_eq (T (2, 5), T (22, 3)));
+ ASSERT_FALSE (may_eq (T (3, 5), T (22, 3)));
+ ASSERT_FALSE (may_eq (T (2, 5), T (23, 3)));
+ ASSERT_FALSE (may_eq (T (2, 5), T (3, 5)));
+ ASSERT_TRUE (may_eq (T (10, 3), T (19, 0)));
+ ASSERT_FALSE (may_eq (T (10, 3), T (20, 0)));
+ ASSERT_TRUE (may_eq (T (10, 0), T (4, 2)));
+ ASSERT_FALSE (may_eq (T (11, 0), T (4, 2)));
+}
+
+/* Test must_ne for poly_int<2, C>. */
+
+template<typename C>
+static void
+test_must_ne_2 ()
+{
+ typedef poly_int<2, C> T;
+
+ /* Test must_ne (T, C). */
+ ASSERT_FALSE (must_ne (T (1, 4), 41));
+ ASSERT_TRUE (must_ne (T (1, 4), 42));
+ ASSERT_TRUE (must_ne (T (1, 4), 40));
+ ASSERT_FALSE (must_ne (T (1, 4), 1));
+ ASSERT_TRUE (must_ne (T (1, 4), 0));
+ ASSERT_TRUE (must_ne (T (1, 4), 2));
+
+ /* Test must_ne (C, T). */
+ ASSERT_FALSE (must_ne (20, T (5, 3)));
+ ASSERT_TRUE (must_ne (21, T (5, 3)));
+ ASSERT_TRUE (must_ne (19, T (5, 3)));
+ ASSERT_FALSE (must_ne (5, T (5, 3)));
+ ASSERT_TRUE (must_ne (2, T (5, 3)));
+ ASSERT_TRUE (must_ne (6, T (5, 3)));
+
+ /* Test must_ne (T, T). */
+ ASSERT_FALSE (must_ne (T (2, 5), T (22, 3)));
+ ASSERT_TRUE (must_ne (T (3, 5), T (22, 3)));
+ ASSERT_TRUE (must_ne (T (2, 5), T (23, 3)));
+ ASSERT_TRUE (must_ne (T (2, 5), T (3, 5)));
+ ASSERT_FALSE (must_ne (T (10, 3), T (19, 0)));
+ ASSERT_TRUE (must_ne (T (10, 3), T (20, 0)));
+ ASSERT_FALSE (must_ne (T (10, 0), T (4, 2)));
+ ASSERT_TRUE (must_ne (T (11, 0), T (4, 2)));
+}
+
+/* Test maybe_zero for poly_int<2, C>. */
+
+template<typename C>
+static void
+test_maybe_zero_2 ()
+{
+ typedef poly_int<2, C> T;
+
+ ASSERT_TRUE (maybe_zero (T (0, 0)));
+ ASSERT_TRUE (maybe_zero (T (0, 1)));
+ ASSERT_TRUE (maybe_zero (T (0, -1)));
+ ASSERT_FALSE (maybe_zero (T (1, 0)));
+ ASSERT_FALSE (maybe_zero (T (1, 2)));
+ ASSERT_FALSE (maybe_zero (T (1, -2)));
+ ASSERT_FALSE (maybe_zero (T (-1, 0)));
+ ASSERT_FALSE (maybe_zero (T (-1, 2)));
+ ASSERT_FALSE (maybe_zero (T (-1, -2)));
+}
+
+/* Test known_nonzero for poly_int<2, C>. */
+
+template<typename C>
+static void
+test_known_nonzero_2 ()
+{
+ typedef poly_int<2, C> T;
+
+ ASSERT_FALSE (known_nonzero (T (0, 0)));
+ ASSERT_FALSE (known_nonzero (T (0, 1)));
+ ASSERT_FALSE (known_nonzero (T (0, -1)));
+ ASSERT_TRUE (known_nonzero (T (1, 0)));
+ ASSERT_TRUE (known_nonzero (T (1, 2)));
+ ASSERT_TRUE (known_nonzero (T (1, -2)));
+ ASSERT_TRUE (known_nonzero (T (-1, 0)));
+ ASSERT_TRUE (known_nonzero (T (-1, 2)));
+ ASSERT_TRUE (known_nonzero (T (-1, -2)));
+}
+
/* Test may_le for both signed and unsigned C. */
template<unsigned int N, typename C, typename T>
@@ -1338,14 +1418,13 @@ test_constant_lower_bound ()
{
typedef poly_helper<T> ph;
- /* Test constant_lower_bound. */
ASSERT_EQ (constant_lower_bound (ph::make (4, 1, 2)), 4);
ASSERT_EQ (constant_lower_bound (ph::make (5, 0, 1)), 5);
ASSERT_EQ (constant_lower_bound (ph::make (6, 1, 0)), 6);
ASSERT_EQ (constant_lower_bound (ph::make (7, 0, 0)), 7);
}
-/* Test ordered_p for both signed and unsigned C. */
+/* Test lower_bound for both signed and unsigned C. */
template<unsigned int N, typename C, typename T>
static void
@@ -1409,7 +1488,6 @@ test_compare_sizes_for_sort ()
{
typedef poly_helper<T> ph;
- /* Test compare_sizes_for_sort. */
ASSERT_EQ (compare_sizes_for_sort (ph::make (5, 10, 8),
ph::make (7, 9, 11)),
N == 2 ? 1 : -1);
@@ -1439,7 +1517,6 @@ test_force_align_up_and_div ()
typedef coeff_helper<C> ch;
typedef poly_helper<T> ph;
- /* Test force_align_up_and_div. */
ASSERT_MUST_EQ (force_align_up_and_div (ph::make (41, 32, 16), 16),
ph::make (3, 2, 1));
ASSERT_MUST_EQ (force_align_up_and_div (ph::make (-39, -64, -32), 32),
@@ -1468,7 +1545,6 @@ test_force_align_down_and_div ()
typedef coeff_helper<C> ch;
typedef poly_helper<T> ph;
- /* Test force_align_down_and_div. */
ASSERT_MUST_EQ (force_align_down_and_div (ph::make (41, 32, 16), 16),
ph::make (2, 2, 1));
ASSERT_MUST_EQ (force_align_down_and_div (ph::make (-39, -64, -32), 32),
@@ -2159,6 +2235,8 @@ test_can_div_away_from_zero_p ()
ASSERT_EQ (const_quot, C (0));
}
+/* Test maybe_in_range_p for both signed and unsigned C. */
+
template<unsigned int N, typename C, typename T>
static void
test_maybe_in_range_p ()
@@ -2211,8 +2289,16 @@ test_maybe_in_range_p ()
ASSERT_TRUE (maybe_in_range_p (ph::make (6, 100, 1000),
ph::make (5, 10, 11),
ph::make (2, 1, 2)));
+ ASSERT_FALSE (maybe_in_range_p (ph::make (6, 8, 2),
+ ph::make (6, 8, 2),
+ ch::make (0)));
+ ASSERT_EQ (maybe_in_range_p (ph::make (6, 8, 1),
+ ph::make (6, 7, 2),
+ ph::make (0, 1, 2)), N == 3);
}
+/* Test known_in_range_p for both signed and unsigned C. */
+
template<unsigned int N, typename C, typename T>
static void
test_known_in_range_p ()
@@ -2250,8 +2336,16 @@ test_known_in_range_p ()
ASSERT_EQ (known_in_range_p (ph::make (6, 5, 5),
ph::make (5, 1, 2),
ph::make (2, 3, 3)), N == 1);
+ ASSERT_FALSE (known_in_range_p (ph::make (6, 8, 2),
+ ph::make (6, 8, 2),
+ ch::make (0)));
+ ASSERT_FALSE (known_in_range_p (ph::make (6, 8, 1),
+ ph::make (6, 7, 2),
+ ph::make (0, 1, 2)));
}
+/* Test ranges_may_overlap_p for both signed and unsigned C. */
+
template<unsigned int N, typename C, typename T>
static void
test_ranges_may_overlap_p ()
@@ -2329,6 +2423,8 @@ test_ranges_may_overlap_p ()
ph::make (20, 1, 2)), N >= 2);
}
+/* Test ranges_must_overlap_p for both signed and unsigned C. */
+
template<unsigned int N, typename C, typename T>
static void
test_ranges_must_overlap_p ()
@@ -2406,6 +2502,8 @@ test_ranges_must_overlap_p ()
ph::make (4, 4, 4)));
}
+/* Test known_subrange_p for both signed and unsigned C. */
+
template<unsigned int N, typename C, typename T>
static void
test_known_subrange_p ()
@@ -2455,6 +2553,126 @@ test_known_subrange_p ()
ph::make (6, 6, 8)), N == 1);
}
+/* Test coeffs_in_range_p for both signed and unsigned C. */
+
+template<unsigned int N, typename C, typename T>
+static void
+test_coeffs_in_range_p (void)
+{
+ typedef coeff_helper<C> ch;
+ typedef poly_helper<T> ph;
+
+ ASSERT_TRUE (coeffs_in_range_p (ph::make (10, 20, 30), 10, 30));
+ ASSERT_EQ (coeffs_in_range_p (ph::make (1, 10, 19), 0, 11), N <= 2);
+ ASSERT_EQ (coeffs_in_range_p (ph::make (100, 1, 102), 10, 100), N == 1);
+ ASSERT_FALSE (coeffs_in_range_p (ph::make (10, 11, 12), 7, 9));
+ ASSERT_FALSE (coeffs_in_range_p (ph::make (10, 11, 12), 13, 15));
+}
+
+/* Test may_eq for poly_int<2, C>, given that C is signed. */
+
+template<typename C>
+static void
+test_signed_may_eq_2 ()
+{
+ typedef poly_int<2, C> T;
+
+ /* Test may_eq (T, C). */
+ ASSERT_TRUE (may_eq (T (4, -4), 0));
+ ASSERT_FALSE (may_eq (T (4, -4), 1));
+ ASSERT_TRUE (may_eq (T (4, -4), 4));
+ ASSERT_FALSE (may_eq (T (4, -4), 8));
+ ASSERT_TRUE (may_eq (T (4, -4), -4));
+ ASSERT_FALSE (may_eq (T (4, -4), -3));
+
+ /* Test may_eq (C, T). */
+ ASSERT_FALSE (may_eq (0, T (4, -3)));
+ ASSERT_TRUE (may_eq (1, T (4, -3)));
+ ASSERT_TRUE (may_eq (4, T (4, -3)));
+ ASSERT_FALSE (may_eq (7, T (4, -3)));
+ ASSERT_FALSE (may_eq (T (4, -3), -3));
+ ASSERT_TRUE (may_eq (T (4, -3), -2));
+
+ /* Test may_eq (T, T). */
+ ASSERT_TRUE (may_eq (T (0, 3), T (6, 1)));
+ ASSERT_FALSE (may_eq (T (0, -3), T (6, 1)));
+ ASSERT_FALSE (may_eq (T (0, 3), T (7, 1)));
+ ASSERT_TRUE (may_eq (T (-3, 4), T (7, -1)));
+ ASSERT_FALSE (may_eq (T (-3, 4), T (6, -1)));
+}
+
+/* Test must_ne for poly_int<2, C>, given that C is signed. */
+
+template<typename C>
+static void
+test_signed_must_ne_2 ()
+{
+ typedef poly_int<2, C> T;
+
+ /* Test must_ne (T, C). */
+ ASSERT_FALSE (must_ne (T (4, -4), 0));
+ ASSERT_TRUE (must_ne (T (4, -4), 1));
+ ASSERT_FALSE (must_ne (T (4, -4), 4));
+ ASSERT_TRUE (must_ne (T (4, -4), 8));
+ ASSERT_FALSE (must_ne (T (4, -4), -4));
+ ASSERT_TRUE (must_ne (T (4, -4), -3));
+
+ /* Test must_ne (C, T). */
+ ASSERT_TRUE (must_ne (0, T (4, -3)));
+ ASSERT_FALSE (must_ne (1, T (4, -3)));
+ ASSERT_FALSE (must_ne (4, T (4, -3)));
+ ASSERT_TRUE (must_ne (7, T (4, -3)));
+ ASSERT_TRUE (must_ne (T (4, -3), -3));
+ ASSERT_FALSE (must_ne (T (4, -3), -2));
+
+ /* Test must_ne (T, T). */
+ ASSERT_FALSE (must_ne (T (0, 3), T (6, 1)));
+ ASSERT_TRUE (must_ne (T (0, -3), T (6, 1)));
+ ASSERT_TRUE (must_ne (T (0, 3), T (7, 1)));
+ ASSERT_FALSE (must_ne (T (-3, 4), T (7, -1)));
+ ASSERT_TRUE (must_ne (T (-3, 4), T (6, -1)));
+}
+
+/* Test maybe_zero for poly_int<2, C>, given that C is signed. */
+
+template<typename C>
+static void
+test_signed_maybe_zero_2 ()
+{
+ typedef poly_int<2, C> T;
+
+ ASSERT_TRUE (maybe_zero (T (3, -3)));
+ ASSERT_TRUE (maybe_zero (T (16, -4)));
+ ASSERT_TRUE (maybe_zero (T (-15, 5)));
+ ASSERT_FALSE (maybe_zero (T (3, -4)));
+ ASSERT_FALSE (maybe_zero (T (3, -6)));
+ ASSERT_FALSE (maybe_zero (T (15, -4)));
+ ASSERT_FALSE (maybe_zero (T (17, -4)));
+ ASSERT_FALSE (maybe_zero (T (-14, 5)));
+ ASSERT_FALSE (maybe_zero (T (-16, 5)));
+}
+
+/* Test known_nonzero for poly_int<2, C>, given that C is signed. */
+
+template<typename C>
+static void
+test_signed_known_nonzero_2 ()
+{
+ typedef poly_int<2, C> T;
+
+ ASSERT_FALSE (known_nonzero (T (3, -3)));
+ ASSERT_FALSE (known_nonzero (T (16, -4)));
+ ASSERT_FALSE (known_nonzero (T (-15, 5)));
+ ASSERT_TRUE (known_nonzero (T (3, -4)));
+ ASSERT_TRUE (known_nonzero (T (3, -6)));
+ ASSERT_TRUE (known_nonzero (T (15, -4)));
+ ASSERT_TRUE (known_nonzero (T (17, -4)));
+ ASSERT_TRUE (known_nonzero (T (-14, 5)));
+ ASSERT_TRUE (known_nonzero (T (-16, 5)));
+}
+
+/* Test negation for signed C, both via operators and wi::. */
+
template<unsigned int N, typename C, typename RC, typename T>
static void
test_signed_negation ()
@@ -2485,12 +2703,12 @@ test_signed_may_le ()
ASSERT_EQ (may_le (ph::make (3, 5, -1), ch::make (2)), N == 3);
ASSERT_EQ (may_le (ph::make (40, -10, 60), ch::make (15)), N >= 2);
ASSERT_TRUE (may_le (ph::make (-14, 0, 0), ch::make (13)));
-
+
/* Test may_le (C, T). */
ASSERT_EQ (may_le (ch::make (4), ph::make (3, 5, -1)), N >= 2);
ASSERT_EQ (may_le (ch::make (41), ph::make (40, -10, 60)), N == 3);
ASSERT_TRUE (may_le (ch::make (-15), ph::make (11, 0, 0)));
-
+
/* Test may_le (T, T). */
ASSERT_EQ (may_le (ph::make (-2, 4, -2),
ph::make (-3, -5, -1)), N == 3);
@@ -2520,7 +2738,7 @@ test_signed_may_lt ()
ASSERT_EQ (may_lt (ch::make (41), ph::make (40, -10, 60)), N == 3);
ASSERT_TRUE (may_lt (ch::make (-45), ph::make (40, 0, 0)));
ASSERT_FALSE (may_lt (ch::make (-2), ph::make (-2, -2, -2)));
-
+
/* Test may_lt (T, T). */
ASSERT_EQ (may_lt (ph::make (-3, 4, -2),
ph::make (-3, -5, -1)), N == 3);
@@ -2548,7 +2766,7 @@ test_signed_may_ge ()
ASSERT_EQ (may_ge (ch::make (2), ph::make (3, 5, -1)), N == 3);
ASSERT_EQ (may_ge (ch::make (15), ph::make (40, -10, 60)), N >= 2);
ASSERT_TRUE (may_ge (ch::make (13), ph::make (-14, 0, 0)));
-
+
/* Test may_ge (T, T). */
ASSERT_EQ (may_ge (ph::make (-3, -5, -1),
ph::make (-2, 4, -2)), N == 3);
@@ -2572,7 +2790,7 @@ test_signed_may_gt ()
ASSERT_EQ (may_gt (ph::make (40, -10, 60), ch::make (41)), N == 3);
ASSERT_TRUE (may_gt (ph::make (40, 0, 0), ch::make (-45)));
ASSERT_FALSE (may_gt (ph::make (-2, -2, -2), ch::make (-2)));
-
+
/* Test may_gt (C, T). */
ASSERT_EQ (may_gt (ch::make (2), ph::make (3, 5, -1)), N == 3);
ASSERT_EQ (may_gt (ch::make (15), ph::make (40, -10, 60)), N >= 2);
@@ -2601,12 +2819,12 @@ test_signed_must_gt ()
ASSERT_EQ (must_gt (ph::make (3, 5, -1), ch::make (2)), N <= 2);
ASSERT_EQ (must_gt (ph::make (40, -10, 60), ch::make (15)), N == 1);
ASSERT_FALSE (must_gt (ph::make (-14, 0, 0), ch::make (13)));
-
+
/* Test must_gt (C, T). */
ASSERT_EQ (must_gt (ch::make (4), ph::make (3, 5, -1)), N == 1);
ASSERT_EQ (must_gt (ch::make (41), ph::make (40, -10, 60)), N <= 2);
ASSERT_FALSE (must_gt (ch::make (-15), ph::make (11, 0, 0)));
-
+
/* Test must_gt (T, T). */
ASSERT_EQ (must_gt (ph::make (-2, 4, -2),
ph::make (-3, -5, -1)), N <= 2);
@@ -2636,7 +2854,7 @@ test_signed_must_ge ()
ASSERT_EQ (must_ge (ch::make (41), ph::make (40, -10, 60)), N <= 2);
ASSERT_FALSE (must_ge (ch::make (-45), ph::make (40, 0, 0)));
ASSERT_TRUE (must_ge (ch::make (-2), ph::make (-2, -2, -2)));
-
+
/* Test must_ge (T, T). */
ASSERT_EQ (must_ge (ph::make (-3, 4, -2),
ph::make (-3, -5, -1)), N <= 2);
@@ -2664,7 +2882,7 @@ test_signed_must_lt ()
ASSERT_EQ (must_lt (ch::make (2), ph::make (3, 5, -1)), N <= 2);
ASSERT_EQ (must_lt (ch::make (15), ph::make (40, -10, 60)), N == 1);
ASSERT_FALSE (must_lt (ch::make (13), ph::make (-14, 0, 0)));
-
+
/* Test must_lt (T, T). */
ASSERT_EQ (must_lt (ph::make (-3, -5, -1),
ph::make (-2, 4, -2)), N <= 2);
@@ -2688,7 +2906,7 @@ test_signed_must_le ()
ASSERT_EQ (must_le (ph::make (40, -10, 60), ch::make (41)), N <= 2);
ASSERT_FALSE (must_le (ph::make (40, 0, 0), ch::make (-45)));
ASSERT_TRUE (must_le (ph::make (-2, -2, -2), ch::make (-2)));
-
+
/* Test must_le (C, T). */
ASSERT_EQ (must_le (ch::make (2), ph::make (3, 5, -1)), N <= 2);
ASSERT_EQ (must_le (ch::make (15), ph::make (40, -10, 60)), N == 1);
@@ -2704,7 +2922,7 @@ test_signed_must_le ()
ph::make (-3, 5, 1)));
}
-/* Test may_le for signed C. */
+/* Test ordered_p for signed C. */
template<unsigned int N, typename C, typename T>
static void
@@ -3227,6 +3445,8 @@ test_signed_can_div_away_from_zero_p ()
ASSERT_EQ (const_quot, -3);
}
+/* Test maybe_in_range_p for signed C. */
+
template<unsigned int N, typename C, typename T>
static void
test_signed_maybe_in_range_p ()
@@ -3255,12 +3475,12 @@ test_unsigned_may_le ()
ASSERT_FALSE (may_le (ph::make (3, 5, -1), ch::make (2)));
ASSERT_FALSE (may_le (ph::make (40, -10, 60), ch::make (15)));
ASSERT_FALSE (may_le (ph::make (-14, 0, 0), ch::make (13)));
-
+
/* Test may_le (C, T). */
ASSERT_EQ (may_le (ch::make (4), ph::make (3, 5, -1)), N >= 2);
ASSERT_EQ (may_le (ch::make (41), ph::make (40, -10, 60)), N >= 2);
ASSERT_FALSE (may_le (ch::make (-15), ph::make (11, 0, 0)));
-
+
/* Test may_le (T, T). */
ASSERT_EQ (may_le (ph::make (-2, 4, -2),
ph::make (-3, -5, -1)), N >= 2);
@@ -3290,7 +3510,7 @@ test_unsigned_may_lt ()
ASSERT_EQ (may_lt (ch::make (41), ph::make (40, -10, 60)), N >= 2);
ASSERT_FALSE (may_lt (ch::make (-45), ph::make (40, 0, 0)));
ASSERT_EQ (may_lt (ch::make (-2), ph::make (-2, -2, -2)), N >= 2);
-
+
/* Test may_lt (T, T). */
ASSERT_EQ (may_lt (ph::make (-3, 4, -2),
ph::make (-3, -5, -1)), N >= 2);
@@ -3318,7 +3538,7 @@ test_unsigned_may_ge ()
ASSERT_FALSE (may_ge (ch::make (2), ph::make (3, 5, -1)));
ASSERT_FALSE (may_ge (ch::make (15), ph::make (40, -10, 60)));
ASSERT_FALSE (may_ge (ch::make (13), ph::make (-14, 0, 0)));
-
+
/* Test may_ge (T, T). */
ASSERT_EQ (may_ge (ph::make (-3, -5, -1),
ph::make (-2, 4, -2)), N >= 2);
@@ -3342,7 +3562,7 @@ test_unsigned_may_gt ()
ASSERT_EQ (may_gt (ph::make (40, -10, 60), ch::make (41)), N >= 2);
ASSERT_FALSE (may_gt (ph::make (40, 0, 0), ch::make (-45)));
ASSERT_EQ (may_gt (ph::make (-2, -2, -2), ch::make (-2)), N >= 2);
-
+
/* Test may_gt (C, T). */
ASSERT_FALSE (may_gt (ch::make (2), ph::make (3, 5, -1)));
ASSERT_FALSE (may_gt (ch::make (15), ph::make (40, -10, 60)));
@@ -3371,12 +3591,12 @@ test_unsigned_must_gt ()
ASSERT_TRUE (must_gt (ph::make (3, 5, -1), ch::make (2)));
ASSERT_TRUE (must_gt (ph::make (40, -10, 60), ch::make (15)));
ASSERT_TRUE (must_gt (ph::make (-14, 0, 0), ch::make (13)));
-
+
/* Test must_gt (C, T). */
ASSERT_EQ (must_gt (ch::make (4), ph::make (3, 5, -1)), N == 1);
ASSERT_EQ (must_gt (ch::make (41), ph::make (40, -10, 60)), N == 1);
ASSERT_TRUE (must_gt (ch::make (-15), ph::make (11, 0, 0)));
-
+
/* Test must_gt (T, T). */
ASSERT_EQ (must_gt (ph::make (-2, 4, -2),
ph::make (-3, -5, -1)), N == 1);
@@ -3406,7 +3626,7 @@ test_unsigned_must_ge ()
ASSERT_EQ (must_ge (ch::make (41), ph::make (40, -10, 60)), N == 1);
ASSERT_TRUE (must_ge (ch::make (-45), ph::make (40, 0, 0)));
ASSERT_EQ (must_ge (ch::make (-2), ph::make (-2, -2, -2)), N == 1);
-
+
/* Test must_ge (T, T). */
ASSERT_EQ (must_ge (ph::make (-3, 4, -2),
ph::make (-3, -5, -1)), N == 1);
@@ -3434,7 +3654,7 @@ test_unsigned_must_lt ()
ASSERT_TRUE (must_lt (ch::make (2), ph::make (3, 5, -1)));
ASSERT_TRUE (must_lt (ch::make (15), ph::make (40, -10, 60)));
ASSERT_TRUE (must_lt (ch::make (13), ph::make (-14, 0, 0)));
-
+
/* Test must_lt (T, T). */
ASSERT_EQ (must_lt (ph::make (-3, -5, -1),
ph::make (-2, 4, -2)), N == 1);
@@ -3458,7 +3678,7 @@ test_unsigned_must_le ()
ASSERT_EQ (must_le (ph::make (40, -10, 60), ch::make (41)), N == 1);
ASSERT_TRUE (must_le (ph::make (40, 0, 0), ch::make (-45)));
ASSERT_EQ (must_le (ph::make (-2, -2, -2), ch::make (-2)), N == 1);
-
+
/* Test must_le (C, T). */
ASSERT_TRUE (must_le (ch::make (2), ph::make (3, 5, -1)));
ASSERT_TRUE (must_le (ch::make (15), ph::make (40, -10, 60)));
@@ -3612,6 +3832,8 @@ test_unsigned_upper_bound ()
ph::make (-11, 5, -14));
}
+/* Test maybe_in_range_p for unsigned C. */
+
template<unsigned int N, typename C, typename T>
static void
test_unsigned_maybe_in_range_p ()
@@ -3648,6 +3870,8 @@ test_unsigned_maybe_in_range_p ()
ch::make (-2)));
}
+/* Test known_in_range_p for unsigned C. */
+
template<unsigned int N, typename C, typename T>
static void
test_unsigned_known_in_range_p ()
@@ -3764,7 +3988,7 @@ test_to_shwi (const C &srcv, int delta, HOST_WIDE_INT destv)
typedef poly_helper<T> ph;
poly_int<N, HOST_WIDE_INT> shwi;
- /* Test in-range T::to_shwi (low end). */
+ /* Test in-range T::to_shwi. */
ASSERT_TRUE (ph::make (srcv,
srcv - delta,
srcv - delta * 2).to_shwi (&shwi));
@@ -3772,7 +3996,7 @@ test_to_shwi (const C &srcv, int delta, HOST_WIDE_INT destv)
destv - delta,
destv - delta * 2));
- /* Test partially in-range T::to_shwi (low end). */
+ /* Test partially in-range T::to_shwi. */
ASSERT_EQ (ph::make (srcv,
srcv + delta,
srcv + delta * 2).to_shwi (&shwi), N == 1);
@@ -3786,7 +4010,7 @@ test_to_shwi (const C &srcv, int delta, HOST_WIDE_INT destv)
destv,
destv /* ignored */));
- /* Test fully out-of-range T::to_shwi (low end). */
+ /* Test fully out-of-range T::to_shwi. */
ASSERT_FALSE (ph::make (srcv + delta, srcv, srcv).to_shwi (&shwi));
}
@@ -3802,7 +4026,7 @@ test_to_uhwi (const C &srcv, int delta, unsigned HOST_WIDE_INT destv)
typedef poly_helper<T> ph;
poly_int<N, unsigned HOST_WIDE_INT> uhwi;
- /* Test in-range T::to_uhwi (low end). */
+ /* Test in-range T::to_uhwi. */
ASSERT_TRUE (ph::make (srcv,
srcv - delta,
srcv - delta * 2).to_uhwi (&uhwi));
@@ -3810,7 +4034,7 @@ test_to_uhwi (const C &srcv, int delta, unsigned HOST_WIDE_INT destv)
destv - delta,
destv - delta * 2));
- /* Test partially in-range T::to_uhwi (low end). */
+ /* Test partially in-range T::to_uhwi. */
ASSERT_EQ (ph::make (srcv,
srcv + delta,
srcv + delta * 2).to_uhwi (&uhwi), N == 1);
@@ -3824,7 +4048,7 @@ test_to_uhwi (const C &srcv, int delta, unsigned HOST_WIDE_INT destv)
destv,
destv /* ignored */));
- /* Test fully out-of-range T::to_uhwi (low end). */
+ /* Test fully out-of-range T::to_uhwi. */
ASSERT_FALSE (ph::make (srcv + delta, srcv, srcv).to_uhwi (&uhwi));
}
@@ -3884,7 +4108,7 @@ test_force_hwi (const C &mask66)
HOST_WIDE_INT_M1U >> 2,
HOST_WIDE_INT_M1U >> 3));
}
-
+
/* Test poly_int<N, wide_int>::from. */
template<unsigned int N>
@@ -3925,7 +4149,7 @@ test_wide_int_from ()
ASSERT_MUST_EQ (T::from (pu8h::make (0xf8,0x23,0x81), 16, UNSIGNED),
p_00f8_0023_0081);
}
-
+
/* Test wi::sext for poly_int<N, wide_int>. */
template<unsigned int N>
@@ -4050,7 +4274,7 @@ test_wide_int_add ()
wi::shwi (31, 6)));
ASSERT_EQ (overflow, N == 3);
}
-
+
/* Test wi::sub for poly_int<N, wide_int>. */
template<unsigned int N>
@@ -4129,7 +4353,7 @@ test_wide_int_sub ()
wi::shwi (-32, 6)));
ASSERT_EQ (overflow, N == 3);
}
-
+
/* Test wi::mul for poly_int<N, wide_int>. */
template<unsigned int N>
@@ -4190,7 +4414,7 @@ test_wide_int_mul ()
wi::shwi (29, 6)));
ASSERT_EQ (overflow, N == 3);
}
-
+
/* Test wi::neg for poly_int<N, wide_int>. */
template<unsigned int N>
@@ -4223,8 +4447,9 @@ test_wide_int_neg ()
wi::shwi (-32, 6)));
ASSERT_EQ (overflow, N == 3);
}
-
-/* Test poly_int<N, C>, where C is offset_int or widest_int. */
+
+/* Test poly_int<N, C> for things that only make sense when C is an
+ offset_int or widest_int. */
template<unsigned int N, typename C>
static void
@@ -4266,7 +4491,7 @@ test_type_promotions ()
typedef poly_helper< poly_int<N, HOST_WIDE_INT> > ps64h;
HOST_WIDE_INT mask32 = ~0U;
- /* Test that + on unsigned short promote to HOST_WIDE_INT. */
+ /* Test that + on unsigned short promotes to HOST_WIDE_INT. */
ASSERT_MUST_EQ (pu16h::make (0xffff, 0xfffe, 0xfffd) + 16,
ps64h::make (0x1000f, 0xfffe, 0xfffd));
ASSERT_MUST_EQ (32 + pu16h::make (0xffff, 0xfffe, 0xfffd),
@@ -4275,7 +4500,7 @@ test_type_promotions ()
+ pu16h::make (4, 10, 17),
ps64h::make (0x10003, 0x10008, 0x1000e));
- /* Test that - on unsigned short promote to HOST_WIDE_INT. */
+ /* Test that - on unsigned short promotes to HOST_WIDE_INT. */
ASSERT_MUST_EQ (pu16h::make (1, 2, 3) - ~0U,
ps64h::make (-mask32 + 1, 2, 3));
ASSERT_MUST_EQ (INT_MIN - pu16h::make (4, 5, 6),
@@ -4289,13 +4514,13 @@ test_type_promotions ()
ASSERT_MAY_NE (-pu16h::make (0x8000, 0x9000, 0xa000),
ps64h::make (0x8000, 0x9000, 0xa000));
- /* Test that * on unsigned short promote to HOST_WIDE_INT. */
+ /* Test that * on unsigned short promotes to HOST_WIDE_INT. */
ASSERT_MUST_EQ (pu16h::make (10, 14, 17) * ~0U,
ps64h::make (10 * mask32, 14 * mask32, 17 * mask32));
ASSERT_MUST_EQ (-400000 * pu16h::make (10, 14, 17),
ps64h::make (-4000000, -5600000, -6800000));
- /* Test that << on unsigned short promote to HOST_WIDE_INT. */
+ /* Test that << on unsigned short promotes to HOST_WIDE_INT. */
ASSERT_MUST_EQ (pu16h::make (4, 5, 6) << 50,
ps64h::make ((HOST_WIDE_INT) 4 << 50,
(HOST_WIDE_INT) 5 << 50,
@@ -4328,10 +4553,12 @@ test_type_promotions ()
ps64h::make (a - 32, b - 48, c - 64));
}
+/* Test endpoint_representable_p. */
+
static void
test_endpoint_representable (void)
{
- /* Unknown size. */
+ /* True because the size is unknown. */
ASSERT_TRUE (endpoint_representable_p ((unsigned char) 0x80,
(unsigned char) 0xff));
ASSERT_FALSE (endpoint_representable_p ((unsigned char) 0x80,
@@ -4345,7 +4572,7 @@ test_endpoint_representable (void)
ASSERT_TRUE (endpoint_representable_p ((unsigned char) 0x11,
(unsigned char) 0xee));
- /* Unknown size. */
+ /* True because the size is unknown. */
ASSERT_TRUE (endpoint_representable_p (INT_MAX, -1));
ASSERT_FALSE (endpoint_representable_p (INT_MAX - 100, INT_MAX));
ASSERT_FALSE (endpoint_representable_p (INT_MAX - 100, 101));
@@ -4353,13 +4580,15 @@ test_endpoint_representable (void)
ASSERT_TRUE (endpoint_representable_p (0, INT_MAX));
ASSERT_TRUE (endpoint_representable_p (INT_MIN, INT_MAX));
- /* Unknown size. */
+ /* True because the size is unknown. */
ASSERT_TRUE (endpoint_representable_p (UINT_MAX, -1U));
ASSERT_FALSE (endpoint_representable_p (UINT_MAX - 400, UINT_MAX - 1));
ASSERT_FALSE (endpoint_representable_p (UINT_MAX - 400, 401U));
ASSERT_TRUE (endpoint_representable_p (UINT_MAX - 400, 400U));
}
+/* Test wi::shwi with N coefficients. */
+
template<unsigned int N>
static void
test_shwi ()
@@ -4376,6 +4605,8 @@ test_shwi ()
wi::shwi (210, 16)));
}
+/* Test wi::uhwi with N coefficients. */
+
template<unsigned int N>
static void
test_uhwi ()
@@ -4392,6 +4623,8 @@ test_uhwi ()
wi::uhwi (210, 16)));
}
+/* Test known_zero for non-polynomial T. */
+
template<typename T>
static void
test_nonpoly_known_zero ()
@@ -4402,6 +4635,8 @@ test_nonpoly_known_zero ()
ASSERT_FALSE (known_zero (T (-1)));
}
+/* Test maybe_zero for non-polynomial T. */
+
template<typename T>
static void
test_nonpoly_maybe_zero ()
@@ -4412,6 +4647,8 @@ test_nonpoly_maybe_zero ()
ASSERT_FALSE (maybe_zero (T (-1)));
}
+/* Test known_nonzero for non-polynomial T. */
+
template<typename T>
static void
test_nonpoly_known_nonzero ()
@@ -4422,6 +4659,8 @@ test_nonpoly_known_nonzero ()
ASSERT_TRUE (known_nonzero (T (-1)));
}
+/* Test maybe_nonzero for non-polynomial T. */
+
template<typename T>
static void
test_nonpoly_maybe_nonzero ()
@@ -4432,6 +4671,8 @@ test_nonpoly_maybe_nonzero ()
ASSERT_TRUE (maybe_nonzero (T (-1)));
}
+/* Test known_one for non-polynomial T. */
+
template<typename T>
static void
test_nonpoly_known_one ()
@@ -4442,6 +4683,8 @@ test_nonpoly_known_one ()
ASSERT_FALSE (known_one (T (-1)));
}
+/* Test known_all_ones for non-polynomial T. */
+
template<typename T>
static void
test_nonpoly_known_all_ones ()
@@ -4452,6 +4695,8 @@ test_nonpoly_known_all_ones ()
ASSERT_TRUE (known_all_ones (T (-1)));
}
+/* Test poly-int.h operations on non-polynomial type T. */
+
template<typename T>
static void
test_nonpoly_type ()
@@ -4521,6 +4766,18 @@ test_general ()
test_can_ior_p<N, C, T> ();
}
+/* Test things that work for poly_int<2, C> for any coefficient type C
+   that supports the normal C operators.  */
+
+template<typename C>
+static void
+test_ordered_2 ()
+{
+ test_may_eq_2<C> ();
+ test_must_ne_2<C> ();
+ test_maybe_zero_2<C> ();
+ test_known_nonzero_2<C> ();
+}
+
/* Test things that work for poly_int-based types T, given that the
coefficient type C supports all the normal C operators. N is the
number of coefficients in T and RC is the type to which C promotes
@@ -4560,8 +4817,26 @@ test_ordered ()
test_ranges_may_overlap_p<N, C, T> ();
test_ranges_must_overlap_p<N, C, T> ();
test_known_subrange_p<N, C, T> ();
+ test_coeffs_in_range_p<N, C, T> ();
}
+/* Test things that work for poly_int<2, C>, given that C is signed. */
+
+template<typename C>
+static void
+test_signed_2 ()
+{
+ test_ordered_2<C> ();
+ test_signed_may_eq_2<C> ();
+ test_signed_must_ne_2<C> ();
+ test_signed_maybe_zero_2<C> ();
+ test_signed_known_nonzero_2<C> ();
+}
+
+/* Test things that work for poly_int-based types T, given that the
+ coefficient type C is signed. N is the number of coefficients in T
+ and RC is the type to which C promotes after an operator. */
+
template<unsigned int N, typename C, typename RC, typename T>
static void
test_signed ()
@@ -4591,6 +4866,10 @@ test_signed ()
test_signed_maybe_in_range_p<N, C, T> ();
}
+/* Test things that work for poly_int-based types T, given that the
+ coefficient type C is unsigned. N is the number of coefficients in T
+ and RC is the type to which C promotes after an operator. */
+
template<unsigned int N, typename C, typename RC, typename T>
static void
test_unsigned ()
@@ -4613,6 +4892,9 @@ test_unsigned ()
test_unsigned_known_in_range_p<N, C, T> ();
}
+/* Test things that are specific to coefficients of type wide_int,
+ using a poly_int with N coefficients. */
+
template<unsigned int N>
static void
test_wide_int ()
@@ -4684,22 +4966,3 @@ test_num_coeffs_extra ()
test_signed<N, widest_int, widest_int,
poly_int<N, widest_int> > ();
}
-
-void
-poly_int_cc_tests ()
-{
- test_helper ();
- test_poly_coeff_traits ();
- test_nonpoly ();
- test_num_coeffs_core<1> ();
- test_num_coeffs_extra<1> ();
- test_num_coeffs_core<2> ();
- test_num_coeffs_extra<2> ();
- test_num_coeffs_core<3> ();
- test_endpoint_representable ();
-}
-
- /* FIXME: may_eq, must_ne, maybe_zero, known_nonzero, constant range ops, mixed constructors. */
-
-}
-#endif
diff --git a/gcc/testsuite/gcc.dg/pr81854.c b/gcc/testsuite/gcc.dg/pr81854.c
index b8499f8b130..1021a811be4 100644
--- a/gcc/testsuite/gcc.dg/pr81854.c
+++ b/gcc/testsuite/gcc.dg/pr81854.c
@@ -1,6 +1,7 @@
/* PR c/81854 - weak alias of an incompatible symbol accepted
{ dg-do compile }
- { dg-require-ifunc "" } */
+ { dg-require-ifunc "" }
+ { dg-options "-Wextra" } */
const char* __attribute__ ((weak, alias ("f0_target")))
f0 (void); /* { dg-error "alias between function and variable" } */
@@ -26,39 +27,37 @@ const char* f2_target (int i) /* { dg-message "aliased declaration here" } */
return 0;
}
-
int __attribute__ ((ifunc ("f3_resolver")))
-f3 (void); /* { dg-error ".ifunc. resolver must return a function pointer" } */
+f3 (void); /* { dg-message "resolver indirect function declared here" } */
-int f3_resolver (void) /* { dg-message "resolver declaration here" } */
+void* f3_resolver (void) /* { dg-warning "ifunc. resolver for .f3. should return .int \\(\\*\\)\\(void\\)." } */
{
return 0;
}
int __attribute__ ((ifunc ("f4_resolver")))
-f4 (void); /* { dg-warning ".ifunc. resolver should return a function pointer" } */
+f4 (void); /* { dg-message "resolver indirect function declared here" } */
-void* f4_resolver (void) /* { dg-message "resolver declaration here" } */
+typedef void F4 (void);
+F4* f4_resolver (void) /* { dg-warning ".ifunc. resolver for .f4. should return .int \\(\\*\\)\\(void\\)" } */
{
return 0;
}
+const char* __attribute__ ((ifunc ("f5_resolver")))
+f5 (void);
-int __attribute__ ((ifunc ("f5_resolver")))
-f5 (void); /* { dg-warning "alias between functions of incompatible types" } */
-
-typedef void F5 (void);
-F5* f5_resolver (void) /* { dg-message "aliased declaration here" } */
+typedef const char* F5 (void);
+F5* f5_resolver (void)
{
return 0;
}
-const char* __attribute__ ((ifunc ("f6_resolver")))
-f6 (void);
+int __attribute__ ((ifunc ("f6_resolver")))
+f6 (void); /* { dg-message "resolver indirect function declared here" } */
-typedef const char* F6 (void);
-F6* f6_resolver (void)
+int f6_resolver (void) /* { dg-error ".ifunc. resolver for 'f6' must return .int \\(\\*\\)\\(void\\)." } */
{
return 0;
}
diff --git a/gcc/testsuite/gcc.dg/pragma-diag-3.c b/gcc/testsuite/gcc.dg/pragma-diag-3.c
index 2ee439d7e33..b6ee60f1677 100644
--- a/gcc/testsuite/gcc.dg/pragma-diag-3.c
+++ b/gcc/testsuite/gcc.dg/pragma-diag-3.c
@@ -15,7 +15,7 @@ void testing2() {
void testing3() {
int k = 4;
- k + 4 < k; /* { dg-error "overflow" } */
+ k + 4 < k; /* { dg-error "overflow" "" { xfail *-*-* } } */
}
int bar()
diff --git a/gcc/testsuite/gcc.dg/predict-13.c b/gcc/testsuite/gcc.dg/predict-13.c
index 7fe714a0d72..385be9e1389 100644
--- a/gcc/testsuite/gcc.dg/predict-13.c
+++ b/gcc/testsuite/gcc.dg/predict-13.c
@@ -21,4 +21,4 @@ int main(int argc, char **argv)
}
/* { dg-final { scan-tree-dump-times "combined heuristics of edge\[^:\]*: 33.3%" 3 "profile_estimate"} } */
-/* { dg-final { scan-tree-dump-times "combined heuristics of edge\[^:\]*: 0.0%" 2 "profile_estimate"} } */
+/* { dg-final { scan-tree-dump-times "combined heuristics of edge\[^:\]*: 0.1%" 2 "profile_estimate"} } */
diff --git a/gcc/testsuite/gcc.dg/predict-8.c b/gcc/testsuite/gcc.dg/predict-8.c
index e13cc006f3a..fa975b3d95f 100644
--- a/gcc/testsuite/gcc.dg/predict-8.c
+++ b/gcc/testsuite/gcc.dg/predict-8.c
@@ -1,5 +1,5 @@
/* { dg-do compile { target { i?86-*-* x86_64-*-* } } } */
-/* { dg-options "-O2 -fdump-rtl-expand" } */
+/* { dg-options "-O2 -fdump-rtl-expand-details-blocks" } */
int foo(float a, float b) {
if (a == b)
@@ -8,4 +8,4 @@ int foo(float a, float b) {
return 2;
}
-/* { dg-final { scan-rtl-dump-times "REG_BR_PROB 400 " 1 "expand"} } */
+/* { dg-final { scan-rtl-dump-times "99.0. .guessed" 2 "expand"} } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c
index f43b64ead62..f4f3a44903c 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-16.c
@@ -16,5 +16,5 @@ void foo (int n)
/* We should neither apply loop distribution nor generate a memset (0).  */
-/* { dg-final { scan-tree-dump "Loop 1 is the same" "ldist" } } */
+/* { dg-final { scan-tree-dump "Loop 1 not distributed" "ldist" } } */
/* { dg-final { scan-tree-dump-times "generated memset zero" 0 "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-25.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-25.c
index 699bf38ab03..c0b95fc38ec 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-25.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-25.c
@@ -22,4 +22,4 @@ foo (void)
}
}
-/* { dg-final { scan-tree-dump "Loop . is the same" "ldist" } } */
+/* { dg-final { scan-tree-dump "Loop . not distributed" "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-28.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-28.c
new file mode 100644
index 00000000000..4420139dedb
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-28.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
+
+#define M (256)
+#define N (1024)
+int arr[M][N];
+
+void
+foo (void)
+{
+ for (unsigned i = 0; i < M; ++i)
+ for (unsigned j = 0; j < N; ++j)
+ arr[i][j] = 0;
+}
+
+/* { dg-final { scan-tree-dump "Loop nest . distributed: split to 0 loops and 1 library" "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-29.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-29.c
new file mode 100644
index 00000000000..9ce93e80b07
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-29.c
@@ -0,0 +1,17 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
+
+#define M (256)
+#define N (512)
+int arr[M][N];
+
+void
+foo (void)
+{
+ for (unsigned i = 0; i < M; ++i)
+ for (unsigned j = 0; j < N - 1; ++j)
+ arr[i][j] = 0;
+}
+
+/* { dg-final { scan-tree-dump-not "Loop nest . distributed: split to" "ldist" } } */
+/* { dg-final { scan-tree-dump-times "Loop . distributed: split to 0 loops and 1 library" 1 "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-30.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-30.c
new file mode 100644
index 00000000000..f31860a574e
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-30.c
@@ -0,0 +1,16 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
+
+#define M (256)
+#define N (512)
+int a[M][N], b[M][N];
+
+void
+foo (void)
+{
+ for (unsigned i = 0; i < M; ++i)
+ for (unsigned j = N; j > 0; --j)
+ a[i][j - 1] = b[i][j - 1];
+}
+
+/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to" 1 "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-31.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-31.c
new file mode 100644
index 00000000000..60a9f743b1b
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-31.c
@@ -0,0 +1,19 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
+
+#define M (256)
+#define N (512)
+int a[M][N], b[M][N], c[M];
+
+void
+foo (void)
+{
+ for (int i = M - 1; i >= 0; --i)
+ {
+ c[i] = 0;
+ for (unsigned j = N; j > 0; --j)
+ a[i][j - 1] = b[i][j - 1];
+ }
+}
+
+/* { dg-final { scan-tree-dump-times "Loop nest . distributed: split to 0 loops and 2 library" 1 "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-33.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-33.c
new file mode 100644
index 00000000000..24d27fde9da
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-33.c
@@ -0,0 +1,21 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-distribution -ftree-loop-distribute-patterns -fdump-tree-ldist-details" } */
+
+#define N (1024)
+double a[N][N], b[N][N], c[N][N];
+
+void
+foo (void)
+{
+ unsigned i, j, k;
+
+ for (i = 0; i < N; ++i)
+ for (j = 0; j < N; ++j)
+ {
+ c[i][j] = 0.0;
+ for (k = 0; k < N; ++k)
+ c[i][j] += a[i][k] * b[k][j];
+ }
+}
+
+/* { dg-final { scan-tree-dump "Loop nest . distributed: split to 1 loops and 1 library" "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/ldist-7.c b/gcc/testsuite/gcc.dg/tree-ssa/ldist-7.c
index f31d051984a..2eb1f74d4ab 100644
--- a/gcc/testsuite/gcc.dg/tree-ssa/ldist-7.c
+++ b/gcc/testsuite/gcc.dg/tree-ssa/ldist-7.c
@@ -28,4 +28,4 @@ int loop1 (int k)
return a[1000-2] + b[1000-1] + c[1000-2] + d[1000-2];
}
-/* { dg-final { scan-tree-dump-times "distributed" 0 "ldist" } } */
+/* { dg-final { scan-tree-dump-times "distributed: " 0 "ldist" } } */
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82472.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82472.c
new file mode 100644
index 00000000000..445c95fbc47
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82472.c
@@ -0,0 +1,24 @@
+/* { dg-do compile } */
+/* { dg-options "-O2 -ftree-loop-distribution" } */
+
+long int xj;
+
+int
+cx (long int *ox, short int mk, char tf)
+{
+ int si, f9;
+ char *p4 = &tf;
+ short int *rm = (tf != 0) ? (short int *)&f9 : &mk;
+
+ for (f9 = 0; f9 < 2; ++f9)
+ {
+ *rm = 0;
+ *p4 = *ox;
+ si = mk;
+ xj = 0;
+ while (p4 < (char *)rm)
+ ++p4;
+ }
+
+ return si;
+}
diff --git a/gcc/testsuite/gcc.dg/tree-ssa/pr82498.c b/gcc/testsuite/gcc.dg/tree-ssa/pr82498.c
new file mode 100644
index 00000000000..19a42f0a3c7
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/tree-ssa/pr82498.c
@@ -0,0 +1,53 @@
+/* PR target/82498 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -fdump-tree-original" } */
+/* { dg-final { scan-tree-dump-times "x r<< y" 4 "original" { target int32 } } } */
+/* { dg-final { scan-tree-dump-times "x r>> y" 4 "original" { target int32 } } } */
+
+unsigned
+f1 (unsigned x, int y)
+{
+ return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f2 (unsigned x, int y)
+{
+ return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f3 (unsigned x, int y)
+{
+ return (x >> y) | (x << (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f4 (unsigned x, int y)
+{
+ return (x >> y) | (x << (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f5 (unsigned x, int y)
+{
+ return (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y)) | (x << y);
+}
+
+unsigned
+f6 (unsigned x, int y)
+{
+ return (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x << y);
+}
+
+unsigned
+f7 (unsigned x, int y)
+{
+ return (x << (__CHAR_BIT__ * __SIZEOF_INT__ - y)) | (x >> y);
+}
+
+unsigned
+f8 (unsigned x, int y)
+{
+ return (x << (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1))) | (x >> y);
+}
diff --git a/gcc/testsuite/gcc.dg/ubsan/pr82498.c b/gcc/testsuite/gcc.dg/ubsan/pr82498.c
new file mode 100644
index 00000000000..1d093a058e1
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/ubsan/pr82498.c
@@ -0,0 +1,159 @@
+/* PR target/82498 */
+/* { dg-do run { target i?86-*-* x86_64-*-* } } */
+/* { dg-options "-fsanitize=undefined -fno-sanitize-recover=undefined" } */
+
+#include <x86intrin.h>
+
+volatile unsigned int a;
+volatile unsigned long long b;
+volatile int c;
+
+int
+main ()
+{
+ a = 0x12345678U;
+ a = __rold (a, 0);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ a = __rold (a, 32);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ a = __rold (a, -32);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ a = __rold (a, 37);
+ if (a != 0x468acf02U)
+ __builtin_abort ();
+ a = __rold (a, -5);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ a = __rord (a, 0);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ a = __rord (a, 32);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ a = __rord (a, -32);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ a = __rord (a, -37);
+ if (a != 0x468acf02U)
+ __builtin_abort ();
+ a = __rord (a, 5);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ c = 0;
+ a = __rold (a, c);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ c = 32;
+ a = __rold (a, c);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ c = -32;
+ a = __rold (a, c);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ c = 37;
+ a = __rold (a, c);
+ if (a != 0x468acf02U)
+ __builtin_abort ();
+ c = -5;
+ a = __rold (a, c);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ c = 0;
+ a = __rord (a, c);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ c = 32;
+ a = __rord (a, c);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ c = -32;
+ a = __rord (a, c);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+ c = -37;
+ a = __rord (a, c);
+ if (a != 0x468acf02U)
+ __builtin_abort ();
+ c = 5;
+ a = __rord (a, c);
+ if (a != 0x12345678U)
+ __builtin_abort ();
+#ifdef __x86_64__
+ b = 0x123456789abcdef1ULL;
+ b = __rolq (b, 0);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ b = __rolq (b, 64);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ b = __rolq (b, -64);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ b = __rolq (b, 69);
+ if (b != 0x468acf13579bde22ULL)
+ __builtin_abort ();
+ b = __rolq (b, -5);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ b = __rorq (b, 0);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ b = __rorq (b, 64);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ b = __rorq (b, -64);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ b = __rorq (b, -69);
+ if (b != 0x468acf13579bde22ULL)
+ __builtin_abort ();
+ b = __rorq (b, 5);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ c = 0;
+ b = __rolq (b, c);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ c = 64;
+ b = __rolq (b, c);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ c = -64;
+ b = __rolq (b, c);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ c = 69;
+ b = __rolq (b, c);
+ if (b != 0x468acf13579bde22ULL)
+ __builtin_abort ();
+ c = -5;
+ b = __rolq (b, c);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ c = 0;
+ b = __rorq (b, c);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ c = 64;
+ b = __rorq (b, c);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ c = -64;
+ b = __rorq (b, c);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+ c = -69;
+ b = __rorq (b, c);
+ if (b != 0x468acf13579bde22ULL)
+ __builtin_abort ();
+ c = 5;
+ b = __rorq (b, c);
+ if (b != 0x123456789abcdef1ULL)
+ __builtin_abort ();
+#endif
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.dg/vect/pr78558.c b/gcc/testsuite/gcc.dg/vect/pr78558.c
new file mode 100644
index 00000000000..2606d4ec10d
--- /dev/null
+++ b/gcc/testsuite/gcc.dg/vect/pr78558.c
@@ -0,0 +1,44 @@
+/* PR tree-optimization/78558 */
+
+#include "tree-vect.h"
+
+struct S
+{
+ char p[48];
+ unsigned long long q, r, s;
+} s[50];
+
+struct D
+{
+ unsigned long long q, r;
+} d[50];
+
+void
+foo (void)
+{
+ unsigned long i;
+ for (i = 0; i < 50; ++i)
+ {
+ d[i].q = s[i].q;
+ d[i].r = s[i].r;
+ }
+}
+
+int
+main ()
+{
+ check_vect ();
+ unsigned long i;
+ for (i = 0; i < 50; ++i)
+ {
+ s[i].q = i;
+ s[i].r = 50 * i;
+ }
+ asm volatile ("" : : "g" (s), "g" (d) : "memory");
+ foo ();
+ asm volatile ("" : : "g" (s), "g" (d) : "memory");
+ for (i = 0; i < 50; ++i)
+ if (d[i].q != i || d[i].r != 50 * i)
+ abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/aarch64/pr71727-2.c b/gcc/testsuite/gcc.target/aarch64/pr71727-2.c
index f72f23c686b..be4150a6c5f 100644
--- a/gcc/testsuite/gcc.target/aarch64/pr71727-2.c
+++ b/gcc/testsuite/gcc.target/aarch64/pr71727-2.c
@@ -13,4 +13,4 @@ unsigned char foo(const unsigned char *buffer, unsigned int length)
return sum;
}
-/* { dg-final { scan-assembler-times {and\t[wx][0-9]+, [wx][0-9]+, 15} 1 } } */
+/* { dg-final { scan-assembler-times "and\tw\[0-9\]+, w\[0-9\]+, 15" 1 } } */
diff --git a/gcc/testsuite/gcc.target/aarch64/pr81422.C b/gcc/testsuite/gcc.target/aarch64/pr81422.C
new file mode 100644
index 00000000000..5bcc948996e
--- /dev/null
+++ b/gcc/testsuite/gcc.target/aarch64/pr81422.C
@@ -0,0 +1,15 @@
+/* { dg-do compile } */
+/* { dg-options "-O0" } */
+
+struct DArray
+{
+ __SIZE_TYPE__ length;
+ int* ptr;
+};
+
+void foo35(DArray)
+{
+ static __thread int x[5];
+ foo35({5, (int*)&x});
+}
+
diff --git a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4.c b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4.c
index 1a899ae0a05..35ab3b3c641 100644
--- a/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4.c
+++ b/gcc/testsuite/gcc.target/aarch64/sve_loop_add_4.c
@@ -40,7 +40,9 @@ TEST_ALL (LOOP)
/* { dg-final { scan-assembler-times {\tincb\tx[0-9]+\n} 8 } } */
/* { dg-final { scan-assembler-not {\tdecb\tz[0-9]+\.b} } } */
-/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 16 } } */
+/* We don't need to increment the vector IV for steps -16 and 16, since the
+ increment is always a multiple of 256. */
+/* { dg-final { scan-assembler-times {\tadd\tz[0-9]+\.b, z[0-9]+\.b, z[0-9]+\.b\n} 14 } } */
/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, #-16\n} 1 } } */
/* { dg-final { scan-assembler-times {\tindex\tz[0-9]+\.h, w[0-9]+, #-15\n} 1 } } */
diff --git a/gcc/testsuite/gcc.target/i386/387-ficom-1.c b/gcc/testsuite/gcc.target/i386/387-ficom-1.c
new file mode 100644
index 00000000000..8c73ddcb2da
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/387-ficom-1.c
@@ -0,0 +1,41 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ia32 } */
+/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=i386" } } */
+/* { dg-options "-O2 -march=i386 -ffast-math -masm=att" } */
+
+extern short s;
+
+int test_f_s (short x)
+{
+ return (float)x > s;
+}
+
+int test_d_s (short x)
+{
+ return (double)x < s;
+}
+
+int test_ld_s (short x)
+{
+ return (long double)x == s;
+}
+
+extern int i;
+
+int test_f_i (int x)
+{
+ return (float)i >= x;
+}
+
+int test_d_i (int x)
+{
+ return (double)i <= x;
+}
+
+int test_ld_i (int x)
+{
+ return (long double)i != x;
+}
+
+/* { dg-final { scan-assembler-times "ficomps" 3 } } */
+/* { dg-final { scan-assembler-times "ficompl" 3 } } */
diff --git a/gcc/testsuite/gcc.target/i386/387-ficom-2.c b/gcc/testsuite/gcc.target/i386/387-ficom-2.c
new file mode 100644
index 00000000000..4190ebaae71
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/387-ficom-2.c
@@ -0,0 +1,9 @@
+/* { dg-do compile } */
+/* { dg-require-effective-target ia32 } */
+/* { dg-skip-if "" { *-*-* } { "-march=*" } { "-march=i386" } } */
+/* { dg-options "-Os -march=i386 -ffast-math -masm=att" } */
+
+#include "387-ficom-1.c"
+
+/* { dg-final { scan-assembler-times "ficomps" 3 } } */
+/* { dg-final { scan-assembler-times "ficompl" 3 } } */
diff --git a/gcc/testsuite/gcc.target/i386/asm-mem.c b/gcc/testsuite/gcc.target/i386/asm-mem.c
new file mode 100644
index 00000000000..89b713f0201
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/asm-mem.c
@@ -0,0 +1,59 @@
+/* { dg-do run } */
+/* { dg-options "-O3" } */
+
+/* Check that "m" array references are effective in preventing the
+ array initialization from wandering past a use in the asm, and
+ that the casts remain supported. */
+
+static int
+f1 (const char *p)
+{
+ int count;
+
+ __asm__ ("repne scasb"
+ : "=c" (count), "+D" (p)
+ : "m" (*(const char (*)[]) p), "0" (-1), "a" (0));
+ return -2 - count;
+}
+
+static int
+f2 (const char *p)
+{
+ int count;
+
+ __asm__ ("repne scasb"
+ : "=c" (count), "+D" (p)
+ : "m" (*(const char (*)[48]) p), "0" (-1), "a" (0));
+ return -2 - count;
+}
+
+static int
+f3 (int n, const char *p)
+{
+ int count;
+
+ __asm__ ("repne scasb"
+ : "=c" (count), "+D" (p)
+ : "m" (*(const char (*)[n]) p), "0" (-1), "a" (0));
+ return -2 - count;
+}
+
+int
+main ()
+{
+ int a;
+ char buff[48] = "hello world";
+ buff[4] = 0;
+ a = f1 (buff);
+ if (a != 4)
+ __builtin_abort ();
+ buff[4] = 'o';
+ a = f2 (buff);
+ if (a != 11)
+ __builtin_abort ();
+ buff[4] = 0;
+ a = f3 (48, buff);
+ if (a != 4)
+ __builtin_abort ();
+ return 0;
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr82483-1.c b/gcc/testsuite/gcc.target/i386/pr82483-1.c
new file mode 100644
index 00000000000..59a59dc8dfe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82483-1.c
@@ -0,0 +1,44 @@
+/* PR target/82483 */
+/* { dg-do compile } */
+/* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
+/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
+
+#include <x86intrin.h>
+
+void f1 (__m64 x, __m64 y, char *z) { _mm_maskmove_si64 (x, y, z); }
+int f2 (__m64 x) { return _mm_extract_pi16 (x, 1); }
+__m64 f3 (__m64 x, int y) { return _mm_insert_pi16 (x, y, 1); }
+__m64 f4 (__m128 x) { return _mm_cvtps_pi32 (x); }
+__m64 f5 (__m128 x) { return _mm_cvttps_pi32 (x); }
+__m128 f6 (__m128 x, __m64 y) { return _mm_cvtpi32_ps (x, y); }
+__m64 f7 (__m64 x, __m64 y) { return _mm_avg_pu8 (x, y); }
+__m64 f8 (__m64 x, __m64 y) { return _mm_avg_pu16 (x, y); }
+__m64 f9 (__m64 x, __m64 y) { return _mm_mulhi_pu16 (x, y); }
+__m64 f10 (__m64 x, __m64 y) { return _mm_max_pu8 (x, y); }
+__m64 f11 (__m64 x, __m64 y) { return _mm_max_pi16 (x, y); }
+__m64 f12 (__m64 x, __m64 y) { return _mm_min_pu8 (x, y); }
+__m64 f13 (__m64 x, __m64 y) { return _mm_min_pi16 (x, y); }
+__m64 f14 (__m64 x, __m64 y) { return _mm_sad_pu8 (x, y); }
+int f15 (__m64 x) { return _mm_movemask_pi8 (x); }
+__m64 f16 (__m64 x) { return _mm_shuffle_pi16 (x, 1); }
+__m64 f17 (__m128d x) { return _mm_cvtpd_pi32 (x); }
+__m64 f18 (__m128d x) { return _mm_cvttpd_pi32 (x); }
+__m128d f19 (__m64 x) { return _mm_cvtpi32_pd (x); }
+__m64 f20 (__m64 x, __m64 y) { return _mm_mul_su32 (x, y); }
+__m64 f21 (__m64 x) { return _mm_abs_pi8 (x); }
+__m64 f22 (__m64 x) { return _mm_abs_pi16 (x); }
+__m64 f23 (__m64 x) { return _mm_abs_pi32 (x); }
+__m64 f24 (__m64 x, __m64 y) { return _mm_hadd_pi16 (x, y); }
+__m64 f25 (__m64 x, __m64 y) { return _mm_hadd_pi32 (x, y); }
+__m64 f26 (__m64 x, __m64 y) { return _mm_hadds_pi16 (x, y); }
+__m64 f27 (__m64 x, __m64 y) { return _mm_hsub_pi16 (x, y); }
+__m64 f28 (__m64 x, __m64 y) { return _mm_hsub_pi32 (x, y); }
+__m64 f29 (__m64 x, __m64 y) { return _mm_hsubs_pi16 (x, y); }
+__m64 f30 (__m64 x, __m64 y) { return _mm_maddubs_pi16 (x, y); }
+__m64 f31 (__m64 x, __m64 y) { return _mm_mulhrs_pi16 (x, y); }
+__m64 f32 (__m64 x, __m64 y) { return _mm_shuffle_pi8 (x, y); }
+__m64 f33 (__m64 x, __m64 y) { return _mm_sign_pi8 (x, y); }
+__m64 f34 (__m64 x, __m64 y) { return _mm_sign_pi16 (x, y); }
+__m64 f35 (__m64 x, __m64 y) { return _mm_sign_pi32 (x, y); }
+void f36 (__m64 *x, __m64 y) { _mm_stream_pi (x, y); }
+__m64 f37 (__m64 x, __m64 y) { return _mm_alignr_pi8 (x, y, 3); }
diff --git a/gcc/testsuite/gcc.target/i386/pr82483-2.c b/gcc/testsuite/gcc.target/i386/pr82483-2.c
new file mode 100644
index 00000000000..305ddbd6c64
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82483-2.c
@@ -0,0 +1,9 @@
+/* PR target/82483 */
+/* { dg-do compile } */
+/* { dg-options "-mssse3 -mno-mmx -Wno-psabi" } */
+/* { dg-error "needs isa option" "" { target *-*-* } 0 } */
+
+#include <x86intrin.h>
+
+__v1di f1 (__v1di x, __v1di y) { return __builtin_ia32_paddq (x, y); }
+__v1di f2 (__v1di x, __v1di y) { return __builtin_ia32_psubq (x, y); }
diff --git a/gcc/testsuite/gcc.target/i386/pr82498-1.c b/gcc/testsuite/gcc.target/i386/pr82498-1.c
new file mode 100644
index 00000000000..78a6698f607
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82498-1.c
@@ -0,0 +1,52 @@
+/* PR target/82498 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
+
+unsigned
+f1 (unsigned x, unsigned char y)
+{
+ if (y == 0)
+ return x;
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f2 (unsigned x, unsigned y)
+{
+ if (y == 0)
+ return x;
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f3 (unsigned x, unsigned short y)
+{
+ if (y == 0)
+ return x;
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (__CHAR_BIT__ * __SIZEOF_INT__ - y));
+}
+
+unsigned
+f4 (unsigned x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f5 (unsigned x, unsigned int y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
+
+unsigned
+f6 (unsigned x, unsigned short y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x << y) | (x >> (-y & (__CHAR_BIT__ * __SIZEOF_INT__ - 1)));
+}
diff --git a/gcc/testsuite/gcc.target/i386/pr82498-2.c b/gcc/testsuite/gcc.target/i386/pr82498-2.c
new file mode 100644
index 00000000000..9e065ee7e50
--- /dev/null
+++ b/gcc/testsuite/gcc.target/i386/pr82498-2.c
@@ -0,0 +1,46 @@
+/* PR target/82498 */
+/* { dg-do compile } */
+/* { dg-options "-O2 -mtune=generic -masm=att" } */
+/* { dg-final { scan-assembler-not {\mand[bwlq]\M} } } */
+
+int
+f1 (int x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return x >> y;
+}
+
+unsigned
+f2 (unsigned x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return x >> y;
+}
+
+unsigned
+f3 (unsigned x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return x << y;
+}
+
+unsigned
+f4 (unsigned x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return x | (1U << y);
+}
+
+unsigned
+f5 (unsigned x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return x ^ (1U << y);
+}
+
+unsigned
+f6 (unsigned x, unsigned char y)
+{
+ y &= __CHAR_BIT__ * __SIZEOF_INT__ - 1;
+ return (x + 2) & ~(1U << y);
+}
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-char.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-char.c
new file mode 100644
index 00000000000..3a1aa60cbff
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-char.c
@@ -0,0 +1,86 @@
+/* Verify that overloaded built-ins for vec_cmp{eq,ge,gt,le,lt,ne} with
+ char inputs produce the right code. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mpower8-vector -O2" } */
+
+#include <altivec.h>
+
+vector bool char
+test3_eq (vector signed char x, vector signed char y)
+{
+ return vec_cmpeq (x, y);
+}
+
+vector bool char
+test6_eq (vector unsigned char x, vector unsigned char y)
+{
+ return vec_cmpeq (x, y);
+}
+
+vector bool char
+test3_ge (vector signed char x, vector signed char y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool char
+test6_ge (vector unsigned char x, vector unsigned char y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool char
+test3_gt (vector signed char x, vector signed char y)
+{
+ return vec_cmpgt (x, y);
+}
+
+vector bool char
+test6_gt (vector unsigned char x, vector unsigned char y)
+{
+ return vec_cmpgt (x, y);
+}
+
+vector bool char
+test3_le (vector signed char x, vector signed char y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool char
+test6_le (vector unsigned char x, vector unsigned char y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool char
+test3_lt (vector signed char x, vector signed char y)
+{
+ return vec_cmplt (x, y);
+}
+
+vector bool char
+test6_lt (vector unsigned char x, vector unsigned char y)
+{
+ return vec_cmplt (x, y);
+}
+
+vector bool char
+test3_ne (vector signed char x, vector signed char y)
+{
+ return vec_cmpne (x, y);
+}
+
+vector bool char
+test6_ne (vector unsigned char x, vector unsigned char y)
+{
+ return vec_cmpne (x, y);
+}
+
+/* { dg-final { scan-assembler-times "vcmpequb" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtsb" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtub" 4 } } */
+/* { dg-final { scan-assembler-times "xxlnor" 6 } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-double.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-double.c
new file mode 100644
index 00000000000..9d56862b2ea
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-double.c
@@ -0,0 +1,51 @@
+/* Verify that overloaded built-ins for vec_cmp with
+ double inputs for VSX produce the right code. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mvsx -O2" } */
+
+#include <altivec.h>
+
+vector bool long long
+test2_eq (vector double x, vector double y)
+{
+ return vec_cmpeq (x, y);
+}
+
+vector bool long long
+test2_ge (vector double x, vector double y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool long long
+test2_gt (vector double x, vector double y)
+{
+ return vec_cmpgt (x, y);
+}
+
+vector bool long long
+test2_le (vector double x, vector double y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool long long
+test2_lt (vector double x, vector double y)
+{
+ return vec_cmplt (x, y);
+}
+
+vector bool long long
+test2_ne (vector double x, vector double y)
+{
+ return vec_cmpne (x, y);
+}
+
+/* { dg-final { scan-assembler-times "xvcmpeqdp" 2 } } */
+/* { dg-final { scan-assembler-times "xvcmpgtdp" 2 } } */
+/* { dg-final { scan-assembler-times "xvcmpnedp" 0 } } */
+/* { dg-final { scan-assembler-times "xvcmpgedp" 2 } } */
+/* { dg-final { scan-assembler-times "fcmpu" 0 } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-float.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-float.c
new file mode 100644
index 00000000000..b75250a7a3a
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-float.c
@@ -0,0 +1,51 @@
+/* Verify that overloaded built-ins for vec_cmp with float
+ inputs for VSX produce the right code. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_vsx_ok } */
+/* { dg-options "-mvsx -O2" } */
+
+#include <altivec.h>
+
+vector bool int
+test1_eq (vector float x, vector float y)
+{
+ return vec_cmpeq (x, y);
+}
+
+vector bool int
+test1_ge (vector float x, vector float y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool int
+test1_gt (vector float x, vector float y)
+{
+ return vec_cmpgt (x, y);
+}
+
+vector bool int
+test1_le (vector float x, vector float y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool int
+test1_lt (vector float x, vector float y)
+{
+ return vec_cmplt (x, y);
+}
+
+vector bool int
+test1_ne (vector float x, vector float y)
+{
+ return vec_cmpne (x, y);
+}
+
+/* { dg-final { scan-assembler-times "xvcmpeqsp" 2 } } */
+/* { dg-final { scan-assembler-times "xvcmpgtsp" 2 } } */
+/* { dg-final { scan-assembler-times "xvcmpnesp" 0 } } */
+/* { dg-final { scan-assembler-times "xvcmpgesp" 2 } } */
+/* { dg-final { scan-assembler-times "fcmpu" 0 } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int.c
new file mode 100644
index 00000000000..d53994d3ac8
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-int.c
@@ -0,0 +1,86 @@
+/* Verify that overloaded built-ins for vec_cmp with int
+ inputs produce the right code. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mpower8-vector -O2" } */
+
+#include <altivec.h>
+
+vector bool int
+test3_eq (vector signed int x, vector signed int y)
+{
+ return vec_cmpeq (x, y);
+}
+
+vector bool int
+test6_eq (vector unsigned int x, vector unsigned int y)
+{
+ return vec_cmpeq (x, y);
+}
+
+vector bool int
+test3_ge (vector signed int x, vector signed int y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool int
+test6_ge (vector unsigned int x, vector unsigned int y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool int
+test3_gt (vector signed int x, vector signed int y)
+{
+ return vec_cmpgt (x, y);
+}
+
+vector bool int
+test6_gt (vector unsigned int x, vector unsigned int y)
+{
+ return vec_cmpgt (x, y);
+}
+
+vector bool int
+test3_le (vector signed int x, vector signed int y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool int
+test6_le (vector unsigned int x, vector unsigned int y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool int
+test3_lt (vector signed int x, vector signed int y)
+{
+ return vec_cmplt (x, y);
+}
+
+vector bool int
+test6_lt (vector unsigned int x, vector unsigned int y)
+{
+ return vec_cmplt (x, y);
+}
+
+vector bool int
+test3_ne (vector signed int x, vector signed int y)
+{
+ return vec_cmpne (x, y);
+}
+
+vector bool int
+test6_ne (vector unsigned int x, vector unsigned int y)
+{
+ return vec_cmpne (x, y);
+}
+
+/* { dg-final { scan-assembler-times "vcmpequw" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtsw" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtuw" 4 } } */
+/* { dg-final { scan-assembler-times "xxlnor" 6 } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-longlong.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-longlong.c
new file mode 100644
index 00000000000..536ee75a854
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-longlong.c
@@ -0,0 +1,86 @@
+/* Verify that overloaded built-ins for vec_cmp with long long
+ inputs produce the right code. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mpower8-vector -O2" } */
+
+#include <altivec.h>
+
+vector bool long long
+test3_eq (vector signed long long x, vector signed long long y)
+{
+ return vec_cmpeq (x, y);
+}
+
+vector bool long long
+test6_eq (vector unsigned long long x, vector unsigned long long y)
+{
+ return vec_cmpeq (x, y);
+}
+
+vector bool long long
+test3_ge (vector signed long long x, vector signed long long y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool long long
+test6_ge (vector unsigned long long x, vector unsigned long long y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool long long
+test3_gt (vector signed long long x, vector signed long long y)
+{
+ return vec_cmpgt (x, y);
+}
+
+vector bool long long
+test6_gt (vector unsigned long long x, vector unsigned long long y)
+{
+ return vec_cmpgt (x, y);
+}
+
+vector bool long long
+test3_le (vector signed long long x, vector signed long long y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool long long
+test6_le (vector unsigned long long x, vector unsigned long long y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool long long
+test3_lt (vector signed long long x, vector signed long long y)
+{
+ return vec_cmplt (x, y);
+}
+
+vector bool long long
+test6_lt (vector unsigned long long x, vector unsigned long long y)
+{
+ return vec_cmplt (x, y);
+}
+
+vector bool long long
+test3_ne (vector signed long long x, vector signed long long y)
+{
+ return vec_cmpne (x, y);
+}
+
+vector bool long long
+test6_ne (vector unsigned long long x, vector unsigned long long y)
+{
+ return vec_cmpne (x, y);
+}
+
+/* { dg-final { scan-assembler-times "vcmpequd" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtsd" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtud" 4 } } */
+/* { dg-final { scan-assembler-times "xxlnor" 6 } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-short.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-short.c
new file mode 100644
index 00000000000..60676691efe
--- /dev/null
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-cmp-short.c
@@ -0,0 +1,87 @@
+/* Verify that overloaded built-ins for vec_cmp with short
+ inputs produce the right code. */
+
+/* { dg-do compile } */
+/* { dg-require-effective-target powerpc_p8vector_ok } */
+/* { dg-options "-mpower8-vector -O2" } */
+
+#include <altivec.h>
+
+vector bool short
+test3_eq (vector signed short x, vector signed short y)
+{
+ return vec_cmpeq (x, y);
+}
+
+vector bool short
+test6_eq (vector unsigned short x, vector unsigned short y)
+{
+ return vec_cmpeq (x, y);
+}
+
+vector bool short
+test3_ge (vector signed short x, vector signed short y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool short
+test6_ge (vector unsigned short x, vector unsigned short y)
+{
+ return vec_cmpge (x, y);
+}
+
+vector bool short
+test3_gt (vector signed short x, vector signed short y)
+{
+ return vec_cmpgt (x, y);
+}
+
+vector bool short
+test6_gt (vector unsigned short x, vector unsigned short y)
+{
+ return vec_cmpgt (x, y);
+}
+
+vector bool short
+test3_le (vector signed short x, vector signed short y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool short
+test6_le (vector unsigned short x, vector unsigned short y)
+{
+ return vec_cmple (x, y);
+}
+
+vector bool short
+test3_lt (vector signed short x, vector signed short y)
+{
+ return vec_cmplt (x, y);
+}
+
+vector bool short
+test6_lt (vector unsigned short x, vector unsigned short y)
+{
+ return vec_cmplt (x, y);
+}
+
+vector bool short
+test3_ne (vector signed short x, vector signed short y)
+{
+ return vec_cmpne (x, y);
+}
+
+vector bool short
+test6_ne (vector unsigned short x, vector unsigned short y)
+{
+ return vec_cmpne (x, y);
+}
+
+/* { dg-final { scan-assembler-times "vcmpequh" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtsh" 4 } } */
+/* { dg-final { scan-assembler-times "vcmpgtuh" 4 } } */
+/* { dg-final { scan-assembler-times "xxlnor" 6 } } */
+
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-int128-p8.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-int128-p8.c
index 97d6b945f43..b1cf0a78628 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-int128-p8.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-int128-p8.c
@@ -5,7 +5,8 @@
/* { dg-require-effective-target powerpc_p8vector_ok } */
/* { dg-require-effective-target int128 } */
/* { dg-require-effective-target lp64 } */
-/* { dg-options "-mpower8-vector" } */
+/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power8" } } */
+/* { dg-options "-mpower8-vector -mcpu=power8 -O2" } */
/* { dg-additional-options "-maix64" { target powerpc-ibm-aix* } } */
#include "altivec.h"
@@ -22,5 +23,5 @@ test2 (vector unsigned __int128 x, vector unsigned __int128 y)
return vec_mul (x, y);
}
-/* { dg-final { scan-assembler-times "\[ \t\]mulld " 6 } } */
-/* { dg-final { scan-assembler-times "\[ \t\]mulhdu" 2 } } */
+/* { dg-final { scan-assembler-times {\mmulld\M} 6 } } */
+/* { dg-final { scan-assembler-times {\mmulhdu\M} 2 } } */
diff --git a/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-int128-p9.c b/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-int128-p9.c
index e81ea5f3134..657188435d4 100644
--- a/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-int128-p9.c
+++ b/gcc/testsuite/gcc.target/powerpc/fold-vec-mult-int128-p9.c
@@ -2,10 +2,10 @@
inputs produce the right results. */
/* { dg-do compile } */
-/* { dg-require-effective-target powerpc_float128_hw_ok } */
+/* { dg-require-effective-target powerpc_p9vector_ok } */
/* { dg-require-effective-target int128 } */
/* { dg-skip-if "do not override -mcpu" { powerpc*-*-* } { "-mcpu=*" } { "-mcpu=power9" } } */
-/* { dg-options "-mcpu=power9 -O2" } */
+/* { dg-options "-mpower9-vector -mcpu=power9 -O2" } */
/* { dg-additional-options "-maix64" { target powerpc-ibm-aix* } } */
#include "altivec.h"
@@ -22,4 +22,5 @@ test2 (vector unsigned __int128 x, vector unsigned __int128 y)
return vec_mul (x, y);
}
-/* { dg-final { scan-assembler-times "\[ \t\]xsmulqp" 2 } } */
+/* { dg-final { scan-assembler-times {\mmulld\M} 4 } } */
+/* { dg-final { scan-assembler-times {\mmulhdu\M} 2 } } */
diff --git a/gcc/testsuite/gfortran.dg/graphite/id-27.f90 b/gcc/testsuite/gfortran.dg/graphite/id-27.f90
new file mode 100644
index 00000000000..e1e7ec0951f
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/graphite/id-27.f90
@@ -0,0 +1,40 @@
+! { dg-additional-options "-Ofast" }
+MODULE module_ra_gfdleta
+ INTEGER, PARAMETER :: NBLY=15
+ REAL , SAVE :: EM1(28,180),EM1WDE(28,180),TABLE1(28,180), &
+ TABLE2(28,180),TABLE3(28,180),EM3(28,180), &
+ SOURCE(28,NBLY), DSRCE(28,NBLY)
+CONTAINS
+ SUBROUTINE TABLE
+ INTEGER, PARAMETER :: NBLX=47
+ INTEGER , PARAMETER:: NBLW = 163
+ REAL :: &
+ SUM(28,180),PERTSM(28,180),SUM3(28,180), &
+ SUMWDE(28,180),SRCWD(28,NBLX),SRC1NB(28,NBLW), &
+ DBDTNB(28,NBLW)
+ REAL :: &
+ ZMASS(181),ZROOT(181),SC(28),DSC(28),XTEMV(28), &
+ TFOUR(28),FORTCU(28),X(28),X1(28),X2(180),SRCS(28), &
+ R1T(28),R2(28),S2(28),T3(28),R1WD(28)
+ REAL :: EXPO(180),FAC(180)
+ I = 0
+ DO 417 J=121,180
+ FAC(J)=ZMASS(J)*(ONE-(ONE+X2(J))*EXPO(J))/(X2(J)*X2(J))
+417 CONTINUE
+ DO 421 J=121,180
+ SUM3(I,J)=SUM3(I,J)+DBDTNB(I,N)*FAC(J)
+421 CONTINUE
+ IF (CENT.GT.160. .AND. CENT.LT.560.) THEN
+ DO 420 J=1,180
+ DO 420 I=1,28
+ SUMWDE(I,J)=SUMWDE(I,J)+SRC1NB(I,N)*EXPO(J)
+420 CONTINUE
+ ENDIF
+ DO 433 J=121,180
+ EM3(I,J)=SUM3(I,J)/FORTCU(I)
+433 CONTINUE
+ DO 501 I=1,28
+ EM1WDE(I,J)=SUMWDE(I,J)/TFOUR(I)
+501 CONTINUE
+ END SUBROUTINE TABLE
+ END MODULE module_RA_GFDLETA
diff --git a/gcc/testsuite/gfortran.dg/graphite/id-28.f90 b/gcc/testsuite/gfortran.dg/graphite/id-28.f90
new file mode 100644
index 00000000000..d66cb12006e
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/graphite/id-28.f90
@@ -0,0 +1,15 @@
+! Verify we elide modulo operations we cannot represent
+module OPMATRIX_MODULE
+ implicit none
+ type opmatrix_type
+ real(kind=kind(1.0d0)), dimension(:,:), pointer :: restricted
+ end type
+ interface zero_
+ module procedure zero
+ end interface
+contains
+ subroutine zero(self)
+ type(opmatrix_type) :: self
+ self%restricted = 0.0d0
+ end subroutine
+end
diff --git a/gcc/testsuite/gfortran.dg/graphite/interchange-3.f90 b/gcc/testsuite/gfortran.dg/graphite/interchange-3.f90
index 9ad9fd66d0f..8070bbb4a8d 100644
--- a/gcc/testsuite/gfortran.dg/graphite/interchange-3.f90
+++ b/gcc/testsuite/gfortran.dg/graphite/interchange-3.f90
@@ -24,4 +24,4 @@ Program FOO
end Program FOO
-! { dg-final { scan-tree-dump "tiled" "graphite" { xfail *-*-* } } }
+! { dg-final { scan-tree-dump "tiled" "graphite" } }
diff --git a/gcc/testsuite/gfortran.dg/graphite/pr14741.f90 b/gcc/testsuite/gfortran.dg/graphite/pr14741.f90
index 0c032fe14b7..e40262f1288 100644
--- a/gcc/testsuite/gfortran.dg/graphite/pr14741.f90
+++ b/gcc/testsuite/gfortran.dg/graphite/pr14741.f90
@@ -24,4 +24,4 @@ SUBROUTINE mult(A,B,C,N)
ENDDO
END SUBROUTINE mult
-! { dg-final { scan-tree-dump "tiled by" "graphite" { xfail aarch64*-*-* } } }
+! { dg-final { scan-tree-dump "tiled by" "graphite" } }
diff --git a/gcc/testsuite/gfortran.dg/graphite/pr82451.f b/gcc/testsuite/gfortran.dg/graphite/pr82451.f
new file mode 100644
index 00000000000..88ff85b1a99
--- /dev/null
+++ b/gcc/testsuite/gfortran.dg/graphite/pr82451.f
@@ -0,0 +1,39 @@
+! { dg-do compile }
+! { dg-options "-O2 -floop-nest-optimize" }
+ MODULE LES3D_DATA
+ PARAMETER ( NSCHEME = 4, ICHEM = 0, ISGSK = 0, IVISC = 1 )
+ DOUBLE PRECISION DT, TIME, STATTIME, CFL, RELNO, TSTND, ALREF
+ INTEGER IDYN, IMAX, JMAX, KMAX
+ PARAMETER( RUNIV = 8.3145D3,
+ > TPRANDLT = 0.91D0)
+ DOUBLE PRECISION,ALLOCATABLE,DIMENSION(:,:,:) ::
+ > U, V, W, P, T, H, EK,
+ > UAV, VAV, WAV, PAV, TAV, HAV, EKAV
+ DOUBLE PRECISION,ALLOCATABLE,DIMENSION(:,:,:,:) ::
+ > CONC, HF, QAV, COAV, HFAV, DU
+ DOUBLE PRECISION,ALLOCATABLE,DIMENSION(:,:,:,:,:) ::
+ > Q
+ END MODULE LES3D_DATA
+ SUBROUTINE FLUXJ()
+ USE LES3D_DATA
+ ALLOCATABLE QS(:), FSJ(:,:,:)
+ ALLOCATABLE DWDX(:),DWDY(:),DWDZ(:)
+ ALLOCATABLE DHDY(:), DKDY(:)
+ PARAMETER ( R12I = 1.0D0 / 12.0D0,
+ > TWO3 = 2.0D0 / 3.0D0 )
+ ALLOCATE( QS(IMAX-1), FSJ(IMAX-1,0:JMAX-1,ND))
+ ALLOCATE( DWDX(IMAX-1),DWDY(IMAX-1),DWDZ(IMAX-1))
+ I1 = 1
+ DO K = K1,K2
+ DO J = J1,J2
+ DO I = I1, I2
+ FSJ(I,J,5) = FSJ(I,J,5) + PAV(I,J,K) * QS(I)
+ END DO
+ DO I = I1, I2
+ DWDX(I) = DXI * R12I * (WAV(I-2,J,K) - WAV(I+2,J,K) +
+ > 8.0D0 * (WAV(I+1,J,K) - WAV(I-1,J,K)))
+ END DO
+ END DO
+ END DO
+ DEALLOCATE( QS, FSJ, DHDY, DKDY)
+ END
diff --git a/gcc/testsuite/lib/target-supports.exp b/gcc/testsuite/lib/target-supports.exp
index 713e62c4aa1..5fbdb740ac6 100644
--- a/gcc/testsuite/lib/target-supports.exp
+++ b/gcc/testsuite/lib/target-supports.exp
@@ -440,8 +440,8 @@ proc check_ifunc_available { } {
extern "C" {
#endif
typedef void F (void);
- F* g() {}
- void f() __attribute__((ifunc("g")));
+ F* g (void) {}
+ void f () __attribute__ ((ifunc ("g")));
#ifdef __cplusplus
}
#endif
diff --git a/gcc/testsuite/obj-c++.dg/exceptions-6.mm b/gcc/testsuite/obj-c++.dg/exceptions-6.mm
index 58882fed8b7..6f6ba783ea7 100644
--- a/gcc/testsuite/obj-c++.dg/exceptions-6.mm
+++ b/gcc/testsuite/obj-c++.dg/exceptions-6.mm
@@ -11,15 +11,15 @@ void test (id object)
@throw object; /* Ok */
@throw; /* { dg-error ".@throw. .rethrow. used outside of a @catch block" } */
@throw (object); /* Ok. */
- @throw (id)0
-} /* { dg-error "expected" } */
+ @throw (id)0 /* { dg-error "expected" } */
+}
void test2 (id object)
{
@throw object); /* { dg-error "expected" } */
@throw (...); /* { dg-error "expected" } */
@throw (); /* { dg-error "expected" } */
- @throw
+ @throw /* { dg-error "expected" } */
} /* { dg-error "expected" } */
void test3 (id object1, id object2)
diff --git a/gcc/testsuite/obj-c++.dg/pr48187.mm b/gcc/testsuite/obj-c++.dg/pr48187.mm
index 750710b1f24..99677a56244 100644
--- a/gcc/testsuite/obj-c++.dg/pr48187.mm
+++ b/gcc/testsuite/obj-c++.dg/pr48187.mm
@@ -1,19 +1,19 @@
/* { dg-do compile } */
@interface A
-{
+{ /* { dg-error "xpected" } */
] /* { dg-error "xpected" } */
}
@end
@interface B
-{
+{ /* { dg-error "xpected" } */
]; /* { dg-error "xpected" } */
}
@end
@interface C
-{
+{ /* { dg-error "xpected" } */
]; /* { dg-error "xpected" } */
int x;
}
@@ -21,7 +21,7 @@
@interface D
{
- (
+ ( /* { dg-error "xpected" } */
} /* { dg-error "xpected" } */
@end
diff --git a/gcc/testsuite/objc.dg/exceptions-6.m b/gcc/testsuite/objc.dg/exceptions-6.m
index 58882fed8b7..74be98d39fa 100644
--- a/gcc/testsuite/objc.dg/exceptions-6.m
+++ b/gcc/testsuite/objc.dg/exceptions-6.m
@@ -11,8 +11,8 @@ void test (id object)
@throw object; /* Ok */
@throw; /* { dg-error ".@throw. .rethrow. used outside of a @catch block" } */
@throw (object); /* Ok. */
- @throw (id)0
-} /* { dg-error "expected" } */
+ @throw (id)0 /* { dg-error "expected" } */
+}
void test2 (id object)
{
diff --git a/gcc/tree-affine.c b/gcc/tree-affine.c
index 759a6807b68..092b1e017af 100644
--- a/gcc/tree-affine.c
+++ b/gcc/tree-affine.c
@@ -329,7 +329,8 @@ tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
aff_combination_const (comb, type, bytepos);
if (TREE_CODE (core) == MEM_REF)
{
- aff_combination_add_cst (comb, wi::to_widest (TREE_OPERAND (core, 1)));
+ tree mem_offset = TREE_OPERAND (core, 1);
+ aff_combination_add_cst (comb, wi::to_poly_widest (mem_offset));
core = TREE_OPERAND (core, 0);
}
else
@@ -428,7 +429,7 @@ tree_to_aff_combination (tree expr, tree type, aff_tree *comb)
default:
{
- if (poly_tree_p (expr))
+ if (poly_int_tree_p (expr))
{
aff_combination_const (comb, type, wi::to_poly_widest (expr));
return;
@@ -816,7 +817,7 @@ wide_int_constant_multiple_p (const poly_widest_int &val,
if (known_zero (val))
{
- if (*mult_set && may_ne (*mult, 0))
+ if (*mult_set && maybe_nonzero (*mult))
return false;
*mult_set = true;
*mult = 0;
diff --git a/gcc/tree-cfg.c b/gcc/tree-cfg.c
index 2fc2c9c52ac..53978fbafa1 100644
--- a/gcc/tree-cfg.c
+++ b/gcc/tree-cfg.c
@@ -2952,7 +2952,7 @@ verify_expr (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
error ("invalid first operand of MEM_REF");
return x;
}
- if (!poly_tree_p (TREE_OPERAND (t, 1))
+ if (!poly_int_tree_p (TREE_OPERAND (t, 1))
|| !POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (t, 1))))
{
error ("invalid offset operand of MEM_REF");
@@ -3055,8 +3055,8 @@ verify_expr (tree *tp, int *walk_subtrees, void *data ATTRIBUTE_UNUSED)
tree t1 = TREE_OPERAND (t, 1);
tree t2 = TREE_OPERAND (t, 2);
poly_uint64 size, bitpos;
- if (!poly_tree_p (t1, &size)
- || !poly_tree_p (t2, &bitpos)
+ if (!poly_int_tree_p (t1, &size)
+ || !poly_int_tree_p (t2, &bitpos)
|| !types_compatible_p (bitsizetype, TREE_TYPE (t1))
|| !types_compatible_p (bitsizetype, TREE_TYPE (t2)))
{
@@ -3358,7 +3358,7 @@ verify_types_in_gimple_reference (tree expr, bool require_lvalue)
debug_generic_stmt (expr);
return true;
}
- if (!poly_tree_p (TREE_OPERAND (expr, 1))
+ if (!poly_int_tree_p (TREE_OPERAND (expr, 1))
|| !POINTER_TYPE_P (TREE_TYPE (TREE_OPERAND (expr, 1))))
{
error ("invalid offset operand in MEM_REF");
@@ -3375,7 +3375,7 @@ verify_types_in_gimple_reference (tree expr, bool require_lvalue)
return true;
}
if (!TMR_OFFSET (expr)
- || !poly_tree_p (TMR_OFFSET (expr))
+ || !poly_int_tree_p (TMR_OFFSET (expr))
|| !POINTER_TYPE_P (TREE_TYPE (TMR_OFFSET (expr))))
{
error ("invalid offset operand in TARGET_MEM_REF");
diff --git a/gcc/tree-chrec.c b/gcc/tree-chrec.c
index 3867072566e..beddf108104 100644
--- a/gcc/tree-chrec.c
+++ b/gcc/tree-chrec.c
@@ -872,8 +872,7 @@ reset_evolution_in_loop (unsigned loop_num,
new_evol);
tree right = reset_evolution_in_loop (loop_num, CHREC_RIGHT (chrec),
new_evol);
- return build3 (POLYNOMIAL_CHREC, TREE_TYPE (left),
- CHREC_VAR (chrec), left, right);
+ return build_polynomial_chrec (CHREC_VARIABLE (chrec), left, right);
}
while (TREE_CODE (chrec) == POLYNOMIAL_CHREC
diff --git a/gcc/tree-chrec.h b/gcc/tree-chrec.h
index 4a8a3734d79..4838bae89aa 100644
--- a/gcc/tree-chrec.h
+++ b/gcc/tree-chrec.h
@@ -157,8 +157,9 @@ build_polynomial_chrec (unsigned loop_num,
if (chrec_zerop (right))
return left;
- return build3 (POLYNOMIAL_CHREC, TREE_TYPE (left),
- build_int_cst (NULL_TREE, loop_num), left, right);
+ tree chrec = build2 (POLYNOMIAL_CHREC, TREE_TYPE (left), left, right);
+ CHREC_VARIABLE (chrec) = loop_num;
+ return chrec;
}
/* Determines whether the expression CHREC is a constant. */
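The tree-chrec changes above drop the INTEGER_CST loop-number operand from POLYNOMIAL_CHREC and store it in the new tree_base.chrec_var field added in tree-core.h just below; CHREC_VARIABLE reads it directly, and the pretty-printer change further down dumps it with pp_printf ("}_%u", ...). As a mental model only (plain C, not GCC internals), a chrec {left, +, right}_x is the value left + n * right after n iterations of loop x:

#include <stdio.h>

/* Minimal sketch of the new representation: the loop number lives in a
   plain unsigned field instead of a tree operand.  */
struct chrec
{
  long left;     /* CHREC_LEFT: value before the first iteration.  */
  long right;    /* CHREC_RIGHT: increment per loop iteration.  */
  unsigned var;  /* CHREC_VARIABLE: number of the owning loop.  */
};

/* Value of C after N iterations of loop C->var.  */
static long
chrec_apply (const struct chrec *c, long n)
{
  return c->left + n * c->right;
}

int
main (void)
{
  struct chrec c = { 0, 4, 1 };  /* dumped as {0, +, 4}_1 */
  printf ("{%ld, +, %ld}_%u after 10 iterations: %ld\n",
          c.left, c.right, c.var, chrec_apply (&c, 10));
  return 0;
}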
diff --git a/gcc/tree-core.h b/gcc/tree-core.h
index 3f30d290d96..4e18dc650c5 100644
--- a/gcc/tree-core.h
+++ b/gcc/tree-core.h
@@ -982,6 +982,9 @@ struct GTY(()) tree_base {
/* SSA version number. This field is only used with SSA_NAME. */
unsigned int version;
+ /* CHREC_VARIABLE. This field is only used with POLYNOMIAL_CHREC. */
+ unsigned int chrec_var;
+
/* Internal function code. */
enum internal_fn ifn;
diff --git a/gcc/tree-data-ref.c b/gcc/tree-data-ref.c
index d26cf3feb04..54705940a24 100644
--- a/gcc/tree-data-ref.c
+++ b/gcc/tree-data-ref.c
@@ -967,15 +967,14 @@ access_fn_component_p (tree op)
}
/* Determines the base object and the list of indices of memory reference
- DR, analyzed in LOOP and instantiated in loop nest NEST. */
+ DR, analyzed in LOOP and instantiated before NEST. */
static void
-dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
+dr_analyze_indices (struct data_reference *dr, edge nest, loop_p loop)
{
vec<tree> access_fns = vNULL;
tree ref, op;
tree base, off, access_fn;
- basic_block before_loop;
/* If analyzing a basic-block there are no indices to analyze
and thus no access functions. */
@@ -987,7 +986,6 @@ dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
}
ref = DR_REF (dr);
- before_loop = block_before_loop (nest);
/* REALPART_EXPR and IMAGPART_EXPR can be handled like accesses
into a two element array with a constant index. The base is
@@ -1012,7 +1010,7 @@ dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
{
op = TREE_OPERAND (ref, 1);
access_fn = analyze_scalar_evolution (loop, op);
- access_fn = instantiate_scev (before_loop, loop, access_fn);
+ access_fn = instantiate_scev (nest, loop, access_fn);
access_fns.safe_push (access_fn);
}
else if (TREE_CODE (ref) == COMPONENT_REF
@@ -1044,7 +1042,7 @@ dr_analyze_indices (struct data_reference *dr, loop_p nest, loop_p loop)
{
op = TREE_OPERAND (ref, 0);
access_fn = analyze_scalar_evolution (loop, op);
- access_fn = instantiate_scev (before_loop, loop, access_fn);
+ access_fn = instantiate_scev (nest, loop, access_fn);
if (TREE_CODE (access_fn) == POLYNOMIAL_CHREC)
{
tree orig_type;
@@ -1149,7 +1147,7 @@ free_data_ref (data_reference_p dr)
in which the data reference should be analyzed. */
struct data_reference *
-create_data_ref (loop_p nest, loop_p loop, tree memref, gimple *stmt,
+create_data_ref (edge nest, loop_p loop, tree memref, gimple *stmt,
bool is_read, bool is_conditional_in_stmt)
{
struct data_reference *dr;
@@ -1464,8 +1462,8 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
DR_BASE_ADDRESS (dr_a2->dr), 0)
|| !operand_equal_p (DR_OFFSET (dr_a1->dr),
DR_OFFSET (dr_a2->dr), 0)
- || !poly_tree_p (DR_INIT (dr_a1->dr), &init_a1)
- || !poly_tree_p (DR_INIT (dr_a2->dr), &init_a2))
+ || !poly_int_tree_p (DR_INIT (dr_a1->dr), &init_a1)
+ || !poly_int_tree_p (DR_INIT (dr_a2->dr), &init_a2))
continue;
/* Don't combine if we can't tell which one comes first. */
@@ -1498,8 +1496,8 @@ prune_runtime_alias_test_list (vec<dr_with_seg_len_pair_t> *alias_pairs,
if (!operand_equal_p (dr_a1->seg_len, dr_a2->seg_len, 0))
{
poly_uint64 seg_len_a1, seg_len_a2;
- if (!poly_tree_p (dr_a1->seg_len, &seg_len_a1)
- || !poly_tree_p (dr_a2->seg_len, &seg_len_a2))
+ if (!poly_int_tree_p (dr_a1->seg_len, &seg_len_a1)
+ || !poly_int_tree_p (dr_a2->seg_len, &seg_len_a2))
continue;
tree indicator_a = dr_direction_indicator (dr_a1->dr);
@@ -1592,8 +1590,8 @@ create_intersect_range_checks_index (struct loop *loop, tree *cond_expr,
return false;
poly_uint64 seg_len1, seg_len2;
- if (!poly_tree_p (dr_a.seg_len, &seg_len1)
- || !poly_tree_p (dr_b.seg_len, &seg_len2))
+ if (!poly_int_tree_p (dr_a.seg_len, &seg_len1)
+ || !poly_int_tree_p (dr_b.seg_len, &seg_len2))
return false;
if (!tree_fits_shwi_p (DR_STEP (dr_a.dr)))
@@ -3864,7 +3862,9 @@ analyze_siv_subscript (tree chrec_a,
overlaps_b, overlaps_a, last_conflicts);
else if (evolution_function_is_affine_in_loop (chrec_a, loop_nest_num)
- && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num))
+ && evolution_function_right_is_integer_cst (chrec_a)
+ && evolution_function_is_affine_in_loop (chrec_b, loop_nest_num)
+ && evolution_function_right_is_integer_cst (chrec_b))
{
if (!chrec_contains_symbols (chrec_a)
&& !chrec_contains_symbols (chrec_b))
@@ -3998,8 +3998,10 @@ analyze_miv_subscript (tree chrec_a,
else if (evolution_function_is_affine_multivariate_p (chrec_a, loop_nest->num)
&& !chrec_contains_symbols (chrec_a)
+ && evolution_function_right_is_integer_cst (chrec_a)
&& evolution_function_is_affine_multivariate_p (chrec_b, loop_nest->num)
- && !chrec_contains_symbols (chrec_b))
+ && !chrec_contains_symbols (chrec_b)
+ && evolution_function_right_is_integer_cst (chrec_b))
{
/* testsuite/.../ssa-chrec-35.c
{0, +, 1}_2 vs. {0, +, 1}_3
@@ -5044,7 +5046,8 @@ find_data_references_in_stmt (struct loop *nest, gimple *stmt,
FOR_EACH_VEC_ELT (references, i, ref)
{
- dr = create_data_ref (nest, loop_containing_stmt (stmt), ref->ref,
+ dr = create_data_ref (nest ? loop_preheader_edge (nest) : NULL,
+ loop_containing_stmt (stmt), ref->ref,
stmt, ref->is_read, ref->is_conditional_in_stmt);
gcc_assert (dr != NULL);
datarefs->safe_push (dr);
@@ -5060,7 +5063,7 @@ find_data_references_in_stmt (struct loop *nest, gimple *stmt,
should be analyzed. */
bool
-graphite_find_data_references_in_stmt (loop_p nest, loop_p loop, gimple *stmt,
+graphite_find_data_references_in_stmt (edge nest, loop_p loop, gimple *stmt,
vec<data_reference_p> *datarefs)
{
unsigned i;
diff --git a/gcc/tree-data-ref.h b/gcc/tree-data-ref.h
index 7a8fb3f5f4c..be5a3b32e63 100644
--- a/gcc/tree-data-ref.h
+++ b/gcc/tree-data-ref.h
@@ -445,11 +445,11 @@ extern void free_data_ref (data_reference_p);
extern void free_data_refs (vec<data_reference_p> );
extern bool find_data_references_in_stmt (struct loop *, gimple *,
vec<data_reference_p> *);
-extern bool graphite_find_data_references_in_stmt (loop_p, loop_p, gimple *,
+extern bool graphite_find_data_references_in_stmt (edge, loop_p, gimple *,
vec<data_reference_p> *);
tree find_data_references_in_loop (struct loop *, vec<data_reference_p> *);
bool loop_nest_has_data_refs (loop_p loop);
-struct data_reference *create_data_ref (loop_p, loop_p, tree, gimple *, bool,
+struct data_reference *create_data_ref (edge, loop_p, tree, gimple *, bool,
bool);
extern bool find_loop_nest (struct loop *, vec<loop_p> *);
extern struct data_dependence_relation *initialize_data_dependence_relation
diff --git a/gcc/tree-dfa.c b/gcc/tree-dfa.c
index d91f438371a..4c21275f765 100644
--- a/gcc/tree-dfa.c
+++ b/gcc/tree-dfa.c
@@ -377,9 +377,9 @@ get_or_create_ssa_default_def (struct function *fn, tree var)
true, the storage order of the reference is reversed. */
tree
-get_ref_base_and_extent (tree exp, poly_int64 *poffset,
- poly_int64 *psize,
- poly_int64 *pmax_size,
+get_ref_base_and_extent (tree exp, poly_int64_pod *poffset,
+ poly_int64_pod *psize,
+ poly_int64_pod *pmax_size,
bool *preverse)
{
poly_offset_int bitsize = -1;
@@ -403,7 +403,7 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
bitsize = GET_MODE_BITSIZE (mode);
}
if (size_tree != NULL_TREE
- && poly_tree_p (size_tree))
+ && poly_int_tree_p (size_tree))
bitsize = wi::to_poly_offset (size_tree);
*preverse = reverse_storage_order_for_component_p (exp);
@@ -427,7 +427,7 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
tree field = TREE_OPERAND (exp, 1);
tree this_offset = component_ref_field_offset (exp);
- if (this_offset && poly_tree_p (this_offset))
+ if (this_offset && poly_int_tree_p (this_offset))
{
poly_offset_int woffset = (wi::to_poly_offset (this_offset)
<< LOG2_BITS_PER_UNIT);
@@ -450,9 +450,9 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
tree fsize = DECL_SIZE_UNIT (field);
tree ssize = TYPE_SIZE_UNIT (stype);
if (fsize == NULL
- || !poly_tree_p (fsize)
+ || !poly_int_tree_p (fsize)
|| ssize == NULL
- || !poly_tree_p (ssize))
+ || !poly_int_tree_p (ssize))
maxsize = -1;
else
{
@@ -474,7 +474,7 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
because that would get us out of the structure otherwise. */
if (known_size_p (maxsize)
&& csize
- && poly_tree_p (csize))
+ && poly_int_tree_p (csize))
maxsize = wi::to_poly_offset (csize) - bit_offset;
else
maxsize = -1;
@@ -489,9 +489,9 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
tree low_bound, unit_size;
/* If the resulting bit-offset is constant, track it. */
- if (poly_tree_p (index)
+ if (poly_int_tree_p (index)
&& (low_bound = array_ref_low_bound (exp),
- poly_tree_p (low_bound))
+ poly_int_tree_p (low_bound))
&& (unit_size = array_ref_element_size (exp),
TREE_CODE (unit_size) == INTEGER_CST))
{
@@ -516,7 +516,7 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
because that would get us outside of the array otherwise. */
if (known_size_p (maxsize)
&& asize
- && poly_tree_p (asize))
+ && poly_int_tree_p (asize))
maxsize = wi::to_poly_offset (asize) - bit_offset;
else
maxsize = -1;
@@ -564,7 +564,7 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
if (seen_variable_array_ref
&& known_size_p (maxsize)
&& (TYPE_SIZE (TREE_TYPE (exp)) == NULL_TREE
- || !poly_tree_p (TYPE_SIZE (TREE_TYPE (exp)))
+ || !poly_int_tree_p (TYPE_SIZE (TREE_TYPE (exp)))
|| may_eq (bit_offset + maxsize,
wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (exp))))))
maxsize = -1;
@@ -629,7 +629,7 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
if (TREE_CODE (TREE_TYPE (exp)) == ARRAY_TYPE
|| (seen_variable_array_ref
&& (sz_tree == NULL_TREE
- || !poly_tree_p (sz_tree)
+ || !poly_int_tree_p (sz_tree)
|| may_eq (bit_offset + maxsize,
wi::to_poly_offset (sz_tree)))))
maxsize = -1;
@@ -638,7 +638,7 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
base decl. */
else if (!known_size_p (maxsize)
&& DECL_SIZE (exp)
- && poly_tree_p (DECL_SIZE (exp)))
+ && poly_int_tree_p (DECL_SIZE (exp)))
maxsize = wi::to_poly_offset (DECL_SIZE (exp)) - bit_offset;
}
else if (CONSTANT_CLASS_P (exp))
@@ -647,7 +647,7 @@ get_ref_base_and_extent (tree exp, poly_int64 *poffset,
base type constant. */
if (!known_size_p (maxsize)
&& TYPE_SIZE (TREE_TYPE (exp))
- && poly_tree_p (TYPE_SIZE (TREE_TYPE (exp))))
+ && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (exp))))
maxsize = (wi::to_poly_offset (TYPE_SIZE (TREE_TYPE (exp)))
- bit_offset);
}
@@ -705,12 +705,10 @@ get_ref_base_and_extent_hwi (tree exp, HOST_WIDE_INT *poffset,
its argument or a constant if the argument is known to be constant. */
tree
-get_addr_base_and_unit_offset_1 (tree exp, poly_int64 *poffset,
+get_addr_base_and_unit_offset_1 (tree exp, poly_int64_pod *poffset,
tree (*valueize) (tree))
{
poly_int64 byte_offset = 0;
- poly_int64 sub_byte_offset;
- poly_uint64 sub_bit_offset;
/* Compute cumulative byte-offset for nested component-refs and array-refs,
and find the ultimate containing object. */
@@ -719,10 +717,15 @@ get_addr_base_and_unit_offset_1 (tree exp, poly_int64 *poffset,
switch (TREE_CODE (exp))
{
case BIT_FIELD_REF:
- if (!poly_tree_p (TREE_OPERAND (exp, 2), &sub_bit_offset)
- || !multiple_p (sub_bit_offset, BITS_PER_UNIT, &sub_byte_offset))
- return NULL_TREE;
- byte_offset += sub_byte_offset;
+ {
+ poly_int64 this_byte_offset;
+ poly_uint64 this_bit_offset;
+ if (!poly_int_tree_p (TREE_OPERAND (exp, 2), &this_bit_offset)
+ || !multiple_p (this_bit_offset, BITS_PER_UNIT,
+ &this_byte_offset))
+ return NULL_TREE;
+ byte_offset += this_byte_offset;
+ }
break;
case COMPONENT_REF:
@@ -732,7 +735,7 @@ get_addr_base_and_unit_offset_1 (tree exp, poly_int64 *poffset,
poly_int64 hthis_offset;
if (!this_offset
- || !poly_tree_p (this_offset, &hthis_offset)
+ || !poly_int_tree_p (this_offset, &hthis_offset)
|| (TREE_INT_CST_LOW (DECL_FIELD_BIT_OFFSET (field))
% BITS_PER_UNIT))
return NULL_TREE;
@@ -754,9 +757,9 @@ get_addr_base_and_unit_offset_1 (tree exp, poly_int64 *poffset,
index = (*valueize) (index);
/* If the resulting bit-offset is constant, track it. */
- if (poly_tree_p (index)
+ if (poly_int_tree_p (index)
&& (low_bound = array_ref_low_bound (exp),
- poly_tree_p (low_bound))
+ poly_int_tree_p (low_bound))
&& (unit_size = array_ref_element_size (exp),
TREE_CODE (unit_size) == INTEGER_CST))
{
@@ -842,7 +845,7 @@ done:
is not BITS_PER_UNIT-aligned. */
tree
-get_addr_base_and_unit_offset (tree exp, poly_int64 *poffset)
+get_addr_base_and_unit_offset (tree exp, poly_int64_pod *poffset)
{
return get_addr_base_and_unit_offset_1 (exp, poffset, NULL);
}
diff --git a/gcc/tree-dfa.h b/gcc/tree-dfa.h
index 6491b3c3f41..ce96360ae49 100644
--- a/gcc/tree-dfa.h
+++ b/gcc/tree-dfa.h
@@ -29,13 +29,13 @@ extern void debug_dfa_stats (void);
extern tree ssa_default_def (struct function *, tree);
extern void set_ssa_default_def (struct function *, tree, tree);
extern tree get_or_create_ssa_default_def (struct function *, tree);
-extern tree get_ref_base_and_extent (tree, poly_int64 *,
- poly_int64 *, poly_int64 *, bool *);
+extern tree get_ref_base_and_extent (tree, poly_int64_pod *, poly_int64_pod *,
+ poly_int64_pod *, bool *);
extern tree get_ref_base_and_extent_hwi (tree, HOST_WIDE_INT *,
HOST_WIDE_INT *, bool *);
-extern tree get_addr_base_and_unit_offset_1 (tree, poly_int64 *,
+extern tree get_addr_base_and_unit_offset_1 (tree, poly_int64_pod *,
tree (*) (tree));
-extern tree get_addr_base_and_unit_offset (tree, poly_int64 *);
+extern tree get_addr_base_and_unit_offset (tree, poly_int64_pod *);
extern bool stmt_references_abnormal_ssa_name (gimple *);
extern void replace_abnormal_ssa_names (gimple *);
extern void dump_enumerated_decls (FILE *, dump_flags_t);
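The poly_int64 -> poly_int64_pod parameter changes here and in tree-dfa.c keep these exported routines callable with fields of plain structures (the _pod flavour presumably being the constructor-free variant of poly_int64). Both describe values of the form a + b*X with X a runtime quantity, for SVE the number of additional vector chunks. A self-contained toy model of the known/maybe comparison idiom used with such values (illustrative C, not GCC's actual template classes):

#include <stdbool.h>
#include <stdio.h>

/* Degree-1 polynomial value a + b*X for an unknown runtime X >= 0.  */
struct poly1
{
  long a;  /* constant term */
  long b;  /* coefficient of X */
};

/* Holds for every X >= 0.  */
static bool
known_le (struct poly1 p, struct poly1 q)
{
  return p.a <= q.a && p.b <= q.b;
}

/* Holds for at least one X >= 0.  */
static bool
maybe_le (struct poly1 p, struct poly1 q)
{
  return p.a <= q.a || p.b < q.b;
}

int
main (void)
{
  struct poly1 size = { 0, 16 };    /* 16*X bytes: X vector chunks */
  struct poly1 limit = { 32, 16 };  /* 32 + 16*X bytes */
  printf ("known_le %d maybe_le %d\n",
          known_le (size, limit), maybe_le (size, limit));
  return 0;
}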
diff --git a/gcc/tree-eh.c b/gcc/tree-eh.c
index 8741f2bf6ab..a1d35bace3a 100644
--- a/gcc/tree-eh.c
+++ b/gcc/tree-eh.c
@@ -2664,7 +2664,7 @@ tree_could_trap_p (tree expr)
return may_le (TREE_STRING_LENGTH (base), off);
tree size = DECL_SIZE_UNIT (base);
if (size == NULL_TREE
- || !poly_tree_p (size)
+ || !poly_int_tree_p (size)
|| may_le (wi::to_poly_offset (size), off))
return true;
/* Now we are sure the first byte of the access is inside
diff --git a/gcc/tree-loop-distribution.c b/gcc/tree-loop-distribution.c
index 37416a0231c..30091453e39 100644
--- a/gcc/tree-loop-distribution.c
+++ b/gcc/tree-loop-distribution.c
@@ -83,8 +83,8 @@ along with GCC; see the file COPYING3. If not see
loops and recover to the original one.
TODO:
- 1) We only distribute innermost loops now. This pass should handle loop
- nests in the future.
+ 1) We only distribute the innermost two-level loop nest now. We should
+ extend this to arbitrary loop nests in the future.
2) We only fuse partitions in SCC now. A better fusion algorithm is
desired to minimize loop overhead, maximize parallelism and maximize
data reuse. */
@@ -118,6 +118,11 @@ along with GCC; see the file COPYING3. If not see
#define MAX_DATAREFS_NUM \
((unsigned) PARAM_VALUE (PARAM_LOOP_MAX_DATAREFS_FOR_DATADEPS))
+/* Threshold controlling the number of distributed partitions. Since it may
+ become unnecessary if a memory stream cost model is invented in the future,
+ we define it as a temporary macro rather than a parameter. */
+#define NUM_PARTITION_THRESHOLD (4)
+
/* Hashtable helpers. */
struct ddr_hasher : nofree_ptr_hash <struct data_dependence_relation>
@@ -588,6 +593,19 @@ enum partition_type {
PTYPE_SEQUENTIAL
};
+/* Builtin info for loop distribution. */
+struct builtin_info
+{
+ /* Data references that a kind != PKIND_NORMAL partition is about. */
+ data_reference_p dst_dr;
+ data_reference_p src_dr;
+ /* Base address and size of memory objects operated on by the builtin. Note
+ both dest and source memory objects must have the same size. */
+ tree dst_base;
+ tree src_base;
+ tree size;
+};
+
/* Partition for loop distribution. */
struct partition
{
@@ -595,18 +613,12 @@ struct partition
bitmap stmts;
/* True if the partition defines variable which is used outside of loop. */
bool reduction_p;
- /* For builtin partition, true if it executes one iteration more than
- number of loop (latch) iterations. */
- bool plus_one;
enum partition_kind kind;
enum partition_type type;
- /* data-references a kind != PKIND_NORMAL partition is about. */
- data_reference_p main_dr;
- data_reference_p secondary_dr;
- /* Number of loop (latch) iterations. */
- tree niter;
/* Data references in the partition. */
bitmap datarefs;
+ /* Information about the builtin partition. */
+ struct builtin_info *builtin;
};
@@ -630,6 +642,9 @@ partition_free (partition *partition)
{
BITMAP_FREE (partition->stmts);
BITMAP_FREE (partition->datarefs);
+ if (partition->builtin)
+ free (partition->builtin);
+
free (partition);
}
@@ -714,9 +729,11 @@ ssa_name_has_uses_outside_loop_p (tree def, loop_p loop)
FOR_EACH_IMM_USE_FAST (use_p, imm_iter, def)
{
- gimple *use_stmt = USE_STMT (use_p);
- if (!is_gimple_debug (use_stmt)
- && loop != loop_containing_stmt (use_stmt))
+ if (is_gimple_debug (USE_STMT (use_p)))
+ continue;
+
+ basic_block use_bb = gimple_bb (USE_STMT (use_p));
+ if (!flow_bb_inside_loop_p (loop, use_bb))
return true;
}
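The rewrite of ssa_name_has_uses_outside_loop_p above replaces the loop-pointer comparison with flow_bb_inside_loop_p, so a use sitting in an inner loop of LOOP no longer counts as outside; that matters once whole nests are distributed later in this patch. A hypothetical C illustration (function and names invented here):

/* T is defined in the outer loop and used in the inner loop.  The
   use's containing loop differs from T's loop, yet the use is still
   inside the nest, which is exactly what flow_bb_inside_loop_p tests.  */
void
nest_use (int a[100][100], int n)
{
  for (int i = 0; i < n && i < 100; i++)
    {
      int t = i * 2;             /* def in the outer loop */
      for (int j = 0; j < n && j < 100; j++)
        a[i][j] = t;             /* use in the inner loop */
    }
}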
@@ -887,43 +904,6 @@ generate_loops_for_partition (struct loop *loop, partition *partition,
free (bbs);
}
-/* Build the size argument for a memory operation call. */
-
-static tree
-build_size_arg_loc (location_t loc, data_reference_p dr, tree nb_iter,
- bool plus_one)
-{
- tree size = fold_convert_loc (loc, sizetype, nb_iter);
- if (plus_one)
- size = size_binop (PLUS_EXPR, size, size_one_node);
- size = fold_build2_loc (loc, MULT_EXPR, sizetype, size,
- TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
- size = fold_convert_loc (loc, size_type_node, size);
- return size;
-}
-
-/* Build an address argument for a memory operation call. */
-
-static tree
-build_addr_arg_loc (location_t loc, data_reference_p dr, tree nb_bytes)
-{
- tree addr_base;
-
- addr_base = size_binop_loc (loc, PLUS_EXPR, DR_OFFSET (dr), DR_INIT (dr));
- addr_base = fold_convert_loc (loc, sizetype, addr_base);
-
- /* Test for a negative stride, iterating over every element. */
- if (tree_int_cst_sgn (DR_STEP (dr)) == -1)
- {
- addr_base = size_binop_loc (loc, MINUS_EXPR, addr_base,
- fold_convert_loc (loc, sizetype, nb_bytes));
- addr_base = size_binop_loc (loc, PLUS_EXPR, addr_base,
- TYPE_SIZE_UNIT (TREE_TYPE (DR_REF (dr))));
- }
-
- return fold_build_pointer_plus_loc (loc, DR_BASE_ADDRESS (dr), addr_base);
-}
-
/* If VAL memory representation contains the same value in all bytes,
return that value, otherwise return -1.
E.g. for 0x24242424 return 0x24, for IEEE double
@@ -991,27 +971,23 @@ static void
generate_memset_builtin (struct loop *loop, partition *partition)
{
gimple_stmt_iterator gsi;
- gimple *stmt, *fn_call;
tree mem, fn, nb_bytes;
- location_t loc;
tree val;
-
- stmt = DR_STMT (partition->main_dr);
- loc = gimple_location (stmt);
+ struct builtin_info *builtin = partition->builtin;
+ gimple *fn_call;
/* The new statements will be placed before LOOP. */
gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
- nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
- partition->plus_one);
+ nb_bytes = builtin->size;
nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
- mem = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
+ mem = builtin->dst_base;
mem = force_gimple_operand_gsi (&gsi, mem, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
/* This exactly matches the pattern recognition in classify_partition. */
- val = gimple_assign_rhs1 (stmt);
+ val = gimple_assign_rhs1 (DR_STMT (builtin->dst_dr));
/* Handle constants like 0x15151515 and similarly
floating point constants etc. where all bytes are the same. */
int bytev = const_with_all_bytes_same (val);
@@ -1047,23 +1023,19 @@ static void
generate_memcpy_builtin (struct loop *loop, partition *partition)
{
gimple_stmt_iterator gsi;
- gimple *stmt, *fn_call;
+ gimple *fn_call;
tree dest, src, fn, nb_bytes;
- location_t loc;
enum built_in_function kind;
-
- stmt = DR_STMT (partition->main_dr);
- loc = gimple_location (stmt);
+ struct builtin_info *builtin = partition->builtin;
/* The new statements will be placed before LOOP. */
gsi = gsi_last_bb (loop_preheader_edge (loop)->src);
- nb_bytes = build_size_arg_loc (loc, partition->main_dr, partition->niter,
- partition->plus_one);
+ nb_bytes = builtin->size;
nb_bytes = force_gimple_operand_gsi (&gsi, nb_bytes, true, NULL_TREE,
false, GSI_CONTINUE_LINKING);
- dest = build_addr_arg_loc (loc, partition->main_dr, nb_bytes);
- src = build_addr_arg_loc (loc, partition->secondary_dr, nb_bytes);
+ dest = builtin->dst_base;
+ src = builtin->src_base;
if (partition->kind == PKIND_MEMCPY
|| ! ptr_derefs_may_alias_p (dest, src))
kind = BUILT_IN_MEMCPY;
@@ -1314,69 +1286,22 @@ build_rdg_partition_for_vertex (struct graph *rdg, int v)
return partition;
}
-/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
- For the moment we detect memset, memcpy and memmove patterns. Bitmap
- STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */
+/* Given PARTITION of RDG, record single load/store data references for
+ builtin partition in SRC_DR/DST_DR; return false if there are no such
+ data references. */
-static void
-classify_partition (loop_p loop, struct graph *rdg, partition *partition,
- bitmap stmt_in_all_partitions)
+static bool
+find_single_drs (struct graph *rdg, partition *partition,
+ data_reference_p *dst_dr, data_reference_p *src_dr)
{
- bitmap_iterator bi;
unsigned i;
- tree nb_iter;
- data_reference_p single_load, single_store;
- bool volatiles_p = false, plus_one = false, has_reduction = false;
-
- partition->kind = PKIND_NORMAL;
- partition->main_dr = NULL;
- partition->secondary_dr = NULL;
- partition->niter = NULL_TREE;
- partition->plus_one = false;
-
- EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
- {
- gimple *stmt = RDG_STMT (rdg, i);
-
- if (gimple_has_volatile_ops (stmt))
- volatiles_p = true;
-
- /* If the stmt is not included by all partitions and there is uses
- outside of the loop, then mark the partition as reduction. */
- if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
- {
- /* Due to limitation in the transform phase we have to fuse all
- reduction partitions. As a result, this could cancel valid
- loop distribution especially for loop that induction variable
- is used outside of loop. To workaround this issue, we skip
- marking partition as reudction if the reduction stmt belongs
- to all partitions. In such case, reduction will be computed
- correctly no matter how partitions are fused/distributed. */
- if (!bitmap_bit_p (stmt_in_all_partitions, i))
- {
- partition->reduction_p = true;
- return;
- }
- has_reduction = true;
- }
- }
-
- /* Perform general partition disqualification for builtins. */
- if (volatiles_p
- /* Simple workaround to prevent classifying the partition as builtin
- if it contains any use outside of loop. */
- || has_reduction
- || !flag_tree_loop_distribute_patterns)
- return;
+ data_reference_p single_ld = NULL, single_st = NULL;
+ bitmap_iterator bi;
- /* Detect memset and memcpy. */
- single_load = NULL;
- single_store = NULL;
EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
{
gimple *stmt = RDG_STMT (rdg, i);
data_reference_p dr;
- unsigned j;
if (gimple_code (stmt) == GIMPLE_PHI)
continue;
@@ -1387,107 +1312,316 @@ classify_partition (loop_p loop, struct graph *rdg, partition *partition,
/* Otherwise just regular loads/stores. */
if (!gimple_assign_single_p (stmt))
- return;
+ return false;
/* But exactly one store and/or load. */
- for (j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
+ for (unsigned j = 0; RDG_DATAREFS (rdg, i).iterate (j, &dr); ++j)
{
tree type = TREE_TYPE (DR_REF (dr));
/* The memset, memcpy and memmove library calls are only
able to deal with generic address space. */
if (!ADDR_SPACE_GENERIC_P (TYPE_ADDR_SPACE (type)))
- return;
+ return false;
if (DR_IS_READ (dr))
{
- if (single_load != NULL)
- return;
- single_load = dr;
+ if (single_ld != NULL)
+ return false;
+ single_ld = dr;
}
else
{
- if (single_store != NULL)
- return;
- single_store = dr;
+ if (single_st != NULL)
+ return false;
+ single_st = dr;
}
}
}
- if (!single_store)
- return;
+ if (!single_st)
+ return false;
- nb_iter = number_of_latch_executions (loop);
- gcc_assert (nb_iter && nb_iter != chrec_dont_know);
- if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src,
- gimple_bb (DR_STMT (single_store))))
- plus_one = true;
+ /* Bail out if this is a bitfield memory reference. */
+ if (TREE_CODE (DR_REF (single_st)) == COMPONENT_REF
+ && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (single_st), 1)))
+ return false;
- if (single_store && !single_load)
- {
- gimple *stmt = DR_STMT (single_store);
- tree rhs = gimple_assign_rhs1 (stmt);
- if (const_with_all_bytes_same (rhs) == -1
- && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
- || (TYPE_MODE (TREE_TYPE (rhs))
- != TYPE_MODE (unsigned_char_type_node))))
- return;
- if (TREE_CODE (rhs) == SSA_NAME
- && !SSA_NAME_IS_DEFAULT_DEF (rhs)
- && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
- return;
- if (!adjacent_dr_p (single_store)
- || !dominated_by_p (CDI_DOMINATORS,
- loop->latch, gimple_bb (stmt)))
- return;
- partition->kind = PKIND_MEMSET;
- partition->main_dr = single_store;
- partition->niter = nb_iter;
- partition->plus_one = plus_one;
- }
- else if (single_store && single_load)
+ /* Data reference must be executed exactly once per iteration. */
+ basic_block bb_st = gimple_bb (DR_STMT (single_st));
+ struct loop *inner = bb_st->loop_father;
+ if (!dominated_by_p (CDI_DOMINATORS, inner->latch, bb_st))
+ return false;
+
+ if (single_ld)
{
- gimple *store = DR_STMT (single_store);
- gimple *load = DR_STMT (single_load);
+ gimple *store = DR_STMT (single_st), *load = DR_STMT (single_ld);
/* Direct aggregate copy or via an SSA name temporary. */
if (load != store
&& gimple_assign_lhs (load) != gimple_assign_rhs1 (store))
- return;
- if (!adjacent_dr_p (single_store)
- || !adjacent_dr_p (single_load)
- || !operand_equal_p (DR_STEP (single_store),
- DR_STEP (single_load), 0)
- || !dominated_by_p (CDI_DOMINATORS,
- loop->latch, gimple_bb (store)))
- return;
- /* Now check that if there is a dependence this dependence is
- of a suitable form for memmove. */
- ddr_p ddr = get_data_dependence (rdg, single_load, single_store);
- if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know)
- return;
+ return false;
+
+ /* Bail out if this is a bitfield memory reference. */
+ if (TREE_CODE (DR_REF (single_ld)) == COMPONENT_REF
+ && DECL_BIT_FIELD (TREE_OPERAND (DR_REF (single_ld), 1)))
+ return false;
+
+ /* Load and store must be in the same loop nest. */
+ basic_block bb_ld = gimple_bb (DR_STMT (single_ld));
+ if (inner != bb_ld->loop_father)
+ return false;
+
+ /* Data reference must be executed exactly once per iteration. */
+ if (!dominated_by_p (CDI_DOMINATORS, inner->latch, bb_ld))
+ return false;
+
+ edge e = single_exit (inner);
+ bool dom_ld = dominated_by_p (CDI_DOMINATORS, e->src, bb_ld);
+ bool dom_st = dominated_by_p (CDI_DOMINATORS, e->src, bb_st);
+ if (dom_ld != dom_st)
+ return false;
+ }
+
+ *src_dr = single_ld;
+ *dst_dr = single_st;
+ return true;
+}
+
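find_single_drs above encodes the shape a builtin candidate partition must have: exactly one store, at most one load, each executed once per iteration, neither a bitfield, and both in the same loop of the nest. In source terms (illustrative C, not from the testsuite):

void
examples (int *a, int *b, int n)
{
  for (int i = 0; i < n; i++)
    a[i] = 0;                     /* single store, no load: memset shape */

  for (int i = 0; i < n; i++)
    a[i] = b[i];                  /* one store, one load: memcpy/memmove shape */

  for (int i = 0; i < n; i++)
    a[i] = b[i] + b[n - 1 - i];   /* two loads: not a builtin candidate */
}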
+/* Given data reference DR in LOOP_NEST, this function checks the enclosing
+ loops from inner to outer to see if the loop's step equals the access size
+ at each level of the nest. Return true if so; record the access base and
+ size in BASE and SIZE; save the loop's step at each level in STEPS if it
+ is not null. For example:
+
+ int arr[100][100][100];
+ for (i = 0; i < 100; i++) ;steps[2] = 40000
+ for (j = 100; j > 0; j--) ;steps[1] = -400
+ for (k = 0; k < 100; k++) ;steps[0] = 4
+ arr[i][j - 1][k] = 0; ;base = &arr, size = 4000000. */
+
+static bool
+compute_access_range (loop_p loop_nest, data_reference_p dr, tree *base,
+ tree *size, vec<tree> *steps = NULL)
+{
+ location_t loc = gimple_location (DR_STMT (dr));
+ basic_block bb = gimple_bb (DR_STMT (dr));
+ struct loop *loop = bb->loop_father;
+ tree ref = DR_REF (dr);
+ tree access_base = build_fold_addr_expr (ref);
+ tree access_size = TYPE_SIZE_UNIT (TREE_TYPE (ref));
+
+ do {
+ tree scev_fn = analyze_scalar_evolution (loop, access_base);
+ if (TREE_CODE (scev_fn) != POLYNOMIAL_CHREC)
+ return false;
- if (DDR_ARE_DEPENDENT (ddr) != chrec_known)
+ access_base = CHREC_LEFT (scev_fn);
+ if (tree_contains_chrecs (access_base, NULL))
+ return false;
+
+ tree scev_step = CHREC_RIGHT (scev_fn);
+ /* Only support constant steps. */
+ if (TREE_CODE (scev_step) != INTEGER_CST)
+ return false;
+
+ enum ev_direction access_dir = scev_direction (scev_fn);
+ if (access_dir == EV_DIR_UNKNOWN)
+ return false;
+
+ if (steps != NULL)
+ steps->safe_push (scev_step);
+
+ scev_step = fold_convert_loc (loc, sizetype, scev_step);
+ /* Compute absolute value of scev step. */
+ if (access_dir == EV_DIR_DECREASES)
+ scev_step = fold_build1_loc (loc, NEGATE_EXPR, sizetype, scev_step);
+
+ /* At each level of the nest, the scev step must equal the access size.
+ In other words, DR must access consecutive memory between loop
+ iterations. */
+ if (!operand_equal_p (scev_step, access_size, 0))
+ return false;
+
+ /* Compute DR's execution times in loop. */
+ tree niters = number_of_latch_executions (loop);
+ niters = fold_convert_loc (loc, sizetype, niters);
+ if (dominated_by_p (CDI_DOMINATORS, single_exit (loop)->src, bb))
+ niters = size_binop_loc (loc, PLUS_EXPR, niters, size_one_node);
+
+ /* Compute DR's overall access size in loop. */
+ access_size = fold_build2_loc (loc, MULT_EXPR, sizetype,
+ niters, scev_step);
+ /* Adjust base address in case of negative step. */
+ if (access_dir == EV_DIR_DECREASES)
{
- if (DDR_NUM_DIST_VECTS (ddr) == 0)
- return;
+ tree adj = fold_build2_loc (loc, MINUS_EXPR, sizetype,
+ scev_step, access_size);
+ access_base = fold_build_pointer_plus_loc (loc, access_base, adj);
+ }
+ } while (loop != loop_nest && (loop = loop_outer (loop)) != NULL);
+
+ *base = access_base;
+ *size = access_size;
+ return true;
+}
+
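The per-level invariant in compute_access_range is worth spelling out: at each loop level the absolute step must equal the access size accumulated so far, and the size then scales by that loop's iteration count. A standalone check of the arithmetic for the arr[100][100][100] example in the comment above (plain C, assuming a 4-byte int):

#include <stdio.h>
#include <stdlib.h>

int
main (void)
{
  long steps[]  = { 4, -400, 40000 };  /* loop steps, inner to outer */
  long niters[] = { 100, 100, 100 };   /* iterations at each level */
  long access_size = 4;                /* one int element */

  for (int lev = 0; lev < 3; lev++)
    {
      long step = labs (steps[lev]);
      if (step != access_size)
        {
          printf ("level %d: step %ld != size %ld: not consecutive\n",
                  lev, step, access_size);
          return 1;
        }
      access_size = niters[lev] * step;  /* size covered by this level */
    }
  printf ("whole nest covers %ld consecutive bytes\n", access_size);
  return 0;
}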
+/* Allocate and return builtin struct. Record information like DST_DR,
+ SRC_DR, DST_BASE, SRC_BASE and SIZE in the allocated struct. */
+
+static struct builtin_info *
+alloc_builtin (data_reference_p dst_dr, data_reference_p src_dr,
+ tree dst_base, tree src_base, tree size)
+{
+ struct builtin_info *builtin = XNEW (struct builtin_info);
+ builtin->dst_dr = dst_dr;
+ builtin->src_dr = src_dr;
+ builtin->dst_base = dst_base;
+ builtin->src_base = src_base;
+ builtin->size = size;
+ return builtin;
+}
+
+/* Given data reference DR in loop nest LOOP, classify whether it forms a
+ builtin memset call. */
+
+static void
+classify_builtin_st (loop_p loop, partition *partition, data_reference_p dr)
+{
+ gimple *stmt = DR_STMT (dr);
+ tree base, size, rhs = gimple_assign_rhs1 (stmt);
+
+ if (const_with_all_bytes_same (rhs) == -1
+ && (!INTEGRAL_TYPE_P (TREE_TYPE (rhs))
+ || (TYPE_MODE (TREE_TYPE (rhs))
+ != TYPE_MODE (unsigned_char_type_node))))
+ return;
+
+ if (TREE_CODE (rhs) == SSA_NAME
+ && !SSA_NAME_IS_DEFAULT_DEF (rhs)
+ && flow_bb_inside_loop_p (loop, gimple_bb (SSA_NAME_DEF_STMT (rhs))))
+ return;
+
+ if (!compute_access_range (loop, dr, &base, &size))
+ return;
+
+ partition->builtin = alloc_builtin (dr, NULL, base, NULL_TREE, size);
+ partition->kind = PKIND_MEMSET;
+}
+
+/* Given data references DST_DR and SRC_DR in loop nest LOOP and RDG,
+ classify whether they form a builtin memcpy or memmove call. */
+
+static void
+classify_builtin_ldst (loop_p loop, struct graph *rdg, partition *partition,
+ data_reference_p dst_dr, data_reference_p src_dr)
+{
+ tree base, size, src_base, src_size;
+ auto_vec<tree> dst_steps, src_steps;
- lambda_vector dist_v;
- FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
+ /* Compute the access range of both load and store. They must have the same
+ access size. */
+ if (!compute_access_range (loop, dst_dr, &base, &size, &dst_steps)
+ || !compute_access_range (loop, src_dr, &src_base, &src_size, &src_steps)
+ || !operand_equal_p (size, src_size, 0))
+ return;
+
+ /* Load and store in the loop nest must access memory in the same way, i.e.,
+ they must have the same steps in each loop of the nest. */
+ if (dst_steps.length () != src_steps.length ())
+ return;
+ for (unsigned i = 0; i < dst_steps.length (); ++i)
+ if (!operand_equal_p (dst_steps[i], src_steps[i], 0))
+ return;
+
+ /* Now check whether there is a dependence. */
+ ddr_p ddr = get_data_dependence (rdg, src_dr, dst_dr);
+
+ /* Classify as memcpy if no dependence between load and store. */
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_known)
+ {
+ partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size);
+ partition->kind = PKIND_MEMCPY;
+ return;
+ }
+
+ /* Can't do memmove in case of unknown dependence or a dependence
+ without a classical distance vector. */
+ if (DDR_ARE_DEPENDENT (ddr) == chrec_dont_know
+ || DDR_NUM_DIST_VECTS (ddr) == 0)
+ return;
+
+ unsigned i;
+ lambda_vector dist_v;
+ int num_lev = (DDR_LOOP_NEST (ddr)).length ();
+ FOR_EACH_VEC_ELT (DDR_DIST_VECTS (ddr), i, dist_v)
+ {
+ unsigned dep_lev = dependence_level (dist_v, num_lev);
+ /* Can't do memmove if load depends on store. */
+ if (dep_lev > 0 && dist_v[dep_lev - 1] > 0 && !DDR_REVERSED_P (ddr))
+ return;
+ }
+
+ partition->builtin = alloc_builtin (dst_dr, src_dr, base, src_base, size);
+ partition->kind = PKIND_MEMMOVE;
+ return;
+}
+
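The dependence test in classify_builtin_ldst separates three cases: no compile-time dependence (memcpy), an overlap that a single memmove still reproduces, and a forward dependence that no library call can match. The last two in plain C (illustrative only):

void
overlap_cases (char *a, int n)
{
  /* The load runs ahead of the store: a[i + 1] is read before it is
     overwritten, so the loop is equivalent to memmove (a, a + 1, n).  */
  for (int i = 0; i < n; i++)
    a[i] = a[i + 1];

  /* Forward dependence with distance 1: each iteration reads the value
     stored by the previous one, so the loop fills a[1..n] with a[0].
     No memcpy or memmove matches that, hence the bail-out above when
     the load depends on the store.  */
  for (int i = 0; i < n; i++)
    a[i + 1] = a[i];
}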
+/* Classifies the builtin kind we can generate for PARTITION of RDG and LOOP.
+ For the moment we detect memset, memcpy and memmove patterns. Bitmap
+ STMT_IN_ALL_PARTITIONS contains statements belonging to all partitions. */
+
+static void
+classify_partition (loop_p loop, struct graph *rdg, partition *partition,
+ bitmap stmt_in_all_partitions)
+{
+ bitmap_iterator bi;
+ unsigned i;
+ data_reference_p single_ld = NULL, single_st = NULL;
+ bool volatiles_p = false, has_reduction = false;
+
+ EXECUTE_IF_SET_IN_BITMAP (partition->stmts, 0, i, bi)
+ {
+ gimple *stmt = RDG_STMT (rdg, i);
+
+ if (gimple_has_volatile_ops (stmt))
+ volatiles_p = true;
+
+ /* If the stmt is not included by all partitions and there are uses
+ outside of the loop, then mark the partition as reduction. */
+ if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
+ {
+ /* Due to a limitation in the transform phase we have to fuse all
+ reduction partitions. As a result, this could cancel valid
+ loop distribution, especially for loops whose induction variable
+ is used outside of the loop. To work around this issue, we skip
+ marking the partition as reduction if the reduction stmt belongs
+ to all partitions. In that case, the reduction will be computed
+ correctly no matter how partitions are fused/distributed. */
+ if (!bitmap_bit_p (stmt_in_all_partitions, i))
{
- int dist = dist_v[index_in_loop_nest (loop->num,
- DDR_LOOP_NEST (ddr))];
- if (dist > 0 && !DDR_REVERSED_P (ddr))
- return;
+ partition->reduction_p = true;
+ return;
}
- partition->kind = PKIND_MEMMOVE;
+ has_reduction = true;
}
- else
- partition->kind = PKIND_MEMCPY;
- partition->main_dr = single_store;
- partition->secondary_dr = single_load;
- partition->niter = nb_iter;
- partition->plus_one = plus_one;
}
+
+ /* Perform general partition disqualification for builtins. */
+ if (volatiles_p
+ /* Simple workaround to prevent classifying the partition as builtin
+ if it contains any use outside of loop. */
+ || has_reduction
+ || !flag_tree_loop_distribute_patterns)
+ return;
+
+ /* Find single load/store data references for builtin partition. */
+ if (!find_single_drs (rdg, partition, &single_st, &single_ld))
+ return;
+
+ /* Classify the builtin kind. */
+ if (single_ld == NULL)
+ classify_builtin_st (loop, partition, single_st);
+ else
+ classify_builtin_ldst (loop, rdg, partition, single_st, single_ld);
}
/* Returns true when PARTITION1 and PARTITION2 access the same memory
@@ -1946,7 +2080,8 @@ build_partition_graph (struct graph *rdg,
return pg;
}
-/* Sort partitions in PG by post order and store them in PARTITIONS. */
+/* Sort partitions in PG in descending post order and store them in
+ PARTITIONS. */
static void
sort_partitions_by_post_order (struct graph *pg,
@@ -1955,7 +2090,7 @@ sort_partitions_by_post_order (struct graph *pg,
int i;
struct pg_vdata *data;
- /* Now order the remaining nodes in postorder. */
+ /* Now order the remaining nodes in descending postorder. */
qsort (pg->vertices, pg->n_vertices, sizeof (vertex), pgcmp);
partitions->truncate (0);
for (i = 0; i < pg->n_vertices; ++i)
@@ -1967,16 +2102,18 @@ sort_partitions_by_post_order (struct graph *pg,
}
/* Given reduced dependence graph RDG merge strong connected components
- of PARTITIONS. In this function, data dependence caused by possible
- alias between references is ignored, as if it doesn't exist at all. */
+ of PARTITIONS. If IGNORE_ALIAS_P is true, data dependence caused by
+ possible alias between references is ignored, as if it doesn't exist
+ at all; otherwise all dependences are considered. */
static void
merge_dep_scc_partitions (struct graph *rdg,
- vec<struct partition *> *partitions)
+ vec<struct partition *> *partitions,
+ bool ignore_alias_p)
{
struct partition *partition1, *partition2;
struct pg_vdata *data;
- graph *pg = build_partition_graph (rdg, partitions, true);
+ graph *pg = build_partition_graph (rdg, partitions, ignore_alias_p);
int i, j, num_sccs = graphds_scc (pg, NULL);
/* Strong connected component means dependence cycle, we cannot distribute
@@ -2051,7 +2188,7 @@ break_alias_scc_partitions (struct graph *rdg,
vec<struct partition *> *partitions,
vec<ddr_p> *alias_ddrs)
{
- int i, j, num_sccs, num_sccs_no_alias;
+ int i, j, k, num_sccs, num_sccs_no_alias;
/* Build partition dependence graph. */
graph *pg = build_partition_graph (rdg, partitions, false);
@@ -2068,7 +2205,7 @@ break_alias_scc_partitions (struct graph *rdg,
auto_vec<enum partition_type> scc_types;
struct partition *partition, *first;
- /* If all paritions in a SCC has the same type, we can simply merge the
+ /* If all partitions in a SCC have the same type, we can simply merge the
SCC. This loop finds out such SCCs and records them in a bitmap.
bitmap_set_range (sccs_to_merge, 0, (unsigned) num_sccs);
for (i = 0; i < num_sccs; ++i)
@@ -2081,6 +2218,10 @@ break_alias_scc_partitions (struct graph *rdg,
if (pg->vertices[j].component != i)
continue;
+ /* Note we merge partitions of parallel type on purpose, even though
+ the resulting partition is sequential. The reason is the vectorizer
+ can do a more accurate runtime alias check in this case. Also
+ it results in more conservative distribution. */
if (first->type != partition->type)
{
bitmap_clear_bit (sccs_to_merge, i);
@@ -2102,7 +2243,7 @@ break_alias_scc_partitions (struct graph *rdg,
if (bitmap_count_bits (sccs_to_merge) != (unsigned) num_sccs)
{
/* Run SCC finding algorithm again, with alias dependence edges
- skipped. This is to topologically sort paritions according to
+ skipped. This is to topologically sort partitions according to
compilation time known dependence. Note the topological order
is stored in the form of pg's post order number. */
num_sccs_no_alias = graphds_scc (pg, NULL, pg_skip_alias_edge);
@@ -2124,19 +2265,29 @@ break_alias_scc_partitions (struct graph *rdg,
for (j = 0; partitions->iterate (j, &first); ++j)
if (cbdata.vertices_component[j] == i)
break;
- for (++j; partitions->iterate (j, &partition); ++j)
+ for (k = j + 1; partitions->iterate (k, &partition); ++k)
{
struct pg_vdata *data;
- if (cbdata.vertices_component[j] != i)
+ if (cbdata.vertices_component[k] != i)
continue;
+ /* Update the postorder number so that the merged reduction partition
+ is sorted after other partitions. */
+ if (!partition_reduction_p (first)
+ && partition_reduction_p (partition))
+ {
+ gcc_assert (pg->vertices[k].post < pg->vertices[j].post);
+ pg->vertices[j].post = pg->vertices[k].post;
+ }
partition_merge_into (NULL, first, partition, FUSE_SAME_SCC);
- (*partitions)[j] = NULL;
+ (*partitions)[k] = NULL;
partition_free (partition);
- data = (struct pg_vdata *)pg->vertices[j].data;
- gcc_assert (data->id == j);
+ data = (struct pg_vdata *)pg->vertices[k].data;
+ gcc_assert (data->id == k);
data->partition = NULL;
+ /* The resulting partition of a merged SCC must be sequential. */
+ first->type = PTYPE_SEQUENTIAL;
}
}
}
@@ -2331,38 +2482,49 @@ version_for_distribution_p (vec<struct partition *> *partitions,
return (alias_ddrs->length () > 0);
}
-/* Fuse all partitions if necessary before finalizing distribution. */
+/* Fuse PARTITIONS of LOOP if necessary before finalizing distribution.
+ ALIAS_DDRS contains ddrs which need runtime alias check. */
static void
-finalize_partitions (vec<struct partition *> *partitions,
+finalize_partitions (struct loop *loop, vec<struct partition *> *partitions,
vec<ddr_p> *alias_ddrs)
{
unsigned i;
- struct partition *a, *partition;
+ struct partition *partition, *a;
if (partitions->length () == 1
|| alias_ddrs->length () > 0)
return;
- a = (*partitions)[0];
- if (a->kind != PKIND_NORMAL)
- return;
-
- for (i = 1; partitions->iterate (i, &partition); ++i)
+ unsigned num_builtin = 0, num_normal = 0;
+ bool same_type_p = true;
+ enum partition_type type = ((*partitions)[0])->type;
+ for (i = 0; partitions->iterate (i, &partition); ++i)
{
- /* Don't fuse if partition has different type or it is a builtin. */
- if (partition->type != a->type
- || partition->kind != PKIND_NORMAL)
- return;
+ same_type_p &= (type == partition->type);
+ if (partition->kind != PKIND_NORMAL)
+ num_builtin++;
+ else
+ num_normal++;
}
- /* Fuse all partitions. */
- for (i = 1; partitions->iterate (i, &partition); ++i)
+ /* Don't distribute the current loop into too many loops given we don't
+ have a memory stream cost model. Be even more conservative in the case
+ of loop nest distribution. */
+ if ((same_type_p && num_builtin == 0)
+ || (loop->inner != NULL
+ && i >= NUM_PARTITION_THRESHOLD && num_normal > 1)
+ || (loop->inner == NULL
+ && i >= NUM_PARTITION_THRESHOLD && num_normal > num_builtin))
{
- partition_merge_into (NULL, a, partition, FUSE_FINALIZE);
- partition_free (partition);
+ a = (*partitions)[0];
+ for (i = 1; partitions->iterate (i, &partition); ++i)
+ {
+ partition_merge_into (NULL, a, partition, FUSE_FINALIZE);
+ partition_free (partition);
+ }
+ partitions->truncate (1);
}
- partitions->truncate (1);
}
/* Distributes the code from LOOP in such a way that producer statements
@@ -2515,16 +2677,23 @@ distribute_loop (struct loop *loop, vec<gimple *> stmts,
i--;
}
- /* Build the partition dependency graph. */
+ /* Build the partition dependency graph and fuse partitions in strong
+ connected component. */
if (partitions.length () > 1)
{
- merge_dep_scc_partitions (rdg, &partitions);
- alias_ddrs.truncate (0);
- if (partitions.length () > 1)
- break_alias_scc_partitions (rdg, &partitions, &alias_ddrs);
+ /* Don't support loop nest distribution under runtime alias check
+ since it's not likely to enable many vectorization opportunities. */
+ if (loop->inner)
+ merge_dep_scc_partitions (rdg, &partitions, false);
+ else
+ {
+ merge_dep_scc_partitions (rdg, &partitions, true);
+ if (partitions.length () > 1)
+ break_alias_scc_partitions (rdg, &partitions, &alias_ddrs);
+ }
}
- finalize_partitions (&partitions, &alias_ddrs);
+ finalize_partitions (loop, &partitions, &alias_ddrs);
nbp = partitions.length ();
if (nbp == 0
@@ -2605,6 +2774,86 @@ public:
}; // class pass_loop_distribution
+
+/* Given LOOP, this function records seed statements for distribution in
+ WORK_LIST. Return false if there is nothing for distribution. */
+
+static bool
+find_seed_stmts_for_distribution (struct loop *loop, vec<gimple *> *work_list)
+{
+ basic_block *bbs = get_loop_body_in_dom_order (loop);
+
+ /* Initialize the worklist with stmts we seed the partitions with. */
+ for (unsigned i = 0; i < loop->num_nodes; ++i)
+ {
+ for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gphi *phi = gsi.phi ();
+ if (virtual_operand_p (gimple_phi_result (phi)))
+ continue;
+ /* Distribute stmts which have defs that are used outside of
+ the loop. */
+ if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
+ continue;
+ work_list->safe_push (phi);
+ }
+ for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
+ !gsi_end_p (gsi); gsi_next (&gsi))
+ {
+ gimple *stmt = gsi_stmt (gsi);
+
+ /* If there is a stmt with side-effects bail out - we
+ cannot and should not distribute this loop. */
+ if (gimple_has_side_effects (stmt))
+ {
+ free (bbs);
+ return false;
+ }
+
+ /* Distribute stmts which have defs that are used outside of
+ the loop. */
+ if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
+ ;
+ /* Otherwise only distribute stores for now. */
+ else if (!gimple_vdef (stmt))
+ continue;
+
+ work_list->safe_push (stmt);
+ }
+ }
+ free (bbs);
+ return work_list->length () > 0;
+}
+
+/* Given innermost LOOP, return the outermost enclosing loop that forms a
+ perfect loop nest. */
+
+static struct loop *
+prepare_perfect_loop_nest (struct loop *loop)
+{
+ struct loop *outer = loop_outer (loop);
+ tree niters = number_of_latch_executions (loop);
+
+ /* TODO: We only support innermost 2-level loop nest distribution
+ because of compilation-time issues for now. This should be relaxed
+ in the future. */
+ while (loop->inner == NULL
+ && loop_outer (outer)
+ && outer->inner == loop && loop->next == NULL
+ && single_exit (outer)
+ && optimize_loop_for_speed_p (outer)
+ && !chrec_contains_symbols_defined_in_loop (niters, outer->num)
+ && (niters = number_of_latch_executions (outer)) != NULL_TREE
+ && niters != chrec_dont_know)
+ {
+ loop = outer;
+ outer = loop_outer (loop);
+ }
+
+ return loop;
+}
+
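prepare_perfect_loop_nest climbs outward only while the nest stays simple: the inner loop is the outer loop's only child, and the outer loop has a single exit, is hot, and has a computable iteration count. In C terms (illustrative):

void
nests (int a[100][100], int b[100][100], int n)
{
  /* Assuming n <= 100.  The outer loop's only child is the inner loop,
     so the pass can consider distributing the nest as a whole (here
     into one big memset).  */
  for (int i = 0; i < n; i++)
    for (int j = 0; j < n; j++)
      a[i][j] = 0;

  /* Two sibling inner loops (loop->next != NULL): the climb stops and
     each innermost loop is processed on its own.  */
  for (int i = 0; i < n; i++)
    {
      for (int j = 0; j < n; j++)
        a[i][j] = 0;
      for (int j = 0; j < n; j++)
        b[i][j] = 1;
    }
}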
unsigned int
pass_loop_distribution::execute (function *fun)
{
@@ -2647,18 +2896,9 @@ pass_loop_distribution::execute (function *fun)
walking to innermost loops. */
FOR_EACH_LOOP (loop, LI_ONLY_INNERMOST)
{
- auto_vec<gimple *> work_list;
- basic_block *bbs;
- int num = loop->num;
- unsigned int i;
-
- /* If the loop doesn't have a single exit we will fail anyway,
- so do that early. */
- if (!single_exit (loop))
- continue;
-
- /* Only optimize hot loops. */
- if (!optimize_loop_for_speed_p (loop))
+ /* Don't distribute loops with multiple exit edges, or cold loops. */
+ if (!single_exit (loop)
+ || !optimize_loop_for_speed_p (loop))
continue;
/* Don't distribute loop if niters is unknown. */
@@ -2666,56 +2906,16 @@ pass_loop_distribution::execute (function *fun)
if (niters == NULL_TREE || niters == chrec_dont_know)
continue;
- /* Initialize the worklist with stmts we seed the partitions with. */
- bbs = get_loop_body_in_dom_order (loop);
- for (i = 0; i < loop->num_nodes; ++i)
+ /* Get the perfect loop nest for distribution. */
+ loop = prepare_perfect_loop_nest (loop);
+ for (; loop; loop = loop->inner)
{
- for (gphi_iterator gsi = gsi_start_phis (bbs[i]);
- !gsi_end_p (gsi);
- gsi_next (&gsi))
- {
- gphi *phi = gsi.phi ();
- if (virtual_operand_p (gimple_phi_result (phi)))
- continue;
- /* Distribute stmts which have defs that are used outside of
- the loop. */
- if (!stmt_has_scalar_dependences_outside_loop (loop, phi))
- continue;
- work_list.safe_push (phi);
- }
- for (gimple_stmt_iterator gsi = gsi_start_bb (bbs[i]);
- !gsi_end_p (gsi);
- gsi_next (&gsi))
- {
- gimple *stmt = gsi_stmt (gsi);
-
- /* If there is a stmt with side-effects bail out - we
- cannot and should not distribute this loop. */
- if (gimple_has_side_effects (stmt))
- {
- work_list.truncate (0);
- goto out;
- }
+ auto_vec<gimple *> work_list;
+ if (!find_seed_stmts_for_distribution (loop, &work_list))
+ break;
- /* Distribute stmts which have defs that are used outside of
- the loop. */
- if (stmt_has_scalar_dependences_outside_loop (loop, stmt))
- ;
- /* Otherwise only distribute stores for now. */
- else if (!gimple_vdef (stmt))
- continue;
-
- work_list.safe_push (stmt);
- }
- }
-out:
- free (bbs);
-
- int nb_generated_loops = 0;
- int nb_generated_calls = 0;
- location_t loc = find_loop_location (loop);
- if (work_list.length () > 0)
- {
+ const char *str = loop->inner ? " nest" : "";
+ location_t loc = find_loop_location (loop);
if (!cd)
{
calculate_dominance_info (CDI_DOMINATORS);
@@ -2723,24 +2923,29 @@ out:
cd = new control_dependences ();
free_dominance_info (CDI_POST_DOMINATORS);
}
+
bool destroy_p;
+ int nb_generated_loops, nb_generated_calls;
nb_generated_loops = distribute_loop (loop, work_list, cd,
&nb_generated_calls,
&destroy_p);
if (destroy_p)
loops_to_be_destroyed.safe_push (loop);
- }
- if (nb_generated_loops + nb_generated_calls > 0)
- {
- changed = true;
- dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
- loc, "Loop %d distributed: split to %d loops "
- "and %d library calls.\n",
- num, nb_generated_loops, nb_generated_calls);
+ if (nb_generated_loops + nb_generated_calls > 0)
+ {
+ changed = true;
+ dump_printf_loc (MSG_OPTIMIZED_LOCATIONS,
+ loc, "Loop%s %d distributed: split to %d loops "
+ "and %d library calls.\n", str, loop->num,
+ nb_generated_loops, nb_generated_calls);
+
+ break;
+ }
+
+ if (dump_file && (dump_flags & TDF_DETAILS))
+ fprintf (dump_file, "Loop%s %d not distributed.\n", str, loop->num);
}
- else if (dump_file && (dump_flags & TDF_DETAILS))
- fprintf (dump_file, "Loop %d is the same.\n", num);
}
if (cd)
diff --git a/gcc/tree-pretty-print.c b/gcc/tree-pretty-print.c
index 943c3aa4d9b..a39cebff2b0 100644
--- a/gcc/tree-pretty-print.c
+++ b/gcc/tree-pretty-print.c
@@ -249,8 +249,10 @@ dump_decl_name (pretty_printer *pp, tree node, dump_flags_t flags)
{
if (DECL_NAME (node))
{
- if ((flags & TDF_ASMNAME) && DECL_ASSEMBLER_NAME_SET_P (node))
- pp_tree_identifier (pp, DECL_ASSEMBLER_NAME (node));
+ if ((flags & TDF_ASMNAME)
+ && HAS_DECL_ASSEMBLER_NAME_P (node)
+ && DECL_ASSEMBLER_NAME_SET_P (node))
+ pp_tree_identifier (pp, DECL_ASSEMBLER_NAME_RAW (node));
/* For DECL_NAMELESS names look for embedded uids in the
names and sanitize them for TDF_NOUID. */
else if ((flags & TDF_NOUID) && DECL_NAMELESS (node))
@@ -2845,8 +2847,7 @@ dump_generic_node (pretty_printer *pp, tree node, int spc, dump_flags_t flags,
dump_generic_node (pp, CHREC_LEFT (node), spc, flags, false);
pp_string (pp, ", +, ");
dump_generic_node (pp, CHREC_RIGHT (node), spc, flags, false);
- pp_string (pp, "}_");
- dump_generic_node (pp, CHREC_VAR (node), spc, flags, false);
+ pp_printf (pp, "}_%u", CHREC_VARIABLE (node));
is_stmt = false;
break;
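For readers decoding the dump format changed above: a chrec prints as {base, +, step}_num, i.e. an initial value, a per-iteration step, and the number of the loop in which the value evolves. A small illustration (the loop number 1 here is hypothetical):

/* i dumps as {0, +, 1}_1: start 0, step 1, evolving in loop 1.
   The address &a[i] would dump as {&a[0], +, 4}_1 for 4-byte int.  */
long
sum_array (int *a, int n)
{
  long sum = 0;
  for (int i = 0; i < n; i++)
    sum += a[i];
  return sum;
}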
diff --git a/gcc/tree-scalar-evolution.c b/gcc/tree-scalar-evolution.c
index ae102a3c471..a863de3d1d0 100644
--- a/gcc/tree-scalar-evolution.c
+++ b/gcc/tree-scalar-evolution.c
@@ -564,22 +564,30 @@ get_scalar_evolution (basic_block instantiated_below, tree scalar)
nb_get_scev++;
}
- switch (TREE_CODE (scalar))
- {
- case SSA_NAME:
- res = *find_var_scev_info (instantiated_below, scalar);
- break;
+ if (VECTOR_TYPE_P (TREE_TYPE (scalar))
+ || TREE_CODE (TREE_TYPE (scalar)) == COMPLEX_TYPE)
+ /* For chrec_dont_know we keep the symbolic form. */
+ res = scalar;
+ else
+ switch (TREE_CODE (scalar))
+ {
+ case SSA_NAME:
+ if (SSA_NAME_IS_DEFAULT_DEF (scalar))
+ res = scalar;
+ else
+ res = *find_var_scev_info (instantiated_below, scalar);
+ break;
- case REAL_CST:
- case FIXED_CST:
- case INTEGER_CST:
- res = scalar;
- break;
+ case REAL_CST:
+ case FIXED_CST:
+ case INTEGER_CST:
+ res = scalar;
+ break;
- default:
- res = chrec_not_analyzed_yet;
- break;
- }
+ default:
+ res = chrec_not_analyzed_yet;
+ break;
+ }
if (dump_file && (dump_flags & TDF_SCEV))
{
@@ -1628,19 +1636,7 @@ interpret_loop_phi (struct loop *loop, gphi *loop_phi_node)
struct loop *phi_loop = loop_containing_stmt (loop_phi_node);
tree init_cond;
- if (phi_loop != loop)
- {
- struct loop *subloop;
- tree evolution_fn = analyze_scalar_evolution
- (phi_loop, PHI_RESULT (loop_phi_node));
-
- /* Dive one level deeper. */
- subloop = superloop_at_depth (phi_loop, loop_depth (loop) + 1);
-
- /* Interpret the subloop. */
- res = compute_overall_effect_of_inner_loop (subloop, evolution_fn);
- return res;
- }
+ gcc_assert (phi_loop == loop);
/* Otherwise really interpret the loop phi. */
init_cond = analyze_initial_condition (loop_phi_node);
@@ -2014,54 +2010,24 @@ interpret_gimple_assign (struct loop *loop, gimple *stmt)
- instantiate_parameters.
*/
-/* Compute and return the evolution function in WRTO_LOOP, the nearest
- common ancestor of DEF_LOOP and USE_LOOP. */
-
-static tree
-compute_scalar_evolution_in_loop (struct loop *wrto_loop,
- struct loop *def_loop,
- tree ev)
-{
- bool val;
- tree res;
-
- if (def_loop == wrto_loop)
- return ev;
-
- def_loop = superloop_at_depth (def_loop, loop_depth (wrto_loop) + 1);
- res = compute_overall_effect_of_inner_loop (def_loop, ev);
-
- if (no_evolution_in_loop_p (res, wrto_loop->num, &val) && val)
- return res;
-
- return analyze_scalar_evolution_1 (wrto_loop, res);
-}
-
/* Helper recursive function. */
static tree
analyze_scalar_evolution_1 (struct loop *loop, tree var)
{
- tree type = TREE_TYPE (var);
gimple *def;
basic_block bb;
struct loop *def_loop;
tree res;
- if (loop == NULL
- || TREE_CODE (type) == VECTOR_TYPE
- || TREE_CODE (type) == COMPLEX_TYPE)
- return chrec_dont_know;
-
if (TREE_CODE (var) != SSA_NAME)
return interpret_expr (loop, NULL, var);
def = SSA_NAME_DEF_STMT (var);
bb = gimple_bb (def);
- def_loop = bb ? bb->loop_father : NULL;
+ def_loop = bb->loop_father;
- if (bb == NULL
- || !flow_bb_inside_loop_p (loop, bb))
+ if (!flow_bb_inside_loop_p (loop, bb))
{
/* Keep symbolic form, but look through obvious copies for constants. */
res = follow_copies_to_constant (var);
@@ -2071,8 +2037,11 @@ analyze_scalar_evolution_1 (struct loop *loop, tree var)
if (loop != def_loop)
{
res = analyze_scalar_evolution_1 (def_loop, var);
- res = compute_scalar_evolution_in_loop (loop, def_loop, res);
-
+ struct loop *loop_to_skip = superloop_at_depth (def_loop,
+ loop_depth (loop) + 1);
+ res = compute_overall_effect_of_inner_loop (loop_to_skip, res);
+ if (chrec_contains_symbols_defined_in_loop (res, loop->num))
+ res = analyze_scalar_evolution_1 (loop, res);
goto set_and_end;
}
@@ -2124,6 +2093,10 @@ analyze_scalar_evolution (struct loop *loop, tree var)
{
tree res;
+ /* ??? Fix callers. */
+ if (! loop)
+ return var;
+
if (dump_file && (dump_flags & TDF_SCEV))
{
fprintf (dump_file, "(analyze_scalar_evolution \n");
@@ -2300,7 +2273,7 @@ eq_idx_scev_info (const void *e1, const void *e2)
static unsigned
get_instantiated_value_entry (instantiate_cache_type &cache,
- tree name, basic_block instantiate_below)
+ tree name, edge instantiate_below)
{
if (!cache.map)
{
@@ -2310,7 +2283,7 @@ get_instantiated_value_entry (instantiate_cache_type &cache,
scev_info_str e;
e.name_version = SSA_NAME_VERSION (name);
- e.instantiated_below = instantiate_below->index;
+ e.instantiated_below = instantiate_below->dest->index;
void **slot = htab_find_slot_with_hash (cache.map, &e,
scev_info_hasher::hash (&e), INSERT);
if (!*slot)
@@ -2354,7 +2327,7 @@ loop_closed_phi_def (tree var)
return NULL_TREE;
}
-static tree instantiate_scev_r (basic_block, struct loop *, struct loop *,
+static tree instantiate_scev_r (edge, struct loop *, struct loop *,
tree, bool *, int);
/* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
@@ -2373,7 +2346,7 @@ static tree instantiate_scev_r (basic_block, struct loop *, struct loop *,
instantiated, and to stop if it exceeds some limit. */
static tree
-instantiate_scev_name (basic_block instantiate_below,
+instantiate_scev_name (edge instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
tree chrec,
bool *fold_conversions,
@@ -2387,7 +2360,7 @@ instantiate_scev_name (basic_block instantiate_below,
evolutions in outer loops), nothing to do. */
if (!def_bb
|| loop_depth (def_bb->loop_father) == 0
- || dominated_by_p (CDI_DOMINATORS, instantiate_below, def_bb))
+ || ! dominated_by_p (CDI_DOMINATORS, def_bb, instantiate_below->dest))
return chrec;
/* We cache the value of instantiated variable to avoid exponential
@@ -2409,6 +2382,51 @@ instantiate_scev_name (basic_block instantiate_below,
def_loop = find_common_loop (evolution_loop, def_bb->loop_father);
+ if (! dominated_by_p (CDI_DOMINATORS,
+ def_loop->header, instantiate_below->dest))
+ {
+ gimple *def = SSA_NAME_DEF_STMT (chrec);
+ if (gassign *ass = dyn_cast <gassign *> (def))
+ {
+ switch (gimple_assign_rhs_class (ass))
+ {
+ case GIMPLE_UNARY_RHS:
+ {
+ tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
+ inner_loop, gimple_assign_rhs1 (ass),
+ fold_conversions, size_expr);
+ if (op0 == chrec_dont_know)
+ return chrec_dont_know;
+ res = fold_build1 (gimple_assign_rhs_code (ass),
+ TREE_TYPE (chrec), op0);
+ break;
+ }
+ case GIMPLE_BINARY_RHS:
+ {
+ tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
+ inner_loop, gimple_assign_rhs1 (ass),
+ fold_conversions, size_expr);
+ if (op0 == chrec_dont_know)
+ return chrec_dont_know;
+ tree op1 = instantiate_scev_r (instantiate_below, evolution_loop,
+ inner_loop, gimple_assign_rhs2 (ass),
+ fold_conversions, size_expr);
+ if (op1 == chrec_dont_know)
+ return chrec_dont_know;
+ res = fold_build2 (gimple_assign_rhs_code (ass),
+ TREE_TYPE (chrec), op0, op1);
+ break;
+ }
+ default:
+ res = chrec_dont_know;
+ }
+ }
+ else
+ res = chrec_dont_know;
+ global_cache->set (si, res);
+ return res;
+ }
+
/* If the analysis yields a parametric chrec, instantiate the
result again. */
res = analyze_scalar_evolution (def_loop, chrec);
@@ -2440,8 +2458,9 @@ instantiate_scev_name (basic_block instantiate_below,
inner_loop, res,
fold_conversions, size_expr);
}
- else if (!dominated_by_p (CDI_DOMINATORS, instantiate_below,
- gimple_bb (SSA_NAME_DEF_STMT (res))))
+ else if (dominated_by_p (CDI_DOMINATORS,
+ gimple_bb (SSA_NAME_DEF_STMT (res)),
+ instantiate_below->dest))
res = chrec_dont_know;
}
@@ -2479,7 +2498,7 @@ instantiate_scev_name (basic_block instantiate_below,
instantiated, and to stop if it exceeds some limit. */
static tree
-instantiate_scev_poly (basic_block instantiate_below,
+instantiate_scev_poly (edge instantiate_below,
struct loop *evolution_loop, struct loop *,
tree chrec, bool *fold_conversions, int size_expr)
{
@@ -2524,7 +2543,7 @@ instantiate_scev_poly (basic_block instantiate_below,
instantiated, and to stop if it exceeds some limit. */
static tree
-instantiate_scev_binary (basic_block instantiate_below,
+instantiate_scev_binary (edge instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
tree chrec, enum tree_code code,
tree type, tree c0, tree c1,
@@ -2570,43 +2589,6 @@ instantiate_scev_binary (basic_block instantiate_below,
/* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
and EVOLUTION_LOOP, that were left under a symbolic form.
- "CHREC" is an array reference to be instantiated.
-
- CACHE is the cache of already instantiated values.
-
- Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
- conversions that may wrap in signed/pointer type are folded, as long
- as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
- then we don't do such fold.
-
- SIZE_EXPR is used for computing the size of the expression to be
- instantiated, and to stop if it exceeds some limit. */
-
-static tree
-instantiate_array_ref (basic_block instantiate_below,
- struct loop *evolution_loop, struct loop *inner_loop,
- tree chrec, bool *fold_conversions, int size_expr)
-{
- tree res;
- tree index = TREE_OPERAND (chrec, 1);
- tree op1 = instantiate_scev_r (instantiate_below, evolution_loop,
- inner_loop, index,
- fold_conversions, size_expr);
-
- if (op1 == chrec_dont_know)
- return chrec_dont_know;
-
- if (chrec && op1 == index)
- return chrec;
-
- res = unshare_expr (chrec);
- TREE_OPERAND (res, 1) = op1;
- return res;
-}
-
-/* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
- and EVOLUTION_LOOP, that were left under a symbolic form.
-
"CHREC" that stands for a convert expression "(TYPE) OP" is to be
instantiated.
@@ -2621,7 +2603,7 @@ instantiate_array_ref (basic_block instantiate_below,
instantiated, and to stop if it exceeds some limit. */
static tree
-instantiate_scev_convert (basic_block instantiate_below,
+instantiate_scev_convert (edge instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
tree chrec, tree type, tree op,
bool *fold_conversions, int size_expr)
@@ -2672,7 +2654,7 @@ instantiate_scev_convert (basic_block instantiate_below,
instantiated, and to stop if it exceeds some limit. */
static tree
-instantiate_scev_not (basic_block instantiate_below,
+instantiate_scev_not (edge instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
tree chrec,
enum tree_code code, tree type, tree op,
@@ -2710,130 +2692,6 @@ instantiate_scev_not (basic_block instantiate_below,
/* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
and EVOLUTION_LOOP, that were left under a symbolic form.
- CHREC is an expression with 3 operands to be instantiated.
-
- CACHE is the cache of already instantiated values.
-
- Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
- conversions that may wrap in signed/pointer type are folded, as long
- as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
- then we don't do such fold.
-
- SIZE_EXPR is used for computing the size of the expression to be
- instantiated, and to stop if it exceeds some limit. */
-
-static tree
-instantiate_scev_3 (basic_block instantiate_below,
- struct loop *evolution_loop, struct loop *inner_loop,
- tree chrec,
- bool *fold_conversions, int size_expr)
-{
- tree op1, op2;
- tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
- inner_loop, TREE_OPERAND (chrec, 0),
- fold_conversions, size_expr);
- if (op0 == chrec_dont_know)
- return chrec_dont_know;
-
- op1 = instantiate_scev_r (instantiate_below, evolution_loop,
- inner_loop, TREE_OPERAND (chrec, 1),
- fold_conversions, size_expr);
- if (op1 == chrec_dont_know)
- return chrec_dont_know;
-
- op2 = instantiate_scev_r (instantiate_below, evolution_loop,
- inner_loop, TREE_OPERAND (chrec, 2),
- fold_conversions, size_expr);
- if (op2 == chrec_dont_know)
- return chrec_dont_know;
-
- if (op0 == TREE_OPERAND (chrec, 0)
- && op1 == TREE_OPERAND (chrec, 1)
- && op2 == TREE_OPERAND (chrec, 2))
- return chrec;
-
- return fold_build3 (TREE_CODE (chrec),
- TREE_TYPE (chrec), op0, op1, op2);
-}
-
-/* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
- and EVOLUTION_LOOP, that were left under a symbolic form.
-
- CHREC is an expression with 2 operands to be instantiated.
-
- CACHE is the cache of already instantiated values.
-
- Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
- conversions that may wrap in signed/pointer type are folded, as long
- as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
- then we don't do such fold.
-
- SIZE_EXPR is used for computing the size of the expression to be
- instantiated, and to stop if it exceeds some limit. */
-
-static tree
-instantiate_scev_2 (basic_block instantiate_below,
- struct loop *evolution_loop, struct loop *inner_loop,
- tree chrec,
- bool *fold_conversions, int size_expr)
-{
- tree op1;
- tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
- inner_loop, TREE_OPERAND (chrec, 0),
- fold_conversions, size_expr);
- if (op0 == chrec_dont_know)
- return chrec_dont_know;
-
- op1 = instantiate_scev_r (instantiate_below, evolution_loop,
- inner_loop, TREE_OPERAND (chrec, 1),
- fold_conversions, size_expr);
- if (op1 == chrec_dont_know)
- return chrec_dont_know;
-
- if (op0 == TREE_OPERAND (chrec, 0)
- && op1 == TREE_OPERAND (chrec, 1))
- return chrec;
-
- return fold_build2 (TREE_CODE (chrec), TREE_TYPE (chrec), op0, op1);
-}
-
-/* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
- and EVOLUTION_LOOP, that were left under a symbolic form.
-
- CHREC is an expression with 2 operands to be instantiated.
-
- CACHE is the cache of already instantiated values.
-
- Variable pointed by FOLD_CONVERSIONS is set to TRUE when the
- conversions that may wrap in signed/pointer type are folded, as long
- as the value of the chrec is preserved. If FOLD_CONVERSIONS is NULL
- then we don't do such fold.
-
- SIZE_EXPR is used for computing the size of the expression to be
- instantiated, and to stop if it exceeds some limit. */
-
-static tree
-instantiate_scev_1 (basic_block instantiate_below,
- struct loop *evolution_loop, struct loop *inner_loop,
- tree chrec,
- bool *fold_conversions, int size_expr)
-{
- tree op0 = instantiate_scev_r (instantiate_below, evolution_loop,
- inner_loop, TREE_OPERAND (chrec, 0),
- fold_conversions, size_expr);
-
- if (op0 == chrec_dont_know)
- return chrec_dont_know;
-
- if (op0 == TREE_OPERAND (chrec, 0))
- return chrec;
-
- return fold_build1 (TREE_CODE (chrec), TREE_TYPE (chrec), op0);
-}
-
-/* Analyze all the parameters of the chrec, between INSTANTIATE_BELOW
- and EVOLUTION_LOOP, that were left under a symbolic form.
-
CHREC is the scalar evolution to instantiate.
CACHE is the cache of already instantiated values.
@@ -2847,7 +2705,7 @@ instantiate_scev_1 (basic_block instantiate_below,
instantiated, and to stop if it exceeds some limit. */
static tree
-instantiate_scev_r (basic_block instantiate_below,
+instantiate_scev_r (edge instantiate_below,
struct loop *evolution_loop, struct loop *inner_loop,
tree chrec,
bool *fold_conversions, int size_expr)
@@ -2899,50 +2757,20 @@ instantiate_scev_r (basic_block instantiate_below,
fold_conversions, size_expr);
case ADDR_EXPR:
+ if (is_gimple_min_invariant (chrec))
+ return chrec;
+ /* Fallthru. */
case SCEV_NOT_KNOWN:
return chrec_dont_know;
case SCEV_KNOWN:
return chrec_known;
- case ARRAY_REF:
- return instantiate_array_ref (instantiate_below, evolution_loop,
- inner_loop, chrec,
- fold_conversions, size_expr);
-
- default:
- break;
- }
-
- if (VL_EXP_CLASS_P (chrec))
- return chrec_dont_know;
-
- switch (TREE_CODE_LENGTH (TREE_CODE (chrec)))
- {
- case 3:
- return instantiate_scev_3 (instantiate_below, evolution_loop,
- inner_loop, chrec,
- fold_conversions, size_expr);
-
- case 2:
- return instantiate_scev_2 (instantiate_below, evolution_loop,
- inner_loop, chrec,
- fold_conversions, size_expr);
-
- case 1:
- return instantiate_scev_1 (instantiate_below, evolution_loop,
- inner_loop, chrec,
- fold_conversions, size_expr);
-
- case 0:
- return chrec;
-
default:
- break;
+ if (CONSTANT_CLASS_P (chrec))
+ return chrec;
+ return chrec_dont_know;
}
-
- /* Too complicated to handle. */
- return chrec_dont_know;
}
/* Analyze all the parameters of the chrec that were left under a
@@ -2952,7 +2780,7 @@ instantiate_scev_r (basic_block instantiate_below,
a function parameter. */
tree
-instantiate_scev (basic_block instantiate_below, struct loop *evolution_loop,
+instantiate_scev (edge instantiate_below, struct loop *evolution_loop,
tree chrec)
{
tree res;
@@ -2960,8 +2788,10 @@ instantiate_scev (basic_block instantiate_below, struct loop *evolution_loop,
if (dump_file && (dump_flags & TDF_SCEV))
{
fprintf (dump_file, "(instantiate_scev \n");
- fprintf (dump_file, " (instantiate_below = %d)\n", instantiate_below->index);
- fprintf (dump_file, " (evolution_loop = %d)\n", evolution_loop->num);
+ fprintf (dump_file, " (instantiate_below = %d -> %d)\n",
+ instantiate_below->src->index, instantiate_below->dest->index);
+ if (evolution_loop)
+ fprintf (dump_file, " (evolution_loop = %d)\n", evolution_loop->num);
fprintf (dump_file, " (chrec = ");
print_generic_expr (dump_file, chrec);
fprintf (dump_file, ")\n");
@@ -3009,7 +2839,7 @@ resolve_mixers (struct loop *loop, tree chrec, bool *folded_casts)
destr = true;
}
- tree ret = instantiate_scev_r (block_before_loop (loop), loop, NULL,
+ tree ret = instantiate_scev_r (loop_preheader_edge (loop), loop, NULL,
chrec, &fold_conversions, 0);
if (folded_casts && !*folded_casts)
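A toy model of the compute_overall_effect_of_inner_loop step that the rewritten analyze_scalar_evolution_1 above now calls directly (plain C over host integers, assuming the inner loop's trip count m is known): a variable with evolution {x0, +, s}_inner leaves the inner loop with value x0 + m * s, and the outer-loop analysis continues from that value.

/* Value of {x0, +, s}_inner after the inner loop runs m times.  */
static long
overall_effect_of_inner_loop (long x0, long s, long m)
{
  return x0 + m * s;
}

For instance, a variable stepping by 1 through an 8-iteration inner loop gains 8 per outer iteration, giving {x0, +, 8}_outer.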
diff --git a/gcc/tree-scalar-evolution.h b/gcc/tree-scalar-evolution.h
index c3980d0fbb2..55b8ca49fae 100644
--- a/gcc/tree-scalar-evolution.h
+++ b/gcc/tree-scalar-evolution.h
@@ -30,7 +30,7 @@ extern void scev_reset (void);
extern void scev_reset_htab (void);
extern void scev_finalize (void);
extern tree analyze_scalar_evolution (struct loop *, tree);
-extern tree instantiate_scev (basic_block, struct loop *, tree);
+extern tree instantiate_scev (edge, struct loop *, tree);
extern tree resolve_mixers (struct loop *, tree, bool *);
extern void gather_stats_on_scev_database (void);
extern void final_value_replacement_loop (struct loop *);
@@ -60,7 +60,7 @@ block_before_loop (loop_p loop)
static inline tree
instantiate_parameters (struct loop *loop, tree chrec)
{
- return instantiate_scev (block_before_loop (loop), loop, chrec);
+ return instantiate_scev (loop_preheader_edge (loop), loop, chrec);
}
/* Returns the loop of the polynomial chrec CHREC. */
diff --git a/gcc/tree-ssa-address.c b/gcc/tree-ssa-address.c
index 1f3febb0c6f..d732c5f6b0c 100644
--- a/gcc/tree-ssa-address.c
+++ b/gcc/tree-ssa-address.c
@@ -201,7 +201,7 @@ addr_for_mem_ref (struct mem_address *addr, addr_space_t as,
else
st = NULL_RTX;
- if (addr->offset && maybe_nonzero (wi::to_poly_wide (addr->offset)))
+ if (addr->offset && !integer_zerop (addr->offset))
{
poly_offset_int dc
= poly_offset_int::from (wi::to_poly_wide (addr->offset), SIGNED);
@@ -1029,8 +1029,8 @@ copy_ref_info (tree new_ref, tree old_ref)
&& (TREE_INT_CST_LOW (TMR_STEP (new_ref))
< align)))))
{
- poly_int64 inc = (mem_ref_offset (old_ref)
- - mem_ref_offset (new_ref)).force_shwi ();
+ poly_uint64 inc = (mem_ref_offset (old_ref)
+ - mem_ref_offset (new_ref)).force_uhwi ();
adjust_ptr_info_misalignment (new_pi, inc);
}
else
diff --git a/gcc/tree-ssa-alias.c b/gcc/tree-ssa-alias.c
index a12f95daede..f378993f453 100644
--- a/gcc/tree-ssa-alias.c
+++ b/gcc/tree-ssa-alias.c
@@ -683,8 +683,7 @@ ao_ref_alias_set (ao_ref *ref)
void
ao_ref_init_from_ptr_and_size (ao_ref *ref, tree ptr, tree size)
{
- poly_int64 t;
- HOST_WIDE_INT size_hwi, extra_offset = 0;
+ poly_int64 t, size_hwi, extra_offset = 0;
ref->ref = NULL_TREE;
if (TREE_CODE (ptr) == SSA_NAME)
{
@@ -694,11 +693,10 @@ ao_ref_init_from_ptr_and_size (ao_ref *ref, tree ptr, tree size)
ptr = gimple_assign_rhs1 (stmt);
else if (is_gimple_assign (stmt)
&& gimple_assign_rhs_code (stmt) == POINTER_PLUS_EXPR
- && TREE_CODE (gimple_assign_rhs2 (stmt)) == INTEGER_CST)
+ && ptrdiff_tree_p (gimple_assign_rhs2 (stmt), &extra_offset))
{
ptr = gimple_assign_rhs1 (stmt);
- extra_offset = BITS_PER_UNIT
- * int_cst_value (gimple_assign_rhs2 (stmt));
+ extra_offset *= BITS_PER_UNIT;
}
}
@@ -722,8 +720,8 @@ ao_ref_init_from_ptr_and_size (ao_ref *ref, tree ptr, tree size)
}
ref->offset += extra_offset;
if (size
- && tree_fits_shwi_p (size)
- && (size_hwi = tree_to_shwi (size)) <= HOST_WIDE_INT_MAX / BITS_PER_UNIT)
+ && poly_int_tree_p (size, &size_hwi)
+ && coeffs_in_range_p (size_hwi, 0, HOST_WIDE_INT_MAX / BITS_PER_UNIT))
ref->max_size = ref->size = size_hwi * BITS_PER_UNIT;
else
ref->max_size = ref->size = -1;
@@ -1139,16 +1137,12 @@ indirect_ref_may_alias_decl_p (tree ref1 ATTRIBUTE_UNUSED, tree base1,
{
tree ptr1;
tree ptrtype1, dbase2;
- poly_int64 doffset1, doffset2;
gcc_checking_assert ((TREE_CODE (base1) == MEM_REF
|| TREE_CODE (base1) == TARGET_MEM_REF)
&& DECL_P (base2));
ptr1 = TREE_OPERAND (base1, 0);
-
- /* The offset embedded in MEM_REFs can be negative. Bias them
- so that the resulting offset adjustment is positive. */
poly_offset_int moff = mem_ref_offset (base1) << LOG2_BITS_PER_UNIT;
/* If only one reference is based on a variable, they cannot alias if
@@ -1158,8 +1152,7 @@ indirect_ref_may_alias_decl_p (tree ref1 ATTRIBUTE_UNUSED, tree base1,
??? IVOPTs creates bases that do not honor this restriction,
so do not apply this optimization for TARGET_MEM_REFs. */
if (TREE_CODE (base1) != TARGET_MEM_REF
- && !ranges_may_overlap_p (offset1 + moff.force_shwi (), -1,
- offset2, max_size2))
+ && !ranges_may_overlap_p (offset1 + moff, -1, offset2, max_size2))
return false;
/* They also cannot alias if the pointer may not point to the decl. */
if (!ptr_deref_may_alias_decl_p (ptr1, base2))
@@ -1194,14 +1187,15 @@ indirect_ref_may_alias_decl_p (tree ref1 ATTRIBUTE_UNUSED, tree base1,
is bigger than the size of the decl we can't possibly access the
decl via that pointer. */
if (DECL_SIZE (base2) && COMPLETE_TYPE_P (TREE_TYPE (ptrtype1))
- && TREE_CODE (DECL_SIZE (base2)) == INTEGER_CST
- && TREE_CODE (TYPE_SIZE (TREE_TYPE (ptrtype1))) == INTEGER_CST
+ && poly_int_tree_p (DECL_SIZE (base2))
+ && poly_int_tree_p (TYPE_SIZE (TREE_TYPE (ptrtype1)))
/* ??? This in turn may run afoul when a decl of type T which is
a member of union type U is accessed through a pointer to
type U and sizeof T is smaller than sizeof U. */
&& TREE_CODE (TREE_TYPE (ptrtype1)) != UNION_TYPE
&& TREE_CODE (TREE_TYPE (ptrtype1)) != QUAL_UNION_TYPE
- && tree_int_cst_lt (DECL_SIZE (base2), TYPE_SIZE (TREE_TYPE (ptrtype1))))
+ && must_lt (wi::to_poly_widest (DECL_SIZE (base2)),
+ wi::to_poly_widest (TYPE_SIZE (TREE_TYPE (ptrtype1)))))
return false;
if (!ref2)
@@ -1212,14 +1206,11 @@ indirect_ref_may_alias_decl_p (tree ref1 ATTRIBUTE_UNUSED, tree base1,
dbase2 = ref2;
while (handled_component_p (dbase2))
dbase2 = TREE_OPERAND (dbase2, 0);
- doffset1 = offset1;
- doffset2 = offset2;
+ poly_int64 doffset1 = offset1;
+ poly_offset_int doffset2 = offset2;
if (TREE_CODE (dbase2) == MEM_REF
|| TREE_CODE (dbase2) == TARGET_MEM_REF)
- {
- poly_offset_int moff = mem_ref_offset (dbase2) << LOG2_BITS_PER_UNIT;
- doffset2 += moff.force_shwi ();
- }
+ doffset2 -= mem_ref_offset (dbase2) << LOG2_BITS_PER_UNIT;
/* If either reference is view-converted, give up now. */
if (same_type_for_tbaa (TREE_TYPE (base1), TREE_TYPE (ptrtype1)) != 1
@@ -1309,10 +1300,9 @@ indirect_refs_may_alias_p (tree ref1 ATTRIBUTE_UNUSED, tree base1,
TMR_INDEX2 (base2), 0))))))
{
poly_offset_int moff1 = mem_ref_offset (base1) << LOG2_BITS_PER_UNIT;
- offset1 += moff1.force_shwi ();
poly_offset_int moff2 = mem_ref_offset (base2) << LOG2_BITS_PER_UNIT;
- offset2 += moff2.force_shwi ();
- return ranges_may_overlap_p (offset1, max_size1, offset2, max_size2);
+ return ranges_may_overlap_p (offset1 + moff1, max_size1,
+ offset2 + moff2, max_size2);
}
if (!ptr_derefs_may_alias_p (ptr1, ptr2))
return false;
@@ -2373,8 +2363,8 @@ same_addr_size_stores_p (tree base1, poly_int64 offset1, poly_int64 size1,
/* Check that the object size is the same as the store size. That ensures
that ptr points to the start of obj. */
return (DECL_SIZE (obj)
- && poly_tree_p (DECL_SIZE (obj))
- && must_eq (wi::to_poly_wide (DECL_SIZE (obj)), size1));
+ && poly_int_tree_p (DECL_SIZE (obj))
+ && must_eq (wi::to_poly_offset (DECL_SIZE (obj)), size1));
}
/* If STMT kills the memory reference REF return true, otherwise
@@ -2535,7 +2525,7 @@ stmt_kills_ref_p (gimple *stmt, ao_ref *ref)
return false;
tree dest = gimple_call_arg (stmt, 0);
tree len = gimple_call_arg (stmt, 2);
- if (!tree_fits_shwi_p (len))
+ if (!poly_int_tree_p (len))
return false;
tree rbase = ref->base;
poly_offset_int roffset = ref->offset;
@@ -2557,7 +2547,7 @@ stmt_kills_ref_p (gimple *stmt, ao_ref *ref)
}
if (base == rbase
&& known_subrange_p (roffset, ref->max_size, offset,
- wi::to_offset (len)
+ wi::to_poly_offset (len)
<< LOG2_BITS_PER_UNIT))
return true;
break;
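The known_subrange_p test used in the memset case above, reduced to constant offsets and sizes (a sketch only; the real poly_int version must prove the comparisons for all possible runtime vector lengths):

#include <stdbool.h>

/* Does [pos1, pos1 + size1) lie entirely within [pos2, pos2 + size2)?  */
static bool
known_subrange (long pos1, unsigned long size1,
                long pos2, unsigned long size2)
{
  return pos1 >= pos2
         && pos1 + (long) size1 <= pos2 + (long) size2;
}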
diff --git a/gcc/tree-ssa-alias.h b/gcc/tree-ssa-alias.h
index 8b3307ec1c8..b6b23c91626 100644
--- a/gcc/tree-ssa-alias.h
+++ b/gcc/tree-ssa-alias.h
@@ -181,6 +181,8 @@ ranges_overlap_p (HOST_WIDE_INT pos1,
HOST_WIDE_INT pos2,
unsigned HOST_WIDE_INT size2)
{
+ if (size1 == 0 || size2 == 0)
+ return false;
if (pos1 >= pos2
&& (size2 == (unsigned HOST_WIDE_INT)-1
|| pos1 < (pos2 + (HOST_WIDE_INT) size2)))
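The early-out added above makes zero-sized ranges overlap nothing. A simplified standalone model (it ignores the size == -1 "unknown size" encoding that the real function also handles):

#include <stdbool.h>

/* Half-open ranges [pos, pos + size); an empty range overlaps nothing.  */
static bool
ranges_overlap (long pos1, unsigned long size1,
                long pos2, unsigned long size2)
{
  if (size1 == 0 || size2 == 0)
    return false;
  return pos1 < pos2 + (long) size2 && pos2 < pos1 + (long) size1;
}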
diff --git a/gcc/tree-ssa-ccp.c b/gcc/tree-ssa-ccp.c
index 3f7b2a0f92d..7b075102df2 100644
--- a/gcc/tree-ssa-ccp.c
+++ b/gcc/tree-ssa-ccp.c
@@ -3035,9 +3035,9 @@ optimize_memcpy (gimple_stmt_iterator *gsip, tree dest, tree src, tree len)
? DECL_SIZE_UNIT (TREE_OPERAND (src2, 1))
: TYPE_SIZE_UNIT (TREE_TYPE (src2)));
if (len == NULL_TREE
- || !poly_tree_p (len)
+ || !poly_int_tree_p (len)
|| len2 == NULL_TREE
- || !poly_tree_p (len2))
+ || !poly_int_tree_p (len2))
return;
src = get_addr_base_and_unit_offset (src, &offset);
diff --git a/gcc/tree-ssa-dse.c b/gcc/tree-ssa-dse.c
index 733edd8ce4f..2aeb1f53410 100644
--- a/gcc/tree-ssa-dse.c
+++ b/gcc/tree-ssa-dse.c
@@ -129,8 +129,9 @@ valid_ao_ref_for_dse (ao_ref *ref)
{
return (ao_ref_base (ref)
&& known_size_p (ref->max_size)
- && known_nonzero (ref->size)
+ && maybe_nonzero (ref->size)
&& must_eq (ref->max_size, ref->size)
+ && must_ge (ref->offset, 0)
&& multiple_p (ref->offset, BITS_PER_UNIT)
&& multiple_p (ref->size, BITS_PER_UNIT));
}
@@ -151,16 +152,17 @@ normalize_ref (ao_ref *copy, ao_ref *ref)
if (may_lt (copy->offset, ref->offset))
{
poly_int64 diff = ref->offset - copy->offset;
- if (may_lt (copy->size, diff))
+ if (may_le (copy->size, diff))
return false;
copy->size -= diff;
copy->offset = ref->offset;
}
poly_int64 diff = copy->offset - ref->offset;
- if (may_lt (ref->size, diff))
+ if (may_le (ref->size, diff))
return false;
+ /* If COPY extends beyond REF, chop off its size appropriately. */
poly_int64 limit = ref->size - diff;
if (!ordered_p (limit, copy->size))
return false;
@@ -508,7 +510,7 @@ live_bytes_read (ao_ref use_ref, ao_ref *ref, sbitmap live)
/* Now check if any of the remaining bits in use_ref are set in LIVE. */
return bitmap_bit_in_range_p (live, start / BITS_PER_UNIT,
- (start + size) / BITS_PER_UNIT);
+ (start + size - 1) / BITS_PER_UNIT);
}
return true;
}
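The live_bytes_read change above is an off-by-one fix: the last bit of the range [start, start + size) lives in byte (start + size - 1) / BITS_PER_UNIT, not (start + size) / BITS_PER_UNIT. A standalone check of the arithmetic, assuming 8-bit bytes:

#include <stdio.h>

int
main (void)
{
  unsigned start = 0, size = 8;  /* the range covers exactly byte 0 */
  printf ("old end byte: %u\n", (start + size) / 8);      /* 1: one too far */
  printf ("new end byte: %u\n", (start + size - 1) / 8);  /* 0: correct */
  return 0;
}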
diff --git a/gcc/tree-ssa-loop-ivopts.c b/gcc/tree-ssa-loop-ivopts.c
index 33bfc66f9b0..fd23eba8158 100644
--- a/gcc/tree-ssa-loop-ivopts.c
+++ b/gcc/tree-ssa-loop-ivopts.c
@@ -3054,7 +3054,7 @@ get_loop_invariant_expr (struct ivopts_data *data, tree inv_expr)
{
STRIP_NOPS (inv_expr);
- if (poly_tree_p (inv_expr)
+ if (poly_int_tree_p (inv_expr)
|| TREE_CODE (inv_expr) == SSA_NAME)
return NULL;
@@ -3152,7 +3152,7 @@ add_candidate_1 (struct ivopts_data *data,
cand->incremented_at = incremented_at;
data->vcands.safe_push (cand);
- if (!poly_tree_p (step))
+ if (!poly_int_tree_p (step))
{
find_inv_vars (data, &step, &cand->inv_vars);
@@ -3888,7 +3888,7 @@ get_computation_aff_1 (struct loop *loop, gimple *at, struct iv_use *use,
if (TYPE_PRECISION (utype) < TYPE_PRECISION (ctype))
{
if (cand->orig_iv != NULL && CONVERT_EXPR_P (cbase)
- && (CONVERT_EXPR_P (cstep) || poly_tree_p (cstep)))
+ && (CONVERT_EXPR_P (cstep) || poly_int_tree_p (cstep)))
{
tree inner_base, inner_step, inner_type;
inner_base = TREE_OPERAND (cbase, 0);
@@ -4146,7 +4146,7 @@ force_expr_to_var_cost (tree expr, bool speed)
if (is_gimple_min_invariant (expr))
{
- if (poly_tree_p (expr))
+ if (poly_int_tree_p (expr))
return comp_cost (integer_cost [speed], 0);
if (TREE_CODE (expr) == ADDR_EXPR)
@@ -4477,7 +4477,7 @@ get_address_cost (struct ivopts_data *data, struct iv_use *use,
poly_int64 ainc_step;
if (can_autoinc
&& ratio == 1
- && poly_tree_p (cand->iv->step, &ainc_step))
+ && ptrdiff_tree_p (cand->iv->step, &ainc_step))
{
poly_int64 ainc_offset = (aff_inv->offset).force_shwi ();
diff --git a/gcc/tree-ssa-loop-niter.c b/gcc/tree-ssa-loop-niter.c
index b67160a2c70..89e57931745 100644
--- a/gcc/tree-ssa-loop-niter.c
+++ b/gcc/tree-ssa-loop-niter.c
@@ -3444,7 +3444,8 @@ infer_loop_bounds_from_pointer_arith (struct loop *loop, gimple *stmt)
if (TYPE_PRECISION (type) != TYPE_PRECISION (TREE_TYPE (var)))
return;
- scev = instantiate_parameters (loop, analyze_scalar_evolution (loop, def));
+ struct loop *uloop = loop_containing_stmt (stmt);
+ scev = instantiate_parameters (loop, analyze_scalar_evolution (uloop, def));
if (chrec_contains_undetermined (scev))
return;
diff --git a/gcc/tree-ssa-loop-prefetch.c b/gcc/tree-ssa-loop-prefetch.c
index ecf14d108fe..67767e1516a 100644
--- a/gcc/tree-ssa-loop-prefetch.c
+++ b/gcc/tree-ssa-loop-prefetch.c
@@ -1632,7 +1632,8 @@ determine_loop_nest_reuse (struct loop *loop, struct mem_ref_group *refs,
for (gr = refs; gr; gr = gr->next)
for (ref = gr->refs; ref; ref = ref->next)
{
- dr = create_data_ref (nest, loop_containing_stmt (ref->stmt),
+ dr = create_data_ref (loop_preheader_edge (nest),
+ loop_containing_stmt (ref->stmt),
ref->mem, ref->stmt, !ref->write_p, false);
if (dr)
diff --git a/gcc/tree-ssa-sccvn.c b/gcc/tree-ssa-sccvn.c
index 310cad063f8..1a130d0d133 100644
--- a/gcc/tree-ssa-sccvn.c
+++ b/gcc/tree-ssa-sccvn.c
@@ -777,24 +777,23 @@ copy_reference_ops_from_ref (tree ref, vec<vn_reference_op_s> *result)
{
tree this_offset = component_ref_field_offset (ref);
if (this_offset
- && TREE_CODE (this_offset) == INTEGER_CST)
+ && poly_int_tree_p (this_offset))
{
tree bit_offset = DECL_FIELD_BIT_OFFSET (TREE_OPERAND (ref, 1));
if (TREE_INT_CST_LOW (bit_offset) % BITS_PER_UNIT == 0)
{
- offset_int off
- = (wi::to_offset (this_offset)
+ poly_offset_int off
+ = (wi::to_poly_offset (this_offset)
+ (wi::to_offset (bit_offset) >> LOG2_BITS_PER_UNIT));
- if (wi::fits_shwi_p (off)
- /* Prohibit value-numbering zero offset components
- of addresses the same before the pass folding
- __builtin_object_size had a chance to run
- (checking cfun->after_inlining does the
- trick here). */
- && (TREE_CODE (orig) != ADDR_EXPR
- || off != 0
- || cfun->after_inlining))
- temp.off = off.to_shwi ();
+ /* Prohibit value-numbering zero offset components
+ of addresses the same before the pass folding
+ __builtin_object_size had a chance to run
+ (checking cfun->after_inlining does the
+ trick here). */
+ if (TREE_CODE (orig) != ADDR_EXPR
+ || maybe_nonzero (off)
+ || cfun->after_inlining)
+ off.to_shwi (&temp.off);
}
}
}
@@ -813,16 +812,15 @@ copy_reference_ops_from_ref (tree ref, vec<vn_reference_op_s> *result)
if (! temp.op2)
temp.op2 = size_binop (EXACT_DIV_EXPR, TYPE_SIZE_UNIT (eltype),
size_int (TYPE_ALIGN_UNIT (eltype)));
- if (TREE_CODE (temp.op0) == INTEGER_CST
- && TREE_CODE (temp.op1) == INTEGER_CST
+ if (poly_int_tree_p (temp.op0)
+ && poly_int_tree_p (temp.op1)
&& TREE_CODE (temp.op2) == INTEGER_CST)
{
- offset_int off = ((wi::to_offset (temp.op0)
- - wi::to_offset (temp.op1))
- * wi::to_offset (temp.op2)
- * vn_ref_op_align_unit (&temp));
- if (wi::fits_shwi_p (off))
- temp.off = off.to_shwi();
+ poly_offset_int off = ((wi::to_poly_offset (temp.op0)
+ - wi::to_poly_offset (temp.op1))
+ * wi::to_offset (temp.op2)
+ * vn_ref_op_align_unit (&temp));
+ off.to_shwi (&temp.off);
}
}
break;
@@ -931,7 +929,8 @@ ao_ref_init_from_vn_reference (ao_ref *ref,
else
size = GET_MODE_BITSIZE (mode);
}
- if (size_tree && poly_tree_p (size_tree))
+ if (size_tree != NULL_TREE
+ && poly_int_tree_p (size_tree))
size = wi::to_poly_offset (size_tree);
/* Initially, maxsize is the same as the accessed element size.
@@ -1000,12 +999,12 @@ ao_ref_init_from_vn_reference (ao_ref *ref,
parts manually. */
tree this_offset = DECL_FIELD_OFFSET (field);
- if (op->op1 || TREE_CODE (this_offset) != INTEGER_CST)
+ if (op->op1 || !poly_int_tree_p (this_offset))
max_size = -1;
else
{
- offset_int woffset = (wi::to_offset (this_offset)
- << LOG2_BITS_PER_UNIT);
+ poly_offset_int woffset = (wi::to_poly_offset (this_offset)
+ << LOG2_BITS_PER_UNIT);
woffset += wi::to_offset (DECL_FIELD_BIT_OFFSET (field));
offset += woffset;
}
@@ -1015,14 +1014,15 @@ ao_ref_init_from_vn_reference (ao_ref *ref,
case ARRAY_RANGE_REF:
case ARRAY_REF:
/* We recorded the lower bound and the element size. */
- if (TREE_CODE (op->op0) != INTEGER_CST
- || TREE_CODE (op->op1) != INTEGER_CST
+ if (!poly_int_tree_p (op->op0)
+ || !poly_int_tree_p (op->op1)
|| TREE_CODE (op->op2) != INTEGER_CST)
max_size = -1;
else
{
- offset_int woffset
- = wi::sext (wi::to_offset (op->op0) - wi::to_offset (op->op1),
+ poly_offset_int woffset
+ = wi::sext (wi::to_poly_offset (op->op0)
+ - wi::to_poly_offset (op->op1),
TYPE_PRECISION (TREE_TYPE (op->op0)));
woffset *= wi::to_offset (op->op2) * vn_ref_op_align_unit (op);
woffset <<= LOG2_BITS_PER_UNIT;
@@ -1194,7 +1194,7 @@ vn_reference_maybe_forwprop_address (vec<vn_reference_op_s> *ops,
&& code != POINTER_PLUS_EXPR)
return false;
- off = offset_int::from (wi::to_wide (mem_op->op0), SIGNED);
+ off = poly_offset_int::from (wi::to_poly_wide (mem_op->op0), SIGNED);
/* The only thing we have to do is from &OBJ.foo.bar add the offset
from .foo.bar to the preceding MEM_REF offset and replace the
@@ -1229,7 +1229,7 @@ vn_reference_maybe_forwprop_address (vec<vn_reference_op_s> *ops,
vn_reference_op_t new_mem_op = &tem[tem.length () - 2];
new_mem_op->op0
= wide_int_to_tree (TREE_TYPE (mem_op->op0),
- wi::to_wide (new_mem_op->op0));
+ wi::to_poly_wide (new_mem_op->op0));
}
else
gcc_assert (tem.last ().opcode == STRING_CST);
@@ -1485,16 +1485,15 @@ valueize_refs_1 (vec<vn_reference_op_s> orig, bool *valueized_anything)
one, adjust the constant offset. */
else if (vro->opcode == ARRAY_REF
&& must_eq (vro->off, -1)
- && TREE_CODE (vro->op0) == INTEGER_CST
- && TREE_CODE (vro->op1) == INTEGER_CST
+ && poly_int_tree_p (vro->op0)
+ && poly_int_tree_p (vro->op1)
&& TREE_CODE (vro->op2) == INTEGER_CST)
{
- offset_int off = ((wi::to_offset (vro->op0)
- - wi::to_offset (vro->op1))
- * wi::to_offset (vro->op2)
- * vn_ref_op_align_unit (vro));
- if (wi::fits_shwi_p (off))
- vro->off = off.to_shwi ();
+ poly_offset_int off = ((wi::to_poly_offset (vro->op0)
+ - wi::to_poly_offset (vro->op1))
+ * wi::to_offset (vro->op2)
+ * vn_ref_op_align_unit (vro));
+ off.to_shwi (&vro->off);
}
}
@@ -1911,7 +1910,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
if (is_gimple_reg_type (vr->type)
&& gimple_call_builtin_p (def_stmt, BUILT_IN_MEMSET)
&& integer_zerop (gimple_call_arg (def_stmt, 1))
- && poly_tree_p (gimple_call_arg (def_stmt, 2), &copy_size)
+ && poly_int_tree_p (gimple_call_arg (def_stmt, 2))
&& TREE_CODE (gimple_call_arg (def_stmt, 0)) == ADDR_EXPR)
{
tree ref2 = TREE_OPERAND (gimple_call_arg (def_stmt, 0), 0);
@@ -1920,10 +1919,11 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
bool reverse;
base2 = get_ref_base_and_extent (ref2, &offset2, &size2, &maxsize2,
&reverse);
- poly_offset_int arg2 = poly_offset_int (copy_size) << LOG2_BITS_PER_UNIT;
+ tree len = gimple_call_arg (def_stmt, 2);
if (known_size_p (maxsize2)
&& operand_equal_p (base, base2, 0)
- && known_subrange_p (offset, maxsize, offset2, arg2))
+ && known_subrange_p (offset, maxsize, offset2,
+ wi::to_poly_offset (len) << LOG2_BITS_PER_UNIT))
{
tree val = build_zero_cst (vr->type);
return vn_reference_lookup_or_insert_for_pieces
@@ -2148,7 +2148,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
copy_reference_ops_from_ref (gimple_assign_rhs1 (def_stmt), &rhs);
/* Apply an extra offset to the inner MEM_REF of the RHS. */
- if (may_ne (extra_off, 0))
+ if (maybe_nonzero (extra_off))
{
if (rhs.length () < 2
|| rhs[0].opcode != MEM_REF
@@ -2206,7 +2206,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
|| TREE_CODE (gimple_call_arg (def_stmt, 0)) == SSA_NAME)
&& (TREE_CODE (gimple_call_arg (def_stmt, 1)) == ADDR_EXPR
|| TREE_CODE (gimple_call_arg (def_stmt, 1)) == SSA_NAME)
- && poly_tree_p (gimple_call_arg (def_stmt, 2), &copy_size))
+ && poly_int_tree_p (gimple_call_arg (def_stmt, 2), &copy_size))
{
tree lhs, rhs;
ao_ref r;
@@ -2242,7 +2242,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
if (!tem)
return (void *)-1;
if (TREE_CODE (tem) == MEM_REF
- && poly_tree_p (TREE_OPERAND (tem, 1), &mem_offset))
+ && poly_int_tree_p (TREE_OPERAND (tem, 1), &mem_offset))
{
lhs = TREE_OPERAND (tem, 0);
if (TREE_CODE (lhs) == SSA_NAME)
@@ -2270,7 +2270,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
if (!tem)
return (void *)-1;
if (TREE_CODE (tem) == MEM_REF
- && poly_tree_p (TREE_OPERAND (tem, 1), &mem_offset))
+ && poly_int_tree_p (TREE_OPERAND (tem, 1), &mem_offset))
{
rhs = TREE_OPERAND (tem, 0);
rhs_offset += mem_offset;
@@ -2288,7 +2288,7 @@ vn_reference_lookup_3 (ao_ref *ref, tree vuse, void *vr_,
if (TREE_CODE (base) == MEM_REF)
{
if (TREE_OPERAND (base, 0) != lhs
- || !poly_tree_p (TREE_OPERAND (base, 1), &mem_offset))
+ || !poly_int_tree_p (TREE_OPERAND (base, 1), &mem_offset))
return (void *) -1;
at += mem_offset;
}
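The ARRAY_REF cases above compute a constant byte offset as (index - low_bound) * element_size; the patch only widens the arithmetic from offset_int to poly_offset_int (the real code splits the element size into an op2 factor times an alignment unit). In plain C over host integers:

/* Constant byte offset of a[index], given the array's lower bound
   and element size.  For int a[10] and a[3]: (3 - 0) * 4 = 12.  */
static long
array_ref_offset (long index, long low_bound, long elt_size)
{
  return (index - low_bound) * elt_size;
}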
diff --git a/gcc/tree-ssa-structalias.c b/gcc/tree-ssa-structalias.c
index c42405c80f4..3296058dd4b 100644
--- a/gcc/tree-ssa-structalias.c
+++ b/gcc/tree-ssa-structalias.c
@@ -2849,41 +2849,33 @@ lookup_vi_for_tree (tree t)
static const char *
alias_get_name (tree decl)
{
- const char *res = NULL;
- char *temp;
-
- if (!dump_file)
- return "NULL";
-
- if (TREE_CODE (decl) == SSA_NAME)
- {
- res = get_name (decl);
- if (res)
- temp = xasprintf ("%s_%u", res, SSA_NAME_VERSION (decl));
- else
- temp = xasprintf ("_%u", SSA_NAME_VERSION (decl));
- res = ggc_strdup (temp);
- free (temp);
- }
- else if (DECL_P (decl))
+ const char *res = "NULL";
+ if (dump_file)
{
- if (DECL_ASSEMBLER_NAME_SET_P (decl))
- res = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (decl));
- else
+ char *temp = NULL;
+ if (TREE_CODE (decl) == SSA_NAME)
+ {
+ res = get_name (decl);
+ temp = xasprintf ("%s_%u", res ? res : "", SSA_NAME_VERSION (decl));
+ }
+ else if (HAS_DECL_ASSEMBLER_NAME_P (decl)
+ && DECL_ASSEMBLER_NAME_SET_P (decl))
+ res = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME_RAW (decl));
+ else if (DECL_P (decl))
{
res = get_name (decl);
if (!res)
- {
- temp = xasprintf ("D.%u", DECL_UID (decl));
- res = ggc_strdup (temp);
- free (temp);
- }
+ temp = xasprintf ("D.%u", DECL_UID (decl));
+ }
+
+ if (temp)
+ {
+ res = ggc_strdup (temp);
+ free (temp);
}
}
- if (res != NULL)
- return res;
- return "NULL";
+ return res;
}
/* Find the variable id for tree T in the map.
@@ -3264,7 +3256,7 @@ get_constraint_for_component_ref (tree t, vec<ce_s> *results,
we may have to do something cute here. */
if (may_lt (poly_uint64 (bitpos), get_varinfo (result.var)->fullsize)
- && may_ne (bitmaxsize, 0))
+ && maybe_nonzero (bitmaxsize))
{
/* It's also not true that the constraint will actually start at the
right offset, it may start in some padding. We only care about
diff --git a/gcc/tree-ssa-uninit.c b/gcc/tree-ssa-uninit.c
index b1478acbdc9..3bd9077e590 100644
--- a/gcc/tree-ssa-uninit.c
+++ b/gcc/tree-ssa-uninit.c
@@ -301,7 +301,7 @@ warn_uninitialized_vars (bool warn_possibly_uninitialized)
&& (must_le (ref.offset + ref.size, 0)
|| (must_ge (ref.offset, 0)
&& DECL_SIZE (base)
- && poly_tree_p (DECL_SIZE (base), &decl_size)
+ && poly_int_tree_p (DECL_SIZE (base), &decl_size)
&& must_le (decl_size, ref.offset))))
continue;
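The guard above skips the warning when the access provably misses the decl. With constant values, where the decl occupies bits [0, decl_size) and the access is [off, off + size), the test reduces to this sketch:

#include <stdbool.h>

/* True if [off, off + size) lies entirely outside [0, decl_size).  */
static bool
access_outside_decl (long off, long size, long decl_size)
{
  return off + size <= 0                     /* ends before the decl */
         || (off >= 0 && decl_size <= off);  /* starts after it */
}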
diff --git a/gcc/tree-ssa.c b/gcc/tree-ssa.c
index 060c663ab89..64c690e980f 100644
--- a/gcc/tree-ssa.c
+++ b/gcc/tree-ssa.c
@@ -1433,7 +1433,7 @@ non_rewritable_mem_ref_base (tree ref)
|| TREE_CODE (TREE_TYPE (decl)) == COMPLEX_TYPE)
&& useless_type_conversion_p (TREE_TYPE (base),
TREE_TYPE (TREE_TYPE (decl)))
- && must_gt (wi::to_offset (TYPE_SIZE_UNIT (TREE_TYPE (decl))),
+ && must_gt (wi::to_poly_offset (TYPE_SIZE_UNIT (TREE_TYPE (decl))),
mem_ref_offset (base))
&& multiple_of_p (sizetype, TREE_OPERAND (base, 1),
TYPE_SIZE_UNIT (TREE_TYPE (base))))
@@ -1444,11 +1444,10 @@ non_rewritable_mem_ref_base (tree ref)
return NULL_TREE;
/* For integral typed extracts we can use a BIT_FIELD_REF. */
if (DECL_SIZE (decl)
- && TREE_CODE (DECL_SIZE (decl)) == INTEGER_CST
&& (known_subrange_p
(mem_ref_offset (base),
- wi::to_offset (TYPE_SIZE_UNIT (TREE_TYPE (base))),
- 0, wi::to_offset (DECL_SIZE_UNIT (decl))))
+ wi::to_poly_offset (TYPE_SIZE_UNIT (TREE_TYPE (base))),
+ 0, wi::to_poly_offset (DECL_SIZE_UNIT (decl))))
/* ??? We can't handle bitfield precision extracts without
either using an alternate type for the BIT_FIELD_REF and
then doing a conversion or possibly adjusting the offset
diff --git a/gcc/tree-vect-data-refs.c b/gcc/tree-vect-data-refs.c
index 68515fec70c..446a3917238 100644
--- a/gcc/tree-vect-data-refs.c
+++ b/gcc/tree-vect-data-refs.c
@@ -1045,7 +1045,7 @@ vect_compute_data_ref_alignment (struct data_reference *dr)
base_misalignment = 0;
}
poly_int64 misalignment
- = base_misalignment + wi::to_poly_wide (drb->init).force_shwi ();
+ = base_misalignment + wi::to_poly_offset (drb->init).force_shwi ();
/* If this is a backward running DR then first access in the larger
vectype actually is N-1 elements before the address in the DR.
@@ -2001,7 +2001,7 @@ vect_enhance_data_refs_alignment (loop_vec_info loop_vinfo)
unsigned int load_outside_cost = 0;
unsigned int store_inside_cost = 0;
unsigned int store_outside_cost = 0;
- unsigned int estimated_npeels = estimated_poly_value (vf) / 2;
+ unsigned int estimated_npeels = vect_vf_for_cost (loop_vinfo) / 2;
stmt_vector_for_cost dummy;
dummy.create (2);
@@ -3288,37 +3288,42 @@ vect_vfa_align (const data_reference *dr)
static int
vect_compile_time_alias (struct data_reference *a, struct data_reference *b,
- poly_uint64 segment_length_a,
- poly_uint64 segment_length_b,
+ tree segment_length_a, tree segment_length_b,
unsigned HOST_WIDE_INT access_size_a,
unsigned HOST_WIDE_INT access_size_b)
{
poly_offset_int offset_a = wi::to_poly_offset (DR_INIT (a));
poly_offset_int offset_b = wi::to_poly_offset (DR_INIT (b));
+ poly_uint64 const_length_a;
+ poly_uint64 const_length_b;
/* For negative step, we need to adjust address range by TYPE_SIZE_UNIT
bytes, e.g., int a[3] -> a[1] range is [a+4, a+16) instead of
[a, a+12) */
if (tree_int_cst_compare (DR_STEP (a), size_zero_node) < 0)
{
- segment_length_a = -segment_length_a;
- offset_a = (offset_a + access_size_a) - segment_length_a;
+ const_length_a = (-wi::to_poly_wide (segment_length_a)).force_uhwi ();
+ offset_a = (offset_a + access_size_a) - const_length_a;
}
+ else
+ const_length_a = tree_to_poly_uint64 (segment_length_a);
if (tree_int_cst_compare (DR_STEP (b), size_zero_node) < 0)
{
- segment_length_b = -segment_length_b;
- offset_b = (offset_b + access_size_b) - segment_length_b;
+ const_length_b = (-wi::to_poly_wide (segment_length_b)).force_uhwi ();
+ offset_b = (offset_b + access_size_b) - const_length_b;
}
+ else
+ const_length_b = tree_to_poly_uint64 (segment_length_b);
- segment_length_a += access_size_a;
- segment_length_b += access_size_b;
+ const_length_a += access_size_a;
+ const_length_b += access_size_b;
- if (ranges_must_overlap_p (offset_a, segment_length_a,
- offset_b, segment_length_b))
+ if (ranges_must_overlap_p (offset_a, const_length_a,
+ offset_b, const_length_b))
return 1;
- if (!ranges_may_overlap_p (offset_a, segment_length_a,
- offset_b, segment_length_b))
+ if (!ranges_may_overlap_p (offset_a, const_length_a,
+ offset_b, const_length_b))
return 0;
return -1;
@@ -3440,8 +3445,8 @@ vectorizable_with_step_bound_p (data_reference *dr_a, data_reference *dr_b,
if (!operand_equal_p (DR_BASE_ADDRESS (dr_a), DR_BASE_ADDRESS (dr_b), 0)
|| !operand_equal_p (DR_OFFSET (dr_a), DR_OFFSET (dr_b), 0)
|| !operand_equal_p (DR_STEP (dr_a), DR_STEP (dr_b), 0)
- || !poly_tree_p (DR_INIT (dr_a), &init_a)
- || !poly_tree_p (DR_INIT (dr_b), &init_b)
+ || !poly_int_tree_p (DR_INIT (dr_a), &init_a)
+ || !poly_int_tree_p (DR_INIT (dr_b), &init_b)
|| !ordered_p (init_a, init_b))
return false;
@@ -3658,22 +3663,16 @@ vect_prune_runtime_alias_test_list (loop_vec_info loop_vinfo)
comp_res = data_ref_compare_tree (DR_OFFSET (dr_a),
DR_OFFSET (dr_b));
- /* See whether the alias is known at compilation time.
-
- Note that the segment lengths have sizetype and so are always
- represented as unsigned values, even for negative steps.
- The sign of the DR_STEP indicates whether they are logically
- positive or negative. */
- poly_uint64 const_segment_length_a, const_segment_length_b;
+ /* See whether the alias is known at compilation time. */
if (comp_res == 0
&& TREE_CODE (DR_STEP (dr_a)) == INTEGER_CST
&& TREE_CODE (DR_STEP (dr_b)) == INTEGER_CST
- && poly_tree_p (segment_length_a, &const_segment_length_a)
- && poly_tree_p (segment_length_b, &const_segment_length_b))
+ && poly_int_tree_p (segment_length_a)
+ && poly_int_tree_p (segment_length_b))
{
int res = vect_compile_time_alias (dr_a, dr_b,
- const_segment_length_a,
- const_segment_length_b,
+ segment_length_a,
+ segment_length_b,
access_size_a,
access_size_b);
if (res >= 0 && dump_enabled_p ())
@@ -5515,7 +5514,7 @@ vect_permute_store_chain (vec<tree> dr_chain,
if (length == 3)
{
- /* Enforced by vect_grouped_store_supported. */
+ /* vect_grouped_store_supported ensures that this is constant. */
unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
unsigned int j0 = 0, j1 = 0, j2 = 0;
@@ -5591,7 +5590,7 @@ vect_permute_store_chain (vec<tree> dr_chain,
}
else
{
- /* Enforced by vect_grouped_store_supported. */
+ /* vect_grouped_store_supported ensures that this is constant. */
unsigned int nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
auto_vec_perm_indices sel (nelt);
sel.quick_grow (nelt);
@@ -6149,7 +6148,7 @@ vect_permute_load_chain (vec<tree> dr_chain,
if (length == 3)
{
- /* Enforced by vect_grouped_load_supported. */
+ /* vect_grouped_load_supported ensures that this is constant. */
unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
unsigned int k;
@@ -6212,7 +6211,7 @@ vect_permute_load_chain (vec<tree> dr_chain,
}
else
{
- /* Enforced by vect_grouped_load_supported. */
+ /* vect_grouped_load_supported ensures that this is constant. */
unsigned nelt = TYPE_VECTOR_SUBPARTS (vectype).to_constant ();
auto_vec_perm_indices sel (nelt);
sel.quick_grow (nelt);
@@ -6375,7 +6374,7 @@ vect_shift_permute_load_chain (vec<tree> dr_chain,
unsigned HOST_WIDE_INT nelt, vf;
if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nelt)
|| !LOOP_VINFO_VECT_FACTOR (loop_vinfo).is_constant (&vf))
- /* Not supported for variable-width vectors. */
+ /* Not supported for variable-length vectors. */
return false;
auto_vec_perm_indices sel (nelt);
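vect_compile_time_alias as reshaped above, reduced to constant offsets and lengths (a sketch that ignores the negative-step adjustment and the access-size padding; with poly_int operands the must- and may-overlap tests can disagree, which is why the real function can return -1 for "don't know"):

#include <stdbool.h>

/* 1 = the segments must overlap, 0 = they cannot.  With fully constant
   operands the two range tests coincide, so -1 never arises here.  */
static int
compile_time_alias (long off_a, unsigned long len_a,
                    long off_b, unsigned long len_b)
{
  bool overlap = off_a < off_b + (long) len_b
                 && off_b < off_a + (long) len_a;
  return overlap ? 1 : 0;
}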
diff --git a/gcc/tree-vect-generic.c b/gcc/tree-vect-generic.c
index 799ffb14e38..1152222be08 100644
--- a/gcc/tree-vect-generic.c
+++ b/gcc/tree-vect-generic.c
@@ -41,6 +41,26 @@ along with GCC; see the file COPYING3. If not see
static void expand_vector_operations_1 (gimple_stmt_iterator *);
+/* Return the number of elements in a vector type TYPE that we have
+ already decided needs to be expanded piecewise. We don't support
+ this kind of expansion for variable-length vectors, since we should
+ always check for target support before introducing uses of those. */
+static unsigned int
+nunits_for_known_piecewise_op (const_tree type)
+{
+ return TYPE_VECTOR_SUBPARTS (type).to_constant ();
+}
+
+/* Return true if TYPE1 has more elements than TYPE2, where either
+ type may be a vector or a scalar. */
+
+static inline bool
+subparts_gt (tree type1, tree type2)
+{
+ poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
+ poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
+ return must_gt (n1, n2);
+}
/* Build a constant of type TYPE, made of VALUE's bits replicated
every TYPE_SIZE (INNER_TYPE) bits to fit TYPE's precision. */
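What "expanding piecewise" means for the helpers added above, sketched in plain C: when no vector instruction is available, the operation is lowered to one scalar operation per element, which requires the element count to be a compile-time constant; hence the unconditional to_constant () in nunits_for_known_piecewise_op.

/* Piecewise lowering of a 4-element vector addition; 4 is what
   nunits_for_known_piecewise_op would return for this type.  */
typedef struct { int e[4]; } v4si;

static v4si
add_piecewise (v4si a, v4si b)
{
  v4si r;
  for (int i = 0; i < 4; i++)
    r.e[i] = a.e[i] + b.e[i];
  return r;
}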
@@ -254,8 +274,7 @@ expand_vector_piecewise (gimple_stmt_iterator *gsi, elem_op_func f,
vec<constructor_elt, va_gc> *v;
tree part_width = TYPE_SIZE (inner_type);
tree index = bitsize_int (0);
- /* We don't support piecewise expansion for variable-length vectors. */
- int nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+ int nunits = nunits_for_known_piecewise_op (type);
int delta = tree_to_uhwi (part_width)
/ tree_to_uhwi (TYPE_SIZE (TREE_TYPE (type)));
int i;
@@ -339,8 +358,7 @@ expand_vector_addition (gimple_stmt_iterator *gsi,
if (INTEGRAL_TYPE_P (TREE_TYPE (type))
&& parts_per_word >= 4
- /* We don't support piecewise expansion for variable-length vectors. */
- && TYPE_VECTOR_SUBPARTS (type).to_constant () >= 4)
+ && nunits_for_known_piecewise_op (type) >= 4)
return expand_vector_parallel (gsi, f_parallel,
type, a, b, code);
else
@@ -375,8 +393,7 @@ static tree
add_rshift (gimple_stmt_iterator *gsi, tree type, tree op0, int *shiftcnts)
{
optab op;
- /* Enforced by the caller. */
- unsigned int i, nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+ unsigned int i, nunits = nunits_for_known_piecewise_op (type);
bool scalar_shift = true;
for (i = 1; i < nunits; i++)
@@ -421,8 +438,7 @@ expand_vector_divmod (gimple_stmt_iterator *gsi, tree type, tree op0,
bool has_vector_shift = true;
int mode = -1, this_mode;
int pre_shift = -1, post_shift;
- /* We don't support piecewise expansion for variable-length vectors. */
- unsigned int nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+ unsigned int nunits = nunits_for_known_piecewise_op (type);
int *shifts = XALLOCAVEC (int, nunits * 4);
int *pre_shifts = shifts + nunits;
int *post_shifts = pre_shifts + nunits;
@@ -923,8 +939,7 @@ expand_vector_condition (gimple_stmt_iterator *gsi)
warning_at (loc, OPT_Wvector_operation_performance,
"vector condition will be expanded piecewise");
- /* We don't support piecewise expansion for variable-length vectors. */
- int nunits = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+ int nunits = nunits_for_known_piecewise_op (type);
vec_alloc (v, nunits);
for (i = 0; i < nunits; i++)
{
@@ -1063,15 +1078,17 @@ optimize_vector_constructor (gimple_stmt_iterator *gsi)
tree lhs = gimple_assign_lhs (stmt);
tree rhs = gimple_assign_rhs1 (stmt);
tree type = TREE_TYPE (rhs);
- /* We don't support piecewise expansion for variable-length vectors. */
- unsigned int i, j, nelts = TYPE_VECTOR_SUBPARTS (type).to_constant ();
+ unsigned int i, j;
+ unsigned HOST_WIDE_INT nelts;
bool all_same = true;
constructor_elt *elt;
gimple *g;
tree base = NULL_TREE;
optab op;
- if (nelts <= 2 || CONSTRUCTOR_NELTS (rhs) != nelts)
+ if (!TYPE_VECTOR_SUBPARTS (type).is_constant (&nelts)
+ || nelts <= 2
+ || CONSTRUCTOR_NELTS (rhs) != nelts)
return;
op = optab_for_tree_code (PLUS_EXPR, type, optab_default);
if (op == unknown_optab
@@ -1192,8 +1209,7 @@ vector_element (gimple_stmt_iterator *gsi, tree vect, tree idx, tree *ptmpvec)
vect_type = TREE_TYPE (vect);
vect_elt_type = TREE_TYPE (vect_type);
- /* We don't support piecewise expansion for variable-length vectors. */
- elements = TYPE_VECTOR_SUBPARTS (vect_type).to_constant ();
+ elements = nunits_for_known_piecewise_op (vect_type);
if (TREE_CODE (idx) == INTEGER_CST)
{
@@ -1286,8 +1302,7 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
tree mask_type = TREE_TYPE (mask);
tree vect_elt_type = TREE_TYPE (vect_type);
tree mask_elt_type = TREE_TYPE (mask_type);
- /* We don't support piecewise expansion for variable-length vectors. */
- unsigned int elements = TYPE_VECTOR_SUBPARTS (vect_type).to_constant ();
+ unsigned HOST_WIDE_INT elements;
vec<constructor_elt, va_gc> *v;
tree constr, t, si, i_val;
tree vec0tmp = NULL_TREE, vec1tmp = NULL_TREE, masktmp = NULL_TREE;
@@ -1295,6 +1310,9 @@ lower_vec_perm (gimple_stmt_iterator *gsi)
location_t loc = gimple_location (gsi_stmt (*gsi));
unsigned i;
+ if (!TYPE_VECTOR_SUBPARTS (vect_type).is_constant (&elements))
+ return;
+
if (TREE_CODE (mask) == SSA_NAME)
{
gimple *def_stmt = SSA_NAME_DEF_STMT (mask);
@@ -1451,9 +1469,8 @@ get_compute_type (enum tree_code code, optab op, tree type)
tree vector_compute_type
= type_for_widest_vector_mode (TREE_TYPE (type), op);
if (vector_compute_type != NULL_TREE
+ && subparts_gt (compute_type, vector_compute_type)
&& may_ne (TYPE_VECTOR_SUBPARTS (vector_compute_type), 1U)
- && multiple_p (TYPE_VECTOR_SUBPARTS (compute_type),
- TYPE_VECTOR_SUBPARTS (vector_compute_type))
&& (optab_handler (op, TYPE_MODE (vector_compute_type))
!= CODE_FOR_nothing))
compute_type = vector_compute_type;
@@ -1481,20 +1498,6 @@ get_compute_type (enum tree_code code, optab op, tree type)
return compute_type;
}
-/* Helper function of expand_vector_operations_1. Return true if
- TYPE1 has more elements than TYPE2. */
-
-static inline bool
-subparts_gt (tree type1, tree type2)
-{
- poly_uint64 n1 = VECTOR_TYPE_P (type1) ? TYPE_VECTOR_SUBPARTS (type1) : 1;
- poly_uint64 n2 = VECTOR_TYPE_P (type2) ? TYPE_VECTOR_SUBPARTS (type2) : 1;
- if (multiple_p (n1, n2))
- return true;
- gcc_checking_assert (multiple_p (n2, n1));
- return false;
-}
-
static tree
do_cond (gimple_stmt_iterator *gsi, tree inner_type, tree a, tree b,
tree bitpos, tree bitsize, enum tree_code code,
diff --git a/gcc/tree-vect-loop-manip.c b/gcc/tree-vect-loop-manip.c
index a44f3110ced..8cdc3c2521e 100644
--- a/gcc/tree-vect-loop-manip.c
+++ b/gcc/tree-vect-loop-manip.c
@@ -786,7 +786,7 @@ vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo,
to have a full mask. */
poly_uint64 const_limit;
bool first_iteration_full
- = (poly_tree_p (first_limit, &const_limit)
+ = (poly_int_tree_p (first_limit, &const_limit)
&& must_ge (const_limit, (i + 1) * nscalars_per_mask));
/* Rather than have a new IV that starts at BIAS and goes up to
@@ -836,7 +836,7 @@ vect_set_loop_masks_directly (struct loop *loop, loop_vec_info loop_vinfo,
scalars. */
poly_uint64 const_skip;
if (nscalars_skip
- && !(poly_tree_p (nscalars_skip, &const_skip)
+ && !(poly_int_tree_p (nscalars_skip, &const_skip)
&& must_le (const_skip, bias)))
{
tree unskipped_mask = vect_gen_while_not (preheader_seq, mask_type,
@@ -1837,7 +1837,7 @@ vect_update_ivs_after_vectorizer (loop_vec_info loop_vinfo,
to SEQ. */
static tree
-get_misalign_in_elems (gimple_seq *seq, loop_vec_info loop_vinfo)
+get_misalign_in_elems (gimple **seq, loop_vec_info loop_vinfo)
{
struct data_reference *dr = LOOP_VINFO_UNALIGNED_DR (loop_vinfo);
gimple *dr_stmt = DR_STMT (dr);
@@ -1861,14 +1861,13 @@ get_misalign_in_elems (gimple_seq *seq, loop_vec_info loop_vinfo)
tree elem_size_log = build_int_cst (type, exact_log2 (elem_size));
/* Create: misalign_in_bytes = addr & (target_align - 1). */
- tree int_start_addr = gimple_convert (seq, type, start_addr);
- tree misalign_in_bytes = gimple_build (seq, BIT_AND_EXPR, type,
- int_start_addr,
- target_align_minus_1);
+ tree int_start_addr = fold_convert (type, start_addr);
+ tree misalign_in_bytes = fold_build2 (BIT_AND_EXPR, type, int_start_addr,
+ target_align_minus_1);
/* Create: misalign_in_elems = misalign_in_bytes / element_size. */
- tree misalign_in_elems = gimple_build (seq, RSHIFT_EXPR, type,
- misalign_in_bytes, elem_size_log);
+ tree misalign_in_elems = fold_build2 (RSHIFT_EXPR, type, misalign_in_bytes,
+ elem_size_log);
return misalign_in_elems;
}
@@ -1946,14 +1945,13 @@ vect_gen_prolog_loop_niters (loop_vec_info loop_vinfo,
& (align_in_elems - 1)). */
bool negative = tree_int_cst_compare (DR_STEP (dr), size_zero_node) < 0;
if (negative)
- iters = gimple_build (&stmts, MINUS_EXPR, type,
- misalign_in_elems, align_in_elems_tree);
+ iters = fold_build2 (MINUS_EXPR, type, misalign_in_elems,
+ align_in_elems_tree);
else
- iters = gimple_build (&stmts, MINUS_EXPR, type,
- align_in_elems_tree, misalign_in_elems);
- iters = gimple_build (&stmts, BIT_AND_EXPR, type, iters,
- align_in_elems_minus_1);
- iters = gimple_convert (&stmts, niters_type, iters);
+ iters = fold_build2 (MINUS_EXPR, type, align_in_elems_tree,
+ misalign_in_elems);
+ iters = fold_build2 (BIT_AND_EXPR, type, iters, align_in_elems_minus_1);
+ iters = fold_convert (niters_type, iters);
*bound = align_in_elems - 1;
}
@@ -2068,13 +2066,16 @@ vect_prepare_for_masked_peels (loop_vec_info loop_vinfo)
}
else
{
- gimple_seq seq = NULL;
- misalign_in_elems = get_misalign_in_elems (&seq, loop_vinfo);
- misalign_in_elems = gimple_convert (&seq, type, misalign_in_elems);
- if (seq)
+ gimple_seq seq1 = NULL, seq2 = NULL;
+ misalign_in_elems = get_misalign_in_elems (&seq1, loop_vinfo);
+ misalign_in_elems = fold_convert (type, misalign_in_elems);
+ misalign_in_elems = force_gimple_operand (misalign_in_elems,
+ &seq2, true, NULL_TREE);
+ gimple_seq_add_seq (&seq1, seq2);
+ if (seq1)
{
edge pe = loop_preheader_edge (LOOP_VINFO_LOOP (loop_vinfo));
- basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq);
+ basic_block new_bb = gsi_insert_seq_on_edge_immediate (pe, seq1);
gcc_assert (!new_bb);
}
}
@@ -2286,11 +2287,10 @@ vect_gen_vector_loop_niters_mult_vf (loop_vec_info loop_vinfo,
tree niters_vector,
tree *niters_vector_mult_vf_ptr)
{
- tree type = TREE_TYPE (niters_vector);
- /* FIXME!!!! */
/* We should be using a step_vector of VF if VF is variable. */
int vf = LOOP_VINFO_VECT_FACTOR (loop_vinfo).to_constant ();
struct loop *loop = LOOP_VINFO_LOOP (loop_vinfo);
+ tree type = TREE_TYPE (niters_vector);
tree log_vf = build_int_cst (type, exact_log2 (vf));
basic_block exit_bb = single_exit (loop)->dest;
@@ -2725,7 +2725,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
type = TREE_TYPE (niters);
prob_vector = profile_probability::guessed_always ().apply_scale (9, 10);
- estimated_vf = estimated_poly_value (LOOP_VINFO_VECT_FACTOR (loop_vinfo));
+ estimated_vf = vect_vf_for_cost (loop_vinfo);
if (estimated_vf == 2)
estimated_vf = 3;
prob_prolog = prob_epilog = profile_probability::guessed_always ()
@@ -2856,7 +2856,7 @@ vect_do_peeling (loop_vec_info loop_vinfo, tree niters, tree nitersm1,
/* It's guaranteed that vector loop bound before vectorization is at
least VF, so set range information for newly generated var. */
poly_uint64 const_vf;
- if (new_var_p && poly_tree_p (vf, &const_vf))
+ if (new_var_p && poly_int_tree_p (vf, &const_vf))
set_range_info (niters, VR_RANGE,
wi::to_wide (build_int_cstu
(type, constant_lower_bound (const_vf))),
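
The get_misalign_in_elems hunks above only swap the on-the-fly gimple_build calls for fold_build2/fold_convert plus a later gimplification; the arithmetic is unchanged: misalign_in_bytes = addr & (target_align - 1), then a conversion from bytes to elements. A plain-integer sketch of that computation (names are illustrative):

#include <cstdint>
#include <cassert>

/* The peeling computation from get_misalign_in_elems, modelled on
   plain integers: how many scalar elements the start address lies
   past the last aligned boundary.  */
static uint64_t
misalign_in_elems (uint64_t addr, uint64_t target_align,
		   uint64_t elem_size)
{
  /* misalign_in_bytes = addr & (target_align - 1).  */
  uint64_t misalign_in_bytes = addr & (target_align - 1);
  /* misalign_in_elems = misalign_in_bytes / element_size.  */
  return misalign_in_bytes / elem_size;
}

int
main ()
{
  /* 16-byte target alignment, 4-byte elements, address 0x1004:
     4 bytes past the boundary, i.e. one element.  */
  assert (misalign_in_elems (0x1004, 16, 4) == 1);
  return 0;
}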
diff --git a/gcc/tree-vect-loop.c b/gcc/tree-vect-loop.c
index 77378274c25..61da0e7998d 100644
--- a/gcc/tree-vect-loop.c
+++ b/gcc/tree-vect-loop.c
@@ -190,7 +190,6 @@ vect_determine_vectorization_factor (loop_vec_info loop_vinfo)
tree scalar_type = NULL_TREE;
gphi *phi;
tree vectype;
- poly_uint64 nunits;
stmt_vec_info stmt_info;
unsigned i;
HOST_WIDE_INT dummy;
@@ -7560,7 +7559,6 @@ vect_worthwhile_without_simd_p (vec_info *vinfo, tree_code code)
&& value >= vect_min_worthwhile_factor (code));
}
-
/* Function vectorizable_induction
Check if PHI performs an induction computation that can be vectorized.
diff --git a/gcc/tree-vect-slp.c b/gcc/tree-vect-slp.c
index 7e85604ea10..c6ad2c557fa 100644
--- a/gcc/tree-vect-slp.c
+++ b/gcc/tree-vect-slp.c
@@ -1633,14 +1633,14 @@ vect_attempt_slp_rearrange_stmts (slp_instance slp_instn)
node->load_permutation);
/* We are done, no actual permutations need to be generated. */
- poly_int64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
+ poly_uint64 unrolling_factor = SLP_INSTANCE_UNROLLING_FACTOR (slp_instn);
FOR_EACH_VEC_ELT (SLP_INSTANCE_LOADS (slp_instn), i, node)
{
gimple *first_stmt = SLP_TREE_SCALAR_STMTS (node)[0];
first_stmt = GROUP_FIRST_ELEMENT (vinfo_for_stmt (first_stmt));
/* But we have to keep those permutations that are required because
of handling of gaps. */
- if (must_eq (unrolling_factor, 1)
+ if (must_eq (unrolling_factor, 1U)
|| (group_size == GROUP_SIZE (vinfo_for_stmt (first_stmt))
&& GROUP_GAP (vinfo_for_stmt (first_stmt)) == 0))
SLP_TREE_LOAD_PERMUTATION (node).release ();
@@ -1729,16 +1729,16 @@ vect_supported_load_permutation_p (slp_instance slp_instn)
stmt_vec_info group_info
= vinfo_for_stmt (SLP_TREE_SCALAR_STMTS (node)[0]);
group_info = vinfo_for_stmt (GROUP_FIRST_ELEMENT (group_info));
- unsigned int lowest_nunits
- = (constant_lower_bound
- (TYPE_VECTOR_SUBPARTS (STMT_VINFO_VECTYPE (group_info))));
+ unsigned HOST_WIDE_INT nunits;
unsigned k, maxk = 0;
FOR_EACH_VEC_ELT (SLP_TREE_LOAD_PERMUTATION (node), j, k)
if (k > maxk)
maxk = k;
/* In BB vectorization we may not actually use a loaded vector
accessing elements in excess of GROUP_SIZE. */
- if (maxk >= (GROUP_SIZE (group_info) & ~(lowest_nunits - 1)))
+ tree vectype = STMT_VINFO_VECTYPE (group_info);
+ if (!TYPE_VECTOR_SUBPARTS (vectype).is_constant (&nunits)
+ || maxk >= (GROUP_SIZE (group_info) & ~(nunits - 1)))
{
dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
"BB vectorization with gaps at the end of "
@@ -2004,7 +2004,7 @@ vect_analyze_slp_cost (slp_instance instance, void *data)
default:;
}
}
- unsigned assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
+ unsigned int assumed_nunits = vect_nunits_for_cost (vectype_for_cost);
ncopies_for_cost = (least_common_multiple (assumed_nunits,
group_size * assumed_vf)
/ assumed_nunits);
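
The load-permutation hunk now insists on a constant number of vector subparts and keeps the existing bound: a permutation index must stay below GROUP_SIZE rounded down to a whole number of vectors, otherwise the load would reach into the gap at the end of the group. A plain-integer sketch of that bound (names are illustrative):

#include <cassert>

/* The gap check from vect_supported_load_permutation_p on plain
   integers: a load permutation index MAXK must not reach into the
   tail of the group that no whole vector covers.  NUNITS must be a
   power of two.  */
static bool
permutation_in_bounds (unsigned maxk, unsigned group_size,
		       unsigned nunits)
{
  /* Round GROUP_SIZE down to a whole number of vectors.  */
  unsigned usable = group_size & ~(nunits - 1);
  return maxk < usable;
}

int
main ()
{
  /* A group of 7 elements with 4-lane vectors: only indices 0..3
     are guaranteed to be loaded.  */
  assert (permutation_in_bounds (3, 7, 4));
  assert (!permutation_in_bounds (4, 7, 4));
  return 0;
}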
diff --git a/gcc/tree-vect-stmts.c b/gcc/tree-vect-stmts.c
index 5ee3f24ff21..1bc2d43773c 100644
--- a/gcc/tree-vect-stmts.c
+++ b/gcc/tree-vect-stmts.c
@@ -4137,9 +4137,15 @@ vectorizable_call (gimple *gs, gimple_stmt_iterator *gsi, gimple **vec_stmt,
return false;
combined_fn cfn = gimple_call_combined_fn (stmt);
- if (cfn == CFN_MASK_LOAD || cfn == CFN_MASK_STORE)
- return vectorizable_mask_load_store (stmt, gsi, vec_stmt,
- slp_node);
+ switch (cfn)
+ {
+ case CFN_MASK_LOAD:
+ case CFN_MASK_STORE:
+ return vectorizable_mask_load_store (stmt, gsi, vec_stmt, slp_node);
+
+ default:
+ break;
+ }
if (gimple_call_lhs (stmt) == NULL_TREE
|| TREE_CODE (gimple_call_lhs (stmt)) != SSA_NAME)
@@ -4710,7 +4716,8 @@ vect_simd_lane_linear (tree op, struct loop *loop,
}
/* Return the number of elements in vector type VECTYPE, which is associated
- with a SIMD clone. At present these are always constant-width. */
+ with a SIMD clone. At present these vectors always have a constant
+ length. */
static unsigned HOST_WIDE_INT
simd_clone_subparts (tree vectype)
@@ -8153,14 +8160,6 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
&memory_access_type, &gs_info))
return false;
- if (firstfaulting_p && memory_access_type != VMAT_CONTIGUOUS)
- {
- if (dump_enabled_p ())
- dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
- "Non-contiguous not supported for first faulting\n");
- return false;
- }
-
wgather_info wgather = DEFAULT_WGATHER_INFO;
if (memory_access_type == VMAT_GATHER_SCATTER)
{
@@ -8170,6 +8169,14 @@ vectorizable_load (gimple *stmt, gimple_stmt_iterator *gsi, gimple **vec_stmt,
return false;
}
+ if (firstfaulting_p && memory_access_type != VMAT_CONTIGUOUS)
+ {
+ if (dump_enabled_p ())
+ dump_printf_loc (MSG_MISSED_OPTIMIZATION, vect_location,
+ "Non-contiguous not supported for first faulting\n");
+ return false;
+ }
+
if (firstfaulting_p)
gcc_assert (LOOP_VINFO_SPECULATIVE_EXECUTION (loop_vinfo));
diff --git a/gcc/tree-vectorizer.c b/gcc/tree-vectorizer.c
index 297fbfd4194..436d04cb305 100644
--- a/gcc/tree-vectorizer.c
+++ b/gcc/tree-vectorizer.c
@@ -1041,8 +1041,8 @@ get_vec_alignment_for_array_type (tree type)
tree vectype = get_vectype_for_scalar_type (strip_array_types (type));
if (!vectype
- || !poly_tree_p (TYPE_SIZE (type), &array_size)
- || !poly_tree_p (TYPE_SIZE (vectype), &vector_size)
+ || !poly_int_tree_p (TYPE_SIZE (type), &array_size)
+ || !poly_int_tree_p (TYPE_SIZE (vectype), &vector_size)
|| must_lt (array_size, vector_size))
return 0;
diff --git a/gcc/tree-vrp.c b/gcc/tree-vrp.c
index 7e267aa2a54..a34dea905d3 100644
--- a/gcc/tree-vrp.c
+++ b/gcc/tree-vrp.c
@@ -1117,29 +1117,28 @@ compare_values_warnv (tree val1, tree val2, bool *strict_overflow_p)
if (!POINTER_TYPE_P (TREE_TYPE (val1)))
{
- if (TREE_CODE (val1) != INTEGER_CST
- || TREE_CODE (val2) != INTEGER_CST)
- {
- if (poly_tree_p (val1) && poly_tree_p (val2))
- {
- if (must_eq (wi::to_poly_widest (val1),
- wi::to_poly_widest (val2)))
- return 0;
- if (must_lt (wi::to_poly_widest (val1),
- wi::to_poly_widest (val2)))
- return -1;
- if (must_gt (wi::to_poly_widest (val1),
- wi::to_poly_widest (val2)))
- return 1;
- }
- return -2;
- }
-
/* We cannot compare overflowed values. */
if (TREE_OVERFLOW (val1) || TREE_OVERFLOW (val2))
return -2;
- return tree_int_cst_compare (val1, val2);
+ if (TREE_CODE (val1) == INTEGER_CST
+ && TREE_CODE (val2) == INTEGER_CST)
+ return tree_int_cst_compare (val1, val2);
+
+ if (poly_int_tree_p (val1) && poly_int_tree_p (val2))
+ {
+ if (must_eq (wi::to_poly_widest (val1),
+ wi::to_poly_widest (val2)))
+ return 0;
+ if (must_lt (wi::to_poly_widest (val1),
+ wi::to_poly_widest (val2)))
+ return -1;
+ if (must_gt (wi::to_poly_widest (val1),
+ wi::to_poly_widest (val2)))
+ return 1;
+ }
+
+ return -2;
}
else
{
diff --git a/gcc/tree.c b/gcc/tree.c
index fe6140ecda0..ee12d3f3c4f 100644
--- a/gcc/tree.c
+++ b/gcc/tree.c
@@ -105,8 +105,6 @@ static const char *const tree_code_name[] = {
#undef DEFTREECODE
#undef END_OF_BASE_TREE_CODES
-static tree wide_int_to_tree_1 (tree, const wide_int_ref &);
-
/* Each tree code class has an associated string representation.
These must correspond to the tree_code_class entries. */
@@ -690,7 +688,7 @@ decl_assembler_name (tree decl)
{
if (!DECL_ASSEMBLER_NAME_SET_P (decl))
lang_hooks.set_decl_assembler_name (decl);
- return DECL_WITH_VIS_CHECK (decl)->decl_with_vis.assembler_name;
+ return DECL_ASSEMBLER_NAME_RAW (decl);
}
/* When the target supports COMDAT groups, this indicates which group the
@@ -1316,50 +1314,51 @@ build_new_int_cst (tree type, const wide_int &cst)
return nt;
}
-/* Create a constant tree that contains VALUE sign-extended to TYPE. */
+/* Return a new POLY_INT_CST with coefficients COEFFS and type TYPE. */
-tree
-build_int_cst (tree type, poly_int64 value)
+static tree
+build_new_poly_int_cst (tree type, tree (&coeffs)[NUM_POLY_INT_COEFFS])
{
- /* Support legacy code. */
- if (!type)
- type = integer_type_node;
+ size_t length = sizeof (struct tree_poly_int_cst);
+ record_node_allocation_statistics (POLY_INT_CST, length);
- unsigned int prec = TYPE_PRECISION (type);
- if (value.is_constant ())
- return wide_int_to_tree_1 (type, wi::shwi (value.coeffs[0], prec));
- return build_poly_int_cst (type, poly_wide_int::from (value, prec, SIGNED));
+ tree t = ggc_alloc_cleared_tree_node_stat (length PASS_MEM_STAT);
+
+ TREE_SET_CODE (t, POLY_INT_CST);
+ TREE_CONSTANT (t) = 1;
+ TREE_TYPE (t) = type;
+ for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
+ POLY_INT_CST_COEFF (t, i) = coeffs[i];
+ return t;
}
-/* Create a constant tree that contains VALUE zero-extended to TYPE. */
+/* Create a constant tree that contains CST sign-extended to TYPE. */
tree
-build_int_cstu (tree type, poly_uint64 value)
+build_int_cst (tree type, poly_int64 cst)
{
- unsigned int prec = TYPE_PRECISION (type);
- if (value.is_constant ())
- return wide_int_to_tree_1 (type, wi::uhwi (value.coeffs[0], prec));
- return build_poly_int_cst (type, poly_wide_int::from (value, prec,
- UNSIGNED));
+ /* Support legacy code. */
+ if (!type)
+ type = integer_type_node;
+
+ return wide_int_to_tree (type, wi::shwi (cst, TYPE_PRECISION (type)));
}
-/* Create a constant tree that contains VALUE sign-extended to TYPE. */
+/* Create a constant tree that contains CST zero-extended to TYPE. */
tree
-build_int_cst_type (tree type, poly_int64 value)
+build_int_cstu (tree type, poly_uint64 cst)
{
- gcc_assert (type);
- return build_int_cst (type, value);
+ return wide_int_to_tree (type, wi::uhwi (cst, TYPE_PRECISION (type)));
}
-/* Create a constant tree with value VALUE in type TYPE. */
+/* Create a constant tree that contains CST sign-extended to TYPE. */
tree
-wide_int_to_tree (tree type, const poly_wide_int_ref &value)
+build_int_cst_type (tree type, poly_int64 cst)
{
- if (value.is_constant ())
- return wide_int_to_tree_1 (type, value.coeffs[0]);
- return build_poly_int_cst (type, value);
+ gcc_assert (type);
+ return wide_int_to_tree (type, wi::shwi (cst, TYPE_PRECISION (type)));
}
/* Constructs tree in type TYPE from with value given by CST. Signedness
@@ -1368,7 +1367,7 @@ wide_int_to_tree (tree type, const poly_wide_int_ref &value)
tree
double_int_to_tree (tree type, double_int cst)
{
- return wide_int_to_tree_1 (type, widest_int::from (cst, TYPE_SIGN (type)));
+ return wide_int_to_tree (type, widest_int::from (cst, TYPE_SIGN (type)));
}
/* We force the wide_int CST to the range of the type TYPE by sign or
@@ -1401,8 +1400,19 @@ force_fit_type (tree type, const poly_wide_int_ref &cst,
{
poly_wide_int tmp = poly_wide_int::from (cst, TYPE_PRECISION (type),
sign);
- /* FIXME */
- tree t = build_new_int_cst (type, tmp.coeffs[0]);
+ tree t;
+ if (tmp.is_constant ())
+ t = build_new_int_cst (type, tmp.coeffs[0]);
+ else
+ {
+ tree coeffs[NUM_POLY_INT_COEFFS];
+ for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
+ {
+ coeffs[i] = build_new_int_cst (type, tmp.coeffs[i]);
+ TREE_OVERFLOW (coeffs[i]) = 1;
+ }
+ t = build_new_poly_int_cst (type, coeffs);
+ }
TREE_OVERFLOW (t) = 1;
return t;
}
@@ -1605,6 +1615,66 @@ wide_int_to_tree_1 (tree type, const wide_int_ref &pcst)
return t;
}
+hashval_t
+poly_int_cst_hasher::hash (tree t)
+{
+ inchash::hash hstate;
+
+ hstate.add_int (TYPE_UID (TREE_TYPE (t)));
+ for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
+ hstate.add_wide_int (wi::to_wide (POLY_INT_CST_COEFF (t, i)));
+
+ return hstate.end ();
+}
+
+bool
+poly_int_cst_hasher::equal (tree x, const compare_type &y)
+{
+ if (TREE_TYPE (x) != y.first)
+ return false;
+ for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
+ if (wi::to_wide (POLY_INT_CST_COEFF (x, i)) != y.second->coeffs[i])
+ return false;
+ return true;
+}
+
+/* Build a POLY_INT_CST node with type TYPE and with the elements in VALUES.
+ The elements must also have type TYPE. */
+
+tree
+build_poly_int_cst (tree type, const poly_wide_int_ref &values)
+{
+ unsigned int prec = TYPE_PRECISION (type);
+ gcc_assert (prec <= values.coeffs[0].get_precision ());
+ poly_wide_int c = poly_wide_int::from (values, prec, SIGNED);
+
+ inchash::hash h;
+ h.add_int (TYPE_UID (type));
+ for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
+ h.add_wide_int (c.coeffs[i]);
+ poly_int_cst_hasher::compare_type comp (type, &c);
+ tree *slot = poly_int_cst_hash_table->find_slot_with_hash (comp, h.end (),
+ INSERT);
+ if (*slot == NULL_TREE)
+ {
+ tree coeffs[NUM_POLY_INT_COEFFS];
+ for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
+ coeffs[i] = wide_int_to_tree_1 (type, c.coeffs[i]);
+ *slot = build_new_poly_int_cst (type, coeffs);
+ }
+ return *slot;
+}
+
+/* Create a constant tree with value VALUE in type TYPE. */
+
+tree
+wide_int_to_tree (tree type, const poly_wide_int_ref &value)
+{
+ if (value.is_constant ())
+ return wide_int_to_tree_1 (type, value.coeffs[0]);
+ return build_poly_int_cst (type, value);
+}
+
void
cache_integer_cst (tree t)
{
@@ -1718,8 +1788,8 @@ build_low_bits_mask (tree type, unsigned bits)
{
gcc_assert (bits <= TYPE_PRECISION (type));
- return wide_int_to_tree_1 (type, wi::mask (bits, false,
- TYPE_PRECISION (type)));
+ return wide_int_to_tree (type, wi::mask (bits, false,
+ TYPE_PRECISION (type)));
}
/* Checks that X is integer constant that can be expressed in (unsigned)
@@ -2168,64 +2238,6 @@ build_string (int len, const char *str)
return s;
}
-hashval_t
-poly_int_cst_hasher::hash (tree t)
-{
- inchash::hash hstate;
-
- hstate.add_int (TYPE_UID (TREE_TYPE (t)));
- for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
- hstate.add_wide_int (wi::to_wide (POLY_INT_CST_COEFF (t, i)));
-
- return hstate.end ();
-}
-
-bool
-poly_int_cst_hasher::equal (tree x, const compare_type &y)
-{
- if (TREE_TYPE (x) != y.first)
- return false;
- for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
- if (wi::to_wide (POLY_INT_CST_COEFF (x, i)) != y.second->coeffs[i])
- return false;
- return true;
-}
-
-/* Build a POLY_INT_CST node with type TYPE and with the elements in VALUES.
- The elements must also have type TYPE. */
-
-tree
-build_poly_int_cst (tree type, const poly_wide_int_ref &values)
-{
- unsigned int prec = TYPE_PRECISION (type);
- gcc_assert (prec <= values.coeffs[0].get_precision ());
- poly_wide_int c = poly_wide_int::from (values, prec, SIGNED);
-
- inchash::hash h;
- h.add_int (TYPE_UID (type));
- for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
- h.add_wide_int (c.coeffs[i]);
- poly_int_cst_hasher::compare_type comp (type, &c);
- tree *slot = poly_int_cst_hash_table->find_slot_with_hash (comp, h.end (),
- INSERT);
- if (*slot)
- return *slot;
-
- size_t length = sizeof (struct tree_poly_int_cst);
- record_node_allocation_statistics (POLY_INT_CST, length);
-
- tree t = ggc_alloc_cleared_tree_node_stat (length PASS_MEM_STAT);
-
- TREE_SET_CODE (t, POLY_INT_CST);
- TREE_CONSTANT (t) = 1;
- TREE_TYPE (t) = type;
- for (unsigned int i = 0; i < NUM_POLY_INT_COEFFS; ++i)
- POLY_INT_CST_COEFF (t, i) = wide_int_to_tree_1 (type, c.coeffs[i]);
-
- *slot = t;
- return t;
-}
-
/* Return a newly constructed COMPLEX_CST node whose value is
specified by the real and imaginary parts REAL and IMAG.
Both REAL and IMAG should be constant nodes. TYPE, if specified,
@@ -2940,7 +2952,7 @@ really_constant_p (const_tree exp)
pointer value to get the second. */
bool
-ptrdiff_tree_p (const_tree t, poly_int64 *value)
+ptrdiff_tree_p (const_tree t, poly_int64_pod *value)
{
if (!t)
return false;
@@ -2963,21 +2975,6 @@ ptrdiff_tree_p (const_tree t, poly_int64 *value)
return false;
}
-#if NUM_POLY_INT_COEFFS == 1
-poly_int64
-tree_to_poly_int64 (const_tree t)
-{
- gcc_assert (tree_fits_poly_int64_p (t));
- return TREE_INT_CST_LOW (t);
-}
-
-poly_uint64
-tree_to_poly_uint64 (const_tree t)
-{
- gcc_assert (tree_fits_poly_uint64_p (t));
- return TREE_INT_CST_LOW (t);
-}
-#else
poly_int64
tree_to_poly_int64 (const_tree t)
{
@@ -2995,7 +2992,6 @@ tree_to_poly_uint64 (const_tree t)
return poly_int_cst_value (t).force_uhwi ();
return TREE_INT_CST_LOW (t);
}
-#endif
/* Return first list element whose TREE_VALUE is ELEM.
Return 0 if ELEM is not in LIST. */
@@ -6896,7 +6892,7 @@ tree_fits_uhwi_p (const_tree t)
}
/* Return true if T is an INTEGER_CST or POLY_INT_CST whose numerical
- value (extended according to TYPE_UNSIGNED) fits in a poly_int64. */
+ value (extended according to TYPE_UNSIGNED) fits in a poly_uint64. */
bool
tree_fits_poly_uint64_p (const_tree t)
@@ -7180,15 +7176,6 @@ compare_tree_int (const_tree t, unsigned HOST_WIDE_INT u)
return 1;
}
-/* Return true if T is known to be equal to N. */
-
-bool
-equal_tree_size (const_tree t, poly_uint64 size)
-{
- poly_uint64 t_size;
- return poly_tree_p (t, &t_size) && must_eq (t_size, size);
-}
-
/* Return true if SIZE represents a constant size that is in bounds of
what the middle-end and the backend accepts (covering not more than
half of the address-space). */
@@ -10636,7 +10623,7 @@ build_same_sized_truth_vector_type (tree vectype)
poly_uint64 size = GET_MODE_SIZE (TYPE_MODE (vectype));
- if (must_eq (size, 0U))
+ if (known_zero (size))
size = tree_to_uhwi (TYPE_SIZE_UNIT (vectype));
return build_truth_vector_type (TYPE_VECTOR_SUBPARTS (vectype), size);
@@ -11249,8 +11236,8 @@ upper_bound_in_type (tree outer, tree inner)
gcc_unreachable ();
}
- return wide_int_to_tree_1 (outer,
- wi::mask (prec, false, TYPE_PRECISION (outer)));
+ return wide_int_to_tree (outer,
+ wi::mask (prec, false, TYPE_PRECISION (outer)));
}
/* Returns the smallest value obtainable by casting something in INNER type to
@@ -11277,9 +11264,9 @@ lower_bound_in_type (tree outer, tree inner)
precision or narrowing to a signed type, we want to obtain
-2^(oprec-1). */
unsigned prec = oprec > iprec ? iprec : oprec;
- return wide_int_to_tree_1 (outer,
- wi::mask (prec - 1, true,
- TYPE_PRECISION (outer)));
+ return wide_int_to_tree (outer,
+ wi::mask (prec - 1, true,
+ TYPE_PRECISION (outer)));
}
}
@@ -12729,7 +12716,7 @@ drop_tree_overflow (tree t)
gcc_checking_assert (TREE_OVERFLOW (t));
/* For tree codes with a sharing machinery re-build the result. */
- if (poly_tree_p (t))
+ if (poly_int_tree_p (t))
return wide_int_to_tree (TREE_TYPE (t), wi::to_poly_wide (t));
/* Otherwise, as all tcc_constants are possibly shared, copy the node
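
build_poly_int_cst above now does the hash lookup first and only calls the new build_new_poly_int_cst allocator on a miss, so equal POLY_INT_CSTs keep sharing one node. A toy version of that interning pattern using a standard container (all types and names here are illustrative, not GCC's garbage-collected hash table):

#include <array>
#include <cstdint>
#include <unordered_map>
#include <cassert>

typedef std::array<uint64_t, 2> poly_key;

struct poly_cst { poly_key coeffs; };

struct poly_key_hash
{
  size_t operator() (const poly_key &c) const
  { return std::hash<uint64_t> () (c[0] * 31 + c[1]); }
};

static std::unordered_map<poly_key, poly_cst *, poly_key_hash> hash_table;

/* Intern the constant with coefficients {C0, C1}: equal values share
   one node, the way build_poly_int_cst consults
   poly_int_cst_hash_table before allocating.  */
static poly_cst *
intern_poly_cst (uint64_t c0, uint64_t c1)
{
  poly_key key = {{c0, c1}};
  poly_cst *&slot = hash_table[key];	/* INSERT-style lookup.  */
  if (!slot)
    slot = new poly_cst {key};		/* First use: build a new node.  */
  return slot;
}

int
main ()
{
  assert (intern_poly_cst (4, 4) == intern_poly_cst (4, 4)); /* shared */
  assert (intern_poly_cst (4, 4) != intern_poly_cst (8, 4));
  return 0;
}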
diff --git a/gcc/tree.def b/gcc/tree.def
index 2c08031e3dd..608d950b20e 100644
--- a/gcc/tree.def
+++ b/gcc/tree.def
@@ -291,6 +291,7 @@ DEFTREECODE (VOID_CST, "void_cst", tcc_constant, 0)
some circumstances. */
DEFTREECODE (INTEGER_CST, "integer_cst", tcc_constant, 0)
+/* Contents are given by POLY_INT_CST_COEFF. */
DEFTREECODE (POLY_INT_CST, "poly_int_cst", tcc_constant, 0)
/* Contents are in TREE_REAL_CST field. */
@@ -310,6 +311,8 @@ DEFTREECODE (VECTOR_CST, "vector_cst", tcc_constant, 0)
VEC_DUPLICATE_CST_ELT. */
DEFTREECODE (VEC_DUPLICATE_CST, "vec_duplicate_cst", tcc_constant, 0)
+/* Represents a vector constant in which element i is equal to
+ VEC_SERIES_CST_BASE + i * VEC_SERIES_CST_STEP. */
DEFTREECODE (VEC_SERIES_CST, "vec_series_cst", tcc_constant, 0)
/* Contents are TREE_STRING_LENGTH and the actual contents of the string. */
@@ -1003,8 +1006,8 @@ DEFTREECODE (SCEV_KNOWN, "scev_known", tcc_expression, 0)
DEFTREECODE (SCEV_NOT_KNOWN, "scev_not_known", tcc_expression, 0)
/* Polynomial chains of recurrences.
- Under the form: cr = {CHREC_LEFT (cr), +, CHREC_RIGHT (cr)}. */
-DEFTREECODE (POLYNOMIAL_CHREC, "polynomial_chrec", tcc_expression, 3)
+ cr = {CHREC_LEFT (cr), +, CHREC_RIGHT (cr)}_CHREC_VARIABLE (cr). */
+DEFTREECODE (POLYNOMIAL_CHREC, "polynomial_chrec", tcc_expression, 2)
/* Used to chain children of container statements together.
Use the interface in tree-iterator.h to access this node. */
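
The new comment documents VEC_SERIES_CST directly: element i equals VEC_SERIES_CST_BASE + i * VEC_SERIES_CST_STEP. Expanded on a concrete fixed-length vector (an illustrative sketch, not GCC code):

#include <vector>
#include <cassert>

/* VEC_SERIES_CST semantics on a fixed-length vector:
   element i is BASE + i * STEP.  */
static std::vector<int>
expand_series (int base, int step, unsigned nelts)
{
  std::vector<int> v (nelts);
  for (unsigned i = 0; i < nelts; ++i)
    v[i] = base + (int) i * step;
  return v;
}

int
main ()
{
  /* base 0, step 4 over 4 elements: the byte offsets of the 32-bit
     lanes of a vector.  */
  assert ((expand_series (0, 4, 4) == std::vector<int>{0, 4, 8, 12}));
  return 0;
}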
diff --git a/gcc/tree.h b/gcc/tree.h
index df5767f227f..1b05d969541 100644
--- a/gcc/tree.h
+++ b/gcc/tree.h
@@ -730,8 +730,8 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
#define TYPE_REF_CAN_ALIAS_ALL(NODE) \
(PTR_OR_REF_CHECK (NODE)->base.static_flag)
-/* In an INTEGER_CST, REAL_CST, COMPLEX_CST, VECTOR_CST or VEC_DUPLICATE_CST,
- this means there was an overflow in folding. */
+/* In an INTEGER_CST, REAL_CST, COMPLEX_CST, VECTOR_CST, VEC_DUPLICATE_CST
+ or VEC_SERIES_CST, this means there was an overflow in folding. */

#define TREE_OVERFLOW(NODE) (CST_CHECK (NODE)->base.public_flag)
@@ -1008,11 +1008,14 @@ extern void omp_clause_range_check_failed (const_tree, const char *, int,
#define TREE_INT_CST_LOW(NODE) \
((unsigned HOST_WIDE_INT) TREE_INT_CST_ELT (NODE, 0))
+/* Return true if NODE is a POLY_INT_CST. This is only ever true on
+ targets with variable-sized modes. */
+#define POLY_INT_CST_P(NODE) \
+ (NUM_POLY_INT_COEFFS > 1 && TREE_CODE (NODE) == POLY_INT_CST)
+
/* In a POLY_INT_CST node. */
#define POLY_INT_CST_COEFF(NODE, I) \
(POLY_INT_CST_CHECK (NODE)->poly_int_cst.coeffs[I])
-#define POLY_INT_CST_P(NODE) \
- (NUM_POLY_INT_COEFFS > 1 && TREE_CODE (NODE) == POLY_INT_CST)
#define TREE_REAL_CST_PTR(NODE) (REAL_CST_CHECK (NODE)->real_cst.real_cst_ptr)
#define TREE_REAL_CST(NODE) (*TREE_REAL_CST_PTR (NODE))
@@ -1257,10 +1260,9 @@ extern void protected_set_expr_location (tree, location_t);
#define COND_EXPR_ELSE(NODE) (TREE_OPERAND (COND_EXPR_CHECK (NODE), 2))
/* Accessors for the chains of recurrences. */
-#define CHREC_VAR(NODE) TREE_OPERAND (POLYNOMIAL_CHREC_CHECK (NODE), 0)
-#define CHREC_LEFT(NODE) TREE_OPERAND (POLYNOMIAL_CHREC_CHECK (NODE), 1)
-#define CHREC_RIGHT(NODE) TREE_OPERAND (POLYNOMIAL_CHREC_CHECK (NODE), 2)
-#define CHREC_VARIABLE(NODE) TREE_INT_CST_LOW (CHREC_VAR (NODE))
+#define CHREC_LEFT(NODE) TREE_OPERAND (POLYNOMIAL_CHREC_CHECK (NODE), 0)
+#define CHREC_RIGHT(NODE) TREE_OPERAND (POLYNOMIAL_CHREC_CHECK (NODE), 1)
+#define CHREC_VARIABLE(NODE) POLYNOMIAL_CHREC_CHECK (NODE)->base.u.chrec_var
/* LABEL_EXPR accessor. This gives access to the label associated with
the given label expression. */
@@ -2729,6 +2731,10 @@ extern void decl_value_expr_insert (tree, tree);
LTO compilation and C++. */
#define DECL_ASSEMBLER_NAME(NODE) decl_assembler_name (NODE)
+/* Raw accessor for DECL_ASSEMBLER_NAME. */
+#define DECL_ASSEMBLER_NAME_RAW(NODE) \
+ (DECL_WITH_VIS_CHECK (NODE)->decl_with_vis.assembler_name)
+
/* Return true if NODE is a NODE that can contain a DECL_ASSEMBLER_NAME.
This is true of all DECL nodes except FIELD_DECL. */
#define HAS_DECL_ASSEMBLER_NAME_P(NODE) \
@@ -2738,12 +2744,11 @@ extern void decl_value_expr_insert (tree, tree);
the NODE might still have a DECL_ASSEMBLER_NAME -- it just hasn't been set
yet. */
#define DECL_ASSEMBLER_NAME_SET_P(NODE) \
- (HAS_DECL_ASSEMBLER_NAME_P (NODE) \
- && DECL_WITH_VIS_CHECK (NODE)->decl_with_vis.assembler_name != NULL_TREE)
+ (DECL_ASSEMBLER_NAME_RAW (NODE) != NULL_TREE)
/* Set the DECL_ASSEMBLER_NAME for NODE to NAME. */
#define SET_DECL_ASSEMBLER_NAME(NODE, NAME) \
- (DECL_WITH_VIS_CHECK (NODE)->decl_with_vis.assembler_name = (NAME))
+ (DECL_ASSEMBLER_NAME_RAW (NODE) = (NAME))
/* Copy the DECL_ASSEMBLER_NAME from DECL1 to DECL2. Note that if DECL1's
DECL_ASSEMBLER_NAME has not yet been set, using this macro will not cause
@@ -2755,10 +2760,7 @@ extern void decl_value_expr_insert (tree, tree);
which will try to set the DECL_ASSEMBLER_NAME for DECL1. */
#define COPY_DECL_ASSEMBLER_NAME(DECL1, DECL2) \
- (DECL_ASSEMBLER_NAME_SET_P (DECL1) \
- ? (void) SET_DECL_ASSEMBLER_NAME (DECL2, \
- DECL_ASSEMBLER_NAME (DECL1)) \
- : (void) 0)
+ SET_DECL_ASSEMBLER_NAME (DECL2, DECL_ASSEMBLER_NAME_RAW (DECL1))
/* Records the section name in a section attribute. Used to pass
the name from decl_attributes to make_function_rtl and make_decl_rtl. */
@@ -3661,6 +3663,7 @@ id_equal (const char *str, const_tree id)
}
/* Return the number of elements in the VECTOR_TYPE given by NODE. */
+
inline poly_uint64
TYPE_VECTOR_SUBPARTS (const_tree node)
{
@@ -3680,6 +3683,7 @@ TYPE_VECTOR_SUBPARTS (const_tree node)
/* Set the number of elements in VECTOR_TYPE NODE to SUBPARTS, which must
satisfy valid_vector_subparts_p. */
+
inline void
SET_TYPE_VECTOR_SUBPARTS (tree node, poly_uint64 subparts)
{
@@ -3700,6 +3704,7 @@ SET_TYPE_VECTOR_SUBPARTS (tree node, poly_uint64 subparts)
/* Return true if we can construct vector types with the given number
of subparts. */
+
static inline bool
valid_vector_subparts_p (poly_uint64 subparts)
{
@@ -4232,7 +4237,7 @@ extern bool valid_constant_size_p (const_tree);
without loss of precision. Store the value in *VALUE if so. */
inline bool
-poly_tree_p (const_tree t, poly_int64 *value)
+poly_int_tree_p (const_tree t, poly_int64_pod *value)
{
if (tree_fits_poly_int64_p (t))
{
@@ -4246,7 +4251,7 @@ poly_tree_p (const_tree t, poly_int64 *value)
without loss of precision. Store the value in *VALUE if so. */
inline bool
-poly_tree_p (const_tree t, poly_uint64 *value)
+poly_int_tree_p (const_tree t, poly_uint64_pod *value)
{
if (tree_fits_poly_uint64_p (t))
{
@@ -4804,7 +4809,7 @@ complete_or_array_type_p (const_tree type)
/* Return true if the value of T could be represented as a poly_widest_int. */
inline bool
-poly_tree_p (const_tree t)
+poly_int_tree_p (const_tree t)
{
return (TREE_CODE (t) == INTEGER_CST || POLY_INT_CST_P (t));
}
@@ -4829,9 +4834,7 @@ bit_field_offset (const_tree t)
extern tree strip_float_extensions (tree);
extern int really_constant_p (const_tree);
-extern bool ptrdiff_tree_p (const_tree, poly_int64 *);
-extern bool poly_tree_p (const_tree, poly_int64 *);
-extern bool poly_tree_p (const_tree, poly_uint64 *);
+extern bool ptrdiff_tree_p (const_tree, poly_int64_pod *);
extern bool decl_address_invariant_p (const_tree);
extern bool decl_address_ip_invariant_p (const_tree);
extern bool int_fits_type_p (const_tree, const_tree);
@@ -4861,7 +4864,6 @@ static inline hashval_t iterative_hash_expr(const_tree tree, hashval_t seed)
}
extern int compare_tree_int (const_tree, unsigned HOST_WIDE_INT);
-extern bool equal_tree_size (const_tree, poly_uint64);
extern int type_list_equal (const_tree, const_tree);
extern int chain_member (const_tree, const_tree);
extern void dump_tree_statistics (void);
@@ -5314,7 +5316,8 @@ namespace wi
typedef const generic_wide_int <widest_extended_tree> tree_to_widest_ref;
typedef const generic_wide_int <offset_extended_tree> tree_to_offset_ref;
- typedef const generic_wide_int <unextended_tree> tree_to_wide_ref;
+ typedef const generic_wide_int<wide_int_ref_storage<false, false> >
+ tree_to_wide_ref;
tree_to_widest_ref to_widest (const_tree);
tree_to_offset_ref to_offset (const_tree);
@@ -5442,7 +5445,8 @@ wi::to_offset (const_tree t)
inline wi::tree_to_wide_ref
wi::to_wide (const_tree t)
{
- return t;
+ return wi::storage_ref (&TREE_INT_CST_ELT (t, 0), TREE_INT_CST_NUNITS (t),
+ TYPE_PRECISION (TREE_TYPE (t)));
}
/* Convert INTEGER_CST T to a wide_int of precision PREC, extending or
@@ -5520,6 +5524,9 @@ poly_int_cst_value (const_tree x)
return res;
}
+/* Access INTEGER_CST or POLY_INT_CST tree T as if it were a
+ poly_widest_int. See wi::to_widest for more details. */
+
inline wi::tree_to_poly_widest_ref
wi::to_poly_widest (const_tree t)
{
@@ -5531,9 +5538,12 @@ wi::to_poly_widest (const_tree t)
res.coeffs[i] = POLY_INT_CST_COEFF (t, i);
return res;
}
- return wi::to_widest (t);
+ return t;
}
+/* Access INTEGER_CST or POLY_INT_CST tree T as if it were a
+ poly_offset_int. See wi::to_offset for more details. */
+
inline wi::tree_to_poly_offset_ref
wi::to_poly_offset (const_tree t)
{
@@ -5545,15 +5555,18 @@ wi::to_poly_offset (const_tree t)
res.coeffs[i] = POLY_INT_CST_COEFF (t, i);
return res;
}
- return wi::to_offset (t);
+ return t;
}
+/* Access INTEGER_CST or POLY_INT_CST tree T as if it were a
+ poly_wide_int. See wi::to_wide for more details. */
+
inline wi::tree_to_poly_wide_ref
wi::to_poly_wide (const_tree t)
{
if (POLY_INT_CST_P (t))
return poly_int_cst_value (t);
- return wi::to_wide (t);
+ return t;
}
template <int N>
@@ -5663,7 +5676,7 @@ extern bool complete_ctor_at_level_p (const_tree, HOST_WIDE_INT, const_tree);
/* Given an expression EXP that is a handled_component_p,
look for the ultimate containing object, which is returned and specify
the access position and size. */
-extern tree get_inner_reference (tree, poly_int64 *, poly_int64 *,
+extern tree get_inner_reference (tree, poly_int64_pod *, poly_int64_pod *,
tree *, machine_mode *, int *, int *, int *);
extern tree build_personality_function (const char *);
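
With the CHREC_VAR operand gone, POLYNOMIAL_CHREC carries only CHREC_LEFT and CHREC_RIGHT as operands and keeps the loop variable in base.u.chrec_var; the node still denotes {LEFT, +, RIGHT}_v, whose affine case evaluates to LEFT + N * RIGHT at iteration N of loop v. A minimal model (struct and names are illustrative):

#include <cassert>

/* A chain of recurrences {LEFT, +, RIGHT}_v as described for
   POLYNOMIAL_CHREC, restricted to constant operands.  */
struct chrec { long left, right; unsigned var; };

/* Value of CR at iteration N of its loop (affine case).  */
static long
chrec_value_at (const chrec &cr, long n)
{ return cr.left + n * cr.right; }

int
main ()
{
  chrec iv = {10, 4, 1};	/* {10, +, 4}_1 */
  assert (chrec_value_at (iv, 0) == 10);
  assert (chrec_value_at (iv, 3) == 22);
  return 0;
}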
diff --git a/gcc/ubsan.c b/gcc/ubsan.c
index 1923dccd6f2..cfa08c0e6b6 100644
--- a/gcc/ubsan.c
+++ b/gcc/ubsan.c
@@ -1453,7 +1453,7 @@ maybe_instrument_pointer_overflow (gimple_stmt_iterator *gsi, tree t)
&& (VAR_P (base)
|| TREE_CODE (base) == PARM_DECL
|| TREE_CODE (base) == RESULT_DECL)
- && poly_tree_p (DECL_SIZE (base), &base_size)
+ && poly_int_tree_p (DECL_SIZE (base), &base_size)
&& must_ge (base_size, bitpos)
&& (!is_global_var (base) || decl_binds_to_current_def_p (base)))
return;
diff --git a/gcc/valtrack.c b/gcc/valtrack.c
index eece7a8c076..8426ed4757e 100644
--- a/gcc/valtrack.c
+++ b/gcc/valtrack.c
@@ -608,8 +608,8 @@ dead_debug_insert_temp (struct dead_debug_local *debug, unsigned int uregno,
usesp = &cur->next;
*tailp = cur->next;
cur->next = NULL;
- /* "may" rather than "must" because we want N V4SFs to win over
- plain V4SF even though N might be 1. */
+ /* "may" rather than "must" because we want (for example)
+ N V4SFs to win over plain V4SF even though N might be 1. */
rtx candidate = *DF_REF_REAL_LOC (cur->use);
if (!reg
|| may_lt (GET_MODE_BITSIZE (GET_MODE (reg)),
diff --git a/gcc/var-tracking.c b/gcc/var-tracking.c
index b2689a8e335..4682aabc18f 100644
--- a/gcc/var-tracking.c
+++ b/gcc/var-tracking.c
@@ -2165,7 +2165,7 @@ vt_canonicalize_addr (dataflow_set *set, rtx oloc)
while (retry)
{
while (GET_CODE (loc) == PLUS
- && poly_int_const_p (XEXP (loc, 1), &term))
+ && poly_int_rtx_p (XEXP (loc, 1), &term))
{
ofst += term;
loc = XEXP (loc, 0);
@@ -2194,7 +2194,7 @@ vt_canonicalize_addr (dataflow_set *set, rtx oloc)
/* Consolidate plus_constants. */
while (may_ne (ofst, 0)
&& GET_CODE (loc) == PLUS
- && poly_int_const_p (XEXP (loc, 1), &term))
+ && poly_int_rtx_p (XEXP (loc, 1), &term))
{
ofst += term;
loc = XEXP (loc, 0);
@@ -2217,7 +2217,7 @@ vt_canonicalize_addr (dataflow_set *set, rtx oloc)
/* Don't build new RTL if we can help it. */
if (GET_CODE (oloc) == PLUS
&& XEXP (oloc, 0) == loc
- && poly_int_const_p (XEXP (oloc, 1), &term)
+ && poly_int_rtx_p (XEXP (oloc, 1), &term)
&& must_eq (term, ofst))
return oloc;
@@ -5382,12 +5382,12 @@ var_lowpart (machine_mode mode, rtx loc)
if (!REG_P (loc) && !MEM_P (loc))
return NULL;
- poly_int64 offset = byte_lowpart_offset (mode, GET_MODE (loc));
+ poly_uint64 offset = byte_lowpart_offset (mode, GET_MODE (loc));
if (MEM_P (loc))
return adjust_address_nv (loc, mode, offset);
- poly_int64 reg_offset = subreg_lowpart_offset (mode, GET_MODE (loc));
+ poly_uint64 reg_offset = subreg_lowpart_offset (mode, GET_MODE (loc));
regno = REGNO (loc) + subreg_regno_offset (REGNO (loc), GET_MODE (loc),
reg_offset, mode);
return gen_rtx_REG_offset (loc, mode, regno, offset);
@@ -8790,7 +8790,7 @@ emit_note_insn_var_location (variable **varp, emit_note_data *data)
else if (MEM_P (loc[n_var_parts])
&& GET_CODE (XEXP (loc2, 0)) == PLUS
&& REG_P (XEXP (XEXP (loc2, 0), 0))
- && poly_int_const_p (XEXP (XEXP (loc2, 0), 1), &offset))
+ && poly_int_rtx_p (XEXP (XEXP (loc2, 0), 1), &offset))
{
poly_int64 offset2;
if ((REG_P (XEXP (loc[n_var_parts], 0))
@@ -8798,7 +8798,7 @@ emit_note_insn_var_location (variable **varp, emit_note_data *data)
XEXP (XEXP (loc2, 0), 0))
&& must_eq (offset, GET_MODE_SIZE (mode)))
|| (GET_CODE (XEXP (loc[n_var_parts], 0)) == PLUS
- && (poly_int_const_p
+ && (poly_int_rtx_p
(XEXP (XEXP (loc[n_var_parts], 0), 1), &offset2))
&& rtx_equal_p (XEXP (XEXP (loc[n_var_parts], 0), 0),
XEXP (XEXP (loc2, 0), 0))
@@ -9679,7 +9679,7 @@ vt_add_function_parameter (tree parm)
|| (GET_CODE (XEXP (incoming, 0)) == PLUS
&& XEXP (XEXP (incoming, 0), 0)
== crtl->args.internal_arg_pointer
- && poly_int_const_p (XEXP (XEXP (incoming, 0), 1), &offset2))))
+ && poly_int_rtx_p (XEXP (XEXP (incoming, 0), 1), &offset2))))
{
HOST_WIDE_INT off = -FIRST_PARM_OFFSET (current_function_decl);
incoming
diff --git a/gcc/varasm.c b/gcc/varasm.c
index 45e6edc7d3b..bf19ab7f413 100644
--- a/gcc/varasm.c
+++ b/gcc/varasm.c
@@ -2885,7 +2885,8 @@ decode_addr_const (tree exp, struct addr_const *value)
{
poly_int64 bytepos;
if (TREE_CODE (target) == COMPONENT_REF
- && poly_tree_p (byte_position (TREE_OPERAND (target, 1)), &bytepos))
+ && poly_int_tree_p (byte_position (TREE_OPERAND (target, 1)),
+ &bytepos))
{
offset += bytepos;
target = TREE_OPERAND (target, 0);
diff --git a/gcc/wide-int-print.cc b/gcc/wide-int-print.cc
index 36d8ad863f5..8874e819685 100644
--- a/gcc/wide-int-print.cc
+++ b/gcc/wide-int-print.cc
@@ -103,30 +103,28 @@ print_decu (const wide_int_ref &wi, FILE *file)
}
void
-print_hex (const wide_int_ref &wi, char *buf)
+print_hex (const wide_int_ref &val, char *buf)
{
- int i = wi.get_len ();
-
- if (wi == 0)
+ if (val == 0)
buf += sprintf (buf, "0x0");
else
{
- if (wi::neg_p (wi))
+ buf += sprintf (buf, "0x");
+ int start = ROUND_DOWN (val.get_precision (), HOST_BITS_PER_WIDE_INT);
+ int width = val.get_precision () - start;
+ bool first_p = true;
+ for (int i = start; i >= 0; i -= HOST_BITS_PER_WIDE_INT)
{
- int j;
- /* If the number is negative, we may need to pad value with
- 0xFFF... because the leading elements may be missing and
- we do not print a '-' with hex. */
- buf += sprintf (buf, "0x");
- for (j = BLOCKS_NEEDED (wi.get_precision ()); j > i; j--)
- buf += sprintf (buf, HOST_WIDE_INT_PRINT_PADDED_HEX, HOST_WIDE_INT_M1);
-
+ unsigned HOST_WIDE_INT uhwi = wi::extract_uhwi (val, i, width);
+ if (!first_p)
+ buf += sprintf (buf, HOST_WIDE_INT_PRINT_PADDED_HEX, uhwi);
+ else if (uhwi != 0)
+ {
+ buf += sprintf (buf, HOST_WIDE_INT_PRINT_HEX_PURE, uhwi);
+ first_p = false;
+ }
+ width = HOST_BITS_PER_WIDE_INT;
}
- else
- buf += sprintf (buf, "0x" HOST_WIDE_INT_PRINT_HEX_PURE, wi.elt (--i));
-
- while (--i >= 0)
- buf += sprintf (buf, HOST_WIDE_INT_PRINT_PADDED_HEX, wi.elt (i));
}
}
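
The rewritten print_hex drops the old 0xFFF... padding path for negative values: it walks the value from the most significant HOST_WIDE_INT block downwards, skips leading zero blocks, prints the first nonzero block unpadded and zero-pads every later block, relying on wi::extract_uhwi to mask the partial top block. A standalone sketch of the block loop (simplified: the partial top block is not modelled; names are illustrative):

#include <cstdio>
#include <cstring>
#include <cassert>

/* Print a little-endian array of 64-bit blocks in hex, most
   significant block first, skipping leading zero blocks.  */
static void
print_hex_blocks (const unsigned long long *val, int nblocks, char *buf)
{
  buf += sprintf (buf, "0x");
  bool first_p = true;
  for (int i = nblocks - 1; i >= 0; --i)
    {
      if (first_p && val[i] == 0 && i > 0)
	continue;			/* Skip leading zero blocks.  */
      buf += sprintf (buf, first_p ? "%llx" : "%016llx", val[i]);
      first_p = false;
    }
}

int
main ()
{
  /* 2 * 2^64 + 0x89abcdef.  */
  unsigned long long v[] = {0x89abcdefULL, 0x2ULL};
  char buf[64];
  print_hex_blocks (v, 2, buf);
  assert (strcmp (buf, "0x20000000089abcdef") == 0);
  return 0;
}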
diff --git a/gcc/wide-int.cc b/gcc/wide-int.cc
index cafd39d3ca5..a2c8fa72302 100644
--- a/gcc/wide-int.cc
+++ b/gcc/wide-int.cc
@@ -2141,10 +2141,8 @@ void gt_pch_nx (widest_int *, void (*) (void *, void *), void *) { }
void gt_pch_nx (widest_int *) { }
template void wide_int::dump () const;
-template void generic_wide_int <wide_int_ref_storage <false, false> >::dump () const;
-template void generic_wide_int <wide_int_ref_storage <false, true> >::dump () const;
-template void generic_wide_int <wide_int_ref_storage <true, false> >::dump () const;
-template void generic_wide_int <wide_int_ref_storage <true, true> >::dump () const;
+template void generic_wide_int <wide_int_ref_storage <false> >::dump () const;
+template void generic_wide_int <wide_int_ref_storage <true> >::dump () const;
template void offset_int::dump () const;
template void widest_int::dump () const;
@@ -2222,6 +2220,17 @@ test_printing ()
VALUE_TYPE a = from_int<VALUE_TYPE> (42);
assert_deceq ("42", a, SIGNED);
assert_hexeq ("0x2a", a);
+ assert_hexeq ("0x1fffffffffffffffff", wi::shwi (-1, 69));
+ assert_hexeq ("0xffffffffffffffff", wi::mask (64, false, 69));
+ assert_hexeq ("0xffffffffffffffff", wi::mask <widest_int> (64, false));
+ if (WIDE_INT_MAX_PRECISION > 128)
+ {
+ assert_hexeq ("0x20000000000000000fffffffffffffffe",
+ wi::lshift (1, 129) + wi::lshift (1, 64) - 2);
+ assert_hexeq ("0x200000000000004000123456789abcdef",
+ wi::lshift (1, 129) + wi::lshift (1, 74)
+ + wi::lshift (0x1234567, 32) + 0x89abcdef);
+ }
}
/* Verify that various operations work correctly for VALUE_TYPE,
@@ -2296,50 +2305,22 @@ test_comparisons ()
/* Run all of the selftests, using the given VALUE_TYPE. */
template <class VALUE_TYPE>
-static void run_wide_int_tests_for_type ()
+static void run_all_wide_int_tests ()
{
test_printing <VALUE_TYPE> ();
test_ops <VALUE_TYPE> ();
test_comparisons <VALUE_TYPE> ();
}
-/* Test that storage references copy correctly. */
-
-static void
-test_wide_int_ref ()
-{
- wi::hwi_with_prec s10 = wi::shwi (10, 20);
- wi::hwi_with_prec s12 = wi::shwi (12, 22);
- wi::hwi_with_prec s14 = wi::shwi (14, 29);
-
- wide_int_ref x = s10;
- wide_int_ref y = x;
- x = s12;
- ASSERT_EQ (y, 10);
- ASSERT_EQ (y.get_precision (), 20);
-
- y = x;
- x = s14;
- ASSERT_EQ (y, 12);
- ASSERT_EQ (y.get_precision (), 22);
-}
-
/* Run all of the selftests within this file, for all value types. */
void
wide_int_cc_tests ()
{
- run_wide_int_tests_for_type <wide_int> ();
- run_wide_int_tests_for_type <offset_int> ();
- run_wide_int_tests_for_type <widest_int> ();
- test_wide_int_ref ();
+ run_all_wide_int_tests <wide_int> ();
+ run_all_wide_int_tests <offset_int> ();
+ run_all_wide_int_tests <widest_int> ();
}
} // namespace selftest
#endif /* CHECKING_P */
-
-void
-foo (wide_int_ref *x, const wide_int &y)
-{
- *x = y;
-}
diff --git a/gcc/wide-int.h b/gcc/wide-int.h
index 81a911297da..f6c083ce0f9 100644
--- a/gcc/wide-int.h
+++ b/gcc/wide-int.h
@@ -949,18 +949,12 @@ public:
wide_int_ref_storage () {}
wide_int_ref_storage (const wi::storage_ref &);
- wide_int_ref_storage (const wide_int_ref_storage &);
template <typename T>
wide_int_ref_storage (const T &);
template <typename T>
wide_int_ref_storage (const T &, unsigned int);
-
- wide_int_ref_storage &operator = (const wide_int_ref_storage &);
-
- template <typename T>
- wide_int_ref_storage &operator = (const T &);
};
/* Create a reference from an existing reference. */
@@ -970,24 +964,6 @@ wide_int_ref_storage (const wi::storage_ref &x)
: storage_ref (x)
{}
-/* Copy constructor. */
-template <bool SE, bool HDP>
-inline wide_int_ref_storage <SE, HDP>::
-wide_int_ref_storage (const wide_int_ref_storage &x)
- : storage_ref (x)
-{
- /* It would be correct to copy SCRATCH unconditionally, which might
- make the assignment to VAL a conditional move opportunity.
- However, it will often be the case that we can prove that
- x.val doesn't point to x.scratch, in which case this code can
- be removed as dead. */
- if (x.val == x.scratch)
- {
- memcpy (scratch, x.scratch, sizeof (scratch));
- val = scratch;
- }
-}
-
/* Create a reference to integer X in its natural precision. Note
that the natural precision is host-dependent for primitive
types. */
@@ -1008,32 +984,6 @@ wide_int_ref_storage (const T &x, unsigned int precision)
{
}
-/* Normal assignment. */
-template <bool SE, bool HDP>
-inline wide_int_ref_storage <SE, HDP> &
-wide_int_ref_storage <SE, HDP>::operator = (const wide_int_ref_storage &x)
-{
- storage_ref::operator = (x);
- /* See comment in the constructor for the rationale. */
- if (x.val == x.scratch)
- {
- memcpy (scratch, x.scratch, sizeof (scratch));
- val = scratch;
- }
- return *this;
-}
-
-/* Make the reference refer to X, overwriting the previous reference. */
-template <bool SE, bool HDP>
-template <typename T>
-inline wide_int_ref_storage <SE, HDP> &
-wide_int_ref_storage <SE, HDP>::operator = (const T &x)
-{
- storage_ref::operator =
- (wi::int_traits <T>::decompose (scratch, wi::get_precision (x), x));
- return *this;
-}
-
namespace wi
{
template <bool SE, bool HDP>
@@ -1348,7 +1298,7 @@ get_binary_result (const T1 &, const T2 &)
return FIXED_WIDE_INT (N) ();
}
-/* A reference to one element of a trailing-wide_ints structure. */
+/* A reference to one element of a trailing_wide_ints structure. */
class trailing_wide_int_storage
{
private:
@@ -1661,10 +1611,12 @@ wi::two (unsigned int precision)
namespace wi
{
+ /* ints_for<T>::zero (X) returns a zero that, when assigned to a T,
+ gives that T the same precision as X. */
template<typename T, precision_type = int_traits<T>::precision_type>
struct ints_for
{
- static ALWAYS_INLINE int zero (const T &) { return 0; }
+ static int zero (const T &) { return 0; }
};
template<typename T>
@@ -1675,7 +1627,7 @@ namespace wi
}
template<typename T>
-ALWAYS_INLINE wi::hwi_with_prec
+inline wi::hwi_with_prec
wi::ints_for<T, wi::VAR_PRECISION>::zero (const T &x)
{
return wi::zero (wi::get_precision (x));
@@ -3261,6 +3213,14 @@ SIGNED_BINARY_PREDICATE (operator >=, ges_p)
return wi::F (x, y); \
}
+#define SHIFT_OPERATOR(OP, F) \
+ template<typename T1, typename T2> \
+ WI_BINARY_OPERATOR_RESULT (T1, T1) \
+ OP (const T1 &x, const T2 &y) \
+ { \
+ return wi::F (x, y); \
+ }
+
UNARY_OPERATOR (operator ~, bit_not)
UNARY_OPERATOR (operator -, neg)
BINARY_PREDICATE (operator ==, eq_p)
@@ -3271,11 +3231,12 @@ BINARY_OPERATOR (operator ^, bit_xor)
BINARY_OPERATOR (operator +, add)
BINARY_OPERATOR (operator -, sub)
BINARY_OPERATOR (operator *, mul)
-BINARY_OPERATOR (operator <<, lshift)
+SHIFT_OPERATOR (operator <<, lshift)
#undef UNARY_OPERATOR
#undef BINARY_PREDICATE
#undef BINARY_OPERATOR
+#undef SHIFT_OPERATOR
template <typename T1, typename T2>
inline WI_SIGNED_SHIFT_RESULT (T1, T2)