summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJulian Brown <julian@codesourcery.com>2006-04-03 00:03:34 +0000
committerJulian Brown <julian@codesourcery.com>2006-04-03 00:03:34 +0000
commitb9a434d3363521e79836bf852b3112ab06556700 (patch)
tree3229dedd9ca937e9928b6a30512f64a629dc4422
parent0c1120a49644d5e91f5fce18db4921d4e3437475 (diff)
downloadbinutils-redhat-b9a434d3363521e79836bf852b3112ab06556700.tar.gz
* binutils/readelf.c (arm_attr_tag_VFP_arch): Add VFPv3.
* gas/config/tc-arm.c (limits.h): Include. (fpu_arch_vfp_v3, fpu_vfp_ext_v3, fpu_neon_ext_v1) (fpu_vfp_v3_or_neon_ext): Declare constants. (neon_el_type): New enumeration of types for Neon vector elements. (neon_type_el): New struct. Define type and size of a vector element. (NEON_MAX_TYPE_ELS): Define constant. The maximum number of types per instruction. (neon_type): Define struct. The type of an instruction. (arm_it): Add 'vectype' for the current instruction. (isscalar, immisalign, regisimm, isquad): New predicates for operands. (vfp_sp_reg_pos): Rename to... (vfp_reg_pos): ...this, and add VFP_REG_Dd, VFP_REG_Dm, VFP_REG_Dn tags. (arm_reg_type): Add REG_TYPE_NQ (Neon Q register) and REG_TYPE_NDQ (Neon D or Q register). (reg_expected_msgs): Sync with above. Allow VFD to mean VFP or Neon D register. (GE_OPT_PREFIX_BIG): Define constant, for use in... (my_get_expression): Allow above constant as argument to accept 64-bit constants with optional prefix. (arm_reg_parse): Add extra argument to return the specific type of register in when either a D or Q register (REG_TYPE_NDQ) is requested. Can be NULL. (parse_scalar): New function. Parse Neon scalar (vector reg and index). (parse_reg_list): Update for new arm_reg_parse args. (parse_vfp_reg_list): Allow parsing of Neon D/Q register lists. (parse_neon_el_struct_list): New function. Parse element/structure register lists for VLD<n>/VST<n> instructions. (s_arm_unwind_save_vfp): Update for new parse_vfp_reg_list args. (s_arm_unwind_save_mmxwr): Likewise. (s_arm_unwind_save_mmxwcg): Likewise. (s_arm_unwind_movsp): Likewise. (s_arm_unwind_setfp): Likewise. (parse_big_immediate): New function. Parse an immediate, which may be 64 bits wide. Put results in inst.operands[i]. (parse_shift): Update for new arm_reg_parse args. (parse_address): Likewise. Add parsing of alignment specifiers. (parse_neon_mov): Parse the operands of a VMOV instruction. (operand_parse_code): Add OP_RND, OP_RNQ, OP_RNDQ, OP_RNSC, OP_NRDLST, OP_NSTRLST, OP_NILO, OP_RNDQ_I0, OP_RR_RNSC, OP_RNDQ_RNSC, OP_RND_RNSC, OP_VMOV, OP_RNDQ_IMVNb, OP_RNDQ_I63b, OP_I0, OP_I16z, OP_I32z, OP_I64, OP_I64z, OP_oI32b, OP_oRND, OP_oRNQ, OP_oRNDQ. (parse_operands): Handle new codes above. (encode_arm_vfp_sp_reg): Rename to... (encode_arm_vfp_reg): ...this. Handle D regs (0-31) too. Complain if selected VFP version only supports D0-D15. (do_vfp_sp_monadic, do_vfp_sp_dyadic, do_vfp_sp_compare_z) (do_vfp_dp_sp_cvt, do_vfp_reg_from_sp, do_vfp_reg2_from_sp2) (do_vfp_sp_from_reg, do_vfp_sp2_from_reg2, do_vfp_sp_ldst) (do_vfp_dp_ldst, vfp_sp_ldstm, vfp_dp_ldstm): Update for new encode_arm_vfp_reg name, and allow 32 D regs. (do_vfp_dp_rd_rm, do_vfp_dp_rn_rd, do_vfp_dp_rd_rn) (do_vfp_dp_rd_rn_rm, do_vfp_rm_rd_rn): New functions to encode VFP insns allowing 32 D regs. (do_vfp_sp_const, do_vfp_dp_const, vfp_conv, do_vfp_sp_conv_16) (do_vfp_dp_conv_16, do_vfp_sp_conv_32, do_vfp_dp_conv_32): Handle constant-load and conversion insns introduced with VFPv3. (neon_tab_entry): New struct. (NEON_ENC_TAB): Bit patterns for overloaded Neon instructions, and those which are the targets of pseudo-instructions. (neon_opc): Enumerate opcodes, use as indices into... (neon_enc_tab): ...this. Hold data from NEON_ENC_TAB. (NEON_ENC_INTEGER, NEON_ENC_ARMREG, NEON_ENC_POLY, NEON_ENC_FLOAT) (NEON_ENC_SCALAR, NEON_ENC_IMMED, NEON_ENC_INTERLV, NEON_ENC_LANE) (NEON_ENC_DUP): Define meaningful helper macros to look up values in neon_enc_tab. (neon_shape): Enumerate shapes (permitted register widths, etc.) for Neon instructions. (neon_type_mask): New. Compact type representation for type checking. (N_SU_ALL, N_SU_32, N_SU_16_64, N_SUF_32, N_I_ALL, N_IF_32): Common permitted type combinations. (N_IGNORE_TYPE): New macro. (neon_check_shape): New function. Check an instruction shape for multiple alternatives. Return the specific shape for the current instruction. (neon_modify_type_size): New function. Modify a vector type and size, depending on the bit mask in argument 1. (neon_type_promote): New function. Convert a given "key" type (of an operand) into the correct type for a different operand, based on a bit mask. (type_chk_of_el_type): New function. Convert a type and size into the compact representation used for type checking. (el_type_of_type_ckh): New function. Reverse of above (only when a single bit is set in the bit mask). (modify_types_allowed): New function. Alter a mask of allowed types based on a bit mask of modifications. (neon_check_type): New function. Check the type of the current instruction against the variable argument list. The "key" type of the instruction is returned. (neon_dp_fixup): New function. Fill in and modify instruction bits for a Neon data-processing instruction depending on whether we're in ARM mode or Thumb-2 mode. (neon_logbits): New function. (neon_three_same, neon_two_same, do_neon_dyadic_i_su) (do_neon_dyadic_i64_su, neon_imm_shift, do_neon_shl_imm) (do_neon_qshl_imm, neon_cmode_for_logic_imm) (neon_bits_same_in_bytes, neon_squash_bits, neon_is_quarter_float) (neon_qfloat_bits, neon_cmode_for_move_imm, neon_write_immbits) (neon_invert_size, do_neon_logic, do_neon_bitfield, neon_dyadic) (do_neon_dyadic_if_su, do_neon_dyadic_if_su_d, do_neon_dyadic_if_i) (do_neon_dyadic_if_i_d, do_neon_addsub_if_i, neon_exchange_operands) (neon_compare, do_neon_cmp, do_neon_cmp_inv, do_neon_ceq) (neon_scalar_for_mul, neon_mul_mac, do_neon_mac_maybe_scalar) (do_neon_tst, do_neon_mul, do_neon_qdmulh, do_neon_fcmp_absolute) (do_neon_fcmp_absolute_inv, do_neon_step, do_neon_abs_neg) (do_neon_sli, do_neon_sri, do_neon_qshlu_imm, do_neon_qmovn) (do_neon_qmovun, do_neon_rshift_sat_narrow) (do_neon_rshift_sat_narrow_u, do_neon_movn, do_neon_rshift_narrow) (do_neon_shll, neon_cvt_flavour, do_neon_cvt, neon_move_immediate) (do_neon_mvn, neon_mixed_length, do_neon_dyadic_long, do_neon_abal) (neon_mac_reg_scalar_long, do_neon_mac_maybe_scalar_long) (do_neon_dyadic_wide, do_neon_vmull, do_neon_ext, do_neon_rev) (do_neon_dup, do_neon_mov, do_neon_rshift_round_imm, do_neon_movl) (do_neon_trn, do_neon_zip_uzp, do_neon_sat_abs_neg) (do_neon_pair_long, do_neon_recip_est, do_neon_cls, do_neon_clz) (do_neon_cnt, do_neon_swp, do_neon_tbl_tbx, do_neon_ldm_stm) (do_neon_ldr_str, do_neon_ld_st_interleave, neon_alignment_bit) (do_neon_ld_st_lane, do_neon_ld_dup, do_neon_ldx_stx): New functions. Neon bit encoding and encoding helpers. (parse_neon_type): New function. Parse Neon type specifier. (opcode_lookup): Allow parsing of Neon type specifiers. (REGNUM2, REGSETH, REGSET2): New macros. (reg_names): Add new VFPv3 and Neon registers. (NUF, nUF, NCE, nCE): New macros for opcode table. (insns): More VFP registers allowed in fcpyd, fmdhr, fmdlr, fmrdh, fmrdl, fabsd, fnegd, fsqrtd, faddd, fsubd, fmuld, fdivd, fmacd, fmscd, fnmuld, fnmacd, fnmscd, fcmpd, fcmpzd, fcmped, fcmpezd, fmdrr, fmrrd. Add Neon instructions vaba, vhadd, vrhadd, vhsub, vqadd, vqsub, vrshl, vqrshl, vshl, vqshl{u}, vand, vbic, vorr, vorn, veor, vbsl, vbit, vbif, vabd, vmax, vmin, vcge, vcgt, vclt, vcle, vceq, vpmax, vpmin, vmla, vmls, vpadd, vadd, vsub, vtst, vmul, vqdmulh, vqrdmulh, vacge, vacgt, vaclt, vacle, vrecps, vrsqrts, vabs, vneg, v{r}shr, v{r}sra, vsli, vsri, vqshrn, vq{r}shr{u}n, v{r}shrn, vshll, vcvt, vmov, vmvn, vabal, vabdl, vaddl, vsubl, vmlal, vmlsl, vaddw, vsubw, v{r}addhn, v{r}subhn, vqdmlal, vqdmlsl, vqdmull, vmull, vext, vrev64, vrev32, vrev16, vdup, vmovl, v{q}movn, vzip, vuzp, vqabs, vqneg, vpadal, vpaddl, vrecpe, vrsqrte, vcls, vclz, vcnt, vswp, vtrn, vtbl, vtbx, vldm, vstm, vldr, vstr, vld[1234], vst[1234], fconst[sd], f[us][lh]to[sd], fto[us][lh][sd]. (tc_arm_regname_to_dw2regnum): Update for arm_reg_parse args. (arm_cpu_option_table): Add Neon and VFPv3 to Cortex-A8. (arm_option_cpu_value): Add vfp3 and neon. (aeabi_set_public_attributes): Support VFPv3 and NEON attributes. Fix VFPv1 attribute. * gas/testsuite/gas/arm/copro.s: Avoid ldcl which encodes as a bad Neon instruction. * gas/testsuite/gas/arm/copro.d: Update accordingly. * gas/testsuite/gas/arm/neon-cond.s: New test. Conditional Neon opcodes in ARM mode. * gas/testsuite/gas/arm/neon-cond.d: Expected results of above. * gas/testsuite/gas/arm/neon-cov.s: New test. Coverage of Neon instructions. * gas/testsuite/gas/arm/neon-cov.d: Expected results of above. * gas/testsuite/gas/arm/neon-ldst-es.s: New test. Element and structure loads and stores. * gas/testsuite/gas/arm/neon-ldst-es.d: Expected results of above. * gas/testsuite/gas/arm/neon-ldst-rm.s: New test. Single and multiple register loads and stores. * gas/testsuite/gas/arm/neon-ldst-rm.d: Expected results of above. * gas/testsuite/gas/arm/neon-omit.s: New test. Omission of optional operands. * gas/testsuite/gas/arm/neon-omit.d: Expected results of above. * gas/testsuite/gas/arm/vfp1.d: Expect Neon syntax for some VFP instructions. * gas/testsuite/gas/arm/vfp1_t2.d: Likewise. * gas/testsuite/gas/arm/vfp1xD.d: Likewise. * gas/testsuite/gas/arm/vfp1xD_t2.d: Likewise. * gas/testsuite/gas/arm/vfp2.d: Likewise. * gas/testsuite/gas/arm/vfp2_t2.d: Likewise. * gas/testsuite/gas/arm/vfp3-32drs.s: New test. Extended D register range for VFP instructions. * gas/testsuite/gas/arm/vfp3-32drs.d: Expected results of above. * gas/testsuite/gas/arm/vfp3-const-conv.s: New test. VFPv3 constant-load and conversion instructions. * gas/testsuite/gas/arm/vfp3-const-conv.d: Expected results of above. * include/opcode/arm.h (FPU_VFP_EXT_V3): Define constant. (FPU_NEON_EXT_V1): Likewise. (FPU_VFP_HARD): Update. (FPU_VFP_V3): Define macro. (FPU_ARCH_VFP_V3, FPU_ARCH_VFP_V3_PLUS_NEON_V1): Define macros. * opcodes/arm-dis.c (coprocessor_opcodes): Add %A, %B, %k, convert %<code>[zy] into %[zy]<code>. Expand meaning of %<bitfield>['`?]. Add unified load/store instruction names. (neon_opcode_table): New. (arm_opcodes): Expand meaning of %<bitfield>['`?]. (arm_decode_bitfield): New. (print_insn_coprocessor): Add pc argument. Add %A & %B specifiers. Use arm_decode_bitfield and adjust numeric specifiers. Adjust %z & %y. (print_insn_neon): New. (print_insn_arm): Adjust print_insn_coprocessor call. Call print_insn_neon. Use arm_decode_bitfield and adjust numeric specifiers. (print_insn_thumb32): Likewise.
-rw-r--r--ChangeLog.csl205
-rw-r--r--binutils/readelf.c3
-rw-r--r--gas/config/tc-arm.c4054
-rw-r--r--gas/testsuite/gas/arm/copro.d2
-rw-r--r--gas/testsuite/gas/arm/copro.s3
-rw-r--r--gas/testsuite/gas/arm/neon-cond.d14
-rw-r--r--gas/testsuite/gas/arm/neon-cond.s13
-rw-r--r--gas/testsuite/gas/arm/neon-cov.d1263
-rw-r--r--gas/testsuite/gas/arm/neon-cov.s595
-rw-r--r--gas/testsuite/gas/arm/neon-ldst-es.d57
-rw-r--r--gas/testsuite/gas/arm/neon-ldst-es.s59
-rw-r--r--gas/testsuite/gas/arm/neon-ldst-rm.d63
-rw-r--r--gas/testsuite/gas/arm/neon-ldst-rm.s44
-rw-r--r--gas/testsuite/gas/arm/neon-omit.d51
-rw-r--r--gas/testsuite/gas/arm/neon-omit.s50
-rw-r--r--gas/testsuite/gas/arm/vfp1.d152
-rw-r--r--gas/testsuite/gas/arm/vfp1_t2.d152
-rw-r--r--gas/testsuite/gas/arm/vfp1xD.d70
-rw-r--r--gas/testsuite/gas/arm/vfp1xD_t2.d70
-rw-r--r--gas/testsuite/gas/arm/vfp2.d8
-rw-r--r--gas/testsuite/gas/arm/vfp2_t2.d8
-rw-r--r--gas/testsuite/gas/arm/vfpv3-32drs.d73
-rw-r--r--gas/testsuite/gas/arm/vfpv3-32drs.s68
-rw-r--r--gas/testsuite/gas/arm/vfpv3-const-conv.d29
-rw-r--r--gas/testsuite/gas/arm/vfpv3-const-conv.s25
-rw-r--r--include/opcode/arm.h10
-rw-r--r--opcodes/arm-dis.c1448
27 files changed, 7859 insertions, 730 deletions
diff --git a/ChangeLog.csl b/ChangeLog.csl
index 75d7adb9cc..5ac69a8672 100644
--- a/ChangeLog.csl
+++ b/ChangeLog.csl
@@ -1,3 +1,208 @@
+2005-04-03 Julian Brown <julian@codesourcery.com>
+ Nathan Sidwell <nathan@codesourcery.com>
+
+ * binutils/readelf.c (arm_attr_tag_VFP_arch): Add VFPv3.
+
+ * gas/config/tc-arm.c (limits.h): Include.
+ (fpu_arch_vfp_v3, fpu_vfp_ext_v3, fpu_neon_ext_v1)
+ (fpu_vfp_v3_or_neon_ext): Declare constants.
+ (neon_el_type): New enumeration of types for Neon vector elements.
+ (neon_type_el): New struct. Define type and size of a vector element.
+ (NEON_MAX_TYPE_ELS): Define constant. The maximum number of types per
+ instruction.
+ (neon_type): Define struct. The type of an instruction.
+ (arm_it): Add 'vectype' for the current instruction.
+ (isscalar, immisalign, regisimm, isquad): New predicates for operands.
+ (vfp_sp_reg_pos): Rename to...
+ (vfp_reg_pos): ...this, and add VFP_REG_Dd, VFP_REG_Dm, VFP_REG_Dn
+ tags.
+ (arm_reg_type): Add REG_TYPE_NQ (Neon Q register) and REG_TYPE_NDQ
+ (Neon D or Q register).
+ (reg_expected_msgs): Sync with above. Allow VFD to mean VFP or Neon
+ D register.
+ (GE_OPT_PREFIX_BIG): Define constant, for use in...
+ (my_get_expression): Allow above constant as argument to accept
+ 64-bit constants with optional prefix.
+ (arm_reg_parse): Add extra argument to return the specific type of
+ register in when either a D or Q register (REG_TYPE_NDQ) is requested.
+ Can be NULL.
+ (parse_scalar): New function. Parse Neon scalar (vector reg and index).
+ (parse_reg_list): Update for new arm_reg_parse args.
+ (parse_vfp_reg_list): Allow parsing of Neon D/Q register lists.
+ (parse_neon_el_struct_list): New function. Parse element/structure
+ register lists for VLD<n>/VST<n> instructions.
+ (s_arm_unwind_save_vfp): Update for new parse_vfp_reg_list args.
+ (s_arm_unwind_save_mmxwr): Likewise.
+ (s_arm_unwind_save_mmxwcg): Likewise.
+ (s_arm_unwind_movsp): Likewise.
+ (s_arm_unwind_setfp): Likewise.
+ (parse_big_immediate): New function. Parse an immediate, which may
+ be 64 bits wide. Put results in inst.operands[i].
+ (parse_shift): Update for new arm_reg_parse args.
+ (parse_address): Likewise. Add parsing of alignment specifiers.
+ (parse_neon_mov): Parse the operands of a VMOV instruction.
+ (operand_parse_code): Add OP_RND, OP_RNQ, OP_RNDQ, OP_RNSC,
+ OP_NRDLST, OP_NSTRLST, OP_NILO, OP_RNDQ_I0, OP_RR_RNSC,
+ OP_RNDQ_RNSC, OP_RND_RNSC, OP_VMOV, OP_RNDQ_IMVNb, OP_RNDQ_I63b,
+ OP_I0, OP_I16z, OP_I32z, OP_I64, OP_I64z, OP_oI32b, OP_oRND,
+ OP_oRNQ, OP_oRNDQ.
+ (parse_operands): Handle new codes above.
+ (encode_arm_vfp_sp_reg): Rename to...
+ (encode_arm_vfp_reg): ...this. Handle D regs (0-31) too. Complain if
+ selected VFP version only supports D0-D15.
+ (do_vfp_sp_monadic, do_vfp_sp_dyadic, do_vfp_sp_compare_z)
+ (do_vfp_dp_sp_cvt, do_vfp_reg_from_sp, do_vfp_reg2_from_sp2)
+ (do_vfp_sp_from_reg, do_vfp_sp2_from_reg2, do_vfp_sp_ldst)
+ (do_vfp_dp_ldst, vfp_sp_ldstm, vfp_dp_ldstm): Update for new
+ encode_arm_vfp_reg name, and allow 32 D regs.
+ (do_vfp_dp_rd_rm, do_vfp_dp_rn_rd, do_vfp_dp_rd_rn)
+ (do_vfp_dp_rd_rn_rm, do_vfp_rm_rd_rn): New functions to encode VFP
+ insns allowing 32 D regs.
+ (do_vfp_sp_const, do_vfp_dp_const, vfp_conv, do_vfp_sp_conv_16)
+ (do_vfp_dp_conv_16, do_vfp_sp_conv_32, do_vfp_dp_conv_32): Handle
+ constant-load and conversion insns introduced with VFPv3.
+ (neon_tab_entry): New struct.
+ (NEON_ENC_TAB): Bit patterns for overloaded Neon instructions, and
+ those which are the targets of pseudo-instructions.
+ (neon_opc): Enumerate opcodes, use as indices into...
+ (neon_enc_tab): ...this. Hold data from NEON_ENC_TAB.
+ (NEON_ENC_INTEGER, NEON_ENC_ARMREG, NEON_ENC_POLY, NEON_ENC_FLOAT)
+ (NEON_ENC_SCALAR, NEON_ENC_IMMED, NEON_ENC_INTERLV, NEON_ENC_LANE)
+ (NEON_ENC_DUP): Define meaningful helper macros to look up values in
+ neon_enc_tab.
+ (neon_shape): Enumerate shapes (permitted register widths, etc.) for
+ Neon instructions.
+ (neon_type_mask): New. Compact type representation for type
+ checking.
+ (N_SU_ALL, N_SU_32, N_SU_16_64, N_SUF_32, N_I_ALL, N_IF_32): Common
+ permitted type combinations.
+ (N_IGNORE_TYPE): New macro.
+ (neon_check_shape): New function. Check an instruction shape for
+ multiple alternatives. Return the specific shape for the current
+ instruction.
+ (neon_modify_type_size): New function. Modify a vector type and
+ size, depending on the bit mask in argument 1.
+ (neon_type_promote): New function. Convert a given "key" type (of an
+ operand) into the correct type for a different operand, based on a bit
+ mask.
+ (type_chk_of_el_type): New function. Convert a type and size into the
+ compact representation used for type checking.
+ (el_type_of_type_ckh): New function. Reverse of above (only when a
+ single bit is set in the bit mask).
+ (modify_types_allowed): New function. Alter a mask of allowed types
+ based on a bit mask of modifications.
+ (neon_check_type): New function. Check the type of the current
+ instruction against the variable argument list. The "key" type of the
+ instruction is returned.
+ (neon_dp_fixup): New function. Fill in and modify instruction bits for
+ a Neon data-processing instruction depending on whether we're in ARM
+ mode or Thumb-2 mode.
+ (neon_logbits): New function.
+ (neon_three_same, neon_two_same, do_neon_dyadic_i_su)
+ (do_neon_dyadic_i64_su, neon_imm_shift, do_neon_shl_imm)
+ (do_neon_qshl_imm, neon_cmode_for_logic_imm)
+ (neon_bits_same_in_bytes, neon_squash_bits, neon_is_quarter_float)
+ (neon_qfloat_bits, neon_cmode_for_move_imm, neon_write_immbits)
+ (neon_invert_size, do_neon_logic, do_neon_bitfield, neon_dyadic)
+ (do_neon_dyadic_if_su, do_neon_dyadic_if_su_d, do_neon_dyadic_if_i)
+ (do_neon_dyadic_if_i_d, do_neon_addsub_if_i, neon_exchange_operands)
+ (neon_compare, do_neon_cmp, do_neon_cmp_inv, do_neon_ceq)
+ (neon_scalar_for_mul, neon_mul_mac, do_neon_mac_maybe_scalar)
+ (do_neon_tst, do_neon_mul, do_neon_qdmulh, do_neon_fcmp_absolute)
+ (do_neon_fcmp_absolute_inv, do_neon_step, do_neon_abs_neg)
+ (do_neon_sli, do_neon_sri, do_neon_qshlu_imm, do_neon_qmovn)
+ (do_neon_qmovun, do_neon_rshift_sat_narrow)
+ (do_neon_rshift_sat_narrow_u, do_neon_movn, do_neon_rshift_narrow)
+ (do_neon_shll, neon_cvt_flavour, do_neon_cvt, neon_move_immediate)
+ (do_neon_mvn, neon_mixed_length, do_neon_dyadic_long, do_neon_abal)
+ (neon_mac_reg_scalar_long, do_neon_mac_maybe_scalar_long)
+ (do_neon_dyadic_wide, do_neon_vmull, do_neon_ext, do_neon_rev)
+ (do_neon_dup, do_neon_mov, do_neon_rshift_round_imm, do_neon_movl)
+ (do_neon_trn, do_neon_zip_uzp, do_neon_sat_abs_neg)
+ (do_neon_pair_long, do_neon_recip_est, do_neon_cls, do_neon_clz)
+ (do_neon_cnt, do_neon_swp, do_neon_tbl_tbx, do_neon_ldm_stm)
+ (do_neon_ldr_str, do_neon_ld_st_interleave, neon_alignment_bit)
+ (do_neon_ld_st_lane, do_neon_ld_dup, do_neon_ldx_stx): New
+ functions. Neon bit encoding and encoding helpers.
+ (parse_neon_type): New function. Parse Neon type specifier.
+ (opcode_lookup): Allow parsing of Neon type specifiers.
+ (REGNUM2, REGSETH, REGSET2): New macros.
+ (reg_names): Add new VFPv3 and Neon registers.
+ (NUF, nUF, NCE, nCE): New macros for opcode table.
+ (insns): More VFP registers allowed in fcpyd, fmdhr, fmdlr, fmrdh,
+ fmrdl, fabsd, fnegd, fsqrtd, faddd, fsubd, fmuld, fdivd, fmacd,
+ fmscd, fnmuld, fnmacd, fnmscd, fcmpd, fcmpzd, fcmped, fcmpezd,
+ fmdrr, fmrrd. Add Neon instructions vaba, vhadd, vrhadd, vhsub,
+ vqadd, vqsub, vrshl, vqrshl, vshl, vqshl{u}, vand, vbic, vorr, vorn,
+ veor, vbsl, vbit, vbif, vabd, vmax, vmin, vcge, vcgt, vclt, vcle,
+ vceq, vpmax, vpmin, vmla, vmls, vpadd, vadd, vsub, vtst, vmul,
+ vqdmulh, vqrdmulh, vacge, vacgt, vaclt, vacle, vrecps, vrsqrts,
+ vabs, vneg, v{r}shr, v{r}sra, vsli, vsri, vqshrn, vq{r}shr{u}n,
+ v{r}shrn, vshll, vcvt, vmov, vmvn, vabal, vabdl, vaddl, vsubl,
+ vmlal, vmlsl, vaddw, vsubw, v{r}addhn, v{r}subhn, vqdmlal, vqdmlsl,
+ vqdmull, vmull, vext, vrev64, vrev32, vrev16, vdup, vmovl, v{q}movn,
+ vzip, vuzp, vqabs, vqneg, vpadal, vpaddl, vrecpe, vrsqrte, vcls,
+ vclz, vcnt, vswp, vtrn, vtbl, vtbx, vldm, vstm, vldr, vstr,
+ vld[1234], vst[1234], fconst[sd], f[us][lh]to[sd], fto[us][lh][sd].
+ (tc_arm_regname_to_dw2regnum): Update for arm_reg_parse args.
+ (arm_cpu_option_table): Add Neon and VFPv3 to Cortex-A8.
+ (arm_option_cpu_value): Add vfp3 and neon.
+ (aeabi_set_public_attributes): Support VFPv3 and NEON attributes.
+ Fix VFPv1 attribute.
+
+ * gas/testsuite/gas/arm/copro.s: Avoid ldcl which encodes as a bad Neon
+ instruction.
+ * gas/testsuite/gas/arm/copro.d: Update accordingly.
+ * gas/testsuite/gas/arm/neon-cond.s: New test. Conditional Neon opcodes
+ in ARM mode.
+ * gas/testsuite/gas/arm/neon-cond.d: Expected results of above.
+ * gas/testsuite/gas/arm/neon-cov.s: New test. Coverage of Neon
+ instructions.
+ * gas/testsuite/gas/arm/neon-cov.d: Expected results of above.
+ * gas/testsuite/gas/arm/neon-ldst-es.s: New test. Element and structure
+ loads and stores.
+ * gas/testsuite/gas/arm/neon-ldst-es.d: Expected results of above.
+ * gas/testsuite/gas/arm/neon-ldst-rm.s: New test. Single and multiple
+ register loads and stores.
+ * gas/testsuite/gas/arm/neon-ldst-rm.d: Expected results of above.
+ * gas/testsuite/gas/arm/neon-omit.s: New test. Omission of optional
+ operands.
+ * gas/testsuite/gas/arm/neon-omit.d: Expected results of above.
+ * gas/testsuite/gas/arm/vfp1.d: Expect Neon syntax for some VFP
+ instructions.
+ * gas/testsuite/gas/arm/vfp1_t2.d: Likewise.
+ * gas/testsuite/gas/arm/vfp1xD.d: Likewise.
+ * gas/testsuite/gas/arm/vfp1xD_t2.d: Likewise.
+ * gas/testsuite/gas/arm/vfp2.d: Likewise.
+ * gas/testsuite/gas/arm/vfp2_t2.d: Likewise.
+ * gas/testsuite/gas/arm/vfp3-32drs.s: New test. Extended D register
+ range for VFP instructions.
+ * gas/testsuite/gas/arm/vfp3-32drs.d: Expected results of above.
+ * gas/testsuite/gas/arm/vfp3-const-conv.s: New test. VFPv3
+ constant-load and conversion instructions.
+ * gas/testsuite/gas/arm/vfp3-const-conv.d: Expected results of above.
+
+ * include/opcode/arm.h (FPU_VFP_EXT_V3): Define constant.
+ (FPU_NEON_EXT_V1): Likewise.
+ (FPU_VFP_HARD): Update.
+ (FPU_VFP_V3): Define macro.
+ (FPU_ARCH_VFP_V3, FPU_ARCH_VFP_V3_PLUS_NEON_V1): Define macros.
+
+ * opcodes/arm-dis.c (coprocessor_opcodes): Add %A, %B, %k,
+ convert %<code>[zy] into %[zy]<code>. Expand meaning of
+ %<bitfield>['`?].
+ Add unified load/store instruction names.
+ (neon_opcode_table): New.
+ (arm_opcodes): Expand meaning of %<bitfield>['`?].
+ (arm_decode_bitfield): New.
+ (print_insn_coprocessor): Add pc argument. Add %A & %B specifiers.
+ Use arm_decode_bitfield and adjust numeric specifiers.
+ Adjust %z & %y.
+ (print_insn_neon): New.
+ (print_insn_arm): Adjust print_insn_coprocessor call. Call
+ print_insn_neon. Use arm_decode_bitfield and adjust numeric specifiers.
+ (print_insn_thumb32): Likewise.
+
2005-04-01 Paul Brook <paul@codesourcery.com>
* config/tc-arm.c (arm_fix_adjustable): Return 0 for function symbols.
diff --git a/binutils/readelf.c b/binutils/readelf.c
index d8bd54f440..4a9bd1a87e 100644
--- a/binutils/readelf.c
+++ b/binutils/readelf.c
@@ -7731,7 +7731,8 @@ static const char *arm_attr_tag_CPU_arch[] =
static const char *arm_attr_tag_ARM_ISA_use[] = {"No", "Yes"};
static const char *arm_attr_tag_THUMB_ISA_use[] =
{"No", "Thumb-1", "Thumb-2"};
-static const char *arm_attr_tag_VFP_arch[] = {"No", "VFPv1", "VFPv2"};
+/* FIXME: VFPv3 encoding was extrapolated! */
+static const char *arm_attr_tag_VFP_arch[] = {"No", "VFPv1", "VFPv2", "VFPv3"};
static const char *arm_attr_tag_WMMX_arch[] = {"No", "WMMXv1"};
static const char *arm_attr_tag_NEON_arch[] = {"No", "NEONv1"};
static const char *arm_attr_tag_ABI_PCS_config[] =
diff --git a/gas/config/tc-arm.c b/gas/config/tc-arm.c
index 05158eec02..b2541feb7a 100644
--- a/gas/config/tc-arm.c
+++ b/gas/config/tc-arm.c
@@ -26,6 +26,7 @@
02110-1301, USA. */
#include <string.h>
+#include <limits.h>
#define NO_RELOC 0
#include "as.h"
#include "safe-ctype.h"
@@ -155,6 +156,8 @@ static const arm_feature_set *mfpu_opt = NULL;
static const arm_feature_set fpu_default = FPU_DEFAULT;
static const arm_feature_set fpu_arch_vfp_v1 = FPU_ARCH_VFP_V1;
static const arm_feature_set fpu_arch_vfp_v2 = FPU_ARCH_VFP_V2;
+static const arm_feature_set fpu_arch_vfp_v3 = FPU_ARCH_VFP_V3;
+static const arm_feature_set fpu_arch_neon_v1 = FPU_ARCH_NEON_V1;
static const arm_feature_set fpu_arch_fpa = FPU_ARCH_FPA;
static const arm_feature_set fpu_any_hard = FPU_ANY_HARD;
static const arm_feature_set fpu_arch_maverick = FPU_ARCH_MAVERICK;
@@ -206,6 +209,10 @@ static const arm_feature_set fpu_vfp_ext_v1xd =
ARM_FEATURE (0, FPU_VFP_EXT_V1xD);
static const arm_feature_set fpu_vfp_ext_v1 = ARM_FEATURE (0, FPU_VFP_EXT_V1);
static const arm_feature_set fpu_vfp_ext_v2 = ARM_FEATURE (0, FPU_VFP_EXT_V2);
+static const arm_feature_set fpu_vfp_ext_v3 = ARM_FEATURE (0, FPU_VFP_EXT_V3);
+static const arm_feature_set fpu_neon_ext_v1 = ARM_FEATURE (0, FPU_NEON_EXT_V1);
+static const arm_feature_set fpu_vfp_v3_or_neon_ext =
+ ARM_FEATURE (0, FPU_NEON_EXT_V1 | FPU_VFP_EXT_V3);
static int mfloat_abi_opt = -1;
/* Record user cpu selection for object attributes. */
@@ -256,6 +263,31 @@ static int thumb_mode = 0;
static bfd_boolean unified_syntax = FALSE;
+enum neon_el_type
+{
+ NT_untyped,
+ NT_integer,
+ NT_float,
+ NT_poly,
+ NT_signed,
+ NT_unsigned,
+ NT_invtype
+};
+
+struct neon_type_el
+{
+ enum neon_el_type type;
+ unsigned size;
+};
+
+#define NEON_MAX_TYPE_ELS 4
+
+struct neon_type
+{
+ struct neon_type_el el[NEON_MAX_TYPE_ELS];
+ unsigned elems;
+};
+
struct arm_it
{
const char * error;
@@ -263,6 +295,7 @@ struct arm_it
int size;
int size_req;
int cond;
+ struct neon_type vectype;
/* Set to the opcode if the instruction needs relaxation.
Zero if the instruction is not relaxed. */
unsigned long relax;
@@ -280,6 +313,12 @@ struct arm_it
unsigned present : 1; /* Operand present. */
unsigned isreg : 1; /* Operand was a register. */
unsigned immisreg : 1; /* .imm field is a second register. */
+ unsigned isscalar : 1; /* Operand is a (Neon) scalar. */
+ unsigned immisalign : 1; /* Immediate is an alignment specifier. */
+ /* Note: we abuse "regisimm" to mean "is Neon register" in VMOV
+ instructions. This allows us to disambiguate ARM <-> vector insns. */
+ unsigned regisimm : 1; /* 64-bit immediate, reg forms high 32 bits. */
+ unsigned isquad : 1; /* Operand is Neon quad-precision register. */
unsigned hasreloc : 1; /* Operand has relocation suffix. */
unsigned writeback : 1; /* Operand has trailing ! */
unsigned preind : 1; /* Preindexed address. */
@@ -355,9 +394,10 @@ struct reloc_entry
bfd_reloc_code_real_type reloc;
};
-enum vfp_sp_reg_pos
+enum vfp_reg_pos
{
- VFP_REG_Sd, VFP_REG_Sm, VFP_REG_Sn
+ VFP_REG_Sd, VFP_REG_Sm, VFP_REG_Sn,
+ VFP_REG_Dd, VFP_REG_Dm, VFP_REG_Dn
};
enum vfp_ldstm_type
@@ -375,6 +415,8 @@ enum arm_reg_type
REG_TYPE_FN,
REG_TYPE_VFS,
REG_TYPE_VFD,
+ REG_TYPE_NQ,
+ REG_TYPE_NDQ,
REG_TYPE_VFC,
REG_TYPE_MVF,
REG_TYPE_MVD,
@@ -405,7 +447,9 @@ const char *const reg_expected_msgs[] =
N_("co-processor register expected"),
N_("FPA register expected"),
N_("VFP single precision register expected"),
- N_("VFP double precision register expected"),
+ N_("VFP/Neon double precision register expected"),
+ N_("Neon quad precision register expected"),
+ N_("Neon double or quad precision register expected"),
N_("VFP system register expected"),
N_("Maverick MVF register expected"),
N_("Maverick MVD register expected"),
@@ -691,6 +735,9 @@ static int in_my_get_expression = 0;
#define GE_NO_PREFIX 0
#define GE_IMM_PREFIX 1
#define GE_OPT_PREFIX 2
+/* This is a bit of a hack. Use an optional prefix, and also allow big (64-bit)
+ immediates, as can be used in Neon VMVN and VMOV immediate instructions. */
+#define GE_OPT_PREFIX_BIG 3
static int
my_get_expression (expressionS * ep, char ** str, int prefix_mode)
@@ -700,7 +747,8 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode)
/* In unified syntax, all prefixes are optional. */
if (unified_syntax)
- prefix_mode = GE_OPT_PREFIX;
+ prefix_mode = (prefix_mode == GE_OPT_PREFIX_BIG) ? prefix_mode
+ : GE_OPT_PREFIX;
switch (prefix_mode)
{
@@ -714,6 +762,7 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode)
(*str)++;
break;
case GE_OPT_PREFIX:
+ case GE_OPT_PREFIX_BIG:
if (is_immediate_prefix (**str))
(*str)++;
break;
@@ -755,11 +804,12 @@ my_get_expression (expressionS * ep, char ** str, int prefix_mode)
/* Get rid of any bignums now, so that we don't generate an error for which
we can't establish a line number later on. Big numbers are never valid
in instructions, which is where this routine is always called. */
- if (ep->X_op == O_big
- || (ep->X_add_symbol
- && (walk_no_bignums (ep->X_add_symbol)
- || (ep->X_op_symbol
- && walk_no_bignums (ep->X_op_symbol)))))
+ if (prefix_mode != GE_OPT_PREFIX_BIG
+ && (ep->X_op == O_big
+ || (ep->X_add_symbol
+ && (walk_no_bignums (ep->X_add_symbol)
+ || (ep->X_op_symbol
+ && walk_no_bignums (ep->X_op_symbol))))))
{
inst.error = _("invalid constant");
*str = input_line_pointer;
@@ -940,14 +990,24 @@ arm_reg_parse_multi (char **ccp)
}
/* As above, but the register must be of type TYPE, and the return
- value is the register number or FAIL. */
+ value is the register number or FAIL.
+ If RTYPE is non-zero, return the (possibly restricted) type of the
+ register (e.g. Neon double or quad reg when either has been requested). */
static int
-arm_reg_parse (char **ccp, enum arm_reg_type type)
+arm_reg_parse (char **ccp, enum arm_reg_type type, enum arm_reg_type *rtype)
{
char *start = *ccp;
struct reg_entry *reg = arm_reg_parse_multi (ccp);
+ /* Undo polymorphism for Neon D and Q registers. */
+ if (reg && type == REG_TYPE_NDQ
+ && (reg->type == REG_TYPE_NQ || reg->type == REG_TYPE_VFD))
+ type = reg->type;
+
+ if (rtype)
+ *rtype = type;
+
if (reg && reg->type == type)
return reg->number;
@@ -986,6 +1046,46 @@ arm_reg_parse (char **ccp, enum arm_reg_type type)
return FAIL;
}
+/* Parse a Neon scalar. Most of the time when we're parsing a scalar, we don't
+ have enough information to be able to do a good job bounds-checking. So, we
+ just do easy checks here, and do further checks later. */
+
+static int
+parse_scalar (char **ccp, int elsize)
+{
+ int regno, elno;
+ char *str = *ccp;
+ expressionS exp;
+
+ if ((regno = arm_reg_parse (&str, REG_TYPE_VFD, NULL)) == FAIL)
+ return FAIL;
+
+ if (skip_past_char (&str, '[') == FAIL)
+ return FAIL;
+
+ my_get_expression (&exp, &str, GE_NO_PREFIX);
+ if (exp.X_op != O_constant)
+ {
+ inst.error = _("constant expression required");
+ return FAIL;
+ }
+ elno = exp.X_add_number;
+
+ if (elno >= 64 / elsize)
+ {
+ inst.error = _("scalar index out of range");
+ return FAIL;
+ }
+
+ if (skip_past_char (&str, ']') == FAIL)
+ return FAIL;
+
+ /* Parsed scalar successfully. Skip over it. */
+ *ccp = str;
+
+ return (regno * 8) + elno;
+}
+
/* Parse an ARM register list. Returns the bitmask, or FAIL. */
static long
parse_reg_list (char ** strp)
@@ -1009,7 +1109,7 @@ parse_reg_list (char ** strp)
{
int reg;
- if ((reg = arm_reg_parse (&str, REG_TYPE_RN)) == FAIL)
+ if ((reg = arm_reg_parse (&str, REG_TYPE_RN, NULL)) == FAIL)
{
inst.error = _(reg_expected_msgs[REG_TYPE_RN]);
return FAIL;
@@ -1111,53 +1211,101 @@ parse_reg_list (char ** strp)
return range;
}
+/* Types of registers in a list. */
+
+enum reg_list_els
+{
+ REGLIST_VFP_S,
+ REGLIST_VFP_D,
+ REGLIST_NEON_D
+};
+
/* Parse a VFP register list. If the string is invalid return FAIL.
Otherwise return the number of registers, and set PBASE to the first
- register. Double precision registers are matched if DP is nonzero. */
+ register. Parses registers of type ETYPE.
+ If REGLIST_NEON_D is used, several syntax enhancements are enabled:
+ - Q registers can be used to specify pairs of D registers
+ - { } can be omitted from around a singleton register list
+ FIXME: This is not implemented, as it would require backtracking in
+ some cases, e.g.:
+ vtbl.8 d3,d4,d5
+ This could be done (the meaning isn't really ambiguous), but doesn't
+ fit in well with the current parsing framework.
+ - 32 D registers may be used (also true for VFPv3). */
static int
-parse_vfp_reg_list (char **str, unsigned int *pbase, int dp)
+parse_vfp_reg_list (char **str, unsigned int *pbase, enum reg_list_els etype)
{
int base_reg;
int new_base;
- int regtype;
- int max_regs;
+ enum arm_reg_type regtype = 0;
+ int max_regs = 0;
int count = 0;
int warned = 0;
unsigned long mask = 0;
int i;
if (**str != '{')
- return FAIL;
+ {
+ inst.error = _("expecting {");
+ return FAIL;
+ }
(*str)++;
- if (dp)
- {
- regtype = REG_TYPE_VFD;
- max_regs = 16;
- }
- else
+ switch (etype)
{
+ case REGLIST_VFP_S:
regtype = REG_TYPE_VFS;
max_regs = 32;
+ break;
+
+ case REGLIST_VFP_D:
+ regtype = REG_TYPE_VFD;
+ /* VFPv3 allows 32 D registers. */
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v3))
+ {
+ max_regs = 32;
+ if (thumb_mode)
+ ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
+ fpu_vfp_ext_v3);
+ else
+ ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used,
+ fpu_vfp_ext_v3);
+ }
+ else
+ max_regs = 16;
+ break;
+
+ case REGLIST_NEON_D:
+ regtype = REG_TYPE_NDQ;
+ max_regs = 32;
+ break;
}
base_reg = max_regs;
do
{
- new_base = arm_reg_parse (str, regtype);
+ int setmask = 1, addregs = 1;
+ new_base = arm_reg_parse (str, regtype, &regtype);
if (new_base == FAIL)
{
inst.error = gettext (reg_expected_msgs[regtype]);
return FAIL;
}
+ /* Note: a value of 2 * n is returned for the register Q<n>. */
+ if (regtype == REG_TYPE_NQ)
+ {
+ setmask = 3;
+ addregs = 2;
+ }
+
if (new_base < base_reg)
base_reg = new_base;
- if (mask & (1 << new_base))
+ if (mask & (setmask << new_base))
{
inst.error = _("invalid register list");
return FAIL;
@@ -1169,8 +1317,8 @@ parse_vfp_reg_list (char **str, unsigned int *pbase, int dp)
warned = 1;
}
- mask |= 1 << new_base;
- count++;
+ mask |= setmask << new_base;
+ count += addregs;
if (**str == '-') /* We have the start of a range expression */
{
@@ -1178,28 +1326,31 @@ parse_vfp_reg_list (char **str, unsigned int *pbase, int dp)
(*str)++;
- if ((high_range = arm_reg_parse (str, regtype)) == FAIL)
+ if ((high_range = arm_reg_parse (str, regtype, NULL)) == FAIL)
{
inst.error = gettext (reg_expected_msgs[regtype]);
return FAIL;
}
+ if (regtype == REG_TYPE_NQ)
+ high_range = high_range + 1;
+
if (high_range <= new_base)
{
inst.error = _("register range not in ascending order");
return FAIL;
}
- for (new_base++; new_base <= high_range; new_base++)
+ for (new_base += addregs; new_base <= high_range; new_base += addregs)
{
- if (mask & (1 << new_base))
+ if (mask & (setmask << new_base))
{
inst.error = _("invalid register list");
return FAIL;
}
- mask |= 1 << new_base;
- count++;
+ mask |= setmask << new_base;
+ count += addregs;
}
}
}
@@ -1227,6 +1378,180 @@ parse_vfp_reg_list (char **str, unsigned int *pbase, int dp)
return count;
}
+/* Parse element/structure lists for Neon VLD<n> and VST<n> instructions.
+ The base register is put in *PBASE.
+ The lane (or one of the #defined constants below) is placed in bits [3:0] of
+ the return value.
+ The register stride (minus one) is put in bit 4 of the return value.
+ Bits [6:5] encode the list length (minus one). */
+
+#define NEON_ALL_LANES 15
+#define NEON_INTERLEAVE_LANES 14
+#define NEON_LANE(X) ((X) & 0xf)
+#define NEON_REG_STRIDE(X) (((X) & (1 << 4)) ? 2 : 1)
+#define NEON_REGLIST_LENGTH(X) ((((X) >> 5) & 3) + 1)
+
+static int
+parse_neon_el_struct_list (char **str, unsigned *pbase)
+{
+ char *ptr = *str;
+ int base_reg = -1;
+ int reg_incr = -1;
+ int count = 0;
+ int lane = -1;
+ int leading_brace = 0;
+ enum arm_reg_type rtype = REG_TYPE_NDQ;
+ int addregs = 1;
+ const char *const incr_error = "register stride must be 1 or 2";
+ const char *const type_error = "mismatched element/structure types in list";
+
+ if (skip_past_char (&ptr, '{') == SUCCESS)
+ leading_brace = 1;
+
+ do
+ {
+ int getreg = arm_reg_parse (&ptr, rtype, &rtype);
+ if (getreg == FAIL)
+ {
+ inst.error = _(reg_expected_msgs[rtype]);
+ return FAIL;
+ }
+
+ if (base_reg == -1)
+ {
+ base_reg = getreg;
+ if (rtype == REG_TYPE_NQ)
+ {
+ reg_incr = 1;
+ addregs = 2;
+ }
+ }
+ else if (reg_incr == -1)
+ {
+ reg_incr = getreg - base_reg;
+ if (reg_incr < 1 || reg_incr > 2)
+ {
+ inst.error = _(incr_error);
+ return FAIL;
+ }
+ }
+ else if (getreg != base_reg + reg_incr * count)
+ {
+ inst.error = _(incr_error);
+ return FAIL;
+ }
+
+ /* Handle Dn-Dm or Qn-Qm syntax. Can only be used with non-indexed list
+ modes. */
+ if (ptr[0] == '-')
+ {
+ int hireg, dregs = (rtype == REG_TYPE_NQ) ? 2 : 1;
+ if (lane == -1)
+ lane = NEON_INTERLEAVE_LANES;
+ else if (lane != NEON_INTERLEAVE_LANES)
+ {
+ inst.error = _(type_error);
+ return FAIL;
+ }
+ if (reg_incr == -1)
+ reg_incr = 1;
+ else if (reg_incr != 1)
+ {
+ inst.error = _("don't use Rn-Rm syntax with non-unit stride");
+ return FAIL;
+ }
+ ptr++;
+ hireg = arm_reg_parse (&ptr, rtype, NULL);
+ if (hireg == FAIL)
+ {
+ inst.error = _(reg_expected_msgs[rtype]);
+ return FAIL;
+ }
+ count += hireg + dregs - getreg;
+ continue;
+ }
+
+ /* If we're using Q registers, we can't use [] or [n] syntax. */
+ if (rtype == REG_TYPE_NQ)
+ {
+ count += 2;
+ continue;
+ }
+
+ if (skip_past_char (&ptr, '[') == SUCCESS)
+ {
+ if (skip_past_char (&ptr, ']') == SUCCESS)
+ {
+ if (lane == -1)
+ lane = NEON_ALL_LANES;
+ else if (lane != NEON_ALL_LANES)
+ {
+ inst.error = _(type_error);
+ return FAIL;
+ }
+ }
+ else
+ {
+ expressionS exp;
+ my_get_expression (&exp, &ptr, GE_NO_PREFIX);
+ if (exp.X_op != O_constant)
+ {
+ inst.error = _("constant expression required");
+ return FAIL;
+ }
+ if (lane == -1)
+ lane = exp.X_add_number;
+ else if (lane != exp.X_add_number)
+ {
+ inst.error = _(type_error);
+ return FAIL;
+ }
+
+ if (skip_past_char (&ptr, ']') == FAIL)
+ {
+ inst.error = _("expected ]");
+ return FAIL;
+ }
+ }
+ }
+ else if (lane == -1)
+ lane = NEON_INTERLEAVE_LANES;
+ else if (lane != NEON_INTERLEAVE_LANES)
+ {
+ inst.error = _(type_error);
+ return FAIL;
+ }
+ count++;
+ }
+ while ((count != 1 || leading_brace) && skip_past_comma (&ptr) != FAIL);
+
+ /* No lane set by [x]. We must be interleaving structures. */
+ if (lane == -1)
+ lane = NEON_INTERLEAVE_LANES;
+
+ /* Sanity check. */
+ if (lane == -1 || base_reg == -1 || count < 1 || count > 4
+ || (count > 1 && reg_incr == -1))
+ {
+ inst.error = _("error parsing element/structure list");
+ return FAIL;
+ }
+
+ if ((count > 1 || leading_brace) && skip_past_char (&ptr, '}') == FAIL)
+ {
+ inst.error = _("expected }");
+ return FAIL;
+ }
+
+ if (reg_incr == -1)
+ reg_incr = 1;
+
+ *pbase = base_reg;
+ *str = ptr;
+
+ return lane | ((reg_incr - 1) << 4) | ((count - 1) << 5);
+}
+
/* Parse an explicit relocation suffix on an expression. This is
either nothing, or a word in parentheses. Note that if !OBJ_ELF,
arm_reloc_hsh contains no entries, so this function can only
@@ -2423,7 +2748,7 @@ s_arm_unwind_save_vfp (void)
unsigned int reg;
valueT op;
- count = parse_vfp_reg_list (&input_line_pointer, &reg, 1);
+ count = parse_vfp_reg_list (&input_line_pointer, &reg, REGLIST_VFP_D);
if (count == FAIL)
{
as_bad (_("expected register list"));
@@ -2465,7 +2790,7 @@ s_arm_unwind_save_mmxwr (void)
do
{
- reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWR);
+ reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWR, NULL);
if (reg == FAIL)
{
@@ -2480,7 +2805,7 @@ s_arm_unwind_save_mmxwr (void)
if (*input_line_pointer == '-')
{
input_line_pointer++;
- hi_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWR);
+ hi_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWR, NULL);
if (hi_reg == FAIL)
{
as_bad (_(reg_expected_msgs[REG_TYPE_MMXWR]));
@@ -2597,7 +2922,7 @@ s_arm_unwind_save_mmxwcg (void)
do
{
- reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWCG);
+ reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWCG, NULL);
if (reg == FAIL)
{
@@ -2613,7 +2938,7 @@ s_arm_unwind_save_mmxwcg (void)
if (*input_line_pointer == '-')
{
input_line_pointer++;
- hi_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWCG);
+ hi_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_MMXWCG, NULL);
if (hi_reg == FAIL)
{
as_bad (_(reg_expected_msgs[REG_TYPE_MMXWCG]));
@@ -2711,7 +3036,7 @@ s_arm_unwind_movsp (int ignored ATTRIBUTE_UNUSED)
int reg;
valueT op;
- reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN);
+ reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN, NULL);
if (reg == FAIL)
{
as_bad (_(reg_expected_msgs[REG_TYPE_RN]));
@@ -2772,11 +3097,11 @@ s_arm_unwind_setfp (int ignored ATTRIBUTE_UNUSED)
int fp_reg;
int offset;
- fp_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN);
+ fp_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN, NULL);
if (skip_past_comma (&input_line_pointer) == FAIL)
sp_reg = FAIL;
else
- sp_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN);
+ sp_reg = arm_reg_parse (&input_line_pointer, REG_TYPE_RN, NULL);
if (fp_reg == FAIL || sp_reg == FAIL)
{
@@ -3043,6 +3368,47 @@ parse_immediate (char **str, int *val, int min, int max,
return SUCCESS;
}
+/* Less-generic immediate-value read function with the possibility of loading a
+ big (64-bit) immediate, as required by Neon VMOV and VMVN immediate
+ instructions. Puts the result directly in inst.operands[i]. */
+
+static int
+parse_big_immediate (char **str, int i)
+{
+ expressionS exp;
+ char *ptr = *str;
+
+ my_get_expression (&exp, &ptr, GE_OPT_PREFIX_BIG);
+
+ if (exp.X_op == O_constant)
+ inst.operands[i].imm = exp.X_add_number;
+ else if (exp.X_op == O_big
+ && LITTLENUM_NUMBER_OF_BITS * exp.X_add_number > 32
+ && LITTLENUM_NUMBER_OF_BITS * exp.X_add_number <= 64)
+ {
+ unsigned parts = 32 / LITTLENUM_NUMBER_OF_BITS, j, idx = 0;
+ /* Bignums have their least significant bits in
+ generic_bignum[0]. Make sure we put 32 bits in imm and
+ 32 bits in reg, in a (hopefully) portable way. */
+ assert (parts != 0);
+ inst.operands[i].imm = 0;
+ for (j = 0; j < parts; j++, idx++)
+ inst.operands[i].imm |= generic_bignum[idx]
+ << (LITTLENUM_NUMBER_OF_BITS * j);
+ inst.operands[i].reg = 0;
+ for (j = 0; j < parts; j++, idx++)
+ inst.operands[i].reg |= generic_bignum[idx]
+ << (LITTLENUM_NUMBER_OF_BITS * j);
+ inst.operands[i].regisimm = 1;
+ }
+ else
+ return FAIL;
+
+ *str = ptr;
+
+ return SUCCESS;
+}
+
/* Returns the pseudo-register number of an FPA immediate constant,
or FAIL if there isn't a valid constant here. */
@@ -3232,7 +3598,7 @@ parse_shift (char **str, int i, enum parse_shift_mode mode)
skip_whitespace (p);
if (mode == NO_SHIFT_RESTRICT
- && (reg = arm_reg_parse (&p, REG_TYPE_RN)) != FAIL)
+ && (reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) != FAIL)
{
inst.operands[i].imm = reg;
inst.operands[i].immisreg = 1;
@@ -3263,7 +3629,7 @@ parse_shifter_operand (char **str, int i)
int value;
expressionS expr;
- if ((value = arm_reg_parse (str, REG_TYPE_RN)) != FAIL)
+ if ((value = arm_reg_parse (str, REG_TYPE_RN, NULL)) != FAIL)
{
inst.operands[i].reg = value;
inst.operands[i].isreg = 1;
@@ -3374,7 +3740,7 @@ parse_address (char **str, int i)
return SUCCESS;
}
- if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) == FAIL)
+ if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) == FAIL)
{
inst.error = _(reg_expected_msgs[REG_TYPE_RN]);
return FAIL;
@@ -3389,7 +3755,7 @@ parse_address (char **str, int i)
if (*p == '+') p++;
else if (*p == '-') p++, inst.operands[i].negative = 1;
- if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) != FAIL)
+ if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) != FAIL)
{
inst.operands[i].imm = reg;
inst.operands[i].immisreg = 1;
@@ -3398,6 +3764,23 @@ parse_address (char **str, int i)
if (parse_shift (&p, i, SHIFT_IMMEDIATE) == FAIL)
return FAIL;
}
+ else if (skip_past_char (&p, ':') == SUCCESS)
+ {
+ /* FIXME: '@' should be used here, but it's filtered out by generic
+ code before we get to see it here. This may be subject to
+ change. */
+ expressionS exp;
+ my_get_expression (&exp, &p, GE_NO_PREFIX);
+ if (exp.X_op != O_constant)
+ {
+ inst.error = _("alignment must be constant");
+ return FAIL;
+ }
+ inst.operands[i].imm = exp.X_add_number << 8;
+ inst.operands[i].immisalign = 1;
+ /* Alignments are not pre-indexes. */
+ inst.operands[i].preind = 0;
+ }
else
{
if (inst.operands[i].negative)
@@ -3455,9 +3838,14 @@ parse_address (char **str, int i)
if (*p == '+') p++;
else if (*p == '-') p++, inst.operands[i].negative = 1;
- if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) != FAIL)
+ if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) != FAIL)
{
- inst.operands[i].imm = reg;
+ /* We might be using the immediate for alignment already. If we
+ are, OR the register number into the low-order bits. */
+ if (inst.operands[i].immisalign)
+ inst.operands[i].imm |= reg;
+ else
+ inst.operands[i].imm = reg;
inst.operands[i].immisreg = 1;
if (skip_past_comma (&p) == SUCCESS)
@@ -3711,7 +4099,7 @@ parse_tb (char **str)
return FAIL;
}
- if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) == FAIL)
+ if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) == FAIL)
{
inst.error = _(reg_expected_msgs[REG_TYPE_RN]);
return FAIL;
@@ -3724,7 +4112,7 @@ parse_tb (char **str)
return FAIL;
}
- if ((reg = arm_reg_parse (&p, REG_TYPE_RN)) == FAIL)
+ if ((reg = arm_reg_parse (&p, REG_TYPE_RN, NULL)) == FAIL)
{
inst.error = _(reg_expected_msgs[REG_TYPE_RN]);
return FAIL;
@@ -3752,6 +4140,158 @@ parse_tb (char **str)
return SUCCESS;
}
+/* Parse the operands of a Neon VMOV instruction. See do_neon_mov for more
+ information on the types the operands can take and how they are encoded.
+ Note particularly the abuse of ".regisimm" to signify a Neon register.
+ Up to three operands may be read; this function handles setting the
+ ".present" field for each operand itself.
+ Updates STR and WHICH_OPERAND if parsing is successful and returns SUCCESS,
+ else returns FAIL. */
+
+static int
+parse_neon_mov (char **str, int *which_operand)
+{
+ int i = *which_operand, val;
+ enum arm_reg_type rtype;
+ char *ptr = *str;
+
+ if ((val = parse_scalar (&ptr, 8)) != FAIL)
+ {
+ /* Case 4: VMOV<c><q>.<size> <Dn[x]>, <Rd>. */
+ inst.operands[i].reg = val;
+ inst.operands[i].isscalar = 1;
+ inst.operands[i++].present = 1;
+
+ if (skip_past_comma (&ptr) == FAIL)
+ goto wanted_comma;
+
+ if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) == FAIL)
+ goto wanted_arm;
+
+ inst.operands[i].reg = val;
+ inst.operands[i].isreg = 1;
+ inst.operands[i].present = 1;
+ }
+ else if ((val = arm_reg_parse (&ptr, REG_TYPE_NDQ, &rtype)) != FAIL)
+ {
+ /* Cases 0, 1, 2, 3, 5 (D only). */
+ if (skip_past_comma (&ptr) == FAIL)
+ goto wanted_comma;
+
+ inst.operands[i].reg = val;
+ inst.operands[i].isreg = 1;
+ inst.operands[i].isquad = (rtype == REG_TYPE_NQ);
+ inst.operands[i++].present = 1;
+
+ if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) != FAIL)
+ {
+ /* Case 5: VMOV<c><q> <Dm>, <Rd>, <Rn>. */
+ inst.operands[i-1].regisimm = 1;
+ inst.operands[i].reg = val;
+ inst.operands[i].isreg = 1;
+ inst.operands[i++].present = 1;
+
+ if (rtype == REG_TYPE_NQ)
+ {
+ inst.error = _("can't use Neon quad register here");
+ return FAIL;
+ }
+ if (skip_past_comma (&ptr) == FAIL)
+ goto wanted_comma;
+ if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) == FAIL)
+ goto wanted_arm;
+ inst.operands[i].reg = val;
+ inst.operands[i].isreg = 1;
+ inst.operands[i].present = 1;
+ }
+ else if (parse_big_immediate (&ptr, i) == SUCCESS)
+ {
+ /* Case 2: VMOV<c><q>.<dt> <Qd>, #<imm>
+ Case 3: VMOV<c><q>.<dt> <Dd>, #<imm> */
+ if (!thumb_mode && (inst.instruction & 0xf0000000) != 0xe0000000)
+ goto bad_cond;
+ }
+ else if ((val = arm_reg_parse (&ptr, REG_TYPE_NDQ, &rtype)) != FAIL)
+ {
+ /* Case 0: VMOV<c><q> <Qd>, <Qm>
+ Case 1: VMOV<c><q> <Dd>, <Dm> */
+ if (!thumb_mode && (inst.instruction & 0xf0000000) != 0xe0000000)
+ goto bad_cond;
+
+ inst.operands[i].reg = val;
+ inst.operands[i].isreg = 1;
+ inst.operands[i].isquad = (rtype == REG_TYPE_NQ);
+ inst.operands[i].present = 1;
+ }
+ else
+ {
+ inst.error = _("expected <Rm> or <Dm> or <Qm> operand");
+ return FAIL;
+ }
+ }
+ else if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) != FAIL)
+ {
+ /* Cases 6, 7. */
+ inst.operands[i].reg = val;
+ inst.operands[i].isreg = 1;
+ inst.operands[i++].present = 1;
+
+ if (skip_past_comma (&ptr) == FAIL)
+ goto wanted_comma;
+
+ if ((val = parse_scalar (&ptr, 8)) != FAIL)
+ {
+ /* Case 6: VMOV<c><q>.<dt> <Rd>, <Dn[x]> */
+ inst.operands[i].reg = val;
+ inst.operands[i].isscalar = 1;
+ inst.operands[i].present = 1;
+ }
+ else if ((val = arm_reg_parse (&ptr, REG_TYPE_RN, NULL)) != FAIL)
+ {
+ /* Case 7: VMOV<c><q> <Rd>, <Rn>, <Dm> */
+ inst.operands[i].reg = val;
+ inst.operands[i].isreg = 1;
+ inst.operands[i++].present = 1;
+
+ if (skip_past_comma (&ptr) == FAIL)
+ goto wanted_comma;
+
+ if ((val = arm_reg_parse (&ptr, REG_TYPE_VFD, NULL)) == FAIL)
+ {
+ inst.error = _(reg_expected_msgs[REG_TYPE_VFD]);
+ return FAIL;
+ }
+
+ inst.operands[i].reg = val;
+ inst.operands[i].isreg = 1;
+ inst.operands[i].regisimm = 1;
+ inst.operands[i].present = 1;
+ }
+ }
+ else
+ {
+ inst.error = _("parse error");
+ return FAIL;
+ }
+
+ /* Successfully parsed the operands. Update args. */
+ *which_operand = i;
+ *str = ptr;
+ return SUCCESS;
+
+ wanted_comma:
+ inst.error = _("expected comma");
+ return FAIL;
+
+ wanted_arm:
+ inst.error = _(reg_expected_msgs[REG_TYPE_RN]);
+ return FAIL;
+
+ bad_cond:
+ inst.error = _("instruction cannot be conditionalized");
+ return FAIL;
+}
+
/* Matcher codes for parse_operands. */
enum operand_parse_code
{
@@ -3765,7 +4305,11 @@ enum operand_parse_code
OP_RCN, /* Coprocessor register */
OP_RF, /* FPA register */
OP_RVS, /* VFP single precision register */
- OP_RVD, /* VFP double precision register */
+ OP_RVD, /* VFP double precision register (0..15) */
+ OP_RND, /* Neon double precision register (0..31) */
+ OP_RNQ, /* Neon quad precision register */
+ OP_RNDQ, /* Neon double or quad precision register */
+ OP_RNSC, /* Neon scalar D[X] */
OP_RVC, /* VFP control register */
OP_RMF, /* Maverick F register */
OP_RMD, /* Maverick D register */
@@ -3781,14 +4325,31 @@ enum operand_parse_code
OP_REGLST, /* ARM register list */
OP_VRSLST, /* VFP single-precision register list */
OP_VRDLST, /* VFP double-precision register list */
-
+ OP_NRDLST, /* Neon double-precision register list (d0-d31, qN aliases) */
+ OP_NSTRLST, /* Neon element/structure list */
+
+ OP_NILO, /* Neon immediate/logic operands 2 or 2+3. (VBIC, VORR...) */
+ OP_RNDQ_I0, /* Neon D or Q reg, or immediate zero. */
+ OP_RR_RNSC, /* ARM reg or Neon scalar. */
+ OP_RNDQ_RNSC, /* Neon D or Q reg, or Neon scalar. */
+ OP_RND_RNSC, /* Neon D reg, or Neon scalar. */
+ OP_VMOV, /* Neon VMOV operands. */
+ OP_RNDQ_IMVNb,/* Neon D or Q reg, or immediate good for VMVN. */
+ OP_RNDQ_I63b, /* Neon D or Q reg, or immediate for shift. */
+
+ OP_I0, /* immediate zero */
OP_I7, /* immediate value 0 .. 7 */
OP_I15, /* 0 .. 15 */
OP_I16, /* 1 .. 16 */
+ OP_I16z, /* 0 .. 16 */
OP_I31, /* 0 .. 31 */
OP_I31w, /* 0 .. 31, optional trailing ! */
OP_I32, /* 1 .. 32 */
+ OP_I32z, /* 0 .. 32 */
+ OP_I63, /* 0 .. 63 */
OP_I63s, /* -64 .. 63 */
+ OP_I64, /* 1 .. 64 */
+ OP_I64z, /* 0 .. 64 */
OP_I255, /* 0 .. 255 */
OP_Iffff, /* 0 .. 65535 */
@@ -3818,11 +4379,15 @@ enum operand_parse_code
/* Optional operands. */
OP_oI7b, /* immediate, prefix optional, 0 .. 7 */
OP_oI31b, /* 0 .. 31 */
+ OP_oI32b, /* 1 .. 32 */
OP_oIffffb, /* 0 .. 65535 */
OP_oI255c, /* curly-brace enclosed, 0 .. 255 */
OP_oRR, /* ARM register */
OP_oRRnpc, /* ARM register, not the PC */
+ OP_oRND, /* Optional Neon double precision register */
+ OP_oRNQ, /* Optional Neon quad precision register */
+ OP_oRNDQ, /* Optional Neon double or quad precision register */
OP_oSHll, /* LSL immediate */
OP_oSHar, /* ASR immediate */
OP_oSHllar, /* LSL or ASR immediate */
@@ -3843,6 +4408,7 @@ parse_operands (char *str, const unsigned char *pattern)
char *backtrack_pos = 0;
const char *backtrack_error = 0;
int i, val, backtrack_index = 0;
+ enum arm_reg_type rtype;
#define po_char_or_fail(chr) do { \
if (skip_past_char (&str, chr) == FAIL) \
@@ -3850,7 +4416,7 @@ parse_operands (char *str, const unsigned char *pattern)
} while (0)
#define po_reg_or_fail(regtype) do { \
- val = arm_reg_parse (&str, regtype); \
+ val = arm_reg_parse (&str, regtype, &rtype); \
if (val == FAIL) \
{ \
inst.error = _(reg_expected_msgs[regtype]); \
@@ -3858,15 +4424,17 @@ parse_operands (char *str, const unsigned char *pattern)
} \
inst.operands[i].reg = val; \
inst.operands[i].isreg = 1; \
+ inst.operands[i].isquad = (rtype == REG_TYPE_NQ); \
} while (0)
-#define po_reg_or_goto(regtype, label) do { \
- val = arm_reg_parse (&str, regtype); \
- if (val == FAIL) \
- goto label; \
- \
- inst.operands[i].reg = val; \
- inst.operands[i].isreg = 1; \
+#define po_reg_or_goto(regtype, label) do { \
+ val = arm_reg_parse (&str, regtype, &rtype); \
+ if (val == FAIL) \
+ goto label; \
+ \
+ inst.operands[i].reg = val; \
+ inst.operands[i].isreg = 1; \
+ inst.operands[i].isquad = (rtype == REG_TYPE_NQ); \
} while (0)
#define po_imm_or_fail(min, max, popt) do { \
@@ -3875,6 +4443,14 @@ parse_operands (char *str, const unsigned char *pattern)
inst.operands[i].imm = val; \
} while (0)
+#define po_scalar_or_goto(elsz, label) do { \
+ val = parse_scalar (&str, elsz); \
+ if (val == FAIL) \
+ goto label; \
+ inst.operands[i].reg = val; \
+ inst.operands[i].isscalar = 1; \
+} while (0)
+
#define po_misc_or_fail(expr) do { \
if (expr) \
goto failure; \
@@ -3908,6 +4484,8 @@ parse_operands (char *str, const unsigned char *pattern)
case OP_RF: po_reg_or_fail (REG_TYPE_FN); break;
case OP_RVS: po_reg_or_fail (REG_TYPE_VFS); break;
case OP_RVD: po_reg_or_fail (REG_TYPE_VFD); break;
+ case OP_oRND:
+ case OP_RND: po_reg_or_fail (REG_TYPE_VFD); break;
case OP_RVC: po_reg_or_fail (REG_TYPE_VFC); break;
case OP_RMF: po_reg_or_fail (REG_TYPE_MVF); break;
case OP_RMD: po_reg_or_fail (REG_TYPE_MVD); break;
@@ -3919,6 +4497,104 @@ parse_operands (char *str, const unsigned char *pattern)
case OP_RIWC: po_reg_or_fail (REG_TYPE_MMXWC); break;
case OP_RIWG: po_reg_or_fail (REG_TYPE_MMXWCG); break;
case OP_RXA: po_reg_or_fail (REG_TYPE_XSCALE); break;
+ case OP_oRNQ:
+ case OP_RNQ: po_reg_or_fail (REG_TYPE_NQ); break;
+ case OP_oRNDQ:
+ case OP_RNDQ: po_reg_or_fail (REG_TYPE_NDQ); break;
+
+ /* Neon scalar. Using an element size of 8 means that some invalid
+ scalars are accepted here, so deal with those in later code. */
+ case OP_RNSC: po_scalar_or_goto (8, failure); break;
+
+ /* WARNING: We can expand to two operands here. This has the potential
+ to totally confuse the backtracking mechanism! It will be OK at
+ least as long as we don't try to use optional args as well,
+ though. */
+ case OP_NILO:
+ {
+ po_reg_or_goto (REG_TYPE_NDQ, try_imm);
+ i++;
+ skip_past_comma (&str);
+ po_reg_or_goto (REG_TYPE_NDQ, one_reg_only);
+ break;
+ one_reg_only:
+ /* Optional register operand was omitted. Unfortunately, it's in
+ operands[i-1] and we need it to be in inst.operands[i]. Fix that
+ here (this is a bit grotty). */
+ inst.operands[i] = inst.operands[i-1];
+ inst.operands[i-1].present = 0;
+ break;
+ try_imm:
+ /* Immediate gets verified properly later, so accept any now. */
+ po_imm_or_fail (INT_MIN, INT_MAX, TRUE);
+ }
+ break;
+
+ case OP_RNDQ_I0:
+ {
+ po_reg_or_goto (REG_TYPE_NDQ, try_imm0);
+ break;
+ try_imm0:
+ po_imm_or_fail (0, 0, TRUE);
+ }
+ break;
+
+ case OP_RR_RNSC:
+ {
+ po_scalar_or_goto (8, try_rr);
+ break;
+ try_rr:
+ po_reg_or_fail (REG_TYPE_RN);
+ }
+ break;
+
+ case OP_RNDQ_RNSC:
+ {
+ po_scalar_or_goto (8, try_ndq);
+ break;
+ try_ndq:
+ po_reg_or_fail (REG_TYPE_NDQ);
+ }
+ break;
+
+ case OP_RND_RNSC:
+ {
+ po_scalar_or_goto (8, try_vfd);
+ break;
+ try_vfd:
+ po_reg_or_fail (REG_TYPE_VFD);
+ }
+ break;
+
+ case OP_VMOV:
+ /* WARNING: parse_neon_mov can move the operand counter, i. If we're
+ not careful then bad things might happen. */
+ po_misc_or_fail (parse_neon_mov (&str, &i) == FAIL);
+ break;
+
+ case OP_RNDQ_IMVNb:
+ {
+ po_reg_or_goto (REG_TYPE_NDQ, try_mvnimm);
+ break;
+ try_mvnimm:
+ /* There's a possibility of getting a 64-bit immediate here, so
+ we need special handling. */
+ if (parse_big_immediate (&str, i) == FAIL)
+ {
+ inst.error = _("immediate value is out of range");
+ goto failure;
+ }
+ }
+ break;
+
+ case OP_RNDQ_I63b:
+ {
+ po_reg_or_goto (REG_TYPE_NDQ, try_shimm);
+ break;
+ try_shimm:
+ po_imm_or_fail (0, 63, TRUE);
+ }
+ break;
case OP_RRnpcb:
po_char_or_fail ('[');
@@ -3936,9 +4612,14 @@ parse_operands (char *str, const unsigned char *pattern)
case OP_I7: po_imm_or_fail ( 0, 7, FALSE); break;
case OP_I15: po_imm_or_fail ( 0, 15, FALSE); break;
case OP_I16: po_imm_or_fail ( 1, 16, FALSE); break;
+ case OP_I16z: po_imm_or_fail ( 0, 16, FALSE); break;
case OP_I31: po_imm_or_fail ( 0, 31, FALSE); break;
case OP_I32: po_imm_or_fail ( 1, 32, FALSE); break;
+ case OP_I32z: po_imm_or_fail ( 0, 32, FALSE); break;
case OP_I63s: po_imm_or_fail (-64, 63, FALSE); break;
+ case OP_I63: po_imm_or_fail ( 0, 63, FALSE); break;
+ case OP_I64: po_imm_or_fail ( 1, 64, FALSE); break;
+ case OP_I64z: po_imm_or_fail ( 0, 64, FALSE); break;
case OP_I255: po_imm_or_fail ( 0, 255, FALSE); break;
case OP_Iffff: po_imm_or_fail ( 0, 0xffff, FALSE); break;
@@ -3948,6 +4629,7 @@ parse_operands (char *str, const unsigned char *pattern)
case OP_I15b: po_imm_or_fail ( 0, 15, TRUE); break;
case OP_oI31b:
case OP_I31b: po_imm_or_fail ( 0, 31, TRUE); break;
+ case OP_oI32b: po_imm_or_fail ( 1, 32, TRUE); break;
case OP_oIffffb: po_imm_or_fail ( 0, 0xffff, TRUE); break;
/* Immediate variants */
@@ -4066,13 +4748,22 @@ parse_operands (char *str, const unsigned char *pattern)
break;
case OP_VRSLST:
- val = parse_vfp_reg_list (&str, &inst.operands[i].reg, 0);
+ val = parse_vfp_reg_list (&str, &inst.operands[i].reg, REGLIST_VFP_S);
break;
case OP_VRDLST:
- val = parse_vfp_reg_list (&str, &inst.operands[i].reg, 1);
+ val = parse_vfp_reg_list (&str, &inst.operands[i].reg, REGLIST_VFP_D);
break;
+ case OP_NRDLST:
+ val = parse_vfp_reg_list (&str, &inst.operands[i].reg,
+ REGLIST_NEON_D);
+ break;
+
+ case OP_NSTRLST:
+ val = parse_neon_el_struct_list (&str, &inst.operands[i].reg);
+ break;
+
/* Addressing modes */
case OP_ADDR:
po_misc_or_fail (parse_address (&str, i));
@@ -4121,6 +4812,8 @@ parse_operands (char *str, const unsigned char *pattern)
case OP_REGLST:
case OP_VRSLST:
case OP_VRDLST:
+ case OP_NRDLST:
+ case OP_NSTRLST:
if (val == FAIL)
goto failure;
inst.operands[i].imm = val;
@@ -4178,6 +4871,7 @@ parse_operands (char *str, const unsigned char *pattern)
#undef po_reg_or_fail
#undef po_reg_or_goto
#undef po_imm_or_fail
+#undef po_scalar_or_fail
/* Shorthand macro for instruction encoding functions issuing errors. */
#define constraint(expr, err) do { \
@@ -4236,11 +4930,30 @@ encode_thumb32_immediate (unsigned int val)
return FAIL;
}
-/* Encode a VFP SP register number into inst.instruction. */
+/* Encode a VFP SP or DP register number into inst.instruction. */
static void
-encode_arm_vfp_sp_reg (int reg, enum vfp_sp_reg_pos pos)
-{
+encode_arm_vfp_reg (int reg, enum vfp_reg_pos pos)
+{
+ if ((pos == VFP_REG_Dd || pos == VFP_REG_Dn || pos == VFP_REG_Dm)
+ && reg > 15)
+ {
+ if (ARM_CPU_HAS_FEATURE (cpu_variant, fpu_vfp_ext_v3))
+ {
+ if (thumb_mode)
+ ARM_MERGE_FEATURE_SETS (thumb_arch_used, thumb_arch_used,
+ fpu_vfp_ext_v3);
+ else
+ ARM_MERGE_FEATURE_SETS (arm_arch_used, arm_arch_used,
+ fpu_vfp_ext_v3);
+ }
+ else
+ {
+ inst.error = _("D register out of range for selected VFP version");
+ return;
+ }
+ }
+
switch (pos)
{
case VFP_REG_Sd:
@@ -4255,6 +4968,18 @@ encode_arm_vfp_sp_reg (int reg, enum vfp_sp_reg_pos pos)
inst.instruction |= ((reg >> 1) << 0) | ((reg & 1) << 5);
break;
+ case VFP_REG_Dd:
+ inst.instruction |= ((reg & 15) << 12) | ((reg >> 4) << 22);
+ break;
+
+ case VFP_REG_Dn:
+ inst.instruction |= ((reg & 15) << 16) | ((reg >> 4) << 7);
+ break;
+
+ case VFP_REG_Dm:
+ inst.instruction |= (reg & 15) | ((reg >> 4) << 5);
+ break;
+
default:
abort ();
}
@@ -5542,43 +6267,43 @@ do_sxth (void)
static void
do_vfp_sp_monadic (void)
{
- encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd);
- encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sm);
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
+ encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sm);
}
static void
do_vfp_sp_dyadic (void)
{
- encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd);
- encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sn);
- encode_arm_vfp_sp_reg (inst.operands[2].reg, VFP_REG_Sm);
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
+ encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sn);
+ encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Sm);
}
static void
do_vfp_sp_compare_z (void)
{
- encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd);
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
}
static void
do_vfp_dp_sp_cvt (void)
{
- inst.instruction |= inst.operands[0].reg << 12;
- encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sm);
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+ encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sm);
}
static void
do_vfp_sp_dp_cvt (void)
{
- encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd);
- inst.instruction |= inst.operands[1].reg;
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
+ encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dm);
}
static void
do_vfp_reg_from_sp (void)
{
inst.instruction |= inst.operands[0].reg << 12;
- encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sn);
+ encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sn);
}
static void
@@ -5588,13 +6313,13 @@ do_vfp_reg2_from_sp2 (void)
_("only two consecutive VFP SP registers allowed here"));
inst.instruction |= inst.operands[0].reg << 12;
inst.instruction |= inst.operands[1].reg << 16;
- encode_arm_vfp_sp_reg (inst.operands[2].reg, VFP_REG_Sm);
+ encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Sm);
}
static void
do_vfp_sp_from_reg (void)
{
- encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sn);
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sn);
inst.instruction |= inst.operands[1].reg << 12;
}
@@ -5603,7 +6328,7 @@ do_vfp_sp2_from_reg2 (void)
{
constraint (inst.operands[0].imm != 2,
_("only two consecutive VFP SP registers allowed here"));
- encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sm);
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sm);
inst.instruction |= inst.operands[1].reg << 12;
inst.instruction |= inst.operands[2].reg << 16;
}
@@ -5611,14 +6336,14 @@ do_vfp_sp2_from_reg2 (void)
static void
do_vfp_sp_ldst (void)
{
- encode_arm_vfp_sp_reg (inst.operands[0].reg, VFP_REG_Sd);
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
encode_arm_cp_address (1, FALSE, TRUE, 0);
}
static void
do_vfp_dp_ldst (void)
{
- inst.instruction |= inst.operands[0].reg << 12;
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
encode_arm_cp_address (1, FALSE, TRUE, 0);
}
@@ -5632,7 +6357,7 @@ vfp_sp_ldstm (enum vfp_ldstm_type ldstm_type)
constraint (ldstm_type != VFP_LDSTMIA,
_("this addressing mode requires base-register writeback"));
inst.instruction |= inst.operands[0].reg << 16;
- encode_arm_vfp_sp_reg (inst.operands[1].reg, VFP_REG_Sd);
+ encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Sd);
inst.instruction |= inst.operands[1].imm;
}
@@ -5648,7 +6373,7 @@ vfp_dp_ldstm (enum vfp_ldstm_type ldstm_type)
_("this addressing mode requires base-register writeback"));
inst.instruction |= inst.operands[0].reg << 16;
- inst.instruction |= inst.operands[1].reg << 12;
+ encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dd);
count = inst.operands[1].imm << 1;
if (ldstm_type == VFP_LDSTMIAX || ldstm_type == VFP_LDSTMDBX)
@@ -5692,6 +6417,103 @@ do_vfp_xp_ldstmdb (void)
{
vfp_dp_ldstm (VFP_LDSTMDBX);
}
+
+static void
+do_vfp_dp_rd_rm (void)
+{
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+ encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dm);
+}
+
+static void
+do_vfp_dp_rn_rd (void)
+{
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dn);
+ encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dd);
+}
+
+static void
+do_vfp_dp_rd_rn (void)
+{
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+ encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dn);
+}
+
+static void
+do_vfp_dp_rd_rn_rm (void)
+{
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+ encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dn);
+ encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Dm);
+}
+
+static void
+do_vfp_dp_rd (void)
+{
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+}
+
+static void
+do_vfp_dp_rm_rd_rn (void)
+{
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dm);
+ encode_arm_vfp_reg (inst.operands[1].reg, VFP_REG_Dd);
+ encode_arm_vfp_reg (inst.operands[2].reg, VFP_REG_Dn);
+}
+
+/* VFPv3 instructions. */
+static void
+do_vfp_sp_const (void)
+{
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
+ inst.instruction |= (inst.operands[1].imm & 15) << 16;
+ inst.instruction |= (inst.operands[1].imm >> 4);
+}
+
+static void
+do_vfp_dp_const (void)
+{
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+ inst.instruction |= (inst.operands[1].imm & 15) << 16;
+ inst.instruction |= (inst.operands[1].imm >> 4);
+}
+
+static void
+vfp_conv (int srcsize)
+{
+ unsigned immbits = srcsize - inst.operands[1].imm;
+ inst.instruction |= (immbits & 1) << 5;
+ inst.instruction |= (immbits >> 1);
+}
+
+static void
+do_vfp_sp_conv_16 (void)
+{
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
+ vfp_conv (16);
+}
+
+static void
+do_vfp_dp_conv_16 (void)
+{
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+ vfp_conv (16);
+}
+
+static void
+do_vfp_sp_conv_32 (void)
+{
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Sd);
+ vfp_conv (32);
+}
+
+static void
+do_vfp_dp_conv_32 (void)
+{
+ encode_arm_vfp_reg (inst.operands[0].reg, VFP_REG_Dd);
+ vfp_conv (32);
+}
+
/* FPA instructions. Also in a logical order. */
@@ -7998,6 +8820,2549 @@ do_t_usat16 (void)
inst.instruction |= inst.operands[1].imm;
inst.instruction |= inst.operands[2].reg << 16;
}
+
+/* Neon instruction encoder helpers. */
+
+/* Encodings for the different types for various Neon opcodes. */
+
+/* An "invalid" code for the following tables. */
+#define N_INV -1u
+
+struct neon_tab_entry
+{
+ unsigned integer;
+ unsigned float_or_poly;
+ unsigned scalar_or_imm;
+};
+
+/* Map overloaded Neon opcodes to their respective encodings. */
+#define NEON_ENC_TAB \
+ X(vabd, 0x0000700, 0x1200d00, N_INV), \
+ X(vmax, 0x0000600, 0x0000f00, N_INV), \
+ X(vmin, 0x0000610, 0x0200f00, N_INV), \
+ X(vpadd, 0x0000b10, 0x1000d00, N_INV), \
+ X(vpmax, 0x0000a00, 0x1000f00, N_INV), \
+ X(vpmin, 0x0000a10, 0x1200f00, N_INV), \
+ X(vadd, 0x0000800, 0x0000d00, N_INV), \
+ X(vsub, 0x1000800, 0x0200d00, N_INV), \
+ X(vceq, 0x1000810, 0x0000e00, 0x1b10100), \
+ X(vcge, 0x0000310, 0x1000e00, 0x1b10080), \
+ X(vcgt, 0x0000300, 0x1200e00, 0x1b10000), \
+ /* Register variants of the following two instructions are encoded as
+ vcge / vcgt with the operands reversed. */ \
+ X(vclt, 0x0000310, 0x1000e00, 0x1b10200), \
+ X(vcle, 0x0000300, 0x1200e00, 0x1b10180), \
+ X(vmla, 0x0000900, 0x0000d10, 0x0800040), \
+ X(vmls, 0x1000900, 0x0200d10, 0x0800440), \
+ X(vmul, 0x0000910, 0x1000d10, 0x0800840), \
+ X(vmull, 0x0800c00, 0x0800e00, 0x0800a40), /* polynomial not float. */ \
+ X(vmlal, 0x0800800, N_INV, 0x0800240), \
+ X(vmlsl, 0x0800a00, N_INV, 0x0800640), \
+ X(vqdmlal, 0x0800900, N_INV, 0x0800340), \
+ X(vqdmlsl, 0x0800b00, N_INV, 0x0800740), \
+ X(vqdmull, 0x0800d00, N_INV, 0x0800b40), \
+ X(vqdmulh, 0x0000b00, N_INV, 0x0800c40), \
+ X(vqrdmulh, 0x1000b00, N_INV, 0x0800d40), \
+ X(vshl, 0x0000400, N_INV, 0x0800510), \
+ X(vqshl, 0x0000410, N_INV, 0x0800710), \
+ X(vand, 0x0000110, N_INV, 0x0800030), \
+ X(vbic, 0x0100110, N_INV, 0x0800030), \
+ X(veor, 0x1000110, N_INV, N_INV), \
+ X(vorn, 0x0300110, N_INV, 0x0800010), \
+ X(vorr, 0x0200110, N_INV, 0x0800010), \
+ X(vmvn, 0x1b00580, N_INV, 0x0800030), \
+ X(vshll, 0x1b20300, N_INV, 0x0800a10), /* max shift, immediate. */ \
+ X(vcvt, 0x1b30600, N_INV, 0x0800e10), /* integer, fixed-point. */ \
+ X(vdup, 0xe800b10, N_INV, 0x1b00c00), /* arm, scalar. */ \
+ X(vld1, 0x0200000, 0x0a00000, 0x0a00c00), /* interlv, lane, dup. */ \
+ X(vst1, 0x0000000, 0x0800000, N_INV), \
+ X(vld2, 0x0200100, 0x0a00100, 0x0a00d00), \
+ X(vst2, 0x0000100, 0x0800100, N_INV), \
+ X(vld3, 0x0200200, 0x0a00200, 0x0a00e00), \
+ X(vst3, 0x0000200, 0x0800200, N_INV), \
+ X(vld4, 0x0200300, 0x0a00300, 0x0a00f00), \
+ X(vst4, 0x0000300, 0x0800300, N_INV), \
+ X(vmovn, 0x1b20200, N_INV, N_INV), \
+ X(vtrn, 0x1b20080, N_INV, N_INV), \
+ X(vqmovn, 0x1b20200, N_INV, N_INV), \
+ X(vqmovun, 0x1b20240, N_INV, N_INV)
+
+enum neon_opc
+{
+#define X(OPC,I,F,S) N_MNEM_##OPC
+NEON_ENC_TAB
+#undef X
+};
+
+static const struct neon_tab_entry neon_enc_tab[] =
+{
+#define X(OPC,I,F,S) { (I), (F), (S) }
+NEON_ENC_TAB
+#undef X
+};
+
+#define NEON_ENC_INTEGER(X) (neon_enc_tab[(X) & 0x0fffffff].integer)
+#define NEON_ENC_ARMREG(X) (neon_enc_tab[(X) & 0x0fffffff].integer)
+#define NEON_ENC_POLY(X) (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
+#define NEON_ENC_FLOAT(X) (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
+#define NEON_ENC_SCALAR(X) (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
+#define NEON_ENC_IMMED(X) (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
+#define NEON_ENC_INTERLV(X) (neon_enc_tab[(X) & 0x0fffffff].integer)
+#define NEON_ENC_LANE(X) (neon_enc_tab[(X) & 0x0fffffff].float_or_poly)
+#define NEON_ENC_DUP(X) (neon_enc_tab[(X) & 0x0fffffff].scalar_or_imm)
+
+/* Shapes for instruction operands. Some (e.g. NS_DDD_QQQ) represent multiple
+ shapes which an instruction can accept. The following mnemonic characters
+ are used in the tag names for this enumeration:
+
+ D - Neon D<n> register
+ Q - Neon Q<n> register
+ I - Immediate
+ S - Scalar
+ R - ARM register
+ L - D<n> register list
+*/
+
+enum neon_shape
+{
+ NS_DDD_QQQ,
+ NS_DDD,
+ NS_QQQ,
+ NS_DDI_QQI,
+ NS_DDI,
+ NS_QQI,
+ NS_DDS_QQS,
+ NS_DDS,
+ NS_QQS,
+ NS_DD_QQ,
+ NS_DD,
+ NS_QQ,
+ NS_DS_QS,
+ NS_DS,
+ NS_QS,
+ NS_DR_QR,
+ NS_DR,
+ NS_QR,
+ NS_DI_QI,
+ NS_DI,
+ NS_QI,
+ NS_DLD,
+ NS_DQ,
+ NS_QD,
+ NS_DQI,
+ NS_QDI,
+ NS_QDD,
+ NS_QDS,
+ NS_QQD,
+ NS_DQQ,
+ NS_DDDI_QQQI,
+ NS_DDDI,
+ NS_QQQI,
+ NS_IGNORE
+};
+
+/* Bit masks used in type checking given instructions.
+ 'N_EQK' means the type must be the same as (or based on in some way) the key
+ type, which itself is marked with the 'N_KEY' bit. If the 'N_EQK' bit is
+ set, various other bits can be set as well in order to modify the meaning of
+ the type constraint. */
+
+enum neon_type_mask
+{
+ N_S8 = 0x000001,
+ N_S16 = 0x000002,
+ N_S32 = 0x000004,
+ N_S64 = 0x000008,
+ N_U8 = 0x000010,
+ N_U16 = 0x000020,
+ N_U32 = 0x000040,
+ N_U64 = 0x000080,
+ N_I8 = 0x000100,
+ N_I16 = 0x000200,
+ N_I32 = 0x000400,
+ N_I64 = 0x000800,
+ N_8 = 0x001000,
+ N_16 = 0x002000,
+ N_32 = 0x004000,
+ N_64 = 0x008000,
+ N_P8 = 0x010000,
+ N_P16 = 0x020000,
+ N_F32 = 0x040000,
+ N_KEY = 0x080000, /* key element (main type specifier). */
+ N_EQK = 0x100000, /* given operand has the same type & size as the key. */
+ N_DBL = 0x000001, /* if N_EQK, this operand is twice the size. */
+ N_HLF = 0x000002, /* if N_EQK, this operand is half the size. */
+ N_SGN = 0x000004, /* if N_EQK, this operand is forced to be signed. */
+ N_UNS = 0x000008, /* if N_EQK, this operand is forced to be unsigned. */
+ N_INT = 0x000010, /* if N_EQK, this operand is forced to be integer. */
+ N_FLT = 0x000020, /* if N_EQK, this operand is forced to be float. */
+ N_UTYP = 0,
+ N_MAX_NONSPECIAL = N_F32
+};
+
+#define N_SU_ALL (N_S8 | N_S16 | N_S32 | N_S64 | N_U8 | N_U16 | N_U32 | N_U64)
+#define N_SU_32 (N_S8 | N_S16 | N_S32 | N_U8 | N_U16 | N_U32)
+#define N_SU_16_64 (N_S16 | N_S32 | N_S64 | N_U16 | N_U32 | N_U64)
+#define N_SUF_32 (N_SU_32 | N_F32)
+#define N_I_ALL (N_I8 | N_I16 | N_I32 | N_I64)
+#define N_IF_32 (N_I8 | N_I16 | N_I32 | N_F32)
+
+/* Pass this as the first type argument to neon_check_type to ignore types
+ altogether. */
+#define N_IGNORE_TYPE (N_KEY | N_EQK)
+
+/* Check the shape of a Neon instruction (sizes of registers). Returns the more
+ specific shape when there are two alternatives. For non-polymorphic shapes,
+ checking is done during operand parsing, so is not implemented here. */
+
+static enum neon_shape
+neon_check_shape (enum neon_shape req)
+{
+#define RR(X) (inst.operands[(X)].isreg)
+#define RD(X) (inst.operands[(X)].isreg && !inst.operands[(X)].isquad)
+#define RQ(X) (inst.operands[(X)].isreg && inst.operands[(X)].isquad)
+#define IM(X) (!inst.operands[(X)].isreg && !inst.operands[(X)].isscalar)
+#define SC(X) (!inst.operands[(X)].isreg && inst.operands[(X)].isscalar)
+
+ /* Fix missing optional operands. FIXME: we don't know at this point how
+ many arguments we should have, so this makes the assumption that we have
+ > 1. This is true of all current Neon opcodes, I think, but may not be
+ true in the future. */
+ if (!inst.operands[1].present)
+ inst.operands[1] = inst.operands[0];
+
+ switch (req)
+ {
+ case NS_DDD_QQQ:
+ {
+ if (RD(0) && RD(1) && RD(2))
+ return NS_DDD;
+ else if (RQ(0) && RQ(1) && RQ(1))
+ return NS_QQQ;
+ else
+ inst.error = _("expected <Qd>, <Qn>, <Qm> or <Dd>, <Dn>, <Dm> "
+ "operands");
+ }
+ break;
+
+ case NS_DDI_QQI:
+ {
+ if (RD(0) && RD(1) && IM(2))
+ return NS_DDI;
+ else if (RQ(0) && RQ(1) && IM(2))
+ return NS_QQI;
+ else
+ inst.error = _("expected <Qd>, <Qn>, #<imm> or <Dd>, <Dn>, #<imm> "
+ "operands");
+ }
+ break;
+
+ case NS_DDDI_QQQI:
+ {
+ if (RD(0) && RD(1) && RD(2) && IM(3))
+ return NS_DDDI;
+ if (RQ(0) && RQ(1) && RQ(2) && IM(3))
+ return NS_QQQI;
+ else
+ inst.error = _("expected <Qd>, <Qn>, <Qm>, #<imm> or "
+ "<Dd>, <Dn>, <Dm>, #<imm> operands");
+ }
+ break;
+
+ case NS_DDS_QQS:
+ {
+ if (RD(0) && RD(1) && SC(2))
+ return NS_DDS;
+ else if (RQ(0) && RQ(1) && SC(2))
+ return NS_QQS;
+ else
+ inst.error = _("expected <Qd>, <Qn>, <Dm[x]> or <Dd>, <Dn>, <Dm[x]> "
+ "operands");
+ }
+ break;
+
+ case NS_DD_QQ:
+ {
+ if (RD(0) && RD(1))
+ return NS_DD;
+ else if (RQ(0) && RQ(1))
+ return NS_QQ;
+ else
+ inst.error = _("expected <Qd>, <Qm> or <Dd>, <Dm> operands");
+ }
+ break;
+
+ case NS_DS_QS:
+ {
+ if (RD(0) && SC(1))
+ return NS_DS;
+ else if (RQ(0) && SC(1))
+ return NS_QS;
+ else
+ inst.error = _("expected <Qd>, <Dm[x]> or <Dd>, <Dm[x]> operands");
+ }
+ break;
+
+ case NS_DR_QR:
+ {
+ if (RD(0) && RR(1))
+ return NS_DR;
+ else if (RQ(0) && RR(1))
+ return NS_QR;
+ else
+ inst.error = _("expected <Qd>, <Rm> or <Dd>, <Rm> operands");
+ }
+ break;
+
+ case NS_DI_QI:
+ {
+ if (RD(0) && IM(1))
+ return NS_DI;
+ else if (RQ(0) && IM(1))
+ return NS_QI;
+ else
+ inst.error = _("expected <Qd>, #<imm> or <Dd>, #<imm> operands");
+ }
+ break;
+
+ default:
+ abort ();
+ }
+
+ return req;
+#undef RR
+#undef RD
+#undef RQ
+#undef IM
+#undef SC
+}
+
+static void
+neon_modify_type_size (unsigned typebits, enum neon_el_type *g_type,
+ unsigned *g_size)
+{
+ /* Allow modification to be made to types which are constrained to be
+ based on the key element, based on bits set alongside N_EQK. */
+ if ((typebits & N_EQK) != 0)
+ {
+ if ((typebits & N_HLF) != 0)
+ *g_size /= 2;
+ else if ((typebits & N_DBL) != 0)
+ *g_size *= 2;
+ if ((typebits & N_SGN) != 0)
+ *g_type = NT_signed;
+ else if ((typebits & N_UNS) != 0)
+ *g_type = NT_unsigned;
+ else if ((typebits & N_INT) != 0)
+ *g_type = NT_integer;
+ else if ((typebits & N_FLT) != 0)
+ *g_type = NT_float;
+ }
+}
+
+/* Return operand OPNO promoted by bits set in THISARG. KEY should be the "key"
+ operand type, i.e. the single type specified in a Neon instruction when it
+ is the only one given. */
+
+static struct neon_type_el
+neon_type_promote (struct neon_type_el *key, unsigned thisarg)
+{
+ struct neon_type_el dest = *key;
+
+ assert ((thisarg & N_EQK) != 0);
+
+ neon_modify_type_size (thisarg, &dest.type, &dest.size);
+
+ return dest;
+}
+
+/* Convert Neon type and size into compact bitmask representation. */
+
+static enum neon_type_mask
+type_chk_of_el_type (enum neon_el_type type, unsigned size)
+{
+ switch (type)
+ {
+ case NT_untyped:
+ switch (size)
+ {
+ case 8: return N_8;
+ case 16: return N_16;
+ case 32: return N_32;
+ case 64: return N_64;
+ default: ;
+ }
+ break;
+
+ case NT_integer:
+ switch (size)
+ {
+ case 8: return N_I8;
+ case 16: return N_I16;
+ case 32: return N_I32;
+ case 64: return N_I64;
+ default: ;
+ }
+ break;
+
+ case NT_float:
+ if (size == 32)
+ return N_F32;
+ break;
+
+ case NT_poly:
+ switch (size)
+ {
+ case 8: return N_P8;
+ case 16: return N_P16;
+ default: ;
+ }
+ break;
+
+ case NT_signed:
+ switch (size)
+ {
+ case 8: return N_S8;
+ case 16: return N_S16;
+ case 32: return N_S32;
+ case 64: return N_S64;
+ default: ;
+ }
+ break;
+
+ case NT_unsigned:
+ switch (size)
+ {
+ case 8: return N_U8;
+ case 16: return N_U16;
+ case 32: return N_U32;
+ case 64: return N_U64;
+ default: ;
+ }
+ break;
+
+ default: ;
+ }
+
+ return N_UTYP;
+}
+
+/* Convert compact Neon bitmask type representation to a type and size. Only
+ handles the case where a single bit is set in the mask. */
+
+static void
+el_type_of_type_chk (enum neon_el_type *type, unsigned *size,
+ enum neon_type_mask mask)
+{
+ if ((mask & (N_S8 | N_U8 | N_I8 | N_8 | N_P8)) != 0)
+ *size = 8;
+ if ((mask & (N_S16 | N_U16 | N_I16 | N_16 | N_P16)) != 0)
+ *size = 16;
+ if ((mask & (N_S32 | N_U32 | N_I32 | N_32 | N_F32)) != 0)
+ *size = 32;
+ if ((mask & (N_S64 | N_U64 | N_I64 | N_64)) != 0)
+ *size = 64;
+ if ((mask & (N_S8 | N_S16 | N_S32 | N_S64)) != 0)
+ *type = NT_signed;
+ if ((mask & (N_U8 | N_U16 | N_U32 | N_U64)) != 0)
+ *type = NT_unsigned;
+ if ((mask & (N_I8 | N_I16 | N_I32 | N_I64)) != 0)
+ *type = NT_integer;
+ if ((mask & (N_8 | N_16 | N_32 | N_64)) != 0)
+ *type = NT_untyped;
+ if ((mask & (N_P8 | N_P16)) != 0)
+ *type = NT_poly;
+ if ((mask & N_F32) != 0)
+ *type = NT_float;
+}
+
+/* Modify a bitmask of allowed types. This is only needed for type
+ relaxation. */
+
+static unsigned
+modify_types_allowed (unsigned allowed, unsigned mods)
+{
+ unsigned size;
+ enum neon_el_type type;
+ unsigned destmask;
+ int i;
+
+ destmask = 0;
+
+ for (i = 1; i <= N_MAX_NONSPECIAL; i <<= 1)
+ {
+ el_type_of_type_chk (&type, &size, allowed & i);
+ neon_modify_type_size (mods, &type, &size);
+ destmask |= type_chk_of_el_type (type, size);
+ }
+
+ return destmask;
+}
+
+/* Check type and return type classification.
+ The manual states (paraphrase): If one datatype is given, it indicates the
+ type given in:
+ - the second operand, if there is one
+ - the operand, if there is no second operand
+ - the result, if there are no operands.
+ This isn't quite good enough though, so we use a concept of a "key" datatype
+ which is set on a per-instruction basis, which is the one which matters when
+ only one data type is written.
+ Note: this function has side-effects (e.g. filling in missing operands). All
+ Neon instructions should call it before performing bit encoding.
+*/
+
+static struct neon_type_el
+neon_check_type (unsigned els, enum neon_shape ns, ...)
+{
+ va_list ap;
+ unsigned i, pass, key_el = 0;
+ unsigned types[NEON_MAX_TYPE_ELS];
+ enum neon_el_type k_type = NT_invtype;
+ unsigned k_size = -1u;
+ struct neon_type_el badtype = {NT_invtype, -1};
+ unsigned key_allowed = 0;
+
+ /* Optional registers in Neon instructions are always (not) in operand 1.
+ Fill in the missing operand here, if it was omitted. */
+ if (els > 1 && !inst.operands[1].present)
+ inst.operands[1] = inst.operands[0];
+
+ /* Suck up all the varargs. */
+ va_start (ap, ns);
+ for (i = 0; i < els; i++)
+ {
+ unsigned thisarg = va_arg (ap, unsigned);
+ if (thisarg == N_IGNORE_TYPE)
+ {
+ va_end (ap);
+ return badtype;
+ }
+ types[i] = thisarg;
+ if ((thisarg & N_KEY) != 0)
+ key_el = i;
+ }
+ va_end (ap);
+
+ /* Duplicate inst.vectype elements here as necessary.
+ FIXME: No idea if this is exactly the same as the ARM assembler,
+ particularly when an insn takes one register and one non-register
+ operand. */
+ if (inst.vectype.elems == 1 && els > 1)
+ {
+ unsigned j;
+ inst.vectype.elems = els;
+ inst.vectype.el[key_el] = inst.vectype.el[0];
+ for (j = 0; j < els; j++)
+ {
+ if (j != key_el)
+ inst.vectype.el[j] = neon_type_promote (&inst.vectype.el[key_el],
+ types[j]);
+ }
+ }
+ else if (inst.vectype.elems != els)
+ {
+ inst.error = _("type specifier has the wrong number of parts");
+ return badtype;
+ }
+
+ for (pass = 0; pass < 2; pass++)
+ {
+ for (i = 0; i < els; i++)
+ {
+ unsigned thisarg = types[i];
+ unsigned types_allowed = ((thisarg & N_EQK) != 0 && pass != 0)
+ ? modify_types_allowed (key_allowed, thisarg) : thisarg;
+ enum neon_el_type g_type = inst.vectype.el[i].type;
+ unsigned g_size = inst.vectype.el[i].size;
+
+ /* Decay more-specific signed & unsigned types to sign-insensitive
+ integer types if sign-specific variants are unavailable. */
+ if ((g_type == NT_signed || g_type == NT_unsigned)
+ && (types_allowed & N_SU_ALL) == 0)
+ g_type = NT_integer;
+
+ /* If only untyped args are allowed, decay any more specific types to
+ them. Some instructions only care about signs for some element
+ sizes, so handle that properly. */
+ if ((g_size == 8 && (types_allowed & N_8) != 0)
+ || (g_size == 16 && (types_allowed & N_16) != 0)
+ || (g_size == 32 && (types_allowed & N_32) != 0)
+ || (g_size == 64 && (types_allowed & N_64) != 0))
+ g_type = NT_untyped;
+
+ if (pass == 0)
+ {
+ if ((thisarg & N_KEY) != 0)
+ {
+ k_type = g_type;
+ k_size = g_size;
+ key_allowed = thisarg & ~N_KEY;
+ }
+ }
+ else
+ {
+ if ((thisarg & N_EQK) == 0)
+ {
+ unsigned given_type = type_chk_of_el_type (g_type, g_size);
+
+ if ((given_type & types_allowed) == 0)
+ {
+ inst.error = _("bad type in Neon instruction");
+ return badtype;
+ }
+ }
+ else
+ {
+ enum neon_el_type mod_k_type = k_type;
+ unsigned mod_k_size = k_size;
+ neon_modify_type_size (thisarg, &mod_k_type, &mod_k_size);
+ if (g_type != mod_k_type || g_size != mod_k_size)
+ {
+ inst.error = _("inconsistent types in Neon instruction");
+ return badtype;
+ }
+ }
+ }
+ }
+ }
+
+ return inst.vectype.el[key_el];
+}
+
+/* Fix up Neon data-processing instructions, ORing in the correct bits for
+ ARM mode or Thumb mode and moving the encoded bit 24 to bit 28. */
+
+static unsigned
+neon_dp_fixup (unsigned i)
+{
+ if (thumb_mode)
+ {
+ /* The U bit is at bit 24 by default. Move to bit 28 in Thumb mode. */
+ if (i & (1 << 24))
+ i |= 1 << 28;
+
+ i &= ~(1 << 24);
+
+ i |= 0xef000000;
+ }
+ else
+ i |= 0xf2000000;
+
+ return i;
+}
+
+/* Turn a size (8, 16, 32, 64) into the respective bit number minus 3
+ (0, 1, 2, 3). */
+
+static unsigned
+neon_logbits (unsigned x)
+{
+ return ffs (x) - 4;
+}
+
+#define LOW4(R) ((R) & 0xf)
+#define HI1(R) (((R) >> 4) & 1)
+
+/* Encode insns with bit pattern:
+
+ |28/24|23|22 |21 20|19 16|15 12|11 8|7|6|5|4|3 0|
+ | U |x |D |size | Rn | Rd |x x x x|N|Q|M|x| Rm |
+
+ SIZE is passed in bits. -1 means size field isn't changed, in case it has a
+ different meaning for some instruction. */
+
+static void
+neon_three_same (int first_optional, int isquad, int ubit, int size)
+{
+ /* FIXME optional argument handling. */
+ if (first_optional && !inst.operands[0].present)
+ inst.operands[0].reg = inst.operands[1].reg;
+
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= (isquad != 0) << 6;
+ inst.instruction |= (ubit != 0) << 24;
+ if (size != -1)
+ inst.instruction |= neon_logbits (size) << 20;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Encode instructions of the form:
+
+ |28/24|23|22|21 20|19 18|17 16|15 12|11 7|6|5|4|3 0|
+ | U |x |D |x x |size |x x | Rd |x x x x x|Q|M|x| Rm |
+
+ Don't write size if SIZE == -1. */
+
+static void
+neon_two_same (int qbit, int ubit, int size)
+{
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= (qbit != 0) << 6;
+ inst.instruction |= (ubit != 0) << 24;
+
+ if (size != -1)
+ inst.instruction |= neon_logbits (size) << 18;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Neon instruction encoders, in approximate order of appearance. */
+
+static void
+do_neon_dyadic_i_su (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_SU_32 | N_KEY);
+ neon_three_same (TRUE, rs == NS_QQQ, et.type == NT_unsigned, et.size);
+}
+
+static void
+do_neon_dyadic_i64_su (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_SU_ALL | N_KEY);
+ neon_three_same (TRUE, rs == NS_QQQ, et.type == NT_unsigned, et.size);
+}
+
+static void
+neon_imm_shift (int write_ubit, int uval, int isquad, struct neon_type_el et,
+ unsigned immbits)
+{
+ unsigned size = et.size >> 3;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= (isquad != 0) << 6;
+ inst.instruction |= immbits << 16;
+ inst.instruction |= (size >> 3) << 7;
+ inst.instruction |= (size & 0x7) << 19;
+ if (write_ubit)
+ inst.instruction |= (uval != 0) << 24;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+static void
+do_neon_shl_imm (void)
+{
+ if (!inst.operands[2].isreg)
+ {
+ enum neon_shape rs = neon_check_shape (NS_DDI_QQI);
+ struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_KEY | N_I_ALL);
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+ neon_imm_shift (FALSE, 0, rs == NS_QQI, et, inst.operands[2].imm);
+ }
+ else
+ {
+ enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_three_same (TRUE, rs == NS_QQQ, et.type == NT_unsigned, et.size);
+ }
+}
+
+static void
+do_neon_qshl_imm (void)
+{
+ if (!inst.operands[2].isreg)
+ {
+ enum neon_shape rs = neon_check_shape (NS_DDI_QQI);
+ struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+ neon_imm_shift (TRUE, et.type == NT_unsigned, rs == NS_QQI, et,
+ inst.operands[2].imm);
+ }
+ else
+ {
+ enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_SU_ALL | N_KEY, N_EQK | N_SGN);
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_three_same (TRUE, rs == NS_QQQ, et.type == NT_unsigned, et.size);
+ }
+}
+
+static int
+neon_cmode_for_logic_imm (unsigned immediate, unsigned *immbits, int size)
+{
+ /* Handle .I8 and .I64 as pseudo-instructions. */
+ switch (size)
+ {
+ case 8:
+ /* Unfortunately, this will make everything apart from zero out-of-range.
+ FIXME is this the intended semantics? There doesn't seem much point in
+ accepting .I8 if so. */
+ immediate |= immediate << 8;
+ size = 16;
+ break;
+ case 64:
+ /* Similarly, anything other than zero will be replicated in bits [63:32],
+ which probably isn't want we want if we specified .I64. */
+ if (immediate != 0)
+ goto bad_immediate;
+ size = 32;
+ break;
+ default: ;
+ }
+
+ if (immediate == (immediate & 0x000000ff))
+ {
+ *immbits = immediate;
+ return (size == 16) ? 0x9 : 0x1;
+ }
+ else if (immediate == (immediate & 0x0000ff00))
+ {
+ *immbits = immediate >> 8;
+ return (size == 16) ? 0xb : 0x3;
+ }
+ else if (immediate == (immediate & 0x00ff0000))
+ {
+ *immbits = immediate >> 16;
+ return 0x5;
+ }
+ else if (immediate == (immediate & 0xff000000))
+ {
+ *immbits = immediate >> 24;
+ return 0x7;
+ }
+
+ bad_immediate:
+ inst.error = _("immediate value out of range");
+ return FAIL;
+}
+
+/* True if IMM has form 0bAAAAAAAABBBBBBBBCCCCCCCCDDDDDDDD for bits
+ A, B, C, D. */
+
+static int
+neon_bits_same_in_bytes (unsigned imm)
+{
+ return ((imm & 0x000000ff) == 0 || (imm & 0x000000ff) == 0x000000ff)
+ && ((imm & 0x0000ff00) == 0 || (imm & 0x0000ff00) == 0x0000ff00)
+ && ((imm & 0x00ff0000) == 0 || (imm & 0x00ff0000) == 0x00ff0000)
+ && ((imm & 0xff000000) == 0 || (imm & 0xff000000) == 0xff000000);
+}
+
+/* For immediate of above form, return 0bABCD. */
+
+static unsigned
+neon_squash_bits (unsigned imm)
+{
+ return (imm & 0x01) | ((imm & 0x0100) >> 7) | ((imm & 0x010000) >> 14)
+ | ((imm & 0x01000000) >> 21);
+}
+
+/* Returns 1 if a number has "quarter-precision" float format
+ 0baBbbbbbc defgh000 00000000 00000000. */
+
+static int
+neon_is_quarter_float (unsigned imm)
+{
+ int b = (imm & 0x20000000) != 0;
+ int bs = (b << 25) | (b << 26) | (b << 27) | (b << 28) | (b << 29)
+ | ((!b) << 30);
+ return (imm & 0x81ffffff) == (imm & 0x81f80000)
+ && ((imm & 0x7e000000) ^ bs) == 0;
+}
+
+/* Compress above representation to 0b...000 abcdefgh. */
+
+static unsigned
+neon_qfloat_bits (unsigned imm)
+{
+ return ((imm >> 19) & 0x7f) | (imm >> 24);
+}
+
+/* Returns CMODE. IMMBITS [7:0] is set to bits suitable for inserting into
+ the instruction. *OP is passed as the initial value of the op field, and
+ may be set to a different value depending on the constant (i.e.
+ "MOV I64, 0bAAAAAAAABBBB..." which uses OP = 1 despite being MOV not
+ MVN). */
+
+static int
+neon_cmode_for_move_imm (unsigned immlo, unsigned immhi, unsigned *immbits,
+ int *op, int size)
+{
+ if (size == 64 && neon_bits_same_in_bytes (immhi)
+ && neon_bits_same_in_bytes (immlo))
+ {
+ /* Check this one first so we don't have to bother with immhi in later
+ tests. */
+ if (*op == 1)
+ return FAIL;
+ *immbits = (neon_squash_bits (immhi) << 4) | neon_squash_bits (immlo);
+ *op = 1;
+ return 0xe;
+ }
+ else if (immhi != 0)
+ return FAIL;
+ else if (immlo == (immlo & 0x000000ff))
+ {
+ /* 64-bit case was already handled. Don't allow MVN with 8-bit
+ immediate. */
+ if ((size != 8 && size != 16 && size != 32)
+ || (size == 8 && *op == 1))
+ return FAIL;
+ *immbits = immlo;
+ return (size == 8) ? 0xe : (size == 16) ? 0x8 : 0x0;
+ }
+ else if (immlo == (immlo & 0x0000ff00))
+ {
+ if (size != 16 && size != 32)
+ return FAIL;
+ *immbits = immlo >> 8;
+ return (size == 16) ? 0xa : 0x2;
+ }
+ else if (immlo == (immlo & 0x00ff0000))
+ {
+ if (size != 32)
+ return FAIL;
+ *immbits = immlo >> 16;
+ return 0x4;
+ }
+ else if (immlo == (immlo & 0xff000000))
+ {
+ if (size != 32)
+ return FAIL;
+ *immbits = immlo >> 24;
+ return 0x6;
+ }
+ else if (immlo == ((immlo & 0x0000ff00) | 0x000000ff))
+ {
+ if (size != 32)
+ return FAIL;
+ *immbits = (immlo >> 8) & 0xff;
+ return 0xc;
+ }
+ else if (immlo == ((immlo & 0x00ff0000) | 0x0000ffff))
+ {
+ if (size != 32)
+ return FAIL;
+ *immbits = (immlo >> 16) & 0xff;
+ return 0xd;
+ }
+ else if (neon_is_quarter_float (immlo))
+ {
+ if (size != 32 || *op == 1)
+ return FAIL;
+ *immbits = neon_qfloat_bits (immlo);
+ return 0xf;
+ }
+
+ return FAIL;
+}
+
+/* Write immediate bits [7:0] to the following locations:
+
+ |28/24|23 19|18 16|15 4|3 0|
+ | a |x x x x x|b c d|x x x x x x x x x x x x|e f g h|
+
+ This function is used by VMOV/VMVN/VORR/VBIC. */
+
+static void
+neon_write_immbits (unsigned immbits)
+{
+ inst.instruction |= immbits & 0xf;
+ inst.instruction |= ((immbits >> 4) & 0x7) << 16;
+ inst.instruction |= ((immbits >> 7) & 0x1) << 24;
+}
+
+/* Invert low-order SIZE bits of XHI:XLO. */
+
+static void
+neon_invert_size (unsigned *xlo, unsigned *xhi, int size)
+{
+ unsigned immlo = xlo ? *xlo : 0;
+ unsigned immhi = xhi ? *xhi : 0;
+
+ switch (size)
+ {
+ case 8:
+ immlo = (~immlo) & 0xff;
+ break;
+
+ case 16:
+ immlo = (~immlo) & 0xffff;
+ break;
+
+ case 64:
+ immhi = (~immhi) & 0xffffffff;
+ /* fall through. */
+
+ case 32:
+ immlo = (~immlo) & 0xffffffff;
+ break;
+
+ default:
+ abort ();
+ }
+
+ if (xlo)
+ *xlo = immlo;
+
+ if (xhi)
+ *xhi = immhi;
+}
+
+static void
+do_neon_logic (void)
+{
+ if (inst.operands[2].present && inst.operands[2].isreg)
+ {
+ enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+ neon_check_type (3, rs, N_IGNORE_TYPE);
+ /* U bit and size field were set as part of the bitmask. */
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_three_same (TRUE, rs == NS_QQQ, 0, -1);
+ }
+ else
+ {
+ enum neon_shape rs = neon_check_shape (NS_DI_QI);
+ struct neon_type_el et = neon_check_type (1, rs, N_I8 | N_I16 | N_I32
+ | N_I64 | N_F32);
+ enum neon_opc opcode = inst.instruction & 0x0fffffff;
+ unsigned immbits;
+ int cmode;
+
+ if (et.type == NT_invtype)
+ return;
+
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+
+ switch (opcode)
+ {
+ case N_MNEM_vbic:
+ cmode = neon_cmode_for_logic_imm (inst.operands[1].imm, &immbits,
+ et.size);
+ break;
+
+ case N_MNEM_vorr:
+ cmode = neon_cmode_for_logic_imm (inst.operands[1].imm, &immbits,
+ et.size);
+ break;
+
+ case N_MNEM_vand:
+ /* Pseudo-instruction for VBIC. */
+ immbits = inst.operands[1].imm;
+ neon_invert_size (&immbits, 0, et.size);
+ cmode = neon_cmode_for_logic_imm (immbits, &immbits, et.size);
+ break;
+
+ case N_MNEM_vorn:
+ /* Pseudo-instruction for VORR. */
+ immbits = inst.operands[1].imm;
+ neon_invert_size (&immbits, 0, et.size);
+ cmode = neon_cmode_for_logic_imm (immbits, &immbits, et.size);
+ break;
+
+ default:
+ abort ();
+ }
+
+ if (cmode == FAIL)
+ return;
+
+ inst.instruction |= (rs == NS_QI) << 6;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= cmode << 8;
+ neon_write_immbits (immbits);
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ }
+}
+
+static void
+do_neon_bitfield (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+ /* FIXME: Check that no type was given. */
+ neon_three_same (FALSE, rs == NS_QQQ, 0, -1);
+}
+
+static void
+neon_dyadic (enum neon_el_type ubit_meaning, unsigned types)
+{
+ enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+ struct neon_type_el et = neon_check_type (3, rs, N_EQK, N_EQK, types | N_KEY);
+ if (et.type == NT_float)
+ {
+ inst.instruction = NEON_ENC_FLOAT (inst.instruction);
+ neon_three_same (TRUE, rs == NS_QQQ, 0, -1);
+ }
+ else
+ {
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_three_same (TRUE, rs == NS_QQQ, et.type == ubit_meaning, et.size);
+ }
+}
+
+static void
+do_neon_dyadic_if_su (void)
+{
+ neon_dyadic (NT_unsigned, N_SUF_32);
+}
+
+static void
+do_neon_dyadic_if_su_d (void)
+{
+ /* This version only allow D registers, but that constraint is enforced during
+ operand parsing so we don't need to do anything extra here. */
+ neon_dyadic (NT_unsigned, N_SUF_32);
+}
+
+static void
+do_neon_dyadic_if_i (void)
+{
+ neon_dyadic (NT_unsigned, N_IF_32);
+}
+
+static void
+do_neon_dyadic_if_i_d (void)
+{
+ neon_dyadic (NT_unsigned, N_IF_32);
+}
+
+static void
+do_neon_addsub_if_i (void)
+{
+ /* The "untyped" case can't happen. Do this to stop the "U" bit being
+ affected if we specify unsigned args. */
+ neon_dyadic (NT_untyped, N_IF_32 | N_I64);
+}
+
+/* Swaps operands 1 and 2. If operand 1 (optional arg) was omitted, we want the
+ result to be:
+ V<op> A,B (A is operand 0, B is operand 2)
+ to mean:
+ V<op> A,B,A
+ not:
+ V<op> A,B,B
+ so handle that case specially. */
+
+static void
+neon_exchange_operands (void)
+{
+ void *scratch = alloca (sizeof (inst.operands[0]));
+ if (inst.operands[1].present)
+ {
+ /* Swap operands[1] and operands[2]. */
+ memcpy (scratch, &inst.operands[1], sizeof (inst.operands[0]));
+ inst.operands[1] = inst.operands[2];
+ memcpy (&inst.operands[2], scratch, sizeof (inst.operands[0]));
+ }
+ else
+ {
+ inst.operands[1] = inst.operands[2];
+ inst.operands[2] = inst.operands[0];
+ }
+}
+
+static void
+neon_compare (unsigned regtypes, unsigned immtypes, int invert)
+{
+ if (inst.operands[2].isreg)
+ {
+ if (invert)
+ neon_exchange_operands ();
+ neon_dyadic (NT_unsigned, regtypes);
+ }
+ else
+ {
+ enum neon_shape rs = neon_check_shape (NS_DDI_QQI);
+ struct neon_type_el et = neon_check_type (2, rs, N_EQK, immtypes | N_KEY);
+
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= (rs == NS_QQI) << 6;
+ inst.instruction |= (et.type == NT_float) << 10;
+ inst.instruction |= neon_logbits (et.size) << 18;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ }
+}
+
+static void
+do_neon_cmp (void)
+{
+ neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, FALSE);
+}
+
+static void
+do_neon_cmp_inv (void)
+{
+ neon_compare (N_SUF_32, N_S8 | N_S16 | N_S32 | N_F32, TRUE);
+}
+
+static void
+do_neon_ceq (void)
+{
+ neon_compare (N_IF_32, N_IF_32, FALSE);
+}
+
+/* For multiply instructions, we have the possibility of 16-bit or 32-bit
+ scalars, which are encoded in 5 bits, M : Rm.
+ For 16-bit scalars, the register is encoded in Rm[2:0] and the index in
+ M:Rm[3], and for 32-bit scalars, the register is encoded in Rm[3:0] and the
+ index in M. */
+
+static unsigned
+neon_scalar_for_mul (unsigned scalar, unsigned elsize)
+{
+ unsigned regno = scalar >> 3;
+ unsigned elno = scalar & 7;
+
+ switch (elsize)
+ {
+ case 16:
+ if (regno > 7 || elno > 3)
+ goto bad_scalar;
+ return regno | (elno << 3);
+
+ case 32:
+ if (regno > 15 || elno > 1)
+ goto bad_scalar;
+ return regno | (elno << 4);
+
+ default:
+ bad_scalar:
+ as_bad (_("Scalar out of range for multiply instruction"));
+ }
+
+ return 0;
+}
+
+/* Encode multiply / multiply-accumulate scalar instructions. */
+
+static void
+neon_mul_mac (struct neon_type_el et, int ubit)
+{
+ unsigned scalar = neon_scalar_for_mul (inst.operands[2].reg, et.size);
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= LOW4 (scalar);
+ inst.instruction |= HI1 (scalar) << 5;
+ inst.instruction |= (et.type == NT_float) << 8;
+ inst.instruction |= neon_logbits (et.size) << 20;
+ inst.instruction |= (ubit != 0) << 24;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+static void
+do_neon_mac_maybe_scalar (void)
+{
+ if (inst.operands[2].isscalar)
+ {
+ enum neon_shape rs = neon_check_shape (NS_DDS_QQS);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_I16 | N_I32 | N_F32 | N_KEY);
+ inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+ neon_mul_mac (et, rs == NS_QQS);
+ }
+ else
+ do_neon_dyadic_if_i ();
+}
+
+static void
+do_neon_tst (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_8 | N_16 | N_32 | N_KEY);
+ neon_three_same (TRUE, rs == NS_QQQ, 0, et.size);
+}
+
+/* VMUL with 3 registers allows the P8 type. The scalar version supports the
+ same types as the MAC equivalents. The polynomial type for this instruction
+ is encoded the same as the integer type. */
+
+static void
+do_neon_mul (void)
+{
+ if (inst.operands[2].isscalar)
+ do_neon_mac_maybe_scalar ();
+ else
+ neon_dyadic (NT_poly, N_I8 | N_I16 | N_I32 | N_F32 | N_P8);
+}
+
+static void
+do_neon_qdmulh (void)
+{
+ if (inst.operands[2].isscalar)
+ {
+ enum neon_shape rs = neon_check_shape (NS_DDS_QQS);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+ inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+ neon_mul_mac (et, rs == NS_QQS);
+ }
+ else
+ {
+ enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_S16 | N_S32 | N_KEY);
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ /* The U bit (rounding) comes from bit mask. */
+ neon_three_same (TRUE, rs == NS_QQQ, 0, et.size);
+ }
+}
+
+static void
+do_neon_fcmp_absolute (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY);
+ /* Size field comes from bit mask. */
+ neon_three_same (TRUE, rs == NS_QQQ, 1, -1);
+}
+
+static void
+do_neon_fcmp_absolute_inv (void)
+{
+ neon_exchange_operands ();
+ do_neon_fcmp_absolute ();
+}
+
+static void
+do_neon_step (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DDD_QQQ);
+ neon_check_type (3, rs, N_EQK, N_EQK, N_F32 | N_KEY);
+ neon_three_same (TRUE, rs == NS_QQQ, 0, -1);
+}
+
+static void
+do_neon_abs_neg (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_S8 | N_S16 | N_S32 | N_F32 | N_KEY);
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= (rs == NS_QQ) << 6;
+ inst.instruction |= (et.type == NT_float) << 10;
+ inst.instruction |= neon_logbits (et.size) << 18;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+static void
+do_neon_sli (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DDI_QQI);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+ int imm = inst.operands[2].imm;
+ constraint (imm < 0 || (unsigned)imm >= et.size,
+ _("immediate out of range for insert"));
+ neon_imm_shift (FALSE, 0, rs == NS_QQI, et, imm);
+}
+
+static void
+do_neon_sri (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DDI_QQI);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+ int imm = inst.operands[2].imm;
+ constraint (imm < 1 || (unsigned)imm > et.size,
+ _("immediate out of range for insert"));
+ neon_imm_shift (FALSE, 0, rs == NS_QQI, et, et.size - imm);
+}
+
+static void
+do_neon_qshlu_imm (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DDI_QQI);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK | N_UNS, N_S8 | N_S16 | N_S32 | N_S64 | N_KEY);
+ int imm = inst.operands[2].imm;
+ constraint (imm < 0 || (unsigned)imm >= et.size,
+ _("immediate out of range for shift"));
+ /* Only encodes the 'U present' variant of the instruction.
+ In this case, signed types have OP (bit 8) set to 0.
+ Unsigned types have OP set to 1. */
+ inst.instruction |= (et.type == NT_unsigned) << 8;
+ /* The rest of the bits are the same as other immediate shifts. */
+ neon_imm_shift (FALSE, 0, rs == NS_QQI, et, imm);
+}
+
+static void
+do_neon_qmovn (void)
+{
+ struct neon_type_el et = neon_check_type (2, NS_DQ,
+ N_EQK | N_HLF, N_SU_16_64 | N_KEY);
+ /* Saturating move where operands can be signed or unsigned, and the
+ destination has the same signedness. */
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ if (et.type == NT_unsigned)
+ inst.instruction |= 0xc0;
+ else
+ inst.instruction |= 0x80;
+ neon_two_same (0, 1, et.size / 2);
+}
+
+static void
+do_neon_qmovun (void)
+{
+ struct neon_type_el et = neon_check_type (2, NS_DQ,
+ N_EQK | N_HLF | N_UNS, N_S16 | N_S32 | N_S64 | N_KEY);
+ /* Saturating move with unsigned results. Operands must be signed. */
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_two_same (0, 1, et.size / 2);
+}
+
+static void
+do_neon_rshift_sat_narrow (void)
+{
+ /* FIXME: Types for narrowing. If operands are signed, results can be signed
+ or unsigned. If operands are unsigned, results must also be unsigned. */
+ struct neon_type_el et = neon_check_type (2, NS_DQI,
+ N_EQK | N_HLF, N_SU_16_64 | N_KEY);
+ int imm = inst.operands[2].imm;
+ /* This gets the bounds check, size encoding and immediate bits calculation
+ right. */
+ et.size /= 2;
+
+ /* VQ{R}SHRN.I<size> <Dd>, <Qm>, #0 is a synonym for
+ VQMOVN.I<size> <Dd>, <Qm>. */
+ if (imm == 0)
+ {
+ inst.operands[2].present = 0;
+ inst.instruction = N_MNEM_vqmovn;
+ do_neon_qmovn ();
+ return;
+ }
+
+ constraint (imm < 1 || (unsigned)imm > et.size,
+ _("immediate out of range"));
+ neon_imm_shift (TRUE, et.type == NT_unsigned, 0, et, et.size - imm);
+}
+
+static void
+do_neon_rshift_sat_narrow_u (void)
+{
+ /* FIXME: Types for narrowing. If operands are signed, results can be signed
+ or unsigned. If operands are unsigned, results must also be unsigned. */
+ struct neon_type_el et = neon_check_type (2, NS_DQI,
+ N_EQK | N_HLF | N_UNS, N_S16 | N_S32 | N_S64 | N_KEY);
+ int imm = inst.operands[2].imm;
+ /* This gets the bounds check, size encoding and immediate bits calculation
+ right. */
+ et.size /= 2;
+
+ /* VQSHRUN.I<size> <Dd>, <Qm>, #0 is a synonym for
+ VQMOVUN.I<size> <Dd>, <Qm>. */
+ if (imm == 0)
+ {
+ inst.operands[2].present = 0;
+ inst.instruction = N_MNEM_vqmovun;
+ do_neon_qmovun ();
+ return;
+ }
+
+ constraint (imm < 1 || (unsigned)imm > et.size,
+ _("immediate out of range"));
+ /* FIXME: The manual is kind of unclear about what value U should have in
+ VQ{R}SHRUN instructions, but U=0, op=0 definitely encodes VRSHR, so it
+ must be 1. */
+ neon_imm_shift (TRUE, 1, 0, et, et.size - imm);
+}
+
+static void
+do_neon_movn (void)
+{
+ struct neon_type_el et = neon_check_type (2, NS_DQ,
+ N_EQK | N_HLF, N_I16 | N_I32 | N_I64 | N_KEY);
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_two_same (0, 1, et.size / 2);
+}
+
+static void
+do_neon_rshift_narrow (void)
+{
+ struct neon_type_el et = neon_check_type (2, NS_DQI,
+ N_EQK | N_HLF, N_I16 | N_I32 | N_I64 | N_KEY);
+ int imm = inst.operands[2].imm;
+ /* This gets the bounds check, size encoding and immediate bits calculation
+ right. */
+ et.size /= 2;
+
+ /* If immediate is zero then we are a pseudo-instruction for
+ VMOVN.I<size> <Dd>, <Qm> */
+ if (imm == 0)
+ {
+ inst.operands[2].present = 0;
+ inst.instruction = N_MNEM_vmovn;
+ do_neon_movn ();
+ return;
+ }
+
+ constraint (imm < 1 || (unsigned)imm > et.size,
+ _("immediate out of range for narrowing operation"));
+ neon_imm_shift (FALSE, 0, 0, et, et.size - imm);
+}
+
+static void
+do_neon_shll (void)
+{
+ /* FIXME: Type checking when lengthening. */
+ struct neon_type_el et = neon_check_type (2, NS_QDI,
+ N_EQK | N_DBL, N_I8 | N_I16 | N_I32 | N_KEY);
+ unsigned imm = inst.operands[2].imm;
+
+ if (imm == et.size)
+ {
+ /* Maximum shift variant. */
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= neon_logbits (et.size) << 18;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ }
+ else
+ {
+ /* A more-specific type check for non-max versions. */
+ et = neon_check_type (2, NS_QDI,
+ N_EQK | N_DBL, N_SU_32 | N_KEY);
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+ neon_imm_shift (TRUE, et.type == NT_unsigned, 0, et, imm);
+ }
+}
+
+/* Check the various types for the VCVT instruction, and return the one that
+ the current instruction is. */
+
+static int
+neon_cvt_flavour (enum neon_shape rs)
+{
+#define CVT_VAR(C,X,Y) \
+ et = neon_check_type (2, rs, (X), (Y)); \
+ if (et.type != NT_invtype) \
+ { \
+ inst.error = NULL; \
+ return (C); \
+ }
+ struct neon_type_el et;
+
+ CVT_VAR (0, N_S32, N_F32);
+ CVT_VAR (1, N_U32, N_F32);
+ CVT_VAR (2, N_F32, N_S32);
+ CVT_VAR (3, N_F32, N_U32);
+
+ return -1;
+#undef CVT_VAR
+}
+
+static void
+do_neon_cvt (void)
+{
+ /* Fixed-point conversion with #0 immediate is encoded as an integer
+ conversion. */
+ if (inst.operands[2].present && inst.operands[2].imm != 0)
+ {
+ enum neon_shape rs = neon_check_shape (NS_DDI_QQI);
+ int flavour = neon_cvt_flavour (rs);
+ unsigned immbits = 32 - inst.operands[2].imm;
+ unsigned enctab[] = { 0x0000100, 0x1000100, 0x0, 0x1000000 };
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+ if (flavour != -1)
+ inst.instruction |= enctab[flavour];
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= (rs == NS_QQI) << 6;
+ inst.instruction |= 1 << 21;
+ inst.instruction |= immbits << 16;
+ }
+ else
+ {
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+ int flavour = neon_cvt_flavour (rs);
+ unsigned enctab[] = { 0x100, 0x180, 0x0, 0x080 };
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ if (flavour != -1)
+ inst.instruction |= enctab[flavour];
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= (rs == NS_QQ) << 6;
+ inst.instruction |= 2 << 18;
+ }
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+static void
+neon_move_immediate (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DI_QI);
+ struct neon_type_el et = neon_check_type (1, rs,
+ N_I8 | N_I16 | N_I32 | N_I64 | N_F32);
+ unsigned immlo, immhi = 0, immbits;
+ int op, cmode;
+
+ /* We start out as an MVN instruction if OP = 1, MOV otherwise. */
+ op = (inst.instruction & (1 << 5)) != 0;
+
+ immlo = inst.operands[1].imm;
+ if (inst.operands[1].regisimm)
+ immhi = inst.operands[1].reg;
+
+ constraint (et.size < 32 && (immlo & ~((1 << et.size) - 1)) != 0,
+ _("immediate has bits set outside the operand size"));
+
+ if ((cmode = neon_cmode_for_move_imm (immlo, immhi, &immbits, &op,
+ et.size)) == FAIL)
+ {
+ /* Invert relevant bits only. */
+ neon_invert_size (&immlo, &immhi, et.size);
+ /* Flip from VMOV/VMVN to VMVN/VMOV. Some immediate types are unavailable
+ with one or the other; those cases are caught by
+ neon_cmode_for_move_imm. */
+ op = !op;
+ if ((cmode = neon_cmode_for_move_imm (immlo, immhi, &immbits, &op,
+ et.size)) == FAIL)
+ {
+ inst.error = _("immediate out of range");
+ return;
+ }
+ }
+
+ inst.instruction &= ~(1 << 5);
+ inst.instruction |= op << 5;
+
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= (rs == NS_QI) << 6;
+ inst.instruction |= cmode << 8;
+
+ neon_write_immbits (immbits);
+}
+
+static void
+do_neon_mvn (void)
+{
+ if (inst.operands[1].isreg)
+ {
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= (rs == NS_QQ) << 6;
+ }
+ else
+ {
+ inst.instruction = NEON_ENC_IMMED (inst.instruction);
+ neon_move_immediate ();
+ }
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+/* Encode instructions of form:
+
+ |28/24|23|22|21 20|19 16|15 12|11 8|7|6|5|4|3 0|
+ | U |x |D |size | Rn | Rd |x x x x|N|x|M|x| Rm |
+
+*/
+
+static void
+neon_mixed_length (struct neon_type_el et, unsigned size)
+{
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= (et.type == NT_unsigned) << 24;
+ inst.instruction |= neon_logbits (size) << 20;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+static void
+do_neon_dyadic_long (void)
+{
+ /* FIXME: Type checking for lengthening op. */
+ struct neon_type_el et = neon_check_type (3, NS_QDD,
+ N_EQK | N_DBL, N_EQK, N_SU_32 | N_KEY);
+ neon_mixed_length (et, et.size);
+}
+
+static void
+do_neon_abal (void)
+{
+ struct neon_type_el et = neon_check_type (3, NS_QDD,
+ N_EQK | N_INT | N_DBL, N_EQK, N_SU_32 | N_KEY);
+ neon_mixed_length (et, et.size);
+}
+
+static void
+neon_mac_reg_scalar_long (unsigned regtypes, unsigned scalartypes)
+{
+ if (inst.operands[2].isscalar)
+ {
+ struct neon_type_el et = neon_check_type (2, NS_QDS,
+ N_EQK | N_DBL, regtypes | N_KEY);
+ inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+ neon_mul_mac (et, et.type == NT_unsigned);
+ }
+ else
+ {
+ struct neon_type_el et = neon_check_type (3, NS_QDD,
+ N_EQK | N_DBL, N_EQK, scalartypes | N_KEY);
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_mixed_length (et, et.size);
+ }
+}
+
+static void
+do_neon_mac_maybe_scalar_long (void)
+{
+ neon_mac_reg_scalar_long (N_S16 | N_S32 | N_U16 | N_U32, N_SU_32);
+}
+
+static void
+do_neon_dyadic_wide (void)
+{
+ struct neon_type_el et = neon_check_type (3, NS_QQD,
+ N_EQK | N_DBL, N_EQK | N_DBL, N_SU_32 | N_KEY);
+ neon_mixed_length (et, et.size);
+}
+
+static void
+do_neon_dyadic_narrow (void)
+{
+ struct neon_type_el et = neon_check_type (3, NS_QDD,
+ N_EQK | N_DBL, N_EQK, N_I16 | N_I32 | N_I64 | N_KEY);
+ neon_mixed_length (et, et.size / 2);
+}
+
+static void
+do_neon_mul_sat_scalar_long (void)
+{
+ neon_mac_reg_scalar_long (N_S16 | N_S32, N_S16 | N_S32);
+}
+
+static void
+do_neon_vmull (void)
+{
+ if (inst.operands[2].isscalar)
+ do_neon_mac_maybe_scalar_long ();
+ else
+ {
+ struct neon_type_el et = neon_check_type (3, NS_QDD,
+ N_EQK | N_DBL, N_EQK, N_SU_32 | N_P8 | N_KEY);
+ if (et.type == NT_poly)
+ inst.instruction = NEON_ENC_POLY (inst.instruction);
+ else
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ /* For polynomial encoding, size field must be 0b00 and the U bit must be
+ zero. Should be OK as-is. */
+ neon_mixed_length (et, et.size);
+ }
+}
+
+static void
+do_neon_ext (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DDDI_QQQI);
+ struct neon_type_el et = neon_check_type (3, rs,
+ N_EQK, N_EQK, N_8 | N_16 | N_32 | N_64 | N_KEY);
+ unsigned imm = (inst.operands[3].imm * et.size) / 8;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= (rs == NS_QQQI) << 6;
+ inst.instruction |= imm << 8;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+static void
+do_neon_rev (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_8 | N_16 | N_32 | N_KEY);
+ unsigned op = (inst.instruction >> 7) & 3;
+ /* N (width of reversed regions) is encoded as part of the bitmask. We
+ extract it here to check the elements to be reversed are smaller.
+ Otherwise we'd get a reserved instruction. */
+ unsigned elsize = (op == 2) ? 16 : (op == 1) ? 32 : (op == 0) ? 64 : 0;
+ assert (elsize != 0);
+ constraint (et.size >= elsize,
+ _("elements must be smaller than reversal region"));
+ neon_two_same (rs == NS_QQ, 1, et.size);
+}
+
+static void
+do_neon_dup (void)
+{
+ if (inst.operands[1].isscalar)
+ {
+ enum neon_shape rs = neon_check_shape (NS_DS_QS);
+ struct neon_type_el et = neon_check_type (1, rs, N_8 | N_16 | N_32);
+ unsigned sizebits = et.size >> 3;
+ unsigned dm = inst.operands[1].reg >> 3;
+ int logsize = neon_logbits (et.size);
+ unsigned x = (inst.operands[1].reg & 7) << logsize;
+ inst.instruction = NEON_ENC_SCALAR (inst.instruction);
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (dm);
+ inst.instruction |= HI1 (dm) << 5;
+ inst.instruction |= (rs == NS_QS) << 6;
+ inst.instruction |= x << 17;
+ inst.instruction |= sizebits << 16;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ }
+ else
+ {
+ enum neon_shape rs = neon_check_shape (NS_DR_QR);
+ struct neon_type_el et = neon_check_type (1, rs, N_8 | N_16 | N_32);
+ unsigned save_cond = inst.instruction & 0xf0000000;
+ /* Duplicate ARM register to lanes of vector. */
+ inst.instruction = NEON_ENC_ARMREG (inst.instruction);
+ switch (et.size)
+ {
+ case 8: inst.instruction |= 0x400000; break;
+ case 16: inst.instruction |= 0x000020; break;
+ case 32: inst.instruction |= 0x000000; break;
+ default: break;
+ }
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 12;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 7;
+ inst.instruction |= (rs == NS_QR) << 21;
+ /* The encoding for this instruction is identical for the ARM and Thumb
+ variants, except for the condition field. */
+ if (thumb_mode)
+ inst.instruction |= 0xe0000000;
+ else
+ inst.instruction |= save_cond;
+ }
+}
+
+/* VMOV has particularly many variations. It can be one of:
+ 0. VMOV<c><q> <Qd>, <Qm>
+ 1. VMOV<c><q> <Dd>, <Dm>
+ (Register operations, which are VORR with Rm = Rn.)
+ 2. VMOV<c><q>.<dt> <Qd>, #<imm>
+ 3. VMOV<c><q>.<dt> <Dd>, #<imm>
+ (Immediate loads.)
+ 4. VMOV<c><q>.<size> <Dn[x]>, <Rd>
+ (ARM register to scalar.)
+ 5. VMOV<c><q> <Dm>, <Rd>, <Rn>
+ (Two ARM registers to vector.)
+ 6. VMOV<c><q>.<dt> <Rd>, <Dn[x]>
+ (Scalar to ARM register.)
+ 7. VMOV<c><q> <Rd>, <Rn>, <Dm>
+ (Vector to two ARM registers.)
+
+ We should have just enough information to be able to disambiguate most of
+ these, apart from "Two ARM registers to vector" and "Vector to two ARM
+ registers" cases. For these, abuse the .regisimm operand field to signify a
+ Neon register.
+
+ All the encoded bits are hardcoded by this function.
+
+ FIXME: Some of the checking may be a bit sloppy (in a couple of cases you
+ can specify a type where it doesn't make sense to, and is ignored).
+*/
+
+static void
+do_neon_mov (void)
+{
+ int nargs = inst.operands[0].present + inst.operands[1].present
+ + inst.operands[2].present;
+ unsigned save_cond = thumb_mode ? 0xe0000000 : inst.instruction & 0xf0000000;
+
+ switch (nargs)
+ {
+ case 2:
+ /* Cases 0, 1, 2, 3, 4, 6. */
+ if (inst.operands[1].isscalar)
+ {
+ /* Case 6. */
+ struct neon_type_el et = neon_check_type (1, NS_IGNORE,
+ N_S8 | N_S16 | N_U8 | N_U16 | N_32);
+ unsigned logsize = neon_logbits (et.size);
+ unsigned dn = inst.operands[1].reg >> 3;
+ unsigned x = inst.operands[1].reg & 7;
+ unsigned abcdebits = 0;
+
+ constraint (x >= 64 / et.size, _("scalar index out of range"));
+
+ switch (et.size)
+ {
+ case 8: abcdebits = (et.type == NT_signed) ? 0x08 : 0x18; break;
+ case 16: abcdebits = (et.type == NT_signed) ? 0x01 : 0x11; break;
+ case 32: abcdebits = 0x00; break;
+ default: ;
+ }
+
+ abcdebits |= x << logsize;
+ inst.instruction = save_cond;
+ inst.instruction |= 0xe100b10;
+ inst.instruction |= LOW4 (dn) << 16;
+ inst.instruction |= HI1 (dn) << 7;
+ inst.instruction |= inst.operands[0].reg << 12;
+ inst.instruction |= (abcdebits & 3) << 5;
+ inst.instruction |= (abcdebits >> 2) << 21;
+ }
+ else if (inst.operands[1].isreg)
+ {
+ /* Cases 0, 1, 4. */
+ if (inst.operands[0].isscalar)
+ {
+ /* Case 4. */
+ unsigned bcdebits = 0;
+ struct neon_type_el et = neon_check_type (1, NS_IGNORE,
+ N_8 | N_16 | N_32);
+ int logsize = neon_logbits (et.size);
+ unsigned dn = inst.operands[0].reg >> 3;
+ unsigned x = inst.operands[0].reg & 7;
+
+ constraint (x >= 64 / et.size, _("scalar index out of range"));
+
+ switch (et.size)
+ {
+ case 8: bcdebits = 0x8; break;
+ case 16: bcdebits = 0x1; break;
+ case 32: bcdebits = 0x0; break;
+ default: ;
+ }
+
+ bcdebits |= x << logsize;
+ inst.instruction = save_cond;
+ inst.instruction |= 0xe000b10;
+ inst.instruction |= LOW4 (dn) << 16;
+ inst.instruction |= HI1 (dn) << 7;
+ inst.instruction |= inst.operands[1].reg << 12;
+ inst.instruction |= (bcdebits & 3) << 5;
+ inst.instruction |= (bcdebits >> 2) << 21;
+ }
+ else
+ {
+ /* Cases 0, 1. */
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+ /* The architecture manual I have doesn't explicitly state which
+ value the U bit should have for register->register moves, but
+ the equivalent VORR instruction has U = 0, so do that. */
+ inst.instruction = 0x0200110;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg);
+ inst.instruction |= HI1 (inst.operands[1].reg) << 5;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= (rs == NS_QQ) << 6;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ }
+ }
+ else
+ {
+ /* Cases 2, 3. */
+ inst.instruction = 0x0800010;
+ neon_move_immediate ();
+ inst.instruction = neon_dp_fixup (inst.instruction);
+ }
+ break;
+
+ case 3:
+ /* Cases 5, 7. */
+ if (inst.operands[0].regisimm)
+ {
+ /* Case 5. */
+ inst.instruction = save_cond;
+ inst.instruction |= 0xc400b10;
+ inst.instruction |= LOW4 (inst.operands[0].reg);
+ inst.instruction |= HI1 (inst.operands[0].reg) << 5;
+ inst.instruction |= inst.operands[1].reg << 12;
+ inst.instruction |= inst.operands[2].reg << 16;
+ }
+ else
+ {
+ /* Case 7. */
+ inst.instruction = save_cond;
+ inst.instruction |= 0xc500b10;
+ inst.instruction |= inst.operands[0].reg << 12;
+ inst.instruction |= inst.operands[1].reg << 16;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ }
+ break;
+
+ default:
+ abort ();
+ }
+}
+
+static void
+do_neon_rshift_round_imm (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DDI_QQI);
+ struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_ALL | N_KEY);
+ int imm = inst.operands[2].imm;
+
+ /* imm == 0 case is encoded as VMOV for V{R}SHR. */
+ if (imm == 0)
+ {
+ inst.operands[2].present = 0;
+ do_neon_mov ();
+ return;
+ }
+
+ constraint (imm < 1 || (unsigned)imm > et.size,
+ _("immediate out of range for shift"));
+ neon_imm_shift (TRUE, et.type == NT_unsigned, rs == NS_QQI, et,
+ et.size - imm);
+}
+
+static void
+do_neon_movl (void)
+{
+ struct neon_type_el et = neon_check_type (2, NS_QD,
+ N_EQK | N_DBL, N_SU_32 | N_KEY);
+ unsigned sizebits = et.size >> 3;
+ inst.instruction |= sizebits << 19;
+ neon_two_same (0, et.type == NT_unsigned, -1);
+}
+
+static void
+do_neon_trn (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_8 | N_16 | N_32 | N_KEY);
+ inst.instruction = NEON_ENC_INTEGER (inst.instruction);
+ neon_two_same (rs == NS_QQ, 1, et.size);
+}
+
+static void
+do_neon_zip_uzp (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_8 | N_16 | N_32 | N_KEY);
+ if (rs == NS_DD && et.size == 32)
+ {
+ /* Special case: encode as VTRN.32 <Dd>, <Dm>. */
+ inst.instruction = N_MNEM_vtrn;
+ do_neon_trn ();
+ return;
+ }
+ neon_two_same (rs == NS_QQ, 1, et.size);
+}
+
+static void
+do_neon_sat_abs_neg (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+ neon_two_same (rs == NS_QQ, 1, et.size);
+}
+
+static void
+do_neon_pair_long (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+ struct neon_type_el et = neon_check_type (2, rs, N_EQK, N_SU_32 | N_KEY);
+ /* Unsigned is encoded in OP field (bit 7) for these instruction. */
+ inst.instruction |= (et.type == NT_unsigned) << 7;
+ neon_two_same (rs == NS_QQ, 1, et.size);
+}
+
+static void
+do_neon_recip_est (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK | N_FLT, N_F32 | N_U32 | N_KEY);
+ inst.instruction |= (et.type == NT_float) << 8;
+ neon_two_same (rs == NS_QQ, 1, et.size);
+}
+
+static void
+do_neon_cls (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_S8 | N_S16 | N_S32 | N_KEY);
+ neon_two_same (rs == NS_QQ, 1, et.size);
+}
+
+static void
+do_neon_clz (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK, N_I8 | N_I16 | N_I32 | N_KEY);
+ neon_two_same (rs == NS_QQ, 1, et.size);
+}
+
+static void
+do_neon_cnt (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+ struct neon_type_el et = neon_check_type (2, rs,
+ N_EQK | N_INT, N_8 | N_KEY);
+ neon_two_same (rs == NS_QQ, 1, et.size);
+}
+
+static void
+do_neon_swp (void)
+{
+ enum neon_shape rs = neon_check_shape (NS_DD_QQ);
+ neon_two_same (rs == NS_QQ, 1, -1);
+}
+
+static void
+do_neon_tbl_tbx (void)
+{
+ unsigned listlenbits;
+ neon_check_type (1, NS_DLD, N_8);
+
+ if (inst.operands[1].imm < 1 || inst.operands[1].imm > 4)
+ {
+ inst.error = _("bad list length for table lookup");
+ return;
+ }
+
+ listlenbits = inst.operands[1].imm - 1;
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 16;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 7;
+ inst.instruction |= LOW4 (inst.operands[2].reg);
+ inst.instruction |= HI1 (inst.operands[2].reg) << 5;
+ inst.instruction |= listlenbits << 8;
+
+ inst.instruction = neon_dp_fixup (inst.instruction);
+}
+
+static void
+do_neon_ldm_stm (void)
+{
+ /* P, U and L bits are part of bitmask. */
+ int is_dbmode = (inst.instruction & (1 << 24)) != 0;
+ unsigned offsetbits = inst.operands[1].imm * 2;
+
+ constraint (is_dbmode && !inst.operands[0].writeback,
+ _("writeback (!) must be used for VLDMDB and VSTMDB"));
+
+ constraint (inst.operands[1].imm < 1 || inst.operands[1].imm > 16,
+ _("register list must contain at least 1 and at most 16 "
+ "registers"));
+
+ inst.instruction |= inst.operands[0].reg << 16;
+ inst.instruction |= inst.operands[0].writeback << 21;
+ inst.instruction |= LOW4 (inst.operands[1].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[1].reg) << 22;
+
+ inst.instruction |= offsetbits;
+
+ if (thumb_mode)
+ inst.instruction |= 0xe0000000;
+}
+
+static void
+do_neon_ldr_str (void)
+{
+ unsigned offsetbits;
+ int offset_up = 1;
+ int is_ldr = (inst.instruction & (1 << 20)) != 0;
+
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+
+ constraint (inst.reloc.pc_rel && !is_ldr,
+ _("PC-relative addressing unavailable with VSTR"));
+
+ constraint (!inst.reloc.pc_rel && inst.reloc.exp.X_op != O_constant,
+ _("Immediate value must be a constant"));
+
+ if (inst.reloc.exp.X_add_number < 0)
+ {
+ offset_up = 0;
+ offsetbits = -inst.reloc.exp.X_add_number / 4;
+ }
+ else
+ offsetbits = inst.reloc.exp.X_add_number / 4;
+
+ /* FIXME: Does this catch everything? */
+ constraint (!inst.operands[1].isreg || !inst.operands[1].preind
+ || inst.operands[1].postind || inst.operands[1].writeback
+ || inst.operands[1].immisreg || inst.operands[1].shifted,
+ BAD_ADDR_MODE);
+ constraint ((inst.operands[1].imm & 3) != 0,
+ _("Offset must be a multiple of 4"));
+ constraint (offsetbits != (offsetbits & 0xff),
+ _("Immediate offset out of range"));
+
+ inst.instruction |= inst.operands[1].reg << 16;
+ inst.instruction |= offsetbits & 0xff;
+ inst.instruction |= offset_up << 23;
+
+ if (thumb_mode)
+ inst.instruction |= 0xe0000000;
+
+ if (inst.reloc.pc_rel)
+ {
+ if (thumb_mode)
+ inst.reloc.type = BFD_RELOC_ARM_T32_CP_OFF_IMM;
+ else
+ inst.reloc.type = BFD_RELOC_ARM_CP_OFF_IMM;
+ }
+ else
+ inst.reloc.type = BFD_RELOC_UNUSED;
+}
+
+/* "interleave" version also handles non-interleaving register VLD1/VST1
+ instructions. */
+
+static void
+do_neon_ld_st_interleave (void)
+{
+ struct neon_type_el et = neon_check_type (1, NS_IGNORE,
+ N_8 | N_16 | N_32 | N_64);
+ unsigned alignbits = 0;
+ unsigned idx;
+ /* The bits in this table go:
+ 0: register stride of one (0) or two (1)
+ 1,2: register list length, minus one (1, 2, 3, 4).
+ 3,4: <n> in instruction type, minus one (VLD<n> / VST<n>).
+ We use -1 for invalid entries. */
+ const int typetable[] =
+ {
+ 0x7, -1, 0xa, -1, 0x6, -1, 0x2, -1, /* VLD1 / VST1. */
+ -1, -1, 0x8, 0x9, -1, -1, 0x3, -1, /* VLD2 / VST2. */
+ -1, -1, -1, -1, 0x4, 0x5, -1, -1, /* VLD3 / VST3. */
+ -1, -1, -1, -1, -1, -1, 0x0, 0x1 /* VLD4 / VST4. */
+ };
+ int typebits;
+
+ if (inst.operands[1].immisalign)
+ switch (inst.operands[1].imm >> 8)
+ {
+ case 64: alignbits = 1; break;
+ case 128:
+ if (NEON_REGLIST_LENGTH (inst.operands[0].imm) == 3)
+ goto bad_alignment;
+ alignbits = 2;
+ break;
+ case 256:
+ if (NEON_REGLIST_LENGTH (inst.operands[0].imm) == 3)
+ goto bad_alignment;
+ alignbits = 3;
+ break;
+ default:
+ bad_alignment:
+ inst.error = _("bad alignment");
+ return;
+ }
+
+ inst.instruction |= alignbits << 4;
+ inst.instruction |= neon_logbits (et.size) << 6;
+
+ /* Bits [4:6] of the immediate in a list specifier encode register stride
+ (minus 1) in bit 4, and list length in bits [5:6]. We put the <n> of
+ VLD<n>/VST<n> in bits [9:8] of the initial bitmask. Suck it out here, look
+ up the right value for "type" in a table based on this value and the given
+ list style, then stick it back. */
+ idx = ((inst.operands[0].imm >> 4) & 7)
+ | (((inst.instruction >> 8) & 3) << 3);
+
+ typebits = typetable[idx];
+
+ constraint (typebits == -1, _("bad list type for instruction"));
+
+ inst.instruction &= ~0xf00;
+ inst.instruction |= typebits << 8;
+}
+
+/* Check alignment is valid for do_neon_ld_st_lane and do_neon_ld_dup.
+ *DO_ALIGN is set to 1 if the relevant alignment bit should be set, 0
+ otherwise. The variable arguments are a list of pairs of legal (size, align)
+ values, terminated with -1. */
+
+static int
+neon_alignment_bit (int size, int align, int *do_align, ...)
+{
+ va_list ap;
+ int result = FAIL, thissize, thisalign;
+
+ if (!inst.operands[1].immisalign)
+ {
+ *do_align = 0;
+ return SUCCESS;
+ }
+
+ va_start (ap, do_align);
+
+ do
+ {
+ thissize = va_arg (ap, int);
+ if (thissize == -1)
+ break;
+ thisalign = va_arg (ap, int);
+
+ if (size == thissize && align == thisalign)
+ result = SUCCESS;
+ }
+ while (result != SUCCESS);
+
+ va_end (ap);
+
+ if (result == SUCCESS)
+ *do_align = 1;
+ else
+ inst.error = _("unsupported alignment for instruction");
+
+ return result;
+}
+
+static void
+do_neon_ld_st_lane (void)
+{
+ struct neon_type_el et = neon_check_type (1, NS_IGNORE, N_8 | N_16 | N_32);
+ int align_good, do_align = 0;
+ int logsize = neon_logbits (et.size);
+ int align = inst.operands[1].imm >> 8;
+ int n = (inst.instruction >> 8) & 3;
+ int max_el = 64 / et.size;
+
+ constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != n + 1,
+ _("bad list length"));
+ constraint (NEON_LANE (inst.operands[0].imm) >= max_el,
+ _("scalar index out of range"));
+ constraint (n != 0 && NEON_REG_STRIDE (inst.operands[0].imm) == 2
+ && et.size == 8,
+ _("stride of 2 unavailable when element size is 8"));
+
+ switch (n)
+ {
+ case 0: /* VLD1 / VST1. */
+ align_good = neon_alignment_bit (et.size, align, &do_align, 16, 16,
+ 32, 32, -1);
+ if (align_good == FAIL)
+ return;
+ if (do_align)
+ {
+ unsigned alignbits = 0;
+ switch (et.size)
+ {
+ case 16: alignbits = 0x1; break;
+ case 32: alignbits = 0x3; break;
+ default: ;
+ }
+ inst.instruction |= alignbits << 4;
+ }
+ break;
+
+ case 1: /* VLD2 / VST2. */
+ align_good = neon_alignment_bit (et.size, align, &do_align, 8, 16, 16, 32,
+ 32, 64, -1);
+ if (align_good == FAIL)
+ return;
+ if (do_align)
+ inst.instruction |= 1 << 4;
+ break;
+
+ case 2: /* VLD3 / VST3. */
+ constraint (inst.operands[1].immisalign,
+ _("can't use alignment with this instruction"));
+ break;
+
+ case 3: /* VLD4 / VST4. */
+ align_good = neon_alignment_bit (et.size, align, &do_align, 8, 32,
+ 16, 64, 32, 64, 32, 128, -1);
+ if (align_good == FAIL)
+ return;
+ if (do_align)
+ {
+ unsigned alignbits = 0;
+ switch (et.size)
+ {
+ case 8: alignbits = 0x1; break;
+ case 16: alignbits = 0x1; break;
+ case 32: alignbits = (align == 64) ? 0x1 : 0x2; break;
+ default: ;
+ }
+ inst.instruction |= alignbits << 4;
+ }
+ break;
+
+ default: ;
+ }
+
+ /* Reg stride of 2 is encoded in bit 5 when size==16, bit 6 when size==32. */
+ if (n != 0 && NEON_REG_STRIDE (inst.operands[0].imm) == 2)
+ inst.instruction |= 1 << (4 + logsize);
+
+ inst.instruction |= NEON_LANE (inst.operands[0].imm) << (logsize + 5);
+ inst.instruction |= logsize << 10;
+}
+
+/* Encode single n-element structure to all lanes VLD<n> instructions. */
+
+static void
+do_neon_ld_dup (void)
+{
+ struct neon_type_el et = neon_check_type (1, NS_IGNORE, N_8 | N_16 | N_32);
+ int align_good, do_align = 0;
+
+ switch ((inst.instruction >> 8) & 3)
+ {
+ case 0: /* VLD1. */
+ assert (NEON_REG_STRIDE (inst.operands[0].imm) != 2);
+ align_good = neon_alignment_bit (et.size, inst.operands[1].imm >> 8,
+ &do_align, 16, 16, 32, 32, -1);
+ if (align_good == FAIL)
+ return;
+ switch (NEON_REGLIST_LENGTH (inst.operands[0].imm))
+ {
+ case 1: break;
+ case 2: inst.instruction |= 1 << 5; break;
+ default: inst.error = _("bad list length"); return;
+ }
+ inst.instruction |= neon_logbits (et.size) << 6;
+ break;
+
+ case 1: /* VLD2. */
+ align_good = neon_alignment_bit (et.size, inst.operands[1].imm >> 8,
+ &do_align, 8, 16, 16, 32, 32, 64, -1);
+ if (align_good == FAIL)
+ return;
+ constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 2,
+ _("bad list length"));
+ if (NEON_REG_STRIDE (inst.operands[0].imm) == 2)
+ inst.instruction |= 1 << 5;
+ inst.instruction |= neon_logbits (et.size) << 6;
+ break;
+
+ case 2: /* VLD3. */
+ constraint (inst.operands[1].immisalign,
+ _("can't use alignment with this instruction"));
+ constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 3,
+ _("bad list length"));
+ if (NEON_REG_STRIDE (inst.operands[0].imm) == 2)
+ inst.instruction |= 1 << 5;
+ inst.instruction |= neon_logbits (et.size) << 6;
+ break;
+
+ case 3: /* VLD4. */
+ {
+ int align = inst.operands[1].imm >> 8;
+ align_good = neon_alignment_bit (et.size, align, &do_align, 8, 32,
+ 16, 64, 32, 64, 32, 128, -1);
+ if (align_good == FAIL)
+ return;
+ constraint (NEON_REGLIST_LENGTH (inst.operands[0].imm) != 4,
+ _("bad list length"));
+ if (NEON_REG_STRIDE (inst.operands[0].imm) == 2)
+ inst.instruction |= 1 << 5;
+ if (et.size == 32 && align == 128)
+ inst.instruction |= 0x3 << 6;
+ else
+ inst.instruction |= neon_logbits (et.size) << 6;
+ }
+ break;
+
+ default: ;
+ }
+
+ inst.instruction |= do_align << 4;
+}
+
+/* Disambiguate VLD<n> and VST<n> instructions, and fill in common bits (those
+ apart from bits [11:4]. */
+
+static void
+do_neon_ldx_stx (void)
+{
+ switch (NEON_LANE (inst.operands[0].imm))
+ {
+ case NEON_INTERLEAVE_LANES:
+ inst.instruction = NEON_ENC_INTERLV (inst.instruction);
+ do_neon_ld_st_interleave ();
+ break;
+
+ case NEON_ALL_LANES:
+ inst.instruction = NEON_ENC_DUP (inst.instruction);
+ do_neon_ld_dup ();
+ break;
+
+ default:
+ inst.instruction = NEON_ENC_LANE (inst.instruction);
+ do_neon_ld_st_lane ();
+ }
+
+ /* L bit comes from bit mask. */
+ inst.instruction |= LOW4 (inst.operands[0].reg) << 12;
+ inst.instruction |= HI1 (inst.operands[0].reg) << 22;
+ inst.instruction |= inst.operands[1].reg << 16;
+
+ if (inst.operands[1].postind)
+ {
+ int postreg = inst.operands[1].imm & 0xf;
+ constraint (!inst.operands[1].immisreg,
+ _("post-index must be a register"));
+ constraint (postreg == 0xd || postreg == 0xf,
+ _("bad register for post-index"));
+ inst.instruction |= postreg;
+ }
+ else if (inst.operands[1].writeback)
+ {
+ inst.instruction |= 0xd;
+ }
+ else
+ inst.instruction |= 0xf;
+
+ if (thumb_mode)
+ inst.instruction |= 0xf9000000;
+ else
+ inst.instruction |= 0xf4000000;
+}
+
/* Overall per-instruction processing. */
@@ -8123,6 +11488,85 @@ output_inst (const char * str)
#endif
}
+/* Parse a Neon type specifier. *STR should point at the leading '.'
+ character. Does no verification at this stage that the type fits the opcode
+ properly. E.g.,
+
+ .i32.i32.s16
+ .s32.f32
+ .u16
+
+ Can all be legally parsed by this function.
+
+ Fills in neon_type struct pointer with parsed information, and updates STR
+ to point after the parsed type specifier. Returns TRUE if this was a legal
+ type, FALSE if not. */
+
+static bfd_boolean
+parse_neon_type (struct neon_type *type, char **str)
+{
+ char *ptr = *str;
+
+ if (type)
+ type->elems = 0;
+
+ while (type->elems < NEON_MAX_TYPE_ELS)
+ {
+ enum neon_el_type thistype = NT_untyped;
+ unsigned thissize = -1u;
+
+ if (*ptr != '.')
+ break;
+
+ ptr++;
+
+ /* Just a size without an explicit type. */
+ if (ISDIGIT (*ptr))
+ goto parsesize;
+
+ switch (*ptr)
+ {
+ case 'i': thistype = NT_integer; break;
+ case 'f': thistype = NT_float; break;
+ case 'p': thistype = NT_poly; break;
+ case 's': thistype = NT_signed; break;
+ case 'u': thistype = NT_unsigned; break;
+ default:
+ as_bad (_("Unexpected character `%c' in type specifier"), *ptr);
+ return 0;
+ }
+
+ ptr++;
+
+ /* .f is an abbreviation for .f32. */
+ if (thistype == NT_float && !ISDIGIT (*ptr))
+ thissize = 32;
+ else
+ {
+ parsesize:
+ thissize = strtoul (ptr, &ptr, 10);
+
+ if (thissize != 8 && thissize != 16 && thissize != 32
+ && thissize != 64)
+ {
+ as_bad (_("Bad size %d in type specifier"), thissize);
+ return 0;
+ }
+ }
+
+ if (type)
+ {
+ type->el[type->elems].type = thistype;
+ type->el[type->elems].size = thissize;
+ type->elems++;
+ }
+ }
+
+ *str = ptr;
+
+ return 1;
+}
+
/* Tag values used in struct asm_opcode's tag field. */
enum opcode_tag
{
@@ -8222,17 +11666,30 @@ opcode_lookup (char **str)
if (end == base)
return 0;
- /* Handle a possible width suffix. */
+ /* Handle a possible width suffix and/or Neon type suffix. */
if (end[0] == '.')
{
- if (end[1] == 'w' && (end[2] == ' ' || end[2] == '\0'))
+ int offset = 2;
+
+ if (end[1] == 'w')
inst.size_req = 4;
- else if (end[1] == 'n' && (end[2] == ' ' || end[2] == '\0'))
+ else if (end[1] == 'n')
inst.size_req = 2;
else
- return 0;
+ offset = 0;
+
+ inst.vectype.elems = 0;
- *str = end + 2;
+ *str = end + offset;
+
+ if (end[offset] == '.')
+ {
+ /* See if we have a Neon type suffix. */
+ if (!parse_neon_type (&inst.vectype, str))
+ return 0;
+ }
+ else if (end[offset] != '\0' && end[offset] != ' ')
+ return 0;
}
else
*str = end;
@@ -8576,11 +12033,22 @@ arm_canonicalize_symbol_name (char * name)
#define REGDEF(s,n,t) { #s, n, REG_TYPE_##t, TRUE }
#define REGNUM(p,n,t) REGDEF(p##n, n, t)
+#define REGNUM2(p,n,t) REGDEF(p##n, 2 * n, t)
#define REGSET(p,t) \
REGNUM(p, 0,t), REGNUM(p, 1,t), REGNUM(p, 2,t), REGNUM(p, 3,t), \
REGNUM(p, 4,t), REGNUM(p, 5,t), REGNUM(p, 6,t), REGNUM(p, 7,t), \
REGNUM(p, 8,t), REGNUM(p, 9,t), REGNUM(p,10,t), REGNUM(p,11,t), \
REGNUM(p,12,t), REGNUM(p,13,t), REGNUM(p,14,t), REGNUM(p,15,t)
+#define REGSETH(p,t) \
+ REGNUM(p,16,t), REGNUM(p,17,t), REGNUM(p,18,t), REGNUM(p,19,t), \
+ REGNUM(p,20,t), REGNUM(p,21,t), REGNUM(p,22,t), REGNUM(p,23,t), \
+ REGNUM(p,24,t), REGNUM(p,25,t), REGNUM(p,26,t), REGNUM(p,27,t), \
+ REGNUM(p,28,t), REGNUM(p,29,t), REGNUM(p,30,t), REGNUM(p,31,t)
+#define REGSET2(p,t) \
+ REGNUM2(p, 0,t), REGNUM2(p, 1,t), REGNUM2(p, 2,t), REGNUM2(p, 3,t), \
+ REGNUM2(p, 4,t), REGNUM2(p, 5,t), REGNUM2(p, 6,t), REGNUM2(p, 7,t), \
+ REGNUM2(p, 8,t), REGNUM2(p, 9,t), REGNUM2(p,10,t), REGNUM2(p,11,t), \
+ REGNUM2(p,12,t), REGNUM2(p,13,t), REGNUM2(p,14,t), REGNUM2(p,15,t)
static const struct reg_entry reg_names[] =
{
@@ -8619,20 +12087,16 @@ static const struct reg_entry reg_names[] =
REGNUM(F,4,FN), REGNUM(F,5,FN), REGNUM(F,6,FN), REGNUM(F,7, FN),
/* VFP SP registers. */
- REGSET(s,VFS),
- REGNUM(s,16,VFS), REGNUM(s,17,VFS), REGNUM(s,18,VFS), REGNUM(s,19,VFS),
- REGNUM(s,20,VFS), REGNUM(s,21,VFS), REGNUM(s,22,VFS), REGNUM(s,23,VFS),
- REGNUM(s,24,VFS), REGNUM(s,25,VFS), REGNUM(s,26,VFS), REGNUM(s,27,VFS),
- REGNUM(s,28,VFS), REGNUM(s,29,VFS), REGNUM(s,30,VFS), REGNUM(s,31,VFS),
-
- REGSET(S,VFS),
- REGNUM(S,16,VFS), REGNUM(S,17,VFS), REGNUM(S,18,VFS), REGNUM(S,19,VFS),
- REGNUM(S,20,VFS), REGNUM(S,21,VFS), REGNUM(S,22,VFS), REGNUM(S,23,VFS),
- REGNUM(S,24,VFS), REGNUM(S,25,VFS), REGNUM(S,26,VFS), REGNUM(S,27,VFS),
- REGNUM(S,28,VFS), REGNUM(S,29,VFS), REGNUM(S,30,VFS), REGNUM(S,31,VFS),
+ REGSET(s,VFS), REGSET(S,VFS),
+ REGSETH(s,VFS), REGSETH(S,VFS),
/* VFP DP Registers. */
- REGSET(d,VFD), REGSET(D,VFS),
+ REGSET(d,VFD), REGSET(D,VFD),
+ /* Extra Neon DP registers. */
+ REGSETH(d,VFD), REGSETH(D,VFD),
+
+ /* Neon QP registers. */
+ REGSET2(q,NQ), REGSET2(Q,NQ),
/* VFP control registers. */
REGDEF(fpsid,0,VFC), REGDEF(fpscr,1,VFC), REGDEF(fpexc,8,VFC),
@@ -8971,6 +12435,30 @@ static struct asm_barrier_opt barrier_opt_names[] =
#define UF(mnem, op, nops, ops, ae) \
{ #mnem, OPS##nops ops, OT_unconditionalF, 0x##op, 0, ARM_VARIANT, 0, do_##ae, NULL }
+/* Neon data-processing. ARM versions are unconditional with cond=0xf.
+ The Thumb and ARM variants are mostly the same (bits 0-23 and 24/28), so we
+ use the same encoding function for each. */
+#define NUF(mnem, op, nops, ops, enc) \
+ { #mnem, OPS##nops ops, OT_unconditionalF, 0x##op, 0x##op, \
+ ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc }
+
+/* Neon data processing, version which indirects through neon_enc_tab for
+ the various overloaded versions of opcodes. */
+#define nUF(mnem, op, nops, ops, enc) \
+ { #mnem, OPS##nops ops, OT_unconditionalF, N_MNEM_##op, N_MNEM_##op, \
+ ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc }
+
+/* Neon insn with conditional suffix for the ARM version, non-overloaded
+ version. */
+#define NCE(mnem, op, nops, ops, enc) \
+ { #mnem, OPS##nops ops, OT_csuffix, 0x##op, 0x##op, ARM_VARIANT, \
+ THUMB_VARIANT, do_##enc, do_##enc }
+
+/* Neon insn with conditional suffix for the ARM version, overloaded types. */
+#define nCE(mnem, op, nops, ops, enc) \
+ { #mnem, OPS##nops ops, OT_csuffix, N_MNEM_##op, N_MNEM_##op, \
+ ARM_VARIANT, THUMB_VARIANT, do_##enc, do_##enc }
+
#define do_0 0
/* Thumb-only, unconditional. */
@@ -9911,13 +13399,13 @@ static const struct asm_opcode insns[] =
#undef ARM_VARIANT
#define ARM_VARIANT &fpu_vfp_ext_v1 /* VFP V1 (Double precision). */
/* Moves and type conversions. */
- cCE(fcpyd, eb00b40, 2, (RVD, RVD), rd_rm),
+ cCE(fcpyd, eb00b40, 2, (RVD, RVD), vfp_dp_rd_rm),
cCE(fcvtds, eb70ac0, 2, (RVD, RVS), vfp_dp_sp_cvt),
cCE(fcvtsd, eb70bc0, 2, (RVS, RVD), vfp_sp_dp_cvt),
- cCE(fmdhr, e200b10, 2, (RVD, RR), rn_rd),
- cCE(fmdlr, e000b10, 2, (RVD, RR), rn_rd),
- cCE(fmrdh, e300b10, 2, (RR, RVD), rd_rn),
- cCE(fmrdl, e100b10, 2, (RR, RVD), rd_rn),
+ cCE(fmdhr, e200b10, 2, (RVD, RR), vfp_dp_rn_rd),
+ cCE(fmdlr, e000b10, 2, (RVD, RR), vfp_dp_rn_rd),
+ cCE(fmrdh, e300b10, 2, (RR, RVD), vfp_dp_rd_rn),
+ cCE(fmrdl, e100b10, 2, (RR, RVD), vfp_dp_rd_rn),
cCE(fsitod, eb80bc0, 2, (RVD, RVS), vfp_dp_sp_cvt),
cCE(fuitod, eb80b40, 2, (RVD, RVS), vfp_dp_sp_cvt),
cCE(ftosid, ebd0b40, 2, (RVS, RVD), vfp_sp_dp_cvt),
@@ -9938,34 +13426,327 @@ static const struct asm_opcode insns[] =
cCE(fstmfdd, d200b00, 2, (RRw, VRDLST), vfp_dp_ldstmdb),
/* Monadic operations. */
- cCE(fabsd, eb00bc0, 2, (RVD, RVD), rd_rm),
- cCE(fnegd, eb10b40, 2, (RVD, RVD), rd_rm),
- cCE(fsqrtd, eb10bc0, 2, (RVD, RVD), rd_rm),
+ cCE(fabsd, eb00bc0, 2, (RVD, RVD), vfp_dp_rd_rm),
+ cCE(fnegd, eb10b40, 2, (RVD, RVD), vfp_dp_rd_rm),
+ cCE(fsqrtd, eb10bc0, 2, (RVD, RVD), vfp_dp_rd_rm),
/* Dyadic operations. */
- cCE(faddd, e300b00, 3, (RVD, RVD, RVD), rd_rn_rm),
- cCE(fsubd, e300b40, 3, (RVD, RVD, RVD), rd_rn_rm),
- cCE(fmuld, e200b00, 3, (RVD, RVD, RVD), rd_rn_rm),
- cCE(fdivd, e800b00, 3, (RVD, RVD, RVD), rd_rn_rm),
- cCE(fmacd, e000b00, 3, (RVD, RVD, RVD), rd_rn_rm),
- cCE(fmscd, e100b00, 3, (RVD, RVD, RVD), rd_rn_rm),
- cCE(fnmuld, e200b40, 3, (RVD, RVD, RVD), rd_rn_rm),
- cCE(fnmacd, e000b40, 3, (RVD, RVD, RVD), rd_rn_rm),
- cCE(fnmscd, e100b40, 3, (RVD, RVD, RVD), rd_rn_rm),
+ cCE(faddd, e300b00, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm),
+ cCE(fsubd, e300b40, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm),
+ cCE(fmuld, e200b00, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm),
+ cCE(fdivd, e800b00, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm),
+ cCE(fmacd, e000b00, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm),
+ cCE(fmscd, e100b00, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm),
+ cCE(fnmuld, e200b40, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm),
+ cCE(fnmacd, e000b40, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm),
+ cCE(fnmscd, e100b40, 3, (RVD, RVD, RVD), vfp_dp_rd_rn_rm),
/* Comparisons. */
- cCE(fcmpd, eb40b40, 2, (RVD, RVD), rd_rm),
- cCE(fcmpzd, eb50b40, 1, (RVD), rd),
- cCE(fcmped, eb40bc0, 2, (RVD, RVD), rd_rm),
- cCE(fcmpezd, eb50bc0, 1, (RVD), rd),
+ cCE(fcmpd, eb40b40, 2, (RVD, RVD), vfp_dp_rd_rm),
+ cCE(fcmpzd, eb50b40, 1, (RVD), vfp_dp_rd),
+ cCE(fcmped, eb40bc0, 2, (RVD, RVD), vfp_dp_rd_rm),
+ cCE(fcmpezd, eb50bc0, 1, (RVD), vfp_dp_rd),
#undef ARM_VARIANT
#define ARM_VARIANT &fpu_vfp_ext_v2
cCE(fmsrr, c400a10, 3, (VRSLST, RR, RR), vfp_sp2_from_reg2),
cCE(fmrrs, c500a10, 3, (RR, RR, VRSLST), vfp_reg2_from_sp2),
- cCE(fmdrr, c400b10, 3, (RVD, RR, RR), rm_rd_rn),
- cCE(fmrrd, c500b10, 3, (RR, RR, RVD), rd_rn_rm),
+ cCE(fmdrr, c400b10, 3, (RVD, RR, RR), vfp_dp_rm_rd_rn),
+ cCE(fmrrd, c500b10, 3, (RR, RR, RVD), vfp_dp_rd_rn_rm),
+
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &fpu_neon_ext_v1
+#undef ARM_VARIANT
+#define ARM_VARIANT &fpu_neon_ext_v1
+ /* Data processing with three registers of the same length. */
+ /* integer ops, valid types S8 S16 S32 U8 U16 U32. */
+ NUF(vaba, 0000710, 3, (RNDQ, RNDQ, RNDQ), neon_dyadic_i_su),
+ NUF(vabaq, 0000710, 3, (RNQ, RNQ, RNQ), neon_dyadic_i_su),
+ NUF(vhadd, 0000000, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su),
+ NUF(vhaddq, 0000000, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i_su),
+ NUF(vrhadd, 0000100, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su),
+ NUF(vrhaddq, 0000100, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i_su),
+ NUF(vhsub, 0000200, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i_su),
+ NUF(vhsubq, 0000200, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i_su),
+ /* integer ops, valid types S8 S16 S32 S64 U8 U16 U32 U64. */
+ NUF(vqadd, 0000010, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su),
+ NUF(vqaddq, 0000010, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i64_su),
+ NUF(vqsub, 0000210, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su),
+ NUF(vqsubq, 0000210, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i64_su),
+ NUF(vrshl, 0000500, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su),
+ NUF(vrshlq, 0000500, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i64_su),
+ NUF(vqrshl, 0000510, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_i64_su),
+ NUF(vqrshlq, 0000510, 3, (RNQ, oRNQ, RNQ), neon_dyadic_i64_su),
+ /* If not immediate, fall back to neon_dyadic_i64_su.
+ shl_imm should accept I8 I16 I32 I64,
+ qshl_imm should accept S8 S16 S32 S64 U8 U16 U32 U64. */
+ nUF(vshl, vshl, 3, (RNDQ, oRNDQ, RNDQ_I63b), neon_shl_imm),
+ nUF(vshlq, vshl, 3, (RNQ, oRNQ, RNDQ_I63b), neon_shl_imm),
+ nUF(vqshl, vqshl, 3, (RNDQ, oRNDQ, RNDQ_I63b), neon_qshl_imm),
+ nUF(vqshlq, vqshl, 3, (RNQ, oRNQ, RNDQ_I63b), neon_qshl_imm),
+ /* Logic ops, types optional & ignored. */
+ nUF(vand, vand, 2, (RNDQ, NILO), neon_logic),
+ nUF(vandq, vand, 2, (RNQ, NILO), neon_logic),
+ nUF(vbic, vbic, 2, (RNDQ, NILO), neon_logic),
+ nUF(vbicq, vbic, 2, (RNQ, NILO), neon_logic),
+ nUF(vorr, vorr, 2, (RNDQ, NILO), neon_logic),
+ nUF(vorrq, vorr, 2, (RNQ, NILO), neon_logic),
+ nUF(vorn, vorn, 2, (RNDQ, NILO), neon_logic),
+ nUF(vornq, vorn, 2, (RNQ, NILO), neon_logic),
+ nUF(veor, veor, 3, (RNDQ, oRNDQ, RNDQ), neon_logic),
+ nUF(veorq, veor, 3, (RNQ, oRNQ, RNQ), neon_logic),
+ /* Bitfield ops, untyped. */
+ NUF(vbsl, 1100110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield),
+ NUF(vbslq, 1100110, 3, (RNQ, RNQ, RNQ), neon_bitfield),
+ NUF(vbit, 1200110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield),
+ NUF(vbitq, 1200110, 3, (RNQ, RNQ, RNQ), neon_bitfield),
+ NUF(vbif, 1300110, 3, (RNDQ, RNDQ, RNDQ), neon_bitfield),
+ NUF(vbifq, 1300110, 3, (RNQ, RNQ, RNQ), neon_bitfield),
+ /* Int and float variants, types S8 S16 S32 U8 U16 U32 F32. */
+ nUF(vabd, vabd, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
+ nUF(vabdq, vabd, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su),
+ nUF(vmax, vmax, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
+ nUF(vmaxq, vmax, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su),
+ nUF(vmin, vmin, 3, (RNDQ, oRNDQ, RNDQ), neon_dyadic_if_su),
+ nUF(vminq, vmin, 3, (RNQ, oRNQ, RNQ), neon_dyadic_if_su),
+ /* Comparisons. Types S8 S16 S32 U8 U16 U32 F32. Non-immediate versions fall
+ back to neon_dyadic_if_su. */
+ nUF(vcge, vcge, 3, (RNDQ, oRNDQ, RNDQ_I0), neon_cmp),
+ nUF(vcgeq, vcge, 3, (RNQ, oRNQ, RNDQ_I0), neon_cmp),
+ nUF(vcgt, vcgt, 3, (RNDQ, oRNDQ, RNDQ_I0), neon_cmp),
+ nUF(vcgtq, vcgt, 3, (RNQ, oRNQ, RNDQ_I0), neon_cmp),
+ nUF(vclt, vclt, 3, (RNDQ, oRNDQ, RNDQ_I0), neon_cmp_inv),
+ nUF(vcltq, vclt, 3, (RNQ, oRNQ, RNDQ_I0), neon_cmp_inv),
+ nUF(vcle, vcle, 3, (RNDQ, oRNDQ, RNDQ_I0), neon_cmp_inv),
+ nUF(vcleq, vcle, 3, (RNQ, oRNQ, RNDQ_I0), neon_cmp_inv),
+ /* Comparison. Type I8 I16 I32 F32. Non-immediate -> neon_dyadic_if_i. */
+ nUF(vceq, vceq, 3, (RNDQ, oRNDQ, RNDQ_I0), neon_ceq),
+ nUF(vceqq, vceq, 3, (RNQ, oRNQ, RNDQ_I0), neon_ceq),
+ /* As above, D registers only. */
+ nUF(vpmax, vpmax, 3, (RND, oRND, RND), neon_dyadic_if_su_d),
+ nUF(vpmin, vpmin, 3, (RND, oRND, RND), neon_dyadic_if_su_d),
+ /* Int and float variants, signedness unimportant. */
+ /* If not scalar, fall back to neon_dyadic_if_i. */
+ nUF(vmla, vmla, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_mac_maybe_scalar),
+ nUF(vmlaq, vmla, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_mac_maybe_scalar),
+ nUF(vmls, vmls, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_mac_maybe_scalar),
+ nUF(vmlsq, vmls, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_mac_maybe_scalar),
+ nUF(vpadd, vpadd, 3, (RND, oRND, RND), neon_dyadic_if_i_d),
+ /* Add/sub take types I8 I16 I32 I64 F32. */
+ nUF(vadd, vadd, 3, (RNDQ, oRNDQ, RNDQ), neon_addsub_if_i),
+ nUF(vaddq, vadd, 3, (RNQ, oRNQ, RNQ), neon_addsub_if_i),
+ nUF(vsub, vsub, 3, (RNDQ, oRNDQ, RNDQ), neon_addsub_if_i),
+ nUF(vsubq, vsub, 3, (RNQ, oRNQ, RNQ), neon_addsub_if_i),
+ /* vtst takes sizes 8, 16, 32. */
+ NUF(vtst, 0000810, 3, (RNDQ, oRNDQ, RNDQ), neon_tst),
+ NUF(vtstq, 0000810, 3, (RNQ, oRNQ, RNQ), neon_tst),
+ /* VMUL takes I8 I16 I32 F32 P8. */
+ nUF(vmul, vmul, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_mul),
+ nUF(vmulq, vmul, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_mul),
+ /* VQD{R}MULH takes S16 S32. */
+ nUF(vqdmulh, vqdmulh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh),
+ nUF(vqdmulhq, vqdmulh, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qdmulh),
+ nUF(vqrdmulh, vqrdmulh, 3, (RNDQ, oRNDQ, RNDQ_RNSC), neon_qdmulh),
+ nUF(vqrdmulhq, vqrdmulh, 3, (RNQ, oRNQ, RNDQ_RNSC), neon_qdmulh),
+ NUF(vacge, 0000e10, 3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute),
+ NUF(vacgeq, 0000e10, 3, (RNQ, oRNQ, RNQ), neon_fcmp_absolute),
+ NUF(vacgt, 0200e10, 3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute),
+ NUF(vacgtq, 0200e10, 3, (RNQ, oRNQ, RNQ), neon_fcmp_absolute),
+ NUF(vaclt, 0000e10, 3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute_inv),
+ NUF(vacltq, 0000e10, 3, (RNQ, oRNQ, RNQ), neon_fcmp_absolute_inv),
+ NUF(vacle, 0200e10, 3, (RNDQ, oRNDQ, RNDQ), neon_fcmp_absolute_inv),
+ NUF(vacleq, 0200e10, 3, (RNQ, oRNQ, RNQ), neon_fcmp_absolute_inv),
+ NUF(vrecps, 0000f10, 3, (RNDQ, oRNDQ, RNDQ), neon_step),
+ NUF(vrecpsq, 0000f10, 3, (RNQ, oRNQ, RNQ), neon_step),
+ NUF(vrsqrts, 0200f10, 3, (RNDQ, oRNDQ, RNDQ), neon_step),
+ NUF(vrsqrtsq, 0200f10, 3, (RNQ, oRNQ, RNQ), neon_step),
+
+ /* Two address, int/float. Types S8 S16 S32 F32. */
+ NUF(vabs, 1b10300, 2, (RNDQ, RNDQ), neon_abs_neg),
+ NUF(vabsq, 1b10300, 2, (RNQ, RNQ), neon_abs_neg),
+ NUF(vneg, 1b10380, 2, (RNDQ, RNDQ), neon_abs_neg),
+ NUF(vnegq, 1b10380, 2, (RNQ, RNQ), neon_abs_neg),
+
+ /* Data processing with two registers and a shift amount. */
+ /* Right shifts, and variants with rounding.
+ Types accepted S8 S16 S32 S64 U8 U16 U32 U64. */
+ NUF(vshr, 0800010, 3, (RNDQ, oRNDQ, I64z), neon_rshift_round_imm),
+ NUF(vshrq, 0800010, 3, (RNQ, oRNQ, I64z), neon_rshift_round_imm),
+ NUF(vrshr, 0800210, 3, (RNDQ, oRNDQ, I64z), neon_rshift_round_imm),
+ NUF(vrshrq, 0800210, 3, (RNQ, oRNQ, I64z), neon_rshift_round_imm),
+ NUF(vsra, 0800110, 3, (RNDQ, oRNDQ, I64), neon_rshift_round_imm),
+ NUF(vsraq, 0800110, 3, (RNQ, oRNQ, I64), neon_rshift_round_imm),
+ NUF(vrsra, 0800310, 3, (RNDQ, oRNDQ, I64), neon_rshift_round_imm),
+ NUF(vrsraq, 0800310, 3, (RNQ, oRNQ, I64), neon_rshift_round_imm),
+ /* Shift and insert. Sizes accepted 8 16 32 64. */
+ NUF(vsli, 1800510, 3, (RNDQ, oRNDQ, I63), neon_sli),
+ NUF(vsliq, 1800510, 3, (RNQ, oRNQ, I63), neon_sli),
+ NUF(vsri, 1800410, 3, (RNDQ, oRNDQ, I64), neon_sri),
+ NUF(vsriq, 1800410, 3, (RNQ, oRNQ, I64), neon_sri),
+ /* QSHL{U} immediate accepts S8 S16 S32 S64 U8 U16 U32 U64. */
+ NUF(vqshlu, 1800610, 3, (RNDQ, oRNDQ, I63), neon_qshlu_imm),
+ NUF(vqshluq, 1800610, 3, (RNQ, oRNQ, I63), neon_qshlu_imm),
+ /* Right shift immediate, saturating & narrowing, with rounding variants.
+ Types accepted S16 S32 S64 U16 U32 U64. */
+ NUF(vqshrn, 0800910, 3, (RND, RNQ, I32z), neon_rshift_sat_narrow),
+ NUF(vqrshrn, 0800950, 3, (RND, RNQ, I32z), neon_rshift_sat_narrow),
+ /* As above, unsigned. Types accepted S16 S32 S64. */
+ NUF(vqshrun, 0800810, 3, (RND, RNQ, I32z), neon_rshift_sat_narrow_u),
+ NUF(vqrshrun, 0800850, 3, (RND, RNQ, I32z), neon_rshift_sat_narrow_u),
+ /* Right shift narrowing. Types accepted I16 I32 I64. */
+ NUF(vshrn, 0800810, 3, (RND, RNQ, I32z), neon_rshift_narrow),
+ NUF(vrshrn, 0800850, 3, (RND, RNQ, I32z), neon_rshift_narrow),
+ /* Special case. Types S8 S16 S32 U8 U16 U32. Handles max shift variant. */
+ nUF(vshll, vshll, 3, (RNQ, RND, I32), neon_shll),
+ /* CVT with optional immediate for fixed-point variant. */
+ nUF(vcvt, vcvt, 3, (RNDQ, RNDQ, oI32b), neon_cvt),
+ nUF(vcvtq, vcvt, 3, (RNQ, RNQ, oI32b), neon_cvt),
+
+ /* One register and an immediate value. All encoding special-cased! */
+ NCE(vmov, 0, 1, (VMOV), neon_mov),
+ NCE(vmovq, 0, 1, (VMOV), neon_mov),
+ nUF(vmvn, vmvn, 2, (RNDQ, RNDQ_IMVNb), neon_mvn),
+ nUF(vmvnq, vmvn, 2, (RNQ, RNDQ_IMVNb), neon_mvn),
+
+ /* Data processing, three registers of different lengths. */
+ /* Dyadic, long insns. Types S8 S16 S32 U8 U16 U32. */
+ NUF(vabal, 0800500, 3, (RNQ, RND, RND), neon_abal),
+ NUF(vabdl, 0800700, 3, (RNQ, RND, RND), neon_dyadic_long),
+ NUF(vaddl, 0800000, 3, (RNQ, RND, RND), neon_dyadic_long),
+ NUF(vsubl, 0800200, 3, (RNQ, RND, RND), neon_dyadic_long),
+ /* If not scalar, fall back to neon_dyadic_long.
+ Vector types as above, scalar types S16 S32 U16 U32. */
+ nUF(vmlal, vmlal, 3, (RNQ, RND, RND_RNSC), neon_mac_maybe_scalar_long),
+ nUF(vmlsl, vmlsl, 3, (RNQ, RND, RND_RNSC), neon_mac_maybe_scalar_long),
+ /* Dyadic, widening insns. Types S8 S16 S32 U8 U16 U32. */
+ NUF(vaddw, 0800100, 3, (RNQ, oRNQ, RND), neon_dyadic_wide),
+ NUF(vsubw, 0800300, 3, (RNQ, oRNQ, RND), neon_dyadic_wide),
+ /* Dyadic, narrowing insns. Types I16 I32 I64. */
+ NUF(vaddhn, 0800400, 3, (RND, RNQ, RNQ), neon_dyadic_narrow),
+ NUF(vraddhn, 1800400, 3, (RND, RNQ, RNQ), neon_dyadic_narrow),
+ NUF(vsubhn, 0800600, 3, (RND, RNQ, RNQ), neon_dyadic_narrow),
+ NUF(vrsubhn, 1800600, 3, (RND, RNQ, RNQ), neon_dyadic_narrow),
+ /* Saturating doubling multiplies. Types S16 S32. */
+ nUF(vqdmlal, vqdmlal, 3, (RNQ, RND, RND_RNSC), neon_mul_sat_scalar_long),
+ nUF(vqdmlsl, vqdmlsl, 3, (RNQ, RND, RND_RNSC), neon_mul_sat_scalar_long),
+ nUF(vqdmull, vqdmull, 3, (RNQ, RND, RND_RNSC), neon_mul_sat_scalar_long),
+ /* VMULL. Vector types S8 S16 S32 U8 U16 U32 P8, scalar types
+ S16 S32 U16 U32. */
+ nUF(vmull, vmull, 3, (RNQ, RND, RND_RNSC), neon_vmull),
+
+ /* Extract. Size 8. */
+ NUF(vext, 0b00000, 4, (RNDQ, oRNDQ, RNDQ, I7), neon_ext),
+ NUF(vextq, 0b00000, 4, (RNQ, oRNQ, RNQ, I7), neon_ext),
+
+ /* Two registers, miscellaneous. */
+ /* Reverse. Sizes 8 16 32 (must be < size in opcode). */
+ NUF(vrev64, 1b00000, 2, (RNDQ, RNDQ), neon_rev),
+ NUF(vrev64q, 1b00000, 2, (RNQ, RNQ), neon_rev),
+ NUF(vrev32, 1b00080, 2, (RNDQ, RNDQ), neon_rev),
+ NUF(vrev32q, 1b00080, 2, (RNQ, RNQ), neon_rev),
+ NUF(vrev16, 1b00100, 2, (RNDQ, RNDQ), neon_rev),
+ NUF(vrev16q, 1b00100, 2, (RNQ, RNQ), neon_rev),
+ /* Vector replicate. Sizes 8 16 32. */
+ nCE(vdup, vdup, 2, (RNDQ, RR_RNSC), neon_dup),
+ nCE(vdupq, vdup, 2, (RNQ, RR_RNSC), neon_dup),
+ /* VMOVL. Types S8 S16 S32 U8 U16 U32. */
+ NUF(vmovl, 0800a10, 2, (RNQ, RND), neon_movl),
+ /* VMOVN. Types I16 I32 I64. */
+ nUF(vmovn, vmovn, 2, (RND, RNQ), neon_movn),
+ /* VQMOVN. Types S16 S32 S64 U16 U32 U64. */
+ nUF(vqmovn, vqmovn, 2, (RND, RNQ), neon_qmovn),
+ /* VQMOVUN. Types S16 S32 S64. */
+ nUF(vqmovun, vqmovun, 2, (RND, RNQ), neon_qmovun),
+ /* VZIP / VUZP. Sizes 8 16 32. */
+ NUF(vzip, 1b20180, 2, (RNDQ, RNDQ), neon_zip_uzp),
+ NUF(vzipq, 1b20180, 2, (RNQ, RNQ), neon_zip_uzp),
+ NUF(vuzp, 1b20100, 2, (RNDQ, RNDQ), neon_zip_uzp),
+ NUF(vuzpq, 1b20100, 2, (RNQ, RNQ), neon_zip_uzp),
+ /* VQABS / VQNEG. Types S8 S16 S32. */
+ NUF(vqabs, 1b00700, 2, (RNDQ, RNDQ), neon_sat_abs_neg),
+ NUF(vqabsq, 1b00700, 2, (RNQ, RNQ), neon_sat_abs_neg),
+ NUF(vqneg, 1b00780, 2, (RNDQ, RNDQ), neon_sat_abs_neg),
+ NUF(vqnegq, 1b00780, 2, (RNQ, RNQ), neon_sat_abs_neg),
+ /* Pairwise, lengthening. Types S8 S16 S32 U8 U16 U32. */
+ NUF(vpadal, 1b00600, 2, (RNDQ, RNDQ), neon_pair_long),
+ NUF(vpadalq, 1b00600, 2, (RNQ, RNQ), neon_pair_long),
+ NUF(vpaddl, 1b00200, 2, (RNDQ, RNDQ), neon_pair_long),
+ NUF(vpaddlq, 1b00200, 2, (RNQ, RNQ), neon_pair_long),
+ /* Reciprocal estimates. Types U32 F32. */
+ NUF(vrecpe, 1b30400, 2, (RNDQ, RNDQ), neon_recip_est),
+ NUF(vrecpeq, 1b30400, 2, (RNQ, RNQ), neon_recip_est),
+ NUF(vrsqrte, 1b30480, 2, (RNDQ, RNDQ), neon_recip_est),
+ NUF(vrsqrteq, 1b30480, 2, (RNQ, RNQ), neon_recip_est),
+ /* VCLS. Types S8 S16 S32. */
+ NUF(vcls, 1b00400, 2, (RNDQ, RNDQ), neon_cls),
+ NUF(vclsq, 1b00400, 2, (RNQ, RNQ), neon_cls),
+ /* VCLZ. Types I8 I16 I32. */
+ NUF(vclz, 1b00480, 2, (RNDQ, RNDQ), neon_clz),
+ NUF(vclzq, 1b00480, 2, (RNQ, RNQ), neon_clz),
+ /* VCNT. Size 8. */
+ NUF(vcnt, 1b00500, 2, (RNDQ, RNDQ), neon_cnt),
+ NUF(vcntq, 1b00500, 2, (RNQ, RNQ), neon_cnt),
+ /* Two address, untyped. */
+ NUF(vswp, 1b20000, 2, (RNDQ, RNDQ), neon_swp),
+ NUF(vswpq, 1b20000, 2, (RNQ, RNQ), neon_swp),
+ /* VTRN. Sizes 8 16 32. */
+ nUF(vtrn, vtrn, 2, (RNDQ, RNDQ), neon_trn),
+ nUF(vtrnq, vtrn, 2, (RNQ, RNQ), neon_trn),
+
+ /* Table lookup. Size 8. */
+ NUF(vtbl, 1b00800, 3, (RND, NRDLST, RND), neon_tbl_tbx),
+ NUF(vtbx, 1b00840, 3, (RND, NRDLST, RND), neon_tbl_tbx),
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &fpu_vfp_v3_or_neon_ext
+#undef ARM_VARIANT
+#define ARM_VARIANT &fpu_vfp_v3_or_neon_ext
+
+ /* Load/store instructions. Available in Neon or VFPv3. */
+ NCE(vldm, c900b00, 2, (RRw, NRDLST), neon_ldm_stm),
+ NCE(vldmia, c900b00, 2, (RRw, NRDLST), neon_ldm_stm),
+ NCE(vldmdb, d100b00, 2, (RRw, NRDLST), neon_ldm_stm),
+ NCE(vstm, c800b00, 2, (RRw, NRDLST), neon_ldm_stm),
+ NCE(vstmia, c800b00, 2, (RRw, NRDLST), neon_ldm_stm),
+ NCE(vstmdb, d000b00, 2, (RRw, NRDLST), neon_ldm_stm),
+ NCE(vldr, d100b00, 2, (RND, ADDR), neon_ldr_str),
+ NCE(vstr, d000b00, 2, (RND, ADDR), neon_ldr_str),
+
+ /* Neon element/structure load/store. */
+ nUF(vld1, vld1, 2, (NSTRLST, ADDR), neon_ldx_stx),
+ nUF(vst1, vst1, 2, (NSTRLST, ADDR), neon_ldx_stx),
+ nUF(vld2, vld2, 2, (NSTRLST, ADDR), neon_ldx_stx),
+ nUF(vst2, vst2, 2, (NSTRLST, ADDR), neon_ldx_stx),
+ nUF(vld3, vld3, 2, (NSTRLST, ADDR), neon_ldx_stx),
+ nUF(vst3, vst3, 2, (NSTRLST, ADDR), neon_ldx_stx),
+ nUF(vld4, vld4, 2, (NSTRLST, ADDR), neon_ldx_stx),
+ nUF(vst4, vst4, 2, (NSTRLST, ADDR), neon_ldx_stx),
+
+#undef THUMB_VARIANT
+#define THUMB_VARIANT &fpu_vfp_ext_v3
+#undef ARM_VARIANT
+#define ARM_VARIANT &fpu_vfp_ext_v3
+
+ cCE(fconsts, eb00a00, 2, (RVS, I255), vfp_sp_const),
+ cCE(fconstd, eb00b00, 2, (RVD, I255), vfp_dp_const),
+ cCE(fshtos, eba0a40, 2, (RVS, I16z), vfp_sp_conv_16),
+ cCE(fshtod, eba0b40, 2, (RVD, I16z), vfp_dp_conv_16),
+ cCE(fsltos, eba0ac0, 2, (RVS, I32), vfp_sp_conv_32),
+ cCE(fsltod, eba0bc0, 2, (RVD, I32), vfp_dp_conv_32),
+ cCE(fuhtos, ebb0a40, 2, (RVS, I16z), vfp_sp_conv_16),
+ cCE(fuhtod, ebb0b40, 2, (RVD, I16z), vfp_dp_conv_16),
+ cCE(fultos, ebb0ac0, 2, (RVS, I32), vfp_sp_conv_32),
+ cCE(fultod, ebb0bc0, 2, (RVD, I32), vfp_dp_conv_32),
+ cCE(ftoshs, ebe0a40, 2, (RVS, I16z), vfp_sp_conv_16),
+ cCE(ftoshd, ebe0b40, 2, (RVD, I16z), vfp_dp_conv_16),
+ cCE(ftosls, ebe0ac0, 2, (RVS, I32), vfp_sp_conv_32),
+ cCE(ftosld, ebe0bc0, 2, (RVD, I32), vfp_dp_conv_32),
+ cCE(ftouhs, ebf0a40, 2, (RVS, I16z), vfp_sp_conv_16),
+ cCE(ftouhd, ebf0b40, 2, (RVD, I16z), vfp_dp_conv_16),
+ cCE(ftouls, ebf0ac0, 2, (RVS, I32), vfp_sp_conv_32),
+ cCE(ftould, ebf0bc0, 2, (RVD, I32), vfp_dp_conv_32),
+
+#undef THUMB_VARIANT
#undef ARM_VARIANT
#define ARM_VARIANT &arm_cext_xscale /* Intel XScale extensions. */
cCE(mia, e200010, 3, (RXA, RRnpc, RRnpc), xsc_mia),
@@ -10236,6 +14017,10 @@ static const struct asm_opcode insns[] =
#undef UE
#undef UF
#undef UT
+#undef NUF
+#undef nUF
+#undef NCE
+#undef nCE
#undef OPS0
#undef OPS1
#undef OPS2
@@ -11147,7 +14932,7 @@ create_unwind_entry (int have_data)
int
tc_arm_regname_to_dw2regnum (const char *regname)
{
- int reg = arm_reg_parse ((char **) &regname, REG_TYPE_RN);
+ int reg = arm_reg_parse ((char **) &regname, REG_TYPE_RN, NULL);
if (reg == FAIL)
return -1;
@@ -13343,7 +17128,9 @@ static const struct arm_cpu_option_table arm_cpus[] =
{"arm1156t2f-s", ARM_ARCH_V6T2, FPU_ARCH_VFP_V2, NULL},
{"arm1176jz-s", ARM_ARCH_V6ZK, FPU_NONE, NULL},
{"arm1176jzf-s", ARM_ARCH_V6ZK, FPU_ARCH_VFP_V2, NULL},
- {"cortex-a8", ARM_ARCH_V7A, FPU_ARCH_VFP_V2, NULL},
+ {"cortex-a8", ARM_ARCH_V7A, ARM_FEATURE(0, FPU_VFP_V3
+ | FPU_NEON_EXT_V1),
+ NULL},
{"cortex-r4", ARM_ARCH_V7R, FPU_NONE, NULL},
{"cortex-m3", ARM_ARCH_V7M, FPU_NONE, NULL},
/* ??? XSCALE is really an architecture. */
@@ -13433,6 +17220,7 @@ static const struct arm_option_cpu_value_table arm_fpus[] =
{"softvfp+vfp", FPU_ARCH_VFP_V2},
{"vfp", FPU_ARCH_VFP_V2},
{"vfp9", FPU_ARCH_VFP_V2},
+ {"vfp3", FPU_ARCH_VFP_V3},
{"vfp10", FPU_ARCH_VFP_V2},
{"vfp10-r0", FPU_ARCH_VFP_V1},
{"vfpxd", FPU_ARCH_VFP_V1xD},
@@ -13441,6 +17229,7 @@ static const struct arm_option_cpu_value_table arm_fpus[] =
{"arm1136jfs", FPU_ARCH_VFP_V2},
{"arm1136jf-s", FPU_ARCH_VFP_V2},
{"maverick", FPU_ARCH_MAVERICK},
+ {"neon", FPU_ARCH_VFP_V3_PLUS_NEON_V1},
{NULL, ARM_ARCH_NONE}
};
@@ -13836,7 +17625,7 @@ aeabi_set_public_attributes (void)
ARM_MERGE_FEATURE_SETS (flags, arm_arch_used, thumb_arch_used);
ARM_MERGE_FEATURE_SETS (flags, flags, *mfpu_opt);
ARM_MERGE_FEATURE_SETS (flags, flags, selected_cpu);
-
+
tmp = flags;
arch = 0;
for (p = cpu_arch_ver; p->val; p++)
@@ -13881,16 +17670,25 @@ aeabi_set_public_attributes (void)
elf32_arm_add_eabi_attr_int (stdoutput, 9,
ARM_CPU_HAS_FEATURE (thumb_arch_used, arm_arch_t2) ? 2 : 1);
/* Tag_VFP_arch. */
- if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_arch_vfp_v2)
- || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_arch_vfp_v2))
+ if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_vfp_ext_v3)
+ || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_vfp_ext_v3))
+ elf32_arm_add_eabi_attr_int (stdoutput, 10, 3);
+ else if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_vfp_ext_v2)
+ || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_vfp_ext_v2))
elf32_arm_add_eabi_attr_int (stdoutput, 10, 2);
- else if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_arch_vfp_v1)
- || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_arch_vfp_v1))
+ else if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_vfp_ext_v1)
+ || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_vfp_ext_v1)
+ || ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_vfp_ext_v1xd)
+ || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_vfp_ext_v1xd))
elf32_arm_add_eabi_attr_int (stdoutput, 10, 1);
/* Tag_WMMX_arch. */
if (ARM_CPU_HAS_FEATURE (thumb_arch_used, arm_cext_iwmmxt)
|| ARM_CPU_HAS_FEATURE (arm_arch_used, arm_cext_iwmmxt))
elf32_arm_add_eabi_attr_int (stdoutput, 11, 1);
+ /* Tag_NEON_arch. */
+ if (ARM_CPU_HAS_FEATURE (thumb_arch_used, fpu_neon_ext_v1)
+ || ARM_CPU_HAS_FEATURE (arm_arch_used, fpu_neon_ext_v1))
+ elf32_arm_add_eabi_attr_int (stdoutput, 12, 1);
}
/* Add the .ARM.attributes section. */
diff --git a/gas/testsuite/gas/arm/copro.d b/gas/testsuite/gas/arm/copro.d
index 5f5dd110e9..8fb657b934 100644
--- a/gas/testsuite/gas/arm/copro.d
+++ b/gas/testsuite/gas/arm/copro.d
@@ -31,7 +31,7 @@ Disassembly of section .text:
0+054 <[^>]*> ecc52805 stcl 8, cr2, \[r5\], \{5\}
0+058 <[^>]*> fcd61906 ldc2l 9, cr1, \[r6\], \{6\}
0+05c <[^>]*> fcc70a07 stc2l 10, cr0, \[r7\], \{7\}
-0+060 <[^>]*> ecd88bff ldcl 11, cr8, \[r8\], \{255\}
+0+060 <[^>]*> ecd88cff ldcl 12, cr8, \[r8\], \{255\}
0+064 <[^>]*> ecc99cfe stcl 12, cr9, \[r9\], \{254\}
0+068 <[^>]*> ec507d04 mrrc 13, 0, r7, r0, cr4
0+06c <[^>]*> ec407e05 mcrr 14, 0, r7, r0, cr5
diff --git a/gas/testsuite/gas/arm/copro.s b/gas/testsuite/gas/arm/copro.s
index 334b000f44..e6976329c7 100644
--- a/gas/testsuite/gas/arm/copro.s
+++ b/gas/testsuite/gas/arm/copro.s
@@ -33,7 +33,8 @@ bar:
stcl p8, c2, [r5], {5}
ldc2l 9, c1, [r6], {6}
stc2l p10, c0, [r7], {7}
- ldcl 11, c8, [r8], {255}
+ @ using '11' below results in an (invalid) Neon vldmia instruction.
+ ldcl 12, c8, [r8], {255}
stcl p12, c9, [r9], {254}
mrrc 13, 0, r7, r0, cr4
mcrr p14, 0, r7, r0, cr5
diff --git a/gas/testsuite/gas/arm/neon-cond.d b/gas/testsuite/gas/arm/neon-cond.d
new file mode 100644
index 0000000000..0b7d8ede73
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-cond.d
@@ -0,0 +1,14 @@
+# name: Conditional Neon instructions
+# as: -mfpu=neon
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> 0d943b00 vldreq d3, \[r4\]
+0[0-9a-f]+ <[^>]+> be035b70 vmovlt\.16 d3\[1\], r5
+0[0-9a-f]+ <[^>]+> ac474b13 vmovge d3, r4, r7
+0[0-9a-f]+ <[^>]+> 3c543b3e vmovcc r3, r4, d30
+0[0-9a-f]+ <[^>]+> 1e223b10 vmovne\.32 d2\[1\], r3
+0[0-9a-f]+ <[^>]+> 2c521b13 vmovcs r1, r2, d3
+0[0-9a-f]+ <[^>]+> 3c421b14 vmovcc d4, r1, r2
diff --git a/gas/testsuite/gas/arm/neon-cond.s b/gas/testsuite/gas/arm/neon-cond.s
new file mode 100644
index 0000000000..8f62575aa5
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-cond.s
@@ -0,0 +1,13 @@
+@ test conditional compilation
+
+ .arm
+ .text
+ .syntax unified
+
+ vldreq.32 d3,[r4]
+ vmovlt.16 d3[1], r5
+ vmovge d3, r4, r7
+ vmovcc r3, r4, d30
+ vmovne.32 d2[1],r3
+ vmovcs r1,r2,d3
+ vmovcc d4,r1,r2
diff --git a/gas/testsuite/gas/arm/neon-cov.d b/gas/testsuite/gas/arm/neon-cov.d
new file mode 100644
index 0000000000..c2ef0eb9c8
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-cov.d
@@ -0,0 +1,1263 @@
+# name: Neon instruction coverage
+# as: -mfpu=neon
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> f2000750 vaba\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000750 vaba\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000710 vaba\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100750 vaba\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100750 vaba\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100710 vaba\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200750 vaba\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200750 vaba\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200710 vaba\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000750 vaba\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000750 vaba\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000710 vaba\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100750 vaba\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100750 vaba\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100710 vaba\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200750 vaba\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200750 vaba\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200710 vaba\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000040 vhadd\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000040 vhadd\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000000 vhadd\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100040 vhadd\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100040 vhadd\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100000 vhadd\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200040 vhadd\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200040 vhadd\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200000 vhadd\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000040 vhadd\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000040 vhadd\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000000 vhadd\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100040 vhadd\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100040 vhadd\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100000 vhadd\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200040 vhadd\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200040 vhadd\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200000 vhadd\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000140 vrhadd\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000140 vrhadd\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000100 vrhadd\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100140 vrhadd\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100140 vrhadd\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100100 vrhadd\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200140 vrhadd\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200140 vrhadd\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200100 vrhadd\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000140 vrhadd\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000140 vrhadd\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000100 vrhadd\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100140 vrhadd\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100140 vrhadd\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100100 vrhadd\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200140 vrhadd\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200140 vrhadd\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200100 vrhadd\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000240 vhsub\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000240 vhsub\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000200 vhsub\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100240 vhsub\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100240 vhsub\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100200 vhsub\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200240 vhsub\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200240 vhsub\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200200 vhsub\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000240 vhsub\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000240 vhsub\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000200 vhsub\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100240 vhsub\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100240 vhsub\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100200 vhsub\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200240 vhsub\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200240 vhsub\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200200 vhsub\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000050 vqadd\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000050 vqadd\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000010 vqadd\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100050 vqadd\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100050 vqadd\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100010 vqadd\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200050 vqadd\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200050 vqadd\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200010 vqadd\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300050 vqadd\.s64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300050 vqadd\.s64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300010 vqadd\.s64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000050 vqadd\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000050 vqadd\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000010 vqadd\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100050 vqadd\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100050 vqadd\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100010 vqadd\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200050 vqadd\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200050 vqadd\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200010 vqadd\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300050 vqadd\.u64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300050 vqadd\.u64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300010 vqadd\.u64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000250 vqsub\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000250 vqsub\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000210 vqsub\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100250 vqsub\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100250 vqsub\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100210 vqsub\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200250 vqsub\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200250 vqsub\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200210 vqsub\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300250 vqsub\.s64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300250 vqsub\.s64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300210 vqsub\.s64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000250 vqsub\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000250 vqsub\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000210 vqsub\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100250 vqsub\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100250 vqsub\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100210 vqsub\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200250 vqsub\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200250 vqsub\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200210 vqsub\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300250 vqsub\.u64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300250 vqsub\.u64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300210 vqsub\.u64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000540 vrshl\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000540 vrshl\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000500 vrshl\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100540 vrshl\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100540 vrshl\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100500 vrshl\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200540 vrshl\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200540 vrshl\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200500 vrshl\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300540 vrshl\.s64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300540 vrshl\.s64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300500 vrshl\.s64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000540 vrshl\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000540 vrshl\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000500 vrshl\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100540 vrshl\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100540 vrshl\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100500 vrshl\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200540 vrshl\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200540 vrshl\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200500 vrshl\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300540 vrshl\.u64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300540 vrshl\.u64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300500 vrshl\.u64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000550 vqrshl\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000550 vqrshl\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000510 vqrshl\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100550 vqrshl\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100550 vqrshl\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100510 vqrshl\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200550 vqrshl\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200550 vqrshl\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200510 vqrshl\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300550 vqrshl\.s64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300550 vqrshl\.s64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300510 vqrshl\.s64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000550 vqrshl\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000550 vqrshl\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000510 vqrshl\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100550 vqrshl\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100550 vqrshl\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100510 vqrshl\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200550 vqrshl\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200550 vqrshl\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200510 vqrshl\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300550 vqrshl\.u64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300550 vqrshl\.u64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300510 vqrshl\.u64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000440 vshl\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000440 vshl\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000400 vshl\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100440 vshl\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100440 vshl\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100400 vshl\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200440 vshl\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200440 vshl\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200400 vshl\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300440 vshl\.s64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300440 vshl\.s64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300400 vshl\.s64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000440 vshl\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000440 vshl\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000400 vshl\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100440 vshl\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100440 vshl\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100400 vshl\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200440 vshl\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200440 vshl\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200400 vshl\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300440 vshl\.u64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300440 vshl\.u64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300400 vshl\.u64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000450 vqshl\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000450 vqshl\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000410 vqshl\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100450 vqshl\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100450 vqshl\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100410 vqshl\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200450 vqshl\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200450 vqshl\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200410 vqshl\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300450 vqshl\.s64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300450 vqshl\.s64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300410 vqshl\.s64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000450 vqshl\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000450 vqshl\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000410 vqshl\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100450 vqshl\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100450 vqshl\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100410 vqshl\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200450 vqshl\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200450 vqshl\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200410 vqshl\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300450 vqshl\.u64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300450 vqshl\.u64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300410 vqshl\.u64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2880550 vshl\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2880550 vshl\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2880510 vshl\.s8 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2900550 vshl\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2900550 vshl\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2900510 vshl\.s16 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2a00550 vshl\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2a00550 vshl\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2a00510 vshl\.s32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f28005d0 vshl\.s64 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f28005d0 vshl\.s64 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2800590 vshl\.s64 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2880750 vqshl\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2880750 vqshl\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2880710 vqshl\.s8 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2900750 vqshl\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2900750 vqshl\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2900710 vqshl\.s16 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2a00750 vqshl\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2a00750 vqshl\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2a00710 vqshl\.s32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f28007d0 vqshl\.s64 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f28007d0 vqshl\.s64 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2800790 vqshl\.s64 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3880750 vqshl\.u8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3880750 vqshl\.u8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3880710 vqshl\.u8 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3900750 vqshl\.u16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3900750 vqshl\.u16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3900710 vqshl\.u16 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3a00750 vqshl\.u32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3a00750 vqshl\.u32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3a00710 vqshl\.u32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f38007d0 vqshl\.u64 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f38007d0 vqshl\.u64 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3800790 vqshl\.u64 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2000150 vand q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000150 vand q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000110 vand d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100150 vbic q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100150 vbic q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100110 vbic d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200150 vorr q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200150 vorr q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200110 vorr d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300150 vorn q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300150 vorn q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300110 vorn d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000150 veor q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000150 veor q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000110 veor d0, d0, d0
+0[0-9a-f]+ <[^>]+> f387017f vbic\.i32 q0, #255 ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387017f vbic\.i32 q0, #255 ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387013f vbic\.i32 d0, #255 ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387037f vbic\.i32 q0, #65280 ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387037f vbic\.i32 q0, #65280 ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387033f vbic\.i32 d0, #65280 ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387057f vbic\.i32 q0, #16711680 ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387057f vbic\.i32 q0, #16711680 ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387053f vbic\.i32 d0, #16711680 ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387077f vbic\.i32 q0, #-16777216 ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387077f vbic\.i32 q0, #-16777216 ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387073f vbic\.i32 d0, #-16777216 ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387097f vbic\.i16 q0, #255 ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387097f vbic\.i16 q0, #255 ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387093f vbic\.i16 d0, #255 ; 0x00ff
+0[0-9a-f]+ <[^>]+> f3870b7f vbic\.i16 q0, #65280 ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b7f vbic\.i16 q0, #65280 ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b3f vbic\.i16 d0, #65280 ; 0xff00
+0[0-9a-f]+ <[^>]+> f387015f vorr\.i32 q0, #255 ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387015f vorr\.i32 q0, #255 ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387011f vorr\.i32 d0, #255 ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387035f vorr\.i32 q0, #65280 ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387035f vorr\.i32 q0, #65280 ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387031f vorr\.i32 d0, #65280 ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387055f vorr\.i32 q0, #16711680 ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387055f vorr\.i32 q0, #16711680 ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387051f vorr\.i32 d0, #16711680 ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387075f vorr\.i32 q0, #-16777216 ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387075f vorr\.i32 q0, #-16777216 ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387071f vorr\.i32 d0, #-16777216 ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387095f vorr\.i16 q0, #255 ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387095f vorr\.i16 q0, #255 ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387091f vorr\.i16 d0, #255 ; 0x00ff
+0[0-9a-f]+ <[^>]+> f3870b5f vorr\.i16 q0, #65280 ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b5f vorr\.i16 q0, #65280 ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b1f vorr\.i16 d0, #65280 ; 0xff00
+0[0-9a-f]+ <[^>]+> f387017f vbic\.i32 q0, #255 ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387017f vbic\.i32 q0, #255 ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387013f vbic\.i32 d0, #255 ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387037f vbic\.i32 q0, #65280 ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387037f vbic\.i32 q0, #65280 ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387033f vbic\.i32 d0, #65280 ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387057f vbic\.i32 q0, #16711680 ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387057f vbic\.i32 q0, #16711680 ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387053f vbic\.i32 d0, #16711680 ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387077f vbic\.i32 q0, #-16777216 ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387077f vbic\.i32 q0, #-16777216 ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387073f vbic\.i32 d0, #-16777216 ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387097f vbic\.i16 q0, #255 ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387097f vbic\.i16 q0, #255 ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387093f vbic\.i16 d0, #255 ; 0x00ff
+0[0-9a-f]+ <[^>]+> f3870b7f vbic\.i16 q0, #65280 ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b7f vbic\.i16 q0, #65280 ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b3f vbic\.i16 d0, #65280 ; 0xff00
+0[0-9a-f]+ <[^>]+> f387015f vorr\.i32 q0, #255 ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387015f vorr\.i32 q0, #255 ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387011f vorr\.i32 d0, #255 ; 0x000000ff
+0[0-9a-f]+ <[^>]+> f387035f vorr\.i32 q0, #65280 ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387035f vorr\.i32 q0, #65280 ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387031f vorr\.i32 d0, #65280 ; 0x0000ff00
+0[0-9a-f]+ <[^>]+> f387055f vorr\.i32 q0, #16711680 ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387055f vorr\.i32 q0, #16711680 ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387051f vorr\.i32 d0, #16711680 ; 0x00ff0000
+0[0-9a-f]+ <[^>]+> f387075f vorr\.i32 q0, #-16777216 ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387075f vorr\.i32 q0, #-16777216 ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387071f vorr\.i32 d0, #-16777216 ; 0xff000000
+0[0-9a-f]+ <[^>]+> f387095f vorr\.i16 q0, #255 ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387095f vorr\.i16 q0, #255 ; 0x00ff
+0[0-9a-f]+ <[^>]+> f387091f vorr\.i16 d0, #255 ; 0x00ff
+0[0-9a-f]+ <[^>]+> f3870b5f vorr\.i16 q0, #65280 ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b5f vorr\.i16 q0, #65280 ; 0xff00
+0[0-9a-f]+ <[^>]+> f3870b1f vorr\.i16 d0, #65280 ; 0xff00
+0[0-9a-f]+ <[^>]+> f3100150 vbsl q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100150 vbsl q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100110 vbsl d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200150 vbit q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200150 vbit q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200110 vbit d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300150 vbif q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300150 vbif q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300110 vbif d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000740 vabd\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000740 vabd\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000700 vabd\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100740 vabd\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100740 vabd\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100700 vabd\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200740 vabd\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200740 vabd\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200700 vabd\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000740 vabd\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000740 vabd\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000700 vabd\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100740 vabd\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100740 vabd\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100700 vabd\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200740 vabd\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200740 vabd\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200700 vabd\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200d40 vabd\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200d40 vabd\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200d00 vabd\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000640 vmax\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000640 vmax\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000600 vmax\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100640 vmax\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100640 vmax\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100600 vmax\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200640 vmax\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200640 vmax\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200600 vmax\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000640 vmax\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000640 vmax\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000600 vmax\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100640 vmax\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100640 vmax\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100600 vmax\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200640 vmax\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200640 vmax\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200600 vmax\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000f40 vmax\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000f40 vmax\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000f00 vmax\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000650 vmin\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000650 vmin\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000610 vmin\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100650 vmin\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100650 vmin\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100610 vmin\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200650 vmin\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200650 vmin\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200610 vmin\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000650 vmin\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000650 vmin\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000610 vmin\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100650 vmin\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100650 vmin\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100610 vmin\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200650 vmin\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200650 vmin\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200610 vmin\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200f40 vmin\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200f40 vmin\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200f00 vmin\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000350 vcge\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000350 vcge\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000310 vcge\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100350 vcge\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100350 vcge\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100310 vcge\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200350 vcge\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200350 vcge\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200310 vcge\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000350 vcge\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000350 vcge\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000310 vcge\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100350 vcge\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100350 vcge\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100310 vcge\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200350 vcge\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200350 vcge\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200310 vcge\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000e40 vcge\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e40 vcge\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e00 vcge\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000340 vcgt\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000340 vcgt\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000300 vcgt\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100340 vcgt\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100340 vcgt\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100300 vcgt\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200340 vcgt\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200340 vcgt\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200300 vcgt\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000340 vcgt\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000340 vcgt\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000300 vcgt\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100340 vcgt\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100340 vcgt\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100300 vcgt\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200340 vcgt\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200340 vcgt\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200300 vcgt\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200e40 vcgt\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e40 vcgt\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e00 vcgt\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000340 vcgt\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000340 vcgt\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000300 vcgt\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100340 vcgt\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100340 vcgt\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100300 vcgt\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200340 vcgt\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200340 vcgt\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200300 vcgt\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000340 vcgt\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000340 vcgt\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000300 vcgt\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100340 vcgt\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100340 vcgt\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100300 vcgt\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200340 vcgt\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200340 vcgt\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200300 vcgt\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200e40 vcgt\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e40 vcgt\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e00 vcgt\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000350 vcge\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000350 vcge\.s8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000310 vcge\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100350 vcge\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100350 vcge\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100310 vcge\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200350 vcge\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200350 vcge\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200310 vcge\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000350 vcge\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000350 vcge\.u8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000310 vcge\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100350 vcge\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100350 vcge\.u16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100310 vcge\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200350 vcge\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200350 vcge\.u32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200310 vcge\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000e40 vcge\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e40 vcge\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e00 vcge\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000850 vceq\.i8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000850 vceq\.i8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000810 vceq\.i8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100850 vceq\.i16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100850 vceq\.i16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100810 vceq\.i16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200850 vceq\.i32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200850 vceq\.i32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200810 vceq\.i32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000e40 vceq\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000e40 vceq\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000e00 vceq\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3b100c0 vcge\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b100c0 vcge\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10080 vcge\.s8 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b500c0 vcge\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b500c0 vcge\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50080 vcge\.s16 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b900c0 vcge\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b900c0 vcge\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90080 vcge\.s32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b904c0 vcge\.f32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b904c0 vcge\.f32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90480 vcge\.f32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b10040 vcgt\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10040 vcgt\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10000 vcgt\.s8 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b50040 vcgt\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50040 vcgt\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50000 vcgt\.s16 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b90040 vcgt\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90040 vcgt\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90000 vcgt\.s32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b90440 vcgt\.f32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90440 vcgt\.f32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90400 vcgt\.f32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b101c0 vcle\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b101c0 vcle\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10180 vcle\.s8 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b501c0 vcle\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b501c0 vcle\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50180 vcle\.s16 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b901c0 vcle\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b901c0 vcle\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90180 vcle\.s32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b905c0 vcle\.f32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b905c0 vcle\.f32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90580 vcle\.f32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b10240 vclt\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10240 vclt\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10200 vclt\.s8 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b50240 vclt\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50240 vclt\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50200 vclt\.s16 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b90240 vclt\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90240 vclt\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90200 vclt\.s32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b90640 vclt\.f32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90640 vclt\.f32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90600 vclt\.f32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b10140 vceq\.i8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10140 vceq\.i8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b10100 vceq\.i8 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b50140 vceq\.i16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50140 vceq\.i16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b50100 vceq\.i16 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b90140 vceq\.i32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90140 vceq\.i32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90100 vceq\.i32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b90540 vceq\.f32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90540 vceq\.f32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3b90500 vceq\.f32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2000a00 vpmax\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100a00 vpmax\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200a00 vpmax\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000a00 vpmax\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100a00 vpmax\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200a00 vpmax\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000f00 vpmax\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000a10 vpmin\.s8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100a10 vpmin\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200a10 vpmin\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000a10 vpmin\.u8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100a10 vpmin\.u16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200a10 vpmin\.u32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200f00 vpmin\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000940 vmla\.i8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000940 vmla\.i8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000900 vmla\.i8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100940 vmla\.i16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100940 vmla\.i16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100900 vmla\.i16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200940 vmla\.i32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200940 vmla\.i32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200900 vmla\.i32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000d50 vmla\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000d50 vmla\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000d10 vmla\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900040 vmla\.i16 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900040 vmla\.i16 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2900040 vmla\.i16 d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00040 vmla\.i32 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00040 vmla\.i32 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00040 vmla\.i32 d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00140 vmla\.f32 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00140 vmla\.f32 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00140 vmla\.f32 d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3000940 vmls\.i8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000940 vmls\.i8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000900 vmls\.i8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100940 vmls\.i16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100940 vmls\.i16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100900 vmls\.i16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200940 vmls\.i32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200940 vmls\.i32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200900 vmls\.i32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200d50 vmls\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200d50 vmls\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200d10 vmls\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900440 vmls\.i16 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900440 vmls\.i16 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2900440 vmls\.i16 d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00440 vmls\.i32 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00440 vmls\.i32 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00440 vmls\.i32 d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00540 vmls\.f32 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00540 vmls\.f32 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00540 vmls\.f32 d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2000b10 vpadd\.i8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100b10 vpadd\.i16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200b10 vpadd\.i32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000d00 vpadd\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000840 vadd\.i8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000840 vadd\.i8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000800 vadd\.i8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100840 vadd\.i16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100840 vadd\.i16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100800 vadd\.i16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200840 vadd\.i32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200840 vadd\.i32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200800 vadd\.i32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2300840 vadd\.i64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300840 vadd\.i64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2300800 vadd\.i64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000d40 vadd\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000d40 vadd\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000d00 vadd\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000840 vsub\.i8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000840 vsub\.i8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000800 vsub\.i8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3100840 vsub\.i16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100840 vsub\.i16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100800 vsub\.i16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200840 vsub\.i32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200840 vsub\.i32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200800 vsub\.i32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3300840 vsub\.i64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300840 vsub\.i64 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3300800 vsub\.i64 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200d40 vsub\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200d40 vsub\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200d00 vsub\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000850 vtst\.8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000850 vtst\.8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000810 vtst\.8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100850 vtst\.16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100850 vtst\.16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100810 vtst\.16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200850 vtst\.32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200850 vtst\.32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200810 vtst\.32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000950 vmul\.i8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000950 vmul\.i8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000910 vmul\.i8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100950 vmul\.i16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100950 vmul\.i16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100910 vmul\.i16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200950 vmul\.i32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200950 vmul\.i32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200910 vmul\.i32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000d50 vmul\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000d50 vmul\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000d10 vmul\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000950 vmul\.p8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000950 vmul\.p8 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000910 vmul\.p8 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2100b40 vqdmulh\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100b40 vqdmulh\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2100b00 vqdmulh\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200b40 vqdmulh\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200b40 vqdmulh\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200b00 vqdmulh\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900c40 vqdmulh\.s16 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900c40 vqdmulh\.s16 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2900c40 vqdmulh\.s16 d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00c40 vqdmulh\.s32 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00c40 vqdmulh\.s32 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00c40 vqdmulh\.s32 d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3100b40 vqrdmulh\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100b40 vqrdmulh\.s16 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3100b00 vqrdmulh\.s16 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200b40 vqrdmulh\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200b40 vqrdmulh\.s32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200b00 vqrdmulh\.s32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900d40 vqrdmulh\.s16 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900d40 vqrdmulh\.s16 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2900d40 vqrdmulh\.s16 d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00d40 vqrdmulh\.s32 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00d40 vqrdmulh\.s32 q0, q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00d40 vqrdmulh\.s32 d0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3000e50 vacge\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e50 vacge\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e10 vacge\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200e50 vacgt\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e50 vacgt\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e10 vacgt\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3200e50 vacgt\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e50 vacgt\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3200e10 vacgt\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3000e50 vacge\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e50 vacge\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f3000e10 vacge\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2000f50 vrecps\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000f50 vrecps\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2000f10 vrecps\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f2200f50 vrsqrts\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200f50 vrsqrts\.f32 q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200f10 vrsqrts\.f32 d0, d0, d0
+0[0-9a-f]+ <[^>]+> f3b10340 vabs\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b10340 vabs\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b10300 vabs\.s8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b50340 vabs\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b50340 vabs\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b50300 vabs\.s16 d0, d0
+0[0-9a-f]+ <[^>]+> f3b90340 vabs\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b90340 vabs\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b90300 vabs\.s32 d0, d0
+0[0-9a-f]+ <[^>]+> f3b90740 vabs\.f32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b90740 vabs\.f32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b90700 vabs\.f32 d0, d0
+0[0-9a-f]+ <[^>]+> f3b103c0 vneg\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b103c0 vneg\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b10380 vneg\.s8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b503c0 vneg\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b503c0 vneg\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b50380 vneg\.s16 d0, d0
+0[0-9a-f]+ <[^>]+> f3b903c0 vneg\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b903c0 vneg\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b90380 vneg\.s32 d0, d0
+0[0-9a-f]+ <[^>]+> f3b907c0 vneg\.f32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b907c0 vneg\.f32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b90780 vneg\.f32 d0, d0
+0[0-9a-f]+ <[^>]+> f2890050 vshr\.s8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890050 vshr\.s8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890010 vshr\.s8 d0, d0, #7
+0[0-9a-f]+ <[^>]+> f2910050 vshr\.s16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910050 vshr\.s16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910010 vshr\.s16 d0, d0, #15
+0[0-9a-f]+ <[^>]+> f2a10050 vshr\.s32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10050 vshr\.s32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10010 vshr\.s32 d0, d0, #31
+0[0-9a-f]+ <[^>]+> f28100d0 vshr\.s64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f28100d0 vshr\.s64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f2810090 vshr\.s64 d0, d0, #63
+0[0-9a-f]+ <[^>]+> f3890050 vshr\.u8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890050 vshr\.u8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890010 vshr\.u8 d0, d0, #7
+0[0-9a-f]+ <[^>]+> f3910050 vshr\.u16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910050 vshr\.u16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910010 vshr\.u16 d0, d0, #15
+0[0-9a-f]+ <[^>]+> f3a10050 vshr\.u32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10050 vshr\.u32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10010 vshr\.u32 d0, d0, #31
+0[0-9a-f]+ <[^>]+> f38100d0 vshr\.u64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f38100d0 vshr\.u64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f3810090 vshr\.u64 d0, d0, #63
+0[0-9a-f]+ <[^>]+> f2890250 vrshr\.s8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890250 vrshr\.s8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890210 vrshr\.s8 d0, d0, #7
+0[0-9a-f]+ <[^>]+> f2910250 vrshr\.s16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910250 vrshr\.s16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910210 vrshr\.s16 d0, d0, #15
+0[0-9a-f]+ <[^>]+> f2a10250 vrshr\.s32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10250 vrshr\.s32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10210 vrshr\.s32 d0, d0, #31
+0[0-9a-f]+ <[^>]+> f28102d0 vrshr\.s64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f28102d0 vrshr\.s64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f2810290 vrshr\.s64 d0, d0, #63
+0[0-9a-f]+ <[^>]+> f3890250 vrshr\.u8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890250 vrshr\.u8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890210 vrshr\.u8 d0, d0, #7
+0[0-9a-f]+ <[^>]+> f3910250 vrshr\.u16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910250 vrshr\.u16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910210 vrshr\.u16 d0, d0, #15
+0[0-9a-f]+ <[^>]+> f3a10250 vrshr\.u32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10250 vrshr\.u32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10210 vrshr\.u32 d0, d0, #31
+0[0-9a-f]+ <[^>]+> f38102d0 vrshr\.u64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f38102d0 vrshr\.u64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f3810290 vrshr\.u64 d0, d0, #63
+0[0-9a-f]+ <[^>]+> f2890150 vsra\.s8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890150 vsra\.s8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890110 vsra\.s8 d0, d0, #7
+0[0-9a-f]+ <[^>]+> f2910150 vsra\.s16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910150 vsra\.s16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910110 vsra\.s16 d0, d0, #15
+0[0-9a-f]+ <[^>]+> f2a10150 vsra\.s32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10150 vsra\.s32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10110 vsra\.s32 d0, d0, #31
+0[0-9a-f]+ <[^>]+> f28101d0 vsra\.s64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f28101d0 vsra\.s64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f2810190 vsra\.s64 d0, d0, #63
+0[0-9a-f]+ <[^>]+> f3890150 vsra\.u8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890150 vsra\.u8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890110 vsra\.u8 d0, d0, #7
+0[0-9a-f]+ <[^>]+> f3910150 vsra\.u16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910150 vsra\.u16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910110 vsra\.u16 d0, d0, #15
+0[0-9a-f]+ <[^>]+> f3a10150 vsra\.u32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10150 vsra\.u32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10110 vsra\.u32 d0, d0, #31
+0[0-9a-f]+ <[^>]+> f38101d0 vsra\.u64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f38101d0 vsra\.u64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f3810190 vsra\.u64 d0, d0, #63
+0[0-9a-f]+ <[^>]+> f2890350 vrsra\.s8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890350 vrsra\.s8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f2890310 vrsra\.s8 d0, d0, #7
+0[0-9a-f]+ <[^>]+> f2910350 vrsra\.s16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910350 vrsra\.s16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f2910310 vrsra\.s16 d0, d0, #15
+0[0-9a-f]+ <[^>]+> f2a10350 vrsra\.s32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10350 vrsra\.s32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f2a10310 vrsra\.s32 d0, d0, #31
+0[0-9a-f]+ <[^>]+> f28103d0 vrsra\.s64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f28103d0 vrsra\.s64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f2810390 vrsra\.s64 d0, d0, #63
+0[0-9a-f]+ <[^>]+> f3890350 vrsra\.u8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890350 vrsra\.u8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890310 vrsra\.u8 d0, d0, #7
+0[0-9a-f]+ <[^>]+> f3910350 vrsra\.u16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910350 vrsra\.u16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910310 vrsra\.u16 d0, d0, #15
+0[0-9a-f]+ <[^>]+> f3a10350 vrsra\.u32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10350 vrsra\.u32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10310 vrsra\.u32 d0, d0, #31
+0[0-9a-f]+ <[^>]+> f38103d0 vrsra\.u64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f38103d0 vrsra\.u64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f3810390 vrsra\.u64 d0, d0, #63
+0[0-9a-f]+ <[^>]+> f3880550 vsli\.8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3880550 vsli\.8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3880510 vsli\.8 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3900550 vsli\.16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3900550 vsli\.16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3900510 vsli\.16 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3a00550 vsli\.32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3a00550 vsli\.32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3a00510 vsli\.32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f38005d0 vsli\.64 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f38005d0 vsli\.64 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3800590 vsli\.64 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3890450 vsri\.8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890450 vsri\.8 q0, q0, #7
+0[0-9a-f]+ <[^>]+> f3890410 vsri\.8 d0, d0, #7
+0[0-9a-f]+ <[^>]+> f3910450 vsri\.16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910450 vsri\.16 q0, q0, #15
+0[0-9a-f]+ <[^>]+> f3910410 vsri\.16 d0, d0, #15
+0[0-9a-f]+ <[^>]+> f3a10450 vsri\.32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10450 vsri\.32 q0, q0, #31
+0[0-9a-f]+ <[^>]+> f3a10410 vsri\.32 d0, d0, #31
+0[0-9a-f]+ <[^>]+> f38104d0 vsri\.64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f38104d0 vsri\.64 q0, q0, #63
+0[0-9a-f]+ <[^>]+> f3810490 vsri\.64 d0, d0, #63
+0[0-9a-f]+ <[^>]+> f3880650 vqshlu\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3880650 vqshlu\.s8 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3880610 vqshlu\.s8 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3900650 vqshlu\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3900650 vqshlu\.s16 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3900610 vqshlu\.s16 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3a00650 vqshlu\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3a00650 vqshlu\.s32 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3a00610 vqshlu\.s32 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f38006d0 vqshlu\.s64 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f38006d0 vqshlu\.s64 q0, q0, #0
+0[0-9a-f]+ <[^>]+> f3800690 vqshlu\.s64 d0, d0, #0
+0[0-9a-f]+ <[^>]+> f2890910 vqshrn\.s16 d0, q0, #7
+0[0-9a-f]+ <[^>]+> f2910910 vqshrn\.s32 d0, q0, #15
+0[0-9a-f]+ <[^>]+> f2a10910 vqshrn\.s64 d0, q0, #31
+0[0-9a-f]+ <[^>]+> f3890910 vqshrn\.u16 d0, q0, #7
+0[0-9a-f]+ <[^>]+> f3910910 vqshrn\.u32 d0, q0, #15
+0[0-9a-f]+ <[^>]+> f3a10910 vqshrn\.u64 d0, q0, #31
+0[0-9a-f]+ <[^>]+> f2890950 vqrshrn\.s16 d0, q0, #7
+0[0-9a-f]+ <[^>]+> f2910950 vqrshrn\.s32 d0, q0, #15
+0[0-9a-f]+ <[^>]+> f2a10950 vqrshrn\.s64 d0, q0, #31
+0[0-9a-f]+ <[^>]+> f3890950 vqrshrn\.u16 d0, q0, #7
+0[0-9a-f]+ <[^>]+> f3910950 vqrshrn\.u32 d0, q0, #15
+0[0-9a-f]+ <[^>]+> f3a10950 vqrshrn\.u64 d0, q0, #31
+0[0-9a-f]+ <[^>]+> f3890810 vqshrun\.s16 d0, q0, #7
+0[0-9a-f]+ <[^>]+> f3910810 vqshrun\.s32 d0, q0, #15
+0[0-9a-f]+ <[^>]+> f3a10810 vqshrun\.s64 d0, q0, #31
+0[0-9a-f]+ <[^>]+> f3890850 vqrshrun\.s16 d0, q0, #7
+0[0-9a-f]+ <[^>]+> f3910850 vqrshrun\.s32 d0, q0, #15
+0[0-9a-f]+ <[^>]+> f3a10850 vqrshrun\.s64 d0, q0, #31
+0[0-9a-f]+ <[^>]+> f2890810 vshrn\.i16 d0, q0, #7
+0[0-9a-f]+ <[^>]+> f2910810 vshrn\.i32 d0, q0, #15
+0[0-9a-f]+ <[^>]+> f2a10810 vshrn\.i64 d0, q0, #31
+0[0-9a-f]+ <[^>]+> f2890850 vrshrn\.i16 d0, q0, #7
+0[0-9a-f]+ <[^>]+> f2910850 vrshrn\.i32 d0, q0, #15
+0[0-9a-f]+ <[^>]+> f2a10850 vrshrn\.i64 d0, q0, #31
+0[0-9a-f]+ <[^>]+> f2890a10 vshll\.s8 d0, q0, #1
+0[0-9a-f]+ <[^>]+> f2910a10 vshll\.s16 d0, q0, #1
+0[0-9a-f]+ <[^>]+> f2a10a10 vshll\.s32 d0, q0, #1
+0[0-9a-f]+ <[^>]+> f3890a10 vshll\.u8 d0, q0, #1
+0[0-9a-f]+ <[^>]+> f3910a10 vshll\.u16 d0, q0, #1
+0[0-9a-f]+ <[^>]+> f3a10a10 vshll\.u32 d0, q0, #1
+0[0-9a-f]+ <[^>]+> f3b20300 vshll\.i8 q0, d0, #8
+0[0-9a-f]+ <[^>]+> f3b60300 vshll\.i16 q0, d0, #16
+0[0-9a-f]+ <[^>]+> f3ba0300 vshll\.i32 q0, d0, #32
+0[0-9a-f]+ <[^>]+> f3bb0740 vcvt\.s32\.f32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb07c0 vcvt\.u32\.f32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0640 vcvt\.f32\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb06c0 vcvt\.f32\.u32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0740 vcvt\.s32\.f32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb07c0 vcvt\.u32\.f32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0640 vcvt\.f32\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb06c0 vcvt\.f32\.u32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0700 vcvt\.s32\.f32 d0, d0
+0[0-9a-f]+ <[^>]+> f3bb0780 vcvt\.u32\.f32 d0, d0
+0[0-9a-f]+ <[^>]+> f3bb0600 vcvt\.f32\.s32 d0, d0
+0[0-9a-f]+ <[^>]+> f3bb0680 vcvt\.f32\.u32 d0, d0
+0[0-9a-f]+ <[^>]+> f2bf0f50 vcvt\.s32\.f32 q0, q0, #1
+0[0-9a-f]+ <[^>]+> f3bf0f50 vcvt\.u32\.f32 q0, q0, #1
+0[0-9a-f]+ <[^>]+> f2bf0e50 vcvt\.f32\.s32 q0, q0, #1
+0[0-9a-f]+ <[^>]+> f3bf0e50 vcvt\.f32\.u32 q0, q0, #1
+0[0-9a-f]+ <[^>]+> f2bf0f50 vcvt\.s32\.f32 q0, q0, #1
+0[0-9a-f]+ <[^>]+> f3bf0f50 vcvt\.u32\.f32 q0, q0, #1
+0[0-9a-f]+ <[^>]+> f2bf0e50 vcvt\.f32\.s32 q0, q0, #1
+0[0-9a-f]+ <[^>]+> f3bf0e50 vcvt\.f32\.u32 q0, q0, #1
+0[0-9a-f]+ <[^>]+> f2bf0f10 vcvt\.s32\.f32 d0, d0, #1
+0[0-9a-f]+ <[^>]+> f3bf0f10 vcvt\.u32\.f32 d0, d0, #1
+0[0-9a-f]+ <[^>]+> f2bf0e10 vcvt\.f32\.s32 d0, d0, #1
+0[0-9a-f]+ <[^>]+> f3bf0e10 vcvt\.f32\.u32 d0, d0, #1
+0[0-9a-f]+ <[^>]+> f2200150 vorr q0, q0, q0
+0[0-9a-f]+ <[^>]+> f2200110 vorr d0, d0, d0
+0[0-9a-f]+ <[^>]+> ee400b10 vmov\.8 d0\[0\], r0
+0[0-9a-f]+ <[^>]+> ee000b30 vmov\.16 d0\[0\], r0
+0[0-9a-f]+ <[^>]+> ee000b10 vmov\.32 d0\[0\], r0
+0[0-9a-f]+ <[^>]+> ec400b10 vmov d0, r0, r0
+0[0-9a-f]+ <[^>]+> ee500b10 vmov\.s8 r0, d0\[0\]
+0[0-9a-f]+ <[^>]+> ee100b30 vmov\.s16 r0, d0\[0\]
+0[0-9a-f]+ <[^>]+> eed00b10 vmov\.u8 r0, d0\[0\]
+0[0-9a-f]+ <[^>]+> ee900b30 vmov\.u16 r0, d0\[0\]
+0[0-9a-f]+ <[^>]+> ee100b10 vmov\.32 r0, d0\[0\]
+0[0-9a-f]+ <[^>]+> ec510b10 vmov r0, r1, d0
+0[0-9a-f]+ <[^>]+> f2870057 vmov\.i32 q0, #119 ; 0x00000077
+0[0-9a-f]+ <[^>]+> f2870017 vmov\.i32 d0, #119 ; 0x00000077
+0[0-9a-f]+ <[^>]+> f2870077 vmvn\.i32 q0, #119 ; 0x00000077
+0[0-9a-f]+ <[^>]+> f2870037 vmvn\.i32 d0, #119 ; 0x00000077
+0[0-9a-f]+ <[^>]+> f2870257 vmov\.i32 q0, #30464 ; 0x00007700
+0[0-9a-f]+ <[^>]+> f2870217 vmov\.i32 d0, #30464 ; 0x00007700
+0[0-9a-f]+ <[^>]+> f2870277 vmvn\.i32 q0, #30464 ; 0x00007700
+0[0-9a-f]+ <[^>]+> f2870237 vmvn\.i32 d0, #30464 ; 0x00007700
+0[0-9a-f]+ <[^>]+> f2870457 vmov\.i32 q0, #7798784 ; 0x00770000
+0[0-9a-f]+ <[^>]+> f2870417 vmov\.i32 d0, #7798784 ; 0x00770000
+0[0-9a-f]+ <[^>]+> f2870477 vmvn\.i32 q0, #7798784 ; 0x00770000
+0[0-9a-f]+ <[^>]+> f2870437 vmvn\.i32 d0, #7798784 ; 0x00770000
+0[0-9a-f]+ <[^>]+> f2870657 vmov\.i32 q0, #1996488704 ; 0x77000000
+0[0-9a-f]+ <[^>]+> f2870617 vmov\.i32 d0, #1996488704 ; 0x77000000
+0[0-9a-f]+ <[^>]+> f2870677 vmvn\.i32 q0, #1996488704 ; 0x77000000
+0[0-9a-f]+ <[^>]+> f2870637 vmvn\.i32 d0, #1996488704 ; 0x77000000
+0[0-9a-f]+ <[^>]+> f2870857 vmov\.i16 q0, #119 ; 0x0077
+0[0-9a-f]+ <[^>]+> f2870817 vmov\.i16 d0, #119 ; 0x0077
+0[0-9a-f]+ <[^>]+> f2870877 vmvn\.i16 q0, #119 ; 0x0077
+0[0-9a-f]+ <[^>]+> f2870837 vmvn\.i16 d0, #119 ; 0x0077
+0[0-9a-f]+ <[^>]+> f2870a57 vmov\.i16 q0, #30464 ; 0x7700
+0[0-9a-f]+ <[^>]+> f2870a17 vmov\.i16 d0, #30464 ; 0x7700
+0[0-9a-f]+ <[^>]+> f2870a77 vmvn\.i16 q0, #30464 ; 0x7700
+0[0-9a-f]+ <[^>]+> f2870a37 vmvn\.i16 d0, #30464 ; 0x7700
+0[0-9a-f]+ <[^>]+> f2870c57 vmov\.i32 q0, #30719 ; 0x000077ff
+0[0-9a-f]+ <[^>]+> f2870c17 vmov\.i32 d0, #30719 ; 0x000077ff
+0[0-9a-f]+ <[^>]+> f2870c77 vmvn\.i32 q0, #30719 ; 0x000077ff
+0[0-9a-f]+ <[^>]+> f2870c37 vmvn\.i32 d0, #30719 ; 0x000077ff
+0[0-9a-f]+ <[^>]+> f2870d57 vmov\.i32 q0, #7864319 ; 0x0077ffff
+0[0-9a-f]+ <[^>]+> f2870d17 vmov\.i32 d0, #7864319 ; 0x0077ffff
+0[0-9a-f]+ <[^>]+> f2870d77 vmvn\.i32 q0, #7864319 ; 0x0077ffff
+0[0-9a-f]+ <[^>]+> f2870d37 vmvn\.i32 d0, #7864319 ; 0x0077ffff
+0[0-9a-f]+ <[^>]+> f2870e57 vmov\.i8 q0, #119 ; 0x77
+0[0-9a-f]+ <[^>]+> f2870e17 vmov\.i8 d0, #119 ; 0x77
+0[0-9a-f]+ <[^>]+> f3810e71 vmov\.i64 q0, #0xff0000ff000000ff
+0[0-9a-f]+ <[^>]+> f3810e31 vmov\.i64 d0, #0xff0000ff000000ff
+0[0-9a-f]+ <[^>]+> f2850f51 vmov\.f32 q0, #1027866624 ; 0x3d440000
+0[0-9a-f]+ <[^>]+> f2850f11 vmov\.f32 d0, #1027866624 ; 0x3d440000
+0[0-9a-f]+ <[^>]+> f3b005c0 vmvn q0, q0
+0[0-9a-f]+ <[^>]+> f3b005c0 vmvn q0, q0
+0[0-9a-f]+ <[^>]+> f3b00580 vmvn d0, d0
+0[0-9a-f]+ <[^>]+> f2800500 vabal\.s8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900500 vabal\.s16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00500 vabal\.s32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800500 vabal\.u8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900500 vabal\.u16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00500 vabal\.u32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2800700 vabdl\.s8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900700 vabdl\.s16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00700 vabdl\.s32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800700 vabdl\.u8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900700 vabdl\.u16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00700 vabdl\.u32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2800000 vaddl\.s8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900000 vaddl\.s16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00000 vaddl\.s32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800000 vaddl\.u8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900000 vaddl\.u16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00000 vaddl\.u32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2800200 vsubl\.s8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900200 vsubl\.s16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00200 vsubl\.s32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800200 vsubl\.u8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900200 vsubl\.u16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00200 vsubl\.u32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2800800 vmlal\.s8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900800 vmlal\.s16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00800 vmlal\.s32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800800 vmlal\.u8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900800 vmlal\.u16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00800 vmlal\.u32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900240 vmlal\.s16 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00240 vmlal\.s32 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900240 vmlal\.u16 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00240 vmlal\.u32 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2800a00 vmlsl\.s8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900a00 vmlsl\.s16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00a00 vmlsl\.s32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800a00 vmlsl\.u8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900a00 vmlsl\.u16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00a00 vmlsl\.u32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900640 vmlsl\.s16 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00640 vmlsl\.s32 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900640 vmlsl\.u16 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00640 vmlsl\.u32 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2800100 vaddw\.s8 q0, q0, d0
+0[0-9a-f]+ <[^>]+> f2900100 vaddw\.s16 q0, q0, d0
+0[0-9a-f]+ <[^>]+> f2a00100 vaddw\.s32 q0, q0, d0
+0[0-9a-f]+ <[^>]+> f3800100 vaddw\.u8 q0, q0, d0
+0[0-9a-f]+ <[^>]+> f3900100 vaddw\.u16 q0, q0, d0
+0[0-9a-f]+ <[^>]+> f3a00100 vaddw\.u32 q0, q0, d0
+0[0-9a-f]+ <[^>]+> f2800300 vsubw\.s8 q0, q0, d0
+0[0-9a-f]+ <[^>]+> f2900300 vsubw\.s16 q0, q0, d0
+0[0-9a-f]+ <[^>]+> f2a00300 vsubw\.s32 q0, q0, d0
+0[0-9a-f]+ <[^>]+> f3800300 vsubw\.u8 q0, q0, d0
+0[0-9a-f]+ <[^>]+> f3900300 vsubw\.u16 q0, q0, d0
+0[0-9a-f]+ <[^>]+> f3a00300 vsubw\.u32 q0, q0, d0
+0[0-9a-f]+ <[^>]+> f2800400 vaddhn\.i16 d0, q0, q0
+0[0-9a-f]+ <[^>]+> f2900400 vaddhn\.i32 d0, q0, q0
+0[0-9a-f]+ <[^>]+> f2a00400 vaddhn\.i64 d0, q0, q0
+0[0-9a-f]+ <[^>]+> f3800400 vraddhn\.i16 d0, q0, q0
+0[0-9a-f]+ <[^>]+> f3900400 vraddhn\.i32 d0, q0, q0
+0[0-9a-f]+ <[^>]+> f3a00400 vraddhn\.i64 d0, q0, q0
+0[0-9a-f]+ <[^>]+> f2800600 vsubhn\.i16 d0, q0, q0
+0[0-9a-f]+ <[^>]+> f2900600 vsubhn\.i32 d0, q0, q0
+0[0-9a-f]+ <[^>]+> f2a00600 vsubhn\.i64 d0, q0, q0
+0[0-9a-f]+ <[^>]+> f3800600 vrsubhn\.i16 d0, q0, q0
+0[0-9a-f]+ <[^>]+> f3900600 vrsubhn\.i32 d0, q0, q0
+0[0-9a-f]+ <[^>]+> f3a00600 vrsubhn\.i64 d0, q0, q0
+0[0-9a-f]+ <[^>]+> f2900900 vqdmlal\.s16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00900 vqdmlal\.s32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900340 vqdmlal\.s16 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00340 vqdmlal\.s32 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2900b00 vqdmlsl\.s16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00b00 vqdmlsl\.s32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900740 vqdmlsl\.s16 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00740 vqdmlsl\.s32 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2900d00 vqdmull\.s16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00d00 vqdmull\.s32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900b40 vqdmull\.s16 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00b40 vqdmull\.s32 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2800c00 vmull\.s8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900c00 vmull\.s16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2a00c00 vmull\.s32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3800c00 vmull\.u8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3900c00 vmull\.u16 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f3a00c00 vmull\.u32 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2800e00 vmull\.p8 q0, d0, d0
+0[0-9a-f]+ <[^>]+> f2900a40 vmull\.s16 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2a00a40 vmull\.s32 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3900a40 vmull\.u16 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3a00a40 vmull\.u32 q0, d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2b00040 vext\.8 q0, q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2b00040 vext\.8 q0, q0, q0, #0
+0[0-9a-f]+ <[^>]+> f2b00000 vext\.8 d0, d0, d0, #0
+0[0-9a-f]+ <[^>]+> f3b00040 vrev64\.8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00040 vrev64\.8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00000 vrev64\.8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b40040 vrev64\.16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40040 vrev64\.16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40000 vrev64\.16 d0, d0
+0[0-9a-f]+ <[^>]+> f3b80040 vrev64\.32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80040 vrev64\.32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80000 vrev64\.32 d0, d0
+0[0-9a-f]+ <[^>]+> f3b000c0 vrev32\.8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b000c0 vrev32\.8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00080 vrev32\.8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b400c0 vrev32\.16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b400c0 vrev32\.16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40080 vrev32\.16 d0, d0
+0[0-9a-f]+ <[^>]+> f3b00140 vrev16\.8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00140 vrev16\.8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00100 vrev16\.8 d0, d0
+0[0-9a-f]+ <[^>]+> eee00b10 vdup\.8 q0, r0
+0[0-9a-f]+ <[^>]+> eee00b10 vdup\.8 q0, r0
+0[0-9a-f]+ <[^>]+> eec00b10 vdup\.8 d0, r0
+0[0-9a-f]+ <[^>]+> f3b10c40 vdup\.8 q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3b10c40 vdup\.8 q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3b10c00 vdup\.8 d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> eea00b30 vdup\.16 q0, r0
+0[0-9a-f]+ <[^>]+> eea00b30 vdup\.16 q0, r0
+0[0-9a-f]+ <[^>]+> ee800b30 vdup\.16 d0, r0
+0[0-9a-f]+ <[^>]+> f3b20c40 vdup\.16 q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3b20c40 vdup\.16 q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3b20c00 vdup\.16 d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> eea00b10 vdup\.32 q0, r0
+0[0-9a-f]+ <[^>]+> eea00b10 vdup\.32 q0, r0
+0[0-9a-f]+ <[^>]+> ee800b10 vdup\.32 d0, r0
+0[0-9a-f]+ <[^>]+> f3b40c40 vdup\.32 q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3b40c40 vdup\.32 q0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f3b40c00 vdup\.32 d0, d0\[0\]
+0[0-9a-f]+ <[^>]+> f2880a10 vmovl\.s8 q0, d0
+0[0-9a-f]+ <[^>]+> f2900a10 vmovl\.s16 q0, d0
+0[0-9a-f]+ <[^>]+> f2a00a10 vmovl\.s32 q0, d0
+0[0-9a-f]+ <[^>]+> f3880a10 vmovl\.u8 q0, d0
+0[0-9a-f]+ <[^>]+> f3900a10 vmovl\.u16 q0, d0
+0[0-9a-f]+ <[^>]+> f3a00a10 vmovl\.u32 q0, d0
+0[0-9a-f]+ <[^>]+> f3b20200 vmovn\.i8 d0, q0
+0[0-9a-f]+ <[^>]+> f3b60200 vmovn\.i16 d0, q0
+0[0-9a-f]+ <[^>]+> f3ba0200 vmovn\.i32 d0, q0
+0[0-9a-f]+ <[^>]+> f3b20280 vqmovn\.s16 d0, q0
+0[0-9a-f]+ <[^>]+> f3b60280 vqmovn\.s32 d0, q0
+0[0-9a-f]+ <[^>]+> f3ba0280 vqmovn\.s64 d0, q0
+0[0-9a-f]+ <[^>]+> f3b202c0 vqmovn\.u16 d0, q0
+0[0-9a-f]+ <[^>]+> f3b602c0 vqmovn\.u32 d0, q0
+0[0-9a-f]+ <[^>]+> f3ba02c0 vqmovn\.u64 d0, q0
+0[0-9a-f]+ <[^>]+> f3b20240 vqmovun\.s16 d0, q0
+0[0-9a-f]+ <[^>]+> f3b60240 vqmovun\.s32 d0, q0
+0[0-9a-f]+ <[^>]+> f3ba0240 vqmovun\.s64 d0, q0
+0[0-9a-f]+ <[^>]+> f3b201c2 vzip\.8 q0, q1
+0[0-9a-f]+ <[^>]+> f3b201c2 vzip\.8 q0, q1
+0[0-9a-f]+ <[^>]+> f3b20181 vzip\.8 d0, d1
+0[0-9a-f]+ <[^>]+> f3b601c2 vzip\.16 q0, q1
+0[0-9a-f]+ <[^>]+> f3b601c2 vzip\.16 q0, q1
+0[0-9a-f]+ <[^>]+> f3b60181 vzip\.16 d0, d1
+0[0-9a-f]+ <[^>]+> f3ba01c2 vzip\.32 q0, q1
+0[0-9a-f]+ <[^>]+> f3ba01c2 vzip\.32 q0, q1
+0[0-9a-f]+ <[^>]+> f3ba0081 vtrn\.32 d0, d1
+0[0-9a-f]+ <[^>]+> f3b20142 vuzp\.8 q0, q1
+0[0-9a-f]+ <[^>]+> f3b20142 vuzp\.8 q0, q1
+0[0-9a-f]+ <[^>]+> f3b20101 vuzp\.8 d0, d1
+0[0-9a-f]+ <[^>]+> f3b60142 vuzp\.16 q0, q1
+0[0-9a-f]+ <[^>]+> f3b60142 vuzp\.16 q0, q1
+0[0-9a-f]+ <[^>]+> f3b60101 vuzp\.16 d0, d1
+0[0-9a-f]+ <[^>]+> f3ba0142 vuzp\.32 q0, q1
+0[0-9a-f]+ <[^>]+> f3ba0142 vuzp\.32 q0, q1
+0[0-9a-f]+ <[^>]+> f3ba0081 vtrn\.32 d0, d1
+0[0-9a-f]+ <[^>]+> f3b00740 vqabs\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00740 vqabs\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00700 vqabs\.s8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b40740 vqabs\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40740 vqabs\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40700 vqabs\.s16 d0, d0
+0[0-9a-f]+ <[^>]+> f3b80740 vqabs\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80740 vqabs\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80700 vqabs\.s32 d0, d0
+0[0-9a-f]+ <[^>]+> f3b007c0 vqneg\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b007c0 vqneg\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00780 vqneg\.s8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b407c0 vqneg\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b407c0 vqneg\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40780 vqneg\.s16 d0, d0
+0[0-9a-f]+ <[^>]+> f3b807c0 vqneg\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b807c0 vqneg\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80780 vqneg\.s32 d0, d0
+0[0-9a-f]+ <[^>]+> f3b00640 vpadal\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00640 vpadal\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00600 vpadal\.s8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b40640 vpadal\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40640 vpadal\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40600 vpadal\.s16 d0, d0
+0[0-9a-f]+ <[^>]+> f3b80640 vpadal\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80640 vpadal\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80600 vpadal\.s32 d0, d0
+0[0-9a-f]+ <[^>]+> f3b006c0 vpadal\.u8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b006c0 vpadal\.u8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00680 vpadal\.u8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b406c0 vpadal\.u16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b406c0 vpadal\.u16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40680 vpadal\.u16 d0, d0
+0[0-9a-f]+ <[^>]+> f3b806c0 vpadal\.u32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b806c0 vpadal\.u32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80680 vpadal\.u32 d0, d0
+0[0-9a-f]+ <[^>]+> f3b00240 vpaddl\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00240 vpaddl\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00200 vpaddl\.s8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b40240 vpaddl\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40240 vpaddl\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40200 vpaddl\.s16 d0, d0
+0[0-9a-f]+ <[^>]+> f3b80240 vpaddl\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80240 vpaddl\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80200 vpaddl\.s32 d0, d0
+0[0-9a-f]+ <[^>]+> f3b002c0 vpaddl\.u8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b002c0 vpaddl\.u8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00280 vpaddl\.u8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b402c0 vpaddl\.u16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b402c0 vpaddl\.u16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40280 vpaddl\.u16 d0, d0
+0[0-9a-f]+ <[^>]+> f3b802c0 vpaddl\.u32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b802c0 vpaddl\.u32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80280 vpaddl\.u32 d0, d0
+0[0-9a-f]+ <[^>]+> f3bb0440 vrecpe\.u32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0440 vrecpe\.u32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0400 vrecpe\.u32 d0, d0
+0[0-9a-f]+ <[^>]+> f3bb0540 vrecpe\.f32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0540 vrecpe\.f32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0500 vrecpe\.f32 d0, d0
+0[0-9a-f]+ <[^>]+> f3bb04c0 vrsqrte\.u32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb04c0 vrsqrte\.u32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0480 vrsqrte\.u32 d0, d0
+0[0-9a-f]+ <[^>]+> f3bb05c0 vrsqrte\.f32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb05c0 vrsqrte\.f32 q0, q0
+0[0-9a-f]+ <[^>]+> f3bb0580 vrsqrte\.f32 d0, d0
+0[0-9a-f]+ <[^>]+> f3b00440 vcls\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00440 vcls\.s8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00400 vcls\.s8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b40440 vcls\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40440 vcls\.s16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40400 vcls\.s16 d0, d0
+0[0-9a-f]+ <[^>]+> f3b80440 vcls\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80440 vcls\.s32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80400 vcls\.s32 d0, d0
+0[0-9a-f]+ <[^>]+> f3b004c0 vclz\.i8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b004c0 vclz\.i8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00480 vclz\.i8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b404c0 vclz\.i16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b404c0 vclz\.i16 q0, q0
+0[0-9a-f]+ <[^>]+> f3b40480 vclz\.i16 d0, d0
+0[0-9a-f]+ <[^>]+> f3b804c0 vclz\.i32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b804c0 vclz\.i32 q0, q0
+0[0-9a-f]+ <[^>]+> f3b80480 vclz\.i32 d0, d0
+0[0-9a-f]+ <[^>]+> f3b00540 vcnt\.8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00540 vcnt\.8 q0, q0
+0[0-9a-f]+ <[^>]+> f3b00500 vcnt\.8 d0, d0
+0[0-9a-f]+ <[^>]+> f3b20042 vswp q0, q1
+0[0-9a-f]+ <[^>]+> f3b20042 vswp q0, q1
+0[0-9a-f]+ <[^>]+> f3b20001 vswp d0, d1
+0[0-9a-f]+ <[^>]+> f3b200c2 vtrn\.8 q0, q1
+0[0-9a-f]+ <[^>]+> f3b200c2 vtrn\.8 q0, q1
+0[0-9a-f]+ <[^>]+> f3b20081 vtrn\.8 d0, d1
+0[0-9a-f]+ <[^>]+> f3b600c2 vtrn\.16 q0, q1
+0[0-9a-f]+ <[^>]+> f3b600c2 vtrn\.16 q0, q1
+0[0-9a-f]+ <[^>]+> f3b60081 vtrn\.16 d0, d1
+0[0-9a-f]+ <[^>]+> f3ba00c2 vtrn\.32 q0, q1
+0[0-9a-f]+ <[^>]+> f3ba00c2 vtrn\.32 q0, q1
+0[0-9a-f]+ <[^>]+> f3ba0081 vtrn\.32 d0, d1
+0[0-9a-f]+ <[^>]+> f3b00800 vtbl\.8 d0, {d0}, d0
+0[0-9a-f]+ <[^>]+> f3b00840 vtbx\.8 d0, {d0}, d0
diff --git a/gas/testsuite/gas/arm/neon-cov.s b/gas/testsuite/gas/arm/neon-cov.s
new file mode 100644
index 0000000000..14bc618a4b
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-cov.s
@@ -0,0 +1,595 @@
+@ Neon tests. Basic bitfield tests, using zero for as many registers/fields as
+@ possible, but without causing instructions to be badly-formed.
+
+ .arm
+ .syntax unified
+ .text
+
+ .macro regs3_1 op opq vtype
+ \op\vtype q0,q0,q0
+ \opq\vtype q0,q0,q0
+ \op\vtype d0,d0,d0
+ .endm
+
+ .macro dregs3_1 op vtype
+ \op\vtype d0,d0,d0
+ .endm
+
+ .macro regn3_1 op operand2 vtype
+ \op\vtype d0,q0,\operand2
+ .endm
+
+ .macro regl3_1 op operand2 vtype
+ \op\vtype q0,d0,\operand2
+ .endm
+
+ .macro regw3_1 op operand2 vtype
+ \op\vtype q0,q0,\operand2
+ .endm
+
+ .macro regs2_1 op opq vtype
+ \op\vtype q0,q0
+ \opq\vtype q0,q0
+ \op\vtype d0,d0
+ .endm
+
+ .macro regs3_su_32 op opq
+ regs3_1 \op \opq .s8
+ regs3_1 \op \opq .s16
+ regs3_1 \op \opq .s32
+ regs3_1 \op \opq .u8
+ regs3_1 \op \opq .u16
+ regs3_1 \op \opq .u32
+ .endm
+
+ regs3_su_32 vaba vabaq
+ regs3_su_32 vhadd vhaddq
+ regs3_su_32 vrhadd vrhaddq
+ regs3_su_32 vhsub vhsubq
+
+ .macro regs3_su_64 op opq
+ regs3_1 \op \opq .s8
+ regs3_1 \op \opq .s16
+ regs3_1 \op \opq .s32
+ regs3_1 \op \opq .s64
+ regs3_1 \op \opq .u8
+ regs3_1 \op \opq .u16
+ regs3_1 \op \opq .u32
+ regs3_1 \op \opq .u64
+ .endm
+
+ regs3_su_64 vqadd vqaddq
+ regs3_su_64 vqsub vqsubq
+ regs3_su_64 vrshl vrshlq
+ regs3_su_64 vqrshl vqrshlq
+
+ regs3_su_64 vshl vshlq
+ regs3_su_64 vqshl vqshlq
+
+ .macro regs2i_1 op opq imm vtype
+ \op\vtype q0,q0,\imm
+ \opq\vtype q0,q0,\imm
+ \op\vtype d0,d0,\imm
+ .endm
+
+ .macro regs2i_su_64 op opq imm
+ regs2i_1 \op \opq \imm .s8
+ regs2i_1 \op \opq \imm .s16
+ regs2i_1 \op \opq \imm .s32
+ regs2i_1 \op \opq \imm .s64
+ regs2i_1 \op \opq \imm .u8
+ regs2i_1 \op \opq \imm .u16
+ regs2i_1 \op \opq \imm .u32
+ regs2i_1 \op \opq \imm .u64
+ .endm
+
+ .macro regs2i_i_64 op opq imm
+ regs2i_1 \op \opq \imm .i8
+ regs2i_1 \op \opq \imm .i16
+ regs2i_1 \op \opq \imm .i32
+ regs2i_1 \op \opq \imm .i64
+ .endm
+
+ regs2i_i_64 vshl vshlq 0
+ regs2i_su_64 vqshl vqshlq 0
+
+ .macro regs3_ntyp op opq
+ regs3_1 \op \opq .8
+ .endm
+
+ regs3_ntyp vand vandq
+ regs3_ntyp vbic vbicq
+ regs3_ntyp vorr vorrq
+ regs3_ntyp vorn vornq
+ regs3_ntyp veor veorq
+
+ .macro logic_imm_1 op opq imm vtype
+ \op\vtype q0,\imm
+ \opq\vtype q0,\imm
+ \op\vtype d0,\imm
+ .endm
+
+ .macro logic_imm op opq
+ logic_imm_1 \op \opq 0x000000ff .i32
+ logic_imm_1 \op \opq 0x0000ff00 .i32
+ logic_imm_1 \op \opq 0x00ff0000 .i32
+ logic_imm_1 \op \opq 0xff000000 .i32
+ logic_imm_1 \op \opq 0x00ff .i16
+ logic_imm_1 \op \opq 0xff00 .i16
+ .endm
+
+ logic_imm vbic vbicq
+ logic_imm vorr vorrq
+
+ .macro logic_inv_imm op opq
+ logic_imm_1 \op \opq 0xffffff00 .i32
+ logic_imm_1 \op \opq 0xffff00ff .i32
+ logic_imm_1 \op \opq 0xff00ffff .i32
+ logic_imm_1 \op \opq 0x00ffffff .i32
+ logic_imm_1 \op \opq 0xff00 .i16
+ logic_imm_1 \op \opq 0x00ff .i16
+ .endm
+
+ logic_inv_imm vand vandq
+ logic_inv_imm vorn vornq
+
+ regs3_ntyp vbsl vbslq
+ regs3_ntyp vbit vbitq
+ regs3_ntyp vbif vbifq
+
+ .macro regs3_suf_32 op opq
+ regs3_1 \op \opq .s8
+ regs3_1 \op \opq .s16
+ regs3_1 \op \opq .s32
+ regs3_1 \op \opq .u8
+ regs3_1 \op \opq .u16
+ regs3_1 \op \opq .u32
+ regs3_1 \op \opq .f32
+ .endm
+
+ .macro regs3_if_32 op opq
+ regs3_1 \op \opq .i8
+ regs3_1 \op \opq .i16
+ regs3_1 \op \opq .i32
+ regs3_1 \op \opq .f32
+ .endm
+
+ regs3_suf_32 vabd vabdq
+ regs3_suf_32 vmax vmaxq
+ regs3_suf_32 vmin vminq
+
+ regs3_suf_32 vcge vcgeq
+ regs3_suf_32 vcgt vcgtq
+ regs3_suf_32 vcle vcleq
+ regs3_suf_32 vclt vcltq
+
+ regs3_if_32 vceq vceqq
+
+ .macro regs2i_sf_0 op opq
+ regs2i_1 \op \opq 0 .s8
+ regs2i_1 \op \opq 0 .s16
+ regs2i_1 \op \opq 0 .s32
+ regs2i_1 \op \opq 0 .f32
+ .endm
+
+ regs2i_sf_0 vcge vcgeq
+ regs2i_sf_0 vcgt vcgtq
+ regs2i_sf_0 vcle vcleq
+ regs2i_sf_0 vclt vcltq
+
+ .macro regs2i_if_0 op opq
+ regs2i_1 \op \opq 0 .i8
+ regs2i_1 \op \opq 0 .i16
+ regs2i_1 \op \opq 0 .i32
+ regs2i_1 \op \opq 0 .f32
+ .endm
+
+ regs2i_if_0 vceq vceqq
+
+ .macro dregs3_suf_32 op
+ dregs3_1 \op .s8
+ dregs3_1 \op .s16
+ dregs3_1 \op .s32
+ dregs3_1 \op .u8
+ dregs3_1 \op .u16
+ dregs3_1 \op .u32
+ dregs3_1 \op .f32
+ .endm
+
+ dregs3_suf_32 vpmax
+ dregs3_suf_32 vpmin
+
+ .macro sregs3_1 op opq vtype
+ \op\vtype q0,q0,q0
+ \opq\vtype q0,q0,q0
+ \op\vtype d0,d0,d0
+ .endm
+
+ .macro sclr21_1 op opq vtype
+ \op\vtype q0,q0,d0[0]
+ \opq\vtype q0,q0,d0[0]
+ \op\vtype d0,d0,d0[0]
+ .endm
+
+ .macro mul_incl_scalar op opq
+ regs3_1 \op \opq .i8
+ regs3_1 \op \opq .i16
+ regs3_1 \op \opq .i32
+ regs3_1 \op \opq .f32
+ sclr21_1 \op \opq .i16
+ sclr21_1 \op \opq .i32
+ sclr21_1 \op \opq .f32
+ .endm
+
+ mul_incl_scalar vmla vmlaq
+ mul_incl_scalar vmls vmlsq
+
+ .macro dregs3_if_32 op
+ dregs3_1 \op .i8
+ dregs3_1 \op .i16
+ dregs3_1 \op .i32
+ dregs3_1 \op .f32
+ .endm
+
+ dregs3_if_32 vpadd
+
+ .macro regs3_if_64 op opq
+ regs3_1 \op \opq .i8
+ regs3_1 \op \opq .i16
+ regs3_1 \op \opq .i32
+ regs3_1 \op \opq .i64
+ regs3_1 \op \opq .f32
+ .endm
+
+ regs3_if_64 vadd vaddq
+ regs3_if_64 vsub vsubq
+
+ .macro regs3_sz_32 op opq
+ regs3_1 \op \opq .8
+ regs3_1 \op \opq .16
+ regs3_1 \op \opq .32
+ .endm
+
+ regs3_sz_32 vtst vtstq
+
+ .macro regs3_ifp_32 op opq
+ regs3_1 \op \opq .i8
+ regs3_1 \op \opq .i16
+ regs3_1 \op \opq .i32
+ regs3_1 \op \opq .f32
+ regs3_1 \op \opq .p8
+ .endm
+
+ regs3_ifp_32 vmul vmulq
+
+ .macro dqmulhs op opq
+ regs3_1 \op \opq .s16
+ regs3_1 \op \opq .s32
+ sclr21_1 \op \opq .s16
+ sclr21_1 \op \opq .s32
+ .endm
+
+ dqmulhs vqdmulh vqdmulhq
+ dqmulhs vqrdmulh vqrdmulhq
+
+ regs3_1 vacge vacgeq .f32
+ regs3_1 vacgt vacgtq .f32
+ regs3_1 vacle vacleq .f32
+ regs3_1 vaclt vacltq .f32
+ regs3_1 vrecps vrecpsq .f32
+ regs3_1 vrsqrts vrsqrtsq .f32
+
+ .macro regs2_sf_32 op opq
+ regs2_1 \op \opq .s8
+ regs2_1 \op \opq .s16
+ regs2_1 \op \opq .s32
+ regs2_1 \op \opq .f32
+ .endm
+
+ regs2_sf_32 vabs vabsq
+ regs2_sf_32 vneg vnegq
+
+ .macro rshift_imm op opq
+ regs2i_1 \op \opq 7 .s8
+ regs2i_1 \op \opq 15 .s16
+ regs2i_1 \op \opq 31 .s32
+ regs2i_1 \op \opq 63 .s64
+ regs2i_1 \op \opq 7 .u8
+ regs2i_1 \op \opq 15 .u16
+ regs2i_1 \op \opq 31 .u32
+ regs2i_1 \op \opq 63 .u64
+ .endm
+
+ rshift_imm vshr vshrq
+ rshift_imm vrshr vrshrq
+ rshift_imm vsra vsraq
+ rshift_imm vrsra vrsraq
+
+ regs2i_1 vsli vsliq 0 .8
+ regs2i_1 vsli vsliq 0 .16
+ regs2i_1 vsli vsliq 0 .32
+ regs2i_1 vsli vsliq 0 .64
+
+ regs2i_1 vsri vsriq 7 .8
+ regs2i_1 vsri vsriq 15 .16
+ regs2i_1 vsri vsriq 31 .32
+ regs2i_1 vsri vsriq 63 .64
+
+ regs2i_1 vqshlu vqshluq 0 .s8
+ regs2i_1 vqshlu vqshluq 0 .s16
+ regs2i_1 vqshlu vqshluq 0 .s32
+ regs2i_1 vqshlu vqshluq 0 .s64
+
+ .macro qrshift_imm op
+ regn3_1 \op 7 .s16
+ regn3_1 \op 15 .s32
+ regn3_1 \op 31 .s64
+ regn3_1 \op 7 .u16
+ regn3_1 \op 15 .u32
+ regn3_1 \op 31 .u64
+ .endm
+
+ .macro qrshiftu_imm op
+ regn3_1 \op 7 .s16
+ regn3_1 \op 15 .s32
+ regn3_1 \op 31 .s64
+ .endm
+
+ .macro qrshifti_imm op
+ regn3_1 \op 7 .i16
+ regn3_1 \op 15 .i32
+ regn3_1 \op 31 .i64
+ .endm
+
+ qrshift_imm vqshrn
+ qrshift_imm vqrshrn
+ qrshiftu_imm vqshrun
+ qrshiftu_imm vqrshrun
+
+ qrshifti_imm vshrn
+ qrshifti_imm vrshrn
+
+ regl3_1 vshll 1 .s8
+ regl3_1 vshll 1 .s16
+ regl3_1 vshll 1 .s32
+ regl3_1 vshll 1 .u8
+ regl3_1 vshll 1 .u16
+ regl3_1 vshll 1 .u32
+
+ regl3_1 vshll 8 .i8
+ regl3_1 vshll 16 .i16
+ regl3_1 vshll 32 .i32
+
+ .macro convert op opr arg="" t1=".s32.f32" t2=".u32.f32" t3=".f32.s32" t4=".f32.u32"
+ \op\t1 \opr,\opr\arg
+ \op\t2 \opr,\opr\arg
+ \op\t3 \opr,\opr\arg
+ \op\t4 \opr,\opr\arg
+ .endm
+
+ convert vcvt q0
+ convert vcvtq q0
+ convert vcvt d0
+ convert vcvt q0 ",1"
+ convert vcvtq q0 ",1"
+ convert vcvt d0 ",1"
+
+ vmov q0,q0
+ vmov d0,d0
+ vmov.8 d0[0],r0
+ vmov.16 d0[0],r0
+ vmov.32 d0[0],r0
+ vmov d0,r0,r0
+ vmov.s8 r0,d0[0]
+ vmov.s16 r0,d0[0]
+ vmov.u8 r0,d0[0]
+ vmov.u16 r0,d0[0]
+ vmov.32 r0,d0[0]
+ vmov r0,r1,d0
+
+ .macro mov_imm op imm vtype
+ \op\vtype q0,\imm
+ \op\vtype d0,\imm
+ .endm
+
+ mov_imm vmov 0x00000077 .i32
+ mov_imm vmvn 0x00000077 .i32
+ mov_imm vmov 0x00007700 .i32
+ mov_imm vmvn 0x00007700 .i32
+ mov_imm vmov 0x00770000 .i32
+ mov_imm vmvn 0x00770000 .i32
+ mov_imm vmov 0x77000000 .i32
+ mov_imm vmvn 0x77000000 .i32
+ mov_imm vmov 0x0077 .i16
+ mov_imm vmvn 0x0077 .i16
+ mov_imm vmov 0x7700 .i16
+ mov_imm vmvn 0x7700 .i16
+ mov_imm vmov 0x000077ff .i32
+ mov_imm vmvn 0x000077ff .i32
+ mov_imm vmov 0x0077ffff .i32
+ mov_imm vmvn 0x0077ffff .i32
+ mov_imm vmov 0x77 .i8
+ mov_imm vmov 0xff0000ff000000ff .i64
+ mov_imm vmov 0x40880000 .f32
+
+ vmvn q0,q0
+ vmvnq q0,q0
+ vmvn d0,d0
+
+ .macro long_ops op
+ regl3_1 \op d0 .s8
+ regl3_1 \op d0 .s16
+ regl3_1 \op d0 .s32
+ regl3_1 \op d0 .u8
+ regl3_1 \op d0 .u16
+ regl3_1 \op d0 .u32
+ .endm
+
+ long_ops vabal
+ long_ops vabdl
+ long_ops vaddl
+ long_ops vsubl
+
+ .macro long_mac op
+ regl3_1 \op d0 .s8
+ regl3_1 \op d0 .s16
+ regl3_1 \op d0 .s32
+ regl3_1 \op d0 .u8
+ regl3_1 \op d0 .u16
+ regl3_1 \op d0 .u32
+ regl3_1 \op "d0[0]" .s16
+ regl3_1 \op "d0[0]" .s32
+ regl3_1 \op "d0[0]" .u16
+ regl3_1 \op "d0[0]" .u32
+ .endm
+
+ long_mac vmlal
+ long_mac vmlsl
+
+ .macro wide_ops op
+ regw3_1 \op d0 .s8
+ regw3_1 \op d0 .s16
+ regw3_1 \op d0 .s32
+ regw3_1 \op d0 .u8
+ regw3_1 \op d0 .u16
+ regw3_1 \op d0 .u32
+ .endm
+
+ wide_ops vaddw
+ wide_ops vsubw
+
+ .macro narr_ops op
+ regn3_1 \op q0 .i16
+ regn3_1 \op q0 .i32
+ regn3_1 \op q0 .i64
+ .endm
+
+ narr_ops vaddhn
+ narr_ops vraddhn
+ narr_ops vsubhn
+ narr_ops vrsubhn
+
+ .macro long_dmac op
+ regl3_1 \op d0 .s16
+ regl3_1 \op d0 .s32
+ regl3_1 \op "d0[0]" .s16
+ regl3_1 \op "d0[0]" .s32
+ .endm
+
+ long_dmac vqdmlal
+ long_dmac vqdmlsl
+ long_dmac vqdmull
+
+ regl3_1 vmull d0 .s8
+ regl3_1 vmull d0 .s16
+ regl3_1 vmull d0 .s32
+ regl3_1 vmull d0 .u8
+ regl3_1 vmull d0 .u16
+ regl3_1 vmull d0 .u32
+ regl3_1 vmull d0 .p8
+ regl3_1 vmull "d0[0]" .s16
+ regl3_1 vmull "d0[0]" .s32
+ regl3_1 vmull "d0[0]" .u16
+ regl3_1 vmull "d0[0]" .u32
+
+ vext.8 q0,q0,q0,0
+ vextq.8 q0,q0,q0,0
+ vext.8 d0,d0,d0,0
+
+ .macro revs op opq vtype
+ \op\vtype q0,q0
+ \opq\vtype q0,q0
+ \op\vtype d0,d0
+ .endm
+
+ revs vrev64 vrev64q .8
+ revs vrev64 vrev64q .16
+ revs vrev64 vrev64q .32
+ revs vrev32 vrev32q .8
+ revs vrev32 vrev32q .16
+ revs vrev16 vrev16q .8
+
+ .macro dups op opq vtype
+ \op\vtype q0,r0
+ \opq\vtype q0,r0
+ \op\vtype d0,r0
+ \op\vtype q0,d0[0]
+ \opq\vtype q0,d0[0]
+ \op\vtype d0,d0[0]
+ .endm
+
+ dups vdup vdupq .8
+ dups vdup vdupq .16
+ dups vdup vdupq .32
+
+ .macro binop_3typ op op1 op2 t1 t2 t3
+ \op\t1 \op1,\op2
+ \op\t2 \op1,\op2
+ \op\t3 \op1,\op2
+ .endm
+
+ binop_3typ vmovl q0 d0 .s8 .s16 .s32
+ binop_3typ vmovl q0 d0 .u8 .u16 .u32
+ binop_3typ vmovn d0 q0 .i16 .i32 .i64
+ binop_3typ vqmovn d0 q0 .s16 .s32 .s64
+ binop_3typ vqmovn d0 q0 .u16 .u32 .u64
+ binop_3typ vqmovun d0 q0 .s16 .s32 .s64
+
+ .macro binops op opq vtype="" rhs="0"
+ \op\vtype q0,q\rhs
+ \opq\vtype q0,q\rhs
+ \op\vtype d0,d\rhs
+ .endm
+
+ .macro regs2_sz_32 op opq
+ binops \op \opq .8 1
+ binops \op \opq .16 1
+ binops \op \opq .32 1
+ .endm
+
+ regs2_sz_32 vzip vzipq
+ regs2_sz_32 vuzp vuzpq
+
+ .macro regs2_s_32 op opq
+ binops \op \opq .s8
+ binops \op \opq .s16
+ binops \op \opq .s32
+ .endm
+
+ regs2_s_32 vqabs vqabsq
+ regs2_s_32 vqneg vqnegq
+
+ .macro regs2_su_32 op opq
+ regs2_s_32 \op \opq
+ binops \op \opq .u8
+ binops \op \opq .u16
+ binops \op \opq .u32
+ .endm
+
+ regs2_su_32 vpadal vpadalq
+ regs2_su_32 vpaddl vpaddlq
+
+ binops vrecpe vrecpeq .u32
+ binops vrecpe vrecpeq .f32
+ binops vrsqrte vrsqrteq .u32
+ binops vrsqrte vrsqrteq .f32
+
+ regs2_s_32 vcls vclsq
+
+ .macro regs2_i_32 op opq
+ binops \op \opq .i8
+ binops \op \opq .i16
+ binops \op \opq .i32
+ .endm
+
+ regs2_i_32 vclz vclzq
+
+ binops vcnt vcntq .8
+
+ binops vswp vswpq "" 1
+
+ regs2_sz_32 vtrn vtrnq
+
+ vtbl.8 d0,{d0},d0
+ vtbx.8 d0,{d0},d0
+
diff --git a/gas/testsuite/gas/arm/neon-ldst-es.d b/gas/testsuite/gas/arm/neon-ldst-es.d
new file mode 100644
index 0000000000..c520ac9311
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-ldst-es.d
@@ -0,0 +1,57 @@
+# name: Neon element and structure loads and stores
+# as: -mfpu=neon
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> f406282f vst2\.8 {d2-d3}, \[r6, :128\]
+0[0-9a-f]+ <[^>]+> f427140d vld3\.8 {d1-d3}, \[r7\]!
+0[0-9a-f]+ <[^>]+> f4091553 vst3\.16 {d1,d3,d5}, \[r9, :64\], r3
+0[0-9a-f]+ <[^>]+> f42a208f vld4\.32 {d2-d5}, \[sl\]
+0[0-9a-f]+ <[^>]+> f40a114f vst4\.16 {d1,d3,d5,d7}, \[sl\]
+0[0-9a-f]+ <[^>]+> f4aa1c6f vld1\.16 {d1\[\]-d2\[\]}, \[sl\]
+0[0-9a-f]+ <[^>]+> f4aa1c5f vld1\.16 {d1\[\]}, \[sl, :16\]
+0[0-9a-f]+ <[^>]+> f4aa1dbf vld2\.32 {d1\[\],d3\[\]}, \[sl, :64\]
+0[0-9a-f]+ <[^>]+> f4aa3e0c vld3\.8 {d3\[\]-d5\[\]}, \[sl\], ip
+0[0-9a-f]+ <[^>]+> f4a9af6d vld4\.16 {d10\[\],d12\[\],d14\[\],d16\[\]}, \[r9\]!
+0[0-9a-f]+ <[^>]+> f4a9af5f vld4\.16 {d10\[\]-d13\[\]}, \[r9, :64\]
+0[0-9a-f]+ <[^>]+> f4a9af9f vld4\.32 {d10\[\]-d13\[\]}, \[r9, :64\]
+0[0-9a-f]+ <[^>]+> f4a9afdf vld4\.32 {d10\[\]-d13\[\]}, \[r9, :128\]
+0[0-9a-f]+ <[^>]+> f4a530ed vld1\.8 {d3\[7\]}, \[r5\]!
+0[0-9a-f]+ <[^>]+> f48554df vst1\.16 {d5\[3\]}, \[r5, :16\]
+0[0-9a-f]+ <[^>]+> f4a535dd vld2\.16 {d3\[3\],d4\[3\]}, \[r5, :32\]!
+0[0-9a-f]+ <[^>]+> f4858a83 vst3\.32 {d8\[1\],d9\[1\],d10\[1\]}, \[r5\], r3
+0[0-9a-f]+ <[^>]+> f4a7804f vld1\.8 {d8\[2\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7848f vld1\.16 {d8\[2\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7849f vld1\.16 {d8\[2\]}, \[r7, :16\]
+0[0-9a-f]+ <[^>]+> f4a7888f vld1\.32 {d8\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a788bf vld1\.32 {d8\[1\]}, \[r7, :32\]
+0[0-9a-f]+ <[^>]+> f4a7812f vld2\.8 {d8\[1\],d9\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7813f vld2\.8 {d8\[1\],d9\[1\]}, \[r7, :16\]
+0[0-9a-f]+ <[^>]+> f4a7854f vld2\.16 {d8\[1\],d9\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7855f vld2\.16 {d8\[1\],d9\[1\]}, \[r7, :32\]
+0[0-9a-f]+ <[^>]+> f4a7856f vld2\.16 {d8\[1\],d10\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7857f vld2\.16 {d8\[1\],d10\[1\]}, \[r7, :32\]
+0[0-9a-f]+ <[^>]+> f4a7898f vld2\.32 {d8\[1\],d9\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7899f vld2\.32 {d8\[1\],d9\[1\]}, \[r7, :64\]
+0[0-9a-f]+ <[^>]+> f4a789cf vld2\.32 {d8\[1\],d10\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a789df vld2\.32 {d8\[1\],d10\[1\]}, \[r7, :64\]
+0[0-9a-f]+ <[^>]+> f4a7822f vld3\.8 {d8\[1\],d9\[1\],d10\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7864f vld3\.16 {d8\[1\],d9\[1\],d10\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7866f vld3\.16 {d8\[1\],d10\[1\],d12\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a78a8f vld3\.32 {d8\[1\],d9\[1\],d10\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a78acf vld3\.32 {d8\[1\],d10\[1\],d12\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7834f vld4\.8 {d8\[2\],d9\[2\],d10\[2\],d11\[2\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7835f vld4\.8 {d8\[2\],d9\[2\],d10\[2\],d11\[2\]}, \[r7, :32\]
+0[0-9a-f]+ <[^>]+> f4a7876f vld4\.16 {d8\[1\],d10\[1\],d12\[1\],d14\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a7875f vld4\.16 {d8\[1\],d9\[1\],d10\[1\],d11\[1\]}, \[r7, :64\]
+0[0-9a-f]+ <[^>]+> f4a78bcf vld4\.32 {d8\[1\],d10\[1\],d12\[1\],d14\[1\]}, \[r7\]
+0[0-9a-f]+ <[^>]+> f4a78bdf vld4\.32 {d8\[1\],d10\[1\],d12\[1\],d14\[1\]}, \[r7, :64\]
+0[0-9a-f]+ <[^>]+> f4a78bef vld4\.32 {d8\[1\],d10\[1\],d12\[1\],d14\[1\]}, \[r7, :128\]
+0[0-9a-f]+ <[^>]+> f3b43805 vtbl\.8 d3, {d4}, d5
+0[0-9a-f]+ <[^>]+> f3b23b05 vtbl\.8 d3, {d2-d5}, d5
+0[0-9a-f]+ <[^>]+> f3be3985 vtbl\.8 d3, {d30-d31}, d5
+0[0-9a-f]+ <[^>]+> f427288f vld2\.32 {d2-d3}, \[r7\]
+0[0-9a-f]+ <[^>]+> f427208f vld4\.32 {d2-d5}, \[r7\]
+0[0-9a-f]+ <[^>]+> f467c08f vld4\.32 {d28-d31}, \[r7\]
diff --git a/gas/testsuite/gas/arm/neon-ldst-es.s b/gas/testsuite/gas/arm/neon-ldst-es.s
new file mode 100644
index 0000000000..5a29a43793
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-ldst-es.s
@@ -0,0 +1,59 @@
+@ test element and structure loads and stores.
+
+ .text
+ .arm
+ .syntax unified
+
+ vst2.8 {d2,d3},[r6,:128]
+ vld3.8 {d1,d2,d3},[r7]!
+ vst3.16 {d1,d3,d5},[r9,:64],r3
+ vld4.32 {d2,d3,d4,d5},[r10]
+ vst4.16 {d1,d3,d5,d7},[r10]
+ vld1.16 {d1[],d2[]},[r10]
+ vld1.16 {d1[]},[r10,:16]
+ vld2.32 {d1[],d3[]},[r10,:64]
+ vld3.s8 {d3[],d4[],d5[]},[r10],r12
+ vld4.16 {d10[],d12[],d14[],d16[]},[r9]!
+ vld4.16 {d10[],d11[],d12[],d13[]},[r9,:64]
+ vld4.32 {d10[],d11[],d12[],d13[]},[r9,:64]
+ vld4.32 {d10[],d11[],d12[],d13[]},[r9,:128]
+ vld1.8 {d3[7]},[r5]!
+ vst1.16 {d5[3]},[r5,:16]
+ vld2.16 {d3[3],d4[3]},[r5,:32]!
+ vst3.32 {d8[1],d9[1],d10[1]},[r5],r3
+
+ vld1.8 {d8[2]},[r7]
+ vld1.16 {d8[2]},[r7]
+ vld1.16 {d8[2]},[r7,:16]
+ vld1.32 {d8[1]},[r7]
+ vld1.32 {d8[1]},[r7,:32]
+ vld2.8 {d8[1],d9[1]},[r7]
+ vld2.8 {d8[1],d9[1]},[r7,:16]
+ vld2.16 {d8[1],d9[1]},[r7]
+ vld2.16 {d8[1],d9[1]},[r7,:32]
+ vld2.16 {d8[1],d10[1]},[r7]
+ vld2.16 {d8[1],d10[1]},[r7,:32]
+ vld2.32 {d8[1],d9[1]},[r7]
+ vld2.32 {d8[1],d9[1]},[r7,:64]
+ vld2.32 {d8[1],d10[1]},[r7]
+ vld2.32 {d8[1],d10[1]},[r7,:64]
+ vld3.8 {d8[1],d9[1],d10[1]},[r7]
+ vld3.16 {d8[1],d9[1],d10[1]},[r7]
+ vld3.16 {d8[1],d10[1],d12[1]},[r7]
+ vld3.32 {d8[1],d9[1],d10[1]},[r7]
+ vld3.32 {d8[1],d10[1],d12[1]},[r7]
+ vld4.8 {d8[2],d9[2],d10[2],d11[2]},[r7]
+ vld4.8 {d8[2],d9[2],d10[2],d11[2]},[r7,:32]
+ vld4.16 {d8[1],d10[1],d12[1],d14[1]},[r7]
+ vld4.16 {d8[1],d9[1],d10[1],d11[1]},[r7,:64]
+ vld4.32 {d8[1],d10[1],d12[1],d14[1]},[r7]
+ vld4.32 {d8[1],d10[1],d12[1],d14[1]},[r7,:64]
+ vld4.32 {d8[1],d10[1],d12[1],d14[1]},[r7,:128]
+
+ vtbl.8 d3,{d4},d5
+ vtbl.8 d3,{q1-q2},d5
+ vtbl.8 d3,{q15},d5
+
+ vld2.32 {q1},[r7]
+ vld4.32 {q1-q2},[r7]
+ vld4.32 {q14-q15},[r7]
diff --git a/gas/testsuite/gas/arm/neon-ldst-rm.d b/gas/testsuite/gas/arm/neon-ldst-rm.d
new file mode 100644
index 0000000000..c538fc93d6
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-ldst-rm.d
@@ -0,0 +1,63 @@
+# name: Neon single and multiple register loads and stores
+# as: -mfpu=neon
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> ec922b02 vldmia r2, {d2}
+0[0-9a-f]+ <[^>]+> ec922b04 vldmia r2, {d2-d3}
+0[0-9a-f]+ <[^>]+> ec924b08 vldmia r2, {d4-d7}
+0[0-9a-f]+ <[^>]+> ecd28b10 vldmia r2, {d24-d31}
+0[0-9a-f]+ <[^>]+> ec923b20 vldmia r2, {d3-d18}
+0[0-9a-f]+ <[^>]+> ec922b02 vldmia r2, {d2}
+0[0-9a-f]+ <[^>]+> ec922b04 vldmia r2, {d2-d3}
+0[0-9a-f]+ <[^>]+> ec924b08 vldmia r2, {d4-d7}
+0[0-9a-f]+ <[^>]+> ecd28b10 vldmia r2, {d24-d31}
+0[0-9a-f]+ <[^>]+> ec923b20 vldmia r2, {d3-d18}
+0[0-9a-f]+ <[^>]+> ecb22b02 vldmia r2!, {d2}
+0[0-9a-f]+ <[^>]+> ecb22b04 vldmia r2!, {d2-d3}
+0[0-9a-f]+ <[^>]+> ecb24b08 vldmia r2!, {d4-d7}
+0[0-9a-f]+ <[^>]+> ecf28b10 vldmia r2!, {d24-d31}
+0[0-9a-f]+ <[^>]+> ecb23b20 vldmia r2!, {d3-d18}
+0[0-9a-f]+ <[^>]+> ed322b02 vldmdb r2!, {d2}
+0[0-9a-f]+ <[^>]+> ed322b04 vldmdb r2!, {d2-d3}
+0[0-9a-f]+ <[^>]+> ed324b08 vldmdb r2!, {d4-d7}
+0[0-9a-f]+ <[^>]+> ed728b10 vldmdb r2!, {d24-d31}
+0[0-9a-f]+ <[^>]+> ed323b20 vldmdb r2!, {d3-d18}
+0[0-9a-f]+ <[^>]+> ec822b02 vstmia r2, {d2}
+0[0-9a-f]+ <[^>]+> ec822b04 vstmia r2, {d2-d3}
+0[0-9a-f]+ <[^>]+> ec824b08 vstmia r2, {d4-d7}
+0[0-9a-f]+ <[^>]+> ecc28b10 vstmia r2, {d24-d31}
+0[0-9a-f]+ <[^>]+> ec823b20 vstmia r2, {d3-d18}
+0[0-9a-f]+ <[^>]+> ec822b02 vstmia r2, {d2}
+0[0-9a-f]+ <[^>]+> ec822b04 vstmia r2, {d2-d3}
+0[0-9a-f]+ <[^>]+> ec824b08 vstmia r2, {d4-d7}
+0[0-9a-f]+ <[^>]+> ecc28b10 vstmia r2, {d24-d31}
+0[0-9a-f]+ <[^>]+> ec823b20 vstmia r2, {d3-d18}
+0[0-9a-f]+ <[^>]+> eca22b02 vstmia r2!, {d2}
+0[0-9a-f]+ <[^>]+> eca22b04 vstmia r2!, {d2-d3}
+0[0-9a-f]+ <[^>]+> eca24b08 vstmia r2!, {d4-d7}
+0[0-9a-f]+ <[^>]+> ece28b10 vstmia r2!, {d24-d31}
+0[0-9a-f]+ <[^>]+> eca23b20 vstmia r2!, {d3-d18}
+0[0-9a-f]+ <[^>]+> ed222b02 vstmdb r2!, {d2}
+0[0-9a-f]+ <[^>]+> ed222b04 vstmdb r2!, {d2-d3}
+0[0-9a-f]+ <[^>]+> ed224b08 vstmdb r2!, {d4-d7}
+0[0-9a-f]+ <[^>]+> ed628b10 vstmdb r2!, {d24-d31}
+0[0-9a-f]+ <[^>]+> ed223b20 vstmdb r2!, {d3-d18}
+0[0-9a-f]+ <backward> 000001f4 streqd r0, \[r0\], -r4
+0[0-9a-f]+ <[^>]+> eddf6b0b vldr d22, \[pc, #44\] ; 0[0-9a-f]+ <forward>
+0[0-9a-f]+ <[^>]+> ed935b00 vldr d5, \[r3\]
+0[0-9a-f]+ <[^>]+> ed135b01 vldr d5, \[r3, #-4\]
+0[0-9a-f]+ <[^>]+> ed935b01 vldr d5, \[r3, #4\]
+0[0-9a-f]+ <[^>]+> ed835b00 vstr d5, \[r3\]
+0[0-9a-f]+ <[^>]+> ed035b01 vstr d5, \[r3, #-4\]
+0[0-9a-f]+ <[^>]+> ed835b01 vstr d5, \[r3, #4\]
+0[0-9a-f]+ <[^>]+> ed935b00 vldr d5, \[r3\]
+0[0-9a-f]+ <[^>]+> ed135b40 vldr d5, \[r3, #-256\]
+0[0-9a-f]+ <[^>]+> ed935b40 vldr d5, \[r3, #256\]
+0[0-9a-f]+ <[^>]+> ed835b00 vstr d5, \[r3\]
+0[0-9a-f]+ <[^>]+> ed035b40 vstr d5, \[r3, #-256\]
+0[0-9a-f]+ <[^>]+> ed835b40 vstr d5, \[r3, #256\]
+0[0-9a-f]+ <forward> 000002bc streqh r0, \[r0\], -ip
+0[0-9a-f]+ <[^>]+> ed1f7b11 vldr d7, \[pc, #-68\] ; 0[0-9a-f]+ <backward>
diff --git a/gas/testsuite/gas/arm/neon-ldst-rm.s b/gas/testsuite/gas/arm/neon-ldst-rm.s
new file mode 100644
index 0000000000..f9421ac556
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-ldst-rm.s
@@ -0,0 +1,44 @@
+@ test register and multi-register loads and stores.
+
+ .text
+ .arm
+ .syntax unified
+
+ .macro multi op dir="" wb=""
+ \op\dir r2\wb,{d2}
+ \op\dir r2\wb,{d2-d3}
+ \op\dir r2\wb,{q2-q3}
+ \op\dir r2\wb,{q12-q14,q15}
+ \op\dir r2\wb,{d3,d4,d5-d8,d9,d10,d11,d12-d16,d17-d18}
+ .endm
+
+ multi vldm
+ multi vldm ia
+ multi vldm ia "!"
+ multi vldm db "!"
+
+ multi vstm
+ multi vstm ia
+ multi vstm ia "!"
+ multi vstm db "!"
+
+backward:
+ .word 500
+
+ .macro single op offset=""
+ \op d5,[r3]
+ \op d5,[r3,#-\offset]
+ \op d5,[r3,#\offset]
+ .endm
+
+ vldr d22, forward
+
+ single vldr 4
+ single vstr 4
+ single vldr 256
+ single vstr 256
+
+forward:
+ .word 700
+
+ vldr d7, backward
diff --git a/gas/testsuite/gas/arm/neon-omit.d b/gas/testsuite/gas/arm/neon-omit.d
new file mode 100644
index 0000000000..155fec9eab
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-omit.d
@@ -0,0 +1,51 @@
+# name: Neon optional register operands
+# as: -mfpu=neon
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section .text:
+0[0-9a-f]+ <[^>]+> f3022746 vabd\.u8 q1, q1, q3
+0[0-9a-f]+ <[^>]+> f26cc0c6 vhadd\.s32 q14, q14, q3
+0[0-9a-f]+ <[^>]+> f2222144 vrhadd\.s32 q1, q1, q2
+0[0-9a-f]+ <[^>]+> f22aa24e vhsub\.s32 q5, q5, q7
+0[0-9a-f]+ <[^>]+> f3166448 vshl\.u16 q3, q3, q4
+0[0-9a-f]+ <[^>]+> f32aa45c vqshl\.u32 q5, q5, q6
+0[0-9a-f]+ <[^>]+> f20ee170 vand q7, q7, q8
+0[0-9a-f]+ <[^>]+> f30ee170 veor q7, q7, q8
+0[0-9a-f]+ <[^>]+> f3b5a14a vceq\.i16 q5, q5, #0
+0[0-9a-f]+ <[^>]+> f31aa85a vceq\.i16 q5, q5, q5
+0[0-9a-f]+ <[^>]+> f3b5a24a vclt\.s16 q5, q5, #0
+0[0-9a-f]+ <[^>]+> f3b5a34c vabs\.s16 q5, q6
+0[0-9a-f]+ <[^>]+> f3b57388 vneg\.s16 d7, d8
+0[0-9a-f]+ <[^>]+> f3b97708 vabs\.f32 d7, d8
+0[0-9a-f]+ <[^>]+> f3f927e4 vneg\.f32 q9, q10
+0[0-9a-f]+ <[^>]+> f2211a03 vpmax\.s32 d1, d1, d3
+0[0-9a-f]+ <[^>]+> f2255a17 vpmin\.s32 d5, d5, d7
+0[0-9a-f]+ <[^>]+> f3011f03 vpmax\.f32 d1, d1, d3
+0[0-9a-f]+ <[^>]+> f3255f07 vpmin\.f32 d5, d5, d7
+0[0-9a-f]+ <[^>]+> f2122b46 vqdmulh\.s16 q1, q1, q3
+0[0-9a-f]+ <[^>]+> f3255b07 vqrdmulh\.s32 d5, d5, d7
+0[0-9a-f]+ <[^>]+> f3922c6d vqdmulh\.s16 q1, q1, d5\[3\]
+0[0-9a-f]+ <[^>]+> f2122056 vqadd\.s16 q1, q1, q3
+0[0-9a-f]+ <[^>]+> f2255017 vqadd\.s32 d5, d5, d7
+0[0-9a-f]+ <[^>]+> f2222944 vmla\.i32 q1, q1, q2
+0[0-9a-f]+ <[^>]+> f2133b14 vpadd\.i16 d3, d3, d4
+0[0-9a-f]+ <[^>]+> f3266948 vmls\.i32 q3, q3, q4
+0[0-9a-f]+ <[^>]+> f3022e54 vacge\.f32 q1, q1, q2
+0[0-9a-f]+ <[^>]+> f3266e58 vacgt\.f32 q3, q3, q4
+0[0-9a-f]+ <[^>]+> f30cae5a vacge\.f32 q5, q6, q5
+0[0-9a-f]+ <[^>]+> f320eede vacgt\.f32 q7, q8, q7
+0[0-9a-f]+ <[^>]+> f32ee370 vcge\.u32 q7, q7, q8
+0[0-9a-f]+ <[^>]+> f320e3de vcge\.u32 q7, q8, q7
+0[0-9a-f]+ <[^>]+> f3a22102 vaddw\.u32 q1, q1, d2
+0[0-9a-f]+ <[^>]+> f2a66304 vsubw\.s32 q3, q3, d4
+0[0-9a-f]+ <[^>]+> f2244856 vtst\.32 q2, q2, q3
+0[0-9a-f]+ <[^>]+> f2011f12 vrecps\.f32 d1, d1, d2
+0[0-9a-f]+ <[^>]+> f29c2052 vshr\.s16 q1, q1, #4
+0[0-9a-f]+ <[^>]+> f28b4254 vrshr\.s8 q2, q2, #5
+0[0-9a-f]+ <[^>]+> f39a6156 vsra\.u16 q3, q3, #6
+0[0-9a-f]+ <[^>]+> f39a8358 vrsra\.u16 q4, q4, #6
+0[0-9a-f]+ <[^>]+> f3954554 vsli\.16 q2, q2, #5
+0[0-9a-f]+ <[^>]+> f3bff69f vqshlu\.s64 d15, d15, #63
+0[0-9a-f]+ <[^>]+> f2b55306 vext\.8 d5, d5, d6, #3
diff --git a/gas/testsuite/gas/arm/neon-omit.s b/gas/testsuite/gas/arm/neon-omit.s
new file mode 100644
index 0000000000..7b20f1257b
--- /dev/null
+++ b/gas/testsuite/gas/arm/neon-omit.s
@@ -0,0 +1,50 @@
+@ test omitted optional arguments
+
+ .text
+ .arm
+ .syntax unified
+
+ vabd.u8 q1,q3
+ vhadd.s32 q14, q3
+ vrhadd.s32 q1,q2
+ vhsub.s32 q5,q7
+ vshl.u16 q3,q4
+ vqshl.u32 q5,q6
+ vand.64 q7,q8
+ veor.64 q7,q8
+ vceq.i16 q5,#0
+ vceq.i16 q5,q5
+ vclt.s16 q5,#0
+ vabs.s16 q5,q6
+ vneg.s16 d7,d8
+ vabs.f d7,d8
+ vneg.f q9,q10
+ vpmax.s32 d1,d3
+ vpmin.s32 d5,d7
+ vpmax.f32 d1,d3
+ vpmin.f32 d5,d7
+ vqdmulh.s16 q1,q3
+ vqrdmulh.s32 d5,d7
+ vqdmulh.s16 q1,d5[3]
+ vqadd.s16 q1,q3
+ vqadd.s32 d5,d7
+ vmla.i32 q1,q2
+ vpadd.i16 d3,d4
+ vmls.s32 q3,q4
+ vacge.f q1,q2
+ vacgt.f q3,q4
+ vaclt.f q5,q6
+ vacle.f q7,q8
+ vcge.u32 q7,q8
+ vclt.u32 q7,q8
+ vaddw.u32 q1,d2
+ vsubw.s32 q3,d4
+ vtst.i32 q2,q3
+ vrecps.f d1,d2
+ vshr.s16 q1,#4
+ vrshr.s8 q2,#5
+ vsra.u16 q3,#6
+ vrsra.u16 q4,#6
+ vsli.16 q2,#5
+ vqshlu.s64 d15,#63
+ vext.8 d5,d6,#3
diff --git a/gas/testsuite/gas/arm/vfp1.d b/gas/testsuite/gas/arm/vfp1.d
index 672b23de31..3894909539 100644
--- a/gas/testsuite/gas/arm/vfp1.d
+++ b/gas/testsuite/gas/arm/vfp1.d
@@ -24,20 +24,20 @@ Disassembly of section .text:
0+038 <[^>]*> ee100b40 fnmscd d0, d0, d0
0+03c <[^>]*> ee200b40 fnmuld d0, d0, d0
0+040 <[^>]*> ee300b40 fsubd d0, d0, d0
-0+044 <[^>]*> ed900b00 fldd d0, \[r0\]
-0+048 <[^>]*> ed800b00 fstd d0, \[r0\]
-0+04c <[^>]*> ec900b02 fldmiad r0, {d0}
-0+050 <[^>]*> ec900b02 fldmiad r0, {d0}
-0+054 <[^>]*> ecb00b02 fldmiad r0!, {d0}
-0+058 <[^>]*> ecb00b02 fldmiad r0!, {d0}
-0+05c <[^>]*> ed300b02 fldmdbd r0!, {d0}
-0+060 <[^>]*> ed300b02 fldmdbd r0!, {d0}
-0+064 <[^>]*> ec800b02 fstmiad r0, {d0}
-0+068 <[^>]*> ec800b02 fstmiad r0, {d0}
-0+06c <[^>]*> eca00b02 fstmiad r0!, {d0}
-0+070 <[^>]*> eca00b02 fstmiad r0!, {d0}
-0+074 <[^>]*> ed200b02 fstmdbd r0!, {d0}
-0+078 <[^>]*> ed200b02 fstmdbd r0!, {d0}
+0+044 <[^>]*> ed900b00 vldr d0, \[r0\]
+0+048 <[^>]*> ed800b00 vstr d0, \[r0\]
+0+04c <[^>]*> ec900b02 vldmia r0, {d0}
+0+050 <[^>]*> ec900b02 vldmia r0, {d0}
+0+054 <[^>]*> ecb00b02 vldmia r0!, {d0}
+0+058 <[^>]*> ecb00b02 vldmia r0!, {d0}
+0+05c <[^>]*> ed300b02 vldmdb r0!, {d0}
+0+060 <[^>]*> ed300b02 vldmdb r0!, {d0}
+0+064 <[^>]*> ec800b02 vstmia r0, {d0}
+0+068 <[^>]*> ec800b02 vstmia r0, {d0}
+0+06c <[^>]*> eca00b02 vstmia r0!, {d0}
+0+070 <[^>]*> eca00b02 vstmia r0!, {d0}
+0+074 <[^>]*> ed200b02 vstmdb r0!, {d0}
+0+078 <[^>]*> ed200b02 vstmdb r0!, {d0}
0+07c <[^>]*> eeb80bc0 fsitod d0, s0
0+080 <[^>]*> eeb80b40 fuitod d0, s0
0+084 <[^>]*> eebd0b40 ftosid s0, d0
@@ -46,10 +46,10 @@ Disassembly of section .text:
0+090 <[^>]*> eebc0bc0 ftouizd s0, d0
0+094 <[^>]*> eeb70ac0 fcvtds d0, s0
0+098 <[^>]*> eeb70bc0 fcvtsd s0, d0
-0+09c <[^>]*> ee300b10 fmrdh r0, d0
-0+0a0 <[^>]*> ee100b10 fmrdl r0, d0
-0+0a4 <[^>]*> ee200b10 fmdhr d0, r0
-0+0a8 <[^>]*> ee000b10 fmdlr d0, r0
+0+09c <[^>]*> ee300b10 vmov\.32 r0, d0\[1\]
+0+0a0 <[^>]*> ee100b10 vmov\.32 r0, d0\[0\]
+0+0a4 <[^>]*> ee200b10 vmov\.32 d0\[1\], r0
+0+0a8 <[^>]*> ee000b10 vmov\.32 d0\[0\], r0
0+0ac <[^>]*> eeb51b40 fcmpzd d1
0+0b0 <[^>]*> eeb52b40 fcmpzd d2
0+0b4 <[^>]*> eeb5fb40 fcmpzd d15
@@ -89,46 +89,46 @@ Disassembly of section .text:
0+13c <[^>]*> eeb70bc1 fcvtsd s0, d1
0+140 <[^>]*> eeb70bc2 fcvtsd s0, d2
0+144 <[^>]*> eeb70bcf fcvtsd s0, d15
-0+148 <[^>]*> ee301b10 fmrdh r1, d0
-0+14c <[^>]*> ee30eb10 fmrdh lr, d0
-0+150 <[^>]*> ee310b10 fmrdh r0, d1
-0+154 <[^>]*> ee320b10 fmrdh r0, d2
-0+158 <[^>]*> ee3f0b10 fmrdh r0, d15
-0+15c <[^>]*> ee101b10 fmrdl r1, d0
-0+160 <[^>]*> ee10eb10 fmrdl lr, d0
-0+164 <[^>]*> ee110b10 fmrdl r0, d1
-0+168 <[^>]*> ee120b10 fmrdl r0, d2
-0+16c <[^>]*> ee1f0b10 fmrdl r0, d15
-0+170 <[^>]*> ee201b10 fmdhr d0, r1
-0+174 <[^>]*> ee20eb10 fmdhr d0, lr
-0+178 <[^>]*> ee210b10 fmdhr d1, r0
-0+17c <[^>]*> ee220b10 fmdhr d2, r0
-0+180 <[^>]*> ee2f0b10 fmdhr d15, r0
-0+184 <[^>]*> ee001b10 fmdlr d0, r1
-0+188 <[^>]*> ee00eb10 fmdlr d0, lr
-0+18c <[^>]*> ee010b10 fmdlr d1, r0
-0+190 <[^>]*> ee020b10 fmdlr d2, r0
-0+194 <[^>]*> ee0f0b10 fmdlr d15, r0
-0+198 <[^>]*> ed910b00 fldd d0, \[r1\]
-0+19c <[^>]*> ed9e0b00 fldd d0, \[lr\]
-0+1a0 <[^>]*> ed900b00 fldd d0, \[r0\]
-0+1a4 <[^>]*> ed900bff fldd d0, \[r0, #1020\]
-0+1a8 <[^>]*> ed100bff fldd d0, \[r0, #-1020\]
-0+1ac <[^>]*> ed901b00 fldd d1, \[r0\]
-0+1b0 <[^>]*> ed902b00 fldd d2, \[r0\]
-0+1b4 <[^>]*> ed90fb00 fldd d15, \[r0\]
-0+1b8 <[^>]*> ed8ccbc9 fstd d12, \[ip, #804\]
-0+1bc <[^>]*> ec901b02 fldmiad r0, {d1}
-0+1c0 <[^>]*> ec902b02 fldmiad r0, {d2}
-0+1c4 <[^>]*> ec90fb02 fldmiad r0, {d15}
-0+1c8 <[^>]*> ec900b04 fldmiad r0, {d0-d1}
-0+1cc <[^>]*> ec900b06 fldmiad r0, {d0-d2}
-0+1d0 <[^>]*> ec900b20 fldmiad r0, {d0-d15}
-0+1d4 <[^>]*> ec901b1e fldmiad r0, {d1-d15}
-0+1d8 <[^>]*> ec902b1c fldmiad r0, {d2-d15}
-0+1dc <[^>]*> ec90eb04 fldmiad r0, {d14-d15}
-0+1e0 <[^>]*> ec910b02 fldmiad r1, {d0}
-0+1e4 <[^>]*> ec9e0b02 fldmiad lr, {d0}
+0+148 <[^>]*> ee301b10 vmov\.32 r1, d0\[1\]
+0+14c <[^>]*> ee30eb10 vmov\.32 lr, d0\[1\]
+0+150 <[^>]*> ee310b10 vmov\.32 r0, d1\[1\]
+0+154 <[^>]*> ee320b10 vmov\.32 r0, d2\[1\]
+0+158 <[^>]*> ee3f0b10 vmov\.32 r0, d15\[1\]
+0+15c <[^>]*> ee101b10 vmov\.32 r1, d0\[0\]
+0+160 <[^>]*> ee10eb10 vmov\.32 lr, d0\[0\]
+0+164 <[^>]*> ee110b10 vmov\.32 r0, d1\[0\]
+0+168 <[^>]*> ee120b10 vmov\.32 r0, d2\[0\]
+0+16c <[^>]*> ee1f0b10 vmov\.32 r0, d15\[0\]
+0+170 <[^>]*> ee201b10 vmov\.32 d0\[1\], r1
+0+174 <[^>]*> ee20eb10 vmov\.32 d0\[1\], lr
+0+178 <[^>]*> ee210b10 vmov\.32 d1\[1\], r0
+0+17c <[^>]*> ee220b10 vmov\.32 d2\[1\], r0
+0+180 <[^>]*> ee2f0b10 vmov\.32 d15\[1\], r0
+0+184 <[^>]*> ee001b10 vmov\.32 d0\[0\], r1
+0+188 <[^>]*> ee00eb10 vmov\.32 d0\[0\], lr
+0+18c <[^>]*> ee010b10 vmov\.32 d1\[0\], r0
+0+190 <[^>]*> ee020b10 vmov\.32 d2\[0\], r0
+0+194 <[^>]*> ee0f0b10 vmov\.32 d15\[0\], r0
+0+198 <[^>]*> ed910b00 vldr d0, \[r1\]
+0+19c <[^>]*> ed9e0b00 vldr d0, \[lr\]
+0+1a0 <[^>]*> ed900b00 vldr d0, \[r0\]
+0+1a4 <[^>]*> ed900bff vldr d0, \[r0, #1020\]
+0+1a8 <[^>]*> ed100bff vldr d0, \[r0, #-1020\]
+0+1ac <[^>]*> ed901b00 vldr d1, \[r0\]
+0+1b0 <[^>]*> ed902b00 vldr d2, \[r0\]
+0+1b4 <[^>]*> ed90fb00 vldr d15, \[r0\]
+0+1b8 <[^>]*> ed8ccbc9 vstr d12, \[ip, #804\]
+0+1bc <[^>]*> ec901b02 vldmia r0, {d1}
+0+1c0 <[^>]*> ec902b02 vldmia r0, {d2}
+0+1c4 <[^>]*> ec90fb02 vldmia r0, {d15}
+0+1c8 <[^>]*> ec900b04 vldmia r0, {d0-d1}
+0+1cc <[^>]*> ec900b06 vldmia r0, {d0-d2}
+0+1d0 <[^>]*> ec900b20 vldmia r0, {d0-d15}
+0+1d4 <[^>]*> ec901b1e vldmia r0, {d1-d15}
+0+1d8 <[^>]*> ec902b1c vldmia r0, {d2-d15}
+0+1dc <[^>]*> ec90eb04 vldmia r0, {d14-d15}
+0+1e0 <[^>]*> ec910b02 vldmia r1, {d0}
+0+1e4 <[^>]*> ec9e0b02 vldmia lr, {d0}
0+1e8 <[^>]*> eeb50b40 fcmpzd d0
0+1ec <[^>]*> eeb51b40 fcmpzd d1
0+1f0 <[^>]*> eeb52b40 fcmpzd d2
@@ -162,20 +162,20 @@ Disassembly of section .text:
0+260 <[^>]*> 0e167b4b fnmscdeq d7, d6, d11
0+264 <[^>]*> 0e245b4c fnmuldeq d5, d4, d12
0+268 <[^>]*> 0e3d3b4e fsubdeq d3, d13, d14
-0+26c <[^>]*> 0d952b00 flddeq d2, \[r5\]
-0+270 <[^>]*> 0d8c1b00 fstdeq d1, \[ip\]
-0+274 <[^>]*> 0c911b02 fldmiadeq r1, {d1}
-0+278 <[^>]*> 0c922b02 fldmiadeq r2, {d2}
-0+27c <[^>]*> 0cb33b02 fldmiadeq r3!, {d3}
-0+280 <[^>]*> 0cb44b02 fldmiadeq r4!, {d4}
-0+284 <[^>]*> 0d355b02 fldmdbdeq r5!, {d5}
-0+288 <[^>]*> 0d366b02 fldmdbdeq r6!, {d6}
-0+28c <[^>]*> 0c87fb02 fstmiadeq r7, {d15}
-0+290 <[^>]*> 0c88eb02 fstmiadeq r8, {d14}
-0+294 <[^>]*> 0ca9db02 fstmiadeq r9!, {d13}
-0+298 <[^>]*> 0caacb02 fstmiadeq sl!, {d12}
-0+29c <[^>]*> 0d2bbb02 fstmdbdeq fp!, {d11}
-0+2a0 <[^>]*> 0d2cab02 fstmdbdeq ip!, {d10}
+0+26c <[^>]*> 0d952b00 vldreq d2, \[r5\]
+0+270 <[^>]*> 0d8c1b00 vstreq d1, \[ip\]
+0+274 <[^>]*> 0c911b02 vldmiaeq r1, {d1}
+0+278 <[^>]*> 0c922b02 vldmiaeq r2, {d2}
+0+27c <[^>]*> 0cb33b02 vldmiaeq r3!, {d3}
+0+280 <[^>]*> 0cb44b02 vldmiaeq r4!, {d4}
+0+284 <[^>]*> 0d355b02 vldmdbeq r5!, {d5}
+0+288 <[^>]*> 0d366b02 vldmdbeq r6!, {d6}
+0+28c <[^>]*> 0c87fb02 vstmiaeq r7, {d15}
+0+290 <[^>]*> 0c88eb02 vstmiaeq r8, {d14}
+0+294 <[^>]*> 0ca9db02 vstmiaeq r9!, {d13}
+0+298 <[^>]*> 0caacb02 vstmiaeq sl!, {d12}
+0+29c <[^>]*> 0d2bbb02 vstmdbeq fp!, {d11}
+0+2a0 <[^>]*> 0d2cab02 vstmdbeq ip!, {d10}
0+2a4 <[^>]*> 0eb8fbe0 fsitodeq d15, s1
0+2a8 <[^>]*> 0eb81b6f fuitodeq d1, s31
0+2ac <[^>]*> 0efd0b4f ftosideq s1, d15
@@ -184,10 +184,10 @@ Disassembly of section .text:
0+2b8 <[^>]*> 0efc5bc3 ftouizdeq s11, d3
0+2bc <[^>]*> 0eb71ac5 fcvtdseq d1, s10
0+2c0 <[^>]*> 0ef75bc1 fcvtsdeq s11, d1
-0+2c4 <[^>]*> 0e318b10 fmrdheq r8, d1
-0+2c8 <[^>]*> 0e1f7b10 fmrdleq r7, d15
-0+2cc <[^>]*> 0e21fb10 fmdhreq d1, pc
-0+2d0 <[^>]*> 0e0f1b10 fmdlreq d15, r1
+0+2c4 <[^>]*> 0e318b10 vmoveq\.32 r8, d1\[1\]
+0+2c8 <[^>]*> 0e1f7b10 vmoveq\.32 r7, d15\[0\]
+0+2cc <[^>]*> 0e21fb10 vmoveq\.32 d1\[1\], pc
+0+2d0 <[^>]*> 0e0f1b10 vmoveq\.32 d15\[0\], r1
0+2d4 <[^>]*> e1a00000 ? nop[ ]+\(mov r0,r0\)
0+2d8 <[^>]*> e1a00000 ? nop[ ]+\(mov r0,r0\)
0+2dc <[^>]*> e1a00000 ? nop[ ]+\(mov r0,r0\)
diff --git a/gas/testsuite/gas/arm/vfp1_t2.d b/gas/testsuite/gas/arm/vfp1_t2.d
index 22c4fd6f01..df9ab9f960 100644
--- a/gas/testsuite/gas/arm/vfp1_t2.d
+++ b/gas/testsuite/gas/arm/vfp1_t2.d
@@ -24,20 +24,20 @@ Disassembly of section .text:
0+038 <[^>]*> ee10 0b40 fnmscd d0, d0, d0
0+03c <[^>]*> ee20 0b40 fnmuld d0, d0, d0
0+040 <[^>]*> ee30 0b40 fsubd d0, d0, d0
-0+044 <[^>]*> ed90 0b00 fldd d0, \[r0\]
-0+048 <[^>]*> ed80 0b00 fstd d0, \[r0\]
-0+04c <[^>]*> ec90 0b02 fldmiad r0, {d0}
-0+050 <[^>]*> ec90 0b02 fldmiad r0, {d0}
-0+054 <[^>]*> ecb0 0b02 fldmiad r0!, {d0}
-0+058 <[^>]*> ecb0 0b02 fldmiad r0!, {d0}
-0+05c <[^>]*> ed30 0b02 fldmdbd r0!, {d0}
-0+060 <[^>]*> ed30 0b02 fldmdbd r0!, {d0}
-0+064 <[^>]*> ec80 0b02 fstmiad r0, {d0}
-0+068 <[^>]*> ec80 0b02 fstmiad r0, {d0}
-0+06c <[^>]*> eca0 0b02 fstmiad r0!, {d0}
-0+070 <[^>]*> eca0 0b02 fstmiad r0!, {d0}
-0+074 <[^>]*> ed20 0b02 fstmdbd r0!, {d0}
-0+078 <[^>]*> ed20 0b02 fstmdbd r0!, {d0}
+0+044 <[^>]*> ed90 0b00 vldr d0, \[r0\]
+0+048 <[^>]*> ed80 0b00 vstr d0, \[r0\]
+0+04c <[^>]*> ec90 0b02 vldmia r0, {d0}
+0+050 <[^>]*> ec90 0b02 vldmia r0, {d0}
+0+054 <[^>]*> ecb0 0b02 vldmia r0!, {d0}
+0+058 <[^>]*> ecb0 0b02 vldmia r0!, {d0}
+0+05c <[^>]*> ed30 0b02 vldmdb r0!, {d0}
+0+060 <[^>]*> ed30 0b02 vldmdb r0!, {d0}
+0+064 <[^>]*> ec80 0b02 vstmia r0, {d0}
+0+068 <[^>]*> ec80 0b02 vstmia r0, {d0}
+0+06c <[^>]*> eca0 0b02 vstmia r0!, {d0}
+0+070 <[^>]*> eca0 0b02 vstmia r0!, {d0}
+0+074 <[^>]*> ed20 0b02 vstmdb r0!, {d0}
+0+078 <[^>]*> ed20 0b02 vstmdb r0!, {d0}
0+07c <[^>]*> eeb8 0bc0 fsitod d0, s0
0+080 <[^>]*> eeb8 0b40 fuitod d0, s0
0+084 <[^>]*> eebd 0b40 ftosid s0, d0
@@ -46,10 +46,10 @@ Disassembly of section .text:
0+090 <[^>]*> eebc 0bc0 ftouizd s0, d0
0+094 <[^>]*> eeb7 0ac0 fcvtds d0, s0
0+098 <[^>]*> eeb7 0bc0 fcvtsd s0, d0
-0+09c <[^>]*> ee30 0b10 fmrdh r0, d0
-0+0a0 <[^>]*> ee10 0b10 fmrdl r0, d0
-0+0a4 <[^>]*> ee20 0b10 fmdhr d0, r0
-0+0a8 <[^>]*> ee00 0b10 fmdlr d0, r0
+0+09c <[^>]*> ee30 0b10 vmov\.32 r0, d0\[1\]
+0+0a0 <[^>]*> ee10 0b10 vmov\.32 r0, d0\[0\]
+0+0a4 <[^>]*> ee20 0b10 vmov\.32 d0\[1\], r0
+0+0a8 <[^>]*> ee00 0b10 vmov\.32 d0\[0\], r0
0+0ac <[^>]*> eeb5 1b40 fcmpzd d1
0+0b0 <[^>]*> eeb5 2b40 fcmpzd d2
0+0b4 <[^>]*> eeb5 fb40 fcmpzd d15
@@ -89,46 +89,46 @@ Disassembly of section .text:
0+13c <[^>]*> eeb7 0bc1 fcvtsd s0, d1
0+140 <[^>]*> eeb7 0bc2 fcvtsd s0, d2
0+144 <[^>]*> eeb7 0bcf fcvtsd s0, d15
-0+148 <[^>]*> ee30 1b10 fmrdh r1, d0
-0+14c <[^>]*> ee30 eb10 fmrdh lr, d0
-0+150 <[^>]*> ee31 0b10 fmrdh r0, d1
-0+154 <[^>]*> ee32 0b10 fmrdh r0, d2
-0+158 <[^>]*> ee3f 0b10 fmrdh r0, d15
-0+15c <[^>]*> ee10 1b10 fmrdl r1, d0
-0+160 <[^>]*> ee10 eb10 fmrdl lr, d0
-0+164 <[^>]*> ee11 0b10 fmrdl r0, d1
-0+168 <[^>]*> ee12 0b10 fmrdl r0, d2
-0+16c <[^>]*> ee1f 0b10 fmrdl r0, d15
-0+170 <[^>]*> ee20 1b10 fmdhr d0, r1
-0+174 <[^>]*> ee20 eb10 fmdhr d0, lr
-0+178 <[^>]*> ee21 0b10 fmdhr d1, r0
-0+17c <[^>]*> ee22 0b10 fmdhr d2, r0
-0+180 <[^>]*> ee2f 0b10 fmdhr d15, r0
-0+184 <[^>]*> ee00 1b10 fmdlr d0, r1
-0+188 <[^>]*> ee00 eb10 fmdlr d0, lr
-0+18c <[^>]*> ee01 0b10 fmdlr d1, r0
-0+190 <[^>]*> ee02 0b10 fmdlr d2, r0
-0+194 <[^>]*> ee0f 0b10 fmdlr d15, r0
-0+198 <[^>]*> ed91 0b00 fldd d0, \[r1\]
-0+19c <[^>]*> ed9e 0b00 fldd d0, \[lr\]
-0+1a0 <[^>]*> ed90 0b00 fldd d0, \[r0\]
-0+1a4 <[^>]*> ed90 0bff fldd d0, \[r0, #1020\]
-0+1a8 <[^>]*> ed10 0bff fldd d0, \[r0, #-1020\]
-0+1ac <[^>]*> ed90 1b00 fldd d1, \[r0\]
-0+1b0 <[^>]*> ed90 2b00 fldd d2, \[r0\]
-0+1b4 <[^>]*> ed90 fb00 fldd d15, \[r0\]
-0+1b8 <[^>]*> ed8c cbc9 fstd d12, \[ip, #804\]
-0+1bc <[^>]*> ec90 1b02 fldmiad r0, {d1}
-0+1c0 <[^>]*> ec90 2b02 fldmiad r0, {d2}
-0+1c4 <[^>]*> ec90 fb02 fldmiad r0, {d15}
-0+1c8 <[^>]*> ec90 0b04 fldmiad r0, {d0-d1}
-0+1cc <[^>]*> ec90 0b06 fldmiad r0, {d0-d2}
-0+1d0 <[^>]*> ec90 0b20 fldmiad r0, {d0-d15}
-0+1d4 <[^>]*> ec90 1b1e fldmiad r0, {d1-d15}
-0+1d8 <[^>]*> ec90 2b1c fldmiad r0, {d2-d15}
-0+1dc <[^>]*> ec90 eb04 fldmiad r0, {d14-d15}
-0+1e0 <[^>]*> ec91 0b02 fldmiad r1, {d0}
-0+1e4 <[^>]*> ec9e 0b02 fldmiad lr, {d0}
+0+148 <[^>]*> ee30 1b10 vmov\.32 r1, d0\[1\]
+0+14c <[^>]*> ee30 eb10 vmov\.32 lr, d0\[1\]
+0+150 <[^>]*> ee31 0b10 vmov\.32 r0, d1\[1\]
+0+154 <[^>]*> ee32 0b10 vmov\.32 r0, d2\[1\]
+0+158 <[^>]*> ee3f 0b10 vmov\.32 r0, d15\[1\]
+0+15c <[^>]*> ee10 1b10 vmov\.32 r1, d0\[0\]
+0+160 <[^>]*> ee10 eb10 vmov\.32 lr, d0\[0\]
+0+164 <[^>]*> ee11 0b10 vmov\.32 r0, d1\[0\]
+0+168 <[^>]*> ee12 0b10 vmov\.32 r0, d2\[0\]
+0+16c <[^>]*> ee1f 0b10 vmov\.32 r0, d15\[0\]
+0+170 <[^>]*> ee20 1b10 vmov\.32 d0\[1\], r1
+0+174 <[^>]*> ee20 eb10 vmov\.32 d0\[1\], lr
+0+178 <[^>]*> ee21 0b10 vmov\.32 d1\[1\], r0
+0+17c <[^>]*> ee22 0b10 vmov\.32 d2\[1\], r0
+0+180 <[^>]*> ee2f 0b10 vmov\.32 d15\[1\], r0
+0+184 <[^>]*> ee00 1b10 vmov\.32 d0\[0\], r1
+0+188 <[^>]*> ee00 eb10 vmov\.32 d0\[0\], lr
+0+18c <[^>]*> ee01 0b10 vmov\.32 d1\[0\], r0
+0+190 <[^>]*> ee02 0b10 vmov\.32 d2\[0\], r0
+0+194 <[^>]*> ee0f 0b10 vmov\.32 d15\[0\], r0
+0+198 <[^>]*> ed91 0b00 vldr d0, \[r1\]
+0+19c <[^>]*> ed9e 0b00 vldr d0, \[lr\]
+0+1a0 <[^>]*> ed90 0b00 vldr d0, \[r0\]
+0+1a4 <[^>]*> ed90 0bff vldr d0, \[r0, #1020\]
+0+1a8 <[^>]*> ed10 0bff vldr d0, \[r0, #-1020\]
+0+1ac <[^>]*> ed90 1b00 vldr d1, \[r0\]
+0+1b0 <[^>]*> ed90 2b00 vldr d2, \[r0\]
+0+1b4 <[^>]*> ed90 fb00 vldr d15, \[r0\]
+0+1b8 <[^>]*> ed8c cbc9 vstr d12, \[ip, #804\]
+0+1bc <[^>]*> ec90 1b02 vldmia r0, {d1}
+0+1c0 <[^>]*> ec90 2b02 vldmia r0, {d2}
+0+1c4 <[^>]*> ec90 fb02 vldmia r0, {d15}
+0+1c8 <[^>]*> ec90 0b04 vldmia r0, {d0-d1}
+0+1cc <[^>]*> ec90 0b06 vldmia r0, {d0-d2}
+0+1d0 <[^>]*> ec90 0b20 vldmia r0, {d0-d15}
+0+1d4 <[^>]*> ec90 1b1e vldmia r0, {d1-d15}
+0+1d8 <[^>]*> ec90 2b1c vldmia r0, {d2-d15}
+0+1dc <[^>]*> ec90 eb04 vldmia r0, {d14-d15}
+0+1e0 <[^>]*> ec91 0b02 vldmia r1, {d0}
+0+1e4 <[^>]*> ec9e 0b02 vldmia lr, {d0}
0+1e8 <[^>]*> eeb5 0b40 fcmpzd d0
0+1ec <[^>]*> eeb5 1b40 fcmpzd d1
0+1f0 <[^>]*> eeb5 2b40 fcmpzd d2
@@ -168,23 +168,23 @@ Disassembly of section .text:
0+26c <[^>]*> ee24 5b4c fnmuld(eq|) d5, d4, d12
0+270 <[^>]*> bf02 ittt eq
0+272 <[^>]*> ee3d 3b4e fsubd(eq|) d3, d13, d14
-0+276 <[^>]*> ed95 2b00 fldd(eq|) d2, \[r5\]
-0+27a <[^>]*> ed8c 1b00 fstd(eq|) d1, \[ip\]
+0+276 <[^>]*> ed95 2b00 vldr(eq|) d2, \[r5\]
+0+27a <[^>]*> ed8c 1b00 vstr(eq|) d1, \[ip\]
0+27e <[^>]*> bf01 itttt eq
-0+280 <[^>]*> ec91 1b02 fldmiad(eq|) r1, {d1}
-0+284 <[^>]*> ec92 2b02 fldmiad(eq|) r2, {d2}
-0+288 <[^>]*> ecb3 3b02 fldmiad(eq|) r3!, {d3}
-0+28c <[^>]*> ecb4 4b02 fldmiad(eq|) r4!, {d4}
+0+280 <[^>]*> ec91 1b02 vldmia(eq|) r1, {d1}
+0+284 <[^>]*> ec92 2b02 vldmia(eq|) r2, {d2}
+0+288 <[^>]*> ecb3 3b02 vldmia(eq|) r3!, {d3}
+0+28c <[^>]*> ecb4 4b02 vldmia(eq|) r4!, {d4}
0+290 <[^>]*> bf01 itttt eq
-0+292 <[^>]*> ed35 5b02 fldmdbd(eq|) r5!, {d5}
-0+296 <[^>]*> ed36 6b02 fldmdbd(eq|) r6!, {d6}
-0+29a <[^>]*> ec87 fb02 fstmiad(eq|) r7, {d15}
-0+29e <[^>]*> ec88 eb02 fstmiad(eq|) r8, {d14}
+0+292 <[^>]*> ed35 5b02 vldmdb(eq|) r5!, {d5}
+0+296 <[^>]*> ed36 6b02 vldmdb(eq|) r6!, {d6}
+0+29a <[^>]*> ec87 fb02 vstmia(eq|) r7, {d15}
+0+29e <[^>]*> ec88 eb02 vstmia(eq|) r8, {d14}
0+2a2 <[^>]*> bf01 itttt eq
-0+2a4 <[^>]*> eca9 db02 fstmiad(eq|) r9!, {d13}
-0+2a8 <[^>]*> ecaa cb02 fstmiad(eq|) sl!, {d12}
-0+2ac <[^>]*> ed2b bb02 fstmdbd(eq|) fp!, {d11}
-0+2b0 <[^>]*> ed2c ab02 fstmdbd(eq|) ip!, {d10}
+0+2a4 <[^>]*> eca9 db02 vstmia(eq|) r9!, {d13}
+0+2a8 <[^>]*> ecaa cb02 vstmia(eq|) sl!, {d12}
+0+2ac <[^>]*> ed2b bb02 vstmdb(eq|) fp!, {d11}
+0+2b0 <[^>]*> ed2c ab02 vstmdb(eq|) ip!, {d10}
0+2b4 <[^>]*> bf01 itttt eq
0+2b6 <[^>]*> eeb8 fbe0 fsitod(eq|) d15, s1
0+2ba <[^>]*> eeb8 1b6f fuitod(eq|) d1, s31
@@ -196,10 +196,10 @@ Disassembly of section .text:
0+2d0 <[^>]*> eeb7 1ac5 fcvtds(eq|) d1, s10
0+2d4 <[^>]*> eef7 5bc1 fcvtsd(eq|) s11, d1
0+2d8 <[^>]*> bf01 itttt eq
-0+2da <[^>]*> ee31 8b10 fmrdh(eq|) r8, d1
-0+2de <[^>]*> ee1f 7b10 fmrdl(eq|) r7, d15
-0+2e2 <[^>]*> ee21 fb10 fmdhr(eq|) d1, pc
-0+2e6 <[^>]*> ee0f 1b10 fmdlr(eq|) d15, r1
+0+2da <[^>]*> ee31 8b10 vmov(eq|)\.32 r8, d1\[1\]
+0+2de <[^>]*> ee1f 7b10 vmov(eq|)\.32 r7, d15\[0\]
+0+2e2 <[^>]*> ee21 fb10 vmov(eq|)\.32 d1\[1\], pc
+0+2e6 <[^>]*> ee0f 1b10 vmov(eq|)\.32 d15\[0\], r1
0+2ea <[^>]*> bf00 nop
0+2ec <[^>]*> bf00 nop
0+2ee <[^>]*> bf00 nop
diff --git a/gas/testsuite/gas/arm/vfp1xD.d b/gas/testsuite/gas/arm/vfp1xD.d
index 096b46c86e..1dab07cea6 100644
--- a/gas/testsuite/gas/arm/vfp1xD.d
+++ b/gas/testsuite/gas/arm/vfp1xD.d
@@ -33,24 +33,24 @@ Disassembly of section .text:
0+05c <[^>]*> ecb00a01 fldmias r0!, {s0}
0+060 <[^>]*> ed300a01 fldmdbs r0!, {s0}
0+064 <[^>]*> ed300a01 fldmdbs r0!, {s0}
-0+068 <[^>]*> ec900b03 fldmiax r0, {d0}
-0+06c <[^>]*> ec900b03 fldmiax r0, {d0}
-0+070 <[^>]*> ecb00b03 fldmiax r0!, {d0}
-0+074 <[^>]*> ecb00b03 fldmiax r0!, {d0}
-0+078 <[^>]*> ed300b03 fldmdbx r0!, {d0}
-0+07c <[^>]*> ed300b03 fldmdbx r0!, {d0}
+0+068 <[^>]*> ec900b03 vldmia r0, {d0}
+0+06c <[^>]*> ec900b03 vldmia r0, {d0}
+0+070 <[^>]*> ecb00b03 vldmia r0!, {d0}
+0+074 <[^>]*> ecb00b03 vldmia r0!, {d0}
+0+078 <[^>]*> ed300b03 vldmdb r0!, {d0}
+0+07c <[^>]*> ed300b03 vldmdb r0!, {d0}
0+080 <[^>]*> ec800a01 fstmias r0, {s0}
0+084 <[^>]*> ec800a01 fstmias r0, {s0}
0+088 <[^>]*> eca00a01 fstmias r0!, {s0}
0+08c <[^>]*> eca00a01 fstmias r0!, {s0}
0+090 <[^>]*> ed200a01 fstmdbs r0!, {s0}
0+094 <[^>]*> ed200a01 fstmdbs r0!, {s0}
-0+098 <[^>]*> ec800b03 fstmiax r0, {d0}
-0+09c <[^>]*> ec800b03 fstmiax r0, {d0}
-0+0a0 <[^>]*> eca00b03 fstmiax r0!, {d0}
-0+0a4 <[^>]*> eca00b03 fstmiax r0!, {d0}
-0+0a8 <[^>]*> ed200b03 fstmdbx r0!, {d0}
-0+0ac <[^>]*> ed200b03 fstmdbx r0!, {d0}
+0+098 <[^>]*> ec800b03 vstmia r0, {d0}
+0+09c <[^>]*> ec800b03 vstmia r0, {d0}
+0+0a0 <[^>]*> eca00b03 vstmia r0!, {d0}
+0+0a4 <[^>]*> eca00b03 vstmia r0!, {d0}
+0+0a8 <[^>]*> ed200b03 vstmdb r0!, {d0}
+0+0ac <[^>]*> ed200b03 vstmdb r0!, {d0}
0+0b0 <[^>]*> eeb80ac0 fsitos s0, s0
0+0b4 <[^>]*> eeb80a40 fuitos s0, s0
0+0b8 <[^>]*> eebd0a40 ftosis s0, s0
@@ -142,17 +142,17 @@ Disassembly of section .text:
0+210 <[^>]*> ec90fa02 fldmias r0, {s30-s31}
0+214 <[^>]*> ec910a01 fldmias r1, {s0}
0+218 <[^>]*> ec9e0a01 fldmias lr, {s0}
-0+21c <[^>]*> ec801b03 fstmiax r0, {d1}
-0+220 <[^>]*> ec802b03 fstmiax r0, {d2}
-0+224 <[^>]*> ec80fb03 fstmiax r0, {d15}
-0+228 <[^>]*> ec800b05 fstmiax r0, {d0-d1}
-0+22c <[^>]*> ec800b07 fstmiax r0, {d0-d2}
-0+230 <[^>]*> ec800b21 fstmiax r0, {d0-d15}
-0+234 <[^>]*> ec801b1f fstmiax r0, {d1-d15}
-0+238 <[^>]*> ec802b1d fstmiax r0, {d2-d15}
-0+23c <[^>]*> ec80eb05 fstmiax r0, {d14-d15}
-0+240 <[^>]*> ec810b03 fstmiax r1, {d0}
-0+244 <[^>]*> ec8e0b03 fstmiax lr, {d0}
+0+21c <[^>]*> ec801b03 vstmia r0, {d1}
+0+220 <[^>]*> ec802b03 vstmia r0, {d2}
+0+224 <[^>]*> ec80fb03 vstmia r0, {d15}
+0+228 <[^>]*> ec800b05 vstmia r0, {d0-d1}
+0+22c <[^>]*> ec800b07 vstmia r0, {d0-d2}
+0+230 <[^>]*> ec800b21 vstmia r0, {d0-d15}
+0+234 <[^>]*> ec801b1f vstmia r0, {d1-d15}
+0+238 <[^>]*> ec802b1d vstmia r0, {d2-d15}
+0+23c <[^>]*> ec80eb05 vstmia r0, {d14-d15}
+0+240 <[^>]*> ec810b03 vstmia r1, {d0}
+0+244 <[^>]*> ec8e0b03 vstmia lr, {d0}
0+248 <[^>]*> eeb50a40 fcmpzs s0
0+24c <[^>]*> eef50a40 fcmpzs s1
0+250 <[^>]*> eeb51a40 fcmpzs s2
@@ -211,24 +211,24 @@ Disassembly of section .text:
0+324 <[^>]*> 0cf42a01 fldmiaseq r4!, {s5}
0+328 <[^>]*> 0d352a01 fldmdbseq r5!, {s4}
0+32c <[^>]*> 0d761a01 fldmdbseq r6!, {s3}
-0+330 <[^>]*> 0c971b03 fldmiaxeq r7, {d1}
-0+334 <[^>]*> 0c982b03 fldmiaxeq r8, {d2}
-0+338 <[^>]*> 0cb93b03 fldmiaxeq r9!, {d3}
-0+33c <[^>]*> 0cba4b03 fldmiaxeq sl!, {d4}
-0+340 <[^>]*> 0d3b5b03 fldmdbxeq fp!, {d5}
-0+344 <[^>]*> 0d3c6b03 fldmdbxeq ip!, {d6}
+0+330 <[^>]*> 0c971b03 vldmiaeq r7, {d1}
+0+334 <[^>]*> 0c982b03 vldmiaeq r8, {d2}
+0+338 <[^>]*> 0cb93b03 vldmiaeq r9!, {d3}
+0+33c <[^>]*> 0cba4b03 vldmiaeq sl!, {d4}
+0+340 <[^>]*> 0d3b5b03 vldmdbeq fp!, {d5}
+0+344 <[^>]*> 0d3c6b03 vldmdbeq ip!, {d6}
0+348 <[^>]*> 0c8d1a01 fstmiaseq sp, {s2}
0+34c <[^>]*> 0cce0a01 fstmiaseq lr, {s1}
0+350 <[^>]*> 0ce1fa01 fstmiaseq r1!, {s31}
0+354 <[^>]*> 0ca2fa01 fstmiaseq r2!, {s30}
0+358 <[^>]*> 0d63ea01 fstmdbseq r3!, {s29}
0+35c <[^>]*> 0d24ea01 fstmdbseq r4!, {s28}
-0+360 <[^>]*> 0c857b03 fstmiaxeq r5, {d7}
-0+364 <[^>]*> 0c868b03 fstmiaxeq r6, {d8}
-0+368 <[^>]*> 0ca79b03 fstmiaxeq r7!, {d9}
-0+36c <[^>]*> 0ca8ab03 fstmiaxeq r8!, {d10}
-0+370 <[^>]*> 0d29bb03 fstmdbxeq r9!, {d11}
-0+374 <[^>]*> 0d2acb03 fstmdbxeq sl!, {d12}
+0+360 <[^>]*> 0c857b03 vstmiaeq r5, {d7}
+0+364 <[^>]*> 0c868b03 vstmiaeq r6, {d8}
+0+368 <[^>]*> 0ca79b03 vstmiaeq r7!, {d9}
+0+36c <[^>]*> 0ca8ab03 vstmiaeq r8!, {d10}
+0+370 <[^>]*> 0d29bb03 vstmdbeq r9!, {d11}
+0+374 <[^>]*> 0d2acb03 vstmdbeq sl!, {d12}
0+378 <[^>]*> 0ef8dac3 fsitoseq s27, s6
0+37c <[^>]*> 0efdca62 ftosiseq s25, s5
0+380 <[^>]*> 0efdbac2 ftosizseq s23, s4
diff --git a/gas/testsuite/gas/arm/vfp1xD_t2.d b/gas/testsuite/gas/arm/vfp1xD_t2.d
index 327383d01c..65d6115c96 100644
--- a/gas/testsuite/gas/arm/vfp1xD_t2.d
+++ b/gas/testsuite/gas/arm/vfp1xD_t2.d
@@ -33,24 +33,24 @@ Disassembly of section .text:
0+05c <[^>]*> ecb0 0a01 fldmias r0!, {s0}
0+060 <[^>]*> ed30 0a01 fldmdbs r0!, {s0}
0+064 <[^>]*> ed30 0a01 fldmdbs r0!, {s0}
-0+068 <[^>]*> ec90 0b03 fldmiax r0, {d0}
-0+06c <[^>]*> ec90 0b03 fldmiax r0, {d0}
-0+070 <[^>]*> ecb0 0b03 fldmiax r0!, {d0}
-0+074 <[^>]*> ecb0 0b03 fldmiax r0!, {d0}
-0+078 <[^>]*> ed30 0b03 fldmdbx r0!, {d0}
-0+07c <[^>]*> ed30 0b03 fldmdbx r0!, {d0}
+0+068 <[^>]*> ec90 0b03 vldmia r0, {d0}
+0+06c <[^>]*> ec90 0b03 vldmia r0, {d0}
+0+070 <[^>]*> ecb0 0b03 vldmia r0!, {d0}
+0+074 <[^>]*> ecb0 0b03 vldmia r0!, {d0}
+0+078 <[^>]*> ed30 0b03 vldmdb r0!, {d0}
+0+07c <[^>]*> ed30 0b03 vldmdb r0!, {d0}
0+080 <[^>]*> ec80 0a01 fstmias r0, {s0}
0+084 <[^>]*> ec80 0a01 fstmias r0, {s0}
0+088 <[^>]*> eca0 0a01 fstmias r0!, {s0}
0+08c <[^>]*> eca0 0a01 fstmias r0!, {s0}
0+090 <[^>]*> ed20 0a01 fstmdbs r0!, {s0}
0+094 <[^>]*> ed20 0a01 fstmdbs r0!, {s0}
-0+098 <[^>]*> ec80 0b03 fstmiax r0, {d0}
-0+09c <[^>]*> ec80 0b03 fstmiax r0, {d0}
-0+0a0 <[^>]*> eca0 0b03 fstmiax r0!, {d0}
-0+0a4 <[^>]*> eca0 0b03 fstmiax r0!, {d0}
-0+0a8 <[^>]*> ed20 0b03 fstmdbx r0!, {d0}
-0+0ac <[^>]*> ed20 0b03 fstmdbx r0!, {d0}
+0+098 <[^>]*> ec80 0b03 vstmia r0, {d0}
+0+09c <[^>]*> ec80 0b03 vstmia r0, {d0}
+0+0a0 <[^>]*> eca0 0b03 vstmia r0!, {d0}
+0+0a4 <[^>]*> eca0 0b03 vstmia r0!, {d0}
+0+0a8 <[^>]*> ed20 0b03 vstmdb r0!, {d0}
+0+0ac <[^>]*> ed20 0b03 vstmdb r0!, {d0}
0+0b0 <[^>]*> eeb8 0ac0 fsitos s0, s0
0+0b4 <[^>]*> eeb8 0a40 fuitos s0, s0
0+0b8 <[^>]*> eebd 0a40 ftosis s0, s0
@@ -142,17 +142,17 @@ Disassembly of section .text:
0+210 <[^>]*> ec90 fa02 fldmias r0, {s30-s31}
0+214 <[^>]*> ec91 0a01 fldmias r1, {s0}
0+218 <[^>]*> ec9e 0a01 fldmias lr, {s0}
-0+21c <[^>]*> ec80 1b03 fstmiax r0, {d1}
-0+220 <[^>]*> ec80 2b03 fstmiax r0, {d2}
-0+224 <[^>]*> ec80 fb03 fstmiax r0, {d15}
-0+228 <[^>]*> ec80 0b05 fstmiax r0, {d0-d1}
-0+22c <[^>]*> ec80 0b07 fstmiax r0, {d0-d2}
-0+230 <[^>]*> ec80 0b21 fstmiax r0, {d0-d15}
-0+234 <[^>]*> ec80 1b1f fstmiax r0, {d1-d15}
-0+238 <[^>]*> ec80 2b1d fstmiax r0, {d2-d15}
-0+23c <[^>]*> ec80 eb05 fstmiax r0, {d14-d15}
-0+240 <[^>]*> ec81 0b03 fstmiax r1, {d0}
-0+244 <[^>]*> ec8e 0b03 fstmiax lr, {d0}
+0+21c <[^>]*> ec80 1b03 vstmia r0, {d1}
+0+220 <[^>]*> ec80 2b03 vstmia r0, {d2}
+0+224 <[^>]*> ec80 fb03 vstmia r0, {d15}
+0+228 <[^>]*> ec80 0b05 vstmia r0, {d0-d1}
+0+22c <[^>]*> ec80 0b07 vstmia r0, {d0-d2}
+0+230 <[^>]*> ec80 0b21 vstmia r0, {d0-d15}
+0+234 <[^>]*> ec80 1b1f vstmia r0, {d1-d15}
+0+238 <[^>]*> ec80 2b1d vstmia r0, {d2-d15}
+0+23c <[^>]*> ec80 eb05 vstmia r0, {d14-d15}
+0+240 <[^>]*> ec81 0b03 vstmia r1, {d0}
+0+244 <[^>]*> ec8e 0b03 vstmia lr, {d0}
0+248 <[^>]*> eeb5 0a40 fcmpzs s0
0+24c <[^>]*> eef5 0a40 fcmpzs s1
0+250 <[^>]*> eeb5 1a40 fcmpzs s2
@@ -219,13 +219,13 @@ Disassembly of section .text:
0+334 <[^>]*> bf01 itttt eq
0+336 <[^>]*> ed35 2a01 fldmdbs(eq|) r5!, {s4}
0+33a <[^>]*> ed76 1a01 fldmdbs(eq|) r6!, {s3}
-0+33e <[^>]*> ec97 1b03 fldmiax(eq|) r7, {d1}
-0+342 <[^>]*> ec98 2b03 fldmiax(eq|) r8, {d2}
+0+33e <[^>]*> ec97 1b03 vldmia(eq|) r7, {d1}
+0+342 <[^>]*> ec98 2b03 vldmia(eq|) r8, {d2}
0+346 <[^>]*> bf01 itttt eq
-0+348 <[^>]*> ecb9 3b03 fldmiax(eq|) r9!, {d3}
-0+34c <[^>]*> ecba 4b03 fldmiax(eq|) sl!, {d4}
-0+350 <[^>]*> ed3b 5b03 fldmdbx(eq|) fp!, {d5}
-0+354 <[^>]*> ed3c 6b03 fldmdbx(eq|) ip!, {d6}
+0+348 <[^>]*> ecb9 3b03 vldmia(eq|) r9!, {d3}
+0+34c <[^>]*> ecba 4b03 vldmia(eq|) sl!, {d4}
+0+350 <[^>]*> ed3b 5b03 vldmdb(eq|) fp!, {d5}
+0+354 <[^>]*> ed3c 6b03 vldmdb(eq|) ip!, {d6}
0+358 <[^>]*> bf01 itttt eq
0+35a <[^>]*> ec8d 1a01 fstmias(eq|) sp, {s2}
0+35e <[^>]*> ecce 0a01 fstmias(eq|) lr, {s1}
@@ -234,13 +234,13 @@ Disassembly of section .text:
0+36a <[^>]*> bf01 itttt eq
0+36c <[^>]*> ed63 ea01 fstmdbs(eq|) r3!, {s29}
0+370 <[^>]*> ed24 ea01 fstmdbs(eq|) r4!, {s28}
-0+374 <[^>]*> ec85 7b03 fstmiax(eq|) r5, {d7}
-0+378 <[^>]*> ec86 8b03 fstmiax(eq|) r6, {d8}
+0+374 <[^>]*> ec85 7b03 vstmia(eq|) r5, {d7}
+0+378 <[^>]*> ec86 8b03 vstmia(eq|) r6, {d8}
0+37c <[^>]*> bf01 itttt eq
-0+37e <[^>]*> eca7 9b03 fstmiax(eq|) r7!, {d9}
-0+382 <[^>]*> eca8 ab03 fstmiax(eq|) r8!, {d10}
-0+386 <[^>]*> ed29 bb03 fstmdbx(eq|) r9!, {d11}
-0+38a <[^>]*> ed2a cb03 fstmdbx(eq|) sl!, {d12}
+0+37e <[^>]*> eca7 9b03 vstmia(eq|) r7!, {d9}
+0+382 <[^>]*> eca8 ab03 vstmia(eq|) r8!, {d10}
+0+386 <[^>]*> ed29 bb03 vstmdb(eq|) r9!, {d11}
+0+38a <[^>]*> ed2a cb03 vstmdb(eq|) sl!, {d12}
0+38e <[^>]*> bf01 itttt eq
0+390 <[^>]*> eef8 dac3 fsitos(eq|) s27, s6
0+394 <[^>]*> eefd ca62 ftosis(eq|) s25, s5
diff --git a/gas/testsuite/gas/arm/vfp2.d b/gas/testsuite/gas/arm/vfp2.d
index f9b6096081..94827f727b 100644
--- a/gas/testsuite/gas/arm/vfp2.d
+++ b/gas/testsuite/gas/arm/vfp2.d
@@ -7,11 +7,11 @@
.*: +file format .*arm.*
Disassembly of section .text:
-0+000 <[^>]*> ec4a5b10 fmdrr d0, r5, sl
-0+004 <[^>]*> ec5a5b10 fmrrd r5, sl, d0
+0+000 <[^>]*> ec4a5b10 vmov d0, r5, sl
+0+004 <[^>]*> ec5a5b10 vmov r5, sl, d0
0+008 <[^>]*> ec4a5a37 fmsrr r5, sl, {s15, s16}
0+00c <[^>]*> ec5a5a37 fmrrs r5, sl, {s15, s16}
-0+010 <[^>]*> ec45ab1f fmdrr d15, sl, r5
-0+014 <[^>]*> ec55ab1f fmrrd sl, r5, d15
+0+010 <[^>]*> ec45ab1f vmov d15, sl, r5
+0+014 <[^>]*> ec55ab1f vmov sl, r5, d15
0+018 <[^>]*> ec45aa38 fmsrr sl, r5, {s17, s18}
0+01c <[^>]*> ec55aa38 fmrrs sl, r5, {s17, s18}
diff --git a/gas/testsuite/gas/arm/vfp2_t2.d b/gas/testsuite/gas/arm/vfp2_t2.d
index bb988e5472..8710e4e0a9 100644
--- a/gas/testsuite/gas/arm/vfp2_t2.d
+++ b/gas/testsuite/gas/arm/vfp2_t2.d
@@ -7,11 +7,11 @@
.*: +file format .*arm.*
Disassembly of section .text:
-0+000 <[^>]*> ec4a 5b10 fmdrr d0, r5, sl
-0+004 <[^>]*> ec5a 5b10 fmrrd r5, sl, d0
+0+000 <[^>]*> ec4a 5b10 vmov d0, r5, sl
+0+004 <[^>]*> ec5a 5b10 vmov r5, sl, d0
0+008 <[^>]*> ec4a 5a37 fmsrr r5, sl, {s15, s16}
0+00c <[^>]*> ec5a 5a37 fmrrs r5, sl, {s15, s16}
-0+010 <[^>]*> ec45 ab1f fmdrr d15, sl, r5
-0+014 <[^>]*> ec55 ab1f fmrrd sl, r5, d15
+0+010 <[^>]*> ec45 ab1f vmov d15, sl, r5
+0+014 <[^>]*> ec55 ab1f vmov sl, r5, d15
0+018 <[^>]*> ec45 aa38 fmsrr sl, r5, {s17, s18}
0+01c <[^>]*> ec55 aa38 fmrrs sl, r5, {s17, s18}
diff --git a/gas/testsuite/gas/arm/vfpv3-32drs.d b/gas/testsuite/gas/arm/vfpv3-32drs.d
new file mode 100644
index 0000000000..11f9e93d91
--- /dev/null
+++ b/gas/testsuite/gas/arm/vfpv3-32drs.d
@@ -0,0 +1,73 @@
+# name: VFPv3 extra D registers
+# as: -mfpu=vfp3
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> eeb03b66 fcpyd d3, d22
+0[0-9a-f]+ <[^>]+> eef06b43 fcpyd d22, d3
+0[0-9a-f]+ <[^>]+> eef76acb fcvtds d22, s22
+0[0-9a-f]+ <[^>]+> eeb7bbe6 fcvtsd s22, d22
+0[0-9a-f]+ <[^>]+> ee254b90 vmov\.32 d21\[1\], r4
+0[0-9a-f]+ <[^>]+> ee0b5b90 vmov\.32 d27\[0\], r5
+0[0-9a-f]+ <[^>]+> ee376b90 vmov\.32 r6, d23\[1\]
+0[0-9a-f]+ <[^>]+> ee197b90 vmov\.32 r7, d25\[0\]
+0[0-9a-f]+ <[^>]+> eef86bcb fsitod d22, s22
+0[0-9a-f]+ <[^>]+> eef85b6a fuitod d21, s21
+0[0-9a-f]+ <[^>]+> eebdab64 ftosid s20, d20
+0[0-9a-f]+ <[^>]+> eebdabe4 ftosizd s20, d20
+0[0-9a-f]+ <[^>]+> eefc9b63 ftouid s19, d19
+0[0-9a-f]+ <[^>]+> eefc9be3 ftouizd s19, d19
+0[0-9a-f]+ <[^>]+> edda3b01 vldr d19, \[sl, #4\]
+0[0-9a-f]+ <[^>]+> edca5b01 vstr d21, \[sl, #4\]
+0[0-9a-f]+ <[^>]+> ecba5b04 vldmia sl!, {d5-d6}
+0[0-9a-f]+ <[^>]+> ecfa2b06 vldmia sl!, {d18-d20}
+0[0-9a-f]+ <[^>]+> ecba5b05 vldmia sl!, {d5-d6}
+0[0-9a-f]+ <[^>]+> ecfa2b07 vldmia sl!, {d18-d20}
+0[0-9a-f]+ <[^>]+> ed7a2b05 vldmdb sl!, {d18-d19}
+0[0-9a-f]+ <[^>]+> ecc94b0a vstmia r9, {d20-d24}
+0[0-9a-f]+ <[^>]+> eeb03bc5 fabsd d3, d5
+0[0-9a-f]+ <[^>]+> eeb0cbe2 fabsd d12, d18
+0[0-9a-f]+ <[^>]+> eef02be3 fabsd d18, d19
+0[0-9a-f]+ <[^>]+> eeb13b45 fnegd d3, d5
+0[0-9a-f]+ <[^>]+> eeb1cb62 fnegd d12, d18
+0[0-9a-f]+ <[^>]+> eef12b63 fnegd d18, d19
+0[0-9a-f]+ <[^>]+> eeb13bc5 fsqrtd d3, d5
+0[0-9a-f]+ <[^>]+> eeb1cbe2 fsqrtd d12, d18
+0[0-9a-f]+ <[^>]+> eef12be3 fsqrtd d18, d19
+0[0-9a-f]+ <[^>]+> ee353b06 faddd d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee32cb84 faddd d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee732ba4 faddd d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee353b46 fsubd d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee32cbc4 fsubd d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee732be4 fsubd d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee253b06 fmuld d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee22cb84 fmuld d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee632ba4 fmuld d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee853b06 fdivd d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee82cb84 fdivd d12, d18, d4
+0[0-9a-f]+ <[^>]+> eec32ba4 fdivd d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee053b06 fmacd d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee02cb84 fmacd d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee432ba4 fmacd d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee153b06 fmscd d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee12cb84 fmscd d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee532ba4 fmscd d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee253b46 fnmuld d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee22cbc4 fnmuld d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee632be4 fnmuld d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee053b46 fnmacd d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee02cbc4 fnmacd d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee432be4 fnmacd d18, d19, d20
+0[0-9a-f]+ <[^>]+> ee153b46 fnmscd d3, d5, d6
+0[0-9a-f]+ <[^>]+> ee12cbc4 fnmscd d12, d18, d4
+0[0-9a-f]+ <[^>]+> ee532be4 fnmscd d18, d19, d20
+0[0-9a-f]+ <[^>]+> eeb43b62 fcmpd d3, d18
+0[0-9a-f]+ <[^>]+> eef42b43 fcmpd d18, d3
+0[0-9a-f]+ <[^>]+> eef53b40 fcmpzd d19
+0[0-9a-f]+ <[^>]+> eeb43be2 fcmped d3, d18
+0[0-9a-f]+ <[^>]+> eef42bc3 fcmped d18, d3
+0[0-9a-f]+ <[^>]+> eef53bc0 fcmpezd d19
+0[0-9a-f]+ <[^>]+> ec443b3f vmov d31, r3, r4
+0[0-9a-f]+ <[^>]+> ec565b3e vmov r5, r6, d30
diff --git a/gas/testsuite/gas/arm/vfpv3-32drs.s b/gas/testsuite/gas/arm/vfpv3-32drs.s
new file mode 100644
index 0000000000..ef72c24eb5
--- /dev/null
+++ b/gas/testsuite/gas/arm/vfpv3-32drs.s
@@ -0,0 +1,68 @@
+.arm
+.syntax unified
+ fcpyd d3,d22
+ fcpyd d22,d3
+ fcvtds d22,s22
+ fcvtsd s22,d22
+ fmdhr d21,r4
+ fmdlr d27,r5
+ fmrdh r6,d23
+ fmrdl r7,d25
+ fsitod d22,s22
+ fuitod d21,s21
+ ftosid s20,d20
+ ftosizd s20,d20
+ ftouid s19,d19
+ ftouizd s19,d19
+ fldd d19,[r10,#4]
+ fstd d21,[r10,#4]
+ fldmiad r10!,{d5,d6}
+ fldmiad r10!,{d18,d19,d20}
+ fldmiax r10!,{d5,d6}
+ fldmiax r10!,{d18,d19,d20}
+ fldmdbx r10!,{d18,d19}
+ fstmiad r9,{d20,d21,d22,d23,d24}
+ fabsd d3,d5
+ fabsd d12,d18
+ fabsd d18,d19
+ fnegd d3,d5
+ fnegd d12,d18
+ fnegd d18,d19
+ fsqrtd d3,d5
+ fsqrtd d12,d18
+ fsqrtd d18,d19
+ faddd d3,d5,d6
+ faddd d12,d18,d4
+ faddd d18,d19,d20
+ fsubd d3,d5,d6
+ fsubd d12,d18,d4
+ fsubd d18,d19,d20
+ fmuld d3,d5,d6
+ fmuld d12,d18,d4
+ fmuld d18,d19,d20
+ fdivd d3,d5,d6
+ fdivd d12,d18,d4
+ fdivd d18,d19,d20
+ fmacd d3,d5,d6
+ fmacd d12,d18,d4
+ fmacd d18,d19,d20
+ fmscd d3,d5,d6
+ fmscd d12,d18,d4
+ fmscd d18,d19,d20
+ fnmuld d3,d5,d6
+ fnmuld d12,d18,d4
+ fnmuld d18,d19,d20
+ fnmacd d3,d5,d6
+ fnmacd d12,d18,d4
+ fnmacd d18,d19,d20
+ fnmscd d3,d5,d6
+ fnmscd d12,d18,d4
+ fnmscd d18,d19,d20
+ fcmpd d3,d18
+ fcmpd d18,d3
+ fcmpzd d19
+ fcmped d3,d18
+ fcmped d18,d3
+ fcmpezd d19
+ fmdrr d31,r3,r4
+ fmrrd r5,r6,d30
diff --git a/gas/testsuite/gas/arm/vfpv3-const-conv.d b/gas/testsuite/gas/arm/vfpv3-const-conv.d
new file mode 100644
index 0000000000..ddabd1c914
--- /dev/null
+++ b/gas/testsuite/gas/arm/vfpv3-const-conv.d
@@ -0,0 +1,29 @@
+# name: VFPv3 additional constant and conversion ops
+# as: -mfpu=vfp3
+# objdump: -dr --prefix-addresses --show-raw-insn
+
+.*: +file format .*arm.*
+
+Disassembly of section \.text:
+0[0-9a-f]+ <[^>]+> eef48a00 fconsts s17, #4
+0[0-9a-f]+ <[^>]+> eeb59a00 fconsts s18, #5
+0[0-9a-f]+ <[^>]+> eef69a00 fconsts s19, #6
+0[0-9a-f]+ <[^>]+> eef41b00 fconstd d17, #4
+0[0-9a-f]+ <[^>]+> eef52b00 fconstd d18, #5
+0[0-9a-f]+ <[^>]+> eef63b00 fconstd d19, #6
+0[0-9a-f]+ <[^>]+> eefa8a63 fshtos s17, #9
+0[0-9a-f]+ <[^>]+> eefa1b63 fshtod d17, #9
+0[0-9a-f]+ <[^>]+> eefa8aeb fsltos s17, #9
+0[0-9a-f]+ <[^>]+> eefa1beb fsltod d17, #9
+0[0-9a-f]+ <[^>]+> eefb8a63 fuhtos s17, #9
+0[0-9a-f]+ <[^>]+> eefb1b63 fuhtod d17, #9
+0[0-9a-f]+ <[^>]+> eefb8aeb fultos s17, #9
+0[0-9a-f]+ <[^>]+> eefb1beb fultod d17, #9
+0[0-9a-f]+ <[^>]+> eefe9a64 ftoshs s19, #7
+0[0-9a-f]+ <[^>]+> eefe3b64 ftoshd d19, #7
+0[0-9a-f]+ <[^>]+> eefe9aec ftosls s19, #7
+0[0-9a-f]+ <[^>]+> eefe3bec ftosld d19, #7
+0[0-9a-f]+ <[^>]+> eeff9a64 ftouhs s19, #7
+0[0-9a-f]+ <[^>]+> eeff3b64 ftouhd d19, #7
+0[0-9a-f]+ <[^>]+> eeff9aec ftouls s19, #7
+0[0-9a-f]+ <[^>]+> eeff3bec ftould d19, #7
diff --git a/gas/testsuite/gas/arm/vfpv3-const-conv.s b/gas/testsuite/gas/arm/vfpv3-const-conv.s
new file mode 100644
index 0000000000..c40301c851
--- /dev/null
+++ b/gas/testsuite/gas/arm/vfpv3-const-conv.s
@@ -0,0 +1,25 @@
+.arm
+.syntax unified
+ fconsts s17, #4
+ fconsts s18, #5
+ fconsts s19, #6
+ fconstd d17, #4
+ fconstd d18, #5
+ fconstd d19, #6
+ fshtos s17, 9
+ fshtod d17, 9
+ fsltos s17, 9
+ fsltod d17, 9
+ fuhtos s17, 9
+ fuhtod d17, 9
+ fultos s17, 9
+ fultod d17, 9
+
+ ftoshs s19, 7
+ ftoshd d19, 7
+ ftosls s19, 7
+ ftosld d19, 7
+ ftouhs s19, 7
+ ftouhd d19, 7
+ ftouls s19, 7
+ ftould d19, 7
diff --git a/include/opcode/arm.h b/include/opcode/arm.h
index 1d3aa5aeb3..f142fca9a5 100644
--- a/include/opcode/arm.h
+++ b/include/opcode/arm.h
@@ -58,6 +58,8 @@
#define FPU_VFP_EXT_V1xD 0x08000000 /* Base VFP instruction set. */
#define FPU_VFP_EXT_V1 0x04000000 /* Double-precision insns. */
#define FPU_VFP_EXT_V2 0x02000000 /* ARM10E VFPr1. */
+#define FPU_VFP_EXT_V3 0x01000000 /* VFPv3 insns. */
+#define FPU_NEON_EXT_V1 0x00800000 /* Neon (SIMD) insns. */
/* Architectures are the sum of the base and extensions. The ARM ARM (rev E)
defines the following: ARMv3, ARMv3M, ARMv4xM, ARMv4, ARMv4TxM, ARMv4T,
@@ -105,7 +107,9 @@
#define FPU_VFP_V1xD (FPU_VFP_EXT_V1xD | FPU_ENDIAN_PURE)
#define FPU_VFP_V1 (FPU_VFP_V1xD | FPU_VFP_EXT_V1)
#define FPU_VFP_V2 (FPU_VFP_V1 | FPU_VFP_EXT_V2)
-#define FPU_VFP_HARD (FPU_VFP_EXT_V1xD | FPU_VFP_EXT_V1 | FPU_VFP_EXT_V2)
+#define FPU_VFP_V3 (FPU_VFP_V2 | FPU_VFP_EXT_V3)
+#define FPU_VFP_HARD (FPU_VFP_EXT_V1xD | FPU_VFP_EXT_V1 | FPU_VFP_EXT_V2 \
+ | FPU_VFP_EXT_V3 | FPU_NEON_EXT_V1)
#define FPU_FPA (FPU_FPA_EXT_V1 | FPU_FPA_EXT_V2)
/* Deprecated */
@@ -117,6 +121,10 @@
#define FPU_ARCH_VFP_V1xD ARM_FEATURE (0, FPU_VFP_V1xD)
#define FPU_ARCH_VFP_V1 ARM_FEATURE (0, FPU_VFP_V1)
#define FPU_ARCH_VFP_V2 ARM_FEATURE (0, FPU_VFP_V2)
+#define FPU_ARCH_VFP_V3 ARM_FEATURE (0, FPU_VFP_V3)
+#define FPU_ARCH_NEON_V1 ARM_FEATURE (0, FPU_NEON_EXT_V1)
+#define FPU_ARCH_VFP_V3_PLUS_NEON_V1 \
+ ARM_FEATURE (0, FPU_VFP_V3 | FPU_NEON_EXT_V1)
#define FPU_ARCH_VFP_HARD ARM_FEATURE (0, FPU_VFP_HARD)
#define FPU_ARCH_ENDIAN_PURE ARM_FEATURE (0, FPU_ENDIAN_PURE)
diff --git a/opcodes/arm-dis.c b/opcodes/arm-dis.c
index bc5b52cf9c..f6cac6563c 100644
--- a/opcodes/arm-dis.c
+++ b/opcodes/arm-dis.c
@@ -63,6 +63,8 @@ struct opcode16
%c print condition code (always bits 28-31)
%A print address for ldc/stc/ldf/stf instruction
+ %B print vstm/vldm register list
+ %C print vstr/vldr address operand
%I print cirrus signed shift immediate: bits 0..3|4..6
%F print the COUNT field of a LFM/SFM instruction.
%P print floating point precision in arithmetic insn
@@ -71,6 +73,7 @@ struct opcode16
%<bitfield>r print as an ARM register
%<bitfield>d print the bitfield in decimal
+ %<bitfield>k print immediate for VFPv3 conversion instruction
%<bitfield>x print the bitfield in hex
%<bitfield>X print the bitfield as 1 hex digit without leading "0x"
%<bitfield>f print a floating point constant if >7 else a
@@ -78,15 +81,18 @@ struct opcode16
%<bitfield>w print as an iWMMXt width field - [bhwd]ss/us
%<bitfield>g print as an iWMMXt 64-bit register
%<bitfield>G print as an iWMMXt general purpose or control register
+ %<bitfield>D print as a NEON D register
+ %<bitfield>Q print as a NEON Q register
- %<code>y print a single precision VFP reg.
+ %y<code> print a single precision VFP reg.
Codes: 0=>Sm, 1=>Sd, 2=>Sn, 3=>multi-list, 4=>Sm pair
- %<code>z print a double precision VFP reg
+ %z<code> print a double precision VFP reg
Codes: 0=>Dm, 1=>Dd, 2=>Dn, 3=>multi-list
- %<bitnum>'c print specified char iff bit is one
- %<bitnum>`c print specified char iff bit is zero
- %<bitnum>?ab print a if bit is one else print b
+ %<bitfield>'c print specified char iff bitfield is all ones
+ %<bitfield>`c print specified char iff bitfield is all zeroes
+ %<bitfield>?ab... select from array of values in big endian order
+
%L print as an iWMMXt N/M width field.
%Z print the Immediate of a WSHUFH instruction.
%l like 'A' except use byte offsets for 'B' & 'H'
@@ -201,80 +207,110 @@ static const struct opcode32 coprocessor_opcodes[] =
{FPU_FPA_EXT_V2, 0x0c000200, 0x0e100f00, "sfm%c\t%12-14f, %F, %A"},
{FPU_FPA_EXT_V2, 0x0c100200, 0x0e100f00, "lfm%c\t%12-14f, %F, %A"},
+ /* Register load/store */
+ {FPU_NEON_EXT_V1, 0x0d000b00, 0x0f300f00, "vstr%c\t%12-15,22D, %C"},
+ {FPU_NEON_EXT_V1, 0x0d100b00, 0x0f300f00, "vldr%c\t%12-15,22D, %C"},
+ {FPU_NEON_EXT_V1, 0x0c800b00, 0x0f900f00, "vstmia%c\t%16-19r%21'!, %B"},
+ {FPU_NEON_EXT_V1, 0x0c900b00, 0x0f900f00, "vldmia%c\t%16-19r%21'!, %B"},
+ {FPU_NEON_EXT_V1, 0x0d000b00, 0x0f900f00, "vstmdb%c\t%16-19r%21'!, %B"},
+ {FPU_NEON_EXT_V1, 0x0d100b00, 0x0f900f00, "vldmdb%c\t%16-19r%21'!, %B"},
+
+ /* Data transfer between ARM and NEON registers */
+ {FPU_NEON_EXT_V1, 0x0e800b10, 0x0ff00f70, "vdup%c.32\t%16-19,7D, %12-15r"},
+ {FPU_NEON_EXT_V1, 0x0e800b30, 0x0ff00f70, "vdup%c.16\t%16-19,7D, %12-15r"},
+ {FPU_NEON_EXT_V1, 0x0ea00b10, 0x0ff00f70, "vdup%c.32\t%16-19,7Q, %12-15r"},
+ {FPU_NEON_EXT_V1, 0x0ea00b30, 0x0ff00f70, "vdup%c.16\t%16-19,7Q, %12-15r"},
+ {FPU_NEON_EXT_V1, 0x0ec00b10, 0x0ff00f70, "vdup%c.8\t%16-19,7D, %12-15r"},
+ {FPU_NEON_EXT_V1, 0x0ee00b10, 0x0ff00f70, "vdup%c.8\t%16-19,7Q, %12-15r"},
+ {FPU_NEON_EXT_V1, 0x0c400b10, 0x0ff00fd0, "vmov%c\t%0-3,5D, %12-15r, %16-19r"},
+ {FPU_NEON_EXT_V1, 0x0c500b10, 0x0ff00fd0, "vmov%c\t%12-15r, %16-19r, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0x0e000b10, 0x0fd00f70, "vmov%c.32\t%16-19,7D[%21d], %12-15r"},
+ {FPU_NEON_EXT_V1, 0x0e100b10, 0x0f500f70, "vmov%c.32\t%12-15r, %16-19,7D[%21d]"},
+ {FPU_NEON_EXT_V1, 0x0e000b30, 0x0fd00f30, "vmov%c.16\t%16-19,7D[%6,21d], %12-15r"},
+ {FPU_NEON_EXT_V1, 0x0e100b30, 0x0f500f30, "vmov%c.%23?us16\t%12-15r, %16-19,7D[%6,21d]"},
+ {FPU_NEON_EXT_V1, 0x0e400b10, 0x0fd00f10, "vmov%c.8\t%16-19,7D[%5,6,21d], %12-15r"},
+ {FPU_NEON_EXT_V1, 0x0e500b10, 0x0f500f10, "vmov%c.%23?us8\t%12-15r, %16-19,7D[%5,6,21d]"},
+
/* Floating point coprocessor (VFP) instructions */
- {FPU_VFP_EXT_V1, 0x0eb00bc0, 0x0fff0ff0, "fabsd%c\t%1z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0eb00ac0, 0x0fbf0fd0, "fabss%c\t%1y, %0y"},
- {FPU_VFP_EXT_V1, 0x0e300b00, 0x0ff00ff0, "faddd%c\t%1z, %2z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0e300a00, 0x0fb00f50, "fadds%c\t%1y, %2y, %0y"},
- {FPU_VFP_EXT_V1, 0x0eb40b40, 0x0fff0f70, "fcmp%7'ed%c\t%1z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0eb40a40, 0x0fbf0f50, "fcmp%7'es%c\t%1y, %0y"},
- {FPU_VFP_EXT_V1, 0x0eb50b40, 0x0fff0f70, "fcmp%7'ezd%c\t%1z"},
- {FPU_VFP_EXT_V1xD, 0x0eb50a40, 0x0fbf0f70, "fcmp%7'ezs%c\t%1y"},
- {FPU_VFP_EXT_V1, 0x0eb00b40, 0x0fff0ff0, "fcpyd%c\t%1z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0eb00a40, 0x0fbf0fd0, "fcpys%c\t%1y, %0y"},
- {FPU_VFP_EXT_V1, 0x0eb70ac0, 0x0fff0fd0, "fcvtds%c\t%1z, %0y"},
- {FPU_VFP_EXT_V1, 0x0eb70bc0, 0x0fbf0ff0, "fcvtsd%c\t%1y, %0z"},
- {FPU_VFP_EXT_V1, 0x0e800b00, 0x0ff00ff0, "fdivd%c\t%1z, %2z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0e800a00, 0x0fb00f50, "fdivs%c\t%1y, %2y, %0y"},
- {FPU_VFP_EXT_V1, 0x0d100b00, 0x0f700f00, "fldd%c\t%1z, %A"},
- {FPU_VFP_EXT_V1xD, 0x0c900b00, 0x0fd00f00, "fldmia%0?xd%c\t%16-19r%21'!, %3z"},
- {FPU_VFP_EXT_V1xD, 0x0d300b00, 0x0ff00f00, "fldmdb%0?xd%c\t%16-19r!, %3z"},
- {FPU_VFP_EXT_V1xD, 0x0d100a00, 0x0f300f00, "flds%c\t%1y, %A"},
- {FPU_VFP_EXT_V1xD, 0x0c900a00, 0x0f900f00, "fldmias%c\t%16-19r%21'!, %3y"},
- {FPU_VFP_EXT_V1xD, 0x0d300a00, 0x0fb00f00, "fldmdbs%c\t%16-19r!, %3y"},
- {FPU_VFP_EXT_V1, 0x0e000b00, 0x0ff00ff0, "fmacd%c\t%1z, %2z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0e000a00, 0x0fb00f50, "fmacs%c\t%1y, %2y, %0y"},
- {FPU_VFP_EXT_V1, 0x0e200b10, 0x0ff00fff, "fmdhr%c\t%2z, %12-15r"},
- {FPU_VFP_EXT_V1, 0x0e000b10, 0x0ff00fff, "fmdlr%c\t%2z, %12-15r"},
- {FPU_VFP_EXT_V2, 0x0c400b10, 0x0ff00ff0, "fmdrr%c\t%0z, %12-15r, %16-19r"},
- {FPU_VFP_EXT_V1, 0x0e300b10, 0x0ff00fff, "fmrdh%c\t%12-15r, %2z"},
- {FPU_VFP_EXT_V1, 0x0e100b10, 0x0ff00fff, "fmrdl%c\t%12-15r, %2z"},
- {FPU_VFP_EXT_V1, 0x0c500b10, 0x0ff00ff0, "fmrrd%c\t%12-15r, %16-19r, %0z"},
- {FPU_VFP_EXT_V2, 0x0c500a10, 0x0ff00fd0, "fmrrs%c\t%12-15r, %16-19r, %4y"},
- {FPU_VFP_EXT_V1xD, 0x0e100a10, 0x0ff00f7f, "fmrs%c\t%12-15r, %2y"},
{FPU_VFP_EXT_V1xD, 0x0ef1fa10, 0x0fffffff, "fmstat%c"},
- {FPU_VFP_EXT_V1xD, 0x0ef00a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpsid"},
- {FPU_VFP_EXT_V1xD, 0x0ef10a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpscr"},
- {FPU_VFP_EXT_V1xD, 0x0ef80a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpexc"},
- {FPU_VFP_EXT_V1xD, 0x0ef90a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpinst\t@ Impl def"},
- {FPU_VFP_EXT_V1xD, 0x0efa0a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpinst2\t@ Impl def"},
- {FPU_VFP_EXT_V1xD, 0x0ef00a10, 0x0ff00fff, "fmrx%c\t%12-15r, <impl def 0x%16-19x>"},
- {FPU_VFP_EXT_V1, 0x0e100b00, 0x0ff00ff0, "fmscd%c\t%1z, %2z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0e100a00, 0x0fb00f50, "fmscs%c\t%1y, %2y, %0y"},
- {FPU_VFP_EXT_V1xD, 0x0e000a10, 0x0ff00f7f, "fmsr%c\t%2y, %12-15r"},
- {FPU_VFP_EXT_V2, 0x0c400a10, 0x0ff00fd0, "fmsrr%c\t%12-15r, %16-19r, %4y"},
- {FPU_VFP_EXT_V1, 0x0e200b00, 0x0ff00ff0, "fmuld%c\t%1z, %2z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0e200a00, 0x0fb00f50, "fmuls%c\t%1y, %2y, %0y"},
{FPU_VFP_EXT_V1xD, 0x0ee00a10, 0x0fff0fff, "fmxr%c\tfpsid, %12-15r"},
{FPU_VFP_EXT_V1xD, 0x0ee10a10, 0x0fff0fff, "fmxr%c\tfpscr, %12-15r"},
{FPU_VFP_EXT_V1xD, 0x0ee80a10, 0x0fff0fff, "fmxr%c\tfpexc, %12-15r"},
{FPU_VFP_EXT_V1xD, 0x0ee90a10, 0x0fff0fff, "fmxr%c\tfpinst, %12-15r\t@ Impl def"},
{FPU_VFP_EXT_V1xD, 0x0eea0a10, 0x0fff0fff, "fmxr%c\tfpinst2, %12-15r\t@ Impl def"},
+ {FPU_VFP_EXT_V1xD, 0x0ef00a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpsid"},
+ {FPU_VFP_EXT_V1xD, 0x0ef10a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpscr"},
+ {FPU_VFP_EXT_V1xD, 0x0ef80a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpexc"},
+ {FPU_VFP_EXT_V1xD, 0x0ef90a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpinst\t@ Impl def"},
+ {FPU_VFP_EXT_V1xD, 0x0efa0a10, 0x0fff0fff, "fmrx%c\t%12-15r, fpinst2\t@ Impl def"},
+ {FPU_VFP_EXT_V1, 0x0e000b10, 0x0ff00fff, "fmdlr%c\t%z2, %12-15r"},
+ {FPU_VFP_EXT_V1, 0x0e100b10, 0x0ff00fff, "fmrdl%c\t%12-15r, %z2"},
+ {FPU_VFP_EXT_V1, 0x0e200b10, 0x0ff00fff, "fmdhr%c\t%z2, %12-15r"},
+ {FPU_VFP_EXT_V1, 0x0e300b10, 0x0ff00fff, "fmrdh%c\t%12-15r, %z2"},
{FPU_VFP_EXT_V1xD, 0x0ee00a10, 0x0ff00fff, "fmxr%c\t<impl def 0x%16-19x>, %12-15r"},
- {FPU_VFP_EXT_V1, 0x0eb10b40, 0x0fff0ff0, "fnegd%c\t%1z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0eb10a40, 0x0fbf0fd0, "fnegs%c\t%1y, %0y"},
- {FPU_VFP_EXT_V1, 0x0e000b40, 0x0ff00ff0, "fnmacd%c\t%1z, %2z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0e000a40, 0x0fb00f50, "fnmacs%c\t%1y, %2y, %0y"},
- {FPU_VFP_EXT_V1, 0x0e100b40, 0x0ff00ff0, "fnmscd%c\t%1z, %2z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0e100a40, 0x0fb00f50, "fnmscs%c\t%1y, %2y, %0y"},
- {FPU_VFP_EXT_V1, 0x0e200b40, 0x0ff00ff0, "fnmuld%c\t%1z, %2z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0e200a40, 0x0fb00f50, "fnmuls%c\t%1y, %2y, %0y"},
- {FPU_VFP_EXT_V1, 0x0eb80bc0, 0x0fff0fd0, "fsitod%c\t%1z, %0y"},
- {FPU_VFP_EXT_V1xD, 0x0eb80ac0, 0x0fbf0fd0, "fsitos%c\t%1y, %0y"},
- {FPU_VFP_EXT_V1, 0x0eb10bc0, 0x0fff0ff0, "fsqrtd%c\t%1z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0eb10ac0, 0x0fbf0fd0, "fsqrts%c\t%1y, %0y"},
- {FPU_VFP_EXT_V1, 0x0d000b00, 0x0f700f00, "fstd%c\t%1z, %A"},
- {FPU_VFP_EXT_V1xD, 0x0c800b00, 0x0fd00f00, "fstmia%0?xd%c\t%16-19r%21'!, %3z"},
- {FPU_VFP_EXT_V1xD, 0x0d200b00, 0x0ff00f00, "fstmdb%0?xd%c\t%16-19r!, %3z"},
- {FPU_VFP_EXT_V1xD, 0x0d000a00, 0x0f300f00, "fsts%c\t%1y, %A"},
- {FPU_VFP_EXT_V1xD, 0x0c800a00, 0x0f900f00, "fstmias%c\t%16-19r%21'!, %3y"},
- {FPU_VFP_EXT_V1xD, 0x0d200a00, 0x0fb00f00, "fstmdbs%c\t%16-19r!, %3y"},
- {FPU_VFP_EXT_V1, 0x0e300b40, 0x0ff00ff0, "fsubd%c\t%1z, %2z, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0e300a40, 0x0fb00f50, "fsubs%c\t%1y, %2y, %0y"},
- {FPU_VFP_EXT_V1, 0x0ebc0b40, 0x0fbe0f70, "fto%16?sui%7'zd%c\t%1y, %0z"},
- {FPU_VFP_EXT_V1xD, 0x0ebc0a40, 0x0fbe0f50, "fto%16?sui%7'zs%c\t%1y, %0y"},
- {FPU_VFP_EXT_V1, 0x0eb80b40, 0x0fff0fd0, "fuitod%c\t%1z, %0y"},
- {FPU_VFP_EXT_V1xD, 0x0eb80a40, 0x0fbf0fd0, "fuitos%c\t%1y, %0y"},
+ {FPU_VFP_EXT_V1xD, 0x0ef00a10, 0x0ff00fff, "fmrx%c\t%12-15r, <impl def 0x%16-19x>"},
+ {FPU_VFP_EXT_V1xD, 0x0e000a10, 0x0ff00f7f, "fmsr%c\t%y2, %12-15r"},
+ {FPU_VFP_EXT_V1xD, 0x0e100a10, 0x0ff00f7f, "fmrs%c\t%12-15r, %y2"},
+ {FPU_VFP_EXT_V1xD, 0x0eb50a40, 0x0fbf0f70, "fcmp%7'ezs%c\t%y1"},
+ {FPU_VFP_EXT_V1, 0x0eb50b40, 0x0fbf0f70, "fcmp%7'ezd%c\t%z1"},
+ {FPU_VFP_EXT_V1xD, 0x0eb00a40, 0x0fbf0fd0, "fcpys%c\t%y1, %y0"},
+ {FPU_VFP_EXT_V1xD, 0x0eb00ac0, 0x0fbf0fd0, "fabss%c\t%y1, %y0"},
+ {FPU_VFP_EXT_V1, 0x0eb00b40, 0x0fbf0fd0, "fcpyd%c\t%z1, %z0"},
+ {FPU_VFP_EXT_V1, 0x0eb00bc0, 0x0fbf0fd0, "fabsd%c\t%z1, %z0"},
+ {FPU_VFP_EXT_V1xD, 0x0eb10a40, 0x0fbf0fd0, "fnegs%c\t%y1, %y0"},
+ {FPU_VFP_EXT_V1xD, 0x0eb10ac0, 0x0fbf0fd0, "fsqrts%c\t%y1, %y0"},
+ {FPU_VFP_EXT_V1, 0x0eb10b40, 0x0fbf0fd0, "fnegd%c\t%z1, %z0"},
+ {FPU_VFP_EXT_V1, 0x0eb10bc0, 0x0fbf0fd0, "fsqrtd%c\t%z1, %z0"},
+ {FPU_VFP_EXT_V1, 0x0eb70ac0, 0x0fbf0fd0, "fcvtds%c\t%z1, %y0"},
+ {FPU_VFP_EXT_V1, 0x0eb70bc0, 0x0fbf0fd0, "fcvtsd%c\t%y1, %z0"},
+ {FPU_VFP_EXT_V1xD, 0x0eb80a40, 0x0fbf0fd0, "fuitos%c\t%y1, %y0"},
+ {FPU_VFP_EXT_V1xD, 0x0eb80ac0, 0x0fbf0fd0, "fsitos%c\t%y1, %y0"},
+ {FPU_VFP_EXT_V1, 0x0eb80b40, 0x0fbf0fd0, "fuitod%c\t%z1, %y0"},
+ {FPU_VFP_EXT_V1, 0x0eb80bc0, 0x0fbf0fd0, "fsitod%c\t%z1, %y0"},
+ {FPU_VFP_EXT_V1xD, 0x0eb40a40, 0x0fbf0f50, "fcmp%7'es%c\t%y1, %y0"},
+ {FPU_VFP_EXT_V1, 0x0eb40b40, 0x0fbf0f50, "fcmp%7'ed%c\t%z1, %z0"},
+ {FPU_VFP_EXT_V3, 0x0eba0a40, 0x0fbe0f50, "f%16?us%7?lhtos%c\t%y1, #%5,0-3k"},
+ {FPU_VFP_EXT_V3, 0x0eba0b40, 0x0fbe0f50, "f%16?us%7?lhtod%c\t%z1, #%5,0-3k"},
+ {FPU_VFP_EXT_V1xD, 0x0ebc0a40, 0x0fbe0f50, "fto%16?sui%7'zs%c\t%y1, %y0"},
+ {FPU_VFP_EXT_V1, 0x0ebc0b40, 0x0fbe0f50, "fto%16?sui%7'zd%c\t%y1, %z0"},
+ {FPU_VFP_EXT_V3, 0x0ebe0a40, 0x0fbe0f50, "fto%16?us%7?lhs%c\t%y1, #%5,0-3k"},
+ {FPU_VFP_EXT_V3, 0x0ebe0b40, 0x0fbe0f50, "fto%16?us%7?lhd%c\t%z1, #%5,0-3k"},
+ {FPU_VFP_EXT_V1, 0x0c500b10, 0x0fb00ff0, "fmrrd%c\t%12-15r, %16-19r, %z0"},
+ {FPU_VFP_EXT_V3, 0x0eb00a00, 0x0fb00ff0, "fconsts%c\t%y1, #%16-19,0-3d"},
+ {FPU_VFP_EXT_V3, 0x0eb00b00, 0x0fb00ff0, "fconstd%c\t%z1, #%16-19,0-3d"},
+ {FPU_VFP_EXT_V2, 0x0c400a10, 0x0ff00fd0, "fmsrr%c\t%12-15r, %16-19r, %y4"},
+ {FPU_VFP_EXT_V2, 0x0c400b10, 0x0ff00fd0, "fmdrr%c\t%z0, %12-15r, %16-19r"},
+ {FPU_VFP_EXT_V2, 0x0c500a10, 0x0ff00fd0, "fmrrs%c\t%12-15r, %16-19r, %y4"},
+ {FPU_VFP_EXT_V1xD, 0x0e000a00, 0x0fb00f50, "fmacs%c\t%y1, %y2, %y0"},
+ {FPU_VFP_EXT_V1xD, 0x0e000a40, 0x0fb00f50, "fnmacs%c\t%y1, %y2, %y0"},
+ {FPU_VFP_EXT_V1, 0x0e000b00, 0x0fb00f50, "fmacd%c\t%z1, %z2, %z0"},
+ {FPU_VFP_EXT_V1, 0x0e000b40, 0x0fb00f50, "fnmacd%c\t%z1, %z2, %z0"},
+ {FPU_VFP_EXT_V1xD, 0x0e100a00, 0x0fb00f50, "fmscs%c\t%y1, %y2, %y0"},
+ {FPU_VFP_EXT_V1xD, 0x0e100a40, 0x0fb00f50, "fnmscs%c\t%y1, %y2, %y0"},
+ {FPU_VFP_EXT_V1, 0x0e100b00, 0x0fb00f50, "fmscd%c\t%z1, %z2, %z0"},
+ {FPU_VFP_EXT_V1, 0x0e100b40, 0x0fb00f50, "fnmscd%c\t%z1, %z2, %z0"},
+ {FPU_VFP_EXT_V1xD, 0x0e200a00, 0x0fb00f50, "fmuls%c\t%y1, %y2, %y0"},
+ {FPU_VFP_EXT_V1xD, 0x0e200a40, 0x0fb00f50, "fnmuls%c\t%y1, %y2, %y0"},
+ {FPU_VFP_EXT_V1, 0x0e200b00, 0x0fb00f50, "fmuld%c\t%z1, %z2, %z0"},
+ {FPU_VFP_EXT_V1, 0x0e200b40, 0x0fb00f50, "fnmuld%c\t%z1, %z2, %z0"},
+ {FPU_VFP_EXT_V1xD, 0x0e300a00, 0x0fb00f50, "fadds%c\t%y1, %y2, %y0"},
+ {FPU_VFP_EXT_V1xD, 0x0e300a40, 0x0fb00f50, "fsubs%c\t%y1, %y2, %y0"},
+ {FPU_VFP_EXT_V1, 0x0e300b00, 0x0fb00f50, "faddd%c\t%z1, %z2, %z0"},
+ {FPU_VFP_EXT_V1, 0x0e300b40, 0x0fb00f50, "fsubd%c\t%z1, %z2, %z0"},
+ {FPU_VFP_EXT_V1xD, 0x0e800a00, 0x0fb00f50, "fdivs%c\t%y1, %y2, %y0"},
+ {FPU_VFP_EXT_V1, 0x0e800b00, 0x0fb00f50, "fdivd%c\t%z1, %z2, %z0"},
+ {FPU_VFP_EXT_V1xD, 0x0d200a00, 0x0fb00f00, "fstmdbs%c\t%16-19r!, %y3"},
+ {FPU_VFP_EXT_V1xD, 0x0d200b00, 0x0fb00f00, "fstmdb%0?xd%c\t%16-19r!, %z3"},
+ {FPU_VFP_EXT_V1xD, 0x0d300a00, 0x0fb00f00, "fldmdbs%c\t%16-19r!, %y3"},
+ {FPU_VFP_EXT_V1xD, 0x0d300b00, 0x0fb00f00, "fldmdb%0?xd%c\t%16-19r!, %z3"},
+ {FPU_VFP_EXT_V1xD, 0x0d000a00, 0x0f300f00, "fsts%c\t%y1, %A"},
+ {FPU_VFP_EXT_V1, 0x0d000b00, 0x0f300f00, "fstd%c\t%z1, %A"},
+ {FPU_VFP_EXT_V1xD, 0x0d100a00, 0x0f300f00, "flds%c\t%y1, %A"},
+ {FPU_VFP_EXT_V1, 0x0d100b00, 0x0f300f00, "fldd%c\t%z1, %A"},
+ {FPU_VFP_EXT_V1xD, 0x0c800a00, 0x0f900f00, "fstmias%c\t%16-19r%21'!, %y3"},
+ {FPU_VFP_EXT_V1xD, 0x0c800b00, 0x0f900f00, "fstmia%0?xd%c\t%16-19r%21'!, %z3"},
+ {FPU_VFP_EXT_V1xD, 0x0c900a00, 0x0f900f00, "fldmias%c\t%16-19r%21'!, %y3"},
+ {FPU_VFP_EXT_V1xD, 0x0c900b00, 0x0f900f00, "fldmia%0?xd%c\t%16-19r%21'!, %z3"},
/* Cirrus coprocessor instructions. */
{ARM_CEXT_MAVERICK, 0x0d100400, 0x0f500f00, "cfldrs%c\tmvf%12-15d, %A"},
@@ -381,9 +417,286 @@ static const struct opcode32 coprocessor_opcodes[] =
{ARM_EXT_V5, 0xfe000000, 0xff000010, "cdp2\t%8-11d, %20-23d, cr%12-15d, cr%16-19d, cr%0-3d, {%5-7d}"},
{ARM_EXT_V5, 0xfe000010, 0xff100010, "mcr2\t%8-11d, %21-23d, %12-15r, cr%16-19d, cr%0-3d, {%5-7d}"},
{ARM_EXT_V5, 0xfe100010, 0xff100010, "mrc2\t%8-11d, %21-23d, %12-15r, cr%16-19d, cr%0-3d, {%5-7d}"},
+
{0, 0, 0, 0}
};
+/* Neon opcode table: This does not encode the top byte -- that is
+ checked by the print_insn_neon routine, as it depends on whether we are
+ doing thumb32 or arm32 disassembly. */
+
+/* print_insn_neon recognizes the following format control codes:
+
+ %% %
+
+ %A print v{st,ld}[1234] operands
+ %B print v{st,ld}[1234] any one operands
+ %C print v{st,ld}[1234] single->all operands
+ %D print scalar
+ %E print vmov, vmvn, vorr, vbic encoded constant
+ %F print vtbl,vtbx register list
+
+ %<bitfield>r print as an ARM register
+ %<bitfield>d print the bitfield in decimal
+ %<bitfield>e print the 2^N - bitfield in decimal
+ %<bitfield>D print as a NEON D register
+ %<bitfield>Q print as a NEON Q register
+ %<bitfield>R print as a NEON D or Q register
+ %<bitfield>Sn print byte scaled width limited by n
+ %<bitfield>Tn print short scaled width limited by n
+ %<bitfield>Un print long scaled width limited by n
+
+ %<bitfield>'c print specified char iff bitfield is all ones
+ %<bitfield>`c print specified char iff bitfield is all zeroes
+ %<bitfield>?ab... select from array of values in big endian order */
+
+static const struct opcode32 neon_opcodes[] =
+{
+ /* Extract */
+ {FPU_NEON_EXT_V1, 0xf2b00840, 0xffb00850, "vext.8\t%12-15,22R, %16-19,7R, %0-3,5R, #%8-11d"},
+ {FPU_NEON_EXT_V1, 0xf2b00000, 0xffb00810, "vext.8\t%12-15,22R, %16-19,7R, %0-3,5R, #%8-11d"},
+
+ /* Move data element to all lanes */
+ {FPU_NEON_EXT_V1, 0xf3b40c00, 0xffb70f90, "vdup.32\t%12-15,22R, %0-3,5D[%19d]"},
+ {FPU_NEON_EXT_V1, 0xf3b20c00, 0xffb30f90, "vdup.16\t%12-15,22R, %0-3,5D[%18-19d]"},
+ {FPU_NEON_EXT_V1, 0xf3b10c00, 0xffb10f90, "vdup.8\t%12-15,22R, %0-3,5D[%17-19d]"},
+
+ /* Table lookup */
+ {FPU_NEON_EXT_V1, 0xf3b00800, 0xffb00c50, "vtbl.8\t%12-15,22D, %F, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf3b00840, 0xffb00c50, "vtbx.8\t%12-15,22D, %F, %0-3,5D"},
+
+ /* Two registers, miscellaneous */
+ {FPU_NEON_EXT_V1, 0xf2880a10, 0xfebf0fd0, "vmovl.%24?us8\t%12-15,22Q, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf2900a10, 0xfebf0fd0, "vmovl.%24?us16\t%12-15,22Q, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf2a00a10, 0xfebf0fd0, "vmovl.%24?us32\t%12-15,22Q, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf3b00500, 0xffbf0f90, "vcnt.8\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b00580, 0xffbf0f90, "vmvn\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b20000, 0xffbf0f90, "vswp\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b20200, 0xffb30fd0, "vmovn.i%18-19S2\t%12-15,22D, %0-3,5Q"},
+ {FPU_NEON_EXT_V1, 0xf3b20240, 0xffb30fd0, "vqmovun.s%18-19T2\t%12-15,22D, %0-3,5Q"},
+ {FPU_NEON_EXT_V1, 0xf3b20280, 0xffb30fd0, "vqmovn.s%18-19T2\t%12-15,22D, %0-3,5Q"},
+ {FPU_NEON_EXT_V1, 0xf3b202c0, 0xffb30fd0, "vqmovn.u%18-19T2\t%12-15,22D, %0-3,5Q"},
+ {FPU_NEON_EXT_V1, 0xf3b20300, 0xffb30fd0, "vshll.i%18-19S2\t%12-15,22Q, %0-3,5D, #%18-19S2"},
+ {FPU_NEON_EXT_V1, 0xf3bb0400, 0xffbf0e90, "vrecpe.%8?fu%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3bb0480, 0xffbf0e90, "vrsqrte.%8?fu%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b00000, 0xffb30f90, "vrev64.%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b00080, 0xffb30f90, "vrev32.%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b00100, 0xffb30f90, "vrev16.%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b00400, 0xffb30f90, "vcls.s%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b00480, 0xffb30f90, "vclz.i%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b00700, 0xffb30f90, "vqabs.s%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b00780, 0xffb30f90, "vqneg.s%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b20080, 0xffb30f90, "vtrn.%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b20100, 0xffb30f90, "vuzp.%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b20180, 0xffb30f90, "vzip.%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b10000, 0xffb30b90, "vcgt.%10?fs%18-19S2\t%12-15,22R, %0-3,5R, #0"},
+ {FPU_NEON_EXT_V1, 0xf3b10080, 0xffb30b90, "vcge.%10?fs%18-19S2\t%12-15,22R, %0-3,5R, #0"},
+ {FPU_NEON_EXT_V1, 0xf3b10100, 0xffb30b90, "vceq.%10?fi%18-19S2\t%12-15,22R, %0-3,5R, #0"},
+ {FPU_NEON_EXT_V1, 0xf3b10180, 0xffb30b90, "vcle.%10?fs%18-19S2\t%12-15,22R, %0-3,5R, #0"},
+ {FPU_NEON_EXT_V1, 0xf3b10200, 0xffb30b90, "vclt.%10?fs%18-19S2\t%12-15,22R, %0-3,5R, #0"},
+ {FPU_NEON_EXT_V1, 0xf3b10300, 0xffb30b90, "vabs.%10?fs%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b10380, 0xffb30b90, "vneg.%10?fs%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b00200, 0xffb30f10, "vpaddl.%7?us%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b00600, 0xffb30f10, "vpadal.%7?us%18-19S2\t%12-15,22R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3b30600, 0xffb30e10, "vcvt.%7-8?usff%18-19Sa.%7-8?ffus%18-19Sa\t%12-15,22R, %0-3,5R"},
+
+ /* Three registers of the same length */
+ {FPU_NEON_EXT_V1, 0xf2000110, 0xffb00f10, "vand\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2100110, 0xffb00f10, "vbic\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2200110, 0xffb00f10, "vorr\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2300110, 0xffb00f10, "vorn\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3000110, 0xffb00f10, "veor\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3100110, 0xffb00f10, "vbsl\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3200110, 0xffb00f10, "vbit\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3300110, 0xffb00f10, "vbif\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000d00, 0xffa00f10, "vadd.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000d10, 0xffa00f10, "vmla.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000e00, 0xffa00f10, "vceq.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000f00, 0xffa00f10, "vmax.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000f10, 0xffa00f10, "vrecps.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2200d00, 0xffa00f10, "vsub.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2200d10, 0xffa00f10, "vmls.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2200f00, 0xffa00f10, "vmin.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2200f10, 0xffa00f10, "vrsqrts.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3000d00, 0xffa00f10, "vpadd.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3000d10, 0xffa00f10, "vmul.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3000e00, 0xffa00f10, "vcge.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3000e10, 0xffa00f10, "vacge.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3000f00, 0xffa00f10, "vpmax.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3200d00, 0xffa00f10, "vabd.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3200e00, 0xffa00f10, "vcgt.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3200e10, 0xffa00f10, "vacgt.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3200f00, 0xffa00f10, "vpmin.f%20U0\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000800, 0xff800f10, "vadd.i%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000810, 0xff800f10, "vtst.%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000900, 0xff800f10, "vmla.i%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000b00, 0xff800f10, "vqdmulh.s%20-21S6\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000b10, 0xff800f10, "vpadd.i%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3000800, 0xff800f10, "vsub.i%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3000810, 0xff800f10, "vceq.i%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3000900, 0xff800f10, "vmls.i%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf3000b00, 0xff800f10, "vqrdmulh.s%20-21S6\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000000, 0xfe800f10, "vhadd.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000010, 0xfe800f10, "vqadd.%24?us%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000100, 0xfe800f10, "vrhadd.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000200, 0xfe800f10, "vhsub.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000210, 0xfe800f10, "vqsub.%24?us%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000300, 0xfe800f10, "vcgt.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000310, 0xfe800f10, "vcge.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000400, 0xfe800f10, "vshl.%24?us%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000410, 0xfe800f10, "vqshl.%24?us%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000500, 0xfe800f10, "vrshl.%24?us%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000510, 0xfe800f10, "vqrshl.%24?us%20-21S3\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000600, 0xfe800f10, "vmax.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000610, 0xfe800f10, "vmin.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000700, 0xfe800f10, "vabd.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000710, 0xfe800f10, "vaba.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000910, 0xfe800f10, "vmul.%24?pi%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000a00, 0xfe800f10, "vpmax.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+ {FPU_NEON_EXT_V1, 0xf2000a10, 0xfe800f10, "vpmin.%24?us%20-21S2\t%12-15,22R, %16-19,7R, %0-3,5R"},
+
+ /* One register and an immediate value */
+ {FPU_NEON_EXT_V1, 0xf2800e10, 0xfeb80fb0, "vmov.i8\t%12-15,22R, %E"},
+ {FPU_NEON_EXT_V1, 0xf2800e30, 0xfeb80fb0, "vmov.i64\t%12-15,22R, %E"},
+ {FPU_NEON_EXT_V1, 0xf2800f10, 0xfeb80fb0, "vmov.f32\t%12-15,22R, %E"},
+ {FPU_NEON_EXT_V1, 0xf2800810, 0xfeb80db0, "vmov.i16\t%12-15,22R, %E"},
+ {FPU_NEON_EXT_V1, 0xf2800830, 0xfeb80db0, "vmvn.i16\t%12-15,22R, %E"},
+ {FPU_NEON_EXT_V1, 0xf2800910, 0xfeb80db0, "vorr.i16\t%12-15,22R, %E"},
+ {FPU_NEON_EXT_V1, 0xf2800930, 0xfeb80db0, "vbic.i16\t%12-15,22R, %E"},
+ {FPU_NEON_EXT_V1, 0xf2800c10, 0xfeb80eb0, "vmov.i32\t%12-15,22R, %E"},
+ {FPU_NEON_EXT_V1, 0xf2800c30, 0xfeb80eb0, "vmvn.i32\t%12-15,22R, %E"},
+ {FPU_NEON_EXT_V1, 0xf2800110, 0xfeb809b0, "vorr.i32\t%12-15,22R, %E"},
+ {FPU_NEON_EXT_V1, 0xf2800130, 0xfeb809b0, "vbic.i32\t%12-15,22R, %E"},
+ {FPU_NEON_EXT_V1, 0xf2800010, 0xfeb808b0, "vmov.i32\t%12-15,22R, %E"},
+ {FPU_NEON_EXT_V1, 0xf2800030, 0xfeb808b0, "vmvn.i32\t%12-15,22R, %E"},
+
+ /* Two registers and a shift amount */
+ {FPU_NEON_EXT_V1, 0xf2880810, 0xffb80fd0, "vshrn.i16\t%12-15,22D, %0-3,5Q, #%16-18e"},
+ {FPU_NEON_EXT_V1, 0xf2880850, 0xffb80fd0, "vrshrn.i16\t%12-15,22D, %0-3,5Q, #%16-18e"},
+ {FPU_NEON_EXT_V1, 0xf2880810, 0xfeb80fd0, "vqshrun.s16\t%12-15,22D, %0-3,5Q, #%16-18e"},
+ {FPU_NEON_EXT_V1, 0xf2880850, 0xfeb80fd0, "vqrshrun.s16\t%12-15,22D, %0-3,5Q, #%16-18e"},
+ {FPU_NEON_EXT_V1, 0xf2880910, 0xfeb80fd0, "vqshrn.%24?us16\t%12-15,22D, %0-3,5Q, #%16-18e"},
+ {FPU_NEON_EXT_V1, 0xf2880950, 0xfeb80fd0, "vqrshrn.%24?us16\t%12-15,22D, %0-3,5Q, #%16-18e"},
+ {FPU_NEON_EXT_V1, 0xf2880a10, 0xfeb80fd0, "vshll.%24?us8\t%12-15,22D, %0-3,5Q, #%16-18d"},
+ {FPU_NEON_EXT_V1, 0xf2900810, 0xffb00fd0, "vshrn.i32\t%12-15,22D, %0-3,5Q, #%16-19e"},
+ {FPU_NEON_EXT_V1, 0xf2900850, 0xffb00fd0, "vrshrn.i32\t%12-15,22D, %0-3,5Q, #%16-19e"},
+ {FPU_NEON_EXT_V1, 0xf2880510, 0xffb80f90, "vshl.%24?us8\t%12-15,22R, %0-3,5R, #%16-18d"},
+ {FPU_NEON_EXT_V1, 0xf3880410, 0xffb80f90, "vsri.8\t%12-15,22R, %0-3,5R, #%16-18e"},
+ {FPU_NEON_EXT_V1, 0xf3880510, 0xffb80f90, "vsli.8\t%12-15,22R, %0-3,5R, #%16-18d"},
+ {FPU_NEON_EXT_V1, 0xf3880610, 0xffb80f90, "vqshlu.s8\t%12-15,22R, %0-3,5R, #%16-18d"},
+ {FPU_NEON_EXT_V1, 0xf2900810, 0xfeb00fd0, "vqshrun.s32\t%12-15,22D, %0-3,5Q, #%16-19e"},
+ {FPU_NEON_EXT_V1, 0xf2900850, 0xfeb00fd0, "vqrshrun.s32\t%12-15,22D, %0-3,5Q, #%16-19e"},
+ {FPU_NEON_EXT_V1, 0xf2900910, 0xfeb00fd0, "vqshrn.%24?us32\t%12-15,22D, %0-3,5Q, #%16-19e"},
+ {FPU_NEON_EXT_V1, 0xf2900950, 0xfeb00fd0, "vqrshrn.%24?us32\t%12-15,22D, %0-3,5Q, #%16-19e"},
+ {FPU_NEON_EXT_V1, 0xf2900a10, 0xfeb00fd0, "vshll.%24?us16\t%12-15,22D, %0-3,5Q, #%16-19d"},
+ {FPU_NEON_EXT_V1, 0xf2880010, 0xfeb80f90, "vshr.%24?us8\t%12-15,22R, %0-3,5R, #%16-18e"},
+ {FPU_NEON_EXT_V1, 0xf2880110, 0xfeb80f90, "vsra.%24?us8\t%12-15,22R, %0-3,5R, #%16-18e"},
+ {FPU_NEON_EXT_V1, 0xf2880210, 0xfeb80f90, "vrshr.%24?us8\t%12-15,22R, %0-3,5R, #%16-18e"},
+ {FPU_NEON_EXT_V1, 0xf2880310, 0xfeb80f90, "vrsra.%24?us8\t%12-15,22R, %0-3,5R, #%16-18e"},
+ {FPU_NEON_EXT_V1, 0xf2880710, 0xfeb80f90, "vqshl.%24?us8\t%12-15,22R, %0-3,5R, #%16-18d"},
+ {FPU_NEON_EXT_V1, 0xf2a00810, 0xffa00fd0, "vshrn.i64\t%12-15,22D, %0-3,5Q, #%16-20e"},
+ {FPU_NEON_EXT_V1, 0xf2a00850, 0xffa00fd0, "vrshrn.i64\t%12-15,22D, %0-3,5Q, #%16-20e"},
+ {FPU_NEON_EXT_V1, 0xf2900510, 0xffb00f90, "vshl.%24?us16\t%12-15,22R, %0-3,5R, #%16-19d"},
+ {FPU_NEON_EXT_V1, 0xf3900410, 0xffb00f90, "vsri.16\t%12-15,22R, %0-3,5R, #%16-19e"},
+ {FPU_NEON_EXT_V1, 0xf3900510, 0xffb00f90, "vsli.16\t%12-15,22R, %0-3,5R, #%16-19d"},
+ {FPU_NEON_EXT_V1, 0xf3900610, 0xffb00f90, "vqshlu.s16\t%12-15,22R, %0-3,5R, #%16-19d"},
+ {FPU_NEON_EXT_V1, 0xf2a00a10, 0xfea00fd0, "vshll.%24?us32\t%12-15,22D, %0-3,5Q, #%16-20d"},
+ {FPU_NEON_EXT_V1, 0xf2900010, 0xfeb00f90, "vshr.%24?us16\t%12-15,22R, %0-3,5R, #%16-19e"},
+ {FPU_NEON_EXT_V1, 0xf2900110, 0xfeb00f90, "vsra.%24?us16\t%12-15,22R, %0-3,5R, #%16-19e"},
+ {FPU_NEON_EXT_V1, 0xf2900210, 0xfeb00f90, "vrshr.%24?us16\t%12-15,22R, %0-3,5R, #%16-19e"},
+ {FPU_NEON_EXT_V1, 0xf2900310, 0xfeb00f90, "vrsra.%24?us16\t%12-15,22R, %0-3,5R, #%16-19e"},
+ {FPU_NEON_EXT_V1, 0xf2900710, 0xfeb00f90, "vqshl.%24?us16\t%12-15,22R, %0-3,5R, #%16-19d"},
+ {FPU_NEON_EXT_V1, 0xf2800810, 0xfec00fd0, "vqshrun.s64\t%12-15,22D, %0-3,5Q, #%16-20e"},
+ {FPU_NEON_EXT_V1, 0xf2800850, 0xfec00fd0, "vqrshrun.s64\t%12-15,22D, %0-3,5Q, #%16-20e"},
+ {FPU_NEON_EXT_V1, 0xf2800910, 0xfec00fd0, "vqshrn.%24?us64\t%12-15,22D, %0-3,5Q, #%16-20e"},
+ {FPU_NEON_EXT_V1, 0xf2800950, 0xfec00fd0, "vqrshrn.%24?us64\t%12-15,22D, %0-3,5Q, #%16-20e"},
+ {FPU_NEON_EXT_V1, 0xf2a00510, 0xffa00f90, "vshl.%24?us32\t%12-15,22R, %0-3,5R, #%16-20d"},
+ {FPU_NEON_EXT_V1, 0xf3a00410, 0xffa00f90, "vsri.32\t%12-15,22R, %0-3,5R, #%16-20e"},
+ {FPU_NEON_EXT_V1, 0xf3a00510, 0xffa00f90, "vsli.32\t%12-15,22R, %0-3,5R, #%16-20d"},
+ {FPU_NEON_EXT_V1, 0xf3a00610, 0xffa00f90, "vqshlu.s32\t%12-15,22R, %0-3,5R, #%16-20d"},
+ {FPU_NEON_EXT_V1, 0xf2a00010, 0xfea00f90, "vshr.%24?us32\t%12-15,22R, %0-3,5R, #%16-20e"},
+ {FPU_NEON_EXT_V1, 0xf2a00110, 0xfea00f90, "vsra.%24?us32\t%12-15,22R, %0-3,5R, #%16-20e"},
+ {FPU_NEON_EXT_V1, 0xf2a00210, 0xfea00f90, "vrshr.%24?us32\t%12-15,22R, %0-3,5R, #%16-20e"},
+ {FPU_NEON_EXT_V1, 0xf2a00310, 0xfea00f90, "vrsra.%24?us32\t%12-15,22R, %0-3,5R, #%16-20e"},
+ {FPU_NEON_EXT_V1, 0xf2a00710, 0xfea00f90, "vqshl.%24?us32\t%12-15,22R, %0-3,5R, #%16-20d"},
+ {FPU_NEON_EXT_V1, 0xf2800590, 0xff800f90, "vshl.%24?us64\t%12-15,22R, %0-3,5R, #%16-21d"},
+ {FPU_NEON_EXT_V1, 0xf3800490, 0xff800f90, "vsri.64\t%12-15,22R, %0-3,5R, #%16-21e"},
+ {FPU_NEON_EXT_V1, 0xf3800590, 0xff800f90, "vsli.64\t%12-15,22R, %0-3,5R, #%16-21d"},
+ {FPU_NEON_EXT_V1, 0xf3800690, 0xff800f90, "vqshlu.s64\t%12-15,22R, %0-3,5R, #%16-21d"},
+ {FPU_NEON_EXT_V1, 0xf2800090, 0xfe800f90, "vshr.%24?us64\t%12-15,22R, %0-3,5R, #%16-21e"},
+ {FPU_NEON_EXT_V1, 0xf2800190, 0xfe800f90, "vsra.%24?us64\t%12-15,22R, %0-3,5R, #%16-21e"},
+ {FPU_NEON_EXT_V1, 0xf2800290, 0xfe800f90, "vrshr.%24?us64\t%12-15,22R, %0-3,5R, #%16-21e"},
+ {FPU_NEON_EXT_V1, 0xf2800390, 0xfe800f90, "vrsra.%24?us64\t%12-15,22R, %0-3,5R, #%16-21e"},
+ {FPU_NEON_EXT_V1, 0xf2800790, 0xfe800f90, "vqshl.%24?us64\t%12-15,22R, %0-3,5R, #%16-21d"},
+ {FPU_NEON_EXT_V1, 0xf2a00e10, 0xfea00e90, "vcvt.%24,8?usff32.%24,8?ffus32\t%12-15,22R, %0-3,5R, #%16-20e"},
+
+ /* Three registers of different lengths */
+ {FPU_NEON_EXT_V1, 0xf2800e00, 0xfea00f50, "vmull.p%20S0\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf2800400, 0xff800f50, "vaddhn.i%20-21T2\t%12-15,22D, %16-19,7Q, %0-3,5Q"},
+ {FPU_NEON_EXT_V1, 0xf2800600, 0xff800f50, "vsubhn.i%20-21T2\t%12-15,22D, %16-19,7Q, %0-3,5Q"},
+ {FPU_NEON_EXT_V1, 0xf2800900, 0xff800f50, "vqdmlal.s%20-21S6\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf2800b00, 0xff800f50, "vqdmlsl.s%20-21S6\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf2800d00, 0xff800f50, "vqdmull.s%20-21S6\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf3800400, 0xff800f50, "vraddhn.i%20-21T2\t%12-15,22D, %16-19,7Q, %0-3,5Q"},
+ {FPU_NEON_EXT_V1, 0xf3800600, 0xff800f50, "vrsubhn.i%20-21T2\t%12-15,22D, %16-19,7Q, %0-3,5Q"},
+ {FPU_NEON_EXT_V1, 0xf2800000, 0xfe800f50, "vaddl.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf2800100, 0xfe800f50, "vaddw.%24?us%20-21S2\t%12-15,22Q, %16-19,7Q, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf2800200, 0xfe800f50, "vsubl.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf2800300, 0xfe800f50, "vsubw.%24?us%20-21S2\t%12-15,22Q, %16-19,7Q, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf2800500, 0xfe800f50, "vabal.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf2800700, 0xfe800f50, "vabdl.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf2800800, 0xfe800f50, "vmlal.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf2800a00, 0xfe800f50, "vmlsl.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+ {FPU_NEON_EXT_V1, 0xf2800c00, 0xfe800f50, "vmull.%24?us%20-21S2\t%12-15,22Q, %16-19,7D, %0-3,5D"},
+
+ /* Two registers and a scalar */
+ {FPU_NEON_EXT_V1, 0xf2800040, 0xff800f50, "vmla.i%20-21S6\t%12-15,22D, %16-19,7D, %D"},
+ {FPU_NEON_EXT_V1, 0xf2800140, 0xff800f50, "vmla.f%20-21Sa\t%12-15,22D, %16-19,7D, %D"},
+ {FPU_NEON_EXT_V1, 0xf2800340, 0xff800f50, "vqdmlal.s%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
+ {FPU_NEON_EXT_V1, 0xf2800440, 0xff800f50, "vmls.i%20-21S6\t%12-15,22D, %16-19,7D, %D"},
+ {FPU_NEON_EXT_V1, 0xf2800540, 0xff800f50, "vmls.f%20-21S6\t%12-15,22D, %16-19,7D, %D"},
+ {FPU_NEON_EXT_V1, 0xf2800740, 0xff800f50, "vqdmlsl.s%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
+ {FPU_NEON_EXT_V1, 0xf2800840, 0xff800f50, "vmul.i%20-21S6\t%12-15,22D, %16-19,7D, %D"},
+ {FPU_NEON_EXT_V1, 0xf2800940, 0xff800f50, "vmul.f%20-21Sa\t%12-15,22D, %16-19,7D, %D"},
+ {FPU_NEON_EXT_V1, 0xf2800b40, 0xff800f50, "vqdmull.s%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
+ {FPU_NEON_EXT_V1, 0xf2800c40, 0xff800f50, "vqdmulh.s%20-21S6\t%12-15,22D, %16-19,7D, %D"},
+ {FPU_NEON_EXT_V1, 0xf2800d40, 0xff800f50, "vqrdmulh.s%20-21S6\t%12-15,22D, %16-19,7D, %D"},
+ {FPU_NEON_EXT_V1, 0xf3800040, 0xff800f50, "vmla.i%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
+ {FPU_NEON_EXT_V1, 0xf3800140, 0xff800f50, "vmla.f%20-21Sa\t%12-15,22Q, %16-19,7Q, %D"},
+ {FPU_NEON_EXT_V1, 0xf3800440, 0xff800f50, "vmls.i%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
+ {FPU_NEON_EXT_V1, 0xf3800540, 0xff800f50, "vmls.f%20-21Sa\t%12-15,22Q, %16-19,7Q, %D"},
+ {FPU_NEON_EXT_V1, 0xf3800840, 0xff800f50, "vmul.i%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
+ {FPU_NEON_EXT_V1, 0xf3800940, 0xff800f50, "vmul.f%20-21Sa\t%12-15,22Q, %16-19,7Q, %D"},
+ {FPU_NEON_EXT_V1, 0xf3800c40, 0xff800f50, "vqdmulh.s%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
+ {FPU_NEON_EXT_V1, 0xf3800d40, 0xff800f50, "vqrdmulh.s%20-21S6\t%12-15,22Q, %16-19,7Q, %D"},
+ {FPU_NEON_EXT_V1, 0xf2800240, 0xfe800f50, "vmlal.%24?us%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
+ {FPU_NEON_EXT_V1, 0xf2800640, 0xfe800f50, "vmlsl.%24?us%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
+ {FPU_NEON_EXT_V1, 0xf2800a40, 0xfe800f50, "vmull.%24?us%20-21S6\t%12-15,22Q, %16-19,7D, %D"},
+
+ /* Element and structure load/store */
+ {FPU_NEON_EXT_V1, 0xf4a00fc0, 0xffb00fc0, "vld4.32\t%C"},
+ {FPU_NEON_EXT_V1, 0xf4a00c00, 0xffb00f00, "vld1.%6-7S2\t%C"},
+ {FPU_NEON_EXT_V1, 0xf4a00d00, 0xffb00f00, "vld2.%6-7S2\t%C"},
+ {FPU_NEON_EXT_V1, 0xf4a00e00, 0xffb00f00, "vld3.%6-7S2\t%C"},
+ {FPU_NEON_EXT_V1, 0xf4a00f00, 0xffb00f00, "vld4.%6-7S2\t%C"},
+ {FPU_NEON_EXT_V1, 0xf4000200, 0xff900f00, "v%21?ls%21?dt1.%6-7S3\t%A"},
+ {FPU_NEON_EXT_V1, 0xf4000300, 0xff900f00, "v%21?ls%21?dt2.%6-7S2\t%A"},
+ {FPU_NEON_EXT_V1, 0xf4000400, 0xff900f00, "v%21?ls%21?dt3.%6-7S2\t%A"},
+ {FPU_NEON_EXT_V1, 0xf4000500, 0xff900f00, "v%21?ls%21?dt3.%6-7S2\t%A"},
+ {FPU_NEON_EXT_V1, 0xf4000600, 0xff900f00, "v%21?ls%21?dt1.%6-7S3\t%A"},
+ {FPU_NEON_EXT_V1, 0xf4000700, 0xff900f00, "v%21?ls%21?dt1.%6-7S3\t%A"},
+ {FPU_NEON_EXT_V1, 0xf4000800, 0xff900f00, "v%21?ls%21?dt2.%6-7S2\t%A"},
+ {FPU_NEON_EXT_V1, 0xf4000900, 0xff900f00, "v%21?ls%21?dt2.%6-7S2\t%A"},
+ {FPU_NEON_EXT_V1, 0xf4000a00, 0xff900f00, "v%21?ls%21?dt1.%6-7S3\t%A"},
+ {FPU_NEON_EXT_V1, 0xf4000000, 0xff900e00, "v%21?ls%21?dt4.%6-7S2\t%A"},
+ {FPU_NEON_EXT_V1, 0xf4800000, 0xff900300, "v%21?ls%21?dt1.%10-11S2\t%B"},
+ {FPU_NEON_EXT_V1, 0xf4800100, 0xff900300, "v%21?ls%21?dt2.%10-11S2\t%B"},
+ {FPU_NEON_EXT_V1, 0xf4800200, 0xff900300, "v%21?ls%21?dt3.%10-11S2\t%B"},
+ {FPU_NEON_EXT_V1, 0xf4800300, 0xff900300, "v%21?ls%21?dt4.%10-11S2\t%B"},
+
+ {0,0 ,0, 0}
+};
+
/* Opcode tables: ARM, 16-bit Thumb, 32-bit Thumb. All three are partially
ordered: they must be searched linearly from the top to obtain a correct
match. */
@@ -410,10 +723,10 @@ static const struct opcode32 coprocessor_opcodes[] =
%<bitfield>W print the bitfield plus one in decimal
%<bitfield>x print the bitfield in hex
%<bitfield>X print the bitfield as 1 hex digit without leading "0x"
-
- %<bitnum>'c print specified char iff bit is one
- %<bitnum>`c print specified char iff bit is zero
- %<bitnum>?ab print a if bit is one else print b
+
+ %<bitfield>'c print specified char iff bitfield is all ones
+ %<bitfield>`c print specified char iff bitfield is all zeroes
+ %<bitfield>?ab... select from array of values in big endian order
%e print arm SMI operand (bits 0..7,8..19).
%E print the LSB and WIDTH fields of a BFI or BFC instruction.
@@ -844,9 +1157,9 @@ static const struct opcode16 thumb_opcodes[] =
%<bitfield>r print bitfield as an ARM register
%<bitfield>c print bitfield as a condition code
- %<bitnum>'c print "c" iff bit is one
- %<bitnum>`c print "c" iff bit is zero
- %<bitnum>?ab print "a" if bit is one, else "b"
+ %<bitfield>'c print specified char iff bitfield is all ones
+ %<bitfield>`c print specified char iff bitfield is all zeroes
+ %<bitfield>?ab... select from array of values in big endian order
With one exception at the bottom (done because BL and BLX(1) need
to come dead last), this table was machine-sorted first in
@@ -1149,6 +1462,43 @@ get_arm_regnames (int option, const char **setname, const char **setdescription,
return 16;
}
+/* Decode a bitfield of the form matching regexp (N(-N)?,)*N(-N)?.
+ Returns pointer to following character of the format string and
+ fills in *VALUEP and *WIDTHP with the extracted value and number of
+ bits extracted. WIDTHP can be NULL. */
+
+static const char *
+arm_decode_bitfield (const char *ptr, unsigned long insn,
+ unsigned long *valuep, int *widthp)
+{
+ unsigned long value = 0;
+ int width = 0;
+
+ do
+ {
+ int start, end;
+ int bits;
+
+ for (start = 0; *ptr >= '0' && *ptr <= '9'; ptr++)
+ start = start * 10 + *ptr - '0';
+ if (*ptr == '-')
+ for (end = 0, ptr++; *ptr >= '0' && *ptr <= '9'; ptr++)
+ end = end * 10 + *ptr - '0';
+ else
+ end = start;
+ bits = end - start;
+ if (bits < 0)
+ abort ();
+ value |= ((insn >> start) & ((2ul << bits) - 1)) << width;
+ width += bits + 1;
+ }
+ while (*ptr++ == ',');
+ *valuep = value;
+ if (widthp)
+ *widthp = width;
+ return ptr - 1;
+}
+
static void
arm_decode_shift (long given, fprintf_ftype func, void *stream)
{
@@ -1185,7 +1535,7 @@ arm_decode_shift (long given, fprintf_ftype func, void *stream)
recognised coprocessor instruction. */
static bfd_boolean
-print_insn_coprocessor (struct disassemble_info *info, long given,
+print_insn_coprocessor (bfd_vma pc, struct disassemble_info *info, long given,
bfd_boolean thumb)
{
const struct opcode32 *insn;
@@ -1265,6 +1615,46 @@ print_insn_coprocessor (struct disassemble_info *info, long given,
}
break;
+ case 'B':
+ {
+ int regno = ((given >> 12) & 0xf) | ((given >> (22 - 4)) & 0x10);
+ int offset = (given >> 1) & 0x3f;
+
+ if (offset == 1)
+ func (stream, "{d%d}", regno);
+ else if (regno + offset > 32)
+ func (stream, "{d%d-<overflow reg d%d>}", regno, regno + offset - 1);
+ else
+ func (stream, "{d%d-d%d}", regno, regno + offset - 1);
+ }
+ break;
+
+ case 'C':
+ {
+ int rn = (given >> 16) & 0xf;
+ int offset = (given & 0xff) * 4;
+ int add = (given >> 23) & 1;
+
+ func (stream, "[%s", arm_regnames[rn]);
+
+ if (offset)
+ {
+ if (!add)
+ offset = -offset;
+ func (stream, ", #%d", offset);
+ }
+ func (stream, "]");
+ if (rn == 15)
+ {
+ func (stream, "\t; ");
+ /* FIXME: Unsure if info->bytes_per_chunk is the
+ right thing to use here. */
+ info->print_address_func (offset + pc
+ + info->bytes_per_chunk * 2, info);
+ }
+ }
+ break;
+
case 'c':
func (stream, "%s",
arm_conditional [(given >> 28) & 0xf]);
@@ -1360,206 +1750,158 @@ print_insn_coprocessor (struct disassemble_info *info, long given,
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
{
- int bitstart = *c++ - '0';
- int bitend = 0;
- while (*c >= '0' && *c <= '9')
- bitstart = (bitstart * 10) + *c++ - '0';
+ int width;
+ unsigned long value;
+
+ c = arm_decode_bitfield (c, given, &value, &width);
switch (*c)
{
- case '-':
- c++;
-
- while (*c >= '0' && *c <= '9')
- bitend = (bitend * 10) + *c++ - '0';
-
- if (!bitend)
- abort ();
-
- switch (*c)
- {
- case 'r':
- {
- long reg;
-
- reg = given >> bitstart;
- reg &= (2 << (bitend - bitstart)) - 1;
+ case 'r':
+ func (stream, "%s", arm_regnames[value]);
+ break;
+ case 'D':
+ func (stream, "d%ld", value);
+ break;
+ case 'Q':
+ if (value & 1)
+ func (stream, "<illegal reg q%ld.5>", value >> 1);
+ else
+ func (stream, "q%ld", value >> 1);
+ break;
+ case 'd':
+ func (stream, "%ld", value);
+ break;
+ case 'k':
+ {
+ int from = (given & (1 << 7)) ? 32 : 16;
+ func (stream, "%ld", from - value);
+ }
+ break;
+
+ case 'f':
+ if (value > 7)
+ func (stream, "#%s", arm_fp_const[value & 7]);
+ else
+ func (stream, "f%ld", value);
+ break;
- func (stream, "%s", arm_regnames[reg]);
- }
- break;
- case 'd':
- {
- long reg;
+ case 'w':
+ if (width == 2)
+ func (stream, "%s", iwmmxt_wwnames[value]);
+ else
+ func (stream, "%s", iwmmxt_wwssnames[value]);
+ break;
- reg = given >> bitstart;
- reg &= (2 << (bitend - bitstart)) - 1;
+ case 'g':
+ func (stream, "%s", iwmmxt_regnames[value]);
+ break;
+ case 'G':
+ func (stream, "%s", iwmmxt_cregnames[value]);
+ break;
+ case '`':
+ c++;
+ if (value == 0)
+ func (stream, "%c", *c);
+ break;
+ case '\'':
+ c++;
+ if (value == ((1ul << width) - 1))
+ func (stream, "%c", *c);
+ break;
+ case '?':
+ func (stream, "%c", c[(1 << width) - (int)value]);
+ c += 1 << width;
+ break;
+ default:
+ abort ();
+ }
+ break;
- func (stream, "%ld", reg);
- }
- break;
- case 'f':
+ case 'y':
+ case 'z':
+ {
+ int single = *c++ == 'y';
+ int regno;
+
+ switch (*c)
+ {
+ case '4': /* Sm pair */
+ func (stream, "{");
+ /* Fall through. */
+ case '0': /* Sm, Dm */
+ regno = given & 0x0000000f;
+ if (single)
{
- long reg;
-
- reg = given >> bitstart;
- reg &= (2 << (bitend - bitstart)) - 1;
-
- if (reg > 7)
- func (stream, "#%s",
- arm_fp_const[reg & 7]);
- else
- func (stream, "f%ld", reg);
+ regno <<= 1;
+ regno += (given >> 5) & 1;
}
- break;
+ else
+ regno += ((given >> 5) & 1) << 4;
+ break;
- case 'w':
+ case '1': /* Sd, Dd */
+ regno = (given >> 12) & 0x0000000f;
+ if (single)
{
- long reg;
-
- if (bitstart != bitend)
- {
- reg = given >> bitstart;
- reg &= (2 << (bitend - bitstart)) - 1;
- if (bitend - bitstart == 1)
- func (stream, "%s", iwmmxt_wwnames[reg]);
- else
- func (stream, "%s", iwmmxt_wwssnames[reg]);
- }
- else
- {
- reg = (((given >> 8) & 0x1) |
- ((given >> 22) & 0x1));
- func (stream, "%s", iwmmxt_wwnames[reg]);
- }
+ regno <<= 1;
+ regno += (given >> 22) & 1;
}
- break;
+ else
+ regno += ((given >> 22) & 1) << 4;
+ break;
- case 'g':
+ case '2': /* Sn, Dn */
+ regno = (given >> 16) & 0x0000000f;
+ if (single)
{
- long reg;
- reg = given >> bitstart;
- reg &= (2 << (bitend - bitstart)) - 1;
- func (stream, "%s", iwmmxt_regnames[reg]);
+ regno <<= 1;
+ regno += (given >> 7) & 1;
}
- break;
-
- case 'G':
+ else
+ regno += ((given >> 7) & 1) << 4;
+ break;
+
+ case '3': /* List */
+ func (stream, "{");
+ regno = (given >> 12) & 0x0000000f;
+ if (single)
{
- long reg;
- reg = given >> bitstart;
- reg &= (2 << (bitend - bitstart)) - 1;
- func (stream, "%s", iwmmxt_cregnames[reg]);
+ regno <<= 1;
+ regno += (given >> 22) & 1;
}
- break;
+ else
+ regno += ((given >> 22) & 1) << 4;
+ break;
+
+ default:
+ abort ();
+ }
- default:
- abort ();
- }
- break;
+ func (stream, "%c%d", single ? 's' : 'd', regno);
- case 'y':
- case 'z':
+ if (*c == '3')
{
- int single = *c == 'y';
- int regno;
-
- switch (bitstart)
- {
- case 4: /* Sm pair */
- func (stream, "{");
- /* Fall through. */
- case 0: /* Sm, Dm */
- regno = given & 0x0000000f;
- if (single)
- {
- regno <<= 1;
- regno += (given >> 5) & 1;
- }
- break;
-
- case 1: /* Sd, Dd */
- regno = (given >> 12) & 0x0000000f;
- if (single)
- {
- regno <<= 1;
- regno += (given >> 22) & 1;
- }
- break;
-
- case 2: /* Sn, Dn */
- regno = (given >> 16) & 0x0000000f;
- if (single)
- {
- regno <<= 1;
- regno += (given >> 7) & 1;
- }
- break;
-
- case 3: /* List */
- func (stream, "{");
- regno = (given >> 12) & 0x0000000f;
- if (single)
- {
- regno <<= 1;
- regno += (given >> 22) & 1;
- }
- break;
-
-
- default:
- abort ();
- }
-
- func (stream, "%c%d", single ? 's' : 'd', regno);
-
- if (bitstart == 3)
+ int count = given & 0xff;
+
+ if (single == 0)
+ count >>= 1;
+
+ if (--count)
{
- int count = given & 0xff;
-
- if (single == 0)
- count >>= 1;
-
- if (--count)
- {
- func (stream, "-%c%d",
- single ? 's' : 'd',
- regno + count);
- }
-
- func (stream, "}");
+ func (stream, "-%c%d",
+ single ? 's' : 'd',
+ regno + count);
}
- else if (bitstart == 4)
- func (stream, ", %c%d}", single ? 's' : 'd',
- regno + 1);
-
- break;
+
+ func (stream, "}");
}
-
- break;
-
- case '`':
- c++;
- if ((given & (1 << bitstart)) == 0)
- func (stream, "%c", *c);
- break;
- case '\'':
- c++;
- if ((given & (1 << bitstart)) != 0)
- func (stream, "%c", *c);
- break;
- case '?':
- ++c;
- if ((given & (1 << bitstart)) != 0)
- func (stream, "%c", *c++);
- else
- func (stream, "%c", *++c);
- break;
- default:
- abort ();
- }
+ else if (*c == '4')
+ func (stream, ", %c%d}", single ? 's' : 'd',
+ regno + 1);
+ }
break;
-
+
case 'L':
switch (given & 0x00400100)
{
@@ -1712,6 +2054,436 @@ print_arm_address (bfd_vma pc, struct disassemble_info *info, long given)
}
}
+/* Print one neon instruction on INFO->STREAM.
+ Return TRUE if the instuction matched, FALSE if this is not a
+ recognised neon instruction. */
+
+static bfd_boolean
+print_insn_neon (struct disassemble_info *info, long given, bfd_boolean thumb)
+{
+ const struct opcode32 *insn;
+ void *stream = info->stream;
+ fprintf_ftype func = info->fprintf_func;
+
+ if (thumb)
+ {
+ if ((given & 0xef000000) == 0xef000000)
+ {
+ /* move bit 28 to bit 24 to translate Thumb2 to ARM encoding. */
+ unsigned long bit28 = given & (1 << 28);
+
+ given &= 0x00ffffff;
+ if (bit28)
+ given |= 0xf3000000;
+ else
+ given |= 0xf2000000;
+ }
+ else if ((given & 0xff000000) == 0xf9000000)
+ given ^= 0xf9000000 ^ 0xf4000000;
+ else
+ return FALSE;
+ }
+
+ for (insn = neon_opcodes; insn->assembler; insn++)
+ {
+ if ((given & insn->mask) == insn->value)
+ {
+ const char *c;
+
+ for (c = insn->assembler; *c; c++)
+ {
+ if (*c == '%')
+ {
+ switch (*++c)
+ {
+ case '%':
+ func (stream, "%%");
+ break;
+
+ case 'A':
+ {
+ static const unsigned char enc[16] =
+ {
+ 0x4, 0x14, /* st4 0,1 */
+ 0x4, /* st1 2 */
+ 0x4, /* st2 3 */
+ 0x3, /* st3 4 */
+ 0x13, /* st3 5 */
+ 0x3, /* st1 6 */
+ 0x1, /* st1 7 */
+ 0x2, /* st2 8 */
+ 0x12, /* st2 9 */
+ 0x2, /* st1 10 */
+ 0, 0, 0, 0, 0
+ };
+ int rd = ((given >> 12) & 0xf) | (((given >> 22) & 1) << 4);
+ int rn = ((given >> 16) & 0xf);
+ int rm = ((given >> 0) & 0xf);
+ int align = ((given >> 4) & 0x3);
+ int type = ((given >> 8) & 0xf);
+ int n = enc[type] & 0xf;
+ int stride = (enc[type] >> 4) + 1;
+ int ix;
+
+ func (stream, "{");
+ if (stride > 1)
+ for (ix = 0; ix != n; ix++)
+ func (stream, "%sd%d", ix ? "," : "", rd + ix * stride);
+ else if (n == 1)
+ func (stream, "d%d", rd);
+ else
+ func (stream, "d%d-d%d", rd, rd + n - 1);
+ func (stream, "}, [%s", arm_regnames[rn]);
+ if (align)
+ func (stream, ", :%d", 32 << align);
+ func (stream, "]");
+ if (rm == 0xd)
+ func (stream, "!");
+ else if (rm != 0xf)
+ func (stream, ", %s", arm_regnames[rm]);
+ }
+ break;
+
+ case 'B':
+ {
+ int rd = ((given >> 12) & 0xf) | (((given >> 22) & 1) << 4);
+ int rn = ((given >> 16) & 0xf);
+ int rm = ((given >> 0) & 0xf);
+ int idx_align = ((given >> 4) & 0xf);
+ int align = 0;
+ int size = ((given >> 10) & 0x3);
+ int idx = idx_align >> (size + 1);
+ int length = ((given >> 8) & 3) + 1;
+ int stride = 1;
+ int i;
+
+ if (length > 1 && size > 0)
+ stride = (idx_align & (1 << size)) ? 2 : 1;
+
+ switch (length)
+ {
+ case 1:
+ {
+ int amask = (1 << size) - 1;
+ if ((idx_align & (1 << size)) != 0)
+ return FALSE;
+ if (size > 0)
+ {
+ if ((idx_align & amask) == amask)
+ align = 8 << size;
+ else if ((idx_align & amask) != 0)
+ return FALSE;
+ }
+ }
+ break;
+
+ case 2:
+ if (size == 2 && (idx_align & 2) != 0)
+ return FALSE;
+ align = (idx_align & 1) ? 16 << size : 0;
+ break;
+
+ case 3:
+ if ((size == 2 && (idx_align & 3) != 0)
+ || (idx_align & 1) != 0)
+ return FALSE;
+ break;
+
+ case 4:
+ if (size == 2)
+ {
+ if ((idx_align & 3) == 3)
+ return FALSE;
+ align = (idx_align & 3) * 64;
+ }
+ else
+ align = (idx_align & 1) ? 32 << size : 0;
+ break;
+
+ default:
+ abort ();
+ }
+
+ func (stream, "{");
+ for (i = 0; i < length; i++)
+ func (stream, "%sd%d[%d]", (i == 0) ? "" : ",",
+ rd + i * stride, idx);
+ func (stream, "}, [%s", arm_regnames[rn]);
+ if (align)
+ func (stream, ", :%d", align);
+ func (stream, "]");
+ if (rm == 0xd)
+ func (stream, "!");
+ else if (rm != 0xf)
+ func (stream, ", %s", arm_regnames[rm]);
+ }
+ break;
+
+ case 'C':
+ {
+ int rd = ((given >> 12) & 0xf) | (((given >> 22) & 1) << 4);
+ int rn = ((given >> 16) & 0xf);
+ int rm = ((given >> 0) & 0xf);
+ int align = ((given >> 4) & 0x1);
+ int size = ((given >> 6) & 0x3);
+ int type = ((given >> 8) & 0x3);
+ int n = type + 1;
+ int stride = ((given >> 5) & 0x1);
+ int ix;
+
+ if (stride && (n == 1))
+ n++;
+ else
+ stride++;
+
+ func (stream, "{");
+ if (stride > 1)
+ for (ix = 0; ix != n; ix++)
+ func (stream, "%sd%d[]", ix ? "," : "", rd + ix * stride);
+ else if (n == 1)
+ func (stream, "d%d[]", rd);
+ else
+ func (stream, "d%d[]-d%d[]", rd, rd + n - 1);
+ func (stream, "}, [%s", arm_regnames[rn]);
+ if (align)
+ {
+ int align = (8 * (type + 1)) << size;
+ if (type == 3)
+ align = (size > 1) ? align >> 1 : align;
+ if (type == 2 || (type == 0 && !size))
+ func (stream, ", :<bad align %d>", align);
+ else
+ func (stream, ", :%d", align);
+ }
+ func (stream, "]");
+ if (rm == 0xd)
+ func (stream, "!");
+ else if (rm != 0xf)
+ func (stream, ", %s", arm_regnames[rm]);
+ }
+ break;
+
+ case 'D':
+ {
+ int raw_reg = (given & 0xf) | ((given >> 1) & 0x10);
+ int size = (given >> 20) & 3;
+ int reg = raw_reg & ((4 << size) - 1);
+ int ix = raw_reg >> size >> 2;
+
+ func (stream, "d%d[%d]", reg, ix);
+ }
+ break;
+
+ case 'E':
+ /* Neon encoded constant for mov, mvn, vorr, vbic */
+ {
+ int bits = 0;
+ int cmode = (given >> 8) & 0xf;
+ int op = (given >> 5) & 0x1;
+ unsigned long value = 0, hival = 0;
+ unsigned shift;
+ int size = 0;
+
+ bits |= ((given >> 24) & 1) << 7;
+ bits |= ((given >> 16) & 7) << 4;
+ bits |= ((given >> 0) & 15) << 0;
+
+ if (cmode < 8)
+ {
+ shift = (cmode >> 1) & 3;
+ value = (unsigned long)bits << (8 * shift);
+ size = 32;
+ }
+ else if (cmode < 12)
+ {
+ shift = (cmode >> 1) & 1;
+ value = (unsigned long)bits << (8 * shift);
+ size = 16;
+ }
+ else if (cmode < 14)
+ {
+ shift = (cmode & 1) + 1;
+ value = (unsigned long)bits << (8 * shift);
+ value |= (1ul << (8 * shift)) - 1;
+ size = 32;
+ }
+ else if (cmode == 14)
+ {
+ if (op)
+ {
+ /* bit replication into bytes */
+ int ix;
+ unsigned long mask;
+
+ value = 0;
+ hival = 0;
+ for (ix = 7; ix >= 0; ix--)
+ {
+ mask = ((bits >> ix) & 1) ? 0xff : 0;
+ if (ix <= 3)
+ value = (value << 8) | mask;
+ else
+ hival = (hival << 8) | mask;
+ }
+ size = 64;
+ }
+ else
+ {
+ /* byte replication */
+ value = (unsigned long)bits;
+ size = 8;
+ }
+ }
+ else if (!op)
+ {
+ /* floating point encoding */
+ int tmp;
+
+ value = (unsigned long)(bits & 0x7f) << (24 - 6);
+ value |= (unsigned long)(bits & 0x80) << 24;
+ tmp = bits & 0x40 ? 0x3c : 0x40;
+ value |= (unsigned long)tmp << 24;
+ size = 32;
+ }
+ else
+ {
+ func (stream, "<illegal constant %.8x:%x:%x>",
+ bits, cmode, op);
+ size = 32;
+ break;
+ }
+ switch (size)
+ {
+ case 8:
+ func (stream, "#%ld\t; 0x%.2lx", value, value);
+ break;
+
+ case 16:
+ func (stream, "#%ld\t; 0x%.4lx", value, value);
+ break;
+
+ case 32:
+ func (stream, "#%ld\t; 0x%.8lx", value, value);
+ break;
+
+ case 64:
+ func (stream, "#0x%.8lx%.8lx", hival, value);
+ break;
+
+ default:
+ abort ();
+ }
+ }
+ break;
+
+ case 'F':
+ {
+ int regno = ((given >> 16) & 0xf) | ((given >> (7 - 4)) & 0x10);
+ int num = (given >> 8) & 0x3;
+
+ if (!num)
+ func (stream, "{d%d}", regno);
+ else if (num + regno >= 32)
+ func (stream, "{d%d-<overflow reg d%d}", regno, regno + num);
+ else
+ func (stream, "{d%d-d%d}", regno, regno + num);
+ }
+ break;
+
+
+ case '0': case '1': case '2': case '3': case '4':
+ case '5': case '6': case '7': case '8': case '9':
+ {
+ int width;
+ unsigned long value;
+
+ c = arm_decode_bitfield (c, given, &value, &width);
+
+ switch (*c)
+ {
+ case 'r':
+ func (stream, "%s", arm_regnames[value]);
+ break;
+ case 'd':
+ func (stream, "%ld", value);
+ break;
+ case 'e':
+ func (stream, "%ld", (1ul << width) - value);
+ break;
+
+ case 'S':
+ case 'T':
+ case 'U':
+ /* various width encodings */
+ {
+ int base = 8 << (*c - 'S'); /* 8,16 or 32 */
+ int limit;
+ unsigned low, high;
+
+ c++;
+ if (*c >= '0' && *c <= '9')
+ limit = *c - '0';
+ else if (*c >= 'a' && *c <= 'f')
+ limit = *c - 'a' + 10;
+ else
+ abort ();
+ low = limit >> 2;
+ high = limit & 3;
+
+ if (value < low || value > high)
+ func (stream, "<illegal width %d>", base << value);
+ else
+ func (stream, "%d", base << value);
+ }
+ break;
+ case 'R':
+ if (given & (1 << 6))
+ goto Q;
+ /* FALLTHROUGH */
+ case 'D':
+ func (stream, "d%ld", value);
+ break;
+ case 'Q':
+ Q:
+ if (value & 1)
+ func (stream, "<illegal reg q%ld.5>", value >> 1);
+ else
+ func (stream, "q%ld", value >> 1);
+ break;
+
+ case '`':
+ c++;
+ if (value == 0)
+ func (stream, "%c", *c);
+ break;
+ case '\'':
+ c++;
+ if (value == ((1ul << width) - 1))
+ func (stream, "%c", *c);
+ break;
+ case '?':
+ func (stream, "%c", c[(1 << width) - (int)value]);
+ c += 1 << width;
+ break;
+ default:
+ abort ();
+ }
+ break;
+
+ default:
+ abort ();
+ }
+ }
+ }
+ else
+ func (stream, "%c", *c);
+ }
+ return TRUE;
+ }
+ }
+ return FALSE;
+}
+
/* Print one ARM instruction from PC on INFO->STREAM. */
static void
@@ -1721,7 +2493,10 @@ print_insn_arm (bfd_vma pc, struct disassemble_info *info, long given)
void *stream = info->stream;
fprintf_ftype func = info->fprintf_func;
- if (print_insn_coprocessor (info, given, FALSE))
+ if (print_insn_coprocessor (pc, info, given, FALSE))
+ return;
+
+ if (print_insn_neon (info, given, FALSE))
return;
for (insn = arm_opcodes; insn->assembler; insn++)
@@ -1964,102 +2739,51 @@ print_insn_arm (bfd_vma pc, struct disassemble_info *info, long given)
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
{
- int bitstart = *c++ - '0';
- int bitend = 0;
- while (*c >= '0' && *c <= '9')
- bitstart = (bitstart * 10) + *c++ - '0';
+ int width;
+ unsigned long value;
+ c = arm_decode_bitfield (c, given, &value, &width);
+
switch (*c)
{
- case '-':
- c++;
-
- while (*c >= '0' && *c <= '9')
- bitend = (bitend * 10) + *c++ - '0';
-
- if (!bitend)
- abort ();
-
- switch (*c)
- {
- case 'r':
- {
- long reg;
-
- reg = given >> bitstart;
- reg &= (2 << (bitend - bitstart)) - 1;
-
- func (stream, "%s", arm_regnames[reg]);
- }
- break;
- case 'd':
- {
- long reg;
-
- reg = given >> bitstart;
- reg &= (2 << (bitend - bitstart)) - 1;
-
- func (stream, "%ld", reg);
- }
- break;
- case 'W':
- {
- long reg;
-
- reg = given >> bitstart;
- reg &= (2 << (bitend - bitstart)) - 1;
-
- func (stream, "%ld", reg + 1);
- }
- break;
- case 'x':
- {
- long reg;
-
- reg = given >> bitstart;
- reg &= (2 << (bitend - bitstart)) - 1;
-
- func (stream, "0x%08lx", reg);
-
- /* Some SWI instructions have special
- meanings. */
- if ((given & 0x0fffffff) == 0x0FF00000)
- func (stream, "\t; IMB");
- else if ((given & 0x0fffffff) == 0x0FF00001)
- func (stream, "\t; IMBRange");
- }
- break;
- case 'X':
- {
- long reg;
-
- reg = given >> bitstart;
- reg &= (2 << (bitend - bitstart)) - 1;
-
- func (stream, "%01lx", reg & 0xf);
- }
- break;
- default:
- abort ();
- }
+ case 'r':
+ func (stream, "%s", arm_regnames[value]);
+ break;
+ case 'd':
+ func (stream, "%ld", value);
+ break;
+ case 'b':
+ func (stream, "%ld", value * 8);
+ break;
+ case 'W':
+ func (stream, "%ld", value + 1);
+ break;
+ case 'x':
+ func (stream, "0x%08lx", value);
+
+ /* Some SWI instructions have special
+ meanings. */
+ if ((given & 0x0fffffff) == 0x0FF00000)
+ func (stream, "\t; IMB");
+ else if ((given & 0x0fffffff) == 0x0FF00001)
+ func (stream, "\t; IMBRange");
+ break;
+ case 'X':
+ func (stream, "%01lx", value & 0xf);
break;
-
case '`':
c++;
- if ((given & (1 << bitstart)) == 0)
+ if (value == 0)
func (stream, "%c", *c);
break;
case '\'':
c++;
- if ((given & (1 << bitstart)) != 0)
+ if (value == ((1ul << width) - 1))
func (stream, "%c", *c);
break;
case '?':
- ++c;
- if ((given & (1 << bitstart)) != 0)
- func (stream, "%c", *c++);
- else
- func (stream, "%c", *++c);
+ func (stream, "%c", c[(1 << width) - (int)value]);
+ c += 1 << width;
break;
default:
abort ();
@@ -2373,7 +3097,10 @@ print_insn_thumb32 (bfd_vma pc, struct disassemble_info *info, long given)
void *stream = info->stream;
fprintf_ftype func = info->fprintf_func;
- if (print_insn_coprocessor (info, given, TRUE))
+ if (print_insn_coprocessor (pc, info, given, TRUE))
+ return;
+
+ if (print_insn_neon (info, given, TRUE))
return;
for (insn = thumb32_opcodes; insn->assembler; insn++)
@@ -2752,30 +3479,15 @@ print_insn_thumb32 (bfd_vma pc, struct disassemble_info *info, long given)
case '0': case '1': case '2': case '3': case '4':
case '5': case '6': case '7': case '8': case '9':
{
- int bitstart = *c++ - '0';
- int bitend = 0;
- unsigned int val;
- while (*c >= '0' && *c <= '9')
- bitstart = (bitstart * 10) + *c++ - '0';
-
- if (*c == '-')
- {
- c++;
- while (*c >= '0' && *c <= '9')
- bitend = (bitend * 10) + *c++ - '0';
- if (!bitend)
- abort ();
-
- val = given >> bitstart;
- val &= (2 << (bitend - bitstart)) - 1;
- }
- else
- val = (given >> bitstart) & 1;
+ int width;
+ unsigned long val;
+ c = arm_decode_bitfield (c, given, &val, &width);
+
switch (*c)
{
- case 'd': func (stream, "%u", val); break;
- case 'W': func (stream, "%u", val * 4); break;
+ case 'd': func (stream, "%lu", val); break;
+ case 'W': func (stream, "%lu", val * 4); break;
case 'r': func (stream, "%s", arm_regnames[val]); break;
case 'c':
@@ -2786,20 +3498,20 @@ print_insn_thumb32 (bfd_vma pc, struct disassemble_info *info, long given)
break;
case '\'':
- if (val)
- func (stream, "%c", c[1]);
c++;
+ if (val == ((1ul << width) - 1))
+ func (stream, "%c", *c);
break;
case '`':
- if (!val)
- func (stream, "%c", c[1]);
c++;
+ if (val == 0)
+ func (stream, "%c", *c);
break;
case '?':
- func (stream, "%c", val ? c[1] : c[2]);
- c += 2;
+ func (stream, "%c", c[(1 << width) - (int)val]);
+ c += 1 << width;
break;
default: