path: root/Source/JavaScriptCore/assembler/ARM64Assembler.h
author: Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
committer: Lorry Tar Creator <lorry-tar-importer@lorry> 2017-06-27 06:07:23 +0000
commit: 1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c (patch)
tree: 46dcd36c86e7fbc6e5df36deb463b33e9967a6f7 /Source/JavaScriptCore/assembler/ARM64Assembler.h
parent: 32761a6cee1d0dee366b885b7b9c777e67885688 (diff)
download: WebKitGtk-tarball-1bf1084f2b10c3b47fd1a588d85d21ed0eb41d0c.tar.gz
Diffstat (limited to 'Source/JavaScriptCore/assembler/ARM64Assembler.h')
-rw-r--r--  Source/JavaScriptCore/assembler/ARM64Assembler.h  950
1 file changed, 540 insertions(+), 410 deletions(-)
diff --git a/Source/JavaScriptCore/assembler/ARM64Assembler.h b/Source/JavaScriptCore/assembler/ARM64Assembler.h
index cfbd8cec5..7421460e9 100644
--- a/Source/JavaScriptCore/assembler/ARM64Assembler.h
+++ b/Source/JavaScriptCore/assembler/ARM64Assembler.h
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2012 Apple Inc. All rights reserved.
+ * Copyright (C) 2012, 2014, 2017 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -23,12 +23,13 @@
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
-#ifndef ARM64Assembler_h
-#define ARM64Assembler_h
+#pragma once
#if ENABLE(ASSEMBLER) && CPU(ARM64)
#include "AssemblerBuffer.h"
+#include "AssemblerCommon.h"
+#include <limits.h>
#include <wtf/Assertions.h>
#include <wtf/Vector.h>
#include <stdint.h>
@@ -37,30 +38,28 @@
#define DATASIZE_OF(datasize) ((datasize == 64) ? Datasize_64 : Datasize_32)
#define MEMOPSIZE_OF(datasize) ((datasize == 8 || datasize == 128) ? MemOpSize_8_or_128 : (datasize == 16) ? MemOpSize_16 : (datasize == 32) ? MemOpSize_32 : MemOpSize_64)
#define CHECK_DATASIZE() CHECK_DATASIZE_OF(datasize)
+#define CHECK_VECTOR_DATASIZE() ASSERT(datasize == 64 || datasize == 128)
#define DATASIZE DATASIZE_OF(datasize)
#define MEMOPSIZE MEMOPSIZE_OF(datasize)
#define CHECK_FP_MEMOP_DATASIZE() ASSERT(datasize == 8 || datasize == 16 || datasize == 32 || datasize == 64 || datasize == 128)
+#define MEMPAIROPSIZE_INT(datasize) ((datasize == 64) ? MemPairOp_64 : MemPairOp_32)
+#define MEMPAIROPSIZE_FP(datasize) ((datasize == 128) ? MemPairOp_V128 : (datasize == 64) ? MemPairOp_V64 : MemPairOp_32)
namespace JSC {
-ALWAYS_INLINE bool isInt9(int32_t value)
+ALWAYS_INLINE bool isInt7(int32_t value)
{
- return value == ((value << 23) >> 23);
+ return value == ((value << 25) >> 25);
}
-ALWAYS_INLINE bool isUInt5(int32_t value)
+ALWAYS_INLINE bool isInt11(int32_t value)
{
- return !(value & ~0x1f);
+ return value == ((value << 21) >> 21);
}
-ALWAYS_INLINE bool isUInt12(int32_t value)
-{
- return !(value & ~0xfff);
-}
-
-ALWAYS_INLINE bool isUInt12(intptr_t value)
+ALWAYS_INLINE bool isUInt5(int32_t value)
{
- return !(value & ~0xfffL);
+ return !(value & ~0x1f);
}
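The isInt7/isInt11 helpers above test whether a value survives a sign-extending round trip through an N-bit field. A minimal equivalent range form, shown only to illustrate the trick (the name below is hypothetical, not part of the patch):

// Assumed-equivalent range form of the shift-based test above (illustrative only).
ALWAYS_INLINE bool isInt7AsRange(int32_t value)
{
    // (value << 25) >> 25 sign-extends bit 6, so the round trip is lossless
    // exactly when value already lies in [-64, 63].
    return value >= -64 && value <= 63;
}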
class UInt5 {
@@ -119,333 +118,174 @@ private:
int m_value;
};
-class LogicalImmediate {
+class PairPostIndex {
public:
- static LogicalImmediate create32(uint32_t value)
- {
- // Check for 0, -1 - these cannot be encoded.
- if (!value || !~value)
- return InvalidLogicalImmediate;
-
- // First look for a 32-bit pattern, then for repeating 16-bit
- // patterns, 8-bit, 4-bit, and finally 2-bit.
-
- unsigned hsb, lsb;
- bool inverted;
- if (findBitRange<32>(value, hsb, lsb, inverted))
- return encodeLogicalImmediate<32>(hsb, lsb, inverted);
-
- if ((value & 0xffff) != (value >> 16))
- return InvalidLogicalImmediate;
- value &= 0xffff;
-
- if (findBitRange<16>(value, hsb, lsb, inverted))
- return encodeLogicalImmediate<16>(hsb, lsb, inverted);
-
- if ((value & 0xff) != (value >> 8))
- return InvalidLogicalImmediate;
- value &= 0xff;
-
- if (findBitRange<8>(value, hsb, lsb, inverted))
- return encodeLogicalImmediate<8>(hsb, lsb, inverted);
-
- if ((value & 0xf) != (value >> 4))
- return InvalidLogicalImmediate;
- value &= 0xf;
-
- if (findBitRange<4>(value, hsb, lsb, inverted))
- return encodeLogicalImmediate<4>(hsb, lsb, inverted);
-
- if ((value & 0x3) != (value >> 2))
- return InvalidLogicalImmediate;
- value &= 0x3;
-
- if (findBitRange<2>(value, hsb, lsb, inverted))
- return encodeLogicalImmediate<2>(hsb, lsb, inverted);
-
- return InvalidLogicalImmediate;
- }
-
- static LogicalImmediate create64(uint64_t value)
- {
- // Check for 0, -1 - these cannot be encoded.
- if (!value || !~value)
- return InvalidLogicalImmediate;
-
- // Look for a contiguous bit range.
- unsigned hsb, lsb;
- bool inverted;
- if (findBitRange<64>(value, hsb, lsb, inverted))
- return encodeLogicalImmediate<64>(hsb, lsb, inverted);
-
- // If the high & low 32 bits are equal, we can try for a 32-bit (or narrower) pattern.
- if (static_cast<uint32_t>(value) == static_cast<uint32_t>(value >> 32))
- return create32(static_cast<uint32_t>(value));
- return InvalidLogicalImmediate;
- }
-
- int value() const
- {
- ASSERT(isValid());
- return m_value;
- }
-
- bool isValid() const
+ explicit PairPostIndex(int value)
+ : m_value(value)
{
- return m_value != InvalidLogicalImmediate;
+ ASSERT(isInt11(value));
}
- bool is64bit() const
- {
- return m_value & (1 << 12);
- }
+ operator int() { return m_value; }
private:
- LogicalImmediate(int value)
- : m_value(value)
- {
- }
-
- // Generate a mask with bits in the range hsb..0 set, for example:
- // hsb:63 = 0xffffffffffffffff
- // hsb:42 = 0x000007ffffffffff
- // hsb: 0 = 0x0000000000000001
- static uint64_t mask(unsigned hsb)
- {
- ASSERT(hsb < 64);
- return 0xffffffffffffffffull >> (63 - hsb);
- }
+ int m_value;
+};
- template<unsigned N>
- static void partialHSB(uint64_t& value, unsigned&result)
+class PairPreIndex {
+public:
+ explicit PairPreIndex(int value)
+ : m_value(value)
{
- if (value & (0xffffffffffffffffull << N)) {
- result += N;
- value >>= N;
- }
+ ASSERT(isInt11(value));
}
- // Find the bit number of the highest bit set in a non-zero value, for example:
- // 0x8080808080808080 = hsb:63
- // 0x0000000000000001 = hsb: 0
- // 0x000007ffffe00000 = hsb:42
- static unsigned highestSetBit(uint64_t value)
- {
- ASSERT(value);
- unsigned hsb = 0;
- partialHSB<32>(value, hsb);
- partialHSB<16>(value, hsb);
- partialHSB<8>(value, hsb);
- partialHSB<4>(value, hsb);
- partialHSB<2>(value, hsb);
- partialHSB<1>(value, hsb);
- return hsb;
- }
-
- // This function takes a value and a bit width, where value obeys the following constraints:
- // * bits outside of the width of the value must be zero.
- // * bits within the width of value must neither be all clear or all set.
- // The input is inspected to detect values that consist of either two or three contiguous
- // ranges of bits. The output range hsb..lsb will describe the second range of the value.
- // if the range is set, inverted will be false, and if the range is clear, inverted will
- // be true. For example (with width 8):
- // 00001111 = hsb:3, lsb:0, inverted:false
- // 11110000 = hsb:3, lsb:0, inverted:true
- // 00111100 = hsb:5, lsb:2, inverted:false
- // 11000011 = hsb:5, lsb:2, inverted:true
- template<unsigned width>
- static bool findBitRange(uint64_t value, unsigned& hsb, unsigned& lsb, bool& inverted)
- {
- ASSERT(value & mask(width - 1));
- ASSERT(value != mask(width - 1));
- ASSERT(!(value & ~mask(width - 1)));
-
- // Detect cases where the top bit is set; if so, flip all the bits & set invert.
- // This halves the number of patterns we need to look for.
- const uint64_t msb = 1ull << (width - 1);
- if ((inverted = (value & msb)))
- value ^= mask(width - 1);
-
- // Find the highest set bit in value, generate a corresponding mask & flip all
- // bits under it.
- hsb = highestSetBit(value);
- value ^= mask(hsb);
- if (!value) {
- // If this cleared the value, then the range hsb..0 was all set.
- lsb = 0;
- return true;
- }
-
- // Try making one more mask, and flipping the bits!
- lsb = highestSetBit(value);
- value ^= mask(lsb);
- if (!value) {
- // Success - but lsb actually points to the hsb of a third range - add one
- // to get to the lsb of the mid range.
- ++lsb;
- return true;
- }
-
- return false;
- }
-
- // Encodes the set of immN:immr:imms fields found in a logical immediate.
- template<unsigned width>
- static int encodeLogicalImmediate(unsigned hsb, unsigned lsb, bool inverted)
- {
- // Check width is a power of 2!
- ASSERT(!(width & (width -1)));
- ASSERT(width <= 64 && width >= 2);
- ASSERT(hsb >= lsb);
- ASSERT(hsb < width);
-
- int immN = 0;
- int imms = 0;
- int immr = 0;
-
- // For 64-bit values this is easy - just set immN to true, and imms just
- // contains the bit number of the highest set bit of the set range. For
- // values with narrower widths, these are encoded by a leading set of
- // one bits, followed by a zero bit, followed by the remaining set of bits
- // being the high bit of the range. For a 32-bit immediate there are no
- // leading one bits, just a zero followed by a five bit number. For a
- // 16-bit immediate there is one one bit, a zero bit, and then a four bit
- // bit-position, etc.
- if (width == 64)
- immN = 1;
- else
- imms = 63 & ~(width + width - 1);
-
- if (inverted) {
- // if width is 64 & hsb is 62, then we have a value something like:
- // 0x80000000ffffffff (in this case with lsb 32).
- // The ror should be by 1, imms (effectively set width minus 1) is
- // 32. Set width is full width minus cleared width.
- immr = (width - 1) - hsb;
- imms |= (width - ((hsb - lsb) + 1)) - 1;
- } else {
- // if width is 64 & hsb is 62, then we have a value something like:
- // 0x7fffffff00000000 (in this case with lsb 32).
- // The value is effectively rol'ed by lsb, which is equivalent to
- // a ror by width - lsb (or 0, in the case where lsb is 0). imms
- // is hsb - lsb.
- immr = (width - lsb) & (width - 1);
- imms |= hsb - lsb;
- }
-
- return immN << 12 | immr << 6 | imms;
- }
-
- static const int InvalidLogicalImmediate = -1;
+ operator int() { return m_value; }
+private:
int m_value;
};
+typedef ARM64LogicalImmediate LogicalImmediate;
+
inline uint16_t getHalfword(uint64_t value, int which)
{
return value >> (which << 4);
}
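getHalfword() selects the 16-bit chunk at index |which| of a 64-bit value; a quick worked example (values chosen for this note only):

// getHalfword(0x0123456789abcdef, 0) == 0xcdef
// getHalfword(0x0123456789abcdef, 1) == 0x89ab
// getHalfword(0x0123456789abcdef, 3) == 0x0123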
namespace ARM64Registers {
- typedef enum {
- // Parameter/result registers
- x0,
- x1,
- x2,
- x3,
- x4,
- x5,
- x6,
- x7,
- // Indirect result location register
- x8,
- // Temporary registers
- x9,
- x10,
- x11,
- x12,
- x13,
- x14,
- x15,
- // Intra-procedure-call scratch registers (temporary)
- x16, ip0 = x16,
- x17, ip1 = x17,
- // Platform Register (temporary)
- x18,
- // Callee-saved
- x19,
- x20,
- x21,
- x22,
- x23,
- x24,
- x25,
- x26,
- x27,
- x28,
- // Special
- x29, fp = x29,
- x30, lr = x30,
- sp,
- zr = 0x3f,
- } RegisterID;
- typedef enum {
- // Parameter/result registers
- q0,
- q1,
- q2,
- q3,
- q4,
- q5,
- q6,
- q7,
- // Callee-saved (up to 64-bits only!)
- q8,
- q9,
- q10,
- q11,
- q12,
- q13,
- q14,
- q15,
- // Temporary registers
- q16,
- q17,
- q18,
- q19,
- q20,
- q21,
- q22,
- q23,
- q24,
- q25,
- q26,
- q27,
- q28,
- q29,
- q30,
- q31,
- } FPRegisterID;
-
- static bool isSp(RegisterID reg) { return reg == sp; }
- static bool isZr(RegisterID reg) { return reg == zr; }
-}
+#define FOR_EACH_CPU_REGISTER(V) \
+ FOR_EACH_CPU_GPREGISTER(V) \
+ FOR_EACH_CPU_SPECIAL_REGISTER(V) \
+ FOR_EACH_CPU_FPREGISTER(V)
+
+// The following are defined as pairs of the following value:
+// 1. type of the storage needed to save the register value by the JIT probe.
+// 2. name of the register.
+#define FOR_EACH_CPU_GPREGISTER(V) \
+ /* Parameter/result registers */ \
+ V(void*, x0) \
+ V(void*, x1) \
+ V(void*, x2) \
+ V(void*, x3) \
+ V(void*, x4) \
+ V(void*, x5) \
+ V(void*, x6) \
+ V(void*, x7) \
+ /* Indirect result location register */ \
+ V(void*, x8) \
+ /* Temporary registers */ \
+ V(void*, x9) \
+ V(void*, x10) \
+ V(void*, x11) \
+ V(void*, x12) \
+ V(void*, x13) \
+ V(void*, x14) \
+ V(void*, x15) \
+ /* Intra-procedure-call scratch registers (temporary) */ \
+ V(void*, x16) \
+ V(void*, x17) \
+ /* Platform Register (temporary) */ \
+ V(void*, x18) \
+ /* Callee-saved */ \
+ V(void*, x19) \
+ V(void*, x20) \
+ V(void*, x21) \
+ V(void*, x22) \
+ V(void*, x23) \
+ V(void*, x24) \
+ V(void*, x25) \
+ V(void*, x26) \
+ V(void*, x27) \
+ V(void*, x28) \
+ /* Special */ \
+ V(void*, fp) \
+ V(void*, lr) \
+ V(void*, sp)
+
+#define FOR_EACH_CPU_SPECIAL_REGISTER(V) \
+ V(void*, pc) \
+ V(void*, nzcv) \
+ V(void*, fpsr) \
+
+// ARM64 always has 32 FPU registers, each 128 bits wide. See http://llvm.org/devmtg/2012-11/Northover-AArch64.pdf
+// and Section 5.1.2 in http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf.
+// However, we only use them for 64-bit doubles.
+#define FOR_EACH_CPU_FPREGISTER(V) \
+ /* Parameter/result registers */ \
+ V(double, q0) \
+ V(double, q1) \
+ V(double, q2) \
+ V(double, q3) \
+ V(double, q4) \
+ V(double, q5) \
+ V(double, q6) \
+ V(double, q7) \
+ /* Callee-saved (up to 64-bits only!) */ \
+ V(double, q8) \
+ V(double, q9) \
+ V(double, q10) \
+ V(double, q11) \
+ V(double, q12) \
+ V(double, q13) \
+ V(double, q14) \
+ V(double, q15) \
+ /* Temporary registers */ \
+ V(double, q16) \
+ V(double, q17) \
+ V(double, q18) \
+ V(double, q19) \
+ V(double, q20) \
+ V(double, q21) \
+ V(double, q22) \
+ V(double, q23) \
+ V(double, q24) \
+ V(double, q25) \
+ V(double, q26) \
+ V(double, q27) \
+ V(double, q28) \
+ V(double, q29) \
+ V(double, q30) \
+ V(double, q31)
+
+typedef enum {
+ #define DECLARE_REGISTER(_type, _regName) _regName,
+ FOR_EACH_CPU_GPREGISTER(DECLARE_REGISTER)
+ #undef DECLARE_REGISTER
+
+ ip0 = x16,
+ ip1 = x17,
+ x29 = fp,
+ x30 = lr,
+ zr = 0x3f,
+} RegisterID;
+
+typedef enum {
+ #define DECLARE_REGISTER(_type, _regName) _regName,
+ FOR_EACH_CPU_FPREGISTER(DECLARE_REGISTER)
+ #undef DECLARE_REGISTER
+} FPRegisterID;
+
+static constexpr bool isSp(RegisterID reg) { return reg == sp; }
+static constexpr bool isZr(RegisterID reg) { return reg == zr; }
+
+} // namespace ARM64Registers
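FOR_EACH_CPU_GPREGISTER and friends follow the usual X-macro pattern: the RegisterID enum above is one expansion of the list, and other visitors can reuse it. A hedged sketch of another expansion (the name table below is illustrative, not part of this patch):

// Illustrative only: derive a register-name table from the same list.
#define DECLARE_REGISTER_NAME(_type, _regName) #_regName,
static const char* const gprNames[] = {
    FOR_EACH_CPU_GPREGISTER(DECLARE_REGISTER_NAME)
};
#undef DECLARE_REGISTER_NAME
// 32 entries: gprNames[0] == "x0", ..., gprNames[31] == "sp".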
class ARM64Assembler {
public:
typedef ARM64Registers::RegisterID RegisterID;
typedef ARM64Registers::FPRegisterID FPRegisterID;
- static RegisterID firstRegister() { return ARM64Registers::x0; }
- static RegisterID lastRegister() { return ARM64Registers::x28; }
+ static constexpr RegisterID firstRegister() { return ARM64Registers::x0; }
+ static constexpr RegisterID lastRegister() { return ARM64Registers::sp; }
- static FPRegisterID firstFPRegister() { return ARM64Registers::q0; }
- static FPRegisterID lastFPRegister() { return ARM64Registers::q31; }
+ static constexpr FPRegisterID firstFPRegister() { return ARM64Registers::q0; }
+ static constexpr FPRegisterID lastFPRegister() { return ARM64Registers::q31; }
private:
- static bool isSp(RegisterID reg) { return ARM64Registers::isSp(reg); }
- static bool isZr(RegisterID reg) { return ARM64Registers::isZr(reg); }
+ static constexpr bool isSp(RegisterID reg) { return ARM64Registers::isSp(reg); }
+ static constexpr bool isZr(RegisterID reg) { return ARM64Registers::isZr(reg); }
public:
ARM64Assembler()
@@ -583,9 +423,9 @@ public:
JumpType m_type : 8;
JumpLinkType m_linkType : 8;
Condition m_condition : 4;
- bool m_is64Bit : 1;
unsigned m_bitNumber : 6;
- RegisterID m_compareRegister : 5;
+ RegisterID m_compareRegister : 6;
+ bool m_is64Bit : 1;
} realTypes;
struct CopyTypes {
uint64_t content[3];
@@ -642,19 +482,12 @@ public:
template<int datasize>
static bool canEncodePImmOffset(int32_t offset)
{
- int32_t maxPImm = 4095 * (datasize / 8);
- if (offset < 0)
- return false;
- if (offset > maxPImm)
- return false;
- if (offset & ((datasize / 8 ) - 1))
- return false;
- return true;
+ return isValidScaledUImm12<datasize>(offset);
}
static bool canEncodeSImmOffset(int32_t offset)
{
- return isInt9(offset);
+ return isValidSignedImm9(offset);
}
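The open-coded checks removed above document what the shared helpers are expected to verify; assuming isValidScaledUImm12/isValidSignedImm9 in AssemblerCommon.h keep the same semantics, they amount to the following (sketch names are hypothetical):

// Presumed semantics, matching the removed code and the old isInt9().
template<int datasize>
static bool isValidScaledUImm12Sketch(int32_t offset)
{
    int32_t maxPImm = 4095 * (datasize / 8);
    return offset >= 0 && offset <= maxPImm && !(offset & ((datasize / 8) - 1));
}
static bool isValidSignedImm9Sketch(int32_t value)
{
    return value >= -256 && value <= 255; // fits in a signed 9-bit field
}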
private:
@@ -787,6 +620,22 @@ private:
FPDataOp_FNMUL
};
+ enum SIMD3Same {
+ SIMD_LogicalOp = 0x03
+ };
+
+ enum SIMD3SameLogical {
+ // This includes both the U bit and the "size" / opc for convenience.
+ SIMD_LogicalOp_AND = 0x00,
+ SIMD_LogicalOp_BIC = 0x01,
+ SIMD_LogicalOp_ORR = 0x02,
+ SIMD_LogicalOp_ORN = 0x03,
+ SIMD_LogacalOp_EOR = 0x80,
+ SIMD_LogicalOp_BSL = 0x81,
+ SIMD_LogicalOp_BIT = 0x82,
+ SIMD_LogicalOp_BIF = 0x83,
+ };
+
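Each SIMD3SameLogical value packs the U bit (bit 7 of the enum value, bit 29 of the instruction) together with the two size bits (bits 1..0 of the enum value, bits 23..22 of the instruction), which vectorDataProcessingLogical() later shifts into place as a unit. For example:

// SIMD_LogicalOp_ORR = 0x02 -> U = 0, size = 0b10 (ORR)
// SIMD_LogacalOp_EOR = 0x80 -> U = 1, size = 0b00 (EOR)
// In vectorDataProcessingLogical(), uAndSize << 22 lands U at bit 29 and size at bits 23..22.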
enum FPIntConvOp {
FPIntConvOp_FCVTNS = 0x00,
FPIntConvOp_FCVTNU = 0x01,
@@ -823,6 +672,16 @@ private:
MemOp_LOAD_signed32 = 3 // size may be 0 or 1
};
+ enum MemPairOpSize {
+ MemPairOp_32 = 0,
+ MemPairOp_LoadSigned_32 = 1,
+ MemPairOp_64 = 2,
+
+ MemPairOp_V32 = MemPairOp_32,
+ MemPairOp_V64 = 1,
+ MemPairOp_V128 = 2
+ };
+
enum MoveWideOp {
MoveWideOp_N = 0,
MoveWideOp_Z = 2,
@@ -836,6 +695,14 @@ private:
LdrLiteralOp_128BIT = 2
};
+ static unsigned memPairOffsetShift(bool V, MemPairOpSize size)
+ {
+ // return the log2 of the size in bytes, e.g. 64 bit size returns 3
+ if (V)
+ return size + 2;
+ return (size >> 1) + 2;
+ }
+
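memPairOffsetShift() converts the MemPairOpSize encoding into the log2 of the access size in bytes, which is then used to scale the signed 7-bit pair offset; the resulting values are:

// Integer pairs (V == false): MemPairOp_32 -> 2 (4 bytes), MemPairOp_64 -> 3 (8 bytes)
// Vector pairs (V == true): MemPairOp_V32 -> 2, MemPairOp_V64 -> 3, MemPairOp_V128 -> 4 (16 bytes)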
public:
// Integer Instructions:
@@ -871,8 +738,9 @@ public:
ALWAYS_INLINE void add(RegisterID rd, RegisterID rn, RegisterID rm, ShiftType shift, int amount)
{
CHECK_DATASIZE();
- if (isSp(rn)) {
+ if (isSp(rd) || isSp(rn)) {
ASSERT(shift == LSL);
+ ASSERT(!isSp(rm));
add<datasize, setFlags>(rd, rn, rm, UXTX, amount);
} else
insn(addSubtractShiftedRegister(DATASIZE, AddOp_ADD, setFlags, shift, rm, amount, rn, rd));
@@ -887,6 +755,7 @@ public:
{
ASSERT(!(offset & 0xfff));
insn(pcRelative(true, offset >> 12, rd));
+ nopCortexA53Fix843419();
}
template<int datasize, SetFlags setFlags = DontSetFlags>
@@ -1215,6 +1084,40 @@ public:
insn(excepnGeneration(ExcepnOp_HALT, imm, 0));
}
+ // Only used for testing purposes.
+ void illegalInstruction()
+ {
+ insn(0x0);
+ }
+
+ template<int datasize>
+ ALWAYS_INLINE void ldp(RegisterID rt, RegisterID rt2, RegisterID rn, PairPostIndex simm)
+ {
+ CHECK_DATASIZE();
+ insn(loadStoreRegisterPairPostIndex(MEMPAIROPSIZE_INT(datasize), false, MemOp_LOAD, simm, rn, rt, rt2));
+ }
+
+ template<int datasize>
+ ALWAYS_INLINE void ldp(RegisterID rt, RegisterID rt2, RegisterID rn, PairPreIndex simm)
+ {
+ CHECK_DATASIZE();
+ insn(loadStoreRegisterPairPreIndex(MEMPAIROPSIZE_INT(datasize), false, MemOp_LOAD, simm, rn, rt, rt2));
+ }
+
+ template<int datasize>
+ ALWAYS_INLINE void ldp(RegisterID rt, RegisterID rt2, RegisterID rn, unsigned pimm = 0)
+ {
+ CHECK_DATASIZE();
+ insn(loadStoreRegisterPairOffset(MEMPAIROPSIZE_INT(datasize), false, MemOp_LOAD, pimm, rn, rt, rt2));
+ }
+
+ template<int datasize>
+ ALWAYS_INLINE void ldnp(RegisterID rt, RegisterID rt2, RegisterID rn, unsigned pimm = 0)
+ {
+ CHECK_DATASIZE();
+ insn(loadStoreRegisterPairNonTemporal(MEMPAIROPSIZE_INT(datasize), false, MemOp_LOAD, pimm, rn, rt, rt2));
+ }
+
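The new ldp/ldnp (and the matching stp/stnp further down) cover the AArch64 pair addressing modes: post-index, pre-index, and plain scaled offset. A hedged usage sketch (register and offset choices are illustrative; |a| is an ARM64Assembler):

// ldp x0, x1, [sp], #16
a.ldp<64>(ARM64Registers::x0, ARM64Registers::x1, ARM64Registers::sp, PairPostIndex(16));
// ldp x0, x1, [sp, #-16]!
a.ldp<64>(ARM64Registers::x0, ARM64Registers::x1, ARM64Registers::sp, PairPreIndex(-16));
// ldp x0, x1, [sp, #32]
a.ldp<64>(ARM64Registers::x0, ARM64Registers::x1, ARM64Registers::sp, 32u);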
template<int datasize>
ALWAYS_INLINE void ldr(RegisterID rt, RegisterID rn, RegisterID rm)
{
@@ -1494,6 +1397,7 @@ public:
ALWAYS_INLINE void madd(RegisterID rd, RegisterID rn, RegisterID rm, RegisterID ra)
{
CHECK_DATASIZE();
+ nopCortexA53Fix835769<datasize>();
insn(dataProcessing3Source(DATASIZE, DataOp_MADD, rm, ra, rn, rd));
}
@@ -1546,6 +1450,7 @@ public:
ALWAYS_INLINE void msub(RegisterID rd, RegisterID rn, RegisterID rm, RegisterID ra)
{
CHECK_DATASIZE();
+ nopCortexA53Fix835769<datasize>();
insn(dataProcessing3Source(DATASIZE, DataOp_MSUB, rm, ra, rn, rd));
}
@@ -1596,9 +1501,27 @@ public:
insn(nopPseudo());
}
- ALWAYS_INLINE void dmbSY()
+ static void fillNops(void* base, size_t size, bool isCopyingToExecutableMemory)
+ {
+ RELEASE_ASSERT(!(size % sizeof(int32_t)));
+ size_t n = size / sizeof(int32_t);
+ for (int32_t* ptr = static_cast<int32_t*>(base); n--;) {
+ int insn = nopPseudo();
+ if (isCopyingToExecutableMemory)
+ performJITMemcpy(ptr++, &insn, sizeof(int));
+ else
+ memcpy(ptr++, &insn, sizeof(int));
+ }
+ }
+
+ ALWAYS_INLINE void dmbISH()
+ {
+ insn(0xd5033bbf);
+ }
+
+ ALWAYS_INLINE void dmbISHST()
{
- insn(0xd5033fbf);
+ insn(0xd5033abf);
}
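The hard-coded words follow the generic DMB encoding 0xd50330bf | CRm << 8, with CRm selecting the barrier domain; decoded for reference (my reading of the encodings, not stated in the patch):

// 0xd5033fbf -> CRm = 0xF : dmb sy (the old dmbSY())
// 0xd5033bbf -> CRm = 0xB : dmb ish (dmbISH())
// 0xd5033abf -> CRm = 0xA : dmb ishst (dmbISHST())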
template<int datasize>
@@ -1724,6 +1647,7 @@ public:
ALWAYS_INLINE void smaddl(RegisterID rd, RegisterID rn, RegisterID rm, RegisterID ra)
{
+ nopCortexA53Fix835769<64>();
insn(dataProcessing3Source(Datasize_64, DataOp_SMADDL, rm, ra, rn, rd));
}
@@ -1734,6 +1658,7 @@ public:
ALWAYS_INLINE void smsubl(RegisterID rd, RegisterID rn, RegisterID rm, RegisterID ra)
{
+ nopCortexA53Fix835769<64>();
insn(dataProcessing3Source(Datasize_64, DataOp_SMSUBL, rm, ra, rn, rd));
}
@@ -1748,6 +1673,34 @@ public:
}
template<int datasize>
+ ALWAYS_INLINE void stp(RegisterID rt, RegisterID rt2, RegisterID rn, PairPostIndex simm)
+ {
+ CHECK_DATASIZE();
+ insn(loadStoreRegisterPairPostIndex(MEMPAIROPSIZE_INT(datasize), false, MemOp_STORE, simm, rn, rt, rt2));
+ }
+
+ template<int datasize>
+ ALWAYS_INLINE void stp(RegisterID rt, RegisterID rt2, RegisterID rn, PairPreIndex simm)
+ {
+ CHECK_DATASIZE();
+ insn(loadStoreRegisterPairPreIndex(MEMPAIROPSIZE_INT(datasize), false, MemOp_STORE, simm, rn, rt, rt2));
+ }
+
+ template<int datasize>
+ ALWAYS_INLINE void stp(RegisterID rt, RegisterID rt2, RegisterID rn, unsigned pimm = 0)
+ {
+ CHECK_DATASIZE();
+ insn(loadStoreRegisterPairOffset(MEMPAIROPSIZE_INT(datasize), false, MemOp_STORE, pimm, rn, rt, rt2));
+ }
+
+ template<int datasize>
+ ALWAYS_INLINE void stnp(RegisterID rt, RegisterID rt2, RegisterID rn, unsigned pimm = 0)
+ {
+ CHECK_DATASIZE();
+ insn(loadStoreRegisterPairNonTemporal(MEMPAIROPSIZE_INT(datasize), false, MemOp_STORE, pimm, rn, rt, rt2));
+ }
+
+ template<int datasize>
ALWAYS_INLINE void str(RegisterID rt, RegisterID rn, RegisterID rm)
{
str<datasize>(rt, rn, rm, UXTX, 0);
@@ -1862,7 +1815,13 @@ public:
template<int datasize, SetFlags setFlags = DontSetFlags>
ALWAYS_INLINE void sub(RegisterID rd, RegisterID rn, RegisterID rm)
{
- sub<datasize, setFlags>(rd, rn, rm, LSL, 0);
+ ASSERT_WITH_MESSAGE(!isSp(rd) || setFlags == DontSetFlags, "SUBS with shifted register does not support SP for Xd, it uses XZR for the register 31. SUBS with extended register support SP for Xd, but only if SetFlag is not used, otherwise register 31 is Xd.");
+ ASSERT_WITH_MESSAGE(!isSp(rm), "No encoding of SUBS supports SP for the third operand.");
+
+ if (isSp(rd) || isSp(rn))
+ sub<datasize, setFlags>(rd, rn, rm, UXTX, 0);
+ else
+ sub<datasize, setFlags>(rd, rn, rm, LSL, 0);
}
template<int datasize, SetFlags setFlags = DontSetFlags>
@@ -1876,11 +1835,8 @@ public:
ALWAYS_INLINE void sub(RegisterID rd, RegisterID rn, RegisterID rm, ShiftType shift, int amount)
{
CHECK_DATASIZE();
- if (isSp(rn)) {
- ASSERT(shift == LSL);
- sub<datasize, setFlags>(rd, rn, rm, UXTX, amount);
- } else
- insn(addSubtractShiftedRegister(DATASIZE, AddOp_SUB, setFlags, shift, rm, amount, rn, rd));
+ ASSERT(!isSp(rd) && !isSp(rn) && !isSp(rm));
+ insn(addSubtractShiftedRegister(DATASIZE, AddOp_SUB, setFlags, shift, rm, amount, rn, rd));
}
template<int datasize>
@@ -1960,6 +1916,7 @@ public:
ALWAYS_INLINE void umaddl(RegisterID rd, RegisterID rn, RegisterID rm, RegisterID ra)
{
+ nopCortexA53Fix835769<64>();
insn(dataProcessing3Source(Datasize_64, DataOp_UMADDL, rm, ra, rn, rd));
}
@@ -1970,6 +1927,7 @@ public:
ALWAYS_INLINE void umsubl(RegisterID rd, RegisterID rn, RegisterID rm, RegisterID ra)
{
+ nopCortexA53Fix835769<64>();
insn(dataProcessing3Source(Datasize_64, DataOp_UMSUBL, rm, ra, rn, rd));
}
@@ -2279,6 +2237,20 @@ public:
}
template<int datasize>
+ ALWAYS_INLINE void vand(FPRegisterID vd, FPRegisterID vn, FPRegisterID vm)
+ {
+ CHECK_VECTOR_DATASIZE();
+ insn(vectorDataProcessingLogical(SIMD_LogicalOp_AND, vm, vn, vd));
+ }
+
+ template<int datasize>
+ ALWAYS_INLINE void vorr(FPRegisterID vd, FPRegisterID vn, FPRegisterID vm)
+ {
+ CHECK_VECTOR_DATASIZE();
+ insn(vectorDataProcessingLogical(SIMD_LogicalOp_ORR, vm, vn, vd));
+ }
+
+ template<int datasize>
ALWAYS_INLINE void frinta(FPRegisterID vd, FPRegisterID vn)
{
CHECK_DATASIZE();
@@ -2494,13 +2466,6 @@ public:
return b.m_offset - a.m_offset;
}
- int executableOffsetFor(int location)
- {
- if (!location)
- return 0;
- return static_cast<int32_t*>(m_buffer.data())[location / sizeof(int32_t) - 1];
- }
-
void* unlinkedCode() { return m_buffer.data(); }
size_t codeSize() const { return m_buffer.codeSize(); }
@@ -2539,23 +2504,23 @@ public:
m_jumpsToLink.append(LinkRecord(from.m_offset, to.m_offset, type, condition, bitNumber, compareRegister));
}
- void linkJump(AssemblerLabel from, AssemblerLabel to)
+ void linkJump(AssemblerLabel from, void* executableCode, AssemblerLabel to)
{
ASSERT(from.isSet());
ASSERT(to.isSet());
- relinkJumpOrCall<false>(addressOf(from), addressOf(to));
+ relinkJumpOrCall<false>(addressOf(from), addressOf(executableCode, from), addressOf(to));
}
static void linkJump(void* code, AssemblerLabel from, void* to)
{
ASSERT(from.isSet());
- relinkJumpOrCall<false>(addressOf(code, from), to);
+ relinkJumpOrCall<false>(addressOf(code, from), addressOf(code, from), to);
}
static void linkCall(void* code, AssemblerLabel from, void* to)
{
ASSERT(from.isSet());
- linkJumpOrCall<true>(addressOf(code, from) - 1, to);
+ linkJumpOrCall<true>(addressOf(code, from) - 1, addressOf(code, from) - 1, to);
}
static void linkPointer(void* code, AssemblerLabel where, void* valuePtr)
@@ -2567,7 +2532,8 @@ public:
{
intptr_t offset = (reinterpret_cast<intptr_t>(to) - reinterpret_cast<intptr_t>(where)) >> 2;
ASSERT(static_cast<int>(offset) == offset);
- *static_cast<int*>(where) = unconditionalBranchImmediate(false, static_cast<int>(offset));
+ int insn = unconditionalBranchImmediate(false, static_cast<int>(offset));
+ performJITMemcpy(where, &insn, sizeof(int));
cacheFlush(where, sizeof(int));
}
@@ -2575,6 +2541,11 @@ public:
{
return 4;
}
+
+ static constexpr ptrdiff_t patchableJumpSize()
+ {
+ return 4;
+ }
static void replaceWithLoad(void* where)
{
@@ -2591,7 +2562,8 @@ public:
ASSERT(!S);
ASSERT(!shift);
ASSERT(!(imm12 & ~0xff8));
- *static_cast<int*>(where) = loadStoreRegisterUnsignedImmediate(MemOpSize_64, false, MemOp_LOAD, encodePositiveImmediate<64>(imm12), rn, rd);
+ int insn = loadStoreRegisterUnsignedImmediate(MemOpSize_64, false, MemOp_LOAD, encodePositiveImmediate<64>(imm12), rn, rd);
+ performJITMemcpy(where, &insn, sizeof(int));
cacheFlush(where, sizeof(int));
}
#if !ASSERT_DISABLED
@@ -2624,7 +2596,8 @@ public:
ASSERT(!V);
ASSERT(opc == MemOp_LOAD);
ASSERT(!(imm12 & ~0x1ff));
- *static_cast<int*>(where) = addSubtractImmediate(Datasize_64, AddOp_ADD, DontSetFlags, 0, imm12 * sizeof(void*), rn, rt);
+ int insn = addSubtractImmediate(Datasize_64, AddOp_ADD, DontSetFlags, 0, imm12 * sizeof(void*), rn, rt);
+ performJITMemcpy(where, &insn, sizeof(int));
cacheFlush(where, sizeof(int));
}
#if !ASSERT_DISABLED
@@ -2654,9 +2627,11 @@ public:
static void setPointer(int* address, void* valuePtr, RegisterID rd, bool flush)
{
uintptr_t value = reinterpret_cast<uintptr_t>(valuePtr);
- address[0] = moveWideImediate(Datasize_64, MoveWideOp_Z, 0, getHalfword(value, 0), rd);
- address[1] = moveWideImediate(Datasize_64, MoveWideOp_K, 1, getHalfword(value, 1), rd);
- address[2] = moveWideImediate(Datasize_64, MoveWideOp_K, 2, getHalfword(value, 2), rd);
+ int buffer[3];
+ buffer[0] = moveWideImediate(Datasize_64, MoveWideOp_Z, 0, getHalfword(value, 0), rd);
+ buffer[1] = moveWideImediate(Datasize_64, MoveWideOp_K, 1, getHalfword(value, 1), rd);
+ buffer[2] = moveWideImediate(Datasize_64, MoveWideOp_K, 2, getHalfword(value, 2), rd);
+ performJITMemcpy(address, buffer, sizeof(int) * 3);
if (flush)
cacheFlush(address, sizeof(int) * 3);
@@ -2675,13 +2650,15 @@ public:
ASSERT_UNUSED(expected, expected && !sf && (opc == MoveWideOp_Z || opc == MoveWideOp_N) && !hw);
ASSERT(checkMovk<Datasize_32>(address[1], 1, rd));
+ int buffer[2];
if (value >= 0) {
- address[0] = moveWideImediate(Datasize_32, MoveWideOp_Z, 0, getHalfword(value, 0), rd);
- address[1] = moveWideImediate(Datasize_32, MoveWideOp_K, 1, getHalfword(value, 1), rd);
+ buffer[0] = moveWideImediate(Datasize_32, MoveWideOp_Z, 0, getHalfword(value, 0), rd);
+ buffer[1] = moveWideImediate(Datasize_32, MoveWideOp_K, 1, getHalfword(value, 1), rd);
} else {
- address[0] = moveWideImediate(Datasize_32, MoveWideOp_N, 0, ~getHalfword(value, 0), rd);
- address[1] = moveWideImediate(Datasize_32, MoveWideOp_K, 1, getHalfword(value, 1), rd);
+ buffer[0] = moveWideImediate(Datasize_32, MoveWideOp_N, 0, ~getHalfword(value, 0), rd);
+ buffer[1] = moveWideImediate(Datasize_32, MoveWideOp_K, 1, getHalfword(value, 1), rd);
}
+ performJITMemcpy(where, &buffer, sizeof(int) * 2);
cacheFlush(where, sizeof(int) * 2);
}
@@ -2716,15 +2693,25 @@ public:
return readPointer(reinterpret_cast<int*>(from) - 4);
}
+ // The static relink, repatch, and replace methods can use |from|
+ // for both the write and executable address for call
+ // and jump patching as they're modifying existing (linked) code,
+ // so the address being provided is correct for relative address
+ // computation.
static void relinkJump(void* from, void* to)
{
- relinkJumpOrCall<false>(reinterpret_cast<int*>(from), to);
+ relinkJumpOrCall<false>(reinterpret_cast<int*>(from), reinterpret_cast<const int*>(from), to);
cacheFlush(from, sizeof(int));
}
+ static void relinkJumpToNop(void* from)
+ {
+ relinkJump(from, static_cast<char*>(from) + 4);
+ }
+
static void relinkCall(void* from, void* to)
{
- relinkJumpOrCall<true>(reinterpret_cast<int*>(from) - 1, to);
+ relinkJumpOrCall<true>(reinterpret_cast<int*>(from) - 1, reinterpret_cast<const int*>(from) - 1, to);
cacheFlush(reinterpret_cast<int*>(from) - 1, sizeof(int));
}
@@ -2745,17 +2732,42 @@ public:
imm12 = encodePositiveImmediate<32>(value);
else
imm12 = encodePositiveImmediate<64>(value);
- *static_cast<int*>(where) = loadStoreRegisterUnsignedImmediate(size, V, opc, imm12, rn, rt);
+ int insn = loadStoreRegisterUnsignedImmediate(size, V, opc, imm12, rn, rt);
+ performJITMemcpy(where, &insn, sizeof(int));
cacheFlush(where, sizeof(int));
}
unsigned debugOffset() { return m_buffer.debugOffset(); }
+#if OS(LINUX) && COMPILER(GCC_OR_CLANG)
+ static inline void linuxPageFlush(uintptr_t begin, uintptr_t end)
+ {
+ __builtin___clear_cache(reinterpret_cast<char*>(begin), reinterpret_cast<char*>(end));
+ }
+#endif
+
static void cacheFlush(void* code, size_t size)
{
#if OS(IOS)
sys_cache_control(kCacheFunctionPrepareForExecution, code, size);
+#elif OS(LINUX)
+ size_t page = pageSize();
+ uintptr_t current = reinterpret_cast<uintptr_t>(code);
+ uintptr_t end = current + size;
+ uintptr_t firstPageEnd = (current & ~(page - 1)) + page;
+
+ if (end <= firstPageEnd) {
+ linuxPageFlush(current, end);
+ return;
+ }
+
+ linuxPageFlush(current, firstPageEnd);
+
+ for (current = firstPageEnd; current + page < end; current += page)
+ linuxPageFlush(current, current + page);
+
+ linuxPageFlush(current, end);
#else
#error "The cacheFlush support is missing on this platform."
#endif
@@ -2763,20 +2775,20 @@ public:
// Assembler admin methods:
- int jumpSizeDelta(JumpType jumpType, JumpLinkType jumpLinkType) { return JUMP_ENUM_SIZE(jumpType) - JUMP_ENUM_SIZE(jumpLinkType); }
+ static int jumpSizeDelta(JumpType jumpType, JumpLinkType jumpLinkType) { return JUMP_ENUM_SIZE(jumpType) - JUMP_ENUM_SIZE(jumpLinkType); }
static ALWAYS_INLINE bool linkRecordSourceComparator(const LinkRecord& a, const LinkRecord& b)
{
return a.from() < b.from();
}
- bool canCompact(JumpType jumpType)
+ static bool canCompact(JumpType jumpType)
{
// Fixed jumps cannot be compacted
return (jumpType == JumpNoCondition) || (jumpType == JumpCondition) || (jumpType == JumpCompareAndBranch) || (jumpType == JumpTestBit);
}
- JumpLinkType computeJumpType(JumpType jumpType, const uint8_t* from, const uint8_t* to)
+ static JumpLinkType computeJumpType(JumpType jumpType, const uint8_t* from, const uint8_t* to)
{
switch (jumpType) {
case JumpFixed:
@@ -2828,51 +2840,43 @@ public:
return LinkJumpNoCondition;
}
- JumpLinkType computeJumpType(LinkRecord& record, const uint8_t* from, const uint8_t* to)
+ static JumpLinkType computeJumpType(LinkRecord& record, const uint8_t* from, const uint8_t* to)
{
JumpLinkType linkType = computeJumpType(record.type(), from, to);
record.setLinkType(linkType);
return linkType;
}
- void recordLinkOffsets(int32_t regionStart, int32_t regionEnd, int32_t offset)
- {
- int32_t ptr = regionStart / sizeof(int32_t);
- const int32_t end = regionEnd / sizeof(int32_t);
- int32_t* offsets = static_cast<int32_t*>(m_buffer.data());
- while (ptr < end)
- offsets[ptr++] = offset;
- }
-
Vector<LinkRecord, 0, UnsafeVectorOverflow>& jumpsToLink()
{
std::sort(m_jumpsToLink.begin(), m_jumpsToLink.end(), linkRecordSourceComparator);
return m_jumpsToLink;
}
- void ALWAYS_INLINE link(LinkRecord& record, uint8_t* from, uint8_t* to)
+ static void ALWAYS_INLINE link(LinkRecord& record, uint8_t* from, const uint8_t* fromInstruction8, uint8_t* to)
{
+ const int* fromInstruction = reinterpret_cast<const int*>(fromInstruction8);
switch (record.linkType()) {
case LinkJumpNoCondition:
- linkJumpOrCall<false>(reinterpret_cast<int*>(from), to);
+ linkJumpOrCall<false>(reinterpret_cast<int*>(from), fromInstruction, to);
break;
case LinkJumpConditionDirect:
- linkConditionalBranch<true>(record.condition(), reinterpret_cast<int*>(from), to);
+ linkConditionalBranch<true>(record.condition(), reinterpret_cast<int*>(from), fromInstruction, to);
break;
case LinkJumpCondition:
- linkConditionalBranch<false>(record.condition(), reinterpret_cast<int*>(from) - 1, to);
+ linkConditionalBranch<false>(record.condition(), reinterpret_cast<int*>(from) - 1, fromInstruction - 1, to);
break;
case LinkJumpCompareAndBranchDirect:
- linkCompareAndBranch<true>(record.condition(), record.is64Bit(), record.compareRegister(), reinterpret_cast<int*>(from), to);
+ linkCompareAndBranch<true>(record.condition(), record.is64Bit(), record.compareRegister(), reinterpret_cast<int*>(from), fromInstruction, to);
break;
case LinkJumpCompareAndBranch:
- linkCompareAndBranch<false>(record.condition(), record.is64Bit(), record.compareRegister(), reinterpret_cast<int*>(from) - 1, to);
+ linkCompareAndBranch<false>(record.condition(), record.is64Bit(), record.compareRegister(), reinterpret_cast<int*>(from) - 1, fromInstruction - 1, to);
break;
case LinkJumpTestBitDirect:
- linkTestAndBranch<true>(record.condition(), record.bitNumber(), record.compareRegister(), reinterpret_cast<int*>(from), to);
+ linkTestAndBranch<true>(record.condition(), record.bitNumber(), record.compareRegister(), reinterpret_cast<int*>(from), fromInstruction, to);
break;
case LinkJumpTestBit:
- linkTestAndBranch<false>(record.condition(), record.bitNumber(), record.compareRegister(), reinterpret_cast<int*>(from) - 1, to);
+ linkTestAndBranch<false>(record.condition(), record.bitNumber(), record.compareRegister(), reinterpret_cast<int*>(from) - 1, fromInstruction - 1, to);
break;
default:
ASSERT_NOT_REACHED();
@@ -2914,7 +2918,7 @@ private:
}
template<bool isCall>
- static void linkJumpOrCall(int* from, void* to)
+ static void linkJumpOrCall(int* from, const int* fromInstruction, void* to)
{
bool link;
int imm26;
@@ -2924,60 +2928,69 @@ private:
ASSERT_UNUSED(isCall, (link == isCall) || disassembleNop(from));
ASSERT(!(reinterpret_cast<intptr_t>(from) & 3));
ASSERT(!(reinterpret_cast<intptr_t>(to) & 3));
- intptr_t offset = (reinterpret_cast<intptr_t>(to) - reinterpret_cast<intptr_t>(from)) >> 2;
+ intptr_t offset = (reinterpret_cast<intptr_t>(to) - reinterpret_cast<intptr_t>(fromInstruction)) >> 2;
ASSERT(static_cast<int>(offset) == offset);
- *from = unconditionalBranchImmediate(isCall, static_cast<int>(offset));
+ int insn = unconditionalBranchImmediate(isCall, static_cast<int>(offset));
+ performJITMemcpy(from, &insn, sizeof(int));
}
template<bool isDirect>
- static void linkCompareAndBranch(Condition condition, bool is64Bit, RegisterID rt, int* from, void* to)
+ static void linkCompareAndBranch(Condition condition, bool is64Bit, RegisterID rt, int* from, const int* fromInstruction, void* to)
{
ASSERT(!(reinterpret_cast<intptr_t>(from) & 3));
ASSERT(!(reinterpret_cast<intptr_t>(to) & 3));
- intptr_t offset = (reinterpret_cast<intptr_t>(to) - reinterpret_cast<intptr_t>(from)) >> 2;
+ intptr_t offset = (reinterpret_cast<intptr_t>(to) - reinterpret_cast<intptr_t>(fromInstruction)) >> 2;
ASSERT(((offset << 38) >> 38) == offset);
bool useDirect = ((offset << 45) >> 45) == offset; // Fits in 19 bits
ASSERT(!isDirect || useDirect);
if (useDirect || isDirect) {
- *from = compareAndBranchImmediate(is64Bit ? Datasize_64 : Datasize_32, condition == ConditionNE, static_cast<int>(offset), rt);
- if (!isDirect)
- *(from + 1) = nopPseudo();
+ int insn = compareAndBranchImmediate(is64Bit ? Datasize_64 : Datasize_32, condition == ConditionNE, static_cast<int>(offset), rt);
+ performJITMemcpy(from, &insn, sizeof(int));
+ if (!isDirect) {
+ insn = nopPseudo();
+ performJITMemcpy(from + 1, &insn, sizeof(int));
+ }
} else {
- *from = compareAndBranchImmediate(is64Bit ? Datasize_64 : Datasize_32, invert(condition) == ConditionNE, 2, rt);
- linkJumpOrCall<false>(from + 1, to);
+ int insn = compareAndBranchImmediate(is64Bit ? Datasize_64 : Datasize_32, invert(condition) == ConditionNE, 2, rt);
+ performJITMemcpy(from, &insn, sizeof(int));
+ linkJumpOrCall<false>(from + 1, fromInstruction + 1, to);
}
}
template<bool isDirect>
- static void linkConditionalBranch(Condition condition, int* from, void* to)
+ static void linkConditionalBranch(Condition condition, int* from, const int* fromInstruction, void* to)
{
ASSERT(!(reinterpret_cast<intptr_t>(from) & 3));
ASSERT(!(reinterpret_cast<intptr_t>(to) & 3));
- intptr_t offset = (reinterpret_cast<intptr_t>(to) - reinterpret_cast<intptr_t>(from)) >> 2;
+ intptr_t offset = (reinterpret_cast<intptr_t>(to) - reinterpret_cast<intptr_t>(fromInstruction)) >> 2;
ASSERT(((offset << 38) >> 38) == offset);
bool useDirect = ((offset << 45) >> 45) == offset; // Fits in 19 bits
ASSERT(!isDirect || useDirect);
if (useDirect || isDirect) {
- *from = conditionalBranchImmediate(static_cast<int>(offset), condition);
- if (!isDirect)
- *(from + 1) = nopPseudo();
+ int insn = conditionalBranchImmediate(static_cast<int>(offset), condition);
+ performJITMemcpy(from, &insn, sizeof(int));
+ if (!isDirect) {
+ insn = nopPseudo();
+ performJITMemcpy(from + 1, &insn, sizeof(int));
+ }
} else {
- *from = conditionalBranchImmediate(2, invert(condition));
- linkJumpOrCall<false>(from + 1, to);
+ int insn = conditionalBranchImmediate(2, invert(condition));
+ performJITMemcpy(from, &insn, sizeof(int));
+ linkJumpOrCall<false>(from + 1, fromInstruction + 1, to);
}
}
template<bool isDirect>
- static void linkTestAndBranch(Condition condition, unsigned bitNumber, RegisterID rt, int* from, void* to)
+ static void linkTestAndBranch(Condition condition, unsigned bitNumber, RegisterID rt, int* from, const int* fromInstruction, void* to)
{
ASSERT(!(reinterpret_cast<intptr_t>(from) & 3));
ASSERT(!(reinterpret_cast<intptr_t>(to) & 3));
- intptr_t offset = (reinterpret_cast<intptr_t>(to) - reinterpret_cast<intptr_t>(from)) >> 2;
+ intptr_t offset = (reinterpret_cast<intptr_t>(to) - reinterpret_cast<intptr_t>(fromInstruction)) >> 2;
ASSERT(static_cast<int>(offset) == offset);
ASSERT(((offset << 38) >> 38) == offset);
@@ -2985,17 +2998,21 @@ private:
ASSERT(!isDirect || useDirect);
if (useDirect || isDirect) {
- *from = testAndBranchImmediate(condition == ConditionNE, static_cast<int>(bitNumber), static_cast<int>(offset), rt);
- if (!isDirect)
- *(from + 1) = nopPseudo();
+ int insn = testAndBranchImmediate(condition == ConditionNE, static_cast<int>(bitNumber), static_cast<int>(offset), rt);
+ performJITMemcpy(from, &insn, sizeof(int));
+ if (!isDirect) {
+ insn = nopPseudo();
+ performJITMemcpy(from + 1, &insn, sizeof(int));
+ }
} else {
- *from = testAndBranchImmediate(invert(condition) == ConditionNE, static_cast<int>(bitNumber), 2, rt);
- linkJumpOrCall<false>(from + 1, to);
+ int insn = testAndBranchImmediate(invert(condition) == ConditionNE, static_cast<int>(bitNumber), 2, rt);
+ performJITMemcpy(from, &insn, sizeof(int));
+ linkJumpOrCall<false>(from + 1, fromInstruction + 1, to);
}
}
template<bool isCall>
- static void relinkJumpOrCall(int* from, void* to)
+ static void relinkJumpOrCall(int* from, const int* fromInstruction, void* to)
{
if (!isCall && disassembleNop(from)) {
unsigned op01;
@@ -3010,7 +3027,7 @@ private:
if (imm19 == 8)
condition = invert(condition);
- linkConditionalBranch<false>(condition, from - 1, to);
+ linkConditionalBranch<false>(condition, from - 1, fromInstruction - 1, to);
return;
}
@@ -3023,7 +3040,7 @@ private:
if (imm19 == 8)
op = !op;
- linkCompareAndBranch<false>(op ? ConditionNE : ConditionEQ, opSize == Datasize_64, rt, from - 1, to);
+ linkCompareAndBranch<false>(op ? ConditionNE : ConditionEQ, opSize == Datasize_64, rt, from - 1, fromInstruction - 1, to);
return;
}
@@ -3035,12 +3052,12 @@ private:
if (imm14 == 8)
op = !op;
- linkTestAndBranch<false>(op ? ConditionNE : ConditionEQ, bitNumber, rt, from - 1, to);
+ linkTestAndBranch<false>(op ? ConditionNE : ConditionEQ, bitNumber, rt, from - 1, fromInstruction - 1, to);
return;
}
}
- linkJumpOrCall<isCall>(from, to);
+ linkJumpOrCall<isCall>(from, fromInstruction, to);
}
static int* addressOf(void* code, AssemblerLabel label)
@@ -3124,7 +3141,7 @@ private:
int insn = *static_cast<int*>(address);
op = (insn >> 24) & 0x1;
imm14 = (insn << 13) >> 18;
- bitNumber = static_cast<unsigned>((((insn >> 26) & 0x20)) | ((insn > 19) & 0x1f));
+ bitNumber = static_cast<unsigned>((((insn >> 26) & 0x20)) | ((insn >> 19) & 0x1f));
rt = static_cast<RegisterID>(insn & 0x1f);
return (insn & 0x7e000000) == 0x36000000;
@@ -3138,8 +3155,18 @@ private:
return (insn & 0x7c000000) == 0x14000000;
}
- static int xOrSp(RegisterID reg) { ASSERT(!isZr(reg)); return reg; }
- static int xOrZr(RegisterID reg) { ASSERT(!isSp(reg)); return reg & 31; }
+ static int xOrSp(RegisterID reg)
+ {
+ ASSERT(!isZr(reg));
+ ASSERT(!isIOS() || reg != ARM64Registers::x18);
+ return reg;
+ }
+ static int xOrZr(RegisterID reg)
+ {
+ ASSERT(!isSp(reg));
+ ASSERT(!isIOS() || reg != ARM64Registers::x18);
+ return reg & 31;
+ }
static FPRegisterID xOrZrAsFPR(RegisterID reg) { return static_cast<FPRegisterID>(xOrZr(reg)); }
static int xOrZrOrSp(bool useZr, RegisterID reg) { return useZr ? xOrZr(reg) : xOrSp(reg); }
@@ -3326,6 +3353,12 @@ private:
return (0x1e200800 | M << 31 | S << 29 | type << 22 | rm << 16 | opcode << 12 | rn << 5 | rd);
}
+ ALWAYS_INLINE static int vectorDataProcessingLogical(SIMD3SameLogical uAndSize, FPRegisterID vm, FPRegisterID vn, FPRegisterID vd)
+ {
+ const int Q = 0;
+ return (0xe200400 | Q << 30 | uAndSize << 22 | vm << 16 | SIMD_LogicalOp << 11 | vn << 5 | vd);
+ }
+
// 'o1' means negate
ALWAYS_INLINE static int floatingPointDataProcessing3Source(Datasize type, bool o1, FPRegisterID rm, AddOp o2, FPRegisterID ra, FPRegisterID rn, FPRegisterID rd)
{
@@ -3361,6 +3394,23 @@ private:
}
// 'V' means vector
+ ALWAYS_INLINE static int loadStoreRegisterPairPostIndex(MemPairOpSize size, bool V, MemOp opc, int immediate, RegisterID rn, FPRegisterID rt, FPRegisterID rt2)
+ {
+ ASSERT(size < 3);
+ ASSERT(opc == (opc & 1)); // Only load or store, load signed 64 is handled via size.
+ ASSERT(V || (size != MemPairOp_LoadSigned_32) || (opc == MemOp_LOAD)); // There isn't an integer store signed.
+ unsigned immedShiftAmount = memPairOffsetShift(V, size);
+ int imm7 = immediate >> immedShiftAmount;
+ ASSERT((imm7 << immedShiftAmount) == immediate && isInt7(imm7));
+ return (0x28800000 | size << 30 | V << 26 | opc << 22 | (imm7 & 0x7f) << 15 | rt2 << 10 | xOrSp(rn) << 5 | rt);
+ }
+
+ ALWAYS_INLINE static int loadStoreRegisterPairPostIndex(MemPairOpSize size, bool V, MemOp opc, int immediate, RegisterID rn, RegisterID rt, RegisterID rt2)
+ {
+ return loadStoreRegisterPairPostIndex(size, V, opc, immediate, rn, xOrZrAsFPR(rt), xOrZrAsFPR(rt2));
+ }
+
+ // 'V' means vector
ALWAYS_INLINE static int loadStoreRegisterPreIndex(MemOpSize size, bool V, MemOp opc, int imm9, RegisterID rn, FPRegisterID rt)
{
ASSERT(!(size && V && (opc & 2))); // Maximum vector size is 128 bits.
@@ -3375,6 +3425,57 @@ private:
}
// 'V' means vector
+ ALWAYS_INLINE static int loadStoreRegisterPairPreIndex(MemPairOpSize size, bool V, MemOp opc, int immediate, RegisterID rn, FPRegisterID rt, FPRegisterID rt2)
+ {
+ ASSERT(size < 3);
+ ASSERT(opc == (opc & 1)); // Only load or store, load signed 64 is handled via size.
+ ASSERT(V || (size != MemPairOp_LoadSigned_32) || (opc == MemOp_LOAD)); // There isn't an integer store signed.
+ unsigned immedShiftAmount = memPairOffsetShift(V, size);
+ int imm7 = immediate >> immedShiftAmount;
+ ASSERT((imm7 << immedShiftAmount) == immediate && isInt7(imm7));
+ return (0x29800000 | size << 30 | V << 26 | opc << 22 | (imm7 & 0x7f) << 15 | rt2 << 10 | xOrSp(rn) << 5 | rt);
+ }
+
+ ALWAYS_INLINE static int loadStoreRegisterPairPreIndex(MemPairOpSize size, bool V, MemOp opc, int immediate, RegisterID rn, RegisterID rt, RegisterID rt2)
+ {
+ return loadStoreRegisterPairPreIndex(size, V, opc, immediate, rn, xOrZrAsFPR(rt), xOrZrAsFPR(rt2));
+ }
+
+ // 'V' means vector
+ ALWAYS_INLINE static int loadStoreRegisterPairOffset(MemPairOpSize size, bool V, MemOp opc, int immediate, RegisterID rn, FPRegisterID rt, FPRegisterID rt2)
+ {
+ ASSERT(size < 3);
+ ASSERT(opc == (opc & 1)); // Only load or store, load signed 64 is handled via size.
+ ASSERT(V || (size != MemPairOp_LoadSigned_32) || (opc == MemOp_LOAD)); // There isn't an integer store signed.
+ unsigned immedShiftAmount = memPairOffsetShift(V, size);
+ int imm7 = immediate >> immedShiftAmount;
+ ASSERT((imm7 << immedShiftAmount) == immediate && isInt7(imm7));
+ return (0x29000000 | size << 30 | V << 26 | opc << 22 | (imm7 & 0x7f) << 15 | rt2 << 10 | xOrSp(rn) << 5 | rt);
+ }
+
+ ALWAYS_INLINE static int loadStoreRegisterPairOffset(MemPairOpSize size, bool V, MemOp opc, int immediate, RegisterID rn, RegisterID rt, RegisterID rt2)
+ {
+ return loadStoreRegisterPairOffset(size, V, opc, immediate, rn, xOrZrAsFPR(rt), xOrZrAsFPR(rt2));
+ }
+
+ // 'V' means vector
+ ALWAYS_INLINE static int loadStoreRegisterPairNonTemporal(MemPairOpSize size, bool V, MemOp opc, int immediate, RegisterID rn, FPRegisterID rt, FPRegisterID rt2)
+ {
+ ASSERT(size < 3);
+ ASSERT(opc == (opc & 1)); // Only load or store, load signed 64 is handled via size.
+ ASSERT(V || (size != MemPairOp_LoadSigned_32) || (opc == MemOp_LOAD)); // There isn't an integer store signed.
+ unsigned immedShiftAmount = memPairOffsetShift(V, size);
+ int imm7 = immediate >> immedShiftAmount;
+ ASSERT((imm7 << immedShiftAmount) == immediate && isInt7(imm7));
+ return (0x28000000 | size << 30 | V << 26 | opc << 22 | (imm7 & 0x7f) << 15 | rt2 << 10 | xOrSp(rn) << 5 | rt);
+ }
+
+ ALWAYS_INLINE static int loadStoreRegisterPairNonTemporal(MemPairOpSize size, bool V, MemOp opc, int immediate, RegisterID rn, RegisterID rt, RegisterID rt2)
+ {
+ return loadStoreRegisterPairNonTemporal(size, V, opc, immediate, rn, xOrZrAsFPR(rt), xOrZrAsFPR(rt2));
+ }
+
+ // 'V' means vector
// 'S' means shift rm
ALWAYS_INLINE static int loadStoreRegisterRegisterOffset(MemOpSize size, bool V, MemOp opc, RegisterID rm, ExtendType option, bool S, RegisterID rn, FPRegisterID rt)
{
@@ -3488,6 +3589,37 @@ private:
return (0xd6000000 | opc << 21 | op2 << 16 | op3 << 10 | xOrZr(rn) << 5 | op4);
}
+ // Workaround for Cortex-A53 erratum (835769). Emit an extra nop if the
+ // last instruction in the buffer is a load, store or prefetch. Needed
+ // before 64-bit multiply-accumulate instructions.
+ template<int datasize>
+ ALWAYS_INLINE void nopCortexA53Fix835769()
+ {
+#if CPU(ARM64_CORTEXA53)
+ CHECK_DATASIZE();
+ if (datasize == 64) {
+ if (LIKELY(m_buffer.codeSize() >= sizeof(int32_t))) {
+ // From ARMv8 Reference Manual, Section C4.1: the encoding of the
+ // instructions in the Loads and stores instruction group is:
+ // ---- 1-0- ---- ---- ---- ---- ---- ----
+ if (UNLIKELY((*reinterpret_cast_ptr<int32_t*>(reinterpret_cast_ptr<char*>(m_buffer.data()) + m_buffer.codeSize() - sizeof(int32_t)) & 0x0a000000) == 0x08000000))
+ nop();
+ }
+ }
+#endif
+ }
+
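The 0x0a000000/0x08000000 test above reads bits 27 and 25 of the previous instruction word, i.e. the "---- 1-0- ..." pattern that identifies the loads-and-stores group; worked examples (encodings computed for this note, not from the patch):

// ldr x0, [sp] = 0xf94003e0 : bit 27 set, bit 25 clear -> load/store group, extra nop emitted
// add x0, x1, x2 = 0x8b020020 : bits 27 and 25 both set -> data-processing group, no nop needed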
+ // Workaround for Cortex-A53 erratum (843419). Emit extra nops to avoid
+ // wrong address access after ADRP instruction.
+ ALWAYS_INLINE void nopCortexA53Fix843419()
+ {
+#if CPU(ARM64_CORTEXA53)
+ nop();
+ nop();
+ nop();
+#endif
+ }
+
AssemblerBuffer m_buffer;
Vector<LinkRecord, 0, UnsafeVectorOverflow> m_jumpsToLink;
int m_indexOfLastWatchpoint;
@@ -3505,5 +3637,3 @@ private:
#undef CHECK_FP_MEMOP_DATASIZE
#endif // ENABLE(ASSEMBLER) && CPU(ARM64)
-
-#endif // ARM64Assembler_h