Diffstat (limited to 'Source/JavaScriptCore/b3/air')
80 files changed, 17220 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/b3/air/AirAllocateStack.cpp b/Source/JavaScriptCore/b3/air/AirAllocateStack.cpp new file mode 100644 index 000000000..de9297f26 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirAllocateStack.cpp @@ -0,0 +1,308 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirAllocateStack.h" + +#if ENABLE(B3_JIT) + +#include "AirArgInlines.h" +#include "AirCode.h" +#include "AirInsertionSet.h" +#include "AirInstInlines.h" +#include "AirLiveness.h" +#include "AirPhaseScope.h" +#include "StackAlignment.h" +#include <wtf/ListDump.h> + +namespace JSC { namespace B3 { namespace Air { + +namespace { + +const bool verbose = false; + +bool attemptAssignment( + StackSlot* slot, intptr_t offsetFromFP, const Vector<StackSlot*>& otherSlots) +{ + if (verbose) + dataLog("Attempting to assign ", pointerDump(slot), " to ", offsetFromFP, " with interference ", pointerListDump(otherSlots), "\n"); + + // Need to align it to the slot's desired alignment. 
+ offsetFromFP = -WTF::roundUpToMultipleOf(slot->alignment(), -offsetFromFP); + + for (StackSlot* otherSlot : otherSlots) { + if (!otherSlot->offsetFromFP()) + continue; + bool overlap = WTF::rangesOverlap( + offsetFromFP, + offsetFromFP + static_cast<intptr_t>(slot->byteSize()), + otherSlot->offsetFromFP(), + otherSlot->offsetFromFP() + static_cast<intptr_t>(otherSlot->byteSize())); + if (overlap) + return false; + } + + if (verbose) + dataLog("Assigned ", pointerDump(slot), " to ", offsetFromFP, "\n"); + slot->setOffsetFromFP(offsetFromFP); + return true; +} + +void assign(StackSlot* slot, const Vector<StackSlot*>& otherSlots) +{ + if (verbose) + dataLog("Attempting to assign ", pointerDump(slot), " with interference ", pointerListDump(otherSlots), "\n"); + + if (attemptAssignment(slot, -static_cast<intptr_t>(slot->byteSize()), otherSlots)) + return; + + for (StackSlot* otherSlot : otherSlots) { + if (!otherSlot->offsetFromFP()) + continue; + bool didAssign = attemptAssignment( + slot, otherSlot->offsetFromFP() - static_cast<intptr_t>(slot->byteSize()), otherSlots); + if (didAssign) + return; + } + + RELEASE_ASSERT_NOT_REACHED(); +} + +} // anonymous namespace + +void allocateStack(Code& code) +{ + PhaseScope phaseScope(code, "allocateStack"); + + // Allocate all of the escaped slots in order. This is kind of a crazy algorithm to allow for + // the possibility of stack slots being assigned frame offsets before we even get here. + ASSERT(!code.frameSize()); + Vector<StackSlot*> assignedEscapedStackSlots; + Vector<StackSlot*> escapedStackSlotsWorklist; + for (StackSlot* slot : code.stackSlots()) { + if (slot->isLocked()) { + if (slot->offsetFromFP()) + assignedEscapedStackSlots.append(slot); + else + escapedStackSlotsWorklist.append(slot); + } else { + // It would be super strange to have an unlocked stack slot that has an offset already. + ASSERT(!slot->offsetFromFP()); + } + } + // This is a fairly expensive loop, but it's OK because we'll usually only have a handful of + // escaped stack slots. + while (!escapedStackSlotsWorklist.isEmpty()) { + StackSlot* slot = escapedStackSlotsWorklist.takeLast(); + assign(slot, assignedEscapedStackSlots); + assignedEscapedStackSlots.append(slot); + } + + // Now we handle the spill slots. + StackSlotLiveness liveness(code); + IndexMap<StackSlot, HashSet<StackSlot*>> interference(code.stackSlots().size()); + Vector<StackSlot*> slots; + + for (BasicBlock* block : code) { + StackSlotLiveness::LocalCalc localCalc(liveness, block); + + auto interfere = [&] (unsigned instIndex) { + if (verbose) + dataLog("Interfering: ", WTF::pointerListDump(localCalc.live()), "\n"); + + Inst::forEachDef<Arg>( + block->get(instIndex), block->get(instIndex + 1), + [&] (Arg& arg, Arg::Role, Arg::Type, Arg::Width) { + if (!arg.isStack()) + return; + StackSlot* slot = arg.stackSlot(); + if (slot->kind() != StackSlotKind::Spill) + return; + + for (StackSlot* otherSlot : localCalc.live()) { + interference[slot].add(otherSlot); + interference[otherSlot].add(slot); + } + }); + }; + + for (unsigned instIndex = block->size(); instIndex--;) { + if (verbose) + dataLog("Analyzing: ", block->at(instIndex), "\n"); + + // Kill dead stores. For simplicity we say that a store is killable if it has only late + // defs and those late defs are to things that are dead right now. We only do that + // because that's the only kind of dead stack store we will see here. 
+ Inst& inst = block->at(instIndex); + if (!inst.hasNonArgEffects()) { + bool ok = true; + inst.forEachArg( + [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) { + if (Arg::isEarlyDef(role)) { + ok = false; + return; + } + if (!Arg::isLateDef(role)) + return; + if (!arg.isStack()) { + ok = false; + return; + } + StackSlot* slot = arg.stackSlot(); + if (slot->kind() != StackSlotKind::Spill) { + ok = false; + return; + } + + if (localCalc.isLive(slot)) { + ok = false; + return; + } + }); + if (ok) + inst = Inst(); + } + + interfere(instIndex); + localCalc.execute(instIndex); + } + interfere(-1); + + block->insts().removeAllMatching( + [&] (const Inst& inst) -> bool { + return !inst; + }); + } + + if (verbose) { + for (StackSlot* slot : code.stackSlots()) + dataLog("Interference of ", pointerDump(slot), ": ", pointerListDump(interference[slot]), "\n"); + } + + // Now we assign stack locations. At its heart this algorithm is just first-fit. For each + // StackSlot we just want to find the offsetFromFP that is closest to zero while ensuring no + // overlap with other StackSlots that this overlaps with. + Vector<StackSlot*> otherSlots = assignedEscapedStackSlots; + for (StackSlot* slot : code.stackSlots()) { + if (slot->offsetFromFP()) { + // Already assigned an offset. + continue; + } + + HashSet<StackSlot*>& interferingSlots = interference[slot]; + otherSlots.resize(assignedEscapedStackSlots.size()); + otherSlots.resize(assignedEscapedStackSlots.size() + interferingSlots.size()); + unsigned nextIndex = assignedEscapedStackSlots.size(); + for (StackSlot* otherSlot : interferingSlots) + otherSlots[nextIndex++] = otherSlot; + + assign(slot, otherSlots); + } + + // Figure out how much stack we're using for stack slots. + unsigned frameSizeForStackSlots = 0; + for (StackSlot* slot : code.stackSlots()) { + frameSizeForStackSlots = std::max( + frameSizeForStackSlots, + static_cast<unsigned>(-slot->offsetFromFP())); + } + + frameSizeForStackSlots = WTF::roundUpToMultipleOf(stackAlignmentBytes(), frameSizeForStackSlots); + + // Now we need to deduce how much argument area we need. + for (BasicBlock* block : code) { + for (Inst& inst : *block) { + for (Arg& arg : inst.args) { + if (arg.isCallArg()) { + // For now, we assume that we use 8 bytes of the call arg. But that's not + // such an awesome assumption. + // FIXME: https://bugs.webkit.org/show_bug.cgi?id=150454 + ASSERT(arg.offset() >= 0); + code.requestCallArgAreaSizeInBytes(arg.offset() + 8); + } + } + } + } + + code.setFrameSize(frameSizeForStackSlots + code.callArgAreaSizeInBytes()); + + // Finally, transform the code to use Addr's instead of StackSlot's. This is a lossless + // transformation since we can search the StackSlots array to figure out which StackSlot any + // offset-from-FP refers to. + + // FIXME: This may produce addresses that aren't valid if we end up with a ginormous stack frame. + // We would have to scavenge for temporaries if this happened. Fortunately, this case will be + // extremely rare so we can do crazy things when it arises. 
+ // https://bugs.webkit.org/show_bug.cgi?id=152530 + + InsertionSet insertionSet(code); + for (BasicBlock* block : code) { + for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) { + Inst& inst = block->at(instIndex); + inst.forEachArg( + [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width width) { + auto stackAddr = [&] (int32_t offset) -> Arg { + return Arg::stackAddr(offset, code.frameSize(), width); + }; + + switch (arg.kind()) { + case Arg::Stack: { + StackSlot* slot = arg.stackSlot(); + if (Arg::isZDef(role) + && slot->kind() == StackSlotKind::Spill + && slot->byteSize() > Arg::bytes(width)) { + // Currently we only handle this simple case because it's the only one + // that arises: ZDef's are only 32-bit right now. So, when we hit these + // assertions it means that we need to implement those other kinds of + // zero fills. + RELEASE_ASSERT(slot->byteSize() == 8); + RELEASE_ASSERT(width == Arg::Width32); + + RELEASE_ASSERT(isValidForm(StoreZero32, Arg::Stack)); + insertionSet.insert( + instIndex + 1, StoreZero32, inst.origin, + stackAddr(arg.offset() + 4 + slot->offsetFromFP())); + } + arg = stackAddr(arg.offset() + slot->offsetFromFP()); + break; + } + case Arg::CallArg: + arg = stackAddr(arg.offset() - code.frameSize()); + break; + default: + break; + } + } + ); + } + insertionSet.execute(block); + } +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + + diff --git a/Source/JavaScriptCore/b3/air/AirAllocateStack.h b/Source/JavaScriptCore/b3/air/AirAllocateStack.h new file mode 100644 index 000000000..31519d246 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirAllocateStack.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// This allocates StackSlots to places on the stack. It first allocates the pinned ones in index +// order and then it allocates the rest using first fit. Takes the opportunity to kill dead +// assignments to stack slots, since it knows which ones are live. Also fixes ZDefs to anonymous +// stack slots. 
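To make the first-fit placement described above concrete, here is a minimal standalone sketch of the offset-assignment idea (illustrative only, not part of the patch; Slot, tryPlace, and place are invented names). A slot is first offered the offset closest to zero, then an offset just below each already-placed interfering slot, and a candidate is rejected when its byte range overlaps an assigned neighbor:

#include <cstdint>
#include <cstdio>
#include <vector>

// Invented stand-in for StackSlot: size/alignment in bytes; offsetFromFP of 0
// means "not assigned yet" (slots live below the frame pointer, so assigned
// offsets are negative).
struct Slot {
    intptr_t size;
    intptr_t alignment;
    intptr_t offsetFromFP { 0 };
};

static bool tryPlace(Slot& slot, intptr_t offset, const std::vector<Slot*>& others)
{
    // Round toward more-negative offsets so the slot respects its alignment.
    offset = -((-offset + slot.alignment - 1) / slot.alignment * slot.alignment);
    for (const Slot* other : others) {
        if (!other->offsetFromFP)
            continue; // Unassigned slots cannot conflict yet.
        bool overlap = offset < other->offsetFromFP + other->size
            && other->offsetFromFP < offset + slot.size;
        if (overlap)
            return false;
    }
    slot.offsetFromFP = offset;
    return true;
}

static void place(Slot& slot, const std::vector<Slot*>& interfering)
{
    // First fit: start as close to the frame pointer as possible...
    if (tryPlace(slot, -slot.size, interfering))
        return;
    // ...then try just below each already-placed interfering slot. The patch
    // treats failure at this point as unreachable; a fallback search would go here.
    for (const Slot* other : interfering) {
        if (other->offsetFromFP && tryPlace(slot, other->offsetFromFP - slot.size, interfering))
            return;
    }
}

int main()
{
    Slot a { 8, 8 };
    Slot b { 16, 16 };
    std::vector<Slot*> placed;
    place(a, placed);
    placed.push_back(&a);
    place(b, placed);
    std::printf("a at %ld, b at %ld\n",
        static_cast<long>(a.offsetFromFP), static_cast<long>(b.offsetFromFP));
    return 0;
}

The real phase additionally builds the interference sets from liveness and kills dead spill stores before running this placement, as shown in AirAllocateStack.cpp above.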
+ +void allocateStack(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirArg.cpp b/Source/JavaScriptCore/b3/air/AirArg.cpp new file mode 100644 index 000000000..c777928b7 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirArg.cpp @@ -0,0 +1,350 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirArg.h" + +#if ENABLE(B3_JIT) + +#include "AirSpecial.h" +#include "AirStackSlot.h" +#include "B3Value.h" +#include "FPRInfo.h" +#include "GPRInfo.h" + +#if COMPILER(GCC) && ASSERT_DISABLED +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wreturn-type" +#endif // COMPILER(GCC) && ASSERT_DISABLED + +namespace JSC { namespace B3 { namespace Air { + +bool Arg::isStackMemory() const +{ + switch (kind()) { + case Addr: + return base() == Air::Tmp(GPRInfo::callFrameRegister) + || base() == Air::Tmp(MacroAssembler::stackPointerRegister); + case Stack: + case CallArg: + return true; + default: + return false; + } +} + +bool Arg::isRepresentableAs(Width width, Signedness signedness) const +{ + return isRepresentableAs(width, signedness, value()); +} + +bool Arg::usesTmp(Air::Tmp tmp) const +{ + bool uses = false; + const_cast<Arg*>(this)->forEachTmpFast( + [&] (Air::Tmp otherTmp) { + if (otherTmp == tmp) + uses = true; + }); + return uses; +} + +bool Arg::canRepresent(Value* value) const +{ + return isType(typeForB3Type(value->type())); +} + +bool Arg::isCompatibleType(const Arg& other) const +{ + if (hasType()) + return other.isType(type()); + if (other.hasType()) + return isType(other.type()); + return true; +} + +unsigned Arg::jsHash() const +{ + unsigned result = static_cast<unsigned>(m_kind); + + switch (m_kind) { + case Invalid: + case Special: + break; + case Tmp: + result += m_base.internalValue(); + break; + case Imm: + case BitImm: + case CallArg: + case RelCond: + case ResCond: + case DoubleCond: + case WidthArg: + result += static_cast<unsigned>(m_offset); + break; + case BigImm: + case BitImm64: + result += static_cast<unsigned>(m_offset); + result += static_cast<unsigned>(m_offset >> 32); + break; + case Addr: + result += m_offset; + result += m_base.internalValue(); + break; + case Index: + result += 
static_cast<unsigned>(m_offset); + result += m_scale; + result += m_base.internalValue(); + result += m_index.internalValue(); + break; + case Stack: + result += static_cast<unsigned>(m_scale); + result += stackSlot()->index(); + break; + } + + return result; +} + +void Arg::dump(PrintStream& out) const +{ + switch (m_kind) { + case Invalid: + out.print("<invalid>"); + return; + case Tmp: + out.print(tmp()); + return; + case Imm: + out.print("$", m_offset); + return; + case BigImm: + out.printf("$0x%llx", static_cast<long long unsigned>(m_offset)); + return; + case BitImm: + out.print("$", m_offset); + return; + case BitImm64: + out.printf("$0x%llx", static_cast<long long unsigned>(m_offset)); + return; + case Addr: + if (offset()) + out.print(offset()); + out.print("(", base(), ")"); + return; + case Index: + if (offset()) + out.print(offset()); + out.print("(", base(), ",", index()); + if (scale() != 1) + out.print(",", scale()); + out.print(")"); + return; + case Stack: + if (offset()) + out.print(offset()); + out.print("(", pointerDump(stackSlot()), ")"); + return; + case CallArg: + if (offset()) + out.print(offset()); + out.print("(callArg)"); + return; + case RelCond: + out.print(asRelationalCondition()); + return; + case ResCond: + out.print(asResultCondition()); + return; + case DoubleCond: + out.print(asDoubleCondition()); + return; + case Special: + out.print(pointerDump(special())); + return; + case WidthArg: + out.print(width()); + return; + } + + RELEASE_ASSERT_NOT_REACHED(); +} + +} } } // namespace JSC::B3::Air + +namespace WTF { + +using namespace JSC::B3::Air; + +void printInternal(PrintStream& out, Arg::Kind kind) +{ + switch (kind) { + case Arg::Invalid: + out.print("Invalid"); + return; + case Arg::Tmp: + out.print("Tmp"); + return; + case Arg::Imm: + out.print("Imm"); + return; + case Arg::BigImm: + out.print("BigImm"); + return; + case Arg::BitImm: + out.print("BitImm"); + return; + case Arg::BitImm64: + out.print("BitImm64"); + return; + case Arg::Addr: + out.print("Addr"); + return; + case Arg::Stack: + out.print("Stack"); + return; + case Arg::CallArg: + out.print("CallArg"); + return; + case Arg::Index: + out.print("Index"); + return; + case Arg::RelCond: + out.print("RelCond"); + return; + case Arg::ResCond: + out.print("ResCond"); + return; + case Arg::DoubleCond: + out.print("DoubleCond"); + return; + case Arg::Special: + out.print("Special"); + return; + case Arg::WidthArg: + out.print("WidthArg"); + return; + } + + RELEASE_ASSERT_NOT_REACHED(); +} + +void printInternal(PrintStream& out, Arg::Role role) +{ + switch (role) { + case Arg::Use: + out.print("Use"); + return; + case Arg::Def: + out.print("Def"); + return; + case Arg::UseDef: + out.print("UseDef"); + return; + case Arg::ZDef: + out.print("ZDef"); + return; + case Arg::UseZDef: + out.print("UseZDef"); + return; + case Arg::UseAddr: + out.print("UseAddr"); + return; + case Arg::ColdUse: + out.print("ColdUse"); + return; + case Arg::LateUse: + out.print("LateUse"); + return; + case Arg::LateColdUse: + out.print("LateColdUse"); + return; + case Arg::EarlyDef: + out.print("EarlyDef"); + return; + case Arg::Scratch: + out.print("Scratch"); + return; + } + + RELEASE_ASSERT_NOT_REACHED(); +} + +void printInternal(PrintStream& out, Arg::Type type) +{ + switch (type) { + case Arg::GP: + out.print("GP"); + return; + case Arg::FP: + out.print("FP"); + return; + } + + RELEASE_ASSERT_NOT_REACHED(); +} + +void printInternal(PrintStream& out, Arg::Width width) +{ + switch (width) { + case Arg::Width8: + 
out.print("8"); + return; + case Arg::Width16: + out.print("16"); + return; + case Arg::Width32: + out.print("32"); + return; + case Arg::Width64: + out.print("64"); + return; + } + + RELEASE_ASSERT_NOT_REACHED(); +} + +void printInternal(PrintStream& out, Arg::Signedness signedness) +{ + switch (signedness) { + case Arg::Signed: + out.print("Signed"); + return; + case Arg::Unsigned: + out.print("Unsigned"); + return; + } + + RELEASE_ASSERT_NOT_REACHED(); +} + +} // namespace WTF + +#if COMPILER(GCC) && ASSERT_DISABLED +#pragma GCC diagnostic pop +#endif // COMPILER(GCC) && ASSERT_DISABLED + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirArg.h b/Source/JavaScriptCore/b3/air/AirArg.h new file mode 100644 index 000000000..13db1ce7e --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirArg.h @@ -0,0 +1,1383 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirTmp.h" +#include "B3Common.h" +#include "B3Type.h" +#include <wtf/Optional.h> + +#if COMPILER(GCC) && ASSERT_DISABLED +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wreturn-type" +#endif // COMPILER(GCC) && ASSERT_DISABLED + +namespace JSC { namespace B3 { + +class Value; + +namespace Air { + +class Special; +class StackSlot; + +// This class name is also intentionally terse because we will say it a lot. You'll see code like +// Inst(..., Arg::imm(5), Arg::addr(thing, blah), ...) +class Arg { +public: + // These enum members are intentionally terse because we have to mention them a lot. + enum Kind : int8_t { + Invalid, + + // This is either an unassigned temporary or a register. All unassigned temporaries + // eventually become registers. + Tmp, + + // This is an immediate that the instruction will materialize. Imm is the immediate that can be + // inlined into most instructions, while BigImm indicates a constant materialization and is + // usually only usable with Move. Specials may also admit it, for example for stackmaps used for + // OSR exit and tail calls. + // BitImm is an immediate for Bitwise operation (And, Xor, etc). + Imm, + BigImm, + BitImm, + BitImm64, + + // These are the addresses. 
Instructions may load from (Use), store to (Def), or evaluate + // (UseAddr) addresses. + Addr, + Stack, + CallArg, + Index, + + // Immediate operands that customize the behavior of an operation. You can think of them as + // secondary opcodes. They are always "Use"'d. + RelCond, + ResCond, + DoubleCond, + Special, + WidthArg + }; + + enum Role : int8_t { + // Use means that the Inst will read from this value before doing anything else. + // + // For Tmp: The Inst will read this Tmp. + // For Arg::addr and friends: The Inst will load from this address. + // For Arg::imm and friends: The Inst will materialize and use this immediate. + // For RelCond/ResCond/Special: This is the only valid role for these kinds. + // + // Note that Use of an address does not mean escape. It only means that the instruction will + // load from the address before doing anything else. This is a bit tricky; for example + // Specials could theoretically squirrel away the address and effectively escape it. However, + // this is not legal. On the other hand, any address other than Stack is presumed to be + // always escaping, and Stack is presumed to be always escaping if it's Locked. + Use, + + // Exactly like Use, except that it also implies that the use is cold: that is, replacing the + // use with something on the stack is free. + ColdUse, + + // LateUse means that the Inst will read from this value after doing its Def's. Note that LateUse + // on an Addr or Index still means Use on the internal temporaries. Note that specifying the + // same Tmp once as Def and once as LateUse has undefined behavior: the use may happen before + // the def, or it may happen after it. + LateUse, + + // Combination of LateUse and ColdUse. + LateColdUse, + + // Def means that the Inst will write to this value after doing everything else. + // + // For Tmp: The Inst will write to this Tmp. + // For Arg::addr and friends: The Inst will store to this address. + // This isn't valid for any other kinds. + // + // Like Use of address, Def of address does not mean escape. + Def, + + // This is a special variant of Def that implies that the upper bits of the target register are + // zero-filled. Specifically, if the Width of a ZDef is less than the largest possible width of + // the argument (for example, we're on a 64-bit machine and we have a Width32 ZDef of a GPR) then + // this has different implications for the upper bits (i.e. the top 32 bits in our example) + // depending on the kind of the argument: + // + // For register: the upper bits are zero-filled. + // For anonymous stack slot: the upper bits are zero-filled. + // For address: the upper bits are not touched (i.e. we do a 32-bit store in our example). + // For tmp: either the upper bits are not touched or they are zero-filled, and we won't know + // which until we lower the tmp to either a StackSlot or a Reg. + // + // The behavior of ZDef is consistent with what happens when you perform 32-bit operations on a + // 64-bit GPR. It's not consistent with what happens with 8-bit or 16-bit Defs on x86 GPRs, or + // what happens with float Defs in ARM NEON or X86 SSE. Hence why we have both Def and ZDef. + ZDef, + + // This is a combined Use and Def. It means that both things happen. + UseDef, + + // This is a combined Use and ZDef. It means that both things happen. + UseZDef, + + // This is like Def, but implies that the assignment occurs before the start of the Inst's + // execution rather than after. 
Note that specifying the same Tmp once as EarlyDef and once + // as Use has undefined behavior: the use may happen before the def, or it may happen after + // it. + EarlyDef, + + // Some instructions need a scratch register. We model this by saying that the temporary is + // defined early and used late. This role implies that. + Scratch, + + // This is a special kind of use that is only valid for addresses. It means that the + // instruction will evaluate the address expression and consume the effective address, but it + // will neither load nor store. This is an escaping use, because now the address may be + // passed along to who-knows-where. Note that this isn't really a Use of the Arg, but it does + // imply that we're Use'ing any registers that the Arg contains. + UseAddr + }; + + enum Type : int8_t { + GP, + FP + }; + + static const unsigned numTypes = 2; + + template<typename Functor> + static void forEachType(const Functor& functor) + { + functor(GP); + functor(FP); + } + + enum Width : int8_t { + Width8, + Width16, + Width32, + Width64 + }; + + static Width pointerWidth() + { + if (sizeof(void*) == 8) + return Width64; + return Width32; + } + + enum Signedness : int8_t { + Signed, + Unsigned + }; + + // Returns true if the Role implies that the Inst will Use the Arg. It's deliberately false for + // UseAddr, since isAnyUse() for an Arg::addr means that we are loading from the address. + static bool isAnyUse(Role role) + { + switch (role) { + case Use: + case ColdUse: + case UseDef: + case UseZDef: + case LateUse: + case LateColdUse: + case Scratch: + return true; + case Def: + case ZDef: + case UseAddr: + case EarlyDef: + return false; + } + ASSERT_NOT_REACHED(); + } + + static bool isColdUse(Role role) + { + switch (role) { + case ColdUse: + case LateColdUse: + return true; + case Use: + case UseDef: + case UseZDef: + case LateUse: + case Def: + case ZDef: + case UseAddr: + case Scratch: + case EarlyDef: + return false; + } + ASSERT_NOT_REACHED(); + } + + static bool isWarmUse(Role role) + { + return isAnyUse(role) && !isColdUse(role); + } + + static Role cooled(Role role) + { + switch (role) { + case ColdUse: + case LateColdUse: + case UseDef: + case UseZDef: + case Def: + case ZDef: + case UseAddr: + case Scratch: + case EarlyDef: + return role; + case Use: + return ColdUse; + case LateUse: + return LateColdUse; + } + ASSERT_NOT_REACHED(); + } + + // Returns true if the Role implies that the Inst will Use the Arg before doing anything else. + static bool isEarlyUse(Role role) + { + switch (role) { + case Use: + case ColdUse: + case UseDef: + case UseZDef: + return true; + case Def: + case ZDef: + case UseAddr: + case LateUse: + case LateColdUse: + case Scratch: + case EarlyDef: + return false; + } + ASSERT_NOT_REACHED(); + } + + // Returns true if the Role implies that the Inst will Use the Arg after doing everything else. + static bool isLateUse(Role role) + { + switch (role) { + case LateUse: + case LateColdUse: + case Scratch: + return true; + case ColdUse: + case Use: + case UseDef: + case UseZDef: + case Def: + case ZDef: + case UseAddr: + case EarlyDef: + return false; + } + ASSERT_NOT_REACHED(); + } + + // Returns true if the Role implies that the Inst will Def the Arg. 
+ static bool isAnyDef(Role role) + { + switch (role) { + case Use: + case ColdUse: + case UseAddr: + case LateUse: + case LateColdUse: + return false; + case Def: + case UseDef: + case ZDef: + case UseZDef: + case EarlyDef: + case Scratch: + return true; + } + ASSERT_NOT_REACHED(); + } + + // Returns true if the Role implies that the Inst will Def the Arg before start of execution. + static bool isEarlyDef(Role role) + { + switch (role) { + case Use: + case ColdUse: + case UseAddr: + case LateUse: + case Def: + case UseDef: + case ZDef: + case UseZDef: + case LateColdUse: + return false; + case EarlyDef: + case Scratch: + return true; + } + ASSERT_NOT_REACHED(); + } + + // Returns true if the Role implies that the Inst will Def the Arg after the end of execution. + static bool isLateDef(Role role) + { + switch (role) { + case Use: + case ColdUse: + case UseAddr: + case LateUse: + case EarlyDef: + case Scratch: + case LateColdUse: + return false; + case Def: + case UseDef: + case ZDef: + case UseZDef: + return true; + } + ASSERT_NOT_REACHED(); + } + + // Returns true if the Role implies that the Inst will ZDef the Arg. + static bool isZDef(Role role) + { + switch (role) { + case Use: + case ColdUse: + case UseAddr: + case LateUse: + case Def: + case UseDef: + case EarlyDef: + case Scratch: + case LateColdUse: + return false; + case ZDef: + case UseZDef: + return true; + } + ASSERT_NOT_REACHED(); + } + + static Type typeForB3Type(B3::Type type) + { + switch (type) { + case Void: + ASSERT_NOT_REACHED(); + return GP; + case Int32: + case Int64: + return GP; + case Float: + case Double: + return FP; + } + ASSERT_NOT_REACHED(); + return GP; + } + + static Width widthForB3Type(B3::Type type) + { + switch (type) { + case Void: + ASSERT_NOT_REACHED(); + return Width8; + case Int32: + case Float: + return Width32; + case Int64: + case Double: + return Width64; + } + ASSERT_NOT_REACHED(); + } + + static Width conservativeWidth(Type type) + { + return type == GP ? pointerWidth() : Width64; + } + + static Width minimumWidth(Type type) + { + return type == GP ? Width8 : Width32; + } + + static unsigned bytes(Width width) + { + return 1 << width; + } + + static Width widthForBytes(unsigned bytes) + { + switch (bytes) { + case 0: + case 1: + return Width8; + case 2: + return Width16; + case 3: + case 4: + return Width32; + default: + return Width64; + } + } + + Arg() + : m_kind(Invalid) + { + } + + Arg(Air::Tmp tmp) + : m_kind(Tmp) + , m_base(tmp) + { + } + + Arg(Reg reg) + : Arg(Air::Tmp(reg)) + { + } + + static Arg imm(int64_t value) + { + Arg result; + result.m_kind = Imm; + result.m_offset = value; + return result; + } + + static Arg bigImm(int64_t value) + { + Arg result; + result.m_kind = BigImm; + result.m_offset = value; + return result; + } + + static Arg bitImm(int64_t value) + { + Arg result; + result.m_kind = BitImm; + result.m_offset = value; + return result; + } + + static Arg bitImm64(int64_t value) + { + Arg result; + result.m_kind = BitImm64; + result.m_offset = value; + return result; + } + + static Arg immPtr(const void* address) + { + return bigImm(bitwise_cast<intptr_t>(address)); + } + + static Arg addr(Air::Tmp base, int32_t offset = 0) + { + ASSERT(base.isGP()); + Arg result; + result.m_kind = Addr; + result.m_base = base; + result.m_offset = offset; + return result; + } + + static Arg stack(StackSlot* value, int32_t offset = 0) + { + Arg result; + result.m_kind = Stack; + result.m_offset = bitwise_cast<intptr_t>(value); + result.m_scale = offset; // I know, yuck. 
+ return result; + } + + static Arg callArg(int32_t offset) + { + Arg result; + result.m_kind = CallArg; + result.m_offset = offset; + return result; + } + + static Arg stackAddr(int32_t offsetFromFP, unsigned frameSize, Width width) + { + Arg result = Arg::addr(Air::Tmp(GPRInfo::callFrameRegister), offsetFromFP); + if (!result.isValidForm(width)) { + result = Arg::addr( + Air::Tmp(MacroAssembler::stackPointerRegister), + offsetFromFP + frameSize); + } + return result; + } + + // If you don't pass a Width, this optimistically assumes that you're using the right width. + static bool isValidScale(unsigned scale, std::optional<Width> width = std::nullopt) + { + switch (scale) { + case 1: + if (isX86() || isARM64()) + return true; + return false; + case 2: + case 4: + case 8: + if (isX86()) + return true; + if (isARM64()) { + if (!width) + return true; + return scale == 1 || scale == bytes(*width); + } + return false; + default: + return false; + } + } + + static unsigned logScale(unsigned scale) + { + switch (scale) { + case 1: + return 0; + case 2: + return 1; + case 4: + return 2; + case 8: + return 3; + default: + ASSERT_NOT_REACHED(); + return 0; + } + } + + static Arg index(Air::Tmp base, Air::Tmp index, unsigned scale = 1, int32_t offset = 0) + { + ASSERT(base.isGP()); + ASSERT(index.isGP()); + ASSERT(isValidScale(scale)); + Arg result; + result.m_kind = Index; + result.m_base = base; + result.m_index = index; + result.m_scale = static_cast<int32_t>(scale); + result.m_offset = offset; + return result; + } + + static Arg relCond(MacroAssembler::RelationalCondition condition) + { + Arg result; + result.m_kind = RelCond; + result.m_offset = condition; + return result; + } + + static Arg resCond(MacroAssembler::ResultCondition condition) + { + Arg result; + result.m_kind = ResCond; + result.m_offset = condition; + return result; + } + + static Arg doubleCond(MacroAssembler::DoubleCondition condition) + { + Arg result; + result.m_kind = DoubleCond; + result.m_offset = condition; + return result; + } + + static Arg special(Air::Special* special) + { + Arg result; + result.m_kind = Special; + result.m_offset = bitwise_cast<intptr_t>(special); + return result; + } + + static Arg widthArg(Width width) + { + Arg result; + result.m_kind = WidthArg; + result.m_offset = width; + return result; + } + + bool operator==(const Arg& other) const + { + return m_offset == other.m_offset + && m_kind == other.m_kind + && m_base == other.m_base + && m_index == other.m_index + && m_scale == other.m_scale; + } + + bool operator!=(const Arg& other) const + { + return !(*this == other); + } + + explicit operator bool() const { return *this != Arg(); } + + Kind kind() const + { + return m_kind; + } + + bool isTmp() const + { + return kind() == Tmp; + } + + bool isImm() const + { + return kind() == Imm; + } + + bool isBigImm() const + { + return kind() == BigImm; + } + + bool isBitImm() const + { + return kind() == BitImm; + } + + bool isBitImm64() const + { + return kind() == BitImm64; + } + + bool isSomeImm() const + { + switch (kind()) { + case Imm: + case BigImm: + case BitImm: + case BitImm64: + return true; + default: + return false; + } + } + + bool isAddr() const + { + return kind() == Addr; + } + + bool isStack() const + { + return kind() == Stack; + } + + bool isCallArg() const + { + return kind() == CallArg; + } + + bool isIndex() const + { + return kind() == Index; + } + + bool isMemory() const + { + switch (kind()) { + case Addr: + case Stack: + case CallArg: + case Index: + return true; + default: + 
return false; + } + } + + bool isStackMemory() const; + + bool isRelCond() const + { + return kind() == RelCond; + } + + bool isResCond() const + { + return kind() == ResCond; + } + + bool isDoubleCond() const + { + return kind() == DoubleCond; + } + + bool isCondition() const + { + switch (kind()) { + case RelCond: + case ResCond: + case DoubleCond: + return true; + default: + return false; + } + } + + bool isSpecial() const + { + return kind() == Special; + } + + bool isWidthArg() const + { + return kind() == WidthArg; + } + + bool isAlive() const + { + return isTmp() || isStack(); + } + + Air::Tmp tmp() const + { + ASSERT(kind() == Tmp); + return m_base; + } + + int64_t value() const + { + ASSERT(isSomeImm()); + return m_offset; + } + + template<typename T> + bool isRepresentableAs() const + { + return B3::isRepresentableAs<T>(value()); + } + + static bool isRepresentableAs(Width width, Signedness signedness, int64_t value) + { + switch (signedness) { + case Signed: + switch (width) { + case Width8: + return B3::isRepresentableAs<int8_t>(value); + case Width16: + return B3::isRepresentableAs<int16_t>(value); + case Width32: + return B3::isRepresentableAs<int32_t>(value); + case Width64: + return B3::isRepresentableAs<int64_t>(value); + } + case Unsigned: + switch (width) { + case Width8: + return B3::isRepresentableAs<uint8_t>(value); + case Width16: + return B3::isRepresentableAs<uint16_t>(value); + case Width32: + return B3::isRepresentableAs<uint32_t>(value); + case Width64: + return B3::isRepresentableAs<uint64_t>(value); + } + } + ASSERT_NOT_REACHED(); + } + + bool isRepresentableAs(Width, Signedness) const; + + static int64_t castToType(Width width, Signedness signedness, int64_t value) + { + switch (signedness) { + case Signed: + switch (width) { + case Width8: + return static_cast<int8_t>(value); + case Width16: + return static_cast<int16_t>(value); + case Width32: + return static_cast<int32_t>(value); + case Width64: + return static_cast<int64_t>(value); + } + case Unsigned: + switch (width) { + case Width8: + return static_cast<uint8_t>(value); + case Width16: + return static_cast<uint16_t>(value); + case Width32: + return static_cast<uint32_t>(value); + case Width64: + return static_cast<uint64_t>(value); + } + } + ASSERT_NOT_REACHED(); + } + + template<typename T> + T asNumber() const + { + return static_cast<T>(value()); + } + + void* pointerValue() const + { + ASSERT(kind() == BigImm); + return bitwise_cast<void*>(static_cast<intptr_t>(m_offset)); + } + + Air::Tmp base() const + { + ASSERT(kind() == Addr || kind() == Index); + return m_base; + } + + bool hasOffset() const { return isMemory(); } + + int32_t offset() const + { + if (kind() == Stack) + return static_cast<int32_t>(m_scale); + ASSERT(kind() == Addr || kind() == CallArg || kind() == Index); + return static_cast<int32_t>(m_offset); + } + + StackSlot* stackSlot() const + { + ASSERT(kind() == Stack); + return bitwise_cast<StackSlot*>(m_offset); + } + + Air::Tmp index() const + { + ASSERT(kind() == Index); + return m_index; + } + + unsigned scale() const + { + ASSERT(kind() == Index); + return m_scale; + } + + unsigned logScale() const + { + return logScale(scale()); + } + + Air::Special* special() const + { + ASSERT(kind() == Special); + return bitwise_cast<Air::Special*>(m_offset); + } + + Width width() const + { + ASSERT(kind() == WidthArg); + return static_cast<Width>(m_offset); + } + + bool isGPTmp() const + { + return isTmp() && tmp().isGP(); + } + + bool isFPTmp() const + { + return isTmp() && tmp().isFP(); + 
} + + // Tells us if this Arg can be used in a position that requires a GP value. + bool isGP() const + { + switch (kind()) { + case Imm: + case BigImm: + case BitImm: + case BitImm64: + case Addr: + case Index: + case Stack: + case CallArg: + case RelCond: + case ResCond: + case DoubleCond: + case Special: + case WidthArg: + return true; + case Tmp: + return isGPTmp(); + case Invalid: + return false; + } + ASSERT_NOT_REACHED(); + } + + // Tells us if this Arg can be used in a position that requires a FP value. + bool isFP() const + { + switch (kind()) { + case Imm: + case BitImm: + case BitImm64: + case RelCond: + case ResCond: + case DoubleCond: + case Special: + case WidthArg: + case Invalid: + return false; + case Addr: + case Index: + case Stack: + case CallArg: + case BigImm: // Yes, we allow BigImm as a double immediate. We use this for implementing stackmaps. + return true; + case Tmp: + return isFPTmp(); + } + ASSERT_NOT_REACHED(); + } + + bool hasType() const + { + switch (kind()) { + case Imm: + case BitImm: + case BitImm64: + case Special: + case Tmp: + return true; + default: + return false; + } + } + + // The type is ambiguous for some arg kinds. Call with care. + Type type() const + { + return isGP() ? GP : FP; + } + + bool isType(Type type) const + { + switch (type) { + case GP: + return isGP(); + case FP: + return isFP(); + } + ASSERT_NOT_REACHED(); + } + + bool canRepresent(Value* value) const; + + bool isCompatibleType(const Arg& other) const; + + bool isGPR() const + { + return isTmp() && tmp().isGPR(); + } + + GPRReg gpr() const + { + return tmp().gpr(); + } + + bool isFPR() const + { + return isTmp() && tmp().isFPR(); + } + + FPRReg fpr() const + { + return tmp().fpr(); + } + + bool isReg() const + { + return isTmp() && tmp().isReg(); + } + + Reg reg() const + { + return tmp().reg(); + } + + unsigned gpTmpIndex() const + { + return tmp().gpTmpIndex(); + } + + unsigned fpTmpIndex() const + { + return tmp().fpTmpIndex(); + } + + unsigned tmpIndex() const + { + return tmp().tmpIndex(); + } + + static bool isValidImmForm(int64_t value) + { + if (isX86()) + return B3::isRepresentableAs<int32_t>(value); + if (isARM64()) + return isUInt12(value); + return false; + } + + static bool isValidBitImmForm(int64_t value) + { + if (isX86()) + return B3::isRepresentableAs<int32_t>(value); + if (isARM64()) + return ARM64LogicalImmediate::create32(value).isValid(); + return false; + } + + static bool isValidBitImm64Form(int64_t value) + { + if (isX86()) + return B3::isRepresentableAs<int32_t>(value); + if (isARM64()) + return ARM64LogicalImmediate::create64(value).isValid(); + return false; + } + + static bool isValidAddrForm(int32_t offset, std::optional<Width> width = std::nullopt) + { + if (isX86()) + return true; + if (isARM64()) { + if (!width) + return true; + + if (isValidSignedImm9(offset)) + return true; + + switch (*width) { + case Width8: + return isValidScaledUImm12<8>(offset); + case Width16: + return isValidScaledUImm12<16>(offset); + case Width32: + return isValidScaledUImm12<32>(offset); + case Width64: + return isValidScaledUImm12<64>(offset); + } + } + return false; + } + + static bool isValidIndexForm(unsigned scale, int32_t offset, std::optional<Width> width = std::nullopt) + { + if (!isValidScale(scale, width)) + return false; + if (isX86()) + return true; + if (isARM64()) + return !offset; + return false; + } + + // If you don't pass a width then this optimistically assumes that you're using the right width. 
But + // the width is relevant to validity, so passing a null width is only useful for assertions. Don't + // pass null widths when cascading through Args in the instruction selector! + bool isValidForm(std::optional<Width> width = std::nullopt) const + { + switch (kind()) { + case Invalid: + return false; + case Tmp: + return true; + case Imm: + return isValidImmForm(value()); + case BigImm: + return true; + case BitImm: + return isValidBitImmForm(value()); + case BitImm64: + return isValidBitImm64Form(value()); + case Addr: + case Stack: + case CallArg: + return isValidAddrForm(offset(), width); + case Index: + return isValidIndexForm(scale(), offset(), width); + case RelCond: + case ResCond: + case DoubleCond: + case Special: + case WidthArg: + return true; + } + ASSERT_NOT_REACHED(); + } + + template<typename Functor> + void forEachTmpFast(const Functor& functor) + { + switch (m_kind) { + case Tmp: + case Addr: + functor(m_base); + break; + case Index: + functor(m_base); + functor(m_index); + break; + default: + break; + } + } + + bool usesTmp(Air::Tmp tmp) const; + + template<typename Thing> + bool is() const; + + template<typename Thing> + Thing as() const; + + template<typename Thing, typename Functor> + void forEachFast(const Functor&); + + template<typename Thing, typename Functor> + void forEach(Role, Type, Width, const Functor&); + + // This is smart enough to know that an address arg in a Def or UseDef rule will use its + // tmps and never def them. For example, this: + // + // mov %rax, (%rcx) + // + // This defs (%rcx) but uses %rcx. + template<typename Functor> + void forEachTmp(Role argRole, Type argType, Width argWidth, const Functor& functor) + { + switch (m_kind) { + case Tmp: + ASSERT(isAnyUse(argRole) || isAnyDef(argRole)); + functor(m_base, argRole, argType, argWidth); + break; + case Addr: + functor(m_base, Use, GP, argRole == UseAddr ? argWidth : pointerWidth()); + break; + case Index: + functor(m_base, Use, GP, argRole == UseAddr ? argWidth : pointerWidth()); + functor(m_index, Use, GP, argRole == UseAddr ? 
argWidth : pointerWidth()); + break; + default: + break; + } + } + + MacroAssembler::TrustedImm32 asTrustedImm32() const + { + ASSERT(isImm() || isBitImm()); + return MacroAssembler::TrustedImm32(static_cast<int32_t>(m_offset)); + } + +#if USE(JSVALUE64) + MacroAssembler::TrustedImm64 asTrustedImm64() const + { + ASSERT(isBigImm() || isBitImm64()); + return MacroAssembler::TrustedImm64(value()); + } +#endif + + MacroAssembler::TrustedImmPtr asTrustedImmPtr() const + { + if (is64Bit()) + ASSERT(isBigImm()); + else + ASSERT(isImm()); + return MacroAssembler::TrustedImmPtr(pointerValue()); + } + + MacroAssembler::Address asAddress() const + { + ASSERT(isAddr()); + return MacroAssembler::Address(m_base.gpr(), static_cast<int32_t>(m_offset)); + } + + MacroAssembler::BaseIndex asBaseIndex() const + { + ASSERT(isIndex()); + return MacroAssembler::BaseIndex( + m_base.gpr(), m_index.gpr(), static_cast<MacroAssembler::Scale>(logScale()), + static_cast<int32_t>(m_offset)); + } + + MacroAssembler::RelationalCondition asRelationalCondition() const + { + ASSERT(isRelCond()); + return static_cast<MacroAssembler::RelationalCondition>(m_offset); + } + + MacroAssembler::ResultCondition asResultCondition() const + { + ASSERT(isResCond()); + return static_cast<MacroAssembler::ResultCondition>(m_offset); + } + + MacroAssembler::DoubleCondition asDoubleCondition() const + { + ASSERT(isDoubleCond()); + return static_cast<MacroAssembler::DoubleCondition>(m_offset); + } + + // Tells you if the Arg is invertible. Only condition arguments are invertible, and even for those, there + // are a few exceptions - notably Overflow and Signed. + bool isInvertible() const + { + switch (kind()) { + case RelCond: + case DoubleCond: + return true; + case ResCond: + return MacroAssembler::isInvertible(asResultCondition()); + default: + return false; + } + } + + // This is valid for condition arguments. It will invert them. + Arg inverted(bool inverted = true) const + { + if (!inverted) + return *this; + switch (kind()) { + case RelCond: + return relCond(MacroAssembler::invert(asRelationalCondition())); + case ResCond: + return resCond(MacroAssembler::invert(asResultCondition())); + case DoubleCond: + return doubleCond(MacroAssembler::invert(asDoubleCondition())); + default: + RELEASE_ASSERT_NOT_REACHED(); + return Arg(); + } + } + + Arg flipped(bool flipped = true) const + { + if (!flipped) + return Arg(); + return relCond(MacroAssembler::flip(asRelationalCondition())); + } + + bool isSignedCond() const + { + return isRelCond() && MacroAssembler::isSigned(asRelationalCondition()); + } + + bool isUnsignedCond() const + { + return isRelCond() && MacroAssembler::isUnsigned(asRelationalCondition()); + } + + // This computes a hash for comparing this to JSAir's Arg. + unsigned jsHash() const; + + void dump(PrintStream&) const; + + Arg(WTF::HashTableDeletedValueType) + : m_base(WTF::HashTableDeletedValue) + { + } + + bool isHashTableDeletedValue() const + { + return *this == Arg(WTF::HashTableDeletedValue); + } + + unsigned hash() const + { + // This really doesn't have to be that great. 
+ return WTF::IntHash<int64_t>::hash(m_offset) + m_kind + m_scale + m_base.hash() + + m_index.hash(); + } + +private: + int64_t m_offset { 0 }; + Kind m_kind { Invalid }; + int32_t m_scale { 1 }; + Air::Tmp m_base; + Air::Tmp m_index; +}; + +struct ArgHash { + static unsigned hash(const Arg& key) { return key.hash(); } + static bool equal(const Arg& a, const Arg& b) { return a == b; } + static const bool safeToCompareToEmptyOrDeleted = true; +}; + +} } } // namespace JSC::B3::Air + +namespace WTF { + +JS_EXPORT_PRIVATE void printInternal(PrintStream&, JSC::B3::Air::Arg::Kind); +JS_EXPORT_PRIVATE void printInternal(PrintStream&, JSC::B3::Air::Arg::Role); +JS_EXPORT_PRIVATE void printInternal(PrintStream&, JSC::B3::Air::Arg::Type); +JS_EXPORT_PRIVATE void printInternal(PrintStream&, JSC::B3::Air::Arg::Width); +JS_EXPORT_PRIVATE void printInternal(PrintStream&, JSC::B3::Air::Arg::Signedness); + +template<typename T> struct DefaultHash; +template<> struct DefaultHash<JSC::B3::Air::Arg> { + typedef JSC::B3::Air::ArgHash Hash; +}; + +template<typename T> struct HashTraits; +template<> struct HashTraits<JSC::B3::Air::Arg> : SimpleClassHashTraits<JSC::B3::Air::Arg> { + // Because m_scale is 1 in the empty value. + static const bool emptyValueIsZero = false; +}; + +} // namespace WTF + +#if COMPILER(GCC) && ASSERT_DISABLED +#pragma GCC diagnostic pop +#endif // COMPILER(GCC) && ASSERT_DISABLED + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirArgInlines.h b/Source/JavaScriptCore/b3/air/AirArgInlines.h new file mode 100644 index 000000000..73f7d5bba --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirArgInlines.h @@ -0,0 +1,194 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirArg.h" + +namespace JSC { namespace B3 { namespace Air { + +template<typename T> struct ArgThingHelper; + +template<> struct ArgThingHelper<Tmp> { + static bool is(const Arg& arg) + { + return arg.isTmp(); + } + + static Tmp as(const Arg& arg) + { + if (is(arg)) + return arg.tmp(); + return Tmp(); + } + + template<typename Functor> + static void forEachFast(Arg& arg, const Functor& functor) + { + arg.forEachTmpFast(functor); + } + + template<typename Functor> + static void forEach(Arg& arg, Arg::Role role, Arg::Type type, Arg::Width width, const Functor& functor) + { + arg.forEachTmp(role, type, width, functor); + } +}; + +template<> struct ArgThingHelper<Arg> { + static bool is(const Arg&) + { + return true; + } + + static Arg as(const Arg& arg) + { + return arg; + } + + template<typename Functor> + static void forEachFast(Arg& arg, const Functor& functor) + { + functor(arg); + } + + template<typename Functor> + static void forEach(Arg& arg, Arg::Role role, Arg::Type type, Arg::Width width, const Functor& functor) + { + functor(arg, role, type, width); + } +}; + +template<> struct ArgThingHelper<StackSlot*> { + static bool is(const Arg& arg) + { + return arg.isStack(); + } + + static StackSlot* as(const Arg& arg) + { + return arg.stackSlot(); + } + + template<typename Functor> + static void forEachFast(Arg& arg, const Functor& functor) + { + if (!arg.isStack()) + return; + + StackSlot* stackSlot = arg.stackSlot(); + functor(stackSlot); + arg = Arg::stack(stackSlot, arg.offset()); + } + + template<typename Functor> + static void forEach(Arg& arg, Arg::Role role, Arg::Type type, Arg::Width width, const Functor& functor) + { + if (!arg.isStack()) + return; + + StackSlot* stackSlot = arg.stackSlot(); + + // FIXME: This is way too optimistic about the meaning of "Def". It gets lucky for + // now because our only use of "Anonymous" stack slots happens to want the optimistic + // semantics. We could fix this by just changing the comments that describe the + // semantics of "Anonymous". 
+ // https://bugs.webkit.org/show_bug.cgi?id=151128 + + functor(stackSlot, role, type, width); + arg = Arg::stack(stackSlot, arg.offset()); + } +}; + +template<> struct ArgThingHelper<Reg> { + static bool is(const Arg& arg) + { + return arg.isReg(); + } + + static Reg as(const Arg& arg) + { + return arg.reg(); + } + + template<typename Functor> + static void forEachFast(Arg& arg, const Functor& functor) + { + arg.forEachTmpFast( + [&] (Tmp& tmp) { + if (!tmp.isReg()) + return; + + Reg reg = tmp.reg(); + functor(reg); + tmp = Tmp(reg); + }); + } + + template<typename Functor> + static void forEach(Arg& arg, Arg::Role argRole, Arg::Type argType, Arg::Width argWidth, const Functor& functor) + { + arg.forEachTmp( + argRole, argType, argWidth, + [&] (Tmp& tmp, Arg::Role role, Arg::Type type, Arg::Width width) { + if (!tmp.isReg()) + return; + + Reg reg = tmp.reg(); + functor(reg, role, type, width); + tmp = Tmp(reg); + }); + } +}; + +template<typename Thing> +bool Arg::is() const +{ + return ArgThingHelper<Thing>::is(*this); +} + +template<typename Thing> +Thing Arg::as() const +{ + return ArgThingHelper<Thing>::as(*this); +} + +template<typename Thing, typename Functor> +void Arg::forEachFast(const Functor& functor) +{ + ArgThingHelper<Thing>::forEachFast(*this, functor); +} + +template<typename Thing, typename Functor> +void Arg::forEach(Role role, Type type, Width width, const Functor& functor) +{ + ArgThingHelper<Thing>::forEach(*this, role, type, width, functor); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirBasicBlock.cpp b/Source/JavaScriptCore/b3/air/AirBasicBlock.cpp new file mode 100644 index 000000000..fa3ad8e4d --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirBasicBlock.cpp @@ -0,0 +1,87 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "AirBasicBlock.h" + +#if ENABLE(B3_JIT) + +#include "B3BasicBlockUtils.h" +#include <wtf/ListDump.h> + +namespace JSC { namespace B3 { namespace Air { + +const char* const BasicBlock::dumpPrefix = "#"; + +bool BasicBlock::addPredecessor(BasicBlock* block) +{ + return B3::addPredecessor(this, block); +} + +bool BasicBlock::removePredecessor(BasicBlock* block) +{ + return B3::removePredecessor(this, block); +} + +bool BasicBlock::replacePredecessor(BasicBlock* from, BasicBlock* to) +{ + return B3::replacePredecessor(this, from, to); +} + +void BasicBlock::dump(PrintStream& out) const +{ + out.print(dumpPrefix, m_index); +} + +void BasicBlock::deepDump(PrintStream& out) const +{ + dumpHeader(out); + for (const Inst& inst : *this) + out.print(" ", inst, "\n"); + dumpFooter(out); +} + +void BasicBlock::dumpHeader(PrintStream& out) const +{ + out.print("BB", *this, ": ; frequency = ", m_frequency, "\n"); + if (predecessors().size()) + out.print(" Predecessors: ", pointerListDump(predecessors()), "\n"); +} + +void BasicBlock::dumpFooter(PrintStream& out) const +{ + if (successors().size()) + out.print(" Successors: ", listDump(successors()), "\n"); +} + +BasicBlock::BasicBlock(unsigned index, double frequency) + : m_index(index) + , m_frequency(frequency) +{ +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirBasicBlock.h b/Source/JavaScriptCore/b3/air/AirBasicBlock.h new file mode 100644 index 000000000..431bd711c --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirBasicBlock.h @@ -0,0 +1,172 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirFrequentedBlock.h" +#include "AirInst.h" +#include "B3SuccessorCollection.h" +#include <wtf/FastMalloc.h> +#include <wtf/Noncopyable.h> + +namespace JSC { namespace B3 { namespace Air { + +class BlockInsertionSet; +class Code; +class InsertionSet; + +class BasicBlock { + WTF_MAKE_NONCOPYABLE(BasicBlock); + WTF_MAKE_FAST_ALLOCATED; +public: + static const char* const dumpPrefix; + + typedef Vector<Inst> InstList; + typedef Vector<BasicBlock*, 2> PredecessorList; + typedef Vector<FrequentedBlock, 2> SuccessorList; + + unsigned index() const { return m_index; } + + // This method is exposed for phases that mess with the layout of basic blocks. Currently that means just + // optimizeBlockOrder(). + void setIndex(unsigned index) { m_index = index; } + + unsigned size() const { return m_insts.size(); } + InstList::iterator begin() { return m_insts.begin(); } + InstList::iterator end() { return m_insts.end(); } + InstList::const_iterator begin() const { return m_insts.begin(); } + InstList::const_iterator end() const { return m_insts.end(); } + + const Inst& at(unsigned index) const { return m_insts[index]; } + Inst& at(unsigned index) { return m_insts[index]; } + + Inst* get(unsigned index) + { + return index < size() ? &at(index) : nullptr; + } + + const Inst& last() const { return m_insts.last(); } + Inst& last() { return m_insts.last(); } + + void resize(unsigned size) { m_insts.resize(size); } + + const InstList& insts() const { return m_insts; } + InstList& insts() { return m_insts; } + + template<typename Inst> + Inst& appendInst(Inst&& inst) + { + m_insts.append(std::forward<Inst>(inst)); + return m_insts.last(); + } + + template<typename... Arguments> + Inst& append(Arguments&&... arguments) + { + m_insts.append(Inst(std::forward<Arguments>(arguments)...)); + return m_insts.last(); + } + + // The "0" case is the case to which the branch jumps, so the "then" case. The "1" case is the + // "else" case, and is used to represent the fall-through of a conditional branch. 
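+ // Editor's note - an illustrative sketch, not part of the original patch: under this convention,
+ // a phase that lowers a two-way branch would place the taken ("then") target in successor 0 and
+ // the fall-through ("else") target in successor 1, roughly (the block names here are hypothetical):
+ //
+ //     block->successors().append(FrequentedBlock(taken));
+ //     block->successors().append(FrequentedBlock(fallThrough));
+ //
+ // using the successors() accessor declared just below.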
+ unsigned numSuccessors() const { return m_successors.size(); } + FrequentedBlock successor(unsigned index) const { return m_successors[index]; } + FrequentedBlock& successor(unsigned index) { return m_successors[index]; } + const SuccessorList& successors() const { return m_successors; } + SuccessorList& successors() { return m_successors; } + + BasicBlock* successorBlock(unsigned index) const { return successor(index).block(); } + BasicBlock*& successorBlock(unsigned index) { return successor(index).block(); } + SuccessorCollection<BasicBlock, SuccessorList> successorBlocks() + { + return SuccessorCollection<BasicBlock, SuccessorList>(m_successors); + } + SuccessorCollection<const BasicBlock, const SuccessorList> successorBlocks() const + { + return SuccessorCollection<const BasicBlock, const SuccessorList>(m_successors); + } + + unsigned numPredecessors() const { return m_predecessors.size(); } + BasicBlock* predecessor(unsigned index) const { return m_predecessors[index]; } + BasicBlock*& predecessor(unsigned index) { return m_predecessors[index]; } + const PredecessorList& predecessors() const { return m_predecessors; } + PredecessorList& predecessors() { return m_predecessors; } + + bool addPredecessor(BasicBlock*); + bool removePredecessor(BasicBlock*); + bool replacePredecessor(BasicBlock* from, BasicBlock* to); + bool containsPredecessor(BasicBlock* predecessor) const { return m_predecessors.contains(predecessor); } + + double frequency() const { return m_frequency; } + + void dump(PrintStream&) const; + void deepDump(PrintStream&) const; + + void dumpHeader(PrintStream&) const; + void dumpFooter(PrintStream&) const; + +private: + friend class BlockInsertionSet; + friend class Code; + friend class InsertionSet; + + BasicBlock(unsigned index, double frequency); + + unsigned m_index; + InstList m_insts; + SuccessorList m_successors; + PredecessorList m_predecessors; + double m_frequency; +}; + +class DeepBasicBlockDump { +public: + DeepBasicBlockDump(const BasicBlock* block) + : m_block(block) + { + } + + void dump(PrintStream& out) const + { + if (m_block) + m_block->deepDump(out); + else + out.print("<null>"); + } + +private: + const BasicBlock* m_block; +}; + +inline DeepBasicBlockDump deepDump(const BasicBlock* block) +{ + return DeepBasicBlockDump(block); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirBlockWorklist.h b/Source/JavaScriptCore/b3/air/AirBlockWorklist.h new file mode 100644 index 000000000..ba231a9b5 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirBlockWorklist.h @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirBasicBlock.h" +#include "B3BlockWorklist.h" + +namespace JSC { namespace B3 { namespace Air { + +typedef GraphNodeWorklist<BasicBlock*, IndexSet<BasicBlock>> BlockWorklist; + +// When you say BlockWith<int> you should read it as "block with an int". +template<typename T> using BlockWith = GraphNodeWith<BasicBlock*, T>; + +// Extended block worklist is useful for enqueueing some meta-data along with the block. It also +// permits forcibly enqueueing things even if the block has already been seen. It's useful for +// things like building a spanning tree, in which case T (the auxiliary payload) would be the +// successor index. +template<typename T> using ExtendedBlockWorklist = ExtendedGraphNodeWorklist<BasicBlock*, T, IndexSet<BasicBlock>>; + +typedef GraphNodeWithOrder<BasicBlock*> BlockWithOrder; + +typedef PostOrderGraphNodeWorklist<BasicBlock*, IndexSet<BasicBlock>> PostOrderBlockWorklist; + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirCCallSpecial.cpp b/Source/JavaScriptCore/b3/air/AirCCallSpecial.cpp new file mode 100644 index 000000000..f1b6d710e --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirCCallSpecial.cpp @@ -0,0 +1,167 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "AirCCallSpecial.h" + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +CCallSpecial::CCallSpecial() +{ + m_clobberedRegs = RegisterSet::allRegisters(); + m_clobberedRegs.exclude(RegisterSet::stackRegisters()); + m_clobberedRegs.exclude(RegisterSet::reservedHardwareRegisters()); + m_clobberedRegs.exclude(RegisterSet::calleeSaveRegisters()); + m_clobberedRegs.clear(GPRInfo::returnValueGPR); + m_clobberedRegs.clear(GPRInfo::returnValueGPR2); + m_clobberedRegs.clear(FPRInfo::returnValueFPR); +} + +CCallSpecial::~CCallSpecial() +{ +} + +void CCallSpecial::forEachArg(Inst& inst, const ScopedLambda<Inst::EachArgCallback>& callback) +{ + for (unsigned i = 0; i < numCalleeArgs; ++i) + callback(inst.args[calleeArgOffset + i], Arg::Use, Arg::GP, Arg::pointerWidth()); + for (unsigned i = 0; i < numReturnGPArgs; ++i) + callback(inst.args[returnGPArgOffset + i], Arg::Def, Arg::GP, Arg::pointerWidth()); + for (unsigned i = 0; i < numReturnFPArgs; ++i) + callback(inst.args[returnFPArgOffset + i], Arg::Def, Arg::FP, Arg::Width64); + + for (unsigned i = argArgOffset; i < inst.args.size(); ++i) { + // For the type, we can just query the arg's type. The arg will have a type, because we + // require these args to be argument registers. + Arg::Type type = inst.args[i].type(); + callback(inst.args[i], Arg::Use, type, Arg::conservativeWidth(type)); + } +} + +bool CCallSpecial::isValid(Inst& inst) +{ + if (inst.args.size() < argArgOffset) + return false; + + for (unsigned i = 0; i < numCalleeArgs; ++i) { + Arg& arg = inst.args[i + calleeArgOffset]; + if (!arg.isGP()) + return false; + switch (arg.kind()) { + case Arg::Imm: + if (is32Bit()) + break; + return false; + case Arg::BigImm: + if (is64Bit()) + break; + return false; + case Arg::Tmp: + case Arg::Addr: + case Arg::Stack: + case Arg::CallArg: + break; + default: + return false; + } + } + + // Return args need to be exact. + if (inst.args[returnGPArgOffset + 0] != Tmp(GPRInfo::returnValueGPR)) + return false; + if (inst.args[returnGPArgOffset + 1] != Tmp(GPRInfo::returnValueGPR2)) + return false; + if (inst.args[returnFPArgOffset + 0] != Tmp(FPRInfo::returnValueFPR)) + return false; + + for (unsigned i = argArgOffset; i < inst.args.size(); ++i) { + if (!inst.args[i].isReg()) + return false; + + if (inst.args[i] == Tmp(scratchRegister)) + return false; + } + return true; +} + +bool CCallSpecial::admitsStack(Inst&, unsigned argIndex) +{ + // The callee can be on the stack. 
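+ // (Editor's note: this matches isValid() above, which accepts Arg::Addr, Arg::Stack, and
+ // Arg::CallArg callees, and generate() below, which emits jit.call(...asAddress()) for an
+ // address-form callee.)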
+ if (argIndex == calleeArgOffset) + return true; + + return false; +} + +void CCallSpecial::reportUsedRegisters(Inst&, const RegisterSet&) +{ +} + +CCallHelpers::Jump CCallSpecial::generate(Inst& inst, CCallHelpers& jit, GenerationContext&) +{ + switch (inst.args[calleeArgOffset].kind()) { + case Arg::Imm: + case Arg::BigImm: + jit.move(inst.args[calleeArgOffset].asTrustedImmPtr(), scratchRegister); + jit.call(scratchRegister); + break; + case Arg::Tmp: + jit.call(inst.args[calleeArgOffset].gpr()); + break; + case Arg::Addr: + jit.call(inst.args[calleeArgOffset].asAddress()); + break; + default: + RELEASE_ASSERT_NOT_REACHED(); + break; + } + return CCallHelpers::Jump(); +} + +RegisterSet CCallSpecial::extraEarlyClobberedRegs(Inst&) +{ + return m_emptyRegs; +} + +RegisterSet CCallSpecial::extraClobberedRegs(Inst&) +{ + return m_clobberedRegs; +} + +void CCallSpecial::dumpImpl(PrintStream& out) const +{ + out.print("CCall"); +} + +void CCallSpecial::deepDumpImpl(PrintStream& out) const +{ + out.print("function call that uses the C calling convention."); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirCCallSpecial.h b/Source/JavaScriptCore/b3/air/AirCCallSpecial.h new file mode 100644 index 000000000..ec909b9f0 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirCCallSpecial.h @@ -0,0 +1,84 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirSpecial.h" + +namespace JSC { namespace B3 { namespace Air { + +// Use this special for constructing a C call. Arg 0 is of course a Special arg that refers to the +// CCallSpecial object. Arg 1 is the callee, and it can be an ImmPtr, a register, or an address. The +// next three args - arg 2, arg 3, and arg 4 - hold the return value GPRs and FPR. The remaining args +// are just the set of argument registers used by this call. For arguments that go to the stack, you +// have to do the grunt work of doing those stack stores. In fact, the only reason why we specify the +// argument registers as arguments to a call is so that the liveness analysis can see that they get +// used here. 
It would be wrong to automagically report all argument registers as being used because +// if we had a call that didn't pass them, then they'd appear to be live until some clobber point or +// the prologue, whichever happened sooner. + +class CCallSpecial : public Special { +public: + CCallSpecial(); + ~CCallSpecial(); + + // You cannot use this register to pass arguments. It just so happens that this register is not + // used for arguments in the C calling convention. By the way, this is the only thing that causes + // this special to be specific to C calls. + static const GPRReg scratchRegister = GPRInfo::nonArgGPR0; + +protected: + void forEachArg(Inst&, const ScopedLambda<Inst::EachArgCallback>&) override; + bool isValid(Inst&) override; + bool admitsStack(Inst&, unsigned argIndex) override; + void reportUsedRegisters(Inst&, const RegisterSet&) override; + CCallHelpers::Jump generate(Inst&, CCallHelpers&, GenerationContext&) override; + RegisterSet extraEarlyClobberedRegs(Inst&) override; + RegisterSet extraClobberedRegs(Inst&) override; + + void dumpImpl(PrintStream&) const override; + void deepDumpImpl(PrintStream&) const override; + +private: + static const unsigned specialArgOffset = 0; + static const unsigned numSpecialArgs = 1; + static const unsigned calleeArgOffset = numSpecialArgs; + static const unsigned numCalleeArgs = 1; + static const unsigned returnGPArgOffset = numSpecialArgs + numCalleeArgs; + static const unsigned numReturnGPArgs = 2; + static const unsigned returnFPArgOffset = numSpecialArgs + numCalleeArgs + numReturnGPArgs; + static const unsigned numReturnFPArgs = 1; + static const unsigned argArgOffset = + numSpecialArgs + numCalleeArgs + numReturnGPArgs + numReturnFPArgs; + + RegisterSet m_clobberedRegs; + RegisterSet m_emptyRegs; +}; + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirCCallingConvention.cpp b/Source/JavaScriptCore/b3/air/AirCCallingConvention.cpp new file mode 100644 index 000000000..2b6f733bf --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirCCallingConvention.cpp @@ -0,0 +1,127 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "AirCCallingConvention.h" + +#if ENABLE(B3_JIT) + +#include "AirCCallSpecial.h" +#include "AirCode.h" +#include "B3CCallValue.h" +#include "B3ValueInlines.h" + +namespace JSC { namespace B3 { namespace Air { + +namespace { + +template<typename BankInfo> +Arg marshallCCallArgumentImpl(unsigned& argumentCount, unsigned& stackOffset, Value* child) +{ + unsigned argumentIndex = argumentCount++; + if (argumentIndex < BankInfo::numberOfArgumentRegisters) + return Tmp(BankInfo::toArgumentRegister(argumentIndex)); + + unsigned slotSize; + if (isARM64() && isIOS()) { + // Arguments are packed. + slotSize = sizeofType(child->type()); + } else { + // Arguments are aligned. + slotSize = 8; + } + + stackOffset = WTF::roundUpToMultipleOf(slotSize, stackOffset); + Arg result = Arg::callArg(stackOffset); + stackOffset += slotSize; + return result; +} + +Arg marshallCCallArgument( + unsigned& gpArgumentCount, unsigned& fpArgumentCount, unsigned& stackOffset, Value* child) +{ + switch (Arg::typeForB3Type(child->type())) { + case Arg::GP: + return marshallCCallArgumentImpl<GPRInfo>(gpArgumentCount, stackOffset, child); + case Arg::FP: + return marshallCCallArgumentImpl<FPRInfo>(fpArgumentCount, stackOffset, child); + } + RELEASE_ASSERT_NOT_REACHED(); + return Arg(); +} + +} // anonymous namespace + +Vector<Arg> computeCCallingConvention(Code& code, CCallValue* value) +{ + Vector<Arg> result; + result.append(Tmp(CCallSpecial::scratchRegister)); + unsigned gpArgumentCount = 0; + unsigned fpArgumentCount = 0; + unsigned stackOffset = 0; + for (unsigned i = 1; i < value->numChildren(); ++i) { + result.append( + marshallCCallArgument(gpArgumentCount, fpArgumentCount, stackOffset, value->child(i))); + } + code.requestCallArgAreaSizeInBytes(WTF::roundUpToMultipleOf(stackAlignmentBytes(), stackOffset)); + return result; +} + +Tmp cCallResult(Type type) +{ + switch (type) { + case Void: + return Tmp(); + case Int32: + case Int64: + return Tmp(GPRInfo::returnValueGPR); + case Float: + case Double: + return Tmp(FPRInfo::returnValueFPR); + } + + RELEASE_ASSERT_NOT_REACHED(); + return Tmp(); +} + +Inst buildCCall(Code& code, Value* origin, const Vector<Arg>& arguments) +{ + Inst inst(Patch, origin, Arg::special(code.cCallSpecial())); + inst.args.append(arguments[0]); + inst.args.append(Tmp(GPRInfo::returnValueGPR)); + inst.args.append(Tmp(GPRInfo::returnValueGPR2)); + inst.args.append(Tmp(FPRInfo::returnValueFPR)); + for (unsigned i = 1; i < arguments.size(); ++i) { + Arg arg = arguments[i]; + if (arg.isTmp()) + inst.args.append(arg); + } + return inst; +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirCCallingConvention.h b/Source/JavaScriptCore/b3/air/AirCCallingConvention.h new file mode 100644 index 000000000..76acc29ab --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirCCallingConvention.h @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirArg.h" +#include "AirInst.h" +#include "B3Type.h" +#include <wtf/Vector.h> + +namespace JSC { namespace B3 { + +class CCallValue; + +namespace Air { + +class Code; + +Vector<Arg> computeCCallingConvention(Code&, CCallValue*); + +Tmp cCallResult(Type); + +Inst buildCCall(Code&, Value* origin, const Vector<Arg>&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirCode.cpp b/Source/JavaScriptCore/b3/air/AirCode.cpp new file mode 100644 index 000000000..79e2c0cf2 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirCode.cpp @@ -0,0 +1,229 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirCode.h" + +#if ENABLE(B3_JIT) + +#include "AirCCallSpecial.h" +#include "B3BasicBlockUtils.h" +#include "B3Procedure.h" +#include "B3StackSlot.h" +#include <wtf/ListDump.h> + +namespace JSC { namespace B3 { namespace Air { + +Code::Code(Procedure& proc) + : m_proc(proc) + , m_lastPhaseName("initial") +{ + // Come up with initial orderings of registers. The user may replace this with something else. + Arg::forEachType( + [&] (Arg::Type type) { + Vector<Reg> result; + RegisterSet all = type == Arg::GP ? 
RegisterSet::allGPRs() : RegisterSet::allFPRs(); + all.exclude(RegisterSet::stackRegisters()); + all.exclude(RegisterSet::reservedHardwareRegisters()); + RegisterSet calleeSave = RegisterSet::calleeSaveRegisters(); + all.forEach( + [&] (Reg reg) { + if (!calleeSave.get(reg)) + result.append(reg); + }); + all.forEach( + [&] (Reg reg) { + if (calleeSave.get(reg)) + result.append(reg); + }); + setRegsInPriorityOrder(type, result); + }); +} + +Code::~Code() +{ +} + +void Code::setRegsInPriorityOrder(Arg::Type type, const Vector<Reg>& regs) +{ + regsInPriorityOrderImpl(type) = regs; + m_mutableRegs = RegisterSet(); + Arg::forEachType( + [&] (Arg::Type type) { + for (Reg reg : regsInPriorityOrder(type)) + m_mutableRegs.set(reg); + }); +} + +void Code::pinRegister(Reg reg) +{ + Vector<Reg>& regs = regsInPriorityOrderImpl(Arg(Tmp(reg)).type()); + regs.removeFirst(reg); + m_mutableRegs.clear(reg); + ASSERT(!regs.contains(reg)); +} + +BasicBlock* Code::addBlock(double frequency) +{ + std::unique_ptr<BasicBlock> block(new BasicBlock(m_blocks.size(), frequency)); + BasicBlock* result = block.get(); + m_blocks.append(WTFMove(block)); + return result; +} + +StackSlot* Code::addStackSlot(unsigned byteSize, StackSlotKind kind, B3::StackSlot* b3Slot) +{ + return m_stackSlots.addNew(byteSize, kind, b3Slot); +} + +StackSlot* Code::addStackSlot(B3::StackSlot* b3Slot) +{ + return addStackSlot(b3Slot->byteSize(), StackSlotKind::Locked, b3Slot); +} + +Special* Code::addSpecial(std::unique_ptr<Special> special) +{ + special->m_code = this; + return m_specials.add(WTFMove(special)); +} + +CCallSpecial* Code::cCallSpecial() +{ + if (!m_cCallSpecial) { + m_cCallSpecial = static_cast<CCallSpecial*>( + addSpecial(std::make_unique<CCallSpecial>())); + } + + return m_cCallSpecial; +} + +bool Code::isEntrypoint(BasicBlock* block) const +{ + if (m_entrypoints.isEmpty()) + return !block->index(); + + for (const FrequentedBlock& entrypoint : m_entrypoints) { + if (entrypoint.block() == block) + return true; + } + return false; +} + +void Code::resetReachability() +{ + clearPredecessors(m_blocks); + if (m_entrypoints.isEmpty()) + updatePredecessorsAfter(m_blocks[0].get()); + else { + for (const FrequentedBlock& entrypoint : m_entrypoints) + updatePredecessorsAfter(entrypoint.block()); + } + + for (auto& block : m_blocks) { + if (isBlockDead(block.get()) && !isEntrypoint(block.get())) + block = nullptr; + } +} + +void Code::dump(PrintStream& out) const +{ + if (!m_entrypoints.isEmpty()) + out.print("Entrypoints: ", listDump(m_entrypoints), "\n"); + for (BasicBlock* block : *this) + out.print(deepDump(block)); + if (stackSlots().size()) { + out.print("Stack slots:\n"); + for (StackSlot* slot : stackSlots()) + out.print(" ", pointerDump(slot), ": ", deepDump(slot), "\n"); + } + if (specials().size()) { + out.print("Specials:\n"); + for (Special* special : specials()) + out.print(" ", deepDump(special), "\n"); + } + if (m_frameSize) + out.print("Frame size: ", m_frameSize, "\n"); + if (m_callArgAreaSize) + out.print("Call arg area size: ", m_callArgAreaSize, "\n"); + if (m_calleeSaveRegisters.size()) + out.print("Callee saves: ", m_calleeSaveRegisters, "\n"); +} + +unsigned Code::findFirstBlockIndex(unsigned index) const +{ + while (index < size() && !at(index)) + index++; + return index; +} + +unsigned Code::findNextBlockIndex(unsigned index) const +{ + return findFirstBlockIndex(index + 1); +} + +BasicBlock* Code::findNextBlock(BasicBlock* block) const +{ + unsigned index = findNextBlockIndex(block->index()); + if (index < 
size()) + return at(index); + return nullptr; +} + +void Code::addFastTmp(Tmp tmp) +{ + m_fastTmps.add(tmp); +} + +void* Code::addDataSection(size_t size) +{ + return m_proc.addDataSection(size); +} + +unsigned Code::jsHash() const +{ + unsigned result = 0; + + for (BasicBlock* block : *this) { + result *= 1000001; + for (Inst& inst : *block) { + result *= 97; + result += inst.jsHash(); + } + for (BasicBlock* successor : block->successorBlocks()) { + result *= 7; + result += successor->index(); + } + } + for (StackSlot* slot : stackSlots()) { + result *= 101; + result += slot->jsHash(); + } + + return result; +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirCode.h b/Source/JavaScriptCore/b3/air/AirCode.h new file mode 100644 index 000000000..6d4a14722 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirCode.h @@ -0,0 +1,321 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirArg.h" +#include "AirBasicBlock.h" +#include "AirSpecial.h" +#include "AirStackSlot.h" +#include "AirTmp.h" +#include "B3SparseCollection.h" +#include "CCallHelpers.h" +#include "RegisterAtOffsetList.h" +#include "StackAlignment.h" +#include <wtf/IndexMap.h> + +namespace JSC { namespace B3 { + +class Procedure; + +#if COMPILER(GCC) && ASSERT_DISABLED +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wreturn-type" +#endif // COMPILER(GCC) && ASSERT_DISABLED + +namespace Air { + +class BlockInsertionSet; +class CCallSpecial; + +typedef void WasmBoundsCheckGeneratorFunction(CCallHelpers&, GPRReg, unsigned); +typedef SharedTask<WasmBoundsCheckGeneratorFunction> WasmBoundsCheckGenerator; + +// This is an IR that is very close to the bare metal. It requires about 40x more bytes than the +// generated machine code - for example if you're generating 1MB of machine code, you need about +// 40MB of Air. 
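+//
+// Editor's note - a minimal usage sketch, not part of the original patch. It assumes a
+// B3::Procedure reachable as `proc` that exposes its Air::Code (the `code()` accessor is an
+// assumption here) and an `origin` Value* for attribution; Move and Ret32 are opcodes defined
+// in AirOpcode.opcodes:
+//
+//     Code& code = proc.code();
+//     BasicBlock* root = code.addBlock();
+//     Tmp tmp = code.newTmp(Arg::GP);
+//     root->append(Move, origin, Arg::imm(42), tmp);
+//     root->append(Ret32, origin, tmp);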
+ +class Code { + WTF_MAKE_NONCOPYABLE(Code); + WTF_MAKE_FAST_ALLOCATED; +public: + ~Code(); + + Procedure& proc() { return m_proc; } + + const Vector<Reg>& regsInPriorityOrder(Arg::Type type) const + { + switch (type) { + case Arg::GP: + return m_gpRegsInPriorityOrder; + case Arg::FP: + return m_fpRegsInPriorityOrder; + } + ASSERT_NOT_REACHED(); + } + + void setRegsInPriorityOrder(Arg::Type, const Vector<Reg>&); + + // This is the set of registers that Air is allowed to emit code to mutate. It's derived from + // regsInPriorityOrder. Any registers not in this set are said to be "pinned". + const RegisterSet& mutableRegs() const { return m_mutableRegs; } + + bool isPinned(Reg reg) const { return !mutableRegs().get(reg); } + + void pinRegister(Reg); + + JS_EXPORT_PRIVATE BasicBlock* addBlock(double frequency = 1); + + // Note that you can rely on stack slots always getting indices that are larger than the index + // of any prior stack slot. In fact, all stack slots you create in the future will have an index + // that is >= stackSlots().size(). + JS_EXPORT_PRIVATE StackSlot* addStackSlot( + unsigned byteSize, StackSlotKind, B3::StackSlot* = nullptr); + StackSlot* addStackSlot(B3::StackSlot*); + + Special* addSpecial(std::unique_ptr<Special>); + + // This is the special you need to make a C call! + CCallSpecial* cCallSpecial(); + + Tmp newTmp(Arg::Type type) + { + switch (type) { + case Arg::GP: + return Tmp::gpTmpForIndex(m_numGPTmps++); + case Arg::FP: + return Tmp::fpTmpForIndex(m_numFPTmps++); + } + ASSERT_NOT_REACHED(); + } + + unsigned numTmps(Arg::Type type) + { + switch (type) { + case Arg::GP: + return m_numGPTmps; + case Arg::FP: + return m_numFPTmps; + } + ASSERT_NOT_REACHED(); + } + + unsigned callArgAreaSizeInBytes() const { return m_callArgAreaSize; } + + // You can call this before code generation to force a minimum call arg area size. + void requestCallArgAreaSizeInBytes(unsigned size) + { + m_callArgAreaSize = std::max( + m_callArgAreaSize, + static_cast<unsigned>(WTF::roundUpToMultipleOf(stackAlignmentBytes(), size))); + } + + unsigned frameSize() const { return m_frameSize; } + + // Only phases that do stack allocation are allowed to set this. Currently, only + // Air::allocateStack() does this. + void setFrameSize(unsigned frameSize) + { + m_frameSize = frameSize; + } + + // Note that this is not the same thing as proc().numEntrypoints(). This value here may be zero + // until we lower EntrySwitch. + unsigned numEntrypoints() const { return m_entrypoints.size(); } + const Vector<FrequentedBlock>& entrypoints() const { return m_entrypoints; } + const FrequentedBlock& entrypoint(unsigned index) const { return m_entrypoints[index]; } + bool isEntrypoint(BasicBlock*) const; + + // This is used by lowerEntrySwitch(). + template<typename Vector> + void setEntrypoints(Vector&& vector) + { + m_entrypoints = std::forward<Vector>(vector); + } + + CCallHelpers::Label entrypointLabel(unsigned index) const + { + return m_entrypointLabels[index]; + } + + // This is used by generate(). + template<typename Vector> + void setEntrypointLabels(Vector&& vector) + { + m_entrypointLabels = std::forward<Vector>(vector); + } + + const RegisterAtOffsetList& calleeSaveRegisters() const { return m_calleeSaveRegisters; } + RegisterAtOffsetList& calleeSaveRegisters() { return m_calleeSaveRegisters; } + + // Recomputes predecessors and deletes unreachable blocks. 
+ void resetReachability(); + + JS_EXPORT_PRIVATE void dump(PrintStream&) const; + + unsigned size() const { return m_blocks.size(); } + BasicBlock* at(unsigned index) const { return m_blocks[index].get(); } + BasicBlock* operator[](unsigned index) const { return at(index); } + + // This is used by phases that optimize the block list. You shouldn't use this unless you really know + // what you're doing. + Vector<std::unique_ptr<BasicBlock>>& blockList() { return m_blocks; } + + // Finds the smallest index' such that at(index') != null and index' >= index. + JS_EXPORT_PRIVATE unsigned findFirstBlockIndex(unsigned index) const; + + // Finds the smallest index' such that at(index') != null and index' > index. + unsigned findNextBlockIndex(unsigned index) const; + + BasicBlock* findNextBlock(BasicBlock*) const; + + class iterator { + public: + iterator() + : m_code(nullptr) + , m_index(0) + { + } + + iterator(const Code& code, unsigned index) + : m_code(&code) + , m_index(m_code->findFirstBlockIndex(index)) + { + } + + BasicBlock* operator*() + { + return m_code->at(m_index); + } + + iterator& operator++() + { + m_index = m_code->findFirstBlockIndex(m_index + 1); + return *this; + } + + bool operator==(const iterator& other) const + { + return m_index == other.m_index; + } + + bool operator!=(const iterator& other) const + { + return !(*this == other); + } + + private: + const Code* m_code; + unsigned m_index; + }; + + iterator begin() const { return iterator(*this, 0); } + iterator end() const { return iterator(*this, size()); } + + const SparseCollection<StackSlot>& stackSlots() const { return m_stackSlots; } + SparseCollection<StackSlot>& stackSlots() { return m_stackSlots; } + + const SparseCollection<Special>& specials() const { return m_specials; } + SparseCollection<Special>& specials() { return m_specials; } + + template<typename Callback> + void forAllTmps(const Callback& callback) const + { + for (unsigned i = m_numGPTmps; i--;) + callback(Tmp::gpTmpForIndex(i)); + for (unsigned i = m_numFPTmps; i--;) + callback(Tmp::fpTmpForIndex(i)); + } + + void addFastTmp(Tmp); + bool isFastTmp(Tmp tmp) const { return m_fastTmps.contains(tmp); } + + void* addDataSection(size_t); + + // The name has to be a string literal, since we don't do any memory management for the string. + void setLastPhaseName(const char* name) + { + m_lastPhaseName = name; + } + + const char* lastPhaseName() const { return m_lastPhaseName; } + + void setWasmBoundsCheckGenerator(RefPtr<WasmBoundsCheckGenerator> generator) + { + m_wasmBoundsCheckGenerator = generator; + } + + RefPtr<WasmBoundsCheckGenerator> wasmBoundsCheckGenerator() const { return m_wasmBoundsCheckGenerator; } + + // This is a hash of the code. You can use this if you want to put code into a hashtable, but + // it's mainly for validating the results from JSAir. + unsigned jsHash() const; + +private: + friend class ::JSC::B3::Procedure; + friend class BlockInsertionSet; + + Code(Procedure&); + + Vector<Reg>& regsInPriorityOrderImpl(Arg::Type type) + { + switch (type) { + case Arg::GP: + return m_gpRegsInPriorityOrder; + case Arg::FP: + return m_fpRegsInPriorityOrder; + } + ASSERT_NOT_REACHED(); + } + + Procedure& m_proc; // Some meta-data, like byproducts, is stored in the Procedure. 
+ Vector<Reg> m_gpRegsInPriorityOrder; + Vector<Reg> m_fpRegsInPriorityOrder; + RegisterSet m_mutableRegs; + SparseCollection<StackSlot> m_stackSlots; + Vector<std::unique_ptr<BasicBlock>> m_blocks; + SparseCollection<Special> m_specials; + HashSet<Tmp> m_fastTmps; + CCallSpecial* m_cCallSpecial { nullptr }; + unsigned m_numGPTmps { 0 }; + unsigned m_numFPTmps { 0 }; + unsigned m_frameSize { 0 }; + unsigned m_callArgAreaSize { 0 }; + RegisterAtOffsetList m_calleeSaveRegisters; + Vector<FrequentedBlock> m_entrypoints; // This is empty until after lowerEntrySwitch(). + Vector<CCallHelpers::Label> m_entrypointLabels; // This is empty until code generation. + RefPtr<WasmBoundsCheckGenerator> m_wasmBoundsCheckGenerator; + const char* m_lastPhaseName; +}; + +} } } // namespace JSC::B3::Air + +#if COMPILER(GCC) && ASSERT_DISABLED +#pragma GCC diagnostic pop +#endif // COMPILER(GCC) && ASSERT_DISABLED + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirCustom.cpp b/Source/JavaScriptCore/b3/air/AirCustom.cpp new file mode 100644 index 000000000..2a2df2fbd --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirCustom.cpp @@ -0,0 +1,195 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirCustom.h" + +#if ENABLE(B3_JIT) + +#include "AirInstInlines.h" +#include "B3CCallValue.h" +#include "B3ValueInlines.h" + +namespace JSC { namespace B3 { namespace Air { + +bool PatchCustom::isValidForm(Inst& inst) +{ + if (inst.args.size() < 1) + return false; + if (!inst.args[0].isSpecial()) + return false; + if (!inst.args[0].special()->isValid(inst)) + return false; + RegisterSet clobberedEarly = inst.extraEarlyClobberedRegs(); + RegisterSet clobberedLate = inst.extraClobberedRegs(); + bool ok = true; + inst.forEachTmp( + [&] (Tmp& tmp, Arg::Role role, Arg::Type, Arg::Width) { + if (!tmp.isReg()) + return; + if (Arg::isLateDef(role) || Arg::isLateUse(role)) + ok &= !clobberedLate.get(tmp.reg()); + else + ok &= !clobberedEarly.get(tmp.reg()); + }); + return ok; +} + +bool CCallCustom::isValidForm(Inst& inst) +{ + CCallValue* value = inst.origin->as<CCallValue>(); + if (!value) + return false; + + if (inst.args.size() != (value->type() == Void ? 
0 : 1) + value->numChildren()) + return false; + + // The arguments can only refer to the stack, tmps, or immediates. + for (Arg& arg : inst.args) { + if (!arg.isTmp() && !arg.isStackMemory() && !arg.isSomeImm()) + return false; + } + + unsigned offset = 0; + + if (!inst.args[0].isGP()) + return false; + + // If there is a result then it cannot be an immediate. + if (value->type() != Void) { + if (inst.args[1].isSomeImm()) + return false; + if (!inst.args[1].canRepresent(value)) + return false; + offset++; + } + + for (unsigned i = value->numChildren(); i-- > 1;) { + Value* child = value->child(i); + Arg arg = inst.args[offset + i]; + if (!arg.canRepresent(child)) + return false; + } + + return true; +} + +CCallHelpers::Jump CCallCustom::generate(Inst& inst, CCallHelpers&, GenerationContext&) +{ + dataLog("FATAL: Unlowered C call: ", inst, "\n"); + UNREACHABLE_FOR_PLATFORM(); + return CCallHelpers::Jump(); +} + +bool ShuffleCustom::isValidForm(Inst& inst) +{ + if (inst.args.size() % 3) + return false; + + // A destination may only appear once. This requirement allows us to avoid the undefined behavior + // of having a destination that is supposed to get multiple inputs simultaneously. It also + // imposes some interesting constraints on the "shape" of the shuffle. If we treat a shuffle pair + // as an edge and the Args as nodes, then the single-destination requirement means that the + // shuffle graph consists of two kinds of subgraphs: + // + // - Spanning trees. We call these shifts. They can be executed as a sequence of Move + // instructions and don't usually require scratch registers. + // + // - Closed loops. These loops consist of nodes that have one successor and one predecessor, so + // there is no way to "get into" the loop from outside of it. These can be executed using swaps + // or by saving one of the Args to a scratch register and executing it as a shift. + HashSet<Arg> dsts; + + for (unsigned i = 0; i < inst.args.size(); ++i) { + Arg arg = inst.args[i]; + unsigned mode = i % 3; + + if (mode == 2) { + // It's the width. + if (!arg.isWidthArg()) + return false; + continue; + } + + // The source can be an immediate. + if (!mode) { + if (arg.isSomeImm()) + continue; + + if (!arg.isCompatibleType(inst.args[i + 1])) + return false; + } else { + ASSERT(mode == 1); + if (!dsts.add(arg).isNewEntry) + return false; + } + + if (arg.isTmp() || arg.isMemory()) + continue; + + return false; + } + + // No destination register may appear in any address expressions. The lowering can't handle it + // and it's not useful for the way we end up using Shuffles. Normally, Shuffles only used for + // stack addresses and non-stack registers. 
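+ // (Editor's note, illustrative only: a pair whose destination is the tmp %rax would invalidate
+ // any other pair that reads or writes through Addr(%rax, offset); that is what the loop below
+ // rejects.)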
+ for (Arg& arg : inst.args) { + if (!arg.isMemory()) + continue; + bool ok = true; + arg.forEachTmpFast( + [&] (Tmp tmp) { + if (dsts.contains(tmp)) + ok = false; + }); + if (!ok) + return false; + } + + return true; +} + +CCallHelpers::Jump ShuffleCustom::generate(Inst& inst, CCallHelpers&, GenerationContext&) +{ + dataLog("FATAL: Unlowered shuffle: ", inst, "\n"); + UNREACHABLE_FOR_PLATFORM(); + return CCallHelpers::Jump(); +} + +bool WasmBoundsCheckCustom::isValidForm(Inst& inst) +{ + if (inst.args.size() != 2) + return false; + if (!inst.args[0].isTmp() && !inst.args[0].isSomeImm()) + return false; + + return inst.args[1].isReg(); +} + + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirCustom.h b/Source/JavaScriptCore/b3/air/AirCustom.h new file mode 100644 index 000000000..cddc03857 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirCustom.h @@ -0,0 +1,328 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirCode.h" +#include "AirGenerationContext.h" +#include "AirInst.h" +#include "AirSpecial.h" +#include "B3ValueInlines.h" +#include "B3WasmBoundsCheckValue.h" + +namespace JSC { namespace B3 { namespace Air { + +// This defines the behavior of custom instructions - i.e. those whose behavior cannot be +// described using AirOpcode.opcodes. If you define an opcode as "custom Foo" in that file, then +// you will need to create a "struct FooCustom" here that implements the custom behavior +// methods. +// +// The customizability granted by the custom instruction mechanism is strictly less than what +// you get using the Patch instruction and implementing a Special. However, that path requires +// allocating a Special object and ensuring that it's the first operand. For many instructions, +// that is not as convenient as using Custom, which makes the instruction look like any other +// instruction. Note that both of those extra powers of the Patch instruction happen because we +// special-case that instruction in many phases and analyses. Non-special-cased behaviors of +// Patch are implemented using the custom instruction mechanism. 
+// +// Specials are still more flexible if you need to list extra clobbered registers and you'd like +// that to be expressed as a bitvector rather than an arglist. They are also more flexible if +// you need to carry extra state around with the instruction. Also, Specials mean that you +// always have access to Code& even in methods that don't take a GenerationContext. + +// Definition of Patch instruction. Patch is used to delegate the behavior of the instruction to the +// Special object, which will be the first argument to the instruction. +struct PatchCustom { + template<typename Functor> + static void forEachArg(Inst& inst, const Functor& functor) + { + // This is basically bogus, but it works for analyses that model Special as an + // immediate. + functor(inst.args[0], Arg::Use, Arg::GP, Arg::pointerWidth()); + + inst.args[0].special()->forEachArg(inst, scopedLambda<Inst::EachArgCallback>(functor)); + } + + template<typename... Arguments> + static bool isValidFormStatic(Arguments...) + { + return false; + } + + static bool isValidForm(Inst& inst); + + static bool admitsStack(Inst& inst, unsigned argIndex) + { + if (!argIndex) + return false; + return inst.args[0].special()->admitsStack(inst, argIndex); + } + + static std::optional<unsigned> shouldTryAliasingDef(Inst& inst) + { + return inst.args[0].special()->shouldTryAliasingDef(inst); + } + + static bool isTerminal(Inst& inst) + { + return inst.args[0].special()->isTerminal(inst); + } + + static bool hasNonArgEffects(Inst& inst) + { + return inst.args[0].special()->hasNonArgEffects(inst); + } + + static bool hasNonArgNonControlEffects(Inst& inst) + { + return inst.args[0].special()->hasNonArgNonControlEffects(inst); + } + + static CCallHelpers::Jump generate( + Inst& inst, CCallHelpers& jit, GenerationContext& context) + { + return inst.args[0].special()->generate(inst, jit, context); + } +}; + +template<typename Subtype> +struct CommonCustomBase { + static bool hasNonArgEffects(Inst& inst) + { + return Subtype::isTerminal(inst) || Subtype::hasNonArgNonControlEffects(inst); + } +}; + +// Definition of CCall instruction. CCall is used for hot path C function calls. It's lowered to a +// Patch with an Air CCallSpecial along with code to marshal instructions. The lowering happens +// before register allocation, so that the register allocator sees the clobbers. +struct CCallCustom : public CommonCustomBase<CCallCustom> { + template<typename Functor> + static void forEachArg(Inst& inst, const Functor& functor) + { + Value* value = inst.origin; + + unsigned index = 0; + + functor(inst.args[index++], Arg::Use, Arg::GP, Arg::pointerWidth()); // callee + + if (value->type() != Void) { + functor( + inst.args[index++], Arg::Def, + Arg::typeForB3Type(value->type()), + Arg::widthForB3Type(value->type())); + } + + for (unsigned i = 1; i < value->numChildren(); ++i) { + Value* child = value->child(i); + functor( + inst.args[index++], Arg::Use, + Arg::typeForB3Type(child->type()), + Arg::widthForB3Type(child->type())); + } + } + + template<typename... Arguments> + static bool isValidFormStatic(Arguments...) + { + return false; + } + + static bool isValidForm(Inst&); + + static bool admitsStack(Inst&, unsigned) + { + return true; + } + + static bool isTerminal(Inst&) + { + return false; + } + + static bool hasNonArgNonControlEffects(Inst&) + { + return true; + } + + // This just crashes, since we expect C calls to be lowered before generation. 
+ static CCallHelpers::Jump generate(Inst&, CCallHelpers&, GenerationContext&); +}; + +struct ColdCCallCustom : CCallCustom { + template<typename Functor> + static void forEachArg(Inst& inst, const Functor& functor) + { + // This is just like a call, but uses become cold. + CCallCustom::forEachArg( + inst, + [&] (Arg& arg, Arg::Role role, Arg::Type type, Arg::Width width) { + functor(arg, Arg::cooled(role), type, width); + }); + } +}; + +struct ShuffleCustom : public CommonCustomBase<ShuffleCustom> { + template<typename Functor> + static void forEachArg(Inst& inst, const Functor& functor) + { + unsigned limit = inst.args.size() / 3 * 3; + for (unsigned i = 0; i < limit; i += 3) { + Arg& src = inst.args[i + 0]; + Arg& dst = inst.args[i + 1]; + Arg& widthArg = inst.args[i + 2]; + Arg::Width width = widthArg.width(); + Arg::Type type = src.isGP() && dst.isGP() ? Arg::GP : Arg::FP; + functor(src, Arg::Use, type, width); + functor(dst, Arg::Def, type, width); + functor(widthArg, Arg::Use, Arg::GP, Arg::Width8); + } + } + + template<typename... Arguments> + static bool isValidFormStatic(Arguments...) + { + return false; + } + + static bool isValidForm(Inst&); + + static bool admitsStack(Inst&, unsigned index) + { + switch (index % 3) { + case 0: + case 1: + return true; + default: + return false; + } + } + + static bool isTerminal(Inst&) + { + return false; + } + + static bool hasNonArgNonControlEffects(Inst&) + { + return false; + } + + static CCallHelpers::Jump generate(Inst&, CCallHelpers&, GenerationContext&); +}; + +struct EntrySwitchCustom : public CommonCustomBase<EntrySwitchCustom> { + template<typename Func> + static void forEachArg(Inst&, const Func&) + { + } + + template<typename... Arguments> + static bool isValidFormStatic(Arguments...) + { + return !sizeof...(Arguments); + } + + static bool isValidForm(Inst& inst) + { + return inst.args.isEmpty(); + } + + static bool admitsStack(Inst&, unsigned) + { + return false; + } + + static bool isTerminal(Inst&) + { + return true; + } + + static bool hasNonArgNonControlEffects(Inst&) + { + return false; + } + + static CCallHelpers::Jump generate(Inst&, CCallHelpers&, GenerationContext&) + { + // This should never be reached because we should have lowered EntrySwitch before + // generation. + UNREACHABLE_FOR_PLATFORM(); + return CCallHelpers::Jump(); + } +}; + +struct WasmBoundsCheckCustom : public CommonCustomBase<WasmBoundsCheckCustom> { + template<typename Func> + static void forEachArg(Inst& inst, const Func& functor) + { + functor(inst.args[0], Arg::Use, Arg::GP, Arg::Width64); + functor(inst.args[1], Arg::Use, Arg::GP, Arg::Width64); + } + + template<typename... Arguments> + static bool isValidFormStatic(Arguments...) 
+ { + return false; + } + + static bool isValidForm(Inst&); + + static bool admitsStack(Inst&, unsigned) + { + return false; + } + + static bool isTerminal(Inst&) + { + return false; + } + + static bool hasNonArgNonControlEffects(Inst&) + { + return true; + } + + static CCallHelpers::Jump generate(Inst& inst, CCallHelpers& jit, GenerationContext& context) + { + WasmBoundsCheckValue* value = inst.origin->as<WasmBoundsCheckValue>(); + CCallHelpers::Jump outOfBounds = Inst(Air::Branch64, value, Arg::relCond(CCallHelpers::AboveOrEqual), inst.args[0], inst.args[1]).generate(jit, context); + + context.latePaths.append(createSharedTask<GenerationContext::LatePathFunction>( + [outOfBounds, value] (CCallHelpers& jit, Air::GenerationContext& context) { + outOfBounds.link(&jit); + context.code->wasmBoundsCheckGenerator()->run(jit, value->pinnedGPR(), value->offset()); + })); + + // We said we were not a terminal. + return CCallHelpers::Jump(); + } +}; + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirDumpAsJS.cpp b/Source/JavaScriptCore/b3/air/AirDumpAsJS.cpp new file mode 100644 index 000000000..3d8d6fb41 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirDumpAsJS.cpp @@ -0,0 +1,245 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirDumpAsJS.h" + +#if ENABLE(B3_JIT) + +#include "AirCode.h" +#include "AirInstInlines.h" + +namespace JSC { namespace B3 { namespace Air { + +namespace { + +CString varNameForBlockAtIndex(unsigned index) +{ + return toCString("bb", index); +} + +CString varName(BasicBlock* block) +{ + return varNameForBlockAtIndex(block->index()); +} + +CString varNameForStackSlotAtIndex(unsigned index) +{ + return toCString("slot", index); +} + +CString varName(StackSlot* slot) +{ + return varNameForStackSlotAtIndex(slot->index()); +} + +CString varName(Reg reg) +{ + return toCString("Reg.", reg.debugName()); +} + +CString varNameForTmpWithTypeAndIndex(Arg::Type type, unsigned index) +{ + return toCString(type == Arg::FP ? 
"f" : "", "tmp", index); +} + +CString varName(Tmp tmp) +{ + if (tmp.isReg()) + return varName(tmp.reg()); + return varNameForTmpWithTypeAndIndex(Arg(tmp).type(), tmp.tmpIndex()); +} + +} // anonymous namespace + +void dumpAsJS(Code& code, PrintStream& out) +{ + out.println("let code = new Code();"); + + for (unsigned i = 0; i < code.size(); ++i) + out.println("let ", varNameForBlockAtIndex(i), " = code.addBlock();"); + + out.println("let hash;"); + + for (unsigned i = 0; i < code.stackSlots().size(); ++i) { + StackSlot* slot = code.stackSlots()[i]; + if (slot) { + out.println("let ", varName(slot), " = code.addStackSlot(", slot->byteSize(), ", ", slot->kind(), ");"); + if (slot->offsetFromFP()) + out.println(varName(slot), ".setOffsetFromFP(", slot->offsetFromFP(), ");"); + out.println("hash = ", varName(slot), ".hash();"); + out.println("if (hash != ", slot->jsHash(), ")"); + out.println(" throw new Error(\"Bad hash: \" + hash);"); + } else + out.println("code.addStackSlot(1, Spill);"); + } + + Arg::forEachType( + [&] (Arg::Type type) { + for (unsigned i = code.numTmps(type); i--;) { + out.println( + "let ", varNameForTmpWithTypeAndIndex(type, i), " = code.newTmp(", type, ");"); + } + }); + + out.println("let inst;"); + out.println("let arg;"); + + for (BasicBlock* block : code) { + for (FrequentedBlock successor : block->successors()) { + out.println( + varName(block), ".successors.push(new FrequentedBlock(", + varName(successor.block()), ", ", successor.frequency(), "));"); + } + + for (BasicBlock* predecessor : block->predecessors()) + out.println(varName(block), ".predecessors.push(", varName(predecessor), ");"); + + for (Inst& inst : *block) { + // FIXME: This should do something for flags. + // https://bugs.webkit.org/show_bug.cgi?id=162751 + out.println("inst = new Inst(", inst.kind.opcode, ");"); + + inst.forEachArg( + [&] (Arg& arg, Arg::Role, Arg::Type, Arg::Width) { + switch (arg.kind()) { + case Arg::Invalid: + RELEASE_ASSERT_NOT_REACHED(); + break; + + case Arg::Tmp: + out.println("arg = Arg.createTmp(", varName(arg.tmp()), ");"); + break; + + case Arg::Imm: + out.println("arg = Arg.createImm(", arg.value(), ");"); + break; + + case Arg::BigImm: + out.println( + "arg = Arg.createBigImm(", + static_cast<int32_t>(arg.value()), ", ", + static_cast<int32_t>(arg.value() >> 32), ");"); + break; + + case Arg::BitImm: + out.println("arg = Arg.createBitImm(", arg.value(), ");"); + break; + + case Arg::BitImm64: + out.println( + "arg = Arg.createBitImm64(", + static_cast<int32_t>(arg.value()), ", ", + static_cast<int32_t>(arg.value() >> 32), ");"); + break; + + case Arg::Addr: + out.println( + "arg = Arg.createAddr(", varName(arg.base()), ", ", arg.offset(), ");"); + break; + + case Arg::Stack: + out.println( + "arg = Arg.createStack(", varName(arg.stackSlot()), ", ", arg.offset(), ");"); + break; + + case Arg::CallArg: + out.println("arg = Arg.createCallArg(", arg.offset(), ");"); + break; + + case Arg::Index: + out.println( + "arg = Arg.createIndex(", varName(arg.base()), ", ", + varName(arg.index()), ", ", arg.scale(), ", ", arg.offset(), ");"); + break; + + case Arg::RelCond: + out.println("arg = Arg.createRelCond(", arg.asRelationalCondition(), ");"); + break; + + case Arg::ResCond: + out.println("arg = Arg.createResCond(", arg.asResultCondition(), ");"); + break; + + case Arg::DoubleCond: + out.println("arg = Arg.createDoubleCond(", arg.asDoubleCondition(), ");"); + break; + + case Arg::Special: + out.println("arg = Arg.createSpecial();"); + break; + + case Arg::WidthArg: + 
out.println("arg = Arg.createWidthArg(", arg.width(), ");"); + break; + } + + out.println("inst.args.push(arg);"); + }); + + if (inst.kind.opcode == Patch) { + if (inst.hasNonArgEffects()) + out.println("inst.patchHasNonArgEffects = true;"); + + out.println("inst.extraEarlyClobberedRegs = new Set();"); + out.println("inst.extraClobberedRegs = new Set();"); + inst.extraEarlyClobberedRegs().forEach( + [&] (Reg reg) { + out.println("inst.extraEarlyClobberedRegs.add(", varName(reg), ");"); + }); + inst.extraClobberedRegs().forEach( + [&] (Reg reg) { + out.println("inst.extraClobberedRegs.add(", varName(reg), ");"); + }); + + out.println("inst.patchArgData = [];"); + inst.forEachArg( + [&] (Arg&, Arg::Role role, Arg::Type type, Arg::Width width) { + out.println( + "inst.patchArgData.push({role: Arg.", role, ", type: ", type, + ", width: ", width, "});"); + }); + } + + if (inst.kind.opcode == CCall || inst.kind.opcode == ColdCCall) { + out.println("inst.cCallType = ", inst.origin->type()); + out.println("inst.cCallArgTypes = [];"); + for (unsigned i = 1; i < inst.origin->numChildren(); ++i) + out.println("inst.cCallArgTypes.push(", inst.origin->child(i)->type(), ");"); + } + + out.println("hash = inst.hash();"); + out.println("if (hash != ", inst.jsHash(), ")"); + out.println(" throw new Error(\"Bad hash: \" + hash);"); + + out.println(varName(block), ".append(inst);"); + } + } +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirDumpAsJS.h b/Source/JavaScriptCore/b3/air/AirDumpAsJS.h new file mode 100644 index 000000000..8895f5801 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirDumpAsJS.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include <wtf/PrintStream.h> + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// This is used for benchmarking. Various operations on Air are interesting from a benchmarking +// standpoint. We can write some Air phases in JS and then use that to benchmark JS. The benchmark +// is called JSAir, and it's in PerformanceTests/JSAir. 
+void dumpAsJS(Code&, PrintStream&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirEliminateDeadCode.cpp b/Source/JavaScriptCore/b3/air/AirEliminateDeadCode.cpp new file mode 100644 index 000000000..ca36af93e --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirEliminateDeadCode.cpp @@ -0,0 +1,153 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirEliminateDeadCode.h" + +#if ENABLE(B3_JIT) + +#include "AirCode.h" +#include "AirInstInlines.h" +#include "AirPhaseScope.h" +#include <wtf/IndexSet.h> + +namespace JSC { namespace B3 { namespace Air { + +bool eliminateDeadCode(Code& code) +{ + PhaseScope phaseScope(code, "eliminateDeadCode"); + + HashSet<Tmp> liveTmps; + IndexSet<StackSlot> liveStackSlots; + bool changed; + + auto isArgLive = [&] (const Arg& arg) -> bool { + switch (arg.kind()) { + case Arg::Tmp: + if (arg.isReg()) + return true; + return liveTmps.contains(arg.tmp()); + case Arg::Stack: + if (arg.stackSlot()->isLocked()) + return true; + return liveStackSlots.contains(arg.stackSlot()); + default: + return true; + } + }; + + auto addLiveArg = [&] (const Arg& arg) -> bool { + switch (arg.kind()) { + case Arg::Tmp: + if (arg.isReg()) + return false; + return liveTmps.add(arg.tmp()).isNewEntry; + case Arg::Stack: + if (arg.stackSlot()->isLocked()) + return false; + return liveStackSlots.add(arg.stackSlot()); + default: + return false; + } + }; + + auto isInstLive = [&] (Inst& inst) -> bool { + if (inst.hasNonArgEffects()) + return true; + + // This instruction should be presumed dead, if its Args are all dead. + bool storesToLive = false; + inst.forEachArg( + [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) { + if (!Arg::isAnyDef(role)) + return; + if (role == Arg::Scratch) + return; + storesToLive |= isArgLive(arg); + }); + return storesToLive; + }; + + auto handleInst = [&] (Inst& inst) { + if (!isInstLive(inst)) + return; + + // We get here if the Inst is live. For simplicity we say that a live instruction forces + // liveness upon everything it mentions. 
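+        // For example, if "Add32 tmp1, tmp2" is live because tmp2 is live, then tmp1
+        // (and tmp2) become live here as well, which on a later pass keeps any earlier
+        // instruction that merely stores into tmp1 alive too.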
+ for (Arg& arg : inst.args) { + changed |= addLiveArg(arg); + arg.forEachTmpFast( + [&] (Tmp& tmp) { + changed |= addLiveArg(tmp); + }); + } + }; + + auto runForward = [&] () -> bool { + changed = false; + for (BasicBlock* block : code) { + for (Inst& inst : *block) + handleInst(inst); + } + return changed; + }; + + auto runBackward = [&] () -> bool { + changed = false; + for (unsigned blockIndex = code.size(); blockIndex--;) { + BasicBlock* block = code[blockIndex]; + for (unsigned instIndex = block->size(); instIndex--;) + handleInst(block->at(instIndex)); + } + return changed; + }; + + for (;;) { + // Propagating backward is most likely to be profitable. + if (!runBackward()) + break; + if (!runBackward()) + break; + + // Occasionally propagating forward greatly reduces the likelihood of pathologies. + if (!runForward()) + break; + } + + unsigned removedInstCount = 0; + for (BasicBlock* block : code) { + removedInstCount += block->insts().removeAllMatching( + [&] (Inst& inst) -> bool { + return !isInstLive(inst); + }); + } + + return !!removedInstCount; +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirEliminateDeadCode.h b/Source/JavaScriptCore/b3/air/AirEliminateDeadCode.h new file mode 100644 index 000000000..1b718f63d --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirEliminateDeadCode.h @@ -0,0 +1,43 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// This eliminates instructions that have no observable effect. These are instructions whose only +// effect would be storing to some Arg, except that we proved that the location specified by the Arg +// is never loaded from. The only Args for which we can do such analysis are non-Reg Tmps and +// anonymous StackSlots. + +bool eliminateDeadCode(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirEmitShuffle.cpp b/Source/JavaScriptCore/b3/air/AirEmitShuffle.cpp new file mode 100644 index 000000000..318471976 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirEmitShuffle.cpp @@ -0,0 +1,543 @@ +/* + * Copyright (C) 2016 Apple Inc. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirEmitShuffle.h" + +#if ENABLE(B3_JIT) + +#include "AirCode.h" +#include "AirInstInlines.h" +#include <wtf/GraphNodeWorklist.h> +#include <wtf/ListDump.h> + +namespace JSC { namespace B3 { namespace Air { + +namespace { + +bool verbose = false; + +template<typename Functor> +Tmp findPossibleScratch(Code& code, Arg::Type type, const Functor& functor) { + for (Reg reg : code.regsInPriorityOrder(type)) { + Tmp tmp(reg); + if (functor(tmp)) + return tmp; + } + return Tmp(); +} + +Tmp findPossibleScratch(Code& code, Arg::Type type, const Arg& arg1, const Arg& arg2) { + return findPossibleScratch( + code, type, + [&] (Tmp tmp) -> bool { + return !arg1.usesTmp(tmp) && !arg2.usesTmp(tmp); + }); +} + +// Example: (a => b, b => a, a => c, b => d) +struct Rotate { + Vector<ShufflePair> loop; // in the example, this is the loop: (a => b, b => a) + Vector<ShufflePair> fringe; // in the example, these are the associated shifts: (a => c, b => d) +}; + +} // anonymous namespace + +void ShufflePair::dump(PrintStream& out) const +{ + out.print(width(), ":", src(), "=>", dst()); +} + +Inst createShuffle(Value* origin, const Vector<ShufflePair>& pairs) +{ + Inst result(Shuffle, origin); + for (const ShufflePair& pair : pairs) + result.append(pair.src(), pair.dst(), Arg::widthArg(pair.width())); + return result; +} + +Vector<Inst> emitShuffle( + Code& code, Vector<ShufflePair> pairs, std::array<Arg, 2> scratches, Arg::Type type, + Value* origin) +{ + if (verbose) { + dataLog( + "Dealing with pairs: ", listDump(pairs), " and scratches ", scratches[0], ", ", + scratches[1], "\n"); + } + + pairs.removeAllMatching( + [&] (const ShufflePair& pair) -> bool { + return pair.src() == pair.dst(); + }); + + // First validate that this is the kind of shuffle that we know how to deal with. +#if !ASSERT_DISABLED + for (const ShufflePair& pair : pairs) { + ASSERT(pair.src().isType(type)); + ASSERT(pair.dst().isType(type)); + ASSERT(pair.dst().isTmp() || pair.dst().isMemory()); + } +#endif // !ASSERT_DISABLED + + // There are two possible kinds of operations that we will do: + // + // - Shift. Example: (a => b, b => c). We emit this as "Move b, c; Move a, b". 
This only requires + // scratch registers if there are memory->memory moves. We want to find as many of these as + // possible because they are cheaper. Note that shifts can involve the same source mentioned + // multiple times. Example: (a => b, a => c, b => d, b => e). + // + // - Rotate. Example: (a => b, b => a). We want to emit this as "Swap a, b", but that instruction + // may not be available, in which case we may need a scratch register or a scratch memory + // location. A gnarlier example is (a => b, b => c, c => a). We can emit this as "Swap b, c; + // Swap a, b". Note that swapping has to be careful about differing widths. + // + // Note that a rotate can have "fringe". For example, we might have (a => b, b => a, a =>c, + // b => d). This has a rotate loop (a => b, b => a) and some fringe (a => c, b => d). We treat + // the whole thing as a single rotate. + // + // We will find multiple disjoint such operations. We can execute them in any order. + + // We interpret these as Moves that should be executed backwards. All shifts are keyed by their + // starting source. + HashMap<Arg, Vector<ShufflePair>> shifts; + + // We interpret these as Swaps over src()'s that should be executed backwards, i.e. for a list + // of size 3 we would do "Swap list[1].src(), list[2].src(); Swap list[0].src(), list[1].src()". + // Note that we actually can't do that if the widths don't match or other bad things happen. + // But, prior to executing all of that, we need to execute the fringe: the shifts comming off the + // rotate. + Vector<Rotate> rotates; + + { + HashMap<Arg, Vector<ShufflePair>> mapping; + for (const ShufflePair& pair : pairs) + mapping.add(pair.src(), Vector<ShufflePair>()).iterator->value.append(pair); + + Vector<ShufflePair> currentPairs; + + while (!mapping.isEmpty()) { + ASSERT(currentPairs.isEmpty()); + Arg originalSrc = mapping.begin()->key; + ASSERT(!shifts.contains(originalSrc)); + if (verbose) + dataLog("Processing from ", originalSrc, "\n"); + + GraphNodeWorklist<Arg> worklist; + worklist.push(originalSrc); + while (Arg src = worklist.pop()) { + HashMap<Arg, Vector<ShufflePair>>::iterator iter = mapping.find(src); + if (iter == mapping.end()) { + // With a shift it's possible that we previously built the tail of this shift. + // See if that's the case now. + if (verbose) + dataLog("Trying to append shift at ", src, "\n"); + currentPairs.appendVector(shifts.take(src)); + continue; + } + Vector<ShufflePair> pairs = WTFMove(iter->value); + mapping.remove(iter); + + for (const ShufflePair& pair : pairs) { + currentPairs.append(pair); + ASSERT(pair.src() == src); + worklist.push(pair.dst()); + } + } + + ASSERT(currentPairs.size()); + ASSERT(currentPairs[0].src() == originalSrc); + + if (verbose) + dataLog("currentPairs = ", listDump(currentPairs), "\n"); + + bool isRotate = false; + for (const ShufflePair& pair : currentPairs) { + if (pair.dst() == originalSrc) { + isRotate = true; + break; + } + } + + if (isRotate) { + if (verbose) + dataLog("It's a rotate.\n"); + Rotate rotate; + + // The common case is that the rotate does not have fringe. The only way to + // check for this is to examine the whole rotate. + bool ok; + if (currentPairs.last().dst() == originalSrc) { + ok = true; + for (unsigned i = currentPairs.size() - 1; i--;) + ok &= currentPairs[i].dst() == currentPairs[i + 1].src(); + } else + ok = false; + + if (ok) + rotate.loop = WTFMove(currentPairs); + else { + // This is the slow path. The rotate has fringe. 
+ + HashMap<Arg, ShufflePair> dstMapping; + for (const ShufflePair& pair : currentPairs) + dstMapping.add(pair.dst(), pair); + + ShufflePair pair = dstMapping.take(originalSrc); + for (;;) { + rotate.loop.append(pair); + + auto iter = dstMapping.find(pair.src()); + if (iter == dstMapping.end()) + break; + pair = iter->value; + dstMapping.remove(iter); + } + + rotate.loop.reverse(); + + // Make sure that the fringe appears in the same order as how it appeared in the + // currentPairs, since that's the DFS order. + for (const ShufflePair& pair : currentPairs) { + // But of course we only include it if it's not in the loop. + if (dstMapping.contains(pair.dst())) + rotate.fringe.append(pair); + } + } + + // If the graph search terminates because we returned to the first source, then the + // pair list has to have a very particular shape. + for (unsigned i = rotate.loop.size() - 1; i--;) + ASSERT(rotate.loop[i].dst() == rotate.loop[i + 1].src()); + rotates.append(WTFMove(rotate)); + currentPairs.resize(0); + } else { + if (verbose) + dataLog("It's a shift.\n"); + shifts.add(originalSrc, WTFMove(currentPairs)); + } + } + } + + if (verbose) { + dataLog("Shifts:\n"); + for (auto& entry : shifts) + dataLog(" ", entry.key, ": ", listDump(entry.value), "\n"); + dataLog("Rotates:\n"); + for (auto& rotate : rotates) + dataLog(" loop = ", listDump(rotate.loop), ", fringe = ", listDump(rotate.fringe), "\n"); + } + + // In the worst case, we need two scratch registers. The way we do this is that the client passes + // us what scratch registers he happens to have laying around. We will need scratch registers in + // the following cases: + // + // - Shuffle pairs where both src and dst refer to memory. + // - Rotate when no Swap instruction is available. + // + // Lucky for us, we are guaranteed to have extra scratch registers anytime we have a Shift that + // ends with a register. We search for such a register right now. + + auto moveForWidth = [&] (Arg::Width width) -> Opcode { + switch (width) { + case Arg::Width32: + return type == Arg::GP ? Move32 : MoveFloat; + case Arg::Width64: + return type == Arg::GP ? Move : MoveDouble; + default: + RELEASE_ASSERT_NOT_REACHED(); + } + }; + + Opcode conservativeMove = moveForWidth(Arg::conservativeWidth(type)); + + // We will emit things in reverse. We maintain a list of packs of instructions, and then we emit + // append them together in reverse (for example the thing at the end of resultPacks is placed + // first). This is useful because the last thing we emit frees up its destination registers, so + // it affects how we emit things before it. 
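+    // For example, if the packs end up being built in the order P0, P1, P2, the final
+    // stream is P2; P1; P0. A shift pack like P0 whose last destination is a register
+    // lands last in that stream, so its destination register stays untouched while P2
+    // and P1 run and can be handed to them as scratch.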
+ Vector<Vector<Inst>> resultPacks; + Vector<Inst> result; + + auto commitResult = [&] () { + resultPacks.append(WTFMove(result)); + }; + + auto getScratch = [&] (unsigned index, Tmp possibleScratch) -> Tmp { + if (scratches[index].isTmp()) + return scratches[index].tmp(); + + if (!possibleScratch) + return Tmp(); + result.append(Inst(conservativeMove, origin, possibleScratch, scratches[index])); + return possibleScratch; + }; + + auto returnScratch = [&] (unsigned index, Tmp tmp) { + if (Arg(tmp) != scratches[index]) + result.append(Inst(conservativeMove, origin, scratches[index], tmp)); + }; + + auto handleShiftPair = [&] (const ShufflePair& pair, unsigned scratchIndex) { + Opcode move = moveForWidth(pair.width()); + + if (!isValidForm(move, pair.src().kind(), pair.dst().kind())) { + Tmp scratch = + getScratch(scratchIndex, findPossibleScratch(code, type, pair.src(), pair.dst())); + RELEASE_ASSERT(scratch); + if (isValidForm(move, pair.src().kind(), Arg::Tmp)) + result.append(Inst(moveForWidth(pair.width()), origin, pair.src(), scratch)); + else { + ASSERT(pair.src().isSomeImm()); + ASSERT(move == Move32); + result.append(Inst(Move, origin, Arg::bigImm(pair.src().value()), scratch)); + } + result.append(Inst(moveForWidth(pair.width()), origin, scratch, pair.dst())); + returnScratch(scratchIndex, scratch); + return; + } + + result.append(Inst(move, origin, pair.src(), pair.dst())); + }; + + auto handleShift = [&] (Vector<ShufflePair>& shift) { + // FIXME: We could optimize the spill behavior of the shifter by checking if any of the + // shifts need spills. If they do, then we could try to get a register out here. Note that + // this may fail where the current strategy succeeds: out here we need a register that does + // not interfere with any of the shifts, while the current strategy only needs to find a + // scratch register that does not interfer with a particular shift. So, this optimization + // will be opportunistic: if it succeeds, then the individual shifts can use that scratch, + // otherwise they will do what they do now. + + for (unsigned i = shift.size(); i--;) + handleShiftPair(shift[i], 0); + + Arg lastDst = shift.last().dst(); + if (lastDst.isTmp()) { + for (Arg& scratch : scratches) { + ASSERT(scratch != lastDst); + if (!scratch.isTmp()) { + scratch = lastDst; + break; + } + } + } + }; + + // First handle shifts whose last destination is a tmp because these free up scratch registers. + // These end up last in the final sequence, so the final destination of these shifts will be + // available as a scratch location for anything emitted prior (so, after, since we're emitting in + // reverse). + for (auto& entry : shifts) { + Vector<ShufflePair>& shift = entry.value; + if (shift.last().dst().isTmp()) + handleShift(shift); + commitResult(); + } + + // Now handle the rest of the shifts. + for (auto& entry : shifts) { + Vector<ShufflePair>& shift = entry.value; + if (!shift.last().dst().isTmp()) + handleShift(shift); + commitResult(); + } + + for (Rotate& rotate : rotates) { + if (!rotate.fringe.isEmpty()) { + // Make sure we do the fringe first! This won't clobber any of the registers that are + // part of the rotation. + handleShift(rotate.fringe); + } + + bool canSwap = false; + Opcode swap = Oops; + Arg::Width swapWidth = Arg::Width8; // bogus value + + // Currently, the swap instruction is not available for floating point on any architecture we + // support. + if (type == Arg::GP) { + // Figure out whether we will be doing 64-bit swaps or 32-bit swaps. 
If we have a mix of + // widths we handle that by fixing up the relevant register with zero-extends. + swap = Swap32; + swapWidth = Arg::Width32; + bool hasMemory = false; + bool hasIndex = false; + for (ShufflePair& pair : rotate.loop) { + switch (pair.width()) { + case Arg::Width32: + break; + case Arg::Width64: + swap = Swap64; + swapWidth = Arg::Width64; + break; + default: + RELEASE_ASSERT_NOT_REACHED(); + break; + } + + hasMemory |= pair.src().isMemory() || pair.dst().isMemory(); + hasIndex |= pair.src().isIndex() || pair.dst().isIndex(); + } + + canSwap = isValidForm(swap, Arg::Tmp, Arg::Tmp); + + // We can totally use swaps even if there are shuffles involving memory. But, we play it + // safe in that case. There are corner cases we don't handle, and our ability to do it is + // contingent upon swap form availability. + + if (hasMemory) { + canSwap &= isValidForm(swap, Arg::Tmp, Arg::Addr); + + // We don't take the swapping path if there is a mix of widths and some of the + // shuffles involve memory. That gets too confusing. We might be able to relax this + // to only bail if there are subwidth pairs involving memory, but I haven't thought + // about it very hard. Anyway, this case is not common: rotates involving memory + // don't arise for function calls, and they will only happen for rotates in user code + // if some of the variables get spilled. It's hard to imagine a program that rotates + // data around in variables while also doing a combination of uint32->uint64 and + // int64->int32 casts. + for (ShufflePair& pair : rotate.loop) + canSwap &= pair.width() == swapWidth; + } + + if (hasIndex) + canSwap &= isValidForm(swap, Arg::Tmp, Arg::Index); + } + + if (canSwap) { + for (unsigned i = rotate.loop.size() - 1; i--;) { + Arg left = rotate.loop[i].src(); + Arg right = rotate.loop[i + 1].src(); + + if (left.isMemory() && right.isMemory()) { + // Note that this is a super rare outcome. Rotates are rare. Spills are rare. + // Moving data between two spills is rare. To get here a lot of rare stuff has to + // all happen at once. + + Tmp scratch = getScratch(0, findPossibleScratch(code, type, left, right)); + RELEASE_ASSERT(scratch); + result.append(Inst(moveForWidth(swapWidth), origin, left, scratch)); + result.append(Inst(swap, origin, scratch, right)); + result.append(Inst(moveForWidth(swapWidth), origin, scratch, left)); + returnScratch(0, scratch); + continue; + } + + if (left.isMemory()) + std::swap(left, right); + + result.append(Inst(swap, origin, left, right)); + } + + for (ShufflePair pair : rotate.loop) { + if (pair.width() == swapWidth) + continue; + + RELEASE_ASSERT(pair.width() == Arg::Width32); + RELEASE_ASSERT(swapWidth == Arg::Width64); + RELEASE_ASSERT(pair.dst().isTmp()); + + // Need to do an extra zero extension. + result.append(Inst(Move32, origin, pair.dst(), pair.dst())); + } + } else { + // We can treat this as a shift so long as we take the last destination (i.e. first + // source) and save it first. Then we handle the first entry in the pair in the rotate + // specially, after we restore the last destination. This requires some special care to + // find a scratch register. It's possible that we have a rotate that uses the entire + // available register file. + + Tmp scratch = findPossibleScratch( + code, type, + [&] (Tmp tmp) -> bool { + for (ShufflePair pair : rotate.loop) { + if (pair.src().usesTmp(tmp)) + return false; + if (pair.dst().usesTmp(tmp)) + return false; + } + return true; + }); + + // NOTE: This is the most likely use of scratch registers. 
+ scratch = getScratch(0, scratch); + + // We may not have found a scratch register. When this happens, we can just use the spill + // slot directly. + Arg rotateSave = scratch ? Arg(scratch) : scratches[0]; + + handleShiftPair( + ShufflePair(rotate.loop.last().dst(), rotateSave, rotate.loop[0].width()), 1); + + for (unsigned i = rotate.loop.size(); i-- > 1;) + handleShiftPair(rotate.loop[i], 1); + + handleShiftPair( + ShufflePair(rotateSave, rotate.loop[0].dst(), rotate.loop[0].width()), 1); + + if (scratch) + returnScratch(0, scratch); + } + + commitResult(); + } + + ASSERT(result.isEmpty()); + + for (unsigned i = resultPacks.size(); i--;) + result.appendVector(resultPacks[i]); + + return result; +} + +Vector<Inst> emitShuffle( + Code& code, const Vector<ShufflePair>& pairs, + const std::array<Arg, 2>& gpScratch, const std::array<Arg, 2>& fpScratch, + Value* origin) +{ + Vector<ShufflePair> gpPairs; + Vector<ShufflePair> fpPairs; + for (const ShufflePair& pair : pairs) { + if (pair.src().isMemory() && pair.dst().isMemory() && pair.width() > Arg::pointerWidth()) { + // 8-byte memory-to-memory moves on a 32-bit platform are best handled as float moves. + fpPairs.append(pair); + } else if (pair.src().isGP() && pair.dst().isGP()) { + // This means that gpPairs gets memory-to-memory shuffles. The assumption is that we + // can do that more efficiently using GPRs, except in the special case above. + gpPairs.append(pair); + } else + fpPairs.append(pair); + } + + Vector<Inst> result; + result.appendVector(emitShuffle(code, gpPairs, gpScratch, Arg::GP, origin)); + result.appendVector(emitShuffle(code, fpPairs, fpScratch, Arg::FP, origin)); + return result; +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirEmitShuffle.h b/Source/JavaScriptCore/b3/air/AirEmitShuffle.h new file mode 100644 index 000000000..b2c3bb0c2 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirEmitShuffle.h @@ -0,0 +1,116 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirArg.h" +#include "AirInst.h" +#include <wtf/Vector.h> + +namespace JSC { namespace B3 { + +class Value; + +namespace Air { + +class Code; + +class ShufflePair { +public: + ShufflePair() + { + } + + ShufflePair(const Arg& src, const Arg& dst, Arg::Width width) + : m_src(src) + , m_dst(dst) + , m_width(width) + { + } + + const Arg& src() const { return m_src; } + const Arg& dst() const { return m_dst; } + + // The width determines the kind of move we do. You can only choose Width32 or Width64 right now. + // For GP, it picks between Move32 and Move. For FP, it picks between MoveFloat and MoveDouble. + Arg::Width width() const { return m_width; } + + void dump(PrintStream&) const; + +private: + Arg m_src; + Arg m_dst; + Arg::Width m_width { Arg::Width8 }; +}; + +// Create a Shuffle instruction. +Inst createShuffle(Value* origin, const Vector<ShufflePair>&); + +// Perform a shuffle of a given type. The scratch argument is mandatory. You should pass it as +// follows: If you know that you have scratch registers or temporaries available - that is, they're +// registers that are not mentioned in the shuffle, have the same type as the shuffle, and are not +// live at the shuffle - then you can pass them. If you don't have scratch registers available or if +// you don't feel like looking for them, you can pass memory locations. It's always safe to pass a +// pair of memory locations, and replacing either memory location with a register can be viewed as an +// optimization. It's a pretty important optimization. Some more notes: +// +// - We define scratch registers as things that are not live before the shuffle and are not one of +// the destinations of the shuffle. Not being live before the shuffle also means that they cannot +// be used for any of the sources of the shuffle. +// +// - A second scratch location is only needed when you have shuffle pairs where memory is used both +// as source and destination. +// +// - You're guaranteed not to need any scratch locations if there is a Swap instruction available for +// the type and you don't have any memory locations that are both the source and the destination of +// some pairs. GP supports Swap on x86 while FP never supports Swap. +// +// - Passing memory locations as scratch if are running emitShuffle() before register allocation is +// silly, since that will cause emitShuffle() to pick some specific registers when it does need +// scratch. One easy way to avoid that predicament is to ensure that you call emitShuffle() after +// register allocation. For this reason we could add a Shuffle instruction so that we can defer +// shufflings until after regalloc. +// +// - Shuffles with memory=>memory pairs are not very well tuned. You should avoid them if you want +// performance. If you need to do them, then making sure that you reserve a temporary is one way to +// get acceptable performance. +// +// NOTE: Use this method (and its friend below) to emit shuffles after register allocation. Before +// register allocation it is much better to simply use the Shuffle instruction. +Vector<Inst> emitShuffle( + Code& code, Vector<ShufflePair>, std::array<Arg, 2> scratch, Arg::Type, Value* origin); + +// Perform a shuffle that involves any number of types. Pass scratch registers or memory locations +// for each type according to the rules above. 
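+//
+// A minimal usage sketch; "code", "pairs", "origin", and the spill slots passed as
+// scratch are assumed to be supplied by the caller (the names are hypothetical):
+//
+//     std::array<Arg, 2> gpScratch = { Arg::stack(gpSlot0), Arg::stack(gpSlot1) };
+//     std::array<Arg, 2> fpScratch = { Arg::stack(fpSlot0), Arg::stack(fpSlot1) };
+//     Vector<Inst> insts = emitShuffle(code, pairs, gpScratch, fpScratch, origin);
+//     // splice "insts" into the block in place of the original Shuffle.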
+Vector<Inst> emitShuffle( + Code& code, const Vector<ShufflePair>&, + const std::array<Arg, 2>& gpScratch, const std::array<Arg, 2>& fpScratch, + Value* origin); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirFixObviousSpills.cpp b/Source/JavaScriptCore/b3/air/AirFixObviousSpills.cpp new file mode 100644 index 000000000..d000d6c5d --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirFixObviousSpills.cpp @@ -0,0 +1,569 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "AirFixObviousSpills.h" + +#if ENABLE(B3_JIT) + +#include "AirArgInlines.h" +#include "AirCode.h" +#include "AirInstInlines.h" +#include "AirPhaseScope.h" +#include <wtf/IndexMap.h> +#include <wtf/ListDump.h> + +namespace JSC { namespace B3 { namespace Air { + +namespace { + +bool verbose = false; + +class FixObviousSpills { +public: + FixObviousSpills(Code& code) + : m_code(code) + , m_atHead(code.size()) + { + } + + void run() + { + if (verbose) + dataLog("Code before fixObviousSpills:\n", m_code); + + computeAliases(); + fixCode(); + } + +private: + void computeAliases() + { + m_atHead[m_code[0]].wasVisited = true; + + bool changed = true; + while (changed) { + changed = false; + + for (BasicBlock* block : m_code) { + m_block = block; + m_state = m_atHead[block]; + if (!m_state.wasVisited) + continue; + + if (verbose) + dataLog("Executing block ", *m_block, ": ", m_state, "\n"); + + for (m_instIndex = 0; m_instIndex < block->size(); ++m_instIndex) + executeInst(); + + for (BasicBlock* successor : block->successorBlocks()) { + State& toState = m_atHead[successor]; + if (toState.wasVisited) + changed |= toState.merge(m_state); + else { + toState = m_state; + changed = true; + } + } + } + } + } + + void fixCode() + { + for (BasicBlock* block : m_code) { + m_block = block; + m_state = m_atHead[block]; + RELEASE_ASSERT(m_state.wasVisited); + + for (m_instIndex = 0; m_instIndex < block->size(); ++m_instIndex) { + fixInst(); + executeInst(); + } + } + } + + void executeInst() + { + Inst& inst = m_block->at(m_instIndex); + + if (verbose) + dataLog(" Executing ", inst, ": ", m_state, "\n"); + + Inst::forEachDefWithExtraClobberedRegs<Arg>( + &inst, &inst, + [&] (const Arg& arg, Arg::Role, Arg::Type, Arg::Width) { + if (verbose) + dataLog(" Clobbering ", arg, "\n"); + m_state.clobber(arg); + }); + + switch (inst.kind.opcode) { + case Move: + if (inst.args[0].isSomeImm()) { + if (inst.args[1].isReg()) + m_state.addAlias(RegConst(inst.args[1].reg(), inst.args[0].value())); + else if (isSpillSlot(inst.args[1])) + m_state.addAlias(SlotConst(inst.args[1].stackSlot(), inst.args[0].value())); + } else if (isSpillSlot(inst.args[0]) && inst.args[1].isReg()) { + if (std::optional<int64_t> constant = m_state.constantFor(inst.args[0])) + m_state.addAlias(RegConst(inst.args[1].reg(), *constant)); + m_state.addAlias( + RegSlot(inst.args[1].reg(), inst.args[0].stackSlot(), RegSlot::AllBits)); + } else if (inst.args[0].isReg() && isSpillSlot(inst.args[1])) { + if (std::optional<int64_t> constant = m_state.constantFor(inst.args[0])) + m_state.addAlias(SlotConst(inst.args[1].stackSlot(), *constant)); + m_state.addAlias( + RegSlot(inst.args[0].reg(), inst.args[1].stackSlot(), RegSlot::AllBits)); + } + break; + + case Move32: + if (inst.args[0].isSomeImm()) { + if (inst.args[1].isReg()) + m_state.addAlias(RegConst(inst.args[1].reg(), static_cast<uint32_t>(inst.args[0].value()))); + else if (isSpillSlot(inst.args[1])) + m_state.addAlias(SlotConst(inst.args[1].stackSlot(), static_cast<uint32_t>(inst.args[0].value()))); + } else if (isSpillSlot(inst.args[0]) && inst.args[1].isReg()) { + if (std::optional<int64_t> constant = m_state.constantFor(inst.args[0])) + m_state.addAlias(RegConst(inst.args[1].reg(), static_cast<uint32_t>(*constant))); + m_state.addAlias( + RegSlot(inst.args[1].reg(), inst.args[0].stackSlot(), RegSlot::ZExt32)); + } else if (inst.args[0].isReg() && isSpillSlot(inst.args[1])) { + if (std::optional<int64_t> constant = m_state.constantFor(inst.args[0])) + 
m_state.addAlias(SlotConst(inst.args[1].stackSlot(), static_cast<int32_t>(*constant))); + m_state.addAlias( + RegSlot(inst.args[0].reg(), inst.args[1].stackSlot(), RegSlot::Match32)); + } + break; + + case MoveFloat: + if (isSpillSlot(inst.args[0]) && inst.args[1].isReg()) { + m_state.addAlias( + RegSlot(inst.args[1].reg(), inst.args[0].stackSlot(), RegSlot::Match32)); + } else if (inst.args[0].isReg() && isSpillSlot(inst.args[1])) { + m_state.addAlias( + RegSlot(inst.args[0].reg(), inst.args[1].stackSlot(), RegSlot::Match32)); + } + break; + + case MoveDouble: + if (isSpillSlot(inst.args[0]) && inst.args[1].isReg()) { + m_state.addAlias( + RegSlot(inst.args[1].reg(), inst.args[0].stackSlot(), RegSlot::AllBits)); + } else if (inst.args[0].isReg() && isSpillSlot(inst.args[1])) { + m_state.addAlias( + RegSlot(inst.args[0].reg(), inst.args[1].stackSlot(), RegSlot::AllBits)); + } + break; + + default: + break; + } + } + + void fixInst() + { + Inst& inst = m_block->at(m_instIndex); + + if (verbose) + dataLog("Fixing inst ", inst, ": ", m_state, "\n"); + + // First handle some special instructions. + switch (inst.kind.opcode) { + case Move: { + if (inst.args[0].isBigImm() && inst.args[1].isReg() + && isValidForm(Add64, Arg::Imm, Arg::Tmp, Arg::Tmp)) { + // BigImm materializations are super expensive on both x86 and ARM. Let's try to + // materialize this bad boy using math instead. Note that we use unsigned math here + // since it's more deterministic. + uint64_t myValue = inst.args[0].value(); + Reg myDest = inst.args[1].reg(); + for (const RegConst& regConst : m_state.regConst) { + uint64_t otherValue = regConst.constant; + + // Let's try add. That's the only thing that works on all platforms, since it's + // the only cheap arithmetic op that x86 does in three operands. Long term, we + // should add fancier materializations here for ARM if the BigImm is yuge. + uint64_t delta = myValue - otherValue; + + if (Arg::isValidImmForm(delta)) { + inst.kind = Add64; + inst.args.resize(3); + inst.args[0] = Arg::imm(delta); + inst.args[1] = Tmp(regConst.reg); + inst.args[2] = Tmp(myDest); + return; + } + } + return; + } + break; + } + + default: + break; + } + + // Create a copy in case we invalidate the instruction. That doesn't happen often. + Inst instCopy = inst; + + // The goal is to replace references to stack slots. We only care about early uses. We can't + // handle UseDefs. We could teach this to handle UseDefs if we inserted a store instruction + // after and we proved that the register aliased to the stack slot dies here. We can get that + // information from the liveness analysis. We also can't handle late uses, because we don't + // look at late clobbers when doing this. + bool didThings = false; + auto handleArg = [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width width) { + if (!isSpillSlot(arg)) + return; + if (!Arg::isEarlyUse(role)) + return; + if (Arg::isAnyDef(role)) + return; + + // Try to get a register if at all possible. + if (const RegSlot* alias = m_state.getRegSlot(arg.stackSlot())) { + switch (width) { + case Arg::Width64: + if (alias->mode != RegSlot::AllBits) + return; + if (verbose) + dataLog(" Replacing ", arg, " with ", alias->reg, "\n"); + arg = Tmp(alias->reg); + didThings = true; + return; + case Arg::Width32: + if (verbose) + dataLog(" Replacing ", arg, " with ", alias->reg, " (subwidth case)\n"); + arg = Tmp(alias->reg); + didThings = true; + return; + default: + return; + } + } + + // Revert to immediate if that didn't work. 
+ if (const SlotConst* alias = m_state.getSlotConst(arg.stackSlot())) { + if (verbose) + dataLog(" Replacing ", arg, " with constant ", alias->constant, "\n"); + if (Arg::isValidImmForm(alias->constant)) + arg = Arg::imm(alias->constant); + else + arg = Arg::bigImm(alias->constant); + didThings = true; + return; + } + }; + + inst.forEachArg(handleArg); + if (!didThings || inst.isValidForm()) + return; + + // We introduced something invalid along the way. Back up and carefully handle each argument. + inst = instCopy; + ASSERT(inst.isValidForm()); + inst.forEachArg( + [&] (Arg& arg, Arg::Role role, Arg::Type type, Arg::Width width) { + Arg argCopy = arg; + handleArg(arg, role, type, width); + if (!inst.isValidForm()) + arg = argCopy; + }); + } + + static bool isSpillSlot(const Arg& arg) + { + return arg.isStack() && arg.stackSlot()->isSpill(); + } + + struct RegConst { + RegConst() + { + } + + RegConst(Reg reg, int64_t constant) + : reg(reg) + , constant(constant) + { + } + + explicit operator bool() const + { + return !!reg; + } + + void dump(PrintStream& out) const + { + out.print(reg, "->", constant); + } + + Reg reg; + int64_t constant { 0 }; + }; + + struct RegSlot { + enum Mode : int8_t { + AllBits, + ZExt32, // Register contains zero-extended contents of stack slot. + Match32 // Low 32 bits of register match low 32 bits of stack slot. + }; + + RegSlot() + { + } + + RegSlot(Reg reg, StackSlot* slot, Mode mode) + : slot(slot) + , reg(reg) + , mode(mode) + { + } + + explicit operator bool() const + { + return slot && reg; + } + + void dump(PrintStream& out) const + { + out.print(pointerDump(slot), "->", reg); + switch (mode) { + case AllBits: + out.print("(AllBits)"); + break; + case ZExt32: + out.print("(ZExt32)"); + break; + case Match32: + out.print("(Match32)"); + break; + } + } + + StackSlot* slot { nullptr }; + Reg reg; + Mode mode { AllBits }; + }; + + struct SlotConst { + SlotConst() + { + } + + SlotConst(StackSlot* slot, int64_t constant) + : slot(slot) + , constant(constant) + { + } + + explicit operator bool() const + { + return slot; + } + + void dump(PrintStream& out) const + { + out.print(pointerDump(slot), "->", constant); + } + + StackSlot* slot { nullptr }; + int64_t constant { 0 }; + }; + + struct State { + void addAlias(const RegConst& newAlias) + { + regConst.append(newAlias); + } + void addAlias(const RegSlot& newAlias) + { + regSlot.append(newAlias); + } + void addAlias(const SlotConst& newAlias) + { + slotConst.append(newAlias); + } + + const RegConst* getRegConst(Reg reg) const + { + for (const RegConst& alias : regConst) { + if (alias.reg == reg) + return &alias; + } + return nullptr; + } + + const RegSlot* getRegSlot(Reg reg) const + { + for (const RegSlot& alias : regSlot) { + if (alias.reg == reg) + return &alias; + } + return nullptr; + } + + const RegSlot* getRegSlot(StackSlot* slot) const + { + for (const RegSlot& alias : regSlot) { + if (alias.slot == slot) + return &alias; + } + return nullptr; + } + + const RegSlot* getRegSlot(Reg reg, StackSlot* slot) const + { + for (const RegSlot& alias : regSlot) { + if (alias.reg == reg && alias.slot == slot) + return &alias; + } + return nullptr; + } + + const SlotConst* getSlotConst(StackSlot* slot) const + { + for (const SlotConst& alias : slotConst) { + if (alias.slot == slot) + return &alias; + } + return nullptr; + } + + std::optional<int64_t> constantFor(const Arg& arg) + { + if (arg.isReg()) { + if (const RegConst* alias = getRegConst(arg.reg())) + return alias->constant; + return std::nullopt; + } + if 
(arg.isStack()) { + if (const SlotConst* alias = getSlotConst(arg.stackSlot())) + return alias->constant; + return std::nullopt; + } + return std::nullopt; + } + + void clobber(const Arg& arg) + { + if (arg.isReg()) { + regConst.removeAllMatching( + [&] (const RegConst& alias) -> bool { + return alias.reg == arg.reg(); + }); + regSlot.removeAllMatching( + [&] (const RegSlot& alias) -> bool { + return alias.reg == arg.reg(); + }); + return; + } + if (arg.isStack()) { + slotConst.removeAllMatching( + [&] (const SlotConst& alias) -> bool { + return alias.slot == arg.stackSlot(); + }); + regSlot.removeAllMatching( + [&] (const RegSlot& alias) -> bool { + return alias.slot == arg.stackSlot(); + }); + } + } + + bool merge(const State& other) + { + bool changed = false; + + changed |= !!regConst.removeAllMatching( + [&] (RegConst& alias) -> bool { + const RegConst* otherAlias = other.getRegConst(alias.reg); + if (!otherAlias) + return true; + if (alias.constant != otherAlias->constant) + return true; + return false; + }); + + changed |= !!slotConst.removeAllMatching( + [&] (SlotConst& alias) -> bool { + const SlotConst* otherAlias = other.getSlotConst(alias.slot); + if (!otherAlias) + return true; + if (alias.constant != otherAlias->constant) + return true; + return false; + }); + + changed |= !!regSlot.removeAllMatching( + [&] (RegSlot& alias) -> bool { + const RegSlot* otherAlias = other.getRegSlot(alias.reg, alias.slot); + if (!otherAlias) + return true; + if (alias.mode != RegSlot::Match32 && alias.mode != otherAlias->mode) { + alias.mode = RegSlot::Match32; + changed = true; + } + return false; + }); + + return changed; + } + + void dump(PrintStream& out) const + { + out.print( + "{regConst = [", listDump(regConst), "], slotConst = [", listDump(slotConst), + "], regSlot = [", listDump(regSlot), "], wasVisited = ", wasVisited, "}"); + } + + Vector<RegConst> regConst; + Vector<SlotConst> slotConst; + Vector<RegSlot> regSlot; + bool wasVisited { false }; + }; + + Code& m_code; + IndexMap<BasicBlock, State> m_atHead; + State m_state; + BasicBlock* m_block { nullptr }; + unsigned m_instIndex { 0 }; +}; + +} // anonymous namespace + +void fixObviousSpills(Code& code) +{ + PhaseScope phaseScope(code, "fixObviousSpills"); + + FixObviousSpills fixObviousSpills(code); + fixObviousSpills.run(); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirFixObviousSpills.h b/Source/JavaScriptCore/b3/air/AirFixObviousSpills.h new file mode 100644 index 000000000..fb8e41fe2 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirFixObviousSpills.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// This is a forward flow phase that tracks equivalence between spills slots and registers. It +// removes loads from spill slots in cases when the contents of the spill slot can be found in (or +// computed from) a register. +void fixObviousSpills(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.cpp b/Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.cpp new file mode 100644 index 000000000..b3d5d0b71 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.cpp @@ -0,0 +1,239 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirFixPartialRegisterStalls.h" + +#if ENABLE(B3_JIT) + +#include "AirBasicBlock.h" +#include "AirCode.h" +#include "AirInsertionSet.h" +#include "AirInst.h" +#include "AirInstInlines.h" +#include "AirPhaseScope.h" +#include "MacroAssembler.h" +#include <wtf/IndexMap.h> +#include <wtf/IndexSet.h> +#include <wtf/Vector.h> + +namespace JSC { namespace B3 { namespace Air { + +namespace { + +bool hasPartialXmmRegUpdate(const Inst& inst) +{ + switch (inst.kind.opcode) { + case ConvertDoubleToFloat: + case ConvertFloatToDouble: + case ConvertInt32ToDouble: + case ConvertInt64ToDouble: + case ConvertInt32ToFloat: + case ConvertInt64ToFloat: + case SqrtDouble: + case SqrtFloat: + case CeilDouble: + case CeilFloat: + case FloorDouble: + case FloorFloat: + return true; + default: + break; + } + return false; +} + +bool isDependencyBreaking(const Inst& inst) +{ + // "xorps reg, reg" is used by the frontend to remove the dependency on its argument. 
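+    // On x86-64 this is expected to lower to a zeroing idiom such as
+    // "xorps %xmm0, %xmm0" (register name hypothetical), which the CPU's renamer
+    // recognizes as a fresh definition with no input dependency.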
+ return inst.kind.opcode == MoveZeroToDouble; +} + +// FIXME: find a good distance per architecture experimentally. +// LLVM uses a distance of 16 but that comes from Nehalem. +unsigned char minimumSafeDistance = 16; + +struct FPDefDistance { + FPDefDistance() + { + for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i) + distance[i] = 255; + } + + void reset(FPRReg reg) + { + unsigned index = MacroAssembler::fpRegisterIndex(reg); + distance[index] = 255; + } + + void add(FPRReg reg, unsigned registerDistance) + { + unsigned index = MacroAssembler::fpRegisterIndex(reg); + if (registerDistance < distance[index]) + distance[index] = static_cast<unsigned char>(registerDistance); + } + + bool updateFromPrecessor(FPDefDistance& precessorDistance, unsigned constantOffset = 0) + { + bool changed = false; + for (unsigned i = 0; i < MacroAssembler::numberOfFPRegisters(); ++i) { + unsigned regDistance = precessorDistance.distance[i] + constantOffset; + if (regDistance < minimumSafeDistance && regDistance < distance[i]) { + distance[i] = regDistance; + changed = true; + } + } + return changed; + } + + unsigned char distance[MacroAssembler::numberOfFPRegisters()]; +}; + +void updateDistances(Inst& inst, FPDefDistance& localDistance, unsigned& distanceToBlockEnd) +{ + --distanceToBlockEnd; + + if (isDependencyBreaking(inst)) { + localDistance.reset(inst.args[0].tmp().fpr()); + return; + } + + inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type, Arg::Width) { + ASSERT_WITH_MESSAGE(tmp.isReg(), "This phase must be run after register allocation."); + + if (tmp.isFPR() && Arg::isAnyDef(role)) + localDistance.add(tmp.fpr(), distanceToBlockEnd); + }); +} + +} + +void fixPartialRegisterStalls(Code& code) +{ + if (!isX86()) + return; + + PhaseScope phaseScope(code, "fixPartialRegisterStalls"); + + Vector<BasicBlock*> candidates; + + for (BasicBlock* block : code) { + for (const Inst& inst : *block) { + if (hasPartialXmmRegUpdate(inst)) { + candidates.append(block); + break; + } + } + } + + // Fortunately, Partial Stalls are rarely used. Return early if no block + // cares about them. + if (candidates.isEmpty()) + return; + + // For each block, this provides the distance to the last instruction setting each register + // on block *entry*. + IndexMap<BasicBlock, FPDefDistance> lastDefDistance(code.size()); + + // Blocks with dirty distance at head. + IndexSet<BasicBlock> dirty; + + // First, we compute the local distance for each block and push it to the successors. + for (BasicBlock* block : code) { + FPDefDistance localDistance; + + unsigned distanceToBlockEnd = block->size(); + for (Inst& inst : *block) + updateDistances(inst, localDistance, distanceToBlockEnd); + + for (BasicBlock* successor : block->successorBlocks()) { + if (lastDefDistance[successor].updateFromPrecessor(localDistance)) + dirty.add(successor); + } + } + + // Now we propagate the minimums accross blocks. + bool changed; + do { + changed = false; + + for (BasicBlock* block : code) { + if (!dirty.remove(block)) + continue; + + // Little shortcut: if the block is big enough, propagating it won't add any information. + if (block->size() >= minimumSafeDistance) + continue; + + unsigned blockSize = block->size(); + FPDefDistance& blockDistance = lastDefDistance[block]; + for (BasicBlock* successor : block->successorBlocks()) { + if (lastDefDistance[successor].updateFromPrecessor(blockDistance, blockSize)) { + dirty.add(successor); + changed = true; + } + } + } + } while (changed); + + // Finally, update each block as needed. 
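A minimal, self-contained sketch of the distance dataflow implemented above (illustrative code, not part of the tree; DistanceSketch, the 16-register count, and safeDistance are stand-ins for FPDefDistance, numberOfFPRegisters(), and minimumSafeDistance): every FP register tracks how many instructions have executed since its last def, saturating at 255, and a successor's at-head distances take the elementwise minimum over what its predecessors feed in. The insertion loop that the comment above introduces follows right after this sketch.

#include <algorithm>
#include <array>
#include <cstdint>

struct DistanceSketch {
    static constexpr unsigned safeDistance = 16;   // mirrors minimumSafeDistance above
    std::array<uint8_t, 16> distance;              // assumes 16 FP registers for the sketch

    DistanceSketch() { distance.fill(255); }       // 255 means "no recent definition"

    // Merge distances flowing in from one predecessor, shifted by 'offset'
    // (the predecessor's instruction count during the propagation pass above).
    // Returns true if anything got closer, so the successor must be revisited,
    // which is what the 'dirty' set tracks in the phase.
    bool mergeFromPredecessor(const DistanceSketch& pred, unsigned offset)
    {
        bool changed = false;
        for (size_t i = 0; i < distance.size(); ++i) {
            unsigned candidate = std::min(255u, pred.distance[i] + offset);
            if (candidate < safeDistance && candidate < distance[i]) {
                distance[i] = static_cast<uint8_t>(candidate);
                changed = true;
            }
        }
        return changed;
    }
};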
+ InsertionSet insertionSet(code); + for (BasicBlock* block : candidates) { + unsigned distanceToBlockEnd = block->size(); + FPDefDistance& localDistance = lastDefDistance[block]; + + for (unsigned i = 0; i < block->size(); ++i) { + Inst& inst = block->at(i); + + if (hasPartialXmmRegUpdate(inst)) { + RegisterSet defs; + RegisterSet uses; + inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type, Arg::Width) { + if (tmp.isFPR()) { + if (Arg::isAnyDef(role)) + defs.set(tmp.fpr()); + if (Arg::isAnyUse(role)) + uses.set(tmp.fpr()); + } + }); + // We only care about values we define but not use. Otherwise we have to wait + // for the value to be resolved anyway. + defs.exclude(uses); + + defs.forEach([&] (Reg reg) { + if (localDistance.distance[MacroAssembler::fpRegisterIndex(reg.fpr())] < minimumSafeDistance) + insertionSet.insert(i, MoveZeroToDouble, inst.origin, Tmp(reg)); + }); + } + + updateDistances(inst, localDistance, distanceToBlockEnd); + } + insertionSet.execute(block); + } +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.h b/Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.h new file mode 100644 index 000000000..009327948 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirFixPartialRegisterStalls.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// x86 has a pipelining hazard caused by false dependencies between instructions. +// +// Some instructions update only part of a register, they can only be scheduled after +// the previous definition is computed. This problem can be avoided by the compiler +// by explicitely resetting the entire register before executing the instruction with +// partial update. +// +// See "Partial XMM Register Stalls" and "Dependency Breaking Idioms" in the manual. 
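Before the declaration below, a hedged illustration of the hazard itself (GNU inline assembly for x86_64; not WebKit code, and the helper name is made up): cvtsi2sd writes only the low 64 bits of its destination xmm register, so without a zeroing idiom the conversion inherits a false dependency on whatever instruction last wrote that register. The xorps is the dependency-breaking move this phase inserts, modeled in Air as MoveZeroToDouble.

#if defined(__x86_64__) && (defined(__GNUC__) || defined(__clang__))
// Hedged illustration only; 'convertWithoutPartialStall' is a made-up helper.
inline double convertWithoutPartialStall(long long x)
{
    double result;
    asm("xorps    %%xmm0, %%xmm0\n\t"   // zeroing idiom: breaks the false dependency
        "cvtsi2sdq %1, %%xmm0\n\t"      // partial update: only the low 64 bits are written
        "movsd    %%xmm0, %0"
        : "=m"(result)
        : "r"(x)
        : "xmm0");
    return result;
}
#endif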
+void fixPartialRegisterStalls(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirFrequentedBlock.h b/Source/JavaScriptCore/b3/air/AirFrequentedBlock.h new file mode 100644 index 000000000..37cd28736 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirFrequentedBlock.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "B3GenericFrequentedBlock.h" + +namespace JSC { namespace B3 { namespace Air { + +class BasicBlock; + +typedef GenericFrequentedBlock<BasicBlock> FrequentedBlock; + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirGenerate.cpp b/Source/JavaScriptCore/b3/air/AirGenerate.cpp new file mode 100644 index 000000000..a99f0501c --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirGenerate.cpp @@ -0,0 +1,292 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "AirGenerate.h" + +#if ENABLE(B3_JIT) + +#include "AirAllocateStack.h" +#include "AirCode.h" +#include "AirDumpAsJS.h" +#include "AirEliminateDeadCode.h" +#include "AirFixObviousSpills.h" +#include "AirFixPartialRegisterStalls.h" +#include "AirGenerationContext.h" +#include "AirHandleCalleeSaves.h" +#include "AirIteratedRegisterCoalescing.h" +#include "AirLogRegisterPressure.h" +#include "AirLowerAfterRegAlloc.h" +#include "AirLowerEntrySwitch.h" +#include "AirLowerMacros.h" +#include "AirOpcodeUtils.h" +#include "AirOptimizeBlockOrder.h" +#include "AirReportUsedRegisters.h" +#include "AirSimplifyCFG.h" +#include "AirSpillEverything.h" +#include "AirValidate.h" +#include "B3Common.h" +#include "B3Procedure.h" +#include "B3TimingScope.h" +#include "B3ValueInlines.h" +#include "CCallHelpers.h" +#include "DisallowMacroScratchRegisterUsage.h" +#include "LinkBuffer.h" +#include <wtf/IndexMap.h> + +namespace JSC { namespace B3 { namespace Air { + +void prepareForGeneration(Code& code) +{ + TimingScope timingScope("Air::prepareForGeneration"); + + // We don't expect the incoming code to have predecessors computed. + code.resetReachability(); + + if (shouldValidateIR()) + validate(code); + + // If we're doing super verbose dumping, the phase scope of any phase will already do a dump. + if (shouldDumpIR(AirMode) && !shouldDumpIRAtEachPhase(AirMode)) { + dataLog("Initial air:\n"); + dataLog(code); + } + + lowerMacros(code); + + // This is where we run our optimizations and transformations. + // FIXME: Add Air optimizations. + // https://bugs.webkit.org/show_bug.cgi?id=150456 + + eliminateDeadCode(code); + + // Register allocation for all the Tmps that do not have a corresponding machine register. + // After this phase, every Tmp has a reg. + // + // For debugging, you can use spillEverything() to put everything to the stack between each Inst. + if (Options::airSpillsEverything()) + spillEverything(code); + else + iteratedRegisterCoalescing(code); + + if (Options::logAirRegisterPressure()) { + dataLog("Register pressure after register allocation:\n"); + logRegisterPressure(code); + } + + // This replaces uses of spill slots with registers or constants if possible. It does this by + // minimizing the amount that we perturb the already-chosen register allocation. It may extend + // the live ranges of registers though. + fixObviousSpills(code); + + lowerAfterRegAlloc(code); + + // Prior to this point the prologue and epilogue is implicit. This makes it explicit. It also + // does things like identify which callee-saves we're using and saves them. + handleCalleeSaves(code); + + if (Options::dumpAirAsJSBeforeAllocateStack()) { + dataLog("Dumping Air as JS before allocateStack:\n"); + dumpAsJS(code, WTF::dataFile()); + dataLog("Air hash: ", code.jsHash(), "\n"); + } + + // This turns all Stack and CallArg Args into Addr args that use the frame pointer. It does + // this by first-fit allocating stack slots. It should be pretty darn close to optimal, so we + // shouldn't have to worry about this very much. + allocateStack(code); + + if (Options::dumpAirAfterAllocateStack()) { + dataLog("Dumping Air after allocateStack:\n"); + dataLog(code); + dataLog("Air hash: ", code.jsHash(), "\n"); + } + + // If we coalesced moves then we can unbreak critical edges. This is the main reason for this + // phase. + simplifyCFG(code); + + // This is needed to satisfy a requirement of B3::StackmapValue. 
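For readers adding their own passes to the pipeline above, a hedged sketch of the shape every phase follows (the phase and helper names are made up; PhaseScope, dataLog, and the Code iteration are the same facilities the real phases use). The reportUsedRegisters(code) call that the comment above refers to follows immediately after this aside.

void countInstsPhase(Code& code)
{
    PhaseScope phaseScope(code, "countInstsPhase");   // hypothetical phase name

    unsigned instCount = 0;
    for (BasicBlock* block : code)
        instCount += block->size();
    dataLog("Air has ", instCount, " insts\n");
}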
+ reportUsedRegisters(code); + + // Attempt to remove false dependencies between instructions created by partial register changes. + // This must be executed as late as possible as it depends on the instructions order and register + // use. We _must_ run this after reportUsedRegisters(), since that kills variable assignments + // that seem dead. Luckily, this phase does not change register liveness, so that's OK. + fixPartialRegisterStalls(code); + + // Actually create entrypoints. + lowerEntrySwitch(code); + + // The control flow graph can be simplified further after we have lowered EntrySwitch. + simplifyCFG(code); + + // This sorts the basic blocks in Code to achieve an ordering that maximizes the likelihood that a high + // frequency successor is also the fall-through target. + optimizeBlockOrder(code); + + if (shouldValidateIR()) + validate(code); + + // Do a final dump of Air. Note that we have to do this even if we are doing per-phase dumping, + // since the final generation is not a phase. + if (shouldDumpIR(AirMode)) { + dataLog("Air after ", code.lastPhaseName(), ", before generation:\n"); + dataLog(code); + } +} + +void generate(Code& code, CCallHelpers& jit) +{ + TimingScope timingScope("Air::generate"); + + DisallowMacroScratchRegisterUsage disallowScratch(jit); + + auto argFor = [&] (const RegisterAtOffset& entry) -> CCallHelpers::Address { + return CCallHelpers::Address(GPRInfo::callFrameRegister, entry.offset()); + }; + + // And now, we generate code. + GenerationContext context; + context.code = &code; + context.blockLabels.resize(code.size()); + for (BasicBlock* block : code) { + if (block) + context.blockLabels[block] = Box<CCallHelpers::Label>::create(); + } + IndexMap<BasicBlock, CCallHelpers::JumpList> blockJumps(code.size()); + + auto link = [&] (CCallHelpers::Jump jump, BasicBlock* target) { + if (context.blockLabels[target]->isSet()) { + jump.linkTo(*context.blockLabels[target], &jit); + return; + } + + blockJumps[target].append(jump); + }; + + PCToOriginMap& pcToOriginMap = code.proc().pcToOriginMap(); + auto addItem = [&] (Inst& inst) { + if (!inst.origin) { + pcToOriginMap.appendItem(jit.labelIgnoringWatchpoints(), Origin()); + return; + } + pcToOriginMap.appendItem(jit.labelIgnoringWatchpoints(), inst.origin->origin()); + }; + + for (BasicBlock* block : code) { + context.currentBlock = block; + context.indexInBlock = UINT_MAX; + blockJumps[block].link(&jit); + CCallHelpers::Label label = jit.label(); + *context.blockLabels[block] = label; + + if (code.isEntrypoint(block)) { + jit.emitFunctionPrologue(); + if (code.frameSize()) + jit.addPtr(CCallHelpers::TrustedImm32(-code.frameSize()), MacroAssembler::stackPointerRegister); + + for (const RegisterAtOffset& entry : code.calleeSaveRegisters()) { + if (entry.reg().isGPR()) + jit.storePtr(entry.reg().gpr(), argFor(entry)); + else + jit.storeDouble(entry.reg().fpr(), argFor(entry)); + } + } + + ASSERT(block->size() >= 1); + for (unsigned i = 0; i < block->size() - 1; ++i) { + context.indexInBlock = i; + Inst& inst = block->at(i); + addItem(inst); + CCallHelpers::Jump jump = inst.generate(jit, context); + ASSERT_UNUSED(jump, !jump.isSet()); + } + + context.indexInBlock = block->size() - 1; + + if (block->last().kind.opcode == Jump + && block->successorBlock(0) == code.findNextBlock(block)) + continue; + + addItem(block->last()); + + if (isReturn(block->last().kind.opcode)) { + // We currently don't represent the full prologue/epilogue in Air, so we need to + // have this override. 
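One part of generate() that is easy to miss is the late-path mechanism: a generator callback can queue work that runs only after every block's body has been emitted, via the latePaths loop near the end of generate(). A hedged sketch of how such a path is queued (not from the tree; deferSlowPath and the breakpoint() stand-in body are made up). The epilogue emission that the comment above introduces continues immediately after this aside.

void deferSlowPath(GenerationContext& context, CCallHelpers::Jump slowCase)
{
    context.latePaths.append(createSharedTask<GenerationContext::LatePathFunction>(
        [=] (CCallHelpers& jit, GenerationContext&) {
            slowCase.link(&jit);   // bind the fast path's forward jump here
            jit.breakpoint();      // placeholder slow-path body for the sketch
        }));
}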
+ if (code.frameSize()) { + for (const RegisterAtOffset& entry : code.calleeSaveRegisters()) { + if (entry.reg().isGPR()) + jit.loadPtr(argFor(entry), entry.reg().gpr()); + else + jit.loadDouble(argFor(entry), entry.reg().fpr()); + } + jit.emitFunctionEpilogue(); + } else + jit.emitFunctionEpilogueWithEmptyFrame(); + jit.ret(); + addItem(block->last()); + continue; + } + + CCallHelpers::Jump jump = block->last().generate(jit, context); + // The jump won't be set for patchpoints. It won't be set for Oops because then it won't have + // any successors. + if (jump.isSet()) { + switch (block->numSuccessors()) { + case 1: + link(jump, block->successorBlock(0)); + break; + case 2: + link(jump, block->successorBlock(0)); + if (block->successorBlock(1) != code.findNextBlock(block)) + link(jit.jump(), block->successorBlock(1)); + break; + default: + RELEASE_ASSERT_NOT_REACHED(); + break; + } + } + addItem(block->last()); + } + + context.currentBlock = nullptr; + context.indexInBlock = UINT_MAX; + + Vector<CCallHelpers::Label> entrypointLabels(code.numEntrypoints()); + for (unsigned i = code.numEntrypoints(); i--;) + entrypointLabels[i] = *context.blockLabels[code.entrypoint(i).block()]; + code.setEntrypointLabels(WTFMove(entrypointLabels)); + + pcToOriginMap.appendItem(jit.label(), Origin()); + // FIXME: Make late paths have Origins: https://bugs.webkit.org/show_bug.cgi?id=153689 + for (auto& latePath : context.latePaths) + latePath->run(jit, context); + pcToOriginMap.appendItem(jit.label(), Origin()); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirGenerate.h b/Source/JavaScriptCore/b3/air/AirGenerate.h new file mode 100644 index 000000000..60839bea5 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirGenerate.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { + +class CCallHelpers; + +namespace B3 { namespace Air { + +class Code; + +// This takes an Air::Code that hasn't had any stack allocation and optionally hasn't had any +// register allocation and does both of those things. 
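A hedged usage sketch of the two entry points declared just below (compileAir is a made-up wrapper; linking and finalization of the emitted code are omitted):

void compileAir(Code& code, CCallHelpers& jit)
{
    prepareForGeneration(code); // run the Air phases: register and stack allocation, block ordering, ...
    generate(code, jit);        // emit machine code for the prepared Code into 'jit'
}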
+JS_EXPORT_PRIVATE void prepareForGeneration(Code&); + +// This generates the code using the given CCallHelpers instance. Note that this may call callbacks +// in the supplied code as it is generating. +JS_EXPORT_PRIVATE void generate(Code&, CCallHelpers&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirGenerated.cpp b/Source/JavaScriptCore/b3/air/AirGenerated.cpp new file mode 100644 index 000000000..6dd2304a9 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirGenerated.cpp @@ -0,0 +1,33 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#if ENABLE(B3_JIT) + +// This is generated by opcode_generator.rb. +#include "AirOpcodeGenerated.h" + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirGenerationContext.h b/Source/JavaScriptCore/b3/air/AirGenerationContext.h new file mode 100644 index 000000000..f48b5bb8a --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirGenerationContext.h @@ -0,0 +1,59 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirBasicBlock.h" +#include "CCallHelpers.h" +#include <wtf/Box.h> +#include <wtf/IndexMap.h> +#include <wtf/SharedTask.h> +#include <wtf/Vector.h> + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +struct GenerationContext { + WTF_MAKE_NONCOPYABLE(GenerationContext); +public: + + GenerationContext() = default; + + typedef void LatePathFunction(CCallHelpers&, GenerationContext&); + typedef SharedTask<LatePathFunction> LatePath; + + Vector<RefPtr<LatePath>> latePaths; + IndexMap<BasicBlock, Box<CCallHelpers::Label>> blockLabels; + BasicBlock* currentBlock { nullptr }; + unsigned indexInBlock { UINT_MAX }; + Code* code { nullptr }; +}; + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirHandleCalleeSaves.cpp b/Source/JavaScriptCore/b3/air/AirHandleCalleeSaves.cpp new file mode 100644 index 000000000..97cdfa1c9 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirHandleCalleeSaves.cpp @@ -0,0 +1,78 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirHandleCalleeSaves.h" + +#if ENABLE(B3_JIT) + +#include "AirCode.h" +#include "AirInstInlines.h" +#include "AirPhaseScope.h" + +namespace JSC { namespace B3 { namespace Air { + +void handleCalleeSaves(Code& code) +{ + PhaseScope phaseScope(code, "handleCalleeSaves"); + + RegisterSet usedCalleeSaves; + + for (BasicBlock* block : code) { + for (Inst& inst : *block) { + inst.forEachTmpFast( + [&] (Tmp& tmp) { + // At first we just record all used regs. 
+ usedCalleeSaves.set(tmp.reg()); + }); + + if (inst.kind.opcode == Patch) + usedCalleeSaves.merge(inst.extraClobberedRegs()); + } + } + + // Now we filter to really get the callee saves. + usedCalleeSaves.filter(RegisterSet::calleeSaveRegisters()); + usedCalleeSaves.filter(code.mutableRegs()); + usedCalleeSaves.exclude(RegisterSet::stackRegisters()); // We don't need to save FP here. + + if (!usedCalleeSaves.numberOfSetRegisters()) + return; + + code.calleeSaveRegisters() = RegisterAtOffsetList(usedCalleeSaves); + + size_t byteSize = 0; + for (const RegisterAtOffset& entry : code.calleeSaveRegisters()) + byteSize = std::max(static_cast<size_t>(-entry.offset()), byteSize); + + StackSlot* savesArea = code.addStackSlot(byteSize, StackSlotKind::Locked); + // This is a bit weird since we could have already pinned a different stack slot to this + // area. Also, our runtime does not require us to pin the saves area. Maybe we shouldn't pin it? + savesArea->setOffsetFromFP(-byteSize); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirHandleCalleeSaves.h b/Source/JavaScriptCore/b3/air/AirHandleCalleeSaves.h new file mode 100644 index 000000000..b4b78a3b7 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirHandleCalleeSaves.h @@ -0,0 +1,46 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// This phase identifies callee-save registers and adds code to save/restore them in the +// prologue/epilogue to the code. It's a mandatory phase. + +// FIXME: It would be cool to make this more interactive with the Air client and also more +// powerful. 
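Before the FIXME's follow-up links below, a small self-contained sketch of the size computation handleCalleeSaves() performs (not WebKit code; the example offsets are assumed, not taken from a real RegisterAtOffsetList): callee-save offsets are negative and frame-pointer relative, so the saves area size is the largest magnitude among them, and the locked StackSlot is then pinned at -byteSize.

#include <algorithm>
#include <cstddef>
#include <initializer_list>

size_t calleeSaveAreaSize(std::initializer_list<ptrdiff_t> offsetsFromFP)
{
    size_t byteSize = 0;
    for (ptrdiff_t offset : offsetsFromFP)
        byteSize = std::max(static_cast<size_t>(-offset), byteSize);
    return byteSize;   // e.g. { -8, -16 } gives 16: the area spans [FP - 16, FP)
}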
+// We should have shrink wrapping: https://bugs.webkit.org/show_bug.cgi?id=150458 +// We should make this interact with the client: https://bugs.webkit.org/show_bug.cgi?id=150459 + +void handleCalleeSaves(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirInsertionSet.cpp b/Source/JavaScriptCore/b3/air/AirInsertionSet.cpp new file mode 100644 index 000000000..452d4888f --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirInsertionSet.cpp @@ -0,0 +1,51 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirInsertionSet.h" + +#if ENABLE(B3_JIT) + +#include "AirBasicBlock.h" +#include <wtf/BubbleSort.h> + +namespace JSC { namespace B3 { namespace Air { + +void InsertionSet::insertInsts(size_t index, Vector<Inst>&& insts) +{ + for (Inst& inst : insts) + insertInst(index, WTFMove(inst)); +} + +void InsertionSet::execute(BasicBlock* block) +{ + bubbleSort(m_insertions.begin(), m_insertions.end()); + executeInsertions(block->m_insts, m_insertions); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirInsertionSet.h b/Source/JavaScriptCore/b3/air/AirInsertionSet.h new file mode 100644 index 000000000..84a791d40 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirInsertionSet.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2015-2017 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirInst.h" +#include <wtf/Insertion.h> +#include <wtf/Vector.h> + +namespace JSC { namespace B3 { namespace Air { + +class BasicBlock; +class Code; + +typedef WTF::Insertion<Inst> Insertion; + +class InsertionSet { +public: + InsertionSet(Code& code) + : m_code(code) + { + } + + Code& code() { return m_code; } + + template<typename T> + void appendInsertion(T&& insertion) + { + m_insertions.append(std::forward<T>(insertion)); + } + + template<typename Inst> + void insertInst(size_t index, Inst&& inst) + { + appendInsertion(Insertion(index, std::forward<Inst>(inst))); + } + + template <typename InstVector> + void insertInsts(size_t index, const InstVector& insts) + { + for (const Inst& inst : insts) + insertInst(index, inst); + } + void insertInsts(size_t index, Vector<Inst>&&); + + template<typename... Arguments> + void insert(size_t index, Arguments&&... arguments) + { + insertInst(index, Inst(std::forward<Arguments>(arguments)...)); + } + + void execute(BasicBlock*); + +private: + Code& m_code; + Vector<Insertion, 8> m_insertions; +}; + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirInst.cpp b/Source/JavaScriptCore/b3/air/AirInst.cpp new file mode 100644 index 000000000..defb344b0 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirInst.cpp @@ -0,0 +1,72 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "AirInst.h" + +#if ENABLE(B3_JIT) + +#include "AirInstInlines.h" +#include "B3Value.h" +#include <wtf/ListDump.h> + +namespace JSC { namespace B3 { namespace Air { + +bool Inst::hasArgEffects() +{ + bool result = false; + forEachArg( + [&] (Arg&, Arg::Role role, Arg::Type, Arg::Width) { + if (Arg::isAnyDef(role)) + result = true; + }); + return result; +} + +unsigned Inst::jsHash() const +{ + // FIXME: This should do something for flags. + // https://bugs.webkit.org/show_bug.cgi?id=162751 + unsigned result = static_cast<unsigned>(kind.opcode); + + for (const Arg& arg : args) + result += arg.jsHash(); + + return result; +} + +void Inst::dump(PrintStream& out) const +{ + out.print(kind, " ", listDump(args)); + if (origin) { + if (args.size()) + out.print(", "); + out.print(*origin); + } +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirInst.h b/Source/JavaScriptCore/b3/air/AirInst.h new file mode 100644 index 000000000..f38c21df8 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirInst.h @@ -0,0 +1,207 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirArg.h" +#include "AirKind.h" +#include "CCallHelpers.h" + +namespace JSC { + +class CCallHelpers; +class RegisterSet; + +namespace B3 { + +class Value; + +namespace Air { + +struct GenerationContext; + +struct Inst { +public: + typedef Vector<Arg, 3> ArgList; + + Inst() + : origin(nullptr) + { + } + + Inst(Kind kind, Value* origin) + : origin(origin) + , kind(kind) + { + } + + template<typename... Arguments> + Inst(Kind kind, Value* origin, Arg arg, Arguments... arguments) + : args{ arg, arguments... } + , origin(origin) + , kind(kind) + { + } + + Inst(Kind kind, Value* origin, const ArgList& arguments) + : args(arguments) + , origin(origin) + , kind(kind) + { + } + + Inst(Kind kind, Value* origin, ArgList&& arguments) + : args(WTFMove(arguments)) + , origin(origin) + , kind(kind) + { + } + + explicit operator bool() const { return origin || kind || args.size(); } + + void append() { } + + template<typename... Arguments> + void append(Arg arg, Arguments... 
arguments) + { + args.append(arg); + append(arguments...); + } + + // Note that these functors all avoid using "const" because we want to use them for things that + // edit IR. IR is meant to be edited; if you're carrying around a "const Inst&" then you're + // probably doing it wrong. + + // This only walks those Tmps that are explicitly mentioned, and it doesn't tell you their role + // or type. + template<typename Functor> + void forEachTmpFast(const Functor& functor) + { + for (Arg& arg : args) + arg.forEachTmpFast(functor); + } + + typedef void EachArgCallback(Arg&, Arg::Role, Arg::Type, Arg::Width); + + // Calls the functor with (arg, role, type, width). This function is auto-generated by + // opcode_generator.rb. + template<typename Functor> + void forEachArg(const Functor&); + + // Calls the functor with (tmp, role, type, width). + template<typename Functor> + void forEachTmp(const Functor& functor) + { + forEachArg( + [&] (Arg& arg, Arg::Role role, Arg::Type type, Arg::Width width) { + arg.forEachTmp(role, type, width, functor); + }); + } + + // Thing can be either Arg, Tmp, or StackSlot*. + template<typename Thing, typename Functor> + void forEach(const Functor&); + + // Reports any additional registers clobbered by this operation. Note that for efficiency, + // extraClobberedRegs() only works for the Patch opcode. + RegisterSet extraClobberedRegs(); + RegisterSet extraEarlyClobberedRegs(); + + // Iterate over all Def's that happen at the end of an instruction. You supply a pair + // instructions. The instructions must appear next to each other, in that order, in some basic + // block. You can pass null for the first instruction when analyzing what happens at the top of + // a basic block. You can pass null for the second instruction when analyzing what happens at the + // bottom of a basic block. + template<typename Thing, typename Functor> + static void forEachDef(Inst* prevInst, Inst* nextInst, const Functor&); + + // Iterate over all Def's that happen at the end of this instruction, including extra clobbered + // registers. Note that Thing can only be Arg or Tmp when you use this functor. + template<typename Thing, typename Functor> + static void forEachDefWithExtraClobberedRegs(Inst* prevInst, Inst* nextInst, const Functor&); + + // Use this to report which registers are live. This should be done just before codegen. Note + // that for efficiency, reportUsedRegisters() only works for the Patch opcode. + void reportUsedRegisters(const RegisterSet&); + + // Is this instruction in one of the valid forms right now? This function is auto-generated by + // opcode_generator.rb. + bool isValidForm(); + + // Assuming this instruction is in a valid form right now, will it still be in one of the valid + // forms if we put an Addr referencing the stack (or a StackSlot or CallArg, of course) in the + // given index? Spilling uses this: it walks the args by index to find Tmps that need spilling; + // if it finds one, it calls this to see if it can replace the Arg::Tmp with an Arg::Addr. If it + // finds a non-Tmp Arg, then it calls that Arg's forEachTmp to do a replacement that way. + // + // This function is auto-generated by opcode_generator.rb. + bool admitsStack(unsigned argIndex); + bool admitsStack(Arg&); + + // Defined by opcode_generator.rb. + bool isTerminal(); + + // Returns true if this instruction can have any effects other than control flow or arguments. 
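A hedged usage sketch of the iteration API documented above (countLateFPDefs is a made-up helper; the lambda signature matches forEachTmp as declared earlier in this struct). The effect-query declarations that the preceding comment introduces continue immediately below.

unsigned countLateFPDefs(Inst& inst)
{
    unsigned count = 0;
    inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type, Arg::Width) {
        if (tmp.isFPR() && Arg::isLateDef(role))
            ++count;   // a floating-point Tmp written at the end of this Inst
    });
    return count;
}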
+ bool hasNonArgNonControlEffects(); + + // Returns true if this instruction can have any effects other than what is implied by arguments. + // For example, "Move $42, (%rax)" will return false because the effect of storing to (%rax) is + // implied by the second argument. + bool hasNonArgEffects(); + + // Tells you if this operation has arg effects. + bool hasArgEffects(); + + // Tells you if this operation has non-control effects. + bool hasNonControlEffects() { return hasNonArgNonControlEffects() || hasArgEffects(); } + + // Generate some code for this instruction. This is, like, literally our backend. If this is the + // terminal, it returns the jump that needs to be linked for the "then" case, with the "else" + // case being fall-through. This function is auto-generated by opcode_generator.rb. + CCallHelpers::Jump generate(CCallHelpers&, GenerationContext&); + + // If source arguments benefits from being aliased to a destination argument, + // this return the index of the destination argument. + // The source are assumed to be at (index - 1) and (index - 2) + // For example, + // Add Tmp1, Tmp2, Tmp3 + // returns 2 if 0 and 1 benefit from aliasing to Tmp3. + std::optional<unsigned> shouldTryAliasingDef(); + + // This computes a hash for comparing this to JSAir's Inst. + unsigned jsHash() const; + + void dump(PrintStream&) const; + + ArgList args; + Value* origin; // The B3::Value that this originated from. + Kind kind; +}; + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirInstInlines.h b/Source/JavaScriptCore/b3/air/AirInstInlines.h new file mode 100644 index 000000000..2d3da626f --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirInstInlines.h @@ -0,0 +1,282 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirInst.h" +#include "AirOpcodeUtils.h" +#include "AirSpecial.h" +#include "AirStackSlot.h" +#include "B3Value.h" + +namespace JSC { namespace B3 { namespace Air { + +template<typename Thing, typename Functor> +void Inst::forEach(const Functor& functor) +{ + forEachArg( + [&] (Arg& arg, Arg::Role role, Arg::Type type, Arg::Width width) { + arg.forEach<Thing>(role, type, width, functor); + }); +} + +inline RegisterSet Inst::extraClobberedRegs() +{ + ASSERT(kind.opcode == Patch); + return args[0].special()->extraClobberedRegs(*this); +} + +inline RegisterSet Inst::extraEarlyClobberedRegs() +{ + ASSERT(kind.opcode == Patch); + return args[0].special()->extraEarlyClobberedRegs(*this); +} + +template<typename Thing, typename Functor> +inline void Inst::forEachDef(Inst* prevInst, Inst* nextInst, const Functor& functor) +{ + if (prevInst) { + prevInst->forEach<Thing>( + [&] (Thing& thing, Arg::Role role, Arg::Type argType, Arg::Width argWidth) { + if (Arg::isLateDef(role)) + functor(thing, role, argType, argWidth); + }); + } + + if (nextInst) { + nextInst->forEach<Thing>( + [&] (Thing& thing, Arg::Role role, Arg::Type argType, Arg::Width argWidth) { + if (Arg::isEarlyDef(role)) + functor(thing, role, argType, argWidth); + }); + } +} + +template<typename Thing, typename Functor> +inline void Inst::forEachDefWithExtraClobberedRegs( + Inst* prevInst, Inst* nextInst, const Functor& functor) +{ + forEachDef<Thing>(prevInst, nextInst, functor); + + Arg::Role regDefRole; + + auto reportReg = [&] (Reg reg) { + Arg::Type type = reg.isGPR() ? Arg::GP : Arg::FP; + functor(Thing(reg), regDefRole, type, Arg::conservativeWidth(type)); + }; + + if (prevInst && prevInst->kind.opcode == Patch) { + regDefRole = Arg::Def; + prevInst->extraClobberedRegs().forEach(reportReg); + } + + if (nextInst && nextInst->kind.opcode == Patch) { + regDefRole = Arg::EarlyDef; + nextInst->extraEarlyClobberedRegs().forEach(reportReg); + } +} + +inline void Inst::reportUsedRegisters(const RegisterSet& usedRegisters) +{ + ASSERT(kind.opcode == Patch); + args[0].special()->reportUsedRegisters(*this, usedRegisters); +} + +inline bool Inst::admitsStack(Arg& arg) +{ + return admitsStack(&arg - &args[0]); +} + +inline std::optional<unsigned> Inst::shouldTryAliasingDef() +{ + if (!isX86()) + return std::nullopt; + + switch (kind.opcode) { + case Add32: + case Add64: + case And32: + case And64: + case Mul32: + case Mul64: + case Or32: + case Or64: + case Xor32: + case Xor64: + case AndFloat: + case AndDouble: + case OrFloat: + case OrDouble: + case XorDouble: + case XorFloat: + if (args.size() == 3) + return 2; + break; + case AddDouble: + case AddFloat: + case MulDouble: + case MulFloat: +#if CPU(X86) || CPU(X86_64) + if (MacroAssembler::supportsAVX()) + return std::nullopt; +#endif + if (args.size() == 3) + return 2; + break; + case BranchAdd32: + case BranchAdd64: + if (args.size() == 4) + return 3; + break; + case MoveConditionally32: + case MoveConditionally64: + case MoveConditionallyTest32: + case MoveConditionallyTest64: + case MoveConditionallyDouble: + case MoveConditionallyFloat: + case MoveDoubleConditionally32: + case MoveDoubleConditionally64: + case MoveDoubleConditionallyTest32: + case MoveDoubleConditionallyTest64: + case MoveDoubleConditionallyDouble: + case MoveDoubleConditionallyFloat: + if (args.size() == 6) + return 5; + break; + break; + case Patch: + return PatchCustom::shouldTryAliasingDef(*this); + default: + break; + } + return std::nullopt; +} + +inline bool 
isShiftValid(const Inst& inst) +{ +#if CPU(X86) || CPU(X86_64) + return inst.args[0] == Tmp(X86Registers::ecx); +#else + UNUSED_PARAM(inst); + return true; +#endif +} + +inline bool isLshift32Valid(const Inst& inst) +{ + return isShiftValid(inst); +} + +inline bool isLshift64Valid(const Inst& inst) +{ + return isShiftValid(inst); +} + +inline bool isRshift32Valid(const Inst& inst) +{ + return isShiftValid(inst); +} + +inline bool isRshift64Valid(const Inst& inst) +{ + return isShiftValid(inst); +} + +inline bool isUrshift32Valid(const Inst& inst) +{ + return isShiftValid(inst); +} + +inline bool isUrshift64Valid(const Inst& inst) +{ + return isShiftValid(inst); +} + +inline bool isRotateRight32Valid(const Inst& inst) +{ + return isShiftValid(inst); +} + +inline bool isRotateLeft32Valid(const Inst& inst) +{ + return isShiftValid(inst); +} + +inline bool isRotateRight64Valid(const Inst& inst) +{ + return isShiftValid(inst); +} + +inline bool isRotateLeft64Valid(const Inst& inst) +{ + return isShiftValid(inst); +} + +inline bool isX86DivHelperValid(const Inst& inst) +{ +#if CPU(X86) || CPU(X86_64) + return inst.args[0] == Tmp(X86Registers::eax) + && inst.args[1] == Tmp(X86Registers::edx); +#else + UNUSED_PARAM(inst); + return false; +#endif +} + +inline bool isX86ConvertToDoubleWord32Valid(const Inst& inst) +{ + return isX86DivHelperValid(inst); +} + +inline bool isX86ConvertToQuadWord64Valid(const Inst& inst) +{ + return isX86DivHelperValid(inst); +} + +inline bool isX86Div32Valid(const Inst& inst) +{ + return isX86DivHelperValid(inst); +} + +inline bool isX86UDiv32Valid(const Inst& inst) +{ + return isX86DivHelperValid(inst); +} + +inline bool isX86Div64Valid(const Inst& inst) +{ + return isX86DivHelperValid(inst); +} + +inline bool isX86UDiv64Valid(const Inst& inst) +{ + return isX86DivHelperValid(inst); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirIteratedRegisterCoalescing.cpp b/Source/JavaScriptCore/b3/air/AirIteratedRegisterCoalescing.cpp new file mode 100644 index 000000000..7e81b5e01 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirIteratedRegisterCoalescing.cpp @@ -0,0 +1,1656 @@ +/* + * Copyright (C) 2015-2017 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "AirIteratedRegisterCoalescing.h" + +#if ENABLE(B3_JIT) + +#include "AirCode.h" +#include "AirInsertionSet.h" +#include "AirInstInlines.h" +#include "AirLiveness.h" +#include "AirPadInterference.h" +#include "AirPhaseScope.h" +#include "AirTmpInlines.h" +#include "AirTmpWidth.h" +#include "AirUseCounts.h" +#include <wtf/ListDump.h> + +namespace JSC { namespace B3 { namespace Air { + +namespace { + +bool debug = false; +bool traceDebug = false; +bool reportStats = false; + +// The AbstractColoringAllocator defines all the code that is independant +// from the type or register and can be shared when allocating registers. +template<typename IndexType> +class AbstractColoringAllocator { +public: + AbstractColoringAllocator(const Vector<Reg>& regsInPriorityOrder, IndexType lastPrecoloredRegisterIndex, unsigned tmpArraySize, const HashSet<unsigned>& unspillableTmp) + : m_regsInPriorityOrder(regsInPriorityOrder) + , m_lastPrecoloredRegisterIndex(lastPrecoloredRegisterIndex) + , m_unspillableTmps(unspillableTmp) + { + for (Reg reg : m_regsInPriorityOrder) + m_mutableRegs.set(reg); + + initializeDegrees(tmpArraySize); + + m_adjacencyList.resize(tmpArraySize); + m_moveList.resize(tmpArraySize); + m_coalescedTmps.fill(0, tmpArraySize); + m_isOnSelectStack.ensureSize(tmpArraySize); + } + +protected: + IndexType getAlias(IndexType tmpIndex) const + { + IndexType alias = tmpIndex; + while (IndexType nextAlias = m_coalescedTmps[alias]) + alias = nextAlias; + return alias; + } + + void addEdge(IndexType a, IndexType b) + { + if (a == b) + return; + addEdgeDistinct(a, b); + } + + void makeWorkList() + { + IndexType firstNonRegIndex = m_lastPrecoloredRegisterIndex + 1; + for (IndexType i = firstNonRegIndex; i < m_degrees.size(); ++i) { + unsigned degree = m_degrees[i]; + if (degree >= m_regsInPriorityOrder.size()) + addToSpill(i); + else if (!m_moveList[i].isEmpty()) + m_freezeWorklist.add(i); + else + m_simplifyWorklist.append(i); + } + } + + void addToSpill(unsigned toSpill) + { + if (m_unspillableTmps.contains(toSpill)) + return; + + m_spillWorklist.add(toSpill); + } + + // Low-degree vertex can always be colored: just pick any of the color taken by any + // other adjacent verices. + // The "Simplify" phase takes a low-degree out of the interference graph to simplify it. + void simplify() + { + IndexType lastIndex = m_simplifyWorklist.takeLast(); + + ASSERT(!m_selectStack.contains(lastIndex)); + ASSERT(!m_isOnSelectStack.get(lastIndex)); + m_selectStack.append(lastIndex); + m_isOnSelectStack.quickSet(lastIndex); + + forEachAdjacent(lastIndex, [this](IndexType adjacentTmpIndex) { + decrementDegree(adjacentTmpIndex); + }); + } + + void freeze() + { + IndexType victimIndex = m_freezeWorklist.takeAny(); + ASSERT_WITH_MESSAGE(getAlias(victimIndex) == victimIndex, "coalesce() should not leave aliased Tmp in the worklist."); + m_simplifyWorklist.append(victimIndex); + freezeMoves(victimIndex); + } + + void freezeMoves(IndexType tmpIndex) + { + forEachNodeMoves(tmpIndex, [this, tmpIndex] (IndexType moveIndex) { + if (!m_activeMoves.quickClear(moveIndex)) + m_worklistMoves.takeMove(moveIndex); + + const MoveOperands& moveOperands = m_coalescingCandidates[moveIndex]; + IndexType srcTmpIndex = moveOperands.srcIndex; + IndexType dstTmpIndex = moveOperands.dstIndex; + + IndexType originalOtherTmp = srcTmpIndex != tmpIndex ? 
srcTmpIndex : dstTmpIndex; + IndexType otherTmpIndex = getAlias(originalOtherTmp); + if (m_degrees[otherTmpIndex] < m_regsInPriorityOrder.size() && !isMoveRelated(otherTmpIndex)) { + if (m_freezeWorklist.remove(otherTmpIndex)) + m_simplifyWorklist.append(otherTmpIndex); + } + }); + } + + void coalesce() + { + unsigned moveIndex = m_worklistMoves.takeLastMove(); + const MoveOperands& moveOperands = m_coalescingCandidates[moveIndex]; + IndexType u = getAlias(moveOperands.srcIndex); + IndexType v = getAlias(moveOperands.dstIndex); + + if (isPrecolored(v)) + std::swap(u, v); + + if (traceDebug) + dataLog("Coalescing move at index ", moveIndex, " u = ", u, " v = ", v, "\n"); + + if (u == v) { + addWorkList(u); + + if (traceDebug) + dataLog(" Coalesced\n"); + } else if (isPrecolored(v) + || m_interferenceEdges.contains(InterferenceEdge(u, v)) + || (u == m_framePointerIndex && m_interferesWithFramePointer.quickGet(v))) { + addWorkList(u); + addWorkList(v); + + if (traceDebug) + dataLog(" Constrained\n"); + } else if (canBeSafelyCoalesced(u, v)) { + combine(u, v); + addWorkList(u); + m_hasCoalescedNonTrivialMove = true; + + if (traceDebug) + dataLog(" Safe Coalescing\n"); + } else { + m_activeMoves.quickSet(moveIndex); + + if (traceDebug) + dataLog(" Failed coalescing, added to active moves.\n"); + } + } + + void assignColors() + { + ASSERT(m_simplifyWorklist.isEmpty()); + ASSERT(m_worklistMoves.isEmpty()); + ASSERT(m_freezeWorklist.isEmpty()); + ASSERT(m_spillWorklist.isEmpty()); + + // Reclaim as much memory as possible. + m_interferenceEdges.clear(); + m_degrees.clear(); + m_moveList.clear(); + m_worklistMoves.clear(); + m_simplifyWorklist.clear(); + m_spillWorklist.clear(); + m_freezeWorklist.clear(); + + // Try to color the Tmp on the stack. + m_coloredTmp.resize(m_adjacencyList.size()); + + while (!m_selectStack.isEmpty()) { + unsigned tmpIndex = m_selectStack.takeLast(); + ASSERT(!isPrecolored(tmpIndex)); + ASSERT(!m_coloredTmp[tmpIndex]); + + RegisterSet coloredRegisters; + for (IndexType adjacentTmpIndex : m_adjacencyList[tmpIndex]) { + IndexType aliasTmpIndex = getAlias(adjacentTmpIndex); + Reg reg = m_coloredTmp[aliasTmpIndex]; + + ASSERT(!isPrecolored(aliasTmpIndex) || (isPrecolored(aliasTmpIndex) && reg)); + + if (reg) + coloredRegisters.set(reg); + } + + bool colorAssigned = false; + for (Reg reg : m_regsInPriorityOrder) { + if (!coloredRegisters.get(reg)) { + m_coloredTmp[tmpIndex] = reg; + colorAssigned = true; + break; + } + } + + if (!colorAssigned) + m_spilledTmps.append(tmpIndex); + } + m_selectStack.clear(); + + if (m_spilledTmps.isEmpty()) + m_coalescedTmpsAtSpill.clear(); + else + m_coloredTmp.clear(); + } + +private: + void initializeDegrees(unsigned tmpArraySize) + { + m_degrees.resize(tmpArraySize); + + // All precolored registers have an "infinite" degree. 
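+ // Treating machine registers as having an effectively unbounded degree keeps them out of
+ // the simplify/freeze/spill worklists: decrementDegree() can never bring them down to the
+ // number of available registers, so precolored nodes always keep their fixed color.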
+ unsigned firstNonRegIndex = m_lastPrecoloredRegisterIndex + 1; + for (unsigned i = 0; i < firstNonRegIndex; ++i) + m_degrees[i] = std::numeric_limits<unsigned>::max(); + + memset(m_degrees.data() + firstNonRegIndex, 0, (tmpArraySize - firstNonRegIndex) * sizeof(unsigned)); + } + + void addEdgeDistinct(IndexType a, IndexType b) + { + ASSERT(a != b); + if (m_interferenceEdges.add(InterferenceEdge(a, b)).isNewEntry) { + if (!isPrecolored(a)) { + ASSERT(!m_adjacencyList[a].contains(b)); + m_adjacencyList[a].append(b); + m_degrees[a]++; + } + + if (!isPrecolored(b)) { + ASSERT(!m_adjacencyList[b].contains(a)); + m_adjacencyList[b].append(a); + m_degrees[b]++; + } + } + } + + void decrementDegree(IndexType tmpIndex) + { + ASSERT(m_degrees[tmpIndex]); + + unsigned oldDegree = m_degrees[tmpIndex]--; + if (oldDegree == m_regsInPriorityOrder.size()) { + enableMovesOnValueAndAdjacents(tmpIndex); + m_spillWorklist.remove(tmpIndex); + if (isMoveRelated(tmpIndex)) + m_freezeWorklist.add(tmpIndex); + else + m_simplifyWorklist.append(tmpIndex); + } + } + + + bool addEdgeDistinctWithoutDegreeChange(IndexType a, IndexType b) + { + ASSERT(a != b); + if (m_interferenceEdges.add(InterferenceEdge(a, b)).isNewEntry) { + if (!isPrecolored(a)) { + ASSERT(!m_adjacencyList[a].contains(b)); + m_adjacencyList[a].append(b); + } + + if (!isPrecolored(b)) { + ASSERT(!m_adjacencyList[b].contains(a)); + m_adjacencyList[b].append(a); + } + return true; + } + return false; + } + + bool isMoveRelated(IndexType tmpIndex) + { + for (unsigned moveIndex : m_moveList[tmpIndex]) { + if (m_activeMoves.quickGet(moveIndex) || m_worklistMoves.contains(moveIndex)) + return true; + } + return false; + } + + template<typename Function> + void forEachAdjacent(IndexType tmpIndex, Function function) + { + for (IndexType adjacentTmpIndex : m_adjacencyList[tmpIndex]) { + if (!hasBeenSimplified(adjacentTmpIndex)) + function(adjacentTmpIndex); + } + } + + bool hasBeenSimplified(IndexType tmpIndex) + { + return m_isOnSelectStack.quickGet(tmpIndex) || !!m_coalescedTmps[tmpIndex]; + } + + template<typename Function> + void forEachNodeMoves(IndexType tmpIndex, Function function) + { + for (unsigned moveIndex : m_moveList[tmpIndex]) { + if (m_activeMoves.quickGet(moveIndex) || m_worklistMoves.contains(moveIndex)) + function(moveIndex); + } + } + + void enableMovesOnValue(IndexType tmpIndex) + { + for (unsigned moveIndex : m_moveList[tmpIndex]) { + if (m_activeMoves.quickClear(moveIndex)) + m_worklistMoves.returnMove(moveIndex); + } + } + + void enableMovesOnValueAndAdjacents(IndexType tmpIndex) + { + enableMovesOnValue(tmpIndex); + + forEachAdjacent(tmpIndex, [this] (IndexType adjacentTmpIndex) { + enableMovesOnValue(adjacentTmpIndex); + }); + } + + bool isPrecolored(IndexType tmpIndex) + { + return tmpIndex <= m_lastPrecoloredRegisterIndex; + } + + void addWorkList(IndexType tmpIndex) + { + if (!isPrecolored(tmpIndex) && m_degrees[tmpIndex] < m_regsInPriorityOrder.size() && !isMoveRelated(tmpIndex)) { + m_freezeWorklist.remove(tmpIndex); + m_simplifyWorklist.append(tmpIndex); + } + } + + void combine(IndexType u, IndexType v) + { + if (!m_freezeWorklist.remove(v)) + m_spillWorklist.remove(v); + + ASSERT(!m_coalescedTmps[v]); + m_coalescedTmps[v] = u; + + auto& vMoves = m_moveList[v]; + m_moveList[u].add(vMoves.begin(), vMoves.end()); + + forEachAdjacent(v, [this, u] (IndexType adjacentTmpIndex) { + if (addEdgeDistinctWithoutDegreeChange(adjacentTmpIndex, u)) { + // If we added a new edge between the adjacentTmp and u, it replaces the edge + // 
that existed with v. + // The degree of adjacentTmp remains the same since the edge just changed from u to v. + // All we need to do is update the degree of u. + if (!isPrecolored(u)) + m_degrees[u]++; + } else { + // If we already had an edge between the adjacentTmp and u, the degree of u + // is already correct. The degree of the adjacentTmp decreases since the edge + // with v is no longer relevant (we can think of it as merged with the edge with u). + decrementDegree(adjacentTmpIndex); + } + }); + + if (m_framePointerIndex && m_interferesWithFramePointer.quickGet(v)) + m_interferesWithFramePointer.quickSet(u); + + if (m_degrees[u] >= m_regsInPriorityOrder.size() && m_freezeWorklist.remove(u)) + addToSpill(u); + } + + bool canBeSafelyCoalesced(IndexType u, IndexType v) + { + ASSERT(!isPrecolored(v)); + if (isPrecolored(u)) + return precoloredCoalescingHeuristic(u, v); + return conservativeHeuristic(u, v); + } + + bool conservativeHeuristic(IndexType u, IndexType v) + { + // This is using the Briggs' conservative coalescing rule: + // If the number of combined adjacent node with a degree >= K is less than K, + // it is safe to combine the two nodes. The reason is that we know that if the graph + // is colorable, we have fewer than K adjacents with high order and there is a color + // for the current node. + ASSERT(u != v); + ASSERT(!isPrecolored(u)); + ASSERT(!isPrecolored(v)); + + const auto& adjacentsOfU = m_adjacencyList[u]; + const auto& adjacentsOfV = m_adjacencyList[v]; + + if (adjacentsOfU.size() + adjacentsOfV.size() < m_regsInPriorityOrder.size()) { + // Shortcut: if the total number of adjacents is less than the number of register, the condition is always met. + return true; + } + + HashSet<IndexType> highOrderAdjacents; + + for (IndexType adjacentTmpIndex : adjacentsOfU) { + ASSERT(adjacentTmpIndex != v); + ASSERT(adjacentTmpIndex != u); + if (!hasBeenSimplified(adjacentTmpIndex) && m_degrees[adjacentTmpIndex] >= m_regsInPriorityOrder.size()) { + auto addResult = highOrderAdjacents.add(adjacentTmpIndex); + if (addResult.isNewEntry && highOrderAdjacents.size() >= m_regsInPriorityOrder.size()) + return false; + } + } + for (IndexType adjacentTmpIndex : adjacentsOfV) { + ASSERT(adjacentTmpIndex != u); + ASSERT(adjacentTmpIndex != v); + if (!hasBeenSimplified(adjacentTmpIndex) && m_degrees[adjacentTmpIndex] >= m_regsInPriorityOrder.size()) { + auto addResult = highOrderAdjacents.add(adjacentTmpIndex); + if (addResult.isNewEntry && highOrderAdjacents.size() >= m_regsInPriorityOrder.size()) + return false; + } + } + + ASSERT(highOrderAdjacents.size() < m_regsInPriorityOrder.size()); + return true; + } + + bool precoloredCoalescingHeuristic(IndexType u, IndexType v) + { + if (traceDebug) + dataLog(" Checking precoloredCoalescingHeuristic\n"); + ASSERT(isPrecolored(u)); + ASSERT(!isPrecolored(v)); + + // If u is a pinned register then it's always safe to coalesce. Note that when we call this, + // we have already proved that there is no interference between u and v. + if (!m_mutableRegs.get(m_coloredTmp[u])) + return true; + + // If any adjacent of the non-colored node is not an adjacent of the colored node AND has a degree >= K + // there is a risk that this node needs to have the same color as our precolored node. If we coalesce such + // move, we may create an uncolorable graph. 
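+ // This is essentially George's coalescing test: v may only be merged into the precolored u
+ // if every significant-degree neighbor of v (that is not itself precolored or already
+ // simplified) already interferes with u, so the merge cannot add new coloring constraints.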
+ const auto& adjacentsOfV = m_adjacencyList[v]; + for (unsigned adjacentTmpIndex : adjacentsOfV) { + if (!isPrecolored(adjacentTmpIndex) + && !hasBeenSimplified(adjacentTmpIndex) + && m_degrees[adjacentTmpIndex] >= m_regsInPriorityOrder.size() + && !m_interferenceEdges.contains(InterferenceEdge(u, adjacentTmpIndex))) + return false; + } + return true; + } + +protected: +#if PLATFORM(COCOA) +#pragma mark - +#endif + + // Interference edges are not directed. An edge between any two Tmps is represented + // by the concatenated values of the smallest Tmp followed by the bigger Tmp. + class InterferenceEdge { + public: + InterferenceEdge() + { + } + + InterferenceEdge(IndexType a, IndexType b) + { + ASSERT(a); + ASSERT(b); + ASSERT_WITH_MESSAGE(a != b, "A Tmp can never interfere with itself. Doing so would force it to be the superposition of two registers."); + + if (b < a) + std::swap(a, b); + m_value = static_cast<uint64_t>(a) << 32 | b; + } + + InterferenceEdge(WTF::HashTableDeletedValueType) + : m_value(std::numeric_limits<uint64_t>::max()) + { + } + + IndexType first() const + { + return m_value >> 32 & 0xffffffff; + } + + IndexType second() const + { + return m_value & 0xffffffff; + } + + bool operator==(const InterferenceEdge other) const + { + return m_value == other.m_value; + } + + bool isHashTableDeletedValue() const + { + return *this == InterferenceEdge(WTF::HashTableDeletedValue); + } + + unsigned hash() const + { + return WTF::IntHash<uint64_t>::hash(m_value); + } + + void dump(PrintStream& out) const + { + out.print(first(), "<=>", second()); + } + + private: + uint64_t m_value { 0 }; + }; + + struct InterferenceEdgeHash { + static unsigned hash(const InterferenceEdge& key) { return key.hash(); } + static bool equal(const InterferenceEdge& a, const InterferenceEdge& b) { return a == b; } + static const bool safeToCompareToEmptyOrDeleted = true; + }; + typedef SimpleClassHashTraits<InterferenceEdge> InterferenceEdgeHashTraits; + + const Vector<Reg>& m_regsInPriorityOrder; + RegisterSet m_mutableRegs; + IndexType m_lastPrecoloredRegisterIndex { 0 }; + + // The interference graph. + HashSet<InterferenceEdge, InterferenceEdgeHash, InterferenceEdgeHashTraits> m_interferenceEdges; + Vector<Vector<IndexType, 0, UnsafeVectorOverflow, 4>, 0, UnsafeVectorOverflow> m_adjacencyList; + Vector<IndexType, 0, UnsafeVectorOverflow> m_degrees; + + // Instead of keeping track of the move instructions, we just keep their operands around and use the index + // in the vector as the "identifier" for the move. + struct MoveOperands { + IndexType srcIndex; + IndexType dstIndex; + }; + Vector<MoveOperands, 0, UnsafeVectorOverflow> m_coalescingCandidates; + + // List of every move instruction associated with a Tmp. + Vector<HashSet<IndexType, typename DefaultHash<IndexType>::Hash, WTF::UnsignedWithZeroKeyHashTraits<IndexType>>> m_moveList; + + // Colors. + Vector<Reg, 0, UnsafeVectorOverflow> m_coloredTmp; + Vector<IndexType> m_spilledTmps; + + // Values that have been coalesced with an other value. + Vector<IndexType, 0, UnsafeVectorOverflow> m_coalescedTmps; + + // The stack of Tmp removed from the graph and ready for coloring. 
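+ // m_isOnSelectStack mirrors membership of m_selectStack so that hasBeenSimplified() can
+ // answer in constant time instead of searching the stack.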
+ BitVector m_isOnSelectStack; + Vector<IndexType> m_selectStack; + + IndexType m_framePointerIndex { 0 }; + BitVector m_interferesWithFramePointer; + + struct OrderedMoveSet { + unsigned addMove() + { + ASSERT(m_lowPriorityMoveList.isEmpty()); + ASSERT(!m_firstLowPriorityMoveIndex); + + unsigned nextIndex = m_positionInMoveList.size(); + unsigned position = m_moveList.size(); + m_moveList.append(nextIndex); + m_positionInMoveList.append(position); + return nextIndex; + } + + void startAddingLowPriorityMoves() + { + ASSERT(m_lowPriorityMoveList.isEmpty()); + m_firstLowPriorityMoveIndex = m_moveList.size(); + } + + unsigned addLowPriorityMove() + { + ASSERT(m_firstLowPriorityMoveIndex == m_moveList.size()); + + unsigned nextIndex = m_positionInMoveList.size(); + unsigned position = m_lowPriorityMoveList.size(); + m_lowPriorityMoveList.append(nextIndex); + m_positionInMoveList.append(position); + + ASSERT(nextIndex >= m_firstLowPriorityMoveIndex); + + return nextIndex; + } + + bool isEmpty() const + { + return m_moveList.isEmpty() && m_lowPriorityMoveList.isEmpty(); + } + + bool contains(unsigned index) + { + return m_positionInMoveList[index] != std::numeric_limits<unsigned>::max(); + } + + void takeMove(unsigned moveIndex) + { + unsigned positionInMoveList = m_positionInMoveList[moveIndex]; + if (positionInMoveList == std::numeric_limits<unsigned>::max()) + return; + + if (moveIndex < m_firstLowPriorityMoveIndex) { + ASSERT(m_moveList[positionInMoveList] == moveIndex); + unsigned lastIndex = m_moveList.last(); + m_positionInMoveList[lastIndex] = positionInMoveList; + m_moveList[positionInMoveList] = lastIndex; + m_moveList.removeLast(); + } else { + ASSERT(m_lowPriorityMoveList[positionInMoveList] == moveIndex); + unsigned lastIndex = m_lowPriorityMoveList.last(); + m_positionInMoveList[lastIndex] = positionInMoveList; + m_lowPriorityMoveList[positionInMoveList] = lastIndex; + m_lowPriorityMoveList.removeLast(); + } + + m_positionInMoveList[moveIndex] = std::numeric_limits<unsigned>::max(); + + ASSERT(!contains(moveIndex)); + } + + unsigned takeLastMove() + { + ASSERT(!isEmpty()); + + unsigned lastIndex; + if (!m_moveList.isEmpty()) { + lastIndex = m_moveList.takeLast(); + ASSERT(m_positionInMoveList[lastIndex] == m_moveList.size()); + } else { + lastIndex = m_lowPriorityMoveList.takeLast(); + ASSERT(m_positionInMoveList[lastIndex] == m_lowPriorityMoveList.size()); + } + m_positionInMoveList[lastIndex] = std::numeric_limits<unsigned>::max(); + + ASSERT(!contains(lastIndex)); + return lastIndex; + } + + void returnMove(unsigned index) + { + // This assertion is a bit strict but that is how the move list should be used. The only kind of moves that can + // return to the list are the ones that we previously failed to coalesce with the conservative heuristics. + // Values should not be added back if they were never taken out when attempting coalescing. 
+ ASSERT(!contains(index)); + + if (index < m_firstLowPriorityMoveIndex) { + unsigned position = m_moveList.size(); + m_moveList.append(index); + m_positionInMoveList[index] = position; + } else { + unsigned position = m_lowPriorityMoveList.size(); + m_lowPriorityMoveList.append(index); + m_positionInMoveList[index] = position; + } + + ASSERT(contains(index)); + } + + void clear() + { + m_positionInMoveList.clear(); + m_moveList.clear(); + m_lowPriorityMoveList.clear(); + } + + private: + Vector<unsigned, 0, UnsafeVectorOverflow> m_positionInMoveList; + Vector<unsigned, 0, UnsafeVectorOverflow> m_moveList; + Vector<unsigned, 0, UnsafeVectorOverflow> m_lowPriorityMoveList; + unsigned m_firstLowPriorityMoveIndex { 0 }; + }; + + // Work lists. + // Set of "move" enabled for possible coalescing. + OrderedMoveSet m_worklistMoves; + // Set of "move" not yet ready for coalescing. + BitVector m_activeMoves; + // Low-degree, non-Move related. + Vector<IndexType> m_simplifyWorklist; + // High-degree Tmp. + HashSet<IndexType> m_spillWorklist; + // Low-degree, Move related. + HashSet<IndexType> m_freezeWorklist; + + bool m_hasSelectedSpill { false }; + bool m_hasCoalescedNonTrivialMove { false }; + + // The mapping of Tmp to their alias for Moves that are always coalescing regardless of spilling. + Vector<IndexType, 0, UnsafeVectorOverflow> m_coalescedTmpsAtSpill; + + const HashSet<unsigned>& m_unspillableTmps; +}; + +// This perform all the tasks that are specific to certain register type. +template<Arg::Type type> +class ColoringAllocator : public AbstractColoringAllocator<unsigned> { +public: + ColoringAllocator(Code& code, TmpWidth& tmpWidth, const UseCounts<Tmp>& useCounts, const HashSet<unsigned>& unspillableTmp) + : AbstractColoringAllocator<unsigned>(code.regsInPriorityOrder(type), AbsoluteTmpMapper<type>::lastMachineRegisterIndex(), tmpArraySize(code), unspillableTmp) + , m_code(code) + , m_tmpWidth(tmpWidth) + , m_useCounts(useCounts) + { + if (type == Arg::GP) { + m_framePointerIndex = AbsoluteTmpMapper<type>::absoluteIndex(Tmp(MacroAssembler::framePointerRegister)); + m_interferesWithFramePointer.ensureSize(tmpArraySize(code)); + } + + initializePrecoloredTmp(); + build(); + allocate(); + } + + Tmp getAlias(Tmp tmp) const + { + return AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(getAlias(AbsoluteTmpMapper<type>::absoluteIndex(tmp))); + } + + // This tells you if a Move will be coalescable if the src and dst end up matching. This method + // relies on an analysis that is invalidated by register allocation, so you it's only meaningful to + // call this *before* replacing the Tmp's in this Inst with registers or spill slots. + bool mayBeCoalescable(const Inst& inst) const + { + return mayBeCoalescableImpl(inst, &m_tmpWidth); + } + + bool isUselessMove(const Inst& inst) const + { + return mayBeCoalescableImpl(inst, nullptr) && inst.args[0].tmp() == inst.args[1].tmp(); + } + + Tmp getAliasWhenSpilling(Tmp tmp) const + { + ASSERT_WITH_MESSAGE(!m_spilledTmps.isEmpty(), "This function is only valid for coalescing during spilling."); + + if (m_coalescedTmpsAtSpill.isEmpty()) + return tmp; + + unsigned aliasIndex = AbsoluteTmpMapper<type>::absoluteIndex(tmp); + while (unsigned nextAliasIndex = m_coalescedTmpsAtSpill[aliasIndex]) + aliasIndex = nextAliasIndex; + + Tmp alias = AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(aliasIndex); + + ASSERT_WITH_MESSAGE(!m_spilledTmps.contains(aliasIndex) || alias == tmp, "The aliases at spill should always be colorable. 
Something went horribly wrong."); + + return alias; + } + + template<typename IndexIterator> + class IndexToTmpIteratorAdaptor { + public: + IndexToTmpIteratorAdaptor(IndexIterator&& indexIterator) + : m_indexIterator(WTFMove(indexIterator)) + { + } + + Tmp operator*() const { return AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(*m_indexIterator); } + IndexToTmpIteratorAdaptor& operator++() { ++m_indexIterator; return *this; } + + bool operator==(const IndexToTmpIteratorAdaptor& other) const + { + return m_indexIterator == other.m_indexIterator; + } + + bool operator!=(const IndexToTmpIteratorAdaptor& other) const + { + return !(*this == other); + } + + private: + IndexIterator m_indexIterator; + }; + + template<typename Collection> + class IndexToTmpIterableAdaptor { + public: + IndexToTmpIterableAdaptor(const Collection& collection) + : m_collection(collection) + { + } + + IndexToTmpIteratorAdaptor<typename Collection::const_iterator> begin() const + { + return m_collection.begin(); + } + + IndexToTmpIteratorAdaptor<typename Collection::const_iterator> end() const + { + return m_collection.end(); + } + + private: + const Collection& m_collection; + }; + + IndexToTmpIterableAdaptor<Vector<unsigned>> spilledTmps() const { return m_spilledTmps; } + + bool requiresSpilling() const { return !m_spilledTmps.isEmpty(); } + + Reg allocatedReg(Tmp tmp) const + { + ASSERT(!tmp.isReg()); + ASSERT(m_coloredTmp.size()); + ASSERT(tmp.isGP() == (type == Arg::GP)); + + Reg reg = m_coloredTmp[AbsoluteTmpMapper<type>::absoluteIndex(tmp)]; + if (!reg) { + dataLog("FATAL: No color for ", tmp, "\n"); + dataLog("Code:\n"); + dataLog(m_code); + RELEASE_ASSERT_NOT_REACHED(); + } + return reg; + } + +private: + static unsigned tmpArraySize(Code& code) + { + unsigned numTmps = code.numTmps(type); + return AbsoluteTmpMapper<type>::absoluteIndex(numTmps); + } + + void initializePrecoloredTmp() + { + m_coloredTmp.resize(m_lastPrecoloredRegisterIndex + 1); + for (unsigned i = 1; i <= m_lastPrecoloredRegisterIndex; ++i) { + Tmp tmp = AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(i); + ASSERT(tmp.isReg()); + m_coloredTmp[i] = tmp.reg(); + } + } + + bool mayBeCoalesced(Arg left, Arg right) + { + if (!left.isTmp() || !right.isTmp()) + return false; + + Tmp leftTmp = left.tmp(); + Tmp rightTmp = right.tmp(); + + if (leftTmp == rightTmp) + return false; + + if (leftTmp.isGP() != (type == Arg::GP) || rightTmp.isGP() != (type == Arg::GP)) + return false; + + unsigned leftIndex = AbsoluteTmpMapper<type>::absoluteIndex(leftTmp); + unsigned rightIndex = AbsoluteTmpMapper<type>::absoluteIndex(rightTmp); + + return !m_interferenceEdges.contains(InterferenceEdge(leftIndex, rightIndex)); + } + + void addToLowPriorityCoalescingCandidates(Arg left, Arg right) + { + ASSERT(mayBeCoalesced(left, right)); + Tmp leftTmp = left.tmp(); + Tmp rightTmp = right.tmp(); + + unsigned leftIndex = AbsoluteTmpMapper<type>::absoluteIndex(leftTmp); + unsigned rightIndex = AbsoluteTmpMapper<type>::absoluteIndex(rightTmp); + + unsigned nextMoveIndex = m_coalescingCandidates.size(); + m_coalescingCandidates.append({ leftIndex, rightIndex }); + + unsigned newIndexInWorklist = m_worklistMoves.addLowPriorityMove(); + ASSERT_UNUSED(newIndexInWorklist, newIndexInWorklist == nextMoveIndex); + + ASSERT(nextMoveIndex <= m_activeMoves.size()); + m_activeMoves.ensureSize(nextMoveIndex + 1); + + m_moveList[leftIndex].add(nextMoveIndex); + m_moveList[rightIndex].add(nextMoveIndex); + } + + void build() + { + TmpLiveness<type> liveness(m_code); + for (BasicBlock* 
block : m_code) { + typename TmpLiveness<type>::LocalCalc localCalc(liveness, block); + for (unsigned instIndex = block->size(); instIndex--;) { + Inst& inst = block->at(instIndex); + Inst* nextInst = block->get(instIndex + 1); + build(&inst, nextInst, localCalc); + localCalc.execute(instIndex); + } + build(nullptr, &block->at(0), localCalc); + } + buildLowPriorityMoveList(); + } + + void build(Inst* prevInst, Inst* nextInst, const typename TmpLiveness<type>::LocalCalc& localCalc) + { + if (traceDebug) + dataLog("Building between ", pointerDump(prevInst), " and ", pointerDump(nextInst), ":\n"); + Inst::forEachDefWithExtraClobberedRegs<Tmp>( + prevInst, nextInst, + [&] (const Tmp& arg, Arg::Role, Arg::Type argType, Arg::Width) { + if (argType != type) + return; + + // All the Def()s interfere with each other and with all the extra clobbered Tmps. + // We should not use forEachDefWithExtraClobberedRegs() here since colored Tmps + // do not need interference edges in our implementation. + Inst::forEachDef<Tmp>( + prevInst, nextInst, + [&] (Tmp& otherArg, Arg::Role, Arg::Type argType, Arg::Width) { + if (argType != type) + return; + + if (traceDebug) + dataLog(" Adding def-def edge: ", arg, ", ", otherArg, "\n"); + this->addEdge(arg, otherArg); + }); + }); + + if (prevInst && mayBeCoalescable(*prevInst)) { + // We do not want the Use() of this move to interfere with the Def(), even if it is live + // after the Move. If we were to add the interference edge, it would be impossible to + // coalesce the Move even if the two Tmp never interfere anywhere. + Tmp defTmp; + Tmp useTmp; + prevInst->forEachTmp([&defTmp, &useTmp] (Tmp& argTmp, Arg::Role role, Arg::Type, Arg::Width) { + if (Arg::isLateDef(role)) + defTmp = argTmp; + else { + ASSERT(Arg::isEarlyUse(role)); + useTmp = argTmp; + } + }); + ASSERT(defTmp); + ASSERT(useTmp); + + unsigned nextMoveIndex = m_coalescingCandidates.size(); + m_coalescingCandidates.append({ AbsoluteTmpMapper<type>::absoluteIndex(useTmp), AbsoluteTmpMapper<type>::absoluteIndex(defTmp) }); + + unsigned newIndexInWorklist = m_worklistMoves.addMove(); + ASSERT_UNUSED(newIndexInWorklist, newIndexInWorklist == nextMoveIndex); + + ASSERT(nextMoveIndex <= m_activeMoves.size()); + m_activeMoves.ensureSize(nextMoveIndex + 1); + + for (const Arg& arg : prevInst->args) { + auto& list = m_moveList[AbsoluteTmpMapper<type>::absoluteIndex(arg.tmp())]; + list.add(nextMoveIndex); + } + + for (const Tmp& liveTmp : localCalc.live()) { + if (liveTmp != useTmp) { + if (traceDebug) + dataLog(" Adding def-live for coalescable: ", defTmp, ", ", liveTmp, "\n"); + addEdge(defTmp, liveTmp); + } + } + + // The next instruction could have early clobbers or early def's. We need to consider + // those now. 
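+ // Pass nullptr as prevInst: the def-live edges for this coalescable move were already
+ // added above (deliberately excluding useTmp), so only nextInst's early effects still
+ // need interference edges.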
+ addEdges(nullptr, nextInst, localCalc.live()); + } else + addEdges(prevInst, nextInst, localCalc.live()); + } + + void buildLowPriorityMoveList() + { + if (!isX86()) + return; + + m_worklistMoves.startAddingLowPriorityMoves(); + for (BasicBlock* block : m_code) { + for (Inst& inst : *block) { + if (std::optional<unsigned> defArgIndex = inst.shouldTryAliasingDef()) { + Arg op1 = inst.args[*defArgIndex - 2]; + Arg op2 = inst.args[*defArgIndex - 1]; + Arg dest = inst.args[*defArgIndex]; + + if (op1 == dest || op2 == dest) + continue; + + if (mayBeCoalesced(op1, dest)) + addToLowPriorityCoalescingCandidates(op1, dest); + if (op1 != op2 && mayBeCoalesced(op2, dest)) + addToLowPriorityCoalescingCandidates(op2, dest); + } + } + } + } + + void addEdges(Inst* prevInst, Inst* nextInst, typename TmpLiveness<type>::LocalCalc::Iterable liveTmps) + { + // All the Def()s interfere with everthing live. + Inst::forEachDefWithExtraClobberedRegs<Tmp>( + prevInst, nextInst, + [&] (const Tmp& arg, Arg::Role, Arg::Type argType, Arg::Width) { + if (argType != type) + return; + + for (const Tmp& liveTmp : liveTmps) { + ASSERT(liveTmp.isGP() == (type == Arg::GP)); + + if (traceDebug) + dataLog(" Adding def-live edge: ", arg, ", ", liveTmp, "\n"); + + addEdge(arg, liveTmp); + } + + if (type == Arg::GP && !arg.isGPR()) + m_interferesWithFramePointer.quickSet(AbsoluteTmpMapper<type>::absoluteIndex(arg)); + }); + } + + void addEdge(Tmp a, Tmp b) + { + ASSERT_WITH_MESSAGE(a.isGP() == b.isGP(), "An interference between registers of different types does not make sense, it can lead to non-colorable graphs."); + + addEdge(AbsoluteTmpMapper<type>::absoluteIndex(a), AbsoluteTmpMapper<type>::absoluteIndex(b)); + } + + // Calling this without a tmpWidth will perform a more conservative coalescing analysis that assumes + // that Move32's are not coalescable. + static bool mayBeCoalescableImpl(const Inst& inst, TmpWidth* tmpWidth) + { + switch (type) { + case Arg::GP: + switch (inst.kind.opcode) { + case Move: + case Move32: + break; + default: + return false; + } + break; + case Arg::FP: + switch (inst.kind.opcode) { + case MoveFloat: + case MoveDouble: + break; + default: + return false; + } + break; + } + + ASSERT_WITH_MESSAGE(inst.args.size() == 2, "We assume coalecable moves only have two arguments in a few places."); + + if (!inst.args[0].isTmp() || !inst.args[1].isTmp()) + return false; + + ASSERT(inst.args[0].type() == type); + ASSERT(inst.args[1].type() == type); + + // We can coalesce a Move32 so long as either of the following holds: + // - The input is already zero-filled. + // - The output only cares about the low 32 bits. + // + // Note that the input property requires an analysis over ZDef's, so it's only valid so long + // as the input gets a register. We don't know if the input gets a register, but we do know + // that if it doesn't get a register then we will still emit this Move32. 
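+ // Concretely: with no TmpWidth we conservatively refuse to coalesce a Move32; with it, we
+ // coalesce when either the source's def width or the destination's use width fits in 32 bits.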
+ if (inst.kind.opcode == Move32) { + if (!tmpWidth) + return false; + + if (tmpWidth->defWidth(inst.args[0].tmp()) > Arg::Width32 + && tmpWidth->useWidth(inst.args[1].tmp()) > Arg::Width32) + return false; + } + + return true; + } + + void selectSpill() + { + if (!m_hasSelectedSpill) { + m_hasSelectedSpill = true; + + if (m_hasCoalescedNonTrivialMove) + m_coalescedTmpsAtSpill = m_coalescedTmps; + } + + auto iterator = m_spillWorklist.begin(); + + RELEASE_ASSERT_WITH_MESSAGE(iterator != m_spillWorklist.end(), "selectSpill() called when there was no spill."); + RELEASE_ASSERT_WITH_MESSAGE(!m_unspillableTmps.contains(*iterator), "trying to spill unspillable tmp"); + + // Higher score means more desirable to spill. Lower scores maximize the likelihood that a tmp + // gets a register. + auto score = [&] (Tmp tmp) -> double { + // Air exposes the concept of "fast tmps", and we interpret that to mean that the tmp + // should always be in a register. + if (m_code.isFastTmp(tmp)) + return 0; + + // All else being equal, the score should be directly related to the degree. + double degree = static_cast<double>(m_degrees[AbsoluteTmpMapper<type>::absoluteIndex(tmp)]); + + // All else being equal, the score should be inversely related to the number of warm uses and + // defs. + const UseCounts<Tmp>::Counts* counts = m_useCounts[tmp]; + if (!counts) + return std::numeric_limits<double>::infinity(); + + double uses = counts->numWarmUses + counts->numDefs; + + // If it's a constant, then it's not as bad to spill. We can rematerialize it in many + // cases. + if (counts->numConstDefs == 1 && counts->numDefs == 1) + uses /= 2; + + return degree / uses; + }; + + auto victimIterator = iterator; + double maxScore = score(AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(*iterator)); + + ++iterator; + for (;iterator != m_spillWorklist.end(); ++iterator) { + double tmpScore = score(AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(*iterator)); + if (tmpScore > maxScore) { + ASSERT(!m_unspillableTmps.contains(*iterator)); + victimIterator = iterator; + maxScore = tmpScore; + } + } + + unsigned victimIndex = *victimIterator; + m_spillWorklist.remove(victimIterator); + m_simplifyWorklist.append(victimIndex); + + freezeMoves(victimIndex); + } + + void allocate() + { + ASSERT_WITH_MESSAGE(m_activeMoves.size() >= m_coalescingCandidates.size(), "The activeMove set should be big enough for the quick operations of BitVector."); + + makeWorkList(); + + if (debug) { + dataLog("Interference: ", listDump(m_interferenceEdges), "\n"); + dumpInterferenceGraphInDot(WTF::dataFile()); + dataLog("Coalescing candidates:\n"); + for (MoveOperands& moveOp : m_coalescingCandidates) { + dataLog(" ", AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(moveOp.srcIndex), + " -> ", AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(moveOp.dstIndex), "\n"); + } + dataLog("Initial work list\n"); + dumpWorkLists(WTF::dataFile()); + } + + do { + if (traceDebug) { + dataLog("Before Graph simplification iteration\n"); + dumpWorkLists(WTF::dataFile()); + } + + if (!m_simplifyWorklist.isEmpty()) + simplify(); + else if (!m_worklistMoves.isEmpty()) + coalesce(); + else if (!m_freezeWorklist.isEmpty()) + freeze(); + else if (!m_spillWorklist.isEmpty()) + selectSpill(); + + if (traceDebug) { + dataLog("After Graph simplification iteration\n"); + dumpWorkLists(WTF::dataFile()); + } + } while (!m_simplifyWorklist.isEmpty() || !m_worklistMoves.isEmpty() || !m_freezeWorklist.isEmpty() || !m_spillWorklist.isEmpty()); + + assignColors(); + } + +#if PLATFORM(COCOA) +#pragma 
mark - Debugging helpers. +#endif + + void dumpInterferenceGraphInDot(PrintStream& out) + { + out.print("graph InterferenceGraph { \n"); + + HashSet<Tmp> tmpsWithInterferences; + for (const auto& edge : m_interferenceEdges) { + tmpsWithInterferences.add(AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(edge.first())); + tmpsWithInterferences.add(AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(edge.second())); + } + + for (const auto& tmp : tmpsWithInterferences) { + unsigned tmpIndex = AbsoluteTmpMapper<type>::absoluteIndex(tmp); + if (tmpIndex < m_degrees.size()) + out.print(" ", tmp.internalValue(), " [label=\"", tmp, " (", m_degrees[tmpIndex], ")\"];\n"); + else + out.print(" ", tmp.internalValue(), " [label=\"", tmp, "\"];\n"); + } + + for (const auto& edge : m_interferenceEdges) + out.print(" ", edge.first(), " -- ", edge.second(), ";\n"); + out.print("}\n"); + } + + void dumpWorkLists(PrintStream& out) + { + out.print("Simplify work list:\n"); + for (unsigned tmpIndex : m_simplifyWorklist) + out.print(" ", AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(tmpIndex), "\n"); + out.printf("Moves work list is empty? %d\n", m_worklistMoves.isEmpty()); + out.print("Freeze work list:\n"); + for (unsigned tmpIndex : m_freezeWorklist) + out.print(" ", AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(tmpIndex), "\n"); + out.print("Spill work list:\n"); + for (unsigned tmpIndex : m_spillWorklist) + out.print(" ", AbsoluteTmpMapper<type>::tmpFromAbsoluteIndex(tmpIndex), "\n"); + } + + using AbstractColoringAllocator<unsigned>::addEdge; + using AbstractColoringAllocator<unsigned>::getAlias; + + Code& m_code; + TmpWidth& m_tmpWidth; + // FIXME: spilling should not type specific. It is only a side effect of using UseCounts. + const UseCounts<Tmp>& m_useCounts; +}; + +class IteratedRegisterCoalescing { +public: + IteratedRegisterCoalescing(Code& code) + : m_code(code) + , m_useCounts(code) + { + } + + void run() + { + padInterference(m_code); + + iteratedRegisterCoalescingOnType<Arg::GP>(); + iteratedRegisterCoalescingOnType<Arg::FP>(); + + fixSpillsAfterTerminals(); + + if (reportStats) + dataLog("Num iterations = ", m_numIterations, "\n"); + } + +private: + template<Arg::Type type> + void iteratedRegisterCoalescingOnType() + { + HashSet<unsigned> unspillableTmps = computeUnspillableTmps<type>(); + + // FIXME: If a Tmp is used only from a Scratch role and that argument is !admitsStack, then + // we should add the Tmp to unspillableTmps. That will help avoid relooping only to turn the + // Tmp into an unspillable Tmp. + // https://bugs.webkit.org/show_bug.cgi?id=152699 + + while (true) { + ++m_numIterations; + + if (traceDebug) + dataLog("Code at iteration ", m_numIterations, ":\n", m_code); + + // FIXME: One way to optimize this code is to remove the recomputation inside the fixpoint. + // We need to recompute because spilling adds tmps, but we could just update tmpWidth when we + // add those tmps. Note that one easy way to remove the recomputation is to make any newly + // added Tmps get the same use/def widths that the original Tmp got. But, this may hurt the + // spill code we emit. Since we currently recompute TmpWidth after spilling, the newly + // created Tmps may get narrower use/def widths. On the other hand, the spiller already + // selects which move instruction to use based on the original Tmp's widths, so it may not + // matter than a subsequent iteration sees a coservative width for the new Tmps. 
Also, the + // recomputation may not actually be a performance problem; it's likely that a better way to + // improve performance of TmpWidth is to replace its HashMap with something else. It's + // possible that most of the TmpWidth overhead is from queries of TmpWidth rather than the + // recomputation, in which case speeding up the lookup would be a bigger win. + // https://bugs.webkit.org/show_bug.cgi?id=152478 + m_tmpWidth.recompute(m_code); + + ColoringAllocator<type> allocator(m_code, m_tmpWidth, m_useCounts, unspillableTmps); + if (!allocator.requiresSpilling()) { + assignRegistersToTmp(allocator); + if (traceDebug) + dataLog("Successfull allocation at iteration ", m_numIterations, ":\n", m_code); + + return; + } + addSpillAndFill<type>(allocator, unspillableTmps); + } + } + + template<Arg::Type type> + HashSet<unsigned> computeUnspillableTmps() + { + HashSet<unsigned> unspillableTmps; + + struct Range { + unsigned first { std::numeric_limits<unsigned>::max() }; + unsigned last { 0 }; + unsigned count { 0 }; + unsigned admitStackCount { 0 }; + }; + + unsigned numTmps = m_code.numTmps(type); + unsigned arraySize = AbsoluteTmpMapper<type>::absoluteIndex(numTmps); + + Vector<Range, 0, UnsafeVectorOverflow> ranges; + ranges.fill(Range(), arraySize); + + unsigned globalIndex = 0; + for (BasicBlock* block : m_code) { + for (Inst& inst : *block) { + inst.forEachArg([&] (Arg& arg, Arg::Role, Arg::Type argType, Arg::Width) { + if (arg.isTmp() && inst.admitsStack(arg)) { + if (argType != type) + return; + + Tmp tmp = arg.tmp(); + Range& range = ranges[AbsoluteTmpMapper<type>::absoluteIndex(tmp)]; + range.count++; + range.admitStackCount++; + if (globalIndex < range.first) { + range.first = globalIndex; + range.last = globalIndex; + } else + range.last = globalIndex; + + return; + } + + arg.forEachTmpFast([&] (Tmp& tmp) { + if (tmp.isGP() != (type == Arg::GP)) + return; + + Range& range = ranges[AbsoluteTmpMapper<type>::absoluteIndex(tmp)]; + range.count++; + if (globalIndex < range.first) { + range.first = globalIndex; + range.last = globalIndex; + } else + range.last = globalIndex; + }); + }); + + ++globalIndex; + } + ++globalIndex; + } + for (unsigned i = AbsoluteTmpMapper<type>::lastMachineRegisterIndex() + 1; i < ranges.size(); ++i) { + Range& range = ranges[i]; + if (range.last - range.first <= 1 && range.count > range.admitStackCount) + unspillableTmps.add(i); + } + + return unspillableTmps; + } + + template<Arg::Type type> + void assignRegistersToTmp(const ColoringAllocator<type>& allocator) + { + for (BasicBlock* block : m_code) { + // Give Tmp a valid register. + for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) { + Inst& inst = block->at(instIndex); + + // The mayBeCoalescable() method will change its mind for some operations after we + // complete register allocation. So, we record this before starting. + bool mayBeCoalescable = allocator.mayBeCoalescable(inst); + + // Move32 is cheaper if we know that it's equivalent to a Move. It's + // equivalent if the destination's high bits are not observable or if the source's high + // bits are all zero. Note that we don't have the opposite optimization for other + // architectures, which may prefer Move over Move32, because Move is canonical already. 
+ if (type == Arg::GP && inst.kind.opcode == Move + && inst.args[0].isTmp() && inst.args[1].isTmp()) { + if (m_tmpWidth.useWidth(inst.args[1].tmp()) <= Arg::Width32 + || m_tmpWidth.defWidth(inst.args[0].tmp()) <= Arg::Width32) + inst.kind.opcode = Move32; + } + + inst.forEachTmpFast([&] (Tmp& tmp) { + if (tmp.isReg() || tmp.isGP() == (type != Arg::GP)) + return; + + Tmp aliasTmp = allocator.getAlias(tmp); + Tmp assignedTmp; + if (aliasTmp.isReg()) + assignedTmp = Tmp(aliasTmp.reg()); + else { + auto reg = allocator.allocatedReg(aliasTmp); + ASSERT(reg); + assignedTmp = Tmp(reg); + } + ASSERT(assignedTmp.isReg()); + tmp = assignedTmp; + }); + + if (mayBeCoalescable && inst.args[0].isTmp() && inst.args[1].isTmp() + && inst.args[0].tmp() == inst.args[1].tmp()) + inst = Inst(); + } + + // Remove all the useless moves we created in this block. + block->insts().removeAllMatching([&] (const Inst& inst) { + return !inst; + }); + } + } + + static unsigned stackSlotMinimumWidth(Arg::Width width) + { + return width <= Arg::Width32 ? 4 : 8; + } + + template<Arg::Type type> + void addSpillAndFill(const ColoringAllocator<type>& allocator, HashSet<unsigned>& unspillableTmps) + { + HashMap<Tmp, StackSlot*> stackSlots; + for (Tmp tmp : allocator.spilledTmps()) { + // All the spilled values become unspillable. + unspillableTmps.add(AbsoluteTmpMapper<type>::absoluteIndex(tmp)); + + // Allocate stack slot for each spilled value. + StackSlot* stackSlot = m_code.addStackSlot( + stackSlotMinimumWidth(m_tmpWidth.requiredWidth(tmp)), StackSlotKind::Spill); + bool isNewTmp = stackSlots.add(tmp, stackSlot).isNewEntry; + ASSERT_UNUSED(isNewTmp, isNewTmp); + } + + // Rewrite the program to get rid of the spilled Tmp. + InsertionSet insertionSet(m_code); + for (BasicBlock* block : m_code) { + bool hasAliasedTmps = false; + + for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) { + Inst& inst = block->at(instIndex); + + // The TmpWidth analysis will say that a Move only stores 32 bits into the destination, + // if the source only had 32 bits worth of non-zero bits. Same for the source: it will + // only claim to read 32 bits from the source if only 32 bits of the destination are + // read. Note that we only apply this logic if this turns into a load or store, since + // Move is the canonical way to move data between GPRs. + bool canUseMove32IfDidSpill = false; + bool didSpill = false; + if (type == Arg::GP && inst.kind.opcode == Move) { + if ((inst.args[0].isTmp() && m_tmpWidth.width(inst.args[0].tmp()) <= Arg::Width32) + || (inst.args[1].isTmp() && m_tmpWidth.width(inst.args[1].tmp()) <= Arg::Width32)) + canUseMove32IfDidSpill = true; + } + + // Try to replace the register use by memory use when possible. + inst.forEachArg( + [&] (Arg& arg, Arg::Role role, Arg::Type argType, Arg::Width width) { + if (!arg.isTmp()) + return; + if (argType != type) + return; + if (arg.isReg()) + return; + + auto stackSlotEntry = stackSlots.find(arg.tmp()); + if (stackSlotEntry == stackSlots.end()) + return; + if (!inst.admitsStack(arg)) + return; + + // If the Tmp holds a constant then we want to rematerialize its + // value rather than loading it from the stack. In order for that + // optimization to kick in, we need to avoid placing the Tmp's stack + // address into the instruction. 
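+ // Cold uses can take the stack address directly; only warm uses of a tmp whose single def
+ // is a constant skip it, leaving the tmp in place so rematerialization can kick in later.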
+ if (!Arg::isColdUse(role)) { + const UseCounts<Tmp>::Counts* counts = m_useCounts[arg.tmp()]; + if (counts && counts->numConstDefs == 1 && counts->numDefs == 1) + return; + } + + Arg::Width spillWidth = m_tmpWidth.requiredWidth(arg.tmp()); + if (Arg::isAnyDef(role) && width < spillWidth) + return; + ASSERT(inst.kind.opcode == Move || !(Arg::isAnyUse(role) && width > spillWidth)); + + if (spillWidth != Arg::Width32) + canUseMove32IfDidSpill = false; + + stackSlotEntry->value->ensureSize( + canUseMove32IfDidSpill ? 4 : Arg::bytes(width)); + arg = Arg::stack(stackSlotEntry->value); + didSpill = true; + }); + + if (didSpill && canUseMove32IfDidSpill) + inst.kind.opcode = Move32; + + // For every other case, add Load/Store as needed. + inst.forEachTmp([&] (Tmp& tmp, Arg::Role role, Arg::Type argType, Arg::Width) { + if (tmp.isReg() || argType != type) + return; + + auto stackSlotEntry = stackSlots.find(tmp); + if (stackSlotEntry == stackSlots.end()) { + Tmp alias = allocator.getAliasWhenSpilling(tmp); + if (alias != tmp) { + tmp = alias; + hasAliasedTmps = true; + } + return; + } + + Arg::Width spillWidth = m_tmpWidth.requiredWidth(tmp); + Opcode move = Oops; + switch (stackSlotMinimumWidth(spillWidth)) { + case 4: + move = type == Arg::GP ? Move32 : MoveFloat; + break; + case 8: + move = type == Arg::GP ? Move : MoveDouble; + break; + default: + RELEASE_ASSERT_NOT_REACHED(); + break; + } + + tmp = m_code.newTmp(type); + unspillableTmps.add(AbsoluteTmpMapper<type>::absoluteIndex(tmp)); + + Arg arg = Arg::stack(stackSlotEntry->value); + if (Arg::isAnyUse(role) && role != Arg::Scratch) + insertionSet.insert(instIndex, move, inst.origin, arg, tmp); + if (Arg::isAnyDef(role)) + insertionSet.insert(instIndex + 1, move, inst.origin, tmp, arg); + }); + } + insertionSet.execute(block); + + if (hasAliasedTmps) { + block->insts().removeAllMatching([&] (const Inst& inst) { + return allocator.isUselessMove(inst); + }); + } + } + } + + void fixSpillsAfterTerminals() + { + // Because there may be terminals that produce values, IRC may + // want to spill those terminals. It'll happen to spill it after + // the terminal. If we left the graph in this state, it'd be invalid + // because a terminal must be the last instruction in a block. + // We fix that here. + + InsertionSet insertionSet(m_code); + + bool addedBlocks = false; + + for (BasicBlock* block : m_code) { + unsigned terminalIndex = block->size(); + bool foundTerminal = false; + while (terminalIndex--) { + if (block->at(terminalIndex).isTerminal()) { + foundTerminal = true; + break; + } + } + ASSERT_UNUSED(foundTerminal, foundTerminal); + + if (terminalIndex == block->size() - 1) + continue; + + // There must be instructions after the terminal because it's not the last instruction. + ASSERT(terminalIndex < block->size() - 1); + Vector<Inst, 1> instsToMove; + for (unsigned i = terminalIndex + 1; i < block->size(); i++) + instsToMove.append(block->at(i)); + RELEASE_ASSERT(instsToMove.size()); + + for (FrequentedBlock& frequentedSuccessor : block->successors()) { + BasicBlock* successor = frequentedSuccessor.block(); + // If successor's only predecessor is block, we can plant the spill inside + // the successor. Otherwise, we must split the critical edge and create + // a new block for the spill. + if (successor->numPredecessors() == 1) { + insertionSet.insertInsts(0, instsToMove); + insertionSet.execute(successor); + } else { + addedBlocks = true; + // FIXME: We probably want better block ordering here. 
+ BasicBlock* newBlock = m_code.addBlock(); + for (const Inst& inst : instsToMove) + newBlock->appendInst(inst); + newBlock->appendInst(Inst(Jump, instsToMove.last().origin)); + newBlock->successors().append(successor); + frequentedSuccessor.block() = newBlock; + } + } + + block->resize(terminalIndex + 1); + } + + if (addedBlocks) + m_code.resetReachability(); + } + + Code& m_code; + TmpWidth m_tmpWidth; + UseCounts<Tmp> m_useCounts; + unsigned m_numIterations { 0 }; +}; + +} // anonymous namespace + +void iteratedRegisterCoalescing(Code& code) +{ + PhaseScope phaseScope(code, "iteratedRegisterCoalescing"); + + IteratedRegisterCoalescing iteratedRegisterCoalescing(code); + iteratedRegisterCoalescing.run(); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirIteratedRegisterCoalescing.h b/Source/JavaScriptCore/b3/air/AirIteratedRegisterCoalescing.h new file mode 100644 index 000000000..ab689b35c --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirIteratedRegisterCoalescing.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// This is a register allocation phase based on Andrew Appel's Iterated Register Coalescing +// http://www.cs.cmu.edu/afs/cs/academic/class/15745-s07/www/papers/george.pdf +void iteratedRegisterCoalescing(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirKind.cpp b/Source/JavaScriptCore/b3/air/AirKind.cpp new file mode 100644 index 000000000..9fe252538 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirKind.cpp @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirKind.h" + +#if ENABLE(B3_JIT) + +#include <wtf/CommaPrinter.h> + +namespace JSC { namespace B3 { namespace Air { + +void Kind::dump(PrintStream& out) const +{ + out.print(opcode); + + CommaPrinter comma(", ", "<"); + if (traps) + out.print(comma, "Traps"); + if (comma.didPrint()) + out.print(">"); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirKind.h b/Source/JavaScriptCore/b3/air/AirKind.h new file mode 100644 index 000000000..e723d4683 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirKind.h @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef AirKind_h +#define AirKind_h + +#if ENABLE(B3_JIT) + +#include "AirOpcode.h" +#include <wtf/PrintStream.h> + +namespace JSC { namespace B3 { namespace Air { + +// Air opcodes are always carried around with some flags. These flags are understood as having no +// meaning if they are set for an opcode to which they do not apply. This makes sense, since Air +// is a complex instruction set and most of these flags can apply to basically any opcode. In +// fact, it's recommended to only represent something as a flag if you believe that it is largely +// opcode-agnostic. 
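+//
+// For illustration, a minimal usage sketch (the constructors and the `traps` flag are
+// defined just below):
+//
+//     Kind kind(Move);
+//     kind.traps = true; // mark this instruction as possibly trapping / side-exiting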
+ +struct Kind { + Kind(Opcode opcode) + : opcode(opcode) + , traps(false) + { + } + + Kind() + : Kind(Nop) + { + } + + bool operator==(const Kind& other) const + { + return opcode == other.opcode + && traps == other.traps; + } + + bool operator!=(const Kind& other) const + { + return !(*this == other); + } + + unsigned hash() const + { + return static_cast<unsigned>(opcode) + (static_cast<unsigned>(traps) << 16); + } + + explicit operator bool() const + { + return *this != Kind(); + } + + void dump(PrintStream&) const; + + Opcode opcode; + + // This is an opcode-agnostic flag that indicates that we expect that this instruction will + // trap. This causes the compiler to assume that this side-exits and therefore has non-control + // non-arg effects. This also causes the compiler to tell you about all of these instructions. + // Note that this is just one of several ways of supporting trapping in Air, and it's the less + // precise variant because it's origin-based. This means that if an instruction was fused out + // of B3 values that had different origins, then the origin at which you'll appear to trap + // will be somewhat random. The upside of this approach is that it imposes by far the least + // overhead on the compiler. + // FIXME: Make this completely work. + // https://bugs.webkit.org/show_bug.cgi?id=162689 + bool traps : 1; +}; + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + +#endif // AirKind_h + diff --git a/Source/JavaScriptCore/b3/air/AirLiveness.h b/Source/JavaScriptCore/b3/air/AirLiveness.h new file mode 100644 index 000000000..e727c36c9 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirLiveness.h @@ -0,0 +1,392 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirBasicBlock.h" +#include "AirCode.h" +#include "AirInstInlines.h" +#include "AirStackSlot.h" +#include "AirTmpInlines.h" +#include <wtf/IndexMap.h> +#include <wtf/IndexSet.h> +#include <wtf/IndexSparseSet.h> +#include <wtf/ListDump.h> + +namespace JSC { namespace B3 { namespace Air { + +template<Arg::Type adapterType> +struct TmpLivenessAdapter { + typedef Tmp Thing; + typedef HashSet<unsigned> IndexSet; + + TmpLivenessAdapter(Code&) { } + + static unsigned numIndices(Code& code) + { + unsigned numTmps = code.numTmps(adapterType); + return AbsoluteTmpMapper<adapterType>::absoluteIndex(numTmps); + } + static bool acceptsType(Arg::Type type) { return type == adapterType; } + static unsigned valueToIndex(Tmp tmp) { return AbsoluteTmpMapper<adapterType>::absoluteIndex(tmp); } + static Tmp indexToValue(unsigned index) { return AbsoluteTmpMapper<adapterType>::tmpFromAbsoluteIndex(index); } +}; + +struct StackSlotLivenessAdapter { + typedef StackSlot* Thing; + typedef HashSet<unsigned, DefaultHash<unsigned>::Hash, WTF::UnsignedWithZeroKeyHashTraits<unsigned>> IndexSet; + + StackSlotLivenessAdapter(Code& code) + : m_code(code) + { + } + + static unsigned numIndices(Code& code) + { + return code.stackSlots().size(); + } + static bool acceptsType(Arg::Type) { return true; } + static unsigned valueToIndex(StackSlot* stackSlot) { return stackSlot->index(); } + StackSlot* indexToValue(unsigned index) { return m_code.stackSlots()[index]; } + +private: + Code& m_code; +}; + +struct RegLivenessAdapter { + typedef Reg Thing; + typedef BitVector IndexSet; + + RegLivenessAdapter(Code&) { } + + static unsigned numIndices(Code&) + { + return Reg::maxIndex() + 1; + } + + static bool acceptsType(Arg::Type) { return true; } + static unsigned valueToIndex(Reg reg) { return reg.index(); } + Reg indexToValue(unsigned index) { return Reg::fromIndex(index); } +}; + +template<typename Adapter> +class AbstractLiveness : public Adapter { + struct Workset; +public: + typedef typename Adapter::Thing Thing; + + AbstractLiveness(Code& code) + : Adapter(code) + , m_workset(Adapter::numIndices(code)) + , m_liveAtHead(code.size()) + , m_liveAtTail(code.size()) + { + // The liveAtTail of each block automatically contains the LateUse's of the terminal. + for (BasicBlock* block : code) { + typename Adapter::IndexSet& liveAtTail = m_liveAtTail[block]; + + block->last().forEach<typename Adapter::Thing>( + [&] (typename Adapter::Thing& thing, Arg::Role role, Arg::Type type, Arg::Width) { + if (Arg::isLateUse(role) && Adapter::acceptsType(type)) + liveAtTail.add(Adapter::valueToIndex(thing)); + }); + } + + // Blocks with new live values at tail. + BitVector dirtyBlocks; + for (size_t blockIndex = 0; blockIndex < code.size(); ++blockIndex) + dirtyBlocks.set(blockIndex); + + bool changed; + do { + changed = false; + + for (size_t blockIndex = code.size(); blockIndex--;) { + BasicBlock* block = code.at(blockIndex); + if (!block) + continue; + + if (!dirtyBlocks.quickClear(blockIndex)) + continue; + + LocalCalc localCalc(*this, block); + for (size_t instIndex = block->size(); instIndex--;) + localCalc.execute(instIndex); + + // Handle the early def's of the first instruction. 
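+ // LocalCalc::execute() only strips the early defs of the *next* instruction, so the early
+ // defs of instruction 0 are never removed by the loop above and must be handled here
+ // before liveAtHead is recorded.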
+ block->at(0).forEach<typename Adapter::Thing>( + [&] (typename Adapter::Thing& thing, Arg::Role role, Arg::Type type, Arg::Width) { + if (Arg::isEarlyDef(role) && Adapter::acceptsType(type)) + m_workset.remove(Adapter::valueToIndex(thing)); + }); + + Vector<unsigned>& liveAtHead = m_liveAtHead[block]; + + // We only care about Tmps that were discovered in this iteration. It is impossible + // to remove a live value from the head. + // We remove all the values we already knew about so that we only have to deal with + // what is new in LiveAtHead. + if (m_workset.size() == liveAtHead.size()) + m_workset.clear(); + else { + for (unsigned liveIndexAtHead : liveAtHead) + m_workset.remove(liveIndexAtHead); + } + + if (m_workset.isEmpty()) + continue; + + liveAtHead.reserveCapacity(liveAtHead.size() + m_workset.size()); + for (unsigned newValue : m_workset) + liveAtHead.uncheckedAppend(newValue); + + for (BasicBlock* predecessor : block->predecessors()) { + typename Adapter::IndexSet& liveAtTail = m_liveAtTail[predecessor]; + for (unsigned newValue : m_workset) { + if (liveAtTail.add(newValue)) { + if (!dirtyBlocks.quickSet(predecessor->index())) + changed = true; + } + } + } + } + } while (changed); + } + + // This calculator has to be run in reverse. + class LocalCalc { + public: + LocalCalc(AbstractLiveness& liveness, BasicBlock* block) + : m_liveness(liveness) + , m_block(block) + { + auto& workset = liveness.m_workset; + workset.clear(); + typename Adapter::IndexSet& liveAtTail = liveness.m_liveAtTail[block]; + for (unsigned index : liveAtTail) + workset.add(index); + } + + struct Iterator { + Iterator(Adapter& adapter, IndexSparseSet<UnsafeVectorOverflow>::const_iterator sparceSetIterator) + : m_adapter(adapter) + , m_sparceSetIterator(sparceSetIterator) + { + } + + Iterator& operator++() + { + ++m_sparceSetIterator; + return *this; + } + + typename Adapter::Thing operator*() const + { + return m_adapter.indexToValue(*m_sparceSetIterator); + } + + bool operator==(const Iterator& other) { return m_sparceSetIterator == other.m_sparceSetIterator; } + bool operator!=(const Iterator& other) { return m_sparceSetIterator != other.m_sparceSetIterator; } + + private: + Adapter& m_adapter; + IndexSparseSet<UnsafeVectorOverflow>::const_iterator m_sparceSetIterator; + }; + + struct Iterable { + Iterable(AbstractLiveness& liveness) + : m_liveness(liveness) + { + } + + Iterator begin() const { return Iterator(m_liveness, m_liveness.m_workset.begin()); } + Iterator end() const { return Iterator(m_liveness, m_liveness.m_workset.end()); } + + bool contains(const typename Adapter::Thing& thing) const + { + return m_liveness.m_workset.contains(Adapter::valueToIndex(thing)); + } + + private: + AbstractLiveness& m_liveness; + }; + + Iterable live() const + { + return Iterable(m_liveness); + } + + bool isLive(const typename Adapter::Thing& thing) const + { + return live().contains(thing); + } + + void execute(unsigned instIndex) + { + Inst& inst = m_block->at(instIndex); + auto& workset = m_liveness.m_workset; + + // First handle the early def's of the next instruction. + if (instIndex + 1 < m_block->size()) { + Inst& nextInst = m_block->at(instIndex + 1); + nextInst.forEach<typename Adapter::Thing>( + [&] (typename Adapter::Thing& thing, Arg::Role role, Arg::Type type, Arg::Width) { + if (Arg::isEarlyDef(role) && Adapter::acceptsType(type)) + workset.remove(Adapter::valueToIndex(thing)); + }); + } + + // Then handle def's. 
+ inst.forEach<typename Adapter::Thing>( + [&] (typename Adapter::Thing& thing, Arg::Role role, Arg::Type type, Arg::Width) { + if (Arg::isLateDef(role) && Adapter::acceptsType(type)) + workset.remove(Adapter::valueToIndex(thing)); + }); + + // Then handle use's. + inst.forEach<typename Adapter::Thing>( + [&] (typename Adapter::Thing& thing, Arg::Role role, Arg::Type type, Arg::Width) { + if (Arg::isEarlyUse(role) && Adapter::acceptsType(type)) + workset.add(Adapter::valueToIndex(thing)); + }); + + // And finally, handle the late use's of the previous instruction. + if (instIndex) { + Inst& prevInst = m_block->at(instIndex - 1); + prevInst.forEach<typename Adapter::Thing>( + [&] (typename Adapter::Thing& thing, Arg::Role role, Arg::Type type, Arg::Width) { + if (Arg::isLateUse(role) && Adapter::acceptsType(type)) + workset.add(Adapter::valueToIndex(thing)); + }); + } + } + + private: + AbstractLiveness& m_liveness; + BasicBlock* m_block; + }; + + const Vector<unsigned>& rawLiveAtHead(BasicBlock* block) + { + return m_liveAtHead[block]; + } + + template<typename UnderlyingIterable> + class Iterable { + public: + Iterable(AbstractLiveness& liveness, const UnderlyingIterable& iterable) + : m_liveness(liveness) + , m_iterable(iterable) + { + } + + class iterator { + public: + iterator() + : m_liveness(nullptr) + , m_iter() + { + } + + iterator(AbstractLiveness& liveness, typename UnderlyingIterable::const_iterator iter) + : m_liveness(&liveness) + , m_iter(iter) + { + } + + typename Adapter::Thing operator*() + { + return m_liveness->indexToValue(*m_iter); + } + + iterator& operator++() + { + ++m_iter; + return *this; + } + + bool operator==(const iterator& other) const + { + ASSERT(m_liveness == other.m_liveness); + return m_iter == other.m_iter; + } + + bool operator!=(const iterator& other) const + { + return !(*this == other); + } + + private: + AbstractLiveness* m_liveness; + typename UnderlyingIterable::const_iterator m_iter; + }; + + iterator begin() const { return iterator(m_liveness, m_iterable.begin()); } + iterator end() const { return iterator(m_liveness, m_iterable.end()); } + + bool contains(const typename Adapter::Thing& thing) const + { + return m_liveness.m_workset.contains(Adapter::valueToIndex(thing)); + } + + private: + AbstractLiveness& m_liveness; + const UnderlyingIterable& m_iterable; + }; + + Iterable<Vector<unsigned>> liveAtHead(BasicBlock* block) + { + return Iterable<Vector<unsigned>>(*this, m_liveAtHead[block]); + } + + Iterable<typename Adapter::IndexSet> liveAtTail(BasicBlock* block) + { + return Iterable<typename Adapter::IndexSet>(*this, m_liveAtTail[block]); + } + + IndexSparseSet<UnsafeVectorOverflow>& workset() { return m_workset; } + +private: + friend class LocalCalc; + friend struct LocalCalc::Iterable; + + IndexSparseSet<UnsafeVectorOverflow> m_workset; + IndexMap<BasicBlock, Vector<unsigned>> m_liveAtHead; + IndexMap<BasicBlock, typename Adapter::IndexSet> m_liveAtTail; +}; + +template<Arg::Type type> +using TmpLiveness = AbstractLiveness<TmpLivenessAdapter<type>>; + +typedef AbstractLiveness<TmpLivenessAdapter<Arg::GP>> GPLiveness; +typedef AbstractLiveness<TmpLivenessAdapter<Arg::FP>> FPLiveness; +typedef AbstractLiveness<StackSlotLivenessAdapter> StackSlotLiveness; +typedef AbstractLiveness<RegLivenessAdapter> RegLiveness; + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirLogRegisterPressure.cpp b/Source/JavaScriptCore/b3/air/AirLogRegisterPressure.cpp new file mode 100644 index 
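The phases later in this patch that need liveness (logRegisterPressure, lowerAfterRegAlloc) consume the class above in the same way: build the analysis once, then walk each block backwards through a LocalCalc. A condensed sketch of that pattern, using the RegLiveness instantiation (the dump helper itself is hypothetical):

    static void dumpLiveRegisters(Code& code)
    {
        // Run the fixed-point analysis once for the whole procedure.
        RegLiveness liveness(code);

        for (BasicBlock* block : code) {
            // LocalCalc starts out holding the block's live-at-tail set...
            RegLiveness::LocalCalc localCalc(liveness, block);

            // ...and must be stepped backwards, one instruction at a time.
            for (unsigned instIndex = block->size(); instIndex--;) {
                // Before execute(), live() is the set that is live after this instruction.
                dataLog("Live after ", block->at(instIndex), ":");
                for (Reg reg : localCalc.live())
                    dataLog(" ", reg);
                dataLog("\n");

                // After execute(), live() is the set that is live before it.
                localCalc.execute(instIndex);
            }
        }
    }
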
000000000..dbbb257c1 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirLogRegisterPressure.cpp @@ -0,0 +1,103 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirLogRegisterPressure.h" + +#if ENABLE(B3_JIT) + +#include "AirArgInlines.h" +#include "AirCode.h" +#include "AirInstInlines.h" +#include "AirLiveness.h" + +namespace JSC { namespace B3 { namespace Air { + +void logRegisterPressure(Code& code) +{ + const unsigned totalColumns = 200; + const unsigned registerColumns = 100; + + RegLiveness liveness(code); + + for (BasicBlock* block : code) { + RegLiveness::LocalCalc localCalc(liveness, block); + + block->dumpHeader(WTF::dataFile()); + + Vector<CString> instDumps; + for (unsigned instIndex = block->size(); instIndex--;) { + Inst& inst = block->at(instIndex); + Inst* prevInst = block->get(instIndex - 1); + + localCalc.execute(instIndex); + + RegisterSet set; + set.setAll(localCalc.live()); + Inst::forEachDefWithExtraClobberedRegs<Reg>( + prevInst, &inst, + [&] (Reg reg, Arg::Role, Arg::Type, Arg::Width) { + set.set(reg); + }); + + StringPrintStream instOut; + StringPrintStream lineOut; + lineOut.print(" "); + if (set.numberOfSetRegisters()) { + set.forEach( + [&] (Reg reg) { + CString text = toCString(" ", reg); + if (text.length() + lineOut.length() > totalColumns) { + instOut.print(lineOut.toCString(), "\n"); + lineOut.reset(); + lineOut.print(" "); + } + lineOut.print(text); + }); + lineOut.print(":"); + } + if (lineOut.length() > registerColumns) { + instOut.print(lineOut.toCString(), "\n"); + lineOut.reset(); + } + while (lineOut.length() < registerColumns) + lineOut.print(" "); + lineOut.print(" "); + lineOut.print(inst); + instOut.print(lineOut.toCString(), "\n"); + instDumps.append(instOut.toCString()); + } + + for (unsigned i = instDumps.size(); i--;) + dataLog(instDumps[i]); + + block->dumpFooter(WTF::dataFile()); + } +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirLogRegisterPressure.h b/Source/JavaScriptCore/b3/air/AirLogRegisterPressure.h new file mode 100644 index 000000000..3f7c3e24c --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirLogRegisterPressure.h @@ -0,0 +1,39 @@ +/* + * Copyright (C) 2016 
Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// Dumps the registers that are used at each instruction. +void logRegisterPressure(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirLowerAfterRegAlloc.cpp b/Source/JavaScriptCore/b3/air/AirLowerAfterRegAlloc.cpp new file mode 100644 index 000000000..e0018734b --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirLowerAfterRegAlloc.cpp @@ -0,0 +1,250 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
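The logRegisterPressure() declaration above is the entire public surface of this dump. A plausible way to wire it up while debugging register allocation (the guard flag is a local assumption, not something this patch provides):

    // Hypothetical debugging hook: the pressure dump is most informative when it
    // runs right after register allocation.
    static void maybeLogRegisterPressure(Code& code)
    {
        static const bool dumpAirRegisterPressure = false; // assumed flag; flip locally while debugging
        if (dumpAirRegisterPressure)
            logRegisterPressure(code);
    }
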
+ */ + +#include "config.h" +#include "AirLowerAfterRegAlloc.h" + +#if ENABLE(B3_JIT) + +#include "AirArgInlines.h" +#include "AirCCallingConvention.h" +#include "AirCode.h" +#include "AirEmitShuffle.h" +#include "AirInsertionSet.h" +#include "AirInstInlines.h" +#include "AirLiveness.h" +#include "AirPhaseScope.h" +#include "B3CCallValue.h" +#include "B3ValueInlines.h" +#include "RegisterSet.h" +#include <wtf/HashMap.h> + +namespace JSC { namespace B3 { namespace Air { + +namespace { + +bool verbose = false; + +} // anonymous namespace + +void lowerAfterRegAlloc(Code& code) +{ + PhaseScope phaseScope(code, "lowerAfterRegAlloc"); + + if (verbose) + dataLog("Code before lowerAfterRegAlloc:\n", code); + + HashMap<Inst*, RegisterSet> usedRegisters; + + RegLiveness liveness(code); + for (BasicBlock* block : code) { + RegLiveness::LocalCalc localCalc(liveness, block); + + for (unsigned instIndex = block->size(); instIndex--;) { + Inst& inst = block->at(instIndex); + + RegisterSet set; + + bool isRelevant = inst.kind.opcode == Shuffle || inst.kind.opcode == ColdCCall; + + if (isRelevant) { + for (Reg reg : localCalc.live()) + set.set(reg); + } + + localCalc.execute(instIndex); + + if (isRelevant) + usedRegisters.add(&inst, set); + } + } + + auto getScratches = [&] (RegisterSet set, Arg::Type type) -> std::array<Arg, 2> { + std::array<Arg, 2> result; + for (unsigned i = 0; i < 2; ++i) { + bool found = false; + for (Reg reg : code.regsInPriorityOrder(type)) { + if (!set.get(reg)) { + result[i] = Tmp(reg); + set.set(reg); + found = true; + break; + } + } + if (!found) { + result[i] = Arg::stack( + code.addStackSlot( + Arg::bytes(Arg::conservativeWidth(type)), + StackSlotKind::Spill)); + } + } + return result; + }; + + // Now transform the code. + InsertionSet insertionSet(code); + for (BasicBlock* block : code) { + for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) { + Inst& inst = block->at(instIndex); + + switch (inst.kind.opcode) { + case Shuffle: { + RegisterSet set = usedRegisters.get(&inst); + Vector<ShufflePair> pairs; + for (unsigned i = 0; i < inst.args.size(); i += 3) { + Arg src = inst.args[i + 0]; + Arg dst = inst.args[i + 1]; + Arg::Width width = inst.args[i + 2].width(); + + // The used register set contains things live after the shuffle. But + // emitShuffle() wants a scratch register that is not just dead but also does not + // interfere with either sources or destinations. + auto excludeRegisters = [&] (Tmp tmp) { + if (tmp.isReg()) + set.set(tmp.reg()); + }; + src.forEachTmpFast(excludeRegisters); + dst.forEachTmpFast(excludeRegisters); + + pairs.append(ShufflePair(src, dst, width)); + } + std::array<Arg, 2> gpScratch = getScratches(set, Arg::GP); + std::array<Arg, 2> fpScratch = getScratches(set, Arg::FP); + insertionSet.insertInsts( + instIndex, emitShuffle(code, pairs, gpScratch, fpScratch, inst.origin)); + inst = Inst(); + break; + } + + case ColdCCall: { + CCallValue* value = inst.origin->as<CCallValue>(); + Kind oldKind = inst.kind; + + RegisterSet liveRegs = usedRegisters.get(&inst); + RegisterSet regsToSave = liveRegs; + regsToSave.exclude(RegisterSet::calleeSaveRegisters()); + regsToSave.exclude(RegisterSet::stackRegisters()); + regsToSave.exclude(RegisterSet::reservedHardwareRegisters()); + + RegisterSet preUsed = regsToSave; + Vector<Arg> destinations = computeCCallingConvention(code, value); + Tmp result = cCallResult(value->type()); + Arg originalResult = result ? 
inst.args[1] : Arg(); + + Vector<ShufflePair> pairs; + for (unsigned i = 0; i < destinations.size(); ++i) { + Value* child = value->child(i); + Arg src = inst.args[result ? (i >= 1 ? i + 1 : i) : i ]; + Arg dst = destinations[i]; + Arg::Width width = Arg::widthForB3Type(child->type()); + pairs.append(ShufflePair(src, dst, width)); + + auto excludeRegisters = [&] (Tmp tmp) { + if (tmp.isReg()) + preUsed.set(tmp.reg()); + }; + src.forEachTmpFast(excludeRegisters); + dst.forEachTmpFast(excludeRegisters); + } + + std::array<Arg, 2> gpScratch = getScratches(preUsed, Arg::GP); + std::array<Arg, 2> fpScratch = getScratches(preUsed, Arg::FP); + + // Also need to save all live registers. Don't need to worry about the result + // register. + if (originalResult.isReg()) + regsToSave.clear(originalResult.reg()); + Vector<StackSlot*> stackSlots; + regsToSave.forEach( + [&] (Reg reg) { + Tmp tmp(reg); + Arg arg(tmp); + Arg::Width width = Arg::conservativeWidth(arg.type()); + StackSlot* stackSlot = + code.addStackSlot(Arg::bytes(width), StackSlotKind::Spill); + pairs.append(ShufflePair(arg, Arg::stack(stackSlot), width)); + stackSlots.append(stackSlot); + }); + + if (verbose) + dataLog("Pre-call pairs for ", inst, ": ", listDump(pairs), "\n"); + + insertionSet.insertInsts( + instIndex, emitShuffle(code, pairs, gpScratch, fpScratch, inst.origin)); + + inst = buildCCall(code, inst.origin, destinations); + if (oldKind.traps) + inst.kind.traps = true; + + // Now we need to emit code to restore registers. + pairs.resize(0); + unsigned stackSlotIndex = 0; + regsToSave.forEach( + [&] (Reg reg) { + Tmp tmp(reg); + Arg arg(tmp); + Arg::Width width = Arg::conservativeWidth(arg.type()); + StackSlot* stackSlot = stackSlots[stackSlotIndex++]; + pairs.append(ShufflePair(Arg::stack(stackSlot), arg, width)); + }); + if (result) { + ShufflePair pair(result, originalResult, Arg::widthForB3Type(value->type())); + pairs.append(pair); + } + + // For finding scratch registers, we need to account for the possibility that + // the result is dead. + if (originalResult.isReg()) + liveRegs.set(originalResult.reg()); + + gpScratch = getScratches(liveRegs, Arg::GP); + fpScratch = getScratches(liveRegs, Arg::FP); + + insertionSet.insertInsts( + instIndex + 1, emitShuffle(code, pairs, gpScratch, fpScratch, inst.origin)); + break; + } + + default: + break; + } + } + + insertionSet.execute(block); + + block->insts().removeAllMatching( + [&] (Inst& inst) -> bool { + return !inst; + }); + } + + if (verbose) + dataLog("Code after lowerAfterRegAlloc:\n", code); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirLowerAfterRegAlloc.h b/Source/JavaScriptCore/b3/air/AirLowerAfterRegAlloc.h new file mode 100644 index 000000000..d8234a7e6 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirLowerAfterRegAlloc.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// This lowers Shuffle and ColdCCall instructions. This phase is designed to be run after register +// allocation. + +void lowerAfterRegAlloc(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirLowerEntrySwitch.cpp b/Source/JavaScriptCore/b3/air/AirLowerEntrySwitch.cpp new file mode 100644 index 000000000..e14641da6 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirLowerEntrySwitch.cpp @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirLowerEntrySwitch.h" + +#if ENABLE(B3_JIT) + +#include "AirBlockWorklist.h" +#include "AirCode.h" +#include "AirInstInlines.h" +#include "AirPhaseScope.h" +#include "B3Procedure.h" + +namespace JSC { namespace B3 { namespace Air { + +void lowerEntrySwitch(Code& code) +{ + PhaseScope phaseScope(code, "lowerEntrySwitch"); + + // Figure out the set of blocks that should be duplicated. + BlockWorklist worklist; + for (BasicBlock* block : code) { + if (block->last().kind.opcode == EntrySwitch) + worklist.push(block); + } + + // It's possible that we don't have any EntrySwitches. That's fine. 
+ if (worklist.seen().isEmpty()) { + Vector<FrequentedBlock> entrypoints(code.proc().numEntrypoints(), FrequentedBlock(code[0])); + code.setEntrypoints(WTFMove(entrypoints)); + return; + } + + while (BasicBlock* block = worklist.pop()) + worklist.pushAll(block->predecessors()); + + RELEASE_ASSERT(worklist.saw(code[0])); + + Vector<FrequencyClass> entrypointFrequencies(code.proc().numEntrypoints(), FrequencyClass::Rare); + for (BasicBlock* block : code) { + if (block->last().kind.opcode != EntrySwitch) + continue; + for (unsigned entrypointIndex = code.proc().numEntrypoints(); entrypointIndex--;) { + entrypointFrequencies[entrypointIndex] = maxFrequency( + entrypointFrequencies[entrypointIndex], + block->successor(entrypointIndex).frequency()); + } + } + + auto fixEntrySwitch = [&] (BasicBlock* block, unsigned entrypointIndex) { + if (block->last().kind.opcode != EntrySwitch) + return; + FrequentedBlock target = block->successor(entrypointIndex); + block->last().kind.opcode = Jump; + block->successors().resize(1); + block->successor(0) = target; + }; + + // Now duplicate them. + Vector<FrequentedBlock> entrypoints; + entrypoints.append(FrequentedBlock(code[0], entrypointFrequencies[0])); + IndexMap<BasicBlock, BasicBlock*> map(code.size()); + for (unsigned entrypointIndex = 1; entrypointIndex < code.proc().numEntrypoints(); ++entrypointIndex) { + map.clear(); + for (BasicBlock* block : worklist.seen().values(code)) + map[block] = code.addBlock(block->frequency()); + entrypoints.append(FrequentedBlock(map[code[0]], entrypointFrequencies[entrypointIndex])); + for (BasicBlock* block : worklist.seen().values(code)) { + BasicBlock* newBlock = map[block]; + for (const Inst& inst : *block) + newBlock->appendInst(inst); + newBlock->successors() = block->successors(); + for (BasicBlock*& successor : newBlock->successorBlocks()) { + if (BasicBlock* replacement = map[successor]) + successor = replacement; + } + fixEntrySwitch(newBlock, entrypointIndex); + } + } + for (BasicBlock* block : worklist.seen().values(code)) + fixEntrySwitch(block, 0); + + code.setEntrypoints(WTFMove(entrypoints)); + code.resetReachability(); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + + diff --git a/Source/JavaScriptCore/b3/air/AirLowerEntrySwitch.h b/Source/JavaScriptCore/b3/air/AirLowerEntrySwitch.h new file mode 100644 index 000000000..ff3500727 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirLowerEntrySwitch.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// Converts code that seems to have one entrypoint and emulates multiple entrypoints with +// EntrySwitch into code that really has multiple entrypoints. This is accomplished by duplicating +// the backwards transitive closure from all EntrySwitches. +void lowerEntrySwitch(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirLowerMacros.cpp b/Source/JavaScriptCore/b3/air/AirLowerMacros.cpp new file mode 100644 index 000000000..b086b7b08 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirLowerMacros.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirLowerMacros.h" + +#if ENABLE(B3_JIT) + +#include "AirCCallingConvention.h" +#include "AirCode.h" +#include "AirInsertionSet.h" +#include "AirInstInlines.h" +#include "AirPhaseScope.h" +#include "B3CCallValue.h" +#include "B3ValueInlines.h" + +namespace JSC { namespace B3 { namespace Air { + +void lowerMacros(Code& code) +{ + PhaseScope phaseScope(code, "lowerMacros"); + + InsertionSet insertionSet(code); + for (BasicBlock* block : code) { + for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) { + Inst& inst = block->at(instIndex); + + switch (inst.kind.opcode) { + case CCall: { + CCallValue* value = inst.origin->as<CCallValue>(); + Kind oldKind = inst.kind; + + Vector<Arg> destinations = computeCCallingConvention(code, value); + + Inst shuffleArguments(Shuffle, value); + unsigned offset = value->type() == Void ? 
0 : 1; + for (unsigned i = 1; i < destinations.size(); ++i) { + Value* child = value->child(i); + shuffleArguments.args.append(inst.args[offset + i]); + shuffleArguments.args.append(destinations[i]); + shuffleArguments.args.append(Arg::widthArg(Arg::widthForB3Type(child->type()))); + } + insertionSet.insertInst(instIndex, WTFMove(shuffleArguments)); + + // Indicate that we're using our original callee argument. + destinations[0] = inst.args[0]; + + // Save where the original instruction put its result. + Arg resultDst = value->type() == Void ? Arg() : inst.args[1]; + + inst = buildCCall(code, inst.origin, destinations); + if (oldKind.traps) + inst.kind.traps = true; + + Tmp result = cCallResult(value->type()); + switch (value->type()) { + case Void: + break; + case Float: + insertionSet.insert(instIndex + 1, MoveFloat, value, result, resultDst); + break; + case Double: + insertionSet.insert(instIndex + 1, MoveDouble, value, result, resultDst); + break; + case Int32: + insertionSet.insert(instIndex + 1, Move32, value, result, resultDst); + break; + case Int64: + insertionSet.insert(instIndex + 1, Move, value, result, resultDst); + break; + } + break; + } + + default: + break; + } + } + insertionSet.execute(block); + } +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirLowerMacros.h b/Source/JavaScriptCore/b3/air/AirLowerMacros.h new file mode 100644 index 000000000..2dcd76dfe --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirLowerMacros.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// Air has some opcodes that are very high-level and are meant to reduce the amount of low-level +// knowledge in the B3->Air lowering. The current example is CCall. + +void lowerMacros(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirOpcode.opcodes b/Source/JavaScriptCore/b3/air/AirOpcode.opcodes new file mode 100644 index 000000000..e82c9f5bf --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirOpcode.opcodes @@ -0,0 +1,943 @@ +# Copyright (C) 2015-2016 Apple Inc. All rights reserved. 
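The result handling in lowerMacros() above can be distilled into a small helper (the helper itself is hypothetical; the opcodes and types are the ones used in the switch above): the move opcode that copies the C-call result register back into the Tmp named by the original CCall depends only on the call's B3 type.

    static Opcode resultMoveOpcodeForType(Type type)
    {
        switch (type) {
        case Float:
            return MoveFloat;  // float results come back in a float register
        case Double:
            return MoveDouble;
        case Int32:
            return Move32;     // zero-extends into the full register
        case Int64:
            return Move;
        case Void:
        default:
            return Nop;        // Void calls produce no result, so there is nothing to move
        }
    }
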
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS +# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. + +# Syllabus: +# +# Examples of some roles, types, and widths: +# U:G:32 => use of the low 32 bits of a general-purpose register or value +# D:G:32 => def of the low 32 bits of a general-purpose register or value +# UD:G:32 => use and def of the low 32 bits of a general-purpose register or value +# U:G:64 => use of the low 64 bits of a general-purpose register or value +# ZD:G:32 => def of all bits of a general-purpose register, where all but the low 32 bits are guaranteed to be zeroed. +# UA:G:Ptr => UseAddr (see comment in Arg.h) +# U:F:32 => use of a float register or value +# U:F:64 => use of a double register or value +# D:F:32 => def of a float register or value +# UD:F:32 => use and def of a float register or value +# S:F:32 => scratch float register. +# +# Argument kinds: +# Tmp => temporary or register +# Imm => 32-bit immediate int +# BigImm => TrustedImm64 +# Addr => address as temporary/register+offset +# Index => BaseIndex address +# Abs => AbsoluteAddress +# +# The parser views these things as keywords, and understands that they fall into two distinct classes +# of things. So, although this file uses a particular indentation style, none of the whitespace or +# even newlines are meaningful to the parser. For example, you could write: +# +# Foo42 U:G:32, UD:F:32 Imm, Tmp Addr, Tmp +# +# And the parser would know that this is the same as: +# +# Foo42 U:G:32, UD:F:32 +# Imm, Tmp +# Addr, Tmp +# +# I.e. a two-form instruction that uses a GPR or an int immediate and uses+defs a float register. +# +# Any opcode or opcode form can be preceded with an architecture list, which restricts the opcode to the +# union of those architectures. 
For example, if this is the only overload of the opcode, then it makes the +# opcode only available on x86_64: +# +# x86_64: Fuzz UD:G:64, D:G:64 +# Tmp, Tmp +# Tmp, Addr +# +# But this only restricts the two-operand form, the other form is allowed on all architectures: +# +# x86_64: Fuzz UD:G:64, D:G:64 +# Tmp, Tmp +# Tmp, Addr +# Fuzz UD:G:Ptr, D:G:Ptr, U:F:Ptr +# Tmp, Tmp, Tmp +# Tmp, Addr, Tmp +# +# And you can also restrict individual forms: +# +# Thingy UD:G:32, D:G:32 +# Tmp, Tmp +# arm64: Tmp, Addr +# +# Additionally, you can have an intersection between the architectures of the opcode overload and the +# form. In this example, the version that takes an address is only available on armv7 while the other +# versions are available on armv7 or x86_64: +# +# x86_64 armv7: Buzz U:G:32, UD:F:32 +# Tmp, Tmp +# Imm, Tmp +# armv7: Addr, Tmp +# +# Finally, you can specify architectures using helpful architecture groups. Here are all of the +# architecture keywords that we support: +# +# x86: means x86-32 or x86-64. +# x86_32: means just x86-32. +# x86_64: means just x86-64. +# arm: means armv7 or arm64. +# armv7: means just armv7. +# arm64: means just arm64. +# 32: means x86-32 or armv7. +# 64: means x86-64 or arm64. + +# Note that the opcodes here have a leading capital (Add32) but must correspond to MacroAssembler +# API that has a leading lower-case (add32). + +Nop + +Add32 U:G:32, U:G:32, ZD:G:32 + Imm, Tmp, Tmp + Tmp, Tmp, Tmp + +Add32 U:G:32, UZD:G:32 + Tmp, Tmp + x86: Imm, Addr + x86: Imm, Index + Imm, Tmp + x86: Addr, Tmp + x86: Tmp, Addr + x86: Tmp, Index + +x86: Add8 U:G:8, UD:G:8 + Imm, Addr + Imm, Index + Tmp, Addr + Tmp, Index + +x86: Add16 U:G:16, UD:G:16 + Imm, Addr + Imm, Index + Tmp, Addr + Tmp, Index + +64: Add64 U:G:64, UD:G:64 + Tmp, Tmp + x86: Imm, Addr + Imm, Tmp + x86: Addr, Tmp + x86: Tmp, Addr + +64: Add64 U:G:64, U:G:64, D:G:64 + Imm, Tmp, Tmp + Tmp, Tmp, Tmp + +AddDouble U:F:64, U:F:64, D:F:64 + Tmp, Tmp, Tmp + x86: Addr, Tmp, Tmp + x86: Tmp, Addr, Tmp + x86: Index, Tmp, Tmp + +x86: AddDouble U:F:64, UD:F:64 + Tmp, Tmp + Addr, Tmp + +AddFloat U:F:32, U:F:32, D:F:32 + Tmp, Tmp, Tmp + x86: Addr, Tmp, Tmp + x86: Tmp, Addr, Tmp + x86: Index, Tmp, Tmp + +x86: AddFloat U:F:32, UD:F:32 + Tmp, Tmp + Addr, Tmp + +Sub32 U:G:32, UZD:G:32 + Tmp, Tmp + x86: Imm, Addr + Imm, Tmp + x86: Addr, Tmp + x86: Tmp, Addr + +arm64: Sub32 U:G:32, U:G:32, D:G:32 + Tmp, Tmp, Tmp + +64: Sub64 U:G:64, UD:G:64 + Tmp, Tmp + x86: Imm, Addr + Imm, Tmp + x86: Addr, Tmp + x86: Tmp, Addr + +arm64: Sub64 U:G:64, U:G:64, D:G:64 + Tmp, Tmp, Tmp + +SubDouble U:F:64, U:F:64, D:F:64 + arm64: Tmp, Tmp, Tmp + x86: Tmp, Addr, Tmp + x86: Tmp, Index, Tmp + +x86: SubDouble U:F:64, UD:F:64 + Tmp, Tmp + Addr, Tmp + +SubFloat U:F:32, U:F:32, D:F:32 + arm64: Tmp, Tmp, Tmp + x86: Tmp, Addr, Tmp + x86: Tmp, Index, Tmp + +x86: SubFloat U:F:32, UD:F:32 + Tmp, Tmp + Addr, Tmp + +Neg32 UZD:G:32 + Tmp + x86: Addr + +64: Neg64 UD:G:64 + Tmp + +arm64: NegateDouble U:F:64, D:F:64 + Tmp, Tmp + +arm64: NegateFloat U:F:32, D:F:32 + Tmp, Tmp + +Mul32 U:G:32, UZD:G:32 + Tmp, Tmp + x86: Addr, Tmp + +Mul32 U:G:32, U:G:32, ZD:G:32 + Tmp, Tmp, Tmp + x86: Addr, Tmp, Tmp + x86: Tmp, Addr, Tmp + x86: Imm, Tmp, Tmp + +64: Mul64 U:G:64, UD:G:64 + Tmp, Tmp + +Mul64 U:G:64, U:G:64, D:G:64 + Tmp, Tmp, Tmp + +arm64: MultiplyAdd32 U:G:32, U:G:32, U:G:32, ZD:G:32 + Tmp, Tmp, Tmp, Tmp + +arm64: MultiplyAdd64 U:G:64, U:G:64, U:G:64, D:G:64 + Tmp, Tmp, Tmp, Tmp + +arm64: MultiplySub32 U:G:32, U:G:32, U:G:32, ZD:G:32 + Tmp, Tmp, Tmp, Tmp + +arm64: 
MultiplySub64 U:G:64, U:G:64, U:G:64, D:G:64 + Tmp, Tmp, Tmp, Tmp + +arm64: MultiplyNeg32 U:G:32, U:G:32, ZD:G:32 + Tmp, Tmp, Tmp + +arm64: MultiplyNeg64 U:G:64, U:G:64, ZD:G:64 + Tmp, Tmp, Tmp + +arm64: Div32 U:G:32, U:G:32, ZD:G:32 + Tmp, Tmp, Tmp + +arm64: UDiv32 U:G:32, U:G:32, ZD:G:32 + Tmp, Tmp, Tmp + +arm64: Div64 U:G:64, U:G:64, D:G:64 + Tmp, Tmp, Tmp + +arm64: UDiv64 U:G:64, U:G:64, D:G:64 + Tmp, Tmp, Tmp + +MulDouble U:F:64, U:F:64, D:F:64 + Tmp, Tmp, Tmp + x86: Addr, Tmp, Tmp + x86: Tmp, Addr, Tmp + x86: Index, Tmp, Tmp + +x86: MulDouble U:F:64, UD:F:64 + Tmp, Tmp + Addr, Tmp + +MulFloat U:F:32, U:F:32, D:F:32 + Tmp, Tmp, Tmp + x86: Addr, Tmp, Tmp + x86: Tmp, Addr, Tmp + x86: Index, Tmp, Tmp + +x86: MulFloat U:F:32, UD:F:32 + Tmp, Tmp + Addr, Tmp + +arm64: DivDouble U:F:64, U:F:32, D:F:64 + Tmp, Tmp, Tmp + +x86: DivDouble U:F:64, UD:F:64 + Tmp, Tmp + Addr, Tmp + +arm64: DivFloat U:F:32, U:F:32, D:F:32 + Tmp, Tmp, Tmp + +x86: DivFloat U:F:32, UD:F:32 + Tmp, Tmp + Addr, Tmp + +x86: X86ConvertToDoubleWord32 U:G:32, ZD:G:32 + Tmp*, Tmp* + +x86_64: X86ConvertToQuadWord64 U:G:64, D:G:64 + Tmp*, Tmp* + +x86: X86Div32 UZD:G:32, UZD:G:32, U:G:32 + Tmp*, Tmp*, Tmp + +x86: X86UDiv32 UZD:G:32, UZD:G:32, U:G:32 + Tmp*, Tmp*, Tmp + +x86_64: X86Div64 UZD:G:64, UZD:G:64, U:G:64 + Tmp*, Tmp*, Tmp + +x86_64: X86UDiv64 UZD:G:64, UZD:G:64, U:G:64 + Tmp*, Tmp*, Tmp + +Lea32 UA:G:32, D:G:32 + Addr, Tmp + x86: Index, Tmp as x86Lea32 + +Lea64 UA:G:64, D:G:64 + Addr, Tmp + x86: Index, Tmp as x86Lea64 + +And32 U:G:32, U:G:32, ZD:G:32 + Tmp, Tmp, Tmp + arm64: BitImm, Tmp, Tmp + x86: Tmp, Addr, Tmp + x86: Addr, Tmp, Tmp + +And32 U:G:32, UZD:G:32 + Tmp, Tmp + x86: Imm, Tmp + x86: Tmp, Addr + x86: Addr, Tmp + x86: Imm, Addr + +64: And64 U:G:64, U:G:64, D:G:64 + Tmp, Tmp, Tmp + arm64: BitImm64, Tmp, Tmp + +x86_64: And64 U:G:64, UD:G:64 + Tmp, Tmp + x86: Imm, Tmp + +AndDouble U:F:64, U:F:64, D:F:64 + Tmp, Tmp, Tmp + +x86: AndDouble U:F:64, UD:F:64 + Tmp, Tmp + +AndFloat U:F:32, U:F:32, D:F:32 + Tmp, Tmp, Tmp + +x86: AndFloat U:F:32, UD:F:32 + Tmp, Tmp + +OrDouble U:F:64, U:F:64, D:F:64 + Tmp, Tmp, Tmp + +x86: OrDouble U:F:64, UD:F:64 + Tmp, Tmp + +OrFloat U:F:32, U:F:32, D:F:32 + Tmp, Tmp, Tmp + +x86: OrFloat U:F:32, UD:F:32 + Tmp, Tmp + +x86: XorDouble U:F:64, U:F:64, D:F:64 + Tmp, Tmp, Tmp + +x86: XorDouble U:F:64, UD:F:64 + Tmp, Tmp + +x86: XorFloat U:F:32, U:F:32, D:F:32 + Tmp, Tmp, Tmp + +x86: XorFloat U:F:32, UD:F:32 + Tmp, Tmp + +arm64: Lshift32 U:G:32, U:G:32, ZD:G:32 + Tmp, Tmp, Tmp + Tmp, Imm, Tmp + +x86:Lshift32 U:G:32, UZD:G:32 + Tmp*, Tmp + Imm, Tmp + +arm64: Lshift64 U:G:64, U:G:64, D:G:64 + Tmp, Tmp, Tmp + Tmp, Imm, Tmp + +x86_64: Lshift64 U:G:64, UD:G:64 + Tmp*, Tmp + Imm, Tmp + +arm64: Rshift32 U:G:32, U:G:32, ZD:G:32 + Tmp, Tmp, Tmp + Tmp, Imm, Tmp + +x86: Rshift32 U:G:32, UZD:G:32 + Tmp*, Tmp + Imm, Tmp + +arm64: Rshift64 U:G:64, U:G:64, D:G:64 + Tmp, Tmp, Tmp + Tmp, Imm, Tmp + +x86_64: Rshift64 U:G:64, UD:G:64 + Tmp*, Tmp + Imm, Tmp + +arm64: Urshift32 U:G:32, U:G:32, ZD:G:32 + Tmp, Tmp, Tmp + Tmp, Imm, Tmp + +x86: Urshift32 U:G:32, UZD:G:32 + Tmp*, Tmp + Imm, Tmp + +arm64: Urshift64 U:G:64, U:G:64, D:G:64 + Tmp, Tmp, Tmp + Tmp, Imm, Tmp + +x86_64: Urshift64 U:G:64, UD:G:64 + Tmp*, Tmp + Imm, Tmp + +x86_64: RotateRight32 U:G:32, UZD:G:32 + Tmp*, Tmp + Imm, Tmp + +arm64: RotateRight32 U:G:32, U:G:32, ZD:G:32 + Tmp, Tmp, Tmp + Tmp, Imm, Tmp + +x86_64: RotateRight64 U:G:64, UD:G:64 + Tmp*, Tmp + Imm, Tmp + +arm64: RotateRight64 U:G:64, U:G:64, D:G:64 + Tmp, Tmp, Tmp + Tmp, Imm, Tmp + +x86_64: 
RotateLeft32 U:G:32, UZD:G:32 + Tmp*, Tmp + Imm, Tmp + +x86_64: RotateLeft64 U:G:64, UD:G:64 + Tmp*, Tmp + Imm, Tmp + +Or32 U:G:32, U:G:32, ZD:G:32 + Tmp, Tmp, Tmp + arm64: BitImm, Tmp, Tmp + x86: Tmp, Addr, Tmp + x86: Addr, Tmp, Tmp + +Or32 U:G:32, UZD:G:32 + Tmp, Tmp + x86: Imm, Tmp + x86: Tmp, Addr + x86: Addr, Tmp + x86: Imm, Addr + +64: Or64 U:G:64, U:G:64, D:G:64 + Tmp, Tmp, Tmp + arm64: BitImm64, Tmp, Tmp + +64: Or64 U:G:64, UD:G:64 + Tmp, Tmp + x86: Imm, Tmp + +Xor32 U:G:32, U:G:32, ZD:G:32 + Tmp, Tmp, Tmp + arm64: BitImm, Tmp, Tmp + x86: Tmp, Addr, Tmp + x86: Addr, Tmp, Tmp + +Xor32 U:G:32, UZD:G:32 + Tmp, Tmp + x86: Imm, Tmp + x86: Tmp, Addr + x86: Addr, Tmp + x86: Imm, Addr + +64: Xor64 U:G:64, U:G:64, D:G:64 + Tmp, Tmp, Tmp + arm64: BitImm64, Tmp, Tmp + +64: Xor64 U:G:64, UD:G:64 + Tmp, Tmp + x86: Tmp, Addr + x86: Imm, Tmp + +arm64: Not32 U:G:32, ZD:G:32 + Tmp, Tmp + +x86: Not32 UZD:G:32 + Tmp + Addr + +arm64: Not64 U:G:64, D:G:64 + Tmp, Tmp + +x86: Not64 UD:G:64 + Tmp + Addr + +arm64: AbsDouble U:F:64, D:F:64 + Tmp, Tmp + +arm64: AbsFloat U:F:32, D:F:32 + Tmp, Tmp + +CeilDouble U:F:64, D:F:64 + Tmp, Tmp + x86: Addr, Tmp + +CeilFloat U:F:32, D:F:32 + Tmp, Tmp + x86: Addr, Tmp + +FloorDouble U:F:64, D:F:64 + Tmp, Tmp + x86: Addr, Tmp + +FloorFloat U:F:32, D:F:32 + Tmp, Tmp + x86: Addr, Tmp + +SqrtDouble U:F:64, D:F:64 + Tmp, Tmp + x86: Addr, Tmp + +SqrtFloat U:F:32, D:F:32 + Tmp, Tmp + x86: Addr, Tmp + +ConvertInt32ToDouble U:G:32, D:F:64 + Tmp, Tmp + x86: Addr, Tmp + +64: ConvertInt64ToDouble U:G:64, D:F:64 + Tmp, Tmp + x86_64: Addr, Tmp + +ConvertInt32ToFloat U:G:32, D:F:32 + Tmp, Tmp + x86: Addr, Tmp + +64: ConvertInt64ToFloat U:G:64, D:F:32 + Tmp, Tmp + x86_64: Addr, Tmp + +CountLeadingZeros32 U:G:32, ZD:G:32 + Tmp, Tmp + x86: Addr, Tmp + +64: CountLeadingZeros64 U:G:64, D:G:64 + Tmp, Tmp + x86: Addr, Tmp + +ConvertDoubleToFloat U:F:64, D:F:32 + Tmp, Tmp + x86: Addr, Tmp + +ConvertFloatToDouble U:F:32, D:F:64 + Tmp, Tmp + x86: Addr, Tmp + +# Note that Move operates over the full register size, which is either 32-bit or 64-bit depending on +# the platform. I'm not entirely sure that this is a good thing; it might be better to just have a +# Move64 instruction. OTOH, our MacroAssemblers already have this notion of "move()" that basically +# means movePtr. +Move U:G:Ptr, D:G:Ptr + Tmp, Tmp + Imm, Tmp as signExtend32ToPtr + BigImm, Tmp + Addr, Tmp as loadPtr # This means that "Move Addr, Tmp" is code-generated as "load" not "move". 
+ Index, Tmp as loadPtr + Tmp, Addr as storePtr + Tmp, Index as storePtr + x86: Imm, Addr as storePtr + +x86: Swap32 UD:G:32, UD:G:32 + Tmp, Tmp + Tmp, Addr + +x86_64: Swap64 UD:G:64, UD:G:64 + Tmp, Tmp + Tmp, Addr + +Move32 U:G:32, ZD:G:32 + Tmp, Tmp as zeroExtend32ToPtr + Addr, Tmp as load32 + Index, Tmp as load32 + Tmp, Addr as store32 + Tmp, Index as store32 + x86: Imm, Tmp as zeroExtend32ToPtr + x86: Imm, Addr as store32 + x86: Imm, Index as store32 + +StoreZero32 U:G:32 + Addr + Index + +SignExtend32ToPtr U:G:32, D:G:Ptr + Tmp, Tmp + +ZeroExtend8To32 U:G:8, ZD:G:32 + Tmp, Tmp + x86: Addr, Tmp as load8 + x86: Index, Tmp as load8 + +SignExtend8To32 U:G:8, ZD:G:32 + Tmp, Tmp + x86: Addr, Tmp as load8SignedExtendTo32 + x86: Index, Tmp as load8SignedExtendTo32 + +ZeroExtend16To32 U:G:16, ZD:G:32 + Tmp, Tmp + x86: Addr, Tmp as load16 + x86: Index, Tmp as load16 + +SignExtend16To32 U:G:16, ZD:G:32 + Tmp, Tmp + x86: Addr, Tmp as load16SignedExtendTo32 + x86: Index, Tmp as load16SignedExtendTo32 + +MoveFloat U:F:32, D:F:32 + Tmp, Tmp as moveDouble + Addr, Tmp as loadFloat + Index, Tmp as loadFloat + Tmp, Addr as storeFloat + Tmp, Index as storeFloat + +MoveDouble U:F:64, D:F:64 + Tmp, Tmp + Addr, Tmp as loadDouble + Index, Tmp as loadDouble + Tmp, Addr as storeDouble + Tmp, Index as storeDouble + +MoveZeroToDouble D:F:64 + Tmp + +64: Move64ToDouble U:G:64, D:F:64 + Tmp, Tmp + x86: Addr, Tmp as loadDouble + Index, Tmp as loadDouble + +Move32ToFloat U:G:32, D:F:32 + Tmp, Tmp + x86: Addr, Tmp as loadFloat + Index, Tmp as loadFloat + +64: MoveDoubleTo64 U:F:64, D:G:64 + Tmp, Tmp + Addr, Tmp as load64 + Index, Tmp as load64 + +MoveFloatTo32 U:F:32, D:G:32 + Tmp, Tmp + Addr, Tmp as load32 + Index, Tmp as load32 + +Load8 U:G:8, ZD:G:32 + Addr, Tmp + Index, Tmp + +Store8 U:G:8, D:G:8 + Tmp, Index + Tmp, Addr + x86: Imm, Index + x86: Imm, Addr + +Load8SignedExtendTo32 U:G:8, ZD:G:32 + Addr, Tmp + Index, Tmp + +Load16 U:G:16, ZD:G:32 + Addr, Tmp + Index, Tmp + +Load16SignedExtendTo32 U:G:16, ZD:G:32 + Addr, Tmp + Index, Tmp + +Store16 U:G:16, D:G:16 + Tmp, Index + Tmp, Addr + +Compare32 U:G:32, U:G:32, U:G:32, ZD:G:32 + RelCond, Tmp, Tmp, Tmp + RelCond, Tmp, Imm, Tmp + +64: Compare64 U:G:32, U:G:64, U:G:64, ZD:G:32 + RelCond, Tmp, Tmp, Tmp + x86: RelCond, Tmp, Imm, Tmp + +Test32 U:G:32, U:G:32, U:G:32, ZD:G:32 + x86: ResCond, Addr, Imm, Tmp + ResCond, Tmp, Tmp, Tmp + ResCond, Tmp, BitImm, Tmp + +64: Test64 U:G:32, U:G:64, U:G:64, ZD:G:32 + x86: ResCond, Tmp, Imm, Tmp + ResCond, Tmp, Tmp, Tmp + +CompareDouble U:G:32, U:F:64, U:F:64, ZD:G:32 + DoubleCond, Tmp, Tmp, Tmp + +CompareFloat U:G:32, U:F:32, U:F:32, ZD:G:32 + DoubleCond, Tmp, Tmp, Tmp + +# Note that branches have some logic in AirOptimizeBlockOrder.cpp. If you add new branches, please make sure +# you opt them into the block order optimizations. 
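To make the correspondence between this table and the C++ side concrete, here is a minimal sketch (a hypothetical helper; Inst, Arg, and the generated isValidForm() check are the pieces assumed from the surrounding Air code) that builds the "RelCond, Tmp, Imm" form of the Branch32 opcode listed just below:

    static Inst buildExampleBranch(Value* origin, Tmp lhs)
    {
        // Branch32 U:G:32, U:G:32, U:G:32 /branch -- the "RelCond, Tmp, Imm" form.
        Inst inst(Branch32, origin, Arg::relCond(MacroAssembler::Equal), lhs, Arg::imm(42));
        ASSERT(inst.isValidForm()); // the generated check enforces exactly the forms in this file
        return inst;
    }
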
+ +Branch8 U:G:32, U:G:8, U:G:8 /branch + x86: RelCond, Addr, Imm + x86: RelCond, Index, Imm + +Branch32 U:G:32, U:G:32, U:G:32 /branch + x86: RelCond, Addr, Imm + RelCond, Tmp, Tmp + RelCond, Tmp, Imm + x86: RelCond, Tmp, Addr + x86: RelCond, Addr, Tmp + x86: RelCond, Index, Imm + +64: Branch64 U:G:32, U:G:64, U:G:64 /branch + RelCond, Tmp, Tmp + RelCond, Tmp, Imm + x86: RelCond, Tmp, Addr + x86: RelCond, Addr, Tmp + x86: RelCond, Addr, Imm + x86: RelCond, Index, Tmp + +BranchTest8 U:G:32, U:G:8, U:G:8 /branch + x86: ResCond, Addr, BitImm + x86: ResCond, Index, BitImm + +BranchTest32 U:G:32, U:G:32, U:G:32 /branch + ResCond, Tmp, Tmp + ResCond, Tmp, BitImm + x86: ResCond, Addr, BitImm + x86: ResCond, Index, BitImm + +# Warning: forms that take an immediate will sign-extend their immediate. You probably want +# BranchTest32 in most cases where you use an immediate. +64: BranchTest64 U:G:32, U:G:64, U:G:64 /branch + ResCond, Tmp, Tmp + arm64: ResCond, Tmp, BitImm64 + x86: ResCond, Tmp, BitImm + x86: ResCond, Addr, BitImm + x86: ResCond, Addr, Tmp + x86: ResCond, Index, BitImm + +BranchDouble U:G:32, U:F:64, U:F:64 /branch + DoubleCond, Tmp, Tmp + +BranchFloat U:G:32, U:F:32, U:F:32 /branch + DoubleCond, Tmp, Tmp + +BranchAdd32 U:G:32, U:G:32, U:G:32, ZD:G:32 /branch + ResCond, Tmp, Tmp, Tmp + x86:ResCond, Tmp, Addr, Tmp + x86:ResCond, Addr, Tmp, Tmp + +BranchAdd32 U:G:32, U:G:32, UZD:G:32 /branch + ResCond, Tmp, Tmp + ResCond, Imm, Tmp + x86: ResCond, Imm, Addr + x86: ResCond, Tmp, Addr + x86: ResCond, Addr, Tmp + +BranchAdd64 U:G:32, U:G:64, U:G:64, ZD:G:64 /branch + ResCond, Tmp, Tmp, Tmp + x86:ResCond, Tmp, Addr, Tmp + x86:ResCond, Addr, Tmp, Tmp + +64: BranchAdd64 U:G:32, U:G:64, UD:G:64 /branch + ResCond, Imm, Tmp + ResCond, Tmp, Tmp + x86:ResCond, Addr, Tmp + +x86: BranchMul32 U:G:32, U:G:32, UZD:G:32 /branch + ResCond, Tmp, Tmp + ResCond, Addr, Tmp + +x86: BranchMul32 U:G:32, U:G:32, U:G:32, ZD:G:32 /branch + ResCond, Tmp, Imm, Tmp + +arm64: BranchMul32 U:G:32, U:G:32, U:G:32, S:G:32, S:G:32, ZD:G:32 /branch + ResCond, Tmp, Tmp, Tmp, Tmp, Tmp + +x86_64: BranchMul64 U:G:32, U:G:64, UZD:G:64 /branch + ResCond, Tmp, Tmp + +arm64: BranchMul64 U:G:32, U:G:64, U:G:64, S:G:64, S:G:64, ZD:G:64 /branch + ResCond, Tmp, Tmp, Tmp, Tmp, Tmp + +BranchSub32 U:G:32, U:G:32, UZD:G:32 /branch + ResCond, Tmp, Tmp + ResCond, Imm, Tmp + x86: ResCond, Imm, Addr + x86: ResCond, Tmp, Addr + x86: ResCond, Addr, Tmp + +64: BranchSub64 U:G:32, U:G:64, UD:G:64 /branch + ResCond, Imm, Tmp + ResCond, Tmp, Tmp + +BranchNeg32 U:G:32, UZD:G:32 /branch + ResCond, Tmp + +64: BranchNeg64 U:G:32, UZD:G:64 /branch + ResCond, Tmp + +MoveConditionally32 U:G:32, U:G:32, U:G:32, U:G:Ptr, UD:G:Ptr + RelCond, Tmp, Tmp, Tmp, Tmp + +MoveConditionally32 U:G:32, U:G:32, U:G:32, U:G:Ptr, U:G:Ptr, D:G:Ptr + RelCond, Tmp, Tmp, Tmp, Tmp, Tmp + RelCond, Tmp, Imm, Tmp, Tmp, Tmp + +64: MoveConditionally64 U:G:32, U:G:64, U:G:64, U:G:Ptr, UD:G:Ptr + RelCond, Tmp, Tmp, Tmp, Tmp + +64: MoveConditionally64 U:G:32, U:G:64, U:G:64, U:G:Ptr, U:G:Ptr, D:G:Ptr + RelCond, Tmp, Tmp, Tmp, Tmp, Tmp + RelCond, Tmp, Imm, Tmp, Tmp, Tmp + +MoveConditionallyTest32 U:G:32, U:G:32, U:G:32, U:G:Ptr, UD:G:Ptr + ResCond, Tmp, Tmp, Tmp, Tmp + x86: ResCond, Tmp, Imm, Tmp, Tmp + +MoveConditionallyTest32 U:G:32, U:G:32, U:G:32, U:G:Ptr, U:G:Ptr, D:G:Ptr + ResCond, Tmp, Tmp, Tmp, Tmp, Tmp + ResCond, Tmp, BitImm, Tmp, Tmp, Tmp + +64: MoveConditionallyTest64 U:G:32, U:G:64, U:G:64, U:G:Ptr, UD:G:Ptr + ResCond, Tmp, Tmp, Tmp, Tmp + x86: ResCond, Tmp, Imm, Tmp, Tmp + 
+64: MoveConditionallyTest64 U:G:32, U:G:32, U:G:32, U:G:Ptr, U:G:Ptr, D:G:Ptr + ResCond, Tmp, Tmp, Tmp, Tmp, Tmp + x86_64: ResCond, Tmp, Imm, Tmp, Tmp, Tmp + +MoveConditionallyDouble U:G:32, U:F:64, U:F:64, U:G:Ptr, U:G:Ptr, D:G:Ptr + DoubleCond, Tmp, Tmp, Tmp, Tmp, Tmp + +MoveConditionallyDouble U:G:32, U:F:64, U:F:64, U:G:Ptr, UD:G:Ptr + DoubleCond, Tmp, Tmp, Tmp, Tmp + +MoveConditionallyFloat U:G:32, U:F:32, U:F:32, U:G:Ptr, U:G:Ptr, D:G:Ptr + DoubleCond, Tmp, Tmp, Tmp, Tmp, Tmp + +MoveConditionallyFloat U:G:32, U:F:32, U:F:32, U:G:Ptr, UD:G:Ptr + DoubleCond, Tmp, Tmp, Tmp, Tmp + +MoveDoubleConditionally32 U:G:32, U:G:32, U:G:32, U:F:64, U:F:64, D:F:64 + RelCond, Tmp, Tmp, Tmp, Tmp, Tmp + RelCond, Tmp, Imm, Tmp, Tmp, Tmp + x86: RelCond, Addr, Imm, Tmp, Tmp, Tmp + x86: RelCond, Tmp, Addr, Tmp, Tmp, Tmp + x86: RelCond, Addr, Tmp, Tmp, Tmp, Tmp + x86: RelCond, Index, Imm, Tmp, Tmp, Tmp + +64: MoveDoubleConditionally64 U:G:32, U:G:64, U:G:64, U:F:64, U:F:64, D:F:64 + RelCond, Tmp, Tmp, Tmp, Tmp, Tmp + RelCond, Tmp, Imm, Tmp, Tmp, Tmp + x86_64: RelCond, Tmp, Addr, Tmp, Tmp, Tmp + x86_64: RelCond, Addr, Tmp, Tmp, Tmp, Tmp + x86_64: RelCond, Addr, Imm, Tmp, Tmp, Tmp + x86_64: RelCond, Index, Tmp, Tmp, Tmp, Tmp + +MoveDoubleConditionallyTest32 U:G:32, U:G:32, U:G:32, U:F:64, U:F:64, D:F:64 + ResCond, Tmp, Tmp, Tmp, Tmp, Tmp + ResCond, Tmp, BitImm, Tmp, Tmp, Tmp + x86: ResCond, Addr, Imm, Tmp, Tmp, Tmp + x86: ResCond, Index, Imm, Tmp, Tmp, Tmp + +# Warning: forms that take an immediate will sign-extend their immediate. You probably want +# MoveDoubleConditionallyTest32 in most cases where you use an immediate. +64: MoveDoubleConditionallyTest64 U:G:32, U:G:64, U:G:64, U:F:64, U:F:64, D:F:64 + ResCond, Tmp, Tmp, Tmp, Tmp, Tmp + x86_64: ResCond, Tmp, Imm, Tmp, Tmp, Tmp + x86_64: ResCond, Addr, Imm, Tmp, Tmp, Tmp + x86_64: ResCond, Addr, Tmp, Tmp, Tmp, Tmp + x86_64: ResCond, Index, Imm, Tmp, Tmp, Tmp + +MoveDoubleConditionallyDouble U:G:32, U:F:64, U:F:64, U:F:64, U:F:64, D:F:64 + DoubleCond, Tmp, Tmp, Tmp, Tmp, Tmp + +MoveDoubleConditionallyFloat U:G:32, U:F:32, U:F:32, U:F:64, U:F:64, D:F:64 + DoubleCond, Tmp, Tmp, Tmp, Tmp, Tmp + +MemoryFence /effects +StoreFence /effects +LoadFence /effects + +Jump /branch + +RetVoid /return + +Ret32 U:G:32 /return + Tmp + +64: Ret64 U:G:64 /return + Tmp + +RetFloat U:F:32 /return + Tmp + +RetDouble U:F:64 /return + Tmp + +Oops /terminal + +# This is a terminal but we express it as a Custom because we don't want it to have a code +# generator. +custom EntrySwitch + +# A Shuffle is a multi-source, multi-destination move. It simultaneously does multiple moves at once. +# The moves are specified as triplets of src, dst, and width. For example you can request a swap this +# way: +# Shuffle %tmp1, %tmp2, 64, %tmp2, %tmp1, 64 +custom Shuffle + +# Air allows for exotic behavior. A Patch's behavior is determined entirely by the Special operand, +# which must be the first operand. +custom Patch + +# Instructions used for lowering C calls. These don't make it to Air generation. They get lowered to +# something else first. The origin Value must be a CCallValue. +custom CCall +custom ColdCCall + +# This is a special wasm opcode that branches to a trap handler. This uses the generator located to Air::Code +# to produce the side-exit code. 
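As a worked companion to the Shuffle swap described above ("Shuffle %tmp1, %tmp2, 64, %tmp2, %tmp1, 64"), here is how that instruction could be assembled from C++; the helper is hypothetical, but the Inst and Arg calls mirror the ones lowerMacros() uses earlier in this patch:

    static Inst buildSwap(Value* origin, Tmp tmp1, Tmp tmp2)
    {
        // Shuffle arguments come in (src, dst, width) triples, so a swap is two triples.
        Inst shuffle(Shuffle, origin);
        shuffle.args.append(tmp1);
        shuffle.args.append(tmp2);
        shuffle.args.append(Arg::widthArg(Arg::Width64));
        shuffle.args.append(tmp2);
        shuffle.args.append(tmp1);
        shuffle.args.append(Arg::widthArg(Arg::Width64));
        return shuffle;
    }
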
+custom WasmBoundsCheck + diff --git a/Source/JavaScriptCore/b3/air/AirOptimizeBlockOrder.cpp b/Source/JavaScriptCore/b3/air/AirOptimizeBlockOrder.cpp new file mode 100644 index 000000000..11ca3f3d4 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirOptimizeBlockOrder.cpp @@ -0,0 +1,194 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirOptimizeBlockOrder.h" + +#if ENABLE(B3_JIT) + +#include "AirBlockWorklist.h" +#include "AirCode.h" +#include "AirInstInlines.h" +#include "AirPhaseScope.h" +#include <wtf/BubbleSort.h> + +namespace JSC { namespace B3 { namespace Air { + +namespace { + +class SortedSuccessors { +public: + SortedSuccessors() + { + } + + void append(BasicBlock* block) + { + m_successors.append(block); + } + + void process(BlockWorklist& worklist) + { + // We prefer a stable sort, and we don't want it to go off the rails if we see NaN. Also, the number + // of successors is bounded. In fact, it currently cannot be more than 2. :-) + bubbleSort( + m_successors.begin(), m_successors.end(), + [] (BasicBlock* left, BasicBlock* right) { + return left->frequency() < right->frequency(); + }); + + // Pushing the successors in ascending order of frequency ensures that the very next block we visit + // is our highest-frequency successor (unless that successor has already been visited). + for (unsigned i = 0; i < m_successors.size(); ++i) + worklist.push(m_successors[i]); + + m_successors.resize(0); + } + +private: + Vector<BasicBlock*, 2> m_successors; +}; + +} // anonymous namespace + +Vector<BasicBlock*> blocksInOptimizedOrder(Code& code) +{ + Vector<BasicBlock*> blocksInOrder; + + BlockWorklist fastWorklist; + SortedSuccessors sortedSuccessors; + SortedSuccessors sortedSlowSuccessors; + + // We expect entrypoint lowering to have already happened. + RELEASE_ASSERT(code.numEntrypoints()); + + auto appendSuccessor = [&] (const FrequentedBlock& block) { + if (block.isRare()) + sortedSlowSuccessors.append(block.block()); + else + sortedSuccessors.append(block.block()); + }; + + // For everything but the first entrypoint, we push them in order of frequency and frequency + // class. 
+ for (unsigned i = 1; i < code.numEntrypoints(); ++i) + appendSuccessor(code.entrypoint(i)); + + // Always push the primary successor last so that it gets highest priority. + fastWorklist.push(code.entrypoint(0).block()); + + while (BasicBlock* block = fastWorklist.pop()) { + blocksInOrder.append(block); + for (FrequentedBlock& successor : block->successors()) + appendSuccessor(successor); + sortedSuccessors.process(fastWorklist); + } + + BlockWorklist slowWorklist; + sortedSlowSuccessors.process(slowWorklist); + + while (BasicBlock* block = slowWorklist.pop()) { + // We might have already processed this block. + if (fastWorklist.saw(block)) + continue; + + blocksInOrder.append(block); + for (BasicBlock* successor : block->successorBlocks()) + sortedSuccessors.append(successor); + sortedSuccessors.process(slowWorklist); + } + + ASSERT(fastWorklist.isEmpty()); + ASSERT(slowWorklist.isEmpty()); + + return blocksInOrder; +} + +void optimizeBlockOrder(Code& code) +{ + PhaseScope phaseScope(code, "optimizeBlockOrder"); + + Vector<BasicBlock*> blocksInOrder = blocksInOptimizedOrder(code); + + // Place blocks into Code's block list according to the ordering in blocksInOrder. We do this by leaking + // all of the blocks and then readopting them. + for (auto& entry : code.blockList()) + entry.release(); + + code.blockList().resize(0); + + for (unsigned i = 0; i < blocksInOrder.size(); ++i) { + BasicBlock* block = blocksInOrder[i]; + block->setIndex(i); + code.blockList().append(std::unique_ptr<BasicBlock>(block)); + } + + // Finally, flip any branches that we recognize. It's most optimal if the taken successor does not point + // at the next block. + for (BasicBlock* block : code) { + Inst& branch = block->last(); + + // It's somewhat tempting to just say that if the block has two successors and the first arg is + // invertible, then we can do the optimization. But that's wagging the dog. The fact that an + // instruction happens to have an argument that is invertible doesn't mean it's a branch, even though + // it is true that currently only branches have invertible arguments. It's also tempting to say that + // the /branch flag in AirOpcode.opcodes tells us that something is a branch - except that there, + // /branch also means Jump. The approach taken here means that if you add new branch instructions and + // forget about this phase, then at worst your new instructions won't opt into the inversion + // optimization. You'll probably realize that as soon as you look at the disassembly, and it + // certainly won't cause any correctness issues. + + switch (branch.kind.opcode) { + case Branch8: + case Branch32: + case Branch64: + case BranchTest8: + case BranchTest32: + case BranchTest64: + case BranchFloat: + case BranchDouble: + case BranchAdd32: + case BranchAdd64: + case BranchMul32: + case BranchMul64: + case BranchSub32: + case BranchSub64: + case BranchNeg32: + case BranchNeg64: + if (code.findNextBlock(block) == block->successorBlock(0) && branch.args[0].isInvertible()) { + std::swap(block->successor(0), block->successor(1)); + branch.args[0] = branch.args[0].inverted(); + } + break; + + default: + break; + } + } +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirOptimizeBlockOrder.h b/Source/JavaScriptCore/b3/air/AirOptimizeBlockOrder.h new file mode 100644 index 000000000..3911fcc8d --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirOptimizeBlockOrder.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include <wtf/Vector.h> + +namespace JSC { namespace B3 { namespace Air { + +class BasicBlock; +class Code; + +// Returns a list of blocks sorted according to what would be the current optimal order. This shares +// some properties with a pre-order traversal. In particular, each block will appear after at least +// one of its predecessors. +Vector<BasicBlock*> blocksInOptimizedOrder(Code&); + +// Reorders the basic blocks to keep hot blocks at the top, and maximize the likelihood that a frequently +// taken edge is just a fall-through. + +void optimizeBlockOrder(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirPadInterference.cpp b/Source/JavaScriptCore/b3/air/AirPadInterference.cpp new file mode 100644 index 000000000..91de56bc8 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirPadInterference.cpp @@ -0,0 +1,88 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. 
OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirPadInterference.h" + +#if ENABLE(B3_JIT) + +#include "AirCode.h" +#include "AirInsertionSet.h" +#include "AirInstInlines.h" + +namespace JSC { namespace B3 { namespace Air { + +void padInterference(Code& code) +{ + InsertionSet insertionSet(code); + for (BasicBlock* block : code) { + bool prevHadLate = false; + for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) { + Inst& inst = block->at(instIndex); + + bool hasEarlyDef = false; + bool hasLate = false; + inst.forEachArg( + [&] (Arg&, Arg::Role role, Arg::Type, Arg::Width) { + switch (role) { + case Arg::EarlyDef: + hasEarlyDef = true; + break; + case Arg::LateUse: + case Arg::Def: + case Arg::ZDef: + case Arg::LateColdUse: + case Arg::UseDef: + case Arg::UseZDef: + hasLate = true; + break; + case Arg::Scratch: + hasEarlyDef = true; + hasLate = true; + break; + case Arg::Use: + case Arg::ColdUse: + case Arg::UseAddr: + break; + } + }); + if (inst.kind.opcode == Patch) { + hasEarlyDef |= !inst.extraEarlyClobberedRegs().isEmpty(); + hasLate |= !inst.extraClobberedRegs().isEmpty(); + } + + if (hasEarlyDef && prevHadLate) + insertionSet.insert(instIndex, Nop, inst.origin); + + prevHadLate = hasLate; + } + insertionSet.execute(block); + } +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirPadInterference.h b/Source/JavaScriptCore/b3/air/AirPadInterference.h new file mode 100644 index 000000000..18f80832f --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirPadInterference.h @@ -0,0 +1,48 @@ +/* + * Copyright (C) 2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// This isn't a phase - it's meant to be a utility that other phases use. Air reasons about liveness by +// reasoning about interference at boundaries between instructions. This can go wrong - for example, a +// late use in one instruction doesn't actually interfere with an early def of the next instruction, but +// Air thinks that it does. This is convenient because it works great in the most common case: early uses +// and late defs. In practice, only the register allocators need to use this, since only they need to be +// able to color the interference graph using a bounded number of colors. +// +// See https://bugs.webkit.org/show_bug.cgi?id=163548#c2 for more info. + +void padInterference(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirPhaseScope.cpp b/Source/JavaScriptCore/b3/air/AirPhaseScope.cpp new file mode 100644 index 000000000..062ea2483 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirPhaseScope.cpp @@ -0,0 +1,60 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirPhaseScope.h" + +#if ENABLE(B3_JIT) + +#include "AirCode.h" +#include "AirValidate.h" +#include "B3Common.h" + +namespace JSC { namespace B3 { namespace Air { + +PhaseScope::PhaseScope(Code& code, const char* name) + : m_code(code) + , m_name(name) + , m_timingScope(name) +{ + if (shouldDumpIRAtEachPhase(AirMode)) { + dataLog("Air after ", code.lastPhaseName(), ", before ", name, ":\n"); + dataLog(code); + } + + if (shouldSaveIRBeforePhase()) + m_dumpBefore = toCString(code); +} + +PhaseScope::~PhaseScope() +{ + m_code.setLastPhaseName(m_name); + if (shouldValidateIRAtEachPhase()) + validate(m_code, m_dumpBefore.data()); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirPhaseScope.h b/Source/JavaScriptCore/b3/air/AirPhaseScope.h new file mode 100644 index 000000000..71f788fce --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirPhaseScope.h @@ -0,0 +1,53 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "B3TimingScope.h" +#include <wtf/Noncopyable.h> +#include <wtf/text/CString.h> + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +class PhaseScope { + WTF_MAKE_NONCOPYABLE(PhaseScope); +public: + PhaseScope(Code&, const char* name); + ~PhaseScope(); // this does validation + +private: + Code& m_code; + const char* m_name; + TimingScope m_timingScope; + CString m_dumpBefore; +}; + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirReportUsedRegisters.cpp b/Source/JavaScriptCore/b3/air/AirReportUsedRegisters.cpp new file mode 100644 index 000000000..bb0aeab77 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirReportUsedRegisters.cpp @@ -0,0 +1,96 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "AirReportUsedRegisters.h" + +#if ENABLE(B3_JIT) + +#include "AirArgInlines.h" +#include "AirCode.h" +#include "AirInstInlines.h" +#include "AirLiveness.h" +#include "AirPhaseScope.h" + +namespace JSC { namespace B3 { namespace Air { + +void reportUsedRegisters(Code& code) +{ + PhaseScope phaseScope(code, "reportUsedRegisters"); + + RegLiveness liveness(code); + + for (BasicBlock* block : code) { + RegLiveness::LocalCalc localCalc(liveness, block); + + for (unsigned instIndex = block->size(); instIndex--;) { + Inst& inst = block->at(instIndex); + + // Kill dead assignments to registers. For simplicity we say that a store is killable if + // it has only late defs and those late defs are to registers that are dead right now. + if (!inst.hasNonArgEffects()) { + bool canDelete = true; + inst.forEachArg( + [&] (Arg& arg, Arg::Role role, Arg::Type, Arg::Width) { + if (Arg::isEarlyDef(role)) { + canDelete = false; + return; + } + if (!Arg::isLateDef(role)) + return; + if (!arg.isReg()) { + canDelete = false; + return; + } + if (localCalc.isLive(arg.reg())) { + canDelete = false; + return; + } + }); + if (canDelete) + inst = Inst(); + } + + if (inst.kind.opcode == Patch) { + RegisterSet registerSet; + for (Reg reg : localCalc.live()) + registerSet.set(reg); + inst.reportUsedRegisters(registerSet); + } + localCalc.execute(instIndex); + } + + block->insts().removeAllMatching( + [&] (const Inst& inst) -> bool { + return !inst; + }); + } +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + + diff --git a/Source/JavaScriptCore/b3/air/AirReportUsedRegisters.h b/Source/JavaScriptCore/b3/air/AirReportUsedRegisters.h new file mode 100644 index 000000000..ea175dcf4 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirReportUsedRegisters.h @@ -0,0 +1,41 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// Performs a liveness analysis over registers and reports the live registers to every Special. Takes +// the opportunity to kill dead assignments to registers, since it has access to register liveness. 
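// (Illustrative example of a killable assignment: a "Move $0, %rax" has no non-argument effects
// and its only def is the register %rax, so if %rax is not live immediately after the
// instruction, reportUsedRegisters() simply deletes it while it is walking the liveness anyway.)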
+ +void reportUsedRegisters(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirSimplifyCFG.cpp b/Source/JavaScriptCore/b3/air/AirSimplifyCFG.cpp new file mode 100644 index 000000000..c66f63feb --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirSimplifyCFG.cpp @@ -0,0 +1,171 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirSimplifyCFG.h" + +#if ENABLE(B3_JIT) + +#include "AirCode.h" +#include "AirInstInlines.h" +#include "AirPhaseScope.h" + +namespace JSC { namespace B3 { namespace Air { + +bool simplifyCFG(Code& code) +{ + const bool verbose = false; + + PhaseScope phaseScope(code, "simplifyCFG"); + + // We have three easy simplification rules: + // + // 1) If a successor is a block that just jumps to another block, then jump directly to + // that block. + // + // 2) If all successors are the same and the operation has no effects, then use a jump + // instead. + // + // 3) If you jump to a block that is not you and has one predecessor, then merge. + // + // Note that because of the first rule, this phase may introduce critical edges. That's fine. + // If you need broken critical edges, then you have to break them yourself. + + bool result = false; + for (;;) { + if (verbose) { + dataLog("Air before an iteration of simplifyCFG:\n"); + dataLog(code); + } + + bool changed = false; + for (BasicBlock* block : code) { + // We rely on predecessors being conservatively correct. Verify this here. + if (shouldValidateIRAtEachPhase()) { + for (BasicBlock* block : code) { + for (BasicBlock* successor : block->successorBlocks()) + RELEASE_ASSERT(successor->containsPredecessor(block)); + } + } + + // We don't care about blocks that don't have successors. + if (!block->numSuccessors()) + continue; + + // First check if any of the successors of this block can be forwarded over. 
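// (Illustrative example of the forwarding below: if this block has a successor B whose only
// instruction is a Jump to C, the successor entry is rewritten to point straight at C. B still
// lists C as its own successor, which is why the code adds this block as a predecessor of C
// rather than calling replacePredecessor().)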
+ for (BasicBlock*& successor : block->successorBlocks()) { + if (successor != block + && successor->size() == 1 + && successor->last().kind.opcode == Jump) { + BasicBlock* newSuccessor = successor->successorBlock(0); + if (newSuccessor != successor) { + if (verbose) { + dataLog( + "Replacing ", pointerDump(block), "->", pointerDump(successor), + " with ", pointerDump(block), "->", pointerDump(newSuccessor), "\n"); + } + // Note that we do not do replacePredecessor() because the block we're + // skipping will still have newSuccessor as its successor. + newSuccessor->addPredecessor(block); + successor = newSuccessor; + changed = true; + } + } + } + + // Now check if the block's terminal can be replaced with a jump. The terminal must not + // have weird effects. + if (block->numSuccessors() > 1 + && !block->last().hasNonControlEffects()) { + // All of the successors must be the same. + bool allSame = true; + BasicBlock* firstSuccessor = block->successorBlock(0); + for (unsigned i = 1; i < block->numSuccessors(); ++i) { + if (block->successorBlock(i) != firstSuccessor) { + allSame = false; + break; + } + } + if (allSame) { + if (verbose) + dataLog("Changing ", pointerDump(block), "'s terminal to a Jump.\n"); + block->last() = Inst(Jump, block->last().origin); + block->successors().resize(1); + block->successors()[0].frequency() = FrequencyClass::Normal; + changed = true; + } + } + + // Finally handle jumps to a block with one predecessor. + if (block->numSuccessors() == 1 + && !block->last().hasNonControlEffects()) { + BasicBlock* successor = block->successorBlock(0); + if (successor != block && successor->numPredecessors() == 1) { + RELEASE_ASSERT(successor->predecessor(0) == block); + + // We can merge the two blocks because the predecessor only jumps to the successor + // and the successor is only reachable from the predecessor. + + // Remove the terminal. + Value* origin = block->insts().takeLast().origin; + + // Append the full contents of the successor to the predecessor. + block->insts().reserveCapacity(block->size() + successor->size()); + for (Inst& inst : *successor) + block->appendInst(WTFMove(inst)); + + // Make sure that our successors are the successor's successors. + block->successors() = WTFMove(successor->successors()); + + // Make sure that the successor has nothing left in it except an oops. + successor->resize(1); + successor->last() = Inst(Oops, origin); + successor->successors().clear(); + + // Ensure that the predecessors of block's new successors know what's up. + for (BasicBlock* newSuccessor : block->successorBlocks()) + newSuccessor->replacePredecessor(successor, block); + + if (verbose) + dataLog("Merged ", pointerDump(block), "->", pointerDump(successor), "\n"); + changed = true; + } + } + } + + if (!changed) + break; + result = true; + code.resetReachability(); + } + + return result; +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + + diff --git a/Source/JavaScriptCore/b3/air/AirSimplifyCFG.h b/Source/JavaScriptCore/b3/air/AirSimplifyCFG.h new file mode 100644 index 000000000..7ac510d4b --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirSimplifyCFG.h @@ -0,0 +1,40 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// Simplifies the control flow graph by removing jump-only blocks and merging jumps. + +bool simplifyCFG(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirSpecial.cpp b/Source/JavaScriptCore/b3/air/AirSpecial.cpp new file mode 100644 index 000000000..e825767b0 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirSpecial.cpp @@ -0,0 +1,89 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "AirSpecial.h" + +#if ENABLE(B3_JIT) + +#include <limits.h> +#include <wtf/StringPrintStream.h> + +namespace JSC { namespace B3 { namespace Air { + +const char* const Special::dumpPrefix = "&"; + +Special::Special() +{ +} + +Special::~Special() +{ +} + +CString Special::name() const +{ + StringPrintStream out; + dumpImpl(out); + return out.toCString(); +} + +std::optional<unsigned> Special::shouldTryAliasingDef(Inst&) +{ + return std::nullopt; +} + +bool Special::isTerminal(Inst&) +{ + return false; +} + +bool Special::hasNonArgEffects(Inst&) +{ + return true; +} + +bool Special::hasNonArgNonControlEffects(Inst&) +{ + return true; +} + +void Special::dump(PrintStream& out) const +{ + out.print(dumpPrefix); + dumpImpl(out); + if (m_index != UINT_MAX) + out.print(m_index); +} + +void Special::deepDump(PrintStream& out) const +{ + out.print(*this, ": "); + deepDumpImpl(out); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirSpecial.h b/Source/JavaScriptCore/b3/air/AirSpecial.h new file mode 100644 index 000000000..480cbfcba --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirSpecial.h @@ -0,0 +1,140 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirInst.h" +#include "B3SparseCollection.h" +#include <wtf/FastMalloc.h> +#include <wtf/Noncopyable.h> +#include <wtf/ScopedLambda.h> +#include <wtf/text/CString.h> + +namespace JSC { namespace B3 { namespace Air { + +class Code; +struct GenerationContext; + +class Special { + WTF_MAKE_NONCOPYABLE(Special); + WTF_MAKE_FAST_ALLOCATED; +public: + static const char* const dumpPrefix; + + Special(); + virtual ~Special(); + + Code& code() const { return *m_code; } + + CString name() const; + + virtual void forEachArg(Inst&, const ScopedLambda<Inst::EachArgCallback>&) = 0; + virtual bool isValid(Inst&) = 0; + virtual bool admitsStack(Inst&, unsigned argIndex) = 0; + virtual std::optional<unsigned> shouldTryAliasingDef(Inst&); + + // This gets called on for each Inst that uses this Special. Note that there is no way to + // guarantee that a Special gets used from just one Inst, because Air might taildup late. 
So, + // if you want to pass this information down to generate(), then you have to either: + // + // 1) Generate Air that starts with a separate Special per Patch Inst, and then merge + // usedRegister sets. This is probably not great, but it optimizes for the common case that + // Air didn't duplicate code or that such duplication didn't cause any interesting changes to + // register assignment. + // + // 2) Have the Special maintain a HashMap<Inst*, RegisterSet>. This works because the analysis + // that feeds into this call is performed just before code generation and there is no way + // for the Vector<>'s that contain the Insts to be reallocated. This allows generate() to + // consult the HashMap. + // + // 3) Hybrid: you could use (1) and fire up a HashMap if you see multiple calls. + // + // Note that it's not possible to rely on reportUsedRegisters() being called in the same order + // as generate(). If we could rely on that, then we could just have each Special instance + // maintain a Vector of RegisterSet's and then process that vector in the right order in + // generate(). But, the ordering difference is unlikely to change since it would harm the + // performance of the liveness analysis. + // + // Currently, we do (1) for B3 stackmaps. + virtual void reportUsedRegisters(Inst&, const RegisterSet&) = 0; + + virtual CCallHelpers::Jump generate(Inst&, CCallHelpers&, GenerationContext&) = 0; + + virtual RegisterSet extraEarlyClobberedRegs(Inst&) = 0; + virtual RegisterSet extraClobberedRegs(Inst&) = 0; + + // By default, this returns false. + virtual bool isTerminal(Inst&); + + // By default, this returns true. + virtual bool hasNonArgEffects(Inst&); + + // By default, this returns true. + virtual bool hasNonArgNonControlEffects(Inst&); + + void dump(PrintStream&) const; + void deepDump(PrintStream&) const; + +protected: + virtual void dumpImpl(PrintStream&) const = 0; + virtual void deepDumpImpl(PrintStream&) const = 0; + +private: + friend class Code; + friend class SparseCollection<Special>; + + unsigned m_index { UINT_MAX }; + Code* m_code { nullptr }; +}; + +class DeepSpecialDump { +public: + DeepSpecialDump(const Special* special) + : m_special(special) + { + } + + void dump(PrintStream& out) const + { + if (m_special) + m_special->deepDump(out); + else + out.print("<null>"); + } + +private: + const Special* m_special; +}; + +inline DeepSpecialDump deepDump(const Special* special) +{ + return DeepSpecialDump(special); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirSpillEverything.cpp b/Source/JavaScriptCore/b3/air/AirSpillEverything.cpp new file mode 100644 index 000000000..ebf3774a5 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirSpillEverything.cpp @@ -0,0 +1,190 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirSpillEverything.h" + +#if ENABLE(B3_JIT) + +#include "AirArgInlines.h" +#include "AirCode.h" +#include "AirInsertionSet.h" +#include "AirInstInlines.h" +#include "AirLiveness.h" +#include "AirPadInterference.h" +#include "AirPhaseScope.h" +#include <wtf/IndexMap.h> + +namespace JSC { namespace B3 { namespace Air { + +void spillEverything(Code& code) +{ + PhaseScope phaseScope(code, "spillEverything"); + + padInterference(code); + + // We want to know the set of registers used at every point in every basic block. + IndexMap<BasicBlock, Vector<RegisterSet>> usedRegisters(code.size()); + GPLiveness gpLiveness(code); + FPLiveness fpLiveness(code); + for (BasicBlock* block : code) { + GPLiveness::LocalCalc gpLocalCalc(gpLiveness, block); + FPLiveness::LocalCalc fpLocalCalc(fpLiveness, block); + + usedRegisters[block].resize(block->size() + 1); + + auto setUsedRegisters = [&] (unsigned index) { + RegisterSet& registerSet = usedRegisters[block][index]; + for (Tmp tmp : gpLocalCalc.live()) { + if (tmp.isReg()) + registerSet.set(tmp.reg()); + } + for (Tmp tmp : fpLocalCalc.live()) { + if (tmp.isReg()) + registerSet.set(tmp.reg()); + } + + // Gotta account for dead assignments to registers. These may happen because the input + // code is suboptimal. + Inst::forEachDefWithExtraClobberedRegs<Tmp>( + block->get(index - 1), block->get(index), + [&] (const Tmp& tmp, Arg::Role, Arg::Type, Arg::Width) { + if (tmp.isReg()) + registerSet.set(tmp.reg()); + }); + }; + + for (unsigned instIndex = block->size(); instIndex--;) { + setUsedRegisters(instIndex + 1); + gpLocalCalc.execute(instIndex); + fpLocalCalc.execute(instIndex); + } + setUsedRegisters(0); + } + + // Allocate a stack slot for each tmp. + Vector<StackSlot*> allStackSlots[Arg::numTypes]; + for (unsigned typeIndex = 0; typeIndex < Arg::numTypes; ++typeIndex) { + Vector<StackSlot*>& stackSlots = allStackSlots[typeIndex]; + Arg::Type type = static_cast<Arg::Type>(typeIndex); + stackSlots.resize(code.numTmps(type)); + for (unsigned tmpIndex = code.numTmps(type); tmpIndex--;) + stackSlots[tmpIndex] = code.addStackSlot(8, StackSlotKind::Spill); + } + + InsertionSet insertionSet(code); + for (BasicBlock* block : code) { + for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) { + RegisterSet& setBefore = usedRegisters[block][instIndex]; + RegisterSet& setAfter = usedRegisters[block][instIndex + 1]; + Inst& inst = block->at(instIndex); + + // First try to spill directly. 
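// (Illustrative example: on x86, an instruction form that admitsStack() for an argument, say an
// "Add32 %tmp, %spillTmp" whose opcode table entry allows Tmp, Addr, can have the spilled Tmp
// rewritten in place to Arg::stack(stackSlot), avoiding the extra load/store Moves inserted by
// the fallback path below.)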
+ for (unsigned i = 0; i < inst.args.size(); ++i) { + Arg& arg = inst.args[i]; + + if (arg.isTmp()) { + if (arg.isReg()) + continue; + + if (inst.admitsStack(i)) { + StackSlot* stackSlot = allStackSlots[arg.type()][arg.tmpIndex()]; + arg = Arg::stack(stackSlot); + continue; + } + } + } + + // Now fall back on spilling using separate Move's to load/store the tmp. + inst.forEachTmp( + [&] (Tmp& tmp, Arg::Role role, Arg::Type type, Arg::Width) { + if (tmp.isReg()) + return; + + StackSlot* stackSlot = allStackSlots[type][tmp.tmpIndex()]; + Arg arg = Arg::stack(stackSlot); + + // Need to figure out a register to use. How we do that depends on the role. + Reg chosenReg; + switch (role) { + case Arg::Use: + case Arg::ColdUse: + for (Reg reg : code.regsInPriorityOrder(type)) { + if (!setBefore.get(reg)) { + setBefore.set(reg); + chosenReg = reg; + break; + } + } + break; + case Arg::Def: + case Arg::ZDef: + for (Reg reg : code.regsInPriorityOrder(type)) { + if (!setAfter.get(reg)) { + setAfter.set(reg); + chosenReg = reg; + break; + } + } + break; + case Arg::UseDef: + case Arg::UseZDef: + case Arg::LateUse: + case Arg::LateColdUse: + case Arg::Scratch: + case Arg::EarlyDef: + for (Reg reg : code.regsInPriorityOrder(type)) { + if (!setBefore.get(reg) && !setAfter.get(reg)) { + setAfter.set(reg); + setBefore.set(reg); + chosenReg = reg; + break; + } + } + break; + case Arg::UseAddr: + // We will never UseAddr a Tmp, that doesn't make sense. + RELEASE_ASSERT_NOT_REACHED(); + break; + } + RELEASE_ASSERT(chosenReg); + + tmp = Tmp(chosenReg); + + Opcode move = type == Arg::GP ? Move : MoveDouble; + + if (Arg::isAnyUse(role) && role != Arg::Scratch) + insertionSet.insert(instIndex, move, inst.origin, arg, tmp); + if (Arg::isAnyDef(role)) + insertionSet.insert(instIndex + 1, move, inst.origin, tmp, arg); + }); + } + insertionSet.execute(block); + } +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirSpillEverything.h b/Source/JavaScriptCore/b3/air/AirSpillEverything.h new file mode 100644 index 000000000..0fdca6677 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirSpillEverything.h @@ -0,0 +1,49 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// This is a phase for testing. It behaves like a register allocator in the sense that it +// eliminates temporaries from the program. It accomplishes this by always spilling all +// temporaries. The resulting code is going to be very inefficient. This phase is great if you +// think that there is a bug in the register allocator. You can confirm this by running this +// phase instead of the register allocator. +// +// Note that even though this phase does the cheapest thing possible, it's not even written in a +// particularly efficient way. So, don't get any ideas about using this phase to reduce compiler +// latency. If you wanted to do that, you should come up with a clever algorithm instead of using +// this silly thing. + +void spillEverything(Code&); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirStackSlot.cpp b/Source/JavaScriptCore/b3/air/AirStackSlot.cpp new file mode 100644 index 000000000..58cac0657 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirStackSlot.cpp @@ -0,0 +1,74 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirStackSlot.h" + +#if ENABLE(B3_JIT) + +#include "B3StackSlot.h" + +namespace JSC { namespace B3 { namespace Air { + +void StackSlot::setOffsetFromFP(intptr_t value) +{ + m_offsetFromFP = value; + if (m_b3Slot) + m_b3Slot->m_offsetFromFP = value; +} + +unsigned StackSlot::jsHash() const +{ + return static_cast<unsigned>(m_kind) + m_byteSize * 3 + m_offsetFromFP * 7; +} + +void StackSlot::dump(PrintStream& out) const +{ + if (isSpill()) + out.print("spill"); + else + out.print("stack"); + out.print(m_index); +} + +void StackSlot::deepDump(PrintStream& out) const +{ + out.print("byteSize = ", m_byteSize, ", offsetFromFP = ", m_offsetFromFP, ", kind = ", m_kind); + if (m_b3Slot) + out.print(", b3Slot = ", *m_b3Slot, ": (", B3::deepDump(m_b3Slot), ")"); +} + +StackSlot::StackSlot(unsigned byteSize, StackSlotKind kind, B3::StackSlot* b3Slot) + : m_byteSize(byteSize) + , m_offsetFromFP(b3Slot ? 
b3Slot->offsetFromFP() : 0) + , m_kind(kind) + , m_b3Slot(b3Slot) +{ + ASSERT(byteSize); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirStackSlot.h b/Source/JavaScriptCore/b3/air/AirStackSlot.h new file mode 100644 index 000000000..85c94acc8 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirStackSlot.h @@ -0,0 +1,133 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirStackSlotKind.h" +#include "B3SparseCollection.h" +#include <limits.h> +#include <wtf/FastMalloc.h> +#include <wtf/Noncopyable.h> +#include <wtf/PrintStream.h> + +namespace JSC { namespace B3 { + +class StackSlot; + +namespace Air { + +class StackSlot { + WTF_MAKE_NONCOPYABLE(StackSlot); + WTF_MAKE_FAST_ALLOCATED; +public: + unsigned byteSize() const { return m_byteSize; } + StackSlotKind kind() const { return m_kind; } + bool isLocked() const { return m_kind == StackSlotKind::Locked; } + bool isSpill() const { return m_kind == StackSlotKind::Spill; } + unsigned index() const { return m_index; } + + void ensureSize(unsigned requestedSize) + { + ASSERT(!m_offsetFromFP); + m_byteSize = std::max(m_byteSize, requestedSize); + } + + unsigned alignment() const + { + if (byteSize() <= 1) + return 1; + if (byteSize() <= 2) + return 2; + if (byteSize() <= 4) + return 4; + return 8; + } + + B3::StackSlot* b3Slot() const { return m_b3Slot; } + + // Zero means that it's not yet assigned. + intptr_t offsetFromFP() const { return m_offsetFromFP; } + + // This should usually just be called from phases that do stack allocation. But you can + // totally force a stack slot to land at some offset. + void setOffsetFromFP(intptr_t); + + // This computes a hash for comparing this to JSAir's StackSlot. 
+ unsigned jsHash() const; + + void dump(PrintStream&) const; + void deepDump(PrintStream&) const; + +private: + friend class Code; + friend class SparseCollection<StackSlot>; + + StackSlot(unsigned byteSize, StackSlotKind, B3::StackSlot*); + + unsigned m_byteSize { 0 }; + unsigned m_index { UINT_MAX }; + intptr_t m_offsetFromFP { 0 }; + StackSlotKind m_kind { StackSlotKind::Locked }; + B3::StackSlot* m_b3Slot { nullptr }; +}; + +class DeepStackSlotDump { +public: + DeepStackSlotDump(const StackSlot* slot) + : m_slot(slot) + { + } + + void dump(PrintStream& out) const + { + if (m_slot) + m_slot->deepDump(out); + else + out.print("<null>"); + } + +private: + const StackSlot* m_slot; +}; + +inline DeepStackSlotDump deepDump(const StackSlot* slot) +{ + return DeepStackSlotDump(slot); +} + +} } } // namespace JSC::B3::Air + +namespace WTF { + +inline void printInternal(PrintStream& out, JSC::B3::Air::StackSlot* stackSlot) +{ + out.print(pointerDump(stackSlot)); +} + +} // namespace WTF + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirStackSlotKind.cpp b/Source/JavaScriptCore/b3/air/AirStackSlotKind.cpp new file mode 100644 index 000000000..af83de1b9 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirStackSlotKind.cpp @@ -0,0 +1,52 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirStackSlotKind.h" + +#if ENABLE(B3_JIT) + +#include <wtf/PrintStream.h> + +namespace WTF { + +using namespace JSC::B3::Air; + +void printInternal(PrintStream& out, StackSlotKind kind) +{ + switch (kind) { + case StackSlotKind::Locked: + out.print("Locked"); + return; + case StackSlotKind::Spill: + out.print("Spill"); + return; + } + RELEASE_ASSERT_NOT_REACHED(); +} + +} // namespace WTF + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirStackSlotKind.h b/Source/JavaScriptCore/b3/air/AirStackSlotKind.h new file mode 100644 index 000000000..9ef205772 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirStackSlotKind.h @@ -0,0 +1,63 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +enum class StackSlotKind : uint8_t { + // A locked stack slot is an area of stack requested by the client. It cannot be killed. The + // client can get its FP offset and write to it from stack walking code, so we must assume + // that reads and writes to a locked stack slot can be clobbered the same way as reads and + // writes to any memory location. + Locked, + + // A spill slot. These have fundamentally different behavior than a typical memory location. + // They are lowered to from temporaries. This means for example that a 32-bit ZDef store to a + // 8 byte stack slot will zero the top 4 bytes, even though a 32-bit ZDef store to any other + // kind of memory location would do no such thing. UseAddr on a spill slot is not allowed, so + // they never escape. + Spill + + // FIXME: We should add a third mode, which means that the stack slot will be read asynchronously + // as with Locked, but never written to asynchronously. Then, Air could optimize spilling and + // filling by tracking whether the value had been stored to a read-only locked slot. If it had, + // then we can refill from that slot. + // https://bugs.webkit.org/show_bug.cgi?id=150587 +}; + +} } } // namespace JSC::B3::Air + +namespace WTF { + +class PrintStream; + +void printInternal(PrintStream&, JSC::B3::Air::StackSlotKind); + +} // namespace WTF + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirTmp.cpp b/Source/JavaScriptCore/b3/air/AirTmp.cpp new file mode 100644 index 000000000..487f52177 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirTmp.cpp @@ -0,0 +1,55 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. 
``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirTmp.h" + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +void Tmp::dump(PrintStream& out) const +{ + if (!*this) { + out.print("<none>"); + return; + } + + if (isReg()) { + out.print(reg()); + return; + } + + if (isGP()) { + out.print("%tmp", gpTmpIndex()); + return; + } + + out.print("%ftmp", fpTmpIndex()); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirTmp.h b/Source/JavaScriptCore/b3/air/AirTmp.h new file mode 100644 index 000000000..c01427c2b --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirTmp.h @@ -0,0 +1,298 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "FPRInfo.h" +#include "GPRInfo.h" +#include "Reg.h" +#include <wtf/HashMap.h> + +namespace JSC { namespace B3 { namespace Air { + +class Arg; + +// A Tmp is a generalization of a register. It can be used to refer to any GPR or FPR. It can also +// be used to refer to an unallocated register (i.e. a temporary). Like many Air classes, we use +// deliberately terse naming since we will have to use this name a lot. 
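// A minimal usage sketch (illustrative only; the particular register and index are arbitrary):
// the same type wraps both a concrete machine register and an unallocated temporary, and the
// client simply asks which one it has:
//
//     Tmp physical(GPRInfo::regT0);            // wraps a real GPR
//     Tmp virtualGP = Tmp::gpTmpForIndex(5);   // the sixth general-purpose temporary
//     ASSERT(physical.isReg());
//     ASSERT(!virtualGP.isReg() && virtualGP.isGP() && virtualGP.gpTmpIndex() == 5);
//
// The packed integer encoding that makes Tmp cheap to copy and compare is described at
// m_value, at the bottom of the class.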
+ +class Tmp { +public: + Tmp() + : m_value(0) + { + } + + explicit Tmp(Reg reg) + { + if (reg) { + if (reg.isGPR()) + m_value = encodeGPR(reg.gpr()); + else + m_value = encodeFPR(reg.fpr()); + } else + m_value = 0; + } + + explicit Tmp(const Arg&); + + static Tmp gpTmpForIndex(unsigned index) + { + Tmp result; + result.m_value = encodeGPTmp(index); + return result; + } + + static Tmp fpTmpForIndex(unsigned index) + { + Tmp result; + result.m_value = encodeFPTmp(index); + return result; + } + + explicit operator bool() const { return !!m_value; } + + bool isGP() const + { + return isEncodedGP(m_value); + } + + bool isFP() const + { + return isEncodedFP(m_value); + } + + bool isGPR() const + { + return isEncodedGPR(m_value); + } + + bool isFPR() const + { + return isEncodedFPR(m_value); + } + + bool isReg() const + { + return isGPR() || isFPR(); + } + + GPRReg gpr() const + { + return decodeGPR(m_value); + } + + FPRReg fpr() const + { + return decodeFPR(m_value); + } + + Reg reg() const + { + if (isGP()) + return gpr(); + return fpr(); + } + + bool hasTmpIndex() const + { + return !isReg(); + } + + unsigned gpTmpIndex() const + { + return decodeGPTmp(m_value); + } + + unsigned fpTmpIndex() const + { + return decodeFPTmp(m_value); + } + + unsigned tmpIndex() const + { + if (isGP()) + return gpTmpIndex(); + return fpTmpIndex(); + } + + bool isAlive() const + { + return !!*this; + } + + bool operator==(const Tmp& other) const + { + return m_value == other.m_value; + } + + bool operator!=(const Tmp& other) const + { + return !(*this == other); + } + + void dump(PrintStream& out) const; + + Tmp(WTF::HashTableDeletedValueType) + : m_value(std::numeric_limits<int>::max()) + { + } + + bool isHashTableDeletedValue() const + { + return *this == Tmp(WTF::HashTableDeletedValue); + } + + unsigned hash() const + { + return WTF::IntHash<int>::hash(m_value); + } + + unsigned internalValue() const { return static_cast<unsigned>(m_value); } + + static Tmp tmpForInternalValue(unsigned index) + { + Tmp result; + result.m_value = static_cast<int>(index); + return result; + } + +private: + static int encodeGP(unsigned index) + { + return 1 + index; + } + + static int encodeFP(unsigned index) + { + return -1 - index; + } + + static int encodeGPR(GPRReg gpr) + { + return encodeGP(gpr - MacroAssembler::firstRegister()); + } + + static int encodeFPR(FPRReg fpr) + { + return encodeFP(fpr - MacroAssembler::firstFPRegister()); + } + + static int encodeGPTmp(unsigned index) + { + return encodeGPR(MacroAssembler::lastRegister()) + 1 + index; + } + + static int encodeFPTmp(unsigned index) + { + return encodeFPR(MacroAssembler::lastFPRegister()) - 1 - index; + } + + static bool isEncodedGP(int value) + { + return value > 0; + } + + static bool isEncodedFP(int value) + { + return value < 0; + } + + static bool isEncodedGPR(int value) + { + return isEncodedGP(value) && value <= encodeGPR(MacroAssembler::lastRegister()); + } + + static bool isEncodedFPR(int value) + { + return isEncodedFP(value) && value >= encodeFPR(MacroAssembler::lastFPRegister()); + } + + static bool isEncodedGPTmp(int value) + { + return isEncodedGP(value) && !isEncodedGPR(value); + } + + static bool isEncodedFPTmp(int value) + { + return isEncodedFP(value) && !isEncodedFPR(value); + } + + static GPRReg decodeGPR(int value) + { + ASSERT(isEncodedGPR(value)); + return static_cast<GPRReg>( + (value - encodeGPR(MacroAssembler::firstRegister())) + MacroAssembler::firstRegister()); + } + + static FPRReg decodeFPR(int value) + { + ASSERT(isEncodedFPR(value)); 
+ return static_cast<FPRReg>( + (encodeFPR(MacroAssembler::firstFPRegister()) - value) + + MacroAssembler::firstFPRegister()); + } + + static unsigned decodeGPTmp(int value) + { + ASSERT(isEncodedGPTmp(value)); + return value - (encodeGPR(MacroAssembler::lastRegister()) + 1); + } + + static unsigned decodeFPTmp(int value) + { + ASSERT(isEncodedFPTmp(value)); + return (encodeFPR(MacroAssembler::lastFPRegister()) - 1) - value; + } + + // 0: empty Tmp + // positive: GPRs and then GP temps. + // negative: FPRs and then FP temps. + int m_value; +}; + +struct TmpHash { + static unsigned hash(const Tmp& key) { return key.hash(); } + static bool equal(const Tmp& a, const Tmp& b) { return a == b; } + static const bool safeToCompareToEmptyOrDeleted = true; +}; + +} } } // namespace JSC::B3::Air + +namespace WTF { + +template<typename T> struct DefaultHash; +template<> struct DefaultHash<JSC::B3::Air::Tmp> { + typedef JSC::B3::Air::TmpHash Hash; +}; + +template<typename T> struct HashTraits; +template<> struct HashTraits<JSC::B3::Air::Tmp> : SimpleClassHashTraits<JSC::B3::Air::Tmp> { }; + +} // namespace WTF + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirTmpInlines.h b/Source/JavaScriptCore/b3/air/AirTmpInlines.h new file mode 100644 index 000000000..a7de098b4 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirTmpInlines.h @@ -0,0 +1,97 @@ +/* + * Copyright (C) 2015 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirArg.h" +#include "AirTmp.h" + +namespace JSC { namespace B3 { namespace Air { + +inline Tmp::Tmp(const Arg& arg) +{ + *this = arg.tmp(); +} + +// When a Hash structure is too slow or when Sets contains most values, you can +// use direct array addressing with Tmps. 
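// Illustrative sketch (the names "useCounts", "tmpIndexBound", "tmp", and "someAbsoluteIndex"
// are hypothetical): a dense side table for GP tmps, indexed without hashing:
//
//     Vector<double> useCounts(AbsoluteTmpMapper<Arg::GP>::absoluteIndex(tmpIndexBound));
//     useCounts[AbsoluteTmpMapper<Arg::GP>::absoluteIndex(tmp)] += 1;
//     Tmp recovered = AbsoluteTmpMapper<Arg::GP>::tmpFromAbsoluteIndex(someAbsoluteIndex);
//
// absoluteIndex() folds machine registers and temporaries into one index space: entries 1
// through lastMachineRegisterIndex() correspond to machine registers, larger indices to
// temporaries, and index 0 to the empty Tmp.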
+template<Arg::Type type> +struct AbsoluteTmpMapper; + +template<> +struct AbsoluteTmpMapper<Arg::GP> { + static unsigned absoluteIndex(const Tmp& tmp) + { + ASSERT(tmp.isGP()); + ASSERT(static_cast<int>(tmp.internalValue()) > 0); + return tmp.internalValue(); + } + + static unsigned absoluteIndex(unsigned tmpIndex) + { + return absoluteIndex(Tmp::gpTmpForIndex(tmpIndex)); + } + + static unsigned lastMachineRegisterIndex() + { + return absoluteIndex(Tmp(MacroAssembler::lastRegister())); + } + + static Tmp tmpFromAbsoluteIndex(unsigned tmpIndex) + { + return Tmp::tmpForInternalValue(tmpIndex); + } +}; + +template<> +struct AbsoluteTmpMapper<Arg::FP> { + static unsigned absoluteIndex(const Tmp& tmp) + { + ASSERT(tmp.isFP()); + ASSERT(static_cast<int>(tmp.internalValue()) < 0); + return -tmp.internalValue(); + } + + static unsigned absoluteIndex(unsigned tmpIndex) + { + return absoluteIndex(Tmp::fpTmpForIndex(tmpIndex)); + } + + static unsigned lastMachineRegisterIndex() + { + return absoluteIndex(Tmp(MacroAssembler::lastFPRegister())); + } + + static Tmp tmpFromAbsoluteIndex(unsigned tmpIndex) + { + return Tmp::tmpForInternalValue(-tmpIndex); + } +}; + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirTmpWidth.cpp b/Source/JavaScriptCore/b3/air/AirTmpWidth.cpp new file mode 100644 index 000000000..f1173c022 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirTmpWidth.cpp @@ -0,0 +1,183 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" +#include "AirTmpWidth.h" + +#if ENABLE(B3_JIT) + +#include "AirCode.h" +#include "AirInstInlines.h" +#include <wtf/ListDump.h> + +namespace JSC { namespace B3 { namespace Air { + +TmpWidth::TmpWidth() +{ +} + +TmpWidth::TmpWidth(Code& code) +{ + recompute(code); +} + +TmpWidth::~TmpWidth() +{ +} + +void TmpWidth::recompute(Code& code) +{ + // Set this to true to cause this analysis to always return pessimistic results. 
+ const bool beCareful = false; + + const bool verbose = false; + + if (verbose) { + dataLog("Code before TmpWidth:\n"); + dataLog(code); + } + + m_width.clear(); + + auto assumeTheWorst = [&] (Tmp tmp) { + Widths& widths = m_width.add(tmp, Widths()).iterator->value; + Arg::Type type = Arg(tmp).type(); + widths.use = Arg::conservativeWidth(type); + widths.def = Arg::conservativeWidth(type); + }; + + // Assume the worst for registers. + RegisterSet::allRegisters().forEach( + [&] (Reg reg) { + assumeTheWorst(Tmp(reg)); + }); + + if (beCareful) { + code.forAllTmps(assumeTheWorst); + + // We fall through because the fixpoint that follows can only make things even more + // conservative. This mode isn't meant to be fast, just safe. + } + + // Now really analyze everything but Move's over Tmp's, but set aside those Move's so we can find + // them quickly during the fixpoint below. Note that we can make this analysis stronger by + // recognizing more kinds of Move's or anything that has Move-like behavior, though it's probably not + // worth it. + Vector<Inst*> moves; + for (BasicBlock* block : code) { + for (Inst& inst : *block) { + if (inst.kind.opcode == Move && inst.args[1].isTmp()) { + if (inst.args[0].isTmp()) { + // Make sure that both sides of the Move have a width already initialized. The + // fixpoint below assumes that it never has to add things to the HashMap. + m_width.add(inst.args[0].tmp(), Widths(Arg::GP)); + m_width.add(inst.args[1].tmp(), Widths(Arg::GP)); + + moves.append(&inst); + continue; + } + if (inst.args[0].isImm() + && inst.args[0].value() >= 0) { + Tmp tmp = inst.args[1].tmp(); + Widths& widths = m_width.add(tmp, Widths(Arg::GP)).iterator->value; + + if (inst.args[0].value() <= std::numeric_limits<int8_t>::max()) + widths.def = std::max(widths.def, Arg::Width8); + else if (inst.args[0].value() <= std::numeric_limits<int16_t>::max()) + widths.def = std::max(widths.def, Arg::Width16); + else if (inst.args[0].value() <= std::numeric_limits<int32_t>::max()) + widths.def = std::max(widths.def, Arg::Width32); + else + widths.def = std::max(widths.def, Arg::Width64); + + continue; + } + } + inst.forEachTmp( + [&] (Tmp& tmp, Arg::Role role, Arg::Type type, Arg::Width width) { + Widths& widths = m_width.add(tmp, Widths(type)).iterator->value; + + if (Arg::isAnyUse(role)) + widths.use = std::max(widths.use, width); + + if (Arg::isZDef(role)) + widths.def = std::max(widths.def, width); + else if (Arg::isAnyDef(role)) + widths.def = Arg::conservativeWidth(type); + }); + } + } + + // Finally, fixpoint over the Move's. + bool changed = true; + while (changed) { + changed = false; + for (Inst* move : moves) { + ASSERT(move->kind.opcode == Move); + ASSERT(move->args[0].isTmp()); + ASSERT(move->args[1].isTmp()); + + // We already ensure that both tmps are added to the width map. That's important + // because you cannot add both tmps here while simultaneously getting a reference to + // their values, since the second add would invalidate the reference returned by the + // first one. + Widths& srcWidths = m_width.find(move->args[0].tmp())->value; + Widths& dstWidths = m_width.find(move->args[1].tmp())->value; + + // Legend: + // + // Move %src, %dst + + // defWidth(%dst) is a promise about how many high bits are zero. The smaller the width, the + // stronger the promise. This Move may weaken that promise if we know that %src is making a + // weaker promise. Such forward flow is the only thing that determines defWidth(). 
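// Worked example: suppose %src is only ever written by 32-bit ZDefs, so srcWidths.def is
// Width32 (its top 32 bits are known zero), while %dst has so far only seen an 8-bit def,
// so dstWidths.def is Width8. This Move copies every bit of %src into %dst, so %dst can
// only promise what %src promises: the branch below widens dstWidths.def to Width32. The
// second branch is the mirror image for use widths, flowing backward from %dst to %src.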
+ if (dstWidths.def < srcWidths.def) { + dstWidths.def = srcWidths.def; + changed = true; + } + + // srcWidth(%src) is a promise about how many high bits are ignored. The smaller the width, + // the stronger the promise. This Move may weaken that promise if we know that %dst is making + // a weaker promise. Such backward flow is the only thing that determines srcWidth(). + if (srcWidths.use < dstWidths.use) { + srcWidths.use = dstWidths.use; + changed = true; + } + } + } + + if (verbose) + dataLog("width: ", mapDump(m_width), "\n"); +} + +void TmpWidth::Widths::dump(PrintStream& out) const +{ + out.print("{use = ", use, ", def = ", def, "}"); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirTmpWidth.h b/Source/JavaScriptCore/b3/air/AirTmpWidth.h new file mode 100644 index 000000000..ea612b662 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirTmpWidth.h @@ -0,0 +1,114 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirArg.h" + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +class TmpWidth { +public: + TmpWidth(); + TmpWidth(Code&); + ~TmpWidth(); + + void recompute(Code&); + + // The width of a Tmp is the number of bits that you need to be able to track without some trivial + // recovery. A Tmp may have a "subwidth" (say, Width32 on a 64-bit system) if either of the following + // is true: + // + // - The high bits are never read. + // - The high bits are always zero. + // + // This doesn't tell you which of those properties holds, but you can query that using the other + // methods. + Arg::Width width(Tmp tmp) const + { + auto iter = m_width.find(tmp); + if (iter == m_width.end()) + return Arg::minimumWidth(Arg(tmp).type()); + return std::min(iter->value.use, iter->value.def); + } + + // Return the minimum required width for all defs/uses of this Tmp. + Arg::Width requiredWidth(Tmp tmp) + { + auto iter = m_width.find(tmp); + if (iter == m_width.end()) + return Arg::minimumWidth(Arg(tmp).type()); + return std::max(iter->value.use, iter->value.def); + } + + // This indirectly tells you how much of the tmp's high bits are guaranteed to be zero. 
The number of + // high bits that are zero are: + // + // TotalBits - defWidth(tmp) + // + // Where TotalBits are the total number of bits in the register, so 64 on a 64-bit system. + Arg::Width defWidth(Tmp tmp) const + { + auto iter = m_width.find(tmp); + if (iter == m_width.end()) + return Arg::minimumWidth(Arg(tmp).type()); + return iter->value.def; + } + + // This tells you how much of Tmp is going to be read. + Arg::Width useWidth(Tmp tmp) const + { + auto iter = m_width.find(tmp); + if (iter == m_width.end()) + return Arg::minimumWidth(Arg(tmp).type()); + return iter->value.use; + } + +private: + struct Widths { + Widths() { } + + Widths(Arg::Type type) + { + use = Arg::minimumWidth(type); + def = Arg::minimumWidth(type); + } + + void dump(PrintStream& out) const; + + Arg::Width use; + Arg::Width def; + }; + + HashMap<Tmp, Widths> m_width; +}; + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirUseCounts.h b/Source/JavaScriptCore/b3/air/AirUseCounts.h new file mode 100644 index 000000000..98a749321 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirUseCounts.h @@ -0,0 +1,118 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +#include "AirArgInlines.h" +#include "AirBlockWorklist.h" +#include "AirCode.h" +#include "AirInstInlines.h" +#include <wtf/HashMap.h> +#include <wtf/ListDump.h> + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +// Computes the number of uses of a variable based on frequency of execution. The frequency of blocks +// that are only reachable by rare edges is scaled by Options::rareBlockPenalty(). + +// Thing can be either Tmp or Arg. +template<typename Thing> +class UseCounts { +public: + struct Counts { + void dump(PrintStream& out) const + { + out.print( + "{numWarmUses = ", numWarmUses, ", numColdUses = ", numColdUses, ", numDefs = ", + numDefs, "}"); + } + + double numWarmUses { 0 }; + double numColdUses { 0 }; + double numDefs { 0 }; + double numConstDefs { 0 }; + }; + + UseCounts(Code& code) + { + // Find non-rare blocks. 
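// The walk below follows only non-rare edges from the first block, so a block the worklist
// never sees is reachable solely through rare edges (or not at all). Uses in such a block
// still count, but scaled down by Options::rareBlockPenalty(): a warm use in a rare block
// with frequency 1 contributes rareBlockPenalty() to numWarmUses instead of 1. Clients read
// the result through operator[] below, which returns null for a Tmp or Arg that never
// appeared.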
+ BlockWorklist fastWorklist; + fastWorklist.push(code[0]); + while (BasicBlock* block = fastWorklist.pop()) { + for (FrequentedBlock& successor : block->successors()) { + if (!successor.isRare()) + fastWorklist.push(successor.block()); + } + } + + for (BasicBlock* block : code) { + double frequency = block->frequency(); + if (!fastWorklist.saw(block)) + frequency *= Options::rareBlockPenalty(); + for (Inst& inst : *block) { + inst.forEach<Thing>( + [&] (Thing& arg, Arg::Role role, Arg::Type, Arg::Width) { + Counts& counts = m_counts.add(arg, Counts()).iterator->value; + + if (Arg::isWarmUse(role)) + counts.numWarmUses += frequency; + if (Arg::isColdUse(role)) + counts.numColdUses += frequency; + if (Arg::isAnyDef(role)) + counts.numDefs += frequency; + }); + + if ((inst.kind.opcode == Move || inst.kind.opcode == Move32) + && inst.args[0].isSomeImm() + && inst.args[1].is<Thing>()) + m_counts.add(inst.args[1].as<Thing>(), Counts()).iterator->value.numConstDefs++; + } + } + } + + const Counts* operator[](const Thing& arg) const + { + auto iter = m_counts.find(arg); + if (iter == m_counts.end()) + return nullptr; + return &iter->value; + } + + void dump(PrintStream& out) const + { + out.print(mapDump(m_counts)); + } + +private: + HashMap<Thing, Counts> m_counts; +}; + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/AirValidate.cpp b/Source/JavaScriptCore/b3/air/AirValidate.cpp new file mode 100644 index 000000000..d90de62eb --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirValidate.cpp @@ -0,0 +1,159 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include "config.h" +#include "AirValidate.h" + +#if ENABLE(B3_JIT) + +#include "AirCode.h" +#include "AirInstInlines.h" +#include "B3Procedure.h" + +namespace JSC { namespace B3 { namespace Air { + +namespace { + +class Validater { +public: + Validater(Code& code, const char* dumpBefore) + : m_code(code) + , m_dumpBefore(dumpBefore) + { + } + +#define VALIDATE(condition, message) do { \ + if (condition) \ + break; \ + fail(__FILE__, __LINE__, WTF_PRETTY_FUNCTION, #condition, toCString message); \ + } while (false) + + void run() + { + HashSet<StackSlot*> validSlots; + HashSet<BasicBlock*> validBlocks; + HashSet<Special*> validSpecials; + + for (BasicBlock* block : m_code) + validBlocks.add(block); + for (StackSlot* slot : m_code.stackSlots()) + validSlots.add(slot); + for (Special* special : m_code.specials()) + validSpecials.add(special); + + for (BasicBlock* block : m_code) { + // Blocks that are entrypoints must not have predecessors. + if (m_code.isEntrypoint(block)) + VALIDATE(!block->numPredecessors(), ("At entrypoint ", *block)); + + for (unsigned instIndex = 0; instIndex < block->size(); ++instIndex) { + Inst& inst = block->at(instIndex); + for (Arg& arg : inst.args) { + switch (arg.kind()) { + case Arg::Stack: + VALIDATE(validSlots.contains(arg.stackSlot()), ("At ", inst, " in ", *block)); + break; + case Arg::Special: + VALIDATE(validSpecials.contains(arg.special()), ("At ", inst, " in ", *block)); + break; + default: + break; + } + } + VALIDATE(inst.isValidForm(), ("At ", inst, " in ", *block)); + if (instIndex == block->size() - 1) + VALIDATE(inst.isTerminal(), ("At ", inst, " in ", *block)); + else + VALIDATE(!inst.isTerminal(), ("At ", inst, " in ", *block)); + + // forEachArg must return Arg&'s that point into the args array. + inst.forEachArg( + [&] (Arg& arg, Arg::Role, Arg::Type, Arg::Width) { + VALIDATE(&arg >= &inst.args[0], ("At ", arg, " in ", inst, " in ", *block)); + VALIDATE(&arg <= &inst.args.last(), ("At ", arg, " in ", inst, " in ", *block)); + }); + + switch (inst.kind.opcode) { + case EntrySwitch: + VALIDATE(block->numSuccessors() == m_code.proc().numEntrypoints(), ("At ", inst, " in ", *block)); + break; + case Shuffle: + // We can't handle trapping shuffles because of how we lower them. That could + // be fixed though. 
+ VALIDATE(!inst.kind.traps, ("At ", inst, " in ", *block)); + break; + default: + break; + } + } + for (BasicBlock* successor : block->successorBlocks()) + VALIDATE(validBlocks.contains(successor), ("In ", *block)); + } + } + +private: + NO_RETURN_DUE_TO_CRASH void fail( + const char* filename, int lineNumber, const char* function, const char* condition, + CString message) + { + CString failureMessage; + { + StringPrintStream out; + out.print("AIR VALIDATION FAILURE\n"); + out.print(" ", condition, " (", filename, ":", lineNumber, ")\n"); + out.print(" ", message, "\n"); + out.print(" After ", m_code.lastPhaseName(), "\n"); + failureMessage = out.toCString(); + } + + dataLog(failureMessage); + if (m_dumpBefore) { + dataLog("Before ", m_code.lastPhaseName(), ":\n"); + dataLog(m_dumpBefore); + } + dataLog("At time of failure:\n"); + dataLog(m_code); + + dataLog(failureMessage); + WTFReportAssertionFailure(filename, lineNumber, function, condition); + CRASH(); + } + + Code& m_code; + const char* m_dumpBefore; +}; + +} // anonymous namespace + +void validate(Code& code, const char* dumpBefore) +{ + Validater validater(code, dumpBefore); + validater.run(); +} + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) + diff --git a/Source/JavaScriptCore/b3/air/AirValidate.h b/Source/JavaScriptCore/b3/air/AirValidate.h new file mode 100644 index 000000000..472c76379 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/AirValidate.h @@ -0,0 +1,38 @@ +/* + * Copyright (C) 2015-2016 Apple Inc. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#pragma once + +#if ENABLE(B3_JIT) + +namespace JSC { namespace B3 { namespace Air { + +class Code; + +JS_EXPORT_PRIVATE void validate(Code&, const char* dumpBefore = nullptr); + +} } } // namespace JSC::B3::Air + +#endif // ENABLE(B3_JIT) diff --git a/Source/JavaScriptCore/b3/air/opcode_generator.rb b/Source/JavaScriptCore/b3/air/opcode_generator.rb new file mode 100644 index 000000000..d14240515 --- /dev/null +++ b/Source/JavaScriptCore/b3/air/opcode_generator.rb @@ -0,0 +1,1228 @@ +#!/usr/bin/env ruby + +# Copyright (C) 2015-2016 Apple Inc. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# 1. 
Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# 2. Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# +# THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' +# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, +# THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS +# BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR +# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF +# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS +# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN +# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) +# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF +# THE POSSIBILITY OF SUCH DAMAGE. + +require "pathname" + +class Opcode + attr_reader :name, :custom, :overloads + attr_reader :attributes + + def initialize(name, custom) + @name = name + @custom = custom + @attributes = {} + unless custom + @overloads = [] + end + end + + def masmName + name[0].downcase + name[1..-1] + end +end + +class Arg + attr_reader :role, :type, :width + + def initialize(role, type, width) + @role = role + @type = type + @width = width + end + + def widthCode + if width == "Ptr" + "Arg::pointerWidth()" + else + "Arg::Width#{width}" + end + end +end + +class Overload + attr_reader :signature, :forms + + def initialize(signature, forms) + @signature = signature + @forms = forms + end +end + +class Kind + attr_reader :name + attr_accessor :custom + + def initialize(name) + @name = name + @custom = false + end + + def ==(other) + if other.is_a? String + @name == other + else + @name == other.name and @custom == other.custom + end + end + + def Kind.argKinds(kind) + if kind == "Addr" + ["Addr", "Stack", "CallArg"] + else + [kind] + end + end + + def argKinds + Kind.argKinds(kind) + end +end + +class Form + attr_reader :kinds, :altName, :archs + + def initialize(kinds, altName, archs) + @kinds = kinds + @altName = altName + @archs = archs + end +end + +class Origin + attr_reader :fileName, :lineNumber + + def initialize(fileName, lineNumber) + @fileName = fileName + @lineNumber = lineNumber + end + + def to_s + "#{fileName}:#{lineNumber}" + end +end + +class Token + attr_reader :origin, :string + + def initialize(origin, string) + @origin = origin + @string = string + end + + def ==(other) + if other.is_a? Token + @string == other.string + else + @string == other + end + end + + def =~(other) + @string =~ other + end + + def to_s + "#{@string.inspect} at #{origin}" + end + + def parseError(*comment) + if comment.empty? + raise "Parse error: #{to_s}" + else + raise "Parse error: #{to_s}: #{comment[0]}" + end + end +end + +def lex(str, fileName) + fileName = Pathname.new(fileName) + result = [] + lineNumber = 1 + while not str.empty? 
+ case str + when /\A\#([^\n]*)/ + # comment, ignore + when /\A\n/ + # newline, ignore + lineNumber += 1 + when /\A([a-zA-Z0-9_]+)/ + result << Token.new(Origin.new(fileName, lineNumber), $&) + when /\A([ \t\r]+)/ + # whitespace, ignore + when /\A[,:*\/]/ + result << Token.new(Origin.new(fileName, lineNumber), $&) + else + raise "Lexer error at #{Origin.new(fileName, lineNumber).to_s}, unexpected sequence #{str[0..20].inspect}" + end + str = $~.post_match + end + result +end + +def isRole(token) + token =~ /\A((U)|(D)|(UD)|(ZD)|(UZD)|(UA)|(S))\Z/ +end + +def isGF(token) + token =~ /\A((G)|(F))\Z/ +end + +def isKind(token) + token =~ /\A((Tmp)|(Imm)|(BigImm)|(BitImm)|(BitImm64)|(Addr)|(Index)|(RelCond)|(ResCond)|(DoubleCond))\Z/ +end + +def isArch(token) + token =~ /\A((x86)|(x86_32)|(x86_64)|(arm)|(armv7)|(arm64)|(32)|(64))\Z/ +end + +def isWidth(token) + token =~ /\A((8)|(16)|(32)|(64)|(Ptr))\Z/ +end + +def isKeyword(token) + isRole(token) or isGF(token) or isKind(token) or isArch(token) or isWidth(token) or + token == "custom" or token == "as" +end + +def isIdentifier(token) + token =~ /\A([a-zA-Z0-9_]+)\Z/ and not isKeyword(token) +end + +class Parser + def initialize(data, fileName) + @tokens = lex(data, fileName) + @idx = 0 + end + + def token + @tokens[@idx] + end + + def advance + @idx += 1 + end + + def parseError(*comment) + if token + token.parseError(*comment) + else + if comment.empty? + raise "Parse error at end of file" + else + raise "Parse error at end of file: #{comment[0]}" + end + end + end + + def consume(string) + parseError("Expected #{string}") unless token == string + advance + end + + def consumeIdentifier + result = token.string + parseError("Expected identifier") unless isIdentifier(result) + advance + result + end + + def consumeRole + result = token.string + parseError("Expected role (U, D, UD, ZD, UZD, UA, or S)") unless isRole(result) + advance + result + end + + def consumeType + result = token.string + parseError("Expected type (G or F)") unless isGF(result) + advance + result + end + + def consumeKind + result = token.string + parseError("Expected kind (Imm, BigImm, BitImm, BitImm64, Tmp, Addr, Index, RelCond, ResCond, or DoubleCond)") unless isKind(result) + advance + result + end + + def consumeWidth + result = token.string + parseError("Expected width (8, 16, 32, or 64)") unless isWidth(result) + advance + result + end + + def parseArchs + return nil unless isArch(token) + + result = [] + while isArch(token) + case token.string + when "x86" + result << "X86" + result << "X86_64" + when "x86_32" + result << "X86" + when "x86_64" + result << "X86_64" + when "arm" + result << "ARMv7" + result << "ARM64" + when "armv7" + result << "ARMv7" + when "arm64" + result << "ARM64" + when "32" + result << "X86" + result << "ARMv7" + when "64" + result << "X86_64" + result << "ARM64" + else + raise token.string + end + advance + end + + consume(":") + @lastArchs = result + end + + def consumeArchs + result = @lastArchs + @lastArchs = nil + result + end + + def parseAndConsumeArchs + parseArchs + consumeArchs + end + + def intersectArchs(left, right) + return left unless right + return right unless left + + left.select { + | value | + right.find { + | otherValue | + value == otherValue + } + } + end + + def parse + result = {} + + loop { + break if @idx >= @tokens.length + + if token == "custom" + consume("custom") + opcodeName = consumeIdentifier + + parseError("Cannot overload a custom opcode") if result[opcodeName] + + result[opcodeName] = Opcode.new(opcodeName, 
true) + else + opcodeArchs = parseAndConsumeArchs + + opcodeName = consumeIdentifier + + if result[opcodeName] + opcode = result[opcodeName] + parseError("Cannot overload a custom opcode") if opcode.custom + else + opcode = Opcode.new(opcodeName, false) + result[opcodeName] = opcode + end + + signature = [] + forms = [] + + if isRole(token) + loop { + role = consumeRole + consume(":") + type = consumeType + consume(":") + width = consumeWidth + + signature << Arg.new(role, type, width) + + break unless token == "," + consume(",") + } + end + + while token == "/" + consume("/") + case token.string + when "branch" + opcode.attributes[:branch] = true + opcode.attributes[:terminal] = true + when "terminal" + opcode.attributes[:terminal] = true + when "effects" + opcode.attributes[:effects] = true + when "return" + opcode.attributes[:return] = true + opcode.attributes[:terminal] = true + else + parseError("Bad / directive") + end + advance + end + + parseArchs + if isKind(token) + loop { + kinds = [] + altName = nil + formArchs = consumeArchs + loop { + kinds << Kind.new(consumeKind) + + if token == "*" + parseError("Can only apply * to Tmp") unless kinds[-1].name == "Tmp" + kinds[-1].custom = true + consume("*") + end + + break unless token == "," + consume(",") + } + + if token == "as" + consume("as") + altName = consumeIdentifier + end + + parseError("Form has wrong number of arguments for overload") unless kinds.length == signature.length + kinds.each_with_index { + | kind, index | + if kind.name == "Imm" or kind.name == "BigImm" or kind.name == "BitImm" or kind.name == "BitImm64" + if signature[index].role != "U" + parseError("Form has an immediate for a non-use argument") + end + if signature[index].type != "G" + parseError("Form has an immediate for a non-general-purpose argument") + end + end + } + forms << Form.new(kinds, altName, intersectArchs(opcodeArchs, formArchs)) + + parseArchs + break unless isKind(token) + } + end + + if signature.length == 0 + raise unless forms.length == 0 + forms << Form.new([], nil, opcodeArchs) + end + + opcode.overloads << Overload.new(signature, forms) + end + } + + result + end +end + +$fileName = ARGV[0] + +parser = Parser.new(IO::read($fileName), $fileName) +$opcodes = parser.parse + +def writeH(filename) + File.open("Air#{filename}.h", "w") { + | outp | + + outp.puts "// Generated by opcode_generator.rb from #{$fileName} -- do not edit!" + + outp.puts "#ifndef Air#{filename}_h" + outp.puts "#define Air#{filename}_h" + + yield outp + + outp.puts "#endif // Air#{filename}_h" + } +end + +writeH("Opcode") { + | outp | + outp.puts "namespace JSC { namespace B3 { namespace Air {" + outp.puts "enum Opcode : int16_t {" + $opcodes.keys.sort.each { + | opcode | + outp.puts " #{opcode}," + } + outp.puts "};" + + outp.puts "static const unsigned numOpcodes = #{$opcodes.keys.size};" + outp.puts "} } } // namespace JSC::B3::Air" + + outp.puts "namespace WTF {" + outp.puts "class PrintStream;" + outp.puts "JS_EXPORT_PRIVATE void printInternal(PrintStream&, JSC::B3::Air::Opcode);" + outp.puts "} // namespace WTF" +} + +# From here on, we don't try to emit properly indented code, since we're using a recursive pattern +# matcher. 
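# For orientation, the input this parser accepts looks roughly like the following
# (an illustrative entry, not copied from the real AirOpcode.opcodes table):
#
#   64: Add64 U:G:64, UZD:G:64
#       Tmp, Tmp
#       Imm, Tmp
#       Addr, Tmp
#
#   custom Patch
#
# An optional architecture prefix ("64:", "x86:", "arm64:", ...) restricts an opcode or an
# individual form; the signature is a comma-separated list of role:type:width triples; each
# subsequent line gives one accepted combination of argument kinds, optionally renamed via
# "as"; and "custom" opcodes delegate their behavior to a hand-written <Name>Custom class.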
+ +def matchForms(outp, speed, forms, columnIndex, columnGetter, filter, callback) + return if forms.length == 0 + + if filter[forms] + return + end + + if columnIndex >= forms[0].kinds.length + raise "Did not reduce to one form: #{forms.inspect}" unless forms.length == 1 + callback[forms[0]] + outp.puts "break;" + return + end + + groups = {} + forms.each { + | form | + kind = form.kinds[columnIndex].name + if groups[kind] + groups[kind] << form + else + groups[kind] = [form] + end + } + + if speed == :fast and groups.length == 1 + matchForms(outp, speed, forms, columnIndex + 1, columnGetter, filter, callback) + return + end + + outp.puts "switch (#{columnGetter[columnIndex]}) {" + groups.each_pair { + | key, value | + outp.puts "#if USE(JSVALUE64)" if key == "BigImm" or key == "BitImm64" + Kind.argKinds(key).each { + | argKind | + outp.puts "case Arg::#{argKind}:" + } + matchForms(outp, speed, value, columnIndex + 1, columnGetter, filter, callback) + outp.puts "break;" + outp.puts "#endif // USE(JSVALUE64)" if key == "BigImm" or key == "BitImm64" + } + outp.puts "default:" + outp.puts "break;" + outp.puts "}" +end + +def matchInstOverload(outp, speed, inst) + outp.puts "switch (#{inst}->kind.opcode) {" + $opcodes.values.each { + | opcode | + outp.puts "case #{opcode.name}:" + if opcode.custom + yield opcode, nil + else + needOverloadSwitch = ((opcode.overloads.size != 1) or speed == :safe) + outp.puts "switch (#{inst}->args.size()) {" if needOverloadSwitch + opcode.overloads.each { + | overload | + outp.puts "case #{overload.signature.length}:" if needOverloadSwitch + yield opcode, overload + outp.puts "break;" if needOverloadSwitch + } + if needOverloadSwitch + outp.puts "default:" + outp.puts "break;" + outp.puts "}" + end + end + outp.puts "break;" + } + outp.puts "default:" + outp.puts "break;" + outp.puts "}" +end + +def matchInstOverloadForm(outp, speed, inst) + matchInstOverload(outp, speed, inst) { + | opcode, overload | + if opcode.custom + yield opcode, nil, nil + else + columnGetter = proc { + | columnIndex | + "#{inst}->args[#{columnIndex}].kind()" + } + filter = proc { false } + callback = proc { + | form | + yield opcode, overload, form + } + matchForms(outp, speed, overload.forms, 0, columnGetter, filter, callback) + end + } +end + +def beginArchs(outp, archs) + return unless archs + if archs.empty? 
+ outp.puts "#if 0" + return + end + outp.puts("#if " + archs.map { + | arch | + "CPU(#{arch})" + }.join(" || ")) +end + +def endArchs(outp, archs) + return unless archs + outp.puts "#endif" +end + +writeH("OpcodeUtils") { + | outp | + outp.puts "#include \"AirCustom.h\"" + outp.puts "#include \"AirInst.h\"" + outp.puts "namespace JSC { namespace B3 { namespace Air {" + + outp.puts "inline bool opgenHiddenTruth() { return true; }" + outp.puts "template<typename T>" + outp.puts "inline T* opgenHiddenPtrIdentity(T* pointer) { return pointer; }" + outp.puts "#define OPGEN_RETURN(value) do {\\" + outp.puts " if (opgenHiddenTruth())\\" + outp.puts " return value;\\" + outp.puts "} while (false)" + + outp.puts "template<typename Functor>" + outp.puts "void Inst::forEachArg(const Functor& functor)" + outp.puts "{" + matchInstOverload(outp, :fast, "this") { + | opcode, overload | + if opcode.custom + outp.puts "#{opcode.name}Custom::forEachArg(*this, functor);" + else + overload.signature.each_with_index { + | arg, index | + + role = nil + case arg.role + when "U" + role = "Use" + when "D" + role = "Def" + when "ZD" + role = "ZDef" + when "UD" + role = "UseDef" + when "UZD" + role = "UseZDef" + when "UA" + role = "UseAddr" + when "S" + role = "Scratch" + else + raise + end + + outp.puts "functor(args[#{index}], Arg::#{role}, Arg::#{arg.type}P, #{arg.widthCode});" + } + end + } + outp.puts "}" + + outp.puts "template<typename... Arguments>" + outp.puts "ALWAYS_INLINE bool isValidForm(Opcode opcode, Arguments... arguments)" + outp.puts "{" + outp.puts "Arg::Kind kinds[sizeof...(Arguments)] = { arguments... };" + outp.puts "switch (opcode) {" + $opcodes.values.each { + | opcode | + outp.puts "case #{opcode.name}:" + if opcode.custom + outp.puts "OPGEN_RETURN(#{opcode.name}Custom::isValidFormStatic(arguments...));" + else + outp.puts "switch (sizeof...(Arguments)) {" + opcode.overloads.each { + | overload | + outp.puts "case #{overload.signature.length}:" + columnGetter = proc { | columnIndex | "opgenHiddenPtrIdentity(kinds)[#{columnIndex}]" } + filter = proc { false } + callback = proc { + | form | + # This conservatively says that Stack is not a valid form for UseAddr, + # because it's only valid if it's not a spill slot. This is consistent with + # isValidForm() being conservative and it also happens to be practical since + # we don't really use isValidForm for deciding when Stack is safe. 
+ overload.signature.length.times { + | index | + if overload.signature[index].role == "UA" + outp.puts "if (opgenHiddenPtrIdentity(kinds)[#{index}] == Arg::Stack)" + outp.puts " return false;" + end + } + + notCustom = (not form.kinds.detect { | kind | kind.custom }) + if notCustom + beginArchs(outp, form.archs) + outp.puts "OPGEN_RETURN(true);" + endArchs(outp, form.archs) + end + } + matchForms(outp, :safe, overload.forms, 0, columnGetter, filter, callback) + outp.puts "break;" + } + outp.puts "default:" + outp.puts "break;" + outp.puts "}" + end + outp.puts "break;" + } + outp.puts "default:" + outp.puts "break;" + outp.puts "}" + outp.puts "return false; " + outp.puts "}" + + outp.puts "inline bool isDefinitelyTerminal(Opcode opcode)" + outp.puts "{" + outp.puts "switch (opcode) {" + didFindTerminals = false + $opcodes.values.each { + | opcode | + if opcode.attributes[:terminal] + outp.puts "case #{opcode.name}:" + didFindTerminals = true + end + } + if didFindTerminals + outp.puts "return true;" + end + outp.puts "default:" + outp.puts "return false;" + outp.puts "}" + outp.puts "}" + + outp.puts "inline bool isReturn(Opcode opcode)" + outp.puts "{" + outp.puts "switch (opcode) {" + didFindReturns = false + $opcodes.values.each { + | opcode | + if opcode.attributes[:return] + outp.puts "case #{opcode.name}:" + didFindReturns = true + end + } + if didFindReturns + outp.puts "return true;" + end + outp.puts "default:" + outp.puts "return false;" + outp.puts "}" + outp.puts "}" + + outp.puts "} } } // namespace JSC::B3::Air" +} + +writeH("OpcodeGenerated") { + | outp | + outp.puts "#include \"AirInstInlines.h\"" + outp.puts "#include \"wtf/PrintStream.h\"" + outp.puts "namespace WTF {" + outp.puts "using namespace JSC::B3::Air;" + outp.puts "void printInternal(PrintStream& out, Opcode opcode)" + outp.puts "{" + outp.puts " switch (opcode) {" + $opcodes.keys.each { + | opcode | + outp.puts " case #{opcode}:" + outp.puts " out.print(\"#{opcode}\");" + outp.puts " return;" + } + outp.puts " }" + outp.puts " RELEASE_ASSERT_NOT_REACHED();" + outp.puts "}" + outp.puts "} // namespace WTF" + outp.puts "namespace JSC { namespace B3 { namespace Air {" + outp.puts "bool Inst::isValidForm()" + outp.puts "{" + matchInstOverloadForm(outp, :safe, "this") { + | opcode, overload, form | + if opcode.custom + outp.puts "OPGEN_RETURN(#{opcode.name}Custom::isValidForm(*this));" + else + beginArchs(outp, form.archs) + needsMoreValidation = false + overload.signature.length.times { + | index | + arg = overload.signature[index] + kind = form.kinds[index] + needsMoreValidation |= kind.custom + + # Some kinds of Args reqire additional validation. 
+ case kind.name + when "Tmp" + outp.puts "if (!args[#{index}].tmp().is#{arg.type}P())" + outp.puts "OPGEN_RETURN(false);" + when "Imm" + outp.puts "if (!Arg::isValidImmForm(args[#{index}].value()))" + outp.puts "OPGEN_RETURN(false);" + when "BitImm" + outp.puts "if (!Arg::isValidBitImmForm(args[#{index}].value()))" + outp.puts "OPGEN_RETURN(false);" + when "BitImm64" + outp.puts "if (!Arg::isValidBitImm64Form(args[#{index}].value()))" + outp.puts "OPGEN_RETURN(false);" + when "Addr" + if arg.role == "UA" + outp.puts "if (args[#{index}].isStack() && args[#{index}].stackSlot()->isSpill())" + outp.puts "OPGEN_RETURN(false);" + end + + outp.puts "if (!Arg::isValidAddrForm(args[#{index}].offset()))" + outp.puts "OPGEN_RETURN(false);" + when "Index" + outp.puts "if (!Arg::isValidIndexForm(args[#{index}].scale(), args[#{index}].offset(), #{arg.widthCode}))" + outp.puts "OPGEN_RETURN(false);" + when "BigImm" + when "RelCond" + when "ResCond" + when "DoubleCond" + else + raise "Unexpected kind: #{kind.name}" + end + } + if needsMoreValidation + outp.puts "if (!is#{opcode.name}Valid(*this))" + outp.puts "OPGEN_RETURN(false);" + end + outp.puts "OPGEN_RETURN(true);" + endArchs(outp, form.archs) + end + } + outp.puts "return false;" + outp.puts "}" + + outp.puts "bool Inst::admitsStack(unsigned argIndex)" + outp.puts "{" + outp.puts "switch (kind.opcode) {" + $opcodes.values.each { + | opcode | + outp.puts "case #{opcode.name}:" + + if opcode.custom + outp.puts "OPGEN_RETURN(#{opcode.name}Custom::admitsStack(*this, argIndex));" + else + # Switch on the argIndex. + outp.puts "switch (argIndex) {" + + numArgs = opcode.overloads.map { + | overload | + overload.signature.length + }.max + + numArgs.times { + | argIndex | + outp.puts "case #{argIndex}:" + + # Check if all of the forms of all of the overloads either do, or don't, admit an address + # at this index. We expect this to be a very common case. + numYes = 0 + numNo = 0 + opcode.overloads.each { + | overload | + useAddr = (overload.signature[argIndex] and + overload.signature[argIndex].role == "UA") + overload.forms.each { + | form | + if form.kinds[argIndex] == "Addr" and not useAddr + numYes += 1 + else + numNo += 1 + end + } + } + + # Note that we deliberately test numYes first because if we end up with no forms, we want + # to say that Address is inadmissible. + if numYes == 0 + outp.puts "OPGEN_RETURN(false);" + elsif numNo == 0 + outp.puts "OPGEN_RETURN(true);" + else + # Now do the full test. + + needOverloadSwitch = (opcode.overloads.size != 1) + + outp.puts "switch (args.size()) {" if needOverloadSwitch + opcode.overloads.each { + | overload | + + useAddr = (overload.signature[argIndex] and + overload.signature[argIndex].role == "UA") + + # Again, check if all of them do what we want. + numYes = 0 + numNo = 0 + overload.forms.each { + | form | + if form.kinds[argIndex] == "Addr" and not useAddr + numYes += 1 + else + numNo += 1 + end + } + + if numYes == 0 + # Don't emit anything, just drop to default. + elsif numNo == 0 + outp.puts "case #{overload.signature.length}:" if needOverloadSwitch + outp.puts "OPGEN_RETURN(true);" + outp.puts "break;" if needOverloadSwitch + else + outp.puts "case #{overload.signature.length}:" if needOverloadSwitch + + # This is how we test the hypothesis that changing this argument to an + # address yields a valid form. 
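# Concretely: re-run the form matcher with the kind at argIndex pinned to Arg::Addr while
# every other column still reports the instruction's actual argument kinds. If any form
# survives that substitution, the argument admits a stack slot; if none does, fall through
# and answer false.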
+ columnGetter = proc { + | columnIndex | + if columnIndex == argIndex + "Arg::Addr" + else + "args[#{columnIndex}].kind()" + end + } + filter = proc { + | forms | + numYes = 0 + + forms.each { + | form | + if form.kinds[argIndex] == "Addr" + numYes += 1 + end + } + + if numYes == 0 + # Drop down, emit no code, since we cannot match. + true + else + # Keep going. + false + end + } + callback = proc { + | form | + beginArchs(outp, form.archs) + outp.puts "OPGEN_RETURN(true);" + endArchs(outp, form.archs) + } + matchForms(outp, :safe, overload.forms, 0, columnGetter, filter, callback) + + outp.puts "break;" if needOverloadSwitch + end + } + if needOverloadSwitch + outp.puts "default:" + outp.puts "break;" + outp.puts "}" + end + end + + outp.puts "break;" + } + + outp.puts "default:" + outp.puts "break;" + outp.puts "}" + end + + outp.puts "break;" + } + outp.puts "default:"; + outp.puts "break;" + outp.puts "}" + outp.puts "return false;" + outp.puts "}" + + outp.puts "bool Inst::isTerminal()" + outp.puts "{" + outp.puts "switch (kind.opcode) {" + foundTrue = false + $opcodes.values.each { + | opcode | + if opcode.attributes[:terminal] + outp.puts "case #{opcode.name}:" + foundTrue = true + end + } + if foundTrue + outp.puts "return true;" + end + $opcodes.values.each { + | opcode | + if opcode.custom + outp.puts "case #{opcode.name}:" + outp.puts "return #{opcode.name}Custom::isTerminal(*this);" + end + } + outp.puts "default:" + outp.puts "return false;" + outp.puts "}" + outp.puts "}" + + outp.puts "bool Inst::hasNonArgNonControlEffects()" + outp.puts "{" + outp.puts "if (kind.traps)" + outp.puts "return true;" + outp.puts "switch (kind.opcode) {" + foundTrue = false + $opcodes.values.each { + | opcode | + if opcode.attributes[:effects] + outp.puts "case #{opcode.name}:" + foundTrue = true + end + } + if foundTrue + outp.puts "return true;" + end + $opcodes.values.each { + | opcode | + if opcode.custom + outp.puts "case #{opcode.name}:" + outp.puts "return #{opcode.name}Custom::hasNonArgNonControlEffects(*this);" + end + } + outp.puts "default:" + outp.puts "return false;" + outp.puts "}" + outp.puts "}" + + outp.puts "bool Inst::hasNonArgEffects()" + outp.puts "{" + outp.puts "if (kind.traps)" + outp.puts "return true;" + outp.puts "switch (kind.opcode) {" + foundTrue = false + $opcodes.values.each { + | opcode | + if opcode.attributes[:terminal] or opcode.attributes[:effects] + outp.puts "case #{opcode.name}:" + foundTrue = true + end + } + if foundTrue + outp.puts "return true;" + end + $opcodes.values.each { + | opcode | + if opcode.custom + outp.puts "case #{opcode.name}:" + outp.puts "return #{opcode.name}Custom::hasNonArgEffects(*this);" + end + } + outp.puts "default:" + outp.puts "return false;" + outp.puts "}" + outp.puts "}" + + outp.puts "CCallHelpers::Jump Inst::generate(CCallHelpers& jit, GenerationContext& context)" + outp.puts "{" + outp.puts "UNUSED_PARAM(jit);" + outp.puts "UNUSED_PARAM(context);" + outp.puts "CCallHelpers::Jump result;" + matchInstOverloadForm(outp, :fast, "this") { + | opcode, overload, form | + if opcode.custom + outp.puts "OPGEN_RETURN(#{opcode.name}Custom::generate(*this, jit, context));" + else + beginArchs(outp, form.archs) + if form.altName + methodName = form.altName + else + methodName = opcode.masmName + end + if opcode.attributes[:branch] + outp.print "result = " + end + outp.print "jit.#{methodName}(" + + form.kinds.each_with_index { + | kind, index | + if index != 0 + outp.print ", " + end + case kind.name + when "Tmp" + if 
overload.signature[index].type == "G" + outp.print "args[#{index}].gpr()" + else + outp.print "args[#{index}].fpr()" + end + when "Imm", "BitImm" + outp.print "args[#{index}].asTrustedImm32()" + when "BigImm", "BitImm64" + outp.print "args[#{index}].asTrustedImm64()" + when "Addr" + outp.print "args[#{index}].asAddress()" + when "Index" + outp.print "args[#{index}].asBaseIndex()" + when "RelCond" + outp.print "args[#{index}].asRelationalCondition()" + when "ResCond" + outp.print "args[#{index}].asResultCondition()" + when "DoubleCond" + outp.print "args[#{index}].asDoubleCondition()" + end + } + + outp.puts ");" + outp.puts "OPGEN_RETURN(result);" + endArchs(outp, form.archs) + end + } + outp.puts "RELEASE_ASSERT_NOT_REACHED();" + outp.puts "return result;" + outp.puts "}" + + outp.puts "} } } // namespace JSC::B3::Air" +} + +# This is a hack for JSAir. It's a joke. +File.open("JSAir_opcode.js", "w") { + | outp | + outp.puts "\"use strict\";" + outp.puts "// Generated by opcode_generator.rb from #{$fileName} -- do not edit!" + + $opcodes.values.each { + | opcode | + outp.puts "const #{opcode.name} = Symbol(#{opcode.name.inspect});" + } + + outp.puts "function Inst_forEachArg(inst, func)" + outp.puts "{" + outp.puts "let replacement;" + outp.puts "switch (inst.opcode) {" + $opcodes.values.each { + | opcode | + outp.puts "case #{opcode.name}:" + if opcode.custom + outp.puts "#{opcode.name}Custom.forEachArg(inst, func);" + else + needOverloadSwitch = opcode.overloads.size != 1 + outp.puts "switch (inst.args.length) {" if needOverloadSwitch + opcode.overloads.each { + | overload | + outp.puts "case #{overload.signature.length}:" if needOverloadSwitch + overload.signature.each_with_index { + | arg, index | + role = nil + case arg.role + when "U" + role = "Use" + when "D" + role = "Def" + when "ZD" + role = "ZDef" + when "UD" + role = "UseDef" + when "UZD" + role = "UseZDef" + when "UA" + role = "UseAddr" + when "S" + role = "Scratch" + else + raise + end + + outp.puts "inst.visitArg(#{index}, func, Arg.#{role}, #{arg.type}P, #{arg.width});" + } + outp.puts "break;" + } + if needOverloadSwitch + outp.puts "default:" + outp.puts "throw new Error(\"Bad overload\");" + outp.puts "break;" + outp.puts "}" + end + end + outp.puts "break;" + } + outp.puts "default:" + outp.puts "throw \"Bad opcode\";" + outp.puts "}" + outp.puts "}" + + outp.puts "function Inst_hasNonArgEffects(inst)" + outp.puts "{" + outp.puts "switch (inst.opcode) {" + foundTrue = false + $opcodes.values.each { + | opcode | + if opcode.attributes[:terminal] or opcode.attributes[:effects] + outp.puts "case #{opcode.name}:" + foundTrue = true + end + } + if foundTrue + outp.puts "return true;" + end + $opcodes.values.each { + | opcode | + if opcode.custom + outp.puts "case #{opcode.name}:" + outp.puts "return #{opcode.name}Custom.hasNonArgNonControlEffects(inst);" + end + } + outp.puts "default:" + outp.puts "return false;" + outp.puts "}" + outp.puts "}" + + outp.puts "function opcodeCode(opcode)" + outp.puts "{" + outp.puts "switch (opcode) {" + $opcodes.keys.sort.each_with_index { + | opcode, index | + outp.puts "case #{opcode}:" + outp.puts "return #{index}" + } + outp.puts "default:" + outp.puts "throw new Error(\"bad opcode\");" + outp.puts "}" + outp.puts "}" +} + diff --git a/Source/JavaScriptCore/b3/air/testair.cpp b/Source/JavaScriptCore/b3/air/testair.cpp new file mode 100644 index 000000000..9f8a8d83e --- /dev/null +++ b/Source/JavaScriptCore/b3/air/testair.cpp @@ -0,0 +1,1964 @@ +/* + * Copyright (C) 2016 Apple Inc. 
All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY + * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY + * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#include "config.h" + +#include "AirCode.h" +#include "AirGenerate.h" +#include "AirInstInlines.h" +#include "AllowMacroScratchRegisterUsage.h" +#include "B3Compilation.h" +#include "B3Procedure.h" +#include "CCallHelpers.h" +#include "InitializeThreading.h" +#include "JSCInlines.h" +#include "LinkBuffer.h" +#include "PureNaN.h" +#include "VM.h" +#include <cmath> +#include <map> +#include <string> +#include <wtf/Lock.h> +#include <wtf/NumberOfCores.h> +#include <wtf/Threading.h> + +// We don't have a NO_RETURN_DUE_TO_EXIT, nor should we. That's ridiculous. +static bool hiddenTruthBecauseNoReturnIsStupid() { return true; } + +static void usage() +{ + dataLog("Usage: testair [<filter>]\n"); + if (hiddenTruthBecauseNoReturnIsStupid()) + exit(1); +} + +#if ENABLE(B3_JIT) + +using namespace JSC; +using namespace JSC::B3::Air; + +namespace { + +StaticLock crashLock; + +// Nothing fancy for now; we just use the existing WTF assertion machinery. +#define CHECK(x) do { \ + if (!!(x)) \ + break; \ + crashLock.lock(); \ + WTFReportAssertionFailure(__FILE__, __LINE__, WTF_PRETTY_FUNCTION, #x); \ + CRASH(); \ + } while (false) + +VM* vm; + +std::unique_ptr<B3::Compilation> compile(B3::Procedure& proc) +{ + prepareForGeneration(proc.code()); + CCallHelpers jit(vm); + generate(proc.code(), jit); + LinkBuffer linkBuffer(*vm, jit, nullptr); + + return std::make_unique<B3::Compilation>( + FINALIZE_CODE(linkBuffer, ("testair compilation")), proc.releaseByproducts()); +} + +template<typename T, typename... Arguments> +T invoke(const B3::Compilation& code, Arguments... arguments) +{ + T (*function)(Arguments...) = bitwise_cast<T(*)(Arguments...)>(code.code().executableAddress()); + return function(arguments...); +} + +template<typename T, typename... Arguments> +T compileAndRun(B3::Procedure& procedure, Arguments... 
arguments) +{ + return invoke<T>(*compile(procedure), arguments...); +} + +void testSimple() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + root->append(Move, nullptr, Arg::imm(42), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + CHECK(compileAndRun<int>(proc) == 42); +} + +// Use this to put a constant into a register without Air being able to see the constant. +template<typename T> +void loadConstantImpl(BasicBlock* block, T value, B3::Air::Opcode move, Tmp tmp, Tmp scratch) +{ + static StaticLock lock; + static std::map<T, T*>* map; // I'm not messing with HashMap's problems with integers. + + LockHolder locker(lock); + if (!map) + map = new std::map<T, T*>(); + + if (!map->count(value)) + (*map)[value] = new T(value); + + T* ptr = (*map)[value]; + block->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(ptr)), scratch); + block->append(move, nullptr, Arg::addr(scratch), tmp); +} + +void loadConstant(BasicBlock* block, intptr_t value, Tmp tmp) +{ + loadConstantImpl<intptr_t>(block, value, Move, tmp, tmp); +} + +void loadDoubleConstant(BasicBlock* block, double value, Tmp tmp, Tmp scratch) +{ + loadConstantImpl<double>(block, value, MoveDouble, tmp, scratch); +} + +void testShuffleSimpleSwap() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width32)); + + int32_t things[4]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1); + CHECK(things[1] == 2); + CHECK(things[2] == 4); + CHECK(things[3] == 3); +} + +void testShuffleSimpleShift() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width32)); + + int32_t things[5]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, 
Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1); + CHECK(things[1] == 2); + CHECK(things[2] == 3); + CHECK(things[3] == 3); + CHECK(things[4] == 4); +} + +void testShuffleLongShift() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + loadConstant(root, 5, Tmp(GPRInfo::regT4)); + loadConstant(root, 6, Tmp(GPRInfo::regT5)); + loadConstant(root, 7, Tmp(GPRInfo::regT6)); + loadConstant(root, 8, Tmp(GPRInfo::regT7)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT6), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT6), Tmp(GPRInfo::regT7), Arg::widthArg(Arg::Width32)); + + int32_t things[8]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1); + CHECK(things[1] == 1); + CHECK(things[2] == 2); + CHECK(things[3] == 3); + CHECK(things[4] == 4); + CHECK(things[5] == 5); + CHECK(things[6] == 6); + CHECK(things[7] == 7); +} + +void testShuffleLongShiftBackwards() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + loadConstant(root, 5, Tmp(GPRInfo::regT4)); + loadConstant(root, 6, Tmp(GPRInfo::regT5)); + loadConstant(root, 7, Tmp(GPRInfo::regT6)); + loadConstant(root, 8, Tmp(GPRInfo::regT7)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT6), Tmp(GPRInfo::regT7), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT6), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT4), 
Tmp(GPRInfo::regT5), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32)); + + int32_t things[8]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1); + CHECK(things[1] == 1); + CHECK(things[2] == 2); + CHECK(things[3] == 3); + CHECK(things[4] == 4); + CHECK(things[5] == 5); + CHECK(things[6] == 6); + CHECK(things[7] == 7); +} + +void testShuffleSimpleRotate() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Arg::Width32)); + + int32_t things[4]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 3); + CHECK(things[1] == 1); + CHECK(things[2] == 2); + CHECK(things[3] == 4); +} + +void testShuffleSimpleBroadcast() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT0), 
Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32)); + + int32_t things[4]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1); + CHECK(things[1] == 1); + CHECK(things[2] == 1); + CHECK(things[3] == 1); +} + +void testShuffleBroadcastAllRegs() +{ + B3::Procedure proc; + Code& code = proc.code(); + + const Vector<Reg>& regs = code.regsInPriorityOrder(Arg::GP); + + BasicBlock* root = code.addBlock(); + root->append(Move, nullptr, Arg::imm(35), Tmp(GPRInfo::regT0)); + unsigned count = 1; + for (Reg reg : regs) { + if (reg != Reg(GPRInfo::regT0)) + loadConstant(root, count++, Tmp(reg)); + } + Inst& shuffle = root->append(Shuffle, nullptr); + for (Reg reg : regs) { + if (reg != Reg(GPRInfo::regT0)) + shuffle.append(Tmp(GPRInfo::regT0), Tmp(reg), Arg::widthArg(Arg::Width32)); + } + + StackSlot* slot = code.addStackSlot(sizeof(int32_t) * regs.size(), StackSlotKind::Locked); + for (unsigned i = 0; i < regs.size(); ++i) + root->append(Move32, nullptr, Tmp(regs[i]), Arg::stack(slot, i * sizeof(int32_t))); + + Vector<int32_t> things(regs.size(), 666); + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), base); + for (unsigned i = 0; i < regs.size(); ++i) { + root->append(Move32, nullptr, Arg::stack(slot, i * sizeof(int32_t)), Tmp(GPRInfo::regT0)); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, i * sizeof(int32_t))); + } + + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + CHECK(!compileAndRun<int>(proc)); + + for (int32_t thing : things) + CHECK(thing == 35); +} + +void testShuffleTreeShift() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + loadConstant(root, 5, Tmp(GPRInfo::regT4)); + loadConstant(root, 6, Tmp(GPRInfo::regT5)); + loadConstant(root, 7, Tmp(GPRInfo::regT6)); + loadConstant(root, 8, Tmp(GPRInfo::regT7)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT5), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT6), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT7), Arg::widthArg(Arg::Width32)); + + int32_t things[8]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), 
Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1); + CHECK(things[1] == 1); + CHECK(things[2] == 1); + CHECK(things[3] == 2); + CHECK(things[4] == 2); + CHECK(things[5] == 3); + CHECK(things[6] == 3); + CHECK(things[7] == 4); +} + +void testShuffleTreeShiftBackward() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + loadConstant(root, 5, Tmp(GPRInfo::regT4)); + loadConstant(root, 6, Tmp(GPRInfo::regT5)); + loadConstant(root, 7, Tmp(GPRInfo::regT6)); + loadConstant(root, 8, Tmp(GPRInfo::regT7)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT7), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT6), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT5), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32)); + + int32_t things[8]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1); + CHECK(things[1] == 1); + CHECK(things[2] == 1); + CHECK(things[3] == 2); + CHECK(things[4] == 2); + CHECK(things[5] == 3); + CHECK(things[6] == 3); + CHECK(things[7] == 4); +} + +void testShuffleTreeShiftOtherBackward() +{ + // NOTE: This test was my original attempt at 
TreeShiftBackward but mistakes were made. So, this + // ends up being just a weird test. But weird tests are useful, so I kept it. + + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + loadConstant(root, 5, Tmp(GPRInfo::regT4)); + loadConstant(root, 6, Tmp(GPRInfo::regT5)); + loadConstant(root, 7, Tmp(GPRInfo::regT6)); + loadConstant(root, 8, Tmp(GPRInfo::regT7)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT7), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT6), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT5), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT6), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT6), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT7), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT7), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32)); + + int32_t things[8]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT6), Arg::addr(base, 6 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT7), Arg::addr(base, 7 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1); + CHECK(things[1] == 8); + CHECK(things[2] == 8); + CHECK(things[3] == 7); + CHECK(things[4] == 7); + CHECK(things[5] == 6); + CHECK(things[6] == 6); + CHECK(things[7] == 5); +} + +void testShuffleMultipleShifts() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + loadConstant(root, 5, Tmp(GPRInfo::regT4)); + loadConstant(root, 6, Tmp(GPRInfo::regT5)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT5), Arg::widthArg(Arg::Width32)); + + int32_t things[6]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), 
Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1); + CHECK(things[1] == 1); + CHECK(things[2] == 3); + CHECK(things[3] == 3); + CHECK(things[4] == 3); + CHECK(things[5] == 1); +} + +void testShuffleRotateWithFringe() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + loadConstant(root, 5, Tmp(GPRInfo::regT4)); + loadConstant(root, 6, Tmp(GPRInfo::regT5)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT5), Arg::widthArg(Arg::Width32)); + + int32_t things[6]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 3); + CHECK(things[1] == 1); + CHECK(things[2] == 2); + CHECK(things[3] == 1); + CHECK(things[4] == 2); + CHECK(things[5] == 3); +} + +void testShuffleRotateWithFringeInWeirdOrder() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + loadConstant(root, 5, Tmp(GPRInfo::regT4)); + loadConstant(root, 6, Tmp(GPRInfo::regT5)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT5), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width32)); + + int32_t things[6]; + Tmp base 
= code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 3); + CHECK(things[1] == 1); + CHECK(things[2] == 2); + CHECK(things[3] == 1); + CHECK(things[4] == 2); + CHECK(things[5] == 3); +} + +void testShuffleRotateWithLongFringe() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + loadConstant(root, 5, Tmp(GPRInfo::regT4)); + loadConstant(root, 6, Tmp(GPRInfo::regT5)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Arg::Width32)); + + int32_t things[6]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 3); + CHECK(things[1] == 1); + CHECK(things[2] == 2); + CHECK(things[3] == 1); + CHECK(things[4] == 4); + CHECK(things[5] == 5); +} + +void testShuffleMultipleRotates() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + loadConstant(root, 5, Tmp(GPRInfo::regT4)); + loadConstant(root, 6, Tmp(GPRInfo::regT5)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), 
Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT5), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32)); + + int32_t things[6]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 3); + CHECK(things[1] == 1); + CHECK(things[2] == 2); + CHECK(things[3] == 6); + CHECK(things[4] == 4); + CHECK(things[5] == 5); +} + +void testShuffleShiftAndRotate() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + loadConstant(root, 4, Tmp(GPRInfo::regT3)); + loadConstant(root, 5, Tmp(GPRInfo::regT4)); + loadConstant(root, 6, Tmp(GPRInfo::regT5)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT1), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT0), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT4), Tmp(GPRInfo::regT5), Arg::widthArg(Arg::Width32)); + + int32_t things[6]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT5), Arg::addr(base, 5 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 3); + CHECK(things[1] == 1); + CHECK(things[2] == 2); + CHECK(things[3] == 4); + CHECK(things[4] == 4); + CHECK(things[5] == 5); +} + +void testShuffleShiftAllRegs() +{ + B3::Procedure proc; + Code& code = proc.code(); + + const Vector<Reg>& regs = code.regsInPriorityOrder(Arg::GP); + + BasicBlock* root = code.addBlock(); + for (unsigned i = 0; i < regs.size(); ++i) + loadConstant(root, 35 + i, Tmp(regs[i])); + Inst& shuffle = 
root->append(Shuffle, nullptr); + for (unsigned i = 1; i < regs.size(); ++i) + shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Arg::Width32)); + + StackSlot* slot = code.addStackSlot(sizeof(int32_t) * regs.size(), StackSlotKind::Locked); + for (unsigned i = 0; i < regs.size(); ++i) + root->append(Move32, nullptr, Tmp(regs[i]), Arg::stack(slot, i * sizeof(int32_t))); + + Vector<int32_t> things(regs.size(), 666); + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), base); + for (unsigned i = 0; i < regs.size(); ++i) { + root->append(Move32, nullptr, Arg::stack(slot, i * sizeof(int32_t)), Tmp(GPRInfo::regT0)); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, i * sizeof(int32_t))); + } + + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 35); + for (unsigned i = 1; i < regs.size(); ++i) + CHECK(things[i] == 35 + static_cast<int32_t>(i) - 1); +} + +void testShuffleRotateAllRegs() +{ + B3::Procedure proc; + Code& code = proc.code(); + + const Vector<Reg>& regs = code.regsInPriorityOrder(Arg::GP); + + BasicBlock* root = code.addBlock(); + for (unsigned i = 0; i < regs.size(); ++i) + loadConstant(root, 35 + i, Tmp(regs[i])); + Inst& shuffle = root->append(Shuffle, nullptr); + for (unsigned i = 1; i < regs.size(); ++i) + shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Arg::Width32)); + shuffle.append(Tmp(regs.last()), Tmp(regs[0]), Arg::widthArg(Arg::Width32)); + + StackSlot* slot = code.addStackSlot(sizeof(int32_t) * regs.size(), StackSlotKind::Locked); + for (unsigned i = 0; i < regs.size(); ++i) + root->append(Move32, nullptr, Tmp(regs[i]), Arg::stack(slot, i * sizeof(int32_t))); + + Vector<int32_t> things(regs.size(), 666); + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), base); + for (unsigned i = 0; i < regs.size(); ++i) { + root->append(Move32, nullptr, Arg::stack(slot, i * sizeof(int32_t)), Tmp(GPRInfo::regT0)); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, i * sizeof(int32_t))); + } + + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 35 + static_cast<int32_t>(regs.size()) - 1); + for (unsigned i = 1; i < regs.size(); ++i) + CHECK(things[i] == 35 + static_cast<int32_t>(i) - 1); +} + +void testShuffleSimpleSwap64() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 10000000000000000ll, Tmp(GPRInfo::regT0)); + loadConstant(root, 20000000000000000ll, Tmp(GPRInfo::regT1)); + loadConstant(root, 30000000000000000ll, Tmp(GPRInfo::regT2)); + loadConstant(root, 40000000000000000ll, Tmp(GPRInfo::regT3)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width64), + Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width64)); + + int64_t things[4]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT2), 
Arg::addr(base, 2 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int64_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 10000000000000000ll); + CHECK(things[1] == 20000000000000000ll); + CHECK(things[2] == 40000000000000000ll); + CHECK(things[3] == 30000000000000000ll); +} + +void testShuffleSimpleShift64() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 10000000000000000ll, Tmp(GPRInfo::regT0)); + loadConstant(root, 20000000000000000ll, Tmp(GPRInfo::regT1)); + loadConstant(root, 30000000000000000ll, Tmp(GPRInfo::regT2)); + loadConstant(root, 40000000000000000ll, Tmp(GPRInfo::regT3)); + loadConstant(root, 50000000000000000ll, Tmp(GPRInfo::regT4)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width64), + Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width64)); + + int64_t things[5]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int64_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 10000000000000000ll); + CHECK(things[1] == 20000000000000000ll); + CHECK(things[2] == 30000000000000000ll); + CHECK(things[3] == 30000000000000000ll); + CHECK(things[4] == 40000000000000000ll); +} + +void testShuffleSwapMixedWidth() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 10000000000000000ll, Tmp(GPRInfo::regT0)); + loadConstant(root, 20000000000000000ll, Tmp(GPRInfo::regT1)); + loadConstant(root, 30000000000000000ll, Tmp(GPRInfo::regT2)); + loadConstant(root, 40000000000000000ll, Tmp(GPRInfo::regT3)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width32), + Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT2), Arg::widthArg(Arg::Width64)); + + int64_t things[4]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int64_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 10000000000000000ll); + CHECK(things[1] == 20000000000000000ll); + 
CHECK(things[2] == 40000000000000000ll); + CHECK(things[3] == static_cast<uint32_t>(30000000000000000ll)); +} + +void testShuffleShiftMixedWidth() +{ + B3::Procedure proc; + Code& code = proc.code(); + + BasicBlock* root = code.addBlock(); + loadConstant(root, 10000000000000000ll, Tmp(GPRInfo::regT0)); + loadConstant(root, 20000000000000000ll, Tmp(GPRInfo::regT1)); + loadConstant(root, 30000000000000000ll, Tmp(GPRInfo::regT2)); + loadConstant(root, 40000000000000000ll, Tmp(GPRInfo::regT3)); + loadConstant(root, 50000000000000000ll, Tmp(GPRInfo::regT4)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT2), Tmp(GPRInfo::regT3), Arg::widthArg(Arg::Width64), + Tmp(GPRInfo::regT3), Tmp(GPRInfo::regT4), Arg::widthArg(Arg::Width32)); + + int64_t things[5]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT3), Arg::addr(base, 3 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT4), Arg::addr(base, 4 * sizeof(int64_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 10000000000000000ll); + CHECK(things[1] == 20000000000000000ll); + CHECK(things[2] == 30000000000000000ll); + CHECK(things[3] == 30000000000000000ll); + CHECK(things[4] == static_cast<uint32_t>(40000000000000000ll)); +} + +void testShuffleShiftMemory() +{ + B3::Procedure proc; + Code& code = proc.code(); + + int32_t memory[2]; + memory[0] = 35; + memory[1] = 36; + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT2)); + root->append( + Shuffle, nullptr, + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int32_t)), + Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int32_t)), Arg::widthArg(Arg::Width32)); + + int32_t things[2]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1); + CHECK(things[1] == 1); + CHECK(memory[0] == 35); + CHECK(memory[1] == 35); +} + +void testShuffleShiftMemoryLong() +{ + B3::Procedure proc; + Code& code = proc.code(); + + int32_t memory[2]; + memory[0] = 35; + memory[1] = 36; + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + loadConstant(root, 3, Tmp(GPRInfo::regT2)); + root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT3)); + root->append( + Shuffle, nullptr, + + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + + 
Tmp(GPRInfo::regT1), Arg::addr(Tmp(GPRInfo::regT3), 0 * sizeof(int32_t)), + Arg::widthArg(Arg::Width32), + + Arg::addr(Tmp(GPRInfo::regT3), 0 * sizeof(int32_t)), + Arg::addr(Tmp(GPRInfo::regT3), 1 * sizeof(int32_t)), Arg::widthArg(Arg::Width32), + + Arg::addr(Tmp(GPRInfo::regT3), 1 * sizeof(int32_t)), Tmp(GPRInfo::regT2), + Arg::widthArg(Arg::Width32)); + + int32_t things[3]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT2), Arg::addr(base, 2 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1); + CHECK(things[1] == 1); + CHECK(things[2] == 36); + CHECK(memory[0] == 2); + CHECK(memory[1] == 35); +} + +void testShuffleShiftMemoryAllRegs() +{ + B3::Procedure proc; + Code& code = proc.code(); + + int32_t memory[2]; + memory[0] = 35; + memory[1] = 36; + + Vector<Reg> regs = code.regsInPriorityOrder(Arg::GP); + regs.removeFirst(Reg(GPRInfo::regT0)); + + BasicBlock* root = code.addBlock(); + for (unsigned i = 0; i < regs.size(); ++i) + loadConstant(root, i + 1, Tmp(regs[i])); + root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0)); + Inst& shuffle = root->append( + Shuffle, nullptr, + + Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int32_t)), + Arg::widthArg(Arg::Width32), + + Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int32_t)), + Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int32_t)), Arg::widthArg(Arg::Width32), + + Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int32_t)), Tmp(regs[1]), + Arg::widthArg(Arg::Width32)); + + for (unsigned i = 2; i < regs.size(); ++i) + shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Arg::Width32)); + + Vector<int32_t> things(regs.size(), 666); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0)); + for (unsigned i = 0; i < regs.size(); ++i) { + root->append( + Move32, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int32_t))); + } + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1); + CHECK(things[1] == 36); + for (unsigned i = 2; i < regs.size(); ++i) + CHECK(things[i] == static_cast<int32_t>(i)); + CHECK(memory[0] == 1); + CHECK(memory[1] == 35); +} + +void testShuffleShiftMemoryAllRegs64() +{ + B3::Procedure proc; + Code& code = proc.code(); + + int64_t memory[2]; + memory[0] = 35000000000000ll; + memory[1] = 36000000000000ll; + + Vector<Reg> regs = code.regsInPriorityOrder(Arg::GP); + regs.removeFirst(Reg(GPRInfo::regT0)); + + BasicBlock* root = code.addBlock(); + for (unsigned i = 0; i < regs.size(); ++i) + loadConstant(root, (i + 1) * 1000000000000ll, Tmp(regs[i])); + root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0)); + Inst& shuffle = root->append( + Shuffle, nullptr, + + Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)), + Arg::widthArg(Arg::Width64), + + Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)), + Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), 
Arg::widthArg(Arg::Width64), + + Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Tmp(regs[1]), + Arg::widthArg(Arg::Width64)); + + for (unsigned i = 2; i < regs.size(); ++i) + shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Arg::Width64)); + + Vector<int64_t> things(regs.size(), 666); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0)); + for (unsigned i = 0; i < regs.size(); ++i) { + root->append( + Move, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int64_t))); + } + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1000000000000ll); + CHECK(things[1] == 36000000000000ll); + for (unsigned i = 2; i < regs.size(); ++i) + CHECK(things[i] == static_cast<int64_t>(i) * 1000000000000ll); + CHECK(memory[0] == 1000000000000ll); + CHECK(memory[1] == 35000000000000ll); +} + +int64_t combineHiLo(int64_t high, int64_t low) +{ + union { + int64_t value; + int32_t halves[2]; + } u; + u.value = high; + u.halves[0] = static_cast<int32_t>(low); + return u.value; +} + +void testShuffleShiftMemoryAllRegsMixedWidth() +{ + B3::Procedure proc; + Code& code = proc.code(); + + int64_t memory[2]; + memory[0] = 35000000000000ll; + memory[1] = 36000000000000ll; + + Vector<Reg> regs = code.regsInPriorityOrder(Arg::GP); + regs.removeFirst(Reg(GPRInfo::regT0)); + + BasicBlock* root = code.addBlock(); + for (unsigned i = 0; i < regs.size(); ++i) + loadConstant(root, (i + 1) * 1000000000000ll, Tmp(regs[i])); + root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0)); + Inst& shuffle = root->append( + Shuffle, nullptr, + + Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)), + Arg::widthArg(Arg::Width32), + + Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)), + Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Arg::widthArg(Arg::Width64), + + Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Tmp(regs[1]), + Arg::widthArg(Arg::Width32)); + + for (unsigned i = 2; i < regs.size(); ++i) { + shuffle.append( + Tmp(regs[i - 1]), Tmp(regs[i]), + (i & 1) ? Arg::widthArg(Arg::Width32) : Arg::widthArg(Arg::Width64)); + } + + Vector<int64_t> things(regs.size(), 666); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0)); + for (unsigned i = 0; i < regs.size(); ++i) { + root->append( + Move, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int64_t))); + } + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 1000000000000ll); + CHECK(things[1] == static_cast<uint32_t>(36000000000000ll)); + for (unsigned i = 2; i < regs.size(); ++i) { + int64_t value = static_cast<int64_t>(i) * 1000000000000ll; + CHECK(things[i] == ((i & 1) ? 
static_cast<uint32_t>(value) : value)); + } + CHECK(memory[0] == combineHiLo(35000000000000ll, 1000000000000ll)); + CHECK(memory[1] == 35000000000000ll); +} + +void testShuffleRotateMemory() +{ + B3::Procedure proc; + Code& code = proc.code(); + + int32_t memory[2]; + memory[0] = 35; + memory[1] = 36; + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1, Tmp(GPRInfo::regT0)); + loadConstant(root, 2, Tmp(GPRInfo::regT1)); + root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT2)); + root->append( + Shuffle, nullptr, + + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + + Tmp(GPRInfo::regT1), Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int32_t)), + Arg::widthArg(Arg::Width32), + + Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int32_t)), + Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int32_t)), Arg::widthArg(Arg::Width32), + + Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int32_t)), Tmp(GPRInfo::regT0), + Arg::widthArg(Arg::Width32)); + + int32_t things[2]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move32, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int32_t))); + root->append(Move32, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int32_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 36); + CHECK(things[1] == 1); + CHECK(memory[0] == 2); + CHECK(memory[1] == 35); +} + +void testShuffleRotateMemory64() +{ + B3::Procedure proc; + Code& code = proc.code(); + + int64_t memory[2]; + memory[0] = 35000000000000ll; + memory[1] = 36000000000000ll; + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1000000000000ll, Tmp(GPRInfo::regT0)); + loadConstant(root, 2000000000000ll, Tmp(GPRInfo::regT1)); + root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT2)); + root->append( + Shuffle, nullptr, + + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width64), + + Tmp(GPRInfo::regT1), Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int64_t)), + Arg::widthArg(Arg::Width64), + + Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int64_t)), + Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int64_t)), Arg::widthArg(Arg::Width64), + + Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int64_t)), Tmp(GPRInfo::regT0), + Arg::widthArg(Arg::Width64)); + + int64_t things[2]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 36000000000000ll); + CHECK(things[1] == 1000000000000ll); + CHECK(memory[0] == 2000000000000ll); + CHECK(memory[1] == 35000000000000ll); +} + +void testShuffleRotateMemoryMixedWidth() +{ + B3::Procedure proc; + Code& code = proc.code(); + + int64_t memory[2]; + memory[0] = 35000000000000ll; + memory[1] = 36000000000000ll; + + BasicBlock* root = code.addBlock(); + loadConstant(root, 1000000000000ll, Tmp(GPRInfo::regT0)); + loadConstant(root, 2000000000000ll, Tmp(GPRInfo::regT1)); + 
root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT2)); + root->append( + Shuffle, nullptr, + + Tmp(GPRInfo::regT0), Tmp(GPRInfo::regT1), Arg::widthArg(Arg::Width32), + + Tmp(GPRInfo::regT1), Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int64_t)), + Arg::widthArg(Arg::Width64), + + Arg::addr(Tmp(GPRInfo::regT2), 0 * sizeof(int64_t)), + Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int64_t)), Arg::widthArg(Arg::Width32), + + Arg::addr(Tmp(GPRInfo::regT2), 1 * sizeof(int64_t)), Tmp(GPRInfo::regT0), + Arg::widthArg(Arg::Width64)); + + int64_t things[2]; + Tmp base = code.newTmp(Arg::GP); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base); + root->append(Move, nullptr, Tmp(GPRInfo::regT0), Arg::addr(base, 0 * sizeof(int64_t))); + root->append(Move, nullptr, Tmp(GPRInfo::regT1), Arg::addr(base, 1 * sizeof(int64_t))); + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + memset(things, 0, sizeof(things)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == 36000000000000ll); + CHECK(things[1] == static_cast<uint32_t>(1000000000000ll)); + CHECK(memory[0] == 2000000000000ll); + CHECK(memory[1] == combineHiLo(36000000000000ll, 35000000000000ll)); +} + +void testShuffleRotateMemoryAllRegs64() +{ + B3::Procedure proc; + Code& code = proc.code(); + + int64_t memory[2]; + memory[0] = 35000000000000ll; + memory[1] = 36000000000000ll; + + Vector<Reg> regs = code.regsInPriorityOrder(Arg::GP); + regs.removeFirst(Reg(GPRInfo::regT0)); + + BasicBlock* root = code.addBlock(); + for (unsigned i = 0; i < regs.size(); ++i) + loadConstant(root, (i + 1) * 1000000000000ll, Tmp(regs[i])); + root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0)); + Inst& shuffle = root->append( + Shuffle, nullptr, + + Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)), + Arg::widthArg(Arg::Width64), + + Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)), + Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Arg::widthArg(Arg::Width64), + + Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Tmp(regs[1]), + Arg::widthArg(Arg::Width64), + + regs.last(), regs[0], Arg::widthArg(Arg::Width64)); + + for (unsigned i = 2; i < regs.size(); ++i) + shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Arg::Width64)); + + Vector<int64_t> things(regs.size(), 666); + root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0)); + for (unsigned i = 0; i < regs.size(); ++i) { + root->append( + Move, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int64_t))); + } + root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR)); + root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR)); + + CHECK(!compileAndRun<int>(proc)); + + CHECK(things[0] == static_cast<int64_t>(regs.size()) * 1000000000000ll); + CHECK(things[1] == 36000000000000ll); + for (unsigned i = 2; i < regs.size(); ++i) + CHECK(things[i] == static_cast<int64_t>(i) * 1000000000000ll); + CHECK(memory[0] == 1000000000000ll); + CHECK(memory[1] == 35000000000000ll); +} + +void testShuffleRotateMemoryAllRegsMixedWidth() +{ + B3::Procedure proc; + Code& code = proc.code(); + + int64_t memory[2]; + memory[0] = 35000000000000ll; + memory[1] = 36000000000000ll; + + Vector<Reg> regs = code.regsInPriorityOrder(Arg::GP); + regs.removeFirst(Reg(GPRInfo::regT0)); + + BasicBlock* root = code.addBlock(); + for (unsigned i = 0; i < regs.size(); ++i) + loadConstant(root, 
+    root->append(Move, nullptr, Arg::immPtr(&memory), Tmp(GPRInfo::regT0));
+    Inst& shuffle = root->append(
+        Shuffle, nullptr,
+
+        Tmp(regs[0]), Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
+        Arg::widthArg(Arg::Width32),
+
+        Arg::addr(Tmp(GPRInfo::regT0), 0 * sizeof(int64_t)),
+        Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Arg::widthArg(Arg::Width64),
+
+        Arg::addr(Tmp(GPRInfo::regT0), 1 * sizeof(int64_t)), Tmp(regs[1]),
+        Arg::widthArg(Arg::Width32),
+
+        regs.last(), regs[0], Arg::widthArg(Arg::Width32));
+
+    for (unsigned i = 2; i < regs.size(); ++i)
+        shuffle.append(Tmp(regs[i - 1]), Tmp(regs[i]), Arg::widthArg(Arg::Width64));
+
+    Vector<int64_t> things(regs.size(), 666);
+    root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things[0])), Tmp(GPRInfo::regT0));
+    for (unsigned i = 0; i < regs.size(); ++i) {
+        root->append(
+            Move, nullptr, Tmp(regs[i]), Arg::addr(Tmp(GPRInfo::regT0), i * sizeof(int64_t)));
+    }
+    root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
+    root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
+
+    CHECK(!compileAndRun<int>(proc));
+
+    CHECK(things[0] == static_cast<uint32_t>(static_cast<int64_t>(regs.size()) * 1000000000000ll));
+    CHECK(things[1] == static_cast<uint32_t>(36000000000000ll));
+    for (unsigned i = 2; i < regs.size(); ++i)
+        CHECK(things[i] == static_cast<int64_t>(i) * 1000000000000ll);
+    CHECK(memory[0] == combineHiLo(35000000000000ll, 1000000000000ll));
+    CHECK(memory[1] == 35000000000000ll);
+}
+
+void testShuffleSwapDouble()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    loadDoubleConstant(root, 1, Tmp(FPRInfo::fpRegT0), Tmp(GPRInfo::regT0));
+    loadDoubleConstant(root, 2, Tmp(FPRInfo::fpRegT1), Tmp(GPRInfo::regT0));
+    loadDoubleConstant(root, 3, Tmp(FPRInfo::fpRegT2), Tmp(GPRInfo::regT0));
+    loadDoubleConstant(root, 4, Tmp(FPRInfo::fpRegT3), Tmp(GPRInfo::regT0));
+    root->append(
+        Shuffle, nullptr,
+        Tmp(FPRInfo::fpRegT2), Tmp(FPRInfo::fpRegT3), Arg::widthArg(Arg::Width64),
+        Tmp(FPRInfo::fpRegT3), Tmp(FPRInfo::fpRegT2), Arg::widthArg(Arg::Width64));
+
+    double things[4];
+    Tmp base = code.newTmp(Arg::GP);
+    root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT0), Arg::addr(base, 0 * sizeof(double)));
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT1), Arg::addr(base, 1 * sizeof(double)));
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT2), Arg::addr(base, 2 * sizeof(double)));
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT3), Arg::addr(base, 3 * sizeof(double)));
+    root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
+    root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
+
+    memset(things, 0, sizeof(things));
+
+    CHECK(!compileAndRun<int>(proc));
+
+    CHECK(things[0] == 1);
+    CHECK(things[1] == 2);
+    CHECK(things[2] == 4);
+    CHECK(things[3] == 3);
+}
+
+void testShuffleShiftDouble()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    loadDoubleConstant(root, 1, Tmp(FPRInfo::fpRegT0), Tmp(GPRInfo::regT0));
+    loadDoubleConstant(root, 2, Tmp(FPRInfo::fpRegT1), Tmp(GPRInfo::regT0));
+    loadDoubleConstant(root, 3, Tmp(FPRInfo::fpRegT2), Tmp(GPRInfo::regT0));
+    loadDoubleConstant(root, 4, Tmp(FPRInfo::fpRegT3), Tmp(GPRInfo::regT0));
+    root->append(
+        Shuffle, nullptr,
+        Tmp(FPRInfo::fpRegT2), Tmp(FPRInfo::fpRegT3), Arg::widthArg(Arg::Width64));
+
+    double things[4];
+    Tmp base = code.newTmp(Arg::GP);
+    root->append(Move, nullptr, Arg::bigImm(bitwise_cast<intptr_t>(&things)), base);
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT0), Arg::addr(base, 0 * sizeof(double)));
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT1), Arg::addr(base, 1 * sizeof(double)));
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT2), Arg::addr(base, 2 * sizeof(double)));
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::fpRegT3), Arg::addr(base, 3 * sizeof(double)));
+    root->append(Move, nullptr, Arg::imm(0), Tmp(GPRInfo::returnValueGPR));
+    root->append(Ret32, nullptr, Tmp(GPRInfo::returnValueGPR));
+
+    memset(things, 0, sizeof(things));
+
+    CHECK(!compileAndRun<int>(proc));
+
+    CHECK(things[0] == 1);
+    CHECK(things[1] == 2);
+    CHECK(things[2] == 3);
+    CHECK(things[3] == 3);
+}
+
+#if CPU(X86) || CPU(X86_64)
+void testX86VMULSD()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(FPRInfo::argumentFPR1), Tmp(FPRInfo::argumentFPR2));
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR2), Tmp(FPRInfo::returnValueFPR));
+    root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+    CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDDestRex()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm15));
+    root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+    root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+    CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDOp1DestRex()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14));
+    root->append(MulDouble, nullptr, Tmp(X86Registers::xmm14), Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm15));
+    root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+    root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+    CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDOp2DestRex()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm14));
+    root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14), Tmp(X86Registers::xmm15));
+    root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+    root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+    CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDOpsDestRex()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14));
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR1), Tmp(X86Registers::xmm13));
+    root->append(MulDouble, nullptr, Tmp(X86Registers::xmm14), Tmp(X86Registers::xmm13), Tmp(X86Registers::xmm15));
+    root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+    root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+    CHECK(compileAndRun<double>(proc, 2.4, 4.2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDAddr()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(GPRInfo::argumentGPR0), - 16), Tmp(FPRInfo::argumentFPR2));
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR2), Tmp(FPRInfo::returnValueFPR));
+    root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+    double secondArg = 4.2;
+    CHECK(compileAndRun<double>(proc, 2.4, &secondArg + 2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDAddrOpRexAddr()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13));
+    root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(X86Registers::r13), - 16), Tmp(FPRInfo::argumentFPR2));
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR2), Tmp(FPRInfo::returnValueFPR));
+    root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+    double secondArg = 4.2;
+    CHECK(compileAndRun<double>(proc, 2.4, &secondArg + 2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDDestRexAddr()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(GPRInfo::argumentGPR0), 16), Tmp(X86Registers::xmm15));
+    root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+    root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+    double secondArg = 4.2;
+    CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 2, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDRegOpDestRexAddr()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    root->append(MoveDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm14));
+    root->append(MulDouble, nullptr, Arg::addr(Tmp(GPRInfo::argumentGPR0)), Tmp(X86Registers::xmm14), Tmp(X86Registers::xmm15));
+    root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+    root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+    double secondArg = 4.2;
+    CHECK(compileAndRun<double>(proc, 2.4, &secondArg, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDAddrOpDestRexAddr()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13));
+    root->append(MulDouble, nullptr, Tmp(FPRInfo::argumentFPR0), Arg::addr(Tmp(X86Registers::r13), 8), Tmp(X86Registers::xmm15));
+    root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm15), Tmp(FPRInfo::returnValueFPR));
+    root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+    double secondArg = 4.2;
+    CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 1, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDBaseNeedsRex()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13));
+    root->append(MulDouble, nullptr, Arg::index(Tmp(X86Registers::r13), Tmp(GPRInfo::argumentGPR1)), Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm0));
+    root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm0), Tmp(FPRInfo::returnValueFPR));
+    root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+    double secondArg = 4.2;
+    uint64_t index = 8;
+    CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 1, index, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDIndexNeedsRex()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR1), Tmp(X86Registers::r13));
+    root->append(MulDouble, nullptr, Arg::index(Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r13)), Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm0));
+    root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm0), Tmp(FPRInfo::returnValueFPR));
+    root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+    double secondArg = 4.2;
+    uint64_t index = - 8;
+    CHECK(compileAndRun<double>(proc, 2.4, &secondArg + 1, index, pureNaN()) == 2.4 * 4.2);
+}
+
+void testX86VMULSDBaseIndexNeedRex()
+{
+    B3::Procedure proc;
+    Code& code = proc.code();
+
+    BasicBlock* root = code.addBlock();
+    root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR0), Tmp(X86Registers::r12));
+    root->append(Move, nullptr, Tmp(GPRInfo::argumentGPR1), Tmp(X86Registers::r13));
+    root->append(MulDouble, nullptr, Arg::index(Tmp(X86Registers::r12), Tmp(X86Registers::r13)), Tmp(FPRInfo::argumentFPR0), Tmp(X86Registers::xmm0));
+    root->append(MoveDouble, nullptr, Tmp(X86Registers::xmm0), Tmp(FPRInfo::returnValueFPR));
+    root->append(RetDouble, nullptr, Tmp(FPRInfo::returnValueFPR));
+
+    double secondArg = 4.2;
+    uint64_t index = 16;
+    CHECK(compileAndRun<double>(proc, 2.4, &secondArg - 2, index, pureNaN()) == 2.4 * 4.2);
+}
+
+#endif
+
+#define RUN(test) do { \
+        if (!shouldRun(#test)) \
+            break; \
+        tasks.append( \
+            createSharedTask<void()>( \
+                [&] () { \
+                    dataLog(#test "...\n"); \
+                    test; \
+                    dataLog(#test ": OK!\n"); \
+                })); \
+    } while (false);
+
+void run(const char* filter)
+{
+    JSC::initializeThreading();
+    vm = &VM::create(LargeHeap).leakRef();
+
+    Deque<RefPtr<SharedTask<void()>>> tasks;
+
+    auto shouldRun = [&] (const char* testName) -> bool {
+        return !filter || !!strcasestr(testName, filter);
+    };
+
+    RUN(testSimple());
+
+    RUN(testShuffleSimpleSwap());
+    RUN(testShuffleSimpleShift());
+    RUN(testShuffleLongShift());
+    RUN(testShuffleLongShiftBackwards());
+    RUN(testShuffleSimpleRotate());
+    RUN(testShuffleSimpleBroadcast());
+    RUN(testShuffleBroadcastAllRegs());
+    RUN(testShuffleTreeShift());
+    RUN(testShuffleTreeShiftBackward());
+    RUN(testShuffleTreeShiftOtherBackward());
+    RUN(testShuffleMultipleShifts());
+    RUN(testShuffleRotateWithFringe());
+    RUN(testShuffleRotateWithFringeInWeirdOrder());
+    RUN(testShuffleRotateWithLongFringe());
+    RUN(testShuffleMultipleRotates());
+    RUN(testShuffleShiftAndRotate());
+    RUN(testShuffleShiftAllRegs());
+    RUN(testShuffleRotateAllRegs());
+    RUN(testShuffleSimpleSwap64());
+    RUN(testShuffleSimpleShift64());
+    RUN(testShuffleSwapMixedWidth());
+    RUN(testShuffleShiftMixedWidth());
+    RUN(testShuffleShiftMemory());
+    RUN(testShuffleShiftMemoryLong());
+    RUN(testShuffleShiftMemoryAllRegs());
+    RUN(testShuffleShiftMemoryAllRegs64());
+    RUN(testShuffleShiftMemoryAllRegsMixedWidth());
+    RUN(testShuffleRotateMemory());
+    RUN(testShuffleRotateMemory64());
+    RUN(testShuffleRotateMemoryMixedWidth());
+    RUN(testShuffleRotateMemoryAllRegs64());
+    RUN(testShuffleRotateMemoryAllRegsMixedWidth());
+    RUN(testShuffleSwapDouble());
+    RUN(testShuffleShiftDouble());
+
+#if CPU(X86) || CPU(X86_64)
+    RUN(testX86VMULSD());
+    RUN(testX86VMULSDDestRex());
+    RUN(testX86VMULSDOp1DestRex());
+    RUN(testX86VMULSDOp2DestRex());
+    RUN(testX86VMULSDOpsDestRex());
+
+    RUN(testX86VMULSDAddr());
+    RUN(testX86VMULSDAddrOpRexAddr());
+    RUN(testX86VMULSDDestRexAddr());
+    RUN(testX86VMULSDRegOpDestRexAddr());
+    RUN(testX86VMULSDAddrOpDestRexAddr());
+
+    RUN(testX86VMULSDBaseNeedsRex());
+    RUN(testX86VMULSDIndexNeedsRex());
+    RUN(testX86VMULSDBaseIndexNeedRex());
+#endif
+
+    if (tasks.isEmpty())
+        usage();
+
+    Lock lock;
+
+    Vector<ThreadIdentifier> threads;
+    for (unsigned i = filter ? 1 : WTF::numberOfProcessorCores(); i--;) {
+        threads.append(
+            createThread(
+                "testb3 thread",
+                [&] () {
+                    for (;;) {
+                        RefPtr<SharedTask<void()>> task;
+                        {
+                            LockHolder locker(lock);
+                            if (tasks.isEmpty())
+                                return;
+                            task = tasks.takeFirst();
+                        }
+
+                        task->run();
+                    }
+                }));
+    }
+
+    for (ThreadIdentifier thread : threads)
+        waitForThreadCompletion(thread);
+    crashLock.lock();
+}
+
+} // anonymous namespace
+
+#else // ENABLE(B3_JIT)
+
+static void run(const char*)
+{
+    dataLog("B3 JIT is not enabled.\n");
+}
+
+#endif // ENABLE(B3_JIT)
+
+int main(int argc, char** argv)
+{
+    const char* filter = nullptr;
+    switch (argc) {
+    case 1:
+        break;
+    case 2:
+        filter = argv[1];
+        break;
+    default:
+        usage();
+        break;
+    }
+
+    run(filter);
+    return 0;
+}