Diffstat (limited to 'Source/JavaScriptCore/b3/air/AirEmitShuffle.cpp')
-rw-r--r--  Source/JavaScriptCore/b3/air/AirEmitShuffle.cpp  543
1 file changed, 543 insertions, 0 deletions
diff --git a/Source/JavaScriptCore/b3/air/AirEmitShuffle.cpp b/Source/JavaScriptCore/b3/air/AirEmitShuffle.cpp
new file mode 100644
index 000000000..318471976
--- /dev/null
+++ b/Source/JavaScriptCore/b3/air/AirEmitShuffle.cpp
@@ -0,0 +1,543 @@
+/*
+ * Copyright (C) 2016 Apple Inc. All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "AirEmitShuffle.h"
+
+#if ENABLE(B3_JIT)
+
+#include "AirCode.h"
+#include "AirInstInlines.h"
+#include <wtf/GraphNodeWorklist.h>
+#include <wtf/ListDump.h>
+
+namespace JSC { namespace B3 { namespace Air {
+
+namespace {
+
+bool verbose = false;
+
+template<typename Functor>
+Tmp findPossibleScratch(Code& code, Arg::Type type, const Functor& functor)
+{
+ for (Reg reg : code.regsInPriorityOrder(type)) {
+ Tmp tmp(reg);
+ if (functor(tmp))
+ return tmp;
+ }
+ return Tmp();
+}
+
+Tmp findPossibleScratch(Code& code, Arg::Type type, const Arg& arg1, const Arg& arg2)
+{
+ return findPossibleScratch(
+ code, type,
+ [&] (Tmp tmp) -> bool {
+ return !arg1.usesTmp(tmp) && !arg2.usesTmp(tmp);
+ });
+}
+
+// Example: (a => b, b => a, a => c, b => d)
+struct Rotate {
+ Vector<ShufflePair> loop; // in the example, this is the loop: (a => b, b => a)
+ Vector<ShufflePair> fringe; // in the example, these are the associated shifts: (a => c, b => d)
+};
+
+} // anonymous namespace
+
+void ShufflePair::dump(PrintStream& out) const
+{
+ out.print(width(), ":", src(), "=>", dst());
+}
+
+Inst createShuffle(Value* origin, const Vector<ShufflePair>& pairs)
+{
+ Inst result(Shuffle, origin);
+ for (const ShufflePair& pair : pairs)
+ result.append(pair.src(), pair.dst(), Arg::widthArg(pair.width()));
+ return result;
+}
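+
+// For illustration (hypothetical 64-bit GP tmps a and b): a caller could build the swap
+// (a => b, b => a) as below; the resulting Shuffle inst carries one (src, dst, width)
+// triple per pair, and emitShuffle() below is what later lowers it to concrete moves and
+// swaps.
+//
+//     Vector<ShufflePair> pairs;
+//     pairs.append(ShufflePair(Arg(a), Arg(b), Arg::Width64));
+//     pairs.append(ShufflePair(Arg(b), Arg(a), Arg::Width64));
+//     Inst shuffle = createShuffle(origin, pairs);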
+
+Vector<Inst> emitShuffle(
+ Code& code, Vector<ShufflePair> pairs, std::array<Arg, 2> scratches, Arg::Type type,
+ Value* origin)
+{
+ if (verbose) {
+ dataLog(
+ "Dealing with pairs: ", listDump(pairs), " and scratches ", scratches[0], ", ",
+ scratches[1], "\n");
+ }
+
+ pairs.removeAllMatching(
+ [&] (const ShufflePair& pair) -> bool {
+ return pair.src() == pair.dst();
+ });
+
+ // First validate that this is the kind of shuffle that we know how to deal with.
+#if !ASSERT_DISABLED
+ for (const ShufflePair& pair : pairs) {
+ ASSERT(pair.src().isType(type));
+ ASSERT(pair.dst().isType(type));
+ ASSERT(pair.dst().isTmp() || pair.dst().isMemory());
+ }
+#endif // !ASSERT_DISABLED
+
+ // There are two possible kinds of operations that we will do:
+ //
+ // - Shift. Example: (a => b, b => c). We emit this as "Move b, c; Move a, b". This only requires
+ // scratch registers if there are memory->memory moves. We want to find as many of these as
+ // possible because they are cheaper. Note that shifts can involve the same source mentioned
+ // multiple times. Example: (a => b, a => c, b => d, b => e).
+ //
+ // - Rotate. Example: (a => b, b => a). We want to emit this as "Swap a, b", but that instruction
+ // may not be available, in which case we may need a scratch register or a scratch memory
+ // location. A gnarlier example is (a => b, b => c, c => a). We can emit this as "Swap b, c;
+ // Swap a, b". Note that swapping has to be careful about differing widths.
+ //
+// Note that a rotate can have "fringe". For example, we might have (a => b, b => a, a => c,
+ // b => d). This has a rotate loop (a => b, b => a) and some fringe (a => c, b => d). We treat
+ // the whole thing as a single rotate.
+ //
+ // We will find multiple disjoint such operations. We can execute them in any order.
+
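+// As a worked sketch (hypothetical GP tmps a, b, c, d, all Width64, on a target where
+// Swap64 has a Tmp, Tmp form): the fringe example above, (a => b, b => a, a => c, b => d),
+// lowers to its fringe moves followed by the rotate proper:
+//
+//     Move b, d; Move a, c; Swap a, b
+//
+// while the disjoint shift (a => b, b => c) lowers independently to "Move b, c; Move a, b".
+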
+ // We interpret these as Moves that should be executed backwards. All shifts are keyed by their
+ // starting source.
+ HashMap<Arg, Vector<ShufflePair>> shifts;
+
+ // We interpret these as Swaps over src()'s that should be executed backwards, i.e. for a list
+ // of size 3 we would do "Swap list[1].src(), list[2].src(); Swap list[0].src(), list[1].src()".
+ // Note that we actually can't do that if the widths don't match or other bad things happen.
+    // But, prior to executing all of that, we need to execute the fringe: the shifts coming off the
+ // rotate.
+ Vector<Rotate> rotates;
+
+ {
+ HashMap<Arg, Vector<ShufflePair>> mapping;
+ for (const ShufflePair& pair : pairs)
+ mapping.add(pair.src(), Vector<ShufflePair>()).iterator->value.append(pair);
+
+ Vector<ShufflePair> currentPairs;
+
+ while (!mapping.isEmpty()) {
+ ASSERT(currentPairs.isEmpty());
+ Arg originalSrc = mapping.begin()->key;
+ ASSERT(!shifts.contains(originalSrc));
+ if (verbose)
+ dataLog("Processing from ", originalSrc, "\n");
+
+ GraphNodeWorklist<Arg> worklist;
+ worklist.push(originalSrc);
+ while (Arg src = worklist.pop()) {
+ HashMap<Arg, Vector<ShufflePair>>::iterator iter = mapping.find(src);
+ if (iter == mapping.end()) {
+ // With a shift it's possible that we previously built the tail of this shift.
+ // See if that's the case now.
+ if (verbose)
+ dataLog("Trying to append shift at ", src, "\n");
+ currentPairs.appendVector(shifts.take(src));
+ continue;
+ }
+ Vector<ShufflePair> pairs = WTFMove(iter->value);
+ mapping.remove(iter);
+
+ for (const ShufflePair& pair : pairs) {
+ currentPairs.append(pair);
+ ASSERT(pair.src() == src);
+ worklist.push(pair.dst());
+ }
+ }
+
+ ASSERT(currentPairs.size());
+ ASSERT(currentPairs[0].src() == originalSrc);
+
+ if (verbose)
+ dataLog("currentPairs = ", listDump(currentPairs), "\n");
+
+ bool isRotate = false;
+ for (const ShufflePair& pair : currentPairs) {
+ if (pair.dst() == originalSrc) {
+ isRotate = true;
+ break;
+ }
+ }
+
+ if (isRotate) {
+ if (verbose)
+ dataLog("It's a rotate.\n");
+ Rotate rotate;
+
+ // The common case is that the rotate does not have fringe. The only way to
+ // check for this is to examine the whole rotate.
+ bool ok;
+ if (currentPairs.last().dst() == originalSrc) {
+ ok = true;
+ for (unsigned i = currentPairs.size() - 1; i--;)
+ ok &= currentPairs[i].dst() == currentPairs[i + 1].src();
+ } else
+ ok = false;
+
+ if (ok)
+ rotate.loop = WTFMove(currentPairs);
+ else {
+ // This is the slow path. The rotate has fringe.
+
+ HashMap<Arg, ShufflePair> dstMapping;
+ for (const ShufflePair& pair : currentPairs)
+ dstMapping.add(pair.dst(), pair);
+
+ ShufflePair pair = dstMapping.take(originalSrc);
+ for (;;) {
+ rotate.loop.append(pair);
+
+ auto iter = dstMapping.find(pair.src());
+ if (iter == dstMapping.end())
+ break;
+ pair = iter->value;
+ dstMapping.remove(iter);
+ }
+
+ rotate.loop.reverse();
+
+                // Make sure that the fringe appears in the same order as it appeared in
+                // currentPairs, since that's the DFS order.
+ for (const ShufflePair& pair : currentPairs) {
+ // But of course we only include it if it's not in the loop.
+ if (dstMapping.contains(pair.dst()))
+ rotate.fringe.append(pair);
+ }
+ }
+
+ // If the graph search terminates because we returned to the first source, then the
+ // pair list has to have a very particular shape.
+ for (unsigned i = rotate.loop.size() - 1; i--;)
+ ASSERT(rotate.loop[i].dst() == rotate.loop[i + 1].src());
+ rotates.append(WTFMove(rotate));
+ currentPairs.resize(0);
+ } else {
+ if (verbose)
+ dataLog("It's a shift.\n");
+ shifts.add(originalSrc, WTFMove(currentPairs));
+ }
+ }
+ }
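+
+    // At this point the decomposition is complete. For example (hypothetical GP tmps), the
+    // pairs (a => b, b => a, a => c, e => f) decompose into one rotate
+    // { loop: (a => b, b => a), fringe: (a => c) } and one shift keyed by e: (e => f).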
+
+ if (verbose) {
+ dataLog("Shifts:\n");
+ for (auto& entry : shifts)
+ dataLog(" ", entry.key, ": ", listDump(entry.value), "\n");
+ dataLog("Rotates:\n");
+ for (auto& rotate : rotates)
+ dataLog(" loop = ", listDump(rotate.loop), ", fringe = ", listDump(rotate.fringe), "\n");
+ }
+
+    // In the worst case, we need two scratch registers. The way we do this is that the client passes
+    // us whatever scratch registers it happens to have lying around. We will need scratch registers in
+ // the following cases:
+ //
+ // - Shuffle pairs where both src and dst refer to memory.
+ // - Rotate when no Swap instruction is available.
+ //
+ // Lucky for us, we are guaranteed to have extra scratch registers anytime we have a Shift that
+ // ends with a register. We search for such a register right now.
+
+ auto moveForWidth = [&] (Arg::Width width) -> Opcode {
+ switch (width) {
+ case Arg::Width32:
+ return type == Arg::GP ? Move32 : MoveFloat;
+ case Arg::Width64:
+ return type == Arg::GP ? Move : MoveDouble;
+ default:
+ RELEASE_ASSERT_NOT_REACHED();
+ }
+ };
+
+ Opcode conservativeMove = moveForWidth(Arg::conservativeWidth(type));
+
+    // We will emit things in reverse. We maintain a list of packs of instructions, and then we
+    // append the packs together in reverse (for example the pack at the end of resultPacks is placed
+ // first). This is useful because the last thing we emit frees up its destination registers, so
+ // it affects how we emit things before it.
+ Vector<Vector<Inst>> resultPacks;
+ Vector<Inst> result;
+
+ auto commitResult = [&] () {
+ resultPacks.append(WTFMove(result));
+ };
+
+ auto getScratch = [&] (unsigned index, Tmp possibleScratch) -> Tmp {
+ if (scratches[index].isTmp())
+ return scratches[index].tmp();
+
+ if (!possibleScratch)
+ return Tmp();
+ result.append(Inst(conservativeMove, origin, possibleScratch, scratches[index]));
+ return possibleScratch;
+ };
+
+ auto returnScratch = [&] (unsigned index, Tmp tmp) {
+ if (Arg(tmp) != scratches[index])
+ result.append(Inst(conservativeMove, origin, scratches[index], tmp));
+ };
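+
+    // A sketch of how these two compose (hypothetical: scratches[0] is a stack slot S, and %t
+    // is a register not mentioned by the pair being lowered but possibly holding a live shuffle
+    // value): getScratch(0, %t) emits "Move %t, S" to preserve %t and hands %t back; the caller
+    // uses %t as its temporary; returnScratch(0, %t) then emits "Move S, %t" to restore it. If
+    // scratches[0] were itself a tmp, no save/restore traffic would be emitted at all.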
+
+ auto handleShiftPair = [&] (const ShufflePair& pair, unsigned scratchIndex) {
+ Opcode move = moveForWidth(pair.width());
+
+ if (!isValidForm(move, pair.src().kind(), pair.dst().kind())) {
+ Tmp scratch =
+ getScratch(scratchIndex, findPossibleScratch(code, type, pair.src(), pair.dst()));
+ RELEASE_ASSERT(scratch);
+ if (isValidForm(move, pair.src().kind(), Arg::Tmp))
+ result.append(Inst(moveForWidth(pair.width()), origin, pair.src(), scratch));
+ else {
+ ASSERT(pair.src().isSomeImm());
+ ASSERT(move == Move32);
+ result.append(Inst(Move, origin, Arg::bigImm(pair.src().value()), scratch));
+ }
+ result.append(Inst(moveForWidth(pair.width()), origin, scratch, pair.dst()));
+ returnScratch(scratchIndex, scratch);
+ return;
+ }
+
+ result.append(Inst(move, origin, pair.src(), pair.dst()));
+ };
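+
+    // For instance (hypothetical 64-bit GP pair between stack slots S1 => S2, with no Addr, Addr
+    // form of Move): the pair is lowered via a borrowed scratch register %t as
+    // "Move S1, %t; Move %t, S2", bracketed by the save/restore traffic from getScratch() and
+    // returnScratch() above when the scratch has to be stolen from a live register.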
+
+ auto handleShift = [&] (Vector<ShufflePair>& shift) {
+ // FIXME: We could optimize the spill behavior of the shifter by checking if any of the
+ // shifts need spills. If they do, then we could try to get a register out here. Note that
+ // this may fail where the current strategy succeeds: out here we need a register that does
+ // not interfere with any of the shifts, while the current strategy only needs to find a
+    // scratch register that does not interfere with a particular shift. So, this optimization
+ // will be opportunistic: if it succeeds, then the individual shifts can use that scratch,
+ // otherwise they will do what they do now.
+
+ for (unsigned i = shift.size(); i--;)
+ handleShiftPair(shift[i], 0);
+
+ Arg lastDst = shift.last().dst();
+ if (lastDst.isTmp()) {
+ for (Arg& scratch : scratches) {
+ ASSERT(scratch != lastDst);
+ if (!scratch.isTmp()) {
+ scratch = lastDst;
+ break;
+ }
+ }
+ }
+ };
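+
+    // Example of the scratch donation above (hypothetical): after emitting the shift
+    // (a => b, b => %t) where %t is a tmp, %t's pre-shuffle value is dead (it is nobody's
+    // source), so %t is recorded in scratches[] and any pack emitted after this one (which
+    // executes earlier in the final order) may use it for free.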
+
+ // First handle shifts whose last destination is a tmp because these free up scratch registers.
+ // These end up last in the final sequence, so the final destination of these shifts will be
+    // available as a scratch location for anything that executes earlier (that is, anything we
+    // emit after this, since we're emitting in reverse).
+ for (auto& entry : shifts) {
+ Vector<ShufflePair>& shift = entry.value;
+ if (shift.last().dst().isTmp())
+ handleShift(shift);
+ commitResult();
+ }
+
+ // Now handle the rest of the shifts.
+ for (auto& entry : shifts) {
+ Vector<ShufflePair>& shift = entry.value;
+ if (!shift.last().dst().isTmp())
+ handleShift(shift);
+ commitResult();
+ }
+
+ for (Rotate& rotate : rotates) {
+ if (!rotate.fringe.isEmpty()) {
+ // Make sure we do the fringe first! This won't clobber any of the registers that are
+ // part of the rotation.
+ handleShift(rotate.fringe);
+ }
+
+ bool canSwap = false;
+ Opcode swap = Oops;
+ Arg::Width swapWidth = Arg::Width8; // bogus value
+
+ // Currently, the swap instruction is not available for floating point on any architecture we
+ // support.
+ if (type == Arg::GP) {
+ // Figure out whether we will be doing 64-bit swaps or 32-bit swaps. If we have a mix of
+ // widths we handle that by fixing up the relevant register with zero-extends.
+ swap = Swap32;
+ swapWidth = Arg::Width32;
+ bool hasMemory = false;
+ bool hasIndex = false;
+ for (ShufflePair& pair : rotate.loop) {
+ switch (pair.width()) {
+ case Arg::Width32:
+ break;
+ case Arg::Width64:
+ swap = Swap64;
+ swapWidth = Arg::Width64;
+ break;
+ default:
+ RELEASE_ASSERT_NOT_REACHED();
+ break;
+ }
+
+ hasMemory |= pair.src().isMemory() || pair.dst().isMemory();
+ hasIndex |= pair.src().isIndex() || pair.dst().isIndex();
+ }
+
+ canSwap = isValidForm(swap, Arg::Tmp, Arg::Tmp);
+
+ // We can totally use swaps even if there are shuffles involving memory. But, we play it
+ // safe in that case. There are corner cases we don't handle, and our ability to do it is
+ // contingent upon swap form availability.
+
+ if (hasMemory) {
+ canSwap &= isValidForm(swap, Arg::Tmp, Arg::Addr);
+
+ // We don't take the swapping path if there is a mix of widths and some of the
+ // shuffles involve memory. That gets too confusing. We might be able to relax this
+ // to only bail if there are subwidth pairs involving memory, but I haven't thought
+ // about it very hard. Anyway, this case is not common: rotates involving memory
+ // don't arise for function calls, and they will only happen for rotates in user code
+ // if some of the variables get spilled. It's hard to imagine a program that rotates
+ // data around in variables while also doing a combination of uint32->uint64 and
+ // int64->int32 casts.
+ for (ShufflePair& pair : rotate.loop)
+ canSwap &= pair.width() == swapWidth;
+ }
+
+ if (hasIndex)
+ canSwap &= isValidForm(swap, Arg::Tmp, Arg::Index);
+ }
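+
+        // For instance (hypothetical): a GP loop (32:a => b, 64:b => a) selects Swap64, and
+        // the 32-bit pair's destination is patched up afterwards with a zero-extending
+        // "Move32 b, b"; see the fix-up loop below.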
+
+ if (canSwap) {
+ for (unsigned i = rotate.loop.size() - 1; i--;) {
+ Arg left = rotate.loop[i].src();
+ Arg right = rotate.loop[i + 1].src();
+
+ if (left.isMemory() && right.isMemory()) {
+ // Note that this is a super rare outcome. Rotates are rare. Spills are rare.
+ // Moving data between two spills is rare. To get here a lot of rare stuff has to
+ // all happen at once.
+
+ Tmp scratch = getScratch(0, findPossibleScratch(code, type, left, right));
+ RELEASE_ASSERT(scratch);
+ result.append(Inst(moveForWidth(swapWidth), origin, left, scratch));
+ result.append(Inst(swap, origin, scratch, right));
+ result.append(Inst(moveForWidth(swapWidth), origin, scratch, left));
+ returnScratch(0, scratch);
+ continue;
+ }
+
+ if (left.isMemory())
+ std::swap(left, right);
+
+ result.append(Inst(swap, origin, left, right));
+ }
+
+ for (ShufflePair pair : rotate.loop) {
+ if (pair.width() == swapWidth)
+ continue;
+
+ RELEASE_ASSERT(pair.width() == Arg::Width32);
+ RELEASE_ASSERT(swapWidth == Arg::Width64);
+ RELEASE_ASSERT(pair.dst().isTmp());
+
+ // Need to do an extra zero extension.
+ result.append(Inst(Move32, origin, pair.dst(), pair.dst()));
+ }
+ } else {
+ // We can treat this as a shift so long as we take the last destination (i.e. first
+ // source) and save it first. Then we handle the first entry in the pair in the rotate
+ // specially, after we restore the last destination. This requires some special care to
+ // find a scratch register. It's possible that we have a rotate that uses the entire
+ // available register file.
+
+ Tmp scratch = findPossibleScratch(
+ code, type,
+ [&] (Tmp tmp) -> bool {
+ for (ShufflePair pair : rotate.loop) {
+ if (pair.src().usesTmp(tmp))
+ return false;
+ if (pair.dst().usesTmp(tmp))
+ return false;
+ }
+ return true;
+ });
+
+ // NOTE: This is the most likely use of scratch registers.
+ scratch = getScratch(0, scratch);
+
+ // We may not have found a scratch register. When this happens, we can just use the spill
+ // slot directly.
+ Arg rotateSave = scratch ? Arg(scratch) : scratches[0];
+
+ handleShiftPair(
+ ShufflePair(rotate.loop.last().dst(), rotateSave, rotate.loop[0].width()), 1);
+
+ for (unsigned i = rotate.loop.size(); i-- > 1;)
+ handleShiftPair(rotate.loop[i], 1);
+
+ handleShiftPair(
+ ShufflePair(rotateSave, rotate.loop[0].dst(), rotate.loop[0].width()), 1);
+
+ if (scratch)
+ returnScratch(0, scratch);
+ }
+
+ commitResult();
+ }
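+
+    // A concrete sketch of the no-swap fallback above (hypothetical GP tmps a, b, c and
+    // scratch %t): the rotate (a => b, b => c, c => a) is emitted as
+    //
+    //     Move a, %t; Move c, a; Move b, c; Move %t, b
+    //
+    // i.e. save the first source, run the remaining pairs as a shift, then restore into the
+    // first destination.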
+
+ ASSERT(result.isEmpty());
+
+ for (unsigned i = resultPacks.size(); i--;)
+ result.appendVector(resultPacks[i]);
+
+ return result;
+}
+
+Vector<Inst> emitShuffle(
+ Code& code, const Vector<ShufflePair>& pairs,
+ const std::array<Arg, 2>& gpScratch, const std::array<Arg, 2>& fpScratch,
+ Value* origin)
+{
+ Vector<ShufflePair> gpPairs;
+ Vector<ShufflePair> fpPairs;
+ for (const ShufflePair& pair : pairs) {
+ if (pair.src().isMemory() && pair.dst().isMemory() && pair.width() > Arg::pointerWidth()) {
+ // 8-byte memory-to-memory moves on a 32-bit platform are best handled as float moves.
+ fpPairs.append(pair);
+ } else if (pair.src().isGP() && pair.dst().isGP()) {
+ // This means that gpPairs gets memory-to-memory shuffles. The assumption is that we
+ // can do that more efficiently using GPRs, except in the special case above.
+ gpPairs.append(pair);
+ } else
+ fpPairs.append(pair);
+ }
+
+ Vector<Inst> result;
+ result.appendVector(emitShuffle(code, gpPairs, gpScratch, Arg::GP, origin));
+ result.appendVector(emitShuffle(code, fpPairs, fpScratch, Arg::FP, origin));
+ return result;
+}
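+
+// A minimal usage sketch (hypothetical: t0, t1 are free GP tmps, f0, f1 are free FP tmps,
+// and `pairs` holds a mix of GP and FP shuffle pairs):
+//
+//     std::array<Arg, 2> gpScratch = { Arg(t0), Arg(t1) };
+//     std::array<Arg, 2> fpScratch = { Arg(f0), Arg(f1) };
+//     insts.appendVector(emitShuffle(code, pairs, gpScratch, fpScratch, origin));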
+
+} } } // namespace JSC::B3::Air
+
+#endif // ENABLE(B3_JIT)
+