summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAustin Seipp <austin@well-typed.com>2013-11-11 10:26:03 -0600
committerAustin Seipp <austin@well-typed.com>2013-11-22 08:04:53 -0600
commit5bab1a57f572e29dfdffd6d1ce8e53a2772b18fd (patch)
tree3dd90a9434568af8b174b4809b385375979a4481
parent3fdad85781c4b5cdf424a78613353c9a6f57220e (diff)
downloadhaskell-5bab1a57f572e29dfdffd6d1ce8e53a2772b18fd.tar.gz
GHCi: Properly generate jump code for ARM (#8380)
This adds code for jumping to given addresses on ARM, written by Ben Gamari. However, when allocating new infotables for bytecode (which is where this jump code occurs), we need to be sure to flush the cache on the execute pointer returned from allocateExec() - on systems like ARM, the processor won't reliably read back newly written code or automatically flush its cache, whereas x86 will. So we add a new flushExec primitive that calls out to GCC's __builtin___clear_cache primitive, which will properly generate the correct code (nothing on x86, and a call to libgcc's __clear_cache on ARM), and we make sure to use it after writing the code out. Authored-by: Ben Gamari <bgamari.foss@gmail.com> Authored-by: Austin Seipp <austin@well-typed.com> Signed-off-by: Austin Seipp <austin@well-typed.com>
-rw-r--r--compiler/ghci/ByteCodeItbls.lhs23
-rw-r--r--includes/rts/storage/GC.h1
-rw-r--r--rts/Linker.c1
-rw-r--r--rts/sm/Storage.c33
4 files changed, 53 insertions, 5 deletions
diff --git a/compiler/ghci/ByteCodeItbls.lhs b/compiler/ghci/ByteCodeItbls.lhs
index 0d07be5f67..2180f87091 100644
--- a/compiler/ghci/ByteCodeItbls.lhs
+++ b/compiler/ghci/ByteCodeItbls.lhs
@@ -226,6 +226,20 @@ mkJumpToAddr dflags a = case platformArch (targetPlatform dflags) of
, 0x47ff041f -- nop
, fromIntegral (w64 .&. 0x0000FFFF)
, fromIntegral ((w64 `shiftR` 32) .&. 0x0000FFFF) ]
+
+ ArchARM { } ->
+ -- Generates Thumb sequence,
+ -- ldr r1, [pc, #0]
+ -- bx r1
+ --
+ -- which looks like:
+ -- 00000000 <.addr-0x8>:
+ -- 0: 4900 ldr r1, [pc] ; 8 <.addr>
+ -- 4: 4708 bx r1
+ let w32 = fromIntegral (ptrToInt a) :: Word32
+ in Left [ 0x49, 0x00
+ , 0x47, 0x08
+ , byte0 w32, byte1 w32, byte2 w32, byte3 w32]
arch ->
panic ("mkJumpToAddr not defined for " ++ show arch)
@@ -374,11 +388,16 @@ load = do addr <- advance
newExecConItbl :: DynFlags -> StgConInfoTable -> IO (FunPtr ())
newExecConItbl dflags obj
= alloca $ \pcode -> do
- wr_ptr <- _allocateExec (fromIntegral (sizeOfConItbl dflags obj)) pcode
+ let sz = fromIntegral (sizeOfConItbl dflags obj)
+ wr_ptr <- _allocateExec sz pcode
ex_ptr <- peek pcode
pokeConItbl dflags wr_ptr ex_ptr obj
+ _flushExec sz ex_ptr -- Cache flush (if needed)
return (castPtrToFunPtr ex_ptr)
foreign import ccall unsafe "allocateExec"
- _allocateExec :: CUInt -> Ptr (Ptr a) -> IO (Ptr a)
+ _allocateExec :: CUInt -> Ptr (Ptr a) -> IO (Ptr a)
+
+foreign import ccall unsafe "flushExec"
+ _flushExec :: CUInt -> Ptr a -> IO ()
\end{code}
diff --git a/includes/rts/storage/GC.h b/includes/rts/storage/GC.h
index 813349610c..f8b8afe328 100644
--- a/includes/rts/storage/GC.h
+++ b/includes/rts/storage/GC.h
@@ -161,6 +161,7 @@ typedef void* AdjustorWritable;
typedef void* AdjustorExecutable;
AdjustorWritable allocateExec(W_ len, AdjustorExecutable *exec_addr);
+void flushExec(W_ len, AdjustorExecutable exec_addr);
#if defined(ios_HOST_OS)
AdjustorWritable execToWritable(AdjustorExecutable exec);
#endif
diff --git a/rts/Linker.c b/rts/Linker.c
index 77943a5402..14ebac3683 100644
--- a/rts/Linker.c
+++ b/rts/Linker.c
@@ -1350,6 +1350,7 @@ typedef struct _RtsSymbolVal {
SymI_HasProto(g0) \
SymI_HasProto(allocate) \
SymI_HasProto(allocateExec) \
+ SymI_HasProto(flushExec) \
SymI_HasProto(freeExec) \
SymI_HasProto(getAllocations) \
SymI_HasProto(revertCAFs) \
diff --git a/rts/sm/Storage.c b/rts/sm/Storage.c
index 112ad8322a..c1a1a5a248 100644
--- a/rts/sm/Storage.c
+++ b/rts/sm/Storage.c
@@ -1152,7 +1152,16 @@ AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
return (ret + 1);
}
-// freeExec gets passed the executable address, not the writable address.
+void flushExec (W_ len, AdjustorExecutable exec_addr)
+{
+ /* Flush the cache for the len bytes starting at exec_addr by handing
+ the range [exec_addr, exec_addr + len) to the compiler builtin
+ __builtin___clear_cache.
+
+ On ARM and other platforms, we need to flush the cache after
+ writing code into memory, so the processor reliably sees it.
+
+ len is counted in bytes: the end of the range is computed with
+ unsigned char pointer arithmetic.
+ exec_addr is typed as the executable (AdjustorExecutable) address.
+ NOTE(review): presumably callers must pass the executable address
+ returned through allocateExec's exec_addr out-parameter rather than
+ the writable one - confirm against callers. */
+ unsigned char* begin = (unsigned char*)exec_addr;
+ unsigned char* end = begin + len;
+ __builtin___clear_cache(begin, end);
+}
+
+// freeExec gets passed the executable address, not the writable address.
void freeExec (AdjustorExecutable addr)
{
AdjustorWritable writable;
@@ -1198,6 +1207,15 @@ AdjustorWritable execToWritable(AdjustorExecutable exec)
return writ;
}
+void flushExec (W_ len, AdjustorExecutable exec_addr)
+{
+ /* Flush the cache for the len bytes starting at exec_addr by handing
+ the range [exec_addr, exec_addr + len) to the compiler builtin
+ __builtin___clear_cache.
+
+ On ARM and other platforms, we need to flush the cache after
+ writing code into memory, so the processor reliably sees it.
+
+ len is counted in bytes: the end of the range is computed with
+ unsigned char pointer arithmetic.
+ exec_addr is typed as the executable (AdjustorExecutable) address.
+ NOTE(review): this variant sits next to execToWritable, so the
+ writable and executable addresses presumably differ here and the
+ executable one must be passed - confirm against callers. */
+ unsigned char* begin = (unsigned char*)exec_addr;
+ unsigned char* end = begin + len;
+ __builtin___clear_cache(begin, end);
+}
+
void freeExec(AdjustorExecutable exec)
{
AdjustorWritable writ;
@@ -1225,7 +1243,7 @@ AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
barf("allocateExec: can't handle large objects");
}
- if (exec_block == NULL ||
+ if (exec_block == NULL ||
exec_block->free + n + 1 > exec_block->start + BLOCK_SIZE_W) {
bdescr *bd;
W_ pagesize = getPageSize();
@@ -1251,6 +1269,15 @@ AdjustorWritable allocateExec (W_ bytes, AdjustorExecutable *exec_ret)
return ret;
}
+void flushExec (W_ len, AdjustorExecutable exec_addr)
+{
+ /* Flush the cache for the len bytes starting at exec_addr by handing
+ the range [exec_addr, exec_addr + len) to the compiler builtin
+ __builtin___clear_cache.
+
+ On ARM and other platforms, we need to flush the cache after
+ writing code into memory, so the processor reliably sees it.
+
+ len is counted in bytes: the end of the range is computed with
+ unsigned char pointer arithmetic.
+ exec_addr is typed as the executable (AdjustorExecutable) address.
+ NOTE(review): presumably this is the same pointer that the
+ allocateExec variant above stores through exec_ret - confirm
+ against callers. */
+ unsigned char* begin = (unsigned char*)exec_addr;
+ unsigned char* end = begin + len;
+ __builtin___clear_cache(begin, end);
+}
+
void freeExec (void *addr)
{
StgPtr p = (StgPtr)addr - 1;
@@ -1283,7 +1310,7 @@ void freeExec (void *addr)
}
RELEASE_SM_LOCK
-}
+}
#endif /* mingw32_HOST_OS */