 rts/posix/OSMem.c  | 19
 rts/sm/HeapAlloc.h | 28
 rts/sm/MBlock.c    | 26
 rts/sm/OSMem.h     | 10
 rts/win32/OSMem.c  |  6
 5 files changed, 50 insertions(+), 39 deletions(-)
diff --git a/rts/posix/OSMem.c b/rts/posix/OSMem.c
index aa3f3a132a..43c7831a37 100644
--- a/rts/posix/OSMem.c
+++ b/rts/posix/OSMem.c
@@ -377,22 +377,22 @@ void setExecutable (void *p, W_ len, rtsBool exec)
 #ifdef USE_LARGE_ADDRESS_SPACE

 static void *
-osTryReserveHeapMemory (void *hint)
+osTryReserveHeapMemory (W_ len, void *hint)
 {
     void *base, *top;
     void *start, *end;

-    /* We try to allocate MBLOCK_SPACE_SIZE + MBLOCK_SIZE,
+    /* We try to allocate len + MBLOCK_SIZE,
        because we need memory which is MBLOCK_SIZE aligned,
        and then we discard what we don't need */

-    base = my_mmap(hint, MBLOCK_SPACE_SIZE + MBLOCK_SIZE, MEM_RESERVE);
-    top = (void*)((W_)base + MBLOCK_SPACE_SIZE + MBLOCK_SIZE);
+    base = my_mmap(hint, len + MBLOCK_SIZE, MEM_RESERVE);
+    top = (void*)((W_)base + len + MBLOCK_SIZE);

     if (((W_)base & MBLOCK_MASK) != 0) {
         start = MBLOCK_ROUND_UP(base);
         end = MBLOCK_ROUND_DOWN(top);
-        ASSERT(((W_)end - (W_)start) == MBLOCK_SPACE_SIZE);
+        ASSERT(((W_)end - (W_)start) == len);

         if (munmap(base, (W_)start-(W_)base) < 0) {
             sysErrorBelch("unable to release slop before heap");
@@ -407,7 +407,7 @@ osTryReserveHeapMemory (void *hint)
     return start;
 }

-void *osReserveHeapMemory(void)
+void *osReserveHeapMemory(W_ len)
 {
     int attempt;
     void *at;
@@ -425,8 +425,8 @@ void *osReserveHeapMemory(void)

     attempt = 0;
     do {
-        at = osTryReserveHeapMemory((void*)((W_)8 * (1 << 30) +
-                                            attempt * BLOCK_SIZE));
+        void *hint = (void*)((W_)8 * (1 << 30) + attempt * BLOCK_SIZE);
+        at = osTryReserveHeapMemory(len, hint);
     } while ((W_)at < ((W_)8 * (1 << 30)));

     return at;
@@ -467,7 +467,8 @@ void osReleaseHeapMemory(void)
 {
     int r;

-    r = munmap((void*)mblock_address_space_begin, MBLOCK_SPACE_SIZE);
+    r = munmap((void*)mblock_address_space.begin,
+               mblock_address_space.end - mblock_address_space.begin);
     if(r < 0)
         sysErrorBelch("unable to release address space");
 }
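The posix change above only threads a run-time length through osTryReserveHeapMemory; the underlying reserve-and-trim trick (over-reserve by one MBLOCK_SIZE, then give the misaligned slop back) is unchanged. For readers unfamiliar with that trick, here is a minimal standalone sketch of the same idea in plain POSIX C. It is not RTS code: `align` stands in for MBLOCK_SIZE, my_mmap is replaced by mmap with Linux-style MAP_ANONYMOUS/MAP_NORESERVE flags, and errors simply return NULL.

    /* Sketch only: reserve an address range of exactly `len` bytes aligned to
     * `align` by over-reserving and unmapping the slop on both sides. */
    #include <stdint.h>
    #include <stddef.h>
    #include <sys/mman.h>

    static void *reserve_aligned(size_t len, size_t align, void *hint)
    {
        uintptr_t base, start, end, top;

        /* Over-reserve by one alignment unit so an aligned window of
         * exactly `len` bytes must fit somewhere inside the mapping. */
        void *p = mmap(hint, len + align, PROT_NONE,
                       MAP_PRIVATE | MAP_ANONYMOUS | MAP_NORESERVE, -1, 0);
        if (p == MAP_FAILED)
            return NULL;

        base  = (uintptr_t)p;
        start = (base + align - 1) & ~(align - 1);   /* round up to alignment */
        end   = start + len;
        top   = base + len + align;

        /* Return the slop below and above the aligned window to the OS. */
        if (start > base)
            munmap((void *)base, start - base);
        if (top > end)
            munmap((void *)end, top - end);

        return (void *)start;
    }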
diff --git a/rts/sm/HeapAlloc.h b/rts/sm/HeapAlloc.h
index f2760bdaf7..99a62f9023 100644
--- a/rts/sm/HeapAlloc.h
+++ b/rts/sm/HeapAlloc.h
@@ -34,12 +34,12 @@
    On 64-bit machines, we have two possibilities. One is to request
    a single chunk of address space that we deem "large enough"
-   (currently 1TB, could easily be extended to, say 16TB or more).
-   Memory from that chunk is GC memory, everything else is not. This
-   case is tricky in that it requires support from the OS to allocate
-   address space without allocating memory (in practice, all modern
-   OSes do this). It's also tricky in that it is the only case where
-   a successful HEAP_ALLOCED(p) check can trigger a segfault when
+   (currently 1TB or the ulimit size, whichever is smaller, although this could
+   easily be extended to, say 16TB or more). Memory from that chunk is GC
+   memory, everything else is not. This case is tricky in that it requires
+   support from the OS to allocate address space without allocating memory (in
+   practice, all modern OSes do this). It's also tricky in that it is the only
+   case where a successful HEAP_ALLOCED(p) check can trigger a segfault when
    accessing p (and for debugging purposes, it will).

    Alternatively, the older implementation caches one 12-bit block map
@@ -51,16 +51,14 @@
 #ifdef USE_LARGE_ADDRESS_SPACE

-extern W_ mblock_address_space_begin;
-#if aarch64_HOST_ARCH
-# define MBLOCK_SPACE_SIZE ((StgWord)1 << 38) /* 1/4 TB */
-#else
-# define MBLOCK_SPACE_SIZE ((StgWord)1 << 40) /* 1 TB */
-#endif
+struct mblock_address_range {
+    W_ begin, end;
+    W_ padding[6]; // ensure nothing else inhabits this cache line
+} ATTRIBUTE_ALIGNED(64);
+extern struct mblock_address_range mblock_address_space;

-# define HEAP_ALLOCED(p) ((W_)(p) >= mblock_address_space_begin && \
-                          (W_)(p) < (mblock_address_space_begin + \
-                                     MBLOCK_SPACE_SIZE))
+# define HEAP_ALLOCED(p) ((W_)(p) >= mblock_address_space.begin && \
+                          (W_)(p) < (mblock_address_space.end))
 # define HEAP_ALLOCED_GC(p) HEAP_ALLOCED(p)

 #elif SIZEOF_VOID_P == 4
diff --git a/rts/sm/MBlock.c b/rts/sm/MBlock.c
index 35a11bf589..e1daa71e2f 100644
--- a/rts/sm/MBlock.c
+++ b/rts/sm/MBlock.c
@@ -96,7 +96,12 @@ typedef struct free_list {
 static free_list *free_list_head;
 static W_ mblock_high_watermark;
-W_ mblock_address_space_begin = 0;
+/*
+ * it is quite important that these are in the same cache line as they
+ * are both needed by HEAP_ALLOCED. Moreover, we need to ensure that they
+ * don't share a cache line with anything else to prevent false sharing.
+ */
+struct mblock_address_range mblock_address_space = { 0, 0, {} };

 static void *getAllocatedMBlock(free_list **start_iter, W_ startingAt)
 {
@@ -131,7 +136,7 @@ void * getFirstMBlock(void **state STG_UNUSED)
     casted_state = &fake_state;
     *casted_state = free_list_head;

-    return getAllocatedMBlock(casted_state, mblock_address_space_begin);
+    return getAllocatedMBlock(casted_state, mblock_address_space.begin);
 }

 void * getNextMBlock(void **state STG_UNUSED, void *mblock)
@@ -190,8 +195,7 @@ static void *getFreshMBlocks(nat n)
     W_ size = MBLOCK_SIZE * (W_)n;
     void *addr = (void*)mblock_high_watermark;

-    if (mblock_high_watermark + size >
-        mblock_address_space_begin + MBLOCK_SPACE_SIZE)
+    if (mblock_high_watermark + size > mblock_address_space.end)
     {
         // whoa, 1 TB of heap?
         errorBelch("out of memory");
@@ -611,7 +615,8 @@ freeAllMBlocks(void)
     osReleaseHeapMemory();

-    mblock_address_space_begin = (W_)-1;
+    mblock_address_space.begin = (W_)-1;
+    mblock_address_space.end = (W_)-1;
     mblock_high_watermark = (W_)-1;
 #else
     osFreeAllMBlocks();
@@ -634,9 +639,16 @@ initMBlocks(void)

 #ifdef USE_LARGE_ADDRESS_SPACE
     {
-        void *addr = osReserveHeapMemory();
+        W_ size;
+#if aarch64_HOST_ARCH
+        size = (W_)1 << 38; // 1/4 TByte
+#else
+        size = (W_)1 << 40; // 1 TByte
+#endif
+        void *addr = osReserveHeapMemory(size);

-        mblock_address_space_begin = (W_)addr;
+        mblock_address_space.begin = (W_)addr;
+        mblock_address_space.end = (W_)addr + size;
         mblock_high_watermark = (W_)addr;
     }
 #elif SIZEOF_VOID_P == 8
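Two points are worth calling out in the HeapAlloc.h/MBlock.c part of the patch. First, HEAP_ALLOCED now reads two global words (begin and end) rather than one word plus a compile-time constant, so both bounds are packed into a single 64-byte-aligned, padded struct: the hot range check touches exactly one cache line, and no unrelated mutable data can share that line and cause false sharing. Second, the check remains a plain half-open range test, so it can still succeed for addresses that are reserved but not yet committed, which is the segfault caveat the HeapAlloc.h comment keeps. The following sketch restates that layout in ordinary GCC/Clang C outside the RTS; ATTRIBUTE_ALIGNED is the RTS macro, __attribute__((aligned)) is used directly here, and the 64-byte line size and 8-byte word size are assumptions.

    /* Sketch only: a begin/end pair padded and aligned so that the pair
     * occupies one whole 64-byte cache line on a 64-bit target
     * (2 * 8 bytes of bounds + 6 * 8 bytes of padding = 64 bytes). */
    #include <stdint.h>

    struct address_range {
        uintptr_t begin, end;
        uintptr_t padding[6];      /* keep anything else off this line */
    } __attribute__((aligned(64)));

    static struct address_range heap_range;

    /* The analogue of HEAP_ALLOCED: a half-open range test that needs
     * only the one cache line holding begin and end. */
    static inline int in_heap(const void *p)
    {
        uintptr_t w = (uintptr_t)p;
        return w >= heap_range.begin && w < heap_range.end;
    }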
diff --git a/rts/sm/OSMem.h b/rts/sm/OSMem.h
index 9a6ccdd7ec..6bcaf65b10 100644
--- a/rts/sm/OSMem.h
+++ b/rts/sm/OSMem.h
@@ -29,13 +29,13 @@ void setExecutable (void *p, W_ len, rtsBool exec);
    we will ever need, which keeps everything nice and consecutive.
  */

-// Reserve the large address space blob, and return the address that
-// the OS has chosen for it. It is not safe to access the memory
-// pointed to by the return value, until that memory is committed
-// using osCommitMemory().
+// Reserve the large address space blob of the given size, and return the
+// address that the OS has chosen for it. It is not safe to access the memory
+// pointed to by the return value, until that memory is committed using
+// osCommitMemory().
 //
 // This function is called once when the block allocator is initialized.
-void *osReserveHeapMemory(void);
+void *osReserveHeapMemory(W_ len);

 // Commit (allocate memory for) a piece of address space, which must
 // be within the previously reserved space After this call, it is safe
diff --git a/rts/win32/OSMem.c b/rts/win32/OSMem.c
index 716171b3fc..2d2af0ddf6 100644
--- a/rts/win32/OSMem.c
+++ b/rts/win32/OSMem.c
@@ -429,11 +429,11 @@ void setExecutable (void *p, W_ len, rtsBool exec)

 static void* heap_base = NULL;

-void *osReserveHeapMemory (void)
+void *osReserveHeapMemory (W_ len)
 {
     void *start;

-    heap_base = VirtualAlloc(NULL, MBLOCK_SPACE_SIZE + MBLOCK_SIZE,
+    heap_base = VirtualAlloc(NULL, len + MBLOCK_SIZE,
                              MEM_RESERVE, PAGE_READWRITE);
     if (heap_base == NULL) {
         if (GetLastError() == ERROR_NOT_ENOUGH_MEMORY) {
@@ -441,7 +441,7 @@ void *osReserveHeapMemory (void)
         } else {
             sysErrorBelch(
                 "osReserveHeapMemory: VirtualAlloc MEM_RESERVE %llu bytes failed",
-                MBLOCK_SPACE_SIZE + MBLOCK_SIZE);
+                len + MBLOCK_SIZE);
         }
         stg_exit(EXIT_FAILURE);
     }
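The OSMem.h comment above states the contract that osReserveHeapMemory hands back raw address space which must not be touched until it has been committed with osCommitMemory, and the win32 implementation realises that with VirtualAlloc(MEM_RESERVE). For illustration only, here is a small self-contained Win32 program showing the same reserve-then-commit discipline outside the RTS; the 1 GiB reservation and the 4096-byte commit size are arbitrary example values (a real program would query the page size with GetSystemInfo).

    /* Sketch only: reserve address space, commit a piece before use,
     * then release everything.  Touching the region between the reserve
     * and the commit would fault, which is exactly the OSMem.h caveat. */
    #include <windows.h>

    int main(void)
    {
        SIZE_T len  = (SIZE_T)1 << 30;   /* 1 GiB of address space */
        SIZE_T page = 4096;              /* example commit size    */

        /* Reserve: address space only, no backing memory yet. */
        char *base = VirtualAlloc(NULL, len, MEM_RESERVE, PAGE_READWRITE);
        if (base == NULL)
            return 1;

        /* Commit the first page; only now is it safe to access. */
        if (VirtualAlloc(base, page, MEM_COMMIT, PAGE_READWRITE) == NULL)
            return 1;
        base[0] = 42;

        VirtualFree(base, 0, MEM_RELEASE);
        return 0;
    }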
