summaryrefslogtreecommitdiff
path: root/rts
diff options
context:
space:
mode:
Diffstat (limited to 'rts')
-rw-r--r--rts/Prelude.h2
-rw-r--r--rts/RtsSymbols.c5
-rw-r--r--rts/StgStdThunks.cmm100
-rw-r--r--rts/include/stg/MiscClosures.h4
4 files changed, 110 insertions, 1 deletions
diff --git a/rts/Prelude.h b/rts/Prelude.h
index 3db5546ad0..b52e1b38fd 100644
--- a/rts/Prelude.h
+++ b/rts/Prelude.h
@@ -33,6 +33,7 @@ PRELUDE_CLOSURE(ghczmprim_GHCziTupleziPrim_Z0T_closure);
PRELUDE_CLOSURE(ghczmprim_GHCziTypes_True_closure);
PRELUDE_CLOSURE(ghczmprim_GHCziTypes_False_closure);
PRELUDE_CLOSURE(base_GHCziPack_unpackCString_closure);
+PRELUDE_CLOSURE(base_GHCziPack_unpackCStringUtf8_closure);
PRELUDE_CLOSURE(base_GHCziWeak_runFinalizzerBatch_closure);
PRELUDE_CLOSURE(base_GHCziWeakziFinalizze_runFinalizzerBatch_closure);
@@ -70,6 +71,7 @@ PRELUDE_CLOSURE(base_GHCziEventziWindows_processRemoteCompletion_closure);
PRELUDE_CLOSURE(base_GHCziTopHandler_flushStdHandles_closure);
PRELUDE_CLOSURE(base_GHCziTopHandler_runMainIO_closure);
+PRELUDE_INFO(ghczmprim_GHCziCString_unpackCStringzh_info);
PRELUDE_INFO(ghczmprim_GHCziTypes_Czh_con_info);
PRELUDE_INFO(ghczmprim_GHCziTypes_Izh_con_info);
PRELUDE_INFO(ghczmprim_GHCziTypes_Fzh_con_info);
diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c
index 317b284158..097b8a1df4 100644
--- a/rts/RtsSymbols.c
+++ b/rts/RtsSymbols.c
@@ -9,6 +9,7 @@
#include "ghcplatform.h"
#include "Rts.h"
#include "RtsSymbols.h"
+
#include "TopHandler.h"
#include "HsFFI.h"
#include "CloneStack.h"
@@ -713,7 +714,7 @@ extern char **environ;
SymI_HasProto(defaultRtsConfig) \
SymI_HasProto(initLinker) \
SymI_HasProto(initLinker_) \
- SymI_HasDataProto(stg_unpackClosurezh) \
+ SymI_HasDataProto(stg_unpackClosurezh) \
SymI_HasDataProto(stg_closureSizzezh) \
SymI_HasDataProto(stg_whereFromzh) \
SymI_HasDataProto(stg_getApStackValzh) \
@@ -976,6 +977,8 @@ extern char **environ;
SymI_HasDataProto(stg_sel_13_noupd_info) \
SymI_HasDataProto(stg_sel_14_noupd_info) \
SymI_HasDataProto(stg_sel_15_noupd_info) \
+ SymI_HasDataProto(stg_unpack_cstring_info) \
+ SymI_HasDataProto(stg_unpack_cstring_utf8_info) \
SymI_HasDataProto(stg_upd_frame_info) \
SymI_HasDataProto(stg_bh_upd_frame_info) \
SymI_HasProto(suspendThread) \
diff --git a/rts/StgStdThunks.cmm b/rts/StgStdThunks.cmm
index 3c528f662f..c4f5d25881 100644
--- a/rts/StgStdThunks.cmm
+++ b/rts/StgStdThunks.cmm
@@ -13,6 +13,9 @@
#include "Cmm.h"
#include "Updates.h"
+import ghczmprim_GHCziCString_unpackCStringzh_info;
+import ghczmprim_GHCziCString_unpackCStringUtf8zh_info;
+
/* -----------------------------------------------------------------------------
The code for a thunk that simply extracts a field from a
single-constructor datatype depends only on the offset of the field
@@ -286,3 +289,100 @@ INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info")
StgThunk_payload(node,6));
}
}
+
+/* -----------------------------------------------------------------------------
+ Making strings
+ -------------------------------------------------------------------------- */
+
+/*
+ * Note [unpack_cstring closures]
+ * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+ * Strings are extremely common. In Core they will typically manifest as the
+ * a pair of top-level bindings:
+ *
+ * s :: String
+ * s = unpackCString# s#
+ *
+ * s# :: Addr#
+ * s# = "hello world"#
+ *
+ * It turns out that `s` is a non-trivial amount of code which is duplicated
+ * for every `String` literal. To avoid this duplicate, we have a standard
+ * string-unpacking closure, unpack_cstring. Note that currently we only do
+ * this for ASCII strings; strings mentioning non-ASCII characters are
+ * represented by CAF applications of unpackCStringUtf8# as before.
+ *
+ * unpack_cstring closures are similar to standard THUNK_STATIC closures but
+ * with a non-GC pointer to a C-string at the end (the "extra" pointer).
+ * We must place this extra pointer at the end of the closure to ensure that
+ * it has a similar layout to a normal THUNK_STATIC closure, which has no space
+ * for free variables (since these would be contained in the thunk's code and SRT).
+ *
+ * When it is evaluated, an stg_unpack_cstring closure is updated to be an
+ * indirection to the resulting [Char], just as a normal unpackCString# thunk
+ * would be.
+ *
+ * Closure layout:
+ *
+ * ┌───────────────────┐ ┌──► ┌──────────────────────────┐
+ * │ stg_unpack_cstring│ │ │ "hello world ..." │
+ * ├───────────────────┤ │ └──────────────────────────┘
+ * │ indirectee │ │
+ * ├───────────────────┤ │
+ * │ static_link │ │
+ * ├───────────────────┤ │
+ * │ saved_info │ │
+ * ├───────────────────┤ │
+ * │ the_string ─┼───────┘
+ * └───────────────────┘
+ *
+ */
+
+stg_do_unpack_cstring(P_ node, P_ newCAF_ret) {
+ STK_CHK_PP(WDS(SIZEOF_StgUpdateFrame), stg_do_unpack_cstring, node, newCAF_ret);
+ W_ str;
+ str = StgThunk_payload(node, 2);
+ push (UPDATE_FRAME_FIELDS(,,stg_bh_upd_frame_info, CCCS, 0, newCAF_ret)) {
+ jump %ENTRY_CODE(ghczmprim_GHCziCString_unpackCStringzh_info)(node, str);
+ }
+}
+
+INFO_TABLE(stg_unpack_cstring, 0, 0, THUNK_STATIC, "stg_unpack_cstring", "stg_unpack_cstring")
+ (P_ node)
+{
+ W_ newCAF_ret;
+ (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr");
+
+ if (newCAF_ret == 0) {
+ // We raced with another thread to evaluate the CAF and they won;
+ // `node` should now be an indirection.
+ ENTER(node);
+ } else {
+ jump stg_do_unpack_cstring(node, newCAF_ret);
+ }
+}
+
+stg_do_unpack_cstring_utf8(P_ node, P_ newCAF_ret) {
+ STK_CHK_PP(WDS(SIZEOF_StgUpdateFrame), stg_do_unpack_cstring_utf8, node, newCAF_ret);
+ W_ str;
+ str = StgThunk_payload(node, 2);
+ push (UPDATE_FRAME_FIELDS(,,stg_bh_upd_frame_info, CCCS, 0, newCAF_ret)) {
+ jump %ENTRY_CODE(ghczmprim_GHCziCString_unpackCStringUtf8zh_info)(node, str);
+ }
+}
+
+INFO_TABLE(stg_unpack_cstring_utf8, 0, 0, THUNK_STATIC, "stg_unpack_cstring_utf8", "stg_unpack_cstring_utf8")
+ (P_ node)
+{
+ W_ newCAF_ret;
+ (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr");
+
+ if (newCAF_ret == 0) {
+ // We raced with another thread to evaluate the CAF and they won;
+ // `node` should now be an indirection.
+ ENTER(node);
+ } else {
+ jump stg_do_unpack_cstring_utf8(node, newCAF_ret);
+ }
+}
+
diff --git a/rts/include/stg/MiscClosures.h b/rts/include/stg/MiscClosures.h
index e6b4957e17..257d59a607 100644
--- a/rts/include/stg/MiscClosures.h
+++ b/rts/include/stg/MiscClosures.h
@@ -318,6 +318,10 @@ RTS_THUNK(stg_ap_5_upd);
RTS_THUNK(stg_ap_6_upd);
RTS_THUNK(stg_ap_7_upd);
+// Standard entry for `unpackCString# str` thunks
+RTS_ENTRY(stg_unpack_cstring);
+RTS_ENTRY(stg_unpack_cstring_utf8);
+
/* standard application routines (see also utils/genapply,
* and GHC.StgToCmm.ArgRep).
*/