diff options
Diffstat (limited to 'rts')
| -rw-r--r-- | rts/Prelude.h | 2 | ||||
| -rw-r--r-- | rts/RtsSymbols.c | 5 | ||||
| -rw-r--r-- | rts/StgStdThunks.cmm | 100 | ||||
| -rw-r--r-- | rts/include/stg/MiscClosures.h | 4 |
4 files changed, 110 insertions, 1 deletions
diff --git a/rts/Prelude.h b/rts/Prelude.h index 3db5546ad0..b52e1b38fd 100644 --- a/rts/Prelude.h +++ b/rts/Prelude.h @@ -33,6 +33,7 @@ PRELUDE_CLOSURE(ghczmprim_GHCziTupleziPrim_Z0T_closure); PRELUDE_CLOSURE(ghczmprim_GHCziTypes_True_closure); PRELUDE_CLOSURE(ghczmprim_GHCziTypes_False_closure); PRELUDE_CLOSURE(base_GHCziPack_unpackCString_closure); +PRELUDE_CLOSURE(base_GHCziPack_unpackCStringUtf8_closure); PRELUDE_CLOSURE(base_GHCziWeak_runFinalizzerBatch_closure); PRELUDE_CLOSURE(base_GHCziWeakziFinalizze_runFinalizzerBatch_closure); @@ -70,6 +71,7 @@ PRELUDE_CLOSURE(base_GHCziEventziWindows_processRemoteCompletion_closure); PRELUDE_CLOSURE(base_GHCziTopHandler_flushStdHandles_closure); PRELUDE_CLOSURE(base_GHCziTopHandler_runMainIO_closure); +PRELUDE_INFO(ghczmprim_GHCziCString_unpackCStringzh_info); PRELUDE_INFO(ghczmprim_GHCziTypes_Czh_con_info); PRELUDE_INFO(ghczmprim_GHCziTypes_Izh_con_info); PRELUDE_INFO(ghczmprim_GHCziTypes_Fzh_con_info); diff --git a/rts/RtsSymbols.c b/rts/RtsSymbols.c index 317b284158..097b8a1df4 100644 --- a/rts/RtsSymbols.c +++ b/rts/RtsSymbols.c @@ -9,6 +9,7 @@ #include "ghcplatform.h" #include "Rts.h" #include "RtsSymbols.h" + #include "TopHandler.h" #include "HsFFI.h" #include "CloneStack.h" @@ -713,7 +714,7 @@ extern char **environ; SymI_HasProto(defaultRtsConfig) \ SymI_HasProto(initLinker) \ SymI_HasProto(initLinker_) \ - SymI_HasDataProto(stg_unpackClosurezh) \ + SymI_HasDataProto(stg_unpackClosurezh) \ SymI_HasDataProto(stg_closureSizzezh) \ SymI_HasDataProto(stg_whereFromzh) \ SymI_HasDataProto(stg_getApStackValzh) \ @@ -976,6 +977,8 @@ extern char **environ; SymI_HasDataProto(stg_sel_13_noupd_info) \ SymI_HasDataProto(stg_sel_14_noupd_info) \ SymI_HasDataProto(stg_sel_15_noupd_info) \ + SymI_HasDataProto(stg_unpack_cstring_info) \ + SymI_HasDataProto(stg_unpack_cstring_utf8_info) \ SymI_HasDataProto(stg_upd_frame_info) \ SymI_HasDataProto(stg_bh_upd_frame_info) \ SymI_HasProto(suspendThread) \ diff --git a/rts/StgStdThunks.cmm b/rts/StgStdThunks.cmm index 3c528f662f..c4f5d25881 100644 --- a/rts/StgStdThunks.cmm +++ b/rts/StgStdThunks.cmm @@ -13,6 +13,9 @@ #include "Cmm.h" #include "Updates.h" +import ghczmprim_GHCziCString_unpackCStringzh_info; +import ghczmprim_GHCziCString_unpackCStringUtf8zh_info; + /* ----------------------------------------------------------------------------- The code for a thunk that simply extracts a field from a single-constructor datatype depends only on the offset of the field @@ -286,3 +289,100 @@ INFO_TABLE(stg_ap_7_upd,7,0,THUNK,"stg_ap_7_upd_info","stg_ap_7_upd_info") StgThunk_payload(node,6)); } } + +/* ----------------------------------------------------------------------------- + Making strings + -------------------------------------------------------------------------- */ + +/* + * Note [unpack_cstring closures] + * ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + * Strings are extremely common. In Core they will typically manifest as the + * a pair of top-level bindings: + * + * s :: String + * s = unpackCString# s# + * + * s# :: Addr# + * s# = "hello world"# + * + * It turns out that `s` is a non-trivial amount of code which is duplicated + * for every `String` literal. To avoid this duplicate, we have a standard + * string-unpacking closure, unpack_cstring. Note that currently we only do + * this for ASCII strings; strings mentioning non-ASCII characters are + * represented by CAF applications of unpackCStringUtf8# as before. + * + * unpack_cstring closures are similar to standard THUNK_STATIC closures but + * with a non-GC pointer to a C-string at the end (the "extra" pointer). + * We must place this extra pointer at the end of the closure to ensure that + * it has a similar layout to a normal THUNK_STATIC closure, which has no space + * for free variables (since these would be contained in the thunk's code and SRT). + * + * When it is evaluated, an stg_unpack_cstring closure is updated to be an + * indirection to the resulting [Char], just as a normal unpackCString# thunk + * would be. + * + * Closure layout: + * + * ┌───────────────────┐ ┌──► ┌──────────────────────────┐ + * │ stg_unpack_cstring│ │ │ "hello world ..." │ + * ├───────────────────┤ │ └──────────────────────────┘ + * │ indirectee │ │ + * ├───────────────────┤ │ + * │ static_link │ │ + * ├───────────────────┤ │ + * │ saved_info │ │ + * ├───────────────────┤ │ + * │ the_string ─┼───────┘ + * └───────────────────┘ + * + */ + +stg_do_unpack_cstring(P_ node, P_ newCAF_ret) { + STK_CHK_PP(WDS(SIZEOF_StgUpdateFrame), stg_do_unpack_cstring, node, newCAF_ret); + W_ str; + str = StgThunk_payload(node, 2); + push (UPDATE_FRAME_FIELDS(,,stg_bh_upd_frame_info, CCCS, 0, newCAF_ret)) { + jump %ENTRY_CODE(ghczmprim_GHCziCString_unpackCStringzh_info)(node, str); + } +} + +INFO_TABLE(stg_unpack_cstring, 0, 0, THUNK_STATIC, "stg_unpack_cstring", "stg_unpack_cstring") + (P_ node) +{ + W_ newCAF_ret; + (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr"); + + if (newCAF_ret == 0) { + // We raced with another thread to evaluate the CAF and they won; + // `node` should now be an indirection. + ENTER(node); + } else { + jump stg_do_unpack_cstring(node, newCAF_ret); + } +} + +stg_do_unpack_cstring_utf8(P_ node, P_ newCAF_ret) { + STK_CHK_PP(WDS(SIZEOF_StgUpdateFrame), stg_do_unpack_cstring_utf8, node, newCAF_ret); + W_ str; + str = StgThunk_payload(node, 2); + push (UPDATE_FRAME_FIELDS(,,stg_bh_upd_frame_info, CCCS, 0, newCAF_ret)) { + jump %ENTRY_CODE(ghczmprim_GHCziCString_unpackCStringUtf8zh_info)(node, str); + } +} + +INFO_TABLE(stg_unpack_cstring_utf8, 0, 0, THUNK_STATIC, "stg_unpack_cstring_utf8", "stg_unpack_cstring_utf8") + (P_ node) +{ + W_ newCAF_ret; + (newCAF_ret) = ccall newCAF(BaseReg "ptr", node "ptr"); + + if (newCAF_ret == 0) { + // We raced with another thread to evaluate the CAF and they won; + // `node` should now be an indirection. + ENTER(node); + } else { + jump stg_do_unpack_cstring_utf8(node, newCAF_ret); + } +} + diff --git a/rts/include/stg/MiscClosures.h b/rts/include/stg/MiscClosures.h index e6b4957e17..257d59a607 100644 --- a/rts/include/stg/MiscClosures.h +++ b/rts/include/stg/MiscClosures.h @@ -318,6 +318,10 @@ RTS_THUNK(stg_ap_5_upd); RTS_THUNK(stg_ap_6_upd); RTS_THUNK(stg_ap_7_upd); +// Standard entry for `unpackCString# str` thunks +RTS_ENTRY(stg_unpack_cstring); +RTS_ENTRY(stg_unpack_cstring_utf8); + /* standard application routines (see also utils/genapply, * and GHC.StgToCmm.ArgRep). */ |
