diff options
author | Keyhan Vakil <kvakil@sylph.kvakil.me> | 2023-05-14 21:37:24 -0700 |
---|---|---|
committer | GitHub <noreply@github.com> | 2023-05-15 04:37:24 +0000 |
commit | 3ef17b635653d0f862a093a50d611b18577348c2 (patch) | |
tree | 52650a45cc075f25c51236823b76d010e8d85abb | |
parent | b270984d2a87b77555e3edf78413862e4dc6ec64 (diff) | |
download | node-new-3ef17b635653d0f862a093a50d611b18577348c2.tar.gz |
src: stop copying code cache, part 2
This removes more copies of the code cache data.
First: for the builtin snapshot, we were copying the code cache to
create a `std::vector<uint8_t>`. This was slowing down static
intialization. Change it to use a good old `uint8_t*` and `size_t`
rather than a vector. For the case of embedder provided snapshots, we
also add an `owning_ptr` so that we can properly cleanup owned values
created from the snapshot.
Second: whenever the code cache was hit, we would remove the bytecode
from the code cache, and then reserialize it from the compiled function.
This was pretty slow. Change the code so that we can reuse the same code
cache multiple times. If the code cache is rejected (say, because the
user added V8 options), then we need to generate the bytecode, in which
case we again use `owning_ptr` to ensure that the underlying code cache
is freed.
Combined, these changes improve the misc/startup.js benchmarks
significantly (p < 0.001):
* process,benchmark/fixtures/require-builtins: 22.15%
* process,test/fixtures/semicolon: 8.55%
* worker,benchmark/fixtures/require-builtins: 26.52%
* worker,test/fixtures/semicolon: 21.52%
PR-URL: https://github.com/nodejs/node/pull/47958
Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com>
Reviewed-By: Minwoo Jung <nodecorelab@gmail.com>
-rw-r--r-- | src/node_builtins.cc | 55 | ||||
-rw-r--r-- | src/node_builtins.h | 49 | ||||
-rw-r--r-- | src/node_snapshotable.cc | 28 |
3 files changed, 81 insertions, 51 deletions
diff --git a/src/node_builtins.cc b/src/node_builtins.cc index 9e9cf6948b..fb4d53e0ce 100644 --- a/src/node_builtins.cc +++ b/src/node_builtins.cc @@ -154,17 +154,6 @@ BuiltinLoader::BuiltinCategories BuiltinLoader::GetBuiltinCategories() const { return builtin_categories; } -const ScriptCompiler::CachedData* BuiltinLoader::GetCodeCache( - const char* id) const { - RwLock::ScopedReadLock lock(code_cache_->mutex); - const auto it = code_cache_->map.find(id); - if (it == code_cache_->map.end()) { - // The module has not been compiled before. - return nullptr; - } - return it->second.get(); -} - #ifdef NODE_BUILTIN_MODULES_PATH static std::string OnDiskFileName(const char* id) { std::string filename = NODE_BUILTIN_MODULES_PATH; @@ -276,7 +265,7 @@ MaybeLocal<Function> BuiltinLoader::LookupAndCompileInternal( OneByteString(isolate, filename_s.c_str(), filename_s.size()); ScriptOrigin origin(isolate, filename, 0, 0, true); - ScriptCompiler::CachedData* cached_data = nullptr; + BuiltinCodeCacheData cached_data{}; { // Note: The lock here should not extend into the // `CompileFunction()` call below, because this function may recurse if @@ -286,16 +275,18 @@ MaybeLocal<Function> BuiltinLoader::LookupAndCompileInternal( auto cache_it = code_cache_->map.find(id); if (cache_it != code_cache_->map.end()) { // Transfer ownership to ScriptCompiler::Source later. - cached_data = cache_it->second.release(); - code_cache_->map.erase(cache_it); + cached_data = cache_it->second; } } - const bool has_cache = cached_data != nullptr; + const bool has_cache = cached_data.data != nullptr; ScriptCompiler::CompileOptions options = has_cache ? ScriptCompiler::kConsumeCodeCache : ScriptCompiler::kEagerCompile; - ScriptCompiler::Source script_source(source, origin, cached_data); + ScriptCompiler::Source script_source( + source, + origin, + has_cache ? cached_data.AsCachedData().release() : nullptr); per_process::Debug(DebugCategory::CODE_CACHE, "Compiling %s %s code cache\n", @@ -342,14 +333,19 @@ MaybeLocal<Function> BuiltinLoader::LookupAndCompileInternal( : "is accepted"); } - // Generate new cache for next compilation - std::unique_ptr<ScriptCompiler::CachedData> new_cached_data( - ScriptCompiler::CreateCodeCacheForFunction(fun)); - CHECK_NOT_NULL(new_cached_data); - - { - RwLock::ScopedLock lock(code_cache_->mutex); - code_cache_->map[id] = std::move(new_cached_data); + if (*result == Result::kWithoutCache) { + // We failed to accept this cache, maybe because it was rejected, maybe + // because it wasn't present. Either way, we'll attempt to replace this + // code cache info with a new one. + std::shared_ptr<ScriptCompiler::CachedData> new_cached_data( + ScriptCompiler::CreateCodeCacheForFunction(fun)); + CHECK_NOT_NULL(new_cached_data); + + { + RwLock::ScopedLock lock(code_cache_->mutex); + code_cache_->map.insert_or_assign( + id, BuiltinCodeCacheData(std::move(new_cached_data))); + } } return scope.Escape(fun); @@ -499,9 +495,7 @@ bool BuiltinLoader::CompileAllBuiltins(Local<Context> context) { void BuiltinLoader::CopyCodeCache(std::vector<CodeCacheInfo>* out) const { RwLock::ScopedReadLock lock(code_cache_->mutex); for (auto const& item : code_cache_->map) { - out->push_back( - {item.first, - {item.second->data, item.second->data + item.second->length}}); + out->push_back({item.first, item.second}); } } @@ -510,12 +504,7 @@ void BuiltinLoader::RefreshCodeCache(const std::vector<CodeCacheInfo>& in) { code_cache_->map.reserve(in.size()); DCHECK(code_cache_->map.empty()); for (auto const& item : in) { - auto result = code_cache_->map.emplace( - item.id, - std::make_unique<v8::ScriptCompiler::CachedData>( - item.data.data(), - item.data.size(), - v8::ScriptCompiler::CachedData::BufferNotOwned)); + auto result = code_cache_->map.emplace(item.id, item.data); USE(result.second); DCHECK(result.second); } diff --git a/src/node_builtins.h b/src/node_builtins.h index 2dd3ee8b8c..11af941780 100644 --- a/src/node_builtins.h +++ b/src/node_builtins.h @@ -26,20 +26,55 @@ class Realm; namespace builtins { +class BuiltinCodeCacheData { + public: + BuiltinCodeCacheData() : data(nullptr), length(0), owning_ptr(nullptr) {} + + explicit BuiltinCodeCacheData( + std::shared_ptr<v8::ScriptCompiler::CachedData> cached_data) + : data(cached_data->data), + length(cached_data->length), + owning_ptr(cached_data) {} + + explicit BuiltinCodeCacheData( + std::shared_ptr<std::vector<uint8_t>> cached_data) + : data(cached_data->data()), + length(cached_data->size()), + owning_ptr(cached_data) {} + + BuiltinCodeCacheData(const uint8_t* data, size_t length) + : data(data), length(length), owning_ptr(nullptr) {} + + const uint8_t* data; + size_t length; + + // Returns a v8::ScriptCompiler::CachedData corresponding to this + // BuiltinCodeCacheData. The lifetime of the returned + // v8::ScriptCompiler::CachedData must not outlive that of the data. + std::unique_ptr<v8::ScriptCompiler::CachedData> AsCachedData() { + return std::make_unique<v8::ScriptCompiler::CachedData>( + data, length, v8::ScriptCompiler::CachedData::BufferNotOwned); + } + + private: + // If not null, represents a pointer which owns data. Otherwise indicates + // that data has static lifetime. + std::shared_ptr<void> owning_ptr; +}; + +struct CodeCacheInfo { + std::string id; + BuiltinCodeCacheData data; +}; + using BuiltinSourceMap = std::map<std::string, UnionBytes>; using BuiltinCodeCacheMap = - std::unordered_map<std::string, - std::unique_ptr<v8::ScriptCompiler::CachedData>>; + std::unordered_map<std::string, BuiltinCodeCacheData>; // Generated by tools/js2c.py as node_javascript.cc void RegisterExternalReferencesForInternalizedBuiltinCode( ExternalReferenceRegistry* registry); -struct CodeCacheInfo { - std::string id; - std::vector<uint8_t> data; -}; - // Handles compilation and caching of built-in JavaScript modules and // bootstrap scripts, whose source are bundled into the binary as static data. class NODE_EXTERN_PRIVATE BuiltinLoader { diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc index b656332f8c..0db70f5ce0 100644 --- a/src/node_snapshotable.cc +++ b/src/node_snapshotable.cc @@ -53,7 +53,7 @@ const uint32_t SnapshotData::kMagic; std::ostream& operator<<(std::ostream& output, const builtins::CodeCacheInfo& info) { output << "<builtins::CodeCacheInfo id=" << info.id - << ", size=" << info.data.size() << ">\n"; + << ", length=" << info.data.length << ">\n"; return output; } @@ -207,7 +207,11 @@ template <> builtins::CodeCacheInfo SnapshotDeserializer::Read() { Debug("Read<builtins::CodeCacheInfo>()\n"); - builtins::CodeCacheInfo result{ReadString(), ReadVector<uint8_t>()}; + std::string id = ReadString(); + auto owning_ptr = + std::make_shared<std::vector<uint8_t>>(ReadVector<uint8_t>()); + builtins::BuiltinCodeCacheData code_cache_data{std::move(owning_ptr)}; + builtins::CodeCacheInfo result{id, code_cache_data}; if (is_debug) { std::string str = ToStr(result); @@ -217,14 +221,16 @@ builtins::CodeCacheInfo SnapshotDeserializer::Read() { } template <> -size_t SnapshotSerializer::Write(const builtins::CodeCacheInfo& data) { +size_t SnapshotSerializer::Write(const builtins::CodeCacheInfo& info) { Debug("\nWrite<builtins::CodeCacheInfo>() id = %s" - ", size=%d\n", - data.id.c_str(), - data.data.size()); + ", length=%d\n", + info.id.c_str(), + info.data.length); - size_t written_total = WriteString(data.id); - written_total += WriteVector<uint8_t>(data.data); + size_t written_total = WriteString(info.id); + + written_total += WriteArithmetic<size_t>(info.data.length); + written_total += WriteArithmetic(info.data.data, info.data.length); Debug("Write<builtins::CodeCacheInfo>() wrote %d bytes\n", written_total); return written_total; @@ -734,7 +740,7 @@ static std::string FormatSize(size_t size) { static void WriteStaticCodeCacheData(std::ostream* ss, const builtins::CodeCacheInfo& info) { *ss << "static const uint8_t " << GetCodeCacheDefName(info.id) << "[] = {\n"; - WriteVector(ss, info.data.data(), info.data.size()); + WriteVector(ss, info.data.data, info.data.length); *ss << "};"; } @@ -742,7 +748,7 @@ static void WriteCodeCacheInitializer(std::ostream* ss, const std::string& id) { std::string def_name = GetCodeCacheDefName(id); *ss << " { \"" << id << "\",\n"; *ss << " {" << def_name << ",\n"; - *ss << " " << def_name << " + arraysize(" << def_name << "),\n"; + *ss << " arraysize(" << def_name << "),\n"; *ss << " }\n"; *ss << " },\n"; } @@ -954,7 +960,7 @@ ExitCode SnapshotBuilder::CreateSnapshot(SnapshotData* out, } env->builtin_loader()->CopyCodeCache(&(out->code_cache)); for (const auto& item : out->code_cache) { - std::string size_str = FormatSize(item.data.size()); + std::string size_str = FormatSize(item.data.length); per_process::Debug(DebugCategory::MKSNAPSHOT, "Generated code cache for %d: %s\n", item.id.c_str(), |