summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKeyhan Vakil <kvakil@sylph.kvakil.me>2023-05-14 21:37:24 -0700
committerGitHub <noreply@github.com>2023-05-15 04:37:24 +0000
commit3ef17b635653d0f862a093a50d611b18577348c2 (patch)
tree52650a45cc075f25c51236823b76d010e8d85abb
parentb270984d2a87b77555e3edf78413862e4dc6ec64 (diff)
downloadnode-new-3ef17b635653d0f862a093a50d611b18577348c2.tar.gz
src: stop copying code cache, part 2
This removes more copies of the code cache data. First: for the builtin snapshot, we were copying the code cache to create a `std::vector<uint8_t>`. This was slowing down static intialization. Change it to use a good old `uint8_t*` and `size_t` rather than a vector. For the case of embedder provided snapshots, we also add an `owning_ptr` so that we can properly cleanup owned values created from the snapshot. Second: whenever the code cache was hit, we would remove the bytecode from the code cache, and then reserialize it from the compiled function. This was pretty slow. Change the code so that we can reuse the same code cache multiple times. If the code cache is rejected (say, because the user added V8 options), then we need to generate the bytecode, in which case we again use `owning_ptr` to ensure that the underlying code cache is freed. Combined, these changes improve the misc/startup.js benchmarks significantly (p < 0.001): * process,benchmark/fixtures/require-builtins: 22.15% * process,test/fixtures/semicolon: 8.55% * worker,benchmark/fixtures/require-builtins: 26.52% * worker,test/fixtures/semicolon: 21.52% PR-URL: https://github.com/nodejs/node/pull/47958 Reviewed-By: Joyee Cheung <joyeec9h3@gmail.com> Reviewed-By: Minwoo Jung <nodecorelab@gmail.com>
-rw-r--r--src/node_builtins.cc55
-rw-r--r--src/node_builtins.h49
-rw-r--r--src/node_snapshotable.cc28
3 files changed, 81 insertions, 51 deletions
diff --git a/src/node_builtins.cc b/src/node_builtins.cc
index 9e9cf6948b..fb4d53e0ce 100644
--- a/src/node_builtins.cc
+++ b/src/node_builtins.cc
@@ -154,17 +154,6 @@ BuiltinLoader::BuiltinCategories BuiltinLoader::GetBuiltinCategories() const {
return builtin_categories;
}
-const ScriptCompiler::CachedData* BuiltinLoader::GetCodeCache(
- const char* id) const {
- RwLock::ScopedReadLock lock(code_cache_->mutex);
- const auto it = code_cache_->map.find(id);
- if (it == code_cache_->map.end()) {
- // The module has not been compiled before.
- return nullptr;
- }
- return it->second.get();
-}
-
#ifdef NODE_BUILTIN_MODULES_PATH
static std::string OnDiskFileName(const char* id) {
std::string filename = NODE_BUILTIN_MODULES_PATH;
@@ -276,7 +265,7 @@ MaybeLocal<Function> BuiltinLoader::LookupAndCompileInternal(
OneByteString(isolate, filename_s.c_str(), filename_s.size());
ScriptOrigin origin(isolate, filename, 0, 0, true);
- ScriptCompiler::CachedData* cached_data = nullptr;
+ BuiltinCodeCacheData cached_data{};
{
// Note: The lock here should not extend into the
// `CompileFunction()` call below, because this function may recurse if
@@ -286,16 +275,18 @@ MaybeLocal<Function> BuiltinLoader::LookupAndCompileInternal(
auto cache_it = code_cache_->map.find(id);
if (cache_it != code_cache_->map.end()) {
// Transfer ownership to ScriptCompiler::Source later.
- cached_data = cache_it->second.release();
- code_cache_->map.erase(cache_it);
+ cached_data = cache_it->second;
}
}
- const bool has_cache = cached_data != nullptr;
+ const bool has_cache = cached_data.data != nullptr;
ScriptCompiler::CompileOptions options =
has_cache ? ScriptCompiler::kConsumeCodeCache
: ScriptCompiler::kEagerCompile;
- ScriptCompiler::Source script_source(source, origin, cached_data);
+ ScriptCompiler::Source script_source(
+ source,
+ origin,
+ has_cache ? cached_data.AsCachedData().release() : nullptr);
per_process::Debug(DebugCategory::CODE_CACHE,
"Compiling %s %s code cache\n",
@@ -342,14 +333,19 @@ MaybeLocal<Function> BuiltinLoader::LookupAndCompileInternal(
: "is accepted");
}
- // Generate new cache for next compilation
- std::unique_ptr<ScriptCompiler::CachedData> new_cached_data(
- ScriptCompiler::CreateCodeCacheForFunction(fun));
- CHECK_NOT_NULL(new_cached_data);
-
- {
- RwLock::ScopedLock lock(code_cache_->mutex);
- code_cache_->map[id] = std::move(new_cached_data);
+ if (*result == Result::kWithoutCache) {
+ // We failed to accept this cache, maybe because it was rejected, maybe
+ // because it wasn't present. Either way, we'll attempt to replace this
+ // code cache info with a new one.
+ std::shared_ptr<ScriptCompiler::CachedData> new_cached_data(
+ ScriptCompiler::CreateCodeCacheForFunction(fun));
+ CHECK_NOT_NULL(new_cached_data);
+
+ {
+ RwLock::ScopedLock lock(code_cache_->mutex);
+ code_cache_->map.insert_or_assign(
+ id, BuiltinCodeCacheData(std::move(new_cached_data)));
+ }
}
return scope.Escape(fun);
@@ -499,9 +495,7 @@ bool BuiltinLoader::CompileAllBuiltins(Local<Context> context) {
void BuiltinLoader::CopyCodeCache(std::vector<CodeCacheInfo>* out) const {
RwLock::ScopedReadLock lock(code_cache_->mutex);
for (auto const& item : code_cache_->map) {
- out->push_back(
- {item.first,
- {item.second->data, item.second->data + item.second->length}});
+ out->push_back({item.first, item.second});
}
}
@@ -510,12 +504,7 @@ void BuiltinLoader::RefreshCodeCache(const std::vector<CodeCacheInfo>& in) {
code_cache_->map.reserve(in.size());
DCHECK(code_cache_->map.empty());
for (auto const& item : in) {
- auto result = code_cache_->map.emplace(
- item.id,
- std::make_unique<v8::ScriptCompiler::CachedData>(
- item.data.data(),
- item.data.size(),
- v8::ScriptCompiler::CachedData::BufferNotOwned));
+ auto result = code_cache_->map.emplace(item.id, item.data);
USE(result.second);
DCHECK(result.second);
}
diff --git a/src/node_builtins.h b/src/node_builtins.h
index 2dd3ee8b8c..11af941780 100644
--- a/src/node_builtins.h
+++ b/src/node_builtins.h
@@ -26,20 +26,55 @@ class Realm;
namespace builtins {
+class BuiltinCodeCacheData {
+ public:
+ BuiltinCodeCacheData() : data(nullptr), length(0), owning_ptr(nullptr) {}
+
+ explicit BuiltinCodeCacheData(
+ std::shared_ptr<v8::ScriptCompiler::CachedData> cached_data)
+ : data(cached_data->data),
+ length(cached_data->length),
+ owning_ptr(cached_data) {}
+
+ explicit BuiltinCodeCacheData(
+ std::shared_ptr<std::vector<uint8_t>> cached_data)
+ : data(cached_data->data()),
+ length(cached_data->size()),
+ owning_ptr(cached_data) {}
+
+ BuiltinCodeCacheData(const uint8_t* data, size_t length)
+ : data(data), length(length), owning_ptr(nullptr) {}
+
+ const uint8_t* data;
+ size_t length;
+
+ // Returns a v8::ScriptCompiler::CachedData corresponding to this
+ // BuiltinCodeCacheData. The lifetime of the returned
+ // v8::ScriptCompiler::CachedData must not outlive that of the data.
+ std::unique_ptr<v8::ScriptCompiler::CachedData> AsCachedData() {
+ return std::make_unique<v8::ScriptCompiler::CachedData>(
+ data, length, v8::ScriptCompiler::CachedData::BufferNotOwned);
+ }
+
+ private:
+ // If not null, represents a pointer which owns data. Otherwise indicates
+ // that data has static lifetime.
+ std::shared_ptr<void> owning_ptr;
+};
+
+struct CodeCacheInfo {
+ std::string id;
+ BuiltinCodeCacheData data;
+};
+
using BuiltinSourceMap = std::map<std::string, UnionBytes>;
using BuiltinCodeCacheMap =
- std::unordered_map<std::string,
- std::unique_ptr<v8::ScriptCompiler::CachedData>>;
+ std::unordered_map<std::string, BuiltinCodeCacheData>;
// Generated by tools/js2c.py as node_javascript.cc
void RegisterExternalReferencesForInternalizedBuiltinCode(
ExternalReferenceRegistry* registry);
-struct CodeCacheInfo {
- std::string id;
- std::vector<uint8_t> data;
-};
-
// Handles compilation and caching of built-in JavaScript modules and
// bootstrap scripts, whose source are bundled into the binary as static data.
class NODE_EXTERN_PRIVATE BuiltinLoader {
diff --git a/src/node_snapshotable.cc b/src/node_snapshotable.cc
index b656332f8c..0db70f5ce0 100644
--- a/src/node_snapshotable.cc
+++ b/src/node_snapshotable.cc
@@ -53,7 +53,7 @@ const uint32_t SnapshotData::kMagic;
std::ostream& operator<<(std::ostream& output,
const builtins::CodeCacheInfo& info) {
output << "<builtins::CodeCacheInfo id=" << info.id
- << ", size=" << info.data.size() << ">\n";
+ << ", length=" << info.data.length << ">\n";
return output;
}
@@ -207,7 +207,11 @@ template <>
builtins::CodeCacheInfo SnapshotDeserializer::Read() {
Debug("Read<builtins::CodeCacheInfo>()\n");
- builtins::CodeCacheInfo result{ReadString(), ReadVector<uint8_t>()};
+ std::string id = ReadString();
+ auto owning_ptr =
+ std::make_shared<std::vector<uint8_t>>(ReadVector<uint8_t>());
+ builtins::BuiltinCodeCacheData code_cache_data{std::move(owning_ptr)};
+ builtins::CodeCacheInfo result{id, code_cache_data};
if (is_debug) {
std::string str = ToStr(result);
@@ -217,14 +221,16 @@ builtins::CodeCacheInfo SnapshotDeserializer::Read() {
}
template <>
-size_t SnapshotSerializer::Write(const builtins::CodeCacheInfo& data) {
+size_t SnapshotSerializer::Write(const builtins::CodeCacheInfo& info) {
Debug("\nWrite<builtins::CodeCacheInfo>() id = %s"
- ", size=%d\n",
- data.id.c_str(),
- data.data.size());
+ ", length=%d\n",
+ info.id.c_str(),
+ info.data.length);
- size_t written_total = WriteString(data.id);
- written_total += WriteVector<uint8_t>(data.data);
+ size_t written_total = WriteString(info.id);
+
+ written_total += WriteArithmetic<size_t>(info.data.length);
+ written_total += WriteArithmetic(info.data.data, info.data.length);
Debug("Write<builtins::CodeCacheInfo>() wrote %d bytes\n", written_total);
return written_total;
@@ -734,7 +740,7 @@ static std::string FormatSize(size_t size) {
static void WriteStaticCodeCacheData(std::ostream* ss,
const builtins::CodeCacheInfo& info) {
*ss << "static const uint8_t " << GetCodeCacheDefName(info.id) << "[] = {\n";
- WriteVector(ss, info.data.data(), info.data.size());
+ WriteVector(ss, info.data.data, info.data.length);
*ss << "};";
}
@@ -742,7 +748,7 @@ static void WriteCodeCacheInitializer(std::ostream* ss, const std::string& id) {
std::string def_name = GetCodeCacheDefName(id);
*ss << " { \"" << id << "\",\n";
*ss << " {" << def_name << ",\n";
- *ss << " " << def_name << " + arraysize(" << def_name << "),\n";
+ *ss << " arraysize(" << def_name << "),\n";
*ss << " }\n";
*ss << " },\n";
}
@@ -954,7 +960,7 @@ ExitCode SnapshotBuilder::CreateSnapshot(SnapshotData* out,
}
env->builtin_loader()->CopyCodeCache(&(out->code_cache));
for (const auto& item : out->code_cache) {
- std::string size_str = FormatSize(item.data.size());
+ std::string size_str = FormatSize(item.data.length);
per_process::Debug(DebugCategory::MKSNAPSHOT,
"Generated code cache for %d: %s\n",
item.id.c_str(),