/* * Copyright (C) 2016 Apple Inc. All rights reserved. * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: * 1. Redistributions of source code must retain the above copyright * notice, this list of conditions and the following disclaimer. * 2. Redistributions in binary form must reproduce the above copyright * notice, this list of conditions and the following disclaimer in the * documentation and/or other materials provided with the distribution. * * THIS SOFTWARE IS PROVIDED BY APPLE INC. AND ITS CONTRIBUTORS ``AS IS'' * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR ITS CONTRIBUTORS * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF * THE POSSIBILITY OF SUCH DAMAGE. */ #include "config.h" #include "NetworkCaptureManager.h" #if ENABLE(NETWORK_CAPTURE) #include "NetworkCaptureLogging.h" #include "NetworkCaptureResource.h" #include "WebCore/ResourceRequest.h" #include "WebCore/URL.h" #include #include #include #include #include #include #define DEBUG_CLASS Manager namespace WebKit { namespace NetworkCapture { static const char* kDirNameRecordReplay = "WebKitPerf/record_replay"; static const char* kDirNameResources = "resources"; static const char* kFileNameReportLoad = "report_load.txt"; static const char* kFileNameReportRecord = "report_record.txt"; static const char* kFileNameReportReplay = "report_replay.txt"; static int kMaxMatch = std::numeric_limits::max(); static int kMinMatch = std::numeric_limits::min(); Manager& Manager::singleton() { static NeverDestroyed instance; return instance; } void Manager::initialize(const String& recordReplayMode, const String& recordReplayCacheLocation) { if (equalIgnoringASCIICase(recordReplayMode, "record")) { DEBUG_LOG("Initializing: recording mode"); m_recordReplayMode = Record; } else if (equalIgnoringASCIICase(recordReplayMode, "replay")) { DEBUG_LOG("Initializing: replay mode"); m_recordReplayMode = Replay; } else { DEBUG_LOG("Initializing: disabled"); m_recordReplayMode = Disabled; } m_recordReplayCacheLocation = WebCore::pathByAppendingComponent(recordReplayCacheLocation, kDirNameRecordReplay); DEBUG_LOG("Cache location = " STRING_SPECIFIER, DEBUG_STR(m_recordReplayCacheLocation)); if (isRecording()) { m_recordFileHandle = WebCore::FileHandle(reportRecordPath(), WebCore::OpenForWrite); } else if (isReplaying()) { m_recordFileHandle = WebCore::FileHandle(reportRecordPath(), WebCore::OpenForRead); m_loadFileHandle = WebCore::FileHandle(reportLoadPath(), WebCore::OpenForWrite); m_replayFileHandle = WebCore::FileHandle(reportReplayPath(), WebCore::OpenForWrite); loadResources(); } } void Manager::terminate() { m_loadFileHandle.close(); m_recordFileHandle.close(); m_replayFileHandle.close(); } Resource* Manager::findMatch(const WebCore::ResourceRequest& request) { DEBUG_LOG_VERBOSE("URL = " STRING_SPECIFIER, DEBUG_STR(request.url().string())); auto bestMatch = findExactMatch(request); if (!bestMatch) bestMatch = findBestFuzzyMatch(request); #if ENABLE(WTF_CAPTURE_INTERNAL_DEBUGGING) if (!bestMatch) DEBUG_LOG("Could not find match for: " STRING_SPECIFIER, DEBUG_STR(request.url().string())); else if (request.url() == bestMatch->url()) DEBUG_LOG("Found exact match for: " STRING_SPECIFIER, DEBUG_STR(request.url().string())); else { DEBUG_LOG("Found fuzzy match for: " STRING_SPECIFIER, DEBUG_STR(request.url().string())); DEBUG_LOG(" replaced with : " STRING_SPECIFIER, DEBUG_STR(bestMatch->url().string())); } #endif return bestMatch; } Resource* Manager::findExactMatch(const WebCore::ResourceRequest& request) { const auto& url = request.url(); auto lower = std::lower_bound(std::begin(m_cachedResources), std::end(m_cachedResources), url, [](auto& resource, const auto& url) { return WTF::codePointCompareLessThan(resource.url().string(), url.string()); }); if (lower != std::end(m_cachedResources) && lower->url() == url) { DEBUG_LOG_VERBOSE("Found exact match: " STRING_SPECIFIER, DEBUG_STR(lower->url().string())); return &*lower; } return nullptr; } Resource* Manager::findBestFuzzyMatch(const WebCore::ResourceRequest& request) { const auto& url = request.url(); const auto& urlIdentifyingCommonDomain = Manager::urlIdentifyingCommonDomain(url); const auto& lower = std::lower_bound(std::begin(m_cachedResources), std::end(m_cachedResources), urlIdentifyingCommonDomain, [](auto& resource, const auto& urlIdentifyingCommonDomain) { return WTF::codePointCompareLessThan(resource.urlIdentifyingCommonDomain(), urlIdentifyingCommonDomain); }); const auto& upper = std::upper_bound(lower, std::end(m_cachedResources), urlIdentifyingCommonDomain, [](const auto& urlIdentifyingCommonDomain, auto& resource) { return WTF::codePointCompareLessThan(urlIdentifyingCommonDomain, resource.urlIdentifyingCommonDomain()); }); Resource* bestMatch = nullptr; int bestScore = kMinMatch; const auto& requestParameters = WebCore::URLParser::parseURLEncodedForm(url.query()); for (auto iResource = lower; iResource != upper; ++iResource) { int thisScore = fuzzyMatchURLs(url, requestParameters, iResource->url(), iResource->queryParameters()); // TODO: Consider ignoring any matches < 0 as being too different. if (bestScore < thisScore) { DEBUG_LOG("New best match (%d): " STRING_SPECIFIER, thisScore, DEBUG_STR(iResource->url().string())); bestScore = thisScore; bestMatch = &*iResource; if (bestScore == kMaxMatch) break; } } return bestMatch; } // TODO: Convert to an interface based on ResourceRequest so that we can do // deeper matching. int Manager::fuzzyMatchURLs(const WebCore::URL& requestURL, const WebCore::URLParser::URLEncodedForm& requestParameters, const WebCore::URL& resourceURL, const WebCore::URLParser::URLEncodedForm& resourceParameters) { // TODO: consider requiring that any trailing suffixes (e.g., ".js", // ".png", ".css", ".html", etc.) should be an exact match. // We do fuzzy matching on the path and query parameters. So let's first // make sure that all the other parts are equal. // If scheme, host, and port don't all match, return this as the "worst" // match. if (!protocolHostAndPortAreEqual(requestURL, resourceURL)) { DEBUG_LOG("Scheme/host/port mismatch: " STRING_SPECIFIER " != " STRING_SPECIFIER, DEBUG_STR(requestURL.string()), DEBUG_STR(resourceURL.string())); return kMinMatch; } // If fragments don't match, return this as the "worst" match. if (requestURL.fragmentIdentifier() != resourceURL.fragmentIdentifier()) { DEBUG_LOG("Fragments mismatch: " STRING_SPECIFIER " != " STRING_SPECIFIER, DEBUG_STR(requestURL.string()), DEBUG_STR(resourceURL.string())); return kMinMatch; } DEBUG_LOG("Fuzzy matching:"); DEBUG_LOG(" : " STRING_SPECIFIER, DEBUG_STR(requestURL.string())); DEBUG_LOG(" : " STRING_SPECIFIER, DEBUG_STR(resourceURL.string())); // Compare the path components and the query parameters. Score each partial // match as +4, each mismatch as -1, and each missing component as -1. // // Note that at the current time these values are rather arbitrary and // could fine-tuned. const int kPathMatchScore = 4; const int kPathMismatchScore = -1; const int kPathMissingScore = -1; const int kParameterMatchScore = 4; const int kParameterMismatchScore = -1; const int kParameterMissingScore = -1; int score = 0; // Quantize the differences in URL paths. // // The approach here is to increase our score for each matching path // component, and to subtract for each differing component as well as for // components that exist in one path but not the other. const auto& requestPath = requestURL.path(); const auto& resourcePath = resourceURL.path(); Vector requestPathComponents, resourcePathComponents; requestPath.split('/', requestPathComponents); resourcePath.split('/', resourcePathComponents); auto updatedIterators = std::mismatch( std::begin(requestPathComponents), std::end(requestPathComponents), std::begin(resourcePathComponents), std::end(resourcePathComponents)); auto matchingDistance = std::distance(std::begin(requestPathComponents), updatedIterators.first); auto requestPathMismatchDistance = std::distance(updatedIterators.first, std::end(requestPathComponents)); auto resourcePathMismatchDistance = std::distance(updatedIterators.second, std::end(resourcePathComponents)); decltype(matchingDistance) mismatchingDistance; decltype(matchingDistance) missingDistance; if (requestPathMismatchDistance < resourcePathMismatchDistance) { mismatchingDistance = requestPathMismatchDistance; missingDistance = resourcePathMismatchDistance - requestPathMismatchDistance; } else { mismatchingDistance = resourcePathMismatchDistance; missingDistance = requestPathMismatchDistance - resourcePathMismatchDistance; } DEBUG_LOG("Path matching results: matching = %d, mismatching = %d, missing = %d", static_cast(matchingDistance), static_cast(mismatchingDistance), static_cast(missingDistance)); score += matchingDistance * kPathMatchScore + mismatchingDistance * kPathMismatchScore + missingDistance * kPathMissingScore; DEBUG_LOG("Score = %d", score); // Quantize the differences in query parameters. // // The approach here is to walk lock-step over the two sets of query // parameters. For each pair of parameters for each URL, we compare their // names and values. If the names and values match, we add a high score. If // just the names match, we add a lower score. // // If the names don't match, we then assume that some intervening query // parameters have been added to one or the other URL. We therefore try to // sync up the iterators used to traverse the query parameter collections // so that they're again pointing to parameters with the same names. We // first start scanning forward down the query parameters for one URL, // looking for one with the same name as the one we're on in the other URL. // If that doesn't turn up a match, we reverse the roles of the query // parameters perform the same process of scanning forward. If neither of // these scans produces a match, we figure that each query parameter we're // looking at from each of the query parameter collections is unique. We // deduct points from the overall score and move on to the next query // parameters in each set. // // If, on the other hand, the forward-scanning does turn up a match, we // adjust out iterators so that they're now again pointing to query // parameters with the same name. This synchronization involves skipping // over any intervening query parameters in one collection or the other. // The assumption here is that these intervening query parameters are // insertions that exist in one URL but not the other. We treat them as // such, subtracting from the overall score for each one. However, this // assumption might easily be incorrect. It might be that the query // parameters that we're skipping over in one URL might exist in the other // URL. If so, then we are foregoing the possibility of using those matches // to increase the overall match score between the two URLs. // // To address this problem, we might want to consider sorting the query // parameters by their names. However, doing this may cause problems if the // order of the parameters is significant. So if we decide to take the // approach of sorting the parameters, keep in mind this possible drawback. auto requestParameter = std::begin(requestParameters); auto resourceParameter = std::begin(resourceParameters); for (; requestParameter != std::end(requestParameters) && resourceParameter != std::end(resourceParameters); ++requestParameter, ++resourceParameter) { if (requestParameter->key == resourceParameter->key) { #if ENABLE(WTF_CAPTURE_INTERNAL_DEBUGGING) if (requestParameter->value == resourceParameter->value) DEBUG_LOG("Matching parameter names and values: \"" STRING_SPECIFIER "\" = \"" STRING_SPECIFIER "\"", DEBUG_STR(requestParameter->first), DEBUG_STR(requestParameter->second)); else DEBUG_LOG("Mismatching parameter values: \"" STRING_SPECIFIER "\" = \"" STRING_SPECIFIER "\" vs. \"" STRING_SPECIFIER "\"", DEBUG_STR(requestParameter->first), DEBUG_STR(requestParameter->second), DEBUG_STR(resourceParameter->second)); #endif score += (requestParameter->value == resourceParameter->value) ? kParameterMatchScore : kParameterMismatchScore; DEBUG_LOG("Score = %d", score); } else { DEBUG_LOG("Mismatching parameter names: " STRING_SPECIFIER ", " STRING_SPECIFIER, DEBUG_STR(requestParameter->first), DEBUG_STR(resourceParameter->first)); const auto scanForwardForMatch = [this, &score, kParameterMatchScore, kParameterMismatchScore, kParameterMissingScore](const auto& fixedIter, auto& scanningIter, const auto& scannerEnd) { auto scanner = scanningIter; while (scanner != scannerEnd && scanner->key != fixedIter->key) ++scanner; if (scanner == scannerEnd) return false; DEBUG_LOG("Skipping past %d non-matching parameter names", static_cast(std::distance(scanningIter, scanner))); score += kParameterMissingScore * std::distance(scanningIter, scanner); DEBUG_LOG("Score = %d", score); #if ENABLE(WTF_CAPTURE_INTERNAL_DEBUGGING) if (fixedIter->second == scanner->second) DEBUG_LOG("Matching parameter names and values: \"" STRING_SPECIFIER "\" = \"" STRING_SPECIFIER "\"", DEBUG_STR(fixedIter->first), DEBUG_STR(fixedIter->second)); else DEBUG_LOG("Mismatching parameter values: \"" STRING_SPECIFIER "\" = \"" STRING_SPECIFIER "\" vs. \"" STRING_SPECIFIER "\"", DEBUG_STR(fixedIter->first), DEBUG_STR(fixedIter->second), DEBUG_STR(scanner->second)); #endif score += (fixedIter->value == scanner->value) ? kParameterMatchScore : kParameterMismatchScore; DEBUG_LOG("Score = %d", score); scanningIter = scanner; return true; }; if (!scanForwardForMatch(requestParameter, resourceParameter, std::end(resourceParameters))) { if (!scanForwardForMatch(resourceParameter, requestParameter, std::end(requestParameters))) { DEBUG_LOG("Unmatched parameter: " STRING_SPECIFIER "=" STRING_SPECIFIER, DEBUG_STR(requestParameter->first), DEBUG_STR(requestParameter->second)); DEBUG_LOG("Unmatched parameter: " STRING_SPECIFIER "=" STRING_SPECIFIER, DEBUG_STR(resourceParameter->first), DEBUG_STR(resourceParameter->second)); score += kParameterMissingScore + kParameterMissingScore; DEBUG_LOG("Score = %d", score); } } } } DEBUG_LOG("Adjusting for trailing parameters"); score += kParameterMissingScore * (std::distance(requestParameter, std::end(requestParameters)) + std::distance(resourceParameter, std::end(resourceParameters))); DEBUG_LOG("Score = %d", score); return score; } void Manager::loadResources() { auto lines = readFile(reportRecordPath()); if (!lines) return; for (const auto& line : *lines) { if (line.size() != 2) { DEBUG_LOG_ERROR("line.size == %d", (int) line.size()); continue; } Resource newResource(hashToPath(line[0])); m_cachedResources.append(WTFMove(newResource)); } std::sort(std::begin(m_cachedResources), std::end(m_cachedResources), [](auto& left, auto& right) { return WTF::codePointCompareLessThan(left.url().string(), right.url().string()); }); for (auto& resource : m_cachedResources) logLoadedResource(resource); } String Manager::reportLoadPath() { return WebCore::pathByAppendingComponent(m_recordReplayCacheLocation, kFileNameReportLoad); } String Manager::reportRecordPath() { return WebCore::pathByAppendingComponent(m_recordReplayCacheLocation, kFileNameReportRecord); } String Manager::reportReplayPath() { return WebCore::pathByAppendingComponent(m_recordReplayCacheLocation, kFileNameReportReplay); } String Manager::requestToPath(const WebCore::ResourceRequest& request) { // TODO: come up with a more comprehensive hash that includes HTTP method // and possibly other values (such as headers). const auto& hash = stringToHash(request.url().string()); const auto& path = hashToPath(hash); return path; } String Manager::stringToHash(const String& s) { WTF::MD5 md5; if (s.characters8()) md5.addBytes(static_cast(s.characters8()), s.length()); else md5.addBytes(reinterpret_cast(s.characters16()), 2 * s.length()); WTF::MD5::Digest digest; md5.checksum(digest); return WTF::base64URLEncode(&digest[0], WTF::MD5::hashSize); } String Manager::hashToPath(const String& hash) { auto hashHead = hash.substring(0, 2); auto hashTail = hash.substring(2); StringBuilder fileName; fileName.append(hashTail); fileName.append(".data"); auto path = WebCore::pathByAppendingComponent(m_recordReplayCacheLocation, kDirNameResources); path = WebCore::pathByAppendingComponent(path, hashHead); path = WebCore::pathByAppendingComponent(path, fileName.toString()); return path; } String Manager::urlIdentifyingCommonDomain(const WebCore::URL& url) { return url.protocolHostAndPort(); } void Manager::logRecordedResource(const WebCore::ResourceRequest& request) { // Log network resources as they are cached to disk. const auto& url = request.url(); m_recordFileHandle.printf("%s %s\n", DEBUG_STR(stringToHash(url.string())), DEBUG_STR(url.string())); } void Manager::logLoadedResource(Resource& resource) { // Log cached resources as they are loaded from disk. m_loadFileHandle.printf("%s\n", DEBUG_STR(resource.url().string())); } void Manager::logPlayedBackResource(const WebCore::ResourceRequest& request, bool wasCacheMiss) { // Log network resources that are requested during replay. const auto& url = request.url(); if (wasCacheMiss) DEBUG_LOG("Cache miss: URL = " STRING_SPECIFIER, DEBUG_STR(url.string())); else DEBUG_LOG("Cache hit: URL = " STRING_SPECIFIER, DEBUG_STR(url.string())); m_replayFileHandle.printf("%s %s\n", wasCacheMiss ? "miss" : "hit ", DEBUG_STR(url.string())); } WebCore::FileHandle Manager::openCacheFile(const String& filePath, WebCore::FileOpenMode mode) { // If we can trivially open the file, then do that and return the new file // handle. auto fileHandle = WebCore::FileHandle(filePath, mode); if (fileHandle.open()) return fileHandle; // If we're opening the file for writing (including appending), then try // again after making sure all intermediate directories have been created. if (mode != WebCore::OpenForRead) { const auto& parentDir = WebCore::directoryName(filePath); if (!WebCore::makeAllDirectories(parentDir)) { DEBUG_LOG_ERROR("Error %d trying to create intermediate directories: " STRING_SPECIFIER, errno, DEBUG_STR(parentDir)); return fileHandle; } fileHandle = WebCore::FileHandle(filePath, mode); if (fileHandle.open()) return fileHandle; } // Could not open the file. Log the error and leave, returning the invalid // file handle. if (mode == WebCore::OpenForRead) DEBUG_LOG_ERROR("Error %d trying to open " STRING_SPECIFIER " for reading", errno, DEBUG_STR(filePath)); else DEBUG_LOG_ERROR("Error %d trying to open " STRING_SPECIFIER " for writing", errno, DEBUG_STR(filePath)); return fileHandle; } std::optional>> Manager::readFile(const String& filePath) { bool success = false; WebCore::MappedFileData file(filePath, success); if (!success) return std::nullopt; Vector> lines; auto begin = static_cast(file.data()); auto end = begin + file.size(); Vector line; while (getLine(begin, end, line)) lines.append(WTFMove(line)); return WTFMove(lines); } bool Manager::getLine(uint8_t const *& p, uint8_t const * const end, Vector& line) { // NB: Returns true if there may be more data to get, false if we've hit // the end of the buffer. DEBUG_LOG_VERBOSE("Getting a line"); line.clear(); if (p == end) { DEBUG_LOG_VERBOSE("Iterator at end; returning false"); return false; } String word; while (getWord(p, end, word)) { if (!word.isEmpty()) { DEBUG_LOG_VERBOSE("Adding word: " STRING_SPECIFIER, DEBUG_STR(word)); line.append(word); } } return true; } bool Manager::getWord(uint8_t const *& p, uint8_t const * const end, String& word) { // NB: Returns true if a (possibly empty) word was found and there may be // more, false if we've hit the end of line or buffer. DEBUG_LOG_VERBOSE("Getting a word"); if (p == end) { DEBUG_LOG_VERBOSE("Iterator at end; returning false"); return false; } if (*p == '\n') { DEBUG_LOG_VERBOSE("Iterator hit EOL; returning false"); ++p; return false; } bool escaping = false; bool ignoring = false; word = String(); DEBUG_LOG_VERBOSE("Iterating"); for ( ; p != end; ++p) { if (ignoring) { if (*p == '\n') break; } else if (escaping) { word.append(*p); escaping = false; } else if (*p == '#') { ignoring = true; } else if (*p == '\\') { escaping = true; } else if (*p == ' ') { if (!word.isEmpty()) break; } else if (*p == '\n') break; else word.append(*p); } return true; } } // namespace NetworkCapture } // namespace WebKit #endif // ENABLE(NETWORK_CAPTURE)