NetworkCaptureManager.cpp [plain text]
#include "config.h"
#include "NetworkCaptureManager.h"
#if ENABLE(NETWORK_CAPTURE)
#include "NetworkCaptureLogging.h"
#include "NetworkCaptureResource.h"
#include "WebCore/ResourceRequest.h"
#include "WebCore/URL.h"
#include <wtf/MD5.h>
#include <wtf/text/Base64.h>
#include <wtf/text/StringBuilder.h>
#include <algorithm>
#include <iterator>
#include <limits>
#define DEBUG_CLASS Manager
namespace WebKit {
namespace NetworkCapture {
static const char* kDirNameRecordReplay = "WebKitPerf/record_replay";
static const char* kDirNameResources = "resources";
static const char* kFileNameReportLoad = "report_load.txt";
static const char* kFileNameReportRecord = "report_record.txt";
static const char* kFileNameReportReplay = "report_replay.txt";
static int kMaxMatch = std::numeric_limits<int>::max();
static int kMinMatch = std::numeric_limits<int>::min();
Manager& Manager::singleton()
{
static NeverDestroyed<Manager> instance;
return instance;
}
void Manager::initialize(const String& recordReplayMode, const String& recordReplayCacheLocation)
{
DEBUG_LOG("Initializing");
if (equalIgnoringASCIICase(recordReplayMode, "record"))
m_recordReplayMode = Record;
else if (equalIgnoringASCIICase(recordReplayMode, "replay"))
m_recordReplayMode = Replay;
else
m_recordReplayMode = Disabled;
m_recordReplayCacheLocation = WebCore::pathByAppendingComponent(recordReplayCacheLocation, kDirNameRecordReplay);
m_loadFileHandle = WebCore::FileHandle(reportLoadPath(), WebCore::OpenForWrite);
m_recordFileHandle = WebCore::FileHandle(reportRecordPath(), WebCore::OpenForWrite);
m_replayFileHandle = WebCore::FileHandle(reportReplayPath(), WebCore::OpenForWrite);
DEBUG_LOG("Cache location = " STRING_SPECIFIER, DEBUG_STR(m_recordReplayCacheLocation));
if (isReplaying())
loadResources();
}
void Manager::terminate()
{
m_loadFileHandle.close();
m_recordFileHandle.close();
m_replayFileHandle.close();
}
Resource* Manager::findMatch(const WebCore::ResourceRequest& request)
{
DEBUG_LOG_VERBOSE("URL = " STRING_SPECIFIER, DEBUG_STR(request.url().string()));
auto bestMatch = findExactMatch(request);
if (!bestMatch)
bestMatch = findBestFuzzyMatch(request);
#if ENABLE(WTF_CAPTURE_INTERNAL_DEBUGGING)
if (!bestMatch)
DEBUG_LOG("Could not find match for: " STRING_SPECIFIER, DEBUG_STR(request.url().string()));
else if (request.url() == bestMatch->url())
DEBUG_LOG("Found exact match for: " STRING_SPECIFIER, DEBUG_STR(request.url().string()));
else {
DEBUG_LOG("Found fuzzy match for: " STRING_SPECIFIER, DEBUG_STR(request.url().string()));
DEBUG_LOG(" replaced with : " STRING_SPECIFIER, DEBUG_STR(bestMatch->url().string()));
}
#endif
return bestMatch;
}
Resource* Manager::findExactMatch(const WebCore::ResourceRequest& request)
{
const auto& url = request.url();
auto lower = std::lower_bound(std::begin(m_cachedResources), std::end(m_cachedResources), url, [](auto& resource, const auto& url) {
return WTF::codePointCompareLessThan(resource.url().string(), url.string());
});
if (lower != std::end(m_cachedResources) && lower->url() == url) {
DEBUG_LOG_VERBOSE("Found exact match: " STRING_SPECIFIER, DEBUG_STR(lower->url().string()));
return &*lower;
}
return nullptr;
}
Resource* Manager::findBestFuzzyMatch(const WebCore::ResourceRequest& request)
{
const auto& url = request.url();
Resource* bestMatch = nullptr;
int bestScore = kMinMatch;
const auto& baseURL = url.string().left(url.pathStart());
const auto& lower = std::lower_bound(std::begin(m_cachedResources), std::end(m_cachedResources), baseURL, [](auto& resource, const auto& url) {
return WTF::codePointCompareLessThan(resource.baseURL().string(), url);
});
const auto& upper = std::upper_bound(lower, std::end(m_cachedResources), baseURL, [](const auto& url, auto& resource) {
return WTF::codePointCompareLessThan(resource.baseURL().string(), url);
});
const auto& requestParameters = WebCore::URLParser::parseURLEncodedForm(url.query());
for (auto iResource = lower; iResource != upper; ++iResource) {
int thisScore = fuzzyMatchURLs(url, requestParameters, iResource->url(), iResource->queryParameters());
if (bestScore < thisScore) {
DEBUG_LOG("New best match (%d): " STRING_SPECIFIER, thisScore, DEBUG_STR(iResource->url().string()));
bestScore = thisScore;
bestMatch = &*iResource;
if (bestScore == kMaxMatch)
break;
}
}
return bestMatch;
}
int Manager::fuzzyMatchURLs(const WebCore::URL& requestURL, const WebCore::URLParser::URLEncodedForm& requestParameters, const WebCore::URL& resourceURL, const WebCore::URLParser::URLEncodedForm& resourceParameters)
{
if (!protocolHostAndPortAreEqual(requestURL, resourceURL)) {
DEBUG_LOG("Scheme/host/port mismatch: " STRING_SPECIFIER " != " STRING_SPECIFIER, DEBUG_STR(requestURL.string()), DEBUG_STR(resourceURL.string()));
return kMinMatch;
}
if (requestURL.fragmentIdentifier() != resourceURL.fragmentIdentifier()) {
DEBUG_LOG("Fragments mismatch: " STRING_SPECIFIER " != " STRING_SPECIFIER, DEBUG_STR(requestURL.string()), DEBUG_STR(resourceURL.string()));
return kMinMatch;
}
DEBUG_LOG("Fuzzy matching:");
DEBUG_LOG(" : " STRING_SPECIFIER, DEBUG_STR(requestURL.string()));
DEBUG_LOG(" : " STRING_SPECIFIER, DEBUG_STR(resourceURL.string()));
const int kPathMatchScore = 4;
const int kPathMismatchScore = -1;
const int kPathMissingScore = -1;
const int kParameterMatchScore = 4;
const int kParameterMismatchScore = -1;
const int kParameterMissingScore = -1;
int score = 0;
const auto& requestPath = requestURL.path();
const auto& resourcePath = resourceURL.path();
Vector<String> requestPathComponents, resourcePathComponents;
requestPath.split('/', requestPathComponents);
resourcePath.split('/', resourcePathComponents);
auto updatedIterators = std::mismatch(
std::begin(requestPathComponents), std::end(requestPathComponents),
std::begin(resourcePathComponents), std::end(resourcePathComponents));
auto matchingDistance = std::distance(std::begin(requestPathComponents), updatedIterators.first);
auto requestPathMismatchDistance = std::distance(updatedIterators.first, std::end(requestPathComponents));
auto resourcePathMismatchDistance = std::distance(updatedIterators.second, std::end(resourcePathComponents));
decltype(matchingDistance) mismatchingDistance;
decltype(matchingDistance) missingDistance;
if (requestPathMismatchDistance < resourcePathMismatchDistance) {
mismatchingDistance = requestPathMismatchDistance;
missingDistance = resourcePathMismatchDistance - requestPathMismatchDistance;
} else {
mismatchingDistance = resourcePathMismatchDistance;
missingDistance = requestPathMismatchDistance - resourcePathMismatchDistance;
}
DEBUG_LOG("Path matching results: matching = %d, mismatching = %d, missing = %d",
static_cast<int>(matchingDistance),
static_cast<int>(mismatchingDistance),
static_cast<int>(missingDistance));
score += matchingDistance * kPathMatchScore
+ mismatchingDistance * kPathMismatchScore
+ missingDistance * kPathMissingScore;
DEBUG_LOG("Score = %d", score);
auto requestParameter = std::begin(requestParameters);
auto resourceParameter = std::begin(resourceParameters);
for (; requestParameter != std::end(requestParameters) && resourceParameter != std::end(resourceParameters); ++requestParameter, ++resourceParameter) {
if (requestParameter->first == resourceParameter->first) {
#if ENABLE(WTF_CAPTURE_INTERNAL_DEBUGGING)
if (requestParameter->second == resourceParameter->second)
DEBUG_LOG("Matching parameter names and values: \"" STRING_SPECIFIER "\" = \"" STRING_SPECIFIER "\"", DEBUG_STR(requestParameter->first), DEBUG_STR(requestParameter->second));
else
DEBUG_LOG("Mismatching parameter values: \"" STRING_SPECIFIER "\" = \"" STRING_SPECIFIER "\" vs. \"" STRING_SPECIFIER "\"", DEBUG_STR(requestParameter->first), DEBUG_STR(requestParameter->second), DEBUG_STR(resourceParameter->second));
#endif
score += (requestParameter->second == resourceParameter->second) ? kParameterMatchScore : kParameterMismatchScore;
DEBUG_LOG("Score = %d", score);
} else {
DEBUG_LOG("Mismatching parameter names: " STRING_SPECIFIER ", " STRING_SPECIFIER, DEBUG_STR(requestParameter->first), DEBUG_STR(resourceParameter->first));
const auto scanForwardForMatch = [this, &score, kParameterMatchScore, kParameterMismatchScore, kParameterMissingScore](const auto& fixedIter, auto& scanningIter, const auto& scannerEnd) {
auto scanner = scanningIter;
while (scanner != scannerEnd && scanner->first != fixedIter->first)
++scanner;
if (scanner == scannerEnd)
return false;
DEBUG_LOG("Skipping past %d non-matching parameter names", static_cast<int>(std::distance(scanningIter, scanner)));
score += kParameterMissingScore * std::distance(scanningIter, scanner);
DEBUG_LOG("Score = %d", score);
#if ENABLE(WTF_CAPTURE_INTERNAL_DEBUGGING)
if (fixedIter->second == scanner->second)
DEBUG_LOG("Matching parameter names and values: \"" STRING_SPECIFIER "\" = \"" STRING_SPECIFIER "\"", DEBUG_STR(fixedIter->first), DEBUG_STR(fixedIter->second));
else
DEBUG_LOG("Mismatching parameter values: \"" STRING_SPECIFIER "\" = \"" STRING_SPECIFIER "\" vs. \"" STRING_SPECIFIER "\"", DEBUG_STR(fixedIter->first), DEBUG_STR(fixedIter->second), DEBUG_STR(scanner->second));
#endif
score += (fixedIter->second == scanner->second) ? kParameterMatchScore : kParameterMismatchScore;
DEBUG_LOG("Score = %d", score);
scanningIter = scanner;
return true;
};
if (!scanForwardForMatch(requestParameter, resourceParameter, std::end(resourceParameters))) {
if (!scanForwardForMatch(resourceParameter, requestParameter, std::end(requestParameters))) {
DEBUG_LOG("Unmatched parameter: " STRING_SPECIFIER "=" STRING_SPECIFIER, DEBUG_STR(requestParameter->first), DEBUG_STR(requestParameter->second));
DEBUG_LOG("Unmatched parameter: " STRING_SPECIFIER "=" STRING_SPECIFIER, DEBUG_STR(resourceParameter->first), DEBUG_STR(resourceParameter->second));
score += kParameterMissingScore + kParameterMissingScore;
DEBUG_LOG("Score = %d", score);
}
}
}
}
DEBUG_LOG("Adjusting for trailing parameters");
score += kParameterMissingScore
* (std::distance(requestParameter, std::end(requestParameters))
+ std::distance(resourceParameter, std::end(resourceParameters)));
DEBUG_LOG("Score = %d", score);
return score;
}
void Manager::loadResources()
{
auto lines = readFile(reportRecordPath());
if (!lines)
return;
for (const auto& line : *lines) {
if (line.size() != 2) {
DEBUG_LOG_ERROR("line.size == %d", (int) line.size());
continue;
}
Resource newResource(hashToPath(line[0]));
m_cachedResources.append(WTFMove(newResource));
}
std::sort(std::begin(m_cachedResources), std::end(m_cachedResources), [](auto& left, auto& right) {
return WTF::codePointCompareLessThan(left.url().string(), right.url().string());
});
for (auto& resource : m_cachedResources)
logLoadedResource(resource);
}
String Manager::reportLoadPath()
{
return WebCore::pathByAppendingComponent(m_recordReplayCacheLocation, kFileNameReportLoad);
}
String Manager::reportRecordPath()
{
return WebCore::pathByAppendingComponent(m_recordReplayCacheLocation, kFileNameReportRecord);
}
String Manager::reportReplayPath()
{
return WebCore::pathByAppendingComponent(m_recordReplayCacheLocation, kFileNameReportReplay);
}
String Manager::requestToPath(const WebCore::ResourceRequest& request)
{
const auto& hash = stringToHash(request.url().string());
const auto& path = hashToPath(hash);
return path;
}
String Manager::stringToHash(const String& s)
{
WTF::MD5 md5;
if (s.characters8())
md5.addBytes(static_cast<const uint8_t*>(s.characters8()), s.length());
else
md5.addBytes(reinterpret_cast<const uint8_t*>(s.characters16()), 2 * s.length());
WTF::MD5::Digest digest;
md5.checksum(digest);
return WTF::base64URLEncode(&digest[0], WTF::MD5::hashSize);
}
String Manager::hashToPath(const String& hash)
{
auto hashHead = hash.substring(0, 2);
auto hashTail = hash.substring(2);
StringBuilder fileName;
fileName.append(hashTail);
fileName.append(".data");
auto path = WebCore::pathByAppendingComponent(m_recordReplayCacheLocation, kDirNameResources);
path = WebCore::pathByAppendingComponent(path, hashHead);
path = WebCore::pathByAppendingComponent(path, fileName.toString());
return path;
}
void Manager::logRecordedResource(const WebCore::ResourceRequest& request)
{
const auto& url = request.url();
m_recordFileHandle.printf("%s %s\n", DEBUG_STR(stringToHash(url.string())), DEBUG_STR(url.string()));
}
void Manager::logLoadedResource(Resource& resource)
{
m_loadFileHandle.printf("%s\n", DEBUG_STR(resource.url().string()));
}
void Manager::logPlayedBackResource(const WebCore::ResourceRequest& request, bool wasCacheMiss)
{
const auto& url = request.url();
if (wasCacheMiss)
DEBUG_LOG("Cache miss: URL = " STRING_SPECIFIER, DEBUG_STR(url.string()));
else
DEBUG_LOG("Cache hit: URL = " STRING_SPECIFIER, DEBUG_STR(url.string()));
m_replayFileHandle.printf("%s %s\n", wasCacheMiss ? "miss" : "hit ", DEBUG_STR(url.string()));
}
WebCore::FileHandle Manager::openCacheFile(const String& filePath, WebCore::FileOpenMode mode)
{
auto fileHandle = WebCore::FileHandle(filePath, mode);
if (fileHandle.open())
return fileHandle;
if (mode != WebCore::OpenForRead) {
const auto& parentDir = WebCore::directoryName(filePath);
if (!WebCore::makeAllDirectories(parentDir)) {
DEBUG_LOG_ERROR("Error %d trying to create intermediate directories: " STRING_SPECIFIER, errno, DEBUG_STR(parentDir));
return fileHandle;
}
fileHandle = WebCore::FileHandle(filePath, mode);
if (fileHandle.open())
return fileHandle;
}
if (mode == WebCore::OpenForRead)
DEBUG_LOG_ERROR("Error %d trying to open " STRING_SPECIFIER " for reading", errno, DEBUG_STR(filePath));
else
DEBUG_LOG_ERROR("Error %d trying to open " STRING_SPECIFIER " for writing", errno, DEBUG_STR(filePath));
return fileHandle;
}
std::optional<Vector<Vector<String>>> Manager::readFile(const String& filePath)
{
bool success = false;
WebCore::MappedFileData file(filePath, success);
if (!success)
return std::nullopt;
Vector<Vector<String>> lines;
auto begin = static_cast<const uint8_t*>(file.data());
auto end = begin + file.size();
Vector<String> line;
while (getLine(begin, end, line))
lines.append(WTFMove(line));
return WTFMove(lines);
}
bool Manager::getLine(uint8_t const *& p, uint8_t const * const end, Vector<String>& line)
{
DEBUG_LOG_VERBOSE("Getting a line");
line.clear();
if (p == end) {
DEBUG_LOG_VERBOSE("Iterator at end; returning false");
return false;
}
String word;
while (getWord(p, end, word)) {
if (!word.isEmpty()) {
DEBUG_LOG_VERBOSE("Adding word: " STRING_SPECIFIER, DEBUG_STR(word));
line.append(word);
}
}
return true;
}
bool Manager::getWord(uint8_t const *& p, uint8_t const * const end, String& word)
{
DEBUG_LOG_VERBOSE("Getting a word");
if (p == end) {
DEBUG_LOG_VERBOSE("Iterator at end; returning false");
return false;
}
if (*p == '\n') {
DEBUG_LOG_VERBOSE("Iterator hit EOL; returning false");
++p;
return false;
}
bool escaping = false;
bool ignoring = false;
word = String();
DEBUG_LOG_VERBOSE("Iterating");
for ( ; p != end; ++p) {
if (ignoring) {
if (*p == '\n')
break;
} else if (escaping) {
word.append(*p);
escaping = false;
} else if (*p == '#') {
ignoring = true;
} else if (*p == '\\') {
escaping = true;
} else if (*p == ' ') {
if (!word.isEmpty())
break;
} else if (*p == '\n')
break;
else
word.append(*p);
}
return true;
}
} }
#endif // ENABLE(NETWORK_CAPTURE)