// WebResourceLoadStatisticsStore.cpp
#include "config.h"
#include "WebResourceLoadStatisticsStore.h"
#include "APIWebsiteDataStore.h"
#include "WebProcessMessages.h"
#include "WebProcessPool.h"
#include "WebResourceLoadStatisticsStoreMessages.h"
#include "WebsiteDataFetchOption.h"
#include "WebsiteDataType.h"
#include <WebCore/KeyedCoding.h>
#include <WebCore/ResourceLoadStatistics.h>
#include <wtf/CurrentTime.h>
#include <wtf/MainThread.h>
#include <wtf/MathExtras.h>
#include <wtf/RunLoop.h>
#include <wtf/threads/BinarySemaphore.h>
using namespace WebCore;
namespace WebKit {
// Minimum interval, in seconds, between two data-record removal passes
// (added to m_lastTimeDataRecordsWereRemoved and compared against currentTime() in removeDataRecords()).
static const auto numberOfSecondsBetweenRemovingDataRecords = 60;
// Threshold applied both to each individual count and to the length of the
// three-component feature vector in hasPrevalentResourceCharacteristics().
static const auto featureVectorLengthThreshold = 3;
// Website data types targeted by removeDataRecords(); left empty here and filled in
// by removeDataRecords() the first time it gets past its throttling checks.
static OptionSet<WebKit::WebsiteDataType> dataTypesToRemove;
// Factory: heap-allocates a store for the given statistics directory and hands
// ownership of that allocation to the returned Ref via adoptRef().
Ref<WebResourceLoadStatisticsStore> WebResourceLoadStatisticsStore::create(const String& resourceLoadStatisticsDirectory)
{
return adoptRef(*new WebResourceLoadStatisticsStore(resourceLoadStatisticsDirectory));
}
// Creates the underlying ResourceLoadStatisticsStore, the work queue used for
// statistics processing, and remembers the directory used for persistence.
// Nothing is read from disk here; see readDataFromDiskIfNeeded().
WebResourceLoadStatisticsStore::WebResourceLoadStatisticsStore(const String& resourceLoadStatisticsDirectory)
: m_resourceStatisticsStore(ResourceLoadStatisticsStore::create())
, m_statisticsQueue(WorkQueue::create("WebResourceLoadStatisticsStore Process Data Queue"))
, m_storagePath(resourceLoadStatisticsDirectory)
{
}
// Empty body: no explicit teardown is performed here.
WebResourceLoadStatisticsStore::~WebResourceLoadStatisticsStore()
{
}
// Decides whether a resource's statistics look like those of a prevalent resource.
//
// Three counts are used as a feature vector:
//   - number of top frame origins the resource was a subresource under,
//   - number of unique redirect targets,
//   - number of top frame origins the resource was a subframe under.
//
// Returns false when all three are zero, true when any single one exceeds
// featureVectorLengthThreshold, and otherwise compares the Euclidean length
// of the vector against the same threshold.
bool WebResourceLoadStatisticsStore::hasPrevalentResourceCharacteristics(const ResourceLoadStatistics& resourceStatistic)
{
    auto topFrameOriginCount = resourceStatistic.subresourceUnderTopFrameOrigins.size();
    auto uniqueRedirectCount = resourceStatistic.subresourceUniqueRedirectsTo.size();
    auto subframeOriginCount = resourceStatistic.subframeUnderTopFrameOrigins.size();

    // No signal at all: cannot be prevalent.
    bool hasAnySignal = topFrameOriginCount || uniqueRedirectCount || subframeOriginCount;
    if (!hasAnySignal)
        return false;

    // A single component above the threshold already makes the vector longer than the threshold.
    bool anyComponentExceedsThreshold = topFrameOriginCount > featureVectorLengthThreshold
        || uniqueRedirectCount > featureVectorLengthThreshold
        || subframeOriginCount > featureVectorLengthThreshold;
    if (anyComponentExceedsThreshold)
        return true;

    // Every component is at most the threshold here, so the squares stay small.
    double squaredLength = 0;
    squaredLength += topFrameOriginCount * topFrameOriginCount;
    squaredLength += uniqueRedirectCount * uniqueRedirectCount;
    squaredLength += subframeOriginCount * subframeOriginCount;
    ASSERT(squaredLength > 0);

    return sqrt(squaredLength) > featureVectorLengthThreshold;
}
// Marks the resource as prevalent when its statistics match the prevalent
// characteristics. A resource that is already flagged is left untouched; the
// flag is never cleared here.
void WebResourceLoadStatisticsStore::classifyResource(ResourceLoadStatistics& resourceStatistic)
{
    if (resourceStatistic.isPrevalentResource)
        return;

    if (hasPrevalentResourceCharacteristics(resourceStatistic))
        resourceStatistic.isPrevalentResource = true;
}
void WebResourceLoadStatisticsStore::removeDataRecords()
{
if (m_dataRecordsRemovalPending)
return;
Vector<String> prevalentResourceDomains = coreStore().prevalentResourceDomainsWithoutUserInteraction();
if (!prevalentResourceDomains.size())
return;
double now = currentTime();
if (!m_lastTimeDataRecordsWereRemoved) {
m_lastTimeDataRecordsWereRemoved = now;
return;
}
if (now < (m_lastTimeDataRecordsWereRemoved + numberOfSecondsBetweenRemovingDataRecords))
return;
m_dataRecordsRemovalPending = true;
m_lastTimeDataRecordsWereRemoved = now;
if (dataTypesToRemove.isEmpty()) {
dataTypesToRemove |= WebsiteDataType::Cookies;
dataTypesToRemove |= WebsiteDataType::LocalStorage;
dataTypesToRemove |= WebsiteDataType::IndexedDBDatabases;
dataTypesToRemove |= WebsiteDataType::DiskCache;
dataTypesToRemove |= WebsiteDataType::MemoryCache;
}
RunLoop::main().dispatch([prevalentResourceDomains = WTFMove(prevalentResourceDomains), this] () mutable {
auto& websiteDataStore = API::WebsiteDataStore::defaultDataStore()->websiteDataStore();
websiteDataStore.fetchData(dataTypesToRemove, { }, [prevalentResourceDomains = WTFMove(prevalentResourceDomains), this](auto websiteDataRecords) {
Vector<WebsiteDataRecord> dataRecords;
Vector<String> prevalentResourceDomainsWithDataRecords;
for (auto& websiteDataRecord : websiteDataRecords) {
for (auto& prevalentResourceDomain : prevalentResourceDomains) {
if (websiteDataRecord.displayName.endsWithIgnoringASCIICase(prevalentResourceDomain)) {
auto suffixStart = websiteDataRecord.displayName.length() - prevalentResourceDomain.length();
if (!suffixStart || websiteDataRecord.displayName[suffixStart - 1] == '.') {
dataRecords.append(websiteDataRecord);
prevalentResourceDomainsWithDataRecords.append(prevalentResourceDomain);
}
}
}
}
if (!dataRecords.size()) {
m_dataRecordsRemovalPending = false;
return;
}
auto& websiteDataStore = API::WebsiteDataStore::defaultDataStore()->websiteDataStore();
websiteDataStore.removeData(dataTypesToRemove, dataRecords, [prevalentResourceDomainsWithDataRecords = WTFMove(prevalentResourceDomainsWithDataRecords), this] {
this->coreStore().updateStatisticsForRemovedDataRecords(prevalentResourceDomainsWithDataRecords);
m_dataRecordsRemovalPending = false;
});
});
});
}
// Handles a batch of updated statistics: merges them into the core store,
// (re)classifies every resource, gives data-record removal a chance to run,
// and persists the full store under the "full_browsing_session" label.
void WebResourceLoadStatisticsStore::resourceLoadStatisticsUpdated(const Vector<WebCore::ResourceLoadStatistics>& origins)
{
    coreStore().mergeStatistics(origins);

    coreStore().processStatistics([this] (ResourceLoadStatistics& resourceStatistic) {
        classifyResource(resourceStatistic);
    });

    // Run once, after every resource has been classified: removeDataRecords() looks
    // at the whole store, so invoking it per statistic repeated the same work for
    // each entry and could act on a partially classified store.
    removeDataRecords();

    auto encoder = coreStore().createEncoderFromData();
    writeEncoderToDisk(*encoder, "full_browsing_session");
}
// Updates the enabled flag. Only an actual change of value has any effect; in
// that case readDataFromDiskIfNeeded() is invoked, which itself does nothing
// when the feature has just been disabled.
void WebResourceLoadStatisticsStore::setResourceLoadStatisticsEnabled(bool enabled)
{
    if (m_resourceLoadStatisticsEnabled != enabled) {
        m_resourceLoadStatisticsEnabled = enabled;
        readDataFromDiskIfNeeded();
    }
}
// Returns the flag last stored by setResourceLoadStatisticsEnabled().
bool WebResourceLoadStatisticsStore::resourceLoadStatisticsEnabled() const
{
return m_resourceLoadStatisticsEnabled;
}
// When statistics collection is enabled, schedules a task on the statistics
// queue that clears the core store and then repopulates it from the persisted
// "full_browsing_session" data, if such data can be decoded. The task holds a
// reference to this object for as long as it is queued or running.
void WebResourceLoadStatisticsStore::readDataFromDiskIfNeeded()
{
    if (!m_resourceLoadStatisticsEnabled)
        return;

    m_statisticsQueue->dispatch([this, protectedThis = makeRef(*this)] {
        coreStore().clear();

        // A missing or unreadable file simply leaves the store empty.
        if (auto decoder = createDecoderFromDisk("full_browsing_session"))
            coreStore().readDataFromDecoder(*decoder);
    });
}
// Registers this store on the connection as the work-queue message receiver for
// WebResourceLoadStatisticsStore messages, with m_statisticsQueue as the associated queue.
void WebResourceLoadStatisticsStore::processWillOpenConnection(WebProcessProxy&, IPC::Connection& connection)
{
connection.addWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName(), m_statisticsQueue.get(), this);
}
// Undoes the registration made in processWillOpenConnection() for this connection.
void WebResourceLoadStatisticsStore::processDidCloseConnection(WebProcessProxy&, IPC::Connection& connection)
{
connection.removeWorkQueueMessageReceiver(Messages::WebResourceLoadStatisticsStore::messageReceiverName());
}
// Blocks the caller until a marker task dispatched to the statistics queue has
// run. NOTE(review): presumably the queue is serial, in which case this waits
// for all previously queued statistics work to finish before termination.
void WebResourceLoadStatisticsStore::applicationWillTerminate()
{
    BinarySemaphore semaphore;

    // The task only touches the semaphore, so nothing else needs to be captured.
    // Capturing it by reference is safe because this function does not return
    // until the task has signalled.
    m_statisticsQueue->dispatch([&semaphore] {
        semaphore.signal();
    });

    semaphore.wait(WallTime::infinity());
}
// Builds the path of the persistence file for the given label:
// "<m_storagePath>/<label>_resourceLog.plist". Returns the empty string when
// no storage directory is configured.
String WebResourceLoadStatisticsStore::persistentStoragePath(const String& label) const
{
    if (!m_storagePath.isEmpty())
        return pathByAppendingComponent(m_storagePath, label + "_resourceLog.plist");

    return emptyString();
}
// Finishes the encoder and writes the resulting bytes to the persistence file
// for the given label, creating the storage directory first. Does nothing when
// encoding produced no data, no storage directory is configured, or the file
// cannot be opened. A short or failed write is logged; it is not retried.
void WebResourceLoadStatisticsStore::writeEncoderToDisk(KeyedEncoder& encoder, const String& label) const
{
    RefPtr<SharedBuffer> rawData = encoder.finishEncoding();
    if (!rawData)
        return;

    String resourceLog = persistentStoragePath(label);
    if (resourceLog.isEmpty())
        return;

    // persistentStoragePath() only returns a non-empty path when m_storagePath is non-empty.
    makeAllDirectories(m_storagePath);

    // Compare against the platform's invalid-handle value: a plain truthiness test is
    // wrong where the invalid handle is -1 and 0 is a legitimate descriptor.
    auto handle = openFile(resourceLog, OpenForWrite);
    if (handle == invalidPlatformFileHandle)
        return;

    int64_t writtenBytes = writeToFile(handle, rawData->data(), rawData->size());
    closeFile(handle);

    // Explicit casts keep the arguments in sync with the format specifiers regardless
    // of the exact integer type returned by size(); a failed write shows up as -1.
    if (writtenBytes != static_cast<int64_t>(rawData->size()))
        WTFLogAlways("WebResourceLoadStatisticsStore: We only wrote %lld out of %llu bytes to disk", static_cast<long long>(writtenBytes), static_cast<unsigned long long>(rawData->size()));
}
std::unique_ptr<KeyedDecoder> WebResourceLoadStatisticsStore::createDecoderFromDisk(const String& label) const
{
String resourceLog = persistentStoragePath(label);
if (resourceLog.isEmpty())
return nullptr;
RefPtr<SharedBuffer> rawData = SharedBuffer::createWithContentsOfFile(resourceLog);
if (!rawData)
return nullptr;
return KeyedDecoder::decoder(reinterpret_cast<const uint8_t*>(rawData->data()), rawData->size());
}
}