summaryrefslogtreecommitdiffstats
path: root/toolkit/components/url-classifier/LookupCache.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:22:09 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-07 09:22:09 +0000
commit43a97878ce14b72f0981164f87f2e35e14151312 (patch)
tree620249daf56c0258faa40cbdcf9cfba06de2a846 /toolkit/components/url-classifier/LookupCache.cpp
parentInitial commit. (diff)
downloadfirefox-43a97878ce14b72f0981164f87f2e35e14151312.tar.xz
firefox-43a97878ce14b72f0981164f87f2e35e14151312.zip
Adding upstream version 110.0.1.upstream/110.0.1upstream
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'toolkit/components/url-classifier/LookupCache.cpp')
-rw-r--r--toolkit/components/url-classifier/LookupCache.cpp1091
1 files changed, 1091 insertions, 0 deletions
diff --git a/toolkit/components/url-classifier/LookupCache.cpp b/toolkit/components/url-classifier/LookupCache.cpp
new file mode 100644
index 0000000000..415b924370
--- /dev/null
+++ b/toolkit/components/url-classifier/LookupCache.cpp
@@ -0,0 +1,1091 @@
+//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LookupCache.h"
+#include "HashStore.h"
+#include "nsIFileStreams.h"
+#include "nsISeekableStream.h"
+#include "mozilla/ArrayUtils.h"
+#include "mozilla/Telemetry.h"
+#include "mozilla/Logging.h"
+#include "nsNetUtil.h"
+#include "nsCheckSummedOutputStream.h"
+#include "crc32c.h"
+#include "prprf.h"
+#include "Classifier.h"
+#include "nsUrlClassifierInfo.h"
+
+// We act as the main entry point for all the real lookups,
+// so note that those are not done to the actual HashStore.
+// The latter solely exists to store the data needed to handle
+// the updates from the protocol.
+
+// This module provides a front for PrefixSet, mUpdateCompletions,
+// and mGetHashCache, which together contain everything needed to
+// provide a classification as long as the data is up to date.
+
+// PrefixSet stores and provides lookups for 4-byte prefixes.
+// mUpdateCompletions contains 32-byte completions which were
+// contained in updates. They are retrieved from HashStore/.sbtore
+// on startup.
+// mGetHashCache contains 32-byte completions which were
+// returned from the gethash server. They are not serialized,
+// only cached until the next update.
+
+// MOZ_LOG=UrlClassifierDbService:5
+extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
+#define LOG(args) \
+ MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
+#define LOG_ENABLED() \
+ MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
+
+namespace mozilla {
+namespace safebrowsing {
+
+const uint32_t LookupCache::MAX_BUFFER_SIZE = 64 * 1024;
+
+const int CacheResultV2::VER = CacheResult::V2;
+const int CacheResultV4::VER = CacheResult::V4;
+
+const int LookupCacheV2::VER = 2;
+const uint32_t LookupCacheV2::VLPSET_MAGIC = 0xe5b862e7;
+const uint32_t LookupCacheV2::VLPSET_VERSION = 1;
+
+namespace {
+
+//////////////////////////////////////////////////////////////////////////
+// A set of lightweight functions for reading/writing value from/to file.
+template <typename T>
+struct ValueTraits {
+ static_assert(sizeof(T) <= LookupCacheV4::MAX_METADATA_VALUE_LENGTH,
+ "LookupCacheV4::MAX_METADATA_VALUE_LENGTH is too small.");
+ static uint32_t Length(const T& aValue) { return sizeof(T); }
+ static char* WritePtr(T& aValue, uint32_t aLength) { return (char*)&aValue; }
+ static const char* ReadPtr(const T& aValue) { return (char*)&aValue; }
+ static bool IsFixedLength() { return true; }
+};
+
+template <>
+struct ValueTraits<nsACString> {
+ static bool IsFixedLength() { return false; }
+
+ static uint32_t Length(const nsACString& aValue) { return aValue.Length(); }
+
+ static char* WritePtr(nsACString& aValue, uint32_t aLength) {
+ aValue.SetLength(aLength);
+ return aValue.BeginWriting();
+ }
+
+ static const char* ReadPtr(const nsACString& aValue) {
+ return aValue.BeginReading();
+ }
+};
+
+template <typename T>
+static nsresult WriteValue(nsIOutputStream* aOutputStream, const T& aValue) {
+ uint32_t writeLength = ValueTraits<T>::Length(aValue);
+ MOZ_ASSERT(writeLength <= LookupCacheV4::MAX_METADATA_VALUE_LENGTH,
+ "LookupCacheV4::MAX_METADATA_VALUE_LENGTH is too small.");
+ if (!ValueTraits<T>::IsFixedLength()) {
+ // We need to write out the variable value length.
+ nsresult rv = WriteValue(aOutputStream, writeLength);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ // Write out the value.
+ auto valueReadPtr = ValueTraits<T>::ReadPtr(aValue);
+ uint32_t written;
+ nsresult rv = aOutputStream->Write(valueReadPtr, writeLength, &written);
+ NS_ENSURE_SUCCESS(rv, rv);
+ if (NS_WARN_IF(written != writeLength)) {
+ return NS_ERROR_FAILURE;
+ }
+
+ return rv;
+}
+
+template <typename T>
+static nsresult ReadValue(nsIInputStream* aInputStream, T& aValue) {
+ nsresult rv;
+
+ uint32_t readLength;
+ if (ValueTraits<T>::IsFixedLength()) {
+ readLength = ValueTraits<T>::Length(aValue);
+ } else {
+ // Read the variable value length from file.
+ nsresult rv = ReadValue(aInputStream, readLength);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ // Sanity-check the readLength in case of disk corruption
+ // (see bug 1433636).
+ if (readLength > LookupCacheV4::MAX_METADATA_VALUE_LENGTH) {
+ return NS_ERROR_FILE_CORRUPTED;
+ }
+
+ // Read the value.
+ uint32_t read;
+ auto valueWritePtr = ValueTraits<T>::WritePtr(aValue, readLength);
+ rv = aInputStream->Read(valueWritePtr, readLength, &read);
+ if (NS_FAILED(rv) || read != readLength) {
+ LOG(("Failed to read the value."));
+ return NS_FAILED(rv) ? rv : NS_ERROR_FAILURE;
+ }
+
+ return rv;
+}
+
+void CStringToHexString(const nsACString& aIn, nsACString& aOut) {
+ static const char* const lut = "0123456789ABCDEF";
+
+ size_t len = aIn.Length();
+ MOZ_ASSERT(len <= COMPLETE_SIZE);
+
+ aOut.SetCapacity(2 * len);
+ for (size_t i = 0; i < aIn.Length(); ++i) {
+ const char c = static_cast<char>(aIn[i]);
+ aOut.Append(lut[(c >> 4) & 0x0F]);
+ aOut.Append(lut[c & 15]);
+ }
+}
+
+#ifdef DEBUG
+nsCString GetFormattedTimeString(int64_t aCurTimeSec) {
+ PRExplodedTime pret;
+ PR_ExplodeTime(aCurTimeSec * PR_USEC_PER_SEC, PR_GMTParameters, &pret);
+
+ return nsPrintfCString("%04d-%02d-%02d %02d:%02d:%02d UTC", pret.tm_year,
+ pret.tm_month + 1, pret.tm_mday, pret.tm_hour,
+ pret.tm_min, pret.tm_sec);
+}
+#endif
+
+} // end of unnamed namespace.
+////////////////////////////////////////////////////////////////////////
+
+LookupCache::LookupCache(const nsACString& aTableName,
+ const nsACString& aProvider,
+ nsCOMPtr<nsIFile>& aRootStoreDir)
+ : mPrimed(false),
+ mTableName(aTableName),
+ mProvider(aProvider),
+ mRootStoreDirectory(aRootStoreDir),
+ mVLPrefixSet(nullptr) {
+ UpdateRootDirHandle(mRootStoreDirectory);
+}
+
+nsresult LookupCache::Open() {
+ LOG(("Loading PrefixSet for %s", mTableName.get()));
+ nsresult rv;
+ if (nsUrlClassifierUtils::IsMozTestTable(mTableName)) {
+ // For built-in test table, we don't load it from disk,
+ // test entries are directly added in memory.
+ rv = LoadMozEntries();
+ } else {
+ rv = LoadPrefixSet();
+ }
+
+ Unused << NS_WARN_IF(NS_FAILED(rv));
+
+ return rv;
+}
+
+nsresult LookupCache::Init() {
+ MOZ_ASSERT(!mVLPrefixSet);
+
+ mVLPrefixSet = new VariableLengthPrefixSet();
+ nsresult rv = mVLPrefixSet->Init(mTableName);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return NS_OK;
+}
+
+nsresult LookupCache::UpdateRootDirHandle(
+ nsCOMPtr<nsIFile>& aNewRootStoreDirectory) {
+ nsresult rv;
+
+ if (aNewRootStoreDirectory != mRootStoreDirectory) {
+ rv = aNewRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectory));
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ rv = Classifier::GetPrivateStoreDirectory(mRootStoreDirectory, mTableName,
+ mProvider,
+ getter_AddRefs(mStoreDirectory));
+
+ if (NS_FAILED(rv)) {
+ LOG(("Failed to get private store directory for %s", mTableName.get()));
+ mStoreDirectory = mRootStoreDirectory;
+ }
+
+ if (LOG_ENABLED()) {
+ nsString path;
+ mStoreDirectory->GetPath(path);
+ LOG(("Private store directory for %s is %s", mTableName.get(),
+ NS_ConvertUTF16toUTF8(path).get()));
+ }
+
+ return rv;
+}
+
+nsresult LookupCache::WriteFile() {
+ if (nsUrlClassifierDBService::ShutdownHasStarted()) {
+ return NS_ERROR_ABORT;
+ }
+
+ nsCOMPtr<nsIFile> psFile;
+ nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ rv = psFile->AppendNative(mTableName + GetPrefixSetSuffix());
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ rv = StoreToFile(psFile);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ LOG(("Failed to store the prefixset for table %s", mTableName.get()));
+ return rv;
+ }
+
+ return NS_OK;
+}
+
+nsresult LookupCache::CheckCache(const Completion& aCompletion, bool* aHas,
+ bool* aConfirmed) {
+ // Shouldn't call this function if prefix is not in the database.
+ MOZ_ASSERT(*aHas);
+
+ *aConfirmed = false;
+
+ uint32_t prefix = aCompletion.ToUint32();
+
+ CachedFullHashResponse* fullHashResponse = mFullHashCache.Get(prefix);
+ if (!fullHashResponse) {
+ return NS_OK;
+ }
+
+ int64_t nowSec = PR_Now() / PR_USEC_PER_SEC;
+ int64_t expiryTimeSec;
+
+ FullHashExpiryCache& fullHashes = fullHashResponse->fullHashes;
+ nsDependentCSubstring completion(
+ reinterpret_cast<const char*>(aCompletion.buf), COMPLETE_SIZE);
+
+ // Check if we can find the fullhash in positive cache
+ if (fullHashes.Get(completion, &expiryTimeSec)) {
+ if (nowSec <= expiryTimeSec) {
+ // Url is NOT safe.
+ *aConfirmed = true;
+ LOG(("Found a valid fullhash in the positive cache"));
+ } else {
+ // Trigger a gethash request in this case(aConfirmed is false).
+ LOG(("Found an expired fullhash in the positive cache"));
+
+ // Remove fullhash entry from the cache when the negative cache
+ // is also expired because whether or not the fullhash is cached
+ // locally, we will need to consult the server next time we
+ // lookup this hash. We may as well remove it from our cache.
+ if (fullHashResponse->negativeCacheExpirySec < expiryTimeSec) {
+ fullHashes.Remove(completion);
+ if (fullHashes.Count() == 0 &&
+ fullHashResponse->negativeCacheExpirySec < nowSec) {
+ mFullHashCache.Remove(prefix);
+ }
+ }
+ }
+ return NS_OK;
+ }
+
+ // Check negative cache.
+ if (fullHashResponse->negativeCacheExpirySec >= nowSec) {
+ // Url is safe.
+ LOG(("Found a valid prefix in the negative cache"));
+ *aHas = false;
+ } else {
+ LOG(("Found an expired prefix in the negative cache"));
+ if (fullHashes.Count() == 0) {
+ mFullHashCache.Remove(prefix);
+ }
+ }
+
+ return NS_OK;
+}
+
+// This function remove cache entries whose negative cache time is expired.
+// It is possible that a cache entry whose positive cache time is not yet
+// expired but still being removed after calling this API. Right now we call
+// this on every update.
+void LookupCache::InvalidateExpiredCacheEntries() {
+ int64_t nowSec = PR_Now() / PR_USEC_PER_SEC;
+
+ for (auto iter = mFullHashCache.Iter(); !iter.Done(); iter.Next()) {
+ CachedFullHashResponse* response = iter.UserData();
+ if (response->negativeCacheExpirySec < nowSec) {
+ iter.Remove();
+ }
+ }
+}
+
+void LookupCache::CopyFullHashCache(const LookupCache* aSource) {
+ if (!aSource) {
+ return;
+ }
+
+ CopyClassHashTable<FullHashResponseMap>(aSource->mFullHashCache,
+ mFullHashCache);
+}
+
+void LookupCache::ClearCache() { mFullHashCache.Clear(); }
+
+void LookupCache::ClearAll() {
+ ClearCache();
+ ClearPrefixes();
+ mPrimed = false;
+}
+
+nsresult LookupCache::ClearPrefixes() {
+ // Clear by seting a empty map
+ PrefixStringMap map;
+ return mVLPrefixSet->SetPrefixes(map);
+}
+
+bool LookupCache::IsEmpty() const {
+ bool isEmpty;
+ mVLPrefixSet->IsEmpty(&isEmpty);
+ return isEmpty;
+}
+
+void LookupCache::GetCacheInfo(nsIUrlClassifierCacheInfo** aCache) const {
+ MOZ_ASSERT(aCache);
+
+ RefPtr<nsUrlClassifierCacheInfo> info = new nsUrlClassifierCacheInfo;
+ info->table = mTableName;
+
+ for (const auto& cacheEntry : mFullHashCache) {
+ RefPtr<nsUrlClassifierCacheEntry> entry = new nsUrlClassifierCacheEntry;
+
+ // Set prefix of the cache entry.
+ nsAutoCString prefix(reinterpret_cast<const char*>(&cacheEntry.GetKey()),
+ PREFIX_SIZE);
+ CStringToHexString(prefix, entry->prefix);
+
+ // Set expiry of the cache entry.
+ CachedFullHashResponse* response = cacheEntry.GetWeak();
+ entry->expirySec = response->negativeCacheExpirySec;
+
+ // Set positive cache.
+ FullHashExpiryCache& fullHashes = response->fullHashes;
+ for (const auto& fullHashEntry : fullHashes) {
+ RefPtr<nsUrlClassifierPositiveCacheEntry> match =
+ new nsUrlClassifierPositiveCacheEntry;
+
+ // Set fullhash of positive cache entry.
+ CStringToHexString(fullHashEntry.GetKey(), match->fullhash);
+
+ // Set expiry of positive cache entry.
+ match->expirySec = fullHashEntry.GetData();
+
+ entry->matches.AppendElement(
+ static_cast<nsIUrlClassifierPositiveCacheEntry*>(match));
+ }
+
+ info->entries.AppendElement(
+ static_cast<nsIUrlClassifierCacheEntry*>(entry));
+ }
+
+ info.forget(aCache);
+}
+
+/* static */
+bool LookupCache::IsCanonicalizedIP(const nsACString& aHost) {
+ // The canonicalization process will have left IP addresses in dotted
+ // decimal with no surprises.
+ uint32_t i1, i2, i3, i4;
+ char c;
+ if (PR_sscanf(PromiseFlatCString(aHost).get(), "%u.%u.%u.%u%c", &i1, &i2, &i3,
+ &i4, &c) == 4) {
+ return (i1 <= 0xFF && i2 <= 0xFF && i3 <= 0xFF && i4 <= 0xFF);
+ }
+
+ return false;
+}
+
+// This is used when the URL is created by CreatePairwiseEntityListURI(),
+// which returns an URI like "toplevel.page/?resource=third.party.domain"
+// The fragment rule for the hostname(toplevel.page) is still the same
+// as Safe Browsing protocol.
+// The difference is that we always keep the path and query string and
+// generate an additional fragment by removing the leading component of
+// third.party.domain. This is to make sure we can find a match when a
+// exceptionlisted domain is eTLD.
+/* static */
+nsresult LookupCache::GetLookupEntitylistFragments(
+ const nsACString& aSpec, nsTArray<nsCString>* aFragments) {
+ aFragments->Clear();
+
+ nsACString::const_iterator begin, end, iter, iter_end;
+ aSpec.BeginReading(begin);
+ aSpec.EndReading(end);
+
+ iter = begin;
+ iter_end = end;
+
+ // Fallback to use default fragment rule when the URL doesn't contain
+ // "/?resoruce=" because this means the URL is not generated in
+ // CreatePairwiseEntityListURI()
+ if (!FindInReadable("/?resource="_ns, iter, iter_end)) {
+ return GetLookupFragments(aSpec, aFragments);
+ }
+
+ const nsACString& topLevelURL = Substring(begin, iter++);
+ const nsACString& thirdPartyURL = Substring(iter_end, end);
+
+ /**
+ * For the top-level URL, we follow the host fragment rule defined
+ * in the Safe Browsing protocol.
+ */
+ nsTArray<nsCString> topLevelURLs;
+ topLevelURLs.AppendElement(topLevelURL);
+
+ if (!IsCanonicalizedIP(topLevelURL)) {
+ topLevelURL.BeginReading(begin);
+ topLevelURL.EndReading(end);
+ int numTopLevelURLComponents = 0;
+ while (RFindInReadable("."_ns, begin, end) &&
+ numTopLevelURLComponents < MAX_HOST_COMPONENTS) {
+ // don't bother checking toplevel domains
+ if (++numTopLevelURLComponents >= 2) {
+ topLevelURL.EndReading(iter);
+ topLevelURLs.AppendElement(Substring(end, iter));
+ }
+ end = begin;
+ topLevelURL.BeginReading(begin);
+ }
+ }
+
+ /**
+ * The whiltelisted domain in the entity list may be eTLD or eTLD+1.
+ * Since the number of the domain name part in the third-party URL searching
+ * is always less than or equal to eTLD+1, we remove the leading
+ * component from the third-party domain to make sure we can find a match
+ * if the exceptionlisted domain stoed in the entity list is eTLD.
+ */
+ nsTArray<nsCString> thirdPartyURLs;
+ thirdPartyURLs.AppendElement(thirdPartyURL);
+
+ if (!IsCanonicalizedIP(thirdPartyURL)) {
+ thirdPartyURL.BeginReading(iter);
+ thirdPartyURL.EndReading(end);
+ if (FindCharInReadable('.', iter, end)) {
+ iter++;
+ nsAutoCString thirdPartyURLToAdd;
+ thirdPartyURLToAdd.Assign(Substring(iter++, end));
+
+ // don't bother checking toplevel domains
+ if (FindCharInReadable('.', iter, end)) {
+ thirdPartyURLs.AppendElement(thirdPartyURLToAdd);
+ }
+ }
+ }
+
+ for (size_t i = 0; i < topLevelURLs.Length(); i++) {
+ for (size_t j = 0; j < thirdPartyURLs.Length(); j++) {
+ nsAutoCString key;
+ key.Assign(topLevelURLs[i]);
+ key.Append("/?resource=");
+ key.Append(thirdPartyURLs[j]);
+
+ aFragments->AppendElement(key);
+ }
+ }
+
+ return NS_OK;
+}
+
+/* static */
+nsresult LookupCache::GetLookupFragments(const nsACString& aSpec,
+ nsTArray<nsCString>* aFragments)
+
+{
+ aFragments->Clear();
+
+ nsACString::const_iterator begin, end, iter;
+ aSpec.BeginReading(begin);
+ aSpec.EndReading(end);
+
+ iter = begin;
+ if (!FindCharInReadable('/', iter, end)) {
+ return NS_OK;
+ }
+
+ const nsACString& host = Substring(begin, iter++);
+ nsAutoCString path;
+ path.Assign(Substring(iter, end));
+
+ /**
+ * From the protocol doc:
+ * For the hostname, the client will try at most 5 different strings. They
+ * are:
+ * a) The exact hostname of the url
+ * b) The 4 hostnames formed by starting with the last 5 components and
+ * successivly removing the leading component. The top-level component
+ * can be skipped. This is not done if the hostname is a numerical IP.
+ */
+ nsTArray<nsCString> hosts;
+ hosts.AppendElement(host);
+
+ if (!IsCanonicalizedIP(host)) {
+ host.BeginReading(begin);
+ host.EndReading(end);
+ int numHostComponents = 0;
+ while (RFindInReadable("."_ns, begin, end) &&
+ numHostComponents < MAX_HOST_COMPONENTS) {
+ // don't bother checking toplevel domains
+ if (++numHostComponents >= 2) {
+ host.EndReading(iter);
+ hosts.AppendElement(Substring(end, iter));
+ }
+ end = begin;
+ host.BeginReading(begin);
+ }
+ }
+
+ /**
+ * From the protocol doc:
+ * For the path, the client will also try at most 6 different strings.
+ * They are:
+ * a) the exact path of the url, including query parameters
+ * b) the exact path of the url, without query parameters
+ * c) the 4 paths formed by starting at the root (/) and
+ * successively appending path components, including a trailing
+ * slash. This behavior should only extend up to the next-to-last
+ * path component, that is, a trailing slash should never be
+ * appended that was not present in the original url.
+ */
+ nsTArray<nsCString> paths;
+ nsAutoCString pathToAdd;
+
+ path.BeginReading(begin);
+ path.EndReading(end);
+ iter = begin;
+ if (FindCharInReadable('?', iter, end)) {
+ pathToAdd = Substring(begin, iter);
+ paths.AppendElement(pathToAdd);
+ end = iter;
+ }
+
+ int numPathComponents = 1;
+ iter = begin;
+ while (FindCharInReadable('/', iter, end) &&
+ numPathComponents < MAX_PATH_COMPONENTS) {
+ iter++;
+ pathToAdd.Assign(Substring(begin, iter));
+ paths.AppendElement(pathToAdd);
+ numPathComponents++;
+ }
+
+ // If we haven't already done so, add the full path
+ if (!pathToAdd.Equals(path)) {
+ paths.AppendElement(path);
+ }
+ // Check an empty path (for whole-domain blocklist entries)
+ if (!paths.Contains(""_ns)) {
+ paths.AppendElement(""_ns);
+ }
+
+ for (uint32_t hostIndex = 0; hostIndex < hosts.Length(); hostIndex++) {
+ for (uint32_t pathIndex = 0; pathIndex < paths.Length(); pathIndex++) {
+ nsCString key;
+ key.Assign(hosts[hostIndex]);
+ key.Append('/');
+ key.Append(paths[pathIndex]);
+
+ aFragments->AppendElement(key);
+ }
+ }
+
+ return NS_OK;
+}
+
+nsresult LookupCache::LoadPrefixSet() {
+ nsCOMPtr<nsIFile> psFile;
+ nsresult rv = mStoreDirectory->Clone(getter_AddRefs(psFile));
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = psFile->AppendNative(mTableName + GetPrefixSetSuffix());
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ bool exists;
+ rv = psFile->Exists(&exists);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ if (exists) {
+ LOG(("stored PrefixSet exists, loading from disk"));
+ rv = LoadFromFile(psFile);
+ if (NS_FAILED(rv)) {
+ return rv;
+ }
+ mPrimed = true;
+ } else {
+ // The only scenario we load the old .pset file is when we haven't received
+ // a SafeBrowsng update before. After receiving an update, new .vlpset will
+ // be stored while old .pset will be removed.
+ if (NS_SUCCEEDED(LoadLegacyFile())) {
+ mPrimed = true;
+ } else {
+ LOG(("no (usable) stored PrefixSet found"));
+ }
+ }
+
+#ifdef DEBUG
+ if (mPrimed) {
+ uint32_t size = SizeOfPrefixSet();
+ LOG(("SB tree done, size = %d bytes\n", size));
+ }
+#endif
+
+ return NS_OK;
+}
+
+size_t LookupCache::SizeOfPrefixSet() const {
+ return mVLPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
+}
+
+#if defined(DEBUG)
+void LookupCache::DumpCache() const {
+ if (!LOG_ENABLED()) {
+ return;
+ }
+
+ for (const auto& cacheEntry : mFullHashCache) {
+ CachedFullHashResponse* response = cacheEntry.GetWeak();
+
+ nsAutoCString prefix;
+ CStringToHexString(
+ nsCString(reinterpret_cast<const char*>(&cacheEntry.GetKey()),
+ PREFIX_SIZE),
+ prefix);
+ LOG(("Cache prefix(%s): %s, Expiry: %s", mTableName.get(), prefix.get(),
+ GetFormattedTimeString(response->negativeCacheExpirySec).get()));
+
+ FullHashExpiryCache& fullHashes = response->fullHashes;
+ for (const auto& fullHashEntry : fullHashes) {
+ nsAutoCString fullhash;
+ CStringToHexString(fullHashEntry.GetKey(), fullhash);
+ LOG((" - %s, Expiry: %s", fullhash.get(),
+ GetFormattedTimeString(fullHashEntry.GetData()).get()));
+ }
+ }
+}
+#endif
+
+nsresult LookupCache::StoreToFile(nsCOMPtr<nsIFile>& aFile) {
+ NS_ENSURE_ARG_POINTER(aFile);
+
+ uint32_t fileSize = sizeof(Header) +
+ mVLPrefixSet->CalculatePreallocateSize() +
+ nsCrc32CheckSumedOutputStream::CHECKSUM_SIZE;
+
+ nsCOMPtr<nsIOutputStream> localOutFile;
+ nsresult rv =
+ NS_NewSafeLocalFileOutputStream(getter_AddRefs(localOutFile), aFile,
+ PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ // Preallocate the file storage
+ {
+ nsCOMPtr<nsIFileOutputStream> fos(do_QueryInterface(localOutFile));
+ Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_VLPS_FALLOCATE_TIME> timer;
+
+ Unused << fos->Preallocate(fileSize);
+ }
+
+ nsCOMPtr<nsIOutputStream> out;
+ rv = NS_NewCrc32OutputStream(getter_AddRefs(out), localOutFile.forget(),
+ std::min(fileSize, MAX_BUFFER_SIZE));
+
+ // Write header
+ Header header;
+ GetHeader(header);
+
+ rv = WriteValue(out, header);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ // Write prefixes
+ rv = mVLPrefixSet->WritePrefixes(out);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ // Write checksum
+ nsCOMPtr<nsISafeOutputStream> safeOut = do_QueryInterface(out, &rv);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ rv = safeOut->Finish();
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ LOG(("[%s] Storing PrefixSet successful", mTableName.get()));
+
+ // This is to remove old ".pset" files if exist
+ Unused << ClearLegacyFile();
+ return NS_OK;
+}
+
+nsresult LookupCache::LoadFromFile(nsCOMPtr<nsIFile>& aFile) {
+ NS_ENSURE_ARG_POINTER(aFile);
+
+ Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_VLPS_FILELOAD_TIME> timer;
+
+ nsCOMPtr<nsIInputStream> localInFile;
+ nsresult rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), aFile,
+ PR_RDONLY | nsIFile::OS_READAHEAD);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ // Calculate how big the file is, make sure our read buffer isn't bigger
+ // than the file itself which is just wasting memory.
+ int64_t fileSize;
+ rv = aFile->GetFileSize(&fileSize);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ if (fileSize < 0 || fileSize > UINT32_MAX) {
+ return NS_ERROR_FAILURE;
+ }
+
+ uint32_t bufferSize =
+ std::min<uint32_t>(static_cast<uint32_t>(fileSize), MAX_BUFFER_SIZE);
+
+ // Convert to buffered stream
+ nsCOMPtr<nsIInputStream> in;
+ rv = NS_NewBufferedInputStream(getter_AddRefs(in), localInFile.forget(),
+ bufferSize);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ // Load header
+ Header header;
+ rv = ReadValue(in, header);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ LOG(("Failed to read header for %s", mTableName.get()));
+ return NS_ERROR_FILE_CORRUPTED;
+ }
+
+ rv = SanityCheck(header);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ // Load data
+ rv = mVLPrefixSet->LoadPrefixes(in);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ // Load crc32 checksum and verify
+ rv = VerifyCRC32(in);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ mPrimed = true;
+
+ LOG(("[%s] Loading PrefixSet successful", mTableName.get()));
+ return NS_OK;
+}
+
+// This function assumes CRC32 checksum is in the end of the input stream
+nsresult LookupCache::VerifyCRC32(nsCOMPtr<nsIInputStream>& aIn) {
+ nsCOMPtr<nsISeekableStream> seekIn = do_QueryInterface(aIn);
+ nsresult rv = seekIn->Seek(nsISeekableStream::NS_SEEK_SET, 0);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ uint64_t len;
+ rv = aIn->Available(&len);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ uint32_t calculateCrc32 = ~0;
+
+ // We don't want to include the checksum itself
+ len = len - nsCrc32CheckSumedOutputStream::CHECKSUM_SIZE;
+
+ static const uint64_t STREAM_BUFFER_SIZE = 4096;
+ char buffer[STREAM_BUFFER_SIZE];
+ while (len) {
+ uint32_t read;
+ uint64_t readLimit = std::min<uint64_t>(STREAM_BUFFER_SIZE, len);
+
+ rv = aIn->Read(buffer, readLimit, &read);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ calculateCrc32 = ComputeCrc32c(
+ calculateCrc32, reinterpret_cast<const uint8_t*>(buffer), read);
+
+ len -= read;
+ }
+
+ // Now read the CRC32
+ uint32_t crc32;
+ ReadValue(aIn, crc32);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ if (crc32 != calculateCrc32) {
+ return NS_ERROR_FILE_CORRUPTED;
+ }
+
+ return NS_OK;
+}
+
+nsresult LookupCacheV2::Has(const Completion& aCompletion, bool* aHas,
+ uint32_t* aMatchLength, bool* aConfirmed) {
+ *aHas = *aConfirmed = false;
+ *aMatchLength = 0;
+
+ uint32_t length = 0;
+ nsDependentCSubstring fullhash;
+ fullhash.Rebind((const char*)aCompletion.buf, COMPLETE_SIZE);
+
+ uint32_t prefix = aCompletion.ToUint32();
+
+ nsresult rv = mVLPrefixSet->Matches(prefix, fullhash, &length);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ if (length == 0) {
+ return NS_OK;
+ }
+
+ MOZ_ASSERT(length == PREFIX_SIZE || length == COMPLETE_SIZE);
+
+ *aHas = true;
+ *aMatchLength = length;
+ *aConfirmed = length == COMPLETE_SIZE;
+
+ if (!(*aConfirmed)) {
+ rv = CheckCache(aCompletion, aHas, aConfirmed);
+ }
+
+ return rv;
+}
+
+nsresult LookupCacheV2::Build(AddPrefixArray& aAddPrefixes,
+ AddCompleteArray& aAddCompletes) {
+ nsresult rv = mVLPrefixSet->SetPrefixes(aAddPrefixes, aAddCompletes);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+ mPrimed = true;
+
+ return NS_OK;
+}
+
+nsresult LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes) {
+ if (!mPrimed) {
+ // This can happen if its a new table, so no error.
+ LOG(("GetPrefixes from empty LookupCache"));
+ return NS_OK;
+ }
+
+ return mVLPrefixSet->GetFixedLengthPrefixes(&aAddPrefixes, nullptr);
+}
+
+nsresult LookupCacheV2::GetPrefixes(FallibleTArray<uint32_t>& aAddPrefixes,
+ FallibleTArray<nsCString>& aAddCompletes) {
+ if (!mPrimed) {
+ // This can happen if its a new table, so no error.
+ LOG(("GetHashes from empty LookupCache"));
+ return NS_OK;
+ }
+
+ return mVLPrefixSet->GetFixedLengthPrefixes(&aAddPrefixes, &aAddCompletes);
+}
+
+nsresult LookupCacheV2::GetPrefixByIndex(uint32_t aIndex,
+ uint32_t* aOutPrefix) const {
+ NS_ENSURE_ARG_POINTER(aOutPrefix);
+
+ return mVLPrefixSet->GetFixedLengthPrefixByIndex(aIndex, aOutPrefix);
+}
+
+void LookupCacheV2::AddGethashResultToCache(
+ const AddCompleteArray& aAddCompletes, const MissPrefixArray& aMissPrefixes,
+ int64_t aExpirySec) {
+ static const int64_t CACHE_DURATION_SEC = 15 * 60;
+ int64_t defaultExpirySec = PR_Now() / PR_USEC_PER_SEC + CACHE_DURATION_SEC;
+ if (aExpirySec != 0) {
+ defaultExpirySec = aExpirySec;
+ }
+
+ for (const AddComplete& add : aAddCompletes) {
+ nsDependentCSubstring fullhash(
+ reinterpret_cast<const char*>(add.CompleteHash().buf), COMPLETE_SIZE);
+
+ CachedFullHashResponse* response =
+ mFullHashCache.GetOrInsertNew(add.ToUint32());
+ response->negativeCacheExpirySec = defaultExpirySec;
+
+ FullHashExpiryCache& fullHashes = response->fullHashes;
+ fullHashes.InsertOrUpdate(fullhash, defaultExpirySec);
+ }
+
+ for (const Prefix& prefix : aMissPrefixes) {
+ CachedFullHashResponse* response =
+ mFullHashCache.GetOrInsertNew(prefix.ToUint32());
+
+ response->negativeCacheExpirySec = defaultExpirySec;
+ }
+}
+
+void LookupCacheV2::GetHeader(Header& aHeader) {
+ aHeader.magic = LookupCacheV2::VLPSET_MAGIC;
+ aHeader.version = LookupCacheV2::VLPSET_VERSION;
+}
+
+nsresult LookupCacheV2::SanityCheck(const Header& aHeader) {
+ if (aHeader.magic != LookupCacheV2::VLPSET_MAGIC) {
+ return NS_ERROR_FILE_CORRUPTED;
+ }
+
+ if (aHeader.version != LookupCacheV2::VLPSET_VERSION) {
+ return NS_ERROR_FAILURE;
+ }
+
+ return NS_OK;
+}
+
+nsresult LookupCacheV2::LoadLegacyFile() {
+ // Because mozilla Safe Browsing v2 server only includes completions
+ // in the update, we can simplify this function by only loading .sbtore
+ if (!mProvider.EqualsLiteral("mozilla")) {
+ return NS_OK;
+ }
+
+ HashStore store(mTableName, mProvider, mRootStoreDirectory);
+
+ // Support loading version 3 HashStore.
+ nsresult rv = store.Open(3);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ if (store.AddChunks().Length() == 0 && store.SubChunks().Length() == 0) {
+ // Return when file doesn't exist
+ return NS_OK;
+ }
+
+ AddPrefixArray prefix;
+ AddCompleteArray addComplete;
+
+ rv = store.ReadCompletionsLegacyV3(addComplete);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return Build(prefix, addComplete);
+}
+
+nsresult LookupCacheV2::ClearLegacyFile() {
+ nsCOMPtr<nsIFile> file;
+ nsresult rv = mStoreDirectory->Clone(getter_AddRefs(file));
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ rv = file->AppendNative(mTableName + ".pset"_ns);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ bool exists;
+ rv = file->Exists(&exists);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ if (exists) {
+ rv = file->Remove(false);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ LOG(("[%s]Old PrefixSet is successfully removed!", mTableName.get()));
+ }
+
+ return NS_OK;
+}
+
+nsCString LookupCacheV2::GetPrefixSetSuffix() const { return ".vlpset"_ns; }
+
+// Support creating built-in entries for phsihing, malware, unwanted, harmful,
+// tracking/tracking exceptionlist and flash block tables.
+//
+nsresult LookupCacheV2::LoadMozEntries() {
+ // We already have the entries, return
+ if (!IsEmpty() || IsPrimed()) {
+ return NS_OK;
+ }
+
+ nsTArray<nsLiteralCString> entries;
+
+ if (mTableName.EqualsLiteral("moztest-phish-simple")) {
+ // Entries for phishing table
+ entries.AppendElement("itisatrap.org/firefox/its-a-trap.html"_ns);
+ } else if (mTableName.EqualsLiteral("moztest-malware-simple")) {
+ // Entries for malware table
+ entries.AppendElement("itisatrap.org/firefox/its-an-attack.html"_ns);
+ } else if (mTableName.EqualsLiteral("moztest-unwanted-simple")) {
+ // Entries for unwanted table
+ entries.AppendElement("itisatrap.org/firefox/unwanted.html"_ns);
+ } else if (mTableName.EqualsLiteral("moztest-harmful-simple")) {
+ // Entries for harmfule tables
+ entries.AppendElement("itisatrap.org/firefox/harmful.html"_ns);
+ } else if (mTableName.EqualsLiteral("moztest-track-simple")) {
+ // Entries for tracking table
+ entries.AppendElement("trackertest.org/"_ns);
+ entries.AppendElement("itisatracker.org/"_ns);
+ } else if (mTableName.EqualsLiteral("moztest-trackwhite-simple")) {
+ // Entries for tracking entitylist table
+ entries.AppendElement("itisatrap.org/?resource=itisatracker.org"_ns);
+ } else if (mTableName.EqualsLiteral("moztest-block-simple")) {
+ // Entries for flash block table
+ entries.AppendElement("itisatrap.org/firefox/blocked.html"_ns);
+ } else {
+ MOZ_ASSERT_UNREACHABLE();
+ }
+
+ AddPrefixArray prefix;
+ AddCompleteArray completes;
+ for (const auto& entry : entries) {
+ AddComplete add;
+ if (NS_FAILED(add.complete.FromPlaintext(entry))) {
+ continue;
+ }
+ if (!completes.AppendElement(add, fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+ }
+
+ return Build(prefix, completes);
+}
+
+} // namespace safebrowsing
+} // namespace mozilla