summaryrefslogtreecommitdiffstats
path: root/toolkit/components/url-classifier/LookupCacheV4.cpp
diff options
context:
space:
mode:
authorDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:47:29 +0000
committerDaniel Baumann <daniel.baumann@progress-linux.org>2024-04-19 01:47:29 +0000
commit0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d (patch)
treea31f07c9bcca9d56ce61e9a1ffd30ef350d513aa /toolkit/components/url-classifier/LookupCacheV4.cpp
parentInitial commit. (diff)
downloadfirefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.tar.xz
firefox-esr-0ebf5bdf043a27fd3dfb7f92e0cb63d88954c44d.zip
Adding upstream version 115.8.0esr.upstream/115.8.0esr
Signed-off-by: Daniel Baumann <daniel.baumann@progress-linux.org>
Diffstat (limited to 'toolkit/components/url-classifier/LookupCacheV4.cpp')
-rw-r--r--toolkit/components/url-classifier/LookupCacheV4.cpp577
1 files changed, 577 insertions, 0 deletions
diff --git a/toolkit/components/url-classifier/LookupCacheV4.cpp b/toolkit/components/url-classifier/LookupCacheV4.cpp
new file mode 100644
index 0000000000..14ebc890d4
--- /dev/null
+++ b/toolkit/components/url-classifier/LookupCacheV4.cpp
@@ -0,0 +1,577 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "LookupCacheV4.h"
+#include "HashStore.h"
+#include "mozilla/Telemetry.h"
+#include "mozilla/Unused.h"
+#include "nsCheckSummedOutputStream.h"
+#include "nsUrlClassifierDBService.h"
+#include "crc32c.h"
+#include <string>
+
+// MOZ_LOG=UrlClassifierDbService:5
+extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
+#define LOG(args) \
+ MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
+#define LOG_ENABLED() \
+ MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
+
+#define METADATA_SUFFIX ".metadata"_ns
+
+namespace mozilla {
+namespace safebrowsing {
+
+////////////////////////////////////////////////////////////////////////
+
+// Prefixes coming from updates and VLPrefixSet are both stored in the HashTable
+// where the (key, value) pair is a prefix size and a lexicographic-sorted
+// string. The difference is prefixes from updates use std:string(to avoid
+// additional copies) and prefixes from VLPrefixSet use nsCString. This class
+// provides a common interface for the partial update algorithm to make it
+// easier to operate on two different kind prefix string map..
+class VLPrefixSet {
+ public:
+ explicit VLPrefixSet(const PrefixStringMap& aMap);
+
+ // This function will merge the prefix map in VLPrefixSet to aPrefixMap.
+ void Merge(PrefixStringMap& aPrefixMap);
+
+ // Find the smallest string from the map in VLPrefixSet.
+ bool GetSmallestPrefix(nsACString& aOutString) const;
+
+ // Return the number of prefixes in the map
+ uint32_t Count() const { return mCount; }
+
+ private:
+ // PrefixString structure contains a lexicographic-sorted string with
+ // a |pos| variable to indicate which substring we are pointing to right now.
+ // |pos| increases each time GetSmallestPrefix finds the smallest string.
+ struct PrefixString {
+ PrefixString(const nsACString& aStr, uint32_t aSize)
+ : data(aStr), pos(0), size(aSize) {
+ MOZ_ASSERT(data.Length() % size == 0,
+ "PrefixString length must be a multiple of the prefix size.");
+ }
+
+ void getRemainingString(nsACString& out) {
+ MOZ_ASSERT(out.IsEmpty());
+ if (remaining() > 0) {
+ out = Substring(data, pos);
+ }
+ }
+ void getPrefix(nsACString& out) {
+ MOZ_ASSERT(out.IsEmpty());
+ if (remaining() >= size) {
+ out = Substring(data, pos, size);
+ } else {
+ MOZ_ASSERT(remaining() == 0,
+ "Remaining bytes but not enough for a (size)-byte prefix.");
+ }
+ }
+ void next() {
+ pos += size;
+ MOZ_ASSERT(pos <= data.Length());
+ }
+ uint32_t remaining() {
+ return data.Length() - pos;
+ MOZ_ASSERT(pos <= data.Length());
+ }
+
+ nsCString data;
+ uint32_t pos;
+ uint32_t size;
+ };
+
+ nsClassHashtable<nsUint32HashKey, PrefixString> mMap;
+ uint32_t mCount;
+};
+
// Look up |aCompletion| in the variable-length prefix set and, on a prefix
// hit, consult the fullhash cache. |aHas| is set when any prefix matches;
// |aMatchLength| receives the length of the matching prefix; |aConfirmed|
// is only set by the cache check (see comment below).
nsresult LookupCacheV4::Has(const Completion& aCompletion, bool* aHas,
                            uint32_t* aMatchLength, bool* aConfirmed) {
  *aHas = *aConfirmed = false;
  *aMatchLength = 0;

  uint32_t length = 0;
  nsDependentCSubstring fullhash;
  fullhash.Rebind((const char*)aCompletion.buf, COMPLETE_SIZE);

  // It is tricky that we use a BigEndian read for V4 while using
  // Completion.ToUint32 for V2. This is because in V2, prefixes are converted
  // to integers and then sorted internally. In V4, prefixes received are
  // already sorted in lexicographic order, so when we manipulate these
  // prefixes in integer form, we always use big endian so prefixes keep the
  // same order.
  uint32_t prefix = BigEndian::readUint32(
      reinterpret_cast<const uint32_t*>(fullhash.BeginReading()));

  nsresult rv = mVLPrefixSet->Matches(prefix, fullhash, &length);
  NS_ENSURE_SUCCESS(rv, rv);

  // No prefix of any length matched: a definite miss.
  if (length == 0) {
    return NS_OK;
  }

  MOZ_ASSERT(length >= PREFIX_SIZE && length <= COMPLETE_SIZE);

  // For V4, we don't set |aConfirmed| to true even if we found a match
  // for a 32-byte prefix. |aConfirmed| is only set if a match is found in
  // the cache.
  *aHas = true;
  *aMatchLength = length;

  // Even though V4 supports variable-length prefixes, we always send 4 bytes
  // for completion (Bug 1323953). This means the cached prefix length is also
  // 4 bytes.
  return CheckCache(aCompletion, aHas, aConfirmed);
}
+
+nsresult LookupCacheV4::Build(PrefixStringMap& aPrefixMap) {
+ Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_VLPS_CONSTRUCT_TIME> timer;
+
+ nsresult rv = mVLPrefixSet->SetPrefixes(aPrefixMap);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+ mPrimed = true;
+
+ return NS_OK;
+}
+
+nsresult LookupCacheV4::GetPrefixes(PrefixStringMap& aPrefixMap) {
+ if (!mPrimed) {
+ // This can happen if its a new table, so no error.
+ LOG(("GetPrefixes from empty LookupCache"));
+ return NS_OK;
+ }
+ return mVLPrefixSet->GetPrefixes(aPrefixMap);
+}
+
// Collect the fixed-length prefixes from the prefix set into |aPrefixes|.
// The second argument (completions output) is not requested here.
nsresult LookupCacheV4::GetFixedLengthPrefixes(
    FallibleTArray<uint32_t>& aPrefixes) {
  return mVLPrefixSet->GetFixedLengthPrefixes(&aPrefixes, nullptr);
}
+
// Fetch the |aIndex|-th fixed-length prefix from the underlying prefix set.
// Returns NS_ERROR_INVALID_POINTER when |aOutPrefix| is null.
nsresult LookupCacheV4::GetFixedLengthPrefixByIndex(
    uint32_t aIndex, uint32_t* aOutPrefix) const {
  NS_ENSURE_ARG_POINTER(aOutPrefix);

  return mVLPrefixSet->GetFixedLengthPrefixByIndex(aIndex, aOutPrefix);
}
+
+nsresult LookupCacheV4::ClearLegacyFile() {
+ nsCOMPtr<nsIFile> file;
+ nsresult rv = mStoreDirectory->Clone(getter_AddRefs(file));
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ rv = file->AppendNative(mTableName + ".pset"_ns);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ bool exists;
+ rv = file->Exists(&exists);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ if (exists) {
+ rv = file->Remove(false);
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ LOG(("[%s] Old PrefixSet is successfully removed!", mTableName.get()));
+ }
+
+ return NS_OK;
+}
+
// Load prefixes from the legacy "<table>.pset" file into the prefix set and
// mark the cache primed. Returns NS_ERROR_FAILURE when the file is missing
// or has an implausible size, so the caller can decide how to proceed.
nsresult LookupCacheV4::LoadLegacyFile() {
  nsCOMPtr<nsIFile> file;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(file));
  if (NS_WARN_IF(NS_FAILED(rv))) {
    return rv;
  }

  rv = file->AppendNative(mTableName + ".pset"_ns);
  if (NS_WARN_IF(NS_FAILED(rv))) {
    return rv;
  }

  bool exists;
  rv = file->Exists(&exists);
  NS_ENSURE_SUCCESS(rv, rv);

  // Unlike ClearLegacyFile, a missing file here is a failure: there is
  // nothing to load.
  if (!exists) {
    return NS_ERROR_FAILURE;
  }

  // OS_READAHEAD hints the OS that we will read the file sequentially.
  nsCOMPtr<nsIInputStream> localInFile;
  rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), file,
                                  PR_RDONLY | nsIFile::OS_READAHEAD);
  if (NS_WARN_IF(NS_FAILED(rv))) {
    return rv;
  }

  // Calculate how big the file is, make sure our read buffer isn't bigger
  // than the file itself which is just wasting memory.
  int64_t fileSize;
  rv = file->GetFileSize(&fileSize);
  if (NS_WARN_IF(NS_FAILED(rv))) {
    return rv;
  }

  // Reject negative (error sentinel) or absurdly large sizes before the
  // narrowing cast below.
  if (fileSize < 0 || fileSize > UINT32_MAX) {
    return NS_ERROR_FAILURE;
  }

  uint32_t bufferSize =
      std::min<uint32_t>(static_cast<uint32_t>(fileSize), MAX_BUFFER_SIZE);

  // Convert to buffered stream
  nsCOMPtr<nsIInputStream> in;
  rv = NS_NewBufferedInputStream(getter_AddRefs(in), localInFile.forget(),
                                 bufferSize);
  if (NS_WARN_IF(NS_FAILED(rv))) {
    return rv;
  }

  // Load data
  rv = mVLPrefixSet->LoadPrefixes(in);
  if (NS_WARN_IF(NS_FAILED(rv))) {
    return rv;
  }

  mPrimed = true;

  LOG(("[%s] Loading Legacy PrefixSet successful", mTableName.get()));
  return NS_OK;
}
+
+void LookupCacheV4::GetHeader(Header& aHeader) {
+ aHeader.magic = LookupCacheV4::VLPSET_MAGIC;
+ aHeader.version = LookupCacheV4::VLPSET_VERSION;
+}
+
+nsresult LookupCacheV4::SanityCheck(const Header& aHeader) {
+ if (aHeader.magic != LookupCacheV4::VLPSET_MAGIC) {
+ return NS_ERROR_FILE_CORRUPTED;
+ }
+
+ if (aHeader.version != LookupCacheV4::VLPSET_VERSION) {
+ return NS_ERROR_FAILURE;
+ }
+
+ return NS_OK;
+}
+
+nsCString LookupCacheV4::GetPrefixSetSuffix() const { return ".vlpset"_ns; }
+
+static nsresult AppendPrefixToMap(PrefixStringMap& prefixes,
+ const nsACString& prefix) {
+ uint32_t len = prefix.Length();
+ MOZ_ASSERT(len >= PREFIX_SIZE && len <= COMPLETE_SIZE);
+ if (!len) {
+ return NS_OK;
+ }
+
+ nsCString* prefixString = prefixes.GetOrInsertNew(len);
+ if (!prefixString->Append(prefix, fallible)) {
+ return NS_ERROR_OUT_OF_MEMORY;
+ }
+
+ return NS_OK;
+}
+
// Create an nsICryptoHash instance in |aCrypto| and initialize it for
// SHA-256. Returns the failure code from instance creation or Init on error.
static nsresult InitCrypto(nsCOMPtr<nsICryptoHash>& aCrypto) {
  nsresult rv;
  aCrypto = do_CreateInstance(NS_CRYPTO_HASH_CONTRACTID, &rv);
  if (NS_WARN_IF(NS_FAILED(rv))) {
    return rv;
  }

  rv = aCrypto->Init(nsICryptoHash::SHA256);
  NS_WARNING_ASSERTION(NS_SUCCEEDED(rv), "InitCrypto failed");

  return rv;
}
+
+// Read prefix into a buffer and also update the hash which
+// keeps track of the sha256 hash
+static void UpdateSHA256(nsICryptoHash* aCrypto, const nsACString& aPrefix) {
+ MOZ_ASSERT(aCrypto);
+ aCrypto->Update(
+ reinterpret_cast<uint8_t*>(const_cast<char*>(aPrefix.BeginReading())),
+ aPrefix.Length());
+}
+
// Please see https://bug1287058.bmoattachments.org/attachment.cgi?id=8795366
// for detail about the partial update algorithm.
//
// Merge the locally stored prefixes (aInputMap) with the add-prefixes and
// removal indices carried by aTableUpdate into a single lexicographically
// sorted aOutputMap. A SHA-256 digest of the merged prefixes is computed
// along the way and compared against the checksum the server sent (when one
// was sent).
nsresult LookupCacheV4::ApplyUpdate(RefPtr<TableUpdateV4> aTableUpdate,
                                    PrefixStringMap& aInputMap,
                                    PrefixStringMap& aOutputMap) {
  MOZ_ASSERT(aOutputMap.IsEmpty());

  nsCOMPtr<nsICryptoHash> crypto;
  nsresult rv = InitCrypto(crypto);
  if (NS_FAILED(rv)) {
    return rv;
  }

  // oldPSet contains prefixes we already have or we just merged last round.
  // addPSet contains prefixes stored in tableUpdate which should be merged
  // with oldPSet.
  VLPrefixSet oldPSet(aInputMap);
  VLPrefixSet addPSet(aTableUpdate->Prefixes());

  // RemovalIndiceArray is a sorted integer array indicating the index of the
  // prefix we should remove from the old prefix set (according to
  // lexicographic order). |removalIndex| is the current index into
  // RemovalIndiceArray. |numOldPrefixPicked| records how many prefixes we
  // have picked from the old map (-1 until the first pick).
  const TableUpdateV4::RemovalIndiceArray& removalArray =
      aTableUpdate->RemovalIndices();
  uint32_t removalIndex = 0;
  int32_t numOldPrefixPicked = -1;

  nsAutoCString smallestOldPrefix;
  nsAutoCString smallestAddPrefix;

  bool isOldMapEmpty = false, isAddMapEmpty = false;

  // This is used to avoid an infinite loop in the partial update algorithm.
  // The maximum number of iterations is the number of old prefixes plus the
  // number of add prefixes (plus one for the final both-empty check).
  int32_t index = oldPSet.Count() + addPSet.Count() + 1;
  for (; index > 0; index--) {
    // Get the smallest prefix from the old prefix set if we don't have one.
    if (smallestOldPrefix.IsEmpty() && !isOldMapEmpty) {
      isOldMapEmpty = !oldPSet.GetSmallestPrefix(smallestOldPrefix);
    }

    // Get the smallest prefix from the add prefix set if we don't have one.
    if (smallestAddPrefix.IsEmpty() && !isAddMapEmpty) {
      isAddMapEmpty = !addPSet.GetSmallestPrefix(smallestAddPrefix);
    }

    bool pickOld;

    // If both prefix sets are not empty, then compare to find the smaller one.
    if (!isOldMapEmpty && !isAddMapEmpty) {
      // The server must never send an add for a prefix we already have.
      if (smallestOldPrefix == smallestAddPrefix) {
        LOG(("Add prefix should not exist in the original prefix set."));
        return NS_ERROR_UC_UPDATE_DUPLICATE_PREFIX;
      }

      // Compare the smallest string in the old prefix set and the add prefix
      // set, merge the smaller one into the new map to ensure the merged
      // string still follows lexicographic order.
      pickOld = smallestOldPrefix < smallestAddPrefix;
    } else if (!isOldMapEmpty && isAddMapEmpty) {
      pickOld = true;
    } else if (isOldMapEmpty && !isAddMapEmpty) {
      pickOld = false;
      // If both maps are empty, then the partial update is complete.
    } else {
      break;
    }

    if (pickOld) {
      numOldPrefixPicked++;

      // If the number of picks from the old map matches the current removal
      // index, then this prefix is removed by not merging it into the new
      // map.
      if (removalIndex < removalArray.Length() &&
          numOldPrefixPicked == removalArray[removalIndex]) {
        removalIndex++;
      } else {
        rv = AppendPrefixToMap(aOutputMap, smallestOldPrefix);
        if (NS_WARN_IF(NS_FAILED(rv))) {
          return rv;
        }

        UpdateSHA256(crypto, smallestOldPrefix);
      }
      smallestOldPrefix.SetLength(0);
    } else {
      rv = AppendPrefixToMap(aOutputMap, smallestAddPrefix);
      if (NS_WARN_IF(NS_FAILED(rv))) {
        return rv;
      }

      UpdateSHA256(crypto, smallestAddPrefix);
      smallestAddPrefix.SetLength(0);
    }
  }

  // We expect |index| to remain greater than 0 because the maximum number of
  // runs is the number of original prefixes plus the number of add prefixes.
  if (index <= 0) {
    LOG(("There are still prefixes remaining after reaching maximum runs."));
    return NS_ERROR_UC_UPDATE_INFINITE_LOOP;
  }

  // All removal indices must have been consumed by the merge above.
  if (removalIndex < removalArray.Length()) {
    LOG(
        ("There are still prefixes to remove after exhausting the old "
         "PrefixSet."));
    return NS_ERROR_UC_UPDATE_WRONG_REMOVAL_INDICES;
  }

  // Prefixes and removal indices from the update are no longer required
  // after merging the data with the local prefixes.
  aTableUpdate->Clear();

  nsAutoCString sha256;
  crypto->Finish(false, sha256);
  if (aTableUpdate->SHA256().IsEmpty()) {
    LOG(("Update sha256 hash missing."));
    Telemetry::Accumulate(
        Telemetry::URLCLASSIFIER_UPDATE_ERROR, mProvider,
        NS_ERROR_GET_CODE(NS_ERROR_UC_UPDATE_MISSING_CHECKSUM));

    // Generate our own sha256 for tableUpdate to ensure there is always a
    // checksum in .metadata.
    std::string stdSha256(sha256.BeginReading(), sha256.Length());
    aTableUpdate->SetSHA256(stdSha256);
  } else if (aTableUpdate->SHA256() != sha256) {
    LOG(("SHA256 hash mismatch after applying partial update"));
    return NS_ERROR_UC_UPDATE_CHECKSUM_MISMATCH;
  }

  return NS_OK;
}
+
// Merge a full-hash (gethash) response map into the in-memory fullhash
// cache used by CheckCache to confirm prefix matches.
nsresult LookupCacheV4::AddFullHashResponseToCache(
    const FullHashResponseMap& aResponseMap) {
  CopyClassHashTable<FullHashResponseMap>(aResponseMap, mFullHashCache);

  return NS_OK;
}
+
// Persist the table's client state and SHA-256 checksum to
// "<table>.metadata" in the store directory, truncating any previous
// contents. Bails out with NS_ERROR_ABORT once shutdown has started.
nsresult LookupCacheV4::WriteMetadata(
    RefPtr<const TableUpdateV4> aTableUpdate) {
  NS_ENSURE_ARG_POINTER(aTableUpdate);
  if (nsUrlClassifierDBService::ShutdownHasStarted()) {
    return NS_ERROR_ABORT;
  }

  nsCOMPtr<nsIFile> metaFile;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(metaFile));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = metaFile->AppendNative(mTableName + METADATA_SUFFIX);
  NS_ENSURE_SUCCESS(rv, rv);

  // Overwrite the file from scratch on every write.
  nsCOMPtr<nsIOutputStream> outputStream;
  rv = NS_NewLocalFileOutputStream(getter_AddRefs(outputStream), metaFile,
                                   PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE);
  NS_ENSURE_SUCCESS(rv, rv);

  // Write the state.
  rv = WriteValue(outputStream, aTableUpdate->ClientState());
  NS_ENSURE_SUCCESS(rv, rv);

  // Write the SHA256 hash.
  rv = WriteValue(outputStream, aTableUpdate->SHA256());
  NS_ENSURE_SUCCESS(rv, rv);

  return rv;
}
+
// Read the client state and SHA-256 checksum back from "<table>.metadata".
// The two values are read in the same order WriteMetadata stored them.
nsresult LookupCacheV4::LoadMetadata(nsACString& aState, nsACString& aSHA256) {
  nsCOMPtr<nsIFile> metaFile;
  nsresult rv = mStoreDirectory->Clone(getter_AddRefs(metaFile));
  NS_ENSURE_SUCCESS(rv, rv);

  rv = metaFile->AppendNative(mTableName + METADATA_SUFFIX);
  NS_ENSURE_SUCCESS(rv, rv);

  nsCOMPtr<nsIInputStream> localInFile;
  rv = NS_NewLocalFileInputStream(getter_AddRefs(localInFile), metaFile,
                                  PR_RDONLY | nsIFile::OS_READAHEAD);
  if (NS_FAILED(rv)) {
    LOG(("Unable to open metadata file."));
    return rv;
  }

  // Read the list state.
  rv = ReadValue(localInFile, aState);
  if (NS_FAILED(rv)) {
    LOG(("Failed to read state."));
    return rv;
  }

  // Read the SHA256 hash.
  rv = ReadValue(localInFile, aSHA256);
  if (NS_FAILED(rv)) {
    LOG(("Failed to read SHA256 hash."));
    return rv;
  }

  return rv;
}
+
+VLPrefixSet::VLPrefixSet(const PrefixStringMap& aMap) : mCount(0) {
+ for (const auto& entry : aMap) {
+ uint32_t size = entry.GetKey();
+ MOZ_ASSERT(entry.GetData()->Length() % size == 0,
+ "PrefixString must be a multiple of the prefix size.");
+ mMap.InsertOrUpdate(size, MakeUnique<PrefixString>(*entry.GetData(), size));
+ mCount += entry.GetData()->Length() / size;
+ }
+}
+
+void VLPrefixSet::Merge(PrefixStringMap& aPrefixMap) {
+ for (const auto& entry : mMap) {
+ nsCString* prefixString = aPrefixMap.GetOrInsertNew(entry.GetKey());
+ PrefixString* str = entry.GetWeak();
+
+ nsAutoCString remainingString;
+ str->getRemainingString(remainingString);
+ if (!remainingString.IsEmpty()) {
+ MOZ_ASSERT(remainingString.Length() == str->remaining());
+ prefixString->Append(remainingString);
+ }
+ }
+}
+
+bool VLPrefixSet::GetSmallestPrefix(nsACString& aOutString) const {
+ PrefixString* pick = nullptr;
+ for (const auto& entry : mMap) {
+ PrefixString* str = entry.GetWeak();
+
+ if (str->remaining() <= 0) {
+ continue;
+ }
+
+ if (aOutString.IsEmpty()) {
+ str->getPrefix(aOutString);
+ MOZ_ASSERT(aOutString.Length() == entry.GetKey());
+ pick = str;
+ continue;
+ }
+
+ nsAutoCString cur;
+ str->getPrefix(cur);
+ if (!cur.IsEmpty() && cur < aOutString) {
+ aOutString.Assign(cur);
+ MOZ_ASSERT(aOutString.Length() == entry.GetKey());
+ pick = str;
+ }
+ }
+
+ if (pick) {
+ pick->next();
+ }
+
+ return pick != nullptr;
+}
+
// Built-in (moz) test entries are not provided by the V4 lookup cache.
nsresult LookupCacheV4::LoadMozEntries() { return NS_ERROR_NOT_IMPLEMENTED; }
+
+} // namespace safebrowsing
+} // namespace mozilla