summaryrefslogtreecommitdiffstats
path: root/toolkit/components/url-classifier/Classifier.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'toolkit/components/url-classifier/Classifier.cpp')
-rw-r--r--toolkit/components/url-classifier/Classifier.cpp1786
1 files changed, 1786 insertions, 0 deletions
diff --git a/toolkit/components/url-classifier/Classifier.cpp b/toolkit/components/url-classifier/Classifier.cpp
new file mode 100644
index 0000000000..7a78a59243
--- /dev/null
+++ b/toolkit/components/url-classifier/Classifier.cpp
@@ -0,0 +1,1786 @@
+/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "Classifier.h"
+#include "LookupCacheV4.h"
+#include "nsIFile.h"
+#include "nsNetCID.h"
+#include "nsPrintfCString.h"
+#include "nsThreadUtils.h"
+#include "mozilla/Components.h"
+#include "mozilla/EndianUtils.h"
+#include "mozilla/Telemetry.h"
+#include "mozilla/IntegerPrintfMacros.h"
+#include "mozilla/LazyIdleThread.h"
+#include "mozilla/Logging.h"
+#include "mozilla/SyncRunnable.h"
+#include "mozilla/Base64.h"
+#include "mozilla/Unused.h"
+#include "mozilla/UniquePtr.h"
+#include "nsUrlClassifierDBService.h"
+#include "nsUrlClassifierUtils.h"
+
+// MOZ_LOG=UrlClassifierDbService:5
+extern mozilla::LazyLogModule gUrlClassifierDbServiceLog;
+#define LOG(args) \
+ MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args)
+#define LOG_ENABLED() \
+ MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug)
+
+#define STORE_DIRECTORY "safebrowsing"_ns
+#define TO_DELETE_DIR_SUFFIX "-to_delete"_ns
+#define BACKUP_DIR_SUFFIX "-backup"_ns
+#define UPDATING_DIR_SUFFIX "-updating"_ns
+
+#define V4_METADATA_SUFFIX ".metadata"_ns
+#define V2_METADATA_SUFFIX ".sbstore"_ns
+
+// The amount of time, in milliseconds, that our IO thread will stay alive after
+// the last event it processes.
+#define DEFAULT_THREAD_TIMEOUT_MS 5000
+
+namespace mozilla {
+namespace safebrowsing {
+
+bool Classifier::OnUpdateThread() const {
+  // True iff the caller is running on the lazy update thread (if one exists).
+  bool isOnUpdateThread = false;
+  if (mUpdateThread) {
+    mUpdateThread->IsOnCurrentThread(&isOnUpdateThread);
+  }
+  return isOnUpdateThread;
+}
+
+void Classifier::SplitTables(const nsACString& str,
+                             nsTArray<nsCString>& tables) {
+  // Tokenize the comma-separated table list, skipping empty tokens.
+  tables.Clear();
+
+  for (const auto& token : str.Split(',')) {
+    if (token.IsEmpty()) {
+      continue;
+    }
+    tables.AppendElement(token);
+  }
+
+  // Sort so equal names become adjacent, then drop the duplicates.
+  tables.Sort();
+  const auto firstDup = std::unique(tables.begin(), tables.end());
+  tables.TruncateLength(std::distance(tables.begin(), firstDup));
+}
+
+// Resolve the directory that holds a table's store files. V4 tables
+// (names ending in "-proto") live in a per-provider sub-directory of
+// |aRootStoreDirectory|, created on demand; all other tables (and V4
+// tables whose provider is unknown) use the root store directory itself.
+// On success, *aPrivateStoreDirectory holds an addref'd handle.
+nsresult Classifier::GetPrivateStoreDirectory(
+    nsIFile* aRootStoreDirectory, const nsACString& aTableName,
+    const nsACString& aProvider, nsIFile** aPrivateStoreDirectory) {
+  NS_ENSURE_ARG_POINTER(aPrivateStoreDirectory);
+
+  if (!StringEndsWith(aTableName, "-proto"_ns)) {
+    // Only V4 table names (ending with '-proto') are stored
+    // in a per-provider sub-directory.
+    nsCOMPtr<nsIFile>(aRootStoreDirectory).forget(aPrivateStoreDirectory);
+    return NS_OK;
+  }
+
+  if (aProvider.IsEmpty()) {
+    // When failing to get provider, just store in the root folder.
+    nsCOMPtr<nsIFile>(aRootStoreDirectory).forget(aPrivateStoreDirectory);
+    return NS_OK;
+  }
+
+  nsCOMPtr<nsIFile> providerDirectory;
+
+  // Clone first since we are gonna create a new directory.
+  nsresult rv = aRootStoreDirectory->Clone(getter_AddRefs(providerDirectory));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Append the provider name to the root store directory.
+  rv = providerDirectory->AppendNative(aProvider);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Ensure existence of the provider directory.
+  bool dirExists;
+  rv = providerDirectory->Exists(&dirExists);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  if (!dirExists) {
+    LOG(("Creating private directory for %s", nsCString(aTableName).get()));
+    rv = providerDirectory->Create(nsIFile::DIRECTORY_TYPE, 0755);
+    NS_ENSURE_SUCCESS(rv, rv);
+    providerDirectory.forget(aPrivateStoreDirectory);
+    return rv;
+  }
+
+  // Store directory exists. Check if it's a directory.
+  bool isDir;
+  rv = providerDirectory->IsDirectory(&isDir);
+  NS_ENSURE_SUCCESS(rv, rv);
+  if (!isDir) {
+    return NS_ERROR_FILE_DESTINATION_NOT_DIR;
+  }
+
+  providerDirectory.forget(aPrivateStoreDirectory);
+
+  return NS_OK;
+}
+
+// Start out with the table-request cache marked stale and updates marked
+// interrupted; AsyncApplyUpdates() clears the latter when an update begins.
+Classifier::Classifier()
+    : mIsTableRequestResultOutdated(true),
+      mUpdateInterrupted(true),
+      mIsClosed(false) {
+  // Make a lazy thread for any IO. It idles away after
+  // DEFAULT_THREAD_TIMEOUT_MS and must be shut down manually (see dtor).
+  mUpdateThread =
+      new LazyIdleThread(DEFAULT_THREAD_TIMEOUT_MS, "Classifier Update",
+                         LazyIdleThread::ShutdownMethod::ManualShutdown);
+}
+
+// Shut down the update thread (ManualShutdown mode requires this) before
+// dropping the in-memory stores via Close().
+Classifier::~Classifier() {
+  if (mUpdateThread) {
+    mUpdateThread->Shutdown();
+    mUpdateThread = nullptr;
+  }
+
+  Close();
+}
+
+// Build the nsIFile handles for the four directories the classifier uses:
+// "safebrowsing" (root store), "-backup", "-updating" and "-to_delete",
+// all under mCacheDirectory. Also repoints existing LookupCaches at the
+// (possibly new) root store directory.
+nsresult Classifier::SetupPathNames() {
+  // Get the root directory where to store all the databases.
+  nsresult rv = mCacheDirectory->Clone(getter_AddRefs(mRootStoreDirectory));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  rv = mRootStoreDirectory->AppendNative(STORE_DIRECTORY);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Make sure LookupCaches (which are persistent and survive updates)
+  // are reading/writing in the right place. We will be moving their
+  // files "underneath" them during backup/restore.
+  for (uint32_t i = 0; i < mLookupCaches.Length(); i++) {
+    mLookupCaches[i]->UpdateRootDirHandle(mRootStoreDirectory);
+  }
+
+  // Directory where to move a backup before an update.
+  rv = mCacheDirectory->Clone(getter_AddRefs(mBackupDirectory));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  rv = mBackupDirectory->AppendNative(STORE_DIRECTORY + BACKUP_DIR_SUFFIX);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Directory where to be working on the update.
+  rv = mCacheDirectory->Clone(getter_AddRefs(mUpdatingDirectory));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  rv = mUpdatingDirectory->AppendNative(STORE_DIRECTORY + UPDATING_DIR_SUFFIX);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Directory where to move the backup so we can atomically
+  // delete (really move) it.
+  rv = mCacheDirectory->Clone(getter_AddRefs(mToDeleteDirectory));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  rv = mToDeleteDirectory->AppendNative(STORE_DIRECTORY + TO_DELETE_DIR_SUFFIX);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  return NS_OK;
+}
+
+nsresult Classifier::CreateStoreDirectory() {
+  // Make sure the "safebrowsing" root store directory exists and really
+  // is a directory.
+  if (ShouldAbort()) {
+    return NS_OK;  // nothing to do, the classifier is done
+  }
+
+  bool exists;
+  nsresult rv = mRootStoreDirectory->Exists(&exists);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  if (!exists) {
+    // Not there yet: create it with rwxr-xr-x permissions.
+    return mRootStoreDirectory->Create(nsIFile::DIRECTORY_TYPE, 0755);
+  }
+
+  // Something already exists at that path; it must be a directory.
+  bool isDirectory;
+  rv = mRootStoreDirectory->IsDirectory(&isDirectory);
+  NS_ENSURE_SUCCESS(rv, rv);
+  return isDirectory ? NS_OK : NS_ERROR_FILE_DESTINATION_NOT_DIR;
+}
+
+// Testing entries are created directly in LookupCache instead of
+// created via update(Bug 1531354). We can remove unused testing
+// files from profile.
+// TODO: See Bug 723153 to clear old safebrowsing store
+nsresult Classifier::ClearLegacyFiles() {
+  if (ShouldAbort()) {
+    return NS_OK;  // nothing to do, the classifier is done
+  }
+
+  // The legacy test tables whose on-disk leftovers we want to delete.
+  nsTArray<nsLiteralCString> tables = {
+      "test-phish-simple"_ns,    "test-malware-simple"_ns,
+      "test-unwanted-simple"_ns, "test-harmful-simple"_ns,
+      "test-track-simple"_ns,    "test-trackwhite-simple"_ns,
+      "test-block-simple"_ns,
+  };
+
+  // Remove |aRootDirectory|/|aFileName| if it exists. Returns true only
+  // when the file existed and was successfully removed.
+  const auto fnFindAndRemove = [](nsIFile* aRootDirectory,
+                                  const nsACString& aFileName) {
+    nsCOMPtr<nsIFile> file;
+    nsresult rv = aRootDirectory->Clone(getter_AddRefs(file));
+    if (NS_FAILED(rv)) {
+      return false;
+    }
+
+    rv = file->AppendNative(aFileName);
+    if (NS_FAILED(rv)) {
+      return false;
+    }
+
+    bool exists;
+    rv = file->Exists(&exists);
+    if (NS_FAILED(rv) || !exists) {
+      return false;
+    }
+
+    rv = file->Remove(false);
+    if (NS_FAILED(rv)) {
+      return false;
+    }
+
+    return true;
+  };
+
+  for (const auto& table : tables) {
+    // Remove both .sbstore and .vlpset if .sbstore exists
+    if (fnFindAndRemove(mRootStoreDirectory, table + ".sbstore"_ns)) {
+      fnFindAndRemove(mRootStoreDirectory, table + ".vlpset"_ns);
+    }
+  }
+
+  return NS_OK;
+}
+
+// Initialize the classifier against |aCacheDirectory|: set up directory
+// handles, clean up leftovers from crashed/interrupted updates, recover
+// from backups if needed, and build the active-table list.
+nsresult Classifier::Open(nsIFile& aCacheDirectory) {
+  // Remember the Local profile directory.
+  nsresult rv = aCacheDirectory.Clone(getter_AddRefs(mCacheDirectory));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Create the handles to the update and backup directories.
+  rv = SetupPathNames();
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Clean up any to-delete directories that haven't been deleted yet.
+  // This is still required for backward compatibility.
+  rv = CleanToDelete();
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // If we met a crash during the previous update, "safebrowsing-updating"
+  // directory will exist and let's remove it.
+  rv = mUpdatingDirectory->Remove(true);
+  if (NS_SUCCEEDED(rv)) {
+    // If the "safebrowsing-updating" exists, it implies a crash occurred
+    // in the previous update.
+    LOG(("We may have hit a crash in the previous update."));
+  }
+
+  // Check whether we have an incomplete update and recover from the
+  // backup if so.
+  rv = RecoverBackups();
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Make sure the main store directory exists.
+  rv = CreateStoreDirectory();
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // Legacy-file cleanup is best-effort; a failure here is non-fatal.
+  rv = ClearLegacyFiles();
+  Unused << NS_WARN_IF(NS_FAILED(rv));
+
+  // Build the list of known urlclassifier lists.
+  // XXX: Disk IO potentially on the main thread during startup
+  RegenActiveTables();
+
+  return NS_OK;
+}
+
+// Mark the classifier closed and drop the in-memory stores.
+void Classifier::Close() {
+  // Close will be called by PreShutdown, so it is important to note that
+  // things put here should not affect an ongoing update thread.
+  mIsClosed = true;
+  DropStores();
+}
+
+// Interrupt any in-flight update and wipe all on-disk and in-memory state,
+// then recreate an empty store directory. The actual wipe runs synchronously
+// on the update thread (if there is one) so it serializes with update work.
+void Classifier::Reset() {
+  MOZ_ASSERT(!OnUpdateThread(), "Reset() MUST NOT be called on update thread");
+
+  LOG(("Reset() is called so we interrupt the update."));
+  mUpdateInterrupted = true;
+
+  // We don't pass the ref-counted object 'Classifier' to resetFunc because we
+  // don't want to release 'Classifier' in the update thread, which triggers an
+  // assertion when the LazyIdleThread is not created and removed by the same
+  // thread (worker thread). Since |resetFunc| is a synchronous call, we can
+  // just pass the reference of Classifier because Classifier's life cycle is
+  // guaranteed to be longer than |resetFunc|.
+  auto resetFunc = [&] {
+    if (this->mIsClosed) {
+      return;  // too late to reset, bail
+    }
+    this->DropStores();
+
+    this->mRootStoreDirectory->Remove(true);
+    this->mBackupDirectory->Remove(true);
+    this->mUpdatingDirectory->Remove(true);
+    this->mToDeleteDirectory->Remove(true);
+
+    this->CreateStoreDirectory();
+    this->RegenActiveTables();
+  };
+
+  if (!mUpdateThread) {
+    LOG(("Async update has been disabled. Just Reset() on worker thread."));
+    resetFunc();
+    return;
+  }
+
+  // Block until the reset has completed on the update thread.
+  nsCOMPtr<nsIRunnable> r =
+      NS_NewRunnableFunction("safebrowsing::Classifier::Reset", resetFunc);
+  SyncRunnable::DispatchToThread(mUpdateThread, r);
+}
+
+void Classifier::ResetTables(ClearType aType,
+                             const nsTArray<nsCString>& aTables) {
+  // Wipe the in-memory state of every listed table.
+  for (const auto& table : aTables) {
+    LOG(("Resetting table: %s", table.get()));
+    RefPtr<LookupCache> cache = GetLookupCache(table);
+    if (!cache) {
+      continue;
+    }
+    if (aType == Clear_Cache) {
+      // Only drop cached Completes for this table.
+      cache->ClearCache();
+    } else {
+      cache->ClearAll();
+    }
+  }
+
+  // Clear on-disk database if clear type is Clear_All
+  if (aType == Clear_All) {
+    DeleteTables(mRootStoreDirectory, aTables);
+
+    RegenActiveTables();
+  }
+}
+
+// |DeleteTables| is used by |GetLookupCache| to remove on-disk data when
+// we detect prefix file corruption. So make sure not to call |GetLookupCache|
+// again in this function to avoid infinite loop.
+void Classifier::DeleteTables(nsIFile* aDirectory,
+ const nsTArray<nsCString>& aTables) {
+ nsCOMPtr<nsIDirectoryEnumerator> entries;
+ nsresult rv = aDirectory->GetDirectoryEntries(getter_AddRefs(entries));
+ NS_ENSURE_SUCCESS_VOID(rv);
+
+ nsCOMPtr<nsIFile> file;
+ while (NS_SUCCEEDED(rv = entries->GetNextFile(getter_AddRefs(file))) &&
+ file) {
+ // If |file| is a directory, recurse to find its entries as well.
+ bool isDirectory;
+ if (NS_FAILED(file->IsDirectory(&isDirectory))) {
+ continue;
+ }
+ if (isDirectory) {
+ DeleteTables(file, aTables);
+ continue;
+ }
+
+ nsCString leafName;
+ rv = file->GetNativeLeafName(leafName);
+ NS_ENSURE_SUCCESS_VOID(rv);
+
+ // Remove file extension if there's one.
+ int32_t dotPosition = leafName.RFind(".");
+ if (dotPosition >= 0) {
+ leafName.Truncate(dotPosition);
+ }
+
+ if (!leafName.IsEmpty() && aTables.Contains(leafName)) {
+ if (NS_FAILED(file->Remove(false))) {
+ NS_WARNING(nsPrintfCString("Fail to remove file %s from the disk",
+ leafName.get())
+ .get());
+ }
+ }
+ }
+ NS_ENSURE_SUCCESS_VOID(rv);
+}
+
+// This function is I/O intensive. It should only be called before applying
+// an update.
+void Classifier::TableRequest(nsACString& aResult) {
+ MOZ_ASSERT(!NS_IsMainThread(),
+ "TableRequest must be called on the classifier worker thread.");
+
+ // This function and all disk I/O are guaranteed to occur
+ // on the same thread so we don't need to add a lock around.
+ if (!mIsTableRequestResultOutdated) {
+ aResult = mTableRequestResult;
+ return;
+ }
+
+ // We reset tables failed to load here; not just tables are corrupted.
+ // It is because this is a safer way to ensure Safe Browsing databases
+ // can be recovered from any bad situations.
+ nsTArray<nsCString> failedTables;
+
+ // Load meta data from *.sbstore files in the root directory.
+ // Specifically for v4 tables.
+ nsCString v2Metadata;
+ nsresult rv = LoadHashStore(mRootStoreDirectory, v2Metadata, failedTables);
+ if (NS_SUCCEEDED(rv)) {
+ aResult.Append(v2Metadata);
+ }
+
+ // Load meta data from *.metadata files in the root directory.
+ // Specifically for v4 tables.
+ nsCString v4Metadata;
+ rv = LoadMetadata(mRootStoreDirectory, v4Metadata, failedTables);
+ if (NS_SUCCEEDED(rv)) {
+ aResult.Append(v4Metadata);
+ }
+
+ // Clear data for tables that we failed to open, a full update should
+ // be requested for those tables.
+ if (failedTables.Length() != 0) {
+ LOG(("Reset tables failed to open before applying an update"));
+ ResetTables(Clear_All, failedTables);
+ }
+
+ // Update the TableRequest result in-memory cache.
+ mTableRequestResult = aResult;
+ mIsTableRequestResultOutdated = false;
+}
+
+// Look up every URL fragment in |aSpecFragments| against the table named
+// |aTable|, appending a LookupResult to |aResults| for each hit.
+// Fails if the table has no usable LookupCache.
+nsresult Classifier::CheckURIFragments(
+    const nsTArray<nsCString>& aSpecFragments, const nsACString& aTable,
+    LookupResultArray& aResults) {
+  // A URL can form up to 30 different fragments
+  MOZ_ASSERT(aSpecFragments.Length() != 0);
+  MOZ_ASSERT(aSpecFragments.Length() <=
+             (MAX_HOST_COMPONENTS * (MAX_PATH_COMPONENTS + 2)));
+
+  if (LOG_ENABLED()) {
+    // Log the longest fragment as a stand-in for the original URL.
+    uint32_t urlIdx = 0;
+    for (uint32_t i = 1; i < aSpecFragments.Length(); i++) {
+      if (aSpecFragments[urlIdx].Length() < aSpecFragments[i].Length()) {
+        urlIdx = i;
+      }
+    }
+    LOG(("Checking table %s, URL is %s", aTable.BeginReading(),
+         aSpecFragments[urlIdx].get()));
+  }
+
+  RefPtr<LookupCache> cache = GetLookupCache(aTable);
+  if (NS_WARN_IF(!cache)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  // Now check each lookup fragment against the entries in the DB.
+  for (uint32_t i = 0; i < aSpecFragments.Length(); i++) {
+    Completion lookupHash;
+    // NOTE(review): FromPlaintext's return value is ignored here — a
+    // failed hash would be looked up as-is; confirm this is intentional.
+    lookupHash.FromPlaintext(aSpecFragments[i]);
+
+    bool has, confirmed;
+    uint32_t matchLength;
+
+    nsresult rv = cache->Has(lookupHash, &has, &matchLength, &confirmed);
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    if (has) {
+      RefPtr<LookupResult> result = new LookupResult;
+      aResults.AppendElement(result);
+
+      if (LOG_ENABLED()) {
+        nsAutoCString checking;
+        lookupHash.ToHexString(checking);
+        LOG(("Found a result in fragment %s, hash %s (%X)",
+             aSpecFragments[i].get(), checking.get(), lookupHash.ToUint32()));
+        LOG(("Result %s, match %d-bytes prefix",
+             confirmed ? "confirmed." : "Not confirmed.", matchLength));
+      }
+
+      result->hash.complete = lookupHash;
+      result->mConfirmed = confirmed;
+      result->mTableName.Assign(cache->TableName());
+      result->mPartialHashLength = confirmed ? COMPLETE_SIZE : matchLength;
+      result->mProtocolV2 = LookupCache::Cast<LookupCacheV2>(cache);
+    }
+  }
+
+  return NS_OK;
+}
+
+static nsresult SwapDirectoryContent(nsIFile* aDir1, nsIFile* aDir2,
+                                     nsIFile* aParentDir, nsIFile* aTempDir) {
+  // Pre-condition: |aDir1| and |aDir2| are directories and their parent
+  // are both |aParentDir|.
+  //
+  // Post-condition: The locations where aDir1 and aDir2 point to will not
+  //                 change but their contents will be exchanged. If we failed
+  //                 to swap their content, everything will be rolled back.
+
+  nsAutoCString tempDirName;
+  aTempDir->GetNativeLeafName(tempDirName);
+
+  nsresult rv;
+
+  nsAutoCString dirName1, dirName2;
+  aDir1->GetNativeLeafName(dirName1);
+  aDir2->GetNativeLeafName(dirName2);
+
+  LOG(("Swapping directories %s and %s...", dirName1.get(), dirName2.get()));
+
+  // 0. Create a handle for the temp directory up front, while failure is
+  //    still harmless (nothing has been renamed yet, so there is nothing
+  //    to roll back). This handle is required since |nsIFile.rename| will
+  //    not change the location where the object points to. (Previously
+  //    these two results were ignored; a failed Clone() would have left
+  //    |tempDirectory| null and crashed after step 1 with no rollback.)
+  nsCOMPtr<nsIFile> tempDirectory;
+  rv = aParentDir->Clone(getter_AddRefs(tempDirectory));
+  NS_ENSURE_SUCCESS(rv, rv);
+  rv = tempDirectory->AppendNative(tempDirName);
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  // 1. Rename "dirName1" to "temp"
+  rv = aDir1->RenameToNative(nullptr, tempDirName);
+  if (NS_FAILED(rv)) {
+    LOG(("Unable to rename %s to %s", dirName1.get(), tempDirName.get()));
+    return rv;  // Nothing to roll back.
+  }
+
+  // 2. Rename "dirName2" to "dirName1".
+  rv = aDir2->RenameToNative(nullptr, dirName1);
+  if (NS_FAILED(rv)) {
+    LOG(("Failed to rename %s to %s. Rename temp directory back to %s",
+         dirName2.get(), dirName1.get(), dirName1.get()));
+    nsresult rbrv = tempDirectory->RenameToNative(nullptr, dirName1);
+    NS_ENSURE_SUCCESS(rbrv, rbrv);
+    return rv;
+  }
+
+  // 3. Rename "temp" to "dirName2".
+  rv = tempDirectory->RenameToNative(nullptr, dirName2);
+  if (NS_FAILED(rv)) {
+    LOG(("Failed to rename temp directory to %s. ", dirName2.get()));
+    // We've done (1) renaming "dir1 to temp" and
+    //            (2) renaming "dir2 to dir1"
+    // so the rollback is
+    //            (1) renaming "dir1 to dir2" and
+    //            (2) renaming "temp to dir1"
+    nsresult rbrv;  // rollback result
+    rbrv = aDir1->RenameToNative(nullptr, dirName2);
+    NS_ENSURE_SUCCESS(rbrv, rbrv);
+    rbrv = tempDirectory->RenameToNative(nullptr, dirName1);
+    NS_ENSURE_SUCCESS(rbrv, rbrv);
+    return rv;
+  }
+
+  return rv;
+}
+
+// Discard everything produced by an (aborted or completed) update: the
+// freshly built LookupCaches and the "safebrowsing-updating" directory.
+void Classifier::RemoveUpdateIntermediaries() {
+  // Remove old LookupCaches.
+  mNewLookupCaches.Clear();
+
+  // Remove the "old" directory. (despite its looking-new name)
+  if (NS_FAILED(mUpdatingDirectory->Remove(true))) {
+    // If the directory is locked from removal for some reason,
+    // we will fail here and it doesn't matter until the next
+    // update. (the next update will fail due to the lingering
+    // "safebrowsing-updating" directory.)
+    LOG(("Failed to remove updating directory."));
+  }
+}
+
+// Carry the in-memory full-hash cache over from the old LookupCaches to
+// the ones freshly built from disk, then expire stale cache entries.
+void Classifier::CopyAndInvalidateFullHashCache() {
+  MOZ_ASSERT(!OnUpdateThread(),
+             "CopyAndInvalidateFullHashCache cannot be called on update thread "
+             "since it mutates mLookupCaches which is only safe on "
+             "worker thread.");
+
+  // New lookup caches are built from disk, data likes cache which is
+  // generated online won't exist. We have to manually copy cache from
+  // old LookupCache to new LookupCache.
+  for (auto& newCache : mNewLookupCaches) {
+    for (auto& oldCache : mLookupCaches) {
+      if (oldCache->TableName() == newCache->TableName()) {
+        newCache->CopyFullHashCache(oldCache);
+        break;
+      }
+    }
+  }
+
+  // Clear cache when update.
+  // Invalidate cache entries in CopyAndInvalidateFullHashCache because only
+  // at this point we will have cache data in LookupCache.
+  for (auto& newCache : mNewLookupCaches) {
+    newCache->InvalidateExpiredCacheEntries();
+  }
+}
+
+// Swap the freshly built caches into mLookupCaches (by table name),
+// leaving the displaced old caches (or nullptr) behind in
+// mNewLookupCaches for later cleanup.
+void Classifier::MergeNewLookupCaches() {
+  MOZ_ASSERT(!OnUpdateThread(),
+             "MergeNewLookupCaches cannot be called on update thread "
+             "since it mutates mLookupCaches which is only safe on "
+             "worker thread.");
+
+  for (auto& newCache : mNewLookupCaches) {
+    // For each element in mNewLookupCaches, it will be swapped with
+    // - An old cache in mLookupCache with the same table name or
+    // - nullptr (mLookupCache will be expanded) otherwise.
+    size_t swapIndex = 0;
+    for (; swapIndex < mLookupCaches.Length(); swapIndex++) {
+      if (mLookupCaches[swapIndex]->TableName() == newCache->TableName()) {
+        break;
+      }
+    }
+    if (swapIndex == mLookupCaches.Length()) {
+      // No old cache with this table name: grow the array so the swap
+      // below appends the new cache.
+      mLookupCaches.AppendElement(nullptr);
+    }
+
+    std::swap(mLookupCaches[swapIndex], newCache);
+    mLookupCaches[swapIndex]->UpdateRootDirHandle(mRootStoreDirectory);
+  }
+
+  // At this point, mNewLookupCaches's length remains the same but
+  // will contain either old cache (override) or nullptr (append).
+}
+
+// Commit a finished update: atomically swap the on-disk tables, merge the
+// in-memory caches, refresh the active-table list, and remove the update
+// intermediaries. Runs on the worker (caller) thread.
+nsresult Classifier::SwapInNewTablesAndCleanup() {
+  nsresult rv;
+
+  // Step 1. Swap in on-disk tables. The idea of using "safebrowsing-backup"
+  // as the intermediary directory is we can get databases recovered if
+  // crash occurred in any step of the swap. (We will recover from
+  // "safebrowsing-backup" in OpenDb().)
+  rv = SwapDirectoryContent(mUpdatingDirectory,   // contains new tables
+                            mRootStoreDirectory,  // contains old tables
+                            mCacheDirectory,      // common parent dir
+                            mBackupDirectory);    // intermediary dir for swap
+  if (NS_FAILED(rv)) {
+    LOG(("Failed to swap in on-disk tables."));
+    RemoveUpdateIntermediaries();
+    return rv;
+  }
+
+  // Step 2. Merge mNewLookupCaches into mLookupCaches. The outdated
+  // LookupCaches will be stored in mNewLookupCaches and be cleaned
+  // up later.
+  MergeNewLookupCaches();
+
+  // Step 3. Re-generate active tables based on on-disk tables.
+  rv = RegenActiveTables();
+  if (NS_FAILED(rv)) {
+    LOG(("Failed to re-generate active tables!"));
+  }
+
+  // Step 4. Clean up intermediaries for update.
+  RemoveUpdateIntermediaries();
+
+  // Step 5. Invalidate cached tableRequest request.
+  mIsTableRequestResultOutdated = true;
+
+  LOG(("Done swap in updated tables."));
+
+  return rv;
+}
+
+// Drain pending update work and permanently drop the update thread;
+// subsequent AsyncApplyUpdates() calls will fail.
+void Classifier::FlushAndDisableAsyncUpdate() {
+  LOG(("Classifier::FlushAndDisableAsyncUpdate [%p, %p]", this,
+       mUpdateThread.get()));
+
+  if (!mUpdateThread) {
+    LOG(("Async update has been disabled."));
+    return;
+  }
+
+  mUpdateThread->Shutdown();
+  mUpdateThread = nullptr;
+}
+
+// Apply |aUpdates| asynchronously: the heavy lifting runs on the update
+// thread (ApplyUpdatesBackground), then control bounces back to the
+// calling thread for the commit (ApplyUpdatesForeground) and |aCallback|.
+nsresult Classifier::AsyncApplyUpdates(const TableUpdateArray& aUpdates,
+                                       const AsyncUpdateCallback& aCallback) {
+  LOG(("Classifier::AsyncApplyUpdates"));
+
+  if (!mUpdateThread) {
+    LOG(("Async update has already been disabled."));
+    return NS_ERROR_FAILURE;
+  }
+
+  //       Caller thread      |       Update thread
+  // --------------------------------------------------------
+  //                          | ApplyUpdatesBackground
+  //  (processing other task) | (bg-update done; ping back to caller thread)
+  //  (processing other task) | idle...
+  //  ApplyUpdatesForeground  |
+  //  callback                |
+
+  MOZ_ASSERT(mNewLookupCaches.IsEmpty(),
+             "There should be no leftovers from a previous update.");
+
+  mUpdateInterrupted = false;
+  nsresult rv =
+      mRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectoryForUpdate));
+  if (NS_FAILED(rv)) {
+    LOG(("Failed to clone mRootStoreDirectory for update."));
+    return rv;
+  }
+
+  nsCOMPtr<nsIThread> callerThread = NS_GetCurrentThread();
+  MOZ_ASSERT(!OnUpdateThread());
+
+  RefPtr<Classifier> self = this;
+  nsCOMPtr<nsIRunnable> bgRunnable = NS_NewRunnableFunction(
+      "safebrowsing::Classifier::AsyncApplyUpdates",
+      [self, aUpdates = aUpdates.Clone(), aCallback, callerThread]() mutable {
+        MOZ_ASSERT(self->OnUpdateThread(), "MUST be on update thread");
+
+        nsresult bgRv;
+        nsTArray<nsCString> failedTableNames;
+
+        TableUpdateArray updates;
+
+        // Make a copy of the array since we'll be removing entries as
+        // we process them on the background thread.
+        if (updates.AppendElements(std::move(aUpdates), fallible)) {
+          LOG(("Step 1. ApplyUpdatesBackground on update thread."));
+          bgRv = self->ApplyUpdatesBackground(updates, failedTableNames);
+        } else {
+          LOG(
+              ("Step 1. Not enough memory to run ApplyUpdatesBackground on "
+               "update thread."));
+          bgRv = NS_ERROR_OUT_OF_MEMORY;
+        }
+
+        // Classifier is created in the worker thread and it has to be released
+        // in the worker thread (because of the constraint that LazyIdleThread
+        // has to be created and released in the same thread). We transfer the
+        // ownership to the caller thread here to guarantee that we don't
+        // release it in the update thread.
+        nsCOMPtr<nsIRunnable> fgRunnable = NS_NewRunnableFunction(
+            "safebrowsing::Classifier::AsyncApplyUpdates",
+            [self = std::move(self), aCallback, bgRv,
+             failedTableNames = std::move(failedTableNames),
+             callerThread]() mutable {
+              RefPtr<Classifier> classifier = std::move(self);
+
+              MOZ_ASSERT(NS_GetCurrentThread() == callerThread,
+                         "MUST be on caller thread");
+
+              LOG(("Step 2. ApplyUpdatesForeground on caller thread"));
+              nsresult rv =
+                  classifier->ApplyUpdatesForeground(bgRv, failedTableNames);
+
+              LOG(("Step 3. Updates applied! Fire callback."));
+              aCallback(rv);
+            });
+
+        callerThread->Dispatch(fgRunnable, NS_DISPATCH_NORMAL);
+      });
+
+  return mUpdateThread->Dispatch(bgRunnable, NS_DISPATCH_NORMAL);
+}
+
+// Background half of an update: copy the in-use store into the updating
+// directory, then apply every TableUpdate to the new copy. Tables whose
+// update fails are collected in |aFailedTableNames| so the foreground
+// half can reset them. Runs on the update thread.
+nsresult Classifier::ApplyUpdatesBackground(
+    TableUpdateArray& aUpdates, nsTArray<nsCString>& aFailedTableNames) {
+  // |mUpdateInterrupted| is guaranteed to have been unset.
+  // If |mUpdateInterrupted| is set at any point, Reset() must have
+  // been called then we need to interrupt the update process.
+  // We only add checkpoints for non-trivial tasks.
+
+  if (aUpdates.IsEmpty()) {
+    return NS_OK;
+  }
+
+  nsUrlClassifierUtils* urlUtil = nsUrlClassifierUtils::GetInstance();
+  if (NS_WARN_IF(!urlUtil)) {
+    return NS_ERROR_FAILURE;
+  }
+
+  nsCString provider;
+  // Assume all TableUpdate objects should have the same provider.
+  urlUtil->GetTelemetryProvider(aUpdates[0]->TableName(), provider);
+
+  Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_CL_KEYED_UPDATE_TIME>
+      keyedTimer(provider);
+
+  PRIntervalTime clockStart = 0;
+  if (LOG_ENABLED()) {
+    clockStart = PR_IntervalNow();
+  }
+
+  nsresult rv;
+
+  // Check point 1: Copying files takes time so we check ShouldAbort()
+  //                inside CopyInUseDirForUpdate().
+  rv = CopyInUseDirForUpdate();  // i.e. mUpdatingDirectory will be setup.
+  if (NS_FAILED(rv)) {
+    LOG(("Failed to copy in-use directory for update."));
+    // An abort is not an error; report success so no reset happens.
+    return (rv == NS_ERROR_ABORT) ? NS_OK : rv;
+  }
+
+  LOG(("Applying %zu table updates.", aUpdates.Length()));
+
+  for (uint32_t i = 0; i < aUpdates.Length(); i++) {
+    RefPtr<const TableUpdate> update = aUpdates[i];
+    if (!update) {
+      // Previous UpdateHashStore() may have consumed this update.
+      continue;
+    }
+
+    // Run all updates for one table
+    nsAutoCString updateTable(update->TableName());
+
+    // Check point 2: Processing downloaded data takes time.
+    if (ShouldAbort()) {
+      LOG(("Update is interrupted. Stop building new tables."));
+      return NS_OK;
+    }
+
+    // Will update the mirrored in-memory and on-disk databases.
+    if (TableUpdate::Cast<TableUpdateV2>(update)) {
+      rv = UpdateHashStore(aUpdates, updateTable);
+    } else {
+      rv = UpdateTableV4(aUpdates, updateTable);
+    }
+
+    if (NS_WARN_IF(NS_FAILED(rv))) {
+      LOG(("Failed to update table: %s", updateTable.get()));
+      // We don't quit the updating process immediately when we discover
+      // a failure. Instead, we continue to apply updates to the
+      // remaining tables to find other tables which may also fail to
+      // apply an update. This help us reset all the corrupted tables
+      // within a single update.
+      // Note that changes that result from successful updates don't take
+      // effect after the updating process is finished. This is because
+      // when an error occurs during the updating process, we ignore all
+      // changes that have happened during the updating process.
+      aFailedTableNames.AppendElement(updateTable);
+      continue;
+    }
+  }
+
+  if (!aFailedTableNames.IsEmpty()) {
+    RemoveUpdateIntermediaries();
+    return NS_ERROR_FAILURE;
+  }
+
+  if (LOG_ENABLED()) {
+    PRIntervalTime clockEnd = PR_IntervalNow();
+    LOG(("update took %dms\n",
+         PR_IntervalToMilliseconds(clockEnd - clockStart)));
+  }
+
+  return rv;
+}
+
+// Foreground half of an update, run on the caller (worker) thread:
+// commit on success, reset failed tables on failure (except OOM, where
+// the tables themselves are not suspect), and bail out cleanly if the
+// update was interrupted by Reset().
+nsresult Classifier::ApplyUpdatesForeground(
+    nsresult aBackgroundRv, const nsTArray<nsCString>& aFailedTableNames) {
+  if (ShouldAbort()) {
+    LOG(("Update is interrupted! Just remove update intermediaries."));
+    RemoveUpdateIntermediaries();
+    return NS_OK;
+  }
+  if (NS_SUCCEEDED(aBackgroundRv)) {
+    // Copy and Invalidate fullhash cache here because this call requires
+    // mLookupCaches which is only available on work-thread
+    CopyAndInvalidateFullHashCache();
+
+    return SwapInNewTablesAndCleanup();
+  }
+  if (NS_ERROR_OUT_OF_MEMORY != aBackgroundRv) {
+    ResetTables(Clear_All, aFailedTableNames);
+  }
+  return aBackgroundRv;
+}
+
+nsresult Classifier::ApplyFullHashes(ConstTableUpdateArray& aUpdates) {
+  MOZ_ASSERT(!OnUpdateThread(),
+             "ApplyFullHashes() MUST NOT be called on update thread");
+  MOZ_ASSERT(
+      !NS_IsMainThread(),
+      "ApplyFullHashes() must be called on the classifier worker thread.");
+
+  LOG(("Applying %zu table gethashes.", aUpdates.Length()));
+
+  // Feed each gethash response into its table's cache, releasing the
+  // update object as soon as it has been consumed.
+  for (auto& update : aUpdates) {
+    nsresult rv = UpdateCache(update);
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    update = nullptr;
+  }
+
+  return NS_OK;
+}
+
+void Classifier::GetCacheInfo(const nsACString& aTable,
+                              nsIUrlClassifierCacheInfo** aCache) {
+  // Forward to the table's cache, if the table has one; otherwise
+  // leave *aCache untouched.
+  RefPtr<const LookupCache> cache = GetLookupCache(aTable);
+  if (cache) {
+    cache->GetCacheInfo(aCache);
+  }
+}
+
+// Release every in-memory LookupCache.
+void Classifier::DropStores() {
+  // See the comment in Classifier::Close() before adding anything here.
+  mLookupCaches.Clear();
+}
+
+// Rebuild mActiveTablesCache by scanning the store directory for prefix
+// files and keeping only the tables whose LookupCache loads and is primed.
+nsresult Classifier::RegenActiveTables() {
+  if (ShouldAbort()) {
+    return NS_OK;  // nothing to do, the classifier is done
+  }
+
+  mActiveTablesCache.Clear();
+
+  // The extension of V2 and V4 prefix files is .vlpset
+  // We still check .pset here for legacy load.
+  nsTArray<nsCString> exts = {".vlpset"_ns, ".pset"_ns};
+  nsTArray<nsCString> foundTables;
+  nsresult rv = ScanStoreDir(mRootStoreDirectory, exts, foundTables);
+  Unused << NS_WARN_IF(NS_FAILED(rv));
+
+  // We don't have test tables on disk, add Moz built-in entries here
+  rv = AddMozEntries(foundTables);
+  Unused << NS_WARN_IF(NS_FAILED(rv));
+
+  for (const auto& table : foundTables) {
+    RefPtr<const LookupCache> lookupCache = GetLookupCache(table);
+    if (!lookupCache) {
+      LOG(("Inactive table (no cache): %s", table.get()));
+      continue;
+    }
+
+    if (!lookupCache->IsPrimed()) {
+      LOG(("Inactive table (cache not primed): %s", table.get()));
+      continue;
+    }
+
+    LOG(("Active %s table: %s",
+         LookupCache::Cast<const LookupCacheV4>(lookupCache) ? "v4" : "v2",
+         table.get()));
+
+    mActiveTablesCache.AppendElement(table);
+  }
+
+  return NS_OK;
+}
+
+// Append the Moz built-in ("moztest-*") test tables to |aTables| when
+// their V2 cache exists but is not yet primed (i.e. they were not
+// already loaded from disk).
+nsresult Classifier::AddMozEntries(nsTArray<nsCString>& aTables) {
+  nsTArray<nsLiteralCString> tables = {
+      "moztest-phish-simple"_ns,    "moztest-malware-simple"_ns,
+      "moztest-unwanted-simple"_ns, "moztest-harmful-simple"_ns,
+      "moztest-track-simple"_ns,    "moztest-trackwhite-simple"_ns,
+      "moztest-block-simple"_ns,
+  };
+
+  for (const auto& table : tables) {
+    // Note: the second argument suppresses creation-on-miss here.
+    RefPtr<LookupCache> c = GetLookupCache(table, false);
+    RefPtr<LookupCacheV2> lookupCache = LookupCache::Cast<LookupCacheV2>(c);
+    if (!lookupCache || lookupCache->IsPrimed()) {
+      continue;
+    }
+
+    aTables.AppendElement(table);
+  }
+
+  return NS_OK;
+}
+
+// Recursively collect into |aTables| the base name (leaf name minus the
+// matched extension) of every file under |aDirectory| whose name ends
+// with one of |aExtensions|.
+nsresult Classifier::ScanStoreDir(nsIFile* aDirectory,
+                                  const nsTArray<nsCString>& aExtensions,
+                                  nsTArray<nsCString>& aTables) {
+  nsCOMPtr<nsIDirectoryEnumerator> entries;
+  nsresult rv = aDirectory->GetDirectoryEntries(getter_AddRefs(entries));
+  NS_ENSURE_SUCCESS(rv, rv);
+
+  nsCOMPtr<nsIFile> file;
+  while (NS_SUCCEEDED(rv = entries->GetNextFile(getter_AddRefs(file))) &&
+         file) {
+    // If |file| is a directory, recurse to find its entries as well.
+    bool isDirectory;
+    if (NS_FAILED(file->IsDirectory(&isDirectory))) {
+      continue;
+    }
+    if (isDirectory) {
+      ScanStoreDir(file, aExtensions, aTables);
+      continue;
+    }
+
+    nsAutoCString leafName;
+    rv = file->GetNativeLeafName(leafName);
+    NS_ENSURE_SUCCESS(rv, rv);
+
+    for (const auto& ext : aExtensions) {
+      if (StringEndsWith(leafName, ext)) {
+        // Strip the extension to recover the table name. Use the stored
+        // string length instead of re-scanning with strlen(ext.get()).
+        aTables.AppendElement(
+            Substring(leafName, 0, leafName.Length() - ext.Length()));
+        break;
+      }
+    }
+  }
+
+  return NS_OK;
+}
+
+nsresult Classifier::ActiveTables(nsTArray<nsCString>& aTables) const {
+ aTables = mActiveTablesCache.Clone();
+ return NS_OK;
+}
+
+nsresult Classifier::CleanToDelete() {
+ bool exists;
+ nsresult rv = mToDeleteDirectory->Exists(&exists);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ if (exists) {
+ rv = mToDeleteDirectory->Remove(true);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ return NS_OK;
+}
+
+#ifdef MOZ_SAFEBROWSING_DUMP_FAILED_UPDATES
+
+already_AddRefed<nsIFile> Classifier::GetFailedUpdateDirectroy() {
+ nsCString failedUpdatekDirName = STORE_DIRECTORY + nsCString("-failedupdate");
+
+ nsCOMPtr<nsIFile> failedUpdatekDirectory;
+ if (NS_FAILED(
+ mCacheDirectory->Clone(getter_AddRefs(failedUpdatekDirectory))) ||
+ NS_FAILED(failedUpdatekDirectory->AppendNative(failedUpdatekDirName))) {
+ LOG(("Failed to init failedUpdatekDirectory."));
+ return nullptr;
+ }
+
+ return failedUpdatekDirectory.forget();
+}
+
+nsresult Classifier::DumpRawTableUpdates(const nsACString& aRawUpdates) {
+ LOG(("Dumping raw table updates..."));
+
+ DumpFailedUpdate();
+
+ nsCOMPtr<nsIFile> failedUpdatekDirectory = GetFailedUpdateDirectroy();
+
+ // Create tableupdate.bin and dump raw table update data.
+ nsCOMPtr<nsIFile> rawTableUpdatesFile;
+ nsCOMPtr<nsIOutputStream> outputStream;
+ if (NS_FAILED(
+ failedUpdatekDirectory->Clone(getter_AddRefs(rawTableUpdatesFile))) ||
+ NS_FAILED(
+ rawTableUpdatesFile->AppendNative(nsCString("tableupdates.bin"))) ||
+ NS_FAILED(NS_NewLocalFileOutputStream(
+ getter_AddRefs(outputStream), rawTableUpdatesFile,
+ PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE))) {
+ LOG(("Failed to create file to dump raw table updates."));
+ return NS_ERROR_FAILURE;
+ }
+
+ // Write out the data.
+ uint32_t written;
+ nsresult rv = outputStream->Write(aRawUpdates.BeginReading(),
+ aRawUpdates.Length(), &written);
+ NS_ENSURE_SUCCESS(rv, rv);
+ NS_ENSURE_TRUE(written == aRawUpdates.Length(), NS_ERROR_FAILURE);
+
+ return rv;
+}
+
+nsresult Classifier::DumpFailedUpdate() {
+ LOG(("Dumping failed update..."));
+
+ nsCOMPtr<nsIFile> failedUpdatekDirectory = GetFailedUpdateDirectroy();
+
+ // Remove the "failed update" directory no matter it exists or not.
+ // Failure is fine because the directory may not exist.
+ failedUpdatekDirectory->Remove(true);
+
+ nsCString failedUpdatekDirName;
+ nsresult rv = failedUpdatekDirectory->GetNativeLeafName(failedUpdatekDirName);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // Copy the in-use directory to a clean "failed update" directory.
+ nsCOMPtr<nsIFile> inUseDirectory;
+ if (NS_FAILED(mRootStoreDirectory->Clone(getter_AddRefs(inUseDirectory))) ||
+ NS_FAILED(inUseDirectory->CopyToNative(nullptr, failedUpdatekDirName))) {
+ LOG(("Failed to move in-use to the \"failed update\" directory %s",
+ failedUpdatekDirName.get()));
+ return NS_ERROR_FAILURE;
+ }
+
+ return rv;
+}
+
+#endif // MOZ_SAFEBROWSING_DUMP_FAILED_UPDATES
+
+/**
+ * This function copies the files one by one to the destination folder.
+ * Before copying a file, it checks ::ShouldAbort and returns
+ * NS_ERROR_ABORT if the flag is set.
+ */
+nsresult Classifier::CopyDirectoryInterruptible(nsCOMPtr<nsIFile>& aDestDir,
+ nsCOMPtr<nsIFile>& aSourceDir) {
+ nsCOMPtr<nsIDirectoryEnumerator> entries;
+ nsresult rv = aSourceDir->GetDirectoryEntries(getter_AddRefs(entries));
+ NS_ENSURE_SUCCESS(rv, rv);
+ MOZ_ASSERT(entries);
+
+ nsCOMPtr<nsIFile> source;
+ while (NS_SUCCEEDED(rv = entries->GetNextFile(getter_AddRefs(source))) &&
+ source) {
+ if (ShouldAbort()) {
+ LOG(("Update is interrupted. Aborting the directory copy"));
+ return NS_ERROR_ABORT;
+ }
+
+ bool isDirectory;
+ rv = source->IsDirectory(&isDirectory);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ if (isDirectory) {
+ // If it is a directory, recursively copy the files inside the directory.
+ nsAutoCString leaf;
+ source->GetNativeLeafName(leaf);
+ MOZ_ASSERT(!leaf.IsEmpty());
+
+ nsCOMPtr<nsIFile> dest;
+ aDestDir->Clone(getter_AddRefs(dest));
+ dest->AppendNative(leaf);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = CopyDirectoryInterruptible(dest, source);
+ NS_ENSURE_SUCCESS(rv, rv);
+ } else {
+ rv = source->CopyToNative(aDestDir, ""_ns);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+ }
+
+ // If the destination directory doesn't exist in the end, it means that the
+ // source directory is empty, we should copy the directory here.
+ bool exist;
+ rv = aDestDir->Exists(&exist);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ if (!exist) {
+ rv = aDestDir->Create(nsIFile::DIRECTORY_TYPE, 0755);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ return NS_OK;
+}
+
+nsresult Classifier::CopyInUseDirForUpdate() {
+ LOG(("Copy in-use directory content for update."));
+ if (ShouldAbort()) {
+ return NS_ERROR_UC_UPDATE_SHUTDOWNING;
+ }
+
+ // We copy everything from in-use directory to a temporary directory
+ // for updating.
+
+ // Remove the destination directory first (just in case) the do the copy.
+ mUpdatingDirectory->Remove(true);
+ if (!mRootStoreDirectoryForUpdate) {
+ LOG(("mRootStoreDirectoryForUpdate is null."));
+ return NS_ERROR_NULL_POINTER;
+ }
+
+ nsresult rv = CopyDirectoryInterruptible(mUpdatingDirectory,
+ mRootStoreDirectoryForUpdate);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ return NS_OK;
+}
+
+nsresult Classifier::RecoverBackups() {
+ bool backupExists;
+ nsresult rv = mBackupDirectory->Exists(&backupExists);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ if (backupExists) {
+ // Remove the safebrowsing dir if it exists
+ nsCString storeDirName;
+ rv = mRootStoreDirectory->GetNativeLeafName(storeDirName);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ bool storeExists;
+ rv = mRootStoreDirectory->Exists(&storeExists);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ if (storeExists) {
+ rv = mRootStoreDirectory->Remove(true);
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ // Move the backup to the store location
+ rv = mBackupDirectory->MoveToNative(nullptr, storeDirName);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // mBackupDirectory now points to storeDir, fix up.
+ rv = SetupPathNames();
+ NS_ENSURE_SUCCESS(rv, rv);
+ }
+
+ return NS_OK;
+}
+
+bool Classifier::CheckValidUpdate(TableUpdateArray& aUpdates,
+ const nsACString& aTable) {
+ // take the quick exit if there is no valid update for us
+ // (common case)
+ uint32_t validupdates = 0;
+
+ for (uint32_t i = 0; i < aUpdates.Length(); i++) {
+ RefPtr<const TableUpdate> update = aUpdates[i];
+ if (!update || !update->TableName().Equals(aTable)) {
+ continue;
+ }
+ if (update->Empty()) {
+ aUpdates[i] = nullptr;
+ continue;
+ }
+ validupdates++;
+ }
+
+ if (!validupdates) {
+ // This can happen if the update was only valid for one table.
+ return false;
+ }
+
+ return true;
+}
+
+nsCString Classifier::GetProvider(const nsACString& aTableName) {
+ nsUrlClassifierUtils* urlUtil = nsUrlClassifierUtils::GetInstance();
+ if (NS_WARN_IF(!urlUtil)) {
+ return ""_ns;
+ }
+
+ nsCString provider;
+ nsresult rv = urlUtil->GetProvider(aTableName, provider);
+
+ return NS_SUCCEEDED(rv) ? provider : ""_ns;
+}
+
+/*
+ * This will consume+delete updates from the passed nsTArray.
+ */
+nsresult Classifier::UpdateHashStore(TableUpdateArray& aUpdates,
+ const nsACString& aTable) {
+ if (ShouldAbort()) {
+ return NS_ERROR_UC_UPDATE_SHUTDOWNING;
+ }
+
+ LOG(("Classifier::UpdateHashStore(%s)", PromiseFlatCString(aTable).get()));
+
+ // moztest- tables don't support update because they are directly created
+ // in LookupCache. To test updates, use tables begin with "test-" instead.
+ // Also, recommend using 'test-' tables while writing testcases because
+ // it is more like the real world scenario.
+ MOZ_ASSERT(!nsUrlClassifierUtils::IsMozTestTable(aTable));
+
+ HashStore store(aTable, GetProvider(aTable), mUpdatingDirectory);
+
+ if (!CheckValidUpdate(aUpdates, store.TableName())) {
+ return NS_OK;
+ }
+
+ nsresult rv = store.Open();
+ if (NS_WARN_IF(NS_FAILED(rv))) {
+ return rv;
+ }
+
+ rv = store.BeginUpdate();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // Read the part of the store that is (only) in the cache
+ RefPtr<LookupCacheV2> lookupCacheV2;
+ {
+ RefPtr<LookupCache> lookupCache =
+ GetLookupCacheForUpdate(store.TableName());
+ if (lookupCache) {
+ lookupCacheV2 = LookupCache::Cast<LookupCacheV2>(lookupCache);
+ }
+ }
+ if (!lookupCacheV2) {
+ return NS_ERROR_UC_UPDATE_TABLE_NOT_FOUND;
+ }
+
+ FallibleTArray<uint32_t> AddPrefixHashes;
+ FallibleTArray<nsCString> AddCompletesHashes;
+ rv = lookupCacheV2->GetPrefixes(AddPrefixHashes, AddCompletesHashes);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ rv = store.AugmentAdds(AddPrefixHashes, AddCompletesHashes);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ AddPrefixHashes.Clear();
+ AddCompletesHashes.Clear();
+
+ uint32_t applied = 0;
+
+ for (uint32_t i = 0; i < aUpdates.Length(); i++) {
+ RefPtr<TableUpdate> update = aUpdates[i];
+ if (!update || !update->TableName().Equals(store.TableName())) {
+ continue;
+ }
+
+ RefPtr<TableUpdateV2> updateV2 = TableUpdate::Cast<TableUpdateV2>(update);
+ NS_ENSURE_TRUE(updateV2, NS_ERROR_UC_UPDATE_UNEXPECTED_VERSION);
+
+ rv = store.ApplyUpdate(updateV2);
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ applied++;
+
+ LOG(("Applied update to table %s:", store.TableName().get()));
+ LOG((" %d add chunks", updateV2->AddChunks().Length()));
+ LOG((" %zu add prefixes", updateV2->AddPrefixes().Length()));
+ LOG((" %zu add completions", updateV2->AddCompletes().Length()));
+ LOG((" %d sub chunks", updateV2->SubChunks().Length()));
+ LOG((" %zu sub prefixes", updateV2->SubPrefixes().Length()));
+ LOG((" %zu sub completions", updateV2->SubCompletes().Length()));
+ LOG((" %d add expirations", updateV2->AddExpirations().Length()));
+ LOG((" %d sub expirations", updateV2->SubExpirations().Length()));
+
+ aUpdates[i] = nullptr;
+ }
+
+ LOG(("Applied %d update(s) to %s.", applied, store.TableName().get()));
+
+ rv = store.Rebuild();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ LOG(("Table %s now has:", store.TableName().get()));
+ LOG((" %d add chunks", store.AddChunks().Length()));
+ LOG((" %zu add prefixes", store.AddPrefixes().Length()));
+ LOG((" %zu add completions", store.AddCompletes().Length()));
+ LOG((" %d sub chunks", store.SubChunks().Length()));
+ LOG((" %zu sub prefixes", store.SubPrefixes().Length()));
+ LOG((" %zu sub completions", store.SubCompletes().Length()));
+
+ rv = store.WriteFile();
+ NS_ENSURE_SUCCESS(rv, rv);
+
+ // At this point the store is updated and written out to disk, but
+ // the data is still in memory. Build our quick-lookup table here.
+ rv = lookupCacheV2->Build(store.AddPrefixes(), store.AddCompletes());
+ NS_ENSURE_SUCCESS(rv, NS_ERROR_UC_UPDATE_BUILD_PREFIX_FAILURE);
+
+ rv = lookupCacheV2->WriteFile();
+ NS_ENSURE_SUCCESS(rv, NS_ERROR_UC_UPDATE_FAIL_TO_WRITE_DISK);
+
+ LOG(("Successfully updated %s", store.TableName().get()));
+
+ return NS_OK;
+}
+
// Applies all pending V4 updates for |aTable| to its LookupCacheV4 and
// persists the result plus the metadata of the last applied update.
// Consumed updates in |aUpdates| are set to nullptr.
nsresult Classifier::UpdateTableV4(TableUpdateArray& aUpdates,
                                   const nsACString& aTable) {
  MOZ_ASSERT(!NS_IsMainThread(),
             "UpdateTableV4 must be called on the classifier worker thread.");
  if (ShouldAbort()) {
    return NS_ERROR_UC_UPDATE_SHUTDOWNING;
  }

  // moztest- tables don't support update, see comment in UpdateHashStore.
  MOZ_ASSERT(!nsUrlClassifierUtils::IsMozTestTable(aTable));

  LOG(("Classifier::UpdateTableV4(%s)", PromiseFlatCString(aTable).get()));

  // Bail out early (successfully) when no non-empty update targets this
  // table; CheckValidUpdate also consumes empty updates for it.
  if (!CheckValidUpdate(aUpdates, aTable)) {
    return NS_OK;
  }

  RefPtr<LookupCacheV4> lookupCacheV4;
  {
    RefPtr<LookupCache> lookupCache = GetLookupCacheForUpdate(aTable);
    if (lookupCache) {
      lookupCacheV4 = LookupCache::Cast<LookupCacheV4>(lookupCache);
    }
  }
  if (!lookupCacheV4) {
    return NS_ERROR_UC_UPDATE_TABLE_NOT_FOUND;
  }

  nsresult rv = NS_OK;

  // If there are multiple updates for the same table, prefixes1 & prefixes2
  // will act as input and output in turn to reduce memory copy overhead.
  PrefixStringMap prefixes1, prefixes2;
  PrefixStringMap* input = &prefixes1;
  PrefixStringMap* output = &prefixes2;

  RefPtr<const TableUpdateV4> lastAppliedUpdate = nullptr;
  for (uint32_t i = 0; i < aUpdates.Length(); i++) {
    RefPtr<TableUpdate> update = aUpdates[i];
    if (!update || !update->TableName().Equals(aTable)) {
      continue;
    }

    // Only V4-format updates are valid for a LookupCacheV4.
    RefPtr<TableUpdateV4> updateV4 = TableUpdate::Cast<TableUpdateV4>(update);
    NS_ENSURE_TRUE(updateV4, NS_ERROR_UC_UPDATE_UNEXPECTED_VERSION);

    if (updateV4->IsFullUpdate()) {
      // A full update replaces everything accumulated so far; start from
      // empty prefix sets.
      input->Clear();
      output->Clear();
      rv = lookupCacheV4->ApplyUpdate(updateV4, *input, *output);
      if (NS_FAILED(rv)) {
        return rv;
      }
    } else {
      // If both prefix sets are empty, this means we are doing a partial update
      // without a prior full/partial update in the loop. In this case we should
      // get prefixes from the lookup cache first.
      if (prefixes1.IsEmpty() && prefixes2.IsEmpty()) {
        lookupCacheV4->GetPrefixes(prefixes1);
      } else {
        MOZ_ASSERT(prefixes1.IsEmpty() ^ prefixes2.IsEmpty());

        // When there are multiple partial updates, input should always point
        // to the non-empty prefix set(filled by previous full/partial update).
        // output should always point to the empty prefix set.
        input = prefixes1.IsEmpty() ? &prefixes2 : &prefixes1;
        output = prefixes1.IsEmpty() ? &prefixes1 : &prefixes2;
      }

      rv = lookupCacheV4->ApplyUpdate(updateV4, *input, *output);
      if (NS_FAILED(rv)) {
        return rv;
      }

      // The input set has been merged into |output|; clear it so the next
      // iteration's ping-pong invariant (exactly one non-empty set) holds.
      input->Clear();
    }

    // Keep track of the last applied update.
    lastAppliedUpdate = updateV4;

    // Mark the update consumed.
    aUpdates[i] = nullptr;
  }

  rv = lookupCacheV4->Build(*output);
  NS_ENSURE_SUCCESS(rv, NS_ERROR_UC_UPDATE_BUILD_PREFIX_FAILURE);

  rv = lookupCacheV4->WriteFile();
  NS_ENSURE_SUCCESS(rv, NS_ERROR_UC_UPDATE_FAIL_TO_WRITE_DISK);

  if (lastAppliedUpdate) {
    LOG(("Write meta data of the last applied update."));
    rv = lookupCacheV4->WriteMetadata(lastAppliedUpdate);
    NS_ENSURE_SUCCESS(rv, NS_ERROR_UC_UPDATE_FAIL_TO_WRITE_DISK);
  }

  LOG(("Successfully updated %s\n", PromiseFlatCString(aTable).get()));

  return NS_OK;
}
+
+nsresult Classifier::UpdateCache(RefPtr<const TableUpdate> aUpdate) {
+ if (!aUpdate) {
+ return NS_OK;
+ }
+
+ nsAutoCString table(aUpdate->TableName());
+ LOG(("Classifier::UpdateCache(%s)", table.get()));
+
+ RefPtr<LookupCache> lookupCache = GetLookupCache(table);
+ if (!lookupCache) {
+ return NS_ERROR_FAILURE;
+ }
+
+ RefPtr<LookupCacheV2> lookupV2 =
+ LookupCache::Cast<LookupCacheV2>(lookupCache);
+ if (lookupV2) {
+ RefPtr<const TableUpdateV2> updateV2 =
+ TableUpdate::Cast<TableUpdateV2>(aUpdate);
+ lookupV2->AddGethashResultToCache(updateV2->AddCompletes(),
+ updateV2->MissPrefixes());
+ } else {
+ RefPtr<LookupCacheV4> lookupV4 =
+ LookupCache::Cast<LookupCacheV4>(lookupCache);
+ if (!lookupV4) {
+ return NS_ERROR_FAILURE;
+ }
+
+ RefPtr<const TableUpdateV4> updateV4 =
+ TableUpdate::Cast<TableUpdateV4>(aUpdate);
+ lookupV4->AddFullHashResponseToCache(updateV4->FullHashResponse());
+ }
+
+#if defined(DEBUG)
+ lookupCache->DumpCache();
+#endif
+
+ return NS_OK;
+}
+
+RefPtr<LookupCache> Classifier::GetLookupCache(const nsACString& aTable,
+ bool aForUpdate) {
+ // GetLookupCache(aForUpdate==true) can only be called on update thread.
+ MOZ_ASSERT_IF(aForUpdate, OnUpdateThread());
+
+ LookupCacheArray& lookupCaches =
+ aForUpdate ? mNewLookupCaches : mLookupCaches;
+ auto& rootStoreDirectory =
+ aForUpdate ? mUpdatingDirectory : mRootStoreDirectory;
+
+ for (auto c : lookupCaches) {
+ if (c->TableName().Equals(aTable)) {
+ return c;
+ }
+ }
+
+ // We don't want to create lookupcache when shutdown is already happening.
+ if (ShouldAbort()) {
+ return nullptr;
+ }
+
+ // TODO : Bug 1302600, It would be better if we have a more general non-main
+ // thread method to convert table name to protocol version. Currently
+ // we can only know this by checking if the table name ends with
+ // '-proto'.
+ RefPtr<LookupCache> cache;
+ nsCString provider = GetProvider(aTable);
+
+ // Google requests SafeBrowsing related feature should only be enabled when
+ // the databases are update-to-date. Since we disable Safe Browsing update in
+ // Safe Mode, ignore tables provided by Google to ensure we don't show
+ // outdated warnings.
+ if (nsUrlClassifierUtils::IsInSafeMode()) {
+ if (provider.EqualsASCII("google") || provider.EqualsASCII("google4")) {
+ return nullptr;
+ }
+ }
+
+ if (StringEndsWith(aTable, "-proto"_ns)) {
+ cache = new LookupCacheV4(aTable, provider, rootStoreDirectory);
+ } else {
+ cache = new LookupCacheV2(aTable, provider, rootStoreDirectory);
+ }
+
+ nsresult rv = cache->Init();
+ if (NS_FAILED(rv)) {
+ return nullptr;
+ }
+ rv = cache->Open();
+ if (NS_SUCCEEDED(rv)) {
+ lookupCaches.AppendElement(cache);
+ return cache;
+ }
+
+ // At this point we failed to open LookupCache.
+ //
+ // GetLookupCache for update and for other usage will run on update thread
+ // and worker thread respectively (Bug 1339760). Removing stuff only in
+ // their own realms potentially increases the concurrency.
+
+ if (aForUpdate) {
+ // Remove intermediaries no matter if it's due to file corruption or not.
+ RemoveUpdateIntermediaries();
+ return nullptr;
+ }
+
+ // Non-update case.
+ if (rv == NS_ERROR_FILE_CORRUPTED) {
+ // Remove all the on-disk data when the table's prefix file is corrupted.
+ LOG(("Failed to get prefixes from file for table %s, delete on-disk data!",
+ aTable.BeginReading()));
+
+ DeleteTables(mRootStoreDirectory, nsTArray<nsCString>{nsCString(aTable)});
+ }
+ return nullptr;
+}
+
// Generates up to |aCount| deterministic "noise" prefixes for |aTableName|
// into |aNoiseEntries|, seeded by |aPrefix| so the same prefix always yields
// the same noise. Entries equal to |aPrefix| itself are skipped, so fewer
// than aCount entries may be produced.
nsresult Classifier::ReadNoiseEntries(const Prefix& aPrefix,
                                      const nsACString& aTableName,
                                      uint32_t aCount,
                                      PrefixArray& aNoiseEntries) {
  RefPtr<LookupCache> cache = GetLookupCache(aTableName);
  if (!cache) {
    return NS_ERROR_FAILURE;
  }

  // The cache is either V2 or V4, never both.
  RefPtr<LookupCacheV2> cacheV2 = LookupCache::Cast<LookupCacheV2>(cache);
  RefPtr<LookupCacheV4> cacheV4 = LookupCache::Cast<LookupCacheV4>(cache);
  MOZ_ASSERT_IF(cacheV2, !cacheV4);

  if (cache->PrefixLength() == 0) {
    NS_WARNING("Could not find prefix in PrefixSet during noise lookup");
    return NS_ERROR_FAILURE;
  }

  // We do not want to simply pick random prefixes, because this would allow
  // averaging out the noise by analysing the traffic from Firefox users.
  // Instead, we ensure the 'noise' is the same for the same prefix by seeding
  // the random number generator with the prefix. We prefer not to use rand()
  // which isn't thread safe, and the reseeding of which could trip up other
  // parts of the code that expect actual random numbers.
  // Here we use a simple LCG (Linear Congruential Generator) to generate
  // random numbers. We seed the LCG with the prefix we are generating noise
  // for.
  // http://en.wikipedia.org/wiki/Linear_congruential_generator

  uint32_t m = cache->PrefixLength();
  uint32_t a = aCount % m;
  uint32_t idx = aPrefix.ToUint32() % m;

  // NOTE(review): when aCount % m == 0, |a| is 0 and idx stays 0 for every
  // iteration, producing a single repeated index — confirm this degenerate
  // case is acceptable for the expected aCount/PrefixLength values.
  for (size_t i = 0; i < aCount; i++) {
    idx = (a * idx + a) % m;

    uint32_t hash;

    nsresult rv;
    if (cacheV2) {
      rv = cacheV2->GetPrefixByIndex(idx, &hash);
    } else {
      // We don't add noises for variable length prefix because of simplicity,
      // so we will only get fixed length prefix (4 bytes).
      rv = cacheV4->GetFixedLengthPrefixByIndex(idx, &hash);
    }

    if (NS_FAILED(rv)) {
      NS_WARNING(
          "Could not find the target prefix in PrefixSet during noise lookup");
      return NS_ERROR_FAILURE;
    }

    Prefix newPrefix;
    // In the case V4 little endian, we did swapping endian when converting from
    // char* to int, should revert endian to make sure we will send hex string
    // correctly See https://bugzilla.mozilla.org/show_bug.cgi?id=1283007#c23
    if (!cacheV2 && !bool(MOZ_BIG_ENDIAN())) {
      hash = NativeEndian::swapFromBigEndian(hash);
    }

    newPrefix.FromUint32(hash);
    if (newPrefix != aPrefix) {
      aNoiseEntries.AppendElement(newPrefix);
    }
  }

  return NS_OK;
}
+
// Serializes the add/sub chunk state of every on-disk V2 table into |aResult|
// in the "table;a:...:s:...\n" request format, and collects tables whose
// metadata or prefixes could not be loaded into |aFailedTableNames| so they
// can be reset for a full update.
// NOTE(review): |aDirectory| is unused — the scan always runs on
// mRootStoreDirectory. Confirm callers pass mRootStoreDirectory anyway.
nsresult Classifier::LoadHashStore(nsIFile* aDirectory, nsACString& aResult,
                                   nsTArray<nsCString>& aFailedTableNames) {
  nsTArray<nsCString> tables;
  // V2 tables are identified by their ".sbstore" metadata file.
  nsTArray<nsCString> exts = {V2_METADATA_SUFFIX};

  nsresult rv = ScanStoreDir(mRootStoreDirectory, exts, tables);
  if (NS_WARN_IF(NS_FAILED(rv))) {
    return rv;
  }

  for (const auto& table : tables) {
    HashStore store(table, GetProvider(table), mRootStoreDirectory);

    nsresult rv = store.Open();
    if (NS_FAILED(rv) || !GetLookupCache(table)) {
      // TableRequest is called right before applying an update.
      // If we cannot retrieve metadata for a given table or we fail to
      // load the prefixes for a table, reset the table to esnure we
      // apply a full update to the table.
      LOG(("Failed to get metadata for v2 table %s", table.get()));
      aFailedTableNames.AppendElement(table);
      continue;
    }

    ChunkSet& adds = store.AddChunks();
    ChunkSet& subs = store.SubChunks();

    // Open HashStore will always succeed even that is not a v2 table.
    // So exception tables without add and sub chunks.
    if (adds.Length() == 0 && subs.Length() == 0) {
      continue;
    }

    aResult.Append(store.TableName());
    aResult.Append(';');

    if (adds.Length() > 0) {
      aResult.AppendLiteral("a:");
      nsAutoCString addList;
      adds.Serialize(addList);
      aResult.Append(addList);
    }

    if (subs.Length() > 0) {
      // Separate the add list from the sub list when both are present.
      if (adds.Length() > 0) {
        aResult.Append(':');
      }
      aResult.AppendLiteral("s:");
      nsAutoCString subList;
      subs.Serialize(subList);
      aResult.Append(subList);
    }

    aResult.Append('\n');
  }

  return rv;
}
+
// Serializes the state and checksum of every on-disk V4 table into |aResult|
// as "table;base64(state):base64(sha256)\n" lines, and collects tables whose
// metadata could not be loaded into |aFailedTableNames|.
// NOTE(review): |aDirectory| is unused — the scan always runs on
// mRootStoreDirectory. Confirm callers pass mRootStoreDirectory anyway.
nsresult Classifier::LoadMetadata(nsIFile* aDirectory, nsACString& aResult,
                                  nsTArray<nsCString>& aFailedTableNames) {
  nsTArray<nsCString> tables;
  // V4 tables are identified by their ".metadata" file.
  nsTArray<nsCString> exts = {V4_METADATA_SUFFIX};

  nsresult rv = ScanStoreDir(mRootStoreDirectory, exts, tables);
  if (NS_WARN_IF(NS_FAILED(rv))) {
    return rv;
  }

  for (const auto& table : tables) {
    RefPtr<LookupCache> c = GetLookupCache(table);
    RefPtr<LookupCacheV4> lookupCacheV4 = LookupCache::Cast<LookupCacheV4>(c);

    // Tables with no V4 cache are treated as failed so they get reset.
    if (!lookupCacheV4) {
      aFailedTableNames.AppendElement(table);
      continue;
    }

    nsCString state, sha256;
    rv = lookupCacheV4->LoadMetadata(state, sha256);
    // Record corruption rate regardless of the specific failure.
    Telemetry::Accumulate(Telemetry::URLCLASSIFIER_VLPS_METADATA_CORRUPT,
                          rv == NS_ERROR_FILE_CORRUPTED);
    if (NS_FAILED(rv)) {
      LOG(("Failed to get metadata for v4 table %s", table.get()));
      aFailedTableNames.AppendElement(table);
      continue;
    }

    // The state might include '\n' so that we have to encode.
    nsAutoCString stateBase64;
    rv = Base64Encode(state, stateBase64);
    if (NS_WARN_IF(NS_FAILED(rv))) {
      return rv;
    }

    nsAutoCString checksumBase64;
    rv = Base64Encode(sha256, checksumBase64);
    if (NS_WARN_IF(NS_FAILED(rv))) {
      return rv;
    }

    LOG(("Appending state '%s' and checksum '%s' for table %s",
         stateBase64.get(), checksumBase64.get(), table.get()));

    aResult.AppendPrintf("%s;%s:%s\n", table.get(), stateBase64.get(),
                         checksumBase64.get());
  }

  return rv;
}
+
+bool Classifier::ShouldAbort() const {
+ return mIsClosed || nsUrlClassifierDBService::ShutdownHasStarted() ||
+ (mUpdateInterrupted && mUpdateThread->IsOnCurrentThread());
+}
+
+} // namespace safebrowsing
+} // namespace mozilla