diff options
Diffstat (limited to 'toolkit/components/url-classifier/Classifier.cpp')
-rw-r--r-- | toolkit/components/url-classifier/Classifier.cpp | 1786 |
1 files changed, 1786 insertions, 0 deletions
diff --git a/toolkit/components/url-classifier/Classifier.cpp b/toolkit/components/url-classifier/Classifier.cpp new file mode 100644 index 0000000000..7a78a59243 --- /dev/null +++ b/toolkit/components/url-classifier/Classifier.cpp @@ -0,0 +1,1786 @@ +//* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +#include "Classifier.h" +#include "LookupCacheV4.h" +#include "nsIFile.h" +#include "nsNetCID.h" +#include "nsPrintfCString.h" +#include "nsThreadUtils.h" +#include "mozilla/Components.h" +#include "mozilla/EndianUtils.h" +#include "mozilla/Telemetry.h" +#include "mozilla/IntegerPrintfMacros.h" +#include "mozilla/LazyIdleThread.h" +#include "mozilla/Logging.h" +#include "mozilla/SyncRunnable.h" +#include "mozilla/Base64.h" +#include "mozilla/Unused.h" +#include "mozilla/UniquePtr.h" +#include "nsUrlClassifierDBService.h" +#include "nsUrlClassifierUtils.h" + +// MOZ_LOG=UrlClassifierDbService:5 +extern mozilla::LazyLogModule gUrlClassifierDbServiceLog; +#define LOG(args) \ + MOZ_LOG(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug, args) +#define LOG_ENABLED() \ + MOZ_LOG_TEST(gUrlClassifierDbServiceLog, mozilla::LogLevel::Debug) + +#define STORE_DIRECTORY "safebrowsing"_ns +#define TO_DELETE_DIR_SUFFIX "-to_delete"_ns +#define BACKUP_DIR_SUFFIX "-backup"_ns +#define UPDATING_DIR_SUFFIX "-updating"_ns + +#define V4_METADATA_SUFFIX ".metadata"_ns +#define V2_METADATA_SUFFIX ".sbstore"_ns + +// The amount of time, in milliseconds, that our IO thread will stay alive after +// the last event it processes. +#define DEFAULT_THREAD_TIMEOUT_MS 5000 + +namespace mozilla { +namespace safebrowsing { + +bool Classifier::OnUpdateThread() const { + bool onthread = false; + if (mUpdateThread) { + mUpdateThread->IsOnCurrentThread(&onthread); + } + return onthread; +} + +void Classifier::SplitTables(const nsACString& str, + nsTArray<nsCString>& tables) { + tables.Clear(); + + for (const auto& table : str.Split(',')) { + if (!table.IsEmpty()) { + tables.AppendElement(table); + } + } + + // Remove duplicates + tables.Sort(); + const auto newEnd = std::unique(tables.begin(), tables.end()); + tables.TruncateLength(std::distance(tables.begin(), newEnd)); +} + +nsresult Classifier::GetPrivateStoreDirectory( + nsIFile* aRootStoreDirectory, const nsACString& aTableName, + const nsACString& aProvider, nsIFile** aPrivateStoreDirectory) { + NS_ENSURE_ARG_POINTER(aPrivateStoreDirectory); + + if (!StringEndsWith(aTableName, "-proto"_ns)) { + // Only V4 table names (ends with '-proto') would be stored + // to per-provider sub-directory. + nsCOMPtr<nsIFile>(aRootStoreDirectory).forget(aPrivateStoreDirectory); + return NS_OK; + } + + if (aProvider.IsEmpty()) { + // When failing to get provider, just store in the root folder. + nsCOMPtr<nsIFile>(aRootStoreDirectory).forget(aPrivateStoreDirectory); + return NS_OK; + } + + nsCOMPtr<nsIFile> providerDirectory; + + // Clone first since we are gonna create a new directory. + nsresult rv = aRootStoreDirectory->Clone(getter_AddRefs(providerDirectory)); + NS_ENSURE_SUCCESS(rv, rv); + + // Append the provider name to the root store directory. + rv = providerDirectory->AppendNative(aProvider); + NS_ENSURE_SUCCESS(rv, rv); + + // Ensure existence of the provider directory. + bool dirExists; + rv = providerDirectory->Exists(&dirExists); + NS_ENSURE_SUCCESS(rv, rv); + + if (!dirExists) { + LOG(("Creating private directory for %s", nsCString(aTableName).get())); + rv = providerDirectory->Create(nsIFile::DIRECTORY_TYPE, 0755); + NS_ENSURE_SUCCESS(rv, rv); + providerDirectory.forget(aPrivateStoreDirectory); + return rv; + } + + // Store directory exists. Check if it's a directory. + bool isDir; + rv = providerDirectory->IsDirectory(&isDir); + NS_ENSURE_SUCCESS(rv, rv); + if (!isDir) { + return NS_ERROR_FILE_DESTINATION_NOT_DIR; + } + + providerDirectory.forget(aPrivateStoreDirectory); + + return NS_OK; +} + +Classifier::Classifier() + : mIsTableRequestResultOutdated(true), + mUpdateInterrupted(true), + mIsClosed(false) { + // Make a lazy thread for any IO + mUpdateThread = + new LazyIdleThread(DEFAULT_THREAD_TIMEOUT_MS, "Classifier Update", + LazyIdleThread::ShutdownMethod::ManualShutdown); +} + +Classifier::~Classifier() { + if (mUpdateThread) { + mUpdateThread->Shutdown(); + mUpdateThread = nullptr; + } + + Close(); +} + +nsresult Classifier::SetupPathNames() { + // Get the root directory where to store all the databases. + nsresult rv = mCacheDirectory->Clone(getter_AddRefs(mRootStoreDirectory)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mRootStoreDirectory->AppendNative(STORE_DIRECTORY); + NS_ENSURE_SUCCESS(rv, rv); + + // Make sure LookupCaches (which are persistent and survive updates) + // are reading/writing in the right place. We will be moving their + // files "underneath" them during backup/restore. + for (uint32_t i = 0; i < mLookupCaches.Length(); i++) { + mLookupCaches[i]->UpdateRootDirHandle(mRootStoreDirectory); + } + + // Directory where to move a backup before an update. + rv = mCacheDirectory->Clone(getter_AddRefs(mBackupDirectory)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mBackupDirectory->AppendNative(STORE_DIRECTORY + BACKUP_DIR_SUFFIX); + NS_ENSURE_SUCCESS(rv, rv); + + // Directory where to be working on the update. + rv = mCacheDirectory->Clone(getter_AddRefs(mUpdatingDirectory)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mUpdatingDirectory->AppendNative(STORE_DIRECTORY + UPDATING_DIR_SUFFIX); + NS_ENSURE_SUCCESS(rv, rv); + + // Directory where to move the backup so we can atomically + // delete (really move) it. + rv = mCacheDirectory->Clone(getter_AddRefs(mToDeleteDirectory)); + NS_ENSURE_SUCCESS(rv, rv); + + rv = mToDeleteDirectory->AppendNative(STORE_DIRECTORY + TO_DELETE_DIR_SUFFIX); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult Classifier::CreateStoreDirectory() { + if (ShouldAbort()) { + return NS_OK; // nothing to do, the classifier is done + } + + // Ensure the safebrowsing directory exists. + bool storeExists; + nsresult rv = mRootStoreDirectory->Exists(&storeExists); + NS_ENSURE_SUCCESS(rv, rv); + + if (!storeExists) { + rv = mRootStoreDirectory->Create(nsIFile::DIRECTORY_TYPE, 0755); + NS_ENSURE_SUCCESS(rv, rv); + } else { + bool storeIsDir; + rv = mRootStoreDirectory->IsDirectory(&storeIsDir); + NS_ENSURE_SUCCESS(rv, rv); + if (!storeIsDir) return NS_ERROR_FILE_DESTINATION_NOT_DIR; + } + + return NS_OK; +} + +// Testing entries are created directly in LookupCache instead of +// created via update(Bug 1531354). We can remove unused testing +// files from profile. +// TODO: See Bug 723153 to clear old safebrowsing store +nsresult Classifier::ClearLegacyFiles() { + if (ShouldAbort()) { + return NS_OK; // nothing to do, the classifier is done + } + + nsTArray<nsLiteralCString> tables = { + "test-phish-simple"_ns, "test-malware-simple"_ns, + "test-unwanted-simple"_ns, "test-harmful-simple"_ns, + "test-track-simple"_ns, "test-trackwhite-simple"_ns, + "test-block-simple"_ns, + }; + + const auto fnFindAndRemove = [](nsIFile* aRootDirectory, + const nsACString& aFileName) { + nsCOMPtr<nsIFile> file; + nsresult rv = aRootDirectory->Clone(getter_AddRefs(file)); + if (NS_FAILED(rv)) { + return false; + } + + rv = file->AppendNative(aFileName); + if (NS_FAILED(rv)) { + return false; + } + + bool exists; + rv = file->Exists(&exists); + if (NS_FAILED(rv) || !exists) { + return false; + } + + rv = file->Remove(false); + if (NS_FAILED(rv)) { + return false; + } + + return true; + }; + + for (const auto& table : tables) { + // Remove both .sbstore and .vlpse if .sbstore exists + if (fnFindAndRemove(mRootStoreDirectory, table + ".sbstore"_ns)) { + fnFindAndRemove(mRootStoreDirectory, table + ".vlpset"_ns); + } + } + + return NS_OK; +} + +nsresult Classifier::Open(nsIFile& aCacheDirectory) { + // Remember the Local profile directory. + nsresult rv = aCacheDirectory.Clone(getter_AddRefs(mCacheDirectory)); + NS_ENSURE_SUCCESS(rv, rv); + + // Create the handles to the update and backup directories. + rv = SetupPathNames(); + NS_ENSURE_SUCCESS(rv, rv); + + // Clean up any to-delete directories that haven't been deleted yet. + // This is still required for backward compatibility. + rv = CleanToDelete(); + NS_ENSURE_SUCCESS(rv, rv); + + // If we met a crash during the previous update, "safebrowsing-updating" + // directory will exist and let's remove it. + rv = mUpdatingDirectory->Remove(true); + if (NS_SUCCEEDED(rv)) { + // If the "safebrowsing-updating" exists, it implies a crash occurred + // in the previous update. + LOG(("We may have hit a crash in the previous update.")); + } + + // Check whether we have an incomplete update and recover from the + // backup if so. + rv = RecoverBackups(); + NS_ENSURE_SUCCESS(rv, rv); + + // Make sure the main store directory exists. + rv = CreateStoreDirectory(); + NS_ENSURE_SUCCESS(rv, rv); + + rv = ClearLegacyFiles(); + Unused << NS_WARN_IF(NS_FAILED(rv)); + + // Build the list of know urlclassifier lists + // XXX: Disk IO potentially on the main thread during startup + RegenActiveTables(); + + return NS_OK; +} + +void Classifier::Close() { + // Close will be called by PreShutdown, so it is important to note that + // things put here should not affect an ongoing update thread. + mIsClosed = true; + DropStores(); +} + +void Classifier::Reset() { + MOZ_ASSERT(!OnUpdateThread(), "Reset() MUST NOT be called on update thread"); + + LOG(("Reset() is called so we interrupt the update.")); + mUpdateInterrupted = true; + + // We don't pass the ref counted object 'Classifier' to resetFunc because we + // don't want to release 'Classifier in the update thread, which triggers an + // assertion when LazyIdelUpdate thread is not created and removed by the same + // thread (worker thread). Since |resetFuc| is a synchronous call, we can just + // pass the reference of Classifier because Classifier's life cycle is + // guarantee longer than |resetFunc|. + auto resetFunc = [&] { + if (this->mIsClosed) { + return; // too late to reset, bail + } + this->DropStores(); + + this->mRootStoreDirectory->Remove(true); + this->mBackupDirectory->Remove(true); + this->mUpdatingDirectory->Remove(true); + this->mToDeleteDirectory->Remove(true); + + this->CreateStoreDirectory(); + this->RegenActiveTables(); + }; + + if (!mUpdateThread) { + LOG(("Async update has been disabled. Just Reset() on worker thread.")); + resetFunc(); + return; + } + + nsCOMPtr<nsIRunnable> r = + NS_NewRunnableFunction("safebrowsing::Classifier::Reset", resetFunc); + SyncRunnable::DispatchToThread(mUpdateThread, r); +} + +void Classifier::ResetTables(ClearType aType, + const nsTArray<nsCString>& aTables) { + for (uint32_t i = 0; i < aTables.Length(); i++) { + LOG(("Resetting table: %s", aTables[i].get())); + RefPtr<LookupCache> cache = GetLookupCache(aTables[i]); + if (cache) { + // Remove any cached Completes for this table if clear type is Clear_Cache + if (aType == Clear_Cache) { + cache->ClearCache(); + } else { + cache->ClearAll(); + } + } + } + + // Clear on-disk database if clear type is Clear_All + if (aType == Clear_All) { + DeleteTables(mRootStoreDirectory, aTables); + + RegenActiveTables(); + } +} + +// |DeleteTables| is used by |GetLookupCache| to remove on-disk data when +// we detect prefix file corruption. So make sure not to call |GetLookupCache| +// again in this function to avoid infinite loop. +void Classifier::DeleteTables(nsIFile* aDirectory, + const nsTArray<nsCString>& aTables) { + nsCOMPtr<nsIDirectoryEnumerator> entries; + nsresult rv = aDirectory->GetDirectoryEntries(getter_AddRefs(entries)); + NS_ENSURE_SUCCESS_VOID(rv); + + nsCOMPtr<nsIFile> file; + while (NS_SUCCEEDED(rv = entries->GetNextFile(getter_AddRefs(file))) && + file) { + // If |file| is a directory, recurse to find its entries as well. + bool isDirectory; + if (NS_FAILED(file->IsDirectory(&isDirectory))) { + continue; + } + if (isDirectory) { + DeleteTables(file, aTables); + continue; + } + + nsCString leafName; + rv = file->GetNativeLeafName(leafName); + NS_ENSURE_SUCCESS_VOID(rv); + + // Remove file extension if there's one. + int32_t dotPosition = leafName.RFind("."); + if (dotPosition >= 0) { + leafName.Truncate(dotPosition); + } + + if (!leafName.IsEmpty() && aTables.Contains(leafName)) { + if (NS_FAILED(file->Remove(false))) { + NS_WARNING(nsPrintfCString("Fail to remove file %s from the disk", + leafName.get()) + .get()); + } + } + } + NS_ENSURE_SUCCESS_VOID(rv); +} + +// This function is I/O intensive. It should only be called before applying +// an update. +void Classifier::TableRequest(nsACString& aResult) { + MOZ_ASSERT(!NS_IsMainThread(), + "TableRequest must be called on the classifier worker thread."); + + // This function and all disk I/O are guaranteed to occur + // on the same thread so we don't need to add a lock around. + if (!mIsTableRequestResultOutdated) { + aResult = mTableRequestResult; + return; + } + + // We reset tables failed to load here; not just tables are corrupted. + // It is because this is a safer way to ensure Safe Browsing databases + // can be recovered from any bad situations. + nsTArray<nsCString> failedTables; + + // Load meta data from *.sbstore files in the root directory. + // Specifically for v4 tables. + nsCString v2Metadata; + nsresult rv = LoadHashStore(mRootStoreDirectory, v2Metadata, failedTables); + if (NS_SUCCEEDED(rv)) { + aResult.Append(v2Metadata); + } + + // Load meta data from *.metadata files in the root directory. + // Specifically for v4 tables. + nsCString v4Metadata; + rv = LoadMetadata(mRootStoreDirectory, v4Metadata, failedTables); + if (NS_SUCCEEDED(rv)) { + aResult.Append(v4Metadata); + } + + // Clear data for tables that we failed to open, a full update should + // be requested for those tables. + if (failedTables.Length() != 0) { + LOG(("Reset tables failed to open before applying an update")); + ResetTables(Clear_All, failedTables); + } + + // Update the TableRequest result in-memory cache. + mTableRequestResult = aResult; + mIsTableRequestResultOutdated = false; +} + +nsresult Classifier::CheckURIFragments( + const nsTArray<nsCString>& aSpecFragments, const nsACString& aTable, + LookupResultArray& aResults) { + // A URL can form up to 30 different fragments + MOZ_ASSERT(aSpecFragments.Length() != 0); + MOZ_ASSERT(aSpecFragments.Length() <= + (MAX_HOST_COMPONENTS * (MAX_PATH_COMPONENTS + 2))); + + if (LOG_ENABLED()) { + uint32_t urlIdx = 0; + for (uint32_t i = 1; i < aSpecFragments.Length(); i++) { + if (aSpecFragments[urlIdx].Length() < aSpecFragments[i].Length()) { + urlIdx = i; + } + } + LOG(("Checking table %s, URL is %s", aTable.BeginReading(), + aSpecFragments[urlIdx].get())); + } + + RefPtr<LookupCache> cache = GetLookupCache(aTable); + if (NS_WARN_IF(!cache)) { + return NS_ERROR_FAILURE; + } + + // Now check each lookup fragment against the entries in the DB. + for (uint32_t i = 0; i < aSpecFragments.Length(); i++) { + Completion lookupHash; + lookupHash.FromPlaintext(aSpecFragments[i]); + + bool has, confirmed; + uint32_t matchLength; + + nsresult rv = cache->Has(lookupHash, &has, &matchLength, &confirmed); + NS_ENSURE_SUCCESS(rv, rv); + + if (has) { + RefPtr<LookupResult> result = new LookupResult; + aResults.AppendElement(result); + + if (LOG_ENABLED()) { + nsAutoCString checking; + lookupHash.ToHexString(checking); + LOG(("Found a result in fragment %s, hash %s (%X)", + aSpecFragments[i].get(), checking.get(), lookupHash.ToUint32())); + LOG(("Result %s, match %d-bytes prefix", + confirmed ? "confirmed." : "Not confirmed.", matchLength)); + } + + result->hash.complete = lookupHash; + result->mConfirmed = confirmed; + result->mTableName.Assign(cache->TableName()); + result->mPartialHashLength = confirmed ? COMPLETE_SIZE : matchLength; + result->mProtocolV2 = LookupCache::Cast<LookupCacheV2>(cache); + } + } + + return NS_OK; +} + +static nsresult SwapDirectoryContent(nsIFile* aDir1, nsIFile* aDir2, + nsIFile* aParentDir, nsIFile* aTempDir) { + // Pre-condition: |aDir1| and |aDir2| are directory and their parent + // are both |aParentDir|. + // + // Post-condition: The locations where aDir1 and aDir2 point to will not + // change but their contents will be exchanged. If we failed + // to swap their content, everything will be rolled back. + + nsAutoCString tempDirName; + aTempDir->GetNativeLeafName(tempDirName); + + nsresult rv; + + nsAutoCString dirName1, dirName2; + aDir1->GetNativeLeafName(dirName1); + aDir2->GetNativeLeafName(dirName2); + + LOG(("Swapping directories %s and %s...", dirName1.get(), dirName2.get())); + + // 1. Rename "dirName1" to "temp" + rv = aDir1->RenameToNative(nullptr, tempDirName); + if (NS_FAILED(rv)) { + LOG(("Unable to rename %s to %s", dirName1.get(), tempDirName.get())); + return rv; // Nothing to roll back. + } + + // 1.1. Create a handle for temp directory. This is required since + // |nsIFile.rename| will not change the location where the + // object points to. + nsCOMPtr<nsIFile> tempDirectory; + rv = aParentDir->Clone(getter_AddRefs(tempDirectory)); + rv = tempDirectory->AppendNative(tempDirName); + + // 2. Rename "dirName2" to "dirName1". + rv = aDir2->RenameToNative(nullptr, dirName1); + if (NS_FAILED(rv)) { + LOG(("Failed to rename %s to %s. Rename temp directory back to %s", + dirName2.get(), dirName1.get(), dirName1.get())); + nsresult rbrv = tempDirectory->RenameToNative(nullptr, dirName1); + NS_ENSURE_SUCCESS(rbrv, rbrv); + return rv; + } + + // 3. Rename "temp" to "dirName2". + rv = tempDirectory->RenameToNative(nullptr, dirName2); + if (NS_FAILED(rv)) { + LOG(("Failed to rename temp directory to %s. ", dirName2.get())); + // We've done (1) renaming "dir1 to temp" and + // (2) renaming "dir2 to dir1" + // so the rollback is + // (1) renaming "dir1 to dir2" and + // (2) renaming "temp to dir1" + nsresult rbrv; // rollback result + rbrv = aDir1->RenameToNative(nullptr, dirName2); + NS_ENSURE_SUCCESS(rbrv, rbrv); + rbrv = tempDirectory->RenameToNative(nullptr, dirName1); + NS_ENSURE_SUCCESS(rbrv, rbrv); + return rv; + } + + return rv; +} + +void Classifier::RemoveUpdateIntermediaries() { + // Remove old LookupCaches. + mNewLookupCaches.Clear(); + + // Remove the "old" directory. (despite its looking-new name) + if (NS_FAILED(mUpdatingDirectory->Remove(true))) { + // If the directory is locked from removal for some reason, + // we will fail here and it doesn't matter until the next + // update. (the next udpate will fail due to the removable + // "safebrowsing-udpating" directory.) + LOG(("Failed to remove updating directory.")); + } +} + +void Classifier::CopyAndInvalidateFullHashCache() { + MOZ_ASSERT(!OnUpdateThread(), + "CopyAndInvalidateFullHashCache cannot be called on update thread " + "since it mutates mLookupCaches which is only safe on " + "worker thread."); + + // New lookup caches are built from disk, data likes cache which is + // generated online won't exist. We have to manually copy cache from + // old LookupCache to new LookupCache. + for (auto& newCache : mNewLookupCaches) { + for (auto& oldCache : mLookupCaches) { + if (oldCache->TableName() == newCache->TableName()) { + newCache->CopyFullHashCache(oldCache); + break; + } + } + } + + // Clear cache when update. + // Invalidate cache entries in CopyAndInvalidateFullHashCache because only + // at this point we will have cache data in LookupCache. + for (auto& newCache : mNewLookupCaches) { + newCache->InvalidateExpiredCacheEntries(); + } +} + +void Classifier::MergeNewLookupCaches() { + MOZ_ASSERT(!OnUpdateThread(), + "MergeNewLookupCaches cannot be called on update thread " + "since it mutates mLookupCaches which is only safe on " + "worker thread."); + + for (auto& newCache : mNewLookupCaches) { + // For each element in mNewLookCaches, it will be swapped with + // - An old cache in mLookupCache with the same table name or + // - nullptr (mLookupCache will be expaned) otherwise. + size_t swapIndex = 0; + for (; swapIndex < mLookupCaches.Length(); swapIndex++) { + if (mLookupCaches[swapIndex]->TableName() == newCache->TableName()) { + break; + } + } + if (swapIndex == mLookupCaches.Length()) { + mLookupCaches.AppendElement(nullptr); + } + + std::swap(mLookupCaches[swapIndex], newCache); + mLookupCaches[swapIndex]->UpdateRootDirHandle(mRootStoreDirectory); + } + + // At this point, mNewLookupCaches's length remains the same but + // will contain either old cache (override) or nullptr (append). +} + +nsresult Classifier::SwapInNewTablesAndCleanup() { + nsresult rv; + + // Step 1. Swap in on-disk tables. The idea of using "safebrowsing-backup" + // as the intermediary directory is we can get databases recovered if + // crash occurred in any step of the swap. (We will recover from + // "safebrowsing-backup" in OpenDb().) + rv = SwapDirectoryContent(mUpdatingDirectory, // contains new tables + mRootStoreDirectory, // contains old tables + mCacheDirectory, // common parent dir + mBackupDirectory); // intermediary dir for swap + if (NS_FAILED(rv)) { + LOG(("Failed to swap in on-disk tables.")); + RemoveUpdateIntermediaries(); + return rv; + } + + // Step 2. Merge mNewLookupCaches into mLookupCaches. The outdated + // LookupCaches will be stored in mNewLookupCaches and be cleaned + // up later. + MergeNewLookupCaches(); + + // Step 3. Re-generate active tables based on on-disk tables. + rv = RegenActiveTables(); + if (NS_FAILED(rv)) { + LOG(("Failed to re-generate active tables!")); + } + + // Step 4. Clean up intermediaries for update. + RemoveUpdateIntermediaries(); + + // Step 5. Invalidate cached tableRequest request. + mIsTableRequestResultOutdated = true; + + LOG(("Done swap in updated tables.")); + + return rv; +} + +void Classifier::FlushAndDisableAsyncUpdate() { + LOG(("Classifier::FlushAndDisableAsyncUpdate [%p, %p]", this, + mUpdateThread.get())); + + if (!mUpdateThread) { + LOG(("Async update has been disabled.")); + return; + } + + mUpdateThread->Shutdown(); + mUpdateThread = nullptr; +} + +nsresult Classifier::AsyncApplyUpdates(const TableUpdateArray& aUpdates, + const AsyncUpdateCallback& aCallback) { + LOG(("Classifier::AsyncApplyUpdates")); + + if (!mUpdateThread) { + LOG(("Async update has already been disabled.")); + return NS_ERROR_FAILURE; + } + + // Caller thread | Update thread + // -------------------------------------------------------- + // | ApplyUpdatesBackground + // (processing other task) | (bg-update done. ping back to caller + // thread) (processing other task) | idle... ApplyUpdatesForeground | + // callback | + + MOZ_ASSERT(mNewLookupCaches.IsEmpty(), + "There should be no leftovers from a previous update."); + + mUpdateInterrupted = false; + nsresult rv = + mRootStoreDirectory->Clone(getter_AddRefs(mRootStoreDirectoryForUpdate)); + if (NS_FAILED(rv)) { + LOG(("Failed to clone mRootStoreDirectory for update.")); + return rv; + } + + nsCOMPtr<nsIThread> callerThread = NS_GetCurrentThread(); + MOZ_ASSERT(!OnUpdateThread()); + + RefPtr<Classifier> self = this; + nsCOMPtr<nsIRunnable> bgRunnable = NS_NewRunnableFunction( + "safebrowsing::Classifier::AsyncApplyUpdates", + [self, aUpdates = aUpdates.Clone(), aCallback, callerThread]() mutable { + MOZ_ASSERT(self->OnUpdateThread(), "MUST be on update thread"); + + nsresult bgRv; + nsTArray<nsCString> failedTableNames; + + TableUpdateArray updates; + + // Make a copy of the array since we'll be removing entries as + // we process them on the background thread. + if (updates.AppendElements(std::move(aUpdates), fallible)) { + LOG(("Step 1. ApplyUpdatesBackground on update thread.")); + bgRv = self->ApplyUpdatesBackground(updates, failedTableNames); + } else { + LOG( + ("Step 1. Not enough memory to run ApplyUpdatesBackground on " + "update thread.")); + bgRv = NS_ERROR_OUT_OF_MEMORY; + } + + // Classifier is created in the worker thread and it has to be released + // in the worker thread(because of the constrain that LazyIdelThread has + // to be created and released in the same thread). We transfer the + // ownership to the caller thread here to gurantee that we don't release + // it in the udpate thread. + nsCOMPtr<nsIRunnable> fgRunnable = NS_NewRunnableFunction( + "safebrowsing::Classifier::AsyncApplyUpdates", + [self = std::move(self), aCallback, bgRv, + failedTableNames = std::move(failedTableNames), + callerThread]() mutable { + RefPtr<Classifier> classifier = std::move(self); + + MOZ_ASSERT(NS_GetCurrentThread() == callerThread, + "MUST be on caller thread"); + + LOG(("Step 2. ApplyUpdatesForeground on caller thread")); + nsresult rv = + classifier->ApplyUpdatesForeground(bgRv, failedTableNames); + + LOG(("Step 3. Updates applied! Fire callback.")); + aCallback(rv); + }); + + callerThread->Dispatch(fgRunnable, NS_DISPATCH_NORMAL); + }); + + return mUpdateThread->Dispatch(bgRunnable, NS_DISPATCH_NORMAL); +} + +nsresult Classifier::ApplyUpdatesBackground( + TableUpdateArray& aUpdates, nsTArray<nsCString>& aFailedTableNames) { + // |mUpdateInterrupted| is guaranteed to have been unset. + // If |mUpdateInterrupted| is set at any point, Reset() must have + // been called then we need to interrupt the update process. + // We only add checkpoints for non-trivial tasks. + + if (aUpdates.IsEmpty()) { + return NS_OK; + } + + nsUrlClassifierUtils* urlUtil = nsUrlClassifierUtils::GetInstance(); + if (NS_WARN_IF(!urlUtil)) { + return NS_ERROR_FAILURE; + } + + nsCString provider; + // Assume all TableUpdate objects should have the same provider. + urlUtil->GetTelemetryProvider(aUpdates[0]->TableName(), provider); + + Telemetry::AutoTimer<Telemetry::URLCLASSIFIER_CL_KEYED_UPDATE_TIME> + keyedTimer(provider); + + PRIntervalTime clockStart = 0; + if (LOG_ENABLED()) { + clockStart = PR_IntervalNow(); + } + + nsresult rv; + + // Check point 1: Copying files takes time so we check ShouldAbort() + // inside CopyInUseDirForUpdate(). + rv = CopyInUseDirForUpdate(); // i.e. mUpdatingDirectory will be setup. + if (NS_FAILED(rv)) { + LOG(("Failed to copy in-use directory for update.")); + return (rv == NS_ERROR_ABORT) ? NS_OK : rv; + } + + LOG(("Applying %zu table updates.", aUpdates.Length())); + + for (uint32_t i = 0; i < aUpdates.Length(); i++) { + RefPtr<const TableUpdate> update = aUpdates[i]; + if (!update) { + // Previous UpdateHashStore() may have consumed this update.. + continue; + } + + // Run all updates for one table + nsAutoCString updateTable(update->TableName()); + + // Check point 2: Processing downloaded data takes time. + if (ShouldAbort()) { + LOG(("Update is interrupted. Stop building new tables.")); + return NS_OK; + } + + // Will update the mirrored in-memory and on-disk databases. + if (TableUpdate::Cast<TableUpdateV2>(update)) { + rv = UpdateHashStore(aUpdates, updateTable); + } else { + rv = UpdateTableV4(aUpdates, updateTable); + } + + if (NS_WARN_IF(NS_FAILED(rv))) { + LOG(("Failed to update table: %s", updateTable.get())); + // We don't quit the updating process immediately when we discover + // a failure. Instead, we continue to apply updates to the + // remaining tables to find other tables which may also fail to + // apply an update. This help us reset all the corrupted tables + // within a single update. + // Note that changes that result from successful updates don't take + // effect after the updating process is finished. This is because + // when an error occurs during the updating process, we ignore all + // changes that have happened during the udpating process. + aFailedTableNames.AppendElement(updateTable); + continue; + } + } + + if (!aFailedTableNames.IsEmpty()) { + RemoveUpdateIntermediaries(); + return NS_ERROR_FAILURE; + } + + if (LOG_ENABLED()) { + PRIntervalTime clockEnd = PR_IntervalNow(); + LOG(("update took %dms\n", + PR_IntervalToMilliseconds(clockEnd - clockStart))); + } + + return rv; +} + +nsresult Classifier::ApplyUpdatesForeground( + nsresult aBackgroundRv, const nsTArray<nsCString>& aFailedTableNames) { + if (ShouldAbort()) { + LOG(("Update is interrupted! Just remove update intermediaries.")); + RemoveUpdateIntermediaries(); + return NS_OK; + } + if (NS_SUCCEEDED(aBackgroundRv)) { + // Copy and Invalidate fullhash cache here because this call requires + // mLookupCaches which is only available on work-thread + CopyAndInvalidateFullHashCache(); + + return SwapInNewTablesAndCleanup(); + } + if (NS_ERROR_OUT_OF_MEMORY != aBackgroundRv) { + ResetTables(Clear_All, aFailedTableNames); + } + return aBackgroundRv; +} + +nsresult Classifier::ApplyFullHashes(ConstTableUpdateArray& aUpdates) { + MOZ_ASSERT(!OnUpdateThread(), + "ApplyFullHashes() MUST NOT be called on update thread"); + MOZ_ASSERT( + !NS_IsMainThread(), + "ApplyFullHashes() must be called on the classifier worker thread."); + + LOG(("Applying %zu table gethashes.", aUpdates.Length())); + + for (uint32_t i = 0; i < aUpdates.Length(); i++) { + nsresult rv = UpdateCache(aUpdates[i]); + NS_ENSURE_SUCCESS(rv, rv); + + aUpdates[i] = nullptr; + } + + return NS_OK; +} + +void Classifier::GetCacheInfo(const nsACString& aTable, + nsIUrlClassifierCacheInfo** aCache) { + RefPtr<const LookupCache> lookupCache = GetLookupCache(aTable); + if (!lookupCache) { + return; + } + + lookupCache->GetCacheInfo(aCache); +} + +void Classifier::DropStores() { + // See the comment in Classifier::Close() before adding anything here. + mLookupCaches.Clear(); +} + +nsresult Classifier::RegenActiveTables() { + if (ShouldAbort()) { + return NS_OK; // nothing to do, the classifier is done + } + + mActiveTablesCache.Clear(); + + // The extension of V2 and V4 prefix files is .vlpset + // We still check .pset here for legacy load. + nsTArray<nsCString> exts = {".vlpset"_ns, ".pset"_ns}; + nsTArray<nsCString> foundTables; + nsresult rv = ScanStoreDir(mRootStoreDirectory, exts, foundTables); + Unused << NS_WARN_IF(NS_FAILED(rv)); + + // We don't have test tables on disk, add Moz built-in entries here + rv = AddMozEntries(foundTables); + Unused << NS_WARN_IF(NS_FAILED(rv)); + + for (const auto& table : foundTables) { + RefPtr<const LookupCache> lookupCache = GetLookupCache(table); + if (!lookupCache) { + LOG(("Inactive table (no cache): %s", table.get())); + continue; + } + + if (!lookupCache->IsPrimed()) { + LOG(("Inactive table (cache not primed): %s", table.get())); + continue; + } + + LOG(("Active %s table: %s", + LookupCache::Cast<const LookupCacheV4>(lookupCache) ? "v4" : "v2", + table.get())); + + mActiveTablesCache.AppendElement(table); + } + + return NS_OK; +} + +nsresult Classifier::AddMozEntries(nsTArray<nsCString>& aTables) { + nsTArray<nsLiteralCString> tables = { + "moztest-phish-simple"_ns, "moztest-malware-simple"_ns, + "moztest-unwanted-simple"_ns, "moztest-harmful-simple"_ns, + "moztest-track-simple"_ns, "moztest-trackwhite-simple"_ns, + "moztest-block-simple"_ns, + }; + + for (const auto& table : tables) { + RefPtr<LookupCache> c = GetLookupCache(table, false); + RefPtr<LookupCacheV2> lookupCache = LookupCache::Cast<LookupCacheV2>(c); + if (!lookupCache || lookupCache->IsPrimed()) { + continue; + } + + aTables.AppendElement(table); + } + + return NS_OK; +} + +nsresult Classifier::ScanStoreDir(nsIFile* aDirectory, + const nsTArray<nsCString>& aExtensions, + nsTArray<nsCString>& aTables) { + nsCOMPtr<nsIDirectoryEnumerator> entries; + nsresult rv = aDirectory->GetDirectoryEntries(getter_AddRefs(entries)); + NS_ENSURE_SUCCESS(rv, rv); + + nsCOMPtr<nsIFile> file; + while (NS_SUCCEEDED(rv = entries->GetNextFile(getter_AddRefs(file))) && + file) { + // If |file| is a directory, recurse to find its entries as well. + bool isDirectory; + if (NS_FAILED(file->IsDirectory(&isDirectory))) { + continue; + } + if (isDirectory) { + ScanStoreDir(file, aExtensions, aTables); + continue; + } + + nsAutoCString leafName; + rv = file->GetNativeLeafName(leafName); + NS_ENSURE_SUCCESS(rv, rv); + + for (const auto& ext : aExtensions) { + if (StringEndsWith(leafName, ext)) { + aTables.AppendElement( + Substring(leafName, 0, leafName.Length() - strlen(ext.get()))); + break; + } + } + } + + return NS_OK; +} + +nsresult Classifier::ActiveTables(nsTArray<nsCString>& aTables) const { + aTables = mActiveTablesCache.Clone(); + return NS_OK; +} + +nsresult Classifier::CleanToDelete() { + bool exists; + nsresult rv = mToDeleteDirectory->Exists(&exists); + NS_ENSURE_SUCCESS(rv, rv); + + if (exists) { + rv = mToDeleteDirectory->Remove(true); + NS_ENSURE_SUCCESS(rv, rv); + } + + return NS_OK; +} + +#ifdef MOZ_SAFEBROWSING_DUMP_FAILED_UPDATES + +already_AddRefed<nsIFile> Classifier::GetFailedUpdateDirectroy() { + nsCString failedUpdatekDirName = STORE_DIRECTORY + nsCString("-failedupdate"); + + nsCOMPtr<nsIFile> failedUpdatekDirectory; + if (NS_FAILED( + mCacheDirectory->Clone(getter_AddRefs(failedUpdatekDirectory))) || + NS_FAILED(failedUpdatekDirectory->AppendNative(failedUpdatekDirName))) { + LOG(("Failed to init failedUpdatekDirectory.")); + return nullptr; + } + + return failedUpdatekDirectory.forget(); +} + +nsresult Classifier::DumpRawTableUpdates(const nsACString& aRawUpdates) { + LOG(("Dumping raw table updates...")); + + DumpFailedUpdate(); + + nsCOMPtr<nsIFile> failedUpdatekDirectory = GetFailedUpdateDirectroy(); + + // Create tableupdate.bin and dump raw table update data. + nsCOMPtr<nsIFile> rawTableUpdatesFile; + nsCOMPtr<nsIOutputStream> outputStream; + if (NS_FAILED( + failedUpdatekDirectory->Clone(getter_AddRefs(rawTableUpdatesFile))) || + NS_FAILED( + rawTableUpdatesFile->AppendNative(nsCString("tableupdates.bin"))) || + NS_FAILED(NS_NewLocalFileOutputStream( + getter_AddRefs(outputStream), rawTableUpdatesFile, + PR_WRONLY | PR_TRUNCATE | PR_CREATE_FILE))) { + LOG(("Failed to create file to dump raw table updates.")); + return NS_ERROR_FAILURE; + } + + // Write out the data. + uint32_t written; + nsresult rv = outputStream->Write(aRawUpdates.BeginReading(), + aRawUpdates.Length(), &written); + NS_ENSURE_SUCCESS(rv, rv); + NS_ENSURE_TRUE(written == aRawUpdates.Length(), NS_ERROR_FAILURE); + + return rv; +} + +nsresult Classifier::DumpFailedUpdate() { + LOG(("Dumping failed update...")); + + nsCOMPtr<nsIFile> failedUpdatekDirectory = GetFailedUpdateDirectroy(); + + // Remove the "failed update" directory no matter it exists or not. + // Failure is fine because the directory may not exist. + failedUpdatekDirectory->Remove(true); + + nsCString failedUpdatekDirName; + nsresult rv = failedUpdatekDirectory->GetNativeLeafName(failedUpdatekDirName); + NS_ENSURE_SUCCESS(rv, rv); + + // Copy the in-use directory to a clean "failed update" directory. + nsCOMPtr<nsIFile> inUseDirectory; + if (NS_FAILED(mRootStoreDirectory->Clone(getter_AddRefs(inUseDirectory))) || + NS_FAILED(inUseDirectory->CopyToNative(nullptr, failedUpdatekDirName))) { + LOG(("Failed to move in-use to the \"failed update\" directory %s", + failedUpdatekDirName.get())); + return NS_ERROR_FAILURE; + } + + return rv; +} + +#endif // MOZ_SAFEBROWSING_DUMP_FAILED_UPDATES + +/** + * This function copies the files one by one to the destination folder. + * Before copying a file, it checks ::ShouldAbort and returns + * NS_ERROR_ABORT if the flag is set. + */ +nsresult Classifier::CopyDirectoryInterruptible(nsCOMPtr<nsIFile>& aDestDir, + nsCOMPtr<nsIFile>& aSourceDir) { + nsCOMPtr<nsIDirectoryEnumerator> entries; + nsresult rv = aSourceDir->GetDirectoryEntries(getter_AddRefs(entries)); + NS_ENSURE_SUCCESS(rv, rv); + MOZ_ASSERT(entries); + + nsCOMPtr<nsIFile> source; + while (NS_SUCCEEDED(rv = entries->GetNextFile(getter_AddRefs(source))) && + source) { + if (ShouldAbort()) { + LOG(("Update is interrupted. Aborting the directory copy")); + return NS_ERROR_ABORT; + } + + bool isDirectory; + rv = source->IsDirectory(&isDirectory); + NS_ENSURE_SUCCESS(rv, rv); + + if (isDirectory) { + // If it is a directory, recursively copy the files inside the directory. + nsAutoCString leaf; + source->GetNativeLeafName(leaf); + MOZ_ASSERT(!leaf.IsEmpty()); + + nsCOMPtr<nsIFile> dest; + aDestDir->Clone(getter_AddRefs(dest)); + dest->AppendNative(leaf); + NS_ENSURE_SUCCESS(rv, rv); + + rv = CopyDirectoryInterruptible(dest, source); + NS_ENSURE_SUCCESS(rv, rv); + } else { + rv = source->CopyToNative(aDestDir, ""_ns); + NS_ENSURE_SUCCESS(rv, rv); + } + } + + // If the destination directory doesn't exist in the end, it means that the + // source directory is empty, we should copy the directory here. + bool exist; + rv = aDestDir->Exists(&exist); + NS_ENSURE_SUCCESS(rv, rv); + + if (!exist) { + rv = aDestDir->Create(nsIFile::DIRECTORY_TYPE, 0755); + NS_ENSURE_SUCCESS(rv, rv); + } + + return NS_OK; +} + +nsresult Classifier::CopyInUseDirForUpdate() { + LOG(("Copy in-use directory content for update.")); + if (ShouldAbort()) { + return NS_ERROR_UC_UPDATE_SHUTDOWNING; + } + + // We copy everything from in-use directory to a temporary directory + // for updating. + + // Remove the destination directory first (just in case) the do the copy. + mUpdatingDirectory->Remove(true); + if (!mRootStoreDirectoryForUpdate) { + LOG(("mRootStoreDirectoryForUpdate is null.")); + return NS_ERROR_NULL_POINTER; + } + + nsresult rv = CopyDirectoryInterruptible(mUpdatingDirectory, + mRootStoreDirectoryForUpdate); + NS_ENSURE_SUCCESS(rv, rv); + + return NS_OK; +} + +nsresult Classifier::RecoverBackups() { + bool backupExists; + nsresult rv = mBackupDirectory->Exists(&backupExists); + NS_ENSURE_SUCCESS(rv, rv); + + if (backupExists) { + // Remove the safebrowsing dir if it exists + nsCString storeDirName; + rv = mRootStoreDirectory->GetNativeLeafName(storeDirName); + NS_ENSURE_SUCCESS(rv, rv); + + bool storeExists; + rv = mRootStoreDirectory->Exists(&storeExists); + NS_ENSURE_SUCCESS(rv, rv); + + if (storeExists) { + rv = mRootStoreDirectory->Remove(true); + NS_ENSURE_SUCCESS(rv, rv); + } + + // Move the backup to the store location + rv = mBackupDirectory->MoveToNative(nullptr, storeDirName); + NS_ENSURE_SUCCESS(rv, rv); + + // mBackupDirectory now points to storeDir, fix up. + rv = SetupPathNames(); + NS_ENSURE_SUCCESS(rv, rv); + } + + return NS_OK; +} + +bool Classifier::CheckValidUpdate(TableUpdateArray& aUpdates, + const nsACString& aTable) { + // take the quick exit if there is no valid update for us + // (common case) + uint32_t validupdates = 0; + + for (uint32_t i = 0; i < aUpdates.Length(); i++) { + RefPtr<const TableUpdate> update = aUpdates[i]; + if (!update || !update->TableName().Equals(aTable)) { + continue; + } + if (update->Empty()) { + aUpdates[i] = nullptr; + continue; + } + validupdates++; + } + + if (!validupdates) { + // This can happen if the update was only valid for one table. + return false; + } + + return true; +} + +nsCString Classifier::GetProvider(const nsACString& aTableName) { + nsUrlClassifierUtils* urlUtil = nsUrlClassifierUtils::GetInstance(); + if (NS_WARN_IF(!urlUtil)) { + return ""_ns; + } + + nsCString provider; + nsresult rv = urlUtil->GetProvider(aTableName, provider); + + return NS_SUCCEEDED(rv) ? provider : ""_ns; +} + +/* + * This will consume+delete updates from the passed nsTArray. + */ +nsresult Classifier::UpdateHashStore(TableUpdateArray& aUpdates, + const nsACString& aTable) { + if (ShouldAbort()) { + return NS_ERROR_UC_UPDATE_SHUTDOWNING; + } + + LOG(("Classifier::UpdateHashStore(%s)", PromiseFlatCString(aTable).get())); + + // moztest- tables don't support update because they are directly created + // in LookupCache. To test updates, use tables begin with "test-" instead. + // Also, recommend using 'test-' tables while writing testcases because + // it is more like the real world scenario. + MOZ_ASSERT(!nsUrlClassifierUtils::IsMozTestTable(aTable)); + + HashStore store(aTable, GetProvider(aTable), mUpdatingDirectory); + + if (!CheckValidUpdate(aUpdates, store.TableName())) { + return NS_OK; + } + + nsresult rv = store.Open(); + if (NS_WARN_IF(NS_FAILED(rv))) { + return rv; + } + + rv = store.BeginUpdate(); + NS_ENSURE_SUCCESS(rv, rv); + + // Read the part of the store that is (only) in the cache + RefPtr<LookupCacheV2> lookupCacheV2; + { + RefPtr<LookupCache> lookupCache = + GetLookupCacheForUpdate(store.TableName()); + if (lookupCache) { + lookupCacheV2 = LookupCache::Cast<LookupCacheV2>(lookupCache); + } + } + if (!lookupCacheV2) { + return NS_ERROR_UC_UPDATE_TABLE_NOT_FOUND; + } + + FallibleTArray<uint32_t> AddPrefixHashes; + FallibleTArray<nsCString> AddCompletesHashes; + rv = lookupCacheV2->GetPrefixes(AddPrefixHashes, AddCompletesHashes); + NS_ENSURE_SUCCESS(rv, rv); + + rv = store.AugmentAdds(AddPrefixHashes, AddCompletesHashes); + NS_ENSURE_SUCCESS(rv, rv); + + AddPrefixHashes.Clear(); + AddCompletesHashes.Clear(); + + uint32_t applied = 0; + + for (uint32_t i = 0; i < aUpdates.Length(); i++) { + RefPtr<TableUpdate> update = aUpdates[i]; + if (!update || !update->TableName().Equals(store.TableName())) { + continue; + } + + RefPtr<TableUpdateV2> updateV2 = TableUpdate::Cast<TableUpdateV2>(update); + NS_ENSURE_TRUE(updateV2, NS_ERROR_UC_UPDATE_UNEXPECTED_VERSION); + + rv = store.ApplyUpdate(updateV2); + NS_ENSURE_SUCCESS(rv, rv); + + applied++; + + LOG(("Applied update to table %s:", store.TableName().get())); + LOG((" %d add chunks", updateV2->AddChunks().Length())); + LOG((" %zu add prefixes", updateV2->AddPrefixes().Length())); + LOG((" %zu add completions", updateV2->AddCompletes().Length())); + LOG((" %d sub chunks", updateV2->SubChunks().Length())); + LOG((" %zu sub prefixes", updateV2->SubPrefixes().Length())); + LOG((" %zu sub completions", updateV2->SubCompletes().Length())); + LOG((" %d add expirations", updateV2->AddExpirations().Length())); + LOG((" %d sub expirations", updateV2->SubExpirations().Length())); + + aUpdates[i] = nullptr; + } + + LOG(("Applied %d update(s) to %s.", applied, store.TableName().get())); + + rv = store.Rebuild(); + NS_ENSURE_SUCCESS(rv, rv); + + LOG(("Table %s now has:", store.TableName().get())); + LOG((" %d add chunks", store.AddChunks().Length())); + LOG((" %zu add prefixes", store.AddPrefixes().Length())); + LOG((" %zu add completions", store.AddCompletes().Length())); + LOG((" %d sub chunks", store.SubChunks().Length())); + LOG((" %zu sub prefixes", store.SubPrefixes().Length())); + LOG((" %zu sub completions", store.SubCompletes().Length())); + + rv = store.WriteFile(); + NS_ENSURE_SUCCESS(rv, rv); + + // At this point the store is updated and written out to disk, but + // the data is still in memory. Build our quick-lookup table here. + rv = lookupCacheV2->Build(store.AddPrefixes(), store.AddCompletes()); + NS_ENSURE_SUCCESS(rv, NS_ERROR_UC_UPDATE_BUILD_PREFIX_FAILURE); + + rv = lookupCacheV2->WriteFile(); + NS_ENSURE_SUCCESS(rv, NS_ERROR_UC_UPDATE_FAIL_TO_WRITE_DISK); + + LOG(("Successfully updated %s", store.TableName().get())); + + return NS_OK; +} + +nsresult Classifier::UpdateTableV4(TableUpdateArray& aUpdates, + const nsACString& aTable) { + MOZ_ASSERT(!NS_IsMainThread(), + "UpdateTableV4 must be called on the classifier worker thread."); + if (ShouldAbort()) { + return NS_ERROR_UC_UPDATE_SHUTDOWNING; + } + + // moztest- tables don't support update, see comment in UpdateHashStore. + MOZ_ASSERT(!nsUrlClassifierUtils::IsMozTestTable(aTable)); + + LOG(("Classifier::UpdateTableV4(%s)", PromiseFlatCString(aTable).get())); + + if (!CheckValidUpdate(aUpdates, aTable)) { + return NS_OK; + } + + RefPtr<LookupCacheV4> lookupCacheV4; + { + RefPtr<LookupCache> lookupCache = GetLookupCacheForUpdate(aTable); + if (lookupCache) { + lookupCacheV4 = LookupCache::Cast<LookupCacheV4>(lookupCache); + } + } + if (!lookupCacheV4) { + return NS_ERROR_UC_UPDATE_TABLE_NOT_FOUND; + } + + nsresult rv = NS_OK; + + // If there are multiple updates for the same table, prefixes1 & prefixes2 + // will act as input and output in turn to reduce memory copy overhead. + PrefixStringMap prefixes1, prefixes2; + PrefixStringMap* input = &prefixes1; + PrefixStringMap* output = &prefixes2; + + RefPtr<const TableUpdateV4> lastAppliedUpdate = nullptr; + for (uint32_t i = 0; i < aUpdates.Length(); i++) { + RefPtr<TableUpdate> update = aUpdates[i]; + if (!update || !update->TableName().Equals(aTable)) { + continue; + } + + RefPtr<TableUpdateV4> updateV4 = TableUpdate::Cast<TableUpdateV4>(update); + NS_ENSURE_TRUE(updateV4, NS_ERROR_UC_UPDATE_UNEXPECTED_VERSION); + + if (updateV4->IsFullUpdate()) { + input->Clear(); + output->Clear(); + rv = lookupCacheV4->ApplyUpdate(updateV4, *input, *output); + if (NS_FAILED(rv)) { + return rv; + } + } else { + // If both prefix sets are empty, this means we are doing a partial update + // without a prior full/partial update in the loop. In this case we should + // get prefixes from the lookup cache first. + if (prefixes1.IsEmpty() && prefixes2.IsEmpty()) { + lookupCacheV4->GetPrefixes(prefixes1); + } else { + MOZ_ASSERT(prefixes1.IsEmpty() ^ prefixes2.IsEmpty()); + + // When there are multiple partial updates, input should always point + // to the non-empty prefix set(filled by previous full/partial update). + // output should always point to the empty prefix set. + input = prefixes1.IsEmpty() ? &prefixes2 : &prefixes1; + output = prefixes1.IsEmpty() ? &prefixes1 : &prefixes2; + } + + rv = lookupCacheV4->ApplyUpdate(updateV4, *input, *output); + if (NS_FAILED(rv)) { + return rv; + } + + input->Clear(); + } + + // Keep track of the last applied update. + lastAppliedUpdate = updateV4; + + aUpdates[i] = nullptr; + } + + rv = lookupCacheV4->Build(*output); + NS_ENSURE_SUCCESS(rv, NS_ERROR_UC_UPDATE_BUILD_PREFIX_FAILURE); + + rv = lookupCacheV4->WriteFile(); + NS_ENSURE_SUCCESS(rv, NS_ERROR_UC_UPDATE_FAIL_TO_WRITE_DISK); + + if (lastAppliedUpdate) { + LOG(("Write meta data of the last applied update.")); + rv = lookupCacheV4->WriteMetadata(lastAppliedUpdate); + NS_ENSURE_SUCCESS(rv, NS_ERROR_UC_UPDATE_FAIL_TO_WRITE_DISK); + } + + LOG(("Successfully updated %s\n", PromiseFlatCString(aTable).get())); + + return NS_OK; +} + +nsresult Classifier::UpdateCache(RefPtr<const TableUpdate> aUpdate) { + if (!aUpdate) { + return NS_OK; + } + + nsAutoCString table(aUpdate->TableName()); + LOG(("Classifier::UpdateCache(%s)", table.get())); + + RefPtr<LookupCache> lookupCache = GetLookupCache(table); + if (!lookupCache) { + return NS_ERROR_FAILURE; + } + + RefPtr<LookupCacheV2> lookupV2 = + LookupCache::Cast<LookupCacheV2>(lookupCache); + if (lookupV2) { + RefPtr<const TableUpdateV2> updateV2 = + TableUpdate::Cast<TableUpdateV2>(aUpdate); + lookupV2->AddGethashResultToCache(updateV2->AddCompletes(), + updateV2->MissPrefixes()); + } else { + RefPtr<LookupCacheV4> lookupV4 = + LookupCache::Cast<LookupCacheV4>(lookupCache); + if (!lookupV4) { + return NS_ERROR_FAILURE; + } + + RefPtr<const TableUpdateV4> updateV4 = + TableUpdate::Cast<TableUpdateV4>(aUpdate); + lookupV4->AddFullHashResponseToCache(updateV4->FullHashResponse()); + } + +#if defined(DEBUG) + lookupCache->DumpCache(); +#endif + + return NS_OK; +} + +RefPtr<LookupCache> Classifier::GetLookupCache(const nsACString& aTable, + bool aForUpdate) { + // GetLookupCache(aForUpdate==true) can only be called on update thread. + MOZ_ASSERT_IF(aForUpdate, OnUpdateThread()); + + LookupCacheArray& lookupCaches = + aForUpdate ? mNewLookupCaches : mLookupCaches; + auto& rootStoreDirectory = + aForUpdate ? mUpdatingDirectory : mRootStoreDirectory; + + for (auto c : lookupCaches) { + if (c->TableName().Equals(aTable)) { + return c; + } + } + + // We don't want to create lookupcache when shutdown is already happening. + if (ShouldAbort()) { + return nullptr; + } + + // TODO : Bug 1302600, It would be better if we have a more general non-main + // thread method to convert table name to protocol version. Currently + // we can only know this by checking if the table name ends with + // '-proto'. + RefPtr<LookupCache> cache; + nsCString provider = GetProvider(aTable); + + // Google requests SafeBrowsing related feature should only be enabled when + // the databases are update-to-date. Since we disable Safe Browsing update in + // Safe Mode, ignore tables provided by Google to ensure we don't show + // outdated warnings. + if (nsUrlClassifierUtils::IsInSafeMode()) { + if (provider.EqualsASCII("google") || provider.EqualsASCII("google4")) { + return nullptr; + } + } + + if (StringEndsWith(aTable, "-proto"_ns)) { + cache = new LookupCacheV4(aTable, provider, rootStoreDirectory); + } else { + cache = new LookupCacheV2(aTable, provider, rootStoreDirectory); + } + + nsresult rv = cache->Init(); + if (NS_FAILED(rv)) { + return nullptr; + } + rv = cache->Open(); + if (NS_SUCCEEDED(rv)) { + lookupCaches.AppendElement(cache); + return cache; + } + + // At this point we failed to open LookupCache. + // + // GetLookupCache for update and for other usage will run on update thread + // and worker thread respectively (Bug 1339760). Removing stuff only in + // their own realms potentially increases the concurrency. + + if (aForUpdate) { + // Remove intermediaries no matter if it's due to file corruption or not. + RemoveUpdateIntermediaries(); + return nullptr; + } + + // Non-update case. + if (rv == NS_ERROR_FILE_CORRUPTED) { + // Remove all the on-disk data when the table's prefix file is corrupted. + LOG(("Failed to get prefixes from file for table %s, delete on-disk data!", + aTable.BeginReading())); + + DeleteTables(mRootStoreDirectory, nsTArray<nsCString>{nsCString(aTable)}); + } + return nullptr; +} + +nsresult Classifier::ReadNoiseEntries(const Prefix& aPrefix, + const nsACString& aTableName, + uint32_t aCount, + PrefixArray& aNoiseEntries) { + RefPtr<LookupCache> cache = GetLookupCache(aTableName); + if (!cache) { + return NS_ERROR_FAILURE; + } + + RefPtr<LookupCacheV2> cacheV2 = LookupCache::Cast<LookupCacheV2>(cache); + RefPtr<LookupCacheV4> cacheV4 = LookupCache::Cast<LookupCacheV4>(cache); + MOZ_ASSERT_IF(cacheV2, !cacheV4); + + if (cache->PrefixLength() == 0) { + NS_WARNING("Could not find prefix in PrefixSet during noise lookup"); + return NS_ERROR_FAILURE; + } + + // We do not want to simply pick random prefixes, because this would allow + // averaging out the noise by analysing the traffic from Firefox users. + // Instead, we ensure the 'noise' is the same for the same prefix by seeding + // the random number generator with the prefix. We prefer not to use rand() + // which isn't thread safe, and the reseeding of which could trip up other + // parts othe code that expect actual random numbers. + // Here we use a simple LCG (Linear Congruential Generator) to generate + // random numbers. We seed the LCG with the prefix we are generating noise + // for. + // http://en.wikipedia.org/wiki/Linear_congruential_generator + + uint32_t m = cache->PrefixLength(); + uint32_t a = aCount % m; + uint32_t idx = aPrefix.ToUint32() % m; + + for (size_t i = 0; i < aCount; i++) { + idx = (a * idx + a) % m; + + uint32_t hash; + + nsresult rv; + if (cacheV2) { + rv = cacheV2->GetPrefixByIndex(idx, &hash); + } else { + // We don't add noises for variable length prefix because of simplicity, + // so we will only get fixed length prefix (4 bytes). + rv = cacheV4->GetFixedLengthPrefixByIndex(idx, &hash); + } + + if (NS_FAILED(rv)) { + NS_WARNING( + "Could not find the target prefix in PrefixSet during noise lookup"); + return NS_ERROR_FAILURE; + } + + Prefix newPrefix; + // In the case V4 little endian, we did swapping endian when converting from + // char* to int, should revert endian to make sure we will send hex string + // correctly See https://bugzilla.mozilla.org/show_bug.cgi?id=1283007#c23 + if (!cacheV2 && !bool(MOZ_BIG_ENDIAN())) { + hash = NativeEndian::swapFromBigEndian(hash); + } + + newPrefix.FromUint32(hash); + if (newPrefix != aPrefix) { + aNoiseEntries.AppendElement(newPrefix); + } + } + + return NS_OK; +} + +nsresult Classifier::LoadHashStore(nsIFile* aDirectory, nsACString& aResult, + nsTArray<nsCString>& aFailedTableNames) { + nsTArray<nsCString> tables; + nsTArray<nsCString> exts = {V2_METADATA_SUFFIX}; + + nsresult rv = ScanStoreDir(mRootStoreDirectory, exts, tables); + if (NS_WARN_IF(NS_FAILED(rv))) { + return rv; + } + + for (const auto& table : tables) { + HashStore store(table, GetProvider(table), mRootStoreDirectory); + + nsresult rv = store.Open(); + if (NS_FAILED(rv) || !GetLookupCache(table)) { + // TableRequest is called right before applying an update. + // If we cannot retrieve metadata for a given table or we fail to + // load the prefixes for a table, reset the table to esnure we + // apply a full update to the table. + LOG(("Failed to get metadata for v2 table %s", table.get())); + aFailedTableNames.AppendElement(table); + continue; + } + + ChunkSet& adds = store.AddChunks(); + ChunkSet& subs = store.SubChunks(); + + // Open HashStore will always succeed even that is not a v2 table. + // So exception tables without add and sub chunks. + if (adds.Length() == 0 && subs.Length() == 0) { + continue; + } + + aResult.Append(store.TableName()); + aResult.Append(';'); + + if (adds.Length() > 0) { + aResult.AppendLiteral("a:"); + nsAutoCString addList; + adds.Serialize(addList); + aResult.Append(addList); + } + + if (subs.Length() > 0) { + if (adds.Length() > 0) { + aResult.Append(':'); + } + aResult.AppendLiteral("s:"); + nsAutoCString subList; + subs.Serialize(subList); + aResult.Append(subList); + } + + aResult.Append('\n'); + } + + return rv; +} + +nsresult Classifier::LoadMetadata(nsIFile* aDirectory, nsACString& aResult, + nsTArray<nsCString>& aFailedTableNames) { + nsTArray<nsCString> tables; + nsTArray<nsCString> exts = {V4_METADATA_SUFFIX}; + + nsresult rv = ScanStoreDir(mRootStoreDirectory, exts, tables); + if (NS_WARN_IF(NS_FAILED(rv))) { + return rv; + } + + for (const auto& table : tables) { + RefPtr<LookupCache> c = GetLookupCache(table); + RefPtr<LookupCacheV4> lookupCacheV4 = LookupCache::Cast<LookupCacheV4>(c); + + if (!lookupCacheV4) { + aFailedTableNames.AppendElement(table); + continue; + } + + nsCString state, sha256; + rv = lookupCacheV4->LoadMetadata(state, sha256); + Telemetry::Accumulate(Telemetry::URLCLASSIFIER_VLPS_METADATA_CORRUPT, + rv == NS_ERROR_FILE_CORRUPTED); + if (NS_FAILED(rv)) { + LOG(("Failed to get metadata for v4 table %s", table.get())); + aFailedTableNames.AppendElement(table); + continue; + } + + // The state might include '\n' so that we have to encode. + nsAutoCString stateBase64; + rv = Base64Encode(state, stateBase64); + if (NS_WARN_IF(NS_FAILED(rv))) { + return rv; + } + + nsAutoCString checksumBase64; + rv = Base64Encode(sha256, checksumBase64); + if (NS_WARN_IF(NS_FAILED(rv))) { + return rv; + } + + LOG(("Appending state '%s' and checksum '%s' for table %s", + stateBase64.get(), checksumBase64.get(), table.get())); + + aResult.AppendPrintf("%s;%s:%s\n", table.get(), stateBase64.get(), + checksumBase64.get()); + } + + return rv; +} + +bool Classifier::ShouldAbort() const { + return mIsClosed || nsUrlClassifierDBService::ShutdownHasStarted() || + (mUpdateInterrupted && mUpdateThread->IsOnCurrentThread()); +} + +} // namespace safebrowsing +} // namespace mozilla |