/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */ /* vim: set ts=8 sts=2 et sw=2 tw=80: */ /* This Source Code Form is subject to the terms of the Mozilla Public * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ #include "VariableLengthPrefixSet.h" #include "nsIInputStream.h" #include "nsUrlClassifierPrefixSet.h" #include "nsPrintfCString.h" #include "mozilla/ScopeExit.h" #include "mozilla/EndianUtils.h" #include "mozilla/Logging.h" #include "mozilla/UniquePtr.h" #include // MOZ_LOG=UrlClassifierPrefixSet:5 static mozilla::LazyLogModule gUrlClassifierPrefixSetLog( "UrlClassifierPrefixSet"); #define LOG(args) \ MOZ_LOG(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug, args) #define LOG_ENABLED() \ MOZ_LOG_TEST(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug) namespace mozilla::safebrowsing { #define PREFIX_SIZE_FIXED 4 #ifdef DEBUG namespace { template void EnsureSorted(T* aArray) { MOZ_ASSERT(std::is_sorted(aArray->Elements(), aArray->Elements() + aArray->Length())); } } // namespace #endif NS_IMPL_ISUPPORTS(VariableLengthPrefixSet, nsIMemoryReporter) // This class will process prefix size between 4~32. But for 4 bytes prefixes, // they will be passed to nsUrlClassifierPrefixSet because of better // optimization. VariableLengthPrefixSet::VariableLengthPrefixSet() : mLock("VariableLengthPrefixSet.mLock"), mFixedPrefixSet(new nsUrlClassifierPrefixSet) {} nsresult VariableLengthPrefixSet::Init(const nsACString& aName) { mName = aName; mMemoryReportPath = nsPrintfCString( "explicit/storage/prefix-set/%s", (!aName.IsEmpty() ? PromiseFlatCString(aName).get() : "?!")); RegisterWeakMemoryReporter(this); return mFixedPrefixSet->Init(aName); } VariableLengthPrefixSet::~VariableLengthPrefixSet() { UnregisterWeakMemoryReporter(this); } nsresult VariableLengthPrefixSet::SetPrefixes(AddPrefixArray& aAddPrefixes, AddCompleteArray& aAddCompletes) { MutexAutoLock lock(mLock); // We may modify the prefix string in this function, clear this data // before returning to ensure no one use the data after this API. auto scopeExit = MakeScopeExit([&]() { aAddPrefixes.Clear(); aAddCompletes.Clear(); }); // Clear old prefixSet before setting new one. mFixedPrefixSet->SetPrefixes(nullptr, 0); mVLPrefixSet.Clear(); // Build fixed-length prefix set nsTArray array; if (!array.SetCapacity(aAddPrefixes.Length(), fallible)) { return NS_ERROR_OUT_OF_MEMORY; } for (size_t i = 0; i < aAddPrefixes.Length(); i++) { array.AppendElement(aAddPrefixes[i].PrefixHash().ToUint32()); } #ifdef DEBUG // PrefixSet requires sorted order EnsureSorted(&array); #endif nsresult rv = mFixedPrefixSet->SetPrefixes(array.Elements(), array.Length()); NS_ENSURE_SUCCESS(rv, rv); #ifdef DEBUG uint32_t size; size = mFixedPrefixSet->SizeOfIncludingThis(moz_malloc_size_of); LOG(("SB tree done, size = %d bytes\n", size)); #endif CompletionArray completions; for (size_t i = 0; i < aAddCompletes.Length(); i++) { completions.AppendElement(aAddCompletes[i].CompleteHash()); } completions.Sort(); UniquePtr completionStr(new nsCString); completionStr->SetCapacity(completions.Length() * COMPLETE_SIZE); for (size_t i = 0; i < completions.Length(); i++) { const char* buf = reinterpret_cast(completions[i].buf); completionStr->Append(buf, COMPLETE_SIZE); } mVLPrefixSet.InsertOrUpdate(COMPLETE_SIZE, std::move(completionStr)); return NS_OK; } nsresult VariableLengthPrefixSet::SetPrefixes(PrefixStringMap& aPrefixMap) { MutexAutoLock lock(mLock); // We may modify the prefix string in this function, clear this data // before returning to ensure no one use the data after this API. auto scopeExit = MakeScopeExit([&]() { aPrefixMap.Clear(); }); // Prefix size should not less than 4-bytes or greater than 32-bytes for (const auto& key : aPrefixMap.Keys()) { if (key < PREFIX_SIZE_FIXED || key > COMPLETE_SIZE) { return NS_ERROR_FAILURE; } } // Clear old prefixSet before setting new one. mFixedPrefixSet->SetPrefixes(nullptr, 0); mVLPrefixSet.Clear(); // 4-bytes prefixes are handled by nsUrlClassifierPrefixSet. nsCString* prefixes = aPrefixMap.Get(PREFIX_SIZE_FIXED); if (prefixes) { NS_ENSURE_TRUE(prefixes->Length() % PREFIX_SIZE_FIXED == 0, NS_ERROR_FAILURE); uint32_t numPrefixes = prefixes->Length() / PREFIX_SIZE_FIXED; // Prefixes are lexicographically-sorted, so the interger array // passed to nsUrlClassifierPrefixSet should also follow the same order. // Reverse byte order in-place in Little-Endian platform. #if MOZ_LITTLE_ENDIAN() char* begin = prefixes->BeginWriting(); char* end = prefixes->EndWriting(); while (begin != end) { uint32_t* p = reinterpret_cast(begin); *p = BigEndian::readUint32(begin); begin += sizeof(uint32_t); } #endif const uint32_t* arrayPtr = reinterpret_cast(prefixes->BeginReading()); nsresult rv = mFixedPrefixSet->SetPrefixes(arrayPtr, numPrefixes); if (NS_WARN_IF(NS_FAILED(rv))) { return rv; } } // 5~32 bytes prefixes are stored in mVLPrefixSet. for (const auto& entry : aPrefixMap) { // Skip 4bytes prefixes because it is already stored in mFixedPrefixSet. if (entry.GetKey() == PREFIX_SIZE_FIXED) { continue; } mVLPrefixSet.InsertOrUpdate(entry.GetKey(), MakeUnique(*entry.GetData())); } return NS_OK; } nsresult VariableLengthPrefixSet::GetPrefixes(PrefixStringMap& aPrefixMap) { MutexAutoLock lock(mLock); // 4-bytes prefixes are handled by nsUrlClassifierPrefixSet. FallibleTArray array; nsresult rv = mFixedPrefixSet->GetPrefixesNative(array); NS_ENSURE_SUCCESS(rv, rv); size_t count = array.Length(); if (count) { UniquePtr prefixes(new nsCString()); if (!prefixes->SetLength(PREFIX_SIZE_FIXED * count, fallible)) { return NS_ERROR_OUT_OF_MEMORY; } // Writing integer array to character array uint32_t* begin = reinterpret_cast(prefixes->BeginWriting()); for (uint32_t i = 0; i < count; i++) { begin[i] = NativeEndian::swapToBigEndian(array[i]); } aPrefixMap.InsertOrUpdate(PREFIX_SIZE_FIXED, std::move(prefixes)); } // Copy variable-length prefix set for (const auto& entry : mVLPrefixSet) { aPrefixMap.InsertOrUpdate(entry.GetKey(), MakeUnique(*entry.GetData())); } return NS_OK; } // This is used by V2 protocol which prefixes are either 4-bytes or 32-bytes. nsresult VariableLengthPrefixSet::GetFixedLengthPrefixes( FallibleTArray* aPrefixes, FallibleTArray* aCompletes) { MOZ_ASSERT(aPrefixes || aCompletes); MOZ_ASSERT_IF(aPrefixes, aPrefixes->IsEmpty()); MOZ_ASSERT_IF(aCompletes, aCompletes->IsEmpty()); if (aPrefixes) { nsresult rv = mFixedPrefixSet->GetPrefixesNative(*aPrefixes); if (NS_FAILED(rv)) { return rv; } } if (aCompletes) { nsCString* completes = mVLPrefixSet.Get(COMPLETE_SIZE); if (completes) { uint32_t count = completes->Length() / COMPLETE_SIZE; if (!aCompletes->SetCapacity(count, fallible)) { return NS_ERROR_OUT_OF_MEMORY; } for (uint32_t i = 0; i < count; i++) { // SetCapacity was just called, these cannot fail. (void)aCompletes->AppendElement( Substring(*completes, i * COMPLETE_SIZE, COMPLETE_SIZE), fallible); } } } return NS_OK; } nsresult VariableLengthPrefixSet::GetFixedLengthPrefixByIndex( uint32_t aIndex, uint32_t* aOutPrefix) const { NS_ENSURE_ARG_POINTER(aOutPrefix); return mFixedPrefixSet->GetPrefixByIndex(aIndex, aOutPrefix); } // It should never be the case that more than one hash prefixes match a given // full hash. However, if that happens, this method returns any one of them. // It does not guarantee which one of those will be returned. nsresult VariableLengthPrefixSet::Matches(uint32_t aPrefix, const nsACString& aFullHash, uint32_t* aLength) const { MutexAutoLock lock(mLock); // Only allow full-length hash to check if match any of the prefix MOZ_ASSERT(aFullHash.Length() == COMPLETE_SIZE); NS_ENSURE_ARG_POINTER(aLength); *aLength = 0; // Check if it matches 4-bytes prefixSet first bool found = false; nsresult rv = mFixedPrefixSet->Contains(aPrefix, &found); NS_ENSURE_SUCCESS(rv, rv); if (found) { *aLength = PREFIX_SIZE_FIXED; return NS_OK; } for (const auto& entry : mVLPrefixSet) { if (BinarySearch(aFullHash, *entry.GetData(), entry.GetKey())) { *aLength = entry.GetKey(); MOZ_ASSERT(*aLength > 4); return NS_OK; } } return NS_OK; } nsresult VariableLengthPrefixSet::IsEmpty(bool* aEmpty) const { MutexAutoLock lock(mLock); NS_ENSURE_ARG_POINTER(aEmpty); mFixedPrefixSet->IsEmpty(aEmpty); *aEmpty = *aEmpty && mVLPrefixSet.IsEmpty(); return NS_OK; } nsresult VariableLengthPrefixSet::LoadPrefixes(nsCOMPtr& in) { MutexAutoLock lock(mLock); // First read prefixes from fixed-length prefix set nsresult rv = mFixedPrefixSet->LoadPrefixes(in); NS_ENSURE_SUCCESS(rv, rv); // Then read prefixes from variable-length prefix set uint32_t magic; uint32_t read; rv = in->Read(reinterpret_cast(&magic), sizeof(uint32_t), &read); NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE); if (magic != PREFIXSET_VERSION_MAGIC) { LOG(("[%s] Version magic mismatch, not loading", mName.get())); return NS_ERROR_FILE_CORRUPTED; } mVLPrefixSet.Clear(); uint32_t count; rv = in->Read(reinterpret_cast(&count), sizeof(uint32_t), &read); NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE); uint32_t totalPrefixes = 0; for (; count > 0; count--) { uint8_t prefixSize; rv = in->Read(reinterpret_cast(&prefixSize), sizeof(uint8_t), &read); NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_TRUE(read == sizeof(uint8_t), NS_ERROR_FAILURE); if (prefixSize < PREFIX_SIZE || prefixSize > COMPLETE_SIZE) { return NS_ERROR_FILE_CORRUPTED; } uint32_t stringLength; rv = in->Read(reinterpret_cast(&stringLength), sizeof(uint32_t), &read); NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE); NS_ENSURE_TRUE(stringLength % prefixSize == 0, NS_ERROR_FILE_CORRUPTED); uint32_t prefixCount = stringLength / prefixSize; UniquePtr vlPrefixes(new nsCString()); if (!vlPrefixes->SetLength(stringLength, fallible)) { return NS_ERROR_OUT_OF_MEMORY; } rv = in->Read(reinterpret_cast(vlPrefixes->BeginWriting()), stringLength, &read); NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_TRUE(read == stringLength, NS_ERROR_FAILURE); mVLPrefixSet.InsertOrUpdate(prefixSize, std::move(vlPrefixes)); totalPrefixes += prefixCount; LOG(("[%s] Loaded %u %u-byte prefixes", mName.get(), prefixCount, prefixSize)); } LOG(("[%s] Loading VLPrefixSet successful (%u total prefixes)", mName.get(), totalPrefixes)); return NS_OK; } uint32_t VariableLengthPrefixSet::CalculatePreallocateSize() const { uint32_t fileSize = 0; // Size of fixed length prefix set. fileSize += mFixedPrefixSet->CalculatePreallocateSize(); // Size of variable length prefix set. // Store how many prefix string. fileSize += sizeof(uint32_t); for (const auto& data : mVLPrefixSet.Values()) { // Store prefix size, prefix string length, and prefix string. fileSize += sizeof(uint8_t); fileSize += sizeof(uint32_t); fileSize += data->Length(); } return fileSize; } uint32_t VariableLengthPrefixSet::FixedLengthPrefixLength() const { return mFixedPrefixSet->Length(); } nsresult VariableLengthPrefixSet::WritePrefixes( nsCOMPtr& out) const { MutexAutoLock lock(mLock); // First, write fixed length prefix set nsresult rv = mFixedPrefixSet->WritePrefixes(out); NS_ENSURE_SUCCESS(rv, rv); // Then, write variable length prefix set uint32_t written; uint32_t writelen = sizeof(uint32_t); uint32_t magic = PREFIXSET_VERSION_MAGIC; rv = out->Write(reinterpret_cast(&magic), writelen, &written); NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE); uint32_t count = mVLPrefixSet.Count(); rv = out->Write(reinterpret_cast(&count), writelen, &written); NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE); // Store PrefixSize, Length of Prefix String and then Prefix String for (const auto& entry : mVLPrefixSet) { const nsCString& vlPrefixes = *entry.GetData(); uint8_t prefixSize = entry.GetKey(); writelen = sizeof(uint8_t); rv = out->Write(reinterpret_cast(&prefixSize), writelen, &written); NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE); uint32_t stringLength = vlPrefixes.Length(); writelen = sizeof(uint32_t); rv = out->Write(reinterpret_cast(&stringLength), writelen, &written); NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE); rv = out->Write(const_cast(vlPrefixes.BeginReading()), stringLength, &written); NS_ENSURE_SUCCESS(rv, rv); NS_ENSURE_TRUE(stringLength == written, NS_ERROR_FAILURE); } return NS_OK; } bool VariableLengthPrefixSet::BinarySearch(const nsACString& aFullHash, const nsACString& aPrefixes, uint32_t aPrefixSize) const { const char* fullhash = aFullHash.BeginReading(); const char* prefixes = aPrefixes.BeginReading(); int32_t begin = 0, end = aPrefixes.Length() / aPrefixSize; while (end > begin) { int32_t mid = (begin + end) >> 1; int cmp = memcmp(fullhash, prefixes + mid * aPrefixSize, aPrefixSize); if (cmp < 0) { end = mid; } else if (cmp > 0) { begin = mid + 1; } else { return true; } } return false; } MOZ_DEFINE_MALLOC_SIZE_OF(UrlClassifierMallocSizeOf) NS_IMETHODIMP VariableLengthPrefixSet::CollectReports(nsIHandleReportCallback* aHandleReport, nsISupports* aData, bool aAnonymize) { MOZ_ASSERT(NS_IsMainThread()); size_t amount = SizeOfIncludingThis(UrlClassifierMallocSizeOf); return aHandleReport->Callback( ""_ns, mMemoryReportPath, KIND_HEAP, UNITS_BYTES, amount, nsLiteralCString("Memory used by the variable-length prefix set for a " "URL classifier."), aData); } size_t VariableLengthPrefixSet::SizeOfIncludingThis( mozilla::MallocSizeOf aMallocSizeOf) const { MutexAutoLock lock(mLock); size_t n = 0; n += aMallocSizeOf(this); n += mFixedPrefixSet->SizeOfIncludingThis(moz_malloc_size_of) - aMallocSizeOf(mFixedPrefixSet); n += mVLPrefixSet.ShallowSizeOfExcludingThis(aMallocSizeOf); for (const auto& data : mVLPrefixSet.Values()) { n += data->SizeOfExcludingThisIfUnshared(aMallocSizeOf); } return n; } } // namespace mozilla::safebrowsing