496 lines
15 KiB
C++
496 lines
15 KiB
C++
/* -*- Mode: C++; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
|
|
/* vim: set ts=8 sts=2 et sw=2 tw=80: */
|
|
/* This Source Code Form is subject to the terms of the Mozilla Public
|
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
|
|
|
#include "VariableLengthPrefixSet.h"
|
|
#include "nsIInputStream.h"
|
|
#include "nsUrlClassifierPrefixSet.h"
|
|
#include "nsPrintfCString.h"
|
|
#include "mozilla/ScopeExit.h"
|
|
#include "mozilla/EndianUtils.h"
|
|
#include "mozilla/Logging.h"
|
|
#include "mozilla/UniquePtr.h"
|
|
#include <algorithm>
|
|
|
|
// MOZ_LOG=UrlClassifierPrefixSet:5
|
|
static mozilla::LazyLogModule gUrlClassifierPrefixSetLog(
|
|
"UrlClassifierPrefixSet");
|
|
#define LOG(args) \
|
|
MOZ_LOG(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug, args)
|
|
#define LOG_ENABLED() \
|
|
MOZ_LOG_TEST(gUrlClassifierPrefixSetLog, mozilla::LogLevel::Debug)
|
|
|
|
namespace mozilla::safebrowsing {
|
|
|
|
#define PREFIX_SIZE_FIXED 4
|
|
|
|
#ifdef DEBUG
|
|
namespace {
|
|
|
|
template <class T>
|
|
void EnsureSorted(T* aArray) {
|
|
MOZ_ASSERT(std::is_sorted(aArray->Elements(),
|
|
aArray->Elements() + aArray->Length()));
|
|
}
|
|
|
|
} // namespace
|
|
#endif
|
|
|
|
NS_IMPL_ISUPPORTS(VariableLengthPrefixSet, nsIMemoryReporter)
|
|
|
|
// This class will process prefix size between 4~32. But for 4 bytes prefixes,
|
|
// they will be passed to nsUrlClassifierPrefixSet because of better
|
|
// optimization.
|
|
VariableLengthPrefixSet::VariableLengthPrefixSet()
|
|
: mLock("VariableLengthPrefixSet.mLock"),
|
|
mFixedPrefixSet(new nsUrlClassifierPrefixSet) {}
|
|
|
|
nsresult VariableLengthPrefixSet::Init(const nsACString& aName) {
|
|
mName = aName;
|
|
mMemoryReportPath = nsPrintfCString(
|
|
"explicit/storage/prefix-set/%s",
|
|
(!aName.IsEmpty() ? PromiseFlatCString(aName).get() : "?!"));
|
|
|
|
RegisterWeakMemoryReporter(this);
|
|
|
|
return mFixedPrefixSet->Init(aName);
|
|
}
|
|
|
|
VariableLengthPrefixSet::~VariableLengthPrefixSet() {
|
|
UnregisterWeakMemoryReporter(this);
|
|
}
|
|
|
|
nsresult VariableLengthPrefixSet::SetPrefixes(AddPrefixArray& aAddPrefixes,
|
|
AddCompleteArray& aAddCompletes) {
|
|
MutexAutoLock lock(mLock);
|
|
|
|
// We may modify the prefix string in this function, clear this data
|
|
// before returning to ensure no one use the data after this API.
|
|
auto scopeExit = MakeScopeExit([&]() {
|
|
aAddPrefixes.Clear();
|
|
aAddCompletes.Clear();
|
|
});
|
|
|
|
// Clear old prefixSet before setting new one.
|
|
mFixedPrefixSet->SetPrefixes(nullptr, 0);
|
|
mVLPrefixSet.Clear();
|
|
|
|
// Build fixed-length prefix set
|
|
nsTArray<uint32_t> array;
|
|
if (!array.SetCapacity(aAddPrefixes.Length(), fallible)) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
for (size_t i = 0; i < aAddPrefixes.Length(); i++) {
|
|
array.AppendElement(aAddPrefixes[i].PrefixHash().ToUint32());
|
|
}
|
|
|
|
#ifdef DEBUG
|
|
// PrefixSet requires sorted order
|
|
EnsureSorted(&array);
|
|
#endif
|
|
|
|
nsresult rv = mFixedPrefixSet->SetPrefixes(array.Elements(), array.Length());
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
#ifdef DEBUG
|
|
uint32_t size;
|
|
size = mFixedPrefixSet->SizeOfIncludingThis(moz_malloc_size_of);
|
|
LOG(("SB tree done, size = %d bytes\n", size));
|
|
#endif
|
|
|
|
CompletionArray completions;
|
|
for (size_t i = 0; i < aAddCompletes.Length(); i++) {
|
|
completions.AppendElement(aAddCompletes[i].CompleteHash());
|
|
}
|
|
completions.Sort();
|
|
|
|
UniquePtr<nsCString> completionStr(new nsCString);
|
|
completionStr->SetCapacity(completions.Length() * COMPLETE_SIZE);
|
|
for (size_t i = 0; i < completions.Length(); i++) {
|
|
const char* buf = reinterpret_cast<const char*>(completions[i].buf);
|
|
completionStr->Append(buf, COMPLETE_SIZE);
|
|
}
|
|
mVLPrefixSet.InsertOrUpdate(COMPLETE_SIZE, std::move(completionStr));
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
nsresult VariableLengthPrefixSet::SetPrefixes(PrefixStringMap& aPrefixMap) {
|
|
MutexAutoLock lock(mLock);
|
|
|
|
// We may modify the prefix string in this function, clear this data
|
|
// before returning to ensure no one use the data after this API.
|
|
auto scopeExit = MakeScopeExit([&]() { aPrefixMap.Clear(); });
|
|
|
|
// Prefix size should not less than 4-bytes or greater than 32-bytes
|
|
for (const auto& key : aPrefixMap.Keys()) {
|
|
if (key < PREFIX_SIZE_FIXED || key > COMPLETE_SIZE) {
|
|
return NS_ERROR_FAILURE;
|
|
}
|
|
}
|
|
|
|
// Clear old prefixSet before setting new one.
|
|
mFixedPrefixSet->SetPrefixes(nullptr, 0);
|
|
mVLPrefixSet.Clear();
|
|
|
|
// 4-bytes prefixes are handled by nsUrlClassifierPrefixSet.
|
|
nsCString* prefixes = aPrefixMap.Get(PREFIX_SIZE_FIXED);
|
|
if (prefixes) {
|
|
NS_ENSURE_TRUE(prefixes->Length() % PREFIX_SIZE_FIXED == 0,
|
|
NS_ERROR_FAILURE);
|
|
|
|
uint32_t numPrefixes = prefixes->Length() / PREFIX_SIZE_FIXED;
|
|
|
|
// Prefixes are lexicographically-sorted, so the interger array
|
|
// passed to nsUrlClassifierPrefixSet should also follow the same order.
|
|
// Reverse byte order in-place in Little-Endian platform.
|
|
#if MOZ_LITTLE_ENDIAN()
|
|
char* begin = prefixes->BeginWriting();
|
|
char* end = prefixes->EndWriting();
|
|
|
|
while (begin != end) {
|
|
uint32_t* p = reinterpret_cast<uint32_t*>(begin);
|
|
*p = BigEndian::readUint32(begin);
|
|
begin += sizeof(uint32_t);
|
|
}
|
|
#endif
|
|
const uint32_t* arrayPtr =
|
|
reinterpret_cast<const uint32_t*>(prefixes->BeginReading());
|
|
|
|
nsresult rv = mFixedPrefixSet->SetPrefixes(arrayPtr, numPrefixes);
|
|
if (NS_WARN_IF(NS_FAILED(rv))) {
|
|
return rv;
|
|
}
|
|
}
|
|
|
|
// 5~32 bytes prefixes are stored in mVLPrefixSet.
|
|
for (const auto& entry : aPrefixMap) {
|
|
// Skip 4bytes prefixes because it is already stored in mFixedPrefixSet.
|
|
if (entry.GetKey() == PREFIX_SIZE_FIXED) {
|
|
continue;
|
|
}
|
|
|
|
mVLPrefixSet.InsertOrUpdate(entry.GetKey(),
|
|
MakeUnique<nsCString>(*entry.GetData()));
|
|
}
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
nsresult VariableLengthPrefixSet::GetPrefixes(PrefixStringMap& aPrefixMap) {
|
|
MutexAutoLock lock(mLock);
|
|
|
|
// 4-bytes prefixes are handled by nsUrlClassifierPrefixSet.
|
|
FallibleTArray<uint32_t> array;
|
|
nsresult rv = mFixedPrefixSet->GetPrefixesNative(array);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
size_t count = array.Length();
|
|
if (count) {
|
|
UniquePtr<nsCString> prefixes(new nsCString());
|
|
if (!prefixes->SetLength(PREFIX_SIZE_FIXED * count, fallible)) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
// Writing integer array to character array
|
|
uint32_t* begin = reinterpret_cast<uint32_t*>(prefixes->BeginWriting());
|
|
for (uint32_t i = 0; i < count; i++) {
|
|
begin[i] = NativeEndian::swapToBigEndian(array[i]);
|
|
}
|
|
|
|
aPrefixMap.InsertOrUpdate(PREFIX_SIZE_FIXED, std::move(prefixes));
|
|
}
|
|
|
|
// Copy variable-length prefix set
|
|
for (const auto& entry : mVLPrefixSet) {
|
|
aPrefixMap.InsertOrUpdate(entry.GetKey(),
|
|
MakeUnique<nsCString>(*entry.GetData()));
|
|
}
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
// This is used by V2 protocol which prefixes are either 4-bytes or 32-bytes.
|
|
nsresult VariableLengthPrefixSet::GetFixedLengthPrefixes(
|
|
FallibleTArray<uint32_t>* aPrefixes,
|
|
FallibleTArray<nsCString>* aCompletes) {
|
|
MOZ_ASSERT(aPrefixes || aCompletes);
|
|
MOZ_ASSERT_IF(aPrefixes, aPrefixes->IsEmpty());
|
|
MOZ_ASSERT_IF(aCompletes, aCompletes->IsEmpty());
|
|
|
|
if (aPrefixes) {
|
|
nsresult rv = mFixedPrefixSet->GetPrefixesNative(*aPrefixes);
|
|
if (NS_FAILED(rv)) {
|
|
return rv;
|
|
}
|
|
}
|
|
|
|
if (aCompletes) {
|
|
nsCString* completes = mVLPrefixSet.Get(COMPLETE_SIZE);
|
|
if (completes) {
|
|
uint32_t count = completes->Length() / COMPLETE_SIZE;
|
|
if (!aCompletes->SetCapacity(count, fallible)) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
for (uint32_t i = 0; i < count; i++) {
|
|
// SetCapacity was just called, these cannot fail.
|
|
(void)aCompletes->AppendElement(
|
|
Substring(*completes, i * COMPLETE_SIZE, COMPLETE_SIZE), fallible);
|
|
}
|
|
}
|
|
}
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
nsresult VariableLengthPrefixSet::GetFixedLengthPrefixByIndex(
|
|
uint32_t aIndex, uint32_t* aOutPrefix) const {
|
|
NS_ENSURE_ARG_POINTER(aOutPrefix);
|
|
|
|
return mFixedPrefixSet->GetPrefixByIndex(aIndex, aOutPrefix);
|
|
}
|
|
|
|
// It should never be the case that more than one hash prefixes match a given
|
|
// full hash. However, if that happens, this method returns any one of them.
|
|
// It does not guarantee which one of those will be returned.
|
|
nsresult VariableLengthPrefixSet::Matches(uint32_t aPrefix,
|
|
const nsACString& aFullHash,
|
|
uint32_t* aLength) const {
|
|
MutexAutoLock lock(mLock);
|
|
|
|
// Only allow full-length hash to check if match any of the prefix
|
|
MOZ_ASSERT(aFullHash.Length() == COMPLETE_SIZE);
|
|
NS_ENSURE_ARG_POINTER(aLength);
|
|
|
|
*aLength = 0;
|
|
|
|
// Check if it matches 4-bytes prefixSet first
|
|
bool found = false;
|
|
nsresult rv = mFixedPrefixSet->Contains(aPrefix, &found);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
if (found) {
|
|
*aLength = PREFIX_SIZE_FIXED;
|
|
return NS_OK;
|
|
}
|
|
|
|
for (const auto& entry : mVLPrefixSet) {
|
|
if (BinarySearch(aFullHash, *entry.GetData(), entry.GetKey())) {
|
|
*aLength = entry.GetKey();
|
|
MOZ_ASSERT(*aLength > 4);
|
|
return NS_OK;
|
|
}
|
|
}
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
nsresult VariableLengthPrefixSet::IsEmpty(bool* aEmpty) const {
|
|
MutexAutoLock lock(mLock);
|
|
|
|
NS_ENSURE_ARG_POINTER(aEmpty);
|
|
|
|
mFixedPrefixSet->IsEmpty(aEmpty);
|
|
*aEmpty = *aEmpty && mVLPrefixSet.IsEmpty();
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
nsresult VariableLengthPrefixSet::LoadPrefixes(nsCOMPtr<nsIInputStream>& in) {
|
|
MutexAutoLock lock(mLock);
|
|
|
|
// First read prefixes from fixed-length prefix set
|
|
nsresult rv = mFixedPrefixSet->LoadPrefixes(in);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
// Then read prefixes from variable-length prefix set
|
|
uint32_t magic;
|
|
uint32_t read;
|
|
|
|
rv = in->Read(reinterpret_cast<char*>(&magic), sizeof(uint32_t), &read);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
|
|
|
|
if (magic != PREFIXSET_VERSION_MAGIC) {
|
|
LOG(("[%s] Version magic mismatch, not loading", mName.get()));
|
|
return NS_ERROR_FILE_CORRUPTED;
|
|
}
|
|
|
|
mVLPrefixSet.Clear();
|
|
|
|
uint32_t count;
|
|
rv = in->Read(reinterpret_cast<char*>(&count), sizeof(uint32_t), &read);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
|
|
|
|
uint32_t totalPrefixes = 0;
|
|
for (; count > 0; count--) {
|
|
uint8_t prefixSize;
|
|
rv = in->Read(reinterpret_cast<char*>(&prefixSize), sizeof(uint8_t), &read);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(read == sizeof(uint8_t), NS_ERROR_FAILURE);
|
|
|
|
if (prefixSize < PREFIX_SIZE || prefixSize > COMPLETE_SIZE) {
|
|
return NS_ERROR_FILE_CORRUPTED;
|
|
}
|
|
|
|
uint32_t stringLength;
|
|
rv = in->Read(reinterpret_cast<char*>(&stringLength), sizeof(uint32_t),
|
|
&read);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(read == sizeof(uint32_t), NS_ERROR_FAILURE);
|
|
|
|
NS_ENSURE_TRUE(stringLength % prefixSize == 0, NS_ERROR_FILE_CORRUPTED);
|
|
uint32_t prefixCount = stringLength / prefixSize;
|
|
|
|
UniquePtr<nsCString> vlPrefixes(new nsCString());
|
|
if (!vlPrefixes->SetLength(stringLength, fallible)) {
|
|
return NS_ERROR_OUT_OF_MEMORY;
|
|
}
|
|
|
|
rv = in->Read(reinterpret_cast<char*>(vlPrefixes->BeginWriting()),
|
|
stringLength, &read);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(read == stringLength, NS_ERROR_FAILURE);
|
|
|
|
mVLPrefixSet.InsertOrUpdate(prefixSize, std::move(vlPrefixes));
|
|
totalPrefixes += prefixCount;
|
|
LOG(("[%s] Loaded %u %u-byte prefixes", mName.get(), prefixCount,
|
|
prefixSize));
|
|
}
|
|
|
|
LOG(("[%s] Loading VLPrefixSet successful (%u total prefixes)", mName.get(),
|
|
totalPrefixes));
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
uint32_t VariableLengthPrefixSet::CalculatePreallocateSize() const {
|
|
uint32_t fileSize = 0;
|
|
|
|
// Size of fixed length prefix set.
|
|
fileSize += mFixedPrefixSet->CalculatePreallocateSize();
|
|
|
|
// Size of variable length prefix set.
|
|
// Store how many prefix string.
|
|
fileSize += sizeof(uint32_t);
|
|
|
|
for (const auto& data : mVLPrefixSet.Values()) {
|
|
// Store prefix size, prefix string length, and prefix string.
|
|
fileSize += sizeof(uint8_t);
|
|
fileSize += sizeof(uint32_t);
|
|
fileSize += data->Length();
|
|
}
|
|
return fileSize;
|
|
}
|
|
|
|
uint32_t VariableLengthPrefixSet::FixedLengthPrefixLength() const {
|
|
return mFixedPrefixSet->Length();
|
|
}
|
|
|
|
nsresult VariableLengthPrefixSet::WritePrefixes(
|
|
nsCOMPtr<nsIOutputStream>& out) const {
|
|
MutexAutoLock lock(mLock);
|
|
|
|
// First, write fixed length prefix set
|
|
nsresult rv = mFixedPrefixSet->WritePrefixes(out);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
|
|
// Then, write variable length prefix set
|
|
uint32_t written;
|
|
uint32_t writelen = sizeof(uint32_t);
|
|
uint32_t magic = PREFIXSET_VERSION_MAGIC;
|
|
rv = out->Write(reinterpret_cast<char*>(&magic), writelen, &written);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
|
|
|
|
uint32_t count = mVLPrefixSet.Count();
|
|
rv = out->Write(reinterpret_cast<char*>(&count), writelen, &written);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
|
|
|
|
// Store PrefixSize, Length of Prefix String and then Prefix String
|
|
for (const auto& entry : mVLPrefixSet) {
|
|
const nsCString& vlPrefixes = *entry.GetData();
|
|
|
|
uint8_t prefixSize = entry.GetKey();
|
|
writelen = sizeof(uint8_t);
|
|
rv = out->Write(reinterpret_cast<char*>(&prefixSize), writelen, &written);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
|
|
|
|
uint32_t stringLength = vlPrefixes.Length();
|
|
writelen = sizeof(uint32_t);
|
|
rv = out->Write(reinterpret_cast<char*>(&stringLength), writelen, &written);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(written == writelen, NS_ERROR_FAILURE);
|
|
|
|
rv = out->Write(const_cast<char*>(vlPrefixes.BeginReading()), stringLength,
|
|
&written);
|
|
NS_ENSURE_SUCCESS(rv, rv);
|
|
NS_ENSURE_TRUE(stringLength == written, NS_ERROR_FAILURE);
|
|
}
|
|
|
|
return NS_OK;
|
|
}
|
|
|
|
bool VariableLengthPrefixSet::BinarySearch(const nsACString& aFullHash,
|
|
const nsACString& aPrefixes,
|
|
uint32_t aPrefixSize) const {
|
|
const char* fullhash = aFullHash.BeginReading();
|
|
const char* prefixes = aPrefixes.BeginReading();
|
|
int32_t begin = 0, end = aPrefixes.Length() / aPrefixSize;
|
|
|
|
while (end > begin) {
|
|
int32_t mid = (begin + end) >> 1;
|
|
int cmp = memcmp(fullhash, prefixes + mid * aPrefixSize, aPrefixSize);
|
|
if (cmp < 0) {
|
|
end = mid;
|
|
} else if (cmp > 0) {
|
|
begin = mid + 1;
|
|
} else {
|
|
return true;
|
|
}
|
|
}
|
|
return false;
|
|
}
|
|
|
|
MOZ_DEFINE_MALLOC_SIZE_OF(UrlClassifierMallocSizeOf)
|
|
|
|
NS_IMETHODIMP
|
|
VariableLengthPrefixSet::CollectReports(nsIHandleReportCallback* aHandleReport,
|
|
nsISupports* aData, bool aAnonymize) {
|
|
MOZ_ASSERT(NS_IsMainThread());
|
|
|
|
size_t amount = SizeOfIncludingThis(UrlClassifierMallocSizeOf);
|
|
|
|
return aHandleReport->Callback(
|
|
""_ns, mMemoryReportPath, KIND_HEAP, UNITS_BYTES, amount,
|
|
nsLiteralCString("Memory used by the variable-length prefix set for a "
|
|
"URL classifier."),
|
|
aData);
|
|
}
|
|
|
|
size_t VariableLengthPrefixSet::SizeOfIncludingThis(
|
|
mozilla::MallocSizeOf aMallocSizeOf) const {
|
|
MutexAutoLock lock(mLock);
|
|
|
|
size_t n = 0;
|
|
n += aMallocSizeOf(this);
|
|
|
|
n += mFixedPrefixSet->SizeOfIncludingThis(moz_malloc_size_of) -
|
|
aMallocSizeOf(mFixedPrefixSet);
|
|
|
|
n += mVLPrefixSet.ShallowSizeOfExcludingThis(aMallocSizeOf);
|
|
for (const auto& data : mVLPrefixSet.Values()) {
|
|
n += data->SizeOfExcludingThisIfUnshared(aMallocSizeOf);
|
|
}
|
|
|
|
return n;
|
|
}
|
|
|
|
} // namespace mozilla::safebrowsing
|