1 files changed, 667 insertions, 0 deletions
diff --git a/netwerk/cache2/CacheFileUtils.cpp b/netwerk/cache2/CacheFileUtils.cpp
new file mode 100644
index 0000000000..1e158f394a
--- /dev/null
+++ b/netwerk/cache2/CacheFileUtils.cpp
@@ -0,0 +1,667 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+#include "CacheIndex.h"
+#include "CacheLog.h"
+#include "CacheFileUtils.h"
+#include "CacheObserver.h"
+#include "LoadContextInfo.h"
+#include "mozilla/Tokenizer.h"
+#include "mozilla/Telemetry.h"
+#include "nsCOMPtr.h"
+#include "nsString.h"
+#include <algorithm>
+#include "mozilla/Unused.h"
+
+namespace mozilla::net::CacheFileUtils {
+
+// This designates the format for the "alt-data" metadata.
+// When the format changes we need to update the version.
+static uint32_t const kAltDataVersion = 1;
+const char* kAltDataKey = "alt-data";
+
+namespace {
+
+/**
+ * A simple recursive descent parser for the mapping key ("<tags>:<cache key>").
+ */
+class KeyParser : protected Tokenizer {
+ public:
+  explicit KeyParser(nsACString const& aInput)
+      : Tokenizer(aInput),
+        isAnonymous(false)
+        // originAttribs, idEnhance and cacheKey are default-constructed
+        ,
+        lastTag(0) {}
+
+ private:
+  // Results, filled in by ParseTags()
+  OriginAttributes originAttribs;
+  bool isAnonymous;
+  nsCString idEnhance;
+  nsDependentCSubstring cacheKey;
+
+  // Keeps the last tag name, used for alphabetical sort checking
+  char lastTag;
+
+  // Classifier for the 'tag' character valid range: ' ' (0x20) up to 0x7f.
+  // The char is compared as unsigned char; bytes >= 0x80 would be negative
+  // where char is signed and are rejected by the upper bound check here.
+  static bool TagChar(const char aChar) {
+    unsigned char c = static_cast<unsigned char>(aChar);
+    return c >= ' ' && c <= '\x7f';
+  }
+  // Parses one tag (and its value, if any), then recurses for the next one.
+  bool ParseTags() {
+    // Expects to be at the tag name or at the end
+    if (CheckEOF()) {
+      return true;
+    }
+
+    char tag;
+    if (!ReadChar(&TagChar, &tag)) {
+      return false;
+    }
+
+    // Tags must strictly ascend in ASCII order (':' is exempt), else hard-fail
+    if (!(lastTag < tag || tag == ':')) {
+      return false;
+    }
+    lastTag = tag;
+
+    switch (tag) {
+      case ':':
+        // last possible tag, when present there is the cacheKey following,
+        // not terminated with ',' and no need to unescape.
+        cacheKey.Rebind(mCursor, mEnd - mCursor);
+        return true;
+      case 'O': {
+        nsAutoCString originSuffix;
+        if (!ParseValue(&originSuffix) ||
+            !originAttribs.PopulateFromSuffix(originSuffix)) {
+          return false;
+        }
+        break;
+      }
+      case 'p':
+        originAttribs.SyncAttributesWithPrivateBrowsing(true);
+        break;
+      case 'b':
+        // Leaving to be able to read and understand old-formatted entries
+        originAttribs.mInIsolatedMozBrowser = true;
+        break;
+      case 'a':
+        isAnonymous = true;
+        break;
+      case 'i': {
+        // Kept to read old-formatted entries; the app id is parsed and dropped
+        uint32_t deprecatedAppId = 0;
+        if (!ReadInteger(&deprecatedAppId)) {
+          return false;  // not a valid 32-bit integer
+        }
+        break;
+      }
+      case '~':
+        if (!ParseValue(&idEnhance)) {
+          return false;
+        }
+        break;
+      default:
+        if (!ParseValue()) {  // skip any tag values, optional
+          return false;
+        }
+        break;
+    }
+
+    // We expect a comma after every tag
+    if (!CheckChar(',')) {
+      return false;
+    }
+
+    // Recurse to the next tag
+    return ParseTags();
+  }
+  // Reads a value up to an unescaped ',' (left unconsumed); ',,' means ','.
+  bool ParseValue(nsACString* result = nullptr) {
+    // If at the end, fail since we expect a comma; the value may be empty though
+    if (CheckEOF()) {
+      return false;
+    }
+
+    Token t;
+    while (Next(t)) {
+      if (!Token::Char(',').Equals(t)) {
+        if (result) {
+          result->Append(t.Fragment());
+        }
+        continue;
+      }
+
+      if (CheckChar(',')) {
+        // Two commas in a row, escaping
+        if (result) {
+          result->Append(',');
+        }
+        continue;
+      }
+
+      // We must give the comma back since the upper calls expect it
+      Rollback();
+      return true;
+    }
+
+    return false;
+  }
+
+ public:
+  already_AddRefed<LoadContextInfo> Parse() {
+    RefPtr<LoadContextInfo> info;  // stays null when ParseTags() fails
+    if (ParseTags()) {
+      info = GetLoadContextInfo(isAnonymous, originAttribs);
+    }
+
+    return info.forget();
+  }
+  // Accessors for the parsed cache key (after ':') and the '~' tag value.
+  void URISpec(nsACString& result) { result.Assign(cacheKey); }
+
+  void IdEnhance(nsACString& result) { result.Assign(idEnhance); }
+};
+
+}  // namespace
+
+// Parses aKey; the optional out-params are filled only when parsing succeeds.
+already_AddRefed<nsILoadContextInfo> ParseKey(const nsACString& aKey,
+                                              nsACString* aIdEnhance,
+                                              nsACString* aURISpec) {
+  KeyParser keyParser(aKey);
+  RefPtr<LoadContextInfo> parsed = keyParser.Parse();
+  if (!parsed) {
+    return nullptr;
+  }
+  if (aIdEnhance) keyParser.IdEnhance(*aIdEnhance);
+  if (aURISpec) keyParser.URISpec(*aURISpec);
+  return parsed.forget();
+}
+
+void AppendKeyPrefix(nsILoadContextInfo* aInfo, nsACString& _retval) {
+  /**
+   * Appends the load-context tags of a cache key to _retval; a null aInfo
+   * appends nothing.
+   *
+   * This key is used to salt file hashes.  When the form of the key changes,
+   * existing cache entries will no longer be found on disk.
+   *
+   * IMPORTANT NOTE:
+   * Keep the attributes list sorted according to their ASCII code.
+   */
+  if (!aInfo) {
+    return;
+  }
+
+  nsAutoCString originSuffix;
+  aInfo->OriginAttributesPtr()->CreateSuffix(originSuffix);
+  if (!originSuffix.IsEmpty()) {
+    AppendTagWithValue(_retval, 'O', originSuffix);
+  }
+
+  if (aInfo->IsAnonymous()) {
+    _retval.AppendLiteral("a,");
+  }
+  if (aInfo->IsPrivate()) {
+    _retval.AppendLiteral("p,");
+  }
+}
+
+// Appends "<aTag><aValue>," to aTarget. Every ',' inside aValue is doubled so
+// that it cannot be mistaken for the terminating comma.
+void AppendTagWithValue(nsACString& aTarget, char const aTag,
+                        const nsACString& aValue) {
+  aTarget.Append(aTag);
+
+  if (aValue.Contains(',')) {
+    // Escaping needs a private copy of the value to rewrite.
+    nsAutoCString doubled(aValue);
+    doubled.ReplaceSubstring(","_ns, ",,"_ns);
+    aTarget.Append(doubled);
+  } else if (!aValue.IsEmpty()) {
+    // Most likely case: nothing to escape, so append the value directly and
+    // save the extra copy.
+    aTarget.Append(aValue);
+  }
+
+  aTarget.Append(',');
+}
+
+// Parses aKey and reports via *_retval whether its load context info equals
+// aInfo. Returns NS_ERROR_FAILURE, leaving *_retval alone, if parsing fails.
+nsresult KeyMatchesLoadContextInfo(const nsACString& aKey,
+                                   nsILoadContextInfo* aInfo, bool* _retval) {
+  nsCOMPtr<nsILoadContextInfo> parsedInfo = ParseKey(aKey);
+  if (parsedInfo) {
+    *_retval = parsedInfo->Equals(aInfo);
+    return NS_OK;
+  }
+  return NS_ERROR_FAILURE;
+}
+
+ValidityPair::ValidityPair(uint32_t aOffset, uint32_t aLen)
+    : mOffset(aOffset), mLen(aLen) {}  // stores the range exactly as given
+
+bool ValidityPair::CanBeMerged(const ValidityPair& aOther) const {
+  // Two pairs are mergeable when either one starts inside the other's
+  // validity interval or directly at its end (they overlap or touch).
+  const bool otherStartsHere = IsInOrFollows(aOther.mOffset);
+  return otherStartsHere || aOther.IsInOrFollows(mOffset);
+}
+
+bool ValidityPair::IsInOrFollows(uint32_t aOffset) const {
+  return aOffset >= mOffset && aOffset <= mOffset + mLen;  // end inclusive
+}
+
+// Strict ordering of validity pairs: by offset first, then by length.
+bool ValidityPair::LessThan(const ValidityPair& aOther) const {
+  if (mOffset != aOther.mOffset) {
+    // Different starting points decide the order on their own.
+    return mOffset < aOther.mOffset;
+  }
+
+  // Same start: the shorter pair sorts first, and identical pairs are not
+  // "less than" each other.
+  return mLen < aOther.mLen;
+}
+
+// Grows this pair to the union of itself and aOther (which must be mergeable).
+void ValidityPair::Merge(const ValidityPair& aOther) {
+  MOZ_ASSERT(CanBeMerged(aOther));
+
+  const uint32_t mergedEnd =
+      std::max(mOffset + mLen, aOther.mOffset + aOther.mLen);
+  mOffset = std::min(mOffset, aOther.mOffset);
+  mLen = mergedEnd - mOffset;
+}
+
+void ValidityMap::Log() const {  // logs the pair count, then each pair
+  LOG(("ValidityMap::Log() - number of pairs: %zu", mMap.Length()));
+  for (uint32_t i = 0; i < mMap.Length(); i++) {
+    LOG(("    (%u, %u)", mMap[i].Offset() + 0, mMap[i].Len() + 0));
+  }
+}
+
+uint32_t ValidityMap::Length() const { return mMap.Length(); }  // pair count
+
+void ValidityMap::AddPair(uint32_t aOffset, uint32_t aLen) {
+  ValidityPair pair(aOffset, aLen);
+  // Goal: insert the pair in order and coalesce it with mergeable neighbours.
+  if (mMap.Length() == 0) {
+    mMap.AppendElement(pair);
+    return;
+  }
+
+  // Walk backwards to find where this pair belongs. It can overlap only with
+  // one preceding pair and all subsequent pairs.
+  uint32_t pos = 0;
+  for (pos = mMap.Length(); pos > 0;) {
+    --pos;
+
+    if (mMap[pos].LessThan(pair)) {
+      // The new pair should be either inserted after pos or merged with it.
+      if (mMap[pos].CanBeMerged(pair)) {
+        // Merge with the preceding pair
+        mMap[pos].Merge(pair);
+      } else {
+        // They don't overlap, element must be placed after pos element
+        ++pos;
+        if (pos == mMap.Length()) {
+          mMap.AppendElement(pair);
+        } else {
+          mMap.InsertElementAt(pos, pair);
+        }
+      }
+
+      break;
+    }
+
+    if (pos == 0) {
+      // No existing pair sorts before the new one, so it goes to the front.
+      mMap.InsertElementAt(0, pair);
+    }
+  }
+
+  // pos now points to merged or inserted pair, check whether it overlaps with
+  // subsequent pairs.
+  while (pos + 1 < mMap.Length()) {
+    if (mMap[pos].CanBeMerged(mMap[pos + 1])) {
+      mMap[pos].Merge(mMap[pos + 1]);
+      mMap.RemoveElementAt(pos + 1);
+    } else {
+      break;
+    }
+  }
+}
+
+void ValidityMap::Clear() { mMap.Clear(); }  // drops every stored pair
+
+size_t ValidityMap::SizeOfExcludingThis(
+    mozilla::MallocSizeOf mallocSizeOf) const {
+  return mMap.ShallowSizeOfExcludingThis(mallocSizeOf);  // delegates to mMap
+}
+
+ValidityPair& ValidityMap::operator[](uint32_t aIdx) {
+  return mMap.ElementAt(aIdx);  // mutable reference to the pair at aIdx
+}
+
+StaticMutex DetailedCacheHitTelemetry::sLock;
+uint32_t DetailedCacheHitTelemetry::sRecordCnt = 0;
+DetailedCacheHitTelemetry::HitRate
+    DetailedCacheHitTelemetry::sHRStats[kNumOfRanges];
+
+DetailedCacheHitTelemetry::HitRate::HitRate() { Reset(); }  // zeroed counters
+
+// Counts a single sample in this range.
+void DetailedCacheHitTelemetry::HitRate::AddRecord(ERecType aType) {
+  // Anything that is not a HIT is accounted as a miss.
+  auto& counter = (aType == HIT) ? mHitCnt : mMissCnt;
+
+  ++counter;
+}
+
+// Maps the hit ratio onto [0, aNumOfBuckets); 100% lands in the last bucket.
+uint32_t DetailedCacheHitTelemetry::HitRate::GetHitRateBucket(
+    uint32_t aNumOfBuckets) const {
+  const auto total = mHitCnt + mMissCnt;
+  if (total == 0 || aNumOfBuckets == 0) {
+    return 0;  // no samples or no buckets: never divide by zero / underflow
+  }
+  const uint32_t bucketIdx = (aNumOfBuckets * mHitCnt) / total;
+  return std::min<uint32_t>(bucketIdx, aNumOfBuckets - 1);
+}
+
+uint32_t DetailedCacheHitTelemetry::HitRate::Count() {
+  return mHitCnt + mMissCnt;  // total number of recorded samples
+}
+
+// Zeroes both the hit and the miss counter.
+void DetailedCacheHitTelemetry::HitRate::Reset() {
+  mHitCnt = mMissCnt = 0;
+}
+
+// static - records one HIT/MISS sample and periodically reports hit rates
+void DetailedCacheHitTelemetry::AddRecord(ERecType aType,
+                                          TimeStamp aLoadStart) {
+  bool isUpToDate = false;
+  CacheIndex::IsUpToDate(&isUpToDate);
+  if (!isUpToDate) {
+    // Ignore the record when the entry file count might be incorrect
+    return;
+  }
+
+  uint32_t entryCount;
+  nsresult rv = CacheIndex::GetEntryFileCount(&entryCount);
+  if (NS_FAILED(rv)) {
+    return;
+  }
+  // Bucket the sample by the number of entry files reported by the index.
+  uint32_t rangeIdx = entryCount / kRangeSize;
+  if (rangeIdx >= kNumOfRanges) {  // The last range has no upper limit.
+    rangeIdx = kNumOfRanges - 1;
+  }
+
+  uint32_t hitMissValue = 2 * rangeIdx;  // 2 values per range
+  if (aType == MISS) {                   // The order is HIT, MISS
+    ++hitMissValue;
+  }
+  // The static counters below are updated while holding sLock.
+  StaticMutexAutoLock lock(sLock);
+
+  if (aType == MISS) {
+    mozilla::Telemetry::AccumulateTimeDelta(
+        mozilla::Telemetry::NETWORK_CACHE_V2_MISS_TIME_MS, aLoadStart);
+  } else {
+    mozilla::Telemetry::AccumulateTimeDelta(
+        mozilla::Telemetry::NETWORK_CACHE_V2_HIT_TIME_MS, aLoadStart);
+  }
+
+  Telemetry::Accumulate(Telemetry::NETWORK_CACHE_HIT_MISS_STAT_PER_CACHE_SIZE,
+                        hitMissValue);
+
+  sHRStats[rangeIdx].AddRecord(aType);
+  ++sRecordCnt;
+  // Hit rates are reported only once per kTotalSamplesReportLimit samples.
+  if (sRecordCnt < kTotalSamplesReportLimit) {
+    return;
+  }
+
+  sRecordCnt = 0;
+  // Report (and then reset) only the ranges that gathered enough samples.
+  for (uint32_t i = 0; i < kNumOfRanges; ++i) {
+    if (sHRStats[i].Count() >= kHitRateSamplesReportLimit) {
+      // The telemetry enums are grouped by buckets as follows:
+      // Telemetry value : 0,1,2,3, ... ,19,20,21,22, ... ,398,399
+      // Hit rate bucket : 0,0,0,0, ... , 0, 1, 1, 1, ... , 19, 19
+      // Cache size range: 0,1,2,3, ... ,19, 0, 1, 2, ... , 18, 19
+      uint32_t bucketOffset =
+          sHRStats[i].GetHitRateBucket(kHitRateBuckets) * kNumOfRanges;
+
+      Telemetry::Accumulate(Telemetry::NETWORK_CACHE_HIT_RATE_PER_CACHE_SIZE,
+                            bucketOffset + i);
+      sHRStats[i].Reset();
+    }
+  }
+}
+
+StaticMutex CachePerfStats::sLock;
+CachePerfStats::PerfData CachePerfStats::sData[CachePerfStats::LAST];
+uint32_t CachePerfStats::sCacheSlowCnt = 0;
+uint32_t CachePerfStats::sCacheNotSlowCnt = 0;
+
+CachePerfStats::MMA::MMA(uint32_t aTotalWeight, bool aFilter)  // empty MMA
+    : mSum(0), mSumSq(0), mCnt(0), mWeight(aTotalWeight), mFilter(aFilter) {}
+
+void CachePerfStats::MMA::AddValue(uint32_t aValue) {
+  if (mFilter) {
+    // Filter high spikes: drop values above avg + maxdiff = 2*avg + 3*stddev
+    uint32_t avg = GetAverage();
+    uint32_t stddev = GetStdDev();
+    uint32_t maxdiff = avg + (3 * stddev);
+    if (avg && aValue > avg + maxdiff) {
+      return;
+    }
+  }
+  // Accepted values update both the running sum and the sum of squares.
+  if (mCnt < mWeight) {
+    // Compute arithmetic average until we have at least mWeight values
+    CheckedInt<uint64_t> newSumSq = CheckedInt<uint64_t>(aValue) * aValue;
+    newSumSq += mSumSq;
+    if (!newSumSq.isValid()) {
+      return;  // ignore this value
+    }
+    mSumSq = newSumSq.value();
+    mSum += aValue;
+    ++mCnt;
+  } else {
+    CheckedInt<uint64_t> newSumSq = mSumSq - mSumSq / mCnt;
+    newSumSq += static_cast<uint64_t>(aValue) * aValue;
+    if (!newSumSq.isValid()) {
+      return;  // ignore this value
+    }
+    mSumSq = newSumSq.value();
+
+    // Compute modified moving average for more values:
+    // newAvg = ((weight - 1) * oldAvg + newValue) / weight
+    mSum -= GetAverage();
+    mSum += aValue;
+  }
+}
+
+// Returns mSum / mCnt, i.e. the average maintained by AddValue().
+uint32_t CachePerfStats::MMA::GetAverage() {
+  // Nothing recorded yet: report 0 instead of dividing by zero.
+  const bool empty = (mCnt == 0);
+
+  return empty ? 0 : mSum / mCnt;
+}
+
+uint32_t CachePerfStats::MMA::GetStdDev() {
+  if (mCnt == 0) {
+    return 0;
+  }
+  // Integer form of variance = mean(x^2) - mean(x)^2.
+  uint32_t avg = GetAverage();
+  uint64_t avgSq = static_cast<uint64_t>(avg) * avg;
+  uint64_t variance = mSumSq / mCnt;
+  if (variance < avgSq) {
+    // Due to rounding error when using integer data type, it can happen that
+    // average of squares of the values is smaller than square of the average
+    // of the values. In this case fix mSumSq.
+    variance = avgSq;
+    mSumSq = variance * mCnt;
+  }
+
+  variance -= avgSq;
+  return sqrt(static_cast<double>(variance));  // truncated to uint32_t
+}
+
+CachePerfStats::PerfData::PerfData()
+    : mFilteredAvg(50, true), mShortAvg(3, false) {}  // (weight, filter)
+
+// Always feeds the short average; the filtered one only if !aShortOnly.
+void CachePerfStats::PerfData::AddValue(uint32_t aValue, bool aShortOnly) {
+  const bool feedFiltered = !aShortOnly;
+  if (feedFiltered) mFilteredAvg.AddValue(aValue);
+  mShortAvg.AddValue(aValue);
+}
+
+uint32_t CachePerfStats::PerfData::GetAverage(bool aFiltered) {
+  return (aFiltered ? mFilteredAvg : mShortAvg).GetAverage();  // pick series
+}
+
+uint32_t CachePerfStats::PerfData::GetStdDev(bool aFiltered) {
+  return (aFiltered ? mFilteredAvg : mShortAvg).GetStdDev();  // pick series
+}
+
+// static - takes sLock, then forwards to the PerfData slot for aType
+void CachePerfStats::AddValue(EDataType aType, uint32_t aValue,
+                              bool aShortOnly) {
+  StaticMutexAutoLock lock(sLock);
+  sData[aType].AddValue(aValue, aShortOnly);
+}
+
+// static - takes sLock, then reads the average from the slot for aType
+uint32_t CachePerfStats::GetAverage(EDataType aType, bool aFiltered) {
+  StaticMutexAutoLock lock(sLock);
+  return sData[aType].GetAverage(aFiltered);
+}
+
+// static - takes sLock, then reads the std. deviation from the aType slot
+uint32_t CachePerfStats::GetStdDev(EDataType aType, bool aFiltered) {
+  StaticMutexAutoLock lock(sLock);
+  return sData[aType].GetStdDev(aFiltered);
+}
+
+// static - true if some short-term average far exceeds its filtered average
+bool CachePerfStats::IsCacheSlow() {
+  StaticMutexAutoLock lock(sLock);
+
+  // Compare mShortAvg with mFilteredAvg to find out whether cache is getting
+  // slower. Use only data about single IO operations because ENTRY_OPEN can be
+  // affected by more factors than a slow disk.
+  for (uint32_t i = 0; i < ENTRY_OPEN; ++i) {
+    if (i == IO_WRITE) {
+      // Skip this data type. IsCacheSlow is used for determining cache slowness
+      // when opening entries. Writes have low priority and it's normal that
+      // they are delayed a lot, but this doesn't necessarily affect opening
+      // cache entries.
+      continue;
+    }
+
+    uint32_t avgLong = sData[i].GetAverage(true);
+    if (avgLong == 0) {
+      // We have no perf data yet, skip this data type.
+      continue;
+    }
+    uint32_t avgShort = sData[i].GetAverage(false);
+    uint32_t stddevLong = sData[i].GetStdDev(true);
+    uint32_t maxdiff = avgLong + (3 * stddevLong);
+
+    if (avgShort > avgLong + maxdiff) {  // i.e. > 2*avgLong + 3*stddevLong
+      LOG(
+          ("CachePerfStats::IsCacheSlow() - result is slow based on perf "
+           "type %u [avgShort=%u, avgLong=%u, stddevLong=%u]",
+           i, avgShort, avgLong, stddevLong));
+      ++sCacheSlowCnt;
+      return true;
+    }
+  }
+  // None of the inspected data types looked slow; count that verdict too.
+  ++sCacheNotSlowCnt;
+  return false;
+}
+
+// static - copies out the slow / not-slow verdict counters under sLock
+void CachePerfStats::GetSlowStats(uint32_t* aSlow, uint32_t* aNotSlow) {
+  StaticMutexAutoLock lock(sLock);
+  *aSlow = sCacheSlowCnt;
+  *aNotSlow = sCacheNotSlowCnt;
+}
+
+void FreeBuffer(void* aBuf) {
+#ifndef NS_FREE_PERMANENT_DATA
+  if (CacheObserver::ShuttingDown()) {
+    return;  // during shutdown the buffer is intentionally not freed
+  }
+#endif
+
+  free(aBuf);
+}
+
+nsresult ParseAlternativeDataInfo(const char* aInfo, int64_t* _offset,
+                                  nsACString* _type) {
+  // The format is: "1;12345,javascript/binary"
+  //         <version>;<offset>,<type>
+  mozilla::Tokenizer p(aInfo, nullptr, "/");
+  uint32_t altDataVersion = 0;
+  int64_t altDataOffset = -1;
+
+  // Bail out when the version is unreadable or differs from kAltDataVersion.
+  if (!p.ReadInteger(&altDataVersion) || altDataVersion != kAltDataVersion) {
+    LOG(
+        ("ParseAlternativeDataInfo() - altDataVersion=%u, "
+         "expectedVersion=%u",
+         altDataVersion, kAltDataVersion));
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  if (!p.CheckChar(';') || !p.ReadInteger(&altDataOffset) ||
+      !p.CheckChar(',')) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+
+  // The requested alt-data representation is not available
+  if (altDataOffset < 0) {
+    return NS_ERROR_NOT_AVAILABLE;
+  }
+  // Both out-params are optional and are written only on success.
+  if (_offset) {
+    *_offset = altDataOffset;
+  }
+  // Whatever follows the comma, up to the end of the input, is the type.
+  if (_type) {
+    mozilla::Unused << p.ReadUntil(Tokenizer::Token::EndOfFile(), *_type);
+  }
+
+  return NS_OK;
+}
+
+void BuildAlternativeDataInfo(const char* aInfo, int64_t aOffset,
+                              nsACString& _retval) {
+  _retval.Truncate();  // rebuilt from scratch as "<version>;<offset>,<aInfo>"
+  _retval.AppendInt(kAltDataVersion);
+  _retval.Append(';');
+  _retval.AppendInt(aOffset);
+  _retval.Append(',');
+  _retval.Append(aInfo);
+}
+
+}  // namespace mozilla::net::CacheFileUtils