xpcom/rust/gtest/bench-collections/Bench.cpp


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297

/* This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/. */

// Overview
// --------
// This file measures the speed of various implementations of C++ and Rust
// collections (hash tables, etc.) used within the codebase. There are a small
// number of benchmarks for each collection type; each benchmark tests certain
// operations (insertion, lookup, iteration, etc.) More benchmarks could easily
// be envisioned, but this small number is enough to characterize the major
// differences between implementations, while keeping the file size and
// complexity low.
//
// Details
// -------
// The file uses `MOZ_GTEST_BENCH_F` so that results are integrated into
// PerfHerder. It is also designed so that individual test benchmarks can be
// run under a profiler.
//
// The C++ code uses `MOZ_RELEASE_ASSERT` extensively to check values and
// ensure operations aren't optimized away by the compiler. The Rust code uses
// `assert!()`. These should be roughly equivalent, but aren't guaranteed to be
// the same. As a result, the intra-C++ comparisons should be reliable, and the
// intra-Rust comparisons should be reliable, but the C++ vs. Rust comparisons
// may be less reliable.
//
// Note that the Rust implementations run very slowly without --enable-release.
//
// Profiling
// ---------
// If you want to measure a particular implementation under a profiler such as
// Callgrind, do something like this:
//
//   MOZ_RUN_GTEST=1 GTEST_FILTER='*BenchCollections*$IMPL*'
//       valgrind --tool=callgrind --callgrind-out-file=clgout
//       $OBJDIR/dist/bin/firefox -unittest
//   callgrind_annotate --auto=yes clgout > clgann
//
// where $IMPL is part of an implementation name in a test (e.g. "PLDHash",
// "MozHash") and $OBJDIR is an objdir containing a --enable-release build.
//
// Note that multiple processes are spawned, so `clgout` gets overwritten
// multiple times, but the last process to write its profiling data to file is
// the one of interest. (Alternatively, use --callgrind-out-file=clgout.%p to
// get separate output files for each process, with a PID suffix.)

#include "gtest/gtest.h"
#include "gtest/MozGTestBench.h"  // For MOZ_GTEST_BENCH
#include "mozilla/AllocPolicy.h"
#include "mozilla/ArrayUtils.h"
#include "mozilla/HashFunctions.h"
#include "mozilla/HashTable.h"
#include "mozilla/StaticMutex.h"
#include "mozilla/TimeStamp.h"
#include "PLDHashTable.h"
#include <unordered_set>

using namespace mozilla;

// This function gives a pseudo-random sequence with the following properties:
// - Deterministic and platform-independent.
// - No duplicates in the first VALS_LEN results, which is useful for ensuring
//   the tables get to a particular size, and also for guaranteeing lookups
//   that fail.
static uintptr_t MyRand() {
  static uintptr_t s = 0;
  s = s * 1103515245 + 12345;
  return s;
}

// Keep this in sync with Params in bench.rs.
struct Params {
  const char* mConfigName;
  size_t mNumInserts;            // Insert this many unique keys
  size_t mNumSuccessfulLookups;  // Does mNumInserts lookups each time
  size_t mNumFailingLookups;     // Does mNumInserts lookups each time
  size_t mNumIterations;         // Iterates the full table each time
  bool mRemoveInserts;           // Remove all entries at end?
};

// We don't use std::unordered_{set,map}, but it's an interesting thing to
// benchmark against.
//
// Keep this in sync with all the other Bench_*() functions.
static void Bench_Cpp_unordered_set(const Params* aParams, void** aVals,
                                    size_t aLen) {
  std::unordered_set<void*> hs;

  for (size_t j = 0; j < aParams->mNumInserts; j++) {
    hs.insert(aVals[j]);
  }

  for (size_t i = 0; i < aParams->mNumSuccessfulLookups; i++) {
    for (size_t j = 0; j < aParams->mNumInserts; j++) {
      MOZ_RELEASE_ASSERT(hs.find(aVals[j]) != hs.end());
    }
  }

  for (size_t i = 0; i < aParams->mNumFailingLookups; i++) {
    for (size_t j = aParams->mNumInserts; j < aParams->mNumInserts * 2; j++) {
      MOZ_RELEASE_ASSERT(hs.find(aVals[j]) == hs.end());
    }
  }

  for (size_t i = 0; i < aParams->mNumIterations; i++) {
    size_t n = 0;
    for (const auto& elem : hs) {
      (void)elem;
      n++;
    }
    MOZ_RELEASE_ASSERT(aParams->mNumInserts == n);
    MOZ_RELEASE_ASSERT(hs.size() == n);
  }

  if (aParams->mRemoveInserts) {
    for (size_t j = 0; j < aParams->mNumInserts; j++) {
      MOZ_RELEASE_ASSERT(hs.erase(aVals[j]) == 1);
    }
    MOZ_RELEASE_ASSERT(hs.size() == 0);
  } else {
    MOZ_RELEASE_ASSERT(hs.size() == aParams->mNumInserts);
  }
}

// Keep this in sync with all the other Bench_*() functions.
static void Bench_Cpp_PLDHashTable(const Params* aParams, void** aVals,
                                   size_t aLen) {
  PLDHashTable hs(PLDHashTable::StubOps(), sizeof(PLDHashEntryStub));

  for (size_t j = 0; j < aParams->mNumInserts; j++) {
    auto entry = static_cast<PLDHashEntryStub*>(hs.Add(aVals[j]));
    MOZ_RELEASE_ASSERT(!entry->key);
    entry->key = aVals[j];
  }

  for (size_t i = 0; i < aParams->mNumSuccessfulLookups; i++) {
    for (size_t j = 0; j < aParams->mNumInserts; j++) {
      MOZ_RELEASE_ASSERT(hs.Search(aVals[j]));
    }
  }

  for (size_t i = 0; i < aParams->mNumFailingLookups; i++) {
    for (size_t j = aParams->mNumInserts; j < aParams->mNumInserts * 2; j++) {
      MOZ_RELEASE_ASSERT(!hs.Search(aVals[j]));
    }
  }

  for (size_t i = 0; i < aParams->mNumIterations; i++) {
    size_t n = 0;
    for (auto iter = hs.Iter(); !iter.Done(); iter.Next()) {
      n++;
    }
    MOZ_RELEASE_ASSERT(aParams->mNumInserts == n);
    MOZ_RELEASE_ASSERT(hs.EntryCount() == n);
  }

  if (aParams->mRemoveInserts) {
    for (size_t j = 0; j < aParams->mNumInserts; j++) {
      hs.Remove(aVals[j]);
    }
    MOZ_RELEASE_ASSERT(hs.EntryCount() == 0);
  } else {
    MOZ_RELEASE_ASSERT(hs.EntryCount() == aParams->mNumInserts);
  }
}

// Keep this in sync with all the other Bench_*() functions.
static void Bench_Cpp_MozHashSet(const Params* aParams, void** aVals,
                                 size_t aLen) {
  mozilla::HashSet<void*, mozilla::DefaultHasher<void*>, MallocAllocPolicy> hs;

  for (size_t j = 0; j < aParams->mNumInserts; j++) {
    MOZ_RELEASE_ASSERT(hs.put(aVals[j]));
  }

  for (size_t i = 0; i < aParams->mNumSuccessfulLookups; i++) {
    for (size_t j = 0; j < aParams->mNumInserts; j++) {
      MOZ_RELEASE_ASSERT(hs.has(aVals[j]));
    }
  }

  for (size_t i = 0; i < aParams->mNumFailingLookups; i++) {
    for (size_t j = aParams->mNumInserts; j < aParams->mNumInserts * 2; j++) {
      MOZ_RELEASE_ASSERT(!hs.has(aVals[j]));
    }
  }

  for (size_t i = 0; i < aParams->mNumIterations; i++) {
    size_t n = 0;
    for (auto iter = hs.iter(); !iter.done(); iter.next()) {
      n++;
    }
    MOZ_RELEASE_ASSERT(aParams->mNumInserts == n);
    MOZ_RELEASE_ASSERT(hs.count() == n);
  }

  if (aParams->mRemoveInserts) {
    for (size_t j = 0; j < aParams->mNumInserts; j++) {
      hs.remove(aVals[j]);
    }
    MOZ_RELEASE_ASSERT(hs.count() == 0);
  } else {
    MOZ_RELEASE_ASSERT(hs.count() == aParams->mNumInserts);
  }
}

extern "C" {
void Bench_Rust_HashSet(const Params* params, void** aVals, size_t aLen);
void Bench_Rust_FnvHashSet(const Params* params, void** aVals, size_t aLen);
void Bench_Rust_FxHashSet(const Params* params, void** aVals, size_t aLen);
}

static const size_t VALS_LEN = 131072;

// Each benchmark measures a different aspect of performance.
// Note that no "Inserts" value can exceed VALS_LEN.
// Also, if any failing lookups are done, Inserts must be <= VALS_LEN/2.
const Params gParamsList[] = {
    // clang-format off
  //                           Successful Failing              Remove
  //                 Inserts   lookups    lookups  Iterations  inserts
  { "succ_lookups",  1024,     5000,      0,       0,          false },
  { "fail_lookups",  1024,     0,         5000,    0,          false },
  { "insert_remove", VALS_LEN, 0,         0,       0,          true  },
  { "iterate",       1024,     0,         0,       5000,       false },
    // clang-format on
};

class BenchCollections : public ::testing::Test {
 protected:
  void SetUp() override {
    StaticMutexAutoLock lock(sValsMutex);

    if (!sVals) {
      sVals = (void**)malloc(VALS_LEN * sizeof(void*));
      for (size_t i = 0; i < VALS_LEN; i++) {
        // This leaves the high 32 bits zero on 64-bit platforms, but that
        // should still be enough randomness to get typical behaviour.
        sVals[i] = reinterpret_cast<void*>(uintptr_t(MyRand()));
      }
    }

    printf("\n");
    for (size_t i = 0; i < ArrayLength(gParamsList); i++) {
      const Params* params = &gParamsList[i];
      printf("%14s", params->mConfigName);
    }
    printf("%14s\n", "total");
  }

 public:
  void BenchImpl(void (*aBench)(const Params*, void**, size_t)) {
    StaticMutexAutoLock lock(sValsMutex);

    double total = 0;
    for (size_t i = 0; i < ArrayLength(gParamsList); i++) {
      const Params* params = &gParamsList[i];
      TimeStamp t1 = TimeStamp::Now();
      aBench(params, sVals, VALS_LEN);
      TimeStamp t2 = TimeStamp::Now();
      double t = (t2 - t1).ToMilliseconds();
      printf("%11.1f ms", t);
      total += t;
    }
    printf("%11.1f ms\n", total);
  }

 private:
  // Random values used in the benchmarks.
  static void** sVals MOZ_GUARDED_BY(sValsMutex);

  // A mutex that protects all benchmark operations, ensuring that two
  // benchmarks never run concurrently.
  static StaticMutex sValsMutex;
};

void** BenchCollections::sVals;
StaticMutex BenchCollections::sValsMutex;

MOZ_GTEST_BENCH_F(BenchCollections, unordered_set,
                  [this] { BenchImpl(Bench_Cpp_unordered_set); });

MOZ_GTEST_BENCH_F(BenchCollections, PLDHash,
                  [this] { BenchImpl(Bench_Cpp_PLDHashTable); });

MOZ_GTEST_BENCH_F(BenchCollections, MozHash,
                  [this] { BenchImpl(Bench_Cpp_MozHashSet); });

MOZ_GTEST_BENCH_F(BenchCollections, RustHash,
                  [this] { BenchImpl(Bench_Rust_HashSet); });

MOZ_GTEST_BENCH_F(BenchCollections, RustFnvHash,
                  [this] { BenchImpl(Bench_Rust_FnvHashSet); });

MOZ_GTEST_BENCH_F(BenchCollections, RustFxHash,
                  [this] { BenchImpl(Bench_Rust_FxHashSet); });