third_party/libwebrtc/rtc_base/synchronization/mutex_benchmark.cc


1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96

/*
 *  Copyright 2020 The WebRTC Project Authors. All rights reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

#include "benchmark/benchmark.h"
#include "rtc_base/synchronization/mutex.h"
#include "rtc_base/system/unused.h"

namespace webrtc {

// Fixture operated on by the benchmark: a counter that is only modified while
// holding `mu_`, with a 64-byte padding array declared on each side of the
// mutex.
// NOTE(review): the padding is presumably there to keep `mu_` from sharing a
// cache line with neighbouring data - confirm against the target's line size.
class PerfTestData {
 public:
  PerfTestData() : cache_line_barrier_1_{}, cache_line_barrier_2_{} {
    // Touch both padding arrays so that they are not reported as unused.
    ++cache_line_barrier_1_[0];
    ++cache_line_barrier_2_[0];
  }

  // Adds `add` to the counter while holding `mu_`. Always returns 0; the
  // accumulated counter value is never exposed to the caller.
  int AddToCounter(int add) {
    MutexLock lock(&mu_);
    my_counter_ += add;
    return 0;
  }

 private:
  // Size, in bytes, of each padding array around the mutex.
  static constexpr int kBarrierBytes = 64;

  // Declaration order is deliberate: padding, mutex, padding, counter.
  uint8_t cache_line_barrier_1_[kBarrierBytes];
  Mutex mu_;
  uint8_t cache_line_barrier_2_[kBarrierBytes];
  int64_t my_counter_ = 0;
};

// Benchmark body: every iteration calls AddToCounter(2) on a single
// PerfTestData instance. The instance is a function-local static, so all
// threads running this benchmark operate on the same object and therefore on
// the same mutex.
void BM_LockWithMutex(benchmark::State& state) {
  static PerfTestData shared_data;
  for (auto iteration : state) {
    RTC_UNUSED(iteration);  // The loop variable only drives the iteration.
    int result = shared_data.AddToCounter(2);
    // Keep the call's result observable so the compiler cannot discard it.
    benchmark::DoNotOptimize(result);
  }
}

// Register the benchmark at increasing levels of contention: 1, 2 and 4
// threads, and finally one thread per CPU of the machine running it.
BENCHMARK(BM_LockWithMutex)->Threads(1);
BENCHMARK(BM_LockWithMutex)->Threads(2);
BENCHMARK(BM_LockWithMutex)->Threads(4);
BENCHMARK(BM_LockWithMutex)->ThreadPerCpu();

}  // namespace webrtc

/*

Results:

NB when reproducing: Remember to turn off power management features such as
CPU scaling before running!

pthreads (Linux):
----------------------------------------------------------------------
Run on (12 X 4500 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x6)
  L1 Instruction 32 KiB (x6)
  L2 Unified 1024 KiB (x6)
  L3 Unified 8448 KiB (x1)
Load Average: 0.26, 0.28, 0.44
----------------------------------------------------------------------
Benchmark                            Time             CPU   Iterations
----------------------------------------------------------------------
BM_LockWithMutex/threads:1        13.4 ns         13.4 ns     52192906
BM_LockWithMutex/threads:2        44.2 ns         88.4 ns      8189944
BM_LockWithMutex/threads:4        52.0 ns          198 ns      3743244
BM_LockWithMutex/threads:12       84.9 ns          944 ns       733524

std::mutex performs like the pthread implementation (Linux).

Abseil (Linux):
----------------------------------------------------------------------
Run on (12 X 4500 MHz CPU s)
CPU Caches:
  L1 Data 32 KiB (x6)
  L1 Instruction 32 KiB (x6)
  L2 Unified 1024 KiB (x6)
  L3 Unified 8448 KiB (x1)
Load Average: 0.27, 0.24, 0.37
----------------------------------------------------------------------
Benchmark                            Time             CPU   Iterations
----------------------------------------------------------------------
BM_LockWithMutex/threads:1        15.0 ns         15.0 ns     46550231
BM_LockWithMutex/threads:2        91.1 ns          182 ns      4059212
BM_LockWithMutex/threads:4        40.8 ns          131 ns      5496560
BM_LockWithMutex/threads:12       37.0 ns          130 ns      5377668

*/