96 lines
3.0 KiB
C++
96 lines
3.0 KiB
C++
/*
|
|
* Copyright 2020 The WebRTC Project Authors. All rights reserved.
|
|
*
|
|
* Use of this source code is governed by a BSD-style license
|
|
* that can be found in the LICENSE file in the root of the source
|
|
* tree. An additional intellectual property rights grant can be found
|
|
* in the file PATENTS. All contributing project authors may
|
|
* be found in the AUTHORS file in the root of the source tree.
|
|
*/
|
|
|
|
#include "benchmark/benchmark.h"
|
|
#include "rtc_base/synchronization/mutex.h"
|
|
#include "rtc_base/system/unused.h"
|
|
|
|
namespace webrtc {
|
|
|
|
class PerfTestData {
|
|
public:
|
|
PerfTestData() : cache_line_barrier_1_(), cache_line_barrier_2_() {
|
|
cache_line_barrier_1_[0]++; // Avoid 'is not used'.
|
|
cache_line_barrier_2_[0]++; // Avoid 'is not used'.
|
|
}
|
|
|
|
int AddToCounter(int add) {
|
|
MutexLock mu(&mu_);
|
|
my_counter_ += add;
|
|
return 0;
|
|
}
|
|
|
|
private:
|
|
uint8_t cache_line_barrier_1_[64];
|
|
Mutex mu_;
|
|
uint8_t cache_line_barrier_2_[64];
|
|
int64_t my_counter_ = 0;
|
|
};
|
|
|
|
// Benchmark body: each iteration adds 2 to a function-local static
// PerfTestData, so every thread running this benchmark goes through the same
// mutex-protected counter.
void BM_LockWithMutex(benchmark::State& state) {
  static PerfTestData shared_data;
  for (auto iteration : state) {
    // The loop variable only drives the iteration; mark it as used.
    RTC_UNUSED(iteration);
    benchmark::DoNotOptimize(shared_data.AddToCounter(2));
  }
}
|
|
|
|
// Register BM_LockWithMutex with 1, 2 and 4 threads, and once more with one
// thread per CPU (reported as threads:12 in the sample results below).
BENCHMARK(BM_LockWithMutex)->Threads(1);
|
|
BENCHMARK(BM_LockWithMutex)->Threads(2);
|
|
BENCHMARK(BM_LockWithMutex)->Threads(4);
|
|
BENCHMARK(BM_LockWithMutex)->ThreadPerCpu();
|
|
|
|
} // namespace webrtc
|
|
|
|
/*
|
|
|
|
Results:
|
|
|
|
NB when reproducing: Remember to turn off power management features such as CPU
|
|
scaling before running!
|
|
|
|
pthreads (Linux):
|
|
----------------------------------------------------------------------
|
|
Run on (12 X 4500 MHz CPU s)
|
|
CPU Caches:
|
|
L1 Data 32 KiB (x6)
|
|
L1 Instruction 32 KiB (x6)
|
|
L2 Unified 1024 KiB (x6)
|
|
L3 Unified 8448 KiB (x1)
|
|
Load Average: 0.26, 0.28, 0.44
|
|
----------------------------------------------------------------------
|
|
Benchmark Time CPU Iterations
|
|
----------------------------------------------------------------------
|
|
BM_LockWithMutex/threads:1 13.4 ns 13.4 ns 52192906
|
|
BM_LockWithMutex/threads:2 44.2 ns 88.4 ns 8189944
|
|
BM_LockWithMutex/threads:4 52.0 ns 198 ns 3743244
|
|
BM_LockWithMutex/threads:12 84.9 ns 944 ns 733524
|
|
|
|
std::mutex performs like the pthread implementation (Linux).
|
|
|
|
Abseil (Linux):
|
|
----------------------------------------------------------------------
|
|
Run on (12 X 4500 MHz CPU s)
|
|
CPU Caches:
|
|
L1 Data 32 KiB (x6)
|
|
L1 Instruction 32 KiB (x6)
|
|
L2 Unified 1024 KiB (x6)
|
|
L3 Unified 8448 KiB (x1)
|
|
Load Average: 0.27, 0.24, 0.37
|
|
----------------------------------------------------------------------
|
|
Benchmark Time CPU Iterations
|
|
----------------------------------------------------------------------
|
|
BM_LockWithMutex/threads:1 15.0 ns 15.0 ns 46550231
|
|
BM_LockWithMutex/threads:2 91.1 ns 182 ns 4059212
|
|
BM_LockWithMutex/threads:4 40.8 ns 131 ns 5496560
|
|
BM_LockWithMutex/threads:12 37.0 ns 130 ns 5377668
|
|
|
|
*/
|