Nagram/TMessagesProj/jni/voip/webrtc/absl/synchronization/mutex_benchmark.cc

// Copyright 2017 The Abseil Authors.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//      https://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <atomic>
#include <cstdint>
#include <limits>
#include <mutex>  // NOLINT(build/c++11)
#include <vector>

#include "absl/base/config.h"
#include "absl/base/internal/cycleclock.h"
#include "absl/base/internal/spinlock.h"
#include "absl/synchronization/blocking_counter.h"
#include "absl/synchronization/internal/thread_pool.h"
#include "absl/synchronization/mutex.h"
#include "benchmark/benchmark.h"

namespace {

// Measures one absl::MutexLock acquire/release pair per benchmark iteration.
// Every thread running this benchmark locks the same mutex.
void BM_Mutex(benchmark::State& state) {
  // Function-local static: allocated once, shared by all callers, and never
  // deleted.
  static absl::Mutex* mu = new absl::Mutex;
  for (auto _ : state) {
    // Acquired here; released when `lock` goes out of scope at the end of the
    // iteration.
    absl::MutexLock lock(mu);
  }
}
// Registered with real-time measurement, for a single thread and for the
// ThreadPerCpu() configuration.
BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu();

static void DelayNs(int64_t ns, int* data) {
  int64_t end = absl::base_internal::CycleClock::Now() +
                ns * absl::base_internal::CycleClock::Frequency() / 1e9;
  while (absl::base_internal::CycleClock::Now() < end) {
    ++(*data);
    benchmark::DoNotOptimize(*data);
  }
}

// Scoped lock holder used by the contention benchmarks.
//
// The primary template works with any mutex type exposing `Lock()` and
// `Unlock()` member functions. The mutex is locked in the constructor and
// unlocked in the destructor.
//
// The class is non-copyable: a copy would share the same mutex pointer and
// unlock it a second time when destroyed.
template <typename MutexType>
class RaiiLocker {
 public:
  // Locks `*mu`; `mu` must be non-null and outlive this object.
  explicit RaiiLocker(MutexType* mu) : mu_(mu) { mu_->Lock(); }
  RaiiLocker(const RaiiLocker&) = delete;
  RaiiLocker& operator=(const RaiiLocker&) = delete;
  // Unlocks the mutex acquired by the constructor.
  ~RaiiLocker() { mu_->Unlock(); }

 private:
  MutexType* mu_;
};

// Specialization for std::mutex, whose member functions are spelled
// `lock()` / `unlock()` rather than `Lock()` / `Unlock()`.
template <>
class RaiiLocker<std::mutex> {
 public:
  // Locks `*mu`; `mu` must be non-null and outlive this object.
  explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); }
  RaiiLocker(const RaiiLocker&) = delete;
  RaiiLocker& operator=(const RaiiLocker&) = delete;
  // Unlocks the mutex acquired by the constructor.
  ~RaiiLocker() { mu_->unlock(); }

 private:
  std::mutex* mu_;
};

// RAII object to change the Mutex priority of the running thread.
//
// Construction writes `priority` into the current thread's ThreadIdentity and
// pins it there; destruction un-pins it so the Mutex implementation refreshes
// the priority on its next read. The destructor does not itself restore the
// previous priority value.
//
// Instances must be created and destroyed on the same thread, because both
// the constructor and the destructor operate on the *current* thread's
// identity. The type is non-copyable so that a single override has exactly
// one owner.
class ScopedThreadMutexPriority {
 public:
  // Overrides the calling thread's Mutex priority with `priority`.
  explicit ScopedThreadMutexPriority(int priority) {
    absl::base_internal::ThreadIdentity* identity =
        absl::synchronization_internal::GetOrCreateCurrentThreadIdentity();
    identity->per_thread_synch.priority = priority;
    // Bump next_priority_read_cycles to the infinite future so that the
    // implementation doesn't re-read the thread's actual scheduler priority
    // and replace our temporary scoped priority.
    identity->per_thread_synch.next_priority_read_cycles =
        std::numeric_limits<int64_t>::max();
  }

  ScopedThreadMutexPriority(const ScopedThreadMutexPriority&) = delete;
  ScopedThreadMutexPriority& operator=(const ScopedThreadMutexPriority&) =
      delete;

  ~ScopedThreadMutexPriority() {
    // Reset the "next priority read time" back to the infinite past so that
    // the next time the Mutex implementation wants to know this thread's
    // priority, it re-reads it from the OS instead of using our overridden
    // priority.
    absl::synchronization_internal::GetOrCreateCurrentThreadIdentity()
        ->per_thread_synch.next_priority_read_cycles =
        std::numeric_limits<int64_t>::min();
  }
};

// Benchmarks the queueing (slow) acquisition path of a highly contended
// absl::Mutex. Each thread repeatedly takes the mutex and holds it until all
// other looping threads are blocked, so the waiter queue stays at its maximal
// length. state.range(0) ("multiple_priorities") selects whether the threads
// use one or two Mutex priority levels.
void BM_MutexEnqueue(benchmark::State& state) {
  // In the "multiple priorities" variant of the benchmark, one of the
  // threads runs with Mutex priority 0 while the rest run at elevated priority.
  // This benchmarks the performance impact of the presence of a low priority
  // waiter when a higher priority waiter adds itself to the queue
  // (b/175224064).
  //
  // NOTE: The actual scheduler priority is not modified in this benchmark:
  // all of the threads get CPU slices with the same priority. Only the
  // Mutex queueing behavior is modified.
  const bool multiple_priorities = state.range(0);
  ScopedThreadMutexPriority priority_setter(
      (multiple_priorities && state.thread_index() != 0) ? 1 : 0);

  // State shared by every thread running this benchmark.
  struct Shared {
    absl::Mutex mu;
    // Number of threads currently inside a batch of the benchmark loop.
    std::atomic<int> looping_threads{0};
    // Number of threads currently blocked, as reported through
    // PerThreadSem::SetThreadBlockedCounter below.
    std::atomic<int> blocked_threads{0};
    // True while some thread holds `mu` inside the batch loop.
    std::atomic<bool> thread_has_mutex{false};
  };
  // Function-local static: allocated once and never deleted.
  static Shared* shared = new Shared;

  // Set up 'blocked_threads' to count how many threads are currently blocked
  // in Abseil synchronization code.
  //
  // NOTE: Blocking done within the Google Benchmark library itself (e.g.
  // the barrier which synchronizes threads entering and exiting the benchmark
  // loop) does _not_ get registered in this counter. This is because Google
  // Benchmark uses its own synchronization primitives based on std::mutex, not
  // Abseil synchronization primitives. If at some point the benchmark library
  // merges into Abseil, this code may break.
  absl::synchronization_internal::PerThreadSem::SetThreadBlockedCounter(
      &shared->blocked_threads);

  // The benchmark framework may run several iterations in the same process,
  // reusing the same static-initialized 'shared' object. Given the semantics
  // of the members, here, we expect everything to be reset to zero by the
  // end of any iteration. Assert that's the case, just to be sure.
  ABSL_RAW_CHECK(
      shared->looping_threads.load(std::memory_order_relaxed) == 0 &&
          shared->blocked_threads.load(std::memory_order_relaxed) == 0 &&
          !shared->thread_has_mutex.load(std::memory_order_relaxed),
      "Shared state isn't zeroed at start of benchmark iteration");

  // Iterations are accounted for in batches of this many lock acquisitions.
  static constexpr int kBatchSize = 1000;
  while (state.KeepRunningBatch(kBatchSize)) {
    // Announce that this thread is participating in the current batch.
    shared->looping_threads.fetch_add(1);
    for (int i = 0; i < kBatchSize; i++) {
      {
        absl::MutexLock l(&shared->mu);
        shared->thread_has_mutex.store(true, std::memory_order_relaxed);
        // Spin until all other threads are either out of the benchmark loop
        // or blocked on the mutex. This ensures that the mutex queue is kept
        // at its maximal length to benchmark the performance of queueing on
        // a highly contended mutex.
        while (shared->looping_threads.load(std::memory_order_relaxed) -
                   shared->blocked_threads.load(std::memory_order_relaxed) !=
               1) {
        }
        shared->thread_has_mutex.store(false);
      }
      // Spin until some other thread has acquired the mutex before we block
      // again. This ensures that we always go through the slow (queueing)
      // acquisition path rather than reacquiring the mutex we just released.
      // The second condition ends the spin when this is the only thread left
      // in the loop, since then no other thread will ever take the mutex.
      while (!shared->thread_has_mutex.load(std::memory_order_relaxed) &&
             shared->looping_threads.load(std::memory_order_relaxed) > 1) {
      }
    }
    // The benchmark framework uses a barrier to ensure that all of the threads
    // complete their benchmark loop together before any of the threads exit
    // the loop. So, we need to remove ourselves from the "looping threads"
    // counter here before potentially blocking on that barrier. Otherwise,
    // another thread spinning above might wait forever for this thread to
    // block on the mutex while we in fact are waiting to exit.
    shared->looping_threads.fetch_add(-1);
  }
  // Detach the blocked-thread counter installed at the top of this function.
  absl::synchronization_internal::PerThreadSem::SetThreadBlockedCounter(
      nullptr);
}

// Registered for 4, 64, 128 and 512 threads, each with multiple_priorities
// set to false and to true.
BENCHMARK(BM_MutexEnqueue)
    ->Threads(4)
    ->Threads(64)
    ->Threads(128)
    ->Threads(512)
    ->ArgName("multiple_priorities")
    ->Arg(false)
    ->Arg(true);

// Contention benchmark for a mutex of type `MutexType`.
//
// Arguments:
//   state.range(0): nanoseconds of simulated work inside the critical section.
//   state.range(1): number of Mutex priority levels the threads are spread
//                   over (thread_index modulo this value).
template <typename MutexType>
void BM_Contended(benchmark::State& state) {
  ScopedThreadMutexPriority priority_setter(state.thread_index() %
                                            state.range(1));

  // One mutex plus the counter it protects, shared by all threads of a given
  // instantiation. Allocated once and never deleted.
  struct Contended {
    MutexType mu;
    int data = 0;
  };
  static auto* contended = new Contended;

  // Per-thread counter touched by the work done outside the lock.
  int unshared_counter = 0;
  for (auto _ : state) {
    // Each iteration models work outside the critical section followed by
    // work inside it, aiming for more or less realistic contention levels:
    //  * If contention is too low, the benchmark won't measure anything
    //    useful.
    //  * If contention is unrealistically high, the benchmark will favor bad
    //    mutex implementations that block and otherwise distract threads from
    //    the mutex and shared state for as much as possible.
    // The amount of local work is therefore multiplied by the number of
    // threads, which keeps the ratio between local work and critical section
    // approximately equal regardless of the number of threads.
    DelayNs(100 * state.threads(), &unshared_counter);
    RaiiLocker<MutexType> locker(&contended->mu);
    DelayNs(state.range(0), &contended->data);
  }
}
// Applies the configuration shared by all BM_Contended instantiations to `bm`:
// real-time measurement, an explicit list of thread counts, and the
// ("cs_ns", "num_prios") argument pairs. With `do_test_priorities` set, each
// critical-section length is registered with one and with two priority
// levels; otherwise only with one.
void SetupBenchmarkArgs(benchmark::internal::Benchmark* bm,
                        bool do_test_priorities) {
  bm->UseRealTime();
  // ThreadPerCpu poorly handles non-power-of-two CPU counts, so the thread
  // counts are listed explicitly.
  for (int thread_count :
       {1, 2, 4, 6, 8, 12, 16, 24, 32, 48, 64, 96, 128, 192, 256}) {
    bm->Threads(thread_count);
  }
  bm->ArgNames({"cs_ns", "num_prios"});

  const int priority_levels = do_test_priorities ? 2 : 1;
  // Some empirically chosen amounts of work in critical section.
  // 1 is low contention, 2000 is high contention and few values in between.
  for (int cs_ns : {1, 20, 50, 200, 2000}) {
    for (int prios = 1; prios <= priority_levels; ++prios) {
      bm->ArgPair(cs_ns, prios);
    }
  }
}

// absl::Mutex is the only instantiation registered with more than one
// priority level (do_test_priorities=true).
BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex)
    ->Apply([](benchmark::internal::Benchmark* bm) {
      SetupBenchmarkArgs(bm, /*do_test_priorities=*/true);
    });

// SpinLock instantiation, registered with a single priority level.
BENCHMARK_TEMPLATE(BM_Contended, absl::base_internal::SpinLock)
    ->Apply([](benchmark::internal::Benchmark* bm) {
      SetupBenchmarkArgs(bm, /*do_test_priorities=*/false);
    });

// std::mutex instantiation, registered with a single priority level.
BENCHMARK_TEMPLATE(BM_Contended, std::mutex)
    ->Apply([](benchmark::internal::Benchmark* bm) {
      SetupBenchmarkArgs(bm, /*do_test_priorities=*/false);
    });

// Measure the overhead of conditions on mutex release (when they must be
// evaluated).  Mutex has (some) support for equivalence classes allowing
// Conditions with the same function/argument to potentially not be multiply
// evaluated.
//
// num_classes==0 is used for the special case of every waiter being distinct.
void BM_ConditionWaiters(benchmark::State& state) {
  const int num_waiters = state.range(1);
  // A requested class count of zero means "no equivalence classes": every
  // waiter gets its own distinct condition argument.
  const int requested_classes = state.range(0);
  const int num_classes =
      requested_classes == 0 ? num_waiters : requested_classes;

  // Work executed by each pool thread: report readiness, then block on the
  // mutex until the int pointed to by `flag` becomes zero.
  const auto wait_until_zero = [](absl::BlockingCounter* ready,
                                  absl::Mutex* m, int* flag) {
    ready->DecrementCount();
    m->LockWhen(absl::Condition(
        static_cast<bool (*)(int*)>([](int* value) { return *value == 0; }),
        flag));
    m->Unlock();
  };

  absl::BlockingCounter init(num_waiters);
  absl::Mutex mu;
  // One flag per equivalence class; waiters stay blocked while it is nonzero.
  std::vector<int> equivalence_classes(num_classes, 1);

  // Must be declared last to be destroyed first.
  absl::synchronization_internal::ThreadPool pool(num_waiters);

  for (int i = 0; i < num_waiters; ++i) {
    // Mutex considers Conditions with the same function and argument
    // to be equivalent.
    pool.Schedule([&, i] {
      wait_until_zero(&init, &mu, &equivalence_classes[i % num_classes]);
    });
  }
  // Proceed only once every waiter has started running.
  init.Wait();

  for (auto _ : state) {
    mu.Lock();
    mu.Unlock();  // Each unlock requires Condition evaluation for our waiters.
  }

  // Release all waiters by clearing every class flag under the mutex.
  {
    absl::MutexLock release_lock(&mu);
    for (int& flag : equivalence_classes) {
      flag = 0;
    }
  }
}

// Some configurations have higher thread limits than others.
#if defined(__linux__) && !defined(ABSL_HAVE_THREAD_SANITIZER)
constexpr int kMaxConditionWaiters = 8192;
#else
constexpr int kMaxConditionWaiters = 1024;
#endif
BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters);

}  // namespace
Update to 7.0.0 (2060) 2020-08-14 16:58:22 +00:00			`// Copyright 2017 The Abseil Authors.`
			`//`
			`// Licensed under the Apache License, Version 2.0 (the "License");`
			`// you may not use this file except in compliance with the License.`
			`// You may obtain a copy of the License at`
			`//`
			`// https://www.apache.org/licenses/LICENSE-2.0`
			`//`
			`// Unless required by applicable law or agreed to in writing, software`
			`// distributed under the License is distributed on an "AS IS" BASIS,`
			`// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`// See the License for the specific language governing permissions and`
			`// limitations under the License.`

			`#include <cstdint>`
			`#include <mutex> // NOLINT(build/c++11)`
			`#include <vector>`

Update to 8.6.0 2022-03-11 16:49:54 +00:00			`#include "absl/base/config.h"`
Update to 7.0.0 (2060) 2020-08-14 16:58:22 +00:00			`#include "absl/base/internal/cycleclock.h"`
			`#include "absl/base/internal/spinlock.h"`
			`#include "absl/synchronization/blocking_counter.h"`
			`#include "absl/synchronization/internal/thread_pool.h"`
			`#include "absl/synchronization/mutex.h"`
			`#include "benchmark/benchmark.h"`

			`namespace {`

			`void BM_Mutex(benchmark::State& state) {`
			`static absl::Mutex* mu = new absl::Mutex;`
			`for (auto _ : state) {`
			`absl::MutexLock lock(mu);`
			`}`
			`}`
			`BENCHMARK(BM_Mutex)->UseRealTime()->Threads(1)->ThreadPerCpu();`

			`static void DelayNs(int64_t ns, int* data) {`
			`int64_t end = absl::base_internal::CycleClock::Now() +`
			`ns * absl::base_internal::CycleClock::Frequency() / 1e9;`
			`while (absl::base_internal::CycleClock::Now() < end) {`
			`++(*data);`
			`benchmark::DoNotOptimize(*data);`
			`}`
			`}`

			`template <typename MutexType>`
			`class RaiiLocker {`
			`public:`
			`explicit RaiiLocker(MutexType* mu) : mu_(mu) { mu_->Lock(); }`
			`~RaiiLocker() { mu_->Unlock(); }`
			`private:`
			`MutexType* mu_;`
			`};`

			`template <>`
			`class RaiiLocker<std::mutex> {`
			`public:`
			`explicit RaiiLocker(std::mutex* mu) : mu_(mu) { mu_->lock(); }`
			`~RaiiLocker() { mu_->unlock(); }`
			`private:`
			`std::mutex* mu_;`
			`};`

Update to 8.6.0 2022-03-11 16:49:54 +00:00			`// RAII object to change the Mutex priority of the running thread.`
			`class ScopedThreadMutexPriority {`
			`public:`
			`explicit ScopedThreadMutexPriority(int priority) {`
			`absl::base_internal::ThreadIdentity* identity =`
			`absl::synchronization_internal::GetOrCreateCurrentThreadIdentity();`
			`identity->per_thread_synch.priority = priority;`
			`// Bump next_priority_read_cycles to the infinite future so that the`
			`// implementation doesn't re-read the thread's actual scheduler priority`
			`// and replace our temporary scoped priority.`
			`identity->per_thread_synch.next_priority_read_cycles =`
			`std::numeric_limits<int64_t>::max();`
			`}`
			`~ScopedThreadMutexPriority() {`
			`// Reset the "next priority read time" back to the infinite past so that`
			`// the next time the Mutex implementation wants to know this thread's`
			`// priority, it re-reads it from the OS instead of using our overridden`
			`// priority.`
			`absl::synchronization_internal::GetOrCreateCurrentThreadIdentity()`
			`->per_thread_synch.next_priority_read_cycles =`
			`std::numeric_limits<int64_t>::min();`
			`}`
			`};`

			`void BM_MutexEnqueue(benchmark::State& state) {`
			`// In the "multiple priorities" variant of the benchmark, one of the`
			`// threads runs with Mutex priority 0 while the rest run at elevated priority.`
			`// This benchmarks the performance impact of the presence of a low priority`
			`// waiter when a higher priority waiter adds itself of the queue`
			`// (b/175224064).`
			`//`
			`// NOTE: The actual scheduler priority is not modified in this benchmark:`
			`// all of the threads get CPU slices with the same priority. Only the`
			`// Mutex queueing behavior is modified.`
			`const bool multiple_priorities = state.range(0);`
			`ScopedThreadMutexPriority priority_setter(`
			`(multiple_priorities && state.thread_index() != 0) ? 1 : 0);`

			`struct Shared {`
			`absl::Mutex mu;`
			`std::atomic<int> looping_threads{0};`
			`std::atomic<int> blocked_threads{0};`
			`std::atomic<bool> thread_has_mutex{false};`
			`};`
			`static Shared* shared = new Shared;`

			`// Set up 'blocked_threads' to count how many threads are currently blocked`
			`// in Abseil synchronization code.`
			`//`
			`// NOTE: Blocking done within the Google Benchmark library itself (e.g.`
			`// the barrier which synchronizes threads entering and exiting the benchmark`
			`// loop) does _not_ get registered in this counter. This is because Google`
			`// Benchmark uses its own synchronization primitives based on std::mutex, not`
			`// Abseil synchronization primitives. If at some point the benchmark library`
			`// merges into Abseil, this code may break.`
			`absl::synchronization_internal::PerThreadSem::SetThreadBlockedCounter(`
			`&shared->blocked_threads);`

			`// The benchmark framework may run several iterations in the same process,`
			`// reusing the same static-initialized 'shared' object. Given the semantics`
			`// of the members, here, we expect everything to be reset to zero by the`
			`// end of any iteration. Assert that's the case, just to be sure.`
			`ABSL_RAW_CHECK(`
			`shared->looping_threads.load(std::memory_order_relaxed) == 0 &&`
			`shared->blocked_threads.load(std::memory_order_relaxed) == 0 &&`
			`!shared->thread_has_mutex.load(std::memory_order_relaxed),`
			`"Shared state isn't zeroed at start of benchmark iteration");`

			`static constexpr int kBatchSize = 1000;`
			`while (state.KeepRunningBatch(kBatchSize)) {`
			`shared->looping_threads.fetch_add(1);`
			`for (int i = 0; i < kBatchSize; i++) {`
			`{`
			`absl::MutexLock l(&shared->mu);`
			`shared->thread_has_mutex.store(true, std::memory_order_relaxed);`
			`// Spin until all other threads are either out of the benchmark loop`
			`// or blocked on the mutex. This ensures that the mutex queue is kept`
			`// at its maximal length to benchmark the performance of queueing on`
			`// a highly contended mutex.`
			`while (shared->looping_threads.load(std::memory_order_relaxed) -`
			`shared->blocked_threads.load(std::memory_order_relaxed) !=`
			`1) {`
			`}`
			`shared->thread_has_mutex.store(false);`
			`}`
			`// Spin until some other thread has acquired the mutex before we block`
			`// again. This ensures that we always go through the slow (queueing)`
			`// acquisition path rather than reacquiring the mutex we just released.`
			`while (!shared->thread_has_mutex.load(std::memory_order_relaxed) &&`
			`shared->looping_threads.load(std::memory_order_relaxed) > 1) {`
			`}`
			`}`
			`// The benchmark framework uses a barrier to ensure that all of the threads`
			`// complete their benchmark loop together before any of the threads exit`
			`// the loop. So, we need to remove ourselves from the "looping threads"`
			`// counter here before potentially blocking on that barrier. Otherwise,`
			`// another thread spinning above might wait forever for this thread to`
			`// block on the mutex while we in fact are waiting to exit.`
			`shared->looping_threads.fetch_add(-1);`
			`}`
			`absl::synchronization_internal::PerThreadSem::SetThreadBlockedCounter(`
			`nullptr);`
			`}`

			`BENCHMARK(BM_MutexEnqueue)`
			`->Threads(4)`
			`->Threads(64)`
			`->Threads(128)`
			`->Threads(512)`
			`->ArgName("multiple_priorities")`
			`->Arg(false)`
			`->Arg(true);`

Update to 7.0.0 (2060) 2020-08-14 16:58:22 +00:00			`template <typename MutexType>`
			`void BM_Contended(benchmark::State& state) {`
Update to 8.6.0 2022-03-11 16:49:54 +00:00			`int priority = state.thread_index() % state.range(1);`
			`ScopedThreadMutexPriority priority_setter(priority);`

Update to 7.0.0 (2060) 2020-08-14 16:58:22 +00:00			`struct Shared {`
			`MutexType mu;`
			`int data = 0;`
			`};`
			`static auto* shared = new Shared;`
			`int local = 0;`
			`for (auto _ : state) {`
			`// Here we model both local work outside of the critical section as well as`
			`// some work inside of the critical section. The idea is to capture some`
			`// more or less realisitic contention levels.`
			`// If contention is too low, the benchmark won't measure anything useful.`
			`// If contention is unrealistically high, the benchmark will favor`
			`// bad mutex implementations that block and otherwise distract threads`
			`// from the mutex and shared state for as much as possible.`
			`// To achieve this amount of local work is multiplied by number of threads`
			`// to keep ratio between local work and critical section approximately`
			`// equal regardless of number of threads.`
Update to 8.6.0 2022-03-11 16:49:54 +00:00			`DelayNs(100 * state.threads(), &local);`
Update to 7.0.0 (2060) 2020-08-14 16:58:22 +00:00			`RaiiLocker<MutexType> locker(&shared->mu);`
			`DelayNs(state.range(0), &shared->data);`
			`}`
			`}`
Update to 8.6.0 2022-03-11 16:49:54 +00:00			`void SetupBenchmarkArgs(benchmark::internal::Benchmark* bm,`
			`bool do_test_priorities) {`
			`const int max_num_priorities = do_test_priorities ? 2 : 1;`
			`bm->UseRealTime()`
			`// ThreadPerCpu poorly handles non-power-of-two CPU counts.`
			`->Threads(1)`
			`->Threads(2)`
			`->Threads(4)`
			`->Threads(6)`
			`->Threads(8)`
			`->Threads(12)`
			`->Threads(16)`
			`->Threads(24)`
			`->Threads(32)`
			`->Threads(48)`
			`->Threads(64)`
			`->Threads(96)`
			`->Threads(128)`
			`->Threads(192)`
			`->Threads(256)`
			`->ArgNames({"cs_ns", "num_prios"});`
			`// Some empirically chosen amounts of work in critical section.`
			`// 1 is low contention, 2000 is high contention and few values in between.`
			`for (int critical_section_ns : {1, 20, 50, 200, 2000}) {`
			`for (int num_priorities = 1; num_priorities <= max_num_priorities;`
			`num_priorities++) {`
			`bm->ArgPair(critical_section_ns, num_priorities);`
			`}`
			`}`
			`}`
Update to 7.0.0 (2060) 2020-08-14 16:58:22 +00:00
			`BENCHMARK_TEMPLATE(BM_Contended, absl::Mutex)`
Update to 8.6.0 2022-03-11 16:49:54 +00:00			`->Apply([](benchmark::internal::Benchmark* bm) {`
			`SetupBenchmarkArgs(bm, /*do_test_priorities=*/true);`
			`});`
Update to 7.0.0 (2060) 2020-08-14 16:58:22 +00:00
			`BENCHMARK_TEMPLATE(BM_Contended, absl::base_internal::SpinLock)`
Update to 8.6.0 2022-03-11 16:49:54 +00:00			`->Apply([](benchmark::internal::Benchmark* bm) {`
			`SetupBenchmarkArgs(bm, /*do_test_priorities=*/false);`
			`});`
Update to 7.0.0 (2060) 2020-08-14 16:58:22 +00:00
			`BENCHMARK_TEMPLATE(BM_Contended, std::mutex)`
Update to 8.6.0 2022-03-11 16:49:54 +00:00			`->Apply([](benchmark::internal::Benchmark* bm) {`
			`SetupBenchmarkArgs(bm, /*do_test_priorities=*/false);`
			`});`
Update to 7.0.0 (2060) 2020-08-14 16:58:22 +00:00
			`// Measure the overhead of conditions on mutex release (when they must be`
			`// evaluated). Mutex has (some) support for equivalence classes allowing`
			`// Conditions with the same function/argument to potentially not be multiply`
			`// evaluated.`
			`//`
			`// num_classes==0 is used for the special case of every waiter being distinct.`
			`void BM_ConditionWaiters(benchmark::State& state) {`
			`int num_classes = state.range(0);`
			`int num_waiters = state.range(1);`

			`struct Helper {`
			`static void Waiter(absl::BlockingCounter* init, absl::Mutex* m, int* p) {`
			`init->DecrementCount();`
			`m->LockWhen(absl::Condition(`
			`static_cast<bool (*)(int*)>([](int* v) { return *v == 0; }), p));`
			`m->Unlock();`
			`}`
			`};`

			`if (num_classes == 0) {`
			`// No equivalence classes.`
			`num_classes = num_waiters;`
			`}`

			`absl::BlockingCounter init(num_waiters);`
			`absl::Mutex mu;`
			`std::vector<int> equivalence_classes(num_classes, 1);`

			`// Must be declared last to be destroyed first.`
			`absl::synchronization_internal::ThreadPool pool(num_waiters);`

			`for (int i = 0; i < num_waiters; i++) {`
			`// Mutex considers Conditions with the same function and argument`
			`// to be equivalent.`
			`pool.Schedule([&, i] {`
			`Helper::Waiter(&init, &mu, &equivalence_classes[i % num_classes]);`
			`});`
			`}`
			`init.Wait();`

			`for (auto _ : state) {`
			`mu.Lock();`
			`mu.Unlock(); // Each unlock requires Condition evaluation for our waiters.`
			`}`

			`mu.Lock();`
			`for (int i = 0; i < num_classes; i++) {`
			`equivalence_classes[i] = 0;`
			`}`
			`mu.Unlock();`
			`}`

			`// Some configurations have higher thread limits than others.`
Update to 8.6.0 2022-03-11 16:49:54 +00:00			`#if defined(__linux__) && !defined(ABSL_HAVE_THREAD_SANITIZER)`
Update to 7.0.0 (2060) 2020-08-14 16:58:22 +00:00			`constexpr int kMaxConditionWaiters = 8192;`
			`#else`
			`constexpr int kMaxConditionWaiters = 1024;`
			`#endif`
			`BENCHMARK(BM_ConditionWaiters)->RangePair(0, 2, 1, kMaxConditionWaiters);`

			`} // namespace`