#include #include #ifdef __x86_64__ #include #endif #define ANKERL_NANOBENCH_IMPLEMENT #include "third_party/nanobench.h" struct Job { int *input; }; bool stepJob(void *x) { auto *j = (Job *)x; auto &val = *j->input; auto result = --val == 0; #ifdef __x86_64__ _mm_clflush(j->input); #endif return result; } // So we can look at the disassembly more easily extern "C" { void sequential(void **jobs, bool (*step)(void *), int count) { for (int i = 0; i < count; ++i) { while (!step(jobs[i])) ; } } void interleaveSwapping(void **jobs, bool (*step)(void *), int remaining) { int current = 0; while (remaining > 0) { bool done = step(jobs[current]); if (done) { jobs[current] = jobs[remaining - 1]; --remaining; } else { ++current; } if (current == remaining) { current = 0; } } } void interleaveBoundedCyclicList(void **jobs, bool (*step)(void *), int count) { if (count == 0) { return; } constexpr int kConcurrent = 16; void *inProgress[kConcurrent]; int nextJob[kConcurrent]; int started = std::min(kConcurrent, count); for (int i = 0; i < kConcurrent; i++) { inProgress[i] = jobs[i]; nextJob[i] = i + 1; } nextJob[started - 1] = 0; int prevJob = started - 1; int job = 0; for (;;) { bool done = step(inProgress[job]); if (done) { if (started == count) { if (prevJob == job) break; nextJob[prevJob] = nextJob[job]; job = prevJob; } else { int temp = started++; inProgress[job] = jobs[temp]; } } prevJob = job; job = nextJob[job]; } } void interleaveCyclicList(void **jobs, bool (*step)(void *), int count) { auto *nextJob = (int *)alloca(sizeof(int) * count); for (int i = 0; i < count - 1; ++i) { nextJob[i] = i + 1; } nextJob[count - 1] = 0; int prevJob = count - 1; int job = 0; for (;;) { bool done = step(jobs[job]); if (done) { if (prevJob == job) break; nextJob[prevJob] = nextJob[job]; job = prevJob; } prevJob = job; job = nextJob[job]; } } } int main() { ankerl::nanobench::Bench bench; constexpr int kNumJobs = 100; bench.relative(true); Job jobs[kNumJobs]; Job jobsCopy[kNumJobs]; int iters = 0; int originalInput[kNumJobs]; for (int i = 0; i < kNumJobs; ++i) { originalInput[i] = rand() % 5 + 3; jobs[i].input = new int{originalInput[i]}; iters += *jobs[i].input; } bench.batch(iters); for (auto [scheduler, name] : {std::make_pair(sequential, "sequential"), std::make_pair(interleaveSwapping, "interleavingSwapping"), std::make_pair(interleaveBoundedCyclicList, "interleaveBoundedCyclicList"), std::make_pair(interleaveCyclicList, "interleaveCyclicList")}) { for (int i = 0; i < kNumJobs; ++i) { *jobs[i].input = originalInput[i]; } memcpy(jobsCopy, jobs, sizeof(jobs)); void *ps[kNumJobs]; for (int i = 0; i < kNumJobs; ++i) { ps[i] = jobsCopy + i; } scheduler(ps, stepJob, kNumJobs); for (int i = 0; i < kNumJobs; ++i) { if (*jobsCopy[i].input != 0) { fprintf(stderr, "%s failed\n", name); abort(); } } bench.run(name, [&]() { for (int i = 0; i < kNumJobs; ++i) { *jobs[i].input = originalInput[i]; } memcpy(jobsCopy, jobs, sizeof(jobs)); void *ps[kNumJobs]; for (int i = 0; i < kNumJobs; ++i) { ps[i] = jobsCopy + i; } scheduler(ps, stepJob, kNumJobs); }); } for (int i = 0; i < kNumJobs; ++i) { delete jobs[i].input; } }