// C++ source file (165 lines, 3.6 KiB).
#include <alloca.h>
|
|
#include <cassert>
|
|
#ifdef __x86_64__
|
|
#include <immintrin.h>
|
|
#endif
|
|
|
|
#define ANKERL_NANOBENCH_IMPLEMENT
|
|
#include "third_party/nanobench.h"
|
|
|
|
// A unit of step-wise work handed to the schedulers as an opaque `void *`.
struct Job {
  // Countdown that stepJob() decrements; the job is finished when it reaches
  // zero. Job has no destructor, so the pointee is not owned by the Job and
  // must be kept alive (and released) by whoever created it.
  // Defaults to nullptr so a default-constructed Job never carries an
  // indeterminate pointer.
  int *input = nullptr;
};
|
|
|
|
bool stepJob(void *x) {
|
|
auto *j = (Job *)x;
|
|
auto &val = *j->input;
|
|
auto result = --val == 0;
|
|
#ifdef __x86_64__
|
|
_mm_clflush(j->input);
|
|
#endif
|
|
return result;
|
|
}
|
|
|
|
// The schedulers below are declared extern "C" so that their symbol names are
// not mangled, which makes them easier to find in the disassembly.
|
|
|
|
extern "C" {
|
|
// Baseline scheduler: runs the jobs one after another, in array order,
// stepping each job repeatedly until `step` reports it finished before moving
// on to the next one.
//
// @param jobs   Array of `count` opaque job pointers passed to `step`.
// @param step   Advances a job by one step; returns true once it is done.
// @param count  Number of entries in `jobs`; nothing happens if it is <= 0.
void sequential(void **jobs, bool (*step)(void *), int count) {
  int index = 0;
  while (index < count) {
    bool finished = false;
    do {
      finished = step(jobs[index]);
    } while (!finished);
    ++index;
  }
}
|
|
|
|
// Interleaving scheduler that keeps the unfinished jobs packed at the front of
// `jobs`.
//
// Steps the jobs round-robin over the first `remaining` entries. When a job
// finishes, the last live entry is moved into its slot and the live range
// shrinks by one, so the moved-in job is the next one stepped.
//
// Note: this reorders/overwrites entries of the caller's `jobs` array.
//
// @param jobs       Array of opaque job pointers passed to `step`.
// @param step       Advances a job by one step; returns true once it is done.
// @param remaining  Number of jobs in `jobs`; nothing happens if it is <= 0.
void interleaveSwapping(void **jobs, bool (*step)(void *), int remaining) {
  for (int slot = 0; remaining > 0;) {
    if (step(jobs[slot])) {
      // Fill the hole with the last live job and drop the tail entry.
      remaining -= 1;
      jobs[slot] = jobs[remaining];
    } else {
      slot += 1;
    }

    // Wrap around once the cursor runs off the end of the live range.
    if (slot == remaining) {
      slot = 0;
    }
  }
}
|
|
|
|
// Interleaving scheduler with a bounded number of jobs in flight.
//
// At most kConcurrent (16) jobs are active at a time. Each active job occupies
// a slot; the slots are linked into a ring through `nextJob` (an index-based
// singly linked cyclic list) and are stepped round-robin. When a job finishes:
//   * if unstarted jobs remain, the next one takes over the same slot;
//   * otherwise the slot is unlinked from the ring.
// The function returns when the last slot in the ring finishes.
//
// The caller's `jobs` array is only read, never modified.
//
// @param jobs   Array of `count` opaque job pointers passed to `step`.
// @param step   Advances a job by one step; returns true once it is done.
// @param count  Number of entries in `jobs`; nothing happens if it is <= 0.
void interleaveBoundedCyclicList(void **jobs, bool (*step)(void *), int count) {
  if (count <= 0) {
    return;
  }

  constexpr int kConcurrent = 16;
  void *inProgress[kConcurrent];
  int nextJob[kConcurrent];

  // Only the first `started` slots are populated. Iterating up to kConcurrent
  // here would read past the end of `jobs` whenever count < kConcurrent.
  int started = std::min(kConcurrent, count);
  for (int i = 0; i < started; i++) {
    inProgress[i] = jobs[i];
    nextJob[i] = i + 1;
  }
  // Close the ring: the last populated slot links back to slot 0.
  nextJob[started - 1] = 0;

  int prevJob = started - 1;
  int job = 0;
  for (;;) {
    bool done = step(inProgress[job]);
    if (done) {
      if (started == count) {
        // Nothing left to start. A slot that is its own predecessor is the
        // only one left in the ring, so all work is finished.
        if (prevJob == job)
          break;
        // Unlink this slot, then step back onto the predecessor so the
        // common advance below lands on the unlinked slot's successor.
        nextJob[prevJob] = nextJob[job];
        job = prevJob;
      } else {
        // Reuse the slot for the next job that has not been started yet.
        int temp = started++;
        inProgress[job] = jobs[temp];
      }
    }
    prevJob = job;
    job = nextJob[job];
  }
}
|
|
|
|
// Interleaving scheduler that keeps every job in flight at once.
//
// All `count` jobs are linked into a ring through `nextJob` (an index-based
// singly linked cyclic list) and stepped round-robin. A job that finishes is
// unlinked from the ring; the function returns when the last job in the ring
// finishes.
//
// `nextJob` is allocated with alloca, i.e. in this function's stack frame, so
// stack usage grows with `count`. The caller's `jobs` array is only read.
//
// @param jobs   Array of `count` opaque job pointers passed to `step`.
// @param step   Advances a job by one step; returns true once it is done.
// @param count  Number of entries in `jobs`; nothing happens if it is <= 0.
void interleaveCyclicList(void **jobs, bool (*step)(void *), int count) {
  // Without this guard an empty input would write nextJob[-1] and step
  // jobs[0], both out of bounds.
  if (count <= 0) {
    return;
  }

  auto *nextJob = static_cast<int *>(alloca(sizeof(int) * count));

  for (int i = 0; i < count - 1; ++i) {
    nextJob[i] = i + 1;
  }
  // Close the ring: the last job links back to the first.
  nextJob[count - 1] = 0;

  int prevJob = count - 1;
  int job = 0;
  for (;;) {
    bool done = step(jobs[job]);
    if (done) {
      // A job that is its own predecessor is the only one left in the ring.
      if (prevJob == job)
        break;
      // Unlink this job, then step back onto the predecessor so the common
      // advance below lands on the unlinked job's successor.
      nextJob[prevJob] = nextJob[job];
      job = prevJob;
    }
    prevJob = job;
    job = nextJob[job];
  }
}
|
|
}
|
|
|
|
int main() {
|
|
ankerl::nanobench::Bench bench;
|
|
|
|
constexpr int kNumJobs = 100;
|
|
bench.relative(true);
|
|
|
|
Job jobs[kNumJobs];
|
|
Job jobsCopy[kNumJobs];
|
|
int iters = 0;
|
|
int originalInput[kNumJobs];
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
originalInput[i] = rand() % 5 + 3;
|
|
jobs[i].input = new int{originalInput[i]};
|
|
iters += *jobs[i].input;
|
|
}
|
|
bench.batch(iters);
|
|
|
|
for (auto [scheduler, name] :
|
|
{std::make_pair(sequential, "sequential"),
|
|
std::make_pair(interleaveSwapping, "interleavingSwapping"),
|
|
std::make_pair(interleaveBoundedCyclicList,
|
|
"interleaveBoundedCyclicList"),
|
|
std::make_pair(interleaveCyclicList, "interleaveCyclicList")}) {
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
*jobs[i].input = originalInput[i];
|
|
}
|
|
memcpy(jobsCopy, jobs, sizeof(jobs));
|
|
void *ps[kNumJobs];
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
ps[i] = jobsCopy + i;
|
|
}
|
|
scheduler(ps, stepJob, kNumJobs);
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
if (*jobsCopy[i].input != 0) {
|
|
fprintf(stderr, "%s failed\n", name);
|
|
abort();
|
|
}
|
|
}
|
|
|
|
bench.run(name, [&]() {
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
*jobs[i].input = originalInput[i];
|
|
}
|
|
memcpy(jobsCopy, jobs, sizeof(jobs));
|
|
void *ps[kNumJobs];
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
ps[i] = jobsCopy + i;
|
|
}
|
|
scheduler(ps, stepJob, kNumJobs);
|
|
});
|
|
}
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
delete jobs[i].input;
|
|
}
|
|
}
|