179 lines
4.2 KiB
C++
179 lines
4.2 KiB
C++
#include <alloca.h>

#include <algorithm>
#include <cassert>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <utility>

#ifdef __x86_64__
#include <immintrin.h>
#endif

#include "third_party/nanobench.h"
|
|
|
|
// A resumable unit of work: a counter to operate on plus the continuation
// that should be invoked next to make progress on it.
struct Job {
  // Counter the job's continuations read and modify.
  int *input;

  // Signature of a continuation. It receives the job and returns the next
  // continuation to run, type-erased to void* because a function type cannot
  // mention itself in its own return type.
  using continuation = void *(*)(Job *);

  // Continuation to call on the next step of this job.
  continuation next;
};
|
|
|
|
// Performs one step of a job: decrements the counter that `j->input` points
// at and, on x86-64, flushes the cache line holding that counter.
//
// Returns nullptr once the counter has reached zero; otherwise returns a
// pointer to stepJob itself (as void*) so the caller can schedule another
// step.
void *stepJob(Job *j) {
  int &counter = *j->input;
  --counter;
  const bool finished = (counter == 0);
#ifdef __x86_64__
  // NOTE(review): presumably flushed so the next step on this job misses the
  // cache, simulating memory-bound work -- confirm intent.
  _mm_clflush(j->input);
#endif
  if (finished) {
    return nullptr;
  }
  return reinterpret_cast<void *>(&stepJob);
}
|
|
|
|
// So we can look at the disassembly more easily
|
|
|
|
extern "C" {
|
|
// Runs the jobs one after another: each job is stepped through its `next`
// continuation until that continuation becomes null, then the following job
// is started. Every job must begin with a non-null `next`.
void sequential(Job **jobs, int count) {
  for (int i = 0; i < count; ++i) {
    Job *const job = jobs[i];
    do {
      void *const raw = job->next(job);
      job->next = reinterpret_cast<Job::continuation>(raw);
    } while (job->next != nullptr);
  }
}
|
|
|
|
// Runs the jobs one after another by calling stepJob directly rather than
// through each job's `next` pointer. A job is stepped at least once and until
// stepJob reports completion by returning null. `next` is not updated.
void sequentialNoFuncPtr(Job **jobs, int count) {
  for (int i = 0; i < count; ++i) {
    void *more = nullptr;
    do {
      more = stepJob(jobs[i]);
    } while (more != nullptr);
  }
}
|
|
|
|
// Steps all unfinished jobs round-robin, one step per visit. The unfinished
// jobs are kept packed in jobs[0, remaining): when a job finishes, the last
// unfinished job is moved into its slot and the range shrinks by one. This
// overwrites entries of `jobs`, so the array is not preserved for the caller.
void interleaveSwapping(Job **jobs, int remaining) {
  int slot = 0;
  while (remaining > 0) {
    Job *const job = jobs[slot];
    const auto cont = reinterpret_cast<Job::continuation>(job->next(job));
    job->next = cont;

    if (cont != nullptr) {
      // Still running: move on to the next slot.
      ++slot;
    } else {
      // Finished: fill the hole with the last live job and revisit this slot.
      --remaining;
      jobs[slot] = jobs[remaining];
    }

    // Wrap around once we step past the live range.
    if (slot == remaining) {
      slot = 0;
    }
  }
}
|
|
|
|
// Runs all `count` jobs to completion while keeping at most kConcurrent of
// them in flight, stepping the in-flight jobs round-robin.
//
// In-flight jobs are stored in `inProgress`; `nextJob` links their slots into
// a circular singly linked list. When a job finishes, its slot is refilled
// with the next not-yet-started job if one remains; otherwise the slot is
// unlinked from the ring. The loop ends when the only slot left in the ring
// finishes.
//
// Does nothing when `count` is zero or negative. Every job must begin with a
// non-null `next`.
void interleaveBoundedCyclicList(Job **jobs, int count) {
  // A non-positive count would otherwise make `started - 1` negative and
  // write outside `nextJob` below.
  if (count <= 0) {
    return;
  }

  constexpr int kConcurrent = 32;
  Job *inProgress[kConcurrent];
  int nextJob[kConcurrent];

  // Number of jobs handed to a slot so far; also the index of the next job to
  // start.
  int started = std::min(kConcurrent, count);
  for (int i = 0; i < started; i++) {
    inProgress[i] = jobs[i];
    nextJob[i] = i + 1;
  }
  // Close the ring: the last slot points back to the first.
  nextJob[started - 1] = 0;

  int prevJob = started - 1;
  int job = 0;
  for (;;) {
    auto next = (Job::continuation)inProgress[job]->next(inProgress[job]);
    inProgress[job]->next = next;
    if (next == nullptr) {
      if (started == count) {
        // Nothing left to start. If this is the only slot in the ring, all
        // jobs are done.
        if (prevJob == job)
          break;
        // Unlink this slot and step back so the advance below lands on its
        // successor with `prevJob` still pointing at the predecessor.
        nextJob[prevJob] = nextJob[job];
        job = prevJob;
      } else {
        // Reuse the slot for the next unstarted job.
        int fresh = started++;
        inProgress[job] = jobs[fresh];
      }
    }
    prevJob = job;
    job = nextJob[job];
  }
}
|
|
|
|
// Runs all `count` jobs to completion, stepping every unfinished job
// round-robin.
//
// `nextJob` links the job indices into a circular singly linked list. A job
// that finishes is unlinked from the ring, and the loop ends when the only
// job left in the ring finishes.
//
// Does nothing when `count` is zero or negative. Every job must begin with a
// non-null `next`. The link table is allocated with alloca, so stack usage
// grows with `count`.
void interleaveCyclicList(Job **jobs, int count) {
  // Without this guard an empty input would write nextJob[-1] and then
  // dereference jobs[0].
  if (count <= 0) {
    return;
  }

  auto *nextJob = (int *)alloca(sizeof(int) * count);

  for (int i = 0; i < count - 1; ++i) {
    nextJob[i] = i + 1;
  }
  // Close the ring: the last job points back to the first.
  nextJob[count - 1] = 0;

  int prevJob = count - 1;
  int job = 0;
  for (;;) {
    auto next = (Job::continuation)jobs[job]->next(jobs[job]);
    jobs[job]->next = next;
    if (next == nullptr) {
      // If this is the only job left in the ring, everything is done.
      if (prevJob == job)
        break;
      // Unlink the finished job and step back so the advance below lands on
      // its successor with `prevJob` still pointing at the predecessor.
      nextJob[prevJob] = nextJob[job];
      job = prevJob;
    }
    prevJob = job;
    job = nextJob[job];
  }
}
|
|
}
|
|
|
|
int main() {
|
|
ankerl::nanobench::Bench bench;
|
|
|
|
constexpr int kNumJobs = 100;
|
|
bench.relative(true);
|
|
|
|
Job jobs[kNumJobs];
|
|
Job jobsCopy[kNumJobs];
|
|
int iters = 0;
|
|
int originalInput[kNumJobs];
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
originalInput[i] = rand() % 5 + 3;
|
|
jobs[i].input = new int{originalInput[i]};
|
|
jobs[i].next = stepJob;
|
|
iters += *jobs[i].input;
|
|
}
|
|
bench.batch(iters);
|
|
|
|
for (auto [scheduler, name] :
|
|
{std::make_pair(sequentialNoFuncPtr, "sequentialNoFuncPtr"),
|
|
std::make_pair(sequential, "sequential"),
|
|
std::make_pair(interleaveSwapping, "interleavingSwapping"),
|
|
std::make_pair(interleaveBoundedCyclicList,
|
|
"interleaveBoundedCyclicList"),
|
|
std::make_pair(interleaveCyclicList, "interleaveCyclicList")}) {
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
*jobs[i].input = originalInput[i];
|
|
}
|
|
memcpy(jobsCopy, jobs, sizeof(jobs));
|
|
Job *ps[kNumJobs];
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
ps[i] = jobsCopy + i;
|
|
}
|
|
scheduler(ps, kNumJobs);
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
if (*jobsCopy[i].input != 0) {
|
|
fprintf(stderr, "%s failed\n", name);
|
|
abort();
|
|
}
|
|
}
|
|
|
|
bench.run(name, [&]() {
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
*jobs[i].input = originalInput[i];
|
|
}
|
|
memcpy(jobsCopy, jobs, sizeof(jobs));
|
|
Job *ps[kNumJobs];
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
ps[i] = jobsCopy + i;
|
|
}
|
|
scheduler(ps, kNumJobs);
|
|
});
|
|
}
|
|
for (int i = 0; i < kNumJobs; ++i) {
|
|
delete jobs[i].input;
|
|
}
|
|
}
|