Compare commits
2 Commits
1519216d08
...
a8042ab20d
Author | SHA1 | Date | |
---|---|---|---|
a8042ab20d | |||
8a36e72640 |
148
ConflictSet.cpp
148
ConflictSet.cpp
@@ -1741,109 +1741,69 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
|
||||
}
|
||||
}
|
||||
|
||||
struct FirstGeqStepwise {
|
||||
Node *n;
|
||||
std::span<const uint8_t> remaining;
|
||||
int cmp;
|
||||
|
||||
enum Phase {
|
||||
Init,
|
||||
// Being in this phase implies that the key matches the search path exactly
|
||||
// up to this point
|
||||
Search,
|
||||
DownLeftSpine
|
||||
};
|
||||
Phase phase;
|
||||
|
||||
FirstGeqStepwise(Node *n, std::span<const uint8_t> remaining)
|
||||
: n(n), remaining(remaining), phase(Init) {}
|
||||
|
||||
// Not being done implies that n is not the firstGeq
|
||||
bool step() {
|
||||
switch (phase) {
|
||||
case Search: {
|
||||
if (remaining.size() == 0) {
|
||||
int c = getChildGeq(n, 0);
|
||||
assert(c >= 0);
|
||||
n = getChildExists(n, c);
|
||||
return downLeftSpine();
|
||||
Iterator firstGeq(Node *n, const std::span<const uint8_t> key) {
|
||||
auto remaining = key;
|
||||
for (;;) {
|
||||
if (remaining.size() == 0) {
|
||||
if (n->entryPresent) {
|
||||
return {n, 0};
|
||||
}
|
||||
|
||||
auto *child = getChild(n, remaining[0]);
|
||||
if (child == nullptr) {
|
||||
int c = getChildGeq(n, remaining[0]);
|
||||
if (c >= 0) {
|
||||
n = getChildExists(n, c);
|
||||
return downLeftSpine();
|
||||
} else {
|
||||
n = nextSibling(n);
|
||||
return downLeftSpine();
|
||||
}
|
||||
}
|
||||
|
||||
n = child;
|
||||
remaining = remaining.subspan(1, remaining.size() - 1);
|
||||
|
||||
if (n->partialKeyLen > 0) {
|
||||
int commonLen = std::min<int>(n->partialKeyLen, remaining.size());
|
||||
int i = longestCommonPrefixPartialKey(n->partialKey, remaining.data(),
|
||||
commonLen);
|
||||
if (i < commonLen) {
|
||||
auto c = n->partialKey[i] <=> remaining[i];
|
||||
if (c > 0) {
|
||||
return downLeftSpine();
|
||||
} else {
|
||||
n = nextSibling(n);
|
||||
return downLeftSpine();
|
||||
}
|
||||
}
|
||||
if (commonLen == n->partialKeyLen) {
|
||||
// partial key matches
|
||||
remaining =
|
||||
remaining.subspan(commonLen, remaining.size() - commonLen);
|
||||
} else if (n->partialKeyLen > int(remaining.size())) {
|
||||
// n is the first physical node greater than remaining, and there's no
|
||||
// eq node
|
||||
return downLeftSpine();
|
||||
}
|
||||
}
|
||||
}
|
||||
[[fallthrough]];
|
||||
case Init:
|
||||
phase = Search;
|
||||
if (remaining.size() == 0 && n->entryPresent) {
|
||||
cmp = 0;
|
||||
return true;
|
||||
}
|
||||
return false;
|
||||
case DownLeftSpine:
|
||||
int c = getChildGeq(n, 0);
|
||||
assert(c >= 0);
|
||||
n = getChildExists(n, c);
|
||||
if (n->entryPresent) {
|
||||
cmp = 1;
|
||||
return true;
|
||||
goto downLeftSpine;
|
||||
}
|
||||
|
||||
auto *child = getChild(n, remaining[0]);
|
||||
if (child == nullptr) {
|
||||
int c = getChildGeq(n, remaining[0]);
|
||||
if (c >= 0) {
|
||||
n = getChildExists(n, c);
|
||||
goto downLeftSpine;
|
||||
} else {
|
||||
n = nextSibling(n);
|
||||
goto downLeftSpine;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
||||
}
|
||||
|
||||
bool downLeftSpine() {
|
||||
phase = DownLeftSpine;
|
||||
if (n == nullptr || n->entryPresent) {
|
||||
cmp = 1;
|
||||
return true;
|
||||
n = child;
|
||||
remaining = remaining.subspan(1, remaining.size() - 1);
|
||||
|
||||
if (n->partialKeyLen > 0) {
|
||||
int commonLen = std::min<int>(n->partialKeyLen, remaining.size());
|
||||
int i = longestCommonPrefixPartialKey(n->partialKey, remaining.data(),
|
||||
commonLen);
|
||||
if (i < commonLen) {
|
||||
auto c = n->partialKey[i] <=> remaining[i];
|
||||
if (c > 0) {
|
||||
goto downLeftSpine;
|
||||
} else {
|
||||
n = nextSibling(n);
|
||||
goto downLeftSpine;
|
||||
}
|
||||
}
|
||||
if (commonLen == n->partialKeyLen) {
|
||||
// partial key matches
|
||||
remaining = remaining.subspan(commonLen, remaining.size() - commonLen);
|
||||
} else if (n->partialKeyLen > int(remaining.size())) {
|
||||
// n is the first physical node greater than remaining, and there's no
|
||||
// eq node
|
||||
goto downLeftSpine;
|
||||
}
|
||||
}
|
||||
return step();
|
||||
}
|
||||
};
|
||||
|
||||
Iterator firstGeq(Node *n, const std::span<const uint8_t> key) {
|
||||
FirstGeqStepwise stepwise{n, key};
|
||||
while (!stepwise.step())
|
||||
;
|
||||
return {stepwise.n, stepwise.cmp};
|
||||
downLeftSpine:
|
||||
if (n == nullptr) {
|
||||
return {nullptr, 1};
|
||||
}
|
||||
for (;;) {
|
||||
if (n->entryPresent) {
|
||||
return {n, 1};
|
||||
}
|
||||
int c = getChildGeq(n, 0);
|
||||
assert(c >= 0);
|
||||
n = getChildExists(n, c);
|
||||
}
|
||||
}
|
||||
|
||||
struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
||||
|
131
README.md
131
README.md
@@ -7,66 +7,97 @@ Intended to replace FoundationDB's skip list.
|
||||
## Skip list
|
||||
|
||||
```
|
||||
New conflict set: 4.189 sec
|
||||
0.298 Mtransactions/sec
|
||||
1.194 Mkeys/sec
|
||||
Detect only: 3.990 sec
|
||||
0.313 Mtransactions/sec
|
||||
1.253 Mkeys/sec
|
||||
Skiplist only: 2.849 sec
|
||||
0.439 Mtransactions/sec
|
||||
1.755 Mkeys/sec
|
||||
New conflict set: 2.404 sec
|
||||
0.520 Mtransactions/sec
|
||||
2.080 Mkeys/sec
|
||||
Detect only: 2.266 sec
|
||||
0.552 Mtransactions/sec
|
||||
2.207 Mkeys/sec
|
||||
Skiplist only: 1.594 sec
|
||||
0.784 Mtransactions/sec
|
||||
3.137 Mkeys/sec
|
||||
Performance counters:
|
||||
Build: 0.0913
|
||||
Add: 0.0998
|
||||
Detect: 3.99
|
||||
D.Sort: 0.808
|
||||
D.Combine: 0.0309
|
||||
D.CheckRead: 1.67
|
||||
D.CheckIntraBatch: 0.0305
|
||||
D.MergeWrite: 1.18
|
||||
D.RemoveBefore: 0.265
|
||||
Build: 0.071
|
||||
Add: 0.0641
|
||||
Detect: 2.27
|
||||
D.Sort: 0.44
|
||||
D.Combine: 0.018
|
||||
D.CheckRead: 0.855
|
||||
D.CheckIntraBatch: 0.00903
|
||||
D.MergeWrite: 0.739
|
||||
D.RemoveBefore: 0.201
|
||||
```
|
||||
|
||||
## Radix tree (this implementation)
|
||||
|
||||
```
|
||||
New conflict set: 2.965 sec
|
||||
0.422 Mtransactions/sec
|
||||
1.686 Mkeys/sec
|
||||
Detect only: 2.761 sec
|
||||
0.453 Mtransactions/sec
|
||||
1.811 Mkeys/sec
|
||||
Skiplist only: 1.580 sec
|
||||
0.791 Mtransactions/sec
|
||||
3.165 Mkeys/sec
|
||||
New conflict set: 1.743 sec
|
||||
0.717 Mtransactions/sec
|
||||
2.869 Mkeys/sec
|
||||
Detect only: 1.611 sec
|
||||
0.776 Mtransactions/sec
|
||||
3.103 Mkeys/sec
|
||||
Skiplist only: 0.919 sec
|
||||
1.360 Mtransactions/sec
|
||||
5.440 Mkeys/sec
|
||||
Performance counters:
|
||||
Build: 0.0902
|
||||
Add: 0.107
|
||||
Detect: 2.76
|
||||
D.Sort: 0.809
|
||||
D.Combine: 0.0309
|
||||
D.CheckRead: 0.658
|
||||
D.CheckIntraBatch: 0.0294
|
||||
D.MergeWrite: 0.921
|
||||
D.RemoveBefore: 0.305
|
||||
Build: 0.0657
|
||||
Add: 0.0628
|
||||
Detect: 1.61
|
||||
D.Sort: 0.442
|
||||
D.Combine: 0.0178
|
||||
D.CheckRead: 0.395
|
||||
D.CheckIntraBatch: 0.00776
|
||||
D.MergeWrite: 0.524
|
||||
D.RemoveBefore: 0.221
|
||||
```
|
||||
|
||||
# Our benchmark
|
||||
|
||||
## Skip list
|
||||
|
||||
| ns/op | op/s | err% | total | benchmark
|
||||
|--------------------:|--------------------:|--------:|----------:|:----------
|
||||
| 325.60 | 3,071,225.77 | 4.8% | 0.77 | `skip list (point reads)`
|
||||
| 297.15 | 3,365,278.10 | 1.7% | 0.72 | `skip list (prefix reads)`
|
||||
| 408.79 | 2,446,222.23 | 1.0% | 1.03 | `skip list (range reads)`
|
||||
| 261.88 | 3,818,471.08 | 1.3% | 0.73 | `skip list (point writes)`
|
||||
| 253.54 | 3,944,191.08 | 0.1% | 0.61 | `skip list (prefix writes)`
|
||||
| 258.73 | 3,865,078.52 | 0.8% | 0.62 | `skip list (range writes)`
|
||||
| 489.56 | 2,042,648.19 | 1.8% | 0.01 | `skip list (monotonic increasing point writes)`
|
||||
| 14.83 | 67,446,579.75 | 0.1% | 0.04 | `radix tree (point reads)`
|
||||
| 59.68 | 16,756,917.37 | 0.1% | 0.14 | `radix tree (prefix reads)`
|
||||
| 287.32 | 3,480,485.22 | 1.2% | 0.69 | `radix tree (range reads)`
|
||||
| 46.59 | 21,461,855.59 | 0.2% | 0.12 | `radix tree (point writes)`
|
||||
| 83.70 | 11,946,755.99 | 0.1% | 0.20 | `radix tree (prefix writes)`
|
||||
| 100.75 | 9,925,723.26 | 0.6% | 0.25 | `radix tree (range writes)`
|
||||
| 118.37 | 8,448,345.29 | 0.6% | 0.01 | `radix tree (monotonic increasing point writes)`
|
||||
| 270.07 | 3,702,706.03 | 0.4% | 0.01 | `point reads`
|
||||
| 285.76 | 3,499,437.03 | 1.5% | 0.01 | `prefix reads`
|
||||
| 532.54 | 1,877,794.90 | 0.7% | 0.01 | `range reads`
|
||||
| 528.50 | 1,892,132.94 | 0.7% | 0.01 | `point writes`
|
||||
| 516.53 | 1,935,978.22 | 0.9% | 0.01 | `prefix writes`
|
||||
| 303.34 | 3,296,630.84 | 3.6% | 0.05 | `range writes`
|
||||
| 502.88 | 1,988,553.24 | 2.0% | 0.01 | `monotonic increasing point writes`
|
||||
|
||||
## Radix tree (this implementation)
|
||||
|
||||
| ns/op | op/s | err% | total | benchmark
|
||||
|--------------------:|--------------------:|--------:|----------:|:----------
|
||||
| 14.52 | 68,850,842.99 | 1.2% | 0.01 | `point reads`
|
||||
| 60.89 | 16,422,538.22 | 1.5% | 0.01 | `prefix reads`
|
||||
| 226.89 | 4,407,362.98 | 0.5% | 0.01 | `range reads`
|
||||
| 22.99 | 43,498,198.49 | 0.2% | 0.01 | `point writes`
|
||||
| 50.51 | 19,799,864.54 | 1.0% | 0.01 | `prefix writes`
|
||||
| 82.50 | 12,121,212.12 | 2.6% | 0.03 | `range writes`
|
||||
| 119.94 | 8,337,354.54 | 2.1% | 0.01 | `monotonic increasing point writes`
|
||||
|
||||
# "Real data" test
|
||||
|
||||
Point queries only, best of three runs. Gc ratio is the fraction of write-path time spent on garbage collection, i.e. gc time / (gc time + time spent adding writes). Lower is better.
|
||||
|
||||
## skip list
|
||||
|
||||
```
|
||||
Check: 12.7863 seconds, 292.384 MB/s, Add: 19.8276 seconds, 35.4071 MB/s, Gc ratio: 23.5314%
|
||||
```
|
||||
|
||||
## radix tree
|
||||
|
||||
```
|
||||
Check: 3.60187 seconds, 1037.94 MB/s, Add: 3.03958 seconds, 230.966 MB/s, Gc ratio: 52.3876%
|
||||
```
|
||||
|
||||
## hash table
|
||||
|
||||
(The hash table implementation does not support range queries; it is included only to give an idea of how fast point queries can be.)
|
||||
|
||||
```
|
||||
Check: 2.15925 seconds, 1731.4 MB/s, Add: 1.08519 seconds, 646.926 MB/s, Gc ratio: 52.1526%
|
||||
```
|
Reference in New Issue
Block a user