Compare commits
10 Commits
b0414969be
...
d1dc1247e1
Author | SHA1 | Date | |
---|---|---|---|
d1dc1247e1 | |||
f1ad68109a | |||
c4443bc355 | |||
857b402fe2 | |||
9b3e1b219b | |||
ab52c63935 | |||
bad9d7ced8 | |||
c8d9dc034d | |||
72168ef6a3 | |||
620a0afd2a |
@@ -31,8 +31,14 @@ if(NOT CMAKE_BUILD_TYPE AND NOT CMAKE_CONFIGURATION_TYPES)
|
|||||||
"MinSizeRel" "RelWithDebInfo")
|
"MinSizeRel" "RelWithDebInfo")
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
add_compile_options(-fdata-sections -ffunction-sections -Wswitch-enum
|
add_compile_options(
|
||||||
-Werror=switch-enum -fPIC)
|
-fdata-sections
|
||||||
|
-ffunction-sections
|
||||||
|
-Wswitch-enum
|
||||||
|
-Werror=switch-enum
|
||||||
|
-fPIC
|
||||||
|
-g
|
||||||
|
-fno-omit-frame-pointer)
|
||||||
|
|
||||||
set(full_relro_flags "-pie;LINKER:-z,relro,-z,now,-z,noexecstack")
|
set(full_relro_flags "-pie;LINKER:-z,relro,-z,now,-z,noexecstack")
|
||||||
cmake_push_check_state()
|
cmake_push_check_state()
|
||||||
|
235
ConflictSet.cpp
235
ConflictSet.cpp
@@ -395,8 +395,9 @@ inline void Node48::copyChildrenAndKeyFrom(const Node16 &other) {
|
|||||||
assert(numChildren == Node16::kMaxNodes);
|
assert(numChildren == Node16::kMaxNodes);
|
||||||
memset(index, -1, sizeof(index));
|
memset(index, -1, sizeof(index));
|
||||||
memset(children, 0, sizeof(children));
|
memset(children, 0, sizeof(children));
|
||||||
|
const auto z = InternalVersionT::zero;
|
||||||
for (auto &v : childMaxVersion) {
|
for (auto &v : childMaxVersion) {
|
||||||
v = InternalVersionT::zero;
|
v = z;
|
||||||
}
|
}
|
||||||
memcpy(partialKey(), &other + 1, partialKeyLen);
|
memcpy(partialKey(), &other + 1, partialKeyLen);
|
||||||
bitSet.init();
|
bitSet.init();
|
||||||
@@ -423,8 +424,9 @@ inline void Node48::copyChildrenAndKeyFrom(const Node48 &other) {
|
|||||||
nextFree = other.nextFree;
|
nextFree = other.nextFree;
|
||||||
memcpy(index, other.index, sizeof(index));
|
memcpy(index, other.index, sizeof(index));
|
||||||
memset(children, 0, sizeof(children));
|
memset(children, 0, sizeof(children));
|
||||||
|
const auto z = InternalVersionT::zero;
|
||||||
for (auto &v : childMaxVersion) {
|
for (auto &v : childMaxVersion) {
|
||||||
v = InternalVersionT::zero;
|
v = z;
|
||||||
}
|
}
|
||||||
for (int i = 0; i < numChildren; ++i) {
|
for (int i = 0; i < numChildren; ++i) {
|
||||||
children[i] = other.children[i];
|
children[i] = other.children[i];
|
||||||
@@ -442,8 +444,9 @@ inline void Node48::copyChildrenAndKeyFrom(const Node256 &other) {
|
|||||||
kNodeCopySize);
|
kNodeCopySize);
|
||||||
memset(index, -1, sizeof(index));
|
memset(index, -1, sizeof(index));
|
||||||
memset(children, 0, sizeof(children));
|
memset(children, 0, sizeof(children));
|
||||||
|
const auto z = InternalVersionT::zero;
|
||||||
for (auto &v : childMaxVersion) {
|
for (auto &v : childMaxVersion) {
|
||||||
v = InternalVersionT::zero;
|
v = z;
|
||||||
}
|
}
|
||||||
nextFree = other.numChildren;
|
nextFree = other.numChildren;
|
||||||
bitSet = other.bitSet;
|
bitSet = other.bitSet;
|
||||||
@@ -470,11 +473,12 @@ inline void Node256::copyChildrenAndKeyFrom(const Node48 &other) {
|
|||||||
kNodeCopySize);
|
kNodeCopySize);
|
||||||
bitSet = other.bitSet;
|
bitSet = other.bitSet;
|
||||||
memset(children, 0, sizeof(children));
|
memset(children, 0, sizeof(children));
|
||||||
|
const auto z = InternalVersionT::zero;
|
||||||
for (auto &v : childMaxVersion) {
|
for (auto &v : childMaxVersion) {
|
||||||
v = InternalVersionT::zero;
|
v = z;
|
||||||
}
|
}
|
||||||
for (auto &v : maxOfMax) {
|
for (auto &v : maxOfMax) {
|
||||||
v = InternalVersionT::zero;
|
v = z;
|
||||||
}
|
}
|
||||||
bitSet.forEachSet([&](int c) {
|
bitSet.forEachSet([&](int c) {
|
||||||
children[c] = other.children[other.index[c]];
|
children[c] = other.children[other.index[c]];
|
||||||
@@ -491,8 +495,9 @@ inline void Node256::copyChildrenAndKeyFrom(const Node256 &other) {
|
|||||||
memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
|
memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
|
||||||
kNodeCopySize);
|
kNodeCopySize);
|
||||||
memset(children, 0, sizeof(children));
|
memset(children, 0, sizeof(children));
|
||||||
|
const auto z = InternalVersionT::zero;
|
||||||
for (auto &v : childMaxVersion) {
|
for (auto &v : childMaxVersion) {
|
||||||
v = InternalVersionT::zero;
|
v = z;
|
||||||
}
|
}
|
||||||
bitSet = other.bitSet;
|
bitSet = other.bitSet;
|
||||||
bitSet.forEachSet([&](int c) {
|
bitSet.forEachSet([&](int c) {
|
||||||
@@ -619,13 +624,15 @@ template <class T> struct BoundedFreeListAllocator {
|
|||||||
T *result = allocate_helper(partialKeyCapacity);
|
T *result = allocate_helper(partialKeyCapacity);
|
||||||
if constexpr (!std::is_same_v<T, Node0>) {
|
if constexpr (!std::is_same_v<T, Node0>) {
|
||||||
memset(result->children, 0, sizeof(result->children));
|
memset(result->children, 0, sizeof(result->children));
|
||||||
|
const auto z = InternalVersionT::zero;
|
||||||
for (auto &v : result->childMaxVersion) {
|
for (auto &v : result->childMaxVersion) {
|
||||||
v = InternalVersionT::zero;
|
v = z;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if constexpr (std::is_same_v<T, Node48> || std::is_same_v<T, Node256>) {
|
if constexpr (std::is_same_v<T, Node48> || std::is_same_v<T, Node256>) {
|
||||||
|
const auto z = InternalVersionT::zero;
|
||||||
for (auto &v : result->maxOfMax) {
|
for (auto &v : result->maxOfMax) {
|
||||||
v = InternalVersionT::zero;
|
v = z;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
return result;
|
return result;
|
||||||
@@ -712,6 +719,9 @@ struct ReadContext {
|
|||||||
int64_t prefix_read_iterations_accum = 0;
|
int64_t prefix_read_iterations_accum = 0;
|
||||||
int64_t range_read_iterations_accum = 0;
|
int64_t range_read_iterations_accum = 0;
|
||||||
int64_t range_read_node_scan_accum = 0;
|
int64_t range_read_node_scan_accum = 0;
|
||||||
|
int64_t commits_accum = 0;
|
||||||
|
int64_t conflicts_accum = 0;
|
||||||
|
int64_t too_olds_accum = 0;
|
||||||
ConflictSet::Impl *impl;
|
ConflictSet::Impl *impl;
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -730,6 +740,10 @@ struct WriteContext {
|
|||||||
int64_t write_bytes;
|
int64_t write_bytes;
|
||||||
} accum;
|
} accum;
|
||||||
|
|
||||||
|
// Cache a copy of InternalVersionT::zero, so we don't need to do the TLS
|
||||||
|
// lookup as often.
|
||||||
|
InternalVersionT zero;
|
||||||
|
|
||||||
WriteContext() { memset(&accum, 0, sizeof(accum)); }
|
WriteContext() { memset(&accum, 0, sizeof(accum)); }
|
||||||
|
|
||||||
template <class T> T *allocate(int c) {
|
template <class T> T *allocate(int c) {
|
||||||
@@ -1122,24 +1136,19 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, WriteContext *tls) {
|
|||||||
|
|
||||||
insert3:
|
insert3:
|
||||||
auto *self3 = static_cast<Node3 *>(self);
|
auto *self3 = static_cast<Node3 *>(self);
|
||||||
++self->numChildren;
|
int i = self->numChildren - 1;
|
||||||
int i = 0;
|
for (; i >= 0; --i) {
|
||||||
for (; i < self->numChildren - 1; ++i) {
|
if (int(self3->index[i]) < int(index)) {
|
||||||
if (int(self3->index[i]) > int(index)) {
|
|
||||||
memmove(self3->index + i + 1, self3->index + i,
|
|
||||||
self->numChildren - (i + 1));
|
|
||||||
memmove(self3->children + i + 1, self3->children + i,
|
|
||||||
(self->numChildren - (i + 1)) *
|
|
||||||
sizeof(self3->children[0])); // NOLINT
|
|
||||||
memmove(self3->childMaxVersion + i + 1, self3->childMaxVersion + i,
|
|
||||||
(self->numChildren - (i + 1)) *
|
|
||||||
sizeof(self3->childMaxVersion[0]));
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
self3->index[i + 1] = self3->index[i];
|
||||||
|
self3->children[i + 1] = self3->children[i];
|
||||||
|
self3->childMaxVersion[i + 1] = self3->childMaxVersion[i];
|
||||||
}
|
}
|
||||||
self3->index[i] = index;
|
self3->index[i + 1] = index;
|
||||||
auto &result = self3->children[i];
|
auto &result = self3->children[i + 1];
|
||||||
result = nullptr;
|
result = nullptr;
|
||||||
|
++self->numChildren;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
case Type_Node16: {
|
case Type_Node16: {
|
||||||
@@ -1153,71 +1162,21 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, WriteContext *tls) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
insert16:
|
insert16:
|
||||||
auto *self16 = static_cast<Node16 *>(self);
|
|
||||||
assert(self->getType() == Type_Node16);
|
assert(self->getType() == Type_Node16);
|
||||||
|
auto *self16 = static_cast<Node16 *>(self);
|
||||||
++self->numChildren;
|
int i = self->numChildren - 1;
|
||||||
#ifdef HAS_AVX
|
for (; i >= 0; --i) {
|
||||||
__m128i key_vec = _mm_set1_epi8(index);
|
if (int(self16->index[i]) < int(index)) {
|
||||||
__m128i indices;
|
|
||||||
memcpy(&indices, self16->index, sizeof(self16->index));
|
|
||||||
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
|
||||||
int mask = (1 << (self->numChildren - 1)) - 1;
|
|
||||||
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
|
|
||||||
bitfield |= uint32_t(1) << (self->numChildren - 1);
|
|
||||||
int i = std::countr_zero(bitfield);
|
|
||||||
if (i < self->numChildren - 1) {
|
|
||||||
memmove(self16->index + i + 1, self16->index + i,
|
|
||||||
self->numChildren - (i + 1));
|
|
||||||
memmove(self16->children + i + 1, self16->children + i,
|
|
||||||
(self->numChildren - (i + 1)) *
|
|
||||||
sizeof(self16->children[0])); // NOLINT
|
|
||||||
memmove(self16->childMaxVersion + i + 1, self16->childMaxVersion + i,
|
|
||||||
(self->numChildren - (i + 1)) *
|
|
||||||
sizeof(self16->childMaxVersion[0]));
|
|
||||||
}
|
|
||||||
#elif defined(HAS_ARM_NEON)
|
|
||||||
uint8x16_t indices;
|
|
||||||
memcpy(&indices, self16->index, sizeof(self16->index));
|
|
||||||
// 0xff for each leq
|
|
||||||
auto results = vcleq_u8(vdupq_n_u8(index), indices);
|
|
||||||
uint64_t mask = (uint64_t(1) << ((self->numChildren - 1) * 4)) - 1;
|
|
||||||
// 0xf for each 0xff (within mask)
|
|
||||||
uint64_t bitfield =
|
|
||||||
vget_lane_u64(
|
|
||||||
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)),
|
|
||||||
0) &
|
|
||||||
mask;
|
|
||||||
bitfield |= uint64_t(0xf) << ((self->numChildren - 1) * 4);
|
|
||||||
int i = std::countr_zero(bitfield) / 4;
|
|
||||||
if (i < self->numChildren - 1) {
|
|
||||||
memmove(self16->index + i + 1, self16->index + i,
|
|
||||||
self->numChildren - (i + 1));
|
|
||||||
memmove(self16->children + i + 1, self16->children + i,
|
|
||||||
(self->numChildren - (i + 1)) *
|
|
||||||
sizeof(self16->children[0])); // NOLINT
|
|
||||||
memmove(self16->childMaxVersion + i + 1, self16->childMaxVersion + i,
|
|
||||||
(self->numChildren - (i + 1)) *
|
|
||||||
sizeof(self16->childMaxVersion[0]));
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
int i = 0;
|
|
||||||
for (; i < int(self->numChildren) - 1; ++i) {
|
|
||||||
if (int(self16->index[i]) > int(index)) {
|
|
||||||
memmove(self16->index + i + 1, self16->index + i,
|
|
||||||
self->numChildren - (i + 1));
|
|
||||||
memmove(self16->children + i + 1, self16->children + i,
|
|
||||||
(self->numChildren - (i + 1)) * sizeof(self16->children[0]));
|
|
||||||
memmove(self16->childMaxVersion + i + 1, self16->childMaxVersion + i,
|
|
||||||
(self->numChildren - (i + 1)) *
|
|
||||||
sizeof(self16->childMaxVersion[0]));
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
self16->index[i + 1] = self16->index[i];
|
||||||
|
self16->children[i + 1] = self16->children[i];
|
||||||
|
self16->childMaxVersion[i + 1] = self16->childMaxVersion[i];
|
||||||
}
|
}
|
||||||
#endif
|
self16->index[i + 1] = index;
|
||||||
self16->index[i] = index;
|
auto &result = self16->children[i + 1];
|
||||||
auto &result = self16->children[i];
|
|
||||||
result = nullptr;
|
result = nullptr;
|
||||||
|
++self->numChildren;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
case Type_Node48: {
|
case Type_Node48: {
|
||||||
@@ -1472,7 +1431,7 @@ void maybeDownsize(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
|
|||||||
// that we have a new parent.
|
// that we have a new parent.
|
||||||
setMaxVersion(child, impl, childMaxVersion);
|
setMaxVersion(child, impl, childMaxVersion);
|
||||||
if (child->parent) {
|
if (child->parent) {
|
||||||
rezero(child->parent, InternalVersionT::zero);
|
rezero(child->parent, tls->zero);
|
||||||
}
|
}
|
||||||
|
|
||||||
getInTree(self, impl) = child;
|
getInTree(self, impl) = child;
|
||||||
@@ -1552,36 +1511,24 @@ Node *erase(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
|
|||||||
auto *parent3 = static_cast<Node3 *>(parent);
|
auto *parent3 = static_cast<Node3 *>(parent);
|
||||||
int nodeIndex = getNodeIndex(parent3, parentsIndex);
|
int nodeIndex = getNodeIndex(parent3, parentsIndex);
|
||||||
assert(nodeIndex >= 0);
|
assert(nodeIndex >= 0);
|
||||||
memmove(parent3->index + nodeIndex, parent3->index + nodeIndex + 1,
|
|
||||||
sizeof(parent3->index[0]) *
|
|
||||||
(parent->numChildren - (nodeIndex + 1)));
|
|
||||||
memmove(parent3->children + nodeIndex, parent3->children + nodeIndex + 1,
|
|
||||||
sizeof(parent3->children[0]) * // NOLINT
|
|
||||||
(parent->numChildren - (nodeIndex + 1)));
|
|
||||||
memmove(parent3->childMaxVersion + nodeIndex,
|
|
||||||
parent3->childMaxVersion + nodeIndex + 1,
|
|
||||||
sizeof(parent3->childMaxVersion[0]) *
|
|
||||||
(parent->numChildren - (nodeIndex + 1)));
|
|
||||||
|
|
||||||
--parent->numChildren;
|
--parent->numChildren;
|
||||||
|
for (int i = nodeIndex; i < parent->numChildren; ++i) {
|
||||||
|
parent3->index[i] = parent3->index[i + 1];
|
||||||
|
parent3->children[i] = parent3->children[i + 1];
|
||||||
|
parent3->childMaxVersion[i] = parent3->childMaxVersion[i + 1];
|
||||||
|
}
|
||||||
assert(parent->numChildren > 0 || parent->entryPresent);
|
assert(parent->numChildren > 0 || parent->entryPresent);
|
||||||
} break;
|
} break;
|
||||||
case Type_Node16: {
|
case Type_Node16: {
|
||||||
auto *parent16 = static_cast<Node16 *>(parent);
|
auto *parent16 = static_cast<Node16 *>(parent);
|
||||||
int nodeIndex = getNodeIndex(parent16, parentsIndex);
|
int nodeIndex = getNodeIndex(parent16, parentsIndex);
|
||||||
assert(nodeIndex >= 0);
|
assert(nodeIndex >= 0);
|
||||||
memmove(parent16->index + nodeIndex, parent16->index + nodeIndex + 1,
|
|
||||||
sizeof(parent16->index[0]) *
|
|
||||||
(parent->numChildren - (nodeIndex + 1)));
|
|
||||||
memmove(parent16->children + nodeIndex, parent16->children + nodeIndex + 1,
|
|
||||||
sizeof(parent16->children[0]) * // NOLINT
|
|
||||||
(parent->numChildren - (nodeIndex + 1)));
|
|
||||||
memmove(parent16->childMaxVersion + nodeIndex,
|
|
||||||
parent16->childMaxVersion + nodeIndex + 1,
|
|
||||||
sizeof(parent16->childMaxVersion[0]) *
|
|
||||||
(parent->numChildren - (nodeIndex + 1)));
|
|
||||||
|
|
||||||
--parent->numChildren;
|
--parent->numChildren;
|
||||||
|
for (int i = nodeIndex; i < parent->numChildren; ++i) {
|
||||||
|
parent16->index[i] = parent16->index[i + 1];
|
||||||
|
parent16->children[i] = parent16->children[i + 1];
|
||||||
|
parent16->childMaxVersion[i] = parent16->childMaxVersion[i + 1];
|
||||||
|
}
|
||||||
|
|
||||||
// By kMinChildrenNode16
|
// By kMinChildrenNode16
|
||||||
assert(parent->numChildren > 0);
|
assert(parent->numChildren > 0);
|
||||||
@@ -1608,7 +1555,7 @@ Node *erase(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
|
|||||||
parent48->index[parentIndex] = toRemoveChildrenIndex;
|
parent48->index[parentIndex] = toRemoveChildrenIndex;
|
||||||
parent48->reverseIndex[toRemoveChildrenIndex] = parentIndex;
|
parent48->reverseIndex[toRemoveChildrenIndex] = parentIndex;
|
||||||
}
|
}
|
||||||
parent48->childMaxVersion[lastChildrenIndex] = InternalVersionT::zero;
|
parent48->childMaxVersion[lastChildrenIndex] = tls->zero;
|
||||||
|
|
||||||
--parent->numChildren;
|
--parent->numChildren;
|
||||||
|
|
||||||
@@ -1726,15 +1673,38 @@ int firstNeqStride(const uint8_t *ap, const uint8_t *bp) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#if defined(__x86_64__) && !defined(__SANITIZE_THREAD__)
|
||||||
|
__attribute__((target("avx512bw"))) int
|
||||||
|
longestCommonPrefix(const uint8_t *ap, const uint8_t *bp, int cl) {
|
||||||
|
int i = 0;
|
||||||
|
int end = cl & ~63;
|
||||||
|
while (i < end) {
|
||||||
|
const uint64_t eq =
|
||||||
|
_mm512_cmpeq_epi8_mask(_mm512_loadu_epi8(ap), _mm512_loadu_epi8(bp));
|
||||||
|
if (eq != uint64_t(-1)) {
|
||||||
|
return i + std::countr_one(eq);
|
||||||
|
}
|
||||||
|
i += 64;
|
||||||
|
ap += 64;
|
||||||
|
bp += 64;
|
||||||
|
}
|
||||||
|
if (i < cl) {
|
||||||
|
const uint64_t mask = (uint64_t(1) << (cl - i)) - 1;
|
||||||
|
const uint64_t eq = _mm512_cmpeq_epi8_mask(
|
||||||
|
_mm512_maskz_loadu_epi8(mask, ap), _mm512_maskz_loadu_epi8(mask, bp));
|
||||||
|
return i + std::countr_one(eq & mask);
|
||||||
|
}
|
||||||
|
assert(i == cl);
|
||||||
|
return i;
|
||||||
|
}
|
||||||
|
__attribute__((target("default")))
|
||||||
|
#endif
|
||||||
|
|
||||||
int longestCommonPrefix(const uint8_t *ap, const uint8_t *bp, int cl) {
|
int longestCommonPrefix(const uint8_t *ap, const uint8_t *bp, int cl) {
|
||||||
assume(cl >= 0);
|
assume(cl >= 0);
|
||||||
int i = 0;
|
int i = 0;
|
||||||
int end;
|
int end;
|
||||||
|
|
||||||
if (cl < 8) {
|
|
||||||
goto bytes;
|
|
||||||
}
|
|
||||||
|
|
||||||
// kStride * kUnrollCount at a time
|
// kStride * kUnrollCount at a time
|
||||||
end = cl & ~(kStride * kUnrollFactor - 1);
|
end = cl & ~(kStride * kUnrollFactor - 1);
|
||||||
while (i < end) {
|
while (i < end) {
|
||||||
@@ -1775,7 +1745,6 @@ int longestCommonPrefix(const uint8_t *ap, const uint8_t *bp, int cl) {
|
|||||||
bp += 8;
|
bp += 8;
|
||||||
}
|
}
|
||||||
|
|
||||||
bytes:
|
|
||||||
// byte at a time
|
// byte at a time
|
||||||
while (i < cl) {
|
while (i < cl) {
|
||||||
if (*ap != *bp) {
|
if (*ap != *bp) {
|
||||||
@@ -1808,10 +1777,13 @@ struct SearchStepWise {
|
|||||||
if (child == nullptr) {
|
if (child == nullptr) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
int cl = std::min<int>(child->partialKeyLen, remaining.size() - 1);
|
if (child->partialKeyLen > 0) {
|
||||||
int i = longestCommonPrefix(child->partialKey(), remaining.data() + 1, cl);
|
int cl = std::min<int>(child->partialKeyLen, remaining.size() - 1);
|
||||||
if (i != child->partialKeyLen) {
|
int i =
|
||||||
return true;
|
longestCommonPrefix(child->partialKey(), remaining.data() + 1, cl);
|
||||||
|
if (i != child->partialKeyLen) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
n = child;
|
n = child;
|
||||||
remaining =
|
remaining =
|
||||||
@@ -2949,8 +2921,7 @@ template <bool kBegin>
|
|||||||
child->partialKeyLen = 0;
|
child->partialKeyLen = 0;
|
||||||
child->parent = *self;
|
child->parent = *self;
|
||||||
child->parentsIndex = key.front();
|
child->parentsIndex = key.front();
|
||||||
setMaxVersion(child, impl,
|
setMaxVersion(child, impl, kBegin ? writeVersion : tls->zero);
|
||||||
kBegin ? writeVersion : InternalVersionT::zero);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
self = &child;
|
self = &child;
|
||||||
@@ -2998,8 +2969,7 @@ void addPointWrite(Node *&root, std::span<const uint8_t> key,
|
|||||||
n->entry.pointVersion = writeVersion;
|
n->entry.pointVersion = writeVersion;
|
||||||
setMaxVersion(n, impl, writeVersion);
|
setMaxVersion(n, impl, writeVersion);
|
||||||
n->entry.rangeVersion =
|
n->entry.rangeVersion =
|
||||||
p == nullptr ? InternalVersionT::zero
|
p == nullptr ? tls->zero : std::max(p->entry.rangeVersion, tls->zero);
|
||||||
: std::max(p->entry.rangeVersion, InternalVersionT::zero);
|
|
||||||
} else {
|
} else {
|
||||||
assert(writeVersion >= n->entry.pointVersion);
|
assert(writeVersion >= n->entry.pointVersion);
|
||||||
n->entry.pointVersion = writeVersion;
|
n->entry.pointVersion = writeVersion;
|
||||||
@@ -3063,8 +3033,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
|
|||||||
++tls->accum.entries_inserted;
|
++tls->accum.entries_inserted;
|
||||||
auto *p = nextLogical(beginNode);
|
auto *p = nextLogical(beginNode);
|
||||||
beginNode->entry.rangeVersion =
|
beginNode->entry.rangeVersion =
|
||||||
p == nullptr ? InternalVersionT::zero
|
p == nullptr ? tls->zero : std::max(p->entry.rangeVersion, tls->zero);
|
||||||
: std::max(p->entry.rangeVersion, InternalVersionT::zero);
|
|
||||||
beginNode->entry.pointVersion = writeVersion;
|
beginNode->entry.pointVersion = writeVersion;
|
||||||
assert(maxVersion(beginNode, impl) <= writeVersion);
|
assert(maxVersion(beginNode, impl) <= writeVersion);
|
||||||
setMaxVersion(beginNode, impl, writeVersion);
|
setMaxVersion(beginNode, impl, writeVersion);
|
||||||
@@ -3084,8 +3053,7 @@ void addWriteRange(Node *&root, std::span<const uint8_t> begin,
|
|||||||
++tls->accum.entries_inserted;
|
++tls->accum.entries_inserted;
|
||||||
auto *p = nextLogical(endNode);
|
auto *p = nextLogical(endNode);
|
||||||
endNode->entry.pointVersion =
|
endNode->entry.pointVersion =
|
||||||
p == nullptr ? InternalVersionT::zero
|
p == nullptr ? tls->zero : std::max(p->entry.rangeVersion, tls->zero);
|
||||||
: std::max(p->entry.rangeVersion, InternalVersionT::zero);
|
|
||||||
auto m = maxVersion(endNode, impl);
|
auto m = maxVersion(endNode, impl);
|
||||||
setMaxVersion(endNode, impl,
|
setMaxVersion(endNode, impl,
|
||||||
std::max<InternalVersionT>(m, endNode->entry.pointVersion));
|
std::max<InternalVersionT>(m, endNode->entry.pointVersion));
|
||||||
@@ -3162,9 +3130,6 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|||||||
void check(const ReadRange *reads, Result *result, int count) {
|
void check(const ReadRange *reads, Result *result, int count) {
|
||||||
ReadContext tls;
|
ReadContext tls;
|
||||||
tls.impl = this;
|
tls.impl = this;
|
||||||
int commits_accum = 0;
|
|
||||||
int conflicts_accum = 0;
|
|
||||||
int too_olds_accum = 0;
|
|
||||||
int64_t check_byte_accum = 0;
|
int64_t check_byte_accum = 0;
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
const auto &r = reads[i];
|
const auto &r = reads[i];
|
||||||
@@ -3182,9 +3147,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|||||||
InternalVersionT(reads[i].readVersion), &tls))
|
InternalVersionT(reads[i].readVersion), &tls))
|
||||||
? Commit
|
? Commit
|
||||||
: Conflict;
|
: Conflict;
|
||||||
commits_accum += result[i] == Commit;
|
tls.commits_accum += result[i] == Commit;
|
||||||
conflicts_accum += result[i] == Conflict;
|
tls.conflicts_accum += result[i] == Conflict;
|
||||||
too_olds_accum += result[i] == TooOld;
|
tls.too_olds_accum += result[i] == TooOld;
|
||||||
}
|
}
|
||||||
point_read_total.add(tls.point_read_accum);
|
point_read_total.add(tls.point_read_accum);
|
||||||
prefix_read_total.add(tls.prefix_read_accum);
|
prefix_read_total.add(tls.prefix_read_accum);
|
||||||
@@ -3196,9 +3161,9 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|||||||
point_read_iterations_total.add(tls.point_read_iterations_accum);
|
point_read_iterations_total.add(tls.point_read_iterations_accum);
|
||||||
prefix_read_iterations_total.add(tls.prefix_read_iterations_accum);
|
prefix_read_iterations_total.add(tls.prefix_read_iterations_accum);
|
||||||
range_read_iterations_total.add(tls.range_read_iterations_accum);
|
range_read_iterations_total.add(tls.range_read_iterations_accum);
|
||||||
commits_total.add(commits_accum);
|
commits_total.add(tls.commits_accum);
|
||||||
conflicts_total.add(conflicts_accum);
|
conflicts_total.add(tls.conflicts_accum);
|
||||||
too_olds_total.add(too_olds_accum);
|
too_olds_total.add(tls.too_olds_accum);
|
||||||
check_bytes_total.add(check_byte_accum);
|
check_bytes_total.add(check_byte_accum);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3206,7 +3171,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|||||||
// There could be other conflict sets in the same thread. We need
|
// There could be other conflict sets in the same thread. We need
|
||||||
// InternalVersionT::zero to be correct for this conflict set for the
|
// InternalVersionT::zero to be correct for this conflict set for the
|
||||||
// lifetime of the current call frame.
|
// lifetime of the current call frame.
|
||||||
InternalVersionT::zero = oldestVersion;
|
InternalVersionT::zero = tls.zero = oldestVersion;
|
||||||
|
|
||||||
assert(writeVersion >= newestVersionFullPrecision);
|
assert(writeVersion >= newestVersionFullPrecision);
|
||||||
|
|
||||||
@@ -3317,7 +3282,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|||||||
InternalVersionT oldestVersion{o};
|
InternalVersionT oldestVersion{o};
|
||||||
this->oldestVersionFullPrecision = o;
|
this->oldestVersionFullPrecision = o;
|
||||||
this->oldestVersion = oldestVersion;
|
this->oldestVersion = oldestVersion;
|
||||||
InternalVersionT::zero = oldestVersion;
|
InternalVersionT::zero = tls.zero = oldestVersion;
|
||||||
#ifdef NDEBUG
|
#ifdef NDEBUG
|
||||||
// This is here for performance reasons, since we want to amortize the cost
|
// This is here for performance reasons, since we want to amortize the cost
|
||||||
// of storing the search path as a string. In tests, we want to exercise the
|
// of storing the search path as a string. In tests, we want to exercise the
|
||||||
@@ -3367,7 +3332,7 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|||||||
root->entry.pointVersion = this->oldestVersion;
|
root->entry.pointVersion = this->oldestVersion;
|
||||||
root->entry.rangeVersion = this->oldestVersion;
|
root->entry.rangeVersion = this->oldestVersion;
|
||||||
|
|
||||||
InternalVersionT::zero = this->oldestVersion;
|
InternalVersionT::zero = tls.zero = this->oldestVersion;
|
||||||
|
|
||||||
// Intentionally not resetting totalBytes
|
// Intentionally not resetting totalBytes
|
||||||
}
|
}
|
||||||
|
104
README.md
104
README.md
@@ -1,86 +1,38 @@
|
|||||||
A data structure for optimistic concurrency control on ranges of bitwise-lexicographically-ordered keys.
|
A data structure for optimistic concurrency control on ranges of bitwise-lexicographically-ordered keys.
|
||||||
|
|
||||||
Intended to replace FoundationDB's skip list.
|
Intended as an alternative to FoundationDB's skip list.
|
||||||
|
|
||||||
Hardware for all benchmarks is a mac m1 2020.
|
Hardware for all benchmarks is an AMD Ryzen 9 7900 with (2x32GB) 5600MT/s CL28-34-34-89 1.35V RAM
|
||||||
|
|
||||||
# FoundationDB's benchmark
|
# Microbenchmark
|
||||||
|
|
||||||
## Skip list
|
## Skip list
|
||||||
|
|
||||||
```
|
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|
||||||
New conflict set: 1.957 sec
|
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|
||||||
0.639 Mtransactions/sec
|
| 172.03 | 5,812,791.77 | 0.4% | 3,130.62 | 879.00 | 3.562 | 509.23 | 0.0% | 0.01 | `point reads`
|
||||||
2.555 Mkeys/sec
|
| 167.44 | 5,972,130.71 | 0.2% | 3,065.14 | 862.27 | 3.555 | 494.30 | 0.0% | 0.01 | `prefix reads`
|
||||||
Detect only: 1.845 sec
|
| 238.77 | 4,188,130.84 | 0.9% | 3,589.93 | 1,259.30 | 2.851 | 637.12 | 0.0% | 0.01 | `range reads`
|
||||||
0.678 Mtransactions/sec
|
| 424.01 | 2,358,426.70 | 0.2% | 5,620.05 | 2,242.35 | 2.506 | 854.80 | 1.7% | 0.01 | `point writes`
|
||||||
2.710 Mkeys/sec
|
| 418.45 | 2,389,780.56 | 0.4% | 5,525.07 | 2,211.05 | 2.499 | 831.71 | 1.7% | 0.01 | `prefix writes`
|
||||||
Skiplist only: 1.263 sec
|
| 254.87 | 3,923,568.88 | 2.6% | 3,187.01 | 1,366.50 | 2.332 | 529.11 | 2.7% | 0.02 | `range writes`
|
||||||
0.990 Mtransactions/sec
|
| 675.96 | 1,479,374.50 | 3.3% | 7,735.41 | 3,468.60 | 2.230 | 1,386.02 | 1.8% | 0.01 | `monotonic increasing point writes`
|
||||||
3.960 Mkeys/sec
|
| 137,986.20 | 7,247.10 | 0.6% | 789,752.33 | 699,462.00 | 1.129 | 144,824.14 | 0.0% | 0.01 | `worst case for radix tree`
|
||||||
Performance counters:
|
| 21.63 | 46,231,564.03 | 1.0% | 448.00 | 107.14 | 4.181 | 84.00 | 0.0% | 0.01 | `create and destroy`
|
||||||
Build: 0.0546
|
|
||||||
Add: 0.0563
|
|
||||||
Detect: 1.84
|
|
||||||
D.Sort: 0.412
|
|
||||||
D.Combine: 0.0141
|
|
||||||
D.CheckRead: 0.671
|
|
||||||
D.CheckIntraBatch: 0.0068
|
|
||||||
D.MergeWrite: 0.592
|
|
||||||
D.RemoveBefore: 0.146
|
|
||||||
```
|
|
||||||
|
|
||||||
## Radix tree (this implementation)
|
## Radix tree (this implementation)
|
||||||
|
|
||||||
```
|
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|
||||||
New conflict set: 1.366 sec
|
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|
||||||
0.915 Mtransactions/sec
|
| 17.03 | 58,732,967.93 | 0.6% | 276.28 | 87.96 | 3.141 | 52.15 | 0.4% | 0.01 | `point reads`
|
||||||
3.660 Mkeys/sec
|
| 19.52 | 51,239,158.04 | 0.3% | 367.16 | 101.50 | 3.617 | 61.92 | 0.3% | 0.01 | `prefix reads`
|
||||||
Detect only: 1.248 sec
|
| 47.74 | 20,947,676.63 | 0.5% | 998.16 | 247.43 | 4.034 | 161.64 | 0.2% | 0.01 | `range reads`
|
||||||
1.002 Mtransactions/sec
|
| 23.14 | 43,207,824.89 | 0.4% | 408.18 | 121.64 | 3.356 | 70.20 | 0.3% | 0.01 | `point writes`
|
||||||
4.007 Mkeys/sec
|
| 38.02 | 26,302,115.66 | 0.1% | 709.72 | 199.70 | 3.554 | 134.26 | 0.3% | 0.01 | `prefix writes`
|
||||||
Skiplist only: 0.573 sec
|
| 44.28 | 22,583,559.17 | 0.9% | 825.19 | 233.10 | 3.540 | 141.48 | 0.2% | 0.01 | `range writes`
|
||||||
2.182 Mtransactions/sec
|
| 85.50 | 11,695,990.63 | 0.5% | 1,488.16 | 455.68 | 3.266 | 289.22 | 0.1% | 0.01 | `monotonic increasing point writes`
|
||||||
8.730 Mkeys/sec
|
| 338,388.50 | 2,955.18 | 3.3% | 4,097,087.00 | 1,809,996.00 | 2.264 | 759,645.00 | 0.0% | 0.01 | `worst case for radix tree`
|
||||||
Performance counters:
|
| 84.84 | 11,787,313.59 | 1.4% | 1,716.02 | 440.50 | 3.896 | 271.00 | 0.0% | 0.01 | `create and destroy`
|
||||||
Build: 0.0594
|
|
||||||
Add: 0.0572
|
|
||||||
Detect: 1.25
|
|
||||||
D.Sort: 0.418
|
|
||||||
D.Combine: 0.0149
|
|
||||||
D.CheckRead: 0.232
|
|
||||||
D.CheckIntraBatch: 0.0067
|
|
||||||
D.MergeWrite: 0.341
|
|
||||||
D.RemoveBefore: 0.232
|
|
||||||
```
|
|
||||||
|
|
||||||
# Our benchmark
|
|
||||||
|
|
||||||
## Skip list
|
|
||||||
|
|
||||||
| ns/op | op/s | err% | total | benchmark
|
|
||||||
|--------------------:|--------------------:|--------:|----------:|:----------
|
|
||||||
| 245.99 | 4,065,232.81 | 0.3% | 0.01 | `point reads`
|
|
||||||
| 265.93 | 3,760,430.49 | 0.2% | 0.01 | `prefix reads`
|
|
||||||
| 485.30 | 2,060,569.50 | 0.2% | 0.01 | `range reads`
|
|
||||||
| 449.60 | 2,224,195.17 | 0.4% | 0.01 | `point writes`
|
|
||||||
| 441.76 | 2,263,688.18 | 1.1% | 0.01 | `prefix writes`
|
|
||||||
| 245.42 | 4,074,647.54 | 2.4% | 0.02 | `range writes`
|
|
||||||
| 572.80 | 1,745,810.06 | 1.3% | 0.01 | `monotonic increasing point writes`
|
|
||||||
| 154,819.33 | 6,459.14 | 0.9% | 0.01 | `worst case for radix tree`
|
|
||||||
|
|
||||||
## Radix tree (this implementation)
|
|
||||||
|
|
||||||
| ns/op | op/s | err% | total | benchmark
|
|
||||||
|--------------------:|--------------------:|--------:|----------:|:----------
|
|
||||||
| 20.25 | 49,372,759.86 | 0.3% | 0.01 | `point reads`
|
|
||||||
| 23.58 | 42,401,298.00 | 0.3% | 0.01 | `prefix reads`
|
|
||||||
| 64.12 | 15,595,463.14 | 0.8% | 0.01 | `range reads`
|
|
||||||
| 29.50 | 33,903,101.20 | 0.7% | 0.01 | `point writes`
|
|
||||||
| 46.76 | 21,384,036.19 | 1.2% | 0.01 | `prefix writes`
|
|
||||||
| 51.25 | 19,512,195.12 | 0.0% | 0.01 | `range writes`
|
|
||||||
| 109.51 | 9,131,469.31 | 3.6% | 0.01 | `monotonic increasing point writes`
|
|
||||||
| 1,153,875.00 | 866.65 | 1.6% | 0.01 | `worst case for radix tree`
|
|
||||||
|
|
||||||
# "Real data" test
|
# "Real data" test
|
||||||
|
|
||||||
@@ -89,13 +41,13 @@ Point queries only, best of three runs. Gc ratio is the ratio of time spent doin
|
|||||||
## skip list
|
## skip list
|
||||||
|
|
||||||
```
|
```
|
||||||
Check: 11.3385 seconds, 329.718 MB/s, Add: 5.35612 seconds, 131.072 MB/s, Gc ratio: 45.7173%
|
Check: 4.47891 seconds, 364.05 MB/s, Add: 4.55599 seconds, 123.058 MB/s, Gc ratio: 37.1145%
|
||||||
```
|
```
|
||||||
|
|
||||||
## radix tree
|
## radix tree
|
||||||
|
|
||||||
```
|
```
|
||||||
Check: 2.60639 seconds, 1434.36 MB/s, Add: 2.10911 seconds, 332.86 MB/s, Gc ratio: 46.3071%
|
Check: 1.05813 seconds, 1540.97 MB/s, Add: 1.32071 seconds, 424.508 MB/s, Gc ratio: 42.2067%
|
||||||
```
|
```
|
||||||
|
|
||||||
## hash table
|
## hash table
|
||||||
@@ -103,5 +55,5 @@ Check: 2.60639 seconds, 1434.36 MB/s, Add: 2.10911 seconds, 332.86 MB/s, Gc rati
|
|||||||
(The hash table implementation doesn't work on range queries, and its purpose is to provide an idea of how fast point queries can be)
|
(The hash table implementation doesn't work on range queries, and its purpose is to provide an idea of how fast point queries can be)
|
||||||
|
|
||||||
```
|
```
|
||||||
Check: 1.83386 seconds, 2038.6 MB/s, Add: 0.601411 seconds, 1167.32 MB/s, Gc ratio: 48.9776%
|
Check: 0.804094 seconds, 2027.81 MB/s, Add: 0.652952 seconds, 858.645 MB/s, Gc ratio: 35.3885%
|
||||||
```
|
```
|
||||||
|
Reference in New Issue
Block a user