Compare commits
11 Commits
8e1e344f4b
...
v0.0.12
Author | SHA1 | Date | |
---|---|---|---|
311794c37e | |||
dfa178ba19 | |||
a16d18edfe | |||
2b60287448 | |||
0a9ac59676 | |||
e3a77ed773 | |||
cdf9a8a7b0 | |||
305dfdd52f | |||
7261c91492 | |||
f11720f5ae | |||
e2b7298af5 |
@@ -276,9 +276,15 @@ if(CMAKE_SOURCE_DIR STREQUAL CMAKE_CURRENT_SOURCE_DIR AND BUILD_TESTING)
|
|||||||
|
|
||||||
find_program(VALGRIND_EXE valgrind)
|
find_program(VALGRIND_EXE valgrind)
|
||||||
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
|
if(VALGRIND_EXE AND NOT CMAKE_CROSSCOMPILING)
|
||||||
add_test(NAME conflict_set_blackbox_valgrind
|
list(LENGTH CORPUS_TESTS len)
|
||||||
|
math(EXPR last "${len} - 1")
|
||||||
|
set(partition_size 100)
|
||||||
|
foreach(i RANGE 0 ${last} ${partition_size})
|
||||||
|
list(SUBLIST CORPUS_TESTS ${i} ${partition_size} partition)
|
||||||
|
add_test(NAME conflict_set_blackbox_valgrind_${i}
|
||||||
COMMAND ${VALGRIND_EXE} --error-exitcode=99 --
|
COMMAND ${VALGRIND_EXE} --error-exitcode=99 --
|
||||||
$<TARGET_FILE:driver> ${CORPUS_TESTS})
|
$<TARGET_FILE:driver> ${partition})
|
||||||
|
endforeach()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
# api smoke tests
|
# api smoke tests
|
||||||
|
118
ConflictSet.cpp
118
ConflictSet.cpp
@@ -197,7 +197,6 @@ struct Node {
|
|||||||
/* end section that's copied to the next node */
|
/* end section that's copied to the next node */
|
||||||
|
|
||||||
uint8_t *partialKey();
|
uint8_t *partialKey();
|
||||||
|
|
||||||
Type getType() const { return type; }
|
Type getType() const { return type; }
|
||||||
int32_t getCapacity() const { return partialKeyCapacity; }
|
int32_t getCapacity() const { return partialKeyCapacity; }
|
||||||
|
|
||||||
@@ -221,84 +220,83 @@ constexpr int kNodeCopySize =
|
|||||||
|
|
||||||
struct Node0 : Node {
|
struct Node0 : Node {
|
||||||
constexpr static auto kType = Type_Node0;
|
constexpr static auto kType = Type_Node0;
|
||||||
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
|
||||||
|
|
||||||
|
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
||||||
void copyChildrenAndKeyFrom(const Node0 &other);
|
void copyChildrenAndKeyFrom(const Node0 &other);
|
||||||
void copyChildrenAndKeyFrom(const struct Node3 &other);
|
void copyChildrenAndKeyFrom(const struct Node3 &other);
|
||||||
|
|
||||||
size_t size() const { return sizeof(Node0) + getCapacity(); }
|
size_t size() const { return sizeof(Node0) + getCapacity(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Node3 : Node {
|
struct Node3 : Node {
|
||||||
constexpr static auto kMaxNodes = 3;
|
constexpr static auto kMaxNodes = 3;
|
||||||
constexpr static auto kType = Type_Node3;
|
constexpr static auto kType = Type_Node3;
|
||||||
// Sorted
|
|
||||||
uint8_t index[kMaxNodes];
|
|
||||||
Node *children[kMaxNodes];
|
Node *children[kMaxNodes];
|
||||||
InternalVersionT childMaxVersion[kMaxNodes];
|
InternalVersionT childMaxVersion[kMaxNodes];
|
||||||
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
// Sorted
|
||||||
|
uint8_t index[kMaxNodes];
|
||||||
|
|
||||||
|
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
||||||
void copyChildrenAndKeyFrom(const Node0 &other);
|
void copyChildrenAndKeyFrom(const Node0 &other);
|
||||||
void copyChildrenAndKeyFrom(const Node3 &other);
|
void copyChildrenAndKeyFrom(const Node3 &other);
|
||||||
void copyChildrenAndKeyFrom(const struct Node16 &other);
|
void copyChildrenAndKeyFrom(const struct Node16 &other);
|
||||||
|
|
||||||
size_t size() const { return sizeof(Node3) + getCapacity(); }
|
size_t size() const { return sizeof(Node3) + getCapacity(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Node16 : Node {
|
struct Node16 : Node {
|
||||||
constexpr static auto kType = Type_Node16;
|
constexpr static auto kType = Type_Node16;
|
||||||
constexpr static auto kMaxNodes = 16;
|
constexpr static auto kMaxNodes = 16;
|
||||||
// Sorted
|
|
||||||
uint8_t index[kMaxNodes];
|
|
||||||
Node *children[kMaxNodes];
|
Node *children[kMaxNodes];
|
||||||
InternalVersionT childMaxVersion[kMaxNodes];
|
InternalVersionT childMaxVersion[kMaxNodes];
|
||||||
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
// Sorted
|
||||||
|
uint8_t index[kMaxNodes];
|
||||||
|
|
||||||
|
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
||||||
void copyChildrenAndKeyFrom(const Node3 &other);
|
void copyChildrenAndKeyFrom(const Node3 &other);
|
||||||
void copyChildrenAndKeyFrom(const Node16 &other);
|
void copyChildrenAndKeyFrom(const Node16 &other);
|
||||||
void copyChildrenAndKeyFrom(const struct Node48 &other);
|
void copyChildrenAndKeyFrom(const struct Node48 &other);
|
||||||
|
|
||||||
size_t size() const { return sizeof(Node16) + getCapacity(); }
|
size_t size() const { return sizeof(Node16) + getCapacity(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Node48 : Node {
|
struct Node48 : Node {
|
||||||
constexpr static auto kType = Type_Node48;
|
constexpr static auto kType = Type_Node48;
|
||||||
constexpr static auto kMaxNodes = 48;
|
constexpr static auto kMaxNodes = 48;
|
||||||
BitSet bitSet;
|
|
||||||
int8_t nextFree;
|
|
||||||
int8_t index[256];
|
|
||||||
Node *children[kMaxNodes];
|
|
||||||
InternalVersionT childMaxVersion[kMaxNodes];
|
|
||||||
uint8_t reverseIndex[kMaxNodes];
|
|
||||||
constexpr static int kMaxOfMaxPageSize = 16;
|
constexpr static int kMaxOfMaxPageSize = 16;
|
||||||
constexpr static int kMaxOfMaxShift =
|
constexpr static int kMaxOfMaxShift =
|
||||||
std::countr_zero(uint32_t(kMaxOfMaxPageSize));
|
std::countr_zero(uint32_t(kMaxOfMaxPageSize));
|
||||||
constexpr static int kMaxOfMaxTotalPages = kMaxNodes / kMaxOfMaxPageSize;
|
constexpr static int kMaxOfMaxTotalPages = kMaxNodes / kMaxOfMaxPageSize;
|
||||||
|
|
||||||
|
BitSet bitSet;
|
||||||
|
Node *children[kMaxNodes];
|
||||||
|
InternalVersionT childMaxVersion[kMaxNodes];
|
||||||
InternalVersionT maxOfMax[kMaxOfMaxTotalPages];
|
InternalVersionT maxOfMax[kMaxOfMaxTotalPages];
|
||||||
|
uint8_t reverseIndex[kMaxNodes];
|
||||||
|
int8_t index[256];
|
||||||
|
|
||||||
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
||||||
|
|
||||||
void copyChildrenAndKeyFrom(const Node16 &other);
|
void copyChildrenAndKeyFrom(const Node16 &other);
|
||||||
void copyChildrenAndKeyFrom(const Node48 &other);
|
void copyChildrenAndKeyFrom(const Node48 &other);
|
||||||
void copyChildrenAndKeyFrom(const struct Node256 &other);
|
void copyChildrenAndKeyFrom(const struct Node256 &other);
|
||||||
|
|
||||||
size_t size() const { return sizeof(Node48) + getCapacity(); }
|
size_t size() const { return sizeof(Node48) + getCapacity(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
struct Node256 : Node {
|
struct Node256 : Node {
|
||||||
constexpr static auto kType = Type_Node256;
|
constexpr static auto kType = Type_Node256;
|
||||||
BitSet bitSet;
|
constexpr static auto kMaxNodes = 256;
|
||||||
Node *children[256];
|
|
||||||
InternalVersionT childMaxVersion[256];
|
|
||||||
constexpr static int kMaxOfMaxPageSize = 16;
|
constexpr static int kMaxOfMaxPageSize = 16;
|
||||||
constexpr static int kMaxOfMaxShift =
|
constexpr static int kMaxOfMaxShift =
|
||||||
std::countr_zero(uint32_t(kMaxOfMaxPageSize));
|
std::countr_zero(uint32_t(kMaxOfMaxPageSize));
|
||||||
constexpr static int kMaxOfMaxTotalPages = 256 / kMaxOfMaxPageSize;
|
constexpr static int kMaxOfMaxTotalPages = kMaxNodes / kMaxOfMaxPageSize;
|
||||||
|
|
||||||
|
BitSet bitSet;
|
||||||
|
Node *children[kMaxNodes];
|
||||||
|
InternalVersionT childMaxVersion[kMaxNodes];
|
||||||
InternalVersionT maxOfMax[kMaxOfMaxTotalPages];
|
InternalVersionT maxOfMax[kMaxOfMaxTotalPages];
|
||||||
|
|
||||||
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
uint8_t *partialKey() { return (uint8_t *)(this + 1); }
|
||||||
void copyChildrenAndKeyFrom(const Node48 &other);
|
void copyChildrenAndKeyFrom(const Node48 &other);
|
||||||
void copyChildrenAndKeyFrom(const Node256 &other);
|
void copyChildrenAndKeyFrom(const Node256 &other);
|
||||||
|
|
||||||
size_t size() const { return sizeof(Node256) + getCapacity(); }
|
size_t size() const { return sizeof(Node256) + getCapacity(); }
|
||||||
};
|
};
|
||||||
|
|
||||||
@@ -323,7 +321,7 @@ inline void Node3::copyChildrenAndKeyFrom(const Node0 &other) {
|
|||||||
inline void Node3::copyChildrenAndKeyFrom(const Node3 &other) {
|
inline void Node3::copyChildrenAndKeyFrom(const Node3 &other) {
|
||||||
memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
|
memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
|
||||||
kNodeCopySize);
|
kNodeCopySize);
|
||||||
memcpy(index, other.index, sizeof(*this) - sizeof(Node));
|
memcpy(children, other.children, sizeof(*this) - sizeof(Node));
|
||||||
memcpy(partialKey(), &other + 1, partialKeyLen);
|
memcpy(partialKey(), &other + 1, partialKeyLen);
|
||||||
for (int i = 0; i < numChildren; ++i) {
|
for (int i = 0; i < numChildren; ++i) {
|
||||||
assert(children[i]->parent == &other);
|
assert(children[i]->parent == &other);
|
||||||
@@ -404,7 +402,6 @@ inline void Node48::copyChildrenAndKeyFrom(const Node16 &other) {
|
|||||||
}
|
}
|
||||||
memcpy(partialKey(), &other + 1, partialKeyLen);
|
memcpy(partialKey(), &other + 1, partialKeyLen);
|
||||||
bitSet.init();
|
bitSet.init();
|
||||||
nextFree = Node16::kMaxNodes;
|
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (auto x : other.index) {
|
for (auto x : other.index) {
|
||||||
bitSet.set(x);
|
bitSet.set(x);
|
||||||
@@ -424,7 +421,6 @@ inline void Node48::copyChildrenAndKeyFrom(const Node48 &other) {
|
|||||||
memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
|
memcpy((char *)this + kNodeCopyBegin, (char *)&other + kNodeCopyBegin,
|
||||||
kNodeCopySize);
|
kNodeCopySize);
|
||||||
bitSet = other.bitSet;
|
bitSet = other.bitSet;
|
||||||
nextFree = other.nextFree;
|
|
||||||
memcpy(index, other.index, sizeof(index));
|
memcpy(index, other.index, sizeof(index));
|
||||||
memset(children, 0, sizeof(children));
|
memset(children, 0, sizeof(children));
|
||||||
const auto z = InternalVersionT::zero;
|
const auto z = InternalVersionT::zero;
|
||||||
@@ -451,7 +447,6 @@ inline void Node48::copyChildrenAndKeyFrom(const Node256 &other) {
|
|||||||
for (auto &v : childMaxVersion) {
|
for (auto &v : childMaxVersion) {
|
||||||
v = z;
|
v = z;
|
||||||
}
|
}
|
||||||
nextFree = other.numChildren;
|
|
||||||
bitSet = other.bitSet;
|
bitSet = other.bitSet;
|
||||||
int i = 0;
|
int i = 0;
|
||||||
bitSet.forEachSet([&](int c) {
|
bitSet.forEachSet([&](int c) {
|
||||||
@@ -773,14 +768,7 @@ private:
|
|||||||
BoundedFreeListAllocator<Node256> node256;
|
BoundedFreeListAllocator<Node256> node256;
|
||||||
};
|
};
|
||||||
|
|
||||||
template <class NodeT> int getNodeIndex(NodeT *self, uint8_t index) {
|
int getNodeIndex(Node3 *self, uint8_t index) {
|
||||||
static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
|
|
||||||
|
|
||||||
// cachegrind says the plain loop is fewer instructions and more mis-predicted
|
|
||||||
// branches. Microbenchmark says plain loop is faster. It's written in this
|
|
||||||
// weird "generic" way though in case someday we can use the simd
|
|
||||||
// implementation easily if we want.
|
|
||||||
if constexpr (std::is_same_v<NodeT, Node3>) {
|
|
||||||
Node3 *n = (Node3 *)self;
|
Node3 *n = (Node3 *)self;
|
||||||
for (int i = 0; i < n->numChildren; ++i) {
|
for (int i = 0; i < n->numChildren; ++i) {
|
||||||
if (n->index[i] == index) {
|
if (n->index[i] == index) {
|
||||||
@@ -790,6 +778,8 @@ template <class NodeT> int getNodeIndex(NodeT *self, uint8_t index) {
|
|||||||
return -1;
|
return -1;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
int getNodeIndex(Node16 *self, uint8_t index) {
|
||||||
|
|
||||||
#ifdef HAS_AVX
|
#ifdef HAS_AVX
|
||||||
// Based on https://www.the-paper-trail.org/post/art-paper-notes/
|
// Based on https://www.the-paper-trail.org/post/art-paper-notes/
|
||||||
|
|
||||||
@@ -801,7 +791,7 @@ template <class NodeT> int getNodeIndex(NodeT *self, uint8_t index) {
|
|||||||
// keys aren't valid, we'll mask the results to only consider the valid ones
|
// keys aren't valid, we'll mask the results to only consider the valid ones
|
||||||
// below.
|
// below.
|
||||||
__m128i indices;
|
__m128i indices;
|
||||||
memcpy(&indices, self->index, NodeT::kMaxNodes);
|
memcpy(&indices, self->index, Node16::kMaxNodes);
|
||||||
__m128i results = _mm_cmpeq_epi8(key_vec, indices);
|
__m128i results = _mm_cmpeq_epi8(key_vec, indices);
|
||||||
|
|
||||||
// Build a mask to select only the first node->num_children values from the
|
// Build a mask to select only the first node->num_children values from the
|
||||||
@@ -824,12 +814,11 @@ template <class NodeT> int getNodeIndex(NodeT *self, uint8_t index) {
|
|||||||
// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
// https://community.arm.com/arm-community-blogs/b/infrastructure-solutions-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
|
||||||
|
|
||||||
uint8x16_t indices;
|
uint8x16_t indices;
|
||||||
memcpy(&indices, self->index, NodeT::kMaxNodes);
|
memcpy(&indices, self->index, Node16::kMaxNodes);
|
||||||
// 0xff for each match
|
// 0xff for each match
|
||||||
uint16x8_t results =
|
uint16x8_t results =
|
||||||
vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(index), indices));
|
vreinterpretq_u16_u8(vceqq_u8(vdupq_n_u8(index), indices));
|
||||||
static_assert(NodeT::kMaxNodes <= 16);
|
assume(self->numChildren <= Node16::kMaxNodes);
|
||||||
assume(self->numChildren <= NodeT::kMaxNodes);
|
|
||||||
uint64_t mask = self->numChildren == 16
|
uint64_t mask = self->numChildren == 16
|
||||||
? uint64_t(-1)
|
? uint64_t(-1)
|
||||||
: (uint64_t(1) << (self->numChildren * 4)) - 1;
|
: (uint64_t(1) << (self->numChildren * 4)) - 1;
|
||||||
@@ -1082,15 +1071,9 @@ ChildAndMaxVersion getChildAndMaxVersion(Node *self, uint8_t index) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
template <class NodeT> Node *getChildGeqSimd(NodeT *self, int child) {
|
Node *getChildGeq(Node0 *, int) { return nullptr; }
|
||||||
static_assert(std::is_same_v<NodeT, Node3> || std::is_same_v<NodeT, Node16>);
|
|
||||||
|
|
||||||
// cachegrind says the plain loop is fewer instructions and more mis-predicted
|
Node *getChildGeq(Node3 *n, int child) {
|
||||||
// branches. Microbenchmark says plain loop is faster. It's written in this
|
|
||||||
// weird "generic" way though so that someday we can use the simd
|
|
||||||
// implementation easily if we want.
|
|
||||||
if constexpr (std::is_same_v<NodeT, Node3>) {
|
|
||||||
Node3 *n = (Node3 *)self;
|
|
||||||
for (int i = 0; i < n->numChildren; ++i) {
|
for (int i = 0; i < n->numChildren; ++i) {
|
||||||
if (n->index[i] >= child) {
|
if (n->index[i] >= child) {
|
||||||
return n->children[i];
|
return n->children[i];
|
||||||
@@ -1098,6 +1081,8 @@ template <class NodeT> Node *getChildGeqSimd(NodeT *self, int child) {
|
|||||||
}
|
}
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
Node *getChildGeq(Node16 *self, int child) {
|
||||||
if (child > 255) {
|
if (child > 255) {
|
||||||
return nullptr;
|
return nullptr;
|
||||||
}
|
}
|
||||||
@@ -1105,7 +1090,7 @@ template <class NodeT> Node *getChildGeqSimd(NodeT *self, int child) {
|
|||||||
#ifdef HAS_AVX
|
#ifdef HAS_AVX
|
||||||
__m128i key_vec = _mm_set1_epi8(child);
|
__m128i key_vec = _mm_set1_epi8(child);
|
||||||
__m128i indices;
|
__m128i indices;
|
||||||
memcpy(&indices, self->index, NodeT::kMaxNodes);
|
memcpy(&indices, self->index, Node16::kMaxNodes);
|
||||||
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
||||||
int mask = (1 << self->numChildren) - 1;
|
int mask = (1 << self->numChildren) - 1;
|
||||||
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
|
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
|
||||||
@@ -1115,8 +1100,7 @@ template <class NodeT> Node *getChildGeqSimd(NodeT *self, int child) {
|
|||||||
memcpy(&indices, self->index, sizeof(self->index));
|
memcpy(&indices, self->index, sizeof(self->index));
|
||||||
// 0xff for each leq
|
// 0xff for each leq
|
||||||
auto results = vcleq_u8(vdupq_n_u8(child), indices);
|
auto results = vcleq_u8(vdupq_n_u8(child), indices);
|
||||||
static_assert(NodeT::kMaxNodes <= 16);
|
assume(self->numChildren <= Node16::kMaxNodes);
|
||||||
assume(self->numChildren <= NodeT::kMaxNodes);
|
|
||||||
uint64_t mask = self->numChildren == 16
|
uint64_t mask = self->numChildren == 16
|
||||||
? uint64_t(-1)
|
? uint64_t(-1)
|
||||||
: (uint64_t(1) << (self->numChildren * 4)) - 1;
|
: (uint64_t(1) << (self->numChildren * 4)) - 1;
|
||||||
@@ -1141,13 +1125,6 @@ template <class NodeT> Node *getChildGeqSimd(NodeT *self, int child) {
|
|||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
Node *getChildGeq(Node0 *, int) { return nullptr; }
|
|
||||||
Node *getChildGeq(Node3 *self, int child) {
|
|
||||||
return getChildGeqSimd(self, child);
|
|
||||||
}
|
|
||||||
Node *getChildGeq(Node16 *self, int child) {
|
|
||||||
return getChildGeqSimd(self, child);
|
|
||||||
}
|
|
||||||
Node *getChildGeq(Node48 *self, int child) {
|
Node *getChildGeq(Node48 *self, int child) {
|
||||||
int c = self->bitSet.firstSetGeq(child);
|
int c = self->bitSet.firstSetGeq(child);
|
||||||
if (c < 0) {
|
if (c < 0) {
|
||||||
@@ -1360,7 +1337,7 @@ Node *&getOrCreateChild(Node *&self, std::span<const uint8_t> &key,
|
|||||||
auto *self3 = static_cast<Node3 *>(self);
|
auto *self3 = static_cast<Node3 *>(self);
|
||||||
int i = self->numChildren - 1;
|
int i = self->numChildren - 1;
|
||||||
for (; i >= 0; --i) {
|
for (; i >= 0; --i) {
|
||||||
if (int(self3->index[i]) < int(index)) {
|
if (self3->index[i] < index) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
self3->index[i + 1] = self3->index[i];
|
self3->index[i + 1] = self3->index[i];
|
||||||
@@ -1390,7 +1367,7 @@ Node *&getOrCreateChild(Node *&self, std::span<const uint8_t> &key,
|
|||||||
auto *self16 = static_cast<Node16 *>(self);
|
auto *self16 = static_cast<Node16 *>(self);
|
||||||
int i = self->numChildren - 1;
|
int i = self->numChildren - 1;
|
||||||
for (; i >= 0; --i) {
|
for (; i >= 0; --i) {
|
||||||
if (int(self16->index[i]) < int(index)) {
|
if (self16->index[i] < index) {
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
self16->index[i + 1] = self16->index[i];
|
self16->index[i + 1] = self16->index[i];
|
||||||
@@ -1419,9 +1396,7 @@ Node *&getOrCreateChild(Node *&self, std::span<const uint8_t> &key,
|
|||||||
insert48:
|
insert48:
|
||||||
auto *self48 = static_cast<Node48 *>(self);
|
auto *self48 = static_cast<Node48 *>(self);
|
||||||
self48->bitSet.set(index);
|
self48->bitSet.set(index);
|
||||||
++self->numChildren;
|
auto nextFree = self48->numChildren++;
|
||||||
assert(self48->nextFree < 48);
|
|
||||||
int nextFree = self48->nextFree++;
|
|
||||||
self48->index[index] = nextFree;
|
self48->index[index] = nextFree;
|
||||||
self48->reverseIndex[nextFree] = index;
|
self48->reverseIndex[nextFree] = index;
|
||||||
auto &result = self48->children[nextFree];
|
auto &result = self48->children[nextFree];
|
||||||
@@ -1569,8 +1544,6 @@ void maybeDecreaseCapacity(Node *&self, WriteContext *tls,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||||
// This gets covered in local development
|
|
||||||
// GCOVR_EXCL_START
|
|
||||||
__attribute__((target("avx512f"))) void rezero16(InternalVersionT *vs,
|
__attribute__((target("avx512f"))) void rezero16(InternalVersionT *vs,
|
||||||
InternalVersionT zero) {
|
InternalVersionT zero) {
|
||||||
uint32_t z;
|
uint32_t z;
|
||||||
@@ -1580,7 +1553,6 @@ __attribute__((target("avx512f"))) void rezero16(InternalVersionT *vs,
|
|||||||
_mm512_sub_epi32(_mm512_loadu_epi32(vs), zvec), _mm512_setzero_epi32());
|
_mm512_sub_epi32(_mm512_loadu_epi32(vs), zvec), _mm512_setzero_epi32());
|
||||||
_mm512_mask_storeu_epi32(vs, m, zvec);
|
_mm512_mask_storeu_epi32(vs, m, zvec);
|
||||||
}
|
}
|
||||||
// GCOVR_EXCL_STOP
|
|
||||||
|
|
||||||
__attribute__((target("default")))
|
__attribute__((target("default")))
|
||||||
#endif
|
#endif
|
||||||
@@ -1812,7 +1784,7 @@ Node *erase(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
|
|||||||
parent48->bitSet.reset(parentsIndex);
|
parent48->bitSet.reset(parentsIndex);
|
||||||
int8_t toRemoveChildrenIndex =
|
int8_t toRemoveChildrenIndex =
|
||||||
std::exchange(parent48->index[parentsIndex], -1);
|
std::exchange(parent48->index[parentsIndex], -1);
|
||||||
int8_t lastChildrenIndex = --parent48->nextFree;
|
auto lastChildrenIndex = --parent48->numChildren;
|
||||||
assert(toRemoveChildrenIndex >= 0);
|
assert(toRemoveChildrenIndex >= 0);
|
||||||
assert(lastChildrenIndex >= 0);
|
assert(lastChildrenIndex >= 0);
|
||||||
if (toRemoveChildrenIndex != lastChildrenIndex) {
|
if (toRemoveChildrenIndex != lastChildrenIndex) {
|
||||||
@@ -1831,8 +1803,6 @@ Node *erase(Node *self, WriteContext *tls, ConflictSet::Impl *impl,
|
|||||||
}
|
}
|
||||||
parent48->childMaxVersion[lastChildrenIndex] = tls->zero;
|
parent48->childMaxVersion[lastChildrenIndex] = tls->zero;
|
||||||
|
|
||||||
--parent->numChildren;
|
|
||||||
|
|
||||||
if (needsDownsize(parent48)) {
|
if (needsDownsize(parent48)) {
|
||||||
downsize(parent48, tls, impl, result);
|
downsize(parent48, tls, impl, result);
|
||||||
}
|
}
|
||||||
@@ -2019,9 +1989,9 @@ downLeftSpine:
|
|||||||
#ifdef HAS_AVX
|
#ifdef HAS_AVX
|
||||||
uint32_t compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
uint32_t compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
||||||
uint32_t compared = 0;
|
uint32_t compared = 0;
|
||||||
__m128i w[4];
|
__m128i w[4]; // GCOVR_EXCL_LINE
|
||||||
memcpy(w, vs, sizeof(w));
|
memcpy(w, vs, sizeof(w));
|
||||||
uint32_t r;
|
uint32_t r; // GCOVR_EXCL_LINE
|
||||||
memcpy(&r, &rv, sizeof(r));
|
memcpy(&r, &rv, sizeof(r));
|
||||||
const auto rvVec = _mm_set1_epi32(r);
|
const auto rvVec = _mm_set1_epi32(r);
|
||||||
const auto zero = _mm_setzero_si128();
|
const auto zero = _mm_setzero_si128();
|
||||||
@@ -2033,8 +2003,6 @@ uint32_t compare16_32bit(const InternalVersionT *vs, InternalVersionT rv) {
|
|||||||
return compared;
|
return compared;
|
||||||
}
|
}
|
||||||
|
|
||||||
// This gets covered in local development
|
|
||||||
// GCOVR_EXCL_START
|
|
||||||
__attribute__((target("avx512f"))) uint32_t
|
__attribute__((target("avx512f"))) uint32_t
|
||||||
compare16_32bit_avx512(const InternalVersionT *vs, InternalVersionT rv) {
|
compare16_32bit_avx512(const InternalVersionT *vs, InternalVersionT rv) {
|
||||||
uint32_t r;
|
uint32_t r;
|
||||||
@@ -2044,7 +2012,6 @@ compare16_32bit_avx512(const InternalVersionT *vs, InternalVersionT rv) {
|
|||||||
_mm512_setzero_epi32());
|
_mm512_setzero_epi32());
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
// GCOVR_EXCL_STOP
|
|
||||||
|
|
||||||
// Returns true if v[i] <= readVersion for all i such that begin <= is[i] < end
|
// Returns true if v[i] <= readVersion for all i such that begin <= is[i] < end
|
||||||
// Preconditions: begin <= end, end - begin < 256
|
// Preconditions: begin <= end, end - begin < 256
|
||||||
@@ -2427,14 +2394,11 @@ bool checkMaxBetweenExclusiveImpl(Node *n, int begin, int end,
|
|||||||
}
|
}
|
||||||
|
|
||||||
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
#if defined(HAS_AVX) && !defined(__SANITIZE_THREAD__)
|
||||||
// This gets covered in local development
|
|
||||||
// GCOVR_EXCL_START
|
|
||||||
__attribute__((target("avx512f"))) bool
|
__attribute__((target("avx512f"))) bool
|
||||||
checkMaxBetweenExclusive(Node *n, int begin, int end,
|
checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||||
InternalVersionT readVersion, ReadContext *tls) {
|
InternalVersionT readVersion, ReadContext *tls) {
|
||||||
return checkMaxBetweenExclusiveImpl<true>(n, begin, end, readVersion, tls);
|
return checkMaxBetweenExclusiveImpl<true>(n, begin, end, readVersion, tls);
|
||||||
}
|
}
|
||||||
// GCOVR_EXCL_STOP
|
|
||||||
__attribute__((target("default")))
|
__attribute__((target("default")))
|
||||||
#endif
|
#endif
|
||||||
bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
||||||
|
@@ -748,7 +748,10 @@ struct TestDriver {
|
|||||||
fprintf(stderr, "%p Set oldest version: %" PRId64 "\n", this,
|
fprintf(stderr, "%p Set oldest version: %" PRId64 "\n", this,
|
||||||
oldestVersion);
|
oldestVersion);
|
||||||
#endif
|
#endif
|
||||||
|
CALLGRIND_START_INSTRUMENTATION;
|
||||||
cs.setOldestVersion(oldestVersion);
|
cs.setOldestVersion(oldestVersion);
|
||||||
|
CALLGRIND_STOP_INSTRUMENTATION;
|
||||||
|
|
||||||
if constexpr (kEnableAssertions) {
|
if constexpr (kEnableAssertions) {
|
||||||
refImpl.setOldestVersion(oldestVersion);
|
refImpl.setOldestVersion(oldestVersion);
|
||||||
}
|
}
|
||||||
|
20
README.md
20
README.md
@@ -24,15 +24,15 @@ Hardware for all benchmarks is an AMD Ryzen 9 7900 with (2x32GB) 5600MT/s CL28-3
|
|||||||
|
|
||||||
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|
| ns/op | op/s | err% | ins/op | cyc/op | IPC | bra/op | miss% | total | benchmark
|
||||||
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|
|--------------------:|--------------------:|--------:|----------------:|----------------:|-------:|---------------:|--------:|----------:|:----------
|
||||||
| 11.04 | 90,614,308.12 | 0.8% | 180.38 | 55.13 | 3.272 | 41.51 | 0.4% | 0.01 | `point reads`
|
| 11.18 | 89,455,125.34 | 0.6% | 185.37 | 57.08 | 3.248 | 41.51 | 0.4% | 0.01 | `point reads`
|
||||||
| 14.96 | 66,843,629.12 | 0.4% | 274.41 | 74.73 | 3.672 | 55.05 | 0.3% | 0.01 | `prefix reads`
|
| 14.53 | 68,800,688.89 | 0.4% | 282.41 | 74.80 | 3.776 | 55.06 | 0.3% | 0.01 | `prefix reads`
|
||||||
| 37.06 | 26,982,847.61 | 0.2% | 791.04 | 185.28 | 4.269 | 142.67 | 0.2% | 0.01 | `range reads`
|
| 36.54 | 27,367,576.87 | 0.2% | 798.06 | 188.90 | 4.225 | 141.69 | 0.2% | 0.01 | `range reads`
|
||||||
| 17.89 | 55,887,365.73 | 0.6% | 335.54 | 89.79 | 3.737 | 43.84 | 0.4% | 0.01 | `point writes`
|
| 16.69 | 59,912,106.02 | 0.6% | 314.57 | 86.29 | 3.645 | 39.84 | 0.4% | 0.01 | `point writes`
|
||||||
| 31.85 | 31,394,336.65 | 0.3% | 615.32 | 159.63 | 3.855 | 87.69 | 0.2% | 0.01 | `prefix writes`
|
| 30.09 | 33,235,744.07 | 0.5% | 591.33 | 155.92 | 3.793 | 82.69 | 0.2% | 0.01 | `prefix writes`
|
||||||
| 36.17 | 27,647,221.45 | 0.6% | 705.11 | 182.80 | 3.857 | 100.62 | 0.1% | 0.01 | `range writes`
|
| 35.77 | 27,956,388.03 | 1.4% | 682.25 | 187.63 | 3.636 | 96.12 | 0.1% | 0.01 | `range writes`
|
||||||
| 79.01 | 12,656,457.78 | 0.7% | 1,498.35 | 402.46 | 3.723 | 270.50 | 0.1% | 0.01 | `monotonic increasing point writes`
|
| 74.04 | 13,505,408.41 | 2.7% | 1,448.95 | 392.10 | 3.695 | 260.53 | 0.1% | 0.01 | `monotonic increasing point writes`
|
||||||
| 303,667.50 | 3,293.08 | 1.1% | 3,931,273.00 | 1,612,702.50 | 2.438 | 806,223.33 | 0.0% | 0.01 | `worst case for radix tree`
|
| 330,984.50 | 3,021.29 | 1.9% | 3,994,153.50 | 1,667,309.00 | 2.396 | 806,019.50 | 0.0% | 0.01 | `worst case for radix tree`
|
||||||
| 83.70 | 11,947,443.83 | 0.7% | 1,738.03 | 429.06 | 4.051 | 270.01 | 0.0% | 0.01 | `create and destroy`
|
| 92.46 | 10,814,961.65 | 0.5% | 1,800.00 | 463.41 | 3.884 | 297.00 | 0.0% | 0.01 | `create and destroy`
|
||||||
|
|
||||||
# "Real data" test
|
# "Real data" test
|
||||||
|
|
||||||
@@ -47,7 +47,7 @@ Check: 4.47891 seconds, 364.05 MB/s, Add: 4.55599 seconds, 123.058 MB/s, Gc rati
|
|||||||
## radix tree
|
## radix tree
|
||||||
|
|
||||||
```
|
```
|
||||||
Check: 0.958985 seconds, 1700.28 MB/s, Add: 1.35083 seconds, 415.044 MB/s, Gc ratio: 44.4768%, Peak idle memory: 2.33588e+06
|
Check: 0.953012 seconds, 1710.94 MB/s, Add: 1.30025 seconds, 431.188 MB/s, Gc ratio: 43.9816%, Peak idle memory: 2.28375e+06
|
||||||
```
|
```
|
||||||
|
|
||||||
## hash table
|
## hash table
|
||||||
|
BIN
corpus/04239a60051c9a1779d9a896b84eff01f272f191
Normal file
BIN
corpus/04239a60051c9a1779d9a896b84eff01f272f191
Normal file
Binary file not shown.
BIN
corpus/0b724429644597a0c8feb3d95dced0c2a48e5ed0
Normal file
BIN
corpus/0b724429644597a0c8feb3d95dced0c2a48e5ed0
Normal file
Binary file not shown.
BIN
corpus/10838bd79f36f824c41afa600c86f176f213ff79
Normal file
BIN
corpus/10838bd79f36f824c41afa600c86f176f213ff79
Normal file
Binary file not shown.
BIN
corpus/158d2f871afe1bf811833d07a03415d89a868efe
Normal file
BIN
corpus/158d2f871afe1bf811833d07a03415d89a868efe
Normal file
Binary file not shown.
BIN
corpus/1a591c014514af4b723cc6f5f0753319fa57b925
Normal file
BIN
corpus/1a591c014514af4b723cc6f5f0753319fa57b925
Normal file
Binary file not shown.
BIN
corpus/1ab0a409dbf44496ea9d7027ba8898805f396140
Normal file
BIN
corpus/1ab0a409dbf44496ea9d7027ba8898805f396140
Normal file
Binary file not shown.
BIN
corpus/1ac23fb78ff1d7ea8b6ca0e63184fbbc9199e68c
Normal file
BIN
corpus/1ac23fb78ff1d7ea8b6ca0e63184fbbc9199e68c
Normal file
Binary file not shown.
BIN
corpus/1ce3c082d69e7b494e648186d23562d60808c096
Normal file
BIN
corpus/1ce3c082d69e7b494e648186d23562d60808c096
Normal file
Binary file not shown.
BIN
corpus/233112524bdcd6f77d4d51c080039bf1e1358bc2
Normal file
BIN
corpus/233112524bdcd6f77d4d51c080039bf1e1358bc2
Normal file
Binary file not shown.
BIN
corpus/236aae2fbf5a1c6716650bdc0187aadebd0fd4eb
Normal file
BIN
corpus/236aae2fbf5a1c6716650bdc0187aadebd0fd4eb
Normal file
Binary file not shown.
BIN
corpus/2aa2ab713a18bc86e886777cf6008a871e590688
Normal file
BIN
corpus/2aa2ab713a18bc86e886777cf6008a871e590688
Normal file
Binary file not shown.
BIN
corpus/2e28e709dc7a313255964e2c21d3c8c9b6c3ef1d
Normal file
BIN
corpus/2e28e709dc7a313255964e2c21d3c8c9b6c3ef1d
Normal file
Binary file not shown.
BIN
corpus/3276a649b804693762b06581bf459b2f2accc03e
Normal file
BIN
corpus/3276a649b804693762b06581bf459b2f2accc03e
Normal file
Binary file not shown.
BIN
corpus/34a4d2436527e903d806a63c002cea88522f3a5e
Normal file
BIN
corpus/34a4d2436527e903d806a63c002cea88522f3a5e
Normal file
Binary file not shown.
BIN
corpus/3a5cb027cf0d324ad95683dd26c4fee6bfa322c3
Normal file
BIN
corpus/3a5cb027cf0d324ad95683dd26c4fee6bfa322c3
Normal file
Binary file not shown.
BIN
corpus/3f2da99062acb638cb5036c847d1cf3edaafc787
Normal file
BIN
corpus/3f2da99062acb638cb5036c847d1cf3edaafc787
Normal file
Binary file not shown.
BIN
corpus/484197fdd1b45c22e42042bb7bd4b2eb8475eeab
Normal file
BIN
corpus/484197fdd1b45c22e42042bb7bd4b2eb8475eeab
Normal file
Binary file not shown.
BIN
corpus/494bfcf88bc305649d5296dcd44a79b55e63f199
Normal file
BIN
corpus/494bfcf88bc305649d5296dcd44a79b55e63f199
Normal file
Binary file not shown.
BIN
corpus/4a31e09cd8d98f2f1b5485a6e9448b3d0f18dd41
Normal file
BIN
corpus/4a31e09cd8d98f2f1b5485a6e9448b3d0f18dd41
Normal file
Binary file not shown.
BIN
corpus/52cadbafc428ca921e6053b5e82867417337c92b
Normal file
BIN
corpus/52cadbafc428ca921e6053b5e82867417337c92b
Normal file
Binary file not shown.
BIN
corpus/5d24680a1a0033cc3201d0662b2ee27f3f5fef71
Normal file
BIN
corpus/5d24680a1a0033cc3201d0662b2ee27f3f5fef71
Normal file
Binary file not shown.
BIN
corpus/71cf9feba09ecb14c67769b79af7a3e17db96fc7
Normal file
BIN
corpus/71cf9feba09ecb14c67769b79af7a3e17db96fc7
Normal file
Binary file not shown.
BIN
corpus/8a424a8152063157d97fbccf0caa08170ce54cef
Normal file
BIN
corpus/8a424a8152063157d97fbccf0caa08170ce54cef
Normal file
Binary file not shown.
BIN
corpus/8e871f447604dcdd1b52711d91de8f886c6e005f
Normal file
BIN
corpus/8e871f447604dcdd1b52711d91de8f886c6e005f
Normal file
Binary file not shown.
BIN
corpus/9629a746347892fe0f16a97b283b1d83dd2f0baf
Normal file
BIN
corpus/9629a746347892fe0f16a97b283b1d83dd2f0baf
Normal file
Binary file not shown.
BIN
corpus/9761f34f6b55d26b9d1de6411aa88a40d8a623a8
Normal file
BIN
corpus/9761f34f6b55d26b9d1de6411aa88a40d8a623a8
Normal file
Binary file not shown.
BIN
corpus/9d3024f064aea96bf7392ca3f71e01ac5493453e
Normal file
BIN
corpus/9d3024f064aea96bf7392ca3f71e01ac5493453e
Normal file
Binary file not shown.
BIN
corpus/9f01be2faa897171ddb18c00a9233b6cd50c8d3e
Normal file
BIN
corpus/9f01be2faa897171ddb18c00a9233b6cd50c8d3e
Normal file
Binary file not shown.
BIN
corpus/a820c98c1b089b41cf31158e9cb066a79fc49280
Normal file
BIN
corpus/a820c98c1b089b41cf31158e9cb066a79fc49280
Normal file
Binary file not shown.
BIN
corpus/abe9e47223d0aeee953c650052431555c64886f7
Normal file
BIN
corpus/abe9e47223d0aeee953c650052431555c64886f7
Normal file
Binary file not shown.
BIN
corpus/b0ab4e5be422e9b86dd4d0f7215caa867eb489f6
Normal file
BIN
corpus/b0ab4e5be422e9b86dd4d0f7215caa867eb489f6
Normal file
Binary file not shown.
BIN
corpus/b7dad2b0884374b6a304435ab8daa6b70aef0617
Normal file
BIN
corpus/b7dad2b0884374b6a304435ab8daa6b70aef0617
Normal file
Binary file not shown.
BIN
corpus/bd5a8061ac65c0b80a351191d5e36461233c4fd8
Normal file
BIN
corpus/bd5a8061ac65c0b80a351191d5e36461233c4fd8
Normal file
Binary file not shown.
BIN
corpus/be65fc01ef13fe43f3f31c01ba6ddf6d715c583d
Normal file
BIN
corpus/be65fc01ef13fe43f3f31c01ba6ddf6d715c583d
Normal file
Binary file not shown.
BIN
corpus/be77377ac9fe6c450dd79266f18bf77f49e0aca4
Normal file
BIN
corpus/be77377ac9fe6c450dd79266f18bf77f49e0aca4
Normal file
Binary file not shown.
BIN
corpus/c1dbcf46970c38ce500975526e2191d98b42d753
Normal file
BIN
corpus/c1dbcf46970c38ce500975526e2191d98b42d753
Normal file
Binary file not shown.
BIN
corpus/c42c174dabf14d9a19f815087a28e1ca3773a2ab
Normal file
BIN
corpus/c42c174dabf14d9a19f815087a28e1ca3773a2ab
Normal file
Binary file not shown.
BIN
corpus/c469f8ad0e2bfd2158f46b9f1683a16b4c1e7112
Normal file
BIN
corpus/c469f8ad0e2bfd2158f46b9f1683a16b4c1e7112
Normal file
Binary file not shown.
BIN
corpus/c5c3046955d40141fa85d313584f42625f307152
Normal file
BIN
corpus/c5c3046955d40141fa85d313584f42625f307152
Normal file
Binary file not shown.
BIN
corpus/c7f64fef6b239d02dff3856dbf7a837733ffa63f
Normal file
BIN
corpus/c7f64fef6b239d02dff3856dbf7a837733ffa63f
Normal file
Binary file not shown.
BIN
corpus/c80544f858e9243fe82e63298b9a10565f5c331a
Normal file
BIN
corpus/c80544f858e9243fe82e63298b9a10565f5c331a
Normal file
Binary file not shown.
BIN
corpus/cbb02b7a3535fc3e4b98a5882369fd46388d54ce
Normal file
BIN
corpus/cbb02b7a3535fc3e4b98a5882369fd46388d54ce
Normal file
Binary file not shown.
BIN
corpus/cd161065095015edbdd71cfee554a7cc5246403e
Normal file
BIN
corpus/cd161065095015edbdd71cfee554a7cc5246403e
Normal file
Binary file not shown.
BIN
corpus/d06a6b29ca2650b98bd137656d0c3f20e3d01d68
Normal file
BIN
corpus/d06a6b29ca2650b98bd137656d0c3f20e3d01d68
Normal file
Binary file not shown.
BIN
corpus/e31b0d2a86d14d72f85e3c288122cf195f797cb8
Normal file
BIN
corpus/e31b0d2a86d14d72f85e3c288122cf195f797cb8
Normal file
Binary file not shown.
BIN
corpus/e6366d007a234382ebe4c2705e81ee76deb73157
Normal file
BIN
corpus/e6366d007a234382ebe4c2705e81ee76deb73157
Normal file
Binary file not shown.
BIN
corpus/eb4ff5608e315a653ee4ae4c912a2b79c6a19bed
Normal file
BIN
corpus/eb4ff5608e315a653ee4ae4c912a2b79c6a19bed
Normal file
Binary file not shown.
BIN
corpus/ed6a4636a203bc85451d8be41af98e363c651cac
Normal file
BIN
corpus/ed6a4636a203bc85451d8be41af98e363c651cac
Normal file
Binary file not shown.
BIN
corpus/ed8ab4678ecbcc9ecc4587d29265c71aa3ad1c15
Normal file
BIN
corpus/ed8ab4678ecbcc9ecc4587d29265c71aa3ad1c15
Normal file
Binary file not shown.
BIN
corpus/eded3d23b7a010f3b6796ce19515e6f8eb2bc15b
Normal file
BIN
corpus/eded3d23b7a010f3b6796ce19515e6f8eb2bc15b
Normal file
Binary file not shown.
BIN
corpus/f878b651e945a1e5853232edc315f5abf11b592f
Normal file
BIN
corpus/f878b651e945a1e5853232edc315f5abf11b592f
Normal file
Binary file not shown.
Reference in New Issue
Block a user