Compare commits
8 Commits
b0085df5ad
...
v0.0.1
Author | SHA1 | Date | |
---|---|---|---|
71c39f9955 | |||
8cc17158fd | |||
ab211c646a | |||
7af961f141 | |||
a91df62608 | |||
0a1843a161 | |||
4edf0315d9 | |||
14515e186a |
@@ -34,7 +34,7 @@ ConflictSet::ReadRange singleton(Arena &arena, std::span<const uint8_t> key) {
|
|||||||
std::span<uint8_t>(new (arena) uint8_t[key.size() + 1], key.size() + 1);
|
std::span<uint8_t>(new (arena) uint8_t[key.size() + 1], key.size() + 1);
|
||||||
memcpy(r.data(), key.data(), key.size());
|
memcpy(r.data(), key.data(), key.size());
|
||||||
r[key.size()] = 0;
|
r[key.size()] = 0;
|
||||||
return {key.data(), int(key.size()), r.data(), int(r.size())};
|
return {{key.data(), int(key.size())}, {r.data(), int(r.size())}, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
ConflictSet::ReadRange prefixRange(Arena &arena, std::span<const uint8_t> key) {
|
ConflictSet::ReadRange prefixRange(Arena &arena, std::span<const uint8_t> key) {
|
||||||
@@ -52,7 +52,7 @@ ConflictSet::ReadRange prefixRange(Arena &arena, std::span<const uint8_t> key) {
|
|||||||
auto r = std::span<uint8_t>(new (arena) uint8_t[index + 1], index + 1);
|
auto r = std::span<uint8_t>(new (arena) uint8_t[index + 1], index + 1);
|
||||||
memcpy(r.data(), key.data(), index + 1);
|
memcpy(r.data(), key.data(), index + 1);
|
||||||
r[r.size() - 1]++;
|
r[r.size() - 1]++;
|
||||||
return {key.data(), int(key.size()), r.data(), int(r.size())};
|
return {{key.data(), int(key.size())}, {r.data(), int(r.size())}, 0};
|
||||||
}
|
}
|
||||||
|
|
||||||
void benchConflictSet() {
|
void benchConflictSet() {
|
||||||
@@ -258,4 +258,4 @@ void benchConflictSet() {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
int main(void) { benchConflictSet(); }
|
int main(void) { benchConflictSet(); }
|
||||||
|
@@ -25,6 +25,9 @@ endif()
|
|||||||
add_compile_options(-fdata-sections -ffunction-sections -Wswitch-enum
|
add_compile_options(-fdata-sections -ffunction-sections -Wswitch-enum
|
||||||
-Werror=switch-enum)
|
-Werror=switch-enum)
|
||||||
|
|
||||||
|
option(USE_SIMD_FALLBACK
|
||||||
|
"Use fallback implementations of functions that use SIMD" OFF)
|
||||||
|
|
||||||
# This is encouraged according to
|
# This is encouraged according to
|
||||||
# https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq
|
# https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq
|
||||||
include_directories(SYSTEM ${CMAKE_SOURCE_DIR}/third_party/valgrind)
|
include_directories(SYSTEM ${CMAKE_SOURCE_DIR}/third_party/valgrind)
|
||||||
@@ -43,18 +46,20 @@ endif()
|
|||||||
include(CheckIncludeFileCXX)
|
include(CheckIncludeFileCXX)
|
||||||
include(CMakePushCheckState)
|
include(CMakePushCheckState)
|
||||||
|
|
||||||
cmake_push_check_state()
|
if(NOT USE_SIMD_FALLBACK)
|
||||||
list(APPEND CMAKE_REQUIRED_FLAGS -mavx)
|
cmake_push_check_state()
|
||||||
check_include_file_cxx("immintrin.h" HAS_AVX)
|
list(APPEND CMAKE_REQUIRED_FLAGS -mavx)
|
||||||
if(HAS_AVX)
|
check_include_file_cxx("immintrin.h" HAS_AVX)
|
||||||
add_compile_options(-mavx)
|
if(HAS_AVX)
|
||||||
add_compile_definitions(HAS_AVX)
|
add_compile_options(-mavx)
|
||||||
endif()
|
add_compile_definitions(HAS_AVX)
|
||||||
cmake_pop_check_state()
|
endif()
|
||||||
|
cmake_pop_check_state()
|
||||||
|
|
||||||
check_include_file_cxx("arm_neon.h" HAS_ARM_NEON)
|
check_include_file_cxx("arm_neon.h" HAS_ARM_NEON)
|
||||||
if(HAS_ARM_NEON)
|
if(HAS_ARM_NEON)
|
||||||
add_compile_definitions(HAS_ARM_NEON)
|
add_compile_definitions(HAS_ARM_NEON)
|
||||||
|
endif()
|
||||||
endif()
|
endif()
|
||||||
|
|
||||||
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
|
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
|
||||||
@@ -267,6 +272,10 @@ set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.md")
|
|||||||
set(CPACK_RPM_PACKAGE_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
|
set(CPACK_RPM_PACKAGE_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
|
||||||
set(CPACK_RPM_SPEC_INSTALL_POST "/bin/true") # avoid stripping
|
set(CPACK_RPM_SPEC_INSTALL_POST "/bin/true") # avoid stripping
|
||||||
set(CPACK_RPM_PACKAGE_LICENSE "Apache 2.0")
|
set(CPACK_RPM_PACKAGE_LICENSE "Apache 2.0")
|
||||||
|
set(CPACK_RPM_FILE_NAME RPM-DEFAULT)
|
||||||
|
|
||||||
|
# deb
|
||||||
|
set(CPACK_DEBIAN_FILE_NAME DEB-DEFAULT)
|
||||||
|
|
||||||
include(CPack)
|
include(CPack)
|
||||||
|
|
||||||
|
@@ -29,6 +29,7 @@ limitations under the License.
|
|||||||
#include <span>
|
#include <span>
|
||||||
#include <string>
|
#include <string>
|
||||||
#include <string_view>
|
#include <string_view>
|
||||||
|
#include <type_traits>
|
||||||
#include <utility>
|
#include <utility>
|
||||||
|
|
||||||
#ifdef HAS_AVX
|
#ifdef HAS_AVX
|
||||||
@@ -954,6 +955,42 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
|
|||||||
assert(self->getType() == Type_Node16);
|
assert(self->getType() == Type_Node16);
|
||||||
|
|
||||||
++self->numChildren;
|
++self->numChildren;
|
||||||
|
#ifdef HAS_AVX
|
||||||
|
__m128i key_vec = _mm_set1_epi8(index);
|
||||||
|
__m128i indices;
|
||||||
|
memcpy(&indices, self16->index, sizeof(self16->index));
|
||||||
|
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
||||||
|
int mask = (1 << (self->numChildren - 1)) - 1;
|
||||||
|
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
|
||||||
|
bitfield |= uint32_t(1) << (self->numChildren - 1);
|
||||||
|
int i = std::countr_zero(bitfield);
|
||||||
|
if (i < self->numChildren - 1) {
|
||||||
|
memmove(self16->index + i + 1, self16->index + i,
|
||||||
|
self->numChildren - (i + 1));
|
||||||
|
memmove(self16->children + i + 1, self16->children + i,
|
||||||
|
(self->numChildren - (i + 1)) * sizeof(Child));
|
||||||
|
}
|
||||||
|
#elif defined(HAS_ARM_NEON)
|
||||||
|
uint8x16_t indices;
|
||||||
|
memcpy(&indices, self16->index, sizeof(self16->index));
|
||||||
|
// 0xff for each leq
|
||||||
|
auto results = vcleq_u8(vdupq_n_u8(index), indices);
|
||||||
|
uint64_t mask = (uint64_t(1) << ((self->numChildren - 1) * 4)) - 1;
|
||||||
|
// 0xf for each 0xff (within mask)
|
||||||
|
uint64_t bitfield =
|
||||||
|
vget_lane_u64(
|
||||||
|
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)),
|
||||||
|
0) &
|
||||||
|
mask;
|
||||||
|
bitfield |= uint64_t(0xf) << ((self->numChildren - 1) * 4);
|
||||||
|
int i = std::countr_zero(bitfield) / 4;
|
||||||
|
if (i < self->numChildren - 1) {
|
||||||
|
memmove(self16->index + i + 1, self16->index + i,
|
||||||
|
self->numChildren - (i + 1));
|
||||||
|
memmove(self16->children + i + 1, self16->children + i,
|
||||||
|
(self->numChildren - (i + 1)) * sizeof(Child));
|
||||||
|
}
|
||||||
|
#else
|
||||||
int i = 0;
|
int i = 0;
|
||||||
for (; i < int(self->numChildren) - 1; ++i) {
|
for (; i < int(self->numChildren) - 1; ++i) {
|
||||||
if (int(self16->index[i]) > int(index)) {
|
if (int(self16->index[i]) > int(index)) {
|
||||||
@@ -964,6 +1001,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
|
|||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
self16->index[i] = index;
|
self16->index[i] = index;
|
||||||
auto &result = self16->children[i].child;
|
auto &result = self16->children[i].child;
|
||||||
result = nullptr;
|
result = nullptr;
|
||||||
@@ -979,8 +1017,8 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
|
|||||||
self = newSelf;
|
self = newSelf;
|
||||||
goto insert256;
|
goto insert256;
|
||||||
}
|
}
|
||||||
insert48:
|
|
||||||
|
|
||||||
|
insert48:
|
||||||
auto *self48 = static_cast<Node48 *>(self);
|
auto *self48 = static_cast<Node48 *>(self);
|
||||||
self48->bitSet.set(index);
|
self48->bitSet.set(index);
|
||||||
++self->numChildren;
|
++self->numChildren;
|
||||||
@@ -992,6 +1030,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
|
|||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
case Type_Node256: {
|
case Type_Node256: {
|
||||||
|
|
||||||
insert256:
|
insert256:
|
||||||
auto *self256 = static_cast<Node256 *>(self);
|
auto *self256 = static_cast<Node256 *>(self);
|
||||||
++self->numChildren;
|
++self->numChildren;
|
||||||
@@ -2858,8 +2897,8 @@ Iterator firstGeq(Node *n, std::string_view key) {
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
bool checkCorrectness(Node *node, int64_t oldestVersion,
|
[[maybe_unused]] bool checkCorrectness(Node *node, int64_t oldestVersion,
|
||||||
ConflictSet::Impl *impl) {
|
ConflictSet::Impl *impl) {
|
||||||
bool success = true;
|
bool success = true;
|
||||||
|
|
||||||
checkParentPointers(node, success);
|
checkParentPointers(node, success);
|
||||||
|
11
Jenkinsfile
vendored
11
Jenkinsfile
vendored
@@ -48,6 +48,17 @@ pipeline {
|
|||||||
recordIssues(tools: [clang()])
|
recordIssues(tools: [clang()])
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
stage('SIMD fallback') {
|
||||||
|
agent {
|
||||||
|
dockerfile {
|
||||||
|
args '-v /home/jenkins/ccache:/ccache'
|
||||||
|
reuseNode true
|
||||||
|
}
|
||||||
|
}
|
||||||
|
steps {
|
||||||
|
CleanBuildAndTest("-DUSE_SIMD_FALLBACK=ON")
|
||||||
|
}
|
||||||
|
}
|
||||||
stage('Release [gcc]') {
|
stage('Release [gcc]') {
|
||||||
agent {
|
agent {
|
||||||
dockerfile {
|
dockerfile {
|
||||||
|
66
README.md
66
README.md
@@ -9,23 +9,23 @@ Hardware for all benchmarks is a mac m1 2020.
|
|||||||
## Skip list
|
## Skip list
|
||||||
|
|
||||||
```
|
```
|
||||||
New conflict set: 1.962 sec
|
New conflict set: 1.957 sec
|
||||||
0.637 Mtransactions/sec
|
0.639 Mtransactions/sec
|
||||||
2.548 Mkeys/sec
|
2.555 Mkeys/sec
|
||||||
Detect only: 1.842 sec
|
Detect only: 1.845 sec
|
||||||
0.679 Mtransactions/sec
|
0.678 Mtransactions/sec
|
||||||
2.714 Mkeys/sec
|
2.710 Mkeys/sec
|
||||||
Skiplist only: 1.261 sec
|
Skiplist only: 1.263 sec
|
||||||
0.991 Mtransactions/sec
|
0.990 Mtransactions/sec
|
||||||
3.964 Mkeys/sec
|
3.960 Mkeys/sec
|
||||||
Performance counters:
|
Performance counters:
|
||||||
Build: 0.0597
|
Build: 0.0546
|
||||||
Add: 0.0587
|
Add: 0.0563
|
||||||
Detect: 1.84
|
Detect: 1.84
|
||||||
D.Sort: 0.411
|
D.Sort: 0.412
|
||||||
D.Combine: 0.0136
|
D.Combine: 0.0141
|
||||||
D.CheckRead: 0.67
|
D.CheckRead: 0.671
|
||||||
D.CheckIntraBatch: 0.00671
|
D.CheckIntraBatch: 0.0068
|
||||||
D.MergeWrite: 0.592
|
D.MergeWrite: 0.592
|
||||||
D.RemoveBefore: 0.146
|
D.RemoveBefore: 0.146
|
||||||
```
|
```
|
||||||
@@ -33,25 +33,25 @@ Performance counters:
|
|||||||
## Radix tree (this implementation)
|
## Radix tree (this implementation)
|
||||||
|
|
||||||
```
|
```
|
||||||
New conflict set: 1.660 sec
|
New conflict set: 1.366 sec
|
||||||
0.753 Mtransactions/sec
|
0.915 Mtransactions/sec
|
||||||
3.012 Mkeys/sec
|
3.660 Mkeys/sec
|
||||||
Detect only: 1.524 sec
|
Detect only: 1.248 sec
|
||||||
0.820 Mtransactions/sec
|
1.002 Mtransactions/sec
|
||||||
3.280 Mkeys/sec
|
4.007 Mkeys/sec
|
||||||
Skiplist only: 0.844 sec
|
Skiplist only: 0.573 sec
|
||||||
1.481 Mtransactions/sec
|
2.182 Mtransactions/sec
|
||||||
5.926 Mkeys/sec
|
8.730 Mkeys/sec
|
||||||
Performance counters:
|
Performance counters:
|
||||||
Build: 0.0699
|
Build: 0.0594
|
||||||
Add: 0.0628
|
Add: 0.0572
|
||||||
Detect: 1.52
|
Detect: 1.25
|
||||||
D.Sort: 0.435
|
D.Sort: 0.418
|
||||||
D.Combine: 0.0183
|
D.Combine: 0.0149
|
||||||
D.CheckRead: 0.37
|
D.CheckRead: 0.232
|
||||||
D.CheckIntraBatch: 0.00801
|
D.CheckIntraBatch: 0.0067
|
||||||
D.MergeWrite: 0.473
|
D.MergeWrite: 0.341
|
||||||
D.RemoveBefore: 0.215
|
D.RemoveBefore: 0.232
|
||||||
```
|
```
|
||||||
|
|
||||||
# Our benchmark
|
# Our benchmark
|
||||||
|
@@ -30,7 +30,7 @@ constexpr inline size_t rightAlign(size_t offset, size_t alignment) {
|
|||||||
|
|
||||||
int main(int argc, const char **argv) {
|
int main(int argc, const char **argv) {
|
||||||
// Use with this dataset https://snap.stanford.edu/data/memetracker9.html
|
// Use with this dataset https://snap.stanford.edu/data/memetracker9.html
|
||||||
// Preprocess the files with `sed -i '' '/^Q/d'`
|
// Preprocess the files with `sed -i'' '/^Q/d'`
|
||||||
|
|
||||||
double checkTime = 0;
|
double checkTime = 0;
|
||||||
double addTime = 0;
|
double addTime = 0;
|
||||||
|
Reference in New Issue
Block a user