8 Commits

Author SHA1 Message Date
71c39f9955 Opt-in to rpm/deb default package filenames
All checks were successful
Tests / Clang total: 1096, passed: 1096
Clang
|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|
Tests / SIMD fallback total: 1096, passed: 1096
Tests / Release [gcc] total: 1096, passed: 1096
GNU C Compiler (gcc)
|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|
Tests / Release [gcc,aarch64] total: 824, passed: 824
Tests / Coverage total: 823, passed: 823
weaselab/conflict-set/pipeline/head This commit looks good
2024-03-29 15:56:58 -07:00
8cc17158fd Fix preprocessing instructions for linux
All checks were successful
Tests / Clang total: 1096, passed: 1096
Clang
|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|
Tests / SIMD fallback total: 1096, passed: 1096
Tests / Release [gcc] total: 1096, passed: 1096
GNU C Compiler (gcc)
|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|
Tests / Release [gcc,aarch64] total: 824, passed: 824
Tests / Coverage total: 823, passed: 823
weaselab/conflict-set/pipeline/head This commit looks good
2024-03-28 14:58:29 -07:00
ab211c646a Apply compiler-appeasing syntax changes from Taoxi 2024-03-28 14:57:31 -07:00
7af961f141 Fix jenkins build
All checks were successful
Tests / Clang total: 1096, passed: 1096
Clang
|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|
Tests / SIMD fallback total: 1096, passed: 1096
Tests / Release [gcc] total: 1096, passed: 1096
GNU C Compiler (gcc)
|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|
Tests / Release [gcc,aarch64] total: 824, passed: 824
Tests / Coverage total: 823, passed: 823
weaselab/conflict-set/pipeline/head This commit looks good
2024-03-28 11:44:01 -07:00
a91df62608 Add USE_SIMD_FALLBACK build in jenkins
Some checks failed
Tests / Clang total: 1096, passed: 1096
Clang
|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|
Tests / SIMD fallback total: 1096, passed: 1096
weaselab/conflict-set/pipeline/head There was a failure building this commit
2024-03-28 11:33:53 -07:00
0a1843a161 Add USE_SIMD_FALLBACK
All checks were successful
Tests / Clang total: 1096, passed: 1096
Clang
|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|
Tests / Release [gcc] total: 1096, passed: 1096
GNU C Compiler (gcc)
|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|
Tests / Release [gcc,aarch64] total: 824, passed: 824
Tests / Coverage total: 823, passed: 823
weaselab/conflict-set/pipeline/head This commit looks good
2024-03-28 11:12:50 -07:00
4edf0315d9 Find insertion point for Node16 with simd
Closes #13
2024-03-28 10:47:20 -07:00
14515e186a Update readme benchmarks again
All checks were successful
Tests / Clang total: 1096, passed: 1096
Clang
|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|
Tests / Release [gcc] total: 1096, passed: 1096
GNU C Compiler (gcc)
|Total|New|Outstanding|Fixed|Trend|
|:-:|:-:|:-:|:-:|:-:|
|0|0|0|0|:clap:|
Tests / Release [gcc,aarch64] total: 824, passed: 824
Tests / Coverage total: 823, passed: 823
weaselab/conflict-set/pipeline/head This commit looks good
I think the last skiplisttest benchmark looked bad because I had vscode + firefox + ?? open
2024-03-27 16:47:36 -07:00
6 changed files with 110 additions and 51 deletions

View File

@@ -34,7 +34,7 @@ ConflictSet::ReadRange singleton(Arena &arena, std::span<const uint8_t> key) {
std::span<uint8_t>(new (arena) uint8_t[key.size() + 1], key.size() + 1); std::span<uint8_t>(new (arena) uint8_t[key.size() + 1], key.size() + 1);
memcpy(r.data(), key.data(), key.size()); memcpy(r.data(), key.data(), key.size());
r[key.size()] = 0; r[key.size()] = 0;
return {key.data(), int(key.size()), r.data(), int(r.size())}; return {{key.data(), int(key.size())}, {r.data(), int(r.size())}, 0};
} }
ConflictSet::ReadRange prefixRange(Arena &arena, std::span<const uint8_t> key) { ConflictSet::ReadRange prefixRange(Arena &arena, std::span<const uint8_t> key) {
@@ -52,7 +52,7 @@ ConflictSet::ReadRange prefixRange(Arena &arena, std::span<const uint8_t> key) {
auto r = std::span<uint8_t>(new (arena) uint8_t[index + 1], index + 1); auto r = std::span<uint8_t>(new (arena) uint8_t[index + 1], index + 1);
memcpy(r.data(), key.data(), index + 1); memcpy(r.data(), key.data(), index + 1);
r[r.size() - 1]++; r[r.size() - 1]++;
return {key.data(), int(key.size()), r.data(), int(r.size())}; return {{key.data(), int(key.size())}, {r.data(), int(r.size())}, 0};
} }
void benchConflictSet() { void benchConflictSet() {
@@ -258,4 +258,4 @@ void benchConflictSet() {
} }
} }
int main(void) { benchConflictSet(); } int main(void) { benchConflictSet(); }

View File

@@ -25,6 +25,9 @@ endif()
add_compile_options(-fdata-sections -ffunction-sections -Wswitch-enum add_compile_options(-fdata-sections -ffunction-sections -Wswitch-enum
-Werror=switch-enum) -Werror=switch-enum)
option(USE_SIMD_FALLBACK
"Use fallback implementations of functions that use SIMD" OFF)
# This is encouraged according to # This is encouraged according to
# https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq # https://valgrind.org/docs/manual/manual-core-adv.html#manual-core-adv.clientreq
include_directories(SYSTEM ${CMAKE_SOURCE_DIR}/third_party/valgrind) include_directories(SYSTEM ${CMAKE_SOURCE_DIR}/third_party/valgrind)
@@ -43,18 +46,20 @@ endif()
include(CheckIncludeFileCXX) include(CheckIncludeFileCXX)
include(CMakePushCheckState) include(CMakePushCheckState)
cmake_push_check_state() if(NOT USE_SIMD_FALLBACK)
list(APPEND CMAKE_REQUIRED_FLAGS -mavx) cmake_push_check_state()
check_include_file_cxx("immintrin.h" HAS_AVX) list(APPEND CMAKE_REQUIRED_FLAGS -mavx)
if(HAS_AVX) check_include_file_cxx("immintrin.h" HAS_AVX)
add_compile_options(-mavx) if(HAS_AVX)
add_compile_definitions(HAS_AVX) add_compile_options(-mavx)
endif() add_compile_definitions(HAS_AVX)
cmake_pop_check_state() endif()
cmake_pop_check_state()
check_include_file_cxx("arm_neon.h" HAS_ARM_NEON) check_include_file_cxx("arm_neon.h" HAS_ARM_NEON)
if(HAS_ARM_NEON) if(HAS_ARM_NEON)
add_compile_definitions(HAS_ARM_NEON) add_compile_definitions(HAS_ARM_NEON)
endif()
endif() endif()
set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "") set(CMAKE_CXX_IMPLICIT_LINK_LIBRARIES "")
@@ -267,6 +272,10 @@ set(CPACK_RESOURCE_FILE_README "${CMAKE_CURRENT_SOURCE_DIR}/README.md")
set(CPACK_RPM_PACKAGE_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR}) set(CPACK_RPM_PACKAGE_ARCHITECTURE ${CMAKE_SYSTEM_PROCESSOR})
set(CPACK_RPM_SPEC_INSTALL_POST "/bin/true") # avoid stripping set(CPACK_RPM_SPEC_INSTALL_POST "/bin/true") # avoid stripping
set(CPACK_RPM_PACKAGE_LICENSE "Apache 2.0") set(CPACK_RPM_PACKAGE_LICENSE "Apache 2.0")
set(CPACK_RPM_FILE_NAME RPM-DEFAULT)
# deb
set(CPACK_DEBIAN_FILE_NAME DEB-DEFAULT)
include(CPack) include(CPack)

View File

@@ -29,6 +29,7 @@ limitations under the License.
#include <span> #include <span>
#include <string> #include <string>
#include <string_view> #include <string_view>
#include <type_traits>
#include <utility> #include <utility>
#ifdef HAS_AVX #ifdef HAS_AVX
@@ -954,6 +955,42 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
assert(self->getType() == Type_Node16); assert(self->getType() == Type_Node16);
++self->numChildren; ++self->numChildren;
#ifdef HAS_AVX
__m128i key_vec = _mm_set1_epi8(index);
__m128i indices;
memcpy(&indices, self16->index, sizeof(self16->index));
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
int mask = (1 << (self->numChildren - 1)) - 1;
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
bitfield |= uint32_t(1) << (self->numChildren - 1);
int i = std::countr_zero(bitfield);
if (i < self->numChildren - 1) {
memmove(self16->index + i + 1, self16->index + i,
self->numChildren - (i + 1));
memmove(self16->children + i + 1, self16->children + i,
(self->numChildren - (i + 1)) * sizeof(Child));
}
#elif defined(HAS_ARM_NEON)
uint8x16_t indices;
memcpy(&indices, self16->index, sizeof(self16->index));
// 0xff for each leq
auto results = vcleq_u8(vdupq_n_u8(index), indices);
uint64_t mask = (uint64_t(1) << ((self->numChildren - 1) * 4)) - 1;
// 0xf for each 0xff (within mask)
uint64_t bitfield =
vget_lane_u64(
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)),
0) &
mask;
bitfield |= uint64_t(0xf) << ((self->numChildren - 1) * 4);
int i = std::countr_zero(bitfield) / 4;
if (i < self->numChildren - 1) {
memmove(self16->index + i + 1, self16->index + i,
self->numChildren - (i + 1));
memmove(self16->children + i + 1, self16->children + i,
(self->numChildren - (i + 1)) * sizeof(Child));
}
#else
int i = 0; int i = 0;
for (; i < int(self->numChildren) - 1; ++i) { for (; i < int(self->numChildren) - 1; ++i) {
if (int(self16->index[i]) > int(index)) { if (int(self16->index[i]) > int(index)) {
@@ -964,6 +1001,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
break; break;
} }
} }
#endif
self16->index[i] = index; self16->index[i] = index;
auto &result = self16->children[i].child; auto &result = self16->children[i].child;
result = nullptr; result = nullptr;
@@ -979,8 +1017,8 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
self = newSelf; self = newSelf;
goto insert256; goto insert256;
} }
insert48:
insert48:
auto *self48 = static_cast<Node48 *>(self); auto *self48 = static_cast<Node48 *>(self);
self48->bitSet.set(index); self48->bitSet.set(index);
++self->numChildren; ++self->numChildren;
@@ -992,6 +1030,7 @@ Node *&getOrCreateChild(Node *&self, uint8_t index,
return result; return result;
} }
case Type_Node256: { case Type_Node256: {
insert256: insert256:
auto *self256 = static_cast<Node256 *>(self); auto *self256 = static_cast<Node256 *>(self);
++self->numChildren; ++self->numChildren;
@@ -2858,8 +2897,8 @@ Iterator firstGeq(Node *n, std::string_view key) {
} }
} }
bool checkCorrectness(Node *node, int64_t oldestVersion, [[maybe_unused]] bool checkCorrectness(Node *node, int64_t oldestVersion,
ConflictSet::Impl *impl) { ConflictSet::Impl *impl) {
bool success = true; bool success = true;
checkParentPointers(node, success); checkParentPointers(node, success);

11
Jenkinsfile vendored
View File

@@ -48,6 +48,17 @@ pipeline {
recordIssues(tools: [clang()]) recordIssues(tools: [clang()])
} }
} }
stage('SIMD fallback') {
agent {
dockerfile {
args '-v /home/jenkins/ccache:/ccache'
reuseNode true
}
}
steps {
CleanBuildAndTest("-DUSE_SIMD_FALLBACK=ON")
}
}
stage('Release [gcc]') { stage('Release [gcc]') {
agent { agent {
dockerfile { dockerfile {

View File

@@ -9,23 +9,23 @@ Hardware for all benchmarks is a mac m1 2020.
## Skip list ## Skip list
``` ```
New conflict set: 1.962 sec New conflict set: 1.957 sec
0.637 Mtransactions/sec 0.639 Mtransactions/sec
2.548 Mkeys/sec 2.555 Mkeys/sec
Detect only: 1.842 sec Detect only: 1.845 sec
0.679 Mtransactions/sec 0.678 Mtransactions/sec
2.714 Mkeys/sec 2.710 Mkeys/sec
Skiplist only: 1.261 sec Skiplist only: 1.263 sec
0.991 Mtransactions/sec 0.990 Mtransactions/sec
3.964 Mkeys/sec 3.960 Mkeys/sec
Performance counters: Performance counters:
Build: 0.0597 Build: 0.0546
Add: 0.0587 Add: 0.0563
Detect: 1.84 Detect: 1.84
D.Sort: 0.411 D.Sort: 0.412
D.Combine: 0.0136 D.Combine: 0.0141
D.CheckRead: 0.67 D.CheckRead: 0.671
D.CheckIntraBatch: 0.00671 D.CheckIntraBatch: 0.0068
D.MergeWrite: 0.592 D.MergeWrite: 0.592
D.RemoveBefore: 0.146 D.RemoveBefore: 0.146
``` ```
@@ -33,25 +33,25 @@ Performance counters:
## Radix tree (this implementation) ## Radix tree (this implementation)
``` ```
New conflict set: 1.660 sec New conflict set: 1.366 sec
0.753 Mtransactions/sec 0.915 Mtransactions/sec
3.012 Mkeys/sec 3.660 Mkeys/sec
Detect only: 1.524 sec Detect only: 1.248 sec
0.820 Mtransactions/sec 1.002 Mtransactions/sec
3.280 Mkeys/sec 4.007 Mkeys/sec
Skiplist only: 0.844 sec Skiplist only: 0.573 sec
1.481 Mtransactions/sec 2.182 Mtransactions/sec
5.926 Mkeys/sec 8.730 Mkeys/sec
Performance counters: Performance counters:
Build: 0.0699 Build: 0.0594
Add: 0.0628 Add: 0.0572
Detect: 1.52 Detect: 1.25
D.Sort: 0.435 D.Sort: 0.418
D.Combine: 0.0183 D.Combine: 0.0149
D.CheckRead: 0.37 D.CheckRead: 0.232
D.CheckIntraBatch: 0.00801 D.CheckIntraBatch: 0.0067
D.MergeWrite: 0.473 D.MergeWrite: 0.341
D.RemoveBefore: 0.215 D.RemoveBefore: 0.232
``` ```
# Our benchmark # Our benchmark

View File

@@ -30,7 +30,7 @@ constexpr inline size_t rightAlign(size_t offset, size_t alignment) {
int main(int argc, const char **argv) { int main(int argc, const char **argv) {
// Use with this dataset https://snap.stanford.edu/data/memetracker9.html // Use with this dataset https://snap.stanford.edu/data/memetracker9.html
// Preprocess the files with `sed -i '' '/^Q/d'` // Preprocess the files with `sed -i'' '/^Q/d'`
double checkTime = 0; double checkTime = 0;
double addTime = 0; double addTime = 0;