Compare commits
7 Commits
76df63a9d7
...
v0.0.7
| Author | SHA1 | Date | |
|---|---|---|---|
| d895be36d2 | |||
| 65f8462e88 | |||
| 46e01af027 | |||
| c9d0d72684 | |||
| 9046dc5a8f | |||
| e2927bf0fa | |||
| 75a2b8d06c |
191
ConflictSet.cpp
191
ConflictSet.cpp
@@ -2001,55 +2001,163 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
|||||||
|
|
||||||
assert(!(begin == -1 && end == 256));
|
assert(!(begin == -1 && end == 256));
|
||||||
|
|
||||||
{
|
|
||||||
int c = getChildGeq(n, begin + 1);
|
|
||||||
if (c >= 0 && c < end) {
|
|
||||||
auto *child = getChildExists(n, c);
|
|
||||||
if (child->entryPresent) {
|
|
||||||
if (!(child->entry.rangeVersion <= readVersion)) {
|
|
||||||
return false;
|
|
||||||
};
|
|
||||||
}
|
|
||||||
begin = c;
|
|
||||||
} else {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// [begin, end) is now the half-open interval of children we're interested in.
|
|
||||||
assert(begin < end);
|
|
||||||
|
|
||||||
switch (n->getType()) {
|
switch (n->getType()) {
|
||||||
case Type_Node0: // GCOVR_EXCL_LINE
|
case Type_Node0:
|
||||||
// We would have returned above, after not finding a child
|
return true;
|
||||||
__builtin_unreachable(); // GCOVR_EXCL_LINE
|
|
||||||
case Type_Node3: {
|
case Type_Node3: {
|
||||||
auto *self = static_cast<Node3 *>(n);
|
auto *self = static_cast<Node3 *>(n);
|
||||||
|
|
||||||
|
++begin;
|
||||||
|
|
||||||
const unsigned shiftUpperBound = end - begin;
|
const unsigned shiftUpperBound = end - begin;
|
||||||
const unsigned shiftAmount = begin;
|
const unsigned shiftAmount = begin;
|
||||||
auto inBounds = [&](unsigned c) {
|
auto inBounds = [&](unsigned c) {
|
||||||
return c - shiftAmount < shiftUpperBound;
|
return c - shiftAmount < shiftUpperBound;
|
||||||
};
|
};
|
||||||
|
|
||||||
uint32_t compared = 0;
|
|
||||||
for (int i = 0; i < Node3::kMaxNodes; ++i) {
|
|
||||||
compared |= (self->childMaxVersion[i] > readVersion) << i;
|
|
||||||
}
|
|
||||||
uint32_t mask = 0;
|
uint32_t mask = 0;
|
||||||
for (int i = 0; i < Node3::kMaxNodes; ++i) {
|
for (int i = 0; i < Node3::kMaxNodes; ++i) {
|
||||||
mask |= inBounds(self->index[i]) << i;
|
mask |= inBounds(self->index[i]) << i;
|
||||||
}
|
}
|
||||||
return !(compared & mask);
|
mask &= (1 << self->numChildren) - 1;
|
||||||
|
if (!mask) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
auto *child = self->children[__builtin_ctz(mask)];
|
||||||
|
const bool firstRangeOk =
|
||||||
|
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||||
|
uint32_t compared = 0;
|
||||||
|
for (int i = 0; i < Node3::kMaxNodes; ++i) {
|
||||||
|
compared |= (self->childMaxVersion[i] > readVersion) << i;
|
||||||
|
}
|
||||||
|
|
||||||
|
return !(compared & mask) && firstRangeOk;
|
||||||
}
|
}
|
||||||
case Type_Node16: {
|
case Type_Node16: {
|
||||||
auto *self = static_cast<Node16 *>(n);
|
auto *self = static_cast<Node16 *>(n);
|
||||||
|
|
||||||
return scan16<kAVX512>(self->childMaxVersion, self->index, begin, end,
|
++begin;
|
||||||
readVersion);
|
|
||||||
|
assert(begin <= end);
|
||||||
|
assert(end - begin < 256);
|
||||||
|
|
||||||
|
#ifdef HAS_ARM_NEON
|
||||||
|
|
||||||
|
uint8x16_t indices;
|
||||||
|
memcpy(&indices, self->index, 16);
|
||||||
|
// 0xff for each in bounds
|
||||||
|
auto results =
|
||||||
|
vcltq_u8(vsubq_u8(indices, vdupq_n_u8(begin)), vdupq_n_u8(end - begin));
|
||||||
|
// 0xf for each 0xff
|
||||||
|
uint64_t mask = vget_lane_u64(
|
||||||
|
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)), 0);
|
||||||
|
|
||||||
|
mask &= self->numChildren == 16
|
||||||
|
? uint64_t(-1)
|
||||||
|
: (uint64_t(1) << (self->numChildren << 2)) - 1;
|
||||||
|
if (!mask) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
auto *child = self->children[__builtin_ctzll(mask) >> 2];
|
||||||
|
const bool firstRangeOk =
|
||||||
|
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||||
|
|
||||||
|
uint32x4_t w4[4];
|
||||||
|
memcpy(w4, self->childMaxVersion, sizeof(w4));
|
||||||
|
uint32_t rv;
|
||||||
|
memcpy(&rv, &readVersion, sizeof(rv));
|
||||||
|
const auto rvVec = vdupq_n_u32(rv);
|
||||||
|
|
||||||
|
int32x4_t z;
|
||||||
|
memset(&z, 0, sizeof(z));
|
||||||
|
|
||||||
|
uint16x4_t conflicting[4];
|
||||||
|
for (int i = 0; i < 4; ++i) {
|
||||||
|
conflicting[i] = vmovn_u32(
|
||||||
|
vcgtq_s32(vreinterpretq_s32_u32(vsubq_u32(w4[i], rvVec)), z));
|
||||||
|
}
|
||||||
|
auto combined =
|
||||||
|
vcombine_u8(vmovn_u16(vcombine_u16(conflicting[0], conflicting[1])),
|
||||||
|
vmovn_u16(vcombine_u16(conflicting[2], conflicting[3])));
|
||||||
|
|
||||||
|
uint64_t compared = vget_lane_u64(
|
||||||
|
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(combined), 4)), 0);
|
||||||
|
|
||||||
|
return !(compared & mask) && firstRangeOk;
|
||||||
|
|
||||||
|
#elif defined(HAS_AVX)
|
||||||
|
|
||||||
|
__m128i indices;
|
||||||
|
memcpy(&indices, self->index, 16);
|
||||||
|
indices = _mm_sub_epi8(indices, _mm_set1_epi8(begin));
|
||||||
|
uint32_t mask =
|
||||||
|
0xffff &
|
||||||
|
~_mm_movemask_epi8(_mm_cmpeq_epi8(
|
||||||
|
indices, _mm_max_epu8(indices, _mm_set1_epi8(end - begin))));
|
||||||
|
mask &= (1 << self->numChildren) - 1;
|
||||||
|
if (!mask) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
auto *child = self->children[__builtin_ctz(mask)];
|
||||||
|
const bool firstRangeOk =
|
||||||
|
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||||
|
|
||||||
|
uint32_t compared = 0;
|
||||||
|
if constexpr (kAVX512) {
|
||||||
|
compared = compare16_32bit_avx512(self->childMaxVersion, readVersion);
|
||||||
|
} else {
|
||||||
|
compared = compare16_32bit(self->childMaxVersion, readVersion);
|
||||||
|
}
|
||||||
|
return !(compared & mask) && firstRangeOk;
|
||||||
|
|
||||||
|
#else
|
||||||
|
|
||||||
|
const unsigned shiftUpperBound = end - begin;
|
||||||
|
const unsigned shiftAmount = begin;
|
||||||
|
auto inBounds = [&](unsigned c) {
|
||||||
|
return c - shiftAmount < shiftUpperBound;
|
||||||
|
};
|
||||||
|
|
||||||
|
uint32_t mask = 0;
|
||||||
|
for (int i = 0; i < 16; ++i) {
|
||||||
|
mask |= inBounds(self->index[i]) << i;
|
||||||
|
}
|
||||||
|
mask &= (1 << self->numChildren) - 1;
|
||||||
|
if (!mask) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
auto *child = self->children[__builtin_ctz(mask)];
|
||||||
|
const bool firstRangeOk =
|
||||||
|
!child->entryPresent || child->entry.rangeVersion <= readVersion;
|
||||||
|
uint32_t compared = 0;
|
||||||
|
for (int i = 0; i < 16; ++i) {
|
||||||
|
compared |= (self->childMaxVersion[i] > readVersion) << i;
|
||||||
|
}
|
||||||
|
return !(compared & mask) && firstRangeOk;
|
||||||
|
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
case Type_Node48: {
|
case Type_Node48: {
|
||||||
auto *self = static_cast<Node48 *>(n);
|
auto *self = static_cast<Node48 *>(n);
|
||||||
|
|
||||||
|
{
|
||||||
|
int c = self->bitSet.firstSetGeq(begin + 1);
|
||||||
|
if (c >= 0 && c < end) {
|
||||||
|
auto *child = self->children[self->index[c]];
|
||||||
|
if (child->entryPresent) {
|
||||||
|
if (!(child->entry.rangeVersion <= readVersion)) {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
begin = c;
|
||||||
|
} else {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// [begin, end) is now the half-open interval of children we're interested
|
||||||
|
// in.
|
||||||
|
assert(begin < end);
|
||||||
|
}
|
||||||
|
|
||||||
// Check all pages
|
// Check all pages
|
||||||
static_assert(Node48::kMaxOfMaxPageSize == 16);
|
static_assert(Node48::kMaxOfMaxPageSize == 16);
|
||||||
for (int i = 0; i < Node48::kMaxOfMaxTotalPages; ++i) {
|
for (int i = 0; i < Node48::kMaxOfMaxTotalPages; ++i) {
|
||||||
@@ -2067,6 +2175,25 @@ bool checkMaxBetweenExclusive(Node *n, int begin, int end,
|
|||||||
case Type_Node256: {
|
case Type_Node256: {
|
||||||
static_assert(Node256::kMaxOfMaxTotalPages == 16);
|
static_assert(Node256::kMaxOfMaxTotalPages == 16);
|
||||||
auto *self = static_cast<Node256 *>(n);
|
auto *self = static_cast<Node256 *>(n);
|
||||||
|
|
||||||
|
{
|
||||||
|
int c = self->bitSet.firstSetGeq(begin + 1);
|
||||||
|
if (c >= 0 && c < end) {
|
||||||
|
auto *child = self->children[c];
|
||||||
|
if (child->entryPresent) {
|
||||||
|
if (!(child->entry.rangeVersion <= readVersion)) {
|
||||||
|
return false;
|
||||||
|
};
|
||||||
|
}
|
||||||
|
begin = c;
|
||||||
|
} else {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
// [begin, end) is now the half-open interval of children we're interested
|
||||||
|
// in.
|
||||||
|
assert(begin < end);
|
||||||
|
}
|
||||||
|
|
||||||
const int firstPage = begin >> Node256::kMaxOfMaxShift;
|
const int firstPage = begin >> Node256::kMaxOfMaxShift;
|
||||||
const int lastPage = (end - 1) >> Node256::kMaxOfMaxShift;
|
const int lastPage = (end - 1) >> Node256::kMaxOfMaxShift;
|
||||||
// Check the only page if there's only one
|
// Check the only page if there's only one
|
||||||
@@ -2884,11 +3011,10 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
|
|||||||
const auto &r = reads[i];
|
const auto &r = reads[i];
|
||||||
auto begin = std::span<const uint8_t>(r.begin.p, r.begin.len);
|
auto begin = std::span<const uint8_t>(r.begin.p, r.begin.len);
|
||||||
auto end = std::span<const uint8_t>(r.end.p, r.end.len);
|
auto end = std::span<const uint8_t>(r.end.p, r.end.len);
|
||||||
|
assert(oldestVersionFullPrecision >=
|
||||||
|
newestVersionFullPrecision - kNominalVersionWindow);
|
||||||
result[i] =
|
result[i] =
|
||||||
reads[i].readVersion < oldestVersionFullPrecision ||
|
reads[i].readVersion < oldestVersionFullPrecision ? TooOld
|
||||||
reads[i].readVersion <
|
|
||||||
newestVersionFullPrecision - kNominalVersionWindow
|
|
||||||
? TooOld
|
|
||||||
: (end.size() > 0
|
: (end.size() > 0
|
||||||
? checkRangeRead(root, begin, end,
|
? checkRangeRead(root, begin, end,
|
||||||
InternalVersionT(reads[i].readVersion), this)
|
InternalVersionT(reads[i].readVersion), this)
|
||||||
@@ -3155,7 +3281,6 @@ void internal_addWrites(ConflictSet::Impl *impl,
|
|||||||
|
|
||||||
void internal_setOldestVersion(ConflictSet::Impl *impl, int64_t oldestVersion) {
|
void internal_setOldestVersion(ConflictSet::Impl *impl, int64_t oldestVersion) {
|
||||||
mallocBytesDelta = 0;
|
mallocBytesDelta = 0;
|
||||||
assert(oldestVersion >= impl->oldestVersionFullPrecision);
|
|
||||||
impl->setOldestVersion(oldestVersion);
|
impl->setOldestVersion(oldestVersion);
|
||||||
impl->totalBytes += mallocBytesDelta;
|
impl->totalBytes += mallocBytesDelta;
|
||||||
#if SHOW_MEMORY
|
#if SHOW_MEMORY
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
all: paper.pdf
|
all: paper.pdf
|
||||||
|
|
||||||
paper.pdf: paper.tex $(wildcard *.tikz)
|
paper.pdf: paper.tex $(wildcard *.tikz) version.txt
|
||||||
latexmk -pdf
|
latexmk -pdf
|
||||||
|
|
||||||
clean:
|
clean:
|
||||||
|
|||||||
Reference in New Issue
Block a user