Remove memmove call for inserting into Node16
This commit is contained in:
@@ -1148,71 +1148,21 @@ Node *&getOrCreateChild(Node *&self, uint8_t index, WriteContext *tls) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
insert16:
|
insert16:
|
||||||
auto *self16 = static_cast<Node16 *>(self);
|
|
||||||
assert(self->getType() == Type_Node16);
|
assert(self->getType() == Type_Node16);
|
||||||
|
auto *self16 = static_cast<Node16 *>(self);
|
||||||
++self->numChildren;
|
int i = self->numChildren - 1;
|
||||||
#ifdef HAS_AVX
|
for (; i >= 0; --i) {
|
||||||
__m128i key_vec = _mm_set1_epi8(index);
|
if (int(self16->index[i]) < int(index)) {
|
||||||
__m128i indices;
|
|
||||||
memcpy(&indices, self16->index, sizeof(self16->index));
|
|
||||||
__m128i results = _mm_cmpeq_epi8(key_vec, _mm_min_epu8(key_vec, indices));
|
|
||||||
int mask = (1 << (self->numChildren - 1)) - 1;
|
|
||||||
uint32_t bitfield = _mm_movemask_epi8(results) & mask;
|
|
||||||
bitfield |= uint32_t(1) << (self->numChildren - 1);
|
|
||||||
int i = std::countr_zero(bitfield);
|
|
||||||
if (i < self->numChildren - 1) {
|
|
||||||
memmove(self16->index + i + 1, self16->index + i,
|
|
||||||
self->numChildren - (i + 1));
|
|
||||||
memmove(self16->children + i + 1, self16->children + i,
|
|
||||||
(self->numChildren - (i + 1)) *
|
|
||||||
sizeof(self16->children[0])); // NOLINT
|
|
||||||
memmove(self16->childMaxVersion + i + 1, self16->childMaxVersion + i,
|
|
||||||
(self->numChildren - (i + 1)) *
|
|
||||||
sizeof(self16->childMaxVersion[0]));
|
|
||||||
}
|
|
||||||
#elif defined(HAS_ARM_NEON)
|
|
||||||
uint8x16_t indices;
|
|
||||||
memcpy(&indices, self16->index, sizeof(self16->index));
|
|
||||||
// 0xff for each leq
|
|
||||||
auto results = vcleq_u8(vdupq_n_u8(index), indices);
|
|
||||||
uint64_t mask = (uint64_t(1) << ((self->numChildren - 1) * 4)) - 1;
|
|
||||||
// 0xf for each 0xff (within mask)
|
|
||||||
uint64_t bitfield =
|
|
||||||
vget_lane_u64(
|
|
||||||
vreinterpret_u64_u8(vshrn_n_u16(vreinterpretq_u16_u8(results), 4)),
|
|
||||||
0) &
|
|
||||||
mask;
|
|
||||||
bitfield |= uint64_t(0xf) << ((self->numChildren - 1) * 4);
|
|
||||||
int i = std::countr_zero(bitfield) / 4;
|
|
||||||
if (i < self->numChildren - 1) {
|
|
||||||
memmove(self16->index + i + 1, self16->index + i,
|
|
||||||
self->numChildren - (i + 1));
|
|
||||||
memmove(self16->children + i + 1, self16->children + i,
|
|
||||||
(self->numChildren - (i + 1)) *
|
|
||||||
sizeof(self16->children[0])); // NOLINT
|
|
||||||
memmove(self16->childMaxVersion + i + 1, self16->childMaxVersion + i,
|
|
||||||
(self->numChildren - (i + 1)) *
|
|
||||||
sizeof(self16->childMaxVersion[0]));
|
|
||||||
}
|
|
||||||
#else
|
|
||||||
int i = 0;
|
|
||||||
for (; i < int(self->numChildren) - 1; ++i) {
|
|
||||||
if (int(self16->index[i]) > int(index)) {
|
|
||||||
memmove(self16->index + i + 1, self16->index + i,
|
|
||||||
self->numChildren - (i + 1));
|
|
||||||
memmove(self16->children + i + 1, self16->children + i,
|
|
||||||
(self->numChildren - (i + 1)) * sizeof(self16->children[0]));
|
|
||||||
memmove(self16->childMaxVersion + i + 1, self16->childMaxVersion + i,
|
|
||||||
(self->numChildren - (i + 1)) *
|
|
||||||
sizeof(self16->childMaxVersion[0]));
|
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
self16->index[i + 1] = self16->index[i];
|
||||||
|
self16->children[i + 1] = self16->children[i];
|
||||||
|
self16->childMaxVersion[i + 1] = self16->childMaxVersion[i];
|
||||||
}
|
}
|
||||||
#endif
|
self16->index[i + 1] = index;
|
||||||
self16->index[i] = index;
|
auto &result = self16->children[i + 1];
|
||||||
auto &result = self16->children[i];
|
|
||||||
result = nullptr;
|
result = nullptr;
|
||||||
|
++self->numChildren;
|
||||||
return result;
|
return result;
|
||||||
}
|
}
|
||||||
case Type_Node48: {
|
case Type_Node48: {
|
||||||
|
Reference in New Issue
Block a user