From 3cb0765fdd3dff79e31166a9360c065df333b152 Mon Sep 17 00:00:00 2001
From: Andrew Noyes
Date: Wed, 13 Mar 2024 16:48:28 -0700
Subject: [PATCH] Rework SHOW_MEMORY

closes #10
---
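Notes (not part of the commit message): the new counters all follow one
idiom, which is to bump a "live" counter on acquire, update its peak at the
moment of increase, and decrement on release. A minimal standalone sketch of
that idiom, with invented names (onAcquire, onRelease), just to illustrate
the bookkeeping this patch applies to nodes, keys, and malloc bytes:

    #include <cstdint>
    #include <cstdio>

    static int64_t live = 0, peak = 0;

    void onAcquire(int64_t bytes) {
      live += bytes;
      if (live > peak) {
        peak = live; // the peak can only move at the moment 'live' grows
      }
    }

    void onRelease(int64_t bytes) { live -= bytes; }

    int main() {
      onAcquire(100);
      onAcquire(50);
      onRelease(100);
      onAcquire(25);
      // prints live=75 peak=150
      printf("live=%lld peak=%lld\n", (long long)live, (long long)peak);
    }
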
 ConflictSet.cpp | 179 ++++++++++++++++++++++++++++++++++++++++--------
 Internal.h      |  26 +++++++
 2 files changed, 178 insertions(+), 27 deletions(-)

diff --git a/ConflictSet.cpp b/ConflictSet.cpp
index 1ce1b16..88e791b 100644
--- a/ConflictSet.cpp
+++ b/ConflictSet.cpp
@@ -53,6 +53,21 @@ limitations under the License.
 #define assume assert
 #endif
 
+#if SHOW_MEMORY
+#ifdef __APPLE__
+#include <malloc/malloc.h>
+#endif
+void addNode(struct Node *);
+void removeNode(struct Node *);
+void addKey(struct Node *);
+void removeKey(struct Node *);
+#else
+constexpr void addNode(struct Node *) {}
+constexpr void removeNode(struct Node *) {}
+constexpr void addKey(struct Node *) {}
+constexpr void removeKey(struct Node *) {}
+#endif
+
 // ==================== BEGIN IMPLEMENTATION ====================
 
 struct Entry {
@@ -303,10 +318,6 @@ struct BoundedFreeListAllocator {
   static_assert(std::derived_from<T, Node>);
 
   T *allocate(int partialKeyCapacity) {
-#if SHOW_MEMORY
-    ++liveAllocations;
-    maxLiveAllocations = std::max(maxLiveAllocations, liveAllocations);
-#endif
     if (freeList != nullptr) {
       T *n = (T *)freeList;
       VALGRIND_MAKE_MEM_UNDEFINED(n, sizeof(T));
@@ -319,22 +330,22 @@
         return new (n) T;
       } else {
         // The intent is to filter out too-small nodes in the freelist
-        free(n);
+        removeNode(n);
+        safe_free(n);
       }
     }
     auto *result = new (safe_malloc(sizeof(T) + partialKeyCapacity)) T;
     result->partialKeyCapacity = partialKeyCapacity;
+    addNode(result);
     return result;
   }
 
   void release(T *p) {
-#if SHOW_MEMORY
-    --liveAllocations;
-#endif
     static_assert(std::is_trivially_destructible_v<T>);
     if (freeListBytes >= kMemoryBound) {
-      return free(p);
+      removeNode(p);
+      return safe_free(p);
     }
     memcpy((void *)p, &freeList, sizeof(freeList));
     freeList = p;
@@ -347,22 +358,14 @@
       VALGRIND_MAKE_MEM_DEFINED(iter, sizeof(iter));
       auto *tmp = iter;
       memcpy(&iter, iter, sizeof(void *));
-      free(tmp);
+      removeNode((T *)tmp);
+      safe_free(tmp);
     }
   }
 
-#if SHOW_MEMORY
-  int64_t highWaterMarkBytes() const { return maxLiveAllocations * sizeof(T); }
-#endif
-
 private:
   int64_t freeListBytes = 0;
   void *freeList = nullptr;
-#if SHOW_MEMORY
-  // TODO Track partial key bytes
-  int64_t maxLiveAllocations = 0;
-  int64_t liveAllocations = 0;
-#endif
 };
 
 uint8_t *Node::partialKey() {
@@ -821,7 +824,8 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
     if constexpr (kUseFreeList) {
       allocators->node0.release(self0);
     } else {
-      free(self0);
+      removeNode(self0);
+      safe_free(self0);
     }
     self = newSelf;
   } break;
@@ -841,7 +845,8 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
     if constexpr (kUseFreeList) {
       allocators->node3.release(self3);
     } else {
-      free(self3);
+      removeNode(self3);
+      safe_free(self3);
     }
     self = newSelf;
   } break;
@@ -861,7 +866,8 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
     if constexpr (kUseFreeList) {
       allocators->node16.release(self16);
     } else {
-      free(self16);
+      removeNode(self16);
+      safe_free(self16);
     }
     self = newSelf;
   } break;
@@ -885,7 +891,8 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
     if constexpr (kUseFreeList) {
       allocators->node48.release(self48);
     } else {
-      free(self48);
+      removeNode(self48);
+      safe_free(self48);
    }
     self = newSelf;
   } break;
@@ -903,7 +910,8 @@ void freeAndMakeCapacityAtLeast(Node *&self, int capacity,
     if constexpr (kUseFreeList) {
       allocators->node256.release(self256);
     } else {
-      free(self256);
+      removeNode(self256);
+      safe_free(self256);
     }
     self = newSelf;
   } break;
@@ -1072,7 +1080,10 @@ Node *erase(Node *self, NodeAllocators *allocators, ConflictSet::Impl *impl,
   uint8_t parentsIndex = self->parentsIndex;
 
   auto *result = nextLogical(self);
+
+  removeKey(self);
   self->entryPresent = false;
+
   if (self->numChildren != 0) {
     const bool update = result == dontInvalidate;
     maybeDownsize(self, allocators, impl, result);
@@ -1969,7 +1980,10 @@ template
   memcpy((char *)*self + kNodeCopyBegin, (char *)old + kNodeCopyBegin,
          kNodeCopySize);
   (*self)->partialKeyLen = partialKeyIndex;
+
+  // Not necessary to call removeKey here, since this node is "synthetic"
   (*self)->entryPresent = false;
+
   (*self)->numChildren = 0;
   memcpy((*self)->partialKey(), old->partialKey(), (*self)->partialKeyLen);
@@ -2035,6 +2049,14 @@ void destroyTree(Node *root) {
   Arena arena;
   auto toFree = vector<Node *>(arena);
   toFree.push_back(root);
+
+#if SHOW_MEMORY
+  for (auto *iter = root; iter != nullptr; iter = nextPhysical(iter)) {
+    removeNode(iter);
+    removeKey(iter);
+  }
+#endif
+
   while (toFree.size() > 0) {
     auto *n = toFree.back();
     toFree.pop_back();
@@ -2045,7 +2067,7 @@
       assert(c != nullptr);
       toFree.push_back(c);
     }
-    free(n);
+    safe_free(n);
   }
 }
@@ -2055,7 +2077,10 @@ void addPointWrite(Node *&root, int64_t oldestVersion,
   auto *n = insert(&root, key, writeVersion, allocators, impl);
   if (!n->entryPresent) {
     auto *p = nextLogical(n);
+
+    addKey(n);
     n->entryPresent = true;
+
     n->entry.pointVersion = writeVersion;
     maxVersion(n, impl) = writeVersion;
     n->entry.rangeVersion =
@@ -2118,6 +2143,8 @@ void addWriteRange(Node *&root, int64_t oldestVersion,
       insert(useAsRoot, begin, writeVersion, allocators, impl);
   const bool insertedBegin = !beginNode->entryPresent;
+
+  addKey(beginNode);
   beginNode->entryPresent = true;
 
   if (insertedBegin) {
@@ -2136,6 +2163,8 @@
   auto *endNode = insert(useAsRoot, end, writeVersion, allocators, impl);
   const bool insertedEnd = !endNode->entryPresent;
+
+  addKey(endNode);
   endNode->entryPresent = true;
 
   if (insertedEnd) {
@@ -2302,6 +2331,8 @@ struct __attribute__((visibility("hidden"))) ConflictSet::Impl {
     rootMaxVersion = oldestVersion;
     root->entry.pointVersion = oldestVersion;
     root->entry.rangeVersion = oldestVersion;
+
+    addKey(root);
     root->entryPresent = true;
   }
   ~Impl() { destroyTree(root); }
@@ -2380,7 +2411,7 @@ ConflictSet::ConflictSet(int64_t oldestVersion)
 ConflictSet::~ConflictSet() {
   if (impl) {
     impl->~Impl();
-    free(impl);
+    safe_free(impl);
   }
 }
@@ -2420,7 +2451,7 @@ ConflictSet_create(int64_t oldestVersion) {
 __attribute__((__visibility__("default"))) void ConflictSet_destroy(void *cs) {
   using Impl = ConflictSet::Impl;
   ((Impl *)cs)->~Impl();
-  free(cs);
+  safe_free(cs);
 }
 }
@@ -2652,6 +2683,100 @@ namespace std {
 void __throw_length_error(const char *) { __builtin_unreachable(); }
 } // namespace std
 
+#if SHOW_MEMORY
+
+int64_t nodeBytes = 0;
+int64_t peakNodeBytes = 0;
+int64_t partialCapacityBytes = 0;
+int64_t peakPartialCapacityBytes = 0;
+int64_t totalKeys = 0;
+int64_t peakKeys = 0;
+int64_t keyBytes = 0;
+int64_t peakKeyBytes = 0;
+
+int64_t getNodeSize(struct Node *n) {
+  switch (n->type) {
+  case Type_Node0:
+    return sizeof(Node0);
+  case Type_Node3:
+    return sizeof(Node3);
+  case Type_Node16:
+    return sizeof(Node16);
+  case Type_Node48:
+    return sizeof(Node48);
+  case Type_Node256:
+    return sizeof(Node256);
+  }
+  __builtin_unreachable(); // GCOVR_EXCL_LINE
+}
+
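+// getSearchPathLength computes the length in bytes of the key this node
+// represents: the sum of partial-key lengths along the parent chain, plus
+// one byte for each child edge traversed.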
+int64_t getSearchPathLength(Node *n) {
+  assert(n != nullptr);
+  int64_t result = 0;
+  for (;;) {
+    result += n->partialKeyLen;
+    if (n->parent == nullptr) {
+      break;
+    }
+    ++result;
+    n = n->parent;
+  }
+  return result;
+}
+
+void addNode(Node *n) {
+  nodeBytes += getNodeSize(n);
+  partialCapacityBytes += n->partialKeyCapacity;
+  if (nodeBytes > peakNodeBytes) {
+    peakNodeBytes = nodeBytes;
+  }
+  if (partialCapacityBytes > peakPartialCapacityBytes) {
+    peakPartialCapacityBytes = partialCapacityBytes;
+  }
+}
+
+void removeNode(Node *n) {
+  nodeBytes -= getNodeSize(n);
+  partialCapacityBytes -= n->partialKeyCapacity;
+}
+
+void addKey(Node *n) {
+  if (!n->entryPresent) {
+    ++totalKeys;
+    keyBytes += getSearchPathLength(n);
+    if (totalKeys > peakKeys) {
+      peakKeys = totalKeys;
+    }
+    if (keyBytes > peakKeyBytes) {
+      peakKeyBytes = keyBytes;
+    }
+  }
+}
+
+void removeKey(Node *n) {
+  if (n->entryPresent) {
+    --totalKeys;
+    keyBytes -= getSearchPathLength(n);
+  }
+}
+
+struct __attribute__((visibility("default"))) PeakPrinter {
+  ~PeakPrinter() {
+    printf("Peak malloc bytes: %g\n", double(peakMallocBytes));
+    printf("Node bytes: %g\n", double(nodeBytes));
+    printf("Peak node bytes: %g\n", double(peakNodeBytes));
+    printf("Expected worst case node bytes: %g\n",
+           double(peakKeys * kBytesPerKey));
+    printf("Key bytes: %g\n", double(keyBytes));
+    printf("Peak key bytes: %g (not sharing common prefixes)\n",
+           double(peakKeyBytes));
+    printf("Partial capacity bytes: %g\n", double(partialCapacityBytes));
+    printf("Peak partial key capacity bytes: %g\n",
+           double(peakPartialCapacityBytes));
+  }
+} peakPrinter;
+#endif
+
 #ifdef ENABLE_MAIN
 
 void printTree() {
diff --git a/Internal.h b/Internal.h
index 0374352..eb59dc1 100644
--- a/Internal.h
+++ b/Internal.h
@@ -40,13 +40,39 @@ operator<=>(const std::span &lhs,
 
 // GCOVR_EXCL_START
 
+#if SHOW_MEMORY
+#ifdef __APPLE__
+#include <malloc/malloc.h>
+#endif
+inline int64_t mallocBytes = 0;
+inline int64_t peakMallocBytes = 0;
+#endif
+
+// malloc that aborts on OOM and thus always returns a non-null pointer
 __attribute__((always_inline)) inline void *safe_malloc(size_t s) {
+#if SHOW_MEMORY
+  mallocBytes += s;
+  if (mallocBytes > peakMallocBytes) {
+    peakMallocBytes = mallocBytes;
+  }
+#endif
   if (void *p = malloc(s)) {
     return p;
  }
   abort();
 }
 
+// There's nothing safer about this than free. Only called safe_free for
+// symmetry with safe_malloc.
+__attribute__((always_inline)) inline void safe_free(void *p) {
+#if SHOW_MEMORY
+#ifdef __APPLE__
+  mallocBytes -= malloc_size(p);
+#endif
+#endif
+  free(p);
+}
+
 // ==================== BEGIN ARENA IMPL ====================
 
 /// Group allocations with similar lifetimes to amortize the cost of malloc/free
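
Post-patch note (illustration, not part of the diff): keyBytes charges each
present entry the full length of its search path, i.e. its partial keys along
the parent chain plus one byte per child edge, which is why the report labels
the peak as "not sharing common prefixes". A self-contained toy model of that
computation, with invented types (ToyNode, toySearchPathLength):

    #include <cassert>
    #include <cstdint>

    struct ToyNode {
      ToyNode *parent = nullptr;
      int partialKeyLen = 0;
    };

    // Mirrors getSearchPathLength: sum partial-key lengths up the parent
    // chain, plus one byte for each parent->child edge.
    int64_t toySearchPathLength(ToyNode *n) {
      assert(n != nullptr);
      int64_t result = 0;
      for (;;) {
        result += n->partialKeyLen;
        if (n->parent == nullptr) {
          break;
        }
        ++result;
        n = n->parent;
      }
      return result;
    }

    int main() {
      ToyNode root;          // search path "" -> contributes 0 bytes
      ToyNode mid{&root, 2}; // edge byte + 2-byte partial key
      ToyNode leaf{&mid, 1}; // edge byte + 1-byte partial key
      // leaf(1) + edge(1) + mid(2) + edge(1) + root(0) == 5
      assert(toySearchPathLength(&leaf) == 5);
      return 0;
    }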