Files
versioned-map/include/VersionedMap.h
Andrew Noyes 3819b83d78 Logically revert 0071600
Return mutation intersecting query key if it exists
2024-05-16 16:09:39 -07:00

202 lines
6.9 KiB
C++

/*
Copyright 2024 Andrew Noyes
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/
#pragma once
#include <stddef.h>
#include <stdint.h>
#ifdef __cplusplus
namespace weaselab {
/** A data structure to facilitate implementing multi-version concurrency
* control reads on bitwise-lexicographically-ordered keys. Indexes mutations by
* key and version, and provides an iterator api which can be used to merge
* versioned results with an underlying unversioned data structure.
*
* @warning You must not apply mutations to your unversioned data structure
* through a version that overtakes a concurrent versioned reader.
*
* Thread safety:
* - It's safe to operate on two different VersionedMaps in two different
* threads concurrently.
* - It's safe to have multiple threads operating on the same VersionedMap
* concurrently if all threads only call const methods.
* - Methods that make stronger guarantees about the safety of calling
* concurrently with non-const methods are documented as such.
*
* Only declarations appear in this header; all state lives behind the opaque
* `Impl` pointer. The visibility attribute requests default symbol visibility
* for the type.
*/
struct __attribute__((__visibility__("default"))) VersionedMap {
/** Indicates how `Mutation::param1` and `Mutation::param2` are to be
* interpreted. */
enum MutationType {
/** `param1` is the key, `param2` is the value we set the key to. */
Set,
/** `param1` is the beginning of the range to clear, and `param2` is the end
* of the range - i.e. the range to clear is [param1, param2). If
* `param2Len` == 0 then this clears the single key `param1`. */
Clear,
};
/** Bytes ordered bitwise-lexicographically. `p` points to the bytes; `len` is
* presumably the number of bytes at `p` (TODO confirm against the
* implementation). This is a plain aggregate with no destructor, so it never
* frees `p`. */
struct Key {
const uint8_t *p;
int len;
};
/** Mutations are bitwise-lexicographically ordered by param1. See
* `MutationType` for how `param1` and `param2` are interpreted; `param1Len`
* and `param2Len` are the lengths of the respective buffers. */
struct Mutation {
const uint8_t *param1;
const uint8_t *param2;
int param1Len;
int param2Len;
MutationType type;
};
/** Adds `numMutations` mutations (read from `mutations`, presumably an array
* of that many entries) at `version`. Mutations must be sorted,
* non-overlapping, and non-adjacent. `version` must be strictly increasing.
* Postcondition: `getVersion()` == `version` */
void addMutations(const Mutation *mutations, int numMutations,
int64_t version);
/** Reclaim mutations older than `version`. `version` must be <=
* `getVersion()`. Postcondition: `getOldestVersion()` == `version`.
* @warning performs work proportional to the mutation rate. Call frequently
* to favor latency and memory usage, and infrequently to favor throughput.
* @warning Invalidates any iterator from a version less than `version`. There
* shouldn't be any anyway because you should have already applied all
* mutations through `version` to your unversioned data structure. */
void setOldestVersion(int64_t version);
/** The version of the most recent call to `addMutations`. */
int64_t getVersion() const;
/** The version of the most recent call to `setOldestVersion`. */
int64_t getOldestVersion() const;
/** Iterates through a canonicalized[1] view of all the mutations
* from `oldestVersion` to the iterator's version. There may be mutations from
* versions < `oldestVersion`, but they won't affect the result, and can be
* ignored if desired. It's thread-safe to operate on an iterator concurrently
* with any method of `VersionedMap`, as long as it's not invalidated by
* `setOldestVersion`.
* @warning must not outlive its `VersionedMap`.
*
* [1]: Clears at different versions may be adjacent. This is necessary for
* precisely tracking at what version the mutations take effect.
*/
struct Iterator {
/** A default-constructed iterator holds a null `impl`. */
Iterator() = default;
/** Destructor, copy, and move operations are user-declared here; their
* definitions are not part of this header. */
~Iterator();
Iterator(const Iterator &);
Iterator &operator=(const Iterator &);
Iterator(Iterator &&) noexcept;
Iterator &operator=(Iterator &&) noexcept;
/** The value produced by dereferencing an iterator: the fields of a
* `Mutation` plus a `version` - presumably the version at which the
* mutation takes effect (see [1] above; TODO confirm). */
struct VersionedMutation {
const uint8_t *param1;
const uint8_t *param2;
int param1Len;
int param2Len;
MutationType type;
int64_t version;
};
/** iter must not be `end()`. Memory pointed-to by return value is valid as
* long as the iterator is valid. */
VersionedMutation operator*() const;
/** Prefix increment. iter must not be `end()` */
Iterator &operator++();
/** Postfix increment. iter must not be `end()` */
Iterator operator++(int);
/** Prefix decrement. iter must not be `begin()` */
Iterator &operator--();
/** Postfix decrement. iter must not be `begin()` */
Iterator operator--(int);
/** Member type aliases; presumably provided so the iterator works with
* standard iterator machinery - TODO confirm. Note that `operator*` returns
* `value_type` by value, not by reference. */
using difference_type = ptrdiff_t;
using value_type = VersionedMutation;
/** Equality and inequality comparison between two iterators. The exact
* notion of equality is defined by the implementation, not this header. */
bool operator==(const Iterator &) const;
bool operator!=(const Iterator &) const;
/** 0 if this iterator's param1 is equal to the queried key, < 0 if this
* iterator's param1 is less than the queried key, and > 0 if this
* iterator's param1 is greater than the queried key. Iterating forward is
* treated as a query for the first mutation greater than this iterator's
* mutation, so will always result in a `cmp` > 0, and the converse for
* iterating backward (`cmp` < 0). */
int cmp() const;
/** @private Opaque implementation type; only forward-declared here. */
struct Impl;
private:
friend struct VersionedMap;
Impl *impl = nullptr;
};
/** Perform `count` "first greater than or equal to" queries. `key`,
* `version`, and `iterator` are parallel arrays indexed by `i` in
* [0, `count`). If there's a mutation intersecting `key[i]` at `version[i]`
* then `iterator[i]` will point to that mutation. Otherwise it points to the
* first mutation greater or `end()` if none exists. `version[i]` must be >=
* `getOldestVersion()` and <= `getVersion()`.
*
* Thread-safe as long as a version is not concurrently invalidated by
* `setOldestVersion`. There's a performance benefit if you pass iterator[i]
* previously obtained at version[i]. */
void firstGeq(const Key *key, const int64_t *version, Iterator *iterator,
int count) const;
/** Returns an iterator to the first mutation visible at `version`, or `end()`
* if none exists. Thread-safe as long as `version` is not concurrently
* invalidated by `setOldestVersion`. */
Iterator begin(int64_t version) const;
/** The "past-the-end" iterator. */
Iterator end(int64_t version) const;
/** Returns the memory usage in bytes. Does not include memory used by
* iterators. */
int64_t getBytes() const;
/** Map starts with no mutations, with `getOldestVersion()` == `getVersion()`
* == `version`. */
explicit VersionedMap(int64_t version);
~VersionedMap();
/* Move-only: moves are declared and copies are deleted, but only when the
* compiler reports a language version newer than C++98.
* NOTE(review): `Iterator` above already uses `= default`, `noexcept`, and
* `nullptr` unconditionally, so this guard looks redundant - confirm whether
* it can be dropped. */
#if __cplusplus > 199711L
VersionedMap(VersionedMap &&other) noexcept;
VersionedMap &operator=(VersionedMap &&other) noexcept;
VersionedMap(const VersionedMap &) = delete;
VersionedMap &operator=(const VersionedMap &) = delete;
#endif
/** @private Opaque implementation type; only forward-declared here. */
struct Impl;
private:
Impl *impl;
};
} /* namespace weaselab */
#else
#endif