Draft paper introduction
This commit is contained in:
5
paper/.gitignore
vendored
5
paper/.gitignore
vendored
@@ -3,7 +3,10 @@
|
||||
*.bcf
|
||||
*.blg
|
||||
*.dvi
|
||||
*.fdb_latexmk
|
||||
*.fls
|
||||
*.log
|
||||
*.out
|
||||
*.pdf
|
||||
*.run.xml
|
||||
*.run.xml
|
||||
*.synctex.gz
|
||||
|
@@ -51,8 +51,8 @@ url = {https://doi.org/10.1145/78973.78977},
|
||||
doi = {10.1145/78973.78977},
|
||||
abstract = {Skip lists are data structures that use probabilistic balancing rather than strictly enforced balancing. As a result, the algorithms for insertion and deletion in skip lists are much simpler and significantly faster than equivalent algorithms for balanced trees.},
|
||||
journal = {Commun. ACM},
|
||||
month = {jun},
|
||||
pages = {668–676},
|
||||
month = {6},
|
||||
pages = {668--676},
|
||||
numpages = {9},
|
||||
keywords = {data structures, searching, trees}
|
||||
}
|
||||
@@ -75,4 +75,81 @@ keywords = {data structures, searching, trees}
|
||||
timestamp = {Fri, 24 Mar 2023 00:00:01 +0100},
|
||||
biburl = {https://dblp.org/rec/conf/icde/LeisK013.bib},
|
||||
bibsource = {dblp computer science bibliography, https://dblp.org}
|
||||
}
|
||||
}
|
||||
|
||||
@book{10.5555/17299,
|
||||
author = {Bernstein, Philip A and Hadzilacos, Vassos and Goodman, Nathan},
|
||||
title = {Concurrency control and recovery in database systems},
|
||||
year = {1986},
|
||||
isbn = {0201107155},
|
||||
publisher = {Addison-Wesley Longman Publishing Co., Inc.},
|
||||
address = {USA}
|
||||
}
|
||||
|
||||
@book{cormen2022introduction,
|
||||
title={Introduction to algorithms},
|
||||
author={Cormen, Thomas H and Leiserson, Charles E and Rivest, Ronald L and Stein, Clifford},
|
||||
year={2022},
|
||||
publisher={MIT Press},
|
||||
chapter={17 Augmenting Data Structures}
|
||||
}
|
||||
|
||||
@article{bentley1979decomposable,
|
||||
title={Decomposable searching problems},
|
||||
author={Bentley, Jon Louis},
|
||||
journal={Inf. Process. Lett.},
|
||||
volume={8},
|
||||
number={5},
|
||||
pages={244--251},
|
||||
year={1979}
|
||||
}
|
||||
|
||||
@inproceedings{adelson1962algorithm,
|
||||
title={An algorithm for organization of information},
|
||||
author={Adelson-Velskii, Georgii Maksimovich and Landis, Evgenii Mikhailovich},
|
||||
booktitle={Doklady Akademii Nauk},
|
||||
volume={146},
|
||||
number={2},
|
||||
pages={263--266},
|
||||
year={1962},
|
||||
organization={Russian Academy of Sciences}
|
||||
}
|
||||
|
||||
@inproceedings{guibas1978dichromatic,
|
||||
title={A dichromatic framework for balanced trees},
|
||||
author={Guibas, Leo J and Sedgewick, Robert},
|
||||
booktitle={19th Annual Symposium on Foundations of Computer Science (sfcs 1978)},
|
||||
pages={8--21},
|
||||
year={1978},
|
||||
organization={IEEE}
|
||||
}
|
||||
|
||||
@article{seidel1996randomized,
|
||||
title={Randomized search trees},
|
||||
author={Seidel, Raimund and Aragon, Cecilia R},
|
||||
journal={Algorithmica},
|
||||
volume={16},
|
||||
number={4--5},
|
||||
pages={464--497},
|
||||
year={1996},
|
||||
publisher={Springer}
|
||||
}
|
||||
|
||||
@article{comer1979ubiquitous,
|
||||
title={The Ubiquitous {B}-Tree},
|
||||
author={Comer, Douglas},
|
||||
journal={ACM Computing Surveys (CSUR)},
|
||||
volume={11},
|
||||
number={2},
|
||||
pages={121--137},
|
||||
year={1979},
|
||||
publisher={ACM New York, NY, USA}
|
||||
}
|
||||
|
||||
@inproceedings{binna2018hot,
|
||||
title={HOT: A height optimized trie index for main-memory database systems},
|
||||
author={Binna, Robert and Zangerle, Eva and Pichl, Martin and Specht, G{\"u}nther and Leis, Viktor},
|
||||
booktitle={Proceedings of the 2018 International Conference on Management of Data},
|
||||
pages={521--534},
|
||||
year={2018}
|
||||
}
|
||||
|
@@ -1,9 +1,10 @@
|
||||
\documentclass[twocolumn]{article}
|
||||
|
||||
\usepackage{hyperref}
|
||||
\usepackage[utf8]{inputenc}
|
||||
|
||||
\title{ARTful Conflict Checking for FoundationDB}
|
||||
\author{Andrew Noyes \\ \href{mailto:andrew@weaselab.dev}{andrew@weaselab.dev}}
|
||||
\author{Andrew Noyes\thanks{\href{mailto:andrew@weaselab.dev}{andrew@weaselab.dev}}}
|
||||
\date{}
|
||||
|
||||
\usepackage{biblatex}
|
||||
@@ -16,9 +17,40 @@
|
||||
\section{Abstract}
|
||||
|
||||
FoundationDB~\cite{DBLP:conf/sigmod/ZhouXSNMTABSLRD21} provides serializability using a specialized data structure called \textit{lastCommit}.\footnote{See Algorithm~1 in~\cite{DBLP:conf/sigmod/ZhouXSNMTABSLRD21}.}
|
||||
This data structure maps key ranges (sets of keys denoted by either a singleton key or a half-open interval) to a ``commit version'' represented as a 64-bit integer.
|
||||
FoundationDB implements \textit{lastCommit} as a version-augmented probabilistic SkipList \cite{10.1145/78973.78977}.
|
||||
In this paper, we propose an alternative implementation of \textit{lastCommit} as a version-augmented Adaptive Radix Tree \cite{DBLP:conf/icde/LeisK013}, and evaluate its performance.
|
||||
This data structure maps key ranges (sets of bitwise-lexicographically-ordered keys denoted by either a singleton key or a half-open interval) to a version represented as a 64-bit integer.
|
||||
FoundationDB implements \textit{lastCommit} as a version-augmented probabilistic skip list \cite{10.1145/78973.78977}.
|
||||
In this paper, we propose an alternative implementation of \textit{lastCommit} as a version-augmented Adaptive Radix Tree (ART) \cite{DBLP:conf/icde/LeisK013}, and evaluate its performance.
|
||||
|
||||
\section{Introduction}
|
||||
|
||||
Let's begin by considering design options for \textit{lastCommit}.
|
||||
In order to manage half-open intervals we need an ordered data structure, so hash tables are out of consideration.
|
||||
For any ordered data structure we can implement \textit{lastCommit} using a representation where a logical key is mapped to the value of the last physical key less than or equal to the logical key.
|
||||
This is a standard technique used throughout FoundationDB.
|
||||
|
||||
The problem with applying this to an off-the-shelf ordered data structure is that checking a read range is linear in the number of intersecting physical keys.
|
||||
Under a sufficiently high write load, there can be arbitrarily many unexpired point writes in the MVCC~\cite{10.5555/17299} window.
|
||||
Scanning through every point write intersecting a large range read would make conflict checking unacceptably slow.
|
||||
|
||||
This suggests augmenting an ordered data structure~\cite{cormen2022introduction} so that the max version of a range can be computed in sublinear time.
|
||||
Since finding the maximum of a set of elements is a decomposable search problem~\cite{bentley1979decomposable}, we could apply the general technique using \texttt{std::max} as our binary operation, and \texttt{INT64\_MIN} as our identity.
|
||||
Algorithmically, this describes the implementation of FoundationDB's skip list.
|
||||
We can also consider any other ordered data structure to augment, such as any variant of a balanced binary search tree~\cite{adelson1962algorithm,guibas1978dichromatic,seidel1996randomized}, a B-tree~\cite{comer1979ubiquitous}, or a radix tree~\cite{DBLP:conf/icde/LeisK013,binna2018hot}.
|
||||
|
||||
Let's compare the relevant properties of our candidate data structures for insertion/update and read operations.
|
||||
After insertion, the max version along the search path must reflect the update.
|
||||
For comparison-based trees, updating the max version along the search path cannot be done during top-down search, because \emph{insertion will change the search path}, and we do not know whether this is an insert or an update until we complete the top-down search.
|
||||
We have no choice but to do a second, bottom-up pass to propagate max version changes.
|
||||
Furthermore, the usual way of propagating the change will always propagate all the way to the root, since inserts always use the highest-yet version.
|
||||
For a radix tree, the max version can be updated on the top-down pass, and there's minimal overhead compared to the un-augmented radix tree.
|
||||
|
||||
For ``last less than or equal to'' queries, skip lists have the convenient property that no backtracking is necessary, since the bottommost level is a sorted linked list.
|
||||
Binary search trees and radix trees both require backtracking up the search path.
|
||||
It's possible to trade off the backtracking for the increased overhead of maintaining the elements in an auxiliary sorted linked list during insertion.
|
||||
|
||||
Our options also have various tradeoffs inherited from their un-augmented versions such as different worst-case and expected bounds on the length of search paths and the number of rotations performed upon insert.
|
||||
ART has been shown~\cite{DBLP:conf/icde/LeisK013} to outperform comparison-based data structures on modern hardware, which is on its own a compelling reason to consider it.
|
||||
The Height Optimized Trie (HOT)~\cite{binna2018hot} outperforms ART, but has a few practical disadvantages\footnote{HOT has more implementation complexity than the already-daunting ART. Additionally, it requires AVX2 instructions and involves rebalancing operations during insertion. Even so, it's likely that a HOT-based \textit{lastCommit} implementation would be superior.} and will not be considered in this paper.
|
||||
|
||||
\printbibliography
|
||||
|
||||
|
Reference in New Issue
Block a user