From e956c526b2840b4c9b174456b54beb1918dbfa75 Mon Sep 17 00:00:00 2001
From: Andrew Noyes <andrew@weaselab.dev>
Date: Fri, 16 Feb 2024 16:27:55 -0800
Subject: [PATCH] Draft paper introduction

---
 paper/.gitignore       |  5 ++-
 paper/bibliography.bib | 83 ++++++++++++++++++++++++++++++++++++++++--
 paper/paper.tex        | 40 ++++++++++++++++++--
 3 files changed, 120 insertions(+), 8 deletions(-)

diff --git a/paper/.gitignore b/paper/.gitignore
index 24cfcb3..945e00e 100644
--- a/paper/.gitignore
+++ b/paper/.gitignore
@@ -3,7 +3,10 @@
 *.bcf
 *.blg
 *.dvi
+*.fdb_latexmk
+*.fls
 *.log
 *.out
 *.pdf
-*.run.xml
\ No newline at end of file
+*.run.xml
+*.synctex.gz
diff --git a/paper/bibliography.bib b/paper/bibliography.bib
index 9c893e1..3bb3857 100644
--- a/paper/bibliography.bib
+++ b/paper/bibliography.bib
@@ -51,8 +51,8 @@ url = {https://doi.org/10.1145/78973.78977},
 doi = {10.1145/78973.78977},
 abstract = {Skip lists are data structures that use probabilistic balancing rather than strictly enforced balancing. As a result, the algorithms for insertion and deletion in skip lists are much simpler and significantly faster than equivalent algorithms for balanced trees.},
 journal = {Commun. ACM},
-month = {jun},
-pages = {668–676},
+month = {6},
+pages = {668--676},
 numpages = {9},
 keywords = {data structures, searching, trees}
 }
@@ -75,4 +75,81 @@ keywords = {data structures, searching, trees}
   timestamp    = {Fri, 24 Mar 2023 00:00:01 +0100},
   biburl       = {https://dblp.org/rec/conf/icde/LeisK013.bib},
   bibsource    = {dblp computer science bibliography, https://dblp.org}
-}
\ No newline at end of file
+}
+
+@book{10.5555/17299,
+author = {Bernstein, Philip A. and Hadzilacos, Vassos and Goodman, Nathan},
+title = {Concurrency Control and Recovery in Database Systems},
+year = {1986},
+isbn = {0201107155},
+publisher = {Addison-Wesley Longman Publishing Co., Inc.},
+address = {USA}
+}
+
+@book{cormen2022introduction,
+  title={Introduction to Algorithms},
+  author={Cormen, Thomas H. and Leiserson, Charles E. and Rivest, Ronald L. and Stein, Clifford},
+  year={2022},
+  publisher={MIT Press},
+  chapter={17 Augmenting Data Structures}
+}
+
+@article{bentley1979decomposable,
+  title={Decomposable Searching Problems},
+  author={Bentley, Jon Louis},
+  journal={Information Processing Letters},
+  volume={8},
+  number={5},
+  pages={244--251},
+  year={1979}
+}
+
+@article{adelson1962algorithm,
+  title={An Algorithm for Organization of Information},
+  author={Adelson-Velskii, Georgii Maksimovich and Landis, Evgenii Mikhailovich},
+  journal={Doklady Akademii Nauk},
+  volume={146},
+  number={2},
+  pages={263--266},
+  year={1962},
+  publisher={Russian Academy of Sciences}
+}
+
+@inproceedings{guibas1978dichromatic,
+  title={A Dichromatic Framework for Balanced Trees},
+  author={Guibas, Leo J. and Sedgewick, Robert},
+  booktitle={19th Annual Symposium on Foundations of Computer Science (SFCS 1978)},
+  pages={8--21},
+  year={1978},
+  publisher={IEEE}
+}
+
+@article{seidel1996randomized,
+  title={Randomized Search Trees},
+  author={Seidel, Raimund and Aragon, Cecilia R.},
+  journal={Algorithmica},
+  volume={16},
+  number={4--5},
+  pages={464--497},
+  year={1996},
+  publisher={Springer}
+}
+
+@article{comer1979ubiquitous,
+  title={The Ubiquitous {B-Tree}},
+  author={Comer, Douglas},
+  journal={ACM Computing Surveys},
+  volume={11},
+  number={2},
+  pages={121--137},
+  year={1979},
+  publisher={ACM New York, NY, USA}
+}
+
+@inproceedings{binna2018hot,
+  title={{HOT}: A Height Optimized Trie Index for Main-Memory Database Systems},
+  author={Binna, Robert and Zangerle, Eva and Pichl, Martin and Specht, G{\"u}nther and Leis, Viktor},
+  booktitle={Proceedings of the 2018 International Conference on Management of Data},
+  pages={521--534},
+  year={2018}
+}
diff --git a/paper/paper.tex b/paper/paper.tex
index b2c2bf5..f0da3e4 100644
--- a/paper/paper.tex
+++ b/paper/paper.tex
@@ -1,9 +1,10 @@
 \documentclass[twocolumn]{article}
 
 \usepackage{hyperref}
+\usepackage[utf8]{inputenc}
 
 \title{ARTful Conflict Checking for FoundationDB}
-\author{Andrew Noyes \\ \href{mailto:andrew@weaselab.dev}{andrew@weaselab.dev}}
+\author{Andrew Noyes\thanks{\href{mailto:andrew@weaselab.dev}{andrew@weaselab.dev}}}
 \date{}
 
 \usepackage{biblatex}
@@ -16,9 +17,40 @@
 \section{Abstract}
 
 FoundationDB \cite{DBLP:conf/sigmod/ZhouXSNMTABSLRD21} provides serializability using a specialized data structure called \textit{lastCommit} \footnote{See Algorithm 1 referenced in \cite{DBLP:conf/sigmod/ZhouXSNMTABSLRD21}}.
-This data structure maps key ranges (sets of keys denoted by either a singleton key or a half-open interval) to a ``commit version'' represented as a 64-bit integer.
-FoundationDB implements \textit{lastCommit} as a version-augmented probabilistic SkipList \cite{10.1145/78973.78977}.
-In this paper, we propose an alternative implementation of \textit{lastCommit} as a version-augmented Adaptive Radix Tree \cite{DBLP:conf/icde/LeisK013}, and evaluate its performance.
+This data structure maps key ranges (sets of bitwise-lexicographically-ordered keys denoted by either a singleton key or a half-open interval) to a version represented as a 64-bit integer.
+FoundationDB implements \textit{lastCommit} as a version-augmented probabilistic skip list \cite{10.1145/78973.78977}.
+In this paper, we propose an alternative implementation of \textit{lastCommit} as a version-augmented Adaptive Radix Tree (ART) \cite{DBLP:conf/icde/LeisK013}, and evaluate its performance.
+
+\section{Introduction}
+
+Let's begin by considering design options for \textit{lastCommit}.
+In order to manage half-open intervals we need an ordered data structure, so hash tables are out of consideration.
+For any ordered data structure we can implement \textit{lastCommit} using a representation where a logical key is mapped to the value of the last physical key less than or equal to the logical key.
+This is a standard technique used throughout FoundationDB.
+
+The problem with applying this to an off-the-shelf ordered data structure is that checking a read range is linear in the number of intersecting physical keys.
+Under a high-enough write load, there can be arbitrarily many point writes unexpired in the MVCC \cite{10.5555/17299} window.
+Scanning through every point write intersecting a large range read would make conflict checking unacceptably slow.
+
+This suggests we consider augmenting \cite{cormen2022introduction} an ordered data structure to make checking the max version of a range sublinear.
+Since finding the maximum of a set of elements is a decomposable search problem \cite{bentley1979decomposable}, we could apply the general technique using \texttt{std::max} as our binary operation, and \texttt{MIN\_INT} as our identity.
+Algorithmically, this describes the implementation of FoundationDB's skip list.
+We can also consider any other ordered data structure to augment, such as any variant of a balanced binary search tree \cite{adelson1962algorithm,guibas1978dichromatic,seidel1996randomized}, a B-tree \cite{comer1979ubiquitous}, or a radix tree \cite{DBLP:conf/icde/LeisK013,binna2018hot}.
+
+Let's compare the relevant properties of our candidate data structures for insertion/update and read operations.
+After insertion, the max version along the search path must reflect the update.
+For comparison-based trees, updating max version along the search path cannot be done during top-down search, because \emph{insertion will change the search path}, and we do not know whether this is an insert or an update until we complete the top-down search.
+We have no choice but to do a second, bottom-up pass to propagate max version changes.
+Furthermore, the usual way of propagating the change will always propagate all the way to the root, since inserts always use the highest-yet version.
+For a radix tree, max version can be updated on the top-down pass, and there's minimal overhead compared to an un-augmented radix tree.
+
+For ``last less than or equal to'' queries, skip lists have the convenient property that no backtracking is necessary, since the bottommost level is a sorted linked list.
+Binary search trees and radix trees both require backtracking up the search path.
+It's possible to trade off the backtracking for the increased overhead of maintaining the elements in an auxiliary sorted linked list during insertion.
+
+Our options also have various tradeoffs inherited from their un-augmented versions, such as different worst-case and expected bounds on the length of search paths and the number of rotations performed upon insert.
+ART has been shown \cite{DBLP:conf/icde/LeisK013} to offer superior performance to comparison-based data structures on modern hardware, which is on its own a compelling reason to consider it.
+The Height Optimized Trie (HOT) \cite{binna2018hot} outperforms ART, but has a few practical disadvantages\footnote{HOT has more implementation complexity than the already-daunting ART. Additionally, it requires AVX2 instructions and involves rebalancing operations during insertion. Even so, it's likely that a HOT-based implementation of \textit{lastCommit} would be superior.} and will not be considered in this paper.
 
 \printbibliography