feat: Added start of paper to comps
This commit is contained in:

paper/block-search.drawio.png  BIN  Normal file
Binary file not shown. After: Width | Height | Size: 22 KiB

paper/document.tex  542 lines  Normal file
@@ -0,0 +1,542 @@
\documentclass[10pt,twocolumn]{article}

\usepackage{oxycomps}
\bibliography{references}

\pdfinfo{
    /Title (SpatialDB: A Database for Storing Dense Three-Dimensional Voxel Structures)
    /Author (Nicholas Novak)
}

\title{SpatialDB: A Database for Storing Dense Three-Dimensional Voxel Structures}
\author{Nicholas Novak}
\affiliation{Occidental College}
\email{nnovak@oxy.edu}

\begin{document}

\maketitle

\section{Introduction and Problem Context}

% What my project is
In my senior comprehensive project, I have designed and implemented a database
application built specifically to store complex shapes as ``voxels'', or
three-dimensional pixels.

% Applications of voxels
A voxel\cite{enwiki:1186283262} represents a single point or cube of variable
size in a three-dimensional grid. Voxels can approximately model many
three-dimensional structures while reducing the computational complexity of
analyzing those shapes, which has led to many data-related use cases outside
of computer science. For example, to model the inner workings of the brain,
neuroscientists track oxygen concentration through neural tissue on a voxel
grid as part of fMRI studies\cite{norman2006beyond}, and movie studios such as
DreamWorks use voxel data structures to model light reflections for visual
effects\cite{museth2013vdb}. The output of MRI scans in hospitals is a very
high-resolution voxel grid. Most recently, machine learning models are being
trained on the LIDAR data from self-driving cars\cite{li2020deep} in order to
better process their environments. However, voxels are not often thought of as
a way to store three-dimensional shapes, and existing research focuses mainly
on efficiently representing and processing them. My approach takes this
problem of voxel storage and representation and turns it into a problem of
database design.

\subsection{Using Minecraft as a Model for a Database}

% The problems with Minecraft
Minecraft\footnote{https://www.minecraft.net/en-us}, released in 2009, is a
sandbox game played in a world composed entirely of cubic voxels, where the
player has complete freedom to manipulate the world by building, destroying,
or exploring any part of it. I am focusing this database on the requirements
of Minecraft because the game involves additional challenges that traditional
databases do not consider. Primarily, the world of Minecraft is infinite in
the horizontal $x$ and $z$ axes, but fixed in the $y$ axis, which limits how
much of the world the database can hold at once. The world is also a denser
voxel grid than in many other applications, meaning that far more of the
blocks in the world are filled than empty.

A game is also a real-time application, which means that any performance issues
will be immediately apparent to the user. Most databases can be evaluated on
speed alone, but because the Minecraft server processes new information 20
times per second, the game has a time budget of 50ms to handle all game logic,
including the storing of data. Less time spent processing the data in the
world frees up more time for the game to process other work, although
finishing work earlier will not necessarily feel faster to the end user if the
work is still under the 50ms budget. Most databases do not meet this
requirement: even though they may be faster on average, their complexity means
they cannot guarantee that every operation finishes within this time limit.

These limitations also make Minecraft unable to take advantage of a cache.
Since the number of different operations that can be done on the world is
infinitely large, remembering previous operations will often not help the
system's performance. Minecraft also provides a good benchmark for the
database, because the unpredictability of players stresses the system's
ability to return results in a variety of settings.

\section{Technical Background}

\subsection{What is a database?}
When I refer to the concept of a database, I am referencing a program that sits
more or less as a ``black box'' between the user and a software application,
storing any data required for the application. In most existing applications,
this is done by a category of databases called ``relational databases'', which
offer a very general-purpose way to store user data that is highly connected.
For instance, a person stored in a relational database would be efficiently
linked to any of their associated information, such as name or age.

% The model of a database
In database terms, any amount of data added to the database is called a
``write'', data retrieved from the database is called a ``read'', and any
questions asked, such as ``how many people have done this'', are called
``queries''. Developers ask these questions through computer languages, one
such example being Structured Query Language (SQL), which allow the database
to be queried efficiently.

\subsection{Challenges With Existing Databases}

% Software development and SQL
Most software engineering projects start with a simple front-end and back-end,
typically implemented with some sort of Model-View-Controller architecture and
connected to a relational SQL database \cite{sqliteOnlyDatabase}. This idea was
popularized by frameworks such as Ruby on Rails and Django, where the model was
most often backed by structures within the database. This approach allowed
software developers to focus on writing business logic instead of worrying
about the inner workings of the database. Many start-ups were built this way,
such as GitHub \cite{githubSingleSQL}, which recently moved off its single SQL
database after 13 years, citing performance issues.

% Challenges with working with SQL: Performance
Using a single SQL-speaking database can be a significant advantage for
development speed, but the database can have trouble keeping up with the
demands of the application as the performance requirements expand.
% Caching
As soon as this happens, companies typically put smaller caching applications
in front of their database, such as \verb|Redis|\footnote{https://redis.io/},
\verb|memcached|\cite{nishtala2013scaling}, or \verb|TAO| \cite{bronson2013tao},
to let the application remember some of the commonly asked questions and
reduce load on the database by not doing the same work again.

\subsubsection{The Complexity of General-Purpose Databases}
% What is being done about this
Modern SQL databases are also very complex. Three of the most popular SQL
databases, PostgreSQL, MySQL, and SQLite, have 1.4 million lines
\footnote{https://wiki.postgresql.org/wiki/GSoC\_2018, in reference to the
text ``PostgreSQL is over 1.3M lines of code and some of the code paths can be
tricky to reach.''} of code, 2.9 million lines
\footnote{https://www.openhub.net/p/mysql}, and 150,000 lines
\footnote{https://www.sqlite.org/testing.html} respectively.

% Why are databases inefficient?
Why are databases so complex? Most of the complexity comes from the fact that,
because these database systems are so general-purpose, they cannot assume
anything about the data stored in them. For the database, finding an efficient
plan to answer each query is a known NP-hard
problem\cite{chatterji2002complexity}, and to keep itself fast, the database
must construct this plan with a complex set of approximations, based on the
assumptions that it can make, which leads to ever-evolving complexity.

% Impossible to maintain
With this complexity, it is impossible for a single person to understand the
complete inner workings of a database. Thus, managing the company's database
often becomes the job of a dedicated person at companies that can afford it,
or of entire teams of engineers at larger organizations such as
Google\cite{googlePerfTeam}.

% Intro to special-purpose databases
What happens at larger companies that can afford more engineering time and
have a specific problem that they cannot solve with a traditional database?
Typically, this leads to the creation of special-purpose database solutions.
For instance, the global scale of iCloud and Apple's cloud solutions required
them to create FoundationDB\cite{zhou2021foundationdb}. A different set of
challenges in the Facebook inbox led to the creation of Apache
Cassandra\cite{lakshman2010cassandra}, which is optimized to allow many
messages to be received, at the expense of search speed, since searches happen
far less frequently.

\subsubsection{The Special-Purpose Database}

Limiting a database's design to a specific use-case can make the development
process much simpler, to the point where it can be done by a single person,
and can offer higher performance. The first question that needs to be asked is
whether the application is \textit{write-heavy} or \textit{read-heavy}.
Read-heavy applications occur often in web development; most social media
platforms have far more users reading content than writing new content for
the platform. In contrast, write-heavy applications are often seen in
analytics workloads, where data is written from many sources and analyzed
infrequently by users.

My application has a relatively even write and read balance, and I evaluated
three different storage data structures before choosing to implement my own.

% Special-purpose databases
Recently, companies such as Tigerbeetle\cite{tigerbeetleDesign} have taken this
domain-driven approach to database design even further, designing a database
from the ground up for financial accounting; it processes 1,757 accounting
transactions per second where a reference MySQL implementation processes 76
\cite{tigerbeetlePerf}. This highly specialized and domain-specific approach
to creating databases is what my project is based on: creating a database
around the challenges of the game Minecraft.

\subsubsection{Key-Value Databases}

One of the main architectures that I considered for my project is a design
called a key-value store\cite{kvdatabase}, which would store the relationship
of a single voxel to its value. Many other voxel databases use this method to
achieve constant-time operations when retrieving points, which means that
regardless of the size of the dataset, the database will always be able to
return a result in the same amount of time. This structure is behind many of
the high-performance caches that are commonly used to speed up web
applications, such as Redis and RocksDB\cite{dong2021rocksdb}. In order to
provide high speeds for this data, the key-value mappings are usually stored
in main memory, which is far more expensive and limited than the system's disk
drive, but offers a speed advantage of several orders of
magnitude\cite{latencyKnow}.

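To make this concrete, the sketch below shows the shape of a voxel-to-value
mapping in Go; the coordinate encoding and names are illustrative assumptions,
not the scheme used by Redis or RocksDB.

\begin{lstlisting}
// Hypothetical sketch of a key-value voxel store.
package main

import "fmt"

// voxelKey packs an (x, y, z) coordinate into one comparable map key.
type voxelKey struct{ x, y, z int32 }

func main() {
    store := make(map[voxelKey]string)

    // Reads and writes both take constant time on average,
    // regardless of how large the world grows.
    store[voxelKey{10, 64, -3}] = "minecraft:stone"
    fmt.Println(store[voxelKey{10, 64, -3}])
}
\end{lstlisting}
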
\section{Prior Work}

\subsection{Voxels for Efficient Computation}

Most existing literature on using voxels to store shapes focuses on applying
the voxel grid to efficient computation. Since voxel points are completely
independent of each other, parallel processors, which are increasingly common
on consumer hardware, can take advantage of this independence for a
significant speedup. In VDB\cite{museth2013vdb}, Museth demonstrates that by
modeling a sparse voxel grid at different resolutions, a computer cluster can
efficiently approximate a physical structure such as a cloud, in order to
calculate expensive lighting operations.

\subsection{Parallel Processing on Voxel Databases}

Williams\cite{williams1992voxel} expands upon the uses of a voxel database to
model graph- and mesh-based problems. Taking advantage of the parallelism in
the grid, many problems can be reframed in the representation of voxels and
solved far more efficiently. This model, however, assumes that every voxel is
stored in shared memory, making the approach viable only for problems that can
be modeled on one machine and that are computationally expensive rather than
data-intensive.

\subsection{Large Voxel Data Set Processing}

Another approach to the problem of storing voxel data is the distributed
approach of Gorte et al. \cite{gorte2023analysis}. Since memory is limited
within one computer, the workload can be split up between many servers, which
allows very large datasets to be worked on from a single workstation through
an API. This method keeps many of the same performance considerations, but
also assumes that the voxel data is not very dense, and uses a
three-dimensional data structure called an octree, which allows the user to
change the resolution of the data that they are working on. In the paper,
Gorte acknowledges the need to split large datasets up into smaller regions,
which is similar to the concept of ``chunks'' in my implementation.

\subsection{Previous Special-Purpose Databases}

The design of my database was also inspired by the LSM tree and data-driven
design of Tigerbeetle\cite{tigerbeetleDesign}, which is also able to handle
concurrent operations on the same data. Another database,
CockroachDB\footnote{https://www.cockroachlabs.com/product/}, uses a key-value
backend to store SQL-like tables and rows. Finally, the design of caching
layers in modern SQL caches such as Noria\cite{gjengset2018noria} shows that
it is possible to efficiently remember the complex queries found in SQL and
replicate them in real-time.

\section{Methods}

Almost every part of the database was designed so that most operations could
be done in constant time.

The database provides a simple interface to read and write data, consisting of
the following operations (a Go sketch of this interface is shown after the
list):
\begin{itemize}
  \item Read a single block
  \item Write a single block
  \item Change a range of blocks
  \item Read a pre-defined ``chunk'' of blocks
\end{itemize}

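The sketch below names these four operations as a Go interface; the type and
method names are my own shorthand, not a fixed public API.

\begin{lstlisting}
// Hypothetical sketch of the database's four core operations.
package spatialdb

// BlockState identifies a voxel's contents, e.g. "minecraft:stone".
type BlockState string

// Chunk is a fixed 16 x 16 x 256 column of blocks.
type Chunk struct {
    Blocks [16 * 16 * 256]BlockState
}

type SpatialDB interface {
    // Read a single block at a coordinate.
    ReadBlock(x, y, z int) (BlockState, error)
    // Write a single block at a coordinate.
    WriteBlock(x, y, z int, b BlockState) error
    // Change every block in an axis-aligned range to one state.
    FillRange(x1, y1, z1, x2, y2, z2 int, b BlockState) error
    // Read the pre-defined chunk at chunk coordinates (cx, cz).
    ReadChunk(cx, cz int) (*Chunk, error)
}
\end{lstlisting}
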
The process of fetching the data for a single point in the world starts at that
point's $x$, $y$, and $z$ location. The world is infinite in size on the
horizontal $x$ and $z$ axes, but limited in the vertical $y$ axis. In my
database, the world is composed of an infinite grid of ``chunks'': columns
that are a fixed $16 \times 16$ blocks in the $x$ and $z$ axes, but 256 blocks
in the vertical $y$ axis.

Once a point's location is known, a modulus operation determines which chunk
the point is located within. From there, the database only needs to retrieve
the data for the chunk stored at that location.

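A minimal sketch of this coordinate arithmetic follows, assuming 16-block
chunk sides; floor division is used so that negative coordinates map to the
correct chunk.

\begin{lstlisting}
// Continues the spatialdb package sketch above.

// chunkCoords converts a block position to its chunk's coordinates
// plus the block's offset inside that chunk.
func chunkCoords(x, z int) (cx, cz, localX, localZ int) {
    cx, cz = floorDiv(x, 16), floorDiv(z, 16)
    localX, localZ = x-cx*16, z-cz*16
    return
}

// floorDiv rounds toward negative infinity, unlike Go's / operator.
func floorDiv(a, b int) int {
    q := a / b
    if a%b != 0 && (a < 0) != (b < 0) {
        q--
    }
    return q
}
\end{lstlisting}
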
Initial implementations of my database focused on tree-based approaches for
finding the files for chunks, but given their complexity and non-constant
lookup time, I decided to store each chunk separately. However, in worlds with
chunk counts in the hundreds of thousands, the filesystem had trouble
searching through so many files, which led to performance problems. Finally, I
settled on merging all the chunk data into one file and using the filesystem's
\verb|seek| syscall to look up the offset of the correct chunk. A simple hash
table was then used to map each chunk's location to its offset in the file,
which keeps the memory cost low even with chunk counts in the millions. This
allows for constant-time searches for the chunk's data.

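A minimal sketch of this lookup, assuming one merged data file and an
in-memory offset table (the field names are illustrative):

\begin{lstlisting}
// Continues the spatialdb package sketch above.
import "os"

type chunkPos struct{ cx, cz int }

type chunkLoc struct{ offset, length int64 }

// chunkIndex maps chunk coordinates to byte ranges in the merged file.
type chunkIndex struct {
    file    *os.File
    offsets map[chunkPos]chunkLoc
}

// readChunkBytes seeks straight to the chunk's offset, so the cost of
// a lookup does not grow with the number of chunks stored on disk.
func (ix *chunkIndex) readChunkBytes(cx, cz int) ([]byte, error) {
    loc, ok := ix.offsets[chunkPos{cx, cz}]
    if !ok {
        return nil, nil // never generated; treated as empty
    }
    buf := make([]byte, loc.length)
    _, err := ix.file.ReadAt(buf, loc.offset) // seek + read in one call
    return buf, err
}
\end{lstlisting}
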
Once a chunk is retrieved from disk, it is broken down into smaller cubic
slices called ``sections''. Each section is a $16 \times 16 \times 16$ cubic
area that keeps an index for every block within it. The point's $y$ position
tells the database which section the point is in, and a simple formula
converts the remaining $x$ and $z$ axes into an index within the section.

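The index computation might look like the following sketch; the packing order
of the axes is an assumption for illustration.

\begin{lstlisting}
// Continues the spatialdb package sketch above.

// sectionFor returns which vertical section (0-15) holds a block,
// plus the block's flat index inside that 16x16x16 section.
func sectionFor(localX, y, localZ int) (section, index int) {
    section = y / 16
    layerY := y % 16
    // One 16x16 layer per y step inside the section.
    index = (layerY*16+localZ)*16 + localX
    return
}
\end{lstlisting}
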
Every section additionally stores a lookup table that maps a \textit{palette
index} to the state of a block. When the value for the point is retrieved from
the section, the value returned is not the block's state, but simply an index
into this palette. The palette lookup is done in constant time, and when a new
block that needs an additional state is added to the section, that value is
added to the palette in constant time as well. The existence of this palette
supports another part of the database: the ability to change large portions of
blocks in the world efficiently.

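A sketch of this palette structure, assuming string block states:

\begin{lstlisting}
// Continues the spatialdb package sketch above.

// section stores one small palette index per block; the palette maps
// those indices to full block states, so each distinct state is
// stored only once per section.
type section struct {
    palette []BlockState         // palette index -> state
    blocks  [16 * 16 * 16]uint16 // palette index per block
}

// stateAt resolves a block's palette index in constant time.
func (s *section) stateAt(index int) BlockState {
    return s.palette[s.blocks[index]]
}

// addState appends a new state and returns its palette index,
// in amortized constant time.
func (s *section) addState(b BlockState) uint16 {
    s.palette = append(s.palette, b)
    return uint16(len(s.palette) - 1)
}
\end{lstlisting}
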
Once the value of the point is found in the palette, the value can be returned
to the user. A visual diagram of this process can be found in Figure
\ref{fig:lookup}.

\begin{figure}
  \centering
  \includegraphics[width=8cm]{block-search.drawio.png}
  \caption{The process of looking up a single block}
  \label{fig:lookup}
\end{figure}

The ability to change a region of blocks is also a common operation in the
database, and one that is not locked to a specific range of chunks. This
operation is implemented by overwriting the palettes for a specific region:
by overwriting every palette entry with the same value, every block in the
region is effectively set to that value. This does, however, create the need
for an additional ``compaction'' step, where the palette is shrunk to remove
duplicate values and every block within the section is updated to point to the
correct index in the palette. This compaction is done upon the next write to
the section, and because only a fixed-size section needs to be changed, the
operation remains constant time.

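A sketch of this fill-then-compact idea, continuing the section type above
(the details are my own illustration):

\begin{lstlisting}
// Continues the spatialdb package sketch above.

// fill overwrites every palette entry with one state, changing the
// whole region without touching the per-block indices.
func (s *section) fill(b BlockState) {
    for i := range s.palette {
        s.palette[i] = b
    }
}

// compact deduplicates the palette and remaps the per-block indices.
// It touches one fixed-size section, so its cost is bounded.
func (s *section) compact() {
    remap := make(map[BlockState]uint16)
    var dense []BlockState
    for _, state := range s.palette {
        if _, ok := remap[state]; !ok {
            remap[state] = uint16(len(dense))
            dense = append(dense, state)
        }
    }
    for i, idx := range s.blocks {
        s.blocks[i] = remap[s.palette[idx]]
    }
    s.palette = dense
}
\end{lstlisting}
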
Finally, the retrieval of a single chunk can be done efficiently, because the
database already stores chunks separately and serializes them to the client.

% \cite{vohra2016apache}.

\section{Evaluation Metrics}

\subsection{Reading Single Voxels}

Reads and writes of single voxels are the most common fundamental operations
in my database, and the database should handle them in the same amount of time
regardless of the size of the world. Both my implementation and the simpler
key-value store meet this criterion.

\subsection{Changing Regions of Voxels}

Changing regions of voxels should be possible in linear time. This is because
resetting or changing a region of voxels is important while drawing shapes at
various resolutions. Lower-resolution shapes are less precise, and thus can be
written faster.

\subsection{Memory Requirements}

The memory requirement is set quite low, at 256MB, in order to require the
database to store most of its data on disk and to limit its memory usage to
important caching features. This limitation was chosen with larger datasets in
mind: because memory is much more expensive than disk storage, requiring the
data to fit in memory on a single machine would limit the analysis to smaller
voxel grids.

\subsection{Reading Regions of Voxels}

The ability to retrieve large shapes from the database is important: exporting
a shape requires an operation that can do so efficiently. This operation must
therefore be done in constant time, because as Gorte\cite{gorte2023analysis}
identifies, many researchers might want to work on the same dataset, and
exporting all of this data point-by-point would be inefficient for the
database to process. In the use-case of Minecraft, this allows the server to
support many more players at once by not sending every individual block to
each client. This requirement is not met by the key-value database, but is met
by my implementation, which sends the chunks as stored on disk.

\subsection{Reading Neighboring Blocks}

The last common operation in most voxel databases is the ability to read
points that neighbor another point. This is important because many voxel
shapes approximate cubic shapes \cite{gorte2023analysis}, and in Minecraft,
players are constantly affecting voxels that are near each other.

\section{Results and Discussion}

Benchmarking on my laptop, inserting values at various spreads around the
voxel world, I get the results in Figure \ref{fig:reads}, comparing an
in-memory implementation of SpatialDB, the disk-based implementation of
SpatialDB, and a memory-based key-value implementation:

\begin{figure}
  \centering
  \begin{tabular}{c | c | c | c}
    Spread of Points & In-memory & Disk & KeyValue\\
    \hline
    128 & 4275 & 4146669 & 176.7\\
    512 & 4184 & 3319162 & 190.6\\
    2048 & 2613 & 422938 & 184.8\\
    65536 & 2382 & 18814 & 186.1
  \end{tabular}
  \caption{Time (in ns) to operate on a single voxel, based on the size of the
  world (spread)}
  \label{fig:reads}
\end{figure}

These results show that the scaling remains consistent between the in-memory
version and the key-value store, although my implementation is about two
orders of magnitude slower than the latter. This scaling is, however, not met
by the on-disk database. Originally, I thought that these poor results came
from no caching being done on the chunk files, which would have made searches
much slower, but that still does not explain the improvement in performance
for larger worlds. This led me to implement a disk cache, which had results
similar to the final implementation, where I combined all the data into one
large file and selectively read sections from that file. This leads me to
believe that as the tested points grow more spread out, since the world is
only so large, many points will fall outside of the loaded chunks and
instantly return empty.

This behavior could likely be addressed by a change in caching methods and by
remembering the data for more chunks, but that still does not address the slow
speed of accessing data in the first place. The slow speeds most likely come
from decoding the JSON data stored on disk, which is relatively large at about
4 megabytes in size. A custom encoding method could be designed to replace
this scheme, or the storage space in the chunks could be pre-allocated so that
chunk data could be retrieved without decoding the entire chunk. However, this
would require a much more constrained data layout, and limit the
implementation of different voxels.

Additionally, compression

\section{Ethical Considerations}

\subsection{Considerations of Computing Resources}

Since databases are the core of most complex systems, they are often built to
run on server-grade hardware beyond what the normal consumer can afford
\footnote{\url{https://docs.oracle.com/en/database/oracle/oracle-database/12.2/ntdbi/oracle-database-minimum-hardware-requirements.html}}
\footnote{\url{https://wiki.lustre.org/Lustre_Server_Requirements_Guidelines}}.

The large hardware requirements of these databases come from the environments
where they are deployed; at many of these companies, the ability to keep
buying faster hardware lets the company work on other, more important things.
For the player, however, this effectively prices them out of a game that they
were already playing, especially since the database would have to run
alongside the existing Java application of Minecraft, which can quickly
exhaust system memory.

In the design of my server, I have to prioritize performance to take advantage
of existing hardware, while making sure that the application's accessibility
does not decrease as a result.

\subsection{Considerations of Complexity}
Another factor to consider in the implementation of my database is how complex
the existing systems are. Some of the most popular SQL databases, PostgreSQL
and MySQL, have 1.4 and 4.4 million lines of code respectively
\footnote{\url{https://news.ycombinator.com/item?id=24813239}}.

With so much complexity, overall understanding of the system decreases
significantly, especially for the individual user who has to debug their game.
Most of this complexity comes from the large amount of query logic that
handles caching and speeding up certain queries, so knowing the specific
problem that I am trying to solve removes the need for this machinery
entirely.

Especially since most of the people in the Minecraft community are volunteers
from the open-source community, debugging an application this large would be
out of scope for enjoying a game, and would likely lead to the database being
replaced with something simpler. The reliability characteristics of these
systems are also below what is required for Minecraft, since they are being
compared against a single-threaded Java program that has been tested to do the
correct thing.

\subsection{Considerations in Security}

Since these databases are very complex, there is also the risk that having a
server exposed over the internet through the Minecraft game server might leave
it open to attacks. While this is a large issue, an even more important
implication is the ability to configure the database correctly. Since these
databases are extremely complex, it is also very hard to make sure that they
are configured securely. There have been many high-profile data
breaches\footnote{\url{https://www.zdnet.com/article/hacker-ransoms-23k-mongodb-databases-and-threatens-to-contact-gdpr-authorities/}}
that involve a single server, even at larger companies that have dedicated
security teams.

My plan to mitigate this risk is to implement the database in a memory-safe
programming language, which should remove the class of memory-unsafety bugs
that accounts for around 70\% of all bugs in the Chromium browser
engine\footnote{\url{https://www.chromium.org/Home/chromium-security/memory-safety/}},
which is written entirely in non-memory-safe C++.

If the database information is ever leaked through the Minecraft protocol, the
attacker would have access to the full data, because I am planning to store it
unencrypted for performance reasons and to rely on the encryption of the
Minecraft client. However, the data involved contains no personally
identifying information, so its usefulness to an attacker would be close to
nothing.

Perhaps the most important security risk is an attacker who is able to access
the database directly, bypassing all the isolation in the Minecraft protocol,
in order to wipe or corrupt the data for malicious reasons. This would likely
render the Minecraft server unplayable and degrade the experience of the
players. My plan is to take advantage of the limited types of Minecraft items
to provide resilience and easy backups for the system, which the purpose-built
nature of the system makes possible
\footnote{\url{https://twitter.com/eatonphil/status/1568247643788267521?s=20}}.

\subsection{Considerations in Fairness}

In the implementation of databases, it can often be beneficial to make certain
operations faster at the expense of others that are not done as often. For
instance, if I notice that players often pull items in and out of their
systems, but almost never search through the list of items, I can take
advantage of this to speed up the database for the most common operations.
However, this can be problematic if the things that I choose to sacrifice
affect a certain group of users.

This tradeoff between speed and reliability occurs so often in computer
science that it is described in terms of percentiles. For instance, if half of
all requests finish within some time, that time is the 50th-percentile
latency; if only 1\% of requests are slower than some time, that time is the
99th-percentile latency. The impossibility of hurting no one when a decision
like this is made is written about by Google \cite{dean2013tail}, who have to
make every decision like this at their scale.

My plan is to avoid any tradeoffs that affect the normal gameplay of the
server, and to keep it within the 50ms timeframe that Minecraft has allocated
to itself. Apart from this, one of the main goals of the project is to give
consistent performance, so any further decisions will be made around the
existing implementation of the Minecraft server.

%https://www.embedded.com/implementing-a-new-real-time-scheduling-policy-for-linux-part-1/
%https://www.kernel.org/doc/html/latest/scheduler/sched-design-CFS.html
%https://helix979.github.io/jkoo/post/os-scheduler/

\subsection{Considerations in Accessibility}

By creating this system, I also have to consider whether players will need a
certain type of computer. Requiring a certain operating system or a more
powerful computer would cut off many of the people who were playing the game
before.

However, by basing the goal of the project on improving the performance of the
already existing implementation, any improvements would result in more people
being able to play than before. Also, designing the system for normal hardware
and in a cross-platform way does not limit who can access the improvements.

\subsection{Considerations in the Concentration of Power}

Any improvement to the performance of Minecraft servers would allow the larger
hosting companies, who rent servers monthly to individual people, to drive
down their hosting costs and earn larger returns than the smaller providers.
However, since this market is so competitive, because setting up such a
company is easy and the options between companies differ very little, I would
expect any improvement to quickly disappear into the competitive market and
benefit everyone equally.

\section{Future Work and Conclusion}

\printbibliography

\end{document}

paper/oxycomps.sty  102 lines  Normal file
@@ -0,0 +1,102 @@
% A simple two-column LaTeX style for Occidental College's CS senior projects.
% Based on latex8.sty by Paolo.Ienne@di.epfl.ch

\usepackage{times} % use Times as the default font
% define bold 11pt Times font for second-order headings
\font\elvbf = ptmb scaled 1100

\usepackage[style=numeric,sorting=nyt]{biblatex} % format the bibliography nicely
\usepackage{xpatch} % used to patch \textcite

% change \textcite to do family-name (year)
\xpatchbibmacro{textcite}
  {\printnames{labelname}}
  {\printnames{labelname} (\printfield{year})}
  {}
  {}
% sort bibliography by last name
\DeclareNameAlias{default}{family-given}

\usepackage{amsfonts} % provides many math symbols/fonts
\usepackage{amsmath} % provides many math environments
\usepackage{amssymb} % provides many math symbols/fonts
\usepackage{caption} % fixes caption spacing issues
\usepackage[usenames,dvipsnames]{color} % allows for colored text
\usepackage{enumitem} % allows adjustment of list spacing
\usepackage{graphicx} % allows insertion of graphics
\usepackage{hyperref} % creates links within the page and to URLs
\usepackage{listings} % provides the lstlisting environment
\usepackage{url} % formats URLs properly
\usepackage{verbatim} % provides the comment environment

% set dimensions of columns, gap between columns, and paragraph indent
\setlength{\textheight}{8.875in}
\setlength{\textwidth}{6.875in}
\setlength{\columnsep}{0.3125in}
\setlength{\topmargin}{0in}
\setlength{\headheight}{0in}
\setlength{\headsep}{0in}
\setlength{\parindent}{1em}
\setlength{\oddsidemargin}{-.304in}
\setlength{\evensidemargin}{-.304in}

% remove the space between list items
\setlist{noitemsep}

% style code listings
\lstset{
    basicstyle=\ttfamily\footnotesize,
    breaklines=true,
    showstringspaces=false
}

% style the title
\def\@maketitle{
   \newpage
   \begin{center}
      {\Large \bf \@title \par}
      % add two empty lines at the end of the title
      \vspace*{2\baselineskip}
      {
         \large
         \begin{tabular}[t]{c}
            \@author
         \end{tabular}
         \par
      }
      % add small space at the end of the author name
      \vspace*{.5em}
      {
         \ifx \@empty \@email
         \else
            \texttt{\@email}
            \par
            \vspace*{.25em}
         \fi
         \ifx \@empty \@affiliation
         \else
            \@affiliation
         \fi
      }
      % add empty line at the end of the title block
      \vspace*{\baselineskip}
   \end{center}
}

% style the abstract
\def\abstract{%
   \centerline{\large\bf Abstract}%
   \vspace*{\baselineskip}%
}

% define email and affiliation
\def\email#1{\gdef\@email{#1}}
\gdef\@email{}
\def\affiliation#1{\gdef\@affiliation{#1}}
\gdef\@affiliation{}

% correct heading spacing and type
\def\section{\@startsection {section}{1}{\z@}
   {14pt plus 2pt minus 2pt}{14pt plus 2pt minus 2pt} {\large\bf}}
\def\subsection{\@startsection {subsection}{2}{\z@}
   {13pt plus 2pt minus 2pt}{13pt plus 2pt minus 2pt} {\elvbf}}

paper/references.bib  307 lines  Normal file
@@ -0,0 +1,307 @@
					// Introduction
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@misc{sqliteOnlyDatabase,
 | 
				
			||||||
 | 
					  title={SQLite the only database you will ever need in most cases}, 
 | 
				
			||||||
 | 
					  url={https://unixsheikh.com/articles/sqlite-the-only-database-you-will-ever-need-in-most-cases.html}, 
 | 
				
			||||||
 | 
					  journal={https://unixsheikh.com/}, 
 | 
				
			||||||
 | 
					  publisher={https://unixsheikh.com/}, 
 | 
				
			||||||
 | 
					  author={Sheikh, Unix}, 
 | 
				
			||||||
 | 
					  year={2021}, 
 | 
				
			||||||
 | 
					  month={Apr},
 | 
				
			||||||
 | 
					}
 | 
				
			||||||
 | 
					
 | 
				
			||||||
 | 
					@misc{ enwiki:1181180757,
 | 
				
			||||||
 | 
					  author = "{Wikipedia contributors}",
 | 
				
			||||||
 | 
					  title = "Model–view–controller --- {Wikipedia}{,} The Free Encyclopedia",
 | 
				
			||||||
 | 
					  year = "2023",
 | 
				
			||||||
 | 
					  howpublished = "\url{https://en.wikipedia.org/w/index.php?title=Model%E2%80%93view%E2%80%93controller&oldid=1181180757}",
 | 
				
			||||||
 | 
					  note = "[Online; accessed 13-December-2023]"
 | 
				
}

@online{googlePerfTeam,
  author = {{Google Performance Team}},
  title = {System Performance},
  month = {May},
  year = {2023},
  url = {https://research.google/teams/system-performance/},
}

// Applications of voxels

@misc{enwiki:1186283262,
  author = {{Wikipedia contributors}},
  title = {Voxel --- {Wikipedia}{,} The Free Encyclopedia},
  year = {2023},
  url = {https://en.wikipedia.org/w/index.php?title=Voxel&oldid=1186283262},
  note = {[Online; accessed 13-December-2023]},
}

@article{norman2006beyond,
  title = {Beyond mind-reading: multi-voxel pattern analysis of {fMRI} data},
  author = {Norman, Kenneth A and Polyn, Sean M and Detre, Greg J and Haxby, James V},
  journal = {Trends in Cognitive Sciences},
  volume = {10},
  number = {9},
  pages = {424--430},
  year = {2006},
  publisher = {Elsevier},
}

@article{museth2013vdb,
  title = {{VDB}: High-resolution sparse volumes with dynamic topology},
  author = {Museth, Ken},
  journal = {ACM Transactions on Graphics (TOG)},
  volume = {32},
  number = {3},
  pages = {1--22},
  year = {2013},
  publisher = {ACM New York, NY, USA},
}

@article{li2020deep,
  title = {Deep learning for lidar point clouds in autonomous driving: A review},
  author = {Li, Ying and Ma, Lingfei and Zhong, Zilong and Liu, Fei and Chapman, Michael A and Cao, Dongpu and Li, Jonathan},
  journal = {IEEE Transactions on Neural Networks and Learning Systems},
  volume = {32},
  number = {8},
  pages = {3412--3432},
  year = {2020},
  publisher = {IEEE},
}

// Literature Review

@article{williams1992voxel,
  title = {Voxel databases: A paradigm for parallelism with spatial structure},
  author = {Williams, Roy D},
  journal = {Concurrency: Practice and Experience},
  volume = {4},
  number = {8},
  pages = {619--636},
  year = {1992},
  publisher = {Wiley Online Library},
}

@article{gorte2023analysis,
  title = {Analysis of very large voxel datasets},
  author = {Gorte, Ben},
  journal = {International Journal of Applied Earth Observation and Geoinformation},
  volume = {119},
  pages = {103316},
  year = {2023},
  publisher = {Elsevier},
}

@online{tigerbeetleDesign,
  author = {{Tigerbeetle Developers}},
  title = {Tigerbeetle Design Document},
  month = {July},
  year = {2020},
  url = {https://github.com/tigerbeetledb/tigerbeetle/blob/main/docs/DESIGN.md},
}

@online{tigerbeetlePerf,
  author = {{Tigerbeetle Developers}},
  title = {Tigerbeetle History},
  month = {July},
  year = {2020},
  url = {https://github.com/tigerbeetledb/tigerbeetle/blob/main/docs/HISTORY.md},
}

@online{nomiSlowME,
  author = {{Jokercortex}},
  title = {Moron's Guide to Managing Mechanical Monstrosities},
  month = {February},
  year = {2020},
  url = {https://github.com/Nomifactory/Guides/blob/latest/guides/AE2ForDummies.md},
}

@misc{btree,
  author = {{Wikipedia contributors}},
  title = {B-tree --- {Wikipedia}{,} The Free Encyclopedia},
  year = {2023},
  url = {https://en.wikipedia.org/w/index.php?title=B-tree&oldid=1146616935},
  note = {[Online; accessed 13-May-2023]},
}

@misc{kvdatabase,
  author = {{Wikipedia contributors}},
  title = {Key--value database --- {Wikipedia}{,} The Free Encyclopedia},
  year = {2023},
  url = {https://en.wikipedia.org/w/index.php?title=Key%E2%80%93value_database&oldid=1135560734},
  note = {[Online; accessed 13-May-2023]},
}

@online{latencyKnow,
  author = {Dean, Jeff},
  title = {Latency Numbers Every Programmer Should Know},
  year = {2018},
  url = {https://gist.github.com/jboner/2841832},
  note = {[Online; accessed 12-Dec-2023]},
}

@online{cockroachData,
  author = {{CockroachDB Developers}},
  title = {Structured data encoding in {CockroachDB} SQL},
  month = {March},
  year = {2017},
  url = {https://github.com/cockroachdb/cockroach/blob/master/docs/tech-notes/encoding.md},
}

@article{dong2021rocksdb,
  title = {{RocksDB}: Evolution of development priorities in a key-value store serving large-scale applications},
  author = {Dong, Siying and Kryczka, Andrew and Jin, Yanqin and Stumm, Michael},
  journal = {ACM Transactions on Storage (TOS)},
  volume = {17},
  number = {4},
  pages = {1--32},
  year = {2021},
  publisher = {ACM New York, NY},
}

@misc{lsm,
  author = {{Wikipedia contributors}},
  title = {Log-structured merge-tree --- {Wikipedia}{,} The Free Encyclopedia},
  year = {2023},
  url = {https://en.wikipedia.org/w/index.php?title=Log-structured_merge-tree&oldid=1153046573},
  note = {[Online; accessed 13-May-2023]},
}

@online{lsmUses,
  author = {Groom, Braden},
  title = {Understanding {LSM} Trees: What Powers Write-Heavy Databases},
  month = {June},
  year = {2020},
  url = {https://yetanotherdevblog.com/lsm/},
}

@article{chang2008bigtable,
  title = {Bigtable: A distributed storage system for structured data},
  author = {Chang, Fay and Dean, Jeffrey and Ghemawat, Sanjay and Hsieh, Wilson C and Wallach, Deborah A and Burrows, Mike and Chandra, Tushar and Fikes, Andrew and Gruber, Robert E},
  journal = {ACM Transactions on Computer Systems (TOCS)},
  volume = {26},
  number = {2},
  pages = {1--26},
  year = {2008},
  publisher = {ACM New York, NY, USA},
}

@inproceedings{abadi2008column,
  title = {Column-stores vs. row-stores: How different are they really?},
  author = {Abadi, Daniel J and Madden, Samuel R and Hachem, Nabil},
  booktitle = {Proceedings of the 2008 ACM SIGMOD International Conference on Management of Data},
  pages = {967--980},
  year = {2008},
}

@article{athanassoulis2019optimal,
  title = {Optimal column layout for hybrid workloads},
  author = {Athanassoulis, Manos and B{\o}gh, Kenneth S and Idreos, Stratos},
  journal = {Proceedings of the VLDB Endowment},
  volume = {12},
  number = {13},
  pages = {2393--2407},
  year = {2019},
  publisher = {VLDB Endowment},
}

@inproceedings{armbrust2021lakehouse,
  title = {Lakehouse: A new generation of open platforms that unify data warehousing and advanced analytics},
  author = {Armbrust, Michael and Ghodsi, Ali and Xin, Reynold and Zaharia, Matei},
  booktitle = {Proceedings of CIDR},
  volume = {8},
  year = {2021},
}

@article{dean2013tail,
  title = {The tail at scale},
  author = {Dean, Jeffrey and Barroso, Luiz Andr{\'e}},
  journal = {Communications of the ACM},
  volume = {56},
  number = {2},
  pages = {74--80},
  year = {2013},
  publisher = {ACM New York, NY, USA},
}

@misc{githubSingleSQL,
  title = {Partitioning {GitHub}'s relational databases to handle scale},
  url = {https://github.blog/2021-09-27-partitioning-githubs-relational-databases-scale/},
  journal = {The GitHub Blog},
  publisher = {GitHub},
  author = {Maurer, Thomas},
  year = {2021},
  month = {September},
}

@inproceedings{bronson2013tao,
  title = {{TAO}: Facebook's distributed data store for the social graph},
  author = {Bronson, Nathan and Amsden, Zach and Cabrera, George and Chakka, Prasad and Dimov, Peter and Ding, Hui and Ferris, Jack and Giardullo, Anthony and Kulkarni, Sachin and Li, Harry and others},
  booktitle = {2013 {USENIX} Annual Technical Conference ({USENIX} {ATC} 13)},
  pages = {49--60},
  year = {2013},
}

@inproceedings{chatterji2002complexity,
  title = {On the complexity of approximate query optimization},
  author = {Chatterji, Sourav and Evani, Sai Surya Kiran and Ganguly, Sumit and Yemmanuru, Mahesh Datt},
  booktitle = {Proceedings of the Twenty-First ACM SIGMOD-SIGACT-SIGART Symposium on Principles of Database Systems},
  pages = {282--292},
  year = {2002},
}

@inproceedings{gjengset2018noria,
  title = {Noria: Dynamic, partially-stateful data-flow for high-performance web applications},
  author = {Gjengset, Jon and Schwarzkopf, Malte and Behrens, Jonathan and Ara{\'u}jo, Lara Timb{\'o} and Ek, Martin and Kohler, Eddie and Kaashoek, M Frans and Morris, Robert Tappan},
  booktitle = {OSDI},
  volume = {18},
  pages = {213--231},
  year = {2018},
}

// How storage works in database systems, and the evolution of how data is stored
@article{stonebraker2005goes,
  title = {What goes around comes around},
  author = {Stonebraker, Michael and Hellerstein, Joey},
  journal = {Readings in Database Systems},
  volume = {4},
  pages = {1},
  year = {2005},
}

@article{vohra2016apache,
  title = {Apache {Parquet}},
  author = {Vohra, Deepak},
  journal = {Practical Hadoop Ecosystem: A Definitive Guide to Hadoop-Related Frameworks and Tools},
  pages = {325--335},
  year = {2016},
  publisher = {Springer},
}

@inproceedings{nishtala2013scaling,
  title = {Scaling Memcache at {Facebook}},
  author = {Nishtala, Rajesh and Fugal, Hans and Grimm, Steven and Kwiatkowski, Marc and Lee, Herman and Li, Harry C and McElroy, Ryan and Paleczny, Mike and Peek, Daniel and Saab, Paul and others},
  booktitle = {Presented as part of the 10th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 13)},
  pages = {385--398},
  year = {2013},
}

@inproceedings{zhou2021foundationdb,
  title = {{FoundationDB}: A distributed unbundled transactional key value store},
  author = {Zhou, Jingyu and Xu, Meng and Shraer, Alexander and Namasivayam, Bala and Miller, Alex and Tschannen, Evan and Atherton, Steve and Beamon, Andrew J and Sears, Rusty and Leach, John and others},
  booktitle = {Proceedings of the 2021 International Conference on Management of Data},
  pages = {2653--2666},
  year = {2021},
}

@article{lakshman2010cassandra,
  title = {Cassandra: A decentralized structured storage system},
  author = {Lakshman, Avinash and Malik, Prashant},
  journal = {ACM SIGOPS Operating Systems Review},
  volume = {44},
  number = {2},
  pages = {35--40},
  year = {2010},
  publisher = {ACM New York, NY, USA},
}