From c95b02bd7e0c2f2dee8f2fb1275a36416cdcc768 Mon Sep 17 00:00:00 2001 From: Nicholas Novak <34256932+NickyBoy89@users.noreply.github.com> Date: Wed, 13 Dec 2023 23:54:33 -0800 Subject: [PATCH] feat: Added start of paper to comps --- paper/block-search.drawio.png | Bin 0 -> 22268 bytes paper/document.tex | 542 ++++++++++++++++++++++++++++++++++ paper/oxycomps.sty | 102 +++++++ paper/references.bib | 307 +++++++++++++++++++ 4 files changed, 951 insertions(+) create mode 100644 paper/block-search.drawio.png create mode 100644 paper/document.tex create mode 100644 paper/oxycomps.sty create mode 100644 paper/references.bib diff --git a/paper/block-search.drawio.png b/paper/block-search.drawio.png new file mode 100644 index 0000000000000000000000000000000000000000..41bd60afe1b5ad833dfb4a4c4c0c97c9799fbf1f GIT binary patch literal 22268 zcmd_S1z26pk|+uU2(Agif@=u2aksE>cXxMp5|W@ncW@6*a0qS*nvF}4;O_3uUF6Sy z&VS~fJ9FQi_vX&b`@XRD>h4{pc?onBLKGMn7<4JgSIRIjut~tX7ZM`y z+>Y110DfRylqE!AN(YEHVPKx^xr%GL+IgB=*_y#nvx)ujMh#-IbZ~K{W_v{q0*Tw2 znYcPT*aNRXy`6)pmARFf=^t$%77!aFD~OSmO_ha{noWd_9e8kXGVyS-YX1RmWMO9i z8m?r!GnVg-=x7ZLyL{NZC)FGsUK zYE8}DtxSNCfT-ETsaZsTwugUgVy0F`7S2X?K*8P!80jwziU1`US^q$7^_%=^;x61$ z+#nAxPcsWkS8hf&D=o`^iS19EcAgT>Mvj(p4yM0|Z0h+3T2_Dve;_sW`lE)22O!16 zI4zw2?B{p44;3m_-oFRL3Jmran46WUnagi%e^j|TIM}*cIsRj%iG#g8AW8oKXyokd z;PH=O<_@;MXZO2TM_}Ur@X6+oZgNLX2Z zx7Zo|3H-al#nQ;s!Q&6GUsZqQ${%$6lbX&B4gkKtFzv6+e*)QgikaCyNc5j{{M`>A zO#jcn{Tnr$|E5s?9&}>dq7vflfY2&CsWU1AB@qebKgG+%)$3292u>I=bU+4PYc`soGSQ%hy z00w6>TO(I1_rGERu^9c9dyBupJ#_ZJBO3tBZ_)bK$MRs#|Ahqp(|P`;o4b*%+wUQ2 zI5-1Z2x3umaItcIFk-(5@fW%HJJ%l=>|tr;YNq07^o!~qfSCd6{=#8kaj_M1uyt_$ zMMXAK4l{03b^yc$SXOMz{?ft?GG^lh){*};Zvms~YWAn;dKlHePJw^Qjo+WM|5l&h zW{>T6(c|wng!NAoXZaWV@uyw+zrwzMs{+af1nsA#LoBs!CA1gN(VBvq6mH+Yz_K+{a#o9bF1NRtNUL?4S(-O|4lXgJ+1%tTKJnDf6+o^ z;IM0D1{`u9JZ!%Zo${(H`hW~{)ek?WuBT6s9R*o^+8&W!(ZA^dY){7=3P{_|7+ zZ&UKmS!itQVDfMVad9Q{wC3X zv?XN&JlWv?g^Tf5+dnvS|LOwy)%vIV_>YF)A3S*Y|L~SbwOwq(z>vX6y%JIN)Zf)Z 
z=8lsiYN^&5&H7TPs{JM5Oz*U?On~uKV3z7gUURf45xMxQ>z4fHA!GV5#ZR##L7Xz~ zM+R&yE_Y(FtEtO|YYlhX`##P4$9*$w7JKJfa~2(nl2zxr>&bCxf^;0m7Qtk&C^4=2s9%>m%gJ)8!&i0^CT9n1``?g0%r^InUNswW7~-LW5I2_T4xd=4s_hqg``q)o%NBE;s{-5XH@M_6#$Ww8NBoi;wx%VjIqX z8!NUcjW)ykM_k+vy?zyV)I4HKRHP*`Qq5@i+WTyM<;TfoWUs3D zUT0vm%+h(rm0cr!9+abmmVP3~o)z3}PtOeB;wPW^x&2W#gl)7+FhAe4&!F4|(L>Kk z%Dk-hGL&}&9nsZ+wOHKCEq;@c5LT3PaPFFFtU!OcVNsDyPN}QsN^&SR$^d8ZY;y*b zZ!m#3CSG5rR_%Z(X5tK&-dw~)t<`IO{WgFm>uo#KZbJTh4-{?VM=RrW@GCQA>7l3u z5eH{|0Y42rUF8|`y?A<8JtYV^p6sbL2fJEaes8QWNn~as!CwPy#S3h6!6L#W#T_*}fP$}`|=!acan-uJ@unnZ& zGds{}LW%4WC1PwS?cnsm$LQ^3-Y0+lW4OHLEz5PAYaC91O8^GT^L+{&BC&7ewt~c- zk|Hg09RagX3EE?w{R1x(_eX0(Nta&hPH{KZMzWmZ{a7`1HrBMaFZF0;6(E~Ptncn| zVY>LlM{yvxR6Dt!@MS!F0=uf&tWu{bFMN?@wUFb2JQa$?563JhcyoH_6Z?Tqn+;qu zSKp=m4gnKx^eY!UaikEPhs|2%W#BB1ay8m@)GqXlyrtjv5~X2lPp*WZB8@8{_}sc= zJV1+em5-cy_>TJW5#0Ptq%KbnIH%HH_Dm38OG108sjGmbS_VYo?r=%rxMl9>Y%JYy zH4yl2KUPCwMs6_BgLJCmQI&Cz4dO-F#QM?lN^GpE8!Kv*SG|fi(Xqj!k93OguZGQP zdVa2Fcvt7z%MlE#Wtv#DU4DQG?dQk1FYlc5+yLK6GqrqaW{6ymOv~23c|4g$5d}2A z3huNuqM|%fQS*E(9%CAA+th#UZf7)JE7OY-W<9pUDF=fT@pGk4bgZcf%aBS%|2wKY zGkH>i!o0096(x5#!i%JsaN)%g(ZaqS;zSE!NF>w{G-J_r3Wxa~EON?)RPn(OUJ&0~Ak$K)jNk5VGjhSfn_2 zXQc3(x4dp!Pw5Bm1D?6&6|P=iJ61alrL{h}MZ{om5*is%-sHBwmdKQ=Q86(&F;kEC)C|r(CN%=so7qoOevG2%N zMS6>P{>b8Ze(}q56PhUcafbjIkRXX=ZtQC@dWh-B$fuFd&(4V5UrnUz#l*#-m!;@W zR+^)hKB36xevXWZD)HzN(Q>kYoCZ@Pc(L*lpTqx&XW26ejMja6TF4<{IkNDl^|pj+ z;kBrJ1J))59ob}F+};VhfY?}e(YKFSJ-YBW26Wzfeb7J-GNo8j%D2Cz$p60ILhG6z z_10nIv1*Yzx>X_lWMePNTy0s~x)s{z3A&%ybR9UaaTY!d%&JBw#YE0MUnxvQJc_8F zpuR6BIPzgc4lv~otoT0z`^ zt_XhP!9ob-8t@=ZVxwrZS+{S+#u8dzzK+_*XaCHcCz}|iCNnOoER8~L^evG?0oBEV z!0^*ILa(d{*lRjAP_Kht52k_Du9iq-P zZe3%j6GN(>kXMMf+6Grv<{6XjCD%4qDL01mAE)_La$i+%D_paRIUqJUc$*~TJ~tNX z%-YXvnKW!BiAZJ_T{)Jln`~SNNWkQE-$kl=ZvNvf1w4o63f6l;9H0-Fsox*<}g#@cU}2jXFNif>3U>L!iO??$=3CJ~$}G7iXT2n9VHvQ;M1CtI#z ztjCMvETFGCvTJPhY^yXQ$_^_g&G*w4gn~^e1kIEo5kR)nrZ?KoOS?GYAXytHS|nWvj(pB*w0nfUkuyU; z*r0O2GKAT)%exe^E!C&TPQ=FHw&45~*~d<=e9oAI*qu8-6Rgnw)~MbDS6`yFYe=Ns 
zuwe>rlVCH^f&VcdoTLh`j2=vl{UTy=ZY(U!#4`v%fiDNa-8ybSs-BN)SG2SH5dNAJ+rkf__` zdF3XAfzfd%cD-O*OWG|@Dq=fh(R7dh?z-iCC|5;e5>l;MMN@5x1pPi9A2=qD+VT1p zPKTip)VOjX4YK!{aoYSrt=sY%xqBw^+kUzbjg4#@NlJ6wf*VP&UgXd=7wPQR&w)jU z{3|@NvKnNJ9Ow+sir;QaJ!@FRJx}}+&VGWoVcbQ`h^9&+DMfRm1>YyvXY|DxyRx#< zu+i)pIFdk#3c4e-8%EOXL&nG$0SQP7<>4pM&2!+u4U+BH@v1u5&v{MnL`_Y-{5`jQ zh=dEb`RY8&IjW9Wq-XHSjzB{~Lua04VYbgc3Cj;qN6sehr^5Wu&W5g&E6vji{MmDZ zFDmxIUNUG4J`>L*bfcEK+P6b|v5{{#5moc@N@>n^Wtp7>;pyHo!IDWtR8=Ki}&2{c_uM?zMv(GLUQG4(vBiFgGr+Y zw>MWG)W4n3c04S@y)y|fkVfc1iR?uSXq_V;kLE)i16`} z9Wlh~iWk;+?DOJvEwiKBOhAgpKs)V52tS}w{J|zd^)1Kz3g&*Hs zBo!2bBR_F+mQ2Itj@ucD>U-z+cR&z&k4vh>8P-%}Lb240B9?1xN+t=55=w&SDGi8u z_aZgE?Jt~ve{9nydoaWhBDm$YkI`4mx$vz&xZG_9)`wQ-C6{(+jop+{`&#wT@JM@V z(p9W!`c=!DWkEjO=FNcP?y^j)W=^ItP)Jf5_f<$KqhoZRwZ??==G#?;XlkINdvHSteZwoKwK zrLX!RJYF>W4y;M{FA4Nrx3~&0(=Mw%wQ7>OtDhkqpnp9q>fT@+RvO0VvgF_6uSeH& z6FAwLK$V$b40xYIHP;deP9V{%qhix?aTfF-2=*oH;p*^Z6oY>ykOeQLO`6-nGH(QC zu}W{DeTsjkc)R~K#bFwM#T zo>(#vN$zpDQjeDHmOD8KAVImsc5g2h|WV#e;cW)KqewdmPO{i2mn6iYW@52lX0pBbz2Kwz zJrGYlqUC5c&+JZU+}EEo$WYJOMEy^sI?_i0uB$>pU@s7?NsH&izW^6n8rorwyu$pzBQgX8!w( z42RRn`jzTM5`1qvzi;A0He&AFtx=^QQG z9|cJkkkZ6jxFb12qtiUKlUAOJtkGI|N*>lw9I8<27}J128q`9Da4{P37+H05yvRKv za0&OGXcEhZvueA?)Y4g)>9hH-m+H_nx&;|_o18MPqfC2V24}#5w-e{b(s^t&tuPtc zbefSw0{njv|zAqn>RTmZ7 z>BJ}c7Os43AOeDxpVoRA(j|q$i2;ok7Wkix9>*UM+L#E_B_HP4G1yP$VSgk~+FJyI(TPm@k4_bz^I6fLr8U#-Jaw)|z%&-ABh}s@%Un7n^u%3}7M45(s zush~-DWyK%JeDX8e1*is&5p#WvqJ}UOzz$!+L$Wb+4H4{DvL+Mu5(-yrMi*qxc1n0 z651iTWu4NQUXD?K3CR_ENtiSRVrNm%wcO>c$zImBFNaHOaC=|+$zGrH=WAGReHrs* zXB9%*3^SIGf?_iU$n&qCFwnH~sZizN9P-1tUmwrK@*HMMC}AE7UI|QjSVt9v&=Ctc z!lh3LEI!{sn5Fmr!KnTO8ZBED$oA};&THQQEiipK)+Y-f+FggwW~~-tj9l#b6L*q zRKBIEn)1ZB$?(e>eD37ZA@b}o6STwc4oODm*;VaZV7s~ZXrS?6O$~lv&Qp?$VoqIC zHt2l6>W75k$pt;IZb~wf3T>bbrX+~G{Lt~9l>J?JM>ug!%IIr{il65ikb3t`_O^`R zp+x05ZNDI=&$ER#LqGi)P>2+?B*BujCXm|TJ%soa;g?BoS(7S8e;h<@(VgtfeQ4-N zpTW#6Wf}R7I9D7Bmv#;e@QOTyqn%b;E=uWP!X!pIh7HpOs>i^q>*EnHmo 
zyTGgWg+J89i)nH8lX5sPZoN;tA%R|d6&)R~2#F{vu;<(_I$&*QDkIOzU=_k6>Lx;L zM>+5=;qDQn%r6_Lk-Vc5;@l4siDobwouz0v1>C<--h$G;dQY>+uFI=$BcAy~rY1 z<%W~tTCe>zSqI_ou>Fw^Pz%DT+8dP=ofqMqdkH_1Yw2Vox{C(O>lJ>GDh=Q-)#VR7 z?Ok7N+ZFzzZf!sXjZPrM2@!H1CX(Kb{Ul(;Er+(yVS4upM0KFBBp~KZXFOUX@WW3X z2g0+mB|K~AejShLFe`@92G(B{wSpPVxR& z3vB5eYM>~p_}*!3Y{rQe1TAyKgG>&_^XX+O4JQ`Q(`ZwamQ*Bm8kJ~MG`#IkX2S)w8Km>ig$c0BDeBX$);B(NeVX4R3x#SL)-tR}I= z#iWSh-Vie0_RGQ#{mWD0#UMQqJx4ojhNn^>EZp`DD5Ygv5#fN3)R*HGvUuo^C8 zU|F>;3WyZ*E(tQ&Eb~F54ew5r2xnByN4}lh8&gpYwM-dv?7BD#vqTt0f9196kY<@S z!dO{byJiYH=OZ8>u##3UGib>h#G)vmCLgGlA+2|0VPb${@tET3w>aX>wfIBRcx6)j67vKSa72va z(fSXp^U?fvoer8k9*=;ktvJ<9FRCb{Xm&PDG=Da%UCW!3LglOGr85@$e=OCtdRFy% zPK+g7Xc9Kj5?JK8ZO)-GNieHGbd57Zw@Ij~6iKMoM0H(z(i&G?uC3(pMXMI~_`^FS zE1{PXLz6>C2y9NLOZ2=Xkiwe%@6@il!;Oj%^5sQM0qR-e_fI}9Hl-p_H}04X>1;JD zu~$kx7W6_YV-RF#=>6VtaQ|ATAs%sS)1Tc1uD^$Ze&|->?CtqeQfY34){j3cXH#1) zKhkKl!V-J%8VS#foU|@4nG8!*KX&`bm}v8=jGCI(=nhiP&aFU5P7665`yy2}pT}_G zg%n?`ojBNBV>c>qDL=M$_UI7H0nCnY<5cMX!(ZspK{OG;{cSTaGfPy6L+N{raF6-~; z<@dWuS$8C)L{WebHQBvihl7lj9NQ^l$#SQlz#49aBx$O;&?dZ=nwEz0%;jZrt?j@q zO!b#rh5?t=Cl|C-Vs{!F=NLC+1R(b#5Z1G6>`f~x%egGI&=xUrBL7+MQ!DRE7NX0F zk58~~eo-njho}Lj zcXjrXLYGf3nD-JxJB8s^JG|*Hu#JtCjVlCYZ_aLsZYeT+Q=u}J;->+{+ff;UiVm_2 zo#U6|lHV-8H+LKXCx&cNJv>=Az`GDiRBE5*{i9huy+l_kkN>g0mTLko^afkdKStIp zDLV{=K_1;_@Cl0(dQ&a769l6CDw)@Z6<-De2CaU0Z8kC>nFs36+XxSg3E$`wFZ6Dc z%u_9|2u2Zk6N9FM_g45#OgNe1(=1sqNEE&iQe;e}0;R1>4eFN6OMSDRpbZAyh(e%7 zRPEz$$N;A?C6a0&-N%@4tZ$_&pYfrpN61Y1i>wfO+J(#9_!zX-Xj5l=XgLQN5RnAcV0t|}tecNm08E)Ex?CD=uh8w|T7oQ%!~s%j{Zq)%rOpU< zx7p{F)J$iqy}WgOSJEjZty0$gfTGorr+KjoNEhDlxm5mggtX=!J`pO*-X&ie=+I(U zKiblOzaq_`3#FvpG8`{Ju+H! 
zb$hkhEmcyzyrvBKxm*3MIY3sWx|A{K#vhOF{sbiDEBtl)653&K;Rb$EfPv-r+^Rq+Fkc*2HE#?T)EYNL1dFIbjC!TWbc%4LT&qpM(C~>HLQHc-Ys6r z8-NlN-zL@*q(_i{MAvK;?MLR_NZr}=p?Rc~+S>A37xeIn^gHhDm zk{pxFJ$Pu$h%^|O;BiPng+!%#1o)lzmr}92@nY;-q46GMSmS!%5Cvgxu)3_VwD)A# z<}A&J&``5D6^}4gnU~2O4p8sYm2J~6`(xUvF%bp*ig$Ndy{psf@TR#IojJKVL4`z7 zJ+-C&sAqO(EX3+MS*76dvc6ANNkZZ;e5Ag7!js+4v4*}<#VvBTsGl0>d^=30Sy)8u zcNc(fYyG2`Dd~hwPpW(>@kEH6jDZrHno?_9X{I(~_)I3){)yR_R9ShnkEIN`F=;ly9cEu?8wUj{+C7h9^D4Z5*Fp+)A;dH}`iEte3t_ZgX@~>EiNJwx2MlgLa1t_9* zbPt*xR@HXZ0o1Fi1M03io@BOcjKl(u4@dC^hLQj~5q=yY2uAD;xuO;}5N^`4h>!U2 z3rz?NT)62gLmbU#_;6^jLBeL@Zvb1Crp3wh3!DrNi3NfkAZ4g$_zPRyQ@}vPxtag0 z`vv|6iR`N+7@j7;E#Mcnf7$u!8yNDZfaW4i}z3uJM$$H@4Psk34&RGWW(|Fm5)55MT`Y#T5F7Wj)0nC@iq;CQtyO|-{s zcpn#+w~VrW!GATtA{*~}1tahoML7`Cm!cwF%A*B+S2?=GtNN7e#TB=WG<)QeBJ#zw z%i~>ovqy_9>fF{S1E4YzaQm~*C-m#~+rtuK7#WO>%=WUFW9D;c&MQ0lyp z{J=4T<}V~Z#>Is63|z)BF)F#FB{>`%JHo(_lt1JG=#xgh&F+x^z1IanvQAcG*K(w| zHW=us^OboN3*fRPZ_nTj40e0vZth8Es7OC>%<05dMgpa}dmr3V<1VN}pU(bR^t-!V7I6@BeF1y@{D# z7ZNHsvHU*A_8K;VH|YyqGw+$H7p3rb8o?RCLT$OwZARM1Rw$o^)FskWEj10izbZbZ zuF;1_aOQzhrvy0+rF3kK73mpp$(RMPG=3Y8V;Np+Ee-y}$^$z%r0I6YYKs6uKH9eDgt%^tJ!tDb{;&u{m~|9|fg6dL=|U%&V;Rq9d1N zbi8|hJD8Y8TI{xZ$X3P(#cN8FeEcXM0+yZCdYFwDOUXKaHtC#9KVBojWw{c?BXz2N z7K?sy{jFq}Y2WC3;CU>z-f%ItOBAw~bYSVg;>Dvqb8^X502z+nV(*#X73ZPYO_U6g z#rZh)2j0B$#=KWz?Rd9(Hb|`uve{(V7gs&28YB5SOgCT5#~Uy9o^Fj@(tddY;bx{YE{o(uahU4Q0;1H? 
z0a+nJqDyQUVto{G=3PgO+cqUt26fu^XU`L#QZ9WQ%-{11m+4@<|BhtS4%_34qg85@GsH#bX%L2KqLO1M{D+H7~vAVbKFjhly+S}(1x*O@Qe z?`GohB>GcaSpUYpaNA~=h-yR)Sr*MQnZP$Jy5=|f8t+ZL&xzn>y}G{l_luuwCk&i8Vu$|i_kDhKAZ6!l|jmOLA9D}OwTcOy2)hYchK*e=2Bv*a{n5=m~AyL|NN%{ z@?M{lhi^obI2eq0wpR&XF;9lr0c81LriJC}+DSv> z5HSn;MGI#fw*)T)@M3w=;#h!?FkSU_NOg+4@uyN=?{m_(QTy|s!2QXZ zCN*hiCBO+k(ZMYTuQxc+@}*`2-j((;tJl>_%WcN5*tG(VUw)#sQ`-zz$arYl>54#= z2OSA^yB~rcS6Gi`H?t206Qzd6^peg6o0f4+c!}FLUu5lgp#rg6M#mVmK=Q*Q88mh)v>cDtZz0|U zkyp;>2QQ8g5@bDR&6-$~{^kO24(;K0=|gYyu?DRA@)(;HM3F!qGyo9R?+n}K&m8Ub< zGBnPSgVf6ry^hlZZD-r7YGle}kHjIeMLMxnORgC$!W)t|9wn0tz!KhW>Y4c96I^1R4c;FG)HMbZ@S3FP@zrBQ!Jb(Z>&rZ4et*b zG(irCBoh9egBemAvn}K^3i{o|{jV2~*?|~+iZf$Un`Drt5Ec0wb_*Yy_ksN11_XkS zW6)@00%qDnA2)~huT6)UQnho@iR)kixg+TrVMC1oM-WUv#Kn(D1-~{W;cg(xkN8R% z3lG+uF1jxf5^`9DX;*hB$>^q6tl*s=oRkuZv+o#L0SwoFncl>It2LH%=-C;rCNF)cJyH(wd&pPdkP>#5^I56lyu zii2$8`#NtIns7e>DLwBEvO>;cV!}h@Wf8X2%_s>fW6ss%yRAp<*hl-$@6z~-$d^zf zNhUvB>vI`y`=V9bOngexy_Zi6clu$>|PaHz#l9Pu{>#`vkL-qQ>@z$wAVM z`R}!~*BAyJ$3uopNr0(n>xgMqY?Rka)Y)ItzsDh!nbnG6Zrgx&|HF$iT3p)pk^l&> zzi2h&D<8xdH>bILNh)gIp-iD=OQktjVajdKN>qf-UKL@Q*Bs`VzRyP>3Zgm;(Bx0u zH~sF?2f;3x*9zQ4r%XcObE6nN43h+P!ZOpd!d7GmWqI|gU$zjD7j{sIYt_gLu*W*+ z78IoJ*yiNtJ1z64rbrU9A$Ls_tD(vzbLMaMwpoWF&y%Y(%GbZ%93)`=07bG{>(#;p z&baAsq%&J8A+pqogfS#=lms#9BzJC+F=+SY#h)R-zIrT)dAg1O8jIeFa?B14g-wlO zS_urI;Nzf}t#}~@($e}_pjEYnhgSNMN^Fa1_3lbsDBeV_?qL_rG#4kXQy~SGbHTQs ze;w;E)7TbA?zpX}8?*gZ3Bo3p6BZI&SZAnVQe~ zFeo=k6i4(x1NWzcOHK`>NeF7>w5tSkgHf?D>5WQ`nrcDxBY@3c+ zn$rM7SRV_c;5{v}RMIM1g?KA|ZjS4TGu3}DATD+Z22z5gVulXmG3cRF3lq&vSAlv0 zwNI#2vN%-n!Df0IH*IOqMN&9willFTK3{!1`Ag3l{S`8K-2~?lJu`Dfaji=dH7CiB zPWi$K$1xi@ON5o3QNY$OzovMQvYPEU78`(`#h=-4OlPx2rZ-}6O-GbFbH6#Z=PQ_b zm(O3`yUOjKmuI{d%Q{)7N~(^r2jr0jm0UV|4<3K#8qzV3)S*2m6A-|ak@a|clB?5Q zGgW2*>j%YzG7R;qNt$N zWbs(x_O|SD!w3#hAl+vtL}aqb8R2kaGS!rFiLReXUpn$^H#|?UpL-W^tQ3|pHRONN($}{$zsG=PQSbd)FzpC49_=T7 zpX5^~<%YsKnWEwpSY{}@nx&Shn|5WzUd1e26e{ho>?Z_54*Q^$l~>l#Xq4Ks={~2~ 
z<(EJteuyH(gw|?dt5b!0spWB`y~R+b5#`ma@I@8Ym62Yx^;n(Q%olUkpvaqNw~sNI zRfXyYbga%D)O@@uDA&D-=UP+L&u`b~Ls_bw-^ki&MLW>FOrKV4k@^(pPfbNt;eW~h;y9?AN84q49(lCf zzsfF2Mf=_m8f%RXIK1}CN{r_i(KMSqzc1B!J{LSFxYYfl;U!QYi&U#QcGkzm5)S-J5jlV%x57H=q#f`$pd zU!K{%6F!$S*6sdtpD5uL5rN4>#%!wN(|TI!0X7nkg>@&G{uIuh+86G;&taWVwM(@^ zi01Al%k7xmDzMv@7e?yEh}bm0yx z`o&d{M5dnSPDD>1gFD{+s9jQ> zvYT!(iBa>Bl6xtCeR#_Nq>;!9g%qjelN9&MWeWOnx~?;K{&;;YRwJ*6|597ZMtgU< zIU=6pA-u-uFNyIPEs>Tp$KzS|nf`oFFeO9SM*ld)$(!Y;X0} zl9rddi62x+Ug$bMDr?+P;O2@?U~pbM1hOf*zSg(CQV-9R%q!peGH|A&q)SXG3XF~y_&NCv$-3_Y{(<*8U8!TsX1LtS92vT`0w<~Cv z1K(70MD3K+e7fbvSHkX3SP|P*wcCI6q<8t?;IvRu3ZT=|(*v7>c7gD);t5#263)sL8bY)*V$xqe;?M-DT~Tuse6K@9zYS*i@S|}g)$Cxp zo|<|#*Hbcx*(M}LR>~sNc+TKKv3a|JM^*q7&o?|GKfZ72bb8Tq;lzRNy;qv($#oQv z55R@kCIi}iT)4cNlIRPv?v)(&44tc#W4BcJo;xe-wv|N@-+OX-FBBefoH;sP?Yhh? z2@=XC5!RN5=+lIvQ3jugzm8+lg~Zr#!ArP1bag0sIrzSZ2g@5Et=nxIHty3=_zkRY z6VcMN*W-KMvUs~~Ul1X7Kt|YgWuUokz2=&n3PfP<(c>5iusKaNTId+$hV3be+)5#8 zB^mSv*}@q8>DV15K}(4;rH(`Fs;R8L2SGPvQ2--eh1;>K<5PLfv;xC2WUXcbQ6%ct z$b9vU$gIu{XF4lx-nW5O_~z1lR>GF^Jx8w7))-l%D`X=_$u$KvAGdWzC~Z11e6`!? 
z3vVDCKCU%McH@P_NNP}SH&yd^nyYLv-hTa-&p|dQ!IY-xl>W;Ib78s;%cp=;IgqX$ zajl_=)w6G}sZ^%AKb-1nO{i7GmZ!WA6AqQkGn8H1dQKXi(j04UhwU-k-*{K&!?e$D z1p(c>JmLuX5T?5he}{@VS9l)5rs881vndVmp~ZV?Q=@f5hp1e@HaKEB zEv%zny+2u;DZNz_v{H{V){OPOu^3R9HJgZyBm!QgnQv)ew*`cJJBLLkufwLGyiyDq$_kTZ9QG`bnJ2Y>fu-TYA=7J1tF$^4X3?LHe^X9mN~693Ce3( z77_1MMn|lYT}2qePK+RPD6YGc@ieSd`8lGc%Z~7~yg>>T26c7Zb!qRXLuHWaUEt<% zaSqFBSgprCBfpM%L}ohhZLpWCTrw3I7H{kh=OcMk3~v1QHKk(p@%~JJHsx}2^WCTk zgCyDGQs(9~<_ZK-QQg$6Nt-5*v#hG{$|n~eQiu|>SrI?Pm*wYU$rBHwzVLgj{FZ|D zKIUBtclVYvq9!qf907+X&OOwpsNi#R)BJ8U#rquVeX~K@FS2}vK{AowsMo!tHPH*N z_@)h6t1BgQXN%577hTTrp66#@6XXCp$DN_lla;nw2Yshg3v)AP6VN$-*)w4M(o_{C zBfxl%0L^>#VB{+Nt)JfI;l%Vsw+I+^ebfURZf$2kjVw1|_4MFLpE*>Oi4*Ka!=T@5CEf6oN}cJ|VBy zf*{xKHWON|-H2rKY=C=D%NsKc7#l@#c^B;{2ZaZR=f zxfJaC6YdEqL;~ctV}DvP2!8jd!VbklYwB!?b^ zc_(?eBRUC$%190+q7^#ZWD2CK_fSVr!B6huSq1YXj4T?sskYPb|W7)tp05#a|NQbgY-0_-qcC*q`bu57(AUx4z220#>_9`ZGcx z^tyPNt@((K6Z%AkP?$>!VwLyfvsViyX;qcW#u z5cTX&X202%h@na;)=!&?ju~(%dumG+k<<|#vK?v~dbmY9juP6=ZuKLx&e3t7GRfUR z$X7Ha9%ZHu>ynb0$#kb|-tSG8sLED6WkU#uSaJQ^i>HAuO3xX(x)X^R@-)X~wWRW> zU?3-Ri5_L-^97sxWA;^5hkJHU7_;Ty-n_L$VA#b2D3NJ@L~f>TPV+>kw*Nf;%a12b zQC>2bT@*hpnj)}h$Win`PvWFoJ1_979z-ylOuc9Wy21wynZweGLW1igOTvMrmW9wO zbx-!giDlwbshH2&>ctU=#^3_so|U{;EYJVAhXi@Jhh$ZA$)yg&rrVWss97P{o12AL zX(m)EWGBR+EqKi1 zmio94IiyNb&g=IZG$qHwzmQuN=n42vv-%cD`g##T7vmQc6aHNu{WV{gWKgT8y112E znkl!LpL&vgV9g$!_Q$=%M%)sMfsjaVDbyzQysn4v{2FN&5S9;< z{Q?uLH72#D=lbMsa~?KT^fUa{$q9Si&YV1IbPtz(*wBKoEr&tq#so8~rdq^!F(oPd zvvs=1#@IYRbw;i60AGF}$*OS6^k_0ST5{F;;+v}H!1VTTHic$OoB_l^%dWkkPh#c~;1xP@bBcu^KG_ zX#Tw9jOSt(FeIbM4>$K0)jC{UH-}LH z&)1!I>?3AqpE9Y^h7v~Po&_r7n)gq(5R!Gqe}-9-`hZoe!T8o($8wV}%lnk@Bm~i~ z#EIMYjI*+;%F1lVM0>_{%k(89mR_m>s%p z)lKY&@LqQnL4ZW|C;{JXoP}u*E-AqF3Eck!4vXHp+QnnSBk{ZB1RWXWc<+ zxkEog(e*QT@n+4fE2jcVJ#-2!cL7Q+Wk)^&a{Xno;l;O_2aLl6oPhmJ;8BcAjW+h4OGa;b3r zKEG>>O$|%-?KbO|ei1&)oQ%|TniQ9qlHwMWp^j#elY)WR!0>%SiD;&ZLx(ww=hk;~ 
ztT094qyOq!I@{>3Krl1hJ;nPH2BXZEp3hfSSL7FpYU8hNwXyAHTVptYxdMiSk3F#%Y^m5*nRdML8W|urT@71Ji}X#@VhzMMjqY3cd%j=19ANd z`pNA2zEqTfZ?v7J!P=Yhm-scM@7Y@4j);!;2VcPFSeSNF<>dsIB+l7YCMIg)NsvxJ z7Il$4I?J^|VT7%fDnaJ?uNQUHA(@EZSJucC#VVEFCu;WdNl%gDq2l6Z>&>;VnC7yB zKrTUdvb~1e;)x5#S_$Dfo^_rjTj%i4SD58%e>S|pkj`?7Dh_`Y8*W-6;WY0Lo@RdD gP+?eh_c%@No$-`3RvPf%*}_PP%fBiWHGKO&0It4`2LJ#7 literal 0 HcmV?d00001 diff --git a/paper/document.tex b/paper/document.tex new file mode 100644 index 0000000..2e15f5c --- /dev/null +++ b/paper/document.tex @@ -0,0 +1,542 @@ +\documentclass[10pt,twocolumn]{article} + +\usepackage{oxycomps} +\bibliography{references} + +\pdfinfo{ + /Title (SpatialDB: A Database for Storing Dense Three-Dimensional Voxel Structures) + /Author (Nicholas Novak) +} + +\title{SpatialDB: A Database for Storing Dense Three-Dimensional Voxel Structures} +\author{Nicholas Novak} +\affiliation{Occidental College} +\email{nnovak@oxy.edu} + +\begin{document} + +\maketitle + +\section{Introduction and Problem Context} + +% What my project is +In my senior comprehensive project, I have designed and implemented a database +application that is designed specifically to store complex shapes in ``voxels'', +or three-dimensional pixels. + +% Applications of voxels +A voxel\cite{enwiki:1186283262} represents a single point or cube in a +three-dimensional grid, at a variable size. This feature allows them to +approximately model many three-dimensional structures, in order to reduce the +computational complexity in analyzing the shape, which has led to many +data-related use cases outside of computer science. For example, to model the +inner workings of the brain, Neuroscientists track oxygen concentration through +neural tissue on a voxel grid as part of fMRI studies\cite{norman2006beyond}, +and Movie studios such as DreamWorks use voxel data structures to model light +reflections for visual effects\cite{museth2013vdb}. The output of MRI scans in +hospitals are very high-resolution voxel grids. 
Most recently, machine learning +models are being trained on the LIDAR data from self-driving +cars\cite{li2020deep} in order to better process their environments. However, +voxels are not often thought of as a way to store three-dimensional shapes, and +existing research focuses mainly on efficiently representing and processing +shapes. My approach models this problem of voxel storage and representation, and +turns it into a problem of database design. + +\subsection{Using Minecraft as a Model for a Database} + +% The problems with Minecraft +Minecraft\footnote{https://www.minecraft.net/en-us}, released in 2009, is a sandbox +game that is played in a world entirely composed of cubic voxels, where the +player has complete freedom to manipulate the world by building, destroying, or +exploring any part of it. I am focusing this database on the requirements of +Minecraft because the game involves some additional challenges that traditional +databases do not consider. Primarily, the world of Minecraft is infinite in the +horizontal $x$ and $z$ axes, but fixed in the $y$ axis, which limits the amount +of information that can be stored by the database at once. The world also +contains a denser voxel grid than in many other applications, meaning that far +more of the blocks in the world are filled than empty. + +A game is also a real-time application, which means that any performance issues +will immediately be apparent to the user. Most databases can be evaluated on +only their speed, but as the Minecraft server processes new information 20 times +per second, the game has a time budget of 50ms to handle all game logic, +including the storing of data. Less time processing the data in the world means +that more time will be freed up for the game to process other work, although +finishing work earlier will not necessarily be faster for the end user, if it +is still under the budget of 50ms.
Most databases do not meet this requirement, and +even though they may be faster, their complexity does not mean that they will +always finish operations within this time limit. + +These limitations also make Minecraft unable to take advantage of a cache: since +the number of different operations that can be done on the world is infinitely +large, remembering any previous operations will often not be helpful for the +system's performance. Minecraft also provides a good benchmark for the database, +because the unpredictability of players stresses the system's ability to return +results in a variety of settings. + +\section{Technical Background} + +\subsection{What is a database?} +When I refer to the concept of a database, I am referencing a program that sits +more or less as a ``black box'' between the user and a software application, +storing any data required for the application. In most existing applications, +this is done by a category of databases called ``relational databases'', which +offer a very general-purpose way to store user data that is highly connected. +For instance, a person stored in a relational database would be efficiently +linked with any of their associated information, such as name or age. + +% The model of a database +In database terms, any amount of data added to the database is called a +``write'', data retrieved from the database is called a ``read'', and any +questions asked, such as ``how many people have done this'', are called +``queries''. Developers ask these questions through computer languages, one such +example being Structured Query Language or SQL, which allow +the database to be queried efficiently. + +\subsection{Challenges With Existing Databases} + +% Software development and SQL +Most software engineering projects start with a simple front-end and back-end, +typically implemented with some sort of Model-View-Controller architecture, and +connected to a relational SQL database \cite{sqliteOnlyDatabase}.
This idea was +popularized by frameworks such as Ruby on Rails and Django, where the model was +most often modeled by structures within the database. This framework allowed +software developers to not have to worry about inner workings of the database, +and focus on writing business logic. This is how many start-ups were built, such +as GitHub \cite{githubSingleSQL}, which recently moved off its single SQL database +after 13 years, citing performance issues. + +% Challenges with working with SQL: Performance +Using a single SQL-speaking database can be a significant advantage for development +speed, but the database can have some issues keeping up with the demands of the +application as the performance requirements expand. +% Caching +As soon as this happens, companies typically put smaller caching applications in +front of their database, such as \verb|Redis|\footnote{https://redis.io/}, +\verb|memcached|\cite{nishtala2013scaling}, or \verb|TAO| \cite{bronson2013tao}, +to allow the application to remember some of the commonly asked questions and +reduce load on the database by not having to do the same work again. + +\subsubsection{The Complexity of General-Purpose Databases} +% What is being done about this +Modern SQL databases are also very complex. Three of the most popular SQL +databases, PostgreSQL, MySQL and SQLite, have 1.4 million lines +\footnote{https://wiki.postgresql.org/wiki/GSoC\_2018, in reference to the +text ``PostgreSQL is over 1.3M lines of code and some of the code paths can be +tricky to reach.''} of code, 2.9 million lines +\footnote{https://www.openhub.net/p/mysql}, and 150,000 lines +\footnote{https://www.sqlite.org/testing.html} respectively. + +% Why are databases inefficient? +Why are databases so complex? Most of the reason for the complexity is that, +because these database systems are so general-purpose, they cannot assume anything +about the data stored in them.
For the database, finding an efficient plan to +answer each query is a known NP-hard problem\cite{chatterji2002complexity}, and +to keep itself fast, the database must construct this plan with a complex set of +approximations, based on the assumptions that it can make, which leads to +ever-evolving complexity. + +% Impossible to maintain +With this complexity, it is impossible for a single person to understand the +complete inner workings of a database. Thus, maintaining the company's +database often becomes the job of a dedicated person in companies that can afford it, or +of entire teams of engineers at larger organizations such as +Google\cite{googlePerfTeam}. + +% Intro to special-purpose databases +What happens in the larger companies that can afford more engineering time, and +have a specific problem that they cannot solve with a traditional database? +Typically, this leads to the creation of special-purpose database solutions. For +instance, the global scale of iCloud and Apple's cloud solutions required them +to create FoundationDB\cite{zhou2021foundationdb}. A different set of challenges +in the Facebook inbox led to the creation of Apache +Cassandra\cite{lakshman2010cassandra}, which is optimized to allow for many +emails to be received, at the expense of search speed, which is done far less +frequently. + +\subsubsection{The Special-Purpose Database} + +Limiting a database's design to a specific use-case can make the development +process much simpler, to the point where it can be done by a single person, and +can offer higher performance. The first question that needs to be asked is +whether the application is \textit{write-heavy} or \textit{read-heavy}. +Read-heavy applications occur often in web development, and most social media +platforms have far more users reading the content than writing new content for +the platform.
In contrast, write-heavy applications are often seen in analytics +workloads, where data is written from many sources, and analyzed infrequently by +users. + +My application has a relatively even write and read balance, and I evaluated +three different storage data structures before choosing to implement my own. + +% Special-purpose databases +Recently, companies such as Tigerbeetle\cite{tigerbeetleDesign} have taken this +domain-driven approach to database design even further, while designing a +database from the ground up to do financial accounting, which outperforms a +reference MySQL implementation, improving from 76 accounting transactions per second to +1,757 transactions per second \cite{tigerbeetlePerf}. This highly specialized +and domain-specific approach to creating databases is what my project is going +to be based on, to create a database around the challenges that the game +Minecraft has. + +\subsubsection{Key-Value Databases} + +One of the main architectures that I considered for my project is a design +called a key-value store\cite{kvdatabase}, which would store the relationship of +a single voxel to its value. Many other voxel databases use this method to +achieve constant-time operations on retrieving points, which means that +regardless of the size of the dataset, the database will always be able to +return a result in the same amount of time. This structure is behind many of +the high-performance caches that are commonly used to speed up web applications, +such as Redis and RocksDB\cite{dong2021rocksdb}. In order to provide high speeds +for this data, the key-value mappings are usually stored in main memory, which +is far more expensive and limited than the system's disk drive, but offers +a speed advantage of several orders of magnitude\cite{latencyKnow}.
+ +\section{Prior Work} + +\subsection{Voxels for Efficient Computation} + +Most existing literature on the topic of using voxels to store shapes focuses on +the application of the voxel grid for efficient computation. Since voxel points +are completely independent of each other, this allows for efficient parallel +processors, which are increasingly common on consumer hardware, to take +advantage of this speedup. In VDB\cite{museth2013vdb} Museth demonstrates that +by modeling a sparse voxel grid in different resolutions, a computer cluster can +efficiently approximate physical structures such as a cloud, in order to +calculate expensive lighting operations. + +\subsection{Parallel Processing on Voxel Databases} + +Williams\cite{williams1992voxel} expands upon the uses of a voxel database to +model graph and mesh-based problems. Taking advantage of the parallelism in the +grid, many problems can be reframed in the representation of voxels, and +solved far more efficiently. This model, however, assumes that every +voxel is stored in shared memory, making this process only viable to solve +problems that can be modeled on one machine, and are far more computationally +expensive, rather than data-intensive. + +\subsection{Large Voxel Data Set Processing} + +Another approach to the problem of storing voxel data is the distributed +approach in Gorte et al. \cite{gorte2023analysis}. Since memory is limited +within one computer, this workload can be split up between many servers, which +allows very large datasets to be worked on by a single workstation through an +API. This method keeps many of the same performance considerations, but also +assumes that the voxel data is not very dense, and uses a three-dimensional +data structure called an octree, which allows the user to change the resolution +of the data that they are working on.
In the paper, Gorte acknowledges the need +to split large datasets up into smaller regions, which is similar to the concept +of ``chunks'' in my implementation. + +\subsection{Previous Special-Purpose Databases} + +The design of my database was also inspired by the LSM tree and data-driven +designs of Tigerbeetle\cite{tigerbeetleDesign}, which is also able to handle +concurrent operations on the same design. Another database, +CockroachDB\footnote{https://www.cockroachlabs.com/product/}, uses a key-value +mapping backend to store SQL-like tables and rows. Finally, the design of +caching layers in modern SQL caches such as Noria\cite{gjengset2018noria} shows +that it is possible to efficiently remember the complex queries found in SQL, +and replicate these in real-time. + +\section{Methods} + +Almost every part of the database was designed so that most operations could be +done in constant time. + +The database provides a simple interface to read and write data, consisting of +the following: +\begin{itemize} + \item Read a single block + \item Write a single block + \item Change a range of blocks + \item Read a pre-defined ``chunk'' of blocks +\end{itemize} + + +The process of fetching the data for a single point in the world starts at that +point's $x, y$ and $z$ location. The world is infinite in size on the horizontal +$x$ and $z$ axes, but limited in the vertical $y$ axis. In my database, the +world is composed of an infinite grid of ``chunks'', or columns that are a fixed +16 x 16 blocks in the $x$ and $z$ axes, but 256 blocks in the vertical $y$ axis. + +Once you know a point's location, you can find with a modulus what chunk the +point is located within. From there, the database only needs to retrieve the +data for the chunk stored at that location. + +Initial implementations for my database focused on tree-based approaches for +finding the files for chunks, but with their implementation complexity and non-constant +time complexity, I decided to store each chunk separately.
However, with worlds with +chunk counts in the hundreds of thousands, the filesystem implementations had +issues with searching through so many files, which led to performance problems. +Finally, I settled on merging all the chunk data into one file, and using the +filesystem's \verb|seek| syscall to look up the offset for the correct chunk. A +simple hash table was then used to store each chunk's location with its offset +in the file, which keeps the memory cost low, even with chunk counts in the +millions. This allows for constant-time searches for the chunk's data. + +Once a chunk is retrieved from disk, the format of the chunk is broken down into +smaller cubic slices of the chunk, called ``sections''. Each section is a +16x16x16 cubic area that keeps an index for every block. The point's $y$ +position tells the database what section the point is in, and a simple formula +is used to convert the remaining $x$ and $z$ axes into an index within the +section. + +Every section additionally stores a lookup table that stores a mapping of a +\textit{palette index} to the state of a block. When the value for the point is +retrieved from the section, the value returned is not the block's state, but +simply an index into this palette. The palette lookup is done in constant time, +and when a new block is added into the section that needs an additional state in +the palette, this value is added in constant time as well. The existence of this +palette supports the efficient operation of another part of the database, which +is the ability to change large portions of blocks in the world. + +Once the value of the point is found in the palette, the value can be returned +to the user. A visual diagram of this process can be found in figure +\ref{fig:lookup}.
+ +\begin{figure} + \centering + \includegraphics[width=8cm]{block-search.drawio.png} + \caption{The process of looking up a single block} + \label{fig:lookup} +\end{figure} + +Changing a region of blocks is also a common operation within the +database, and one which isn't locked to a specific range of chunks. This operation +is implemented as overwriting the palettes for a specific region. By overwriting +every palette index to the same value, every value in the chunk effectively gets +set to the same value. This does, however, create the need for an additional +``compaction'' step, where the palette is shrunk to remove duplicate values, and +every block within the section must be updated to point to the correct index in +the palette. This compaction is done upon any subsequent writes to the section +by inserting a block, because only this fixed-size section needs to be changed, +preserving the time of the operation as constant time. + +Finally, the retrieval of a single chunk can be done efficiently, because the +database already stores chunks separately, and serializes these to the client. + +% \cite{vohra2016apache}. + +\section{Evaluation Metrics} + +\subsection{Reading Single Voxels} + +Reads and writes of single voxels are the most common fundamental operation for +my database, and the database should be able to handle this operation in the same amount +of time, regardless of the size of the world. Both my implementation and the +simpler key-value store meet this criterion. + +\subsection{Changing Regions of Voxels} + +Changing regions of voxels should be able to be done in linear time. This is +because resetting or changing a region of voxels is important while drawing +shapes of various resolutions. Lower resolution shapes are less precise, and +thus are able to be written faster.
+ +\subsection{Memory Requirements} + +The memory requirement is set quite low, at 256MB, in order to require the +database to store most of its data on disk, and limit its memory usage to +important caching features. This limitation was chosen for larger datasets that +don't fit within memory on a single machine, because memory is much more +expensive than disk storage, and would limit the analysis to smaller voxel grids. + +\subsection{Reading Regions of Voxels} + +The ability to retrieve large shapes from the database is important, because in +order to export a shape, another operation must be present to efficiently do +this. This operation therefore must be done in constant time, because as +Gorte\cite{gorte2023analysis} identifies, many researchers might want to work +on the same dataset, and exporting all this data would become inefficient for +the database to process. In the use-case of Minecraft, this allows the server to +support many more players at once, by not sending every individual block to each +client. This requirement is not met by the key-value database, but is reached by +my implementation, by sending the stored chunks on disk. + +\subsection{Reading Neighboring Blocks} + +The last common operation in most voxel databases is the ability to read points +that are neighboring another point. This is important because many voxel shapes +approximate cubic shapes \cite{gorte2023analysis}, and in Minecraft, players are +constantly affecting voxels that are nearer to each other. 
+ +\section{Results and Discussion} + +Benchmarking on my laptop, inserting values in various spreads around the voxel +world, I get the following benchmarks, comparing an in-memory implementation of +SpatialDB, the disk-based implementation of SpatialDB, and a memory-based +key-value implementation in figure \ref{fig:reads}: + +\begin{figure} + \centering + \begin{tabular}{c | c | c | c} + Spread of Points & In-memory & Disk & KeyValue\\ + \hline + 128 & 4275 & 4146669 & 176.7\\ + 512 & 4184 & 3319162 & 190.6\\ + 2048 & 2613 & 422938 & 184.8\\ + 65536 & 2382 & 18814 & 186.1 + \end{tabular} + \caption{Time (in ns) to operate on a single voxel, based on the size of the + world (spread)} + \label{fig:reads} +\end{figure} + +These results show that the scaling remains consistent between the in-memory +version and the key-value store, although my implementation is about two orders +of magnitude slower than the latter. This scaling is however not met by the +performance of the on-disk database. Originally, I thought that these poor +results were the result of no caching being done on the chunk files, which would +have made searches much slower, but still doesn't explain the improvement in +performance by larger worlds. This led me to implement a disk cache, which had +similar results, to the final implementation where I combined all the data in +one large file, and selectively read sections from that file. This leads me to +believe that as the points tested grow more spread out, since the world is only +so large, many points will be outside of the loaded chunks, and instantly return +empty. + +This change could likely be addressed by a change in caching methods, and +remembering the data for more chunks, but this still doesn't address the slow +speeds for accessing data in the first place. The slow speeds are most likely +the decoding of the JSON data stored on disk, which is relatively large at +about 4 megabytes in size. 
A custom encoding method could be designed to replace +this scheme, or additionally pre-allocate the entire storage space in the +chunks, so that chunk data could be retrieved without decoding the entire chunk. +However, this would require a much more constrained data layout, and limit the +implementation of different voxels. + +Additionally, compression + +\section{Ethical Considerations} + +\subsection{Considerations of Computing Resources} + +Since databases are at the core of most complex systems, they are often +built to be run on hardware that the normal consumer cannot afford +\footnote{\url{https://docs.oracle.com/en/database/oracle/oracle-database/12.2/ntdbi/oracle-database-minimum-hardware-requirements.html}} +\footnote{\url{https://wiki.lustre.org/Lustre_Server_Requirements_Guidelines}}. + +The large hardware requirements of these databases come from the environments +where they are implemented, and at many of these companies, the ability to +keep buying faster hardware allows the company to work on other things that are +more important. However, what this does to the player is effectively price them +out of the game that they would already be playing, especially since the +database would also have to run alongside the existing Java application of +Minecraft, which quickly exhausts system memory. + +In the design of my server I have to prioritize performance to take +advantage of the existing hardware, but also make sure that the accessibility of +the application does not decrease as a result. + +\subsection{Considerations of Complexity} +Another factor to consider in the implementation of my database is how complex +the existing systems are. Some of the most popular SQL databases, PostgreSQL and +MySQL, have 1.4 and 4.4 million lines of code, respectively +\footnote{\url{https://news.ycombinator.com/item?id=24813239}}.
+ +With so much complexity going on, this significantly decreases the overall +knowledge of the system, as well as that of the individual user who has to debug their +game. Most of this is from the large amount of query logic that handles caching +and speeding up certain queries, so knowing more about the specific problem that +I am trying to solve removes this process from having to be done. + +Especially since most of the people in the Minecraft community are volunteers in +the open-source community, debugging an application this large would be out of +scope for enjoying a game, and likely lead to it being replaced with something +simpler. The reliability characteristics are also less than what are +required for Minecraft, since they are being compared against a single-threaded +Java program which has been tested to do the correct thing. + +\subsection{Considerations in Security} + +Since these databases are very complex, there is also the risk that having a +server exposed over the internet through the Minecraft game server might leave +it exposed to attacks. While this is a large issue, an even more important +implication is the ability to configure the database correctly. Since these +databases are extremely complex, it is also very hard to make sure that they are +configured securely. There have been many high-profile data +breaches\footnote{\url{https://www.zdnet.com/article/hacker-ransoms-23k-mongodb-databases-and-threatens-to-contact-gdpr-authorities/}} +that involve a single server, even at larger companies that have dedicated teams +working to prevent data breaches. + +My plan to mitigate this risk is to implement the database in a memory-safe +programming language, which should remove the risk class of memory-unsafety +bugs, which account for around 70\% of high-severity security bugs in the Chromium browser +engine\footnote{\url{https://www.chromium.org/Home/chromium-security/memory-safety/}}, +which is entirely written in non-memory-safe C++.
+ +And if the database information is ever able to be leaked through the Minecraft +protocol, the attacker would have access to the full data, because I am planning +to store it unencrypted for performance reasons, and rely on the encryption of +the Minecraft client. And, the data involved does not involve personally +identifying information, so the usefulness of the data would be close to +nothing. + +But, perhaps the most important security risk is if an attacker is able to +access the database directly and bypass all the isolation in the Minecraft +protocol, in order to wipe or corrupt the data for malicious reasons. This would +likely lead to the Minecraft server being unable to be played, and degrade the +experience of the players. It is my plan to take advantage of the limitations of +the types of Minecraft items to provide resilience and easy backups to the +system, because of the purpose-built nature of the system +\footnote{\url{https://twitter.com/eatonphil/status/1568247643788267521?s=20}}. + +\subsection{Considerations in Fairness} + +In the implementation of databases, it can often be beneficial to make certain +operations faster, at the expense of others that are not done as often. For +instance, if I notice that players often pull items in and out of their systems +often, but almost never search through the list of items, I can take advantage +of this to speed up the database for the most common operations. However, this +can be problematic if the things that I choose to sacrifice affect a certain +group of users. + +This tradeoff between speed and reliability occurs so often in Computer Science +and is described in terms of percentiles. For instance, if we notice that some +event occurs about half the time, we can say it is in the 50th percentile. +Similarly, if an event only occurs 1\% of the time, we can say it occurs in the +99th percentile. 
The impossibility of not hurting anyone when a decision +like this is made is written about by Google \cite{dean2013tail}, who have to make every +decision like this at their scale. + +My plan is to not have any tradeoffs that affect the normal gameplay of the +server, and keep it within the 50ms timeframe that Minecraft has allocated +to itself. Apart from this, one of the main goals of the project is to give +consistent performance, so any further decisions will be made around the +existing implementation of the Minecraft server. + +%https://www.embedded.com/implementing-a-new-real-time-scheduling-policy-for-linux-part-1/ +%https://www.kernel.org/doc/html/latest/scheduler/sched-design-CFS.html +%https://helix979.github.io/jkoo/post/os-scheduler/ + +\subsection{Considerations in Accessibility} + +By creating this system, I also have to consider if the players are going to +require a certain type of computer. Requiring a certain operating system or a +more powerful computer would limit access for many of the people that were +playing the game before. + +However, by basing the goal of the project on improving the performance of the +already existing implementation, any improvements would result in more people +being able to play than before. Also, by designing the system for normal +hardware and in a cross-platform way, this does not limit the people that are +able to access the improvements. + + +\subsection{Considerations in the Concentration of Power} + +Any improvements to the performance of servers in Minecraft would allow +many of the larger hosting companies, who rent servers monthly to individual +people, to drive down their hosting costs, and allow them to have larger returns +over the smaller providers.
However, since this market is so competitive between +companies, because of how easy it is to set up a company, and the options +between companies aren't very different, I would expect any improvement to +quickly disappear into the competitive market, and benefit everyone equally. + +\section{Future Work, and Conclusion} + +\printbibliography + +\end{document} diff --git a/paper/oxycomps.sty b/paper/oxycomps.sty new file mode 100644 index 0000000..806ec11 --- /dev/null +++ b/paper/oxycomps.sty @@ -0,0 +1,102 @@ +% A simple two-column LaTeX style for Occidental College's CS senior projects. +% Based on latex8.sty by Paolo.Ienne@di.epfl.ch + +\usepackage{times} % use Times as the default font +% define bold 11pt Times font for second-order headings +\font\elvbf = ptmb scaled 1100 + +\usepackage[style=numeric,sorting=nyt]{biblatex} % format the bibliography nicely +\usepackage{xpatch} % used to patch \textcite + +% change \textcite to do family-name (year) +\xpatchbibmacro{textcite} + {\printnames{labelname}} + {\printnames{labelname} (\printfield{year})} + {} + {} +% sort bibliography by last name +\DeclareNameAlias{default}{family-given} + +\usepackage{amsfonts} % provides many math symbols/fonts +\usepackage{amsmath} % provides many math environments +\usepackage{amssymb} % provides many math symbols/fonts +\usepackage{caption} % fixes caption spacing issues +\usepackage[usenames,dvipsnames]{color} % allows for colored text +\usepackage{enumitem} % allows adjustment of list spacing +\usepackage{graphicx} % allows insertion of graphics +\usepackage{hyperref} % creates links within the page and to URLs +\usepackage{listings} % provides the lstlisting environment +\usepackage{url} % formats URLs properly +\usepackage{verbatim} % provides the comment environment + +% set dimensions of columns, gap between columns, and paragraph indent +\setlength{\textheight}{8.875in} +\setlength{\textwidth}{6.875in} +\setlength{\columnsep}{0.3125in} +\setlength{\topmargin}{0in}
+\setlength{\headheight}{0in} +\setlength{\headsep}{0in} +\setlength{\parindent}{1em} +\setlength{\oddsidemargin}{-.304in} +\setlength{\evensidemargin}{-.304in} + +% remove the space between list items +\setlist{noitemsep} + +% style code listings +\lstset{ + basicstyle=\ttfamily\footnotesize, + breaklines=true, + showstringspaces=false +} + +% style the title +\def\@maketitle{ + \newpage + \begin{center} + {\Large \bf \@title \par} + % add two empty lines at the end of the title + \vspace*{2\baselineskip} + { + \large + \begin{tabular}[t]{c} + \@author + \end{tabular} + \par + } + % add small space at the end of the author name + \vspace*{.5em} + { + \ifx \@empty \@email + \else + \texttt{\@email} + \par + \vspace*{.25em} + \fi + \ifx \@empty \@affiliation + \else + \@affiliation + \fi + } + % add empty line at the end of the title block + \vspace*{\baselineskip} + \end{center} +} + +% style the abstract +\def\abstract{% + \centerline{\large\bf Abstract}% + \vspace*{\baselineskip}% +} + +% define email and affiliation +\def\email#1{\gdef\@email{#1}} +\gdef\@email{} +\def\affiliation#1{\gdef\@affiliation{#1}} +\gdef\@affiliation{} + +% correct heading spacing and type +\def\section{\@startsection {section}{1}{\z@} + {14pt plus 2pt minus 2pt}{14pt plus 2pt minus 2pt} {\large\bf}} +\def\subsection{\@startsection {subsection}{2}{\z@} + {13pt plus 2pt minus 2pt}{13pt plus 2pt minus 2pt} {\elvbf}} diff --git a/paper/references.bib b/paper/references.bib new file mode 100644 index 0000000..7cc93b2 --- /dev/null +++ b/paper/references.bib @@ -0,0 +1,307 @@ +// Introduction + +@misc{sqliteOnlyDatabase, + title={{SQLite} the only database you will ever need in most cases}, + url={https://unixsheikh.com/articles/sqlite-the-only-database-you-will-ever-need-in-most-cases.html}, + journal={https://unixsheikh.com/}, + publisher={https://unixsheikh.com/}, + author={Sheikh, Unix}, + year={2021}, + month=apr, +} + +@misc{ enwiki:1181180757, + author = "{Wikipedia contributors}", +
title = "Model–view–controller --- {Wikipedia}{,} The Free Encyclopedia", + year = "2023", + url = "https://en.wikipedia.org/w/index.php?title=Model%E2%80%93view%E2%80%93controller&oldid=1181180757", + urldate = "2023-12-13" +} + +@online{googlePerfTeam, + author = {{Google Performance Team}}, + title = {System Performance}, + month = may, + year = {2023}, + url = {https://research.google/teams/system-performance/}, +} + +// Applications of voxels + +@misc{ enwiki:1186283262, + author = "{Wikipedia contributors}", + title = "Voxel --- {Wikipedia}{,} The Free Encyclopedia", + year = "2023", + url = "https://en.wikipedia.org/w/index.php?title=Voxel&oldid=1186283262", + urldate = "2023-12-13" +} + +@article{norman2006beyond, + title={Beyond mind-reading: multi-voxel pattern analysis of {fMRI} data}, + author={Norman, Kenneth A and Polyn, Sean M and Detre, Greg J and Haxby, James V}, + journal={Trends in Cognitive Sciences}, + volume={10}, + number={9}, + pages={424--430}, + year={2006}, + publisher={Elsevier} +} + +@article{museth2013vdb, + title={{VDB}: High-resolution sparse volumes with dynamic topology}, + author={Museth, Ken}, + journal={{ACM} Transactions on Graphics ({TOG})}, + volume={32}, + number={3}, + pages={1--22}, + year={2013}, + publisher={ACM New York, NY, USA} +} + +@article{li2020deep, + title={Deep learning for {LiDAR} point clouds in autonomous driving: A review}, + author={Li, Ying and Ma, Lingfei and Zhong, Zilong and Liu, Fei and Chapman, Michael A and Cao, Dongpu and Li, Jonathan}, + journal={{IEEE} Transactions on Neural Networks and Learning Systems}, + volume={32}, + number={8}, + pages={3412--3432}, + year={2020}, + publisher={IEEE} +} + +// Literature Review + +@article{williams1992voxel, + title={Voxel databases: A paradigm for parallelism with spatial structure}, + author={Williams, Roy D}, + journal={Concurrency: Practice and Experience}, + volume={4}, + number={8}, +
pages={619--636}, + year={1992}, + publisher={Wiley Online Library} +} + +@article{gorte2023analysis, + title={Analysis of very large voxel datasets}, + author={Gorte, Ben}, + journal={International Journal of Applied Earth Observation and Geoinformation}, + volume={119}, + pages={103316}, + year={2023}, + publisher={Elsevier} +} + +@online{tigerbeetleDesign, + author = {{Tigerbeetle Developers}}, + title = {Tigerbeetle Design Document}, + month = jul, + year = {2020}, + url = {https://github.com/tigerbeetledb/tigerbeetle/blob/main/docs/DESIGN.md}, +} + +@online{tigerbeetlePerf, + author = {{Tigerbeetle Developers}}, + title = {Tigerbeetle Design Document}, + month = jul, + year = {2020}, + url = {https://github.com/tigerbeetledb/tigerbeetle/blob/main/docs/HISTORY.md}, +} + +@online{nomiSlowME, + author = {{Jokercortex}}, + title = {Moron's Guide to Managing Mechanical Monstrosities}, + month = feb, + year = {2020}, + url = {https://github.com/Nomifactory/Guides/blob/latest/guides/AE2ForDummies.md}, +} + +@misc{btree, + author = "{Wikipedia contributors}", + title = "B-tree --- {Wikipedia}{,} The Free Encyclopedia", + year = "2023", + url = "https://en.wikipedia.org/w/index.php?title=B-tree&oldid=1146616935", + urldate = "2023-05-13" +} + +@misc{kvdatabase, + author = "{Wikipedia contributors}", + title = "Key–value database --- {Wikipedia}{,} The Free Encyclopedia", + year = "2023", + url = "https://en.wikipedia.org/w/index.php?title=Key%E2%80%93value_database&oldid=1135560734", + urldate = "2023-05-13" +} + +@online{latencyKnow, + author = "Dean, Jeff", + title = "Latency Numbers Every Programmer Should Know", + year = "2018", + url = "https://gist.github.com/jboner/2841832", + urldate = "2023-12-12" +} + +@online{cockroachData, + author = {{CockroachDB Developers}}, + title = {Structured data encoding in {CockroachDB} {SQL}}, + year = {2017}, + month = mar, + url = 
{https://github.com/cockroachdb/cockroach/blob/master/docs/tech-notes/encoding.md}, +} + +@article{dong2021rocksdb, + title={{RocksDB}: Evolution of development priorities in a key-value store serving large-scale applications}, + author={Dong, Siying and Kryczka, Andrew and Jin, Yanqin and Stumm, Michael}, + journal={{ACM} Transactions on Storage ({TOS})}, + volume={17}, + number={4}, + pages={1--32}, + year={2021}, + publisher={ACM New York, NY} +} + +@misc{lsm, + author = "{Wikipedia contributors}", + title = "Log-structured merge-tree --- {Wikipedia}{,} The Free Encyclopedia", + year = "2023", + url = "https://en.wikipedia.org/w/index.php?title=Log-structured_merge-tree&oldid=1153046573", + urldate = "2023-05-13" +} + +@online{lsmUses, + author = {Groom, Braden}, + title = {Understanding {LSM} Trees: What Powers Write-Heavy Databases}, + month = jun, + year = {2020}, + url = {https://yetanotherdevblog.com/lsm/}, +} + +@article{chang2008bigtable, + title={{Bigtable}: A distributed storage system for structured data}, + author={Chang, Fay and Dean, Jeffrey and Ghemawat, Sanjay and Hsieh, Wilson C and Wallach, Deborah A and Burrows, Mike and Chandra, Tushar and Fikes, Andrew and Gruber, Robert E}, + journal={{ACM} Transactions on Computer Systems ({TOCS})}, + volume={26}, + number={2}, + pages={1--26}, + year={2008}, + publisher={ACM New York, NY, USA} +} + +@inproceedings{abadi2008column, + title={Column-stores vs.
row-stores: how different are they really?}, + author={Abadi, Daniel J and Madden, Samuel R and Hachem, Nabil}, + booktitle={Proceedings of the 2008 {ACM} {SIGMOD} International Conference on Management of Data}, + pages={967--980}, + year={2008} +} + +@article{athanassoulis2019optimal, + title={Optimal column layout for hybrid workloads}, + author={Athanassoulis, Manos and B{\o}gh, Kenneth S and Idreos, Stratos}, + journal={Proceedings of the {VLDB} Endowment}, + volume={12}, + number={13}, + pages={2393--2407}, + year={2019}, + publisher={VLDB Endowment} +} + +@inproceedings{armbrust2021lakehouse, + title={Lakehouse: a new generation of open platforms that unify data warehousing and advanced analytics}, + author={Armbrust, Michael and Ghodsi, Ali and Xin, Reynold and Zaharia, Matei}, + booktitle={Proceedings of {CIDR}}, + volume={8}, + year={2021} +} + +@article{dean2013tail, + title={The tail at scale}, + author={Dean, Jeffrey and Barroso, Luiz Andr{\'e}}, + journal={Communications of the {ACM}}, + volume={56}, + number={2}, + pages={74--80}, + year={2013}, + publisher={ACM New York, NY, USA} +} + +https://github.blog/2021-09-27-partitioning-githubs-relational-databases-scale/#:~:text=Yet%20at%20its%20core%2C%20GitHub,%2C%20issues%2C%20and%20pull%20requests.
+@misc{githubSingleSQL, + title={Partitioning {GitHub}’s relational databases to handle scale}, + url={https://github.blog/2021-09-27-partitioning-githubs-relational-databases-scale/}, + journal={The GitHub Blog}, + publisher={GitHub}, + author={Maurer, Thomas}, + year={2021}, + month=sep, +} + +@inproceedings{bronson2013tao, + title={{TAO}: {Facebook}’s distributed data store for the social graph}, + author={Bronson, Nathan and Amsden, Zach and Cabrera, George and Chakka, Prasad and Dimov, Peter and Ding, Hui and Ferris, Jack and Giardullo, Anthony and Kulkarni, Sachin and Li, Harry and others}, + booktitle={2013 {USENIX} Annual Technical Conference ({USENIX} {ATC} 13)}, + pages={49--60}, + year={2013} +} + +@inproceedings{chatterji2002complexity, + title={On the complexity of approximate query optimization}, + author={Chatterji, Sourav and Evani, Sai Surya Kiran and Ganguly, Sumit and Yemmanuru, Mahesh Datt}, + booktitle={Proceedings of the Twenty-First {ACM} {SIGMOD-SIGACT-SIGART} Symposium on Principles of Database Systems}, + pages={282--292}, + year={2002} +} + +@inproceedings{gjengset2018noria, + title={{Noria}: dynamic, partially-stateful data-flow for high-performance web applications}, + author={Gjengset, Jon and Schwarzkopf, Malte and Behrens, Jonathan and Ara{\'u}jo, Lara Timb{\'o} and Ek, Martin and Kohler, Eddie and Kaashoek, M Frans and Morris, Robert Tappan}, + booktitle={OSDI}, + volume={18}, + pages={213--231}, + year={2018} +} + +How storage works in database systems, and the evolution of how data is stored +@article{stonebraker2005goes, + title={What goes around comes around}, + author={Stonebraker, Michael and Hellerstein, Joey}, + journal={Readings in database systems}, + volume={4}, + pages={1}, + year={2005} +} + +@article{vohra2016apache, + title={{Apache} {Parquet}}, + author={Vohra, Deepak}, + journal={Practical Hadoop Ecosystem: A Definitive Guide to Hadoop-Related Frameworks and Tools}, + pages={325--335},
+ year={2016}, + publisher={Springer} +} + +@inproceedings{nishtala2013scaling, + title={Scaling {Memcache} at {Facebook}}, + author={Nishtala, Rajesh and Fugal, Hans and Grimm, Steven and Kwiatkowski, Marc and Lee, Herman and Li, Harry C and McElroy, Ryan and Paleczny, Mike and Peek, Daniel and Saab, Paul and others}, + booktitle={10th {USENIX} Symposium on Networked Systems Design and Implementation ({NSDI} 13)}, + pages={385--398}, + year={2013} +} + +@inproceedings{zhou2021foundationdb, + title={{FoundationDB}: A distributed unbundled transactional key value store}, + author={Zhou, Jingyu and Xu, Meng and Shraer, Alexander and Namasivayam, Bala and Miller, Alex and Tschannen, Evan and Atherton, Steve and Beamon, Andrew J and Sears, Rusty and Leach, John and others}, + booktitle={Proceedings of the 2021 International Conference on Management of Data}, + pages={2653--2666}, + year={2021} +} + +@article{lakshman2010cassandra, + title={{Cassandra}: a decentralized structured storage system}, + author={Lakshman, Avinash and Malik, Prashant}, + journal={{ACM} {SIGOPS} Operating Systems Review}, + volume={44}, + number={2}, + pages={35--40}, + year={2010}, + publisher={ACM New York, NY, USA} +}