From 496a67cc7fc9fbf6831b361a278f822d7549ceeb Mon Sep 17 00:00:00 2001 From: Nicholas Novak <34256932+NickyBoy89@users.noreply.github.com> Date: Mon, 16 Oct 2023 21:26:36 -0700 Subject: [PATCH] feat: Added on-disk cache for files, and way to lookup files. Also started on serialization --- Cargo.lock | 10 ++-- Cargo.toml | 2 + src/simple_server/server.rs | 29 +++++------ src/storage/chunk_compression.rs | 2 + src/storage/disk_storage.rs | 84 +++++++++++++++++++++++++++----- src/storage/mod.rs | 3 +- src/storage/world.rs | 23 +++++++-- 7 files changed, 120 insertions(+), 33 deletions(-) create mode 100644 src/storage/chunk_compression.rs diff --git a/Cargo.lock b/Cargo.lock index bd2455b..7e9ffa5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -1104,18 +1104,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4" [[package]] name = "serde" -version = "1.0.188" +version = "1.0.189" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" +checksum = "8e422a44e74ad4001bdc8eede9a4570ab52f71190e9c076d14369f38b9200537" dependencies = [ "serde_derive", ] [[package]] name = "serde_derive" -version = "1.0.188" +version = "1.0.189" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" +checksum = "1e48d1f918009ce3145511378cf68d613e3b3d9137d67272562080d68a2b32d5" dependencies = [ "proc-macro2", "quote", @@ -1190,6 +1190,8 @@ dependencies = [ "clap", "parquet", "rand", + "serde", + "serde_json", "tokio", ] diff --git a/Cargo.toml b/Cargo.toml index 376987a..fd7b310 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -11,4 +11,6 @@ axum = "0.6.20" clap = { version = "4.4.5", features = ["derive"] } parquet = "47.0.0" rand = "0.8.5" +serde = { version = "1.0.189", features = ["derive"] } +serde_json = "1.0.107" tokio = { version = "1.32.0", features = ["macros", "rt-multi-thread"] } diff 
--git a/src/simple_server/server.rs b/src/simple_server/server.rs index cceeb56..7ef868f 100644 --- a/src/simple_server/server.rs +++ b/src/simple_server/server.rs @@ -10,14 +10,14 @@ struct MultipleBlocks { #[derive(Debug)] pub struct SimpleServer { chunks: Vec, - block_ranges: Vec, + // block_ranges: Vec, } impl SimpleServer { pub fn new() -> Self { SimpleServer { chunks: Vec::new(), - block_ranges: Vec::new(), + // block_ranges: Vec::new(), } } @@ -78,13 +78,14 @@ impl StorageServer for SimpleServer { } fn change_block_range(&mut self, target_stage: BlockID, start: &BlockPos, end: &BlockPos) { - self.block_ranges.push(MultipleBlocks { - id: target_stage, - range: BlockRange { - start: start.clone(), - end: end.clone(), - }, - }) + unimplemented!() + // self.block_ranges.push(MultipleBlocks { + // id: target_stage, + // range: BlockRange { + // start: start.clone(), + // end: end.clone(), + // }, + // }) } fn read_block_at(&self, pos: &BlockPos) -> BlockID { @@ -96,11 +97,11 @@ impl StorageServer for SimpleServer { return chunk_section.get_block_at_index(pos).clone(); } - for blocks in self.block_ranges.iter() { - if blocks.range.within_range(&pos) { - return blocks.id.clone(); - } - } + // for blocks in self.block_ranges.iter() { + // if blocks.range.within_range(&pos) { + // return blocks.id.clone(); + // } + // } BlockID::Empty } diff --git a/src/storage/chunk_compression.rs b/src/storage/chunk_compression.rs new file mode 100644 index 0000000..139597f --- /dev/null +++ b/src/storage/chunk_compression.rs @@ -0,0 +1,2 @@ + + diff --git a/src/storage/disk_storage.rs b/src/storage/disk_storage.rs index 2b695a8..c858265 100644 --- a/src/storage/disk_storage.rs +++ b/src/storage/disk_storage.rs @@ -1,19 +1,81 @@ use super::world::{BlockID, ChunkData, ChunkPos}; -use std::fs::File; +use std::cmp::Ordering; +use std::{collections::HashMap, fs::File, time::Instant}; const DATABASE_FILE_LOCATION: &str = "./persistence"; -struct RunLengthEncoding { - pairs: Vec<(usize, 
BlockID)>, +struct ChunkFile {} + +const CACHED_CHUNK_FILES: usize = 1; + +/// `ChunkStorageCache` caches a list of the most recently used file handles +/// where chunks are stored from, and allows for faster accessing of the data +/// from chunks +struct ChunkStorageCache { + // `cached_chunk_files` is a vector of cached file handles that are already open + cached_chunk_files: [File; CACHED_CHUNK_FILES], + // `cached_file_names` is a list of all the filenames that are contained + // within the cache + cached_file_names: HashMap, + last_used_times: [Instant; CACHED_CHUNK_FILES], } -impl RunLengthEncoding { - fn from_chunk(chunk_data: &ChunkData) -> Self { - for section in chunk_data.sections { - for index in section.chunk_data { - // Yes - } - } +impl ChunkStorageCache { + fn load_chunk_file(&mut self, file_name: &str) -> &File { + let chunk_file = File::open(file_name).expect("Opening file for chunk failed"); + + // Add the newly opened file to the cache + + // Insert the new item to replace the item that was last accessed + // The minimum time should be the oldest time + let (last_used_index, _) = self + .last_used_times + .iter() + .enumerate() + .reduce( + |(fst_index, fst_time), (snd_index, snd_time)| match fst_time.cmp(&snd_time) { + Ordering::Less => (fst_index, fst_time), + Ordering::Equal | Ordering::Greater => (snd_index, snd_time), + }, + ) + .expect("There should always be a last used index"); + + // Next, we have to: + // * Remove the old filename and index mapping from the names + // * Replace the last used time with the current time + // * Replace the open file with the current one + + // Find the name of the previous entry, cloning it out so the immutable + // borrow of `cached_file_names` ends before the mutable `remove` below + let previous_file_name = self + .cached_file_names + .iter() + .find(|(_, &array_index)| array_index == last_used_index) + .map(|(name, _)| name.clone()) + .expect("The last used index should always have a name"); + self.cached_file_names.remove(&previous_file_name); + self.cached_file_names + .insert(file_name.to_string(), last_used_index); + // 
Replace the timestamp with the new timestamp + self.last_used_times[last_used_index] = Instant::now(); + self.cached_chunk_files[last_used_index] = chunk_file; + + &self.cached_chunk_files[last_used_index] + } + /// `fetch_chunk_by_pos` takes in the position of a chunk, and returns the + /// data of the chunk from disk + /// + /// This operation is cached, if possible, so that subsequent accesses to + /// the same chunk are handled by the same file + pub fn fetch_chunk_by_pos(&mut self, pos: &ChunkPos) -> ChunkData { + let file_name = pos.storage_file_name(); + + // `.copied()` ends the borrow of `cached_file_names` so the `None` arm + // may call `load_chunk_file` (`&mut self`) without a borrow conflict + let file_index = self.cached_file_names.get(file_name.as_str()).copied(); + + let chunk_file = match file_index { + Some(index) => &self.cached_chunk_files[index], + None => self.load_chunk_file(file_name.as_str()), + }; + + panic!("Yes"); + } } - diff --git a/src/storage/mod.rs b/src/storage/mod.rs index 59158dd..107a43d 100644 --- a/src/storage/mod.rs +++ b/src/storage/mod.rs @@ -1,2 +1,3 @@ -// mod disk_storage; +mod chunk_compression; +mod disk_storage; pub mod world; diff --git a/src/storage/world.rs b/src/storage/world.rs index 60c7bda..232d3c7 100644 --- a/src/storage/world.rs +++ b/src/storage/world.rs @@ -1,13 +1,16 @@ use core::fmt; +use serde::ser; +use serde::Serialize; use std::{ cmp::{max, min}, fmt::Debug, + fs::File, }; const SECTIONS_PER_CHUNK: usize = 16; const SLICE_SIZE: usize = 16 * 16; -#[derive(Debug, Clone, PartialEq)] +#[derive(Debug, Clone, PartialEq, Serialize)] pub struct ChunkPos { pub x: isize, pub z: isize, @@ -22,7 +25,13 @@ impl From<&BlockPos> for ChunkPos { } } -#[derive(Debug)] +impl ChunkPos { + pub fn storage_file_name(&self) -> String { + format!("{}.{}.chunk", self.x, self.z) + } +} + +#[derive(Debug, Serialize)] pub struct ChunkData { pub pos: ChunkPos, pub sections: [ChunkSection; SECTIONS_PER_CHUNK], @@ -39,6 +48,14 @@ impl ChunkData { pub fn section_for(&self, block_pos: &BlockPos) -> &ChunkSection { &self.sections[block_pos.y % 16] } + + pub fn write_to_file(&self, 
output_file: &mut File) { + let serialized = serde_json::to_string(self).unwrap(); + } + + pub fn read_from_file(chunk_file: &File) -> Self { + unimplemented!() + } } // https://wiki.vg/Chunk_Format @@ -168,7 +185,7 @@ impl BlockRange { /// BlockID represents the type of block stored #[repr(u8)] -#[derive(Debug, Clone, Copy, PartialEq)] +#[derive(Debug, Clone, Copy, PartialEq, Serialize)] pub enum BlockID { Empty, Generic,