feat: add on-disk cache for chunk files and a way to look up files; begin serialization support

This commit is contained in:
Nicholas Novak 2023-10-16 21:26:36 -07:00
parent dcdf3f3dfb
commit 496a67cc7f
7 changed files with 120 additions and 33 deletions

10
Cargo.lock generated
View File

@ -1104,18 +1104,18 @@ checksum = "a3f0bf26fd526d2a95683cd0f87bf103b8539e2ca1ef48ce002d67aad59aa0b4"
[[package]] [[package]]
name = "serde" name = "serde"
version = "1.0.188" version = "1.0.189"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "cf9e0fcba69a370eed61bcf2b728575f726b50b55cba78064753d708ddc7549e" checksum = "8e422a44e74ad4001bdc8eede9a4570ab52f71190e9c076d14369f38b9200537"
dependencies = [ dependencies = [
"serde_derive", "serde_derive",
] ]
[[package]] [[package]]
name = "serde_derive" name = "serde_derive"
version = "1.0.188" version = "1.0.189"
source = "registry+https://github.com/rust-lang/crates.io-index" source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4eca7ac642d82aa35b60049a6eccb4be6be75e599bd2e9adb5f875a737654af2" checksum = "1e48d1f918009ce3145511378cf68d613e3b3d9137d67272562080d68a2b32d5"
dependencies = [ dependencies = [
"proc-macro2", "proc-macro2",
"quote", "quote",
@ -1190,6 +1190,8 @@ dependencies = [
"clap", "clap",
"parquet", "parquet",
"rand", "rand",
"serde",
"serde_json",
"tokio", "tokio",
] ]

View File

@ -11,4 +11,6 @@ axum = "0.6.20"
clap = { version = "4.4.5", features = ["derive"] } clap = { version = "4.4.5", features = ["derive"] }
parquet = "47.0.0" parquet = "47.0.0"
rand = "0.8.5" rand = "0.8.5"
serde = { version = "1.0.189", features = ["derive"] }
serde_json = "1.0.107"
tokio = { version = "1.32.0", features = ["macros", "rt-multi-thread"] } tokio = { version = "1.32.0", features = ["macros", "rt-multi-thread"] }

View File

@ -10,14 +10,14 @@ struct MultipleBlocks {
#[derive(Debug)] #[derive(Debug)]
pub struct SimpleServer { pub struct SimpleServer {
chunks: Vec<ChunkData>, chunks: Vec<ChunkData>,
block_ranges: Vec<MultipleBlocks>, // block_ranges: Vec<MultipleBlocks>,
} }
impl SimpleServer { impl SimpleServer {
pub fn new() -> Self { pub fn new() -> Self {
SimpleServer { SimpleServer {
chunks: Vec::new(), chunks: Vec::new(),
block_ranges: Vec::new(), // block_ranges: Vec::new(),
} }
} }
@ -78,13 +78,14 @@ impl StorageServer for SimpleServer {
} }
fn change_block_range(&mut self, target_stage: BlockID, start: &BlockPos, end: &BlockPos) { fn change_block_range(&mut self, target_stage: BlockID, start: &BlockPos, end: &BlockPos) {
self.block_ranges.push(MultipleBlocks { unimplemented!()
id: target_stage, // self.block_ranges.push(MultipleBlocks {
range: BlockRange { // id: target_stage,
start: start.clone(), // range: BlockRange {
end: end.clone(), // start: start.clone(),
}, // end: end.clone(),
}) // },
// })
} }
fn read_block_at(&self, pos: &BlockPos) -> BlockID { fn read_block_at(&self, pos: &BlockPos) -> BlockID {
@ -96,11 +97,11 @@ impl StorageServer for SimpleServer {
return chunk_section.get_block_at_index(pos).clone(); return chunk_section.get_block_at_index(pos).clone();
} }
for blocks in self.block_ranges.iter() { // for blocks in self.block_ranges.iter() {
if blocks.range.within_range(&pos) { // if blocks.range.within_range(&pos) {
return blocks.id.clone(); // return blocks.id.clone();
} // }
} // }
BlockID::Empty BlockID::Empty
} }

View File

@ -0,0 +1,2 @@

View File

@ -1,19 +1,81 @@
use super::world::{BlockID, ChunkData, ChunkPos}; use super::world::{BlockID, ChunkData, ChunkPos};
use std::fs::File; use std::cmp::Ordering;
use std::{collections::HashMap, fs::File, time::Instant};
const DATABASE_FILE_LOCATION: &str = "./persistence"; const DATABASE_FILE_LOCATION: &str = "./persistence";
struct RunLengthEncoding { struct ChunkFile {}
pairs: Vec<(usize, BlockID)>,
const CACHED_CHUNK_FILES: usize = 1;
/// `ChunkStorageCache` keeps the most recently used chunk-file handles open,
/// so that repeated reads of chunk data from the same files can skip the cost
/// of reopening them
struct ChunkStorageCache {
// `cached_chunk_files` is a fixed-size array of already-open file handles for cached chunk files
cached_chunk_files: [File; CACHED_CHUNK_FILES],
// `cached_file_names` maps each cached filename to the index of its
// file handle within `cached_chunk_files`
cached_file_names: HashMap<String, usize>,
last_used_times: [Instant; CACHED_CHUNK_FILES],
} }
impl RunLengthEncoding { impl ChunkStorageCache {
fn from_chunk(chunk_data: &ChunkData) -> Self { fn load_chunk_file(&mut self, file_name: &str) -> &File {
for section in chunk_data.sections { let chunk_file = File::open(file_name).expect("Opening file for chunk failed");
for index in section.chunk_data {
// Yes
}
}
}
}
// Add the newly opened file to the cache
// Insert the new item to replace the item that was last accessed
// The minimum time should be the oldest time
let (last_used_index, _) = self
.last_used_times
.iter()
.enumerate()
.reduce(
|(fst_index, fst_time), (snd_index, snd_time)| match fst_time.cmp(&snd_time) {
Ordering::Less => (fst_index, fst_time),
Ordering::Equal | Ordering::Greater => (snd_index, snd_time),
},
)
.expect("There should always be a last used index");
// Next, we have to:
// * Remove the old filename and index mapping from the names
// * Replace the last used time with the current time
// * Replace the open file with the current one
// Find the name of the previous entry
let (previous_file_name, _) = self
.cached_file_names
.iter()
.find(|(_, &array_index)| array_index == last_used_index)
.expect("The last used index should always have a name");
self.cached_file_names.remove(&previous_file_name.clone());
self.cached_file_names
.insert(file_name.to_string(), last_used_index);
// Replace the timestamp with the new timestamp
self.last_used_times[last_used_index] = Instant::now();
self.cached_chunk_files[last_used_index] = chunk_file;
&self.cached_chunk_files[last_used_index]
}
/// `fetch_chunk_by_pos` takes in the position of a chunk, and returns the
/// data of the chunk from disk
///
/// This operation is cached, if possible, so that subsequent accesses to
/// the same chunk are handled by the same file
pub fn fetch_chunk_by_pos(&mut self, pos: &ChunkPos) -> ChunkData {
let file_name = pos.storage_file_name();
let file_index = self.cached_file_names.get(file_name.as_str());
let chunk_file = match file_index {
Some(index) => &self.cached_chunk_files[*index],
None => self.load_chunk_file(file_name.as_str()),
};
panic!("Yes");
}
}

View File

@ -1,2 +1,3 @@
// mod disk_storage; mod chunk_compression;
mod disk_storage;
pub mod world; pub mod world;

View File

@ -1,13 +1,16 @@
use core::fmt; use core::fmt;
use serde::ser;
use serde::Serialize;
use std::{ use std::{
cmp::{max, min}, cmp::{max, min},
fmt::Debug, fmt::Debug,
fs::File,
}; };
const SECTIONS_PER_CHUNK: usize = 16; const SECTIONS_PER_CHUNK: usize = 16;
const SLICE_SIZE: usize = 16 * 16; const SLICE_SIZE: usize = 16 * 16;
#[derive(Debug, Clone, PartialEq)] #[derive(Debug, Clone, PartialEq, Serialize)]
pub struct ChunkPos { pub struct ChunkPos {
pub x: isize, pub x: isize,
pub z: isize, pub z: isize,
@ -22,7 +25,13 @@ impl From<&BlockPos> for ChunkPos {
} }
} }
#[derive(Debug)] impl ChunkPos {
pub fn storage_file_name(&self) -> String {
format!("{}.{}.chunk", self.x, self.z)
}
}
#[derive(Debug, Serialize)]
pub struct ChunkData { pub struct ChunkData {
pub pos: ChunkPos, pub pos: ChunkPos,
pub sections: [ChunkSection; SECTIONS_PER_CHUNK], pub sections: [ChunkSection; SECTIONS_PER_CHUNK],
@ -39,6 +48,14 @@ impl ChunkData {
pub fn section_for(&self, block_pos: &BlockPos) -> &ChunkSection { pub fn section_for(&self, block_pos: &BlockPos) -> &ChunkSection {
&self.sections[block_pos.y % 16] &self.sections[block_pos.y % 16]
} }
pub fn write_to_file(&self, output_file: &mut File) {
let serialized = serde_json::to_string(self).unwrap();
}
pub fn read_from_file(chunk_file: &File) -> Self {
unimplemented!()
}
} }
// https://wiki.vg/Chunk_Format // https://wiki.vg/Chunk_Format
@ -168,7 +185,7 @@ impl BlockRange {
/// BlockID represents the type of block stored /// BlockID represents the type of block stored
#[repr(u8)] #[repr(u8)]
#[derive(Debug, Clone, Copy, PartialEq)] #[derive(Debug, Clone, Copy, PartialEq, Serialize)]
pub enum BlockID { pub enum BlockID {
Empty, Empty,
Generic, Generic,