From 0be9d86344116aa6385fe7f5f0845f04c3a51e17 Mon Sep 17 00:00:00 2001 From: Mark Poliakov Date: Thu, 5 Dec 2024 13:31:25 +0200 Subject: [PATCH] block: write size hint --- kernel/driver/block/nvme/src/drive.rs | 2 +- kernel/driver/block/nvme/src/lib.rs | 6 +- kernel/driver/block/nvme/src/queue.rs | 14 +--- kernel/driver/fs/ext2/src/dir.rs | 4 +- kernel/driver/fs/ext2/src/file.rs | 2 +- kernel/driver/fs/ext2/src/inode.rs | 101 +++++++++++++++++++++++++- kernel/driver/fs/ext2/src/lib.rs | 47 +++++++++--- kernel/libk/src/vfs/block/cache.rs | 56 +++++++++----- 8 files changed, 180 insertions(+), 52 deletions(-) diff --git a/kernel/driver/block/nvme/src/drive.rs b/kernel/driver/block/nvme/src/drive.rs index 6057ee12..6a5dc469 100644 --- a/kernel/driver/block/nvme/src/drive.rs +++ b/kernel/driver/block/nvme/src/drive.rs @@ -110,7 +110,7 @@ impl NgBlockDevice for NvmeDrive { ) .await; - // log::info!(target: "io", "write -> #{lba}, {lba_count} blocks -> {result:?} @ {buffer_address:#x}"); + log::info!(target: "io", "write -> #{lba}, {lba_count} blocks -> {result:?} @ {buffer_address:#x}"); result } diff --git a/kernel/driver/block/nvme/src/lib.rs b/kernel/driver/block/nvme/src/lib.rs index 4bc2ccb2..e4b818fc 100644 --- a/kernel/driver/block/nvme/src/lib.rs +++ b/kernel/driver/block/nvme/src/lib.rs @@ -230,9 +230,9 @@ impl NvmeController { let max_transfer_size = if identify.mdts == 0 { // Pick some sane default value - 256 * PAGE_SIZE + 256 * self.min_page_size } else { - (1 << identify.mdts) * PAGE_SIZE + (1 << identify.mdts) * self.min_page_size }; self.create_queues().await?; @@ -306,7 +306,7 @@ impl NvmeController { )?, }; - ioq.wait_for_completion(cmd_id, &prp_list, ()).await?; + ioq.wait_for_completion(cmd_id, ()).await?; Ok(()) } diff --git a/kernel/driver/block/nvme/src/queue.rs b/kernel/driver/block/nvme/src/queue.rs index 97309e69..23745013 100644 --- a/kernel/driver/block/nvme/src/queue.rs +++ b/kernel/driver/block/nvme/src/queue.rs @@ -5,7 +5,6 @@ use alloc::{ 
vec::Vec, }; use bytemuck::{Pod, Zeroable}; -use futures_util::Future; use libk_mm::{ address::{AsPhysicalAddress, PhysicalAddress}, PageBox, @@ -119,10 +118,6 @@ impl PrpList { } } } - - pub fn none() -> Self { - Self::None - } } const_assert!(size_of::().is_power_of_two()); @@ -311,12 +306,9 @@ impl QueuePair { pub async fn wait_for_completion( &self, command_id: u32, - list: &PrpList, result: T, ) -> Result { let mut response = Some(result); - // NOTE: for multiple blocks supplied via the PRP list, the NVMe controller will signal - // a completion per each block poll_fn(|cx| { let mut inner = self.inner.lock(); @@ -384,7 +376,7 @@ impl QueuePair { pub async fn request_no_data(&self, req: C) -> Result<(), NvmeError> { let list = PrpList::None; let command_id = self.submit(req, &list, true)?; - self.wait_for_completion(command_id, &list, ()) + self.wait_for_completion(command_id, ()) .await .map_err(NvmeError::CommandError) } @@ -399,9 +391,7 @@ impl QueuePair { let response = PageBox::new_uninit().map_err(NvmeError::MemoryError)?; let list = PrpList::from_buffer(unsafe { response.as_physical_address() }, size_of::())?; let command_id = self.submit(req, &list, true)?; - let result = self - .wait_for_completion(command_id, &list, response) - .await?; + let result = self.wait_for_completion(command_id, response).await?; Ok(unsafe { result.assume_init() }) } diff --git a/kernel/driver/fs/ext2/src/dir.rs b/kernel/driver/fs/ext2/src/dir.rs index f79eda00..a8562d3e 100644 --- a/kernel/driver/fs/ext2/src/dir.rs +++ b/kernel/driver/fs/ext2/src/dir.rs @@ -191,7 +191,7 @@ impl DirectoryNode { for i in 0..n { let fit_block = self .fs - .with_inode_block_mut(&inode, i, |block| { + .with_inode_block_mut(&inode, i, 0, |block| { let mut iter = DirentIterMut::new(&self.fs, &mut block[..], 0); if iter.try_fit(name, ino) { Ok(true) @@ -220,7 +220,7 @@ impl DirectoryNode { .await?; self.fs - .with_inode_block_mut(&inode, block_index, |block| { + .with_inode_block_mut(&inode, 
block_index, self.fs.block_size, |block| { block.fill(0); // Place dirent diff --git a/kernel/driver/fs/ext2/src/file.rs b/kernel/driver/fs/ext2/src/file.rs index adbc9f76..00c49677 100644 --- a/kernel/driver/fs/ext2/src/file.rs +++ b/kernel/driver/fs/ext2/src/file.rs @@ -77,7 +77,7 @@ impl RegularNode { let amount = remaining.min(self.fs.block_size - block_offset); self.fs - .with_inode_block_mut(&inode, block_index as u32, |block| { + .with_inode_block_mut(&inode, block_index as u32, amount, |block| { block[block_offset..block_offset + amount] .copy_from_slice(&buffer[offset..offset + amount]); Ok(()) diff --git a/kernel/driver/fs/ext2/src/inode.rs b/kernel/driver/fs/ext2/src/inode.rs index 1b526ac1..39e434fe 100644 --- a/kernel/driver/fs/ext2/src/inode.rs +++ b/kernel/driver/fs/ext2/src/inode.rs @@ -211,6 +211,8 @@ impl Inode { debug_assert!(old_l0_capacity <= new_l0_capacity); + log::debug!("Grow L0: {old_l0_capacity} -> {new_l0_capacity}"); + for i in old_l0_capacity..new_l0_capacity { let i = i as usize; let block = fs.allocate_block().await?; @@ -220,6 +222,89 @@ impl Inode { Ok(()) } + async fn grow_l1( + &mut self, + fs: &Ext2Fs, + old_capacity: u64, + new_capacity: u64, + ) -> Result<(), Error> { + let old_l1_capacity = old_capacity + .saturating_sub(DIRECT_BLOCK_COUNT as u64) + .min(fs.pointers_per_block as u64); + let new_l1_capacity = new_capacity + .saturating_sub(DIRECT_BLOCK_COUNT as u64) + .min(fs.pointers_per_block as u64); + + log::debug!("Grow L1: {old_l1_capacity} -> {new_l1_capacity}"); + + debug_assert!(old_l1_capacity <= new_l1_capacity); + + if old_l1_capacity == 0 && new_l1_capacity != 0 { + // Allocate an indirect block + let block = fs.allocate_block().await?; + self.blocks.indirect_block_l1 = block; + } + + for i in old_l1_capacity..new_l1_capacity { + // Allocate inner blocks + debug_assert_ne!(self.blocks.indirect_block_l1, 0); + let i = i as usize; + let block = fs.allocate_block().await?; + 
fs.write_index(self.blocks.indirect_block_l1, i, block) + .await?; + } + + Ok(()) + } + + async fn grow_l2( + &mut self, + fs: &Ext2Fs, + old_capacity: u64, + new_capacity: u64, + ) -> Result<(), Error> { + let old_l2_capacity = (old_capacity as usize) + .saturating_sub(DIRECT_BLOCK_COUNT + fs.pointers_per_block) + .min(fs.pointers_per_block * fs.pointers_per_block); + let new_l2_capacity = (new_capacity as usize) + .saturating_sub(DIRECT_BLOCK_COUNT + fs.pointers_per_block) + .min(fs.pointers_per_block * fs.pointers_per_block); + + let old_l2_l0 = old_l2_capacity.div_ceil(fs.pointers_per_block); + let new_l2_l0 = new_l2_capacity.div_ceil(fs.pointers_per_block); + + log::debug!( + "Grow L2: {old_l2_capacity} ({old_l2_l0} L2-1) -> {new_l2_capacity} ({new_l2_l0} L2-1)" + ); + + if old_l2_capacity == 0 && new_l2_capacity != 0 { + // Allocate L2 indirect block + let block = fs.allocate_block().await?; + self.blocks.indirect_block_l2 = block; + } + + // Grow L2 direct-indirect block + for i in old_l2_l0..new_l2_l0 { + debug_assert_ne!(self.blocks.indirect_block_l2, 0); + let block = fs.allocate_block().await?; + fs.write_index(self.blocks.indirect_block_l2, i, block) + .await?; + } + + // Grow L2 indirect-indirect blocks + for i in old_l2_capacity..new_l2_capacity { + debug_assert_ne!(self.blocks.indirect_block_l2, 0); + let l1i = i / fs.pointers_per_block; + let l0i = i % fs.pointers_per_block; + let indirect = fs.read_index(self.blocks.indirect_block_l2, l1i).await?; + debug_assert_ne!(indirect, 0); + let block = fs.allocate_block().await?; + fs.write_index(indirect, l0i, block).await?; + } + + Ok(()) + } + fn set_size(&mut self, fs: &Ext2Fs, size: u64) { let block_count = size.div_ceil(fs.block_size as u64); @@ -244,17 +329,27 @@ impl Inode { return Ok(false); } + // TODO check max inode size + let new_blocks = size.div_ceil(fs.block_size as u64); let old_blocks = self.size(fs).div_ceil(fs.block_size as u64); + if new_blocks as usize + > DIRECT_BLOCK_COUNT + + 
fs.pointers_per_block + + fs.pointers_per_block * fs.pointers_per_block + { + log::warn!("ext2: only L0/L1 are supported"); + return Err(Error::InvalidArgument); + } + match old_blocks.cmp(&new_blocks) { // Grow Ordering::Less => { - if new_blocks > DIRECT_BLOCK_COUNT as u64 { - todo!(); - } log::debug!("Grow inode: {old_blocks} -> {new_blocks} blocks"); self.grow_direct(fs, old_blocks, new_blocks).await?; + self.grow_l1(fs, old_blocks, new_blocks).await?; + self.grow_l2(fs, old_blocks, new_blocks).await?; } // Shrink Ordering::Greater => todo!(), diff --git a/kernel/driver/fs/ext2/src/lib.rs b/kernel/driver/fs/ext2/src/lib.rs index a17d1175..8772f418 100644 --- a/kernel/driver/fs/ext2/src/lib.rs +++ b/kernel/driver/fs/ext2/src/lib.rs @@ -44,6 +44,8 @@ struct Bgdt { pub struct Ext2Fs { mapper: DeviceMapper, inode_cache: OneTimeInit>, + cached: bool, + state: IrqSafeRwLock, bgdt: Bgdt, @@ -263,6 +265,8 @@ impl Ext2Fs { mapper, inode_cache: OneTimeInit::new(), + cached, + state: IrqSafeRwLock::new(State { superblock, dirty: false, @@ -313,13 +317,14 @@ impl Ext2Fs { pub async fn with_block_mut Result>( &self, index: u32, + write_size: usize, mapper: F, ) -> Result { if index < 1 || index >= self.total_blocks { return Err(Error::InvalidFile); } self.mapper - .try_with_mut(self.block_address(index), mapper) + .try_with_mut(self.block_address(index), write_size, mapper) .await } @@ -337,10 +342,11 @@ impl Ext2Fs { &self, inode: &Inode, block: u32, + write_size: usize, mapper: F, ) -> Result { let block_index = self.inode_block_index(inode, block).await?; - self.with_block_mut(block_index, mapper).await + self.with_block_mut(block_index, write_size, mapper).await } async fn with_bgdt_entry Result>( @@ -378,12 +384,17 @@ impl Ext2Fs { } let offset_in_block = offset % self.block_size; - self.with_block_mut(block as u32 + self.bgdt.base, |block| { - let descriptor = bytemuck::from_bytes_mut( - &mut block[offset_in_block..offset_in_block + size_of::()], - ); - 
mapper(descriptor) - }) + self.with_block_mut( + block as u32 + self.bgdt.base, + size_of::(), + |block| { + let descriptor = bytemuck::from_bytes_mut( + &mut block + [offset_in_block..offset_in_block + size_of::()], + ); + mapper(descriptor) + }, + ) .await } @@ -419,7 +430,7 @@ impl Ext2Fs { pub async fn write_inode(&self, ino: u32, inode: &Inode) -> Result<(), Error> { let (ino_block, offset_in_block) = self.inode(ino).await?; - self.with_block_mut(ino_block, |block| { + self.with_block_mut(ino_block, size_of::(), |block| { block[offset_in_block..offset_in_block + size_of::()] .copy_from_slice(bytemuck::bytes_of(inode)); Ok(()) @@ -469,7 +480,7 @@ impl Ext2Fs { if let Some((bitmap, group_item_count)) = bitmap { let no = self - .with_block_mut(bitmap, |bitmap| { + .with_block_mut(bitmap, 0, |bitmap| { for i in 0..bit_per_block.min(group_item_count as usize) { let index = i / 8; let bit = 1u8 << (i % 8); @@ -490,7 +501,9 @@ impl Ext2Fs { state.dirty = true; } - self.flush_superblock().await?; + if !self.cached { + self.flush_superblock().await?; + } return Ok(group_index * group_item_count + no); } @@ -603,6 +616,18 @@ impl Ext2Fs { .await } + async fn write_index(&self, block_index: u32, index: usize, value: u32) -> Result<(), Error> { + self.with_block_mut(block_index, size_of::(), |block| { + let indirect: &mut [u32] = unsafe { + core::slice::from_raw_parts_mut(block.as_mut_ptr().cast(), self.pointers_per_block) + }; + + indirect[index] = value; + Ok(()) + }) + .await + } + async fn inode_block_index(&self, inode: &Inode, index: u32) -> Result { let mut index = index as usize; // L0 diff --git a/kernel/libk/src/vfs/block/cache.rs b/kernel/libk/src/vfs/block/cache.rs index 85eaf01d..cd7dae7c 100644 --- a/kernel/libk/src/vfs/block/cache.rs +++ b/kernel/libk/src/vfs/block/cache.rs @@ -4,7 +4,7 @@ use core::{ ops::{Deref, DerefMut}, }; -use alloc::{sync::Arc, vec::Vec}; +use alloc::sync::Arc; use kernel_arch::mem::PhysicalMemoryAllocator; use 
libk_mm::{address::PhysicalAddress, phys::GlobalPhysicalAllocator, PageBox}; use libk_util::{lru_hash_table::LruCache, sync::spin_rwlock::IrqSafeRwLock}; @@ -101,11 +101,12 @@ impl> DeviceMapper { pub async fn try_with_mut Result>( &self, pos: u64, + size: usize, mapper: F, ) -> Result { match self { - Self::Uncached(uncache) => uncache.try_with_mut(pos, mapper).await, - Self::Cached(cache) => cache.try_with_mut(pos, mapper).await, + Self::Uncached(uncache) => uncache.try_with_mut(pos, size, mapper).await, + Self::Cached(cache) => cache.try_with_mut(pos, size, mapper).await, } } @@ -139,10 +140,14 @@ impl> UncachedCache { pub async fn try_with_mut Result>( &self, pos: u64, + size: usize, mapper: F, ) -> Result { let mut data = PageBox::<_, A>::new_uninit_slice_in(self.block_size)?; - self.device.read_aligned(pos, data.as_slice_mut()).await?; + // No need to read a block only to then fully rewrite it + if size != self.block_size { + self.device.read_aligned(pos, data.as_slice_mut()).await?; + } let mut data = unsafe { data.assume_init_slice() }; let result = mapper(&mut data[..])?; self.device.write_aligned(pos, data.as_slice()).await?; @@ -179,9 +184,17 @@ impl> BlockCache { } } - async fn fetch_block(&self, pos: u64) -> Result>>, Error> { + async fn fetch_block( + &self, + pos: u64, + write_size: Option, + ) -> Result>>, Error> { + let need_read = write_size.map(|sz| sz != self.block_size).unwrap_or(true); let mut data = PageBox::new_uninit_slice_in(self.block_size)?; - self.device.read_aligned(pos, data.as_slice_mut()).await?; + // Don't read a block that's going to be fully rewritten immediately + if need_read { + self.device.read_aligned(pos, data.as_slice_mut()).await?; + } let data = unsafe { data.assume_init_slice() }; Ok(Arc::new(IrqSafeRwLock::new(CachedBlock { data, @@ -189,10 +202,14 @@ impl> BlockCache { }))) } - async fn entry(&self, pos: u64) -> Result>>, Error> { + async fn entry( + &self, + pos: u64, + write_size: Option, + ) -> Result>>, Error> { 
let mut lock = self.cache.lock().await; let (value, evicted) = lock - .try_get_or_insert_with_async(pos, || self.fetch_block(pos)) + .try_get_or_insert_with_async(pos, || self.fetch_block(pos, write_size)) .await?; if let Some((pos, block)) = evicted { @@ -207,7 +224,7 @@ impl> BlockCache { pos: u64, mapper: F, ) -> Result { - let block = self.entry(pos).await?; + let block = self.entry(pos, None).await?; let result = mapper(&block.read()[..])?; Ok(result) } @@ -215,9 +232,10 @@ impl> BlockCache { pub async fn try_with_mut Result>( &self, pos: u64, + size: usize, mapper: F, ) -> Result { - let block = self.entry(pos).await?; + let block = self.entry(pos, Some(size)).await?; let mut block = block.write(); let result = mapper(&mut block[..])?; block.dirty = true; @@ -419,7 +437,7 @@ mod tests { const LBA: u64 = 1; cache - .try_with_mut(LBA * BS as u64, |block, _| { + .try_with_mut(LBA * BS as u64, 16, |block| { block[0..16].fill(0x12); Ok(()) }) @@ -440,21 +458,21 @@ mod tests { } cache - .try_with_mut(LBA * BS as u64, |block, _| { + .try_with_mut(LBA * BS as u64, 16, |block| { block[16..32].fill(0x23); Ok(()) }) .await .unwrap(); cache - .try_with_mut(LBA * BS as u64, |block, _| { + .try_with_mut(LBA * BS as u64, 16, |block| { block[48..64].fill(0x34); Ok(()) }) .await .unwrap(); cache - .try_with_mut(LBA * BS as u64, |block, _| { + .try_with_mut(LBA * BS as u64, 128, |block| { block[128..256].fill(0xF1); Ok(()) }) @@ -504,7 +522,7 @@ mod tests { // Go through all blocks, fill those with some values for i in 0..1024 { cache - .try_with_mut(i * BS as u64, |block, _| { + .try_with_mut(i * BS as u64, BS, |block| { block.fill(mapper(i)); Ok(()) }) @@ -522,14 +540,14 @@ mod tests { for i in 0..1023 { cache - .try_with_mut(i * BS as u64, |block, _| { + .try_with_mut(i * BS as u64, BS, |block| { block.fill(0x12); Ok(()) }) .await .unwrap(); cache - .try_with_mut((i + 1) * BS as u64, |block, _| { + .try_with_mut((i + 1) * BS as u64, BS, |block| { block.fill(0x23); Ok(()) }) 
@@ -539,14 +557,14 @@ mod tests { for i in 0..1023 { cache - .try_with_mut(i * BS as u64, |block, _| { + .try_with_mut(i * BS as u64, 0, |block| { block.iter_mut().for_each(|x| *x += 1); Ok(()) }) .await .unwrap(); cache - .try_with_mut((i + 1) * BS as u64, |block, _| { + .try_with_mut((i + 1) * BS as u64, 0, |block| { block.iter_mut().for_each(|x| *x += 2); Ok(()) })