block: write size hint

Mark Poliakov 2024-12-05 13:31:25 +02:00
parent dd2c948107
commit 0be9d86344
8 changed files with 180 additions and 52 deletions
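
In short: every mutable block access now carries a hint saying how many bytes the caller intends to write. When the hint covers the whole block, the block layer can skip the read half of the read-modify-write cycle, since the old contents would be overwritten anyway. A self-contained sketch of the idea (the `Disk` type and its counters are illustrative, not part of this commit):

    // Hypothetical stand-in for the device + block cache; shows why a
    // full-block write (write_size == block_size) needs no prior read.
    struct Disk {
        block_size: usize,
        data: Vec<u8>,
        reads: usize, // device reads performed, to show what the hint saves
    }

    impl Disk {
        fn with_block_mut(&mut self, pos: usize, write_size: usize, f: impl FnOnce(&mut [u8])) {
            let mut buf = vec![0u8; self.block_size];
            // Partial write: fetch the old contents so untouched bytes survive
            if write_size != self.block_size {
                buf.copy_from_slice(&self.data[pos..pos + self.block_size]);
                self.reads += 1;
            }
            f(&mut buf);
            self.data[pos..pos + self.block_size].copy_from_slice(&buf);
        }
    }

    fn main() {
        let mut disk = Disk { block_size: 512, data: vec![0; 4096], reads: 0 };
        disk.with_block_mut(0, 512, |b| b.fill(0xAA)); // full rewrite: no read
        disk.with_block_mut(512, 16, |b| b[..16].fill(0xBB)); // partial: read first
        assert_eq!(disk.reads, 1);
    }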

View File

@@ -110,7 +110,7 @@ impl NgBlockDevice for NvmeDrive {
             )
             .await;
-        // log::info!(target: "io", "write -> #{lba}, {lba_count} blocks -> {result:?} @ {buffer_address:#x}");
+        log::info!(target: "io", "write -> #{lba}, {lba_count} blocks -> {result:?} @ {buffer_address:#x}");
         result
     }

View File

@@ -230,9 +230,9 @@ impl NvmeController {
         let max_transfer_size = if identify.mdts == 0 {
             // Pick some sane default value
-            256 * PAGE_SIZE
+            256 * self.min_page_size
         } else {
-            (1 << identify.mdts) * PAGE_SIZE
+            (1 << identify.mdts) * self.min_page_size
         };
 
         self.create_queues().await?;
@@ -306,7 +306,7 @@ impl NvmeController {
             )?,
         };
 
-        ioq.wait_for_completion(cmd_id, &prp_list, ()).await?;
+        ioq.wait_for_completion(cmd_id, ()).await?;
 
         Ok(())
     }
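
For context on the `min_page_size` change: the NVMe Identify Controller MDTS field expresses the maximum data transfer size as a power-of-two multiple of the controller's minimum memory page size (CAP.MPSMIN), which need not equal the host's PAGE_SIZE; MDTS == 0 means the controller reports no limit, so the driver picks its own cap. A worked example with illustrative values:

    let min_page_size = 4096usize; // derived from CAP.MPSMIN
    let mdts = 5u32;               // Identify Controller, MDTS field
    let max_transfer_size = if mdts == 0 {
        256 * min_page_size        // driver-chosen fallback (1 MiB here)
    } else {
        (1usize << mdts) * min_page_size
    };
    assert_eq!(max_transfer_size, 128 * 1024); // 2^5 pages of 4 KiB = 128 KiB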

View File

@@ -5,7 +5,6 @@ use alloc::{
     vec::Vec,
 };
 use bytemuck::{Pod, Zeroable};
-use futures_util::Future;
 use libk_mm::{
     address::{AsPhysicalAddress, PhysicalAddress},
     PageBox,
@@ -119,10 +118,6 @@ impl PrpList {
             }
         }
     }
-
-    pub fn none() -> Self {
-        Self::None
-    }
 }
 
 const_assert!(size_of::<CompletionQueueEntry>().is_power_of_two());
@@ -311,12 +306,9 @@ impl QueuePair {
     pub async fn wait_for_completion<T: Unpin>(
         &self,
         command_id: u32,
-        list: &PrpList,
         result: T,
     ) -> Result<T, CommandError> {
         let mut response = Some(result);
 
-        // NOTE: for multiple blocks supplied via the PRP list, the NVMe controller will signal
-        // a completion per each block
         poll_fn(|cx| {
             let mut inner = self.inner.lock();
@@ -384,7 +376,7 @@ impl QueuePair {
     pub async fn request_no_data<C: Command>(&self, req: C) -> Result<(), NvmeError> {
         let list = PrpList::None;
         let command_id = self.submit(req, &list, true)?;
-        self.wait_for_completion(command_id, &list, ())
+        self.wait_for_completion(command_id, ())
             .await
             .map_err(NvmeError::CommandError)
     }
@@ -399,9 +391,7 @@ impl QueuePair {
         let response = PageBox::new_uninit().map_err(NvmeError::MemoryError)?;
         let list = PrpList::from_buffer(unsafe { response.as_physical_address() }, size_of::<R>())?;
         let command_id = self.submit(req, &list, true)?;
-        let result = self
-            .wait_for_completion(command_id, &list, response)
-            .await?;
+        let result = self.wait_for_completion(command_id, response).await?;
         Ok(unsafe { result.assume_init() })
     }

View File

@@ -191,7 +191,7 @@ impl DirectoryNode {
         for i in 0..n {
             let fit_block = self
                 .fs
-                .with_inode_block_mut(&inode, i, |block| {
+                .with_inode_block_mut(&inode, i, 0, |block| {
                     let mut iter = DirentIterMut::new(&self.fs, &mut block[..], 0);
                     if iter.try_fit(name, ino) {
                         Ok(true)
@@ -220,7 +220,7 @@ impl DirectoryNode {
             .await?;
 
         self.fs
-            .with_inode_block_mut(&inode, block_index, |block| {
+            .with_inode_block_mut(&inode, block_index, self.fs.block_size, |block| {
                 block.fill(0);
 
                 // Place dirent

View File

@@ -77,7 +77,7 @@ impl RegularNode {
             let amount = remaining.min(self.fs.block_size - block_offset);
             self.fs
-                .with_inode_block_mut(&inode, block_index as u32, |block| {
+                .with_inode_block_mut(&inode, block_index as u32, amount, |block| {
                     block[block_offset..block_offset + amount]
                         .copy_from_slice(&buffer[offset..offset + amount]);
                     Ok(())
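
The `amount` passed as the hint here is exactly the number of bytes the closure rewrites, so the cache reads the old block only for partial updates. Illustrative arithmetic, assuming a 1024-byte block size (the helper name is hypothetical):

    fn write_amount(remaining: usize, block_offset: usize, block_size: usize) -> usize {
        // Mirrors `remaining.min(self.fs.block_size - block_offset)` above
        remaining.min(block_size - block_offset)
    }

    fn main() {
        // 16 bytes in the middle of a block: partial, the old contents must be read
        assert_eq!(write_amount(16, 100, 1024), 16);
        // Large buffer at a block boundary: amount == block_size, the read is skipped
        assert_eq!(write_amount(4096, 0, 1024), 1024);
    }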

View File

@@ -211,6 +211,8 @@ impl Inode {
         debug_assert!(old_l0_capacity <= new_l0_capacity);
 
+        log::debug!("Grow L0: {old_l0_capacity} -> {new_l0_capacity}");
+
         for i in old_l0_capacity..new_l0_capacity {
             let i = i as usize;
             let block = fs.allocate_block().await?;
@@ -220,6 +222,89 @@ impl Inode {
         Ok(())
     }
 
+    async fn grow_l1(
+        &mut self,
+        fs: &Ext2Fs,
+        old_capacity: u64,
+        new_capacity: u64,
+    ) -> Result<(), Error> {
+        let old_l1_capacity = old_capacity
+            .saturating_sub(DIRECT_BLOCK_COUNT as u64)
+            .min(fs.pointers_per_block as u64);
+        let new_l1_capacity = new_capacity
+            .saturating_sub(DIRECT_BLOCK_COUNT as u64)
+            .min(fs.pointers_per_block as u64);
+
+        log::debug!("Grow L1: {old_l1_capacity} -> {new_l1_capacity}");
+        debug_assert!(old_l1_capacity <= new_l1_capacity);
+
+        if old_l1_capacity == 0 && new_l1_capacity != 0 {
+            // Allocate an indirect block
+            let block = fs.allocate_block().await?;
+            self.blocks.indirect_block_l1 = block;
+        }
+
+        for i in old_l1_capacity..new_l1_capacity {
+            // Allocate inner blocks
+            debug_assert_ne!(self.blocks.indirect_block_l1, 0);
+            let i = i as usize;
+            let block = fs.allocate_block().await?;
+            fs.write_index(self.blocks.indirect_block_l1, i, block)
+                .await?;
+        }
+
+        Ok(())
+    }
+
+    async fn grow_l2(
+        &mut self,
+        fs: &Ext2Fs,
+        old_capacity: u64,
+        new_capacity: u64,
+    ) -> Result<(), Error> {
+        let old_l2_capacity = (old_capacity as usize)
+            .saturating_sub(DIRECT_BLOCK_COUNT + fs.pointers_per_block)
+            .min(fs.pointers_per_block * fs.pointers_per_block);
+        let new_l2_capacity = (new_capacity as usize)
+            .saturating_sub(DIRECT_BLOCK_COUNT + fs.pointers_per_block)
+            .min(fs.pointers_per_block * fs.pointers_per_block);
+
+        let old_l2_l0 = old_l2_capacity.div_ceil(fs.pointers_per_block);
+        let new_l2_l0 = new_l2_capacity.div_ceil(fs.pointers_per_block);
+
+        log::debug!(
+            "Grow L2: {old_l2_capacity} ({old_l2_l0} L2-1) -> {new_l2_capacity} ({new_l2_l0} L2-1)"
+        );
+
+        if old_l2_capacity == 0 && new_l2_capacity != 0 {
+            // Allocate L2 indirect block
+            let block = fs.allocate_block().await?;
+            self.blocks.indirect_block_l2 = block;
+        }
+
+        // Grow L2 direct-indirect block
+        for i in old_l2_l0..new_l2_l0 {
+            debug_assert_ne!(self.blocks.indirect_block_l2, 0);
+            let block = fs.allocate_block().await?;
+            fs.write_index(self.blocks.indirect_block_l2, i, block)
+                .await?;
+        }
+
+        // Grow L2 indirect-indirect blocks
+        for i in old_l2_capacity..new_l2_capacity {
+            debug_assert_ne!(self.blocks.indirect_block_l2, 0);
+            let l1i = i / fs.pointers_per_block;
+            let l0i = i % fs.pointers_per_block;
+            let indirect = fs.read_index(self.blocks.indirect_block_l2, l1i).await?;
+            debug_assert_ne!(indirect, 0);
+            let block = fs.allocate_block().await?;
+            fs.write_index(indirect, l0i, block).await?;
+        }
+
+        Ok(())
+    }
+
     fn set_size(&mut self, fs: &Ext2Fs, size: u64) {
         let block_count = size.div_ceil(fs.block_size as u64);
@@ -244,17 +329,27 @@ impl Inode {
             return Ok(false);
         }
 
+        // TODO check max inode size
         let new_blocks = size.div_ceil(fs.block_size as u64);
         let old_blocks = self.size(fs).div_ceil(fs.block_size as u64);
 
+        if new_blocks as usize
+            > DIRECT_BLOCK_COUNT
+                + fs.pointers_per_block
+                + fs.pointers_per_block * fs.pointers_per_block
+        {
+            log::warn!("ext2: only L0/L1 are supported");
+            return Err(Error::InvalidArgument);
+        }
+
         match old_blocks.cmp(&new_blocks) {
             // Grow
             Ordering::Less => {
-                if new_blocks > DIRECT_BLOCK_COUNT as u64 {
-                    todo!();
-                }
                 log::debug!("Grow inode: {old_blocks} -> {new_blocks} blocks");
                 self.grow_direct(fs, old_blocks, new_blocks).await?;
+                self.grow_l1(fs, old_blocks, new_blocks).await?;
+                self.grow_l2(fs, old_blocks, new_blocks).await?;
             }
             // Shrink
             Ordering::Greater => todo!(),
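
A note on the index math in `grow_l2`: within the doubly-indirect region, block index `i` splits into an entry in the top-level (L2) indirect block and an entry in the L1 block it points to, by division and remainder. Worked example with illustrative values, assuming 1 KiB blocks with 4-byte pointers (`pointers_per_block == 256`; classic ext2 has 12 direct blocks):

    fn main() {
        let pointers_per_block = 256usize; // 1024-byte block / 4-byte pointer
        let i = 1000usize; // index within the doubly-indirect region
        let l1i = i / pointers_per_block; // entry in the L2 (top) indirect block
        let l0i = i % pointers_per_block; // entry in the L1 block it points to
        assert_eq!((l1i, l0i), (3, 232));
    }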

View File

@@ -44,6 +44,8 @@ struct Bgdt {
 pub struct Ext2Fs {
     mapper: DeviceMapper,
     inode_cache: OneTimeInit<Arc<InodeCache>>,
+    cached: bool,
+
     state: IrqSafeRwLock<State>,
     bgdt: Bgdt,
@@ -263,6 +265,8 @@ impl Ext2Fs {
             mapper,
             inode_cache: OneTimeInit::new(),
+            cached,
+
             state: IrqSafeRwLock::new(State {
                 superblock,
                 dirty: false,
@@ -313,13 +317,14 @@ impl Ext2Fs {
     pub async fn with_block_mut<T, F: FnOnce(&mut [u8]) -> Result<T, Error>>(
         &self,
         index: u32,
+        write_size: usize,
         mapper: F,
     ) -> Result<T, Error> {
         if index < 1 || index >= self.total_blocks {
             return Err(Error::InvalidFile);
         }
         self.mapper
-            .try_with_mut(self.block_address(index), mapper)
+            .try_with_mut(self.block_address(index), write_size, mapper)
             .await
     }
@@ -337,10 +342,11 @@ impl Ext2Fs {
         &self,
         inode: &Inode,
         block: u32,
+        write_size: usize,
         mapper: F,
     ) -> Result<T, Error> {
         let block_index = self.inode_block_index(inode, block).await?;
-        self.with_block_mut(block_index, mapper).await
+        self.with_block_mut(block_index, write_size, mapper).await
     }
 
     async fn with_bgdt_entry<T, F: FnOnce(&BlockGroupDescriptor) -> Result<T, Error>>(
@@ -378,12 +384,17 @@ impl Ext2Fs {
         }
         let offset_in_block = offset % self.block_size;
 
-        self.with_block_mut(block as u32 + self.bgdt.base, |block| {
-            let descriptor = bytemuck::from_bytes_mut(
-                &mut block[offset_in_block..offset_in_block + size_of::<BlockGroupDescriptor>()],
-            );
-            mapper(descriptor)
-        })
+        self.with_block_mut(
+            block as u32 + self.bgdt.base,
+            size_of::<BlockGroupDescriptor>(),
+            |block| {
+                let descriptor = bytemuck::from_bytes_mut(
+                    &mut block
+                        [offset_in_block..offset_in_block + size_of::<BlockGroupDescriptor>()],
+                );
+                mapper(descriptor)
+            },
+        )
         .await
     }
@@ -419,7 +430,7 @@ impl Ext2Fs {
     pub async fn write_inode(&self, ino: u32, inode: &Inode) -> Result<(), Error> {
         let (ino_block, offset_in_block) = self.inode(ino).await?;
-        self.with_block_mut(ino_block, |block| {
+        self.with_block_mut(ino_block, size_of::<Inode>(), |block| {
             block[offset_in_block..offset_in_block + size_of::<Inode>()]
                 .copy_from_slice(bytemuck::bytes_of(inode));
             Ok(())
@@ -469,7 +480,7 @@ impl Ext2Fs {
         if let Some((bitmap, group_item_count)) = bitmap {
             let no = self
-                .with_block_mut(bitmap, |bitmap| {
+                .with_block_mut(bitmap, 0, |bitmap| {
                     for i in 0..bit_per_block.min(group_item_count as usize) {
                         let index = i / 8;
                         let bit = 1u8 << (i % 8);
@@ -490,7 +501,9 @@ impl Ext2Fs {
                 state.dirty = true;
             }
 
-            self.flush_superblock().await?;
+            if !self.cached {
+                self.flush_superblock().await?;
+            }
 
             return Ok(group_index * group_item_count + no);
         }
@@ -603,6 +616,18 @@ impl Ext2Fs {
             .await
     }
 
+    async fn write_index(&self, block_index: u32, index: usize, value: u32) -> Result<(), Error> {
+        self.with_block_mut(block_index, size_of::<u32>(), |block| {
+            let indirect: &mut [u32] = unsafe {
+                core::slice::from_raw_parts_mut(block.as_mut_ptr().cast(), self.pointers_per_block)
+            };
+            indirect[index] = value;
+            Ok(())
+        })
+        .await
+    }
+
     async fn inode_block_index(&self, inode: &Inode, index: u32) -> Result<u32, Error> {
         let mut index = index as usize;
         // L0
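
The new `write_index` helper reinterprets the block buffer as `&mut [u32]` via `from_raw_parts_mut`, which is sound only while the cache's block buffers stay `u32`-aligned (they are page-allocated here). A safe, endianness-explicit variant is also possible; a sketch, not the commit's code (ext2 stores block pointers little-endian on disk):

    async fn write_index(&self, block_index: u32, index: usize, value: u32) -> Result<(), Error> {
        self.with_block_mut(block_index, size_of::<u32>(), |block| {
            let off = index * size_of::<u32>();
            block[off..off + size_of::<u32>()].copy_from_slice(&value.to_le_bytes());
            Ok(())
        })
        .await
    }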

View File

@@ -4,7 +4,7 @@ use core::{
     ops::{Deref, DerefMut},
 };
 
-use alloc::{sync::Arc, vec::Vec};
+use alloc::sync::Arc;
 use kernel_arch::mem::PhysicalMemoryAllocator;
 use libk_mm::{address::PhysicalAddress, phys::GlobalPhysicalAllocator, PageBox};
 use libk_util::{lru_hash_table::LruCache, sync::spin_rwlock::IrqSafeRwLock};
@@ -101,11 +101,12 @@ impl<A: PhysicalMemoryAllocator<Address = PhysicalAddress>> DeviceMapper<A> {
     pub async fn try_with_mut<T, F: FnOnce(&mut [u8]) -> Result<T, Error>>(
         &self,
         pos: u64,
+        size: usize,
         mapper: F,
     ) -> Result<T, Error> {
         match self {
-            Self::Uncached(uncache) => uncache.try_with_mut(pos, mapper).await,
-            Self::Cached(cache) => cache.try_with_mut(pos, mapper).await,
+            Self::Uncached(uncache) => uncache.try_with_mut(pos, size, mapper).await,
+            Self::Cached(cache) => cache.try_with_mut(pos, size, mapper).await,
         }
     }
@@ -139,10 +140,14 @@ impl<A: PhysicalMemoryAllocator<Address = PhysicalAddress>> UncachedCache<A> {
     pub async fn try_with_mut<T, F: FnOnce(&mut [u8]) -> Result<T, Error>>(
         &self,
         pos: u64,
+        size: usize,
         mapper: F,
     ) -> Result<T, Error> {
         let mut data = PageBox::<_, A>::new_uninit_slice_in(self.block_size)?;
-        self.device.read_aligned(pos, data.as_slice_mut()).await?;
+        // No need to read a block only to then fully rewrite it
+        if size != self.block_size {
+            self.device.read_aligned(pos, data.as_slice_mut()).await?;
+        }
         let mut data = unsafe { data.assume_init_slice() };
         let result = mapper(&mut data[..])?;
         self.device.write_aligned(pos, data.as_slice()).await?;
@@ -179,9 +184,17 @@ impl<A: PhysicalMemoryAllocator<Address = PhysicalAddress>> BlockCache<A> {
         }
     }
 
-    async fn fetch_block(&self, pos: u64) -> Result<Arc<IrqSafeRwLock<CachedBlock<A>>>, Error> {
+    async fn fetch_block(
+        &self,
+        pos: u64,
+        write_size: Option<usize>,
+    ) -> Result<Arc<IrqSafeRwLock<CachedBlock<A>>>, Error> {
+        let need_read = write_size.map(|sz| sz != self.block_size).unwrap_or(true);
         let mut data = PageBox::new_uninit_slice_in(self.block_size)?;
-        self.device.read_aligned(pos, data.as_slice_mut()).await?;
+        // Don't read a block that's going to be fully rewritten immediately
+        if need_read {
+            self.device.read_aligned(pos, data.as_slice_mut()).await?;
+        }
         let data = unsafe { data.assume_init_slice() };
         Ok(Arc::new(IrqSafeRwLock::new(CachedBlock {
             data,
@@ -189,10 +202,14 @@ impl<A: PhysicalMemoryAllocator<Address = PhysicalAddress>> BlockCache<A> {
         })))
     }
 
-    async fn entry(&self, pos: u64) -> Result<Arc<IrqSafeRwLock<CachedBlock<A>>>, Error> {
+    async fn entry(
+        &self,
+        pos: u64,
+        write_size: Option<usize>,
+    ) -> Result<Arc<IrqSafeRwLock<CachedBlock<A>>>, Error> {
         let mut lock = self.cache.lock().await;
         let (value, evicted) = lock
-            .try_get_or_insert_with_async(pos, || self.fetch_block(pos))
+            .try_get_or_insert_with_async(pos, || self.fetch_block(pos, write_size))
             .await?;
 
         if let Some((pos, block)) = evicted {
@@ -207,7 +224,7 @@ impl<A: PhysicalMemoryAllocator<Address = PhysicalAddress>> BlockCache<A> {
         pos: u64,
         mapper: F,
     ) -> Result<T, Error> {
-        let block = self.entry(pos).await?;
+        let block = self.entry(pos, None).await?;
         let result = mapper(&block.read()[..])?;
         Ok(result)
     }
@@ -215,9 +232,10 @@ impl<A: PhysicalMemoryAllocator<Address = PhysicalAddress>> BlockCache<A> {
     pub async fn try_with_mut<T, F: FnOnce(&mut [u8]) -> Result<T, Error>>(
         &self,
         pos: u64,
+        size: usize,
         mapper: F,
     ) -> Result<T, Error> {
-        let block = self.entry(pos).await?;
+        let block = self.entry(pos, Some(size)).await?;
         let mut block = block.write();
         let result = mapper(&mut block[..])?;
         block.dirty = true;
@@ -419,7 +437,7 @@ mod tests {
         const LBA: u64 = 1;
 
         cache
-            .try_with_mut(LBA * BS as u64, |block, _| {
+            .try_with_mut(LBA * BS as u64, 16, |block| {
                 block[0..16].fill(0x12);
                 Ok(())
             })
@@ -440,21 +458,21 @@ mod tests {
         }
 
         cache
-            .try_with_mut(LBA * BS as u64, |block, _| {
+            .try_with_mut(LBA * BS as u64, 16, |block| {
                 block[16..32].fill(0x23);
                 Ok(())
             })
             .await
             .unwrap();
         cache
-            .try_with_mut(LBA * BS as u64, |block, _| {
+            .try_with_mut(LBA * BS as u64, 16, |block| {
                 block[48..64].fill(0x34);
                 Ok(())
             })
             .await
             .unwrap();
         cache
-            .try_with_mut(LBA * BS as u64, |block, _| {
+            .try_with_mut(LBA * BS as u64, 128, |block| {
                 block[128..256].fill(0xF1);
                 Ok(())
             })
@@ -504,7 +522,7 @@ mod tests {
         // Go through all blocks, fill those with some values
         for i in 0..1024 {
             cache
-                .try_with_mut(i * BS as u64, |block, _| {
+                .try_with_mut(i * BS as u64, BS, |block| {
                     block.fill(mapper(i));
                     Ok(())
                 })
@@ -522,14 +540,14 @@ mod tests {
         for i in 0..1023 {
             cache
-                .try_with_mut(i * BS as u64, |block, _| {
+                .try_with_mut(i * BS as u64, BS, |block| {
                     block.fill(0x12);
                     Ok(())
                 })
                 .await
                 .unwrap();
             cache
-                .try_with_mut((i + 1) * BS as u64, |block, _| {
+                .try_with_mut((i + 1) * BS as u64, BS, |block| {
                     block.fill(0x23);
                     Ok(())
                 })
@@ -539,14 +557,14 @@ mod tests {
         for i in 0..1023 {
             cache
-                .try_with_mut(i * BS as u64, |block, _| {
+                .try_with_mut(i * BS as u64, BS, |block| {
                     block.iter_mut().for_each(|x| *x += 1);
                     Ok(())
                 })
                 .await
                 .unwrap();
             cache
-                .try_with_mut((i + 1) * BS as u64, |block, _| {
+                .try_with_mut((i + 1) * BS as u64, BS, |block| {
                     block.iter_mut().for_each(|x| *x += 2);
                     Ok(())
                 })