# Patch: cache-maintenance and barrier hooks for DMA, wired into the NVMe driver.
#
# What the patch does:
#   * Adds `load_barrier`, `store_barrier`, `memory_barrier` and `flush_virtual_range` to the
#     `Architecture` trait, with aarch64, riscv64 and x86_64 implementations.
#   * Adds `flush_cache_data` / `flush_cache_data_slice` to libk-mm and `cache_flush_*`
#     helpers to the DMA buffer and slice types in libk.
#   * Splits `NvmeController::perform_io` into `perform_read` / `perform_write`; the write path
#     flushes the buffer and issues a store barrier, the x86-only `wbinvd` and the `IrqGuard`
#     are removed, and a queue element is flushed before the tail doorbell is rung.
#   * Rejects empty buffers in the NVMe block read/write entry points and lowers the I/O log
#     lines from `info` to `trace`.
#   * Re-exports `disable_early_sinks`; changes the x86_64 local APIC timer interval
#     (150000 -> 15000) and divide configuration (0x3 -> 0x2).
#
# Fixes applied while restoring this patch from its whitespace-collapsed form:
#   * aarch64 `flush_virtual_range`: `dc ivac` -> `dc civac`, followed by `dsb sy`.
#     DC IVAC only invalidates and may discard dirty lines, but `perform_write` uses this
#     routine to make CPU-written data visible to the device, and the x86_64 version uses
#     `clflush` (write back + invalidate). The DSB makes the maintenance complete before the
#     caller continues.
#   * riscv64 barriers: `fence r, w` / `fence w, r` do not order load->load or store->store,
#     unlike the aarch64 (`dmb ishld` / `dmb ishst`) and x86_64 (`lfence` / `sfence`) versions
#     in this same patch, and `store_barrier()` is used right before an MMIO doorbell write.
#     Replaced with `fence ir, ir`, `fence ow, ow` and `fence iorw, iorw`.
#   * Generic arguments that were stripped from added lines were restored where the
#     surrounding code fixes the type (`Range<usize>`, `MaybeUninit<u8>`, `<T>`).
#
# Restoration caveats (verify against the tree before applying):
#   * Context and removed lines are reproduced as found; some still lack generic arguments
#     (e.g. `CpuImpl`, `impl DmaBuffer {`, `OneTimeInit`) and will not match the real files.
#   * Blank context lines were re-inserted from the hunk line counts; their positions are a
#     best guess in the first aarch64 hunk, the queue.rs doorbell hunk and `perform_read`.
#   * The `index` blob ids describe the original patch, not the corrected added lines.
#
# NOTE(review), not changed here because the relevant code is outside the visible hunks:
#   * queue.rs flushes the element at `self.tail` after `self.tail = new_tail`; if the entry
#     was written at the previous tail, the wrong slot is flushed - confirm.
#   * `perform_read` only issues `memory_barrier()` after completion; no cache maintenance is
#     done on the destination buffer - confirm this is sufficient on non-coherent targets.
#   * riscv64 `flush_virtual_range` is still a no-op.
#
diff --git a/Cargo.lock b/Cargo.lock
index 0a5ad156..f1b12b8f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2725,6 +2725,7 @@ dependencies = [
  "bytemuck",
  "device-api",
  "futures-util",
+ "kernel-arch",
  "kernel-fs",
  "libk",
  "libk-mm",
diff --git a/kernel/arch/aarch64/src/lib.rs b/kernel/arch/aarch64/src/lib.rs
index 66e10e1e..9442ef2f 100644
--- a/kernel/arch/aarch64/src/lib.rs
+++ b/kernel/arch/aarch64/src/lib.rs
@@ -6,7 +6,10 @@ extern crate alloc;
 
 use core::sync::atomic::{AtomicUsize, Ordering};
 
-use aarch64_cpu::registers::{DAIF, MPIDR_EL1, TPIDR_EL1};
+use aarch64_cpu::{
+    asm::barrier,
+    registers::{DAIF, MPIDR_EL1, TPIDR_EL1},
+};
 use alloc::{boxed::Box, sync::Arc, vec::Vec};
 use device_api::interrupt::LocalInterruptController;
 use kernel_arch_interface::{
@@ -134,4 +137,30 @@ impl Architecture for ArchitectureImpl {
     fn cpu_enabled_features(_cpu: &CpuImpl) -> Option<&Self::CpuFeatures> {
         None
     }
+
+    // Cache/barrier operation
+    fn load_barrier() {
+        barrier::dmb(barrier::ISHLD);
+    }
+
+    fn store_barrier() {
+        barrier::dmb(barrier::ISHST);
+    }
+
+    fn memory_barrier() {
+        barrier::dsb(barrier::SY);
+    }
+
+    fn flush_virtual_range(range: core::ops::Range<usize>) {
+        // TODO cache line assumed to be 64 bytes
+        const CLSIZE: usize = 64;
+        let start = range.start & !(CLSIZE - 1);
+        let end = (range.end + (CLSIZE - 1)) & !(CLSIZE - 1);
+
+        for line in (start..end).step_by(CLSIZE) {
+            unsafe { core::arch::asm!("dc civac, {address}", address = in(reg) line) };
+        }
+        // Clean+invalidate must complete before the caller lets a device touch the range
+        barrier::dsb(barrier::SY);
+    }
 }
diff --git a/kernel/arch/interface/src/lib.rs b/kernel/arch/interface/src/lib.rs
index a40d1564..10d1616c 100644
--- a/kernel/arch/interface/src/lib.rs
+++ b/kernel/arch/interface/src/lib.rs
@@ -2,6 +2,8 @@
 #![feature(step_trait, const_trait_impl, never_type, decl_macro)]
 #![allow(clippy::new_without_default)]
 
+use core::ops::Range;
+
 use alloc::vec::Vec;
 use cpu::{CpuData, CpuFeatureSet, CpuImpl, IpiQueue};
 use device_api::interrupt::LocalInterruptController;
@@ -80,4 +82,16 @@ pub trait Architecture: Sized + 'static {
     fn cpu_enabled_features(cpu: &CpuImpl) -> Option<&Self::CpuFeatures> {
         None
     }
+
+    // Cache/barrier operation
+
+    fn load_barrier();
+    fn store_barrier();
+    fn memory_barrier() {
+        Self::store_barrier();
+        Self::load_barrier();
+    }
+
+    /// Flushes/invalidates a range of virtual memory from the CPU's data cache.
+    fn flush_virtual_range(range: Range<usize>);
 }
diff --git a/kernel/arch/riscv64/src/lib.rs b/kernel/arch/riscv64/src/lib.rs
index d19e4f01..5b7c919d 100644
--- a/kernel/arch/riscv64/src/lib.rs
+++ b/kernel/arch/riscv64/src/lib.rs
@@ -3,7 +3,10 @@
 
 extern crate alloc;
 
-use core::sync::atomic::{AtomicUsize, Ordering};
+use core::{
+    ops::Range,
+    sync::atomic::{AtomicUsize, Ordering},
+};
 
 use alloc::{boxed::Box, collections::btree_map::BTreeMap, vec::Vec};
 use device_api::interrupt::LocalInterruptController;
@@ -163,4 +166,21 @@ impl Architecture for ArchitectureImpl {
     fn idle_task() -> extern "C" fn(usize) -> ! {
         idle_task
     }
+
+    // Cache/barrier operation
+    fn load_barrier() {
+        unsafe { core::arch::asm!("fence ir, ir") };
+    }
+
+    fn store_barrier() {
+        unsafe { core::arch::asm!("fence ow, ow") };
+    }
+
+    fn memory_barrier() {
+        unsafe { core::arch::asm!("fence iorw, iorw") };
+    }
+
+    fn flush_virtual_range(_range: Range<usize>) {
+        // TODO
+    }
 }
diff --git a/kernel/arch/x86_64/src/lib.rs b/kernel/arch/x86_64/src/lib.rs
index 2086285e..e55e3cb9 100644
--- a/kernel/arch/x86_64/src/lib.rs
+++ b/kernel/arch/x86_64/src/lib.rs
@@ -5,7 +5,7 @@
 extern crate alloc;
 
 use core::{
-    ops::DerefMut,
+    ops::{DerefMut, Range},
     sync::atomic::{AtomicUsize, Ordering},
 };
 
@@ -195,4 +195,30 @@ impl Architecture for ArchitectureImpl {
     fn cpu_available_features(cpu: &CpuImpl) -> Option<&Self::CpuFeatures> {
         Some(&cpu.available_features)
     }
+
+    // Cache/barrier
+
+    fn load_barrier() {
+        unsafe { core::arch::x86_64::_mm_lfence() };
+    }
+
+    fn store_barrier() {
+        unsafe { core::arch::x86_64::_mm_sfence() };
+    }
+
+    fn memory_barrier() {
+        unsafe { core::arch::x86_64::_mm_mfence() };
+    }
+
+    fn flush_virtual_range(range: Range<usize>) {
+        // TODO I assume 64-byte cache line on all CPUs
+        // TODO clflush instruction may not be available, test for it
+        const CLSIZE: usize = 64;
+        let start = range.start & !(CLSIZE - 1);
+        let end = (range.end + (CLSIZE - 1)) & !(CLSIZE - 1);
+
+        for line in (start..end).step_by(CLSIZE) {
+            unsafe { core::arch::x86_64::_mm_clflush(line as _) };
+        }
+    }
 }
diff --git a/kernel/driver/block/nvme/Cargo.toml b/kernel/driver/block/nvme/Cargo.toml
index faad28d4..e65c79c6 100644
--- a/kernel/driver/block/nvme/Cargo.toml
+++ b/kernel/driver/block/nvme/Cargo.toml
@@ -10,6 +10,7 @@ libk-util.workspace = true
 libk-mm.workspace = true
 libk.workspace = true
 device-api = { workspace = true, features = ["derive"] }
+kernel-arch.workspace = true
 
 ygg_driver_pci = { path = "../../bus/pci" }
 kernel-fs = { path = "../../fs/kernel-fs" }
diff --git a/kernel/driver/block/nvme/src/drive.rs b/kernel/driver/block/nvme/src/drive.rs
index c2279903..aa4831d0 100644
--- a/kernel/driver/block/nvme/src/drive.rs
+++ b/kernel/driver/block/nvme/src/drive.rs
@@ -95,28 +95,21 @@ impl BlockDevice for NvmeNamespace {
         if position % self.block_size() as u64 != 0 {
             return Err(Error::InvalidOperation);
         }
-        if buffer.len() % self.block_size() != 0 {
+        if buffer.len() % self.block_size() != 0 || buffer.is_empty() {
             return Err(Error::InvalidOperation);
         }
         let lba = position / self.block_size() as u64;
-        let lba_count = buffer.len() / self.block_size();
+        let lba_count = buffer.len().div_ceil(self.block_size());
         if lba + lba_count as u64 > self.block_count() {
             return Err(Error::InvalidOperation);
         }
 
         let result = self
             .controller
-            .perform_io(
-                self.nsid,
-                lba,
-                lba_count,
-                buffer.bus_address(),
-                buffer.len(),
-                IoDirection::Read,
-            )
+            .perform_read(self.nsid, lba, lba_count, buffer)
             .await;
 
-        log::info!("read #{lba}, {lba_count} blocks -> {result:?} @ {buffer:p}");
+        log::trace!(target: "io", "read #{lba}, {lba_count} blocks -> {result:?}");
 
         result.map_err(NvmeError::into)
     }
@@ -125,34 +118,21 @@ impl BlockDevice for NvmeNamespace {
         if position % self.block_size() as u64 != 0 {
             return Err(Error::InvalidOperation);
         }
-        if buffer.len() % self.block_size() != 0 {
+        if buffer.len() % self.block_size() != 0 || buffer.is_empty() {
             return Err(Error::InvalidOperation);
         }
         let lba = position / self.block_size() as u64;
-        let lba_count = buffer.len() / self.block_size();
+        let lba_count = buffer.len().div_ceil(self.block_size());
         if lba + lba_count as u64 > self.block_count() {
             return Err(Error::InvalidOperation);
         }
 
-        // TODO ArchitectureImpl::flush_data_cache()
-        #[cfg(target_arch = "x86_64")]
-        unsafe {
-            core::arch::asm!("wbinvd");
-        }
-
         let result = self
             .controller
-            .perform_io(
-                self.nsid,
-                lba,
-                lba_count,
-                buffer.bus_address(),
-                buffer.len(),
-                IoDirection::Write,
-            )
+            .perform_write(self.nsid, lba, lba_count, buffer)
             .await;
 
-        log::info!(target: "io", "write -> #{lba}, {lba_count} blocks -> {result:?} @ {buffer:p}");
+        log::trace!(target: "io", "write -> #{lba}, {lba_count} blocks -> {result:?}");
 
         result.map_err(NvmeError::into)
     }
diff --git a/kernel/driver/block/nvme/src/lib.rs b/kernel/driver/block/nvme/src/lib.rs
index fc3ebd01..16ede807 100644
--- a/kernel/driver/block/nvme/src/lib.rs
+++ b/kernel/driver/block/nvme/src/lib.rs
@@ -7,7 +7,7 @@
 extern crate alloc;
 
 use core::{
-    mem::size_of,
+    mem::{size_of, MaybeUninit},
     sync::atomic::{AtomicUsize, Ordering},
     time::Duration,
 };
@@ -20,9 +20,10 @@ use device_api::{
     interrupt::{InterruptAffinity, InterruptHandler, IrqVector},
 };
 use drive::NvmeNamespace;
+use kernel_arch::{Architecture, ArchitectureImpl};
 use libk::{
     device::manager::probe_partitions,
-    dma::BusAddress,
+    dma::{BusAddress, DmaSlice, DmaSliceMut},
     fs::devfs,
     task::{cpu_count, cpu_index, runtime},
 };
@@ -229,42 +230,54 @@ impl NvmeController {
         Ok(())
     }
 
-    pub async fn perform_io(
+    pub async fn perform_read(
         &self,
         nsid: u32,
         lba: u64,
         lba_count: usize,
-        buffer_address: BusAddress,
-        transfer_size: usize,
-        direction: IoDirection,
+        buffer: DmaSliceMut<'_, MaybeUninit<u8>>,
     ) -> Result<(), NvmeError> {
-        let prp_list = PrpList::from_buffer(&*self.dma, buffer_address, transfer_size)?;
-
-        let _guard = IrqGuard::acquire();
+        let prp_list = PrpList::from_buffer(&*self.dma, buffer.bus_address(), buffer.len())?;
 
         let cpu_index = cpu_index();
         let ioq = &self.ioqs.get()[cpu_index as usize];
+        let cmd_id = ioq.submit(
+            IoRead {
+                nsid,
+                lba,
+                count: lba_count as _,
+            },
+            &prp_list,
+            true,
+        )?;
+        ioq.wait_for_completion(cmd_id, ()).await?;
 
-        let cmd_id = match direction {
-            IoDirection::Read => ioq.submit(
-                IoRead {
-                    nsid,
-                    lba,
-                    count: lba_count as _,
-                },
-                &prp_list,
-                true,
-            )?,
-            IoDirection::Write => ioq.submit(
-                IoWrite {
-                    nsid,
-                    lba,
-                    count: lba_count as _,
-                },
-                &prp_list,
-                true,
-            )?,
-        };
+        ArchitectureImpl::memory_barrier();
+        Ok(())
+    }
+
+    pub async fn perform_write(
+        &self,
+        nsid: u32,
+        lba: u64,
+        lba_count: usize,
+        buffer: DmaSlice<'_, u8>,
+    ) -> Result<(), NvmeError> {
+        buffer.cache_flush_all();
+        ArchitectureImpl::store_barrier();
+
+        let prp_list = PrpList::from_buffer(&*self.dma, buffer.bus_address(), buffer.len())?;
+        let cpu_index = cpu_index();
+        let ioq = &self.ioqs.get()[cpu_index as usize];
+        let cmd_id = ioq.submit(
+            IoWrite {
+                nsid,
+                lba,
+                count: lba_count as _,
+            },
+            &prp_list,
+            true,
+        )?;
         ioq.wait_for_completion(cmd_id, ()).await?;
 
         Ok(())
diff --git a/kernel/driver/block/nvme/src/queue.rs b/kernel/driver/block/nvme/src/queue.rs
index b963ba29..7fedb6cb 100644
--- a/kernel/driver/block/nvme/src/queue.rs
+++ b/kernel/driver/block/nvme/src/queue.rs
@@ -3,6 +3,7 @@ use core::{future::poll_fn, mem::size_of, ptr::null_mut, task::Poll};
 use alloc::collections::{BTreeMap, BTreeSet};
 use bytemuck::{Pod, Zeroable};
 use device_api::dma::DmaAllocator;
+use kernel_arch::{Architecture, ArchitectureImpl};
 use libk::dma::{BusAddress, DmaBuffer};
 use libk_mm::address::AsPhysicalAddress;
 use libk_util::{sync::IrqSafeSpinlock, waker::QueueWaker};
@@ -254,6 +255,8 @@ impl Queue {
         self.tail = new_tail;
 
         if !self.tail_doorbell.is_null() {
+            self.data.cache_flush_element(self.tail);
+            ArchitectureImpl::store_barrier();
             unsafe {
                 self.tail_doorbell
                     .write_volatile(self.tail.try_into().unwrap());
diff --git a/kernel/libk/libk-mm/src/lib.rs b/kernel/libk/libk-mm/src/lib.rs
index 06b3ab69..6d2213bb 100644
--- a/kernel/libk/libk-mm/src/lib.rs
+++ b/kernel/libk/libk-mm/src/lib.rs
@@ -20,7 +20,7 @@ use core::{
 };
 
 use address::Virtualize;
-use kernel_arch::mem::PhysicalMemoryAllocator;
+use kernel_arch::{mem::PhysicalMemoryAllocator, Architecture, ArchitectureImpl};
 use libk_mm_interface::{
     address::{AsPhysicalAddress, PhysicalAddress},
     table::{MapAttributes, TableAllocator},
@@ -485,3 +485,11 @@ impl DerefMut for PageSlice {
         &mut self.data
     }
 }
+
+pub fn flush_cache_data<T>(data: *const T) {
+    ArchitectureImpl::flush_virtual_range(data.addr()..data.addr() + size_of::<T>());
+}
+
+pub fn flush_cache_data_slice<T>(data: *const [T]) {
+    ArchitectureImpl::flush_virtual_range(data.addr()..data.addr() + size_of::<T>() * data.len());
+}
diff --git a/kernel/libk/src/debug/mod.rs b/kernel/libk/src/debug/mod.rs
index 855f0b34..3e805708 100644
--- a/kernel/libk/src/debug/mod.rs
+++ b/kernel/libk/src/debug/mod.rs
@@ -25,7 +25,7 @@ mod sink;
 
 pub use panic::{panic_log, PanicLoggerSink};
 pub use ring::add_kernel_log_file;
-pub use sink::{add_early_sink, add_serial_sink, add_sink, DebugSink};
+pub use sink::{add_early_sink, add_serial_sink, add_sink, disable_early_sinks, DebugSink};
 
 static DEBUG_LOCK: IrqSafeSpinlock<()> = IrqSafeSpinlock::new(());
 
diff --git a/kernel/libk/src/dma.rs b/kernel/libk/src/dma.rs
index e599e46e..dd86011f 100644
--- a/kernel/libk/src/dma.rs
+++ b/kernel/libk/src/dma.rs
@@ -213,6 +213,18 @@ impl DmaBuffer<[T]> {
             range,
         }
     }
+
+    pub fn cache_flush_element(&self, index: usize) {
+        libk_mm::flush_cache_data(&raw const self[index]);
+    }
+
+    pub fn cache_flush_range(&self, range: Range<usize>) {
+        libk_mm::flush_cache_data_slice(&raw const self[range]);
+    }
+
+    pub fn cache_flush_all(&self) {
+        libk_mm::flush_cache_data_slice(&raw const self[..]);
+    }
 }
 
 impl DmaBuffer {
@@ -308,6 +320,10 @@ impl<'a, T> DmaSlice<'a, T> {
     pub fn into_parts(self) -> (&'a DmaBuffer<[T]>, Range) {
         (self.buffer, self.range)
     }
+
+    pub fn cache_flush_all(&self) {
+        self.buffer.cache_flush_range(self.range.clone());
+    }
 }
 
 impl Deref for DmaSlice<'_, T> {
@@ -333,6 +349,10 @@ impl<'a, T> DmaSliceMut<'a, T> {
     pub fn into_parts(self) -> (&'a mut DmaBuffer<[T]>, Range) {
         (self.buffer, self.range)
     }
+
+    pub fn cache_flush_all(&self) {
+        self.buffer.cache_flush_range(self.range.clone());
+    }
 }
 
 impl Deref for DmaSliceMut<'_, T> {
diff --git a/kernel/src/arch/x86_64/apic/local.rs b/kernel/src/arch/x86_64/apic/local.rs
index 4ee785c0..c07613dc 100644
--- a/kernel/src/arch/x86_64/apic/local.rs
+++ b/kernel/src/arch/x86_64/apic/local.rs
@@ -31,7 +31,7 @@ use super::{
     MAX_MSI_VECTORS,
 };
 
-const TIMER_INTERVAL: u32 = 150000;
+const TIMER_INTERVAL: u32 = 15000;
 
 /// When initialized, contains the Local APIC ID of the bootstrap processor
 pub static BSP_APIC_ID: OneTimeInit = OneTimeInit::new();
@@ -329,7 +329,7 @@ impl LocalApic {
         regs.TaskPriorityRegister.set(0);
 
         // Enable timer
-        regs.TimerDivideConfig.set(0x3);
+        regs.TimerDivideConfig.set(0x2);
         regs.TimerInitCount.set(TIMER_INTERVAL);
 
         // Configure local interrupt vectors