// (scrape metadata, not source: 501 lines, 15 KiB, Rust)

#![feature(const_trait_impl, let_chains, if_let_guard, maybe_uninit_slice)]
#![allow(missing_docs)]
#![no_std]
// TODO
#![allow(unused)]
extern crate alloc;
use core::{
mem::size_of,
sync::atomic::{AtomicUsize, Ordering},
time::Duration,
};
use alloc::{collections::BTreeMap, format, sync::Arc, vec::Vec};
use command::{IdentifyActiveNamespaceIdListRequest, IdentifyControllerRequest};
use device_api::{
device::Device,
interrupt::{InterruptAffinity, InterruptHandler},
};
use drive::NvmeNamespace;
use libk::{
device::manager::probe_partitions,
fs::devfs,
task::{cpu_count, cpu_index, runtime},
};
use libk_mm::{address::PhysicalAddress, device::DeviceMemoryIo, L3_PAGE_SIZE};
use libk_util::{
sync::{IrqGuard, IrqSafeSpinlock},
OneTimeInit,
};
use queue::PrpList;
use tock_registers::{
interfaces::{ReadWriteable, Readable, Writeable},
register_bitfields, register_structs,
registers::{ReadOnly, ReadWrite, WriteOnly},
};
use ygg_driver_pci::{
device::{PciDeviceInfo, PreferredInterruptMode},
PciCommandRegister, PciConfigurationSpace,
};
use yggdrasil_abi::{error::Error, io::FileMode};
use crate::{
command::{IoRead, IoWrite},
queue::{CompletionQueueEntry, SubmissionQueueEntry},
};
use self::{
command::{CreateIoCompletionQueue, CreateIoSubmissionQueue, SetFeatureRequest},
error::NvmeError,
queue::QueuePair,
};
mod command;
mod drive;
mod error;
mod queue;
/// Upper bound on the number of physical pages (PRP entries) a single I/O request may span.
pub const MAX_PAGES_PER_REQUEST: usize = 256;
// Use host page
/// Memory page size used by the driver, fixed to the host's L3 page size.
pub const PAGE_SIZE: usize = L3_PAGE_SIZE;
register_bitfields! {
    u32,
    // Controller Configuration (CC) register
    CC [
        /// I/O Completion Queue Entry Size (bytes = pow(2, IOCQES))
        IOCQES OFFSET(20) NUMBITS(4) [],
        /// I/O Submission Queue Entry Size (bytes = pow(2, IOSQES))
        IOSQES OFFSET(16) NUMBITS(4) [],
        /// Arbitration Mechanism Selected
        AMS OFFSET(11) NUMBITS(3) [],
        /// Memory Page Size (bytes = pow(2, 12 + MPS)); 0 selects 4 KiB pages
        MPS OFFSET(7) NUMBITS(4) [],
        /// I/O Command Set Selected
        CSS OFFSET(4) NUMBITS(3) [
            NvmCommandSet = 0
        ],
        /// Controller enable: set to bring the controller up, clear to reset
        ENABLE OFFSET(0) NUMBITS(1) [],
    ],
    // Controller Status (CSTS) register
    CSTS [
        /// Controller Fatal Status
        CFS OFFSET(1) NUMBITS(1) [],
        /// Ready: controller has finished processing the last CC.ENABLE transition
        RDY OFFSET(0) NUMBITS(1) [],
    ],
    // Admin Queue Attributes (AQA) register
    AQA [
        /// Admin Completion Queue Size in entries - 1
        ACQS OFFSET(16) NUMBITS(12) [],
        /// Admin Submission Queue Size in entries - 1
        ASQS OFFSET(0) NUMBITS(12) [],
    ]
}
register_bitfields! {
    u64,
    // Controller Capabilities (CAP) register, read-only
    CAP [
        /// Maximum Queue Entries Supported - 1. i.e., 0 means maximum queue len of 1, 1 = 2 etc.
        MQES OFFSET(0) NUMBITS(16) [],
        /// Timeout. Represents the worst-case time the host software should wait for CSTS.RDY to
        /// change its state. Expressed in 500 ms units.
        TO OFFSET(24) NUMBITS(8) [],
        /// Doorbell stride. Stride in bytes = pow(2, 2 + DSTRD).
        DSTRD OFFSET(32) NUMBITS(4) [],
        /// NVM Subsystem Reset Supported (see NVMe BS Section 3.7.1)
        NSSRS OFFSET(36) NUMBITS(1) [],
        /// Controller supports one or more I/O command sets
        CSS_IO_COMMANDS OFFSET(43) NUMBITS(1) [],
        /// Controller only supports admin commands and no I/O commands
        CSS_ADMIN_ONLY OFFSET(44) NUMBITS(1) [],
        /// Memory page size minimum (bytes = pow(2, 12 + MPSMIN))
        MPSMIN OFFSET(48) NUMBITS(4) [],
        /// Memory page size maximum (bytes = pow(2, 12 + MPSMAX))
        MPSMAX OFFSET(52) NUMBITS(4) [],
    ]
}
register_structs! {
    // MMIO layout of the NVMe controller register file mapped from BAR0.
    // `_0`/`_1`/`_2` are reserved/unused gaps. The mapping extends to 0x2000
    // so that the doorbell region starting at 0x1000 (accessed through
    // `Regs::doorbell_ptr`, not through named fields) is covered.
    #[allow(non_snake_case)]
    Regs {
        (0x00 => CAP: ReadOnly<u64, CAP::Register>),
        (0x08 => VS: ReadOnly<u32>),
        (0x0C => INTMS: WriteOnly<u32>),
        (0x10 => INTMC: WriteOnly<u32>),
        (0x14 => CC: ReadWrite<u32, CC::Register>),
        (0x18 => _0),
        (0x1C => CSTS: ReadOnly<u32, CSTS::Register>),
        (0x20 => _1),
        (0x24 => AQA: ReadWrite<u32, AQA::Register>),
        (0x28 => ASQ: ReadWrite<u64>),
        (0x30 => ACQ: ReadWrite<u64>),
        (0x38 => _2),
        (0x2000 => @END),
    }
}
/// Driver state for a single NVMe controller (one PCI function).
pub struct NvmeController {
    /// MMIO register file (BAR0), guarded against concurrent access.
    regs: IrqSafeSpinlock<DeviceMemoryIo<'static, Regs>>,
    /// Admin queue pair (queue id 0), set up in `Device::init`.
    admin_q: OneTimeInit<QueuePair>,
    /// I/O queue pairs, one per CPU, created by `create_queues`.
    ioqs: OneTimeInit<Vec<QueuePair>>,
    /// Number of I/O queue pairs; starts at 1 and is raised to the CPU
    /// count during `late_init`.
    io_queue_count: AtomicUsize,
    /// Active namespaces, keyed by NSID.
    drive_table: IrqSafeSpinlock<BTreeMap<u32, Arc<NvmeNamespace>>>,
    /// Index of this controller in the global `NVME_CONTROLLERS` list.
    controller_id: OneTimeInit<u32>,
    /// PCI device handle, used for interrupt allocation.
    pci: PciDeviceInfo,
    /// Cached doorbell shift, stored as CAP.DSTRD + 1 (see `probe`).
    doorbell_shift: usize,
    /// Minimum controller memory page size in bytes (from CAP.MPSMIN).
    min_page_size: usize,
}
/// Direction of a namespace data transfer submitted through
/// [`NvmeController::perform_io`].
#[derive(Clone, Copy, PartialEq, Eq, Debug)]
pub enum IoDirection {
    /// NVM Read command (`IoRead`)
    Read,
    /// NVM Write command (`IoWrite`)
    Write,
}
impl Regs {
    /// Returns a raw pointer to a queue doorbell register.
    ///
    /// Per the NVMe Base Specification, doorbells live at
    /// `0x1000 + (2 * queue_index + completion) * stride` from the register
    /// base, where `stride = 4 << CAP.DSTRD` bytes. `shift` is expected to be
    /// `CAP.DSTRD + 1`, as cached by `probe`, so `stride == 1 << (shift + 1)`.
    ///
    /// # Safety
    /// The returned pointer is derived from the register mapping; the caller
    /// must only dereference it while that mapping is alive and must pass a
    /// `queue_index` the controller actually supports.
    unsafe fn doorbell_ptr(&self, shift: usize, completion: bool, queue_index: usize) -> *mut u32 {
        let doorbell_base = (self as *const Regs as *mut Regs).addr() + 0x1000;
        // Each queue owns a submission (tail) doorbell followed by a
        // completion (head) doorbell, each `stride` bytes wide. The previous
        // formula, `((queue_index << shift) + completion) * 4`, only agreed
        // with the spec for DSTRD == 0; for any larger stride it placed the
        // completion doorbell at the wrong offset.
        let offset = ((queue_index << 1) + completion as usize) << (shift + 1);
        (doorbell_base + offset) as *mut u32
    }
}
impl NvmeController {
    /// Entry count of the admin submission/completion queues.
    const ADMIN_QUEUE_SIZE: usize = 32;
    /// Entry count of each I/O submission/completion queue.
    const IO_QUEUE_SIZE: usize = 32;

    /// Creates the I/O queue pairs (ids `1..=io_queue_count`) via the admin
    /// queue: negotiates the queue count with the controller, then allocates
    /// each pair and registers its completion and submission queues.
    ///
    /// Must run after the admin queue is initialized and `io_queue_count`
    /// has been stored (both done by the time `late_init` calls this).
    async fn create_queues(&self) -> Result<(), NvmeError> {
        let admin_q = self.admin_q.get();
        let io_queue_count = self.io_queue_count.load(Ordering::Acquire);
        log::info!(
            "Creating {} queue pairs for nvme{}",
            io_queue_count,
            self.controller_id.get()
        );
        // Request a CQ/SQ pair for I/O
        // NOTE(review): the spec encodes Number of Queues 0-based; assuming
        // `SetFeatureRequest` performs that conversion — confirm in command.rs.
        admin_q
            .request_no_data(SetFeatureRequest::NumberOfQueues(
                io_queue_count as _,
                io_queue_count as _,
            ))
            .await?;
        let mut queues = Vec::new();
        for i in 1..=io_queue_count {
            let id = i as u32;
            // Doorbell pointers for this queue index within BAR0.
            let (sq_doorbell, cq_doorbell) = unsafe { self.doorbell_pair(i) };
            let queue = QueuePair::new(id, i, Self::IO_QUEUE_SIZE, sq_doorbell, cq_doorbell)
                .map_err(NvmeError::MemoryError)?;
            // The completion queue must exist before the submission queue
            // that references it through `cq_id`.
            admin_q
                .request_no_data(CreateIoCompletionQueue {
                    id,
                    // MSI-X vector == queue id, matching the vector-to-queue
                    // mapping used by `handle_irq`.
                    vector: id,
                    size: Self::IO_QUEUE_SIZE,
                    data: queue.cq_physical_pointer(),
                })
                .await?;
            admin_q
                .request_no_data(CreateIoSubmissionQueue {
                    id,
                    cq_id: id,
                    size: Self::IO_QUEUE_SIZE,
                    data: queue.sq_physical_pointer(),
                })
                .await?;
            queues.push(queue);
        }
        self.ioqs.init(queues);
        Ok(())
    }

    /// Deferred initialization, spawned from `Device::init`: registers the
    /// controller globally, allocates one interrupt vector per I/O queue plus
    /// one for the admin queue, identifies the controller, then creates the
    /// I/O queues and enumerates namespaces.
    async fn late_init(self: Arc<Self>) -> Result<(), NvmeError> {
        register_nvme_controller(self.clone());
        // One I/O queue pair per CPU.
        let io_queue_count = cpu_count();
        self.io_queue_count.store(io_queue_count, Ordering::Release);
        {
            // Vector 0: admin queue; vectors 1..=io_queue_count: I/O queues.
            let range = self
                .pci
                .map_interrupt_multiple(0..io_queue_count + 1, InterruptAffinity::Any, self.clone())
                .unwrap();
            // TODO handle different MSI range allocations
            for (i, msi) in range.iter().enumerate() {
                assert_eq!(i, msi.vector);
            }
        }
        let admin_q = self.admin_q.get();
        // Identify the controller
        let identify = admin_q.request(IdentifyControllerRequest).await?;
        // MDTS limits a transfer to pow(2, mdts) minimum-size pages;
        // 0 means the controller reports no limit.
        let max_transfer_size = if identify.mdts == 0 {
            // Pick some sane default value
            256 * self.min_page_size
        } else {
            (1 << identify.mdts) * self.min_page_size
        };
        self.create_queues().await?;
        // Identify namespaces
        self.enumerate_namespaces(max_transfer_size).await?;
        Ok(())
    }

    /// Fetches the active namespace ID list and creates an [`NvmeNamespace`]
    /// for each entry, recording successes in `drive_table`.
    async fn enumerate_namespaces(
        self: &Arc<Self>,
        max_transfer_size: usize,
    ) -> Result<(), NvmeError> {
        let admin_q = self.admin_q.get();
        let namespaces = admin_q
            .request(IdentifyActiveNamespaceIdListRequest { start_id: 0 })
            .await?;
        // The returned ID list is zero-terminated.
        let count = namespaces.entries.iter().position(|&x| x == 0).unwrap();
        let list = &namespaces.entries[..count];
        for &nsid in list {
            match NvmeNamespace::create(self.clone(), nsid, max_transfer_size).await {
                Ok(drive) => {
                    self.drive_table.lock().insert(nsid, drive);
                }
                Err(error) => {
                    // A failing namespace is logged but does not abort
                    // enumeration of the remaining ones.
                    log::warn!("Could not create nvme drive, nsid={}: {:?}", nsid, error);
                }
            }
        }
        Ok(())
    }

    /// Reads or writes `lba_count` logical blocks starting at `lba` on
    /// namespace `nsid`, transferring `transfer_size` bytes at the physical
    /// address `buffer_address`, and waits for the command to complete.
    pub async fn perform_io(
        &self,
        nsid: u32,
        lba: u64,
        lba_count: usize,
        buffer_address: PhysicalAddress,
        transfer_size: usize,
        direction: IoDirection,
    ) -> Result<(), NvmeError> {
        // Describe the physical buffer as a PRP list for the controller.
        let prp_list = PrpList::from_buffer(buffer_address, transfer_size)?;
        // The guard presumably keeps local IRQs off so cpu_index() stays
        // stable while this CPU's queue is selected — TODO confirm IrqGuard
        // semantics. One queue pair per CPU was created in late_init.
        let _guard = IrqGuard::acquire();
        let cpu_index = cpu_index();
        let ioq = &self.ioqs.get()[cpu_index as usize];
        let cmd_id = match direction {
            IoDirection::Read => ioq.submit(
                IoRead {
                    nsid,
                    lba,
                    count: lba_count as _,
                },
                &prp_list,
                true,
            )?,
            IoDirection::Write => ioq.submit(
                IoWrite {
                    nsid,
                    lba,
                    count: lba_count as _,
                },
                &prp_list,
                true,
            )?,
        };
        ioq.wait_for_completion(cmd_id, ()).await?;
        Ok(())
    }

    /// Returns the (submission, completion) doorbell pointers for queue
    /// index `idx`.
    ///
    /// # Safety
    /// Same contract as [`Regs::doorbell_ptr`]: the pointers are only valid
    /// while the device register mapping is alive.
    unsafe fn doorbell_pair(&self, idx: usize) -> (*mut u32, *mut u32) {
        let regs = self.regs.lock();
        let sq_ptr = regs.doorbell_ptr(self.doorbell_shift, false, idx);
        let cq_ptr = regs.doorbell_ptr(self.doorbell_shift, true, idx);
        (sq_ptr, cq_ptr)
    }
}
impl InterruptHandler for NvmeController {
    /// Dispatches an MSI(-X) interrupt to the owning queue and drains its
    /// completions. Vector 0 belongs to the admin queue; vectors
    /// `1..=io_queue_count` map to I/O queues (this mapping is asserted
    /// during `late_init`). Returns `true` iff any completion was processed.
    fn handle_irq(self: Arc<Self>, vector: Option<usize>) -> bool {
        let vector = vector.expect("Only MSI-X interrupts are supported");
        if vector == 0 {
            self.admin_q.get().process_completions() != 0
        } else if vector <= self.io_queue_count.load(Ordering::Acquire)
            && let Some(ioqs) = self.ioqs.try_get()
        {
            // try_get: the I/O queues may not exist yet while late_init is
            // still running; spurious vectors in that window are ignored.
            ioqs[vector - 1].process_completions() != 0
        } else {
            false
        }
    }
}
impl Device for NvmeController {
    /// Brings the controller up from the disabled state left by `probe`:
    /// waits for CSTS.RDY to clear, programs and registers the admin queue,
    /// configures and enables the controller, then schedules `late_init`.
    unsafe fn init(self: Arc<Self>) -> Result<(), Error> {
        let regs = self.regs.lock();
        // CAP.TO is expressed in 500 ms units.
        let timeout = Duration::from_millis(regs.CAP.read(CAP::TO) * 500);
        log::debug!("Worst-case timeout: {:?}", timeout);
        // CC.ENABLE was cleared in probe(); wait for the controller to
        // acknowledge by clearing CSTS.RDY.
        // NOTE(review): `timeout` is computed but the spin loops below never
        // honor it — a wedged controller hangs init here.
        while regs.CSTS.matches_all(CSTS::RDY::SET) {
            core::hint::spin_loop();
        }
        if Self::ADMIN_QUEUE_SIZE as u64 > regs.CAP.read(CAP::MQES) + 1 {
            todo!(
                "queue_slots too big, max = {}",
                regs.CAP.read(CAP::MQES) + 1
            );
        }
        // Setup the admin queue (index 0)
        let admin_sq_doorbell = unsafe { regs.doorbell_ptr(self.doorbell_shift, false, 0) };
        let admin_cq_doorbell = unsafe { regs.doorbell_ptr(self.doorbell_shift, true, 0) };
        log::debug!("sq_doorbell for adminq = {:p}", admin_sq_doorbell);
        let admin_q = QueuePair::new(
            0,
            0,
            Self::ADMIN_QUEUE_SIZE,
            admin_sq_doorbell,
            admin_cq_doorbell,
        )
        .unwrap();
        // Program admin queue sizes (0-based per AQA field docs) and base
        // addresses before enabling the controller.
        regs.AQA.modify(
            AQA::ASQS.val(Self::ADMIN_QUEUE_SIZE as u32 - 1)
                + AQA::ACQS.val(Self::ADMIN_QUEUE_SIZE as u32 - 1),
        );
        regs.ASQ.set(admin_q.sq_physical_pointer().into());
        regs.ACQ.set(admin_q.cq_physical_pointer().into());
        // Configure the controller
        // Queue entry sizes are encoded as log2 of the entry size in bytes;
        // MPS = 0 selects pow(2, 12) = 4 KiB controller pages.
        const IOSQES: u32 = size_of::<SubmissionQueueEntry>().ilog2();
        const IOCQES: u32 = size_of::<CompletionQueueEntry>().ilog2();
        regs.CC.modify(
            CC::IOCQES.val(IOCQES)
                + CC::IOSQES.val(IOSQES)
                + CC::MPS.val(0)
                + CC::CSS::NvmCommandSet,
        );
        // Enable the controller
        regs.CC.modify(CC::ENABLE::SET);
        log::debug!("Reset the controller");
        // Wait until the controller reports ready or a fatal error.
        while !regs.CSTS.matches_any(&[CSTS::RDY::SET, CSTS::CFS::SET]) {
            core::hint::spin_loop();
        }
        if regs.CSTS.matches_all(CSTS::CFS::SET) {
            todo!("CFS set after reset!");
        }
        self.admin_q.init(admin_q);
        // Schedule late_init task
        runtime::spawn(self.clone().late_init())?;
        Ok(())
    }

    fn display_name(&self) -> &str {
        "NVM Express Controller"
    }
}
// TODO: blanket Sync assertion — presumably needed because the device memory
// mapping (or the raw doorbell pointers derived from it) is not Sync on its
// own; audit the fields and remove this once they are provably Sync.
unsafe impl Sync for NvmeController {}
/// Global table of registered controllers; a controller's index here is its
/// `controller_id` (assigned by `register_nvme_controller`).
static NVME_CONTROLLERS: IrqSafeSpinlock<Vec<Arc<NvmeController>>> =
    IrqSafeSpinlock::new(Vec::new());
/// PCI probe entry point: maps BAR0, configures PCI command bits and
/// interrupts, disables the controller and constructs an [`NvmeController`].
/// The actual controller bring-up happens later in `Device::init`.
pub fn probe(info: &PciDeviceInfo) -> Result<Arc<dyn Device>, Error> {
    let bar0 = info
        .config_space
        .bar(0)
        .unwrap()
        .as_memory()
        .expect("Expected a memory BAR0");
    info.init_interrupts(PreferredInterruptMode::Msi(true))?;
    // Enable MMIO decoding and bus mastering (DMA); mask legacy INTx and
    // port I/O, which this driver does not use.
    let mut cmd = PciCommandRegister::from_bits_retain(info.config_space.command());
    cmd &= !(PciCommandRegister::DISABLE_INTERRUPTS | PciCommandRegister::ENABLE_IO);
    cmd |= PciCommandRegister::ENABLE_MEMORY | PciCommandRegister::BUS_MASTER;
    info.config_space.set_command(cmd.bits());
    let regs = unsafe { DeviceMemoryIo::<Regs>::map(bar0, Default::default()) }?;
    // Disable the controller
    regs.CC.modify(CC::ENABLE::CLEAR);
    // Cache CAP-derived parameters. doorbell_shift is stored as DSTRD + 1
    // (doorbell stride in bytes = pow(2, 2 + DSTRD), see CAP field docs).
    let doorbell_shift = regs.CAP.read(CAP::DSTRD) as usize + 1;
    let min_page_size = 1 << (regs.CAP.read(CAP::MPSMIN) + 12);
    // The driver only supports controllers whose minimum page size fits
    // within the host page size.
    if min_page_size > PAGE_SIZE {
        log::error!("Cannot support NVMe HC: min page size ({min_page_size}) > host page size ({PAGE_SIZE})");
        return Err(Error::InvalidArgument);
    }
    let device = NvmeController {
        regs: IrqSafeSpinlock::new(regs),
        admin_q: OneTimeInit::new(),
        ioqs: OneTimeInit::new(),
        drive_table: IrqSafeSpinlock::new(BTreeMap::new()),
        controller_id: OneTimeInit::new(),
        pci: info.clone(),
        // A single I/O queue until late_init raises this to the CPU count.
        io_queue_count: AtomicUsize::new(1),
        doorbell_shift,
        min_page_size,
    };
    Ok(Arc::new(device))
}
/// Adds `controller` to the global controller table and assigns its
/// `controller_id` — the index at which it was inserted. The id is
/// initialized while the table lock is still held, so no observer of the
/// table can see a controller without an id.
pub fn register_nvme_controller(controller: Arc<NvmeController>) {
    let mut controllers = NVME_CONTROLLERS.lock();
    let assigned_id = controllers.len() as u32;
    controllers.push(controller.clone());
    controller.controller_id.init(assigned_id);
}
/// Registers an NVMe namespace as a named block device (`nvme{ctrl}n{ns}`)
/// in devfs and, when `probe` is set, spawns an async task that probes its
/// partition table and registers each partition as `…p{index+1}`.
pub fn register_nvme_namespace(namespace: Arc<NvmeNamespace>, probe: bool) {
    let name = format!("nvme{}n{}", namespace.controller_id(), namespace.id());
    log::info!("Register NVMe namespace: {name}");
    // Best-effort: a failure to create the devfs node is ignored.
    devfs::add_named_block_device(namespace.clone(), name.clone(), FileMode::new(0o600)).ok();
    if !probe {
        return;
    }
    runtime::spawn(async move {
        log::info!("Probing partitions for {name}");
        probe_partitions(namespace, |part_index, part| {
            // Partition device names are 1-based.
            let partition_name = format!("{name}p{}", part_index + 1);
            devfs::add_named_block_device(Arc::new(part), partition_name, FileMode::new(0o600))
                .ok();
        })
        .await
        .inspect_err(|error| log::error!("{name}: partition probe failed: {error:?}"))
    })
    .ok();
}