dev/block: NVMe drive enumeration

This commit is contained in:
Mark Poliakov 2023-12-10 12:51:53 +02:00
parent 506476e9c3
commit 352c68e31e
12 changed files with 406 additions and 53 deletions

View File

@ -16,6 +16,7 @@ device-api = { path = "lib/device-api", features = ["derive"] }
kernel-util = { path = "lib/kernel-util" }
memtables = { path = "lib/memtables" }
vmalloc = { path = "lib/vmalloc" }
device-api-macros = { path = "lib/device-api/macros" }
atomic_enum = "0.2.0"
bitflags = "2.3.3"

View File

@ -7,8 +7,8 @@ edition = "2021"
[dependencies]
yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" }
macros = { path = "macros", optional = true }
device-api-macros = { path = "macros", optional = true }
[features]
default = []
derive = ["macros"]
derive = ["device-api-macros"]

View File

@ -1,5 +1,5 @@
[package]
name = "macros"
name = "device-api-macros"
version = "0.1.0"
edition = "2021"

View File

@ -200,21 +200,35 @@ pub static RING_LOGGER_SINK: RingLoggerSink = RingLoggerSink::new();
/// Prints a hex-dump of a slice, appending a virtual address offset to the output
pub fn hex_dump(level: LogLevel, addr_offset: usize, data: &[u8]) {
for (i, b) in data.iter().enumerate() {
if i % 16 == 0 {
log_print_raw!(level, "{:X}: ", addr_offset + i)
const WINDOW_SIZE: usize = 16;
let window_count = (data.len() + WINDOW_SIZE) / WINDOW_SIZE;
for iw in 0..window_count {
let off = iw * WINDOW_SIZE;
let len = core::cmp::min(data.len() - off, WINDOW_SIZE);
let window = &data[off..off + len];
log_print_raw!(level, "{:04X}: ", addr_offset + off);
for i in 0..WINDOW_SIZE {
if i < window.len() {
log_print_raw!(level, "{:02X}", window[i]);
} else {
log_print_raw!(level, " ");
}
log_print_raw!(level, "{:02X}", b);
if i % 16 == 15 {
log_print_raw!(level, "\n");
} else if i % 2 == 1 {
if i % 2 == 1 {
log_print_raw!(level, " ");
}
}
if data.len() % 16 != 0 {
for &ch in window {
if ch.is_ascii_graphic() || ch == b' ' {
log_print_raw!(level, "{}", ch as char);
} else {
log_print_raw!(level, ".");
}
}
log_print_raw!(level, "\n");
}
}

View File

@ -301,7 +301,7 @@ pub trait PciConfigurationSpace {
}
/// Locates a capability within this configuration space
fn capability<'s, C: PciCapability>(&'s self) -> Option<C::CapabilityData<'s, Self>> {
fn capability<C: PciCapability>(&self) -> Option<C::CapabilityData<'_, Self>> {
self.capability_iter().find_map(|(id, offset)| {
if id == C::ID {
Some(C::data(self, offset))

View File

@ -2,6 +2,8 @@
use core::fmt::{self, Write};
use tock_registers::{interfaces::Readable, register_structs, registers::ReadOnly, UIntLike};
use crate::{
device::nvme::queue::PhysicalRegionPage,
mem::{address::IntoRaw, PhysicalAddress},
@ -33,6 +35,22 @@ pub enum ControllerType {
Administrative,
}
// I/O commands
/// Parameters of a read I/O command (submitted with opcode `0x02` by its `Command` impl).
#[derive(Clone, Copy, Debug)]
pub struct IoRead {
/// Namespace ID the command is addressed to
pub nsid: u32,
/// Starting logical block address; split into two command-specific dwords by `fill_sqe`
pub lba: u64,
/// Block count of the transfer; range-checked by `fill_sqe`
pub count: u32,
}
/// Parameters of a write I/O command (submitted with opcode `0x01` by its `Command` impl).
#[derive(Clone, Copy, Debug)]
pub struct IoWrite {
/// Namespace ID the command is addressed to
pub nsid: u32,
/// Starting logical block address; split into two command-specific dwords by `fill_sqe`
pub lba: u64,
/// Block count of the transfer; range-checked by `fill_sqe`
pub count: u32,
}
// Requests
#[derive(Clone, Copy, Debug)]
@ -41,7 +59,15 @@ pub enum SetFeatureRequest {
}
#[derive(Clone, Copy, Debug)]
pub struct IdentifyControllerRequest {
pub struct IdentifyControllerRequest;
/// Admin request for the list of active namespace IDs
/// (answered with [`IdentifyActiveNamespaceIdListResponse`]).
#[derive(Clone, Copy, Debug)]
pub struct IdentifyActiveNamespaceIdListRequest {
/// Value placed into the `nsid` field of the submission entry by `fill_sqe`.
// NOTE(review): presumably the controller lists IDs greater than this value — confirm
// against the NVMe specification.
pub start_id: u32,
}
/// Admin request for the description of a single namespace
/// (answered with [`IdentifyNamespaceResponse`]).
#[derive(Clone, Copy, Debug)]
pub struct IdentifyNamespaceRequest {
/// ID of the namespace to describe
pub nsid: u32,
}
@ -81,11 +107,34 @@ pub struct IdentifyControllerResponse {
pub cntrltype: ControllerType,
}
/// Data returned for [`IdentifyActiveNamespaceIdListRequest`]: 1024 `u32` slots
/// (4096 bytes, `repr(C)`).
#[derive(Clone, Copy, Debug)]
#[repr(C)]
pub struct IdentifyActiveNamespaceIdListResponse {
/// Namespace ID slots; the consumer in this driver treats the first zero entry as the end
/// of the list
pub entries: [u32; 1024],
}
// Memory layout of the data returned for `IdentifyNamespaceRequest`. Only the fields read by
// the accessors of `IdentifyNamespaceResponse` are named; the `_N` entries are padding up to
// the 4096-byte end marker.
register_structs! {
#[allow(non_snake_case)]
pub IdentifyNamespaceResponse {
// Returned as-is by `total_lba_count()`
(0 => NSZE: ReadOnly<u64>),
(8 => _0),
// Bounds the format index in `lba_fmt()`; compared against 16 in `current_lba_fmt_idx()`
(25 => NLBAF: ReadOnly<u8>),
// Source of the current format index, decoded by `current_lba_fmt_idx()`
(26 => FLBAS: ReadOnly<u8>),
(27 => _1),
// Table of raw LBA format descriptors, wrapped into `LbaFormat` by `lba_fmt()`
(128 => LBAFS: [ReadOnly<u32>; 64]),
(384 => _2),
(4096 => @END),
}
}
/// Raw 32-bit LBA format descriptor, as stored in the `LBAFS` table of
/// `IdentifyNamespaceResponse`.
///
/// `lba_data_size()` decodes bits 23:16 of the value as a power-of-two exponent.
#[derive(Clone, Copy, Debug)]
#[repr(transparent)]
pub struct LbaFormat(u32);
impl Command for IdentifyControllerRequest {
fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) {
sqe.command.set_opcode(0x06);
sqe.command_specific[0] = 0x01;
sqe.nsid = self.nsid;
}
}
@ -93,6 +142,62 @@ impl Request for IdentifyControllerRequest {
type Response = IdentifyControllerResponse;
}
impl Command for IdentifyActiveNamespaceIdListRequest {
    /// Encodes the request as an Identify command (opcode `0x06`) with `0x02` in the first
    /// command-specific dword and the starting ID in the `nsid` field.
    fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) {
        const OPCODE_IDENTIFY: u8 = 0x06;
        const CNS_ACTIVE_NAMESPACE_LIST: u32 = 0x02;

        sqe.nsid = self.start_id;
        sqe.command_specific[0] = CNS_ACTIVE_NAMESPACE_LIST;
        sqe.command.set_opcode(OPCODE_IDENTIFY);
    }
}
// Ties the request to the type its response data is interpreted as.
impl Request for IdentifyActiveNamespaceIdListRequest {
type Response = IdentifyActiveNamespaceIdListResponse;
}
impl Command for IdentifyNamespaceRequest {
    /// Encodes the request as an Identify command (opcode `0x06`) with `0x00` in the first
    /// command-specific dword, addressed to namespace `self.nsid`.
    fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) {
        const OPCODE_IDENTIFY: u8 = 0x06;
        const CNS_NAMESPACE: u32 = 0x00;

        sqe.nsid = self.nsid;
        sqe.command_specific[0] = CNS_NAMESPACE;
        sqe.command.set_opcode(OPCODE_IDENTIFY);
    }
}
// Ties the request to the type its response data is interpreted as.
impl Request for IdentifyNamespaceRequest {
type Response = IdentifyNamespaceResponse;
}
impl IdentifyNamespaceResponse {
    /// Returns the index of the LBA format the namespace is currently formatted with.
    ///
    /// The index is decoded from `FLBAS`: bits 3:0 hold its four least significant bits and,
    /// when the namespace reports more than 16 formats, bits 6:5 hold its two most
    /// significant bits.
    pub fn current_lba_fmt_idx(&self) -> usize {
        let flbas = self.FLBAS.get();
        let mut index = flbas & 0x0F;
        if self.NLBAF.get() > 16 {
            // Move FLBAS bits 6:5 to index bits 5:4. Bit 4 (metadata placement) and bit 7
            // (reserved) are not part of the index and must be masked out.
            index |= (flbas & 0x60) >> 1;
        }
        usize::from(index)
    }

    /// Returns the LBA format descriptor with index `idx`, or `None` if the namespace does
    /// not provide a format with that index.
    pub fn lba_fmt(&self, idx: usize) -> Option<LbaFormat> {
        // NLBAF is a zero-based count: a value of `n` means formats `0..=n` are valid
        if idx > usize::from(self.NLBAF.get()) {
            return None;
        }
        // Checked lookup: NLBAF is a full byte, while the table only has 64 entries
        self.LBAFS.get(idx).map(|entry| LbaFormat(entry.get()))
    }

    /// Returns the namespace size (`NSZE`), in logical blocks.
    pub fn total_lba_count(&self) -> u64 {
        self.NSZE.get()
    }
}
impl LbaFormat {
    /// Returns the LBA data size described by this format, in bytes.
    ///
    /// The size is `2^LBADS`, where `LBADS` is taken from bits 23:16 of the descriptor.
    /// Returns `None` when `LBADS` is below 9 (less than 512 bytes, not a valid format) or
    /// when the resulting size does not fit into a `u64`.
    pub fn lba_data_size(&self) -> Option<u64> {
        let lbads = (self.0 >> 16) & 0xFF;
        if lbads < 9 {
            return None;
        }
        // LBADS is an 8-bit field, so the shift amount may exceed the width of u64
        1u64.checked_shl(lbads)
    }
}
impl Command for SetFeatureRequest {
fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) {
sqe.command.set_opcode(0x09);
@ -111,7 +216,7 @@ impl Command for SetFeatureRequest {
impl Command for CreateIoCompletionQueue {
fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) {
sqe.command.set_opcode(0x05);
sqe.data_pointer[1] = PhysicalRegionPage::with_addr(self.data.into_raw());
sqe.data_pointer[0] = PhysicalRegionPage::with_addr(self.data);
sqe.command_specific[0] = ((self.size as u32 - 1) << 16) | self.id;
sqe.command_specific[1] = (self.vector << 16) | 3;
}
@ -120,7 +225,7 @@ impl Command for CreateIoCompletionQueue {
impl Command for CreateIoSubmissionQueue {
fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) {
sqe.command.set_opcode(0x01);
sqe.data_pointer[1] = PhysicalRegionPage::with_addr(self.data.into_raw());
sqe.data_pointer[0] = PhysicalRegionPage::with_addr(self.data);
sqe.command_specific[0] = ((self.size as u32 - 1) << 16) | self.id;
// Medium priority
sqe.command_specific[1] = (self.cq_id << 16) | 1;
@ -140,3 +245,27 @@ impl<const N: usize> fmt::Debug for String<N> {
Ok(())
}
}
impl Command for IoRead {
    /// Encodes the read as an NVM Read command (opcode `0x02`).
    ///
    /// The starting LBA goes into the first two command-specific dwords (low half first),
    /// the block count into the third one.
    ///
    /// # Panics
    ///
    /// Panics if `count` is zero or greater than 65536.
    fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) {
        assert!((1..=65536).contains(&self.count));

        sqe.command.set_opcode(0x02);
        sqe.command_specific[0] = self.lba as u32;
        sqe.command_specific[1] = (self.lba >> 32) as u32;
        // The "number of logical blocks" field is zero-based: 0 means one block
        sqe.command_specific[2] = self.count - 1;
        sqe.nsid = self.nsid;
    }
}
impl Command for IoWrite {
    /// Encodes the write as an NVM Write command (opcode `0x01`).
    ///
    /// The starting LBA goes into the first two command-specific dwords (low half first),
    /// the block count into the third one.
    ///
    /// # Panics
    ///
    /// Panics if `count` is zero or greater than 65536.
    fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) {
        assert!((1..=65536).contains(&self.count));

        sqe.command.set_opcode(0x01);
        sqe.command_specific[0] = self.lba as u32;
        sqe.command_specific[1] = (self.lba >> 32) as u32;
        // The "number of logical blocks" field is zero-based: 0 means one block
        sqe.command_specific[2] = self.count - 1;
        sqe.nsid = self.nsid;
    }
}

87
src/device/nvme/drive.rs Normal file
View File

@ -0,0 +1,87 @@
use abi::{error::Error, io::DeviceRequest};
use alloc::{boxed::Box, format};
use vfs::BlockDevice;
use crate::{device::nvme::command::IdentifyNamespaceRequest, fs::devfs};
use super::{error::NvmeError, NvmeController};
/// A single namespace of an NVMe controller, exposed as a block device.
// `allow(unused)`: `controller` and `nsid` are only referenced by the block I/O code that
// is still commented out in `impl NvmeDrive`.
#[allow(unused)]
pub struct NvmeDrive {
/// Controller the namespace belongs to
controller: &'static NvmeController,
/// Namespace ID
nsid: u32,
/// Namespace size in logical blocks, as reported by the Identify Namespace response
total_lba_count: u64,
/// Size of one logical block, as decoded by `LbaFormat::lba_data_size`
lba_size: u64,
}
impl NvmeDrive {
    /// Identifies namespace `nsid` of `controller` and registers it as a block device.
    ///
    /// The drive is leaked to obtain a `'static` reference and is added to devfs as
    /// `nvme<controller id>n<nsid>`. A failure to add the devfs node is logged, but does not
    /// fail drive creation.
    ///
    /// # Errors
    ///
    /// Returns an error if the Identify Namespace request cannot be submitted or fails.
    ///
    /// # Panics
    ///
    /// Panics if the namespace reports a current LBA format that is missing from its format
    /// table or that has an invalid data size.
    pub async fn create(
        controller: &'static NvmeController,
        nsid: u32,
    ) -> Result<&'static NvmeDrive, NvmeError> {
        let admin_q = controller.admin_q.get();
        let identify = admin_q.request(IdentifyNamespaceRequest { nsid })?.await?;

        let current_lba_format_idx = identify.current_lba_fmt_idx();
        let current_lba_format = identify
            .lba_fmt(current_lba_format_idx)
            .expect("NVMe namespace selects an LBA format it does not list");
        let lba_size = current_lba_format
            .lba_data_size()
            .expect("NVMe namespace reports an invalid LBA data size");
        let total_lba_count = identify.total_lba_count();

        debugln!(
            "ns = {}, lba = {}B, size = {}M",
            nsid,
            lba_size,
            // Both factors come from the device; do not let a bogus value overflow here
            total_lba_count.saturating_mul(lba_size) / (1024 * 1024)
        );

        let dev = Box::leak(Box::new(NvmeDrive {
            controller,
            nsid,
            total_lba_count,
            lba_size,
        }));

        let node_name = format!("nvme{}n{}", controller.controller_id.get(), nsid);
        // Best effort: the drive stays usable through the controller even without a node
        if devfs::add_named_block_device(dev, node_name).is_err() {
            debugln!("Could not add a devfs node for nvme drive, nsid={}", nsid);
        }

        // TODO probe partitions

        Ok(dev)
    }

    // TODO proper interface for reading/writing blocks
    // pub async fn read_block(
    //     &self,
    //     lba: u64,
    //     block: &mut PhysicalRefMut<'_, [u8]>,
    // ) -> Result<(), NvmeError> {
    //     self.controller.read_block(self.nsid, lba, block).await
    // }
    // pub async fn write_block(
    //     &self,
    //     lba: u64,
    //     block: &PhysicalRefMut<'_, [u8]>,
    // ) -> Result<(), NvmeError> {
    //     self.controller.write_block(self.nsid, lba, block).await
    // }
}
impl BlockDevice for NvmeDrive {
    /// Not implemented yet.
    fn read(&'static self, _pos: u64, _buf: &mut [u8]) -> Result<usize, Error> {
        todo!()
    }

    /// Not implemented yet.
    fn write(&'static self, _pos: u64, _buf: &[u8]) -> Result<usize, Error> {
        todo!()
    }

    /// Returns the size of the drive: the block size multiplied by the block count.
    ///
    /// Both factors are reported by the device, so the product saturates at `u64::MAX`
    /// instead of overflowing.
    fn size(&self) -> Result<u64, Error> {
        Ok(self.lba_size.saturating_mul(self.total_lba_count))
    }

    /// Not implemented yet.
    fn device_request(&self, _req: &mut DeviceRequest) -> Result<(), Error> {
        todo!()
    }
}

View File

@ -2,7 +2,7 @@
use core::{mem::size_of, time::Duration};
use abi::error::Error;
use alloc::vec::Vec;
use alloc::{collections::BTreeMap, vec::Vec};
use device_api::{interrupt::MsiHandler, Device};
use kernel_util::{sync::IrqSafeSpinlock, util::OneTimeInit};
use tock_registers::{
@ -18,13 +18,17 @@ use crate::{
capability::MsiXCapability, PciBaseAddress, PciCommandRegister, PciConfigurationSpace,
},
nvme::{
command::IdentifyControllerRequest,
command::{
IdentifyActiveNamespaceIdListRequest, IdentifyControllerRequest, IoRead, IoWrite,
},
drive::NvmeDrive,
queue::{CompletionQueueEntry, SubmissionQueueEntry},
},
},
mem::{
address::{FromRaw, IntoRaw},
address::{AsPhysicalAddress, FromRaw, IntoRaw},
device::{DeviceMemoryIo, DeviceMemoryIoMut},
pointer::PhysicalRefMut,
PhysicalAddress,
},
task::runtime,
@ -39,6 +43,7 @@ use self::{
use super::bus::pci::{capability::MsiXEntry, FromPciBus, PciDeviceInfo};
mod command;
mod drive;
mod error;
mod queue;
@ -113,6 +118,8 @@ pub struct NvmeController {
admin_q: OneTimeInit<QueuePair<'static>>,
ioqs: OneTimeInit<Vec<QueuePair<'static>>>,
vector_table: IrqSafeSpinlock<DeviceMemoryIoMut<'static, [MsiXEntry]>>,
drive_table: IrqSafeSpinlock<BTreeMap<u32, &'static NvmeDrive>>,
controller_id: OneTimeInit<usize>,
doorbell_shift: usize,
}
@ -120,17 +127,21 @@ pub struct NvmeController {
impl Regs {
    /// Computes the address of a queue doorbell register.
    ///
    /// Doorbells start `0x1000` bytes past the register block and are laid out as
    /// SQ0 tail, CQ0 head, SQ1 tail, CQ1 head, ..., with `4 << DSTRD` bytes per doorbell.
    ///
    /// * `shift` - `CAP.DSTRD + 1`, i.e. log2 of the distance between the submission
    ///   doorbells of two consecutive queues, in dwords. Must be at least 1.
    /// * `completion` - selects the completion queue head doorbell instead of the submission
    ///   queue tail doorbell.
    /// * `queue_index` - index of the queue pair (0 is the admin queue).
    ///
    /// # Safety
    ///
    /// Only the address is computed here. The caller is responsible for only accessing the
    /// returned pointer while `self` refers to mapped controller registers and the doorbell
    /// belongs to a queue supported by the controller.
    unsafe fn doorbell_ptr(&self, shift: usize, completion: bool, queue_index: usize) -> *mut u32 {
        debug_assert!(shift >= 1);

        let doorbell_base = (self as *const Regs as *mut Regs).addr() + 0x1000;
        // Position in the SQ/CQ-interleaved doorbell array
        let slot = queue_index * 2 + completion as usize;
        // The stride applies to every doorbell, including the step from an SQ doorbell to
        // the CQ doorbell of the same queue
        let offset = (slot << (shift - 1)) * 4;
        (doorbell_base + offset) as *mut u32
    }
}
impl NvmeController {
async fn late_init(&'static self) -> Result<(), NvmeError> {
// runtime::spawn(self.poll_task()).expect("Couldn't spawn NVMe poll task");
register_nvme_controller(self);
let admin_q = self.admin_q.get();
infoln!("SETUP");
// Identify the controller
let _identify = admin_q.request(IdentifyControllerRequest)?.await?;
// TODO do something with identify_controller
// Request a CQ/SQ pair for I/O
admin_q
@ -140,12 +151,7 @@ impl NvmeController {
// Allocate the queue
let (sq_doorbell, cq_doorbell) = unsafe { self.doorbell_pair(1) };
let io_q =
QueuePair::new(0, 32, sq_doorbell, cq_doorbell).map_err(NvmeError::MemoryError)?;
// Identify the controller
let identify = admin_q
.request(IdentifyControllerRequest { nsid: 0 })?
.await?;
QueuePair::new(1, 0, 32, sq_doorbell, cq_doorbell).map_err(NvmeError::MemoryError)?;
// Create the queue on the device side
admin_q
@ -165,7 +171,86 @@ impl NvmeController {
})
.await?;
loop {}
self.ioqs.init(Vec::from_iter([io_q]));
// Identify namespaces
self.enumerate_namespaces().await?;
Ok(())
}
/// Queries the controller for its active namespaces and creates a drive for each of them.
///
/// Namespaces for which drive creation fails are logged and skipped.
///
/// # Errors
///
/// Returns an error if the namespace list request cannot be submitted or fails.
async fn enumerate_namespaces(&'static self) -> Result<(), NvmeError> {
    let admin_q = self.admin_q.get();

    let namespaces = admin_q
        .request(IdentifyActiveNamespaceIdListRequest { start_id: 0 })?
        .await?;

    // Unused slots are zero, but a completely filled list has no zero terminator.
    // TODO request the following lists when all 1024 slots are in use
    let count = namespaces
        .entries
        .iter()
        .position(|&x| x == 0)
        .unwrap_or(namespaces.entries.len());
    let list = &namespaces.entries[..count];

    for &nsid in list {
        match NvmeDrive::create(self, nsid).await {
            Ok(drive) => {
                self.drive_table.lock().insert(nsid, drive);
            }
            Err(error) => {
                warnln!("Could not create nvme drive, nsid={}: {:?}", nsid, error);
            }
        }
    }

    Ok(())
}
/// Submits a single-block read on the first I/O queue and waits for its completion.
///
/// * `nsid` - namespace to read from.
/// * `lba` - logical block address to read.
/// * `buffer` - destination memory; its physical address is passed to the device.
pub async fn read_block(
    &'static self,
    nsid: u32,
    lba: u64,
    buffer: &mut PhysicalRefMut<'_, [u8]>,
) -> Result<(), NvmeError> {
    let queue = &self.ioqs.get()[0];
    // NOTE(review): the safety contract of `as_physical_address` is not visible here;
    // presumably the buffer has to stay alive until the command completes, which holds
    // because it is borrowed for the whole call.
    let data_address = unsafe { buffer.as_physical_address() };

    debugln!("read nsid={}, lba={:#x}", nsid, lba);

    let command = IoRead {
        nsid,
        lba,
        count: 1,
    };
    let command_id = queue.submit(command, &[data_address], true);
    queue.wait_for_completion(command_id, ()).await?;

    Ok(())
}
/// Submits a single-block write on the first I/O queue and waits for its completion.
///
/// * `nsid` - namespace to write to.
/// * `lba` - logical block address to write.
/// * `buffer` - source memory; its physical address is passed to the device.
pub async fn write_block(
    &'static self,
    nsid: u32,
    lba: u64,
    buffer: &PhysicalRefMut<'_, [u8]>,
) -> Result<(), NvmeError> {
    let queue = &self.ioqs.get()[0];
    // NOTE(review): the safety contract of `as_physical_address` is not visible here;
    // presumably the buffer has to stay alive until the command completes, which holds
    // because it is borrowed for the whole call.
    let data_address = unsafe { buffer.as_physical_address() };

    debugln!("write nsid={}, lba={:#x}", nsid, lba);

    let command = IoWrite {
        nsid,
        lba,
        count: 1,
    };
    let command_id = queue.submit(command, &[data_address], true);
    queue.wait_for_completion(command_id, ()).await?;

    Ok(())
}
unsafe fn doorbell_pair(&self, idx: usize) -> (*mut u32, *mut u32) {
@ -177,9 +262,15 @@ impl NvmeController {
}
impl MsiHandler for NvmeController {
fn handle_msi(&self, vector: usize) -> bool {
debugln!("handle_msi {}", vector);
self.admin_q.get().process_completions() != 0
fn handle_msi(&self, _vector: usize) -> bool {
// TODO check MSI-X pending bits
self.admin_q.get().process_completions();
if let Some(qs) = self.ioqs.try_get() {
for q in qs {
q.process_completions();
}
}
true
}
}
@ -211,7 +302,9 @@ impl Device for NvmeController {
// Setup the admin queue (index 0)
let admin_sq_doorbell = unsafe { regs.doorbell_ptr(self.doorbell_shift, false, 0) };
let admin_cq_doorbell = unsafe { regs.doorbell_ptr(self.doorbell_shift, true, 0) };
debugln!("sq_doorbell for adminq = {:p}", admin_sq_doorbell);
let admin_q = QueuePair::new(
0,
0,
queue_slots as usize,
admin_sq_doorbell,
@ -296,14 +389,26 @@ impl FromPciBus for NvmeController {
// Disable the controller
regs.CC.modify(CC::ENABLE::CLEAR);
let doorbell_shift = regs.CAP.read(CAP::DSTRD) as usize + 2;
let doorbell_shift = regs.CAP.read(CAP::DSTRD) as usize + 1;
Ok(Self {
regs: IrqSafeSpinlock::new(regs),
admin_q: OneTimeInit::new(),
ioqs: OneTimeInit::new(),
vector_table: IrqSafeSpinlock::new(vt),
drive_table: IrqSafeSpinlock::new(BTreeMap::new()),
controller_id: OneTimeInit::new(),
doorbell_shift,
})
}
}
// Global list of registered NVMe controllers. `register_nvme_controller` appends to it and
// uses the position of a controller in the list as its ID.
static NVME_CONTROLLERS: IrqSafeSpinlock<Vec<&'static NvmeController>> =
IrqSafeSpinlock::new(Vec::new());
/// Adds `ctrl` to the global controller list and assigns it the next free controller ID
/// (its position in the list).
pub fn register_nvme_controller(ctrl: &'static NvmeController) {
    let mut registry = NVME_CONTROLLERS.lock();

    // IDs are handed out sequentially: the ID equals the number of controllers
    // registered before this one
    let next_id = registry.len();
    registry.push(ctrl);
    ctrl.controller_id.init(next_id);
}

View File

@ -50,7 +50,7 @@ pub struct CommandDword0(u32);
pub struct SubmissionQueueEntry {
pub command: CommandDword0, // 0
pub nsid: u32, // 1
_0: [u32; 2], // 2, 3
pub io_data: [u32; 2], // 2, 3
pub metadata_pointer: u64, // 4, 5
pub data_pointer: [PhysicalRegionPage; 2], // 6, 7, 8, 9
pub command_specific: [u32; 6], // 10, 11, 12, 13, 14, 15
@ -89,12 +89,13 @@ struct Inner<'a> {
}
// TODO PageBox<T>?
#[allow(unused)]
pub struct QueuePair<'a> {
base: PhysicalAddress,
page_count: usize,
id: u32,
vector: usize,
sq_base: PhysicalAddress,
cq_base: PhysicalAddress,
@ -110,8 +111,8 @@ impl PhysicalRegionPage {
Self(0)
}
pub const fn with_addr(addr: u64) -> Self {
Self(addr)
pub const fn with_addr(address: PhysicalAddress) -> Self {
Self(address.into_raw())
}
}
@ -240,13 +241,14 @@ impl<'a, T> Queue<'a, T> {
wrapped
}
pub fn is_empty(&self) -> bool {
self.head == self.tail
}
// pub fn is_empty(&self) -> bool {
// self.head == self.tail
// }
}
impl<'a> QueuePair<'a> {
pub fn new(
id: u32,
vector: usize,
capacity: usize,
sq_doorbell: *mut u32,
@ -281,6 +283,7 @@ impl<'a> QueuePair<'a> {
Ok(Self {
completion_notify: QueueWaker::new(),
id,
vector,
base,
page_count,
@ -350,7 +353,7 @@ impl<'a> QueuePair<'a> {
match ranges.len() {
1 => {
sqe.data_pointer[0] = PhysicalRegionPage::with_addr(ranges[0].into_raw());
sqe.data_pointer[0] = PhysicalRegionPage::with_addr(ranges[0]);
sqe.data_pointer[1] = PhysicalRegionPage::null();
}
0 => {
@ -397,7 +400,7 @@ impl<'a> QueuePair<'a> {
'r: 'a,
{
assert_ne!(size_of::<R::Response>(), 0);
assert!(size_of::<R::Response>() < 0x1000);
assert!(size_of::<R::Response>() <= 0x1000);
let page = phys::alloc_page().map_err(NvmeError::MemoryError)?;
// TODO PageBox
@ -424,8 +427,8 @@ impl<'a> QueuePair<'a> {
n += 1;
let sub_queue_id = cmp.sub_queue_id();
// TODO support queues other than admin q
assert_eq!(sub_queue_id, 0);
// TODO allow several sqs receive completions through one cq?
assert_eq!(sub_queue_id, self.id);
let sub_queue_head = cmp.sub_queue_head();
let cmp = *cmp;
@ -442,7 +445,6 @@ impl<'a> QueuePair<'a> {
let command_id = cmp.command_id();
if inner.pending.remove(&command_id) {
debugln!("Insert completion: {}", command_id);
inner.completed.insert(command_id, cmp);
}
}

View File

@ -6,7 +6,7 @@ use alloc::{format, string::String};
use kernel_util::util::OneTimeInit;
use vfs::{
impls::{read_fn_node, MemoryDirectory},
CharDevice, Node, NodeFlags, NodeRef,
BlockDevice, CharDevice, Node, NodeFlags, NodeRef,
};
use crate::proc::random;
@ -37,7 +37,8 @@ pub fn root() -> &'static NodeRef {
DEVFS_ROOT.get()
}
fn _add_char_device(dev: &'static dyn CharDevice, name: String) -> Result<(), Error> {
/// Adds a character device with a custom name
pub fn add_named_char_device(dev: &'static dyn CharDevice, name: String) -> Result<(), Error> {
infoln!("Add char device: {}", name);
let node = Node::char(dev, NodeFlags::IN_MEMORY_PROPS);
@ -45,6 +46,19 @@ fn _add_char_device(dev: &'static dyn CharDevice, name: String) -> Result<(), Er
DEVFS_ROOT.get().add_child(name, node)
}
/// Adds a block device to the devfs root under a caller-provided name.
///
/// Returns whatever adding the child node to the devfs root returns.
pub fn add_named_block_device<S: Into<String>>(
    dev: &'static dyn BlockDevice,
    name: S,
) -> Result<(), Error> {
    let node_name: String = name.into();
    infoln!("Add block device: {}", node_name);

    let device_node = Node::block(dev, NodeFlags::IN_MEMORY_PROPS);
    let devfs_root = DEVFS_ROOT.get();

    devfs_root.add_child(node_name, device_node)
}
/// Adds a character device to the devfs
pub fn add_char_device(dev: &'static dyn CharDevice, kind: CharDeviceType) -> Result<(), Error> {
static TTY_COUNT: AtomicUsize = AtomicUsize::new(0);
@ -58,7 +72,7 @@ pub fn add_char_device(dev: &'static dyn CharDevice, kind: CharDeviceType) -> Re
let value = count.fetch_add(1, Ordering::AcqRel);
let name = format!("{}{}", prefix, value);
_add_char_device(dev, name)
add_named_char_device(dev, name)
}
/// Adds "pseudo"-devices to the filesystem (i.e. /dev/random)

View File

@ -25,7 +25,6 @@ fn setup_root() -> Result<NodeRef, Error> {
/// initialization has finished.
pub fn kinit() -> Result<(), Error> {
infoln!("In main");
loop {}
#[cfg(feature = "fb_console")]
{

View File

@ -19,7 +19,9 @@
strict_provenance,
slice_ptr_get,
slice_split_once,
iter_collect_into
iter_collect_into,
iter_next_chunk,
exact_size_is_empty
)]
#![allow(
clippy::new_without_default,