From d687051c48ba38ad82da0bfb6a35a9916aeecca1 Mon Sep 17 00:00:00 2001
From: Mark Poliakov <mark@alnyan.me>
Date: Thu, 30 Jan 2025 14:23:21 +0200
Subject: [PATCH] usb: implement usb-msc-bbb class driver, basic scsi commands

---
 Cargo.lock                                    |  15 +
 kernel/arch/interface/src/mem/mod.rs          |   9 +-
 kernel/arch/x86_64/src/mem/mod.rs             |   2 +
 kernel/driver/block/scsi/Cargo.toml           |  14 +
 kernel/driver/block/scsi/src/command.rs       | 102 ++++
 kernel/driver/block/scsi/src/device.rs        |  24 +
 kernel/driver/block/scsi/src/lib.rs           | 245 ++++++++
 kernel/driver/block/scsi/src/transport.rs     |  72 +++
 kernel/driver/bus/usb/Cargo.toml              |   3 +
 kernel/driver/bus/usb/src/bus.rs              |   1 +
 .../bus/usb/src/class_driver/hid_keyboard.rs  | 173 ++++++
 .../bus/usb/src/class_driver/mass_storage.rs  | 227 ++++++++
 kernel/driver/bus/usb/src/class_driver/mod.rs | 193 +------
 kernel/driver/bus/usb/src/communication.rs    |  29 +-
 kernel/driver/bus/usb/src/descriptor.rs       |  17 +-
 kernel/driver/bus/usb/src/device.rs           |  36 +-
 kernel/driver/bus/usb/src/error.rs            |  16 +-
 kernel/driver/bus/usb/src/info.rs             |  47 ++
 kernel/driver/bus/usb/src/pipe/bulk.rs        |  55 ++
 kernel/driver/bus/usb/src/pipe/control.rs     |  23 +
 kernel/driver/bus/usb/src/pipe/mod.rs         |   1 +
 kernel/driver/usb/xhci/src/context.rs         | 136 +++++
 kernel/driver/usb/xhci/src/controller.rs      | 537 ++++++++++++------
 kernel/driver/usb/xhci/src/device.rs          |  96 +++-
 kernel/driver/usb/xhci/src/lib.rs             |  98 +---
 kernel/driver/usb/xhci/src/pipe.rs            |  91 ++-
 kernel/driver/usb/xhci/src/regs.rs            |  62 +-
 kernel/driver/usb/xhci/src/ring/command.rs    |  89 ++-
 kernel/driver/usb/xhci/src/ring/event.rs      |   1 +
 kernel/driver/usb/xhci/src/ring/transfer.rs   | 241 +++++++-
 kernel/driver/usb/xhci/src/util.rs            |  78 +++
 kernel/libk/src/fs/devfs.rs                   |   9 +-
 kernel/libk/src/task/runtime/task.rs          |   8 +-
 kernel/libk/src/task/runtime/task_queue.rs    |  11 +-
 kernel/src/arch/x86/mod.rs                    |  16 +-
 35 files changed, 2269 insertions(+), 508 deletions(-)
 create mode 100644 kernel/driver/block/scsi/Cargo.toml
 create mode 100644 kernel/driver/block/scsi/src/command.rs
 create mode 100644 kernel/driver/block/scsi/src/device.rs
 create mode 100644 kernel/driver/block/scsi/src/lib.rs
 create mode 100644 kernel/driver/block/scsi/src/transport.rs
 create mode 100644 kernel/driver/bus/usb/src/class_driver/hid_keyboard.rs
 create mode 100644 kernel/driver/bus/usb/src/class_driver/mass_storage.rs
 create mode 100644 kernel/driver/bus/usb/src/pipe/bulk.rs
 create mode 100644 kernel/driver/usb/xhci/src/context.rs
 create mode 100644 kernel/driver/usb/xhci/src/util.rs

diff --git a/Cargo.lock b/Cargo.lock
index b47b6483..e1e6ed52 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2744,10 +2744,24 @@ dependencies = [
  "yggdrasil-abi",
 ]
 
+[[package]]
+name = "ygg_driver_scsi"
+version = "0.1.0"
+dependencies = [
+ "async-trait",
+ "device-api",
+ "libk",
+ "libk-mm",
+ "libk-util",
+ "log",
+ "yggdrasil-abi",
+]
+
 [[package]]
 name = "ygg_driver_usb"
 version = "0.1.0"
 dependencies = [
+ "async-trait",
  "bytemuck",
  "device-api",
  "futures-util",
@@ -2756,6 +2770,7 @@ dependencies = [
  "libk-util",
  "log",
  "ygg_driver_input",
+ "ygg_driver_scsi",
  "yggdrasil-abi",
 ]
 
diff --git a/kernel/arch/interface/src/mem/mod.rs b/kernel/arch/interface/src/mem/mod.rs
index 84f5130d..ef2138a1 100644
--- a/kernel/arch/interface/src/mem/mod.rs
+++ b/kernel/arch/interface/src/mem/mod.rs
@@ -35,6 +35,8 @@ pub struct DeviceMemoryAttributes {
 /// Describes a single device memory mapping
 #[derive(Debug)]
 pub struct RawDeviceMemoryMapping<A: KernelTableManager> {
+    /// Physical base address of the object
+    pub physical_base: u64,
     /// Virtual address of the mapped object
     pub address: usize,
     /// Base address of the mapping start
@@ -98,7 +100,8 @@ impl<A: KernelTableManager> RawDeviceMemoryMapping<A> {
         address
     }
 
-    pub fn into_raw_parts(self) -> (usize, usize, usize, usize) {
+    pub fn into_raw_parts(self) -> (u64, usize, usize, usize, usize) {
+        let physical_base = self.physical_base;
         let address = self.address;
         let base_address = self.base_address;
         let page_count = self.page_count;
@@ -106,7 +109,7 @@ impl<A: KernelTableManager> RawDeviceMemoryMapping<A> {
 
         core::mem::forget(self);
 
-        (address, base_address, page_count, page_size)
+        (physical_base, address, base_address, page_count, page_size)
     }
 
     /// # Safety
@@ -114,12 +117,14 @@ impl<A: KernelTableManager> RawDeviceMemoryMapping<A> {
     /// Preconditions: all the fields must come from a [RawDeviceMemoryMapping::into_raw_parts]
     /// call.
     pub unsafe fn from_raw_parts(
+        physical_base: u64,
         address: usize,
         base_address: usize,
         page_count: usize,
         page_size: usize,
     ) -> Self {
         Self {
+            physical_base,
             address,
             base_address,
             page_count,
diff --git a/kernel/arch/x86_64/src/mem/mod.rs b/kernel/arch/x86_64/src/mem/mod.rs
index 49cd5226..1709d82d 100644
--- a/kernel/arch/x86_64/src/mem/mod.rs
+++ b/kernel/arch/x86_64/src/mem/mod.rs
@@ -222,6 +222,7 @@ unsafe fn map_device_memory(
         let address = base_address + l2_offset;
 
         Ok(RawDeviceMemoryMapping::from_raw_parts(
+            l2_aligned.into_u64(),
             address,
             base_address,
             page_count,
@@ -233,6 +234,7 @@ unsafe fn map_device_memory(
         let address = base_address + l3_offset;
 
         Ok(RawDeviceMemoryMapping::from_raw_parts(
+            l3_aligned.into_u64(),
             address,
             base_address,
             page_count,
diff --git a/kernel/driver/block/scsi/Cargo.toml b/kernel/driver/block/scsi/Cargo.toml
new file mode 100644
index 00000000..79809743
--- /dev/null
+++ b/kernel/driver/block/scsi/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "ygg_driver_scsi"
+version = "0.1.0"
+edition = "2024"
+
+[dependencies]
+yggdrasil-abi.workspace = true
+device-api.workspace = true
+libk-util.workspace = true
+libk-mm.workspace = true
+libk.workspace = true
+
+async-trait.workspace = true
+log.workspace = true
diff --git a/kernel/driver/block/scsi/src/command.rs b/kernel/driver/block/scsi/src/command.rs
new file mode 100644
index 00000000..bf3e7254
--- /dev/null
+++ b/kernel/driver/block/scsi/src/command.rs
@@ -0,0 +1,102 @@
+use libk::error::Error;
+
+use crate::device::ScsiDeviceType;
+
+pub trait ScsiCommand {
+    type Response;
+    const REQUEST_LEN: usize;
+    const RESPONSE_LEN: usize;
+
+    fn into_bytes(self) -> [u8; Self::REQUEST_LEN];
+    fn parse_response(bytes: &[u8]) -> Result<Self::Response, Error>;
+}
+
+// Add more info when needed
+pub struct ScsiInquiry;
+#[derive(Debug)]
+pub struct ScsiInquiryResponse {
+    pub device_type: ScsiDeviceType,
+}
+
+impl ScsiCommand for ScsiInquiry {
+    type Response = ScsiInquiryResponse;
+    const REQUEST_LEN: usize = 6;
+    const RESPONSE_LEN: usize = 36;
+    // INQUIRY (6), opcode 0x12; byte 4 is the allocation length (zero would request no data).
+    fn into_bytes(self) -> [u8; Self::REQUEST_LEN] {
+        [0x12, 0x00, 0x00, 0x00, Self::RESPONSE_LEN as u8, 0x00]
+    }
+    // Decodes only the peripheral device type (low 5 bits of byte 0); unknown codes use the default.
+    fn parse_response(bytes: &[u8]) -> Result<Self::Response, Error> {
+        if bytes.len() != Self::RESPONSE_LEN {
+            return Err(Error::InvalidArgument);
+        }
+        let device_type = ScsiDeviceType::try_from(bytes[0] & 0x1F).unwrap_or_default();
+        Ok(ScsiInquiryResponse { device_type })
+    }
+}
+
+pub struct ScsiTestUnitReady;
+#[derive(Debug)]
+pub struct ScsiTestUnitReadyResponse;
+
+impl ScsiCommand for ScsiTestUnitReady {
+    type Response = ScsiTestUnitReadyResponse;
+    const RESPONSE_LEN: usize = 0;
+    const REQUEST_LEN: usize = 6;
+
+    fn into_bytes(self) -> [u8; Self::REQUEST_LEN] {
+        [0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
+    }
+
+    fn parse_response(_bytes: &[u8]) -> Result<Self::Response, Error> {
+        Ok(ScsiTestUnitReadyResponse)
+    }
+}
+
+pub struct ScsiRequestSense;
+#[derive(Debug)]
+pub struct ScsiRequestSenseResponse;
+
+impl ScsiCommand for ScsiRequestSense {
+    type Response = ScsiRequestSenseResponse;
+    const RESPONSE_LEN: usize = 0;
+    const REQUEST_LEN: usize = 6;
+
+    fn into_bytes(self) -> [u8; Self::REQUEST_LEN] {
+        [0x03, 0x00, 0x00, 0x00, 0x00, 0x00]
+    }
+
+    fn parse_response(_bytes: &[u8]) -> Result<Self::Response, Error> {
+        Ok(ScsiRequestSenseResponse)
+    }
+}
+
+pub struct ScsiReadCapacity;
+#[derive(Debug)]
+pub struct ScsiReadCapacityResponse {
+    pub block_size: u32,
+    pub block_count: u32,
+}
+
+impl ScsiCommand for ScsiReadCapacity {
+    type Response = ScsiReadCapacityResponse;
+    const REQUEST_LEN: usize = 10;
+    const RESPONSE_LEN: usize = 8;
+    // READ CAPACITY (10), opcode 0x25; every other CDB byte is zero.
+    fn into_bytes(self) -> [u8; Self::REQUEST_LEN] {
+        [0x25, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00]
+    }
+    // Bytes 0..4: big-endian LBA of the *last* block; bytes 4..8: big-endian block size.
+    fn parse_response(bytes: &[u8]) -> Result<Self::Response, Error> {
+        if bytes.len() != Self::RESPONSE_LEN {
+            return Err(Error::InvalidArgument);
+        }
+        let last_lba = u32::from_be_bytes(bytes[0..4].try_into().unwrap());
+        let block_size = u32::from_be_bytes(bytes[4..8].try_into().unwrap());
+        // The device reports the last LBA, not a count, so the count is one more. Saturate so
+        // that the 0xFFFF_FFFF "capacity too large" marker cannot wrap around to zero.
+        let block_count = last_lba.saturating_add(1);
+        Ok(ScsiReadCapacityResponse { block_size, block_count })
+    }
+}
diff --git a/kernel/driver/block/scsi/src/device.rs b/kernel/driver/block/scsi/src/device.rs
new file mode 100644
index 00000000..bd1e5690
--- /dev/null
+++ b/kernel/driver/block/scsi/src/device.rs
@@ -0,0 +1,24 @@
+use yggdrasil_abi::primitive_enum;
+
+primitive_enum! {
+    #[derive(Default)]
+    pub enum ScsiDeviceType: u8 {
+        DirectAccessBlock = 0x00,
+        SequentialAccess = 0x01,
+        Printer = 0x02,
+        Processor = 0x03,
+        WriteOnce = 0x04,
+        CdDvd = 0x05,
+        OpticalMemory = 0x07,
+        MediumChanger = 0x08,
+        StorageArrayController = 0x0C,
+        EnclosureServices = 0x0D,
+        SimplifiedDirectAccess = 0x0E,
+        OpticalCard = 0x0F,
+        BridgeController = 0x10,
+        ObjectBasedStorage = 0x11,
+        AutomationDriveInterface = 0x12,
+        #[default]
+        Other = 0x1F,
+    }
+}
diff --git a/kernel/driver/block/scsi/src/lib.rs b/kernel/driver/block/scsi/src/lib.rs
new file mode 100644
index 00000000..6e46a048
--- /dev/null
+++ b/kernel/driver/block/scsi/src/lib.rs
@@ -0,0 +1,245 @@
+#![feature(generic_const_exprs, maybe_uninit_slice)]
+#![allow(incomplete_features)]
+#![no_std]
+
+use core::{mem::MaybeUninit, time::Duration};
+
+use alloc::{
+    boxed::Box, collections::btree_map::BTreeMap, format, string::String, sync::Arc, vec::Vec,
+};
+use async_trait::async_trait;
+use command::{ScsiInquiry, ScsiReadCapacity, ScsiRequestSense, ScsiTestUnitReady};
+use device_api::device::Device;
+use libk::{
+    device::{block::BlockDevice, manager::probe_partitions},
+    error::Error,
+    fs::devfs,
+    task::runtime,
+};
+use libk_mm::{address::PhysicalAddress, table::MapAttributes, PageProvider, PageSlice};
+use libk_util::{
+    sync::{spin_rwlock::IrqSafeRwLock, IrqSafeSpinlock},
+    OneTimeInit,
+};
+use transport::{ScsiTransport, ScsiTransportWrapper};
+use yggdrasil_abi::io::FileMode;
+
+extern crate alloc;
+
+pub mod command;
+pub mod device;
+pub mod transport;
+
+// TODO SCSI detach
+pub struct ScsiDevice {
+    transport: IrqSafeSpinlock<ScsiTransportWrapper>,
+    lba_count: u64,
+    lba_size: usize,
+    index: OneTimeInit<u32>,
+    names: IrqSafeRwLock<Vec<String>>,
+}
+
+impl ScsiDevice {
+    // TODO support LUNs other than 0
+    pub async fn setup<T: ScsiTransport + 'static>(transport: T) -> Result<Arc<Self>, Error> {
+        let mut transport = ScsiTransportWrapper::new(transport);
+
+        transport.perform_command(0, ScsiInquiry).await?;
+
+        let mut attempts = 5;
+        let mut timeout = 100;
+        while attempts > 0 {
+            // TEST UNIT READY (6)
+            if transport
+                .perform_command(0, ScsiTestUnitReady)
+                .await
+                .is_ok()
+            {
+                break;
+            }
+
+            // If not, send a REQUEST SENSE (6)
+            if transport.perform_command(0, ScsiRequestSense).await.is_ok() {
+                break;
+            }
+
+            runtime::sleep(Duration::from_millis(timeout)).await;
+            timeout *= 2;
+            attempts -= 1;
+        }
+        if attempts == 0 {
+            log::error!("scsi: unit not ready");
+            return Err(Error::DoesNotExist);
+        }
+
+        let capacity_info = transport.perform_command(0, ScsiReadCapacity).await?;
+        log::info!(
+            "scsi: lba_size={}, lba_count={}",
+            capacity_info.block_size,
+            capacity_info.block_count
+        );
+
+        Ok(Arc::new(Self {
+            transport: IrqSafeSpinlock::new(transport),
+            lba_count: capacity_info.block_count.into(),
+            lba_size: capacity_info.block_size as usize,
+            index: OneTimeInit::new(),
+            names: IrqSafeRwLock::new(Vec::new()),
+        }))
+    }
+
+    pub fn detach(&self) {
+        if let Some(&index) = self.index.try_get() {
+            detach(index);
+        }
+    }
+}
+
+#[async_trait]
+impl BlockDevice for ScsiDevice {
+    /// Reads whole blocks starting at byte `position` into `buffer`, one READ (10) per block.
+    async fn read_aligned(
+        &self,
+        position: u64,
+        buffer: &mut PageSlice<MaybeUninit<u8>>,
+    ) -> Result<(), Error> {
+        if self.lba_size == 0 || buffer.len() % self.lba_size != 0 {
+            return Err(Error::InvalidArgument);
+        }
+        let lba_start = position / self.lba_size as u64;
+        let lba_count = buffer.len() / self.lba_size;
+        // The end LBA is exclusive, so a request ending exactly at the device end is valid.
+        if lba_start.saturating_add(lba_count as u64) > self.lba_count {
+            return Err(Error::InvalidArgument);
+        }
+        log::info!("scsi: read lba={lba_start}, count={lba_count}");
+        let mut transport = self.transport.lock();
+        for i in 0..lba_count {
+            let lba = lba_start + i as u64;
+            let offset = self.lba_size * i;
+            let block = &mut buffer[offset..offset + self.lba_size];
+            // SAFETY: assumes the transport only writes into the block; any byte is a valid u8.
+            let slice = unsafe { MaybeUninit::slice_assume_init_mut(block) };
+            if transport.read(0, lba, slice).await? != self.lba_size {
+                log::warn!("scsi: truncated read received at lba {lba}");
+                return Err(Error::InvalidOperation);
+            }
+        }
+        Ok(())
+    }
+    /// Writing is not supported yet; always fails with `NotImplemented`.
+    async fn write_aligned(&self, _position: u64, _buffer: &PageSlice<u8>) -> Result<(), Error> {
+        // TODO issue a SCSI WRITE (10)
+        Err(Error::NotImplemented)
+    }
+    /// Logical block size in bytes, as recorded at setup.
+    fn block_size(&self) -> usize {
+        self.lba_size
+    }
+    /// Number of logical blocks, as recorded at setup.
+    fn block_count(&self) -> u64 {
+        self.lba_count
+    }
+    /// Per-request block limit reported to the block layer (fixed at 8).
+    fn max_blocks_per_request(&self) -> usize {
+        8
+    }
+}
+
+impl PageProvider for ScsiDevice {
+    fn get_page(&self, _offset: u64) -> Result<PhysicalAddress, Error> {
+        Err(Error::NotImplemented)
+    }
+
+    fn release_page(&self, _offset: u64, _phys: PhysicalAddress) -> Result<(), Error> {
+        Err(Error::NotImplemented)
+    }
+
+    fn clone_page(
+        &self,
+        _offset: u64,
+        _src_phys: PhysicalAddress,
+        _src_attrs: MapAttributes,
+    ) -> Result<PhysicalAddress, Error> {
+        Err(Error::NotImplemented)
+    }
+}
+
+impl Device for ScsiDevice {
+    fn display_name(&self) -> &str {
+        "SCSI Storage Device"
+    }
+}
+
+impl Drop for ScsiDevice {
+    fn drop(&mut self) {
+        if let Some(index) = self.index.try_get() {
+            log::info!("scsi{index} dropped");
+        }
+    }
+}
+
+// TODO this is crap
+static SCSI_DEVICES: IrqSafeSpinlock<BTreeMap<u32, Arc<ScsiDevice>>> =
+    IrqSafeSpinlock::new(BTreeMap::new());
+static SCSI_BITMAP: IrqSafeSpinlock<u32> = IrqSafeSpinlock::new(0);
+
+pub fn attach(device: Arc<ScsiDevice>) -> Result<(), Error> {
+    let index = {
+        let mut bitmap = SCSI_BITMAP.lock();
+        let index = (0..8)
+            .position(|p| *bitmap & (1 << p) == 0)
+            .ok_or(Error::InvalidOperation)
+            .inspect_err(|_| log::warn!("Cannot attach SCSI device: too many of them"))?
+            as u32;
+        let mut devices = SCSI_DEVICES.lock();
+        *bitmap |= 1 << index;
+        assert!(!devices.contains_key(&index));
+        devices.insert(index, device.clone());
+        index
+    };
+
+    let name = format!("scsi{index}");
+    device.index.init(index);
+    device.names.write().push(name.clone());
+    devfs::add_named_block_device(device.clone(), name.clone(), FileMode::new(0o600)).ok();
+    log::info!("{name} attached");
+
+    // TODO this code is repeated everywhere
+    runtime::spawn(async move {
+        let name = name;
+        probe_partitions(device.clone(), |index, partition| {
+            let partition_name = format!("{name}p{}", index + 1);
+            log::info!("{name}: partition {partition_name}");
+            device.names.write().push(partition_name.clone());
+            devfs::add_named_block_device(
+                Arc::new(partition),
+                partition_name,
+                FileMode::new(0o600),
+            )
+            .ok();
+        })
+        .await
+        .ok();
+    })
+    .ok();
+
+    Ok(())
+}
+
+/// Unregisters SCSI device `index`: removes its devfs nodes and frees its slot in the bitmap.
+pub fn detach(index: u32) {
+    // Take the locks in the same order as `attach` (bitmap, then device map): the opposite
+    // order could deadlock against a concurrent `attach`.
+    let mut bitmap = SCSI_BITMAP.lock();
+    let mut devices = SCSI_DEVICES.lock();
+
+    if let Some(device) = devices.remove(&index) {
+        // Best-effort cleanup of every node name recorded for this device.
+        for name in device.names.read().iter() {
+            devfs::remove_node(name).ok();
+        }
+        *bitmap &= !(1 << index);
+        log::info!("scsi{index} detached");
+    }
+}
diff --git a/kernel/driver/block/scsi/src/transport.rs b/kernel/driver/block/scsi/src/transport.rs
new file mode 100644
index 00000000..887961bc
--- /dev/null
+++ b/kernel/driver/block/scsi/src/transport.rs
@@ -0,0 +1,72 @@
+use alloc::boxed::Box;
+use async_trait::async_trait;
+use libk::error::Error;
+
+use crate::command::ScsiCommand;
+
+#[async_trait]
+pub trait ScsiTransport: Send + Sync {
+    /// Send `request_data` to `lun`; reply data lands in `response_buffer`, its length is returned
+    async fn perform_request_raw(
+        &mut self,
+        lun: u8,
+        request_data: &[u8],
+        response_buffer: &mut [u8],
+    ) -> Result<usize, Error>;
+}
+
+pub struct ScsiTransportWrapper {
+    inner: Box<dyn ScsiTransport>,
+}
+
+impl ScsiTransportWrapper {
+    pub fn new<T: ScsiTransport + 'static>(inner: T) -> Self {
+        Self {
+            inner: Box::new(inner),
+        }
+    }
+    /// Reads one block at `lba` from `lun` into `buffer`; returns the transport's byte count.
+    pub async fn read(&mut self, lun: u8, lba: u64, buffer: &mut [u8]) -> Result<usize, Error> {
+        if lba >= u32::MAX as u64 || buffer.len() > u16::MAX as usize {
+            return Err(Error::InvalidArgument);
+        }
+        let lba_bytes = (lba as u32).to_be_bytes();
+        // Issue a READ (10) command: 32-bit big-endian LBA, transfer length of one block
+        let request_buffer = [
+            0x28, // operation code: READ (10)
+            0x00,
+            lba_bytes[0],
+            lba_bytes[1],
+            lba_bytes[2],
+            lba_bytes[3],
+            0x00,
+            0x00, // transfer length, high byte
+            0x01, // transfer length, low byte
+            0x00,
+        ];
+
+        self.inner
+            .perform_request_raw(lun, &request_buffer, buffer)
+            .await
+    }
+    /// Encodes `request`, sends it to `lun` and parses the bytes the transport reports back.
+    pub async fn perform_command<R: ScsiCommand>(
+        &mut self,
+        lun: u8,
+        request: R,
+    ) -> Result<R::Response, Error>
+    where
+        [u8; R::RESPONSE_LEN]: Sized,
+        [u8; R::REQUEST_LEN]: Sized,
+    {
+        let mut response_buffer = [0; R::RESPONSE_LEN];
+        let request_buffer = request.into_bytes();
+
+        let response_len = self
+            .inner
+            .perform_request_raw(lun, &request_buffer, &mut response_buffer)
+            .await?;
+
+        R::parse_response(&response_buffer[..response_len])
+    }
+}
diff --git a/kernel/driver/bus/usb/Cargo.toml b/kernel/driver/bus/usb/Cargo.toml
index ab7ea702..14ad15ab 100644
--- a/kernel/driver/bus/usb/Cargo.toml
+++ b/kernel/driver/bus/usb/Cargo.toml
@@ -12,7 +12,10 @@ libk-mm.workspace = true
 libk.workspace = true
 
 ygg_driver_input = { path = "../../input" }
+# For mass storage
+ygg_driver_scsi = { path = "../../block/scsi" }
 
 log.workspace = true
 bytemuck.workspace = true
 futures-util.workspace = true
+async-trait.workspace = true
diff --git a/kernel/driver/bus/usb/src/bus.rs b/kernel/driver/bus/usb/src/bus.rs
index d571401a..721a54a1 100644
--- a/kernel/driver/bus/usb/src/bus.rs
+++ b/kernel/driver/bus/usb/src/bus.rs
@@ -24,6 +24,7 @@ impl UsbBusManager {
     }
 
     pub fn register_device(device: Arc<UsbDeviceAccess>) {
+        log::info!("usb: register device {}", device.bus_address());
         BUS_MANAGER
             .devices
             .write()
diff --git a/kernel/driver/bus/usb/src/class_driver/hid_keyboard.rs b/kernel/driver/bus/usb/src/class_driver/hid_keyboard.rs
new file mode 100644
index 00000000..2dbe33d1
--- /dev/null
+++ b/kernel/driver/bus/usb/src/class_driver/hid_keyboard.rs
@@ -0,0 +1,173 @@
+
+use core::mem::MaybeUninit;
+
+use alloc::sync::Arc;
+use futures_util::{future::BoxFuture, FutureExt};
+use libk_mm::PageBox;
+use yggdrasil_abi::io::{KeyboardKey, KeyboardKeyEvent};
+
+use crate::{device::UsbDeviceAccess, error::UsbError, info::UsbDeviceClass};
+
+use super::{UsbClassInfo, UsbDriver};
+
+pub struct UsbHidKeyboardDriver;
+
+const MODIFIER_MAP: &[KeyboardKey] = &[
+    KeyboardKey::LControl,
+    KeyboardKey::LShift,
+    KeyboardKey::LAlt,
+    KeyboardKey::Unknown,
+    KeyboardKey::RControl,
+    KeyboardKey::RShift,
+    KeyboardKey::RAlt,
+    KeyboardKey::Unknown,
+];
+
+#[derive(Default)]
+struct KeyboardState {
+    state: [u64; 4],
+    mods: u8,
+}
+
+impl KeyboardState {
+    pub fn new() -> Self {
+        Self::default()
+    }
+
+    pub fn translate_key(k: u8) -> KeyboardKey {
+        match k {
+            4..=29 => KeyboardKey::Char(k - 4 + b'a'),
+            30..=38 => KeyboardKey::Char(k - 30 + b'1'),
+            39 => KeyboardKey::Char(b'0'),
+
+            40 => KeyboardKey::Enter,
+            41 => KeyboardKey::Escape,
+            42 => KeyboardKey::Backspace,
+            43 => KeyboardKey::Tab,
+            44 => KeyboardKey::Char(b' '),
+            45 => KeyboardKey::Char(b'-'),
+            46 => KeyboardKey::Char(b'='),
+            47 => KeyboardKey::Char(b'['),
+            48 => KeyboardKey::Char(b']'),
+            49 => KeyboardKey::Char(b'\\'),
+            51 => KeyboardKey::Char(b';'),
+            52 => KeyboardKey::Char(b'\''),
+            53 => KeyboardKey::Char(b'`'),
+            54 => KeyboardKey::Char(b','),
+            55 => KeyboardKey::Char(b'.'),
+            56 => KeyboardKey::Char(b'/'),
+
+            58..=69 => KeyboardKey::F(k - 58),
+
+            _ => {
+                log::debug!("Unknown key: {}", k);
+                KeyboardKey::Unknown
+            }
+        }
+    }
+
+    pub fn retain_modifiers(
+        &mut self,
+        m: u8,
+        events: &mut [MaybeUninit<KeyboardKeyEvent>],
+    ) -> usize {
+        let mut count = 0;
+        let released = self.mods & !m;
+        for (i, modifier) in MODIFIER_MAP.iter().enumerate().take(8) {
+            if released & (1 << i) != 0 {
+                events[count].write(KeyboardKeyEvent::Released(*modifier));
+                count += 1;
+            }
+        }
+        self.mods &= m;
+        count
+    }
+
+    pub fn press_modifiers(
+        &mut self,
+        m: u8,
+        events: &mut [MaybeUninit<KeyboardKeyEvent>],
+    ) -> usize {
+        let mut count = 0;
+        let pressed = m & !self.mods;
+        for (i, modifier) in MODIFIER_MAP.iter().enumerate().take(8) {
+            if pressed & (1 << i) != 0 {
+                events[count].write(KeyboardKeyEvent::Pressed(*modifier));
+                count += 1;
+            }
+        }
+        self.mods = m;
+        count
+    }
+
+    pub fn retain(&mut self, keys: &[u8], events: &mut [MaybeUninit<KeyboardKeyEvent>]) -> usize {
+        let mut count = 0;
+        for i in 1..256 {
+            if self.state[i / 64] & (1 << (i % 64)) != 0 && !keys.contains(&(i as u8)) {
+                events[count].write(KeyboardKeyEvent::Released(Self::translate_key(i as u8)));
+                self.state[i / 64] &= !(1 << (i % 64));
+                count += 1;
+            }
+        }
+        count
+    }
+
+    pub fn press(&mut self, keys: &[u8], events: &mut [MaybeUninit<KeyboardKeyEvent>]) -> usize {
+        let mut count = 0;
+        for &k in keys {
+            let index = (k as usize) / 64;
+            if self.state[index] & (1 << (k % 64)) == 0 {
+                self.state[index] |= 1 << (k % 64);
+                events[count].write(KeyboardKeyEvent::Pressed(Self::translate_key(k)));
+                count += 1;
+            }
+        }
+        count
+    }
+}
+
+impl UsbDriver for UsbHidKeyboardDriver {
+    fn run(
+        self: Arc<Self>,
+        device: Arc<UsbDeviceAccess>,
+    ) -> BoxFuture<'static, Result<(), UsbError>> {
+        async move {
+            // TODO not sure whether to use boot protocol (easy) or GetReport
+            let config = device.select_configuration(|_| true).await?.unwrap();
+            assert_eq!(config.endpoints.len(), 1);
+
+            let pipe = device.open_interrupt_in_pipe(1).await?;
+
+            let mut buffer = PageBox::new_slice(0, 8).map_err(UsbError::MemoryError)?;
+            let mut state = KeyboardState::new();
+            let mut events = [MaybeUninit::uninit(); 16];
+
+            loop {
+                let mut event_count = 0;
+
+                let data = pipe.read(&mut buffer).await?;
+
+                event_count += state.retain_modifiers(data[0], &mut events);
+                event_count += state.press_modifiers(data[0], &mut events[event_count..]);
+                event_count += state.retain(&data[2..], &mut events[event_count..]);
+                event_count += state.press(&data[2..], &mut events[event_count..]);
+
+                let events = unsafe { MaybeUninit::slice_assume_init_ref(&events[..event_count]) };
+
+                for &event in events {
+                    log::trace!("Generic Keyboard: {:?}", event);
+                    ygg_driver_input::send_event(event);
+                }
+            }
+        }
+        .boxed()
+    }
+
+    fn name(&self) -> &'static str {
+        "USB HID Keyboard"
+    }
+
+    fn probe(&self, class: &UsbClassInfo, _device: &UsbDeviceAccess) -> bool {
+        class.class == UsbDeviceClass::Hid && class.subclass == 0x01
+    }
+}
diff --git a/kernel/driver/bus/usb/src/class_driver/mass_storage.rs b/kernel/driver/bus/usb/src/class_driver/mass_storage.rs
new file mode 100644
index 00000000..814db8e0
--- /dev/null
+++ b/kernel/driver/bus/usb/src/class_driver/mass_storage.rs
@@ -0,0 +1,227 @@
+use alloc::{boxed::Box, sync::Arc};
+use async_trait::async_trait;
+use bytemuck::{Pod, Zeroable};
+use futures_util::{future::BoxFuture, FutureExt};
+use libk::error::Error;
+use libk_mm::PageBox;
+use ygg_driver_scsi::{transport::ScsiTransport, ScsiDevice};
+
+use crate::{
+    device::{UsbDeviceAccess, UsbDeviceDetachHandler},
+    error::UsbError,
+    info::UsbDeviceClass,
+    pipe::{
+        bulk::{UsbBulkInPipeAccess, UsbBulkOutPipeAccess},
+        control::UsbClassSpecificRequest,
+    },
+};
+
+use super::{UsbClassInfo, UsbDriver};
+
+pub struct UsbMassStorageDriverBulkOnly;
+
+#[derive(Debug, Clone, Copy, Zeroable, Pod)]
+#[repr(C)]
+struct Cbw {
+    signature: u32,       // 0x00
+    tag: u32,             // 0x04
+    transfer_length: u32, // 0x08
+    flags: u8,            // 0x0C
+    lun: u8,              // 0x0D
+    cb_length: u8,        // 0x0E
+    cb_data: [u8; 16],    // 0x0F
+    // Not sent
+    _0: u8,
+}
+
+#[derive(Debug, Clone, Copy, Zeroable, Pod)]
+#[repr(C)]
+struct Csw {
+    signature: u32,
+    tag: u32,
+    data_residue: u32,
+    status: u8,
+    _0: [u8; 3],
+}
+
+struct Bbb {
+    #[allow(unused)]
+    device: Arc<UsbDeviceAccess>,
+    in_pipe: UsbBulkInPipeAccess,
+    out_pipe: UsbBulkOutPipeAccess,
+    buffer: PageBox<[u8]>,
+    last_tag: u32,
+}
+
+struct DetachHandler(Arc<ScsiDevice>);
+
+impl Bbb {
+    pub fn new(
+        device: Arc<UsbDeviceAccess>,
+        in_pipe: UsbBulkInPipeAccess,
+        out_pipe: UsbBulkOutPipeAccess,
+    ) -> Result<Self, UsbError> {
+        let buffer = PageBox::new_slice(0, 4096).map_err(UsbError::MemoryError)?;
+        Ok(Self {
+            device,
+            in_pipe,
+            out_pipe,
+            buffer,
+            last_tag: 0,
+        })
+    }
+}
+
+impl Bbb {
+    /// Builds a Command Block Wrapper in the shared buffer, sends its 31 bytes, returns its tag.
+    async fn send_cbw(
+        &mut self,
+        lun: u8,
+        host_to_dev: bool,
+        command: &[u8],
+        response_len: usize,
+    ) -> Result<u32, Error> {
+        self.last_tag = self.last_tag.wrapping_add(1);
+        self.buffer[..size_of::<Cbw>()].fill(0);
+        let cbw = bytemuck::from_bytes_mut::<Cbw>(&mut self.buffer[..size_of::<Cbw>()]);
+        let tag = self.last_tag;
+
+        cbw.signature = 0x43425355;
+        cbw.tag = tag;
+        cbw.transfer_length = response_len as u32;
+        if !host_to_dev {
+            cbw.flags = 1 << 7;
+        }
+        cbw.lun = lun;
+        cbw.cb_length = command.len() as u8;
+        cbw.cb_data[..command.len()].copy_from_slice(command);
+
+        self.out_pipe
+            .write(self.buffer.as_slice().subslice(..31))
+            .await?;
+        Ok(tag)
+    }
+
+    /// Reads the Command Status Wrapper; rejects short reads, bad signature/tag and error status.
+    async fn read_csw(&mut self, tag: u32) -> Result<(), Error> {
+        let len = self.in_pipe.read(self.buffer.as_slice_mut().subslice_mut(..13)).await?.len();
+        if len != 13 {
+            log::warn!("msc: short csw ({len} bytes)");
+            return Err(Error::InvalidArgument);
+        }
+        let csw = bytemuck::from_bytes::<Csw>(&self.buffer[..size_of::<Csw>()]);
+        if csw.signature != 0x53425355 {
+            log::warn!("msc: invalid csw signature");
+            return Err(Error::InvalidArgument);
+        }
+        if csw.tag != tag {
+            log::warn!("msc: invalid csw tag (got {}, expected {tag})", csw.tag);
+            return Err(Error::InvalidArgument);
+        }
+        if csw.status != 0x00 {
+            log::warn!("msc: csw error status {:#04x}", csw.status);
+            return Err(Error::InvalidArgument);
+        }
+        Ok(())
+    }
+
+    /// Reads up to `buffer.len()` bytes of command data and copies what arrived into `buffer`.
+    async fn read_response_data(&mut self, buffer: &mut [u8]) -> Result<usize, Error> {
+        // TODO limit by max_packet_size
+        let bytes = self
+            .in_pipe
+            .read(self.buffer.as_slice_mut().subslice_mut(..buffer.len()))
+            .await?;
+        buffer[..bytes.len()].copy_from_slice(bytes);
+        Ok(bytes.len())
+    }
+}
+
+#[async_trait]
+impl ScsiTransport for Bbb {
+    async fn perform_request_raw(
+        &mut self,
+        lun: u8,
+        request_data: &[u8],
+        response_buffer: &mut [u8],
+    ) -> Result<usize, Error> {
+        if request_data.len() > 16 || response_buffer.len() > self.buffer.len() {
+            return Err(Error::InvalidArgument);
+        }
+
+        let tag = self
+            .send_cbw(lun, false, request_data, response_buffer.len())
+            .await?;
+        let response_len = if response_buffer.is_empty() {
+            0
+        } else {
+            self.read_response_data(response_buffer).await?
+        };
+        self.read_csw(tag).await?;
+        Ok(response_len)
+    }
+}
+
+impl UsbDeviceDetachHandler for DetachHandler {
+    fn handle_device_detach(&self) {
+        log::info!("Mass storage detached");
+        self.0.detach();
+    }
+}
+
+#[derive(Debug, Pod, Zeroable, Clone, Copy)]
+#[repr(C)]
+pub struct BulkOnlyMassStorageReset;
+
+impl UsbClassSpecificRequest for BulkOnlyMassStorageReset {
+    const BM_REQUEST_TYPE: u8 = 0b00100001;
+    const B_REQUEST: u8 = 0b11111111;
+}
+
+impl UsbDriver for UsbMassStorageDriverBulkOnly {
+    /// Configures the device, resets the bulk-only transport and attaches it as a SCSI disk.
+    fn run(
+        self: Arc<Self>,
+        device: Arc<UsbDeviceAccess>,
+    ) -> BoxFuture<'static, Result<(), UsbError>> {
+        async move {
+            // TODO filter to only accept BBB config
+            let config = device.select_configuration(|_| true).await?;
+            let config = config.ok_or(UsbError::DriverError)?;
+            // Expect exactly bulk-in + bulk-out; fail the driver instead of panicking the kernel.
+            if config.endpoints.len() != 2 {
+                log::warn!("msc: unexpected endpoint count {}", config.endpoints.len());
+                return Err(UsbError::DriverError);
+            }
+            // TODO those indices may be different
+            let control_pipe = device.control_pipe();
+            let in_pipe = device.open_bulk_in_pipe(1).await?;
+            let out_pipe = device.open_bulk_out_pipe(2).await?;
+            // Perform a Bulk-Only Mass Storage Reset
+            // TODO interface id?
+            control_pipe
+                .class_specific_request::<BulkOnlyMassStorageReset>(0, 0)
+                .await?;
+            // TODO get max LUN
+
+            let bbb = Bbb::new(device.clone(), in_pipe, out_pipe)?;
+            let scsi = ScsiDevice::setup(bbb)
+                .await
+                .inspect_err(|error| log::error!("msc: scsi error {error:?}"))
+                .map_err(|_| UsbError::DriverError)?;
+            let detach = DetachHandler(scsi.clone());
+            device.set_detach_handler(Arc::new(detach));
+            ygg_driver_scsi::attach(scsi).ok();
+            Ok(())
+        }
+        .boxed()
+    }
+
+    fn name(&self) -> &'static str {
+        "USB Mass Storage"
+    }
+
+    fn probe(&self, class: &UsbClassInfo, _device: &UsbDeviceAccess) -> bool {
+        // TODO support other protocols
+        class.class == UsbDeviceClass::MassStorage && class.interface_protocol_number == 0x50
+    }
+}
diff --git a/kernel/driver/bus/usb/src/class_driver/mod.rs b/kernel/driver/bus/usb/src/class_driver/mod.rs
index f85a0526..8c7a23a2 100644
--- a/kernel/driver/bus/usb/src/class_driver/mod.rs
+++ b/kernel/driver/bus/usb/src/class_driver/mod.rs
@@ -9,10 +9,16 @@ use crate::{
     info::{UsbDeviceClass, UsbDeviceProtocol},
 };
 
+pub mod hid_keyboard;
+pub mod mass_storage;
+
+#[derive(Debug)]
 pub struct UsbClassInfo {
     pub class: UsbDeviceClass,
     pub subclass: u8,
     pub protocol: UsbDeviceProtocol,
+    pub device_protocol_number: u8,
+    pub interface_protocol_number: u8,
 }
 
 pub trait UsbDriver: Send + Sync {
@@ -55,6 +61,8 @@ async fn extract_class_info(device: &UsbDeviceAccess) -> Result<Option<UsbClassI
             class,
             subclass,
             protocol,
+            interface_protocol_number: if_info.interface_protocol_number,
+            device_protocol_number: device_info.device_protocol_number,
         }))
     } else {
         Ok(None)
@@ -104,191 +112,8 @@ pub fn register_driver(driver: Arc<dyn UsbDriver + 'static>) {
 
 pub fn register_default_class_drivers() {
     register_driver(Arc::new(hid_keyboard::UsbHidKeyboardDriver));
+    register_driver(Arc::new(mass_storage::UsbMassStorageDriverBulkOnly));
 }
 
 static USB_DEVICE_DRIVERS: IrqSafeRwLock<Vec<Arc<dyn UsbDriver + 'static>>> =
     IrqSafeRwLock::new(Vec::new());
-
-pub mod hid_keyboard {
-    use core::mem::MaybeUninit;
-
-    use alloc::sync::Arc;
-    use futures_util::{future::BoxFuture, FutureExt};
-    use libk_mm::PageBox;
-    use yggdrasil_abi::io::{KeyboardKey, KeyboardKeyEvent};
-
-    use crate::{device::UsbDeviceAccess, error::UsbError, info::UsbDeviceClass};
-
-    use super::{UsbClassInfo, UsbDriver};
-
-    pub struct UsbHidKeyboardDriver;
-
-    const MODIFIER_MAP: &[KeyboardKey] = &[
-        KeyboardKey::LControl,
-        KeyboardKey::LShift,
-        KeyboardKey::LAlt,
-        KeyboardKey::Unknown,
-        KeyboardKey::RControl,
-        KeyboardKey::RShift,
-        KeyboardKey::RAlt,
-        KeyboardKey::Unknown,
-    ];
-
-    #[derive(Default)]
-    struct KeyboardState {
-        state: [u64; 4],
-        mods: u8,
-    }
-
-    impl KeyboardState {
-        pub fn new() -> Self {
-            Self::default()
-        }
-
-        pub fn translate_key(k: u8) -> KeyboardKey {
-            match k {
-                4..=29 => KeyboardKey::Char(k - 4 + b'a'),
-                30..=38 => KeyboardKey::Char(k - 30 + b'1'),
-                39 => KeyboardKey::Char(b'0'),
-
-                40 => KeyboardKey::Enter,
-                41 => KeyboardKey::Escape,
-                42 => KeyboardKey::Backspace,
-                43 => KeyboardKey::Tab,
-                44 => KeyboardKey::Char(b' '),
-                45 => KeyboardKey::Char(b'-'),
-                46 => KeyboardKey::Char(b'='),
-                47 => KeyboardKey::Char(b'['),
-                48 => KeyboardKey::Char(b']'),
-                49 => KeyboardKey::Char(b'\\'),
-                51 => KeyboardKey::Char(b';'),
-                52 => KeyboardKey::Char(b'\''),
-                53 => KeyboardKey::Char(b'`'),
-                54 => KeyboardKey::Char(b','),
-                55 => KeyboardKey::Char(b'.'),
-                56 => KeyboardKey::Char(b'/'),
-
-                58..=69 => KeyboardKey::F(k - 58),
-
-                _ => {
-                    log::debug!("Unknown key: {}", k);
-                    KeyboardKey::Unknown
-                }
-            }
-        }
-
-        pub fn retain_modifiers(
-            &mut self,
-            m: u8,
-            events: &mut [MaybeUninit<KeyboardKeyEvent>],
-        ) -> usize {
-            let mut count = 0;
-            let released = self.mods & !m;
-            for (i, modifier) in MODIFIER_MAP.iter().enumerate().take(8) {
-                if released & (1 << i) != 0 {
-                    events[count].write(KeyboardKeyEvent::Released(*modifier));
-                    count += 1;
-                }
-            }
-            self.mods &= m;
-            count
-        }
-
-        pub fn press_modifiers(
-            &mut self,
-            m: u8,
-            events: &mut [MaybeUninit<KeyboardKeyEvent>],
-        ) -> usize {
-            let mut count = 0;
-            let pressed = m & !self.mods;
-            for (i, modifier) in MODIFIER_MAP.iter().enumerate().take(8) {
-                if pressed & (1 << i) != 0 {
-                    events[count].write(KeyboardKeyEvent::Pressed(*modifier));
-                    count += 1;
-                }
-            }
-            self.mods = m;
-            count
-        }
-
-        pub fn retain(
-            &mut self,
-            keys: &[u8],
-            events: &mut [MaybeUninit<KeyboardKeyEvent>],
-        ) -> usize {
-            let mut count = 0;
-            for i in 1..256 {
-                if self.state[i / 64] & (1 << (i % 64)) != 0 && !keys.contains(&(i as u8)) {
-                    events[count].write(KeyboardKeyEvent::Released(Self::translate_key(i as u8)));
-                    self.state[i / 64] &= !(1 << (i % 64));
-                    count += 1;
-                }
-            }
-            count
-        }
-
-        pub fn press(
-            &mut self,
-            keys: &[u8],
-            events: &mut [MaybeUninit<KeyboardKeyEvent>],
-        ) -> usize {
-            let mut count = 0;
-            for &k in keys {
-                let index = (k as usize) / 64;
-                if self.state[index] & (1 << (k % 64)) == 0 {
-                    self.state[index] |= 1 << (k % 64);
-                    events[count].write(KeyboardKeyEvent::Pressed(Self::translate_key(k)));
-                    count += 1;
-                }
-            }
-            count
-        }
-    }
-
-    impl UsbDriver for UsbHidKeyboardDriver {
-        fn run(
-            self: Arc<Self>,
-            device: Arc<UsbDeviceAccess>,
-        ) -> BoxFuture<'static, Result<(), UsbError>> {
-            async move {
-                // TODO not sure whether to use boot protocol (easy) or GetReport
-                let config = device.select_configuration(|_| true).await?.unwrap();
-                assert_eq!(config.endpoints.len(), 1);
-
-                let pipe = device.open_interrupt_in_pipe(1).await?;
-
-                let mut buffer = PageBox::new_slice(0, 8).map_err(UsbError::MemoryError)?;
-                let mut state = KeyboardState::new();
-                let mut events = [MaybeUninit::uninit(); 16];
-
-                loop {
-                    let mut event_count = 0;
-
-                    let data = pipe.read(&mut buffer).await?;
-
-                    event_count += state.retain_modifiers(data[0], &mut events);
-                    event_count += state.press_modifiers(data[0], &mut events[event_count..]);
-                    event_count += state.retain(&data[2..], &mut events[event_count..]);
-                    event_count += state.press(&data[2..], &mut events[event_count..]);
-
-                    let events =
-                        unsafe { MaybeUninit::slice_assume_init_ref(&events[..event_count]) };
-
-                    for &event in events {
-                        log::trace!("Generic Keyboard: {:?}", event);
-                        ygg_driver_input::send_event(event);
-                    }
-                }
-            }
-            .boxed()
-        }
-
-        fn name(&self) -> &'static str {
-            "USB HID Keyboard"
-        }
-
-        fn probe(&self, class: &UsbClassInfo, _device: &UsbDeviceAccess) -> bool {
-            class.class == UsbDeviceClass::Hid && class.subclass == 0x01
-        }
-    }
-}
diff --git a/kernel/driver/bus/usb/src/communication.rs b/kernel/driver/bus/usb/src/communication.rs
index 823a5702..c390a136 100644
--- a/kernel/driver/bus/usb/src/communication.rs
+++ b/kernel/driver/bus/usb/src/communication.rs
@@ -8,7 +8,7 @@ use alloc::{sync::Arc, vec::Vec};
 use futures_util::task::AtomicWaker;
 use libk_mm::address::PhysicalAddress;
 
-use crate::error::UsbError;
+use crate::error::{TransferError, UsbError};
 
 #[derive(Clone, Copy, PartialEq, Eq, Debug)]
 pub enum UsbDirection {
@@ -44,6 +44,13 @@ pub struct UsbInterruptTransfer {
     pub status: Arc<UsbTransferStatus>,
 }
 
+pub struct UsbBulkTransfer {
+    pub address: PhysicalAddress,
+    pub length: usize,
+    pub direction: UsbDirection,
+    pub status: Arc<UsbTransferStatus>,
+}
+
 impl UsbDirection {
     pub const fn is_device_to_host(self) -> bool {
         matches!(self, UsbDirection::In)
@@ -63,6 +70,17 @@ impl UsbTransferResult {
     pub fn sub_length(&self) -> usize {
         (self.0 & 0xFFFFFF) as _
     }
+
+    pub fn error_code(&self) -> TransferError {
+        match self.0 >> 24 {
+            0 => TransferError::InvalidTransfer, // the code is the top byte of the raw value
+            2 => TransferError::BufferError,
+            4 => TransferError::UsbTransactionError,
+            6 => TransferError::Stall,
+            13 => TransferError::ShortPacket(self.0 & 0xFFFFFF), // low 24 bits, as in sub_length()
+            _ => TransferError::Other, // every code without a dedicated variant
+        }
+    }
 }
 
 impl UsbControlTransfer {
@@ -79,6 +97,13 @@ impl UsbInterruptTransfer {
     }
 }
 
+impl UsbBulkTransfer {
+    pub async fn wait(&self) -> Result<usize, UsbError> {
+        // Completed size: the requested length minus what the status reports, floored at zero
+        self.status.wait().await.map(|rest| self.length.saturating_sub(rest))
+    }
+}
+
 impl UsbTransferStatus {
     pub fn new() -> Self {
         Self {
@@ -95,7 +120,7 @@ impl UsbTransferStatus {
                 } else if v.is_aborted() {
                     Err(UsbError::DeviceDisconnected)
                 } else {
-                    Err(UsbError::TransferFailed)
+                    Err(UsbError::TransferFailed(v.error_code()))
                 }
             })
         })
diff --git a/kernel/driver/bus/usb/src/descriptor.rs b/kernel/driver/bus/usb/src/descriptor.rs
index f99468ce..c221a36c 100644
--- a/kernel/driver/bus/usb/src/descriptor.rs
+++ b/kernel/driver/bus/usb/src/descriptor.rs
@@ -1,8 +1,9 @@
 use bytemuck::{Pod, Zeroable};
 
 use crate::{
+    device::UsbSpeed,
     error::UsbError,
-    info::{UsbDeviceClass, UsbDeviceProtocol, UsbEndpointType},
+    info::{UsbDeviceClass, UsbDeviceProtocol, UsbEndpointType, UsbVersion},
     UsbDirection,
 };
 
@@ -134,12 +135,14 @@ impl UsbDeviceDescriptor {
         UsbDeviceProtocol::try_from(self.device_protocol).unwrap_or(UsbDeviceProtocol::Unknown)
     }
 
-    pub fn max_packet_size(&self) -> Result<usize, UsbError> {
-        match self.max_packet_size_0 {
-            8 => Ok(8),
-            16 => Ok(16),
-            32 => Ok(32),
-            64 => Ok(64),
+    pub fn max_packet_size(&self, version: UsbVersion, speed: UsbSpeed) -> Result<usize, UsbError> {
+        match (version.is_version_3(), speed, self.max_packet_size_0) {
+            // USB3+: the field is an exponent (2^9 = 512); other USB3+ combinations are rejected below
+            (true, UsbSpeed::Super, 9) => Ok(1 << 9),
+            (false, _, 8) => Ok(8),
+            (false, _, 16) => Ok(16),
+            (false, _, 32) => Ok(32),
+            (false, _, 64) => Ok(64),
             _ => Err(UsbError::InvalidDescriptorField),
         }
     }
diff --git a/kernel/driver/bus/usb/src/device.rs b/kernel/driver/bus/usb/src/device.rs
index 324b1c4b..0404da0a 100644
--- a/kernel/driver/bus/usb/src/device.rs
+++ b/kernel/driver/bus/usb/src/device.rs
@@ -1,14 +1,15 @@
 use core::{fmt, ops::Deref};
 
-use alloc::{boxed::Box, vec::Vec};
+use alloc::{boxed::Box, sync::Arc, vec::Vec};
 use futures_util::future::BoxFuture;
 use libk_mm::PageBox;
 use libk_util::sync::spin_rwlock::{IrqSafeRwLock, IrqSafeRwLockReadGuard};
 
 use crate::{
     error::UsbError,
-    info::{UsbConfigurationInfo, UsbDeviceInfo, UsbEndpointInfo, UsbInterfaceInfo},
+    info::{UsbConfigurationInfo, UsbDeviceInfo, UsbEndpointInfo, UsbInterfaceInfo, UsbVersion},
     pipe::{
+        bulk::{UsbBulkInPipeAccess, UsbBulkOutPipeAccess},
         control::{ConfigurationDescriptorEntry, UsbControlPipeAccess},
         interrupt::UsbInterruptInPipeAccess,
     },
@@ -38,6 +39,10 @@ pub enum UsbSpeed {
     Super,
 }
 
+pub trait UsbDeviceDetachHandler: Send + Sync {
+    fn handle_device_detach(&self);
+}
+
 #[allow(unused)]
 pub trait UsbDevice: Send + Sync {
     // Endpoint "0"
@@ -49,12 +54,19 @@ pub trait UsbDevice: Send + Sync {
     ) -> BoxFuture<Result<UsbInterruptInPipeAccess, UsbError>> {
         unimplemented!()
     }
+    fn open_bulk_in_pipe(&self, number: u8) -> BoxFuture<Result<UsbBulkInPipeAccess, UsbError>> {
+        unimplemented!()
+    }
+    fn open_bulk_out_pipe(&self, number: u8) -> BoxFuture<Result<UsbBulkOutPipeAccess, UsbError>> {
+        unimplemented!()
+    }
 
     fn port_number(&self) -> u8;
     fn bus_address(&self) -> UsbBusAddress;
     fn speed(&self) -> UsbSpeed;
     fn controller_ref(&self) -> &dyn UsbHostController;
 
+    fn set_detach_handler(&self, handler: Arc<dyn UsbDeviceDetachHandler>);
     fn handle_detach(&self);
 
     fn debug(&self) {}
@@ -73,6 +85,17 @@ impl UsbDeviceAccess {
 
         let device_desc = control.query_device_descriptor().await?;
 
+        let bcd_usb = device_desc.bcd_usb;
+        let usb_version = UsbVersion::from_bcd_usb(device_desc.bcd_usb)
+            .ok_or(UsbError::InvalidDescriptorField)
+            .inspect_err(|_| {
+                log::error!(
+                    "{}: unsupported/invalid USB version: {:#x}",
+                    raw.bus_address(),
+                    bcd_usb
+                )
+            })?;
+
         let manufacturer = control
             .query_string(device_desc.manufacturer_str, &mut string_buffer)
             .await?;
@@ -83,6 +106,7 @@ impl UsbDeviceAccess {
         let info = UsbDeviceInfo {
             manufacturer,
             product,
+            usb_version,
 
             id_vendor: device_desc.id_vendor,
             id_product: device_desc.id_product,
@@ -90,8 +114,9 @@ impl UsbDeviceAccess {
             device_class: device_desc.class(),
             device_subclass: device_desc.device_subclass,
             device_protocol: device_desc.protocol(),
+            device_protocol_number: device_desc.device_protocol,
 
-            max_packet_size: device_desc.max_packet_size()?,
+            max_packet_size: device_desc.max_packet_size(usb_version, raw.speed())?,
         };
 
         Ok(Self {
@@ -172,6 +197,7 @@ impl UsbDeviceAccess {
                         interface_class: iface.class(),
                         interface_subclass: iface.interface_subclass,
                         interface_protocol: iface.protocol(),
+                        interface_protocol_number: iface.interface_protocol,
                     });
                 }
                 _ => (),
@@ -187,6 +213,10 @@ impl UsbDeviceAccess {
 
         Ok(info)
     }
+
+    pub fn set_detach_handler(&self, handler: Arc<dyn UsbDeviceDetachHandler>) {
+        self.device.set_detach_handler(handler);
+    }
 }
 
 impl Deref for UsbDeviceAccess {
diff --git a/kernel/driver/bus/usb/src/error.rs b/kernel/driver/bus/usb/src/error.rs
index a4cc18e9..d17dcc6e 100644
--- a/kernel/driver/bus/usb/src/error.rs
+++ b/kernel/driver/bus/usb/src/error.rs
@@ -1,5 +1,15 @@
 use yggdrasil_abi::error::Error;
 
+#[derive(Debug)]
+pub enum TransferError {
+    InvalidTransfer,
+    ShortPacket(u32), // carries a 24-bit length value taken from the raw transfer result
+    BufferError,
+    UsbTransactionError,
+    Stall,
+    Other, // any failure code that has no dedicated variant
+}
+
 #[derive(Debug)]
 pub enum UsbError {
     /// Could not allocate memory for some device structure
@@ -8,7 +18,7 @@ pub enum UsbError {
     SystemError(Error),
     // HC-side init stage errors
     OutOfAddresses,
-    HostControllerCommandFailed(u8),
+    HostControllerCommandFailed(u8, u32),
     PortResetFailed,
     PortInitFailed,
     // Setup stage errors
@@ -17,7 +27,9 @@ pub enum UsbError {
     // Runtime errors
     DeviceBusy,
     DeviceDisconnected,
-    TransferFailed,
+    TransferFailed(TransferError),
+    // Driver errors
+    DriverError,
 }
 
 impl From<UsbError> for Error {
diff --git a/kernel/driver/bus/usb/src/info.rs b/kernel/driver/bus/usb/src/info.rs
index 8e079bb3..c2ea9fc2 100644
--- a/kernel/driver/bus/usb/src/info.rs
+++ b/kernel/driver/bus/usb/src/info.rs
@@ -1,3 +1,5 @@
+use core::fmt;
+
 use alloc::{string::String, vec::Vec};
 use yggdrasil_abi::primitive_enum;
 
@@ -27,10 +29,20 @@ pub enum UsbUsageType {
     Reserved,
 }
 
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)]
+pub enum UsbVersion {
+    Usb11,
+    Usb20,
+    Usb30,
+    Usb31,
+    Usb32,
+}
+
 primitive_enum! {
     pub enum UsbDeviceClass: u8 {
         FromInterface = 0x00,
         Hid = 0x03,
+        MassStorage = 0x08,
         Unknown = 0xFF,
     }
 }
@@ -50,6 +62,7 @@ pub struct UsbInterfaceInfo {
     pub interface_class: UsbDeviceClass,
     pub interface_subclass: u8,
     pub interface_protocol: UsbDeviceProtocol,
+    pub interface_protocol_number: u8,
 }
 
 #[derive(Debug, Clone)]
@@ -73,12 +86,46 @@ pub struct UsbDeviceInfo {
     pub manufacturer: String,
     pub product: String,
 
+    pub usb_version: UsbVersion,
+
     pub id_vendor: u16,
     pub id_product: u16,
 
     pub device_class: UsbDeviceClass,
     pub device_subclass: u8,
     pub device_protocol: UsbDeviceProtocol,
+    pub device_protocol_number: u8,
 
+    /// Max packet size for endpoint zero
     pub max_packet_size: usize,
 }
+
+impl UsbVersion {
+    pub fn is_version_3(&self) -> bool {
+        matches!(self, Self::Usb30 | Self::Usb31 | Self::Usb32)
+    }
+
+    pub fn from_bcd_usb(value: u16) -> Option<Self> {
+        match value {
+            0x100 | 0x110 => Some(UsbVersion::Usb11), // 1.0 devices are handled as 1.1
+            0x200 | 0x201 | 0x210 => Some(UsbVersion::Usb20), // 2.01/2.10: USB2 mode with BOS
+            0x300 => Some(UsbVersion::Usb30),
+            0x310 => Some(UsbVersion::Usb31),
+            0x320 => Some(UsbVersion::Usb32),
+            _ => None,
+        }
+    }
+}
+
+impl fmt::Display for UsbVersion {
+    // Formats the version as a short name such as "USB2.0"
+    fn fmt(&self, out: &mut fmt::Formatter<'_>) -> fmt::Result {
+        out.write_str(match self {
+            Self::Usb11 => "USB1.1",
+            Self::Usb20 => "USB2.0",
+            Self::Usb30 => "USB3.0",
+            Self::Usb31 => "USB3.1",
+            Self::Usb32 => "USB3.2",
+        })
+    }
+}
diff --git a/kernel/driver/bus/usb/src/pipe/bulk.rs b/kernel/driver/bus/usb/src/pipe/bulk.rs
new file mode 100644
index 00000000..f7919c72
--- /dev/null
+++ b/kernel/driver/bus/usb/src/pipe/bulk.rs
@@ -0,0 +1,55 @@
+use core::ops::Deref;
+
+use alloc::boxed::Box;
+use libk_mm::PageSlice;
+
+use crate::{communication::UsbBulkTransfer, error::UsbError};
+
+use super::UsbGenericPipe;
+
+pub trait UsbBulkInPipe: UsbGenericPipe + Send + Sync {
+    fn start_read(&self, buffer: &mut PageSlice<u8>) -> Result<UsbBulkTransfer, UsbError>;
+    fn complete_transfer(&self, transfer: UsbBulkTransfer);
+}
+
+pub trait UsbBulkOutPipe: UsbGenericPipe + Send + Sync {
+    fn start_write(&self, buffer: &PageSlice<u8>) -> Result<UsbBulkTransfer, UsbError>;
+    fn complete_transfer(&self, transfer: UsbBulkTransfer);
+}
+
+pub struct UsbBulkInPipeAccess(pub Box<dyn UsbBulkInPipe>);
+pub struct UsbBulkOutPipeAccess(pub Box<dyn UsbBulkOutPipe>);
+
+impl UsbBulkInPipeAccess {
+    pub async fn read<'a>(&self, buffer: &'a mut PageSlice<u8>) -> Result<&'a [u8], UsbError> {
+        let transfer = self.start_read(buffer)?;
+        let result = transfer.wait().await;
+        self.complete_transfer(transfer); // hand the transfer back even when waiting failed
+        Ok(&buffer[..result?])
+    }
+}
+
+impl Deref for UsbBulkInPipeAccess {
+    type Target = dyn UsbBulkInPipe;
+
+    fn deref(&self) -> &Self::Target {
+        &*self.0
+    }
+}
+
+impl UsbBulkOutPipeAccess {
+    pub async fn write<'a>(&self, buffer: &'a PageSlice<u8>) -> Result<(), UsbError> {
+        let transfer = self.start_write(buffer)?;
+        let result = transfer.wait().await;
+        self.complete_transfer(transfer); // hand the transfer back even when waiting failed
+        result.map(|_written| ())
+    }
+}
+
+impl Deref for UsbBulkOutPipeAccess {
+    type Target = dyn UsbBulkOutPipe;
+
+    fn deref(&self) -> &Self::Target {
+        &*self.0
+    }
+}
diff --git a/kernel/driver/bus/usb/src/pipe/control.rs b/kernel/driver/bus/usb/src/pipe/control.rs
index a8ada7a9..ed58c48a 100644
--- a/kernel/driver/bus/usb/src/pipe/control.rs
+++ b/kernel/driver/bus/usb/src/pipe/control.rs
@@ -40,6 +40,11 @@ pub trait UsbDeviceRequest: Sized + Pod {
     const B_REQUEST: u8;
 }
 
+pub trait UsbClassSpecificRequest: Sized + Pod {
+    const BM_REQUEST_TYPE: u8;
+    const B_REQUEST: u8;
+}
+
 pub trait UsbDescriptorRequest: UsbDeviceRequest {
     const DESCRIPTOR_TYPE: u8;
 }
@@ -312,6 +317,24 @@ impl UsbControlPipeAccess {
         .await
     }
 
+    pub async fn class_specific_request<D: UsbClassSpecificRequest>(
+        &self,
+        w_value: u16,
+        w_index: u16,
+    ) -> Result<(), UsbError> {
+        self.perform_value_control(
+            ControlTransferSetup {
+                bm_request_type: D::BM_REQUEST_TYPE, // request type and code come from `D`
+                b_request: D::B_REQUEST,
+                w_value,
+                w_index,
+                w_length: 0, // the request declares a zero data length
+            },
+            None, // NOTE(review): presumably "no data buffer" — confirm
+        )
+        .await
+    }
+
     pub async fn set_configuration(&self, value: u16) -> Result<(), UsbError> {
         self.perform_action::<SetConfiguration>(value, 0).await
     }
diff --git a/kernel/driver/bus/usb/src/pipe/mod.rs b/kernel/driver/bus/usb/src/pipe/mod.rs
index a45d7632..b09f2bd8 100644
--- a/kernel/driver/bus/usb/src/pipe/mod.rs
+++ b/kernel/driver/bus/usb/src/pipe/mod.rs
@@ -1,3 +1,4 @@
+pub mod bulk;
 pub mod control;
 pub mod interrupt;
 
diff --git a/kernel/driver/usb/xhci/src/context.rs b/kernel/driver/usb/xhci/src/context.rs
new file mode 100644
index 00000000..e212a2de
--- /dev/null
+++ b/kernel/driver/usb/xhci/src/context.rs
@@ -0,0 +1,136 @@
+use core::ops::{Deref, DerefMut};
+
+use libk_mm::{
+    address::{AsPhysicalAddress, PhysicalAddress},
+    PageBox,
+};
+use libk_util::sync::spin_rwlock::IrqSafeRwLock;
+use xhci_lib::context::{self, DeviceHandler, InputHandler};
+use ygg_driver_usb::error::UsbError;
+
+use crate::{
+    controller::{ContextSize, PortNumber},
+    regs::PortSpeed,
+};
+
+pub enum XhciDeviceContext {
+    Context32(IrqSafeRwLock<PageBox<context::Device32Byte>>),
+    Context64(IrqSafeRwLock<PageBox<context::Device64Byte>>),
+}
+
+pub enum XhciInputContext {
+    Context32(PageBox<context::Input32Byte>),
+    Context64(PageBox<context::Input64Byte>),
+}
+
+impl XhciDeviceContext {
+    pub fn new(size: ContextSize) -> Result<Self, UsbError> {
+        match size {
+            ContextSize::Context32 => PageBox::new(context::Device::new_32byte())
+                .map(IrqSafeRwLock::new)
+                .map(Self::Context32),
+            ContextSize::Context64 => PageBox::new(context::Device::new_64byte())
+                .map(IrqSafeRwLock::new)
+                .map(Self::Context64),
+        }
+        .map_err(UsbError::MemoryError)
+    }
+
+    pub fn map<T, F: FnOnce(&dyn DeviceHandler) -> T>(&self, mapper: F) -> T {
+        match self {
+            Self::Context32(cx) => mapper(&**cx.read()),
+            Self::Context64(cx) => mapper(&**cx.read()),
+        }
+    }
+
+    pub fn physical_address(&self) -> PhysicalAddress {
+        match self {
+            Self::Context32(cx) => unsafe { cx.read().as_physical_address() },
+            Self::Context64(cx) => unsafe { cx.read().as_physical_address() },
+        }
+    }
+}
+
+impl XhciInputContext {
+    pub fn new(size: ContextSize) -> Result<Self, UsbError> {
+        match size {
+            ContextSize::Context32 => {
+                PageBox::new(context::Input::new_32byte()).map(Self::Context32)
+            }
+            ContextSize::Context64 => {
+                PageBox::new(context::Input::new_64byte()).map(Self::Context64)
+            }
+        }
+        .map_err(UsbError::MemoryError) // a failed PageBox allocation becomes MemoryError
+    }
+
+    pub fn new_address_device(
+        size: ContextSize,
+        bus_address: u8,
+        root_hub_port_number: PortNumber,
+        speed: PortSpeed,
+        max_packet_size: Option<usize>, // None: use `speed.default_max_packet_size()` instead
+        dequeue_pointer: PhysicalAddress,
+    ) -> Result<Self, UsbError> {
+        let mut cx = Self::new(size)?;
+
+        {
+            let control = cx.control_mut();
+
+            control.set_add_context_flag(0); // Flag 0: enable the slot context
+            control.set_add_context_flag(1); // Flag 1: enable the endpoint 0 context
+        }
+
+        {
+            let slot = cx.device_mut().slot_mut();
+
+            slot.set_context_entries(1); // NOTE(review): presumably "only EP0 is valid" — confirm
+            slot.set_interrupter_target(0);
+            slot.set_usb_device_address(bus_address);
+            slot.set_root_hub_port_number(root_hub_port_number.into());
+            slot.set_speed(speed.into());
+        }
+
+        {
+            let ep0 = cx.device_mut().endpoint_mut(1);
+
+            ep0.set_endpoint_type(context::EndpointType::Control);
+            ep0.set_error_count(3);
+            // Provided size or the speed's default; NOTE(review): `as u16` truncates silently
+            ep0.set_max_packet_size(
+                max_packet_size.unwrap_or(speed.default_max_packet_size()) as u16
+            );
+            ep0.set_tr_dequeue_pointer(dequeue_pointer.into_u64());
+            ep0.set_dequeue_cycle_state();
+        }
+
+        Ok(cx)
+    }
+
+    pub fn physical_address(&self) -> PhysicalAddress {
+        match self {
+            Self::Context32(cx) => unsafe { cx.as_physical_address() },
+            Self::Context64(cx) => unsafe { cx.as_physical_address() },
+        }
+    }
+}
+
+impl Deref for XhciInputContext {
+    type Target = dyn InputHandler;
+
+    fn deref(&self) -> &Self::Target {
+        match self {
+            Self::Context32(cx) => &**cx,
+            Self::Context64(cx) => &**cx,
+        }
+    }
+}
+
+impl DerefMut for XhciInputContext {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        match self {
+            Self::Context32(cx) => &mut **cx,
+            Self::Context64(cx) => &mut **cx,
+        }
+    }
+}
diff --git a/kernel/driver/usb/xhci/src/controller.rs b/kernel/driver/usb/xhci/src/controller.rs
index 09926873..cd2209e9 100644
--- a/kernel/driver/usb/xhci/src/controller.rs
+++ b/kernel/driver/usb/xhci/src/controller.rs
@@ -1,19 +1,27 @@
-use core::{future::poll_fn, sync::atomic::Ordering, task::Poll, time::Duration};
+use core::{
+    fmt,
+    mem::MaybeUninit,
+    num::NonZeroU8,
+    sync::atomic::{AtomicUsize, Ordering},
+};
 
 use alloc::{boxed::Box, collections::BTreeMap, sync::Arc, vec, vec::Vec};
-use atomic_enum::atomic_enum;
 use device_api::{device::Device, interrupt::InterruptHandler};
-use futures_util::task::AtomicWaker;
 use libk::task::runtime;
 use libk_mm::{
     address::{AsPhysicalAddress, PhysicalAddress},
     PageBox,
 };
-use libk_util::{sync::spin_rwlock::IrqSafeRwLock, OneTimeInit};
+use libk_util::{
+    sync::{spin_rwlock::IrqSafeRwLock, IrqSafeSpinlock},
+    OneTimeInit,
+};
+use xhci_lib::ExtendedCapability;
 use ygg_driver_usb::{
     bus::UsbBusManager,
-    device::{UsbBusAddress, UsbDevice, UsbDeviceAccess},
+    device::{UsbBusAddress, UsbDeviceAccess},
     error::UsbError,
+    info::UsbVersion,
     pipe::control::UsbControlPipeAccess,
     util::UsbAddressAllocator,
     UsbHostController,
@@ -21,6 +29,7 @@ use ygg_driver_usb::{
 use yggdrasil_abi::error::Error;
 
 use crate::{
+    context::{XhciDeviceContext, XhciInputContext},
     device::XhciBusDevice,
     pipe::ControlPipe,
     regs::{Mapper, PortSpeed, Regs},
@@ -28,21 +37,44 @@ use crate::{
         CommandExecutor, CommandRing, ControlTransferRing, Event, EventRing, EventRingSegmentTable,
         GenericTransferRing,
     },
-    XhciContext,
+    util::EventBitmap,
 };
 
-#[atomic_enum]
-#[derive(PartialEq, Eq)]
-pub enum PortState {
-    Disconnected, // Default + set by "handle detach"
-    Init,         // Set by "port task"
-    Running,      // Set by "port task"
+#[derive(Debug, PartialEq, Eq, Clone, Copy)]
+pub enum ContextSize {
+    Context32,
+    Context64,
 }
 
-struct PortStruct {
-    state: AtomicPortState,
-    notify: AtomicWaker,
-    address: IrqSafeRwLock<Option<UsbBusAddress>>,
+#[derive(Debug, Clone, Copy, PartialEq, Eq, PartialOrd, Ord)]
+pub struct PortNumber(NonZeroU8);
+
+impl PortNumber {
+    pub fn new(port: u8) -> Option<Self> {
+        NonZeroU8::new(port).map(Self)
+    }
+
+    pub fn from_port_index(index: usize) -> Option<Self> {
+        // Numbers are index + 1; checked math turns every out-of-range index into `None`
+        u8::try_from(index.checked_add(1)?).ok().and_then(Self::new)
+    }
+
+    pub fn port_index(&self) -> usize {
+        usize::from(self.0.get()) - 1 // cannot underflow: the wrapped number is never zero
+    }
+}
+
+#[allow(unused)]
+struct Scratchpads {
+    buffers: Vec<PageBox<[MaybeUninit<u8>]>>,
+    array: PageBox<[PhysicalAddress]>,
+}
+
+#[allow(unused)]
+struct RootHubPort {
+    version: UsbVersion,
+    slot_type: u8,
+    max_hub_depth: Option<usize>,
 }
 
 pub struct Xhci {
@@ -52,32 +84,146 @@ pub struct Xhci {
     address_allocator: UsbAddressAllocator,
 
     port_count: usize,
-    // TODO use to allocate proper contexts
-    #[allow(unused)]
-    context_size: usize,
+    pub(crate) context_size: ContextSize,
 
     dcbaa: IrqSafeRwLock<PageBox<[PhysicalAddress]>>,
     endpoints: IrqSafeRwLock<BTreeMap<(u8, u8), Arc<dyn GenericTransferRing>>>,
     event_ring: EventRing,
     pub(crate) command_ring: CommandRing,
+    #[allow(unused)]
+    scratchpads: Option<Scratchpads>,
 
-    port_states: Vec<PortStruct>,
+    root_hub_map: BTreeMap<PortNumber, RootHubPort>,
+
+    port_event_map: EventBitmap,
+}
+
+impl Scratchpads {
+    pub fn new(count: usize) -> Result<Option<Self>, UsbError> {
+        if count == 0 {
+            return Ok(None); // nothing to allocate when no scratchpads are requested
+        }
+
+        let buffers = (0..count)
+            .map(|_| PageBox::new_uninit_slice(4096)) // one 4096-byte buffer per entry
+            .collect::<Result<Vec<_>, _>>()
+            .map_err(UsbError::MemoryError)?; // NOTE(review): 4096 is hardcoded — confirm page size
+        let array = PageBox::new_slice_with(|i| unsafe { buffers[i].as_physical_address() }, count)
+            .map_err(UsbError::MemoryError)?; // array[i] is the physical address of buffers[i]
+
+        Ok(Some(Self { buffers, array }))
+    }
+}
 
 impl Xhci {
-    pub fn new(regs: xhci_lib::Registers<Mapper>) -> Result<Self, UsbError> {
+    // Extract all info about the xHC, but don't do any init besides performing a BIOS->OS handoff
+    pub fn new(
+        regs: xhci_lib::Registers<Mapper>,
+        mut extended: xhci_lib::extended_capabilities::List<Mapper>,
+    ) -> Result<Self, UsbError> {
         let event_ring = EventRing::new(128)?;
         let command_ring = CommandRing::new(128)?;
 
         let regs = Regs::from(regs);
 
+        let scratchpad_count = regs.scratchpad_count();
         let port_count = regs.port_count();
         let slot_count = regs.max_slot_count();
         let context_size = regs.context_size();
+        let context_size = match context_size {
+            32 => ContextSize::Context32,
+            64 => ContextSize::Context64,
+            _ => {
+                log::error!("Unhandled context size: {context_size}");
+                return Err(UsbError::InvalidConfiguration);
+            }
+        };
 
-        let dcbaa = PageBox::new_slice(PhysicalAddress::ZERO, slot_count + 1)
+        let scratchpads = Scratchpads::new(scratchpad_count)?;
+        let mut dcbaa = PageBox::new_slice(PhysicalAddress::ZERO, slot_count + 1)
             .map_err(UsbError::MemoryError)?;
 
+        if let Some(scratchpads) = scratchpads.as_ref() {
+            dcbaa[0] = unsafe { scratchpads.array.as_physical_address() };
+        }
+
+        let mut root_hub_map = BTreeMap::new();
+        for cap in extended.into_iter() {
+            let Ok(cap) = cap else {
+                continue;
+            };
+
+            match cap {
+                ExtendedCapability::UsbLegacySupport(mut legsup) => {
+                    let mut handoff = false;
+                    legsup.usblegsup.update_volatile(|legsup| {
+                        if legsup.hc_bios_owned_semaphore() {
+                            handoff = true;
+                            legsup.set_hc_os_owned_semaphore();
+                        }
+                    });
+
+                    if !handoff {
+                        continue;
+                    }
+                    log::info!("xhci: BIOS->OS handoff started");
+
+                    let mut timeout = 10000000;
+                    while timeout > 0 {
+                        let status = legsup.usblegsup.read_volatile();
+                        if !status.hc_bios_owned_semaphore() && status.hc_os_owned_semaphore() {
+                            break;
+                        }
+
+                        core::hint::spin_loop();
+                        timeout -= 1;
+                    }
+                    if timeout == 0 {
+                        log::error!("xhci: BIOS->OS handoff failed");
+                        return Err(UsbError::DeviceBusy);
+                    }
+                    log::info!("xhci: BIOS->OS handoff finished");
+                }
+                ExtendedCapability::XhciSupportedProtocol(proto) => {
+                    let header = proto.header.read_volatile();
+
+                    if header.name_string() != 0x20425355 {
+                        log::warn!("Skip unknown xHCI supported protocol capability");
+                        continue;
+                    }
+                    let version_major = header.major_revision();
+                    let version_minor = header.minor_revision();
+                    let slot_type = header.protocol_slot_type();
+                    let max_hub_depth = header.hub_depth();
+                    let version = ((version_major as u16) << 8) | (version_minor as u16);
+                    let Some(version) = UsbVersion::from_bcd_usb(version) else {
+                        log::warn!("Skip unknown xHCI supported protocol revision: {version_major:x}.{version_minor:x}");
+                        continue;
+                    };
+
+                    let port_range = header.compatible_port_offset()
+                        ..header.compatible_port_offset() + header.compatible_port_count();
+                    log::info!("Ports {port_range:?}: USB {version_major:x}.{version_minor:x}, slot type {slot_type}, max hub depth {max_hub_depth}");
+
+                    for port in port_range {
+                        let Some(number) = PortNumber::new(port) else {
+                            continue;
+                        };
+                        root_hub_map.insert(
+                            number,
+                            RootHubPort {
+                                version,
+                                slot_type,
+                                max_hub_depth: (max_hub_depth != 0)
+                                    .then_some(max_hub_depth as usize),
+                            },
+                        );
+                    }
+                }
+                _ => (),
+            }
+        }
+
         Ok(Self {
             regs,
 
@@ -92,12 +238,10 @@ impl Xhci {
 
             dcbaa: IrqSafeRwLock::new(dcbaa),
             endpoints: IrqSafeRwLock::new(BTreeMap::new()),
+            scratchpads,
+            root_hub_map,
 
-            port_states: Vec::from_iter((0..port_count).map(|_| PortStruct {
-                state: AtomicPortState::new(PortState::Disconnected),
-                notify: AtomicWaker::new(),
-                address: IrqSafeRwLock::new(None),
-            })),
+            port_event_map: EventBitmap::new(),
         })
     }
 
@@ -135,173 +279,220 @@ impl Xhci {
     ) {
         if let Some(ep) = self.endpoints.read().get(&(slot_id, endpoint_id)) {
             ep.notify(address, status);
+        } else {
+            log::warn!("No endpoint slot={slot_id}, ep={endpoint_id}");
         }
     }
 
-    async fn assign_device(
-        self: Arc<Self>,
-        speed: PortSpeed,
-        slot_id: u8,
-        root_hub_port_number: u8,
-    ) -> Result<Box<dyn UsbDevice>, UsbError> {
-        let address = self.address_allocator.allocate().unwrap();
-        let ring = Arc::new(ControlTransferRing::new(slot_id, 1, 128)?);
+    async fn reset_port(&self, port: PortNumber) -> Result<(), UsbError> {
+        let index = port.port_index();
 
-        let context =
-            XhciContext::new_32byte_address_device(&ring, speed, address, root_hub_port_number)?;
-        let mut input = context.input.write();
+        // Set port reset and wait for it to clear
+        self.regs.ports.update(index, |regs| {
+            regs.portsc.set_port_reset();
+        });
 
-        self.register_device_context(slot_id, unsafe { context.output.as_physical_address() });
+        // TODO timeout
+        loop {
+            self.port_event_map.wait_specific(index).await;
 
-        self.command_ring
-            .address_device(&*self, slot_id, &mut input)
-            .await?;
+            let mut status = None;
+            self.regs.ports.update(index, |regs| {
+                // Port became disconnected
+                if !regs.portsc.port_enabled_disabled() || !regs.portsc.current_connect_status() {
+                    log::warn!("xhci: port {port} disconnected during reset");
+                    status = Some(Err(UsbError::DeviceDisconnected));
+                    return;
+                }
 
-        self.register_endpoint(slot_id, 1, ring.clone());
+                if !regs.portsc.port_reset() {
+                    regs.portsc.clear_port_reset_change();
+                    status = Some(Ok(()));
+                }
+            });
 
-        let pipe = ControlPipe::new(self.clone(), slot_id, ring.clone());
+            if let Some(status) = status {
+                return status;
+            }
+        }
+    }
 
-        // TODO: If the device is a Full-speed one, determine its max packet size for the control
-        // endpoint
-        if speed == PortSpeed::Full {
-            todo!()
+    async fn setup_connected_port(self: Arc<Self>, port: PortNumber) -> Result<(), UsbError> {
+        // TODO cleanup after a failed device init:
+        //  * Deallocate the bus address
+        //  * Issue a Disable Slot TRB
+        //  * Remove Device Context from DCBAA
+        //  * Deregister the device's Default Control Endpoint
+
+        let index = port.port_index();
+        let root_hub_port = self
+            .root_hub_map
+            .get(&port)
+            .ok_or(UsbError::PortInitFailed)?;
+
+        log::info!(
+            "xhci: setup {} device at port {port}",
+            root_hub_port.version
+        );
+
+        if root_hub_port.version == UsbVersion::Usb20 {
+            // Port needs a reset first
+            log::info!("xhci: reset port {port}");
+            self.reset_port(port).await?;
         }
 
-        drop(input);
+        let speed = PortSpeed::try_from(self.regs.ports.read(index).portsc.port_speed())
+            .map_err(|_| UsbError::PortInitFailed)?;
+
+        log::info!("xhci: port {port} effective speed {speed:?}");
+
+        // Allocate a device slot
+        let slot_id = self
+            .command_ring
+            .enable_slot(&*self, root_hub_port.slot_type)
+            .await?;
+
+        // Allocate some address for the device
+        let bus_address = self.address_allocator.allocate()?;
+
+        // Allocate a default endpoint ring
+        let control_ring = Arc::new(ControlTransferRing::new(slot_id, 1, 128)?);
+
+        // Register control endpoint
+        self.register_endpoint(slot_id, 1, control_ring.clone());
+
+        let control_pipe = UsbControlPipeAccess(Box::new(ControlPipe::new(
+            self.clone(),
+            slot_id,
+            control_ring.clone(),
+        )));
+
+        // Setup Device Context
+        let device_cx = XhciDeviceContext::new(self.context_size)?;
+        self.register_device_context(slot_id, device_cx.physical_address());
+
+        // Issue an Address Device TRB with BSR=1 first
+        let input_cx = XhciInputContext::new_address_device(
+            self.context_size,
+            bus_address,
+            port,
+            speed,
+            None,
+            control_ring.dequeue_pointer(),
+        )?;
+        self.command_ring
+            .address_device(&*self, slot_id, input_cx.physical_address(), true)
+            .await?;
+
+        let state = device_cx.map(|cx| cx.slot().slot_state());
+        log::info!("xhci: port {port} slot {slot_id} state after BSR=1: {state:?}");
+
+        // After an Address Device with BSR=1, retrieving a Device Descriptor is possible
+        // This is needed for Full-speed devices, where the max_packet_size is 8 at first, but
+        // is determined by the Device Descriptor when it's available
+        let device_descriptor = control_pipe.query_device_descriptor().await?;
+        let max_packet_size =
+            device_descriptor.max_packet_size(root_hub_port.version, speed.into())?;
+
+        log::info!("xhci: port {port}:");
+        log::info!(" * max_packet_size = {max_packet_size}");
+        let max_packet_size = Some(max_packet_size);
+
+        // Reset the control endpoint for a proper dequeue pointer and a proper DCS
+        control_ring.reset();
+
+        // Issue an Address Device TRB with BSR=0
+        let input_cx = XhciInputContext::new_address_device(
+            self.context_size,
+            bus_address,
+            port,
+            speed,
+            max_packet_size,
+            control_ring.dequeue_pointer(),
+        )?;
+        self.command_ring
+            .address_device(&*self, slot_id, input_cx.physical_address(), false)
+            .await
+            .inspect_err(|error| {
+                log::error!("xhci: port {port} Address Device (BSR=0) error: {error:?}")
+            })?;
+
+        let state = device_cx.map(|cx| cx.slot().slot_state());
+        log::info!("xhci: port {port} slot {slot_id} state after BSR=0: {state:?}");
+
+        log::info!("xhci: port {port} device addressed ({bus_address})");
+
+        // Hand off the device to the general USB stack
 
         let bus_address = UsbBusAddress {
             bus: *self.bus_address.get(),
-            device: address,
+            device: bus_address,
         };
 
-        let device = XhciBusDevice {
+        let device = Box::new(XhciBusDevice {
             xhci: self.clone(),
             slot_id,
-            port_id: root_hub_port_number,
+            port_id: port.into(),
             bus_address,
             speed,
-            context: Arc::new(context),
-            rings: IrqSafeRwLock::new(vec![ring]),
-            control_pipe: UsbControlPipeAccess(Box::new(pipe)),
-        };
-
-        Ok(Box::new(device))
-    }
-
-    async fn port_task(self: Arc<Self>, index: usize) -> Result<(), UsbError> {
-        let state = &self.port_states[index];
-
-        self.reset_port(index).await?;
-
-        let regs = self.regs.ports.read(index);
-        let speed =
-            PortSpeed::try_from(regs.portsc.port_speed()).map_err(|_| UsbError::PortInitFailed)?;
-
-        let slot_id = self.command_ring.enable_slot(self.as_ref()).await?;
-
-        let device = self
-            .clone()
-            .assign_device(speed, slot_id, (index + 1) as _)
-            .await?;
+            control_pipe,
+            device_context: device_cx,
+            rings: IrqSafeRwLock::new(vec![control_ring]),
+            detach_handler: IrqSafeSpinlock::new(None),
+        });
         let device = UsbDeviceAccess::setup(device).await?;
 
-        let old = state.address.write().replace(device.bus_address());
-        assert!(old.is_none());
-
         UsbBusManager::register_device(Arc::new(device));
 
-        state.state.store(PortState::Running, Ordering::Release);
-
         Ok(())
     }
 
-    fn handle_device_attached(self: Arc<Self>, port: usize) -> Result<(), UsbError> {
-        log::info!("Port {}: device attached", port);
-
-        if let Err(err) = self.port_states[port].state.compare_exchange(
-            PortState::Disconnected,
-            PortState::Init,
-            Ordering::Acquire,
-            Ordering::Relaxed,
-        ) {
-            log::warn!("Could not start port init task: port state is {:?}", err);
-            return Err(UsbError::DeviceBusy);
+    async fn port_manager_task(self: Arc<Self>) -> Result<(), UsbError> {
+        // Inject events for the root hub ports
+        for (&port, _) in self.root_hub_map.iter() {
+            self.port_event_map.signal(port.port_index());
         }
 
-        runtime::spawn(async move { self.port_task(port).await }).map_err(UsbError::SystemError)?;
-        Ok(())
-    }
+        let mut show_disconnect = [false; 256]; // per port index; a bitmask overflows past 32
+        loop {
+            let events = self.port_event_map.wait_any(self.port_count).await;
 
-    fn handle_device_detached(&self, port: usize) -> Result<(), UsbError> {
-        let state = &self.port_states[port];
+            for port_index in events {
+                let port = PortNumber::from_port_index(port_index).unwrap();
 
-        match state.state.swap(PortState::Disconnected, Ordering::Release) {
-            PortState::Init => {
-                log::warn!("USB device detach received while in init state");
-                Ok(())
-            }
-            PortState::Running => {
-                log::info!("Port {}: device detached", port);
-                let address = state
-                    .address
-                    .write()
-                    .take()
-                    .expect("Port marked as Running, but has no address");
+                let mut connected = None;
 
-                UsbBusManager::detach_device(address);
-                state.notify.wake();
+                self.regs.ports.update(port_index, |regs| {
+                    if regs.portsc.port_enabled_disabled_change() {
+                        regs.portsc.clear_port_enabled_disabled_change();
+                    }
+                    if regs.portsc.connect_status_change() {
+                        regs.portsc.clear_connect_status_change();
+                        connected = Some(regs.portsc.current_connect_status());
+                    }
+                });
 
-                Ok(())
-            }
-            // Already disconnected
-            PortState::Disconnected => Ok(()),
-        }
-    }
+                let Some(state) = connected else {
+                    continue;
+                };
 
-    fn handle_port_event(self: Arc<Self>, port: usize) -> Result<(), UsbError> {
-        let state = &self.port_states[port];
-        let port_regs = self.regs.ports.read(port);
-
-        if port_regs.portsc.connect_status_change() {
-            if port_regs.portsc.current_connect_status() {
-                self.handle_device_attached(port)
-            } else {
-                self.handle_device_detached(port)
-            }
-        } else {
-            // Some other event
-            state.notify.wake();
-            Ok(())
-        }
-    }
-
-    async fn reset_port(&self, port: usize) -> Result<(), UsbError> {
-        log::debug!("Reset port {}", port);
-
-        self.regs.ports.update(port, |u| {
-            u.portsc.set_port_reset();
-        });
-
-        // Wait for port reset
-        // TODO handle disconnect during reset?
-        let result = runtime::with_timeout(
-            poll_fn(|cx| {
-                let state = &self.port_states[port];
-
-                state.notify.register(cx.waker());
-                if !self.regs.ports.read(port).portsc.port_reset() {
-                    Poll::Ready(())
+                if state {
+                    if !show_disconnect[port_index] {
+                        log::info!("xhci: port {port} connected");
+                        if let Err(error) = self.clone().setup_connected_port(port).await {
+                            show_disconnect[port_index] = false;
+                            log::error!("xhci: port {port} setup failed: {error:?}");
+                        } else {
+                            show_disconnect[port_index] = true;
+                        }
+                    }
                 } else {
-                    Poll::Pending
+                    if show_disconnect[port_index] {
+                        log::warn!("xhci: port {port} disconnected");
+                        show_disconnect[port_index] = false;
+                    }
                 }
-            }),
-            Duration::from_secs(1),
-        )
-        .await;
-
-        match result {
-            Ok(()) => Ok(()),
-            Err(_) => Err(UsbError::PortResetFailed),
+            }
         }
     }
 
@@ -309,7 +500,9 @@ impl Xhci {
         while let Some(event) = self.event_ring.try_dequeue() {
             match event {
                 Event::PortChange(port) => {
-                    self.clone().handle_port_event(port - 1).ok();
+                    if port > 0 {
+                        self.port_event_map.signal(port - 1);
+                    }
                 }
                 Event::CommandCompletion { address, reply } => {
                     self.command_ring.notify(address, reply);
@@ -340,8 +533,14 @@ impl CommandExecutor for Xhci {
 
 impl Device for Xhci {
     unsafe fn init(self: Arc<Self>) -> Result<(), Error> {
+        static XHCI_COUNT: AtomicUsize = AtomicUsize::new(0);
         log::info!("Init USB xHCI");
 
+        if XHCI_COUNT.fetch_add(1, Ordering::Release) != 0 {
+            log::warn!("Skip second xhci init");
+            return Ok(());
+        }
+
         self.regs.reset();
         self.regs.set_max_slot_count();
 
@@ -354,12 +553,8 @@ impl Device for Xhci {
         let bus = UsbBusManager::register_bus(self.clone());
         self.bus_address.init(bus);
 
-        for port in 0..self.port_count {
-            let p = self.regs.ports.read(port);
-            if p.portsc.current_connect_status() {
-                self.clone().handle_device_attached(port).ok();
-            }
-        }
+        // Start the port manager task
+        runtime::spawn(self.clone().port_manager_task())?;
 
         Ok(())
     }
@@ -382,3 +577,15 @@ impl InterruptHandler for Xhci {
         }
     }
 }
+
+impl fmt::Display for PortNumber {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Display::fmt(&self.0, f)
+    }
+}
+
+impl From<PortNumber> for u8 {
+    fn from(value: PortNumber) -> Self {
+        value.0.into()
+    }
+}
diff --git a/kernel/driver/usb/xhci/src/device.rs b/kernel/driver/usb/xhci/src/device.rs
index e6fb4bfa..702b1753 100644
--- a/kernel/driver/usb/xhci/src/device.rs
+++ b/kernel/driver/usb/xhci/src/device.rs
@@ -1,23 +1,30 @@
 use alloc::{boxed::Box, sync::Arc, vec::Vec};
 use futures_util::{future::BoxFuture, FutureExt};
-use libk_util::sync::spin_rwlock::IrqSafeRwLock;
-use xhci_lib::context::{self, InputHandler};
+use libk_util::sync::{spin_rwlock::IrqSafeRwLock, IrqSafeSpinlock};
+use xhci_lib::context;
 use ygg_driver_usb::{
-    device::{UsbBusAddress, UsbDevice, UsbSpeed},
+    device::{UsbBusAddress, UsbDevice, UsbDeviceDetachHandler, UsbSpeed},
     error::UsbError,
     info::UsbEndpointType,
-    pipe::{control::UsbControlPipeAccess, interrupt::UsbInterruptInPipeAccess},
+    pipe::{
+        bulk::{UsbBulkInPipeAccess, UsbBulkOutPipeAccess},
+        control::UsbControlPipeAccess,
+        interrupt::UsbInterruptInPipeAccess,
+    },
     UsbDirection, UsbHostController,
 };
 
 use crate::{
-    pipe::InterruptInPipe,
+    context::{XhciDeviceContext, XhciInputContext},
+    pipe::{BulkInPipe, BulkOutPipe, InterruptInPipe},
     regs::PortSpeed,
-    ring::{transfer::InterruptInTransferRing, GenericTransferRing},
-    Xhci, XhciContext,
+    ring::{
+        transfer::{BulkInTransferRing, BulkOutTransferRing, InterruptInTransferRing},
+        GenericTransferRing,
+    },
+    Xhci,
 };
 
-// TODO device context information
 pub struct XhciBusDevice {
     pub(crate) port_id: u8,
     pub(crate) slot_id: u8,
@@ -27,9 +34,11 @@ pub struct XhciBusDevice {
 
     pub(crate) xhci: Arc<Xhci>,
 
-    pub(crate) context: Arc<XhciContext<8>>,
+    #[allow(unused)]
+    pub(crate) device_context: XhciDeviceContext,
     pub(crate) rings: IrqSafeRwLock<Vec<Arc<dyn GenericTransferRing>>>,
     pub(crate) control_pipe: UsbControlPipeAccess,
+    pub(crate) detach_handler: IrqSafeSpinlock<Option<Arc<dyn UsbDeviceDetachHandler>>>,
 }
 
 impl XhciBusDevice {
@@ -56,42 +65,51 @@ impl XhciBusDevice {
     ) -> Result<(), UsbError> {
         log::debug!("Setup endpoint dci #{}: {:?} {:?}", dci, ty, direction);
 
-        let mut input = self.context.input.write();
-
         let ep_type = match (ty, direction) {
             (UsbEndpointType::Interrupt, UsbDirection::In) => context::EndpointType::InterruptIn,
+            (UsbEndpointType::Interrupt, UsbDirection::Out) => context::EndpointType::InterruptOut,
+            (UsbEndpointType::Bulk, UsbDirection::In) => context::EndpointType::BulkIn,
+            (UsbEndpointType::Bulk, UsbDirection::Out) => context::EndpointType::BulkOut,
             _ => todo!(),
         };
 
+        let mut input_cx = XhciInputContext::new(self.xhci.context_size)?;
+
         {
-            let control = input.control_mut();
+            let control = input_cx.control_mut();
 
             control.set_add_context_flag(0);
-            control.clear_add_context_flag(1);
             control.set_add_context_flag(dci as _);
         }
 
         {
-            let slot = input.device_mut().slot_mut();
+            let slot = input_cx.device_mut().slot_mut();
 
             slot.set_context_entries(31);
+            slot.set_interrupter_target(0);
+            slot.set_usb_device_address(self.bus_address.device);
+            slot.set_root_hub_port_number(self.port_id);
+            slot.set_speed(self.speed.into());
         }
 
         {
-            let ep_cx = input.device_mut().endpoint_mut(dci as _);
+            let ep_cx = input_cx.device_mut().endpoint_mut(dci as _);
 
-            ep_cx.set_tr_dequeue_pointer(ring.dequeue_pointer().into());
-            ep_cx.set_dequeue_cycle_state();
             ep_cx.set_endpoint_type(ep_type);
             ep_cx.set_error_count(3);
-
-            // TODO get from endpoint info
+            // TODO
             ep_cx.set_max_packet_size(8);
+            ep_cx.set_tr_dequeue_pointer(ring.dequeue_pointer().into_u64());
+            ep_cx.set_dequeue_cycle_state();
         }
 
         self.xhci
             .command_ring
-            .configure_endpoint(self.xhci.as_ref(), self.slot_id, &mut input)
+            .configure_endpoint(
+                self.xhci.as_ref(),
+                self.slot_id,
+                input_cx.physical_address(),
+            )
             .await?;
 
         self.rings.write().push(ring.clone());
@@ -141,12 +159,50 @@ impl UsbDevice for XhciBusDevice {
         .boxed()
     }
 
+    fn open_bulk_in_pipe(&self, number: u8) -> BoxFuture<Result<UsbBulkInPipeAccess, UsbError>> {
+        async move {
+            let dci = Self::dci(UsbEndpointType::Bulk, UsbDirection::In, number) as u8;
+            let ring = Arc::new(BulkInTransferRing::new(self.slot_id, dci as _, 128)?);
+
+            self.setup_endpoint_inner(ring.clone(), dci, UsbEndpointType::Bulk, UsbDirection::In)
+                .await?;
+
+            let pipe = BulkInPipe::new(self.xhci.clone(), self.slot_id, number, dci, ring);
+
+            Ok(UsbBulkInPipeAccess(Box::new(pipe)))
+        }
+        .boxed()
+    }
+
+    fn open_bulk_out_pipe(&self, number: u8) -> BoxFuture<Result<UsbBulkOutPipeAccess, UsbError>> {
+        async move {
+            let dci = Self::dci(UsbEndpointType::Bulk, UsbDirection::Out, number) as u8;
+            let ring = Arc::new(BulkOutTransferRing::new(self.slot_id, dci as _, 128)?);
+
+            self.setup_endpoint_inner(ring.clone(), dci, UsbEndpointType::Bulk, UsbDirection::Out)
+                .await?;
+
+            let pipe = BulkOutPipe::new(self.xhci.clone(), self.slot_id, number, dci, ring);
+
+            Ok(UsbBulkOutPipeAccess(Box::new(pipe)))
+        }
+        .boxed()
+    }
+
+    fn set_detach_handler(&self, handler: Arc<dyn UsbDeviceDetachHandler>) {
+        *self.detach_handler.lock() = Some(handler);
+    }
+
     fn handle_detach(&self) {
         log::info!("Device detach handler");
         for ring in self.rings.write().drain(..) {
             self.xhci
                 .shutdown_endpoint(ring.slot_id(), ring.endpoint_id());
         }
+        let handler = self.detach_handler.lock().take(); // guard dropped before the call
+        if let Some(handler) = handler {
+            handler.handle_device_detach();
+        }
     }
 
     fn speed(&self) -> UsbSpeed {
diff --git a/kernel/driver/usb/xhci/src/lib.rs b/kernel/driver/usb/xhci/src/lib.rs
index 4a3d4267..c3beda25 100644
--- a/kernel/driver/usb/xhci/src/lib.rs
+++ b/kernel/driver/usb/xhci/src/lib.rs
@@ -4,87 +4,24 @@
 
 extern crate alloc;
 
+use alloc::sync::Arc;
+use device_api::{device::Device, interrupt::InterruptAffinity};
+use regs::Mapper;
+use xhci_lib::extended_capabilities;
+use ygg_driver_pci::{
+    device::{PciDeviceInfo, PreferredInterruptMode},
+    PciCommandRegister, PciConfigurationSpace,
+};
+use yggdrasil_abi::error::Error;
+
+mod context;
 mod controller;
 mod device;
 mod pipe;
 mod regs;
 mod ring;
-
-use alloc::sync::Arc;
+mod util;
 pub use controller::Xhci;
-use device_api::{device::Device, interrupt::InterruptAffinity};
-use libk_mm::PageBox;
-use libk_util::sync::spin_rwlock::IrqSafeRwLock;
-use regs::{Mapper, PortSpeed};
-use ring::{ControlTransferRing, GenericTransferRing};
-use xhci_lib::context::{self, InputHandler};
-use ygg_driver_pci::{
-    device::{PciDeviceInfo, PreferredInterruptMode},
-    PciCommandRegister, PciConfigurationSpace,
-};
-use ygg_driver_usb::error::UsbError;
-use yggdrasil_abi::error::Error;
-
-pub struct XhciContext<const N: usize> {
-    pub(crate) input: IrqSafeRwLock<PageBox<context::Input<N>>>,
-    pub(crate) output: PageBox<context::Device<N>>,
-}
-
-impl XhciContext<8> {
-    pub fn new_32byte() -> Result<Self, UsbError> {
-        let input = PageBox::new(context::Input::new_32byte()).map_err(UsbError::MemoryError)?;
-        let output = PageBox::new(context::Device::new_32byte()).map_err(UsbError::MemoryError)?;
-
-        Ok(Self {
-            input: IrqSafeRwLock::new(input),
-            output,
-        })
-    }
-
-    pub fn new_32byte_address_device(
-        default_control_ring: &ControlTransferRing,
-        speed: PortSpeed,
-        address: u8,
-        root_hub_port_number: u8,
-    ) -> Result<Self, UsbError> {
-        let mut input =
-            PageBox::new(context::Input::new_32byte()).map_err(UsbError::MemoryError)?;
-        let output = PageBox::new(context::Device::new_32byte()).map_err(UsbError::MemoryError)?;
-
-        // Setup input context
-        {
-            let control = input.control_mut();
-
-            control.set_add_context_flag(0);
-            control.set_add_context_flag(1);
-        }
-
-        {
-            let slot = input.device_mut().slot_mut();
-
-            slot.set_context_entries(1);
-            slot.set_interrupter_target(0);
-            slot.set_usb_device_address(address);
-            slot.set_root_hub_port_number(root_hub_port_number);
-            slot.set_speed(speed.into());
-        }
-
-        {
-            let ep0 = input.device_mut().endpoint_mut(1);
-
-            ep0.set_endpoint_type(context::EndpointType::Control);
-            ep0.set_tr_dequeue_pointer(default_control_ring.dequeue_pointer().into());
-            ep0.set_dequeue_cycle_state();
-            ep0.set_error_count(3);
-            ep0.set_max_packet_size(speed.default_max_packet_size() as _);
-        }
-
-        Ok(Self {
-            input: IrqSafeRwLock::new(input),
-            output,
-        })
-    }
-}
 
 pub fn probe(info: &PciDeviceInfo) -> Result<Arc<dyn Device>, Error> {
     // TODO Chip Hardware Reset
@@ -100,8 +37,15 @@ pub fn probe(info: &PciDeviceInfo) -> Result<Arc<dyn Device>, Error> {
     cmd |= PciCommandRegister::ENABLE_MEMORY | PciCommandRegister::BUS_MASTER;
     info.config_space.set_command(cmd.bits());
 
-    let regs = unsafe { xhci_lib::Registers::new(bar0.try_into_usize().unwrap(), Mapper::new()) };
-    let xhci = Arc::new(Xhci::new(regs)?);
+    let base = bar0.try_into_usize().unwrap();
+    let mapper = Mapper::new();
+    let regs = unsafe { xhci_lib::Registers::new(base, mapper.clone()) };
+    let extended = unsafe {
+        extended_capabilities::List::new(base, regs.capability.hccparams1.read_volatile(), mapper)
+    }
+    .ok_or(Error::InvalidArgument)
+    .inspect_err(|_| log::error!("Cannot proceed with xhci init: no extended capabilities"))?;
+    let xhci = Arc::new(Xhci::new(regs, extended)?);
 
     info.init_interrupts(PreferredInterruptMode::Msi(true))?;
     info.map_interrupt(InterruptAffinity::Any, xhci.clone())?;
diff --git a/kernel/driver/usb/xhci/src/pipe.rs b/kernel/driver/usb/xhci/src/pipe.rs
index 4d2cfc0e..43d5f833 100644
--- a/kernel/driver/usb/xhci/src/pipe.rs
+++ b/kernel/driver/usb/xhci/src/pipe.rs
@@ -1,9 +1,10 @@
 use alloc::sync::Arc;
-use libk_mm::{address::PhysicalAddress, PageBox};
+use libk_mm::{address::PhysicalAddress, PageBox, PageSlice};
 use ygg_driver_usb::{
-    communication::UsbInterruptTransfer,
+    communication::{UsbBulkTransfer, UsbInterruptTransfer},
     error::UsbError,
     pipe::{
+        bulk::{UsbBulkInPipe, UsbBulkOutPipe},
         control::{ControlTransferSetup, UsbControlPipe},
         interrupt::UsbInterruptInPipe,
         UsbGenericPipe,
@@ -12,7 +13,10 @@ use ygg_driver_usb::{
 };
 
 use crate::{
-    ring::{transfer::InterruptInTransferRing, ControlTransferRing},
+    ring::{
+        transfer::{BulkInTransferRing, BulkOutTransferRing, InterruptInTransferRing},
+        ControlTransferRing,
+    },
     Xhci,
 };
 
@@ -32,6 +36,27 @@ pub struct InterruptInPipe {
     ring: Arc<InterruptInTransferRing>,
 }
 
+#[allow(unused)]
+pub struct BulkInPipe {
+    xhci: Arc<Xhci>,
+
+    slot_id: u8,
+    endpoint_id: u8,
+    dci: u8,
+
+    ring: Arc<BulkInTransferRing>,
+}
+#[allow(unused)]
+pub struct BulkOutPipe {
+    xhci: Arc<Xhci>,
+
+    slot_id: u8,
+    endpoint_id: u8,
+    dci: u8,
+
+    ring: Arc<BulkOutTransferRing>,
+}
+
 impl UsbGenericPipe for ControlPipe {}
 
 impl UsbControlPipe for ControlPipe {
@@ -83,3 +108,63 @@ impl InterruptInPipe {
         }
     }
 }
+
+impl UsbGenericPipe for BulkInPipe {}
+
+impl UsbBulkInPipe for BulkInPipe {
+    fn start_read(&self, buffer: &mut PageSlice<u8>) -> Result<UsbBulkTransfer, UsbError> {
+        self.ring.start_transfer(self.xhci.as_ref(), buffer)
+    }
+
+    fn complete_transfer(&self, transfer: UsbBulkTransfer) {
+        self.ring.complete_transfer(transfer)
+    }
+}
+
+impl BulkInPipe {
+    pub fn new(
+        xhci: Arc<Xhci>,
+        slot_id: u8,
+        endpoint_id: u8,
+        dci: u8,
+        ring: Arc<BulkInTransferRing>,
+    ) -> Self {
+        Self {
+            xhci,
+            slot_id,
+            endpoint_id,
+            dci,
+            ring,
+        }
+    }
+}
+
+impl UsbGenericPipe for BulkOutPipe {}
+
+impl UsbBulkOutPipe for BulkOutPipe {
+    fn start_write(&self, buffer: &PageSlice<u8>) -> Result<UsbBulkTransfer, UsbError> {
+        self.ring.start_transfer(self.xhci.as_ref(), buffer)
+    }
+
+    fn complete_transfer(&self, transfer: UsbBulkTransfer) {
+        self.ring.complete_transfer(transfer)
+    }
+}
+
+impl BulkOutPipe {
+    pub fn new(
+        xhci: Arc<Xhci>,
+        slot_id: u8,
+        endpoint_id: u8,
+        dci: u8,
+        ring: Arc<BulkOutTransferRing>,
+    ) -> Self {
+        Self {
+            xhci,
+            slot_id,
+            endpoint_id,
+            dci,
+            ring,
+        }
+    }
+}
diff --git a/kernel/driver/usb/xhci/src/regs.rs b/kernel/driver/usb/xhci/src/regs.rs
index 35820c0a..b8eb075b 100644
--- a/kernel/driver/usb/xhci/src/regs.rs
+++ b/kernel/driver/usb/xhci/src/regs.rs
@@ -6,7 +6,7 @@ use libk_mm::{
     device::RawDeviceMemoryMapping,
     PageBox,
 };
-use libk_util::sync::spin_rwlock::IrqSafeRwLock;
+use libk_util::sync::{spin_rwlock::IrqSafeRwLock, IrqSafeSpinlock};
 use xhci_lib::{
     accessor::{array, marker},
     registers::{
@@ -19,9 +19,13 @@ use yggdrasil_abi::primitive_enum;
 
 use crate::ring::{CommandRing, EventRing, EventRingSegmentTable, GenericRing};
 
+struct MapperInner {
+    mappings: Vec<(RawDeviceMemoryMapping, usize)>,
+}
+
 #[derive(Clone)]
 pub struct Mapper {
-    mappings: Vec<Arc<RawDeviceMemoryMapping>>,
+    mapper: Arc<IrqSafeSpinlock<MapperInner>>,
 }
 
 pub struct LockedArray<T> {
@@ -136,6 +140,13 @@ impl Regs {
         self.capability.hcsparams1.read_volatile().number_of_ports() as _
     }
 
+    pub fn scratchpad_count(&self) -> usize {
+        self.capability
+            .hcsparams2
+            .read_volatile()
+            .max_scratchpad_buffers() as usize
+    }
+
     pub fn configure(
         &self,
         dcbaa: &PageBox<[PhysicalAddress]>,
@@ -176,6 +187,11 @@ impl Regs {
         o.usbcmd.update_volatile(|u| {
             u.set_interrupter_enable().set_run_stop();
         });
+
+        // Wait for the controller to come out of "not ready" state
+        while o.usbsts.read_volatile().controller_not_ready() {
+            core::hint::spin_loop();
+        }
     }
 
     pub fn handle_interrupt(&self) -> Option<UsbStatusRegister> {
@@ -222,22 +238,58 @@ impl Regs {
 impl Mapper {
     pub fn new() -> Self {
         Self {
-            mappings: Vec::new(),
+            mapper: Arc::new(IrqSafeSpinlock::new(MapperInner {
+                mappings: Vec::new(),
+            })),
         }
     }
 }
 
 impl xhci_lib::accessor::Mapper for Mapper {
+    // FIXME really slow, but at least reduces the number of unneeded mappings
     unsafe fn map(&mut self, phys_start: usize, bytes: usize) -> NonZeroUsize {
+        let mut mapper = self.mapper.lock();
+        for (mapping, refcount) in mapper.mappings.iter_mut() {
+            if phys_start as u64 >= mapping.physical_base
+                && ((phys_start + bytes) as u64)
+                    <= mapping.physical_base + (mapping.page_size * mapping.page_count) as u64
+            {
+                *refcount += 1;
+                return NonZeroUsize::new_unchecked(
+                    mapping.base_address + (phys_start - mapping.physical_base as usize),
+                );
+            }
+        }
         let mapping = RawDeviceMemoryMapping::map(phys_start as u64, bytes, Default::default())
+            .inspect_err(|error| {
+                log::error!(
+                    "Cannot map xHC MMIO region {:#x?}: {error:?}",
+                    phys_start..phys_start + bytes
+                )
+            })
             .expect("Could not map an USB xHCI region");
         let address = mapping.address;
-        self.mappings.push(Arc::new(mapping));
+        log::info!("xhci: map {:#x} -> {:#x}", mapping.base_address, address);
+        mapper.mappings.push((mapping, 1));
         NonZeroUsize::new_unchecked(address)
     }
 
     fn unmap(&mut self, _virt_start: usize, _bytes: usize) {
-        // TODO
+        // let mut mapper = self.mapper.lock();
+        // let index = mapper.mappings.iter().position(|(mapping, _)| {
+        //     virt_start >= mapping.base_address
+        //         && virt_start + bytes
+        //             <= mapping.base_address + mapping.page_count * mapping.page_size
+        // });
+
+        // if let Some(index) = index {
+        //     let (entry, refcount) = &mut mapper.mappings[index];
+        //     *refcount -= 1;
+        //     if *refcount == 0 {
+        //         log::info!("xhci: unmap {:#x}", entry.base_address);
+        //         mapper.mappings.remove(index);
+        //     }
+        // }
     }
 }
 
diff --git a/kernel/driver/usb/xhci/src/ring/command.rs b/kernel/driver/usb/xhci/src/ring/command.rs
index 6c626e79..9ce1d16f 100644
--- a/kernel/driver/usb/xhci/src/ring/command.rs
+++ b/kernel/driver/usb/xhci/src/ring/command.rs
@@ -15,7 +15,6 @@ use libk_util::{
     sync::{spin_rwlock::IrqSafeRwLock, IrqSafeSpinlock},
     waker::QueueWaker,
 };
-use xhci_lib::context;
 use ygg_driver_usb::error::UsbError;
 use yggdrasil_abi::define_bitfields;
 
@@ -110,14 +109,18 @@ impl CommandRing {
         inner.enqueue(trb)
     }
 
-    pub async fn address_device<E: CommandExecutor, const N: usize>(
+    pub async fn address_device<E: CommandExecutor>(
         &self,
         executor: &E,
         slot_id: u8,
-        input: &mut PageBox<context::Input<N>>,
+        cx_physical_address: PhysicalAddress,
+        bsr: bool,
     ) -> Result<(), UsbError> {
-        self.submit_and_wait(executor, AddressDeviceCommandTrb::new(input, slot_id))
-            .await?;
+        self.submit_and_wait(
+            executor,
+            AddressDeviceCommandTrb::new(cx_physical_address, slot_id, bsr),
+        )
+        .await?;
         Ok(())
     }
 
@@ -125,15 +128,32 @@ impl CommandRing {
         &self,
         executor: &E,
         slot_id: u8,
-        input: &mut PageBox<context::Input<8>>,
+        cx_physical_address: PhysicalAddress,
     ) -> Result<(), UsbError> {
-        self.submit_and_wait(executor, ConfigureEndpointCommandTrb::new(input, slot_id))
-            .await?;
+        self.submit_and_wait(
+            executor,
+            ConfigureEndpointCommandTrb::new(cx_physical_address, slot_id),
+        )
+        .await?;
         Ok(())
     }
 
-    pub async fn enable_slot<E: CommandExecutor>(&self, executor: &E) -> Result<u8, UsbError> {
-        self.submit_and_wait(executor, EnableSlotCommandTrb::new())
+    pub async fn enable_slot<E: CommandExecutor>(
+        &self,
+        executor: &E,
+        slot_type: u8,
+    ) -> Result<u8, UsbError> {
+        self.submit_and_wait(executor, EnableSlotCommandTrb::new(slot_type))
+            .await
+    }
+
+    #[allow(unused)]
+    pub async fn disable_slot<E: CommandExecutor>(
+        &self,
+        executor: &E,
+        slot_id: u8,
+    ) -> Result<u8, UsbError> {
+        self.submit_and_wait(executor, DisableSlotCommandTrb::new(slot_id))
             .await
     }
 
@@ -153,6 +173,7 @@ impl CommandRing {
                 } else {
                     Poll::Ready(Err(UsbError::HostControllerCommandFailed(
                         status.completion_code,
+                        status.completion_parameter,
                     )))
                 }
             } else {
@@ -181,8 +202,15 @@ define_bitfields! {
     }
 }
 
+define_bitfields! {
+    pub DisableSlotCommandFlags : u32 {
+        (24..32) => slot_id
+    }
+}
+
 define_bitfields! {
     pub AddressDeviceCommandFlags : u32 {
+        9 => bsr + set_bsr,
         (24..32) => slot_id
     }
 }
@@ -207,6 +235,13 @@ pub struct EnableSlotCommandTrb {
     pub flags: EnableSlotCommandFlags,
 }
 
+#[derive(Clone, Copy, Debug, Pod, Zeroable)]
+#[repr(C, align(16))]
+pub struct DisableSlotCommandTrb {
+    _0: [u32; 3],
+    pub flags: DisableSlotCommandFlags,
+}
+
 #[derive(Clone, Copy, Debug, Pod, Zeroable)]
 #[repr(C, align(16))]
 pub struct AddressDeviceCommandTrb {
@@ -233,6 +268,7 @@ pub struct RawCommandTrb {
 #[derive(Debug, Clone, Copy)]
 pub struct CommandReply {
     pub completion_code: u8,
+    pub completion_parameter: u32,
     pub slot_id: u8,
 }
 
@@ -241,34 +277,37 @@ pub trait CommandTrb: Pod + fmt::Debug {
 }
 
 impl EnableSlotCommandTrb {
-    pub fn new() -> Self {
+    pub fn new(slot_type: u8) -> Self {
         Self {
             _0: [0; 3],
-            flags: EnableSlotCommandFlags::new(0),
+            flags: EnableSlotCommandFlags::new(slot_type as u32),
+        }
+    }
+}
+
+impl DisableSlotCommandTrb {
+    pub fn new(slot_id: u8) -> Self {
+        Self {
+            _0: [0; 3],
+            flags: DisableSlotCommandFlags::new(slot_id as u32),
         }
     }
 }
 
 impl AddressDeviceCommandTrb {
-    pub fn new<const N: usize>(
-        input_context: &mut PageBox<context::Input<N>>,
-        slot_id: u8,
-    ) -> Self {
+    pub fn new(input_context_address: PhysicalAddress, slot_id: u8, bsr: bool) -> Self {
         Self {
-            input_context_address: unsafe { input_context.as_physical_address() },
+            input_context_address,
             _0: 0,
-            flags: AddressDeviceCommandFlags::new(slot_id as _),
+            flags: AddressDeviceCommandFlags::new(bsr, slot_id as _),
         }
     }
 }
 
 impl ConfigureEndpointCommandTrb {
-    pub fn new<const N: usize>(
-        input_context: &mut PageBox<context::Input<N>>,
-        slot_id: u8,
-    ) -> Self {
+    pub fn new(input_context_address: PhysicalAddress, slot_id: u8) -> Self {
         Self {
-            input_context_address: unsafe { input_context.as_physical_address() },
+            input_context_address,
             _0: 0,
             flags: ConfigureEndpointCommandFlags::new(slot_id as _),
         }
@@ -279,6 +318,10 @@ impl CommandTrb for EnableSlotCommandTrb {
     const TRB_TYPE: u8 = 9;
 }
 
+impl CommandTrb for DisableSlotCommandTrb {
+    const TRB_TYPE: u8 = 10;
+}
+
 impl CommandTrb for AddressDeviceCommandTrb {
     const TRB_TYPE: u8 = 11;
 }
diff --git a/kernel/driver/usb/xhci/src/ring/event.rs b/kernel/driver/usb/xhci/src/ring/event.rs
index 10785ab0..a5a2f4a8 100644
--- a/kernel/driver/usb/xhci/src/ring/event.rs
+++ b/kernel/driver/usb/xhci/src/ring/event.rs
@@ -218,6 +218,7 @@ impl RawEventTrb {
                     address: command.address,
                     reply: CommandReply {
                         completion_code: command.status.completion_code() as _,
+                        completion_parameter: command.status.completion_parameter(),
                         slot_id: command.flags.slot_id() as _,
                     },
                 })
diff --git a/kernel/driver/usb/xhci/src/ring/transfer.rs b/kernel/driver/usb/xhci/src/ring/transfer.rs
index 34781f98..af539948 100644
--- a/kernel/driver/usb/xhci/src/ring/transfer.rs
+++ b/kernel/driver/usb/xhci/src/ring/transfer.rs
@@ -7,11 +7,13 @@ use alloc::{collections::BTreeMap, sync::Arc, vec::Vec};
 use bytemuck::{Pod, Zeroable};
 use libk_mm::{
     address::{AsPhysicalAddress, PhysicalAddress},
-    PageBox,
+    PageBox, PageSlice,
 };
 use libk_util::sync::{spin_rwlock::IrqSafeRwLock, IrqSafeSpinlock, IrqSafeSpinlockGuard};
 use ygg_driver_usb::{
-    communication::UsbInterruptTransfer, error::UsbError, pipe::control::ControlTransferSetup,
+    communication::{UsbBulkTransfer, UsbInterruptTransfer},
+    error::UsbError,
+    pipe::control::ControlTransferSetup,
     UsbControlTransfer, UsbDirection, UsbTransferStatus, UsbTransferToken,
 };
 use yggdrasil_abi::define_bitfields;
@@ -55,6 +57,29 @@ pub struct InterruptInTransferRing {
     shutdown: AtomicBool,
 }
 
+pub struct BulkInTransferRing {
+    inner: IrqSafeSpinlock<TransferRingInner>,
+    capacity: usize,
+
+    completions: IrqSafeRwLock<BTreeMap<PhysicalAddress, Arc<UsbTransferStatus>>>,
+
+    slot_id: u8,
+    ep_id: u8,
+
+    shutdown: AtomicBool,
+}
+pub struct BulkOutTransferRing {
+    inner: IrqSafeSpinlock<TransferRingInner>,
+    capacity: usize,
+
+    completions: IrqSafeRwLock<BTreeMap<PhysicalAddress, Arc<UsbTransferStatus>>>,
+
+    slot_id: u8,
+    ep_id: u8,
+
+    shutdown: AtomicBool,
+}
+
 struct TransferBuilder<'a> {
     ring: &'a ControlTransferRing,
     ring_inner: IrqSafeSpinlockGuard<'a, TransferRingInner>,
@@ -248,6 +273,96 @@ impl GenericTransferRing for InterruptInTransferRing {
     }
 }
 
+impl GenericRing for BulkInTransferRing {
+    fn base(&self) -> PhysicalAddress {
+        unsafe { self.inner.lock().trbs.as_physical_address() }
+    }
+
+    fn capacity(&self) -> usize {
+        self.capacity
+    }
+}
+
+impl GenericTransferRing for BulkInTransferRing {
+    fn dequeue_pointer(&self) -> PhysicalAddress {
+        let inner = self.inner.lock();
+        unsafe { inner.trbs.as_physical_address() }
+            .add(inner.dequeue_index * size_of::<RawTransferTrb>())
+    }
+
+    fn notify(&self, address: PhysicalAddress, value: u32) {
+        if value == 0 {
+            return;
+        }
+
+        let mut completions = self.completions.write();
+        if let Some(status) = completions.remove(&address) {
+            status.signal(value);
+        }
+    }
+
+    fn shutdown(&self) {
+        self.shutdown.store(true, Ordering::Release);
+        let mut completions = self.completions.write();
+        while let Some((_, status)) = completions.pop_first() {
+            status.abort();
+        }
+    }
+
+    fn slot_id(&self) -> u8 {
+        self.slot_id
+    }
+
+    fn endpoint_id(&self) -> u8 {
+        self.ep_id
+    }
+}
+
+impl GenericRing for BulkOutTransferRing {
+    fn capacity(&self) -> usize {
+        self.capacity
+    }
+
+    fn base(&self) -> PhysicalAddress {
+        unsafe { self.inner.lock().trbs.as_physical_address() }
+    }
+}
+
+impl GenericTransferRing for BulkOutTransferRing {
+    fn dequeue_pointer(&self) -> PhysicalAddress {
+        let inner = self.inner.lock();
+        unsafe { inner.trbs.as_physical_address() }
+            .add(inner.dequeue_index * size_of::<RawTransferTrb>())
+    }
+
+    fn shutdown(&self) {
+        self.shutdown.store(true, Ordering::Release);
+        let mut completions = self.completions.write();
+        while let Some((_, status)) = completions.pop_first() {
+            status.abort();
+        }
+    }
+
+    fn notify(&self, address: PhysicalAddress, value: u32) {
+        if value == 0 {
+            return;
+        }
+
+        let mut completions = self.completions.write();
+        if let Some(status) = completions.remove(&address) {
+            status.signal(value);
+        }
+    }
+
+    fn endpoint_id(&self) -> u8 {
+        self.ep_id
+    }
+
+    fn slot_id(&self) -> u8 {
+        self.slot_id
+    }
+}
+
 impl InterruptInTransferRing {
     pub fn new(slot_id: u8, ep_id: u8, capacity: usize) -> Result<Self, UsbError> {
         let trbs = PageBox::new_zeroed_slice(capacity).map_err(UsbError::MemoryError)?;
@@ -305,6 +420,120 @@ impl InterruptInTransferRing {
     }
 }
 
+impl BulkInTransferRing {
+    pub fn new(slot_id: u8, ep_id: u8, capacity: usize) -> Result<Self, UsbError> {
+        let trbs = PageBox::new_zeroed_slice(capacity).map_err(UsbError::MemoryError)?;
+
+        Ok(Self {
+            inner: IrqSafeSpinlock::new(TransferRingInner {
+                trbs,
+                enqueue_index: 0,
+                dequeue_index: 0,
+                cycle_bit: true,
+            }),
+            completions: IrqSafeRwLock::new(BTreeMap::new()),
+            slot_id,
+            ep_id,
+            capacity,
+            shutdown: AtomicBool::new(false),
+        })
+    }
+
+    pub fn start_transfer<E: CommandExecutor>(
+        &self,
+        executor: &E,
+        buffer: &mut PageSlice<u8>,
+    ) -> Result<UsbBulkTransfer, UsbError> {
+        // Don't even try to start the transfer
+        if self.shutdown.load(Ordering::Acquire) {
+            return Err(UsbError::DeviceDisconnected);
+        }
+
+        let status = Arc::new(UsbTransferStatus::new());
+        let address = self.inner.lock().enqueue(NormalTransferTrb::new(
+            unsafe { buffer.as_physical_address() },
+            buffer.len(),
+            true,
+        ));
+        self.completions.write().insert(address, status.clone());
+
+        executor.ring_doorbell(self.slot_id as _, self.ep_id);
+
+        Ok(UsbBulkTransfer {
+            length: buffer.len(),
+            direction: UsbDirection::In,
+            address,
+            status,
+        })
+    }
+
+    pub fn complete_transfer(&self, _transfer: UsbBulkTransfer) {
+        // Bulk transfers are queued as one TRB each (see start_transfer)
+        // TODO: Can two transfers happen simultaneously? e.g.
+        //
+        // [TRBa, TRBb] are queued in the ring, both are executing and
+        // TRBb finishes first
+        self.inner.lock().advance();
+    }
+}
+
+impl BulkOutTransferRing {
+    pub fn new(slot_id: u8, ep_id: u8, capacity: usize) -> Result<Self, UsbError> {
+        let trbs = PageBox::new_zeroed_slice(capacity).map_err(UsbError::MemoryError)?;
+
+        Ok(Self {
+            inner: IrqSafeSpinlock::new(TransferRingInner {
+                trbs,
+                enqueue_index: 0,
+                dequeue_index: 0,
+                cycle_bit: true,
+            }),
+            completions: IrqSafeRwLock::new(BTreeMap::new()),
+            slot_id,
+            ep_id,
+            capacity,
+            shutdown: AtomicBool::new(false),
+        })
+    }
+
+    pub fn start_transfer<E: CommandExecutor>(
+        &self,
+        executor: &E,
+        buffer: &PageSlice<u8>,
+    ) -> Result<UsbBulkTransfer, UsbError> {
+        // Don't even try to start the transfer
+        if self.shutdown.load(Ordering::Acquire) {
+            return Err(UsbError::DeviceDisconnected);
+        }
+
+        let status = Arc::new(UsbTransferStatus::new());
+        let address = self.inner.lock().enqueue(NormalTransferTrb::new(
+            unsafe { buffer.as_physical_address() },
+            buffer.len(),
+            true,
+        ));
+        self.completions.write().insert(address, status.clone());
+
+        executor.ring_doorbell(self.slot_id as _, self.ep_id);
+
+        Ok(UsbBulkTransfer {
+            direction: UsbDirection::Out,
+            length: buffer.len(),
+            address,
+            status,
+        })
+    }
+
+    pub fn complete_transfer(&self, _transfer: UsbBulkTransfer) {
+        // Bulk transfers are queued as one TRB each (see start_transfer)
+        // TODO: Can two transfers happen simultaneously? e.g.
+        //
+        // [TRBa, TRBb] are queued in the ring, both are executing and
+        // TRBb finishes first
+        self.inner.lock().advance();
+    }
+}
+
 impl ControlTransferRing {
     pub fn new(slot_id: u8, ep_id: u8, capacity: usize) -> Result<Self, UsbError> {
         let trbs = PageBox::new_zeroed_slice(capacity).map_err(UsbError::MemoryError)?;
@@ -328,6 +557,19 @@ impl ControlTransferRing {
         })
     }
 
+    /// Rewinds the ring bookkeeping: the transfer ID counter and both ring indices go back
+    /// to zero and the cycle bit to `true`.
+    ///
+    /// NOTE(review): the TRB memory itself is not cleared here, so entries written before
+    /// the reset stay in the ring — confirm the controller cannot pick them up again.
+    pub fn reset(&self) {
+        let mut inner = self.inner.lock();
+        self.transfer_id.store(0, Ordering::Release);
+        inner.enqueue_index = 0;
+        inner.dequeue_index = 0;
+        inner.cycle_bit = true;
+    }
+
     pub fn start_transfer<E: CommandExecutor>(
         &self,
         executor: &E,
diff --git a/kernel/driver/usb/xhci/src/util.rs b/kernel/driver/usb/xhci/src/util.rs
new file mode 100644
index 00000000..60da8fa2
--- /dev/null
+++ b/kernel/driver/usb/xhci/src/util.rs
@@ -0,0 +1,97 @@
+use core::{
+    future::poll_fn,
+    sync::atomic::{AtomicU64, Ordering},
+    task::Poll,
+};
+
+use futures_util::task::AtomicWaker;
+
+/// A set of up to 64 pending-event flags that an async task can wait on.
+///
+/// Events are raised with [`EventBitmap::signal`] and consumed with
+/// [`EventBitmap::wait_specific`] or [`EventBitmap::wait_any`]. A single waker slot is
+/// shared by all waiters, so only the most recently registered task is woken.
+pub struct EventBitmap {
+    bitmap: AtomicU64,
+    waker: AtomicWaker,
+}
+
+/// Yields the indices of the bits set in `mask`, ascending, stopping at `limit`.
+struct BitIter {
+    mask: u64,
+    index: usize,
+    limit: usize,
+}
+
+impl Iterator for BitIter {
+    type Item = usize;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        while self.index < self.limit {
+            let bit = self.index;
+            self.index += 1;
+
+            if self.mask & (1 << bit) != 0 {
+                return Some(bit);
+            }
+        }
+        None
+    }
+}
+
+impl EventBitmap {
+    /// Creates a bitmap with no pending events.
+    pub fn new() -> Self {
+        Self {
+            bitmap: AtomicU64::new(0),
+            waker: AtomicWaker::new(),
+        }
+    }
+
+    /// Marks event `bit` (which must be below 64) as pending and wakes the waiter, if any.
+    pub fn signal(&self, bit: usize) {
+        self.bitmap.fetch_or(1 << bit, Ordering::Release);
+        self.waker.wake();
+    }
+
+    /// Waits until event `bit` is pending and consumes it.
+    pub async fn wait_specific(&self, bit: usize) {
+        poll_fn(|cx| {
+            // Register before testing the bit: with the check first, a `signal()` landing
+            // between the check and the registration finds no waker, and the wakeup is lost.
+            self.waker.register(cx.waker());
+
+            let state = self.bitmap.fetch_and(!(1 << bit), Ordering::Acquire);
+            if state & (1 << bit) != 0 {
+                Poll::Ready(())
+            } else {
+                Poll::Pending
+            }
+        })
+        .await
+    }
+
+    /// Waits until at least one event is pending, then consumes every pending event and
+    /// yields the indices of those below `max`.
+    ///
+    /// Events at or above `max` are cleared without being reported.
+    pub async fn wait_any(&self, max: usize) -> impl Iterator<Item = usize> {
+        poll_fn(|cx| {
+            // Same ordering as in `wait_specific`: register first, then check.
+            self.waker.register(cx.waker());
+
+            let mask = self.bitmap.swap(0, Ordering::Acquire);
+            if mask == 0 {
+                return Poll::Pending;
+            }
+
+            Poll::Ready(BitIter {
+                mask,
+                index: 0,
+                // The mask has 64 bits; scanning past them would overflow the shift.
+                limit: max.min(u64::BITS as usize),
+            })
+        })
+        .await
+    }
+}
diff --git a/kernel/libk/src/fs/devfs.rs b/kernel/libk/src/fs/devfs.rs
index 7402cda6..861d9e38 100644
--- a/kernel/libk/src/fs/devfs.rs
+++ b/kernel/libk/src/fs/devfs.rs
@@ -9,7 +9,7 @@ use crate::{
     vfs::{
         impls::{fixed_path_symlink, MemoryDirectory},
         path::OwnedFilename,
-        AccessToken, Metadata, Node, NodeFlags, NodeRef,
+        AccessToken, Filename, Metadata, Node, NodeFlags, NodeRef,
     },
 };
 
@@ -41,6 +41,18 @@ pub fn redirect<S: AsRef<str>>(name: S, destination: &str) -> Result<(), Error>
     root.add_child(filename, fixed_path_symlink(destination))
 }
 
+/// Removes the entry called `name` from the devfs root directory
+///
+/// # Errors
+///
+/// Returns an error if `name` is rejected by `Filename::new` or if the removal itself fails.
+pub fn remove_node<S: AsRef<str>>(name: S) -> Result<(), Error> {
+    let name = name.as_ref();
+    let root = DEVFS_ROOT.get();
+    let filename = Filename::new(name)?;
+    root.remove_file(filename, unsafe { AccessToken::authorized() })
+}
+
 /// Adds a character device with a custom name
 pub fn add_named_char_device<S: AsRef<str>>(
     dev: Arc<dyn CharDevice>,
diff --git a/kernel/libk/src/task/runtime/task.rs b/kernel/libk/src/task/runtime/task.rs
index 76cec2eb..3a39c1b6 100644
--- a/kernel/libk/src/task/runtime/task.rs
+++ b/kernel/libk/src/task/runtime/task.rs
@@ -1,4 +1,4 @@
-use core::fmt;
+use core::{fmt, sync::atomic::AtomicBool};
 
 use alloc::sync::Arc;
 use futures_util::{future::BoxFuture, task::ArcWake, Future, FutureExt};
@@ -12,6 +12,7 @@ pub trait Termination {
 
 pub struct Task {
     pub(super) future: IrqSafeSpinlock<Option<BoxFuture<'static, ()>>>,
+    pub(super) enqueued: AtomicBool,
 }
 
 impl ArcWake for Task {
@@ -28,7 +29,10 @@ impl Task {
             }
             .boxed(),
         ));
-        Arc::new(Self { future })
+        Arc::new(Self {
+            future,
+            enqueued: AtomicBool::new(false),
+        })
     }
 }
 
diff --git a/kernel/libk/src/task/runtime/task_queue.rs b/kernel/libk/src/task/runtime/task_queue.rs
index 45b0479e..b9d205e7 100644
--- a/kernel/libk/src/task/runtime/task_queue.rs
+++ b/kernel/libk/src/task/runtime/task_queue.rs
@@ -1,3 +1,5 @@
+use core::sync::atomic::Ordering;
+
 use alloc::sync::Arc;
 use crossbeam_queue::ArrayQueue;
 use libk_util::{sync::IrqGuard, OneTimeInit};
@@ -31,9 +33,16 @@ impl TaskQueue {
     }
 
     pub fn enqueue(&self, task: Arc<Task>) -> Result<(), Error> {
+        // Already enqueued
+        if task.enqueued.swap(true, Ordering::Acquire) {
+            return Ok(());
+        }
         let _irq = IrqGuard::acquire();
-        if self.task_queue.push(task).is_err() {
-            todo!();
+        if let Err(task) = self.task_queue.push(task) {
+            // The queue is full and hands the task back: clear the flag again, otherwise
+            // every later wakeup of this task would be dropped as "already enqueued".
+            task.enqueued.store(false, Ordering::Release);
+            return Err(Error::WouldBlock);
         }
         self.wakeup_one();
         Ok(())
@@ -44,6 +53,7 @@ impl TaskQueue {
         // assert!(PlatformImpl::interrupt_mask());
         loop {
             if let Some(task) = self.task_queue.pop() {
+                task.enqueued.store(false, Ordering::Release);
                 return Ok(task);
             }
 
@@ -59,7 +69,7 @@ impl TaskQueue {
 
 /// Initializes the global async/await task queue
 pub fn init_task_queue() {
-    TASK_QUEUE.init(TaskQueue::new(128));
+    TASK_QUEUE.init(TaskQueue::new(256));
 }
 
 pub(super) fn push_task(task: Arc<Task>) -> Result<(), Error> {
diff --git a/kernel/src/arch/x86/mod.rs b/kernel/src/arch/x86/mod.rs
index c76ce0bf..730d15cf 100644
--- a/kernel/src/arch/x86/mod.rs
+++ b/kernel/src/arch/x86/mod.rs
@@ -53,14 +53,14 @@ pub enum SelectedClockSource {
 // TODO move this to some sort of .init_array-style implicit thing
 pub fn register_pci_drivers() {
     // XXX: Only works with MSI-X, so no i686
-    #[cfg(any(target_arch = "x86_64", rust_analyzer))]
-    ygg_driver_pci::register_class_driver(
-        "NVMe Host Controller",
-        0x01,
-        Some(0x08),
-        Some(0x02),
-        ygg_driver_nvme::probe,
-    );
+    // #[cfg(any(target_arch = "x86_64", rust_analyzer))]
+    // ygg_driver_pci::register_class_driver(
+    //     "NVMe Host Controller",
+    //     0x01,
+    //     Some(0x08),
+    //     Some(0x02),
+    //     ygg_driver_nvme::probe,
+    // );
     // XXX: i686 hangs in interrupt handler
     #[cfg(any(target_arch = "x86_64", rust_analyzer))]
     ygg_driver_pci::register_class_driver(