diff --git a/kernel/.gitea/workflows/kernel.yaml b/kernel/.gitea/workflows/kernel.yaml new file mode 100644 index 00000000..cd391311 --- /dev/null +++ b/kernel/.gitea/workflows/kernel.yaml @@ -0,0 +1,51 @@ +name: Kernel tests +run_name: Kernel tests +on: [pull_request] + +jobs: + Test-x86_64-Build: + runs-on: ubuntu-latest + steps: + - name: Checkout kernel sources + uses: actions/checkout@v3 + - name: Install build dependencies + run: | + apt update && apt install -y nasm gcc + - name: Install nightly Rust toolchain + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y --default-toolchain nightly + source "$HOME/.cargo/env" + rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu + - name: Update dependencies + run: | + source "$HOME/.cargo/env" + cd ${{ gitea.workspace }} + cargo update yggdrasil-abi elf + - name: Build x86-64 + run: | + source "$HOME/.cargo/env" + cd ${{ gitea.workspace }} + cargo build -Z build-std=core,alloc,compiler_builtins --target=etc/x86_64-unknown-none.json + Test-aarch64-Build: + runs-on: ubuntu-latest + steps: + - name: Checkout kernel sources + uses: actions/checkout@v3 + - name: Install build dependencies + run: | + apt update && apt install -y nasm gcc + - name: Install nightly Rust toolchain + run: | + curl --proto '=https' --tlsv1.2 -sSf https://sh.rustup.rs | bash -s -- -y --default-toolchain nightly + source "$HOME/.cargo/env" + rustup component add rust-src --toolchain nightly-x86_64-unknown-linux-gnu + - name: Update dependencies + run: | + source "$HOME/.cargo/env" + cd ${{ gitea.workspace }} + cargo update yggdrasil-abi elf + - name: Build aarch64 + run: | + source "$HOME/.cargo/env" + cd ${{ gitea.workspace }} + cargo build -Z build-std=core,alloc,compiler_builtins --target=etc/aarch64-unknown-qemu.json diff --git a/kernel/.gitignore b/kernel/.gitignore new file mode 100644 index 00000000..ea8c4bf7 --- /dev/null +++ b/kernel/.gitignore @@ -0,0 +1 @@ +/target diff --git a/kernel/Cargo.toml b/kernel/Cargo.toml new file mode 100644 index 00000000..c4dcee1f --- /dev/null +++ b/kernel/Cargo.toml @@ -0,0 +1,82 @@ +[package] +name = "yggdrasil-kernel" +version = "0.1.0" +edition = "2021" +build = "build.rs" +authors = ["Mark Poliakov "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html +[profile.dev.package.tock-registers] +opt-level = 3 + +[dependencies] +abi-lib = { git = "https://git.alnyan.me/yggdrasil/abi-generator.git" } + +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +vfs = { path = "lib/vfs" } +device-api = { path = "lib/device-api", features = ["derive"] } +libk = { path = "libk" } +libk-util = { path = "libk/libk-util" } +libk-mm = { path = "libk/libk-mm" } +libk-thread = { path = "libk/libk-thread" } +libk-device = { path = "libk/libk-device" } +memtables = { path = "lib/memtables" } +vmalloc = { path = "lib/vmalloc" } +device-api-macros = { path = "lib/device-api/macros" } + +kernel-arch = { path = "arch" } + +# Drivers +ygg_driver_pci = { path = "driver/bus/pci" } +ygg_driver_usb = { path = "driver/bus/usb" } +ygg_driver_block = { path = "driver/block/core" } +ygg_driver_net_core = { path = "driver/net/core" } +ygg_driver_net_loopback = { path = "driver/net/loopback" } +ygg_driver_virtio_net = { path = "driver/virtio/net", features = ["pci"] } +ygg_driver_ahci = { path = "driver/block/ahci" } +ygg_driver_usb_xhci = { path = "driver/usb/xhci" } +ygg_driver_input = { path = "driver/input" } + +kernel-fs = { path 
= "driver/fs/kernel-fs" } +memfs = { path = "driver/fs/memfs" } + +atomic_enum = "0.2.0" +bitflags = "2.3.3" +linked_list_allocator = "0.10.5" +spinning_top = "0.2.5" +static_assertions = "1.1.0" +tock-registers = "0.8.1" +cfg-if = "1.0.0" +git-version = "0.3.5" +log = "0.4.20" +futures-util = { version = "0.3.28", default-features = false, features = ["alloc", "async-await"] } +crossbeam-queue = { version = "0.3.8", default-features = false, features = ["alloc"] } +bytemuck = { version = "1.14.0", features = ["derive"] } + +[dependencies.elf] +version = "0.7.2" +git = "https://git.alnyan.me/yggdrasil/yggdrasil-elf.git" +default-features = false +features = ["no_std_stream"] + +[target.'cfg(target_arch = "aarch64")'.dependencies] +aarch64-cpu = "9.3.1" +device-tree = { path = "lib/device-tree" } +kernel-arch-aarch64 = { path = "arch/aarch64" } + +[target.'cfg(target_arch = "x86_64")'.dependencies] +yboot-proto = { git = "https://git.alnyan.me/yggdrasil/yboot-proto.git" } +aml = { git = "https://github.com/alnyan/acpi.git", branch = "acpi-system" } +acpi_lib = { git = "https://github.com/alnyan/acpi.git", package = "acpi", branch = "acpi-system" } +acpi-system = { git = "https://github.com/alnyan/acpi-system.git" } +ygg_driver_nvme = { path = "driver/block/nvme" } +kernel-arch-x86_64 = { path = "arch/x86_64" } + +[build-dependencies] +prettyplease = "0.2.15" +yggdrasil-abi-def = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi-def.git" } +abi-generator = { git = "https://git.alnyan.me/yggdrasil/abi-generator.git" } + +[features] +default = ["fb_console"] +fb_console = [] diff --git a/kernel/README.md b/kernel/README.md new file mode 100644 index 00000000..72fe1ac9 --- /dev/null +++ b/kernel/README.md @@ -0,0 +1,87 @@ +yggdrasil-kernel +================ + +Rust Unix-like operating system kernel. + +See also: + +* [ABI for kernel-user communication](https://git.alnyan.me/yggdrasil-abi) +* [Rust fork to use with the kernel](https://git.alnyan.me/yggdrasil/yggdrasil-rust) +* [Userspace programs](https://git.alnyan.me/yggdrasil/yggdrasil-userspace) +* [yboot — x86-64 UEFI bootloader](https://git.alnyan.me/yggdrasil/yboot) + +Main features +------------- + +* Architecture support: [aarch64](/src/arch/aarch64) and [x86_64](/src/arch/x86_64) +* Kernel/userspace preemptive multithreading +* Kernel-space multitasking with `async`/`await` runtime +* Symmetric Multiprocessing +* Unix-like virtual filesystem: + files, directories, block/char devices, symlinks, mounts +* In-memory read-write filesystem for tar-based initrd +* sysfs/devfs +* Binary formats: ELF + `#!/...` shebangs +* Rust-style interfaces for most of the stuff like memory management, devices etc. + +aarch64-specific: + +* PSCI for SMP start-up and power control +* PL011 serial port +* ARM generic timer as system/monotonic timer +* GICv2 IRQ controller + +x86_64-specific: + +* UEFI boot through [yboot](https://git.alnyan.me/yggdrasil/yboot) + (no plans for legacy boot) +* PCIe, with plans to extend to aarch64 as well + * NVMe drive support (read/write) + * AHCI SATA drive support (read/write) +* I/O and Local APIC IRQ controllers +* PS/2 keyboard, +* i8253-based timer (got some problems with HPET on + real hw, had to revert, lol) +* COM ports +* ACPI, [work in progress](https://github.com/rust-osdev/acpi), mostly broken + on real hardware + * ACPI shutdown + * PCI IRQ pin routing + * Events like power button, etc. 
+* Fancy framebuffer console + +Userspace features: + +* Sanitized system calls better suited for Rust +* Userspace threads +* Synchronization primitives through futex-like interface +* Unix-like signals and exceptions + +General plans (in no particular order) +-------------------------------------- + +* Better unification of architecture code +* `async` for VFS (?) +* PCIe NVMe block device +* PCIe SATA block device +* PCIe XHCI USB devices +* Better algorithms for memory management + +Navigation +---------- + +* `src/arch` — architecture-specific code +* `src/device` — device driver implementations + * `bus` — bus devices like USB, PCIe etc. + * `display` — everything related to graphic displays + * `power` — power and reset controllers + * `serial` — serial transceiver drivers + * `devtree.rs` — stuff related to ARM DeviceTree + * `tty.rs` — Unix-style terminal driver implementation +* `src/fs` — in-kernel filesystems (sysfs/devfs) +* `src/mem` — memory management +* `src/proc` — process information management +* `src/syscall` — system call handling +* `src/task` — kernel and userspace tasks, processes and threads +* `src/util` — utilities used within the kernel +* `src/init.rs` — kernel init thread impl. diff --git a/kernel/arch/Cargo.toml b/kernel/arch/Cargo.toml new file mode 100644 index 00000000..78a41e21 --- /dev/null +++ b/kernel/arch/Cargo.toml @@ -0,0 +1,20 @@ +[package] +name = "kernel-arch" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[target.'cfg(all(target_os = "none", target_arch = "x86_64"))'.dependencies] +kernel-arch-x86_64 = { path = "x86_64" } + +[target.'cfg(all(target_os = "none", target_arch = "aarch64"))'.dependencies] +kernel-arch-aarch64 = { path = "aarch64" } + +[target.'cfg(not(target_os = "none"))'.dependencies] +kernel-arch-hosted = { path = "hosted" } + +[dependencies] +kernel-arch-interface = { path = "interface" } + +cfg-if = "1.0.0" diff --git a/kernel/arch/aarch64/Cargo.toml b/kernel/arch/aarch64/Cargo.toml new file mode 100644 index 00000000..839d6ff0 --- /dev/null +++ b/kernel/arch/aarch64/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "kernel-arch-aarch64" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +kernel-arch-interface = { path = "../interface" } +libk-mm-interface = { path = "../../libk/libk-mm/interface" } +memtables = { path = "../../lib/memtables" } +device-api = { path = "../../lib/device-api", features = ["derive"] } + +bitflags = "2.3.3" +static_assertions = "1.1.0" +aarch64-cpu = "9.3.1" +tock-registers = "0.8.1" diff --git a/kernel/arch/aarch64/src/context.S b/kernel/arch/aarch64/src/context.S new file mode 100644 index 00000000..581cf4cf --- /dev/null +++ b/kernel/arch/aarch64/src/context.S @@ -0,0 +1,123 @@ +.global __aarch64_enter_task +.global __aarch64_switch_task +.global __aarch64_switch_task_and_drop + +.global __aarch64_task_enter_kernel +.global __aarch64_task_enter_user + +.section .text + +.macro SAVE_TASK_STATE + sub sp, sp, #{context_size} + + stp x19, x20, [sp, #16 * 0] + stp x21, x22, [sp, #16 * 1] + stp x23, x24, [sp, #16 * 2] + stp x25, x26, [sp, #16 * 3] + stp x27, x28, [sp, #16 * 4] + stp x29, x30, [sp, #16 * 5] + + mrs x19, tpidr_el0 + mrs x20, ttbr0_el1 + stp x19, x20, [sp, #16 * 6] +.endm + +.macro LOAD_TASK_STATE + // x19 == tpidr_el0, 
x20 = ttbr0_el1 + ldp x19, x20, [sp, #16 * 6] + msr tpidr_el0, x19 + msr ttbr0_el1, x20 + + ldp x19, x20, [sp, #16 * 0] + ldp x21, x22, [sp, #16 * 1] + ldp x23, x24, [sp, #16 * 2] + ldp x25, x26, [sp, #16 * 3] + ldp x27, x28, [sp, #16 * 4] + ldp x29, x30, [sp, #16 * 5] + + add sp, sp, #{context_size} +.endm + +__aarch64_task_enter_kernel: + # EL1h, IRQs unmasked + mov x0, #5 + msr spsr_el1, x0 + + # x0 == argument, x1 == entry point + ldp x0, x1, [sp, #0] + msr elr_el1, x1 + + add sp, sp, #16 + + eret + +__aarch64_task_enter_user: + // x0 == sp, x1 == ignored + ldp x0, x1, [sp, #16 * 0] + msr sp_el0, x0 + + # EL0t, IRQs unmasked + msr spsr_el1, xzr + + // x0 == arg, x1 == entry + ldp x0, x1, [sp, #16 * 1] + msr elr_el1, x1 + add sp, sp, #32 + + // Zero the registers + mov x1, xzr + mov x2, xzr + mov x3, xzr + mov x4, xzr + mov x5, xzr + mov x6, xzr + mov x7, xzr + mov x8, xzr + mov x9, xzr + mov x10, xzr + mov x11, xzr + mov x12, xzr + mov x13, xzr + mov x14, xzr + mov x15, xzr + mov x16, xzr + mov x17, xzr + mov x18, xzr + + mov lr, xzr + + dmb ish + isb sy + + eret + +__aarch64_switch_task: + SAVE_TASK_STATE + mov x19, sp + str x19, [x1] + + ldr x0, [x0] + mov sp, x0 + LOAD_TASK_STATE + + ret + +// x0 -- destination context +// x1 -- source (dropped) thread +__aarch64_switch_task_and_drop: + ldr x0, [x0] + mov sp, x0 + + mov x0, x1 + bl __arch_drop_thread + + LOAD_TASK_STATE + + ret + +__aarch64_enter_task: + ldr x0, [x0] + mov sp, x0 + LOAD_TASK_STATE + + ret diff --git a/kernel/arch/aarch64/src/context.rs b/kernel/arch/aarch64/src/context.rs new file mode 100644 index 00000000..503c10d6 --- /dev/null +++ b/kernel/arch/aarch64/src/context.rs @@ -0,0 +1,242 @@ +//! AArch64-specific task context implementation +use core::{arch::global_asm, cell::UnsafeCell, fmt, marker::PhantomData}; + +use kernel_arch_interface::{ + mem::{KernelTableManager, PhysicalMemoryAllocator}, + task::{StackBuilder, TaskContext, TaskFrame}, +}; +use libk_mm_interface::address::PhysicalAddress; +use yggdrasil_abi::{arch::SavedFrame, error::Error}; + +/// Struct for register values saved when taking an exception +#[repr(C)] +pub struct ExceptionFrame { + /// General-purpose registers + pub r: [u64; 32], + /// SPSR_EL1, userspace flags register + pub spsr_el1: u64, + /// ELR_EL1, userspace program counter + pub elr_el1: u64, + /// SP_EL0, userspace stack pointer + pub sp_el0: u64, + _x: u64, + // ... +} + +#[repr(C, align(0x10))] +struct TaskContextInner { + // 0x00 + sp: usize, +} + +/// AArch64 implementation of a task context +#[allow(unused)] +pub struct TaskContextImpl< + K: KernelTableManager, + PA: PhysicalMemoryAllocator
, +> { + inner: UnsafeCell, + stack_base_phys: PhysicalAddress, + stack_size: usize, + + _alloc: PhantomData, + _table_manager: PhantomData, +} + +const COMMON_CONTEXT_SIZE: usize = 8 * 14; + +impl TaskFrame for ExceptionFrame { + fn store(&self) -> SavedFrame { + SavedFrame { + gp_regs: self.r, + spsr_el1: self.spsr_el1, + elr_el1: self.elr_el1, + sp_el0: self.sp_el0, + } + } + + fn restore(&mut self, saved: &SavedFrame) { + self.r = saved.gp_regs; + self.spsr_el1 = saved.spsr_el1; + self.elr_el1 = saved.elr_el1; + self.sp_el0 = saved.sp_el0; + } + + fn argument(&self) -> u64 { + self.r[0] + } + + fn user_ip(&self) -> usize { + self.elr_el1 as _ + } + + fn user_sp(&self) -> usize { + self.sp_el0 as _ + } + + fn set_argument(&mut self, value: u64) { + self.r[0] = value; + } + + fn set_return_value(&mut self, value: u64) { + self.r[0] = value; + } + + fn set_user_ip(&mut self, value: usize) { + self.elr_el1 = value as _; + } + + fn set_user_sp(&mut self, value: usize) { + self.sp_el0 = value as _; + } +} + +impl fmt::Debug for ExceptionFrame { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + for i in (0..32).step_by(2) { + write!( + f, + "x{:<2} = {:#020x}\tx{:<2} = {:#020x}", + i, + self.r[i], + i + 1, + self.r[i + 1] + )?; + if i != 30 { + f.write_str("\n")?; + } + } + + Ok(()) + } +} + +unsafe impl> Sync + for TaskContextImpl +{ +} + +impl> + TaskContext for TaskContextImpl +{ + const USER_STACK_EXTRA_ALIGN: usize = 0; + const SIGNAL_STACK_EXTRA_ALIGN: usize = 0; + + fn kernel(entry: extern "C" fn(usize) -> !, arg: usize) -> Result { + const KERNEL_TASK_PAGES: usize = 8; + let stack_base_phys = PA::allocate_contiguous_pages(KERNEL_TASK_PAGES)?; + let stack_base = stack_base_phys.raw_virtualize::(); + + let mut stack = StackBuilder::new(stack_base, KERNEL_TASK_PAGES * 0x1000); + + // Entry and argument + stack.push(entry as _); + stack.push(arg); + + setup_common_context(&mut stack, __aarch64_task_enter_kernel as _, 0, 0); + + let sp = stack.build(); + + // TODO stack is leaked + + Ok(Self { + inner: UnsafeCell::new(TaskContextInner { sp }), + stack_base_phys, + stack_size: KERNEL_TASK_PAGES * 0x1000, + + _alloc: PhantomData, + _table_manager: PhantomData, + }) + } + + fn user( + entry: usize, + arg: usize, + ttbr0: u64, + user_stack_sp: usize, + tpidr_el0: usize, + ) -> Result { + const USER_TASK_PAGES: usize = 16; + let stack_base_phys = PA::allocate_contiguous_pages(USER_TASK_PAGES)?; + let stack_base = stack_base_phys.raw_virtualize::(); + + let mut stack = StackBuilder::new(stack_base, USER_TASK_PAGES * 0x1000); + + stack.push(entry as _); + stack.push(arg); + stack.push(0); + stack.push(user_stack_sp); + + setup_common_context( + &mut stack, + __aarch64_task_enter_user as _, + ttbr0, + tpidr_el0 as _, + ); + + let sp = stack.build(); + + Ok(Self { + inner: UnsafeCell::new(TaskContextInner { sp }), + stack_base_phys, + stack_size: USER_TASK_PAGES * 0x1000, + + _alloc: PhantomData, + _table_manager: PhantomData, + }) + } + + unsafe fn enter(&self) -> ! 
{ + __aarch64_enter_task(self.inner.get()) + } + + unsafe fn switch(&self, from: &Self) { + __aarch64_switch_task(self.inner.get(), from.inner.get()) + } + + unsafe fn switch_and_drop(&self, thread: *const ()) { + __aarch64_switch_task_and_drop(self.inner.get(), thread); + } +} + +impl> Drop + for TaskContextImpl +{ + fn drop(&mut self) { + assert_eq!(self.stack_size % 0x1000, 0); + + for offset in (0..self.stack_size).step_by(0x1000) { + unsafe { + PA::free_page(self.stack_base_phys.add(offset)); + } + } + } +} + +fn setup_common_context(builder: &mut StackBuilder, entry: usize, ttbr0: u64, tpidr_el0: u64) { + builder.push(ttbr0 as _); // ttbr0_el1 + builder.push(tpidr_el0 as _); // tpidr_el0 + + builder.push(entry); // x30/lr + builder.push(0); // x29 + builder.push(0); // x28 + builder.push(0); // x27 + builder.push(0); // x26 + builder.push(0); // x25 + builder.push(0); // x24 + builder.push(0); // x23 + builder.push(0); // x22 + builder.push(0); // x21 + builder.push(0); // x20 + builder.push(0); // x19 +} + +extern "C" { + fn __aarch64_enter_task(to: *mut TaskContextInner) -> !; + fn __aarch64_switch_task(to: *mut TaskContextInner, from: *mut TaskContextInner); + fn __aarch64_switch_task_and_drop(to: *mut TaskContextInner, thread: *const ()) -> !; + fn __aarch64_task_enter_kernel(); + fn __aarch64_task_enter_user(); +} + +global_asm!(include_str!("context.S"), context_size = const COMMON_CONTEXT_SIZE); diff --git a/kernel/arch/aarch64/src/lib.rs b/kernel/arch/aarch64/src/lib.rs new file mode 100644 index 00000000..e5505b9d --- /dev/null +++ b/kernel/arch/aarch64/src/lib.rs @@ -0,0 +1,119 @@ +#![no_std] +#![feature( + effects, + strict_provenance, + asm_const, + naked_functions, + trait_upcasting +)] + +extern crate alloc; + +use core::sync::atomic::{AtomicUsize, Ordering}; + +use aarch64_cpu::registers::{DAIF, MPIDR_EL1, TPIDR_EL1}; +use alloc::{boxed::Box, vec::Vec}; +use device_api::interrupt::{LocalInterruptController, MessageInterruptController}; +use kernel_arch_interface::{ + cpu::{CpuImpl, IpiQueue}, + task::Scheduler, + util::OneTimeInit, + Architecture, +}; +use tock_registers::interfaces::{ReadWriteable, Readable, Writeable}; + +pub mod context; +pub mod mem; + +pub use context::TaskContextImpl; +pub use mem::{process::ProcessAddressSpaceImpl, KernelTableManagerImpl}; + +pub struct ArchitectureImpl; + +pub trait GicInterface: LocalInterruptController {} + +pub struct PerCpuData { + pub gic: OneTimeInit<&'static dyn GicInterface>, +} + +static IPI_QUEUES: OneTimeInit>> = OneTimeInit::new(); +pub static CPU_COUNT: AtomicUsize = AtomicUsize::new(1); + +#[naked] +extern "C" fn idle_task(_: usize) -> ! 
{ + unsafe { + core::arch::asm!("1: nop; b 1b", options(noreturn)); + } +} + +impl ArchitectureImpl { + pub fn local_cpu_data() -> Option<&'static mut PerCpuData> { + unsafe { (Self::local_cpu() as *mut PerCpuData).as_mut() } + } +} + +impl Architecture for ArchitectureImpl { + type PerCpuData = PerCpuData; + + fn cpu_index() -> u32 { + (MPIDR_EL1.get() & 0xFF) as u32 + } + + fn interrupt_mask() -> bool { + DAIF.read(DAIF::I) != 0 + } + + unsafe fn set_interrupt_mask(mask: bool) -> bool { + let old = Self::interrupt_mask(); + if mask { + DAIF.modify(DAIF::I::SET); + } else { + DAIF.modify(DAIF::I::CLEAR); + } + old + } + + fn wait_for_interrupt() { + aarch64_cpu::asm::wfi(); + } + + unsafe fn set_local_cpu(cpu: *mut ()) { + TPIDR_EL1.set(cpu as _); + } + + unsafe fn init_local_cpu(id: Option, data: Self::PerCpuData) { + assert!( + id.is_none(), + "AArch64 uses MPIDR_EL1 instead of manual ID set" + ); + let id = (MPIDR_EL1.get() & 0xFF) as u32; + let cpu = Box::leak(Box::new(CpuImpl::::new(id, data))); + + cpu.set_local(); + } + + fn local_cpu() -> *mut () { + TPIDR_EL1.get() as _ + } + + unsafe fn init_ipi_queues(queues: Vec>) { + IPI_QUEUES.init(queues); + } + + fn idle_task() -> extern "C" fn(usize) -> ! { + idle_task + } + + fn cpu_count() -> usize { + CPU_COUNT.load(Ordering::Acquire) + } + + fn local_interrupt_controller() -> &'static dyn LocalInterruptController { + let local = Self::local_cpu_data().unwrap(); + *local.gic.get() + } + + fn message_interrupt_controller() -> &'static dyn MessageInterruptController { + todo!() + } +} diff --git a/kernel/arch/aarch64/src/mem/mod.rs b/kernel/arch/aarch64/src/mem/mod.rs new file mode 100644 index 00000000..e7fc6ae7 --- /dev/null +++ b/kernel/arch/aarch64/src/mem/mod.rs @@ -0,0 +1,408 @@ +use core::{ + alloc::Layout, + ops::{Deref, DerefMut}, + ptr::addr_of, + sync::atomic::AtomicUsize, + sync::atomic::Ordering, +}; + +use aarch64_cpu::registers::{TTBR0_EL1, TTBR1_EL1}; +use kernel_arch_interface::{ + mem::{DeviceMemoryAttributes, KernelTableManager, RawDeviceMemoryMapping}, + KERNEL_VIRT_OFFSET, +}; +use libk_mm_interface::{ + address::{FromRaw, PhysicalAddress}, + table::{EntryLevel, EntryLevelExt}, + KernelImageObject, +}; +use memtables::aarch64::{FixedTables, KERNEL_L3_COUNT}; +use static_assertions::const_assert_eq; +use tock_registers::interfaces::Writeable; +use yggdrasil_abi::error::Error; + +use self::table::{PageAttributes, PageEntry, PageTable, L1, L2, L3}; + +pub mod process; +pub mod table; + +#[derive(Debug)] +pub struct KernelTableManagerImpl; + +// TODO eliminate this requirement by using precomputed indices +const MAPPING_OFFSET: usize = KERNEL_VIRT_OFFSET; +const KERNEL_PHYS_BASE: usize = 0x40080000; + +// Precomputed mappings +const KERNEL_L1_INDEX: usize = (KERNEL_VIRT_OFFSET + KERNEL_PHYS_BASE).page_index::(); +const KERNEL_START_L2_INDEX: usize = (KERNEL_VIRT_OFFSET + KERNEL_PHYS_BASE).page_index::(); +const KERNEL_END_L2_INDEX: usize = KERNEL_START_L2_INDEX + KERNEL_L3_COUNT; + +// Must not be zero, should be at 4MiB +const_assert_eq!(KERNEL_START_L2_INDEX, 0); +// From static mapping +const_assert_eq!(KERNEL_L1_INDEX, 1); + +// Runtime mappings +// 2MiB max +const EARLY_MAPPING_L2I: usize = KERNEL_END_L2_INDEX + 1; +// 1GiB max +const HEAP_MAPPING_L1I: usize = KERNEL_L1_INDEX + 1; +// 1GiB max +const DEVICE_MAPPING_L1I: usize = KERNEL_L1_INDEX + 2; +const DEVICE_MAPPING_L3_COUNT: usize = 4; +// 16GiB max +const RAM_MAPPING_START_L1I: usize = KERNEL_L1_INDEX + 3; +pub const RAM_MAPPING_L1_COUNT: usize = 16; + +// 
2MiB for early mappings +const EARLY_MAPPING_OFFSET: usize = + MAPPING_OFFSET | (KERNEL_L1_INDEX * L1::SIZE) | (EARLY_MAPPING_L2I * L2::SIZE); +static mut EARLY_MAPPING_L3: PageTable = PageTable::zeroed(); +// 1GiB for heap mapping +pub const HEAP_MAPPING_OFFSET: usize = MAPPING_OFFSET | (HEAP_MAPPING_L1I * L1::SIZE); +pub static mut HEAP_MAPPING_L2: PageTable = PageTable::zeroed(); +// 1GiB for device MMIO mapping +const DEVICE_MAPPING_OFFSET: usize = MAPPING_OFFSET | (DEVICE_MAPPING_L1I * L1::SIZE); +static mut DEVICE_MAPPING_L2: PageTable = PageTable::zeroed(); +static mut DEVICE_MAPPING_L3S: [PageTable; DEVICE_MAPPING_L3_COUNT] = + [PageTable::zeroed(); DEVICE_MAPPING_L3_COUNT]; +// 16GiB for RAM mapping +pub const RAM_MAPPING_OFFSET: usize = MAPPING_OFFSET | (RAM_MAPPING_START_L1I * L1::SIZE); +pub static MEMORY_LIMIT: AtomicUsize = AtomicUsize::new(0); + +#[link_section = ".data.tables"] +pub static mut KERNEL_TABLES: KernelImageObject = + unsafe { KernelImageObject::new(FixedTables::zeroed()) }; + +impl KernelTableManager for KernelTableManagerImpl { + fn virtualize(address: u64) -> usize { + let address = address as usize; + if address < MEMORY_LIMIT.load(Ordering::Acquire) { + address + RAM_MAPPING_OFFSET + } else { + panic!("Invalid physical address: {:#x}", address); + } + } + + fn physicalize(address: usize) -> u64 { + if address < RAM_MAPPING_OFFSET + || address - RAM_MAPPING_OFFSET >= MEMORY_LIMIT.load(Ordering::Acquire) + { + panic!("Not a virtualized physical address: {:#x}", address); + } + + (address - RAM_MAPPING_OFFSET) as _ + } + + unsafe fn map_device_pages( + base: u64, + count: usize, + attrs: DeviceMemoryAttributes, + ) -> Result, Error> { + map_device_memory(PhysicalAddress::from_raw(base), count, attrs) + } + + unsafe fn unmap_device_pages(mapping: &RawDeviceMemoryMapping) { + unmap_device_memory(mapping) + } +} + +/// Memory mapping which may be used for performing early kernel initialization +pub struct EarlyMapping<'a, T: ?Sized> { + value: &'a mut T, + page_count: usize, +} + +impl<'a, T: Sized> EarlyMapping<'a, T> { + pub unsafe fn map_slice( + physical: PhysicalAddress, + len: usize, + ) -> Result, Error> { + let layout = Layout::array::(len).unwrap(); + let aligned = physical.page_align_down::(); + let offset = physical.page_offset::(); + let page_count = (offset + layout.size() + L3::SIZE - 1) / L3::SIZE; + + let virt = map_early_pages(aligned, page_count)?; + let value = core::slice::from_raw_parts_mut((virt + offset) as *mut T, len); + + Ok(EarlyMapping { value, page_count }) + } +} + +impl<'a, T: ?Sized> Deref for EarlyMapping<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + self.value + } +} + +impl<'a, T: ?Sized> DerefMut for EarlyMapping<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.value + } +} + +impl<'a, T: ?Sized> Drop for EarlyMapping<'a, T> { + fn drop(&mut self) { + let address = (self.value as *mut T).addr() & !(L3::SIZE - 1); + + for i in 0..self.page_count { + let page = address + i * L3::SIZE; + + unsafe { + unmap_early_page(page); + } + } + } +} + +fn kernel_table_flags() -> PageAttributes { + PageAttributes::TABLE + | PageAttributes::ACCESS + | PageAttributes::SH_INNER + | PageAttributes::PAGE_ATTR_NORMAL + | PageAttributes::PRESENT +} + +fn ram_block_flags() -> PageAttributes { + // TODO UXN, PXN + PageAttributes::BLOCK + | PageAttributes::ACCESS + | PageAttributes::SH_INNER + | PageAttributes::PAGE_ATTR_NORMAL + | PageAttributes::PRESENT +} + +// Early mappings +unsafe fn 
map_early_pages(physical: PhysicalAddress, count: usize) -> Result { + for l3i in 0..512 { + let mut taken = false; + for i in 0..count { + if EARLY_MAPPING_L3[i + l3i].is_present() { + taken = true; + break; + } + } + + if taken { + continue; + } + + for i in 0..count { + let page = physical.add(i * L3::SIZE); + // TODO NX, NC + EARLY_MAPPING_L3[i + l3i] = PageEntry::normal_page(page, PageAttributes::empty()); + } + + return Ok(EARLY_MAPPING_OFFSET + l3i * L3::SIZE); + } + + Err(Error::OutOfMemory) +} + +unsafe fn unmap_early_page(address: usize) { + if !(EARLY_MAPPING_OFFSET..EARLY_MAPPING_OFFSET + L2::SIZE).contains(&address) { + panic!("Tried to unmap invalid early mapping: {:#x}", address); + } + + let l3i = (address - EARLY_MAPPING_OFFSET).page_index::(); + + assert!(EARLY_MAPPING_L3[l3i].is_present()); + EARLY_MAPPING_L3[l3i] = PageEntry::INVALID; + + // TODO invalidate tlb +} + +pub unsafe fn map_ram_l1(index: usize) { + if index >= RAM_MAPPING_L1_COUNT { + todo!() + } + assert_eq!(KERNEL_TABLES.l1.data[index + RAM_MAPPING_START_L1I], 0); + + KERNEL_TABLES.l1.data[index + RAM_MAPPING_START_L1I] = + ((index * L1::SIZE) as u64) | ram_block_flags().bits(); +} + +pub unsafe fn map_heap_l2(index: usize, page: PhysicalAddress) { + if index >= 512 { + todo!() + } + assert!(!HEAP_MAPPING_L2[index].is_present()); + // TODO UXN, PXN + HEAP_MAPPING_L2[index] = PageEntry::normal_block(page, PageAttributes::empty()); +} + +// Device mappings +unsafe fn map_device_memory_l3( + base: PhysicalAddress, + count: usize, + _attrs: DeviceMemoryAttributes, +) -> Result { + // TODO don't map pages if already mapped + + 'l0: for i in 0..DEVICE_MAPPING_L3_COUNT * 512 { + for j in 0..count { + let l2i = (i + j) / 512; + let l3i = (i + j) % 512; + + if DEVICE_MAPPING_L3S[l2i][l3i].is_present() { + continue 'l0; + } + } + + for j in 0..count { + let l2i = (i + j) / 512; + let l3i = (i + j) % 512; + + // TODO NX, NC + DEVICE_MAPPING_L3S[l2i][l3i] = PageEntry::device_page(base.add(j * L3::SIZE)); + } + + return Ok(DEVICE_MAPPING_OFFSET + i * L3::SIZE); + } + + Err(Error::OutOfMemory) +} + +unsafe fn map_device_memory_l2( + base: PhysicalAddress, + count: usize, + _attrs: DeviceMemoryAttributes, +) -> Result { + 'l0: for i in DEVICE_MAPPING_L3_COUNT..512 { + for j in 0..count { + if DEVICE_MAPPING_L2[i + j].is_present() { + continue 'l0; + } + } + + for j in 0..count { + DEVICE_MAPPING_L2[i + j] = PageEntry::::device_block(base.add(j * L2::SIZE)); + } + + // log::debug!( + // "map l2s: base={:#x}, count={} -> {:#x}", + // base, + // count, + // DEVICE_MAPPING_OFFSET + i * L2::SIZE + // ); + return Ok(DEVICE_MAPPING_OFFSET + i * L2::SIZE); + } + + Err(Error::OutOfMemory) +} + +pub unsafe fn map_device_memory( + base: PhysicalAddress, + size: usize, + attrs: DeviceMemoryAttributes, +) -> Result, Error> { + // debugln!("Map {}B @ {:#x}", size, base); + let l3_aligned = base.page_align_down::(); + let l3_offset = base.page_offset::(); + let page_count = (l3_offset + size).page_count::(); + + if page_count > 256 { + // Large mapping, use L2 mapping instead + let l2_aligned = base.page_align_down::(); + let l2_offset = base.page_offset::(); + let page_count = (l2_offset + size).page_count::(); + + let base_address = map_device_memory_l2(l2_aligned, page_count, attrs)?; + let address = base_address + l2_offset; + + Ok(RawDeviceMemoryMapping::from_raw_parts( + address, + base_address, + page_count, + L2::SIZE, + )) + } else { + // Just map the pages directly + let base_address = map_device_memory_l3(l3_aligned, 
page_count, attrs)?; + let address = base_address + l3_offset; + + Ok(RawDeviceMemoryMapping::from_raw_parts( + address, + base_address, + page_count, + L3::SIZE, + )) + } +} + +pub unsafe fn unmap_device_memory(map: &RawDeviceMemoryMapping) { + // debugln!( + // "Unmap {}B @ {:#x}", + // map.page_count * map.page_size, + // map.base_address + // ); + match map.page_size { + L3::SIZE => { + for i in 0..map.page_count { + let page = map.base_address + i * L3::SIZE; + let l2i = page.page_index::(); + let l3i = page.page_index::(); + assert!(DEVICE_MAPPING_L3S[l2i][l3i].is_present()); + DEVICE_MAPPING_L3S[l2i][l3i] = PageEntry::INVALID; + + tlb_flush_vaae1(page); + } + } + L2::SIZE => todo!(), + _ => unimplemented!(), + } +} + +#[inline] +pub fn tlb_flush_vaae1(mut page: usize) { + page >>= 12; + unsafe { + core::arch::asm!("tlbi vaae1, {page}", page = in(reg) page); + } +} + +/// (BSP-early init) loads precomputed kernel mapping tables for the kernel to jump to "higher-half" +/// +/// # Safety +/// +/// Unsafe, must only be called by BSP during its early init while still in "lower-half" +pub unsafe fn load_fixed_tables() { + let ttbr0 = KERNEL_TABLES.l1.data.as_ptr() as u64; + TTBR0_EL1.set(ttbr0); + TTBR1_EL1.set(ttbr0); +} + +/// Sets up additional translation tables for kernel usage +/// +/// # Safety +/// +/// Unsafe, must only be called by BSP during its early init, must already be in "higher-half" +pub unsafe fn init_fixed_tables() { + // TODO this could be built in compile-time too? + let early_mapping_l3_phys = addr_of!(EARLY_MAPPING_L3) as usize - KERNEL_VIRT_OFFSET; + let device_mapping_l2_phys = addr_of!(DEVICE_MAPPING_L2) as usize - KERNEL_VIRT_OFFSET; + let heap_mapping_l2_phys = addr_of!(HEAP_MAPPING_L2) as usize - KERNEL_VIRT_OFFSET; + + for i in 0..DEVICE_MAPPING_L3_COUNT { + let device_mapping_l3_phys = PhysicalAddress::from_raw( + &DEVICE_MAPPING_L3S[i] as *const _ as usize - KERNEL_VIRT_OFFSET, + ); + DEVICE_MAPPING_L2[i] = PageEntry::table(device_mapping_l3_phys, PageAttributes::empty()); + } + + assert_eq!(KERNEL_TABLES.l2.data[EARLY_MAPPING_L2I], 0); + KERNEL_TABLES.l2.data[EARLY_MAPPING_L2I] = + (early_mapping_l3_phys as u64) | kernel_table_flags().bits(); + + assert_eq!(KERNEL_TABLES.l1.data[HEAP_MAPPING_L1I], 0); + KERNEL_TABLES.l1.data[HEAP_MAPPING_L1I] = + (heap_mapping_l2_phys as u64) | kernel_table_flags().bits(); + + assert_eq!(KERNEL_TABLES.l1.data[DEVICE_MAPPING_L1I], 0); + KERNEL_TABLES.l1.data[DEVICE_MAPPING_L1I] = + (device_mapping_l2_phys as u64) | kernel_table_flags().bits(); +} diff --git a/kernel/arch/aarch64/src/mem/process.rs b/kernel/arch/aarch64/src/mem/process.rs new file mode 100644 index 00000000..8fb9a4d9 --- /dev/null +++ b/kernel/arch/aarch64/src/mem/process.rs @@ -0,0 +1,156 @@ +//! 
AArch64-specific process address space management +use core::{ + marker::PhantomData, + sync::atomic::{AtomicU8, Ordering}, +}; + +use libk_mm_interface::{ + address::{AsPhysicalAddress, PhysicalAddress}, + pointer::PhysicalRefMut, + process::ProcessAddressSpaceManager, + table::{ + EntryLevel, EntryLevelDrop, EntryLevelExt, MapAttributes, NextPageTable, TableAllocator, + }, +}; +use yggdrasil_abi::error::Error; + +use crate::{mem::table::PageEntry, KernelTableManagerImpl}; + +use super::{ + table::{PageTable, L1, L2, L3}, + tlb_flush_vaae1, +}; + +/// AArch64 implementation of a process address space table +#[repr(C)] +pub struct ProcessAddressSpaceImpl { + l1: PhysicalRefMut<'static, PageTable, KernelTableManagerImpl>, + asid: u8, + _alloc: PhantomData, +} + +impl ProcessAddressSpaceManager for ProcessAddressSpaceImpl { + const LOWER_LIMIT_PFN: usize = 8; + // 16GiB VM limit + const UPPER_LIMIT_PFN: usize = (16 << 30) / L3::SIZE; + + fn new() -> Result { + static LAST_ASID: AtomicU8 = AtomicU8::new(1); + + let asid = LAST_ASID.fetch_add(1, Ordering::AcqRel); + + let mut l1 = unsafe { + PhysicalRefMut::<'static, PageTable, KernelTableManagerImpl>::map( + TA::allocate_page_table()?, + ) + }; + + for i in 0..512 { + l1[i] = PageEntry::INVALID; + } + + Ok(Self { + l1, + asid, + _alloc: PhantomData, + }) + } + + fn translate(&self, address: usize) -> Result<(PhysicalAddress, MapAttributes), Error> { + self.read_l3_entry(address).ok_or(Error::DoesNotExist) + } + + unsafe fn map_page( + &mut self, + address: usize, + physical: PhysicalAddress, + flags: MapAttributes, + ) -> Result<(), Error> { + self.write_l3_entry( + address, + PageEntry::normal_page(physical, flags.into()), + false, + ) + } + + unsafe fn unmap_page(&mut self, address: usize) -> Result { + self.pop_l3_entry(address) + } + + fn as_address_with_asid(&self) -> u64 { + unsafe { u64::from(self.l1.as_physical_address()) | ((self.asid as u64) << 48) } + } + + unsafe fn clear(&mut self) { + self.l1 + .drop_range::(0..((Self::UPPER_LIMIT_PFN * L3::SIZE).page_index::())); + } +} + +impl ProcessAddressSpaceImpl { + // Write a single 4KiB entry + fn write_l3_entry( + &mut self, + virt: usize, + entry: PageEntry, + overwrite: bool, + ) -> Result<(), Error> { + let l1i = virt.page_index::(); + let l2i = virt.page_index::(); + let l3i = virt.page_index::(); + + let mut l2 = self.l1.get_mut_or_alloc::(l1i)?; + let mut l3 = l2.get_mut_or_alloc::(l2i)?; + + if l3[l3i].is_present() && !overwrite { + todo!(); + } + + l3[l3i] = entry; + tlb_flush_vaae1(virt); + + Ok(()) + } + + fn pop_l3_entry(&mut self, virt: usize) -> Result { + let l1i = virt.page_index::(); + let l2i = virt.page_index::(); + let l3i = virt.page_index::(); + + // TODO somehow drop tables if they're known to be empty? 
+ let mut l2 = self.l1.get_mut(l1i).ok_or(Error::DoesNotExist)?; + let mut l3 = l2.get_mut(l2i).ok_or(Error::DoesNotExist)?; + + let page = l3[l3i].as_page().ok_or(Error::DoesNotExist)?; + + l3[l3i] = PageEntry::INVALID; + tlb_flush_vaae1(virt); + + Ok(page) + } + + fn read_l3_entry(&self, virt: usize) -> Option<(PhysicalAddress, MapAttributes)> { + let l1i = virt.page_index::(); + let l2i = virt.page_index::(); + let l3i = virt.page_index::(); + + let l2 = self.l1.get(l1i)?; + let l3 = l2.get(l2i)?; + + let page = l3[l3i].as_page()?; + + Some((page, l3[l3i].attributes().into())) + } +} + +impl Drop for ProcessAddressSpaceImpl { + fn drop(&mut self) { + // SAFETY: with safe usage of the ProcessAddressSpaceImpl, clearing and dropping + // is safe, no one refers to the memory + unsafe { + self.clear(); + let l1_phys = self.l1.as_physical_address(); + TA::free_page_table(l1_phys); + } + } +} diff --git a/kernel/arch/aarch64/src/mem/table.rs b/kernel/arch/aarch64/src/mem/table.rs new file mode 100644 index 00000000..3a5fe69b --- /dev/null +++ b/kernel/arch/aarch64/src/mem/table.rs @@ -0,0 +1,342 @@ +use core::{ + marker::PhantomData, + ops::{Index, IndexMut, Range}, +}; + +use bitflags::bitflags; +use libk_mm_interface::{ + address::{AsPhysicalAddress, FromRaw, IntoRaw, PhysicalAddress}, + pointer::{PhysicalRef, PhysicalRefMut}, + table::{ + EntryLevel, EntryLevelDrop, MapAttributes, NextPageTable, NonTerminalEntryLevel, + TableAllocator, + }, +}; +use yggdrasil_abi::error::Error; + +use crate::KernelTableManagerImpl; + +bitflags! { + #[derive(Clone, Copy, PartialEq, Eq)] + pub struct PageAttributes: u64 { + const PRESENT = 1 << 0; + + const TABLE = 1 << 1; + const PAGE = 1 << 1; + const BLOCK = 0 << 1; + + const ACCESS = 1 << 10; + + const AP_KERNEL_READWRITE = 0 << 6; + const AP_BOTH_READWRITE = 1 << 6; + const AP_KERNEL_READONLY = 2 << 6; + const AP_BOTH_READONLY = 3 << 6; + const AP_ACCESS_MASK = 3 << 6; + + const SH_OUTER = 2 << 8; + const SH_INNER = 3 << 8; + + const PAGE_ATTR_NORMAL = 0 << 2; + const PAGE_ATTR_DEVICE = 1 << 2; + + const NON_GLOBAL = 1 << 11; + + const PXN = 1 << 53; + const UXN = 1 << 54; + } +} + +#[derive(Clone, Copy)] +#[repr(C, align(0x1000))] +pub struct PageTable { + entries: [PageEntry; 512], +} + +#[derive(Clone, Copy)] +pub struct PageEntry(u64, PhantomData); + +#[derive(Clone, Copy)] +pub struct L1; +#[derive(Clone, Copy)] +pub struct L2; +#[derive(Clone, Copy)] +pub struct L3; + +impl NonTerminalEntryLevel for L1 { + type NextLevel = L2; +} + +impl NonTerminalEntryLevel for L2 { + type NextLevel = L3; +} + +impl EntryLevel for L1 { + const SHIFT: usize = 30; +} + +impl EntryLevel for L2 { + const SHIFT: usize = 21; +} + +impl EntryLevel for L3 { + const SHIFT: usize = 12; +} + +impl PageTable { + pub const fn zeroed() -> Self { + Self { + entries: [PageEntry::INVALID; 512], + } + } + + pub fn new_zeroed<'a, TA: TableAllocator>( + ) -> Result, Error> { + let physical = TA::allocate_page_table()?; + let mut table = + unsafe { PhysicalRefMut::<'a, Self, KernelTableManagerImpl>::map(physical) }; + + for i in 0..512 { + table[i] = PageEntry::INVALID; + } + + Ok(table) + } +} + +impl PageEntry { + pub const INVALID: Self = Self(0, PhantomData); + + pub const fn is_present(self) -> bool { + self.0 & PageAttributes::PRESENT.bits() != 0 + } + + pub fn attributes(self) -> PageAttributes { + PageAttributes::from_bits_retain(self.0) + } +} + +impl NextPageTable for PageTable { + type NextLevel = PageTable; + type TableRef = PhysicalRef<'static, PageTable, 
KernelTableManagerImpl>; + type TableRefMut = PhysicalRefMut<'static, PageTable, KernelTableManagerImpl>; + + fn get(&self, index: usize) -> Option { + self[index] + .as_table() + .map(|phys| unsafe { PhysicalRef::map(phys) }) + } + + fn get_mut(&mut self, index: usize) -> Option { + self[index] + .as_table() + .map(|phys| unsafe { PhysicalRefMut::map(phys) }) + } + + fn get_mut_or_alloc( + &mut self, + index: usize, + ) -> Result { + let entry = self[index]; + + if let Some(table) = entry.as_table() { + Ok(unsafe { PhysicalRefMut::map(table) }) + } else { + let table = PageTable::new_zeroed::()?; + self[index] = PageEntry::::table( + unsafe { table.as_physical_address() }, + PageAttributes::empty(), + ); + Ok(table) + } + } +} + +impl EntryLevelDrop for PageTable { + const FULL_RANGE: Range = 0..512; + + // Do nothing + unsafe fn drop_range(&mut self, _range: Range) {} +} + +impl EntryLevelDrop for PageTable +where + PageTable: EntryLevelDrop, +{ + const FULL_RANGE: Range = 0..512; + + unsafe fn drop_range(&mut self, range: Range) { + for index in range { + let entry = self[index]; + + if let Some(table) = entry.as_table() { + let mut table_ref: PhysicalRefMut, KernelTableManagerImpl> = + PhysicalRefMut::map(table); + + table_ref.drop_all::(); + + // Drop the table + drop(table_ref); + + TA::free_page_table(table); + } else if entry.is_present() { + // Memory must've been cleared beforehand, so no non-table entries must be present + panic!( + "Expected a table containing only tables, got table[{}] = {:#x?}", + index, entry.0 + ); + } + + self[index] = PageEntry::INVALID; + } + } +} + +impl PageEntry { + pub fn table(phys: PhysicalAddress, attrs: PageAttributes) -> Self { + Self( + IntoRaw::::into_raw(phys) + | (PageAttributes::TABLE | PageAttributes::PRESENT | attrs).bits(), + PhantomData, + ) + } + + pub fn normal_block(phys: PhysicalAddress, attrs: PageAttributes) -> Self { + Self( + IntoRaw::::into_raw(phys) + | (PageAttributes::BLOCK + | PageAttributes::PRESENT + | PageAttributes::ACCESS + | PageAttributes::SH_INNER + | PageAttributes::PAGE_ATTR_NORMAL + | attrs) + .bits(), + PhantomData, + ) + } + + pub fn device_block(phys: PhysicalAddress) -> Self { + Self( + IntoRaw::::into_raw(phys) + | (PageAttributes::BLOCK + | PageAttributes::PRESENT + | PageAttributes::ACCESS + | PageAttributes::SH_OUTER + | PageAttributes::PAGE_ATTR_DEVICE + | PageAttributes::UXN + | PageAttributes::PXN) + .bits(), + PhantomData, + ) + } + + /// Returns the physical address of the table this entry refers to, returning None if it + /// does not + pub fn as_table(self) -> Option { + if self.0 & PageAttributes::PRESENT.bits() != 0 + && self.0 & PageAttributes::BLOCK.bits() == 0 + { + Some(PhysicalAddress::from_raw(self.0 & !0xFFF)) + } else { + None + } + } +} + +impl PageEntry { + pub fn normal_page(phys: PhysicalAddress, attrs: PageAttributes) -> Self { + Self( + IntoRaw::::into_raw(phys) + | (PageAttributes::PAGE + | PageAttributes::PRESENT + | PageAttributes::ACCESS + | PageAttributes::SH_INNER + | PageAttributes::PAGE_ATTR_NORMAL + | attrs) + .bits(), + PhantomData, + ) + } + + pub fn device_page(phys: PhysicalAddress) -> Self { + Self( + IntoRaw::::into_raw(phys) + | (PageAttributes::PAGE + | PageAttributes::PRESENT + | PageAttributes::ACCESS + | PageAttributes::SH_OUTER + | PageAttributes::PAGE_ATTR_DEVICE + | PageAttributes::UXN + | PageAttributes::PXN) + .bits(), + PhantomData, + ) + } + + pub fn as_page(&self) -> Option { + let mask = (PageAttributes::PRESENT | PageAttributes::PAGE).bits(); + if 
self.0 & mask == mask { + Some(PhysicalAddress::from_raw(self.0 & !0xFFF)) + } else { + None + } + } +} + +impl Index for PageTable { + type Output = PageEntry; + + fn index(&self, index: usize) -> &Self::Output { + &self.entries[index] + } +} + +impl IndexMut for PageTable { + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + &mut self.entries[index] + } +} + +impl From for PageAttributes { + fn from(value: MapAttributes) -> Self { + let mut out = PageAttributes::empty(); + // TODO kernel cannot write el0 readonly pages + if value.contains(MapAttributes::USER_WRITE) { + // Read/write + out |= PageAttributes::AP_BOTH_READWRITE; + } else if value.contains(MapAttributes::USER_READ) { + // Read only + out |= PageAttributes::AP_BOTH_READONLY; + } else { + // No read/write + out |= PageAttributes::AP_KERNEL_READONLY; + } + + if value.contains(MapAttributes::NON_GLOBAL) { + out |= PageAttributes::NON_GLOBAL; + } + + out + } +} + +impl From for MapAttributes { + fn from(value: PageAttributes) -> Self { + let mut out = MapAttributes::empty(); + + out |= match value.intersection(PageAttributes::AP_ACCESS_MASK) { + PageAttributes::AP_BOTH_READWRITE => { + MapAttributes::USER_WRITE | MapAttributes::USER_READ + } + PageAttributes::AP_BOTH_READONLY => MapAttributes::USER_READ, + PageAttributes::AP_KERNEL_READONLY => MapAttributes::empty(), + PageAttributes::AP_KERNEL_READWRITE => panic!("This variant cannot be constructed"), + _ => unreachable!(), + }; + + if value.contains(PageAttributes::NON_GLOBAL) { + out |= MapAttributes::NON_GLOBAL; + } + + out + } +} diff --git a/kernel/arch/hosted/Cargo.toml b/kernel/arch/hosted/Cargo.toml new file mode 100644 index 00000000..c9db9191 --- /dev/null +++ b/kernel/arch/hosted/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "kernel-arch-hosted" +version = "0.1.0" +edition = "2021" + +[dependencies] +kernel-arch-interface = { path = "../interface" } +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +libk-mm-interface = { path = "../../libk/libk-mm/interface" } diff --git a/kernel/arch/hosted/src/lib.rs b/kernel/arch/hosted/src/lib.rs new file mode 100644 index 00000000..0a333a5f --- /dev/null +++ b/kernel/arch/hosted/src/lib.rs @@ -0,0 +1,176 @@ +#![feature(never_type)] +use std::{ + marker::PhantomData, + sync::atomic::{AtomicBool, Ordering}, +}; + +use kernel_arch_interface::{ + cpu::IpiQueue, + mem::{ + DeviceMemoryAttributes, KernelTableManager, PhysicalMemoryAllocator, RawDeviceMemoryMapping, + }, + task::{Scheduler, TaskContext}, + Architecture, +}; +use libk_mm_interface::{ + address::PhysicalAddress, + process::ProcessAddressSpaceManager, + table::{MapAttributes, TableAllocator}, +}; +use yggdrasil_abi::{error::Error, process::Signal}; + +pub struct ArchitectureImpl; + +#[derive(Debug)] +pub struct KernelTableManagerImpl; + +pub struct ProcessAddressSpaceImpl(!, PhantomData); + +pub struct TaskContextImpl( + !, + PhantomData<(K, PA)>, +); + +static DUMMY_INTERRUPT_MASK: AtomicBool = AtomicBool::new(true); + +impl Architecture for ArchitectureImpl { + type PerCpuData = (); + + fn local_cpu() -> *mut Self::PerCpuData { + unimplemented!() + } + + unsafe fn set_local_cpu(_cpu: *mut Self::PerCpuData) { + unimplemented!() + } + + unsafe fn init_local_cpu(_id: Option, _data: Self::PerCpuData) { + unimplemented!() + } + + unsafe fn init_ipi_queues(_queues: Vec>) { + unimplemented!() + } + + fn idle_task() -> extern "C" fn(usize) -> ! 
{ + unimplemented!() + } + + fn cpu_count() -> usize { + unimplemented!() + } + + fn cpu_index() -> u32 { + unimplemented!() + } + + unsafe fn set_interrupt_mask(mask: bool) -> bool { + DUMMY_INTERRUPT_MASK.swap(mask, Ordering::Acquire) + } + + fn interrupt_mask() -> bool { + unimplemented!() + } + + fn wait_for_interrupt() { + unimplemented!() + } +} + +impl KernelTableManager for KernelTableManagerImpl { + fn virtualize(_phys: u64) -> usize { + unimplemented!() + } + + fn physicalize(_virt: usize) -> u64 { + unimplemented!() + } + + unsafe fn map_device_pages( + _base: u64, + _count: usize, + _attrs: DeviceMemoryAttributes, + ) -> Result, Error> { + unimplemented!() + } + + unsafe fn unmap_device_pages(_mapping: &RawDeviceMemoryMapping) { + unimplemented!() + } +} + +impl ProcessAddressSpaceManager for ProcessAddressSpaceImpl { + const LOWER_LIMIT_PFN: usize = 16; + const UPPER_LIMIT_PFN: usize = 1024; + + fn new() -> Result { + unimplemented!() + } + + unsafe fn clear(&mut self) { + unimplemented!() + } + + unsafe fn map_page( + &mut self, + _address: usize, + _physical: PhysicalAddress, + _flags: MapAttributes, + ) -> Result<(), Error> { + unimplemented!() + } + + unsafe fn unmap_page(&mut self, _address: usize) -> Result { + unimplemented!() + } + + fn translate(&self, _address: usize) -> Result<(PhysicalAddress, MapAttributes), Error> { + unimplemented!() + } + + fn as_address_with_asid(&self) -> u64 { + unimplemented!() + } +} + +impl TaskContext + for TaskContextImpl +{ + const USER_STACK_EXTRA_ALIGN: usize = 0; + const SIGNAL_STACK_EXTRA_ALIGN: usize = 0; + + unsafe fn enter(&self) -> ! { + unimplemented!() + } + + unsafe fn switch(&self, _from: &Self) { + unimplemented!() + } + + unsafe fn switch_and_drop(&self, _thread: *const ()) { + unimplemented!() + } + + fn user( + _entry: usize, + _arg: usize, + _cr3: u64, + _user_stack_sp: usize, + _tls_address: usize, + ) -> Result { + unimplemented!() + } + + fn kernel(_entry: extern "C" fn(usize) -> !, _arg: usize) -> Result { + unimplemented!() + } + + fn kernel_closure ! 
+ Send + 'static>(_f: F) -> Result { + unimplemented!() + } +} + +#[no_mangle] +extern "Rust" fn __signal_process_group(_group_id: u32, _signal: Signal) { + unimplemented!() +} diff --git a/kernel/arch/interface/Cargo.toml b/kernel/arch/interface/Cargo.toml new file mode 100644 index 00000000..cdcc580b --- /dev/null +++ b/kernel/arch/interface/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "kernel-arch-interface" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +device-api = { path = "../../lib/device-api", features = ["derive"] } diff --git a/kernel/arch/interface/src/cpu.rs b/kernel/arch/interface/src/cpu.rs new file mode 100644 index 00000000..61830a44 --- /dev/null +++ b/kernel/arch/interface/src/cpu.rs @@ -0,0 +1,151 @@ +use core::{ + marker::PhantomData, + ops::{Deref, DerefMut}, +}; + +use alloc::vec::Vec; +use device_api::interrupt::IpiMessage; + +use crate::{ + guard::IrqGuard, sync::IrqSafeSpinlock, task::Scheduler, util::OneTimeInit, Architecture, +}; + +#[repr(C, align(0x10))] +pub struct CpuImpl { + inner: A::PerCpuData, + scheduler: OneTimeInit<&'static S>, + + id: u32, + current_thread_id: Option, + + _pd: PhantomData, +} + +pub struct LocalCpuImpl<'a, A: Architecture, S: Scheduler + 'static> { + cpu: &'a mut CpuImpl, + guard: IrqGuard, +} + +pub struct IpiQueue { + data: IrqSafeSpinlock>, +} + +impl CpuImpl { + pub fn new(id: u32, inner: A::PerCpuData) -> Self { + Self { + inner, + scheduler: OneTimeInit::new(), + id, + current_thread_id: None, + _pd: PhantomData, + } + } + + pub fn init_ipi_queues(cpu_count: usize) { + let queues = Vec::from_iter((0..cpu_count).map(|_| IpiQueue::new())); + unsafe { A::init_ipi_queues(queues) } + } + + pub fn set_current_thread_id(&mut self, id: Option) { + self.current_thread_id = id; + } + + pub fn current_thread_id(&self) -> Option { + self.current_thread_id + } + + pub fn set_scheduler(&mut self, sched: &'static S) { + self.scheduler.init(sched); + } + + pub fn try_get_scheduler(&self) -> Option<&'static S> { + self.scheduler.try_get().copied() + } + + pub fn scheduler(&self) -> &'static S { + self.scheduler.get() + } + + pub unsafe fn set_local(&'static mut self) { + A::set_local_cpu(self as *mut _ as *mut _) + } + + pub fn try_local<'a>() -> Option> { + let guard = IrqGuard::acquire(); + let cpu = A::local_cpu() as *mut Self; + + unsafe { cpu.as_mut().map(|cpu| LocalCpuImpl { cpu, guard }) } + } + + pub fn local<'a>() -> LocalCpuImpl<'a, A, S> { + Self::try_local().expect("Local CPU not initialized") + } + + pub fn id(&self) -> u32 { + self.id + } + + pub fn push_ipi_queue(_cpu_id: u32, _msg: IpiMessage) { + // XXX + todo!() + } + + pub fn get_ipi(&self) -> Option { + // XXX + todo!() + } +} + +impl Deref for CpuImpl { + type Target = A::PerCpuData; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +impl DerefMut for CpuImpl { + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.inner + } +} + +impl<'a, A: Architecture, S: Scheduler + 'static> LocalCpuImpl<'a, A, S> { + pub fn into_guard(self) -> IrqGuard { + self.guard + } +} + +impl<'a, A: Architecture, S: Scheduler> Deref for LocalCpuImpl<'a, A, S> { + type Target = CpuImpl; + + fn deref(&self) -> &Self::Target { + self.cpu + } +} + +impl<'a, A: Architecture, S: Scheduler> DerefMut for LocalCpuImpl<'a, A, S> { + fn deref_mut(&mut self) -> &mut Self::Target { + self.cpu + } +} + 
+impl IpiQueue { + pub const fn new() -> Self { + Self { + data: IrqSafeSpinlock::::new(None), + } + } + + pub fn push(&self, msg: IpiMessage) { + let mut lock = self.data.lock(); + + assert!(lock.is_none()); + lock.replace(msg); + } + + pub fn pop(&self) -> Option { + let mut lock = self.data.lock(); + lock.take() + } +} diff --git a/kernel/arch/interface/src/guard.rs b/kernel/arch/interface/src/guard.rs new file mode 100644 index 00000000..ef20a092 --- /dev/null +++ b/kernel/arch/interface/src/guard.rs @@ -0,0 +1,24 @@ +use core::marker::PhantomData; + +use crate::Architecture; + +/// Token type used to prevent IRQs from firing during some critical section. Normal IRQ operation +/// (if enabled before) is resumed when [IrqGuard]'s lifetime is over. +pub struct IrqGuard(bool, PhantomData); + +// IrqGuard impls +impl IrqGuard { + /// Saves the current IRQ state and masks them + pub fn acquire() -> Self { + let mask = unsafe { A::set_interrupt_mask(true) }; + Self(mask, PhantomData) + } +} + +impl Drop for IrqGuard { + fn drop(&mut self) { + unsafe { + A::set_interrupt_mask(self.0); + } + } +} diff --git a/kernel/arch/interface/src/lib.rs b/kernel/arch/interface/src/lib.rs new file mode 100644 index 00000000..2815fed1 --- /dev/null +++ b/kernel/arch/interface/src/lib.rs @@ -0,0 +1,47 @@ +#![no_std] +#![feature(step_trait, effects, const_trait_impl, never_type)] + +use alloc::vec::Vec; +use cpu::IpiQueue; +use device_api::interrupt::{LocalInterruptController, MessageInterruptController}; +use task::Scheduler; + +extern crate alloc; + +pub mod cpu; +pub mod guard; +pub mod mem; +pub mod sync; +pub mod task; +pub mod util; + +pub const KERNEL_VIRT_OFFSET: usize = 0xFFFFFF8000000000; + +pub trait Architecture: Sized { + type PerCpuData; + + // Cpu management + unsafe fn set_local_cpu(cpu: *mut ()); + fn local_cpu() -> *mut (); + unsafe fn init_ipi_queues(queues: Vec>); + unsafe fn init_local_cpu(id: Option, data: Self::PerCpuData); + + fn idle_task() -> extern "C" fn(usize) -> !; + + fn cpu_count() -> usize; + fn cpu_index() -> u32; + + // Interrupt management + fn interrupt_mask() -> bool; + unsafe fn set_interrupt_mask(mask: bool) -> bool; + fn wait_for_interrupt(); + + // Architectural devices + fn local_interrupt_controller() -> &'static dyn LocalInterruptController { + unimplemented!() + } + + fn message_interrupt_controller() -> &'static dyn MessageInterruptController { + unimplemented!() + } +} diff --git a/kernel/arch/interface/src/mem/address.rs b/kernel/arch/interface/src/mem/address.rs new file mode 100644 index 00000000..e69de29b diff --git a/kernel/arch/interface/src/mem/mod.rs b/kernel/arch/interface/src/mem/mod.rs new file mode 100644 index 00000000..73cdf364 --- /dev/null +++ b/kernel/arch/interface/src/mem/mod.rs @@ -0,0 +1,120 @@ +use core::{fmt, marker::PhantomData, mem::size_of, ptr::NonNull}; + +use yggdrasil_abi::error::Error; + +pub mod address; +pub mod table; + +pub trait PhysicalMemoryAllocator { + type Address; + + fn allocate_page() -> Result; + fn allocate_contiguous_pages(count: usize) -> Result; + + unsafe fn free_page(page: Self::Address); +} + +#[derive(Debug, Default, Clone, Copy)] +pub enum DeviceMemoryCaching { + #[default] + None, + Cacheable, +} + +#[derive(Default, Debug, Clone, Copy)] +pub struct DeviceMemoryAttributes { + pub caching: DeviceMemoryCaching, +} + +/// Describes a single device memory mapping +#[derive(Debug)] +pub struct RawDeviceMemoryMapping { + /// Virtual address of the mapped object + pub address: usize, + /// Base address of the 
mapping start + pub base_address: usize, + /// Page size used for the mapping + pub page_size: usize, + /// Number of pages used to map the object + pub page_count: usize, + + _manager: PhantomData, +} + +pub trait KernelTableManager: Sized + fmt::Debug { + fn virtualize(phys: u64) -> usize; + fn physicalize(virt: usize) -> u64; + + unsafe fn map_device_pages( + base: u64, + count: usize, + attrs: DeviceMemoryAttributes, + ) -> Result, Error>; + unsafe fn unmap_device_pages(mapping: &RawDeviceMemoryMapping); +} + +impl RawDeviceMemoryMapping { + /// Maps a region of physical memory as device memory of given size. + /// + /// # Safety + /// + /// The caller must ensure proper access synchronization, as well as the address' origin. + #[inline] + pub unsafe fn map( + base: u64, + size: usize, + attrs: DeviceMemoryAttributes, + ) -> Result { + A::map_device_pages(base, size, attrs) + } + + /// Consumes the device mapping, leaking its address without deallocating the translation + /// mapping itself + pub fn leak(self) -> usize { + let address = self.address; + core::mem::forget(self); + address + } + + pub fn into_raw_parts(self) -> (usize, usize, usize, usize) { + let address = self.address; + let base_address = self.base_address; + let page_count = self.page_count; + let page_size = self.page_size; + + core::mem::forget(self); + + (address, base_address, page_count, page_size) + } + + pub unsafe fn from_raw_parts( + address: usize, + base_address: usize, + page_count: usize, + page_size: usize, + ) -> Self { + Self { + address, + base_address, + page_count, + page_size, + _manager: PhantomData, + } + } + + /// "Casts" the mapping to a specific type T and returns a [NonNull] pointer to it + pub unsafe fn as_non_null(&self) -> NonNull { + if self.page_size * self.page_count < size_of::() { + panic!(); + } + NonNull::new_unchecked(self.address as *mut T) + } +} + +impl Drop for RawDeviceMemoryMapping { + fn drop(&mut self) { + unsafe { + A::unmap_device_pages(self); + } + } +} diff --git a/kernel/arch/interface/src/mem/table.rs b/kernel/arch/interface/src/mem/table.rs new file mode 100644 index 00000000..e69de29b diff --git a/kernel/arch/interface/src/sync.rs b/kernel/arch/interface/src/sync.rs new file mode 100644 index 00000000..940aa375 --- /dev/null +++ b/kernel/arch/interface/src/sync.rs @@ -0,0 +1,155 @@ +use core::{ + cell::UnsafeCell, + marker::PhantomData, + mem, + ops::{Deref, DerefMut}, + sync::atomic::{AtomicBool, Ordering}, +}; + +use crate::{guard::IrqGuard, Architecture}; + +struct SpinlockInner { + value: UnsafeCell, + state: AtomicBool, + _pd: PhantomData, +} + +struct SpinlockInnerGuard<'a, A: Architecture, T> { + lock: &'a SpinlockInner, +} + +/// Spinlock implementation which prevents interrupts to avoid deadlocks when an interrupt handler +/// tries to acquire a lock taken before the IRQ fired. +pub struct IrqSafeSpinlock { + inner: SpinlockInner, +} + +/// Token type allowing safe access to the underlying data of the [IrqSafeSpinlock]. Resumes normal +/// IRQ operation (if enabled before acquiring) when the lifetime is over. 
diff --git a/kernel/arch/interface/src/sync.rs b/kernel/arch/interface/src/sync.rs
new file mode 100644
index 00000000..940aa375
--- /dev/null
+++ b/kernel/arch/interface/src/sync.rs
@@ -0,0 +1,155 @@
+use core::{
+    cell::UnsafeCell,
+    marker::PhantomData,
+    mem,
+    ops::{Deref, DerefMut},
+    sync::atomic::{AtomicBool, Ordering},
+};
+
+use crate::{guard::IrqGuard, Architecture};
+
+struct SpinlockInner<A: Architecture, T> {
+    value: UnsafeCell<T>,
+    state: AtomicBool,
+    _pd: PhantomData<A>,
+}
+
+struct SpinlockInnerGuard<'a, A: Architecture, T> {
+    lock: &'a SpinlockInner<A, T>,
+}
+
+/// Spinlock implementation which prevents interrupts to avoid deadlocks when an interrupt handler
+/// tries to acquire a lock taken before the IRQ fired.
+pub struct IrqSafeSpinlock<A: Architecture, T> {
+    inner: SpinlockInner<A, T>,
+}
+
+/// Token type allowing safe access to the underlying data of the [IrqSafeSpinlock]. Resumes normal
+/// IRQ operation (if enabled before acquiring) when the lifetime is over.
+pub struct IrqSafeSpinlockGuard<'a, A: Architecture, T> {
+    // Must come first to ensure the lock is dropped first and only then IRQs are re-enabled
+    inner: SpinlockInnerGuard<'a, A, T>,
+    _irq: IrqGuard<A>,
+}
+
+// Spinlock impls
+impl<A: Architecture, T> SpinlockInner<A, T> {
+    const fn new(value: T) -> Self {
+        Self {
+            value: UnsafeCell::new(value),
+            state: AtomicBool::new(false),
+            _pd: PhantomData,
+        }
+    }
+
+    fn lock(&self) -> SpinlockInnerGuard<A, T> {
+        // Loop until the lock can be acquired
+        // if LOCK_HACK.load(Ordering::Acquire) {
+        //     return SpinlockInnerGuard { lock: self };
+        // }
+        while self
+            .state
+            .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)
+            .is_err()
+        {
+            core::hint::spin_loop();
+        }
+
+        SpinlockInnerGuard { lock: self }
+    }
+}
+
+impl<'a, A: Architecture, T> Deref for SpinlockInnerGuard<'a, A, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        unsafe { &*self.lock.value.get() }
+    }
+}
+
+impl<'a, A: Architecture, T> DerefMut for SpinlockInnerGuard<'a, A, T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe { &mut *self.lock.value.get() }
+    }
+}
+
+impl<'a, A: Architecture, T> Drop for SpinlockInnerGuard<'a, A, T> {
+    fn drop(&mut self) {
+        // if !LOCK_HACK.load(Ordering::Acquire) {
+        self.lock
+            .state
+            .compare_exchange(true, false, Ordering::Release, Ordering::Relaxed)
+            .unwrap();
+        // }
+    }
+}
+
+unsafe impl<A: Architecture, T> Sync for SpinlockInner<A, T> {}
+unsafe impl<A: Architecture, T> Send for SpinlockInner<A, T> {}
+
+// IrqSafeSpinlock impls
+impl<A: Architecture, T> IrqSafeSpinlock<A, T> {
+    /// Wraps the value in a spinlock primitive
+    pub const fn new(value: T) -> Self {
+        Self {
+            inner: SpinlockInner::new(value),
+        }
+    }
+
+    #[inline]
+    pub fn replace(&self, value: T) -> T {
+        let mut lock = self.lock();
+        mem::replace(&mut lock, value)
+    }
+
+    /// Attempts to acquire a lock. IRQs will be disabled until the lock is released.
+    pub fn lock(&self) -> IrqSafeSpinlockGuard<A, T> {
+        // Disable IRQs to avoid IRQ handler trying to acquire the same lock
+        let irq_guard = IrqGuard::acquire();
+
+        // Acquire the inner lock
+        let inner = self.inner.lock();
+
+        IrqSafeSpinlockGuard {
+            inner,
+            _irq: irq_guard,
+        }
+    }
+
+    /// Returns an unsafe reference to the inner value.
+    ///
+    /// # Safety
+    ///
+    /// Unsafe: explicitly ignores proper access sharing.
+    #[allow(clippy::mut_from_ref)]
+    pub unsafe fn grab(&self) -> &mut T {
+        unsafe { &mut *self.inner.value.get() }
+    }
+}
+
+impl<A: Architecture, T: Clone> IrqSafeSpinlock<A, T> {
+    pub fn get_cloned(&self) -> T {
+        self.lock().clone()
+    }
+}
+
+impl<A: Architecture, T: Clone> Clone for IrqSafeSpinlock<A, T> {
+    fn clone(&self) -> Self {
+        let inner = self.lock();
+        IrqSafeSpinlock::new(inner.clone())
+    }
+}
+
+impl<'a, A: Architecture, T> Deref for IrqSafeSpinlockGuard<'a, A, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.inner.deref()
+    }
+}
+
+impl<'a, A: Architecture, T> DerefMut for IrqSafeSpinlockGuard<'a, A, T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.inner.deref_mut()
+    }
+}
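Editor's note: a short illustration (not in the patch) of the intended use of `IrqSafeSpinlock`; `ArchitectureImpl` stands for whatever type the target crate exports:

```rust
// A static, IRQ-safe counter protected by the spinlock above.
static COUNTER: IrqSafeSpinlock<ArchitectureImpl, u64> = IrqSafeSpinlock::new(0);

fn bump() -> u64 {
    // IRQs are masked from here until `guard` drops, so an interrupt handler
    // running on this CPU can never deadlock trying to take COUNTER while we
    // hold it. Guard-field order ensures the lock releases before IRQs resume.
    let mut guard = COUNTER.lock();
    *guard += 1;
    *guard
}
```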
diff --git a/kernel/arch/interface/src/task.rs b/kernel/arch/interface/src/task.rs
new file mode 100644
index 00000000..2a262a61
--- /dev/null
+++ b/kernel/arch/interface/src/task.rs
@@ -0,0 +1,183 @@
+use core::fmt;
+
+use alloc::boxed::Box;
+use yggdrasil_abi::{arch::SavedFrame, error::Error, process::ExitCode};
+
+use crate::mem::{KernelTableManager, PhysicalMemoryAllocator};
+
+pub trait Scheduler {
+    type ThreadId: Copy;
+
+    fn for_cpu(index: usize) -> &'static Self;
+    fn for_affinity_mask(mask: u64) -> &'static Self;
+    fn local() -> &'static Self;
+
+    fn is_local(&self) -> bool;
+    fn push(&self, task: Self::ThreadId);
+
+    /// Selects a new thread from the queue and performs a context switch if necessary.
+    ///
+    /// # Safety
+    ///
+    /// Only meant to be called from within the timer handler or the thread impl.
+    unsafe fn yield_cpu(&self) -> bool;
+}
+
+/// Conversion trait to allow multiple kernel closure return types
+pub trait Termination {
+    /// Converts the closure return type into [ExitCode]
+    fn into_exit_code(self) -> ExitCode;
+}
+
+/// Interface for task state save/restore mechanisms
+pub trait TaskFrame {
+    /// Creates a "snapshot" of an exception/syscall frame
+    fn store(&self) -> SavedFrame;
+
+    /// Restores the exception/syscall frame from its saved state
+    fn restore(&mut self, saved: &SavedFrame);
+
+    /// Replaces the return value in the frame (or does nothing, if the frame is not a part of a
+    /// syscall signal handler)
+    fn set_return_value(&mut self, value: u64);
+
+    /// Replaces the userspace stack pointer in the frame
+    fn set_user_sp(&mut self, value: usize);
+
+    /// Replaces the userspace instruction pointer in the frame
+    fn set_user_ip(&mut self, value: usize);
+
+    /// Replaces the argument in the frame
+    fn set_argument(&mut self, value: u64);
+
+    /// Returns the argument (if any) of the frame being processed
+    fn argument(&self) -> u64;
+
+    /// Returns the userspace stack pointer
+    fn user_sp(&self) -> usize;
+    /// Returns the userspace instruction pointer
+    fn user_ip(&self) -> usize;
+}
+
+/// Interface for performing context fork operations
+pub trait ForkFrame<K: KernelTableManager, PA: PhysicalMemoryAllocator>: Sized {
+    type Context: TaskContext<K, PA>;
+
+    /// Constructs a "forked" task context by copying the registers from this one and supplying a
+    /// new address space to it.
+    ///
+    /// # Safety
+    ///
+    /// Unsafe: accepts raw frames and address space address.
+    unsafe fn fork(&self, address_space: u64) -> Result<Self::Context, Error>;
+
+    /// Replaces the return value inside the frame with a new one
+    fn set_return_value(&mut self, value: u64);
+}
+
+/// Platform-specific task context implementation
+pub trait TaskContext<K: KernelTableManager, PA: PhysicalMemoryAllocator>: Sized {
+    /// Number of bytes to offset the signal stack pointer by
+    const SIGNAL_STACK_EXTRA_ALIGN: usize;
+    /// Number of bytes to offset the user stack pointer by
+    const USER_STACK_EXTRA_ALIGN: usize;
+
+    /// Constructs a kernel-space task context
+    fn kernel(entry: extern "C" fn(usize) -> !, arg: usize) -> Result<Self, Error>;
+
+    /// Constructs a user thread context. The caller is responsible for allocating the userspace
+    /// stack and setting up a valid address space for the context.
+    fn user(
+        entry: usize,
+        arg: usize,
+        cr3: u64,
+        user_stack_sp: usize,
+        tls_address: usize,
+    ) -> Result<Self, Error>;
+
+    /// Performs an entry into a context.
+    ///
+    /// # Safety
+    ///
+    /// Only meant to be called from the scheduler code.
+    unsafe fn enter(&self) -> !;
+
+    /// Performs a context switch between two contexts.
+    ///
+    /// # Safety
+    ///
+    /// Only meant to be called from the scheduler code.
+    unsafe fn switch(&self, from: &Self);
+
+    /// Performs a context switch and drops the source thread.
+    ///
+    /// # Safety
+    ///
+    /// Only meant to be called from the scheduler code after the `thread` has terminated.
+    unsafe fn switch_and_drop(&self, thread: *const ());
+
+    // XXX
+    /// Constructs a safe wrapper process to execute a kernel-space closure
+    fn kernel_closure<F: FnOnce() -> ! + Send + 'static>(f: F) -> Result<Self, Error> {
+        extern "C" fn closure_wrapper<F: FnOnce() -> ! + Send + 'static>(closure_addr: usize) -> ! {
+            let closure = unsafe { Box::from_raw(closure_addr as *mut F) };
+            closure()
+        }
+
+        let closure = Box::new(f);
+        Self::kernel(closure_wrapper::<F>, Box::into_raw(closure) as usize)
+    }
+}
+
+pub struct StackBuilder {
+    base: usize,
+    sp: usize,
+}
+
+impl StackBuilder {
+    pub fn new(base: usize, size: usize) -> Self {
+        Self {
+            base,
+            sp: base + size,
+        }
+    }
+
+    pub fn push(&mut self, value: usize) {
+        if self.sp == self.base {
+            panic!();
+        }
+        self.sp -= 8;
+        unsafe {
+            (self.sp as *mut usize).write_volatile(value);
+        }
+    }
+
+    pub fn build(self) -> usize {
+        self.sp
+    }
+}
+
+impl<T, E: fmt::Debug> Termination for Result<T, E> {
+    fn into_exit_code(self) -> ExitCode {
+        match self {
+            Ok(_) => ExitCode::SUCCESS,
+            Err(_err) => {
+                // XXX
+                // log::warn!("Kernel thread failed: {:?}", err);
+                ExitCode::Exited(1)
+            }
+        }
+    }
+}
+
+impl Termination for ExitCode {
+    fn into_exit_code(self) -> ExitCode {
+        self
+    }
+}
+
+impl Termination for () {
+    fn into_exit_code(self) -> ExitCode {
+        ExitCode::SUCCESS
+    }
+}
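Editor's note: `kernel_closure` boxes the closure, smuggles its raw pointer through the `usize` thread argument, and re-materializes it in `closure_wrapper` on the new thread. A hedged sketch of the calling side (hypothetical helper, not in the patch):

```rust
// Sketch only: spawning a kernel thread from a closure. `C` stands for a
// concrete TaskContext implementation such as the x86-64 one defined later.
fn spawn_worker<K, PA, C>() -> Result<C, Error>
where
    K: KernelTableManager,
    PA: PhysicalMemoryAllocator,
    C: TaskContext<K, PA>,
{
    C::kernel_closure(|| {
        // ... thread body ...
        loop {} // the closure must never return; real code exits via the scheduler
    })
}
```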
diff --git a/kernel/arch/interface/src/util.rs b/kernel/arch/interface/src/util.rs
new file mode 100644
index 00000000..0778b012
--- /dev/null
+++ b/kernel/arch/interface/src/util.rs
@@ -0,0 +1,125 @@
+use core::{
+    cell::UnsafeCell,
+    mem::MaybeUninit,
+    panic,
+    sync::atomic::{AtomicUsize, Ordering},
+};
+
+/// Wrapper struct to ensure a value can only be initialized once and used only after that
+#[repr(C)]
+pub struct OneTimeInit<T> {
+    value: UnsafeCell<MaybeUninit<T>>,
+    state: AtomicUsize,
+}
+
+unsafe impl<T> Sync for OneTimeInit<T> {}
+unsafe impl<T> Send for OneTimeInit<T> {}
+
+impl<T> OneTimeInit<T> {
+    const STATE_UNINITIALIZED: usize = 0;
+    const STATE_INITIALIZING: usize = 1;
+    const STATE_INITIALIZED: usize = 2;
+
+    /// Wraps the value in an [OneTimeInit]
+    pub const fn new() -> Self {
+        Self {
+            value: UnsafeCell::new(MaybeUninit::uninit()),
+            state: AtomicUsize::new(Self::STATE_UNINITIALIZED),
+        }
+    }
+
+    /// Returns `true` if the value has already been initialized
+    #[inline]
+    pub fn is_initialized(&self) -> bool {
+        self.state.load(Ordering::Acquire) == Self::STATE_INITIALIZED
+    }
+
+    pub fn try_init_with<F: FnOnce() -> T>(&self, f: F) -> Option<&T> {
+        if self
+            .state
+            .compare_exchange(
+                Self::STATE_UNINITIALIZED,
+                Self::STATE_INITIALIZING,
+                Ordering::Release,
+                Ordering::Relaxed,
+            )
+            .is_err()
+        {
+            // Already initialized
+            return None;
+        }
+
+        let value = unsafe { (*self.value.get()).write(f()) };
+
+        self.state
+            .compare_exchange(
+                Self::STATE_INITIALIZING,
+                Self::STATE_INITIALIZED,
+                Ordering::Release,
+                Ordering::Relaxed,
+            )
+            .unwrap();
+
+        Some(value)
+    }
+
+    /// Sets the underlying value of the [OneTimeInit]. If already initialized, panics.
+    #[track_caller]
+    pub fn init(&self, value: T) -> &T {
+        // Transition to "initializing" state
+        if self
+            .state
+            .compare_exchange(
+                Self::STATE_UNINITIALIZED,
+                Self::STATE_INITIALIZING,
+                Ordering::Release,
+                Ordering::Relaxed,
+            )
+            .is_err()
+        {
+            panic!(
+                "{:?}: Double initialization of OneTimeInit",
+                panic::Location::caller()
+            );
+        }
+
+        let value = unsafe { (*self.value.get()).write(value) };
+
+        // Transition to "initialized" state. This must not fail
+        self.state
+            .compare_exchange(
+                Self::STATE_INITIALIZING,
+                Self::STATE_INITIALIZED,
+                Ordering::Release,
+                Ordering::Relaxed,
+            )
+            .unwrap();
+
+        value
+    }
+
+    /// Returns an immutable reference to the underlying value and panics if it hasn't yet been
+    /// initialized
+    #[track_caller]
+    pub fn get(&self) -> &T {
+        // TODO check for INITIALIZING state and wait until it becomes INITIALIZED?
+        if !self.is_initialized() {
+            panic!(
+                "{:?}: Attempt to dereference an uninitialized value",
+                panic::Location::caller()
+            );
+        }
+
+        unsafe { (*self.value.get()).assume_init_ref() }
+    }
+
+    /// Returns an immutable reference to the underlying value and [None] if the value hasn't yet
+    /// been initialized
+    pub fn try_get(&self) -> Option<&T> {
+        if self.is_initialized() {
+            Some(self.get())
+        } else {
+            None
+        }
+    }
+}
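Editor's note: the init-once/read-many pattern `OneTimeInit` enables, shown as a small illustration (names hypothetical, not in the patch):

```rust
static TIMER_FREQ: OneTimeInit<u64> = OneTimeInit::new();

fn early_boot() {
    TIMER_FREQ.init(1_000_000); // a second init() here would panic with this location
}

fn later() -> u64 {
    // get() panics (with the caller's location) if early_boot() hasn't run;
    // try_get() would return None instead.
    *TIMER_FREQ.get()
}
```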
{
+    if #[cfg(any(test, not(target_os = "none")))] {
+        extern crate kernel_arch_hosted as imp;
+    } else if #[cfg(target_arch = "aarch64")] {
+        extern crate kernel_arch_aarch64 as imp;
+    } else if #[cfg(target_arch = "x86_64")] {
+        extern crate kernel_arch_x86_64 as imp;
+    } else {
+        compile_error!("Unsupported architecture");
+    }
+}
+
+pub use imp::{ArchitectureImpl, KernelTableManagerImpl, ProcessAddressSpaceImpl, TaskContextImpl};
+
+pub use kernel_arch_interface::{guard, mem, sync, task, util, Architecture};
+
+pub type CpuImpl<S> = kernel_arch_interface::cpu::CpuImpl<ArchitectureImpl, S>;
+pub type LocalCpuImpl<'a, S> = kernel_arch_interface::cpu::LocalCpuImpl<'a, ArchitectureImpl, S>;
diff --git a/kernel/arch/x86_64/Cargo.toml b/kernel/arch/x86_64/Cargo.toml
new file mode 100644
index 00000000..3971debc
--- /dev/null
+++ b/kernel/arch/x86_64/Cargo.toml
@@ -0,0 +1,17 @@
+[package]
+name = "kernel-arch-x86_64"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" }
+kernel-arch-interface = { path = "../interface" }
+libk-mm-interface = { path = "../../libk/libk-mm/interface" }
+memtables = { path = "../../lib/memtables" }
+device-api = { path = "../../lib/device-api", features = ["derive"] }
+
+bitflags = "2.3.3"
+static_assertions = "1.1.0"
+tock-registers = "0.8.1"
diff --git a/kernel/arch/x86_64/src/context.S b/kernel/arch/x86_64/src/context.S
new file mode 100644
index 00000000..147c8ce3
--- /dev/null
+++ b/kernel/arch/x86_64/src/context.S
@@ -0,0 +1,186 @@
+// vi: set ft=asm :
+
+.set MSR_IA32_FS_BASE, 0xC0000100
+
+.macro SAVE_TASK_STATE
+    sub ${context_size}, %rsp
+
+    mov %rbx, 0(%rsp)
+    mov %r12, 8(%rsp)
+    mov %r13, 16(%rsp)
+    mov %r14, 24(%rsp)
+    mov %r15, 32(%rsp)
+
+    // Store FS_BASE
+    mov $MSR_IA32_FS_BASE, %ecx
+    rdmsr
+    mov %edx, %ecx
+    shl $32, %rcx
+    or %rax, %rcx
+
+    mov %rcx, 40(%rsp)
+
+    // TODO save %fs
+    mov %rbp, 48(%rsp)
+
+    mov %cr3, %rbx
+    mov %rbx, 56(%rsp)
+.endm
+
+.macro LOAD_TASK_STATE
+    mov 56(%rsp), %rbx
+    mov %rbx, %cr3
+
+    mov 0(%rsp), %rbx
+    mov 8(%rsp), %r12
+    mov 16(%rsp), %r13
+    mov 24(%rsp), %r14
+    mov 32(%rsp), %r15
+
+    // Load FS_BASE
+    // edx:eax = fs_base
+    mov 40(%rsp), %rdx
+    mov %edx, %eax
+    shr $32, %rdx
+
+    mov $MSR_IA32_FS_BASE, %ecx
+    wrmsr
+
+    // mov 40(%rsp), %fs
+    mov 48(%rsp), %rbp
+
+    add ${context_size}, %rsp
+.endm
+
+.global __x86_64_task_enter_user
+.global __x86_64_task_enter_kernel
+.global __x86_64_task_enter_from_fork
+.global __x86_64_enter_task
+.global __x86_64_switch_task
+.global __x86_64_switch_and_drop
+
+.section .text
+
+__x86_64_task_enter_from_fork:
+    xorq %rax, %rax
+
+    xorq %rcx, %rcx
+    xorq %r11, %r11
+
+    popq %rdi
+    popq %rsi
+    popq %rdx
+    popq %r10
+    popq %r8
+    popq %r9
+
+    swapgs
+
+    iretq
+
+__x86_64_task_enter_user:
+    // User stack pointer
+    popq %rcx
+    // Argument
+    popq %rdi
+    // Entry address
+    popq %rax
+
+    // SS:RSP
+    pushq $0x1B
+    pushq %rcx
+
+    // RFLAGS
+    pushq $0x200
+
+    // CS:RIP
+    pushq $0x23
+    pushq %rax
+
+    swapgs
+
+    iretq
+
+__x86_64_task_enter_kernel:
+    // Argument
+    popq %rdi
+    // Entry address
+    popq %rax
+
+    // Alignment word + fake return address to terminate "call chain"
+    pushq $0
+
+    // Enable IRQ in RFLAGS
+    pushfq
+    popq %rdx
+    or $(1 << 9), %rdx
+
+    mov %rsp, %rcx
+
+    // SS:RSP
+    pushq $0x10
+    pushq %rcx
+
+    // RFLAGS
+    pushq %rdx
+
+    // CS:RIP
+    pushq $0x08
+    pushq %rax
+
+    iretq
+
+// %rsi - from struct ptr, %rdi - to struct ptr
+__x86_64_switch_task: + SAVE_TASK_STATE + mov %rsp, 0(%rsi) + + // TSS.RSP0 + mov 8(%rdi), %rax + // Kernel stack + mov 0(%rdi), %rdi + + mov %rdi, %rsp + + // Load TSS.RSP0 + mov %gs:(8), %rdi + mov %rax, 4(%rdi) + + LOAD_TASK_STATE + + ret + +__x86_64_switch_and_drop: + // TSS.RSP0 + mov 8(%rdi), %rax + // Kernel stack + mov 0(%rdi), %rdi + + mov %rdi, %rsp + + // Load TSS.RSP0 + mov %gs:(8), %rdi + mov %rax, 4(%rdi) + + mov %rsi, %rdi + call __arch_drop_thread + + LOAD_TASK_STATE + + ret + +// %rdi - to struct ptr +__x86_64_enter_task: + // TSS.RSP0 + mov 8(%rdi), %rax + // Kernel stack + mov 0(%rdi), %rdi + + mov %rdi, %rsp + + // Load TSS.RSP0 + mov %gs:(8), %rdi + mov %rax, 4(%rdi) + + LOAD_TASK_STATE + ret diff --git a/kernel/arch/x86_64/src/context.rs b/kernel/arch/x86_64/src/context.rs new file mode 100644 index 00000000..bef56a63 --- /dev/null +++ b/kernel/arch/x86_64/src/context.rs @@ -0,0 +1,525 @@ +use core::{arch::global_asm, cell::UnsafeCell, marker::PhantomData}; + +use kernel_arch_interface::{ + mem::{KernelTableManager, PhysicalMemoryAllocator}, + task::{ForkFrame, StackBuilder, TaskContext, TaskFrame}, +}; +use libk_mm_interface::address::{AsPhysicalAddress, IntoRaw, PhysicalAddress}; +use yggdrasil_abi::{arch::SavedFrame, error::Error}; + +use crate::{mem::KERNEL_TABLES, registers::FpuContext}; + +/// Frame saved onto the stack when taking an IRQ +#[derive(Debug)] +#[repr(C)] +pub struct IrqFrame { + pub rax: u64, + pub rcx: u64, + pub rdx: u64, + pub rbx: u64, + pub rsi: u64, + pub rdi: u64, + pub rbp: u64, + pub r8: u64, + pub r9: u64, + pub r10: u64, + pub r11: u64, + pub r12: u64, + pub r13: u64, + pub r14: u64, + pub r15: u64, + + pub rip: u64, + pub cs: u64, + pub rflags: u64, + pub rsp: u64, + pub ss: u64, +} + +/// Set of registers saved when taking an exception/interrupt +#[derive(Debug)] +#[repr(C)] +pub struct ExceptionFrame { + pub rax: u64, + pub rcx: u64, + pub rdx: u64, + pub rbx: u64, + pub rsi: u64, + pub rdi: u64, + pub rbp: u64, + pub r8: u64, + pub r9: u64, + pub r10: u64, + pub r11: u64, + pub r12: u64, + pub r13: u64, + pub r14: u64, + pub r15: u64, + + pub exc_number: u64, + pub exc_code: u64, + + pub rip: u64, + pub cs: u64, + pub rflags: u64, + pub rsp: u64, + pub ss: u64, +} + +/// Set of registers saved when taking a syscall instruction +#[derive(Debug)] +#[repr(C)] +pub struct SyscallFrame { + pub rax: u64, + pub args: [u64; 6], + + pub rcx: u64, + pub r11: u64, + + pub user_ip: u64, + pub user_sp: u64, + pub user_flags: u64, + + pub rbx: u64, + pub rbp: u64, + pub r12: u64, + pub r13: u64, + pub r14: u64, + pub r15: u64, +} + +#[repr(C, align(0x10))] +struct Inner { + // 0x00 + sp: usize, + // 0x08 + tss_rsp0: usize, +} + +/// x86-64 implementation of a task context +#[allow(dead_code)] +pub struct TaskContextImpl< + K: KernelTableManager, + PA: PhysicalMemoryAllocator
<Address = PhysicalAddress>,
+> {
+    inner: UnsafeCell<Inner>,
+    fpu_context: UnsafeCell<FpuContext>,
+    stack_base_phys: PhysicalAddress,
+    stack_size: usize,
+
+    _alloc: PhantomData<PA>,
+    _table_manager: PhantomData<K>,
+}
+
+// 8 registers + return address (which is not included)
+const COMMON_CONTEXT_SIZE: usize = 8 * 8;
+
+impl TaskFrame for IrqFrame {
+    fn store(&self) -> SavedFrame {
+        SavedFrame {
+            rax: self.rax,
+            rcx: self.rcx,
+            rdx: self.rdx,
+            rbx: self.rbx,
+            rsi: self.rsi,
+            rdi: self.rdi,
+            rbp: self.rbp,
+            r8: self.r8,
+            r9: self.r9,
+            r10: self.r10,
+            r11: self.r11,
+            r12: self.r12,
+            r13: self.r13,
+            r14: self.r14,
+            r15: self.r15,
+            user_ip: self.rip,
+            user_sp: self.rsp,
+            rflags: self.rflags,
+        }
+    }
+
+    fn restore(&mut self, _saved: &SavedFrame) {
+        todo!()
+    }
+
+    fn argument(&self) -> u64 {
+        self.rdi as _
+    }
+
+    fn user_ip(&self) -> usize {
+        self.rip as _
+    }
+
+    fn user_sp(&self) -> usize {
+        self.rsp as _
+    }
+
+    fn set_argument(&mut self, value: u64) {
+        self.rdi = value;
+    }
+
+    fn set_return_value(&mut self, value: u64) {
+        self.rax = value;
+    }
+
+    fn set_user_ip(&mut self, value: usize) {
+        self.rip = value as _;
+    }
+
+    fn set_user_sp(&mut self, value: usize) {
+        self.rsp = value as _;
+    }
+}
+
+impl TaskFrame for ExceptionFrame {
+    fn store(&self) -> SavedFrame {
+        SavedFrame {
+            rax: self.rax,
+            rcx: self.rcx,
+            rdx: self.rdx,
+            rbx: self.rbx,
+            rsi: self.rsi,
+            rdi: self.rdi,
+            rbp: self.rbp,
+            r8: self.r8,
+            r9: self.r9,
+            r10: self.r10,
+            r11: self.r11,
+            r12: self.r12,
+            r13: self.r13,
+            r14: self.r14,
+            r15: self.r15,
+            user_ip: self.rip,
+            user_sp: self.rsp,
+            rflags: self.rflags,
+        }
+    }
+
+    fn restore(&mut self, _saved: &SavedFrame) {
+        todo!()
+    }
+
+    fn argument(&self) -> u64 {
+        0
+    }
+
+    fn user_sp(&self) -> usize {
+        self.rsp as _
+    }
+
+    fn user_ip(&self) -> usize {
+        self.rip as _
+    }
+
+    fn set_user_sp(&mut self, value: usize) {
+        self.rsp = value as _;
+    }
+
+    fn set_user_ip(&mut self, value: usize) {
+        self.rip = value as _;
+    }
+
+    fn set_return_value(&mut self, _value: u64) {
+        // Not in syscall, do not overwrite
+    }
+
+    fn set_argument(&mut self, value: u64) {
+        self.rdi = value;
+    }
+}
+
+impl<K: KernelTableManager, PA: PhysicalMemoryAllocator<Address = PhysicalAddress>> ForkFrame<K, PA>
+    for SyscallFrame
+{
+    type Context = TaskContextImpl<K, PA>;
+
+    unsafe fn fork(&self, address_space: u64) -> Result<TaskContextImpl<K, PA>, Error> {
+        TaskContextImpl::from_syscall_frame(self, address_space)
+    }
+
+    fn set_return_value(&mut self, value: u64) {
+        self.rax = value;
+    }
+}
+
+impl TaskFrame for SyscallFrame {
+    fn store(&self) -> SavedFrame {
+        SavedFrame {
+            rax: self.rax,
+            rcx: self.rcx,
+            rdx: self.args[2],
+            rbx: self.rbx,
+            rsi: self.args[1],
+            rdi: self.args[0],
+            rbp: self.rbp,
+            r8: self.args[4],
+            r9: self.args[5],
+            r10: self.args[3],
+            r11: self.r11,
+            r12: self.r12,
+            r13: self.r13,
+            r14: self.r14,
+            r15: self.r15,
+            user_ip: self.user_ip,
+            user_sp: self.user_sp,
+            rflags: self.user_flags,
+        }
+    }
+
+    fn restore(&mut self, saved: &SavedFrame) {
+        self.rax = saved.rax;
+        self.args[0] = saved.rdi;
+        self.args[1] = saved.rsi;
+        self.args[2] = saved.rdx;
+        self.args[3] = saved.r10;
+        self.args[4] = saved.r8;
+        self.args[5] = saved.r9;
+
+        self.rcx = saved.rcx;
+        self.r11 = saved.r11;
+
+        self.user_ip = saved.user_ip;
+        self.user_sp = saved.user_sp;
+        self.user_flags = saved.rflags;
+
+        self.rbx = saved.rbx;
+        self.rbp = saved.rbp;
+        self.r12 = saved.r12;
+        self.r13 = saved.r13;
+        self.r14 = saved.r14;
+        self.r15 = saved.r15;
+    }
+
+    fn argument(&self) -> u64 {
+        self.args[0]
+    }
+
+    fn user_sp(&self) -> usize {
+        self.user_sp as _
+    }
+
+    fn user_ip(&self) -> usize {
+        self.user_ip as _
+    }
+
+    fn set_user_sp(&mut self, value: usize) {
+        self.user_sp = value as _;
+    }
+
+    fn set_user_ip(&mut self, value: usize) {
+        self.user_ip = value as _;
+    }
+
+    fn set_return_value(&mut self, value: u64) {
+        self.rax = value;
+    }
+
+    fn set_argument(&mut self, value: u64) {
+        self.args[0] = value;
+    }
+}
+
+impl<K: KernelTableManager, PA: PhysicalMemoryAllocator<Address = PhysicalAddress>>
+    TaskContextImpl<K, PA>
+{
+    /// Constructs a new task context from a "forked" syscall frame
+    pub(super) unsafe fn from_syscall_frame(frame: &SyscallFrame, cr3: u64) -> Result<Self, Error> {
+        const USER_TASK_PAGES: usize = 8;
+
+        let stack_base_phys = PA::allocate_contiguous_pages(USER_TASK_PAGES)?;
+        let stack_base = stack_base_phys.raw_virtualize::<K>();
+
+        let mut stack = StackBuilder::new(stack_base, USER_TASK_PAGES * 0x1000);
+
+        // iretq frame
+        stack.push(0x1B);
+        stack.push(frame.user_sp as _);
+        stack.push(0x200);
+        stack.push(0x23);
+        stack.push(frame.user_ip as _);
+
+        stack.push(frame.args[5] as _); // r9
+        stack.push(frame.args[4] as _); // r8
+        stack.push(frame.args[3] as _); // r10
+        stack.push(frame.args[2] as _); // rdx
+        stack.push(frame.args[1] as _); // rsi
+        stack.push(frame.args[0] as _); // rdi
+
+        // callee-saved registers
+        stack.push(__x86_64_task_enter_from_fork as _);
+
+        stack.push(cr3 as _);
+
+        stack.push(frame.rbp as _);
+        stack.push(0x12345678); // XXX TODO: fs_base from SyscallFrame
+        stack.push(frame.r15 as _);
+        stack.push(frame.r14 as _);
+        stack.push(frame.r13 as _);
+        stack.push(frame.r12 as _);
+        stack.push(frame.rbx as _);
+
+        let sp = stack.build();
+        let rsp0 = stack_base + USER_TASK_PAGES * 0x1000;
+
+        Ok(Self {
+            inner: UnsafeCell::new(Inner { sp, tss_rsp0: rsp0 }),
+            fpu_context: UnsafeCell::new(FpuContext::new()),
+            stack_base_phys,
+            stack_size: USER_TASK_PAGES * 0x1000,
+            _alloc: PhantomData,
+            _table_manager: PhantomData,
+        })
+    }
+}
+
+unsafe impl<K: KernelTableManager, PA: PhysicalMemoryAllocator<Address = PhysicalAddress>> Sync
+    for TaskContextImpl<K, PA>
+{
+}
+
+impl<K: KernelTableManager, PA: PhysicalMemoryAllocator<Address = PhysicalAddress>>
+    TaskContext<K, PA> for TaskContextImpl<K, PA>
+{
+    const SIGNAL_STACK_EXTRA_ALIGN: usize = 8;
+    const USER_STACK_EXTRA_ALIGN: usize = 8;
+
+    fn kernel(entry: extern "C" fn(usize) -> !, arg: usize) -> Result<Self, Error> {
+        const KERNEL_TASK_PAGES: usize = 32;
+
+        let stack_base_phys = PA::allocate_contiguous_pages(KERNEL_TASK_PAGES)?;
+        let stack_base = stack_base_phys.raw_virtualize::<K>();
+
+        let mut stack = StackBuilder::new(stack_base, KERNEL_TASK_PAGES * 0x1000);
+
+        // Entry and argument
+        stack.push(entry as _);
+        stack.push(arg);
+
+        // XXX
+        setup_common_context(
+            &mut stack,
+            __x86_64_task_enter_kernel as _,
+            unsafe { KERNEL_TABLES.as_physical_address().into_raw() },
+            0,
+        );
+
+        let sp = stack.build();
+
+        // TODO stack is leaked
+
+        Ok(Self {
+            inner: UnsafeCell::new(Inner { sp, tss_rsp0: 0 }),
+            fpu_context: UnsafeCell::new(FpuContext::new()),
+            stack_base_phys,
+            stack_size: KERNEL_TASK_PAGES * 0x1000,
+
+            _alloc: PhantomData,
+            _table_manager: PhantomData,
+        })
+    }
+
+    fn user(
+        entry: usize,
+        arg: usize,
+        cr3: u64,
+        user_stack_sp: usize,
+        fs_base: usize,
+    ) -> Result<Self, Error> {
+        const USER_TASK_PAGES: usize = 8;
+
+        let stack_base_phys = PA::allocate_contiguous_pages(USER_TASK_PAGES)?;
+        let stack_base = stack_base_phys.raw_virtualize::<K>();
+
+        let mut stack = StackBuilder::new(stack_base, USER_TASK_PAGES * 0x1000);
+
+        stack.push(entry as _);
+        stack.push(arg);
+        stack.push(user_stack_sp);
+
+        setup_common_context(&mut stack, __x86_64_task_enter_user as _, cr3, fs_base);
+
+        let sp = stack.build();
+        let rsp0 = stack_base + USER_TASK_PAGES * 0x1000;
+
+        Ok(Self {
+            inner: UnsafeCell::new(Inner { sp, tss_rsp0: rsp0 }),
+            fpu_context: UnsafeCell::new(FpuContext::new()),
+            stack_base_phys,
+            stack_size: USER_TASK_PAGES * 0x1000,
+
+            _alloc: PhantomData,
+            _table_manager: PhantomData,
+        })
+    }
+
+    unsafe fn enter(&self) -> ! {
+        FpuContext::restore(self.fpu_context.get());
+
+        __x86_64_enter_task(self.inner.get())
+    }
+
+    unsafe fn switch(&self, from: &Self) {
+        let dst = self.inner.get();
+        let src = from.inner.get();
+
+        if dst != src {
+            // Save the old context
+            FpuContext::save(from.fpu_context.get());
+            // Load next context
+            FpuContext::restore(self.fpu_context.get());
+
+            __x86_64_switch_task(dst, src);
+        }
+    }
+
+    unsafe fn switch_and_drop(&self, thread: *const ()) {
+        let dst = self.inner.get();
+
+        FpuContext::restore(self.fpu_context.get());
+
+        __x86_64_switch_and_drop(dst, thread)
+    }
+}
+
+impl<K: KernelTableManager, PA: PhysicalMemoryAllocator<Address = PhysicalAddress>> Drop
+    for TaskContextImpl<K, PA>
+{
+    fn drop(&mut self) {
+        assert_eq!(self.stack_size % 0x1000, 0);
+
+        for offset in (0..self.stack_size).step_by(0x1000) {
+            unsafe {
+                PA::free_page(self.stack_base_phys.add(offset));
+            }
+        }
+    }
+}
+
+fn setup_common_context(builder: &mut StackBuilder, entry: usize, cr3: u64, fs_base: usize) {
+    builder.push(entry);
+
+    builder.push(cr3 as _);
+
+    builder.push(0); // %rbp
+    builder.push(fs_base); // %fs_base
+    builder.push(0); // %r15
+    builder.push(0); // %r14
+    builder.push(0); // %r13
+    builder.push(0); // %r12
+    builder.push(0); // %rbx
+}
+
+extern "C" {
+    fn __x86_64_task_enter_kernel();
+    fn __x86_64_task_enter_user();
+    fn __x86_64_task_enter_from_fork();
+    fn __x86_64_enter_task(to: *mut Inner) -> !;
+    fn __x86_64_switch_task(to: *mut Inner, from: *mut Inner);
+    fn __x86_64_switch_and_drop(to: *mut Inner, from: *const ());
+}
+
+global_asm!(
+    include_str!("context.S"),
+    context_size = const COMMON_CONTEXT_SIZE,
+    options(att_syntax)
+);
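Editor's note: `setup_common_context` and the `SAVE_TASK_STATE`/`LOAD_TASK_STATE` macros in context.S must agree on one layout. Written out as offsets from the final `%rsp` (a cross-check aid, not part of the patch):

```rust
// Offsets into the COMMON_CONTEXT_SIZE (8 * 8 bytes) save area, matching
// LOAD_TASK_STATE in context.S; the return address to `entry` sits just above.
const SAVED_RBX: usize = 0x00; // 0(%rsp)
const SAVED_R12: usize = 0x08;
const SAVED_R13: usize = 0x10;
const SAVED_R14: usize = 0x18;
const SAVED_R15: usize = 0x20;
const SAVED_FS_BASE: usize = 0x28; // written to MSR_IA32_FS_BASE via wrmsr
const SAVED_RBP: usize = 0x30;
const SAVED_CR3: usize = 0x38; // restored first, before the other registers
```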
diff --git a/kernel/arch/x86_64/src/lib.rs b/kernel/arch/x86_64/src/lib.rs
new file mode 100644
index 00000000..86158492
--- /dev/null
+++ b/kernel/arch/x86_64/src/lib.rs
@@ -0,0 +1,172 @@
+#![no_std]
+#![feature(
+    effects,
+    strict_provenance,
+    asm_const,
+    naked_functions,
+    trait_upcasting
+)]
+
+extern crate alloc;
+
+use core::{
+    ops::DerefMut,
+    sync::atomic::{AtomicUsize, Ordering},
+};
+
+use alloc::vec::Vec;
+use device_api::interrupt::{LocalInterruptController, MessageInterruptController};
+use kernel_arch_interface::{
+    cpu::{CpuImpl, IpiQueue},
+    task::Scheduler,
+    util::OneTimeInit,
+    Architecture,
+};
+use libk_mm_interface::address::PhysicalAddress;
+use registers::MSR_IA32_KERNEL_GS_BASE;
+use tock_registers::interfaces::Writeable;
+
+pub mod context;
+pub mod mem;
+pub mod registers;
+
+pub use context::TaskContextImpl;
+pub use mem::{process::ProcessAddressSpaceImpl, KernelTableManagerImpl};
+
+pub struct ArchitectureImpl;
+
+pub const KERNEL_VIRT_OFFSET: usize = 0xFFFFFF8000000000;
+
+pub trait LocalApicInterface: LocalInterruptController + MessageInterruptController {
+    /// Performs an application processor startup sequence.
+    ///
+    /// # Safety
+    ///
+    /// Unsafe: only meant to be called by the BSP during SMP init.
+    unsafe fn wakeup_cpu(&self, apic_id: u32, bootstrap_code: PhysicalAddress);
+
+    /// Signals local APIC that we've handled the IRQ
+    fn clear_interrupt(&self);
+}
+
+#[repr(C, align(0x10))]
+pub struct PerCpuData {
+    // 0x00
+    pub this: *mut Self,
+    // 0x08, used in assembly
+    pub tss_address: usize,
+    // 0x10, used in assembly
+    pub tmp_address: usize,
+
+    pub local_apic: &'static dyn LocalApicInterface,
+}
+
+impl PerCpuData {
+    pub fn local_apic(&self) -> &'static dyn LocalApicInterface {
+        self.local_apic
+    }
+}
+
+static IPI_QUEUES: OneTimeInit<Vec<IpiQueue<ArchitectureImpl>>> = OneTimeInit::new();
+pub static CPU_COUNT: AtomicUsize = AtomicUsize::new(1);
+
+#[naked]
+extern "C" fn idle_task(_: usize) -> ! {
+    unsafe {
+        core::arch::asm!(
+            r#"
+        1:
+            nop
+            jmp 1b
+        "#,
+            options(noreturn, att_syntax)
+        );
+    }
+}
+
+impl ArchitectureImpl {
+    fn local_cpu_data() -> Option<&'static mut PerCpuData> {
+        unsafe { (Self::local_cpu() as *mut PerCpuData).as_mut() }
+    }
+}
+
+impl Architecture for ArchitectureImpl {
+    type PerCpuData = PerCpuData;
+
+    unsafe fn set_local_cpu(cpu: *mut ()) {
+        MSR_IA32_KERNEL_GS_BASE.set(cpu as u64);
+        core::arch::asm!("wbinvd; swapgs");
+    }
+
+    fn local_cpu() -> *mut () {
+        let mut addr: u64;
+        unsafe {
+            core::arch::asm!("movq %gs:(0), {0}", out(reg) addr, options(att_syntax));
+        }
+        addr as _
+    }
+
+    unsafe fn init_ipi_queues(queues: Vec<IpiQueue<Self>>) {
+        IPI_QUEUES.init(queues);
+    }
+
+    unsafe fn init_local_cpu<S: Scheduler + 'static>(id: Option<u32>, data: Self::PerCpuData) {
+        use alloc::boxed::Box;
+
+        let cpu = Box::leak(Box::new(CpuImpl::<Self, S>::new(
+            id.expect("x86_64 requires the CPU ID to be set manually"),
+            data,
+        )));
+        cpu.this = cpu.deref_mut();
+
+        cpu.set_local();
+    }
+
+    fn idle_task() -> extern "C" fn(usize) -> ! {
+        idle_task
+    }
+
+    fn cpu_count() -> usize {
+        CPU_COUNT.load(Ordering::Acquire)
+    }
+
+    fn cpu_index<S: Scheduler + 'static>() -> u32 {
+        CpuImpl::<Self, S>::local().id()
+    }
+
+    fn interrupt_mask() -> bool {
+        let mut flags: u64;
+        unsafe {
+            core::arch::asm!("pushfq; pop {0}", out(reg) flags, options(att_syntax));
+        }
+        // If IF is zero, interrupts are disabled (masked)
+        flags & (1 << 9) == 0
+    }
+
+    unsafe fn set_interrupt_mask(mask: bool) -> bool {
+        let old = Self::interrupt_mask();
+        if mask {
+            core::arch::asm!("cli");
+        } else {
+            core::arch::asm!("sti");
+        }
+        old
+    }
+
+    #[inline]
+    fn wait_for_interrupt() {
+        unsafe {
+            core::arch::asm!("hlt");
+        }
+    }
+
+    fn local_interrupt_controller() -> &'static dyn LocalInterruptController {
+        let local = Self::local_cpu_data().unwrap();
+        local.local_apic
+    }
+
+    fn message_interrupt_controller() -> &'static dyn MessageInterruptController {
+        let local = Self::local_cpu_data().unwrap();
+        local.local_apic
+    }
+}
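Editor's note: `local_cpu()` reading `%gs:(0)` only works because the first field of `PerCpuData` is a pointer to the structure itself (set up in `init_local_cpu`). Hypothetical compile-time checks, not in the patch, that pin down the offsets the assembly relies on:

```rust
use core::mem::offset_of;

const _: () = assert!(offset_of!(PerCpuData, this) == 0x00);        // movq %gs:(0), ...
const _: () = assert!(offset_of!(PerCpuData, tss_address) == 0x08); // context.S: mov %gs:(8), %rdi
const _: () = assert!(offset_of!(PerCpuData, tmp_address) == 0x10);
```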
diff --git a/kernel/arch/x86_64/src/mem/mod.rs b/kernel/arch/x86_64/src/mem/mod.rs
new file mode 100644
index 00000000..0d495b65
--- /dev/null
+++ b/kernel/arch/x86_64/src/mem/mod.rs
@@ -0,0 +1,405 @@
+use core::{
+    alloc::Layout,
+    ops::{Deref, DerefMut},
+    ptr::addr_of,
+    sync::atomic::{AtomicUsize, Ordering},
+};
+
+use kernel_arch_interface::mem::{
+    DeviceMemoryAttributes, KernelTableManager, RawDeviceMemoryMapping,
+};
+use libk_mm_interface::{
+    address::{FromRaw, PhysicalAddress},
+    table::{EntryLevel, EntryLevelExt},
+    KernelImageObject,
+};
+use memtables::x86_64::FixedTables;
+use static_assertions::{const_assert_eq, const_assert_ne};
+use yggdrasil_abi::error::Error;
+
+use crate::{registers::CR3, KERNEL_VIRT_OFFSET};
+
+use self::table::{PageAttributes, PageEntry, PageTable, L0, L1, L2, L3};
+
+pub mod process;
+pub mod table;
+
+#[derive(Debug)]
+pub struct KernelTableManagerImpl;
+
+const CANONICAL_ADDRESS_MASK: usize = 0xFFFF000000000000;
+const KERNEL_PHYS_BASE: usize = 0x200000;
+
+// Mapped at compile time
+const KERNEL_MAPPING_BASE: usize = KERNEL_VIRT_OFFSET + KERNEL_PHYS_BASE;
+const KERNEL_L0_INDEX: usize = KERNEL_MAPPING_BASE.page_index::<L0>();
+const KERNEL_L1_INDEX: usize = KERNEL_MAPPING_BASE.page_index::<L1>();
+const KERNEL_START_L2_INDEX: usize = KERNEL_MAPPING_BASE.page_index::<L2>();
+
+// Must not be zero, should be at 4MiB
+const_assert_ne!(KERNEL_START_L2_INDEX, 0);
+// From static mapping
+const_assert_eq!(KERNEL_L0_INDEX, 511);
+const_assert_eq!(KERNEL_L1_INDEX, 0);
+
+// Mapped at boot
+const EARLY_MAPPING_L2I: usize = KERNEL_START_L2_INDEX - 1;
+const HEAP_MAPPING_L1I: usize = KERNEL_L1_INDEX + 1;
+const DEVICE_MAPPING_L1I: usize = KERNEL_L1_INDEX + 2;
+const RAM_MAPPING_L0I: usize = KERNEL_L0_INDEX - 1;
+
+const DEVICE_MAPPING_L3_COUNT: usize = 4;
+
+#[link_section = ".data.tables"]
+pub static mut KERNEL_TABLES: KernelImageObject<FixedTables> =
+    unsafe { KernelImageObject::new(FixedTables::zeroed()) };
+
+// 2MiB for early mappings
+const EARLY_MAPPING_OFFSET: usize = CANONICAL_ADDRESS_MASK
+    | (KERNEL_L0_INDEX * L0::SIZE)
+    | (KERNEL_L1_INDEX * L1::SIZE)
+    | (EARLY_MAPPING_L2I * L2::SIZE);
+static mut EARLY_MAPPING_L3: PageTable<L3> = PageTable::zeroed();
+// 1GiB for heap mapping
+pub const HEAP_MAPPING_OFFSET: usize =
+    CANONICAL_ADDRESS_MASK | (KERNEL_L0_INDEX * L0::SIZE) | (HEAP_MAPPING_L1I * L1::SIZE);
+pub(super) static mut HEAP_MAPPING_L2: PageTable<L2> = PageTable::zeroed();
+// 1GiB for device MMIO mapping
+const DEVICE_MAPPING_OFFSET: usize =
+    CANONICAL_ADDRESS_MASK | (KERNEL_L0_INDEX * L0::SIZE) | (DEVICE_MAPPING_L1I * L1::SIZE);
+static mut DEVICE_MAPPING_L2: PageTable<L2> = PageTable::zeroed();
+static mut DEVICE_MAPPING_L3S: [PageTable<L3>; DEVICE_MAPPING_L3_COUNT] =
+    [PageTable::zeroed(); DEVICE_MAPPING_L3_COUNT];
+// 512GiB for whole RAM mapping
+pub const RAM_MAPPING_OFFSET: usize = CANONICAL_ADDRESS_MASK | (RAM_MAPPING_L0I * L0::SIZE);
+pub static MEMORY_LIMIT: AtomicUsize = AtomicUsize::new(0);
+pub static mut RAM_MAPPING_L1: PageTable<L1> = PageTable::zeroed();
+
+impl KernelTableManager for KernelTableManagerImpl {
+    fn virtualize(address: u64) -> usize {
+        let address = address as usize;
+        if address < MEMORY_LIMIT.load(Ordering::Acquire) {
+            address + RAM_MAPPING_OFFSET
+        } else {
+            panic!("Invalid physical address: {:#x}", address);
+        }
+    }
+
+    fn physicalize(address: usize) -> u64 {
+        if address < RAM_MAPPING_OFFSET
+            || address - RAM_MAPPING_OFFSET >= MEMORY_LIMIT.load(Ordering::Acquire)
+        {
+            panic!("Not a virtualized physical address: {:#x}", address);
+        }
+
+        (address - RAM_MAPPING_OFFSET) as _
+    }
+
+    unsafe fn map_device_pages(
+        base: u64,
+        count: usize,
+        attrs: DeviceMemoryAttributes,
+    ) -> Result<RawDeviceMemoryMapping<Self>, Error> {
+        map_device_memory(PhysicalAddress::from_raw(base), count, attrs)
+    }
+
+    unsafe fn unmap_device_pages(mapping: &RawDeviceMemoryMapping<Self>) {
+        unmap_device_memory(mapping)
+    }
+}
+
+// Early mappings
+unsafe fn map_early_pages(physical: PhysicalAddress, count: usize) -> Result<usize, Error> {
+    for l3i in 0..512 {
+        let mut taken = false;
+        for i in 0..count {
+            if EARLY_MAPPING_L3[i + l3i].is_present() {
+                taken = true;
+                break;
+            }
+        }
+
+        if taken {
+            continue;
+        }
+
+        for i in 0..count {
+            // TODO NX, NC
+            EARLY_MAPPING_L3[i + l3i] =
+                PageEntry::page(physical.add(i * L3::SIZE), PageAttributes::WRITABLE);
+        }
+
+        return Ok(EARLY_MAPPING_OFFSET + l3i * L3::SIZE);
+    }
+
+    Err(Error::OutOfMemory)
+}
+
+unsafe fn unmap_early_page(address: usize) {
+    if !(EARLY_MAPPING_OFFSET..EARLY_MAPPING_OFFSET + L2::SIZE).contains(&address) {
+        panic!("Tried to unmap invalid early mapping: {:#x}", address);
+    }
+
+    let l3i = (address - EARLY_MAPPING_OFFSET).page_index::<L3>();
+
+    assert!(EARLY_MAPPING_L3[l3i].is_present());
+    EARLY_MAPPING_L3[l3i] = PageEntry::INVALID;
+}
+
+// Device mappings
+unsafe fn map_device_memory_l3(
+    base: PhysicalAddress,
+    count: usize,
+    _attrs: DeviceMemoryAttributes,
+) -> Result<usize, Error> {
+    // TODO don't map pages if already mapped
+
+    'l0: for i in 0..DEVICE_MAPPING_L3_COUNT * 512 {
+        for j in 0..count {
+            let l2i = (i + j) / 512;
+            let l3i = (i + j) % 512;
+
+            if DEVICE_MAPPING_L3S[l2i][l3i].is_present() {
+                continue 'l0;
+            }
+        }
+
+        for j in 0..count {
+            let l2i = (i + j) / 512;
+            let l3i = (i + j) % 512;
+
+            // TODO NX, NC
+            DEVICE_MAPPING_L3S[l2i][l3i] =
+                PageEntry::page(base.add(j * L3::SIZE), PageAttributes::WRITABLE);
+        }
+
+        return Ok(DEVICE_MAPPING_OFFSET + i * L3::SIZE);
+    }
+
+    Err(Error::OutOfMemory)
+}
+
+unsafe fn map_device_memory_l2(
+    base: PhysicalAddress,
+    count: usize,
+    _attrs: DeviceMemoryAttributes,
+) -> Result<usize, Error> {
+    'l0: for i in DEVICE_MAPPING_L3_COUNT..512 {
+        for j in 0..count {
+            if DEVICE_MAPPING_L2[i + j].is_present() {
+                continue 'l0;
+            }
+        }
+
+        for j in 0..count {
+            DEVICE_MAPPING_L2[i + j] =
+                PageEntry::<L2>::block(base.add(j * L2::SIZE), PageAttributes::WRITABLE);
+        }
+
+        // debugln!(
+        //     "map l2s: base={:#x}, count={} -> {:#x}",
+        //     base,
+        //     count,
+        //     DEVICE_MAPPING_OFFSET + i * L2::SIZE
+        // );
+        return Ok(DEVICE_MAPPING_OFFSET + i * L2::SIZE);
+    }
+
+    Err(Error::OutOfMemory)
+}
+
+unsafe fn map_device_memory(
+    base: PhysicalAddress,
+    size: usize,
+    attrs: DeviceMemoryAttributes,
+) -> Result<RawDeviceMemoryMapping<KernelTableManagerImpl>, Error> {
+    // debugln!("Map {}B @ {:#x}", size, base);
+    let l3_aligned = base.page_align_down::<L3>();
+    let l3_offset = base.page_offset::<L3>();
+    let page_count = (l3_offset + size).page_count::<L3>();
+
+    if page_count > 256 {
+        // Large mapping, use L2 mapping instead
+        let l2_aligned = base.page_align_down::<L2>();
+        let l2_offset = base.page_offset::<L2>();
+        let page_count = (l2_offset + size).page_count::<L2>();
+
+        let base_address = map_device_memory_l2(l2_aligned, page_count, attrs)?;
+        let address = base_address + l2_offset;
+
+        Ok(RawDeviceMemoryMapping::from_raw_parts(
+            address,
+            base_address,
+            page_count,
+            L2::SIZE,
+        ))
+    } else {
+        // Just map the pages directly
+        let base_address = map_device_memory_l3(l3_aligned, page_count, attrs)?;
+        let address = base_address + l3_offset;
+
+        Ok(RawDeviceMemoryMapping::from_raw_parts(
+            address,
+            base_address,
+            page_count,
+            L3::SIZE,
+        ))
+    }
+}
+
+unsafe fn unmap_device_memory(map: &RawDeviceMemoryMapping<KernelTableManagerImpl>) {
+    // debugln!(
+    //     "Unmap {}B @ {:#x}",
+    //     map.page_count * map.page_size,
+    //     map.base_address
+    // );
+    match map.page_size {
+        L3::SIZE => {
+            for i in 0..map.page_count {
+                let page = map.base_address + i * L3::SIZE;
+                let l2i = page.page_index::<L2>();
+                let l3i = page.page_index::<L3>();
+                assert!(DEVICE_MAPPING_L3S[l2i][l3i].is_present());
+                DEVICE_MAPPING_L3S[l2i][l3i] = PageEntry::INVALID;
+                flush_tlb_entry(page);
+            }
+        }
+        L2::SIZE => todo!(),
+        _ => unimplemented!(),
+    }
+}
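Editor's note: the `page_count > 256` test above means anything needing more than 1 MiB of 4 KiB pages is promoted to 2 MiB (L2) mappings. A worked example of the arithmetic (not in the patch):

```rust
// A 4 MiB region at physical 0xFE00_1200 exceeds the 256-page threshold,
// so map_device_memory takes the L2 path.
const BASE: usize = 0xFE00_1200;
const SIZE: usize = 4 << 20;

const L3_PAGES: usize = ((BASE & 0xFFF) + SIZE + 0xFFF) >> 12;          // 1025 > 256
const L2_OFFSET: usize = BASE & (0x20_0000 - 1);                        // 0x1200
const L2_PAGES: usize = (L2_OFFSET + SIZE + 0x1F_FFFF) >> 21;           // 3 huge pages
// The caller-visible address is base_address + L2_OFFSET.
```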
+
+pub unsafe fn map_heap_block(index: usize, page: PhysicalAddress) {
+    if !page.is_page_aligned_for::<L2>() {
+        panic!("Attempted to map a misaligned 2MiB page");
+    }
+    assert!(index < 512);
+
+    if HEAP_MAPPING_L2[index].is_present() {
+        panic!("Page is already mapped: {:#x}", page);
+    }
+
+    // TODO NX
+    HEAP_MAPPING_L2[index] = PageEntry::<L2>::block(page, PageAttributes::WRITABLE);
+}
+
+/// Memory mapping which may be used for performing early kernel initialization
+pub struct EarlyMapping<'a, T: ?Sized> {
+    value: &'a mut T,
+    page_count: usize,
+}
+
+impl<'a, T: Sized> EarlyMapping<'a, T> {
+    pub unsafe fn map(physical: PhysicalAddress) -> Result<EarlyMapping<'a, T>, Error> {
+        let layout = Layout::new::<T>();
+        let aligned = physical.page_align_down::<L3>();
+        let offset = physical.page_offset::<L3>();
+        let page_count = (offset + layout.size() + L3::SIZE - 1) / L3::SIZE;
+
+        let virt = map_early_pages(aligned, page_count)?;
+        let value = &mut *((virt + offset) as *mut T);
+
+        Ok(EarlyMapping { value, page_count })
+    }
+
+    pub unsafe fn map_slice(
+        physical: PhysicalAddress,
+        len: usize,
+    ) -> Result<EarlyMapping<'a, [T]>, Error> {
+        let layout = Layout::array::<T>(len).unwrap();
+        let aligned = physical.page_align_down::<L3>();
+        let offset = physical.page_offset::<L3>();
+        let page_count = (offset + layout.size() + L3::SIZE - 1) / L3::SIZE;
+
+        let virt = map_early_pages(aligned, page_count)?;
+        let value = core::slice::from_raw_parts_mut((virt + offset) as *mut T, len);
+
+        Ok(EarlyMapping { value, page_count })
+    }
+}
+
+impl<'a, T: ?Sized> Deref for EarlyMapping<'a, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.value
+    }
+}
+
+impl<'a, T: ?Sized> DerefMut for EarlyMapping<'a, T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.value
+    }
+}
+
+impl<'a, T: ?Sized> Drop for EarlyMapping<'a, T> {
+    fn drop(&mut self) {
+        let address = (self.value as *mut T).addr() & !(L3::SIZE - 1);
+
+        for i in 0..self.page_count {
+            let page = address + i * L3::SIZE;
+
+            unsafe {
+                unmap_early_page(page);
+            }
+        }
+    }
+}
+
+pub fn clone_kernel_tables(dst: &mut PageTable<L0>) {
+    unsafe {
+        dst[KERNEL_L0_INDEX] = PageEntry::from_raw(KERNEL_TABLES.l0.data[KERNEL_L0_INDEX]);
+        dst[RAM_MAPPING_L0I] = PageEntry::from_raw(KERNEL_TABLES.l0.data[RAM_MAPPING_L0I]);
+    }
+}
+
+/// Sets up the following memory map:
+/// ...: KERNEL_TABLES.l0:
+/// * 0xFFFFFF0000000000 .. 0xFFFFFFFF8000000000 : RAM_MAPPING_L1
+/// * 0xFFFFFF8000000000 .. ...                  : KERNEL_TABLES.kernel_l1:
+///   * 0xFFFFFF8000000000 .. 0xFFFFFF8040000000 : KERNEL_TABLES.kernel_l2
+///     * 0xFFFFFF8000000000 .. 0xFFFFFF8000200000 : ---
+///     * 0xFFFFFF8000200000 .. 0xFFFFFF8000400000 : EARLY_MAPPING_L3
+///     * 0xFFFFFF8000400000 .. ...                : KERNEL_TABLES.kernel_l3s
+///   * 0xFFFFFF8040000000 .. 0xFFFFFF8080000000 : HEAP_MAPPING_L2
+///   * 0xFFFFFF8080000000 .. 0xFFFFFF8100000000 : DEVICE_MAPPING_L2
+///     * 0xFFFFFF8080000000 .. 0xFFFFFF8080800000 : DEVICE_MAPPING_L3S
+///     * 0xFFFFFF8080800000 .. 0xFFFFFF8100000000 : ...
+pub unsafe fn init_fixed_tables() {
+    // TODO this could be built at compile time too?
+ let early_mapping_l3_phys = addr_of!(EARLY_MAPPING_L3) as usize - KERNEL_VIRT_OFFSET; + let device_mapping_l2_phys = addr_of!(DEVICE_MAPPING_L2) as usize - KERNEL_VIRT_OFFSET; + let heap_mapping_l2_phys = addr_of!(HEAP_MAPPING_L2) as usize - KERNEL_VIRT_OFFSET; + let ram_mapping_l1_phys = addr_of!(RAM_MAPPING_L1) as usize - KERNEL_VIRT_OFFSET; + + for i in 0..DEVICE_MAPPING_L3_COUNT { + let device_mapping_l3_phys = PhysicalAddress::from_raw( + &DEVICE_MAPPING_L3S[i] as *const _ as usize - KERNEL_VIRT_OFFSET, + ); + DEVICE_MAPPING_L2[i] = PageEntry::table(device_mapping_l3_phys, PageAttributes::WRITABLE); + } + + assert_eq!(KERNEL_TABLES.kernel_l2.data[EARLY_MAPPING_L2I], 0); + KERNEL_TABLES.kernel_l2.data[EARLY_MAPPING_L2I] = (early_mapping_l3_phys as u64) + | (PageAttributes::WRITABLE | PageAttributes::PRESENT).bits(); + + assert_eq!(KERNEL_TABLES.kernel_l1.data[HEAP_MAPPING_L1I], 0); + KERNEL_TABLES.kernel_l1.data[HEAP_MAPPING_L1I] = + (heap_mapping_l2_phys as u64) | (PageAttributes::WRITABLE | PageAttributes::PRESENT).bits(); + assert_eq!(KERNEL_TABLES.kernel_l1.data[DEVICE_MAPPING_L1I], 0); + KERNEL_TABLES.kernel_l1.data[DEVICE_MAPPING_L1I] = (device_mapping_l2_phys as u64) + | (PageAttributes::WRITABLE | PageAttributes::PRESENT).bits(); + + assert_eq!(KERNEL_TABLES.l0.data[RAM_MAPPING_L0I], 0); + KERNEL_TABLES.l0.data[RAM_MAPPING_L0I] = + (ram_mapping_l1_phys as u64) | (PageAttributes::WRITABLE | PageAttributes::PRESENT).bits(); + + // TODO ENABLE EFER.NXE + let cr3 = &KERNEL_TABLES.l0 as *const _ as usize - KERNEL_VIRT_OFFSET; + CR3.set_address(cr3); +} + +#[inline] +pub unsafe fn flush_tlb_entry(address: usize) { + core::arch::asm!("invlpg ({0})", in(reg) address, options(att_syntax)); +} diff --git a/kernel/arch/x86_64/src/mem/process.rs b/kernel/arch/x86_64/src/mem/process.rs new file mode 100644 index 00000000..bd4a2059 --- /dev/null +++ b/kernel/arch/x86_64/src/mem/process.rs @@ -0,0 +1,161 @@ +//! x86-64-specific process address space management functions +use core::marker::PhantomData; + +use libk_mm_interface::{ + address::{AsPhysicalAddress, IntoRaw, PhysicalAddress}, + pointer::PhysicalRefMut, + process::ProcessAddressSpaceManager, + table::{ + EntryLevel, EntryLevelDrop, EntryLevelExt, MapAttributes, NextPageTable, TableAllocator, + }, +}; +use yggdrasil_abi::error::Error; + +use crate::KernelTableManagerImpl; + +use super::{ + clone_kernel_tables, flush_tlb_entry, + table::{PageEntry, PageTable, L0, L1, L2, L3}, +}; + +/// Represents a process or kernel address space. Because x86-64 does not have cool stuff like +/// TTBR0 and TTBR1, all address spaces are initially cloned from the kernel space. 
+#[repr(C)]
+pub struct ProcessAddressSpaceImpl<TA: TableAllocator> {
+    l0: PhysicalRefMut<'static, PageTable<L0>, KernelTableManagerImpl>,
+    _alloc: PhantomData<TA>,
+}
+
+impl<TA: TableAllocator> ProcessAddressSpaceManager<TA> for ProcessAddressSpaceImpl<TA> {
+    // Start with 8GiB
+    const LOWER_LIMIT_PFN: usize = (8 << 30) / L3::SIZE;
+    // 16GiB VM limit
+    const UPPER_LIMIT_PFN: usize = (16 << 30) / L3::SIZE;
+
+    fn new() -> Result<Self, Error> {
+        let mut l0 = unsafe {
+            PhysicalRefMut::<'static, PageTable<L0>, KernelTableManagerImpl>::map(
+                TA::allocate_page_table()?,
+            )
+        };
+
+        for i in 0..512 {
+            l0[i] = PageEntry::INVALID;
+        }
+
+        clone_kernel_tables(&mut l0);
+
+        Ok(Self {
+            l0,
+            _alloc: PhantomData,
+        })
+    }
+
+    #[inline]
+    unsafe fn map_page(
+        &mut self,
+        address: usize,
+        physical: PhysicalAddress,
+        flags: MapAttributes,
+    ) -> Result<(), Error> {
+        self.write_l3_entry(address, PageEntry::page(physical, flags.into()), false)
+    }
+
+    unsafe fn unmap_page(&mut self, address: usize) -> Result<PhysicalAddress, Error> {
+        self.pop_l3_entry(address)
+    }
+
+    #[inline]
+    fn translate(&self, address: usize) -> Result<(PhysicalAddress, MapAttributes), Error> {
+        self.read_l3_entry(address)
+            .ok_or(Error::InvalidMemoryOperation)
+    }
+
+    fn as_address_with_asid(&self) -> u64 {
+        // TODO x86-64 PCID/ASID?
+        unsafe { self.l0.as_physical_address().into_raw() }
+    }
+
+    unsafe fn clear(&mut self) {
+        self.l0
+            .drop_range::<TA>(0..((Self::UPPER_LIMIT_PFN * L3::SIZE).page_index::<L0>()));
+    }
+}
+
+impl<TA: TableAllocator> ProcessAddressSpaceImpl<TA> {
+    // Write a single 4KiB entry
+    fn write_l3_entry(
+        &mut self,
+        virt: usize,
+        entry: PageEntry<L3>,
+        overwrite: bool,
+    ) -> Result<(), Error> {
+        let l0i = virt.page_index::<L0>();
+        let l1i = virt.page_index::<L1>();
+        let l2i = virt.page_index::<L2>();
+        let l3i = virt.page_index::<L3>();
+
+        let mut l1 = self.l0.get_mut_or_alloc::<TA>(l0i)?;
+        let mut l2 = l1.get_mut_or_alloc::<TA>(l1i)?;
+        let mut l3 = l2.get_mut_or_alloc::<TA>(l2i)?;
+
+        if l3[l3i].is_present() && !overwrite {
+            todo!();
+        }
+
+        l3[l3i] = entry;
+        unsafe {
+            flush_tlb_entry(virt);
+        }
+
+        Ok(())
+    }
+
+    fn pop_l3_entry(&mut self, virt: usize) -> Result<PhysicalAddress, Error> {
+        let l0i = virt.page_index::<L0>();
+        let l1i = virt.page_index::<L1>();
+        let l2i = virt.page_index::<L2>();
+        let l3i = virt.page_index::<L3>();
+
+        // TODO somehow drop tables if they're known to be empty?
+        let mut l1 = self.l0.get_mut(l0i).ok_or(Error::DoesNotExist)?;
+        let mut l2 = l1.get_mut(l1i).ok_or(Error::DoesNotExist)?;
+        let mut l3 = l2.get_mut(l2i).ok_or(Error::DoesNotExist)?;
+
+        let page = l3[l3i].as_page().ok_or(Error::DoesNotExist)?;
+
+        l3[l3i] = PageEntry::INVALID;
+        unsafe {
+            flush_tlb_entry(virt);
+        }
+
+        Ok(page)
+    }
+
+    fn read_l3_entry(&self, virt: usize) -> Option<(PhysicalAddress, MapAttributes)> {
+        let l0i = virt.page_index::<L0>();
+        let l1i = virt.page_index::<L1>();
+        let l2i = virt.page_index::<L2>();
+        let l3i = virt.page_index::<L3>();
+
+        let l1 = self.l0.get(l0i)?;
+        let l2 = l1.get(l1i)?;
+        let l3 = l2.get(l2i)?;
+
+        let page = l3[l3i].as_page()?;
+
+        Some((page, l3[l3i].attributes().into()))
+    }
+}
+
+impl<TA: TableAllocator> Drop for ProcessAddressSpaceImpl<TA> {
+    fn drop(&mut self) {
+        // SAFETY: with safe usage of the ProcessAddressSpaceImpl, clearing and dropping
+        // is safe, no one refers to the memory
+        unsafe {
+            self.clear();
+            let l0_phys = self.l0.as_physical_address();
+            TA::free_page_table(l0_phys);
+        }
+    }
+}
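Editor's note: for concreteness, here is how the four indices used by `write_l3_entry` fall out of a virtual address, matching the `SHIFT` constants defined in table.rs below (worked example, not in the patch):

```rust
const VIRT: usize = 0x0000_7F12_3456_7000;
const L0I: usize = (VIRT >> 39) & 0x1FF; // PML4 index, selects a 512 GiB slot
const L1I: usize = (VIRT >> 30) & 0x1FF; // PDPT index, 1 GiB slot
const L2I: usize = (VIRT >> 21) & 0x1FF; // page directory index, 2 MiB slot
const L3I: usize = (VIRT >> 12) & 0x1FF; // page table index, 4 KiB page
```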
diff --git a/kernel/arch/x86_64/src/mem/table.rs b/kernel/arch/x86_64/src/mem/table.rs
new file mode 100644
index 00000000..354b3fec
--- /dev/null
+++ b/kernel/arch/x86_64/src/mem/table.rs
@@ -0,0 +1,335 @@
+//! x86-64-specific memory translation table management interfaces and functions
+use core::{
+    marker::PhantomData,
+    ops::{Index, IndexMut, Range},
+};
+
+use bitflags::bitflags;
+use libk_mm_interface::{
+    address::{AsPhysicalAddress, FromRaw, PhysicalAddress},
+    pointer::{PhysicalRef, PhysicalRefMut},
+    table::{
+        EntryLevel, EntryLevelDrop, MapAttributes, NextPageTable, NonTerminalEntryLevel,
+        TableAllocator,
+    },
+};
+use yggdrasil_abi::error::Error;
+
+use crate::KernelTableManagerImpl;
+
+bitflags! {
+    /// Describes how each page table entry is mapped
+    pub struct PageAttributes: u64 {
+        /// When set, the mapping is considered valid and pointing somewhere
+        const PRESENT = 1 << 0;
+        /// For tables, allows writes to further translation levels, for pages/blocks, allows
+        /// writes to the region covered by the entry
+        const WRITABLE = 1 << 1;
+        /// When set for L2 entries, the mapping specifies a 2MiB page instead of a page table
+        /// reference
+        const BLOCK = 1 << 7;
+        /// For tables, allows user access to further translation levels, for pages/blocks, allows
+        /// user access to the region covered by the entry
+        const USER = 1 << 2;
+    }
+}
+
+/// Represents a single virtual address space mapping depending on its translation level
+#[derive(Clone, Copy, Debug)]
+#[repr(transparent)]
+pub struct PageEntry<L: EntryLevel>(u64, PhantomData<L>);
+
+/// Table describing a single level of address translation
+#[derive(Clone, Copy)]
+#[repr(C, align(0x1000))]
+pub struct PageTable<L: EntryLevel> {
+    data: [PageEntry<L>; 512],
+}
+
+/// Translation level 0 (PML4): Entry is 512GiB table
+#[derive(Clone, Copy, Debug)]
+pub struct L0;
+/// Translation level 1 (PDPT): Entry is 1GiB table
+#[derive(Clone, Copy, Debug)]
+pub struct L1;
+/// Translation level 2 (Page directory): Entry is 2MiB block/table
+#[derive(Clone, Copy, Debug)]
+pub struct L2;
+/// Translation level 3 (Page table): Entry is 4KiB page
+#[derive(Clone, Copy, Debug)]
+pub struct L3;
+
+impl NonTerminalEntryLevel for L0 {
+    type NextLevel = L1;
+}
+impl NonTerminalEntryLevel for L1 {
+    type NextLevel = L2;
+}
+impl NonTerminalEntryLevel for L2 {
+    type NextLevel = L3;
+}
+
+impl EntryLevel for L0 {
+    const SHIFT: usize = 39;
+}
+
+impl EntryLevel for L1 {
+    const SHIFT: usize = 30;
+}
+
+impl EntryLevel for L2 {
+    const SHIFT: usize = 21;
+}
+
+impl EntryLevel for L3 {
+    const SHIFT: usize = 12;
+}
+
+impl PageEntry<L3> {
+    /// Constructs a mapping which points to a 4KiB page
+    pub fn page(phys: PhysicalAddress, attrs: PageAttributes) -> Self {
+        Self(
+            u64::from(phys) | (attrs | PageAttributes::PRESENT | PageAttributes::USER).bits(),
+            PhantomData,
+        )
+    }
+
+    /// Returns the physical address of the page this entry refers to, returning None if it does
+    /// not
+    pub fn as_page(self) -> Option<PhysicalAddress> {
+        if self.0 & PageAttributes::PRESENT.bits() != 0 {
+            Some(PhysicalAddress::from_raw(self.0 & !0xFFF))
+        } else {
+            None
+        }
+    }
+}
+
+impl PageEntry<L2> {
+    /// Constructs a mapping which points to a 2MiB block
+    pub fn block(phys: PhysicalAddress, attrs: PageAttributes) -> Self {
+        Self(
+            u64::from(phys) | (attrs | PageAttributes::PRESENT | PageAttributes::BLOCK).bits(),
+            PhantomData,
+        )
+    }
+}
+
+impl PageEntry<L1> {
+    /// Constructs a mapping which points to a 1GiB block
+    pub fn block(phys: PhysicalAddress, attrs: PageAttributes) -> Self {
+        Self(
+            u64::from(phys) | (attrs | PageAttributes::PRESENT | PageAttributes::BLOCK).bits(),
+            PhantomData,
+        )
+    }
+}
+
+impl<L: NonTerminalEntryLevel> PageEntry<L> {
+    /// Constructs a mapping which points to a next-level table
+    pub fn table(phys: PhysicalAddress, attrs: PageAttributes) -> Self {
+        Self(
+            u64::from(phys)
+                | (attrs
+                    | PageAttributes::PRESENT
+                    | PageAttributes::WRITABLE
+                    | PageAttributes::USER)
+                    .bits(),
+            PhantomData,
+        )
+    }
+
+    /// Returns the physical address of the table this entry refers to, returning None if it
+    /// does not
+    pub fn as_table(self) -> Option<PhysicalAddress> {
+        if self.0 & PageAttributes::PRESENT.bits() != 0
+            && self.0 & PageAttributes::BLOCK.bits() == 0
+        {
+            Some(PhysicalAddress::from_raw(self.0 & !0xFFF))
+        } else {
+            None
+        }
+    }
+
+    /// Returns `true` if the mapping represents a "page"/"block" and not a table
+    pub fn is_block(self) -> bool {
+        self.0 & PageAttributes::BLOCK.bits() != 0
+    }
+}
+
+impl<L: EntryLevel> PageEntry<L> {
+    /// An entry that is not mapped
+    pub const INVALID: Self = Self(0, PhantomData);
+
+    /// Reinterprets raw [u64] as a [PageEntry].
+    ///
+    /// # Safety
+    ///
+    /// Unsafe: the caller must ensure the value is a valid page translation entry.
+    pub const unsafe fn from_raw(raw: u64) -> Self {
+        Self(raw, PhantomData)
+    }
+
+    /// Returns the translation attributes of the entry
+    pub fn attributes(&self) -> PageAttributes {
+        PageAttributes::from_bits_retain(self.0)
+    }
+
+    /// Returns `true` if the entry contains a valid mapping to either a table or to a page/block
+    pub fn is_present(&self) -> bool {
+        self.0 & PageAttributes::PRESENT.bits() != 0
+    }
+}
+
+impl<L: EntryLevel> PageTable<L> {
+    /// Constructs a page table filled with invalid (non-present) entries
+    pub const fn zeroed() -> Self {
+        Self {
+            data: [PageEntry::INVALID; 512],
+        }
+    }
+
+    /// Reinterprets given [PageEntry] slice as a reference to [PageTable].
+    ///
+    /// # Safety
+    ///
+    /// Unsafe: the caller must ensure the provided reference is properly aligned and contains sane
+    /// data.
+    pub unsafe fn from_raw_slice_mut(data: &mut [PageEntry<L>; 512]) -> &mut Self {
+        core::mem::transmute(data)
+    }
+
+    /// Allocates a new page table, filling it with non-present entries
+    pub fn new_zeroed<'a, TA: TableAllocator>(
+    ) -> Result<PhysicalRefMut<'a, Self, KernelTableManagerImpl>, Error> {
+        let physical = TA::allocate_page_table()?;
+        let mut table =
+            unsafe { PhysicalRefMut::<'a, Self, KernelTableManagerImpl>::map(physical) };
+
+        for i in 0..512 {
+            table[i] = PageEntry::INVALID;
+        }
+
+        Ok(table)
+    }
+
+    // /// Returns the physical address of this table
+    // pub fn physical_address(&self) -> usize {
+    //     unsafe { (self.data.as_ptr() as usize).physicalize() }
+    // }
+}
+
+impl<L: NonTerminalEntryLevel + 'static> NextPageTable for PageTable<L> {
+    type NextLevel = PageTable<L::NextLevel>;
+    type TableRef = PhysicalRef<'static, Self::NextLevel, KernelTableManagerImpl>;
+    type TableRefMut = PhysicalRefMut<'static, Self::NextLevel, KernelTableManagerImpl>;
+
+    fn get(&self, index: usize) -> Option<Self::TableRef> {
+        self[index]
+            .as_table()
+            .map(|addr| unsafe { PhysicalRef::map(addr) })
+    }
+
+    fn get_mut(&mut self, index: usize) -> Option<Self::TableRefMut> {
+        self[index]
+            .as_table()
+            .map(|addr| unsafe { PhysicalRefMut::map(addr) })
+    }
+
+    fn get_mut_or_alloc<TA: TableAllocator>(
+        &mut self,
+        index: usize,
+    ) -> Result<Self::TableRefMut, Error> {
+        let entry = self[index];
+
+        if let Some(table) = entry.as_table() {
+            Ok(unsafe { PhysicalRefMut::map(table) })
+        } else {
+            let table = PageTable::new_zeroed::<TA>()?;
+            self[index] = PageEntry::<L>::table(
+                unsafe { table.as_physical_address() },
+                PageAttributes::WRITABLE | PageAttributes::USER,
+            );
+            Ok(table)
+        }
+    }
+}
+
+impl EntryLevelDrop for PageTable<L3> {
+    const FULL_RANGE: Range<usize> = 0..512;
+
+    // Do nothing
+    unsafe fn drop_range<TA: TableAllocator>(&mut self, _range: Range<usize>) {}
+}
+
+impl<L: NonTerminalEntryLevel + 'static> EntryLevelDrop for PageTable<L>
+where
+    PageTable<L::NextLevel>: EntryLevelDrop,
+{
+    const FULL_RANGE: Range<usize> = 0..512;
+    unsafe fn drop_range<TA: TableAllocator>(&mut self, range: Range<usize>) {
+        for index in range {
+            let entry = self[index];
+
+            if let Some(table) = entry.as_table() {
+                let mut table_ref: PhysicalRefMut<PageTable<L::NextLevel>, KernelTableManagerImpl> =
+                    PhysicalRefMut::map(table);
+
+                table_ref.drop_all::<TA>();
+
+                // Drop the table
+                drop(table_ref);
+
+                TA::free_page_table(table);
+            } else if entry.is_present() {
+                // Memory must've been cleared beforehand, so no non-table entries must be present
+                panic!(
+                    "Expected a table containing only tables, got table[{}] = {:#x?}",
+                    index, entry.0
+                );
+            }
+
+            self[index] = PageEntry::INVALID;
+        }
+    }
+}
+
+impl<L: EntryLevel> Index<usize> for PageTable<L> {
+    type Output = PageEntry<L>;
+
+    fn index(&self, index: usize) -> &Self::Output {
+        &self.data[index]
+    }
+}
+
+impl<L: EntryLevel> IndexMut<usize> for PageTable<L> {
+    fn index_mut(&mut self, index: usize) -> &mut Self::Output {
+        &mut self.data[index]
+    }
+}
+
+impl From<MapAttributes> for PageAttributes {
+    fn from(value: MapAttributes) -> Self {
+        let mut res = PageAttributes::WRITABLE;
+        if value.intersects(MapAttributes::USER_READ | MapAttributes::USER_WRITE) {
+            res |= PageAttributes::USER;
+        }
+        res
+    }
+}
+
+impl From<PageAttributes> for MapAttributes {
+    fn from(value: PageAttributes) -> Self {
+        let mut res = MapAttributes::empty();
+        if value.contains(PageAttributes::USER) {
+            res |= MapAttributes::USER_READ;
+            if value.contains(PageAttributes::WRITABLE) {
+                res |= MapAttributes::USER_WRITE;
+            }
+        }
+        // TODO ???
+        res |= MapAttributes::NON_GLOBAL;
+        res
+    }
+}
diff --git a/kernel/arch/x86_64/src/registers.rs b/kernel/arch/x86_64/src/registers.rs
new file mode 100644
index 00000000..04bd8c30
--- /dev/null
+++ b/kernel/arch/x86_64/src/registers.rs
@@ -0,0 +1,426 @@
+//! Helper types for interfacing with x86-64 registers
+#![allow(unused)]
+
+macro_rules! impl_read {
+    ($t:ident, $register:ty, $body:expr) => {
+        impl tock_registers::interfaces::Readable for $t {
+            type T = u64;
+            type R = $register;
+
+            #[inline]
+            fn get(&self) -> u64 {
+                $body
+            }
+        }
+    };
+}
+
+macro_rules! impl_write {
+    ($t:ident, $register:ty, $value:ident, $body:expr) => {
+        impl tock_registers::interfaces::Writeable for $t {
+            type T = u64;
+            type R = $register;
+
+            #[inline]
+            fn set(&self, $value: u64) {
+                $body
+            }
+        }
+    };
+}
+
+macro_rules! msr_impl_read {
+    ($t:ident, $addr:expr, $register:ty) => {
+        impl_read!($t, $register, {
+            let (high, low): (u32, u32);
+            unsafe {
+                core::arch::asm!(
+                    "rdmsr",
+                    in("ecx") $addr,
+                    out("eax") low,
+                    out("edx") high,
+                    options(att_syntax)
+                );
+            }
+            ((high as u64) << 32) | (low as u64)
+        });
+    };
+
+    ($t:ident, $addr:expr) => { msr_impl_read!($t, $addr, ()); };
+}
+
+macro_rules! msr_impl_write {
+    ($t:ident, $addr:expr, $register:ty) => {
+        impl_write!($t, $register, value, {
+            let low = value as u32;
+            let high = (value >> 32) as u32;
+            unsafe {
+                core::arch::asm!(
+                    "wrmsr",
+                    in("ecx") $addr,
+                    in("eax") low,
+                    in("edx") high,
+                    options(att_syntax)
+                );
+            }
+        });
+    };
+
+    ($t:ident, $addr:expr) => { msr_impl_write!($t, $addr, ()); };
+}
+
+macro_rules! cr_impl_read {
+    ($t:ident, $cr:ident, $register:ty) => {
+        impl_read!($t, $register, {
+            let value: u64;
+            unsafe {
+                core::arch::asm!(
+                    concat!("mov %", stringify!($cr), ", {}"),
+                    out(reg) value,
+                    options(att_syntax)
+                );
+            }
+            value
+        });
+    };
+}
+
+macro_rules!
cr_impl_write { + ($t:ident, $cr:ident, $register:ty) => { + impl_write!($t, $register, value, { + unsafe { + core::arch::asm!( + concat!("mov {}, %", stringify!($cr)), + in(reg) value, + options(att_syntax) + ); + } + }); + }; +} + +mod msr_ia32_kernel_gs_base { + const ADDR: u32 = 0xC0000102; + pub struct Reg; + + msr_impl_read!(Reg, ADDR); + msr_impl_write!(Reg, ADDR); + + /// IA32_KERNEL_GS_BASE model-specific register. Provides the base address for %gs-relative + /// loads/stores. + pub const MSR_IA32_KERNEL_GS_BASE: Reg = Reg; +} + +mod msr_ia32_apic_base { + use tock_registers::{interfaces::Readable, register_bitfields}; + + register_bitfields! { + u64, + #[allow(missing_docs)] + #[doc = "IA32_APIC_BASE model-specific register"] + pub MSR_IA32_APIC_BASE [ + #[doc = "Contains a virtual page number of the Local APIC base address for this processor"] + AddressPage OFFSET(12) NUMBITS(40) [], + #[doc = "If set, the APIC is enabled"] + ApicEnable OFFSET(11) NUMBITS(1) [], + #[doc = "If set, x2APIC mode is enabled"] + ExtendedEnable OFFSET(10) NUMBITS(1) [], + #[doc = "If set, this CPU is a bootstrap processor"] + BootstrapCpuCore OFFSET(8) NUMBITS(1) [], + ] + } + + const ADDR: u32 = 0x0000001B; + pub struct Reg; + + msr_impl_read!(Reg, ADDR, MSR_IA32_APIC_BASE::Register); + msr_impl_write!(Reg, ADDR, MSR_IA32_APIC_BASE::Register); + + impl Reg { + #[inline] + pub fn read_base(&self) -> u64 { + self.read(MSR_IA32_APIC_BASE::AddressPage) << 12 + } + } + + /// IA32_APIC_BASE model-specific register + pub const MSR_IA32_APIC_BASE: Reg = Reg; +} + +mod msr_ia32_sfmask { + use tock_registers::register_bitfields; + + register_bitfields! { + u64, + #[allow(missing_docs)] + pub MSR_IA32_SFMASK [ + IF OFFSET(9) NUMBITS(1) [ + Masked = 1, + Unmasked = 0 + ] + ] + } + + const ADDR: u32 = 0xC0000084; + pub struct Reg; + + msr_impl_read!(Reg, ADDR, MSR_IA32_SFMASK::Register); + msr_impl_write!(Reg, ADDR, MSR_IA32_SFMASK::Register); + + /// IA32_SFMASK model-specific register + pub const MSR_IA32_SFMASK: Reg = Reg; +} + +mod msr_ia32_star { + use tock_registers::register_bitfields; + + register_bitfields! { + u64, + #[allow(missing_docs)] + pub MSR_IA32_STAR [ + SYSCALL_CS_SS OFFSET(32) NUMBITS(16) [], + SYSRET_CS_SS OFFSET(48) NUMBITS(16) [], + ] + } + + const ADDR: u32 = 0xC0000081; + pub struct Reg; + + msr_impl_read!(Reg, ADDR, MSR_IA32_STAR::Register); + msr_impl_write!(Reg, ADDR, MSR_IA32_STAR::Register); + + /// IA32_STAR model-specific register + pub const MSR_IA32_STAR: Reg = Reg; +} + +mod msr_ia32_lstar { + const ADDR: u32 = 0xC0000082; + pub struct Reg; + + msr_impl_read!(Reg, ADDR); + msr_impl_write!(Reg, ADDR); + + /// IA32_LSTAR model-specific register + pub const MSR_IA32_LSTAR: Reg = Reg; +} + +mod msr_ia32_efer { + use tock_registers::register_bitfields; + + register_bitfields! { + u64, + #[allow(missing_docs)] + pub MSR_IA32_EFER [ + // If set, support for SYSCALL/SYSRET instructions is enabled + SCE OFFSET(0) NUMBITS(1) [ + Enable = 1, + Disable = 0 + ] + ] + } + + const ADDR: u32 = 0xC0000080; + pub struct Reg; + + msr_impl_read!(Reg, ADDR, MSR_IA32_EFER::Register); + msr_impl_write!(Reg, ADDR, MSR_IA32_EFER::Register); + + /// IA32_EFER Extended Feature Enable model-specific Register + pub const MSR_IA32_EFER: Reg = Reg; +} + +mod cr0 { + use tock_registers::register_bitfields; + + register_bitfields! 
{ + u64, + #[allow(missing_docs)] + pub CR0 [ + PG OFFSET(31) NUMBITS(1) [], + CD OFFSET(30) NUMBITS(1) [], + NW OFFSET(29) NUMBITS(1) [], + AM OFFSET(18) NUMBITS(1) [], + WP OFFSET(16) NUMBITS(1) [], + NE OFFSET(5) NUMBITS(1) [], + ET OFFSET(4) NUMBITS(1) [], + TS OFFSET(3) NUMBITS(1) [], + EM OFFSET(2) NUMBITS(1) [], + MP OFFSET(1) NUMBITS(1) [], + PE OFFSET(0) NUMBITS(1) [], + ] + } + + pub struct Reg; + + cr_impl_read!(Reg, cr0, CR0::Register); + cr_impl_write!(Reg, cr0, CR0::Register); + + /// x86-64 control register 0 + pub const CR0: Reg = Reg; +} + +mod cr3 { + use tock_registers::{interfaces::ReadWriteable, register_bitfields}; + + register_bitfields! { + u64, + #[allow(missing_docs)] + pub CR3 [ + ADDR OFFSET(12) NUMBITS(40) [], + ] + } + + pub struct Reg; + + cr_impl_read!(Reg, cr3, CR3::Register); + cr_impl_write!(Reg, cr3, CR3::Register); + + impl Reg { + pub fn set_address(&self, address: usize) { + assert_eq!(address & 0xFFF, 0); + self.modify(CR3::ADDR.val((address as u64) >> 12)) + } + } + + /// x86-64 control register 3 + pub const CR3: Reg = Reg; +} + +mod cr4 { + use tock_registers::register_bitfields; + + register_bitfields! { + u64, + #[allow(missing_docs)] + pub CR4 [ + /// If set, XSAVE and extended processor states are enabled + OSXSAVE OFFSET(18) NUMBITS(1) [], + /// Indicates OS support for FXSAVE and FXRSTOR instructions + OSFXSR OFFSET(9) NUMBITS(1) [], + /// Performance-Monitoring Counter enable + PCE OFFSET(8) NUMBITS(1) [], + /// If set, "page global" attribute is enabled + PGE OFFSET(7) NUMBITS(1) [], + /// Machine Check enable + MCE OFFSET(6) NUMBITS(1) [], + /// Physical Address Extension (enabled if 64-bit mode) + PAE OFFSET(5) NUMBITS(1) [], + /// Page Size Extension (should be enabled by yboot) + PSE OFFSET(4) NUMBITS(1) [], + /// Debugging extensions + DE OFFSET(3) NUMBITS(1) [], + TSD OFFSET(2) NUMBITS(1) [], + PVI OFFSET(1) NUMBITS(1) [], + VME OFFSET(0) NUMBITS(1) [], + ] + } + + pub struct Reg; + + cr_impl_read!(Reg, cr4, CR4::Register); + cr_impl_write!(Reg, cr4, CR4::Register); + + /// x86-64 control register 4 + pub const CR4: Reg = Reg; +} + +mod xcr0 { + use tock_registers::{ + interfaces::{Readable, Writeable}, + register_bitfields, + }; + + register_bitfields! 
{ + u64, + #[allow(missing_docs)] + pub XCR0 [ + /// If set, x87 FPU/MMX is enabled + X87 OFFSET(0) NUMBITS(1) [], + /// If set, XSAVE support for MXCSR and XMM registers is enabled + SSE OFFSET(1) NUMBITS(1) [], + /// If set, AVX is enabled and XSAVE supports YMM upper halves + AVX OFFSET(2) NUMBITS(1) [], + ] + } + + pub struct Reg; + + impl Readable for Reg { + type T = u64; + type R = XCR0::Register; + + fn get(&self) -> Self::T { + let eax: u32; + let edx: u32; + unsafe { + core::arch::asm!( + "xgetbv", + in("ecx") 0, + out("eax") eax, + out("edx") edx, + options(att_syntax) + ); + } + ((edx as u64) << 32) | (eax as u64) + } + } + + impl Writeable for Reg { + type T = u64; + type R = XCR0::Register; + + fn set(&self, value: Self::T) { + let eax = value as u32; + let edx = (value >> 32) as u32; + unsafe { + core::arch::asm!( + "xsetbv", + in("ecx") 0, + in("eax") eax, + in("edx") edx, + options(att_syntax) + ); + } + } + } + + /// Extended control register for SSE/AVX/FPU configuration + pub const XCR0: Reg = Reg; +} + +use core::ptr::NonNull; + +pub use cr0::CR0; +pub use cr3::CR3; +pub use cr4::CR4; +pub use msr_ia32_apic_base::MSR_IA32_APIC_BASE; +pub use msr_ia32_efer::MSR_IA32_EFER; +pub use msr_ia32_kernel_gs_base::MSR_IA32_KERNEL_GS_BASE; +pub use msr_ia32_lstar::MSR_IA32_LSTAR; +pub use msr_ia32_sfmask::MSR_IA32_SFMASK; +pub use msr_ia32_star::MSR_IA32_STAR; +pub use xcr0::XCR0; + +#[repr(C, align(0x10))] +pub struct FpuContext { + data: [u8; 512], +} + +impl FpuContext { + pub fn new() -> Self { + let mut value = Self { data: [0; 512] }; + unsafe { + let ptr = value.data.as_mut_ptr(); + core::arch::asm!("fninit; fxsave64 ({})", in(reg) ptr, options(att_syntax)); + } + value + } + + pub unsafe fn save(dst: *mut FpuContext) { + core::arch::asm!("fxsave64 ({})", in(reg) dst, options(att_syntax)); + } + + pub unsafe fn restore(src: *mut FpuContext) { + core::arch::asm!("fxrstor64 ({})", in(reg) src, options(att_syntax)); + } +} diff --git a/kernel/build.rs b/kernel/build.rs new file mode 100644 index 00000000..5307bb10 --- /dev/null +++ b/kernel/build.rs @@ -0,0 +1,74 @@ +use std::{ + env, fs, + io::{self, Write}, + path::{Path, PathBuf}, + process::Command, +}; + +use abi_generator::{ + abi::{ty::TypeWidth, AbiBuilder}, + syntax::UnwrapFancy, + TargetEnv, +}; + +fn build_x86_64() { + const DEFAULT_8086_AS: &str = "nasm"; + const AP_BOOTSTRAP_S: &str = "src/arch/x86_64/boot/ap_boot.S"; + + println!("cargo:rerun-if-changed={}", AP_BOOTSTRAP_S); + + let out_dir = env::var("OUT_DIR").unwrap(); + let assembler = env::var("AS8086").unwrap_or(DEFAULT_8086_AS.to_owned()); + + let ap_bootstrap_out = PathBuf::from(out_dir).join("__x86_64_ap_boot.bin"); + + // Assemble the code + let output = Command::new(assembler.as_str()) + .args([ + "-fbin", + "-o", + ap_bootstrap_out.to_str().unwrap(), + AP_BOOTSTRAP_S, + ]) + .output() + .unwrap(); + + if !output.status.success() { + io::stderr().write_all(&output.stderr).ok(); + panic!("{}: could not assemble {}", assembler, AP_BOOTSTRAP_S); + } +} + +fn generate_syscall_dispatcher>(out_dir: P) { + let abi: AbiBuilder = AbiBuilder::from_string( + yggdrasil_abi_def::ABI_FILE, + TargetEnv { + thin_pointer_width: TypeWidth::U64, + fat_pointer_width: TypeWidth::U128, + }, + ) + .unwrap_fancy(""); + + let generated_dispatcher = out_dir.as_ref().join("generated_dispatcher.rs"); + let file = prettyplease::unparse( + &abi.emit_syscall_dispatcher("handle_syscall", "impls") + .unwrap_fancy(""), + ); + + fs::write(generated_dispatcher, 
file.as_bytes()).unwrap(); +} + +fn main() { + let out_dir = env::var("OUT_DIR").unwrap(); + let arch = env::var("CARGO_CFG_TARGET_ARCH").unwrap(); + + generate_syscall_dispatcher(&out_dir); + + println!("cargo:rerun-if-changed=build.rs"); + + match arch.as_str() { + "x86_64" => build_x86_64(), + "aarch64" => (), + _ => panic!("Unknown target arch: {:?}", arch), + } +} diff --git a/kernel/driver/block/ahci/Cargo.toml b/kernel/driver/block/ahci/Cargo.toml new file mode 100644 index 00000000..45ae22f2 --- /dev/null +++ b/kernel/driver/block/ahci/Cargo.toml @@ -0,0 +1,26 @@ +[package] +name = "ygg_driver_ahci" +version = "0.1.0" +edition = "2021" +authors = ["Mark Poliakov "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +libk-mm = { path = "../../../libk/libk-mm" } +libk-thread = { path = "../../../libk/libk-thread" } +libk-util = { path = "../../../libk/libk-util" } +device-api = { path = "../../../lib/device-api", features = ["derive"] } +vfs = { path = "../../../lib/vfs" } + +ygg_driver_pci = { path = "../../bus/pci" } +ygg_driver_block = { path = "../../block/core" } +kernel-fs = { path = "../../fs/kernel-fs" } + +log = "0.4.20" +futures-util = { version = "0.3.28", default-features = false, features = ["alloc", "async-await"] } +static_assertions = "1.1.0" +tock-registers = "0.8.1" +bytemuck = { version = "1.14.0", features = ["derive"] } +memoffset = "0.9.0" diff --git a/kernel/driver/block/ahci/src/command.rs b/kernel/driver/block/ahci/src/command.rs new file mode 100644 index 00000000..d2c57561 --- /dev/null +++ b/kernel/driver/block/ahci/src/command.rs @@ -0,0 +1,140 @@ +use core::mem::{size_of, MaybeUninit}; + +use libk_mm::{ + address::{AsPhysicalAddress, PhysicalAddress}, + PageBox, +}; +use tock_registers::register_structs; + +use crate::{data::AtaString, error::AhciError, SECTOR_SIZE}; + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[repr(u8)] +pub enum AtaCommandId { + Identify = 0xEC, + ReadDmaEx = 0x25, +} + +pub trait AtaCommand { + type Response; + + const COMMAND_ID: AtaCommandId; + + fn lba(&self) -> u64; + fn sector_count(&self) -> usize; + fn regions(&self) -> &[(PhysicalAddress, usize)]; + unsafe fn into_response(self) -> Self::Response; +} + +register_structs! 
{
+    // Offsets in the ATA8-ACS spec are in words, so each value taken from there is
+    // multiplied by two
+    pub AtaIdentifyResponse {
+        (0 => _0),
+        (20 => pub serial_number: AtaString<20>),
+        (40 => _1),
+        (54 => pub model_number: AtaString<40>),
+        (94 => _2),
+        (98 => pub capabilities: [u16; 2]),
+        (102 => _3),
+        (120 => pub logical_sector_count_28: [u8; 4]),
+        (124 => _4),
+        (138 => pub additional_features: u16),
+        (140 => _5),
+        (164 => pub command_sets: [u16; 6]),
+        (176 => _6),
+        (200 => pub logical_sector_count_qw: u64),
+        (208 => _7),
+        (212 => pub phys_logical_sector_size: u16),
+        (214 => _8),
+        (234 => pub logical_sector_size: [u16; 2]),
+        (238 => _9),
+        (460 => pub ext_logical_sector_count_qw: [u8; 8]),
+        (468 => _10),
+        (512 => @END),
+    }
+}
+
+pub struct AtaIdentify {
+    buffer: PageBox<MaybeUninit<AtaIdentifyResponse>>,
+    regions: [(PhysicalAddress, usize); 1],
+}
+
+pub struct AtaReadDmaEx {
+    lba: u64,
+    sector_count: usize,
+    regions: [(PhysicalAddress, usize); 1],
+}
+
+impl AtaIdentify {
+    pub fn create() -> Result<Self, AhciError> {
+        PageBox::new_uninit()
+            .map(Self::with_data)
+            .map_err(AhciError::MemoryError)
+    }
+
+    pub fn with_data(buffer: PageBox<MaybeUninit<AtaIdentifyResponse>>) -> Self {
+        Self {
+            regions: [(
+                unsafe { buffer.as_physical_address() },
+                size_of::<AtaIdentifyResponse>(),
+            )],
+            buffer,
+        }
+    }
+}
+
+impl AtaReadDmaEx {
+    pub fn new(lba: u64, sector_count: usize, buffer: &PageBox<[MaybeUninit<u8>]>) -> Self {
+        assert_eq!(buffer.len() % SECTOR_SIZE, 0);
+        assert_ne!(buffer.len(), 0);
+
+        Self {
+            lba,
+            sector_count,
+            regions: [(unsafe { buffer.as_physical_address() }, buffer.len())],
+        }
+    }
+}
+
+impl AtaCommand for AtaIdentify {
+    type Response = PageBox<AtaIdentifyResponse>;
+
+    const COMMAND_ID: AtaCommandId = AtaCommandId::Identify;
+
+    fn lba(&self) -> u64 {
+        0
+    }
+
+    fn sector_count(&self) -> usize {
+        0
+    }
+
+    fn regions(&self) -> &[(PhysicalAddress, usize)] {
+        &self.regions
+    }
+
+    unsafe fn into_response(self) -> Self::Response {
+        self.buffer.assume_init()
+    }
+}
+
+impl AtaCommand for AtaReadDmaEx {
+    type Response = ();
+
+    const COMMAND_ID: AtaCommandId = AtaCommandId::ReadDmaEx;
+
+    fn lba(&self) -> u64 {
+        self.lba
+    }
+
+    fn sector_count(&self) -> usize {
+        self.sector_count
+    }
+
+    fn regions(&self) -> &[(PhysicalAddress, usize)] {
+        &self.regions
+    }
+
+    unsafe fn into_response(self) -> Self::Response {}
+}
diff --git a/kernel/driver/block/ahci/src/data.rs b/kernel/driver/block/ahci/src/data.rs
new file mode 100644
index 00000000..2c5ff64f
--- /dev/null
+++ b/kernel/driver/block/ahci/src/data.rs
@@ -0,0 +1,252 @@
+use core::mem::size_of;
+
+use alloc::string::String;
+use bytemuck::{Pod, Zeroable};
+use libk_mm::address::{IntoRaw, PhysicalAddress};
+use libk_util::{ConstAssert, IsTrue};
+use static_assertions::const_assert_eq;
+
+use crate::{
+    command::{AtaCommand, AtaIdentify, AtaIdentifyResponse},
+    error::AhciError,
+    MAX_PRD_SIZE,
+};
+
+pub const COMMAND_LIST_LENGTH: usize = 32;
+
+const AHCI_FIS_REG_H2D_COMMAND: u8 = 1 << 7;
+const AHCI_FIS_REG_H2D: u8 = 0x27;
+
+#[repr(C)]
+pub struct AtaString<const N: usize>
+where
+    ConstAssert<{ N % 2 == 0 }>: IsTrue,
+{
+    data: [u8; N],
+}
+
+#[derive(Debug, Clone, Copy, Zeroable)]
+#[repr(C)]
+pub struct PhysicalRegionDescriptor {
+    buffer_address: u64,
+    _0: u32,
+    dbc: u32,
+}
+
+#[derive(Debug, Clone, Copy, Zeroable)]
+#[repr(C)]
+pub struct CommandListEntry {
+    attr: u16,
+    prdtl: u16,
+    prdbc: u32,
+    ctba: u64,
+    _0: [u32; 4],
+}
+
+#[derive(Clone, Copy, Zeroable)]
+#[repr(C)]
+pub union SentFis {
+    reg_h2d: RegisterHostToDeviceFis,
+    raw: RawFis,
+}
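+
+// Host-to-Device "Register FIS" (FIS type 0x27): the frame placed at the start of a
+// command table to issue an ATA command to the drive; `CommandTable::setup_command`
+// below fills it in.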
+#[derive(Clone, Copy, Zeroable)]
+#[repr(C)]
+pub struct RegisterHostToDeviceFis {
+    pub ty: u8,
+    pub cmd_port: u8,
+    pub cmd: u8,
+    pub feature_low: u8,
+
+    pub lba0: u8,
+    pub lba1: u8,
+    pub lba2: u8,
+    pub device: u8,
+
+    pub lba3: u8,
+    pub lba4: u8,
+    pub lba5: u8,
+    pub feature_high: u8,
+
+    pub count: u16,
+    pub icc: u8,
+    pub control: u8,
+
+    _0: u32,
+}
+
+#[derive(Clone, Copy, Zeroable, Pod)]
+#[repr(C)]
+pub struct RawFis {
+    pub bytes: [u8; 64],
+}
+
+#[derive(Clone, Copy)]
+#[repr(C)]
+pub struct CommandTable {
+    fis: SentFis,                          // 0x00..0x40
+    _0: [u8; 16],                          // 0x40..0x50
+    _1: [u8; 48],                          // 0x50..0x80
+    prdt: [PhysicalRegionDescriptor; 248], // 0x80..0x1000
+}
+
+#[derive(Clone, Copy, Zeroable, Pod)]
+#[repr(C)]
+pub struct ReceivedFis {
+    _dsfis: [u8; 0x1C],  // 0x00..0x1C
+    _0: [u8; 0x04],      // 0x1C..0x20
+    _psfis: [u8; 0x14],  // 0x20..0x34
+    _1: [u8; 0x0C],      // 0x34..0x40
+    _rfis: [u8; 0x14],   // 0x40..0x54
+    _2: [u8; 0x04],      // 0x54..0x58
+    _sdbfis: [u8; 0x08], // 0x58..0x60
+    _ufis: [u8; 0x40],   // 0x60..0xA0
+    _3: [u8; 0x60],      // 0xA0..0x100
+}
+
+const_assert_eq!(size_of::<SentFis>(), 0x40);
+const_assert_eq!(size_of::<CommandTable>(), 0x1000);
+const_assert_eq!(size_of::<CommandListEntry>(), 32);
+const_assert_eq!(size_of::<ReceivedFis>(), 0x100);
+
+impl CommandTable {
+    pub fn setup_command<C: AtaCommand>(&mut self, command: &C) -> Result<(), AhciError> {
+        let lba = command.lba();
+        assert_eq!(lba & !0xFFFFFFFFFF, 0);
+        let count = command.sector_count().try_into().unwrap();
+
+        if C::COMMAND_ID == AtaIdentify::COMMAND_ID {
+            self.fis = SentFis {
+                reg_h2d: RegisterHostToDeviceFis {
+                    ty: AHCI_FIS_REG_H2D,
+                    cmd_port: AHCI_FIS_REG_H2D_COMMAND,
+                    cmd: C::COMMAND_ID as _,
+
+                    ..RegisterHostToDeviceFis::zeroed()
+                },
+            };
+        } else {
+            self.fis = SentFis {
+                reg_h2d: RegisterHostToDeviceFis {
+                    ty: AHCI_FIS_REG_H2D,
+                    cmd_port: AHCI_FIS_REG_H2D_COMMAND,
+                    cmd: C::COMMAND_ID as _,
+                    device: 1 << 6, // LBA mode
+                    lba0: lba as u8,
+                    lba1: (lba >> 8) as u8,
+                    lba2: (lba >> 16) as u8,
+                    lba3: (lba >> 24) as u8,
+                    lba4: (lba >> 32) as u8,
+                    lba5: (lba >> 40) as u8,
+                    count,
+
+                    ..RegisterHostToDeviceFis::zeroed()
+                },
+            };
+        }
+
+        let regions = command.regions();
+        for (i, &(base, size)) in regions.iter().enumerate() {
+            let last = i == regions.len() - 1;
+            self.prdt[i] = PhysicalRegionDescriptor::new(base, size, last)?;
+        }
+
+        Ok(())
+    }
+}
+
+impl CommandListEntry {
+    pub fn new(command_table_entry: PhysicalAddress, prd_count: usize) -> Result<Self, AhciError> {
+        if prd_count > 0xFFFF {
+            todo!()
+        }
+        Ok(Self {
+            // attr = FIS size in dwords
+            attr: (size_of::<SentFis>() / size_of::<u32>()) as _,
+            prdtl: prd_count as _,
+            prdbc: 0,
+            ctba: command_table_entry.into_raw(),
+            _0: [0; 4],
+        })
+    }
+}
+
+unsafe impl Zeroable for CommandTable {
+    fn zeroed() -> Self {
+        Self {
+            fis: SentFis::zeroed(),
+            _0: [0; 16],
+            _1: [0; 48],
+            prdt: [PhysicalRegionDescriptor::zeroed(); 248],
+        }
+    }
+}
+
+impl PhysicalRegionDescriptor {
+    pub fn new(
+        address: PhysicalAddress,
+        byte_count: usize,
+        is_last: bool,
+    ) -> Result<Self, AhciError> {
+        if byte_count >= MAX_PRD_SIZE {
+            return Err(AhciError::RegionTooLarge);
+        }
+
+        let dbc_mask = (is_last as u32) << 31;
+        Ok(Self {
+            buffer_address: address.into_raw(),
+            _0: 0,
+            dbc: ((byte_count as u32 - 1) << 1) | 1 | dbc_mask,
+        })
+    }
+}
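+
+// Sketch of the intended call sequence (names from this file; this mirrors what
+// `PortInner::submit_command` in port.rs actually does, error handling elided):
+//
+//     let mut table = PageBox::new(CommandTable::zeroed())?;
+//     table.setup_command(&AtaIdentify::create()?)?; // H2D FIS + PRDT filled here
+//     let entry = CommandListEntry::new(unsafe { table.as_physical_address() }, 1)?;
+//     // `entry` is stored in the port's command list; setting the slot's bit in
+//     // the port CI register then issues the command.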
+impl AtaIdentifyResponse {
+    pub fn logical_sector_count(&self) -> u64 {
+        // If logical_sector_count_28 == 0x0FFFFFFF, and logical_sector_count_qw >= 0x0FFFFFFF,
+        // then the ACCESSIBLE CAPACITY (?) field contains the total number of user addressable
+        // LBAs (see 4.1)
+        // bit 3 in additional_features -> logical_sector_count_qw:
+        //   0 -> max value = 0xFFFFFFFFFFFF (48)
+        //   1 -> max value = 0xFFFFFFFF (32)
+        // If bit 3 in additional_features is set, ext_logical_sector_count_qw contains
+        // the maximum addressable LBA count. Max value = 0xFFFFFFFFFFFF (48)
+
+        if self.command_sets[1] & (1 << 10) != 0 {
+            // 48-bit supported
+            if self.additional_features & (1 << 3) != 0 {
+                // Use ext_logical_sector_count_qw
+                todo!()
+            } else {
+                // Use logical_sector_count_qw
+                self.logical_sector_count_qw
+            }
+        } else {
+            todo!()
+        }
+    }
+}
+
+impl<const N: usize> AtaString<N>
+where
+    ConstAssert<{ N % 2 == 0 }>: IsTrue,
+{
+    #[allow(clippy::inherent_to_string)]
+    pub fn to_string(&self) -> String {
+        // ATA strings store the two bytes of each word swapped
+        let mut buf = [0; N];
+
+        for i in (0..N).step_by(2) {
+            buf[i] = self.data[i + 1];
+            buf[i + 1] = self.data[i];
+        }
+        // Trim the trailing-space padding
+        let mut len = 0;
+        for i in (0..N).rev() {
+            if buf[i] != b' ' {
+                len = i + 1;
+                break;
+            }
+        }
+
+        String::from(core::str::from_utf8(&buf[..len]).unwrap())
+    }
+}
diff --git a/kernel/driver/block/ahci/src/error.rs b/kernel/driver/block/ahci/src/error.rs
new file mode 100644
index 00000000..85f3b418
--- /dev/null
+++ b/kernel/driver/block/ahci/src/error.rs
@@ -0,0 +1,8 @@
+use yggdrasil_abi::error::Error;
+
+#[derive(Debug)]
+pub enum AhciError {
+    MemoryError(Error),
+    RegionTooLarge,
+    DeviceError,
+}
diff --git a/kernel/driver/block/ahci/src/lib.rs b/kernel/driver/block/ahci/src/lib.rs
new file mode 100644
index 00000000..b3f74369
--- /dev/null
+++ b/kernel/driver/block/ahci/src/lib.rs
@@ -0,0 +1,256 @@
+#![feature(generic_const_exprs, inline_const)]
+#![allow(incomplete_features)]
+#![no_std]
+
+extern crate alloc;
+
+use alloc::{boxed::Box, format, vec, vec::Vec};
+use bytemuck::Zeroable;
+use data::ReceivedFis;
+use device_api::{
+    interrupt::{InterruptAffinity, InterruptHandler},
+    Device,
+};
+use error::AhciError;
+use kernel_fs::devfs;
+use libk_mm::{address::AsPhysicalAddress, device::DeviceMemoryIo, PageBox};
+use libk_thread::runtime;
+use libk_util::{sync::IrqSafeSpinlock, OneTimeInit};
+use port::AhciPort;
+use regs::{PortRegs, Regs};
+use tock_registers::interfaces::{ReadWriteable, Readable, Writeable};
+use ygg_driver_block::{probe_partitions, NgBlockDeviceWrapper};
+use ygg_driver_pci::{
+    device::{PciDeviceInfo, PreferredInterruptMode},
+    PciCommandRegister, PciConfigurationSpace,
+};
+use yggdrasil_abi::error::Error;
+
+use crate::regs::{Version, CAP, GHC, SSTS};
+
+mod command;
+mod data;
+mod error;
+mod port;
+mod regs;
+
+const MAX_PRD_SIZE: usize = 8192;
+const MAX_COMMANDS: usize = u32::BITS as usize;
+const SECTOR_SIZE: usize = 512;
+const MAX_DRIVES: usize = (b'z' - b'a') as usize;
+
+pub struct AhciController {
+    regs: IrqSafeSpinlock<DeviceMemoryIo<'static, Regs>>,
+    ports: OneTimeInit<Vec<&'static AhciPort>>,
+    received_fis_buffers: OneTimeInit<[Option<PageBox<ReceivedFis>>; 16]>,
+
+    version: Version,
+    max_port_count: usize,
+    ahci_only: bool,
+    has_64_bit: bool,
+}
+
+impl AhciController {
+    async fn late_init(&'static self) -> Result<(), AhciError> {
+        log::info!("Initializing AHCI SATA Controller {:?}", self.version);
+
+        let regs = self.regs.lock();
+
+        regs.GHC.modify(GHC::HR::SET);
+
+        while regs.GHC.matches_all(GHC::HR::SET) {
+            core::hint::spin_loop();
+        }
+
+        if !self.ahci_only {
+            regs.GHC.modify(GHC::AE::SET);
+        }
+
+        let pi = regs.PI.get();
+
+        let mut ports = vec![];
+
+        drop(regs);
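+
+        // Every port whose bit is set in PI is "implemented"; each one gets its own
+        // received-FIS DMA buffer, into which the HBA posts device-to-host FISes
+        // (register updates, PIO setup, etc.) once FIS receive is enabled.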
+        let mut fis_buffers = [const { None }; 16];
+        // Allocate FIS receive buffers for the ports
+        for i in 0..self.max_port_count {
+            if pi & (1 << i) == 0 {
+                continue;
+            }
+
+            let regs = self.regs.lock();
+            let port = &regs.PORTS[i];
+
+            let buffer = PageBox::new(ReceivedFis::zeroed()).map_err(AhciError::MemoryError)?;
+            port.set_received_fis_address_64(unsafe { buffer.as_physical_address() });
+            fis_buffers[i] = Some(buffer);
+        }
+
+        self.received_fis_buffers.init(fis_buffers);
+
+        for i in 0..self.max_port_count {
+            if pi & (1 << i) == 0 {
+                continue;
+            }
+
+            let regs = self.regs.lock();
+            let port = &regs.PORTS[i];
+
+            if !port.SSTS.matches_all(SSTS::DET::Online + SSTS::IPM::Active) {
+                continue;
+            }
+
+            port.start()?;
+
+            // TODO wait here
+
+            let sig = port.SIG.get();
+            if sig != PortRegs::SIG_SATA {
+                log::warn!("Skipping unknown port {} with signature {:#x}", i, sig);
+                continue;
+            }
+
+            let port = unsafe { regs.extract(|regs| &regs.PORTS[i]) };
+
+            drop(regs);
+
+            let port = match AhciPort::create(port, self, i) {
+                Ok(port) => port,
+                Err(error) => {
+                    log::warn!("Port {} init error: {:?}", i, error);
+                    continue;
+                }
+            };
+
+            ports.push(port);
+        }
+
+        let ports = self.ports.init(ports);
+
+        // Enable global HC interrupts
+        self.regs.lock().GHC.modify(GHC::IE::SET);
+
+        // Setup the detected ports
+        for (i, &port) in ports.iter().enumerate() {
+            log::info!("Init port {}", i);
+            port.init().await?;
+        }
+
+        // Dump info about the drives
+        for (i, &port) in ports.iter().enumerate() {
+            let info = port.info().unwrap();
+            log::info!(
+                "Port {}: model={:?}, serial={:?}, lba_count={}",
+                i,
+                info.model,
+                info.serial,
+                info.lba_count
+            );
+        }
+
+        {
+            let mut lock = SATA_DRIVES.lock();
+            for &port in ports.iter() {
+                let n = lock.len();
+                if n >= MAX_DRIVES {
+                    todo!("Too many drives, ran out of letters");
+                }
+                let n = n as u8;
+                lock.push(port);
+
+                let name = format!("sd{}", (n + b'a') as char);
+
+                let blk = NgBlockDeviceWrapper::new(port);
+                devfs::add_named_block_device(blk, name.clone()).ok();
+                probe_partitions(blk, move |index, partition| {
+                    devfs::add_block_device_partition(name.clone(), index, partition)
+                })
+                .ok();
+            }
+        }
+
+        log::debug!("All ports initialized");
+
+        Ok(())
+    }
+}
+
+impl InterruptHandler for AhciController {
+    fn handle_irq(&self, _vector: Option<usize>) -> bool {
+        let regs = self.regs.lock();
+
+        let is = regs.IS.get();
+        if is != 0 {
+            if let Some(ports) = self.ports.try_get() {
+                // Clear global interrupt status
+                regs.IS.set(u32::MAX);
+
+                for &port in ports {
+                    if is & (1 << port.index) != 0 {
+                        port.handle_pending_interrupts();
+                    }
+                }
+            }
+        }
+
+        false
+    }
+}
+
+impl Device for AhciController {
+    unsafe fn init(&'static self) -> Result<(), Error> {
+        // Do the init in background
+        runtime::spawn(self.late_init())?;
+        Ok(())
+    }
+
+    fn display_name(&self) -> &'static str {
+        "AHCI SATA Controller"
+    }
+}
+
+static SATA_DRIVES: IrqSafeSpinlock<Vec<&'static AhciPort>> = IrqSafeSpinlock::new(Vec::new());
+
+pub fn probe(info: &PciDeviceInfo) -> Result<&'static dyn Device, Error> {
+    let bar5 = info.config_space.bar(5).ok_or(Error::InvalidOperation)?;
+    let bar5 = bar5.as_memory().ok_or(Error::InvalidOperation)?;
+
+    let mut cmd = PciCommandRegister::from_bits_retain(info.config_space.command());
+    cmd &= !(PciCommandRegister::DISABLE_INTERRUPTS | PciCommandRegister::ENABLE_IO);
+    cmd |= PciCommandRegister::ENABLE_MEMORY | PciCommandRegister::BUS_MASTER;
+    info.config_space.set_command(cmd.bits());
+
+    info.init_interrupts(PreferredInterruptMode::Msi)?;
+
+    // // TODO support regular PCI interrupts (ACPI dependency)
+    // let Some(mut msi) = info.config_space.capability::() else {
+    //     log::warn!("Ignoring AHCI: does not support
MSI (and the OS doesn't yet support PCI IRQ)"); + // return Err(Error::InvalidOperation); + // }; + + // Map the registers + let regs = unsafe { DeviceMemoryIo::::map(bar5, Default::default()) }?; + + let version = Version::try_from(regs.VS.get())?; + let ahci_only = regs.CAP.matches_all(CAP::SAM::SET); + let max_port_count = regs.CAP.read(CAP::NP) as usize; + let has_64_bit = regs.CAP.matches_all(CAP::S64A::SET); + + // TODO extract Number of Command Slots + + let ahci = Box::leak(Box::new(AhciController { + regs: IrqSafeSpinlock::new(regs), + ports: OneTimeInit::new(), + received_fis_buffers: OneTimeInit::new(), + version, + max_port_count, + ahci_only, + has_64_bit, + })); + + // TODO use multiple vectors if capable + info.map_interrupt(InterruptAffinity::Any, ahci)?; + + Ok(ahci) +} diff --git a/kernel/driver/block/ahci/src/port.rs b/kernel/driver/block/ahci/src/port.rs new file mode 100644 index 00000000..14afc1d9 --- /dev/null +++ b/kernel/driver/block/ahci/src/port.rs @@ -0,0 +1,401 @@ +use core::{ + pin::Pin, + sync::atomic::{AtomicU32, Ordering}, + task::{Context, Poll}, +}; + +use alloc::{boxed::Box, string::String}; +use bytemuck::Zeroable; +use futures_util::{task::AtomicWaker, Future}; +use libk_mm::{address::AsPhysicalAddress, device::DeviceMemoryIo, PageBox}; +use libk_util::{sync::IrqSafeSpinlock, waker::QueueWaker, OneTimeInit}; +use tock_registers::interfaces::{Readable, Writeable}; +use ygg_driver_block::{IoOperation, IoRequest, IoSubmissionId, NgBlockDevice}; +use yggdrasil_abi::error::Error; + +use crate::{ + command::{AtaCommand, AtaIdentify, AtaReadDmaEx}, + data::{CommandListEntry, CommandTable, ReceivedFis, COMMAND_LIST_LENGTH}, + error::AhciError, + regs::{CommandState, CommandStatus, PortRegs, IE, TFD}, + AhciController, MAX_COMMANDS, SECTOR_SIZE, +}; + +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum PortType { + Sata, +} + +struct PortInner { + regs: DeviceMemoryIo<'static, PortRegs>, + + #[allow(unused)] + received_fis: PageBox, + command_list: PageBox<[CommandListEntry]>, +} + +pub struct PortInfo { + pub model: String, + pub serial: String, + pub lba_count: u64, +} + +#[allow(unused)] +pub struct AhciPort { + inner: IrqSafeSpinlock, + ahci: &'static AhciController, + ty: PortType, + pub(crate) index: usize, + info: OneTimeInit, + + command_allocation: IrqSafeSpinlock, + // One command index can only be waited for by one task, so this approach is usable + command_completion: [(AtomicWaker, AtomicU32); COMMAND_LIST_LENGTH], + command_available: QueueWaker, +} + +impl PortInner { + fn submit_command( + &mut self, + index: usize, + command: &C, + ) -> Result<(), AhciError> { + let list_entry = &mut self.command_list[index]; + let mut table_entry = + PageBox::new(CommandTable::zeroed()).map_err(AhciError::MemoryError)?; + + table_entry.setup_command(command)?; + *list_entry = CommandListEntry::new( + unsafe { table_entry.as_physical_address() }, + command.regions().len(), + )?; + + // Sync before send + // XXX do this properly + #[cfg(target_arch = "x86_64")] + unsafe { + core::arch::asm!("wbinvd"); + } + + // TODO deal with this async way + while self.regs.TFD.matches_any(TFD::BSY::SET + TFD::DRQ::SET) { + core::hint::spin_loop(); + } + + let ci = self.regs.CI.get(); + assert_eq!(ci & (1 << index), 0); + self.regs.CI.set(ci | (1 << index)); + + Ok(()) + } +} + +impl AhciPort { + pub fn create( + regs: DeviceMemoryIo<'static, PortRegs>, + ahci: &'static AhciController, + index: usize, + ) -> Result<&'static Self, AhciError> { + log::debug!("Initialize port 
{}", index); + regs.stop()?; + + if !ahci.has_64_bit { + todo!("Handle controllers incapable of 64 bit"); + } + + let received_fis = PageBox::new(ReceivedFis::zeroed()).map_err(AhciError::MemoryError)?; + let command_list = PageBox::new_slice(CommandListEntry::zeroed(), COMMAND_LIST_LENGTH) + .map_err(AhciError::MemoryError)?; + + regs.set_received_fis_address_64(unsafe { received_fis.as_physical_address() }); + regs.set_command_list_address_64(unsafe { command_list.as_physical_address() }); + + regs.IE.write( + IE::DPE::SET + + IE::IFE::SET + + IE::OFE::SET + + IE::HBDE::SET + + IE::HBFE::SET + + IE::TFEE::SET + + IE::DHRE::SET, + ); + + regs.start()?; + + let inner = PortInner { + regs, + command_list, + received_fis, + }; + let command_completion = [const { (AtomicWaker::new(), AtomicU32::new(0)) }; MAX_COMMANDS]; + let command_available = QueueWaker::new(); + let command_allocation = IrqSafeSpinlock::new(0); + + Ok(Box::leak(Box::new(Self { + inner: IrqSafeSpinlock::new(inner), + ty: PortType::Sata, + info: OneTimeInit::new(), + ahci, + index, + + command_completion, + command_allocation, + command_available, + }))) + } + + pub async fn init(&'static self) -> Result<(), AhciError> { + let identify = self.perform_command(AtaIdentify::create()?).await?; + let model = identify.model_number.to_string(); + let serial = identify.serial_number.to_string(); + let lba_count = identify.logical_sector_count(); + + // TODO can sector size be different from 512 in ATA? + // should logical sector size be accounted for? + // TODO test for ReadDmaEx capability (?) + + self.info.init(PortInfo { + model, + serial, + lba_count, + }); + + Ok(()) + } + + pub fn info(&self) -> Option<&PortInfo> { + self.info.try_get() + } + + async fn perform_command(&self, command: C) -> Result { + let slot = self.allocate_command().await?; + log::trace!( + "Submit command on port {}, cmd index = {}", + self.index, + slot + ); + self.inner.lock().submit_command(slot, &command)?; + self.wait_for_completion(slot).await?; + self.free_command(slot); + Ok(unsafe { command.into_response() }) + } + + fn allocate_command(&self) -> impl Future> + '_ { + struct F<'f> { + waker: &'f QueueWaker, + state: &'f IrqSafeSpinlock, + } + + impl<'f> Future for F<'f> { + type Output = Result; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + self.waker.register(cx.waker()); + + let mut state = self.state.lock(); + + if *state != u32::MAX { + self.waker.remove(cx.waker()); + + for i in 0..MAX_COMMANDS { + if *state & (1 << i) == 0 { + *state |= 1 << i; + return Poll::Ready(Ok(i)); + } + } + + panic!("Unreachable"); + } else { + Poll::Pending + } + } + } + + let waker = &self.command_available; + let state = &self.command_allocation; + + F { waker, state } + } + + fn wait_for_completion( + &self, + index: usize, + ) -> impl Future> + '_ { + struct F<'f> { + waker: &'f AtomicWaker, + status: &'f AtomicU32, + } + + impl<'f> Future for F<'f> { + type Output = Result<(), AhciError>; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + match self.status.load(Ordering::Acquire) { + 0 => (), + 1 => return Poll::Ready(Ok(())), + _ => return Poll::Ready(Err(AhciError::DeviceError)), + } + + self.waker.register(cx.waker()); + + match self.status.load(Ordering::Acquire) { + 0 => Poll::Pending, + 1 => Poll::Ready(Ok(())), + _ => Poll::Ready(Err(AhciError::DeviceError)), + } + } + } + + let (waker, status) = &self.command_completion[index]; + + F { status, waker } + } + + fn free_command(&self, index: usize) { + { + let mut 
alloc = self.command_allocation.lock(); + assert_ne!(*alloc & (1 << index), 0); + *alloc &= !(1 << index); + } + self.command_available.wake_one(); + } + + pub fn handle_pending_interrupts(&self) -> bool { + let inner = self.inner.lock(); + + for i in 0..MAX_COMMANDS { + match inner.regs.clear_state(i) { + CommandState::Pending => (), + CommandState::Ready(status) => { + // TODO better error handling? + let val = match status { + CommandStatus::Success => 1, + _ => 2, + }; + + self.command_completion[i].1.store(val, Ordering::Release); + self.command_completion[i].0.wake(); + } + } + } + true + } +} + +impl NgBlockDevice for AhciPort { + type CompletionNotify = AtomicWaker; + + fn bus_id(&self) -> u32 { + 0 + } + + fn unit_id(&self) -> u32 { + self.index as u32 + } + + fn block_size(&self) -> u64 { + SECTOR_SIZE as _ + } + + fn block_count(&self) -> u64 { + self.info.get().lba_count + } + + fn max_blocks_per_request(&self) -> u64 { + // TODO + 1 + } + + async fn submit_request(&self, request: IoRequest<'_>) -> Result { + // TODO better error handling + let slot = self.allocate_command().await.unwrap(); + log::trace!( + "Submit command on port {}, cmd index = {}", + self.index, + slot + ); + + match request.operation { + IoOperation::Read { lba, count } => { + self.inner + .lock() + .submit_command(slot, &AtaReadDmaEx::new(lba, count, request.data)) + .unwrap(); + } + IoOperation::Write { .. } => todo!(), + } + + Ok(IoSubmissionId { + queue_id: self.index, + command_id: slot, + }) + } + + fn poll_completion(&self, id: IoSubmissionId) -> Poll> { + let (_, status) = &self.command_completion[id.command_id]; + + match status.load(Ordering::Acquire) { + 0 => Poll::Pending, + 1 => { + self.free_command(id.command_id); + log::debug!("COMMAND FINISHED"); + Poll::Ready(Ok(())) + } + _ => todo!(), // Poll::Ready(Err(AhciError::DeviceError)), + } + } + + fn completion_notify(&self, id: IoSubmissionId) -> &Self::CompletionNotify { + let (notify, _) = &self.command_completion[id.command_id]; + notify + } +} + +// impl BlockDevice for AhciPort { +// fn read(&'static self, mut pos: u64, buf: &mut [u8]) -> Result { +// let info = self.info.try_get().ok_or(Error::PermissionDenied)?; +// +// let mut cache = self.cache.lock(); +// let mut rem = buf.len(); +// let mut off = 0; +// +// while rem != 0 { +// let lba = pos / SECTOR_SIZE as u64; +// +// if lba >= info.lba_count { +// break; +// } +// +// let block_offset = (pos % SECTOR_SIZE as u64) as usize; +// let count = core::cmp::min(SECTOR_SIZE - block_offset, rem); +// +// let block = cache.get_or_fetch_with(lba, |block| { +// block! { +// self.read_block(lba, block).await +// }? 
+// .map_err(|_| Error::InvalidOperation) +// })?; +// +// buf[off..off + count].copy_from_slice(&block[block_offset..block_offset + count]); +// +// rem -= count; +// off += count; +// pos += count as u64; +// } +// +// Ok(off) +// } +// +// fn write(&'static self, _pos: u64, _buf: &[u8]) -> Result { +// todo!() +// } +// +// fn size(&self) -> Result { +// let info = self.info.try_get().ok_or(Error::PermissionDenied)?; +// Ok(info.lba_count * SECTOR_SIZE as u64) +// } +// +// fn device_request(&self, _req: &mut DeviceRequest) -> Result<(), Error> { +// todo!() +// } +// } diff --git a/kernel/driver/block/ahci/src/regs.rs b/kernel/driver/block/ahci/src/regs.rs new file mode 100644 index 00000000..5a2a29be --- /dev/null +++ b/kernel/driver/block/ahci/src/regs.rs @@ -0,0 +1,203 @@ +use libk_mm::address::{IntoRaw, PhysicalAddress}; +use tock_registers::{ + interfaces::{ReadWriteable, Readable, Writeable}, + register_bitfields, register_structs, + registers::{ReadOnly, ReadWrite}, +}; +use yggdrasil_abi::error::Error; + +use crate::error::AhciError; + +register_bitfields! { + u32, + pub CAP [ + NP OFFSET(0) NUMBITS(5) [], + NCS OFFSET(8) NUMBITS(5) [], + SAM OFFSET(18) NUMBITS(1) [], + S64A OFFSET(31) NUMBITS(1) [], + ], + pub GHC [ + HR OFFSET(0) NUMBITS(1) [], + IE OFFSET(1) NUMBITS(1) [], + AE OFFSET(31) NUMBITS(1) [], + ], + + // Read/write 1 to clear + pub IS [ + TFES OFFSET(30) NUMBITS(1) [], + HBFS OFFSET(29) NUMBITS(1) [], + HBDS OFFSET(28) NUMBITS(1) [], + IFS OFFSET(27) NUMBITS(1) [], + OFS OFFSET(24) NUMBITS(1) [], + ], + pub IE [ + TFEE OFFSET(30) NUMBITS(1) [], + HBFE OFFSET(29) NUMBITS(1) [], + HBDE OFFSET(28) NUMBITS(1) [], + IFE OFFSET(27) NUMBITS(1) [], + OFE OFFSET(24) NUMBITS(1) [], + DPE OFFSET(5) NUMBITS(1) [], + DHRE OFFSET(0) NUMBITS(1) [], + ], + pub CMD [ + CR OFFSET(15) NUMBITS(1) [], + FR OFFSET(14) NUMBITS(1) [], + CCS OFFSET(8) NUMBITS(5) [], + FRE OFFSET(4) NUMBITS(1) [], + POD OFFSET(2) NUMBITS(1) [], + ST OFFSET(0) NUMBITS(1) [], + ], + pub SSTS [ + IPM OFFSET(8) NUMBITS(4) [ + NotPresent = 0, + Active = 1, + ], + DET OFFSET(0) NUMBITS(4) [ + NotPresent = 0, + Online = 3, + ], + ], + pub TFD [ + BSY OFFSET(7) NUMBITS(1) [], + DRQ OFFSET(3) NUMBITS(1) [], + ERR OFFSET(0) NUMBITS(1) [], + ] +} + +register_structs! { + #[allow(non_snake_case)] + pub Regs { + (0x0000 => pub CAP: ReadOnly), + (0x0004 => pub GHC: ReadWrite), + (0x0008 => pub IS: ReadWrite), + (0x000C => pub PI: ReadOnly), + (0x0010 => pub VS: ReadOnly), + (0x0014 => _0), + (0x0100 => pub PORTS: [PortRegs; 30]), + (0x1000 => @END), + } +} + +register_structs! 
{ + #[allow(non_snake_case)] + pub PortRegs { + (0x00 => pub CLB: ReadWrite), + (0x04 => pub CLBU: ReadWrite), + (0x08 => pub FB: ReadWrite), + (0x0C => pub FBU: ReadWrite), + (0x10 => pub IS: ReadWrite), + (0x14 => pub IE: ReadWrite), + (0x18 => pub CMD: ReadWrite), + (0x1C => _0), + (0x20 => pub TFD: ReadWrite), + (0x24 => pub SIG: ReadOnly), + (0x28 => pub SSTS: ReadOnly), + (0x2C => pub SCTL: ReadOnly), + (0x30 => pub SERR: ReadOnly), + (0x34 => pub SACT: ReadOnly), + (0x38 => pub CI: ReadWrite), + (0x3C => pub SNTF: ReadOnly), + (0x40 => _1), + (0x80 => @END), + } +} + +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum Version { + V0_95, + V1_0, + V1_1, + V1_2, + V1_3, + V1_3_1, +} + +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum CommandStatus { + Success, + TaskFileError, +} + +#[derive(Clone, Copy, PartialEq, Debug)] +pub enum CommandState { + Pending, + Ready(CommandStatus), +} + +impl PortRegs { + pub const SIG_SATA: u32 = 0x101; + + // NOTE: usually doesn't take long, so not async, I guess + pub fn stop(&self) -> Result<(), AhciError> { + self.CMD.modify(CMD::ST::CLEAR + CMD::FRE::CLEAR); + + // TODO timeout here + while self.CMD.matches_any(CMD::FR::SET + CMD::CR::SET) { + core::hint::spin_loop(); + } + + Ok(()) + } + + pub fn start(&self) -> Result<(), AhciError> { + while self.CMD.matches_all(CMD::CR::SET) { + core::hint::spin_loop(); + } + + self.CMD.modify(CMD::ST::SET + CMD::FRE::SET); + + Ok(()) + } + + pub fn set_received_fis_address_64(&self, address: PhysicalAddress) { + let address: u64 = address.into_raw(); + self.FB.set(address as u32); + self.FBU.set((address >> 32) as u32); + } + + pub fn set_command_list_address_64(&self, address: PhysicalAddress) { + let address: u64 = address.into_raw(); + self.CLB.set(address as u32); + self.CLBU.set((address >> 32) as u32); + } + + pub fn clear_state(&self, index: usize) -> CommandState { + let is = self.IS.extract(); + let ci = self.CI.get(); + + if is.get() == 0 { + return CommandState::Pending; + } + + // Clear everything + self.IS.set(0xFFFFFFFF); + + if is.matches_any(IS::HBDS::SET + IS::HBFS::SET) { + todo!("Host communication error unhandled"); + } + + assert_eq!(ci & (1 << index), 0); + + if is.matches_any(IS::TFES::SET + IS::IFS::SET + IS::OFS::SET) { + return CommandState::Ready(CommandStatus::TaskFileError); + } + + CommandState::Ready(CommandStatus::Success) + } +} + +impl TryFrom for Version { + type Error = Error; + + fn try_from(value: u32) -> Result { + match value { + 0x00000905 => Ok(Self::V0_95), + 0x00010000 => Ok(Self::V1_0), + 0x00010100 => Ok(Self::V1_1), + 0x00010200 => Ok(Self::V1_2), + 0x00010300 => Ok(Self::V1_3), + 0x00010301 => Ok(Self::V1_3_1), + _ => Err(Error::InvalidArgument), + } + } +} diff --git a/kernel/driver/block/core/Cargo.toml b/kernel/driver/block/core/Cargo.toml new file mode 100644 index 00000000..ade50366 --- /dev/null +++ b/kernel/driver/block/core/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "ygg_driver_block" +version = "0.1.0" +edition = "2021" +authors = ["Mark Poliakov "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +libk-util = { path = "../../../libk/libk-util" } +libk-mm = { path = "../../../libk/libk-mm" } + +log = "0.4.20" +futures-util = { version = "0.3.28", default-features = false, features = ["alloc", "async-await"] } +bytemuck = { version = "1.14.0", features = ["derive"] } +static_assertions = "1.1.0" +uuid 
= { version = "1.6.1", default-features = false, features = ["bytemuck"] } diff --git a/kernel/driver/block/core/src/device.rs b/kernel/driver/block/core/src/device.rs new file mode 100644 index 00000000..d924baca --- /dev/null +++ b/kernel/driver/block/core/src/device.rs @@ -0,0 +1,379 @@ +#![allow(unused)] + +use core::{ + ops::Range, + pin::Pin, + task::{Context, Poll}, +}; + +use alloc::boxed::Box; +use futures_util::{task::AtomicWaker, Future}; +use libk_mm::{address::PhysicalAddress, table::MapAttributes, PageBox, PageProvider}; +use libk_util::waker::QueueWaker; +use yggdrasil_abi::{error::Error, io::DeviceRequest}; + +use crate::{ + request::{IoOperation, IoRequest, IoSubmissionId}, + BlockDevice, +}; + +pub trait CompletionNotify { + fn wait_for_completion<'a, D: NgBlockDevice + 'a>( + &'a self, + device: &'a D, + id: IoSubmissionId, + ) -> impl Future> + Send + '_; +} + +pub trait NgBlockDevice: Sync { + type CompletionNotify: CompletionNotify; + + fn bus_id(&self) -> u32; // HBA, controller ID, etc. + fn unit_id(&self) -> u32; // Drive, slot, connector ID, etc. + + fn block_size(&self) -> u64; + fn block_count(&self) -> u64; + fn max_blocks_per_request(&self) -> u64; + + fn submit_request( + &self, + request: IoRequest, + ) -> impl Future> + Send; + + fn poll_completion(&self, id: IoSubmissionId) -> Poll>; + fn completion_notify(&self, id: IoSubmissionId) -> &Self::CompletionNotify; + + fn wait_for_completion( + &self, + id: IoSubmissionId, + ) -> impl Future> + Send + '_ + where + Self: Sized, + { + self.completion_notify(id).wait_for_completion(self, id) + } +} + +pub struct NgBlockDeviceWrapper<'a, D: NgBlockDevice + 'a> { + device: &'a D, + + pub(crate) block_size: u64, + pub(crate) block_count: u64, + #[allow(unused)] + max_blocks_per_request: u64, +} + +#[derive(Debug, PartialEq)] +struct BlockChunk { + lba_start: u64, + lba_count: usize, + buffer_offset: usize, + lba_offset: usize, + byte_count: usize, +} + +struct BlockChunkIter { + remaining: usize, + buffer_offset: usize, + position: u64, + + block_size: u64, + max_blocks_per_request: u64, +} + +impl CompletionNotify for QueueWaker { + fn wait_for_completion<'a, D: NgBlockDevice + 'a>( + &'a self, + device: &'a D, + id: IoSubmissionId, + ) -> impl Future> + Send + '_ { + struct F<'f, D: NgBlockDevice + 'f> { + device: &'f D, + notify: &'f QueueWaker, + id: IoSubmissionId, + } + + impl<'f, D: NgBlockDevice + 'f> Future for F<'f, D> { + type Output = Result<(), Error>; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + self.notify.register(cx.waker()); + match self.device.poll_completion(self.id) { + Poll::Ready(result) => { + self.notify.remove(cx.waker()); + Poll::Ready(result) + } + Poll::Pending => Poll::Pending, + } + } + } + + F { + notify: self, + device, + id, + } + } +} + +impl CompletionNotify for AtomicWaker { + fn wait_for_completion<'a, D: NgBlockDevice + 'a>( + &'a self, + device: &'a D, + id: IoSubmissionId, + ) -> impl Future> + Send + '_ { + struct F<'f, D: NgBlockDevice + 'f> { + device: &'f D, + notify: &'f AtomicWaker, + id: IoSubmissionId, + } + + impl<'f, D: NgBlockDevice + 'f> Future for F<'f, D> { + type Output = Result<(), Error>; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if let Poll::Ready(result) = self.device.poll_completion(self.id) { + return Poll::Ready(result); + } + self.notify.register(cx.waker()); + self.device.poll_completion(self.id) + } + } + + F { + notify: self, + device, + id, + } + } +} + +impl BlockChunk { + pub fn block_range(&self) -> 
Range { + self.lba_offset..self.lba_offset + self.byte_count + } + + pub fn buffer_range(&self) -> Range { + self.buffer_offset..self.buffer_offset + self.byte_count + } +} + +impl BlockChunkIter { + pub fn new(pos: u64, count: usize, lba_size: u64, max_lba_per_request: u64) -> Self { + Self { + remaining: count, + buffer_offset: 0, + position: pos, + + block_size: lba_size, + max_blocks_per_request: max_lba_per_request, + } + } +} + +impl Iterator for BlockChunkIter { + type Item = BlockChunk; + + fn next(&mut self) -> Option { + if self.remaining == 0 { + return None; + } + + let lba_start = self.position / self.block_size; + let lba_end = + (self.position + self.remaining as u64 + self.block_size - 1) / self.block_size; + + let lba_count = core::cmp::min(lba_end - lba_start, self.max_blocks_per_request); + + let lba_offset = (self.position % self.block_size) as usize; + let byte_count = core::cmp::min( + (lba_count * self.block_size) as usize - lba_offset, + self.remaining, + ); + + let buffer_offset = self.buffer_offset; + + self.position += byte_count as u64; + self.buffer_offset += byte_count; + self.remaining -= byte_count; + + Some(BlockChunk { + lba_start, + lba_count: lba_count as usize, + buffer_offset, + lba_offset, + byte_count, + }) + } +} + +impl<'a, D: NgBlockDevice + 'a> NgBlockDeviceWrapper<'a, D> { + pub fn new(device: &'a D) -> &'a Self { + let block_size = device.block_size(); + let block_count = device.block_count(); + let max_blocks_per_request = device.max_blocks_per_request(); + + Box::leak(Box::new(Self { + device, + block_size, + block_count, + max_blocks_per_request, + })) + } + + async fn read_range_inner(&self, lba: u64, count: usize) -> Result, Error> { + let mut data = PageBox::new_uninit_slice(self.block_size as usize * count)?; + + let id = self + .device + .submit_request(IoRequest { + operation: IoOperation::Read { lba, count }, + data: &mut data, + }) + .await?; + + self.device.wait_for_completion(id).await?; + + Ok(unsafe { data.assume_init_slice() }) + } +} + +impl<'a, D: NgBlockDevice + 'a> PageProvider for NgBlockDeviceWrapper<'a, D> { + fn get_page(&self, _offset: u64) -> Result { + todo!() + } + + fn release_page(&self, _offset: u64, _phys: PhysicalAddress) -> Result<(), Error> { + todo!() + } + + fn clone_page( + &self, + _offset: u64, + _src_phys: PhysicalAddress, + _src_attrs: MapAttributes, + ) -> Result { + todo!() + } +} + +impl<'a, D: NgBlockDevice + 'a> BlockDevice for NgBlockDeviceWrapper<'a, D> { + fn poll_read( + &self, + cx: &mut Context<'_>, + pos: u64, + buf: &mut [u8], + ) -> Poll> { + todo!() + } + + fn poll_write(&self, cx: &mut Context<'_>, pos: u64, buf: &[u8]) -> Poll> { + todo!() + } + + // fn read(&'static self, pos: u64, buf: &mut [u8]) -> Result { + // // TODO block cache + // block! { + // let mut bytes_read = 0; + + // for chunk in + // BlockChunkIter::new(pos, buf.len(), self.block_size, self.max_blocks_per_request) + // { + // log::debug!( + // "Read chunk: lba_start={}, lba_count={}", + // chunk.lba_start, + // chunk.lba_count + // ); + + // let block = self.read_range_inner(chunk.lba_start, chunk.lba_count).await?; + + // buf[chunk.buffer_range()].copy_from_slice(&block[chunk.block_range()]); + + // bytes_read += chunk.byte_count; + // } + + // Ok(bytes_read) + // }? 
+ // } + + // fn write(&'static self, _pos: u64, _buf: &[u8]) -> Result { + // todo!() + // } + + fn size(&self) -> Result { + Ok(self.block_size * self.block_count) + } + + fn device_request(&self, _req: &mut DeviceRequest) -> Result<(), Error> { + todo!() + } +} + +#[cfg(test)] +mod tests { + use crate::device::BlockChunk; + + use super::BlockChunkIter; + + #[test] + fn block_chunk_iter() { + let mut it = BlockChunkIter { + remaining: 512 * 9 + 1, + position: 123, + block_size: 512, + buffer_offset: 0, + max_blocks_per_request: 2, + }; + + assert_eq!( + it.next().unwrap(), + BlockChunk { + lba_start: 0, + lba_count: 2, + buffer_offset: 0, + lba_offset: 123, + byte_count: 901 + } + ); + assert_eq!( + it.next().unwrap(), + BlockChunk { + lba_start: 2, + lba_count: 2, + buffer_offset: 1024 - 123, + lba_offset: 0, + byte_count: 1024 + } + ); + assert_eq!( + it.next().unwrap(), + BlockChunk { + lba_start: 4, + lba_count: 2, + buffer_offset: 2 * 1024 - 123, + lba_offset: 0, + byte_count: 1024 + } + ); + assert_eq!( + it.next().unwrap(), + BlockChunk { + lba_start: 6, + lba_count: 2, + buffer_offset: 3 * 1024 - 123, + lba_offset: 0, + byte_count: 1024 + } + ); + assert_eq!( + it.next().unwrap(), + BlockChunk { + lba_start: 8, + lba_count: 2, + buffer_offset: 4 * 1024 - 123, + lba_offset: 0, + byte_count: 512 + 123 + 1 + } + ); + } +} diff --git a/kernel/driver/block/core/src/lib.rs b/kernel/driver/block/core/src/lib.rs new file mode 100644 index 00000000..80d7f1e0 --- /dev/null +++ b/kernel/driver/block/core/src/lib.rs @@ -0,0 +1,110 @@ +#![no_std] + +extern crate alloc; + +use core::task::{Context, Poll}; + +use libk_mm::PageProvider; +use yggdrasil_abi::{error::Error, io::DeviceRequest}; + +pub mod device; +// mod partition; +pub mod request; + +pub use device::{NgBlockDevice, NgBlockDeviceWrapper}; +pub use request::{IoOperation, IoRequest, IoSubmissionId}; + +// TODO +pub fn probe_partitions< + D: NgBlockDevice + 'static, + F: Fn(usize, &'static dyn BlockDevice) -> Result<(), Error> + Send + 'static, +>( + _dev: &'static NgBlockDeviceWrapper, + _callback: F, +) -> Result<(), Error> { + Ok(()) + // async fn probe_table( + // dev: &'static NgBlockDeviceWrapper<'static, D>, + // ) -> Result>>, Error> { + // if let Some(partitions) = partition::probe_gpt(dev)? 
{
+    //        return Ok(Some(partitions));
+    //    }
+
+    //    Ok(None)
+    // }
+
+    // runtime::spawn(async move {
+    //     match probe_table(dev).await {
+    //         Ok(Some(partitions)) => {
+    //             // Create block devices for the partitions
+    //             for (i, partition) in partitions.into_iter().enumerate() {
+    //                 let partition_blkdev = Box::leak(Box::new(partition));
+
+    //                 if let Err(error) = callback(i, partition_blkdev) {
+    //                     log::warn!("Could not add partition {}: {:?}", i, error);
+    //                 }
+    //             }
+    //         }
+    //         Ok(None) => {
+    //             log::warn!("Unknown or missing partition table");
+    //         }
+    //         Err(error) => {
+    //             log::warn!("Could not probe partition table: {:?}", error);
+    //         }
+    //     }
+    // })
+}
+
+/// Block device interface
+#[allow(unused)]
+pub trait BlockDevice: PageProvider + Sync {
+    fn poll_read(
+        &self,
+        cx: &mut Context<'_>,
+        pos: u64,
+        buf: &mut [u8],
+    ) -> Poll<Result<usize, Error>> {
+        Poll::Ready(Err(Error::NotImplemented))
+    }
+
+    fn poll_write(&self, cx: &mut Context<'_>, pos: u64, buf: &[u8]) -> Poll<Result<usize, Error>> {
+        Poll::Ready(Err(Error::NotImplemented))
+    }
+
+    // /// Reads data from the given offset of the device
+    // fn read(&'static self, pos: u64, buf: &mut [u8]) -> Result<usize, Error> {
+    //     Err(Error::NotImplemented)
+    // }
+    // /// Writes the data to the given offset of the device
+    // fn write(&'static self, pos: u64, buf: &[u8]) -> Result<usize, Error> {
+    //     Err(Error::NotImplemented)
+    // }
+
+    /// Returns the size of the block device in bytes
+    fn size(&self) -> Result<u64, Error> {
+        Err(Error::NotImplemented)
+    }
+
+    /// Returns `true` if the device can be read from
+    fn is_readable(&self) -> bool {
+        true
+    }
+    /// Returns `true` if the device can be written to
+    fn is_writable(&self) -> bool {
+        true
+    }
+
+    /// Performs a device-specific function
+    fn device_request(&self, req: &mut DeviceRequest) -> Result<(), Error> {
+        Err(Error::NotImplemented)
+    }
+
+    // fn read_exact(&'static self, pos: u64, buf: &mut [u8]) -> Result<(), Error> {
+    //     let count = self.read(pos, buf)?;
+    //     if count == buf.len() {
+    //         Ok(())
+    //     } else {
+    //         Err(Error::MissingData)
+    //     }
+    // }
+}
diff --git a/kernel/driver/block/core/src/partition.rs b/kernel/driver/block/core/src/partition.rs
new file mode 100644
index 00000000..148d3ad7
--- /dev/null
+++ b/kernel/driver/block/core/src/partition.rs
@@ -0,0 +1,137 @@
+use core::mem::{size_of, MaybeUninit};
+
+use alloc::{vec, vec::Vec};
+use bytemuck::{Pod, Zeroable};
+use libk::mem::PageBox;
+use static_assertions::const_assert_eq;
+use uuid::Uuid;
+use yggdrasil_abi::{error::Error, io::DeviceRequest};
+
+use crate::{BlockDevice, NgBlockDevice, NgBlockDeviceWrapper};
+
+pub struct Partition<'a, D: NgBlockDevice + 'a> {
+    pub device: &'a NgBlockDeviceWrapper<'a, D>,
+    pub lba_start: u64,
+    pub lba_end: u64,
+}
+
+#[derive(Clone, Copy)]
+#[repr(C)]
+struct GptHeader {
+    signature: [u8; 8],
+    revision: u32,
+    header_size: u32,
+    crc32: u32,
+    _0: u32,
+    header_lba: u64,
+    alternate_header_lba: u64,
+    first_usable_lba: u64,
+    last_usable_lba: u64,
+    guid: [u8; 16],
+    partition_table_lba: u64,
+    partition_table_len: u32,
+    partition_table_entry_size: u32,
+    partition_table_crc32: u32,
+    _1: [u8; 420],
+}
+
+#[derive(Clone, Copy, Zeroable, Pod)]
+#[repr(C)]
+struct GptEntry {
+    type_guid: Uuid,
+    part_guid: Uuid,
+    lba_start: u64,
+    lba_end: u64,
+    attrs: u64,
+}
+
+const_assert_eq!(size_of::<GptHeader>(), 512);
+
+impl<'a, D: NgBlockDevice + 'a> Partition<'a, D> {
+    fn end_byte(&self) -> u64 {
+        self.lba_end * self.device.block_size
+    }
+
+    fn start_byte(&self) -> u64 {
+        self.lba_start * self.device.block_size
+    }
+}
+
+impl<'a, D:
NgBlockDevice + 'a> BlockDevice for Partition<'a, D> { + fn read(&'static self, pos: u64, buf: &mut [u8]) -> Result { + if pos >= self.end_byte() { + return Ok(0); + } + + let start = self.start_byte() + pos; + let end = core::cmp::min(start + buf.len() as u64, self.end_byte()); + let count = (end - start) as usize; + + self.device.read(start, &mut buf[..count]) + } + + fn write(&'static self, pos: u64, buf: &[u8]) -> Result { + if pos >= self.end_byte() { + return Ok(0); + } + + let start = self.start_byte() + pos; + let end = core::cmp::min(start + buf.len() as u64, self.end_byte()); + let count = (end - start) as usize; + + self.device.write(start, &buf[..count]) + } + + fn size(&self) -> Result { + Ok((self.lba_end - self.lba_start) * self.device.block_size) + } + + fn device_request(&self, req: &mut DeviceRequest) -> Result<(), Error> { + self.device.device_request(req) + } +} + +unsafe fn read_struct_lba(dev: &'static dyn BlockDevice, lba: u64) -> Result { + assert_eq!(size_of::(), 512); + let mut data = MaybeUninit::::uninit(); + let buffer = core::slice::from_raw_parts_mut(data.as_mut_ptr() as *mut u8, 512); + dev.read_exact(lba * 512, buffer)?; + Ok(data.assume_init()) +} + +pub(crate) fn probe_gpt( + dev: &'static NgBlockDeviceWrapper<'static, D>, +) -> Result>>, Error> { + let header = unsafe { read_struct_lba::(dev, 1) }?; + + if &header.signature != b"EFI PART" { + // Not a GPT partition table + return Ok(None); + } + + let pt_entsize = header.partition_table_entry_size as usize; + let pt_len = header.partition_table_len as usize; + let mut pt_data = PageBox::new_slice(0, pt_len * pt_entsize)?; + + assert!(size_of::() <= pt_entsize); + + dev.read_exact(header.partition_table_lba * 512, &mut pt_data)?; + + let mut partitions = vec![]; + for i in 0..pt_len { + let pt_entry_data = &pt_data[i * pt_entsize..i * pt_entsize + size_of::()]; + let pt_entry: &GptEntry = bytemuck::from_bytes(pt_entry_data); + + if pt_entry.type_guid.is_nil() { + continue; + } + + partitions.push(Partition { + device: dev, + lba_start: pt_entry.lba_start, + lba_end: pt_entry.lba_end, + }); + } + + Ok(Some(partitions)) +} diff --git a/kernel/driver/block/core/src/request.rs b/kernel/driver/block/core/src/request.rs new file mode 100644 index 00000000..d379ff69 --- /dev/null +++ b/kernel/driver/block/core/src/request.rs @@ -0,0 +1,19 @@ +use core::mem::MaybeUninit; + +use libk_mm::PageBox; + +pub enum IoOperation { + Read { lba: u64, count: usize }, + Write { lba: u64, count: usize }, +} + +pub struct IoRequest<'a> { + pub operation: IoOperation, + pub data: &'a mut PageBox<[MaybeUninit]>, +} + +#[derive(Clone, Copy, Debug)] +pub struct IoSubmissionId { + pub queue_id: usize, + pub command_id: usize, +} diff --git a/kernel/driver/block/nvme/Cargo.toml b/kernel/driver/block/nvme/Cargo.toml new file mode 100644 index 00000000..611d52a3 --- /dev/null +++ b/kernel/driver/block/nvme/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "ygg_driver_nvme" +version = "0.1.0" +edition = "2021" +authors = ["Mark Poliakov "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +libk-util = { path = "../../../libk/libk-util" } +libk-thread = { path = "../../../libk/libk-thread" } +libk-mm = { path = "../../../libk/libk-mm" } +device-api = { path = "../../../lib/device-api", features = ["derive"] } +vfs = { path = "../../../lib/vfs" } + +ygg_driver_pci = { path = "../../bus/pci" } 
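+# Same supporting stack as the AHCI driver above: PCI bus services, the generic
+# block-device core and devfs registration.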
+ygg_driver_block = { path = "../../block/core" } +kernel-fs = { path = "../../fs/kernel-fs" } + +log = "0.4.20" +futures-util = { version = "0.3.28", default-features = false, features = ["alloc", "async-await"] } +static_assertions = "1.1.0" +tock-registers = "0.8.1" +bytemuck = { version = "1.14.0", features = ["derive"] } diff --git a/kernel/driver/block/nvme/src/command.rs b/kernel/driver/block/nvme/src/command.rs new file mode 100644 index 00000000..3eb4dbcc --- /dev/null +++ b/kernel/driver/block/nvme/src/command.rs @@ -0,0 +1,269 @@ +#![allow(unused)] + +use core::fmt::{self, Write}; + +use libk_mm::address::PhysicalAddress; +use tock_registers::{interfaces::Readable, register_structs, registers::ReadOnly, UIntLike}; + +use crate::queue::PhysicalRegionPage; + +use super::queue::SubmissionQueueEntry; + +pub trait Command { + fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry); +} + +pub trait Request: Command { + type Response; +} + +#[derive(Clone, Copy)] +#[repr(transparent)] +pub struct String { + data: [u8; N], +} + +#[derive(Clone, Copy, Debug)] +#[non_exhaustive] +#[repr(u8)] +pub enum ControllerType { + Reserved, + Io, + Discovery, + Administrative, +} + +// I/O commands + +#[derive(Clone, Copy, Debug)] +pub struct IoRead { + pub nsid: u32, + pub lba: u64, + pub count: u32, +} + +#[derive(Clone, Copy, Debug)] +pub struct IoWrite { + pub nsid: u32, + pub lba: u64, + pub count: u32, +} + +// Requests + +#[derive(Clone, Copy, Debug)] +pub enum SetFeatureRequest { + NumberOfQueues(u32, u32), +} + +#[derive(Clone, Copy, Debug)] +pub struct IdentifyControllerRequest; + +#[derive(Clone, Copy, Debug)] +pub struct IdentifyActiveNamespaceIdListRequest { + pub start_id: u32, +} + +#[derive(Clone, Copy, Debug)] +pub struct IdentifyNamespaceRequest { + pub nsid: u32, +} + +#[derive(Clone, Copy, Debug)] +pub struct CreateIoCompletionQueue { + pub id: u32, + pub size: usize, + pub vector: u32, + pub data: PhysicalAddress, +} + +#[derive(Clone, Copy, Debug)] +pub struct CreateIoSubmissionQueue { + pub id: u32, + pub cq_id: u32, + pub size: usize, + pub data: PhysicalAddress, +} + +// Replies + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct IdentifyControllerResponse { + pub pci_vid: u16, + pub pci_ssvid: u16, + pub serial_number: String<20>, + pub model_number: String<40>, + pub firmware_rev: u64, + _0: [u8; 5], // 72..77 + pub mdts: u8, + pub cntlid: u16, + pub ver: u32, + _1: [u8; 12], // 84..96 + pub ctratt: u32, + _2: [u8; 11], // 100..111 + pub cntrltype: ControllerType, +} + +#[derive(Clone, Copy, Debug)] +#[repr(C)] +pub struct IdentifyActiveNamespaceIdListResponse { + pub entries: [u32; 1024], +} + +register_structs! 
{ + #[allow(non_snake_case)] + pub IdentifyNamespaceResponse { + (0 => NSZE: ReadOnly), + (8 => _0), + (25 => NLBAF: ReadOnly), + (26 => FLBAS: ReadOnly), + (27 => _1), + (128 => LBAFS: [ReadOnly; 64]), + (384 => _2), + (4096 => @END), + } +} + +#[derive(Clone, Copy, Debug)] +#[repr(transparent)] +pub struct LbaFormat(u32); + +impl Command for IdentifyControllerRequest { + fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) { + sqe.command.set_opcode(0x06); + sqe.command_specific[0] = 0x01; + } +} + +impl Request for IdentifyControllerRequest { + type Response = IdentifyControllerResponse; +} + +impl Command for IdentifyActiveNamespaceIdListRequest { + fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) { + sqe.command.set_opcode(0x06); + sqe.command_specific[0] = 0x02; + sqe.nsid = self.start_id; + } +} + +impl Request for IdentifyActiveNamespaceIdListRequest { + type Response = IdentifyActiveNamespaceIdListResponse; +} + +impl Command for IdentifyNamespaceRequest { + fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) { + sqe.command.set_opcode(0x06); + sqe.command_specific[0] = 0x00; + sqe.nsid = self.nsid; + } +} + +impl Request for IdentifyNamespaceRequest { + type Response = IdentifyNamespaceResponse; +} + +impl IdentifyNamespaceResponse { + pub fn current_lba_fmt_idx(&self) -> usize { + let flbas = self.FLBAS.get(); + let mut index = flbas & 0xF; + if self.NLBAF.get() > 16 { + index |= (flbas & 0xE0) >> 1; + } + index as usize + } + + pub fn lba_fmt(&self, idx: usize) -> Option { + if idx > self.NLBAF.get() as usize { + return None; + } + Some(LbaFormat(self.LBAFS[idx].get())) + } + + pub fn total_lba_count(&self) -> u64 { + self.NSZE.get() + } +} + +impl LbaFormat { + pub fn lba_data_size(&self) -> Option { + let lbads = (self.0 >> 16) & 0xFF; + if lbads < 9 { + return None; + } + Some(1 << lbads) + } +} + +impl Command for SetFeatureRequest { + fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) { + sqe.command.set_opcode(0x09); + + match self { + Self::NumberOfQueues(cq, sq) => { + let dw11 = (cq << 16) | sq; + + sqe.command_specific[0] = 0x07; + sqe.command_specific[1] = dw11; + } + } + } +} + +impl Command for CreateIoCompletionQueue { + fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) { + sqe.command.set_opcode(0x05); + sqe.data_pointer[0] = PhysicalRegionPage::with_addr(self.data); + sqe.command_specific[0] = ((self.size as u32 - 1) << 16) | self.id; + sqe.command_specific[1] = (self.vector << 16) | 3; + } +} + +impl Command for CreateIoSubmissionQueue { + fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) { + sqe.command.set_opcode(0x01); + sqe.data_pointer[0] = PhysicalRegionPage::with_addr(self.data); + sqe.command_specific[0] = ((self.size as u32 - 1) << 16) | self.id; + // Medium priority + sqe.command_specific[1] = (self.cq_id << 16) | 1; + } +} + +impl fmt::Debug for String { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + f.write_char('"')?; + for ch in self.data { + if ch == b' ' || ch == 0 { + break; + } + f.write_char(ch as _)?; + } + f.write_char('"')?; + Ok(()) + } +} + +impl Command for IoRead { + fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) { + assert!(self.count < 65536); + + sqe.command.set_opcode(0x02); + sqe.command_specific[0] = self.lba as u32; + sqe.command_specific[1] = (self.lba >> 32) as u32; + sqe.command_specific[2] = self.count; + sqe.nsid = self.nsid; + } +} + +impl Command for IoWrite { + fn fill_sqe(&self, sqe: &mut SubmissionQueueEntry) { + assert!(self.count < 65536); + + sqe.command.set_opcode(0x01); + 
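// NVM command set dword layout (shared by the Read and Write commands): + // CDW10/CDW11 carry the 64-bit starting LBA, and CDW12 bits 15:0 carry the + // number of logical blocks; the spec encodes that count zero-based, so a + // value of 0 means one block. +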
sqe.command_specific[0] = self.lba as u32; + sqe.command_specific[1] = (self.lba >> 32) as u32; + sqe.command_specific[2] = self.count; + sqe.nsid = self.nsid; + } +} diff --git a/kernel/driver/block/nvme/src/drive.rs b/kernel/driver/block/nvme/src/drive.rs new file mode 100644 index 00000000..49bb6443 --- /dev/null +++ b/kernel/driver/block/nvme/src/drive.rs @@ -0,0 +1,128 @@ +use core::task::Poll; + +use alloc::{boxed::Box, format}; +use kernel_fs::devfs; +use libk_mm::address::AsPhysicalAddress; +use libk_thread::cpu_index; +use libk_util::waker::QueueWaker; +use ygg_driver_block::{ + probe_partitions, IoOperation, IoRequest, IoSubmissionId, NgBlockDevice, NgBlockDeviceWrapper, +}; +use yggdrasil_abi::error::Error; + +use crate::command::{IdentifyNamespaceRequest, IoRead}; + +use super::{error::NvmeError, NvmeController}; + +#[allow(unused)] +pub struct NvmeDrive { + controller: &'static NvmeController, + nsid: u32, + total_lba_count: u64, + lba_size: u64, +} + +impl NvmeDrive { + pub async fn create( + controller: &'static NvmeController, + nsid: u32, + ) -> Result<&'static NvmeDrive, NvmeError> { + let admin_q = controller.admin_q.get(); + let identify = admin_q.request(IdentifyNamespaceRequest { nsid }).await?; + + let current_lba_format_idx = identify.current_lba_fmt_idx(); + let current_lba_format = identify.lba_fmt(current_lba_format_idx).unwrap(); + let lba_size = current_lba_format.lba_data_size().unwrap(); + let total_lba_count = identify.total_lba_count(); + + log::debug!( + "ns = {}, lba = {}B, size = {}M", + nsid, + lba_size, + (total_lba_count * lba_size) / (1024 * 1024) + ); + + let dev = Box::leak(Box::new(NvmeDrive { + controller, + nsid, + total_lba_count, + lba_size, + })); + + let node_name = format!("nvme{}n{}", controller.controller_id.get(), nsid); + let blk = NgBlockDeviceWrapper::new(dev); + devfs::add_named_block_device(blk, node_name.clone()).ok(); + probe_partitions(blk, move |index, partition| { + devfs::add_block_device_partition(format!("{}p", node_name), index, partition) + }) + .ok(); + + Ok(dev) + } +} + +impl NgBlockDevice for NvmeDrive { + type CompletionNotify = QueueWaker; + + fn bus_id(&self) -> u32 { + (*self.controller.controller_id.get()) as _ + } + + fn unit_id(&self) -> u32 { + self.nsid + } + + fn block_size(&self) -> u64 { + self.lba_size + } + + fn block_count(&self) -> u64 { + self.total_lba_count + } + + fn max_blocks_per_request(&self) -> u64 { + // TODO get from identify + 8 + } + + async fn submit_request(&self, request: IoRequest<'_>) -> Result { + let queue_id = cpu_index(); + let ioq = &self.controller.ioqs.get()[queue_id as usize]; + + let command_id = match request.operation { + IoOperation::Read { lba, count } => { + log::debug!( + "Submit read of {} lbas from ns {} to queue {}", + count, + self.nsid, + queue_id + ); + let range = unsafe { request.data.as_physical_address() }; + ioq.submit( + IoRead { + lba, + count: count as _, + nsid: self.nsid, + }, + &[range], + true, + ) + } + IoOperation::Write { .. 
} => todo!(), + }; + + Ok(IoSubmissionId { + queue_id: queue_id as _, + command_id: command_id as _, + }) + } + + fn poll_completion(&self, id: IoSubmissionId) -> Poll<Result<(), Error>> { + let ioq = &self.controller.ioqs.get()[id.queue_id]; + ioq.poll_completion(id.command_id as _) + } + + fn completion_notify(&self, id: IoSubmissionId) -> &QueueWaker { + &self.controller.ioqs.get()[id.queue_id].completion_notify + } +} diff --git a/kernel/driver/block/nvme/src/error.rs b/kernel/driver/block/nvme/src/error.rs new file mode 100644 index 00000000..e933f039 --- /dev/null +++ b/kernel/driver/block/nvme/src/error.rs @@ -0,0 +1,15 @@ +use yggdrasil_abi::error::Error; + +use super::queue::CommandError; + +#[derive(Debug)] +pub enum NvmeError { + MemoryError(Error), + CommandError(CommandError), +} + +impl From<CommandError> for NvmeError { + fn from(value: CommandError) -> Self { + Self::CommandError(value) + } +} diff --git a/kernel/driver/block/nvme/src/lib.rs b/kernel/driver/block/nvme/src/lib.rs new file mode 100644 index 00000000..56467f78 --- /dev/null +++ b/kernel/driver/block/nvme/src/lib.rs @@ -0,0 +1,456 @@ +#![feature(strict_provenance, const_trait_impl, let_chains, if_let_guard, effects)] +#![allow(missing_docs)] +#![no_std] + +extern crate alloc; + +use core::{ + mem::size_of, + sync::atomic::{AtomicUsize, Ordering}, + time::Duration, +}; + +use alloc::{boxed::Box, collections::BTreeMap, vec::Vec}; +use command::{IdentifyActiveNamespaceIdListRequest, IdentifyControllerRequest}; +use device_api::{ + interrupt::{InterruptAffinity, InterruptHandler}, + Device, +}; +use drive::NvmeDrive; +use libk_mm::{ + address::{IntoRaw, PhysicalAddress}, + device::DeviceMemoryIo, +}; +use libk_thread::{cpu_count, cpu_index, runtime}; +use libk_util::{ + sync::{IrqGuard, IrqSafeSpinlock}, + OneTimeInit, +}; +use tock_registers::{ + interfaces::{ReadWriteable, Readable, Writeable}, + register_bitfields, register_structs, + registers::{ReadOnly, ReadWrite, WriteOnly}, +}; +use ygg_driver_pci::{ + device::{PciDeviceInfo, PreferredInterruptMode}, + PciCommandRegister, PciConfigurationSpace, +}; +use yggdrasil_abi::error::Error; + +use crate::{ + command::{IoRead, IoWrite}, + queue::{CompletionQueueEntry, SubmissionQueueEntry}, +}; + +use self::{ + command::{CreateIoCompletionQueue, CreateIoSubmissionQueue, SetFeatureRequest}, + error::NvmeError, + queue::QueuePair, +}; + +mod command; +mod drive; +mod error; +mod queue; + +register_bitfields! { + u32, + CC [ + IOCQES OFFSET(20) NUMBITS(4) [], + IOSQES OFFSET(16) NUMBITS(4) [], + AMS OFFSET(11) NUMBITS(3) [], + MPS OFFSET(7) NUMBITS(4) [], + CSS OFFSET(4) NUMBITS(3) [ + NvmCommandSet = 0 + ], + ENABLE OFFSET(0) NUMBITS(1) [], + ], + CSTS [ + CFS OFFSET(1) NUMBITS(1) [], + RDY OFFSET(0) NUMBITS(1) [], + ], + AQA [ + /// Admin Completion Queue Size in entries - 1 + ACQS OFFSET(16) NUMBITS(12) [], + /// Admin Submission Queue Size in entries - 1 + ASQS OFFSET(0) NUMBITS(12) [], + ] +} + +register_bitfields! { + u64, + CAP [ + /// Maximum Queue Entries Supported - 1. i.e., 0 means maximum queue len of 1, 1 = 2 etc. + MQES OFFSET(0) NUMBITS(16) [], + /// Timeout. Represents the worst-case time the host software should wait for CSTS.RDY to + /// change its state. + TO OFFSET(24) NUMBITS(8) [], + /// Doorbell stride. Stride in bytes = pow(2, 2 + DSTRD).
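+ /// For example, DSTRD = 0 gives a 4-byte stride: with the doorbell region + /// starting at register offset 0x1000, queue y's submission tail doorbell + /// sits at 0x1000 + (2y) * (4 << DSTRD) and its completion head doorbell at + /// 0x1000 + (2y + 1) * (4 << DSTRD), which is what doorbell_ptr() computes.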
+ DSTRD OFFSET(32) NUMBITS(4) [], + /// NVM Subsystem Reset Supported (see NVMe Base Specification Section 3.7.1) + NSSRS OFFSET(36) NUMBITS(1) [], + /// Controller supports one or more I/O command sets + CSS_IO_COMMANDS OFFSET(43) NUMBITS(1) [], + /// Controller only supports admin commands and no I/O commands + CSS_ADMIN_ONLY OFFSET(44) NUMBITS(1) [], + /// Memory page size minimum (bytes = pow(2, 12 + MPSMIN)) + MPSMIN OFFSET(48) NUMBITS(4) [], + /// Memory page size maximum (same encoding as MPSMIN) + MPSMAX OFFSET(52) NUMBITS(4) [], + ] +} + +register_structs! { + #[allow(non_snake_case)] + Regs { + (0x00 => CAP: ReadOnly<u64, CAP::Register>), + (0x08 => VS: ReadOnly<u32>), + (0x0C => INTMS: WriteOnly<u32>), + (0x10 => INTMC: WriteOnly<u32>), + (0x14 => CC: ReadWrite<u32, CC::Register>), + (0x18 => _0), + (0x1C => CSTS: ReadOnly<u32, CSTS::Register>), + (0x20 => _1), + (0x24 => AQA: ReadWrite<u32, AQA::Register>), + (0x28 => ASQ: ReadWrite<u64>), + (0x30 => ACQ: ReadWrite<u64>), + (0x38 => _2), + (0x2000 => @END), + } +} + +pub struct NvmeController { + regs: IrqSafeSpinlock<DeviceMemoryIo<'static, Regs>>, + admin_q: OneTimeInit<QueuePair>, + ioqs: OneTimeInit<Vec<QueuePair>>, + io_queue_count: AtomicUsize, + drive_table: IrqSafeSpinlock<BTreeMap<u32, &'static NvmeDrive>>, + controller_id: OneTimeInit<usize>, + + pci: PciDeviceInfo, + + doorbell_shift: usize, +} + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum IoDirection { + Read, + Write, +} + +impl Regs { + unsafe fn doorbell_ptr(&self, shift: usize, completion: bool, queue_index: usize) -> *mut u32 { + let doorbell_base = (self as *const Regs as *mut Regs).addr() + 0x1000; + let offset = ((queue_index << shift) + completion as usize) * 4; + (doorbell_base + offset) as *mut u32 + } +} + +impl NvmeController { + const ADMIN_QUEUE_SIZE: usize = 32; + const IO_QUEUE_SIZE: usize = 32; + + async fn create_queues(&'static self) -> Result<(), NvmeError> { + let admin_q = self.admin_q.get(); + let io_queue_count = self.io_queue_count.load(Ordering::Acquire); + + log::info!( + "Creating {} queue pairs for nvme{}", + io_queue_count, + self.controller_id.get() + ); + + // Request the number of I/O CQ/SQ pairs + admin_q + .request_no_data(SetFeatureRequest::NumberOfQueues( + io_queue_count as _, + io_queue_count as _, + )) + .await?; + + let mut queues = Vec::new(); + for i in 1..=io_queue_count { + let id = i as u32; + + let (sq_doorbell, cq_doorbell) = unsafe { self.doorbell_pair(i) }; + let queue = QueuePair::new(id, i, Self::IO_QUEUE_SIZE, sq_doorbell, cq_doorbell) + .map_err(NvmeError::MemoryError)?; + + admin_q + .request_no_data(CreateIoCompletionQueue { + id, + vector: id, + size: Self::IO_QUEUE_SIZE, + data: queue.cq_physical_pointer(), + }) + .await?; + + admin_q + .request_no_data(CreateIoSubmissionQueue { + id, + cq_id: id, + size: Self::IO_QUEUE_SIZE, + data: queue.sq_physical_pointer(), + }) + .await?; + + queues.push(queue); + } + + self.ioqs.init(queues); + + Ok(()) + } + + async fn late_init(&'static self) -> Result<(), NvmeError> { + let io_queue_count = cpu_count(); + self.io_queue_count.store(io_queue_count, Ordering::Release); + + { + let range = self + .pci + .map_interrupt_multiple(0..io_queue_count + 1, InterruptAffinity::Any, self) + .unwrap(); + + // TODO handle different MSI range allocations + for (i, msi) in range.iter().enumerate() { + assert_eq!(i, msi.vector); + } + } + + register_nvme_controller(self); + + let admin_q = self.admin_q.get(); + + // Identify the controller + let _identify = admin_q.request(IdentifyControllerRequest).await?; + + // TODO do something with identify_controller + + self.create_queues().await?; + + // Identify namespaces + self.enumerate_namespaces().await?; + + Ok(()) + } + + async fn enumerate_namespaces(&'static self)
-> Result<(), NvmeError> { + let admin_q = self.admin_q.get(); + + let namespaces = admin_q + .request(IdentifyActiveNamespaceIdListRequest { start_id: 0 }) + .await?; + + let count = namespaces.entries.iter().position(|&x| x == 0).unwrap(); + let list = &namespaces.entries[..count]; + + for &nsid in list { + match NvmeDrive::create(self, nsid).await { + Ok(drive) => { + self.drive_table.lock().insert(nsid, drive); + } + Err(error) => { + log::warn!("Could not create nvme drive, nsid={}: {:?}", nsid, error); + } + } + } + + Ok(()) + } + + pub async fn perform_io( + &'static self, + nsid: u32, + lba: u64, + buffer_address: PhysicalAddress, + direction: IoDirection, + ) -> Result<(), NvmeError> { + let _guard = IrqGuard::acquire(); + let cpu_index = cpu_index(); + let ioq = &self.ioqs.get()[cpu_index as usize]; + + log::debug!( + "{:?} ioq #{}, nsid={}, lba={:#x}", + direction, + cpu_index, + nsid, + lba + ); + + let cmd_id = match direction { + IoDirection::Read => ioq.submit( + IoRead { + nsid, + lba, + count: 1, + }, + &[buffer_address], + true, + ), + IoDirection::Write => ioq.submit( + IoWrite { + nsid, + lba, + count: 1, + }, + &[buffer_address], + true, + ), + }; + + ioq.wait_for_completion(cmd_id, ()).await?; + + Ok(()) + } + + unsafe fn doorbell_pair(&self, idx: usize) -> (*mut u32, *mut u32) { + let regs = self.regs.lock(); + let sq_ptr = regs.doorbell_ptr(self.doorbell_shift, false, idx); + let cq_ptr = regs.doorbell_ptr(self.doorbell_shift, true, idx); + (sq_ptr, cq_ptr) + } +} + +impl InterruptHandler for NvmeController { + fn handle_irq(&self, vector: Option) -> bool { + let vector = vector.expect("Only MSI-X interrupts are supported"); + + if vector == 0 { + self.admin_q.get().process_completions() != 0 + } else if vector <= self.io_queue_count.load(Ordering::Acquire) + && let Some(ioqs) = self.ioqs.try_get() + { + ioqs[vector - 1].process_completions() != 0 + } else { + false + } + } +} + +impl Device for NvmeController { + unsafe fn init(&'static self) -> Result<(), Error> { + let regs = self.regs.lock(); + + let min_page_size = 1usize << (12 + regs.CAP.read(CAP::MPSMIN)); + + if min_page_size > 4096 { + panic!(); + } + + let timeout = Duration::from_millis(regs.CAP.read(CAP::TO) * 500); + log::debug!("Worst-case timeout: {:?}", timeout); + + while regs.CSTS.matches_any(CSTS::RDY::SET) { + core::hint::spin_loop(); + } + + if Self::ADMIN_QUEUE_SIZE as u64 > regs.CAP.read(CAP::MQES) + 1 { + todo!( + "queue_slots too big, max = {}", + regs.CAP.read(CAP::MQES) + 1 + ); + } + + // Setup the admin queue (index 0) + let admin_sq_doorbell = unsafe { regs.doorbell_ptr(self.doorbell_shift, false, 0) }; + let admin_cq_doorbell = unsafe { regs.doorbell_ptr(self.doorbell_shift, true, 0) }; + log::debug!("sq_doorbell for adminq = {:p}", admin_sq_doorbell); + let admin_q = QueuePair::new( + 0, + 0, + Self::ADMIN_QUEUE_SIZE, + admin_sq_doorbell, + admin_cq_doorbell, + ) + .unwrap(); + + regs.AQA.modify( + AQA::ASQS.val(Self::ADMIN_QUEUE_SIZE as u32 - 1) + + AQA::ACQS.val(Self::ADMIN_QUEUE_SIZE as u32 - 1), + ); + regs.ASQ.set(admin_q.sq_physical_pointer().into_raw()); + regs.ACQ.set(admin_q.cq_physical_pointer().into_raw()); + + // Configure the controller + const IOSQES: u32 = size_of::().ilog2(); + const IOCQES: u32 = size_of::().ilog2(); + + regs.CC.modify( + CC::IOCQES.val(IOCQES) + + CC::IOSQES.val(IOSQES) + + CC::MPS.val(0) + + CC::CSS::NvmCommandSet, + ); + + // Enable the controller + regs.CC.modify(CC::ENABLE::SET); + + log::debug!("Reset the controller"); + + while 
!regs.CSTS.matches_any(CSTS::RDY::SET + CSTS::CFS::SET) { + core::hint::spin_loop(); + } + + if regs.CSTS.matches_any(CSTS::CFS::SET) { + todo!("CFS set after reset!"); + } + + self.admin_q.init(admin_q); + + // Schedule late_init task + runtime::spawn(self.late_init())?; + + Ok(()) + } + + fn display_name(&self) -> &'static str { + "NVM Express Controller" + } +} + +static NVME_CONTROLLERS: IrqSafeSpinlock> = + IrqSafeSpinlock::new(Vec::new()); + +pub fn probe(info: &PciDeviceInfo) -> Result<&'static dyn Device, Error> { + let bar0 = info + .config_space + .bar(0) + .unwrap() + .as_memory() + .expect("Expected a memory BAR0"); + + info.init_interrupts(PreferredInterruptMode::Msi)?; + + let mut cmd = PciCommandRegister::from_bits_retain(info.config_space.command()); + cmd &= !(PciCommandRegister::DISABLE_INTERRUPTS | PciCommandRegister::ENABLE_IO); + cmd |= PciCommandRegister::ENABLE_MEMORY | PciCommandRegister::BUS_MASTER; + info.config_space.set_command(cmd.bits()); + + let regs = unsafe { DeviceMemoryIo::::map(bar0, Default::default()) }?; + + // Disable the controller + regs.CC.modify(CC::ENABLE::CLEAR); + + let doorbell_shift = regs.CAP.read(CAP::DSTRD) as usize + 1; + + Ok(Box::leak(Box::new(NvmeController { + regs: IrqSafeSpinlock::new(regs), + admin_q: OneTimeInit::new(), + ioqs: OneTimeInit::new(), + drive_table: IrqSafeSpinlock::new(BTreeMap::new()), + controller_id: OneTimeInit::new(), + + pci: info.clone(), + + io_queue_count: AtomicUsize::new(1), + doorbell_shift, + }))) +} + +pub fn register_nvme_controller(ctrl: &'static NvmeController) { + let mut list = NVME_CONTROLLERS.lock(); + let id = list.len(); + list.push(ctrl); + ctrl.controller_id.init(id); +} diff --git a/kernel/driver/block/nvme/src/queue.rs b/kernel/driver/block/nvme/src/queue.rs new file mode 100644 index 00000000..f99afc9b --- /dev/null +++ b/kernel/driver/block/nvme/src/queue.rs @@ -0,0 +1,432 @@ +use core::{ + mem::size_of, + pin::Pin, + ptr::null_mut, + task::{Context, Poll}, +}; + +use alloc::{ + collections::{BTreeMap, BTreeSet}, + vec::Vec, +}; +use bytemuck::{Pod, Zeroable}; +use futures_util::Future; +use libk_mm::{ + address::{AsPhysicalAddress, IntoRaw, PhysicalAddress}, + PageBox, +}; +use libk_util::{sync::IrqSafeSpinlock, waker::QueueWaker}; +use static_assertions::const_assert; +use yggdrasil_abi::error::Error; + +use super::{ + command::{Command, Request}, + error::NvmeError, +}; + +#[derive(Zeroable, Pod, Clone, Copy, Debug)] +#[repr(C)] +pub struct PhysicalRegionPage(u64); + +// Bits: +// +// 16..32 - CID. Command identifier +// 14..16 - PSDT. PRP or SGL for data transfer. +// 0b00 - PRP used +// 0b01 - SGL used. Not implemented +// 0b10 - SGL used. Not implemented +// 0b11 - Reserved +// 10..14 - Reserved +// 08..10 - FUSE. Fused Operation +// 00..08 - OPC. 
Opcode +#[derive(Zeroable, Pod, Clone, Copy, Debug)] +#[repr(C)] +pub struct CommandDword0(u32); + +#[derive(Zeroable, Pod, Clone, Copy, Debug)] +#[repr(C)] +pub struct SubmissionQueueEntry { + pub command: CommandDword0, // 0 + pub nsid: u32, // 1 + pub io_data: [u32; 2], // 2, 3 + pub metadata_pointer: u64, // 4, 5 + pub data_pointer: [PhysicalRegionPage; 2], // 6, 7, 8, 9 + pub command_specific: [u32; 6], // 10, 11, 12, 13, 14, 15 +} + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub struct CommandError { + sct: u8, + sc: u8, +} + +#[derive(Zeroable, Pod, Clone, Copy, Debug)] +#[repr(C)] +pub struct CompletionQueueEntry { + dw: [u32; 4], +} + +pub struct Queue<T> { + data: PageBox<[T]>, + mask: usize, + head: usize, + tail: usize, + phase: bool, + + head_doorbell: *mut u32, + tail_doorbell: *mut u32, +} + +struct Inner { + sq: Queue<SubmissionQueueEntry>, + cq: Queue<CompletionQueueEntry>, + + completed: BTreeMap<u32, CompletionQueueEntry>, + pending: BTreeSet<u32>, +} + +pub struct QueuePair { + id: u32, + + #[allow(unused)] + vector: usize, + + sq_base: PhysicalAddress, + cq_base: PhysicalAddress, + + pub completion_notify: QueueWaker, + + inner: IrqSafeSpinlock<Inner>, +} + +const_assert!(size_of::<CompletionQueueEntry>().is_power_of_two()); + +impl PhysicalRegionPage { + pub const fn null() -> Self { + Self(0) + } + + pub const fn with_addr(address: PhysicalAddress) -> Self { + Self(address.into_raw()) + } +} + +impl CommandDword0 { + pub fn set_command_id(&mut self, id: u32) { + debug_assert!(id & 0xFFFF0000 == 0); + self.0 &= !(0xFFFF << 16); + self.0 |= id << 16; + } + + pub fn set_opcode(&mut self, opcode: u8) { + self.0 &= !0xFF; + self.0 |= opcode as u32; + } +} + +impl CompletionQueueEntry { + pub fn phase(&self) -> bool { + self.dw[3] & (1 << 16) != 0 + } + + pub fn sub_queue_id(&self) -> u32 { + self.dw[2] >> 16 + } + + pub fn sub_queue_head(&self) -> usize { + (self.dw[2] & 0xFFFF) as _ + } + + pub fn command_id(&self) -> u32 { + self.dw[3] & 0xFFFF + } + + pub fn error(&self) -> Option<CommandError> { + let status = (self.dw[3] >> 17) as u16; + + if status != 0 { + Some(CommandError { + sct: ((status >> 8) & 0x7) as u8, + sc: status as u8, + }) + } else { + None + } + } +} + +impl<T> Queue<T> { + pub fn new( + data: PageBox<[T]>, + head_doorbell: *mut u32, + tail_doorbell: *mut u32, + phase: bool, + ) -> Self { + assert!( + (head_doorbell.is_null() && !tail_doorbell.is_null()) + || (!head_doorbell.is_null() && tail_doorbell.is_null()) + ); + + Self { + mask: data.len() - 1, + head: 0, + tail: 0, + data, + head_doorbell, + tail_doorbell, + phase, + } + } + + pub fn enqueue(&mut self, item: T) -> usize { + let index = self.tail; + self.data[self.tail] = item; + self.phase ^= self.set_tail(self.next_index(self.tail)); + index + } + + pub fn at_head(&self, offset: usize) -> (&T, bool) { + let index = (self.head + offset) & self.mask; + let expected_phase = self.phase ^ (index < self.head); + (&self.data[index], expected_phase) + } + + pub fn take(&mut self, count: usize) { + let index = (self.head + count) & self.mask; + self.phase ^= self.set_head(index); + } + + pub fn take_until(&mut self, new_head: usize) { + self.phase ^= self.set_head(new_head); + } + + fn next_index(&self, index: usize) -> usize { + (index + 1) & self.mask + } + + fn set_tail(&mut self, new_tail: usize) -> bool { + let wrapped = new_tail < self.tail; + + self.tail = new_tail; + + if !self.tail_doorbell.is_null() { + unsafe { + self.tail_doorbell + .write_volatile(self.tail.try_into().unwrap()); + } + } + + wrapped + } + + fn set_head(&mut self, new_head: usize) -> bool { + let wrapped = new_head < self.head; + + self.head = new_head; + + if !self.head_doorbell.is_null() { + unsafe { + self.head_doorbell + .write_volatile(self.head.try_into().unwrap()); + } + } + + wrapped + } +} + +impl QueuePair { + pub fn new( + id: u32, + vector: usize, + capacity: usize, + sq_doorbell: *mut u32, + cq_doorbell: *mut u32, + ) -> Result<Self, Error> { + let sq_data = PageBox::new_slice(SubmissionQueueEntry::zeroed(), capacity)?; + let cq_data = PageBox::new_slice(CompletionQueueEntry::zeroed(), capacity)?; + + let sq_base = unsafe { sq_data.as_physical_address() }; + let cq_base = unsafe { cq_data.as_physical_address() }; + + log::debug!("Allocated queue pair: sq={:p}, cq={:p}", sq_data, cq_data); + + let sq = Queue::new(sq_data, null_mut(), sq_doorbell, true); + let cq = Queue::new(cq_data, cq_doorbell, null_mut(), true); + + let inner = IrqSafeSpinlock::new(Inner { + sq, + cq, + pending: BTreeSet::new(), + completed: BTreeMap::new(), + }); + + Ok(Self { + completion_notify: QueueWaker::new(), + + id, + vector, + sq_base, + cq_base, + inner, + }) + } + + #[inline] + pub fn sq_physical_pointer(&self) -> PhysicalAddress { + self.sq_base + } + + #[inline] + pub fn cq_physical_pointer(&self) -> PhysicalAddress { + self.cq_base + } + + pub fn poll_completion(&self, command_id: u32) -> Poll<Result<(), Error>> { + let mut inner = self.inner.lock(); + + match inner.completed.remove(&command_id) { + Some(result) if let Some(_error) = result.error() => todo!(), + Some(_) => Poll::Ready(Ok(())), + None => Poll::Pending, + } + } + + pub fn wait_for_completion<'r, T: Unpin + 'r>( + &'r self, + command_id: u32, + result: T, + ) -> impl Future<Output = Result<T, CommandError>> + 'r { + struct Fut<'r, R: Unpin + 'r> { + this: &'r QueuePair, + response: Option<R>, + command_id: u32, + } + + impl<'r, R: Unpin + 'r> Future for Fut<'r, R> { + type Output = Result<R, CommandError>; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> { + self.this.completion_notify.register(cx.waker()); + let mut inner = self.this.inner.lock(); + + if let Some(entry) = inner.completed.remove(&self.command_id) { + self.this.completion_notify.remove(cx.waker()); + + let result = if let Some(error) = entry.error() { + Err(error) + } else { + Ok(self.response.take().unwrap()) + }; + + Poll::Ready(result) + } else { + Poll::Pending + } + } + } + + Fut { + this: self, + response: Some(result), + command_id, + } + } + + pub fn submit<C: Command>(&self, cmd: C, ranges: &[PhysicalAddress], set_pending: bool) -> u32 { + let mut inner = self.inner.lock(); + let mut sqe = SubmissionQueueEntry::zeroed(); + + match ranges.len() { + 1 => { + sqe.data_pointer[0] = PhysicalRegionPage::with_addr(ranges[0]); + sqe.data_pointer[1] = PhysicalRegionPage::null(); + } + 0 => { + sqe.data_pointer[0] = PhysicalRegionPage::null(); + sqe.data_pointer[1] = PhysicalRegionPage::null(); + } + _ => todo!(), + } + + cmd.fill_sqe(&mut sqe); + + let command_id = inner.sq.tail.try_into().unwrap(); + sqe.command.set_command_id(command_id); + + if set_pending { + inner.pending.insert(command_id); + } + + inner.sq.enqueue(sqe); + + command_id + } + + pub fn request_no_data<C: Command>( + &self, + req: C, + ) -> impl Future<Output = Result<(), CommandError>> + '_ { + let command_id = self.submit(req, &[], true); + self.wait_for_completion(command_id, ()) + } + + pub async fn request<'r, R: Request>( + &'r self, + req: R, + ) -> Result<PageBox<R::Response>, NvmeError> + where + R::Response: 'r, + { + let response = PageBox::new_uninit().map_err(NvmeError::MemoryError)?; + let command_id = self.submit(req, &[unsafe { response.as_physical_address() }], true); + let result = self.wait_for_completion(command_id, response).await?; + Ok(unsafe {
result.assume_init() }) + } + + pub fn process_completions(&self) -> usize { + let mut inner = self.inner.lock(); + let mut n = 0; + + let mut completion_list = Vec::new(); + + loop { + let (cmp, expected_phase) = inner.cq.at_head(n); + let cmp_phase = cmp.phase(); + + if cmp_phase != expected_phase { + break; + } + + n += 1; + + let sub_queue_id = cmp.sub_queue_id(); + // TODO allow several sqs receive completions through one cq? + assert_eq!(sub_queue_id, self.id); + + let sub_queue_head = cmp.sub_queue_head(); + let cmp = *cmp; + inner.sq.take_until(sub_queue_head); + + completion_list.push(cmp); + } + + if n != 0 { + inner.cq.take(n); + } + + for cmp in completion_list { + let command_id = cmp.command_id(); + + if inner.pending.remove(&command_id) { + inner.completed.insert(command_id, cmp); + } + } + + if n != 0 { + self.completion_notify.wake_all(); + } + + n + } +} diff --git a/kernel/driver/bus/pci/Cargo.toml b/kernel/driver/bus/pci/Cargo.toml new file mode 100644 index 00000000..ed51f994 --- /dev/null +++ b/kernel/driver/bus/pci/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "ygg_driver_pci" +version = "0.1.0" +edition = "2021" +authors = ["Mark Poliakov "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +device-api = { path = "../../../lib/device-api", features = ["derive"] } +libk-mm = { path = "../../../libk/libk-mm" } +libk-device = { path = "../../../libk/libk-device" } +libk-util = { path = "../../../libk/libk-util" } + +log = "0.4.20" +bitflags = "2.3.3" +tock-registers = "0.8.1" + +[target.'cfg(target_arch = "x86_64")'.dependencies] +acpi = { git = "https://github.com/alnyan/acpi.git", package = "acpi", branch = "acpi-system" } diff --git a/kernel/driver/bus/pci/src/capability.rs b/kernel/driver/bus/pci/src/capability.rs new file mode 100644 index 00000000..ffd33379 --- /dev/null +++ b/kernel/driver/bus/pci/src/capability.rs @@ -0,0 +1,388 @@ +//! 
PCI capability structures and queries + +use alloc::{vec, vec::Vec}; +use device_api::interrupt::{ + InterruptAffinity, InterruptHandler, MessageInterruptController, MsiInfo, +}; +use libk_mm::{address::PhysicalAddress, device::DeviceMemoryIoMut}; +use tock_registers::{ + interfaces::{Readable, Writeable}, + registers::{ReadWrite, WriteOnly}, +}; +use yggdrasil_abi::error::Error; + +use super::{PciCapability, PciCapabilityId, PciConfigurationSpace}; + +pub trait VirtioCapabilityData<'s, S: PciConfigurationSpace + ?Sized + 's>: Sized { + fn from_space_offset(space: &'s S, offset: usize) -> Self; + + fn space(&self) -> &'s S; + fn offset(&self) -> usize; + + fn bar_index(&self) -> Option { + let value = self.space().read_u8(self.offset() + 4); + (value <= 0x5).then_some(value as _) + } + + fn bar_offset(&self) -> usize { + let value = self.space().read_u32(self.offset() + 8); + value as _ + } + + fn length(&self) -> usize { + let value = self.space().read_u32(self.offset() + 12); + value as _ + } +} + +pub trait VirtioCapability { + const CFG_TYPE: u8; + const MIN_LEN: usize = 0; + type Output<'a, S: PciConfigurationSpace + ?Sized + 'a>: VirtioCapabilityData<'a, S>; +} + +/// MSI-X capability query +pub struct MsiXCapability; + +/// MSI capability query +pub struct MsiCapability; + +// VirtIO-over-PCI capabilities +/// VirtIO PCI configuration access +pub struct VirtioDeviceConfigCapability; +/// VirtIO common configuration +pub struct VirtioCommonConfigCapability; +/// VirtIO notify configuration +pub struct VirtioNotifyConfigCapability; +/// VirtIO interrupt status +pub struct VirtioInterruptStatusCapability; + +/// Represents an entry in MSI-X vector table +#[repr(C)] +pub struct MsiXEntry { + /// Address to which the value is written on interrupt + pub address: WriteOnly, + /// Value which is written to trigger an interrupt + pub data: WriteOnly, + /// Vector control word + pub control: ReadWrite, +} + +pub struct MsiXVectorTable<'a> { + vectors: DeviceMemoryIoMut<'a, [MsiXEntry]>, +} + +/// MSI-X capability data structure +pub struct MsiXData<'s, S: PciConfigurationSpace + ?Sized + 's> { + space: &'s S, + offset: usize, +} + +/// MSI capability data structure +pub struct MsiData<'s, S: PciConfigurationSpace + ?Sized + 's> { + space: &'s S, + offset: usize, +} + +pub struct VirtioDeviceConfigData<'s, S: PciConfigurationSpace + ?Sized + 's> { + space: &'s S, + offset: usize, +} + +pub struct VirtioCommonConfigData<'s, S: PciConfigurationSpace + ?Sized + 's> { + space: &'s S, + offset: usize, +} + +pub struct VirtioNotifyConfigData<'s, S: PciConfigurationSpace + ?Sized + 's> { + space: &'s S, + offset: usize, +} + +pub struct VirtioInterruptStatusData<'s, S: PciConfigurationSpace + ?Sized + 's> { + space: &'s S, + offset: usize, +} + +impl PciCapability for T { + const ID: PciCapabilityId = PciCapabilityId::VendorSpecific; + type CapabilityData<'a, S: PciConfigurationSpace + ?Sized + 'a> = T::Output<'a, S>; + + fn check(space: &S, offset: usize, len: usize) -> bool { + let cfg_type = space.read_u8(offset + 3); + cfg_type == T::CFG_TYPE && len >= T::MIN_LEN + } + + fn data<'s, S: PciConfigurationSpace + ?Sized + 's>( + space: &'s S, + offset: usize, + _len: usize, + ) -> Self::CapabilityData<'s, S> { + T::Output::from_space_offset(space, offset) + } +} + +impl PciCapability for MsiXCapability { + const ID: PciCapabilityId = PciCapabilityId::MsiX; + type CapabilityData<'a, S: PciConfigurationSpace + ?Sized + 'a> = MsiXData<'a, S>; + + fn data<'s, S: PciConfigurationSpace + ?Sized + 's>( + 
space: &'s S, + offset: usize, + _len: usize, + ) -> Self::CapabilityData<'s, S> { + MsiXData { space, offset } + } +} + +impl PciCapability for MsiCapability { + const ID: PciCapabilityId = PciCapabilityId::Msi; + type CapabilityData<'a, S: PciConfigurationSpace + ?Sized + 'a> = MsiData<'a, S>; + + fn data<'s, S: PciConfigurationSpace + ?Sized + 's>( + space: &'s S, + offset: usize, + _len: usize, + ) -> Self::CapabilityData<'s, S> { + MsiData { space, offset } + } +} + +impl VirtioCapability for VirtioDeviceConfigCapability { + const CFG_TYPE: u8 = 0x04; + type Output<'a, S: PciConfigurationSpace + ?Sized + 'a> = VirtioDeviceConfigData<'a, S>; +} + +impl<'s, S: PciConfigurationSpace + ?Sized + 's> VirtioCapabilityData<'s, S> + for VirtioDeviceConfigData<'s, S> +{ + fn from_space_offset(space: &'s S, offset: usize) -> Self { + Self { space, offset } + } + + fn space(&self) -> &'s S { + self.space + } + + fn offset(&self) -> usize { + self.offset + } +} + +impl VirtioCapability for VirtioCommonConfigCapability { + const CFG_TYPE: u8 = 0x01; + type Output<'a, S: PciConfigurationSpace + ?Sized + 'a> = VirtioCommonConfigData<'a, S>; +} + +impl<'s, S: PciConfigurationSpace + ?Sized + 's> VirtioCapabilityData<'s, S> + for VirtioCommonConfigData<'s, S> +{ + fn from_space_offset(space: &'s S, offset: usize) -> Self { + Self { space, offset } + } + + fn space(&self) -> &'s S { + self.space + } + + fn offset(&self) -> usize { + self.offset + } +} + +impl VirtioCapability for VirtioNotifyConfigCapability { + const CFG_TYPE: u8 = 0x02; + const MIN_LEN: usize = 0x14; + type Output<'a, S: PciConfigurationSpace + ?Sized + 'a> = VirtioNotifyConfigData<'a, S>; +} + +impl<'s, S: PciConfigurationSpace + ?Sized + 's> VirtioNotifyConfigData<'s, S> { + pub fn offset_multiplier(&self) -> usize { + self.space.read_u32(self.offset + 16) as usize + } +} + +impl<'s, S: PciConfigurationSpace + ?Sized + 's> VirtioCapabilityData<'s, S> + for VirtioNotifyConfigData<'s, S> +{ + fn from_space_offset(space: &'s S, offset: usize) -> Self { + Self { space, offset } + } + + fn space(&self) -> &'s S { + self.space + } + + fn offset(&self) -> usize { + self.offset + } +} + +impl VirtioCapability for VirtioInterruptStatusCapability { + const CFG_TYPE: u8 = 0x03; + const MIN_LEN: usize = 1; + type Output<'a, S: PciConfigurationSpace + ?Sized + 'a> = VirtioInterruptStatusData<'a, S>; +} + +impl<'s, S: PciConfigurationSpace + ?Sized + 's> VirtioInterruptStatusData<'s, S> { + pub fn read_status(&self) -> (bool, bool) { + todo!() + } +} + +impl<'s, S: PciConfigurationSpace + ?Sized + 's> VirtioCapabilityData<'s, S> + for VirtioInterruptStatusData<'s, S> +{ + fn from_space_offset(space: &'s S, offset: usize) -> Self { + Self { space, offset } + } + + fn space(&self) -> &'s S { + self.space + } + + fn offset(&self) -> usize { + self.offset + } +} + +impl<'s, S: PciConfigurationSpace + ?Sized + 's> MsiXData<'s, S> { + // TODO use pending bits as well + /// Maps and returns the vector table associated with the device's MSI-X capability + pub fn vector_table<'a>(&self) -> Result, Error> { + let w0 = self.space.read_u16(self.offset + 2); + let dw1 = self.space.read_u32(self.offset + 4); + + let table_size = (w0 as usize & 0x3FF) + 1; + let bir = dw1 as usize & 0x3; + let table_offset = dw1 as usize & !0x3; + + let Some(base) = self.space.bar(bir) else { + return Err(Error::DoesNotExist); + }; + let Some(base) = base.as_memory() else { + return Err(Error::InvalidOperation); + }; + + log::debug!("MSI-X table address: {:#x}", 
base.add(table_offset)); + + unsafe { MsiXVectorTable::from_raw_parts(base.add(table_offset), table_size) } + } + + /// Changes the global enable status for the device's MSI-X capability. If set, regular IRQs + /// are not generated. + pub fn set_enabled(&mut self, enabled: bool) { + let mut w0 = self.space.read_u32(self.offset); + if enabled { + w0 |= 1 << 31; + } else { + w0 &= !(1 << 31); + } + self.space.write_u32(self.offset, w0); + } + + pub fn set_function_mask(&mut self, masked: bool) { + let mut w0 = self.space.read_u32(self.offset); + if masked { + w0 |= 1 << 30; + } else { + w0 &= !(1 << 30); + } + self.space.write_u32(self.offset, w0); + } +} + +impl MsiXVectorTable<'_> { + unsafe fn from_raw_parts(base: PhysicalAddress, len: usize) -> Result { + let vectors = DeviceMemoryIoMut::map_slice(base, len, Default::default())?; + Ok(Self { vectors }) + } + + pub fn mask_all(&mut self) { + for vector in self.vectors.iter_mut() { + vector.set_masked(true); + } + } + + pub fn register_range( + &mut self, + start: usize, + end: usize, + ic: &C, + affinity: InterruptAffinity, + handler: &'static dyn InterruptHandler, + ) -> Result, Error> { + assert!(end > start); + let mut range = vec![ + MsiInfo { + affinity, + ..Default::default() + }; + end - start + ]; + ic.register_msi_range(&mut range, handler)?; + + for (i, info) in range.iter().enumerate() { + let index = i + start; + self.vectors[index].address.set(info.address as _); + self.vectors[index].data.set(info.value); + self.vectors[index].set_masked(false); + } + + Ok(range) + } +} + +impl MsiXEntry { + /// If set, prevents the MSI-X interrupt from being delivered + fn set_masked(&mut self, masked: bool) { + if masked { + self.control.set(self.control.get() | 1); + } else { + self.control.set(self.control.get() & !1); + } + } +} + +impl<'s, S: PciConfigurationSpace + ?Sized + 's> MsiData<'s, S> { + pub fn register( + &mut self, + ic: &C, + affinity: InterruptAffinity, + handler: &'static dyn InterruptHandler, + ) -> Result { + let info = ic.register_msi(affinity, handler)?; + + let mut w0 = self.space.read_u16(self.offset + 2); + // Enable the vector first + w0 |= 1 << 0; + + // Reset to one vector + w0 &= !(0x7 << 4); + + self.space.write_u16(self.offset + 2, w0); + + if info.value > u16::MAX as u32 { + log::warn!("Could not setup a MSI: value={:#x} > u16", info.value); + return Err(Error::InvalidOperation); + } + + if info.address > u32::MAX as usize { + if w0 & (1 << 7) == 0 { + log::warn!( + "Could not setup a MSI: address={:#x} and MSI is not 64 bit capable", + info.address + ); + return Err(Error::InvalidOperation); + } + + self.space + .write_u32(self.offset + 8, (info.address >> 32) as u32); + } + self.space.write_u32(self.offset + 4, info.address as u32); + + self.space.write_u16(self.offset + 12, info.value as u16); + + Ok(info) + } +} diff --git a/kernel/driver/bus/pci/src/device.rs b/kernel/driver/bus/pci/src/device.rs new file mode 100644 index 00000000..8afe3c85 --- /dev/null +++ b/kernel/driver/bus/pci/src/device.rs @@ -0,0 +1,220 @@ +use core::ops::Range; + +use alloc::{sync::Arc, vec::Vec}; +use device_api::{ + interrupt::{InterruptAffinity, InterruptHandler, IrqOptions, MsiInfo}, + Device, +}; +use libk_device::{message_interrupt_controller, register_global_interrupt}; +use libk_util::{sync::spin_rwlock::IrqSafeRwLock, OneTimeInit}; +use yggdrasil_abi::error::Error; + +use crate::{ + capability::{MsiCapability, MsiXCapability, MsiXVectorTable}, + PciAddress, PciConfigSpace, PciConfigurationSpace, PciSegmentInfo, +}; + 
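+// Rough usage sketch (hypothetical driver code; assumes a `dev: PciDeviceInfo` +// obtained from bus enumeration and a `handler: &'static dyn InterruptHandler`): +// +// dev.init_interrupts(PreferredInterruptMode::Msi)?; +// let msi_info = dev.map_interrupt(InterruptAffinity::Any, handler)?; +// +// When MSI-X is present and preferred, this programs and unmasks an entry in +// the device's vector table; otherwise it falls back to plain MSI or the +// legacy interrupt pin route. +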
+/// Describes a PCI device +#[derive(Clone)] +pub struct PciDeviceInfo { + /// Address of the device + pub address: PciAddress, + /// Configuration space access method + pub config_space: PciConfigSpace, + /// Describes the PCI segment this device is a part of + pub segment: Arc, + + pub(crate) interrupt_config: Arc>>, +} + +pub struct InterruptConfig { + #[allow(unused)] + preferred_mode: PreferredInterruptMode, + configured_mode: ConfiguredInterruptMode, +} + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum PciInterruptPin { + A, + B, + C, + D, +} + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum PreferredInterruptMode { + Msi, + Legacy, +} + +enum ConfiguredInterruptMode { + MsiX(MsiXVectorTable<'static>), + Msi, + Legacy(PciInterruptPin), + None, +} + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub struct PciInterrupt { + pub address: PciAddress, + pub pin: PciInterruptPin, +} + +#[derive(Clone, Copy, Debug)] +pub struct PciInterruptRoute { + pub number: u32, + pub options: IrqOptions, +} + +pub enum PciMatch { + Generic(fn(&PciDeviceInfo) -> bool), + Vendor(u16, u16), + Class(u8, Option, Option), +} + +pub struct PciDriver { + pub(crate) name: &'static str, + pub(crate) check: PciMatch, + pub(crate) probe: fn(&PciDeviceInfo) -> Result<&'static dyn Device, Error>, +} + +/// Used to store PCI bus devices which were enumerated by the kernel +pub struct PciBusDevice { + pub(crate) info: PciDeviceInfo, + pub(crate) driver: Option<&'static dyn Device>, +} + +impl PciDeviceInfo { + pub fn init_interrupts(&self, preferred_mode: PreferredInterruptMode) -> Result<(), Error> { + self.interrupt_config + .try_init_with(|| { + let configured_mode = + if self.segment.has_msi && preferred_mode == PreferredInterruptMode::Msi { + if let Some(mut msix) = self.config_space.capability::() { + let mut vt = msix.vector_table().unwrap(); + + vt.mask_all(); + + msix.set_function_mask(false); + msix.set_enabled(true); + + ConfiguredInterruptMode::MsiX(vt) + } else if self.config_space.capability::().is_some() { + ConfiguredInterruptMode::Msi + } else { + self.interrupt_mode_from_pin() + } + } else { + // Ignore preferred_mode, the only supported is Legacy + self.interrupt_mode_from_pin() + }; + + IrqSafeRwLock::new(InterruptConfig { + preferred_mode, + configured_mode, + }) + }) + .expect("Attempted to double-configure interrupts for a PCI device"); + + Ok(()) + } + + fn interrupt_mode_from_pin(&self) -> ConfiguredInterruptMode { + match self.config_space.interrupt_pin() { + Some(pin) => ConfiguredInterruptMode::Legacy(pin), + None => ConfiguredInterruptMode::None, + } + } + + pub fn map_interrupt( + &self, + affinity: InterruptAffinity, + handler: &'static dyn InterruptHandler, + ) -> Result, Error> { + let mut irq = self.interrupt_config.get().write(); + + match &mut irq.configured_mode { + ConfiguredInterruptMode::MsiX(msix) => { + let info = + msix.register_range(0, 1, message_interrupt_controller(), affinity, handler)?; + Ok(Some(info[0])) + } + ConfiguredInterruptMode::Msi => { + let mut msi = self + .config_space + .capability::() + .ok_or(Error::InvalidOperation)?; + + let info = msi.register(message_interrupt_controller(), affinity, handler)?; + + Ok(Some(info)) + } + ConfiguredInterruptMode::Legacy(pin) => { + self.try_map_legacy(*pin, handler)?; + Ok(None) + } + ConfiguredInterruptMode::None => Err(Error::InvalidOperation), + } + } + + pub fn map_interrupt_multiple( + &self, + vector_range: Range, + affinity: InterruptAffinity, + handler: &'static 
dyn InterruptHandler, + ) -> Result, Error> { + let mut irq = self.interrupt_config.get().write(); + let start = vector_range.start; + let end = vector_range.end; + + match &mut irq.configured_mode { + ConfiguredInterruptMode::MsiX(msix) => msix.register_range( + start, + end, + message_interrupt_controller(), + affinity, + handler, + ), + _ => Err(Error::InvalidOperation), + } + } + + fn try_map_legacy( + &self, + pin: PciInterruptPin, + handler: &'static dyn InterruptHandler, + ) -> Result<(), Error> { + let src = PciInterrupt { + address: self.address, + pin, + }; + let route = self + .segment + .irq_translation_map + .get(&src) + .ok_or(Error::InvalidOperation)?; + + log::debug!( + "PCI {} pin {:?} -> system IRQ #{}", + src.address, + src.pin, + route.number + ); + + register_global_interrupt(route.number, route.options, handler) + } +} + +impl TryFrom for PciInterruptPin { + type Error = (); + + fn try_from(value: u32) -> Result { + match value { + 1 => Ok(Self::A), + 2 => Ok(Self::B), + 3 => Ok(Self::C), + 4 => Ok(Self::D), + _ => Err(()), + } + } +} diff --git a/kernel/driver/bus/pci/src/lib.rs b/kernel/driver/bus/pci/src/lib.rs new file mode 100644 index 00000000..aa768167 --- /dev/null +++ b/kernel/driver/bus/pci/src/lib.rs @@ -0,0 +1,624 @@ +//! PCI/PCIe bus interfaces +#![no_std] + +extern crate alloc; + +use core::fmt; + +#[cfg(target_arch = "x86_64")] +use acpi::mcfg::McfgEntry; +use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; +use bitflags::bitflags; +use device::{PciBusDevice, PciDeviceInfo, PciDriver, PciInterrupt, PciInterruptRoute, PciMatch}; +use device_api::Device; +use libk_mm::address::{FromRaw, PhysicalAddress}; +use libk_util::{sync::IrqSafeSpinlock, OneTimeInit}; +use yggdrasil_abi::error::Error; + +pub mod capability; +pub mod device; +mod space; + +pub use space::{ + ecam::PciEcam, PciConfigSpace, PciConfigurationSpace, PciLegacyConfigurationSpace, +}; + +bitflags! { + /// Command register of the PCI configuration space + pub struct PciCommandRegister: u16 { + /// If set, I/O access to the device is enabled + const ENABLE_IO = 1 << 0; + /// If set, memory-mapped access to the device is enabled + const ENABLE_MEMORY = 1 << 1; + /// If set, the device can generate PCI bus accesses on its own + const BUS_MASTER = 1 << 2; + /// If set, interrupts are masked from being raised + const DISABLE_INTERRUPTS = 1 << 10; + } +} + +bitflags! { + /// Status register of the PCI configuration space + pub struct PciStatusRegister: u16 { + /// Read-only. If set, the configuration space has a pointer to the capabilities list. + const CAPABILITIES_LIST = 1 << 4; + } +} + +/// Represents the address of a single object on a bus (or the bus itself) +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash, Debug)] +pub struct PciAddress { + /// PCIe segment group, ignored (?) 
with PCI + pub segment: u8, + /// Bus number + pub bus: u8, + /// Slot/device number + pub device: u8, + /// Function number + pub function: u8, +} + +/// Address provided by PCI configuration space Base Address Register +#[derive(Debug, Clone, Copy)] +pub enum PciBaseAddress { + /// 32-bit memory address + Memory32(u32), + /// 64-bit memory address + Memory64(u64), + /// I/O space address + Io(u16), +} + +/// Unique ID assigned to PCI capability structures +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +#[non_exhaustive] +#[repr(u8)] +pub enum PciCapabilityId { + /// MSI (32-bit or 64-bit) + Msi = 0x05, + /// Vendor-specific capability + VendorSpecific = 0x09, + /// MSI-X + MsiX = 0x11, + /// Unknown capability missing from this list + Unknown, +} + +/// Interface used for querying PCI capabilities +#[allow(unused)] +pub trait PciCapability { + /// Capability ID + const ID: PciCapabilityId; + /// Wrapper for accessing the capability data structure + type CapabilityData<'a, S: PciConfigurationSpace + ?Sized + 'a>; + + fn check(space: &S, offset: usize, len: usize) -> bool { + true + } + + /// Constructs an access wrapper for this capability with given offset + fn data<'s, S: PciConfigurationSpace + ?Sized + 's>( + space: &'s S, + offset: usize, + len: usize, + ) -> Self::CapabilityData<'s, S>; +} + +struct BusAddressAllocator { + pci_base_64: u64, + pci_base_32: u32, + // pci_base_io: u16, + host_base_64: PhysicalAddress, + host_base_32: PhysicalAddress, + // host_base_io: PhysicalAddress, + size_64: usize, + size_32: usize, + // size_io: usize, + offset_64: u64, + offset_32: u32, +} + +#[cfg_attr(target_arch = "x86_64", allow(dead_code))] +impl BusAddressAllocator { + pub fn from_ranges(ranges: &[PciAddressRange]) -> Self { + let mut range_32 = None; + let mut range_64 = None; + // let mut range_io = None; + + for range in ranges { + let range_val = (range.pci_base, range.host_base, range.size); + match range.ty { + // PciRangeType::Io if range_io.is_none() => { + // range_io.replace(range_val); + // } + PciRangeType::Memory32 if range_32.is_none() => { + range_32.replace(range_val); + } + PciRangeType::Memory64 if range_64.is_none() => { + range_64.replace(range_val); + } + _ => (), + } + } + + let (pci_base_32, host_base_32, size_32) = range_32.unwrap(); + let (pci_base_64, host_base_64, size_64) = range_64.unwrap(); + // let (pci_base_io, host_base_io, size_io) = range_io.unwrap(); + + Self { + pci_base_64, + pci_base_32: pci_base_32.try_into().unwrap(), + // pci_base_io: pci_base_io.try_into().unwrap(), + host_base_64, + host_base_32, + // host_base_io, + size_64, + size_32, + // size_io, + offset_64: 0, + offset_32: 0, + } + } + + pub fn allocate(&mut self, ty: PciRangeType, size: usize) -> (PciBaseAddress, PhysicalAddress) { + match ty { + PciRangeType::Io => todo!(), + PciRangeType::Memory32 => { + if self.offset_32 as usize + size >= self.size_32 { + todo!(); + } + let bar = PciBaseAddress::Memory32(self.pci_base_32 + self.offset_32); + let host = self.host_base_32.add(self.offset_32 as usize); + self.offset_32 += size as u32; + (bar, host) + } + PciRangeType::Memory64 => { + if self.offset_64 as usize + size >= self.size_64 { + todo!(); + } + let bar = PciBaseAddress::Memory64(self.pci_base_64 + self.offset_64); + let host = self.host_base_64.add(self.offset_64 as usize); + self.offset_64 += size as u64; + (bar, host) + } + PciRangeType::Configuration => unimplemented!(), + } + } +} + +#[derive(Debug)] +pub struct PciSegmentInfo { + pub segment_number: u8, + pub bus_number_start: 
u8, + pub bus_number_end: u8, + pub ecam_phys_base: Option, + + pub irq_translation_map: BTreeMap, + pub has_msi: bool, +} + +/// Represents a single PCIe bus segment +pub struct PciBusSegment { + allocator: Option, + info: Arc, + devices: Vec, +} + +pub enum PciRangeType { + Configuration, + Io, + Memory32, + Memory64, +} + +pub struct PciAddressRange { + pub ty: PciRangeType, + pub bus_number: u8, + pub pci_base: u64, + pub host_base: PhysicalAddress, + pub size: usize, +} + +/// Manager struct to store and control all PCI devices in the system +pub struct PciBusManager { + segments: Vec, +} + +impl PciBaseAddress { + pub fn as_memory(self) -> Option { + match self { + Self::Memory32(address) => Some(PhysicalAddress::from_raw(address as u64)), + Self::Memory64(address) => Some(PhysicalAddress::from_raw(address)), + _ => None, + } + } +} + +impl PciBusSegment { + fn probe_config_space(&self, address: PciAddress) -> Result, Error> { + match self.info.ecam_phys_base { + Some(ecam_phys_base) => Ok(unsafe { + PciEcam::probe_raw_parts(ecam_phys_base, self.info.bus_number_start, address)? + } + .map(PciConfigSpace::Ecam)), + None => todo!(), + } + } + + fn enumerate_function(&mut self, address: PciAddress) -> Result<(), Error> { + let Some(config) = self.probe_config_space(address)? else { + return Ok(()); + }; + + let header_type = config.header_type(); + + // Enumerate multi-function devices + if address.function == 0 && header_type & 0x80 != 0 { + for function in 1..8 { + self.enumerate_function(address.with_function(function))?; + } + } + + // PCI-to-PCI bridge + // if config.class_code() == 0x06 && config.subclass() == 0x04 { + // let secondary_bus = config.secondary_bus(); + // // TODO + // } + + if let Some(allocator) = self.allocator.as_mut() { + log::debug!("Remapping BARs for {}", address); + + // Find valid BARs + let mut i = 0; + let mut bar_mask = 0; + + while i < 6 { + let w0 = config.read_u32(0x10 + i * 4); + + let bar_width = match w0 & 1 == 0 { + // Memory BAR + true => match (w0 >> 1) & 3 { + // 32-bit BAR + 0 => 1, + // Reserved + 1 => unimplemented!(), + // 64-bit BAR + 2 => 2, + // Unknown + _ => unreachable!(), + }, + false => 1, + }; + + bar_mask |= 1 << i; + i += bar_width; + } + + for i in 0..6 { + if (1 << i) & bar_mask != 0 { + let orig_value = config.bar(i).unwrap(); + let size = unsafe { config.bar_size(i) }; + + if size != 0 { + log::debug!("BAR{}: size={:#x}", i, size); + + match orig_value { + PciBaseAddress::Io(_) => (), + PciBaseAddress::Memory64(_) => { + let (bar, host) = allocator.allocate(PciRangeType::Memory64, size); + let bar_address = bar.as_memory().unwrap(); + unsafe { + config.set_bar(i, bar); + } + log::debug!( + "Mapped BAR{} -> pci {:#x} host {:#x}", + i, + bar_address, + host + ); + // TODO Don't yet differentiate between Host/PCI addresses, lol + assert_eq!(bar_address, host); + } + PciBaseAddress::Memory32(_) => { + let (bar, host) = allocator.allocate(PciRangeType::Memory32, size); + let bar_address = bar.as_memory().unwrap(); + unsafe { + config.set_bar(i, bar); + } + log::debug!( + "Mapped BAR{} -> pci {:#x} host {:#x}", + i, + bar_address, + host + ); + // TODO Don't yet differentiate between Host/PCI addresses, lol + assert_eq!(bar_address, host); + } + } + } + } + } + } + + let info = PciDeviceInfo { + address, + segment: self.info.clone(), + config_space: config, + interrupt_config: Arc::new(OneTimeInit::new()), + }; + self.devices.push(PciBusDevice { info, driver: None }); + + Ok(()) + } + + fn enumerate_bus(&mut self, bus: u8) -> 
Result<(), Error> { + let address = PciAddress::for_bus(self.info.segment_number, bus); + + for i in 0..32 { + let device_address = address.with_device(i); + + self.enumerate_function(device_address)?; + } + + Ok(()) + } + + /// Enumerates the bus segment, placing found devices into the manager + pub fn enumerate(&mut self) -> Result<(), Error> { + for bus in self.info.bus_number_start..self.info.bus_number_end { + self.enumerate_bus(bus)?; + } + Ok(()) + } +} + +impl PciBusManager { + const fn new() -> Self { + Self { + segments: Vec::new(), + } + } + + /// Walks the bus device list and calls init/init_irq functions on any devices with associated + /// drivers + pub fn setup_bus_devices() -> Result<(), Error> { + log::info!("Setting up bus devices"); + Self::walk_bus_devices(|device| { + log::info!("Set up {}", device.info.address); + setup_bus_device(device)?; + Ok(true) + }) + } + + /// Iterates over the bus devices, calling the function on each of them until either an error + /// or `Ok(false)` is returned + pub fn walk_bus_devices<F: FnMut(&mut PciBusDevice) -> Result<bool, Error>>( + mut f: F, + ) -> Result<(), Error> { + let mut this = PCI_MANAGER.lock(); + + for segment in this.segments.iter_mut() { + for device in segment.devices.iter_mut() { + if !f(device)? { + return Ok(()); + } + } + } + + Ok(()) + } + + /// Enumerates a bus segment provided by ACPI MCFG table entry + #[cfg(target_arch = "x86_64")] + pub fn add_segment_from_mcfg(entry: &McfgEntry) -> Result<(), Error> { + let mut bus_segment = PciBusSegment { + info: Arc::new(PciSegmentInfo { + segment_number: entry.pci_segment_group as u8, + bus_number_start: entry.bus_number_start, + bus_number_end: entry.bus_number_end, + ecam_phys_base: Some(PhysicalAddress::from_raw(entry.base_address)), + + // TODO obtain this from ACPI SSDT + irq_translation_map: BTreeMap::new(), + has_msi: true, + }), + // Firmware has already done this for us + allocator: None, + + devices: Vec::new(), + }; + + let mut this = PCI_MANAGER.lock(); + bus_segment.enumerate()?; + this.segments.push(bus_segment); + + Ok(()) + } + + #[cfg(target_arch = "aarch64")] + pub fn add_segment_from_device_tree( + cfg_base: PhysicalAddress, + bus_range: core::ops::Range<u8>, + ranges: Vec<PciAddressRange>, + interrupt_map: BTreeMap<PciInterrupt, PciInterruptRoute>, + ) -> Result<(), Error> { + let mut bus_segment = PciBusSegment { + info: Arc::new(PciSegmentInfo { + segment_number: 0, + bus_number_start: bus_range.start, + bus_number_end: bus_range.end, + ecam_phys_base: Some(cfg_base), + + irq_translation_map: interrupt_map, + has_msi: false, + }), + allocator: Some(BusAddressAllocator::from_ranges(&ranges)), + + devices: Vec::new(), + }; + + let mut this = PCI_MANAGER.lock(); + bus_segment.enumerate()?; + this.segments.push(bus_segment); + + Ok(()) + } +} + +impl fmt::Display for PciAddress { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}:{}:{}", self.bus, self.device, self.function) + } +} + +impl PciAddress { + /// Constructs a [PciAddress] representing a bus + pub const fn for_bus(segment: u8, bus: u8) -> Self { + Self { + segment, + bus, + device: 0, + function: 0, + } + } + + /// Constructs a [PciAddress] representing a specific function + pub const fn for_function(segment: u8, bus: u8, device: u8, function: u8) -> Self { + Self { + segment, + bus, + device, + function, + } + } + + /// Constructs a [PciAddress] representing a device on a given bus + pub const fn with_device(self, device: u8) -> Self { + Self { + device, + function: 0, + ..self + } + } + + /// Constructs a [PciAddress] representing a function of a given bus device + pub
+
+impl fmt::Display for PciAddress {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(f, "{}:{}:{}", self.bus, self.device, self.function)
+    }
+}
+
+impl PciAddress {
+    /// Constructs a [PciAddress] representing a bus
+    pub const fn for_bus(segment: u8, bus: u8) -> Self {
+        Self {
+            segment,
+            bus,
+            device: 0,
+            function: 0,
+        }
+    }
+
+    /// Constructs a [PciAddress] representing a specific function
+    pub const fn for_function(segment: u8, bus: u8, device: u8, function: u8) -> Self {
+        Self {
+            segment,
+            bus,
+            device,
+            function,
+        }
+    }
+
+    /// Constructs a [PciAddress] representing a device on a given bus
+    pub const fn with_device(self, device: u8) -> Self {
+        Self {
+            device,
+            function: 0,
+            ..self
+        }
+    }
+
+    /// Constructs a [PciAddress] representing a function of a given bus device
+    pub const fn with_function(self, function: u8) -> Self {
+        Self { function, ..self }
+    }
+}
+
+impl PciConfigurationSpace for PciConfigSpace {
+    fn read_u32(&self, offset: usize) -> u32 {
+        match self {
+            Self::Ecam(ecam) => ecam.read_u32(offset),
+            _ => todo!(),
+        }
+    }
+
+    fn write_u32(&self, offset: usize, value: u32) {
+        match self {
+            Self::Ecam(ecam) => ecam.write_u32(offset, value),
+            _ => todo!(),
+        }
+    }
+}
+
+fn setup_bus_device(device: &mut PciBusDevice) -> Result<(), Error> {
+    if device.driver.is_some() {
+        return Ok(());
+    }
+
+    let config = &device.info.config_space;
+
+    log::debug!(
+        "{}: {:04x}:{:04x}",
+        device.info.address,
+        config.vendor_id(),
+        config.device_id()
+    );
+
+    let class = config.class_code();
+    let subclass = config.subclass();
+    let prog_if = config.prog_if();
+
+    let drivers = PCI_DRIVERS.lock();
+    for driver in drivers.iter() {
+        if driver
+            .check
+            .check_device(&device.info, class, subclass, prog_if)
+        {
+            // TODO add the device to the bus
+            log::debug!(" -> {:?}", driver.name);
+            let instance = (driver.probe)(&device.info)?;
+
+            unsafe {
+                instance.init()?;
+            }
+
+            device.driver.replace(instance);
+            break;
+        } else {
+            log::debug!(" -> No driver");
+        }
+    }
+
+    Ok(())
+}
+
+impl PciMatch {
+    pub fn check_device(&self, info: &PciDeviceInfo, class: u8, subclass: u8, prog_if: u8) -> bool {
+        match self {
+            Self::Generic(f) => f(info),
+            &Self::Vendor(vendor_, device_) => {
+                info.config_space.vendor_id() == vendor_ && info.config_space.device_id() == device_
+            }
+            &Self::Class(class_, Some(subclass_), Some(prog_if_)) => {
+                class_ == class && subclass_ == subclass && prog_if_ == prog_if
+            }
+            &Self::Class(class_, Some(subclass_), _) => class_ == class && subclass_ == subclass,
+            &Self::Class(class_, _, _) => class_ == class,
+        }
+    }
+}
+
+pub fn register_class_driver(
+    name: &'static str,
+    class: u8,
+    subclass: Option<u8>,
+    prog_if: Option<u8>,
+    probe: fn(&PciDeviceInfo) -> Result<&'static dyn Device, Error>,
+) {
+    PCI_DRIVERS.lock().push(PciDriver {
+        name,
+        check: PciMatch::Class(class, subclass, prog_if),
+        probe,
+    });
+}
+
+pub fn register_vendor_driver(
+    name: &'static str,
+    vendor_id: u16,
+    device_id: u16,
+    probe: fn(&PciDeviceInfo) -> Result<&'static dyn Device, Error>,
+) {
+    PCI_DRIVERS.lock().push(PciDriver {
+        name,
+        check: PciMatch::Vendor(vendor_id, device_id),
+        probe,
+    });
+}
+
+pub fn register_generic_driver(
+    name: &'static str,
+    check: fn(&PciDeviceInfo) -> bool,
+    probe: fn(&PciDeviceInfo) -> Result<&'static dyn Device, Error>,
+) {
+    PCI_DRIVERS.lock().push(PciDriver {
+        name,
+        check: PciMatch::Generic(check),
+        probe,
+    });
+}
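+// Illustrative sketch (not part of this patch): a device driver crate would
+// register itself against a vendor/device pair; `MyNicDevice` and its IDs are
+// hypothetical here, and the non-capturing closure coerces to the `fn`
+// pointer expected by `register_vendor_driver`:
+//
+//     register_vendor_driver("my-nic", 0x8086, 0x100E, |info| {
+//         // probe: construct the device and return it as &'static dyn Device
+//         MyNicDevice::probe(info)
+//     });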
+
+static PCI_DRIVERS: IrqSafeSpinlock<Vec<PciDriver>> = IrqSafeSpinlock::new(Vec::new());
+static PCI_MANAGER: IrqSafeSpinlock<PciBusManager> = IrqSafeSpinlock::new(PciBusManager::new());
diff --git a/kernel/driver/bus/pci/src/space/ecam.rs b/kernel/driver/bus/pci/src/space/ecam.rs
new file mode 100644
index 00000000..15ab2744
--- /dev/null
+++ b/kernel/driver/bus/pci/src/space/ecam.rs
@@ -0,0 +1,60 @@
+//! PCI Express ECAM interface
+use libk_mm::{address::PhysicalAddress, device::DeviceMemoryMapping};
+use yggdrasil_abi::error::Error;
+
+use super::{PciAddress, PciConfigurationSpace};
+
+/// PCI Express Enhanced Configuration Access Mechanism
+#[derive(Debug, Clone)]
+#[repr(transparent)]
+pub struct PciEcam {
+    mapping: DeviceMemoryMapping,
+}
+
+impl PciConfigurationSpace for PciEcam {
+    fn read_u32(&self, offset: usize) -> u32 {
+        assert_eq!(offset & 3, 0);
+        unsafe { ((self.mapping.address() + offset) as *const u32).read_volatile() }
+    }
+
+    fn write_u32(&self, offset: usize, value: u32) {
+        assert_eq!(offset & 3, 0);
+        unsafe { ((self.mapping.address() + offset) as *mut u32).write_volatile(value) }
+    }
+}
+
+impl PciEcam {
+    /// Maps the physical address of an ECAM space for kernel access.
+    ///
+    /// # Safety
+    ///
+    /// The `phys_addr` must be a valid ECAM address. The address must not alias any other mapped
+    /// regions. The address must be aligned to a 4KiB boundary and be valid for accesses within a
+    /// 4KiB-sized range.
+    pub unsafe fn map(phys_addr: PhysicalAddress) -> Result<Self, Error> {
+        let mapping = DeviceMemoryMapping::map(phys_addr, 0x1000, Default::default())?;
+        Ok(Self { mapping })
+    }
+
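+    // Address layout note: each (bus, device, function) triple gets its own
+    // 4 KiB configuration page. With `bus_offset = 0`, bus 2 / device 3 /
+    // function 1 lands at ((2 * 256) + (3 * 8) + 1) * 0x1000 = 0x219000 from
+    // the segment's ECAM base, matching the computation in `probe_raw_parts`
+    // below.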
+    /// Checks if the ECAM contains a valid device configuration space, mapping and returning a
+    /// [PciEcam] if it does.
+    ///
+    /// # Safety
+    ///
+    /// See [PciEcam::map].
+    pub unsafe fn probe_raw_parts(
+        segment_phys_addr: PhysicalAddress,
+        bus_offset: u8,
+        address: PciAddress,
+    ) -> Result<Option<Self>, Error> {
+        let phys_addr = segment_phys_addr.add(
+            ((address.bus - bus_offset) as usize * 256
+                + address.device as usize * 8
+                + address.function as usize)
+                * 0x1000,
+        );
+        let this = Self::map(phys_addr)?;
+
+        Ok(if this.is_valid() { Some(this) } else { None })
+    }
+}
diff --git a/kernel/driver/bus/pci/src/space/mod.rs b/kernel/driver/bus/pci/src/space/mod.rs
new file mode 100644
index 00000000..2f860926
--- /dev/null
+++ b/kernel/driver/bus/pci/src/space/mod.rs
@@ -0,0 +1,382 @@
+use alloc::sync::Arc;
+
+use super::{PciAddress, PciBaseAddress, PciCapability, PciCapabilityId, PciEcam};
+use crate::{device::PciInterruptPin, PciCommandRegister, PciStatusRegister};
+
+pub(super) mod ecam;
+
+macro_rules! pci_config_field_getter {
+    ($self:ident, u32, $offset:expr) => {
+        $self.read_u32($offset)
+    };
+
+    ($self:ident, u16, $offset:expr) => {
+        $self.read_u16($offset)
+    };
+
+    ($self:ident, u8, $offset:expr) => {
+        $self.read_u8($offset)
+    };
+}
+
+macro_rules! pci_config_field_setter {
+    ($self:ident, u32, $offset:expr, $value:expr) => {
+        $self.write_u32($offset, $value)
+    };
+
+    ($self:ident, u16, $offset:expr, $value:expr) => {{
+        $self.write_u16($offset, $value)
+    }};
+
+    ($self:ident, u8, $offset:expr, $value:expr) => {
+        $self.write_u8($offset, $value)
+    };
+}
+
+macro_rules! pci_config_field {
+    (
+        $offset:expr => $ty:ident,
+        $(#[$getter_meta:meta])* $getter:ident
+        $(, $(#[$setter_meta:meta])* $setter:ident)?
+    ) => {
+        $(#[$getter_meta])*
+        fn $getter(&self) -> $ty {
+            pci_config_field_getter!(self, $ty, $offset)
+        }
+
+        $(
+            $(#[$setter_meta])*
+            fn $setter(&self, value: $ty) {
+                pci_config_field_setter!(self, $ty, $offset, value)
+            }
+        )?
+    };
+}
+
+/// Provides access to the legacy (port I/O-driven) PCI configuration space
+#[derive(Debug)]
+#[repr(transparent)]
+pub struct PciLegacyConfigurationSpace {
+    #[allow(unused)]
+    address: PciAddress,
+}
+
+/// Describes a configuration space access method for a PCI device
+#[derive(Debug, Clone)]
+pub enum PciConfigSpace {
+    /// Legacy configuration space.
+    ///
+    /// See [PciLegacyConfigurationSpace].
+    Legacy(PciAddress),
+
+    /// Enhanced Configuration Access Mechanism (PCIe).
+    ///
+    /// See [PciEcam].
+    Ecam(PciEcam),
+}
+
+pub struct CapabilityIterator<'s, S: PciConfigurationSpace + ?Sized> {
+    space: &'s S,
+    current: Option<usize>,
+}
+
+impl<'s, S: PciConfigurationSpace + ?Sized> Iterator for CapabilityIterator<'s, S> {
+    type Item = (PciCapabilityId, usize, usize);
+
+    fn next(&mut self) -> Option<Self::Item> {
+        let offset = self.current? & !0x3;
+
+        // NOTE: relies on PciCapabilityId covering every possible u8 value
+        let id = unsafe { core::mem::transmute(self.space.read_u8(offset)) };
+        let len = self.space.read_u8(offset + 2);
+        let next_pointer = self.space.read_u8(offset + 1);
+
+        self.current = if next_pointer != 0 {
+            Some(next_pointer as usize)
+        } else {
+            None
+        };
+
+        Some((id, offset, len as usize))
+    }
+}
+
+/// Interface for accessing the configuration space of a device
+pub trait PciConfigurationSpace {
+    /// Reads a 32-bit value from the device configuration space.
+    ///
+    /// # Note
+    ///
+    /// The `offset` must be u32-aligned.
+    fn read_u32(&self, offset: usize) -> u32;
+
+    /// Writes a 32-bit value to the device configuration space.
+    ///
+    /// # Note
+    ///
+    /// The `offset` must be u32-aligned.
+    fn write_u32(&self, offset: usize, value: u32);
+
+    /// Reads a 16-bit value from the device configuration space.
+    ///
+    /// # Note
+    ///
+    /// The `offset` must be u16-aligned.
+    fn read_u16(&self, offset: usize) -> u16 {
+        assert_eq!(offset & 1, 0);
+        let value = self.read_u32(offset & !3);
+        (value >> ((offset & 3) * 8)) as u16
+    }
+
+    /// Reads a byte from the device configuration space
+    fn read_u8(&self, offset: usize) -> u8 {
+        let value = self.read_u32(offset & !3);
+        (value >> ((offset & 3) * 8)) as u8
+    }
+
+    /// Writes a 16-bit value to the device configuration space.
+    ///
+    /// # Note
+    ///
+    /// The `offset` must be u16-aligned.
+    fn write_u16(&self, offset: usize, value: u16) {
+        let shift = ((offset >> 1) & 1) << 4;
+        assert_eq!(offset & 1, 0);
+        let mut tmp = self.read_u32(offset & !3);
+        tmp &= !(0xFFFF << shift);
+        tmp |= (value as u32) << shift;
+        self.write_u32(offset & !3, tmp);
+    }
+
+    /// Writes a byte to the device configuration space
+    fn write_u8(&self, _offset: usize, _value: u8) {
+        todo!()
+    }
+
+    /// Returns `true` if the device is present on the bus (i.e.
configuration space is not filled + /// with only 1's) + fn is_valid(&self) -> bool { + self.vendor_id() != 0xFFFF && self.device_id() != 0xFFFF + } + + pci_config_field!( + 0x00 => u16, + #[doc = "Returns the Vendor ID"] vendor_id + ); + pci_config_field!(0x02 => u16, + #[doc = "Returns the Device ID"] device_id + ); + pci_config_field!( + 0x04 => u16, + #[doc = "Returns the value of the command register"] command, + #[doc = "Writes to the command word register"] set_command + ); + pci_config_field!( + 0x06 => u16, + #[doc = "Returns the value of the status register"] status + ); + + pci_config_field!( + 0x08 => u8, + #[doc = "Returns the device Revision ID"] + rev_id + ); + pci_config_field!( + 0x09 => u8, + #[doc = "Returns the device Prog IF field"] + prog_if + ); + pci_config_field!( + 0x0A => u8, + #[doc = "Returns the device Subclass field"] + subclass + ); + pci_config_field!( + 0x0B => u8, + #[doc = "Returns the device Class Code field"] + class_code + ); + + // ... + pci_config_field!( + 0x0E => u8, + #[doc = "Returns the header type of the device"] + header_type + ); + pci_config_field!( + 0x19 => u8, + #[doc = r#" + Returns the secondary bus number associated with this device + + # Note + + The function is only valid for devices with `header_type() == 1` + "#] + secondary_bus + ); + pci_config_field!( + 0x34 => u8, + #[doc = + r"Returns the offset within the configuration space where the Capabilities List + is located. Only valid if the corresponding Status Register bit is set" + ] + capability_pointer + ); + + fn interrupt_pin(&self) -> Option { + PciInterruptPin::try_from(self.read_u8(0x3D) as u32).ok() + } + + unsafe fn bar_size(&self, index: usize) -> usize { + let cmd = self.command(); + + // Disable I/O and memory + self.set_command( + cmd & !(PciCommandRegister::ENABLE_IO | PciCommandRegister::ENABLE_MEMORY).bits(), + ); + + let orig_value = self.bar(index).unwrap(); + // TODO preserve prefetch bit + let mask_value = match orig_value { + PciBaseAddress::Io(_) => PciBaseAddress::Io(0xFFFC), + PciBaseAddress::Memory32(_) => PciBaseAddress::Memory32(0xFFFFFFF0), + PciBaseAddress::Memory64(_) => PciBaseAddress::Memory64(0xFFFFFFFFFFFFFFF0), + }; + self.set_bar(index, mask_value); + let new_value = self.bar(index).unwrap(); + + let size = match new_value { + PciBaseAddress::Io(address) if address != 0 => ((!address) + 1) as usize, + PciBaseAddress::Memory32(address) if address != 0 => ((!address) + 1) as usize, + PciBaseAddress::Memory64(address) if address != 0 => ((!address) + 1) as usize, + _ => 0, + }; + + self.set_bar(index, orig_value); + self.set_command(cmd); + + size + } + + /// Updates the value of the Base Address Register with given index. + /// + /// # Note + /// + /// The function is only valid for devices with `header_type() == 0` + /// + /// The `index` corresponds to the actual configuration space BAR index. + unsafe fn set_bar(&self, index: usize, value: PciBaseAddress) { + assert!(index < 6); + + match value { + PciBaseAddress::Io(value) => { + self.write_u32(0x10 + index * 4, ((value as u32) & !0x3) | 1) + } + PciBaseAddress::Memory32(address) => self.write_u32(0x10 + index * 4, address & !0xF), + PciBaseAddress::Memory64(address) => { + self.write_u32(0x10 + index * 4, ((address as u32) & !0xF) | (2 << 1)); + self.write_u32(0x10 + (index + 1) * 4, (address >> 32) as u32); + } + } + } + + /// Returns the value of the Base Address Register with given index. 
+ /// + /// # Note + /// + /// The function is only valid for devices with `header_type() == 0` + /// + /// The `index` corresponds to the actual configuration space BAR index, i.e. if a 64-bit + /// address occupies [BAR0, BAR1] and BAR 1 is requested, the function will return [None]. + fn bar(&self, index: usize) -> Option { + assert!(index < 6); + + if index % 2 == 0 { + let w0 = self.read_u32(0x10 + index * 4); + + match w0 & 1 { + 0 => match (w0 >> 1) & 3 { + 0 => { + // 32-bit memory BAR + Some(PciBaseAddress::Memory32(w0 & !0xF)) + } + 2 => { + // 64-bit memory BAR + let w1 = self.read_u32(0x10 + (index + 1) * 4); + Some(PciBaseAddress::Memory64( + ((w1 as u64) << 32) | ((w0 as u64) & !0xF), + )) + } + _ => unimplemented!(), + }, + 1 => Some(PciBaseAddress::Io((w0 as u16) & !0x3)), + _ => unreachable!(), + } + } else { + let prev_w0 = self.read_u32(0x10 + (index - 1) * 4); + if prev_w0 & 0x7 == 0x4 { + // Previous BAR is 64-bit memory and this one is its continuation + return None; + } + + let w0 = self.read_u32(0x10 + index * 4); + + match w0 & 1 { + 0 => match (w0 >> 1) & 3 { + 0 => { + // 32-bit memory BAR + Some(PciBaseAddress::Memory32(w0 & !0xF)) + } + // TODO can 64-bit BARs not be on a 64-bit boundary? + 2 => todo!(), + _ => unimplemented!(), + }, + 1 => todo!(), + _ => unreachable!(), + } + } + } + + /// Returns an iterator over the PCI capabilities + fn capability_iter(&self) -> CapabilityIterator { + let status = PciStatusRegister::from_bits_retain(self.status()); + + let current = if status.contains(PciStatusRegister::CAPABILITIES_LIST) { + let ptr = self.capability_pointer() as usize; + + if ptr != 0 { + Some(self.capability_pointer() as usize) + } else { + None + } + } else { + // Return an empty iterator + None + }; + + CapabilityIterator { + space: self, + current, + } + } + + /// Locates a capability within this configuration space + fn capability(&self) -> Option> { + self.capability_iter().find_map(|(id, offset, len)| { + if id == C::ID && C::check(self, offset, len) { + Some(C::data(self, offset, len)) + } else { + None + } + }) + } +} + +impl PciConfigurationSpace for Arc { + fn read_u32(&self, offset: usize) -> u32 { + T::read_u32(self.as_ref(), offset) + } + + fn write_u32(&self, offset: usize, value: u32) { + T::write_u32(self.as_ref(), offset, value); + } +} diff --git a/kernel/driver/bus/usb/Cargo.toml b/kernel/driver/bus/usb/Cargo.toml new file mode 100644 index 00000000..b6b70c3a --- /dev/null +++ b/kernel/driver/bus/usb/Cargo.toml @@ -0,0 +1,18 @@ +[package] +name = "ygg_driver_usb" +version = "0.1.0" +edition = "2021" +authors = ["Mark Poliakov "] + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +device-api = { path = "../../../lib/device-api", features = ["derive"] } +ygg_driver_input = { path = "../../input" } + +libk-util = { path = "../../../libk/libk-util" } +libk-mm = { path = "../../../libk/libk-mm" } +libk-thread = { path = "../../../libk/libk-thread" } + +log = "0.4.20" +bytemuck = { version = "1.14.0", features = ["derive"] } +futures-util = { version = "0.3.28", default-features = false, features = ["alloc", "async-await"] } diff --git a/kernel/driver/bus/usb/src/bus.rs b/kernel/driver/bus/usb/src/bus.rs new file mode 100644 index 00000000..9ac24c5e --- /dev/null +++ b/kernel/driver/bus/usb/src/bus.rs @@ -0,0 +1,63 @@ +use core::sync::atomic::{AtomicU16, Ordering}; + +use alloc::{collections::BTreeMap, sync::Arc}; +use libk_util::{queue::UnboundedMpmcQueue, sync::spin_rwlock::IrqSafeRwLock}; + 
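+// A single global `UsbBusManager` (the `BUS_MANAGER` static below) tracks
+// host controllers and attached devices; `register_device` pushes each new
+// device onto a queue that the async `bus_handler` task drains to match
+// class drivers against it.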
+use crate::{
+    class_driver,
+    device::{UsbBusAddress, UsbDeviceAccess},
+    UsbHostController,
+};
+
+pub struct UsbBusManager {
+    busses: IrqSafeRwLock<BTreeMap<u16, &'static dyn UsbHostController>>,
+    devices: IrqSafeRwLock<BTreeMap<UsbBusAddress, Arc<UsbDeviceAccess>>>,
+
+    last_bus_address: AtomicU16,
+}
+
+impl UsbBusManager {
+    pub fn register_bus(hc: &'static dyn UsbHostController) -> u16 {
+        let i = BUS_MANAGER.last_bus_address.fetch_add(1, Ordering::AcqRel);
+        BUS_MANAGER.busses.write().insert(i, hc);
+        i
+    }
+
+    pub fn register_device(device: Arc<UsbDeviceAccess>) {
+        BUS_MANAGER
+            .devices
+            .write()
+            .insert(device.bus_address(), device.clone());
+
+        QUEUE.push_back(device);
+    }
+
+    pub fn detach_device(address: UsbBusAddress) {
+        if let Some(device) = BUS_MANAGER.devices.write().remove(&address) {
+            device.handle_detach();
+        }
+    }
+}
+
+pub async fn bus_handler() {
+    class_driver::register_default_class_drivers();
+
+    loop {
+        let new_device = QUEUE.pop_front().await;
+        log::info!(
+            "New {:?}-speed USB device connected: {}",
+            new_device.speed(),
+            new_device.bus_address()
+        );
+
+        class_driver::spawn_driver(new_device).await.ok();
+    }
+}
+
+static BUS_MANAGER: UsbBusManager = UsbBusManager {
+    busses: IrqSafeRwLock::new(BTreeMap::new()),
+    devices: IrqSafeRwLock::new(BTreeMap::new()),
+
+    last_bus_address: AtomicU16::new(0),
+};
+static QUEUE: UnboundedMpmcQueue<Arc<UsbDeviceAccess>> = UnboundedMpmcQueue::new();
diff --git a/kernel/driver/bus/usb/src/class_driver/mod.rs b/kernel/driver/bus/usb/src/class_driver/mod.rs
new file mode 100644
index 00000000..7aba3634
--- /dev/null
+++ b/kernel/driver/bus/usb/src/class_driver/mod.rs
@@ -0,0 +1,297 @@
+use alloc::{sync::Arc, vec::Vec};
+use futures_util::future::BoxFuture;
+use libk_thread::runtime;
+use libk_util::sync::spin_rwlock::IrqSafeRwLock;
+
+use crate::{
+    device::UsbDeviceAccess,
+    error::UsbError,
+    info::{UsbDeviceClass, UsbDeviceProtocol},
+};
+
+pub struct UsbClassInfo {
+    pub class: UsbDeviceClass,
+    pub subclass: u8,
+    pub protocol: UsbDeviceProtocol,
+}
+
+pub trait UsbDriver: Send + Sync {
+    fn name(&self) -> &'static str;
+    fn run(
+        self: Arc<Self>,
+        device: Arc<UsbDeviceAccess>,
+    ) -> BoxFuture<'static, Result<(), UsbError>>;
+
+    fn probe(&self, class: &UsbClassInfo, device: &UsbDeviceAccess) -> bool;
+}
+
+async fn extract_class_info(device: &UsbDeviceAccess) -> Result<Option<UsbClassInfo>, UsbError> {
+    if device.num_configurations != 1 {
+        return Ok(None);
+    }
+    let device_info = &device.info;
+    let config_info = device.query_configuration_info(0).await?;
+
+    if config_info.interfaces.len() == 1 {
+        let if_info = &config_info.interfaces[0];
+
+        let class = if device_info.device_class == UsbDeviceClass::FromInterface {
+            if_info.interface_class
+        } else {
+            device_info.device_class
+        };
+        let subclass = if device_info.device_subclass == 0 {
+            if_info.interface_subclass
+        } else {
+            device_info.device_subclass
+        };
+        let protocol = if device_info.device_protocol == UsbDeviceProtocol::FromInterface {
+            if_info.interface_protocol
+        } else {
+            device_info.device_protocol
+        };
+
+        Ok(Some(UsbClassInfo {
+            class,
+            subclass,
+            protocol,
+        }))
+    } else {
+        Ok(None)
+    }
+}
+
+async fn pick_driver(
+    device: &UsbDeviceAccess,
+) -> Result<Option<Arc<dyn UsbDriver>>, UsbError> {
+    let Some(class) = extract_class_info(&device).await? else {
+        return Ok(None);
+    };
+
+    for driver in USB_DEVICE_DRIVERS.read().iter() {
+        if driver.probe(&class, device) {
+            return Ok(Some(driver.clone()));
+        }
+    }
+    Ok(None)
+}
+
+pub async fn spawn_driver(device: Arc<UsbDeviceAccess>) -> Result<(), UsbError> {
+    if let Some(driver) = pick_driver(&device).await?
{ + runtime::spawn(async move { + let name = driver.name(); + match driver.run(device).await { + e @ Err(UsbError::DeviceDisconnected) => { + log::warn!( + "Driver {:?} did not exit cleanly: device disconnected", + name, + ); + + e + } + e => e, + } + }) + .map_err(UsbError::SystemError)?; + } + Ok(()) +} + +pub fn register_driver(driver: Arc) { + // TODO check for duplicates + USB_DEVICE_DRIVERS.write().push(driver); +} + +pub fn register_default_class_drivers() { + register_driver(Arc::new(hid_keyboard::UsbHidKeyboardDriver)); +} + +static USB_DEVICE_DRIVERS: IrqSafeRwLock>> = + IrqSafeRwLock::new(Vec::new()); + +pub mod hid_keyboard { + use core::mem::MaybeUninit; + + use alloc::sync::Arc; + use futures_util::{future::BoxFuture, FutureExt}; + use libk_mm::PageBox; + use yggdrasil_abi::io::{KeyboardKey, KeyboardKeyEvent}; + + use crate::{device::UsbDeviceAccess, error::UsbError, info::UsbDeviceClass}; + + use super::{UsbClassInfo, UsbDriver}; + + pub struct UsbHidKeyboardDriver; + + const MODIFIER_MAP: &[KeyboardKey] = &[ + KeyboardKey::LControl, + KeyboardKey::LShift, + KeyboardKey::LAlt, + KeyboardKey::Unknown, + KeyboardKey::RControl, + KeyboardKey::RShift, + KeyboardKey::RAlt, + KeyboardKey::Unknown, + ]; + + #[derive(Default)] + struct KeyboardState { + state: [u64; 4], + mods: u8, + } + + impl KeyboardState { + pub fn new() -> Self { + Self::default() + } + + pub fn translate_key(k: u8) -> KeyboardKey { + match k { + 4..=29 => KeyboardKey::Char(k - 4 + b'a'), + 30..=38 => KeyboardKey::Char(k - 30 + b'1'), + 39 => KeyboardKey::Char(b'0'), + + 40 => KeyboardKey::Enter, + 41 => KeyboardKey::Escape, + 42 => KeyboardKey::Backspace, + 43 => KeyboardKey::Tab, + 44 => KeyboardKey::Char(b' '), + 45 => KeyboardKey::Char(b'-'), + 46 => KeyboardKey::Char(b'='), + 47 => KeyboardKey::Char(b'['), + 48 => KeyboardKey::Char(b']'), + 49 => KeyboardKey::Char(b'\\'), + 51 => KeyboardKey::Char(b';'), + 52 => KeyboardKey::Char(b'\''), + 53 => KeyboardKey::Char(b'`'), + 54 => KeyboardKey::Char(b','), + 55 => KeyboardKey::Char(b'.'), + 56 => KeyboardKey::Char(b'/'), + + 58..=69 => KeyboardKey::F(k - 58), + + _ => { + log::debug!("Unknown key: {}", k); + KeyboardKey::Unknown + } + } + } + + pub fn retain_modifiers( + &mut self, + m: u8, + events: &mut [MaybeUninit], + ) -> usize { + let mut count = 0; + let released = self.mods & !m; + for i in 0..8 { + if released & (1 << i) != 0 { + events[count].write(KeyboardKeyEvent::Released(MODIFIER_MAP[i])); + count += 1; + } + } + self.mods &= m; + count + } + + pub fn press_modifiers( + &mut self, + m: u8, + events: &mut [MaybeUninit], + ) -> usize { + let mut count = 0; + let pressed = m & !self.mods; + for i in 0..8 { + if pressed & (1 << i) != 0 { + events[count].write(KeyboardKeyEvent::Pressed(MODIFIER_MAP[i])); + count += 1; + } + } + self.mods = m; + count + } + + pub fn retain( + &mut self, + keys: &[u8], + events: &mut [MaybeUninit], + ) -> usize { + let mut count = 0; + for i in 1..256 { + if self.state[i / 64] & (1 << (i % 64)) != 0 { + if !keys.contains(&(i as u8)) { + events[count] + .write(KeyboardKeyEvent::Released(Self::translate_key(i as u8))); + self.state[i / 64] &= !(1 << (i % 64)); + count += 1; + } + } + } + count + } + + pub fn press( + &mut self, + keys: &[u8], + events: &mut [MaybeUninit], + ) -> usize { + let mut count = 0; + for &k in keys { + let index = (k as usize) / 64; + if self.state[index] & (1 << (k % 64)) == 0 { + self.state[index] |= 1 << (k % 64); + events[count].write(KeyboardKeyEvent::Pressed(Self::translate_key(k))); + 
count += 1; + } + } + count + } + } + + impl UsbDriver for UsbHidKeyboardDriver { + fn run( + self: Arc, + device: Arc, + ) -> BoxFuture<'static, Result<(), UsbError>> { + async move { + // TODO not sure whether to use boot protocol (easy) or GetReport + let config = device.select_configuration(|_| true).await?.unwrap(); + assert_eq!(config.endpoints.len(), 1); + + let pipe = device.open_interrupt_in_pipe(1).await?; + + let mut buffer = PageBox::new_slice(0, 8).map_err(UsbError::MemoryError)?; + let mut state = KeyboardState::new(); + let mut events = [MaybeUninit::uninit(); 16]; + + loop { + let mut event_count = 0; + + let data = pipe.read(&mut buffer).await?; + + event_count += state.retain_modifiers(data[0], &mut events); + event_count += state.press_modifiers(data[0], &mut events[event_count..]); + event_count += state.retain(&data[2..], &mut events[event_count..]); + event_count += state.press(&data[2..], &mut events[event_count..]); + + let events = + unsafe { MaybeUninit::slice_assume_init_ref(&events[..event_count]) }; + + for &event in events { + log::debug!("Generic Keyboard: {:?}", event); + ygg_driver_input::send_event(event); + } + } + } + .boxed() + } + + fn name(&self) -> &'static str { + "USB HID Keyboard" + } + + fn probe(&self, class: &UsbClassInfo, _device: &UsbDeviceAccess) -> bool { + class.class == UsbDeviceClass::Hid && class.subclass == 0x01 + } + } +} diff --git a/kernel/driver/bus/usb/src/communication.rs b/kernel/driver/bus/usb/src/communication.rs new file mode 100644 index 00000000..823a5702 --- /dev/null +++ b/kernel/driver/bus/usb/src/communication.rs @@ -0,0 +1,124 @@ +use core::{ + future::poll_fn, + sync::atomic::{AtomicU32, Ordering}, + task::{Context, Poll}, +}; + +use alloc::{sync::Arc, vec::Vec}; +use futures_util::task::AtomicWaker; +use libk_mm::address::PhysicalAddress; + +use crate::error::UsbError; + +#[derive(Clone, Copy, PartialEq, Eq, Debug)] +pub enum UsbDirection { + Out, + In, +} + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct UsbTransferToken(pub u64); + +#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)] +#[repr(transparent)] +pub struct UsbTransferResult(pub u32); + +pub struct UsbTransferStatus { + pub result: AtomicU32, + pub notify: AtomicWaker, +} + +pub struct UsbControlTransfer { + pub id: UsbTransferToken, + pub length: usize, + pub direction: UsbDirection, + pub elements: Vec, + pub status: Arc, +} + +pub struct UsbInterruptTransfer { + pub address: PhysicalAddress, + pub length: usize, + pub direction: UsbDirection, + pub status: Arc, +} + +impl UsbDirection { + pub const fn is_device_to_host(self) -> bool { + matches!(self, UsbDirection::In) + } +} + +// TODO this is xHCI-specific +impl UsbTransferResult { + pub fn is_aborted(&self) -> bool { + self.0 == u32::MAX + } + + pub fn is_success(&self) -> bool { + (self.0 >> 24) & 0xFF == 1 + } + + pub fn sub_length(&self) -> usize { + (self.0 & 0xFFFFFF) as _ + } +} + +impl UsbControlTransfer { + pub async fn wait(&self) -> Result { + let sub_length = self.status.wait().await?; + Ok(self.length.saturating_sub(sub_length)) + } +} + +impl UsbInterruptTransfer { + pub async fn wait(&self) -> Result { + let sub_length = self.status.wait().await?; + Ok(self.length.saturating_sub(sub_length)) + } +} + +impl UsbTransferStatus { + pub fn new() -> Self { + Self { + result: AtomicU32::new(0), + notify: AtomicWaker::new(), + } + } + + pub(crate) async fn wait(&self) -> Result { + poll_fn(|cx| { + self.poll(cx).map(|v| { + if 
v.is_success() { + Ok(v.sub_length()) + } else if v.is_aborted() { + Err(UsbError::DeviceDisconnected) + } else { + Err(UsbError::TransferFailed) + } + }) + }) + .await + } + + pub fn signal(&self, status: u32) { + self.result.store(status, Ordering::Release); + self.notify.wake(); + } + + pub fn abort(&self) { + self.result.store(u32::MAX, Ordering::Release); + self.notify.wake(); + } + + pub fn poll(&self, cx: &mut Context<'_>) -> Poll { + self.notify.register(cx.waker()); + let value = self.result.load(Ordering::Acquire); + if value != 0 { + Poll::Ready(UsbTransferResult(value)) + } else { + Poll::Pending + } + } +} diff --git a/kernel/driver/bus/usb/src/descriptor.rs b/kernel/driver/bus/usb/src/descriptor.rs new file mode 100644 index 00000000..f99468ce --- /dev/null +++ b/kernel/driver/bus/usb/src/descriptor.rs @@ -0,0 +1,146 @@ +use bytemuck::{Pod, Zeroable}; + +use crate::{ + error::UsbError, + info::{UsbDeviceClass, UsbDeviceProtocol, UsbEndpointType}, + UsbDirection, +}; + +#[derive(Clone, Copy, Debug, Default, Pod, Zeroable)] +#[repr(C, packed)] +pub struct UsbDeviceDescriptor { + pub length: u8, + pub ty: u8, + pub bcd_usb: u16, + pub device_class: u8, + pub device_subclass: u8, + pub device_protocol: u8, + pub max_packet_size_0: u8, + pub id_vendor: u16, + pub id_product: u16, + pub bcd_device: u16, + pub manufacturer_str: u8, + pub product_str: u8, + pub serial_number_str: u8, + pub num_configurations: u8, +} + +#[derive(Clone, Copy, Debug, Default, Pod, Zeroable)] +#[repr(C, packed)] +pub struct UsbConfigurationDescriptor { + pub length: u8, + pub ty: u8, + pub total_length: u16, + pub num_interfaces: u8, + pub config_val: u8, + pub config_str: u8, + pub attributes: u8, + pub max_power: u8, +} + +#[derive(Clone, Copy, Debug, Default, Pod, Zeroable)] +#[repr(C, packed)] +pub struct UsbInterfaceDescriptor { + pub length: u8, + pub ty: u8, + pub interface_number: u8, + pub alternate_setting: u8, + pub num_endpoints: u8, + pub interface_class: u8, + pub interface_subclass: u8, + pub interface_protocol: u8, + pub interface_str: u8, +} + +#[derive(Clone, Copy, Debug, Default, Pod, Zeroable)] +#[repr(C, packed)] +pub struct UsbEndpointDescriptor { + pub length: u8, + pub ty: u8, + pub endpoint_address: u8, + pub attributes: u8, + pub max_packet_size: u16, + pub interval: u8, +} + +#[derive(Clone, Copy, Debug, Default, Pod, Zeroable)] +#[repr(C, packed)] +pub struct UsbDeviceQualifier { + pub length: u8, + pub ty: u8, + pub bcd_usb: u16, + pub device_class: u8, + pub device_subclass: u8, + pub device_protocol: u8, + pub max_packet_size_0: u8, + pub num_configurations: u8, + pub _reserved: u8, +} + +#[derive(Clone, Copy, Debug, Default, Pod, Zeroable)] +#[repr(C, packed)] +pub struct UsbOtherSpeedConfiguration { + pub length: u8, + pub ty: u8, + pub total_length: u16, + pub num_interfaces: u8, + pub config_val: u8, + pub config_str: u8, + pub attributes: u8, + pub max_power: u8, +} + +impl UsbInterfaceDescriptor { + pub fn class(&self) -> UsbDeviceClass { + UsbDeviceClass::try_from(self.interface_class).unwrap_or(UsbDeviceClass::Unknown) + } + + pub fn protocol(&self) -> UsbDeviceProtocol { + UsbDeviceProtocol::try_from(self.interface_protocol).unwrap_or(UsbDeviceProtocol::Unknown) + } +} + +impl UsbEndpointDescriptor { + pub fn direction(&self) -> UsbDirection { + match self.endpoint_address >> 7 { + 1 => UsbDirection::In, + 0 => UsbDirection::Out, + _ => unreachable!(), + } + } + + pub fn number(&self) -> u8 { + assert_ne!(self.endpoint_address & 0xF, 0); + self.endpoint_address & 
0xF + } + + pub fn transfer_type(&self) -> UsbEndpointType { + match self.attributes & 0x3 { + 0 => UsbEndpointType::Control, + 1 => UsbEndpointType::Isochronous, + 2 => UsbEndpointType::Bulk, + 3 => UsbEndpointType::Interrupt, + _ => unreachable!(), + } + } +} + +impl UsbDeviceDescriptor { + pub fn class(&self) -> UsbDeviceClass { + UsbDeviceClass::try_from(self.device_class).unwrap_or(UsbDeviceClass::Unknown) + } + + pub fn protocol(&self) -> UsbDeviceProtocol { + UsbDeviceProtocol::try_from(self.device_protocol).unwrap_or(UsbDeviceProtocol::Unknown) + } + + pub fn max_packet_size(&self) -> Result { + match self.max_packet_size_0 { + 8 => Ok(8), + 16 => Ok(16), + 32 => Ok(32), + 64 => Ok(64), + _ => Err(UsbError::InvalidDescriptorField), + } + } +} diff --git a/kernel/driver/bus/usb/src/device.rs b/kernel/driver/bus/usb/src/device.rs new file mode 100644 index 00000000..693221f8 --- /dev/null +++ b/kernel/driver/bus/usb/src/device.rs @@ -0,0 +1,204 @@ +use core::{fmt, ops::Deref}; + +use alloc::{boxed::Box, vec::Vec}; +use futures_util::future::BoxFuture; +use libk_mm::PageBox; +use libk_util::sync::spin_rwlock::{IrqSafeRwLock, IrqSafeRwLockReadGuard}; + +use crate::{ + error::UsbError, + info::{UsbConfigurationInfo, UsbDeviceInfo, UsbEndpointInfo, UsbInterfaceInfo}, + pipe::{ + control::{ConfigurationDescriptorEntry, UsbControlPipeAccess}, + interrupt::UsbInterruptInPipeAccess, + }, + UsbHostController, +}; + +// High-level structures for info provided through descriptors + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub struct UsbBusAddress { + pub bus: u16, + pub device: u8, +} + +pub struct UsbDeviceAccess { + pub device: Box, + pub info: UsbDeviceInfo, + pub num_configurations: u8, + pub current_configuration: IrqSafeRwLock>, +} + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug)] +pub enum UsbSpeed { + Low, + Full, + High, + Super, +} + +#[allow(unused)] +pub trait UsbDevice: Send + Sync { + // Endpoint "0" + fn control_pipe(&self) -> &UsbControlPipeAccess; + + fn open_interrupt_in_pipe<'a>( + &'a self, + number: u8, + ) -> BoxFuture> { + unimplemented!() + } + + fn port_number(&self) -> u8; + fn bus_address(&self) -> UsbBusAddress; + fn speed(&self) -> UsbSpeed; + fn controller(&self) -> &'static dyn UsbHostController; + + fn handle_detach(&self); + + fn debug(&self) {} +} + +impl UsbDeviceAccess { + /// Expected device state: + /// + /// * Link-layer stuff has been reset and established properly by the HCD + /// * Device is not yet configured + /// * Control pipe for the device has been properly set up + /// * Device has been assigned a bus address + pub async fn setup(raw: Box) -> Result { + let control = raw.control_pipe(); + let mut string_buffer = PageBox::new_uninit().map_err(UsbError::MemoryError)?; + + let device_desc = control.query_device_descriptor().await?; + + let manufacturer = control + .query_string(device_desc.manufacturer_str, &mut string_buffer) + .await?; + let product = control + .query_string(device_desc.product_str, &mut string_buffer) + .await?; + + let info = UsbDeviceInfo { + manufacturer, + product, + + id_vendor: device_desc.id_vendor, + id_product: device_desc.id_product, + + device_class: device_desc.class(), + device_subclass: device_desc.device_subclass, + device_protocol: device_desc.protocol(), + + max_packet_size: device_desc.max_packet_size()?, + }; + + Ok(Self { + device: raw, + info, + num_configurations: device_desc.num_configurations, + current_configuration: IrqSafeRwLock::new(None), + }) + } + + pub fn 
read_current_configuration( + &self, + ) -> IrqSafeRwLockReadGuard> { + self.current_configuration.read() + } + + pub async fn select_configuration bool>( + &self, + predicate: F, + ) -> Result, UsbError> { + let mut current_config = self.current_configuration.write(); + let control_pipe = self.control_pipe(); + + for i in 0..self.num_configurations { + let info = self.query_configuration_info(i).await?; + + if predicate(&info) { + log::debug!("Selected configuration: {:#?}", info); + let config = current_config.insert(info); + + control_pipe + .set_configuration(config.config_value as _) + .await?; + + return Ok(Some(config.clone())); + } + } + + Ok(None) + } + + pub async fn query_configuration_info( + &self, + index: u8, + ) -> Result { + if index >= self.num_configurations { + return Err(UsbError::InvalidConfiguration); + } + let mut string_buffer = PageBox::new_uninit().map_err(UsbError::MemoryError)?; + let control_pipe = self.control_pipe(); + let query = control_pipe.query_configuration_descriptor(index).await?; + + let configuration_name = control_pipe + .query_string(query.configuration().config_str, &mut string_buffer) + .await?; + + let mut endpoints = Vec::new(); + let mut interfaces = Vec::new(); + + for desc in query.descriptors() { + match desc { + ConfigurationDescriptorEntry::Endpoint(ep) => { + endpoints.push(UsbEndpointInfo { + number: ep.number(), + direction: ep.direction(), + max_packet_size: ep.max_packet_size as _, + ty: ep.transfer_type(), + }); + } + ConfigurationDescriptorEntry::Interface(iface) => { + let name = control_pipe + .query_string(iface.interface_str, &mut string_buffer) + .await?; + interfaces.push(UsbInterfaceInfo { + name, + number: iface.interface_number, + + interface_class: iface.class(), + interface_subclass: iface.interface_subclass, + interface_protocol: iface.protocol(), + }); + } + _ => (), + } + } + + let info = UsbConfigurationInfo { + name: configuration_name, + config_value: query.configuration().config_val, + interfaces, + endpoints, + }; + + Ok(info) + } +} + +impl Deref for UsbDeviceAccess { + type Target = dyn UsbDevice; + + fn deref(&self) -> &Self::Target { + &*self.device + } +} + +impl fmt::Display for UsbBusAddress { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{}:{}", self.bus, self.device) + } +} diff --git a/kernel/driver/bus/usb/src/error.rs b/kernel/driver/bus/usb/src/error.rs new file mode 100644 index 00000000..a4cc18e9 --- /dev/null +++ b/kernel/driver/bus/usb/src/error.rs @@ -0,0 +1,30 @@ +use yggdrasil_abi::error::Error; + +#[derive(Debug)] +pub enum UsbError { + /// Could not allocate memory for some device structure + MemoryError(Error), + /// Other system errors + SystemError(Error), + // HC-side init stage errors + OutOfAddresses, + HostControllerCommandFailed(u8), + PortResetFailed, + PortInitFailed, + // Setup stage errors + InvalidConfiguration, + InvalidDescriptorField, + // Runtime errors + DeviceBusy, + DeviceDisconnected, + TransferFailed, +} + +impl From for Error { + fn from(value: UsbError) -> Self { + match value { + UsbError::MemoryError(e) => e, + _ => Error::InvalidOperation, + } + } +} diff --git a/kernel/driver/bus/usb/src/info.rs b/kernel/driver/bus/usb/src/info.rs new file mode 100644 index 00000000..8e079bb3 --- /dev/null +++ b/kernel/driver/bus/usb/src/info.rs @@ -0,0 +1,84 @@ +use alloc::{string::String, vec::Vec}; +use yggdrasil_abi::primitive_enum; + +use crate::UsbDirection; + +#[derive(Debug, Clone, Copy)] +pub enum UsbEndpointType { + Control, + Isochronous, + 
Bulk, + Interrupt, +} + +#[derive(Debug, Clone, Copy)] +pub enum UsbSyncType { + NoSync, + Async, + Adaptive, + Sync, +} + +#[derive(Debug)] +pub enum UsbUsageType { + Data, + Feedback, + ImplicitFeedbackData, + Reserved, +} + +primitive_enum! { + pub enum UsbDeviceClass: u8 { + FromInterface = 0x00, + Hid = 0x03, + Unknown = 0xFF, + } +} + +primitive_enum! { + pub enum UsbDeviceProtocol: u8 { + FromInterface = 0x00, + Unknown = 0xFF, + } +} + +#[derive(Debug, Clone)] +pub struct UsbInterfaceInfo { + pub name: String, + pub number: u8, + + pub interface_class: UsbDeviceClass, + pub interface_subclass: u8, + pub interface_protocol: UsbDeviceProtocol, +} + +#[derive(Debug, Clone)] +pub struct UsbEndpointInfo { + pub number: u8, + pub direction: UsbDirection, + pub max_packet_size: usize, + pub ty: UsbEndpointType, +} + +#[derive(Debug, Clone)] +pub struct UsbConfigurationInfo { + pub name: String, + pub config_value: u8, + pub interfaces: Vec, + pub endpoints: Vec, +} + +#[derive(Debug, Clone)] +pub struct UsbDeviceInfo { + pub manufacturer: String, + pub product: String, + + pub id_vendor: u16, + pub id_product: u16, + + pub device_class: UsbDeviceClass, + pub device_subclass: u8, + pub device_protocol: UsbDeviceProtocol, + + pub max_packet_size: usize, +} diff --git a/kernel/driver/bus/usb/src/lib.rs b/kernel/driver/bus/usb/src/lib.rs new file mode 100644 index 00000000..0a4e6d7f --- /dev/null +++ b/kernel/driver/bus/usb/src/lib.rs @@ -0,0 +1,21 @@ +#![no_std] +#![feature(iter_array_chunks, maybe_uninit_slice)] + +extern crate alloc; + +pub mod bus; +pub mod communication; +pub mod descriptor; +pub mod device; +pub mod error; +pub mod info; +pub mod pipe; +pub mod util; + +pub mod class_driver; + +pub use communication::{UsbControlTransfer, UsbDirection, UsbTransferStatus, UsbTransferToken}; + +pub trait UsbEndpoint {} + +pub trait UsbHostController {} diff --git a/kernel/driver/bus/usb/src/pipe/control.rs b/kernel/driver/bus/usb/src/pipe/control.rs new file mode 100644 index 00000000..2d68d165 --- /dev/null +++ b/kernel/driver/bus/usb/src/pipe/control.rs @@ -0,0 +1,326 @@ +use core::{ + cmp::Ordering, + mem::{size_of, MaybeUninit}, + ops::Deref, +}; + +use alloc::{boxed::Box, string::String}; +use bytemuck::{Pod, Zeroable}; +use libk_mm::{ + address::{AsPhysicalAddress, PhysicalAddress}, + PageBox, +}; + +use crate::{ + descriptor::{ + UsbConfigurationDescriptor, UsbDeviceDescriptor, UsbDeviceQualifier, UsbEndpointDescriptor, + UsbInterfaceDescriptor, UsbOtherSpeedConfiguration, + }, + error::UsbError, + UsbControlTransfer, UsbDirection, +}; + +use super::UsbGenericPipe; + +#[derive(Debug)] +pub struct ControlTransferSetup { + pub bm_request_type: u8, + pub b_request: u8, + pub w_value: u16, + pub w_index: u16, + pub w_length: u16, +} + +#[derive(Clone, Copy, Debug, Default, Pod, Zeroable)] +#[repr(C)] +pub struct SetConfiguration; + +pub trait UsbDeviceRequest: Sized + Pod { + const BM_REQUEST_TYPE: u8; + const B_REQUEST: u8; +} + +pub trait UsbDescriptorRequest: UsbDeviceRequest { + const DESCRIPTOR_TYPE: u8; +} + +impl UsbDescriptorRequest for UsbDeviceDescriptor { + const DESCRIPTOR_TYPE: u8 = 1; +} + +impl UsbDescriptorRequest for UsbConfigurationDescriptor { + const DESCRIPTOR_TYPE: u8 = 2; +} + +impl UsbDescriptorRequest for UsbInterfaceDescriptor { + const DESCRIPTOR_TYPE: u8 = 4; +} + +impl UsbDeviceRequest for SetConfiguration { + const BM_REQUEST_TYPE: u8 = 0; + const B_REQUEST: u8 = 0x09; +} + +impl UsbDeviceRequest for U { + const BM_REQUEST_TYPE: u8 = 0b10000000; + const 
B_REQUEST: u8 = 0x06;
+}
+
+fn decode_usb_string(bytes: &[u8]) -> Result<String, UsbError> {
+    if bytes.len() % 2 != 0 {
+        return Err(UsbError::InvalidDescriptorField);
+    }
+
+    char::decode_utf16(
+        bytes
+            .into_iter()
+            .array_chunks::<2>()
+            .map(|[&a, &b]| u16::from_le_bytes([a, b])),
+    )
+    .collect::<Result<String, _>>()
+    .map_err(|_| UsbError::InvalidDescriptorField)
+}
+
+// Pipe impl
+
+pub trait UsbControlPipe: UsbGenericPipe + Send + Sync {
+    fn start_transfer(
+        &self,
+        setup: ControlTransferSetup,
+        data: Option<(PhysicalAddress, usize, UsbDirection)>,
+    ) -> Result<UsbControlTransfer, UsbError>;
+
+    fn complete_transfer(&self, transfer: UsbControlTransfer);
+}
+
+pub struct UsbControlPipeAccess(pub Box<dyn UsbControlPipe>);
+
+fn input_buffer<T>(
+    data: &mut PageBox<MaybeUninit<T>>,
+) -> (PhysicalAddress, usize, UsbDirection) {
+    (
+        unsafe { data.as_physical_address() },
+        size_of::<T>(),
+        UsbDirection::In,
+    )
+}
+
+#[derive(Debug)]
+pub enum ConfigurationDescriptorEntry<'a> {
+    Configuration(&'a UsbConfigurationDescriptor),
+    Interface(&'a UsbInterfaceDescriptor),
+    Endpoint(&'a UsbEndpointDescriptor),
+    DeviceQualifier(&'a UsbDeviceQualifier),
+    OtherSpeed(&'a UsbOtherSpeedConfiguration),
+    Other,
+}
+
+pub struct ConfigurationDescriptorIter<'a> {
+    buffer: &'a PageBox<[u8]>,
+    offset: usize,
+}
+
+pub struct ConfigurationDescriptorQuery {
+    buffer: PageBox<[u8]>,
+}
+
+impl<'a> Iterator for ConfigurationDescriptorIter<'a> {
+    type Item = ConfigurationDescriptorEntry<'a>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        if self.offset + 2 >= self.buffer.len() {
+            return None;
+        }
+
+        let desc_len = self.buffer[self.offset] as usize;
+        let desc_ty = self.buffer[self.offset + 1];
+
+        if desc_len == 0 {
+            return None;
+        }
+
+        let entry = match desc_ty {
+            0x02 if desc_len == size_of::<UsbConfigurationDescriptor>() => {
+                ConfigurationDescriptorEntry::Configuration(bytemuck::from_bytes(
+                    &self.buffer[self.offset..self.offset + desc_len],
+                ))
+            }
+            0x04 if desc_len == size_of::<UsbInterfaceDescriptor>() => {
+                ConfigurationDescriptorEntry::Interface(bytemuck::from_bytes(
+                    &self.buffer[self.offset..self.offset + desc_len],
+                ))
+            }
+            0x05 if desc_len == size_of::<UsbEndpointDescriptor>() => {
+                ConfigurationDescriptorEntry::Endpoint(bytemuck::from_bytes(
+                    &self.buffer[self.offset..self.offset + desc_len],
+                ))
+            }
+            0x07 if desc_len == size_of::<UsbOtherSpeedConfiguration>() => {
+                ConfigurationDescriptorEntry::OtherSpeed(bytemuck::from_bytes(
+                    &self.buffer[self.offset..self.offset + desc_len],
+                ))
+            }
+            _ => ConfigurationDescriptorEntry::Other,
+        };
+
+        self.offset += desc_len;
+
+        Some(entry)
+    }
+}
+
+impl ConfigurationDescriptorQuery {
+    pub fn configuration(&self) -> &UsbConfigurationDescriptor {
+        bytemuck::from_bytes(&self.buffer[..size_of::<UsbConfigurationDescriptor>()])
+    }
+
+    pub fn descriptors(&self) -> ConfigurationDescriptorIter<'_> {
+        ConfigurationDescriptorIter {
+            buffer: &self.buffer,
+            offset: 0,
+        }
+    }
+}
+
+impl UsbControlPipeAccess {
+    pub async fn perform_value_control(
+        &self,
+        setup: ControlTransferSetup,
+        buffer: Option<(PhysicalAddress, usize, UsbDirection)>,
+    ) -> Result<(), UsbError> {
+        let transfer = self.start_transfer(setup, buffer)?;
+        transfer.status.wait().await?;
+        self.complete_transfer(transfer);
+        Ok(())
+    }
+
+    async fn fill_configuration_descriptor(
+        &self,
+        index: u8,
+        buffer: &mut PageBox<[MaybeUninit<u8>]>,
+    ) -> Result<(), UsbError> {
+        self.perform_value_control(
+            ControlTransferSetup {
+                bm_request_type: 0b10000000,
+                b_request: 0x06,
+                w_value: 0x200 | (index as u16),
+                w_index: 0,
+                w_length: buffer.len().try_into().unwrap(),
+            },
+            Some((
+                unsafe { buffer.as_physical_address() },
+                buffer.len(),
+                UsbDirection::In,
+            )),
+        )
+        .await
+    }
+
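+    // Configuration descriptors are variable-length: the fixed-size header is
+    // fetched first to learn `total_length`, then the whole blob (including
+    // interface and endpoint descriptors) is fetched with a second request.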
+    pub async fn query_configuration_descriptor(
+        &self,
+        index: u8,
+    ) -> Result<ConfigurationDescriptorQuery, UsbError> {
+        // First, query the real length of the descriptor
+        let mut buffer = PageBox::new_uninit_slice(size_of::<UsbConfigurationDescriptor>())
+            .map_err(UsbError::MemoryError)?;
+        self.fill_configuration_descriptor(index, &mut buffer)
+            .await?;
+        let buffer = unsafe { PageBox::assume_init_slice(buffer) };
+
+        let desc: &UsbConfigurationDescriptor = bytemuck::from_bytes(&buffer);
+        let total_len = desc.total_length as usize;
+
+        // Return if everything's ready at this point
+        match total_len.cmp(&size_of::<UsbConfigurationDescriptor>()) {
+            Ordering::Less => todo!(),
+            Ordering::Equal => return Ok(ConfigurationDescriptorQuery { buffer }),
+            _ => (),
+        }
+
+        // Otherwise, query the rest of the data
+        let mut buffer = PageBox::new_uninit_slice(total_len).map_err(UsbError::MemoryError)?;
+        self.fill_configuration_descriptor(index, &mut buffer)
+            .await?;
+        let buffer = unsafe { PageBox::assume_init_slice(buffer) };
+
+        let desc: &UsbConfigurationDescriptor =
+            bytemuck::from_bytes(&buffer[..size_of::<UsbConfigurationDescriptor>()]);
+        let total_len = desc.total_length as usize;
+
+        if total_len != buffer.len() {
+            todo!();
+        }
+
+        Ok(ConfigurationDescriptorQuery { buffer })
+    }
+
+    pub async fn query_device_descriptor(&self) -> Result<PageBox<UsbDeviceDescriptor>, UsbError> {
+        let mut output = PageBox::new_uninit().map_err(UsbError::MemoryError)?;
+        self.perform_value_control(
+            ControlTransferSetup {
+                bm_request_type: 0b10000000,
+                b_request: 0x06,
+                w_value: 0x100,
+                w_index: 0,
+                w_length: size_of::<UsbDeviceDescriptor>() as _,
+            },
+            Some(input_buffer(&mut output)),
+        )
+        .await?;
+
+        Ok(unsafe { output.assume_init() })
+    }
+
+    pub async fn query_string(
+        &self,
+        index: u8,
+        buffer: &mut PageBox<MaybeUninit<[u8; 4096]>>,
+    ) -> Result<String, UsbError> {
+        self.perform_value_control(
+            ControlTransferSetup {
+                bm_request_type: 0b10000000,
+                b_request: 0x06,
+                w_value: 0x300 | (index as u16),
+                w_index: 0,
+                w_length: 4096,
+            },
+            Some(input_buffer(buffer)),
+        )
+        .await?;
+        let data = unsafe { buffer.assume_init_ref() };
+
+        let len = data[0] as usize;
+
+        decode_usb_string(&data[2..len])
+    }
+
+    pub async fn perform_action<D: UsbDeviceRequest>(
+        &self,
+        w_value: u16,
+        w_index: u16,
+    ) -> Result<(), UsbError> {
+        self.perform_value_control(
+            ControlTransferSetup {
+                bm_request_type: D::BM_REQUEST_TYPE,
+                b_request: D::B_REQUEST,
+                w_value,
+                w_index,
+                w_length: 0,
+            },
+            None,
+        )
+        .await
+    }
+
+    pub async fn set_configuration(&self, value: u16) -> Result<(), UsbError> {
+        self.perform_action::<SetConfiguration>(value, 0).await
+    }
+}
+
+impl Deref for UsbControlPipeAccess {
+    type Target = dyn UsbControlPipe;
+
+    fn deref(&self) -> &Self::Target {
+        &*self.0
+    }
+}
diff --git a/kernel/driver/bus/usb/src/pipe/interrupt.rs b/kernel/driver/bus/usb/src/pipe/interrupt.rs
new file mode 100644
index 00000000..a474db1a
--- /dev/null
+++ b/kernel/driver/bus/usb/src/pipe/interrupt.rs
@@ -0,0 +1,32 @@
+use core::ops::Deref;
+
+use alloc::boxed::Box;
+use libk_mm::PageBox;
+
+use crate::{communication::UsbInterruptTransfer, error::UsbError};
+
+use super::UsbGenericPipe;
+
+pub trait UsbInterruptInPipe: UsbGenericPipe + Send + Sync {
+    fn start_read(&self, buffer: &mut PageBox<[u8]>) -> Result<UsbInterruptTransfer, UsbError>;
+    fn complete_transfer(&self, transfer: UsbInterruptTransfer);
+}
+
+pub struct UsbInterruptInPipeAccess(pub Box<dyn UsbInterruptInPipe>);
+
+impl UsbInterruptInPipeAccess {
+    pub async fn read<'a>(&self, buffer: &'a mut PageBox<[u8]>) -> Result<&'a [u8], UsbError> {
+        let transfer = self.start_read(buffer)?;
+        let len = transfer.wait().await?;
+        self.complete_transfer(transfer);
+        Ok(&buffer[..len])
+    }
+}
+
+impl Deref for UsbInterruptInPipeAccess {
+    type Target = 
dyn UsbInterruptInPipe; + + fn deref(&self) -> &Self::Target { + &*self.0 + } +} diff --git a/kernel/driver/bus/usb/src/pipe/mod.rs b/kernel/driver/bus/usb/src/pipe/mod.rs new file mode 100644 index 00000000..a45d7632 --- /dev/null +++ b/kernel/driver/bus/usb/src/pipe/mod.rs @@ -0,0 +1,8 @@ +pub mod control; +pub mod interrupt; + +pub trait UsbGenericPipe {} + +pub enum UsbPipe { + Control(control::UsbControlPipeAccess), +} diff --git a/kernel/driver/bus/usb/src/util.rs b/kernel/driver/bus/usb/src/util.rs new file mode 100644 index 00000000..bd3e09f9 --- /dev/null +++ b/kernel/driver/bus/usb/src/util.rs @@ -0,0 +1,64 @@ +use libk_util::sync::spin_rwlock::IrqSafeRwLock; + +use crate::error::UsbError; + +pub struct UsbAddressAllocator { + // 256 bits + bitmap: IrqSafeRwLock, +} + +struct Bitmap { + data: [u64; 4], +} + +impl Bitmap { + const fn bit_index(bit: u8) -> usize { + (bit / 64) as usize + } + + const fn bit_mask(bit: u8) -> u64 { + 1 << (bit % 64) + } + + pub const fn new() -> Self { + // First is 1 to prevent address 0 from being allocated + Self { data: [1, 0, 0, 0] } + } + + pub fn is_set(&self, bit: u8) -> bool { + self.data[Self::bit_index(bit)] & Self::bit_mask(bit) != 0 + } + + pub fn set(&mut self, bit: u8) { + self.data[Self::bit_index(bit)] |= Self::bit_mask(bit); + } + + pub fn clear(&mut self, bit: u8) { + self.data[Self::bit_index(bit)] &= !Self::bit_mask(bit); + } +} + +impl UsbAddressAllocator { + pub fn new() -> Self { + Self { + bitmap: IrqSafeRwLock::new(Bitmap::new()), + } + } + + pub fn allocate(&self) -> Result { + let mut bitmap = self.bitmap.write(); + for bit in 0..=255 { + if !bitmap.is_set(bit) { + bitmap.set(bit); + return Ok(bit); + } + } + Err(UsbError::OutOfAddresses) + } + + pub fn free(&self, address: u8) { + let mut bitmap = self.bitmap.write(); + assert!(bitmap.is_set(address)); + bitmap.clear(address); + } +} diff --git a/kernel/driver/fs/kernel-fs/Cargo.toml b/kernel/driver/fs/kernel-fs/Cargo.toml new file mode 100644 index 00000000..e6c25931 --- /dev/null +++ b/kernel/driver/fs/kernel-fs/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "kernel-fs" +version = "0.1.0" +edition = "2021" +authors = ["Mark Poliakov "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +vfs = { path = "../../../lib/vfs" } +libk-util = { path = "../../../libk/libk-util" } + +ygg_driver_block = { path = "../../block/core" } + +log = "0.4.20" diff --git a/kernel/driver/fs/kernel-fs/src/devfs.rs b/kernel/driver/fs/kernel-fs/src/devfs.rs new file mode 100644 index 00000000..63c8e814 --- /dev/null +++ b/kernel/driver/fs/kernel-fs/src/devfs.rs @@ -0,0 +1,86 @@ +//! Device virtual file system +use core::sync::atomic::{AtomicUsize, Ordering}; + +use alloc::{format, string::String}; +use libk_util::OneTimeInit; +use vfs::{impls::MemoryDirectory, CharDevice, Node, NodeFlags, NodeRef}; +use ygg_driver_block::BlockDevice; +use yggdrasil_abi::error::Error; + +/// Describes the kind of a character device +#[derive(Debug)] +pub enum CharDeviceType { + /// Regular terminal + TtyRegular, + /// Serial terminal + TtySerial, +} + +static DEVFS_ROOT: OneTimeInit = OneTimeInit::new(); + +/// Sets up the device filesystem +pub fn init() { + let root = MemoryDirectory::empty(); + DEVFS_ROOT.init(root); +} + +/// Returns the root of the devfs. +/// +/// # Panics +/// +/// Will panic if the devfs hasn't yet been initialized. 
+pub fn root() -> &'static NodeRef { + DEVFS_ROOT.get() +} + +/// Adds a character device with a custom name +pub fn add_named_char_device(dev: &'static dyn CharDevice, name: String) -> Result<(), Error> { + log::info!("Add char device: {}", name); + + let node = Node::char(dev, NodeFlags::IN_MEMORY_PROPS); + + DEVFS_ROOT.get().add_child(name, node) +} + +/// Adds a block device with a custom name +pub fn add_named_block_device>( + dev: &'static dyn BlockDevice, + name: S, +) -> Result<(), Error> { + let name = name.into(); + log::info!("Add block device: {}", name); + + let node = Node::block(dev, NodeFlags::IN_MEMORY_PROPS); + + DEVFS_ROOT.get().add_child(name, node) +} + +pub fn add_block_device_partition>( + base_name: S, + index: usize, + partition: &'static dyn BlockDevice, +) -> Result<(), Error> { + let base_name = base_name.into(); + let name = format!("{}{}", base_name, index + 1); + log::info!("Add partition: {}", name); + + let node = Node::block(partition, NodeFlags::IN_MEMORY_PROPS); + + DEVFS_ROOT.get().add_child(name, node) +} + +/// Adds a character device to the devfs +pub fn add_char_device(dev: &'static dyn CharDevice, kind: CharDeviceType) -> Result<(), Error> { + static TTY_COUNT: AtomicUsize = AtomicUsize::new(0); + static TTYS_COUNT: AtomicUsize = AtomicUsize::new(0); + + let (count, prefix) = match kind { + CharDeviceType::TtyRegular => (&TTY_COUNT, "tty"), + CharDeviceType::TtySerial => (&TTYS_COUNT, "ttyS"), + }; + + let value = count.fetch_add(1, Ordering::AcqRel); + let name = format!("{}{}", prefix, value); + + add_named_char_device(dev, name) +} diff --git a/kernel/driver/fs/kernel-fs/src/lib.rs b/kernel/driver/fs/kernel-fs/src/lib.rs new file mode 100644 index 00000000..22b952ff --- /dev/null +++ b/kernel/driver/fs/kernel-fs/src/lib.rs @@ -0,0 +1,5 @@ +#![no_std] + +extern crate alloc; + +pub mod devfs; diff --git a/kernel/driver/fs/memfs/Cargo.toml b/kernel/driver/fs/memfs/Cargo.toml new file mode 100644 index 00000000..71f5250a --- /dev/null +++ b/kernel/driver/fs/memfs/Cargo.toml @@ -0,0 +1,19 @@ +[package] +name = "memfs" +version = "0.1.0" +edition = "2021" +authors = ["Mark Poliakov "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +libk-util = { path = "../../../libk/libk-util" } +vfs = { path = "../../../lib/vfs" } + +static_assertions = "1.1.0" +log = "0.4.20" + +[features] +default = [] +test-io = [] diff --git a/kernel/driver/fs/memfs/src/block.rs b/kernel/driver/fs/memfs/src/block.rs new file mode 100644 index 00000000..9510098f --- /dev/null +++ b/kernel/driver/fs/memfs/src/block.rs @@ -0,0 +1,357 @@ +//! Block management interfaces and structures +use core::{ + marker::PhantomData, + mem::{size_of, MaybeUninit}, + ops::{Deref, DerefMut}, + ptr::NonNull, +}; + +use yggdrasil_abi::error::Error; + +/// Number of bytes in a block +pub const SIZE: usize = 4096; +/// Maximum number of indirection pointers a block can hold +pub const ENTRY_COUNT: usize = SIZE / size_of::(); + +/// Interface for a block allocator +/// +/// # Safety +/// +/// This trait is unsafe to implement because it has to provide and accept raw data pointers of +/// exactly [SIZE]. +pub unsafe trait BlockAllocator: Send + Sync + 'static { + /// Allocates a contiguous block of size [SIZE] + fn alloc() -> Result, Error>; + + /// Dealocates a block. + /// + /// # Safety + /// + /// Unsafe: accepts arbitrary data pointers. 
+ unsafe fn dealloc(block: NonNull); +} + +struct BlockRef<'a, A: BlockAllocator> { + ptr: usize, + _pd: PhantomData<&'a A>, +} + +#[repr(transparent)] +struct BlockRaw<'a, A: BlockAllocator> { + inner: BlockRef<'a, A>, // inner: Option<&'a mut [u8; SIZE]>, +} + +/// Block containing file data +#[repr(transparent)] +pub struct BlockData<'a, A: BlockAllocator> { + inner: BlockRaw<'a, A>, +} + +/// Block containing indirection pointers to other blocks +#[repr(transparent)] +pub struct BlockIndirect<'a, A: BlockAllocator> { + inner: BlockRaw<'a, A>, +} + +impl<'a, A: BlockAllocator> BlockRef<'a, A> { + const fn null() -> Self { + Self { + ptr: 0, + _pd: PhantomData, + } + } + + unsafe fn from_allocated(address: usize) -> Self { + debug_assert_eq!(address & 1, 0); + Self { + ptr: address, + _pd: PhantomData, + } + } + + unsafe fn copy_on_write(address: usize) -> Self { + debug_assert_eq!(address & 1, 0); + Self { + ptr: address | 1, + _pd: PhantomData, + } + } + + #[inline] + fn is_allocated(&self) -> bool { + self.ptr & 1 == 0 + } + + #[inline] + fn is_null(&self) -> bool { + self.ptr == 0 + } + + #[inline] + fn as_mut(&mut self) -> &'a mut [u8; SIZE] { + if self.is_null() { + panic!("Null block dereference"); + } + + // FIXME: if a non-full block has been marked as CoW, the function will overrun the file + // boundary + if !self.is_allocated() { + // Allocate the block + let ptr = A::alloc().expect("Could not allocate a block").as_ptr() as usize; + // Clone data + let src = self.as_ref(); + let dst = unsafe { core::slice::from_raw_parts_mut(ptr as *mut u8, SIZE) }; + + dst.copy_from_slice(src); + + self.ptr = ptr; + } + + unsafe { &mut *((self.ptr & !1) as *mut [u8; SIZE]) } + } + + #[inline] + fn as_ref(&self) -> &'a [u8; SIZE] { + if self.is_null() { + panic!("Null block dereference"); + } + + unsafe { &*((self.ptr & !1) as *const [u8; SIZE]) } + } +} + +impl<'a, A: BlockAllocator> Drop for BlockRef<'a, A> { + fn drop(&mut self) { + if self.is_allocated() && !self.is_null() { + unsafe { + A::dealloc(NonNull::new_unchecked(self.ptr as *mut _)); + } + } + } +} + +impl<'a, A: BlockAllocator> BlockRaw<'a, A> { + const fn null() -> Self { + Self { + inner: BlockRef::null(), + } + } + + fn new() -> Result { + let ptr = A::alloc()?; + unsafe { + Ok(Self { + inner: BlockRef::from_allocated(ptr.as_ptr() as _), + }) + } + } + + unsafe fn as_uninit_indirect_mut( + &mut self, + ) -> &'a mut [MaybeUninit>; ENTRY_COUNT] { + if self.inner.is_null() { + panic!("Null block dereference"); + } + + &mut *(self.inner.ptr as *mut _) + } + + #[inline] + unsafe fn as_data_ref(&self) -> &'a [u8; SIZE] { + self.inner.as_ref() + } + + #[inline] + unsafe fn as_data_mut(&mut self) -> &'a mut [u8; SIZE] { + self.inner.as_mut() + } + + #[inline] + fn is_null(&self) -> bool { + self.inner.is_null() + } +} + +// Data block +impl<'a, A: BlockAllocator> BlockData<'a, A> { + /// Dummy entry representing a missing block + pub const fn null() -> Self { + Self { + inner: BlockRaw::null(), + } + } + + /// Create a Copy-on-Write data block from existing data. + /// + /// # Safety + /// + /// This function is unsafe as it accepts arbitrary pointers. The caller must ensure the + /// address is properly aligned (at least to a u16 boundary), does not cross any device memory + /// and the pointer outlives the block reference. 
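+    // Implementation note: a CoW reference is distinguished from an owned
+    // block by tagging bit 0 of the (aligned) source pointer; see
+    // `BlockRef::copy_on_write` above, and `BlockRef::as_mut`, which swaps
+    // the tagged pointer for a freshly allocated private copy on first write.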
+    pub unsafe fn copy_on_write(address: usize) -> Self {
+        Self {
+            inner: BlockRaw {
+                inner: BlockRef::copy_on_write(address),
+            },
+        }
+    }
+
+    /// Allocates a new block for data
+    pub fn new() -> Result<Self, Error> {
+        Ok(Self {
+            inner: BlockRaw::new()?,
+        })
+    }
+
+    /// Replaces self with a null block and drops any data that might've been allocated
+    pub fn set_null(&mut self) {
+        self.inner = BlockRaw::null();
+    }
+
+    /// Returns `true` if the block this structure refers to has not yet been allocated
+    #[inline]
+    pub fn is_null(&self) -> bool {
+        self.inner.is_null()
+    }
+}
+
+impl<A: BlockAllocator> Deref for BlockData<'_, A> {
+    type Target = [u8; SIZE];
+
+    fn deref(&self) -> &Self::Target {
+        unsafe { self.inner.as_data_ref() }
+    }
+}
+
+impl<A: BlockAllocator> DerefMut for BlockData<'_, A> {
+    #[inline]
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe { self.inner.as_data_mut() }
+    }
+}
+
+// Indirect block
+impl<'a, A: BlockAllocator> BlockIndirect<'a, A> {
+    /// Dummy entry representing a missing block
+    pub const fn null() -> Self {
+        Self {
+            inner: BlockRaw::null(),
+        }
+    }
+
+    /// Allocates a new indirection block
+    pub fn new() -> Result<Self, Error> {
+        let mut inner = BlockRaw::new()?;
+        for item in unsafe { inner.as_uninit_indirect_mut() } {
+            item.write(BlockData::null());
+        }
+        Ok(Self { inner })
+    }
+
+    /// Returns `true` if the block this structure refers to has not yet been allocated
+    #[inline]
+    pub fn is_null(&self) -> bool {
+        self.inner.is_null()
+    }
+}
+
+impl<'a, A: BlockAllocator> Deref for BlockIndirect<'a, A> {
+    type Target = [BlockData<'a, A>; ENTRY_COUNT];
+
+    fn deref(&self) -> &Self::Target {
+        unsafe { &*(self.inner.inner.as_ref() as *const _ as *const _) }
+    }
+}
+
+impl<'a, A: BlockAllocator> DerefMut for BlockIndirect<'a, A> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe { &mut *(self.inner.inner.as_mut() as *mut _ as *mut _) }
+    }
+}
+
+impl<'a, A: BlockAllocator> Drop for BlockIndirect<'a, A> {
+    fn drop(&mut self) {
+        if self.is_null() {
+            return;
+        }
+
+        for item in self.iter_mut() {
+            item.set_null();
+        }
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use core::sync::atomic::Ordering;
+    use std::vec::Vec;
+
+    use crate::block::{BlockData, BlockIndirect};
+
+    #[test]
+    fn block_indirect_allocation() {
+        test_allocator_with_counter!(A_COUNTER, A);
+
+        const N: usize = 7;
+        const M: usize = 13;
+
+        assert_eq!(A_COUNTER.load(Ordering::Acquire), 0);
+
+        {
+            let mut indirect = Vec::new();
+
+            // Allocate indirect blocks
+            {
+                for _ in 0..N {
+                    indirect.push(BlockIndirect::<A>::new().unwrap());
+                }
+            }
+
+            assert_eq!(A_COUNTER.load(Ordering::Acquire), N);
+
+            // Allocate L1 indirection blocks
+            {
+                for l1_block in indirect.iter_mut() {
+                    for i in 0..M {
+                        let l0_block = BlockData::new().unwrap();
+
+                        l1_block[i] = l0_block;
+                    }
+                }
+            }
+
+            // N * M data blocks and N indirection blocks
+            assert_eq!(A_COUNTER.load(Ordering::Acquire), N * M + N);
+
+            // Drop 1 indirect block for test
+            indirect.pop();
+
+            assert_eq!(A_COUNTER.load(Ordering::Acquire), (N - 1) * M + (N - 1));
+        }
+
+        assert_eq!(A_COUNTER.load(Ordering::Acquire), 0);
+    }
+
+    #[test]
+    fn block_allocation() {
+        test_allocator_with_counter!(A_COUNTER, A);
+
+        const N: usize = 13;
+        {
+            assert_eq!(A_COUNTER.load(Ordering::Acquire), 0);
+
+            {
+                let mut s = Vec::new();
+
+                for _ in 0..N {
+                    let mut block = BlockData::<A>::new().unwrap();
+                    block.fill(1);
+                    s.push(block);
+                }
+                assert_eq!(A_COUNTER.load(Ordering::Acquire), N);
+            }
+
+            assert_eq!(A_COUNTER.load(Ordering::Acquire), 0);
+        }
+    }
+}
diff --git a/kernel/driver/fs/memfs/src/bvec.rs
b/kernel/driver/fs/memfs/src/bvec.rs new file mode 100644 index 00000000..f6f91ad4 --- /dev/null +++ b/kernel/driver/fs/memfs/src/bvec.rs @@ -0,0 +1,483 @@ +//! Block vector management structures +use core::{ + cmp::Ordering, + mem::MaybeUninit, + ops::{Index, IndexMut}, +}; + +use yggdrasil_abi::error::Error; + +use crate::block::{self, BlockAllocator, BlockData, BlockIndirect}; + +// 16.125M total +const L0_BLOCKS: usize = 32; // 128K in L0 +const L1_BLOCKS: usize = 8; // 16M in L1 + +/// Block vector for efficient in-memory files +pub struct BVec<'a, A: BlockAllocator> { + capacity: usize, + size: usize, + l0: [BlockData<'a, A>; L0_BLOCKS], + l1: [BlockIndirect<'a, A>; L1_BLOCKS], +} + +impl<'a, A: BlockAllocator> BVec<'a, A> { + /// Creates an empty block vector. + /// + /// # Note + /// + /// The function is guaranteed to make no allocations before the vector is actually written to. + pub fn new() -> Self { + let mut l0 = MaybeUninit::uninit_array(); + let mut l1 = MaybeUninit::uninit_array(); + + for it in l0.iter_mut() { + it.write(BlockData::null()); + } + + for it in l1.iter_mut() { + it.write(BlockIndirect::null()); + } + + Self { + capacity: 0, + size: 0, + l0: unsafe { MaybeUninit::array_assume_init(l0) }, + l1: unsafe { MaybeUninit::array_assume_init(l1) }, + } + } + + /// Initializes the block vector with existing data, marking all blocks as Copy-on-Write + pub fn init_with_cow(&mut self, data: &'static [u8]) -> Result<(), Error> { + let data_ptr = data.as_ptr() as usize; + assert_eq!(data_ptr & 1, 0); + + let blocks = (data.len() + block::SIZE - 1) / block::SIZE; + self.resize(blocks)?; + + for i in 0..blocks { + let src = data_ptr + i * block::SIZE; + let block = unsafe { BlockData::copy_on_write(src) }; + self[i] = block; + } + + self.size = data.len(); + Ok(()) + } + + /// Returns the size of the data inside this vector + #[inline] + pub const fn size(&self) -> usize { + self.size + } + + fn grow_l1(&mut self, old_l1_cap: usize, new_l1_cap: usize) -> Result<(), Error> { + for i in old_l1_cap..new_l1_cap { + assert!(self.l1[i].is_null()); + self.l1[i] = BlockIndirect::new()?; + } + Ok(()) + } + + fn shrink_l1(&mut self, old_l1_cap: usize, new_l1_cap: usize) { + debug_assert!(new_l1_cap <= old_l1_cap); + for i in new_l1_cap..old_l1_cap { + assert!(!self.l1[i].is_null()); + self.l1[i] = BlockIndirect::null(); + } + } + + #[inline] + fn caps(cap: usize) -> (usize, usize) { + let l0_cap = core::cmp::min(cap, L0_BLOCKS); + let l1_cap = if cap > L0_BLOCKS { + core::cmp::min( + (cap - L0_BLOCKS + block::ENTRY_COUNT - 1) / block::ENTRY_COUNT, + L1_BLOCKS, + ) + } else { + 0 + }; + (l0_cap, l1_cap) + } + + /// Resizes the vector to hold exactly `new_capacity` data blocks + pub fn resize(&mut self, new_capacity: usize) -> Result<(), Error> { + // TODO handle L2 capacity + match new_capacity.cmp(&self.capacity) { + Ordering::Less => { + let (_, new_l1_cap) = Self::caps(new_capacity); + let (_, old_l1_cap) = Self::caps(self.capacity); + + // Shrink data blocks + for index in new_capacity..self.capacity { + let block = &mut self[index]; + assert!(!block.is_null()); + block.set_null(); + } + + // Shrink L1 blocks + self.shrink_l1(old_l1_cap, new_l1_cap); + } + + Ordering::Greater => { + let (_, new_l1_cap) = Self::caps(new_capacity); + let (_, old_l1_cap) = Self::caps(self.capacity); + + // Allocate L1 indirection blocks + assert!(new_l1_cap >= old_l1_cap); + if new_l1_cap > old_l1_cap { + self.grow_l1(old_l1_cap, new_l1_cap)?; + } + + // Grow data blocks + for index in 
self.capacity..new_capacity { + let block = unsafe { self.index_unchecked_mut(index) }; + assert!(block.is_null()); + *block = BlockData::new()?; + } + } + + Ordering::Equal => (), + } + + self.capacity = new_capacity; + Ok(()) + } + + fn ensure_write_capacity(&mut self, pos: usize, need_to_write: usize) -> Result<(), Error> { + let current_capacity = self.capacity; + let need_capacity = + (core::cmp::max(pos + need_to_write, self.size) + block::SIZE - 1) / block::SIZE; + + if need_capacity > current_capacity { + self.resize(need_capacity) + } else { + Ok(()) + } + } + + /// Writes data to the vector, growing it if needed + pub fn write(&mut self, pos: u64, data: &[u8]) -> Result { + let mut pos = pos as usize; + let mut rem = data.len(); + let mut doff = 0usize; + + self.ensure_write_capacity(pos, rem)?; + + if pos + rem > self.size { + self.size = pos + rem; + } + + while rem > 0 { + let index = pos / block::SIZE; + let offset = pos % block::SIZE; + let count = core::cmp::min(rem, block::SIZE - offset); + + let block = &mut self[index]; + + let dst = &mut block[offset..offset + count]; + let src = &data[doff..doff + count]; + dst.copy_from_slice(src); + + doff += count; + pos += count; + rem -= count; + } + + Ok(doff) + } + + /// Reads data from the vector + pub fn read(&self, pos: u64, data: &mut [u8]) -> Result { + let mut pos = pos as usize; + if pos > self.size { + return Err(Error::InvalidFile); + } + + let mut rem = core::cmp::min(self.size - pos, data.len()); + let mut doff = 0usize; + + while rem > 0 { + let index = pos / block::SIZE; + let offset = pos % block::SIZE; + let count = core::cmp::min(block::SIZE - offset, rem); + + let block = &self[index]; + + let src = &block[offset..offset + count]; + let dst = &mut data[doff..doff + count]; + + dst.copy_from_slice(src); + + doff += count; + pos += count; + rem -= count; + } + + Ok(doff) + } + + /// Resize the block vector to requested size + pub fn truncate(&mut self, new_size: u64) -> Result<(), Error> { + let new_size: usize = new_size.try_into().unwrap(); + let requested_capacity = (new_size + block::SIZE - 1) / block::SIZE; + self.resize(requested_capacity)?; + // TODO fill with zeros if resizing larger? 
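+        // (Blocks handed out by A::alloc() are not required to be zero-initialized,
+        // so growing a file through truncate() may expose stale block contents.)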
+ self.size = new_size; + Ok(()) + } + + unsafe fn index_unchecked(&self, mut index: usize) -> &BlockData<'a, A> { + if index < L0_BLOCKS { + return &self.l0[index]; + } + index -= L0_BLOCKS; + if index < L1_BLOCKS * block::ENTRY_COUNT { + let l1i = index / block::ENTRY_COUNT; + let l0i = index % block::ENTRY_COUNT; + + let l1r = &self.l1[l1i]; + assert!(!l1r.is_null()); + + return &l1r[l0i]; + } + + todo!(); + } + + unsafe fn index_unchecked_mut(&mut self, mut index: usize) -> &mut BlockData<'a, A> { + if index < L0_BLOCKS { + return &mut self.l0[index]; + } + index -= L0_BLOCKS; + if index < L1_BLOCKS * block::ENTRY_COUNT { + let l1i = index / block::ENTRY_COUNT; + let l0i = index % block::ENTRY_COUNT; + + let l1r = &mut self.l1[l1i]; + assert!(!l1r.is_null()); + + return &mut l1r[l0i]; + } + + todo!() + } +} + +impl<'a, A: BlockAllocator> Index for BVec<'a, A> { + type Output = BlockData<'a, A>; + + fn index(&self, index: usize) -> &Self::Output { + if index > self.capacity { + panic!( + "Block index out of bounds: capacity={}, index={}", + self.capacity, index + ); + } + + unsafe { self.index_unchecked(index) } + } +} + +impl<'a, A: BlockAllocator> IndexMut for BVec<'a, A> { + fn index_mut(&mut self, index: usize) -> &mut Self::Output { + if index > self.capacity { + panic!( + "Block index out of bounds: capacity={}, index={}", + self.capacity, index + ); + } + + unsafe { self.index_unchecked_mut(index) } + } +} + +impl<'a, A: BlockAllocator> TryFrom<&'static [u8]> for BVec<'a, A> { + type Error = Error; + + fn try_from(value: &'static [u8]) -> Result { + let mut res = Self::new(); + res.init_with_cow(value)?; + assert_eq!(res.size(), value.len()); + Ok(res) + } +} + +#[cfg(test)] +mod bvec_allocation { + use core::sync::atomic::Ordering; + + use crate::{ + block, + bvec::{BVec, L0_BLOCKS, L1_BLOCKS}, + }; + + #[test] + fn bvec_grow_shrink() { + test_allocator_with_counter!(A_COUNTER, A); + + assert_eq!(A_COUNTER.load(Ordering::Acquire), 0); + + { + let mut bvec = BVec::::new(); + + assert_eq!( + A_COUNTER.load(Ordering::Acquire), + 0, + "BVec should not allocate on creation" + ); + + const N: usize = 123; + bvec.resize(N).unwrap(); + + // N data blocks (12 in L0 + 111 in L1) + assert_eq!(A_COUNTER.load(Ordering::Acquire), N + 1); + + // Test the index interface + for i in 0..N { + assert!(!bvec[i].is_null(), "Index {} must be allocated", i); + } + + // Test the data structure + for i in 0..L0_BLOCKS { + assert!(!bvec.l0[i].is_null()); + } + assert!(!bvec.l1[0].is_null()); + for i in L0_BLOCKS..N { + let l1i = (i - L0_BLOCKS) / block::ENTRY_COUNT; + let l0i = (i - L0_BLOCKS) % block::ENTRY_COUNT; + + let l1r = &bvec.l1[l1i]; + assert!(!l1r.is_null()); + assert!(!l1r[l0i].is_null()); + } + + for i in 1..L1_BLOCKS { + assert!(bvec.l1[i].is_null()); + } + + // Shrink to 100 blocks, test if L1 is still allocated + const M: usize = 100; + bvec.resize(M).unwrap(); + + assert_eq!(A_COUNTER.load(Ordering::Acquire), M + 1); + + // Test the index interface + for i in 0..M { + assert!(!bvec[i].is_null(), "Index {} must be allocated", i); + } + + // Test the data structure + for i in 0..L0_BLOCKS { + assert!(!bvec.l0[i].is_null()); + } + assert!(!bvec.l1[0].is_null()); + for i in L0_BLOCKS..M { + let l1i = (i - L0_BLOCKS) / block::ENTRY_COUNT; + let l0i = (i - L0_BLOCKS) % block::ENTRY_COUNT; + + let l1r = &bvec.l1[l1i]; + assert!(!l1r.is_null()); + assert!(!l1r[l0i].is_null()); + } + for i in M..N { + let l1i = (i - L0_BLOCKS) / block::ENTRY_COUNT; + let l0i = (i - L0_BLOCKS) % 
block::ENTRY_COUNT; + + let l1r = &bvec.l1[l1i]; + assert!(!l1r.is_null()); + assert!(l1r[l0i].is_null()); + } + + for i in 1..L1_BLOCKS { + assert!(bvec.l1[i].is_null()); + } + + // Shrink to 13 blocks, test if L1 got deallocated + const O: usize = 13; + bvec.resize(O).unwrap(); + + assert_eq!(A_COUNTER.load(Ordering::Acquire), O); + } + + assert_eq!(A_COUNTER.load(Ordering::Acquire), 0); + } +} + +#[cfg(all(test, feature = "test-io"))] +mod bvec_io { + use crate::{block, bvec::L0_BLOCKS}; + + use super::BVec; + + #[test] + fn test_bvec_write() { + test_allocator_with_counter!(A_COUNTER, A); + + { + let data = [1, 2, 3, 4, 5]; + let mut bvec = BVec::::new(); + + // Write at 0 + assert_eq!(bvec.write(0, &data).unwrap(), data.len()); + assert_eq!(bvec.capacity, 1); + assert_eq!(bvec.size(), data.len()); + + assert_eq!(&bvec[0][..bvec.size()], &data[..]); + + // Write at 3 + assert_eq!(bvec.write(3, &data).unwrap(), data.len()); + assert_eq!(bvec.capacity, 1); + assert_eq!(bvec.size(), 3 + data.len()); + + assert_eq!(&bvec[0][..bvec.size()], &[1, 2, 3, 1, 2, 3, 4, 5]); + } + + { + let data = [5, 4, 3, 2, 1]; + let mut bvec = BVec::::new(); + + // Write at the end of L0-region + assert_eq!( + bvec.write((L0_BLOCKS * block::SIZE) as u64, &data).unwrap(), + data.len() + ); + // L0_BLOCKS + 1 L1 data block + assert_eq!(bvec.capacity, L0_BLOCKS + 1); + assert_eq!(bvec.size(), L0_BLOCKS * block::SIZE + data.len()); + + assert_eq!(&bvec[L0_BLOCKS][..data.len()], &data[..]); + + // Write at zero + assert_eq!(bvec.write(0, &data).unwrap(), data.len()); + assert_eq!(bvec.capacity, L0_BLOCKS + 1); + assert_eq!(bvec.size(), L0_BLOCKS * block::SIZE + data.len()); + + assert_eq!(&bvec[0][..data.len()], &data[..]); + + // Test write crossing L0 block boundary + assert_eq!( + bvec.write((block::SIZE - 3) as u64, &data).unwrap(), + data.len() + ); + assert_eq!(bvec.capacity, L0_BLOCKS + 1); + assert_eq!(bvec.size(), L0_BLOCKS * block::SIZE + data.len()); + + assert_eq!(&bvec[0][block::SIZE - 3..], &[5, 4, 3]); + assert_eq!(&bvec[1][..2], &[2, 1]); + + // Test write crossing L0-L1 boundary + assert_eq!( + bvec.write((L0_BLOCKS * block::SIZE) as u64 - 2, &data) + .unwrap(), + data.len() + ); + assert_eq!(bvec.capacity, L0_BLOCKS + 1); + assert_eq!(bvec.size(), L0_BLOCKS * block::SIZE + data.len()); + + assert_eq!(&bvec[L0_BLOCKS - 1][block::SIZE - 2..], &[5, 4]); + assert_eq!(&bvec[L0_BLOCKS][..data.len()], &[3, 2, 1, 2, 1]); + } + } +} diff --git a/kernel/driver/fs/memfs/src/dir.rs b/kernel/driver/fs/memfs/src/dir.rs new file mode 100644 index 00000000..ead383a0 --- /dev/null +++ b/kernel/driver/fs/memfs/src/dir.rs @@ -0,0 +1,35 @@ +use core::marker::PhantomData; + +use vfs::{CommonImpl, DirectoryImpl, DirectoryOpenPosition, Node, NodeFlags, NodeRef}; +use yggdrasil_abi::{error::Error, io::FileType}; + +use crate::{block::BlockAllocator, file::FileNode}; + +pub(crate) struct DirectoryNode { + _pd: PhantomData, +} + +impl DirectoryNode { + pub fn new() -> NodeRef { + Node::directory( + Self { _pd: PhantomData }, + NodeFlags::IN_MEMORY_SIZE | NodeFlags::IN_MEMORY_PROPS, + ) + } +} + +impl CommonImpl for DirectoryNode {} + +impl DirectoryImpl for DirectoryNode { + fn open(&self, _node: &NodeRef) -> Result { + Ok(DirectoryOpenPosition::FromCache) + } + + fn create_node(&self, _parent: &NodeRef, ty: FileType) -> Result { + match ty { + FileType::File => Ok(FileNode::::new()), + FileType::Directory => Ok(DirectoryNode::::new()), + _ => todo!(), + } + } +} diff --git a/kernel/driver/fs/memfs/src/file.rs 
b/kernel/driver/fs/memfs/src/file.rs
new file mode 100644
index 00000000..e89dd4d7
--- /dev/null
+++ b/kernel/driver/fs/memfs/src/file.rs
@@ -0,0 +1,75 @@
+use core::any::Any;
+
+use libk_util::sync::IrqSafeSpinlock;
+use vfs::{CommonImpl, InstanceData, Node, NodeFlags, NodeRef, RegularImpl};
+use yggdrasil_abi::{error::Error, io::OpenOptions};
+
+use crate::{block::BlockAllocator, bvec::BVec};
+
+pub(crate) struct FileNode<A: BlockAllocator> {
+    pub(crate) data: IrqSafeSpinlock<BVec<'static, A>>,
+}
+
+impl<A: BlockAllocator> FileNode<A> {
+    pub fn new() -> NodeRef {
+        Node::regular(
+            Self {
+                data: IrqSafeSpinlock::new(BVec::new()),
+            },
+            NodeFlags::IN_MEMORY_PROPS,
+        )
+    }
+}
+
+impl<A: BlockAllocator> CommonImpl for FileNode<A> {
+    fn as_any(&self) -> &dyn Any {
+        self
+    }
+
+    fn size(&self, _node: &NodeRef) -> Result<u64, Error> {
+        Ok(self.data.lock().size() as u64)
+    }
+}
+
+impl<A: BlockAllocator> RegularImpl for FileNode<A> {
+    fn open(
+        &self,
+        _node: &NodeRef,
+        opts: OpenOptions,
+    ) -> Result<(u64, Option<InstanceData>), Error> {
+        // TODO provide APPEND by vfs driver instead
+        if opts.contains(OpenOptions::APPEND) {
+            Ok((self.data.lock().size() as u64, None))
+        } else {
+            Ok((0, None))
+        }
+    }
+
+    fn read(
+        &self,
+        _node: &NodeRef,
+        _instance: Option<&InstanceData>,
+        pos: u64,
+        buf: &mut [u8],
+    ) -> Result<usize, Error> {
+        self.data.lock().read(pos, buf)
+    }
+
+    fn write(
+        &self,
+        _node: &NodeRef,
+        _instance: Option<&InstanceData>,
+        pos: u64,
+        buf: &[u8],
+    ) -> Result<usize, Error> {
+        self.data.lock().write(pos, buf)
+    }
+
+    fn truncate(&self, _node: &NodeRef, new_size: u64) -> Result<(), Error> {
+        self.data.lock().truncate(new_size)
+    }
+
+    fn close(&self, _node: &NodeRef, _instance: Option<&InstanceData>) -> Result<(), Error> {
+        Ok(())
+    }
+}
diff --git a/kernel/driver/fs/memfs/src/lib.rs b/kernel/driver/fs/memfs/src/lib.rs
new file mode 100644
index 00000000..54894160
--- /dev/null
+++ b/kernel/driver/fs/memfs/src/lib.rs
@@ -0,0 +1,192 @@
+//! In-memory filesystem driver
+#![no_std]
+#![deny(missing_docs)]
+#![allow(clippy::new_without_default, clippy::new_ret_no_self)]
+#![feature(
+    const_mut_refs,
+    maybe_uninit_uninit_array,
+    const_maybe_uninit_uninit_array,
+    maybe_uninit_array_assume_init
+)]
+
+use core::{cell::RefCell, marker::PhantomData};
+
+use alloc::rc::Rc;
+use block::BlockAllocator;
+use dir::DirectoryNode;
+use file::FileNode;
+use vfs::{AccessToken, NodeRef};
+use yggdrasil_abi::{
+    error::Error,
+    io::{FileMode, FileType, GroupId, UserId},
+    path::Path,
+};
+
+use crate::tar::TarIterator;
+
+#[cfg(test)]
+extern crate std;
+
+extern crate alloc;
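+
+// Usage sketch (illustrative, not part of the driver): with some allocator `A`
+// implementing [block::BlockAllocator], an initrd tar image can be turned into
+// a filesystem tree roughly like this:
+//
+//     let fs = MemoryFilesystem::<A>::from_slice(initrd_data)?;
+//     let root = fs.root()?;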
+
+#[cfg(test)]
+macro_rules! test_allocator_with_counter {
+    ($counter:ident, $allocator:ident) => {
+        static $counter: core::sync::atomic::AtomicUsize = core::sync::atomic::AtomicUsize::new(0);
+
+        struct $allocator;
+
+        unsafe impl $crate::block::BlockAllocator for $allocator {
+            fn alloc() -> Result<core::ptr::NonNull<u8>, yggdrasil_abi::error::Error> {
+                let b = std::boxed::Box::into_raw(std::boxed::Box::new([0; $crate::block::SIZE]));
+                $counter.fetch_add(1, core::sync::atomic::Ordering::Release);
+                Ok(unsafe { core::ptr::NonNull::new_unchecked(b as _) })
+            }
+
+            unsafe fn dealloc(block: core::ptr::NonNull<u8>) {
+                $counter.fetch_sub(1, core::sync::atomic::Ordering::Release);
+                drop(std::boxed::Box::from_raw(
+                    block.as_ptr() as *mut [u8; $crate::block::SIZE]
+                ));
+            }
+        }
+    };
+}
+
+pub mod block;
+pub mod bvec;
+
+mod dir;
+mod file;
+mod tar;
+
+/// In-memory read/write filesystem
+pub struct MemoryFilesystem<A: BlockAllocator> {
+    root: RefCell<Option<NodeRef>>,
+    _pd: PhantomData<A>,
+}
+
+impl<A: BlockAllocator> MemoryFilesystem<A> {
+    fn make_path(
+        self: &Rc<Self>,
+        at: &NodeRef,
+        path: &Path,
+        kind: FileType,
+        create: bool,
+    ) -> Result<NodeRef, Error> {
+        let access = unsafe { AccessToken::authorized() };
+        if path.is_empty() {
+            return Ok(at.clone());
+        }
+        let (element, rest) = path.split_left();
+        // let (element, rest) = path::split_left(path);
+        assert!(!element.is_empty());
+        assert!(!element.contains('/'));
+
+        // let node = at.lookup(element);
+        let node = at.lookup_or_load(element, access);
+        let node = match node {
+            Ok(node) => node,
+            Err(Error::DoesNotExist) => {
+                if !create {
+                    return Err(Error::DoesNotExist);
+                }
+
+                let node = self.create_node_initial(kind);
+                at.add_child(element, node.clone())?;
+
+                node
+            }
+            Err(err) => {
+                log::warn!("{:?}: lookup failed: {:?}", path, err);
+                return Err(err);
+            }
+        };
+
+        if rest.is_empty() {
+            Ok(node)
+        } else {
+            assert!(node.is_directory());
+            self.make_path(&node, rest, kind, create)
+        }
+    }
+
+    fn create_node_initial(self: &Rc<Self>, kind: FileType) -> NodeRef {
+        match kind {
+            FileType::File => FileNode::<A>::new(),
+            FileType::Directory => DirectoryNode::<A>::new(),
+            _ => todo!(),
+        }
+    }
+
+    fn from_slice_internal(self: &Rc<Self>, tar_data: &'static [u8]) -> Result<NodeRef, Error> {
+        let root = DirectoryNode::<A>::new();
+
+        // 1. Create paths in tar
+        for item in TarIterator::new(tar_data) {
+            let Ok((hdr, _)) = item else {
+                return Err(Error::InvalidArgument);
+            };
+
+            let path = Path::from_str(hdr.name.as_str()?.trim_matches('/'));
+            log::debug!("Make path {:?}", path);
+
+            let (dirname, filename) = path.split_right();
+            let parent = self.make_path(&root, dirname, FileType::Directory, true)?;
+            let node = self.create_node_initial(hdr.node_kind());
+
+            parent.add_child(filename, node)?;
+        }
+
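+        // (Two-pass construction: pass 1 above creates every node, so pass 2 below
+        // can resolve full paths regardless of entry order inside the archive.)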
+        // 2. Associate files with their data
+        for item in TarIterator::new(tar_data) {
+            let Ok((hdr, data)) = item else {
+                panic!("Unreachable");
+            };
+
+            let path = Path::from_str(hdr.name.as_str()?.trim_matches('/'));
+            let node = self.make_path(&root, path, FileType::Directory, false)?;
+            assert_eq!(node.ty(), hdr.node_kind());
+
+            let uid = unsafe { UserId::from_raw(usize::from(&hdr.uid) as u32) };
+            let gid = unsafe { GroupId::from_raw(usize::from(&hdr.gid) as u32) };
+            let mode = convert_mode(usize::from(&hdr.mode))?;
+
+            let access = unsafe { AccessToken::authorized() };
+            node.set_access(Some(uid), Some(gid), Some(mode), access)?;
+
+            if hdr.node_kind() == FileType::File {
+                let data = data.unwrap();
+                let node_data = node.data_as_ref::<FileNode<A>>();
+                let mut bvec = node_data.data.lock();
+
+                bvec.init_with_cow(data)?;
+                assert_eq!(bvec.size(), data.len());
+            }
+        }
+
+        Ok(root)
+    }
+
+    /// Constructs a filesystem tree from a tar image in memory
+    pub fn from_slice(tar_data: &'static [u8]) -> Result<Rc<Self>, Error> {
+        let fs = Rc::new(Self {
+            root: RefCell::new(None),
+            _pd: PhantomData,
+        });
+        let root = fs.from_slice_internal(tar_data)?;
+        fs.root.replace(Some(root));
+
+        Ok(fs)
+    }
+
+    // TODO Filesystem trait?
+    /// Returns the root node of the memory filesystem
+    pub fn root(&self) -> Result<NodeRef, Error> {
+        Ok(self.root.borrow().clone().unwrap())
+    }
+}
+
+fn convert_mode(mode: usize) -> Result<FileMode, Error> {
+    Ok(FileMode::new(mode as u32 & 0o777))
+}
diff --git a/kernel/driver/fs/memfs/src/tar.rs b/kernel/driver/fs/memfs/src/tar.rs
new file mode 100644
index 00000000..abdc4927
--- /dev/null
+++ b/kernel/driver/fs/memfs/src/tar.rs
@@ -0,0 +1,138 @@
+use yggdrasil_abi::{error::Error, io::FileType};
+
+#[repr(C)]
+pub(crate) struct OctalField<const N: usize> {
+    data: [u8; N],
+}
+
+#[repr(C)]
+pub(crate) struct TarString<const N: usize> {
+    data: [u8; N],
+}
+
+pub(crate) struct TarIterator<'a> {
+    data: &'a [u8],
+    offset: usize,
+    zero_blocks: usize,
+}
+
+#[repr(packed)]
+pub(crate) struct TarEntry {
+    pub name: TarString<100>,
+    pub mode: OctalField<8>,
+    pub uid: OctalField<8>,
+    pub gid: OctalField<8>,
+    pub size: OctalField<12>,
+    _mtime: OctalField<12>,
+    _checksum: OctalField<8>,
+    type_: u8,
+    _link_name: TarString<100>,
+    _magic: [u8; 8],
+    _user: TarString<32>,
+    _group: TarString<32>,
+    _dev_major: OctalField<8>,
+    _dev_minor: OctalField<8>,
+    _prefix: TarString<155>,
+    __pad: [u8; 12],
+}
+
+impl<'a> TarIterator<'a> {
+    pub const fn new(data: &'a [u8]) -> Self {
+        Self {
+            data,
+            offset: 0,
+            zero_blocks: 0,
+        }
+    }
+}
+
+impl<'a> Iterator for TarIterator<'a> {
+    type Item = Result<(&'a TarEntry, Option<&'a [u8]>), Error>;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            if self.offset + 512 > self.data.len() {
+                break None;
+            }
+
+            let hdr_ptr = &self.data[self.offset..];
+            let hdr = unsafe { &*(hdr_ptr.as_ptr() as *const TarEntry) };
+
+            if hdr.is_empty() {
+                if self.zero_blocks == 1 {
+                    self.offset = self.data.len();
+                    return None;
+                }
+                self.zero_blocks += 1;
+                continue;
+            }
+
+            let size = usize::from(&hdr.size);
+            let size_aligned = (size + 511) & !511;
+
+            let (data, size_aligned) = match hdr.type_ {
+                0 | b'0' => {
+                    if self.offset + 512 + size > self.data.len() {
+                        return Some(Err(Error::InvalidArgument));
+                    }
+
+                    let data = &self.data[self.offset + 512..self.offset + 512 + size];
+
+                    (Some(data), size_aligned)
+                }
+                // Directory
+                b'5' => (None, 0),
+                _ => {
+                    self.offset += size_aligned + 512;
+                    continue;
+                }
+            };
+            self.offset += size_aligned + 512;
+
+            break Some(Ok((hdr, data)));
+        }
+    }
+}
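+
+// Usage sketch (illustrative): walking an in-memory tar image with TarIterator:
+//
+//     for item in TarIterator::new(tar_data) {
+//         let (hdr, data) = item?;
+//         // hdr describes the entry; data is Some(..) for regular files
+//     }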
+
+impl<const N: usize> From<&OctalField<N>> for usize {
+    fn from(value: &OctalField<N>) -> Self {
+        let mut acc = 0;
+        for i in 0..N {
+            if !(b'0'..b'8').contains(&value.data[i]) {
+                break;
+            }
+            acc <<= 3;
+            acc |= (value.data[i] - b'0') as usize;
+        }
+        acc
+    }
+}
+
+impl<const N: usize> TarString<N> {
+    pub fn as_str(&self) -> Result<&str, Error> {
+        core::str::from_utf8(&self.data[..self.len()]).map_err(|_| Error::InvalidArgument)
+    }
+
+    pub fn len(&self) -> usize {
+        for i in 0..N {
+            if self.data[i] == 0 {
+                return i;
+            }
+        }
+        N
+    }
+}
+
+impl TarEntry {
+    pub fn is_empty(&self) -> bool {
+        self.name.data[0] == 0
+    }
+
+    pub fn node_kind(&self) -> FileType {
+        match self.type_ {
+            0 | b'0' => FileType::File,
+            b'5' => FileType::Directory,
+            _ => todo!(),
+        }
+    }
+}
diff --git a/kernel/driver/fs/memfs/test/dir1/test1.txt b/kernel/driver/fs/memfs/test/dir1/test1.txt
new file mode 100644
index 00000000..ac439756
--- /dev/null
+++ b/kernel/driver/fs/memfs/test/dir1/test1.txt
@@ -0,0 +1 @@
+This is another test file
diff --git a/kernel/driver/fs/memfs/test/test1.txt b/kernel/driver/fs/memfs/test/test1.txt
new file mode 100644
index 00000000..9f4b6d8b
--- /dev/null
+++ b/kernel/driver/fs/memfs/test/test1.txt
@@ -0,0 +1 @@
+This is a test file
diff --git a/kernel/driver/fs/memfs/test/test_image.tar b/kernel/driver/fs/memfs/test/test_image.tar
new file mode 100644
index 00000000..15dd01d9
Binary files /dev/null and b/kernel/driver/fs/memfs/test/test_image.tar differ
diff --git a/kernel/driver/input/Cargo.toml b/kernel/driver/input/Cargo.toml
new file mode 100644
index 00000000..abfeffd1
--- /dev/null
+++ b/kernel/driver/input/Cargo.toml
@@ -0,0 +1,11 @@
+[package]
+name = "ygg_driver_input"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" }
+libk-util = { path = "../../libk/libk-util" }
+libk-thread = { path = "../../libk/libk-thread" }
+libk-mm = { path = "../../libk/libk-mm" }
+vfs = { path = "../../lib/vfs" }
diff --git a/kernel/driver/input/src/lib.rs b/kernel/driver/input/src/lib.rs
new file mode 100644
index 00000000..87242944
--- /dev/null
+++ b/kernel/driver/input/src/lib.rs
@@ -0,0 +1,54 @@
+#![no_std]
+
+extern crate alloc;
+
+use core::task::{Context, Poll};
+
+use libk_thread::block;
+use libk_util::ring::LossyRingQueue;
+use vfs::{CharDevice, FileReadiness};
+use yggdrasil_abi::{
+    error::Error,
+    io::{DeviceRequest, KeyboardKeyEvent},
+};
+
+pub struct KeyboardDevice;
+
+impl FileReadiness for KeyboardDevice {
+    fn poll_read(&self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        INPUT_QUEUE.poll_readable(cx).map(Ok)
+    }
+}
+
+impl CharDevice for KeyboardDevice {
+    fn read(&'static self, buf: &mut [u8]) -> Result<usize, Error> {
+        if buf.len() < 4 {
+            return Ok(0);
+        }
+
+        let ev = block!(INPUT_QUEUE.read().await)?;
+
+        buf[..4].copy_from_slice(&ev.as_bytes());
+
+        Ok(4)
+    }
+
+    fn is_writable(&self) -> bool {
+        false
+    }
+
+    fn device_request(&self, _req: &mut DeviceRequest) -> Result<(), Error> {
+        todo!()
+    }
+
+    fn is_terminal(&self) -> bool {
+        false
+    }
+}
+
+static INPUT_QUEUE: LossyRingQueue<KeyboardKeyEvent> = LossyRingQueue::with_capacity(32);
+pub static KEYBOARD_DEVICE: KeyboardDevice = KeyboardDevice;
+
+pub fn send_event(ev: KeyboardKeyEvent) {
+    INPUT_QUEUE.write(ev);
+}
diff --git a/kernel/driver/net/core/Cargo.toml b/kernel/driver/net/core/Cargo.toml
new file mode 100644
index 00000000..bfe925c6
--- /dev/null
+++ b/kernel/driver/net/core/Cargo.toml
@@ -0,0 +1,21 @@
+[package]
+name = "ygg_driver_net_core"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at
https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git", features = ["serde_kernel", "bytemuck"] } +libk-mm = { path = "../../../libk/libk-mm" } +libk-util = { path = "../../../libk/libk-util" } +libk-thread = { path = "../../../libk/libk-thread" } +libk-device = { path = "../../../libk/libk-device" } +vfs = { path = "../../../lib/vfs" } + +kernel-fs = { path = "../../fs/kernel-fs" } + +log = "0.4.20" +bytemuck = { version = "1.14.0", features = ["derive"] } +serde_json = { version = "1.0.111", default-features = false, features = ["alloc"] } +serde = { version = "1.0.193", features = ["derive"], default-features = false } diff --git a/kernel/driver/net/core/src/config.rs b/kernel/driver/net/core/src/config.rs new file mode 100644 index 00000000..f5e66a59 --- /dev/null +++ b/kernel/driver/net/core/src/config.rs @@ -0,0 +1,188 @@ +use alloc::{boxed::Box, sync::Arc, vec::Vec}; +use serde::Serialize; +use vfs::{ChannelDescriptor, MessagePayload}; +use yggdrasil_abi::{ + error::Error, + io::{ChannelPublisherId, MessageDestination}, + net::{ + netconfig::{ + InterfaceInfo, InterfaceQuery, NetConfigRequest, NetConfigResult, RouteInfo, + RoutingInfo, + }, + IpAddr, SubnetAddr, + }, +}; + +use crate::{ + interface::NetworkInterface, + l3::{arp, Route}, +}; + +async fn receive_request( + channel: &ChannelDescriptor, +) -> Result<(ChannelPublisherId, NetConfigRequest), Error> { + loop { + let raw = channel.receive_message_async().await?; + match &raw.payload { + MessagePayload::Data(message) => { + let msg = + serde_json::from_slice(message.as_ref()).map_err(|_| Error::InvalidArgument)?; + + return Ok((raw.source, msg)); + } + MessagePayload::File(_) => (), + } + } +} + +fn send_reply( + channel: &ChannelDescriptor, + recepient: ChannelPublisherId, + message: NetConfigResult, +) -> Result<(), Error> { + let data = serde_json::to_vec(&message).map_err(|_| Error::InvalidArgument)?; + channel.send_message( + MessagePayload::Data(data.into_boxed_slice()), + MessageDestination::Specific(recepient.into()), + ) +} + +fn list_interfaces() -> Vec<(Box, u32)> { + let interfaces = NetworkInterface::list_ref(); + interfaces + .iter() + .map(|(&id, iface)| (iface.name.clone(), id)) + .collect() +} + +fn describe_interface(interface: &NetworkInterface) -> InterfaceInfo { + InterfaceInfo { + interface_id: interface.id, + interface_name: interface.name.clone(), + address: interface.address.read().map(Into::into), + mac: interface.mac, + } +} + +fn describe_route(route: &Route) -> RouteInfo { + // NOTE: must exist + let interface = NetworkInterface::get(route.interface).unwrap(); + + RouteInfo { + interface_name: interface.name.clone(), + interface_id: route.interface, + subnet: route.subnet, + gateway: route.gateway.map(Into::into), + } +} + +fn query_route(destination: IpAddr) -> Option { + let (interface_id, gateway, destination) = Route::lookup(destination)?; + let interface = NetworkInterface::get(interface_id).unwrap(); + let source = *interface.address.read(); + + Some(RoutingInfo { + interface_name: interface.name.clone(), + interface_id, + destination, + gateway, + source, + source_mac: interface.mac, + }) +} + +fn query_interface(query: InterfaceQuery) -> Option> { + match query { + InterfaceQuery::ById(id) => NetworkInterface::get(id).ok(), + InterfaceQuery::ByName(name) => { + let interfaces = NetworkInterface::list_ref(); + interfaces.iter().find_map(|(_, iface)| { + if iface.name == name { + 
Some(iface.clone()) + } else { + None + } + }) + } + } +} + +fn add_route( + query: InterfaceQuery, + gateway: Option, + subnet: SubnetAddr, +) -> Result<(), &'static str> { + let interface = query_interface(query).ok_or("No such interface")?; + let route = Route { + interface: interface.id, + gateway, + subnet, + }; + Route::insert(route).map_err(|_| "Could not insert route")?; + Ok(()) +} + +pub async fn network_config_service() -> Result<(), Error> { + let channel = ChannelDescriptor::open("@kernel-netconf", true); + + loop { + let (sender_id, request) = receive_request(&channel).await?; + + match request { + NetConfigRequest::ListRoutes => { + let routes = Route::list_ref(); + let route_info: Vec<_> = routes.iter().map(describe_route).collect(); + send_reply(&channel, sender_id, NetConfigResult::Ok(route_info))?; + } + NetConfigRequest::ListInterfaces => { + let interfaces = list_interfaces(); + send_reply(&channel, sender_id, NetConfigResult::Ok(interfaces))?; + } + NetConfigRequest::DescribeRoutes(_query) => todo!(), + NetConfigRequest::DescribeInterface(query) => { + let result = match query_interface(query) { + Some(interface) => NetConfigResult::Ok(describe_interface(&interface)), + None => NetConfigResult::err("No such interface"), + }; + send_reply(&channel, sender_id, result)?; + } + NetConfigRequest::AddRoute { + interface, + gateway, + subnet, + } => { + let result = match add_route(interface, gateway, subnet) { + Ok(()) => NetConfigResult::Ok(()), + Err(error) => NetConfigResult::err(error), + }; + send_reply(&channel, sender_id, result)?; + } + NetConfigRequest::SetNetworkAddress { interface, address } => { + let result = match query_interface(interface) { + Some(interface) => { + interface.set_address(address); + NetConfigResult::Ok(()) + } + None => NetConfigResult::err("No such interface"), + }; + + send_reply(&channel, sender_id, result)?; + } + NetConfigRequest::ClearNetworkAddress(_interface) => todo!(), + NetConfigRequest::QueryRoute(destination) => { + let result = match query_route(destination) { + Some(route) => NetConfigResult::Ok(route), + None => NetConfigResult::err("No route to host"), + }; + send_reply(&channel, sender_id, result)?; + } + NetConfigRequest::QueryArp(interface_id, destination, perform_query) => { + let result = match arp::lookup(interface_id, destination, perform_query).await { + Ok(mac) => NetConfigResult::Ok(mac), + Err(_) => NetConfigResult::err("No ARP entry"), + }; + send_reply(&channel, sender_id, result)?; + } + } + } +} diff --git a/kernel/driver/net/core/src/ethernet.rs b/kernel/driver/net/core/src/ethernet.rs new file mode 100644 index 00000000..9ac95c7c --- /dev/null +++ b/kernel/driver/net/core/src/ethernet.rs @@ -0,0 +1,80 @@ +use core::mem::size_of; + +use alloc::sync::Arc; +use bytemuck::Pod; +use libk_mm::PageBox; +use yggdrasil_abi::{ + error::Error, + net::{ + protocols::{EtherType, EthernetFrame}, + types::NetValueImpl, + MacAddress, + }, +}; + +use crate::{interface::NetworkInterface, l3, socket::RawSocket}; + +#[derive(Clone)] +pub struct L2Packet { + pub interface_id: u32, + + pub source_address: MacAddress, + pub destination_address: MacAddress, + + pub l2_offset: usize, + pub l3_offset: usize, + + pub data: Arc>, +} + +impl L2Packet { + pub fn ethernet_frame(&self) -> &EthernetFrame { + bytemuck::from_bytes( + &self.data[self.l2_offset..self.l2_offset + size_of::()], + ) + } + + pub fn l2_data(&self) -> &[u8] { + &self.data[self.l3_offset..] 
+ } +} + +pub fn send_l2( + interface: &NetworkInterface, + source_mac: MacAddress, + destination_mac: MacAddress, + ethertype: EtherType, + l2_data: &T, +) -> Result<(), Error> { + let l2_frame = EthernetFrame { + source_mac, + destination_mac, + ethertype: ethertype.to_network_order(), + }; + + log::debug!( + "send_l2: {} -> {}", + l2_frame.source_mac, + l2_frame.destination_mac + ); + + interface.send_l2(&l2_frame, bytemuck::bytes_of(l2_data)) +} + +pub fn handle(packet: L2Packet) { + let frame = packet.ethernet_frame(); + let ty = EtherType::from_network_order(frame.ethertype); + + RawSocket::packet_received(packet.clone()); + + match ty { + EtherType::ARP => l3::arp::handle_packet(packet), + EtherType::IPV4 => l3::ip::handle_v4_packet(packet), + p => { + log::debug!( + "Unrecognized L2 protocol: {:#06x}", + bytemuck::cast::<_, u16>(p) + ); + } + } +} diff --git a/kernel/driver/net/core/src/interface.rs b/kernel/driver/net/core/src/interface.rs new file mode 100644 index 00000000..90c3b46c --- /dev/null +++ b/kernel/driver/net/core/src/interface.rs @@ -0,0 +1,148 @@ +use core::{ + mem::size_of, + sync::atomic::{AtomicU32, AtomicUsize, Ordering}, +}; + +use alloc::{boxed::Box, collections::BTreeMap, format, sync::Arc}; +// TODO: link state management? +use libk_mm::PageBox; +use libk_util::{ + sync::spin_rwlock::{IrqSafeRwLock, IrqSafeRwLockReadGuard}, + OneTimeInit, +}; +use yggdrasil_abi::{ + error::Error, + net::{protocols::EthernetFrame, IpAddr, MacAddress}, +}; + +use crate::l3::{arp::ArpTable, Route}; + +pub trait NetworkDevice: Sync { + fn transmit(&self, packet: PageBox<[u8]>) -> Result<(), Error>; + fn packet_prefix_size(&self) -> usize; + + fn read_hardware_address(&self) -> MacAddress; +} + +pub struct NetworkInterface { + pub(crate) name: Box, + pub(crate) device: &'static dyn NetworkDevice, + pub(crate) mac: MacAddress, + + pub(crate) address: IrqSafeRwLock>, + pub(crate) id: u32, +} + +#[derive(PartialEq, Eq)] +pub enum NetworkInterfaceType { + Ethernet, + Loopback, +} + +static INTERFACES: IrqSafeRwLock>> = + IrqSafeRwLock::new(BTreeMap::new()); +static LAST_INTERFACE_ID: AtomicU32 = AtomicU32::new(1); +static LOOPBACK: OneTimeInit> = OneTimeInit::new(); + +impl NetworkInterface { + pub fn id(&self) -> u32 { + self.id + } + + pub fn loopback() -> &'static Arc { + LOOPBACK.get() + } + + pub fn get(id: u32) -> Result, Error> { + INTERFACES + .read() + .get(&id) + .cloned() + .ok_or(Error::DoesNotExist) + } + + pub fn query_by_name(name: &str) -> Result, Error> { + INTERFACES + .read() + .iter() + .find_map(|(_, iface)| { + if iface.name.as_ref() == name { + Some(iface.clone()) + } else { + None + } + }) + .ok_or(Error::DoesNotExist) + } + + pub fn list_ref() -> IrqSafeRwLockReadGuard<'static, BTreeMap>> { + INTERFACES.read() + } + + pub fn set_address(&self, address: IpAddr) { + // Flush routes associated with the interface + { + let mut routes = Route::list_mut(); + routes.retain(|route| route.interface != self.id); + } + + let mut addr = self.address.write(); + // Flush owned ARP entries related to the old address + if let Some(address) = *addr { + ArpTable::flush_address(self.id, address); + } + + addr.replace(address); + + ArpTable::insert_address(self.id, self.mac, address, true); + } + + pub fn send_l2(&self, l2_frame: &EthernetFrame, l2_data: &[u8]) -> Result<(), Error> { + let l2_offset = self.device.packet_prefix_size(); + let l2_data_offset = l2_offset + size_of::(); + + let mut packet = PageBox::new_slice(0, l2_data_offset + l2_data.len())?; + + 
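+        // Resulting layout: [0..l2_offset) driver headroom requested via
+        // packet_prefix_size(), [l2_offset..l2_data_offset) Ethernet header,
+        // [l2_data_offset..) payload.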
packet[l2_offset..l2_data_offset].copy_from_slice(bytemuck::bytes_of(l2_frame)); + packet[l2_data_offset..].copy_from_slice(l2_data); + + self.device.transmit(packet) + } +} + +pub fn register_interface( + ty: NetworkInterfaceType, + dev: &'static dyn NetworkDevice, +) -> Arc { + let name = match ty { + NetworkInterfaceType::Ethernet => { + static LAST_ETHERNET_ID: AtomicUsize = AtomicUsize::new(0); + let eth_id = LAST_ETHERNET_ID.fetch_add(1, Ordering::SeqCst); + format!("eth{}", eth_id).into_boxed_str() + } + NetworkInterfaceType::Loopback => "lo".into(), + }; + + let mac = dev.read_hardware_address(); + let id = LAST_INTERFACE_ID.fetch_add(1, Ordering::SeqCst); + + log::info!("Register network interface {} (#{}): {}", name, id, mac); + + let iface = NetworkInterface { + name, + device: dev, + mac, + address: IrqSafeRwLock::new(None), + id, + }; + + let interface = Arc::new(iface); + + INTERFACES.write().insert(id, interface.clone()); + + if ty == NetworkInterfaceType::Loopback { + LOOPBACK.init(interface.clone()); + } + + interface +} diff --git a/kernel/driver/net/core/src/l3/arp.rs b/kernel/driver/net/core/src/l3/arp.rs new file mode 100644 index 00000000..cf4dbb6f --- /dev/null +++ b/kernel/driver/net/core/src/l3/arp.rs @@ -0,0 +1,262 @@ +use core::{ + future::Future, + mem::size_of, + pin::Pin, + task::{Context, Poll}, + time::Duration, +}; + +use alloc::{boxed::Box, collections::BTreeMap}; +use libk_thread::runtime; +use libk_util::{sync::spin_rwlock::IrqSafeRwLock, waker::QueueWaker}; +use yggdrasil_abi::{ + error::Error, + net::{ + protocols::{ArpFrame, EtherType}, + types::NetValueImpl, + IpAddr, Ipv4Addr, MacAddress, + }, +}; + +use crate::{ethernet, interface::NetworkInterface, L2Packet}; + +struct Inner { + entries: BTreeMap<(u32, A), (MacAddress, bool)>, + reverse: BTreeMap<(u32, MacAddress), (A, bool)>, +} + +pub struct ArpTable { + v4: IrqSafeRwLock>, + notify: QueueWaker, +} + +impl Inner { + const fn new() -> Self { + Self { + entries: BTreeMap::new(), + reverse: BTreeMap::new(), + } + } + + fn query_mac(&self, interface: u32, address: A) -> Option<(MacAddress, bool)> { + self.entries.get(&(interface, address)).copied() + } + + // fn query_address(&self, interface: u32, mac: MacAddress) -> Option<(A, bool)> { + // self.reverse.get(&(interface, mac)).copied() + // } + + fn insert(&mut self, interface: u32, mac: MacAddress, address: A, owned: bool) -> bool { + let new = self + .entries + .insert((interface, address), (mac, owned)) + .is_none(); + self.reverse.insert((interface, mac), (address, owned)); + new + } + + fn flush(&mut self, interface: u32, address: A) -> bool { + if let Some((mac, _)) = self.entries.remove(&(interface, address)) { + self.reverse.remove(&(interface, mac)); + true + } else { + false + } + } +} + +impl ArpTable { + pub const fn new() -> Self { + Self { + v4: IrqSafeRwLock::new(Inner::new()), + notify: QueueWaker::new(), + } + } + + pub fn lookup_cache_v4(interface: u32, address: Ipv4Addr) -> Option<(MacAddress, bool)> { + ARP_TABLE.v4.read().query_mac(interface, address) + } + + pub fn lookup_cache(interface: u32, address: IpAddr) -> Option { + let (address, _) = match address { + IpAddr::V4(address) => Self::lookup_cache_v4(interface, address), + IpAddr::V6(_) => todo!(), + }?; + Some(address) + } + + pub fn flush_address_v4(interface: u32, address: Ipv4Addr) -> bool { + ARP_TABLE.v4.write().flush(interface, address) + } + + pub fn flush_address(interface: u32, address: IpAddr) -> bool { + match address { + IpAddr::V4(address) => 
Self::flush_address_v4(interface, address), + IpAddr::V6(_) => todo!(), + } + } + + pub fn insert_address_v4(interface: u32, mac: MacAddress, address: Ipv4Addr, owned: bool) { + ARP_TABLE.v4.write().insert(interface, mac, address, owned); + } + + pub fn insert_address(interface: u32, mac: MacAddress, address: IpAddr, owned: bool) { + match address { + IpAddr::V4(address) => Self::insert_address_v4(interface, mac, address, owned), + IpAddr::V6(_) => todo!(), + } + ARP_TABLE.notify.wake_all(); + } + + fn poll_address( + interface: u32, + address: IpAddr, + timeout: Duration, + ) -> impl Future> { + struct F> { + interface: u32, + address: IpAddr, + timeout: Pin>, + } + + impl> Future for F { + type Output = Option; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + if self.timeout.as_mut().poll(cx).is_ready() { + ARP_TABLE.notify.remove(cx.waker()); + return Poll::Ready(None); + } + ARP_TABLE.notify.register(cx.waker()); + if let Some(mac) = ArpTable::lookup_cache(self.interface, self.address) { + ARP_TABLE.notify.remove(cx.waker()); + Poll::Ready(Some(mac)) + } else { + Poll::Pending + } + } + } + + F { + interface, + address, + timeout: Box::pin(runtime::sleep(timeout)), + } + } +} + +static ARP_TABLE: ArpTable = ArpTable::new(); + +pub async fn lookup(interface: u32, ip: IpAddr, perform_query: bool) -> Result { + if let Some(mac) = ArpTable::lookup_cache(interface, ip) { + return Ok(mac); + } + if !perform_query { + return Err(Error::HostUnreachable); + } + query(interface, ip, 5, Duration::from_millis(200)).await +} + +async fn query( + interface: u32, + ip: IpAddr, + retries: usize, + retry_timeout: Duration, +) -> Result { + let interface = NetworkInterface::get(interface)?; + for _ in 0..retries { + send_request(&interface, ip)?; + if let Some(mac) = ArpTable::poll_address(interface.id, ip, retry_timeout).await { + return Ok(mac); + } + } + Err(Error::HostUnreachable) +} + +fn send_request_v4(interface: &NetworkInterface, query_address: Ipv4Addr) -> Result<(), Error> { + let request = ArpFrame { + protocol: EtherType::IPV4.to_network_order(), + protocol_size: 4, + hardware_type: u16::to_network_order(1), + hardware_size: 6, + opcode: 1u16.to_network_order(), + + sender_mac: interface.mac, + // TODO maybe would be nice to specify + sender_ip: 0u32.to_network_order(), + + target_ip: u32::to_network_order(query_address.into()), + target_mac: MacAddress::UNSPECIFIED, + }; + + ethernet::send_l2( + interface, + interface.mac, + MacAddress::BROADCAST, + EtherType::ARP, + &request, + ) +} + +fn send_request(interface: &NetworkInterface, query_address: IpAddr) -> Result<(), Error> { + log::debug!("Querying address of {}", query_address); + + match query_address { + IpAddr::V4(address) => send_request_v4(interface, address), + IpAddr::V6(_) => todo!(), + } +} + +fn send_reply(interface_id: u32, arp: &ArpFrame, target_mac: MacAddress) -> Result<(), Error> { + let interface = NetworkInterface::get(interface_id)?; + let reply = ArpFrame { + protocol: arp.protocol, + hardware_type: arp.hardware_type, + hardware_size: arp.hardware_size, + protocol_size: arp.protocol_size, + opcode: 2u16.to_network_order(), + + sender_mac: target_mac, + sender_ip: arp.target_ip, + + target_ip: arp.sender_ip, + target_mac: arp.sender_mac, + }; + + ethernet::send_l2( + &interface, + target_mac, + arp.sender_mac, + EtherType::ARP, + &reply, + ) +} + +pub fn handle_packet(packet: L2Packet) { + let arp: &ArpFrame = bytemuck::from_bytes(&packet.l2_data()[..size_of::()]); + let proto = 
EtherType::from_network_order(arp.protocol); + let opcode = u16::from_network_order(arp.opcode); + + let (target_address, sender_address) = match proto { + EtherType::IPV4 => ( + Ipv4Addr::from(u32::from_network_order(arp.target_ip)), + Ipv4Addr::from(u32::from_network_order(arp.sender_ip)), + ), + _ => { + log::warn!("TODO: unhandled ARP proto: {:#x?}", proto); + return; + } + }; + + log::debug!("ARP: {} -> {}", sender_address, target_address); + + ArpTable::insert_address_v4(packet.interface_id, arp.sender_mac, sender_address, false); + + if opcode == 1 { + // Don't answer with non-owned addresses + if let Some((mac, true)) = ArpTable::lookup_cache_v4(packet.interface_id, target_address) { + // Reply with own address + send_reply(packet.interface_id, arp, mac).ok(); + } + } +} diff --git a/kernel/driver/net/core/src/l3/ip.rs b/kernel/driver/net/core/src/l3/ip.rs new file mode 100644 index 00000000..9b9451e6 --- /dev/null +++ b/kernel/driver/net/core/src/l3/ip.rs @@ -0,0 +1,97 @@ +use core::mem::size_of; + +use yggdrasil_abi::net::{ + protocols::{IpProtocol, Ipv4Frame, TcpFrame, UdpFrame}, + types::NetValueImpl, + IpAddr, Ipv4Addr, +}; + +use crate::{interface::NetworkInterface, L2Packet, L3Packet, ACCEPT_QUEUE}; + +use super::IpFrame; + +impl IpFrame for Ipv4Frame { + fn destination_ip(&self) -> IpAddr { + IpAddr::V4(Ipv4Addr::from(u32::from_network_order( + self.destination_address, + ))) + } + + fn source_ip(&self) -> IpAddr { + IpAddr::V4(Ipv4Addr::from(u32::from_network_order(self.source_address))) + } + + fn data_length(&self) -> usize { + self.total_length().saturating_sub(self.header_length()) + } +} + +pub fn handle_v4_packet(packet: L2Packet) { + let Ok(interface) = NetworkInterface::get(packet.interface_id) else { + log::debug!("Invalid interface ID in L2 packet"); + return; + }; + + let l2_data = packet.l2_data(); + let l3_frame: &Ipv4Frame = bytemuck::from_bytes(&l2_data[..size_of::()]); + let header_length = l3_frame.header_length(); + let l3_data = &l2_data[size_of::()..]; + + let is_input = interface + .address + .read() + .map(|address| address == l3_frame.destination_ip()) + .unwrap_or(false); + + if is_input { + // Extract ports from L4 proto + let (source_port, destination_port) = match l3_frame.protocol { + IpProtocol::UDP => { + // TODO check size + let l4_frame: &UdpFrame = bytemuck::from_bytes(&l3_data[..size_of::()]); + ( + Some(u16::from_network_order(l4_frame.source_port)), + Some(u16::from_network_order(l4_frame.destination_port)), + ) + } + IpProtocol::TCP => { + // TODO check size + let l4_frame: &TcpFrame = bytemuck::from_bytes(&l3_data[..size_of::()]); + ( + Some(u16::from_network_order(l4_frame.source_port)), + Some(u16::from_network_order(l4_frame.destination_port)), + ) + } + IpProtocol::ICMP => (None, None), + _ => (None, None), + }; + + let l3_packet = L3Packet { + interface_id: packet.interface_id, + + protocol: l3_frame.protocol, + + source_address: l3_frame.source_ip(), + destination_address: l3_frame.destination_ip(), + + source_port, + destination_port, + + l2_offset: packet.l2_offset, + l3_offset: packet.l3_offset, + l4_offset: packet.l3_offset + header_length, + data_length: l3_frame.data_length(), + + data: packet.data, + }; + + ACCEPT_QUEUE.push_back(l3_packet); + } else { + // TODO forwarding + log::debug!( + "Dropped forwarded IPv4: {} -> {}", + l3_frame.source_ip(), + l3_frame.destination_ip() + ); + } +} diff --git a/kernel/driver/net/core/src/l3/mod.rs b/kernel/driver/net/core/src/l3/mod.rs new file mode 100644 index 00000000..7d42fd5f 
--- /dev/null +++ b/kernel/driver/net/core/src/l3/mod.rs @@ -0,0 +1,246 @@ +use core::{fmt, mem::size_of}; + +use alloc::{sync::Arc, vec::Vec}; +use bytemuck::{Pod, Zeroable}; +use libk_mm::PageBox; +use libk_util::sync::spin_rwlock::{ + IrqSafeRwLock, IrqSafeRwLockReadGuard, IrqSafeRwLockWriteGuard, +}; +use yggdrasil_abi::{ + error::Error, + net::{ + protocols::{EtherType, EthernetFrame, InetChecksum, IpProtocol, Ipv4Frame}, + types::NetValueImpl, + IpAddr, Ipv4Addr, MacAddress, SubnetAddr, + }, +}; + +use crate::{interface::NetworkInterface, l4, PacketBuilder}; + +pub mod arp; +pub mod ip; + +pub struct L3Packet { + pub interface_id: u32, + + pub protocol: IpProtocol, + + pub source_address: IpAddr, + pub destination_address: IpAddr, + + pub source_port: Option, + pub destination_port: Option, + + pub l2_offset: usize, + pub l3_offset: usize, + pub l4_offset: usize, + pub data_length: usize, + + pub data: Arc>, +} + +pub trait IpFrame: Pod { + fn destination_ip(&self) -> IpAddr; + fn source_ip(&self) -> IpAddr; + fn data_length(&self) -> usize; +} + +// TODO use range map for this? +pub struct Route { + pub subnet: SubnetAddr, + pub interface: u32, + pub gateway: Option, +} + +pub struct L4ResolvedPacket<'a, 'i> { + pub interface: &'i NetworkInterface, + pub source_ip: IpAddr, + pub gateway_ip: IpAddr, + pub destination_ip: IpAddr, + pub l4_frame: &'a [u8], + pub l4_options: &'a [u8], + pub l4_data: &'a [u8], + pub protocol: IpProtocol, + pub ttl: u8, +} + +pub struct L4UnresolvedPacket<'a> { + pub destination_ip: IpAddr, + pub l4_frame: &'a [u8], + pub l4_options: &'a [u8], + pub l4_data: &'a [u8], + pub protocol: IpProtocol, + pub ttl: u8, +} + +static ROUTES: IrqSafeRwLock> = IrqSafeRwLock::new(Vec::new()); + +impl L3Packet { + pub fn l3_data(&self) -> &[u8] { + &self.data[self.l4_offset..] + } +} + +impl Route { + pub fn list_mut() -> IrqSafeRwLockWriteGuard<'static, Vec> { + ROUTES.write() + } + + pub fn list_ref() -> IrqSafeRwLockReadGuard<'static, Vec> { + ROUTES.read() + } + + pub fn lookup(address: IpAddr) -> Option<(u32, Option, IpAddr)> { + // TODO sort routes based on their "specificity"? 
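+        // (E.g. a longest-prefix match would prefer 10.0.0.0/24 over 10.0.0.0/8 for
+        // 10.0.0.5; the current implementation returns the first matching route.)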
+ // Check for local route + for (_, interface) in NetworkInterface::list_ref().iter() { + if interface + .address + .read() + .map(|addr| addr == address) + .unwrap_or(false) + { + // This is the address of loopback, return it + return Some(( + NetworkInterface::loopback().id, + Some(IpAddr::V4(Ipv4Addr::LOOPBACK)), + IpAddr::V4(Ipv4Addr::LOOPBACK), + )); + } + } + + let routes = ROUTES.read(); + for route in routes.iter() { + if route.subnet.contains(&address) { + return Some((route.interface, route.gateway, address)); + } + } + None + } + + pub fn insert(route: Self) -> Result<(), Error> { + // TODO check for conflicts + log::debug!("Add route: {}", route); + ROUTES.write().push(route); + Ok(()) + } +} + +impl fmt::Display for Route { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{} ", self.subnet)?; + if let Some(gw) = self.gateway { + write!(f, " via {}", gw)?; + } + Ok(()) + } +} + +pub async fn handle_accepted(l3_packet: L3Packet) -> Result<(), Error> { + match l3_packet.protocol { + IpProtocol::UDP => l4::udp::handle(l3_packet), + IpProtocol::ICMP => l4::icmp::handle(l3_packet).await, + IpProtocol::TCP => l4::tcp::handle(l3_packet).await, + _ => Ok(()), + } +} + +impl<'a, 'i> L4ResolvedPacket<'a, 'i> { + pub async fn lookup_gateway_mac(&self) -> Result { + arp::lookup(self.interface.id, self.gateway_ip, true).await + } + + pub fn make_l3_frame(&self) -> Result { + // TODO what if source_ip/gateway_ip/destination_ip are a mix of IPv4/IPv6? + let source_ip = self.source_ip.into_ipv4().ok_or(Error::NotImplemented)?; + let destination_ip = self + .destination_ip + .into_ipv4() + .ok_or(Error::NotImplemented)?; + + let total_length = (self.total_l4_len() + size_of::()) + .try_into() + .map_err(|_| Error::InvalidArgument)?; + + let mut l3_frame = Ipv4Frame { + source_address: u32::to_network_order(source_ip.into()), + destination_address: u32::to_network_order(destination_ip.into()), + protocol: self.protocol, + version_length: 0x45, + total_length: u16::to_network_order(total_length), + flags_frag: u16::to_network_order(0x4000), + id: u16::to_network_order(0), + ttl: self.ttl, + ..Ipv4Frame::zeroed() + }; + + let l3_frame_bytes = bytemuck::bytes_of(&l3_frame); + let mut ip_checksum = InetChecksum::new(); + ip_checksum.add_bytes(l3_frame_bytes, true); + let ip_checksum = ip_checksum.finish(); + + l3_frame.header_checksum = u16::to_network_order(ip_checksum); + + Ok(l3_frame) + } + + pub fn total_l4_len(&self) -> usize { + self.l4_frame.len() + self.l4_options.len() + self.l4_data.len() + } +} + +pub(crate) fn resolve_l3_route( + destination_ip: IpAddr, +) -> Result<(Arc, IpAddr, IpAddr, IpAddr), Error> { + // Find the route itself + let (interface_id, gateway, destination_ip) = + Route::lookup(destination_ip).ok_or(Error::NetworkUnreachable)?; + let interface = NetworkInterface::get(interface_id)?; + // Route exists, but has no gateway (TODO: assign subnets to interfaces) + let gateway = gateway.ok_or(Error::NetworkUnreachable)?; + // Route exists, but network has no address assigned (TODO: how?) 
+ let source_address = interface.address.read().ok_or(Error::NetworkUnreachable)?; + + Ok((interface, source_address, gateway, destination_ip)) +} + +pub async fn send_l4_ip_resolved(packet: &L4ResolvedPacket<'_, '_>) -> Result<(), Error> { + let gateway_mac = packet.lookup_gateway_mac().await?; + + let l3_frame = packet.make_l3_frame()?; + + let mut builder = PacketBuilder::new( + packet.interface.device.packet_prefix_size(), + size_of::() + size_of::() + packet.total_l4_len(), + )?; + builder.push(&EthernetFrame { + source_mac: packet.interface.mac, + destination_mac: gateway_mac, + ethertype: EtherType::IPV4.to_network_order(), + })?; + builder.push(&l3_frame)?; + builder.push_bytes(packet.l4_frame)?; + builder.push_bytes(packet.l4_options)?; + builder.push_bytes(packet.l4_data)?; + + let (sent_packet, _len) = builder.finish(); + packet.interface.device.transmit(sent_packet) +} + +pub async fn send_l4_ip(packet: &L4UnresolvedPacket<'_>) -> Result<(), Error> { + let (interface, source_ip, gateway_ip, destination_ip) = + resolve_l3_route(packet.destination_ip)?; + + send_l4_ip_resolved(&L4ResolvedPacket { + interface: &interface, + source_ip, + gateway_ip, + destination_ip, + l4_frame: packet.l4_frame, + l4_options: packet.l4_options, + l4_data: packet.l4_data, + protocol: packet.protocol, + ttl: packet.ttl, + }) + .await +} diff --git a/kernel/driver/net/core/src/l4/icmp.rs b/kernel/driver/net/core/src/l4/icmp.rs new file mode 100644 index 00000000..f62ed7df --- /dev/null +++ b/kernel/driver/net/core/src/l4/icmp.rs @@ -0,0 +1,82 @@ +use core::mem::size_of; + +use yggdrasil_abi::{ + error::Error, + net::{ + protocols::{IcmpV4Frame, InetChecksum, IpProtocol}, + types::NetValueImpl, + IpAddr, Ipv4Addr, + }, +}; + +use crate::{l3, L3Packet}; + +async fn send_v4_reply( + destination_ip: Ipv4Addr, + icmp_frame: &IcmpV4Frame, + icmp_data: &[u8], +) -> Result<(), Error> { + let mut reply_frame = IcmpV4Frame { + ty: 0, + code: 0, + checksum: u16::to_network_order(0), + rest: icmp_frame.rest, + }; + + if icmp_data.len() % 2 != 0 { + todo!(); + } + + let l4_bytes = bytemuck::bytes_of(&reply_frame); + let mut checksum = InetChecksum::new(); + checksum.add_bytes(l4_bytes, true); + checksum.add_bytes(icmp_data, true); + + reply_frame.checksum = checksum.finish().to_network_order(); + + l3::send_l4_ip(&l3::L4UnresolvedPacket { + destination_ip: IpAddr::V4(destination_ip), + l4_frame: bytemuck::bytes_of(&reply_frame), + l4_options: &[], + l4_data: icmp_data, + protocol: IpProtocol::ICMP, + ttl: 255, + }) + .await +} + +async fn handle_v4(source_address: Ipv4Addr, l3_packet: L3Packet) -> Result<(), Error> { + if l3_packet.data_length < size_of::() { + log::debug!("Truncated ICMPv4 packet"); + return Err(Error::MissingData); + } + + if l3_packet.data_length - size_of::() > 576 { + log::debug!("ICMPv4 packet too large"); + return Err(Error::MissingData); + } + + let l3_data = l3_packet.l3_data(); + let icmp_frame: &IcmpV4Frame = bytemuck::from_bytes(&l3_data[..size_of::()]); + let icmp_data = &l3_data[size_of::()..l3_packet.data_length]; + + match (icmp_frame.ty, icmp_frame.code) { + (8, 0) => send_v4_reply(source_address, icmp_frame, icmp_data).await, + (0, 0) => Ok(()), + _ => { + log::debug!( + "Ignoring unknown ICMPv4 type:code: {}:{}", + icmp_frame.ty, + icmp_frame.code + ); + Ok(()) + } + } +} + +pub async fn handle(l3_packet: L3Packet) -> Result<(), Error> { + match l3_packet.source_address { + IpAddr::V4(v4) => handle_v4(v4, l3_packet).await, + IpAddr::V6(_) => todo!(), + } +} diff --git 
diff --git a/kernel/driver/net/core/src/l4/mod.rs b/kernel/driver/net/core/src/l4/mod.rs
new file mode 100644
index 00000000..decb9436
--- /dev/null
+++ b/kernel/driver/net/core/src/l4/mod.rs
@@ -0,0 +1,3 @@
+pub mod icmp;
+pub mod tcp;
+pub mod udp;
diff --git a/kernel/driver/net/core/src/l4/tcp.rs b/kernel/driver/net/core/src/l4/tcp.rs
new file mode 100644
index 00000000..00694bc1
--- /dev/null
+++ b/kernel/driver/net/core/src/l4/tcp.rs
@@ -0,0 +1,721 @@
+use core::{
+    mem::size_of,
+    task::{Context, Poll},
+};
+
+use alloc::{vec, vec::Vec};
+use bytemuck::Zeroable;
+use libk_util::waker::QueueWaker;
+use yggdrasil_abi::{
+    error::Error,
+    net::{
+        protocols::{InetChecksum, IpProtocol, TcpFlags, TcpFrame, TcpV4PseudoHeader},
+        types::NetValueImpl,
+        IpAddr, SocketAddr,
+    },
+};
+
+use crate::{
+    l3::{self, L3Packet},
+    socket::{TcpListener, TcpSocket},
+    util::Assembler,
+};
+
+#[derive(PartialEq, Debug)]
+pub enum TcpConnectionState {
+    SynSent,
+    SynReceived,
+    Established,
+    FinWait1,
+    FinWait2,
+    Closed,
+}
+
+pub enum TcpSocketBehavior {
+    None,
+    Accept,
+    Remove,
+}
+
+struct SocketBuffer {
+    data: Vec<u8>,
+    wr: usize,
+    rd: usize,
+}
+
+pub struct TcpConnection {
+    state: TcpConnectionState,
+
+    local: SocketAddr,
+    remote: SocketAddr,
+
+    rx_window_size: u16,
+
+    // Rx half
+    // RCV.WND = rx_buffer.capacity()
+    // RCV.NXT = rx_window_start + rx_window.len()
+    // TODO RCV.UP
+    rx_buffer: SocketBuffer,
+    rx_segment_buffer: Vec<u8>,
+    rx_assembler: Assembler,
+
+    // Relative RX sequence number of window start
+    rx_window_start: usize,
+
+    // Tx half
+    // SND.UNA = tx_window_start + tx_sent_unacknowledged
+    // SND.WND = tx_buffer.capacity()
+    // SND.NXT = tx_window_start IF tx_sent_unacknowledged == 0
+    tx_buffer: Vec<u8>,
+
+    tx_window_start: usize,
+    tx_sent_unacknowledged: usize,
+
+    // IRS
+    initial_rx_seq: u32,
+    initial_tx_seq: u32,
+
+    rx_notify: QueueWaker,
+    tx_notify: QueueWaker,
+}
+
+#[allow(unused)]
+struct TcpPacket {
+    local: SocketAddr,
+    remote: SocketAddr,
+    seq: u32,
+    ack: u32,
+    window_size: u16,
+    flags: TcpFlags,
+}
+
+impl SocketBuffer {
+    pub fn with_capacity(capacity: usize) -> Self {
+        Self {
+            data: vec![0; capacity],
+            wr: 0,
+            rd: 0,
+        }
+    }
+
+    pub fn len(&self) -> usize {
+        if self.wr >= self.rd {
+            self.wr - self.rd
+        } else {
+            self.wr + self.capacity() - self.rd
+        }
+    }
+
+    pub fn capacity(&self) -> usize {
+        self.data.len()
+    }
+
+    pub fn can_read(&self) -> bool {
+        self.rd != self.wr
+    }
+
+    pub fn write(&mut self, data: &[u8]) {
+        for &byte in data {
+            self.putc(byte);
+        }
+    }
+
+    pub fn putc(&mut self, data: u8) {
+        // Ring buffer: when full, the oldest byte is overwritten
+        if (self.wr + 1) % self.capacity() == self.rd {
+            self.rd = (self.rd + 1) % self.capacity();
+        }
+        self.data[self.wr] = data;
+        self.wr = (self.wr + 1) % self.capacity();
+    }
+
+    pub fn read(&mut self, buffer: &mut [u8]) -> usize {
+        let mut amount = 0;
+        while amount != buffer.len() {
+            if self.rd == self.wr {
+                break;
+            }
+            buffer[amount] = self.data[self.rd];
+            self.rd = (self.rd + 1) % self.capacity();
+            amount += 1;
+        }
+        amount
+    }
+}
+
+impl TcpConnection {
+    pub fn new(
+        local: SocketAddr,
+        remote: SocketAddr,
+        window_size: usize,
+        tx_seq: u32,
+        rx_seq: u32,
+        state: TcpConnectionState,
+    ) -> Self {
+        debug_assert!(
+            state == TcpConnectionState::SynSent
+                || state == TcpConnectionState::Closed
+                || state == TcpConnectionState::SynReceived
+        );
+        debug_assert!(window_size < u16::MAX as usize);
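+        // Window bookkeeping is kept relative to the initial sequence numbers;
+        // rx/tx_window_start begin at 1 since the SYN itself occupies one
+        // sequence number on each side (cf. ack_number()/seq_number() below).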
+        Self {
+            state,
+
+            local,
+            remote,
+
+            rx_buffer: SocketBuffer::with_capacity(window_size),
+            rx_assembler: Assembler::new(),
+            rx_segment_buffer: Vec::with_capacity(window_size),
+            rx_window_start: 1,
+
+            tx_buffer: Vec::with_capacity(window_size),
+            tx_window_start: 1,
+            tx_sent_unacknowledged: 0,
+
+            rx_window_size: window_size as u16,
+
+            initial_rx_seq: rx_seq,
+            initial_tx_seq: tx_seq,
+
+            rx_notify: QueueWaker::new(),
+            tx_notify: QueueWaker::new(),
+        }
+    }
+
+    fn ack_number(&self) -> u32 {
+        (self.initial_rx_seq as usize + self.rx_window_start + self.rx_buffer.len()) as u32
+    }
+
+    fn seq_number(&self) -> u32 {
+        (self.initial_tx_seq as usize + self.tx_window_start) as u32
+    }
+
+    pub fn is_closing(&self) -> bool {
+        self.state == TcpConnectionState::FinWait1
+            || self.state == TcpConnectionState::FinWait2
+            || self.state == TcpConnectionState::Closed
+    }
+
+    pub fn is_closed(&self) -> bool {
+        self.state == TcpConnectionState::Closed
+    }
+
+    pub fn read_nonblocking(&mut self, buffer: &mut [u8]) -> Result<usize, Error> {
+        let amount = self.rx_buffer.read(buffer);
+        if amount == 0 && self.state != TcpConnectionState::Established {
+            // TODO ConnectionAborted?
+            return Err(Error::ConnectionReset);
+        }
+        self.rx_window_start += amount;
+        Ok(amount)
+    }
+
+    pub(crate) fn poll_receive(&self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        self.rx_notify.register(cx.waker());
+        if self.rx_buffer.can_read() {
+            self.rx_notify.remove(cx.waker());
+            Poll::Ready(Ok(()))
+        } else if self.state != TcpConnectionState::Established {
+            self.rx_notify.remove(cx.waker());
+            Poll::Ready(Err(Error::ConnectionReset))
+        } else {
+            Poll::Pending
+        }
+    }
+
+    pub(crate) fn poll_send(&self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        self.tx_notify.register(cx.waker());
+        if self.state == TcpConnectionState::Closed {
+            self.tx_notify.remove(cx.waker());
+            Poll::Ready(Err(Error::ConnectionReset))
+        } else if self.tx_sent_unacknowledged == 0 {
+            self.tx_notify.remove(cx.waker());
+            Poll::Ready(Ok(()))
+        } else {
+            Poll::Pending
+        }
+    }
+
+    pub(crate) fn poll_acknowledge(&self, cx: &mut Context<'_>) -> Poll<()> {
+        self.tx_notify.register(cx.waker());
+        if self.tx_sent_unacknowledged == 0 {
+            self.tx_notify.remove(cx.waker());
+            Poll::Ready(())
+        } else {
+            Poll::Pending
+        }
+    }
+
+    pub(crate) fn poll_finish(&self, cx: &mut Context<'_>) -> Poll<()> {
+        self.tx_notify.register(cx.waker());
+        if self.state == TcpConnectionState::FinWait2 || self.state == TcpConnectionState::Closed {
+            self.tx_notify.remove(cx.waker());
+            Poll::Ready(())
+        } else {
+            Poll::Pending
+        }
+    }
+
+    pub(crate) fn poll_established(&self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        self.rx_notify.register(cx.waker());
+        match self.state {
+            TcpConnectionState::Established => Poll::Ready(Ok(())),
+            TcpConnectionState::Closed => Poll::Ready(Err(Error::ConnectionRefused)),
+            _ => Poll::Pending,
+        }
+    }
+
+    pub(crate) async fn transmit(&mut self, data: &[u8]) -> Result<(), Error> {
+        assert_eq!(self.tx_sent_unacknowledged, 0);
+        assert_eq!(self.tx_buffer.len(), 0);
+
+        self.tx_buffer.extend_from_slice(data);
+        self.tx_sent_unacknowledged = data.len();
+
+        send(
+            self.local,
+            self.remote,
+            self.seq_number(),
+            self.ack_number(),
+            self.rx_window_size,
+            TcpFlags::ACK,
+            data,
+        )
+        .await
+    }
+
+    pub(crate) async fn finish(&mut self) -> Result<(), Error> {
+        assert_eq!(self.tx_sent_unacknowledged, 0);
+        assert_eq!(self.tx_buffer.len(), 0);
+
+        log::debug!("Finish connection {} <-> {}", self.local, self.remote);
+        send(
+            self.local,
+            self.remote,
+            self.seq_number(),
+            self.ack_number(),
+            self.rx_window_size,
+            TcpFlags::FIN | TcpFlags::ACK,
+            &[],
+        )
+        .await?;
+
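+        // Active close: after the FIN|ACK above, the connection sits in
+        // FinWait1 until the peer either ACKs it (-> FinWait2) or answers with
+        // its own FIN|ACK (-> Closed); see handle_packet() for both paths.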
+        self.state = TcpConnectionState::FinWait1;
+
+        Ok(())
+    }
+
+    pub(crate) fn notify_all(&self) {
+        self.rx_notify.wake_all();
+        self.tx_notify.wake_all();
+    }
+
+    pub(crate) async fn send_syn(&mut self) -> Result<(), Error> {
+        assert!(
+            self.state == TcpConnectionState::SynSent || self.state == TcpConnectionState::Closed
+        );
+        log::debug!("Send SYN {} -> {}", self.local, self.remote);
+        self.state = TcpConnectionState::SynSent;
+        send(
+            self.local,
+            self.remote,
+            self.initial_tx_seq,
+            0,
+            self.rx_window_size,
+            TcpFlags::SYN,
+            &[],
+        )
+        .await
+    }
+
+    fn handle_packet_payload(&mut self, data: &[u8], seq: u32) -> bool {
+        // Local side
+        let rx_window_start = self.rx_window_start + self.rx_buffer.len();
+        let rx_window_end = self.rx_window_start + self.rx_buffer.capacity();
+        // Remote side
+        let rx_segment_start = seq.wrapping_sub(self.initial_rx_seq) as usize;
+        let rx_segment_end = rx_segment_start + data.len();
+
+        if rx_segment_end >= rx_window_end || rx_segment_start < rx_window_start {
+            return false;
+        }
+
+        // Offset from expected seq
+        let segment_start_offset = rx_segment_start - rx_window_start;
+
+        // Push the data into reassembler buffer
+        assert!(segment_start_offset + data.len() <= self.rx_segment_buffer.capacity());
+        if segment_start_offset + data.len() > self.rx_segment_buffer.len() {
+            self.rx_segment_buffer
+                .resize(segment_start_offset + data.len(), 0);
+        }
+        self.rx_segment_buffer[segment_start_offset..segment_start_offset + data.len()]
+            .copy_from_slice(data);
+
+        let amount = self
+            .rx_assembler
+            .add_then_remove_front(segment_start_offset, data.len())
+            .unwrap();
+
+        if amount != 0 {
+            // Take data from reassembly buffer and append to rx_buffer
+            self.rx_buffer.write(&self.rx_segment_buffer[..amount]);
+            self.rx_segment_buffer.drain(..amount);
+            self.rx_notify.wake_one();
+        }
+
+        true
+    }
+
+    async fn handle_packet(
+        &mut self,
+        packet: TcpPacket,
+        data: &[u8],
+    ) -> Result<TcpSocketBehavior, Error> {
+        // TODO what if window_size changes?
+
+        match self.state {
+            TcpConnectionState::SynSent => {
+                if packet.flags == TcpFlags::SYN | TcpFlags::ACK {
+                    if packet.ack != self.initial_tx_seq.wrapping_add(1) {
+                        log::warn!(
+                            "Expected ACK {}, got {}",
+                            self.initial_tx_seq.wrapping_add(1),
+                            packet.ack
+                        );
+                        return Ok(TcpSocketBehavior::None);
+                    }
+
+                    log::debug!(
+                        "TCP {} -> {} got ACKed, established",
+                        self.local,
+                        self.remote
+                    );
+                    self.initial_rx_seq = packet.seq;
+
+                    // ACK the SYN+ACK
+                    send(
+                        self.local,
+                        self.remote,
+                        self.initial_tx_seq.wrapping_add(1),
+                        self.initial_rx_seq.wrapping_add(1),
+                        self.rx_window_size,
+                        TcpFlags::ACK,
+                        &[],
+                    )
+                    .await?;
+
+                    self.state = TcpConnectionState::Established;
+                    self.rx_notify.wake_all();
+                } else if packet.flags == TcpFlags::RST | TcpFlags::ACK {
+                    log::debug!("TCP {} -> {} got RSTd, closing", self.local, self.remote);
+                    self.state = TcpConnectionState::Closed;
+                    self.rx_notify.wake_all();
+                    return Ok(TcpSocketBehavior::Remove);
+                }
+
+                // TODO try re-sending SYN?
+                return Ok(TcpSocketBehavior::None);
+            }
+            TcpConnectionState::FinWait1 => {
+                // Check if connection close initiated locally got ACKed by remote
+                // TODO check ack/seq
+                if packet.flags == TcpFlags::FIN | TcpFlags::ACK {
+                    self.state = TcpConnectionState::Closed;
+                    send(
+                        self.local,
+                        self.remote,
+                        self.seq_number() + 1,
+                        self.ack_number() + 1,
+                        self.rx_window_size,
+                        TcpFlags::ACK,
+                        &[],
+                    )
+                    .await?;
+
+                    // Socket fully closed, remove from table
+                    return Ok(TcpSocketBehavior::Remove);
+                }
+                if packet.flags == TcpFlags::ACK {
+                    self.state = TcpConnectionState::FinWait2;
+                    self.tx_notify.wake_all();
+                }
+                return Ok(TcpSocketBehavior::None);
+            }
+            TcpConnectionState::FinWait2 => {
+                if packet.flags == TcpFlags::FIN | TcpFlags::ACK {
+                    self.state = TcpConnectionState::Closed;
+                    send(
+                        self.local,
+                        self.remote,
+                        self.seq_number() + 1,
+                        self.ack_number() + 1,
+                        self.rx_window_size,
+                        TcpFlags::ACK,
+                        &[],
+                    )
+                    .await?;
+
+                    // Socket fully closed, remove from table
+                    return Ok(TcpSocketBehavior::Remove);
+                }
+
+                return Ok(TcpSocketBehavior::None);
+            }
+            TcpConnectionState::Closed => {
+                log::warn!("Packet received on closed connection");
+                return Ok(TcpSocketBehavior::None);
+            }
+            TcpConnectionState::SynReceived => {
+                // TODO check ack/seq
+                // Handshake continuation expected (ACK)
+                if packet.flags == TcpFlags::ACK {
+                    self.state = TcpConnectionState::Established;
+                    return Ok(TcpSocketBehavior::Accept);
+                }
+
+                return Err(Error::InvalidArgument);
+            }
+            TcpConnectionState::Established => (),
+        }
+
+        if self.tx_sent_unacknowledged != 0 {
+            let tx_acknowledge_end = packet.ack.wrapping_sub(self.initial_tx_seq) as usize;
+
+            if tx_acknowledge_end == self.tx_window_start + self.tx_sent_unacknowledged {
+                self.tx_window_start += self.tx_sent_unacknowledged;
+                self.tx_sent_unacknowledged = 0;
+                self.tx_buffer.clear();
+                self.tx_notify.wake_one();
+            }
+        }
+
+        let mut reply_flags = TcpFlags::empty();
+        let mut behavior = TcpSocketBehavior::None;
+
+        if !data.is_empty() && self.handle_packet_payload(data, packet.seq) {
+            reply_flags |= TcpFlags::ACK;
+        }
+
+        // TODO check window resize notification
+
+        let mut ack_number = self.ack_number();
+
+        if packet.flags.contains(TcpFlags::FIN) {
+            reply_flags |= TcpFlags::ACK;
+            ack_number = ack_number.wrapping_add(1);
+
+            // Only send an actual FIN after a FIN without any data
+            if data.is_empty() {
+                reply_flags |= TcpFlags::FIN;
+                // TODO go to LastAck state and wait for ACK
+                self.state = TcpConnectionState::Closed;
+                log::trace!(
+                    "TCP connection FIN requested by remote: {} <-> {}",
+                    self.local,
+                    self.remote
+                );
+
+                behavior = TcpSocketBehavior::Remove;
+            }
+        }
+
+        if reply_flags != TcpFlags::empty() {
+            send(
+                packet.local,
+                packet.remote,
+                self.seq_number(),
+                ack_number,
+                self.rx_window_size,
+                reply_flags,
+                &[],
+            )
+            .await?;
+        }
+
+        Ok(behavior)
+    }
+}
+
+async fn send(
+    local: SocketAddr,
+    remote: SocketAddr,
+    seq: u32,
+    ack: u32,
+    window_size: u16,
+    flags: TcpFlags,
+    data: &[u8],
+) -> Result<(), Error> {
+    let (interface, source_ip, gateway_ip, destination_ip) = l3::resolve_l3_route(remote.ip())?;
+    // TODO TCPv6
+    let source_ip = source_ip.into_ipv4().unwrap();
+    let destination_ip = destination_ip.into_ipv4().unwrap();
+
+    let tcp_length = size_of::<TcpFrame>() + data.len();
+
+    let mut frame = TcpFrame {
+        source_port: local.port().to_network_order(),
+        destination_port: remote.port().to_network_order(),
+        sequence_number: seq.to_network_order(),
+        acknowledge_number: ack.to_network_order(),
+        data_offset: 5 << 4,
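+        // data_offset above: the upper four bits count 32-bit header words,
+        // so 5 << 4 encodes a 20-byte TCP header with no options.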
+        window_size: window_size.to_network_order(),
+        flags,
+        ..TcpFrame::zeroed()
+    };
+    let pseudo_header = TcpV4PseudoHeader {
+        source_address: u32::from(source_ip).to_network_order(),
+        destination_address: u32::from(destination_ip).to_network_order(),
+        _zero: 0,
+        protocol: IpProtocol::TCP,
+        tcp_length: (tcp_length as u16).to_network_order(),
+    };
+
+    let mut checksum = InetChecksum::new();
+    checksum.add_value(&pseudo_header, true);
+    checksum.add_value(&frame, true);
+    checksum.add_bytes(data, true);
+    let checksum = checksum.finish();
+
+    frame.checksum = checksum.to_network_order();
+
+    l3::send_l4_ip_resolved(&l3::L4ResolvedPacket {
+        interface: &interface,
+        source_ip: source_ip.into(),
+        gateway_ip,
+        destination_ip: destination_ip.into(),
+        l4_frame: bytemuck::bytes_of(&frame),
+        l4_options: &[],
+        l4_data: data,
+        protocol: IpProtocol::TCP,
+        ttl: 64,
+    })
+    .await
+}
+
+fn validate(source: IpAddr, destination: IpAddr, tcp_frame: &TcpFrame, data: &[u8]) -> bool {
+    // TODO TCPv6
+    let source = source.into_ipv4().unwrap();
+    let destination = destination.into_ipv4().unwrap();
+    let tcp_length = size_of::<TcpFrame>() + data.len();
+
+    let pseudo_header = TcpV4PseudoHeader {
+        source_address: u32::from(source).to_network_order(),
+        destination_address: u32::from(destination).to_network_order(),
+        _zero: 0,
+        protocol: IpProtocol::TCP,
+        tcp_length: (tcp_length as u16).to_network_order(),
+    };
+
+    let mut checksum = InetChecksum::new();
+    checksum.add_value(&pseudo_header, true);
+    checksum.add_value(tcp_frame, true);
+    checksum.add_bytes(data, true);
+    let checksum = checksum.finish();
+
+    // A valid segment checksummed together with its own checksum field sums to zero
+    checksum == 0
+}
+
+pub async fn handle(packet: L3Packet) -> Result<(), Error> {
+    if packet.data_length < size_of::<TcpFrame>() {
+        log::warn!("Truncated TCP packet");
+        return Ok(());
+    }
+
+    let l3_data = packet.l3_data();
+
+    let tcp_frame: &TcpFrame = bytemuck::from_bytes(&l3_data[..size_of::<TcpFrame>()]);
+    let tcp_data_offset = tcp_frame.data_offset();
+    let tcp_data = &l3_data[tcp_data_offset..packet.data_length];
+
+    let remote = SocketAddr::new(
+        packet.source_address,
+        u16::from_network_order(tcp_frame.source_port),
+    );
+    let local = SocketAddr::new(
+        packet.destination_address,
+        u16::from_network_order(tcp_frame.destination_port),
+    );
+
+    let seq = u32::from_network_order(tcp_frame.sequence_number);
+    let ack = u32::from_network_order(tcp_frame.acknowledge_number);
+
+    if !validate(
+        packet.source_address,
+        packet.destination_address,
+        tcp_frame,
+        &l3_data[size_of::<TcpFrame>()..packet.data_length],
+    ) {
+        log::warn!("Invalid TCP packet received");
+        return Ok(());
+    }
+
+    match tcp_frame.flags {
+        TcpFlags::SYN => {
+            if let Some(listener) = TcpListener::get(local) {
+                let window_size = u16::from_network_order(tcp_frame.window_size);
+                // Placeholder initial send sequence number
+                let tx_seq = 12345;
+
+                // Create a socket and insert it into the table
+                TcpSocket::accept_remote(
+                    listener.clone(),
+                    local,
+                    remote,
+                    window_size as usize,
+                    tx_seq,
+                    seq,
+                )?;
+
+                // Send SYN+ACK
+                send(
+                    local,
+                    remote,
+                    tx_seq,
+                    seq.wrapping_add(1),
+                    window_size,
+                    TcpFlags::SYN | TcpFlags::ACK,
+                    &[],
+                )
+                .await
+            } else {
+                // RST+ACK
+                log::warn!("SYN {} -> {}: port not listening", remote, local);
+                let window_size = u16::from_network_order(tcp_frame.window_size);
+                send(
+                    local,
+                    remote,
+                    0,
+                    seq.wrapping_add(1),
+                    window_size,
+                    TcpFlags::RST | TcpFlags::ACK,
+                    &[],
+                )
+                .await
+            }
+        }
+        _ => {
+            let packet = TcpPacket {
+                local,
+                remote,
+                window_size: u16::from_network_order(tcp_frame.window_size),
+                flags: tcp_frame.flags,
+                ack,
+                seq,
+            };
+
+            let socket = TcpSocket::get(local, remote).ok_or(Error::DoesNotExist)?;
+            let mut connection = socket.connection().write();
+            match connection.handle_packet(packet, tcp_data).await? {
+                TcpSocketBehavior::None => (),
+                TcpSocketBehavior::Accept => {
+                    socket.accept();
+                }
+                TcpSocketBehavior::Remove => {
+                    drop(connection);
+                    socket.remove_socket()?;
+                }
+            }
+            Ok(())
+        }
+    }
+}
diff --git a/kernel/driver/net/core/src/l4/udp.rs b/kernel/driver/net/core/src/l4/udp.rs
new file mode 100644
index 00000000..e99fb4c8
--- /dev/null
+++ b/kernel/driver/net/core/src/l4/udp.rs
@@ -0,0 +1,86 @@
+use core::mem::size_of;
+
+use yggdrasil_abi::{
+    error::Error,
+    net::{
+        protocols::{IpProtocol, UdpFrame},
+        types::NetValueImpl,
+        IpAddr, SocketAddr,
+    },
+};
+
+use crate::{l3, socket::UdpSocket, L3Packet};
+
+pub async fn send(
+    source_port: u16,
+    destination_ip: IpAddr,
+    destination_port: u16,
+    ttl: u8,
+    data: &[u8],
+) -> Result<(), Error> {
+    let length: u16 = (data.len() + size_of::<UdpFrame>()).try_into().unwrap();
+    let udp_frame = UdpFrame {
+        source_port: source_port.to_network_order(),
+        destination_port: destination_port.to_network_order(),
+        length: length.to_network_order(),
+        checksum: 0u16.to_network_order(),
+    };
+
+    l3::send_l4_ip(&l3::L4UnresolvedPacket {
+        destination_ip,
+        l4_frame: bytemuck::bytes_of(&udp_frame),
+        l4_options: &[],
+        l4_data: data,
+        protocol: IpProtocol::UDP,
+        ttl,
+    })
+    .await
+}
+
+// pub fn send_broadcast(
+//     v6: bool,
+//     source_port: u16,
+//     destination_port: u16,
+//     data: &[u8],
+// ) -> Result<(), Error> {
+//     let length: u16 = (data.len() + size_of::<UdpFrame>()).try_into().unwrap();
+//     let udp_frame = UdpFrame {
+//         source_port: source_port.to_network_order(),
+//         destination_port: destination_port.to_network_order(),
+//         length: length.to_network_order(),
+//         checksum: 0u16.to_network_order(),
+//     };
+//
+//     l3::send_l4_ip_broadcast(v6, IpProtocol::UDP, &udp_frame, data)
+// }
+
+pub fn handle(l3_packet: L3Packet) -> Result<(), Error> {
+    if l3_packet.data_length < size_of::<UdpFrame>() {
+        log::warn!("Truncated UDP frame received");
+        return Err(Error::MissingData);
+    }
+
+    let l3_data = l3_packet.l3_data();
+
+    let udp_frame: &UdpFrame = bytemuck::from_bytes(&l3_data[..size_of::<UdpFrame>()]);
+    let data_size = core::cmp::min(
+        udp_frame.data_length(),
+        l3_packet.data_length - size_of::<UdpFrame>(),
+    );
+    let udp_data = &l3_data[size_of::<UdpFrame>()..data_size + size_of::<UdpFrame>()];
+
+    let source = SocketAddr::new(
+        l3_packet.source_address,
+        u16::from_network_order(udp_frame.source_port),
+    );
+    let destination = SocketAddr::new(
+        l3_packet.destination_address,
+        u16::from_network_order(udp_frame.destination_port),
+    );
+
+    if let Some(socket) = UdpSocket::get(&destination) {
+        socket.packet_received(source, udp_data).ok();
+    }
+
+    Ok(())
+}
diff --git a/kernel/driver/net/core/src/lib.rs b/kernel/driver/net/core/src/lib.rs
new file mode 100644
index 00000000..f3d50deb
--- /dev/null
+++ b/kernel/driver/net/core/src/lib.rs
@@ -0,0 +1,143 @@
+#![feature(map_try_insert)]
+#![allow(clippy::type_complexity)]
+#![no_std]
+
+extern crate alloc;
+
+use core::mem::size_of;
+
+use alloc::sync::Arc;
+use bytemuck::Pod;
+use ethernet::L2Packet;
+use l3::L3Packet;
+use libk_mm::PageBox;
+use libk_thread::runtime;
+use libk_util::queue::UnboundedMpmcQueue;
+use yggdrasil_abi::{error::Error, net::protocols::EthernetFrame};
+
+pub mod ethernet;
+pub mod l3;
+pub mod l4;
+
+pub mod socket;
+
+pub mod config;
+pub mod interface;
+pub mod util;
+
+pub use interface::register_interface;
+
+pub struct Packet {
+    // TODO info about "received" interface
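+    // Assumed layout (from usage below): `buffer` holds the raw frame as
+    // handed over by the driver, `offset` is where the Ethernet header starts
+    // (drivers may reserve a prefix), and `iface` is the receiving interface id.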
+    buffer: PageBox<[u8]>,
+    offset: usize,
+    iface: u32,
+}
+
+pub struct PacketBuilder {
+    data: PageBox<[u8]>,
+    pos: usize,
+    len: usize,
+}
+
+impl PacketBuilder {
+    pub fn new(l2_offset: usize, l2_size: usize) -> Result<Self, Error> {
+        let data = PageBox::new_slice(0, l2_offset + l2_size)?;
+        Ok(Self {
+            data,
+            pos: l2_offset,
+            len: l2_offset,
+        })
+    }
+
+    #[inline]
+    pub fn push<T: Pod>(&mut self, value: &T) -> Result<(), Error> {
+        self.push_bytes(bytemuck::bytes_of(value))
+    }
+
+    pub fn push_bytes(&mut self, bytes: &[u8]) -> Result<(), Error> {
+        if self.pos + bytes.len() > self.data.len() {
+            return Err(Error::OutOfMemory);
+        }
+        self.data[self.pos..self.pos + bytes.len()].copy_from_slice(bytes);
+        self.pos += bytes.len();
+        Ok(())
+    }
+
+    pub fn finish(self) -> (PageBox<[u8]>, usize) {
+        (self.data, self.len)
+    }
+}
+
+impl Packet {
+    #[inline]
+    pub fn new(buffer: PageBox<[u8]>, offset: usize, iface: u32) -> Self {
+        Self {
+            buffer,
+            offset,
+            iface,
+        }
+    }
+}
+
+static PACKET_QUEUE: UnboundedMpmcQueue<Packet> = UnboundedMpmcQueue::new();
+static ACCEPT_QUEUE: UnboundedMpmcQueue<L3Packet> = UnboundedMpmcQueue::new();
+
+#[inline]
+pub fn receive_packet(packet: Packet) -> Result<(), Error> {
+    PACKET_QUEUE.push_back(packet);
+    Ok(())
+}
+
+pub fn start_network_tasks() -> Result<(), Error> {
+    runtime::spawn(l2_packet_handler_worker())?;
+    for _ in 0..4 {
+        runtime::spawn(l3_accept_worker())?;
+    }
+    runtime::spawn(config::network_config_service())?;
+
+    Ok(())
+}
+
+async fn l2_packet_handler_worker() {
+    loop {
+        let packet = PACKET_QUEUE.pop_front().await;
+
+        let eth_frame: &EthernetFrame = bytemuck::from_bytes(
+            &packet.buffer[packet.offset..packet.offset + size_of::<EthernetFrame>()],
+        );
+
+        let l2_packet = L2Packet {
+            interface_id: packet.iface,
+
+            source_address: eth_frame.source_mac,
+            destination_address: eth_frame.destination_mac,
+
+            l2_offset: packet.offset,
+            l3_offset: packet.offset + size_of::<EthernetFrame>(),
+
+            data: Arc::new(packet.buffer),
+        };
+
+        ethernet::handle(l2_packet);
+    }
+}
+
+async fn l3_accept_worker() {
+    loop {
+        let l3_packet = ACCEPT_QUEUE.pop_front().await;
+
+        // log::debug!(
+        //     "INPUT {} {}:{:?} -> {}:{:?}: ACCEPT",
+        //     l3_packet.protocol,
+        //     l3_packet.source_address,
+        //     l3_packet.source_port,
+        //     l3_packet.destination_address,
+        //     l3_packet.destination_port
+        // );
+
+        if let Err(error) = l3::handle_accepted(l3_packet).await {
+            log::error!("L3 handle error: {:?}", error);
+        }
+    }
+}
diff --git a/kernel/driver/net/core/src/socket.rs b/kernel/driver/net/core/src/socket.rs
new file mode 100644
index 00000000..23416657
--- /dev/null
+++ b/kernel/driver/net/core/src/socket.rs
@@ -0,0 +1,827 @@
+use core::{
+    future::{poll_fn, Future},
+    pin::Pin,
+    sync::atomic::{AtomicBool, AtomicU32, Ordering},
+    task::{Context, Poll},
+    time::Duration,
+};
+
+use alloc::{collections::BTreeMap, sync::Arc, vec::Vec};
+use libk_device::monotonic_timestamp;
+use libk_mm::PageBox;
+use libk_thread::{
+    block,
+    runtime::{run_with_timeout, FutureTimeout},
+};
+use libk_util::{
+    queue::BoundedMpmcQueue,
+    sync::{
+        spin_rwlock::{IrqSafeRwLock, IrqSafeRwLockWriteGuard},
+        IrqSafeSpinlock, IrqSafeSpinlockGuard,
+    },
+    waker::QueueWaker,
+};
+use vfs::{ConnectionSocket, FileReadiness, ListenerSocket, PacketSocket, Socket};
+use yggdrasil_abi::{
+    error::Error,
+    net::{IpAddr, Ipv4Addr, SocketAddr, SocketAddrV4, SocketInterfaceQuery, SocketOption},
+};
+
+use crate::{
+    ethernet::L2Packet,
+    interface::NetworkInterface,
+    l3::Route,
+    l4::{
+        self,
+        tcp::{TcpConnection, TcpConnectionState},
+    },
+};
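+// A bind to an unspecified address (e.g. 0.0.0.0:1234, addresses hypothetical)
+// still answers lookups for any concrete local address with that port: see the
+// fallback in SocketTable::get below.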
TcpConnectionState}, + }, +}; + +pub struct UdpSocket { + local: SocketAddr, + remote: Option, + + broadcast: AtomicBool, + + // TODO just place packets here for one less copy? + receive_queue: BoundedMpmcQueue<(SocketAddr, Vec)>, +} + +pub struct TcpSocket { + pub(crate) local: SocketAddr, + pub(crate) remote: SocketAddr, + // Listener which accepted the socket + listener: Option>, + connection: IrqSafeRwLock, +} + +pub struct TcpListener { + accept: SocketAddr, + + // Currently active sockets + sockets: IrqSafeRwLock>>, + pending_accept: IrqSafeSpinlock>>, + accept_notify: QueueWaker, +} + +pub struct RawSocket { + id: u32, + bound: IrqSafeSpinlock>, + receive_queue: BoundedMpmcQueue, +} + +pub struct SocketTable { + inner: BTreeMap>, +} + +pub struct TwoWaySocketTable { + inner: BTreeMap<(SocketAddr, SocketAddr), Arc>, +} + +impl TwoWaySocketTable { + pub const fn new() -> Self { + Self { + inner: BTreeMap::new(), + } + } + + pub fn try_insert_with Result, Error>>( + &mut self, + local: SocketAddr, + remote: SocketAddr, + with: F, + ) -> Result, Error> { + if self.inner.contains_key(&(local, remote)) { + return Err(Error::AddrInUse); + } + let socket = with()?; + self.inner.insert((local, remote), socket.clone()); + Ok(socket) + } + + pub fn try_insert_with_ephemeral_port Result, Error>>( + &mut self, + local: IpAddr, + remote: SocketAddr, + mut with: F, + ) -> Result, Error> { + for port in 32768..u16::MAX - 1 { + let local = SocketAddr::new(local, port); + + match self.try_insert_with(local, remote, || with(port)) { + Ok(socket) => return Ok(socket), + Err(Error::AddrInUse) => continue, + Err(error) => return Err(error), + } + } + Err(Error::AddrInUse) + } + + pub fn remove(&mut self, local: SocketAddr, remote: SocketAddr) -> Result<(), Error> { + match self.inner.remove(&(local, remote)) { + Some(_) => Ok(()), + None => Err(Error::DoesNotExist), + } + } + + pub fn get(&self, local: SocketAddr, remote: SocketAddr) -> Option> { + self.inner.get(&(local, remote)).cloned() + } +} + +impl SocketTable { + pub const fn new() -> Self { + Self { + inner: BTreeMap::new(), + } + } + + pub fn try_insert_with_ephemeral_port Result, Error>>( + &mut self, + local: IpAddr, + mut with: F, + ) -> Result, Error> { + for port in 32768..u16::MAX - 1 { + let local = SocketAddr::new(local, port); + + match self.try_insert_with(local, || with(port)) { + Ok(socket) => return Ok(socket), + Err(Error::AddrInUse) => continue, + Err(error) => return Err(error), + } + } + Err(Error::AddrInUse) + } + + pub fn try_insert_with Result, Error>>( + &mut self, + address: SocketAddr, + with: F, + ) -> Result, Error> { + if self.inner.contains_key(&address) { + return Err(Error::AddrInUse); + } + let socket = with()?; + self.inner.insert(address, socket.clone()); + Ok(socket) + } + + pub fn remove(&mut self, local: SocketAddr) -> Result<(), Error> { + match self.inner.remove(&local) { + Some(_) => Ok(()), + None => Err(Error::DoesNotExist), + } + } + + pub fn get_exact(&self, local: &SocketAddr) -> Option> { + self.inner.get(local).cloned() + } + + pub fn get(&self, local: &SocketAddr) -> Option> { + if let Some(socket) = self.inner.get(local) { + return Some(socket.clone()); + } + + match local { + SocketAddr::V4(_v4) => { + let unspec_v4 = SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, local.port()); + self.inner.get(&SocketAddr::V4(unspec_v4)).cloned() + } + SocketAddr::V6(_) => todo!(), + } + } +} + +static UDP_SOCKETS: IrqSafeRwLock> = IrqSafeRwLock::new(SocketTable::new()); +static TCP_SOCKETS: IrqSafeRwLock> = + 
+static TCP_SOCKETS: IrqSafeRwLock<TwoWaySocketTable<TcpSocket>> =
+    IrqSafeRwLock::new(TwoWaySocketTable::new());
+static RAW_SOCKET_ID: AtomicU32 = AtomicU32::new(0);
+static RAW_SOCKETS: IrqSafeRwLock<BTreeMap<u32, Arc<RawSocket>>> =
+    IrqSafeRwLock::new(BTreeMap::new());
+static BOUND_RAW_SOCKETS: IrqSafeRwLock<BTreeMap<u32, Vec<u32>>> =
+    IrqSafeRwLock::new(BTreeMap::new());
+static TCP_LISTENERS: IrqSafeRwLock<SocketTable<TcpListener>> =
+    IrqSafeRwLock::new(SocketTable::new());
+
+impl UdpSocket {
+    fn create_socket(local: SocketAddr) -> Arc<Self> {
+        log::debug!("UDP socket opened: {}", local);
+        Arc::new(UdpSocket {
+            local,
+            remote: None,
+            broadcast: AtomicBool::new(false),
+            receive_queue: BoundedMpmcQueue::new(128),
+        })
+    }
+
+    pub fn bind(address: SocketAddr) -> Result<Arc<Self>, Error> {
+        let mut sockets = UDP_SOCKETS.write();
+        if address.port() == 0 {
+            sockets.try_insert_with_ephemeral_port(address.ip(), |port| {
+                Ok(Self::create_socket(SocketAddr::new(address.ip(), port)))
+            })
+        } else {
+            sockets.try_insert_with(address, move || Ok(Self::create_socket(address)))
+        }
+    }
+
+    pub fn connect(&self, _address: SocketAddr) -> Result<(), Error> {
+        todo!()
+    }
+
+    pub fn get(local: &SocketAddr) -> Option<Arc<Self>> {
+        UDP_SOCKETS.read().get(local)
+    }
+
+    pub fn packet_received(&self, source: SocketAddr, data: &[u8]) -> Result<(), Error> {
+        self.receive_queue
+            .try_push_back((source, Vec::from(data)))
+            .map_err(|_| Error::QueueFull)
+    }
+}
+
+impl FileReadiness for UdpSocket {
+    fn poll_read(&self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        self.receive_queue.poll_not_empty(cx).map(Ok)
+    }
+}
+
+impl PacketSocket for UdpSocket {
+    fn send(&self, destination: Option<SocketAddr>, data: &[u8]) -> Result<usize, Error> {
+        let Some(destination) = destination else {
+            // TODO can still send without setting address if "connected"
+            return Err(Error::InvalidArgument);
+        };
+        // TODO check that destination family matches self family
+        match (self.broadcast.load(Ordering::Relaxed), destination.ip()) {
+            // SendTo in broadcast?
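+            // Broadcast sends are not implemented yet; unicast goes through the
+            // normal route resolution with a hardcoded TTL of 64.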
+            (true, _) => todo!(),
+            (false, _) => {
+                block!(
+                    l4::udp::send(
+                        self.local.port(),
+                        destination.ip(),
+                        destination.port(),
+                        64,
+                        data,
+                    )
+                    .await
+                )??;
+            }
+        }
+
+        Ok(data.len())
+    }
+
+    fn receive(&self, buffer: &mut [u8]) -> Result<(SocketAddr, usize), Error> {
+        let (source, data) = block!(self.receive_queue.pop_front().await)?;
+        if data.len() > buffer.len() {
+            // TODO check how other OSs handle this
+            return Err(Error::BufferTooSmall);
+        }
+        buffer[..data.len()].copy_from_slice(&data);
+        Ok((source, data.len()))
+    }
+}
+
+impl Socket for UdpSocket {
+    fn local_address(&self) -> SocketAddr {
+        self.local
+    }
+
+    fn remote_address(&self) -> Option<SocketAddr> {
+        self.remote
+    }
+
+    fn close(&self) -> Result<(), Error> {
+        log::debug!("UDP socket closed: {}", self.local);
+        UDP_SOCKETS.write().remove(self.local)
+    }
+
+    fn set_option(&self, option: &SocketOption) -> Result<(), Error> {
+        match option {
+            &SocketOption::Broadcast(broadcast) => {
+                log::debug!("{} broadcast: {}", self.local, broadcast);
+                self.broadcast.store(broadcast, Ordering::Relaxed);
+                Ok(())
+            }
+            _ => Err(Error::InvalidOperation),
+        }
+    }
+
+    fn get_option(&self, option: &mut SocketOption) -> Result<(), Error> {
+        match option {
+            SocketOption::Broadcast(broadcast) => {
+                *broadcast = self.broadcast.load(Ordering::Relaxed);
+                Ok(())
+            }
+            _ => Err(Error::InvalidOperation),
+        }
+    }
+}
+
+impl RawSocket {
+    pub fn bind() -> Result<Arc<Self>, Error> {
+        let id = RAW_SOCKET_ID.fetch_add(1, Ordering::SeqCst);
+        let socket = Self {
+            id,
+            bound: IrqSafeSpinlock::new(None),
+            receive_queue: BoundedMpmcQueue::new(256),
+        };
+        let socket = Arc::new(socket);
+        RAW_SOCKETS.write().insert(id, socket.clone());
+
+        Ok(socket)
+    }
+
+    fn bound_packet_received(&self, packet: L2Packet) {
+        // TODO do something with the dropped packet?
+        self.receive_queue.try_push_back(packet).ok();
+    }
+
+    pub fn packet_received(packet: L2Packet) {
+        let bound_sockets = BOUND_RAW_SOCKETS.read();
+        let raw_sockets = RAW_SOCKETS.read();
+
+        if let Some(ids) = bound_sockets.get(&packet.interface_id) {
+            for id in ids {
+                let socket = raw_sockets.get(id).unwrap();
+                socket.bound_packet_received(packet.clone());
+            }
+        }
+    }
+}
+
+impl FileReadiness for RawSocket {
+    fn poll_read(&self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        self.receive_queue.poll_not_empty(cx).map(Ok)
+    }
+}
+
+impl Socket for RawSocket {
+    fn get_option(&self, option: &mut SocketOption) -> Result<(), Error> {
+        match option {
+            SocketOption::BoundHardwareAddress(mac) => {
+                let bound = self.bound.lock().ok_or(Error::DoesNotExist)?;
+                let interface = NetworkInterface::get(bound).unwrap();
+                *mac = interface.mac;
+                Ok(())
+            }
+            _ => Err(Error::InvalidOperation),
+        }
+    }
+
+    fn set_option(&self, option: &SocketOption) -> Result<(), Error> {
+        match option {
+            SocketOption::BindInterface(query) => {
+                let mut bound = self.bound.lock();
+                if bound.is_some() {
+                    return Err(Error::AlreadyExists);
+                }
+
+                let mut bound_sockets = BOUND_RAW_SOCKETS.write();
+
+                let interface = match *query {
+                    SocketInterfaceQuery::ById(id) => NetworkInterface::get(id),
+                    SocketInterfaceQuery::ByName(name) => NetworkInterface::query_by_name(name),
+                }?;
+
+                let list = bound_sockets.entry(interface.id).or_default();
+                bound.replace(interface.id);
+                list.push(self.id);
+
+                Ok(())
+            }
+            SocketOption::UnbindInterface => todo!(),
+            _ => Err(Error::InvalidOperation),
+        }
+    }
+
+    fn close(&self) -> Result<(), Error> {
+        let bound = self.bound.lock().take();
+
+        if let Some(bound) = bound {
+            let mut bound_sockets = BOUND_RAW_SOCKETS.write();
+            let mut clear = false;
+
+            if let Some(list) = bound_sockets.get_mut(&bound) {
+                list.retain(|&item| item != self.id);
+                clear = list.is_empty();
+            }
+
+            if clear {
+                bound_sockets.remove(&bound);
+            }
+        }
+
+        RAW_SOCKETS.write().remove(&self.id).unwrap();
+
+        Ok(())
+    }
+
+    fn local_address(&self) -> SocketAddr {
+        SocketAddr::V4(SocketAddrV4::new(Ipv4Addr::UNSPECIFIED, 0))
+    }
+
+    fn remote_address(&self) -> Option<SocketAddr> {
+        None
+    }
+}
+
+impl PacketSocket for RawSocket {
+    fn send(&self, _destination: Option<SocketAddr>, data: &[u8]) -> Result<usize, Error> {
+        // TODO cap by MTU?
+        let bound = self.bound.lock().ok_or(Error::InvalidOperation)?;
+        let interface = NetworkInterface::get(bound).unwrap();
+        let l2_offset = interface.device.packet_prefix_size();
+        if data.len() > 4096 - l2_offset {
+            return Err(Error::InvalidArgument);
+        }
+        let mut packet = PageBox::new_slice(0, l2_offset + data.len())?;
+        packet[l2_offset..l2_offset + data.len()].copy_from_slice(data);
+        interface.device.transmit(packet)?;
+        Ok(data.len())
+    }
+
+    fn receive(&self, buffer: &mut [u8]) -> Result<(SocketAddr, usize), Error> {
+        let data = block!(self.receive_queue.pop_front().await)?;
+        let full_len = data.data.len();
+        let len = full_len - data.l2_offset;
+        if buffer.len() < len {
+            return Err(Error::BufferTooSmall);
+        }
+        buffer[..len].copy_from_slice(&data.data[data.l2_offset..full_len]);
+        Ok((SocketAddr::NULL_V4, len))
+    }
+}
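+// Connection establishment retries the SYN with exponential backoff: 200 ms,
+// 400 ms, 800 ms, 1.6 s, 3.2 s (five attempts, see connect_async below), then
+// gives up with TimedOut.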
+impl TcpSocket {
+    pub fn connect(remote: SocketAddr) -> Result<(SocketAddr, Arc<TcpSocket>), Error> {
+        block!(Self::connect_async(remote).await)?
+    }
+
+    pub fn accept_remote(
+        listener: Arc<TcpListener>,
+        local: SocketAddr,
+        remote: SocketAddr,
+        remote_window_size: usize,
+        tx_seq: u32,
+        rx_seq: u32,
+    ) -> Result<Arc<Self>, Error> {
+        let mut sockets = TCP_SOCKETS.write();
+        sockets.try_insert_with(local, remote, move || {
+            let connection = TcpConnection::new(
+                local,
+                remote,
+                remote_window_size,
+                tx_seq,
+                rx_seq,
+                TcpConnectionState::SynReceived,
+            );
+
+            log::debug!("Accepted TCP socket {} -> {}", local, remote);
+
+            let socket = Self {
+                local,
+                remote,
+                listener: Some(listener),
+                connection: IrqSafeRwLock::new(connection),
+            };
+
+            Ok(Arc::new(socket))
+        })
+    }
+
+    pub fn connection(&self) -> &IrqSafeRwLock<TcpConnection> {
+        &self.connection
+    }
+
+    pub(crate) fn accept(self: &Arc<Self>) {
+        if let Some(listener) = self.listener.as_ref() {
+            listener.accept_socket(self.clone());
+        }
+    }
+
+    pub fn get(local: SocketAddr, remote: SocketAddr) -> Option<Arc<Self>> {
+        TCP_SOCKETS.read().get(local, remote)
+    }
+
+    pub fn receive_async<'a>(
+        &'a self,
+        buffer: &'a mut [u8],
+    ) -> impl Future<Output = Result<usize, Error>> + 'a {
+        // TODO timeout here
+        // TODO don't throw ConnectionReset immediately
+        struct F<'f> {
+            socket: &'f TcpSocket,
+            buffer: &'f mut [u8],
+        }
+
+        impl<'f> Future for F<'f> {
+            type Output = Result<usize, Error>;
+
+            fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+                match self.socket.poll_receive(cx) {
+                    Poll::Ready(Ok(mut lock)) => Poll::Ready(lock.read_nonblocking(self.buffer)),
+                    Poll::Ready(Err(error)) => Poll::Ready(Err(error)),
+                    Poll::Pending => Poll::Pending,
+                }
+            }
+        }
+
+        F {
+            socket: self,
+            buffer,
+        }
+    }
+
+    pub async fn send_async(&self, data: &[u8]) -> Result<usize, Error> {
+        let mut pos = 0;
+        let mut rem = data.len();
+        while rem != 0 {
+            // TODO check MTU
+            let amount = rem.min(512);
+            self.send_segment_async(&data[pos..pos + amount]).await?;
+            pos += amount;
+            rem -= amount;
+        }
+        Ok(pos)
+    }
+
+    pub async fn close_async(&self, remove_from_listener: bool) -> Result<(), Error> {
+        // TODO timeout here
+        // Already closing
+        if self.connection.read().is_closing() {
+            return Ok(());
+        }
+
+        // Wait for all sent data to be acknowledged
+        {
+            let mut connection = poll_fn(|cx| {
+                let connection = self.connection.write();
+                match connection.poll_send(cx) {
+                    Poll::Ready(Ok(())) => Poll::Ready(Ok(connection)),
+                    Poll::Ready(Err(error)) => Poll::Ready(Err(error)),
+                    Poll::Pending => Poll::Pending,
+                }
+            })
+            .await?;
+
+            connection.finish().await?;
+        }
+
+        log::debug!(
+            "TCP socket closed (FinWait2/Closed): {} <-> {}",
+            self.local,
+            self.remote
+        );
+
+        // Wait for connection to get closed
+        poll_fn(|cx| {
+            let connection = self.connection.read();
+            connection.poll_finish(cx)
+        })
+        .await;
+
+        if remove_from_listener {
+            if let Some(listener) = self.listener.as_ref() {
+                listener.remove_socket(self.remote);
+            };
+        }
+
+        Ok(())
+    }
+
+    pub(crate) fn remove_socket(&self) -> Result<(), Error> {
+        log::debug!(
+            "TCP socket closed and removed: {} <-> {}",
+            self.local,
+            self.remote
+        );
+        let connection = self.connection.read();
+        debug_assert!(connection.is_closed());
+        TCP_SOCKETS.write().remove(self.local, self.remote)?;
+        connection.notify_all();
+        Ok(())
+    }
+
+    fn poll_receive(
+        &self,
+        cx: &mut Context<'_>,
+    ) -> Poll<Result<IrqSafeRwLockWriteGuard<'_, TcpConnection>, Error>> {
+        let lock = self.connection.write();
+        match lock.poll_receive(cx) {
+            Poll::Ready(Ok(())) => Poll::Ready(Ok(lock)),
+            Poll::Ready(Err(error)) => Poll::Ready(Err(error)),
+            Poll::Pending => Poll::Pending,
+        }
+    }
+
+    async fn send_segment_async(&self, data: &[u8]) -> Result<(), Error> {
+        // TODO timeout here
+        {
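+            // Two-phase send: wait until nothing is unacknowledged (poll_send),
+            // transmit the segment, then wait below for the peer's ACK to clear
+            // tx_sent_unacknowledged before returning.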
+            let mut connection = poll_fn(|cx| {
+                let connection = self.connection.write();
+                match connection.poll_send(cx) {
+                    Poll::Ready(Ok(())) => Poll::Ready(Ok(connection)),
+                    Poll::Ready(Err(error)) => Poll::Ready(Err(error)),
+                    Poll::Pending => Poll::Pending,
+                }
+            })
+            .await?;
+
+            connection.transmit(data).await?;
+        }
+
+        poll_fn(|cx| {
+            let connection = self.connection.read();
+            connection.poll_acknowledge(cx)
+        })
+        .await;
+
+        Ok(())
+    }
+
+    async fn connect_async(remote: SocketAddr) -> Result<(SocketAddr, Arc<Self>), Error> {
+        // Lookup route to remote
+        let (interface_id, _, remote_ip) =
+            Route::lookup(remote.ip()).ok_or(Error::HostUnreachable)?;
+        let remote = SocketAddr::new(remote_ip, remote.port());
+        let interface = NetworkInterface::get(interface_id)?;
+        let local_ip = interface.address.read().ok_or(Error::NetworkUnreachable)?;
+
+        let socket = {
+            let mut sockets = TCP_SOCKETS.write();
+            sockets.try_insert_with_ephemeral_port(local_ip, remote, |port| {
+                let t = monotonic_timestamp()?;
+                let tx_seq = t.as_micros() as u32;
+                let local = SocketAddr::new(local_ip, port);
+                let connection =
+                    TcpConnection::new(local, remote, 16384, tx_seq, 0, TcpConnectionState::Closed);
+
+                let socket = Self {
+                    local,
+                    remote,
+                    listener: None,
+                    connection: IrqSafeRwLock::new(connection),
+                };
+
+                Ok(Arc::new(socket))
+            })?
+        };
+
+        let mut t = 200;
+        for _ in 0..5 {
+            let timeout = Duration::from_millis(t);
+            log::debug!("Try SYN with timeout={:?}", timeout);
+            match socket.try_connect(timeout).await {
+                Ok(()) => return Ok((socket.local, socket)),
+                Err(Error::TimedOut) => (),
+                Err(error) => return Err(error),
+            }
+            t *= 2;
+        }
+
+        // Couldn't establish
+        Err(Error::TimedOut)
+    }
+
+    async fn try_connect(&self, timeout: Duration) -> Result<(), Error> {
+        {
+            let mut connection = self.connection.write();
+            connection.send_syn().await?;
+        }
+
+        let fut = poll_fn(|cx| {
+            let connection = self.connection.read();
+            connection.poll_established(cx)
+        });
+
+        match run_with_timeout(timeout, fut).await {
+            FutureTimeout::Ok(value) => value,
+            FutureTimeout::Timeout => Err(Error::TimedOut),
+        }
+    }
+}
+
+impl Socket for TcpSocket {
+    fn local_address(&self) -> SocketAddr {
+        self.local
+    }
+
+    fn remote_address(&self) -> Option<SocketAddr> {
+        Some(self.remote)
+    }
+
+    fn close(&self) -> Result<(), Error> {
+        block!(self.close_async(true).await)?
+    }
+}
+
+impl FileReadiness for TcpSocket {
+    fn poll_read(&self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        self.poll_receive(cx).map_ok(|_| ())
+    }
+}
+
+impl ConnectionSocket for TcpSocket {
+    fn receive(&self, buffer: &mut [u8]) -> Result<usize, Error> {
+        block!(self.receive_async(buffer).await)?
+    }
+
+    fn send(&self, data: &[u8]) -> Result<usize, Error> {
+        block!(self.send_async(data).await)?
+    }
+}
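+// Note: block! is assumed to drive the contained future to completion on the
+// kernel executor, bridging these synchronous vfs trait methods to the async
+// socket implementation above.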
+
+impl TcpListener {
+    pub fn bind(accept: SocketAddr) -> Result<Arc<Self>, Error> {
+        TCP_LISTENERS.write().try_insert_with(accept, || {
+            let listener = TcpListener {
+                accept,
+                sockets: IrqSafeRwLock::new(BTreeMap::new()),
+                pending_accept: IrqSafeSpinlock::new(Vec::new()),
+                accept_notify: QueueWaker::new(),
+            };
+
+            log::debug!("TCP Listener opened: {}", accept);
+
+            Ok(Arc::new(listener))
+        })
+    }
+
+    pub fn get(local: SocketAddr) -> Option<Arc<Self>> {
+        TCP_LISTENERS.read().get(&local)
+    }
+
+    pub fn accept_async(&self) -> impl Future<Output = Result<Arc<TcpSocket>, Error>> + '_ {
+        struct F<'f> {
+            listener: &'f TcpListener,
+        }
+
+        impl<'f> Future for F<'f> {
+            type Output = Result<Arc<TcpSocket>, Error>;
+
+            fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+                match self.listener.poll_accept(cx) {
+                    Poll::Ready(mut lock) => Poll::Ready(Ok(lock.pop().unwrap())),
+                    Poll::Pending => Poll::Pending,
+                }
+            }
+        }
+
+        F { listener: self }
+    }
+
+    fn accept_socket(&self, socket: Arc<TcpSocket>) {
+        log::debug!("{}: accept {}", self.accept, socket.remote);
+        self.sockets.write().insert(socket.remote, socket.clone());
+        self.pending_accept.lock().push(socket);
+        self.accept_notify.wake_one();
+    }
+
+    fn remove_socket(&self, remote: SocketAddr) {
+        log::debug!("Remove client {}", remote);
+        self.sockets.write().remove(&remote);
+    }
+
+    fn poll_accept(
+        &self,
+        cx: &mut Context<'_>,
+    ) -> Poll<IrqSafeSpinlockGuard<'_, Vec<Arc<TcpSocket>>>> {
+        let lock = self.pending_accept.lock();
+        self.accept_notify.register(cx.waker());
+        if !lock.is_empty() {
+            self.accept_notify.remove(cx.waker());
+            Poll::Ready(lock)
+        } else {
+            Poll::Pending
+        }
+    }
+}
+
+impl Socket for TcpListener {
+    fn local_address(&self) -> SocketAddr {
+        self.accept
+    }
+
+    fn remote_address(&self) -> Option<SocketAddr> {
+        None
+    }
+
+    fn close(&self) -> Result<(), Error> {
+        // TODO if clients not closed already, send RST?
+        TCP_LISTENERS.write().remove(self.accept)
+    }
+}
+
+impl FileReadiness for TcpListener {
+    fn poll_read(&self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        self.poll_accept(cx).map(|_| Ok(()))
+    }
+}
+
+impl ListenerSocket for TcpListener {
+    fn accept(&self) -> Result<(SocketAddr, Arc<dyn ConnectionSocket>), Error> {
+        let socket = block!(self.accept_async().await)??;
+        let remote = socket.remote;
+        Ok((remote, socket))
+    }
+}
diff --git a/kernel/driver/net/core/src/util.rs b/kernel/driver/net/core/src/util.rs
new file mode 100644
index 00000000..1d012c69
--- /dev/null
+++ b/kernel/driver/net/core/src/util.rs
@@ -0,0 +1,292 @@
+// This TCP reassembler was taken from smoltcp-rs/smoltcp:
+//
+// https://github.com/smoltcp-rs/smoltcp
+
+use core::fmt;
+
+pub const ASSEMBLER_MAX_SEGMENT_COUNT: usize = 32;
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub struct TooManyHolesError;
+
+impl fmt::Display for TooManyHolesError {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "too many holes")
+    }
+}
+
+/// A contiguous chunk of absent data, followed by a contiguous chunk of present data.
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+struct Contig {
+    hole_size: usize,
+    data_size: usize,
+}
+
+impl fmt::Display for Contig {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        if self.has_hole() {
+            write!(f, "({})", self.hole_size)?;
+        }
+        if self.has_hole() && self.has_data() {
+            write!(f, " ")?;
+        }
+        if self.has_data() {
+            write!(f, "{}", self.data_size)?;
+        }
+        Ok(())
+    }
+}
+
+impl Contig {
+    const fn empty() -> Contig {
+        Contig {
+            hole_size: 0,
+            data_size: 0,
+        }
+    }
+
+    fn hole_and_data(hole_size: usize, data_size: usize) -> Contig {
+        Contig {
+            hole_size,
+            data_size,
+        }
+    }
+
+    fn has_hole(&self) -> bool {
+        self.hole_size != 0
+    }
+
+    fn has_data(&self) -> bool {
+        self.data_size != 0
+    }
+
+    fn total_size(&self) -> usize {
+        self.hole_size + self.data_size
+    }
+
+    fn shrink_hole_by(&mut self, size: usize) {
+        self.hole_size -= size;
+    }
+
+    fn shrink_hole_to(&mut self, size: usize) {
+        debug_assert!(self.hole_size >= size);
+
+        let total_size = self.total_size();
+        self.hole_size = size;
+        self.data_size = total_size - size;
+    }
+}
+
+/// A buffer (re)assembler.
+///
+/// Currently, up to a hardcoded limit of 32 holes can be tracked in the buffer.
+#[derive(Debug, PartialEq, Eq, Clone)]
+pub struct Assembler {
+    contigs: [Contig; ASSEMBLER_MAX_SEGMENT_COUNT],
+}
+
+impl fmt::Display for Assembler {
+    fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
+        write!(f, "[ ")?;
+        for contig in self.contigs.iter() {
+            if !contig.has_data() {
+                break;
+            }
+            write!(f, "{contig} ")?;
+        }
+        write!(f, "]")?;
+        Ok(())
+    }
+}
+
+// Invariant on Assembler::contigs:
+// - There's an index `i` where all contigs before have data, and all contigs after don't (are unused).
+// - All contigs with data must have hole_size != 0, except the first.
+
+impl Assembler {
+    /// Create a new buffer assembler.
+    pub const fn new() -> Assembler {
+        const EMPTY: Contig = Contig::empty();
+        Assembler {
+            contigs: [EMPTY; ASSEMBLER_MAX_SEGMENT_COUNT],
+        }
+    }
+
+    pub fn clear(&mut self) {
+        self.contigs.fill(Contig::empty());
+    }
+
+    fn front(&self) -> Contig {
+        self.contigs[0]
+    }
+
+    /// Return length of the front contiguous range without removing it from the assembler
+    pub fn peek_front(&self) -> usize {
+        let front = self.front();
+        if front.has_hole() {
+            0
+        } else {
+            front.data_size
+        }
+    }
+
+    fn back(&self) -> Contig {
+        self.contigs[self.contigs.len() - 1]
+    }
+
+    /// Return whether the assembler contains no data.
+    pub fn is_empty(&self) -> bool {
+        !self.front().has_data()
+    }
+
+    /// Remove a contig at the given index.
+    fn remove_contig_at(&mut self, at: usize) {
+        debug_assert!(self.contigs[at].has_data());
+
+        for i in at..self.contigs.len() - 1 {
+            if !self.contigs[i].has_data() {
+                return;
+            }
+            self.contigs[i] = self.contigs[i + 1];
+        }
+
+        // Removing the last one.
+        self.contigs[self.contigs.len() - 1] = Contig::empty();
+    }
+
+    /// Add a contig at the given index, and return a pointer to it.
+    fn add_contig_at(&mut self, at: usize) -> Result<&mut Contig, TooManyHolesError> {
+        if self.back().has_data() {
+            return Err(TooManyHolesError);
+        }
+
+        for i in (at + 1..self.contigs.len()).rev() {
+            self.contigs[i] = self.contigs[i - 1];
+        }
+
+        self.contigs[at] = Contig::empty();
+        Ok(&mut self.contigs[at])
+    }
+
+    /// Add a new contiguous range to the assembler,
+    /// or return `Err(TooManyHolesError)` if too many discontinuities are already recorded.
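+    ///
+    /// For example (offsets hypothetical): on an empty assembler, `add(4, 4)`
+    /// records one 4-byte hole followed by 4 bytes of data; a subsequent
+    /// `add(0, 4)` fills the hole and everything merges into a single 8-byte contig.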
+    pub fn add(&mut self, mut offset: usize, size: usize) -> Result<(), TooManyHolesError> {
+        if size == 0 {
+            return Ok(());
+        }
+
+        let mut i = 0;
+
+        // Find index of the contig containing the start of the range.
+        loop {
+            if i == self.contigs.len() {
+                // The new range is after all the previous ranges, but there's no space to add it.
+                return Err(TooManyHolesError);
+            }
+            let contig = &mut self.contigs[i];
+            if !contig.has_data() {
+                // The new range is after all the previous ranges. Add it.
+                *contig = Contig::hole_and_data(offset, size);
+                return Ok(());
+            }
+            if offset <= contig.total_size() {
+                break;
+            }
+            offset -= contig.total_size();
+            i += 1;
+        }
+
+        let contig = &mut self.contigs[i];
+        if offset < contig.hole_size {
+            // Range starts within the hole.
+
+            if offset + size < contig.hole_size {
+                // Range also ends within the hole.
+                let new_contig = self.add_contig_at(i)?;
+                new_contig.hole_size = offset;
+                new_contig.data_size = size;
+
+                // Previous contigs[index] got moved to contigs[index+1]
+                self.contigs[i + 1].shrink_hole_by(offset + size);
+                return Ok(());
+            }
+
+            // The range being added covers both a part of the hole and a part of the data
+            // in this contig, shrink the hole in this contig.
+            contig.shrink_hole_to(offset);
+        }
+
+        // coalesce contigs to the right.
+        let mut j = i + 1;
+        while j < self.contigs.len()
+            && self.contigs[j].has_data()
+            && offset + size >= self.contigs[i].total_size() + self.contigs[j].hole_size
+        {
+            self.contigs[i].data_size += self.contigs[j].total_size();
+            j += 1;
+        }
+        let shift = j - i - 1;
+        if shift != 0 {
+            for x in i + 1..self.contigs.len() {
+                if !self.contigs[x].has_data() {
+                    break;
+                }
+
+                self.contigs[x] = self
+                    .contigs
+                    .get(x + shift)
+                    .copied()
+                    .unwrap_or_else(Contig::empty);
+            }
+        }
+
+        if offset + size > self.contigs[i].total_size() {
+            // The added range still extends beyond the current contig. Increase data size.
+            let left = offset + size - self.contigs[i].total_size();
+            self.contigs[i].data_size += left;
+
+            // Decrease hole size of the next, if any.
+            if i + 1 < self.contigs.len() && self.contigs[i + 1].has_data() {
+                self.contigs[i + 1].hole_size -= left;
+            }
+        }
+
+        Ok(())
+    }
+
+    /// Remove a contiguous range from the front of the assembler.
+    /// If no such range, return 0.
+    pub fn remove_front(&mut self) -> usize {
+        let front = self.front();
+        if front.has_hole() || !front.has_data() {
+            0
+        } else {
+            self.remove_contig_at(0);
+            debug_assert!(front.data_size > 0);
+            front.data_size
+        }
+    }
+
+    /// Add a segment, then remove_front.
+    ///
+    /// This is equivalent to calling `add` then `remove_front` individually,
+    /// except it's guaranteed to not fail when offset = 0.
+    /// This is required for TCP: we must never drop the next expected segment, or
+    /// the protocol might get stuck.
+    pub fn add_then_remove_front(
+        &mut self,
+        offset: usize,
+        size: usize,
+    ) -> Result<usize, TooManyHolesError> {
+        // This is the only case where a segment at offset=0 would cause the
+        // total amount of contigs to rise (and therefore can potentially cause
+        // a TooManyHolesError). Handle it in a way that is guaranteed to succeed.
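+        // E.g. if the front contig is (hole 8, data 4), add_then_remove_front(0, 4)
+        // takes this fast path: the four bytes arriving right at the window start
+        // are consumed immediately (Ok(4)) and the front hole shrinks to 4.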
+        if offset == 0 && size < self.contigs[0].hole_size {
+            self.contigs[0].hole_size -= size;
+            return Ok(size);
+        }
+
+        self.add(offset, size)?;
+        Ok(self.remove_front())
+    }
+}
diff --git a/kernel/driver/net/loopback/Cargo.toml b/kernel/driver/net/loopback/Cargo.toml
new file mode 100644
index 00000000..d7e37a79
--- /dev/null
+++ b/kernel/driver/net/loopback/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "ygg_driver_net_loopback"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" }
+libk-util = { path = "../../../libk/libk-util" }
+libk-mm = { path = "../../../libk/libk-mm" }
+
+ygg_driver_net_core = { path = "../../net/core" }
+
+bytemuck = { version = "1.14.0", features = ["derive"] }
diff --git a/kernel/driver/net/loopback/src/lib.rs b/kernel/driver/net/loopback/src/lib.rs
new file mode 100644
index 00000000..a422da34
--- /dev/null
+++ b/kernel/driver/net/loopback/src/lib.rs
@@ -0,0 +1,41 @@
+#![no_std]
+
+use libk_mm::PageBox;
+use libk_util::OneTimeInit;
+use ygg_driver_net_core::{
+    interface::{NetworkDevice, NetworkInterfaceType},
+    Packet,
+};
+use yggdrasil_abi::{
+    error::Error,
+    net::{IpAddr, Ipv4Addr, MacAddress},
+};
+
+struct LoopbackDevice;
+
+impl NetworkDevice for LoopbackDevice {
+    fn transmit(&self, packet: PageBox<[u8]>) -> Result<(), Error> {
+        // Loopback: feed transmitted packets straight back into the receive path
+        ygg_driver_net_core::receive_packet(Packet::new(packet, 0, *LOOPBACK_ID.get()))
+    }
+
+    fn packet_prefix_size(&self) -> usize {
+        0
+    }
+
+    fn read_hardware_address(&self) -> MacAddress {
+        MacAddress::UNSPECIFIED
+    }
+}
+
+static LOOPBACK: OneTimeInit<LoopbackDevice> = OneTimeInit::new();
+static LOOPBACK_ID: OneTimeInit<u32> = OneTimeInit::new();
+
+pub fn init() {
+    let loopback = LOOPBACK.init(LoopbackDevice);
+    let interface =
+        ygg_driver_net_core::register_interface(NetworkInterfaceType::Loopback, loopback);
+
+    LOOPBACK_ID.init(interface.id());
+
+    interface.set_address(IpAddr::V4(Ipv4Addr::LOOPBACK));
+}
diff --git a/kernel/driver/usb/xhci/Cargo.toml b/kernel/driver/usb/xhci/Cargo.toml
new file mode 100644
index 00000000..e4f964f3
--- /dev/null
+++ b/kernel/driver/usb/xhci/Cargo.toml
@@ -0,0 +1,23 @@
+[package]
+name = "ygg_driver_usb_xhci"
+version = "0.1.0"
+edition = "2021"
+authors = ["Mark Poliakov"]
+
+[dependencies]
+yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" }
+device-api = { path = "../../../lib/device-api", features = ["derive"] }
+ygg_driver_pci = { path = "../../bus/pci" }
+ygg_driver_usb = { path = "../../bus/usb" }
+
+libk-util = { path = "../../../libk/libk-util" }
+libk-mm = { path = "../../../libk/libk-mm" }
+libk-thread = { path = "../../../libk/libk-thread" }
+
+xhci_lib = { git = "https://github.com/rust-osdev/xhci.git", package = "xhci" }
+
+atomic_enum = "0.2.0"
+log = "0.4.20"
+tock-registers = "0.9.0"
+bytemuck = { version = "1.14.0", features = ["derive"] }
+futures-util = { version = "0.3.28", default-features = false, features = ["alloc", "async-await"] }
diff --git a/kernel/driver/usb/xhci/src/controller.rs b/kernel/driver/usb/xhci/src/controller.rs
new file mode 100644
index 00000000..787a9fb0
--- /dev/null
+++ b/kernel/driver/usb/xhci/src/controller.rs
@@ -0,0 +1,383 @@
+use core::{future::poll_fn, sync::atomic::Ordering, task::Poll, time::Duration};
+
+use alloc::{boxed::Box, collections::BTreeMap, sync::Arc, vec, vec::Vec};
+use atomic_enum::atomic_enum;
+use device_api::{interrupt::InterruptHandler, Device};
+use futures_util::task::AtomicWaker;
+use libk_mm::{
+    address::{AsPhysicalAddress, PhysicalAddress},
+    PageBox,
+};
+use libk_thread::runtime::{self, FutureTimeout};
+use libk_util::{sync::spin_rwlock::IrqSafeRwLock, OneTimeInit};
+use ygg_driver_usb::{
+    bus::UsbBusManager,
+    device::{UsbBusAddress, UsbDevice, UsbDeviceAccess},
+    error::UsbError,
+    pipe::control::UsbControlPipeAccess,
+    util::UsbAddressAllocator,
+    UsbHostController,
+};
+use yggdrasil_abi::error::Error;
+
+use crate::{
+    device::XhciBusDevice,
+    pipe::ControlPipe,
+    regs::{Mapper, PortSpeed, Regs},
+    ring::{
+        CommandExecutor, CommandRing, ControlTransferRing, Event, EventRing, EventRingSegmentTable,
+        GenericTransferRing,
+    },
+    XhciContext,
+};
+
+#[atomic_enum]
+#[derive(PartialEq, Eq)]
+pub enum PortState {
+    Disconnected, // Default + set by "handle detach"
+    Init,         // Set by "port task"
+    Running,      // Set by "port task"
+}
+
+struct PortStruct {
+    state: AtomicPortState,
+    notify: AtomicWaker,
+    address: IrqSafeRwLock<Option<UsbBusAddress>>,
+}
+
+pub struct Xhci {
+    regs: Regs,
+
+    bus_address: OneTimeInit<u16>,
+    address_allocator: UsbAddressAllocator,
+
+    port_count: usize,
+    // TODO use to allocate proper contexts
+    #[allow(unused)]
+    context_size: usize,
+
+    dcbaa: IrqSafeRwLock<PageBox<[PhysicalAddress]>>,
+    endpoints: IrqSafeRwLock<BTreeMap<(u8, u8), Arc<dyn GenericTransferRing>>>,
+    event_ring: EventRing,
+    pub(crate) command_ring: CommandRing,
+
+    port_states: Vec<PortStruct>,
+}
+
+impl Xhci {
+    pub fn new(regs: xhci_lib::Registers<Mapper>) -> Result<Self, UsbError> {
+        let event_ring = EventRing::new(128)?;
+        let command_ring = CommandRing::new(128)?;
+
+        let regs = Regs::from(regs);
+
+        let port_count = regs.port_count();
+        let slot_count = regs.max_slot_count();
+        let context_size = regs.context_size();
+
+        let dcbaa = PageBox::new_slice(PhysicalAddress::ZERO, slot_count + 1)
+            .map_err(UsbError::MemoryError)?;
+
+        Ok(Self {
+            regs,
+
+            bus_address: OneTimeInit::new(),
+            address_allocator: UsbAddressAllocator::new(),
+
+            port_count,
+            context_size,
+
+            event_ring,
+            command_ring,
+
+            dcbaa: IrqSafeRwLock::new(dcbaa),
+            endpoints: IrqSafeRwLock::new(BTreeMap::new()),
+
+            port_states: Vec::from_iter((0..port_count).map(|_| PortStruct {
+                state: AtomicPortState::new(PortState::Disconnected),
+                notify: AtomicWaker::new(),
+                address: IrqSafeRwLock::new(None),
+            })),
+        })
+    }
+
+    pub fn register_device_context(&self, slot_id: u8, context: PhysicalAddress) {
+        self.dcbaa.write()[slot_id as usize] = context;
+    }
+
+    pub fn register_endpoint(
+        &self,
+        slot_id: u8,
+        endpoint_id: u8,
+        ring: Arc<dyn GenericTransferRing>,
+    ) {
+        self.endpoints.write().insert((slot_id, endpoint_id), ring);
+    }
+
+    pub fn shutdown_endpoint(&self, slot_id: u8, endpoint_id: u8) {
+        if let Some(endpoint) = self.endpoints.write().remove(&(slot_id, endpoint_id)) {
+            endpoint.shutdown();
+        } else {
+            log::warn!(
+                "Endpoint {}:{} does not exist, maybe already shut down?",
+                slot_id,
+                endpoint_id
+            );
+        }
+    }
+
+    pub fn notify_transfer(
+        &self,
+        slot_id: u8,
+        endpoint_id: u8,
+        address: PhysicalAddress,
+        status: u32,
+    ) {
+        if let Some(ep) = self.endpoints.read().get(&(slot_id, endpoint_id)) {
+            ep.notify(address, status);
+        }
+    }
+
+    async fn assign_device(
+        &'static self,
+        speed: PortSpeed,
+        slot_id: u8,
+        root_hub_port_number: u8,
+    ) -> Result<Box<dyn UsbDevice>, UsbError> {
+        let address = self.address_allocator.allocate().unwrap();
+        let ring = Arc::new(ControlTransferRing::new(slot_id, 1, 128)?);
+
+        let context =
+            XhciContext::new_32byte_address_device(&ring, speed, address, root_hub_port_number)?;
+        let mut input = context.input.write();
+
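+        // Enumeration sketch: publish the output device context in the DCBAA,
+        // issue an Address Device command for the slot, then expose endpoint 0
+        // (DCI 1) through a control pipe for the rest of device setup.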
self.register_device_context(slot_id, unsafe { context.output.as_physical_address() }); + + self.command_ring + .address_device(self, slot_id, &mut input) + .await?; + + self.register_endpoint(slot_id, 1, ring.clone()); + + let pipe = ControlPipe::new(self, slot_id, ring.clone()); + + // TODO: If the device is a Full-speed one, determine its max packet size for the control + // endpoint + if speed == PortSpeed::Full { + todo!() + } + + drop(input); + + let bus_address = UsbBusAddress { + bus: *self.bus_address.get(), + device: address, + }; + + let device = XhciBusDevice { + xhci: self, + slot_id, + port_id: root_hub_port_number, + bus_address, + speed, + context: Arc::new(context), + rings: IrqSafeRwLock::new(vec![ring]), + control_pipe: UsbControlPipeAccess(Box::new(pipe)), + }; + + Ok(Box::new(device)) + } + + async fn port_task(&'static self, index: usize) -> Result<(), UsbError> { + let state = &self.port_states[index]; + + self.reset_port(index).await?; + + let regs = self.regs.ports.read(index); + let speed = + PortSpeed::try_from(regs.portsc.port_speed()).map_err(|_| UsbError::PortInitFailed)?; + + let slot_id = self.command_ring.enable_slot(self).await?; + + let device = self.assign_device(speed, slot_id, (index + 1) as _).await?; + let device = UsbDeviceAccess::setup(device).await?; + + let old = state.address.write().replace(device.bus_address()); + assert!(old.is_none()); + + UsbBusManager::register_device(Arc::new(device)); + + state.state.store(PortState::Running, Ordering::Release); + + Ok(()) + } + + fn handle_device_attached(&'static self, port: usize) -> Result<(), UsbError> { + log::info!("Port {}: device attached", port); + + if let Err(err) = self.port_states[port].state.compare_exchange( + PortState::Disconnected, + PortState::Init, + Ordering::Acquire, + Ordering::Relaxed, + ) { + log::warn!("Could not start port init task: port state is {:?}", err); + return Err(UsbError::DeviceBusy); + } + + runtime::spawn(async move { self.port_task(port).await }).map_err(UsbError::SystemError)?; + Ok(()) + } + + fn handle_device_detached(&'static self, port: usize) -> Result<(), UsbError> { + let state = &self.port_states[port]; + + match state.state.swap(PortState::Disconnected, Ordering::Release) { + PortState::Init => todo!(), + PortState::Running => { + log::info!("Port {}: device detached", port); + let address = state + .address + .write() + .take() + .expect("Port marked as Running, but has no address"); + + UsbBusManager::detach_device(address); + state.notify.wake(); + + Ok(()) + } + // Already disconnected + PortState::Disconnected => Ok(()), + } + } + + fn handle_port_event(&'static self, port: usize) -> Result<(), UsbError> { + let state = &self.port_states[port]; + let port_regs = self.regs.ports.read(port); + + if port_regs.portsc.connect_status_change() { + if port_regs.portsc.current_connect_status() { + self.handle_device_attached(port) + } else { + self.handle_device_detached(port) + } + } else { + // Some other event + state.notify.wake(); + Ok(()) + } + } + + async fn reset_port(&self, port: usize) -> Result<(), UsbError> { + log::debug!("Reset port {}", port); + + self.regs.ports.update(port, |u| { + u.portsc.set_port_reset(); + }); + + // Wait for port reset + // TODO handle disconnect during reset? 
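+        // A Port Status Change event wakes the waker registered below; PORTSC is
+        // re-checked after the waker is registered, so a reset completion that
+        // races with the registration is not missed.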
+ let result = runtime::run_with_timeout( + Duration::from_secs(1), + poll_fn(|cx| { + let state = &self.port_states[port]; + + state.notify.register(cx.waker()); + if !self.regs.ports.read(port).portsc.port_reset() { + Poll::Ready(()) + } else { + Poll::Pending + } + }), + ) + .await; + + match result { + FutureTimeout::Ok(()) => Ok(()), + FutureTimeout::Timeout => Err(UsbError::PortResetFailed), + } + } + + fn handle_event(&'static self) { + while let Some(event) = self.event_ring.try_dequeue() { + match event { + Event::PortChange(port) => { + self.handle_port_event(port - 1).ok(); + } + Event::CommandCompletion { address, reply } => { + self.command_ring.notify(address, reply); + } + Event::Transfer { + address, + slot_id, + endpoint_id, + status, + } => { + self.notify_transfer(slot_id, endpoint_id, address, status); + } + } + } + + self.regs + .set_interrupter_0_dequeue_pointer(self.event_ring.dequeue_pointer()); + } +} + +impl UsbHostController for Xhci {} + +impl CommandExecutor for Xhci { + fn ring_doorbell(&self, index: usize, target: u8) { + self.regs.ring_doorbell(index, target); + } +} + +impl Device for Xhci { + unsafe fn init(&'static self) -> Result<(), Error> { + log::info!("Init USB xHCI"); + + self.regs.reset(); + self.regs.set_max_slot_count(); + + let erst = EventRingSegmentTable::for_event_rings(&[&self.event_ring])?; + let dcbaa = self.dcbaa.read(); + + self.regs + .configure(&dcbaa, &self.command_ring, &self.event_ring, &erst); + + let bus = UsbBusManager::register_bus(self); + self.bus_address.init(bus); + + for port in 0..self.port_count { + let p = self.regs.ports.read(port); + if p.portsc.current_connect_status() { + self.handle_device_attached(port).ok(); + } + } + + Ok(()) + } + + unsafe fn init_irq(&'static self) -> Result<(), Error> { + log::info!("Init USB xHCI IRQ"); + Ok(()) + } + + fn display_name(&self) -> &'static str { + "USB xHCI" + } +} + +impl InterruptHandler for Xhci { + fn handle_irq(&'static self, _vector: Option) -> bool { + if let Some(status) = self.regs.handle_interrupt() { + if status.event_interrupt() { + self.handle_event(); + } + + true + } else { + false + } + } +} diff --git a/kernel/driver/usb/xhci/src/device.rs b/kernel/driver/usb/xhci/src/device.rs new file mode 100644 index 00000000..d93cf15e --- /dev/null +++ b/kernel/driver/usb/xhci/src/device.rs @@ -0,0 +1,158 @@ +use alloc::{boxed::Box, sync::Arc, vec::Vec}; +use futures_util::{future::BoxFuture, FutureExt}; +use libk_mm::address::IntoRaw; +use libk_util::sync::spin_rwlock::IrqSafeRwLock; +use xhci_lib::context::{self, InputHandler}; +use ygg_driver_usb::{ + device::{UsbBusAddress, UsbDevice, UsbSpeed}, + error::UsbError, + info::UsbEndpointType, + pipe::{control::UsbControlPipeAccess, interrupt::UsbInterruptInPipeAccess}, + UsbDirection, UsbHostController, +}; + +use crate::{ + pipe::InterruptInPipe, + regs::PortSpeed, + ring::{transfer::InterruptInTransferRing, GenericTransferRing}, + Xhci, XhciContext, +}; + +// TODO device context information +pub struct XhciBusDevice { + pub(crate) port_id: u8, + pub(crate) slot_id: u8, + pub(crate) bus_address: UsbBusAddress, + + pub(crate) speed: PortSpeed, + + pub(crate) xhci: &'static Xhci, + + pub(crate) context: Arc>, + pub(crate) rings: IrqSafeRwLock>>, + pub(crate) control_pipe: UsbControlPipeAccess, +} + +impl XhciBusDevice { + fn dci(ty: UsbEndpointType, dir: UsbDirection, number: u8) -> usize { + match ty { + UsbEndpointType::Control => (number as usize) * 2 + 1, + _ => { + let off = match dir { + UsbDirection::Out => 0, + 
UsbDirection::In => 1, + }; + + (number as usize * 2) + off + } + } + } + + async fn setup_endpoint_inner( + &self, + ring: Arc, + dci: u8, + ty: UsbEndpointType, + direction: UsbDirection, + ) -> Result<(), UsbError> { + log::debug!("Setup endpoint dci #{}: {:?} {:?}", dci, ty, direction); + + let mut input = self.context.input.write(); + + let ep_type = match (ty, direction) { + (UsbEndpointType::Interrupt, UsbDirection::In) => context::EndpointType::InterruptIn, + _ => todo!(), + }; + + { + let control = input.control_mut(); + + control.set_add_context_flag(0); + control.clear_add_context_flag(1); + control.set_add_context_flag(dci as _); + } + + { + let slot = input.device_mut().slot_mut(); + + slot.set_context_entries(31); + } + + { + let ep_cx = input.device_mut().endpoint_mut(dci as _); + + ep_cx.set_tr_dequeue_pointer(ring.dequeue_pointer().into_raw()); + ep_cx.set_dequeue_cycle_state(); + ep_cx.set_endpoint_type(ep_type); + ep_cx.set_error_count(3); + + // TODO get from endpoint info + ep_cx.set_max_packet_size(8); + } + + self.xhci + .command_ring + .configure_endpoint(self.xhci, self.slot_id, &mut input) + .await?; + + self.rings.write().push(ring.clone()); + self.xhci.register_endpoint(self.slot_id, dci, ring); + + Ok(()) + } +} + +impl UsbDevice for XhciBusDevice { + fn control_pipe(&self) -> &UsbControlPipeAccess { + &self.control_pipe + } + + fn port_number(&self) -> u8 { + self.port_id + } + + fn bus_address(&self) -> UsbBusAddress { + self.bus_address + } + + fn controller(&self) -> &'static dyn UsbHostController { + self.xhci + } + + fn open_interrupt_in_pipe<'a>( + &'a self, + number: u8, + ) -> BoxFuture> { + async move { + let dci = Self::dci(UsbEndpointType::Interrupt, UsbDirection::In, number) as u8; + let ring = Arc::new(InterruptInTransferRing::new(self.slot_id, dci as _, 128)?); + + self.setup_endpoint_inner( + ring.clone(), + dci, + UsbEndpointType::Interrupt, + UsbDirection::In, + ) + .await?; + + let pipe = InterruptInPipe::new(self.xhci, self.slot_id, number, dci, ring); + + Ok(UsbInterruptInPipeAccess(Box::new(pipe))) + } + .boxed() + } + + fn handle_detach(&self) { + log::info!("Device detach handler"); + for ring in self.rings.write().drain(..) 
{ + self.xhci + .shutdown_endpoint(ring.slot_id(), ring.endpoint_id()); + } + } + + fn speed(&self) -> UsbSpeed { + self.speed.into() + } + + fn debug(&self) {} +} diff --git a/kernel/driver/usb/xhci/src/lib.rs b/kernel/driver/usb/xhci/src/lib.rs new file mode 100644 index 00000000..bc9a635d --- /dev/null +++ b/kernel/driver/usb/xhci/src/lib.rs @@ -0,0 +1,109 @@ +#![no_std] +#![feature(iter_array_chunks)] + +extern crate alloc; + +mod controller; +mod device; +mod pipe; +mod regs; +mod ring; + +use alloc::boxed::Box; +pub use controller::Xhci; +use device_api::{interrupt::InterruptAffinity, Device}; +use libk_mm::{address::IntoRaw, PageBox}; +use libk_util::sync::spin_rwlock::IrqSafeRwLock; +use regs::{Mapper, PortSpeed}; +use ring::{ControlTransferRing, GenericTransferRing}; +use xhci_lib::context::{self, InputHandler}; +use ygg_driver_pci::{ + device::{PciDeviceInfo, PreferredInterruptMode}, + PciCommandRegister, PciConfigurationSpace, +}; +use ygg_driver_usb::error::UsbError; +use yggdrasil_abi::error::Error; + +pub struct XhciContext { + pub(crate) input: IrqSafeRwLock>>, + pub(crate) output: PageBox>, +} + +impl XhciContext<8> { + pub fn new_32byte() -> Result { + let input = PageBox::new(context::Input::new_32byte()).map_err(UsbError::MemoryError)?; + let output = PageBox::new(context::Device::new_32byte()).map_err(UsbError::MemoryError)?; + + Ok(Self { + input: IrqSafeRwLock::new(input), + output, + }) + } + + pub fn new_32byte_address_device( + default_control_ring: &ControlTransferRing, + speed: PortSpeed, + address: u8, + root_hub_port_number: u8, + ) -> Result { + let mut input = + PageBox::new(context::Input::new_32byte()).map_err(UsbError::MemoryError)?; + let output = PageBox::new(context::Device::new_32byte()).map_err(UsbError::MemoryError)?; + + // Setup input context + { + let control = input.control_mut(); + + control.set_add_context_flag(0); + control.set_add_context_flag(1); + } + + { + let slot = input.device_mut().slot_mut(); + + slot.set_context_entries(1); + slot.set_interrupter_target(0); + slot.set_usb_device_address(address); + slot.set_root_hub_port_number(root_hub_port_number); + slot.set_speed(speed.into()); + } + + { + let ep0 = input.device_mut().endpoint_mut(1); + + ep0.set_endpoint_type(context::EndpointType::Control); + ep0.set_tr_dequeue_pointer(default_control_ring.dequeue_pointer().into_raw()); + ep0.set_dequeue_cycle_state(); + ep0.set_error_count(3); + ep0.set_max_packet_size(speed.default_max_packet_size() as _); + } + + Ok(Self { + input: IrqSafeRwLock::new(input), + output, + }) + } +} + +pub fn probe(info: &PciDeviceInfo) -> Result<&'static dyn Device, Error> { + // TODO Chip Hardware Reset + let bar0 = info + .config_space + .bar(0) + .expect("xHCI doesn't have BAR0 configured") + .as_memory() + .expect("xHCI's BAR0 is not memory-type"); + + let mut cmd = PciCommandRegister::from_bits_retain(info.config_space.command()); + cmd &= !(PciCommandRegister::DISABLE_INTERRUPTS | PciCommandRegister::ENABLE_IO); + cmd |= PciCommandRegister::ENABLE_MEMORY | PciCommandRegister::BUS_MASTER; + info.config_space.set_command(cmd.bits()); + + let regs = unsafe { xhci_lib::Registers::new(bar0.try_into().unwrap(), Mapper::new()) }; + let xhci = Box::leak(Box::new(Xhci::new(regs)?)); + + info.init_interrupts(PreferredInterruptMode::Msi)?; + info.map_interrupt(InterruptAffinity::Any, xhci)?; + + Ok(xhci) +} diff --git a/kernel/driver/usb/xhci/src/pipe.rs b/kernel/driver/usb/xhci/src/pipe.rs new file mode 100644 index 00000000..6697073f --- /dev/null +++ 
b/kernel/driver/usb/xhci/src/pipe.rs
@@ -0,0 +1,85 @@
+use alloc::sync::Arc;
+use libk_mm::{address::PhysicalAddress, PageBox};
+use ygg_driver_usb::{
+    communication::UsbInterruptTransfer,
+    error::UsbError,
+    pipe::{
+        control::{ControlTransferSetup, UsbControlPipe},
+        interrupt::UsbInterruptInPipe,
+        UsbGenericPipe,
+    },
+    UsbControlTransfer, UsbDirection,
+};
+
+use crate::{
+    ring::{transfer::InterruptInTransferRing, ControlTransferRing},
+    Xhci,
+};
+
+pub struct ControlPipe {
+    xhci: &'static Xhci,
+    ring: Arc<ControlTransferRing>,
+}
+
+#[allow(unused)]
+pub struct InterruptInPipe {
+    xhci: &'static Xhci,
+
+    slot_id: u8,
+    endpoint_id: u8,
+    dci: u8,
+
+    ring: Arc<InterruptInTransferRing>,
+}
+
+impl UsbGenericPipe for ControlPipe {}
+
+impl UsbControlPipe for ControlPipe {
+    fn start_transfer(
+        &self,
+        setup: ControlTransferSetup,
+        data: Option<(PhysicalAddress, usize, UsbDirection)>,
+    ) -> Result<UsbControlTransfer, UsbError> {
+        self.ring.start_transfer(self.xhci, setup, data)
+    }
+
+    fn complete_transfer(&self, transfer: UsbControlTransfer) {
+        self.ring.complete_transfer(transfer)
+    }
+}
+
+impl ControlPipe {
+    pub fn new(xhci: &'static Xhci, _slot_id: u8, ring: Arc<ControlTransferRing>) -> Self {
+        Self { xhci, ring }
+    }
+}
+
+impl UsbGenericPipe for InterruptInPipe {}
+
+impl UsbInterruptInPipe for InterruptInPipe {
+    fn start_read(&self, buffer: &mut PageBox<[u8]>) -> Result<UsbInterruptTransfer, UsbError> {
+        self.ring.start_transfer(self.xhci, buffer)
+    }
+
+    fn complete_transfer(&self, transfer: UsbInterruptTransfer) {
+        self.ring.complete_transfer(transfer)
+    }
+}
+
+impl InterruptInPipe {
+    pub fn new(
+        xhci: &'static Xhci,
+        slot_id: u8,
+        endpoint_id: u8,
+        dci: u8,
+        ring: Arc<InterruptInTransferRing>,
+    ) -> Self {
+        Self {
+            xhci,
+            slot_id,
+            endpoint_id,
+            dci,
+            ring,
+        }
+    }
+}
diff --git a/kernel/driver/usb/xhci/src/regs.rs b/kernel/driver/usb/xhci/src/regs.rs
new file mode 100644
index 00000000..29de28a3
--- /dev/null
+++ b/kernel/driver/usb/xhci/src/regs.rs
@@ -0,0 +1,283 @@
+use core::{cell::UnsafeCell, num::NonZeroUsize};
+
+use alloc::{sync::Arc, vec::Vec};
+use libk_mm::{
+    address::{AsPhysicalAddress, IntoRaw, PhysicalAddress},
+    device::RawDeviceMemoryMapping,
+    PageBox,
+};
+use libk_util::sync::spin_rwlock::IrqSafeRwLock;
+use xhci_lib::{
+    accessor::{array, marker},
+    registers::{
+        operational::UsbStatusRegister, Capability, Doorbell, InterrupterRegisterSet, Operational,
+        PortRegisterSet,
+    },
+};
+use ygg_driver_usb::device::UsbSpeed;
+use yggdrasil_abi::primitive_enum;
+
+use crate::ring::{CommandRing, EventRing, EventRingSegmentTable, GenericRing};
+
+#[derive(Clone)]
+pub struct Mapper {
+    mappings: Vec<Arc<RawDeviceMemoryMapping>>,
+}
+
+pub struct LockedArray<T> {
+    array: UnsafeCell<array::Generic<T, Mapper, marker::ReadWrite>>,
+    locks: Vec<IrqSafeRwLock<()>>,
+}
+
+unsafe impl<T> Sync for LockedArray<T> {}
+unsafe impl<T> Send for LockedArray<T> {}
+
+pub struct Regs {
+    operational: IrqSafeRwLock<Operational<Mapper>>,
+    interrupters: IrqSafeRwLock<InterrupterRegisterSet<Mapper>>,
+    capability: Capability<Mapper>,
+    doorbells: LockedArray<Doorbell>,
+    pub ports: LockedArray<PortRegisterSet>,
+}
+
+impl<T> LockedArray<T> {
+    #[inline]
+    #[allow(clippy::mut_from_ref)]
+    unsafe fn get_mut(&self) -> &mut array::Generic<T, Mapper, marker::ReadWrite> {
+        &mut *self.array.get()
+    }
+
+    pub fn update<U: FnOnce(&mut T)>(&self, index: usize, f: U) {
+        let _guard = self.locks[index].write();
+        unsafe { self.get_mut() }.update_volatile_at(index, f);
+    }
+
+    pub fn read(&self, index: usize) -> T {
+        let _guard = self.locks[index].read();
+        unsafe { self.get_mut() }.read_volatile_at(index)
+    }
+}
+
+impl<T> From<array::Generic<T, Mapper, marker::ReadWrite>> for LockedArray<T> {
+    fn from(value: array::Generic<T, Mapper, marker::ReadWrite>) -> Self {
+        let locks = Vec::from_iter((0..value.len()).map(|_| IrqSafeRwLock::new(())));
+
+        Self {
+            array: UnsafeCell::new(value),
+            locks,
+        }
+    }
+}
+
+impl From<xhci_lib::Registers<Mapper>> for Regs {
fn from(value: xhci_lib::Registers) -> Self { + Self { + operational: IrqSafeRwLock::new(value.operational), + capability: value.capability, + interrupters: IrqSafeRwLock::new(value.interrupter_register_set), + doorbells: LockedArray::from(value.doorbell), + ports: LockedArray::from(value.port_register_set), + } + } +} + +impl Regs { + pub fn reset(&self) { + let mut o = self.operational.write(); + + // TODO Get ownership from firmware + + // Stop the controller + o.usbcmd.update_volatile(|u| { + u.clear_run_stop(); + }); + + while !o.usbsts.read_volatile().hc_halted() { + core::hint::spin_loop(); + } + + // Reset the controller + o.usbcmd.update_volatile(|u| { + u.set_host_controller_reset(); + }); + while o.usbcmd.read_volatile().host_controller_reset() + || o.usbsts.read_volatile().controller_not_ready() + { + core::hint::spin_loop(); + } + } + + pub fn max_slot_count(&self) -> usize { + self.capability + .hcsparams1 + .read_volatile() + .number_of_device_slots() as _ + } + + pub fn set_max_slot_count(&self) -> usize { + let device_slot_count = self.max_slot_count(); + let mut o = self.operational.write(); + // Set max slots enabled + o.config.update_volatile(|u| { + u.set_max_device_slots_enabled(device_slot_count as _); + }); + + device_slot_count as _ + } + + pub fn context_size(&self) -> usize { + match self.capability.hccparams1.read_volatile().context_size() { + true => 64, + false => 32, + } + } + + pub fn port_count(&self) -> usize { + self.capability.hcsparams1.read_volatile().number_of_ports() as _ + } + + pub fn configure( + &self, + dcbaa: &PageBox<[PhysicalAddress]>, + cmd_ring: &CommandRing, + evt_ring: &EventRing, + erst: &EventRingSegmentTable, + ) { + let mut o = self.operational.write(); + let mut i = self.interrupters.write(); + + o.dcbaap.update_volatile(|u| unsafe { + u.set(dcbaa.as_physical_address().into_raw()); + }); + o.crcr.update_volatile(|u| { + u.set_command_ring_pointer(cmd_ring.base().into_raw()); + u.set_ring_cycle_state(); + }); + + let mut intr0 = i.interrupter_mut(0); + intr0.erstsz.update_volatile(|u| { + u.set(erst.capacity().try_into().unwrap()); + }); + intr0.erdp.update_volatile(|u| { + log::debug!("::: Dequeue Pointer: {:#x}", evt_ring.dequeue_pointer()); + u.set_event_ring_dequeue_pointer(evt_ring.dequeue_pointer().into_raw()); + }); + intr0.erstba.update_volatile(|u| { + u.set(erst.physical_address().into_raw()); + }); + // intr0.imod.update_volatile(|u| { + // u.set_interrupt_moderation_interval(0) + // .set_interrupt_moderation_counter(0); + // }); + intr0.iman.update_volatile(|u| { + u.set_interrupt_enable(); + }); + + o.usbcmd.update_volatile(|u| { + u.set_interrupter_enable().set_run_stop(); + }); + } + + pub fn handle_interrupt(&self) -> Option { + let mut o = self.operational.write(); + let mut i = self.interrupters.write(); + + let status = o.usbsts.read_volatile(); + + if !status.event_interrupt() { + return None; + } + + o.usbsts.write_volatile(status); + + if status.host_system_error() { + return Some(status); + } + + // Acknowledge interrupts + let mut intr0 = i.interrupter_mut(0); + intr0.iman.update_volatile(|u| { + u.clear_interrupt_pending(); + }); + + Some(status) + } + + pub fn set_interrupter_0_dequeue_pointer(&self, pointer: PhysicalAddress) { + let mut i = self.interrupters.write(); + + i.interrupter_mut(0).erdp.update_volatile(|u| { + u.set_event_ring_dequeue_pointer(pointer.into_raw()); + u.clear_event_handler_busy(); + }); + } + + pub fn ring_doorbell(&self, index: usize, target: u8) { + self.doorbells.update(index, |u| 
{
+            u.set_doorbell_target(target);
+        });
+    }
+}
+
+impl Mapper {
+    pub fn new() -> Self {
+        Self {
+            mappings: Vec::new(),
+        }
+    }
+}
+
+impl xhci_lib::accessor::Mapper for Mapper {
+    unsafe fn map(&mut self, phys_start: usize, bytes: usize) -> NonZeroUsize {
+        let mapping = RawDeviceMemoryMapping::map(phys_start as u64, bytes, Default::default())
+            .expect("Could not map a USB xHCI region");
+        let address = mapping.address;
+        self.mappings.push(Arc::new(mapping));
+        NonZeroUsize::new_unchecked(address)
+    }
+
+    fn unmap(&mut self, _virt_start: usize, _bytes: usize) {
+        // TODO
+    }
+}
+
+// Register value definitions
+
+primitive_enum! {
+    pub enum PortSpeed: u8 {
+        Full = 1,
+        Low = 2,
+        High = 3,
+        SuperGen1x1 = 4,
+        SuperGen2x1 = 5,
+        SuperGen1x2 = 6,
+        SuperGen2x2 = 7,
+    }
+}
+
+impl PortSpeed {
+    pub fn default_max_packet_size(&self) -> usize {
+        match self {
+            Self::Low => 8,
+            Self::High => 64,
+            Self::SuperGen1x1 | Self::SuperGen1x2 | Self::SuperGen2x1 | Self::SuperGen2x2 => 512,
+
+            // See Section 4.3, point 7 of the initialization list
+            Self::Full => 8,
+        }
+    }
+}
+
+impl From<PortSpeed> for UsbSpeed {
+    fn from(value: PortSpeed) -> Self {
+        match value {
+            PortSpeed::Low => UsbSpeed::Low,
+            PortSpeed::Full => UsbSpeed::Full,
+            PortSpeed::High => UsbSpeed::High,
+            PortSpeed::SuperGen1x1
+            | PortSpeed::SuperGen1x2
+            | PortSpeed::SuperGen2x1
+            | PortSpeed::SuperGen2x2 => UsbSpeed::Super,
+        }
+    }
+}
diff --git a/kernel/driver/usb/xhci/src/ring/command.rs b/kernel/driver/usb/xhci/src/ring/command.rs
new file mode 100644
index 00000000..f1da87f8
--- /dev/null
+++ b/kernel/driver/usb/xhci/src/ring/command.rs
@@ -0,0 +1,289 @@
+use core::{
+    fmt,
+    future::poll_fn,
+    mem::{size_of, MaybeUninit},
+    task::Poll,
+};
+
+use alloc::collections::BTreeMap;
+use bytemuck::{Pod, Zeroable};
+use libk_mm::{
+    address::{AsPhysicalAddress, PhysicalAddress},
+    PageBox,
+};
+use libk_util::{
+    sync::{spin_rwlock::IrqSafeRwLock, IrqSafeSpinlock},
+    waker::QueueWaker,
+};
+use xhci_lib::context;
+use ygg_driver_usb::error::UsbError;
+use yggdrasil_abi::define_bitfields;
+
+use super::{CommandExecutor, GenericRing, LinkTrb};
+
+struct CommandRingInner {
+    trbs: PageBox<[MaybeUninit<RawCommandTrb>]>,
+    enqueue_index: usize,
+    #[allow(unused)]
+    dequeue_index: usize,
+    cycle_bit: bool,
+}
+
+pub struct CommandRing {
+    inner: IrqSafeSpinlock<CommandRingInner>,
+    // TODO maybe use Vec of "slots"?
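+    // Completions are keyed by the physical address of the command TRB that
+    // produced them; the controller echoes this address back in the Command
+    // Completion Event.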
+ completions: IrqSafeRwLock>, + completion_notify: QueueWaker, + capacity: usize, +} + +impl GenericRing for CommandRing { + fn base(&self) -> PhysicalAddress { + unsafe { self.inner.lock().trbs.as_physical_address() } + } + + fn capacity(&self) -> usize { + self.capacity + } +} + +impl CommandRingInner { + fn enqueue(&mut self, trb: C) -> PhysicalAddress { + let mut raw: RawCommandTrb = bytemuck::cast(trb); + + raw.flags.set_ty(C::TRB_TYPE as _); + raw.flags.set_cycle(self.cycle_bit); + + self.trbs[self.enqueue_index].write(raw); + + let address = unsafe { self.trbs.as_physical_address() } + .add(self.enqueue_index * size_of::()); + + // Move to the next TRB slot + self.enqueue_index += 1; + if self.enqueue_index >= self.trbs.len() - 1 { + self.enqueue_link(); + + // Wrap around + self.cycle_bit = !self.cycle_bit; + self.enqueue_index = 0; + } + + address + } + + fn enqueue_link(&mut self) { + let base = unsafe { self.trbs.as_physical_address() }; + + let link = LinkTrb::new(base, self.cycle_bit); + self.trbs[self.enqueue_index].write(bytemuck::cast(link)); + } + + fn advance(&mut self) { + self.dequeue_index += 1; + + if self.dequeue_index >= self.trbs.len() - 1 { + self.dequeue_index = 0; + } + } +} + +impl CommandRing { + pub fn new(capacity: usize) -> Result { + let trbs = PageBox::new_zeroed_slice(capacity).map_err(UsbError::MemoryError)?; + + Ok(Self { + inner: IrqSafeSpinlock::new(CommandRingInner { + trbs, + enqueue_index: 0, + dequeue_index: 0, + cycle_bit: true, + }), + completions: IrqSafeRwLock::new(BTreeMap::new()), + completion_notify: QueueWaker::new(), + capacity, + }) + } + + pub fn enqueue(&self, trb: C) -> PhysicalAddress { + let mut inner = self.inner.lock(); + let address = inner.enqueue(trb); + address + } + + pub async fn address_device( + &self, + executor: &E, + slot_id: u8, + input: &mut PageBox>, + ) -> Result<(), UsbError> { + self.submit_and_wait(executor, AddressDeviceCommandTrb::new(input, slot_id)) + .await?; + Ok(()) + } + + pub async fn configure_endpoint( + &self, + executor: &E, + slot_id: u8, + input: &mut PageBox>, + ) -> Result<(), UsbError> { + self.submit_and_wait(executor, ConfigureEndpointCommandTrb::new(input, slot_id)) + .await?; + Ok(()) + } + + pub async fn enable_slot(&self, executor: &E) -> Result { + self.submit_and_wait(executor, EnableSlotCommandTrb::new()) + .await + } + + pub async fn submit_and_wait( + &self, + executor: &E, + trb: C, + ) -> Result { + let token = self.enqueue(trb); + executor.ring_doorbell(0, 0); + poll_fn(|cx| { + self.completion_notify.register(cx.waker()); + if let Some(status) = self.get_completion(token) { + self.completion_notify.remove(cx.waker()); + if status.completion_code == 1 { + Poll::Ready(Ok(status.slot_id)) + } else { + Poll::Ready(Err(UsbError::HostControllerCommandFailed( + status.completion_code, + ))) + } + } else { + Poll::Pending + } + }) + .await + } + + pub fn get_completion(&self, address: PhysicalAddress) -> Option { + self.completions.write().remove(&address) + } + + pub fn notify(&self, address: PhysicalAddress, reply: CommandReply) { + self.inner.lock().advance(); + self.completions.write().insert(address, reply); + self.completion_notify.wake_all(); + } +} + +// TRB implementations + +define_bitfields! { + pub EnableSlotCommandFlags : u32 { + (16..20) => slot_type + } +} + +define_bitfields! { + pub AddressDeviceCommandFlags : u32 { + (24..32) => slot_id + } +} + +define_bitfields! { + pub ConfigureEndpointCommandFlags : u32 { + (24..32) => slot_id + } +} + +define_bitfields! 
{ + pub RawCommandFlags : u32 { + (10..16) => ty + set_ty, + 0 => cycle + set_cycle + } +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct EnableSlotCommandTrb { + _0: [u32; 3], + pub flags: EnableSlotCommandFlags, +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct AddressDeviceCommandTrb { + pub input_context_address: PhysicalAddress, + _0: u32, + pub flags: AddressDeviceCommandFlags, +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct ConfigureEndpointCommandTrb { + pub input_context_address: PhysicalAddress, + _0: u32, + pub flags: ConfigureEndpointCommandFlags, +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct RawCommandTrb { + _0: [u32; 3], + flags: RawCommandFlags, +} + +#[derive(Debug, Clone, Copy)] +pub struct CommandReply { + pub completion_code: u8, + pub slot_id: u8, +} + +pub trait CommandTrb: Pod + fmt::Debug { + const TRB_TYPE: u8; +} + +impl EnableSlotCommandTrb { + pub fn new() -> Self { + Self { + _0: [0; 3], + flags: EnableSlotCommandFlags::new(0), + } + } +} + +impl AddressDeviceCommandTrb { + pub fn new( + input_context: &mut PageBox>, + slot_id: u8, + ) -> Self { + Self { + input_context_address: unsafe { input_context.as_physical_address() }, + _0: 0, + flags: AddressDeviceCommandFlags::new(slot_id as _), + } + } +} + +impl ConfigureEndpointCommandTrb { + pub fn new( + input_context: &mut PageBox>, + slot_id: u8, + ) -> Self { + Self { + input_context_address: unsafe { input_context.as_physical_address() }, + _0: 0, + flags: ConfigureEndpointCommandFlags::new(slot_id as _), + } + } +} + +impl CommandTrb for EnableSlotCommandTrb { + const TRB_TYPE: u8 = 9; +} + +impl CommandTrb for AddressDeviceCommandTrb { + const TRB_TYPE: u8 = 11; +} + +impl CommandTrb for ConfigureEndpointCommandTrb { + const TRB_TYPE: u8 = 12; +} diff --git a/kernel/driver/usb/xhci/src/ring/event.rs b/kernel/driver/usb/xhci/src/ring/event.rs new file mode 100644 index 00000000..10785ab0 --- /dev/null +++ b/kernel/driver/usb/xhci/src/ring/event.rs @@ -0,0 +1,240 @@ +use core::mem::{size_of, MaybeUninit}; + +use bytemuck::{Pod, Zeroable}; +use libk_mm::{ + address::{AsPhysicalAddress, PhysicalAddress}, + PageBox, +}; +use libk_util::sync::IrqSafeSpinlock; +use ygg_driver_usb::error::UsbError; +use yggdrasil_abi::define_bitfields; + +use super::{command::CommandReply, GenericRing}; + +pub enum Event { + PortChange(usize), + CommandCompletion { + address: PhysicalAddress, + reply: CommandReply, + }, + Transfer { + address: PhysicalAddress, + slot_id: u8, + endpoint_id: u8, + status: u32, + }, +} + +#[repr(C, align(16))] +pub struct EventRingSegment { + address: PhysicalAddress, + // Number of TRBs supported by the ring segment. 
Valid values are 16 to 4096
+    size: u16,
+    _0: u16,
+    _1: u32,
+}
+
+pub struct EventRingSegmentTable {
+    entries: PageBox<[EventRingSegment]>,
+}
+
+struct EventRingInner {
+    trbs: PageBox<[MaybeUninit<RawEventTrb>]>,
+    dequeue_index: usize,
+    cycle_bit: bool,
+}
+
+pub struct EventRing {
+    inner: IrqSafeSpinlock<EventRingInner>,
+    capacity: usize,
+}
+
+impl EventRingSegmentTable {
+    pub fn for_event_rings(rings: &[&EventRing]) -> Result<Self, UsbError> {
+        let entries = PageBox::from_iter_exact(rings.iter().map(|ring| EventRingSegment {
+            address: ring.base(),
+            size: ring.capacity().try_into().unwrap(),
+            _0: 0,
+            _1: 0,
+        }))
+        .map_err(UsbError::MemoryError)?;
+
+        Ok(Self { entries })
+    }
+
+    pub fn physical_address(&self) -> PhysicalAddress {
+        unsafe { self.entries.as_physical_address() }
+    }
+
+    pub fn capacity(&self) -> usize {
+        self.entries.len()
+    }
+}
+
+impl GenericRing for EventRing {
+    fn base(&self) -> PhysicalAddress {
+        unsafe { self.inner.lock().trbs.as_physical_address() }
+    }
+
+    fn capacity(&self) -> usize {
+        self.capacity
+    }
+}
+
+impl EventRingInner {
+    fn try_dequeue(&mut self) -> Option<Event> {
+        let trb = unsafe { self.trbs[self.dequeue_index].assume_init_ref() };
+
+        // The TRB cannot be consumed yet: the producer has not toggled its cycle bit
+        let trb_cycle = trb.cycle_bit();
+        if trb_cycle != self.cycle_bit {
+            return None;
+        }
+
+        self.dequeue_index += 1;
+
+        if self.dequeue_index == self.trbs.len() {
+            self.dequeue_index = 0;
+            self.cycle_bit = !self.cycle_bit;
+        }
+
+        trb.into_event()
+    }
+}
+
+impl EventRing {
+    pub fn new(capacity: usize) -> Result<Self, UsbError> {
+        let trbs = PageBox::new_zeroed_slice(capacity).map_err(UsbError::MemoryError)?;
+
+        Ok(Self {
+            inner: IrqSafeSpinlock::new(EventRingInner {
+                trbs,
+                dequeue_index: 0,
+                cycle_bit: true,
+            }),
+            capacity,
+        })
+    }
+
+    pub fn try_dequeue(&self) -> Option<Event> {
+        self.inner.lock().try_dequeue()
+    }
+
+    pub fn dequeue_pointer(&self) -> PhysicalAddress {
+        let i = self.inner.lock();
+        unsafe { i.trbs.as_physical_address() }.add(i.dequeue_index * size_of::<RawEventTrb>())
+    }
+}
+
+// TRB implementations
+
+define_bitfields! {
+    pub TransferEventStatus : u32 {
+        (24..32) => completion_code,
+        (0..24) => sub_length
+    }
+}
+
+define_bitfields! {
+    pub TransferEventFlags : u32 {
+        (24..32) => slot_id,
+        (16..20) => endpoint_id,
+    }
+}
+
+define_bitfields! {
+    pub CommandCompletionEventStatus : u32 {
+        (24..32) => completion_code,
+        (0..24) => completion_parameter
+    }
+}
+
+define_bitfields! {
+    pub CommandCompletionEventFlags : u32 {
+        (24..32) => slot_id,
+    }
+}
+
+define_bitfields! {
+    pub PortStatusChangeEventAddress : u32 {
+        (24..32) => port_id
+    }
+}
+
+define_bitfields!
{ + pub RawEventFlags : u32 { + (10..16) => ty, + 0 => cycle + } +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct TransferEventTrb { + pub address: PhysicalAddress, + pub status: TransferEventStatus, + pub flags: TransferEventFlags, +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct CommandCompletionEventTrb { + pub address: PhysicalAddress, + pub status: CommandCompletionEventStatus, + pub flags: CommandCompletionEventFlags, +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct PortStatusChangeEventTrb { + pub address: PortStatusChangeEventAddress, + _0: [u32; 3], +} + +#[derive(Clone, Copy, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct RawEventTrb { + _0: [u32; 3], + pub flags: RawEventFlags, +} + +impl RawEventTrb { + pub fn into_event(self) -> Option { + match self.flags.ty() { + 32 => { + let transfer: TransferEventTrb = bytemuck::cast(self); + + Some(Event::Transfer { + address: transfer.address, + slot_id: transfer.flags.slot_id() as _, + endpoint_id: transfer.flags.endpoint_id() as _, + status: transfer.status.into_raw(), + }) + } + 33 => { + let command: CommandCompletionEventTrb = bytemuck::cast(self); + + Some(Event::CommandCompletion { + address: command.address, + reply: CommandReply { + completion_code: command.status.completion_code() as _, + slot_id: command.flags.slot_id() as _, + }, + }) + } + 34 => { + let port_status: PortStatusChangeEventTrb = bytemuck::cast(self); + + Some(Event::PortChange(port_status.address.port_id() as _)) + } + ty => { + log::warn!("Unhandled event TRB with type: {}", ty); + None + } + } + } + + pub fn cycle_bit(&self) -> bool { + self.flags.cycle() + } +} diff --git a/kernel/driver/usb/xhci/src/ring/mod.rs b/kernel/driver/usb/xhci/src/ring/mod.rs new file mode 100644 index 00000000..8f589fef --- /dev/null +++ b/kernel/driver/usb/xhci/src/ring/mod.rs @@ -0,0 +1,56 @@ +use bytemuck::{Pod, Zeroable}; +use libk_mm::address::PhysicalAddress; +use yggdrasil_abi::define_bitfields; + +pub mod command; +pub mod event; +pub mod transfer; + +pub use command::CommandRing; +pub use event::{Event, EventRing, EventRingSegmentTable}; +pub use transfer::ControlTransferRing; + +pub trait CommandExecutor { + fn ring_doorbell(&self, index: usize, target: u8); +} + +pub trait GenericRing { + fn capacity(&self) -> usize; + fn base(&self) -> PhysicalAddress; +} + +pub trait GenericTransferRing: GenericRing + Send + Sync { + fn slot_id(&self) -> u8; + fn endpoint_id(&self) -> u8; + + fn dequeue_pointer(&self) -> PhysicalAddress; + fn notify(&self, address: PhysicalAddress, status: u32); + fn shutdown(&self); +} + +define_bitfields! 
{ + pub LinkTrbFlags : u32 { + (10..16) => ty, + 4 => chain, + 1 => toggle_cycle, + 0 => cycle + } +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct LinkTrb { + pub address: PhysicalAddress, + _0: u32, + pub flags: LinkTrbFlags, +} + +impl LinkTrb { + pub fn new(address: PhysicalAddress, cycle_bit: bool) -> Self { + Self { + address, + _0: 0, + flags: LinkTrbFlags::new(6, true, true, cycle_bit), + } + } +} diff --git a/kernel/driver/usb/xhci/src/ring/transfer.rs b/kernel/driver/usb/xhci/src/ring/transfer.rs new file mode 100644 index 00000000..34781f98 --- /dev/null +++ b/kernel/driver/usb/xhci/src/ring/transfer.rs @@ -0,0 +1,515 @@ +use core::{ + mem::{size_of, MaybeUninit}, + sync::atomic::{AtomicBool, AtomicU64, Ordering}, +}; + +use alloc::{collections::BTreeMap, sync::Arc, vec::Vec}; +use bytemuck::{Pod, Zeroable}; +use libk_mm::{ + address::{AsPhysicalAddress, PhysicalAddress}, + PageBox, +}; +use libk_util::sync::{spin_rwlock::IrqSafeRwLock, IrqSafeSpinlock, IrqSafeSpinlockGuard}; +use ygg_driver_usb::{ + communication::UsbInterruptTransfer, error::UsbError, pipe::control::ControlTransferSetup, + UsbControlTransfer, UsbDirection, UsbTransferStatus, UsbTransferToken, +}; +use yggdrasil_abi::define_bitfields; + +use crate::ring::LinkTrb; + +use super::{CommandExecutor, GenericRing, GenericTransferRing}; + +struct TransferRingInner { + trbs: PageBox<[MaybeUninit]>, + enqueue_index: usize, + dequeue_index: usize, + cycle_bit: bool, +} + +// TODO split TransferRing into Normal, Control, etc +pub struct ControlTransferRing { + inner: IrqSafeSpinlock, + capacity: usize, + + // TODO this is inefficient and ugly + pending_trbs: IrqSafeRwLock>, + completions: IrqSafeRwLock>>, + + slot_id: u8, + ep_id: u8, + + transfer_id: AtomicU64, + shutdown: AtomicBool, +} + +pub struct InterruptInTransferRing { + inner: IrqSafeSpinlock, + capacity: usize, + + completions: IrqSafeRwLock>>, + + slot_id: u8, + ep_id: u8, + + shutdown: AtomicBool, +} + +struct TransferBuilder<'a> { + ring: &'a ControlTransferRing, + ring_inner: IrqSafeSpinlockGuard<'a, TransferRingInner>, + + token: UsbTransferToken, + direction: UsbDirection, + addresses: Vec, + status: Arc, +} + +impl<'a> TransferBuilder<'a> { + pub fn new(ring: &'a ControlTransferRing, direction: UsbDirection) -> Self { + let ring_inner = ring.inner.lock(); + let token = UsbTransferToken(ring.transfer_id.fetch_add(1, Ordering::AcqRel)); + let status = Arc::new(UsbTransferStatus::new()); + + ring.completions.write().insert(token, status.clone()); + + Self { + ring, + ring_inner, + + token, + direction, + status, + addresses: Vec::new(), + } + } + + pub fn push_trb(&mut self, trb: C) -> &mut Self { + let address = self.ring_inner.enqueue(trb); + self.addresses.push(address); + self.ring.pending_trbs.write().insert(address, self.token); + self + } + + pub fn start(self, executor: &E, length: usize) -> UsbControlTransfer { + executor.ring_doorbell(self.ring.slot_id as _, self.ring.ep_id); + + UsbControlTransfer { + id: self.token, + length, + + direction: self.direction, + elements: self.addresses, + status: self.status, + } + } +} + +impl TransferRingInner { + fn enqueue(&mut self, trb: C) -> PhysicalAddress { + if (self.enqueue_index + 1) % (self.trbs.len() - 1) == self.dequeue_index { + todo!("Ring full"); + } + + let mut raw: RawTransferTrb = bytemuck::cast(trb); + + raw.flags.set_ty(C::TRB_TYPE as _); + raw.flags.set_cycle(self.cycle_bit); + + self.trbs[self.enqueue_index].write(raw); + + let address = unsafe { 
self.trbs.as_physical_address() } + .add(self.enqueue_index * size_of::()); + + // Move to the next TRB slot + self.enqueue_index += 1; + if self.enqueue_index >= self.trbs.len() - 1 { + self.enqueue_link(); + + // Wrap around + self.cycle_bit = !self.cycle_bit; + self.enqueue_index = 0; + } + + address + } + + fn enqueue_link(&mut self) { + let base = unsafe { self.trbs.as_physical_address() }; + + let link = LinkTrb::new(base, self.cycle_bit); + self.trbs[self.enqueue_index].write(bytemuck::cast(link)); + } + + fn advance(&mut self) { + self.dequeue_index += 1; + + if self.dequeue_index >= self.trbs.len() - 1 { + self.dequeue_index = 0; + } + } +} + +impl GenericRing for ControlTransferRing { + fn base(&self) -> PhysicalAddress { + unsafe { self.inner.lock().trbs.as_physical_address() } + } + + fn capacity(&self) -> usize { + self.capacity + } +} + +impl GenericTransferRing for ControlTransferRing { + fn dequeue_pointer(&self) -> PhysicalAddress { + let inner = self.inner.lock(); + unsafe { inner.trbs.as_physical_address() } + .add(inner.dequeue_index * size_of::()) + } + + fn notify(&self, address: PhysicalAddress, value: u32) { + if value == 0 { + return; + } + + let completions = self.completions.read(); + if let Some(&token) = self.pending_trbs.read().get(&address) { + let Some(status) = completions.get(&token) else { + log::warn!( + "Notification received for non-existent transfer: {:?}", + token + ); + return; + }; + + status.signal(value); + } + } + + fn shutdown(&self) { + self.shutdown.store(true, Ordering::Release); + let mut completions = self.completions.write(); + while let Some((_, status)) = completions.pop_first() { + status.abort(); + } + } + + fn slot_id(&self) -> u8 { + self.slot_id + } + + fn endpoint_id(&self) -> u8 { + self.ep_id + } +} + +impl GenericRing for InterruptInTransferRing { + fn base(&self) -> PhysicalAddress { + unsafe { self.inner.lock().trbs.as_physical_address() } + } + + fn capacity(&self) -> usize { + self.capacity + } +} + +impl GenericTransferRing for InterruptInTransferRing { + fn dequeue_pointer(&self) -> PhysicalAddress { + let inner = self.inner.lock(); + unsafe { inner.trbs.as_physical_address() } + .add(inner.dequeue_index * size_of::()) + } + + fn notify(&self, address: PhysicalAddress, value: u32) { + if value == 0 { + return; + } + + let mut completions = self.completions.write(); + if let Some(status) = completions.remove(&address) { + status.signal(value); + } + } + + fn shutdown(&self) { + self.shutdown.store(true, Ordering::Release); + let mut completions = self.completions.write(); + while let Some((_, status)) = completions.pop_first() { + status.abort(); + } + } + + fn slot_id(&self) -> u8 { + self.slot_id + } + + fn endpoint_id(&self) -> u8 { + self.ep_id + } +} + +impl InterruptInTransferRing { + pub fn new(slot_id: u8, ep_id: u8, capacity: usize) -> Result { + let trbs = PageBox::new_zeroed_slice(capacity).map_err(UsbError::MemoryError)?; + + Ok(Self { + inner: IrqSafeSpinlock::new(TransferRingInner { + trbs, + enqueue_index: 0, + dequeue_index: 0, + cycle_bit: true, + }), + completions: IrqSafeRwLock::new(BTreeMap::new()), + slot_id, + ep_id, + capacity, + shutdown: AtomicBool::new(false), + }) + } + + pub fn start_transfer( + &self, + executor: &E, + buffer: &mut PageBox<[u8]>, + ) -> Result { + // Don't even try to start the transfer + if self.shutdown.load(Ordering::Acquire) { + return Err(UsbError::DeviceDisconnected); + } + + let status = Arc::new(UsbTransferStatus::new()); + let address = 
self.inner.lock().enqueue(NormalTransferTrb::new( + unsafe { buffer.as_physical_address() }, + buffer.len(), + true, + )); + self.completions.write().insert(address, status.clone()); + + executor.ring_doorbell(self.slot_id as _, self.ep_id); + + Ok(UsbInterruptTransfer { + length: buffer.len(), + direction: UsbDirection::In, + address, + status, + }) + } + + pub fn complete_transfer(&self, _transfer: UsbInterruptTransfer) { + // Interrupt transfers consist of one TRB each + // TODO: Can two transfers happen simultaneously? e.g. + // + // [TRBa, TRBb] are queued in the ring, both are executing and + // TRBb finishes first + self.inner.lock().advance(); + } +} + +impl ControlTransferRing { + pub fn new(slot_id: u8, ep_id: u8, capacity: usize) -> Result { + let trbs = PageBox::new_zeroed_slice(capacity).map_err(UsbError::MemoryError)?; + + Ok(Self { + inner: IrqSafeSpinlock::new(TransferRingInner { + trbs, + enqueue_index: 0, + dequeue_index: 0, + cycle_bit: true, + }), + completions: IrqSafeRwLock::new(BTreeMap::new()), + pending_trbs: IrqSafeRwLock::new(BTreeMap::new()), + slot_id, + ep_id, + capacity, + + transfer_id: AtomicU64::new(0), + + shutdown: AtomicBool::new(false), + }) + } + + pub fn start_transfer( + &self, + executor: &E, + setup: ControlTransferSetup, + buffer: Option<(PhysicalAddress, usize, UsbDirection)>, + ) -> Result { + // Don't even try to start the transfer + if self.shutdown.load(Ordering::Acquire) { + return Err(UsbError::DeviceDisconnected); + } + + let mut builder = TransferBuilder::new(self, UsbDirection::In); + + builder.push_trb(ControlTransferSetupTrb::new(setup)); + if let Some((address, length, direction)) = buffer { + builder.push_trb(ControlTransferDataTrb::new(address, length, direction)); + } + builder.push_trb(ControlTransferStatusTrb::new(UsbDirection::In, true)); + + let transfer = builder.start(executor, 0); + + Ok(transfer) + } + + pub fn complete_transfer(&self, transfer: UsbControlTransfer) { + let mut pending = self.pending_trbs.write(); + let mut inner = self.inner.lock(); + for trb in transfer.elements { + pending.remove(&trb); + inner.advance(); + } + self.completions.write().remove(&transfer.id); + } +} + +// TRB implementations + +define_bitfields! { + pub RawTransferFlags : u32 { + (10..16) => ty + set_ty, + 0 => cycle + set_cycle + } +} + +define_bitfields! { + pub NormalTransferFlags: u64 { + (0..16) => trb_length, + 37 => interrupt_on_completion, + } +} + +define_bitfields! { + pub ControlTransferSetupRequest : u64 { + (0..8) => bm_request_type, + (8..16) => b_request, + (16..32) => w_value, + (32..48) => w_index, + (48..64) => w_length + } +} + +define_bitfields! { + pub ControlTransferSetupFlags : u64 { + (0..16) => trb_length, + 38 => immediate_data, + (48..50) => transfer_type + } +} + +define_bitfields! { + pub ControlTransferDataFlags : u64 { + (0..16) => trb_length, + 48 => direction, + } +} + +define_bitfields! 
{ + pub ControlTransferStatusFlags : u32 { + 16 => direction, + 5 => interrupt_on_completion + } +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct NormalTransferTrb { + pub buffer: PhysicalAddress, + pub flags: NormalTransferFlags, +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct ControlTransferSetupTrb { + pub request: ControlTransferSetupRequest, + pub flags: ControlTransferSetupFlags, +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct ControlTransferDataTrb { + pub buffer: PhysicalAddress, + pub flags: ControlTransferDataFlags, +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct ControlTransferStatusTrb { + _0: [u32; 3], + pub flags: ControlTransferStatusFlags, +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C, align(16))] +pub struct RawTransferTrb { + _0: [u32; 3], + pub flags: RawTransferFlags, +} + +pub trait TransferTrb: Pod { + const TRB_TYPE: u8; +} + +impl NormalTransferTrb { + pub fn new(buffer: PhysicalAddress, length: usize, interrupt_on_completion: bool) -> Self { + Self { + buffer, + flags: NormalTransferFlags::new(length.try_into().unwrap(), interrupt_on_completion), + } + } +} + +impl ControlTransferSetupTrb { + pub const fn new(setup: ControlTransferSetup) -> Self { + Self { + request: ControlTransferSetupRequest::new( + setup.bm_request_type as _, + setup.b_request as _, + setup.w_value as _, + setup.w_index as _, + setup.w_length as _, + ), + flags: ControlTransferSetupFlags::new(8, true, 3), + } + } +} + +impl ControlTransferDataTrb { + pub fn new(buffer: PhysicalAddress, length: usize, direction: UsbDirection) -> Self { + Self { + buffer, + flags: ControlTransferDataFlags::new( + length.try_into().unwrap(), + direction.is_device_to_host(), + ), + } + } +} + +impl ControlTransferStatusTrb { + pub const fn new(direction: UsbDirection, interrupt_on_completion: bool) -> Self { + Self { + _0: [0; 3], + flags: ControlTransferStatusFlags::new( + direction.is_device_to_host(), + interrupt_on_completion, + ), + } + } +} + +impl TransferTrb for NormalTransferTrb { + const TRB_TYPE: u8 = 1; +} + +impl TransferTrb for ControlTransferSetupTrb { + const TRB_TYPE: u8 = 2; +} + +impl TransferTrb for ControlTransferDataTrb { + const TRB_TYPE: u8 = 3; +} + +impl TransferTrb for ControlTransferStatusTrb { + const TRB_TYPE: u8 = 4; +} diff --git a/kernel/driver/virtio/core/Cargo.toml b/kernel/driver/virtio/core/Cargo.toml new file mode 100644 index 00000000..68d512f3 --- /dev/null +++ b/kernel/driver/virtio/core/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "ygg_driver_virtio_core" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +libk-mm = { path = "../../../libk/libk-mm" } +device-api = { path = "../../../lib/device-api", features = ["derive"] } + +ygg_driver_pci = { path = "../../bus/pci", optional = true } + +log = "0.4.20" +bitflags = "2.4.2" +tock-registers = "0.8.1" + +[features] +default = [] +pci = ["ygg_driver_pci"] diff --git a/kernel/driver/virtio/core/src/error.rs b/kernel/driver/virtio/core/src/error.rs new file mode 100644 index 00000000..f04cb2f4 --- /dev/null +++ b/kernel/driver/virtio/core/src/error.rs @@ -0,0 +1,21 @@ +#[derive(Debug)] +pub enum Error { + OsError(yggdrasil_abi::error::Error), + InvalidPciConfiguration, + 
NoCommonConfigCapability,
+    NoNotifyConfigCapability,
+    NoDeviceConfigCapability,
+    NoInterruptStatusCapability,
+    QueueTooLarge,
+    InvalidQueueSize,
+    EmptyTransaction,
+    QueueFull,
+    QueueEmpty,
+    WrongToken,
+}
+
+impl From<yggdrasil_abi::error::Error> for Error {
+    fn from(value: yggdrasil_abi::error::Error) -> Self {
+        Self::OsError(value)
+    }
+}
diff --git a/kernel/driver/virtio/core/src/lib.rs b/kernel/driver/virtio/core/src/lib.rs
new file mode 100644
index 00000000..85b7491e
--- /dev/null
+++ b/kernel/driver/virtio/core/src/lib.rs
@@ -0,0 +1,53 @@
+#![no_std]
+
+extern crate alloc;
+
+pub mod error;
+pub mod queue;
+pub mod transport;
+
+use bitflags::bitflags;
+use tock_registers::{
+    register_structs,
+    registers::{ReadOnly, ReadWrite},
+};
+
+register_structs! {
+    pub CommonConfiguration {
+        (0x00 => device_feature_select: ReadWrite<u32>),
+        (0x04 => device_feature: ReadOnly<u32>),
+        (0x08 => driver_feature_select: ReadWrite<u32>),
+        (0x0C => driver_feature: ReadWrite<u32>),
+        (0x10 => msix_config: ReadWrite<u16>),
+        (0x12 => num_queues: ReadOnly<u16>),
+        (0x14 => device_status: ReadWrite<u8>),
+        (0x15 => config_generation: ReadOnly<u8>),
+
+        (0x16 => queue_select: ReadWrite<u16>),
+        (0x18 => queue_size: ReadWrite<u16>),
+        (0x1A => queue_msix_vector: ReadWrite<u16>),
+        (0x1C => queue_enable: ReadWrite<u16>),
+        (0x1E => queue_notify_off: ReadWrite<u16>),
+        (0x20 => queue_desc: ReadWrite<u64>),
+        (0x28 => queue_driver: ReadWrite<u64>),
+        (0x30 => queue_device: ReadWrite<u64>),
+
+        (0x38 => @END),
+    }
+}
+
+bitflags! {
+    #[derive(Clone, Copy)]
+    pub struct DeviceStatus: u8 {
+        const ACKNOWLEDGE = 1 << 0;
+        const DRIVER = 1 << 1;
+        const DRIVER_OK = 1 << 2;
+        const FEATURES_OK = 1 << 3;
+        const DEVICE_NEEDS_RESET = 1 << 6;
+        const FAILED = 1 << 7;
+    }
+}
+
+impl DeviceStatus {
+    pub const RESET_VALUE: Self = Self::empty();
+}
diff --git a/kernel/driver/virtio/core/src/queue.rs b/kernel/driver/virtio/core/src/queue.rs
new file mode 100644
index 00000000..885f1a64
--- /dev/null
+++ b/kernel/driver/virtio/core/src/queue.rs
@@ -0,0 +1,355 @@
+//! VirtIO queue implementation.
+//!
+//! # Note
+//!
+//! The code is rather crudely borrowed from the `virtio-drivers` crate; I want to rewrite it
+//! properly myself.
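+//!
+//! A split virtqueue is set up and used roughly like this (an illustrative
+//! sketch only: `transport` is any [`transport::Transport`] implementation,
+//! queue index 0 is assumed to exist, and `rx_buffer` is a caller-allocated
+//! `PageBox<[u8]>`):
+//!
+//! ```ignore
+//! let mut queue = VirtQueue::with_max_capacity(&mut transport, 0, 256, None, false)?;
+//! // Post a device-writable buffer and tell the device about it
+//! let token = unsafe { queue.add(&[&mut rx_buffer], &[])? };
+//! transport.notify(0);
+//! ```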
+use core::{ + mem::MaybeUninit, + sync::atomic::{fence, Ordering}, +}; + +use libk_mm::{ + address::{AsPhysicalAddress, IntoRaw}, + PageBox, +}; + +use crate::{error::Error, transport::Transport}; + +#[derive(Debug)] +#[repr(C)] +struct Descriptor { + address: u64, + len: u32, + flags: u16, + next: u16, +} + +// Layout: +// { +// flags: u16, +// idx: u16, +// ring: [u16; QUEUE_SIZE], +// used_event: u16 +// } +struct AvailableRing { + data: PageBox<[MaybeUninit]>, +} + +// Layout: +// { +// flags: u16, +// idx: u16, +// ring: [UsedElem; QUEUE_SIZE], +// avail_event: u16, +// _pad: u16 +// } +struct UsedRing { + data: PageBox<[MaybeUninit]>, + + used_count: usize, +} + +pub struct VirtQueue { + descriptor_table: PageBox<[MaybeUninit]>, + available: AvailableRing, + used: UsedRing, + + capacity: usize, + + queue_index: u16, + free_head: u16, + + avail_idx: u16, + last_used_idx: u16, + + msix_vector: u16, +} + +impl AvailableRing { + pub fn with_capacity(no_irq: bool, capacity: usize) -> Result { + let mut data = PageBox::new_uninit_slice(capacity + 3)?; + + if no_irq { + data[0].write(1); + } + + data[1].write(0); + + Ok(Self { data }) + } + + pub fn set_head(&mut self, slot: u16, head: u16) { + self.data[slot as usize + 2].write(head); + } + + pub fn set_index(&mut self, index: u16) { + self.data[1].write(index); + } +} + +impl UsedRing { + pub fn with_capacity(capacity: usize) -> Result { + let mut data = PageBox::new_uninit_slice(capacity * 2 + 2)?; + + data[0].write(0); + + Ok(Self { + data, + used_count: 0, + }) + } + + pub fn read_slot(&self, index: u16) -> (u32, u32) { + let index = unsafe { self.data[1 + index as usize * 2].assume_init() }; + let len = unsafe { self.data[2 + index as usize * 2].assume_init() }; + (index, len) + } + + pub fn index(&self) -> u16 { + unsafe { (self.data[0].assume_init() >> 16) as u16 } + } +} + +impl VirtQueue { + pub fn with_capacity( + transport: &mut T, + index: u16, + capacity: usize, + msix_vector: Option, + no_avail_irq: bool, + ) -> Result { + // TODO check if queue is already set up + + let max_capacity = transport.max_queue_size(index); + + if !capacity.is_power_of_two() || capacity > u16::MAX.into() { + return Err(Error::InvalidQueueSize); + } + + if capacity > max_capacity as usize { + return Err(Error::QueueTooLarge); + } + + let descriptor_table = PageBox::new_uninit_slice(capacity)?; + let available = AvailableRing::with_capacity(no_avail_irq, capacity)?; + let used = UsedRing::with_capacity(capacity)?; + + transport.set_queue( + index, + capacity as u16, + unsafe { descriptor_table.as_physical_address() }, + unsafe { available.data.as_physical_address() }, + unsafe { used.data.as_physical_address() }, + msix_vector, + ); + + Ok(Self { + descriptor_table, + available, + used, + + capacity, + + queue_index: index, + free_head: 0, + + avail_idx: 0, + last_used_idx: 0, + + msix_vector: msix_vector.unwrap_or(0xFFFF), + }) + } + + pub fn capacity(&self) -> usize { + self.capacity + } + + pub fn with_max_capacity( + transport: &mut T, + index: u16, + capacity: usize, + msix_vector: Option, + no_avail_irq: bool, + ) -> Result { + let max_capacity = transport.max_queue_size(index); + let capacity = capacity.min(max_capacity as usize); + + Self::with_capacity(transport, index, capacity, msix_vector, no_avail_irq) + } + + pub unsafe fn add<'a, 'b>( + &mut self, + input: &'a [&'b mut PageBox<[u8]>], + output: &'a [&'b PageBox<[u8]>], + ) -> Result { + if input.is_empty() && output.is_empty() { + return Err(Error::EmptyTransaction); + } + let n_desc 
= input.len() + output.len(); + + if self.used.used_count + 1 > self.capacity || self.used.used_count + n_desc > self.capacity + { + return Err(Error::QueueFull); + } + + let head = self.add_direct(input, output); + let avail_slot = self.avail_idx % self.capacity as u16; + + self.available.set_head(avail_slot, head); + + fence(Ordering::SeqCst); + + self.avail_idx = self.avail_idx.wrapping_add(1); + self.available.set_index(self.avail_idx); + + fence(Ordering::SeqCst); + + Ok(head) + } + + unsafe fn add_direct<'a, 'b>( + &mut self, + input: &'a [&'b mut PageBox<[u8]>], + output: &'a [&'b PageBox<[u8]>], + ) -> u16 { + let head = self.free_head; + let mut last = self.free_head; + + for item in input { + assert_ne!(item.len(), 0); + let desc = &mut self.descriptor_table[usize::from(self.free_head)]; + let next = (self.free_head + 1) % self.capacity as u16; + + desc.write(Descriptor { + address: item.as_physical_address().into_raw(), + len: item.len().try_into().unwrap(), + // TODO MAGIC + flags: (1 << 0) | (1 << 1), + next, + }); + + last = self.free_head; + self.free_head = next; + } + + for item in output { + assert_ne!(item.len(), 0); + let desc = &mut self.descriptor_table[usize::from(self.free_head)]; + let next = (self.free_head + 1) % self.capacity as u16; + + desc.write(Descriptor { + address: item.as_physical_address().into_raw(), + len: item.len().try_into().unwrap(), + // TODO + flags: (1 << 0), + next, + }); + + last = self.free_head; + self.free_head = next; + } + + { + let last_desc = self.descriptor_table[last as usize].assume_init_mut(); + + // TODO + last_desc.flags &= !(1 << 0); + } + + self.used.used_count += input.len() + output.len(); + + fence(Ordering::SeqCst); + + head + } + + pub fn add_notify_wait_pop<'a, 'b, T: Transport>( + &mut self, + input: &'a [&'b mut PageBox<[u8]>], + output: &'a [&'b PageBox<[u8]>], + transport: &mut T, + ) -> Result { + let token = unsafe { self.add(input, output) }?; + + transport.notify(self.queue_index); + + while self.is_used_empty() { + core::hint::spin_loop(); + } + + fence(Ordering::SeqCst); + + unsafe { self.pop_used(token) } + } + + pub fn is_used_empty(&self) -> bool { + fence(Ordering::SeqCst); + + self.last_used_idx == self.used.index() + } + + pub unsafe fn pop_last_used(&mut self) -> Option<(u16, u32)> { + let token = self.peek_used()?; + let len = self.pop_used(token).unwrap(); + + Some((token, len)) + } + + pub unsafe fn peek_used(&mut self) -> Option { + if !self.is_used_empty() { + let last_used = self.last_used_idx % self.capacity as u16; + Some(self.used.read_slot(last_used).0 as u16) + } else { + None + } + } + + pub unsafe fn pop_used(&mut self, token: u16) -> Result { + if self.is_used_empty() { + return Err(Error::QueueEmpty); + } + + let last_used_slot = self.last_used_idx % self.capacity as u16; + let (index, len) = self.used.read_slot(last_used_slot); + + if index != token as u32 { + return Err(Error::WrongToken); + } + + self.free_descriptor_chain(token); + + fence(Ordering::SeqCst); + + self.last_used_idx = self.last_used_idx.wrapping_add(1); + + Ok(len) + } + + unsafe fn free_descriptor_chain(&mut self, head: u16) -> usize { + let mut current_node = Some(self.descriptor_table[usize::from(head)].assume_init_mut()); + let mut count = 0; + + while let Some(current) = current_node { + assert_ne!(current.len, 0); + let next_head = (current.flags & (1 << 0) != 0).then_some(current.next); + + current.address = 0; + current.flags = 0; + current.next = 0; + current.len = 0; + + self.used.used_count -= 1; + count += 1; 
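+            // `next_head` was read from the descriptor's NEXT flag before its fields
+            // were cleared above; follow it to release the entire chain.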
+ + current_node = + next_head.map(|head| self.descriptor_table[usize::from(head)].assume_init_mut()); + } + + self.free_head = head; + count + } + + pub fn msix_vector(&self) -> u16 { + self.msix_vector + } +} diff --git a/kernel/driver/virtio/core/src/transport/mod.rs b/kernel/driver/virtio/core/src/transport/mod.rs new file mode 100644 index 00000000..0c9df6f1 --- /dev/null +++ b/kernel/driver/virtio/core/src/transport/mod.rs @@ -0,0 +1,95 @@ +use core::mem::size_of; + +use libk_mm::{ + address::{IntoRaw, PhysicalAddress}, + device::DeviceMemoryIo, +}; +use tock_registers::{ + interfaces::{Readable, Writeable}, + registers::WriteOnly, +}; + +use crate::{CommonConfiguration, DeviceStatus}; + +pub mod pci; + +pub trait Transport { + fn common_cfg(&self) -> &CommonConfiguration; + fn notify_cfg(&self) -> &[WriteOnly]; + fn notify_off_mul(&self) -> usize; + fn supports_msix(&self) -> bool; + fn device_cfg(&self) -> Option<&DeviceMemoryIo<[u8]>>; + fn read_interrupt_status(&self) -> (bool, bool); + + fn read_device_features(&mut self) -> u64 { + let cfg = self.common_cfg(); + cfg.device_feature_select.set(0); + let low = cfg.device_feature.get(); + cfg.device_feature_select.set(1); + let high = cfg.device_feature.get(); + + (low as u64) | ((high as u64) << 32) + } + + fn write_driver_features(&mut self, value: u64) { + let cfg = self.common_cfg(); + cfg.driver_feature_select.set(0); + cfg.driver_feature.set(value as u32); + cfg.driver_feature_select.set(1); + cfg.driver_feature.set((value >> 32) as u32); + } + + fn read_device_status(&mut self) -> DeviceStatus { + let cfg = self.common_cfg(); + DeviceStatus::from_bits_retain(cfg.device_status.get()) + } + + fn write_device_status(&mut self, value: DeviceStatus) { + let cfg = self.common_cfg(); + cfg.device_status.set(value.bits()); + } + + fn max_queue_size(&mut self, queue: u16) -> u32 { + let cfg = self.common_cfg(); + cfg.queue_select.set(queue); + cfg.queue_size.get().into() + } + + fn set_queue( + &mut self, + queue: u16, + capacity: u16, + descriptor_table_phys: PhysicalAddress, + available_ring_phys: PhysicalAddress, + used_ring_phys: PhysicalAddress, + msix_vector: Option, + ) { + let cfg = self.common_cfg(); + cfg.queue_select.set(queue); + cfg.queue_size.set(capacity); + cfg.queue_desc.set(descriptor_table_phys.into_raw()); + cfg.queue_driver.set(available_ring_phys.into_raw()); + cfg.queue_device.set(used_ring_phys.into_raw()); + if self.supports_msix() { + cfg.queue_msix_vector.set(msix_vector.unwrap_or(0xFFFF)); + } else { + cfg.queue_msix_vector.set(0xFFFF); + } + cfg.queue_enable.set(1); + } + + fn unset_queue(&mut self, _queue: u16) { + todo!() + } + + fn notify(&mut self, queue: u16) { + let cfg = self.common_cfg(); + let notify = self.notify_cfg(); + + cfg.queue_select.set(queue); + let notify_off = cfg.queue_notify_off.get() as usize; + let index = (notify_off * self.notify_off_mul()) / size_of::(); + + notify[index].set(queue); + } +} diff --git a/kernel/driver/virtio/core/src/transport/pci.rs b/kernel/driver/virtio/core/src/transport/pci.rs new file mode 100644 index 00000000..20a838d8 --- /dev/null +++ b/kernel/driver/virtio/core/src/transport/pci.rs @@ -0,0 +1,142 @@ +use libk_mm::device::DeviceMemoryIo; +use tock_registers::{ + interfaces::Readable, + registers::{ReadOnly, WriteOnly}, +}; +use ygg_driver_pci::{ + capability::{ + VirtioCapabilityData, VirtioCommonConfigCapability, VirtioDeviceConfigCapability, + VirtioInterruptStatusCapability, VirtioNotifyConfigCapability, + }, + PciCommandRegister, 
+};
+
+use crate::{error::Error, CommonConfiguration};
+
+use super::Transport;
+
+pub struct PciTransport {
+    common_cfg: DeviceMemoryIo<'static, CommonConfiguration>,
+    device_cfg: DeviceMemoryIo<'static, [u8]>,
+    notify_cfg: DeviceMemoryIo<'static, [WriteOnly<u16>]>,
+    isr: DeviceMemoryIo<'static, ReadOnly<u32>>,
+    notify_cfg_mul: usize,
+}
+
+impl Transport for PciTransport {
+    fn common_cfg(&self) -> &CommonConfiguration {
+        &self.common_cfg
+    }
+
+    fn notify_cfg(&self) -> &[WriteOnly<u16>] {
+        &self.notify_cfg
+    }
+
+    fn notify_off_mul(&self) -> usize {
+        self.notify_cfg_mul
+    }
+
+    fn supports_msix(&self) -> bool {
+        true
+    }
+
+    fn device_cfg(&self) -> Option<&DeviceMemoryIo<[u8]>> {
+        Some(&self.device_cfg)
+    }
+
+    fn read_interrupt_status(&self) -> (bool, bool) {
+        let value = self.isr.get();
+        (value & 1 != 0, value & 2 != 0)
+    }
+}
+
+impl PciTransport {
+    pub fn from_config_space<S: PciConfigurationSpace>(space: &S) -> Result<Self, Error> {
+        // Transitional devices MUST have a PCI Revision ID of 0.
+        // Transitional devices MUST have the PCI Subsystem Device ID
+        // matching the Virtio Device ID, as indicated in section 5.
+        // Transitional devices MUST have the Transitional PCI
+        // Device ID in the range 0x1000 to 0x103f.
+        // TODO check PCI subsystem ID
+        if space.rev_id() != 0 {
+            return Err(Error::InvalidPciConfiguration);
+        }
+
+        let mut cmd = PciCommandRegister::from_bits_retain(space.command());
+        cmd &= !(PciCommandRegister::DISABLE_INTERRUPTS | PciCommandRegister::ENABLE_IO);
+        cmd |= PciCommandRegister::ENABLE_MEMORY | PciCommandRegister::BUS_MASTER;
+        space.set_command(cmd.bits());
+
+        // Extract capabilities
+
+        let common_cfg_cap = space
+            .capability::<VirtioCommonConfigCapability>()
+            .ok_or(Error::NoCommonConfigCapability)?;
+        // TODO this is not mandatory
+        let device_cfg_cap = space
+            .capability::<VirtioDeviceConfigCapability>()
+            .ok_or(Error::NoDeviceConfigCapability)?;
+        let notify_cfg_cap = space
+            .capability::<VirtioNotifyConfigCapability>()
+            .ok_or(Error::NoNotifyConfigCapability)?;
+        let isr_cap = space
+            .capability::<VirtioInterruptStatusCapability>()
+            .ok_or(Error::NoInterruptStatusCapability)?;
+
+        // TODO MSI/MSI-X
+
+        // Map the regions
+
+        let common_cfg_base = space
+            .bar(common_cfg_cap.bar_index().unwrap())
+            .unwrap()
+            .as_memory()
+            .unwrap()
+            .add(common_cfg_cap.bar_offset());
+        let device_cfg_base = space
+            .bar(device_cfg_cap.bar_index().unwrap())
+            .unwrap()
+            .as_memory()
+            .unwrap()
+            .add(device_cfg_cap.bar_offset());
+        let device_cfg_len = device_cfg_cap.length();
+        let notify_cfg_base = space
+            .bar(notify_cfg_cap.bar_index().unwrap())
+            .unwrap()
+            .as_memory()
+            .unwrap()
+            .add(notify_cfg_cap.bar_offset());
+        let notify_cfg_len = notify_cfg_cap.length();
+        let notify_cfg_mul = notify_cfg_cap.offset_multiplier();
+        let isr_base = space
+            .bar(isr_cap.bar_index().unwrap())
+            .unwrap()
+            .as_memory()
+            .unwrap()
+            .add(isr_cap.bar_offset());
+        let isr_len = isr_cap.length();
+
+        assert!(isr_len >= 4);
+        assert_eq!(notify_cfg_len % 2, 0);
+
+        let common_cfg =
+            unsafe { DeviceMemoryIo::map(common_cfg_base, Default::default()) }.unwrap();
+        let device_cfg = unsafe {
+            DeviceMemoryIo::map_slice(device_cfg_base, device_cfg_len, Default::default())
+        }
+        .unwrap();
+        let notify_cfg = unsafe {
+            DeviceMemoryIo::map_slice(notify_cfg_base, notify_cfg_len / 2, Default::default())
+        }
+        .unwrap();
+        let isr = unsafe { DeviceMemoryIo::map(isr_base, Default::default()) }.unwrap();
+
+        Ok(Self {
+            common_cfg,
+            device_cfg,
+            notify_cfg,
+            notify_cfg_mul,
+            isr,
+        })
+    }
+}
diff --git a/kernel/driver/virtio/net/Cargo.toml b/kernel/driver/virtio/net/Cargo.toml
new file mode 100644
index
00000000..fcb6869b --- /dev/null +++ b/kernel/driver/virtio/net/Cargo.toml @@ -0,0 +1,25 @@ +[package] +name = "ygg_driver_virtio_net" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +libk-util = { path = "../../../libk/libk-util" } +libk-mm = { path = "../../../libk/libk-mm" } +device-api = { path = "../../../lib/device-api", features = ["derive"] } + +ygg_driver_virtio_core = { path = "../core" } +ygg_driver_net_core = { path = "../../net/core" } +ygg_driver_pci = { path = "../../bus/pci", optional = true } + +log = "0.4.20" +bitflags = "2.4.2" +tock-registers = "0.8.1" +bytemuck = { version = "1.14.0", features = ["derive"] } + +[features] +default = [] +pci = ["ygg_driver_pci", "ygg_driver_virtio_core/pci"] diff --git a/kernel/driver/virtio/net/src/lib.rs b/kernel/driver/virtio/net/src/lib.rs new file mode 100644 index 00000000..79ae7f60 --- /dev/null +++ b/kernel/driver/virtio/net/src/lib.rs @@ -0,0 +1,311 @@ +// TODO use more fancy features of virtio-net, TCP/IP checksum offloading would be nice +#![feature(strict_provenance)] +#![no_std] + +extern crate alloc; + +use core::mem::size_of; + +use alloc::{boxed::Box, collections::BTreeMap}; +use bytemuck::{Pod, Zeroable}; +use device_api::{ + interrupt::{InterruptAffinity, InterruptHandler}, + Device, +}; +use libk_mm::PageBox; +use libk_util::{ + sync::{spin_rwlock::IrqSafeRwLock, IrqSafeSpinlock, IrqSafeSpinlockGuard}, + OneTimeInit, +}; +use ygg_driver_net_core::{ + interface::{NetworkDevice, NetworkInterfaceType}, + Packet, +}; +use ygg_driver_pci::device::{PciDeviceInfo, PreferredInterruptMode}; +use ygg_driver_virtio_core::{ + queue::VirtQueue, + transport::{pci::PciTransport, Transport}, + DeviceStatus, +}; +use yggdrasil_abi::{error::Error, net::MacAddress}; + +struct Queues { + receive: IrqSafeSpinlock, + transmit: IrqSafeSpinlock, + // #[allow(unused)] + // configuration_vector: usize, + receive_vector: Option, +} + +pub struct VirtioNet { + transport: IrqSafeSpinlock, + queues: OneTimeInit, + interface_id: OneTimeInit, + + mac: IrqSafeRwLock, + + pending_packets: IrqSafeRwLock>>, + + pci_device_info: Option, +} + +#[derive(Clone, Copy, Debug, Pod, Zeroable)] +#[repr(C)] +struct VirtioPacketHeader { + flags: u8, + gso_type: u8, + hdr_len: u16, + gso_size: u16, + csum_start: u16, + csum_offset: u16, +} + +impl Queues { + pub fn try_receive(&self, _index: usize) -> Option<(u16, IrqSafeSpinlockGuard)> { + let mut queue = self.receive.lock(); + // TODO use len for packet size hint + let (token, _len) = unsafe { queue.pop_last_used() }?; + Some((token, queue)) + } +} + +impl VirtioNet { + const PACKET_SIZE: usize = 4096; + + pub fn new(transport: T, pci_device_info: Option) -> Self { + // Read MAC from device config + let device_cfg = transport + .device_cfg() + .expect("virtio-net must have device-specific configuration section"); + let mut mac_bytes = [0; 6]; + mac_bytes.copy_from_slice(&device_cfg[..6]); + let mac = MacAddress::from(mac_bytes); + + Self { + transport: IrqSafeSpinlock::new(transport), + queues: OneTimeInit::new(), + interface_id: OneTimeInit::new(), + + mac: IrqSafeRwLock::new(mac), + + pending_packets: IrqSafeRwLock::new(BTreeMap::new()), + + pci_device_info, + } + } + + pub fn listen(&self, buffers: usize) { + let queues = self.queues.get(); + let mut queue = queues.receive.lock(); + let mut packets = 
self.pending_packets.write(); + + for _ in 0..buffers { + let mut packet = PageBox::new_slice(0, Self::PACKET_SIZE).unwrap(); + let token = unsafe { queue.add(&[&mut packet], &[]).unwrap() }; + packets.insert(token, packet); + } + + let mut transport = self.transport.lock(); + transport.notify(0); + } + + fn handle_receive_interrupt(&self, queue: usize) -> bool { + let queues = self.queues.get(); + let interface_id = *self.interface_id.get(); + let mut count = 0; + + while let Some((token, mut queue)) = queues.try_receive(queue) { + let mut pending_packets = self.pending_packets.write(); + let packet = pending_packets.remove(&token).unwrap(); + + let mut buffer = PageBox::new_slice(0, Self::PACKET_SIZE).unwrap(); + + let token = unsafe { queue.add(&[&mut buffer], &[]).unwrap() }; + pending_packets.insert(token, buffer); + + let packet = Packet::new(packet, size_of::(), interface_id); + ygg_driver_net_core::receive_packet(packet).unwrap(); + count += 1 + } + + if count != 0 { + self.transport.lock().notify(0); + } + + count != 0 + } + + fn begin_init(&self) -> Result { + let mut transport = self.transport.lock(); + let mut status = DeviceStatus::RESET_VALUE; + + log::debug!("Reset device"); + transport.write_device_status(status); + status |= DeviceStatus::ACKNOWLEDGE; + transport.write_device_status(status); + status |= DeviceStatus::DRIVER; + transport.write_device_status(status); + + let _device_features = transport.read_device_features(); + + // TODO blah blah blah + + transport.write_driver_features(0); + + status |= DeviceStatus::FEATURES_OK; + transport.write_device_status(status); + + if !transport + .read_device_status() + .contains(DeviceStatus::FEATURES_OK) + { + return Err(Error::InvalidOperation); + } + + Ok(status) + } + + fn finish_init(&self, status: DeviceStatus) { + let mut transport = self.transport.lock(); + + transport.write_device_status(status | DeviceStatus::DRIVER_OK); + } + + unsafe fn setup_queues( + &'static self, + receive_count: usize, + transmit_count: usize, + ) -> Result<(), Error> { + let receive_vector = if let Some(pci) = self.pci_device_info.as_ref() { + pci.init_interrupts(PreferredInterruptMode::Msi)?; + let info = pci.map_interrupt(InterruptAffinity::Any, self)?; + if let Some(info) = info { + Some(info.vector as u16) + } else { + None + } + } else { + None + }; + + // TODO multiqueue capability + assert_eq!(receive_count, 1); + assert_eq!(transmit_count, 1); + + let mut transport = self.transport.lock(); + + // Setup the virtqs + let rx = VirtQueue::with_max_capacity(&mut *transport, 0, 128, receive_vector, false) + .map_err(cvt_error)?; + let tx = + VirtQueue::with_max_capacity(&mut *transport, 1, 128, None, true).map_err(cvt_error)?; + + self.queues.init(Queues { + receive: IrqSafeSpinlock::new(rx), + transmit: IrqSafeSpinlock::new(tx), + receive_vector, + }); + + Ok(()) + } +} + +impl NetworkDevice for VirtioNet { + fn transmit(&self, mut packet: PageBox<[u8]>) -> Result<(), Error> { + let queues = self.queues.get(); + let mut tx = queues.transmit.lock(); + let mut transport = self.transport.lock(); + packet[..size_of::()].fill(0); + let _len = tx + .add_notify_wait_pop(&[], &[&packet], &mut *transport) + .unwrap(); + + Ok(()) + } + + fn read_hardware_address(&self) -> MacAddress { + *self.mac.read() + } + + fn packet_prefix_size(&self) -> usize { + size_of::() + } +} + +impl InterruptHandler for VirtioNet { + fn handle_irq(&self, vector: Option) -> bool { + if let Some(_) = vector { + // MSI/MSI-X + let Some(queues) = self.queues.try_get() else { 
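+                // Interrupt arrived before setup_queues() finished; nothing to handle yet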
+ return false; + }; + + if vector == queues.receive_vector.map(Into::into) { + self.handle_receive_interrupt(0) + } else { + false + } + } else { + // Legacy IRQ + let (queue_irq, config_irq) = self.transport.lock().read_interrupt_status(); + + if queue_irq { + log::debug!("Handle IRQ"); + self.handle_receive_interrupt(0); + } + + queue_irq || config_irq + } + } +} + +// impl MsiHandler for VirtioNet { +// fn handle_msi(&self, vector: usize) -> bool { +// +// todo!() +// } +// } + +impl Device for VirtioNet { + fn display_name(&self) -> &'static str { + "VirtIO Network Device" + } + + unsafe fn init(&'static self) -> Result<(), Error> { + let status = self.begin_init()?; + + // TODO multiqueue + self.setup_queues(1, 1)?; + + self.finish_init(status); + + let iface = ygg_driver_net_core::register_interface(NetworkInterfaceType::Ethernet, self); + self.interface_id.init(iface.id()); + self.listen(64); + + Ok(()) + } + + unsafe fn init_irq(&'static self) -> Result<(), Error> { + Ok(()) + } +} + +fn cvt_error(error: ygg_driver_virtio_core::error::Error) -> Error { + use ygg_driver_virtio_core::error::Error as VirtioError; + match error { + VirtioError::OsError(err) => err, + _ => Error::InvalidOperation, + } +} + +pub fn probe(info: &PciDeviceInfo) -> Result<&'static dyn Device, Error> { + let space = &info.config_space; + + let transport = PciTransport::from_config_space(space).unwrap(); + let device = VirtioNet::new(transport, Some(info.clone())); + + let device = Box::leak(Box::new(device)); + + Ok(device) +} diff --git a/kernel/etc/aarch64-unknown-qemu.json b/kernel/etc/aarch64-unknown-qemu.json new file mode 100644 index 00000000..fd88dbf3 --- /dev/null +++ b/kernel/etc/aarch64-unknown-qemu.json @@ -0,0 +1,18 @@ +{ + "arch": "aarch64", + "data-layout": "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128", + "disable-redzone": true, + "features": "+v8a,+strict-align,+neon,+fp-armv8", + "is-builtin": false, + "linker": "rust-lld", + "linker-flavor": "ld.lld", + "llvm-target": "aarch64-unknown-none", + "max-atomic-width": 128, + "panic-strategy": "abort", + "relocation-model": "static", + "target-pointer-width": "64", + "eh-frame-header": false, + "post-link-args": { + "ld.lld": ["-Tetc/aarch64-unknown-qemu.ld"] + } +} diff --git a/kernel/etc/aarch64-unknown-qemu.ld b/kernel/etc/aarch64-unknown-qemu.ld new file mode 100644 index 00000000..f1fc2e63 --- /dev/null +++ b/kernel/etc/aarch64-unknown-qemu.ld @@ -0,0 +1,53 @@ +ENTRY(__aarch64_entry); + +KERNEL_PHYS_BASE = 0x40080000; +KERNEL_VIRT_OFFSET = 0xFFFFFF8000000000; + +SECTIONS { + . = KERNEL_PHYS_BASE; + PROVIDE(__kernel_phys_start = .); + + .text.entry : { + *(.text.entry) + } + + . = ALIGN(16); + . = . + KERNEL_VIRT_OFFSET; + + .text : AT(. - KERNEL_VIRT_OFFSET) { + KEEP(*(.text.vectors)); + *(.text*) + } + + . = ALIGN(4K); + .rodata : AT(. - KERNEL_VIRT_OFFSET) { + *(.eh_frame*) + . = ALIGN(16); + PROVIDE(__dt_probes_start = .); + KEEP(*(.dt_probes)); + PROVIDE(__dt_probes_end = .); + *(.rodata*) + } + + . = ALIGN(4K); + .data.tables : AT (. - KERNEL_VIRT_OFFSET) { + KEEP(*(.data.tables)) + } + + . = ALIGN(4K); + .data : AT(. - KERNEL_VIRT_OFFSET) { + *(.data*) + /* *(.got*) */ + } + + . = ALIGN(4K); + PROVIDE(__bss_start_phys = . - KERNEL_VIRT_OFFSET); + .bss : AT(. - KERNEL_VIRT_OFFSET) { + *(COMMON) + *(.bss*) + } + . = ALIGN(4K); + PROVIDE(__bss_end_phys = . - KERNEL_VIRT_OFFSET); + + PROVIDE(__kernel_size = . 
- KERNEL_VIRT_OFFSET - KERNEL_PHYS_BASE); +}; diff --git a/kernel/etc/x86_64-unknown-none.json b/kernel/etc/x86_64-unknown-none.json new file mode 100644 index 00000000..eed3a3e1 --- /dev/null +++ b/kernel/etc/x86_64-unknown-none.json @@ -0,0 +1,26 @@ +{ + "is-builtin": false, + "arch": "x86_64", + "cpu": "x86-64", + "os": "none", + "llvm-target": "x86_64-unknown-linux-gnu", + "data-layout": "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128", + "max-atomic-width": 64, + "target-pointer-width": "64", + + "disable-redzone": true, + "executables": true, + "panic-strategy": "abort", + "features": "-avx,-sse,+soft-float", + + "has-thread-local": false, + + "linker": "rust-lld", + "linker-flavor": "ld.lld", + + "pre-link-args": { + "ld.lld": [ + "-Tetc/x86_64-unknown-none.ld" + ] + } +} diff --git a/kernel/etc/x86_64-unknown-none.ld b/kernel/etc/x86_64-unknown-none.ld new file mode 100644 index 00000000..1be78b30 --- /dev/null +++ b/kernel/etc/x86_64-unknown-none.ld @@ -0,0 +1,51 @@ +ENTRY(__x86_64_entry); + +KERNEL_PHYS_BASE = 0x200000; +KERNEL_VIRT_OFFSET = 0xFFFFFF8000000000; + +SECTIONS { + . = KERNEL_PHYS_BASE; + PROVIDE(__kernel_phys_start = .); + PROVIDE(__kernel_start = . + KERNEL_VIRT_OFFSET); + + .text.entry : { + *(.multiboot) + *(.text.entry) + } + + . = ALIGN(16); + . = . + KERNEL_VIRT_OFFSET; + + .text : AT(. - KERNEL_VIRT_OFFSET) { + *(.text*) + } + + . = ALIGN(4K); + .rodata : AT(. - KERNEL_VIRT_OFFSET) { + *(.eh_frame*) + *(.rodata*) + } + + . = ALIGN(4K); + .data.tables : AT (. - KERNEL_VIRT_OFFSET) { + KEEP(*(.data.tables)) + } + + .data : AT(. - KERNEL_VIRT_OFFSET) { + KEEP(*(.data.yboot)) + *(.data*) + *(.got*) + } + + . = ALIGN(4K); + PROVIDE(__bss_start_phys = . - KERNEL_VIRT_OFFSET); + .bss : AT(. - KERNEL_VIRT_OFFSET) { + *(COMMON) + *(.bss*) + } + . = ALIGN(4K); + PROVIDE(__bss_end_phys = . - KERNEL_VIRT_OFFSET); + + PROVIDE(__kernel_end = .); + PROVIDE(__kernel_size = . 
- KERNEL_VIRT_OFFSET - KERNEL_PHYS_BASE); +}; diff --git a/kernel/lib/device-api/Cargo.toml b/kernel/lib/device-api/Cargo.toml new file mode 100644 index 00000000..bab8d602 --- /dev/null +++ b/kernel/lib/device-api/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "device-api" +version = "0.1.0" +edition = "2021" +authors = ["Mark Poliakov "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +device-api-macros = { path = "macros", optional = true } + +[features] +default = [] +derive = ["device-api-macros"] diff --git a/kernel/lib/device-api/macros/Cargo.toml b/kernel/lib/device-api/macros/Cargo.toml new file mode 100644 index 00000000..6d7dd27f --- /dev/null +++ b/kernel/lib/device-api/macros/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "device-api-macros" +version = "0.1.0" +edition = "2021" +authors = ["Mark Poliakov "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[lib] +proc-macro = true + +[dependencies] +proc-macro2 = "1.0.66" +quote = "1.0.32" +syn = { version = "2.0.28", features = ["full"] } diff --git a/kernel/lib/device-api/macros/src/lib.rs b/kernel/lib/device-api/macros/src/lib.rs new file mode 100644 index 00000000..e69de29b diff --git a/kernel/lib/device-api/src/bus.rs b/kernel/lib/device-api/src/bus.rs new file mode 100644 index 00000000..589540c2 --- /dev/null +++ b/kernel/lib/device-api/src/bus.rs @@ -0,0 +1,7 @@ +use yggdrasil_abi::error::Error; + +use crate::{manager::DeviceManager, Device}; + +pub trait Bus: Device { + fn enumerate(&self, manager: &mut DeviceManager) -> Result<(), Error>; +} diff --git a/kernel/lib/device-api/src/device.rs b/kernel/lib/device-api/src/device.rs new file mode 100644 index 00000000..5ce73b4a --- /dev/null +++ b/kernel/lib/device-api/src/device.rs @@ -0,0 +1,37 @@ +use yggdrasil_abi::error::Error; + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash)] +#[repr(transparent)] +pub struct DeviceId(u64); + +pub trait Device: Send + 'static { + fn display_name(&self) -> &'static str; + + /// Initializes the device, making it ready for operation. + /// The method is also responsible for registering the device with appropriate OS subsystems + /// (e.g. registering a terminal ttySn for a serial port) + /// + /// # Safety + /// + /// The caller must make sure the function is only called once. + unsafe fn init(&'static self) -> Result<(), Error> { + Ok(()) + } + + /// Initializes the IRQ handling options on this device: binds its IRQ(s) to their handlers and + /// enables their reception. + /// + /// # Safety + /// + /// The caller must make sure the function is only called once. The caller must also make sure + /// the function is not called before the device's [Device::init] is called. 
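+    ///
+    /// A minimal sketch of a hypothetical implementor (illustration only; `DummySerial`
+    /// is not a real driver in this tree):
+    ///
+    /// ```ignore
+    /// struct DummySerial;
+    ///
+    /// impl Device for DummySerial {
+    ///     fn display_name(&self) -> &'static str {
+    ///         "dummy-serial"
+    ///     }
+    ///
+    ///     unsafe fn init(&'static self) -> Result<(), Error> {
+    ///         // Probe and reset the hardware here
+    ///         Ok(())
+    ///     }
+    ///
+    ///     unsafe fn init_irq(&'static self) -> Result<(), Error> {
+    ///         // Bind and unmask IRQs here, strictly after init()
+    ///         Ok(())
+    ///     }
+    /// }
+    /// ```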
+    unsafe fn init_irq(&'static self) -> Result<(), Error> {
+        Ok(())
+    }
+}
+
+impl From<usize> for DeviceId {
+    fn from(value: usize) -> Self {
+        Self(value as u64)
+    }
+}
diff --git a/kernel/lib/device-api/src/interrupt.rs b/kernel/lib/device-api/src/interrupt.rs
new file mode 100644
index 00000000..e0c768f6
--- /dev/null
+++ b/kernel/lib/device-api/src/interrupt.rs
@@ -0,0 +1,193 @@
+use yggdrasil_abi::error::Error;
+
+use crate::Device;
+
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum Irq {
+    Private(u32),
+    External(u32),
+}
+
+/// Describes messages sent from some CPU to others
+#[derive(Clone, Copy, PartialEq, Debug)]
+#[repr(u64)]
+pub enum IpiMessage {
+    /// Indicates that the sender CPU entered kernel panic and wants other CPUs to follow
+    Panic,
+    /// Indicates that the cores should either halt and wait for the caller to shut the system
+    /// down, or they should shut down by themselves, depending on the platform
+    Shutdown,
+}
+
+#[derive(Default, Clone, Copy, PartialEq, Eq, Debug)]
+#[repr(u32)]
+pub enum IrqLevel {
+    #[default]
+    Default,
+    ActiveHigh,
+    ActiveLow,
+}
+
+#[derive(Default, Clone, Copy, PartialEq, Eq, Debug)]
+#[repr(u32)]
+pub enum IrqTrigger {
+    #[default]
+    Default,
+    Edge,
+    Level,
+}
+
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub enum IpiDeliveryTarget {
+    Specific(usize),
+    ThisCpu,
+    OtherCpus,
+}
+
+#[derive(Default, Clone, Copy, Debug)]
+pub struct IrqOptions {
+    pub level: IrqLevel,
+    pub trigger: IrqTrigger,
+}
+
+#[derive(Clone, Copy, Debug, Default)]
+pub struct MsiInfo {
+    pub address: usize,
+    pub value: u32,
+    pub vector: usize,
+    pub affinity: InterruptAffinity,
+}
+
+#[derive(Clone, Copy, Debug, Default)]
+pub enum InterruptAffinity {
+    #[default]
+    Any,
+    Specific(usize),
+}
+
+pub trait InterruptTable {
+    fn handler(&self, index: usize) -> Option<&'static dyn InterruptHandler>;
+}
+
+pub trait MessageInterruptController {
+    fn register_msi(
+        &self,
+        affinity: InterruptAffinity,
+        handler: &'static dyn InterruptHandler,
+    ) -> Result<MsiInfo, Error> {
+        let mut range = [MsiInfo {
+            affinity,
+            ..Default::default()
+        }];
+        self.register_msi_range(&mut range, handler)?;
+        Ok(range[0])
+    }
+
+    #[allow(unused)]
+    fn register_msi_range(
+        &self,
+        range: &mut [MsiInfo],
+        handler: &'static dyn InterruptHandler,
+    ) -> Result<(), Error> {
+        Err(Error::NotImplemented)
+    }
+
+    fn handle_msi(&self, #[allow(unused)] vector: usize) {}
+}
+
+pub trait ExternalInterruptController {
+    /// Performs IRQ delivery method configuration and registers a handler to execute when it is
+    /// fired
+    fn register_irq(
+        &self,
+        irq: Irq,
+        options: IrqOptions,
+        handler: &'static dyn InterruptHandler,
+    ) -> Result<(), Error>;
+
+    /// Enables the specified IRQ (unmasks it)
+    fn enable_irq(&self, irq: Irq) -> Result<(), Error>;
+
+    /// Handles a single pending interrupt on this controller.
+    /// The function is intended for interrupt controllers which track the pending IRQ index in
+    /// internal registers; the order in which multiple pending interrupts are handled is
+    /// platform/controller specific.
+    fn handle_pending_irqs(&self) {}
+    /// Handles a single pending interrupt with a known index on this controller.
+    /// The function is intended for interrupt controllers where vectors "know" their interrupt
+    /// index.
+    fn handle_specific_irq(&self, #[allow(unused)] index: usize) {}
+}
+
+pub trait LocalInterruptController {
+    fn send_ipi(&self, target: IpiDeliveryTarget, msg: IpiMessage) -> Result<(), Error>;
+
+    /// Initializes the local interrupt controller for an Application Processor instance.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure this function is only called once per each AP (and only for APs).
+    unsafe fn init_ap(&self) -> Result<(), Error>;
+}
+
+pub trait InterruptHandler: Device {
+    fn handle_irq(&'static self, vector: Option<usize>) -> bool;
+}
+
+pub struct FixedInterruptTable<const SIZE: usize> {
+    entries: [Option<&'static dyn InterruptHandler>; SIZE],
+}
+
+impl IrqLevel {
+    pub fn override_default(self, value: IrqLevel) -> Self {
+        match self {
+            Self::Default => value,
+            _ => self,
+        }
+    }
+}
+
+impl IrqTrigger {
+    pub fn override_default(self, value: IrqTrigger) -> Self {
+        match self {
+            Self::Default => value,
+            _ => self,
+        }
+    }
+}
+
+impl<const SIZE: usize> FixedInterruptTable<SIZE> {
+    pub const fn new() -> Self {
+        Self {
+            entries: [None; SIZE],
+        }
+    }
+
+    pub fn insert(
+        &mut self,
+        index: usize,
+        handler: &'static dyn InterruptHandler,
+    ) -> Result<(), Error> {
+        if self.entries[index].is_some() {
+            todo!();
+        }
+
+        self.entries[index] = Some(handler);
+        Ok(())
+    }
+
+    pub fn insert_least_loaded(
+        &mut self,
+        handler: &'static dyn InterruptHandler,
+    ) -> Result<usize, Error> {
+        let index = self.entries.iter().position(|p| p.is_none()).unwrap();
+        self.entries[index].replace(handler);
+        Ok(index)
+    }
+}
+
+impl<const SIZE: usize> InterruptTable for FixedInterruptTable<SIZE> {
+    fn handler(&self, index: usize) -> Option<&'static dyn InterruptHandler> {
+        self.entries[index]
+    }
+}
diff --git a/kernel/lib/device-api/src/lib.rs b/kernel/lib/device-api/src/lib.rs
new file mode 100644
index 00000000..7225486b
--- /dev/null
+++ b/kernel/lib/device-api/src/lib.rs
@@ -0,0 +1,35 @@
+#![feature(trait_alias)]
+#![no_std]
+
+extern crate alloc;
+
+pub mod bus;
+pub mod device;
+pub mod interrupt;
+pub mod manager;
+pub mod serial;
+pub mod timer;
+
+pub use device::{Device, DeviceId};
+use yggdrasil_abi::error::Error;
+
+pub trait CpuBringupDevice: Device {
+    /// Starts a CPU with given index, providing it with some argument value and instruction
+    /// pointer from which its execution should begin.
+    ///
+    /// # Safety
+    ///
+    /// This function is unsafe because it can have unexpected effects on the system state if
+    /// misused.
+    unsafe fn start_cpu(&self, id: usize, ip: usize, arg0: usize) -> Result<(), Error>;
+}
+
+pub trait ResetDevice: Device {
+    /// Performs a system reset.
+    ///
+    /// # Safety
+    ///
+    /// The kernel must ensure it is actually safe to perform a reset: that no critical operations
+    /// are aborted and no data is lost.
+    unsafe fn reset(&self) -> !;
+}
diff --git a/kernel/lib/device-api/src/manager.rs b/kernel/lib/device-api/src/manager.rs
new file mode 100644
index 00000000..13573456
--- /dev/null
+++ b/kernel/lib/device-api/src/manager.rs
@@ -0,0 +1,25 @@
+use alloc::vec::Vec;
+
+use crate::{Device, DeviceId};
+
+pub struct DeviceManager {
+    devices: Vec<&'static dyn Device>,
+}
+
+impl DeviceManager {
+    pub const fn new() -> Self {
+        Self {
+            devices: Vec::new(),
+        }
+    }
+
+    pub fn register(&mut self, device: &'static dyn Device) -> DeviceId {
+        let id = DeviceId::from(self.devices.len());
+        self.devices.push(device);
+        id
+    }
+
+    pub fn devices(&self) -> impl Iterator<Item = &'static dyn Device> + '_ {
+        self.devices.iter().copied()
+    }
+}
diff --git a/kernel/lib/device-api/src/serial.rs b/kernel/lib/device-api/src/serial.rs
new file mode 100644
index 00000000..3a78fb0d
--- /dev/null
+++ b/kernel/lib/device-api/src/serial.rs
@@ -0,0 +1,7 @@
+use yggdrasil_abi::error::Error;
+
+use crate::Device;
+
+pub trait SerialDevice: Device {
+    fn send(&self, byte: u8) -> Result<(), Error>;
+}
diff --git a/kernel/lib/device-api/src/timer.rs b/kernel/lib/device-api/src/timer.rs
new file mode 100644
index 00000000..3fe2ab66
--- /dev/null
+++ b/kernel/lib/device-api/src/timer.rs
@@ -0,0 +1,31 @@
+//! Interfaces for time-providing devices
+
+use core::time::Duration;
+
+use yggdrasil_abi::error::Error;
+
+use crate::Device;
+
+/// Interface for precise timing devices
+pub trait MonotonicTimestampProviderDevice: Device {
+    /// Provides a timestamp value of the timer. The value:
+    ///
+    /// * Represents monotonically increasing clock time since some arbitrary point in the past.
+    /// * Can be used for delays and measuring time passed between two measurements.
+    ///
+    /// * Is not an accurate wall-clock time or real-world time.
+    /// * Cannot be used for date/time management purposes.
+    fn monotonic_timestamp(&self) -> Result<Duration, Error>;
+}
+
+/// Interface for real-world time-telling devices
+pub trait RealTimeProviderDevice: Device {
+    /// Provides a real-time clock value of the timer. The value:
+    ///
+    /// * Represents a real-world time since TODO TODO TODO.
+    /// * Can be used for rough measurements of duration passed between two points in time.
+    /// * Can be used for delays, but precision is not guaranteed.
+    /// * Can be used for date/time management.
+    // TODO actual type for time
+    fn real_timestamp(&self) -> Result<Duration, Error>;
+}
diff --git a/kernel/lib/device-tree/Cargo.toml b/kernel/lib/device-tree/Cargo.toml
new file mode 100644
index 00000000..0d343613
--- /dev/null
+++ b/kernel/lib/device-tree/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "device-tree"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" }
+device-api = { path = "../device-api", features = ["derive"] }
+libk-mm = { path = "../../libk/libk-mm" }
+
+fdt-rs = { version = "0.4.3", default-features = false }
+log = "0.4.20"
diff --git a/kernel/lib/device-tree/src/driver.rs b/kernel/lib/device-tree/src/driver.rs
new file mode 100644
index 00000000..40f81f37
--- /dev/null
+++ b/kernel/lib/device-tree/src/driver.rs
@@ -0,0 +1,151 @@
+//! Device tree-based driver definitions
+
+use core::mem::size_of;
+
+use alloc::boxed::Box;
+use device_api::{Device, DeviceId};
+use fdt_rs::index::DevTreeIndexNode;
+use yggdrasil_abi::error::Error;
+
+use crate::dt::{DevTreeIndexNodePropGet, DevTreeNodeInfo};
+
+/// Helper macro to return the count of expressions supplied to it
+#[macro_export]
+macro_rules! count {
+    () => (0usize);
+    ($x:tt $($xs:tt)*) => (1usize + $crate::count!($($xs)*));
+}
+
+/// Registers a device driver for compatible device tree nodes
+///
+/// # Usage example
+///
+/// ```
+/// device_tree_driver! {
+///     compatible: ["arm,pl011"],
+///     probe(of) => {
+///         let my_device = ...; // ... extract some info about the device ...
+///         Some(Box::new(my_device))
+///     }
+/// }
+/// ```
+#[macro_export]
+macro_rules! device_tree_driver {
+    (
+        compatible: [$($compatible:literal),+],
+        probe ($node:ident) => $probe_body:block $(,)?
+    ) => {
+        const __COMPATIBLE_LEN: usize = $crate::count!($($compatible )+);
+        static __COMPATIBLE: [&str; __COMPATIBLE_LEN] = [$($compatible),+];
+
+        fn __probe($node: &$crate::dt::DevTreeNodeInfo) ->
+            Option<Box<dyn device_api::Device>> $probe_body
+
+        core::arch::global_asm!(r#"
+            .pushsection .dt_probes, "a"
+            .quad {compatible}
+            .quad {compatible_len}
+            .quad {probe_func}
+            .popsection
+        "#,
+            compatible = sym __COMPATIBLE,
+            compatible_len = const __COMPATIBLE_LEN,
+            probe_func = sym __probe
+        );
+    };
+}
+
+struct DevTreeProbe<'a> {
+    compatible: &'static [&'static str],
+    probe_func: fn(&'a DevTreeNodeInfo<'a, 'a, 'a>) -> Option<Box<dyn Device>>,
+}
+
+fn iter_dt_probes<'a>() -> impl Iterator<Item = DevTreeProbe<'a>> {
+    extern "C" {
+        static __dt_probes_start: u64;
+        static __dt_probes_end: u64;
+    }
+
+    unsafe {
+        let base = &__dt_probes_start as *const u64;
+        let end = &__dt_probes_end as *const u64;
+        let len = (end as usize - base as usize) / (size_of::<u64>() * 3);
+
+        (0..len).map(move |i| {
+            let compatible_ptr = *base.add(i * 3);
+            let compatible_len = *base.add(i * 3 + 1);
+            let probe_func_ptr = *base.add(i * 3 + 2);
+
+            let compatible =
+                core::slice::from_raw_parts(compatible_ptr as *const &str, compatible_len as usize);
+            let probe_func = core::mem::transmute(probe_func_ptr);
+
+            DevTreeProbe {
+                compatible,
+                probe_func,
+            }
+        })
+    }
+}
+
+fn dt_match_compatible(compatible: &str) -> Option<DevTreeProbe> {
+    iter_dt_probes().find(|probe| probe.compatible.contains(&compatible))
+}
+
+/// "Probes" a device tree node for any matching device, registering it if a compatible driver is
+/// found
+pub fn probe_dt_node<F: FnOnce(&'static dyn Device) -> DeviceId>(
+    dt: &DevTreeNodeInfo,
+    register: F,
+) -> Option<(&'static dyn Device, DeviceId)> {
+    // TODO use list, not just the first item
+    let Some(compatible) = dt.node.prop("compatible") else {
+        return None;
+    };
+
+    let probe = dt_match_compatible(compatible)?;
+    let device = Box::leak((probe.probe_func)(dt)?);
+    let id = register(device);
+    Some((device, id))
+}
+
+/// Performs a shallow walk of a device tree node and executes the visitor function on each node
+pub fn enumerate_dt<
+    'a,
+    I: Iterator<Item = DevTreeIndexNode<'a, 'a, 'a>>,
+    F: Fn(&str, DevTreeNodeInfo) -> Result<(), Error>,
+>(
+    address_cells: usize,
+    size_cells: usize,
+    nodes: I,
+    f: F,
+) -> Result<(), usize> {
+    let mut failed_count = 0;
+
+    for node in nodes {
+        // Skip /cpus and /memory*
+        let probe = DevTreeNodeInfo {
+            address_cells,
+            size_cells,
+            node,
+        };
+
+        let Ok(name) = probe.node.name() else {
+            continue;
+        };
+        let Some(compatible) = probe.node.prop("compatible") else {
+            continue;
+        };
+
+        if let Err(error) = f(compatible, probe) {
+            log::warn!("{}: {:?}", name, error);
+            failed_count += 1;
+        }
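+        // Probing continues with the remaining nodes; failures are only counted and
+        // reported to the caller after the loop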
+    }
+
+    if failed_count == 0 {
+        Ok(())
+    } else {
+        Err(failed_count)
+    }
+}
diff --git a/kernel/lib/device-tree/src/dt.rs b/kernel/lib/device-tree/src/dt.rs
new file mode 100644
index 00000000..1304d55f
--- /dev/null
+++ b/kernel/lib/device-tree/src/dt.rs
@@ -0,0 +1,289 @@
+//! ARM device tree utilities
+
+use fdt_rs::{
+    base::DevTree,
+    index::{iters::DevTreeIndexNodeSiblingIter, DevTreeIndex, DevTreeIndexNode, DevTreeIndexProp},
+    prelude::PropReader,
+};
+use libk_mm::{
+    address::{FromRaw, PhysicalAddress},
+    phys::PhysicalMemoryRegion,
+};
+use yggdrasil_abi::error::Error;
+
+const INDEX_BUFFER_SIZE: usize = 65536;
+
+#[repr(C, align(0x10))]
+struct FdtIndexBuffer([u8; INDEX_BUFFER_SIZE]);
+
+static mut FDT_INDEX_BUFFER: FdtIndexBuffer = FdtIndexBuffer::zeroed();
+
+impl FdtIndexBuffer {
+    const fn zeroed() -> Self {
+        Self([0; INDEX_BUFFER_SIZE])
+    }
+}
+
+/// Device tree node
+pub type TNode<'a> = DevTreeIndexNode<'a, 'a, 'a>;
+/// Device tree property
+pub type TProp<'a> = DevTreeIndexProp<'a, 'a, 'a>;
+
+/// Helper trait to provide extra functionality for [DevTreeIndexProp]
+pub trait DevTreeIndexPropExt {
+    /// Reads a cell value from single-type cell array at given cell index
+    fn cell1_array_item(&self, index: usize, cells: usize) -> Option<u64>;
+    /// Reads a cell pair from cell pair array at given pair index
+    fn cell2_array_item(&self, index: usize, cells0: usize, cells1: usize) -> Option<(u64, u64)>;
+
+    /// Reads a cell value from the property at given offset
+    fn read_cell(&self, u32_offset: usize, cell_size: usize) -> Option<u64>;
+
+    /// Returns the length in bytes
+    fn len(&self) -> usize;
+}
+
+/// Helper trait to provide extra functionality for [DevTreeIndexNode]
+pub trait DevTreeIndexNodeExt {
+    /// Returns the root node's `#address-cells` property, or the default value defined by the
+    /// specification if it's absent
+    fn address_cells(&self) -> usize {
+        self.get_address_cells().unwrap_or(2)
+    }
+    /// Returns the root node's `#size-cells` property, or the default value defined by the
+    /// specification if it's absent
+    fn size_cells(&self) -> usize {
+        self.get_size_cells().unwrap_or(1)
+    }
+
+    /// Returns the #address-cells property of the node, if there is one
+    fn get_address_cells(&self) -> Option<usize>;
+    /// Returns the #size-cells property of the node, if there is one
+    fn get_size_cells(&self) -> Option<usize>;
+}
+
+/// Extension trait for [DevTreeIndexNode] to obtain typed property values
+pub trait DevTreeIndexNodePropGet<T> {
+    /// Returns a property value of given type, if it exists
+    fn prop(&self, name: &str) -> Option<T>;
+}
+
+/// Iterator for physical memory regions present in the device tree
+#[derive(Clone)]
+pub struct FdtMemoryRegionIter<'a> {
+    inner: DevTreeIndexNodeSiblingIter<'a, 'a, 'a>,
+    address_cells: usize,
+    size_cells: usize,
+}
+
+/// Device tree wrapper struct
+pub struct DeviceTree<'a> {
+    tree: DevTree<'a>,
+    index: DevTreeIndex<'a, 'a>,
+}
+
+/// Provides information about a device tree node to device driver's "probe" function
+pub struct DevTreeNodeInfo<'a, 'i, 'dt> {
+    /// #address-cells property of the parent bus/system
+    pub address_cells: usize,
+    /// #size-cells property of the parent bus/system
+    pub size_cells: usize,
+    /// Device tree node being probed
+    pub node: DevTreeIndexNode<'a, 'i, 'dt>,
+}
+
+impl<'a> DeviceTree<'a> {
+    pub const MIN_HEADER_SIZE: usize = DevTree::MIN_HEADER_SIZE;
+
+    /// Constructs a device tree wrapper from the DTB virtual address.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the validity of the address.
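+    ///
+    /// A hypothetical usage sketch (`dtb_virt` stands in for an already-mapped DTB
+    /// address):
+    ///
+    /// ```ignore
+    /// let dt = unsafe { DeviceTree::from_addr(dtb_virt) };
+    /// let stdout = dt.chosen_stdout_path();
+    /// let (ac, sc) = (dt.address_cells(), dt.size_cells());
+    /// ```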
+    pub unsafe fn from_addr(virt: usize) -> Self {
+        FDT_INDEX_BUFFER.0.fill(0);
+        let tree = DevTree::from_raw_pointer(virt as _).unwrap();
+        let index = DevTreeIndex::new(tree, &mut FDT_INDEX_BUFFER.0).unwrap();
+        Self { tree, index }
+    }
+
+    /// Looks up a node for a given path
+    pub fn node_by_path(&self, path: &str) -> Option<TNode> {
+        find_node(self.index.root(), path.trim_start_matches('/'))
+    }
+
+    /// Returns the total size of the device tree in memory
+    pub fn size(&self) -> usize {
+        self.tree.totalsize()
+    }
+
+    /// Returns the root node's `#address-cells` property, or the default value defined by the
+    /// specification if it's absent
+    pub fn address_cells(&self) -> usize {
+        self.index.root().address_cells()
+    }
+
+    /// Returns the root node's `#size-cells` property, or the default value defined by the
+    /// specification if it's absent
+    pub fn size_cells(&self) -> usize {
+        self.index.root().size_cells()
+    }
+
+    /// Returns the root node of the device tree
+    pub fn root(&self) -> DevTreeIndexNode {
+        self.index.root()
+    }
+
+    // Commonly used functions for convenience
+
+    /// Returns the /chosen.stdout-path value
+    pub fn chosen_stdout_path(&self) -> Option<&str> {
+        let chosen = self.node_by_path("/chosen")?;
+        chosen.prop("stdout-path")
+    }
+
+    /// Reads the device tree's total size from a header provided as a slice of bytes
+    pub unsafe fn read_totalsize(header: &[u8]) -> Result<usize, Error> {
+        DevTree::read_totalsize(header).map_err(|_| Error::InvalidArgument)
+    }
+}
+
+impl<'a, 'i, 'dt> DevTreeIndexNodeExt for DevTreeIndexNode<'a, 'i, 'dt> {
+    fn get_address_cells(&self) -> Option<usize> {
+        self.props()
+            .find(|p| p.name().unwrap_or("") == "#address-cells")
+            .map(|p| p.u32(0).unwrap() as usize)
+    }
+
+    fn get_size_cells(&self) -> Option<usize> {
+        self.props()
+            .find(|p| p.name().unwrap_or("") == "#size-cells")
+            .map(|p| p.u32(0).unwrap() as usize)
+    }
+}
+
+impl<'a, 'i, 'dt> DevTreeIndexPropExt for DevTreeIndexProp<'a, 'i, 'dt> {
+    fn read_cell(&self, u32_offset: usize, cell_size: usize) -> Option<u64> {
+        match cell_size {
+            1 => self.u32(u32_offset).map(|x| x as u64).ok(),
+            2 => {
+                let high = self.u32(u32_offset).ok()? as u64;
+                let low = self.u32(u32_offset + 1).ok()? as u64;
+
+                Some((high << 32) | low)
+            }
+            _ => unimplemented!(),
+        }
+    }
+
+    fn cell1_array_item(&self, index: usize, cells: usize) -> Option<u64> {
+        self.read_cell(index * cells, cells)
+    }
+
+    fn cell2_array_item(&self, index: usize, cells0: usize, cells1: usize) -> Option<(u64, u64)> {
+        let u32_index = index * (cells0 + cells1);
+        let cell0 = self.read_cell(u32_index, cells0)?;
+        let cell1 = self.read_cell(u32_index + cells0, cells1)?;
+        Some((cell0, cell1))
+    }
+
+    fn len(&self) -> usize {
+        self.length()
+    }
+}
+
+impl<'a> FdtMemoryRegionIter<'a> {
+    /// Constructs a memory region iterator for given device tree
+    pub fn new(dt: &'a DeviceTree) -> Self {
+        let inner = dt.index.root().children();
+        let address_cells = dt.address_cells();
+        let size_cells = dt.size_cells();
+        Self {
+            inner,
+            address_cells,
+            size_cells,
+        }
+    }
+}
+
+impl Iterator for FdtMemoryRegionIter<'_> {
+    type Item = PhysicalMemoryRegion;
+
+    fn next(&mut self) -> Option<Self::Item> {
+        loop {
+            let Some(item) = self.inner.next() else {
+                break None;
+            };
+
+            let name = item.name().unwrap_or("");
+
+            if name.starts_with("memory@") || name == "memory" {
+                let reg = item
+                    .props()
+                    .find(|p| p.name().unwrap_or("") == "reg")
+                    .unwrap();
+
+                let (base, size) = reg
+                    .cell2_array_item(0, self.address_cells, self.size_cells)
+                    .unwrap();
+
+                let base = PhysicalAddress::from_raw(base);
+                let size = size as usize;
+
+                break Some(PhysicalMemoryRegion { base, size });
+            }
+        }
+    }
+}
+
+impl<'a, 'i, 'dt> DevTreeIndexNodePropGet<u32> for DevTreeIndexNode<'a, 'i, 'dt> {
+    fn prop(&self, name: &str) -> Option<u32> {
+        self.props().find_map(|prop| {
+            if prop.name().ok()? == name {
+                prop.u32(0).ok()
+            } else {
+                None
+            }
+        })
+    }
+}
+
+impl<'a, 'i, 'dt> DevTreeIndexNodePropGet<&'a str> for DevTreeIndexNode<'a, 'i, 'dt> {
+    fn prop(&self, name: &str) -> Option<&'a str> {
+        self.props().find_map(|prop| {
+            if prop.name().ok()? == name {
+                prop.str().ok()
+            } else {
+                None
+            }
+        })
+    }
+}
+
+/// Looks up a property with given name in the node
+pub fn find_prop<'a>(node: &TNode<'a>, name: &str) -> Option<TProp<'a>> {
+    node.props().find(|p| p.name().unwrap_or("") == name)
+}
+
+fn path_component_left(path: &str) -> (&str, &str) {
+    if let Some((left, right)) = path.split_once('/') {
+        (left, right.trim_start_matches('/'))
+    } else {
+        (path, "")
+    }
+}
+
+fn find_node<'a>(at: TNode<'a>, path: &str) -> Option<TNode<'a>> {
+    let (item, path) = path_component_left(path);
+    if item.is_empty() {
+        assert_eq!(path, "");
+        Some(at)
+    } else {
+        let child = at.children().find(|c| c.name().unwrap() == item)?;
+        if path.is_empty() {
+            Some(child)
+        } else {
+            find_node(child, path)
+        }
+    }
+}
diff --git a/kernel/lib/device-tree/src/lib.rs b/kernel/lib/device-tree/src/lib.rs
new file mode 100644
index 00000000..6d90ee0c
--- /dev/null
+++ b/kernel/lib/device-tree/src/lib.rs
@@ -0,0 +1,10 @@
+#![no_std]
+
+extern crate alloc;
+
+#[macro_use]
+pub mod driver;
+
+pub mod dt;
+
+pub use dt::find_prop;
diff --git a/kernel/lib/memtables/Cargo.toml b/kernel/lib/memtables/Cargo.toml
new file mode 100644
index 00000000..061db54a
--- /dev/null
+++ b/kernel/lib/memtables/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "memtables"
+version = "0.1.0"
+edition = "2021"
+authors = ["Mark Poliakov "]
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+bytemuck = { version = "1.14.0", features = ["derive"] }
+
+[features]
+default = []
+all = []
diff --git a/kernel/lib/memtables/src/aarch64.rs b/kernel/lib/memtables/src/aarch64.rs
new file mode 100644
index 00000000..5adab8bc
--- /dev/null
+++ b/kernel/lib/memtables/src/aarch64.rs
@@ -0,0 +1,26 @@
+use bytemuck::{Pod, Zeroable};
+
+use crate::RawTable;
+
+pub const KERNEL_L3_COUNT: usize = 4;
+
+#[derive(Clone, Copy, Pod, Zeroable)]
+#[repr(C)]
+pub struct FixedTables {
+    // 1GiB entries
+    pub l1: RawTable,
+
+    // 2MiB entries
+    pub l2: RawTable,
+    pub l3s: [RawTable; KERNEL_L3_COUNT],
+}
+
+impl FixedTables {
+    pub const fn zeroed() -> Self {
+        Self {
+            l1: RawTable::zeroed(),
+            l2: RawTable::zeroed(),
+            l3s: [RawTable::zeroed(); KERNEL_L3_COUNT],
+        }
+    }
+}
diff --git a/kernel/lib/memtables/src/any.rs b/kernel/lib/memtables/src/any.rs
new file mode 100644
index 00000000..1dcbcfe4
--- /dev/null
+++ b/kernel/lib/memtables/src/any.rs
@@ -0,0 +1,27 @@
+use crate::{aarch64, x86_64};
+
+pub enum AnyTables {
+    X86_64(x86_64::FixedTables),
+    AArch64(aarch64::FixedTables),
+}
+
+impl AnyTables {
+    pub fn as_bytes(&self) -> &[u8] {
+        match self {
+            Self::X86_64(tables) => bytemuck::bytes_of(tables),
+            Self::AArch64(tables) => bytemuck::bytes_of(tables),
+        }
+    }
+}
+
+impl From<x86_64::FixedTables> for AnyTables {
+    fn from(value: x86_64::FixedTables) -> Self {
+        Self::X86_64(value)
+    }
+}
+
+impl From<aarch64::FixedTables> for AnyTables {
+    fn from(value: aarch64::FixedTables) -> Self {
+        Self::AArch64(value)
+    }
+}
diff --git a/kernel/lib/memtables/src/lib.rs b/kernel/lib/memtables/src/lib.rs
new file mode 100644
index 00000000..fa3334e1
--- /dev/null
+++ b/kernel/lib/memtables/src/lib.rs
@@ -0,0 +1,30 @@
+#![no_std]
+
+use bytemuck::{Pod, Zeroable};
+
+// AArch64
+#[cfg(any(feature = "all", target_arch = "aarch64"))]
+pub mod aarch64;
+#[cfg(all(not(feature = "all"), target_arch = "aarch64"))]
+pub use aarch64::FixedTables;
+
+// x86-64
+#[cfg(any(feature = "all", target_arch = "x86_64"))]
+pub mod x86_64;
+#[cfg(all(not(feature = "all"), target_arch = "x86_64"))]
+pub use x86_64::FixedTables;
+
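+// With the "all" feature, both architecture modules are compiled regardless of the
+// target, and the `any::AnyTables` wrapper can hold either table set (presumably for
+// host-side table generation tooling)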
+#[cfg(feature = "all")]
+pub mod any;
+
+#[derive(Clone, Copy, Pod, Zeroable)]
+#[repr(C, align(0x1000))]
+pub struct RawTable {
+    pub data: [u64; 512],
+}
+
+impl RawTable {
+    pub const fn zeroed() -> Self {
+        Self { data: [0; 512] }
+    }
+}
diff --git a/kernel/lib/memtables/src/x86_64.rs b/kernel/lib/memtables/src/x86_64.rs
new file mode 100644
index 00000000..cea2608a
--- /dev/null
+++ b/kernel/lib/memtables/src/x86_64.rs
@@ -0,0 +1,27 @@
+use bytemuck::{Pod, Zeroable};
+
+use crate::RawTable;
+
+pub const KERNEL_L3_COUNT: usize = 16;
+
+#[derive(Clone, Copy, Pod, Zeroable)]
+#[repr(C)]
+pub struct FixedTables {
+    pub l0: RawTable,
+
+    pub kernel_l1: RawTable,
+    pub kernel_l2: RawTable,
+    pub kernel_l3s: [RawTable; KERNEL_L3_COUNT],
+}
+
+impl FixedTables {
+    pub const fn zeroed() -> Self {
+        Self {
+            l0: RawTable::zeroed(),
+
+            kernel_l1: RawTable::zeroed(),
+            kernel_l2: RawTable::zeroed(),
+            kernel_l3s: [RawTable::zeroed(); KERNEL_L3_COUNT],
+        }
+    }
+}
diff --git a/kernel/lib/vfs/Cargo.toml b/kernel/lib/vfs/Cargo.toml
new file mode 100644
index 00000000..f850d83a
--- /dev/null
+++ b/kernel/lib/vfs/Cargo.toml
@@ -0,0 +1,18 @@
+[package]
+name = "vfs"
+version = "0.1.0"
+edition = "2021"
+authors = ["Mark Poliakov "]
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git", features = ["alloc"] }
+libk-mm = { path = "../../libk/libk-mm" }
+libk-util = { path = "../../libk/libk-util" }
+libk-thread = { path = "../../libk/libk-thread" }
+
+ygg_driver_block = { path = "../../driver/block/core" }
+
+log = "0.4.20"
+futures-util = { version = "0.3.28", default-features = false, features = ["alloc", "async-await"] }
diff --git a/kernel/lib/vfs/src/channel.rs b/kernel/lib/vfs/src/channel.rs
new file mode 100644
index 00000000..0feb212d
--- /dev/null
+++ b/kernel/lib/vfs/src/channel.rs
@@ -0,0 +1,213 @@
+use core::{
+    pin::Pin,
+    sync::atomic::{AtomicU32, Ordering},
+    task::{Context, Poll},
+};
+
+use alloc::{
+    boxed::Box,
+    collections::{BTreeMap, VecDeque},
+    string::String,
+    sync::Arc,
+};
+use futures_util::{task::AtomicWaker, Future};
+use libk_thread::{block, sync::Mutex};
+use libk_util::sync::{IrqSafeSpinlock, LockMethod};
+use yggdrasil_abi::{
+    error::Error,
+    io::{ChannelPublisherId, MessageDestination},
+};
+
+use crate::{FileReadiness, FileRef};
+
+/// Describes a channel over which messages can be sent to [Subscription]s
+pub struct Channel {
+    last_id: AtomicU32,
+    subscriptions: Mutex<BTreeMap<u32, Arc<Subscription>>>,
+}
+
+/// Describes message payload
+pub enum MessagePayload {
+    /// Payload contains a file
+    File(FileRef),
+    /// Payload contains byte data
+    Data(Box<[u8]>),
+}
+
+/// Describes a message sent over a channel
+pub struct Message {
+    /// Channel descriptor ID from which the message came
+    pub source: ChannelPublisherId,
+    /// Data of the message
+    pub payload: MessagePayload,
+}
+
+/// Describes a single subscription to some [Channel]
+pub struct Subscription {
+    queue: Mutex<VecDeque<Arc<Message>>>,
+    notify: AtomicWaker,
+}
+
+/// Describes a pair of a [Channel] descriptor plus an optional [Subscription]
+pub struct ChannelDescriptor {
+    id: u32,
+    tx: Arc<Channel>,
+    rx: Option<Arc<Subscription>>,
+}
+
+impl ChannelDescriptor {
+    /// Opens a channel descriptor, optionally creating a subscription to it
+    pub fn open(name: &str, subscribe: bool) -> ChannelDescriptor {
+        let tx = Channel::get_or_create(name.into());
+        // NOTE The first one to open the channel is guaranteed to get an ID of 0
+        let id = tx.last_id.fetch_add(1, Ordering::SeqCst);
+        let rx = if subscribe {
+            Some(tx.subscribe(id))
+        } else {
+            None
+        };
+
+        Self { tx, rx, id }
+    }
+
+    /// Receives a message from the subscription
+    pub fn receive_message(&self) -> Result<Arc<Message>, Error> {
+        let Some(rx) = self.rx.as_ref() else {
+            return Err(Error::InvalidOperation);
+        };
+
+        rx.receive_message_inner()
+    }
+
+    /// Asynchronously receives a message from the subscription
+    pub async fn receive_message_async(&self) -> Result<Arc<Message>, Error> {
+        let rx = self.rx.as_ref().ok_or(Error::InvalidOperation)?;
+        rx.receive_message_async().await
+    }
+
+    /// Sends a message to the channel
+    pub fn send_message(
+        &self,
+        payload: MessagePayload,
+        dst: MessageDestination,
+    ) -> Result<(), Error> {
+        let message = Arc::new(Message {
+            source: unsafe { ChannelPublisherId::from_raw(self.id) },
+            payload,
+        });
+
+        let lock = self.tx.subscriptions.lock()?;
+
+        match dst {
+            MessageDestination::Specific(id) => {
+                if let Some(sub) = lock.get(&id) {
+                    sub.push_message(message)?;
+                }
+            }
+            MessageDestination::AllExceptSelf => {
+                for (&id, sub) in lock.iter() {
+                    if id == self.id {
+                        continue;
+                    }
+
+                    sub.push_message(message.clone())?;
+                }
+            }
+            MessageDestination::All => todo!(),
+        }
+
+        Ok(())
+    }
+}
+
+impl Channel {
+    fn new() -> Arc<Self> {
+        Arc::new(Self {
+            last_id: AtomicU32::new(0),
+            subscriptions: Mutex::new(BTreeMap::new()),
+        })
+    }
+
+    fn get_or_create(name: String) -> Arc<Self> {
+        let mut channels = CHANNELS.lock();
+
+        channels.entry(name).or_insert_with(Self::new).clone()
+    }
+
+    fn subscribe(&self, id: u32) -> Arc<Subscription> {
+        let mut lock = self.subscriptions.lock().unwrap();
+
+        let sub = Arc::new(Subscription {
+            queue: Mutex::new(VecDeque::new()),
+            notify: AtomicWaker::new(),
+        });
+
+        lock.insert(id, sub.clone());
+
+        sub
+    }
+}
+
+impl Subscription {
+    fn receive_message_async(&self) -> impl Future<Output = Result<Arc<Message>, Error>> + '_ {
+        struct F<'f> {
+            rx: &'f Subscription,
+        }
+
+        impl<'f> Future for F<'f> {
+            type Output = Result<Arc<Message>, Error>;
+
+            fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll<Self::Output> {
+                let mut lock = self.rx.queue.lock()?;
+                if let Some(msg) = lock.pop_front() {
+                    return Poll::Ready(Ok(msg));
+                }
+                drop(lock);
+
+                self.rx.notify.register(cx.waker());
+
+                let mut lock = self.rx.queue.lock()?;
+                if let Some(msg) = lock.pop_front() {
+                    Poll::Ready(Ok(msg))
+                } else {
+                    Poll::Pending
+                }
+            }
+        }
+
+        F { rx: self }
+    }
+
+    fn receive_message_inner(&self) -> Result<Arc<Message>, Error> {
+        block!(self.receive_message_async().await)?
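+        // block! drives the async receive to completion on the current thread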
+    }
+
+    fn push_message(&self, msg: Arc<Message>) -> Result<(), Error> {
+        self.queue.lock()?.push_back(msg);
+        self.notify.wake();
+        Ok(())
+    }
+}
+
+impl FileReadiness for ChannelDescriptor {
+    fn poll_read(&self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        let Some(rx) = self.rx.as_ref() else {
+            return Poll::Ready(Err(Error::InvalidOperation));
+        };
+
+        if !rx.queue.lock()?.is_empty() {
+            return Poll::Ready(Ok(()));
+        }
+
+        rx.notify.register(cx.waker());
+
+        if !rx.queue.lock()?.is_empty() {
+            Poll::Ready(Ok(()))
+        } else {
+            Poll::Pending
+        }
+    }
+}
+
+static CHANNELS: IrqSafeSpinlock<BTreeMap<String, Arc<Channel>>> =
+    IrqSafeSpinlock::new(BTreeMap::new());
diff --git a/kernel/lib/vfs/src/device.rs b/kernel/lib/vfs/src/device.rs
new file mode 100644
index 00000000..c45a291e
--- /dev/null
+++ b/kernel/lib/vfs/src/device.rs
@@ -0,0 +1,79 @@
+use ygg_driver_block::BlockDevice;
+use yggdrasil_abi::{error::Error, io::DeviceRequest};
+
+use crate::{
+    node::{CommonImpl, NodeRef},
+    traits::FileReadiness,
+};
+
+/// Character device interface
+#[allow(unused)]
+pub trait CharDevice: FileReadiness + Sync {
+    /// Reads data from the device
+    fn read(&'static self, buf: &mut [u8]) -> Result<usize, Error> {
+        Err(Error::NotImplemented)
+    }
+    /// Writes the data to the device
+    fn write(&'static self, buf: &[u8]) -> Result<usize, Error> {
+        Err(Error::NotImplemented)
+    }
+
+    /// Returns `true` if the device can be read from
+    fn is_readable(&self) -> bool {
+        true
+    }
+    /// Returns `true` if the device can be written to
+    fn is_writable(&self) -> bool {
+        true
+    }
+    /// Returns `true` if the given device is a terminal
+    fn is_terminal(&self) -> bool {
+        false
+    }
+
+    /// Performs a device-specific function
+    fn device_request(&self, req: &mut DeviceRequest) -> Result<(), Error> {
+        Err(Error::NotImplemented)
+    }
+}
+
+#[derive(Clone)]
+pub(crate) struct BlockDeviceWrapper(pub(crate) &'static dyn BlockDevice);
+#[derive(Clone)]
+pub(crate) struct CharDeviceWrapper(pub(crate) &'static dyn CharDevice);
+
+impl BlockDeviceWrapper {
+    pub fn is_readable(&self) -> bool {
+        self.0.is_readable()
+    }
+
+    pub fn is_writable(&self) -> bool {
+        self.0.is_writable()
+    }
+}
+
+impl CommonImpl for BlockDeviceWrapper {
+    fn size(&self, _node: &NodeRef) -> Result<u64, Error> {
+        self.0.size()
+    }
+}
+
+impl CharDeviceWrapper {
+    pub fn is_terminal(&self) -> bool {
+        self.0.is_terminal()
+    }
+
+    pub fn is_readable(&self) -> bool {
+        self.0.is_readable()
+    }
+
+    pub fn is_writable(&self) -> bool {
+        self.0.is_writable()
+    }
+}
+
+impl CommonImpl for CharDeviceWrapper {
+    fn size(&self, _node: &NodeRef) -> Result<u64, Error> {
+        Ok(0)
+    }
+}
diff --git a/kernel/lib/vfs/src/file/device.rs b/kernel/lib/vfs/src/file/device.rs
new file mode 100644
index 00000000..4abf8f3d
--- /dev/null
+++ b/kernel/lib/vfs/src/file/device.rs
@@ -0,0 +1,86 @@
+use libk_util::sync::IrqSafeSpinlock;
+use yggdrasil_abi::{error::Error, io::SeekFrom};
+
+use crate::{
+    device::{BlockDeviceWrapper, CharDeviceWrapper},
+    node::NodeRef,
+};
+
+pub struct BlockFile {
+    pub(super) device: BlockDeviceWrapper,
+    pub(super) node: NodeRef,
+    pub(super) position: IrqSafeSpinlock<u64>,
+    pub(super) read: bool,
+    pub(super) write: bool,
+}
+
+pub struct CharFile {
+    pub(super) device: CharDeviceWrapper,
+    pub(super) node: NodeRef,
+    pub(super) read: bool,
+    pub(super) write: bool,
+}
+
+impl BlockFile {
+    pub fn read(&self, _buf: &mut [u8]) -> Result<usize, Error> {
+        todo!()
+        // let mut position = self.position.lock();
+        // let count = self.device.0.read(*position, buf)?;
+        // *position += count as u64;
+        // Ok(count)
+    }
+
+    pub fn write(&self, _buf: &[u8]) -> Result<usize, Error> {
+        todo!()
+        // let mut position = self.position.lock();
+        // let count = self.device.0.write(*position, buf)?;
+        // *position += count as u64;
+        // Ok(count)
+    }
+
+    pub fn seek(&self, from: SeekFrom) -> Result<u64, Error> {
+        let mut position = self.position.lock();
+
+        let newpos = match from {
+            SeekFrom::Current(off) => {
+                let newpos = i64::try_from(*position).unwrap() + off;
+                if newpos < 0 {
+                    return Err(Error::InvalidArgument);
+                }
+                newpos as u64
+            }
+            SeekFrom::Start(pos) => pos,
+            SeekFrom::End(off) => {
+                let size = i64::try_from(self.device.0.size()?).unwrap();
+                let newpos = size + off;
+
+                if newpos < 0 {
+                    return Err(Error::InvalidArgument);
+                }
+
+                newpos as u64
+            }
+        };
+
+        *position = newpos;
+        Ok(newpos)
+    }
+}
+
+impl CharFile {
+    pub fn read(&self, buf: &mut [u8]) -> Result<usize, Error> {
+        if self.read {
+            self.device.0.read(buf)
+        } else {
+            Err(Error::InvalidOperation)
+        }
+    }
+
+    pub fn write(&self, buf: &[u8]) -> Result<usize, Error> {
+        if self.write {
+            self.device.0.write(buf)
+        } else {
+            Err(Error::ReadOnly)
+        }
+    }
+}
diff --git a/kernel/lib/vfs/src/file/directory.rs b/kernel/lib/vfs/src/file/directory.rs
new file mode 100644
index 00000000..a90f6e93
--- /dev/null
+++ b/kernel/lib/vfs/src/file/directory.rs
@@ -0,0 +1,110 @@
+use core::{mem::MaybeUninit, str::FromStr};
+
+use libk_util::sync::IrqSafeSpinlock;
+use yggdrasil_abi::{error::Error, io::DirectoryEntry, util::FixedString};
+
+use crate::node::NodeRef;
+
+use super::DirectoryOpenPosition;
+
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub(super) enum DirectoryCachePosition {
+    Dot,
+    DotDot,
+    Index(usize),
+}
+
+#[derive(Clone, Copy, PartialEq, Eq, Debug)]
+pub(super) enum DirectoryPosition {
+    Cache(DirectoryCachePosition),
+    Physical(u64),
+}
+
+pub struct DirectoryFile {
+    pub(super) node: NodeRef,
+    pub(super) position: IrqSafeSpinlock<DirectoryPosition>,
+}
+
+impl DirectoryFile {
+    pub(super) fn read_cached(
+        node: &NodeRef,
+        mut pos: DirectoryCachePosition,
+        entries: &mut [MaybeUninit<DirectoryEntry>],
+    ) -> Result<(usize, DirectoryCachePosition), Error> {
+        let directory = node.as_directory()?;
+        let children = directory.children.lock();
+        let mut rem = entries.len();
+        let mut off = 0;
+
+        while rem != 0 {
+            let entry = match pos {
+                DirectoryCachePosition::Dot => {
+                    pos = DirectoryCachePosition::DotDot;
+                    Some((FixedString::from_str(".").unwrap(), node.clone()))
+                }
+                DirectoryCachePosition::DotDot => {
+                    pos = DirectoryCachePosition::Index(0);
+                    Some((FixedString::from_str("..").unwrap(), node.parent()))
+                }
+                DirectoryCachePosition::Index(index)
+                    if let Some((name, node)) = children.get(index) =>
+                {
+                    pos = DirectoryCachePosition::Index(index + 1);
+                    Some((FixedString::from_str(name)?, node.clone()))
+                }
+                DirectoryCachePosition::Index(_) => None,
+            };
+
+            let Some((name, node)) = entry else {
+                break;
+            };
+
+            let ty = node.ty();
+
+            entries[off].write(DirectoryEntry { name, ty });
+
+            off += 1;
+            rem -= 1;
+        }
+
+        Ok((off, pos))
+    }
+
+    pub(super) fn read_physical(
+        node: &NodeRef,
+        pos: u64,
+        entries: &mut [MaybeUninit<DirectoryEntry>],
+    ) -> Result<(usize, u64), Error> {
+        node.read_directory(pos, entries)
+    }
+
+    pub(super) fn read_entries(
+        &self,
+        entries: &mut [MaybeUninit<DirectoryEntry>],
+    ) -> Result<usize, Error> {
+        let mut position = self.position.lock();
+
+        let (count, pos) = match *position {
+            DirectoryPosition::Cache(pos) => {
+                let (count, pos) = DirectoryFile::read_cached(&self.node, pos, entries)?;
+                (count, DirectoryPosition::Cache(pos))
+            }
+            DirectoryPosition::Physical(off) => {
+                let (count, pos) = DirectoryFile::read_physical(&self.node, off, entries)?;
+                (count, DirectoryPosition::Physical(pos))
+            }
+        };
+        *position = pos;
+
+        Ok(count)
+    }
+}
+
+impl From<DirectoryOpenPosition> for DirectoryPosition {
+    fn from(value: DirectoryOpenPosition) -> Self {
+        match value {
+            DirectoryOpenPosition::FromCache => Self::Cache(DirectoryCachePosition::Dot),
+            DirectoryOpenPosition::FromPhysical(off) => Self::Physical(off),
+        }
+    }
+}
diff --git a/kernel/lib/vfs/src/file/mod.rs b/kernel/lib/vfs/src/file/mod.rs
new file mode 100644
index 00000000..a052ef43
--- /dev/null
+++ b/kernel/lib/vfs/src/file/mod.rs
@@ -0,0 +1,857 @@
+use core::{
+    any::Any,
+    fmt,
+    mem::MaybeUninit,
+    task::{Context, Poll},
+};
+
+use alloc::{
+    collections::{btree_map::Entry, BTreeMap},
+    sync::Arc,
+};
+use libk_mm::{address::PhysicalAddress, table::MapAttributes, PageProvider};
+use libk_util::sync::IrqSafeSpinlock;
+use yggdrasil_abi::{
+    error::Error,
+    io::{
+        DeviceRequest, DirectoryEntry, OpenOptions, RawFd, SeekFrom, TerminalOptions, TerminalSize,
+    },
+    net::SocketAddr,
+};
+
+use crate::{
+    channel::ChannelDescriptor,
+    device::{BlockDeviceWrapper, CharDeviceWrapper},
+    node::NodeRef,
+    socket::{ConnectionSocketWrapper, ListenerSocketWrapper, PacketSocketWrapper},
+    traits::{Read, Seek, Write},
+    ConnectionSocket, FdPoll, FileReadiness, ListenerSocket, Node, PacketSocket, PseudoTerminal,
+    PseudoTerminalMaster, PseudoTerminalSlave, SharedMemory, Socket, TimerFile,
+};
+
+use self::{
+    device::{BlockFile, CharFile},
+    directory::DirectoryFile,
+    pipe::PipeEnd,
+    regular::RegularFile,
+};
+
+mod device;
+mod directory;
+mod pipe;
+mod regular;
+
+/// Per-file optional instance data created when a regular file is opened
+pub type InstanceData = Arc<dyn Any + Send + Sync>;
+
+/// Describes the starting position of the directory
+pub enum DirectoryOpenPosition {
+    /// Contents should be fetched from the directory impl with given offset
+    FromPhysical(u64),
+    /// Contents should be fetched from the tree cache
+    FromCache,
+}
+
+/// Wrapper type for a [File] shared reference
+pub type FileRef = Arc<File>;
+
+// TODO some kind of a mutex instead?
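+// Design note: `File` is a closed enum rather than a trait object, so every kind of
+// open file is known statically and common operations dispatch via `match` (see
+// `poll_read` below)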
+/// Describes an open file +#[allow(missing_docs)] +pub enum File { + Directory(DirectoryFile), + Regular(RegularFile), + Block(BlockFile), + Char(CharFile), + + PacketSocket(Arc), + ListenerSocket(Arc), + StreamSocket(Arc), + + AnonymousPipe(PipeEnd), + Poll(FdPoll), + Timer(TimerFile), + Channel(ChannelDescriptor), + SharedMemory(Arc), + PtySlave(Arc, NodeRef), + PtyMaster(Arc, NodeRef), +} + +/// Contains a per-process fd -> FileRef map +pub struct FileSet { + map: BTreeMap, +} + +impl File { + /// Constructs a pipe pair, returning its `(read, write)` ends + pub fn new_pipe_pair(capacity: usize) -> (Arc, Arc) { + let (read, write) = PipeEnd::new_pair(capacity); + ( + Arc::new(Self::AnonymousPipe(read)), + Arc::new(Self::AnonymousPipe(write)), + ) + } + + /// Constructs a new poll channel file + pub fn new_poll_channel() -> Arc { + Arc::new(Self::Poll(FdPoll::new())) + } + + /// Opens a new message channel, optionally subscribing to it as well + pub fn new_message_channel(name: &str, with_sub: bool) -> Arc { + let channel = ChannelDescriptor::open(name, with_sub); + Arc::new(Self::Channel(channel)) + } + + /// Creates a buffer of shared memory and associates a [File] with it + pub fn new_shared_memory(size: usize) -> Result, Error> { + let shm = SharedMemory::new(size)?; + Ok(Arc::new(Self::SharedMemory(Arc::new(shm)))) + } + + /// Creates a pair of PTY master/slave + pub fn new_pseudo_terminal( + config: TerminalOptions, + size: TerminalSize, + ) -> Result<(Arc, Arc), Error> { + let (master, slave) = PseudoTerminal::new(config, size)?; + let master = Arc::new(master); + let slave = Arc::new(slave); + let (master_node, slave_node) = Node::pseudo_terminal_nodes(master.clone(), slave.clone()); + Ok(( + Arc::new(Self::PtyMaster(master, master_node)), + Arc::new(Self::PtySlave(slave, slave_node)), + )) + } + + /// Creates a new [TimerFile]-backed File + pub fn new_timer(repeat: bool) -> FileRef { + Arc::new(Self::Timer(TimerFile::new(repeat))) + } + + /// Constructs a [File] from a [PacketSocket] + pub fn from_packet_socket(socket: Arc) -> Arc { + Arc::new(Self::PacketSocket(Arc::new(PacketSocketWrapper(socket)))) + } + + /// Constructs a [File] from a [ListenerSocket] + pub fn from_listener_socket(socket: Arc) -> Arc { + Arc::new(Self::ListenerSocket(Arc::new(ListenerSocketWrapper( + socket, + )))) + } + + /// Constructs a [File] from a [ConnectionSocket] + pub fn from_stream_socket(socket: Arc) -> Arc { + Arc::new(Self::StreamSocket(Arc::new(ConnectionSocketWrapper( + socket, + )))) + } + + pub(crate) fn directory(node: NodeRef, position: DirectoryOpenPosition) -> Arc { + let position = IrqSafeSpinlock::new(position.into()); + Arc::new(Self::Directory(DirectoryFile { node, position })) + } + + pub(crate) fn regular( + node: NodeRef, + position: u64, + instance_data: Option, + opts: OpenOptions, + ) -> Arc { + let read = opts.contains(OpenOptions::READ); + let write = opts.contains(OpenOptions::WRITE); + + Arc::new(Self::Regular(RegularFile { + node, + read, + write, + instance_data, + position: IrqSafeSpinlock::new(position), + })) + } + + pub(crate) fn block( + device: BlockDeviceWrapper, + node: NodeRef, + opts: OpenOptions, + ) -> Result, Error> { + let read = opts.contains(OpenOptions::READ); + let write = opts.contains(OpenOptions::WRITE); + + if read && !device.is_readable() { + return Err(Error::InvalidOperation); + } + if write && !device.is_writable() { + return Err(Error::ReadOnly); + } + + Ok(Arc::new(Self::Block(BlockFile { + device, + node, + position: 
IrqSafeSpinlock::new(0), + read, + write, + }))) + } + + pub(crate) fn char( + device: CharDeviceWrapper, + node: NodeRef, + opts: OpenOptions, + ) -> Result, Error> { + let read = opts.contains(OpenOptions::READ); + let write = opts.contains(OpenOptions::WRITE); + + if read && !device.is_readable() { + return Err(Error::InvalidOperation); + } + if write && !device.is_writable() { + return Err(Error::ReadOnly); + } + + Ok(Arc::new(Self::Char(CharFile { + device, + node, + read, + write, + }))) + } + + /// Clones an open file for sending it to another process + pub fn send(self: &Arc) -> Result, Error> { + match self.as_ref() { + Self::Char(_) => Ok(self.clone()), + Self::Block(_) => todo!(), + Self::Regular(file) => Ok(Arc::new(Self::Regular(file.clone()))), + Self::SharedMemory(shm) => Ok(Arc::new(Self::SharedMemory(shm.clone()))), + Self::PtySlave(pt, pt_node) => { + Ok(Arc::new(Self::PtySlave(pt.clone(), pt_node.clone()))) + } + Self::PtyMaster(pt, pt_node) => { + Ok(Arc::new(Self::PtyMaster(pt.clone(), pt_node.clone()))) + } + _ => { + log::info!("Invalid file send(): {:?}", self); + Err(Error::InvalidOperation) + } + } + } + + /// Reads entries from the directory + pub fn read_dir(&self, entries: &mut [MaybeUninit]) -> Result { + match self { + Self::Directory(dir) => dir.read_entries(entries), + _ => Err(Error::NotADirectory), + } + } + + /// Returns the underlying [Node] the file contains + pub fn node(&self) -> Option<&NodeRef> { + match self { + Self::Directory(file) => Some(&file.node), + Self::Regular(file) => Some(&file.node), + Self::Block(file) => Some(&file.node), + Self::Char(file) => Some(&file.node), + Self::PtyMaster(_, node) => Some(&node), + Self::PtySlave(_, node) => Some(&node), + _ => None, + } + } + + /// Polls a file for "read-readiness" + pub fn poll_read(&self, cx: &mut Context<'_>) -> Poll> { + match self { + Self::Char(f) => f.device.0.poll_read(cx), + Self::Channel(ch) => ch.poll_read(cx), + Self::Poll(ch) => ch.poll_read(cx), + Self::PtyMaster(f, _) => f.poll_read(cx), + Self::PtySlave(f, _) => f.poll_read(cx), + Self::PacketSocket(sock) => sock.poll_read(cx), + Self::StreamSocket(sock) => sock.poll_read(cx), + Self::ListenerSocket(sock) => sock.poll_read(cx), + Self::Timer(timer) => timer.poll_read(cx), + // Polling not implemented, return ready immediately (XXX ?) 
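+            // [Editor's note] i.e. the fallback arm below resolves immediately with an
+            // error instead of registering the waker, so regular files, directories and
+            // block devices cannot be awaited for read-readiness.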
+ _ => Poll::Ready(Err(Error::NotImplemented)), + } + } + + /// Performs a device-specific request + pub fn device_request(&self, req: &mut DeviceRequest) -> Result<(), Error> { + match self { + Self::Char(f) => f.device.0.device_request(req), + Self::Block(f) => f.device.0.device_request(req), + Self::PtySlave(f, _) => f.device_request(req), + Self::PtyMaster(f, _) => f.device_request(req), + _ => Err(Error::InvalidOperation), + } + } + + /// Interprets the file as a poll channel + pub fn as_poll_channel(&self) -> Result<&FdPoll, Error> { + if let Self::Poll(poll) = self { + Ok(poll) + } else { + Err(Error::InvalidOperation) + } + } + + /// Interprets the file as a message channel + pub fn as_message_channel(&self) -> Result<&ChannelDescriptor, Error> { + if let Self::Channel(ch) = self { + Ok(ch) + } else { + Err(Error::InvalidOperation) + } + } + + /// Interprets the file as a socket + pub fn as_socket(&self) -> Result<&dyn Socket, Error> { + match self { + Self::PacketSocket(socket) => Ok(socket.0.as_ref()), + _ => Err(Error::InvalidOperation), + } + } + + /// Sends data to a socket + pub fn send_to(&self, buffer: &[u8], recepient: Option) -> Result { + match (self, recepient) { + (Self::PacketSocket(socket), recepient) => socket.send(recepient, buffer), + (Self::StreamSocket(socket), None) => socket.send(buffer), + (_, _) => todo!(), + } + } + + /// Receives data from a socket + pub fn receive_from( + &self, + buffer: &mut [u8], + remote: &mut MaybeUninit, + ) -> Result { + match self { + Self::PacketSocket(socket) => { + let (addr, len) = socket.receive(buffer)?; + remote.write(addr); + Ok(len) + } + Self::StreamSocket(socket) => { + // Always the same + remote.write(socket.remote_address().unwrap()); + socket.receive(buffer) + } + _ => Err(Error::InvalidOperation), + } + } + + /// Waits for incoming connection to be accepted by the listener + pub fn accept(&self, remote: &mut MaybeUninit) -> Result { + match self { + Self::ListenerSocket(socket) => { + let (address, incoming) = socket.accept()?; + remote.write(address); + Ok(File::from_stream_socket(incoming)) + } + _ => Err(Error::InvalidOperation), + } + } +} + +impl PageProvider for File { + fn get_page(&self, offset: u64) -> Result { + match self { + Self::Block(f) => f.device.0.get_page(offset), + Self::SharedMemory(f) => f.get_page(offset), + _ => Err(Error::InvalidOperation), + } + } + + fn release_page(&self, offset: u64, phys: PhysicalAddress) -> Result<(), Error> { + match self { + Self::Block(f) => f.device.0.release_page(offset, phys), + Self::SharedMemory(f) => f.release_page(offset, phys), + _ => Err(Error::InvalidOperation), + } + } + + fn clone_page( + &self, + _offset: u64, + _src_phys: PhysicalAddress, + _src_attrs: MapAttributes, + ) -> Result { + todo!() + } +} + +impl Read for File { + fn read(&self, buf: &mut [u8]) -> Result { + match self { + Self::Regular(file) => file.read(buf), + Self::Block(file) => file.read(buf), + Self::Char(file) => file.read(buf), + Self::AnonymousPipe(pipe) => pipe.read(buf), + Self::PtySlave(pt, _) => pt.read(buf), + Self::PtyMaster(pt, _) => pt.read(buf), + // TODO maybe allow reading trigger count? + Self::Timer(_) => Err(Error::InvalidOperation), + // TODO maybe allow reading FDs from poll channels as if they were regular streams? + Self::Poll(_) => Err(Error::InvalidOperation), + // TODO maybe allow reading messages from Channels? 
+ Self::Channel(_) => Err(Error::InvalidOperation), + Self::SharedMemory(_) => Err(Error::InvalidOperation), + // TODO maybe allow reading messages from Packet/Stream sockets? + Self::PacketSocket(_) | Self::ListenerSocket(_) | Self::StreamSocket(_) => { + Err(Error::InvalidOperation) + } + Self::Directory(_) => Err(Error::IsADirectory), + } + } +} + +impl Write for File { + fn write(&self, buf: &[u8]) -> Result { + match self { + Self::Regular(file) => file.write(buf), + Self::Block(file) => file.write(buf), + Self::Char(file) => file.write(buf), + Self::AnonymousPipe(pipe) => pipe.write(buf), + Self::PtySlave(pt, _) => pt.write(buf), + Self::PtyMaster(pt, _) => pt.write(buf), + Self::Timer(timer) => timer.write(buf), + // TODO maybe allow adding FDs to poll channels this way + Self::Poll(_) => Err(Error::InvalidOperation), + // TODO maybe allow writing messages to Channels? + Self::Channel(_) => Err(Error::InvalidOperation), + Self::SharedMemory(_) => Err(Error::InvalidOperation), + // TODO maybe allow writing messages to Packet/Stream sockets? + Self::PacketSocket(_) | Self::ListenerSocket(_) | Self::StreamSocket(_) => { + Err(Error::InvalidOperation) + } + Self::Directory(_) => Err(Error::IsADirectory), + } + } +} + +impl Seek for File { + fn tell(&self) -> Result { + match self { + Self::Regular(file) => Ok(*file.position.lock()), + Self::Block(file) => Ok(*file.position.lock()), + Self::Directory(_) => Err(Error::IsADirectory), + _ => Err(Error::InvalidOperation), + } + } + + fn seek(&self, from: SeekFrom) -> Result { + match self { + Self::Regular(file) => file.seek(from), + Self::Block(file) => file.seek(from), + Self::Directory(_) => Err(Error::IsADirectory), + _ => Err(Error::InvalidOperation), + } + } +} + +impl fmt::Debug for File { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Regular(file) => f + .debug_struct("RegularFile") + .field("position", &*file.position.lock()) + .field("read", &file.read) + .field("write", &file.write) + .finish_non_exhaustive(), + Self::Block(file) => f + .debug_struct("BlockFile") + .field("position", &*file.position.lock()) + .field("read", &file.read) + .field("write", &file.write) + .finish_non_exhaustive(), + Self::Char(file) => f + .debug_struct("CharFile") + .field("read", &file.read) + .field("write", &file.write) + .finish_non_exhaustive(), + Self::Directory(_) => f.debug_struct("DirectoryFile").finish_non_exhaustive(), + Self::AnonymousPipe(_) => f.debug_struct("AnonymousPipe").finish_non_exhaustive(), + Self::Poll(_) => f.debug_struct("Poll").finish_non_exhaustive(), + Self::Channel(_) => f.debug_struct("Channel").finish_non_exhaustive(), + Self::SharedMemory(_) => f.debug_struct("SharedMemory").finish_non_exhaustive(), + Self::PtySlave(_, _) => f.debug_struct("PtySlave").finish_non_exhaustive(), + Self::PtyMaster(_, _) => f.debug_struct("PtyMaster").finish_non_exhaustive(), + Self::PacketSocket(sock) => f + .debug_struct("PacketSocket") + .field("local", &sock.local_address()) + .field("remote", &sock.remote_address()) + .finish_non_exhaustive(), + Self::StreamSocket(sock) => f + .debug_struct("StreamSocket") + .field("local", &sock.local_address()) + .field("remote", &sock.remote_address()) + .finish_non_exhaustive(), + Self::ListenerSocket(sock) => f + .debug_struct("ListenerSocket") + .field("local", &sock.local_address()) + .finish_non_exhaustive(), + Self::Timer(_) => f.debug_struct("Timer").finish_non_exhaustive(), + } + } +} + +impl FileSet { + /// Creates an empty [FileSet] + pub fn new() -> 
Self { + Self { + map: BTreeMap::new(), + } + } + + /// Returns the [FileRef] associated with given `fd` or an error if it does not exist + pub fn file(&self, fd: RawFd) -> Result<&FileRef, Error> { + self.map.get(&fd).ok_or(Error::InvalidFile) + } + + /// Associates a `file` with `fd`, returning an error if requested `fd` is already taken + pub fn set_file(&mut self, fd: RawFd, file: FileRef) -> Result<(), Error> { + if self.map.contains_key(&fd) { + return Err(Error::AlreadyExists); + } + self.map.insert(fd, file); + Ok(()) + } + + /// Associates a `file` with any available [RawFd] and returns it + pub fn place_file(&mut self, file: FileRef, skip_stdio: bool) -> Result { + let start = if skip_stdio { 3 } else { 0 }; + for idx in start..64 { + let fd = RawFd::from(idx); + + if let Entry::Vacant(e) = self.map.entry(fd) { + e.insert(file); + return Ok(fd); + } + } + + // TODO OutOfFiles + Err(Error::OutOfMemory) + } + + /// Removes and closes a [FileRef] from the struct + pub fn close_file(&mut self, fd: RawFd) -> Result<(), Error> { + // Do nothing, file will be dropped and closed + if self.map.remove(&fd).is_some() { + Ok(()) + } else { + Err(Error::InvalidFile) + } + } + + /// Removes all [FileRef]s from the struct which do not pass the `predicate` check + pub fn retain bool>(&mut self, predicate: F) { + self.map.retain(predicate); + } + + /// Returns an iterator over the file set + pub fn iter(&self) -> impl Iterator { + self.map.iter() + } + + /// Closes all of the files + pub fn close_all(&mut self) { + self.map.clear(); + } +} + +#[cfg(test)] +mod tests { + use core::{ + mem::MaybeUninit, + str::FromStr, + task::{Context, Poll}, + }; + use std::sync::{Arc, Mutex}; + + use yggdrasil_abi::{ + error::Error, + io::{DirectoryEntry, FileType, OpenOptions, SeekFrom}, + util::FixedString, + }; + + use crate::{ + device::CharDevice, + file::DirectoryOpenPosition, + impls::const_value_node, + node::{AccessToken, CommonImpl, DirectoryImpl, Node, NodeFlags, NodeRef, RegularImpl}, + traits::{Read, Seek, Write}, + FileReadiness, InstanceData, + }; + + #[test] + fn file_send_sync() { + fn file_send(_f: &T) {} + fn file_sync(_f: &T) {} + + let node = const_value_node("1234"); + let file = node + .open(OpenOptions::READ, AccessToken::test_authorized()) + .unwrap(); + + file_send(&file); + file_sync(&file); + } + + #[test] + fn physical_dir_read() { + struct D { + entries: Vec<(String, NodeRef)>, + } + struct F; + + impl CommonImpl for D {} + impl DirectoryImpl for D { + fn open(&self, _node: &NodeRef) -> Result { + Ok(DirectoryOpenPosition::FromPhysical(0)) + } + + fn read_entries( + &self, + _node: &NodeRef, + pos: u64, + entries: &mut [MaybeUninit], + ) -> Result<(usize, u64), Error> { + let pos = pos as usize; + if pos == self.entries.len() { + return Ok((0, pos as u64)); + } + + let count = core::cmp::min(entries.len(), self.entries.len() - pos); + for i in 0..count { + let (name, node) = &self.entries[i]; + let entry = DirectoryEntry { + name: FixedString::from_str(name)?, + ty: node.ty(), + }; + + entries[i].write(entry); + } + + Ok((count, (pos + count) as u64)) + } + } + + impl CommonImpl for F {} + impl RegularImpl for F {} + + let d = Node::directory( + D { + entries: Vec::from_iter([ + ("f1".to_owned(), Node::regular(F, NodeFlags::empty())), + ("f2".to_owned(), Node::regular(F, NodeFlags::empty())), + ("f3".to_owned(), Node::regular(F, NodeFlags::empty())), + ]), + }, + NodeFlags::empty(), + ); + + let f = d.open_directory(AccessToken::test_authorized()).unwrap(); + + let mut entries = 
[MaybeUninit::uninit(); 16]; + let count = f.read_dir(&mut entries).unwrap(); + assert_eq!(count, 3); + + unsafe { + assert_eq!( + MaybeUninit::slice_assume_init_ref(&entries[..count]), + &[ + DirectoryEntry { + name: FixedString::from_str("f1").unwrap(), + ty: FileType::File, + }, + DirectoryEntry { + name: FixedString::from_str("f2").unwrap(), + ty: FileType::File, + }, + DirectoryEntry { + name: FixedString::from_str("f3").unwrap(), + ty: FileType::File + } + ] + ); + } + + let count = f.read_dir(&mut entries).unwrap(); + assert_eq!(count, 0); + } + + #[test] + fn cache_dir_read() { + struct D; + + impl CommonImpl for D {} + impl DirectoryImpl for D { + fn open(&self, _node: &NodeRef) -> Result { + Ok(DirectoryOpenPosition::FromCache) + } + } + + let d = Node::directory(D, NodeFlags::empty()); + let child = Node::directory(D, NodeFlags::empty()); + + d.add_child("child1", child).unwrap(); + + let f = d.open_directory(AccessToken::test_authorized()).unwrap(); + + let mut entries = [MaybeUninit::uninit(); 16]; + let count = f.read_dir(&mut entries).unwrap(); + assert_eq!(count, 3); + unsafe { + assert_eq!( + MaybeUninit::slice_assume_init_ref(&entries[..count]), + &[ + DirectoryEntry { + name: FixedString::from_str(".").unwrap(), + ty: FileType::Directory + }, + DirectoryEntry { + name: FixedString::from_str("..").unwrap(), + ty: FileType::Directory + }, + DirectoryEntry { + name: FixedString::from_str("child1").unwrap(), + ty: FileType::Directory + } + ] + ); + } + + let count = f.read_dir(&mut entries).unwrap(); + assert_eq!(count, 0); + } + + #[test] + fn file_read_write() { + struct F { + data: Arc>>, + } + + impl CommonImpl for F { + fn size(&self, _node: &NodeRef) -> Result { + Ok(self.data.lock().unwrap().len() as _) + } + } + impl RegularImpl for F { + fn open( + &self, + _node: &NodeRef, + _opts: OpenOptions, + ) -> Result<(u64, Option), Error> { + Ok((0, None)) + } + + fn read( + &self, + _node: &NodeRef, + _instance: Option<&InstanceData>, + pos: u64, + buf: &mut [u8], + ) -> Result { + let pos = pos as usize; + let data = self.data.lock().unwrap(); + if pos >= data.len() { + return Ok(0); + } + let count = core::cmp::min(data.len() - pos, buf.len()); + buf[..count].copy_from_slice(&data[pos..pos + count]); + Ok(count) + } + + fn write( + &self, + _node: &NodeRef, + _instance: Option<&InstanceData>, + pos: u64, + buf: &[u8], + ) -> Result { + let pos = pos as usize; + let mut data = self.data.lock().unwrap(); + data.resize(pos + buf.len(), 0); + data[pos..pos + buf.len()].copy_from_slice(buf); + Ok(buf.len()) + } + } + + let data = Arc::new(Mutex::new(vec![])); + let node = Node::regular(F { data: data.clone() }, NodeFlags::empty()); + let file = node + .open( + OpenOptions::READ | OpenOptions::WRITE, + AccessToken::test_authorized(), + ) + .unwrap(); + let mut buf = [0; 512]; + + assert_eq!(&*data.lock().unwrap(), &[]); + assert_eq!(file.tell().unwrap(), 0); + + assert_eq!(file.write(b"Hello").unwrap(), 5); + assert_eq!(file.tell().unwrap(), 5); + assert_eq!(&*data.lock().unwrap(), b"Hello"); + + assert_eq!(file.seek(SeekFrom::End(-2)).unwrap(), 3); + assert_eq!(file.tell().unwrap(), 3); + + assert_eq!(file.write(b"123456").unwrap(), 6); + assert_eq!(file.tell().unwrap(), 9); + assert_eq!(&*data.lock().unwrap(), b"Hel123456"); + + assert_eq!(file.seek(SeekFrom::Start(2)).unwrap(), 2); + assert_eq!(file.read(&mut buf).unwrap(), 7); + assert_eq!(file.tell().unwrap(), 9); + assert_eq!(&buf[..7], b"l123456"); + } + + #[test] + fn char_device() { + struct C; + + impl FileReadiness 
for C { + fn poll_read(&self, _cx: &mut Context<'_>) -> Poll> { + unreachable!() + } + } + + impl CharDevice for C { + fn read(&self, buf: &mut [u8]) -> Result { + buf.fill(b'@'); + Ok(buf.len()) + } + + fn is_writable(&self) -> bool { + false + } + } + + static DEV: C = C; + + let node = Node::char(&DEV, NodeFlags::empty()); + let mut buf = [0; 512]; + + let err = node + .open(OpenOptions::WRITE, AccessToken::test_authorized()) + .unwrap_err(); + assert_eq!(err, Error::ReadOnly); + + let file = node + .open(OpenOptions::READ, AccessToken::test_authorized()) + .unwrap(); + assert_eq!(file.tell().unwrap_err(), Error::InvalidOperation); + assert_eq!( + file.seek(SeekFrom::Start(10)).unwrap_err(), + Error::InvalidOperation + ); + assert_eq!(file.read(&mut buf).unwrap(), 512); + assert_eq!(buf, [b'@'; 512]); + + assert_eq!(file.write(b"1234").unwrap_err(), Error::ReadOnly); + } +} diff --git a/kernel/lib/vfs/src/file/pipe.rs b/kernel/lib/vfs/src/file/pipe.rs new file mode 100644 index 00000000..de9190d3 --- /dev/null +++ b/kernel/lib/vfs/src/file/pipe.rs @@ -0,0 +1,227 @@ +use core::{ + pin::Pin, + sync::atomic::{AtomicBool, Ordering}, + task::{Context, Poll}, +}; + +use alloc::{sync::Arc, vec, vec::Vec}; +use futures_util::{task::AtomicWaker, Future}; +use libk_thread::block; +use libk_util::sync::IrqSafeSpinlock; +use yggdrasil_abi::error::Error; + +struct PipeInner { + data: Vec, + capacity: usize, + rd: usize, + wr: usize, +} + +pub struct Pipe { + inner: IrqSafeSpinlock, + shutdown: AtomicBool, + read_notify: AtomicWaker, + write_notify: AtomicWaker, +} + +pub enum PipeEnd { + Read(Arc), + Write(Arc), +} + +impl PipeInner { + pub fn new(capacity: usize) -> Self { + Self { + data: vec![0; capacity], + capacity, + rd: 0, + wr: 0, + } + } + + pub fn can_write(&self) -> bool { + (self.wr + 1) % self.capacity != self.rd + } + + pub fn can_read(&self) -> bool { + self.rd != self.wr + } + + pub unsafe fn write(&mut self, val: u8) { + self.data[self.wr] = val; + self.wr = (self.wr + 1) % self.capacity; + } + + pub unsafe fn read(&mut self) -> u8 { + let val = self.data[self.rd]; + self.rd = (self.rd + 1) % self.capacity; + val + } + + fn try_write(&mut self, val: u8) -> bool { + if self.can_write() { + unsafe { + self.write(val); + } + true + } else { + false + } + } + + fn try_read(&mut self) -> Option { + if self.can_read() { + Some(unsafe { self.read() }) + } else { + None + } + } +} + +impl Pipe { + pub fn new(capacity: usize) -> Self { + Self { + inner: IrqSafeSpinlock::new(PipeInner::new(capacity)), + shutdown: AtomicBool::new(false), + read_notify: AtomicWaker::new(), + write_notify: AtomicWaker::new(), + } + } + + pub fn shutdown(&self) { + self.shutdown.store(true, Ordering::Release); + self.read_notify.wake(); + self.write_notify.wake(); + } + + pub fn blocking_write(&self, val: u8) -> impl Future> + '_ { + struct F<'a> { + pipe: &'a Pipe, + val: u8, + } + + impl<'a> Future for F<'a> { + type Output = Result<(), Error>; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let mut lock = self.pipe.inner.lock(); + + // Try fast path before acquiring write notify to avoid unnecessary contention + if self.pipe.shutdown.load(Ordering::Acquire) { + // TODO BrokenPipe + return Poll::Ready(Err(Error::ReadOnly)); + } else if lock.try_write(self.val) { + self.pipe.read_notify.wake(); + return Poll::Ready(Ok(())); + } + + self.pipe.write_notify.register(cx.waker()); + + if self.pipe.shutdown.load(Ordering::Acquire) { + Poll::Ready(Err(Error::ReadOnly)) + } else if 
lock.try_write(self.val) { + self.pipe.read_notify.wake(); + Poll::Ready(Ok(())) + } else { + Poll::Pending + } + } + } + + F { pipe: self, val } + } + + pub fn blocking_read(&self) -> impl Future> + '_ { + struct F<'a> { + pipe: &'a Pipe, + } + + impl<'a> Future for F<'a> { + type Output = Option; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let mut lock = self.pipe.inner.lock(); + + if let Some(val) = lock.try_read() { + self.pipe.write_notify.wake(); + return Poll::Ready(Some(val)); + } else if self.pipe.shutdown.load(Ordering::Acquire) { + return Poll::Ready(None); + } + + self.pipe.read_notify.register(cx.waker()); + + if let Some(val) = lock.try_read() { + Poll::Ready(Some(val)) + } else if self.pipe.shutdown.load(Ordering::Acquire) { + Poll::Ready(None) + } else { + Poll::Pending + } + } + } + + F { pipe: self } + } +} + +impl PipeEnd { + pub fn new_pair(capacity: usize) -> (PipeEnd, PipeEnd) { + let pipe = Arc::new(Pipe::new(capacity)); + let read = PipeEnd::Read(pipe.clone()); + let write = PipeEnd::Write(pipe); + + (read, write) + } + + pub fn read(&self, buf: &mut [u8]) -> Result { + let PipeEnd::Read(read) = self else { + return Err(Error::InvalidOperation); + }; + + block! { + let mut pos = 0; + let mut rem = buf.len(); + + while rem != 0 { + if let Some(val) = read.blocking_read().await { + buf[pos] = val; + pos += 1; + rem -= 1; + } else { + break; + } + } + + Ok(pos) + }? + } + + pub fn write(&self, buf: &[u8]) -> Result { + let PipeEnd::Write(write) = self else { + return Err(Error::InvalidOperation); + }; + + block! { + let mut pos = 0; + let mut rem = buf.len(); + + while rem != 0 { + write.blocking_write(buf[pos]).await?; + pos += 1; + rem -= 1; + } + + Ok(pos) + }? + } +} + +impl Drop for PipeEnd { + fn drop(&mut self) { + match self { + Self::Read(read) => read.shutdown(), + Self::Write(write) => write.shutdown(), + } + } +} diff --git a/kernel/lib/vfs/src/file/regular.rs b/kernel/lib/vfs/src/file/regular.rs new file mode 100644 index 00000000..c36a413d --- /dev/null +++ b/kernel/lib/vfs/src/file/regular.rs @@ -0,0 +1,87 @@ +use libk_util::sync::IrqSafeSpinlock; +use yggdrasil_abi::{error::Error, io::SeekFrom}; + +use super::InstanceData; +use crate::node::NodeRef; + +#[derive(Clone)] +pub struct RegularFile { + pub(super) node: NodeRef, + pub(super) read: bool, + pub(super) write: bool, + pub(super) instance_data: Option, + pub(super) position: IrqSafeSpinlock, +} + +impl RegularFile { + pub fn read(&self, buf: &mut [u8]) -> Result { + if !self.read { + return Err(Error::InvalidFile); + } + let mut position = self.position.lock(); + let reg = self.node.as_regular()?; + let count = reg.read(&self.node, self.instance_data.as_ref(), *position, buf)?; + *position += count as u64; + Ok(count) + } + + pub fn write(&self, buf: &[u8]) -> Result { + if !self.write { + return Err(Error::InvalidFile); + } + let mut position = self.position.lock(); + let reg = self.node.as_regular()?; + let count = reg.write(&self.node, self.instance_data.as_ref(), *position, buf)?; + *position += count as u64; + Ok(count) + } + + // TODO should seek beyond the end of a read-only file be allowed? 
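+    // [Editor's note, illustrative] As written, seek() below does not bound-check
+    // SeekFrom::Start against the file size, so e.g.
+    //
+    //     file.seek(SeekFrom::Start(1_000_000))?;
+    //
+    // succeeds regardless of size; a following read simply returns 0 bytes if the
+    // underlying impl treats out-of-range positions as EOF.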
+ pub fn seek(&self, from: SeekFrom) -> Result { + let mut position = self.position.lock(); + + let newpos = match from { + SeekFrom::Current(off) => { + let newpos = i64::try_from(*position).unwrap() + off; + if newpos < 0 { + return Err(Error::InvalidArgument); + } + newpos as u64 + } + SeekFrom::Start(pos) => pos, + SeekFrom::End(off) => { + let reg = self.node.as_regular()?; + let size = i64::try_from(reg.size(&self.node)?).unwrap(); + let newpos = size + off; + + if newpos < 0 { + return Err(Error::InvalidArgument); + } + + newpos as u64 + } + }; + + *position = newpos; + Ok(newpos) + } +} + +impl Drop for RegularFile { + fn drop(&mut self) { + let reg = match self.node.as_regular() { + Ok(reg) => reg, + Err(err) => { + log::warn!( + "RegularFile::Drop: self.node.as_regular() failed: {:?}", + err + ); + return; + } + }; + + if let Err(err) = reg.close(&self.node, self.instance_data.as_ref()) { + log::warn!("RegularFile::Drop: close() failed: {:?}", err); + } + } +} diff --git a/kernel/lib/vfs/src/ioctx.rs b/kernel/lib/vfs/src/ioctx.rs new file mode 100644 index 00000000..96878ebf --- /dev/null +++ b/kernel/lib/vfs/src/ioctx.rs @@ -0,0 +1,664 @@ +use alloc::{borrow::ToOwned, sync::Arc}; +use libk_thread::binary::ProgramLoadSource; +use yggdrasil_abi::{ + error::Error, + io::{FileMode, FileType, GroupId, OpenOptions, UserId}, + path::{Path, PathBuf}, +}; + +use crate::{ + node::{AccessToken, CreateInfo}, + File, FileRef, NodeRef, +}; + +/// Describes a general filesystem access +pub enum Action { + /// Access involves reading data without modification + Read, + /// Access involves writing file data or modifying directory content + Write, + /// Access involves traversing a path or opening a file for execution + Execute, +} + +/// Contains the state of program's I/O context: current working directory, UID, GID, umask, etc. +#[derive(Clone)] +pub struct IoContext { + uid: UserId, + gid: GroupId, + umask: FileMode, + cwd_node: NodeRef, + cwd_path: PathBuf, + root: NodeRef, +} + +impl ProgramLoadSource for IoContext { + type File = File; + + fn open_executable>(&mut self, path: P) -> Result, Error> { + let node = self.find(None, path, true, true)?; + let access = self.check_access(Action::Read, &node)?; + node.open(OpenOptions::READ, access) + } +} + +impl IoContext { + /// Constructs a new [IoContext] with given root node (which also becomes the cwd). By default, + /// the root user/group is used. Default umask is 0o022. + pub fn new(root: NodeRef) -> Self { + Self { + uid: UserId::root(), + gid: GroupId::root(), + umask: FileMode::new(0o022), + cwd_node: root.clone(), + cwd_path: PathBuf::new(), + root, + } + } + + /// "Clones" an I/O context the way it is inherited by a newly spawned child process + pub fn inherit(other: &IoContext) -> Self { + other.clone() + } + + /// Returns the root node of the [IoContext] + pub fn root(&self) -> &NodeRef { + &self.root + } + + /// Returns the current working directory node of the [IoContext] + pub fn cwd(&self) -> &NodeRef { + &self.cwd_node + } + + /// Sets the current user ID of the context. Returns [Error::PermissionDenied] if current user + /// is not root. 
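+    ///
+    /// # Example (illustrative)
+    ///
+    /// ```ignore
+    /// ioctx.set_uid(UserId::from(1))?;                 // permitted while uid is root
+    /// assert!(ioctx.set_uid(UserId::root()).is_err()); // rejected: no longer root
+    /// ```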
+ pub fn set_uid(&mut self, uid: UserId) -> Result<(), Error> { + if uid != self.uid && !self.uid.is_root() { + Err(Error::PermissionDenied) + } else { + self.uid = uid; + Ok(()) + } + } + + #[cfg(test)] + fn set_uid_unchecked(&mut self, uid: UserId) { + self.uid = uid; + } + + /// Updates the context's permission mask + pub fn set_umask(&mut self, mask: FileMode) { + self.umask = mask; + } + + /// Returns the context's permission mask + pub fn umask(&self) -> FileMode { + self.umask + } + + /// Sets the current group ID of the context. Returns [Error::PermissionDenied] if current user + /// is not root. + pub fn set_gid(&mut self, gid: GroupId) -> Result<(), Error> { + if gid != self.gid && !self.uid.is_root() { + Err(Error::PermissionDenied) + } else { + self.gid = gid; + Ok(()) + } + } + + #[cfg(test)] + fn set_gid_unchecked(&mut self, gid: GroupId) { + self.gid = gid; + } + + /// Checks if the current user can access given [crate::Node] and, if so, returns an + /// [AccessToken]. + pub fn check_access(&self, action: Action, node: &NodeRef) -> Result { + let metadata = node.metadata()?; + + let allow = match action { + Action::Read => { + self.uid.is_root() + | metadata.user_read(self.uid) + | metadata.group_read(self.gid) + | metadata.other_read() + } + Action::Write => { + self.uid.is_root() + | metadata.user_write(self.uid) + | metadata.group_write(self.gid) + | metadata.other_write() + } + Action::Execute => { + metadata.user_exec(self.uid) | metadata.group_exec(self.gid) | metadata.other_exec() + } + }; + + if allow { + Ok(unsafe { AccessToken::authorized() }) + } else { + Err(Error::PermissionDenied) + } + } + + /// Changes current working directory to `path`. Will fail if access is denied or the path does + /// not point to a directory. + pub fn set_cwd>(&mut self, path: P) -> Result<(), Error> { + let path = path.as_ref(); + if !path.is_absolute() { + todo!(); + } + let node = self._find(self.root.clone(), path.trim_start_separators(), true, true)?; + if !node.is_directory() { + return Err(Error::NotADirectory); + } + self.cwd_node = node; + self.cwd_path = path.to_owned(); + Ok(()) + } + + /// Returns the current working directory path + pub fn cwd_path(&self) -> &Path { + self.cwd_path.as_ref() + } + + /// Makes a directory at given path become a "mountpoint" for the given filesystem root. + /// When accessed, the target directory will return contents of the filesystem root instead of + /// its own. Both the `target` path and `fs_root` Node must be directories. + pub fn mount>(&mut self, target: P, fs_root: NodeRef) -> Result<(), Error> { + if !self.uid.is_root() { + return Err(Error::PermissionDenied); + } + + let target = target.as_ref(); + if !target.is_absolute() { + todo!(); + } + let target_node = self._find( + self.root.clone(), + target.trim_start_separators(), + true, + false, + )?; + + target_node.set_mountpoint_target(fs_root) + } + + /// Locates a [crate::Node] at given path and opens it with requested access options. If no + /// such node exists and `OpenOptions::CREATE` is specified, will attempt to create a regular + /// file node at given path and then open it. 
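+    ///
+    /// # Example (illustrative; the path is hypothetical)
+    ///
+    /// ```ignore
+    /// // Open an existing file for reading, relative to the cwd:
+    /// let file = ioctx.open(None, "etc/config", OpenOptions::READ, FileMode::empty())?;
+    /// // Create the file if it is missing; the mode is filtered through the umask:
+    /// let file = ioctx.open(
+    ///     None,
+    ///     "etc/config",
+    ///     OpenOptions::WRITE | OpenOptions::CREATE,
+    ///     FileMode::new(0o644),
+    /// )?;
+    /// ```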
+    pub fn open<P: AsRef<Path>>(
+        &mut self,
+        at: Option<NodeRef>,
+        path: P,
+        opts: OpenOptions,
+        mode: FileMode,
+    ) -> Result<FileRef, Error> {
+        let path = path.as_ref();
+        let node = match self.find(at.clone(), path, true, true) {
+            Ok(node) => node,
+            Err(Error::DoesNotExist) if opts.contains(OpenOptions::CREATE) => {
+                // let create_mode = mode & !self.umask;
+                let (parent, name) = path.split_right();
+                let parent = self.find(at, parent, true, true)?;
+                let create_info = CreateInfo {
+                    name: name.into(),
+                    mode: mode & !self.umask,
+                    uid: self.uid,
+                    gid: self.gid,
+                    ty: FileType::File,
+                };
+                let access = self.check_access(Action::Write, &parent)?;
+                parent.create(&create_info, access)?
+            }
+            Err(err) => return Err(err),
+        };
+
+        // If neither read nor write access is requested, no permission check is needed
+        let mut access = unsafe { AccessToken::authorized() };
+
+        if opts.contains(OpenOptions::READ) {
+            access += self.check_access(Action::Read, &node)?;
+        }
+        if opts.contains(OpenOptions::WRITE) {
+            access += self.check_access(Action::Write, &node)?;
+        }
+
+        node.open(opts, access)
+    }
+
+    /// Creates a directory at the given path
+    pub fn create_directory<P: AsRef<Path>>(
+        &mut self,
+        at: Option<NodeRef>,
+        path: P,
+        mode: FileMode,
+    ) -> Result<NodeRef, Error> {
+        let path = path.as_ref();
+        let (parent, name) = path.split_right();
+        let parent = self.find(at, parent, true, true)?;
+        let access = self.check_access(Action::Write, &parent)?;
+        let create_info = CreateInfo {
+            name: name.into(),
+            ty: FileType::Directory,
+            uid: self.uid,
+            gid: self.gid,
+            mode: mode & !self.umask,
+        };
+
+        parent.create(&create_info, access)
+    }
+
+    /// Creates an arbitrary node at given path
+    pub fn create_node<P: AsRef<Path>>(
+        &mut self,
+        at: Option<NodeRef>,
+        path: P,
+        node: NodeRef,
+    ) -> Result<(), Error> {
+        let path = path.as_ref();
+        let (parent, name) = path.split_right();
+        let parent = self.find(at, parent, true, true)?;
+        let access = self.check_access(Action::Write, &parent)?;
+
+        parent.create_node(node, name, access)?;
+
+        Ok(())
+    }
+
+    fn remove_entry<P: AsRef<Path>>(
+        &mut self,
+        at: Option<NodeRef>,
+        path: P,
+        directory: bool,
+    ) -> Result<(), Error> {
+        let path = path.as_ref();
+        let (parent, name) = path.trim_end_separators().split_right();
+
+        if name.is_empty() {
+            log::warn!("Tried to remove weird path: {:?}", path);
+            return Err(Error::DoesNotExist);
+        }
+
+        let parent = self.find(at, parent, false, false)?;
+        let access = self.check_access(Action::Write, &parent)?;
+
+        if directory {
+            // parent.remove_directory(name, access)
+            todo!()
+        } else {
+            parent.remove_file(name, access)
+        }
+    }
+
+    /// Removes a device or regular file node at given path
+    pub fn remove_file<P: AsRef<Path>>(
+        &mut self,
+        at: Option<NodeRef>,
+        path: P,
+    ) -> Result<(), Error> {
+        self.remove_entry(at, path, false)
+    }
+
+    /// Locates a [crate::Node] pointed to by given [Path]
+    pub fn find<P: AsRef<Path>>(
+        &mut self,
+        at: Option<NodeRef>,
+        path: P,
+        follow_links: bool,
+        follow_mount: bool,
+    ) -> Result<NodeRef, Error> {
+        let mut path = path.as_ref();
+        let at = if path.is_absolute() {
+            path = path.trim_start_separators();
+            self.root.clone()
+        } else if let Some(at) = at {
+            at
+        } else {
+            self.cwd_node.clone()
+        };
+
+        self._find(at, path, follow_links, follow_mount)
+    }
+
+    fn _resolve_link(&self, at: &NodeRef) -> Result<NodeRef, Error> {
+        let token = self.check_access(Action::Read, at)?;
+        // let _path = link.imp.read_to_string()?;
+        match at.read_symlink_node(token) {
+            Ok(node) => return Ok(node),
+            // Need to read the link data and resolve it manually
+            Err(Error::NotImplemented) => todo!(),
+            Err(e) => return Err(e),
+        }
+    }
+
+    fn _resolve(
+        &self,
+        mut at: NodeRef,
+        follow_links:
bool, + follow_mount: bool, + ) -> Result { + loop { + if follow_mount && let Some(target) = at.mountpoint_target() { + at = target.clone(); + continue; + } + + if follow_links { + // Resolve the link + match self._resolve_link(&at) { + Ok(node) => { + at = node; + } + // Not a link, not an error + Err(Error::InvalidFile) => break Ok(at), + Err(e) => return Err(e), + } + continue; + } + + break Ok(at); + } + } + + fn _find( + &self, + mut at: NodeRef, + path: &Path, + follow_links: bool, + follow_mount: bool, + ) -> Result { + let mut element; + let mut rest = path; + + loop { + (element, rest) = rest.split_left(); + + if !at.is_directory() { + return Err(Error::NotADirectory); + } + + match element { + Path::PARENT_NAME => { + at = at.parent(); + } + Path::SELF_NAME => {} + _ => break, + } + } + + at = self._resolve(at, follow_links, follow_mount)?; + + if !at.is_directory() { + return Err(Error::NotADirectory); + } + + if element.is_empty() && rest.is_empty() { + return Ok(at); + } + + let access = self.check_access(Action::Execute, &at)?; + let node = at.lookup_or_load(element, access)?; + let node = self._resolve(node, follow_links, follow_mount)?; + + if rest.is_empty() { + Ok(node) + } else { + self._find(node, rest, follow_links, follow_mount) + } + } +} + +#[cfg(test)] +mod tests { + use alloc::sync::Arc; + use yggdrasil_abi::{ + error::Error, + io::{FileMode, GroupId, OpenOptions, UserId}, + path::Path, + }; + + use crate::{ + impls::{const_value_node, f_symlink, mdir, value_node}, + node::AccessToken, + Read, + }; + + use super::IoContext; + + #[test] + fn access() { + let f1 = const_value_node("file1"); + let f2 = const_value_node("file2"); + let f3 = value_node("file3".to_owned()); + let root = mdir([("f1", f1.clone()), ("f2", f2.clone()), ("f3", f3.clone())]); + + let mut ioctx = IoContext::new(root.clone()); + + let uid = UserId::from(1); + let gid = GroupId::from(1); + + // 1:1 + ioctx.set_uid_unchecked(uid); + ioctx.set_gid_unchecked(gid); + + // 1:0, 0444 + f1.set_access( + Some(uid), + None, + Some(FileMode::new(0o444)), + AccessToken::test_authorized(), + ) + .unwrap(); + + // 0:1, 0644 + f2.set_access(None, Some(gid), None, AccessToken::test_authorized()) + .unwrap(); + + // 1:1, 0644 + f3.set_access(Some(uid), Some(gid), None, AccessToken::test_authorized()) + .unwrap(); + + // f1, read-only + ioctx + .open(None, "/f1", OpenOptions::READ, FileMode::empty()) + .unwrap(); + // f1, write + let err = ioctx + .open(None, "/f1", OpenOptions::WRITE, FileMode::empty()) + .unwrap_err(); + assert_eq!(err, Error::PermissionDenied); + + // f2, read-only + ioctx + .open(None, "/f2", OpenOptions::READ, FileMode::empty()) + .unwrap(); + // f2, write + let err = ioctx + .open(None, "/f2", OpenOptions::WRITE, FileMode::empty()) + .unwrap_err(); + assert_eq!(err, Error::PermissionDenied); + + // f3, read-only + ioctx + .open(None, "/f3", OpenOptions::READ, FileMode::empty()) + .unwrap(); + // f3, write + ioctx + .open(None, "/f3", OpenOptions::WRITE, FileMode::empty()) + .unwrap(); + } + + #[test] + fn cwd() { + let d1_f1 = const_value_node("dir1-file1"); + let d1 = mdir([("f1", d1_f1.clone())]); + let f1 = const_value_node("file1"); + let root = mdir([("f1", f1.clone()), ("d1", d1.clone())]); + + let mut ioctx = IoContext::new(root.clone()); + + assert_eq!(ioctx.cwd_path(), Path::empty()); + + let node = ioctx.find(None, "f1", true, true).unwrap(); + assert!(Arc::ptr_eq(&node, &f1)); + + let node = ioctx.find(None, "d1/f1", true, true).unwrap(); + assert!(Arc::ptr_eq(&node, &d1_f1)); + 
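+        // [Editor's note] find(None, ...) resolves relative paths against the current
+        // working directory; the assertions below repeat the lookups after set_cwd("/d1")
+        // re-bases them (and ".." walks back toward the root).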
+ let node = ioctx.find(None, "d1", true, true).unwrap(); + assert!(Arc::ptr_eq(&node, &d1)); + + ioctx.set_cwd("/d1").unwrap(); + assert_eq!(ioctx.cwd_path(), Path::from_str("/d1")); + + let err = ioctx.find(None, "d1", true, true).unwrap_err(); + assert_eq!(err, Error::DoesNotExist); + + let node = ioctx.find(None, "f1", true, true).unwrap(); + assert!(Arc::ptr_eq(&node, &d1_f1)); + + let node = ioctx.find(None, "../d1/f1", true, true).unwrap(); + assert!(Arc::ptr_eq(&node, &d1_f1)); + + let node = ioctx.find(None, "/f1", true, true).unwrap(); + assert!(Arc::ptr_eq(&node, &f1)); + + let node = ioctx.find(None, "../f1", true, true).unwrap(); + assert!(Arc::ptr_eq(&node, &f1)); + + let node = ioctx.find(None, "..", true, true).unwrap(); + assert!(Arc::ptr_eq(&node, &root)); + } + + #[test] + fn mount_resolution() { + let root2_f1 = const_value_node("root2-file1"); + let root2 = mdir([("f1", root2_f1.clone())]); + let root1_f1 = const_value_node("root1-file1"); + let root1 = mdir([("f1", root1_f1.clone())]); + + let mut ioctx = IoContext::new(root1.clone()); + + let node = ioctx.find(None, "/f1", true, true).unwrap(); + assert!(Arc::ptr_eq(&node, &root1_f1)); + + ioctx.mount("/", root2.clone()).unwrap(); + + let node = ioctx.find(None, "/f1", true, true).unwrap(); + assert!(Arc::ptr_eq(&node, &root2_f1)); + + let node = ioctx.find(None, "/f1", true, false).unwrap(); + assert!(Arc::ptr_eq(&node, &root1_f1)); + + let node = ioctx.find(None, "/", true, true).unwrap(); + assert!(Arc::ptr_eq(&node, &root2)); + + let node = ioctx.find(None, "/", true, false).unwrap(); + assert!(Arc::ptr_eq(&node, &root1)); + } + + #[test] + fn symlink_resolution() { + let f1 = const_value_node("file1"); + let l1 = f_symlink(f1.clone()); + let root = mdir([("l1", l1.clone())]); + + let mut ioctx = IoContext::new(root.clone()); + + // No follow + let node = ioctx.find(None, "/l1", false, true).unwrap(); + assert!(Arc::ptr_eq(&node, &l1)); + + // Follow + let node = ioctx.find(None, "/l1", true, true).unwrap(); + assert!(Arc::ptr_eq(&node, &f1)); + } + + #[test] + fn open_or_create() { + let root_d1_f1 = const_value_node("dir1/file1"); + let root_f1 = const_value_node("file1"); + let root_d1 = mdir([("f1", root_d1_f1.clone())]); + let root = mdir([("f1", root_f1.clone()), ("d1", root_d1.clone())]); + + let mut ioctx = IoContext::new(root.clone()); + let mut buf = [0; 512]; + + let file = ioctx + .open(None, "/d1/f1", OpenOptions::READ, FileMode::empty()) + .unwrap(); + assert!(Arc::ptr_eq(&file.node().unwrap(), &root_d1_f1)); + assert_eq!(file.read(&mut buf).unwrap(), 10); + assert_eq!(&buf[..10], b"dir1/file1"); + drop(file); + + let file = ioctx + .open( + None, + "/d1/f1", + OpenOptions::READ | OpenOptions::CREATE, + FileMode::empty(), + ) + .unwrap(); + assert!(Arc::ptr_eq(&file.node().unwrap(), &root_d1_f1)); + assert_eq!(file.read(&mut buf).unwrap(), 10); + assert_eq!(&buf[..10], b"dir1/file1"); + drop(file); + + let err = ioctx + .open( + None, + "/d1/f2", + OpenOptions::WRITE | OpenOptions::CREATE, + FileMode::empty(), + ) + .unwrap_err(); + assert_eq!(err, Error::ReadOnly); + } + + #[test] + fn find_tests() { + let root_d1_f1 = const_value_node("dir1/file1"); + let root_f1 = const_value_node("file1"); + let root_d1 = mdir([("f1", root_d1_f1.clone())]); + let root = mdir([("f1", root_f1.clone()), ("d1", root_d1.clone())]); + + let mut ioctx = IoContext::new(root.clone()); + + // Ok paths + let node = ioctx.find(None, Path::empty(), false, false).unwrap(); + assert!(Arc::ptr_eq(&node, &root)); + let node = 
ioctx.find(None, Path::from_str("/"), false, false).unwrap(); + assert!(Arc::ptr_eq(&node, &root)); + + // Weird paths + let node = ioctx + .find(None, Path::from_str("////.////./"), false, false) + .unwrap(); + assert!(Arc::ptr_eq(&node, &root)); + let node = ioctx + .find(None, Path::from_str("/../.././//."), false, false) + .unwrap(); + assert!(Arc::ptr_eq(&node, &root)); + + let node = ioctx + .find(None, Path::from_str("/f1"), false, false) + .unwrap(); + assert!(Arc::ptr_eq(&node, &root_f1)); + let node = ioctx + .find(None, Path::from_str("/d1/f1"), false, false) + .unwrap(); + assert!(Arc::ptr_eq(&node, &root_d1_f1)); + let node = ioctx + .find(None, Path::from_str("/d1/../d1/./f1"), false, false) + .unwrap(); + assert!(Arc::ptr_eq(&node, &root_d1_f1)); + let node = ioctx + .find(None, Path::from_str("/d1/.."), false, false) + .unwrap(); + assert!(Arc::ptr_eq(&node, &root)); + } +} diff --git a/kernel/lib/vfs/src/lib.rs b/kernel/lib/vfs/src/lib.rs new file mode 100644 index 00000000..03bac5a7 --- /dev/null +++ b/kernel/lib/vfs/src/lib.rs @@ -0,0 +1,44 @@ +//! Virtual filesystem interfaces and driver implementation + +#![cfg_attr(not(test), no_std)] +#![cfg_attr(test, allow(unused_imports))] +#![allow(clippy::new_ret_no_self, clippy::new_without_default)] +#![deny(missing_docs)] +#![feature( + if_let_guard, + maybe_uninit_slice, + trait_alias, + let_chains, + new_uninit, + trait_upcasting +)] + +extern crate alloc; + +pub(crate) mod channel; +pub(crate) mod device; +pub(crate) mod file; +pub(crate) mod ioctx; +pub(crate) mod node; +pub(crate) mod path; +pub(crate) mod poll; +pub(crate) mod pty; +pub(crate) mod shared_memory; +pub(crate) mod socket; +pub(crate) mod timer; +pub(crate) mod traits; + +pub use channel::{Channel, ChannelDescriptor, Message, MessagePayload, Subscription}; +pub use device::CharDevice; +pub use file::{DirectoryOpenPosition, File, FileRef, FileSet, InstanceData}; +pub use ioctx::{Action, IoContext}; +pub use node::{ + impls, AccessToken, CommonImpl, CreateInfo, DirectoryImpl, Metadata, Node, NodeFlags, NodeRef, + RegularImpl, SymlinkImpl, +}; +pub use poll::FdPoll; +pub use pty::{PseudoTerminal, PseudoTerminalMaster, PseudoTerminalSlave}; +pub use shared_memory::SharedMemory; +pub use socket::{ConnectionSocket, ListenerSocket, PacketSocket, Socket}; +pub use timer::TimerFile; +pub use traits::{FileReadiness, Read, Seek, Write}; diff --git a/kernel/lib/vfs/src/node/access.rs b/kernel/lib/vfs/src/node/access.rs new file mode 100644 index 00000000..76a72430 --- /dev/null +++ b/kernel/lib/vfs/src/node/access.rs @@ -0,0 +1,79 @@ +use core::{ + marker::PhantomData, + ops::{Add, AddAssign}, +}; + +use yggdrasil_abi::io::{FileMode, GroupId, UserId}; + +use super::Metadata; + +/// Zero-sized token type used to ensure checked access to node functions +#[derive(Clone)] +pub struct AccessToken(PhantomData<()>); + +#[allow(missing_docs)] +impl Metadata { + pub fn user_read(&self, uid: UserId) -> bool { + self.uid == uid && self.mode.contains(FileMode::USER_READ) + } + + pub fn user_write(&self, uid: UserId) -> bool { + self.uid == uid && self.mode.contains(FileMode::USER_WRITE) + } + + pub fn user_exec(&self, uid: UserId) -> bool { + self.uid == uid && self.mode.contains(FileMode::USER_EXEC) + } + + pub fn group_read(&self, gid: GroupId) -> bool { + self.gid == gid && self.mode.contains(FileMode::GROUP_READ) + } + + pub fn group_write(&self, gid: GroupId) -> bool { + self.gid == gid && self.mode.contains(FileMode::GROUP_WRITE) + } + + pub fn group_exec(&self, gid: 
GroupId) -> bool { + self.gid == gid && self.mode.contains(FileMode::GROUP_EXEC) + } + + pub fn other_read(&self) -> bool { + self.mode.contains(FileMode::OTHER_READ) + } + + pub fn other_write(&self) -> bool { + self.mode.contains(FileMode::OTHER_WRITE) + } + + pub fn other_exec(&self) -> bool { + self.mode.contains(FileMode::OTHER_EXEC) + } +} + +impl AccessToken { + /// Creates an "authorized" [AccessToken]. + /// + /// # Safety + /// + /// Unsafe: allows for unchecked authorization of any node actions. + pub const unsafe fn authorized() -> Self { + Self(PhantomData) + } + + #[cfg(test)] + pub const fn test_authorized() -> Self { + unsafe { Self::authorized() } + } +} + +impl Add for AccessToken { + type Output = Self; + + fn add(self, _rhs: Self) -> Self::Output { + self + } +} + +impl AddAssign for AccessToken { + fn add_assign(&mut self, _rhs: Self) {} +} diff --git a/kernel/lib/vfs/src/node/impls.rs b/kernel/lib/vfs/src/node/impls.rs new file mode 100644 index 00000000..a55410c0 --- /dev/null +++ b/kernel/lib/vfs/src/node/impls.rs @@ -0,0 +1,516 @@ +//! Various helper node implementations for convenience +use core::{marker::PhantomData, str::FromStr}; + +use alloc::{ + string::{String, ToString}, + sync::Arc, + vec::Vec, +}; + +use libk_util::sync::IrqSafeSpinlock; +use yggdrasil_abi::{error::Error, io::OpenOptions}; + +use crate::{DirectoryOpenPosition, InstanceData}; + +use super::{CommonImpl, DirectoryImpl, Node, NodeFlags, NodeRef, RegularImpl, SymlinkImpl}; + +trait SliceRead { + fn read_slice(&self, offset: usize, buf: &mut [u8]) -> usize; +} + +trait SliceWrite { + fn write_slice(&mut self, offset: usize, buf: &[u8]) -> usize; +} + +trait AsInstanceData { + fn as_instance_data(&self) -> Vec; +} + +/// Closure interface for reading a single value +pub trait ValueReadFn = Fn() -> Result + Send + Sync; +/// Closure interface for writing a single value +pub trait ValueWriteFn = Fn(T) -> Result<(), Error> + Send + Sync; +/// Closure interface for reading bytes +pub trait ReadFn = Fn(u64, &mut [u8]) -> Result + Send + Sync; +/// Closure interface for writing bytes +pub trait WriteFn = Fn(u64, &[u8]) -> Result + Send + Sync; +/// Closure interface for reading links +pub trait ReadLinkFn = Fn() -> Result + Send + Sync; + +impl> SliceRead for T { + fn read_slice(&self, pos: usize, buf: &mut [u8]) -> usize { + let value = self.as_ref(); + if pos >= value.len() { + return 0; + } + let count = core::cmp::min(value.len() - pos, buf.len()); + buf[..count].copy_from_slice(&value[pos..pos + count]); + count + } +} + +impl SliceWrite for Vec { + fn write_slice(&mut self, offset: usize, buf: &[u8]) -> usize { + if offset + buf.len() > self.len() { + self.resize(offset + buf.len(), 0); + } + self[offset..offset + buf.len()].copy_from_slice(buf); + buf.len() + } +} + +impl AsInstanceData for T { + fn as_instance_data(&self) -> Vec { + self.to_string().as_bytes().to_vec() + } +} + +enum FnNodeData { + Read(Vec), + Write(IrqSafeSpinlock>), +} + +/// Allows read-only access to a value. The value is converted to a string representation when it's +/// read. +pub struct ReadOnlyFnValueNode> { + read: R, + _pd: PhantomData, +} + +/// Allows read-write access to a value (but not both at the same time). The value is converted +/// to/from a string representation when it's read/written. 
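+///
+/// # Example (illustrative)
+///
+/// ```ignore
+/// let node = FnValueNode::new(
+///     || Ok(42u32),
+///     |v: u32| {
+///         log::info!("new value: {}", v);
+///         Ok(())
+///     },
+/// );
+/// ```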
+pub struct FnValueNode, W: ValueWriteFn> { + read: R, + write: W, + _pd: PhantomData, +} + +/// Allows read-only access to a "functional file" +pub struct ReadOnlyFnNode { + read: R, +} + +/// In-memory directory using tree cache +pub struct MemoryDirectory; +/// In-memory symlink pointing to a fixed [Node] +pub struct FixedSymlink { + target: NodeRef, +} +/// In-memory functional symlink +pub struct FnSymlink +where + F: ReadLinkFn + 'static, +{ + read: F, +} + +impl ReadOnlyFnValueNode +where + T: ToString + Send + Sync + 'static, + R: ValueReadFn + 'static, +{ + /// Creates a new [ReadOnlyFnValueNode] with given read function + pub fn new(read: R) -> NodeRef { + Node::regular( + Self::new_impl(read), + NodeFlags::IN_MEMORY_PROPS | NodeFlags::IN_MEMORY_SIZE, + ) + } + + const fn new_impl(read: R) -> Self { + Self { + read, + _pd: PhantomData, + } + } +} + +impl CommonImpl for ReadOnlyFnValueNode +where + T: ToString + Send + Sync, + R: ValueReadFn, +{ + fn size(&self, _node: &NodeRef) -> Result { + Ok(0) + } +} + +impl RegularImpl for ReadOnlyFnValueNode +where + T: ToString + Send + Sync, + R: ValueReadFn, +{ + fn open( + &self, + _node: &NodeRef, + opts: OpenOptions, + ) -> Result<(u64, Option), Error> { + if opts.contains(OpenOptions::WRITE) { + return Err(Error::ReadOnly); + } + let t = (self.read)()?; + Ok((0, Some(Arc::new(t.as_instance_data())))) + } + + fn read( + &self, + _node: &NodeRef, + instance: Option<&InstanceData>, + pos: u64, + buf: &mut [u8], + ) -> Result { + let value = instance.unwrap().downcast_ref::>().unwrap(); + Ok(value.read_slice(pos as usize, buf)) + } + + fn write( + &self, + _node: &NodeRef, + _instance: Option<&InstanceData>, + _pos: u64, + _buf: &[u8], + ) -> Result { + Err(Error::ReadOnly) + } + + fn truncate(&self, _node: &NodeRef, _new_size: u64) -> Result<(), Error> { + Err(Error::ReadOnly) + } +} + +// Read-write FnNode + +impl FnNodeData { + fn write() -> Self { + Self::Write(IrqSafeSpinlock::new(Vec::new())) + } + + fn read(value: T) -> Self { + Self::Read(value.as_instance_data()) + } + + fn as_read(&self) -> Result<&Vec, Error> { + match self { + Self::Read(r) => Ok(r), + Self::Write(_) => Err(Error::InvalidOperation), + } + } + + fn as_write(&self) -> Result<&IrqSafeSpinlock>, Error> { + match self { + Self::Write(w) => Ok(w), + Self::Read(_) => Err(Error::InvalidOperation), + } + } +} + +impl FnValueNode +where + T: ToString + FromStr + Send + Sync + 'static, + R: ValueReadFn + 'static, + W: ValueWriteFn + 'static, +{ + /// Creates a new [FnValueNode] with given read and write functions + pub fn new(read: R, write: W) -> NodeRef { + Node::regular( + Self::new_impl(read, write), + NodeFlags::IN_MEMORY_PROPS | NodeFlags::IN_MEMORY_SIZE, + ) + } + + const fn new_impl(read: R, write: W) -> Self { + Self { + read, + write, + _pd: PhantomData, + } + } +} + +impl CommonImpl for FnValueNode +where + T: ToString + FromStr + Send + Sync, + R: ValueReadFn, + W: ValueWriteFn, +{ + fn size(&self, _node: &NodeRef) -> Result { + Ok(0) + } +} + +impl RegularImpl for FnValueNode +where + T: ToString + FromStr + Send + Sync, + R: ValueReadFn, + W: ValueWriteFn, +{ + fn open( + &self, + _node: &NodeRef, + opts: OpenOptions, + ) -> Result<(u64, Option), Error> { + if opts.contains(OpenOptions::READ | OpenOptions::WRITE) { + Err(Error::InvalidOperation) + } else if opts.contains(OpenOptions::WRITE) { + Ok((0, Some(Arc::new(FnNodeData::write())))) + } else if opts.contains(OpenOptions::READ) { + let t = (self.read)()?; + Ok((0, Some(Arc::new(FnNodeData::read(t))))) 
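+            // [Editor's note] The value is snapshotted into the instance data here, at
+            // open time, so a reader observes one consistent string even if the backing
+            // value changes while the file stays open.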
+ } else { + Err(Error::InvalidOperation) + } + } + + fn close(&self, _node: &NodeRef, instance: Option<&InstanceData>) -> Result<(), Error> { + if let Ok(write) = instance + .unwrap() + .downcast_ref::() + .unwrap() + .as_write() + { + let write = write.lock(); + // Flush write + let str = core::str::from_utf8(write.as_ref()) + .map_err(|_| Error::InvalidArgument)? + .trim(); + let t = T::from_str(str).map_err(|_| Error::InvalidArgument)?; + + (self.write)(t)?; + } + Ok(()) + } + + fn read( + &self, + _node: &NodeRef, + instance: Option<&InstanceData>, + pos: u64, + buf: &mut [u8], + ) -> Result { + let instance = instance.unwrap().downcast_ref::().unwrap(); + Ok(instance.as_read()?.read_slice(pos as usize, buf)) + } + + fn write( + &self, + _node: &NodeRef, + instance: Option<&InstanceData>, + pos: u64, + buf: &[u8], + ) -> Result { + let instance = instance.unwrap().downcast_ref::().unwrap(); + Ok(instance.as_write()?.lock().write_slice(pos as _, buf)) + } + + fn truncate(&self, _node: &NodeRef, _new_size: u64) -> Result<(), Error> { + Ok(()) + } +} + +// Byte read-only node + +impl ReadOnlyFnNode +where + R: ReadFn + 'static, +{ + /// Creates a new [ReadOnlyFnNode] with given read function + pub fn new(read: R) -> NodeRef { + Node::regular(Self { read }, NodeFlags::IN_MEMORY_PROPS) + } +} + +impl CommonImpl for ReadOnlyFnNode +where + R: ReadFn, +{ + fn size(&self, _node: &NodeRef) -> Result { + Ok(0) + } +} + +impl RegularImpl for ReadOnlyFnNode +where + R: ReadFn, +{ + fn open( + &self, + _node: &NodeRef, + opts: OpenOptions, + ) -> Result<(u64, Option), Error> { + if opts.contains(OpenOptions::WRITE) { + return Err(Error::ReadOnly); + } + Ok((0, None)) + } + + fn read( + &self, + _node: &NodeRef, + _instance: Option<&InstanceData>, + pos: u64, + buf: &mut [u8], + ) -> Result { + (self.read)(pos, buf) + } +} + +// In-memory directory + +impl MemoryDirectory { + /// Creates a [MemoryDirectory] with no children + pub fn empty() -> NodeRef { + Node::directory( + MemoryDirectory, + NodeFlags::IN_MEMORY_PROPS | NodeFlags::IN_MEMORY_SIZE, + ) + } +} + +impl CommonImpl for MemoryDirectory {} +impl DirectoryImpl for MemoryDirectory { + fn open(&self, _node: &NodeRef) -> Result { + Ok(DirectoryOpenPosition::FromCache) + } +} + +// In-memory fixed symlink + +impl CommonImpl for FixedSymlink {} +impl SymlinkImpl for FixedSymlink { + fn target(&self, _node: &NodeRef) -> Result { + Ok(self.target.clone()) + } +} + +// In-memory functional symlink + +impl FnSymlink +where + F: ReadLinkFn + 'static, +{ + /// Creates a new [FnSymlink] node + pub fn new(read: F) -> NodeRef { + let data = Self { read }; + Node::symlink( + data, + NodeFlags::IN_MEMORY_SIZE | NodeFlags::IN_MEMORY_PROPS | NodeFlags::NO_LINK_CACHE, + ) + } +} + +impl CommonImpl for FnSymlink where F: ReadLinkFn + 'static {} +impl SymlinkImpl for FnSymlink +where + F: ReadLinkFn + 'static, +{ + fn target(&self, _node: &NodeRef) -> Result { + (self.read)() + } +} + +/// Creates a read-only value node with given `value` +pub fn const_value_node(value: T) -> NodeRef +where + T: ToString + Clone + Send + Sync + 'static, +{ + ReadOnlyFnValueNode::new(move || Ok(value.clone())) +} + +/// Creates a read-write value node with given `value` +pub fn value_node(value: T) -> NodeRef +where + T: ToString + FromStr + Clone + Send + Sync + 'static, +{ + let rd_state = Arc::new(IrqSafeSpinlock::new(value)); + let wr_state = rd_state.clone(); + + FnValueNode::new( + move || Ok(rd_state.lock().clone()), + move |t| { + *wr_state.lock() = t; + Ok(()) + }, + ) 
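+    // [Editor's note, illustrative] Reads clone and stringify the current value; a
+    // write is parsed back into the value on close, as exercised by the fn_node test
+    // below:
+    //
+    //     let node = value_node(1234);
+    //     // write b"654321", reopen for read -> b"654321"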
+}
+
+/// Creates a read-only node with given read function
+pub fn read_fn_node<R: ReadFn + 'static>(read: R) -> NodeRef {
+    ReadOnlyFnNode::new(read)
+}
+
+/// Creates an in-memory directory from the iterator
+pub fn mdir<S: Into<String>, I: IntoIterator<Item = (S, NodeRef)>>(it: I) -> NodeRef {
+    let dir = Node::directory(
+        MemoryDirectory,
+        NodeFlags::IN_MEMORY_PROPS | NodeFlags::IN_MEMORY_SIZE,
+    );
+    for (name, node) in it {
+        dir.add_child(name, node).unwrap();
+    }
+    dir
+}
+
+/// Creates a static symlink pointing to given node
+pub fn f_symlink(target: NodeRef) -> NodeRef {
+    Node::symlink(FixedSymlink { target }, NodeFlags::IN_MEMORY_PROPS)
+}
+
+#[cfg(test)]
+mod tests {
+    use yggdrasil_abi::io::OpenOptions;
+
+    use crate::{
+        node::{
+            impls::{const_value_node, value_node},
+            AccessToken,
+        },
+        traits::{Read, Seek, Write},
+    };
+
+    #[test]
+    fn read_only_fn_node() {
+        let node = const_value_node("abcdef");
+        let file = node
+            .open(OpenOptions::READ, AccessToken::test_authorized())
+            .unwrap();
+        let mut buf = [0; 512];
+
+        assert_eq!(file.tell().unwrap(), 0);
+
+        assert_eq!(file.read(&mut buf[..3]).unwrap(), 3);
+        assert_eq!(&buf[..3], b"abc");
+
+        assert_eq!(file.read(&mut buf).unwrap(), 3);
+        assert_eq!(&buf[..3], b"def");
+    }
+
+    #[test]
+    fn fn_node() {
+        let node = value_node(1234);
+        let mut buf = [0; 512];
+
+        // Try to read the value
+        let file = node
+            .open(OpenOptions::READ, AccessToken::test_authorized())
+            .unwrap();
+        assert_eq!(file.tell().unwrap(), 0);
+        assert_eq!(file.read(&mut buf).unwrap(), 4);
+        assert_eq!(&buf[..4], b"1234");
+
+        // Try to write the value
+        let file = node
+            .open(OpenOptions::WRITE, AccessToken::test_authorized())
+            .unwrap();
+        assert_eq!(file.tell().unwrap(), 0);
+        assert_eq!(file.write(b"654321").unwrap(), 6);
+        drop(file);
+
+        // Try to read the value again
+        let file = node
+            .open(OpenOptions::READ, AccessToken::test_authorized())
+            .unwrap();
+        assert_eq!(file.tell().unwrap(), 0);
+        assert_eq!(file.read(&mut buf).unwrap(), 6);
+        assert_eq!(&buf[..6], b"654321");
+    }
+}
diff --git a/kernel/lib/vfs/src/node/mod.rs b/kernel/lib/vfs/src/node/mod.rs
new file mode 100644
index 00000000..0b619638
--- /dev/null
+++ b/kernel/lib/vfs/src/node/mod.rs
@@ -0,0 +1,425 @@
+use core::{any::Any, fmt};
+
+use alloc::{boxed::Box, string::String, sync::Arc, vec::Vec};
+use libk_util::sync::IrqSafeSpinlock;
+use ygg_driver_block::BlockDevice;
+use yggdrasil_abi::{
+    bitflags,
+    error::Error,
+    io::{FileMode, FileType, GroupId, UserId},
+};
+
+mod access;
+pub mod impls;
+mod traits;
+
+// Node is implemented in the following modules
+mod ops;
+mod tree;
+
+pub use access::AccessToken;
+pub use traits::{CommonImpl, DirectoryImpl, RegularImpl, SymlinkImpl};
+
+use crate::{
+    device::{BlockDeviceWrapper, CharDevice, CharDeviceWrapper},
+    PseudoTerminalMaster, PseudoTerminalSlave,
+};
+
+/// Wrapper type for a [Node] shared reference
+pub type NodeRef = Arc<Node>;
+
+bitflags! {
+    #[doc = "Describes additional flags for the node"]
+    pub struct NodeFlags: u32 {
+        #[doc = "Node's metadata only exists within the VFS cache and should not be fetched"]
+        const IN_MEMORY_PROPS: bit 0;
+        #[doc = "Node's size only exists within the VFS cache"]
+        const IN_MEMORY_SIZE: bit 1;
+        #[doc = "Don't cache symlink targets"]
+        const NO_LINK_CACHE: bit 2;
+    }
+}
+
+/// Information used when creating an entry within a directory
+#[derive(Debug, Clone)]
+pub struct CreateInfo {
+    /// New entry name
+    pub name: String,
+    /// User ID of the entry
+    pub uid: UserId,
+    /// Group ID of the entry
+    pub gid: GroupId,
+    /// Access mode of the entry
+    pub mode: FileMode,
+    /// Entry type
+    pub ty: FileType,
+}
+
+pub(crate) struct DirectoryData {
+    pub(crate) imp: Box<dyn DirectoryImpl>,
+    pub(crate) mountpoint: IrqSafeSpinlock<Option<NodeRef>>,
+    pub(crate) children: IrqSafeSpinlock<Vec<(String, NodeRef)>>,
+}
+
+pub(crate) struct SymlinkData {
+    // Cached symlink target with the literal path
+    pub(crate) target: IrqSafeSpinlock<Option<(String, NodeRef)>>,
+    pub(crate) imp: Box<dyn SymlinkImpl>,
+}
+
+enum NodeImpl {
+    Regular(Box<dyn RegularImpl>),
+    Directory(DirectoryData),
+    Block(BlockDeviceWrapper),
+    Char(CharDeviceWrapper),
+    Symlink(SymlinkData),
+
+    // These map transparently to other types of nodes
+    PseudoTerminalSlave(Arc<PseudoTerminalSlave>),
+    PseudoTerminalMaster(Arc<PseudoTerminalMaster>),
+}
+
+/// Metadata of the node
+#[derive(Clone, Copy)]
+pub struct Metadata {
+    /// User ID of the node
+    pub uid: UserId,
+    /// Group ID of the node
+    pub gid: GroupId,
+    /// Access mode of the node
+    pub mode: FileMode,
+}
+
+struct PropertyCache {
+    metadata: Metadata,
+    size: Option<u64>,
+}
+
+/// Describes a single entry within a filesystem
+pub struct Node {
+    data: NodeImpl,
+    flags: NodeFlags,
+    props: IrqSafeSpinlock<PropertyCache>,
+    parent: IrqSafeSpinlock<Option<NodeRef>>,
+}
+
+impl Metadata {
+    pub(crate) const fn default_dir() -> Metadata {
+        Metadata {
+            uid: UserId::root(),
+            gid: GroupId::root(),
+            mode: FileMode::new(0o755),
+        }
+    }
+
+    pub(crate) const fn default_file() -> Metadata {
+        Metadata {
+            uid: UserId::root(),
+            gid: GroupId::root(),
+            mode: FileMode::new(0o644),
+        }
+    }
+}
+
+impl Node {
+    fn new(data: NodeImpl, flags: NodeFlags, metadata: Metadata) -> NodeRef {
+        Arc::new(Self {
+            data,
+            flags,
+            props: IrqSafeSpinlock::new(PropertyCache {
+                metadata,
+                size: None,
+            }),
+            parent: IrqSafeSpinlock::new(None),
+        })
+    }
+
+    pub(crate) fn pseudo_terminal_nodes(
+        master: Arc<PseudoTerminalMaster>,
+        slave: Arc<PseudoTerminalSlave>,
+    ) -> (NodeRef, NodeRef) {
+        let master = Self::new(
+            NodeImpl::PseudoTerminalMaster(master),
+            NodeFlags::IN_MEMORY_PROPS | NodeFlags::IN_MEMORY_SIZE,
+            Metadata::default_file(),
+        );
+        let slave = Self::new(
+            NodeImpl::PseudoTerminalSlave(slave),
+            NodeFlags::IN_MEMORY_PROPS | NodeFlags::IN_MEMORY_SIZE,
+            Metadata::default_file(),
+        );
+
+        (master, slave)
+    }
+
+    /// Creates a new directory node with given [DirectoryImpl]
+    pub fn directory<T: DirectoryImpl + 'static>(data: T, flags: NodeFlags) -> NodeRef {
+        let data = NodeImpl::Directory(DirectoryData {
+            imp: Box::new(data),
+            mountpoint: IrqSafeSpinlock::new(None),
+            children: IrqSafeSpinlock::new(Vec::new()),
+        });
+        Self::new(data, flags, Metadata::default_dir())
+    }
+
+    /// Creates a new file node with given [RegularImpl]
+    pub fn regular<T: RegularImpl + 'static>(data: T, flags: NodeFlags) -> NodeRef {
+        Self::new(
+            NodeImpl::Regular(Box::new(data)),
+            flags,
+            Metadata::default_file(),
+        )
+    }
+
+    /// Creates a new block device node with given [BlockDevice]
+    pub fn block(device: &'static dyn BlockDevice, flags: NodeFlags) -> NodeRef {
+        Self::new(
+            NodeImpl::Block(BlockDeviceWrapper(device)),
+            flags,
+            Metadata::default_file(),
+        )
+    }
+
+    /// Creates a
new character device node with given [CharDevice] + pub fn char(device: &'static dyn CharDevice, flags: NodeFlags) -> NodeRef { + Self::new( + NodeImpl::Char(CharDeviceWrapper(device)), + flags, + Metadata::default_file(), + ) + } + + /// Creates a new symbolic link node with given [SymlinkImpl] + pub fn symlink(data: T, flags: NodeFlags) -> NodeRef { + Self::new( + NodeImpl::Symlink(SymlinkData { + target: IrqSafeSpinlock::new(None), + imp: Box::new(data), + }), + flags, + Metadata::default_file(), + ) + } + + /// Returns the impl data of the node as `dyn Any` + pub fn data_as_any(&self) -> &dyn Any { + match &self.data { + NodeImpl::Directory(dir) => dir.imp.as_any(), + NodeImpl::Regular(imp) => imp.as_any(), + NodeImpl::Block(w) => w.as_any(), + NodeImpl::Char(w) => w.as_any(), + NodeImpl::Symlink(w) => w.imp.as_any(), + NodeImpl::PseudoTerminalSlave(_) | NodeImpl::PseudoTerminalMaster(_) => todo!(), + } + } + + /// Returns the impl data of the node as `dyn CommonImpl` + pub fn data_as_common(&self) -> &dyn CommonImpl { + match &self.data { + NodeImpl::Directory(dir) => dir.imp.as_ref(), + NodeImpl::Regular(imp) => imp.as_ref(), + NodeImpl::Block(w) => w, + NodeImpl::Char(w) => w, + NodeImpl::Symlink(w) => w.imp.as_ref(), + NodeImpl::PseudoTerminalSlave(_) | NodeImpl::PseudoTerminalMaster(_) => todo!(), + } + } + + /// Attempts to cast the impl data of the node to `&T` + pub fn data_as_ref(&self) -> &T { + self.data_as_any().downcast_ref().unwrap() + } + + /// Returns the type of the node + pub fn ty(&self) -> FileType { + match &self.data { + NodeImpl::Regular(_) => FileType::File, + NodeImpl::Directory(_) => FileType::Directory, + NodeImpl::Block(_) => FileType::Block, + NodeImpl::Char(_) => FileType::Char, + NodeImpl::Symlink(_) => FileType::Symlink, + NodeImpl::PseudoTerminalSlave(_) | NodeImpl::PseudoTerminalMaster(_) => FileType::Char, + } + } + + /// Returns `true` if the node represents a directory + pub fn is_directory(&self) -> bool { + matches!(&self.data, NodeImpl::Directory(_)) + } + + /// Returns `true` if the node represents a character device and the character device is a + /// terminal + pub fn is_terminal(&self) -> bool { + if let NodeImpl::Char(dev) = &self.data { + dev.is_terminal() + } else { + false + } + } + + pub(crate) fn mountpoint_target(&self) -> Option { + match &self.data { + NodeImpl::Directory(dir) => dir.mountpoint.lock().clone(), + _ => None, + } + } + + pub(crate) fn set_mountpoint_target(self: &NodeRef, target: NodeRef) -> Result<(), Error> { + let directory = self.as_directory()?; + let mut mountpoint = directory.mountpoint.lock(); + let mut target_parent_lock = target.parent.lock(); + + if mountpoint.is_some() { + // TODO Busy + todo!(); + } + if target_parent_lock.is_some() { + // TODO mount a filesystem more than once? 
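        // A mount root can only be attached at one place: its `parent` pointer
        // doubles as the "already mounted" marker that the check below relies
        // on. A hedged sketch of the failure mode, using the in-memory helpers
        // from `impls` (names illustrative):
        //
        //     let a = MemoryDirectory::empty();
        //     let b = MemoryDirectory::empty();
        //     let root = MemoryDirectory::empty();
        //     a.set_mountpoint_target(root.clone())?; // ok, links both ways
        //     b.set_mountpoint_target(root)?;         // Err(AlreadyExists)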
+ return Err(Error::AlreadyExists); + } + if !target.is_directory() { + return Err(Error::NotADirectory); + } + + mountpoint.replace(target.clone()); + target_parent_lock.replace(self.clone()); + + Ok(()) + } + + pub(crate) fn as_directory(&self) -> Result<&DirectoryData, Error> { + match &self.data { + NodeImpl::Directory(dir) => Ok(dir), + _ => Err(Error::InvalidFile), + } + } + + pub(crate) fn as_regular(&self) -> Result<&dyn RegularImpl, Error> { + match &self.data { + NodeImpl::Regular(imp) => Ok(imp.as_ref()), + _ => Err(Error::InvalidFile), + } + } + + pub(crate) fn as_symlink(&self) -> Result<&SymlinkData, Error> { + match &self.data { + NodeImpl::Symlink(imp) => Ok(imp), + _ => Err(Error::InvalidFile), + } + } + + pub(crate) fn read_symlink_node(self: &NodeRef, _token: AccessToken) -> Result { + let symlink = self.as_symlink()?; + let mut cache = symlink.target.lock(); + + // If caching is not disabled and the target is cached, return it + if !self.flags.contains(NodeFlags::NO_LINK_CACHE) + && let Some((_, cached)) = cache.as_ref() + { + return Ok(cached.clone()); + } + + let target = symlink.imp.target(self)?; + + Ok(cache.insert((String::new(), target)).1.clone()) + } +} + +impl fmt::Debug for Node { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self.data { + NodeImpl::Directory(_) => f.debug_struct("DirectoryNode").finish_non_exhaustive(), + NodeImpl::Regular(_) => f.debug_struct("RegularNode").finish_non_exhaustive(), + NodeImpl::Char(_) => f.debug_struct("CharNode").finish_non_exhaustive(), + NodeImpl::Block(_) => f.debug_struct("BlockNode").finish_non_exhaustive(), + NodeImpl::Symlink(_) => f.debug_struct("SymlinkNode").finish_non_exhaustive(), + NodeImpl::PseudoTerminalSlave(_) => f + .debug_struct("PseudoTerminalSlave") + .finish_non_exhaustive(), + NodeImpl::PseudoTerminalMaster(_) => f + .debug_struct("PseudoTerminalMaster") + .finish_non_exhaustive(), + } + } +} + +#[cfg(test)] +mod tests { + use core::any::Any; + use std::sync::Arc; + + use crate::node::NodeFlags; + + use super::{CommonImpl, DirectoryImpl, Node, RegularImpl}; + + struct DummyDirectory; + struct DummyFile; + + impl CommonImpl for DummyDirectory {} + impl DirectoryImpl for DummyDirectory {} + + impl CommonImpl for DummyFile {} + impl RegularImpl for DummyFile {} + + #[test] + fn node_sync_send() { + fn node_send(_n: &T) {} + + let node = Node::regular(DummyFile, NodeFlags::empty()); + + node_send(&node); + } + + #[test] + fn dir_cache_add() { + let d1 = Node::directory(DummyDirectory, NodeFlags::empty()); + let d2 = Node::directory(DummyDirectory, NodeFlags::empty()); + let f1 = Node::regular(DummyFile, NodeFlags::empty()); + + assert!(Arc::ptr_eq(&f1.parent(), &f1)); + assert_eq!(d1.children_len().unwrap(), 0); + + d1.add_child("f1", f1.clone()).unwrap(); + assert!(Arc::ptr_eq(&f1.parent(), &d1)); + assert_eq!(d1.children_len().unwrap(), 1); + + assert!(Arc::ptr_eq(&d2.parent(), &d2)); + d2.add_child("d1", d1.clone()).unwrap(); + assert!(Arc::ptr_eq(&f1.parent(), &d1)); + assert!(Arc::ptr_eq(&d1.parent(), &d2)); + assert_eq!(d1.children_len().unwrap(), 1); + assert_eq!(d2.children_len().unwrap(), 1); + } + + #[test] + fn in_mem_dir_size_coherence() { + let d = Node::directory(DummyDirectory, NodeFlags::IN_MEMORY_SIZE); + + for i in 0..10 { + let name = format!("f{}", i); + let node = Node::regular(DummyFile, NodeFlags::empty()); + + d.add_child(name, node).unwrap(); + assert_eq!(d.size().unwrap(), d.children_len().unwrap() as u64); + } + } + + #[test] + fn data_any() { + struct 
AnyData { + value: u32, + } + + impl CommonImpl for AnyData { + fn as_any(&self) -> &dyn Any { + self + } + } + impl DirectoryImpl for AnyData {} + + let d = Node::directory(AnyData { value: 1234 }, NodeFlags::empty()); + let r = d.data_as_ref::(); + + assert_eq!(r.value, 1234); + } +} diff --git a/kernel/lib/vfs/src/node/ops.rs b/kernel/lib/vfs/src/node/ops.rs new file mode 100644 index 00000000..bfb88222 --- /dev/null +++ b/kernel/lib/vfs/src/node/ops.rs @@ -0,0 +1,216 @@ +use core::mem::MaybeUninit; + +use yggdrasil_abi::{ + error::Error, + io::{DeviceRequest, DirectoryEntry, FileMode, GroupId, OpenOptions, UserId}, +}; + +use crate::file::{File, FileRef}; + +use super::{AccessToken, CreateInfo, Metadata, Node, NodeFlags, NodeImpl, NodeRef}; + +impl Node { + // Devices + + /// Performs a device-specific function on a the device node + pub fn device_request(self: &NodeRef, req: &mut DeviceRequest) -> Result<(), Error> { + match &self.data { + NodeImpl::Block(dev) => dev.0.device_request(req), + NodeImpl::Char(dev) => dev.0.device_request(req), + _ => Err(Error::InvalidOperation), + } + } + + // Devices + files + + /// Opens the node with given [OpenOptions]. Only works for regular files and devices. For + /// directories, use [Node::open_directory]. + pub fn open(self: &NodeRef, opts: OpenOptions, _check: AccessToken) -> Result { + match &self.data { + NodeImpl::Regular(imp) => { + let (pos, instance) = imp.open(self, opts)?; + if opts.contains(OpenOptions::TRUNCATE) { + imp.truncate(self, 0)?; + } + Ok(File::regular(self.clone(), pos, instance, opts)) + } + NodeImpl::Block(dev) => File::block(dev.clone(), self.clone(), opts), + NodeImpl::Char(dev) => File::char(dev.clone(), self.clone(), opts), + // TODO: maybe merge open_directory and open? + NodeImpl::Directory(_) => Err(Error::IsADirectory), + NodeImpl::Symlink(_) => todo!(), + NodeImpl::PseudoTerminalSlave(_) | NodeImpl::PseudoTerminalMaster(_) => todo!(), + } + } + + // Directory + + /// Opens the node as a directory for reading its entries + pub fn open_directory(self: &NodeRef, _check: AccessToken) -> Result { + let dir = self.as_directory()?; + let pos = dir.imp.open(self)?; + Ok(File::directory(self.clone(), pos)) + } + + /// Reads entries from the directory + pub fn read_directory( + self: &NodeRef, + pos: u64, + entries: &mut [MaybeUninit], + ) -> Result<(usize, u64), Error> { + self.as_directory()?.imp.read_entries(self, pos, entries) + } + + /// Attempts to look up a child node with given name inside the directory node in the tree + /// cache. If no such node is present there, will attempt to fetch it from the underlying + /// filesystem. + pub fn lookup_or_load( + self: &NodeRef, + name: &str, + _check: AccessToken, + ) -> Result { + let dir = self.as_directory()?; + let children = dir.children.lock(); + + if let Some((_, node)) = children.iter().find(|(name_, _)| name_ == name) { + return Ok(node.clone()); + } + + // TODO lookup in real FS + + Err(Error::DoesNotExist) + } + + /// Creates an entry within a directory with given [CreateInfo]. 
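    // A hedged sketch of driving `create` below; in the kernel this is reached
    // from the syscall layer, and the `parent` directory and `token` here are
    // assumed to come from an earlier lookup:
    //
    //     let info = CreateInfo {
    //         name: "hello.txt".into(),
    //         uid: UserId::root(),
    //         gid: GroupId::root(),
    //         mode: FileMode::new(0o644),
    //         ty: FileType::File,
    //     };
    //     let node = parent.create(&info, token)?;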
+ pub fn create(self: &NodeRef, info: &CreateInfo, check: AccessToken) -> Result { + let directory = self.as_directory()?; + let node = directory.imp.create_node(self, info.ty)?; + + // Fill out the node info + node.set_access( + Some(info.uid), + Some(info.gid), + Some(info.mode), + check.clone(), + )?; + + self.create_node(node, &info.name, check) + } + + /// Attaches a pre-created node to its parent + pub fn create_node( + self: &NodeRef, + node: NodeRef, + name: &str, + _check: AccessToken, + ) -> Result { + let directory = self.as_directory()?; + + match directory.imp.attach_node(self, &node, name) { + Ok(_) | Err(Error::NotImplemented) => (), + Err(err) => return Err(err), + } + + // Attach the created node to the directory in memory cache + self.add_child(name, node.clone())?; + + Ok(node) + } + + /// Removes a regular file, device or symlink from the directory + pub fn remove_file(self: &NodeRef, name: &str, check: AccessToken) -> Result<(), Error> { + let directory = self.as_directory()?; + let child = self.lookup_or_load(name, check)?; + + if child.is_directory() { + return Err(Error::IsADirectory); + } + + // Detach the node in the real filesystem + match directory.imp.unlink_node(self, name) { + Ok(_) | Err(Error::NotImplemented) => (), + Err(err) => return Err(err), + } + + // Detach the node in the tree cache + { + let mut children = directory.children.lock(); + children.retain(|(name_, _)| name != name_); + } + + // TODO child.destroy() or something? + Ok(()) + } + + // Common + + /// Changes user/group ID or access mode of the node + pub fn set_access( + self: &NodeRef, + uid: Option, + gid: Option, + mode: Option, + _check: AccessToken, + ) -> Result<(), Error> { + if uid.is_none() && gid.is_none() && mode.is_none() { + return Err(Error::InvalidOperation); + } + + let mut metadata = self.metadata()?; + + if let Some(uid) = uid { + metadata.uid = uid; + } + if let Some(gid) = gid { + metadata.gid = gid; + } + if let Some(mode) = mode { + metadata.mode = mode; + } + + // Update cached props + self.props.lock().metadata = metadata; + + if !self.flags.contains(NodeFlags::IN_MEMORY_PROPS) { + // Update permissions in the real node + todo!(); + } + + Ok(()) + } + + /// Returns the "metadata" of the file: uid, gid, access mode + pub fn metadata(self: &NodeRef) -> Result { + if self.flags.contains(NodeFlags::IN_MEMORY_PROPS) { + let props = self.props.lock(); + return Ok(props.metadata); + } + + self.data_as_common().metadata(self) + } + + // TODO clarify directory size + /// Returns the size in bytes of the node + pub fn size(self: &NodeRef) -> Result { + // Try to fetch the size from the cache + let mut props = self.props.lock(); + + if let Some(size) = props.size { + return Ok(size); + } + + if self.flags.contains(NodeFlags::IN_MEMORY_SIZE) { + if let Ok(dir) = self.as_directory() { + return Ok(dir.children.lock().len() as _); + } + + Err(Error::NotImplemented) + } else { + // Fetch the size from the node + let size = self.data_as_common().size(self)?; + props.size = Some(size); + + Ok(size) + } + } +} diff --git a/kernel/lib/vfs/src/node/traits.rs b/kernel/lib/vfs/src/node/traits.rs new file mode 100644 index 00000000..aea24a78 --- /dev/null +++ b/kernel/lib/vfs/src/node/traits.rs @@ -0,0 +1,145 @@ +use alloc::{boxed::Box, string::String}; +use core::{any::Any, mem::MaybeUninit}; + +use yggdrasil_abi::{ + error::Error, + io::{DirectoryEntry, FileType, OpenOptions}, +}; + +use crate::file::{DirectoryOpenPosition, InstanceData}; + +use super::{Metadata, NodeRef}; + +/// Common 
interface shared by all filesystem nodes
+#[allow(unused)]
+pub trait CommonImpl: Send + Sync {
+    /// Returns `&self` as a reference to `dyn Any`
+    fn as_any(&self) -> &dyn Any {
+        unimplemented!();
+    }
+
+    /// Fetches the metadata of the file from underlying storage
+    fn metadata(&self, node: &NodeRef) -> Result<Metadata, Error> {
+        Err(Error::NotImplemented)
+    }
+
+    /// Fetches the size of the file from underlying storage
+    fn size(&self, node: &NodeRef) -> Result<u64, Error> {
+        Err(Error::NotImplemented)
+    }
+}
+
+/// Regular file interface
+#[allow(unused)]
+pub trait RegularImpl: CommonImpl {
+    /// Opens the file for reading/writing and returns a `(start position, instance data)` tuple
+    fn open(
+        &self,
+        node: &NodeRef,
+        opts: OpenOptions,
+    ) -> Result<(u64, Option<InstanceData>), Error> {
+        Err(Error::NotImplemented)
+    }
+
+    /// Closes a file
+    fn close(&self, node: &NodeRef, instance: Option<&InstanceData>) -> Result<(), Error> {
+        Ok(())
+    }
+
+    /// Reads data from the file into given buffer
+    fn read(
+        &self,
+        node: &NodeRef,
+        instance: Option<&InstanceData>,
+        pos: u64,
+        buf: &mut [u8],
+    ) -> Result<usize, Error> {
+        Err(Error::NotImplemented)
+    }
+
+    /// Writes data to the file from given buffer
+    fn write(
+        &self,
+        node: &NodeRef,
+        instance: Option<&InstanceData>,
+        pos: u64,
+        buf: &[u8],
+    ) -> Result<usize, Error> {
+        Err(Error::NotImplemented)
+    }
+
+    /// Resizes the file to requested size
+    fn truncate(&self, node: &NodeRef, new_size: u64) -> Result<(), Error> {
+        Err(Error::NotImplemented)
+    }
+}
+
+/// Directory implementation
+#[allow(unused)]
+pub trait DirectoryImpl: CommonImpl {
+    /// Opens a directory for reading its entries. Returns [DirectoryOpenPosition] to specify the
+    /// starting position.
+    fn open(&self, node: &NodeRef) -> Result<DirectoryOpenPosition, Error> {
+        Err(Error::NotImplemented)
+    }
+
+    /// Fetches entries from a directory into given buffer
+    fn read_entries(
+        &self,
+        node: &NodeRef,
+        pos: u64,
+        entries: &mut [MaybeUninit<DirectoryEntry>],
+    ) -> Result<(usize, u64), Error> {
+        Err(Error::NotImplemented)
+    }
+
+    /// Creates a child node, but does not associate it with the directory yet
+    fn create_node(&self, parent: &NodeRef, ty: FileType) -> Result<NodeRef, Error> {
+        Err(Error::ReadOnly)
+    }
+
+    /// Associates the given node with the directory, creating an entry for it inside
+    fn attach_node(&self, parent: &NodeRef, child: &NodeRef, name: &str) -> Result<(), Error> {
+        Err(Error::NotImplemented)
+    }
+
+    /// Removes an entry of the directory with given name
+    fn unlink_node(&self, parent: &NodeRef, name: &str) -> Result<(), Error> {
+        Err(Error::NotImplemented)
+    }
+
+    /// Fetches the child of the directory with given name
+    fn lookup(&self, node: &NodeRef, name: &str) -> Result<NodeRef, Error> {
+        Err(Error::NotImplemented)
+    }
+
+    /// Returns the "length" of the directory in entries
+    fn len(&self, node: &NodeRef) -> Result<usize, Error> {
+        Err(Error::NotImplemented)
+    }
+}
+
+/// Symbolic link interface
+#[allow(unused)]
+pub trait SymlinkImpl: CommonImpl {
+    /// Returns the target node (if such is available directly) of the link
+    fn target(&self, node: &NodeRef) -> Result<NodeRef, Error> {
+        Err(Error::NotImplemented)
+    }
+
+    /// Fetches the contents of the symlink into a [String]
+    fn read_to_string(&self) -> Result<String, Error> {
+        let mut data = Box::new([0; 512]);
+        let len = self.read_link(data.as_mut())?;
+        if len == data.len() {
+            return Err(Error::InvalidFile);
+        }
+        let str = core::str::from_utf8(&data[..len]).map_err(|_| Error::InvalidFile)?;
+        Ok(String::from(str))
+    }
+
+    /// Fetches the contents of the symlink into a buffer
+    fn read_link(&self, buf: &mut [u8]) -> Result<usize, Error> {
+        Err(Error::NotImplemented)
+    }
+}
diff --git a/kernel/lib/vfs/src/node/tree.rs b/kernel/lib/vfs/src/node/tree.rs
new file mode 100644
index 00000000..5e701fa1
--- /dev/null
+++ b/kernel/lib/vfs/src/node/tree.rs
@@ -0,0 +1,46 @@
+use yggdrasil_abi::error::Error;
+
+use alloc::string::String;
+
+use super::{Node, NodeRef};
+
+impl Node {
+    /// Returns the parent node of this node, or itself if no parent is present
+    pub fn parent(self: &NodeRef) -> NodeRef {
+        self.parent.lock().as_ref().unwrap_or(self).clone()
+    }
+
+    /// Returns `true` if this node is a root
+    pub fn is_root(self: &NodeRef) -> bool {
+        self.parent.lock().is_none()
+    }
+
+    /// Returns the count of entries in the directory
+    pub fn children_len(&self) -> Result<usize, Error> {
+        let directory = self.as_directory()?;
+        Ok(directory.children.lock().len())
+    }
+
+    /// Adds an entry to the directory tree cache
+    pub fn add_child<S: Into<String>>(
+        self: &NodeRef,
+        name: S,
+        child: NodeRef,
+    ) -> Result<(), Error> {
+        let name = name.into();
+        let directory = self.as_directory()?;
+        let mut children = directory.children.lock();
+
+        // TODO check if an entry already exists with such name
+        // if children.contains_key(&name) {
+        //     log::warn!("Directory cache already contains an entry: {:?}", name);
+        //     return Err(Error::AlreadyExists);
+        // }
+
+        assert!(child.parent.replace(Some(self.clone())).is_none());
+        children.push((name, child));
+
+        Ok(())
+    }
+}
diff --git a/kernel/lib/vfs/src/path.rs b/kernel/lib/vfs/src/path.rs
new file mode 100644
index 00000000..e69de29b
diff --git a/kernel/lib/vfs/src/poll.rs b/kernel/lib/vfs/src/poll.rs
new file mode 100644
index 00000000..d575c8d8
--- /dev/null
+++ b/kernel/lib/vfs/src/poll.rs
@@ -0,0 +1,68 @@
+use core::{
+    future::poll_fn,
+    task::{Context, Poll},
+    time::Duration,
+};
+
+use alloc::collections::BTreeMap;
+
+use futures_util::{future::BoxFuture, FutureExt};
+use libk_thread::{runtime, sync::Mutex};
+use libk_util::sync::LockMethod;
+use yggdrasil_abi::{error::Error, io::RawFd};
+
+use crate::{FileReadiness, FileRef};
+
+/// Poll channel implementation. Allows blocking until a file descriptor signals an event or a
+/// timeout is reached.
+pub struct FdPoll {
+    fds: Mutex<BTreeMap<RawFd, FileRef>>,
+}
+
+impl FdPoll {
+    /// Creates a new poll channel
+    pub fn new() -> Self {
+        Self {
+            fds: Mutex::new(BTreeMap::new()),
+        }
+    }
+
+    /// Adds a file descriptor of interest to the poll channel
+    pub fn add(&self, fd: RawFd, file: FileRef) {
+        self.fds.lock().unwrap().insert(fd, file);
+    }
+
+    /// Polls the channel once, returning either a file descriptor or timeout
+    pub async fn wait(&self, timeout: Option<Duration>) -> Option<(RawFd, Result<(), Error>)> {
+        let mut timeout = timeout.map(|t| runtime::sleep(t).boxed());
+        poll_fn(|cx| self.poll_once(cx, &mut timeout)).await
+    }
+
+    fn poll_once(
+        &self,
+        cx: &mut Context<'_>,
+        timeout: &mut Option<BoxFuture<'static, ()>>,
+    ) -> Poll<Option<(RawFd, Result<(), Error>)>> {
+        if let Some(timeout) = timeout.as_mut()
+            && timeout.as_mut().poll(cx).is_ready()
+        {
+            // Timeout
+            return Poll::Ready(None);
+        }
+
+        for (&fd, file) in self.fds.lock().unwrap().iter() {
+            if let Poll::Ready(result) = file.poll_read(cx) {
+                return Poll::Ready(Some((fd, result)));
+            }
+        }
+
+        // Wait
+        Poll::Pending
+    }
+}
+
+impl FileReadiness for FdPoll {
+    fn poll_read(&self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        self.poll_once(cx, &mut None).map(|_| Ok(()))
+    }
+}
diff --git a/kernel/lib/vfs/src/pty.rs b/kernel/lib/vfs/src/pty.rs
new file mode 100644
index 00000000..597360a5
--- /dev/null
+++ b/kernel/lib/vfs/src/pty.rs
@@ -0,0 +1,397 @@
+//!
Pseudo-terminal devices + +// TODO handle werase key +use core::{ + mem::MaybeUninit, + sync::atomic::{AtomicBool, Ordering}, + task::{Context, Poll}, +}; + +use alloc::{boxed::Box, sync::Arc}; +use libk_thread::{block, signal_process_group}; +use libk_util::{ + ring::{LossyRingQueue, RingBuffer}, + sync::{spin_rwlock::IrqSafeRwLock, IrqSafeSpinlock}, +}; +use yggdrasil_abi::{ + error::Error, + io::{ + DeviceRequest, TerminalInputOptions, TerminalLineOptions, TerminalOptions, + TerminalOutputOptions, TerminalSize, + }, + process::{ProcessId, Signal}, +}; + +const CAPACITY: usize = 8192; + +struct PtySlaveToMasterHalf { + ring: LossyRingQueue, + shutdown: AtomicBool, +} + +struct MasterToSlaveBuffer { + pending: Box<[MaybeUninit]>, + position: usize, +} + +struct PtyMasterToSlaveHalf { + // Actual data to be read by the slave + buffer: IrqSafeSpinlock, + ready_ring: LossyRingQueue, + signal_pgroup: IrqSafeRwLock>, +} + +/// Pseudo-terminal shared device +pub struct PseudoTerminal { + config: IrqSafeRwLock, + slave_to_master: PtySlaveToMasterHalf, + master_to_slave: PtyMasterToSlaveHalf, + size: IrqSafeRwLock, +} +/// Slave part of a PTY device +#[derive(Clone)] +pub struct PseudoTerminalSlave { + pty: Arc, +} +/// Master part of a PTY device +#[derive(Clone)] +pub struct PseudoTerminalMaster { + pty: Arc, +} + +fn read_all(source: &mut RingBuffer, target: &mut [u8], eof: Option) -> usize { + let mut pos = 0; + while pos < target.len() + && let Some(ch) = source.try_read() + { + if eof.map(|eof| eof == ch).unwrap_or(false) { + break; + } + target[pos] = ch; + pos += 1; + } + pos +} + +impl MasterToSlaveBuffer { + pub fn write_pending(&mut self, byte: u8) { + if self.position == self.pending.len() { + // TODO flush the buffer + todo!(); + } + + self.pending[self.position].write(byte); + self.position += 1; + } + + pub fn erase_pending(&mut self) -> bool { + if self.position != 0 { + self.position -= 1; + true + } else { + false + } + } + + pub fn flush(&mut self) -> &[u8] { + let data = unsafe { MaybeUninit::slice_assume_init_ref(&self.pending[..self.position]) }; + self.position = 0; + data + } +} + +impl PtyMasterToSlaveHalf { + pub fn with_capacity(capacity: usize) -> Result { + Ok(Self { + buffer: IrqSafeSpinlock::new(MasterToSlaveBuffer { + pending: Box::new_uninit_slice(256), + position: 0, + }), + ready_ring: LossyRingQueue::try_with_capacity(capacity)?, + signal_pgroup: IrqSafeRwLock::new(None), + }) + } + + pub fn read(&self, buf: &mut [u8], eof: Option) -> Result { + if let Some(mut lock) = self.ready_ring.try_read_lock() { + Ok(read_all(&mut lock, buf, eof)) + } else { + block!(self.read_async(buf, eof).await)? 
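            // Fast path above: assuming `try_read_lock` returns `None` when no
            // data is immediately available, the ring is drained synchronously
            // when possible; otherwise the read falls back to the async path
            // and `block!` (the author's macro) drives that future to
            // completion on the current thread. Hedged caller-side sketch:
            //
            //     let mut line = [0u8; 64];
            //     let n = slave.read(&mut line)?; // parks until the master
            //                                     // side flushes a line or EOF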
+ } + } + + pub fn poll_readable(&self, cx: &mut Context<'_>) -> Poll> { + self.ready_ring.poll_readable(cx).map(Ok) + } + + pub fn flush(&self) { + let mut lock = self.buffer.lock(); + let data = lock.flush(); + self.ready_ring.write_multiple(data); + } + + async fn read_async(&self, buffer: &mut [u8], eof: Option) -> Result { + let mut lock = self.ready_ring.read_lock().await; + Ok(read_all(&mut lock, buffer, eof)) + } +} + +impl PtySlaveToMasterHalf { + pub fn with_capacity(capacity: usize) -> Result { + Ok(Self { + ring: LossyRingQueue::try_with_capacity(capacity)?, + shutdown: AtomicBool::new(false), + }) + } + + pub fn handle_input(&self, byte: u8, _config: &TerminalOutputOptions) { + // TODO handle output flags + self.ring.write(byte); + } + + pub fn read(&self, buf: &mut [u8]) -> Result { + if self.shutdown.load(Ordering::Acquire) { + return Ok(0); + } + + if let Some(mut lock) = self.ring.try_read_lock() { + let count = read_all(&mut lock, buf, None); + Ok(count) + } else { + todo!() + } + } + + pub fn poll_readable(&self, cx: &mut Context<'_>) -> Poll> { + if self.shutdown.load(Ordering::Acquire) || self.ring.poll_readable(cx).is_ready() { + Poll::Ready(Ok(())) + } else { + Poll::Pending + } + } +} + +impl PseudoTerminal { + /// Creates a pair of PTY slave/master devices + pub fn new( + config: TerminalOptions, + size: TerminalSize, + ) -> Result<(PseudoTerminalMaster, PseudoTerminalSlave), Error> { + let master_to_slave = PtyMasterToSlaveHalf::with_capacity(CAPACITY)?; + let slave_to_master = PtySlaveToMasterHalf::with_capacity(CAPACITY)?; + + let pty = Arc::new(Self { + config: IrqSafeRwLock::new(config), + master_to_slave, + slave_to_master, + size: IrqSafeRwLock::new(size), + }); + + let master = PseudoTerminalMaster { pty: pty.clone() }; + let slave = PseudoTerminalSlave { pty }; + + Ok((master, slave)) + } + + fn putc_from_slave(&self, byte: u8) { + let config = self.config.read(); + self.slave_to_master.handle_input(byte, &config.output) + } + + fn putc_from_master(&self, mut byte: u8) { + let config = self.config.read(); + + let mut buffer = self.master_to_slave.buffer.lock(); + + if byte == b'\r' && config.input.contains(TerminalInputOptions::CR_TO_NL) { + byte = b'\n'; + } + + if config.is_canonical() { + // Canonical line processing + + // Echo back + if byte == config.chars.erase { + let echo = + buffer.erase_pending() && config.line.contains(TerminalLineOptions::ECHO_ERASE); + + if echo { + for &ch in b"\x1B[D \x1B[D" { + self.slave_to_master.handle_input(ch, &config.output); + } + } + + return; + } else if byte == b'\n' { + // TODO NL_TO_CRNL + if config.is_echo_newline() { + self.slave_to_master.handle_input(byte, &config.output); + } + } else if byte.is_ascii_control() { + if config.line.contains(TerminalLineOptions::ECHO) { + self.slave_to_master.handle_input(b'^', &config.output); + self.slave_to_master + .handle_input(byte + 0x40, &config.output); + } + } else if config.line.contains(TerminalLineOptions::ECHO) { + self.slave_to_master.handle_input(byte, &config.output); + } + + if byte == config.chars.interrupt { + if config.line.contains(TerminalLineOptions::SIGNAL) { + self.slave_to_master.ring.notify_all(); + + if let Some(group_id) = *self.master_to_slave.signal_pgroup.read() { + signal_process_group(group_id, Signal::Interrupted); + self.master_to_slave.ready_ring.notify_all(); + return; + } + } else { + buffer.write_pending(byte); + } + } else { + buffer.write_pending(byte); + } + + if byte == b'\n' || byte == config.chars.eof { + let data = 
buffer.flush(); + self.master_to_slave.ready_ring.write_multiple(data); + } + } else { + // Raw line processing + self.master_to_slave.ready_ring.write(byte); + } + } + + fn write_from_slave(&self, buf: &[u8]) -> Result { + for &ch in buf { + self.putc_from_slave(ch); + } + Ok(buf.len()) + } + + fn write_from_master(&self, buf: &[u8]) -> Result { + for &ch in buf { + self.putc_from_master(ch); + } + Ok(buf.len()) + } + + fn read_from_slave(&self, buf: &mut [u8]) -> Result { + self.slave_to_master.read(buf) + } + + fn read_from_master(&self, buf: &mut [u8]) -> Result { + let eof = { + let config = self.config.read(); + config.is_canonical().then_some(config.chars.eof) + }; + self.master_to_slave.read(buf, eof) + } + + fn poll_from_slave(&self, cx: &mut Context<'_>) -> Poll> { + self.slave_to_master.poll_readable(cx) + } + + fn poll_from_master(&self, cx: &mut Context<'_>) -> Poll> { + self.master_to_slave.poll_readable(cx) + } + + fn close_master(&self) { + let config = self.config.read(); + self.master_to_slave.ready_ring.write(config.chars.eof); + } + + fn device_request(&self, req: &mut DeviceRequest) -> Result<(), Error> { + match req { + DeviceRequest::SetTerminalGroup(group_id) => { + log::info!("SetTerminalGroup {}", group_id); + self.master_to_slave + .signal_pgroup + .write() + .replace(*group_id); + Ok(()) + } + DeviceRequest::GetTerminalOptions(options) => { + options.write(*self.config.read()); + Ok(()) + } + DeviceRequest::SetTerminalOptions(options) => { + self.master_to_slave.flush(); + *self.config.write() = *options; + Ok(()) + } + DeviceRequest::GetTerminalSize(size) => { + size.write(*self.size.read()); + Ok(()) + } + DeviceRequest::SetTerminalSize(size) => { + // TODO SIGWINCH? + // TODO validate + *self.size.write() = *size; + Ok(()) + } + _ => Err(Error::InvalidOperation), + } + } +} + +impl PseudoTerminalSlave { + /// Reads from the master-to-slave half of the PTY + pub fn read(&self, buf: &mut [u8]) -> Result { + self.pty.read_from_master(buf) + } + + /// Writes to the slave-to-master half of the PTY + pub fn write(&self, buf: &[u8]) -> Result { + self.pty.write_from_slave(buf) + } + + /// Polls PTY read readiness + pub fn poll_read(&self, cx: &mut Context<'_>) -> Poll> { + self.pty.poll_from_master(cx) + } + + /// Performs a device-specific request to the PTY + pub fn device_request(&self, req: &mut DeviceRequest) -> Result<(), Error> { + self.pty.device_request(req) + } +} + +impl PseudoTerminalMaster { + /// Reads from the slave-to-master half of the PTY + pub fn read(&self, buf: &mut [u8]) -> Result { + self.pty.read_from_slave(buf) + } + + /// Writes to the master-to-slave half of the PTY + pub fn write(&self, buf: &[u8]) -> Result { + self.pty.write_from_master(buf) + } + + /// Polls PTY read readiness + pub fn poll_read(&self, cx: &mut Context<'_>) -> Poll> { + self.pty.poll_from_slave(cx) + } + + /// Performs a device-specific request to the PTY + pub fn device_request(&self, req: &mut DeviceRequest) -> Result<(), Error> { + self.pty.device_request(req) + } +} + +impl Drop for PseudoTerminalMaster { + fn drop(&mut self) { + self.pty.close_master(); + } +} + +impl Drop for PseudoTerminalSlave { + fn drop(&mut self) { + self.pty + .slave_to_master + .shutdown + .store(true, Ordering::Release); + self.pty.slave_to_master.ring.notify_all(); + } +} diff --git a/kernel/lib/vfs/src/shared_memory.rs b/kernel/lib/vfs/src/shared_memory.rs new file mode 100644 index 00000000..e852a9a7 --- /dev/null +++ b/kernel/lib/vfs/src/shared_memory.rs @@ -0,0 +1,53 @@ +use 
core::mem::MaybeUninit; + +use alloc::vec::Vec; +use libk_mm::{ + address::{AsPhysicalAddress, PhysicalAddress}, + table::MapAttributes, + PageBox, PageProvider, +}; +use yggdrasil_abi::error::Error; + +/// Shared memory VFS object +pub struct SharedMemory { + pages: Vec>>, +} + +impl SharedMemory { + /// Creates a new buffer of shared memory + pub fn new(size: usize) -> Result { + assert_eq!(size & 0xFFF, 0); + let page_count = size / 0x1000; + + let pages = (0..page_count) + .map(|_| PageBox::new_uninit()) + .collect::>()?; + + Ok(Self { pages }) + } +} + +impl PageProvider for SharedMemory { + fn get_page(&self, offset: u64) -> Result { + // TODO: magic numbers + let index = (offset / 0x1000) as usize; + self.pages + .get(index) + .map(|bx| unsafe { bx.as_physical_address() }) + .ok_or(Error::InvalidMemoryOperation) + } + + fn release_page(&self, _offset: u64, _phys: PhysicalAddress) -> Result<(), Error> { + // TODO track get/release? + Ok(()) + } + + fn clone_page( + &self, + _offset: u64, + _src_phys: PhysicalAddress, + _src_attrs: MapAttributes, + ) -> Result { + todo!() + } +} diff --git a/kernel/lib/vfs/src/socket.rs b/kernel/lib/vfs/src/socket.rs new file mode 100644 index 00000000..86c72412 --- /dev/null +++ b/kernel/lib/vfs/src/socket.rs @@ -0,0 +1,102 @@ +use core::ops::Deref; + +use alloc::sync::Arc; +use yggdrasil_abi::{ + error::Error, + net::{SocketAddr, SocketOption}, +}; + +use crate::FileReadiness; + +/// Interface for interacting with network sockets +#[allow(unused)] +pub trait Socket: FileReadiness + Send { + /// Socket listen/receive address + fn local_address(&self) -> SocketAddr; + + /// Socket remote address + fn remote_address(&self) -> Option; + + /// Closes a socket + fn close(&self) -> Result<(), Error>; + + /// Updates a socket option + fn set_option(&self, option: &SocketOption) -> Result<(), Error> { + Err(Error::InvalidOperation) + } + + /// Gets a socket option + fn get_option(&self, option: &mut SocketOption) -> Result<(), Error> { + Err(Error::InvalidOperation) + } +} + +/// Stateless/packet-based socket interface +pub trait PacketSocket: Socket { + /// Receives a packet into provided buffer. Will return an error if packet cannot be placed + /// within the buffer. 
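    // Datagram semantics: one call to `receive` consumes exactly one packet,
    // and a packet larger than `buffer` is reported as an error rather than a
    // partial read. A hedged echo-loop sketch over this trait (socket
    // construction elided; it lives in the network stack, not this crate):
    //
    //     let mut buf = [0u8; 1500];
    //     loop {
    //         let (peer, len) = sock.receive(&mut buf)?;
    //         sock.send(Some(peer), &buf[..len])?;
    //     }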
+    fn receive(&self, buffer: &mut [u8]) -> Result<(SocketAddr, usize), Error>;
+
+    /// Sends provided data to the recipient specified by `destination`
+    fn send(&self, destination: Option<SocketAddr>, data: &[u8]) -> Result<usize, Error>;
+}
+
+/// Connection-based client socket interface
+pub trait ConnectionSocket: Socket {
+    /// Receives data into provided buffer
+    fn receive(&self, buffer: &mut [u8]) -> Result<usize, Error>;
+
+    /// Transmits data
+    fn send(&self, data: &[u8]) -> Result<usize, Error>;
+}
+/// Connection-based listener socket interface
+pub trait ListenerSocket: Socket {
+    /// Blocks the execution until an incoming connection is accepted
+    fn accept(&self) -> Result<(SocketAddr, Arc<dyn ConnectionSocket>), Error>;
+}
+
+pub struct PacketSocketWrapper(pub Arc<dyn PacketSocket>);
+pub struct ListenerSocketWrapper(pub Arc<dyn ListenerSocket>);
+pub struct ConnectionSocketWrapper(pub Arc<dyn ConnectionSocket>);
+
+impl Deref for PacketSocketWrapper {
+    type Target = dyn PacketSocket;
+
+    fn deref(&self) -> &Self::Target {
+        self.0.as_ref()
+    }
+}
+
+impl Drop for PacketSocketWrapper {
+    fn drop(&mut self) {
+        self.0.close().ok();
+    }
+}
+
+impl Deref for ListenerSocketWrapper {
+    type Target = dyn ListenerSocket;
+
+    fn deref(&self) -> &Self::Target {
+        self.0.as_ref()
+    }
+}
+
+impl Drop for ListenerSocketWrapper {
+    fn drop(&mut self) {
+        self.0.close().ok();
+    }
+}
+
+impl Deref for ConnectionSocketWrapper {
+    type Target = dyn ConnectionSocket;
+
+    fn deref(&self) -> &Self::Target {
+        self.0.as_ref()
+    }
+}
+
+impl Drop for ConnectionSocketWrapper {
+    fn drop(&mut self) {
+        self.0.close().ok();
+    }
+}
diff --git a/kernel/lib/vfs/src/timer.rs b/kernel/lib/vfs/src/timer.rs
new file mode 100644
index 00000000..b59e8d08
--- /dev/null
+++ b/kernel/lib/vfs/src/timer.rs
@@ -0,0 +1,63 @@
+use core::{
+    mem::size_of,
+    task::{Context, Poll},
+    time::Duration,
+};
+
+use alloc::boxed::Box;
+
+use futures_util::FutureExt;
+use libk_thread::{
+    runtime::{self, SleepFuture},
+    sync::Mutex,
+};
+use libk_util::sync::LockMethod;
+use yggdrasil_abi::error::Error;
+
+use crate::{FileReadiness, Write};
+
+/// File-like structure to generate periodic or one-shot events at certain intervals
+pub struct TimerFile {
+    inner: Box<Mutex<Option<SleepFuture>>>,
+    _repeat: bool,
+}
+
+impl TimerFile {
+    /// Creates a new inert timer
+    pub fn new(repeat: bool) -> Self {
+        Self {
+            _repeat: repeat,
+            inner: Box::new(Mutex::new(None)),
+        }
+    }
+}
+
+impl FileReadiness for TimerFile {
+    fn poll_read(&self, cx: &mut Context<'_>) -> Poll<Result<(), Error>> {
+        // TODO repeat function
+        let mut lock = self.inner.lock()?;
+        let future = lock.as_mut().ok_or(Error::InvalidOperation)?;
+        future.poll_unpin(cx).map(Ok)
+    }
+}
+
+impl Write for TimerFile {
+    fn write(&self, buf: &[u8]) -> Result<usize, Error> {
+        if buf.len() != size_of::<u128>() {
+            return Err(Error::InvalidArgument);
+        }
+        let mut bytes = [0; 16];
+        bytes.copy_from_slice(buf);
+        let tval = u128::from_ne_bytes(bytes);
+
+        let mut lock = self.inner.lock()?;
+        if tval == 0 {
+            *lock = None;
+        } else {
+            let duration = Duration::from_micros(tval.try_into().unwrap());
+            *lock = Some(runtime::sleep(duration));
+        }
+
+        Ok(buf.len())
+    }
+}
diff --git a/kernel/lib/vfs/src/traits.rs b/kernel/lib/vfs/src/traits.rs
new file mode 100644
index 00000000..9b9e6850
--- /dev/null
+++ b/kernel/lib/vfs/src/traits.rs
@@ -0,0 +1,11 @@
+use core::task::{Context, Poll};
+
+use yggdrasil_abi::error::Error;
+
+pub use libk_util::io::{Read, Seek, Write};
+
+/// Interface for polling files for events
+pub trait FileReadiness: Sync {
+    /// Polls file's "read-ready" status
+    fn poll_read(&self, cx: &mut Context<'_>) -> Poll<Result<(), Error>>;
+}
diff --git
a/kernel/lib/vmalloc/Cargo.toml b/kernel/lib/vmalloc/Cargo.toml new file mode 100644 index 00000000..b4ec8bc5 --- /dev/null +++ b/kernel/lib/vmalloc/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "vmalloc" +version = "0.1.0" +edition = "2021" +authors = ["Mark Poliakov "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } + +discrete_range_map = { git = "https://git.alnyan.me/yggdrasil/discrete_range_map.git" } + +[dev-dependencies] +itertools = "0.11.0" +proptest = "1.2.0" diff --git a/kernel/lib/vmalloc/src/allocator.rs b/kernel/lib/vmalloc/src/allocator.rs new file mode 100644 index 00000000..43398b25 --- /dev/null +++ b/kernel/lib/vmalloc/src/allocator.rs @@ -0,0 +1,349 @@ +use core::cmp::Ordering; + +use alloc::collections::{linked_list::CursorMut, LinkedList}; +use yggdrasil_abi::error::Error; + +use crate::VirtualMemoryRange; + +#[derive(PartialEq, Clone, Debug, Copy)] +struct AllocatorNode { + range: VirtualMemoryRange, + used: bool, +} + +impl PartialOrd for AllocatorNode { + fn partial_cmp(&self, other: &Self) -> Option { + self.range.start_pfn.partial_cmp(&other.range.start_pfn) + } +} + +pub struct TreeAllocator { + ranges: LinkedList, +} + +impl AllocatorNode { + pub const fn free(start_pfn: usize, end_pfn: usize) -> Self { + Self { + range: VirtualMemoryRange { start_pfn, end_pfn }, + used: false, + } + } + + #[cfg(test)] + pub const fn used(start_pfn: usize, end_pfn: usize) -> Self { + Self { + range: VirtualMemoryRange { start_pfn, end_pfn }, + used: true, + } + } + + #[inline] + pub const fn pfn_count(&self) -> usize { + self.range.end_pfn - self.range.start_pfn + } +} + +impl TreeAllocator { + pub fn new(start_pfn: usize, end_pfn: usize) -> Self { + let mut ranges = LinkedList::new(); + ranges.push_back(AllocatorNode::free(start_pfn, end_pfn)); + Self { ranges } + } + + fn find_region_mut bool>(&mut self, f: F) -> CursorMut { + let mut cursor = self.ranges.cursor_front_mut(); + while let Some(range) = cursor.current() { + if f(range) { + break; + } + cursor.move_next(); + } + cursor + } + + fn coalesce_regions(&mut self) { + let mut cursor = self.ranges.cursor_front_mut(); + + loop { + let Some(&mut next) = cursor.peek_next() else { + break; + }; + let current = cursor.current().unwrap(); + + if current.used == next.used { + debug_assert_eq!(current.range.end_pfn, next.range.start_pfn); + current.range.end_pfn = next.range.end_pfn; + + cursor.move_next(); + cursor.remove_current(); + cursor.move_prev(); + } else { + cursor.move_next(); + } + } + } + + fn set_range( + &mut self, + start_pfn: usize, + pfn_count: usize, + old_state: bool, + new_state: bool, + ) -> Result<(), Error> { + let insert = VirtualMemoryRange { + start_pfn, + end_pfn: start_pfn + pfn_count, + }; + let mut cursor = self.find_region_mut(|r| r.used == old_state && r.range.contains(&insert)); + let range = cursor.current().ok_or(Error::AlreadyExists)?; + + let start_pfn = range.range.start_pfn; + let end_pfn = range.range.end_pfn; + + match (insert.start_pfn == start_pfn, insert.end_pfn == end_pfn) { + // No split + (true, true) => { + range.used = new_state; + } + // Split start + (true, false) => { + range.used = new_state; + range.range.end_pfn = insert.end_pfn; + + cursor.insert_after(AllocatorNode { + range: VirtualMemoryRange { + start_pfn: insert.end_pfn, + end_pfn, + }, + used: old_state, + }); + } + // Split end + (false, true) => { + 
range.range.end_pfn = insert.start_pfn; + + cursor.insert_after(AllocatorNode { + range: VirtualMemoryRange { + start_pfn: insert.start_pfn, + end_pfn, + }, + used: new_state, + }); + } + // Split in the middle + (false, false) => { + range.range = insert; + range.used = new_state; + + cursor.insert_after(AllocatorNode { + range: VirtualMemoryRange { + start_pfn: insert.end_pfn, + end_pfn, + }, + used: old_state, + }); + cursor.insert_before(AllocatorNode { + range: VirtualMemoryRange { + start_pfn, + end_pfn: insert.start_pfn, + }, + used: old_state, + }); + } + } + + self.coalesce_regions(); + + Ok(()) + } + + pub fn insert(&mut self, start_pfn: usize, pfn_count: usize) -> Result<(), Error> { + self.set_range(start_pfn, pfn_count, false, true) + } + + pub fn free(&mut self, start_pfn: usize, pfn_count: usize) -> Result<(), Error> { + self.set_range(start_pfn, pfn_count, true, false) + } + + pub fn allocate(&mut self, pfn_count: usize) -> Option { + let mut cursor = self.find_region_mut(|r| !r.used && r.pfn_count() >= pfn_count); + let range = cursor.current()?; + + let start_pfn = range.range.start_pfn; + let end_pfn = range.range.end_pfn; + + range.used = true; + + if range.pfn_count() > pfn_count { + range.range.end_pfn = start_pfn + pfn_count; + + // Split the range + cursor.insert_after(AllocatorNode::free(start_pfn + pfn_count, end_pfn)); + } + + self.coalesce_regions(); + + Some(start_pfn) + } + + pub fn ranges(&self) -> impl Iterator + '_ { + self.ranges.iter().map(|r| (r.used, r.range)) + } +} + +#[cfg(test)] +mod tests { + use alloc::collections::LinkedList; + + use super::{AllocatorNode, TreeAllocator}; + + extern crate std; + + #[test] + fn deallocation() { + let ranges = LinkedList::from_iter([ + AllocatorNode::free(0, 12), + AllocatorNode::used(12, 24), + AllocatorNode::free(24, 32), + AllocatorNode::used(32, 64), + AllocatorNode::free(64, 128), + ]); + let mut alloc = TreeAllocator { ranges }; + + // No-split dealloc + assert_eq!(alloc.free(12, 12), Ok(())); + let expected = LinkedList::from_iter([ + AllocatorNode::free(0, 32), + AllocatorNode::used(32, 64), + AllocatorNode::free(64, 128), + ]); + itertools::assert_equal(alloc.ranges.iter(), expected.iter()); + + // Split at the start dealloc + assert_eq!(alloc.free(32, 8), Ok(())); + let expected = LinkedList::from_iter([ + AllocatorNode::free(0, 40), + AllocatorNode::used(40, 64), + AllocatorNode::free(64, 128), + ]); + itertools::assert_equal(alloc.ranges.iter(), expected.iter()); + + // Split at the end dealloc + assert_eq!(alloc.free(56, 8), Ok(())); + let expected = LinkedList::from_iter([ + AllocatorNode::free(0, 40), + AllocatorNode::used(40, 56), + AllocatorNode::free(56, 128), + ]); + + itertools::assert_equal(alloc.ranges.iter(), expected.iter()); + + // Split in the middle + assert_eq!(alloc.free(42, 4), Ok(())); + let expected = LinkedList::from_iter([ + AllocatorNode::free(0, 40), + AllocatorNode::used(40, 42), + AllocatorNode::free(42, 46), + AllocatorNode::used(46, 56), + AllocatorNode::free(56, 128), + ]); + + itertools::assert_equal(alloc.ranges.iter(), expected.iter()); + + // Whole region free + assert_eq!(alloc.free(40, 2), Ok(())); + let expected = LinkedList::from_iter([ + AllocatorNode::free(0, 46), + AllocatorNode::used(46, 56), + AllocatorNode::free(56, 128), + ]); + + itertools::assert_equal(alloc.ranges.iter(), expected.iter()); + + assert_eq!(alloc.free(46, 10), Ok(())); + let expected = LinkedList::from_iter([AllocatorNode::free(0, 128)]); + + itertools::assert_equal(alloc.ranges.iter(), 
expected.iter()); + } + + #[test] + fn allocation() { + let ranges = LinkedList::from_iter([ + AllocatorNode::free(0, 12), + AllocatorNode::used(12, 24), + AllocatorNode::free(24, 32), + AllocatorNode::used(32, 64), + ]); + let mut alloc = TreeAllocator { ranges }; + + // Non-splitting allocation + assert_eq!(alloc.allocate(12), Some(0)); + + // Splitting allocation + assert_eq!(alloc.allocate(4), Some(24)); + + // Non-splitting allocation + assert_eq!(alloc.allocate(4), Some(28)); + + // Out of memory + assert_eq!(alloc.allocate(1), None); + + let expected = LinkedList::from_iter([AllocatorNode::used(0, 64)]); + + itertools::assert_equal(alloc.ranges, expected); + } + + #[test] + fn insertion() { + let ranges = LinkedList::from_iter([ + AllocatorNode::free(0, 12), + AllocatorNode::used(12, 24), + AllocatorNode::free(24, 32), + AllocatorNode::used(32, 64), + AllocatorNode::free(64, 128), + ]); + let mut alloc = TreeAllocator { ranges }; + + // No split + assert_eq!(alloc.insert(0, 12), Ok(())); + let expected = LinkedList::from_iter([ + AllocatorNode::used(0, 24), + AllocatorNode::free(24, 32), + AllocatorNode::used(32, 64), + AllocatorNode::free(64, 128), + ]); + itertools::assert_equal(alloc.ranges.iter(), expected.iter()); + + // Split at the start + assert_eq!(alloc.insert(24, 4), Ok(())); + let expected = LinkedList::from_iter([ + AllocatorNode::used(0, 28), + AllocatorNode::free(28, 32), + AllocatorNode::used(32, 64), + AllocatorNode::free(64, 128), + ]); + itertools::assert_equal(alloc.ranges.iter(), expected.iter()); + + // Split at the end + assert_eq!(alloc.insert(30, 2), Ok(())); + let expected = LinkedList::from_iter([ + AllocatorNode::used(0, 28), + AllocatorNode::free(28, 30), + AllocatorNode::used(30, 64), + AllocatorNode::free(64, 128), + ]); + itertools::assert_equal(alloc.ranges.iter(), expected.iter()); + + // Split in the middle + assert_eq!(alloc.insert(72, 16), Ok(())); + let expected = LinkedList::from_iter([ + AllocatorNode::used(0, 28), + AllocatorNode::free(28, 30), + AllocatorNode::used(30, 64), + AllocatorNode::free(64, 72), + AllocatorNode::used(72, 88), + AllocatorNode::free(88, 128), + ]); + itertools::assert_equal(alloc.ranges.iter(), expected.iter()); + } +} diff --git a/kernel/lib/vmalloc/src/lib.rs b/kernel/lib/vmalloc/src/lib.rs new file mode 100644 index 00000000..716265ea --- /dev/null +++ b/kernel/lib/vmalloc/src/lib.rs @@ -0,0 +1,114 @@ +//! Virtual memory allocator for the Yggdrasil kernel. +//! +//! The allocator uses a [DiscreteRangeMap] to track the memory regions and allows attaching +//! metadata values to each region. "Touching" region coalescing is enabled through the [Eq] trait +//! implemented for [RangeData]. + +#![deny(missing_docs)] +#![no_std] +#![feature(linked_list_cursors, let_chains, btree_extract_if)] + +extern crate alloc; + +use core::ops::Range; + +use discrete_range_map::{DiscreteRangeMap, InclusiveInterval, InclusiveRange}; +use yggdrasil_abi::error::Error; + +#[cfg(target_pointer_width = "64")] +type PfnIndex = u64; + +/// Metadata associated with an allocated memory region. The [Eq] trait is used to coalesce "equal" +/// regions if they "touch". 
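// Coalescing sketch under these rules, with a hypothetical unit metadata type
// (any `Eq + Clone` type qualifies):
//
//     #[derive(PartialEq, Eq, Clone)]
//     struct Tag;
//     impl RangeData for Tag {}
//
//     let mut vm = VirtualMemoryAllocator::<Tag>::new(0, 1024);
//     let a = vm.allocate(4, Tag).unwrap(); // say, PFNs 0..4
//     let b = vm.allocate(4, Tag).unwrap(); // PFNs 4..8: equal metadata and
//                                           // touching, so merged into 0..8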
+pub trait RangeData: Eq + Clone {} + +fn ie(from: PfnIndex, to: PfnIndex) -> InclusiveInterval { + InclusiveInterval::from(from..to) +} + +/// Main virtual memory allocator +#[derive(Clone)] +pub struct VirtualMemoryAllocator { + map: DiscreteRangeMap, D>, + outer_range: InclusiveInterval, +} + +impl VirtualMemoryAllocator { + /// Creates a new virtual memory allocator, bounded by `lower_limit_pfn..upper_limit_pfn` + pub fn new(lower_limit_pfn: usize, upper_limit_pfn: usize) -> Self { + Self { + map: DiscreteRangeMap::new(), + outer_range: ie(lower_limit_pfn as _, upper_limit_pfn as _), + } + } + + /// Returns an iterator over the regions within the allocator + pub fn regions(&self) -> impl Iterator, &D)> { + self.map.iter().map(|(range, data)| { + let range = range.start() as usize..range.end() as usize + 1; + (range, data) + }) + } + + /// Allocates a contiguous range of virtual address space and associates metadata with it + pub fn allocate(&mut self, page_count: usize, data: D) -> Result { + let start_pfn = self + .map + .gaps_trimmed(self.outer_range) + .find_map(|range| { + if range.size() >= page_count as _ { + Some(range.start() as usize) + } else { + None + } + }) + .ok_or(Error::OutOfMemory)?; + + // Should not fail + self.insert(start_pfn, page_count, data)?; + + Ok(start_pfn) + } + + /// Tries to insert given PF# range with its associated metadata as allocated memory, + /// returning [Error] if requested range overlaps any existing allocated ranges + pub fn insert(&mut self, start_pfn: usize, page_count: usize, data: D) -> Result<(), Error> { + let end_pfn = (start_pfn + page_count) as PfnIndex; + let start_pfn = start_pfn as PfnIndex; + + self.map + .insert_merge_touching_if_values_equal(ie(start_pfn, end_pfn), data) + .map_err(|_| Error::AlreadyExists)?; + + Ok(()) + } + + /// Releases any pages overlapping the requested range, calling `release` on the ranges + pub fn free, D) -> Result<(), Error>>( + &mut self, + start_pfn: usize, + page_count: usize, + mut release: F, + ) -> Result<(), Error> { + let end_pfn = (start_pfn + page_count) as PfnIndex; + let start_pfn = start_pfn as PfnIndex; + + self.map + .cut_with_origin(ie(start_pfn, end_pfn)) + .try_for_each(|(origin, range, data)| { + let range = range.start() as usize..range.end() as usize + 1; + release(origin.start() as _, range, data) + }) + } + + /// Removes all allocations, invoking a function on each of them + pub fn clear, D) -> Result<(), Error>>( + &mut self, + mut release: F, + ) -> Result<(), Error> { + self.map.drain().try_for_each(|(range, data)| { + let range = range.start() as usize..range.end() as usize + 1; + release(range, data) + }) + } +} diff --git a/kernel/libk/Cargo.toml b/kernel/libk/Cargo.toml new file mode 100644 index 00000000..d21f44a3 --- /dev/null +++ b/kernel/libk/Cargo.toml @@ -0,0 +1,21 @@ +[package] +name = "libk" +version = "0.1.0" +edition = "2021" +authors = ["Mark Poliakov "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +libk-mm = { path = "libk-mm" } +libk-util = { path = "libk-util" } +libk-thread = { path = "libk-thread" } +libk-device = { path = "libk-device" } +kernel-arch = { path = "../arch" } + +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +device-api = { path = "../lib/device-api", features = ["derive"] } + +log = "0.4.20" +futures-util = { version = "0.3.28", default-features = false, features = ["alloc", "async-await"] } +crossbeam-queue = { version = "0.3.8", 
default-features = false, features = ["alloc"] } diff --git a/kernel/libk/libk-device/Cargo.toml b/kernel/libk/libk-device/Cargo.toml new file mode 100644 index 00000000..88be578e --- /dev/null +++ b/kernel/libk/libk-device/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "libk-device" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +libk-util = { path = "../libk-util" } +kernel-arch = { path = "../../arch" } + +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +device-api = { path = "../../lib/device-api", features = ["derive"] } diff --git a/kernel/libk/libk-device/src/lib.rs b/kernel/libk/libk-device/src/lib.rs new file mode 100644 index 00000000..7feca539 --- /dev/null +++ b/kernel/libk/libk-device/src/lib.rs @@ -0,0 +1,70 @@ +#![no_std] + +use core::time::Duration; + +use device_api::{ + interrupt::{ + ExternalInterruptController, InterruptHandler, Irq, IrqOptions, LocalInterruptController, + MessageInterruptController, + }, + timer::MonotonicTimestampProviderDevice, +}; +use kernel_arch::{Architecture, ArchitectureImpl}; +use libk_util::OneTimeInit; +use yggdrasil_abi::error::Error; + +macro_rules! register_get { + ($register_name:ident, $get_name:ident, $global:ident, $ty:ty) => { + static $global: OneTimeInit<$ty> = OneTimeInit::new(); + + pub fn $register_name(intc: $ty) { + $global.init(intc); + } + + pub fn $get_name() -> $ty { + *$global.get() + } + }; +} + +register_get!( + register_external_interrupt_controller, + external_interrupt_controller, + EXTERNAL_INTC, + &'static dyn ExternalInterruptController +); + +register_get!( + register_monotonic_timestamp_provider, + monotonic_timestamp_provider, + MONOTONIC_TIMER, + &'static dyn MonotonicTimestampProviderDevice +); + +pub fn local_interrupt_controller() -> &'static dyn LocalInterruptController { + ArchitectureImpl::local_interrupt_controller() +} + +pub fn message_interrupt_controller() -> &'static dyn MessageInterruptController { + ArchitectureImpl::message_interrupt_controller() +} + +pub fn monotonic_timestamp() -> Result { + monotonic_timestamp_provider().monotonic_timestamp() +} + +#[inline] +pub fn register_global_interrupt( + irq: u32, + options: IrqOptions, + handler: &'static dyn InterruptHandler, +) -> Result<(), Error> { + let intc = external_interrupt_controller(); + + let irq = Irq::External(irq); + + intc.register_irq(irq, options, handler)?; + intc.enable_irq(irq)?; + + Ok(()) +} diff --git a/kernel/libk/libk-mm/Cargo.toml b/kernel/libk/libk-mm/Cargo.toml new file mode 100644 index 00000000..5536289a --- /dev/null +++ b/kernel/libk/libk-mm/Cargo.toml @@ -0,0 +1,15 @@ +[package] +name = "libk-mm" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +kernel-arch = { path = "../../arch" } +libk-util = { path = "../libk-util" } +libk-mm-interface = { path = "interface" } +vmalloc = { path = "../../lib/vmalloc" } + +log = "0.4.20" diff --git a/kernel/libk/libk-mm/interface/Cargo.toml b/kernel/libk/libk-mm/interface/Cargo.toml new file mode 100644 index 00000000..43cf71cb --- /dev/null +++ b/kernel/libk/libk-mm/interface/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "libk-mm-interface" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at 
diff --git a/kernel/libk/libk-mm/Cargo.toml b/kernel/libk/libk-mm/Cargo.toml
new file mode 100644
index 00000000..5536289a
--- /dev/null
+++ b/kernel/libk/libk-mm/Cargo.toml
@@ -0,0 +1,15 @@
+[package]
+name = "libk-mm"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" }
+kernel-arch = { path = "../../arch" }
+libk-util = { path = "../libk-util" }
+libk-mm-interface = { path = "interface" }
+vmalloc = { path = "../../lib/vmalloc" }
+
+log = "0.4.20"
diff --git a/kernel/libk/libk-mm/interface/Cargo.toml b/kernel/libk/libk-mm/interface/Cargo.toml
new file mode 100644
index 00000000..43cf71cb
--- /dev/null
+++ b/kernel/libk/libk-mm/interface/Cargo.toml
@@ -0,0 +1,14 @@
+[package]
+name = "libk-mm-interface"
+version = "0.1.0"
+edition = "2021"
+
+# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html
+
+[dependencies]
+yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" }
+
+kernel-arch-interface = { path = "../../../arch/interface" }
+
+bitflags = "2.3.3"
+bytemuck = { version = "1.14.0", features = ["derive"] }
diff --git a/kernel/libk/libk-mm/interface/src/address.rs b/kernel/libk/libk-mm/interface/src/address.rs
new file mode 100644
index 00000000..a07ffe72
--- /dev/null
+++ b/kernel/libk/libk-mm/interface/src/address.rs
@@ -0,0 +1,161 @@
+use core::{
+    fmt,
+    iter::Step,
+    mem::align_of,
+    ops::{Add, Sub},
+};
+
+use bytemuck::{Pod, Zeroable};
+use kernel_arch_interface::mem::KernelTableManager;
+
+/// Wrapper type to represent a physical memory address
+#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Debug, Hash, Pod, Zeroable)]
+#[repr(transparent)]
+pub struct PhysicalAddress(pub(crate) u64);
+
+/// Interface for converting addresses from their raw values to more specific types
+#[const_trait]
+pub trait FromRaw<T> {
+    /// Converts a raw value into the address wrapper type
+    fn from_raw(value: T) -> Self;
+}
+
+/// Interface for converting wrapper types into their raw address representations
+#[const_trait]
+pub trait IntoRaw<T> {
+    /// Converts a wrapper type value into its raw address
+    fn into_raw(self) -> T;
+}
+
+/// Interface for obtaining physical addresses of values
+pub trait AsPhysicalAddress {
+    /// Returns the value's physical address.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the value has been constructed and obtained through proper means.
+    unsafe fn as_physical_address(&self) -> PhysicalAddress;
+}
+
+impl PhysicalAddress {
+    /// Physical address of zero
+    pub const ZERO: Self = Self(0);
+
+    /// Maximum representable physical address
+    pub const MAX: Self = Self(u64::MAX);
+    /// Minimum representable physical address
+    pub const MIN: Self = Self(u64::MIN);
+
+    /// Applies an offset to the address
+    pub const fn add(self, offset: usize) -> Self {
+        Self(self.0 + offset as u64)
+    }
+
+    /// Returns `true` if the address is zero
+    #[inline(always)]
+    pub const fn is_zero(self) -> bool {
+        self.0 == 0
+    }
+
+    /// Returns `true` if the address is aligned to the alignment boundary of `T`
+    #[inline]
+    pub const fn is_aligned_for<T: Sized>(self) -> bool {
+        self.0 as usize % align_of::<T>() == 0
+    }
+
+    /// Converts a previously virtualized physical address back into its physical form.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the function only receives addresses obtained through
+    /// [PhysicalAddress::raw_virtualize] or
+    /// [super::pointer::PhysicalRef]/[super::pointer::PhysicalRefMut] facilities.
+    #[inline]
+    pub unsafe fn raw_from_virtualized<K: KernelTableManager>(address: usize) -> Self {
+        PhysicalAddress(K::physicalize(address))
+    }
+
+    /// Converts the physical address to a virtual one
+    #[inline]
+    pub fn raw_virtualize<K: KernelTableManager>(self) -> usize {
+        K::virtualize(self.0)
+    }
+}
+impl Add for PhysicalAddress {
+    type Output = Self;
+
+    fn add(self, rhs: Self) -> Self::Output {
+        Self(self.0 + rhs.0)
+    }
+}
+
+impl Sub for PhysicalAddress {
+    type Output = usize;
+
+    fn sub(self, rhs: Self) -> Self::Output {
+        (self.0 - rhs.0) as usize
+    }
+}
+
+// Conversions
+
+impl const FromRaw<u64> for PhysicalAddress {
+    fn from_raw(value: u64) -> Self {
+        Self(value)
+    }
+}
+
+impl const FromRaw<usize> for PhysicalAddress {
+    fn from_raw(value: usize) -> Self {
+        Self(value as u64)
+    }
+}
+
+impl const IntoRaw<u64> for PhysicalAddress {
+    fn into_raw(self) -> u64 {
+        self.0
+    }
+}
+
+impl const IntoRaw<usize> for PhysicalAddress {
+    fn into_raw(self) -> usize {
+        self.0 as usize
+    }
+}
+
+impl From<PhysicalAddress> for u64 {
+    fn from(addr: PhysicalAddress) -> u64 {
+        addr.0
+    }
+}
+
+impl From<PhysicalAddress> for usize {
+    fn from(addr: PhysicalAddress) -> usize {
+        addr.0 as usize
+    }
+}
+
+// Ranges
+
+impl Step for PhysicalAddress {
+    fn steps_between(_start: &Self, _end: &Self) -> Option<usize> {
+        todo!()
+    }
+
+    fn forward_checked(start: Self, count: usize) -> Option<Self> {
+        start.0.checked_add(count as u64).map(Self)
+    }
+
+    fn backward_checked(_start: Self, _count: usize) -> Option<Self> {
+        todo!()
+    }
+}
+
+// fmt
+
+impl fmt::LowerHex for PhysicalAddress {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::LowerHex::fmt(&self.0, f)
+    }
+}
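A small sketch of the conversion and arithmetic traits above (addresses are arbitrary):

```rust
use libk_mm_interface::address::{FromRaw, IntoRaw, PhysicalAddress};

fn demo() {
    let a = PhysicalAddress::from_raw(0x8000_0000usize);
    let b = a.add(0x2000);

    assert_eq!(b - a, 0x2000); // Sub yields a usize distance
    assert_eq!(IntoRaw::<u64>::into_raw(b), 0x8000_2000);
    assert!(a.is_aligned_for::<u64>()); // 8-byte alignment check
}
```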
diff --git a/kernel/libk/libk-mm/interface/src/lib.rs b/kernel/libk/libk-mm/interface/src/lib.rs
new file mode 100644
index 00000000..55d6ea10
--- /dev/null
+++ b/kernel/libk/libk-mm/interface/src/lib.rs
@@ -0,0 +1,51 @@
+#![no_std]
+#![feature(step_trait, const_trait_impl, effects, strict_provenance)]
+
+use core::ops::{Deref, DerefMut};
+
+use address::{AsPhysicalAddress, FromRaw, PhysicalAddress};
+use kernel_arch_interface::KERNEL_VIRT_OFFSET;
+
+pub mod address;
+pub mod pointer;
+pub mod process;
+pub mod table;
+
+/// Wrapper type to represent an object residing within the kernel
+#[repr(transparent)]
+pub struct KernelImageObject<T> {
+    inner: T,
+}
+
+// KernelImageObject wrapper for objects inside the kernel
+
+impl<T> KernelImageObject<T> {
+    /// Wraps a value in the [KernelImageObject], allowing its physical address calculation.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure `T` is a `static` (or `static mut`) binding inside the kernel
+    /// image.
+    pub const unsafe fn new(inner: T) -> Self {
+        Self { inner }
+    }
+}
+
+impl<T> AsPhysicalAddress for KernelImageObject<T> {
+    unsafe fn as_physical_address(&self) -> PhysicalAddress {
+        PhysicalAddress::from_raw(&self.inner as *const _ as usize - KERNEL_VIRT_OFFSET)
+    }
+}
+
+impl<T> Deref for KernelImageObject<T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        &self.inner
+    }
+}
+
+impl<T> DerefMut for KernelImageObject<T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        &mut self.inner
+    }
+}
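A sketch of the intended use: wrapping a static that lives in the kernel image so its load-time physical address can be computed (the static itself is hypothetical):

```rust
use libk_mm_interface::{address::AsPhysicalAddress, KernelImageObject};

// Hypothetical in-image table; KernelImageObject::new is only sound for
// statics linked into the kernel image itself.
static TABLES: KernelImageObject<[u64; 512]> = unsafe { KernelImageObject::new([0; 512]) };

fn demo() {
    // Virtual (image) address minus KERNEL_VIRT_OFFSET
    let phys = unsafe { TABLES.as_physical_address() };
    let _ = phys;
}
```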
diff --git a/kernel/libk/libk-mm/interface/src/pointer.rs b/kernel/libk/libk-mm/interface/src/pointer.rs
new file mode 100644
index 00000000..0efea751
--- /dev/null
+++ b/kernel/libk/libk-mm/interface/src/pointer.rs
@@ -0,0 +1,163 @@
+use core::{
+    fmt,
+    marker::PhantomData,
+    ops::{Deref, DerefMut},
+};
+
+use kernel_arch_interface::mem::KernelTableManager;
+
+use crate::address::{AsPhysicalAddress, PhysicalAddress};
+
+/// Wrapper for immutably accessing a value at a physical address
+#[repr(transparent)]
+pub struct PhysicalRef<'a, T: ?Sized, K: KernelTableManager> {
+    value: &'a T,
+    _pd: PhantomData<K>,
+}
+
+/// Wrapper for mutably accessing a value at a physical address
+#[repr(transparent)]
+pub struct PhysicalRefMut<'a, T: ?Sized, K: KernelTableManager> {
+    value: &'a mut T,
+    _pd: PhantomData<K>,
+}
+
+// PhysicalRefMut wrapper for safe mutable access to physical addresses
+
+impl<'a, T: Sized, K: KernelTableManager> PhysicalRefMut<'a, T, K> {
+    /// Maps a physical address into the kernel space as &mut T, allowing mutable access to it.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the correct origin of the physical address as well that it actually
+    /// contains T. The caller must also take care of access synchronization and make sure no
+    /// aliasing occurs.
+    pub unsafe fn map(physical: PhysicalAddress) -> PhysicalRefMut<'a, T, K> {
+        let value = virtualize_raw::<_, K>(physical);
+        PhysicalRefMut {
+            value,
+            _pd: PhantomData,
+        }
+    }
+
+    /// Maps a physical address into the kernel space as &mut [T], allowing mutable access to it.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the correct origin of the physical address as well that it actually
+    /// contains [T; len]. The caller must also take care of access synchronization and make
+    /// sure no aliasing occurs.
+    pub unsafe fn map_slice(physical: PhysicalAddress, len: usize) -> PhysicalRefMut<'a, [T], K> {
+        let value = virtualize_slice_raw::<_, K>(physical, len);
+        PhysicalRefMut {
+            value,
+            _pd: PhantomData,
+        }
+    }
+}
+
+impl<T: ?Sized, K: KernelTableManager> PhysicalRefMut<'_, T, K> {
+    /// Returns the "address" part of the reference
+    #[inline]
+    pub fn as_address(&self) -> usize {
+        (self.value as *const T).addr()
+    }
+}
+
+impl<T: ?Sized, K: KernelTableManager> AsPhysicalAddress for PhysicalRefMut<'_, T, K> {
+    unsafe fn as_physical_address(&self) -> PhysicalAddress {
+        PhysicalAddress::raw_from_virtualized::<K>(PhysicalRefMut::<T, K>::as_address(self))
+    }
+}
+
+impl<T: ?Sized, K: KernelTableManager> Deref for PhysicalRefMut<'_, T, K> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.value
+    }
+}
+
+impl<T: ?Sized, K: KernelTableManager> DerefMut for PhysicalRefMut<'_, T, K> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.value
+    }
+}
+
+impl<T: ?Sized, K: KernelTableManager> fmt::Pointer for PhysicalRefMut<'_, T, K> {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Pointer::fmt(&self.value, f)
+    }
+}
+
+// PhysicalRef: same as PhysicalRefMut, except immutable
+
+impl<'a, T: Sized, K: KernelTableManager> PhysicalRef<'a, T, K> {
+    /// Maps a physical address into the kernel space as &T, allowing immutable access to it.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the correct origin of the physical address as well that it actually
+    /// contains T.
+    pub unsafe fn map(physical: PhysicalAddress) -> PhysicalRef<'a, T, K> {
+        let value = virtualize_raw::<_, K>(physical);
+        PhysicalRef {
+            value,
+            _pd: PhantomData,
+        }
+    }
+
+    /// Maps a physical address into the kernel space as &[T] of given len, allowing immutable
+    /// access to it.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the correct origin of the physical address as well that it actually
+    /// contains [T; len].
+    pub unsafe fn map_slice(physical: PhysicalAddress, len: usize) -> PhysicalRef<'a, [T], K> {
+        let value = virtualize_slice_raw::<_, K>(physical, len);
+        PhysicalRef {
+            value,
+            _pd: PhantomData,
+        }
+    }
+}
+
+impl<T: ?Sized, K: KernelTableManager> PhysicalRef<'_, T, K> {
+    /// Returns the "address" part of the reference
+    #[inline]
+    pub fn as_address(&self) -> usize {
+        (self.value as *const T).addr()
+    }
+}
+
+impl<T: ?Sized, K: KernelTableManager> AsPhysicalAddress for PhysicalRef<'_, T, K> {
+    unsafe fn as_physical_address(&self) -> PhysicalAddress {
+        PhysicalAddress::raw_from_virtualized::<K>(PhysicalRef::<T, K>::as_address(self))
+    }
+}
+
+impl<T: ?Sized, K: KernelTableManager> Deref for PhysicalRef<'_, T, K> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.value
+    }
+}
+
+unsafe fn virtualize_raw<'a, T: Sized, K: KernelTableManager>(
+    physical: PhysicalAddress,
+) -> &'a mut T {
+    // TODO check align
+    let address = physical.raw_virtualize::<K>();
+    &mut *(address as *mut T)
+}
+
+unsafe fn virtualize_slice_raw<'a, T: Sized, K: KernelTableManager>(
+    physical: PhysicalAddress,
+    len: usize,
+) -> &'a mut [T] {
+    // TODO check align
+    let address = physical.raw_virtualize::<K>();
+    core::slice::from_raw_parts_mut(address as *mut T, len)
+}
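A sketch of mapping a physical frame through these wrappers; `K` stands for the kernel's `KernelTableManager` implementation, and `frame` must be a valid, exclusively-owned frame for this to be sound:

```rust
use kernel_arch_interface::mem::KernelTableManager;
use libk_mm_interface::address::PhysicalAddress;
use libk_mm_interface::pointer::{PhysicalRef, PhysicalRefMut};

unsafe fn demo<K: KernelTableManager>(frame: PhysicalAddress) {
    {
        // Write through a mutable mapping...
        let mut value = PhysicalRefMut::<u32, K>::map(frame);
        *value = 0xDEAD_BEEF;
    } // ...and drop it before creating a shared one

    let value = PhysicalRef::<u32, K>::map(frame);
    assert_eq!(*value, 0xDEAD_BEEF);
}
```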
diff --git a/kernel/libk/libk-mm/interface/src/process.rs b/kernel/libk/libk-mm/interface/src/process.rs
new file mode 100644
index 00000000..e44a68fc
--- /dev/null
+++ b/kernel/libk/libk-mm/interface/src/process.rs
@@ -0,0 +1,47 @@
+use yggdrasil_abi::error::Error;
+
+use crate::{
+    address::PhysicalAddress,
+    table::{MapAttributes, TableAllocator},
+};
+
+/// Interface for virtual memory address space management
+pub trait ProcessAddressSpaceManager<TA: TableAllocator>: Sized {
+    /// PFN of a minimum address allowed for virtual region allocation
+    const LOWER_LIMIT_PFN: usize;
+    /// PFN of a maximum address allowed for virtual region allocation
+    const UPPER_LIMIT_PFN: usize;
+
+    /// Constructs a new implementation-specific per-process address space
+    fn new() -> Result<Self, Error>;
+
+    /// Places a single PAGE_SIZE mapping into the address space.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the correct origin of the physical address being mapped.
+    unsafe fn map_page(
+        &mut self,
+        address: usize,
+        physical: PhysicalAddress,
+        flags: MapAttributes,
+    ) -> Result<(), Error>;
+
+    /// Removes a single PAGE_SIZE mapping from the address space.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the process to which this address space belongs does not and
+    /// will not access this page.
+    unsafe fn unmap_page(&mut self, address: usize) -> Result<PhysicalAddress, Error>;
+
+    /// Returns the [PhysicalAddress] and [MapAttributes] associated with given virtual `address`,
+    /// if one is mapped
+    fn translate(&self, address: usize) -> Result<(PhysicalAddress, MapAttributes), Error>;
+
+    /// Returns the implementation-specific physical address of this space, with ASID applied
+    fn as_address_with_asid(&self) -> u64;
+
+    /// Clears the address space by dropping the non-global tables.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the address space is no longer in active use.
+    unsafe fn clear(&mut self);
+}
diff --git a/kernel/libk/libk-mm/interface/src/table.rs b/kernel/libk/libk-mm/interface/src/table.rs
new file mode 100644
index 00000000..e1f5323a
--- /dev/null
+++ b/kernel/libk/libk-mm/interface/src/table.rs
@@ -0,0 +1,144 @@
+use core::ops::{Deref, DerefMut, Range};
+
+use bitflags::bitflags;
+use yggdrasil_abi::error::Error;
+
+use super::address::PhysicalAddress;
+
+/// Interface for a single level of address translation
+pub trait EntryLevel: Copy {
+    /// The right shift needed to obtain an index of an entry at this level from an address
+    const SHIFT: usize;
+    /// The size of a page at this entry level
+    const SIZE: usize = 1 << Self::SHIFT;
+}
+
+/// Interface for allocating and releasing pages used as translation tables
+pub trait TableAllocator {
+    /// Allocates a physical page to hold a page table
+    fn allocate_page_table() -> Result<PhysicalAddress, Error>;
+    /// Releases a page previously allocated through [TableAllocator::allocate_page_table].
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the page is no longer referenced by any translation table.
+    unsafe fn free_page_table(address: PhysicalAddress);
+}
+
+// TODO EXECUTABLE
+bitflags! {
+    /// Describes how a page translation mapping should behave
+    #[derive(Clone, Copy)]
+    pub struct MapAttributes: u64 {
+        /// The data mapped can be read by the user process
+        const USER_READ = 1 << 0;
+        /// The data mapped can be written to by the user process
+        const USER_WRITE = 1 << 1;
+        /// The mapping is not global across the address spaces
+        const NON_GLOBAL = 1 << 2;
+    }
+}
+
+#[const_trait]
+pub trait EntryLevelExt: Sized {
+    fn page_index<L: EntryLevel>(&self) -> usize;
+    fn page_offset<L: EntryLevel>(&self) -> usize;
+    fn page_count<L: EntryLevel>(&self) -> usize;
+    fn page_align_up<L: EntryLevel>(&self) -> Self;
+    fn page_align_down<L: EntryLevel>(&self) -> Self;
+    fn is_page_aligned_for<L: EntryLevel>(&self) -> bool;
+}
+
+#[const_trait]
+trait AddressLike: Sized + Copy {
+    fn into_usize(self) -> usize;
+    fn from_usize(v: usize) -> Self;
+}
+
+/// Interface for destroying memory translation tables
+pub trait EntryLevelDrop {
+    /// Range covering the whole table
+    const FULL_RANGE: Range<usize>;
+
+    /// Recursively destroys the specified range within the table
+    unsafe fn drop_range<TA: TableAllocator>(&mut self, range: Range<usize>);
+
+    /// Recursively destroys all the entries within the table
+    unsafe fn drop_all<TA: TableAllocator>(&mut self) {
+        self.drop_range::<TA>(Self::FULL_RANGE)
+    }
+}
+
+/// Interface for non-terminal tables to retrieve the next level of address translation tables
+pub trait NextPageTable {
+    /// Type for the next-level page table
+    type NextLevel;
+    /// Type for an immutable reference to the next-level page table
+    type TableRef: Deref<Target = Self::NextLevel>;
+    /// Type for a mutable reference to the next-level page table
+    type TableRefMut: DerefMut<Target = Self::NextLevel>;
+
+    /// Tries looking up a next-level table at given index, allocating and mapping one if it is not
+    /// present there
+    fn get_mut_or_alloc<TA: TableAllocator>(
+        &mut self,
+        index: usize,
+    ) -> Result<Self::TableRefMut, Error>;
+    /// Returns a mutable reference to a next-level table at `index`, if present
+    fn get_mut(&mut self, index: usize) -> Option<Self::TableRefMut>;
+    /// Returns an immutable reference to a next-level table at `index`, if present
+    fn get(&self, index: usize) -> Option<Self::TableRef>;
+}
+
+/// Tag trait to mark that the page table level may point to a next-level table
+pub trait NonTerminalEntryLevel: EntryLevel {
+    /// Tag type of the level this entry level may point to
+    type NextLevel: EntryLevel;
+}
+impl const AddressLike for usize {
+    #[inline(always)]
+    fn into_usize(self) -> usize {
+        self
+    }
+
+    #[inline(always)]
+    fn from_usize(v: usize) -> Self {
+        v
+    }
+}
+
+impl const AddressLike for PhysicalAddress {
+    fn from_usize(v: usize) -> Self {
+        Self(v as _)
+    }
+
+    fn into_usize(self) -> usize {
+        self.0 as _
+    }
+}
+
+impl<T: ~const AddressLike> const EntryLevelExt for T {
+    #[inline(always)]
+    fn page_index<L: EntryLevel>(&self) -> usize {
+        (self.into_usize() >> L::SHIFT) & 0x1FF
+    }
+
+    #[inline(always)]
+    fn page_offset<L: EntryLevel>(&self) -> usize {
+        self.into_usize() & (L::SIZE - 1)
+    }
+
+    #[inline(always)]
+    fn page_count<L: EntryLevel>(&self) -> usize {
+        (self.into_usize() + L::SIZE - 1) / L::SIZE
+    }
+
+    #[inline(always)]
+    fn page_align_up<L: EntryLevel>(&self) -> Self {
+        Self::from_usize((self.into_usize() + L::SIZE - 1) & !(L::SIZE - 1))
+    }
+
+    #[inline(always)]
+    fn page_align_down<L: EntryLevel>(&self) -> Self {
+        Self::from_usize(self.into_usize() & !(L::SIZE - 1))
+    }
+
+    #[inline(always)]
+    fn is_page_aligned_for<L: EntryLevel>(&self) -> bool {
+        self.page_offset::<L>() == 0
+    }
+}
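The const impl above makes page arithmetic available on plain `usize` values (and `PhysicalAddress`); a quick sketch with a hypothetical 4 KiB level:

```rust
use libk_mm_interface::table::{EntryLevel, EntryLevelExt};

// Hypothetical 4 KiB translation level, for illustration only
#[derive(Clone, Copy)]
struct L3;
impl EntryLevel for L3 {
    const SHIFT: usize = 12;
}

fn demo() {
    let addr = 0x1234_5678usize;
    assert_eq!(addr.page_offset::<L3>(), 0x678);
    assert_eq!(addr.page_align_down::<L3>(), 0x1234_5000);
    assert_eq!(addr.page_align_up::<L3>(), 0x1234_6000);
    assert_eq!(addr.page_index::<L3>(), (addr >> 12) & 0x1FF);
}
```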
diff --git a/kernel/libk/libk-mm/src/address.rs b/kernel/libk/libk-mm/src/address.rs
new file mode 100644
index 00000000..04994a98
--- /dev/null
+++ b/kernel/libk/libk-mm/src/address.rs
@@ -0,0 +1,17 @@
+use kernel_arch::{mem::KernelTableManager, KernelTableManagerImpl};
+pub use libk_mm_interface::address::{AsPhysicalAddress, FromRaw, IntoRaw, PhysicalAddress};
+
+pub trait Virtualize {
+    fn virtualize(self) -> usize;
+    fn from_virtualized(value: usize) -> Self;
+}
+
+impl Virtualize for PhysicalAddress {
+    fn virtualize(self) -> usize {
+        KernelTableManagerImpl::virtualize(self.into_raw())
+    }
+
+    fn from_virtualized(value: usize) -> Self {
+        PhysicalAddress::from_raw(KernelTableManagerImpl::physicalize(value))
+    }
+}
diff --git a/kernel/libk/libk-mm/src/device.rs b/kernel/libk/libk-mm/src/device.rs
new file mode 100644
index 00000000..8ead089b
--- /dev/null
+++ b/kernel/libk/libk-mm/src/device.rs
@@ -0,0 +1,195 @@
+use core::{
+    alloc::Layout,
+    mem::size_of,
+    ops::{Deref, DerefMut},
+};
+
+use alloc::sync::Arc;
+use kernel_arch::KernelTableManagerImpl;
+use libk_mm_interface::address::{AsPhysicalAddress, FromRaw, IntoRaw, PhysicalAddress};
+use yggdrasil_abi::error::Error;
+
+pub use kernel_arch::mem::{DeviceMemoryAttributes, DeviceMemoryCaching};
+
+pub type RawDeviceMemoryMapping = kernel_arch::mem::RawDeviceMemoryMapping<KernelTableManagerImpl>;
+
+/// Describes a single untyped device memory mapping
+#[derive(Clone, Debug)]
+pub struct DeviceMemoryMapping {
+    #[allow(unused)]
+    inner: Arc<RawDeviceMemoryMapping>,
+    address: usize,
+}
+
+/// Describes a single typed device memory mapping
+#[derive(Clone, Debug)]
+pub struct DeviceMemoryIo<'a, T: ?Sized> {
+    #[allow(unused)]
+    inner: Arc<RawDeviceMemoryMapping>,
+    value: &'a T,
+}
+
+/// Describes a single typed and mutable device memory mapping
+#[derive(Debug)]
+pub struct DeviceMemoryIoMut<'a, T: ?Sized> {
+    #[allow(unused)]
+    inner: RawDeviceMemoryMapping,
+    value: &'a mut T,
+}
+
+impl DeviceMemoryMapping {
+    /// Maps a region of physical memory as device memory of given size.
+    ///
+    /// See [RawDeviceMemoryMapping::map].
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure proper access synchronization, as well as the address' origin.
+    pub unsafe fn map(
+        base: PhysicalAddress,
+        size: usize,
+        attrs: DeviceMemoryAttributes,
+    ) -> Result<Self, Error> {
+        let inner = RawDeviceMemoryMapping::map(base.into_raw(), size, attrs)?;
+        let address = inner.address;
+        Ok(Self {
+            inner: Arc::new(inner),
+            address,
+        })
+    }
+
+    /// Returns the address to which the object is mapped
+    pub fn address(&self) -> usize {
+        self.address
+    }
+}
+
+impl<'a, T: Sized> DeviceMemoryIo<'a, T> {
+    /// Interprets a raw device memory mapping as pointing to a value of `T`.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the mapping actually contains the value of `T`, as well as proper
+    /// access synchronization.
+    pub unsafe fn from_raw(
+        inner: Arc<RawDeviceMemoryMapping>,
+    ) -> Result<DeviceMemoryIo<'a, T>, Error> {
+        if size_of::<T>() > inner.page_size * inner.page_count {
+            todo!();
+        }
+        // TODO check align
+        let value = &*(inner.address as *const T);
+        Ok(DeviceMemoryIo { inner, value })
+    }
+
+    /// Maps a physical address as device memory of type `[T]`.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the address actually points to a value of type `T`, as well as
+    /// proper access synchronization. The caller must also ensure the `len` is valid.
+    pub unsafe fn map_slice(
+        base: PhysicalAddress,
+        count: usize,
+        attrs: DeviceMemoryAttributes,
+    ) -> Result<DeviceMemoryIo<'a, [T]>, Error> {
+        let layout = Layout::array::<T>(count).unwrap();
+        let inner = RawDeviceMemoryMapping::map(base.into_raw(), layout.size(), attrs)?;
+        let value = core::slice::from_raw_parts(inner.address as *mut T, count);
+
+        Ok(DeviceMemoryIo {
+            inner: Arc::new(inner),
+            value,
+        })
+    }
+
+    /// Maps a physical address as device memory of type `T`.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the address actually points to a value of type `T`, as well as
+    /// proper access synchronization.
+    pub unsafe fn map(
+        base: PhysicalAddress,
+        attrs: DeviceMemoryAttributes,
+    ) -> Result<DeviceMemoryIo<'a, T>, Error> {
+        let inner = RawDeviceMemoryMapping::map(base.into_raw(), size_of::<T>(), attrs)?;
+        let value = &*(inner.address as *const T);
+
+        Ok(DeviceMemoryIo {
+            inner: Arc::new(inner),
+            value,
+        })
+    }
+}
+
+impl<'a, T: ?Sized> DeviceMemoryIo<'a, T> {
+    /// Extracts an inner reference to `U` from within `T`.
+    ///
+    /// # Safety
+    ///
+    /// To use this safely, the caller must guarantee the "extracted" reference will be the only
+    /// valid reference and that `&U` won't be usable again through its parent `T`. The caller must
+    /// also guarantee that `&U` is, in fact, contained within `T`.
+    pub unsafe fn extract<'b, U: ?Sized + 'b, F: FnOnce(&'a T) -> *const U>(
+        &'a self,
+        f: F,
+    ) -> DeviceMemoryIo<'b, U> {
+        let value = f(self.value);
+
+        DeviceMemoryIo {
+            inner: self.inner.clone(),
+            value: &*value,
+        }
+    }
+}
+
+impl<'a, T: ?Sized> Deref for DeviceMemoryIo<'a, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.value
+    }
+}
+
+impl<T: ?Sized> AsPhysicalAddress for DeviceMemoryIo<'_, T> {
+    unsafe fn as_physical_address(&self) -> PhysicalAddress {
+        PhysicalAddress::from_raw(self.inner.base_address)
+    }
+}
+
+unsafe impl<T: ?Sized> Send for DeviceMemoryIo<'_, T> {}
+impl<'a, T: Sized> DeviceMemoryIoMut<'a, T> {
+    /// Maps a physical address as device memory to a slice `[T; len]`
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the address actually points to a value of type `T`, as well as
+    /// proper access synchronization. The caller must also ensure the `len` is valid.
+    pub unsafe fn map_slice(
+        base: PhysicalAddress,
+        len: usize,
+        attrs: DeviceMemoryAttributes,
+    ) -> Result<DeviceMemoryIoMut<'a, [T]>, Error> {
+        let layout = Layout::array::<T>(len).unwrap();
+        let inner = RawDeviceMemoryMapping::map(base.into_raw(), layout.size(), attrs)?;
+        let value = core::slice::from_raw_parts_mut(inner.address as *mut T, len);
+
+        Ok(DeviceMemoryIoMut { inner, value })
+    }
+}
+
+impl<'a, T: ?Sized> Deref for DeviceMemoryIoMut<'a, T> {
+    type Target = T;
+
+    fn deref(&self) -> &Self::Target {
+        self.value
+    }
+}
+
+impl<'a, T: ?Sized> DerefMut for DeviceMemoryIoMut<'a, T> {
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        self.value
+    }
+}
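A sketch of mapping an MMIO register block with the wrappers above; the register layout, base address, and the availability of `DeviceMemoryAttributes::default()` are assumptions made for the example:

```rust
use libk_mm::device::{DeviceMemoryAttributes, DeviceMemoryIo};
use libk_mm_interface::address::{FromRaw, PhysicalAddress};
use yggdrasil_abi::error::Error;

// Hypothetical UART register block
#[repr(C)]
struct UartRegs {
    dr: u32,
    fr: u32,
}

unsafe fn map_uart() -> Result<DeviceMemoryIo<'static, UartRegs>, Error> {
    // Board-specific base address, shown here as a placeholder
    let base = PhysicalAddress::from_raw(0x0900_0000usize);
    DeviceMemoryIo::map(base, DeviceMemoryAttributes::default())
}
```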
diff --git a/kernel/libk/libk-mm/src/lib.rs b/kernel/libk/libk-mm/src/lib.rs
new file mode 100644
index 00000000..e40e32ea
--- /dev/null
+++ b/kernel/libk/libk-mm/src/lib.rs
@@ -0,0 +1,340 @@
+#![feature(
+    strict_provenance,
+    maybe_uninit_slice,
+    slice_ptr_get,
+    step_trait,
+    const_trait_impl,
+    maybe_uninit_as_bytes,
+    effects
+)]
+#![no_std]
+
+extern crate alloc;
+
+use core::{
+    alloc::Layout,
+    fmt,
+    mem::{size_of, MaybeUninit},
+    ops::{Deref, DerefMut},
+};
+
+use address::Virtualize;
+use libk_mm_interface::{
+    address::{AsPhysicalAddress, PhysicalAddress},
+    table::{MapAttributes, TableAllocator},
+};
+use yggdrasil_abi::error::Error;
+
+pub mod address;
+pub mod device;
+pub mod phys;
+pub mod pointer;
+pub mod process;
+
+pub use libk_mm_interface::table;
+
+pub struct TableAllocatorImpl;
+
+impl TableAllocator for TableAllocatorImpl {
+    fn allocate_page_table() -> Result<PhysicalAddress, Error> {
+        phys::alloc_page()
+    }
+
+    unsafe fn free_page_table(address: PhysicalAddress) {
+        phys::free_page(address)
+    }
+}
+
+// TODO find a way to integrate this nicely with Architecture?
+pub const L3_PAGE_SIZE: usize = 1 << 12;
+pub const L2_PAGE_SIZE: usize = 1 << 21;
+
+pub trait PageProvider {
+    fn get_page(&self, offset: u64) -> Result<PhysicalAddress, Error>;
+    fn release_page(&self, offset: u64, phys: PhysicalAddress) -> Result<(), Error>;
+    fn clone_page(
+        &self,
+        offset: u64,
+        src_phys: PhysicalAddress,
+        src_attrs: MapAttributes,
+    ) -> Result<PhysicalAddress, Error>;
+}
+
+pub struct PageBox<T: ?Sized> {
+    value: *mut T,
+    page_count: usize,
+}
+
+impl<T> PageBox<T> {
+    #[inline]
+    fn alloc_slice(count: usize, zeroed: bool) -> Result<(PhysicalAddress, usize), Error> {
+        // TODO hardcoded page sizes
+        let layout = Layout::array::<T>(count).unwrap();
+        let page_count = (layout.size() + L3_PAGE_SIZE - 1) / L3_PAGE_SIZE;
+        let base = phys::alloc_pages_contiguous(page_count)?;
+        if zeroed {
+            let ptr = base.virtualize() as *mut u8;
+            let slice = unsafe { core::slice::from_raw_parts_mut(ptr, page_count * L3_PAGE_SIZE) };
+            slice.fill(0);
+        }
+        Ok((base, page_count))
+    }
+
+    #[inline]
+    fn alloc() -> Result<(PhysicalAddress, usize), Error> {
+        let page_count = (size_of::<T>() + L3_PAGE_SIZE - 1) / L3_PAGE_SIZE;
+        Ok((phys::alloc_pages_contiguous(page_count)?, page_count))
+    }
+
+    pub fn new(init: T) -> Result<PageBox<T>, Error> {
+        let (base, page_count) = Self::alloc()?;
+        let value = base.virtualize() as *mut T;
+
+        unsafe {
+            value.write(init);
+        }
+
+        let result = PageBox { value, page_count };
+        result.trace_created();
+        Ok(result)
+    }
+
+    pub fn new_slice(item: T, count: usize) -> Result<PageBox<[T]>, Error>
+    where
+        T: Copy,
+    {
+        let (base, page_count) = Self::alloc_slice(count, false)?;
+        let base_virt_ptr = base.virtualize() as *mut T;
+        let value = core::ptr::slice_from_raw_parts_mut(base_virt_ptr, count);
+
+        for i in 0..count {
+            unsafe {
+                value.get_unchecked_mut(i).write(item);
+            }
+        }
+
+        let result = PageBox { value, page_count };
+        result.trace_created();
+        Ok(result)
+    }
+
+    pub fn new_slice_with<F: Fn(usize) -> T>(f: F, count: usize) -> Result<PageBox<[T]>, Error> {
+        let mut value = Self::new_uninit_slice(count)?;
+
+        for i in 0..count {
+            value[i].write(f(i));
+        }
+
+        Ok(unsafe { value.assume_init_slice() })
+    }
+
+    pub fn new_uninit() -> Result<PageBox<MaybeUninit<T>>, Error> {
+        let (base, page_count) = PageBox::<MaybeUninit<T>>::alloc()?;
+        let value = base.virtualize() as *mut MaybeUninit<T>;
+        let result = PageBox { value, page_count };
+        result.trace_created();
+        Ok(result)
+    }
+
+    pub fn new_uninit_slice(count: usize) -> Result<PageBox<[MaybeUninit<T>]>, Error> {
+        let (base, page_count) = PageBox::<MaybeUninit<T>>::alloc_slice(count, false)?;
+        let base_virt_ptr = base.virtualize() as *mut MaybeUninit<T>;
+        let value = core::ptr::slice_from_raw_parts_mut(base_virt_ptr, count);
+        let result = PageBox { value, page_count };
+        result.trace_created();
+        Ok(result)
+    }
+
+    pub fn new_zeroed_slice(count: usize) -> Result<PageBox<[MaybeUninit<T>]>, Error> {
+        let (base, page_count) = PageBox::<MaybeUninit<T>>::alloc_slice(count, true)?;
+        let base_virt_ptr = base.virtualize() as *mut MaybeUninit<T>;
+        let value = core::ptr::slice_from_raw_parts_mut(base_virt_ptr, count);
+        let result = PageBox { value, page_count };
+        result.trace_created();
+        Ok(result)
+    }
+}
+
+impl<T: ?Sized> PageBox<T> {
+    #[inline]
+    pub fn as_ptr(&self) -> *const T {
+        self.value as _
+    }
+
+    #[inline]
+    fn trace_created(&self) {
+        log::trace!(
+            "Alloc PageBox<{}> @ {:p}, {}",
+            core::any::type_name::<T>(),
+            self.value,
+            self.page_count
+        );
+    }
+
+    #[inline]
+    fn trace_dropped(&self) {
+        log::trace!(
+            "Free PageBox<{}> @ {:p}, {}",
+            core::any::type_name::<T>(),
+            self.value,
+            self.page_count
+        );
+    }
+}
+
+impl<T> PageBox<[T]> {
+    pub fn from_iter_exact<I: IntoIterator<Item = T>>(it: I) -> Result<Self, Error>
+    where
+        I::IntoIter: ExactSizeIterator,
+    {
+        let it = it.into_iter();
+        let mut slice = PageBox::new_uninit_slice(it.len())?;
+        for (i, item) in it.enumerate() {
+            slice[i].write(item);
+        }
+        let slice = unsafe { slice.assume_init_slice() };
+        Ok(slice)
+    }
+}
+
+impl<T> PageBox<MaybeUninit<T>> {
+    /// Consumes the [PageBox], returning a new one with [MaybeUninit] removed.
+    ///
+    /// # Safety
+    ///
+    /// See [MaybeUninit::assume_init_mut].
+    pub unsafe fn assume_init(self) -> PageBox<T> {
+        // SAFETY: Memory-safe, as:
+        // 1. MaybeUninit<T> is transparent
+        // 2. self.value still points to the same memory and is not deallocated
+        let page_count = self.page_count;
+        let value = MaybeUninit::assume_init_mut(&mut *self.value);
+
+        // Prevent deallocation of the PageBox with MaybeUninit
+        core::mem::forget(self);
+
+        PageBox { value, page_count }
+    }
+
+    /// Consumes the [PageBox], reinterpreting its contents as a raw byte slice.
+    ///
+    /// # Safety
+    ///
+    /// See [MaybeUninit::as_bytes_mut].
+    pub unsafe fn into_byte_slice(self) -> PageBox<[u8]> {
+        let page_count = self.page_count;
+        let value = MaybeUninit::slice_assume_init_mut(MaybeUninit::as_bytes_mut(&mut *self.value));
+
+        core::mem::forget(self);
+
+        PageBox { value, page_count }
+    }
+}
+impl<T> PageBox<[MaybeUninit<T>]> {
+    /// Consumes the [PageBox], returning a new one with [MaybeUninit] removed.
+    ///
+    /// # Safety
+    ///
+    /// See [MaybeUninit::slice_assume_init_mut].
+    pub unsafe fn assume_init_slice(self) -> PageBox<[T]> {
+        // SAFETY: Memory-safe, as:
+        // 1. MaybeUninit<T> is transparent
+        // 2. self.value still points to the same memory and is not deallocated
+        let page_count = self.page_count;
+        let value = MaybeUninit::slice_assume_init_mut(&mut *self.value);
+
+        core::mem::forget(self);
+
+        PageBox { value, page_count }
+    }
+
+    /// Returns a reference to the slice data with [MaybeUninit] removed.
+    ///
+    /// # Safety
+    ///
+    /// See [MaybeUninit::slice_assume_init_ref]
+    pub unsafe fn assume_init_slice_ref(&self) -> &[T] {
+        MaybeUninit::slice_assume_init_ref(self.deref())
+    }
+
+    /// Returns a mutable reference to the slice data with [MaybeUninit] removed.
+    ///
+    /// # Safety
+    ///
+    /// See [MaybeUninit::slice_assume_init_mut]
+    pub unsafe fn assume_init_slice_mut(&mut self) -> &mut [T] {
+        MaybeUninit::slice_assume_init_mut(self.deref_mut())
+    }
+
+    /// Fills a slice of MaybeUninit with zeroes.
+    ///
+    /// # Safety
+    ///
+    /// Unsafe: will not drop possibly previously written data. Only meant for [Copy] and other
+    /// trivial types.
+    pub unsafe fn zero(p: &mut Self) {
+        let ptr = p.as_mut_ptr() as *mut u8;
+        let slice = core::slice::from_raw_parts_mut(ptr, p.page_count * L3_PAGE_SIZE);
+        slice.fill(0);
+    }
+}
+
+impl<T: ?Sized> AsPhysicalAddress for PageBox<T> {
+    #[inline]
+    unsafe fn as_physical_address(&self) -> PhysicalAddress {
+        PhysicalAddress::from_virtualized(self.value.addr())
+    }
+}
+
+impl<T: ?Sized> Deref for PageBox<T> {
+    type Target = T;
+
+    #[inline(always)]
+    fn deref(&self) -> &Self::Target {
+        unsafe { &*self.value }
+    }
+}
+
+impl<T: ?Sized> DerefMut for PageBox<T> {
+    #[inline(always)]
+    fn deref_mut(&mut self) -> &mut Self::Target {
+        unsafe { &mut *self.value }
+    }
+}
+
+impl<T: ?Sized> Drop for PageBox<T> {
+    fn drop(&mut self) {
+        self.trace_dropped();
+        unsafe {
+            core::ptr::drop_in_place(self.value);
+        }
+        // SAFETY: Safe, pointer obtained through "virtualize"
+        let base = PhysicalAddress::from_virtualized(self.value.addr());
+        for i in 0..self.page_count {
+            // SAFETY: Safe, page allocated only by this PageBox
+            unsafe {
+                phys::free_page(base.add(L3_PAGE_SIZE * i));
+            }
+        }
+    }
+}
+
+impl<T: ?Sized> fmt::Pointer for PageBox<T> {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        self.value.fmt(f)
+    }
+}
+
+impl<T: ?Sized + fmt::Debug> fmt::Debug for PageBox<T> {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Debug::fmt(self.deref(), f)
+    }
+}
+
+impl<T: ?Sized + fmt::Display> fmt::Display for PageBox<T> {
+    #[inline]
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        fmt::Display::fmt(self.deref(), f)
+    }
+}
+
+unsafe impl<T: ?Sized + Send> Send for PageBox<T> {}
+unsafe impl<T: ?Sized + Sync> Sync for PageBox<T> {}
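A short sketch of the PageBox API above (page-granular allocations that release their pages on drop):

```rust
use libk_mm::PageBox;
use yggdrasil_abi::error::Error;

fn demo() -> Result<(), Error> {
    // A single value, backed by a whole page
    let value = PageBox::new(42u64)?;
    assert_eq!(*value, 42);

    // An uninitialized buffer, filled in manually and then made usable
    let mut buf = PageBox::new_uninit_slice(512)?;
    for (i, slot) in buf.iter_mut().enumerate() {
        slot.write(i as u32);
    }
    let buf = unsafe { buf.assume_init_slice() };
    assert_eq!(buf[10], 10);

    Ok(())
} // both allocations return their pages to the physical manager here
```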
diff --git a/kernel/libk/libk-mm/src/phys/manager.rs b/kernel/libk/libk-mm/src/phys/manager.rs
new file mode 100644
index 00000000..44ca720e
--- /dev/null
+++ b/kernel/libk/libk-mm/src/phys/manager.rs
@@ -0,0 +1,200 @@
+//! Physical memory manager implementation
+use core::sync::atomic::{AtomicUsize, Ordering};
+
+use kernel_arch::KernelTableManagerImpl;
+use libk_mm_interface::{
+    address::{FromRaw, IntoRaw, PhysicalAddress},
+    pointer::PhysicalRefMut,
+};
+use yggdrasil_abi::{error::Error, system::SystemMemoryStats};
+
+use crate::L3_PAGE_SIZE;
+
+pub type BitmapWord = u64;
+
+pub(super) const BITMAP_WORD_SIZE: usize = BitmapWord::BITS as usize;
+pub(super) const BITMAP_PAGE_COUNT: usize = 512;
+
+const HUGE_PAGE_WORD_COUNT: usize = 512 / BITMAP_WORD_SIZE;
+
+pub(super) const TRACKED_PAGE_LIMIT: usize = (BITMAP_PAGE_COUNT * 4096) * 8;
+
+struct MemoryStats {
+    available_pages: AtomicUsize,
+    used_pages: AtomicUsize,
+}
+
+static STATS: MemoryStats = MemoryStats {
+    available_pages: AtomicUsize::new(0),
+    used_pages: AtomicUsize::new(0),
+};
+
+/// Physical memory management interface
+pub struct PhysicalMemoryManager {
+    bitmap: PhysicalRefMut<'static, [u64], KernelTableManagerImpl>,
+    last_free_bit: usize,
+    offset: usize,
+    page_count: usize,
+}
+
+impl PhysicalMemoryManager {
+    /// Constructs a new physical memory manager, marking all pages as used initially.
+    ///
+    /// # Safety
+    ///
+    /// `bitmap_phys_base` must point to a writable physical region large enough to hold the
+    /// tracking bitmap for `page_count` pages.
+    pub unsafe fn new(
+        bitmap_phys_base: PhysicalAddress,
+        offset: usize,
+        page_count: usize,
+    ) -> PhysicalMemoryManager {
+        let bitmap_len = (page_count + (BITMAP_WORD_SIZE - 1)) / BITMAP_WORD_SIZE;
+        let mut bitmap = PhysicalRefMut::<'static, u64, KernelTableManagerImpl>::map_slice(
+            bitmap_phys_base,
+            bitmap_len,
+        );
+
+        bitmap.fill(BitmapWord::MAX);
+
+        Self {
+            bitmap,
+            page_count,
+            offset,
+            last_free_bit: 0,
+        }
+    }
+
+    #[inline]
+    fn mark_alloc(&mut self, index: usize) {
+        self.bitmap[index / BITMAP_WORD_SIZE] |= 1 << (index & (BITMAP_WORD_SIZE - 1));
+    }
+
+    #[inline]
+    fn mark_free(&mut self, index: usize) {
+        self.bitmap[index / BITMAP_WORD_SIZE] &= !(1 << (index & (BITMAP_WORD_SIZE - 1)));
+    }
+
+    #[inline(always)]
+    fn is_alloc(&self, index: usize) -> bool {
+        self.bitmap[index / BITMAP_WORD_SIZE] & (1 << (index & (BITMAP_WORD_SIZE - 1))) != 0
+    }
+
+    /// Allocates a single physical page
+    pub fn alloc_page(&mut self) -> Result<PhysicalAddress, Error> {
+        for i in self.last_free_bit..self.page_count {
+            if self.is_alloc(i) {
+                continue;
+            }
+
+            self.last_free_bit = i + 1;
+            self.mark_alloc(i);
+
+            STATS.used_pages.fetch_add(1, Ordering::Relaxed);
+
+            return Ok(PhysicalAddress::from_raw(i * L3_PAGE_SIZE + self.offset));
+        }
+
+        if self.last_free_bit != 0 {
+            self.last_free_bit = 0;
+            self.alloc_page()
+        } else {
+            Err(Error::OutOfMemory)
+        }
+    }
+
+    /// Allocates a single aligned 2MiB page
+    pub fn alloc_2m_page(&mut self) -> Result<PhysicalAddress, Error> {
+        let aligned_bit = self.last_free_bit & !511;
+
+        'l0: for i in (aligned_bit..self.page_count).step_by(512) {
+            for j in 0..HUGE_PAGE_WORD_COUNT {
+                if self.bitmap[i / BITMAP_WORD_SIZE + j] != 0 {
+                    continue 'l0;
+                }
+            }
+
+            for j in 0..HUGE_PAGE_WORD_COUNT {
+                self.bitmap[i / BITMAP_WORD_SIZE + j] = BitmapWord::MAX;
+            }
+            self.last_free_bit = i + 512;
+
+            STATS.used_pages.fetch_add(512, Ordering::Relaxed);
+
+            return Ok(PhysicalAddress::from_raw(i * L3_PAGE_SIZE + self.offset));
+        }
+
+        if self.last_free_bit != 0 {
+            self.last_free_bit = 0;
+            self.alloc_2m_page()
+        } else {
+            Err(Error::OutOfMemory)
+        }
+    }
+    /// Allocates a contiguous range of physical pages
+    pub fn alloc_contiguous_pages(&mut self, count: usize) -> Result<PhysicalAddress, Error> {
+        'l0: for i in self.last_free_bit..self.page_count {
+            for j in 0..count {
+                if self.is_alloc(i + j) {
+                    continue 'l0;
+                }
+            }
+
+            for j in 0..count {
+                self.mark_alloc(i + j);
+            }
+            self.last_free_bit = i + count;
+
+            STATS.used_pages.fetch_add(count, Ordering::Relaxed);
+
+            return Ok(PhysicalAddress::from_raw(i * L3_PAGE_SIZE + self.offset));
+        }
+
+        if self.last_free_bit != 0 {
+            self.last_free_bit = 0;
+            self.alloc_contiguous_pages(count)
+        } else {
+            Err(Error::OutOfMemory)
+        }
+    }
+
+    /// Deallocates a physical memory page.
+    ///
+    /// # Safety
+    ///
+    /// `addr` must be a page-aligned physical address previously allocated by this implementation.
+    pub unsafe fn free_page(&mut self, page: PhysicalAddress) {
+        let page: usize = page.into_raw();
+        assert!(page >= self.offset);
+        let index = (page - self.offset) / L3_PAGE_SIZE;
+
+        STATS.used_pages.fetch_sub(1, Ordering::Relaxed);
+
+        assert!(self.is_alloc(index));
+        self.mark_free(index);
+    }
+
+    /// Marks a previously reserved page as available.
+    ///
+    /// # Panics
+    ///
+    /// Will panic if the address does not point to a valid, reserved (and unallocated) page.
+    pub fn add_available_page(&mut self, page: PhysicalAddress) {
+        let page: usize = page.into_raw();
+        assert!(page >= self.offset);
+        let index = (page - self.offset) / L3_PAGE_SIZE;
+
+        STATS.available_pages.fetch_add(1, Ordering::Relaxed);
+
+        assert!(self.is_alloc(index));
+        self.mark_free(index);
+    }
+
+    /// Returns memory usage stats
+    pub fn stats() -> SystemMemoryStats {
+        let available = STATS.available_pages.load(Ordering::Relaxed);
+        let used = STATS.used_pages.load(Ordering::Relaxed);
+        let free = available - used;
+
+        SystemMemoryStats {
+            total_usable_pages: available,
+            allocated_pages: used,
+            free_pages: free,
+            page_size: L3_PAGE_SIZE,
+        }
+    }
+}
diff --git a/kernel/libk/libk-mm/src/phys/mod.rs b/kernel/libk/libk-mm/src/phys/mod.rs
new file mode 100644
index 00000000..3b28a567
--- /dev/null
+++ b/kernel/libk/libk-mm/src/phys/mod.rs
@@ -0,0 +1,248 @@
+use core::ops::Range;
+
+use kernel_arch::{absolute_address, mem::PhysicalMemoryAllocator};
+use libk_mm_interface::address::{FromRaw, IntoRaw, PhysicalAddress};
+use libk_util::{sync::IrqSafeSpinlock, OneTimeInit};
+use yggdrasil_abi::{error::Error, system::SystemMemoryStats};
+
+use crate::{
+    phys::{
+        manager::BITMAP_WORD_SIZE,
+        reserved::{is_reserved, reserve_region},
+    },
+    L2_PAGE_SIZE, L3_PAGE_SIZE,
+};
+
+use self::manager::{PhysicalMemoryManager, TRACKED_PAGE_LIMIT};
+
+mod manager;
+pub mod reserved;
+
+pub struct GlobalPhysicalAllocator;
+
+/// Defines a usable memory region
+#[derive(Clone, Copy, Debug)]
+pub struct PhysicalMemoryRegion {
+    /// Start of the region
+    pub base: PhysicalAddress,
+    /// Length of the region
+    pub size: usize,
+}
+
+// 8 * 4096 bits per page, 1 page per bit
+const MEMORY_UPPER_LIMIT: PhysicalAddress = PhysicalAddress::from_raw(TRACKED_PAGE_LIMIT * 4096);
+
+/// Global physical memory manager
+pub static PHYSICAL_MEMORY: OneTimeInit<IrqSafeSpinlock<PhysicalMemoryManager>> =
+    OneTimeInit::new();
+
+impl PhysicalMemoryRegion {
+    /// Returns the end address of the region
+    pub const fn end(&self) -> PhysicalAddress {
+        self.base.add(self.size)
+    }
+
+    /// Returns an address range covered by the region
+    pub fn range(&self) -> Range<PhysicalAddress> {
+        self.base..self.end()
+    }
+
+    /// Constrains the [PhysicalMemoryRegion] to global memory limits set in the kernel
+    pub fn clamp(self, limit: PhysicalAddress) -> Option<(PhysicalAddress, PhysicalAddress)> {
+        let start = self.base.min(limit);
+        let end = self.end().min(limit);
+
+        if start < end {
+            Some((start, end))
+        } else {
+            None
+        }
+    }
+}
+
+impl PhysicalMemoryAllocator for GlobalPhysicalAllocator {
+    type Address = PhysicalAddress;
+
+    fn allocate_page() -> Result<Self::Address, Error> {
+        alloc_page()
+    }
+
+    fn allocate_contiguous_pages(count: usize) -> Result<Self::Address, Error> {
+        alloc_pages_contiguous(count)
+    }
+
+    unsafe fn free_page(page: Self::Address) {
+        free_page(page)
+    }
+}
+
+/// Allocates a single physical page from the global manager
+pub fn alloc_page() -> Result<PhysicalAddress, Error> {
+    PHYSICAL_MEMORY.get().lock().alloc_page()
+}
+
+/// Allocates a contiguous range of physical pages from the global manager
+pub fn alloc_pages_contiguous(count: usize) -> Result<PhysicalAddress, Error> {
+    PHYSICAL_MEMORY.get().lock().alloc_contiguous_pages(count)
+}
+
+/// Allocates a single 2MiB page of physical memory from the global manager
+pub fn alloc_2m_page() -> Result<PhysicalAddress, Error> {
+    PHYSICAL_MEMORY.get().lock().alloc_2m_page()
+}
+
+/// Returns physical memory stats
+pub fn stats() -> SystemMemoryStats {
+    PhysicalMemoryManager::stats()
+}
+
+/// Deallocates a physical memory page.
+///
+/// # Safety
+///
+/// `addr` must be a page-aligned physical address previously allocated by this implementation.
+pub unsafe fn free_page(addr: PhysicalAddress) {
+    PHYSICAL_MEMORY.get().lock().free_page(addr)
+}
+
+fn physical_memory_range<I: IntoIterator<Item = PhysicalMemoryRegion>>(
+    it: I,
+) -> Option<(PhysicalAddress, PhysicalAddress)> {
+    let mut start = PhysicalAddress::MAX;
+    let mut end = PhysicalAddress::MIN;
+
+    for (reg_start, reg_end) in it.into_iter().filter_map(|r| r.clamp(MEMORY_UPPER_LIMIT)) {
+        if reg_start < start {
+            start = reg_start;
+        }
+        if reg_end > end {
+            end = reg_end;
+        }
+    }
+
+    if start == PhysicalAddress::MAX || end == PhysicalAddress::MIN {
+        None
+    } else {
+        Some((start, end))
+    }
+}
+
+/// Locates a contiguous region of available physical memory within the memory region list
+pub fn find_contiguous_region<I: IntoIterator<Item = PhysicalMemoryRegion>>(
+    it: I,
+    count: usize,
+) -> Option<PhysicalAddress> {
+    for (reg_start, reg_end) in it.into_iter().filter_map(|r| r.clamp(MEMORY_UPPER_LIMIT)) {
+        let mut collected = 0;
+        let mut base_addr = None;
+
+        for addr in (reg_start..reg_end).step_by(L3_PAGE_SIZE) {
+            if is_reserved(addr) {
+                collected = 0;
+                base_addr = None;
+                continue;
+            }
+            if base_addr.is_none() {
+                base_addr = Some(addr);
+            }
+            collected += 1;
+            if collected == count {
+                return base_addr;
+            }
+        }
+    }
+    todo!()
+}
+//
+/// Initializes physical memory manager from given available memory region iterator.
+///
+/// 1. Finds a non-reserved range to place the page tracking array.
+/// 2. Adds all non-reserved pages to the manager.
+///
+/// # Safety
+///
+/// The caller must ensure this function has not been called before and that the regions
+/// are valid and actually available.
+pub unsafe fn init_from_iter<
+    I: Iterator<Item = PhysicalMemoryRegion> + Clone,
+    Map: FnOnce(I, PhysicalAddress, PhysicalAddress) -> Result<(), Error>,
+>(
+    it: I,
+    map_physical_memory: Map,
+) -> Result<(), Error> {
+    // Map the physical memory
+    let (phys_start, phys_end) = physical_memory_range(it.clone()).unwrap();
+
+    reserve_region("kernel", kernel_physical_memory_region());
+
+    map_physical_memory(it.clone(), phys_start, phys_end)?;
+
+    let total_count = (phys_end - phys_start) / L3_PAGE_SIZE;
+    let page_bitmap_size = (total_count + BITMAP_WORD_SIZE - 1) / (BITMAP_WORD_SIZE / 8);
+    let page_bitmap_page_count = (page_bitmap_size + L3_PAGE_SIZE - 1) / L3_PAGE_SIZE;
+
+    let page_bitmap_phys_base = find_contiguous_region(it.clone(), page_bitmap_page_count).unwrap();
+
+    reserve_region(
+        "page-bitmap",
+        PhysicalMemoryRegion {
+            base: page_bitmap_phys_base,
+            size: page_bitmap_page_count * L3_PAGE_SIZE,
+        },
+    );
+
+    if IntoRaw::<usize>::into_raw(phys_start) & (L2_PAGE_SIZE - 1) != 0 {
+        todo!();
+    }
+
+    let mut manager =
+        PhysicalMemoryManager::new(page_bitmap_phys_base, phys_start.into_raw(), total_count);
+    let mut collected = 0;
+    const MAX_MEMORY: usize = 64 * 1024;
+
+    for (start, end) in it.into_iter().filter_map(|r| r.clamp(MEMORY_UPPER_LIMIT)) {
+        for page in (start..end).step_by(L3_PAGE_SIZE) {
+            if collected >= MAX_MEMORY {
+                break;
+            }
+
+            if is_reserved(page) {
+                continue;
+            }
+
+            manager.add_available_page(page);
+            collected += 1;
+        }
+    }
+
+    PHYSICAL_MEMORY.init(IrqSafeSpinlock::new(manager));
+
+    Ok(())
+}
+
+fn kernel_physical_memory_region() -> PhysicalMemoryRegion {
+    extern "C" {
+        static __kernel_phys_start: u8;
+        static __kernel_size: u8;
+    }
+
+    let base = PhysicalAddress::from_raw(absolute_address!(__kernel_phys_start));
+    let size = absolute_address!(__kernel_size);
+
+    PhysicalMemoryRegion { base, size }
+}
+
+#[no_mangle]
+fn __allocate_page() -> Result<PhysicalAddress, Error> {
+    alloc_page()
+}
+
+#[no_mangle]
+fn __allocate_contiguous_pages(count: usize) -> Result<PhysicalAddress, Error> {
+    alloc_pages_contiguous(count)
+}
+
+#[no_mangle]
+unsafe fn __free_page(page: PhysicalAddress) {
+    free_page(page)
+}
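After `init_from_iter` has run, the module-level helpers are the intended interface to the global manager; a small sketch:

```rust
use libk_mm::phys;
use yggdrasil_abi::error::Error;

fn demo() -> Result<(), Error> {
    // Take a frame from the global manager...
    let page = phys::alloc_page()?;
    // ...use it...
    // SAFETY: the page came from this allocator and is no longer referenced
    unsafe { phys::free_page(page) };

    let stats = phys::stats();
    log::debug!(
        "{}/{} pages allocated",
        stats.allocated_pages,
        stats.total_usable_pages
    );
    Ok(())
}
```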
diff --git a/kernel/libk/libk-mm/src/phys/reserved.rs b/kernel/libk/libk-mm/src/phys/reserved.rs
new file mode 100644
index 00000000..bfe3b75b
--- /dev/null
+++ b/kernel/libk/libk-mm/src/phys/reserved.rs
@@ -0,0 +1,27 @@
+//! Utilities for handling reserved memory regions
+
+use libk_mm_interface::address::PhysicalAddress;
+use libk_util::StaticVector;
+
+use crate::phys::PhysicalMemoryRegion;
+
+static mut RESERVED_MEMORY: StaticVector<PhysicalMemoryRegion, 8> = StaticVector::new();
+
+/// Marks a region of physical memory as reserved.
+///
+/// # Safety
+///
+/// Can only be called from initialization code **before** the physical memory manager is
+/// initialized.
+pub unsafe fn reserve_region(_reason: &str, region: PhysicalMemoryRegion) {
+    RESERVED_MEMORY.push(region);
+}
+
+/// Returns `true` if `addr` refers to any reserved memory region
+pub fn is_reserved(addr: PhysicalAddress) -> bool {
+    for region in unsafe { RESERVED_MEMORY.iter() } {
+        if region.range().contains(&addr) {
+            return true;
+        }
+    }
+    false
+}
diff --git a/kernel/libk/libk-mm/src/pointer.rs b/kernel/libk/libk-mm/src/pointer.rs
new file mode 100644
index 00000000..1f681f33
--- /dev/null
+++ b/kernel/libk/libk-mm/src/pointer.rs
@@ -0,0 +1,6 @@
+use kernel_arch::KernelTableManagerImpl;
+
+pub type PhysicalRef<'a, T> =
+    libk_mm_interface::pointer::PhysicalRef<'a, T, KernelTableManagerImpl>;
+pub type PhysicalRefMut<'a, T> =
+    libk_mm_interface::pointer::PhysicalRefMut<'a, T, KernelTableManagerImpl>;
diff --git a/kernel/libk/libk-mm/src/process.rs b/kernel/libk/libk-mm/src/process.rs
new file mode 100644
index 00000000..9abcf7a1
--- /dev/null
+++ b/kernel/libk/libk-mm/src/process.rs
@@ -0,0 +1,451 @@
+use core::ops::Range;
+
+use alloc::sync::Arc;
+use kernel_arch::ProcessAddressSpaceImpl;
+use libk_mm_interface::{
+    address::PhysicalAddress,
+    process::ProcessAddressSpaceManager,
+    table::{MapAttributes, TableAllocator},
+};
+use libk_util::sync::IrqSafeSpinlock;
+use vmalloc::{RangeData, VirtualMemoryAllocator};
+use yggdrasil_abi::error::Error;
+
+use crate::{
+    phys,
+    pointer::{PhysicalRef, PhysicalRefMut},
+    PageProvider, TableAllocatorImpl, L3_PAGE_SIZE,
+};
+
+/// Describes how the physical memory is provided for the mapping
+#[derive(Clone)]
+pub enum VirtualRangeBacking {
+    /// Memory is taken from regular "anonymous" physical memory
+    Anonymous,
+    /// Mapping is backed by file blocks/device memory
+    File(FileBacking),
+}
+
+/// Describes a file-backed memory range provider
+#[derive(Clone)]
+pub struct FileBacking {
+    offset: u64,
+    file: Arc<dyn PageProvider>,
+}
+
+impl VirtualRangeBacking {
+    /// Creates a file-backed memory range provider
+    pub fn file(offset: u64, file: Arc<dyn PageProvider>) -> Result<Self, Error> {
+        // XXX
+        // if !(offset as usize).is_page_aligned_for::<L3>() {
+        //     todo!();
+        // }
+
+        Ok(Self::File(FileBacking { offset, file }))
+    }
+
+    /// Creates a range of anonymous memory
+    pub fn anonymous() -> Self {
+        Self::Anonymous
+    }
+}
+
+impl PageProvider for VirtualRangeBacking {
+    fn get_page(&self, offset: u64) -> Result<PhysicalAddress, Error> {
+        match self {
+            Self::Anonymous => phys::alloc_page(),
+            Self::File(f) => f.file.get_page(f.offset + offset),
+        }
+    }
+
+    fn release_page(&self, offset: u64, phys: PhysicalAddress) -> Result<(), Error> {
+        match self {
+            Self::Anonymous => unsafe {
+                phys::free_page(phys);
+                Ok(())
+            },
+            Self::File(f) => f.file.release_page(f.offset + offset, phys),
+        }
+    }
+
+    fn clone_page(
+        &self,
+        _offset: u64,
+        src_phys: PhysicalAddress,
+        _src_attrs: MapAttributes,
+    ) -> Result<PhysicalAddress, Error> {
+        match self {
+            Self::Anonymous => {
+                let dst_page = phys::alloc_page()?;
+                let src_map = unsafe { PhysicalRef::<[u8; 4096]>::map(src_phys) };
+                let mut dst_map = unsafe { PhysicalRefMut::<[u8; 4096]>::map(dst_page) };
+                dst_map.copy_from_slice(src_map.as_ref());
+                Ok(dst_page)
+            }
+            Self::File(_) => todo!(),
+        }
+    }
+}
+
+impl PartialEq for VirtualRangeBacking {
+    fn eq(&self, other: &Self) -> bool {
+        matches!(self, Self::Anonymous) && matches!(other, Self::Anonymous)
+    }
+}
+
+impl Eq for VirtualRangeBacking {}
+
+impl RangeData for VirtualRangeBacking {}
+impl core::fmt::Debug for VirtualRangeBacking {
+    fn fmt(&self, f: &mut core::fmt::Formatter<'_>) -> core::fmt::Result {
+        match self {
+            Self::Anonymous => f.debug_struct("VirtualRangeBacking::Anonymous").finish(),
+            Self::File(fb) => f
+                .debug_struct("VirtualRangeBacking::File")
+                .field("offset", &fb.offset)
+                .finish(),
+        }
+    }
+}
+
+struct Inner<TA: TableAllocator> {
+    allocator: VirtualMemoryAllocator<VirtualRangeBacking>,
+    table: ProcessAddressSpaceImpl<TA>,
+}
+
+/// Data structure for managing the address translation and allocation for a single process
+pub struct ProcessAddressSpace {
+    inner: IrqSafeSpinlock<Inner<TableAllocatorImpl>>,
+}
+
+impl<TA: TableAllocator> Inner<TA> {
+    fn try_map_pages(
+        &mut self,
+        address: usize,
+        page_count: usize,
+        backing: &VirtualRangeBacking,
+        attributes: MapAttributes,
+    ) -> Result<(), (usize, Error)> {
+        for i in 0..page_count {
+            let offset = (i * L3_PAGE_SIZE) as u64;
+            let virt = address + i * L3_PAGE_SIZE;
+            let phys = match backing.get_page(offset) {
+                Ok(page) => page,
+                Err(err) => {
+                    return Err((i, err));
+                }
+            };
+
+            if let Err(err) = unsafe { self.table.map_page(virt, phys, attributes) } {
+                backing.release_page(offset, phys).unwrap();
+                return Err((i, err));
+            }
+        }
+
+        Ok(())
+    }
+
+    unsafe fn rollback_allocation(
+        &mut self,
+        start_pfn: usize,
+        pages_mapped: usize,
+        region_size: usize,
+    ) {
+        let unmap_range = start_pfn..start_pfn + pages_mapped;
+        self.allocator
+            .free(start_pfn, region_size, |origin_pfn, pfn_range, backing| {
+                for pfn in pfn_range {
+                    if unmap_range.contains(&pfn) {
+                        let offset = (pfn - origin_pfn) * L3_PAGE_SIZE;
+                        let virt = pfn * L3_PAGE_SIZE;
+
+                        let phys = self.table.unmap_page(virt)?;
+
+                        backing.release_page(offset as u64, phys)?;
+                    }
+                }
+
+                Ok(())
+            })
+            .unwrap();
+    }
+
+    fn map_range(
+        &mut self,
+        address: usize,
+        page_count: usize,
+        backing: VirtualRangeBacking,
+        attributes: MapAttributes,
+    ) -> Result<(), Error> {
+        // If inserting fails, the range cannot be mapped
+        let start_pfn = address / L3_PAGE_SIZE;
+        self.allocator
+            .insert(start_pfn, page_count, backing.clone())?;
+
+        if let Err((mapped, error)) = self.try_map_pages(address, page_count, &backing, attributes)
+        {
+            debug_assert!(mapped < page_count);
+            unsafe {
+                self.rollback_allocation(start_pfn, mapped, page_count);
+            }
+            return Err(error);
+        };
+
+        Ok(())
+    }
+
+    fn map_single(
+        &mut self,
+        address: usize,
+        backing: VirtualRangeBacking,
+        attributes: MapAttributes,
+    ) -> Result<PhysicalAddress, Error> {
+        let start_pfn = address / L3_PAGE_SIZE;
+        self.allocator.insert(start_pfn, 1, backing.clone())?;
+
+        let phys = match backing.get_page(0) {
+            Ok(page) => page,
+            Err(err) => {
+                // Do nothing, as the page has not been allocated to this range yet
+                self.allocator.free(start_pfn, 1, |_, _, _| Ok(())).unwrap();
+                return Err(err);
+            }
+        };
+
+        if let Err(err) = unsafe { self.table.map_page(address, phys, attributes) } {
+            self.allocator
+                .free(start_pfn, 1, |_, _, _| {
+                    // Deallocate the page, but do not unmap, as the mapping failed
+                    unsafe {
+                        phys::free_page(phys);
+                    }
+                    Ok(())
+                })
+                .unwrap();
+            return Err(err);
+        }
+
+        Ok(phys)
+    }
+
+    fn alloc_range(
+        &mut self,
+        page_count: usize,
+        backing: VirtualRangeBacking,
+        attributes: MapAttributes,
+    ) -> Result<usize, Error> {
+        let start_pfn = self.allocator.allocate(page_count, backing.clone())?;
+        let address = start_pfn * L3_PAGE_SIZE;
+
+        if let Err((mapped, error)) = self.try_map_pages(address, page_count, &backing, attributes)
+        {
+            debug_assert!(mapped < page_count);
+            unsafe {
+                self.rollback_allocation(start_pfn, mapped, page_count);
+            }
+            return Err(error);
+        };
+
+        Ok(address)
+    }
+    unsafe fn unmap_range(&mut self, start_address: usize, page_count: usize) -> Result<(), Error> {
+        let start_pfn = start_address / L3_PAGE_SIZE;
+
+        self.allocator
+            .free(start_pfn, page_count, |origin_pfn, pfn_range, backing| {
+                for pfn in pfn_range {
+                    let offset = ((pfn - origin_pfn) * L3_PAGE_SIZE) as u64;
+
+                    let virt = pfn * L3_PAGE_SIZE;
+                    let phys = self.table.unmap_page(virt)?;
+
+                    backing.release_page(offset, phys)?;
+                }
+
+                Ok(())
+            })?;
+
+        Ok(())
+    }
+
+    unsafe fn clear(&mut self) -> Result<(), Error> {
+        self.allocator.clear(|pfn_range, backing| {
+            let origin_pfn = pfn_range.start;
+            for pfn in pfn_range {
+                let offset = ((pfn - origin_pfn) * L3_PAGE_SIZE) as u64;
+
+                let virt = pfn * L3_PAGE_SIZE;
+                let phys = unsafe { self.table.unmap_page(virt)? };
+
+                backing.release_page(offset, phys)?;
+            }
+
+            Ok(())
+        })?;
+
+        // Drop the tables
+        self.table.clear();
+
+        Ok(())
+    }
+
+    fn clone_range(
+        &mut self,
+        source: &Self,
+        pfn_range: Range<usize>,
+        backing: &VirtualRangeBacking,
+    ) -> Result<(), Error> {
+        self.allocator
+            .insert(pfn_range.start, pfn_range.len(), backing.clone())
+            .unwrap();
+
+        let start = pfn_range.start * L3_PAGE_SIZE;
+        let end = pfn_range.end * L3_PAGE_SIZE;
+
+        log::debug!("clone_range({:#x?})", start..end);
+
+        for i in pfn_range {
+            let address = i * L3_PAGE_SIZE;
+            let offset = (address - start) as u64;
+            let (src_page, attrs) = source.table.translate(address).unwrap();
+            let dst_page = backing.clone_page(offset, src_page, attrs)?;
+            unsafe {
+                self.table.map_page(address, dst_page, attrs).unwrap();
+            }
+        }
+
+        Ok(())
+    }
+}
+
+impl ProcessAddressSpace {
+    /// Constructs a new [ProcessAddressSpace]
+    pub fn new() -> Result<Self, Error> {
+        let table = ProcessAddressSpaceImpl::new()?;
+        let allocator = VirtualMemoryAllocator::new(
+            ProcessAddressSpaceImpl::<TableAllocatorImpl>::LOWER_LIMIT_PFN,
+            ProcessAddressSpaceImpl::<TableAllocatorImpl>::UPPER_LIMIT_PFN,
+        );
+        Ok(Self {
+            inner: IrqSafeSpinlock::new(Inner { table, allocator }),
+        })
+    }
+
+    /// Performs a "fork" operation of the address space, cloning all the mappings into a new one
+    pub fn fork(&self) -> Result<Self, Error> {
+        let src_inner = self.inner.lock();
+        let new_table = ProcessAddressSpaceImpl::new()?;
+        let mut new_inner = Inner {
+            allocator: VirtualMemoryAllocator::new(
+                ProcessAddressSpaceImpl::<TableAllocatorImpl>::LOWER_LIMIT_PFN,
+                ProcessAddressSpaceImpl::<TableAllocatorImpl>::UPPER_LIMIT_PFN,
+            ),
+            table: new_table,
+        };
+
+        log::debug!("fork address space!");
+
+        for (range, backing) in src_inner.allocator.regions() {
+            // If they are present in the existing allocator, there should be no
+            // problem adding them to a new one
+            new_inner.clone_range(&src_inner, range, backing)?;
+        }
+
+        for (range, _) in new_inner.allocator.regions() {
+            let start = range.start * L3_PAGE_SIZE;
+            let end = range.end * L3_PAGE_SIZE;
+            log::debug!("forked region: {:#x?}", start..end);
+        }
+
+        Ok(Self {
+            inner: IrqSafeSpinlock::new(new_inner),
+        })
+    }
+
+    /// Allocates a region of virtual memory within the address space and maps the pages to the
+    /// ones returned from `get_page` function
+    pub fn allocate(
+        &self,
+        _hint: Option<usize>,
+        size: usize,
+        backing: VirtualRangeBacking,
+        attributes: MapAttributes,
+    ) -> Result<usize, Error> {
+        assert_eq!(size & (L3_PAGE_SIZE - 1), 0);
+
+        let mut lock = self.inner.lock();
+
+        lock.alloc_range(size / L3_PAGE_SIZE, backing, attributes)
+    }
+    /// Maps a region of memory in the address space
+    pub fn map(
+        &self,
+        address: usize,
+        size: usize,
+        backing: VirtualRangeBacking,
+        attributes: MapAttributes,
+    ) -> Result<(), Error> {
+        assert_eq!(address & (L3_PAGE_SIZE - 1), 0);
+        assert_eq!(size & (L3_PAGE_SIZE - 1), 0);
+
+        let mut lock = self.inner.lock();
+
+        lock.map_range(address, size / L3_PAGE_SIZE, backing, attributes)
+    }
+
+    /// Adds a single-page mapping to the address space
+    pub fn map_single(
+        &self,
+        address: usize,
+        backing: VirtualRangeBacking,
+        attributes: MapAttributes,
+    ) -> Result<PhysicalAddress, Error> {
+        assert_eq!(address & (L3_PAGE_SIZE - 1), 0);
+
+        self.inner.lock().map_single(address, backing, attributes)
+    }
+
+    /// Returns the [PhysicalAddress] associated with given virtual `address`,
+    /// if one is mapped
+    pub fn translate(&self, address: usize) -> Result<PhysicalAddress, Error> {
+        // Offset is handled at impl level
+        self.inner.lock().table.translate(address).map(|e| e.0)
+    }
+
+    /// Removes a single PAGE_SIZE mapping from the address space.
+    ///
+    /// See [ProcessAddressSpaceManager::unmap_page].
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the process to which this address space belongs does not and
+    /// will not access this page.
+    pub unsafe fn unmap(&self, address: usize, size: usize) -> Result<(), Error> {
+        assert_eq!(address & (L3_PAGE_SIZE - 1), 0);
+        assert_eq!(size & (L3_PAGE_SIZE - 1), 0);
+
+        let mut lock = self.inner.lock();
+
+        lock.unmap_range(address, size / L3_PAGE_SIZE)
+    }
+
+    /// Returns the physical address of this table, with ASID applied
+    pub fn as_address_with_asid(&self) -> u64 {
+        self.inner.lock().table.as_address_with_asid()
+    }
+
+    /// Removes all allocations and their associated mappings from the address space
+    pub fn clear(&self) -> Result<(), Error> {
+        let mut inner = self.inner.lock();
+        unsafe { inner.clear() }
+    }
+}
+
+impl Drop for ProcessAddressSpace {
+    fn drop(&mut self) {
+        self.clear().ok();
+    }
+}
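A sketch of driving the per-process address space API above; sizes must be multiples of `L3_PAGE_SIZE`, and the attribute set mirrors what the ELF loader below uses:

```rust
use libk_mm::{
    process::{ProcessAddressSpace, VirtualRangeBacking},
    table::MapAttributes,
    L3_PAGE_SIZE,
};
use yggdrasil_abi::error::Error;

fn demo() -> Result<(), Error> {
    let space = ProcessAddressSpace::new()?;

    // Allocate 16 anonymous pages somewhere in the user range
    let base = space.allocate(
        None,
        16 * L3_PAGE_SIZE,
        VirtualRangeBacking::anonymous(),
        MapAttributes::USER_READ | MapAttributes::USER_WRITE | MapAttributes::NON_GLOBAL,
    )?;

    // Each page now has a backing frame
    let _phys = space.translate(base)?;

    // SAFETY: the owning process does not use this range anymore
    unsafe { space.unmap(base, 16 * L3_PAGE_SIZE) }
}
```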
ELF binary format support +use core::ops::DerefMut; + +use alloc::sync::Arc; +use elf::{ + abi::{PF_W, PF_X, PT_LOAD, PT_TLS}, + endian::AnyEndian, + segment::ProgramHeader, + ElfStream, ParseError, +}; +use libk_mm::{ + pointer::{PhysicalRef, PhysicalRefMut}, + process::{ProcessAddressSpace, VirtualRangeBacking}, + table::MapAttributes, +}; +use libk_util::io::{Read, Seek}; +use yggdrasil_abi::{error::Error, io::SeekFrom}; + +use crate::{ + process::ProcessImage, + types::{ProcessTlsInfo, ProcessTlsLayout}, +}; + +#[derive(Clone)] +struct FileReader { + file: F, +} + +impl elf::io_traits::InputStream for FileReader { + fn read_exact(&mut self, buf: &mut [u8]) -> Result<(), elf::io_traits::StreamError> { + self.file.read_exact(buf).map_err(conv_stream_error) + } + + fn seek(&mut self, pos: elf::io_traits::SeekFrom) -> Result { + self.file + .seek(conv_seek_from(pos)) + .map_err(conv_stream_error) + } +} + +#[inline] +fn conv_stream_error(_v: Error) -> elf::io_traits::StreamError { + elf::io_traits::StreamError { + message: "Elf read error", + } +} + +#[inline] +fn conv_seek_from(v: elf::io_traits::SeekFrom) -> SeekFrom { + match v { + elf::io_traits::SeekFrom::End(off) => SeekFrom::End(off), + elf::io_traits::SeekFrom::Start(off) => SeekFrom::Start(off), + _ => todo!(), + } +} + +#[inline] +fn from_parse_error(v: ParseError) -> Error { + log::warn!("ELF loading error: {:?}", v); + Error::InvalidFile +} + +/// Creates a new copy of the TLS from given master image +pub fn clone_tls(space: &ProcessAddressSpace, image: &ProcessImage) -> Result { + let Some(tls) = image.tls.as_ref() else { + // No TLS + return Ok(0); + }; + + assert_ne!(tls.master_copy_base, 0); + assert_ne!(tls.layout.mem_size, 0); + + let address = space.allocate( + None, + 0x1000, + VirtualRangeBacking::anonymous(), + MapAttributes::USER_READ | MapAttributes::USER_WRITE | MapAttributes::NON_GLOBAL, + )?; + + // debugln!( + // "Clone TLS from {:#x} (data={:#x}) to {:#x} (data={:#x})", + // tls.master_copy_base, + // tls.master_copy_base + tls.layout.data_offset, + // address, + // address + tls.layout.data_offset + // ); + // debugln!("tls ptr = {:#x}", address + tls.layout.ptr_offset); + + let src_phys = space.translate(tls.master_copy_base)?; + let dst_phys = space.translate(address)?; + + // Copy data + unsafe { + let src = + PhysicalRef::::map_slice(src_phys.add(tls.layout.data_offset), tls.layout.mem_size); + let mut dst = + PhysicalRefMut::map_slice(dst_phys.add(tls.layout.data_offset), tls.layout.mem_size); + + dst.copy_from_slice(&src); + } + + // Setup self-pointer + unsafe { + let mut dst = PhysicalRefMut::::map(dst_phys.add(tls.layout.ptr_offset)); + + *dst = address + tls.layout.ptr_offset; + } + + Ok(address + tls.layout.ptr_offset) +} + +fn load_bytes( + space: &ProcessAddressSpace, + addr: usize, + mut src: F, + len: usize, +) -> Result<(), Error> +where + F: FnMut(usize, PhysicalRefMut<'_, [u8]>) -> Result<(), Error>, +{ + // TODO check for crazy addresses here + + let dst_page_off = addr & 0xFFF; + let dst_page_aligned = addr & !0xFFF; + let mut off = 0usize; + let mut rem = len; + + while rem != 0 { + let page_idx = (dst_page_off + off) / 0x1000; + let page_off = (dst_page_off + off) % 0x1000; + let count = core::cmp::min(rem, 0x1000 - page_off); + + let virt_page = dst_page_aligned + page_idx * 0x1000; + assert_eq!(virt_page & 0xFFF, 0); + + let phys_page = space.translate(virt_page)?; + let dst_slice = unsafe { PhysicalRefMut::map_slice(phys_page.add(page_off), count) }; + + src(off, dst_slice)?; + + rem 
-= count; + off += count; + } + + Ok(()) +} + +fn load_segment( + space: &ProcessAddressSpace, + phdr: &ProgramHeader, + file: &FileReader, +) -> Result<(), Error> { + if phdr.p_memsz == 0 { + return Ok(()); + } + + let attrs = match (phdr.p_flags & PF_W, phdr.p_flags & PF_X) { + (0, 0) => MapAttributes::USER_READ, + (_, 0) => MapAttributes::USER_WRITE | MapAttributes::USER_READ, + (0, _) => MapAttributes::USER_READ, + (_, _) => MapAttributes::USER_WRITE | MapAttributes::USER_READ, + } | MapAttributes::NON_GLOBAL; + + // Map the range + let aligned_start = (phdr.p_vaddr as usize) & !0xFFF; + let aligned_end = ((phdr.p_vaddr + phdr.p_memsz) as usize + 0xFFF) & !0xFFF; + + space.map( + aligned_start, + aligned_end - aligned_start, + VirtualRangeBacking::anonymous(), + attrs, + )?; + + if phdr.p_filesz > 0 { + load_bytes( + space, + phdr.p_vaddr as usize, + |off, mut dst| { + file.file + .seek(SeekFrom::Start(phdr.p_offset + off as u64))?; + file.file.read_exact(dst.deref_mut()) + }, + phdr.p_filesz as usize, + )?; + } + + if phdr.p_memsz > phdr.p_filesz { + let addr = (phdr.p_vaddr + phdr.p_filesz) as usize; + let len = (phdr.p_memsz - phdr.p_filesz) as usize; + + load_bytes( + space, + addr, + |_, mut dst| { + dst.fill(0); + Ok(()) + }, + len, + )?; + } + + Ok(()) +} + +fn tls_segment( + space: &ProcessAddressSpace, + phdr: &ProgramHeader, + file: &FileReader, +) -> Result { + assert_ne!(phdr.p_memsz, 0); + + if !phdr.p_align.is_power_of_two() { + return Err(Error::InvalidArgument); + } + + let layout = ProcessTlsLayout::new(phdr.p_align as _, phdr.p_filesz as _, phdr.p_memsz as _); + let data_offset = layout.data_offset; + let data_size = layout.data_size; + let mem_size = layout.mem_size; + let aligned_size = (layout.full_size + 0xFFF) & !0xFFF; + assert!(aligned_size <= 0x1000); + + let base_address = space.allocate( + None, + aligned_size, + VirtualRangeBacking::anonymous(), + MapAttributes::USER_READ | MapAttributes::USER_WRITE | MapAttributes::NON_GLOBAL, + )?; + + // debugln!( + // "Allocated TLS master copy @ {:#x}, tls={:#x}, tls_data={:#x}", + // base_address, + // base_address + layout.ptr_offset, + // base_address + layout.data_offset + // ); + + let tls = ProcessTlsInfo { + master_copy_base: base_address, + layout, + }; + + if data_size > 0 { + load_bytes( + space, + base_address + data_offset, + |off, mut dst| { + file.file + .seek(SeekFrom::Start(phdr.p_offset + off as u64))?; + file.file.read_exact(dst.deref_mut()) + }, + data_size, + )?; + } + + if mem_size > data_size { + load_bytes( + space, + base_address + data_offset + data_size, + |_, mut dst| { + dst.fill(0); + Ok(()) + }, + mem_size - data_size, + )?; + } + + Ok(tls) +} + +/// Loads an ELF binary from `file` into the target address space +pub fn load_elf_from_file( + space: &ProcessAddressSpace, + file: Arc, +) -> Result { + let file = FileReader { file }; + let elf = ElfStream::::open_stream(file.clone()).map_err(from_parse_error)?; + let mut image_end = 0; + let mut tls = None; + + for phdr in elf.segments() { + match phdr.p_type { + PT_LOAD => { + if phdr.p_vaddr + phdr.p_memsz > image_end { + image_end = phdr.p_vaddr + phdr.p_memsz; + } + + load_segment(space, phdr, &file)?; + } + PT_TLS => { + assert!(tls.is_none()); + tls.replace(tls_segment(space, phdr, &file)?); + } + _ => (), + } + } + + // let image_end = (image_end as usize).page_align_up::(); + + // debugln!("Loaded image end: {:#x}", image_end); + + Ok(ProcessImage { + entry: elf.ehdr.e_entry as usize, + tls, + }) +} diff --git 
a/kernel/libk/libk-thread/src/binary/mod.rs b/kernel/libk/libk-thread/src/binary/mod.rs new file mode 100644 index 00000000..39a9d265 --- /dev/null +++ b/kernel/libk/libk-thread/src/binary/mod.rs @@ -0,0 +1,278 @@ +use core::{alloc::Layout, ptr::NonNull}; + +use alloc::{ + borrow::ToOwned, + string::String, + sync::{Arc, Weak}, + vec::Vec, +}; +use kernel_arch::task::TaskContext; +use libk_mm::{ + pointer::PhysicalRefMut, + process::{ProcessAddressSpace, VirtualRangeBacking}, + table::MapAttributes, +}; +use libk_util::io::{Read, Seek}; +use yggdrasil_abi::{ + error::Error, + io::SeekFrom, + pass::{Place, Placer}, + path::Path, + process::ProgramArgumentInner, +}; + +use crate::{ + mem::ForeignPointer, + process::{ProcessImage, ProcessImpl, ProcessIo, ProcessManager}, + thread::Thread, + TaskContextImpl, +}; + +pub mod elf; + +pub trait ProgramLoadSource { + type File: Seek + Read; + + fn open_executable>(&mut self, path: P) -> Result, Error>; +} + +struct BufferPlacer<'a> { + buffer: &'a mut [u8], + virtual_offset: usize, + offset: usize, +} + +impl<'a> BufferPlacer<'a> { + pub fn new(virtual_offset: usize, buffer: &'a mut [u8]) -> Self { + Self { + buffer, + virtual_offset, + offset: 0, + } + } + + unsafe fn alloc_layout( + &mut self, + layout: Layout, + ) -> Result<(NonNull, NonNull), Error> { + // TODO checks + let aligned = (self.offset + layout.align() - 1) & !(layout.align() - 1); + self.offset = aligned + layout.size(); + Ok(( + NonNull::new_unchecked(self.buffer.as_mut_ptr().add(aligned) as *mut T), + NonNull::new_unchecked((self.virtual_offset + aligned) as *mut T), + )) + } +} + +unsafe impl<'a> Placer for BufferPlacer<'a> { + fn place_ref(&mut self, r: &T) -> Result, Error> { + let layout = Layout::new::(); + unsafe { + let (kernel, user) = self.alloc_layout::(layout)?; + kernel.as_ptr().write(r.place(self)?); + Ok(user) + } + } + + fn place_slice(&mut self, r: &[T]) -> Result, Error> { + let layout = Layout::array::(r.len()).unwrap(); + unsafe { + let (kernel, user) = self.alloc_layout::(layout)?; + let kernel = NonNull::slice_from_raw_parts(kernel, r.len()); + let user = NonNull::slice_from_raw_parts(user, r.len()); + for (i, elem) in r.iter().enumerate() { + kernel + .get_unchecked_mut(i) + .as_ptr() + .write(elem.place(self)?); + } + Ok(user) + } + } +} + +// args, envs are passed as Vec to ensure kernel ownership +#[allow(clippy::ptr_arg)] +fn setup_program_env( + space: &ProcessAddressSpace, + virt: usize, + args: &Vec, + envs: &Vec, +) -> Result { + // TODO growing buffer + let phys_page = space.map_single( + virt, + VirtualRangeBacking::anonymous(), + MapAttributes::USER_READ | MapAttributes::USER_WRITE | MapAttributes::NON_GLOBAL, + )?; + let mut buffer = unsafe { PhysicalRefMut::map_slice(phys_page, 4096) }; + let mut placer = BufferPlacer::new(virt, &mut buffer); + + let args = args.iter().map(String::as_ref).collect::>(); + let envs = envs.iter().map(String::as_ref).collect::>(); + + let in_kernel = ProgramArgumentInner { + args: &args, + env: &envs, + }; + let in_user = in_kernel.place_ref(&mut placer)?; + + Ok(in_user as *const _ as usize) +} + +fn setup_context( + space: &ProcessAddressSpace, + image: &ProcessImage, + args: &Vec, + envs: &Vec, +) -> Result { + const USER_STACK_PAGES: usize = 32; + + let virt_stack_base = 0x3000000; + // 0x1000 of guard page + let virt_args_base = virt_stack_base + (USER_STACK_PAGES + 1) * 0x1000; + + space.map( + virt_stack_base, + USER_STACK_PAGES * 0x1000, + VirtualRangeBacking::anonymous(), + MapAttributes::USER_WRITE | 
MapAttributes::USER_READ | MapAttributes::NON_GLOBAL, + )?; + + let arg = setup_program_env(space, virt_args_base, args, envs)?; + + let user_sp = + virt_stack_base + USER_STACK_PAGES * 0x1000 - TaskContextImpl::USER_STACK_EXTRA_ALIGN; + + // Fill with some sentinel value to detect stack underflows + let ptr = user_sp as *mut u64; + + #[allow(clippy::reversed_empty_ranges)] + for i in 0..TaskContextImpl::USER_STACK_EXTRA_ALIGN / 8 { + unsafe { + ptr.add(i).write_foreign_volatile(space, 0xDEADC0DE); + } + } + + let tls_address = elf::clone_tls(space, image)?; + + TaskContext::user( + image.entry, + arg, + space.as_address_with_asid(), + user_sp, + tls_address, + ) +} + +fn setup_binary( + name: S, + parent: Option>>, + space: ProcessAddressSpace, + image: ProcessImage, + args: &Vec, + envs: &Vec, +) -> Result<(Arc>, Arc), Error> +where + S: Into, + PM: ProcessManager>, + IO: ProcessIo, +{ + let context = setup_context(&space, &image, args, envs)?; + let (process, main) = + ProcessImpl::new_with_main(name, parent, Arc::new(space), context, Some(image)); + Ok((process, main)) +} + +fn load_binary( + head: &[u8], + file: Arc, + space: &ProcessAddressSpace, +) -> Result { + if head.starts_with(b"\x7FELF") { + elf::load_elf_from_file(space, file) + } else { + Err(Error::UnrecognizedExecutable) + } +} + +fn xxx_load_program>( + space: &ProcessAddressSpace, + source: &mut PS, + path: P, + args: Vec, + envs: Vec, +) -> Result<(ProcessImage, Vec, Vec), Error> { + let mut head = [0; 256]; + let path = path.as_ref(); + let file = source.open_executable(path)?; + + file.seek(SeekFrom::Start(0))?; + let count = file.read(&mut head)?; + let head = &head[..count]; + + if let Some(shebang) = head.strip_prefix(b"#!") + && let Some((shebang, _)) = shebang.split_once(|&ch| ch == b'\n') + { + let shebang = core::str::from_utf8(shebang).map_err(|_| Error::InvalidFile)?; + let mut shebang_args = shebang.split(' ').map(|s| s.to_owned()).collect::>(); + if shebang_args.is_empty() || shebang_args.len() >= 8 { + return Err(Error::UnrecognizedExecutable); + } + shebang_args.extend_from_slice(&args); + + return xxx_load_program(space, source, shebang_args[0].clone(), shebang_args, envs); + } + + file.seek(SeekFrom::Start(0))?; + + let image = load_binary(head, file, space)?; + + Ok((image, args, envs)) +} + +/// Loads a program from given `path` +pub fn load( + source: &mut PS, + parent: Option>>, + path: P, + args: &[&str], + envs: &[&str], +) -> Result<(Arc>, Arc), Error> +where + PS: ProgramLoadSource, + P: AsRef, + PM: ProcessManager>, + IO: ProcessIo, +{ + let path = path.as_ref(); + let args = args.iter().map(|&s| s.to_owned()).collect(); + let envs = envs.iter().map(|&s| s.to_owned()).collect(); + + let space = ProcessAddressSpace::new()?; + let (image, args, envs) = xxx_load_program(&space, source, path, args, envs)?; + setup_binary(path.display(), parent, space, image, &args, &envs) +} + +pub fn load_into( + process: &ProcessImpl, + path: P, + args: Vec, + envs: Vec, +) -> Result<(TaskContextImpl, ProcessImage), Error> +where + P: AsRef, + PM: ProcessManager>, + IO: ProcessIo + ProgramLoadSource, +{ + let mut io = process.io.lock(); + // Have to make the Path owned, going to drop the address space from which it came + let path = path.as_ref().to_owned(); + let space = process.space(); + space.clear()?; + let (image, args, envs) = xxx_load_program(&space, &mut *io, &path, args, envs)?; + let context = setup_context(&space, &image, &args, &envs)?; + + Ok((context, image)) +} diff --git 
a/kernel/libk/libk-thread/src/futex.rs b/kernel/libk/libk-thread/src/futex.rs new file mode 100644 index 00000000..af82a9da --- /dev/null +++ b/kernel/libk/libk-thread/src/futex.rs @@ -0,0 +1,71 @@ +use core::{ + pin::Pin, + sync::atomic::{AtomicU32, Ordering}, + task::{Context, Poll}, +}; + +use alloc::sync::Arc; +use futures_util::Future; +use libk_util::waker::QueueWaker; + +/// User-space mutex (like BSD/Linux's futex) data structure +pub struct UserspaceMutex { + queue: QueueWaker, + address: usize, +} + +impl UserspaceMutex { + /// Creates a new [UserspaceMutex] associated with given `address` + pub fn new(address: usize) -> Self { + Self { + queue: QueueWaker::new(), + address, + } + } + + /// Blocks until the value at the mutex's address becomes different from `compare_value` + pub fn wait(self: Arc, compare_value: u32) -> impl Future { + struct WaitFuture { + mutex: Arc, + compare_value: u32, + } + + impl WaitFuture { + fn load(&self, ordering: Ordering) -> u32 { + let value = unsafe { &*(self.mutex.address as *const AtomicU32) }; + value.load(ordering) + } + } + + impl Future for WaitFuture { + type Output = (); + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + self.mutex.queue.register(cx.waker()); + + // Test the mutex + if self.load(Ordering::Acquire) != self.compare_value { + self.mutex.queue.remove(cx.waker()); + Poll::Ready(()) + } else { + Poll::Pending + } + } + } + + WaitFuture { + mutex: self, + compare_value, + } + } + + /// Wakes up a single task waiting on the mutex + pub fn wake(&self) { + self.queue.wake_one(); + } + + /// Wakes up all tasks waiting on the mutex + pub fn wake_all(&self) { + self.queue.wake_all(); + } +} diff --git a/kernel/libk/libk-thread/src/lib.rs b/kernel/libk/libk-thread/src/lib.rs new file mode 100644 index 00000000..fe3c5503 --- /dev/null +++ b/kernel/libk/libk-thread/src/lib.rs @@ -0,0 +1,58 @@ +#![no_std] +#![feature( + trait_alias, + never_type, + inline_const, + arbitrary_self_types, + slice_ptr_get, + slice_split_once, + let_chains +)] + +extern crate alloc; + +use api::__signal_process_group; +use kernel_arch::{Architecture, ArchitectureImpl, KernelTableManagerImpl}; +use libk_mm::phys::GlobalPhysicalAllocator; + +pub(crate) mod api { + use yggdrasil_abi::process::{ProcessId, Signal}; + + extern "Rust" { + pub fn __signal_process_group(group_id: ProcessId, signal: Signal); + } +} + +#[macro_use] +pub mod runtime; + +pub mod binary; +pub mod futex; +pub mod mem; +pub mod process; +pub mod sched; +pub mod sync; +pub mod thread; +pub mod types; + +pub type TaskContextImpl = + kernel_arch::TaskContextImpl; + +use sched::CpuQueue; +pub use types::{AtomicThreadState, ThreadAffinity, ThreadId, ThreadState}; + +use yggdrasil_abi::process::{ProcessId, Signal}; + +/// Returns local CPU index +#[inline] +pub fn cpu_index() -> u32 { + ArchitectureImpl::cpu_index::() +} + +pub fn cpu_count() -> usize { + ArchitectureImpl::cpu_count() +} + +pub fn signal_process_group(group_id: ProcessId, signal: Signal) { + unsafe { __signal_process_group(group_id, signal) } +} diff --git a/kernel/libk/libk-thread/src/mem.rs b/kernel/libk/libk-thread/src/mem.rs new file mode 100644 index 00000000..0631e33c --- /dev/null +++ b/kernel/libk/libk-thread/src/mem.rs @@ -0,0 +1,193 @@ +use core::{alloc::Layout, mem::size_of}; + +use libk_mm::{address::Virtualize, process::ProcessAddressSpace}; +use yggdrasil_abi::error::Error; + +// XXX +const KERNEL_VIRT_OFFSET: usize = 0xFFFFFF8000000000; + +/// Helper trait to allow cross-address space access to 
pointers +pub trait ForeignPointer: Sized { + /// Perform a volatile pointer write without dropping the old value. + /// + /// # Panics + /// + /// The function panics if any of the following conditions is met: + /// + /// * The address of the pointer is not mapped in the `space`. + /// * The pointer is not writable. + /// * The pointer is misaligned. + /// + /// # Safety + /// + /// As this function allows direct memory writes, it is inherently unsafe. + unsafe fn write_foreign_volatile(self: *mut Self, space: &ProcessAddressSpace, value: Self); + + /// Performs pointer validation for given address space: + /// + /// * Checks if the pointer has proper alignment for the type. + /// * Checks if the pointer is mapped in the address space. + /// * Checks if the pointer is above the userspace memory boundary. + /// + /// # Safety + /// + /// Even though this function does the necessary checks, it is still a raw pointer to reference + /// conversion, and thus is unsafe. + unsafe fn validate_user_ptr<'a>( + self: *const Self, + space: &ProcessAddressSpace, + ) -> Result<&'a Self, Error>; + + /// [ForeignPointer::validate_user_ptr], with extra "writability" check. + /// + /// # Safety + /// + /// Even though this function does the necessary checks, it is still a raw pointer to reference + /// conversion, and thus is unsafe. + unsafe fn validate_user_mut<'a>( + self: *mut Self, + space: &ProcessAddressSpace, + ) -> Result<&'a mut Self, Error>; + + /// [ForeignPointer::validate_user_ptr], but for slices + /// + /// # Safety + /// + /// Even though this function does the necessary checks, it is still a raw pointer to reference + /// conversion, and thus is unsafe. + unsafe fn validate_user_slice<'a>( + self: *const Self, + len: usize, + space: &ProcessAddressSpace, + ) -> Result<&'a [Self], Error>; + + /// [ForeignPointer::validate_user_slice], but for mutable slices + /// + /// # Safety + /// + /// Even though this function does the necessary checks, it is still a raw pointer to reference + /// conversion, and thus is unsafe. 
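+    ///
+    /// # Example
+    ///
+    /// A minimal sketch of the intended use from a syscall handler; `user_ptr`, `user_len`
+    /// and `space` are hypothetical names for illustration, not items from this crate:
+    ///
+    /// ```ignore
+    /// // Validate a user-supplied (pointer, length) pair before touching the memory:
+    /// let bytes: &mut [u8] = unsafe { user_ptr.validate_user_slice_mut(user_len, &space)? };
+    /// bytes.fill(0); // now safe to access as a normal slice
+    /// ```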
+ unsafe fn validate_user_slice_mut<'a>( + self: *mut Self, + len: usize, + space: &ProcessAddressSpace, + ) -> Result<&'a mut [Self], Error>; +} + +impl ForeignPointer for T { + unsafe fn write_foreign_volatile(self: *mut Self, space: &ProcessAddressSpace, value: T) { + // TODO check align + let addr = self as usize; + let start_page = addr & !0xFFF; + let end_page = (addr + size_of::() - 1) & !0xFFF; + let page_offset = addr & 0xFFF; + + if start_page != end_page { + todo!("Foreign pointer write crossed a page boundary"); + } + + let phys_page = space + .translate(start_page) + .expect("Address is not mapped in the target address space"); + + let virt_ptr = phys_page.add(page_offset).virtualize() as *mut T; + virt_ptr.write_volatile(value); + } + + unsafe fn validate_user_slice_mut<'a>( + self: *mut Self, + len: usize, + space: &ProcessAddressSpace, + ) -> Result<&'a mut [Self], Error> { + let base = self as usize; + let layout = Layout::array::(len).unwrap(); + + validate_user_align_size(base, &layout)?; + validate_user_region(space, base, layout.size(), true)?; + + Ok(core::slice::from_raw_parts_mut(self, len)) + } + + unsafe fn validate_user_slice<'a>( + self: *const Self, + len: usize, + space: &ProcessAddressSpace, + ) -> Result<&'a [Self], Error> { + let base = self as usize; + let layout = Layout::array::(len).unwrap(); + + validate_user_align_size(base, &layout)?; + validate_user_region(space, base, layout.size(), false)?; + + Ok(core::slice::from_raw_parts(self, len)) + } + + unsafe fn validate_user_mut<'a>( + self: *mut Self, + space: &ProcessAddressSpace, + ) -> Result<&'a mut Self, Error> { + let addr = self as usize; + let layout = Layout::new::(); + + // Common validation + validate_user_align_size(addr, &layout)?; + + // Validate that the pages covered by this address are mapped as writable by the process + // TODO for CoW this may differ + validate_user_region(space, addr, layout.size(), true)?; + + Ok(&mut *self) + } + + unsafe fn validate_user_ptr<'a>( + self: *const Self, + space: &ProcessAddressSpace, + ) -> Result<&'a Self, Error> { + let addr = self as usize; + let layout = Layout::new::(); + + // Common validation + validate_user_align_size(addr, &layout)?; + validate_user_region(space, addr, layout.size(), false)?; + + Ok(&*self) + } +} + +fn validate_user_align_size(addr: usize, layout: &Layout) -> Result<(), Error> { + // Explicitly disallow NULL + if addr == 0 { + return Err(Error::InvalidArgument); + } + // Validate alignment + if addr % layout.align() != 0 { + return Err(Error::InvalidArgument); + } + if addr + layout.size() > KERNEL_VIRT_OFFSET { + todo!(); + } + + Ok(()) +} + +/// Validates access to given userspace memory region with given constraints +pub fn validate_user_region( + space: &ProcessAddressSpace, + base: usize, + len: usize, + _need_write: bool, +) -> Result<(), Error> { + if base + len > crate::mem::KERNEL_VIRT_OFFSET { + panic!("Invalid argument"); + } + + let aligned_start = base & !0xFFF; + let aligned_end = (base + len + 0xFFF) & !0xFFF; + + for page in (aligned_start..aligned_end).step_by(0x1000) { + // TODO check writability + space.translate(page)?; + } + + Ok(()) +} diff --git a/kernel/libk/libk-thread/src/process.rs b/kernel/libk/libk-thread/src/process.rs new file mode 100644 index 00000000..4b041253 --- /dev/null +++ b/kernel/libk/libk-thread/src/process.rs @@ -0,0 +1,428 @@ +use core::marker::PhantomData; + +use abi_lib::SyscallRegister; +use alloc::{ + collections::BTreeMap, + string::String, + sync::{Arc, Weak}, + vec::Vec, 
+}; +use futures_util::Future; +use kernel_arch::KernelTableManagerImpl; +use libk_mm::{phys::GlobalPhysicalAllocator, process::ProcessAddressSpace}; +use libk_util::{ + event::OneTimeEvent, + sync::{ + spin_rwlock::{IrqSafeRwLock, IrqSafeRwLockWriteGuard}, + IrqSafeSpinlock, + }, +}; +use yggdrasil_abi::{ + error::Error, + process::{ExitCode, ProcessId, Signal, ThreadSpawnOptions}, +}; + +use crate::{ + futex::UserspaceMutex, + thread::Thread, + types::{AllocateProcessId, ProcessTlsInfo}, + TaskContextImpl, ThreadId, +}; + +pub trait ForkFrame = kernel_arch::task::ForkFrame; + +pub trait Process: Send + Sync { + /// Returns the [ProcessId] of this process + fn id(&self) -> ProcessId; + + /// Returns the name of the process + fn name(&self) -> &str; + + fn handle_thread_exit(&self, id: ThreadId, code: ExitCode); + fn terminate_others(&self, except: ThreadId) -> impl Future + Send; +} + +pub trait ProcessIo: Sized { + type Node; + + fn new() -> Self; + fn handle_exit(&mut self); + fn fork_from(&mut self, src: &Self) -> Result<(), Error>; +} + +pub trait ProcessManager: Send + Sync { + type Process: Process; + + fn register_process(process: Arc); + fn get(id: ProcessId) -> Option>; + fn for_each)>(f: F); +} + +/// Describes information about a program's image in memory +#[derive(Clone)] +pub struct ProcessImage { + /// Entry point address + pub entry: usize, + /// Thread-local storage information + pub tls: Option, +} + +pub struct ProcessInner { + session_id: ProcessId, + group_id: ProcessId, + + session_terminal: Option>, + threads: Vec>, + mutexes: BTreeMap>, + space: Option>, + image: Option, +} + +/// Describes a process within the system +pub struct ProcessImpl, IO: ProcessIo> { + name: String, + id: ProcessId, + + #[allow(unused)] + parent: Option>>, + + inner: IrqSafeRwLock>, + + pub(crate) exit: OneTimeEvent, + + /// Process I/O information + pub io: IrqSafeSpinlock, + + _pm: PhantomData, +} + +impl, IO: ProcessIo> Process for ProcessImpl { + fn id(&self) -> ProcessId { + self.id + } + + fn name(&self) -> &str { + self.name.as_ref() + } + + fn handle_thread_exit(&self, thread: ThreadId, code: ExitCode) { + log::debug!("Thread {} of process {}: {:?}", thread, self.id, code); + let mut inner = self.inner.write(); + + assert!(inner.remove_thread(thread)); + + let last_thread = inner.threads.is_empty(); + + if last_thread { + log::debug!("Last thread of {} exited", self.id); + self.cleanup(inner); + self.exit.signal(code); + } + } + + async fn terminate_others(&self, except: ThreadId) { + let mut inner = self.inner.write(); + + for thread in inner.threads.iter() { + if thread.id == except { + continue; + } + + log::info!("Terminate thread {}", thread.id); + thread.terminate().await; + } + + inner.retain_thread(except); + } +} + +impl, IO: ProcessIo> ProcessImpl { + /// Creates a new process with given main thread + pub fn new_with_main>( + name: S, + parent: Option>, + space: Arc, + context: TaskContextImpl, + image: Option, + ) -> (Arc, Arc) { + let name = name.into(); + let id = ProcessId::new(); + + let process = Arc::new(Self { + name, + id, + parent, + + inner: IrqSafeRwLock::new(ProcessInner::new(id, Some(space.clone()), image)), + + exit: OneTimeEvent::new(), + io: IrqSafeSpinlock::new(IO::new()), + + _pm: PhantomData, + }); + + // Create "main" thread + let thread = Thread::new_uthread(process.id, space, context); + process.inner.write().threads.push(thread.clone()); + + PM::register_process(process.clone()); + + (process, thread) + } + + /// Spawns a new child thread within the 
process + pub fn spawn_thread(self: &Arc, options: &ThreadSpawnOptions) -> Result { + log::debug!( + "Spawn thread in {} with options: {:#x?}", + self.id(), + options + ); + // let mut inner = self.inner.write(); + + // let space = inner.space.clone().unwrap(); + + todo!() + // XXX + // let tls_address = if let Some(image) = inner.image.as_ref() { + // proc::elf::clone_tls(&space, image)? + // } else { + // 0 + // }; + + // let context = TaskContextImpl::user( + // options.entry as _, + // options.argument as _, + // space.as_address_with_asid(), + // options.stack_top, + // tls_address, + // )?; + // let thread = Thread::new_uthread(self.clone(), space.clone(), context); + // let id = thread.id; + + // inner.add_thread(thread.clone()); + + // thread.enqueue(); + + // Ok(id) + } + + unsafe fn fork_inner>( + self: &Arc, + frame: &F, + ) -> Result { + let src_inner = self.inner.read(); + let new_space = src_inner.space.as_ref().unwrap().fork()?; + let new_context = frame.fork(new_space.as_address_with_asid())?; + + let (new_process, new_main) = Self::new_with_main( + self.name(), + Some(Arc::downgrade(self)), + Arc::new(new_space), + new_context, + src_inner.image.clone(), + ); + + { + let mut dst_io = new_process.io.lock(); + let src_io = self.io.lock(); + + dst_io.fork_from(&src_io)?; + } + + new_process.inherit(self)?; + + log::info!("Process::fork -> {:?}", new_process.id()); + new_main.enqueue(); + + Ok(new_process.id()) + } + + /// Performs a "fork" operation on the process, creating an identical copy of it, cloning + /// the register state from provided [ForkFrame]. + /// + /// # Safety + /// + /// Unsafe: frame must be a valid frame to be forked, the function is not yet stable and does + /// not yet properly fork all the necessary context details. 
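+    ///
+    /// # Example
+    ///
+    /// A sketch of the expected call site, assuming a `fork()` syscall handler that has
+    /// captured the parent's saved register frame (the handler itself is not part of this
+    /// crate):
+    ///
+    /// ```ignore
+    /// // `frame` is the saved userspace register state of the calling thread
+    /// unsafe { ProcessImpl::<PM, IO>::raw_fork(frame) };
+    /// // On return, the frame's return-value register holds the child's ProcessId
+    /// // (or an encoded error), as produced by `into_syscall_register()`.
+    /// ```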
+ pub unsafe fn raw_fork>(frame: &mut F) { + let src_thread = Thread::current(); + let src_process = src_thread.process::(); + + let value = src_process.fork_inner(frame).into_syscall_register(); + + frame.set_return_value(value as _); + } + + /// Replaces the process address space with a new one, loaded from the specified program + pub fn exec(&self, _program: &str, _argv: Vec, _envp: Vec) -> Result { + // XXX + todo!() + // if self.inner.read().thread_count() != 1 { + // todo!(); + // } + + // let (context, image) = proc::exec::load_into(self, program, argv, envp)?; + // let mut inner = self.inner.write(); + // let main = &inner.threads()[0]; + + // let old_context = unsafe { main.replace_context(context) }; + // let new_context = main.context.as_ptr(); + // inner.set_image(Some(image)); + + // drop(inner); + + // // TODO old context is leaked + // unsafe { (*new_context).switch(&old_context) } + // unreachable!() + } + + /// Returns the address space of the process + pub fn space(&self) -> Arc { + self.inner.read().space.clone().unwrap() + } + + /// Returns the process group ID of the process + pub fn group_id(&self) -> ProcessId { + self.inner.read().group_id + } + + /// Returns the process session ID of the process + pub fn session_id(&self) -> ProcessId { + self.inner.read().session_id + } + + /// Changes the process's group ID + pub fn set_group_id(&self, id: ProcessId) { + self.inner.write().group_id = id; + } + + /// Changes the process's session ID + pub fn set_session_id(&self, id: ProcessId) { + self.inner.write().session_id = id; + } + + // Resources + + /// Returns the current session terminal of the process, if set + pub fn session_terminal(&self) -> Option> { + self.inner.read().session_terminal.clone() + } + + /// Changes the current session terminal of the process + pub fn set_session_terminal(&self, node: Arc) { + self.inner.write().session_terminal = Some(node); + } + + /// Resets the current session terminal of the process + pub fn clear_session_terminal(&self) -> Option> { + self.inner.write().session_terminal.take() + } + + /// Inherits the process information from the `parent` + pub fn inherit(&self, parent: &Self) -> Result<(), Error> { + let mut our_inner = self.inner.write(); + let their_inner = parent.inner.read(); + + our_inner.session_id = their_inner.session_id; + our_inner.group_id = their_inner.group_id; + our_inner.session_terminal = their_inner.session_terminal.clone(); + + Ok(()) + } + + // State + + /// Returns the [ExitCode] of the process, if it has exited + pub fn get_exit_status(&self) -> Option { + self.exit.try_read_copy() + } + + /// Returns `true` if the process has exited + pub fn has_exited(&self) -> bool { + self.exit.is_signalled() + } + + pub async fn wait_for_exit(&self) -> ExitCode { + self.exit.wait_copy().await + } + + /// Cleans up process resources + fn cleanup(&self, mut inner: IrqSafeRwLockWriteGuard>) { + self.io.lock().handle_exit(); + inner.space = None; + } + + /// Raises a signal for the specified process + pub fn raise_signal(self: &Arc, signal: Signal) { + let thread = self.inner.read().threads[0].clone(); + thread.raise_signal(signal); + } + + /// Raises a signal for the specified process group + pub fn signal_group(group_id: ProcessId, signal: Signal) { + PM::for_each(|_, proc| { + let inner = proc.inner.read(); + if !proc.has_exited() && inner.group_id == group_id { + log::debug!( + "Deliver group ({}) signal to {}: {:?}", + group_id, + proc.id(), + signal + ); + drop(inner); + proc.raise_signal(signal); + } + }); + } + + 
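+    // A usage sketch for `signal_group` (the caller below is hypothetical, not part of
+    // this module): a TTY driver delivering a signal to its foreground process group
+    // would call the crate-level `signal_process_group(group_id, signal)`, which is
+    // expected to reach this method through the `__signal_process_group` extern hook.
+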
/// Returns a [UserspaceMutex] associated with the `address`. If one does not exist, will + /// create it. + pub fn get_or_insert_mutex(&self, address: usize) -> Arc { + let mut inner = self.inner.write(); + inner + .mutexes + .entry(address) + .or_insert_with(|| Arc::new(UserspaceMutex::new(address))) + .clone() + } + + // Process list + + /// Returns the process with given [ProcessId], if it exists + pub fn get(id: ProcessId) -> Option> { + PM::get(id) + } +} + +impl ProcessInner { + pub fn new( + id: ProcessId, + space: Option>, + image: Option, + ) -> Self { + Self { + session_id: id, + group_id: id, + session_terminal: None, + threads: Vec::new(), + + mutexes: BTreeMap::new(), + image, + space: space.clone(), + } + } + + pub fn add_thread(&mut self, thread: Arc) { + self.threads.push(thread); + } + + pub fn remove_thread(&mut self, id: ThreadId) -> bool { + let n = self.threads.len(); + self.threads.retain(|t| t.id != id); + n != self.threads.len() + } + + pub fn retain_thread(&mut self, except: ThreadId) { + self.threads.retain(|t| t.id == except) + } +} diff --git a/kernel/libk/libk-thread/src/runtime/executor.rs b/kernel/libk/libk-thread/src/runtime/executor.rs new file mode 100644 index 00000000..debf16b6 --- /dev/null +++ b/kernel/libk/libk-thread/src/runtime/executor.rs @@ -0,0 +1,78 @@ +use core::task::{Context, Poll, Waker}; + +use alloc::{boxed::Box, format, sync::Arc}; +use futures_util::{task::waker_ref, Future}; +use kernel_arch::task::TaskContext; +use libk_util::waker::WakeWeak; +use yggdrasil_abi::error::Error; + +use crate::{thread::Thread, TaskContextImpl}; + +use super::{ + task::{Task, Termination}, + task_queue, +}; + +/// Pushes a task into the executor's queue +pub fn enqueue(task: Arc) -> Result<(), Error> { + task_queue::push_task(task) +} + +/// Spawns a background worker to execute the tasks from the global queue +pub fn spawn_async_worker(index: usize) -> Result<(), Error> { + let name = format!("[async-worker-{}]", index); + + let thread = Thread::new_kthread( + name, + TaskContextImpl::kernel_closure(move || loop { + let task = task_queue::pop_task().unwrap(); + let mut future_slot = task.future.lock(); + + if let Some(mut future) = future_slot.take() { + let waker = waker_ref(&task); + let context = &mut Context::from_waker(&waker); + + if future.as_mut().poll(context).is_pending() { + *future_slot = Some(future); + } + } + })?, + ); + + thread.enqueue(); + + Ok(()) +} + +/// Creates a new task for the [Future] and queues it for execution in background +pub fn spawn + Send + 'static>( + future: F, +) -> Result<(), Error> { + enqueue(Task::new(future)) +} + +/// Runs a [Future] to its completion on the current thread +pub fn run_to_completion<'a, T, F: Future + Send + 'a>(future: F) -> Result { + let mut future = Box::pin(future); + + // Make a weak ref for the waker + let weak = Thread::current().downgrade(); + + let waker = WakeWeak::weak_waker(weak.clone()); + let waker = unsafe { Waker::from_raw(waker) }; + + loop { + let context = &mut Context::from_waker(&waker); + + match future.as_mut().poll(context) { + Poll::Ready(value) => break Ok(value), + Poll::Pending => { + if let Some(thread) = unsafe { Thread::upgrade(&weak) } { + if let Err(error) = thread.suspend() { + break Err(error); + } + } + } + } + } +} diff --git a/kernel/libk/libk-thread/src/runtime/macros.rs b/kernel/libk/libk-thread/src/runtime/macros.rs new file mode 100644 index 00000000..00c7da9b --- /dev/null +++ b/kernel/libk/libk-thread/src/runtime/macros.rs @@ -0,0 +1,8 @@ 
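+// `block!` drives an async block to completion on the current thread: it pins the
+// future on the heap and polls it via `runtime::run_to_completion`, suspending the
+// thread between polls. The whole expression evaluates to a `Result`.
+//
+// A usage sketch (the awaited call is illustrative):
+//
+//     let code = block! {
+//         process.wait_for_exit().await
+//     }?;
+//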
+#[macro_export] +macro_rules! block { + ($($stmt:tt)*) => { + $crate::runtime::run_to_completion(alloc::boxed::Box::pin(async move { + $($stmt)* + })) + }; +} diff --git a/kernel/libk/libk-thread/src/runtime/mod.rs b/kernel/libk/libk-thread/src/runtime/mod.rs new file mode 100644 index 00000000..25248abf --- /dev/null +++ b/kernel/libk/libk-thread/src/runtime/mod.rs @@ -0,0 +1,11 @@ +#[macro_use] +mod macros; + +mod executor; +mod task; +mod task_queue; +mod timer; + +pub use executor::{run_to_completion, spawn, spawn_async_worker}; +pub use task_queue::init_task_queue; +pub use timer::{run_with_timeout, sleep, tick, FutureTimeout, SleepFuture}; diff --git a/kernel/libk/libk-thread/src/runtime/task.rs b/kernel/libk/libk-thread/src/runtime/task.rs new file mode 100644 index 00000000..61125442 --- /dev/null +++ b/kernel/libk/libk-thread/src/runtime/task.rs @@ -0,0 +1,45 @@ +use core::fmt; + +use alloc::sync::Arc; +use futures_util::{future::BoxFuture, task::ArcWake, Future, FutureExt}; +use libk_util::sync::IrqSafeSpinlock; + +use super::executor; + +pub trait Termination { + fn print(&self); +} + +pub struct Task { + pub(super) future: IrqSafeSpinlock>>, +} + +impl ArcWake for Task { + fn wake_by_ref(arc_self: &Arc) { + executor::enqueue(arc_self.clone()).unwrap(); + } +} + +impl Task { + pub fn new + Send + 'static>(future: F) -> Arc { + let future = IrqSafeSpinlock::new(Some( + async move { + future.await.print(); + } + .boxed(), + )); + Arc::new(Self { future }) + } +} + +impl Termination for () { + fn print(&self) {} +} + +impl Termination for Result { + fn print(&self) { + if let Err(error) = self { + log::error!("A task finished with an error: {:?}", error); + } + } +} diff --git a/kernel/libk/libk-thread/src/runtime/task_queue.rs b/kernel/libk/libk-thread/src/runtime/task_queue.rs new file mode 100644 index 00000000..fa5cf6fe --- /dev/null +++ b/kernel/libk/libk-thread/src/runtime/task_queue.rs @@ -0,0 +1,71 @@ +use alloc::sync::Arc; +use crossbeam_queue::ArrayQueue; +use libk_util::{sync::IrqGuard, OneTimeInit}; +use yggdrasil_abi::error::Error; + +use crate::thread::Thread; + +use super::task::Task; + +pub(super) static TASK_QUEUE: OneTimeInit = OneTimeInit::new(); + +pub(super) struct TaskQueue { + // Queue of workers waiting for an item + pending_workers: ArrayQueue>, + task_queue: ArrayQueue>, +} + +impl TaskQueue { + pub fn new(task_capacity: usize) -> Self { + assert!(task_capacity > 0); + Self { + pending_workers: ArrayQueue::new(16), + task_queue: ArrayQueue::new(task_capacity), + } + } + + fn wakeup_one(&self) { + if let Some(worker) = self.pending_workers.pop() { + worker.enqueue(); + } + } + + pub fn enqueue(&self, task: Arc) -> Result<(), Error> { + let _irq = IrqGuard::acquire(); + if self.task_queue.push(task).is_err() { + todo!(); + } + self.wakeup_one(); + Ok(()) + } + + pub fn dequeue(&self) -> Result, Error> { + let thread = Thread::current(); + // assert!(PlatformImpl::interrupt_mask()); + loop { + if let Some(task) = self.task_queue.pop() { + return Ok(task); + } + + if self.pending_workers.push(thread.clone()).is_err() { + panic!("Pending worker queue overflow"); + } + + // This must not fail. Signals must not be raised. 
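+            // Park this worker: it was pushed onto `pending_workers` above, so a later
+            // `enqueue()` can wake it through `wakeup_one()`; once resumed, loop back
+            // and retry the `pop()` above.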
+            thread.suspend().unwrap();
+        }
+    }
+}
+
+/// Initializes the global async/await task queue
+pub fn init_task_queue() {
+    TASK_QUEUE.init(TaskQueue::new(128));
+}
+
+pub(super) fn push_task(task: Arc<Task>) -> Result<(), Error> {
+    TASK_QUEUE.get().enqueue(task)
+}
+
+pub(super) fn pop_task() -> Result<Arc<Task>, Error> {
+    TASK_QUEUE.get().dequeue()
+}
diff --git a/kernel/libk/libk-thread/src/runtime/timer.rs b/kernel/libk/libk-thread/src/runtime/timer.rs
new file mode 100644
index 00000000..0150ee6e
--- /dev/null
+++ b/kernel/libk/libk-thread/src/runtime/timer.rs
@@ -0,0 +1,141 @@
+use core::{
+    pin::Pin,
+    task::{Context, Poll, Waker},
+    time::Duration,
+};
+
+use alloc::vec::Vec;
+use futures_util::{future::BoxFuture, Future, FutureExt};
+use libk_util::sync::IrqSafeSpinlock;
+
+// 1..32ms, tick every 1ms
+static SHORT_TERM_SLEEPS: IrqSafeSpinlock<TimerWheel<32>> =
+    IrqSafeSpinlock::new(TimerWheel::new(0, 1));
+// 32ms..288ms, tick every 8ms
+static LONG_TERM_SLEEPS: IrqSafeSpinlock<TimerWheel<32>> =
+    IrqSafeSpinlock::new(TimerWheel::new(4, 8));
+
+// A simple timer wheel inspired by
+// https://www.snellman.net/blog/archive/2016-07-27-ratas-hierarchical-timer-wheel/
+// The wheel consists of a ring with slots. When a task wants to sleep for N ticks,
+// it adds itself to a list N steps after the current tick (modulo STEPS). The wheel
+// also has an interval used to "scale" ticks for longer-term sleeps.
+struct TimerWheel<const STEPS: usize> {
+    // TODO maybe use something better than Vec
+    ring: [Vec<Waker>; STEPS],
+    tick: u64,
+    real_tick: u64,
+    base: u64,
+    interval: u64,
+}
+
+impl<const STEPS: usize> TimerWheel<STEPS> {
+    pub const fn new(base: u64, interval: u64) -> Self {
+        Self {
+            ring: [const { Vec::new() }; STEPS],
+            tick: 0,
+            real_tick: 0,
+            base,
+            interval,
+        }
+    }
+
+    pub fn tick(&mut self) {
+        self.real_tick += 1;
+        if self.real_tick == self.interval {
+            self.tick = self.tick.wrapping_add(1);
+            for waker in self.ring[(self.tick % (STEPS as u64)) as usize].drain(..)
{ + waker.wake(); + } + self.real_tick = 0; + } + } + + pub fn wake_after(&mut self, ticks: u64, waker: &Waker) { + debug_assert!((ticks / self.interval) - self.base <= STEPS as u64,); + let ticks = ticks.max(1); + self.ring[((self.tick + ticks / self.interval - self.base) % (STEPS as u64)) as usize] + .push(waker.clone()); + } +} + +fn register_timeout(duration: Duration, waker: &Waker) { + let nticks = duration.as_millis().min(288) as u64; + + if nticks < 32 { + SHORT_TERM_SLEEPS.lock().wake_after(nticks, waker); + } else { + LONG_TERM_SLEEPS.lock().wake_after(nticks, waker); + } +} + +pub enum FutureTimeout { + Ok(T), + Timeout, +} + +pub struct SleepFuture { + deadline: Duration, +} + +impl Future for SleepFuture { + type Output = (); + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let now = libk_device::monotonic_timestamp().unwrap(); + match self.deadline.checked_sub(now) { + // Pending + Some(duration) if !duration.is_zero() => { + register_timeout(duration, cx.waker()); + Poll::Pending + } + // Ran out + _ => Poll::Ready(()), + } + } +} + +/// Suspends the task until given duration passes +pub fn sleep(duration: Duration) -> SleepFuture { + let now = libk_device::monotonic_timestamp().unwrap(); + let deadline = now + duration; + + SleepFuture { deadline } +} + +pub fn run_with_timeout<'a, T: 'a, F: Future + Send + 'a>( + duration: Duration, + fut: F, +) -> impl Future> + 'a { + struct TimeoutFuture<'f, T> { + fut: BoxFuture<'f, T>, + sleep_fut: BoxFuture<'f, ()>, + } + + impl<'f, T> Future for TimeoutFuture<'f, T> { + type Output = FutureTimeout; + + fn poll(mut self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + let (timeout, result) = (self.sleep_fut.as_mut().poll(cx), self.fut.as_mut().poll(cx)); + + if let Poll::Ready(result) = result { + Poll::Ready(FutureTimeout::Ok(result)) + } else if timeout.is_ready() { + Poll::Ready(FutureTimeout::Timeout) + } else { + Poll::Pending + } + } + } + + TimeoutFuture { + fut: fut.boxed(), + sleep_fut: sleep(duration).boxed(), + } +} + +/// Updates the runtime's time +pub fn tick(_now: Duration) { + SHORT_TERM_SLEEPS.lock().tick(); + LONG_TERM_SLEEPS.lock().tick(); +} diff --git a/kernel/libk/libk-thread/src/sched.rs b/kernel/libk/libk-thread/src/sched.rs new file mode 100644 index 00000000..09a5d7c2 --- /dev/null +++ b/kernel/libk/libk-thread/src/sched.rs @@ -0,0 +1,202 @@ +use core::cell::Cell; + +use alloc::{sync::Arc, vec::Vec}; +use crossbeam_queue::SegQueue; +use kernel_arch::{ + task::{Scheduler, TaskContext}, + Architecture, ArchitectureImpl, CpuImpl, +}; +use libk_util::{sync::IrqGuard, OneTimeInit}; + +use crate::{thread::Thread, TaskContextImpl, ThreadId, ThreadState}; + +/// Per-CPU queue +pub struct CpuQueue { + queue: SegQueue, + index: usize, + idle: Cell, +} + +static QUEUES: OneTimeInit> = OneTimeInit::new(); + +impl CpuQueue { + /// Creates a new [CpuQueue] for CPU with given `index` + pub fn new(index: usize) -> Self { + let idle = TaskContextImpl::kernel( + ArchitectureImpl::idle_task(), + CpuImpl::::local().id() as usize, + ) + .expect("Could not construct an idle task"); + + Self { + queue: SegQueue::new(), + index, + idle: Cell::new(idle), + } + } + + /// Returns the queue's associated CPU index + #[inline] + pub fn index(&self) -> usize { + self.index + } + + /// "Enters" the scheduler by selecting a first task to execute. + /// + /// # Safety + /// + /// Only meant to be called once per each CPU when everything's ready. + pub unsafe fn enter(&self) -> ! 
{ + let _guard = IrqGuard::acquire(); + + (*self.idle.as_ptr()).enter() + } + + fn pop(&self) -> (Option>, Option) { + while let Some(id) = self.queue.pop() { + let Some(thread) = Thread::get(id) else { + continue; + }; + + let mut sched = thread.sched.lock(); + + assert!(sched.in_queue); + assert!(core::ptr::eq(self, sched.queue.unwrap())); + + match sched.state { + ThreadState::Ready => { + sched.state = ThreadState::Running; + drop(sched); + return (Some(thread), Some(id)); + } + ThreadState::Running => { + panic!("Unexpected state: Running ({:?})", id); + } + ThreadState::Terminated => { + sched.in_queue = false; + sched.queue = None; + thread.set_terminated(); + } + ThreadState::Suspended => { + sched.queue = None; + sched.in_queue = false; + } + } + } + + (None, None) + } +} + +impl Scheduler for CpuQueue { + type ThreadId = ThreadId; + + fn for_cpu(index: usize) -> &'static Self { + &QUEUES.get()[index] + } + + fn for_affinity_mask(mask: u64) -> &'static Self { + debug_assert_ne!(mask, 0); + + QUEUES + .get() + .iter() + .filter(|c| mask & (1 << c.index) != 0) + .min_by_key(|c| c.queue.len()) + .unwrap() + } + + fn local() -> &'static Self { + CpuImpl::local().scheduler() + } + + fn is_local(&self) -> bool { + assert!(ArchitectureImpl::interrupt_mask()); + core::ptr::eq(Self::local(), self) + } + + fn push(&self, task: Self::ThreadId) { + self.queue.push(task); + } + + unsafe fn yield_cpu(&self) -> bool { + assert!(ArchitectureImpl::interrupt_mask()); + + let mut cpu = CpuImpl::::local(); + + let current_id = cpu.current_thread_id(); + let current = current_id.and_then(Thread::get); + + let drop_current = if let Some(current) = current.as_ref() { + let mut sched = current.sched.lock(); + + let q = sched.queue.unwrap(); + assert!(core::ptr::eq(q, self)); + assert!(sched.in_queue); + + match sched.state { + ThreadState::Ready => { + self.queue.push(current.id); + false + } + ThreadState::Running => { + sched.state = ThreadState::Ready; + + self.queue.push(current.id); + false + } + ThreadState::Terminated => { + sched.in_queue = false; + sched.queue = None; + + current.set_terminated(); + true + } + ThreadState::Suspended => { + sched.queue = None; + sched.in_queue = false; + false + } + } + } else { + false + }; + + let (next, next_id) = self.pop(); + + let current_ctx = if let Some(current) = current.as_ref() { + ¤t.context + } else { + &self.idle + }; + + let next_ctx = if let Some(next) = next.as_ref() { + &next.context + } else { + &self.idle + }; + + cpu.set_current_thread_id(next_id); + + let current_ctx = current_ctx.as_ptr(); + let next_ctx = next_ctx.as_ptr(); + + if !core::ptr::eq(current_ctx, next_ctx) { + // Perform the switch + if drop_current { + (*next_ctx).switch_and_drop(Arc::into_raw(current.unwrap()) as _); + } else { + (*next_ctx).switch(&*current_ctx); + } + + true + } else { + false + } + } +} + +/// Initializes the global queue list +pub fn init_queues(queues: Vec) { + QUEUES.init(queues); +} diff --git a/kernel/libk/libk-thread/src/sync.rs b/kernel/libk/libk-thread/src/sync.rs new file mode 100644 index 00000000..175be23a --- /dev/null +++ b/kernel/libk/libk-thread/src/sync.rs @@ -0,0 +1,213 @@ +use core::{ + cell::UnsafeCell, + future::poll_fn, + ops::{Deref, DerefMut}, + sync::atomic::{AtomicBool, AtomicU32, Ordering}, + task::{Context, Poll}, +}; + +use alloc::sync::Arc; +use crossbeam_queue::ArrayQueue; +use kernel_arch::task::Scheduler; +use libk_util::{sync::LockMethod, waker::QueueWaker}; +use yggdrasil_abi::error::Error; + +use crate::{sched::CpuQueue, 
thread::Thread}; + +struct ThreadedMutexInner { + queue: ArrayQueue>, + lock: AtomicU32, +} + +pub struct AsyncMutex { + value: UnsafeCell, + waker: QueueWaker, + lock: AtomicBool, +} + +pub struct AsyncMutexGuard<'a, T> { + mutex: &'a AsyncMutex, +} + +pub struct Mutex { + value: UnsafeCell, + lock: ThreadedMutexInner, +} + +pub struct MutexGuard<'a, T> { + value: &'a UnsafeCell, + lock: &'a ThreadedMutexInner, +} + +impl AsyncMutex { + pub fn new(value: T) -> Self { + Self { + value: UnsafeCell::new(value), + waker: QueueWaker::new(), + lock: AtomicBool::new(false), + } + } + + pub fn try_lock(&self) -> bool { + self.lock + .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed) + .is_ok() + } + + pub fn poll_lock(&self, cx: &mut Context<'_>) -> Poll> { + self.waker.register(cx.waker()); + + if self.try_lock() { + self.waker.remove(cx.waker()); + return Poll::Ready(AsyncMutexGuard { mutex: self }); + } + + Poll::Pending + } + + pub async fn lock(&self) -> AsyncMutexGuard { + poll_fn(|cx| self.poll_lock(cx)).await + } + + pub unsafe fn force_unlock(&self) { + self.lock.store(false, Ordering::Release); + self.waker.wake_one(); + } + + #[inline] + pub fn get(&self) -> *mut T { + self.value.get() + } +} + +unsafe impl Sync for AsyncMutex {} + +unsafe impl<'a, T> Send for AsyncMutexGuard<'a, T> {} +unsafe impl<'a, T> Sync for AsyncMutexGuard<'a, T> {} + +impl<'a, T> Deref for AsyncMutexGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + unsafe { &*self.mutex.get() } + } +} + +impl<'a, T> DerefMut for AsyncMutexGuard<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut *self.mutex.get() } + } +} + +impl<'a, T> Drop for AsyncMutexGuard<'a, T> { + fn drop(&mut self) { + unsafe { + self.mutex.force_unlock(); + } + } +} + +impl ThreadedMutexInner { + const UNLOCKED: u32 = 0; + const LOCKED: u32 = 1; + + pub fn new(queue_capacity: usize) -> Self { + Self { + queue: ArrayQueue::new(queue_capacity), + lock: AtomicU32::new(Self::UNLOCKED), + } + } + + fn try_lock(&self) -> bool { + self.lock + .compare_exchange( + Self::UNLOCKED, + Self::LOCKED, + Ordering::Acquire, + Ordering::Relaxed, + ) + .is_ok() + } +} + +impl<'q> LockMethod<'q> for ThreadedMutexInner { + type Guard<'a> = () where 'a: 'q, Self: 'a; + + fn lock(&self) -> Result<(), Error> { + // Fast-path + + loop { + if self.try_lock() { + return Ok(()); + } + + let thread = Thread::current(); + + self.queue + .push(thread.clone()) + .map_err(|_| Error::WouldBlock)?; + + thread.suspend()?; + } + } + + unsafe fn release(&self) { + if self.lock.swap(Self::UNLOCKED, Ordering::Release) == Self::LOCKED { + if let Some(t) = self.queue.pop() { + t.enqueue(); + + // Yield current thread to avoid congestion + unsafe { + CpuQueue::local().yield_cpu(); + } + } + } + } +} + +impl Mutex { + pub fn new(value: T) -> Self { + Self { + value: UnsafeCell::new(value), + lock: ThreadedMutexInner::new(32), + } + } +} + +impl<'q, T> LockMethod<'q> for Mutex { + type Guard<'a> = MutexGuard<'a, T> where 'a: 'q, Self: 'a; + + fn lock(&'q self) -> Result, Error> { + self.lock.lock()?; + Ok(MutexGuard { + lock: &self.lock, + value: &self.value, + }) + } + + unsafe fn release(&self) { + self.lock.release(); + } +} + +unsafe impl Sync for Mutex {} + +impl<'a, T> Deref for MutexGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + unsafe { &*self.value.get() } + } +} + +impl<'a, T> DerefMut for MutexGuard<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut *self.value.get() } 
+ } +} + +impl<'a, T> Drop for MutexGuard<'a, T> { + fn drop(&mut self) { + unsafe { self.lock.release() } + } +} diff --git a/kernel/libk/libk-thread/src/thread.rs b/kernel/libk/libk-thread/src/thread.rs new file mode 100644 index 00000000..8d259b87 --- /dev/null +++ b/kernel/libk/libk-thread/src/thread.rs @@ -0,0 +1,461 @@ +use core::{cell::Cell, mem::size_of, ops::Deref}; + +use alloc::{ + collections::BTreeMap, + string::String, + sync::{Arc, Weak}, +}; +use crossbeam_queue::SegQueue; +use futures_util::task::ArcWake; +use kernel_arch::{ + task::{Scheduler, TaskContext, TaskFrame}, + CpuImpl, +}; +use libk_mm::process::ProcessAddressSpace; +use libk_util::{ + event::BoolEvent, + sync::{spin_rwlock::IrqSafeRwLock, IrqGuard, IrqSafeSpinlock}, +}; +use yggdrasil_abi::{ + error::Error, + process::{ExitCode, ProcessId, Signal, SignalEntryData}, +}; + +use crate::{ + mem::ForeignPointer, + process::{Process, ProcessManager}, + sched::CpuQueue, + types::{ThreadAffinity, ThreadId, ThreadState}, + TaskContextImpl, +}; + +/// Provides details about how the thread is scheduled onto CPUs +pub struct ThreadSchedulingInfo { + /// Current state + pub state: ThreadState, + + /// Is the thread present in any queue + pub in_queue: bool, + /// Queue into which the thread was last put + pub queue: Option<&'static CpuQueue>, +} + +struct SignalEntry { + entry: usize, + stack: usize, +} + +struct ThreadInner { + signal_entry: Option, +} + +/// Describes a single thread within the system +pub struct Thread { + /// Unique thread ID + pub id: ThreadId, + /// Thread name + pub name: Option, + /// Scheduling information + pub sched: IrqSafeSpinlock, + /// Low-level context details + pub context: Cell, + process: Option, + space: Option>, + + inner: IrqSafeSpinlock, + signal_queue: SegQueue, + + pub exit: BoolEvent, + /// CPU scheduling affinity mask + pub affinity: ThreadAffinity, +} + +/// Wrapper which guarantees the thread referred to is the current one on the current CPU +#[repr(C)] +pub struct CurrentThread(Arc, IrqGuard); + +struct GlobalThreadList { + data: BTreeMap>, +} + +static THREADS: IrqSafeRwLock = IrqSafeRwLock::new(GlobalThreadList::new()); + +// TODO this is ugly? +#[no_mangle] +unsafe extern "C" fn __arch_drop_thread(thread_ptr: *const Thread) { + let thread = Arc::from_raw(thread_ptr); + Thread::remove_from_list(thread.id); + Arc::decrement_strong_count(thread_ptr); +} + +impl Thread { + fn new( + id: ThreadId, + name: Option, + process: Option, + space: Option>, + context: TaskContextImpl, + ) -> Arc { + let thread = Arc::new(Self { + id, + name, + sched: IrqSafeSpinlock::new(ThreadSchedulingInfo { + state: ThreadState::Suspended, + in_queue: false, + queue: None, + }), + context: Cell::new(context), + process, + space, + + inner: IrqSafeSpinlock::new(ThreadInner { signal_entry: None }), + signal_queue: SegQueue::new(), + exit: BoolEvent::new(), + + affinity: ThreadAffinity::any_cpu(), + }); + + THREADS.write().insert(thread.clone()); + + thread + } + + /// Constructs a new kernel-space thread + pub fn new_kthread>(name: N, context: TaskContextImpl) -> Arc { + Self::new( + ThreadId::next_kernel(), + Some(name.into()), + None, + None, + context, + ) + } + + /// Constructs a new user-space thread + pub fn new_uthread( + parent: ProcessId, + space: Arc, + context: TaskContextImpl, + ) -> Arc { + Self::new( + ThreadId::next_user(), + None, + Some(parent), + Some(space), + context, + ) + } + + /// Replaces the current context of the thread with a new one. 
+ /// + /// TODO: how do I drop the old one within exec(), lol + /// + /// # Safety + /// + /// Unsafe: directly sets the thread's context. + pub unsafe fn replace_context(&self, context: TaskContextImpl) -> TaskContextImpl { + self.context.replace(context) + } + + // Get/Set + + /// Updates the thread affinity to run on a specific set of CPUs + pub fn set_affinity(&self, affinity: u64) { + self.affinity.set(affinity); + } + + /// Updates the thread signal entry/stack information + pub fn set_signal_entry(&self, entry: usize, stack: usize) { + let mut inner = self.inner.lock(); + inner.signal_entry.replace(SignalEntry { entry, stack }); + } + + /// Returns the thread address space (usually provided by its parent process). If none exists, + /// panics. + pub fn address_space(&self) -> &ProcessAddressSpace { + self.space.as_ref().unwrap() + } + + /// Returns the thread's parent process ID, panics if there's none + pub fn process_id(&self) -> ProcessId { + self.process.unwrap() + } + + /// Returns the thread's parent process reference + pub fn try_get_process(&self) -> Option> { + self.process.and_then(PM::get) + } + + /// Returns the thread's parent process reference, panics if there's none + pub fn process(&self) -> Arc { + PM::get(self.process.unwrap()).unwrap() + } + + /// Removes the thread from the thread list + pub fn remove_from_list(id: ThreadId) { + THREADS + .write() + .remove(id) + .expect("Thread was not in the thread list?"); + } + + /// Updates the thread's terminated status and wakes up any other threads waiting for it to + /// exit + pub fn set_terminated(&self) { + self.exit.signal_saturating(); + } + + // Signals + + /// Pushes a signal to the thread's signal queue + pub fn raise_signal(&self, signal: Signal) { + self.signal_queue.push(signal); + self.enqueue(); + } + + // Scheduling + + /// Changes thread state to "Ready" and inserts it into given `queue`, if it's not yet in one + pub fn enqueue_to(&self, queue: &'static CpuQueue) { + let mut sched = self.sched.lock(); + + if sched.state == ThreadState::Terminated { + return; + } + + match sched.state { + ThreadState::Running | ThreadState::Ready => { + assert!(sched.in_queue); + } + ThreadState::Suspended => { + sched.state = ThreadState::Ready; + + if !sched.in_queue { + assert!(sched.queue.is_none()); + + sched.in_queue = true; + sched.queue = Some(queue); + + queue.push(self.id); + } + } + ThreadState::Terminated => panic!("Cannot enqueue a terminated thread"), + } + } + + /// Changes thread state to `state` and removes it from its queue. 
If the thread is currently + /// running on local CPU, will yield control to the next thread + pub fn dequeue(&self, state: ThreadState) { + let mut sched = self.sched.lock(); + + debug_assert_ne!(state, ThreadState::Running); + debug_assert_ne!(state, ThreadState::Ready); + + let old_state = sched.state; + sched.state = state; + + if let Some(queue) = sched.queue { + match (queue.is_local(), old_state) { + (true, ThreadState::Running) => unsafe { + debug_assert!(sched.in_queue); + drop(sched); + queue.yield_cpu(); + }, + (false, ThreadState::Running) => { + // XXX + // debugln!("deq remote {:?}", self.id); + debug_assert!(sched.in_queue); + } + (_, ThreadState::Ready) => { + debug_assert!(sched.in_queue); + } + (_, ThreadState::Suspended | ThreadState::Terminated) => { + todo!() + } + } + } else { + assert!(!sched.in_queue); + assert_ne!(old_state, ThreadState::Running); + assert_ne!(old_state, ThreadState::Ready); + } + } + + /// Inserts the thread as "Ready" into the best queue (based on affinity and queue load) + pub fn enqueue(&self) { + let queue = CpuQueue::for_affinity_mask(self.affinity.get()); + self.enqueue_to(queue); + } + + /// Requests thread termination and blocks until said thread finishes fully + pub async fn terminate(self: &Arc) { + // Will not abort the execution: called from another thread + self.dequeue(ThreadState::Terminated); + self.exit.wait().await; + } + + /// Returns the current thread on the CPU. + /// + /// # Panics + /// + /// Will panic if no current thread is present. For try-style getter, see + /// [Thread::get_current]. + pub fn current() -> CurrentThread { + Self::get_current().unwrap() + } + + /// Returns the current thread on the CPU, if any is present + pub fn get_current() -> Option { + // IrqGuard is held throughout + let cpu = CpuImpl::::local(); + let thread = cpu.current_thread_id().and_then(Self::get); + + thread.map(|t| CurrentThread(t, cpu.into_guard())) + } + + /// Returns a thread for given `id`, if such exists + pub fn get(id: ThreadId) -> Option> { + THREADS.read().get(id).cloned() + } + + pub unsafe fn upgrade(weak: &Weak) -> Option { + let guard = IrqGuard::acquire(); + let strong = weak.upgrade()?; + Some(CurrentThread(strong, guard)) + } +} + +impl GlobalThreadList { + pub const fn new() -> Self { + Self { + data: BTreeMap::new(), + } + } + + #[inline] + pub fn get(&self, id: ThreadId) -> Option<&Arc> { + self.data.get(&id) + } + + pub fn insert(&mut self, thread: Arc) { + let id = thread.id; + debug_assert!(!self.data.contains_key(&id)); + self.data.insert(id, thread); + } + + pub fn remove(&mut self, id: ThreadId) -> Option> { + self.data.remove(&id) + } +} + +unsafe impl Sync for Thread {} + +impl ArcWake for Thread { + fn wake_by_ref(arc_self: &Arc) { + arc_self.clone().enqueue() + } +} + +impl CurrentThread { + pub fn downgrade(self) -> Weak { + Arc::downgrade(&self.0) + } + + /// Terminate the current thread + pub fn exit(&self, code: ExitCode) -> ! { + if let Some(process) = self.try_get_process::() { + process.handle_thread_exit(self.id, code); + } + self.dequeue(ThreadState::Terminated); + unreachable!() + } + + // TODO: test multithreaded process exit + /// Terminate the parent process of the thread, including all other threads and the current + /// thread itself + pub fn exit_process(&self, code: ExitCode) -> ! { + let _guard = IrqGuard::acquire(); + + let process = self.process::(); + + let p = process.clone(); + + block! 
{ + p.terminate_others(self.id).await; + } + .unwrap(); + + self.exit::(code) + } + + /// Suspends a thread from further execution until it is awoken + pub fn suspend(&self) -> Result<(), Error> { + self.dequeue(ThreadState::Suspended); + + if !self.signal_queue.is_empty() { + return Err(Error::Interrupted); + } + + Ok(()) + } + + /// Sets up a return frame to handle a pending signal, if any is present in the task's queue. + /// + /// # Safety + /// + /// This function is only meant to be called right before returning from an userspace + /// exception handler. + pub unsafe fn handle_pending_signals(&self, frame: &mut F) { + if !self.id.is_user() { + return; + } + + if let Some(signal) = self.signal_queue.pop() { + let inner = self.inner.lock(); + + let Some(entry) = inner.signal_entry.as_ref() else { + drop(inner); + self.exit_process::(ExitCode::BySignal(signal)); + }; + + // TODO check if really in a syscall, lol + let syscall_return = -(u32::from(Error::Interrupted) as isize); + frame.set_return_value(syscall_return as u64); + + // Setup signal frame + let usp = ((entry.stack - size_of::()) & !0xF) + - TaskContextImpl::SIGNAL_STACK_EXTRA_ALIGN; + let frame_ptr = usp as *mut SignalEntryData; + + let saved_frame = frame.store(); + frame_ptr.write_foreign_volatile( + self.address_space(), + SignalEntryData { + signal, + frame: saved_frame, + }, + ); + + // Setup return to signal handler + log::debug!( + "Signal entry @ pc={:#x}, sp={:#x} (top = {:#x})", + entry.entry, + usp, + entry.stack + ); + + frame.set_user_sp(usp); + frame.set_user_ip(entry.entry); + + // Pass the frame pointer as an argument to signal handler entry + frame.set_argument(usp as _); + } + } +} + +impl Deref for CurrentThread { + type Target = Arc; + + fn deref(&self) -> &Self::Target { + &self.0 + } +} diff --git a/kernel/libk/libk-thread/src/types.rs b/kernel/libk/libk-thread/src/types.rs new file mode 100644 index 00000000..f07d62e5 --- /dev/null +++ b/kernel/libk/libk-thread/src/types.rs @@ -0,0 +1,200 @@ +use core::{ + fmt, + mem::size_of, + sync::atomic::{AtomicU32, AtomicU64, Ordering}, +}; + +use atomic_enum::atomic_enum; +use yggdrasil_abi::process::ProcessId; + +pub trait AllocateProcessId { + fn new() -> Self; +} + +/// Represents the states a thread can be at some point in time +#[atomic_enum] +#[derive(PartialEq)] +pub enum ThreadState { + /// Thread is ready for execution and is present in some CPU's queue + Ready, + /// Thread is currently being executed by some CPU + Running, + /// Thread is present in a global list, but is not queued for execution until it is resumed + Suspended, + /// Thread is terminated and waits to be reaped + Terminated, +} + +/// Mask describing CPUs a thread is allowed to be scheduled to +#[derive(Debug)] +#[repr(transparent)] +pub struct ThreadAffinity(AtomicU64); + +/// Unique number describing a single kernel or userspace thread +#[derive(Debug, PartialEq, Eq, Clone, Copy, Ord, PartialOrd)] +pub enum ThreadId { + /// Describes a kernel-space thread + Kernel(u64), + /// Describes an user-space thread + User(u64), +} + +// TLS layout (x86-64): +// | mem_size | uthread_size | +// | Data .......| self, ??? | +// +// TLS layout (aarch64): +// | uthread_size (0x10?) | mem_size | +// | ??? 
| Data .....| + +/// Describes Thread-Local Storage of a process +#[derive(Clone, Debug)] +pub struct ProcessTlsInfo { + /// Location of the TLS master copy within the process's memory + pub master_copy_base: usize, + /// Layout of the TLS + pub layout: ProcessTlsLayout, +} + +/// Describes TLS layout for a program image +#[derive(Clone, Debug)] +pub struct ProcessTlsLayout { + /// Data offset from the TLS base + pub data_offset: usize, + /// struct uthread offset from the TLS base + pub uthread_offset: usize, + /// Pointer offset from the TLS base. The pointer is passed to the userspace + pub ptr_offset: usize, + + /// Data size of the TLS segment + pub data_size: usize, + /// Memory size of the TLS segment (mem_size >= data_size) + pub mem_size: usize, + + /// Overall allocation size of the TLS data + pub full_size: usize, +} + +impl ThreadAffinity { + /// Mask value for a thread to be scheduled onto any CPU + pub const ANY_CPU: u64 = u64::MAX; + + /// Constructs an affinity mask allowing a thread to be scheduled onto any CPU + pub const fn any_cpu() -> Self { + Self(AtomicU64::new(Self::ANY_CPU)) + } + + /// Constructs an affinity mask targeting a single CPU + pub const fn only_cpu(index: usize) -> Self { + Self(AtomicU64::new(1 << index)) + } + + /// Returns the current value of the thread's CPU affinity mask + #[inline] + pub fn get(&self) -> u64 { + self.0.load(Ordering::Relaxed) + } + + /// Updates the thread's CPU affinity mask + #[inline] + pub fn set(&self, value: u64) { + debug_assert_ne!(value, 0); + self.0.store(value, Ordering::Relaxed); + } +} + +impl ThreadId { + /// Generates a new kernel-space thread ID + pub fn next_kernel() -> Self { + static COUNT: AtomicU64 = AtomicU64::new(1); + let id = COUNT.fetch_add(1, Ordering::SeqCst); + Self::Kernel(id) + } + + /// Generates a new user-space thread ID + pub fn next_user() -> Self { + static COUNT: AtomicU64 = AtomicU64::new(1); + let id = COUNT.fetch_add(1, Ordering::SeqCst); + Self::User(id) + } + + /// Returns the number of the userspace thread represented by this ID. + /// + /// # Panics + /// + /// Will panic if this [ThreadId] does not represent a user-space thread. 
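+    ///
+    /// Illustrative only (the actual IDs depend on allocation order):
+    /// ```ignore
+    /// let id = ThreadId::next_user();
+    /// let _n: u64 = id.as_user(); // ok
+    /// // ThreadId::next_kernel().as_user(); // would panic
+    /// ```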
+ pub fn as_user(&self) -> u64 { + match self { + Self::Kernel(_) => panic!(), + &Self::User(id) => id, + } + } + + /// Returns `true` if the [ThreadId] represents a user-space thread + pub fn is_user(&self) -> bool { + matches!(self, ThreadId::User(_)) + } +} + +impl fmt::Display for ThreadId { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Kernel(id) => write!(f, "#[{id}]"), + Self::User(id) => write!(f, "#{id}"), + } + } +} + +impl AllocateProcessId for ProcessId { + fn new() -> Self { + static COUNTER: AtomicU32 = AtomicU32::new(1); + let id = COUNTER.fetch_add(1, Ordering::SeqCst); + unsafe { Self::from_raw(id) } + } +} + +#[cfg(target_arch = "aarch64")] +impl ProcessTlsLayout { + /// Constructs a new thread-local storage layout info struct + pub fn new(align: usize, data_size: usize, mem_size: usize) -> Self { + debug_assert!(align.is_power_of_two()); + let tls_block0_offset = (size_of::() * 2 + align - 1) & !(align - 1); + + let full_size = (tls_block0_offset + mem_size + align - 1) & !(align - 1); + + Self { + data_offset: tls_block0_offset, + uthread_offset: 0, + ptr_offset: 0, + + data_size, + mem_size, + full_size, + } + } +} + +#[cfg(target_arch = "x86_64")] +impl ProcessTlsLayout { + /// Constructs a new thread-local storage layout info struct + pub fn new(align: usize, data_size: usize, mem_size: usize) -> Self { + // The static TLS blocks are placed below TP + // TP points to the TCB + debug_assert!(align.is_power_of_two()); + let back_size = (mem_size + align - 1) & !(align - 1); + // Self-pointer + let forward_size = size_of::(); + + let full_size = back_size + forward_size; + + Self { + data_offset: 0, + uthread_offset: back_size, + ptr_offset: back_size, + + data_size, + mem_size, + full_size, + } + } +} diff --git a/kernel/libk/libk-util/Cargo.toml b/kernel/libk/libk-util/Cargo.toml new file mode 100644 index 00000000..7aeb87e3 --- /dev/null +++ b/kernel/libk/libk-util/Cargo.toml @@ -0,0 +1,14 @@ +[package] +name = "libk-util" +version = "0.1.0" +edition = "2021" + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +yggdrasil-abi = { git = "https://git.alnyan.me/yggdrasil/yggdrasil-abi.git" } +kernel-arch = { path = "../../arch" } + +log = "0.4.20" +crossbeam-queue = { version = "0.3.8", default-features = false, features = ["alloc"] } +futures-util = { version = "0.3.28", default-features = false, features = ["alloc", "async-await"] } diff --git a/kernel/libk/libk-util/src/event.rs b/kernel/libk/libk-util/src/event.rs new file mode 100644 index 00000000..6d532dcd --- /dev/null +++ b/kernel/libk/libk-util/src/event.rs @@ -0,0 +1,140 @@ +use core::{ + future::poll_fn, + sync::atomic::{AtomicBool, Ordering}, + task::Poll, +}; + +use crate::{sync::spin_rwlock::IrqSafeRwLock, waker::QueueWaker}; + +pub struct OneTimeEvent { + // TODO lockless like OneTimeInit? + value: IrqSafeRwLock>, + notify: QueueWaker, +} + +pub struct BoolEvent { + state: AtomicBool, + notify: QueueWaker, +} + +impl OneTimeEvent { + /// Constructs an [OneTimeEvent] + pub const fn new() -> Self { + Self { + value: IrqSafeRwLock::new(None), + notify: QueueWaker::new(), + } + } + + /// Signals a value into the event. Returns an [Err] with the value if the event + /// has already been signalled before. 
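+    ///
+    /// A minimal sketch of the signal-once contract:
+    /// ```ignore
+    /// let event = OneTimeEvent::new();
+    /// assert!(event.try_signal(123).is_ok());
+    /// assert_eq!(event.try_signal(456), Err(456)); // already signalled
+    /// ```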
+    pub fn try_signal(&self, value: T) -> Result<(), T> {
+        let mut lock = self.value.write();
+        if lock.is_some() {
+            return Err(value);
+        }
+        lock.replace(value);
+        self.notify.wake_all();
+        Ok(())
+    }
+
+    /// Signals a value into the event.
+    ///
+    /// # Panics
+    ///
+    /// Will panic if the event has already been signalled. Use this function only when
+    /// the event is known to be signalled exactly once.
+    pub fn signal(&self, value: T) {
+        if self.try_signal(value).is_err() {
+            panic!("OneTimeEvent signalled more than once");
+        }
+    }
+
+    /// Returns `true` if a value has been signalled into the event
+    pub fn is_signalled(&self) -> bool {
+        self.value.read().is_some()
+    }
+
+    /// Returns [Some] with a copy of the value if the event has been signalled,
+    /// [None] otherwise
+    pub fn try_read_copy(&self) -> Option<T>
+    where
+        T: Copy,
+    {
+        *self.value.read()
+    }
+
+    /// Waits for the event to happen without returning anything
+    pub async fn wait(&self) {
+        poll_fn(|cx| {
+            self.notify.register(cx.waker());
+            if self.is_signalled() {
+                self.notify.remove(cx.waker());
+                Poll::Ready(())
+            } else {
+                Poll::Pending
+            }
+        })
+        .await
+    }
+
+    /// Waits for the event to happen and copies its data as a return value
+    pub async fn wait_copy(&self) -> T
+    where
+        T: Copy,
+    {
+        poll_fn(|cx| {
+            self.notify.register(cx.waker());
+            if let Some(value) = self.try_read_copy() {
+                self.notify.remove(cx.waker());
+                Poll::Ready(value)
+            } else {
+                Poll::Pending
+            }
+        })
+        .await
+    }
+}
+
+impl BoolEvent {
+    pub const fn new() -> Self {
+        Self {
+            state: AtomicBool::new(false),
+            notify: QueueWaker::new(),
+        }
+    }
+
+    pub fn is_signalled(&self) -> bool {
+        self.state.load(Ordering::Acquire)
+    }
+
+    pub fn signal(&self) {
+        if self.try_signal().is_err() {
+            panic!("BoolEvent signalled more than once");
+        }
+    }
+
+    pub fn signal_saturating(&self) {
+        self.try_signal().ok();
+    }
+
+    pub fn try_signal(&self) -> Result<(), bool> {
+        self.state
+            .compare_exchange(false, true, Ordering::Acquire, Ordering::Relaxed)?;
+        self.notify.wake_all();
+        Ok(())
+    }
+
+    pub async fn wait(&self) {
+        poll_fn(|cx| {
+            self.notify.register(cx.waker());
+            if self.is_signalled() {
+                self.notify.remove(cx.waker());
+                Poll::Ready(())
+            } else {
+                Poll::Pending
+            }
+        })
+        .await
+    }
+}
diff --git a/kernel/libk/libk-util/src/io.rs b/kernel/libk/libk-util/src/io.rs
new file mode 100644
index 00000000..6fad6b2a
--- /dev/null
+++ b/kernel/libk/libk-util/src/io.rs
@@ -0,0 +1,62 @@
+use alloc::sync::Arc;
+use yggdrasil_abi::{error::Error, io::SeekFrom};
+
+/// Immutable read interface for VFS objects
+pub trait Read {
+    /// Reads bytes into the given buffer
+    fn read(&self, buf: &mut [u8]) -> Result<usize, Error>;
+
+    /// Reads exactly `buf.len()` bytes into the given buffer, fails if that many bytes
+    /// are not available
+    fn read_exact(&self, buf: &mut [u8]) -> Result<(), Error> {
+        match self.read(buf) {
+            Ok(count) if count == buf.len() => Ok(()),
+            Ok(_) => Err(Error::InvalidFile),
+            Err(err) => Err(err),
+        }
+    }
+}
+
+/// Immutable write interface for VFS objects
+pub trait Write {
+    /// Writes bytes from the given buffer
+    fn write(&self, buf: &[u8]) -> Result<usize, Error>;
+}
+
+/// Immutable file positioning interface for VFS objects
+pub trait Seek {
+    /// Changes position inside the file to a requested one. Fails if the file does not support
+    /// seeking.
+    fn seek(&self, from: SeekFrom) -> Result<u64, Error>;
+
+    /// Returns the position within the file, determined as an offset in bytes from the beginning
+    /// of the file.
Fails if the file does not support seeking or the "offset" is not defined for + /// such type of nodes. + fn tell(&self) -> Result { + self.seek(SeekFrom::Current(0)) + } +} + +impl Read for Arc { + #[inline] + fn read(&self, buf: &mut [u8]) -> Result { + self.as_ref().read(buf) + } + + #[inline] + fn read_exact(&self, buf: &mut [u8]) -> Result<(), Error> { + self.as_ref().read_exact(buf) + } +} + +impl Seek for Arc { + #[inline] + fn seek(&self, from: SeekFrom) -> Result { + self.as_ref().seek(from) + } + + #[inline] + fn tell(&self) -> Result { + self.as_ref().tell() + } +} diff --git a/kernel/libk/libk-util/src/lib.rs b/kernel/libk/libk-util/src/lib.rs new file mode 100644 index 00000000..6f8e7c25 --- /dev/null +++ b/kernel/libk/libk-util/src/lib.rs @@ -0,0 +1,94 @@ +#![no_std] +#![feature(maybe_uninit_slice, new_uninit, allocator_api, let_chains)] + +extern crate alloc; + +use core::{ + mem::MaybeUninit, + ops::{Deref, DerefMut}, + panic, +}; + +pub mod event; +pub mod io; +pub mod queue; +pub mod ring; +pub mod sync; +pub mod waker; + +// TODO make a crate for such utils? +pub use kernel_arch::util::OneTimeInit; + +pub enum ConstAssert {} +pub trait IsTrue {} + +impl IsTrue for ConstAssert {} + +/// Statically-allocated "dynamic" vector +pub struct StaticVector { + data: [MaybeUninit; N], + len: usize, +} + +impl StaticVector { + /// Constructs an empty instance of [StaticVector] + pub const fn new() -> Self + where + T: Copy, + { + Self { + data: [MaybeUninit::uninit(); N], + len: 0, + } + } + + /// Appends an item to the vector. + /// + /// # Panics + /// + /// Will panic if the vector is full. + pub fn push(&mut self, value: T) { + if self.len >= N { + panic!("Static vector overflow: reached limit of {}", N); + } + + self.data[self.len].write(value); + self.len += 1; + } + + /// Returns the number of items present in the vector + pub fn len(&self) -> usize { + self.len + } + + /// Returns `true` if the vector is empty + pub fn is_empty(&self) -> bool { + self.len == 0 + } + + /// Clears the vector, dropping all elements + pub fn clear(&mut self) { + let old_len = self.len; + self.len = 0; + + for i in 0..old_len { + unsafe { + self.data[i].assume_init_drop(); + } + } + } +} + +impl Deref for StaticVector { + type Target = [T]; + + fn deref(&self) -> &Self::Target { + unsafe { MaybeUninit::slice_assume_init_ref(&self.data[..self.len]) } + } +} + +impl DerefMut for StaticVector { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { MaybeUninit::slice_assume_init_mut(&mut self.data[..self.len]) } + } +} diff --git a/kernel/libk/libk-util/src/queue.rs b/kernel/libk/libk-util/src/queue.rs new file mode 100644 index 00000000..620048b1 --- /dev/null +++ b/kernel/libk/libk-util/src/queue.rs @@ -0,0 +1,119 @@ +use core::{ + future::poll_fn, + task::{Context, Poll}, +}; + +use crossbeam_queue::{ArrayQueue, SegQueue}; + +use crate::waker::QueueWaker; + +pub type BoundedQueue = ArrayQueue; +pub type UnboundedQueue = SegQueue; + +pub struct BoundedMpmcQueue { + queue: BoundedQueue, + read_notify: QueueWaker, + write_notify: QueueWaker, +} + +pub struct UnboundedMpmcQueue { + queue: UnboundedQueue, + read_notify: QueueWaker, +} + +impl BoundedMpmcQueue { + pub fn new(capacity: usize) -> Self { + Self { + queue: BoundedQueue::new(capacity), + read_notify: QueueWaker::new(), + write_notify: QueueWaker::new(), + } + } + + pub fn try_push_back(&self, value: T) -> Result<(), T> { + self.queue.push(value)?; + self.read_notify.wake_one(); + Ok(()) + } + + pub fn try_pop_front(&self) -> Option { 
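+        // Pop first; a waiting writer is only woken if a slot was actually freed.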
+ let value = self.queue.pop()?; + self.write_notify.wake_one(); + Some(value) + } + + pub async fn pop_front(&self) -> T { + poll_fn(|cx| { + self.read_notify.register(cx.waker()); + match self.try_pop_front() { + Some(value) => { + self.read_notify.remove(cx.waker()); + Poll::Ready(value) + } + None => Poll::Pending, + } + }) + .await + } + + pub async fn push_back(&self, value: T) { + let mut value = Some(value); + poll_fn(|cx| { + self.write_notify.register(cx.waker()); + match self.try_push_back(value.take().unwrap()) { + Ok(()) => { + // Succeeded + self.write_notify.remove(cx.waker()); + Poll::Ready(()) + } + Err(back) => { + // Failed, put the value back + value.replace(back); + Poll::Pending + } + } + }) + .await + } + + pub fn poll_not_empty(&self, cx: &mut Context<'_>) -> Poll<()> { + self.read_notify.register(cx.waker()); + if !self.queue.is_empty() { + self.read_notify.remove(cx.waker()); + Poll::Ready(()) + } else { + Poll::Pending + } + } +} + +impl UnboundedMpmcQueue { + pub const fn new() -> Self { + Self { + queue: SegQueue::new(), + read_notify: QueueWaker::new(), + } + } + + pub fn push_back(&self, value: T) { + self.queue.push(value); + self.read_notify.wake_one(); + } + + pub fn try_pop_front(&self) -> Option { + self.queue.pop() + } + + pub async fn pop_front(&self) -> T { + poll_fn(|cx| { + self.read_notify.register(cx.waker()); + if let Some(value) = self.try_pop_front() { + self.read_notify.remove(cx.waker()); + Poll::Ready(value) + } else { + Poll::Pending + } + }) + .await + } +} diff --git a/kernel/libk/libk-util/src/ring.rs b/kernel/libk/libk-util/src/ring.rs new file mode 100644 index 00000000..c711043f --- /dev/null +++ b/kernel/libk/libk-util/src/ring.rs @@ -0,0 +1,224 @@ +//! Ring buffer implementation + +use core::{ + future::poll_fn, + mem::MaybeUninit, + task::{Context, Poll}, +}; + +use alloc::boxed::Box; +use yggdrasil_abi::error::Error; + +use crate::{ + sync::{IrqSafeSpinlock, IrqSafeSpinlockGuard}, + waker::QueueWaker, +}; + +// TODO use lockless ring data structure + +/// Ring buffer base +pub struct RingBuffer { + rd: usize, + wr: usize, + capacity: usize, + data: Option]>>, +} + +pub struct LossyRingQueue { + ring: IrqSafeSpinlock>, + read_notify: QueueWaker, +} + +impl RingBuffer { + /// Constructs an empty [RingBuffer]. + /// + /// NOTE does not allocate until the first write operation. Any read accesses + /// without checking readable count will cause a panic. 
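+    ///
+    /// The const-ness is what allows placing rings in statics, e.g. through the
+    /// [LossyRingQueue] wrapper below (sketch):
+    /// ```ignore
+    /// static INPUT_RING: LossyRingQueue<u8> = LossyRingQueue::with_capacity(128);
+    /// ```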
+ pub const fn with_capacity(capacity: usize) -> Self { + Self { + rd: 0, + wr: 0, + capacity, + data: None, + } + } + + /// Constructs an empty [RingBuffer], trying to allocate its data + pub fn try_with_capacity(capacity: usize) -> Result { + let data = Box::try_new_uninit_slice(capacity).map_err(|_| Error::OutOfMemory)?; + Ok(Self { + rd: 0, + wr: 0, + capacity, + data: Some(data), + }) + } + + const fn readable_count_at(&self, at: usize) -> usize { + if at <= self.wr { + self.wr - at + } else { + self.wr + (self.capacity - at) + } + } + + /// Returns `true` if the ring has data that can be read + #[inline] + pub const fn is_readable(&self) -> bool { + self.is_readable_at(self.rd) + } + + const fn is_readable_at(&self, at: usize) -> bool { + if at <= self.wr { + (self.wr - at) > 0 + } else { + (self.wr + (self.capacity - at)) > 0 + } + } + + /// Returns `true` if the ring is not full + #[inline] + pub fn is_writable(&self) -> bool { + (self.wr + 1) % self.capacity != self.rd + } + + /// Returns `true` if the ring contains given element + pub fn contains(&self, t: &T) -> bool + where + T: PartialEq, + { + let data = self.data.as_ref().unwrap(); + let count = self.readable_count_at(self.rd); + + for i in 0..count { + if unsafe { data[(self.rd + i) % self.capacity].assume_init_ref() == t } { + return true; + } + } + + false + } + + /// Reads a single value from the ring without checking if it's empty. + /// + /// # Safety + /// + /// The caller must perform the necessary checks to avoid reading beyond the write head. + #[inline] + pub unsafe fn read_single_unchecked(&mut self) -> T { + let data = self.data.as_ref().unwrap(); + let res = data[self.rd].assume_init_read(); + self.rd = (self.rd + 1) % self.capacity; + res + } + + /// Reads all entries available from `pos` to the write head. + /// + /// # Safety + /// + /// The caller must perform the necessary checks to avoid reading beyond the write head. 
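+    ///
+    /// Illustrative call, assuming the caller has already verified the readable count:
+    /// ```ignore
+    /// let mut buf = [0u8; 16];
+    /// let n = unsafe { ring.read_all_static(0, &mut buf) };
+    /// ```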
+ pub unsafe fn read_all_static(&mut self, pos: usize, buffer: &mut [T]) -> usize + where + T: Copy, + { + let data = self.data.as_ref().unwrap(); + let mut pos = (self.rd + pos) % self.capacity; + let mut off = 0; + while off < buffer.len() && self.is_readable_at(pos) { + buffer[off] = data[pos].assume_init(); + pos += 1; + off += 1; + } + off + } + + /// Writes a single entry to the buffer + #[inline] + pub fn write(&mut self, ch: T) { + let data = self + .data + .get_or_insert_with(|| Box::new_uninit_slice(self.capacity)); + + data[self.wr].write(ch); + self.wr = (self.wr + 1) % self.capacity; + } + + /// Returns [Some] with a value if it could be read, [None] otherwise + pub fn try_read(&mut self) -> Option { + if self.is_readable() { + Some(unsafe { self.read_single_unchecked() }) + } else { + None + } + } +} + +impl LossyRingQueue { + pub const fn with_capacity(capacity: usize) -> Self { + Self { + ring: IrqSafeSpinlock::new(RingBuffer::with_capacity(capacity)), + read_notify: QueueWaker::new(), + } + } + + pub fn try_with_capacity(capacity: usize) -> Result { + Ok(Self { + ring: IrqSafeSpinlock::new(RingBuffer::try_with_capacity(capacity)?), + read_notify: QueueWaker::new(), + }) + } + + pub fn write(&self, value: T) { + self.ring.lock().write(value); + self.read_notify.wake_one(); + } + + pub fn write_multiple(&self, values: &[T]) { + let mut lock = self.ring.lock(); + for &value in values { + lock.write(value); + } + self.read_notify.wake_one(); + } + + pub fn try_read(&self) -> Option { + // SAFETY If lock is acquired, then there's at least one readable element + self.try_read_lock() + .map(|mut f| unsafe { f.read_single_unchecked() }) + } + + #[inline] + pub fn try_read_lock(&self) -> Option>> { + let lock = self.ring.lock(); + lock.is_readable().then_some(lock) + } + + pub async fn read(&self) -> T { + let mut lock = self.read_lock().await; + // SAFETY If lock is acquired, then there's at least one readable element + unsafe { lock.read_single_unchecked() } + } + + pub async fn read_lock(&self) -> IrqSafeSpinlockGuard> { + poll_fn(|cx| self.poll_lock(cx)).await + } + + #[inline] + pub fn poll_lock(&self, cx: &mut Context<'_>) -> Poll>> { + self.read_notify.register(cx.waker()); + if let Some(lock) = self.try_read_lock() { + self.read_notify.remove(cx.waker()); + Poll::Ready(lock) + } else { + Poll::Pending + } + } + + pub fn poll_readable(&self, cx: &mut Context<'_>) -> Poll<()> { + self.poll_lock(cx).map(|_| ()) + } + + pub fn notify_all(&self) { + self.read_notify.wake_all(); + } +} diff --git a/kernel/libk/libk-util/src/sync/fence.rs b/kernel/libk/libk-util/src/sync/fence.rs new file mode 100644 index 00000000..876690a3 --- /dev/null +++ b/kernel/libk/libk-util/src/sync/fence.rs @@ -0,0 +1,44 @@ +use core::sync::atomic::{AtomicUsize, Ordering}; + +/// Simple spinloop-based fence guaranteeing that the execution resumes only after its condition is +/// met. 
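+///
+/// Typical use is boot-time synchronization, e.g. waiting for secondary CPUs
+/// (`ap_count` below is a hypothetical counter):
+/// ```ignore
+/// static AP_FENCE: SpinFence = SpinFence::new();
+/// // On each application processor, once it is initialized:
+/// AP_FENCE.signal();
+/// // On the BSP:
+/// AP_FENCE.wait_all(ap_count);
+/// ```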
+pub struct SpinFence { + value: AtomicUsize, +} + +// SpinFence impls +impl SpinFence { + /// Constructs a new [SpinFence] + pub const fn new() -> Self { + Self { + value: AtomicUsize::new(0), + } + } + + /// Resets a fence back to its original state + pub fn reset(&self) { + self.value.store(0, Ordering::Release); + } + + /// "Signals" a fence, incrementing its internal counter by one + pub fn signal(&self) { + self.value.fetch_add(1, Ordering::SeqCst); + } + + /// Waits until the fence is signalled at least the amount of times specified + pub fn wait_all(&self, count: usize) { + while self.value.load(Ordering::Acquire) < count { + core::hint::spin_loop(); + } + } + + /// Waits until the fence is signalled at least once + pub fn wait_one(&self) { + self.wait_all(1); + } + + /// Returns `true` if the fence has been signalled at least the amount of times specified + pub fn try_wait_all(&self, count: usize) -> bool { + self.value.load(Ordering::Acquire) >= count + } +} diff --git a/kernel/libk/libk-util/src/sync/mod.rs b/kernel/libk/libk-util/src/sync/mod.rs new file mode 100644 index 00000000..830dbe62 --- /dev/null +++ b/kernel/libk/libk-util/src/sync/mod.rs @@ -0,0 +1,38 @@ +use core::sync::atomic::{AtomicBool, Ordering}; + +use kernel_arch::ArchitectureImpl; +use yggdrasil_abi::error::Error; + +pub mod fence; +pub mod spin_rwlock; + +pub use fence::SpinFence; +pub type IrqSafeSpinlock = kernel_arch::sync::IrqSafeSpinlock; +pub type IrqSafeSpinlockGuard<'a, T> = + kernel_arch::sync::IrqSafeSpinlockGuard<'a, ArchitectureImpl, T>; +pub type IrqGuard = kernel_arch::guard::IrqGuard; + +static LOCK_HACK: AtomicBool = AtomicBool::new(false); + +pub trait LockMethod<'q>: Sync { + type Guard<'a> + where + 'a: 'q, + Self: 'a; + + fn lock(&'q self) -> Result, Error>; + + /// # Safety + /// + /// Only meant to be called from Guard's [Drop] impl. + unsafe fn release(&self); +} + +/// "Hacks" all the locks in the kernel to make them function as "NULL"-locks instead of spinlocks. +/// +/// # Safety +/// +/// Only meant to be called from panic handler when the caller is sure other CPUs are halted. 
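+///
+/// Sketch of the intended call site:
+/// ```ignore
+/// // In the panic handler, after all other CPUs have been halted:
+/// unsafe { hack_locks(); }
+/// // ...locks may now be taken freely for the final state dump
+/// ```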
+pub unsafe fn hack_locks() { + LOCK_HACK.store(true, Ordering::Release); +} diff --git a/kernel/libk/libk-util/src/sync/spin_rwlock.rs b/kernel/libk/libk-util/src/sync/spin_rwlock.rs new file mode 100644 index 00000000..5ed6feda --- /dev/null +++ b/kernel/libk/libk-util/src/sync/spin_rwlock.rs @@ -0,0 +1,166 @@ +use core::{ + cell::UnsafeCell, + ops::{Deref, DerefMut}, + sync::atomic::{AtomicUsize, Ordering}, +}; + +use super::IrqGuard; + +struct RwLockInner { + value: AtomicUsize, +} + +pub struct IrqSafeRwLock { + value: UnsafeCell, + inner: RwLockInner, +} + +pub struct IrqSafeRwLockReadGuard<'a, T> { + lock: &'a IrqSafeRwLock, + _guard: IrqGuard, +} + +pub struct IrqSafeRwLockWriteGuard<'a, T> { + lock: &'a IrqSafeRwLock, + _guard: IrqGuard, +} + +impl RwLockInner { + const LOCKED_READ: usize = 1 << 2; + const LOCKED_WRITE: usize = 1; + + const fn new() -> Self { + Self { + value: AtomicUsize::new(0), + } + } + + #[inline] + fn acquire_read_raw(&self) -> usize { + let value = self.value.fetch_add(Self::LOCKED_READ, Ordering::Acquire); + + if value > usize::MAX / 2 { + self.value.fetch_sub(Self::LOCKED_READ, Ordering::Relaxed); + panic!("Too many read locks acquired"); + } + + value + } + + #[inline] + fn try_acquire_read(&self) -> bool { + let value = self.acquire_read_raw(); + let acquired = value & Self::LOCKED_WRITE != Self::LOCKED_WRITE; + + if !acquired { + unsafe { + self.release_read(); + } + } + + acquired + } + + #[inline] + fn try_acquire_write(&self) -> bool { + self.value + .compare_exchange(0, Self::LOCKED_WRITE, Ordering::Acquire, Ordering::Relaxed) + .is_ok() + } + + #[inline] + fn acquire_read(&self) { + while !self.try_acquire_read() { + core::hint::spin_loop(); + } + } + + #[inline] + fn acquire_write(&self) { + while !self.try_acquire_write() { + core::hint::spin_loop(); + } + } + + #[inline] + unsafe fn release_read(&self) { + self.value.fetch_sub(Self::LOCKED_READ, Ordering::Release); + } + + #[inline] + unsafe fn release_write(&self) { + self.value.fetch_and(!Self::LOCKED_WRITE, Ordering::Release); + } +} + +impl IrqSafeRwLock { + pub const fn new(value: T) -> Self { + Self { + value: UnsafeCell::new(value), + inner: RwLockInner::new(), + } + } + + pub fn read(&self) -> IrqSafeRwLockReadGuard { + let guard = IrqGuard::acquire(); + self.inner.acquire_read(); + IrqSafeRwLockReadGuard { + lock: self, + _guard: guard, + } + } + + pub fn write(&self) -> IrqSafeRwLockWriteGuard { + let guard = IrqGuard::acquire(); + self.inner.acquire_write(); + IrqSafeRwLockWriteGuard { + lock: self, + _guard: guard, + } + } + + unsafe fn release_read(&self) { + self.inner.release_read(); + } + + unsafe fn release_write(&self) { + self.inner.release_write(); + } +} + +unsafe impl Sync for IrqSafeRwLock {} +unsafe impl Send for IrqSafeRwLock {} + +impl<'a, T> Deref for IrqSafeRwLockReadGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + unsafe { &*self.lock.value.get() } + } +} + +impl<'a, T> Drop for IrqSafeRwLockReadGuard<'a, T> { + fn drop(&mut self) { + unsafe { self.lock.release_read() } + } +} + +impl<'a, T> Deref for IrqSafeRwLockWriteGuard<'a, T> { + type Target = T; + + fn deref(&self) -> &Self::Target { + unsafe { &*self.lock.value.get() } + } +} + +impl<'a, T> DerefMut for IrqSafeRwLockWriteGuard<'a, T> { + fn deref_mut(&mut self) -> &mut Self::Target { + unsafe { &mut *self.lock.value.get() } + } +} + +impl<'a, T> Drop for IrqSafeRwLockWriteGuard<'a, T> { + fn drop(&mut self) { + unsafe { self.lock.release_write() } + } +} diff --git 
a/kernel/libk/libk-util/src/sync/spinlock.rs b/kernel/libk/libk-util/src/sync/spinlock.rs new file mode 100644 index 00000000..e69de29b diff --git a/kernel/libk/libk-util/src/waker.rs b/kernel/libk/libk-util/src/waker.rs new file mode 100644 index 00000000..3345f682 --- /dev/null +++ b/kernel/libk/libk-util/src/waker.rs @@ -0,0 +1,134 @@ +use core::{ + marker::PhantomData, + task::{RawWaker, RawWakerVTable, Waker}, +}; + +use alloc::{collections::VecDeque, sync::Weak}; +use futures_util::task::ArcWake; + +use crate::sync::IrqSafeSpinlock; + +/// Async/await primitive to suspend and wake up tasks waiting on some shared resource +pub struct QueueWaker { + queue: IrqSafeSpinlock>, +} + +pub struct WakeWeak { + _pd: PhantomData, +} + +impl WakeWeak { + pub const VTABLE: RawWakerVTable = RawWakerVTable::new( + Self::clone_waker, + Self::wake, + Self::wake_by_ref, + Self::drop_waker, + ); + + unsafe fn drop_waker(ptr: *const ()) { + let weak = Weak::from_raw(ptr as *const T); + + // Explicitly drop the thing + drop(weak); + } + + unsafe fn wake(ptr: *const ()) { + let weak = Weak::from_raw(ptr as *const T); + + if let Some(strong) = weak.upgrade() { + strong.wake(); + } + + // Weak gets dropped + } + + unsafe fn wake_by_ref(ptr: *const ()) { + let weak = Weak::from_raw(ptr as *const T); + + if let Some(strong) = weak.upgrade() { + strong.wake(); + } + + // Weak doesn't get dropped + core::mem::forget(weak); + } + + unsafe fn clone_waker(ptr: *const ()) -> RawWaker { + let weak = Weak::from_raw(ptr as *const T); + + let waker = Self::weak_waker(weak.clone()); + + // Prevent decrement of the refcount + core::mem::forget(weak); + + waker + } + + pub fn weak_waker(weak: Weak) -> RawWaker { + let raw = Weak::into_raw(weak); + + RawWaker::new(raw as *const (), &Self::VTABLE) + } +} + +impl QueueWaker { + /// Constructs an empty [QueueWaker] + pub const fn new() -> Self { + Self { + queue: IrqSafeSpinlock::new(VecDeque::new()), + } + } + + /// Registers a [Waker] reference to be waken up by this [QueueWaker] + pub fn register(&self, waker: &Waker) { + let mut queue = self.queue.lock(); + + if queue.iter().any(|other| other.will_wake(waker)) { + return; + } + + queue.push_back(waker.clone()); + } + + /// Removes a [Waker] reference from this [QueueWaker] + pub fn remove(&self, waker: &Waker) -> bool { + let mut queue = self.queue.lock(); + let mut index = 0; + let mut removed = false; + + while index < queue.len() { + if queue[index].will_wake(waker) { + removed = true; + queue.remove(index); + } + index += 1; + } + + removed + } + + /// Wakes up up to `limit` tasks waiting on this queue + pub fn wake_some(&self, limit: usize) -> usize { + let mut queue = self.queue.lock(); + let mut count = 0; + + while count < limit + && let Some(item) = queue.pop_front() + { + item.wake(); + count += 1; + } + + count + } + + /// Wakes up a single task waiting on this queue + pub fn wake_one(&self) -> bool { + self.wake_some(1) != 0 + } + + /// Wakes up all tasks waiting on this queue + pub fn wake_all(&self) -> usize { + self.wake_some(usize::MAX) + } +} diff --git a/kernel/libk/src/arch.rs b/kernel/libk/src/arch.rs new file mode 100644 index 00000000..933f7c5d --- /dev/null +++ b/kernel/libk/src/arch.rs @@ -0,0 +1,85 @@ +use core::ops::{Deref, DerefMut}; + +use device_api::interrupt::IpiMessage; +use kernel_arch::{Architecture, ArchitectureImpl, CpuImpl, LocalCpuImpl}; +use libk_thread::sched::CpuQueue; +use libk_util::sync::IrqGuard; + +/// Kernel wrapper for local CPU info structure. 
See [kernel_arch::LocalCpuImpl]. +#[repr(transparent)] +pub struct LocalCpu<'a>(LocalCpuImpl<'a, CpuQueue>); +/// Kernel wrapper for per-CPU info structure. See [kernel_arch::LocalCpuImpl]. +#[repr(transparent)] +pub struct Cpu(CpuImpl); + +impl Cpu { + pub unsafe fn init_local( + id: Option, + data: ::PerCpuData, + ) { + ArchitectureImpl::init_local_cpu::(id, data) + } + + /// Returns local CPU reference + #[inline] + pub fn local<'a>() -> LocalCpu<'a> { + LocalCpu(CpuImpl::local()) + } + + /// Returns local CPU reference or None if it hasn't yet been initialized + #[inline] + pub fn try_local<'a>() -> Option> { + CpuImpl::try_local().map(LocalCpu) + } + + /// Pushes a message to the IPI queue + #[inline] + pub fn push_ipi_queue(cpu_id: u32, msg: IpiMessage) { + CpuImpl::::push_ipi_queue(cpu_id, msg) + } + + /// Initialize the IPI queues for all present CPUs + #[inline] + pub fn init_ipi_queues(cpu_count: usize) { + CpuImpl::::init_ipi_queues(cpu_count) + } +} + +impl Deref for Cpu { + type Target = CpuImpl; + + #[inline] + fn deref(&self) -> &Self::Target { + &self.0 + } +} + +impl DerefMut for Cpu { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + &mut self.0 + } +} + +impl<'a> LocalCpu<'a> { + /// Converts the local CPU handle into its IRQ guard + pub fn into_guard(self) -> IrqGuard { + self.0.into_guard() + } +} + +impl<'a> Deref for LocalCpu<'a> { + type Target = CpuImpl; + + #[inline] + fn deref(&self) -> &Self::Target { + self.0.deref() + } +} + +impl<'a> DerefMut for LocalCpu<'a> { + #[inline] + fn deref_mut(&mut self) -> &mut Self::Target { + self.0.deref_mut() + } +} diff --git a/kernel/libk/src/lib.rs b/kernel/libk/src/lib.rs new file mode 100644 index 00000000..4c65d5ed --- /dev/null +++ b/kernel/libk/src/lib.rs @@ -0,0 +1,33 @@ +#![no_std] +#![feature( + maybe_uninit_slice, + step_trait, + const_trait_impl, + effects, + slice_ptr_get, + strict_provenance, + never_type, + let_chains, + allocator_api, + maybe_uninit_uninit_array, + const_maybe_uninit_uninit_array, + new_uninit, + inline_const, + trait_alias +)] + +extern crate alloc; + +pub use libk_thread::{block, cpu_count, cpu_index, runtime}; + +pub mod arch; + +pub mod device { + pub use libk_device::*; +} + +#[repr(C)] +pub struct AlignedTo { + pub align: [Align; 0], + pub bytes: Bytes, +} diff --git a/kernel/src/arch/aarch64/boot/entry.S b/kernel/src/arch/aarch64/boot/entry.S new file mode 100644 index 00000000..c41548c7 --- /dev/null +++ b/kernel/src/arch/aarch64/boot/entry.S @@ -0,0 +1,111 @@ +.set CNTHCTL_EL2_EL1PCTEN, 1 << 0 +.set CNTHCTL_EL2_EL1PCEN, 1 << 1 + +.set HCR_EL2_RW_EL1IsAArch64, 1 << 31 + +.set SCTLR_EL2_RES1, 0x30C50830 + +.set SPSR_EL2_EL1h, 0x5 +.set SPSR_EL2_MASK_DAIF, 0xF << 6 + +.macro MOV_L reg, value + mov \reg, #((\value) & 0xFFFF) + movk \reg, #((\value) >> 16), lsl #16 +.endm + +.macro MOV_ABS reg, sym + movz \reg, #:abs_g2:\sym + movk \reg, #:abs_g1_nc:\sym + movk \reg, #:abs_g0_nc:\sym +.endm + +.macro LEAVE_EL2, ret_label + mrs x8, CNTHCTL_EL2 + orr x8, x8, #(CNTHCTL_EL2_EL1PCTEN | CNTHCTL_EL2_EL1PCEN) + msr CNTHCTL_EL2, x8 + msr CNTVOFF_EL2, xzr + + MOV_L x8, SCTLR_EL2_RES1 + msr SCTLR_EL2, x8 + + mov x8, #HCR_EL2_RW_EL1IsAArch64 + msr HCR_EL2, x8 + + mov x8, #SPSR_EL2_EL1h + orr x8, x8, #SPSR_EL2_MASK_DAIF + msr SPSR_EL2, x8 + + adr x8, \ret_label + msr ELR_EL2, x8 + + isb + eret +.endm + +.global __aarch64_entry +.global __aarch64_ap_entry + +.section .text.entry +__aarch64_entry: + // x0 -- dtb_phys + + // Multiple processor cores may or may not be running at this point + 
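+    // MPIDR_EL1.Aff0 (bits [3:0]) gives the core number: only core 0 continues
+    // as the bootstrap processor, any other core branches away below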
mrs x1, mpidr_el1 + ands x1, x1, #0xF + bne 1f + + mrs x8, CurrentEL + lsr x8, x8, #2 + cmp x8, #2 + bne .el1 + + // Leave EL2 +.el2: + LEAVE_EL2 .el1 +.el1: + dsb sy + isb + + // Zero .bss + MOV_ABS x8, __bss_start_phys + MOV_ABS x9, __bss_end_phys + // Zero .bss +1: + cmp x8, x9 + beq 2f + strb wzr, [x8] + add x8, x8, #1 + b 1b +2: + + // BSP in SMP or uniprocessor + ldr x1, ={stack_bottom} + {stack_size} - {kernel_virt_offset} + mov sp, x1 + + bl {kernel_lower_entry} - {kernel_virt_offset} + + // AP in a SMP system + // TODO spin loop for this method of init +1: + b . + +.section .text +__aarch64_ap_entry: + // x0 -- physical sp + + mrs x8, CurrentEL + lsr x8, x8, #2 + cmp x8, #2 + bne .ap_el1 + +.ap_el2: + LEAVE_EL2 .ap_el1 + +.ap_el1: + dsb sy + isb + + mov sp, x0 + bl {kernel_ap_lower_entry} - {kernel_virt_offset} + + b . diff --git a/kernel/src/arch/aarch64/boot/mod.rs b/kernel/src/arch/aarch64/boot/mod.rs new file mode 100644 index 00000000..77be9ded --- /dev/null +++ b/kernel/src/arch/aarch64/boot/mod.rs @@ -0,0 +1,168 @@ +//! AArch64 boot and entry implementation +use core::{arch::global_asm, sync::atomic::Ordering}; + +use aarch64_cpu::{ + asm::barrier, + registers::{CPACR_EL1, ID_AA64MMFR0_EL1, MAIR_EL1, SCTLR_EL1, TCR_EL1, TTBR0_EL1}, +}; +use kernel_arch::{absolute_address, Architecture, ArchitectureImpl}; +use kernel_arch_aarch64::CPU_COUNT; +use kernel_fs::devfs; +use libk::runtime; +use libk_mm::{ + address::{IntoRaw, PhysicalAddress, Virtualize}, + phys, + table::EntryLevel, +}; +use tock_registers::interfaces::{ReadWriteable, Readable, Writeable}; + +use super::{exception, BootStack, BOOT_STACK_SIZE, PLATFORM}; +use crate::{arch::L3, kernel_main, kernel_secondary_main, mem::KERNEL_VIRT_OFFSET}; + +unsafe fn pre_init_mmu() { + if !ID_AA64MMFR0_EL1.matches_all(ID_AA64MMFR0_EL1::TGran4::Supported) { + // TODO early panic + todo!(); + } + + MAIR_EL1.write( + //// Attribute 0 -- normal memory + // Inner + MAIR_EL1::Attr0_Normal_Inner::WriteBack_NonTransient_ReadWriteAlloc + + // Outer + MAIR_EL1::Attr0_Normal_Outer::WriteBack_NonTransient_ReadWriteAlloc + + //// Attribute 1 -- device memory + MAIR_EL1::Attr1_Device::nonGathering_nonReordering_EarlyWriteAck, + ); + + TCR_EL1.modify( + // General + TCR_EL1::IPS::Bits_48 + + // TTBR0 + TCR_EL1::TG0::KiB_4 + TCR_EL1::T0SZ.val(25) + TCR_EL1::SH0::Inner + + // TTBR1 + TCR_EL1::TG1::KiB_4 + TCR_EL1::T1SZ.val(25) + TCR_EL1::SH1::Outer, + ); +} + +unsafe fn enable_mmu() { + barrier::dmb(barrier::ISH); + + SCTLR_EL1.modify( + // Enable translation + SCTLR_EL1::M::Enable + + // (TODO) Disable I + D caches + SCTLR_EL1::I::NonCacheable + SCTLR_EL1::C::NonCacheable, + ); + + barrier::isb(barrier::SY); +} + +unsafe fn enter_higher_half(sp: usize, elr: usize, x0: usize) -> ! { + unsafe { + core::arch::asm!(r#" + mov sp, {sp} + mov x0, {x0} + mov lr, xzr + br {elr} + "#, elr = in(reg) elr, sp = in(reg) sp, x0 = in(reg) x0, options(noreturn)); + } +} + +// NOTE executes in "lower-half" address space, MMU not yet enabled +unsafe extern "C" fn __aarch64_el1_bsp_lower_entry(dtb: PhysicalAddress) -> ! 
{ + ArchitectureImpl::set_interrupt_mask(true); + + // Don't trap FP operations + CPACR_EL1.modify(CPACR_EL1::FPEN::TrapNothing); + + // Setup MMU to jump to "higher-half" address space + pre_init_mmu(); + kernel_arch_aarch64::mem::load_fixed_tables(); + enable_mmu(); + + // Safety: SP points to the .bss section, so it's +offset mapped + let sp = unsafe { BSP_STACK.data.as_ptr().add(BOOT_STACK_SIZE) as usize } + KERNEL_VIRT_OFFSET; + let elr = absolute_address!(__aarch64_bsp_upper_entry); + + // TODO pass dtb + enter_higher_half(sp, elr, dtb.into_raw()); +} + +unsafe extern "C" fn __aarch64_bsp_upper_entry(dtb: PhysicalAddress) -> ! { + // Remove the "lower-half" mapping, no longer needed + TTBR0_EL1.set(0); + + // Setup the "runtime" part of the kernel tables + PLATFORM + .init_memory_management(dtb) + .expect("Could not initialize memory management"); + barrier::isb(barrier::SY); + + exception::init_exceptions(); + + devfs::init(); + + runtime::init_task_queue(); + + // Initialize the BSP CPU + the devices + PLATFORM + .init_platform(true) + .expect("Could not initialize the platform"); + + kernel_main() +} + +unsafe extern "C" fn __aarch64_el1_ap_lower_entry() -> ! { + const AP_STACK_PAGES: usize = 8; + ArchitectureImpl::set_interrupt_mask(true); + + // Unmask FP operations + CPACR_EL1.modify(CPACR_EL1::FPEN::TrapNothing); + + pre_init_mmu(); + kernel_arch_aarch64::mem::load_fixed_tables(); + enable_mmu(); + + let stack_pages = phys::alloc_pages_contiguous(AP_STACK_PAGES).unwrap(); + let stack_base = stack_pages.virtualize(); + let sp = stack_base + L3::SIZE * AP_STACK_PAGES; + + let elr = absolute_address!(__aarch64_ap_upper_entry); + + enter_higher_half(sp, elr, 0); +} + +extern "C" fn __aarch64_ap_upper_entry() -> ! { + barrier::dmb(barrier::ISH); + barrier::isb(barrier::SY); + + let cpu_id = CPU_COUNT.fetch_add(1, Ordering::SeqCst); + aarch64_cpu::asm::sev(); + + infoln!("cpu{} initializing", cpu_id); + + exception::init_exceptions(); + + unsafe { + PLATFORM + .init_platform(false) + .expect("Could not initialize the AP"); + } + + kernel_secondary_main() +} + +#[link_section = ".bss"] +static BSP_STACK: BootStack = BootStack { + data: [0; BOOT_STACK_SIZE], +}; + +global_asm!( + include_str!("entry.S"), + kernel_lower_entry = sym __aarch64_el1_bsp_lower_entry, + kernel_ap_lower_entry = sym __aarch64_el1_ap_lower_entry, + stack_bottom = sym BSP_STACK, + kernel_virt_offset = const KERNEL_VIRT_OFFSET, + stack_size = const BOOT_STACK_SIZE +); diff --git a/kernel/src/arch/aarch64/exception.rs b/kernel/src/arch/aarch64/exception.rs new file mode 100644 index 00000000..5334489e --- /dev/null +++ b/kernel/src/arch/aarch64/exception.rs @@ -0,0 +1,234 @@ +//! Exception and interrupt management functions + +use core::arch::global_asm; + +use aarch64_cpu::{ + asm::barrier, + registers::{ + ELR_EL1, ESR_EL1, FAR_EL1, SCTLR_EL1, TCR_EL1, TPIDR_EL0, TPIDR_EL1, TTBR0_EL1, TTBR1_EL1, + VBAR_EL1, + }, +}; +use abi::{ + process::{Signal, SignalEntryData}, + SyscallFunction, +}; +use kernel_arch::{task::TaskFrame, Architecture, ArchitectureImpl}; +use kernel_arch_aarch64::context::ExceptionFrame; +use libk::device::external_interrupt_controller; +use libk_thread::thread::Thread; +use tock_registers::interfaces::{Readable, Writeable}; + +use crate::{debug::LogLevel, syscall::raw_syscall_handler, task::process::ProcessManagerImpl}; + +/// Initializes the exception/interrupt vectors. May be called repeatedly (though that makes no +/// sense). 
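+///
+/// NOTE: the vector table pointed to by `__aarch64_el1_vectors` must be 2 KiB-aligned,
+/// since VBAR_EL1 bits [10:0] are reserved; the alignment is assumed to be provided by
+/// the assembly in vectors.S.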
+pub fn init_exceptions() { + extern "C" { + static __aarch64_el1_vectors: u8; + } + let vbar = unsafe { &__aarch64_el1_vectors as *const _ }; + VBAR_EL1.set(vbar as u64); + barrier::isb(barrier::SY); +} + +fn dump_irrecoverable_exception(frame: &ExceptionFrame, ec: u64, iss: u64) { + // let cpu = Cpu::get_local(); + + log_print_raw!(LogLevel::Fatal, "SYNC exception:\n"); + log_print_raw!(LogLevel::Fatal, "FAR: {:#x}\n", FAR_EL1.get()); + log_print_raw!(LogLevel::Fatal, "ELR: {:#x}\n", ELR_EL1.get()); + log_print_raw!(LogLevel::Fatal, "ESR: {:#x}\n", ESR_EL1.get()); + log_print_raw!(LogLevel::Fatal, "TTBR0_EL1: {:#x}\n", TTBR0_EL1.get()); + log_print_raw!(LogLevel::Fatal, "TTBR1_EL1: {:#x}\n", TTBR1_EL1.get()); + log_print_raw!(LogLevel::Fatal, "Register dump:\n"); + log_print_raw!(LogLevel::Fatal, "{:?}\n", frame); + + // XXX + // if let Some(cpu) = cpu { + // let current = cpu.queue().current_process(); + + // if let Some(current) = current { + // log_print_raw!(LogLevel::Fatal, "In process {}\n", current.id()); + // } + // } + + match ec { + // Data abort from lower level + 0b100100 => { + log_print_raw!(LogLevel::Fatal, "Exception kind: Data Abort from EL0\n"); + let dfsc = iss & 0x3F; + + if iss & (1 << 24) != 0 { + let access_size_str = match (iss >> 22) & 0x3 { + 0 => "i8", + 1 => "i16", + 2 => "i32", + 3 => "i64", + _ => unreachable!(), + }; + let access_type_str = if iss & (1 << 6) != 0 { "write" } else { "read" }; + + log_print_raw!( + LogLevel::Fatal, + "Invalid {} of a {} to/from {:#x}\n", + access_type_str, + access_size_str, + FAR_EL1.get() + ); + } + + log_print_raw!(LogLevel::Fatal, "DFSC = {:#x}\n", dfsc); + } + // Instruction abort from lower level + 0b100000 => { + log_print_raw!( + LogLevel::Fatal, + "Exception kind: Instruction Abort from EL0\n" + ); + let ifsc = iss & 0x3F; + log_print_raw!(LogLevel::Fatal, "IFSC = {:#x}\n", ifsc); + } + + _ => (), + } + + log_print_raw!(LogLevel::Fatal, "System register dump:\n"); + log_print_raw!(LogLevel::Fatal, "SCTLR_EL1 = {:#x}\n", SCTLR_EL1.get()); + log_print_raw!(LogLevel::Fatal, "TCR_EL1 = {:#x}\n", TCR_EL1.get()); + log_print_raw!(LogLevel::Fatal, "TPIDR_EL1 = {:#x}\n", TPIDR_EL1.get()); + log_print_raw!(LogLevel::Fatal, "TPIDR_EL0 = {:#x}\n", TPIDR_EL0.get()); +} + +#[no_mangle] +extern "C" fn __aa64_el0_sync_handler(frame: *mut ExceptionFrame) { + assert!(ArchitectureImpl::interrupt_mask()); + let frame = unsafe { &mut *frame }; + + el0_sync_inner(frame); + + unsafe { + let thread = Thread::current(); + thread.handle_pending_signals::(frame); + } +} + +#[no_mangle] +extern "C" fn __aa64_el0_irq_handler(frame: *mut ExceptionFrame) { + assert!(ArchitectureImpl::interrupt_mask()); + let frame = unsafe { &mut *frame }; + + irq_common(); + + unsafe { + let thread = Thread::current(); + thread.handle_pending_signals::(frame); + } +} + +#[no_mangle] +extern "C" fn __aa64_el0_fiq_handler() { + todo!(); +} + +#[no_mangle] +extern "C" fn __aa64_el0_serror_handler() { + todo!(); +} + +// EL1 +#[no_mangle] +extern "C" fn __aa64_el1_sync_handler(frame: *mut ExceptionFrame) { + let frame = unsafe { &*frame }; + let esr_el1 = ESR_EL1.get(); + let ec = (esr_el1 >> 26) & 0x3F; + let iss = esr_el1 & 0x1FFFFFF; + + unsafe { + libk_util::sync::hack_locks(); + } + + dump_irrecoverable_exception(frame, ec, iss); + + panic!("Irrecoverable exception in kernel mode"); +} + +#[no_mangle] +extern "C" fn __aa64_el1_irq_handler(_frame: *mut ExceptionFrame) { + irq_common(); +} + +#[no_mangle] +extern "C" fn __aa64_el1_fiq_handler() { + todo!(); +} + 
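+
+// ESR_EL1 decoding used by the handlers in this file: EC (exception class) is in
+// bits [31:26], ISS in bits [24:0]; e.g. EC 0b010101 is an AArch64 SVC and
+// 0b100100 is a Data Abort from EL0.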
+#[no_mangle] +extern "C" fn __aa64_el1_serror_handler() { + todo!(); +} + +fn el0_sync_inner(frame: &mut ExceptionFrame) { + let esr_el1 = ESR_EL1.get(); + let ec = (esr_el1 >> 26) & 0x3F; + + match ec { + // SVC in AArch64 + 0b010101 => { + let func = frame.r[8]; + if func == usize::from(SyscallFunction::ExitSignal) as u64 { + unsafe { + handle_signal_exit(frame); + } + return; + } + + let args = &frame.r[0..6]; + let result = raw_syscall_handler(func, args); + frame.r[0] = result; + } + // BRK in AArch64 + 0b111100 => { + let thread = Thread::current(); + warnln!("Thread {} {:?} hit a breakpoint", thread.id, thread.name); + thread.raise_signal(Signal::Aborted); + } + _ => { + let thread = Thread::current(); + let iss = esr_el1 & 0x1FFFFFF; + if ec == 0b100100 { + // Data abort from lower level + let thread = Thread::current(); + warnln!( + "Data abort in {} {:?} at {:#x} with address {:#x}", + thread.id, + thread.name, + ELR_EL1.get(), + FAR_EL1.get() + ); + } + dump_irrecoverable_exception(frame, ec, iss); + + thread.raise_signal(Signal::MemoryAccessViolation); + return; + } + } +} + +fn irq_common() { + external_interrupt_controller().handle_pending_irqs(); +} + +unsafe fn handle_signal_exit(frame: &mut ExceptionFrame) { + // TODO validate the argument + let saved_data = &*(frame.r[0] as *const SignalEntryData); + debugln!( + "Handling signal exit to pc={:#x}, sp={:#x}", + saved_data.frame.elr_el1, + saved_data.frame.sp_el0 + ); + + frame.restore(&saved_data.frame); +} + +global_asm!(include_str!("vectors.S")); diff --git a/kernel/src/arch/aarch64/gic/gicc.rs b/kernel/src/arch/aarch64/gic/gicc.rs new file mode 100644 index 00000000..32518521 --- /dev/null +++ b/kernel/src/arch/aarch64/gic/gicc.rs @@ -0,0 +1,59 @@ +//! ARM GICv2 CPU registers +use libk_mm::device::DeviceMemoryIo; +use tock_registers::{ + interfaces::{Readable, Writeable}, + register_bitfields, register_structs, + registers::ReadWrite, +}; + +register_bitfields! { + u32, + CTLR [ + Enable OFFSET(0) NUMBITS(1) [] + ], + PMR [ + Priority OFFSET(0) NUMBITS(8) [] + ], + IAR [ + InterruptID OFFSET(0) NUMBITS(10) [] + ], + EOIR [ + EOINTID OFFSET(0) NUMBITS(10) [] + ] +} + +register_structs! { + #[allow(non_snake_case)] + pub(super) GiccRegs { + (0x00 => CTLR: ReadWrite), + (0x04 => PMR: ReadWrite), + (0x08 => _0), + (0x0C => IAR: ReadWrite), + (0x10 => EOIR: ReadWrite), + (0x14 => @END), + } +} + +pub(super) struct Gicc { + regs: DeviceMemoryIo<'static, GiccRegs>, +} + +impl Gicc { + pub const fn new(regs: DeviceMemoryIo<'static, GiccRegs>) -> Self { + Self { regs } + } + + pub unsafe fn init(&self) { + debugln!("Enabling GICv2 GICC"); + self.regs.CTLR.write(CTLR::Enable::SET); + self.regs.PMR.write(PMR::Priority.val(0xFF)); + } + + pub fn pending_irq_number(&self) -> usize { + self.regs.IAR.read(IAR::InterruptID) as usize + } + + pub fn clear_irq(&self, irq: usize) { + self.regs.EOIR.write(EOIR::EOINTID.val(irq as u32)); + } +} diff --git a/kernel/src/arch/aarch64/gic/gicd.rs b/kernel/src/arch/aarch64/gic/gicd.rs new file mode 100644 index 00000000..5b48ec21 --- /dev/null +++ b/kernel/src/arch/aarch64/gic/gicd.rs @@ -0,0 +1,183 @@ +//! ARM GICv2 Distributor registers +use device_api::interrupt::{IpiDeliveryTarget, IrqLevel, IrqOptions, IrqTrigger}; +use libk_mm::device::DeviceMemoryIo; +use spinning_top::Spinlock; +use tock_registers::{ + interfaces::{ReadWriteable, Readable, Writeable}, + register_bitfields, register_structs, + registers::{ReadOnly, ReadWrite, WriteOnly}, +}; + +register_bitfields! 
{ + u32, + CTLR [ + Enable OFFSET(0) NUMBITS(1) [] + ], + TYPER [ + ITLinesNumber OFFSET(0) NUMBITS(5) [] + ], + ITARGETSR [ + Offset3 OFFSET(24) NUMBITS(8) [], + Offset2 OFFSET(16) NUMBITS(8) [], + Offset1 OFFSET(8) NUMBITS(8) [], + Offset0 OFFSET(0) NUMBITS(8) [] + ], + SGIR [ + TargetListFilter OFFSET(24) NUMBITS(2) [ + SpecifiedOnly = 0, + AllExceptLocal = 1, + LocalOnly = 2, + ], + CPUTargetList OFFSET(16) NUMBITS(8) [], + INTID OFFSET(0) NUMBITS(4) [] + ], +} + +register_structs! { + #[allow(non_snake_case)] + pub(super) GicdSharedRegs { + (0x000 => CTLR: ReadWrite), + (0x004 => TYPER: ReadWrite), + (0x008 => _0), + (0x104 => ISENABLER: [ReadWrite; 31]), + (0x180 => _1), + (0x820 => ITARGETSR: [ReadWrite; 248]), + (0xC00 => _2), + (0xC08 => ICFGR: [ReadWrite; 62]), + (0xD00 => _3), + (0xF00 => SGIR: WriteOnly), + (0xF04 => @END), + } +} + +register_structs! { + #[allow(non_snake_case)] + pub(super) GicdBankedRegs { + (0x000 => _0), + (0x100 => ISENABLER: ReadWrite), + (0x104 => _1), + (0x800 => ITARGETSR: [ReadOnly; 8]), + (0x820 => _2), + (0xC00 => ICFGR: [ReadWrite; 2]), + (0xC08 => @END), + } +} + +pub(super) struct Gicd { + shared_regs: Spinlock>, + banked_regs: DeviceMemoryIo<'static, GicdBankedRegs>, +} + +impl GicdSharedRegs { + #[inline(always)] + fn num_irqs(&self) -> usize { + ((self.TYPER.read(TYPER::ITLinesNumber) as usize) + 1) * 32 + } + + #[inline(always)] + fn itargets_slice(&self) -> &[ReadWrite] { + assert!(self.num_irqs() >= 36); + let itargetsr_max_index = ((self.num_irqs() - 32) >> 2) - 1; + &self.ITARGETSR[0..itargetsr_max_index] + } +} + +impl Gicd { + pub const fn new( + shared_regs: DeviceMemoryIo<'static, GicdSharedRegs>, + banked_regs: DeviceMemoryIo<'static, GicdBankedRegs>, + ) -> Self { + let shared_regs = Spinlock::new(shared_regs); + Self { + shared_regs, + banked_regs, + } + } + + #[allow(dead_code)] + pub unsafe fn set_sgir(&self, target: IpiDeliveryTarget, interrupt_id: u64) { + assert_eq!(interrupt_id & !0xF, 0); + let value = match target { + IpiDeliveryTarget::OtherCpus => SGIR::TargetListFilter::AllExceptLocal, + IpiDeliveryTarget::Specific(_mask) => { + // TODO: need to handle self-ipi case, releasing the lock somehow + todo!(); + } + IpiDeliveryTarget::ThisCpu => todo!(), + } + SGIR::INTID.val(interrupt_id as u32); + + self.shared_regs.lock().SGIR.write(value); + } + + fn local_gic_target_mask(&self) -> u32 { + self.banked_regs.ITARGETSR[0].read(ITARGETSR::Offset0) + } + + pub fn configure_irq(&self, irq: usize, options: IrqOptions) { + // TODO configure trigger level + // 2 bits per IRQ, 16 entries per register + let reg = irq / 16; + let shift = (irq % 16) * 2; + let cfgr_value = match (options.trigger, options.level) { + (IrqTrigger::Level, IrqLevel::ActiveLow) => 0, + (IrqTrigger::Level, _) => 0, + (_, IrqLevel::ActiveLow) => 1, + (_, _) => 1, + }; + + match reg { + // Private IRQs + 0..=1 => { + todo!(); + } + // Shared IRQs + _ => { + let regs = self.shared_regs.lock(); + let reg = ®s.ICFGR[reg - 2]; + + let v = reg.get() & !(0x3 << shift); + reg.set(v | (cfgr_value << shift)); + } + } + } + + pub fn enable_irq(&self, irq: usize) { + let reg = irq >> 5; + let bit = 1u32 << (irq & 0x1F); + + match reg { + // Private IRQs + 0 => { + let reg = &self.banked_regs.ISENABLER; + + reg.set(reg.get() | bit); + } + // Shared IRQs + _ => { + let regs = self.shared_regs.lock(); + let reg = ®s.ISENABLER[reg - 1]; + + reg.set(reg.get() | bit); + } + } + } + + pub unsafe fn init(&self) { + let mask = self.local_gic_target_mask(); + let regs = 
self.shared_regs.lock(); + + debugln!("Enabling GICv2 GICD, max IRQ number: {}", regs.num_irqs()); + + regs.CTLR.modify(CTLR::Enable::SET); + + for reg in regs.itargets_slice().iter() { + // Redirect all IRQs to cpu0 (this CPU) + reg.write( + ITARGETSR::Offset0.val(mask) + + ITARGETSR::Offset1.val(mask) + + ITARGETSR::Offset2.val(mask) + + ITARGETSR::Offset3.val(mask), + ); + } + } +} diff --git a/kernel/src/arch/aarch64/gic/mod.rs b/kernel/src/arch/aarch64/gic/mod.rs new file mode 100644 index 00000000..db543a0b --- /dev/null +++ b/kernel/src/arch/aarch64/gic/mod.rs @@ -0,0 +1,234 @@ +//! ARM Generic Interrupt Controller v2 driver + +use core::sync::atomic::Ordering; + +use aarch64_cpu::asm::barrier; +use abi::error::Error; +use alloc::{boxed::Box, sync::Arc}; +use device_api::{ + interrupt::{ + ExternalInterruptController, FixedInterruptTable, InterruptHandler, InterruptTable, + IpiDeliveryTarget, IpiMessage, Irq, IrqOptions, LocalInterruptController, + MessageInterruptController, MsiInfo, + }, + Device, +}; +use device_tree::{device_tree_driver, dt::DevTreeIndexPropExt}; +use kernel_arch_aarch64::{GicInterface, CPU_COUNT}; +use libk::{arch::Cpu, cpu_index, device::register_external_interrupt_controller}; +use libk_mm::{ + address::{FromRaw, IntoRaw, PhysicalAddress}, + device::{DeviceMemoryIo, RawDeviceMemoryMapping}, +}; +use libk_util::{sync::IrqSafeSpinlock, OneTimeInit}; + +use self::{gicc::Gicc, gicd::Gicd}; + +use super::AArch64; + +const MAX_IRQ: usize = 300; +const IPI_VECTOR: u64 = 1; + +pub mod gicc; +pub mod gicd; + +/// ARM Generic Interrupt Controller v2 +pub struct Gic { + gicc: OneTimeInit, + gicd: OneTimeInit, + gicd_base: PhysicalAddress, + gicc_base: PhysicalAddress, + table: IrqSafeSpinlock>, +} + +/// Per-CPU GIC information +pub struct GicPerCpu {} + +impl GicInterface for Gic {} + +impl Device for Gic { + fn display_name(&self) -> &'static str { + "ARM Generic Interrupt Controller v2" + } + + unsafe fn init(&'static self) -> Result<(), Error> { + let gicd_mmio = Arc::new(RawDeviceMemoryMapping::map( + self.gicd_base.into_raw(), + 0x1000, + Default::default(), + )?); + let gicd_mmio_shared = DeviceMemoryIo::from_raw(gicd_mmio.clone())?; + let gicd_mmio_banked = DeviceMemoryIo::from_raw(gicd_mmio)?; + let gicc_mmio = DeviceMemoryIo::map(self.gicc_base, Default::default())?; + + let gicd = Gicd::new(gicd_mmio_shared, gicd_mmio_banked); + let gicc = Gicc::new(gicc_mmio); + + gicd.init(); + gicc.init(); + + self.gicd.init(gicd); + self.gicc.init(gicc); + + register_external_interrupt_controller(self); + AArch64::set_gic(self); + + Ok(()) + } +} + +impl ExternalInterruptController for Gic { + fn register_irq( + &self, + irq: Irq, + options: IrqOptions, + handler: &'static dyn InterruptHandler, + ) -> Result<(), Error> { + let mut table = self.table.lock(); + let gicd = self.gicd.get(); + + let index = match irq { + Irq::External(i) => i + 32, + Irq::Private(i) => i + 16, + } as usize; + + debugln!( + "Bound irq{} to {:?} {:?} {:?}", + index, + handler.display_name(), + options.trigger, + options.level + ); + if index >= 32 { + gicd.configure_irq(index, options); + } + table.insert(index, handler)?; + + Ok(()) + } + + fn enable_irq(&self, irq: Irq) -> Result<(), Error> { + let gicd = self.gicd.get(); + let index = match irq { + Irq::External(i) => i + 32, + Irq::Private(i) => i + 16, + } as usize; + gicd.enable_irq(index); + Ok(()) + } + + fn handle_pending_irqs(&self) { + let gicc = self.gicc.get(); + let irq_number = gicc.pending_irq_number(); + if irq_number >= MAX_IRQ 
+
+    fn handle_pending_irqs(&self) {
+        let gicc = self.gicc.get();
+        let irq_number = gicc.pending_irq_number();
+        if irq_number >= MAX_IRQ {
+            return;
+        }
+
+        if irq_number == IPI_VECTOR as usize {
+            gicc.clear_irq(irq_number);
+            // XXX
+            todo!();
+            // Cpu::local().handle_ipi();
+            // return;
+        }
+
+        gicc.clear_irq(irq_number);
+
+        {
+            let table = self.table.lock();
+            match table.handler(irq_number) {
+                Some(handler) => {
+                    drop(table);
+                    handler.handle_irq(None);
+                }
+                None => warnln!("No handler for irq{}", irq_number),
+            }
+        }
+    }
+}
+
+impl MessageInterruptController for Gic {
+    fn handle_msi(&self, _vector: usize) {
+        todo!()
+    }
+
+    fn register_msi_range(
+        &self,
+        _range: &mut [MsiInfo],
+        _handler: &'static dyn InterruptHandler,
+    ) -> Result<(), Error> {
+        todo!()
+    }
+}
+
+impl LocalInterruptController for Gic {
+    fn send_ipi(&self, target: IpiDeliveryTarget, msg: IpiMessage) -> Result<(), Error> {
+        // TODO message queue insertion should be moved
+        match target {
+            IpiDeliveryTarget::OtherCpus => {
+                let local = cpu_index();
+                for i in 0..CPU_COUNT.load(Ordering::Acquire) {
+                    if i != local as usize {
+                        Cpu::push_ipi_queue(i as u32, msg);
+                    }
+                }
+            }
+            IpiDeliveryTarget::Specific(_) => todo!(),
+            IpiDeliveryTarget::ThisCpu => todo!(),
+        }
+
+        // Issue a memory barrier
+        barrier::dsb(barrier::ISH);
+        barrier::isb(barrier::SY);
+
+        unsafe {
+            self.gicd.get().set_sgir(target, IPI_VECTOR);
+        }
+
+        Ok(())
+    }
+
+    unsafe fn init_ap(&self) -> Result<(), Error> {
+        self.gicc.get().init();
+        Ok(())
+    }
+}
+
+impl Gic {
+    /// Constructs an instance of GICv2.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the addresses actually point to the GIC components.
+    pub unsafe fn new(gicd_base: PhysicalAddress, gicc_base: PhysicalAddress) -> Self {
+        Self {
+            gicc: OneTimeInit::new(),
+            gicd: OneTimeInit::new(),
+            gicd_base,
+            gicc_base,
+            table: IrqSafeSpinlock::new(FixedInterruptTable::new()),
+        }
+    }
+}
+
+impl GicPerCpu {
+    /// Constructs per-CPU GIC data structure
+    pub fn new() -> Self {
+        Self {}
+    }
+}
+
+device_tree_driver! {
+    compatible: ["arm,cortex-a15-gic", "arm,gic-400"],
+    probe(dt) => {
+        let reg = device_tree::find_prop(&dt.node, "reg")?;
+
+        // The first "reg" entry is the GICD block, the second one is the GICC block
+        let (gicd_base, _) = reg.cell2_array_item(0, dt.address_cells, dt.size_cells)?;
+        let (gicc_base, _) = reg.cell2_array_item(1, dt.address_cells, dt.size_cells)?;
+
+        Some(Box::new(unsafe { Gic::new(
+            PhysicalAddress::from_raw(gicd_base),
+            PhysicalAddress::from_raw(gicc_base),
+        )}))
+    }
+}
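
The `device_tree_driver!` macro used above is the generic registration point for devicetree-probed drivers: the probe body receives the matched node and returns a boxed device. A minimal hypothetical driver following the same shape (the `arm,pl011` compatible string and `Pl011` type are illustrative stand-ins, not part of this change):

```rust
device_tree_driver! {
    compatible: ["arm,pl011"],
    probe(dt) => {
        // "reg" holds (base, size) cell pairs; take the first entry
        let reg = device_tree::find_prop(&dt.node, "reg")?;
        let (base, _size) = reg.cell2_array_item(0, dt.address_cells, dt.size_cells)?;

        // Pl011::new is assumed to defer the MMIO mapping to Device::init()
        Some(Box::new(unsafe { Pl011::new(PhysicalAddress::from_raw(base)) }))
    }
}
```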
diff --git a/kernel/src/arch/aarch64/mod.rs b/kernel/src/arch/aarch64/mod.rs
new file mode 100644
index 00000000..5144d879
--- /dev/null
+++ b/kernel/src/arch/aarch64/mod.rs
@@ -0,0 +1,369 @@
+//! AArch64 architecture and platform implementation
+
+use core::sync::atomic::Ordering;
+
+use aarch64_cpu::registers::{CNTP_CTL_EL0, CNTP_TVAL_EL0};
+use abi::error::Error;
+use device_api::{
+    interrupt::{IpiDeliveryTarget, IpiMessage, Irq, LocalInterruptController},
+    ResetDevice,
+};
+use device_tree::dt::{DevTreeIndexPropExt, DevTreeNodeInfo, DeviceTree, FdtMemoryRegionIter};
+use git_version::git_version;
+use kernel_arch_aarch64::{
+    mem::{
+        table::{L1, L2, L3},
+        EarlyMapping, HEAP_MAPPING_OFFSET, MEMORY_LIMIT, RAM_MAPPING_L1_COUNT,
+    },
+    ArchitectureImpl, PerCpuData,
+};
+use libk::{arch::Cpu, device::external_interrupt_controller};
+use libk_mm::{
+    address::{FromRaw, IntoRaw, PhysicalAddress},
+    phys::PhysicalMemoryRegion,
+    phys::{self, reserved::reserve_region},
+    pointer::PhysicalRef,
+    table::{EntryLevel, EntryLevelExt},
+};
+use libk_util::OneTimeInit;
+use tock_registers::interfaces::Writeable;
+use ygg_driver_pci::PciBusManager;
+
+use crate::{
+    debug,
+    device::{self, power::arm_psci::Psci},
+    fs::{Initrd, INITRD_DATA},
+    mem::heap,
+};
+
+use self::gic::Gic;
+
+use super::Platform;
+
+pub mod boot;
+pub mod exception;
+pub mod gic;
+pub mod smp;
+pub mod timer;
+
+const BOOT_STACK_SIZE: usize = 4096 * 32;
+
+#[derive(Clone, Copy)]
+#[repr(C, align(0x20))]
+struct BootStack {
+    data: [u8; BOOT_STACK_SIZE],
+}
+
+/// AArch64 architecture implementation
+pub struct AArch64 {
+    dt: OneTimeInit<DeviceTree<'static>>,
+
+    /// Optional instance of PSCI on this platform
+    pub psci: OneTimeInit<&'static Psci>,
+    reset: OneTimeInit<&'static dyn ResetDevice>,
+
+    initrd: OneTimeInit<PhysicalRef<'static, [u8]>>,
+}
+
+impl Platform for AArch64 {
+    const KERNEL_VIRT_OFFSET: usize = 0xFFFFFF8000000000;
+
+    type L3 = L3;
+
+    unsafe fn start_application_processors(&self) {
+        let dt = self.dt.get();
+        if let Err(error) = smp::start_ap_cores(dt) {
+            errorln!("Could not initialize AP CPUs: {:?}", error);
+        }
+    }
+
+    unsafe fn send_ipi(&self, _target: IpiDeliveryTarget, _msg: IpiMessage) -> Result<(), Error> {
+        Ok(())
+        // XXX
+        // if let Some(local_intc) = self.lintc.try_get() {
+        //     local_intc.send_ipi(target, msg)
+        // } else {
+        //     Ok(())
+        // }
+    }
+
+    fn register_reset_device(&self, reset: &'static dyn ResetDevice) -> Result<(), Error> {
+        self.reset.init(reset);
+        Ok(())
+    }
+
+    unsafe fn reset(&self) -> ! {
+        if let Some(reset) = self.reset.try_get() {
+            reset.reset()
+        } else {
+            let psci = self.psci.get();
+            psci.reset()
+        }
+    }
+}
+
+static GIC: OneTimeInit<&'static Gic> = OneTimeInit::new();
+
+impl AArch64 {
+    fn set_gic(gic: &'static Gic) {
+        GIC.init(gic);
+    }
+
+    fn extract_initrd_from_dt(
+        &self,
+        dt: &DeviceTree,
+    ) -> Option<(PhysicalAddress, PhysicalAddress)> {
+        let chosen = dt.node_by_path("/chosen")?;
+        let initrd_start = device_tree::find_prop(&chosen, "linux,initrd-start")?;
+        let initrd_end = device_tree::find_prop(&chosen, "linux,initrd-end")?;
+
+        let address_cells = dt.address_cells();
+
+        let initrd_start = initrd_start.cell1_array_item(0, address_cells)?;
+        let initrd_end = initrd_end.cell1_array_item(0, address_cells)?;
+
+        let initrd_start = PhysicalAddress::from_raw(initrd_start);
+        let initrd_end = PhysicalAddress::from_raw(initrd_end);
+
+        Some((initrd_start, initrd_end))
+    }
+
+    fn map_physical_memory<I: Iterator<Item = PhysicalMemoryRegion> + Clone>(
+        _it: I,
+        _memory_start: PhysicalAddress,
+        memory_end: PhysicalAddress,
+    ) -> Result<(), Error> {
+        let end_l1i = memory_end.page_align_up::<L1>().page_index::<L1>();
+        if end_l1i > RAM_MAPPING_L1_COUNT {
+            todo!()
+        }
+
+        // Map 1GiB chunks
+        for index in 0..end_l1i {
+            unsafe {
+                kernel_arch_aarch64::mem::map_ram_l1(index);
+            }
+        }
+
+        MEMORY_LIMIT.store(memory_end.into_raw(), Ordering::Release);
+
+        Ok(())
+    }
+
+    unsafe fn init_memory_management(&'static self, dtb: PhysicalAddress) -> Result<(), Error> {
+        // 16x2MiB
+        const HEAP_PAGES: usize = 16;
+
+        // Initialize the runtime mappings
+        kernel_arch_aarch64::mem::init_fixed_tables();
+
+        // Extract the size of the device tree
+        let dtb_size = {
+            let dtb_header = EarlyMapping::<u8>::map_slice(dtb, DeviceTree::MIN_HEADER_SIZE)?;
+            DeviceTree::read_totalsize(dtb_header.as_ref()).unwrap()
+        };
+
+        reserve_region(
+            "dtb",
+            PhysicalMemoryRegion {
+                base: dtb,
+                size: (dtb_size + 0xFFF) & !0xFFF,
+            },
+        );
+
+        let dtb_slice = EarlyMapping::<u8>::map_slice(dtb, dtb_size)?;
+
+        let dt = DeviceTree::from_addr(dtb_slice.as_ptr() as usize);
+
+        // Setup initrd from the dt
+        let initrd = self.extract_initrd_from_dt(&dt);
+
+        if let Some((start, end)) = initrd {
+            let aligned_start = start.page_align_down::<L3>();
+            let aligned_end = end.page_align_up::<L3>();
+
+            let size = aligned_end - aligned_start;
+            reserve_region(
+                "initrd",
+                PhysicalMemoryRegion {
+                    base: aligned_start,
+                    size,
+                },
+            );
+        }
+
+        // Initialize the physical memory
+        let regions = FdtMemoryRegionIter::new(&dt);
+
+        phys::init_from_iter(regions, Self::map_physical_memory)?;
+
+        // Setup the heap
+        for i in 0..HEAP_PAGES {
+            let l2_page = phys::alloc_2m_page()?;
+            kernel_arch_aarch64::mem::map_heap_l2(i, l2_page);
+        }
+
+        heap::init_heap(HEAP_MAPPING_OFFSET, HEAP_PAGES * L2::SIZE);
+
+        // EarlyMapping for DTB no longer needed, it lives in physical memory and can be obtained
+        // through PhysicalRef
+        let dtb_slice: PhysicalRef<'static, [u8]> = PhysicalRef::map_slice(dtb, dtb_size);
+        let dt = DeviceTree::from_addr(dtb_slice.as_ptr() as usize);
+
+        self.dt.init(dt);
+
+        // Setup initrd
+        if let Some((initrd_start, initrd_end)) = initrd {
+            let aligned_start = initrd_start.page_align_down::<L3>();
+            let aligned_end = initrd_end.page_align_up::<L3>();
+            let len = initrd_end - initrd_start;
+
+            let data = unsafe { PhysicalRef::map_slice(initrd_start, len) };
+            self.initrd.init(data);
+
+            INITRD_DATA.init(Initrd {
+                phys_page_start: aligned_start,
+                phys_page_len: aligned_end - aligned_start,
+                data: self.initrd.get().as_ref(),
+            });
+        }
+
+        Ok(())
+    }
+
+    unsafe fn 
init_platform(&self, is_bsp: bool) -> Result<(), Error> { + let per_cpu = PerCpuData { + gic: OneTimeInit::new(), + }; + Cpu::init_local(None, per_cpu); + + if is_bsp { + ygg_driver_pci::register_vendor_driver( + "Virtio PCI Network Device", + 0x1AF4, + 0x1000, + ygg_driver_virtio_net::probe, + ); + ygg_driver_pci::register_class_driver( + "AHCI SATA Controller", + 0x01, + Some(0x06), + Some(0x01), + ygg_driver_ahci::probe, + ); + ygg_driver_pci::register_class_driver( + "USB xHCI", + 0x0C, + Some(0x03), + Some(0x30), + ygg_driver_usb_xhci::probe, + ); + + let dt = self.dt.get(); + + let address_cells = dt.address_cells(); + let size_cells = dt.size_cells(); + + // Setup /chosen.stdout-path to get early debug printing + let chosen_stdout_path = dt.chosen_stdout_path(); + let chosen_stdout = chosen_stdout_path.and_then(|path| dt.node_by_path(path)); + + if let Some(node) = chosen_stdout.clone() { + let probe = DevTreeNodeInfo { + address_cells, + size_cells, + node, + }; + + if let Some((device, _)) = + device_tree::driver::probe_dt_node(&probe, device::register_device) + { + device.init()?; + } + } + + debug::init(); + + infoln!( + "Yggdrasil v{} ({})", + env!("CARGO_PKG_VERSION"), + git_version!() + ); + infoln!("Initializing aarch64 platform"); + + let nodes = dt.root().children(); + if let Err(error) = + device_tree::driver::enumerate_dt(address_cells, size_cells, nodes, |_, probe| { + // Skip chosen-stdout, already initialized + if let Some(ref chosen_stdout) = chosen_stdout + && chosen_stdout.name() == probe.node.name() + { + return Ok(()); + } + + if let Some((device, _)) = + device_tree::driver::probe_dt_node(&probe, device::register_device) + { + device.init()?; + } + + Ok(()) + }) + { + warnln!( + "{} errors encountered when initializing platform devices", + error + ); + } + + // Initialize IRQs for the devices + device::manager_lock().devices().for_each(|dev| unsafe { + if let Err(error) = dev.init_irq() { + warnln!( + "Could not init IRQs for {:?}: {:?}", + dev.display_name(), + error + ); + } + }); + + infoln!("Enumerated devices:"); + device::manager_lock().devices().for_each(|dev| { + infoln!("* {:?}", dev.display_name()); + }); + + PciBusManager::setup_bus_devices()?; + } else { + // BSP already initialized everything needed + // Setup timer and local interrupt controller + if let Some(gic) = GIC.try_get() { + unsafe { + gic.init_ap().unwrap(); + } + } + + // TODO device-tree initialization for this + CNTP_CTL_EL0.write(CNTP_CTL_EL0::ENABLE::SET + CNTP_CTL_EL0::IMASK::CLEAR); + CNTP_TVAL_EL0.set(10000000); + external_interrupt_controller() + .enable_irq(Irq::Private(14)) + .unwrap(); + } + + if let Some(gic) = GIC.try_get() { + let cpu_data = ArchitectureImpl::local_cpu_data().unwrap(); + cpu_data.gic.init(*gic); + } + + Ok(()) + } +} + +/// AArch64 implementation value +pub static PLATFORM: AArch64 = AArch64 { + dt: OneTimeInit::new(), + initrd: OneTimeInit::new(), + + psci: OneTimeInit::new(), + reset: OneTimeInit::new(), +}; diff --git a/kernel/src/arch/aarch64/smp.rs b/kernel/src/arch/aarch64/smp.rs new file mode 100644 index 00000000..cf49dfe2 --- /dev/null +++ b/kernel/src/arch/aarch64/smp.rs @@ -0,0 +1,126 @@ +//! 
Simultaneous multiprocessing support for aarch64
+use core::sync::atomic::Ordering;
+
+use abi::error::Error;
+use device_api::CpuBringupDevice;
+use device_tree::dt::{DevTreeIndexNodePropGet, DeviceTree};
+use kernel_arch_aarch64::CPU_COUNT;
+
+use crate::arch::PLATFORM;
+use crate::mem::KERNEL_VIRT_OFFSET;
+
+use super::{BootStack, BOOT_STACK_SIZE};
+
+#[derive(Debug)]
+enum CpuEnableMethod {
+    Psci,
+    // Not currently supported
+    #[allow(dead_code)]
+    SpinTable {
+        release_addr: usize,
+    },
+}
+
+struct CpuInfo<'a> {
+    id: u32,
+    compatible: &'a str,
+    enable_method: CpuEnableMethod,
+}
+
+fn enumerate_cpus<'a>(dt: &'a DeviceTree) -> impl Iterator<Item = CpuInfo<'a>> {
+    let cpus = dt.node_by_path("/cpus").unwrap();
+
+    cpus.children().filter_map(|cpu_node| {
+        let compatible = cpu_node.prop("compatible")?;
+        let id = cpu_node.prop("reg")?;
+        let enable_method_str: &str = cpu_node.prop("enable-method")?;
+        let enable_method = match enable_method_str {
+            "psci" => CpuEnableMethod::Psci,
+            _ => todo!(),
+        };
+
+        Some(CpuInfo {
+            id,
+            compatible,
+            enable_method,
+        })
+    })
+}
+
+impl CpuEnableMethod {
+    unsafe fn start_cpu(&self, id: usize, ip: usize, sp: usize) -> Result<(), Error> {
+        match self {
+            Self::Psci => {
+                let psci = PLATFORM.psci.try_get().ok_or_else(|| {
+                    warnln!(
+                        "cpu{} has to be enabled through PSCI, but no PSCI found",
+                        id
+                    );
+                    Error::InvalidArgument
+                })?;
+
+                psci.start_cpu(id, ip, sp)
+            }
+            _ => todo!(),
+        }
+    }
+}
+
+// TODO can be made smaller
+#[link_section = ".bss"]
+static AP_TRAMPOLINE_STACK: BootStack = BootStack {
+    data: [0; BOOT_STACK_SIZE],
+};
+
+/// Starts application processors using the method specified in the device tree.
+///
+/// TODO: currently does not handle systems where APs are already started before entry.
+///
+/// # Safety
+///
+/// The caller must ensure the physical memory manager was initialized, virtual memory tables are
+/// set up and the function has not been called before.
+pub unsafe fn start_ap_cores(dt: &DeviceTree) -> Result<(), Error> {
+    extern "C" {
+        fn __aarch64_ap_entry();
+    }
+
+    // Safety: safe, the stack is inside the kernel
+    let sp = AP_TRAMPOLINE_STACK.data.as_ptr() as usize - KERNEL_VIRT_OFFSET + BOOT_STACK_SIZE;
+
+    for cpu in enumerate_cpus(dt).filter(|cpu| cpu.id != 0) {
+        debugln!(
+            "cpu{}: enable-method={:?}, compatible={:?}",
+            cpu.id,
+            cpu.enable_method,
+            cpu.compatible
+        );
+
+        // const AP_STACK_PAGES: usize = 4;
+        // let stack_pages = phys::alloc_pages_contiguous(AP_STACK_PAGES)?;
+        // debugln!(
+        //     "cpu{} stack: {:#x}..{:#x}",
+        //     cpu.id,
+        //     stack_pages,
+        //     stack_pages.add(AP_STACK_PAGES * 0x1000)
+        // );
+
+        // Wait for the CPU to come up
+        let old_count = CPU_COUNT.load(Ordering::Acquire);
+
+        // Safety: safe, the function is inside the kernel
+        let ip = __aarch64_ap_entry as usize - KERNEL_VIRT_OFFSET;
+        // let sp = stack_pages.add(AP_STACK_PAGES * 0x1000);
+        if let Err(error) = cpu.enable_method.start_cpu(cpu.id as usize, ip, sp) {
+            errorln!("Couldn't start cpu{} up: {:?}", cpu.id, error);
+            continue;
+        }
+
+        while CPU_COUNT.load(Ordering::Acquire) == old_count {
+            aarch64_cpu::asm::wfe();
+        }
+
+        debugln!("cpu{} is up", cpu.id);
+    }
+
+    Ok(())
+}
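
The `SpinTable` variant above is declared but not yet wired up. For reference, a spin-table release is conceptually simple: store the AP entry address into the mailbox at `release_addr`, then wake cores parked in a `wfe` loop. A hedged sketch of what the missing arm might do (not part of this change; it assumes `release_addr` is already mapped as a writable `u64` mailbox):

```rust
use aarch64_cpu::asm::{self, barrier};

/// Sketch: releases one AP parked on a spin-table mailbox. The parked core
/// sits in a wfe loop, re-reading the mailbox after each event.
unsafe fn spin_table_release(release_addr: *mut u64, entry_phys: u64) {
    release_addr.write_volatile(entry_phys);
    // Make the store visible to the parked core before signalling the event
    barrier::dsb(barrier::SY);
    asm::sev();
}
```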
diff --git a/kernel/src/arch/aarch64/timer.rs b/kernel/src/arch/aarch64/timer.rs
new file mode 100644
index 00000000..9579cc7e
--- /dev/null
+++ b/kernel/src/arch/aarch64/timer.rs
@@ -0,0 +1,126 @@
+//! AArch64 Generic Timer
+
+use core::time::Duration;
+
+use aarch64_cpu::registers::{CNTFRQ_EL0, CNTPCT_EL0, CNTP_CTL_EL0, CNTP_TVAL_EL0};
+use abi::error::Error;
+use alloc::boxed::Box;
+use device_api::{
+    interrupt::{InterruptHandler, Irq},
+    timer::MonotonicTimestampProviderDevice,
+    Device,
+};
+use device_tree::device_tree_driver;
+use kernel_arch::task::Scheduler;
+use libk::{
+    arch::Cpu,
+    device::{external_interrupt_controller, register_monotonic_timestamp_provider},
+    runtime,
+};
+use tock_registers::interfaces::{ReadWriteable, Readable, Writeable};
+
+/// ARM Generic Timer driver
+pub struct ArmTimer {
+    irq: Irq,
+}
+
+/// ARM timer tick interval, in cycles of the counter (which runs at the CNTFRQ_EL0 frequency)
+pub const TICK_INTERVAL: u64 = 1000000;
+
+impl InterruptHandler for ArmTimer {
+    fn handle_irq(&self, _vector: Option<usize>) -> bool {
+        CNTP_TVAL_EL0.set(TICK_INTERVAL);
+        let now = self.monotonic_timestamp().unwrap();
+
+        runtime::tick(now);
+
+        unsafe {
+            Cpu::local().scheduler().yield_cpu();
+        }
+
+        true
+    }
+}
+
+impl MonotonicTimestampProviderDevice for ArmTimer {
+    fn monotonic_timestamp(&self) -> Result<Duration, Error> {
+        // Scale the raw count into microseconds before dividing by the frequency,
+        // so sub-second precision is not lost to integer division
+        let count = CNTPCT_EL0.get() * 1_000_000;
+        let freq = CNTFRQ_EL0.get();
+
+        Ok(Duration::from_nanos((count / freq) * 1_000))
+    }
+}
+
+impl Device for ArmTimer {
+    fn display_name(&self) -> &'static str {
+        "ARM Generic Timer"
+    }
+
+    unsafe fn init(&'static self) -> Result<(), Error> {
+        CNTP_CTL_EL0.write(CNTP_CTL_EL0::ENABLE::SET + CNTP_CTL_EL0::IMASK::SET);
+        register_monotonic_timestamp_provider(self);
+        Ok(())
+    }
+
+    unsafe fn init_irq(&'static self) -> Result<(), Error> {
+        let intc = external_interrupt_controller();
+
+        intc.register_irq(self.irq, Default::default(), self)?;
+
+        CNTP_CTL_EL0.modify(CNTP_CTL_EL0::IMASK::CLEAR);
+        CNTP_TVAL_EL0.set(TICK_INTERVAL);
+
+        intc.enable_irq(self.irq)?;
+
+        Ok(())
+    }
+}
+
+// impl TimestampSource for ArmTimer {
+//     fn timestamp(&self) -> Result {
+//     }
+// }
+
+// impl InterruptSource for ArmTimer {
+//     fn handle_irq(&self) -> Result {
+//         CNTP_TVAL_EL0.set(TICK_INTERVAL);
+//         let t = self.timestamp()?;
+//
+//         wait::tick(t);
+//         tasklet::tick(t);
+//
+//         unsafe {
+//             Cpu::local().queue().yield_cpu();
+//         }
+//
+//         Ok(true)
+//     }
+//
+//     unsafe fn init_irq(&'static self) -> Result<(), Error> {
+//         let intc = PLATFORM.interrupt_controller();
+//
+//         intc.register_handler(self.irq, self)?;
+//         intc.enable_irq(self.irq)?;
+//
+//         Ok(())
+//     }
+// }
+
+impl ArmTimer {
+    /// Constructs an instance of ARM generic timer.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure the function has not been called before.
+    pub const unsafe fn new(irq: Irq) -> Self {
+        Self { irq }
+    }
+}
+
+device_tree_driver! {
+    compatible: ["arm,armv8-timer"],
+    probe(_dt) => {
+        // TODO actually get info from the dt
+        Some(Box::new(unsafe { ArmTimer::new(Irq::Private(14)) }))
+    }
+}
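
As a sanity check on the units above: with `TICK_INTERVAL = 1_000_000` counter cycles and a hypothetical 62.5 MHz counter frequency (the exact value is board-specific and reported by `CNTFRQ_EL0`), a tick fires every 16 ms. The same scaling as `monotonic_timestamp`, as a standalone sketch:

```rust
use core::time::Duration;

/// Converts a raw generic-timer count to a Duration, given the counter
/// frequency from CNTFRQ_EL0. Mirrors the driver's multiply-then-divide
/// scaling. Note: multiplying first overflows u64 after a few days of
/// counter uptime at typical frequencies -- the same caveat applies to the
/// driver code above.
fn counter_to_duration(count: u64, freq_hz: u64) -> Duration {
    let micros = count * 1_000_000 / freq_hz;
    Duration::from_nanos(micros * 1_000)
}

fn main() {
    // 1_000_000 cycles at 62.5 MHz ~= 16 ms per tick
    assert_eq!(
        counter_to_duration(1_000_000, 62_500_000),
        Duration::from_millis(16)
    );
}
```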
diff --git a/kernel/src/arch/aarch64/vectors.S b/kernel/src/arch/aarch64/vectors.S
new file mode 100644
index 00000000..32b865e6
--- /dev/null
+++ b/kernel/src/arch/aarch64/vectors.S
@@ -0,0 +1,129 @@
+// vi:ft=a64asm:
+
+.macro EXC_VECTOR el, ht, bits, kind
+.p2align 7
+    b __aa\bits\()_el\el\ht\()_\kind
+.endm
+
+.macro EXC_HANDLER el, ht, bits, kind
+__aa\bits\()_el\el\ht\()_\kind:
+.if \bits == 32
+    // TODO
+    b .
+.endif
+
+    EXC_SAVE_STATE
+    mov x0, sp
+    mov lr, xzr
+    bl __aa64_el\el\()_\kind\()_handler
+    EXC_RESTORE_STATE
+    eret
+.endm
+
+// 32 GP register slots + 4 special-register slots (spsr_el1, elr_el1, sp_el0, pad)
+.set PT_REGS_SIZE, (16 * 16 + 16 * 2)
+
+.macro EXC_SAVE_STATE
+    sub sp, sp, #PT_REGS_SIZE
+
+    stp x0, x1, [sp, #16 * 0]
+    stp x2, x3, [sp, #16 * 1]
+    stp x4, x5, [sp, #16 * 2]
+    stp x6, x7, [sp, #16 * 3]
+    stp x8, x9, [sp, #16 * 4]
+    stp x10, x11, [sp, #16 * 5]
+    stp x12, x13, [sp, #16 * 6]
+    stp x14, x15, [sp, #16 * 7]
+
+    stp x16, x17, [sp, #16 * 8]
+    stp x18, x19, [sp, #16 * 9]
+    stp x20, x21, [sp, #16 * 10]
+    stp x22, x23, [sp, #16 * 11]
+    stp x24, x25, [sp, #16 * 12]
+    stp x26, x27, [sp, #16 * 13]
+    stp x28, x29, [sp, #16 * 14]
+    stp x30, xzr, [sp, #16 * 15]
+
+    mrs x0, spsr_el1
+    mrs x1, elr_el1
+    mrs x2, sp_el0
+
+    // TODO
+    stp x0, x1, [sp, #16 * 16]
+    stp x2, xzr, [sp, #16 * 17]
+.endm
+
+.macro EXC_RESTORE_STATE
+    ldp x0, x1, [sp, #16 * 16]
+    ldp x2, x3, [sp, #16 * 17]
+
+    msr spsr_el1, x0
+    msr elr_el1, x1
+    msr sp_el0, x2
+
+    ldp x0, x1, [sp, #16 * 0]
+    ldp x2, x3, [sp, #16 * 1]
+    ldp x4, x5, [sp, #16 * 2]
+    ldp x6, x7, [sp, #16 * 3]
+    ldp x8, x9, [sp, #16 * 4]
+    ldp x10, x11, [sp, #16 * 5]
+    ldp x12, x13, [sp, #16 * 6]
+    ldp x14, x15, [sp, #16 * 7]
+
+    ldp x16, x17, [sp, #16 * 8]
+    ldp x18, x19, [sp, #16 * 9]
+    ldp x20, x21, [sp, #16 * 10]
+    ldp x22, x23, [sp, #16 * 11]
+    ldp x24, x25, [sp, #16 * 12]
+    ldp x26, x27, [sp, #16 * 13]
+    ldp x28, x29, [sp, #16 * 14]
+    ldp x30, xzr, [sp, #16 * 15]
+
+    add sp, sp, #PT_REGS_SIZE
+.endm
+
+.section .text.vectors
+.global __aarch64_el1_vectors
+.p2align 12
+__aarch64_el1_vectors:
+    EXC_VECTOR 1, t, 64, sync
+    EXC_VECTOR 1, t, 64, irq
+    EXC_VECTOR 1, t, 64, fiq
+    EXC_VECTOR 1, t, 64, serror
+
+    EXC_VECTOR 1, h, 64, sync
+    EXC_VECTOR 1, h, 64, irq
+    EXC_VECTOR 1, h, 64, fiq
+    EXC_VECTOR 1, h, 64, serror
+
+    EXC_VECTOR 0, t, 64, sync
+    EXC_VECTOR 0, t, 64, irq
+    EXC_VECTOR 0, t, 64, fiq
+    EXC_VECTOR 0, t, 64, serror
+
+    EXC_VECTOR 0, t, 32, sync
+    EXC_VECTOR 0, t, 32, irq
+    EXC_VECTOR 0, t, 32, fiq
+    EXC_VECTOR 0, t, 32, serror
+
+.section .text
+.p2align 7
+EXC_HANDLER 1, t, 64, sync
+EXC_HANDLER 1, t, 64, irq
+EXC_HANDLER 1, t, 64, fiq
+EXC_HANDLER 1, t, 64, serror
+
+EXC_HANDLER 1, h, 64, sync
+EXC_HANDLER 1, h, 64, irq
+EXC_HANDLER 1, h, 64, fiq
+EXC_HANDLER 1, h, 64, serror
+
+EXC_HANDLER 0, t, 64, sync
+EXC_HANDLER 0, t, 64, irq
+EXC_HANDLER 0, t, 64, fiq
+EXC_HANDLER 0, t, 64, serror
+
+EXC_HANDLER 0, t, 32, sync
+EXC_HANDLER 0, t, 32, irq
+EXC_HANDLER 0, t, 32, fiq
+EXC_HANDLER 0, t, 32, serror
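
The save/restore macros above pin down the in-memory frame that the Rust handlers receive via `mov x0, sp`. A hedged Rust mirror of that layout (the actual frame type lives in `kernel-arch-aarch64` and may differ in naming; this is only a structural sketch):

```rust
/// Mirror of the stack frame built by EXC_SAVE_STATE (illustrative).
/// Total size is PT_REGS_SIZE = 16 * 16 + 16 * 2 = 288 bytes.
#[repr(C)]
struct ExceptionFrame {
    /// x0..x30 plus one discarded slot (stored as xzr) to keep stp pairs aligned
    gp_regs: [u64; 32],
    spsr_el1: u64,
    elr_el1: u64,
    sp_el0: u64,
    _pad: u64,
}

const _: () = assert!(core::mem::size_of::<ExceptionFrame>() == 288);
```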
diff --git a/kernel/src/arch/mod.rs b/kernel/src/arch/mod.rs
new file mode 100644
index 00000000..c742d66b
--- /dev/null
+++ b/kernel/src/arch/mod.rs
@@ -0,0 +1,81 @@
+//! Provides architecture/platform-specific implementation details
+
+use abi::error::Error;
+
+use cfg_if::cfg_if;
+use device_api::{
+    interrupt::{IpiDeliveryTarget, IpiMessage},
+    ResetDevice,
+};
+use kernel_arch::{Architecture, ArchitectureImpl};
+use libk_mm::table::EntryLevel;
+
+cfg_if! {
+    if #[cfg(target_arch = "aarch64")] {
+        pub mod aarch64;
+
+        pub use aarch64::{AArch64 as PlatformImpl, PLATFORM};
+    } else if #[cfg(target_arch = "x86_64")] {
+        pub mod x86_64;
+
+        pub use x86_64::{X86_64 as PlatformImpl, PLATFORM};
+    } else {
+        compile_error!("Architecture is not supported");
+    }
+}
+
+/// Architecture-specific lowest level of page mapping
+pub type L3 = <PlatformImpl as Platform>::L3;
+
+// Architecture interfaces
+
+/// Interface for architecture-specific facilities
+#[allow(unused)]
+pub trait Platform {
+    /// Address, to which "zero" address is mapped in the virtual address space
+    const KERNEL_VIRT_OFFSET: usize;
+
+    /// Lowest page entry level, usually 4KiB pages
+    type L3: EntryLevel;
+
+    /// Starts up the application processors that may be present in the system.
+    ///
+    /// # Safety
+    ///
+    /// Only safe to call once during system init.
+    unsafe fn start_application_processors(&self) {}
+
+    // Architecture intrinsics
+
+    /// Adds a reset device to the system
+    fn register_reset_device(&self, reset: &'static dyn ResetDevice) -> Result<(), Error> {
+        Err(Error::NotImplemented)
+    }
+
+    /// Sends a message to the requested set of CPUs through an interprocessor interrupt.
+    ///
+    /// # Note
+    ///
+    /// u64 limits the number of targetable CPUs to (only) 64. Platform-specific implementations
+    /// may impose narrower restrictions.
+    ///
+    /// # Safety
+    ///
+    /// As the call may alter the flow of execution on CPUs, this function is unsafe.
+    unsafe fn send_ipi(&self, target: IpiDeliveryTarget, msg: IpiMessage) -> Result<(), Error> {
+        Ok(())
+    }
+
+    /// Performs a CPU reset.
+    ///
+    /// # Safety
+    ///
+    /// The caller must ensure it is actually safe to reset, i.e. no critical processes will be
+    /// aborted and no data will be lost.
+    unsafe fn reset(&self) -> ! {
+        ArchitectureImpl::set_interrupt_mask(true);
+        loop {
+            ArchitectureImpl::wait_for_interrupt();
+        }
+    }
+}
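
Platform-independent kernel code reaches the active platform through the `PLATFORM` static and this trait. A hedged sketch of a caller (the `emergency_reset` wrapper is illustrative, not part of this diff):

```rust
use crate::arch::{Platform, PLATFORM};

fn emergency_reset() -> ! {
    // Safety: only reached once the kernel has decided state loss is acceptable
    unsafe { PLATFORM.reset() }
}
```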
diff --git a/kernel/src/arch/x86_64/acpi.rs b/kernel/src/arch/x86_64/acpi.rs
new file mode 100644
index 00000000..fa2c5390
--- /dev/null
+++ b/kernel/src/arch/x86_64/acpi.rs
@@ -0,0 +1,383 @@
+//! x86-64 implementation of ACPI management interfaces
+use core::{
+    alloc::{AllocError, Allocator, GlobalAlloc, Layout},
+    ptr::NonNull,
+    sync::atomic::Ordering,
+    time::Duration,
+};
+
+use acpi_lib::{AcpiHandler, AcpiTables, PhysicalMapping};
+use acpi_system::{
+    AcpiInterruptMethod, AcpiSleepState, AcpiSystem, AcpiSystemError, EventAction, FixedEvent,
+};
+use alloc::boxed::Box;
+use device_api::{
+    interrupt::{InterruptHandler, IpiDeliveryTarget, IpiMessage, Irq},
+    Device,
+};
+use kernel_arch_x86_64::CPU_COUNT;
+use libk::device::external_interrupt_controller;
+use libk_mm::{
+    address::{FromRaw, PhysicalAddress, Virtualize},
+    pointer::PhysicalRef,
+};
+use libk_util::{sync::IrqSafeSpinlock, OneTimeInit};
+use yggdrasil_abi::error::Error;
+
+use crate::{
+    arch::{
+        x86_64::{apic::ioapic::ISA_IRQ_OFFSET, SHUTDOWN_FENCE},
+        Platform, PLATFORM,
+    },
+    mem::{heap::GLOBAL_HEAP, read_memory, write_memory},
+};
+
+use super::intrinsics;
+
+#[derive(Clone, Copy)]
+#[doc(hidden)]
+pub struct AcpiAllocator;
+#[derive(Clone, Copy)]
+#[doc(hidden)]
+pub struct AcpiHandlerImpl;
+struct SciHandler;
+
+static ACPI_SYSTEM: OneTimeInit<IrqSafeSpinlock<AcpiSystem<AcpiHandlerImpl>>> = OneTimeInit::new();
+
+impl Device for SciHandler {
+    fn display_name(&self) -> &'static str {
+        "ACPI interrupt handler"
+    }
+}
+
+impl InterruptHandler for SciHandler {
+    fn handle_irq(&self, _vector: Option<usize>) -> bool {
+        log::trace!("ACPI SCI received");
+        ACPI_SYSTEM.get().lock().handle_sci();
+        true
+    }
+}
+
+unsafe impl Allocator for AcpiAllocator {
+    fn allocate(&self, layout: Layout) -> Result<NonNull<[u8]>, AllocError> {
+        let ptr = unsafe { GLOBAL_HEAP.alloc(layout) };
+        log::trace!("ACPI alloc: {:?} -> {:p}", layout, ptr);
+
+        if ptr.is_null() {
+            Err(AllocError)
+        } else {
+            unsafe {
+                Ok(NonNull::slice_from_raw_parts(
+                    NonNull::new_unchecked(ptr),
+                    layout.size(),
+                ))
+            }
+        }
+    }
+
+    unsafe fn deallocate(&self, ptr: NonNull<u8>, layout: Layout) {
+        log::trace!("ACPI dealloc: {:?}, {:?}", ptr, layout);
+        GLOBAL_HEAP.dealloc(ptr.as_ptr(), layout);
+    }
+}
+
+impl acpi_system::Handler for AcpiHandlerImpl {
+    type MappedSlice = PhysicalRef<'static, [u8]>;
+
+    unsafe fn map_slice(address: u64, length: u64) -> Self::MappedSlice {
+        PhysicalRef::map_slice(
+            PhysicalAddress::from_raw(address),
+            length.try_into().unwrap(),
+        )
+    }
+
+    fn io_read_u8(port: u16) -> u8 {
+        let value = unsafe { intrinsics::inb(port) };
+        log::trace!("io_read_u8 {:#x} <- {:#x}", port, value);
+        value
+    }
+
+    fn io_read_u16(port: u16) -> u16 {
+        let value = unsafe { intrinsics::inw(port) };
+        log::trace!("io_read_u16 {:#x} <- {:#x}", port, value);
+        value
+    }
+
+    fn io_read_u32(port: u16) -> u32 {
+        let value = unsafe { intrinsics::inl(port) };
+        log::trace!("io_read_u32 {:#x} <- {:#x}", port, value);
+        value
+    }
+
+    fn io_write_u8(port: u16, value: u8) {
+        log::trace!("io_write_u8 {:#x}, {:#x}", port, value);
+        unsafe { intrinsics::outb(port, value) }
+    }
+
+    fn io_write_u16(port: u16, value: u16) {
+        log::trace!("io_write_u16 {:#x}, {:#x}", port, value);
+        unsafe { intrinsics::outw(port, value) }
+    }
+
+    fn io_write_u32(port: u16, value: u32) {
+        log::trace!("io_write_u32 {:#x}, {:#x}", port, value);
+        unsafe { intrinsics::outl(port, value) }
+    }
+
+    fn mem_read_u8(address: u64) -> u8 {
+        let value = unsafe { read_memory(PhysicalAddress::from_raw(address)) };
+        log::trace!("mem_read_u8 {:#x} -> {:#x}", address, value);
+        value
+    }
+
+    fn mem_read_u16(address: u64) -> u16 {
+        let value = unsafe { read_memory(PhysicalAddress::from_raw(address)) };
+        log::trace!("mem_read_u16 {:#x} -> {:#x}", address, value);
+        value
+    }
+
+    fn mem_read_u32(address: u64) -> u32 {
+        let value = unsafe { read_memory(PhysicalAddress::from_raw(address)) };
+        log::trace!("mem_read_u32 {:#x} -> {:#x}", address, value);
+        value
+    }
+
+    fn mem_read_u64(address: u64) -> u64 {
+        let value = unsafe { read_memory(PhysicalAddress::from_raw(address)) };
+        log::trace!("mem_read_u64 {:#x} -> {:#x}", address, value);
+        value
+    }
+
+    fn mem_write_u8(address: u64, value: u8) {
+        log::trace!("mem_write_u8 {:#x}, {:#x}", address, value);
+        unsafe { write_memory(PhysicalAddress::from_raw(address), value) }
+    }
+
+    fn mem_write_u16(address: u64, value: u16) {
+        log::trace!("mem_write_u16 {:#x}, {:#x}", address, value);
+        unsafe { write_memory(PhysicalAddress::from_raw(address), value) }
+    }
+
+    fn mem_write_u32(address: u64, value: u32) {
+        log::trace!("mem_write_u32 {:#x}, {:#x}", address, value);
+        unsafe { write_memory(PhysicalAddress::from_raw(address), value) }
+    }
+
+    fn mem_write_u64(address: u64, value: u64) {
+        log::trace!("mem_write_u64 {:#x}, {:#x}", address, value);
+        unsafe { write_memory(PhysicalAddress::from_raw(address), value) }
+    }
+
+    fn install_interrupt_handler(irq: u32) -> Result<(), AcpiSystemError> {
+        infoln!("Installing ACPI SCI handler at IRQ #{}", irq);
+
+        let intc = external_interrupt_controller();
+        let handler = Box::leak(Box::new(SciHandler));
+        let irq = Irq::External(irq + ISA_IRQ_OFFSET);
+
+        intc.register_irq(irq, Default::default(), handler).unwrap();
+        intc.enable_irq(irq).unwrap();
+
+        Ok(())
+    }
+
+    fn stall(_duration: Duration) {
+        // TODO polling_sleep is not yet implemented properly
+        todo!()
+        // util::polling_sleep(duration).ok();
+    }
+}
+
+impl aml::Handler for AcpiHandlerImpl {
+    fn read_io_u8(&self, port: u16) -> u8 {
+        <Self as acpi_system::Handler>::io_read_u8(port)
+    }
+
+    fn read_io_u16(&self, port: u16) -> u16 {
+        <Self as acpi_system::Handler>::io_read_u16(port)
+    }
+
+    fn read_io_u32(&self, port: u16) -> u32 {
+        <Self as acpi_system::Handler>::io_read_u32(port)
+    }
+
+    fn write_io_u8(&self, port: u16, value: u8) {
+        <Self as acpi_system::Handler>::io_write_u8(port, value)
+    }
+
+    fn write_io_u16(&self, port: u16, value: u16) {
+        <Self as acpi_system::Handler>::io_write_u16(port, value)
+    }
+
+    fn write_io_u32(&self, port: u16, value: u32) {
+        <Self as acpi_system::Handler>::io_write_u32(port, value)
+    }
+
+    fn read_u8(&self, address: usize) -> u8 {
+        <Self as acpi_system::Handler>::mem_read_u8(address as u64)
+    }
+
+    fn read_u16(&self, address: usize) -> u16 {
+        <Self as acpi_system::Handler>::mem_read_u16(address as u64)
+    }
+
+    fn read_u32(&self, address: usize) -> u32 {
+        <Self as acpi_system::Handler>::mem_read_u32(address as u64)
+    }
+
+    fn read_u64(&self, address: usize) -> u64 {
+        <Self as acpi_system::Handler>::mem_read_u64(address as u64)
+    }
+
+    fn write_u8(&self, address: usize, value: u8) {
+        <Self as acpi_system::Handler>::mem_write_u8(address as u64, value)
+    }
+
+    fn write_u16(&self, address: usize, value: u16) {
+        <Self as acpi_system::Handler>::mem_write_u16(address as u64, value)
+    }
+
+    fn write_u32(&self, address: usize, value: u32) {
+        <Self as acpi_system::Handler>::mem_write_u32(address as u64, value)
+    }
+
+    fn write_u64(&self, address: usize, value: u64) {
+        <Self as acpi_system::Handler>::mem_write_u64(address as u64, value)
+    }
+
+    fn read_pci_u8(&self, _segment: u16, _bus: u8, _device: u8, _function: u8, _offset: u16) -> u8 {
+        0xFF
+    }
+
+    fn read_pci_u16(
+        &self,
+        _segment: u16,
+        _bus: u8,
+        _device: u8,
+        _function: u8,
+        _offset: u16,
+    ) -> u16 {
+        0xFFFF
+    }
+
+    fn read_pci_u32(
+        &self,
+        _segment: u16,
+        _bus: u8,
+        _device: u8,
+        _function: u8,
+        _offset: u16,
+    ) -> u32 {
+        0xFFFFFFFF
+    }
+
+    fn write_pci_u8(
+        &self,
+        _segment: u16,
+        _bus: u8,
+        _device: u8,
+        _function: u8,
+        _offset: u16,
+        _value: u8,
+    ) {
+    }
+
+    fn write_pci_u16(
+        &self,
+        _segment: u16,
+        _bus: u8,
+        _device: u8,
+        _function: u8,
+        _offset: u16,
+        _value: u16,
+    ) {
+    }
+
+    fn write_pci_u32(
+        &self,
+        _segment: u16,
+        _bus: u8,
+        _device: u8,
+        _function: u8,
+        _offset: u16,
+        _value: u32,
+    ) {
+    }
+
+    fn read_ec_u8(&self, _address: u64) -> u8 {
+        0x00
+    }
+
+    fn write_ec_u8(&self, _address: u64, _value: u8) {}
+
+    fn sleep(&self, _duration: Duration) {
+        todo!()
+        // util::polling_sleep(duration).unwrap();
+    }
+}
+
+impl AcpiHandler for AcpiHandlerImpl {
+    // No actual address space modification is performed
+    unsafe fn map_physical_region<T>(
+        &self,
+        physical_address: usize,
+        size: usize,
+    ) -> PhysicalMapping<Self, T> {
+        PhysicalMapping::new(
+            physical_address,
+            NonNull::new_unchecked(
+                PhysicalAddress::from_raw(physical_address).virtualize() as *mut T
+            ),
+            size,
+            size,
+            *self,
+        )
+    }
+
+    // Unmap nothing, these addresses are "virtualized" to high address space
+    fn unmap_physical_region<T>(_region: &PhysicalMapping<Self, T>) {}
+}
+
+/// Initializes ACPI management
+#[allow(unused)]
+pub fn init_acpi(tables: &'static AcpiTables<AcpiHandlerImpl>) -> Result<(), Error> {
+    // TODO currently broken for real HW
+    let mut system = AcpiSystem::new(tables, Box::new(AcpiHandlerImpl)).unwrap();
+
+    system.initialize(AcpiInterruptMethod::Apic).unwrap();
+
+    system
+        .enable_fixed_event(
+            &FixedEvent::POWER_BUTTON,
+            Box::new(|_| {
+                log::info!("Power button was pressed");
+
+                // TODO the correct way would be to
+                // 1. Nicely ask all the processes to quit
+                // 2. Wait for some time
+                // 3. Kill the remaining ones
+                // 4. Halt other cores
+                // 5. Sync filesystem
+                // 6. Do something with the devices
+                // 7. Actually enter the S5 state
+
+                unsafe {
+                    PLATFORM
+                        .send_ipi(IpiDeliveryTarget::OtherCpus, IpiMessage::Shutdown)
+                        .unwrap();
+                }
+
+                SHUTDOWN_FENCE.signal();
+                SHUTDOWN_FENCE.wait_all(CPU_COUNT.load(Ordering::Acquire));
+
+                log::info!("CPUs are parked, can shutdown now");
+
+                EventAction::EnterSleepState(AcpiSleepState::S5)
+            }),
+        )
+        .unwrap();
+
+    ACPI_SYSTEM.init(IrqSafeSpinlock::new(system));
+
+    Ok(())
+}
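
A side note on the delegation pattern in the `aml::Handler` impl above: `AcpiHandlerImpl` implements two traits with overlapping method roles, so each `aml` method forwards through fully qualified syntax. A minimal self-contained illustration of the pattern (the trait and type names here are invented for the example):

```rust
trait Backend {
    fn read(addr: u64) -> u8;
}

trait Frontend {
    fn read_u8(&self, addr: usize) -> u8;
}

struct Impl;

impl Backend for Impl {
    fn read(_addr: u64) -> u8 {
        0
    }
}

impl Frontend for Impl {
    fn read_u8(&self, addr: usize) -> u8 {
        // Fully qualified syntax disambiguates which trait's method is meant
        <Self as Backend>::read(addr as u64)
    }
}
```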
diff --git a/kernel/src/arch/x86_64/apic/ioapic.rs b/kernel/src/arch/x86_64/apic/ioapic.rs
new file mode 100644
index 00000000..80f4f1ad
--- /dev/null
+++ b/kernel/src/arch/x86_64/apic/ioapic.rs
@@ -0,0 +1,318 @@
+//! x86-64 I/O APIC driver implementation
+use abi::error::Error;
+use acpi_lib::platform::interrupt::{Apic as AcpiApic, Polarity, TriggerMode};
+use device_api::{
+    interrupt::{
+        ExternalInterruptController, FixedInterruptTable, InterruptHandler, InterruptTable, Irq,
+        IrqLevel, IrqOptions, IrqTrigger,
+    },
+    Device,
+};
+use libk_mm::{
+    address::{FromRaw, PhysicalAddress},
+    device::DeviceMemoryIo,
+};
+use libk_util::sync::IrqSafeSpinlock;
+use tock_registers::{
+    interfaces::{Readable, Writeable},
+    register_structs,
+    registers::{ReadWrite, WriteOnly},
+};
+
+use crate::arch::x86_64::{acpi::AcpiAllocator, apic::local::BSP_APIC_ID};
+
+use super::{APIC_EXTERNAL_OFFSET, POPULATED_EXTERNAL_VECTORS};
+
+pub const ISA_IRQ_OFFSET: u32 = 1024;
+
+// IRQ 0 is timer, IRQ 1 reserved (for now?), +32 offset for exception entries
+const IO_APIC_VECTOR_OFFSET: u32 = 32 + APIC_EXTERNAL_OFFSET;
+
+const REG_IOAPIC_VERSION: u32 = 0x01;
+const REG_REDIRECTION_BASE: u32 = 0x10;
+
+const ENTRY_LOW_MASK: u32 = 1 << 16;
+const ENTRY_LOW_TRIGGER_LEVEL: u32 = 1 << 15;
+const ENTRY_LOW_POLARITY_LOW: u32 = 1 << 13;
+const ENTRY_LOW_DESTINATION_LOGICAL: u32 = 1 << 11;
+
+const ENTRY_HIGH_APIC_ID_SHIFT: u32 = 24;
+
+#[allow(dead_code)]
+#[derive(Clone, Copy)]
+struct IsaRedirection {
+    gsi_index: u32,
+    level: IrqLevel,
+    trigger: IrqTrigger,
+}
+
+register_structs! {
+    #[allow(non_snake_case)]
+    Regs {
+        (0x00 => Index: WriteOnly<u32>),
+        (0x04 => _0),
+        (0x10 => Data: ReadWrite<u32>),
+        (0x14 => @END),
+    }
+}
+
+struct Inner {
+    regs: DeviceMemoryIo<'static, Regs>,
+    max_gsi: u32,
+}
+
+/// I/O APIC interface. Provides a way to route and control how interrupts from external devices
+/// are handled.
+pub struct IoApic {
+    inner: IrqSafeSpinlock<Inner>,
+    isa_redirections: [Option<IsaRedirection>; 16],
+
+    table: IrqSafeSpinlock<FixedInterruptTable<{ POPULATED_EXTERNAL_VECTORS as usize }>>,
+}
+
+impl Regs {
+    #[inline]
+    fn read(&self, reg: u32) -> u32 {
+        self.Index.set(reg);
+        self.Data.get()
+    }
+
+    #[inline]
+    fn write(&self, reg: u32, value: u32) {
+        self.Index.set(reg);
+        self.Data.set(value);
+    }
+}
+
+impl Inner {
+    fn map_gsi(&mut self, gsi: u32, vector: u32, apic_id: u32) -> Result<(), Error> {
+        assert!(gsi < self.max_gsi);
+        assert!(vector < 0x100);
+
+        infoln!("map_irq gsi{}, vec{}, apic{}", gsi, vector, apic_id);
+
+        let mut low = self.regs.read(REG_REDIRECTION_BASE + gsi * 2);
+        let mut high = self.regs.read(REG_REDIRECTION_BASE + gsi * 2 + 1);
+
+        // Vector
+        low &= !0xFF;
+        low |= vector;
+        // Destination - physical
+        low &= !ENTRY_LOW_DESTINATION_LOGICAL;
+        // Clear delivery mode
+        low &= !(0x7 << 8);
+
+        // Destination APIC ID
+        high &= !(0xFF << ENTRY_HIGH_APIC_ID_SHIFT);
+        high |= apic_id << ENTRY_HIGH_APIC_ID_SHIFT;
+
+        self.regs.write(REG_REDIRECTION_BASE + gsi * 2, low);
+        self.regs.write(REG_REDIRECTION_BASE + gsi * 2 + 1, high);
+
+        Ok(())
+    }
+
+    fn configure_gsi(&mut self, gsi: u32, level: IrqLevel, trigger: IrqTrigger) {
+        assert!(gsi < self.max_gsi);
+
+        let mut low = self.regs.read(REG_REDIRECTION_BASE + gsi * 2);
+
+        match level {
+            IrqLevel::Default => (),
+            IrqLevel::ActiveLow => {
+                low |= ENTRY_LOW_POLARITY_LOW;
+            }
+            IrqLevel::ActiveHigh => {
+                low &= !ENTRY_LOW_POLARITY_LOW;
+            }
+        }
+
+        match trigger {
+            IrqTrigger::Default => (),
+            IrqTrigger::Level => {
+                low |= ENTRY_LOW_TRIGGER_LEVEL;
+            }
+            IrqTrigger::Edge => {
+                low &= !ENTRY_LOW_TRIGGER_LEVEL;
+            }
+        }
+
+        self.regs.write(REG_REDIRECTION_BASE + gsi * 2, low);
+    }
+
+    fn set_gsi_enabled(&mut self, gsi: u32, enabled: bool) {
+        assert!(gsi < self.max_gsi);
+
+        let low = self.regs.read(REG_REDIRECTION_BASE + gsi * 2);
+        if enabled {
+            debugln!("Unmask GSI #{}", gsi);
+            self.regs
+                .write(REG_REDIRECTION_BASE + gsi * 2, low & !ENTRY_LOW_MASK)
+        } else {
+            self.regs
+                .write(REG_REDIRECTION_BASE + gsi * 2, low | ENTRY_LOW_MASK);
+        }
+    }
+}
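
For a concrete picture of what `configure_gsi`/`map_gsi` write: the low dword of a redirection entry packs the vector, delivery mode, polarity, trigger mode and mask bit. A sketch composing an entry for a level-triggered, active-low GSI routed to vector 0x30, mirroring the bit constants above (pure arithmetic, not kernel code):

```rust
const ENTRY_LOW_MASK: u32 = 1 << 16;
const ENTRY_LOW_TRIGGER_LEVEL: u32 = 1 << 15;
const ENTRY_LOW_POLARITY_LOW: u32 = 1 << 13;

/// Builds the low dword of an I/O APIC redirection entry: fixed delivery,
/// physical destination, initially masked.
fn redirection_low(vector: u8, level_triggered: bool, active_low: bool) -> u32 {
    let mut low = vector as u32 | ENTRY_LOW_MASK;
    if level_triggered {
        low |= ENTRY_LOW_TRIGGER_LEVEL;
    }
    if active_low {
        low |= ENTRY_LOW_POLARITY_LOW;
    }
    low
}

fn main() {
    // vector 0x30, level-triggered, active-low => 0x1A030
    assert_eq!(redirection_low(0x30, true, true), 0x0001_A030);
}
```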
+
+impl Device for IoApic {
+    fn display_name(&self) -> &'static str {
+        "I/O APIC"
+    }
+
+    unsafe fn init(&'static self) -> Result<(), Error> {
+        todo!()
+    }
+}
+
+impl ExternalInterruptController for IoApic {
+    fn register_irq(
+        &self,
+        irq: Irq,
+        options: IrqOptions,
+        handler: &'static dyn InterruptHandler,
+    ) -> Result<(), Error> {
+        let mut inner = self.inner.lock();
+        let table_vector = self.table.lock().insert_least_loaded(handler)?;
+
+        let gsi_target_vector = (table_vector as u32) + IO_APIC_VECTOR_OFFSET;
+        let bsp_apic = *BSP_APIC_ID.get();
+
+        infoln!(
+            "Binding {:?} ({}) to {}:{}",
+            irq,
+            handler.display_name(),
+            bsp_apic,
+            table_vector
+        );
+
+        let (gsi, level, trigger) = match irq {
+            Irq::External(irq) if irq >= ISA_IRQ_OFFSET => {
+                if let Some(redir) = self.isa_redirections[(irq - ISA_IRQ_OFFSET) as usize].as_ref()
+                {
+                    // Mapped to a (possibly different) GSI, but also with possibly different options
+                    (
+                        redir.gsi_index,
+                        options.level.override_default(redir.level),
+                        options.trigger.override_default(redir.trigger),
+                    )
+                } else {
+                    // Directly mapped to a GSI
+                    (irq - ISA_IRQ_OFFSET, options.level, options.trigger)
+                }
+            }
+            Irq::External(irq) => (irq, options.level, options.trigger),
+            Irq::Private(_) => unimplemented!(),
+        };
+
+        inner.configure_gsi(gsi, level, trigger);
+        inner.map_gsi(gsi, gsi_target_vector, bsp_apic)?;
+
+        Ok(())
+    }
+
+    fn enable_irq(&self, irq: Irq) -> Result<(), Error> {
+        let mut inner = self.inner.lock();
+        let gsi = self.translate_irq(irq);
+        inner.set_gsi_enabled(gsi, true);
+        Ok(())
+    }
+
+    fn handle_specific_irq(&self, vector: usize) {
+        // NOTE the argument is the table vector assigned in register_irq(),
+        // not a GSI number
+        let table = self.table.lock();
+
+        if let Some(handler) = table.handler(vector) {
+            handler.handle_irq(None);
+        } else {
+            warnln!("No handler set for IRQ vector #{}", vector);
+        }
+    }
+}
+
+impl IoApic {
+    /// Creates an I/O APIC instance from its ACPI definition
+    pub fn from_acpi(info: &AcpiApic<AcpiAllocator>) -> Result<Self, Error> {
+        if info.io_apics.len() != 1 {
+            warnln!("The system has multiple I/O APICs, but the kernel only knows how to use one");
+        }
+        let ioapic = info.io_apics.first().unwrap();
+
+        infoln!("I/O APIC at {:#x}", ioapic.address);
+
+        let mut isa_redirections = [None; 16];
+
+        for redir in info.interrupt_source_overrides.iter() {
+            let index = redir.isa_source as usize;
+            let trigger = match redir.trigger_mode {
+                // ISA IRQs are edge-triggered by default
+                TriggerMode::SameAsBus => IrqTrigger::Edge,
+                TriggerMode::Edge => IrqTrigger::Edge,
+                TriggerMode::Level => IrqTrigger::Level,
+            };
+            let level = match redir.polarity {
+                // Level-triggered ISA IRQs are ActiveLow by default
+                Polarity::SameAsBus => {
+                    if trigger == IrqTrigger::Level {
+                        IrqLevel::ActiveLow
+                    } else {
+                        IrqLevel::ActiveHigh
+                    }
+                }
+                Polarity::ActiveLow => IrqLevel::ActiveLow,
+                Polarity::ActiveHigh => IrqLevel::ActiveHigh,
+            };
+
+            debugln!(
+                "ISA IRQ #{} -> GSI #{}",
+                index,
+                redir.global_system_interrupt
+            );
+            isa_redirections[index].replace(IsaRedirection {
+                gsi_index: redir.global_system_interrupt,
+                level,
+                trigger,
+            });
+        }
+
+        // TODO properly map this using DeviceMemory
+        // let regs = Regs {
+        //     base: unsafe { PhysicalAddress::from_raw(ioapic.address as u64).virtualize_raw() },
+        // };
+        // let mapping = unsafe { DeviceMemoryMapping::map(base, size) };
+        let regs = unsafe {
+            DeviceMemoryIo::<'_, Regs>::map(
+                PhysicalAddress::from_raw(ioapic.address as u64),
+                Default::default(),
+            )?
+        };
+
+        let max_gsi = (regs.read(REG_IOAPIC_VERSION) >> 16) & 0xFF;
+
+        infoln!("Maximum GSI number: {}", max_gsi);
+
+        let mut inner = Inner { regs, max_gsi };
+
+        // Mask all GSIs
+        for gsi in 0..max_gsi {
+            inner.set_gsi_enabled(gsi, false);
+        }
+
+        Ok(Self {
+            isa_redirections,
+            inner: IrqSafeSpinlock::new(inner),
+            table: IrqSafeSpinlock::new(FixedInterruptTable::new()),
+        })
+    }
+
+    fn translate_irq(&self, irq: Irq) -> u32 {
+        let redir = &self.isa_redirections;
+        match irq {
+            Irq::External(irq) if irq >= ISA_IRQ_OFFSET => {
+                let isa = irq - ISA_IRQ_OFFSET;
+                redir[isa as usize].map(|t| t.gsi_index).unwrap_or(isa)
+            }
+            Irq::External(irq) => irq,
+            Irq::Private(_) => unreachable!(),
+        }
+    }
+}
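
The classic case the redirection table handles is the PIT: on most boards an ACPI interrupt source override maps ISA IRQ 0 to GSI 2. A simplified standalone model of `translate_irq`'s behavior under that override:

```rust
/// Simplified model: ISA IRQ -> GSI with an optional override table.
fn translate_isa(overrides: &[Option<u32>; 16], isa: u32) -> u32 {
    overrides[isa as usize].unwrap_or(isa)
}

fn main() {
    let mut overrides = [None; 16];
    overrides[0] = Some(2); // typical PIT override: ISA IRQ 0 -> GSI 2

    assert_eq!(translate_isa(&overrides, 0), 2); // overridden
    assert_eq!(translate_isa(&overrides, 4), 4); // identity-mapped
}
```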
diff --git a/kernel/src/arch/x86_64/apic/local.rs b/kernel/src/arch/x86_64/apic/local.rs
new file mode 100644
index 00000000..0ce7443a
--- /dev/null
+++ b/kernel/src/arch/x86_64/apic/local.rs
@@ -0,0 +1,380 @@
+//! x86-64 Local APIC driver implementation
+use core::sync::atomic::Ordering;
+
+use abi::error::Error;
+use alloc::{vec, vec::Vec};
+use device_api::{
+    interrupt::{
+        InterruptAffinity, InterruptHandler, IpiDeliveryTarget, IpiMessage,
+        LocalInterruptController, MessageInterruptController, MsiInfo,
+    },
+    Device,
+};
+use kernel_arch_x86_64::{
+    mem::table::L3, registers::MSR_IA32_APIC_BASE, LocalApicInterface, CPU_COUNT,
+};
+use libk::arch::Cpu;
+use libk_mm::{
+    address::{FromRaw, IntoRaw, PhysicalAddress},
+    device::DeviceMemoryIo,
+    table::EntryLevelExt,
+};
+use libk_util::{
+    sync::{IrqGuard, IrqSafeSpinlock},
+    OneTimeInit,
+};
+use tock_registers::{
+    interfaces::{ReadWriteable, Readable, Writeable},
+    register_bitfields, register_structs,
+    registers::{ReadOnly, ReadWrite, WriteOnly},
+};
+
+use crate::arch::x86_64::apic::APIC_MSI_OFFSET;
+
+use super::{
+    APIC_IPI_VECTOR, APIC_LINT0_VECTOR, APIC_LINT1_VECTOR, APIC_SPURIOUS_VECTOR, APIC_TIMER_VECTOR,
+    MAX_MSI_VECTORS,
+};
+
+const TIMER_INTERVAL: u32 = 150000;
+
+/// When initialized, contains the Local APIC ID of the bootstrap processor
+pub static BSP_APIC_ID: OneTimeInit<u32> = OneTimeInit::new();
+
+register_bitfields! {
+    u32,
+    Id [
+        ApicId OFFSET(24) NUMBITS(8) []
+    ],
+    SpuriousVector [
+        Vector OFFSET(0) NUMBITS(8) [],
+        SoftwareEnable OFFSET(8) NUMBITS(1) [],
+    ],
+    TimerLocalVectorEntry [
+        Vector OFFSET(0) NUMBITS(8) [],
+        Mask OFFSET(16) NUMBITS(1) [
+            Masked = 1,
+            Unmasked = 0
+        ],
+        TimerMode OFFSET(17) NUMBITS(1) [
+            Periodic = 1,
+            OneShot = 0
+        ]
+    ],
+    LocalVectorEntry [
+        Vector OFFSET(0) NUMBITS(8) [],
+        Mask OFFSET(16) NUMBITS(1) [
+            Masked = 1,
+            Unmasked = 0,
+        ],
+        DeliveryMode OFFSET(8) NUMBITS(3) [
+            Nmi = 4,
+            ExtINT = 7
+        ],
+    ],
+    ICR0 [
+        Vector OFFSET(0) NUMBITS(8) [],
+        Destination OFFSET(8) NUMBITS(3) [
+            Normal = 1,
+            Lowest = 2,
+            SMI = 3,
+            NMI = 4,
+            INIT = 5,
+            SIPI = 6
+        ],
+        DeliveryStatus OFFSET(12) NUMBITS(1) [],
+        INIT0 OFFSET(14) NUMBITS(1) [
+            Deassert = 0,
+            Assert = 1,
+        ],
+        INIT1 OFFSET(15) NUMBITS(1) [
+            Deassert = 1,
+            Assert = 0,
+        ],
+        DestinationType OFFSET(18) NUMBITS(3) [
+            Physical = 0,
+            This = 1,
+            All = 2,
+            AllExceptThis = 3,
+        ]
+    ],
+    ICR1 [
+        PhysicalDestination OFFSET(24) NUMBITS(4) []
+    ],
+}
+
+register_structs! {
+    #[allow(non_snake_case, missing_docs)]
+    Regs {
+        (0x00 => _0),
+        (0x20 => Id: ReadOnly<u32, Id::Register>),
+        (0x24 => _1),
+        (0x80 => TaskPriorityRegister: ReadWrite<u32>),
+        (0x84 => _13),
+        (0xB0 => EndOfInterrupt: WriteOnly<u32>),
+        (0xB4 => _2),
+        (0xF0 => SpuriousVector: ReadWrite<u32, SpuriousVector::Register>),
+        (0xF4 => _3),
+        (0x100 => ISR0: ReadOnly<u32>),
+        (0x104 => _14),
+        (0x280 => ErrorStatus: ReadOnly<u32>),
+        (0x284 => _4),
+        (0x300 => ICR0: ReadWrite<u32, ICR0::Register>),
+        (0x304 => _5),
+        (0x310 => ICR1: ReadWrite<u32, ICR1::Register>),
+        (0x314 => _6),
+        (0x320 => TimerLocalVectorEntry: ReadWrite<u32, TimerLocalVectorEntry::Register>),
+        (0x324 => _7),
+        (0x350 => LInt0: ReadWrite<u32, LocalVectorEntry::Register>),
+        (0x354 => _8),
+        (0x360 => LInt1: ReadWrite<u32, LocalVectorEntry::Register>),
+        (0x364 => _9),
+        (0x380 => TimerInitCount: ReadWrite<u32>),
+        (0x384 => _10),
+        (0x390 => TimerCurrentCount: ReadOnly<u32>),
+        (0x394 => _11),
+        (0x3E0 => TimerDivideConfig: ReadWrite<u32>),
+        (0x3E4 => _12),
+        (0x530 => @END),
+    }
+}
+
+/// Per-processor local APIC interface
+pub struct LocalApic {
+    regs: DeviceMemoryIo<'static, Regs>,
+    id: u32,
+    msi_vectors: Vec<IrqSafeSpinlock<Vec<&'static dyn InterruptHandler>>>,
+}
+
+unsafe impl Send for LocalApic {}
+unsafe impl Sync for LocalApic {}
+
+impl Device for LocalApic {
+    fn display_name(&self) -> &'static str {
+        "Local APIC"
+    }
+}
+
+impl LocalApicInterface for LocalApic {
+    fn clear_interrupt(&self) {
+        self.regs.EndOfInterrupt.set(0);
+    }
+
+    unsafe fn wakeup_cpu(&self, apic_id: u32, entry_vector: PhysicalAddress) {
+        infoln!("Waking up apic{}, entry = {:#x}", apic_id, entry_vector);
+
+        while self.regs.ICR0.matches_all(ICR0::DeliveryStatus::SET) {
+            core::hint::spin_loop();
+        }
+
+        // The SIPI vector encodes the entry point's physical page number
+        let entry_vector = entry_vector.page_index::<L3>();
+
+        // INIT assert
+        self.regs.ICR1.write(ICR1::PhysicalDestination.val(apic_id));
+
+        self.regs.ICR0.write(
+            ICR0::Destination::INIT
+                + ICR0::DestinationType::Physical
+                + ICR0::INIT0::Assert
+                + ICR0::INIT1::Assert,
+        );
+
+        while self.regs.ICR0.matches_all(ICR0::DeliveryStatus::SET) {
+            core::hint::spin_loop();
+        }
+
+        // INIT deassert
+        self.regs.ICR1.write(ICR1::PhysicalDestination.val(apic_id));
+
+        self.regs.ICR0.write(
+            ICR0::Destination::INIT
+                + ICR0::DestinationType::Physical
+                + ICR0::INIT0::Deassert
+                + ICR0::INIT1::Deassert,
+        );
+
+        while self.regs.ICR0.matches_all(ICR0::DeliveryStatus::SET) {
+            core::hint::spin_loop();
+        }
+
+        // Send another SIPI type IPI because the spec says so
+        self.regs.ICR1.write(ICR1::PhysicalDestination.val(apic_id));
+
+        self.regs.ICR0.write(
+            ICR0::Vector.val(entry_vector as u32)
+                + ICR0::Destination::SIPI
+                + ICR0::DestinationType::Physical,
+        );
+
+        while self.regs.ICR0.matches_all(ICR0::DeliveryStatus::SET) {
+            core::hint::spin_loop();
+        }
+    }
+}
+
+impl MessageInterruptController for LocalApic {
+    fn handle_msi(&self, vector: usize) {
+        // TODO this is ugly
+        let row = &self.msi_vectors[vector];
+        let mut i = 0;
+
+        loop {
+            let table = row.lock();
+            let Some(&handler) = table.get(i) else {
+                break;
+            };
+            drop(table);
+
+            handler.handle_irq(Some(vector));
+
+            i += 1;
+        }
+    }
+
+    fn register_msi_range(
+        &self,
+        range: &mut [MsiInfo],
+        handler: &'static dyn InterruptHandler,
+    ) -> Result<(), Error> {
+        let _guard = IrqGuard::acquire();
+
+        // TODO fill smallest vectors first
+        // TODO don't ignore affinity
+
+        for (i, msi) in range.iter_mut().enumerate() {
+            let row = &self.msi_vectors[i];
+            let mut row = row.lock();
+
+            row.push(handler);
+
+            infoln!(
+                "Bind {}:{} -> apic{}:msi{}",
+                handler.display_name(),
+                i,
+                self.id,
+                i
+            );
+
+            let value = 32 + APIC_MSI_OFFSET + i as u32;
+            let address = IntoRaw::<usize>::into_raw(Self::base()) | ((self.id as usize) << 12);
+
+            *msi = MsiInfo {
+                address,
+                value,
+                vector: i,
+                affinity: InterruptAffinity::Specific(self.id as _),
+            };
+        }
+
+        Ok(())
+    }
+}
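
The address/value pair composed above follows the x86 MSI format: the message address selects the target LAPIC (the 0xFEE0_0000 region, with the destination APIC ID in bits 12..20), and the message data carries the CPU vector. A worked sketch of the arithmetic, assuming the usual 0xFEE00000 base reported by `MSR_IA32_APIC_BASE`:

```rust
/// Composes an MSI address/data pair for a fixed-delivery, physical-destination
/// message (illustrative; mirrors the register_msi_range computation above).
fn msi_pair(lapic_base: usize, apic_id: usize, vector: u32) -> (usize, u32) {
    (lapic_base | (apic_id << 12), vector)
}

fn main() {
    // APIC_MSI_OFFSET = 4 + (0xDF - 4 - 16) = 207, so the first MSI slot
    // lands on CPU vector 32 + 207 = 239
    let (addr, data) = msi_pair(0xFEE0_0000, 1, 32 + 207);
    assert_eq!(addr, 0xFEE0_1000);
    assert_eq!(data, 239);
}
```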
+
+impl LocalInterruptController for LocalApic {
+    fn send_ipi(&self, target: IpiDeliveryTarget, msg: IpiMessage) -> Result<(), Error> {
+        while self.regs.ICR0.matches_all(ICR0::DeliveryStatus::SET) {
+            core::hint::spin_loop();
+        }
+
+        // TODO use NMI or regular interrupt depending on severity of the message
+        match target {
+            IpiDeliveryTarget::OtherCpus => {
+                let local = Cpu::local();
+                let local_id = local.id() as usize;
+
+                for i in 0..CPU_COUNT.load(Ordering::Acquire) {
+                    if i != local_id {
+                        Cpu::push_ipi_queue(i as u32, msg);
+                    }
+                }
+
+                self.regs.ICR1.write(ICR1::PhysicalDestination.val(0));
+                self.regs.ICR0.write(
+                    ICR0::Vector.val(APIC_IPI_VECTOR + 32)
+                        + ICR0::Destination::NMI
+                        + ICR0::DestinationType::AllExceptThis,
+                );
+
+                Ok(())
+            }
+            IpiDeliveryTarget::ThisCpu => todo!(),
+            IpiDeliveryTarget::Specific(_) => todo!(),
+        }
+    }
+
+    unsafe fn init_ap(&self) -> Result<(), Error> {
+        todo!()
+    }
+}
+
+impl LocalApic {
+    /// Constructs a new instance of Local APIC.
+    ///
+    /// # Safety
+    ///
+    /// Only meant to be called once per processor during their init.
+    pub unsafe fn new() -> Self {
+        let regs = DeviceMemoryIo::<Regs>::map(Self::base(), Default::default()).unwrap();
+
+        let id = regs.Id.read(Id::ApicId);
+
+        if Self::is_bootstrap_cpu() {
+            BSP_APIC_ID.init(id);
+        }
+
+        Self::enable();
+
+        // Configure spurious interrupt handler
+        regs.SpuriousVector.write(
+            SpuriousVector::SoftwareEnable::SET
+                + SpuriousVector::Vector.val(APIC_SPURIOUS_VECTOR + 32),
+        );
+
+        // Configure task priority register
+        regs.TaskPriorityRegister.set(0);
+
+        // Enable timer
+        regs.TimerDivideConfig.set(0x3);
+        regs.TimerInitCount.set(TIMER_INTERVAL);
+
+        // Configure local interrupt vectors
+        regs.TimerLocalVectorEntry.write(
+            TimerLocalVectorEntry::Vector.val(APIC_TIMER_VECTOR + 32)
+                + TimerLocalVectorEntry::Mask::Unmasked
+                + TimerLocalVectorEntry::TimerMode::Periodic,
+        );
+        // LINT0 unmasked, leave LINT1 masked
+        regs.LInt0.write(
+            LocalVectorEntry::Mask::Unmasked
+                + LocalVectorEntry::Vector.val(APIC_LINT0_VECTOR + 32)
+                + LocalVectorEntry::DeliveryMode::ExtINT,
+        );
+        regs.LInt1.write(
+            LocalVectorEntry::Mask::Masked + LocalVectorEntry::Vector.val(APIC_LINT1_VECTOR + 32),
+        );
+
+        let msi_vectors = vec![IrqSafeSpinlock::new(Vec::new()); MAX_MSI_VECTORS as _];
+
+        Self {
+            id,
+            regs,
+            msi_vectors,
+        }
+    }
+
+    #[inline]
+    fn base() -> PhysicalAddress {
+        PhysicalAddress::from_raw(MSR_IA32_APIC_BASE.read_base())
+    }
+
+    #[inline]
+    fn is_bootstrap_cpu() -> bool {
+        MSR_IA32_APIC_BASE.read(MSR_IA32_APIC_BASE::BootstrapCpuCore) != 0
+    }
+
+    #[inline]
+    fn enable() {
+        MSR_IA32_APIC_BASE.modify(
+            MSR_IA32_APIC_BASE::ApicEnable::SET + MSR_IA32_APIC_BASE::ExtendedEnable::CLEAR,
+        );
+    }
+}
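
The `apic` module that follows fixes the CPU-side vector layout; the numbers are worth checking once. With the exception range occupying CPU vectors 0..32 and all APIC-relative vectors shifted by 32, the constants work out as in this sketch (values derived here, not imported):

```rust
// Mirrors the constants defined in apic/mod.rs below
const APIC_SPURIOUS_VECTOR: u32 = 0xDF; // 223
const APIC_EXTERNAL_OFFSET: u32 = 4;
const MAX_MSI_VECTORS: u32 = 16;
const MAX_EXTERNAL_VECTORS: u32 = APIC_SPURIOUS_VECTOR - APIC_EXTERNAL_OFFSET - MAX_MSI_VECTORS;
const APIC_MSI_OFFSET: u32 = APIC_EXTERNAL_OFFSET + MAX_EXTERNAL_VECTORS;

fn main() {
    // CPU vector = 32 + APIC-relative vector
    assert_eq!(MAX_EXTERNAL_VECTORS, 203); // I/O APIC IRQs: CPU vectors 36..239
    assert_eq!(APIC_MSI_OFFSET, 207); // MSIs: CPU vectors 239..255
    assert_eq!(32 + APIC_SPURIOUS_VECTOR, 255); // spurious: CPU vector 255
}
```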
diff --git a/kernel/src/arch/x86_64/apic/mod.rs b/kernel/src/arch/x86_64/apic/mod.rs
new file mode 100644
index 00000000..89d8ce4c
--- /dev/null
+++ b/kernel/src/arch/x86_64/apic/mod.rs
@@ -0,0 +1,131 @@
+//! x86-64 APIC interface (Local + I/O)
+
+use core::arch::global_asm;
+
+use kernel_arch::task::Scheduler;
+use kernel_arch_x86_64::context::IrqFrame;
+use libk::{
+    arch::Cpu,
+    device::{external_interrupt_controller, message_interrupt_controller},
+};
+use libk_thread::thread::Thread;
+use static_assertions::{const_assert, const_assert_eq};
+
+use crate::task::process::ProcessManagerImpl;
+
+use super::exception;
+
+pub mod ioapic;
+pub mod local;
+
+// I/O APIC vectors 0..MAX_EXTERNAL_VECTORS are mapped to the BSP Local APIC's vector range
+// starting at APIC_EXTERNAL_OFFSET
+
+/// Fixed IRQ vector for Local APIC timer
+pub const APIC_TIMER_VECTOR: u32 = 0x00;
+/// Fixed IRQ vector for LINT0 line
+pub const APIC_LINT0_VECTOR: u32 = 0x01;
+/// Fixed IRQ vector for LINT1 line
+pub const APIC_LINT1_VECTOR: u32 = 0x02;
+/// Fixed vector for inter-processor interrupt
+pub const APIC_IPI_VECTOR: u32 = 0x03;
+/// Fixed vector for spurious interrupt
+pub const APIC_SPURIOUS_VECTOR: u32 = 0xDF;
+/// Start of the I/O APIC IRQ range
+pub const APIC_EXTERNAL_OFFSET: u32 = 4;
+/// Start of the MSI range
+pub const APIC_MSI_OFFSET: u32 = APIC_EXTERNAL_OFFSET + MAX_EXTERNAL_VECTORS;
+/// Maximum number of APIC vectors allocated for handling IRQs from I/O APIC
+pub const MAX_EXTERNAL_VECTORS: u32 = APIC_SPURIOUS_VECTOR - APIC_EXTERNAL_OFFSET - MAX_MSI_VECTORS;
+/// Number of I/O APIC IRQ vectors that are actually populated
+pub const POPULATED_EXTERNAL_VECTORS: u32 = 16;
+/// Maximum number of APIC vectors allocated for handling MSIs
+pub const MAX_MSI_VECTORS: u32 = 16;
+
+const_assert!(POPULATED_EXTERNAL_VECTORS <= MAX_EXTERNAL_VECTORS);
+const_assert_eq!(APIC_MSI_OFFSET + MAX_MSI_VECTORS, APIC_SPURIOUS_VECTOR);
+
+/// Fills the IDT with interrupt vectors for this APIC
+pub fn setup_vectors(idt: &mut [exception::Entry]) {
+    extern "C" {
+        // IRQ vectors
+        static __x86_64_apic_vectors: [usize; 224];
+    }
+
+    for (i, &entry) in unsafe { __x86_64_apic_vectors.iter() }.enumerate() {
+        idt[i] = exception::Entry::new(
+            entry,
+            0x08,
+            exception::Entry::PRESENT | exception::Entry::INT32,
+        );
+    }
+}
+
+unsafe extern "C" fn irq_handler(vector: usize, frame: *mut IrqFrame) {
+    if vector >= POPULATED_EXTERNAL_VECTORS as _ {
+        todo!("Got a weird IRQ with vector {}", vector);
+    }
+
+    let cpu = Cpu::local();
+    let frame = &mut *frame;
+
+    external_interrupt_controller().handle_specific_irq(vector);
+    cpu.local_apic().clear_interrupt();
+
+    if let Some(thread) = Thread::get_current() {
+        thread.handle_pending_signals::<ProcessManagerImpl>(frame);
+    }
+}
+
+unsafe extern "C" fn msi_handler(vector: usize, frame: *mut IrqFrame) {
+    if vector >= MAX_MSI_VECTORS as _ {
+        todo!("Got a weird MSI with vector {}", vector);
+    }
+
+    let cpu = Cpu::local();
+    let frame = &mut *frame;
+
+    message_interrupt_controller().handle_msi(vector);
+    cpu.local_apic().clear_interrupt();
+
+    if let Some(thread) = Thread::get_current() {
+        thread.handle_pending_signals::<ProcessManagerImpl>(frame);
+    }
+}
+
+unsafe extern "C" fn local_timer_irq_handler(frame: *mut IrqFrame) {
+    let frame = &mut *frame;
+    let cpu = Cpu::local();
+    // Clear interrupt before switching, because otherwise we won't receive the next one
+    cpu.local_apic().clear_interrupt();
+
+    if let Some(queue) = cpu.try_get_scheduler() {
+        queue.yield_cpu();
+    }
+
+    if let Some(thread) = Thread::get_current() {
+        thread.handle_pending_signals::<ProcessManagerImpl>(frame);
+    }
+}
+
+unsafe extern "C" fn dummy_irq_handler() {
+    todo!()
+}
+
+unsafe extern "C" fn ipi_handler() {
+    let cpu = Cpu::local();
+    todo!("Processor {} received an IPI", cpu.id());
+}
+
+global_asm!(
include_str!("vectors.S"), + local_timer_irq_handler = sym local_timer_irq_handler, + irq_handler = sym irq_handler, + msi_handler = sym msi_handler, + ipi_handler = sym ipi_handler, + dummy_irq_handler = sym dummy_irq_handler, + irq_vector_offset = const APIC_EXTERNAL_OFFSET, + irq_vector_count = const MAX_EXTERNAL_VECTORS, + msi_vector_offset = const APIC_MSI_OFFSET, + msi_vector_count = const MAX_MSI_VECTORS, + options(att_syntax) +); diff --git a/kernel/src/arch/x86_64/apic/vectors.S b/kernel/src/arch/x86_64/apic/vectors.S new file mode 100644 index 00000000..2ff7a27b --- /dev/null +++ b/kernel/src/arch/x86_64/apic/vectors.S @@ -0,0 +1,223 @@ +.altmacro + +.global __x86_64_apic_vectors + +.set IRQ_REG_RAX, 0 * 8 +.set IRQ_REG_RCX, 1 * 8 +.set IRQ_REG_RDX, 2 * 8 +.set IRQ_REG_RBX, 3 * 8 +.set IRQ_REG_RSI, 4 * 8 +.set IRQ_REG_RDI, 5 * 8 +.set IRQ_REG_RBP, 6 * 8 +.set IRQ_REG_R8, 7 * 8 +.set IRQ_REG_R9, 8 * 8 +.set IRQ_REG_R10, 9 * 8 +.set IRQ_REG_R11, 10 * 8 +.set IRQ_REG_R12, 11 * 8 +.set IRQ_REG_R13, 12 * 8 +.set IRQ_REG_R14, 13 * 8 +.set IRQ_REG_R15, 14 * 8 + +// 15 registers + stack align word if needed +.set IRQ_STATE_SIZE, 15 * 8 + +.macro IRQ_SWAPGS_IF_NEEDED, cs_off + cmpq $0x08, \cs_off(%rsp) + je 1f + swapgs +1: +.endm + +.macro IRQ_SAVE_STATE + // Save state + subq $IRQ_STATE_SIZE, %rsp + + movq %rax, IRQ_REG_RAX(%rsp) + movq %rcx, IRQ_REG_RCX(%rsp) + movq %rdx, IRQ_REG_RDX(%rsp) + movq %rbx, IRQ_REG_RBX(%rsp) + movq %rsi, IRQ_REG_RSI(%rsp) + movq %rdi, IRQ_REG_RDI(%rsp) + movq %rbp, IRQ_REG_RBP(%rsp) + movq %r8, IRQ_REG_R8(%rsp) + movq %r9, IRQ_REG_R9(%rsp) + movq %r10, IRQ_REG_R10(%rsp) + movq %r11, IRQ_REG_R11(%rsp) + movq %r12, IRQ_REG_R12(%rsp) + movq %r13, IRQ_REG_R13(%rsp) + movq %r14, IRQ_REG_R14(%rsp) + movq %r15, IRQ_REG_R15(%rsp) + + // Save current stack into %rbp + movq %rsp, %rbp + + // Force correct stack alignment + orq $0xF, %rsp + xorq $0xF, %rsp +.endm + +.macro IRQ_RESTORE_STATE + // Restore the stack pointer + movq %rbp, %rsp + + // Restore state + movq IRQ_REG_RAX(%rsp), %rax + movq IRQ_REG_RCX(%rsp), %rcx + movq IRQ_REG_RDX(%rsp), %rdx + movq IRQ_REG_RBX(%rsp), %rbx + movq IRQ_REG_RSI(%rsp), %rsi + movq IRQ_REG_RDI(%rsp), %rdi + movq IRQ_REG_RBP(%rsp), %rbp + movq IRQ_REG_R8(%rsp), %r8 + movq IRQ_REG_R9(%rsp), %r9 + movq IRQ_REG_R10(%rsp), %r10 + movq IRQ_REG_R11(%rsp), %r11 + movq IRQ_REG_R12(%rsp), %r12 + movq IRQ_REG_R13(%rsp), %r13 + movq IRQ_REG_R14(%rsp), %r14 + movq IRQ_REG_R15(%rsp), %r15 + + addq $IRQ_STATE_SIZE, %rsp +.endm + +.macro IRQ_VECTOR, n +irq_vector_\n: + // %rsp + 0: %rip + // %rsp + 8: %cs + IRQ_SWAPGS_IF_NEEDED 8 + IRQ_SAVE_STATE + + // Force correct segment registers + mov $0x10, %ax + mov %ax, %ss + mov %ax, %ds + mov %ax, %es + + mov $\n, %rdi + mov %rbp, %rsi + call {irq_handler} + + IRQ_RESTORE_STATE + IRQ_SWAPGS_IF_NEEDED 8 + + iretq +.endm + +.macro IRQ_VECTOR_ENTRY, n +.quad irq_vector_\n +.endm + +.macro IRQ_VECTORS, start, end +.set i, 0 +.rept \end - \start + IRQ_VECTOR %i + .set i, i+1 +.endr +.endm + +.macro IRQ_VECTOR_ENTRIES, start, end +.set i, 0 +.rept \end - \start + IRQ_VECTOR_ENTRY %i + .set i, i+1 +.endr +.endm + +.macro MSI_VECTOR, n +msi_vector_\n: + // %rsp + 0: %rip + // %rsp + 8: %cs + IRQ_SWAPGS_IF_NEEDED 8 + IRQ_SAVE_STATE + + // Force correct segment registers + mov $0x10, %ax + mov %ax, %ss + mov %ax, %ds + mov %ax, %es + + mov $\n, %rdi + mov %rbp, %rsi + call {msi_handler} + + IRQ_RESTORE_STATE + IRQ_SWAPGS_IF_NEEDED 8 + + iretq +.endm + +.macro MSI_VECTOR_ENTRY, n +.quad msi_vector_\n +.endm + 
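
One subtlety in the IRQ_SAVE_STATE macro above: the `orq $0xF` / `xorq $0xF` pair rounds %rsp down to a 16-byte boundary (set the low four bits, then clear them), satisfying the SysV ABI before calling into Rust, while the saved %rbp still points at the unaligned frame for the restore path. The same computation as a Rust sketch:

```rust
/// Rounds a stack pointer down to a 16-byte boundary, exactly like the
/// `orq $0xF, %rsp; xorq $0xF, %rsp` pair in IRQ_SAVE_STATE.
fn align_down_16(rsp: u64) -> u64 {
    (rsp | 0xF) ^ 0xF
}

fn main() {
    assert_eq!(align_down_16(0x7FFF_FFF8), 0x7FFF_FFF0);
    assert_eq!(align_down_16(0x7FFF_FFF0), 0x7FFF_FFF0);
}
```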
+.macro MSI_VECTORS, start, end +.set i, 0 +.rept \end - \start + MSI_VECTOR %i + .set i, i+1 +.endr +.endm + +.macro MSI_VECTOR_ENTRIES, start, end +.set i, 0 +.rept \end - \start + MSI_VECTOR_ENTRY %i + .set i, i+1 +.endr +.endm + +.macro FILL_EMPTY_SPACE, start, end +.set i, 0 +.rept \end - \start +.quad dummy_vector +.endr +.endm + +.section .text +local_timer_vector: + IRQ_SWAPGS_IF_NEEDED 8 + IRQ_SAVE_STATE + + mov %rbp, %rdi + call {local_timer_irq_handler} + + IRQ_RESTORE_STATE + IRQ_SWAPGS_IF_NEEDED 8 + iretq + +ipi_vector: + IRQ_SWAPGS_IF_NEEDED 8 + IRQ_SAVE_STATE + + call {ipi_handler} + jmp . + +dummy_vector: + IRQ_SWAPGS_IF_NEEDED 8 + IRQ_SAVE_STATE + + call {dummy_irq_handler} + jmp . + +IRQ_VECTORS {irq_vector_offset}, {irq_vector_offset} + {irq_vector_count} +MSI_VECTORS {msi_vector_offset}, {msi_vector_offset} + {msi_vector_count} + +.section .rodata +// 224 vectors: 256 - 32 (exceptions) +.p2align 4 +.type __x86_64_apic_vectors, @object +__x86_64_apic_vectors: + // Local timer IRQ: 0 + .quad local_timer_vector + // Dummy entries (currently): 1..=2 + .quad dummy_vector + .quad dummy_vector + // IPI vector: 3 + .quad ipi_vector + // Regular IRQ vectors: 4..207 + IRQ_VECTOR_ENTRIES {irq_vector_offset}, {irq_vector_offset} + {irq_vector_count} + // MSI vectors: 207..223 + MSI_VECTOR_ENTRIES {msi_vector_offset}, {msi_vector_offset} + {msi_vector_count} + // Spurious interrupt vector: 223 + .quad dummy_vector +.size __x86_64_apic_vectors, . - __x86_64_apic_vectors diff --git a/kernel/src/arch/x86_64/boot/ap_boot.S b/kernel/src/arch/x86_64/boot/ap_boot.S new file mode 100644 index 00000000..25630dd5 --- /dev/null +++ b/kernel/src/arch/x86_64/boot/ap_boot.S @@ -0,0 +1,115 @@ +[org 0x7000] +[bits 16] + +; Data at 0x6000 + +; Layout: +; +0x00: cr3 (only u32) +; +0x08: stack_base +; +0x10: stack_size +; +0x18: entry + +__x86_64_ap_bootstrap: + cli + + ; Reset DS + mov ax, 0 + mov ds, ax + + ; Disable NMI + in al, 0x70 + or al, 0x80 + out 0x70, al + + ; Load GDT32 and enable protected mode + lgdt [gdt32_ptr] + mov eax, cr0 + or al, 1 + mov cr0, eax + + jmp 0x08:ap_start_32 + +[bits 32] +ap_start_32: + cli + + ; Proper DS + mov ax, 0x10 + mov ds, ax + + ; Enable PSE+PAE + mov eax, cr4 + or eax, (1 << 5) | (1 << 4) + mov cr4, eax + + ; Load CR3 + mov eax, dword [0x6000 + 0x00] + mov cr3, eax + + ; Enable EFER.LME + EFER.NXE + mov ecx, 0xC0000080 + rdmsr + or eax, 1 << 8 + or eax, 1 << 11 + wrmsr + + ; Enable paging + mov eax, cr0 + or eax, 1 << 31 + mov cr0, eax + + ; Load GDT64 + lgdt [gdt64_ptr] + + jmp 0x08:ap_start_64 + +[bits 64] +ap_start_64: + mov rax, 0x10 + mov ds, rax + mov es, rax + mov ss, rax + + ; Load stack + mov rsp, qword [0x6000 + 0x08] + add rsp, qword [0x6000 + 0x10] + mov rbp, rsp + + ; Push a dummy value onto the stack for align + push qword 0 + + ; Jump to kernel entry + mov rax, qword [0x6000 + 0x18] + + jmp rax + +align 4 +gdt32: + ; NULL + dq 0 + ; CS32 + dq 0xCF98000000FFFF + ; DS32 + dq 0xCF92000000FFFF +gdt32_end: + +align 4 +gdt32_ptr: + ; limit + dw gdt32_end - gdt32 - 1 + dd gdt32 + +align 4 +gdt64: + ; NULL + dq 0 + ; CS64 + dq 0x00209A0000000000 + ; DS64 + dq 0x0000920000000000 +gdt64_end: + +align 4 +gdt64_ptr: + dw gdt64_end - gdt64 - 1 + dd gdt64 diff --git a/kernel/src/arch/x86_64/boot/mod.rs b/kernel/src/arch/x86_64/boot/mod.rs new file mode 100644 index 00000000..e1d0ef21 --- /dev/null +++ b/kernel/src/arch/x86_64/boot/mod.rs @@ -0,0 +1,168 @@ +//! 
x86-64 boot and entry functions +use core::{arch::global_asm, sync::atomic::Ordering}; + +use kernel_arch_x86_64::{registers::MSR_IA32_KERNEL_GS_BASE, CPU_COUNT}; +use kernel_fs::devfs; +use libk::runtime; +use tock_registers::interfaces::Writeable; +use yboot_proto::{ + v1::{FramebufferOption, MemoryMap}, + LoadProtocolHeader, LoadProtocolV1, KERNEL_MAGIC, LOADER_MAGIC, PROTOCOL_VERSION_1, +}; + +use crate::{kernel_main, kernel_secondary_main, mem::KERNEL_VIRT_OFFSET}; + +use super::{cpuid::init_cpuid, exception, PLATFORM}; + +pub enum BootData { + YBoot(&'static LoadProtocolV1), +} + +const BOOT_STACK_SIZE: usize = 1024 * 1024; + +#[repr(C, align(0x20))] +struct BootStack { + data: [u8; BOOT_STACK_SIZE], +} + +#[link_section = ".bss"] +static mut BSP_STACK: BootStack = BootStack { + data: [0; BOOT_STACK_SIZE], +}; + +#[used] +#[link_section = ".data.yboot"] +static YBOOT_DATA: LoadProtocolV1 = LoadProtocolV1 { + header: LoadProtocolHeader { + kernel_magic: KERNEL_MAGIC, + version: PROTOCOL_VERSION_1, + }, + kernel_virt_offset: KERNEL_VIRT_OFFSET as _, + + memory_map: MemoryMap { address: 0, len: 0 }, + + rsdp_address: 0, + initrd_address: 0, + initrd_size: 0, + + opt_framebuffer: FramebufferOption { + req_width: 640, + req_height: 480, + + res_width: 0, + res_height: 0, + res_stride: 0, + res_address: 0, + res_size: 0, + }, +}; + +unsafe fn init_dummy_cpu() { + // TODO this is incorrect + static UNINIT_CPU_INNER: usize = 0; + static UNINIT_CPU_PTR: &usize = &UNINIT_CPU_INNER; + + // Point %gs to a dummy structure so that Cpu::get_local() works properly even before the CPU + // data structure is initialized + MSR_IA32_KERNEL_GS_BASE.set(&UNINIT_CPU_PTR as *const _ as u64); + core::arch::asm!("swapgs"); + MSR_IA32_KERNEL_GS_BASE.set(&UNINIT_CPU_PTR as *const _ as u64); + core::arch::asm!("swapgs"); +} + +extern "C" fn __x86_64_upper_entry() -> ! { + // Safety: ok, CPU hasn't been initialized yet and it's the early kernel entry + unsafe { + init_dummy_cpu(); + } + + PLATFORM.set_boot_data(BootData::YBoot(&YBOOT_DATA)); + + // Gather available CPU features + init_cpuid(); + + // Setup memory management: kernel virtual memory tables, physical page manager and heap + unsafe { + PLATFORM + .init_memory_management() + .expect("Could not initialize memory management"); + } + + unsafe { + exception::init_exceptions(0); + } + + // Initialize async executor queue + runtime::init_task_queue(); + + devfs::init(); + + // Initializes: local CPU, platform devices (timers/serials/etc), debug output + unsafe { + PLATFORM + .init_platform(0) + .expect("Could not initialize the platform"); + } + + kernel_main() +} + +/// Application processor entry point +pub extern "C" fn __x86_64_ap_entry() -> ! 
{ + let cpu_id = CPU_COUNT.load(Ordering::Acquire); + + unsafe { + init_dummy_cpu(); + } + + // Still not initialized: GDT, IDT, CPU features, syscall, kernel_gs_base + + unsafe { + // Cpu::init_local(LocalApic::new(), cpu_id as u32); + // syscall::init_syscall(); + exception::init_exceptions(cpu_id); + + PLATFORM + .init_platform(cpu_id) + .expect("Could not initialize the platform (AP)"); + } + + CPU_COUNT.fetch_add(1, Ordering::Release); + + kernel_secondary_main() +} + +global_asm!( + r#" +// {boot_data} +.global __x86_64_entry + +.section .text.entry +__x86_64_entry: + cli + mov ${yboot_loader_magic}, %edi + cmp %edi, %eax + je 2f + + // (Currently) unsupported bootloader +1: + cli + hlt + jmp 1b + +2: + // yboot entry method + movabsq ${stack_bottom} + {stack_size}, %rax + movabsq ${entry}, %rcx + mov %rax, %rsp + callq *%rcx + +.section .text +"#, + yboot_loader_magic = const LOADER_MAGIC, + stack_size = const BOOT_STACK_SIZE, + stack_bottom = sym BSP_STACK, + boot_data = sym YBOOT_DATA, + entry = sym __x86_64_upper_entry, + options(att_syntax) +); diff --git a/kernel/src/arch/x86_64/cpuid.rs b/kernel/src/arch/x86_64/cpuid.rs new file mode 100644 index 00000000..a4afc17e --- /dev/null +++ b/kernel/src/arch/x86_64/cpuid.rs @@ -0,0 +1,111 @@ +//! x86-64 CPUID interface + +use bitflags::bitflags; +use kernel_arch_x86_64::registers::{CR4, XCR0}; +use libk_util::OneTimeInit; +use tock_registers::interfaces::ReadWriteable; + +bitflags! { + pub struct ProcessorFeatures: u64 { + const PDPE1GB = 1 << 0; + } +} + +bitflags! { + pub struct EcxFeatures: u32 { + const XSAVE = 1 << 26; + const AVX = 1 << 28; + } +} + +bitflags! { + pub struct EdxFeatures: u32 { + const FXSR = 1 << 24; + const PGE = 1 << 13; + } +} + +bitflags! { + pub struct ExtEdxFeatures: u32 { + const PDPE1GB = 1 << 26; + } +} + +unsafe fn raw_cpuid(eax: u32, result: &mut [u32]) { + core::arch::asm!( + r#" + push %rbx + cpuid + mov %ebx, {0:e} + pop %rbx + "#, + out(reg) result[0], + out("edx") result[1], + out("ecx") result[2], + in("eax") eax, + options(att_syntax) + ); +} + +fn cpuid_features() -> (EcxFeatures, EdxFeatures) { + let mut raw = [0; 3]; + + unsafe { + raw_cpuid(0x1, &mut raw); + } + + ( + EcxFeatures::from_bits_truncate(raw[2]), + EdxFeatures::from_bits_truncate(raw[1]), + ) +} + +fn cpuid_ext_features() -> ExtEdxFeatures { + let mut raw = [0; 3]; + + unsafe { + raw_cpuid(0x80000001, &mut raw); + } + + ExtEdxFeatures::from_bits_truncate(raw[1]) +} + +pub static PROCESSOR_FEATURES: OneTimeInit = OneTimeInit::new(); + +pub fn init_cpuid() { + let mut features = ProcessorFeatures::empty(); + + let ext_edx = cpuid_ext_features(); + + if ext_edx.contains(ExtEdxFeatures::PDPE1GB) { + features |= ProcessorFeatures::PDPE1GB; + } + + PROCESSOR_FEATURES.init(features); +} + +pub fn enable_features() { + let (ecx, edx) = cpuid_features(); + + if !ecx.contains(EcxFeatures::XSAVE) { + panic!("XSAVE feature is required"); + } + + if !edx.contains(EdxFeatures::FXSR) { + panic!("FXSR feature is required"); + } + + if !edx.contains(EdxFeatures::PGE) { + todo!("PGE feature (currently) is not optional"); + } + + CR4.modify(CR4::OSXSAVE::SET + CR4::OSFXSR::SET + CR4::PGE::SET); + + // XXX? 
SSE is supported on all x86-64s + XCR0.modify(XCR0::X87::SET + XCR0::SSE::SET); + + if ecx.contains(EcxFeatures::AVX) { + // Enable AVX + XCR0.modify(XCR0::AVX::SET); + } +} diff --git a/kernel/src/arch/x86_64/entry.S b/kernel/src/arch/x86_64/entry.S new file mode 100644 index 00000000..e69de29b diff --git a/kernel/src/arch/x86_64/exception.rs b/kernel/src/arch/x86_64/exception.rs new file mode 100644 index 00000000..1c032cfd --- /dev/null +++ b/kernel/src/arch/x86_64/exception.rs @@ -0,0 +1,273 @@ +//! x86-64 exception and interrupt handling +use core::{arch::global_asm, mem::size_of, ptr::addr_of}; + +use abi::{primitive_enum, process::Signal}; +use kernel_arch_x86_64::{context::ExceptionFrame, registers::CR3}; +use libk::arch::Cpu; +use libk_thread::thread::Thread; +use tock_registers::interfaces::Readable; + +use crate::{arch::x86_64::apic, task::process::ProcessManagerImpl}; + +use super::PLATFORM; + +primitive_enum! { + enum ExceptionKind: u64 { + DivisionError = 0, + Debug = 1, + NonMaskableInterrupt = 2, + Breakpoint = 3, + Overflow = 4, + BoundRangeExceeded = 5, + InvalidOpcode = 6, + DeviceNotAvailable = 7, + DoubleFault = 8, + InvalidTss = 10, + SegmentNotPresent = 11, + StackSegmentFault = 12, + GeneralProtectionFault = 13, + PageFault = 14, + FpuException = 16, + AlignmentCheck = 17, + MachineCheck = 18, + SimdFpuException = 19, + VirtualizationException = 20, + ControlProtectionException = 21, + + Unknown = 99, + } +} + +impl ExceptionKind { + fn ring3_possible(&self) -> bool { + matches!( + self, + Self::DivisionError + | Self::Debug + | Self::Breakpoint + | Self::Overflow + | Self::BoundRangeExceeded + | Self::InvalidOpcode + | Self::GeneralProtectionFault + | Self::PageFault + | Self::FpuException + | Self::AlignmentCheck + | Self::SimdFpuException + ) + } +} + +/// Exception table entry +#[allow(dead_code)] +#[derive(Clone, Copy)] +#[repr(packed)] +pub struct Entry { + base_lo: u16, + selector: u16, + __res0: u8, + flags: u8, + base_hi: u16, + base_ex: u32, + __res1: u32, +} + +#[allow(dead_code)] +#[repr(packed)] +struct Pointer { + limit: u16, + offset: usize, +} + +// impl ExceptionFrame { +// fn dump(&self, level: debug::LogLevel) { +// log_print_raw!(level, " CS:RIP = {:#x}:{:#x}\n", self.cs, self.rip); +// log_print_raw!(level, " SS:RSP = {:#x}:{:#x}\n", self.ss, self.rsp); +// +// log_print_raw!( +// level, +// "RAX = {:#018x}, RCX = {:#018x}\n", +// self.rax, +// self.rcx +// ); +// log_print_raw!( +// level, +// "RDX = {:#018x}, RBX = {:#018x}\n", +// self.rdx, +// self.rbx +// ); +// log_print_raw!( +// level, +// "RSI = {:#018x}, RDI = {:#018x}\n", +// self.rsi, +// self.rdi +// ); +// log_print_raw!(level, "RBP = {:#018x}\n\n", self.rbp); +// +// log_print_raw!(level, " R8 = {:#018x}, R9 = {:#018x}\n", self.r8, self.r9); +// log_print_raw!( +// level, +// "R10 = {:#018x}, R11 = {:#018x}\n", +// self.r10, +// self.r11 +// ); +// log_print_raw!( +// level, +// "R12 = {:#018x}, R13 = {:#018x}\n", +// self.r12, +// self.r13 +// ); +// log_print_raw!( +// level, +// "R14 = {:#018x}, R15 = {:#018x}\n", +// self.r14, +// self.r15 +// ); +// } +// } + +const SIZE: usize = 256; + +impl Entry { + /// Entry is valid + pub const PRESENT: u8 = 1 << 7; + /// Entry is a 32-bit interrupt + pub const INT32: u8 = 0xE; + + /// Empty entry + pub const NULL: Self = Self { + base_lo: 0, + base_hi: 0, + base_ex: 0, + selector: 0, + flags: 0, + __res0: 0, + __res1: 0, + }; + + /// Constructs an interrupt table entry + pub const fn new(base: usize, selector: u16, flags: u8) -> Self { 
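+        // The 64-bit handler address is split across three fields: bits 15:0 go to base_lo, +        // bits 31:16 to base_hi and bits 63:32 to base_ex. `selector` is the code segment +        // loaded on entry; `flags` combines PRESENT with the gate type (INT32 = 0xE, a 64-bit +        // interrupt gate).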
+ Self { + base_lo: (base & 0xFFFF) as u16, + base_hi: ((base >> 16) & 0xFFFF) as u16, + base_ex: (base >> 32) as u32, + selector, + flags, + __res0: 0, + __res1: 0, + } + } +} + +static mut IDT: [Entry; SIZE] = [Entry::NULL; SIZE]; + +fn user_exception_inner(kind: ExceptionKind, _frame: &ExceptionFrame) { + let thread = Thread::current(); + let cr3 = CR3.get(); + + warnln!("{:?} in {} {:?}", kind, thread.id, thread.name); + // XXX + // frame.dump(debug::LogLevel::Warning); + warnln!("CR3 = {:#x}", cr3); + + match kind { + ExceptionKind::PageFault => { + let cr2: usize; + unsafe { + core::arch::asm!("mov %cr2, {0}", out(reg) cr2, options(att_syntax)); + } + + warnln!("CR2 = {:#x}", cr2); + + thread.raise_signal(Signal::MemoryAccessViolation); + } + ExceptionKind::GeneralProtectionFault => { + thread.raise_signal(Signal::MemoryAccessViolation); + } + ExceptionKind::FpuException => { + todo!() + } + ExceptionKind::InvalidOpcode => { + thread.raise_signal(Signal::Aborted); + } + ExceptionKind::Breakpoint => { + todo!() + } + _ => todo!("No handler for exception: {:?}", kind), + } +} + +fn kernel_exception_inner(kind: ExceptionKind, frame: &ExceptionFrame) -> ! { + let cr3: usize; + let cr2: usize; + unsafe { + core::arch::asm!("movq %cr3, {0}", out(reg) cr3, options(att_syntax)); + core::arch::asm!("movq %cr2, {0}", out(reg) cr2, options(att_syntax)); + } + + fatalln!("{:?} in KERNEL, frame {:p}, cr3 = {:#x}", kind, frame, cr3); + if kind == ExceptionKind::PageFault { + fatalln!("cr2 = {:#x}", cr2); + } + + // XXX + // frame.dump(debug::LogLevel::Fatal); + + panic!("Irrecoverable exception"); +} + +extern "C" fn __x86_64_exception_handler(frame: *mut ExceptionFrame) { + let frame = unsafe { &mut *frame }; + let kind = ExceptionKind::try_from(frame.exc_number).unwrap_or(ExceptionKind::Unknown); + + if kind.ring3_possible() && frame.cs == 0x23 { + user_exception_inner(kind, frame); + + unsafe { + Thread::current().handle_pending_signals::(frame); + } + } else { + if kind == ExceptionKind::NonMaskableInterrupt { + let cpu = Cpu::local(); + + if let Some(msg) = cpu.get_ipi() { + unsafe { + PLATFORM.handle_ipi(msg); + } + } + } + + kernel_exception_inner(kind, frame) + } +} + +/// Initializes the interrupt descriptor table for the given CPU. +/// +/// # Safety +/// +/// Only meant to be called once per each CPU during their init. +pub unsafe fn init_exceptions(cpu_index: usize) { + if cpu_index == 0 { + extern "C" { + static __x86_64_exception_vectors: [usize; 32]; + } + + for (i, &entry) in __x86_64_exception_vectors.iter().enumerate() { + IDT[i] = Entry::new(entry, 0x08, Entry::PRESENT | Entry::INT32); + } + + apic::setup_vectors(&mut IDT[32..]); + } + + let idtr = Pointer { + limit: (IDT.len() * size_of::()) as u16 - 1, + offset: addr_of!(IDT) as usize, + }; + + core::arch::asm!("wbinvd; lidt ({0})", in(reg) &idtr, options(att_syntax)); +} + +global_asm!( + include_str!("vectors.S"), + exception_handler = sym __x86_64_exception_handler, + options(att_syntax) +); diff --git a/kernel/src/arch/x86_64/gdt.rs b/kernel/src/arch/x86_64/gdt.rs new file mode 100644 index 00000000..300f50e6 --- /dev/null +++ b/kernel/src/arch/x86_64/gdt.rs @@ -0,0 +1,199 @@ +//! 
x86-64 Global Descriptor Table interface +use core::mem::size_of; + +use alloc::boxed::Box; + +#[allow(dead_code)] +#[repr(packed)] +struct Entry { + limit_lo: u16, + base_lo: u16, + base_mi: u8, + access: u8, + flags: u8, + base_hi: u8, +} + +#[allow(dead_code)] +#[repr(packed)] +struct Tss { + _0: u32, + rsp0: u64, + rsp1: u64, + rsp2: u64, + _1: u32, + ist1: u64, + ist2: u64, + ist3: u64, + ist4: u64, + ist5: u64, + ist6: u64, + ist7: u64, + _2: u64, + _3: u16, + iopb_base: u16, +} + +#[allow(dead_code)] +#[repr(packed)] +struct Pointer { + limit: u16, + offset: usize, +} + +impl Tss { + const NULL: Self = Self { + _0: 0, + rsp0: 0, + rsp1: 0, + rsp2: 0, + _1: 0, + ist1: 0, + ist2: 0, + ist3: 0, + ist4: 0, + ist5: 0, + ist6: 0, + ist7: 0, + _2: 0, + _3: 0, + iopb_base: size_of::() as u16, + }; +} + +impl Entry { + const FLAG_LONG: u8 = 1 << 5; + const ACC_PRESENT: u8 = 1 << 7; + const ACC_SYSTEM: u8 = 1 << 4; + const ACC_EXECUTE: u8 = 1 << 3; + const ACC_WRITE: u8 = 1 << 1; + + #[allow(unused)] + const ACC_RING3: u8 = 3 << 5; + #[allow(unused)] + const ACC_ACCESS: u8 = 1 << 0; + + const NULL: Self = Self { + base_lo: 0, + base_mi: 0, + base_hi: 0, + access: 0, + flags: 0, + limit_lo: 0, + }; + const RING0_CS64: Self = Entry::new( + 0, + 0, + Entry::FLAG_LONG, + Entry::ACC_PRESENT | Entry::ACC_SYSTEM | Entry::ACC_EXECUTE, + ); + const RING0_DS64: Self = Entry::new( + 0, + 0, + 0, + Entry::ACC_PRESENT | Entry::ACC_SYSTEM | Entry::ACC_WRITE, + ); + const RING3_DS64: Self = Entry::new( + 0, + 0, + 0, + Entry::ACC_PRESENT | Entry::ACC_SYSTEM | Entry::ACC_RING3 | Entry::ACC_WRITE, + ); + const RING3_CS64: Self = Entry::new( + 0, + 0, + Entry::FLAG_LONG, + Entry::ACC_PRESENT | Entry::ACC_SYSTEM | Entry::ACC_RING3 | Entry::ACC_EXECUTE, + ); + + const fn new(base: u32, limit: u32, flags: u8, access: u8) -> Self { + Self { + base_lo: (base & 0xFFFF) as u16, + base_mi: ((base >> 16) & 0xFF) as u8, + base_hi: ((base >> 24) & 0xFF) as u8, + access, + flags: (flags & 0xF0) | (((limit >> 16) & 0xF) as u8), + limit_lo: (limit & 0xFFFF) as u16, + } + } + + const fn tss_low(base: u32, limit: u32) -> Self { + Self::new( + base, + limit, + Entry::FLAG_LONG, + Entry::ACC_ACCESS | Entry::ACC_PRESENT | Entry::ACC_EXECUTE, + ) + } +} + +// NULL, CS64, DS64, DS64, CS64, TSS, TSSext +const GDT_SIZE: usize = 7; + +/// Initializes the global descriptor table. +/// +/// # Safety +/// +/// Only meant to be called by the CPUs during their early init. 
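+/// +/// After this call the selector layout is: 0x00 null, 0x08 kernel CS, 0x10 kernel DS, +/// 0x18 user DS, 0x20 user CS, 0x28 TSS (a TSS descriptor occupies two GDT slots in long +/// mode). Returns the address of the heap-allocated TSS so the caller can keep it in +/// per-CPU data.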
+pub unsafe fn init() -> usize { + // Won't be deallocated, so leaks are not a concern + let tss_addr = Box::into_raw(Box::new(Tss::NULL)) as usize; + let mut gdt = Box::new([ + Entry::NULL, + Entry::RING0_CS64, + Entry::RING0_DS64, + Entry::RING3_DS64, + Entry::RING3_CS64, + Entry::tss_low(tss_addr as u32, (size_of::() - 1) as u32), + Entry::NULL, + ]); + + let tss_high = &mut gdt[6] as *mut _ as *mut u64; + tss_high.write_unaligned((tss_addr >> 32) as u64); + + let gdt_addr = Box::into_raw(gdt) as usize; + + let gdtr = Pointer { + limit: (GDT_SIZE * size_of::()) as u16 - 1, + offset: gdt_addr, + }; + + core::arch::asm!( + r#" + wbinvd + lgdt ({0}) + + // Have to use iretq here + mov %rsp, %rcx + leaq 1f(%rip), %rax + + // SS:RSP + pushq $0x10 + pushq %rcx + + // RFLAGS + pushfq + + // CS:RIP + pushq $0x08 + pushq %rax + iretq + 1: + mov $0x10, %ax + mov %ax, %ds + mov %ax, %es + mov %ax, %fs + mov %ax, %gs + mov %ax, %ss + + mov $0x28, %ax + ltr %ax + "#, + in(reg) &gdtr, + out("rax") _, + out("rcx") _, + options(att_syntax) + ); + + tss_addr +} diff --git a/kernel/src/arch/x86_64/intrinsics.rs b/kernel/src/arch/x86_64/intrinsics.rs new file mode 100644 index 00000000..b1d9274c --- /dev/null +++ b/kernel/src/arch/x86_64/intrinsics.rs @@ -0,0 +1,138 @@ +//! x86-64 architecture helper functions + +use core::marker::PhantomData; + +/// Wrapper struct providing access to an x86 I/O port +#[derive(Clone, Debug)] +#[repr(transparent)] +pub struct IoPort { + address: u16, + _pd: PhantomData, +} + +/// Common interface for reading and writing I/O ports +pub trait IoPortAccess { + /// Reads a value from the port + fn read(&self) -> T; + /// Writes a value to the port + fn write(&self, value: T); +} + +impl IoPort { + /// Constructs a new I/O port interface + pub const fn new(address: u16) -> Self { + Self { + address, + _pd: PhantomData, + } + } +} + +impl IoPortAccess for IoPort { + fn write(&self, value: u8) { + unsafe { + outb(self.address, value); + } + } + + fn read(&self) -> u8 { + unsafe { inb(self.address) } + } +} + +impl IoPortAccess for IoPort { + fn write(&self, value: u16) { + unsafe { + outw(self.address, value); + } + } + + fn read(&self) -> u16 { + unsafe { inw(self.address) } + } +} + +impl IoPortAccess for IoPort { + fn write(&self, value: u32) { + unsafe { + outl(self.address, value); + } + } + + fn read(&self) -> u32 { + unsafe { inl(self.address) } + } +} + +/// Reads a byte from the I/O port +/// +/// # Safety +/// +/// Provides direct access to port I/O, unsafe. +#[inline] +pub unsafe fn inb(port: u16) -> u8 { + let value: u8; + core::arch::asm!("inb %dx, %al", in("dx") port, out("al") value, options(att_syntax)); + value +} + +/// Reads a 16-bit value from the I/O port +/// +/// # Safety +/// +/// Provides direct access to port I/O, unsafe. +#[inline] +pub unsafe fn inw(port: u16) -> u16 { + let value: u16; + core::arch::asm!("inw %dx, %ax", in("dx") port, out("ax") value, options(att_syntax)); + value +} + +/// Reads a 32-bit value from the I/O port +/// +/// # Safety +/// +/// Provides direct access to port I/O, unsafe. +#[inline] +pub unsafe fn inl(port: u16) -> u32 { + let value: u32; + core::arch::asm!("inl %dx, %eax", in("dx") port, out("eax") value, options(att_syntax)); + value +} + +/// Writes a byte to the I/O port. +/// +/// # Safety +/// +/// Provides direct access to port I/O, unsafe. 
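+/// +/// A usage sketch (mirrors the NMI-disable sequence in ap_boot.S; shown here for +/// illustration only, not part of any driver): +/// +/// ```ignore +/// // Disable NMIs by setting bit 7 of the CMOS index port +/// unsafe { outb(0x70, inb(0x70) | 0x80) }; +/// ```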
+#[inline] +pub unsafe fn outb(port: u16, value: u8) { + core::arch::asm!("outb %al, %dx", in("dx") port, in("al") value, options(att_syntax)); +} + +/// Writes a 16-bit value to the I/O port. +/// +/// # Safety +/// +/// Provides direct access to port I/O, unsafe. +#[inline] +pub unsafe fn outw(port: u16, value: u16) { + core::arch::asm!("outw %ax, %dx", in("dx") port, in("ax") value, options(att_syntax)); +} + +/// Writes a 32-bit value to the I/O port. +/// +/// # Safety +/// +/// Provides direct access to port I/O, unsafe. +#[inline] +pub unsafe fn outl(port: u16, value: u32) { + core::arch::asm!("outl %eax, %dx", in("dx") port, in("eax") value, options(att_syntax)); +} + +#[inline] +pub fn flush_cpu_cache() { + unsafe { + core::arch::asm!("wbinvd"); + } +} diff --git a/kernel/src/arch/x86_64/mod.rs b/kernel/src/arch/x86_64/mod.rs new file mode 100644 index 00000000..db5dc6f9 --- /dev/null +++ b/kernel/src/arch/x86_64/mod.rs @@ -0,0 +1,493 @@ +//! x86-64 architecture implementation +use core::{mem::size_of, ops::DerefMut, ptr::null_mut, sync::atomic::Ordering}; + +use abi::error::Error; +use acpi_lib::{mcfg::Mcfg, AcpiTables, InterruptModel}; +use alloc::boxed::Box; +use device_api::{interrupt::Irq, Device}; +use git_version::git_version; +use kernel_arch_x86_64::{ + mem::{ + init_fixed_tables, map_heap_block, + table::{PageAttributes, PageEntry, PageTable, L1, L2, L3}, + EarlyMapping, HEAP_MAPPING_OFFSET, MEMORY_LIMIT, RAM_MAPPING_L1, + }, + PerCpuData, +}; +use kernel_fs::devfs; +use libk::{arch::Cpu, device::register_external_interrupt_controller}; +use libk_device::register_monotonic_timestamp_provider; +use libk_mm::{ + address::{FromRaw, IntoRaw, PhysicalAddress, Virtualize}, + phys::{self, reserved::reserve_region, PhysicalMemoryRegion}, + table::{EntryLevel, EntryLevelExt}, +}; +use libk_util::{sync::SpinFence, OneTimeInit}; +use yboot_proto::{v1::AvailableMemoryRegion, LoadProtocolV1}; +use ygg_driver_pci::PciBusManager; + +mod acpi; +mod apic; +mod boot; +mod cpuid; +mod exception; +mod gdt; +mod intrinsics; +mod peripherals; +mod smp; +mod syscall; + +use crate::{ + arch::x86_64::{ + apic::ioapic::ISA_IRQ_OFFSET, + intrinsics::{IoPort, IoPortAccess}, + }, + debug::{self, LogLevel}, + device::{ + self, + display::{console, fb_console::FramebufferConsole, linear_fb::LinearFramebuffer}, + }, + fs::{Initrd, INITRD_DATA}, + mem::heap, +}; + +use self::{ + acpi::{AcpiAllocator, AcpiHandlerImpl}, + apic::{ioapic::IoApic, local::LocalApic}, + boot::BootData, + cpuid::{ProcessorFeatures, PROCESSOR_FEATURES}, + peripherals::{i8253::I8253, ps2::PS2Controller, serial::ComPort}, +}; + +use super::{IpiMessage, Platform}; + +/// x86-64 architecture implementation +pub struct X86_64 { + boot_data: OneTimeInit, + acpi: OneTimeInit>, + + // Display + framebuffer: OneTimeInit, + fbconsole: OneTimeInit, +} + +static SHUTDOWN_FENCE: SpinFence = SpinFence::new(); + +/// Global x86-64 architecture value +pub static PLATFORM: X86_64 = X86_64 { + boot_data: OneTimeInit::new(), + acpi: OneTimeInit::new(), + + framebuffer: OneTimeInit::new(), + fbconsole: OneTimeInit::new(), +}; + +impl Platform for X86_64 { + const KERNEL_VIRT_OFFSET: usize = 0xFFFFFF8000000000; + type L3 = kernel_arch_x86_64::mem::table::L3; + + unsafe fn start_application_processors(&self) { + if let Some(acpi) = self.acpi.try_get() { + let Some(pinfo) = acpi + .platform_info_in(AcpiAllocator) + .ok() + .and_then(|p| p.processor_info) + else { + return; + }; + + smp::start_ap_cores(&pinfo); + } + } +} + +impl X86_64 { + unsafe fn 
handle_ipi(&self, _msg: IpiMessage) { + warnln!("Received an IPI"); + todo!(); + } + + fn set_boot_data(&self, data: BootData) { + match data { + BootData::YBoot(data) => { + // Reserve the memory map + unsafe { + reserve_region( + "mmap", + PhysicalMemoryRegion { + base: PhysicalAddress::from_raw(data.memory_map.address), + size: data.memory_map.len as usize * size_of::(), + }, + ); + } + + // Reserve initrd, if not NULL + if data.initrd_address != 0 && data.initrd_size != 0 { + let aligned_start = data.initrd_address & !0xFFF; + let aligned_end = (data.initrd_address + data.initrd_size + 0xFFF) & !0xFFF; + + unsafe { + reserve_region( + "initrd", + PhysicalMemoryRegion { + base: PhysicalAddress::from_raw(aligned_start), + size: (aligned_end - aligned_start) as usize, + }, + ); + } + } + } + } + + self.boot_data.init(data); + } + + fn map_physical_memory + Clone>( + it: I, + _memory_start: PhysicalAddress, + memory_end: PhysicalAddress, + ) -> Result<(), Error> { + let end_l1i = IntoRaw::::into_raw(memory_end) + .page_align_up::() + .page_index::(); + + if end_l1i > 512 { + todo!( + "Cannot handle {}GiB of RAM", + end_l1i * L1::SIZE / (1024 * 1024 * 1024) + ); + } + + MEMORY_LIMIT.store(memory_end.into_raw(), Ordering::Release); + + // Check if 1GiB pages are supported + if PROCESSOR_FEATURES + .get() + .contains(ProcessorFeatures::PDPE1GB) + { + // Just map gigabytes of RAM + for l1i in 0..end_l1i { + // TODO NX + unsafe { + RAM_MAPPING_L1[l1i] = PageEntry::::block( + PhysicalAddress::from_raw(l1i * L1::SIZE), + PageAttributes::WRITABLE, + ); + } + } + } else { + // Allocate the intermediate tables first + let l2_tables_start = phys::find_contiguous_region(it, end_l1i) + .expect("Could not allocate the memory for RAM mapping L2 tables"); + + unsafe { + reserve_region( + "ram-l2-tables", + PhysicalMemoryRegion { + base: l2_tables_start, + size: end_l1i * L3::SIZE, + }, + ); + } + + // Fill in the tables + for l1i in 0..end_l1i { + let l2_phys_addr = l2_tables_start.add(l1i * L3::SIZE); + + // TODO (minor) the slice is uninitialized, maybe find some way to deal with that + // case nicely + // Safety: ok, the mapping is done to the memory obtained from + // find_contiguous_region() + let mut l2_data = + unsafe { EarlyMapping::<[PageEntry; 512]>::map(l2_phys_addr)? 
}; + // Safety: ok, the slice comes from EarlyMapping of a page-aligned region + let l2 = unsafe { PageTable::from_raw_slice_mut(l2_data.deref_mut()) }; + + for l2i in 0..512 { + // TODO NX + l2[l2i] = PageEntry::::block( + PhysicalAddress::from_raw((l1i * L1::SIZE) | (l2i * L2::SIZE)), + PageAttributes::WRITABLE, + ); + } + + // Point the L1 entry to the L2 table + unsafe { + RAM_MAPPING_L1[l1i] = + PageEntry::::table(l2_phys_addr, PageAttributes::WRITABLE) + }; + + intrinsics::flush_cpu_cache(); + // The EarlyMapping is then dropped + } + } + + Ok(()) + } + + unsafe fn init_physical_memory_from_yboot(data: &LoadProtocolV1) -> Result<(), Error> { + let mmap = EarlyMapping::::map_slice( + PhysicalAddress::from_raw(data.memory_map.address), + data.memory_map.len as usize, + )?; + + phys::init_from_iter( + mmap.as_ref().iter().map(|reg| PhysicalMemoryRegion { + base: PhysicalAddress::from_raw(reg.start_address), + size: reg.page_count as usize * L3::SIZE, + }), + Self::map_physical_memory, + ) + } + + unsafe fn init_memory_management(&self) -> Result<(), Error> { + const HEAP_PAGES: usize = 16; + + init_fixed_tables(); + + // Reserve lower 4MiB just in case + reserve_region( + "lowmem", + PhysicalMemoryRegion { + base: PhysicalAddress::ZERO, + size: 4 * 1024 * 1024, + }, + ); + + match self.boot_data.get() { + &BootData::YBoot(data) => Self::init_physical_memory_from_yboot(data)?, + } + + // Setup heap + for i in 0..HEAP_PAGES { + // Allocate in 2MiB chunks + let l2_page = phys::alloc_2m_page()?; + + map_heap_block(i, l2_page); + } + + heap::init_heap(HEAP_MAPPING_OFFSET, HEAP_PAGES * L2::SIZE); + + Ok(()) + } + + unsafe fn init_platform(&'static self, cpu_id: usize) -> Result<(), Error> { + let local_apic = Box::leak(Box::new(LocalApic::new())); + let tss_address = gdt::init(); + + let cpu_data = PerCpuData { + this: null_mut(), + tss_address, + tmp_address: 0, + local_apic, + }; + cpuid::enable_features(); + Cpu::init_local(Some(cpu_id as _), cpu_data); + syscall::init_syscall(); + + if cpu_id == 0 { + // Register the PCI drivers + // TODO make this implicit init + ygg_driver_pci::register_class_driver( + "NVMe Host Controller", + 0x01, + Some(0x08), + Some(0x02), + ygg_driver_nvme::probe, + ); + ygg_driver_pci::register_class_driver( + "AHCI SATA Controller", + 0x01, + Some(0x06), + Some(0x01), + ygg_driver_ahci::probe, + ); + ygg_driver_pci::register_class_driver( + "USB xHCI", + 0x0C, + Some(0x03), + Some(0x30), + ygg_driver_usb_xhci::probe, + ); + ygg_driver_pci::register_vendor_driver( + "Virtio PCI Network Device", + 0x1AF4, + 0x1000, + ygg_driver_virtio_net::probe, + ); + + match self.boot_data.get() { + &BootData::YBoot(data) => { + let start = PhysicalAddress::from_raw(data.initrd_address); + Self::init_initrd(start, start.add(data.initrd_size as usize)); + } + } + + self.init_acpi_from_boot_data()?; + + Self::disable_8259(); + + let timer = Box::leak(Box::new(I8253::new())); + register_monotonic_timestamp_provider(timer); + + let com1_3 = Box::leak(Box::new(ComPort::new( + 0x3F8, + 0x3E8, + Irq::External(ISA_IRQ_OFFSET + 4), + ))); + debug::add_sink(com1_3.port_a(), LogLevel::Debug); + + self.init_framebuffer()?; + + debug::init(); + + infoln!( + "Yggdrasil v{} ({})", + env!("CARGO_PKG_VERSION"), + git_version!() + ); + + let ps2 = Box::leak(Box::new(PS2Controller::new( + Irq::External(ISA_IRQ_OFFSET + 1), + Irq::External(ISA_IRQ_OFFSET + 12), + 0x64, + 0x60, + ))); + ps2.init()?; + + if let Some(acpi) = self.acpi.try_get() { + self.init_platform_from_acpi(acpi)?; + } + + 
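+        // Ordering note: the init_irq() calls below resolve the interrupt controller via +        // external_interrupt_controller(), so they must run after the I/O APIC was +        // registered by init_platform_from_acpi() above.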
timer.init_irq()?; + // ps2.connect(self.tty.get()); + ps2.init_irq()?; + + device::register_device(ps2); + + PciBusManager::setup_bus_devices()?; + } + + Ok(()) + } + + unsafe fn init_acpi_from_boot_data(&self) -> Result<(), Error> { + match self.boot_data.get() { + &BootData::YBoot(data) => self.init_acpi_from_rsdp(data.rsdp_address as usize), + } + } + + unsafe fn init_acpi_from_rsdp(&self, rsdp: usize) -> Result<(), Error> { + let acpi_tables = AcpiTables::from_rsdp(AcpiHandlerImpl, rsdp).map_err(|err| { + errorln!("Could not initialize ACPI tables: {:?}", err); + Error::InvalidArgument + })?; + self.acpi.init(acpi_tables); + Ok(()) + } + + unsafe fn init_platform_from_acpi( + &self, + acpi: &'static AcpiTables, + ) -> Result<(), Error> { + let platform_info = acpi.platform_info_in(AcpiAllocator).map_err(|err| { + errorln!("Could not get ACPI platform info: {:?}", err); + Error::InvalidArgument + })?; + + let InterruptModel::Apic(apic_info) = platform_info.interrupt_model else { + panic!("The processor does not support APIC"); + }; + + let ioapic = IoApic::from_acpi(&apic_info)?; + let ioapic = Box::leak(Box::new(ioapic)); + register_external_interrupt_controller(ioapic); + + // acpi::init_acpi(acpi).unwrap(); + + if let Ok(mcfg) = acpi.find_table::() { + for entry in mcfg.entries() { + PciBusManager::add_segment_from_mcfg(entry)?; + } + } + + Ok(()) + } + + unsafe fn init_framebuffer(&'static self) -> Result<(), Error> { + match self.boot_data.get() { + &BootData::YBoot(data) => { + let info = &data.opt_framebuffer; + + self.framebuffer.init(LinearFramebuffer::from_physical_bits( + PhysicalAddress::from_raw(info.res_address), + info.res_size as usize, + info.res_stride as usize, + info.res_width, + info.res_height, + )?); + } + } + + self.fbconsole.init(FramebufferConsole::from_framebuffer( + self.framebuffer.get(), + None, + )?); + debug::add_sink(self.fbconsole.get(), LogLevel::Info); + + // self.tty.init(CombinedTerminal::new(self.fbconsole.get())); + + devfs::add_named_block_device(self.framebuffer.get(), "fb0")?; + // devfs::add_char_device(self.tty.get(), CharDeviceType::TtyRegular)?; + console::add_console_autoflush(self.fbconsole.get()); + + Ok(()) + } + + fn init_initrd(initrd_start: PhysicalAddress, initrd_end: PhysicalAddress) { + if initrd_start.is_zero() || initrd_end <= initrd_start { + infoln!("No initrd loaded"); + return; + } + + let start_aligned = initrd_start.page_align_down::(); + let end_aligned = initrd_start.page_align_up::(); + + let data = unsafe { + core::slice::from_raw_parts( + start_aligned.virtualize() as *const u8, + initrd_end - initrd_start, + ) + }; + + let initrd = Initrd { + phys_page_start: start_aligned, + phys_page_len: end_aligned - start_aligned, + data, + }; + + INITRD_DATA.init(initrd); + } + + unsafe fn disable_8259() { + infoln!("Disabling i8259 PIC"); + // TODO should I make a module for 8259 if I don't even use it? + let pic_master_cmd = IoPort::::new(0x20); + let pic_master_data = IoPort::::new(0x21); + let pic_slave_cmd = IoPort::::new(0xA0); + let pic_slave_data = IoPort::::new(0xA1); + + // Remap PIC IRQ vectors to 32.. 
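+        // Rough sequence: 0x11 is ICW1 (begin initialization), the data writes set the +        // vector offsets (ICW2: master -> 32, slave -> 40), 0xFF then masks every line on +        // both chips, and the final 0x20 (non-specific EOI) acknowledges anything that was +        // already latched.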
+ pic_master_cmd.write(0x11); + pic_slave_cmd.write(0x11); + + pic_master_data.write(32); + pic_slave_data.write(32 + 8); + + pic_slave_data.write(0xFF); + pic_master_data.write(0xFF); + + pic_master_cmd.write(0x20); + pic_slave_cmd.write(0x20); + } +} diff --git a/kernel/src/arch/x86_64/peripherals/i8253.rs b/kernel/src/arch/x86_64/peripherals/i8253.rs new file mode 100644 index 00000000..0b3b24cf --- /dev/null +++ b/kernel/src/arch/x86_64/peripherals/i8253.rs @@ -0,0 +1,96 @@ +use core::time::Duration; + +use abi::error::Error; +use device_api::{ + interrupt::{InterruptHandler, Irq}, + timer::MonotonicTimestampProviderDevice, + Device, +}; +use libk::{device::external_interrupt_controller, runtime}; +use libk_util::sync::IrqSafeSpinlock; + +use crate::arch::x86_64::{ + apic::ioapic::ISA_IRQ_OFFSET, + intrinsics::{IoPort, IoPortAccess}, +}; + +const FREQUENCY: u32 = 1193180; + +const CMD_CH0: u8 = 0 << 6; +const CMD_ACC_WORD: u8 = 3 << 4; +const CMD_MODE_RATE: u8 = 2 << 1; + +struct Inner { + ch0_data: IoPort, + #[allow(unused)] + ch1_data: IoPort, + #[allow(unused)] + ch2_data: IoPort, + cmd: IoPort, + + tick: u64, +} + +pub struct I8253 { + inner: IrqSafeSpinlock, +} + +impl MonotonicTimestampProviderDevice for I8253 { + fn monotonic_timestamp(&self) -> Result { + let tick = self.inner.lock().tick; + + Ok(Duration::from_millis(tick)) + } +} + +impl InterruptHandler for I8253 { + fn handle_irq(&self, _vector: Option) -> bool { + let mut inner = self.inner.lock(); + inner.tick += 1; + + let now = Duration::from_millis(inner.tick); + drop(inner); + + runtime::tick(now); + + true + } +} + +impl Device for I8253 { + fn display_name(&self) -> &'static str { + "i8253 PIT" + } + + unsafe fn init_irq(&'static self) -> Result<(), Error> { + let intc = external_interrupt_controller(); + let inner = self.inner.lock(); + + let div: u16 = (FREQUENCY / 1000).try_into().unwrap(); + + inner.cmd.write(CMD_CH0 | CMD_ACC_WORD | CMD_MODE_RATE); + inner.ch0_data.write(div as u8); + inner.ch0_data.write((div >> 8) as u8); + + let irq = Irq::External(ISA_IRQ_OFFSET); + intc.register_irq(irq, Default::default(), self)?; + intc.enable_irq(irq)?; + + Ok(()) + } +} + +impl I8253 { + pub const fn new() -> Self { + Self { + inner: IrqSafeSpinlock::new(Inner { + ch0_data: IoPort::new(0x40), + ch1_data: IoPort::new(0x41), + ch2_data: IoPort::new(0x42), + cmd: IoPort::new(0x43), + + tick: 0, + }), + } + } +} diff --git a/kernel/src/arch/x86_64/peripherals/mod.rs b/kernel/src/arch/x86_64/peripherals/mod.rs new file mode 100644 index 00000000..64a4ff6d --- /dev/null +++ b/kernel/src/arch/x86_64/peripherals/mod.rs @@ -0,0 +1,5 @@ +//! x86-64 platform peripheral drivers + +pub mod i8253; +pub mod ps2; +pub mod serial; diff --git a/kernel/src/arch/x86_64/peripherals/ps2/codeset.rs b/kernel/src/arch/x86_64/peripherals/ps2/codeset.rs new file mode 100644 index 00000000..714a2a8e --- /dev/null +++ b/kernel/src/arch/x86_64/peripherals/ps2/codeset.rs @@ -0,0 +1,148 @@ +use abi::io::KeyboardKey as Key; + +macro_rules! table { + { + $vis:vis static $name:ident: + [$ty:ty; $size:expr; $default:expr] = + [ $( $index:literal => $value:expr ),* $(,)? ]; + } => { + $vis static $name: [$ty; $size] = const { + let mut array = [$default; $size]; + + $( + array[$index] = $value; + )* + + array + }; + }; +} + +table! 
{ + pub static CODE_SET_1_00: [Key; 128; Key::Unknown] = [ + 0x01 => Key::Escape, + 0x02 => Key::Char(b'1'), + 0x03 => Key::Char(b'2'), + + 0x04 => Key::Char(b'3'), + 0x05 => Key::Char(b'4'), + 0x06 => Key::Char(b'5'), + 0x07 => Key::Char(b'6'), + + 0x08 => Key::Char(b'7'), + 0x09 => Key::Char(b'8'), + 0x0A => Key::Char(b'9'), + 0x0B => Key::Char(b'0'), + + 0x0C => Key::Char(b'-'), + 0x0D => Key::Char(b'='), + 0x0E => Key::Backspace, + 0x0F => Key::Tab, + + 0x10 => Key::Char(b'q'), + 0x11 => Key::Char(b'w'), + 0x12 => Key::Char(b'e'), + 0x13 => Key::Char(b'r'), + + 0x14 => Key::Char(b't'), + 0x15 => Key::Char(b'y'), + 0x16 => Key::Char(b'u'), + 0x17 => Key::Char(b'i'), + + 0x18 => Key::Char(b'o'), + 0x19 => Key::Char(b'p'), + 0x1A => Key::Char(b'['), + 0x1B => Key::Char(b']'), + + 0x1C => Key::Enter, + 0x1D => Key::LControl, + 0x1E => Key::Char(b'a'), + 0x1F => Key::Char(b's'), + + 0x20 => Key::Char(b'd'), + 0x21 => Key::Char(b'f'), + 0x22 => Key::Char(b'g'), + 0x23 => Key::Char(b'h'), + + 0x24 => Key::Char(b'j'), + 0x25 => Key::Char(b'k'), + 0x26 => Key::Char(b'l'), + 0x27 => Key::Char(b';'), + + 0x28 => Key::Char(b'\''), + 0x29 => Key::Char(b'`'), + 0x2A => Key::LShift, + 0x2B => Key::Char(b'\\'), + + 0x2C => Key::Char(b'z'), + 0x2D => Key::Char(b'x'), + 0x2E => Key::Char(b'c'), + 0x2F => Key::Char(b'v'), + + 0x30 => Key::Char(b'b'), + 0x31 => Key::Char(b'n'), + 0x32 => Key::Char(b'm'), + 0x33 => Key::Char(b','), + + 0x34 => Key::Char(b'.'), + 0x35 => Key::Char(b'/'), + 0x36 => Key::RShift, + // 0x37 => Key::KeypadMul, + + 0x38 => Key::LAlt, + 0x39 => Key::Char(b' '), + 0x3A => Key::CapsLock, + 0x3B => Key::F(1), + + 0x3C => Key::F(2), + 0x3D => Key::F(3), + 0x3E => Key::F(4), + 0x3F => Key::F(5), + + 0x40 => Key::F(6), + 0x41 => Key::F(7), + 0x42 => Key::F(8), + 0x43 => Key::F(9), + + 0x44 => Key::F(10), + // 0x45 => Key::NumLock, + // 0x46 => Key::ScrollLock, + // 0x47 => Key::Keypad7, + + // 0x48 => Key::Keypad8, + // 0x49 => Key::Keypad9, + // 0x4A => Key::KeypadMinus, + // 0x4B => Key::Keypad4, + + // 0x4C => Key::Keypad5, + // 0x4D => Key::Keypad6, + // 0x4E => Key::KeypadPlus, + // 0x4F => Key::Keypad1, + + // 0x50 => Key::Keypad2, + // 0x51 => Key::Keypad3, + // 0x52 => Key::Keypad0, + // 0x53 => Key::KeypadPeriod, + + 0x57 => Key::F(11), + + 0x58 => Key::F(12), + ]; +} + +table! { + pub static CODE_SET_1_E0: [Key; 128; Key::Unknown] = [ + 0x1D => Key::RControl, + 0x38 => Key::RAlt, + // 0x47 => Key::Home, + // 0x48 => Key::Up, + // 0x49 => Key::PageUp, + // 0x4B => Key::Left, + // 0x4D => Key::Right, + // 0x4F => Key::End, + // 0x50 => Key::Down, + // 0x51 => Key::PageDown, + // 0x52 => Key::Insert, + // 0x53 => Key::Delete, + ]; +} diff --git a/kernel/src/arch/x86_64/peripherals/ps2/mod.rs b/kernel/src/arch/x86_64/peripherals/ps2/mod.rs new file mode 100644 index 00000000..cf9315c3 --- /dev/null +++ b/kernel/src/arch/x86_64/peripherals/ps2/mod.rs @@ -0,0 +1,157 @@ +//! 
Intel 8042 PS/2 controller driver implementation +use abi::{ + error::Error, + io::{KeyboardKey, KeyboardKeyEvent}, +}; +use device_api::{ + interrupt::{InterruptHandler, Irq}, + Device, +}; +use libk::device::external_interrupt_controller; +use libk_util::sync::IrqSafeSpinlock; + +use crate::arch::x86_64::{ + intrinsics::{IoPort, IoPortAccess}, + peripherals::ps2::codeset::{CODE_SET_1_00, CODE_SET_1_E0}, +}; + +mod codeset; + +struct Inner { + command: IoPort<u8>, + data: IoPort<u8>, +} + +/// PS/2 controller driver +pub struct PS2Controller { + primary_irq: Irq, + #[allow(unused)] + auxiliary_irq: Irq, + + inner: IrqSafeSpinlock<Inner>, +} + +fn translate(e0: bool, key: u8) -> KeyboardKey { + debug_assert!(key < 0x80); + + if e0 { + CODE_SET_1_E0[key as usize] + } else { + CODE_SET_1_00[key as usize] + } +} + +impl Inner { + const STATUS_OUTPUT_FULL: u8 = 1 << 0; + const STATUS_INPUT_FULL: u8 = 1 << 1; + + fn send_command(&mut self, cmd: u8) { + while self.command.read() & Self::STATUS_INPUT_FULL != 0 { + core::hint::spin_loop(); + } + self.command.write(cmd); + } + + // fn recv_timeout(&mut self, timeout: u64) -> Option<u8> { + // let mut counter = 0; + // while self.command.read() & Self::STATUS_OUTPUT_FULL == 0 { + // counter += 1; + // if counter > timeout { + // return None; + // } + // core::hint::spin_loop(); + // } + // Some(self.data.read()) + // } + + fn try_recv(&mut self) -> Option<u8> { + if self.command.read() & Inner::STATUS_OUTPUT_FULL != 0 { + Some(self.data.read()) + } else { + None + } + } +} + +impl InterruptHandler for PS2Controller { + fn handle_irq(&self, _vector: Option<usize>) -> bool { + let mut count = 0; + let mut inner = self.inner.lock(); + + loop { + let Some(mut scancode) = inner.try_recv() else { + break; + }; + + count += 1; + + let e0 = scancode == 0xE0; + let release = scancode >= 0x80; + + if release { + scancode -= 0x80; + } + + let key = translate(e0, scancode); + let event = if release { + KeyboardKeyEvent::Released(key) + } else { + KeyboardKeyEvent::Pressed(key) + }; + + ygg_driver_input::send_event(event); + } + + count != 0 + } +} + +impl Device for PS2Controller { + fn display_name(&self) -> &'static str { + "PS/2 Controller" + } + + unsafe fn init(&'static self) -> Result<(), Error> { + Ok(()) + } + + unsafe fn init_irq(&'static self) -> Result<(), Error> { + let mut inner = self.inner.lock(); + // let intc = PLATFORM.interrupt_controller(); + let intc = external_interrupt_controller(); + + intc.register_irq(self.primary_irq, Default::default(), self)?; + + // Disable PS/2 devices from sending any further data + inner.send_command(0xAD); + inner.send_command(0xA7); + + // Flush the buffer + while inner.command.read() & Inner::STATUS_OUTPUT_FULL != 0 { + inner.data.read(); + } + + // Enable primary port + inner.send_command(0xAE); + + intc.enable_irq(self.primary_irq)?; + + Ok(()) + } +} + +impl PS2Controller { + /// Constructs a new instance of the device + pub const fn new(primary_irq: Irq, auxiliary_irq: Irq, cmd_port: u16, data_port: u16) -> Self { + let inner = Inner { + command: IoPort::new(cmd_port), + data: IoPort::new(data_port), + }; + + Self { + primary_irq, + auxiliary_irq, + inner: IrqSafeSpinlock::new(inner), + } + } +} diff --git a/kernel/src/arch/x86_64/peripherals/serial.rs b/kernel/src/arch/x86_64/peripherals/serial.rs new file mode 100644 index 00000000..9bd079ef --- /dev/null +++ b/kernel/src/arch/x86_64/peripherals/serial.rs @@ -0,0 +1,90 @@ +//!
Driver for x86 COM ports +use abi::error::Error; +use device_api::{interrupt::Irq, serial::SerialDevice, Device}; +use libk_util::sync::IrqSafeSpinlock; + +use crate::{ + arch::x86_64::intrinsics::{IoPort, IoPortAccess}, + debug::DebugSink, +}; + +// Single port +struct Inner { + dr: IoPort, + lsr: IoPort, +} + +/// Single port of the COM port pair +pub struct Port { + inner: IrqSafeSpinlock, +} + +/// COM port pair +#[allow(unused)] +pub struct ComPort { + port_a: Port, + port_b: Port, + irq: Irq, +} + +impl DebugSink for Port { + fn putc(&self, c: u8) -> Result<(), Error> { + self.send(c) + } + + fn supports_control_sequences(&self) -> bool { + true + } +} + +impl SerialDevice for Port { + fn send(&self, byte: u8) -> Result<(), Error> { + let inner = self.inner.lock(); + + while inner.lsr.read() & Self::LSR_THRE == 0 { + core::hint::spin_loop(); + } + + inner.dr.write(byte); + Ok(()) + } +} + +impl Device for Port { + fn display_name(&self) -> &'static str { + "COM port" + } + + unsafe fn init(&'static self) -> Result<(), Error> { + Ok(()) + } +} + +impl Port { + const LSR_THRE: u8 = 1 << 5; + + const fn new(base: u16) -> Self { + Self { + inner: IrqSafeSpinlock::new(Inner { + dr: IoPort::new(base), + lsr: IoPort::new(base + 5), + }), + } + } +} + +impl ComPort { + /// Constructs a COM port pair + pub const fn new(port_a: u16, port_b: u16, irq: Irq) -> Self { + Self { + port_a: Port::new(port_a), + port_b: Port::new(port_b), + irq, + } + } + + /// Returns a reference to the A port of this COM pair + pub fn port_a(&self) -> &Port { + &self.port_a + } +} diff --git a/kernel/src/arch/x86_64/smp.rs b/kernel/src/arch/x86_64/smp.rs new file mode 100644 index 00000000..d38a5729 --- /dev/null +++ b/kernel/src/arch/x86_64/smp.rs @@ -0,0 +1,116 @@ +//! x86-64 multiprocessing implementation +use core::sync::atomic::Ordering; + +use acpi_lib::platform::{ProcessorInfo, ProcessorState}; +use kernel_arch::{Architecture, ArchitectureImpl}; +use kernel_arch_x86_64::{ + mem::{ + flush_tlb_entry, + table::{PageAttributes, PageEntry, PageTable, L1, L2}, + KERNEL_TABLES, + }, + CPU_COUNT, +}; +use libk::arch::Cpu; +use libk_mm::{ + address::{AsPhysicalAddress, FromRaw, IntoRaw, PhysicalAddress, Virtualize}, + phys, + pointer::PhysicalRefMut, + TableAllocatorImpl, +}; + +use crate::arch::x86_64::boot::__x86_64_ap_entry; + +use super::acpi::AcpiAllocator; + +static AP_BOOTSTRAP_BIN: &[u8] = include_bytes!(concat!(env!("OUT_DIR"), "/__x86_64_ap_boot.bin")); + +const AP_STACK_PAGES: usize = 8; +const AP_BOOTSTRAP_DATA: PhysicalAddress = PhysicalAddress::from_raw(0x6000usize); +const AP_BOOTSTRAP_CODE: PhysicalAddress = PhysicalAddress::from_raw(0x7000usize); + +#[repr(C)] +#[allow(dead_code)] +struct ApBootstrapData { + cr3: PhysicalAddress, + stack_base: usize, + stack_size: usize, + entry: usize, +} + +unsafe fn start_ap_core(apic_id: u32) { + assert!(ArchitectureImpl::interrupt_mask()); + + let bsp_cpu = Cpu::local(); + let bsp_apic = bsp_cpu.local_apic(); + + let cr3 = KERNEL_TABLES.as_physical_address(); + let stack_base = phys::alloc_pages_contiguous(AP_STACK_PAGES) + .unwrap() + .virtualize(); + let stack_size = AP_STACK_PAGES * 0x1000; + + let data = ApBootstrapData { + cr3, + stack_base, + stack_size, + entry: __x86_64_ap_entry as usize, + }; + + let mut data_ref = PhysicalRefMut::::map(AP_BOOTSTRAP_DATA); + *data_ref = data; + + let cpu_count = CPU_COUNT.load(Ordering::Acquire); + + // Send an IPI to wake up the AP + core::arch::asm!("wbinvd"); + bsp_apic.wakeup_cpu(apic_id, AP_BOOTSTRAP_CODE); + + 
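+    // Handshake: the AP increments CPU_COUNT at the end of __x86_64_ap_entry with Release +    // ordering, so once the Acquire load below observes a new value, the AP has finished +    // its early init.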
while cpu_count == CPU_COUNT.load(Ordering::Acquire) { + core::hint::spin_loop(); + } + + infoln!("cpu{} up", cpu_count); +} + +/// Starts up application processors specified by ACPI MADT. +/// +/// # Safety +/// +/// Only meant to be called once by the BSP. +pub unsafe fn start_ap_cores(info: &ProcessorInfo) { + let aps = &info.application_processors; + + if aps.is_empty() { + return; + } + + // Temporarily identity-map the lowest 2MiB + let mut identity_l1 = PageTable::::new_zeroed::().unwrap(); + let mut identity_l2 = PageTable::::new_zeroed::().unwrap(); + + identity_l1[0] = + PageEntry::::table(identity_l2.as_physical_address(), PageAttributes::WRITABLE); + identity_l2[0] = PageEntry::::block(PhysicalAddress::ZERO, PageAttributes::WRITABLE); + + assert_eq!(KERNEL_TABLES.l0.data[0], 0); + KERNEL_TABLES.l0.data[0] = IntoRaw::::into_raw(identity_l1.as_physical_address()) + | (PageAttributes::WRITABLE | PageAttributes::PRESENT).bits(); + + // Load AP_BOOTSTRAP_CODE + let mut code_ref = PhysicalRefMut::map_slice(AP_BOOTSTRAP_CODE, AP_BOOTSTRAP_BIN.len()); + code_ref.copy_from_slice(AP_BOOTSTRAP_BIN); + + for ap in aps.iter() { + if ap.is_ap && ap.state == ProcessorState::WaitingForSipi { + start_ap_core(ap.local_apic_id); + } + } + + // Remove the identity-map + identity_l2[0] = PageEntry::INVALID; + flush_tlb_entry(0); + KERNEL_TABLES.l0.data[0] = 0; + + // TODO drop the tables +} diff --git a/kernel/src/arch/x86_64/syscall.S b/kernel/src/arch/x86_64/syscall.S new file mode 100644 index 00000000..e84f7f17 --- /dev/null +++ b/kernel/src/arch/x86_64/syscall.S @@ -0,0 +1,190 @@ +.global __x86_64_syscall_vector + +.set REG_RAX, 0 * 8 +.set REG_RDI, 1 * 8 +.set REG_RSI, 2 * 8 +.set REG_RDX, 3 * 8 +.set REG_R10, 4 * 8 +.set REG_R8, 5 * 8 +.set REG_R9, 6 * 8 + +.set REG_RCX, 7 * 8 +.set REG_R11, 8 * 8 + +.set REG_USER_IP, 9 * 8 +.set REG_USER_SP, 10 * 8 +.set REG_USER_FLAGS, 11 * 8 + +.set REG_RBX, 12 * 8 +.set REG_RBP, 13 * 8 // +.set REG_R12, 14 * 8 // +.set REG_R13, 15 * 8 // Overwritten by iret +.set REG_R14, 16 * 8 // +.set REG_R15, 17 * 8 // + +// 15 general-purpose registers +// user ip +// user sp +// user flags +.set SYSCALL_STATE_SIZE, 18 * 8 + +.set REG_IRET_RCX, -(SYSCALL_STATE_SIZE - REG_RCX) +.set REG_IRET_R11, -(SYSCALL_STATE_SIZE - REG_R11) +.set REG_IRET_USER_IP, -(SYSCALL_STATE_SIZE - REG_USER_IP) +.set REG_IRET_USER_SP, -(SYSCALL_STATE_SIZE - REG_USER_SP) +.set REG_IRET_USER_FLAGS, -(SYSCALL_STATE_SIZE - REG_USER_FLAGS) + +.macro SYSCALL_SAVE_STATE + subq $SYSCALL_STATE_SIZE, %rsp + + // Syscall-specific ordering for these + movq %rax, REG_RAX(%rsp) + movq %rdi, REG_RDI(%rsp) + movq %rsi, REG_RSI(%rsp) + movq %rdx, REG_RDX(%rsp) + movq %r10, REG_R10(%rsp) + movq %r8, REG_R8(%rsp) + movq %r9, REG_R9(%rsp) + + movq %rcx, REG_RCX(%rsp) + movq %r11, REG_R11(%rsp) + + movq %gs:(16), %rax + movq %rcx, REG_USER_IP(%rsp) + movq %rax, REG_USER_SP(%rsp) + movq %r11, REG_USER_FLAGS(%rsp) + + movq %rbx, REG_RBX(%rsp) + movq %rbp, REG_RBP(%rsp) + movq %r12, REG_R12(%rsp) + movq %r13, REG_R13(%rsp) + movq %r14, REG_R14(%rsp) + movq %r15, REG_R15(%rsp) +.endm + +.macro SYSCALL_RESTORE_STATE +.endm + +.section .text +__x86_64_syscall_vector: + // On entry: + // %rcx - userspace %rip + // %r11 - rflags + + // syscall can only be issued from the userspace, so swapgs + swapgs + + // Store user RSP + // TODO: eliminate magic %gs-relative addresses + mov %rsp, %gs:(16) + // Load the task's RSP0 from TSS + mov %gs:(8), %rsp + mov 4(%rsp), %rsp + + SYSCALL_SAVE_STATE + + mov $0x10, %ax + mov %ax, 
%ss + mov %ax, %ds + mov %ax, %es + + // Save %rsp to %rdi (arg 1) and %rbp + mov %rsp, %rdi + mov %rsp, %rbp + + // Force correct stack alignment + orq $0xF, %rsp + xorq $0xF, %rsp + + call {syscall_handler} + + // Restore the stack + mov %rbp, %rsp + +.restore_state: + // Restore non-clobbered state + movq REG_RAX(%rsp), %rax + movq REG_RDI(%rsp), %rdi + movq REG_RSI(%rsp), %rsi + movq REG_RDX(%rsp), %rdx + movq REG_R10(%rsp), %r10 + movq REG_R8(%rsp), %r8 + movq REG_R9(%rsp), %r9 + + movq REG_RBX(%rsp), %rbx + movq REG_RBP(%rsp), %rbp + movq REG_R12(%rsp), %r12 + movq REG_R13(%rsp), %r13 + movq REG_R14(%rsp), %r14 + movq REG_R15(%rsp), %r15 + + movq REG_RCX(%rsp), %rcx + movq REG_USER_IP(%rsp), %r11 + + // TODO do I also need to check if rflags changed? + // If user_ip != rcx (syscall user ip), then use iretq instead + // Most likely the frame was loaded from a signal entry/return + cmpq %rcx, %r11 + jne .return_via_iret + + // Still not restored: + // %rcx (user ip), %r11 (user flags), user sp + +.return_via_sysret: + // Regular syscall return + movq REG_USER_SP(%rsp), %rcx + movq %rcx, %gs:(16) + + movq REG_USER_IP(%rsp), %rcx + movq REG_USER_FLAGS(%rsp), %r11 + + addq $SYSCALL_STATE_SIZE, %rsp + + // %rcx and %r11 now contain the expected values + // Restore user RSP + mov %gs:(16), %rsp + + swapgs + sysretq + +.return_via_iret: + .set IRET_FRAME_SIZE, 5 * 8 + .set IRET_SS, 4 * 8 + .set IRET_RSP, 3 * 8 + .set IRET_RFLAGS, 2 * 8 + .set IRET_CS, 1 * 8 + .set IRET_RIP, 0 * 8 + + // Need to restore %rcx, %r11, user ip, user sp, user flags + // Syscall frame is ordered in a way to prevent iret frame from + // overwriting any context that was not restored at this moment + + // r15, r14, r13, r12, and rbp will be overwritten, but they're + // already restored to their registers by now + + // Restore %r11 and only use %rcx + movq REG_R11(%rsp), %r11 + + addq $SYSCALL_STATE_SIZE, %rsp + + subq $IRET_FRAME_SIZE, %rsp + + // SS:RSP + movq (IRET_FRAME_SIZE + REG_IRET_USER_SP)(%rsp), %rcx + movq $0x1B, IRET_SS(%rsp) + movq %rcx, IRET_RSP(%rsp) + + // RFLAGS + movq (IRET_FRAME_SIZE + REG_IRET_USER_FLAGS)(%rsp), %rcx + movq %rcx, IRET_RFLAGS(%rsp) + + // CS:RIP + movq (IRET_FRAME_SIZE + REG_IRET_USER_IP)(%rsp), %rcx + movq $0x23, IRET_CS(%rsp) + movq %rcx, IRET_RIP(%rsp) + + // Restore %rcx + movq (IRET_FRAME_SIZE + REG_IRET_RCX)(%rsp), %rcx + + swapgs + iretq diff --git a/kernel/src/arch/x86_64/syscall.rs b/kernel/src/arch/x86_64/syscall.rs new file mode 100644 index 00000000..b1db7314 --- /dev/null +++ b/kernel/src/arch/x86_64/syscall.rs @@ -0,0 +1,87 @@ +//! 
x86-64 implementation of system call interface + +use core::arch::global_asm; + +use abi::{process::SignalEntryData, SyscallFunction}; +use kernel_arch::task::TaskFrame; +use kernel_arch_x86_64::{ + context::SyscallFrame, + registers::{MSR_IA32_EFER, MSR_IA32_LSTAR, MSR_IA32_SFMASK, MSR_IA32_STAR}, +}; +use libk_thread::thread::Thread; +use tock_registers::interfaces::{ReadWriteable, Writeable}; + +use crate::{ + syscall::raw_syscall_handler, + task::process::{ProcessImpl, ProcessManagerImpl}, +}; + +fn syscall_inner(frame: &mut SyscallFrame) { + if frame.rax == usize::from(SyscallFunction::ExitSignal) as u64 { + unsafe { + handle_signal_exit(frame); + return; + } + } + if frame.rax == usize::from(SyscallFunction::Fork) as u64 { + unsafe { + ProcessImpl::raw_fork(frame); + return; + } + } + + let result = raw_syscall_handler(frame.rax, &frame.args); + + frame.rax = result; +} + +extern "C" fn __x86_64_syscall_handler(frame: *mut SyscallFrame) { + let frame = unsafe { &mut *frame }; + syscall_inner(frame); + + let thread = Thread::current(); + unsafe { + thread.handle_pending_signals::(frame); + } +} + +unsafe fn handle_signal_exit(frame: &mut F) { + // TODO validate the argument + let saved_data = &*(frame.argument() as *const SignalEntryData); + infoln!( + "Handling signal exit to ip={:#x}, sp={:#x}", + saved_data.frame.user_ip, + saved_data.frame.user_sp + ); + + frame.restore(&saved_data.frame); +} + +/// Initializes system call instruction support for the current CPU. +/// +/// # Safety +/// +/// Only meant to be called once per each CPU during their init. +pub unsafe fn init_syscall() { + extern "C" { + fn __x86_64_syscall_vector(); + } + + // Initialize syscall vector + MSR_IA32_LSTAR.set(__x86_64_syscall_vector as u64); + MSR_IA32_SFMASK.write(MSR_IA32_SFMASK::IF::Masked); + MSR_IA32_STAR.write( + // On sysret, CS = val + 16 (0x23), SS = val + 8 (0x1B) + MSR_IA32_STAR::SYSRET_CS_SS.val(0x1B - 8) + + // On syscall, CS = val (0x08), SS = val + 8 (0x10) + MSR_IA32_STAR::SYSCALL_CS_SS.val(0x08), + ); + + MSR_IA32_EFER.modify(MSR_IA32_EFER::SCE::Enable); +} + +global_asm!( + include_str!("syscall.S"), + syscall_handler = sym __x86_64_syscall_handler, + options(att_syntax) +); diff --git a/kernel/src/arch/x86_64/vectors.S b/kernel/src/arch/x86_64/vectors.S new file mode 100644 index 00000000..8afd3942 --- /dev/null +++ b/kernel/src/arch/x86_64/vectors.S @@ -0,0 +1,173 @@ +// vi: ft=asm : +// 15 general-purpose registers +.set PT_REGS_SIZE, 15 * 8 + +.macro EXC_SAVE_STATE + sub $PT_REGS_SIZE, %rsp + mov %rax, 0(%rsp) + mov %rcx, 8(%rsp) + mov %rdx, 16(%rsp) + mov %rbx, 24(%rsp) + mov %rsi, 32(%rsp) + mov %rdi, 40(%rsp) + mov %rbp, 48(%rsp) + + mov %r8, 56(%rsp) + mov %r9, 64(%rsp) + mov %r10, 72(%rsp) + mov %r11, 80(%rsp) + mov %r12, 88(%rsp) + mov %r13, 96(%rsp) + mov %r14, 104(%rsp) + mov %r15, 112(%rsp) +.endm + +.macro EXC_RESTORE_STATE + mov 0(%rsp), %rax + mov 8(%rsp), %rcx + mov 16(%rsp), %rdx + mov 24(%rsp), %rbx + mov 32(%rsp), %rsi + mov 40(%rsp), %rdi + mov 48(%rsp), %rbp + + mov 56(%rsp), %r8 + mov 64(%rsp), %r9 + mov 72(%rsp), %r10 + mov 80(%rsp), %r11 + mov 88(%rsp), %r12 + mov 96(%rsp), %r13 + mov 104(%rsp), %r14 + mov 112(%rsp), %r15 + + addq $PT_REGS_SIZE, %rsp +.endm + +.macro ISR_NERR, n +__x86_64_exc_\n: + cli + pushq $0 + pushq $\n + jmp __x86_64_exc_common +.endm + +.macro ISR_YERR, n +__x86_64_exc_\n: + cli + pushq $\n + jmp __x86_64_exc_common +.endm + +.macro EXC_SWAPGS_IF_NEEDED, cs_off + cmpq $0x08, \cs_off(%rsp) + je 1f + swapgs +1: +.endm + +.global 
__x86_64_exception_vectors + +.section .text +__x86_64_exc_common: + // %rsp + 0: error number + // %rsp + 8: error code + // %rsp + 16: %rip + // %rsp + 24: %cs + + EXC_SWAPGS_IF_NEEDED 24 + + EXC_SAVE_STATE + + // Save current stack into %rdi (arg 1) + %rbp + movq %rsp, %rdi + movq %rsp, %rbp + + // Force correct stack alignment + orq $0xF, %rsp + xorq $0xF, %rsp + + call {exception_handler} + + // Restore the stack + movq %rbp, %rsp + + EXC_RESTORE_STATE + + // Remove error code and number from the stack + addq $16, %rsp + + EXC_SWAPGS_IF_NEEDED 8 + + iretq + +ISR_NERR 0 +ISR_NERR 1 +ISR_NERR 2 +ISR_NERR 3 +ISR_NERR 4 +ISR_NERR 5 +ISR_NERR 6 +ISR_NERR 7 +ISR_YERR 8 +ISR_NERR 9 +ISR_YERR 10 +ISR_YERR 11 +ISR_YERR 12 +ISR_YERR 13 +ISR_YERR 14 +ISR_NERR 15 +ISR_NERR 16 +ISR_YERR 17 +ISR_NERR 18 +ISR_NERR 19 +ISR_NERR 20 +ISR_NERR 21 +ISR_NERR 22 +ISR_NERR 23 +ISR_NERR 24 +ISR_NERR 25 +ISR_NERR 26 +ISR_NERR 27 +ISR_NERR 28 +ISR_NERR 29 +ISR_YERR 30 +ISR_NERR 31 + +.section .rodata +.global __x86_64_exception_vectors +.p2align 4 +__x86_64_exception_vectors: + .quad __x86_64_exc_0 + .quad __x86_64_exc_1 + .quad __x86_64_exc_2 + .quad __x86_64_exc_3 + .quad __x86_64_exc_4 + .quad __x86_64_exc_5 + .quad __x86_64_exc_6 + .quad __x86_64_exc_7 + .quad __x86_64_exc_8 + .quad __x86_64_exc_9 + .quad __x86_64_exc_10 + .quad __x86_64_exc_11 + .quad __x86_64_exc_12 + .quad __x86_64_exc_13 + .quad __x86_64_exc_14 + .quad __x86_64_exc_15 + .quad __x86_64_exc_16 + .quad __x86_64_exc_17 + .quad __x86_64_exc_18 + .quad __x86_64_exc_19 + .quad __x86_64_exc_20 + .quad __x86_64_exc_21 + .quad __x86_64_exc_22 + .quad __x86_64_exc_23 + .quad __x86_64_exc_24 + .quad __x86_64_exc_25 + .quad __x86_64_exc_26 + .quad __x86_64_exc_27 + .quad __x86_64_exc_28 + .quad __x86_64_exc_29 + .quad __x86_64_exc_30 + .quad __x86_64_exc_31 + +.section .text diff --git a/kernel/src/debug.rs b/kernel/src/debug.rs new file mode 100644 index 00000000..5c28f4b7 --- /dev/null +++ b/kernel/src/debug.rs @@ -0,0 +1,283 @@ +//! Utilities for debug information logging +use core::fmt::{self, Arguments}; + +use abi::error::Error; +use libk_thread::process::Process; +use libk_util::{ring::RingBuffer, sync::IrqSafeSpinlock, StaticVector}; + +const MAX_DEBUG_SINKS: usize = 4; +const RING_LOGGER_CAPACITY: usize = 65536; + +struct SimpleLogger; + +struct RingLoggerInner { + data: RingBuffer, +} + +/// Logger sink which collects output to an internal ring buffer +pub struct RingLoggerSink { + inner: IrqSafeSpinlock, +} + +/// Defines the severity of the message +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord)] +pub enum LogLevel { + /// Very verbose low-level debugging information + Trace, + /// Debugging and verbose information + Debug, + /// General information about transitions in the system state + Info, + /// Non-critical abnormalities or notices + Warning, + /// Failures of non-essential components + Error, + /// Irrecoverable errors which result in kernel panic + Fatal, +} + +/// Generic interface for debug output +pub trait DebugSink { + /// Sends a single byte to the output + fn putc(&self, c: u8) -> Result<(), Error>; + + /// Sends a string of bytes to the output + fn puts(&self, s: &str) -> Result<(), Error> { + for &byte in s.as_bytes() { + self.putc(byte)?; + } + Ok(()) + } + + /// Returns `true` if the device supports vt100-like control sequences + fn supports_control_sequences(&self) -> bool { + false + } +} + +macro_rules! 
log_print_raw { + ($level:expr, $($args:tt)+) => { + $crate::debug::debug_internal(format_args!($($args)+), $level) + }; +} + +macro_rules! log_print { + ($level:expr, $($args:tt)+) => { + log_print_raw!($level, "cpu{}:{}:{}: {}", libk::cpu_index(), file!(), line!(), format_args!($($args)+)) + }; +} + +macro_rules! debug_tpl { + ($d:tt $name:ident, $nameln:ident, $level:ident) => { + #[allow(unused_macros)] + /// Prints the message to the log + macro_rules! $name { + ($d($d args:tt)+) => (log_print!($crate::debug::LogLevel::$level, $d($d args)+)); + } + + /// Prints the message to the log, terminated by a newline character + #[allow(unused_macros)] + macro_rules! $nameln { + () => { + $name!("\n") + }; + ($d($d args:tt)+) => ($name!("{}\n", format_args!($d($d args)+))); + } + }; +} + +debug_tpl!($ debug, debugln, Debug); +debug_tpl!($ info, infoln, Info); +debug_tpl!($ warn, warnln, Warning); +debug_tpl!($ error, errorln, Error); +debug_tpl!($ fatal, fatalln, Fatal); + +#[derive(Clone, Copy)] +struct DebugSinkWrapper { + inner: &'static dyn DebugSink, + level: LogLevel, +} + +impl log::Log for SimpleLogger { + fn enabled(&self, _metadata: &log::Metadata) -> bool { + true + } + + fn log(&self, record: &log::Record) { + let file = record.file().unwrap_or(""); + let line = record.line().unwrap_or(0); + + match record.level() { + log::Level::Error => { + log_print_raw!(LogLevel::Error, "{}:{}: {}\n", file, line, record.args()) + } + log::Level::Warn => { + log_print_raw!(LogLevel::Warning, "{}:{}: {}\n", file, line, record.args()) + } // warnln!("{}", record.args()), + log::Level::Info => { + log_print_raw!(LogLevel::Info, "{}:{}: {}\n", file, line, record.args()) + } + log::Level::Debug => { + log_print_raw!(LogLevel::Debug, "{}:{}: {}\n", file, line, record.args()) + } + log::Level::Trace => { + log_print_raw!(LogLevel::Trace, "{}:{}: {}\n", file, line, record.args()) + } + } + } + + fn flush(&self) {} +} + +impl LogLevel { + fn log_prefix(self) -> &'static str { + match self { + LogLevel::Trace => "", + LogLevel::Debug => "", + LogLevel::Info => "\x1b[36m\x1b[1m", + LogLevel::Warning => "\x1b[33m\x1b[1m", + LogLevel::Error => "\x1b[31m\x1b[1m", + LogLevel::Fatal => "\x1b[38;2;255;0;0m\x1b[1m", + } + } + + fn log_suffix(self) -> &'static str { + match self { + LogLevel::Trace => "", + LogLevel::Debug => "", + LogLevel::Info => "\x1b[0m", + LogLevel::Warning => "\x1b[0m", + LogLevel::Error => "\x1b[0m", + LogLevel::Fatal => "\x1b[0m", + } + } +} + +impl fmt::Write for DebugSinkWrapper { + fn write_str(&mut self, s: &str) -> fmt::Result { + self.inner.puts(s).ok(); + Ok(()) + } +} + +impl RingLoggerSink { + const fn new() -> Self { + Self { + inner: IrqSafeSpinlock::new(RingLoggerInner { + data: RingBuffer::with_capacity(RING_LOGGER_CAPACITY), + }), + } + } + + /// Reads data from the sink without blocking and waiting for more to arrive + pub fn read(&self, pos: usize, buffer: &mut [u8]) -> usize { + unsafe { self.inner.lock().data.read_all_static(pos, buffer) } + } + + fn write_fmt(&self, args: fmt::Arguments<'_>) -> fmt::Result { + use fmt::Write; + self.inner.lock().write_fmt(args) + } +} + +impl fmt::Write for RingLoggerInner { + fn write_str(&mut self, s: &str) -> fmt::Result { + for ch in s.bytes() { + self.data.write(ch); + } + Ok(()) + } +} + +static LOGGER: SimpleLogger = SimpleLogger; + +static DEBUG_SINKS: IrqSafeSpinlock> = + IrqSafeSpinlock::new(StaticVector::new()); + +/// See [RingLoggerSink] +pub static RING_LOGGER_SINK: RingLoggerSink = RingLoggerSink::new(); + +/// Prints a 
hex-dump of a slice, appending a virtual address offset to the output
+pub fn hex_dump(level: LogLevel, addr_offset: usize, data: &[u8]) {
+    const WINDOW_SIZE: usize = 16;
+    let window_count = (data.len() + WINDOW_SIZE - 1) / WINDOW_SIZE;
+
+    for iw in 0..window_count {
+        let off = iw * WINDOW_SIZE;
+        let len = core::cmp::min(data.len() - off, WINDOW_SIZE);
+        let window = &data[off..off + len];
+
+        log_print_raw!(level, "{:04X}: ", addr_offset + off);
+        for i in 0..WINDOW_SIZE {
+            if i < window.len() {
+                log_print_raw!(level, "{:02X}", window[i]);
+            } else {
+                log_print_raw!(level, "  ");
+            }
+
+            if i % 2 == 1 {
+                log_print_raw!(level, " ");
+            }
+        }
+
+        for &ch in window {
+            if ch.is_ascii_graphic() || ch == b' ' {
+                log_print_raw!(level, "{}", ch as char);
+            } else {
+                log_print_raw!(level, ".");
+            }
+        }
+
+        log_print_raw!(level, "\n");
+    }
+}
+
+/// Adds a debugging output sink
+pub fn add_sink(sink: &'static dyn DebugSink, level: LogLevel) {
+    DEBUG_SINKS
+        .lock()
+        .push(DebugSinkWrapper { inner: sink, level });
+}
+
+/// Prints a trace message coming from a process
+pub fn program_trace<P: Process>(process: &P, message: &str) {
+    log_print_raw!(
+        LogLevel::Trace,
+        "[trace {} {:?}] {}\n",
+        process.id(),
+        process.name(),
+        message
+    );
+}
+
+/// Sets up the kernel logger and resets the debug terminal attributes
+pub fn init() {
+    log::set_logger(&LOGGER)
+        .map(|_| log::set_max_level(log::LevelFilter::Trace))
+        .ok();
+
+    log_print_raw!(LogLevel::Info, "\x1b[0m");
+}
+
+#[doc(hidden)]
+pub fn debug_internal(args: Arguments, level: LogLevel) {
+    use fmt::Write;
+
+    RING_LOGGER_SINK.write_fmt(args).ok();
+
+    for sink in DEBUG_SINKS.lock().iter_mut() {
+        if level < sink.level {
+            continue;
+        }
+
+        if sink.inner.supports_control_sequences() {
+            sink.write_str(level.log_prefix()).ok();
+        }
+
+        sink.write_fmt(args).ok();
+
+        if sink.inner.supports_control_sequences() {
+            sink.write_str(level.log_suffix()).ok();
+        }
+    }
+}
diff --git a/kernel/src/device/bus/dt_pci.rs b/kernel/src/device/bus/dt_pci.rs
new file mode 100644
index 00000000..5cd98231
--- /dev/null
+++ b/kernel/src/device/bus/dt_pci.rs
@@ -0,0 +1,188 @@
+//!
PCI bus device wrapper for device tree + +use alloc::{collections::BTreeMap, vec::Vec}; +use device_api::interrupt::{IrqLevel, IrqOptions, IrqTrigger}; +use device_tree::{ + device_tree_driver, + dt::{self, DevTreeIndexNodeExt, DevTreeIndexPropExt, DevTreeNodeInfo}, +}; +use libk_mm::address::{FromRaw, PhysicalAddress}; +use ygg_driver_pci::{ + device::{PciInterrupt, PciInterruptPin, PciInterruptRoute}, + PciAddress, PciAddressRange, PciBusManager, PciRangeType, +}; + +fn extract_ranges(dt: &DevTreeNodeInfo) -> Vec { + let Some(ranges) = dt::find_prop(&dt.node, "ranges") else { + return Vec::new(); + }; + let pci_address_cells = dt.node.address_cells(); + let pci_size_cells = dt.node.size_cells(); + + let cells_per_range = dt.address_cells + pci_address_cells + pci_size_cells; + + assert_eq!(ranges.len() % cells_per_range, 0); + + let range_count = ranges.len() / (cells_per_range * 4); + + let mut result = Vec::new(); + + for i in 0..range_count { + let ty_bits = ranges.cell1_array_item(i * cells_per_range, 1).unwrap(); + let ty = match (ty_bits >> 24) & 0x3 { + 0 => PciRangeType::Configuration, + 1 => PciRangeType::Io, + 2 => PciRangeType::Memory32, + 3 => PciRangeType::Memory64, + _ => unreachable!(), + }; + let bus_number = (ty_bits >> 16) as u8; + + let pci_base = match pci_address_cells { + 3 => { + let hi = ranges.cell1_array_item(i * cells_per_range + 1, 1).unwrap(); + let lo = ranges.cell1_array_item(i * cells_per_range + 2, 1).unwrap(); + + (hi << 32) | lo + } + _ => unimplemented!(), + }; + + let host_base = PhysicalAddress::from_raw(match dt.address_cells { + 2 => { + let hi = ranges + .cell1_array_item(i * cells_per_range + pci_address_cells, 1) + .unwrap(); + let lo = ranges + .cell1_array_item(i * cells_per_range + pci_address_cells + 1, 1) + .unwrap(); + + (hi << 32) | lo + } + _ => unimplemented!(), + }); + + let size = match pci_size_cells { + 2 => { + let hi = ranges + .cell1_array_item( + i * cells_per_range + pci_address_cells + dt.address_cells, + 1, + ) + .unwrap(); + let lo = ranges + .cell1_array_item( + i * cells_per_range + pci_address_cells + dt.address_cells + 1, + 1, + ) + .unwrap(); + + (hi << 32) | lo + } + _ => unimplemented!(), + } as usize; + + result.push(PciAddressRange { + ty, + bus_number, + host_base, + pci_base, + size, + }); + } + + result +} + +fn extract_interrupt_map(dt: &DevTreeNodeInfo) -> BTreeMap { + // let interrupt_map_mask = devtree::find_prop(&dt.node, "interrupt-map").unwrap(); + let interrupt_map = dt::find_prop(&dt.node, "interrupt-map").unwrap(); + let pci_address_cells = dt.node.address_cells(); + + // TODO replace 3 with interrupt-cells in interrupt-controller + let cells_per_imap = pci_address_cells + /* Pin */ 1 + /* #interrupt-cells in interrupt-controller */ 3 + /* Interrupt Controller Data */ 3; + + assert_eq!(interrupt_map.len() % (4 * cells_per_imap), 0); + + let mut imap = BTreeMap::new(); + + for i in 0..interrupt_map.len() / (4 * cells_per_imap) { + let pci_address_0 = interrupt_map + .cell1_array_item(i * cells_per_imap, 1) + .unwrap(); + + let bus = (pci_address_0 >> 24) as u8; + let device = ((pci_address_0 >> 11) & 0x1F) as u8; + let function = ((pci_address_0 >> 8) & 0x7) as u8; + + let address = PciAddress::for_function(0, bus, device, function); + + let pin = interrupt_map + .cell1_array_item(i * cells_per_imap + pci_address_cells, 1) + .unwrap() as u32; + + let Ok(pin) = PciInterruptPin::try_from(pin) else { + continue; + }; + + let _interrupt_ty = interrupt_map + .cell1_array_item(i * cells_per_imap + 
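+        // Each `interrupt-map` entry is a run of u32 cells; assuming a GIC
+        // parent with #interrupt-cells = 3 (matching the hardcoded layout
+        // above), the offsets within one entry are:
+        //   [0 .. pci_address_cells)      child PCI address; cell 0 packs
+        //                                 bus[31:24], device[15:11], function[10:8]
+        //   [pci_address_cells]           interrupt pin, 1 (INTA) ..= 4 (INTD)
+        //   [.. + 1 .. + 4)               parent phandle + parent unit address (skipped here)
+        //   [.. + 4], [.. + 5], [.. + 6]  GIC specifier: type, number, flags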
pci_address_cells + 4, 1) + .unwrap(); + let interrupt_number = interrupt_map + .cell1_array_item(i * cells_per_imap + pci_address_cells + 5, 1) + .unwrap(); + let interrupt_mode = interrupt_map + .cell1_array_item(i * cells_per_imap + pci_address_cells + 6, 1) + .unwrap(); + + let (trigger, level) = match interrupt_mode { + 0x04 => (IrqTrigger::Level, IrqLevel::ActiveHigh), + _ => todo!(), + }; + + let src = PciInterrupt { address, pin }; + let dst = PciInterruptRoute { + number: interrupt_number as _, + options: IrqOptions { trigger, level }, + }; + + // TODO use phandle for interrupt-controller + // TODO interrupt-controller-specific decoding of idata + // TODO don't ignore interrupt_ty, don't assume they're all SPIs + imap.insert(src, dst); + } + + imap +} + +device_tree_driver! { + compatible: ["pci-host-ecam-generic"], + probe(dt) => { + let reg = dt::find_prop(&dt.node, "reg")?; + let bus_range = dt::find_prop(&dt.node, "bus-range")?; + + let (cfg_space_base, _) = reg + .cell2_array_item(0, dt.address_cells, dt.size_cells) + .unwrap(); + let cfg_space_base = PhysicalAddress::from_raw(cfg_space_base); + + let bus_start = bus_range.cell1_array_item(0, 1)? as u8; + let bus_end = bus_range.cell1_array_item(1, 1)? as u8; + + let ranges = extract_ranges(dt); + let interrupt_map = extract_interrupt_map(dt); + + if ranges.is_empty() { + return None; + } + + PciBusManager::add_segment_from_device_tree( + cfg_space_base, + bus_start..bus_end, + ranges, + interrupt_map + ).ok(); + + None + } +} diff --git a/kernel/src/device/bus/mod.rs b/kernel/src/device/bus/mod.rs new file mode 100644 index 00000000..a71aedfb --- /dev/null +++ b/kernel/src/device/bus/mod.rs @@ -0,0 +1,6 @@ +//! Bus devices + +#[cfg(target_arch = "aarch64")] +pub mod dt_pci; +#[cfg(target_arch = "aarch64")] +pub mod simple_bus; diff --git a/kernel/src/device/bus/simple_bus.rs b/kernel/src/device/bus/simple_bus.rs new file mode 100644 index 00000000..ce045bbf --- /dev/null +++ b/kernel/src/device/bus/simple_bus.rs @@ -0,0 +1,29 @@ +//! Simple "passthrough" bus device + +use device_tree::{device_tree_driver, dt::DevTreeIndexNodeExt}; + +use crate::device; + +device_tree_driver! { + compatible: ["simple-bus"], + probe(dt) => { + let address_cells = dt.node.address_cells(); + let size_cells = dt.node.size_cells(); + + let nodes = dt.node.children(); + + // Iterate devices on the bus + device_tree::driver::enumerate_dt(address_cells, size_cells, nodes, |_, probe| { + if let Some((device, _)) = device_tree::driver::probe_dt_node(&probe, device::register_device) { + unsafe { + device.init()?; + } + } + + Ok(()) + }).ok(); + + // Don't yield any devices + None + } +} diff --git a/kernel/src/device/display/console.rs b/kernel/src/device/display/console.rs new file mode 100644 index 00000000..e5f5fec5 --- /dev/null +++ b/kernel/src/device/display/console.rs @@ -0,0 +1,559 @@ +//! Console device interfaces + +use core::time::Duration; + +use abi::{error::Error, primitive_enum}; +use alloc::{vec, vec::Vec}; +use bitflags::bitflags; +use libk::runtime; +use libk_util::{sync::IrqSafeSpinlock, StaticVector}; + +use crate::debug::DebugSink; + +const CONSOLE_ROW_LEN: usize = 80; +const MAX_CSI_ARGS: usize = 8; + +const DEFAULT_FG_COLOR: ColorAttribute = ColorAttribute::White; +const DEFAULT_BG_COLOR: ColorAttribute = ColorAttribute::Blue; + +primitive_enum! 
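+// The eight variants below follow the standard ANSI/VT100 color order, so SGR
+// parameters map onto them directly: `ESC[3<n>m` selects foreground color `n`
+// and `ESC[4<n>m` selects background color `n` (see `from_vt100` and the `m`
+// arm of `handle_csi` further down).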
{ + #[allow(missing_docs)] + #[doc = "Color attribute of a console character"] + pub enum ColorAttribute: u8 { + Black = 0, + Red = 1, + Green = 2, + Yellow = 3, + Blue = 4, + Magenta = 5, + Cyan = 6, + White = 7, + } +} + +bitflags! { + #[doc = "Extra attributes of a console character"] + #[derive(Clone, Copy)] + pub struct Attributes: u8 { + #[allow(missing_docs)] + const BOLD = 1 << 0; + } +} + +impl ColorAttribute { + fn from_vt100(val: u8) -> Self { + match val { + 0..=7 => Self::try_from(val).unwrap(), + _ => ColorAttribute::Red, + } + } + + /// Converts the attribute to RGBA representation + pub fn as_rgba(&self, bold: bool) -> u32 { + let color = match self { + Self::Black => 0x000000, + Self::Red => 0x7F0000, + Self::Green => 0x007F00, + Self::Yellow => 0x7F7F00, + Self::Blue => 0x00007F, + Self::Magenta => 0x7F007F, + Self::Cyan => 0x007F7F, + Self::White => 0x7F7F7F, + }; + if bold { + color * 2 + } else { + color + } + } +} + +/// Represents a single character with its attributes +#[derive(Clone, Copy)] +#[repr(C)] +pub struct ConsoleChar { + attributes: u16, + char: u8, + _pad: u8, +} + +/// Represents a single line in the console buffer +#[derive(Clone, Copy)] +pub struct ConsoleRow { + dirty: u8, + chars: [ConsoleChar; CONSOLE_ROW_LEN], +} + +/// Buffer that contains text rows of the console with their attributes + tracks dirty rows which +/// need to be flushed to the display +pub struct ConsoleBuffer { + rows: Vec, + height: u32, +} + +enum EscapeState { + Normal, + Escape, + Csi, +} + +/// Common state for console output devices +pub struct ConsoleState { + /// Current cursor row + pub cursor_row: u32, + /// Current cursor column + pub cursor_col: u32, + /// Current foreground color + pub fg_color: ColorAttribute, + /// Current background color + pub bg_color: ColorAttribute, + /// Current set of attributes + pub attributes: Attributes, + + esc_args: StaticVector, + esc_state: EscapeState, + + /// Row buffer + pub buffer: ConsoleBuffer, +} + +/// Helper type to iterate over dirty rows in the buffer +pub struct RowIter<'a> { + buffer: &'a mut ConsoleBuffer, + index: u32, +} + +/// Interface to implement buffered console semantics on an abstract console output device +pub trait DisplayConsole { + /// Returns the state lock + fn state(&self) -> &IrqSafeSpinlock; + + /// Flushes the data from console buffer to the display + fn flush(&self, state: &mut ConsoleState); + + /// Writes characters to the backing buffer + handles special control sequences + fn write_char(&self, c: u8) { + let mut state = self.state().lock(); + if state.putc(c) { + self.flush(&mut state); + } + } + + /// Returns the dimensions of the console in chars: (rows, columns) + fn text_dimensions(&self) -> (usize, usize) { + let state = self.state().lock(); + (state.buffer.height as _, CONSOLE_ROW_LEN as _) + } +} + +impl ConsoleChar { + /// Empty character + pub const BLANK: Self = Self { + attributes: 0, + char: 0, + _pad: 0, + }; + + /// Constructs a console character from a char and its attributes + #[inline(always)] + pub fn from_parts( + char: u8, + fg: ColorAttribute, + bg: ColorAttribute, + attributes: Attributes, + ) -> Self { + let attributes = ((attributes.bits() as u16) << 8) + | ((u8::from(bg) as u16) << 4) + | (u8::from(fg) as u16); + Self { + attributes, + char, + _pad: 0, + } + } + + /// Returns the attributes of the character + #[inline(always)] + pub fn attributes(self) -> (ColorAttribute, ColorAttribute, Attributes) { + let fg = + ColorAttribute::try_from((self.attributes & 0xF) as 
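+        // `attributes` packs three fields into a single u16, mirroring
+        // `from_parts` above:
+        //   bits [3:0]   foreground ColorAttribute
+        //   bits [7:4]   background ColorAttribute
+        //   bits [15:8]  Attributes flags (e.g. BOLD)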
u8).unwrap_or(DEFAULT_FG_COLOR); + let bg = ColorAttribute::try_from(((self.attributes >> 4) & 0xF) as u8) + .unwrap_or(DEFAULT_BG_COLOR); + let attributes = + Attributes::from_bits((self.attributes >> 8) as u8).unwrap_or(Attributes::empty()); + + (fg, bg, attributes) + } + + /// Returns the character data of this [ConsoleChar] + #[inline(always)] + pub const fn character(self) -> u8 { + self.char + } +} + +impl<'a> RowIter<'a> { + /// Returns the next dirty row + pub fn next_dirty(&mut self) -> Option<(u32, &[ConsoleChar])> { + loop { + if self.index == self.buffer.height { + return None; + } + + if !self.buffer.rows[self.index as usize].clear_dirty() { + self.index += 1; + continue; + } + + let row_index = self.index; + let row = &self.buffer.rows[self.index as usize]; + + self.index += 1; + + return Some((row_index, &row.chars)); + } + } +} + +impl ConsoleRow { + /// Constructs a row filled with blank characters + pub const fn zeroed() -> Self { + Self { + dirty: 1, + chars: [ConsoleChar { + attributes: ((DEFAULT_BG_COLOR as u8) as u16) << 4, + char: b' ', + _pad: 0, + }; CONSOLE_ROW_LEN], + } + } + + /// Returns `true` if the row's dirty flag is set + #[inline] + pub const fn is_dirty(&self) -> bool { + self.dirty != 0 + } + + /// Clears "dirty" flag for the row + #[inline] + pub fn clear_dirty(&mut self) -> bool { + let old = self.dirty; + self.dirty = 0; + old == 1 + } + + /// Clears the console row with blank characters + pub fn clear(&mut self, bg: ColorAttribute) { + self.dirty = 1; + self.chars + .fill(ConsoleChar::from_parts(b' ', bg, bg, Attributes::empty())); + } +} + +impl ConsoleBuffer { + /// Constructs a fixed-size console buffer + pub fn new(height: u32) -> Result { + // let size = size_of::() * (height as usize); + let mut rows = vec![ConsoleRow::zeroed(); height as usize]; + + for row in rows.iter_mut() { + row.clear(DEFAULT_BG_COLOR); + } + + Ok(Self { rows, height }) + // let size = size_of::() * (height as usize); + // let page_count = (size + 0xFFF) / 0x1000; + // let pages = phys::alloc_pages_contiguous(page_count, PageUsage::Used)?; + + // let rows = unsafe { + // core::slice::from_raw_parts_mut(pages.virtualize() as *mut ConsoleRow, height as usize) + // }; + + // for row in rows.iter_mut() { + // row.clear(DEFAULT_BG_COLOR); + // } + + // Ok(Self { rows, height }) + } + + #[inline(never)] + fn set_char(&mut self, row: u32, col: u32, c: ConsoleChar) { + self.rows[row as usize].dirty = 1; + self.rows[row as usize].chars[col as usize] = c; + } + + #[inline(never)] + fn set_dirty(&mut self, row: u32) { + self.rows[row as usize].dirty = 1; + } + + /// Returns an iterator over dirty rows, while clearing dirty flag for them + pub fn flush_rows(&mut self) -> RowIter { + RowIter { + buffer: self, + index: 0, + } + } + + fn clear(&mut self, bg: ColorAttribute) { + for row in self.rows.iter_mut() { + row.clear(bg); + } + } + + fn clear_row(&mut self, row: u32, bg: ColorAttribute) { + self.rows[row as usize].dirty = 1; + self.rows[row as usize].clear(bg); + } + + fn erase_in_row(&mut self, row: u32, start: usize, bg: ColorAttribute) { + self.rows[row as usize].dirty = 1; + self.rows[row as usize].chars[start..].fill(ConsoleChar::from_parts( + b' ', + DEFAULT_FG_COLOR, + bg, + Attributes::empty(), + )); + } + + fn scroll_once(&mut self, bg: ColorAttribute) { + self.rows.copy_within(1.., 0); + self.rows[(self.height - 1) as usize].clear(bg); + + // Mark everything dirty + self.rows.iter_mut().for_each(|row| { + row.dirty = 1; + }); + } +} + +impl ConsoleState { + /// 
Constructs a new console state with given buffer + pub fn new(buffer: ConsoleBuffer) -> Self { + Self { + cursor_row: 0, + cursor_col: 0, + fg_color: DEFAULT_FG_COLOR, + bg_color: DEFAULT_BG_COLOR, + attributes: Attributes::empty(), + + esc_args: StaticVector::new(), + esc_state: EscapeState::Normal, + + buffer, + } + } + + fn putc_normal(&mut self, c: u8) -> bool { + let mut flush = false; + + match c { + c if c >= 127 => { + self.buffer.set_char( + self.cursor_row, + self.cursor_col, + ConsoleChar::from_parts( + b'?', + self.fg_color, + ColorAttribute::Red, + self.attributes, + ), + ); + + self.cursor_col += 1; + } + b'\x1b' => { + self.esc_state = EscapeState::Escape; + return false; + } + b'\r' => { + self.cursor_col = 0; + } + b'\n' => { + self.cursor_row += 1; + self.cursor_col = 0; + flush = true; + } + _ => { + self.buffer.set_char( + self.cursor_row, + self.cursor_col, + ConsoleChar::from_parts(c, self.fg_color, self.bg_color, self.attributes), + ); + + self.cursor_col += 1; + } + } + + if self.cursor_col == CONSOLE_ROW_LEN as u32 { + self.cursor_col = 0; + self.cursor_row += 1; + } + + if self.cursor_row == self.buffer.height { + self.buffer.scroll_once(self.bg_color); + self.cursor_row = self.buffer.height - 1; + flush = true; + } + + flush + } + + fn handle_csi(&mut self, c: u8) -> bool { + match c { + // Move back one character + b'D' => { + if self.cursor_col > 0 { + self.cursor_col -= 1; + } + } + // Manipulate display attributes + b'm' => { + if let Some(arg) = self.esc_args.first() { + match arg { + // Reset + 0 => { + self.fg_color = DEFAULT_FG_COLOR; + self.bg_color = DEFAULT_BG_COLOR; + self.attributes = Attributes::empty(); + } + // Bold + 1 => { + self.attributes |= Attributes::BOLD; + } + // Foreground colors + 30..=39 => { + let vt_color = self.esc_args[0] % 10; + if vt_color == 9 { + self.fg_color = DEFAULT_FG_COLOR; + } else { + self.fg_color = ColorAttribute::from_vt100(vt_color as u8); + } + } + // Background colors + 40..=49 => { + let vt_color = self.esc_args[0] % 10; + if vt_color == 9 { + self.bg_color = DEFAULT_BG_COLOR; + } else { + self.bg_color = ColorAttribute::from_vt100(vt_color as u8); + } + } + _ => (), + } + } + } + // Move cursor to position + b'f' => { + let row = self.esc_args[0].clamp(1, self.buffer.height) - 1; + let col = self.esc_args[1].clamp(1, CONSOLE_ROW_LEN as u32) - 1; + + self.buffer.set_dirty(row); + + self.cursor_row = row; + self.cursor_col = col; + } + // Clear rows/columns/screen + b'J' => match self.esc_args[0] { + // Erase lines down + 0 => (), + // Erase lines up + 1 => (), + // Erase all + 2 => { + self.buffer.clear(self.bg_color); + } + _ => (), + }, + // Erase in Line + b'K' => match self.esc_args[0] { + // Erase to Right + 0 => { + self.buffer + .erase_in_row(self.cursor_row, self.cursor_col as _, self.bg_color); + } + // Erase All + 2 => { + self.buffer.clear_row(self.cursor_row, self.bg_color); + } + _ => (), + }, + _ => (), + } + + self.esc_state = EscapeState::Normal; + false + } + + fn handle_csi_byte(&mut self, c: u8) -> bool { + match c { + b'0'..=b'9' => { + let arg = self.esc_args.last_mut().unwrap(); + *arg *= 10; + *arg += (c - b'0') as u32; + false + } + b';' => { + self.esc_args.push(0); + false + } + _ => self.handle_csi(c), + } + } + + fn putc(&mut self, c: u8) -> bool { + match self.esc_state { + EscapeState::Normal => self.putc_normal(c), + EscapeState::Escape => match c { + b'[' => { + self.esc_state = EscapeState::Csi; + self.esc_args.clear(); + self.esc_args.push(0); + false + } + _ => { + 
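+                    // Anything other than `[` after ESC is not a CSI introducer;
+                    // only CSI sequences are implemented here, so fall back to
+                    // Normal and drop the byte. A typical parse of "\x1b[1;31m"
+                    // runs: Normal -> Escape ('\x1b') -> Csi ('['), accumulating
+                    // args 1 and 31 (';' pushes a new argument), until the final
+                    // byte 'm' applies the attributes in `handle_csi`.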
self.esc_state = EscapeState::Normal; + self.esc_args.clear(); + false + } + }, + EscapeState::Csi => self.handle_csi_byte(c), + } + } +} + +impl DebugSink for dyn DisplayConsole { + fn putc(&self, c: u8) -> Result<(), Error> { + self.write_char(c); + Ok(()) + } + + fn supports_control_sequences(&self) -> bool { + true + } +} + +static CONSOLES: IrqSafeSpinlock> = + IrqSafeSpinlock::new(Vec::new()); + +/// Adds a console device to a auto-flush list +pub fn add_console_autoflush(console: &'static dyn DisplayConsole) { + CONSOLES.lock().push(console); +} + +/// Flushes console buffers to their displays +pub fn flush_consoles() { + for console in CONSOLES.lock().iter() { + let mut state = console.state().lock(); + console.flush(&mut state); + } +} + +/// Periodically flushes data from console buffers onto their displays +pub async fn update_consoles_task() { + loop { + flush_consoles(); + + runtime::sleep(Duration::from_millis(20)).await; + } +} diff --git a/kernel/src/device/display/fb_console.rs b/kernel/src/device/display/fb_console.rs new file mode 100644 index 00000000..50ad2fab --- /dev/null +++ b/kernel/src/device/display/fb_console.rs @@ -0,0 +1,198 @@ +//! Framebuffer console driver + +use abi::error::Error; +use libk_util::sync::IrqSafeSpinlock; + +use crate::debug::DebugSink; + +use super::{ + console::{Attributes, ConsoleBuffer, ConsoleState, DisplayConsole}, + font::PcScreenFont, + linear_fb::LinearFramebuffer, + DisplayDevice, +}; + +struct Inner { + framebuffer: &'static LinearFramebuffer, + font: PcScreenFont<'static>, + char_width: u32, + char_height: u32, + width: u32, + height: u32, + cursor_row: u32, + cursor_col: u32, +} + +struct DrawGlyph { + sx: u32, + sy: u32, + c: u8, + fg: u32, + bg: u32, + bytes_per_line: usize, +} + +/// Framebuffer console device wrapper +pub struct FramebufferConsole { + inner: IrqSafeSpinlock, + state: IrqSafeSpinlock, +} + +impl DebugSink for FramebufferConsole { + fn putc(&self, c: u8) -> Result<(), Error> { + self.write_char(c); + Ok(()) + } + + fn supports_control_sequences(&self) -> bool { + true + } +} + +impl DisplayConsole for FramebufferConsole { + fn state(&self) -> &IrqSafeSpinlock { + &self.state + } + + fn flush(&self, state: &mut ConsoleState) { + let mut inner = self.inner.lock(); + let font = inner.font; + let cw = inner.char_width; + let ch = inner.char_height; + + let bytes_per_line = (font.width() as usize + 7) / 8; + + let mut iter = state.buffer.flush_rows(); + + let old_cursor_col = inner.cursor_col; + let old_cursor_row = inner.cursor_row; + // New cursor + let cursor_col = state.cursor_col; + let cursor_row = state.cursor_row; + + inner.fill_rect( + old_cursor_col * cw, + old_cursor_row * ch, + cw, + ch, + state.bg_color.as_rgba(false), + ); + + while let Some((row_idx, row)) = iter.next_dirty() { + if row_idx >= inner.height { + break; + } + + for (col_idx, &chr) in row.iter().take(inner.width as _).enumerate() { + let glyph = chr.character(); + let (fg, bg, attr) = chr.attributes(); + + let mut fg = fg.as_rgba(attr.contains(Attributes::BOLD)); + let mut bg = bg.as_rgba(false); + + if row_idx == cursor_row && col_idx == cursor_col as usize { + core::mem::swap(&mut fg, &mut bg); + } + inner.draw_glyph( + font, + DrawGlyph { + sx: (col_idx as u32) * cw, + sy: row_idx * ch, + c: glyph, + fg, + bg, + bytes_per_line, + }, + ); + } + } + + // Place cursor + inner.fill_rect( + cursor_col * cw, + cursor_row * ch, + cw, + ch, + state.fg_color.as_rgba(false), + ); + + inner.cursor_col = cursor_col; + inner.cursor_row = 
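+        // Remember where the cursor was painted: the next flush erases the old
+        // cursor cell first (the fill_rect at the top of this function), then
+        // redraws only the rows the ConsoleBuffer marked dirty.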
cursor_row; + } +} + +impl FramebufferConsole { + /// Constructs an instance of console from its framebuffer reference + pub fn from_framebuffer( + framebuffer: &'static LinearFramebuffer, + font: Option>, + ) -> Result { + let font = font.unwrap_or_default(); + let char_width = font.width(); + let char_height = font.height(); + let dim = framebuffer.dimensions(); + let buffer = ConsoleBuffer::new(dim.height / char_height)?; + + let inner = Inner { + framebuffer, + font, + width: dim.width / char_width, + height: dim.height / char_height, + char_width, + char_height, + cursor_row: 0, + cursor_col: 0, + }; + + Ok(Self { + inner: IrqSafeSpinlock::new(inner), + state: IrqSafeSpinlock::new(ConsoleState::new(buffer)), + }) + } +} + +impl Inner { + #[optimize(speed)] + fn draw_glyph(&mut self, font: PcScreenFont<'static>, g: DrawGlyph) { + let Some(mut fb) = (unsafe { self.framebuffer.lock() }) else { + return; + }; + + let mut c = g.c as u32; + if c >= font.len() { + c = b'?' as u32; + } + + let mut glyph = font.raw_glyph_data(c); + + let mut y = 0; + + while y < font.height() { + let mut mask = 1 << (font.width() - 1); + let mut x = 0; + + while x < font.width() { + let v = if glyph[0] & mask != 0 { g.fg } else { g.bg }; + let v = v | 0xFF000000; + fb[g.sy + y][(g.sx + x) as usize] = v; + mask >>= 1; + x += 1; + } + + glyph = &glyph[g.bytes_per_line..]; + y += 1; + } + } + + #[optimize(speed)] + fn fill_rect(&mut self, x: u32, y: u32, w: u32, h: u32, val: u32) { + let Some(mut fb) = (unsafe { self.framebuffer.lock() }) else { + return; + }; + + for i in 0..h { + let row = &mut fb[i + y]; + row[x as usize..(x + w) as usize].fill(val); + } + } +} diff --git a/kernel/src/device/display/font.psfu b/kernel/src/device/display/font.psfu new file mode 100644 index 00000000..e789b771 Binary files /dev/null and b/kernel/src/device/display/font.psfu differ diff --git a/kernel/src/device/display/font.rs b/kernel/src/device/display/font.rs new file mode 100644 index 00000000..f45824a3 --- /dev/null +++ b/kernel/src/device/display/font.rs @@ -0,0 +1,73 @@ +//! 
Font management and data structures + +use core::mem::size_of; + +use abi::error::Error; +use bytemuck::{Pod, Zeroable}; +use libk::AlignedTo; + +static CONSOLE_FONT: &AlignedTo = &AlignedTo { + align: [], + bytes: *include_bytes!("font.psfu"), +}; + +#[repr(C)] +#[derive(Pod, Zeroable, Clone, Copy)] +struct PsfHeader { + magic: u32, + version: u32, + header_size: u32, + flags: u32, + num_glyph: u32, + bytes_per_glyph: u32, + height: u32, + width: u32, +} + +/// Represents a PSF-format font object +#[derive(Clone, Copy)] +pub struct PcScreenFont<'a> { + header: &'a PsfHeader, + glyph_data: &'a [u8], +} + +impl Default for PcScreenFont<'static> { + fn default() -> Self { + Self::from_bytes(&CONSOLE_FONT.bytes).unwrap() + } +} + +impl<'a> PcScreenFont<'a> { + /// Constructs an instance of [PcScreenFont] from its byte representation + pub fn from_bytes(bytes: &'a [u8]) -> Result { + let header: &PsfHeader = bytemuck::from_bytes(&bytes[..size_of::()]); + let glyph_data = &bytes[header.header_size as usize..]; + + Ok(Self { header, glyph_data }) + } + + /// Returns the character width of the font + #[inline] + pub const fn width(&self) -> u32 { + self.header.width + } + + /// Returns the character height of the font + #[inline] + pub const fn height(&self) -> u32 { + self.header.height + } + + /// Returns the count of glyphs present in the font + #[allow(clippy::len_without_is_empty)] + #[inline] + pub const fn len(&self) -> u32 { + self.header.num_glyph + } + + /// Returns the data slice of a single glyph within the font + #[inline] + pub fn raw_glyph_data(&self, index: u32) -> &[u8] { + &self.glyph_data[(index * self.header.bytes_per_glyph) as usize..] + } +} diff --git a/kernel/src/device/display/linear_fb.rs b/kernel/src/device/display/linear_fb.rs new file mode 100644 index 00000000..6cb50184 --- /dev/null +++ b/kernel/src/device/display/linear_fb.rs @@ -0,0 +1,264 @@ +//! Abstract linear framebuffer device implementation + +use core::{ + ops::{Index, IndexMut}, + task::{Context, Poll}, +}; + +use abi::{error::Error, io::DeviceRequest, process::ProcessId}; +use device_api::Device; +use libk_mm::{ + address::{IntoRaw, PhysicalAddress}, + device::{DeviceMemoryAttributes, DeviceMemoryCaching, RawDeviceMemoryMapping}, + table::{EntryLevel, MapAttributes}, + PageProvider, +}; +use libk_thread::thread::Thread; +use libk_util::sync::IrqSafeSpinlock; +use ygg_driver_block::BlockDevice; + +use crate::arch::L3; + +use super::{DisplayDevice, DisplayDimensions}; + +struct Inner { + base: usize, + stride: usize, + holder: Option, +} + +#[doc(hidden)] +pub struct FramebufferAccess { + dimensions: DisplayDimensions, + base: usize, + stride: usize, +} + +/// Linear framebuffer wrapper +pub struct LinearFramebuffer { + inner: IrqSafeSpinlock, + phys_base: PhysicalAddress, + dimensions: DisplayDimensions, + size: usize, +} + +impl LinearFramebuffer { + /// Constructs a liner framebuffer struct from its components. + /// + /// # Safety + /// + /// Unsafe: the caller must ensure the validity of all the arguments. + pub unsafe fn from_physical_bits( + phys_base: PhysicalAddress, + size: usize, + stride: usize, + width: u32, + height: u32, + ) -> Result { + let base = unsafe { + RawDeviceMemoryMapping::map( + phys_base.into_raw(), + size, + DeviceMemoryAttributes { + caching: DeviceMemoryCaching::Cacheable, + }, + ) + }? 
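+        // The mapping is leaked deliberately: the framebuffer lives for the
+        // rest of the kernel's lifetime, so only the raw virtual base address
+        // is kept and the device-memory mapping is never unmapped.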
+ .leak(); + + let inner = Inner { + base, + stride, + holder: None, + }; + + let res = Self { + inner: IrqSafeSpinlock::new(inner), + dimensions: DisplayDimensions { width, height }, + phys_base, + size, + }; + + // Clear the screen + res.lock().unwrap().fill_rows(0, height, 0); + + Ok(res) + } + + /// Temporary function to provide framebuffer access + /// + /// # Safety + /// + /// Unsafe: access is not synchronized + // TODO doesn't actually lock + pub unsafe fn lock(&self) -> Option { + let inner = self.inner.lock(); + + if inner.holder.is_some() { + None + } else { + Some(FramebufferAccess { + dimensions: self.dimensions, + base: inner.base, + stride: inner.stride, + }) + } + } +} + +impl BlockDevice for LinearFramebuffer { + fn size(&self) -> Result { + Ok(self.size as _) + } + + fn poll_read( + &self, + _cx: &mut Context<'_>, + _pos: u64, + _buf: &mut [u8], + ) -> Poll> { + todo!() + } + + fn poll_write( + &self, + _cx: &mut Context<'_>, + _pos: u64, + _buf: &[u8], + ) -> Poll> { + todo!() + } + + fn is_readable(&self) -> bool { + false + } + + fn is_writable(&self) -> bool { + false + } + + fn device_request(&self, req: &mut DeviceRequest) -> Result<(), Error> { + let thread = Thread::current(); + match req { + DeviceRequest::AcquireDevice => { + self.inner.lock().holder.replace(thread.process_id()); + Ok(()) + } + _ => todo!(), + } + } +} + +impl PageProvider for LinearFramebuffer { + fn get_page(&self, offset: u64) -> Result { + let offset = offset as usize; + if offset + L3::SIZE > self.size { + warnln!( + "Tried to map offset {:#x}, but size is {:#x}", + offset, + self.size + ); + Err(Error::InvalidMemoryOperation) + } else { + let page = self.phys_base.add(offset); + Ok(page) + } + } + + fn release_page(&self, _offset: u64, _phys: PhysicalAddress) -> Result<(), Error> { + Ok(()) + } + + fn clone_page( + &self, + _offset: u64, + _src_phys: PhysicalAddress, + _src_attrs: MapAttributes, + ) -> Result { + todo!() + } +} + +impl Device for LinearFramebuffer { + fn display_name(&self) -> &'static str { + "Linear Framebuffer" + } +} + +impl DisplayDevice for LinearFramebuffer { + fn dimensions(&self) -> DisplayDimensions { + self.dimensions + } +} + +impl FramebufferAccess { + /// Copies `count` rows starting from `src_row` to `dst_row` + #[optimize(speed)] + pub fn copy_rows(&mut self, src_row: u32, dst_row: u32, count: u32) { + use core::ffi::c_void; + extern "C" { + fn memmove(dst: *mut c_void, src: *const c_void, len: usize) -> *mut c_void; + } + + if src_row == dst_row { + return; + } + + let src_end_row = core::cmp::min(self.dimensions.height, src_row + count); + let dst_end_row = core::cmp::min(self.dimensions.height, dst_row + count); + + if dst_end_row <= dst_row || src_end_row <= dst_row { + return; + } + let count = core::cmp::min(src_end_row - src_row, dst_end_row - dst_row) as usize; + + let src_base_addr = self.base + self.stride * src_row as usize; + let dst_base_addr = self.base + self.stride * dst_row as usize; + + unsafe { + memmove( + dst_base_addr as *mut c_void, + src_base_addr as *mut c_void, + self.stride * count, + ); + } + } + + /// Fills the specified number of pixel rows with given pixel value + pub fn fill_rows(&mut self, start_row: u32, count: u32, value: u32) { + use core::ffi::c_void; + extern "C" { + fn memset(s: *mut c_void, c: u32, len: usize) -> *mut c_void; + } + + let end_row = core::cmp::min(self.dimensions.height, start_row + count); + if end_row <= start_row { + return; + } + + let count = (end_row - start_row) as usize; + let base_addr = 
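+        // Rows are addressed as base + stride * row, where `stride` is the
+        // length of one scanline in bytes. For a hypothetical 1024x768
+        // XRGB8888 mode with no padding, stride = 1024 * 4 = 4096, so row 10
+        // starts 40960 bytes into the framebuffer.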
self.base + self.stride * start_row as usize; + + unsafe { + memset(base_addr as *mut c_void, value, self.stride * count); + } + } +} + +impl Index for FramebufferAccess { + type Output = [u32]; + + fn index(&self, index: u32) -> &Self::Output { + assert!(index < self.dimensions.height); + let row_addr = self.base + self.stride * index as usize; + unsafe { core::slice::from_raw_parts(row_addr as *const u32, self.dimensions.width as _) } + } +} + +impl IndexMut for FramebufferAccess { + fn index_mut(&mut self, index: u32) -> &mut Self::Output { + assert!(index < self.dimensions.height); + let row_addr = self.base + self.stride * index as usize; + unsafe { core::slice::from_raw_parts_mut(row_addr as *mut u32, self.dimensions.width as _) } + } +} diff --git a/kernel/src/device/display/mod.rs b/kernel/src/device/display/mod.rs new file mode 100644 index 00000000..02df0cca --- /dev/null +++ b/kernel/src/device/display/mod.rs @@ -0,0 +1,25 @@ +//! Display device interfaces + +use super::Device; + +pub mod console; +pub mod font; + +#[cfg(feature = "fb_console")] +pub mod fb_console; +pub mod linear_fb; + +/// Resolution of the display device +#[derive(Clone, Copy, Debug)] +pub struct DisplayDimensions { + /// Width of the display in pixels + pub width: u32, + /// Height of the display in pixels + pub height: u32, +} + +/// Abstract display device interface +pub trait DisplayDevice: Device { + /// Returns the dimensions of the display in its current mode + fn dimensions(&self) -> DisplayDimensions; +} diff --git a/kernel/src/device/mod.rs b/kernel/src/device/mod.rs new file mode 100644 index 00000000..6fd64d9e --- /dev/null +++ b/kernel/src/device/mod.rs @@ -0,0 +1,25 @@ +//! Device management and interfaces + +use device_api::{manager::DeviceManager, Device, DeviceId}; +use libk_util::sync::{IrqSafeSpinlock, IrqSafeSpinlockGuard}; + +pub mod bus; + +pub mod display; +pub mod power; +pub mod serial; +pub mod timer; +pub mod tty; + +static DEVICE_MANAGER: IrqSafeSpinlock = IrqSafeSpinlock::new(DeviceManager::new()); + +/// Adds a device to the kernel's device table and returns the ID assigned to it +pub fn register_device(device: &'static dyn Device) -> DeviceId { + debugln!("Register {:?}", device.display_name()); + DEVICE_MANAGER.lock().register(device) +} + +/// Returns a safe reference to the kernel's [DeviceManager] instance +pub fn manager_lock<'a>() -> IrqSafeSpinlockGuard<'a, DeviceManager> { + DEVICE_MANAGER.lock() +} diff --git a/kernel/src/device/platform.rs b/kernel/src/device/platform.rs new file mode 100644 index 00000000..414c99ec --- /dev/null +++ b/kernel/src/device/platform.rs @@ -0,0 +1,20 @@ +//! Hardware platform interface + +use abi::error::Error; + +use crate::arch::CpuMessage; + +use super::{ + interrupt::{ExternalInterruptController, IpiDeliveryTarget}, + timer::TimestampSource, +}; + +/// Platform interface for interacting with a general hardware set +pub trait Platform { + /// Returns the platform's primary timestamp source. + /// + /// # Note + /// + /// May not be initialized at the moment of calling. + fn timestamp_source(&self) -> &dyn TimestampSource; +} diff --git a/kernel/src/device/power/arm_psci.rs b/kernel/src/device/power/arm_psci.rs new file mode 100644 index 00000000..5868b2fd --- /dev/null +++ b/kernel/src/device/power/arm_psci.rs @@ -0,0 +1,93 @@ +//! 
ARM PSCI driver implementation + +use abi::error::Error; +use alloc::boxed::Box; +use device_api::{CpuBringupDevice, Device, ResetDevice}; +use device_tree::{device_tree_driver, dt::DevTreeIndexNodePropGet}; +use kernel_arch::{Architecture, ArchitectureImpl}; + +use crate::arch::PLATFORM; + +enum CallMethod { + Hvc, + Smc, +} + +/// ARM Power State Coordination Interface driver +pub struct Psci { + method: CallMethod, + cpu_on: u32, + #[allow(dead_code)] + cpu_off: u32, + #[allow(dead_code)] + cpu_suspend: u32, +} + +impl Device for Psci { + fn display_name(&self) -> &'static str { + "ARM PSCI" + } + + unsafe fn init(&'static self) -> Result<(), Error> { + PLATFORM.psci.init(self); + Ok(()) + } +} + +impl CpuBringupDevice for Psci { + unsafe fn start_cpu(&self, id: usize, ip: usize, arg0: usize) -> Result<(), Error> { + self.call(self.cpu_on as _, id as _, ip as _, arg0 as _); + Ok(()) + } +} + +impl ResetDevice for Psci { + unsafe fn reset(&self) -> ! { + ArchitectureImpl::set_interrupt_mask(true); + + self.call(Self::SYSTEM_RESET as _, 0, 0, 0); + + loop { + ArchitectureImpl::wait_for_interrupt(); + } + } +} + +impl Psci { + const SYSTEM_RESET: u32 = 0x84000009; + + #[inline] + unsafe fn call(&self, mut x0: u64, x1: u64, x2: u64, x3: u64) -> u64 { + match self.method { + CallMethod::Hvc => { + core::arch::asm!("hvc #0", inlateout("x0") x0, in("x1") x1, in("x2") x2, in("x3") x3) + } + CallMethod::Smc => { + core::arch::asm!("smc #0", inlateout("x0") x0, in("x1") x1, in("x2") x2, in("x3") x3) + } + } + x0 + } +} + +device_tree_driver! { + compatible: ["arm,psci-1.0", "arm,psci"], + probe(dt) => { + let method: &str = dt.node.prop("method")?; + let method = match method { + "hvc" => CallMethod::Hvc, + "smc" => CallMethod::Smc, + _ => panic!("Unknown PSCI call method: {:?}", method) + }; + let cpu_on = dt.node.prop("cpu_on")?; + let cpu_off = dt.node.prop("cpu_off")?; + let cpu_suspend = dt.node.prop("cpu_suspend")?; + + Some(Box::new(Psci { + method, + cpu_on, + cpu_off, + cpu_suspend + })) + } +} diff --git a/kernel/src/device/power/mod.rs b/kernel/src/device/power/mod.rs new file mode 100644 index 00000000..8b996e54 --- /dev/null +++ b/kernel/src/device/power/mod.rs @@ -0,0 +1,10 @@ +//! Power-management related device drivers + +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(target_arch = "aarch64")] { + pub mod arm_psci; + // pub mod sunxi_rwdog; + } +} diff --git a/kernel/src/device/power/sunxi_rwdog.rs b/kernel/src/device/power/sunxi_rwdog.rs new file mode 100644 index 00000000..a8868427 --- /dev/null +++ b/kernel/src/device/power/sunxi_rwdog.rs @@ -0,0 +1,108 @@ +//! Allwinner (H6) R Watchdog driver + +use abi::error::Error; +use alloc::boxed::Box; +use device_api::{Device, ResetDevice}; +use libk::util::OneTimeInit; +use tock_registers::{ + interfaces::Writeable, register_bitfields, register_structs, registers::ReadWrite, +}; + +use crate::{ + arch::{Architecture, PLATFORM}, + device::devtree::{self, DevTreeIndexNodePropGet, DevTreeIndexPropExt}, + device_tree_driver, + mem::device::DeviceMemoryIo, + sync::IrqSafeSpinlock, +}; + +register_bitfields! { + u32, + CTRL [ + KEY OFFSET(1) NUMBITS(12) [ + Value = 0xA57 + ], + RESTART OFFSET(0) NUMBITS(1) [] + ], + CFG [ + CONFIG OFFSET(0) NUMBITS(2) [ + System = 1, + ] + ], + MODE [ + EN OFFSET(0) NUMBITS(1) [], + ] +} + +register_structs! 
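+// The layout below mirrors the R_WDOG register block: fields like
+// (0x08 => _0) are explicit padding and @END fixes the total MMIO size.
+// With it, a system reset (see `ResetDevice::reset` below) is just:
+//
+//     regs.CFG.write(CFG::CONFIG::System);                     // bark -> whole-system reset
+//     regs.MODE.write(MODE::EN::SET);                          // enable the watchdog
+//     regs.CTRL.write(CTRL::KEY::Value + CTRL::RESTART::SET);  // kick it with the 0xA57 key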
{ + #[allow(non_snake_case)] + Regs { + (0x00 => IRQ_EN: ReadWrite), + (0x04 => IRQ_STA: ReadWrite), + (0x08 => _0), + (0x10 => CTRL: ReadWrite), + (0x14 => CFG: ReadWrite), + (0x18 => MODE: ReadWrite), + (0x1C => @END), + } +} + +struct Inner { + regs: DeviceMemoryIo, +} + +struct RWdog { + inner: OneTimeInit>, + base: usize, +} + +impl ResetDevice for RWdog { + unsafe fn reset(&self) -> ! { + // TODO disable IRQs + let inner = self.inner.get().lock(); + + inner.regs.CFG.write(CFG::CONFIG::System); + inner.regs.MODE.write(MODE::EN::SET); + inner.regs.CTRL.write(CTRL::KEY::Value + CTRL::RESTART::SET); + + loop { + core::arch::asm!("wfe"); + } + } +} + +impl Device for RWdog { + fn display_name(&self) -> &'static str { + "Allwinner H6 Watchdog" + } + + unsafe fn init(&'static self) -> Result<(), Error> { + let regs = DeviceMemoryIo::map("r_wdog", self.base)?; + + self.inner.init(IrqSafeSpinlock::new(Inner { regs })); + + PLATFORM.register_reset_device(self)?; + + Ok(()) + } +} + +device_tree_driver! { + compatible: ["allwinner,sun50i-h6-wdt"], + probe(of) => { + let reg = devtree::find_prop(&of.node, "reg")?; + let status: &str = of.node.prop("status").unwrap_or("enabled"); + + if status == "disabled" { + return None; + } + + let (base, _) = reg.cell2_array_item(0, of.address_cells, of.size_cells)?; + let base = base as usize; + + Some(Box::new(RWdog { + inner: OneTimeInit::new(), + base + })) + } +} diff --git a/kernel/src/device/serial/mod.rs b/kernel/src/device/serial/mod.rs new file mode 100644 index 00000000..4a364aa0 --- /dev/null +++ b/kernel/src/device/serial/mod.rs @@ -0,0 +1,10 @@ +//! Serial device interfaces + +use cfg_if::cfg_if; + +cfg_if! { + if #[cfg(target_arch = "aarch64")] { + pub mod pl011; + // pub mod sunxi_uart; + } +} diff --git a/kernel/src/device/serial/pl011.rs b/kernel/src/device/serial/pl011.rs new file mode 100644 index 00000000..08f830cd --- /dev/null +++ b/kernel/src/device/serial/pl011.rs @@ -0,0 +1,250 @@ +//! ARM PL011 driver +use abi::{error::Error, io::DeviceRequest, process::ProcessId}; +use alloc::boxed::Box; +use device_api::{ + interrupt::{InterruptHandler, Irq}, + serial::SerialDevice, + Device, +}; +use device_tree::{device_tree_driver, dt::DevTreeIndexPropExt}; +use futures_util::task::{Context, Poll}; +use kernel_fs::devfs::{self, CharDeviceType}; +use libk::{block, device::external_interrupt_controller}; +use libk_mm::{ + address::{FromRaw, PhysicalAddress}, + device::DeviceMemoryIo, +}; +use libk_util::{sync::IrqSafeSpinlock, OneTimeInit}; +use tock_registers::{ + interfaces::{ReadWriteable, Readable, Writeable}, + register_bitfields, register_structs, + registers::{ReadOnly, ReadWrite, WriteOnly}, +}; +use vfs::{CharDevice, FileReadiness}; + +use crate::{ + debug::{self, DebugSink, LogLevel}, + device::tty::{TtyContext, TtyDevice}, +}; + +register_bitfields! { + u32, + FR [ + TXFF OFFSET(5) NUMBITS(1) [], + RXFE OFFSET(4) NUMBITS(1) [], + BUSY OFFSET(3) NUMBITS(1) [], + ], + CR [ + RXE OFFSET(9) NUMBITS(1) [], + TXE OFFSET(8) NUMBITS(1) [], + UARTEN OFFSET(0) NUMBITS(1) [], + ], + ICR [ + ALL OFFSET(0) NUMBITS(11) [], + ], + IMSC [ + RXIM OFFSET(4) NUMBITS(1) [], + ] +} + +register_structs! 
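+// Sparse layout of the PL011 register file: the `_N` fields stand in for the
+// reserved gaps between documented registers, so FR really sits at offset
+// 0x18 and ICR at 0x44 even though only a handful of registers are modeled.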
{ + #[allow(non_snake_case)] + Regs { + /// Transmit/receive data register + (0x00 => DR: ReadWrite), + (0x04 => _0), + (0x18 => FR: ReadOnly), + (0x1C => _1), + (0x2C => LCR_H: ReadWrite), + (0x30 => CR: ReadWrite), + (0x34 => IFLS: ReadWrite), + (0x38 => IMSC: ReadWrite), + (0x3C => _2), + (0x44 => ICR: WriteOnly), + (0x48 => @END), + } +} + +struct Pl011Inner { + regs: DeviceMemoryIo<'static, Regs>, +} + +/// PL011 device instance +pub struct Pl011 { + inner: OneTimeInit>, + base: PhysicalAddress, + irq: Irq, + context: TtyContext, +} + +impl Pl011Inner { + fn send_byte(&mut self, b: u8) -> Result<(), Error> { + while self.regs.FR.matches_all(FR::TXFF::SET) { + core::hint::spin_loop(); + } + self.regs.DR.set(b as u32); + Ok(()) + } + + unsafe fn init(&mut self) { + self.regs.CR.set(0); + self.regs.ICR.write(ICR::ALL::CLEAR); + self.regs + .CR + .write(CR::UARTEN::SET + CR::TXE::SET + CR::RXE::SET); + } +} + +impl DebugSink for Pl011 { + fn putc(&self, byte: u8) -> Result<(), Error> { + self.send(byte) + } + + fn supports_control_sequences(&self) -> bool { + true + } +} + +impl TtyDevice for Pl011 { + fn context(&self) -> &TtyContext { + &self.context + } +} + +impl FileReadiness for Pl011 { + fn poll_read(&self, _cx: &mut Context<'_>) -> Poll> { + todo!() + } +} + +// impl CharDevice for Pl011 { +// fn write(&self, blocking: bool, data: &[u8]) -> Result { +// assert!(blocking); +// self.line_write(data) +// } +// +// fn read(&'static self, blocking: bool, data: &mut [u8]) -> Result { +// assert!(blocking); +// match block! { +// self.line_read(data).await +// } { +// Ok(res) => res, +// Err(err) => Err(err), +// } +// } +// +// } + +impl CharDevice for Pl011 { + fn write(&self, data: &[u8]) -> Result { + self.line_write(data) + } + + fn read(&self, data: &mut [u8]) -> Result { + match block! 
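+        // `block!` bridges the async line discipline into this synchronous
+        // trait method by driving `line_read` to completion on the current
+        // thread. The result is nested: the outer Result reports a failure to
+        // block at all, the inner one is `line_read`'s own result, hence the
+        // match below.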
{ + self.line_read(data).await + } { + Ok(res) => res, + Err(err) => Err(err), + } + } + + fn device_request(&self, req: &mut DeviceRequest) -> Result<(), Error> { + match req { + &mut DeviceRequest::SetTerminalGroup(id) => { + self.set_signal_group(ProcessId::from(id)); + Ok(()) + } + DeviceRequest::SetTerminalOptions(config) => self.context.set_config(config), + DeviceRequest::GetTerminalOptions(config) => { + config.write(self.context.config()); + Ok(()) + } + _ => Err(Error::InvalidArgument), + } + } +} + +impl SerialDevice for Pl011 { + fn send(&self, byte: u8) -> Result<(), Error> { + self.inner.get().lock().send_byte(byte) + } +} + +impl InterruptHandler for Pl011 { + fn handle_irq(&self, _vector: Option) -> bool { + let inner = self.inner.get().lock(); + inner.regs.ICR.write(ICR::ALL::CLEAR); + + let byte = inner.regs.DR.get(); + drop(inner); + + // if byte == b'\x1b' as u32 { + // use crate::task::sched::CpuQueue; + + // for (i, queue) in CpuQueue::all().enumerate() { + // log_print_raw!(LogLevel::Fatal, "queue{}:\n", i); + // let lock = unsafe { queue.grab() }; + // for item in lock.iter() { + // log_print_raw!( + // LogLevel::Fatal, + // "* {} {:?} {:?}\n", + // item.id(), + // item.name(), + // item.state() + // ); + // } + // } + // } else { + self.recv_byte(byte as u8); + // } + + true + } +} + +impl Device for Pl011 { + fn display_name(&self) -> &'static str { + "Primecell PL011 UART" + } + + unsafe fn init(&'static self) -> Result<(), Error> { + let mut inner = Pl011Inner { + regs: DeviceMemoryIo::map(self.base, Default::default())?, + }; + inner.init(); + + self.inner.init(IrqSafeSpinlock::new(inner)); + + debug::add_sink(self, LogLevel::Debug); + devfs::add_char_device(self, CharDeviceType::TtySerial)?; + + Ok(()) + } + + unsafe fn init_irq(&'static self) -> Result<(), Error> { + let intc = external_interrupt_controller(); + + intc.register_irq(self.irq, Default::default(), self)?; + self.inner.get().lock().regs.IMSC.modify(IMSC::RXIM::SET); + intc.enable_irq(self.irq)?; + + Ok(()) + } +} + +device_tree_driver! { + compatible: ["arm,pl011"], + probe(of) => { + let reg = device_tree::find_prop(&of.node, "reg")?; + let (base, _) = reg.cell2_array_item(0, of.address_cells, of.size_cells)?; + + Some(Box::new(Pl011 { + inner: OneTimeInit::new(), + // TODO obtain IRQ from dt + irq: Irq::External(1), + context: TtyContext::new(), + base: PhysicalAddress::from_raw(base) + })) + } +} diff --git a/kernel/src/device/serial/sunxi_uart.rs b/kernel/src/device/serial/sunxi_uart.rs new file mode 100644 index 00000000..79fac776 --- /dev/null +++ b/kernel/src/device/serial/sunxi_uart.rs @@ -0,0 +1,206 @@ +//! Allwinner (H6) UART implementation + +use abi::{error::Error, io::DeviceRequest}; +use alloc::boxed::Box; +use device_api::{interrupt::InterruptHandler, serial::SerialDevice, Device}; +use libk::util::OneTimeInit; +use tock_registers::{ + interfaces::{ReadWriteable, Readable, Writeable}, + register_bitfields, register_structs, + registers::{ReadOnly, ReadWrite}, +}; +use vfs::CharDevice; + +use crate::{ + arch::{aarch64::IrqNumber, Architecture, PLATFORM}, + debug::{self, DebugSink, LogLevel}, + device::{ + devtree::{self, DevTreeIndexPropExt}, + tty::{CharRing, TtyDevice}, + }, + device_tree_driver, + fs::devfs::{self, CharDeviceType}, + mem::device::DeviceMemoryIo, + sync::IrqSafeSpinlock, +}; + +register_bitfields! 
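+// Only the DW-APB UART bits this driver actually touches are modeled:
+// USR.TFE/TFNF for TX FIFO state, IER.ERBFI to unmask the receive interrupt
+// and IIR.IID to recognize the "receive data available" source.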
{ + u32, + USR [ + TFE OFFSET(2) NUMBITS(1) [], + TFNF OFFSET(1) NUMBITS(1) [] + ], + IER [ + ERBFI OFFSET(0) NUMBITS(1) [], + ], + IIR [ + IID OFFSET(0) NUMBITS(4) [ + RecvDataAvailable = 0b0100, + ] + ] +} + +register_structs! { + #[allow(non_snake_case)] + Regs { + (0x00 => DLL: ReadWrite), + (0x04 => IER: ReadWrite), + (0x08 => IIR: ReadWrite), + (0x0C => _0), + (0x7C => USR: ReadOnly), + (0x80 => @END), + } +} + +struct Inner { + regs: DeviceMemoryIo, +} + +struct SunxiUart { + inner: OneTimeInit>, + base: usize, + irq: IrqNumber, + ring: CharRing<16>, +} + +impl DebugSink for SunxiUart { + fn putc(&self, c: u8) -> Result<(), Error> { + self.send(c) + } + + fn supports_control_sequences(&self) -> bool { + true + } +} + +impl CharDevice for SunxiUart { + fn read(&'static self, _blocking: bool, data: &mut [u8]) -> Result { + self.line_read(data) + } + + fn write(&self, _blocking: bool, data: &[u8]) -> Result { + self.line_write(data) + } + + fn device_request(&self, req: &mut DeviceRequest) -> Result<(), Error> { + match req { + &mut DeviceRequest::SetTerminalGroup(id) => { + self.set_signal_group(id as _); + Ok(()) + } + _ => Err(Error::InvalidArgument), + } + } +} + +impl TtyDevice<16> for SunxiUart { + fn ring(&self) -> &CharRing<16> { + &self.ring + } +} + +impl InterruptHandler for SunxiUart { + fn handle_irq(&self) -> bool { + let inner = self.inner.get().lock(); + + if inner.regs.IIR.matches_all(IIR::IID::RecvDataAvailable) { + let byte = inner.regs.DLL.get(); + drop(inner); + + if byte == b'\x1b' as u32 { + panic!("RESET TRIGGERED"); + } + + self.recv_byte(byte as u8); + } + // inner.regs.ICR.write(ICR::ALL::CLEAR); + + // let byte = inner.regs.DR.get(); + // drop(inner); + + // if byte == b'\x1b' as u32 { + // use crate::task::sched::CpuQueue; + + // for (i, queue) in CpuQueue::all().enumerate() { + // log_print_raw!(LogLevel::Fatal, "queue{}:\n", i); + // let lock = unsafe { queue.grab() }; + // for item in lock.iter() { + // log_print_raw!( + // LogLevel::Fatal, + // "* {} {:?} {:?}\n", + // item.id(), + // item.name(), + // item.state() + // ); + // } + // } + // } else { + // self.recv_byte(byte as u8); + // } + + true + } +} + +impl SerialDevice for SunxiUart { + fn send(&self, byte: u8) -> Result<(), Error> { + let inner = self.inner.get().lock(); + if byte == b'\n' { + while inner.regs.USR.matches_all(USR::TFE::CLEAR) { + core::hint::spin_loop(); + } + inner.regs.DLL.set(b'\r' as u32); + } + while inner.regs.USR.matches_all(USR::TFE::CLEAR) { + core::hint::spin_loop(); + } + inner.regs.DLL.set(byte as u32); + Ok(()) + } +} + +impl Device for SunxiUart { + fn display_name(&self) -> &'static str { + "Allwinner UART" + } + + unsafe fn init(&'static self) -> Result<(), Error> { + let regs = DeviceMemoryIo::::map("sunxi-uart", self.base)?; + self.inner.init(IrqSafeSpinlock::new(Inner { regs })); + debug::add_sink(self, LogLevel::Debug); + devfs::add_char_device(self, CharDeviceType::TtySerial)?; + Ok(()) + } + + unsafe fn init_irq(&'static self) -> Result<(), Error> { + let intc = PLATFORM.external_interrupt_controller(); + + intc.register_irq(self.irq, Default::default(), self)?; + intc.enable_irq(self.irq)?; + + let inner = self.inner.get().lock(); + inner.regs.IER.modify(IER::ERBFI::SET); + + Ok(()) + } +} + +device_tree_driver! 
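+// A `device_tree_driver!` block registers a probe callback keyed on the
+// `compatible` strings: probe returns Some(device) to instantiate the driver
+// for a matching node, or None to skip it (used below to ignore every UART
+// except the hardcoded one at 0x05000000).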
{ + compatible: ["snps,dw-apb-uart"], + probe(of) => { + let reg = devtree::find_prop(&of.node, "reg")?; + let (base, _) = reg.cell2_array_item(0, of.address_cells, of.size_cells)?; + + if base == 0x05000000 { + Some(Box::new(SunxiUart { + inner: OneTimeInit::new(), + ring: CharRing::new(), + irq: IrqNumber::Shared(0), + base: base as usize + })) + } else { + // TODO don't just hardcode and ignore other UARTs + None + } + } +} diff --git a/kernel/src/device/timer.rs b/kernel/src/device/timer.rs new file mode 100644 index 00000000..17f1690a --- /dev/null +++ b/kernel/src/device/timer.rs @@ -0,0 +1 @@ +//! Timer device utilities diff --git a/kernel/src/device/tty.rs b/kernel/src/device/tty.rs new file mode 100644 index 00000000..c213adc3 --- /dev/null +++ b/kernel/src/device/tty.rs @@ -0,0 +1,299 @@ +//! Terminal driver implementation +use core::{ + pin::Pin, + sync::atomic::{AtomicBool, Ordering}, + task::{Context, Poll}, +}; + +use abi::{ + error::Error, + io::{TerminalInputOptions, TerminalLineOptions, TerminalOptions, TerminalOutputOptions}, + process::{ProcessId, Signal}, +}; +use device_api::serial::SerialDevice; +use futures_util::Future; +use libk_thread::process::ProcessImpl; +use libk_util::{ring::RingBuffer, sync::IrqSafeSpinlock, waker::QueueWaker}; + +use crate::{proc::io::ProcessIoImpl, task::process::ProcessManagerImpl}; + +struct TerminalRing { + buffer: IrqSafeSpinlock>, + eof: AtomicBool, + notify: QueueWaker, +} + +impl TerminalRing { + const CAPACITY: usize = 128; + + const fn new() -> Self { + Self { + buffer: IrqSafeSpinlock::new(RingBuffer::with_capacity(TerminalRing::CAPACITY)), + eof: AtomicBool::new(false), + notify: QueueWaker::new(), + } + } + + fn write(&self, ch: u8, signal: bool) { + self.buffer.lock().write(ch); + if signal { + self.notify.wake_one(); + } + } + + fn signal(&self) { + self.notify.wake_all(); + } + + fn read_blocking(&self) -> impl Future> + '_ { + struct F<'f> { + ring: &'f TerminalRing, + } + + impl<'f> Future for F<'f> { + type Output = Option; + + fn poll(self: Pin<&mut Self>, cx: &mut Context<'_>) -> Poll { + self.ring.notify.register(cx.waker()); + + if self + .ring + .eof + .compare_exchange(true, false, Ordering::Acquire, Ordering::Relaxed) + .is_ok() + { + self.ring.notify.remove(cx.waker()); + return Poll::Ready(None); + } + + let mut lock = self.ring.buffer.lock(); + + if lock.is_readable() { + self.ring.notify.remove(cx.waker()); + Poll::Ready(Some(unsafe { lock.read_single_unchecked() })) + } else { + Poll::Pending + } + } + } + + F { ring: self } + } +} + +struct TtyContextInner { + config: TerminalOptions, + process_group: Option, +} + +/// Represents the context of a terminal device +pub struct TtyContext { + ring: TerminalRing, // AsyncRing, + inner: IrqSafeSpinlock, +} + +// TODO merge this code with PTY +/// Terminal device interface +pub trait TtyDevice: SerialDevice { + /// Returns the ring buffer associated with the device + fn context(&self) -> &TtyContext; + + /// Sets the process group to which signals from this terminal should be delivered + fn set_signal_group(&self, id: ProcessId) { + self.context().inner.lock().process_group.replace(id); + } + + /// Sends a single byte to the terminal + fn line_send(&self, byte: u8) -> Result<(), Error> { + let cx = self.context(); + let inner = cx.inner.lock(); + + if byte == b'\n' + && inner + .config + .output + .contains(TerminalOutputOptions::NL_TO_CRNL) + { + self.send(b'\r').ok(); + } + + drop(inner); + + self.send(byte) + } + + /// Receives a single byte from the terminal + 
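+    //
+    // This is the input half of the line discipline: CR -> NL translation,
+    // echo (rendering control bytes as ^X), signal generation for the
+    // interrupt character, and finally pushing the byte into TerminalRing,
+    // waking a blocked reader only at line boundaries while in canonical
+    // mode.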
fn recv_byte(&self, mut byte: u8) { + let cx = self.context(); + let inner = cx.inner.lock(); + + if byte == b'\r' && inner.config.input.contains(TerminalInputOptions::CR_TO_NL) { + byte = b'\n'; + } + + if byte == b'\n' { + // TODO implement proper echo here + let _echo = inner.config.line.contains(TerminalLineOptions::ECHO) + || inner + .config + .line + .contains(TerminalLineOptions::CANONICAL | TerminalLineOptions::ECHO_NL); + + if inner + .config + .output + .contains(TerminalOutputOptions::NL_TO_CRNL) + { + self.send(b'\r').ok(); + } + self.send(byte).ok(); + } else if inner.config.line.contains(TerminalLineOptions::ECHO) { + if byte.is_ascii_control() { + if byte != inner.config.chars.erase && byte != inner.config.chars.werase { + self.send(b'^').ok(); + self.send(byte + 0x40).ok(); + } + } else { + self.send(byte).ok(); + } + } + + // byte == config.chars.interrupt + if byte == inner.config.chars.interrupt + && inner.config.line.contains(TerminalLineOptions::SIGNAL) + { + let pgrp = inner.process_group; + + cx.signal(); + + if let Some(pgrp) = pgrp { + drop(inner); + ProcessImpl::::signal_group( + pgrp, + Signal::Interrupted, + ); + return; + } else { + debugln!("Terminal has no process group attached"); + } + } + + let canonical = inner.config.line.contains(TerminalLineOptions::CANONICAL); + + drop(inner); + cx.putc(byte, !canonical || byte == b'\n' || byte == b'\r'); + } + + /// Reads and processes data from the terminal + async fn line_read(&self, data: &mut [u8]) -> Result { + let cx = self.context(); + let mut inner = cx.inner.lock(); + + if data.is_empty() { + return Ok(0); + } + + if !inner.config.is_canonical() { + drop(inner); + let Some(byte) = cx.getc().await else { + return Ok(0); + }; + data[0] = byte; + Ok(1) + } else { + let mut rem = data.len(); + let mut off = 0; + + // Run until either end of buffer or return condition is reached + while rem != 0 { + drop(inner); + let Some(byte) = cx.getc().await else { + break; + }; + inner = cx.inner.lock(); + + if inner.config.is_canonical() { + if byte == inner.config.chars.eof { + break; + } else if byte == inner.config.chars.erase { + // Erase + if off != 0 { + self.raw_write(b"\x1b[D \x1b[D")?; + off -= 1; + rem += 1; + } + + continue; + } else if byte == inner.config.chars.werase { + todo!() + } + } + + data[off] = byte; + off += 1; + rem -= 1; + + if byte == b'\n' || byte == b'\r' { + break; + } + } + + Ok(off) + } + } + + /// Processes and writes the data to the terminal + fn line_write(&self, data: &[u8]) -> Result { + for &byte in data { + self.line_send(byte)?; + } + Ok(data.len()) + } + + /// Writes raw data to the terminal bypassing the processing functions + fn raw_write(&self, data: &[u8]) -> Result { + for &byte in data { + self.send(byte)?; + } + Ok(data.len()) + } +} + +impl TtyContext { + /// Constructs a new [TtyContext] + pub fn new() -> Self { + Self { + ring: TerminalRing::new(), // AsyncRing::new(0), + inner: IrqSafeSpinlock::new(TtyContextInner { + config: TerminalOptions::const_default(), + process_group: None, + }), + } + } + + /// Signals an event on the terminal + pub fn signal(&self) { + self.ring.signal() + } + + /// Writes a single character to the terminal + pub fn putc(&self, ch: u8, signal: bool) { + self.ring.write(ch, signal); + } + + /// Performs a blocking read of a single character from the terminal + pub async fn getc(&self) -> Option { + self.ring.read_blocking().await + } + + /// Changes the configuration of the terminal + pub fn set_config(&self, config: &TerminalOptions) -> 
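In canonical mode, line_read above edits in place: the erase character removes the last buffered byte (and emits "\x1b[D \x1b[D" to clear the echoed glyph), EOF terminates the read, and NL/CR completes the line. The same state machine over a plain buffer, with ERASE and EOF standing in for inner.config.chars (the values below are common defaults, assumed here):

```rust
const ERASE: u8 = 0x7F; // typical default for chars.erase (DEL); assumed
const EOF: u8 = 0x04; // typical default for chars.eof (^D); assumed

/// Accumulate one canonical-mode line out of `input`, returning its length.
fn canonical_read(input: &[u8], out: &mut Vec<u8>) -> usize {
    for &byte in input {
        match byte {
            EOF => break,
            ERASE => {
                // The driver also writes "\x1b[D \x1b[D" to erase the glyph.
                out.pop();
            }
            b'\n' | b'\r' => {
                out.push(byte);
                break;
            }
            _ => out.push(byte),
        }
    }
    out.len()
}

fn main() {
    let mut line = Vec::new();
    canonical_read(b"lsx\x7f\n", &mut line);
    assert_eq!(line, b"ls\n");
}
```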
Result<(), Error> { + self.inner.lock().config = *config; + Ok(()) + } + + /// Returns the configuration of the terminal + pub fn config(&self) -> TerminalOptions { + self.inner.lock().config + } +} diff --git a/kernel/src/fs/mod.rs b/kernel/src/fs/mod.rs new file mode 100644 index 00000000..ce0ff18d --- /dev/null +++ b/kernel/src/fs/mod.rs @@ -0,0 +1,73 @@ +//! Filesystem implementations + +use core::ptr::NonNull; + +use kernel_fs::devfs; +use libk_mm::{ + address::{PhysicalAddress, Virtualize}, + phys, +}; +use libk_util::OneTimeInit; +use memfs::block::{self, BlockAllocator}; +use vfs::{impls::read_fn_node, NodeRef}; +use yggdrasil_abi::{error::Error, io::MountOptions}; + +use crate::proc::random; + +// pub mod devfs; +pub mod sysfs; + +/// Describes in-memory filesystem image used as initial root +pub struct Initrd { + /// Page-aligned start address of the initrd + pub phys_page_start: PhysicalAddress, + /// Page-aligned length + pub phys_page_len: usize, + /// Safe reference to the initrd data slice + pub data: &'static [u8], +} + +/// Holds reference to the data of initrd as well as its page-aligned physical memory region +pub static INITRD_DATA: OneTimeInit = OneTimeInit::new(); + +/// Implementation of [memfs::block::BlockAllocator] for the kernel +pub struct FileBlockAllocator; + +unsafe impl BlockAllocator for FileBlockAllocator { + fn alloc() -> Result, Error> { + // TODO make this a static assertion + assert_eq!(block::SIZE, 4096); + let page = phys::alloc_page()?; + Ok(unsafe { NonNull::new_unchecked(page.virtualize() as *mut _) }) + } + + unsafe fn dealloc(block: NonNull) { + let page = block.as_ptr() as usize; + let physical = PhysicalAddress::from_virtualized(page); + phys::free_page(physical); + } +} + +/// Constructs an instance of a filesystem for given set of [MountOptions] +pub fn create_filesystem(options: &MountOptions) -> Result { + let fs_name = options.filesystem.unwrap(); + + match fs_name { + "devfs" => Ok(devfs::root().clone()), + "sysfs" => Ok(sysfs::root().clone()), + _ => todo!(), + } +} + +/// Adds "pseudo"-devices to the filesystem (i.e. /dev/random) +pub fn add_pseudo_devices() -> Result<(), Error> { + let random = read_fn_node(move |_, buf| { + random::read(buf); + Ok(buf.len()) + }); + + let root = devfs::root(); + root.add_child("random", random)?; + + Ok(()) +} diff --git a/kernel/src/fs/sysfs.rs b/kernel/src/fs/sysfs.rs new file mode 100644 index 00000000..6ca38d8f --- /dev/null +++ b/kernel/src/fs/sysfs.rs @@ -0,0 +1,54 @@ +//! 
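FileBlockAllocator::alloc above checks block::SIZE == 4096 at runtime and carries a TODO to make it a static assertion. One way that could look as a compile-time check (a sketch; the constant names are stand-ins for block::SIZE and the kernel page size):

```rust
const BLOCK_SIZE: usize = 4096; // stand-in for memfs block::SIZE
const PAGE_SIZE: usize = 4096; // stand-in for the kernel page size

// Evaluated at compile time: a mismatch fails the build instead of
// panicking on the first allocation.
const _: () = assert!(
    BLOCK_SIZE == PAGE_SIZE,
    "memfs block size must match the page size"
);

fn main() {}
```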
"System" filesystem implementation + +use abi::error::Error; +use git_version::git_version; +use libk_mm::phys; +use libk_util::OneTimeInit; +use vfs::{ + impls::{const_value_node, mdir, read_fn_node, ReadOnlyFnValueNode}, + NodeRef, +}; + +use crate::{debug, util}; + +static ROOT: OneTimeInit = OneTimeInit::new(); + +/// Returns the root of the filesystem +pub fn root() -> &'static NodeRef { + ROOT.get() +} + +fn read_kernel_log(pos: u64, buffer: &mut [u8]) -> Result { + Ok(debug::RING_LOGGER_SINK.read(pos as usize, buffer)) +} + +/// Sets up the entries within the filesystem +pub fn init() { + let d_kernel = mdir([ + ("version", const_value_node(env!("CARGO_PKG_VERSION"))), + ("rev", const_value_node(git_version!())), + ("log", read_fn_node(read_kernel_log)), + ]); + let d_mem_phys = mdir([ + ( + "total_pages", + ReadOnlyFnValueNode::new(|| Ok(phys::stats().total_usable_pages)), + ), + ( + "free_pages", + ReadOnlyFnValueNode::new(|| Ok(phys::stats().free_pages)), + ), + ( + "allocated_pages", + ReadOnlyFnValueNode::new(|| Ok(phys::stats().allocated_pages)), + ), + ]); + let d_mem = mdir([("phys", d_mem_phys)]); + let root = mdir([ + ("kernel", d_kernel), + ("mem", d_mem), + ("arch", const_value_node(util::arch_str())), + ]); + + ROOT.init(root); +} diff --git a/kernel/src/init.rs b/kernel/src/init.rs new file mode 100644 index 00000000..22190535 --- /dev/null +++ b/kernel/src/init.rs @@ -0,0 +1,81 @@ +//! Kernel main process implementation: filesystem initialization and userspace init start + +use abi::error::Error; +use alloc::borrow::ToOwned; +use kernel_fs::devfs; +use libk::runtime; +use libk_thread::thread::Thread; +use memfs::MemoryFilesystem; +use vfs::{impls::FnSymlink, IoContext, NodeRef}; + +use crate::{ + fs::{FileBlockAllocator, INITRD_DATA}, + proc::{self, random}, + task::process::ProcessManagerImpl, +}; + +fn setup_root() -> Result { + let initrd_data = INITRD_DATA.get(); + let fs = MemoryFilesystem::::from_slice(initrd_data.data).unwrap(); + fs.root() +} + +/// Kernel's "main" process function. +/// +/// # Note +/// +/// This function is meant to be used as a kernel-space process after all the platform-specific +/// initialization has finished. +pub fn kinit() -> Result<(), Error> { + infoln!("In main"); + + runtime::spawn(ygg_driver_usb::bus::bus_handler())?; + + devfs::root().add_child( + "tty", + FnSymlink::new(|| { + let thread = Thread::current(); + let process = thread.process::(); + + if let Some(tty) = process.session_terminal() { + Ok(tty) + } else { + Err(Error::InvalidFile) + } + }), + )?; + + ygg_driver_net_loopback::init(); + ygg_driver_net_core::start_network_tasks()?; + + #[cfg(feature = "fb_console")] + { + use crate::device::display::console::update_consoles_task; + + runtime::spawn(async move { + update_consoles_task().await; + })?; + } + + // Add keyboard device + devfs::add_named_char_device(&ygg_driver_input::KEYBOARD_DEVICE, "kbd".to_owned())?; + + random::init(); + + let root = setup_root()?; + + let mut ioctx = IoContext::new(root); + + { + let (user_init, user_init_main) = + proc::load_binary(&mut ioctx, None, "/init", &["/init", "xxx"], &[])?; + + let mut io = user_init.io.lock(); + io.set_ioctx(ioctx); + drop(io); + + user_init_main.enqueue(); + } + + Ok(()) +} diff --git a/kernel/src/main.rs b/kernel/src/main.rs new file mode 100644 index 00000000..23818470 --- /dev/null +++ b/kernel/src/main.rs @@ -0,0 +1,118 @@ +//! 
osdev-x kernel crate +#![feature( + step_trait, + decl_macro, + naked_functions, + asm_const, + panic_info_message, + optimize_attribute, + effects, + const_trait_impl, + maybe_uninit_slice, + arbitrary_self_types, + const_mut_refs, + let_chains, + linked_list_cursors, + rustc_private, + allocator_api, + trait_alias, + strict_provenance, + slice_ptr_get, + slice_split_once, + iter_collect_into, + iter_next_chunk, + exact_size_is_empty, + inline_const, + maybe_uninit_uninit_array, + const_maybe_uninit_uninit_array, + never_type +)] +#![allow( + clippy::new_without_default, + clippy::fn_to_numeric_cast, + clippy::match_ref_pats, + clippy::match_single_binding, + async_fn_in_trait +)] +#![deny(missing_docs)] +#![no_std] +#![no_main] + +use arch::Platform; +use kernel_arch::{Architecture, ArchitectureImpl}; +use libk::arch::Cpu; +use libk_util::sync::SpinFence; + +use crate::{arch::PLATFORM, fs::sysfs, mem::heap, task::spawn_kernel_closure}; + +extern crate yggdrasil_abi as abi; + +extern crate alloc; +extern crate compiler_builtins; + +#[macro_use] +pub mod debug; +#[macro_use] +pub mod arch; + +pub mod device; +pub mod fs; +pub mod init; +pub mod mem; +pub mod panic; +pub mod proc; +pub mod syscall; +pub mod task; +pub mod util; + +static CPU_INIT_FENCE: SpinFence = SpinFence::new(); + +/// Common kernel main function for application processors +pub fn kernel_secondary_main() -> ! { + // Synchronize the CPUs to this point + CPU_INIT_FENCE.signal(); + CPU_INIT_FENCE.wait_all(ArchitectureImpl::cpu_count()); + + unsafe { + task::enter(); + } +} + +/// Common kernel main function. Must be called for BSP processor only. +/// +/// # Prerequisites +/// +/// Before the function can be called, the following preparations must be made: +/// +/// * Virtual memory set up according to the architecture's memory map +/// * Physical memory +/// * Heap +/// * Basic debugging facilities +/// * Initrd +pub fn kernel_main() -> ! { + debugln!("Heap: {:#x?}", heap::heap_range()); + + // Setup the sysfs + sysfs::init(); + fs::add_pseudo_devices().unwrap(); + + unsafe { + PLATFORM.start_application_processors(); + } + + Cpu::init_ipi_queues(ArchitectureImpl::cpu_count()); + + // Wait until all APs initialize + CPU_INIT_FENCE.signal(); + CPU_INIT_FENCE.wait_all(ArchitectureImpl::cpu_count()); + + task::init().expect("Failed to initialize the scheduler"); + + spawn_kernel_closure("[kinit]", init::kinit).expect("Could not spawn [kinit]"); + + infoln!("All cpus ready"); + + unsafe { + task::enter(); + } +} diff --git a/kernel/src/mem/heap.rs b/kernel/src/mem/heap.rs new file mode 100644 index 00000000..59c58ace --- /dev/null +++ b/kernel/src/mem/heap.rs @@ -0,0 +1,66 @@ +//! 
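kernel_main and kernel_secondary_main above rendezvous on CPU_INIT_FENCE: every CPU signals once, then spins until all of them have signalled. A counting fence in the same spirit (a sketch, not the libk-util SpinFence):

```rust
use std::sync::atomic::{AtomicUsize, Ordering};

struct Fence(AtomicUsize);

impl Fence {
    const fn new() -> Self {
        Self(AtomicUsize::new(0))
    }
    fn signal(&self) {
        self.0.fetch_add(1, Ordering::Release);
    }
    fn wait_all(&self, n: usize) {
        while self.0.load(Ordering::Acquire) < n {
            std::hint::spin_loop();
        }
    }
}

fn main() {
    static FENCE: Fence = Fence::new();
    // Single-threaded demo of the BSP path: signal self, wait for count 1.
    FENCE.signal();
    FENCE.wait_all(1);
}
```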
Kernel's global heap allocator +use core::{ + alloc::{GlobalAlloc, Layout}, + ops::Range, + ptr::{null_mut, NonNull}, +}; + +use libk_util::sync::IrqSafeSpinlock; +use linked_list_allocator::Heap; + +/// Kernel heap manager +pub struct KernelAllocator { + inner: IrqSafeSpinlock, +} + +impl KernelAllocator { + const fn empty() -> Self { + Self { + inner: IrqSafeSpinlock::new(Heap::empty()), + } + } + + unsafe fn init(&self, base: usize, size: usize) { + self.inner.lock().init(base as _, size); + } + + fn range(&self) -> Range { + let lock = self.inner.lock(); + lock.bottom() as usize..lock.top() as usize + } +} + +unsafe impl GlobalAlloc for KernelAllocator { + unsafe fn alloc(&self, layout: Layout) -> *mut u8 { + match self.inner.lock().allocate_first_fit(layout) { + Ok(v) => v.as_ptr(), + Err(e) => { + errorln!("Failed to allocate {:?}: {:?}", layout, e); + null_mut() + } + } + } + + unsafe fn dealloc(&self, ptr: *mut u8, layout: Layout) { + let ptr = NonNull::new(ptr).unwrap(); + self.inner.lock().deallocate(ptr, layout) + } +} + +/// Kernel's global allocator +#[global_allocator] +pub static GLOBAL_HEAP: KernelAllocator = KernelAllocator::empty(); + +/// Sets up kernel's global heap with given memory range. +/// +/// # Safety +/// +/// The caller must ensure the range is valid and mapped virtual memory. +pub unsafe fn init_heap(heap_base: usize, heap_size: usize) { + GLOBAL_HEAP.init(heap_base, heap_size); +} + +/// Returns the heap address range +pub fn heap_range() -> Range { + GLOBAL_HEAP.range() +} diff --git a/kernel/src/mem/mod.rs b/kernel/src/mem/mod.rs new file mode 100644 index 00000000..a6010756 --- /dev/null +++ b/kernel/src/mem/mod.rs @@ -0,0 +1,69 @@ +//! Memory management utilities and types + +use core::{ + ffi::c_void, + mem::{align_of, size_of}, +}; + +use libk_mm::{address::PhysicalAddress, device::DeviceMemoryMapping}; + +use crate::arch::{Platform, PlatformImpl}; + +pub mod heap; + +/// Offset applied to the physical kernel image when translating it into the virtual address space +pub const KERNEL_VIRT_OFFSET: usize = PlatformImpl::KERNEL_VIRT_OFFSET; + +/// Reads a value from an arbitrary physical address. +/// +/// # Safety +/// +/// The caller must ensure the correct origin of the address, its alignment and that the access is +/// properly synchronized. +pub unsafe fn read_memory(address: PhysicalAddress) -> T { + let io = DeviceMemoryMapping::map(address, size_of::(), Default::default()).unwrap(); + let address = io.address(); + + if address % align_of::() == 0 { + (address as *const T).read_volatile() + } else { + (address as *const T).read_unaligned() + } +} + +/// Writes a value to an arbitrary physical address. +/// +/// # Safety +/// +/// The caller must ensure the correct origin of the address, its alignment and that the access is +/// properly synchronized. 
+pub unsafe fn write_memory(address: PhysicalAddress, value: T) { + let io = DeviceMemoryMapping::map(address, size_of::(), Default::default()).unwrap(); + let address = io.address(); + + if address % align_of::() == 0 { + (address as *mut T).write_volatile(value) + } else { + (address as *mut T).write_unaligned(value) + } +} + +#[no_mangle] +unsafe extern "C" fn memcpy(p0: *mut c_void, p1: *const c_void, len: usize) -> *mut c_void { + compiler_builtins::mem::memcpy(p0 as _, p1 as _, len) as _ +} + +#[no_mangle] +unsafe extern "C" fn memcmp(p0: *const c_void, p1: *const c_void, len: usize) -> i32 { + compiler_builtins::mem::memcmp(p0 as _, p1 as _, len) +} + +#[no_mangle] +unsafe extern "C" fn memmove(dst: *mut c_void, src: *const c_void, len: usize) -> *mut c_void { + compiler_builtins::mem::memmove(dst as _, src as _, len) as _ +} + +#[no_mangle] +unsafe extern "C" fn memset(dst: *mut c_void, val: i32, len: usize) -> *mut c_void { + compiler_builtins::mem::memset(dst as _, val, len) as _ +} diff --git a/kernel/src/panic.rs b/kernel/src/panic.rs new file mode 100644 index 00000000..43dea3c7 --- /dev/null +++ b/kernel/src/panic.rs @@ -0,0 +1,112 @@ +//! Kernel panic handler code +use core::sync::atomic::{AtomicBool, AtomicU32, Ordering}; + +use device_api::interrupt::{IpiDeliveryTarget, IpiMessage}; +use kernel_arch::{Architecture, ArchitectureImpl}; +use libk::arch::Cpu; +use libk_util::sync::{hack_locks, SpinFence}; + +use crate::{ + arch::{Platform, PLATFORM}, + debug::{debug_internal, LogLevel}, + device::display::console::flush_consoles, +}; + +static PANIC_HANDLED_FENCE: SpinFence = SpinFence::new(); + +// Just a simple sequencer to ensure secondary panics don't trash the screen +static PANIC_FINISHED_FENCE: SpinFence = SpinFence::new(); +static PANIC_SEQUENCE: AtomicU32 = AtomicU32::new(0); + +/// Panic handler for CPUs other than the one that initiated it +pub fn panic_secondary() -> ! { + // Will also mask IRQs in this section + let cpu = Cpu::local(); + + PANIC_HANDLED_FENCE.signal(); + PANIC_FINISHED_FENCE.wait_one(); + + while PANIC_SEQUENCE.load(Ordering::Acquire) != cpu.id() { + core::hint::spin_loop(); + } + + log_print_raw!(LogLevel::Fatal, "X"); + flush_consoles(); + + PANIC_SEQUENCE.fetch_add(1, Ordering::Release); + + loop { + ArchitectureImpl::wait_for_interrupt(); + } +} + +#[panic_handler] +fn panic_handler(pi: &core::panic::PanicInfo) -> ! 
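panic_secondary above serializes output across CPUs: each CPU waits for the shared PANIC_SEQUENCE counter to reach its own id, prints its marker, then bumps the counter to release the next CPU. The protocol in isolation (a sketch):

```rust
use std::sync::atomic::{AtomicU32, Ordering};

static SEQ: AtomicU32 = AtomicU32::new(0);

/// Print in strict CPU-id order: cpu 0 goes first, each printer releases
/// the next one by incrementing the shared counter.
fn print_in_turn(cpu_id: u32, msg: &str) {
    while SEQ.load(Ordering::Acquire) != cpu_id {
        std::hint::spin_loop();
    }
    println!("cpu{}: {}", cpu_id, msg);
    SEQ.fetch_add(1, Ordering::Release);
}

fn main() {
    print_in_turn(0, "X");
}
```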
{ + let cpu = Cpu::local(); + + static PANIC_HAPPENED: AtomicBool = AtomicBool::new(false); + + fatalln!("{:?}", pi); + + if PANIC_HAPPENED + .compare_exchange(false, true, Ordering::Release, Ordering::Acquire) + .is_ok() + { + // Let other CPUs know we're screwed + unsafe { + PLATFORM + .send_ipi(IpiDeliveryTarget::OtherCpus, IpiMessage::Panic) + .ok(); + } + + let ap_count = ArchitectureImpl::cpu_count() - 1; + PANIC_HANDLED_FENCE.wait_all(ap_count); + + unsafe { + hack_locks(); + } + + log_print_raw!(LogLevel::Fatal, "--- BEGIN PANIC ---\n"); + log_print_raw!(LogLevel::Fatal, "In CPU {}\n", cpu.id()); + log_print_raw!(LogLevel::Fatal, "Kernel panic "); + + if let Some(location) = pi.location() { + log_print_raw!( + LogLevel::Fatal, + "at {}:{}:", + location.file(), + location.line() + ); + } else { + log_print_raw!(LogLevel::Fatal, ":"); + } + + log_print_raw!(LogLevel::Fatal, "\n"); + + if let Some(msg) = pi.message() { + debug_internal(*msg, LogLevel::Fatal); + log_print_raw!(LogLevel::Fatal, "\n"); + } + + log_print_raw!(LogLevel::Fatal, "--- END PANIC ---\n"); + + PANIC_FINISHED_FENCE.signal(); + while PANIC_SEQUENCE.load(Ordering::Acquire) != cpu.id() { + core::hint::spin_loop(); + } + + log_print_raw!(LogLevel::Fatal, "X"); + + flush_consoles(); + + PANIC_SEQUENCE.fetch_add(1, Ordering::Release); + + unsafe { + PLATFORM.reset(); + } + } + + loop { + ArchitectureImpl::wait_for_interrupt(); + } +} diff --git a/kernel/src/proc/elf.rs b/kernel/src/proc/elf.rs new file mode 100644 index 00000000..e69de29b diff --git a/kernel/src/proc/exec.rs b/kernel/src/proc/exec.rs new file mode 100644 index 00000000..0e12beee --- /dev/null +++ b/kernel/src/proc/exec.rs @@ -0,0 +1,30 @@ +//! Binary execution functions +use core::{alloc::Layout, ptr::NonNull}; + +use abi::{ + error::Error, + io::SeekFrom, + pass::{Place, Placer}, + path::Path, + process::ProgramArgumentInner, +}; +use alloc::{ + borrow::ToOwned, + string::String, + sync::{Arc, Weak}, + vec::Vec, +}; +use kernel_arch::task::TaskContext; +use libk::thread::TaskContextImpl; +use libk_mm::{ + pointer::PhysicalRefMut, + process::{ProcessAddressSpace, VirtualRangeBacking}, + table::MapAttributes, +}; +use libk_thread::{mem::ForeignPointer, process::Process, thread::Thread}; +use vfs::{FileRef, IoContext, Read, Seek}; + +use crate::{ + proc, + task::process::{ProcessImage, ProcessImpl}, +}; diff --git a/kernel/src/proc/io.rs b/kernel/src/proc/io.rs new file mode 100644 index 00000000..19990b82 --- /dev/null +++ b/kernel/src/proc/io.rs @@ -0,0 +1,57 @@ +//! 
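The panic_handler above runs its report path at most once: the first CPU to win the PANIC_HAPPENED compare_exchange does the reporting and reset, while any CPU that panics afterwards just parks itself. The guard pattern on its own (a sketch):

```rust
use std::sync::atomic::{AtomicBool, Ordering};

static PANIC_HAPPENED: AtomicBool = AtomicBool::new(false);

fn on_panic() {
    if PANIC_HAPPENED
        .compare_exchange(false, true, Ordering::Release, Ordering::Acquire)
        .is_ok()
    {
        // First panicker: report, notify other CPUs, eventually reset.
    } else {
        // Any later panicker: halt quietly so the report stays readable.
    }
}

fn main() {
    on_panic();
    assert!(PANIC_HAPPENED.load(Ordering::Acquire));
}
```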
Process I/O management + +use abi::error::Error; +use libk_thread::process::ProcessIo; +use vfs::{FileSet, IoContext, Node}; + +/// I/O context of a process, contains information like root, current directory and file +/// descriptor table +pub struct ProcessIoImpl { + ioctx: Option, + /// Process' set of files + pub files: FileSet, +} + +impl ProcessIo for ProcessIoImpl { + type Node = Node; + + fn new() -> Self { + Self { + ioctx: None, + files: FileSet::new(), + } + } + + fn handle_exit(&mut self) { + self.files.close_all(); + } + + fn fork_from(&mut self, src: &Self) -> Result<(), Error> { + let new_ioctx = IoContext::inherit(src.ioctx()); + self.set_ioctx(new_ioctx); + + for (old_fd, old_file) in src.files.iter() { + let new_file = old_file.send()?; + self.files.set_file(*old_fd, new_file)?; + } + + Ok(()) + } +} + +impl ProcessIoImpl { + /// Returns the associated [IoContext] reference + pub fn ioctx_mut(&mut self) -> &mut IoContext { + self.ioctx.as_mut().unwrap() + } + + /// Returns the associated [IoContext] reference + pub fn ioctx(&self) -> &IoContext { + self.ioctx.as_ref().unwrap() + } + + /// Changes the [IoContext] of this struct + pub fn set_ioctx(&mut self, ioctx: IoContext) { + self.ioctx = Some(ioctx); + } +} diff --git a/kernel/src/proc/mod.rs b/kernel/src/proc/mod.rs new file mode 100644 index 00000000..daf65ad4 --- /dev/null +++ b/kernel/src/proc/mod.rs @@ -0,0 +1,23 @@ +//! Internal management for processes + +use abi::{error::Error, path::Path}; +use alloc::sync::{Arc, Weak}; +use libk_thread::thread::Thread; +use vfs::IoContext; + +use crate::task::process::ProcessImpl; + +pub mod io; +pub mod random; + +/// Loads a binary and creates a process for it. See [libk_thread::binary::load]. +#[inline] +pub fn load_binary>( + ioctx: &mut IoContext, + parent: Option>, + path: P, + args: &[&str], + envs: &[&str], +) -> Result<(Arc, Arc), Error> { + libk_thread::binary::load(ioctx, parent, path, args, envs) +} diff --git a/kernel/src/proc/random.rs b/kernel/src/proc/random.rs new file mode 100644 index 00000000..cbf2873c --- /dev/null +++ b/kernel/src/proc/random.rs @@ -0,0 +1,76 @@ +//! 
Random generation utilities + +use libk::device::monotonic_timestamp; +use libk_util::{sync::IrqSafeSpinlock, OneTimeInit}; + +const BUFFER_SIZE: usize = 1024; + +struct RandomState { + data: [u8; BUFFER_SIZE], + pos: usize, + last_state: u32, +} + +impl RandomState { + fn new(seed: u32) -> Self { + Self { + data: [0; BUFFER_SIZE], + pos: BUFFER_SIZE, + last_state: seed, + } + } + + fn next(&mut self) -> u32 { + let mut x = self.last_state; + x ^= x << 13; + x ^= x >> 17; + x ^= x << 5; + self.last_state = x; + x + } + + fn fill_buf(&mut self) { + self.pos = 0; + for i in (0..self.data.len()).step_by(4) { + let v = self.next(); + self.data[i..i + 4].copy_from_slice(&v.to_ne_bytes()); + } + } + + fn read_buf(&mut self, buf: &mut [u8]) { + let mut rem = buf.len(); + let mut pos = 0; + + while rem != 0 { + if self.pos == BUFFER_SIZE { + self.fill_buf(); + } + let count = core::cmp::min(rem, BUFFER_SIZE - self.pos); + + buf[pos..pos + count].copy_from_slice(&self.data[self.pos..self.pos + count]); + + self.pos += count; + rem -= count; + pos += count; + } + } +} + +static RANDOM_STATE: OneTimeInit> = OneTimeInit::new(); + +/// Fills `buf` with random bytes +pub fn read(buf: &mut [u8]) { + let state = RANDOM_STATE.get(); + state.lock().read_buf(buf) +} + +/// Initializes the random generator state +pub fn init() { + let now = monotonic_timestamp().unwrap(); + let random_seed = now.subsec_millis(); + + let mut state = RandomState::new(random_seed); + state.fill_buf(); + + RANDOM_STATE.init(IrqSafeSpinlock::new(state)); +} diff --git a/kernel/src/syscall/arg.rs b/kernel/src/syscall/arg.rs new file mode 100644 index 00000000..a7dcf4e8 --- /dev/null +++ b/kernel/src/syscall/arg.rs @@ -0,0 +1,33 @@ +use libk_thread::{mem::ForeignPointer, thread::Thread}; +use yggdrasil_abi::error::Error; + +pub(super) fn ref_const<'a, T: Sized>(addr: usize) -> Result<&'a T, Error> { + let proc = Thread::current(); + let ptr = addr as *const T; + unsafe { ptr.validate_user_ptr(proc.address_space()) } +} +pub(super) fn ref_mut<'a, T: Sized>(addr: usize) -> Result<&'a mut T, Error> { + let proc = Thread::current(); + let ptr = addr as *mut T; + unsafe { ptr.validate_user_mut(proc.address_space()) } +} + +pub(super) fn str_ref<'a>(base: usize, len: usize) -> Result<&'a str, Error> { + let slice = slice_ref(base, len)?; + if slice.contains(&0) { + warnln!("User-supplied string contains NUL characters"); + return Err(Error::InvalidArgument); + } + Ok(core::str::from_utf8(slice).unwrap()) +} + +pub(super) fn slice_ref<'a, T: Sized>(base: usize, count: usize) -> Result<&'a [T], Error> { + let proc = Thread::current(); + let ptr = base as *const T; + unsafe { ptr.validate_user_slice(count, proc.address_space()) } +} +pub(super) fn slice_mut<'a, T: Sized>(base: usize, count: usize) -> Result<&'a mut [T], Error> { + let proc = Thread::current(); + let ptr = base as *mut T; + unsafe { ptr.validate_user_slice_mut(count, proc.address_space()) } +} diff --git a/kernel/src/syscall/mod.rs b/kernel/src/syscall/mod.rs new file mode 100644 index 00000000..4ba2eb5c --- /dev/null +++ b/kernel/src/syscall/mod.rs @@ -0,0 +1,864 @@ +//! 
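RandomState above is a buffered xorshift32 generator seeded from the timer's subsec_millis(). One property worth noting: zero is a fixed point of the xorshift step, so a boot that lands exactly on a whole second would seed 0 and produce an all-zero stream. A quick host-side check of the step function:

```rust
/// The xorshift32 step from RandomState::next.
fn xorshift32(mut x: u32) -> u32 {
    x ^= x << 13;
    x ^= x >> 17;
    x ^= x << 5;
    x
}

fn main() {
    // A non-zero seed walks through non-zero states...
    let mut s = 1u32;
    for _ in 0..4 {
        s = xorshift32(s);
        println!("{s:08x}");
    }
    // ...but zero is a fixed point: seeding with 0 yields only zeros.
    assert_eq!(xorshift32(0), 0);
}
```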
System function call handlers + +use abi::{error::Error, io::RawFd, SyscallFunction}; +use libk_util::sync::IrqSafeSpinlockGuard; +use vfs::NodeRef; + +use crate::{proc::io::ProcessIoImpl, task::process::ProcessImpl}; + +mod arg; + +fn run_with_io) -> T>( + proc: &ProcessImpl, + f: F, +) -> T { + let io = proc.io.lock(); + f(io) +} + +fn run_with_io_at< + T, + F: FnOnce(NodeRef, IrqSafeSpinlockGuard) -> Result, +>( + proc: &ProcessImpl, + at: Option, + f: F, +) -> Result { + let mut io = proc.io.lock(); + let at = at + .map(|fd| { + io.files + .file(fd) + .and_then(|f| f.node().ok_or(Error::InvalidFile).cloned()) + }) + .transpose()? + // at = None + .unwrap_or_else(|| io.ioctx_mut().cwd().clone()); + + f(at, io) +} + +mod impls { + pub(crate) use abi::{ + error::Error, + io::{ + DeviceRequest, DirectoryEntry, FileAttr, FileMetadataUpdate, FileMode, + MessageDestination, MountOptions, OpenOptions, PollControl, RawFd, + ReceivedMessageMetadata, SeekFrom, SentMessage, TerminalOptions, TerminalSize, + UnmountOptions, + }, + mem::MappingSource, + net::{SocketOption, SocketType}, + process::{ExitCode, MutexOperation, Signal, SignalEntryData, SpawnOptions}, + system::SystemInfo, + }; + use abi::{ + io::ChannelPublisherId, + process::{ExecveOptions, ProcessId, SpawnOption}, + }; + use alloc::{boxed::Box, sync::Arc}; + use libk::{block, runtime}; + use vfs::{File, IoContext, MessagePayload, Read, Seek, Write}; + use ygg_driver_net_core::socket::{RawSocket, TcpListener, TcpSocket, UdpSocket}; + + use core::{ + mem::MaybeUninit, net::SocketAddr, num::NonZeroUsize, sync::atomic::AtomicU32, + time::Duration, + }; + + use libk_mm::{ + phys, + process::VirtualRangeBacking, + table::{EntryLevelExt, MapAttributes}, + }; + use libk_thread::{ + process::{Process, ProcessManager}, + thread::Thread, + }; + + use crate::{ + arch::L3, + debug::LogLevel, + fs, + proc::{self, random}, + task::process::ProcessManagerImpl, + }; + + use super::{run_with_io, run_with_io_at}; + + // Misc + pub(crate) fn debug_trace(message: &str) { + let thread = Thread::current(); + let process = thread.process::(); + + log_print_raw!( + LogLevel::Debug, + "[{}:{}] TRACE: {}\n", + process.id(), + thread.id, + message + ); + } + + pub(crate) fn get_random(buffer: &mut [u8]) { + random::read(buffer); + } + + pub(crate) fn mount(options: &MountOptions<'_>) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + let fs_root = fs::create_filesystem(options)?; + io.ioctx_mut().mount(options.target, fs_root)?; + Ok(()) + }) + } + + pub(crate) fn unmount(_options: &UnmountOptions) -> Result<(), Error> { + todo!() + } + + // Memory management + pub(crate) fn map_memory( + _hint: Option, + len: usize, + source: &MappingSource, + ) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + let space = thread.address_space(); + + let len = len.page_align_up::(); + + run_with_io(&process, |io| { + let backing = match source { + MappingSource::Anonymous => VirtualRangeBacking::anonymous(), + &MappingSource::File(fd, offset) => { + let file = io.files.file(fd)?; + VirtualRangeBacking::file(offset, file.clone())? 
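map_memory above rounds the requested length up to page granularity before allocating (len.page_align_up). The rounding is the standard mask trick, sketched here for 4 KiB pages:

```rust
const PAGE: usize = 4096;

/// Round `len` up to the next page boundary (what page_align_up does for
/// the L3 level, assuming 4 KiB pages).
const fn page_align_up(len: usize) -> usize {
    (len + PAGE - 1) & !(PAGE - 1)
}

fn main() {
    assert_eq!(page_align_up(0), 0);
    assert_eq!(page_align_up(1), PAGE);
    assert_eq!(page_align_up(PAGE), PAGE);
    assert_eq!(page_align_up(PAGE + 1), 2 * PAGE);
}
```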
+ } + }; + + space.allocate( + None, + len, + backing, + MapAttributes::USER_WRITE | MapAttributes::USER_READ | MapAttributes::NON_GLOBAL, + ) + }) + } + + pub(crate) fn unmap_memory(address: usize, len: usize) -> Result<(), Error> { + let thread = Thread::current(); + let space = thread.address_space(); + + if len & 0xFFF != 0 { + todo!(); + } + + unsafe { + space.unmap(address, len)?; + } + + Ok(()) + } + + pub(crate) fn get_system_info(element: &mut SystemInfo) -> Result<(), Error> { + match element { + SystemInfo::MemoryStats(stats) => { + *stats = phys::stats(); + Ok(()) + } + } + } + + // Process/thread management + pub(crate) fn exit_process(code: ExitCode) -> ! { + let thread = Thread::current(); + thread.exit_process::(code) + } + + pub(crate) fn spawn_process(options: &SpawnOptions<'_>) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + // Setup a new process from the file + let (child_process, child_main) = proc::load_binary( + io.ioctx_mut(), + Some(Arc::downgrade(&process)), + options.program, + options.arguments, + options.environment, + )?; + let pid = child_process.id(); + + // Inherit group and session from the creator + child_process.inherit(&process)?; + + // Inherit root from the creator + // let child_ioctx = IoContext::new(io.ioctx().root().clone()); + let child_ioctx = IoContext::inherit(io.ioctx_mut()); + let mut child_io = child_process.io.lock(); + child_io.set_ioctx(child_ioctx); + + for opt in options.optional { + match opt { + &SpawnOption::InheritFile { source, child } => { + if let Ok(src_file) = io.files.file(source) { + child_io.files.set_file(child, src_file.clone())?; + } + } + &SpawnOption::SetProcessGroup(pgroup) => { + let pgroup = if pgroup.into_raw() == 0 { + child_process.id() + } else { + pgroup + }; + child_process.set_group_id(pgroup); + } + _ => (), + } + } + + if let Some(fd) = options.optional.iter().find_map(|item| { + if let &SpawnOption::GainTerminal(fd) = item { + Some(fd) + } else { + None + } + }) { + debugln!("{} requested terminal {:?}", pid, fd); + let file = child_io.files.file(fd)?; + // let node = file.node().ok_or(Error::InvalidFile)?; + let mut req = DeviceRequest::SetTerminalGroup(child_process.group_id().into()); + file.device_request(&mut req)?; + + if let Some(node) = file.node() { + child_process.set_session_terminal(node.clone()); + } + // node.device_request(&mut req)?; + } + + drop(child_io); + child_main.enqueue(); + + Ok(pid as _) + }) + } + + pub(crate) fn wait_process(pid: ProcessId, status: &mut ExitCode) -> Result<(), Error> { + let target = ProcessManagerImpl::get(pid).ok_or(Error::DoesNotExist)?; + *status = block!(target.wait_for_exit().await)?; + Ok(()) + } + + pub(crate) fn get_pid() -> ProcessId { + let thread = Thread::current(); + let process = thread.process::(); + process.id() + } + + pub(crate) fn nanosleep(duration: &Duration) { + block! 
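wait_process above bridges the synchronous syscall path into async code with block! { ... }.await. A toy block_on in the same spirit (illustrative only; a real executor parks the thread instead of spinning, and the kernel's block! macro is its own implementation):

```rust
use std::future::Future;
use std::pin::pin;
use std::sync::Arc;
use std::task::{Context, Poll, Wake, Waker};

struct NoopWake;
impl Wake for NoopWake {
    fn wake(self: Arc<Self>) {}
}

/// Drive a future to completion on the current thread by polling in a loop.
fn block_on<F: Future>(fut: F) -> F::Output {
    let mut fut = pin!(fut);
    let waker = Waker::from(Arc::new(NoopWake));
    let mut cx = Context::from_waker(&waker);
    loop {
        match fut.as_mut().poll(&mut cx) {
            Poll::Ready(v) => return v,
            Poll::Pending => std::hint::spin_loop(),
        }
    }
}

fn main() {
    assert_eq!(block_on(async { 40 + 2 }), 42);
}
```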
{ + runtime::sleep(*duration).await + } + .unwrap(); + } + + pub(crate) fn set_signal_entry(ip: usize, sp: usize) { + let thread = Thread::current(); + thread.set_signal_entry(ip, sp); + } + + pub(crate) fn send_signal(pid: ProcessId, signal: Signal) -> Result<(), Error> { + let target = ProcessManagerImpl::get(pid).ok_or(Error::DoesNotExist)?; + target.raise_signal(signal); + Ok(()) + } + + pub(crate) fn mutex(mutex: &AtomicU32, op: &MutexOperation) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + let mutex = process.get_or_insert_mutex((mutex as *const AtomicU32).addr()); + + match op { + &MutexOperation::Wait(value, _timeout) => block! { mutex.wait(value).await }.unwrap(), + MutexOperation::Wake => mutex.wake(), + MutexOperation::WakeAll => mutex.wake_all(), + } + + Ok(()) + } + + pub(crate) fn start_session() -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + let session_terminal = process.clear_session_terminal(); + + if let Some(ctty) = session_terminal { + // Drop all FDs referring to the old session terminal + run_with_io(&process, |mut io| { + io.files.retain(|_, f| { + f.node() + .map(|node| !Arc::ptr_eq(node, &ctty)) + .unwrap_or(true) + }); + }); + } + + process.set_session_id(process.id()); + process.set_group_id(process.id()); + + Ok(()) + } + + // I/O + pub(crate) fn open( + at: Option, + path: &str, + opts: OpenOptions, + mode: FileMode, + ) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io_at(&process, at, |at, mut io| { + let file = io.ioctx_mut().open(Some(at), path, opts, mode)?; + + // TODO NO_CTTY? + if process.session_terminal().is_none() + && let Some(node) = file.node() + && node.is_terminal() + { + debugln!("Session terminal set for #{}: {}", process.id(), path); + process.set_session_terminal(node.clone()); + } + + let fd = io.files.place_file(file, true)?; + Ok(fd) + }) + } + + pub(crate) fn close(fd: RawFd) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + let res = io.files.close_file(fd); + + if res == Err(Error::InvalidFile) { + warnln!("Double close of fd {:?} in process {}", fd, process.id()); + } + + res + }) + } + + pub(crate) fn write(fd: RawFd, buffer: &[u8]) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |io| io.files.file(fd)?.write(buffer)) + } + + pub(crate) fn read(fd: RawFd, buffer: &mut [u8]) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |io| io.files.file(fd)?.read(buffer)) + } + + pub(crate) fn seek(fd: RawFd, pos: SeekFrom) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |io| io.files.file(fd)?.seek(pos)) + } + + pub(crate) fn open_directory(at: Option, path: &str) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io_at(&process, at, |at, mut io| { + let node = io.ioctx_mut().find(Some(at), path, true, true)?; + let access = io.ioctx_mut().check_access(vfs::Action::Read, &node)?; + let file = node.open_directory(access)?; + let fd = io.files.place_file(file, true)?; + + Ok(fd) + }) + } + + pub(crate) fn read_directory_entries( + fd: RawFd, + entries: &mut [MaybeUninit], + ) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |io| 
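The mutex syscall above is futex-shaped: userspace hands the kernel the address of an AtomicU32 plus an expected value, and MutexOperation::Wait sleeps only while the word still holds that value (the kernel presumably re-checks before sleeping, which is what makes the protocol race-free). A hedged sketch of the userspace side, with sys_mutex_wait invented for illustration:

```rust
use std::sync::atomic::{AtomicU32, Ordering};

/// Hypothetical wrapper over MutexOperation::Wait: sleep while *word == expected.
fn sys_mutex_wait(word: &AtomicU32, expected: u32) {
    // Stand-in: the real call traps into the kernel instead of spinning.
    while word.load(Ordering::Acquire) == expected {
        std::hint::spin_loop();
    }
}

fn lock(word: &AtomicU32) {
    while word
        .compare_exchange(0, 1, Ordering::Acquire, Ordering::Relaxed)
        .is_err()
    {
        // Contended: ask the kernel to sleep until the word changes from 1.
        sys_mutex_wait(word, 1);
    }
}

fn main() {
    let word = AtomicU32::new(0);
    lock(&word);
    assert_eq!(word.load(Ordering::Acquire), 1);
}
```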
io.files.file(fd)?.read_dir(entries)) + } + + pub(crate) fn create_directory( + at: Option, + path: &str, + mode: FileMode, + ) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io_at(&process, at, |at, mut io| { + io.ioctx_mut().create_directory(Some(at), path, mode)?; + Ok(()) + }) + } + + pub(crate) fn remove_directory(_at: Option, _path: &str) -> Result<(), Error> { + todo!() + } + + pub(crate) fn remove(at: Option, path: &str) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io_at(&process, at, |at, mut io| { + io.ioctx_mut().remove_file(Some(at), path)?; + Ok(()) + }) + } + + pub(crate) fn clone_fd(source_fd: RawFd, target_fd: Option) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + let file = io.files.file(source_fd)?.clone(); + + let fd = match target_fd { + Some(target_fd) => { + io.files.set_file(target_fd, file)?; + target_fd + } + None => io.files.place_file(file, true)?, + }; + + Ok(fd) + }) + } + + pub(crate) fn update_metadata( + at: Option, + _path: &str, + _update: &FileMetadataUpdate, + ) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io_at(&process, at, |_at, _io| { + todo!(); + }) + } + + pub(crate) fn get_metadata( + at: Option, + path: &str, + buffer: &mut MaybeUninit, + follow: bool, + ) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io_at(&process, at, |at, mut io| { + let node = if path.is_empty() { + at + // at.ok_or(Error::InvalidArgument)? + } else { + io.ioctx_mut().find(Some(at), path, follow, true)? + }; + + let metadata = node.metadata()?; + let size = node.size()?; + + buffer.write(FileAttr { + size, + ty: node.ty(), + mode: metadata.mode, + uid: metadata.uid, + gid: metadata.gid, + }); + + Ok(()) + }) + } + + pub(crate) fn device_request(fd: RawFd, req: &mut DeviceRequest) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |io| io.files.file(fd)?.device_request(req)) + } + + // Misc I/O + pub(crate) fn open_channel(name: &str, subscribe: bool) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + let file = File::new_message_channel(name, subscribe); + let fd = io.files.place_file(file, true)?; + Ok(fd) + }) + } + + pub(crate) fn create_timer(repeat: bool) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + let file = File::new_timer(repeat); + let fd = io.files.place_file(file, true)?; + Ok(fd) + }) + } + + pub(crate) fn create_pty( + options: &TerminalOptions, + size: &TerminalSize, + output: &mut [MaybeUninit; 2], + ) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + let (master, slave) = File::new_pseudo_terminal(*options, *size)?; + let master_fd = io.files.place_file(master, true)?; + let slave_fd = io.files.place_file(slave, true)?; + + output[0].write(master_fd); + output[1].write(slave_fd); + + Ok(()) + }) + } + + pub(crate) fn create_shared_memory(size: usize) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + let file = File::new_shared_memory(size)?; + let fd = io.files.place_file(file, true)?; + 
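clone_fd above carries both dup- and dup2-style semantics in one call: with a target descriptor the slot is overwritten via set_file, without one a free slot is picked by place_file (lowest-free is assumed in this sketch). Over a plain map, the behavior looks like this (not the FileSet implementation):

```rust
use std::collections::BTreeMap;

fn clone_fd(files: &mut BTreeMap<u32, String>, source: u32, target: Option<u32>) -> Option<u32> {
    let file = files.get(&source)?.clone();
    let fd = match target {
        // dup2-like: replace whatever occupied the target slot.
        Some(t) => {
            files.insert(t, file);
            t
        }
        // dup-like: first unused descriptor (an assumption here).
        None => {
            let mut fd = 0;
            while files.contains_key(&fd) {
                fd += 1;
            }
            files.insert(fd, file);
            fd
        }
    };
    Some(fd)
}

fn main() {
    let mut files = BTreeMap::from([(0, "tty".to_string())]);
    assert_eq!(clone_fd(&mut files, 0, None), Some(1));
    assert_eq!(clone_fd(&mut files, 0, Some(5)), Some(5));
}
```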
Ok(fd) + }) + } + + pub(crate) fn create_poll_channel() -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + let poll = File::new_poll_channel(); + let fd = io.files.place_file(poll, true)?; + + Ok(fd) + }) + } + + pub(crate) fn create_pipe(ends: &mut [MaybeUninit; 2]) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + let (read, write) = File::new_pipe_pair(256); + + let read_fd = io.files.place_file(read, true)?; + let write_fd = io.files.place_file(write, true)?; + + ends[0].write(read_fd); + ends[1].write(write_fd); + + Ok(()) + }) + } + + pub(crate) fn poll_channel_wait( + poll_fd: RawFd, + timeout: &Option, + output: &mut Option<(RawFd, Result<(), Error>)>, + ) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |io| { + let poll_file = io.files.file(poll_fd)?; + let poll = poll_file.as_poll_channel()?; + + *output = block! { + poll.wait(*timeout).await + }?; + + Ok(()) + }) + } + + pub(crate) fn poll_channel_control( + poll_fd: RawFd, + control: PollControl, + fd: RawFd, + ) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |io| { + let poll_file = io.files.file(poll_fd)?; + let poll = poll_file.as_poll_channel()?; + + match control { + PollControl::AddFd => { + let polled_file = io.files.file(fd)?.clone(); + poll.add(fd, polled_file); + } + } + + Ok(()) + }) + } + + pub(crate) fn send_message( + fd: RawFd, + message: &SentMessage<'_>, + destination: MessageDestination, + ) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |io| { + let file = io.files.file(fd)?; + let channel = file.as_message_channel()?; + + match message { + &SentMessage::File(fd) => { + let sent_file = io.files.file(fd)?; + + channel.send_message(MessagePayload::File(sent_file.clone()), destination)?; + } + &SentMessage::Data(data) => { + channel.send_message(MessagePayload::Data(Box::from(data)), destination)?; + } + } + + Ok(()) + }) + } + + pub(crate) fn receive_message( + fd: RawFd, + metadata: &mut MaybeUninit, + buf: &mut [u8], + from: &mut MaybeUninit, + ) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + let file = io.files.file(fd)?; + let channel = file.as_message_channel()?; + + let message = channel.receive_message()?; + + from.write(message.source); + + match &message.payload { + MessagePayload::Data(data) => { + // TODO allow truncated messages? 
+ let len = data.len(); + if buf.len() < len { + return Err(Error::MissingData); + } + + metadata.write(ReceivedMessageMetadata::Data(len)); + buf[..len].copy_from_slice(data); + + Ok(()) + } + MessagePayload::File(file) => { + let fd = io.files.place_file(file.clone(), true)?; + + metadata.write(ReceivedMessageMetadata::File(fd)); + + Ok(()) + } + } + }) + } + + // Network + pub(crate) fn connect_socket( + socket_fd: Option, + remote: &SocketAddr, + ty: SocketType, + local_result: &mut MaybeUninit, + ) -> Result { + assert!(socket_fd.is_none()); + + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + let (local, file) = match ty { + SocketType::TcpStream => { + let (local, socket) = TcpSocket::connect((*remote).into())?; + (local, File::from_stream_socket(socket)) + } + _ => return Err(Error::InvalidArgument), + }; + let fd = io.files.place_file(file, true)?; + local_result.write(local.into()); + Ok(fd) + }) + } + + pub(crate) fn bind_socket(listen: &SocketAddr, ty: SocketType) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + let file = match ty { + SocketType::UdpPacket => { + File::from_packet_socket(UdpSocket::bind((*listen).into())?) + } + SocketType::RawPacket => File::from_packet_socket(RawSocket::bind()?), + SocketType::TcpStream => { + File::from_listener_socket(TcpListener::bind((*listen).into())?) + } + }; + let fd = io.files.place_file(file, true)?; + Ok(fd) + }) + } + + pub(crate) fn accept( + socket_fd: RawFd, + remote_result: &mut MaybeUninit, + ) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |mut io| { + let file = io.files.file(socket_fd)?; + let mut remote = MaybeUninit::uninit(); + let accepted_file = file.accept(&mut remote)?; + let accepted_fd = io.files.place_file(accepted_file, true)?; + unsafe { + remote_result.write(remote.assume_init().into()); + } + Ok(accepted_fd) + }) + } + + pub(crate) fn send_to( + socket_fd: RawFd, + buffer: &[u8], + recepient: &Option, + ) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |io| { + let file = io.files.file(socket_fd)?; + + file.send_to(buffer, recepient.map(Into::into)) + }) + } + + pub(crate) fn receive_from( + socket_fd: RawFd, + buffer: &mut [u8], + remote_result: &mut MaybeUninit, + ) -> Result { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |io| { + let file = io.files.file(socket_fd)?; + let mut remote = MaybeUninit::uninit(); + let len = file.receive_from(buffer, &mut remote)?; + remote_result.write(unsafe { remote.assume_init() }.into()); + Ok(len) + }) + } + + pub(crate) fn set_socket_option(socket_fd: RawFd, option: &SocketOption) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |io| { + let file = io.files.file(socket_fd)?; + let socket = file.as_socket()?; + socket.set_option(option) + }) + } + + pub(crate) fn get_socket_option( + socket_fd: RawFd, + option: &mut SocketOption, + ) -> Result<(), Error> { + let thread = Thread::current(); + let process = thread.process::(); + + run_with_io(&process, |io| { + let file = io.files.file(socket_fd)?; + let socket = file.as_socket()?; + socket.get_option(option) + }) + } + + // Handled outside + pub(crate) fn exit_signal(_frame: &SignalEntryData) -> ! 
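accept and receive_from above use MaybeUninit out-parameters: the callee promises to fill the slot before returning Ok, which is what makes the assume_init on the success path sound. The pattern reduced to its core (a generic Rust sketch, not the kernel's API):

```rust
use std::mem::MaybeUninit;

/// The callee writes `out` before returning Ok, mirroring how accept fills
/// the remote-address slot.
fn fill(out: &mut MaybeUninit<u32>) -> Result<(), ()> {
    out.write(7);
    Ok(())
}

fn main() {
    let mut out = MaybeUninit::uninit();
    if fill(&mut out).is_ok() {
        // Sound only because Ok guarantees the write happened.
        let v = unsafe { out.assume_init() };
        assert_eq!(v, 7);
    }
}
```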
{ + unreachable!() + } + + pub(crate) fn fork() -> Result { + unreachable!() + } + + pub(crate) fn execve(_options: &ExecveOptions<'_>) -> Result<(), Error> { + unreachable!() + } +} + +mod generated { + #![allow(unreachable_code)] + + use abi::{io::ChannelPublisherId, process::ProcessId, SyscallFunction}; + use abi_lib::SyscallRegister; + + use super::{ + arg, + impls::{self, *}, + }; + + include!(concat!(env!("OUT_DIR"), "/generated_dispatcher.rs")); +} + +/// Entrypoint for system calls that takes raw argument values +pub fn raw_syscall_handler(func: u64, args: &[u64]) -> u64 { + let Ok(func) = SyscallFunction::try_from(func as usize) else { + todo!("Undefined syscall: {}", func); + }; + let args = unsafe { core::mem::transmute(args) }; + + let result = generated::handle_syscall(func, args); + + let value = match result { + Ok(value) => value, + Err(e) => (-(e as u32 as isize)) as usize, + }; + + value as _ +} diff --git a/kernel/src/task/mod.rs b/kernel/src/task/mod.rs new file mode 100644 index 00000000..ccce3e8b --- /dev/null +++ b/kernel/src/task/mod.rs @@ -0,0 +1,80 @@ +//! Multitasking and process/thread management interfaces + +#![allow(dead_code)] + +use abi::error::Error; +use alloc::{string::String, vec::Vec}; +use kernel_arch::{ + task::{Scheduler, TaskContext, Termination}, + Architecture, ArchitectureImpl, +}; +use libk::{arch::Cpu, runtime}; +use libk_thread::{ + sched::{init_queues, CpuQueue}, + thread::Thread, + TaskContextImpl, +}; +use libk_util::sync::SpinFence; + +use self::process::ProcessManagerImpl; + +pub mod process; + +/// Creates a new kernel-space process to execute a closure and queues it to some CPU +pub fn spawn_kernel_closure, T: Termination, F: Fn() -> T + Send + 'static>( + name: S, + f: F, +) -> Result<(), Error> { + let thread = Thread::new_kthread( + name, + TaskContextImpl::kernel_closure(move || { + let result = f(); + Thread::current().exit::(result.into_exit_code()); + })?, + ); + thread.enqueue(); + + Ok(()) +} + +/// Sets up CPU queues and gives them some processes to run +pub fn init() -> Result<(), Error> { + let cpu_count = ArchitectureImpl::cpu_count(); + + // Create a queue for each CPU + init_queues(Vec::from_iter((0..cpu_count).map(CpuQueue::new))); + + // Spawn async workers + (0..cpu_count).for_each(|index| { + runtime::spawn_async_worker(index).unwrap(); + }); + + Ok(()) +} + +/// Sets up the local CPU queue and switches to some task in it for execution. +/// +/// # Note +/// +/// Any locks held at this point will not be dropped properly, which may lead to a deadlock. +/// +/// # Safety +/// +/// Only safe to call once at the end of non-threaded system initialization. +pub unsafe fn enter() -> ! { + static AP_CAN_ENTER: SpinFence = SpinFence::new(); + + let mut cpu = Cpu::local(); + + if cpu.id() != 0 { + // Wait until BSP allows us to enter + AP_CAN_ENTER.wait_one(); + } else { + AP_CAN_ENTER.signal(); + } + + let queue = CpuQueue::for_cpu(cpu.id() as usize); + cpu.set_scheduler(queue); + + queue.enter() +} diff --git a/kernel/src/task/process.rs b/kernel/src/task/process.rs new file mode 100644 index 00000000..d069fe5b --- /dev/null +++ b/kernel/src/task/process.rs @@ -0,0 +1,45 @@ +//! 
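raw_syscall_handler above folds a Result into a single register: success passes the value through, failure encodes the error as a negative integer in the same machine word. The encoding round-trip, sketched with a plain u32 error code:

```rust
fn encode(result: Result<usize, u32>) -> u64 {
    match result {
        Ok(value) => value as u64,
        Err(e) => (-(e as isize)) as usize as u64,
    }
}

fn decode(raw: u64) -> Result<usize, u32> {
    let v = raw as i64;
    if v < 0 {
        Err((-v) as u32)
    } else {
        Ok(v as usize)
    }
}

fn main() {
    assert_eq!(decode(encode(Ok(7))), Ok(7));
    assert_eq!(decode(encode(Err(22))), Err(22));
}
```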
Process data structures + +use abi::process::{ProcessId, Signal}; +use alloc::{collections::BTreeMap, sync::Arc}; +use libk_thread::process::{Process, ProcessManager}; +use libk_util::sync::spin_rwlock::IrqSafeRwLock; + +use crate::proc::io::ProcessIoImpl; + +pub use libk_thread::{ + process::ProcessImage, + types::{ProcessTlsInfo, ProcessTlsLayout}, +}; + +/// Process manager implementation +pub struct ProcessManagerImpl; + +/// Alias type for [libk_thread::process::ProcessImpl] +pub type ProcessImpl = libk_thread::process::ProcessImpl; + +static PROCESSES: IrqSafeRwLock>> = + IrqSafeRwLock::new(BTreeMap::new()); + +impl ProcessManager for ProcessManagerImpl { + type Process = ProcessImpl; + + fn register_process(process: Arc) { + PROCESSES.write().insert(process.id(), process); + } + + fn get(id: ProcessId) -> Option> { + PROCESSES.read().get(&id).cloned() + } + + fn for_each)>(f: F) { + for (k, v) in PROCESSES.read().iter() { + f(*k, v); + } + } +} + +#[no_mangle] +fn __signal_process_group(group_id: ProcessId, signal: Signal) { + ProcessImpl::signal_group(group_id, signal) +} diff --git a/kernel/src/util/mod.rs b/kernel/src/util/mod.rs new file mode 100644 index 00000000..11d685cb --- /dev/null +++ b/kernel/src/util/mod.rs @@ -0,0 +1,30 @@ +//! Various kernel utility functions + +/// Extension trait for [Iterator]s of [Result]s +pub trait ResultIterator { + /// Drops entries from the iterator until the first error + fn collect_error(self) -> Option; +} + +impl>> ResultIterator for I { + fn collect_error(self) -> Option { + for item in self { + if let Err(e) = item { + return Some(e); + } + } + None + } +} + +/// Returns the architecture name string +pub const fn arch_str() -> &'static str { + #[cfg(target_arch = "aarch64")] + { + "aarch64" + } + #[cfg(target_arch = "x86_64")] + { + "x86_64" + } +} diff --git a/kernel/tools/gentables/Cargo.toml b/kernel/tools/gentables/Cargo.toml new file mode 100644 index 00000000..6598260b --- /dev/null +++ b/kernel/tools/gentables/Cargo.toml @@ -0,0 +1,16 @@ +[package] +name = "gentables" +version = "0.1.0" +edition = "2021" +authors = ["Mark Poliakov "] + +# See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html + +[dependencies] +memtables = { path = "../../lib/memtables", features = ["all"] } + +bytemuck = "1.14.0" +elf = "0.7.2" +thiserror = "1.0.48" +clap = { version = "4.4.2", features = ["derive"] } +bitflags = "2.4.0" diff --git a/kernel/tools/gentables/src/aarch64.rs b/kernel/tools/gentables/src/aarch64.rs new file mode 100644 index 00000000..624b61e8 --- /dev/null +++ b/kernel/tools/gentables/src/aarch64.rs @@ -0,0 +1,193 @@ +use core::fmt; +use std::{ + io::{Read, Seek}, + mem::offset_of, +}; + +use bitflags::bitflags; +use elf::{ + abi::{PF_W, PF_X, PT_LOAD}, + endian::AnyEndian, + ElfStream, +}; +use memtables::aarch64::{FixedTables, KERNEL_L3_COUNT}; + +use crate::{GenData, GenError}; + +bitflags! 
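ProcessManagerImpl above keeps the global pid table behind an IRQ-safe read-write lock and clones the Arc out of get, so the lock is held only for the lookup itself. The same shape with std primitives (a sketch):

```rust
use std::collections::BTreeMap;
use std::sync::{Arc, RwLock};

static PROCESSES: RwLock<BTreeMap<u64, Arc<str>>> = RwLock::new(BTreeMap::new());

fn register_process(pid: u64, process: Arc<str>) {
    PROCESSES.write().unwrap().insert(pid, process);
}

fn get(pid: u64) -> Option<Arc<str>> {
    // Clone the Arc so the read lock is released before the caller uses it.
    PROCESSES.read().unwrap().get(&pid).cloned()
}

fn main() {
    register_process(1, Arc::from("init"));
    assert!(get(1).is_some());
}
```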
{ + #[derive(Clone, Copy)] + struct PageFlags: u64 { + const PRESENT = 1 << 0; + const ACCESS = 1 << 10; + const SH_INNER = 3 << 8; + const PAGE_ATTR_NORMAL = 0 << 2; + const AP_BOTH_READONLY = 3 << 6; + const TABLE = 1 << 1; + const PAGE = 1 << 1; + const UXN = 1 << 54; + const PXN = 1 << 53; + } +} + +impl PageFlags { + pub fn kernel_table() -> Self { + Self::PRESENT | Self::ACCESS | Self::SH_INNER | Self::PAGE_ATTR_NORMAL | Self::TABLE + } + + pub fn kernel_page() -> Self { + Self::PRESENT | Self::ACCESS | Self::SH_INNER | Self::PAGE_ATTR_NORMAL | Self::PAGE + } +} + +pub struct AArch64Builder { + elf: ElfStream, + data: GenData, + tables: FixedTables, + + l1i: usize, + l2i_start: usize, + l2i_end: usize, +} + +impl PageFlags { + fn from_elf(flags: u32) -> Self { + let mut out = Self::UXN | Self::PXN; + if flags & PF_X != 0 { + out.remove(Self::PXN); + } + if flags & PF_W == 0 { + out |= Self::AP_BOTH_READONLY; + } + out + } +} + +impl fmt::Display for PageFlags { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let x = if self.contains(Self::PXN) { '-' } else { 'x' }; + let w = if self.contains(Self::AP_BOTH_READONLY) { + '-' + } else { + 'w' + }; + write!(f, "r{}{}", w, x) + } +} + +const L1_SHIFT: u64 = 30; +const L2_SHIFT: u64 = 21; +const L3_SHIFT: u64 = 12; +const L2_ENTRY_SIZE: u64 = 1 << L2_SHIFT; +const L3_ENTRY_SIZE: u64 = 1 << L3_SHIFT; + +// TODO proper granularity +impl AArch64Builder { + pub fn new(elf: ElfStream, data: GenData) -> Result { + let l1i = (data.kernel_start >> L1_SHIFT) as usize & 0x1FF; + let l2i_start = (data.kernel_start >> L2_SHIFT) as usize & 0x1FF; + let l2i_end = ((data.kernel_end + L2_ENTRY_SIZE - 1) >> L2_SHIFT) as usize & 0x1FF; + + if l2i_end - l2i_start > KERNEL_L3_COUNT { + todo!() + } + + Ok(Self { + elf, + data, + + tables: FixedTables::zeroed(), + + l1i, + l2i_start, + l2i_end, + }) + } + + // TODO the build function is almost identical to x86-64 one, but with slight changes, so might + // wanna unify this later + pub fn build(mut self) -> Result<(FixedTables, u64), GenError> { + assert_eq!(offset_of!(FixedTables, l1), 0); + + let l2_physical_address = + self.data.table_physical_address + offset_of!(FixedTables, l2) as u64; + + // L1 -> L2 + self.tables.l1.data[self.l1i] = l2_physical_address | PageFlags::kernel_table().bits(); + + // L2 -> L3s + for l2i in self.l2i_start..self.l2i_end { + let l3_table_index = l2i - self.l2i_start; + let l3_physical_address = self.data.table_physical_address + + (offset_of!(FixedTables, l3s) + 0x1000 * l3_table_index) as u64; + + self.tables.l2.data[l2i] = l3_physical_address | PageFlags::kernel_table().bits(); + } + + for (i, segment) in self.elf.segments().into_iter().enumerate() { + if segment.p_type != PT_LOAD + || segment.p_vaddr != segment.p_paddr + self.data.kernel_virt_offset + { + continue; + } + + let aligned_virt_start = segment.p_vaddr & !(L3_ENTRY_SIZE - 1); + let aligned_virt_end = + (segment.p_vaddr + segment.p_memsz + L3_ENTRY_SIZE - 1) & !(L3_ENTRY_SIZE - 1); + let aligned_phys_start = segment.p_paddr & !(L3_ENTRY_SIZE - 1); + let count = (aligned_virt_end - aligned_virt_start) / 0x1000; + + let flags = PageFlags::from_elf(segment.p_flags); + + println!( + "{}: {:#x?} -> {:#x} {}", + i, + aligned_virt_start..aligned_virt_end, + aligned_phys_start, + flags + ); + Self::map_segment( + self.l2i_start, + &mut self.tables, + aligned_virt_start, + aligned_phys_start, + count as usize, + flags, + )?; + } + + Ok((self.tables, self.data.table_offset)) + } + + fn map_segment( + start_l2i: 
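The L1_SHIFT/L2_SHIFT/L3_SHIFT constants above split a virtual address into 9-bit table indices for the 4 KiB-granule layout the generator assumes. The decomposition in isolation, with one worked value:

```rust
/// Extract the L1/L2/L3 indices of a virtual address (shifts 30/21/12,
/// 9 bits per level, matching the constants above).
fn indices(vaddr: u64) -> (usize, usize, usize) {
    let l1 = (vaddr >> 30) as usize & 0x1FF;
    let l2 = (vaddr >> 21) as usize & 0x1FF;
    let l3 = (vaddr >> 12) as usize & 0x1FF;
    (l1, l2, l3)
}

fn main() {
    // 0x4010_2000 = 1 GiB + 1 MiB + 8 KiB decomposes as:
    assert_eq!(indices(0x4010_2000), (1, 0, 258));
}
```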
usize, + tables: &mut FixedTables, + vaddr_start: u64, + paddr_start: u64, + count: usize, + flags: PageFlags, + ) -> Result<(), GenError> { + for index in 0..count { + let vaddr = vaddr_start + index as u64 * L3_ENTRY_SIZE; + let paddr = paddr_start + index as u64 * L3_ENTRY_SIZE; + + let entry = paddr | (PageFlags::kernel_page() | flags).bits(); + + let l2i = ((vaddr >> L2_SHIFT) as usize & 0x1FF) - start_l2i; + let l3i = (vaddr >> L3_SHIFT) as usize & 0x1FF; + + let l3 = &mut tables.l3s[l2i]; + + if l3.data[l3i] != 0 { + if l3.data[l3i] != entry { + todo!(); + } else { + continue; + } + } + + l3.data[l3i] = entry; + } + + Ok(()) + } +} diff --git a/kernel/tools/gentables/src/main.rs new file mode 100644 index 00000000..3ec366ff --- /dev/null +++ b/kernel/tools/gentables/src/main.rs @@ -0,0 +1,201 @@ +use std::{ + fs::OpenOptions, + io::{Read, Seek, SeekFrom, Write}, + ops::Range, + path::{Path, PathBuf}, + process::ExitCode, +}; + +use clap::Parser; +use elf::{ + abi::{EM_AARCH64, EM_X86_64, PT_LOAD}, + endian::AnyEndian, + ElfStream, +}; +use memtables::any::AnyTables; +use thiserror::Error; + +use crate::{aarch64::AArch64Builder, x86_64::X8664Builder}; + +mod aarch64; +mod x86_64; + +#[derive(Error, Debug)] +pub enum GenError { + #[error("I/O error: {0}")] + IoError(#[from] std::io::Error), + #[error("ELF parse error: {0}")] + ElfParseError(#[from] elf::ParseError), + + #[error("Image's architecture is not supported")] + UnsupportedArchitecture, + #[error("Could not determine the kernel image address range (possibly incorrect segments?)")] + NoKernelImageRange, + #[error("Kernel image is too large: {0:#x?} ({1}B). Maximum size: {2}B")] + KernelTooLarge(Range<u64>, u64, u64), + #[error("Kernel image is missing a required symbol: {0:?}")] + MissingSymbol(&'static str), + #[error("Kernel image is missing a required section: {0:?}")] + MissingSection(&'static str), + #[error("Incorrect tables section placement: {0:#x}")] + IncorrectTablesPlacement(u64), +} + +#[derive(Parser)] +struct Args { + image: PathBuf, +} + +pub struct GenData { + pub kernel_start: u64, + pub kernel_end: u64, + + pub table_offset: u64, + pub table_physical_address: u64, + pub kernel_virt_offset: u64, +} + +fn kernel_image_range<F: Read + Seek>( + elf: &mut ElfStream<AnyEndian, F>, + kernel_virt_offset: u64, +) -> Result<(u64, u64), GenError> { + let mut start = u64::MAX; + let mut end = u64::MIN; + + for segment in elf.segments() { + if segment.p_type != PT_LOAD || segment.p_vaddr != segment.p_paddr + kernel_virt_offset { + continue; + } + + let aligned_start = segment.p_vaddr & !0xFFF; + let aligned_end = (segment.p_vaddr + segment.p_memsz + 0xFFF) & !0xFFF; + + if aligned_end > end { + end = aligned_end; + } + + if aligned_start < start { + start = aligned_start; + } + } + + if start < end { + Ok((start, end)) + } else { + Err(GenError::NoKernelImageRange) + } +} + +fn kernel_virt_offset<F: Read + Seek>(elf: &mut ElfStream<AnyEndian, F>) -> Result<u64, GenError> { + let (symtab, symstrtab) = elf + .symbol_table()?
+
+fn kernel_virt_offset<F: Read + Seek>(
+    elf: &mut ElfStream<AnyEndian, F>,
+) -> Result<u64, GenError> {
+    let (symtab, symstrtab) = elf
+        .symbol_table()?
+        .ok_or_else(|| GenError::MissingSection(".symtab"))?;
+
+    for sym in symtab {
+        let name = symstrtab.get(sym.st_name as _)?;
+
+        if name == "KERNEL_VIRT_OFFSET" {
+            // TODO symbol checks
+            return Ok(sym.st_value);
+        }
+    }
+
+    Err(GenError::MissingSymbol("KERNEL_VIRT_OFFSET"))
+}
+
+fn find_tables<F: Read + Seek>(elf: &mut ElfStream<AnyEndian, F>) -> Result<(u64, u64), GenError> {
+    let (shdrs, strtab) = elf.section_headers_with_strtab()?;
+    let strtab = strtab.ok_or_else(|| GenError::MissingSection(".strtab"))?;
+
+    for shdr in shdrs {
+        let name = strtab.get(shdr.sh_name as _)?;
+
+        if name == ".data.tables" {
+            // TODO section checks
+            return Ok((shdr.sh_offset, shdr.sh_addr));
+        }
+    }
+
+    Err(GenError::MissingSection(".data.tables"))
+}
+
+fn into_any<T: Into<AnyTables>, U>((l, r): (T, U)) -> (AnyTables, U) {
+    (l.into(), r)
+}
+
+fn build_tables<F: Read + Seek>(file: F) -> Result<(AnyTables, u64), GenError> {
+    let mut elf = ElfStream::<AnyEndian, _>::open_stream(file)?;
+
+    let kernel_virt_offset = kernel_virt_offset(&mut elf)?;
+    let (kernel_start, kernel_end) = kernel_image_range(&mut elf, kernel_virt_offset)?;
+    let (table_offset, table_virt_addr) = find_tables(&mut elf)?;
+    let table_physical_address = table_virt_addr
+        .checked_sub(kernel_virt_offset)
+        .ok_or_else(|| GenError::IncorrectTablesPlacement(table_virt_addr))?;
+
+    println!("Kernel image range: {:#x?}", kernel_start..kernel_end);
+    println!("KERNEL_VIRT_OFFSET = {:#x}", kernel_virt_offset);
+
+    match elf.ehdr.e_machine {
+        EM_X86_64 => X8664Builder::new(
+            elf,
+            GenData {
+                kernel_virt_offset,
+                kernel_start,
+                kernel_end,
+                table_offset,
+                table_physical_address,
+            },
+        )?
+        .build()
+        .map(into_any),
+        EM_AARCH64 => AArch64Builder::new(
+            elf,
+            GenData {
+                kernel_virt_offset,
+                kernel_start,
+                kernel_end,
+                table_offset,
+                table_physical_address,
+            },
+        )?
+        .build()
+        .map(into_any),
+        _ => Err(GenError::UnsupportedArchitecture),
+    }
+}
+
+fn write_tables<F: Seek + Write>(
+    mut file: F,
+    offset: u64,
+    tables: AnyTables,
+) -> Result<(), GenError> {
+    file.seek(SeekFrom::Start(offset))?;
+    file.write_all(tables.as_bytes())?;
+    Ok(())
+}
+
+fn gentables<P: AsRef<Path>>(image: P) -> Result<(), GenError> {
+    let mut file = OpenOptions::new()
+        .read(true)
+        .write(true)
+        .truncate(false)
+        .open(image)?;
+
+    let (tables, file_offset) = build_tables(&mut file)?;
+    write_tables(file, file_offset, tables)?;
+
+    Ok(())
+}
+
+fn main() -> ExitCode {
+    let args = Args::parse();
+
+    match gentables(&args.image) {
+        Ok(()) => ExitCode::SUCCESS,
+        Err(err) => {
+            eprintln!("{}: {}", args.image.display(), err);
+            ExitCode::FAILURE
+        }
+    }
+}
diff --git a/kernel/tools/gentables/src/x86_64.rs b/kernel/tools/gentables/src/x86_64.rs
new file mode 100644
index 00000000..d7b12f5c
--- /dev/null
+++ b/kernel/tools/gentables/src/x86_64.rs
@@ -0,0 +1,191 @@
+use core::fmt;
+use std::{
+    io::{Read, Seek},
+    mem::offset_of,
+};
+
+use bitflags::bitflags;
+use elf::{abi::PT_LOAD, endian::AnyEndian, ElfStream};
+use memtables::x86_64::{FixedTables, KERNEL_L3_COUNT};
+
+use crate::{GenData, GenError};
+
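+// Page table entry bits used below: bit 0 is "present", bit 1 is "writable".
+// Bit 63 is the NX (no-execute) bit, which only takes effect when EFER.NXE is
+// enabled; it is left commented out here, so all kernel pages are mapped
+// executable for now.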
+bitflags! {
+    #[derive(Clone, Copy)]
+    struct PageFlags: u64 {
+        const PRESENT = 1 << 0;
+        const WRITABLE = 1 << 1;
+        // const NX = 1 << 63;
+    }
+}
+
+pub struct X8664Builder<F: Read + Seek> {
+    elf: ElfStream<AnyEndian, F>,
+    data: GenData,
+    tables: FixedTables,
+
+    l0i: usize,
+    l1i: usize,
+    start_l2i: usize,
+    end_l2i: usize,
+}
+
+impl PageFlags {
+    fn from_elf(_flags: u32) -> Self {
+        // For now, every segment is mapped writable and executable; the
+        // commented-out checks show the intended per-segment handling
+        let mut out = Self::empty();
+        // if flags & PF_W != 0 {
+        out |= Self::WRITABLE;
+        // }
+        // if flags & PF_X == 0 {
+        //     out |= Self::NX;
+        // }
+        out
+    }
+}
+
+impl fmt::Display for PageFlags {
+    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
+        write!(
+            f,
+            "r{}{}",
+            if self.contains(Self::WRITABLE) {
+                'w'
+            } else {
+                '-'
+            },
+            'x' // if self.contains(Self::NX) { '-' } else { 'x' }
+        )
+    }
+}
+
+impl<F: Read + Seek> X8664Builder<F> {
+    pub fn new(elf: ElfStream<AnyEndian, F>, data: GenData) -> Result<Self, GenError> {
+        let l2_aligned_start = data.kernel_start & !0x1FFFFF;
+        let l2_aligned_end = (data.kernel_end + 0x1FFFFF) & !0x1FFFFF;
+
+        if l2_aligned_end <= l2_aligned_start {
+            todo!();
+        }
+
+        if (l2_aligned_end - l2_aligned_start) as usize >= KERNEL_L3_COUNT * 0x200000 {
+            return Err(GenError::KernelTooLarge(
+                l2_aligned_start..l2_aligned_end,
+                l2_aligned_end - l2_aligned_start,
+                (KERNEL_L3_COUNT * 0x200000) as u64,
+            ));
+        }
+
+        let l0i = (data.kernel_start >> 39) as usize & 0x1FF;
+        let l1i = (data.kernel_start >> 30) as usize & 0x1FF;
+        let start_l2i = (l2_aligned_start >> 21) as usize & 0x1FF;
+        let end_l2i = (l2_aligned_end >> 21) as usize & 0x1FF;
+
+        Ok(Self {
+            elf,
+            data,
+            tables: FixedTables::zeroed(),
+
+            l0i,
+            l1i,
+            start_l2i,
+            end_l2i,
+        })
+    }
+
+    pub fn build(mut self) -> Result<(FixedTables, u64), GenError> {
+        assert_eq!(offset_of!(FixedTables, l0), 0);
+        let l1_physical_address =
+            self.data.table_physical_address + offset_of!(FixedTables, kernel_l1) as u64;
+        let l2_physical_address =
+            self.data.table_physical_address + offset_of!(FixedTables, kernel_l2) as u64;
+
+        // L0 -> L1
+        self.tables.l0.data[self.l0i] =
+            l1_physical_address | (PageFlags::PRESENT | PageFlags::WRITABLE).bits();
+
+        // L1 -> L2
+        self.tables.kernel_l1.data[self.l1i] =
+            l2_physical_address | (PageFlags::PRESENT | PageFlags::WRITABLE).bits();
+
+        // L2 -> L3s
+        for l2i in self.start_l2i..self.end_l2i {
+            let l3_table_index = l2i - self.start_l2i;
+            let l3_physical_address = self.data.table_physical_address
+                + (offset_of!(FixedTables, kernel_l3s) + 0x1000 * l3_table_index) as u64;
+
+            self.tables.kernel_l2.data[l2i] =
+                l3_physical_address | (PageFlags::PRESENT | PageFlags::WRITABLE).bits();
+        }
+
+        for (i, segment) in self.elf.segments().into_iter().enumerate() {
+            if segment.p_type != PT_LOAD
+                || segment.p_vaddr != segment.p_paddr + self.data.kernel_virt_offset
+            {
+                continue;
+            }
+
+            let aligned_virt_start = segment.p_vaddr & !0xFFF;
+            let aligned_virt_end = (segment.p_vaddr + segment.p_memsz + 0xFFF) & !0xFFF;
+            let aligned_phys_start = segment.p_paddr & !0xFFF;
+            let count = (aligned_virt_end - aligned_virt_start) / 0x1000;
+
+            let flags = PageFlags::from_elf(segment.p_flags);
+
+            println!(
+                "{}: {:#x?} -> {:#x} {}",
+                i,
+                aligned_virt_start..aligned_virt_end,
+                aligned_phys_start,
+                flags
+            );
+
+            Self::map_segment(
+                self.start_l2i,
+                &mut self.tables,
+                aligned_virt_start,
+                aligned_phys_start,
+                count as usize,
+                flags,
+            )?;
+        }
+
+        Ok((self.tables, self.data.table_offset))
+    }
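+
+    // Worked example for the index math in map_segment below (the address is
+    // illustrative): for address = 0xFFFF_FF80_0020_3000,
+    //   (address >> 21) & 0x1FF == 1   -> which 2 MiB slot of the kernel L2
+    //   (address >> 12) & 0x1FF == 3   -> which 4 KiB page within that slot
+    // Subtracting l2i_offset rebases the first value so the lowest kernel
+    // 2 MiB slot selects kernel_l3s[0].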
+
+    fn map_segment(
+        l2i_offset: usize,
+        tables: &mut FixedTables,
+        vaddr: u64,
+        paddr: u64,
+        count: usize,
+        flags: PageFlags,
+    ) -> Result<(), GenError> {
+        for index in 0..count {
+            let address = vaddr + index as u64 * 0x1000;
+            let page = paddr + index as u64 * 0x1000;
+
+            let entry = page | (PageFlags::PRESENT | flags).bits();
+
+            let l2i = ((address >> 21) as usize & 0x1FF) - l2i_offset;
+            let l3i = (address >> 12) as usize & 0x1FF;
+
+            let l3 = &mut tables.kernel_l3s[l2i];
+
+            if l3.data[l3i] != 0 {
+                if l3.data[l3i] != entry {
+                    eprintln!(
+                        "{:#x} is already mapped to {:#x}, tried to map to {:#x}",
+                        address, l3.data[l3i], page
+                    );
+                    todo!();
+                } else {
+                    continue;
+                }
+            }
+
+            l3.data[l3i] = entry;
+        }
+
+        Ok(())
+    }
+}
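+
+// A minimal host-side sanity check for the mapping logic above, added as a
+// sketch: it assumes the tool is tested on the host with `cargo test`. `File`
+// only pins the builder's otherwise-unused type parameter; no file is opened,
+// and the mapped address is illustrative.
+#[cfg(test)]
+mod tests {
+    use std::fs::File;
+
+    use super::*;
+
+    #[test]
+    fn map_single_page() {
+        let mut tables = FixedTables::zeroed();
+
+        // Map a single 4 KiB page at an arbitrary higher-half address
+        X8664Builder::<File>::map_segment(
+            0,
+            &mut tables,
+            0xFFFF_FF80_0000_0000,
+            0x20_0000,
+            1,
+            PageFlags::empty(),
+        )
+        .unwrap();
+
+        // (vaddr >> 21) & 0x1FF == 0 and (vaddr >> 12) & 0x1FF == 0, so the
+        // entry must land in slot 0 of the first kernel L3 table
+        let expected = 0x20_0000 | PageFlags::PRESENT.bits();
+        assert_eq!(tables.kernel_l3s[0].data[0], expected);
+    }
+}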