aarch64: reenable psci, add spin-table smp init

This commit is contained in:
Mark Poliakov 2024-12-16 14:24:46 +02:00
parent dfae656833
commit 699816d61d
10 changed files with 303 additions and 137 deletions

View File

@ -512,6 +512,11 @@ pub fn setup_memory_attributes() {
);
}
/// Enables data cache.
///
/// # Safety
///
/// Manipulates low-level machine state, use with care.
pub unsafe fn enable_dcache() {
barrier::dsb(barrier::ISHST);
barrier::isb(barrier::SY);
@ -522,6 +527,11 @@ pub unsafe fn enable_dcache() {
barrier::isb(barrier::SY);
}
/// Enables instruction cache.
///
/// # Safety
///
/// Manipulates low-level machine state, use with care.
pub unsafe fn enable_icache() {
barrier::isb(barrier::SY);
@ -531,6 +541,11 @@ pub unsafe fn enable_icache() {
barrier::isb(barrier::SY);
}
/// Disables instruction cache.
///
/// # Safety
///
/// Manipulates low-level machine state, use with care. Might break some instructions.
pub unsafe fn disable_icache() {
barrier::isb(barrier::SY);

View File

@ -1,5 +1,6 @@
#![cfg_attr(any(not(test), rust_analyzer), no_std)]
#![feature(trait_alias, let_chains, decl_macro)]
#![allow(clippy::type_complexity)]
extern crate alloc;

View File

@ -34,6 +34,11 @@ impl<'a> DeviceTree<'a> {
Self { tree, index }
}
/// Constructs a device tree representation from raw FDT address + index buffer.
///
/// # Safety
///
/// Accepts a raw address, unsafe.
pub unsafe fn from_raw_with_index(address: usize, index: &'a mut [u8]) -> Result<Self, Error> {
let tree = DevTree::from_raw_pointer(ptr::with_exposed_provenance(address))
.map_err(|_| Error::InvalidArgument)?;
@ -41,6 +46,11 @@ impl<'a> DeviceTree<'a> {
Ok(Self { tree, index })
}
/// Constructs a device tree representation from raw FDT address + a **static** index buffer.
///
/// # Safety
///
/// Accepts a raw address, uses a statically mutable index buffer, unsafe.
pub unsafe fn from_raw(address: usize) -> Result<Self, Error> {
static mut BUFFER: FdtIndexBuffer = FdtIndexBuffer([0; FDT_INDEX_BUFFER_SIZE]);
#[allow(static_mut_refs)]

View File

@ -21,11 +21,12 @@ use yggdrasil_abi::error::Error;
use crate::task::{process::Process, thread::Thread};
const MAX_DEBUG_SINKS: usize = 4;
const MAX_DEBUG_SINKS: usize = 8;
const RING_LOGGER_CAPACITY: usize = 65536;
static RING_AVAILABLE: AtomicBool = AtomicBool::new(false);
static SERIAL_SINK_SET_UP: AtomicBool = AtomicBool::new(false);
static DEBUG_LOCK: IrqSafeSpinlock<()> = IrqSafeSpinlock::new(());
struct KernelLoggerSink;
/// Locking log sink for dumping panic info
@ -184,6 +185,7 @@ impl log::Log for KernelLoggerSink {
return;
}
let _guard = DEBUG_LOCK.lock();
if RING_AVAILABLE.load(Ordering::Acquire) {
RING_LOGGER_SINK.log(record);
}

View File

@ -19,49 +19,8 @@
movk \reg, #:abs_g0_nc:\sym
.endm
.global __aarch64_entry
.global __aarch64_ap_entry
.section .text.entry
__aarch64_entry:
// x0 -- dtb_phys
//////////////////////////////
// Check CPU index //
//////////////////////////////
mrs x1, mpidr_el1
ands x1, x1, #3
bne .spin_wait
//////////////////////////////
// Setup stack //
//////////////////////////////
MOV_ABS x1, {stack_bottom} + {stack_size} - {kernel_virt_offset}
mov sp, x1
//////////////////////////////
// Check CurrentEL //
//////////////////////////////
mrs x1, CurrentEL
lsr x1, x1, #2
and x1, x1, #2
MOV_ABS x2, .el_table
ldr x2, [x2, x1, lsl #3]
br x2
// TODO code for leaving EL3
.el3:
b .
.el2:
//////////////////////////////
// Leave EL2 //
//////////////////////////////
// Setup stack for EL1
MOV_ABS x1, {stack_bottom} + {stack_size} - {kernel_virt_offset}
msr sp_el1, x1
// clobbers: x1
.macro LEAVE_EL2 destination
// Setup EL1 physical timer
mrs x1, cnthctl_el2
orr x1, x1, #(1 << 0) // EL0PCTEN = 1
@ -81,11 +40,48 @@ __aarch64_entry:
// Return to EL1
mov x1, #0x3C5
msr spsr_el2, x1
adr x1, .el1
adr x1, \destination
msr elr_el2, x1
eret
.endm
.el1:
.global __aarch64_entry
.global __aarch64_ap_entry
.global __aarch64_ap_spin_table_entry
.section .text.entry
__aarch64_entry:
// x0 -- dtb_phys
//////////////////////////////
// Check CPU index //
//////////////////////////////
mrs x1, mpidr_el1
ands x1, x1, #3
bne .spin_wait
//////////////////////////////
// Check CurrentEL //
//////////////////////////////
mrs x1, CurrentEL
lsr x1, x1, #2
and x1, x1, #3
MOV_ABS x2, .bsp_el_table
ldr x2, [x2, x1, lsl #3]
br x2
// TODO code for leaving EL3
.bsp_el3:
b .
.bsp_el2:
//////////////////////////////
// Leave EL2 //
//////////////////////////////
LEAVE_EL2 .bsp_el1
.bsp_el1:
//////////////////////////////
// Setup EL1 for Rust entry //
//////////////////////////////
@ -111,7 +107,7 @@ __aarch64_entry:
b .
// EL0 impossible
.el0:
.bsp_el0:
udf #0
.spin_wait:
@ -121,12 +117,71 @@ __aarch64_entry:
b .spin_wait
.p2align 3
.el_table:
.dword .el0
.dword .el1
.dword .el2
.dword .el3
.bsp_el_table:
.dword .bsp_el0
.dword .bsp_el1
.dword .bsp_el2
.dword .bsp_el3
//////////////////////////////
// AP startup code //
//////////////////////////////
.section .text
__aarch64_ap_spin_table_entry:
// Spin-table entry, expected state:
// * MMU is not yet enabled
// * In lower half
// * x0 is uninitialized
MOV_ABS x1, {spin_table_stack} - {kernel_virt_offset}
// Atomic load-acquire the stack address
ldar x0, [x1]
dsb ish
isb sy
// Fall through to __aarch64_ap_entry
__aarch64_ap_entry:
// Application processor entry, expected state:
// * MMU is not yet enabled
// * In lower half
// * x0 -- physical stack pointer
mrs x1, CurrentEL
lsr x1, x1, #2
and x1, x1, #3
adr x2, .el_table_ap
ldr x2, [x2, x1, lsl #3]
br x2
b .
// TODO code to leave EL3 on AP
.ap_el3:
b .
.ap_el2:
LEAVE_EL2 .ap_el1
.ap_el1:
dsb ish
isb sy
mov sp, x0
MOV_ABS x0, {ap_el1_entry} - {kernel_virt_offset}
blr x0
b .
// EL0 impossible
.ap_el0:
udf #0
.p2align 3
.el_table_ap:
.dword .ap_el0 - {kernel_virt_offset}
.dword .ap_el1 - {kernel_virt_offset}
.dword .ap_el2 - {kernel_virt_offset}
.dword .ap_el3 - {kernel_virt_offset}

View File

@ -1,18 +1,30 @@
//! AArch64 boot and entry implementation
use core::arch::global_asm;
use core::{
arch::global_asm,
sync::atomic::{AtomicUsize, Ordering},
};
use aarch64_cpu::{
asm::barrier,
registers::{CPACR_EL1, ID_AA64MMFR0_EL1, SCTLR_EL1, TCR_EL1, TTBR0_EL1},
};
use kernel_arch::{absolute_address, Architecture, ArchitectureImpl};
use kernel_arch_aarch64::mem;
use kernel_arch_aarch64::{
mem::{self, table::L3},
CPU_COUNT,
};
use libk::{devfs, task::runtime};
use libk_mm::address::PhysicalAddress;
use libk_mm::{
address::{PhysicalAddress, Virtualize},
phys,
table::EntryLevel,
};
use tock_registers::interfaces::{ReadWriteable, Readable, Writeable};
use super::{exception, BootStack, PLATFORM};
use crate::{arch::aarch64::BOOT_STACK_SIZE, kernel_main, mem::KERNEL_VIRT_OFFSET};
use crate::{
arch::aarch64::BOOT_STACK_SIZE, kernel_main, kernel_secondary_main, mem::KERNEL_VIRT_OFFSET,
};
unsafe fn check_mmu_features() {
if ID_AA64MMFR0_EL1.matches_all(ID_AA64MMFR0_EL1::TGran4::NotSupported) {
@ -97,8 +109,7 @@ unsafe extern "C" fn __aarch64_bsp_upper_entry(dtb: PhysicalAddress) -> ! {
// Remove the "lower-half" mapping, no longer needed
TTBR0_EL1.set(0);
barrier::dsb(barrier::ISH);
barrier::isb(barrier::SY);
mem::tlb_flush_all();
// Setup the "runtime" part of the kernel tables
if PLATFORM.init_memory_management(dtb).is_err() {
@ -124,53 +135,63 @@ unsafe extern "C" fn __aarch64_bsp_upper_entry(dtb: PhysicalAddress) -> ! {
// TODO re-implement for Pi 4B
unsafe extern "C" fn __aarch64_el1_ap_lower_entry() -> ! {
ArchitectureImpl::halt();
// const AP_STACK_PAGES: usize = 8;
// ArchitectureImpl::set_interrupt_mask(true);
const AP_STACK_PAGES: usize = 8;
ArchitectureImpl::set_interrupt_mask(true);
// // Unmask FP operations
// CPACR_EL1.modify(CPACR_EL1::FPEN::TrapNothing);
// Unmask FP operations
CPACR_EL1.modify(CPACR_EL1::FPEN::TrapNothing);
// pre_init_mmu();
// kernel_arch_aarch64::mem::load_fixed_tables();
// enable_mmu();
check_mmu_features();
pre_init_mmu();
kernel_arch_aarch64::mem::load_fixed_tables();
enable_mmu();
// let stack_pages = phys::alloc_pages_contiguous(AP_STACK_PAGES).unwrap();
// let stack_base = stack_pages.virtualize();
// let sp = stack_base + L3::SIZE * AP_STACK_PAGES;
let stack_pages = phys::alloc_pages_contiguous(AP_STACK_PAGES).unwrap();
let stack_base = stack_pages.virtualize();
let sp = stack_base + L3::SIZE * AP_STACK_PAGES;
// let elr = absolute_address!(__aarch64_ap_upper_entry);
let elr = absolute_address!(__aarch64_ap_upper_entry);
// enter_higher_half(sp, elr, 0);
barrier::dsb(barrier::ISH);
barrier::isb(barrier::SY);
enter_higher_half(sp, elr, 0);
}
extern "C" fn __aarch64_ap_upper_entry() -> ! {
ArchitectureImpl::halt();
// barrier::dmb(barrier::ISH);
// barrier::isb(barrier::SY);
// Flush lower half
TTBR0_EL1.set(0);
// let cpu_id = CPU_COUNT.fetch_add(1, Ordering::SeqCst);
// aarch64_cpu::asm::sev();
mem::tlb_flush_all();
// log::info!("cpu{} initializing", cpu_id);
let cpu_id = CPU_COUNT.fetch_add(1, Ordering::SeqCst);
barrier::dsb(barrier::ISHST);
barrier::isb(barrier::SY);
aarch64_cpu::asm::sev();
barrier::isb(barrier::SY);
// exception::init_exceptions();
log::info!("cpu{} initializing", cpu_id);
// unsafe {
// PLATFORM
// .init_platform(false)
// .expect("Could not initialize the AP");
// }
exception::init_exceptions();
// kernel_secondary_main()
unsafe {
PLATFORM
.init_platform(false)
.expect("Could not initialize the AP");
}
kernel_secondary_main()
}
#[link_section = ".bss"]
static BSP_STACK: BootStack<BOOT_STACK_SIZE> = BootStack::zeroed();
pub(crate) static SPIN_TABLE_STACK: AtomicUsize = AtomicUsize::new(0);
global_asm!(
include_str!("entry.S"),
bsp_el1_entry = sym __aarch64_el1_bsp_lower_entry,
ap_el1_entry = sym __aarch64_el1_ap_lower_entry,
spin_table_stack = sym SPIN_TABLE_STACK,
stack_bottom = sym BSP_STACK,
kernel_virt_offset = const KERNEL_VIRT_OFFSET,
stack_size = const BOOT_STACK_SIZE

View File

@ -10,7 +10,6 @@ use device_api::{
ResetDevice,
};
use device_tree::{driver::unflatten_device_tree, DeviceTree, DeviceTreeNodeExt};
use kernel_arch::Architecture;
use kernel_arch_aarch64::{
mem::{
self,
@ -32,7 +31,7 @@ use ygg_driver_pci::PciBusManager;
use crate::{
// device::power::arm_psci::Psci,
device::MACHINE_NAME,
device::{power::arm_psci::Psci, MACHINE_NAME},
fs::{Initrd, INITRD_DATA},
util::call_init_array,
};
@ -60,9 +59,10 @@ pub struct AArch64 {
dt: OneTimeInit<DeviceTree<'static>>,
/// Optional instance of PSCI on this platform
// pub psci: OneTimeInit<Arc<Psci>>,
pub psci: OneTimeInit<Arc<Psci>>,
reset: OneTimeInit<Arc<dyn ResetDevice>>,
initrd: OneTimeInit<PhysicalRef<'static, [u8]>>,
machine_compatible: OneTimeInit<&'static str>,
}
impl<const SIZE: usize> BootStack<SIZE> {
@ -81,6 +81,13 @@ impl Platform for AArch64 {
type L3 = L3;
unsafe fn start_application_processors(&self) {
if let Some(compatible) = self.machine_compatible.try_get() {
if *compatible == "raspberrypi,4-model-b" {
log::warn!("raspi4b: cache workaround disable SMP");
return;
}
}
let dt = self.dt.get();
if let Err(error) = smp::start_ap_cores(dt) {
log::error!("Could not initialize AP CPUs: {:?}", error);
@ -93,13 +100,12 @@ impl Platform for AArch64 {
}
unsafe fn reset(&self) -> ! {
ArchitectureImpl::halt();
// if let Some(reset) = self.reset.try_get() {
// reset.reset()
// } else {
// let psci = self.psci.get();
// psci.reset()
// }
if let Some(reset) = self.reset.try_get() {
reset.reset()
} else {
let psci = self.psci.get();
psci.reset()
}
}
}
@ -281,6 +287,7 @@ impl AArch64 {
let (machine_compatible, machine_name) = Self::machine_name(dt);
if let Some(compatible) = machine_compatible {
self.machine_compatible.init(compatible);
Self::apply_machine_workarounds(compatible);
}
@ -344,6 +351,7 @@ pub static PLATFORM: AArch64 = AArch64 {
dt: OneTimeInit::new(),
initrd: OneTimeInit::new(),
// psci: OneTimeInit::new(),
psci: OneTimeInit::new(),
reset: OneTimeInit::new(),
machine_compatible: OneTimeInit::new(),
};

View File

@ -1,11 +1,20 @@
//! Simultaneous multiprocessing support for aarch64
use core::sync::atomic::Ordering;
use core::{
ptr,
sync::atomic::{compiler_fence, AtomicU64, Ordering},
};
use aarch64_cpu::asm::barrier;
use abi::error::Error;
use device_api::CpuBringupDevice;
use device_tree::{DeviceTree, DeviceTreeNodeExt};
use kernel_arch_aarch64::CPU_COUNT;
use libk_mm::address::{PhysicalAddress, Virtualize};
use crate::mem::KERNEL_VIRT_OFFSET;
use crate::{
arch::{aarch64::boot::SPIN_TABLE_STACK, PLATFORM},
mem::KERNEL_VIRT_OFFSET,
};
use super::{BootStack, BOOT_STACK_SIZE};
@ -15,6 +24,7 @@ static AP_TRAMPOLINE_STACK: BootStack<BOOT_STACK_SIZE> = BootStack::zeroed();
#[derive(Debug)]
enum CpuEnableMethod {
Psci,
SpinTable(PhysicalAddress),
}
struct CpuInfo<'a> {
@ -26,13 +36,20 @@ struct CpuInfo<'a> {
fn enumerate_cpus<'a>(dt: &'a DeviceTree) -> impl Iterator<Item = CpuInfo<'a>> {
// let cpus = dt.find_absolute("/cpus").unwrap();
let cpus = dt.find_absolute("/cpus").unwrap();
let address_cells = cpus.parent_address_cells();
cpus.children().filter_map(|cpu_node| {
cpus.children().filter_map(move |cpu_node| {
let compatible = cpu_node.prop_string("compatible")?;
let id = cpu_node.prop_cell("reg", 0)? as u32;
let id = cpu_node.prop_cell("reg", 1)? as u32;
let enable_method = cpu_node.prop_string("enable-method")?;
let enable_method = match enable_method {
"psci" => CpuEnableMethod::Psci,
"spin-table" => {
let cpu_release_addr = PhysicalAddress::from_u64(
cpu_node.prop_cell("cpu-release-addr", address_cells)?,
);
CpuEnableMethod::SpinTable(cpu_release_addr)
}
_ => {
log::warn!("Unknown enable method for cpu #{id}: {enable_method:?}");
return None;
@ -48,23 +65,49 @@ fn enumerate_cpus<'a>(dt: &'a DeviceTree) -> impl Iterator<Item = CpuInfo<'a>> {
}
impl CpuEnableMethod {
unsafe fn start_cpu(&self, _id: usize, _ip: usize, _sp: usize) -> Result<(), Error> {
log::warn!("PSCI temporarily disabled");
Err(Error::NotImplemented)
// log::info!("Start CPU #{id}");
// match self {
// Self::Psci => {
// let psci = PLATFORM.psci.try_get().ok_or_else(|| {
// log::warn!(
// "cpu{} has to be enabled through PSCI, but no PSCI found",
// id
// );
// Error::InvalidArgument
// })?;
unsafe fn start_cpu(&self, id: usize, ip: usize, sp: usize) -> Result<(), Error> {
extern "C" {
fn __aarch64_ap_spin_table_entry();
}
// psci.start_cpu(id, ip, sp)
// }
// }
log::info!("Start CPU #{id}");
match self {
Self::Psci => {
let psci = PLATFORM.psci.try_get().ok_or_else(|| {
log::warn!("cpu{id} has to be enabled through PSCI, but no PSCI found");
Error::DoesNotExist
})?;
psci.start_cpu(id, ip, sp)
}
&Self::SpinTable(cpu_release_addr) => {
// Store a stack for the CPU
SPIN_TABLE_STACK.store(sp, Ordering::Release);
// Make sure the compiler doesn't reorder the stores
compiler_fence(Ordering::SeqCst);
barrier::dsb(barrier::ISHST);
// Make the CPU jump to __aarch64_ap_spin_table_entry first
let release_ptr =
ptr::with_exposed_provenance_mut::<u64>(cpu_release_addr.virtualize());
let release_atomic = AtomicU64::from_ptr(release_ptr);
let spin_entry_addr = __aarch64_ap_spin_table_entry as usize - KERNEL_VIRT_OFFSET;
release_atomic.store(spin_entry_addr as u64, Ordering::Release);
barrier::dsb(barrier::ISHST);
barrier::isb(barrier::SY);
// sev for implementations which spin with wfe
aarch64_cpu::asm::sev();
barrier::isb(barrier::SY);
// Caller will wait for the CPU to finish its startup and proceed
Ok(())
}
}
}
}
@ -86,10 +129,11 @@ pub unsafe fn start_ap_cores(dt: &DeviceTree) -> Result<(), Error> {
for cpu in enumerate_cpus(dt).filter(|cpu| cpu.id != 0) {
log::debug!(
"cpu{}: enable-method={:?}, compatible={:?}",
"cpu{}: enable-method={:x?}, compatible={:?}, sp={:#x}",
cpu.id,
cpu.enable_method,
cpu.compatible
cpu.compatible,
sp
);
// Wait for the CPU to come up
@ -97,7 +141,10 @@ pub unsafe fn start_ap_cores(dt: &DeviceTree) -> Result<(), Error> {
// Safety: safe, the function is inside the kernel
let ip = __aarch64_ap_entry as usize - KERNEL_VIRT_OFFSET;
// let sp = stack_pages.add(AP_STACK_PAGES * 0x1000);
barrier::dsb(barrier::ISH);
barrier::isb(barrier::SY);
if let Err(error) = cpu.enable_method.start_cpu(cpu.id as usize, ip, sp) {
log::error!("Couldn't start cpu{} up: {:?}", cpu.id, error);
continue;

View File

@ -5,8 +5,8 @@ use libk_util::OneTimeInit;
pub mod bus;
pub mod display;
pub mod power;
pub mod serial;
// pub mod power;
// pub mod timer;
/// Generic machine description string

View File

@ -3,7 +3,10 @@
use abi::error::Error;
use alloc::sync::Arc;
use device_api::{device::Device, CpuBringupDevice, ResetDevice};
use device_tree::{device_tree_driver, dt::DevTreeIndexNodePropGet};
use device_tree::{
driver::{device_tree_driver, Node, ProbeContext},
DeviceTreePropertyRead,
};
use kernel_arch::{Architecture, ArchitectureImpl};
use crate::arch::PLATFORM;
@ -72,23 +75,27 @@ impl Psci {
device_tree_driver! {
compatible: ["arm,psci-1.0", "arm,psci"],
probe(of) => {
todo!()
// let method: &str = dt.node.prop("method")?;
// let method = match method {
// "hvc" => CallMethod::Hvc,
// "smc" => CallMethod::Smc,
// _ => panic!("Unknown PSCI call method: {:?}", method)
// };
// let cpu_on = dt.node.prop("cpu_on")?;
// let cpu_off = dt.node.prop("cpu_off")?;
// let cpu_suspend = dt.node.prop("cpu_suspend")?;
driver: {
fn probe(&self, node: &Arc<Node>, _context: &ProbeContext) -> Option<Arc<dyn Device>> {
let method = node.property("method")?;
let method = match method.as_str()? {
"hvc" => CallMethod::Hvc,
"smc" => CallMethod::Smc,
method => {
log::warn!("Ignoring PSCI with unknown method: {method:?}");
return None;
}
};
let cpu_on = node.property("cpu_on")?.read_cell(0, 1)? as u32;
let cpu_off = node.property("cpu_off")?.read_cell(0, 1)? as u32;
let cpu_suspend = node.property("cpu_suspend")?.read_cell(0, 1)? as u32;
// Some(Arc::new(Psci {
// method,
// cpu_on,
// cpu_off,
// cpu_suspend
// }))
Some(Arc::new(Psci {
method,
cpu_on,
cpu_off,
cpu_suspend
}))
}
}
}