From 9911c7ea9b2f5289edbb2e73cc30a970390377bf Mon Sep 17 00:00:00 2001
From: Mark Poliakov
Date: Tue, 18 Mar 2025 20:02:18 +0200
Subject: [PATCH] aarch64: feature parity with riscv64

---
Review note: src/mem/tls.zig now places the TLS image TP_DISPLACEMENT bytes
past the thread pointer (16 on aarch64, 0 on riscv64) and rejects unsupported
targets with @compileError. Diffstat and hunk sizes were updated to match;
blob ids are left as originally generated.

 build.zig                    |  2 +
 src/arch/aarch64.zig         | 40 +++++++------------
 src/arch/aarch64/boot.zig    | 17 ++++----
 src/arch/aarch64/context.S   | 58 +++++++++++++++++++++++++++
 src/arch/aarch64/context.zig | 55 ++++++++++++++++++++++++++
 src/arch/aarch64/regs.zig    | 11 ++++--
 src/arch/riscv64.zig         | 60 +---------------------------
 src/arch/riscv64/boot.zig    | 31 +++------------
 src/arch/riscv64/context.S   | 67 +++++++++++++++++++++++++++++++
 src/arch/riscv64/context.zig | 62 +++++++++++++++++++++++++++++
 src/arch/riscv64/entry.S     | 76 ++----------------------------------
 src/mem/tls.zig              | 85 ++++++++++++++++++++++++++++++++++++++++
 12 files changed, 372 insertions(+), 192 deletions(-)
 create mode 100644 src/arch/aarch64/context.S
 create mode 100644 src/arch/aarch64/context.zig
 create mode 100644 src/arch/riscv64/context.S
 create mode 100644 src/arch/riscv64/context.zig
 create mode 100644 src/mem/tls.zig

diff --git a/build.zig b/build.zig
index ef795d7..209c2b9 100644
--- a/build.zig
+++ b/build.zig
@@ -51,6 +51,7 @@ const SupportedArch = enum {
             },
             .aarch64 => {
                 kernel.entry = .{ .symbol_name = "__aa64_entry" };
+                kernel.link_z_max_page_size = 0x1000;
                 kernel.setLinkerScript(b.path("etc/aarch64-unknown-none.ld"));
 
                 kernel.addCSourceFiles(.{
@@ -162,6 +163,7 @@ pub fn build(b: *std.Build) anyerror!void {
         .name = "kernel",
         .root_module = kernel_module,
         .pic = true,
+        .use_lld = true,
     });
     kernel.pie = true;
 
diff --git a/src/arch/aarch64.zig b/src/arch/aarch64.zig
index 5a4977f..e6da0ed 100644
--- a/src/arch/aarch64.zig
+++ b/src/arch/aarch64.zig
@@ -5,28 +5,7 @@ const regs = @import("aarch64/regs.zig");
 
 export const _ = boot.aa64_bsp_lower_entry;
 
-pub const Context = struct {
-    pub fn idle() Context {
-        @panic("TODO");
-    }
-
-    pub fn kernel(pc: usize, arg: usize) Context {
-        _ = pc;
-        _ = arg;
-        @panic("TODO");
-    }
-
-    pub fn enter(self: *Context) noreturn {
-        _ = self;
-        @panic("TODO");
-    }
-
-    pub fn switch_from(self: *Context, from: *Context) void {
-        _ = self;
-        _ = from;
-        @panic("TODO");
-    }
-};
+pub const Context = @import("aarch64/context.zig").Context;
 
 pub fn set_interrupt_mask(masked: bool) bool {
     const old = interrupt_mask();
@@ -38,16 +17,27 @@ pub fn set_interrupt_mask(masked: bool) bool {
     return old;
 }
 
-pub fn interrupt_mask() bool {
+pub inline fn interrupt_mask() bool {
     return regs.DAIF.read().I;
 }
 
+pub inline fn wait_for_interrupt() void {
+    asm volatile ("wfi");
+}
+
 pub fn halt() noreturn {
-    while (true) {}
+    while (true) {
+        _ = set_interrupt_mask(true);
+        wait_for_interrupt();
+    }
 }
 
 pub fn spin_hint() void {
-    // TODO
+    asm volatile ("isb sy" ::: "memory");
+}
+
+pub inline fn set_thread_pointer(tp: usize) void {
+    regs.TPIDR_EL0.set(tp);
 }
 
 pub inline fn barrier(comptime kind: std.builtin.AtomicOrder) void {
diff --git a/src/arch/aarch64/boot.zig b/src/arch/aarch64/boot.zig
index 318d848..34b774e 100644
--- a/src/arch/aarch64/boot.zig
+++ b/src/arch/aarch64/boot.zig
@@ -2,6 +2,7 @@ const kernel = @import("../../kernel.zig");
 const vmm = @import("vmm.zig");
 const dtb = @import("../../util/dtb.zig");
 const exception = @import("exception.zig");
+const tls = @import("../../mem/tls.zig");
 
 const arch = kernel.arch;
 const mem = kernel.mem;
@@ -54,14 +55,9 @@ fn aa64_bsp_upper_entry(real_address: u64) callconv(.C) noreturn {
 
     setup_memory_from_fdt(real_address);
 
-    asm volatile ("" ::: "memory");
+    setup_per_cpu();
 
-    // Test exception handling
-    const p: *const u32 = @ptrFromInt(0x111122223338);
-    const v: u32 = p.*;
-    log.info("v = {}", .{v});
-
-    arch.halt();
+    kernel.kernel_main();
 }
 
 pub export fn aa64_bsp_lower_entry(real_address: u64, dtb_address: u64) callconv(.C) noreturn {
@@ -131,3 +127,10 @@ fn setup_memory_from_fdt(real_address: usize) void {
 
     phys_memory.init();
 }
+
+fn setup_per_cpu() void {
+    const tls_data = tls.load_kernel_tls_image();
+    const tp = @intFromPtr(tls_data.ptr);
+    log.info("Set TP = 0x{x}", .{tp});
+    arch.set_thread_pointer(tp);
+}
diff --git a/src/arch/aarch64/context.S b/src/arch/aarch64/context.S
new file mode 100644
index 0000000..7a2cbc7
--- /dev/null
+++ b/src/arch/aarch64/context.S
@@ -0,0 +1,58 @@
+// vi:set ft=asm:
+
+.global __aa64_enter_task
+.global __aa64_switch_task
+.global __aa64_task_enter_kernel
+
+.set CONTEXT_SIZE, (12 * 8)
+
+.macro SAVE_TASK_CONTEXT
+    sub sp, sp, #CONTEXT_SIZE
+    stp x19, x20, [sp, #0 * 16]
+    stp x21, x22, [sp, #1 * 16]
+    stp x23, x24, [sp, #2 * 16]
+    stp x25, x26, [sp, #3 * 16]
+    stp x27, x28, [sp, #4 * 16]
+    stp x29, x30, [sp, #5 * 16]
+.endm
+
+.macro RESTORE_TASK_CONTEXT
+    ldp x19, x20, [sp, #0 * 16]
+    ldp x21, x22, [sp, #1 * 16]
+    ldp x23, x24, [sp, #2 * 16]
+    ldp x25, x26, [sp, #3 * 16]
+    ldp x27, x28, [sp, #4 * 16]
+    ldp x29, x30, [sp, #5 * 16]
+    add sp, sp, #CONTEXT_SIZE
+.endm
+
+.pushsection .text
+
+__aa64_task_enter_kernel:
+    // arg, entry
+    ldp x0, lr, [sp]
+    add sp, sp, #16
+
+    // TODO enter task via eret to EL1t
+
+    ret
+
+__aa64_switch_task:
+    // x0 -- "dst" context
+    // x1 -- "src" context
+
+    SAVE_TASK_CONTEXT
+
+    mov x19, sp
+    str x19, [x1]
+
+__aa64_enter_task:
+    // x0 -- "dst" context
+    ldr x0, [x0]
+    mov sp, x0
+
+    RESTORE_TASK_CONTEXT
+
+    ret
+
+.popsection // .text
diff --git a/src/arch/aarch64/context.zig b/src/arch/aarch64/context.zig
new file mode 100644
index 0000000..4ab95b4
--- /dev/null
+++ b/src/arch/aarch64/context.zig
@@ -0,0 +1,55 @@
+const thread = @import("../../thread.zig");
+
+fn idle_function() callconv(.naked) noreturn {
+    asm volatile ("b .");
+}
+
+extern fn __aa64_enter_task(cx: *Context) callconv(.C) noreturn;
+extern fn __aa64_switch_task(dcx: *Context, scx: *Context) callconv(.C) void;
+extern fn __aa64_task_enter_kernel() callconv(.C) noreturn;
+
+pub const Context = extern struct {
+    const STACK_SIZE: usize = 16384;
+
+    kstack: thread.KStack(STACK_SIZE),
+
+    pub fn idle() Context {
+        const entry = @intFromPtr(&idle_function);
+        return Context.kernel(entry, 0);
+    }
+
+    pub fn kernel(pc: usize, arg: usize) Context {
+        var ks = thread.KStack(STACK_SIZE).create();
+        const entry = @intFromPtr(&__aa64_task_enter_kernel);
+
+        ks.push(pc);
+        ks.push(arg);
+
+        ks.push(entry); // x30/lr
+        ks.push(0); // x29
+        ks.push(0); // x28
+        ks.push(0); // x27
+        ks.push(0); // x26
+        ks.push(0); // x25
+        ks.push(0); // x24
+        ks.push(0); // x23
+        ks.push(0); // x22
+        ks.push(0); // x21
+        ks.push(0); // x20
+        ks.push(0); // x19
+
+        return Context{ .kstack = ks };
+    }
+
+    pub fn enter(self: *Context) noreturn {
+        __aa64_enter_task(self);
+    }
+
+    pub fn switch_from(self: *Context, from: *Context) void {
+        __aa64_switch_task(self, from);
+    }
+};
+
+comptime {
+    asm (@embedFile("context.S"));
+}
diff --git a/src/arch/aarch64/regs.zig b/src/arch/aarch64/regs.zig
index 48ac49d..0cc22cd 100644
--- a/src/arch/aarch64/regs.zig
+++ b/src/arch/aarch64/regs.zig
@@ -36,9 +36,10 @@ fn Register(comptime name: []const u8, comptime bits: type) type {
 
 pub const TTBR0_EL1 = Register("ttbr0_el1", u64);
 pub const TTBR1_EL1 = Register("ttbr1_el1", u64);
-pub const VBAR_EL1 = Register("vbar_el1", u64);
-pub const ELR_EL1 = Register("elr_el1", u64);
-pub const FAR_EL1 = Register("far_el1", u64);
+
+// NOTE: tpidr_el0 is used until codegen can emit TLS instructions against tpidr_el1
+pub const TPIDR_EL0 = Register("tpidr_el0", u64);
+
 pub const DAIF = Register("daif", packed struct(u64) {
     // 0..6
     _0: u6 = 0,
@@ -54,6 +55,10 @@ pub const DAIF = Register("daif", packed struct(u64) {
     _1: u54 = 0,
 });
 
+pub const VBAR_EL1 = Register("vbar_el1", u64);
+pub const ELR_EL1 = Register("elr_el1", u64);
+pub const FAR_EL1 = Register("far_el1", u64);
+
 pub const ESR_EL1 = Register("esr_el1", packed struct(u64) {
     // 0..25
     ISS: u25 = 0,
diff --git a/src/arch/riscv64.zig b/src/arch/riscv64.zig
index f8e8595..4ff443a 100644
--- a/src/arch/riscv64.zig
+++ b/src/arch/riscv64.zig
@@ -2,74 +2,16 @@
 
 const boot = @import("riscv64/boot.zig");
 const regs = @import("riscv64/regs.zig");
-const thread = @import("../thread.zig");
 const std = @import("std");
 const builtin = @import("builtin");
 
-const Arena = @import("../arena.zig").Arena;
-
 export const _ = boot.rv64_bsp_lower_entry;
 
-extern fn __rv64_enter_task(cx: *Context) callconv(.C) noreturn;
-extern fn __rv64_switch_task(dcx: *Context, scx: *Context) callconv(.C) void;
-extern fn __rv64_task_enter_kernel() callconv(.C) noreturn;
-
-fn idle_function() callconv(.naked) noreturn {
-    asm volatile ("j .");
-}
-
 /// This CPU's HART (HARdware Thread) ID.
 pub threadlocal var t_hart_id: u32 = 0;
 
 /// RISC-V task context
-pub const Context = extern struct {
-    const STACK_SIZE: usize = 8192;
-
-    // Has to be exactly at offset 0x00, used in assembly.
-    kstack: thread.KStack(STACK_SIZE),
-
-    /// Constructs an idle context struct.
-    pub fn idle() @This() {
-        const entry = @intFromPtr(&idle_function);
-        return Context.kernel(entry, 0);
-    }
-
-    /// Constructs a kernel task context with entry point in `pc` and an `arg`ument.
-    pub fn kernel(pc: usize, arg: usize) @This() {
-        var ks = thread.KStack(STACK_SIZE).create();
-        const entry = @intFromPtr(&__rv64_task_enter_kernel);
-
-        ks.push(pc);
-        ks.push(arg);
-
-        ks.push(0); // x8/s0/fp
-        ks.push(0); // x9/s1
-        ks.push(0); // x18/s2
-        ks.push(0); // x19/s3
-        ks.push(0); // x20/s4
-        ks.push(0); // x21/s5
-        ks.push(0); // x22/s6
-        ks.push(0); // x23/s7
-        ks.push(0); // x24/s8
-        ks.push(0); // x25/s9
-        ks.push(0); // x26/s10
-        ks.push(0); // x27/s11
-        ks.push(0); // x4/gp
-        ks.push(entry); // x1/ra return address
-
-        return .{ .kstack = ks };
-    }
-
-    /// Low-level task context entry function.
-    pub fn enter(self: *@This()) noreturn {
-        __rv64_enter_task(self);
-    }
-
-    /// Low-level task context switch function.
-    pub fn switch_from(self: *@This(), from: *@This()) void {
-        __rv64_switch_task(self, from);
-    }
-};
+pub const Context = @import("riscv64/context.zig").Context;
 
 pub inline fn halt() noreturn {
     while (true) {
diff --git a/src/arch/riscv64/boot.zig b/src/arch/riscv64/boot.zig
index 02f2111..81d3ed1 100644
--- a/src/arch/riscv64/boot.zig
+++ b/src/arch/riscv64/boot.zig
@@ -7,6 +7,7 @@ const dtb = @import("../../util/dtb.zig");
 const mem = @import("../../mem.zig");
 const arena = @import("../../arena.zig");
 const exception = @import("exception.zig");
+const tls = @import("../../mem/tls.zig");
 
 const phys_memory = mem.phys;
 const PAGE_SIZE = mem.vmm.PAGE_SIZE;
@@ -76,36 +77,14 @@ pub export fn rv64_bsp_lower_entry(real_address: usize, bsp_hart_id: usize, dtb_
 
 extern const __rela_start: u8;
 extern const __rela_end: u8;
-extern var __tdata_start: u8;
-extern var __tdata_end: u8;
-extern var __tbss_start: u8;
-extern var __tbss_end: u8;
 extern var __kernel_start: u8;
 extern var __kernel_end: u8;
 
 fn setup_per_cpu() void {
-    // Assume .tbss follows .tdata
-    const tdata_start = @intFromPtr(&__tdata_start);
-    const tdata_end = @intFromPtr(&__tdata_end);
-    const tdata_size = tdata_end - tdata_start;
-    const tbss_start = @intFromPtr(&__tbss_start);
-    const tbss_end = @intFromPtr(&__tbss_end);
-    const tbss_size = tbss_end - tbss_start;
-
-    const tdata_data = @as([*]u8, @ptrFromInt(tdata_start))[0..tdata_size];
-
-    const tls_size = tdata_size + tbss_size;
-    const tls_page_count = (tls_size + PAGE_SIZE - 1) / PAGE_SIZE;
-    // Variant I: TLS block 0 follows TP after a certain displacement
-    const tls_address = phys_memory.alloc_pages(tls_page_count).?.virtualize();
-    const tls_data = @as([*]u8, @ptrFromInt(tls_address))[0..tls_size];
-
-    log.info("Allocated TLS @ {*}", .{tls_data});
-
-    @memcpy(tls_data[0..tdata_size], tdata_data);
-    @memset(tls_data[tdata_size..], 0);
-
-    arch.set_thread_pointer(tls_address);
+    const tls_data = tls.load_kernel_tls_image();
+    const tp = @intFromPtr(tls_data.ptr);
+    log.info("Set TP = 0x{x}", .{tp});
+    arch.set_thread_pointer(tp);
 }
 
 export fn rv64_relocate_kernel(image_base: usize, rela_start: usize, rela_end: usize) void {
diff --git a/src/arch/riscv64/context.S b/src/arch/riscv64/context.S
new file mode 100644
index 0000000..2596b12
--- /dev/null
+++ b/src/arch/riscv64/context.S
@@ -0,0 +1,67 @@
+.pushsection .text
+.option push
+.option norvc
+
+.global __rv64_enter_task
+.global __rv64_switch_task
+.global __rv64_task_enter_kernel
+
+.macro LOAD_TASK_STATE
+    ld ra, 0 * 8(sp)
+    ld gp, 1 * 8(sp)
+    ld s11, 2 * 8(sp)
+    ld s10, 3 * 8(sp)
+    ld s9, 4 * 8(sp)
+    ld s8, 5 * 8(sp)
+    ld s7, 6 * 8(sp)
+    ld s6, 7 * 8(sp)
+    ld s5, 8 * 8(sp)
+    ld s4, 9 * 8(sp)
+    ld s3, 10 * 8(sp)
+    ld s2, 11 * 8(sp)
+    ld s1, 12 * 8(sp)
+    ld s0, 13 * 8(sp)
+
+    addi sp, sp, 14 * 8
+.endm
+
+.macro SAVE_TASK_STATE
+    addi sp, sp, -(14 * 8)
+
+    sd ra, 0 * 8(sp)
+    sd gp, 1 * 8(sp)
+    sd s11, 2 * 8(sp)
+    sd s10, 3 * 8(sp)
+    sd s9, 4 * 8(sp)
+    sd s8, 5 * 8(sp)
+    sd s7, 6 * 8(sp)
+    sd s6, 7 * 8(sp)
+    sd s5, 8 * 8(sp)
+    sd s4, 9 * 8(sp)
+    sd s3, 10 * 8(sp)
+    sd s2, 11 * 8(sp)
+    sd s1, 12 * 8(sp)
+    sd s0, 13 * 8(sp)
+.endm
+
+__rv64_task_enter_kernel:
+    ld a0, (sp) // argument
+    ld ra, 8(sp) // entry
+    addi sp, sp, 16
+
+    // TODO S-mode -> S-mode return via sret
+    ret
+
+__rv64_switch_task:
+    // a0 - new context
+    // a1 - old context
+    SAVE_TASK_STATE
+    sd sp, (a1)
+__rv64_enter_task:
+    // a0 -- new context
+    ld sp, (a0)
+    LOAD_TASK_STATE
+    ret
+
+.option pop // norvc
+.popsection // .text
diff --git a/src/arch/riscv64/context.zig b/src/arch/riscv64/context.zig
new file mode 100644
index 0000000..34f90f7
--- /dev/null
+++ b/src/arch/riscv64/context.zig
@@ -0,0 +1,62 @@
+const thread = @import("../../thread.zig");
+
+fn idle_function() callconv(.naked) noreturn {
+    asm volatile ("j .");
+}
+
+extern fn __rv64_enter_task(cx: *Context) callconv(.C) noreturn;
+extern fn __rv64_switch_task(dcx: *Context, scx: *Context) callconv(.C) void;
+extern fn __rv64_task_enter_kernel() callconv(.C) noreturn;
+
+pub const Context = extern struct {
+    const STACK_SIZE: usize = 8192;
+
+    // Has to be exactly at offset 0x00, used in assembly.
+    kstack: thread.KStack(STACK_SIZE),
+
+    /// Constructs an idle context struct.
+    pub fn idle() @This() {
+        const entry = @intFromPtr(&idle_function);
+        return Context.kernel(entry, 0);
+    }
+
+    /// Constructs a kernel task context with entry point in `pc` and an `arg`ument.
+    pub fn kernel(pc: usize, arg: usize) @This() {
+        var ks = thread.KStack(STACK_SIZE).create();
+        const entry = @intFromPtr(&__rv64_task_enter_kernel);
+
+        ks.push(pc);
+        ks.push(arg);
+
+        ks.push(0); // x8/s0/fp
+        ks.push(0); // x9/s1
+        ks.push(0); // x18/s2
+        ks.push(0); // x19/s3
+        ks.push(0); // x20/s4
+        ks.push(0); // x21/s5
+        ks.push(0); // x22/s6
+        ks.push(0); // x23/s7
+        ks.push(0); // x24/s8
+        ks.push(0); // x25/s9
+        ks.push(0); // x26/s10
+        ks.push(0); // x27/s11
+        ks.push(0); // x4/gp
+        ks.push(entry); // x1/ra return address
+
+        return .{ .kstack = ks };
+    }
+
+    /// Low-level task context entry function.
+    pub fn enter(self: *@This()) noreturn {
+        __rv64_enter_task(self);
+    }
+
+    /// Low-level task context switch function.
+    pub fn switch_from(self: *@This(), from: *@This()) void {
+        __rv64_switch_task(self, from);
+    }
+};
+
+comptime {
+    asm (@embedFile("context.S"));
+}
diff --git a/src/arch/riscv64/entry.S b/src/arch/riscv64/entry.S
index 394f56d..5336a60 100644
--- a/src/arch/riscv64/entry.S
+++ b/src/arch/riscv64/entry.S
@@ -25,7 +25,7 @@ __rv64_entry:
     .ascii "RISCV\x00\x00\x00" // Magic 1
     .ascii "RSC\x05" // Magic 2
     .long 0
-.option pop
+.option pop // rvc
 
 .option push
 .option norvc
@@ -80,80 +80,12 @@ __rv64_real_entry:
     jr t0
 
 .size __rv64_entry, . - __rv64_entry
-.option pop
-.popsection
+.option pop // norvc
+.popsection // .text.entry
 
 .pushsection .bss
 .p2align 4
 __rv64_bsp_stack_bottom:
     .skip 65536
 __rv64_bsp_stack_top:
-.popsection
-
-.pushsection .text
-.option push
-.option norvc
-
-.global __rv64_enter_task
-.global __rv64_switch_task
-.global __rv64_task_enter_kernel
-
-.macro LOAD_TASK_STATE
-    ld ra, 0 * 8(sp)
-    ld gp, 1 * 8(sp)
-    ld s11, 2 * 8(sp)
-    ld s10, 3 * 8(sp)
-    ld s9, 4 * 8(sp)
-    ld s8, 5 * 8(sp)
-    ld s7, 6 * 8(sp)
-    ld s6, 7 * 8(sp)
-    ld s5, 8 * 8(sp)
-    ld s4, 9 * 8(sp)
-    ld s3, 10 * 8(sp)
-    ld s2, 11 * 8(sp)
-    ld s1, 12 * 8(sp)
-    ld s0, 13 * 8(sp)
-
-    addi sp, sp, 14 * 8
-.endm
-
-.macro SAVE_TASK_STATE
-    addi sp, sp, -(14 * 8)
-
-    sd ra, 0 * 8(sp)
-    sd gp, 1 * 8(sp)
-    sd s11, 2 * 8(sp)
-    sd s10, 3 * 8(sp)
-    sd s9, 4 * 8(sp)
-    sd s8, 5 * 8(sp)
-    sd s7, 6 * 8(sp)
-    sd s6, 7 * 8(sp)
-    sd s5, 8 * 8(sp)
-    sd s4, 9 * 8(sp)
-    sd s3, 10 * 8(sp)
-    sd s2, 11 * 8(sp)
-    sd s1, 12 * 8(sp)
-    sd s0, 13 * 8(sp)
-.endm
-
-__rv64_task_enter_kernel:
-    ld a0, (sp) // argument
-    ld ra, 8(sp) // entry
-    addi sp, sp, 16
-
-    // TODO S-mode -> S-mode return via sret
-    ret
-
-__rv64_switch_task:
-    // a0 - new context
-    // a1 - old context
-    SAVE_TASK_STATE
-    sd sp, (a1)
-__rv64_enter_task:
-    // a0 -- new context
-    ld sp, (a0)
-    LOAD_TASK_STATE
-    ret
-
-.option pop
-.popsection
+.popsection // .bss
diff --git a/src/mem/tls.zig b/src/mem/tls.zig
new file mode 100644
index 0000000..eee7568
--- /dev/null
+++ b/src/mem/tls.zig
@@ -0,0 +1,85 @@
+//! Thread-local storage implementation.
+
+const builtin = @import("builtin");
+
+const vmm = @import("vmm.zig");
+const phys_memory = @import("phys.zig");
+const kernel = @import("../kernel.zig");
+
+const PAGE_SIZE = vmm.PAGE_SIZE;
+const log = kernel.debug.log;
+
+/// Thread-local storage layout variant used by this target platform.
+pub const TLS_VARIANT: enum {
+    /// Variant I:
+    ///
+    /// [ TCB ] [ pad to p_align ] [ MODULE 0 ] [ MODULE 1 ] ...
+    /// |                          |            |
+    /// |                          |            |
+    /// tp                         off1         off2
+    variant1,
+    /// Variant II:
+    ///
+    /// ... [ MODULE 1 ] [ MODULE 0 ] [ TCB ]
+    ///     |            |            |
+    ///     |            |            |
+    ///     off2         off1         tp
+    variant2,
+} = switch (builtin.cpu.arch) {
+    .riscv64, .aarch64 => .variant1,
+    // x86-64 uses variant 2
+    else => @compileError("Unsupported CPU architecture"),
+};
+
+/// Byte distance from the thread pointer to TLS block 0 (Variant I).
+///
+/// AArch64 keeps a 16-byte TCB at TP, so thread-local offsets start at TP + 16;
+/// RISC-V places block 0 at TP itself.
+/// NOTE: assumes the TLS segment alignment does not exceed 16 bytes.
+const TP_DISPLACEMENT: usize = switch (builtin.cpu.arch) {
+    .aarch64 => 16,
+    else => 0,
+};
+
+extern var __tdata_start: u8;
+extern var __tdata_end: u8;
+extern var __tbss_start: u8;
+extern var __tbss_end: u8;
+
+/// Allocates storage for one per-CPU TLS block, clones the TLS image
+/// (as described by .tbss/.tdata sections) and returns the result.
+///
+/// The returned slice begins at the address to load into the thread pointer;
+/// the cloned image sits `TP_DISPLACEMENT` bytes into it.
+pub fn load_kernel_tls_image() []u8 {
+    // Assume .tbss follows .tdata
+    const tdata_start = @intFromPtr(&__tdata_start);
+    const tdata_end = @intFromPtr(&__tdata_end);
+    const tdata_size = tdata_end - tdata_start;
+    const tbss_start = @intFromPtr(&__tbss_start);
+    const tbss_end = @intFromPtr(&__tbss_end);
+    const tbss_size = tbss_end - tbss_start;
+
+    const tdata_data = @as([*]u8, @ptrFromInt(tdata_start))[0..tdata_size];
+
+    switch (comptime TLS_VARIANT) {
+        .variant1 => {
+            const tls_size = TP_DISPLACEMENT + tdata_size + tbss_size;
+            const tls_page_count = (tls_size + PAGE_SIZE - 1) / PAGE_SIZE;
+            // Variant I: TLS block 0 follows TP after a certain displacement
+            const tls_address = phys_memory.alloc_pages(tls_page_count).?.virtualize();
+            const tls_data = @as([*]u8, @ptrFromInt(tls_address))[0..tls_size];
+            const image = tls_data[TP_DISPLACEMENT..];
+
+            log.info("Allocated TLS @ {*}", .{tls_data});
+
+            // Clear the TCB area, copy .tdata, then zero .tbss
+            @memset(tls_data[0..TP_DISPLACEMENT], 0);
+            @memcpy(image[0..tdata_size], tdata_data);
+            @memset(image[tdata_size..], 0);
+
+            return tls_data;
+        },
+        .variant2 => @panic("TODO: TLS variant II"),
+    }
+}