From 000b434c960d1fe5f06670b22eb27dd5383cc0a2 Mon Sep 17 00:00:00 2001
From: Mark Poliakov <mark@alnyan.me>
Date: Wed, 26 Mar 2025 14:09:17 +0200
Subject: [PATCH] WIP: Userspace entry code for both platforms

---
 src/arch.zig                   |  12 ++-
 src/arch/aarch64/boot.zig      |  13 ++-
 src/arch/aarch64/context.S     |  30 +++++-
 src/arch/aarch64/context.zig   |  79 ++++++++++++----
 src/arch/aarch64/exception.zig |   1 +
 src/arch/aarch64/regs.zig      |  13 ++-
 src/arch/aarch64/vmm.zig       | 166 +++++++++++++++++++++++++++++----
 src/arch/riscv64/boot.zig      |   2 +
 src/arch/riscv64/context.S     |   7 +-
 src/arch/riscv64/context.zig   |  84 ++++++-----------
 src/arch/riscv64/vmm.zig       |  39 +++++---
 src/kernel.zig                 |  26 +++---
 src/mem/vmalloc.zig            |  26 +++++-
 src/mem/vmm.zig                |  21 ++++-
 src/thread.zig                 |  39 ++++++--
 src/util/rangemap.zig          |  42 ++++-----
 16 files changed, 435 insertions(+), 165 deletions(-)

diff --git a/src/arch.zig b/src/arch.zig
index c0618c5..928e1ba 100644
--- a/src/arch.zig
+++ b/src/arch.zig
@@ -4,10 +4,18 @@
 const std = @import("std");
 const builtin = @import("builtin");
 
-pub const impl = switch (builtin.cpu.arch) {
+pub const cpu: enum {
+    riscv64,
+    aarch64,
+} = switch (builtin.cpu.arch) {
+    .riscv64 => .riscv64,
+    .aarch64 => .aarch64,
+    else => @compileError("Unsupported architecture"),
+};
+
+pub const impl = switch (cpu) {
     .riscv64 => @import("arch/riscv64.zig"),
     .aarch64 => @import("arch/aarch64.zig"),
-    else => @compileError("Unsupported architecture"),
 };
 
 pub const vmm = impl.vmm;
diff --git a/src/arch/aarch64/boot.zig b/src/arch/aarch64/boot.zig
index 34b774e..a8f3410 100644
--- a/src/arch/aarch64/boot.zig
+++ b/src/arch/aarch64/boot.zig
@@ -13,8 +13,10 @@ extern const __aa64_bsp_stack_top: u8;
 
 var g_dtb_address: u64 = undefined;
 
-fn early_debug_print(byte: u8) void {
-    const address = 0x9000000;
+fn early_debug_print_high(byte: u8) void {
+    // TODO this is incorrect: the write should target a region mapped as device memory,
+    //      but the "virtualize" range is mapped as normal memory.
+    const address = 0x9000000 + vmm.VIRTUALIZE_BASE;
     @as(*volatile u32, @ptrFromInt(address)).* = byte;
 }
 
@@ -44,14 +46,15 @@ fn aa64_bsp_upper_entry(real_address: u64) callconv(.C) noreturn {
 
     arch.barrier(.acq_rel);
     aa64_relocate_kernel(rel_offset, rela_start, rela_end);
+    vmm.unmap_early();
     arch.barrier(.acq_rel);
 
-    log.set_write_fn(&early_debug_print);
+    log.set_write_fn(&early_debug_print_high);
 
     exception.init();
 
-    mem.PhysicalAddress.g_virtualize_base = 0;
-    mem.PhysicalAddress.g_virtualize_size = 16 << 30;
+    mem.PhysicalAddress.g_virtualize_base = vmm.VIRTUALIZE_BASE;
+    mem.PhysicalAddress.g_virtualize_size = 16 << vmm.L1.SHIFT;
 
     setup_memory_from_fdt(real_address);
 
diff --git a/src/arch/aarch64/context.S b/src/arch/aarch64/context.S
index 7a2cbc7..b70d10e 100644
--- a/src/arch/aarch64/context.S
+++ b/src/arch/aarch64/context.S
@@ -3,6 +3,7 @@
 .global __aa64_enter_task
 .global __aa64_switch_task
 .global __aa64_task_enter_kernel
+.global __aa64_task_enter_user
 
 .set CONTEXT_SIZE, (12 * 8)
 
@@ -28,14 +29,37 @@
 
 .pushsection .text
 
+__aa64_task_enter_user:
+    // x0 == user sp (lowest slot of the 32-byte frame), installed as SP_EL0
+    ldr x0, [sp, #16 * 0]
+    msr sp_el0, x0
+
+    // x0 == arg, x1 == entry (upper 16 bytes of the frame; offset 8 is padding)
+    ldp x0, x1, [sp, #16 * 1]
+    add sp, sp, #32 // drop the whole frame from the kernel stack
+
+    msr elr_el1, x1 // eret resumes execution at the entry point
+
+    mov x1, #(1 << 9) // SPSR_EL1: D (bit 9) set, M[3:0] == 0 selects EL0t
+    msr spsr_el1, x1
+
+    mov lr, xzr
+
+    dsb ish
+    isb sy
+
+    eret
+
 __aa64_task_enter_kernel:
     // arg, entry
-    ldp x0, lr, [sp]
-    add sp, sp, #16
+    ldp x0, x1, [sp]
+    // return address
+    ldr lr, [sp, #16]
+    add sp, sp, #24
 
     // TODO enter task via eret to EL1t
 
-    ret
+    br x1
 
 __aa64_switch_task:
     // x0 -- "dst" context
diff --git a/src/arch/aarch64/context.zig b/src/arch/aarch64/context.zig
index 4ab95b4..d58598e 100644
--- a/src/arch/aarch64/context.zig
+++ b/src/arch/aarch64/context.zig
@@ -1,30 +1,83 @@
 const thread = @import("../../thread.zig");
+const vmm = @import("vmm.zig");
+const mem = @import("../../mem.zig");
+const regs = @import("regs.zig");
+const kernel = @import("../../kernel.zig");
 
-fn idle_function() callconv(.naked) noreturn {
-    asm volatile ("b .");
-}
+const ProcessAddressSpace = mem.vmm.ProcessAddressSpace;
+const arch = kernel.arch;
 
 extern fn __aa64_enter_task(cx: *Context) callconv(.C) noreturn;
 extern fn __aa64_switch_task(dcx: *Context, scx: *Context) callconv(.C) void;
 extern fn __aa64_task_enter_kernel() callconv(.C) noreturn;
+extern fn __aa64_task_enter_user() callconv(.C) noreturn;
 
 pub const Context = extern struct {
     const STACK_SIZE: usize = 16384;
 
     kstack: thread.KStack(STACK_SIZE),
 
+    ttbr0: u64 = 0,
+
     pub fn idle() Context {
-        const entry = @intFromPtr(&idle_function);
-        return Context.kernel(entry, 0);
+        return Context.kernel(&thread.idle_function, 0);
     }
 
-    pub fn kernel(pc: usize, arg: usize) Context {
-        var ks = thread.KStack(STACK_SIZE).create();
-        const entry = @intFromPtr(&__aa64_task_enter_kernel);
+    pub fn user(address_space: *const ProcessAddressSpace, pc: usize, sp: usize, arg: usize) @This() {
+        const space_physical = address_space.physical_address();
+        const space_asid = address_space.asid();
 
+        var ks = thread.KStack(STACK_SIZE).create();
+
+        const ttbr0 = @as(u64, @bitCast(regs.TTBR0_EL1.Bits{
+            .BADDR = @truncate(space_physical.raw),
+            .ASID = @truncate(space_asid),
+        }));
+
+        // Arguments to __aa64_task_enter_user
         ks.push(pc);
         ks.push(arg);
+        ks.push(0); // Padding
+        ks.push(sp);
 
+        setup_stack_common(&ks, @intFromPtr(&__aa64_task_enter_user));
+
+        return .{ .kstack = ks, .ttbr0 = ttbr0 };
+    }
+
+    pub fn kernel(function: *const thread.KernelThreadFn, arg: usize) Context {
+        var ks = thread.KStack(STACK_SIZE).create();
+
+        // Arguments to __aa64_task_enter_kernel (24 bytes; NOTE(review): user() pads its frame to 32 -- confirm sp alignment here)
+        ks.push(@intFromPtr(&thread.kernel_return)); // loaded into lr: where `function` returns to
+        ks.push(@intFromPtr(function)); // loaded into x1: branch target
+        ks.push(arg); // loaded into x0: first argument
+
+        setup_stack_common(&ks, @intFromPtr(&__aa64_task_enter_kernel));
+
+        return Context{ .kstack = ks };
+    }
+
+    pub fn enter(self: *Context) noreturn {
+        self.load_state();
+        __aa64_enter_task(self);
+    }
+
+    pub fn switch_from(self: *Context, from: *Context) void {
+        from.store_state();
+        self.load_state();
+        __aa64_switch_task(self, from);
+    }
+
+    pub fn load_state(self: *Context) void {
+        regs.TTBR0_EL1.set(self.ttbr0);
+    }
+
+    pub fn store_state(self: *Context) void {
+        _ = self;
+    }
+
+    fn setup_stack_common(ks: *thread.KStack(STACK_SIZE), entry: usize) void {
         ks.push(entry); // x30/lr
         ks.push(0); // x29
         ks.push(0); // x28
@@ -37,16 +90,6 @@ pub const Context = extern struct {
         ks.push(0); // x21
         ks.push(0); // x20
         ks.push(0); // x19
-
-        return Context{ .kstack = ks };
-    }
-
-    pub fn enter(self: *Context) noreturn {
-        __aa64_enter_task(self);
-    }
-
-    pub fn switch_from(self: *Context, from: *Context) void {
-        __aa64_switch_task(self, from);
     }
 };
 
diff --git a/src/arch/aarch64/exception.zig b/src/arch/aarch64/exception.zig
index 1d036c5..813be4b 100644
--- a/src/arch/aarch64/exception.zig
+++ b/src/arch/aarch64/exception.zig
@@ -41,6 +41,7 @@ export fn __aa64_el1_sync_handler(frame: *ExceptionFrame) callconv(.C) void {
 
     log.err("Exception in EL1:", .{});
     log.err("  EC = {s} (0b{b:06}) ISS = 0x{x}", .{ esr.EC.as_str(), @intFromEnum(esr.EC), esr.ISS });
+    log.err("  ESR = 0x{x:016}", .{@as(u64, @bitCast(esr))});
     log.err("  ELR = 0x{x:016}", .{elr});
 
     switch (esr.as_enum()) {
diff --git a/src/arch/aarch64/regs.zig b/src/arch/aarch64/regs.zig
index 0cc22cd..21fb4c0 100644
--- a/src/arch/aarch64/regs.zig
+++ b/src/arch/aarch64/regs.zig
@@ -6,6 +6,8 @@ fn Register(comptime name: []const u8, comptime bits: type) type {
         else => bits,
     };
     return enum(repr) {
+        pub const Bits = bits;
+
         pub fn set(value: repr) void {
             asm volatile ("msr " ++ name ++ ", %[value]"
                 :
@@ -34,8 +36,15 @@ fn Register(comptime name: []const u8, comptime bits: type) type {
     };
 }
 
-pub const TTBR0_EL1 = Register("ttbr0_el1", u64);
-pub const TTBR1_EL1 = Register("ttbr1_el1", u64);
+pub const TTBR = packed struct(u64) {
+    // 0..48
+    BADDR: u48 = 0,
+    // 48..64
+    ASID: u16 = 0,
+};
+
+pub const TTBR0_EL1 = Register("ttbr0_el1", TTBR);
+pub const TTBR1_EL1 = Register("ttbr1_el1", TTBR);
 
 // NOTE: tpidr_el0 is used until codegen can emit TLS instructions against tpidr_el1
 pub const TPIDR_EL0 = Register("tpidr_el0", u64);
diff --git a/src/arch/aarch64/vmm.zig b/src/arch/aarch64/vmm.zig
index 0cb93c1..def4634 100644
--- a/src/arch/aarch64/vmm.zig
+++ b/src/arch/aarch64/vmm.zig
@@ -1,13 +1,20 @@
+const std = @import("std");
 const mem = @import("../../mem.zig");
 const regs = @import("regs.zig");
+const kernel = @import("../../kernel.zig");
 
 const PhysicalAddress = mem.PhysicalAddress;
+const AtomicU8 = std.atomic.Value(u8);
+const log = kernel.log;
 
 pub const KERNEL_VIRTUAL_BASE: usize = 0xFFFFFF8000000000;
 pub const KERNEL_L1_INDEX: usize = L1.index(KERNEL_VIRTUAL_BASE);
+pub const KERNEL_VIRTUAL_SIZE: usize = 16 * L1.SIZE;
+pub const VIRTUALIZE_BASE: usize = KERNEL_VIRTUAL_BASE + KERNEL_VIRTUAL_SIZE;
+pub const VIRTUALIZE_BASE_L1I: usize = L1.index(VIRTUALIZE_BASE);
 
-pub const L1 = mem.TranslationLevel(30);
-pub const L2 = mem.TranslationLevel(21);
+pub const L1 = mem.TranslationLevel(30, L2);
+pub const L2 = mem.TranslationLevel(21, L3);
 pub const L3 = mem.vmm.L3;
 
 pub const RawEntry = packed struct(u64) {
@@ -135,31 +142,149 @@ pub fn Table(comptime Level: type) type {
     return struct {
         pub const Entry = TableEntry(Level);
 
+        pub const Error = mem.vmm.AddressSpaceError;
+
         entries: [512]Entry align(4096) = [_]Entry{.INVALID} ** 512,
 
+        /// Allocates one physical page and returns it as a table whose entries are all `.INVALID`.
+        pub fn allocate_empty() Error!*@This() {
+            const frame = mem.phys.alloc_page() orelse return error.out_of_pages;
+            const table: *@This() = @ptrFromInt(frame.virtualize());
+            // Reset every slot before the table is handed out.
+            for (&table.entries) |*slot| slot.* = .INVALID;
+            return table;
+        }
+
         pub inline fn entry(self: *@This(), index: usize) *Entry {
             return &self.entries[index];
         }
+
+        pub fn physical_address(self: *const @This()) PhysicalAddress {
+            return PhysicalAddress.from_virtualized(@intFromPtr(self));
+        }
+
+        pub usingnamespace if (Level.NextLevel) |NextLevel| struct {
+            pub fn get_next_level(self: *Table(Level), index: usize) ?*Table(NextLevel) {
+                const ent = self.entry(index); // lookup only: this never allocates a table
+                if (ent.raw.V and ent.raw.P) { // valid with P set is a table, as in get_or_create_next_level
+                    @panic("TODO: translate existing table");
+                }
+                return null; // invalid entry, or a block mapping with no table behind it
+            }
+
+            pub fn get_or_create_next_level(self: *Table(Level), index: usize) Error!*Table(NextLevel) {
+                const ent = self.entry(index);
+                if (ent.raw.V) {
+                    if (!ent.raw.P) {
+                        @panic("TODO: mixed hugepages and tables");
+                    }
+
+                    // Entry is a table
+                    @panic("TODO: translate existing table");
+                } else {
+                    const table = try Table(NextLevel).allocate_empty();
+                    const physical = table.physical_address();
+                    ent.* = TableEntry(Level).table(physical, .{});
+                    return table;
+                }
+            }
+        } else struct {};
     };
 }
 
-// 0x0000_0000_0000_0000 .. 0x0000_0080_0000_0000
-var g_fixed_low = Table(L1){};
+pub const ProcessAddressSpace = struct {
+    l1: *Table(L1),
+    asid: u8,
+
+    pub const Error = mem.vmm.AddressSpaceError;
+
+    var g_asid: AtomicU8 = .{ .raw = 1 };
+
+    pub fn init() Error!ProcessAddressSpace {
+        const table = try Table(L1).allocate_empty();
+        const asid = g_asid.fetchAdd(1, .seq_cst);
+        return .{ .l1 = table, .asid = asid };
+    }
+
+    pub fn physical_address(self: *const @This()) PhysicalAddress {
+        return self.l1.physical_address();
+    }
+
+    pub fn map_page(self: *@This(), virtual: usize, physical: PhysicalAddress) Error!void {
+        // TODO align check on both virtual and physical
+
+        const l1i = L1.index(virtual);
+        const l2i = L2.index(virtual);
+        const l3i = L3.index(virtual);
+
+        const l2 = try self.l1.get_or_create_next_level(l1i);
+        const l3 = try l2.get_or_create_next_level(l2i);
+
+        const entry = l3.entry(l3i);
+
+        if (entry.raw.V) {
+            @panic("TODO: handle already present");
+        }
+
+        entry.* = TableEntry(L3).normal_page(physical, RawEntry{ .AP = .both_readwrite, .NG = true });
+        tlb_flush_vma_asid(virtual, self.asid);
+
+        log.debug("Map 0x{x} -> page 0x{x}", .{ virtual, physical.raw });
+    }
+};
+
+pub inline fn tlb_flush_vma(vma: usize) void { // invalidates `vma`'s page for all ASIDs
+    const xt = (vma >> 12) & 0xFFF_FFFF_FFFF; // operand bits [43:0] carry VA[55:12]; drop high-half sign bits
+    asm volatile (
+        \\ dsb ishst
+        \\ tlbi vaae1, %[xt]
+        \\ dsb ish
+        \\ isb sy
+        :
+        : [xt] "r" (xt),
+        : "memory"
+    );
+}
+
+pub inline fn tlb_flush_vma_asid(vma: usize, asid: usize) void { // invalidates `vma`'s page for one ASID
+    const xt = ((vma >> 12) & 0xFFF_FFFF_FFFF) | (asid << 48); // VA[55:12] in [43:0], ASID in [63:48]
+    asm volatile (
+        \\ dsb ishst
+        \\ tlbi vae1, %[xt]
+        \\ dsb ish
+        \\ isb sy
+        :
+        : [xt] "r" (xt),
+        : "memory"
+    );
+}
+
+pub inline fn tlb_flush_asid(asid: usize) void {
+    const xt = asid << 48;
+    asm volatile (
+        \\ dsb ishst
+        \\ tlbi aside1, %[xt]
+        \\ dsb ish
+        \\ isb sy
+        :
+        : [xt] "r" (xt),
+        : "memory"
+    );
+}
+
 // 0xFFFF_FF80_0000_0000 .. 0xFFFF_FFFF_FFFF_FFFF
 var g_fixed_high = Table(L1){};
 
+pub fn unmap_early() void {
+    // Detach the early TTBR0 table first, then flush whole ASID 0, so no walk can refill the TLB in between
+    regs.TTBR0_EL1.set(0);
+    tlb_flush_asid(0); // NOTE(review): `tlbi aside1` skips global entries -- confirm the early blocks are nG
+}
+
 pub fn map_early(real_address: usize) void {
     _ = real_address;
 
-    for (0..16) |i| {
-        // Identity
-        g_fixed_low.entry(i).* = TableEntry(L1).normal_block(
-            .{ .raw = i << L1.SHIFT },
-            .{},
-        );
-    }
-
-    for (0..16) |i| {
+    for (0..L1.page_count(KERNEL_VIRTUAL_SIZE)) |i| {
         // Identity + KERNEL_VIRTUAL_BASE
         g_fixed_high.entry(i).* = TableEntry(L1).normal_block(
             .{ .raw = i << L1.SHIFT },
@@ -167,11 +292,18 @@ pub fn map_early(real_address: usize) void {
         );
     }
 
-    const ttbr0 = @intFromPtr(&g_fixed_low);
-    const ttbr1 = @intFromPtr(&g_fixed_high);
+    for (0..16) |i| {
+        // Identity + VIRTUALIZE_BASE for "Whole RAM mapping"
+        g_fixed_high.entry(VIRTUALIZE_BASE_L1I + i).* = TableEntry(L1).normal_block(
+            .{ .raw = i << L1.SHIFT },
+            .{},
+        );
+    }
 
-    regs.TTBR0_EL1.set(ttbr0);
-    regs.TTBR1_EL1.set(ttbr1);
+    const ttbr = @intFromPtr(&g_fixed_high);
+
+    regs.TTBR0_EL1.write(.{ .BADDR = @truncate(ttbr) });
+    regs.TTBR1_EL1.write(.{ .BADDR = @truncate(ttbr) });
 
     regs.TCR_EL1.write(.{
         .AS = .asid_8bit,
diff --git a/src/arch/riscv64/boot.zig b/src/arch/riscv64/boot.zig
index e466c19..0503833 100644
--- a/src/arch/riscv64/boot.zig
+++ b/src/arch/riscv64/boot.zig
@@ -55,6 +55,8 @@ pub export fn rv64_bsp_lower_entry(real_address: usize, bsp_hart_id: usize, dtb_
     g_dtb_address = dtb_address;
     g_bsp_hart_id = @truncate(bsp_hart_id);
 
+    vmm.g_kernel_real_base = real_address;
+
     vmm.map_early(real_address);
 
     // &bspUpperEntry will yield a pointer like: X + P, where
diff --git a/src/arch/riscv64/context.S b/src/arch/riscv64/context.S
index 08d954b..e743774 100644
--- a/src/arch/riscv64/context.S
+++ b/src/arch/riscv64/context.S
@@ -70,11 +70,12 @@ __rv64_task_enter_user:
 
 __rv64_task_enter_kernel:
     ld a0, (sp)     // argument
-    ld ra, 8(sp)    // entry
-    addi sp, sp, 16
+    ld t0, 8(sp)    // entry
+    ld ra, 16(sp)   // return address
+    addi sp, sp, 24
 
     // TODO S-mode -> S-mode return via sret
-    ret
+    jr t0
 
 __rv64_switch_task:
     // a0 - new context
diff --git a/src/arch/riscv64/context.zig b/src/arch/riscv64/context.zig
index df33329..8d8ef63 100644
--- a/src/arch/riscv64/context.zig
+++ b/src/arch/riscv64/context.zig
@@ -7,10 +7,6 @@ const vmm = @import("vmm.zig");
 const ProcessAddressSpace = mem.vmm.ProcessAddressSpace;
 const log = kernel.log;
 
-fn idle_function() callconv(.naked) noreturn {
-    asm volatile ("j .");
-}
-
 extern fn __rv64_enter_task(cx: *Context) callconv(.C) noreturn;
 extern fn __rv64_switch_task(dcx: *Context, scx: *Context) callconv(.C) void;
 extern fn __rv64_task_enter_kernel() callconv(.C) noreturn;
@@ -26,72 +22,40 @@ pub const Context = extern struct {
 
     /// Constructs an idle context struct.
     pub fn idle() @This() {
-        const entry = @intFromPtr(&idle_function);
-        return Context.kernel(entry, 0);
+        return Context.kernel(&thread.idle_function, 0);
     }
 
     pub fn user(address_space: *const ProcessAddressSpace, pc: usize, sp: usize, arg: usize) @This() {
         const space_physical = address_space.physical_address();
         const space_asid = address_space.asid();
 
-        const satp = regs.SATP.Bits {
-            .PPN = @truncate(space_physical.raw >> 12),
-            .ASID = @truncate(space_asid),
-            .MODE = .sv39
-        };
+        const satp = regs.SATP.Bits{ .PPN = @truncate(space_physical.raw >> 12), .ASID = @truncate(space_asid), .MODE = .sv39 };
 
         var ks = thread.KStack(STACK_SIZE).create();
-        const entry = @intFromPtr(&__rv64_task_enter_user);
 
         ks.push(pc);
         ks.push(sp);
         ks.push(arg);
 
-        ks.push(0); // x8/s0/fp
-        ks.push(0); // x9/s1
-        ks.push(0); // x18/s2
-        ks.push(0); // x19/s3
-        ks.push(0); // x20/s4
-        ks.push(0); // x21/s5
-        ks.push(0); // x22/s6
-        ks.push(0); // x23/s7
-        ks.push(0); // x24/s8
-        ks.push(0); // x25/s9
-        ks.push(0); // x26/s10
-        ks.push(0); // x27/s11
-        ks.push(0); // x4/gp
-        ks.push(entry); // x1/ra return address
+        setup_stack_common(&ks, @intFromPtr(&__rv64_task_enter_user));
 
-        return .{
-            .kstack = ks,
-            .satp = @bitCast(satp)
-        };
+        return .{ .kstack = ks, .satp = @bitCast(satp) };
     }
 
-    /// Constructs a kernel task context with entry point in `pc` and an `arg`ument.
+    /// Constructs a kernel task context that runs `function` with an `arg`ument.
-    pub fn kernel(pc: usize, arg: usize) @This() {
+    pub fn kernel(function: *const thread.KernelThreadFn, arg: usize) @This() {
         var ks = thread.KStack(STACK_SIZE).create();
-        const entry = @intFromPtr(&__rv64_task_enter_kernel);
 
-        ks.push(pc);
+        const table_physical = vmm.kernel_table_physical();
+        const satp = regs.SATP.Bits{ .PPN = @truncate(table_physical >> 12), .MODE = .sv39 };
+
+        ks.push(@intFromPtr(&thread.kernel_return));
+        ks.push(@intFromPtr(function));
         ks.push(arg);
 
-        ks.push(0); // x8/s0/fp
-        ks.push(0); // x9/s1
-        ks.push(0); // x18/s2
-        ks.push(0); // x19/s3
-        ks.push(0); // x20/s4
-        ks.push(0); // x21/s5
-        ks.push(0); // x22/s6
-        ks.push(0); // x23/s7
-        ks.push(0); // x24/s8
-        ks.push(0); // x25/s9
-        ks.push(0); // x26/s10
-        ks.push(0); // x27/s11
-        ks.push(0); // x4/gp
-        ks.push(entry); // x1/ra return address
+        setup_stack_common(&ks, @intFromPtr(&__rv64_task_enter_kernel));
 
-        return .{ .kstack = ks };
+        return .{ .kstack = ks, .satp = @bitCast(satp) };
     }
 
     /// Low-level task context entry function.
@@ -108,17 +72,29 @@ pub const Context = extern struct {
     }
 
     fn load_state(self: *@This()) void {
-        if (self.satp != 0) {
-            log.info("Load SATP = 0x{x}", .{self.satp});
-            regs.SATP.set(self.satp);
-        } else {
-            vmm.load_kernel_table();
-        }
+        regs.SATP.set(self.satp);
     }
 
     fn store_state(self: *@This()) void {
         _ = self;
     }
+
+    fn setup_stack_common(ks: *thread.KStack(STACK_SIZE), entry: usize) void {
+        ks.push(0); // x8/s0/fp
+        ks.push(0); // x9/s1
+        ks.push(0); // x18/s2
+        ks.push(0); // x19/s3
+        ks.push(0); // x20/s4
+        ks.push(0); // x21/s5
+        ks.push(0); // x22/s6
+        ks.push(0); // x23/s7
+        ks.push(0); // x24/s8
+        ks.push(0); // x25/s9
+        ks.push(0); // x26/s10
+        ks.push(0); // x27/s11
+        ks.push(0); // x4/gp
+        ks.push(entry); // x1/ra return address
+    }
 };
 
 comptime {
diff --git a/src/arch/riscv64/vmm.zig b/src/arch/riscv64/vmm.zig
index 4efc59c..3799c01 100644
--- a/src/arch/riscv64/vmm.zig
+++ b/src/arch/riscv64/vmm.zig
@@ -55,11 +55,15 @@ pub const RawEntry = packed struct(u64) {
         const rhs = @as(u64, @bitCast(mask));
         lhs.* &= ~rhs;
     }
+
+    pub fn is_table(self: @This()) bool {
+        return !self.r and !self.w and !self.x;
+    }
 };
 
 pub fn TableEntry(comptime Level: type) type {
     _ = Level;
-    return struct {
+    return packed struct(u64) {
         raw: RawEntry,
 
         pub const INVALID: @This() = .{ .raw = .{} };
@@ -96,12 +100,11 @@ pub fn TableEntry(comptime Level: type) type {
                 .v = true,
             }) };
         }
-
     };
 }
 
 pub fn Table(comptime Level: type) type {
-    return struct {
+    return extern struct {
         pub const Entry = TableEntry(Level);
 
         entries: [512]Entry align(4096),
@@ -131,9 +134,11 @@ pub fn Table(comptime Level: type) type {
 
         pub usingnamespace if (Level.NextLevel) |NextLevel| struct {
             pub fn get_next_level(self: *Table(Level), index: usize) ?*Table(NextLevel) {
-                _ = self;
-                _ = index;
-                @panic("TODO");
+                const ent = self.entry(index);
+                if (ent.raw.v and ent.raw.is_table()) {
+                    @panic("TODO: translate existing table");
+                }
+                return null;
             }
 
             pub fn get_or_create_next_level(self: *Table(Level), index: usize) Error!*Table(NextLevel) {
@@ -141,11 +146,11 @@ pub fn Table(comptime Level: type) type {
 
                 if (ent.raw.v) {
                     // TODO handle mixed hugepages + tables
-                    if (ent.raw.r or ent.raw.w or ent.raw.x) {
+                    if (!ent.raw.is_table()) {
                         @panic("TODO: handle mixed hugepages and tables");
                     }
                     // It is a table
-                    @panic("OOO");
+                    @panic("TODO: translate existing table");
                 } else {
                     // Allocate a new entry
                     const table = try Table(NextLevel).allocate_empty();
@@ -210,13 +215,21 @@ pub const ProcessAddressSpace = struct {
 
 var g_fixed = Table(L1).empty();
 var g_fixed_lock: sync.Spinlock = .{};
+pub var g_kernel_real_base: u64 = undefined;
+extern var __kernel_start: u8;
 
 pub fn virtualize_range() usize {
     return EARLY_MAPPING_SIZE * L1.SIZE;
 }
 
+pub fn kernel_table_physical() u64 {
+    const address = @as(usize, @intFromPtr(&g_fixed));
+    const kernel_start = @intFromPtr(&__kernel_start);
+    return address - kernel_start + g_kernel_real_base;
+}
+
 pub fn unmap_early() void {
-    // Make lower half mappings non-executable
+    // Unmap lower half
     const guard = g_fixed_lock.lock_irqsave();
     defer guard.release();
     for (0..EARLY_MAPPING_SIZE) |i| {
@@ -224,11 +237,6 @@ pub fn unmap_early() void {
     }
 }
 
-pub fn load_kernel_table() void {
-    const address = @as(usize, @intFromPtr(&g_fixed));
-    regs.SATP.write(.{ .PPN = @intCast(address >> 12), .MODE = .sv39 });
-}
-
 pub fn map_early(real_address: usize) void {
     const real_l1 = L1.index(real_address);
 
@@ -253,7 +261,8 @@ pub fn map_early(real_address: usize) void {
         .{ .r = true, .w = true, .x = true },
     );
 
-    load_kernel_table();
+    const address = @intFromPtr(&g_fixed);
+    regs.SATP.write(.{ .PPN = @intCast(address >> 12), .MODE = .sv39 });
 }
 
 pub inline fn flush_vma(page: usize) void {
diff --git a/src/kernel.zig b/src/kernel.zig
index ef9f42c..d7e8b64 100644
--- a/src/kernel.zig
+++ b/src/kernel.zig
@@ -16,13 +16,9 @@ pub const TRACE_PHYSICAL_ALLOCATOR: bool = false;
 
 const std = @import("std");
 
-fn f0(arg: usize) callconv(.C) noreturn {
-    var c: usize = 0;
-    while (true) {
-        f1(arg, c);
-        c += 1;
-        thread.yield();
-    }
+fn f0(arg: usize) callconv(.C) void {
+    log.info("Argument is {}", .{arg});
+    thread.yield();
 }
 
 noinline fn f1(arg: usize, c: usize) void {
@@ -41,10 +37,18 @@ pub export fn kernel_main() callconv(.C) noreturn {
     var a = arena.Arena.init(256 * 0x1000) orelse @panic("Could not setup kernel arena");
     thread.Queue.init_this_cpu(&a);
 
-    const t = thread.test_create_user_from_code(&a, &[_]u8 {
-        0x6F, 0x00, 0x00, 0x00
-    });
-    thread.enqueue(t);
+    const t0 = thread.Thread.create_kernel(&a, &f0, 1234);
+
+    const code = switch (comptime arch.cpu) {
+        .riscv64 => &[_]u8{ 0x6F, 0x00, 0x00, 0x00 },
+        .aarch64 => &[_]u8{
+            0x00, 0x00, 0x00, 0x14,
+        },
+    };
+    const t1 = thread.test_create_user_from_code(&a, code) catch @panic("Could not create test thread");
+
+    thread.enqueue(t0);
+    thread.enqueue(t1);
 
     log.info("Test", .{});
     // log.write("\x1B[2J", .{});
diff --git a/src/mem/vmalloc.zig b/src/mem/vmalloc.zig
index 8afc857..80032ff 100644
--- a/src/mem/vmalloc.zig
+++ b/src/mem/vmalloc.zig
@@ -22,13 +22,27 @@ pub const VirtualMemoryAllocator = struct {
     /// One of errors returned by the allocation logic + underlying allocator error.
     pub const Error = error{ already_exists, invalid_region, cannot_fit };
 
+    /// Iterator that detaches regions from the front of the allocator's list, one per call.
+    pub const DrainIterator = struct {
+        vma: *VirtualMemoryAllocator,
+
+        /// Unlinks the current head and returns its range, or `null` once the list is empty.
+        pub fn next(self: *@This()) ?Range(u64) {
+            const front = self.vma.head orelse return null;
+            const drained = front.range;
+            self.vma.head = front.next;
+            // TODO free the range
+            return drained;
+        }
+    };
+
     /// An iterator over VM regions being freed.
     pub const FreeIterator = struct {
         range: Range(u64),
         vma: *VirtualMemoryAllocator,
         current: ?*VirtualMemoryRange,
 
-        fn next(self: *@This()) Error!?Range(u64) {
+        pub fn next(self: *@This()) Error!?Range(u64) {
             while (self.current) |n| {
                 if (n.range.intersect(&self.range)) |xs| {
                     if (xs.start == n.range.start) {
@@ -64,7 +78,7 @@ pub const VirtualMemoryAllocator = struct {
                     } else {
                         // Insert a new node after the current one
                         const new_node = self.vma.arena.create(VirtualMemoryRange);
-                        new_node.* = VirtualMemoryRange {
+                        new_node.* = VirtualMemoryRange{
                             .range = .{ .start = xs.end(), .len = n.range.end() - xs.end() },
                             .prev = n,
                             .next = n.next,
@@ -202,13 +216,17 @@ pub const VirtualMemoryAllocator = struct {
 
     /// Deallocates (shrinks/truncates) regions intersecting the requested range.
     pub fn free(self: *@This(), start_pfn: u64, pfn_count: u64) FreeIterator {
-        const range = Range(u64) { .start = start_pfn, .len = pfn_count };
-        return FreeIterator {
+        const range = Range(u64){ .start = start_pfn, .len = pfn_count };
+        return FreeIterator{
             .current = self.head,
             .vma = self,
             .range = range,
         };
     }
+
+    pub fn drain(self: *@This()) DrainIterator {
+        return DrainIterator{ .vma = self };
+    }
 };
 
 test "Inserted entries in vmalloc are properly ordered" {
diff --git a/src/mem/vmm.zig b/src/mem/vmm.zig
index a59e946..cee0e46 100644
--- a/src/mem/vmm.zig
+++ b/src/mem/vmm.zig
@@ -7,6 +7,7 @@ const kernel = @import("../kernel.zig");
 const sync = @import("../sync.zig");
 
 const arch = kernel.arch;
+const log = kernel.log;
 const Arena = arena.Arena;
 
 /// Last virtual memory translation level. Always 4KiB on all platforms.
@@ -17,7 +18,7 @@ pub const PAGE_SIZE: usize = L3.SIZE;
 
 pub const AddressSpaceError = error{
     out_of_pages,
-};
+} || vmalloc.VirtualMemoryAllocator.Error;
 
 /// Helper function to construct a "Translation Level" struct type from a bit shift.
 pub fn TranslationLevel(comptime shift: usize, comptime Next: ?type) type {
@@ -69,6 +70,14 @@ pub const ProcessAddressSpace = struct {
         return .{ .inner = inner, .allocator = allocator, .lock = .{} };
     }
 
+    pub fn clear(self: *@This()) void {
+        var drain = self.allocator.drain();
+        while (drain.next()) |range| {
+            log.info("Free range: 0x{x}..0x{x}", .{ range.start * L3.SIZE, range.end() * L3.SIZE });
+            // TODO unmap/free pages
+        }
+    }
+
     pub fn map_single_page(
         self: *@This(),
         virtual: usize,
@@ -77,8 +86,14 @@ pub const ProcessAddressSpace = struct {
         self.lock.lock();
         defer self.lock.release();
 
-        // TODO If allocation succeeds, but mapping fails, rollback
-        self.allocator.insert(.{ .start = L3.page_number(virtual), .len = 1 }) catch @panic("TODO error");
+        try self.allocator.insert(.{ .start = L3.page_number(virtual), .len = 1 });
+        errdefer {
+            var it = self.allocator.free(L3.page_number(virtual), 1);
+            while (it.next() catch unreachable) |n| {
+                // TODO: inner.unmap_page()
+                _ = n;
+            }
+        }
         try self.inner.map_page(virtual, physical);
     }
 
diff --git a/src/thread.zig b/src/thread.zig
index 8a60d3b..9f21473 100644
--- a/src/thread.zig
+++ b/src/thread.zig
@@ -9,6 +9,23 @@ const mem = @import("mem.zig");
 
 const ProcessAddressSpace = mem.vmm.ProcessAddressSpace;
 
+// TODO: are kernel threads needed at all if we're doing a microkernel?
+
+/// Signature for kernel thread entry
+pub const KernelThreadFn = fn (usize) callconv(.C) void;
+
+pub fn kernel_return() callconv(.C) noreturn {
+    @panic("TODO: kernel thread exit");
+}
+
+/// Task to run when there are no real threads in the queue
+pub fn idle_function(arg: usize) callconv(.C) noreturn {
+    _ = arg;
+    while (true) {
+        arch.wait_for_interrupt();
+    }
+}
+
 /// Per-CPU thread queue structure.
 pub const Queue = struct {
     /// Idle task context. Used when there are no other tasks running.
@@ -97,12 +114,14 @@ pub const Thread = struct {
     // TODO move to process
     address_space: ?ProcessAddressSpace = null,
 
-    /// Creates a new (kernel) thread with given `pc` (entry point) and `arg`ument.
-    pub fn create_kernel(a: *arena.Arena, pc: usize, arg: usize) *Thread {
+    pub const Error = error{out_of_memory} || mem.vmm.AddressSpaceError;
+
+    /// Creates a new (kernel) thread with given `function` and `arg`ument.
+    pub fn create_kernel(a: *arena.Arena, function: *const KernelThreadFn, arg: usize) *Thread {
         const thread = a.create(Thread);
         thread.* = .{
             .allocator = a,
-            .arch_context = arch.Context.kernel(pc, arg),
+            .arch_context = arch.Context.kernel(function, arg),
         };
         return thread;
     }
@@ -192,12 +211,18 @@ pub fn yield() void {
     Queue.t_this_cpu.?.yield();
 }
 
-pub fn test_create_user_from_code(a: *arena.Arena, code: []const u8) *Thread {
-    var address_space = ProcessAddressSpace.init(a) catch @panic("TODO");
+pub fn test_create_user_from_code(a: *arena.Arena, code: []const u8) Thread.Error!*Thread {
+    var address_space = try ProcessAddressSpace.init(a);
+    errdefer {
+        address_space.clear();
+    }
 
     // Map 0x200000
-    const page = mem.phys.alloc_page() orelse @panic("TODO error");
-    address_space.map_single_page(0x200000, page) catch @panic("TODO error map");
+    const page = mem.phys.alloc_page() orelse return error.out_of_memory;
+    errdefer {
+        mem.phys.free_page(page);
+    }
+    try address_space.map_single_page(0x200000, page);
 
     const page_data = @as([*]u8, @ptrFromInt(page.virtualize()))[0..code.len];
     @memcpy(page_data, code);
diff --git a/src/util/rangemap.zig b/src/util/rangemap.zig
index b661db8..6c11af1 100644
--- a/src/util/rangemap.zig
+++ b/src/util/rangemap.zig
@@ -11,7 +11,7 @@ pub fn RangeMap(
     comptime K: type,
     comptime V: type,
     comptime ops: struct {
-        deinit_fn: ?fn(*V) void = null,
+        deinit_fn: ?fn (*V) void = null,
         merge_fn: ?fn (*const V, *const V) bool = null,
     },
 ) type {
@@ -244,11 +244,11 @@ test "Range map merging insertion" {
     {
         var it = map.iterator();
         const n0 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 0, .len = 10 }, n0.key);
+        try std.testing.expectEqual(Range(u32){ .start = 0, .len = 10 }, n0.key);
         try std.testing.expectEqual(true, n0.value);
 
         const n1 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 10, .len = 30 }, n1.key);
+        try std.testing.expectEqual(Range(u32){ .start = 10, .len = 30 }, n1.key);
         try std.testing.expectEqual(false, n1.value);
 
         try std.testing.expectEqual(null, it.next());
@@ -261,19 +261,19 @@ test "Range map merging insertion" {
     {
         var it = map.iterator();
         const n0 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 0, .len = 10 }, n0.key);
+        try std.testing.expectEqual(Range(u32){ .start = 0, .len = 10 }, n0.key);
         try std.testing.expectEqual(true, n0.value);
 
         const n1 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 10, .len = 30 }, n1.key);
+        try std.testing.expectEqual(Range(u32){ .start = 10, .len = 30 }, n1.key);
         try std.testing.expectEqual(false, n1.value);
 
         const n2 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 40, .len = 10 }, n2.key);
+        try std.testing.expectEqual(Range(u32){ .start = 40, .len = 10 }, n2.key);
         try std.testing.expectEqual(true, n2.value);
 
         const n3 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 50, .len = 10 }, n3.key);
+        try std.testing.expectEqual(Range(u32){ .start = 50, .len = 10 }, n3.key);
         try std.testing.expectEqual(false, n3.value);
 
         try std.testing.expectEqual(null, it.next());
@@ -286,23 +286,23 @@ test "Range map merging insertion" {
     {
         var it = map.iterator();
         const n0 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 0, .len = 10 }, n0.key);
+        try std.testing.expectEqual(Range(u32){ .start = 0, .len = 10 }, n0.key);
         try std.testing.expectEqual(true, n0.value);
 
         const n1 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 10, .len = 30 }, n1.key);
+        try std.testing.expectEqual(Range(u32){ .start = 10, .len = 30 }, n1.key);
         try std.testing.expectEqual(false, n1.value);
 
         const n2 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 40, .len = 10 }, n2.key);
+        try std.testing.expectEqual(Range(u32){ .start = 40, .len = 10 }, n2.key);
         try std.testing.expectEqual(true, n2.value);
 
         const n3 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 50, .len = 20 }, n3.key);
+        try std.testing.expectEqual(Range(u32){ .start = 50, .len = 20 }, n3.key);
         try std.testing.expectEqual(false, n3.value);
 
         const n4 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 71, .len = 9 }, n4.key);
+        try std.testing.expectEqual(Range(u32){ .start = 71, .len = 9 }, n4.key);
         try std.testing.expectEqual(false, n4.value);
 
         try std.testing.expectEqual(null, it.next());
@@ -314,19 +314,19 @@ test "Range map merging insertion" {
     {
         var it = map.iterator();
         const n0 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 0, .len = 10 }, n0.key);
+        try std.testing.expectEqual(Range(u32){ .start = 0, .len = 10 }, n0.key);
         try std.testing.expectEqual(true, n0.value);
 
         const n1 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 10, .len = 30 }, n1.key);
+        try std.testing.expectEqual(Range(u32){ .start = 10, .len = 30 }, n1.key);
         try std.testing.expectEqual(false, n1.value);
 
         const n2 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 40, .len = 10 }, n2.key);
+        try std.testing.expectEqual(Range(u32){ .start = 40, .len = 10 }, n2.key);
         try std.testing.expectEqual(true, n2.value);
 
         const n3 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 50, .len = 30 }, n3.key);
+        try std.testing.expectEqual(Range(u32){ .start = 50, .len = 30 }, n3.key);
         try std.testing.expectEqual(false, n3.value);
 
         try std.testing.expectEqual(null, it.next());
@@ -339,23 +339,23 @@ test "Range map merging insertion" {
     {
         var it = map.iterator();
         const n0 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 0, .len = 10 }, n0.key);
+        try std.testing.expectEqual(Range(u32){ .start = 0, .len = 10 }, n0.key);
         try std.testing.expectEqual(true, n0.value);
 
         const n1 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 10, .len = 30 }, n1.key);
+        try std.testing.expectEqual(Range(u32){ .start = 10, .len = 30 }, n1.key);
         try std.testing.expectEqual(false, n1.value);
 
         const n2 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 40, .len = 10 }, n2.key);
+        try std.testing.expectEqual(Range(u32){ .start = 40, .len = 10 }, n2.key);
         try std.testing.expectEqual(true, n2.value);
 
         const n3 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 50, .len = 30 }, n3.key);
+        try std.testing.expectEqual(Range(u32){ .start = 50, .len = 30 }, n3.key);
         try std.testing.expectEqual(false, n3.value);
 
         const n4 = it.next().?;
-        try std.testing.expectEqual(Range(u32) { .start = 100, .len = 20 }, n4.key);
+        try std.testing.expectEqual(Range(u32){ .start = 100, .len = 20 }, n4.key);
         try std.testing.expectEqual(false, n4.value);
 
         try std.testing.expectEqual(null, it.next());