Make thread queue per-CPU

This commit is contained in:
2025-03-17 17:54:51 +02:00
parent c803a3e2b2
commit c4a80c3378
4 changed files with 177 additions and 126 deletions
+9
View File
@@ -12,6 +12,10 @@ extern fn __rv64_enter_task(cx: *arch().Context) callconv(.C) noreturn;
extern fn __rv64_switch_task(dcx: *arch().Context, scx: *arch().Context) callconv(.C) void;
extern fn __rv64_task_enter_kernel() callconv(.C) noreturn;
/// Entry point of the idle context (see `Context.idle`).
/// The body is a single `j .` instruction, i.e. a jump to its own address,
/// so execution simply spins in place here.
/// Declared naked: the body consists solely of the inline assembly below.
fn idleFunction() callconv(.naked) noreturn {
asm volatile ("j .");
}
pub fn arch() type {
return struct {
pub threadlocal var tHartId: u32 = 0;
@@ -22,6 +26,11 @@ pub fn arch() type {
// Has to be exactly at offset 0x00, used in assembly
kstack: thread.KStack(STACK_SIZE),
/// Builds the context a CPU runs when it has no thread to execute:
/// a kernel context whose entry point is `idleFunction`, with argument 0.
pub fn idle() @This() {
    return Context.kernel(@intFromPtr(&idleFunction), 0);
}
pub fn kernel(pc: usize, arg: usize) @This() {
var ks = thread.KStack(STACK_SIZE).create();
const entry = @intFromPtr(&__rv64_task_enter_kernel);
+91 -57
View File
@@ -4,15 +4,15 @@ const kernel = @import("../../kernel.zig");
const vmm = @import("vmm.zig");
const regs = @import("regs.zig");
const dtb = @import("../../util/dtb.zig");
const physMemory = @import("../../mem/phys.zig");
const mem = @import("../../mem.zig");
const arena = @import("../../arena.zig");
const exception = @import("exception.zig");
const physMemory = mem.phys;
const PAGE_SIZE = mem.vmm.PAGE_SIZE;
const log = debug.log;
const arch = kernel.arch;
extern const __rela_start: u8;
extern const __rela_end: u8;
extern const __rv64_bsp_stack_top: u8;
extern const __kernel_start: u8;
extern const __kernel_end: u8;
@@ -20,7 +20,94 @@ extern const __kernel_end: u8;
var gDtbAddress: usize = 0;
var gBspHartId: u32 = 0;
pub export fn rv64RelocateKernel(imageBase: usize, relaStart: usize, relaEnd: usize) void {
/// Second stage of boot-hart initialization, reached through `longJump`
/// from `rv64BspLowerEntry`.
///
/// Relocates the kernel image to its virtual base, removes the early mapping,
/// then brings up exception handling, logging, physical memory and per-CPU
/// storage before handing control to `kernel.kernel_main`.
///
/// * `realAddress` - load address of the image, forwarded by the lower entry;
///   used to compute the relocation base and passed to `setupMemoryFromFdt`.
/// * `unused` - ignored (the lower entry passes 0).
fn bspUpperEntry(realAddress: usize, unused: usize) callconv(.C) noreturn {
_ = unused;
arch.barrier(.acq_rel);
// Relocate the kernel yet again, this time to another base
const relaStart = @intFromPtr(&__rela_start);
const relaEnd = @intFromPtr(&__rela_end);
// New image base: the kernel virtual base plus the L1 offset of the load address
const relOffset = vmm.KERNEL_VIRTUAL_BASE + vmm.L1.offset(realAddress);
arch.barrier(.acq_rel);
rv64RelocateKernel(relOffset, relaStart, relaEnd);
vmm.unmapEarly();
// Setup exception handling
exception.init();
debug.log.setWriteFn(&sbi.debugPrintByte);
kernel.mem.PhysicalAddress.gVirtualizeBase = 0;
kernel.mem.PhysicalAddress.gVirtualizeSize = vmm.virtualizeRange();
// Setup physical memory management
setupMemoryFromFdt(realAddress);
// Allocate this CPU's TLS block and install the thread pointer
setupPerCpu();
// tHartId is threadlocal, so it is written only after the thread pointer is set up
arch.tHartId = gBspHartId;
kernel.kernel_main();
}
/// First Zig-level entry point for the boot hart.
///
/// Records the DTB address and boot hart id in globals, creates the early
/// mapping with `vmm.mapEarly`, then jumps to `bspUpperEntry` at its virtual
/// address with the stack pointer moved to the virtual address of the BSP stack top.
///
/// * `realAddress` - load address of the kernel image (presumably physical,
///   per the comment below); forwarded to `bspUpperEntry` as its first argument.
/// * `bspHartId` - id of the boot hart; truncated to 32 bits and stored in `gBspHartId`.
/// * `dtbAddress` - address of the device tree blob; stored in `gDtbAddress`.
pub export fn rv64BspLowerEntry(realAddress: usize, bspHartId: usize, dtbAddress: usize) callconv(.C) noreturn {
debug.log.setWriteFn(&sbi.debugPrintByte);
gDtbAddress = dtbAddress;
gBspHartId = @truncate(bspHartId);
vmm.mapEarly(realAddress);
// &bspUpperEntry will yield a pointer like: X + P, where
// * X is symbol's raw address,
// * P is the physical load base of the image (0x80200000 on rv64 usually)
//
// Rebase the address to X + Y, where Y is the virtual load base
// (KERNEL_VIRTUAL_BASE plus the L1 offset of the load address)
// const kernelL1Offset = realAddress & ((1 << 30) - 1);
const realAddressL1Offset = vmm.L1.offset(realAddress);
const virtualEntry = @intFromPtr(&bspUpperEntry) + vmm.KERNEL_VIRTUAL_BASE - realAddress + realAddressL1Offset;
const virtualSp = @intFromPtr(&__rv64_bsp_stack_top) + vmm.KERNEL_VIRTUAL_BASE - realAddress + realAddressL1Offset;
longJump(virtualEntry, virtualSp, realAddress, 0);
// longJump is declared noreturn, so this halt is not expected to be reached
arch.halt();
}
// Functions used by the boot process
extern const __rela_start: u8;
extern const __rela_end: u8;
extern var __tdata_start: u8;
extern var __tdata_end: u8;
extern var __tbss_start: u8;
extern var __tbss_end: u8;
/// Allocates and installs the thread-local storage block for the calling CPU.
///
/// The `.tdata` initialization image (`__tdata_start`..`__tdata_end`) is copied
/// into freshly allocated pages, the remainder of the block (sized after
/// `__tbss_start`..`__tbss_end`) is zero-filled, and the thread pointer is set
/// to the start of the block.
///
/// Panics with an explicit message if the physical allocator cannot provide
/// the pages, instead of unwrapping a null optional.
fn setupPerCpu() void {
    // Assume .tbss follows .tdata
    const tdataStart = @intFromPtr(&__tdata_start);
    const tdataEnd = @intFromPtr(&__tdata_end);
    const tdataSize = tdataEnd - tdataStart;
    const tbssStart = @intFromPtr(&__tbss_start);
    const tbssEnd = @intFromPtr(&__tbss_end);
    const tbssSize = tbssEnd - tbssStart;
    const tdataData = @as([*]u8, @ptrFromInt(tdataStart))[0..tdataSize];
    const tlsSize = tdataSize + tbssSize;
    // Round the block size up to whole pages
    const tlsPageCount = (tlsSize + PAGE_SIZE - 1) / PAGE_SIZE;
    // Variant I: TLS block 0 follows TP after a certain displacement
    const tlsPages = physMemory.alloc_pages(tlsPageCount) orelse @panic("Could not allocate per-CPU TLS block");
    const tlsAddress = tlsPages.virtualize();
    const tlsData = @as([*]u8, @ptrFromInt(tlsAddress))[0..tlsSize];
    log.info("Allocated TLS @ {*}", .{ tlsData });
    // Initialized data first, then the zeroed .tbss part
    @memcpy(tlsData[0..tdataSize], tdataData);
    @memset(tlsData[tdataSize..], 0);
    arch.setThreadPointer(tlsAddress);
}
export fn rv64RelocateKernel(imageBase: usize, relaStart: usize, relaEnd: usize) void {
const elf = @import("std").elf;
const relaTablePtr = @as([*]elf.Rela, @ptrFromInt(relaStart));
@@ -74,36 +161,6 @@ fn setupMemoryFromFdt(realAddress: usize) void {
physMemory.init();
}
/// Second stage of boot-hart initialization, reached through `longJump`
/// from `rv64BspLowerEntry`.
///
/// Relocates the kernel image to its virtual base, removes the early mapping,
/// then brings up exception handling, logging, physical memory and per-CPU
/// storage before handing control to `kernel.kernel_main`.
///
/// * `realAddress` - load address of the image, forwarded by the lower entry;
///   used to compute the relocation base and passed to `setupMemoryFromFdt`.
/// * `unused` - ignored (the lower entry passes 0).
fn bspUpperEntry(realAddress: usize, unused: usize) callconv(.C) noreturn {
_ = unused;
arch.barrier(.acq_rel);
// Relocate the kernel yet again, this time to another base
const relaStart = @intFromPtr(&__rela_start);
const relaEnd = @intFromPtr(&__rela_end);
// New image base: the kernel virtual base plus the L1 offset of the load address
const relOffset = vmm.KERNEL_VIRTUAL_BASE + vmm.L1.offset(realAddress);
arch.barrier(.acq_rel);
rv64RelocateKernel(relOffset, relaStart, relaEnd);
vmm.unmapEarly();
// Setup exception handling
exception.init();
debug.log.setWriteFn(&sbi.debugPrintByte);
kernel.mem.PhysicalAddress.gVirtualizeBase = 0;
kernel.mem.PhysicalAddress.gVirtualizeSize = vmm.virtualizeRange();
// Setup physical memory management
setupMemoryFromFdt(realAddress);
// Allocate this CPU's TLS block and install the thread pointer
kernel.thread.setupCurrentCpu();
// tHartId is threadlocal, so it is written only after the thread pointer is set up
arch.tHartId = gBspHartId;
kernel.kernel_main();
}
inline fn longJump(pc: usize, sp: usize, a0: usize, a1: usize) noreturn {
asm volatile (
\\ mv sp, %[sp]
@@ -117,26 +174,3 @@ inline fn longJump(pc: usize, sp: usize, a0: usize, a1: usize) noreturn {
);
unreachable;
}
/// First Zig-level entry point for the boot hart.
///
/// Records the DTB address and boot hart id in globals, creates the early
/// mapping with `vmm.mapEarly`, then jumps to `bspUpperEntry` at its virtual
/// address with the stack pointer moved to the virtual address of the BSP stack top.
///
/// * `realAddress` - load address of the kernel image (presumably physical,
///   per the comment below); forwarded to `bspUpperEntry` as its first argument.
/// * `bspHartId` - id of the boot hart; truncated to 32 bits and stored in `gBspHartId`.
/// * `dtbAddress` - address of the device tree blob; stored in `gDtbAddress`.
pub export fn rv64BspLowerEntry(realAddress: usize, bspHartId: usize, dtbAddress: usize) callconv(.C) noreturn {
debug.log.setWriteFn(&sbi.debugPrintByte);
gDtbAddress = dtbAddress;
gBspHartId = @truncate(bspHartId);
vmm.mapEarly(realAddress);
// &bspUpperEntry will yield a pointer like: X + P, where
// * X is symbol's raw address,
// * P is the physical load base of the image (0x80200000 on rv64 usually)
//
// Rebase the address to X + Y, where Y is the virtual load base
// (KERNEL_VIRTUAL_BASE plus the L1 offset of the load address)
// const kernelL1Offset = realAddress & ((1 << 30) - 1);
const realAddressL1Offset = vmm.L1.offset(realAddress);
const virtualEntry = @intFromPtr(&bspUpperEntry) + vmm.KERNEL_VIRTUAL_BASE - realAddress + realAddressL1Offset;
const virtualSp = @intFromPtr(&__rv64_bsp_stack_top) + vmm.KERNEL_VIRTUAL_BASE - realAddress + realAddressL1Offset;
longJump(virtualEntry, virtualSp, realAddress, 0);
// longJump is declared noreturn, so this halt is not expected to be reached
arch.halt();
}
+6 -6
View File
@@ -17,6 +17,7 @@ fn f0(arg: usize) callconv(.C) noreturn {
while (true) {
f1(arg, c);
c += 1;
thread.yield();
}
}
@@ -25,18 +26,17 @@ noinline fn f1(arg: usize, c: usize) void {
}
pub export fn kernel_main() callconv(.C) noreturn {
log.write("\x1B[2J", .{});
var a = arena.Arena.setup(256 * 0x1000) orelse @panic("Could not setup kernel arena");
const pc = @intFromPtr(&f0);
thread.Queue.initThisCpu(&a);
// log.write("\x1B[2J", .{});
for (0..32) |i| {
const pc = @intFromPtr(&f0);
for (0..4) |i| {
const t = thread.Thread.create(&a, pc, i);
thread.addThread(t);
thread.enqueue(t);
}
thread.enter();
arch.halt();
}
pub fn panic(msg: []const u8, error_return_trace: ?*std.builtin.StackTrace, return_address: ?usize) noreturn {
+71 -63
View File
@@ -5,6 +5,70 @@ const arch = @import("kernel.zig").arch;
const log = @import("debug.zig").log;
const mem = @import("mem.zig");
/// Run queue owned by a single CPU.
///
/// Threads are kept on a circular doubly linked list threaded through
/// `Thread.next` / `Thread.prev`; `head` is the first enqueued thread and
/// `current` is the thread presently running (null while the idle context runs).
pub const Queue = struct {
    /// Context that runs when there is no thread to switch to.
    idle: arch.Context,
    /// Thread currently executing on this CPU, or null when idling.
    current: ?*Thread = null,
    /// First thread of the circular list, or null when the queue is empty.
    head: ?*Thread = null,

    /// Queue of the executing CPU; thread-local, null until `initThisCpu` runs.
    pub threadlocal var thisCpu: ?*Queue = null;

    /// Creates an empty queue (with its idle context) from arena `a` and
    /// publishes it as `thisCpu`.
    pub fn initThisCpu(a: *arena.Arena) void {
        const idleContext = arch.Context.idle();
        const queue = a.create(Queue);
        queue.* = .{ .idle = idleContext };
        thisCpu = queue;
    }

    /// Starts executing the head thread, or the idle context if the queue is empty.
    pub fn enter(self: *Queue) noreturn {
        self.current = self.head;
        if (self.current) |first| {
            first.enter();
        } else {
            self.idle.enter();
        }
    }

    /// Gives up the CPU to the next runnable context, if there is a different one.
    pub fn yield(self: *Queue) void {
        const running = self.current orelse {
            // Idle is running: switch to the head thread if there is one,
            // otherwise simply stay in idle.
            const first = self.head orelse return;
            self.current = first;
            first.archContext.switchFrom(&self.idle);
            return;
        };

        const successor = running.next orelse {
            // The running thread has no successor: fall back to idle.
            self.current = null;
            self.idle.switchFrom(&running.archContext);
            return;
        };

        // A thread that is its own successor just keeps running.
        if (successor == running) return;

        self.current = successor;
        successor.switchFrom(running);
    }

    /// Links `t` in just before `head`, i.e. at the tail of the circular list.
    pub fn enqueue(self: *Queue, t: *Thread) void {
        const first = self.head orelse {
            // Empty queue: `t` becomes a one-element ring.
            t.next = t;
            t.prev = t;
            self.head = t;
            return;
        };

        t.next = first;
        t.prev = first.prev;
        const last = first.prev.?;
        last.next = t;
        first.prev = t;
    }
};
pub const Thread = struct {
allocator: *arena.Arena,
archContext: arch.Context,
@@ -12,11 +76,11 @@ pub const Thread = struct {
next: ?*Thread = null,
prev: ?*Thread = null,
pub fn create(a: *arena.Arena, pc: usize, sp: usize) *Thread {
pub fn create(a: *arena.Arena, pc: usize, arg: usize) *Thread {
const thread = a.create(Thread);
thread.* = .{
.allocator = a,
.archContext = arch.Context.kernel(pc, sp),
.archContext = arch.Context.kernel(pc, arg),
};
return thread;
}
@@ -42,11 +106,7 @@ pub fn KStack(comptime SIZE: usize) type {
const physicalBase = mem.phys.alloc_pages(SIZE * @sizeOf(usize) / 0x1000) orelse @panic("OOM");
const ptr = @as(*[SIZE]usize, @ptrFromInt(physicalBase.virtualize()));
return .{
.data = ptr,
.physicalBase = physicalBase,
.sp = @ptrFromInt(@intFromPtr(&ptr[0]) + SIZE * @sizeOf(usize))
};
return .{ .data = ptr, .physicalBase = physicalBase, .sp = @ptrFromInt(@intFromPtr(&ptr[0]) + SIZE * @sizeOf(usize)) };
}
pub fn push(self: *@This(), value: usize) void {
@@ -59,66 +119,14 @@ pub fn KStack(comptime SIZE: usize) type {
};
}
var gThreadHead: ?*Thread = null;
var gCurrent: ?*Thread = null;
pub fn addThread(t: *Thread) void {
if (gThreadHead) |gt| {
t.next = gt;
t.prev = gt.prev;
gt.prev.?.next = t;
gt.prev = t;
} else {
gThreadHead = t;
t.next = t;
t.prev = t;
}
pub fn enqueue(t: *Thread) void {
Queue.thisCpu.?.enqueue(t);
}
pub fn enter() noreturn {
if (gThreadHead) |gt| {
gCurrent = gt;
gt.enter();
}
@panic("Unreachable");
Queue.thisCpu.?.enter();
}
pub fn yield() void {
const curr = gCurrent orelse @panic("No current thread");
const next = curr.next orelse @panic("No next thread");
if (curr != next) {
gCurrent = next;
next.switchFrom(curr);
}
}
extern var __tdata_start: u8;
extern var __tdata_end: u8;
extern var __tbss_start: u8;
extern var __tbss_end: u8;
pub fn setupCurrentCpu() void {
// Assume .tbss follows .tdata
const tdataStart = @intFromPtr(&__tdata_start);
const tdataEnd = @intFromPtr(&__tdata_end);
const tdataSize = tdataEnd - tdataStart;
const tbssStart = @intFromPtr(&__tbss_start);
const tbssEnd = @intFromPtr(&__tbss_end);
const tbssSize = tbssEnd - tbssStart;
const tdataData = @as([*]u8, @ptrFromInt(tdataStart))[0..tdataSize];
const tlsSize = tdataSize + tbssSize;
const tlsPageCount = (tlsSize + mem.vmm.PAGE_SIZE - 1) / mem.vmm.PAGE_SIZE;
// Variant I: TLS block 0 follows TP after a certain displacement
const tlsAddress = mem.phys.alloc_pages(tlsPageCount).?.virtualize();
const tlsData = @as([*]u8, @ptrFromInt(tlsAddress))[0..tlsSize];
log.info("Allocated TLS @ {*}", .{ tlsData });
@memcpy(tlsData[0..tdataSize], tdataData);
@memset(tlsData[tdataSize..], 0);
arch.setThreadPointer(tlsAddress);
Queue.thisCpu.?.yield();
}