From 4878a0b06e15ae499230bd7cc36916c9f2d85a5d Mon Sep 17 00:00:00 2001 From: Mark Date: Fri, 7 Feb 2020 15:02:06 +0200 Subject: [PATCH] Proper FPU init, fxsave/fxrstor for threads --- arch/amd64/kernel.c | 3 +++ arch/amd64/sched_s.S | 16 ++++++++++++ include/arch/amd64/asm/asm_thread.h | 2 ++ include/sys/thread.h | 1 + sys/thread.c | 40 +++++++++++++++++++++++++++++ 5 files changed, 62 insertions(+) diff --git a/arch/amd64/kernel.c b/arch/amd64/kernel.c index d6f87bd..c8d0121 100644 --- a/arch/amd64/kernel.c +++ b/arch/amd64/kernel.c @@ -11,6 +11,7 @@ #include "arch/amd64/hw/ps2.h" #include "arch/amd64/cpuid.h" #include "arch/amd64/mm/mm.h" +#include "arch/amd64/fpu.h" #include "sys/block/ram.h" #include "sys/config.h" #include "sys/kernel.h" @@ -77,6 +78,8 @@ void kernel_early_init(struct amd64_loader_data *data) { amd64_make_random_seed(); + amd64_fpu_init(); + #if defined(AMD64_SMP) amd64_smp_init(); #endif diff --git a/arch/amd64/sched_s.S b/arch/amd64/sched_s.S index face07a..b9c73e7 100644 --- a/arch/amd64/sched_s.S +++ b/arch/amd64/sched_s.S @@ -62,6 +62,14 @@ context_exec_enter: context_switch_to: // %rdi - new thread // %rsi - from + pushq %rdi + pushq %rsi + + call context_save_fpu + + popq %rsi + popq %rdi + pushq %r15 pushq %r14 pushq %r13 @@ -83,6 +91,14 @@ context_switch_first: popq %r14 popq %r15 + pushq %rdi + pushq %rsi + + call context_restore_fpu + + popq %rsi + popq %rdi + // Load TSS.RSP0 for user -> kernel transition // %rax = top of task's stack movq THREAD_RSP0_TOP(%rdi), %rax diff --git a/include/arch/amd64/asm/asm_thread.h b/include/arch/amd64/asm/asm_thread.h index d5f9db6..55491b5 100644 --- a/include/arch/amd64/asm/asm_thread.h +++ b/include/arch/amd64/asm/asm_thread.h @@ -16,5 +16,7 @@ struct thread_data { uintptr_t rsp0_base, rsp0_size; uintptr_t rsp3_base, rsp3_size; + + void *fxsave; }; #endif diff --git a/include/sys/thread.h b/include/sys/thread.h index 1b99249..5668cc8 100644 --- a/include/sys/thread.h +++ b/include/sys/thread.h 
@@ -21,6 +21,7 @@ enum thread_state {
 
 #define THREAD_KERNEL (1 << 0)
 #define THREAD_EMPTY (1 << 1)
+#define THREAD_FPU_SAVED (1 << 2)
 
 #define thread_signal_clear(thr, signum) \
     (thr)->sigq &= ~(1ULL << ((signum) - 1))
diff --git a/sys/thread.c b/sys/thread.c
index 4eba3ca..3f7b968 100644
--- a/sys/thread.c
+++ b/sys/thread.c
@@ -40,6 +40,26 @@ struct sys_fork_frame {
 LIST_HEAD(threads_all_head);
 static pid_t last_kernel_pid = 0;
 static pid_t last_user_pid = 0;
+
+// Save old's FPU/SSE state; bounce buffer is per-call and 16-byte aligned, not shared by CPUs
+void context_save_fpu(struct thread *new, struct thread *old) {
+    char raw[FXSAVE_REGION + 15], *buf = (char *) (((uintptr_t) raw + 15) & ~(uintptr_t) 15);
+    _assert(old);
+    if (!old->data.fxsave) { return; }  // no save area (thread_init gives kernel threads NULL)
+    asm volatile ("fxsave (%0)"::"r"(buf):"memory");
+    memcpy(old->data.fxsave, buf, FXSAVE_REGION);
+    old->flags |= THREAD_FPU_SAVED;  // tells context_restore_fpu the area holds a valid image
+}
+
+// Reload new's FPU/SSE state if an image was saved for it; the saved flag is consumed
+void context_restore_fpu(struct thread *new, struct thread *old) {
+    char raw[FXSAVE_REGION + 15], *buf = (char *) (((uintptr_t) raw + 15) & ~(uintptr_t) 15);
+    _assert(new);
+    if (!(new->flags & THREAD_FPU_SAVED)) { return; }  // nothing saved: leave registers as-is
+    memcpy(buf, new->data.fxsave, FXSAVE_REGION);
+    asm volatile ("fxrstor (%0)"::"r"(buf):"memory");
+    new->flags &= ~THREAD_FPU_SAVED;
+}
 
 pid_t thread_alloc_pid(int is_user) {
     if (is_user) {
@@ -226,6 +246,13 @@ int thread_init(struct thread *thr, uintptr_t entry, void *arg, int user) {
     thr->name[0] = 0;
     thr->flags = user ? 
0 : THREAD_KERNEL; + if (!(thr->flags & THREAD_KERNEL)) { + thr->data.fxsave = kmalloc(FXSAVE_REGION); + _assert(thr->data.fxsave); + } else { + thr->data.fxsave = NULL; + } + list_link_init(&thr->g_link); uint64_t *stack = (uint64_t *) (thr->data.rsp0_base + thr->data.rsp0_size); @@ -375,6 +402,16 @@ int sys_fork(struct sys_fork_frame *frame) { dst->signal_entry = src->signal_entry; + strcpy(dst->name, src->name); + + dst->data.fxsave = kmalloc(FXSAVE_REGION); + _assert(dst->data.fxsave); + _assert(src->data.fxsave); + + if (src->flags & THREAD_FPU_SAVED) { + memcpy(dst->data.fxsave, src->data.fxsave, FXSAVE_REGION); + } + thread_ioctx_fork(dst, src); dst->state = THREAD_READY; @@ -546,6 +583,9 @@ int sys_execve(const char *path, const char **argv, const char **envp) { thr->data.cr3 = MM_PHYS(thr->space); thr->flags = 0; + thr->data.fxsave = kmalloc(FXSAVE_REGION); + _assert(thr->data.fxsave); + mm_space_clone(thr->space, mm_kernel, MM_CLONE_FLG_KERNEL); } else { mm_space_release(thr->space);