From cbb39f9d772f235a2eaf70f8525e5f4fcf7fca59 Mon Sep 17 00:00:00 2001
From: Mark
Date: Thu, 30 Jan 2020 17:29:40 +0200
Subject: [PATCH] Add basic userspace binary execution (ported from older kernel)

---
 etc/make/amd64/platform.mk |   3 +-
 include/sys/amd64/mm/mm.h  |  20 ++++----
 include/sys/binfmt_elf.h   |   5 ++
 include/sys/thread.h       |   2 +
 include/sys/vmalloc.h      |   2 +-
 sys/amd64/binfmt_elf.c     | 102 +++++++++++++++++++++++++++++++++++++
 sys/amd64/hw/exc.c         |   8 ++-
 sys/amd64/kernel.c         |   4 ++
 sys/amd64/mm/map.c         |   3 +-
 sys/amd64/mm/vmalloc.c     |   4 +-
 sys/amd64/sched_s.S        |   2 +
 sys/sched.c                |  19 +++++++
 sys/thread.c               |  39 +++++++++++++-
 13 files changed, 196 insertions(+), 17 deletions(-)
 create mode 100644 include/sys/binfmt_elf.h
 create mode 100644 sys/amd64/binfmt_elf.c

diff --git a/etc/make/amd64/platform.mk b/etc/make/amd64/platform.mk
index f523d32..fc4ffc4 100644
--- a/etc/make/amd64/platform.mk
+++ b/etc/make/amd64/platform.mk
@@ -44,7 +44,8 @@ OBJS+=$(O)/sys/amd64/hw/rs232.o \
 	$(O)/sys/amd64/cpuid.o \
 	$(O)/sys/amd64/sched_s.o \
 	$(O)/sys/amd64/syscall_s.o \
-	$(O)/sys/amd64/syscall.o
+	$(O)/sys/amd64/syscall.o \
+	$(O)/sys/amd64/binfmt_elf.o
 
 kernel_LINKER=sys/amd64/link.ld
 kernel_LDFLAGS=-nostdlib \
diff --git a/include/sys/amd64/mm/mm.h b/include/sys/amd64/mm/mm.h
index e634012..5f87528 100644
--- a/include/sys/amd64/mm/mm.h
+++ b/include/sys/amd64/mm/mm.h
@@ -20,7 +20,7 @@
 #define MM_PTE_INDEX_MASK 0x1FF
 #define MM_PTE_COUNT 512
 
-#define MM_PTE_FLAGS_MASK 0xFFF
+#define MM_PTE_FLAGS_MASK (0xFFF | MM_PAGE_NOEXEC) // flag bits: 0-11 plus bit 63 (MM_PAGE_NOEXEC)
 #define MM_PTE_MASK (~0xFFF)
 #define MM_PAGE_MASK (~0xFFF)
 
@@ -36,15 +36,15 @@
 #define MM_PDPTI_SHIFT 30
 #define MM_PML4I_SHIFT 39
 
-#define MM_PAGE_PRESENT (1 << 0)
-#define MM_PAGE_WRITE (1 << 1)
-#define MM_PAGE_USER (1 << 2)
-#define MM_PAGE_WT (1 << 3)
-#define MM_PAGE_NOCACHE (1 << 4)
-#define MM_PAGE_ACCESSED (1 << 5)
-#define MM_PAGE_DIRTY (1 << 6)
-#define MM_PAGE_HUGE (1 << 7)
-#define MM_PAGE_GLOBAL (1 << 8)
+#define MM_PAGE_PRESENT (1ULL << 0) // all flags are now unsigned long long, like MM_PAGE_NOEXEC below
+#define MM_PAGE_WRITE (1ULL << 1)
+#define MM_PAGE_USER (1ULL << 2)
+#define MM_PAGE_WT (1ULL << 3)
+#define MM_PAGE_NOCACHE (1ULL << 4)
+#define MM_PAGE_ACCESSED (1ULL << 5)
+#define MM_PAGE_DIRTY (1ULL << 6)
+#define MM_PAGE_HUGE (1ULL << 7)
+#define MM_PAGE_GLOBAL (1ULL << 8)
 #define MM_PAGE_NOEXEC (1ULL << 63)
 
 /// Page map level 4
diff --git a/include/sys/binfmt_elf.h b/include/sys/binfmt_elf.h
new file mode 100644
index 0000000..2031f53
--- /dev/null
+++ b/include/sys/binfmt_elf.h
@@ -0,0 +1,5 @@
+#pragma once
+
+struct thread;
+// Loads the in-memory ELF64 image at src into thr->space; returns 0 and stores e_entry in *entry, or -EINVAL
+int elf_load(struct thread *thr, const void *src, uintptr_t *entry);
diff --git a/include/sys/thread.h b/include/sys/thread.h
index fa554d8..8aa4df1 100644
--- a/include/sys/thread.h
+++ b/include/sys/thread.h
@@ -1,10 +1,12 @@
 #pragma once
 
 #include "sys/amd64/asm/asm_thread.h"
+#include "sys/mm.h"
 
 struct thread {
     // Platform data
     struct thread_data data;
+    mm_space_t space; // Address space of the thread (mm_kernel for threads created with user == 0)
 
     pid_t pid;
     struct thread *prev, *next;
diff --git a/include/sys/vmalloc.h b/include/sys/vmalloc.h
index e17d02c..de75763 100644
--- a/include/sys/vmalloc.h
+++ b/include/sys/vmalloc.h
@@ -34,7 +34,7 @@ uintptr_t vmfind(const mm_space_t pd, uintptr_t from, uintptr_t to, size_t npage
  * @return Virtual address of the resulting range on success,
  *         MM_NADDR otherwise
  */
-uintptr_t vmalloc(mm_space_t pd, uintptr_t from, uintptr_t to, size_t npages, int flags);
+uintptr_t vmalloc(mm_space_t pd, uintptr_t from, uintptr_t to, size_t npages, uint64_t flags);
 
 /**
  * @brief Deallocate a virtual memory range and physical pages
diff --git a/sys/amd64/binfmt_elf.c b/sys/amd64/binfmt_elf.c
new file mode 100644
index 0000000..d0eb49d
--- /dev/null
+++ b/sys/amd64/binfmt_elf.c
@@ -0,0 +1,102 @@
+#include "sys/amd64/mm/phys.h"
+#include "sys/amd64/mm/map.h"
+#include "sys/binfmt_elf.h"
+#include "sys/string.h"
+#include "sys/thread.h"
+#include "sys/debug.h"
+#include "sys/panic.h"
+#include "sys/errno.h"
+#include "sys/elf.h"
+
+#define ELF_ADDR_MIN 0x400000 // Sections with sh_addr below this are rejected with -EINVAL
+// Load every SHF_ALLOC section (except .note*/.gnu*) of the in-memory ELF64 image at `from` into thr->space.
+int elf_load(struct thread *thr, const void *from, uintptr_t *entry) {
+    // Returns 0 and stores e_entry in *entry; -EINVAL on bad magic, wrong class or too-low section; panics on OOM/map failure
+    Elf64_Ehdr *ehdr = (Elf64_Ehdr *) from;
+    // Check magic: e_ident must start with "\x7F" "ELF"
+    if (strncmp((const char *) ehdr->e_ident, "\x7F""ELF", 4) != 0) {
+        kerror("elf: magic mismatch\n");
+        return -EINVAL;
+    }
+
+    if (ehdr->e_ident[EI_CLASS] != ELFCLASS64) {
+        kerror("elf: object was not intended for 64-bit\n");
+        return -EINVAL;
+    }
+    // NOTE(review): e_shoff, e_shstrndx, sh_name and sh_offset are used as-is; no image size is passed in, so nothing is bounds-checked
+    Elf64_Shdr *shdrs = (Elf64_Shdr *) (ehdr->e_shoff + (uintptr_t) from);
+    const char *shstrtabd = (const char *) (shdrs[ehdr->e_shstrndx].sh_offset + (uintptr_t) from);
+
+    // Load the sections: only those flagged SHF_ALLOC are placed in memory
+    for (size_t i = 0; i < ehdr->e_shnum; ++i) {
+        Elf64_Shdr *shdr = &shdrs[i];
+        const char *name = &shstrtabd[shdr->sh_name];
+
+        if (shdr->sh_flags & SHF_ALLOC) {
+            if (!strncmp(name, ".note", 5)) {
+                // Skip gcc-emitted .note* sections even though they carry SHF_ALLOC
+                continue;
+            }
+            if (!strncmp(name, ".gnu", 4)) {
+                // Skip gcc-emitted .gnu* sections even though they carry SHF_ALLOC
+                continue;
+            }
+
+            kdebug("Loading %s\n", name);
+
+            // If the section is below what is allowed
+            if (shdr->sh_addr < ELF_ADDR_MIN) {
+                kerror("elf: section address is below allowed\n");
+                return -EINVAL;
+            }
+
+            // Allocate memory for the section (only sections of at most one page are supported so far)
+            size_t sec_pages = (shdr->sh_size + 0xFFF) / 0x1000;
+            kdebug("%s needs %u pages\n", name, sec_pages);
+            if (sec_pages > 1) {
+                panic("elf: I was too lazy to implement this yet\n");
+            }
+            // NOTE(review): sec_pages ignores page_offset, so a section that straddles a page boundary passes the check above
+            uintptr_t page_virt = shdr->sh_addr & ~0xFFF;
+            uintptr_t page_offset = shdr->sh_addr & 0xFFF;
+
+            kdebug("%s base page is VMA %p\n", name, page_virt);
+
+            uintptr_t page_phys;
+            // Reuse the physical page if page_virt is already mapped in thr->space, otherwise allocate and map a new one
+            if ((page_phys = amd64_map_get(thr->space, page_virt, NULL)) == MM_NADDR) {
+                kdebug("Allocating physical page\n");
+                page_phys = amd64_phys_alloc_page();
+
+                if (page_phys == MM_NADDR) {
+                    panic("elf: out of memory\n");
+                }
+
+                if (amd64_map_single(thr->space, page_virt, page_phys, (1 << 1) | (1 << 2)) != 0) { // bits 1|2 == MM_PAGE_WRITE | MM_PAGE_USER
+                    panic("elf: map failed\n");
+                }
+            } else {
+                kdebug("Not mapping\n");
+            }
+            // Fill the section through MM_VIRTUALIZE(page_phys): PROGBITS is copied from the image, NOBITS is zeroed
+            if (shdr->sh_type == SHT_PROGBITS) {
+                kdebug("elf: memcpy %p <- %p %S\n", MM_VIRTUALIZE(page_phys) + page_offset,
+                        (uintptr_t) from + shdr->sh_offset,
+                        shdr->sh_size);
+                memcpy((void *) (MM_VIRTUALIZE(page_phys) + page_offset),
+                       (const void *) ((uintptr_t) from + shdr->sh_offset),
+                       shdr->sh_size); // NOTE(review): writes past the single mapped page when page_offset + sh_size > 0x1000
+            } else if (shdr->sh_type == SHT_NOBITS) {
+                kdebug("elf: memset 0 %p %S\n", MM_VIRTUALIZE(page_phys) + page_offset,
+                        shdr->sh_size);
+                memset((void *) (MM_VIRTUALIZE(page_phys) + page_offset),
+                       0, shdr->sh_size); // NOTE(review): same page-overrun hazard as the memcpy above
+            }
+        }
+    }
+
+    *entry = ehdr->e_entry;
+
+    return 0;
+}
+
diff --git a/sys/amd64/hw/exc.c b/sys/amd64/hw/exc.c
index 8a6ec17..34ab884 100644
--- a/sys/amd64/hw/exc.c
+++ b/sys/amd64/hw/exc.c
@@ -1,6 +1,7 @@
 #include "sys/types.h"
 #include "sys/debug.h"
 #include "sys/panic.h"
+#include "sys/mm.h"
 
 #define X86_EXCEPTION_PF 14
 
@@ -65,10 +66,15 @@ void amd64_exception(struct amd64_exception_frame *frame) {
            (frame->rflags & X86_FLAGS_TF) ? 'T' : '-');
 
     if (frame->exc_no == X86_EXCEPTION_PF) {
-        uintptr_t cr2;
+        uintptr_t cr2, cr3;
         asm volatile ("movq %%cr2, %0":"=r"(cr2));
+        asm volatile ("movq %%cr3, %0":"=r"(cr3));
 
         kfatal("Page fault info:\n");
+        kfatal("%%cr3 = %p\n", cr3);
+        if (MM_VIRTUALIZE(cr3) == (uintptr_t) mm_kernel) { // CR3 pointed at mm_kernel when the fault was taken
+            kfatal("(Kernel)\n");
+        }
         kfatal("Fault address: %p\n", cr2);
 
         if (frame->exc_code & X86_PF_RESVD) {
diff --git a/sys/amd64/kernel.c b/sys/amd64/kernel.c
index 254a7bd..65301c1 100644
--- a/sys/amd64/kernel.c
+++ b/sys/amd64/kernel.c
@@ -57,6 +57,10 @@ void kernel_main(struct amd64_loader_data *data) {
 
     syscall_init();
     sched_init();
+    if (data->initrd_ptr) { // When an initrd is present, queue a thread that executes it as an ELF binary
+        extern void sched_user_init(uintptr_t base);
+        sched_user_init(MM_VIRTUALIZE(data->initrd_ptr));
+    }
     sched_enter();
 
     panic("This code should not run\n");
diff --git a/sys/amd64/mm/map.c b/sys/amd64/mm/map.c
index 313892c..2e31e06 100644
--- a/sys/amd64/mm/map.c
+++ b/sys/amd64/mm/map.c
@@ -178,9 +178,10 @@ int amd64_map_single(mm_space_t pml4, uintptr_t virt_addr, uintptr_t phys, uint6
     assert(!(pt[pti] & MM_PAGE_PRESENT), "Entry already present for %p\n", virt_addr);
 
 #if defined(KERNEL_TEST_MODE)
-    kdebug("map %p -> %p %cr%c\n", virt_addr, phys,
+    kdebug("map %p -> %p %cr%c%c%c\n", virt_addr, phys,
            (flags & MM_PAGE_USER) ? 'u' : '-',
            (flags & MM_PAGE_WRITE) ? 'w' : '-',
+           (flags & MM_PAGE_NOEXEC) ? '-' : 'x', // 'x' is printed unless the no-execute bit is set
            (flags & MM_PAGE_GLOBAL) ? 'G' : '-');
 #endif
 
diff --git a/sys/amd64/mm/vmalloc.c b/sys/amd64/mm/vmalloc.c
index 7cd9511..2b6772c 100644
--- a/sys/amd64/mm/vmalloc.c
+++ b/sys/amd64/mm/vmalloc.c
@@ -26,10 +26,10 @@ no_match:
     return MM_NADDR;
 }
 
-uintptr_t vmalloc(mm_space_t pml4, uintptr_t from, uintptr_t to, size_t npages, int flags) {
+uintptr_t vmalloc(mm_space_t pml4, uintptr_t from, uintptr_t to, size_t npages, uint64_t flags) {
     uintptr_t addr = vmfind(pml4, from, to, npages);
     uintptr_t virt_page, phys_page;
-    uint64_t rflags = flags & (MM_PAGE_USER | MM_PAGE_WRITE);
+    uint64_t rflags = flags & (MM_PAGE_USER | MM_PAGE_WRITE | MM_PAGE_NOEXEC); // other caller-supplied flag bits are masked off
 
     if (addr == MM_NADDR) {
         return MM_NADDR;
diff --git a/sys/amd64/sched_s.S b/sys/amd64/sched_s.S
index 0dbef4e..1781fd1 100644
--- a/sys/amd64/sched_s.S
+++ b/sys/amd64/sched_s.S
@@ -23,6 +23,8 @@ context_exec_enter:
     // %rdx - stack
     // %rcx - entry
     movq THREAD_RSP0(%rsi), %rsp
+    movq THREAD_CR3(%rsi), %rax // load the CR3 value stored at THREAD_CR3 of the thread in %rsi ...
+    movq %rax, %cr3 // ... and make it the active address space
 
     pushq $0x1B
     pushq %rdx
diff --git a/sys/sched.c b/sys/sched.c
index 072776f..7dffa8b 100644
--- a/sys/sched.c
+++ b/sys/sched.c
@@ -83,6 +83,25 @@ void yield(void) {
     context_switch_to(to, from);
 }
 
+static struct thread user_init; // Thread that calls sys_execve() on the initial binary
+// Entry of the user_init thread: arg is the address of the in-memory image, handed to sys_execve() in place of a path
+static void user_init_func(void *arg) {
+    kdebug("Starting user init\n");
+
+    extern int sys_execve(const char *path, const char **argp, const char **envp);
+    sys_execve(arg, NULL, NULL);
+    // sys_execve() is not expected to return; halt forever if it ever does
+    while (1) {
+        asm volatile ("hlt");
+    }
+}
+// Create the user_init thread with user == 0, give it a PID and queue it; base is passed to user_init_func as arg
+void sched_user_init(uintptr_t base) {
+    thread_init(&user_init, (uintptr_t) user_init_func, (void *) base, 0);
+    user_init.pid = thread_alloc_pid(0);
+    sched_queue(&user_init);
+}
+
 void sched_init(void) {
     thread_init(&thread_idle, (uintptr_t) idle, 0, 0);
     thread_idle.pid = -1;
diff --git a/sys/thread.c b/sys/thread.c
index ff646ea..a299ddd 100644
--- a/sys/thread.c
+++ b/sys/thread.c
@@ -1,5 +1,6 @@
 #include "sys/amd64/mm/phys.h"
 #include "sys/amd64/mm/pool.h"
+#include "sys/binfmt_elf.h"
 #include "sys/amd64/cpu.h"
 #include "sys/vmalloc.h"
 #include "sys/assert.h"
@@ -51,6 +52,7 @@ int thread_init(struct thread *thr, uintptr_t entry, void *arg, int user) {
         mm_space_t space = amd64_mm_pool_alloc();
         mm_space_clone(space, mm_kernel, MM_CLONE_FLG_KERNEL);
         thr->data.cr3 = MM_PHYS(space);
+        thr->space = space; // keep the virtual pointer next to the physical address stored in data.cr3
 
         uintptr_t ustack_base = vmalloc(space, 0x1000000, 0xF0000000, 4, MM_PAGE_WRITE | MM_PAGE_USER);
         thr->data.rsp3_base = ustack_base;
@@ -64,6 +66,7 @@ int thread_init(struct thread *thr, uintptr_t entry, void *arg, int user) {
         }
     } else {
         thr->data.cr3 = MM_PHYS(mm_kernel);
+        thr->space = mm_kernel; // threads created with user == 0 use mm_kernel as their space
     }
 
     // Initial thread context
@@ -163,7 +166,7 @@ int sys_fork(struct sys_fork_frame *frame) {
     dst->data.rsp0_top = dst->data.rsp0_base + dst->data.rsp0_size;
 
     mm_space_t space = amd64_mm_pool_alloc();
-    mm_space_fork(space, (mm_space_t) MM_VIRTUALIZE(src->data.cr3), MM_CLONE_FLG_KERNEL | MM_CLONE_FLG_USER);
+    mm_space_fork(space, src->space, MM_CLONE_FLG_KERNEL | MM_CLONE_FLG_USER); // src->space replaces MM_VIRTUALIZE(src->data.cr3)
 
     dst->data.rsp3_base = src->data.rsp3_base;
     dst->data.rsp3_size = src->data.rsp3_size;
@@ -175,6 +178,7 @@ int sys_fork(struct sys_fork_frame *frame) {
     }
 
     dst->data.cr3 = MM_PHYS(space);
+    dst->space = space;
 
     uint64_t *stack = (uint64_t *) (dst->data.rsp0_base + dst->data.rsp0_size);
 
@@ -238,6 +242,39 @@ int sys_fork(struct sys_fork_frame *frame) {
     return dst->pid;
 }
 
+int sys_execve(const char *path, const char **argp, const char **envp) {
+    struct thread *thr = thread_self;
+    // NOTE: path is handed to elf_load() as the address of an in-memory ELF image, not a file name; argp and envp are unused
+    if (thr->space == mm_kernel) {
+        // Thread still runs on mm_kernel: give it a new PID and its own space with the kernel mappings cloned in
+        thr->pid = thread_alloc_pid(1);
+
+        thr->space = amd64_mm_pool_alloc();
+        _assert(thr->space);
+        thr->data.cr3 = MM_PHYS(thr->space);
+
+        mm_space_clone(thr->space, mm_kernel, MM_CLONE_FLG_KERNEL);
+    } else {
+        mm_space_release(thr->space); // presumably drops the mappings of the previous image - TODO confirm
+    }
+
+    uintptr_t entry;
+    if (elf_load(thr, path, &entry) != 0) {
+        panic("Feck\n");
+    }
+
+    thr->data.rsp0 = thr->data.rsp0_top; // reset the saved kernel stack pointer to the top of the kernel stack
+
+    // Allocate a new 4-page, non-executable user stack
+    uintptr_t ustack = vmalloc(thr->space, 0x100000, 0xF0000000, 4, MM_PAGE_USER | MM_PAGE_WRITE | MM_PAGE_NOEXEC);
+    thr->data.rsp3_base = ustack;
+    thr->data.rsp3_size = 4 * MM_PAGE_SIZE;
+    // NOTE(review): ustack is not compared with MM_NADDR, so a failed vmalloc() goes unnoticed here
+    context_exec_enter(NULL, thr, ustack + 4 * MM_PAGE_SIZE, entry);
+    // Not expected to be reached: the call above is given the new stack top and the ELF entry point
+    panic("This code shouldn't run\n");
+}
+
 __attribute__((noreturn)) void sys_exit(int status) {
     struct thread *thr = thread_self;
     kdebug("Thread %d exited with status %d\n", thr->pid, status);