/*
 * Kernel module loader: reads an ELF object from the filesystem, maps its
 * allocatable sections into the module region of the kernel address space,
 * resolves relocations against kernel/module symbols and runs the module's
 * entry function.
 */
#include "fs/ofile.h"
#include "fs/sysfs.h"
#include "fs/vfs.h"
#include "sys/assert.h"
#include "sys/snprintf.h"
#include "sys/debug.h"
#include "sys/elf.h"
#include "user/module.h"
#include "sys/mem/phys.h"
#include "sys/mem/slab.h"
#include "sys/mem/vmalloc.h"
#include "sys/heap.h"
#include "sys/string.h"
#include "sys/thread.h"
#include "sys/panic.h"
#include "sys/syms.h"
#include "sys/hash.h"
#include "user/errno.h"
#include "user/fcntl.h"

// Virtual address window searched for a free region to place a module in
#define MODULE_MAP_START        0x100000000
#define MODULE_MAP_END          0xF00000000

// Names of the symbols every module is inspected for
#define SYM_MOD_DESC            "_mod_desc"
#define SYM_MOD_ENTER           "_mod_enter"
#define SYM_MOD_EXIT            "_mod_exit"

// Relocation types handled by object_reloc()
#define R_X86_64_64             1
#define R_X86_64_PC32           2
#define R_X86_64_PLT32          4
#define R_X86_64_32             10
#define R_X86_64_REX_GOTP       42

// Size of a single PLT stub/GOT slot emitted by object_reloc()
#define MODULE_GOTPLT_ENTRY     16

// View of the raw ELF file while it is still held in memory.
// All pointers point inside `base` and become invalid after
// object_finalize_load().
struct object_image {
    void *base;
    size_t size;

    const char *shstrtab;
    const char *symstrtab;
    Elf64_Sym *symtab;
    Elf64_Shdr *sh_symtab;
};

struct object {
    struct module_desc *module_desc;
    int (*module_enter) (void *);
    void (*module_exit) (void);

    struct object_image image;

    // Symbol values
    struct hash export;

    // In-memory object
    uintptr_t object_base;
    size_t object_page_count;

    // PLT and GOT
    void *gotplt;
    size_t gotplt_size;

    struct list_head link;
};

static struct slab_cache *g_object_cache;
static LIST_HEAD(g_module_list);

/*
 * Looks `name` up in the export tables of all loaded modules.
 * On success stores the symbol value in `*value` and returns 0,
 * returns -1 if no module exports the name.
 */
static int mod_sym_lookup(const char *name, void **value) {
    struct object *mod;
    struct hash_pair *pair;

    list_for_each_entry(mod, &g_module_list, link) {
        pair = hash_lookup(&mod->export, name);
        if (pair) {
            *value = pair->value;
            return 0;
        }
    }

    return -1;
}

/*
 * Returns 1 if a module whose descriptor name equals `name` is in the
 * module list, 0 otherwise.
 */
static int mod_loaded(const char *name) {
    struct object *mod;

    list_for_each_entry(mod, &g_module_list, link) {
        if (!strcmp(mod->module_desc->name, name)) {
            return 1;
        }
    }

    return 0;
}

/*
 * Allocates a zeroed object with an initialized export table.
 * The slab cache is created on first use.
 * Returns NULL if the cache or the object cannot be allocated.
 */
static struct object *object_create(void) {
    if (!g_object_cache) {
        g_object_cache = slab_cache_get(sizeof(struct object));
        if (!g_object_cache) {
            return NULL;
        }
    }

    struct object *r = slab_calloc(g_object_cache);
    if (!r) {
        return NULL;
    }

    _assert(shash_init(&r->export, 16) == 0);
    list_head_init(&r->link);

    return r;
}

/*
 * Releases the file image (if still present) and the object itself.
 * The caller must make sure the object is no longer linked into
 * g_module_list.
 */
static void object_free(struct object *obj) {
    // The image is already gone (and zeroed) once object_finalize_load()
    // has run, so only free what is still owned
    if (obj->image.base) {
        kfree(obj->image.base);
    }
    // TODO: free pages occupied by the object
    slab_free(g_object_cache, obj);
}

/*
 * Drops the raw file image once everything needed was copied out of it.
 */
static void object_finalize_load(struct object *obj) {
    // Image no longer needed
    kfree(obj->image.base);
    memset(&obj->image, 0, sizeof(struct object_image));
}

/*
 * Reads the whole file at `pathname` into a heap buffer stored in
 * obj->image.base/size.
 * Returns 0 on success or a negative error code. On failure the buffer
 * (if allocated) stays attached to `obj` and is released by object_free().
 */
static int object_read(struct object *obj,
                       struct vfs_ioctx *ctx,
                       const char *pathname) {
    struct ofile of = {0};
    struct stat st;
    int res;

    if ((res = vfs_openat(ctx, &of, NULL, pathname, O_RDONLY, 0)) != 0) {
        return res;
    }

    if ((res = vfs_fstatat(ctx, of.file.vnode, NULL, &st, AT_EMPTY_PATH)) != 0) {
        goto cleanup;
    }

    // Everything after this point dereferences the ELF header, so a file
    // which cannot even hold one is rejected right away
    if ((size_t) st.st_size < sizeof(Elf64_Ehdr)) {
        res = -EINVAL;
        goto cleanup;
    }

    obj->image.size = st.st_size;
    // TODO: optimize how loading is handled?
    obj->image.base = kmalloc(obj->image.size);
    if (!obj->image.base) {
        res = -ENOMEM;
        goto cleanup;
    }

    if ((res = vfs_read(ctx, &of, obj->image.base, obj->image.size)) != (int) obj->image.size) {
        // Negative values are passed through as-is, a short read is
        // reported as an invalid file
        if (res >= 0) {
            kwarn("File read returned %S, expected to read %S\n", res, obj->image.size);
            res = -EINVAL;
        }
        goto cleanup;
    }

    res = 0;
cleanup:
    vfs_close(ctx, &of);
    return res;
}

/*
 * Makes sure every page covered by the section is mapped in the kernel
 * address space, then fills it: zeroes for SHT_NOBITS, file contents for
 * SHT_PROGBITS. Any other section type panics.
 * Always returns 0.
 */
static int object_section_load(struct object *obj, Elf64_Shdr *shdr) {
    uintptr_t page_phys;
    uintptr_t page_start = (shdr->sh_addr + obj->object_base) & ~0xFFF;
    uintptr_t page_end = (shdr->sh_addr + obj->object_base + shdr->sh_size + 0xFFF) & ~0xFFF;

    for (uintptr_t page = page_start; page < page_end; page += MM_PAGE_SIZE) {
        // Get or map physical page (sections may share a page)
        if (mm_map_get(mm_kernel, page, &page_phys) == MM_NADDR) {
            kdebug("MAP OBJECT PAGE %p\n", page);
            page_phys = mm_phys_alloc_page(PU_KERNEL);
            _assert(page_phys != MM_NADDR);
            _assert(mm_map_single(mm_kernel, page, page_phys, MM_PAGE_WRITE) == 0);
        }
    }

    switch (shdr->sh_type) {
    case SHT_NOBITS:
        memset((void *) (shdr->sh_addr + obj->object_base), 0, shdr->sh_size);
        break;
    case SHT_PROGBITS:
        kdebug("Load: %p <- %p, %S\n", shdr->sh_addr, shdr->sh_offset, shdr->sh_size);
        memcpy((void *) (shdr->sh_addr + obj->object_base),
               obj->image.base + shdr->sh_offset,
               shdr->sh_size);
        break;
    default:
        panic("Unhandled section type: %02x\n", shdr->sh_type);
    }

    return 0;
}

/*
 * Returns the section header number `index` of the file image.
 * No bounds checking is done here: callers must pass an index below
 * e_shnum.
 */
static inline Elf64_Shdr *object_shdr(struct object *obj, size_t index) {
    return obj->image.base +
           ((Elf64_Ehdr *) obj->image.base)->e_shoff +
           ((Elf64_Ehdr *) obj->image.base)->e_shentsize * index;
}

/*
 * Stores the low 32 bits of `full` at address P after checking that the
 * truncation is lossless: `full` has to sign-extend (is_signed != 0) or
 * zero-extend (is_signed == 0) back to the original 64-bit value.
 * Panics otherwise; S is only used for the message.
 */
static void object_reloc_store32(uintptr_t P, intptr_t full, int is_signed, uintptr_t S) {
    int fits;
    int32_t word32;

    if (is_signed) {
        fits = full >= -0x80000000LL && full <= 0x7FFFFFFFLL;
    } else {
        fits = ((uintptr_t) full >> 32) == 0;
    }

    if (!fits) {
        panic("Can't fit relocation against symbol %p: overflow\n", S);
    }

    word32 = full & 0xFFFFFFFF;
    memcpy((void *) P, &word32, 4);
}

/*
 * Maps the GOT/PLT area, publishes the object's global symbols in its
 * export table and applies all SHT_RELA relocations.
 * Unsupported input (SHT_REL sections, SHN_COMMON symbols, unknown symbol
 * or relocation types, unresolved references) panics.
 * Expects object_load() to have validated sh_entsize of the RELA sections
 * and object_extract_info() to have located the symbol/string tables.
 * Always returns 0.
 */
static int object_reloc(struct object *obj) {
    Elf64_Ehdr *ehdr = obj->image.base;
    Elf64_Shdr *shdr;
    Elf64_Rela *relatab;
    const char *name;

    // Map pages for GOT/PLT
    size_t pltoff = 0;
    uint8_t *plt = obj->gotplt;
    _assert(!(((uintptr_t) plt) & 0xFFF));
    for (size_t i = 0; i < obj->gotplt_size; i += MM_PAGE_SIZE) {
        uintptr_t page_phys = mm_phys_alloc_page(PU_KERNEL);
        _assert(page_phys != MM_NADDR);
        _assert(mm_map_get(mm_kernel, (uintptr_t) plt + i, NULL) == MM_NADDR);
        _assert(mm_map_single(mm_kernel, (uintptr_t) plt + i, page_phys, MM_PAGE_WRITE) == 0);
    }

    // Export global symbols
    shdr = obj->image.sh_symtab;
    for (size_t i = 0; i < shdr->sh_size / shdr->sh_entsize; ++i) {
        Elf64_Sym *sym = obj->image.base + shdr->sh_offset + i * shdr->sh_entsize;
        Elf64_Word type = ELF64_ST_TYPE(sym->st_info);
        Elf64_Word bind = ELF64_ST_BIND(sym->st_info);
        Elf64_Shdr *sym_section;

        if (bind != STB_GLOBAL || type == STT_SECTION) {
            continue;
        }

        name = &obj->image.symstrtab[sym->st_name];

        // Only symbols living in a real section can be exported: reserved
        // indices (e.g. SHN_ABS) do not name an entry of the section table
        if (sym->st_shndx != SHN_UNDEF &&
            sym->st_shndx != SHN_COMMON &&
            sym->st_shndx < ehdr->e_shnum) {
            sym_section = object_shdr(obj, sym->st_shndx);
            void *value = (void *) (sym_section->sh_addr + obj->object_base + sym->st_value);
            _assert(hash_insert(&obj->export, name, value) == 0);
        }
    }

    // Find relocation sections
    for (size_t i = 0; i < ehdr->e_shnum; ++i) {
        shdr = object_shdr(obj, i);

        if (shdr->sh_type == SHT_REL) {
            panic("TODO: support .rel sections without explicit addend\n");
        }
        if (shdr->sh_type != SHT_RELA) {
            continue;
        }

        relatab = obj->image.base + shdr->sh_offset;

        for (size_t j = 0; j < shdr->sh_size / shdr->sh_entsize; ++j) {
            Elf64_Rela *rela = &relatab[j];
            Elf64_Sym *sym = &obj->image.symtab[ELF64_R_SYM(rela->r_info)];
            Elf64_Word sym_type = ELF64_ST_TYPE(sym->st_info);
            uintptr_t value = 0;

            // Resolve the absolute address of the relocation target
            switch (sym_type) {
            case STT_NOTYPE:
            case STT_FUNC:
            case STT_OBJECT:
                if (sym->st_shndx == SHN_UNDEF) {
                    // External relocation against kernel or other module
                    // symbol
                    name = &obj->image.symstrtab[sym->st_name];
                    // 1. Try to lookup kernel symbol
                    sym = ksym_lookup(name);
                    if (!sym) {
                        // 2. Try to lookup symbol in other modules
                        void *r;
                        if (mod_sym_lookup(name, &r) != 0) {
                            panic("Undefined reference to %s\n", name);
                        }
                        value = (uintptr_t) r;
                    } else {
                        value = sym->st_value;
                    }
                    kdebug("Resolved %s: %p\n", name, value);
                } else if (sym->st_shndx == SHN_COMMON) {
                    panic("AAAA\n");
                } else {
                    // NOTE(review): other reserved indices (SHN_ABS) are
                    // not filtered here and would index past the section
                    // table -- confirm modules never contain them
                    Elf64_Shdr *tgt = object_shdr(obj, sym->st_shndx);
                    name = &obj->image.symstrtab[sym->st_name];
                    value = sym->st_value + obj->object_base + tgt->sh_addr;
                    kdebug("Internal %s: %p\n", name, value);
                }
                break;
            case STT_SECTION:
                {
                    _assert(sym->st_shndx != SHN_UNDEF);
                    Elf64_Shdr *tgt = object_shdr(obj, sym->st_shndx);
                    value = tgt->sh_addr + obj->object_base;
                }
                break;
            default:
                panic("Unhandled symbol type: %02x\n", sym_type);
            }

            // S: symbol value, A: addend, P: place being relocated
            uintptr_t S = value;
            intptr_t A = rela->r_addend;
            uintptr_t P = rela->r_offset + obj->object_base;
            uintptr_t L, GOT;
            intptr_t full;

            // Write relocation
            switch (ELF64_R_TYPE(rela->r_info)) {
            case R_X86_64_64:
                // S + A
                full = S + A;
                memcpy((void *) P, &full, 8);
                break;
            case R_X86_64_32:
                // S + A, must zero-extend to the full value
                full = S + A;
                kdebug("S = %p, A = %d\n", S, A);
                object_reloc_store32(P, full, 0, S);
                break;
            case R_X86_64_PC32:
                // S + A - P
                full = S + A - P;
                kdebug("S = %p, A = %d, P = %p\n", S, A, P);
                object_reloc_store32(P, full, 1, S);
                break;
            case R_X86_64_PLT32:
                // L + A - P
                // Put full symbol value + trampoline into PLT
                // jmpq *0x0(%rip)
                plt[pltoff + 0] = 0xFF;
                plt[pltoff + 1] = 0x25;
                plt[pltoff + 2] = 0x00;
                plt[pltoff + 3] = 0x00;
                plt[pltoff + 4] = 0x00;
                plt[pltoff + 5] = 0x00;
                // .quad VALUE
                memcpy(&plt[pltoff + 6], &value, sizeof(uintptr_t));

                L = ((uintptr_t) obj->gotplt) + pltoff;
                pltoff += MODULE_GOTPLT_ENTRY;

                full = L + A - P;
                kdebug("L = %p, A = %d, P = %p\n", L, A, P);
                object_reloc_store32(P, full, 1, S);
                break;
            case R_X86_64_REX_GOTP:
                // Looks like GOT + A - P
                memcpy(&plt[pltoff], &value, sizeof(uintptr_t));

                GOT = ((uintptr_t) obj->gotplt) + pltoff;
                pltoff += MODULE_GOTPLT_ENTRY;

                full = GOT + A - P;
                kdebug("GOT = %p, A = %d, P = %p\n", GOT, A, P);
                object_reloc_store32(P, full, 1, S);
                break;
            default:
                panic("Unhandled reloc type: %02x\n", ELF64_R_TYPE(rela->r_info));
            }
        }
    }

    return 0;
}

/*
 * Locates the section name table, the symbol table and ".strtab" in the
 * image, then finds the module's descriptor, entry and exit symbols.
 * obj->object_base must already be set, as symbol values are converted
 * to loaded addresses here.
 * Returns 0 on success, -EINVAL if a table or a mandatory symbol
 * (descriptor, entry function) is missing or malformed.
 */
static int object_extract_info(struct object *obj) {
    const char *name;
    Elf64_Ehdr *ehdr;
    Elf64_Shdr *shdr, *sh_shstrtab;

    ehdr = obj->image.base;

    sh_shstrtab = object_shdr(obj, ehdr->e_shstrndx);
    obj->image.shstrtab = obj->image.base + sh_shstrtab->sh_offset;

    // Locate symtab and strtab
    for (size_t i = 0; i < ehdr->e_shnum; ++i) {
        shdr = object_shdr(obj, i);
        name = &obj->image.shstrtab[shdr->sh_name];

        if (shdr->sh_type == SHT_STRTAB && !strcmp(name, ".strtab")) {
            obj->image.symstrtab = obj->image.base + shdr->sh_offset;
            continue;
        } else if (shdr->sh_type == SHT_SYMTAB) {
            if (obj->image.sh_symtab) {
                kwarn("Object defines several symbol tables\n");
                return -EINVAL;
            }
            obj->image.sh_symtab = shdr;
            obj->image.symtab = obj->image.base + shdr->sh_offset;
        }
    }

    if (!obj->image.symtab) {
        kerror("No symbol table in object\n");
        return -EINVAL;
    }
    if (!obj->image.symstrtab) {
        kerror("No strtab in object\n");
        return -EINVAL;
    }

    shdr = obj->image.sh_symtab;
    // The entry size is used as a divisor both here and in object_reloc()
    if (shdr->sh_entsize == 0) {
        kerror("Invalid symbol table entry size\n");
        return -EINVAL;
    }

    // Locate entry, exit, info symbols
    for (size_t i = 0; i < shdr->sh_size / shdr->sh_entsize; ++i) {
        Elf64_Sym *sym = obj->image.base + shdr->sh_offset + i * shdr->sh_entsize;
        Elf64_Word type = ELF64_ST_TYPE(sym->st_info);

        if (type != STT_FUNC && type != STT_OBJECT && type != STT_NOTYPE) {
            continue;
        }
        // Undefined symbols and reserved section indices have no section
        // header to take a load address from
        if (sym->st_shndx == SHN_UNDEF || sym->st_shndx >= ehdr->e_shnum) {
            continue;
        }

        name = &obj->image.symstrtab[sym->st_name];
        Elf64_Shdr *sym_shdr = object_shdr(obj, sym->st_shndx);
        void *value = (void *) (sym->st_value + obj->object_base + sym_shdr->sh_addr);

        if (!strcmp(name, SYM_MOD_ENTER)) {
            obj->module_enter = value;
        } else if (!strcmp(name, SYM_MOD_EXIT)) {
            obj->module_exit = value;
        } else if (!strcmp(name, SYM_MOD_DESC)) {
            obj->module_desc = value;
        }
    }

    if (!obj->module_desc) {
        kerror("Module defines no info struct\n");
        return -EINVAL;
    }
    if (!obj->module_enter) {
        kerror("Module defines no entry function\n");
        return -EINVAL;
    }

    return 0;
}

/*
 * Places the object in memory: computes its extent and GOT/PLT size,
 * reserves a virtual region, loads all non-empty SHF_ALLOC sections,
 * extracts module symbols and performs relocations.
 * Returns 0 on success, -EINVAL for a malformed object, -ENOMEM if no
 * virtual region is available.
 * TODO: pages mapped before a failure are not released.
 */
static int object_load(struct object *obj) {
    Elf64_Ehdr *ehdr;
    Elf64_Shdr *shdr;
    uintptr_t obj_lowest = (uintptr_t) -1;
    uintptr_t obj_highest = 0;
    int res;

    ehdr = obj->image.base;

    // Find out the object size
    for (size_t i = 0; i < ehdr->e_shnum; ++i) {
        shdr = object_shdr(obj, i);

        if (shdr->sh_flags & SHF_ALLOC) {
            if (shdr->sh_addr < obj_lowest) {
                obj_lowest = shdr->sh_addr;
            }
            if (shdr->sh_addr + shdr->sh_size > obj_highest) {
                obj_highest = shdr->sh_addr + shdr->sh_size;
            }
        }
    }

    // No loadable sections? When none was seen, obj_lowest still holds
    // its initial maximum and obj_highest is 0, so ">=" is required to
    // catch that case as well as an empty extent
    if (obj_lowest >= obj_highest) {
        return -EINVAL;
    }

    // Align both addresses
    obj_lowest &= ~0xFFF;
    obj_highest = (obj_highest + 0xFFF) & ~0xFFF;

    // Find GOT size from relocation count (count each reloc against
    // PLT/GOT as a single PLT entry)
    obj->gotplt_size = 0;
    for (size_t i = 0; i < ehdr->e_shnum; ++i) {
        shdr = object_shdr(obj, i);

        if (shdr->sh_type != SHT_RELA) {
            continue;
        }
        // Used as a divisor below and in object_reloc()
        if (shdr->sh_entsize == 0) {
            return -EINVAL;
        }

        for (size_t j = 0; j < shdr->sh_size / shdr->sh_entsize; ++j) {
            Elf64_Rela *rela = obj->image.base + shdr->sh_offset + j * shdr->sh_entsize;
            Elf64_Word type = ELF64_R_TYPE(rela->r_info);

            switch (type) {
            case R_X86_64_PLT32:
            case R_X86_64_REX_GOTP:
                obj->gotplt_size += MODULE_GOTPLT_ENTRY;
                break;
            default:
                break;
            }
        }
    }

    obj->object_page_count = (obj_highest - obj_lowest) / MM_PAGE_SIZE;
    obj->object_page_count += (obj->gotplt_size + MM_PAGE_SIZE - 1) / MM_PAGE_SIZE;

    // Allocate virtual memory region for the object
    obj->object_base = vmfind(mm_kernel, MODULE_MAP_START, MODULE_MAP_END, obj->object_page_count);
    if (obj->object_base == MM_NADDR) {
        return -ENOMEM;
    }
    // GOT/PLT goes right after the section pages
    obj->gotplt = (void *) (obj->object_base + ((obj_highest - obj_lowest) & ~0xFFF));

    // Load the sections
    for (size_t i = 0; i < ehdr->e_shnum; ++i) {
        shdr = object_shdr(obj, i);

        if (shdr->sh_flags & SHF_ALLOC && shdr->sh_size) {
            _assert(object_section_load(obj, shdr) == 0);
        }
    }

    // Extract useful info/symbols/sections
    if ((res = object_extract_info(obj)) != 0) {
        return res;
    }

    // Perform relocations
    _assert(object_reloc(obj) == 0);

    return 0;
}

/*
 * Checks the comma-separated module names listed in the ".deps" section
 * (if the object has one) against the list of loaded modules.
 * Returns 0 if all of them are loaded, -ENOENT on the first missing one.
 * A dependency name of 64 characters or more trips an assertion.
 */
static int object_check_deps(struct object *obj) {
    Elf64_Ehdr *ehdr;
    Elf64_Shdr *shdr;
    const char *shstrtab, *name;
    char depname[64];
    const char *p, *e;

    ehdr = obj->image.base;
    shstrtab = obj->image.base + object_shdr(obj, ehdr->e_shstrndx)->sh_offset;

    for (size_t i = 0; i < ehdr->e_shnum; ++i) {
        shdr = object_shdr(obj, i);
        name = &shstrtab[shdr->sh_name];

        if (strcmp(name, ".deps")) {
            continue;
        }

        p = obj->image.base + shdr->sh_offset;

        while (1) {
            e = strchr(p, ',');

            if (e) {
                // Name runs up to the separator
                _assert((size_t) (e - p) < sizeof(depname));
                strncpy(depname, p, e - p);
                depname[e - p] = 0;
            } else {
                // Last name runs up to the terminator
                _assert(strlen(p) < sizeof(depname));
                strcpy(depname, p);
            }

            if (!mod_loaded(depname)) {
                kwarn("unresolved dependency: %s\n", depname);
                return -ENOENT;
            }

            if (!e) {
                break;
            }
            p = e + 1;
        }
    }

    return 0;
}

/*
 * Unloads the module named `_name`: calls its exit function (if any),
 * removes it from the module list and frees its object.
 * Returns 0 on success, -EINVAL if the name does not fit the local
 * buffer, -ENOENT if no such module is loaded.
 * NOTE(review): unlike sys_module_load() there is no uid check here --
 * confirm whether the caller enforces permissions.
 */
int sys_module_unload(const char *_name) {
    // TODO: check dependencies
    uintptr_t cr3_old, cr3;
    char name[64];
    struct object *mod = NULL, *it;

    cr3 = MM_PHYS(mm_kernel);

    // The name is copied before the address space is switched
    // (presumably because it points into the caller's space -- confirm)
    if (strlen(_name) >= sizeof(name)) {
        return -EINVAL;
    }
    strcpy(name, _name);

    asm volatile ("movq %%cr3, %0":"=r"(cr3_old)::"memory");
    asm volatile ("cli");
    asm volatile ("movq %0, %%cr3"::"r"(cr3):"memory");

    list_for_each_entry(it, &g_module_list, link) {
        if (!strcmp(it->module_desc->name, name)) {
            mod = it;
            break;
        }
    }

    if (!mod) {
        kdebug("No module with name \"%s\"\n", name);
        asm volatile ("movq %0, %%cr3"::"r"(cr3_old):"memory");
        return -ENOENT;
    }

    if (mod->module_exit) {
        mod->module_exit();
    }

    asm volatile ("movq %0, %%cr3"::"r"(cr3_old):"memory");

    kdebug("Module %s unloaded\n", name);

    // Unlink first: the list must never reference a freed object
    list_del(&mod->link);
    object_free(mod);

    return 0;
}

/*
 * Loads the module stored at `_path` and runs its entry function with
 * `params` as the argument. Only uid 0 is allowed to do so.
 * Returns 0 on success, -EINVAL if the path does not fit the local
 * buffer, -EACCES for other users, -ENOMEM if the object cannot be
 * allocated, or the error of the failing load step / entry function.
 */
int sys_module_load(const char *_path, const char *params) {
    struct object *obj;
    struct process *proc;
    char path[256];
    uintptr_t cr3_old, cr3;
    int res;

    // Bounded copy of the path, taken before the address space switch
    if (strlen(_path) >= sizeof(path)) {
        return -EINVAL;
    }
    strcpy(path, _path);

    proc = thread_self->proc;
    _assert(proc);

    if (proc->ioctx.uid != 0) {
        return -EACCES;
    }

    obj = object_create();
    if (!obj) {
        return -ENOMEM;
    }

    cr3 = MM_PHYS(mm_kernel);
    asm volatile ("movq %%cr3, %0":"=r"(cr3_old)::"memory");

    // Load the image
    if ((res = object_read(obj, &proc->ioctx, path)) != 0) {
        goto cleanup;
    }

    asm volatile ("cli");
    asm volatile ("movq %0, %%cr3"::"r"(cr3):"memory");

    if ((res = object_check_deps(obj)) != 0) {
        goto cleanup;
    }

    if ((res = object_load(obj)) != 0) {
        goto cleanup;
    }

    object_finalize_load(obj);

    list_add(&obj->link, &g_module_list);

    debug_dump(DEBUG_DEFAULT, (void *) (obj->object_base + 0x1d), 64);

    // NOTE(review): `params` is handed over as-is after the address space
    // switch -- confirm it is still valid for the module to read
    if ((res = obj->module_enter((void *) params)) != 0) {
        // The object is freed below, so it has to leave the list again
        list_del(&obj->link);
        goto cleanup;
    }

    // obj continues its lifetime
    asm volatile ("movq %0, %%cr3"::"r"(cr3_old):"memory");
    return 0;

cleanup:
    asm volatile ("movq %0, %%cr3"::"r"(cr3_old):"memory");
    object_free(obj);
    return res;
}

/*
 * sysfs getter: prints one "name page-count base-address" line per
 * loaded module into `buf` (at most `lim` bytes). `ctx` is not used.
 * A module whose descriptor address is not mapped is listed as "???".
 * Always returns 0.
 */
int mod_list(void *ctx, char *buf, size_t lim) {
    struct object *mod;

    list_for_each_entry(mod, &g_module_list, link) {
        uintptr_t phys = mm_map_get(mm_kernel, (uintptr_t) mod->module_desc, NULL);
        const char *name = "???";

        if (phys != MM_NADDR) {
            // Read the descriptor through the physical address translated
            // by MM_VIRTUALIZE instead of the module mapping itself
            struct module_desc *desc = (struct module_desc *) MM_VIRTUALIZE(phys);
            name = desc->name;
        }

        sysfs_buf_printf(buf, lim, "%s %u %p\n", name, mod->object_page_count, mod->object_base);
    }

    return 0;
}