391 lines
12 KiB
C
391 lines
12 KiB
C
#include "arch/amd64/mm/pool.h"
|
|
#include "arch/amd64/mm/map.h"
|
|
#include "sys/mem/shmem.h"
|
|
#include "sys/mem/phys.h"
|
|
#include "sys/assert.h"
|
|
#include "sys/string.h"
|
|
#include "sys/thread.h"
|
|
#include "sys/debug.h"
|
|
#include "sys/panic.h"
|
|
#include "sys/mm.h"
|
|
|
|
uintptr_t mm_map_get(const mm_space_t pml4, uintptr_t vaddr, uint64_t *flags) {
|
|
vaddr = AMD64_MM_STRIPSX(vaddr);
|
|
size_t pml4i = (vaddr >> MM_PML4I_SHIFT) & MM_PTE_INDEX_MASK;
|
|
size_t pdpti = (vaddr >> MM_PDPTI_SHIFT) & MM_PTE_INDEX_MASK;
|
|
size_t pdi = (vaddr >> MM_PDI_SHIFT) & MM_PTE_INDEX_MASK;
|
|
size_t pti = (vaddr >> MM_PTI_SHIFT) & MM_PTE_INDEX_MASK;
|
|
|
|
mm_pdpt_t pdpt;
|
|
mm_pagedir_t pd;
|
|
mm_pagetab_t pt;
|
|
|
|
// L4:
|
|
if (!(pml4[pml4i] & MM_PAGE_PRESENT)) {
|
|
return MM_NADDR;
|
|
}
|
|
|
|
if (pml4[pml4i] & MM_PAGE_HUGE) {
|
|
panic("NYI\n");
|
|
}
|
|
|
|
// L3:
|
|
pdpt = (mm_pdpt_t) MM_VIRTUALIZE(pml4[pml4i] & MM_PTE_MASK);
|
|
|
|
if (!(pdpt[pdpti] & MM_PAGE_PRESENT)) {
|
|
return MM_NADDR;
|
|
}
|
|
|
|
if (pdpt[pdpti] & MM_PAGE_HUGE) {
|
|
return (pdpt[pdpti] & MM_PTE_MASK) | (vaddr & MM_PAGE_L3_OFFSET_MASK);
|
|
}
|
|
|
|
// L2:
|
|
pd = (mm_pagedir_t) MM_VIRTUALIZE(pdpt[pdpti] & MM_PTE_MASK);
|
|
|
|
if (!(pd[pdi] & MM_PAGE_PRESENT)) {
|
|
return MM_NADDR;
|
|
}
|
|
|
|
if (pd[pdi] & MM_PAGE_HUGE) {
|
|
// Page size is 2MiB (1 << 21)
|
|
return (pd[pti] & MM_PTE_MASK) | (vaddr & MM_PAGE_L2_OFFSET_MASK);
|
|
}
|
|
|
|
// L1:
|
|
pt = (mm_pagetab_t) MM_VIRTUALIZE(pd[pdi] & MM_PTE_MASK);
|
|
|
|
if (!(pt[pti] & MM_PAGE_PRESENT)) {
|
|
return MM_NADDR;
|
|
}
|
|
|
|
if (flags) {
|
|
*flags = pt[pti] & MM_PTE_FLAGS_MASK;
|
|
}
|
|
|
|
return (pt[pti] & MM_PTE_MASK) | (vaddr & MM_PAGE_OFFSET_MASK);
|
|
}
|
|
|
|
/**
 * Remove the 4KiB mapping of `vaddr` from the address space rooted at `pml4`.
 *
 * The page table entry is cleared, the TLB entry for the address is
 * invalidated on the executing CPU and the reference count of the backing
 * physical page is decremented. The page itself is NOT freed here, even if
 * its reference count drops to zero.
 *
 * @param pml4  Top-level table of the address space to modify
 * @param vaddr Virtual address of the page to unmap
 * @param size  Currently ignored: only 4KiB pages are supported
 * @return Physical address the page was mapped to, or MM_NADDR if the address
 *         was not mapped.
 *
 * Panics with "NYI" if a huge-page entry is found at any level.
 */
uintptr_t mm_umap_single(mm_space_t pml4, uintptr_t vaddr, uint32_t size) {
    // TODO: support page sizes other than 4KiB
    // (Though I can't think of any reason to use it)
    (void) size;

    vaddr = AMD64_MM_STRIPSX(vaddr);
    size_t pml4i = (vaddr >> MM_PML4I_SHIFT) & MM_PTE_INDEX_MASK;
    size_t pdpti = (vaddr >> MM_PDPTI_SHIFT) & MM_PTE_INDEX_MASK;
    size_t pdi = (vaddr >> MM_PDI_SHIFT) & MM_PTE_INDEX_MASK;
    size_t pti = (vaddr >> MM_PTI_SHIFT) & MM_PTE_INDEX_MASK;

    mm_pdpt_t pdpt;
    mm_pagedir_t pd;
    mm_pagetab_t pt;

    // L4:
    if (!(pml4[pml4i] & MM_PAGE_PRESENT)) {
        return MM_NADDR;
    }

    if (pml4[pml4i] & MM_PAGE_HUGE) {
        panic("NYI\n");
    }

    // L3:
    pdpt = (mm_pdpt_t) MM_VIRTUALIZE(pml4[pml4i] & MM_PTE_MASK);

    if (!(pdpt[pdpti] & MM_PAGE_PRESENT)) {
        return MM_NADDR;
    }

    if (pdpt[pdpti] & MM_PAGE_HUGE) {
        panic("NYI\n");
    }

    // L2:
    pd = (mm_pagedir_t) MM_VIRTUALIZE(pdpt[pdpti] & MM_PTE_MASK);

    if (!(pd[pdi] & MM_PAGE_PRESENT)) {
        return MM_NADDR;
    }

    if (pd[pdi] & MM_PAGE_HUGE) {
        panic("NYI\n");
    }

    // L1:
    pt = (mm_pagetab_t) MM_VIRTUALIZE(pd[pdi] & MM_PTE_MASK);

    if (!(pt[pti] & MM_PAGE_PRESENT)) {
        return MM_NADDR;
    }

    uint64_t old = pt[pti] & MM_PTE_MASK;
    pt[pti] = 0;

    // The "memory" clobber keeps the compiler from moving the store that
    // clears the entry to after the TLB invalidation.
    // NOTE(review): vaddr has been through AMD64_MM_STRIPSX at this point; if
    // that yields a non-canonical value for upper-half addresses, the
    // invalidation may not have the intended effect - confirm
    asm volatile("invlpg (%0)"::"r"(vaddr):"memory");

    // Drop the reference this mapping held on the physical page
    struct page *page = PHYS2PAGE(old);
    _assert(page);
    _assert(page->refcount);
    --page->refcount;

    return old;
}
|
|
|
|
/**
 * Map one 4KiB page: make `virt_addr` refer to physical page `phys` in the
 * address space rooted at `pml4`.
 *
 * Missing intermediate tables (PDPT, PD, PT) are taken from
 * amd64_mm_pool_alloc() and linked in as present, user-accessible and
 * writable; the access actually granted is determined by `flags` in the final
 * entry. The reference count of the physical page is incremented.
 *
 * @param pml4      Top-level table of the address space to modify
 * @param virt_addr Virtual address to map
 * @param phys      Physical address of the page to map
 * @param flags     Entry flags; only the bits in `MM_PTE_FLAGS_MASK` are used,
 *                  `MM_PAGE_PRESENT` is always added
 * @return Always 0. A failed table allocation, an already present entry for
 *         `virt_addr` or a physical address without a `struct page` trips an
 *         assertion instead of returning an error.
 */
int mm_map_single(mm_space_t pml4, uintptr_t virt_addr, uintptr_t phys, uint64_t flags) {
    virt_addr = AMD64_MM_STRIPSX(virt_addr);
    // TODO: support page sizes other than 4KiB
    // (Though I can't think of any reason to use it)
    size_t pml4i = (virt_addr >> MM_PML4I_SHIFT) & MM_PTE_INDEX_MASK;
    size_t pdpti = (virt_addr >> MM_PDPTI_SHIFT) & MM_PTE_INDEX_MASK;
    size_t pdi = (virt_addr >> MM_PDI_SHIFT) & MM_PTE_INDEX_MASK;
    size_t pti = (virt_addr >> MM_PTI_SHIFT) & MM_PTE_INDEX_MASK;

    mm_pdpt_t pdpt;
    mm_pagedir_t pd;
    mm_pagetab_t pt;

    // NOTE(review): freshly allocated tables are used without being cleared
    // here, so amd64_mm_pool_alloc() is presumably returning zeroed tables -
    // confirm
    if (!(pml4[pml4i] & MM_PAGE_PRESENT)) {
        // Allocate PDPT
        pdpt = (mm_pdpt_t) amd64_mm_pool_alloc();
        assert(pdpt, "PDPT alloc failed\n");

        pml4[pml4i] = MM_PHYS(pdpt) |
                      MM_PAGE_PRESENT |
                      MM_PAGE_USER |
                      MM_PAGE_WRITE;
    } else {
        pdpt = (mm_pdpt_t) MM_VIRTUALIZE(pml4[pml4i] & MM_PTE_MASK);
    }

    if (!(pdpt[pdpti] & MM_PAGE_PRESENT)) {
        // Allocate PD
        pd = (mm_pagedir_t) amd64_mm_pool_alloc();
        assert(pd, "PD alloc failed\n");

        pdpt[pdpti] = MM_PHYS(pd) |
                      MM_PAGE_PRESENT |
                      MM_PAGE_USER |
                      MM_PAGE_WRITE;
    } else {
        pd = (mm_pagedir_t) MM_VIRTUALIZE(pdpt[pdpti] & MM_PTE_MASK);
    }

    if (!(pd[pdi] & MM_PAGE_PRESENT)) {
        // Allocate PT
        pt = (mm_pagetab_t) amd64_mm_pool_alloc();
        assert(pt, "PT alloc failed\n");

        pd[pdi] = MM_PHYS(pt) |
                  MM_PAGE_PRESENT |
                  MM_PAGE_USER |
                  MM_PAGE_WRITE;
    } else {
        pt = (mm_pagetab_t) MM_VIRTUALIZE(pd[pdi] & MM_PTE_MASK);
    }

    // Disallow overwriting without unmapping entries first
    assert(!(pt[pti] & MM_PAGE_PRESENT), "Entry already present for %p\n", virt_addr);

#if defined(KERNEL_TEST_MODE)
    kdebug("map %p -> %p %cr%c%c%c\n", virt_addr, phys,
           (flags & MM_PAGE_USER) ? 'u' : '-',
           (flags & MM_PAGE_WRITE) ? 'w' : '-',
           (flags & MM_PAGE_NOEXEC) ? '-' : 'x',
           (flags & MM_PAGE_GLOBAL) ? 'G' : '-');
#endif

    // Increase refcount on physical page
    struct page *pg = PHYS2PAGE(phys);
    _assert(pg);
    ++pg->refcount;

    pt[pti] = (phys & MM_PAGE_MASK) |
              (flags & MM_PTE_FLAGS_MASK) |
              MM_PAGE_PRESENT;

    // The "memory" clobber keeps the compiler from moving the entry store
    // above to after the TLB invalidation
    asm volatile("invlpg (%0)"::"r"(virt_addr):"memory");

    return 0;
}
|
|
|
|
/**
 * Make `dst_pml4` share parts of the address space described by `src_pml4`.
 *
 * @param dst_pml4 Top-level table receiving the entries
 * @param src_pml4 Top-level table the entries are taken from
 * @param flags    MM_CLONE_FLG_KERNEL copies the PML4 entries from
 *                 AMD64_PML4I_USER_END up to MM_PTE_COUNT;
 *                 MM_CLONE_FLG_USER is not implemented and panics
 * @return Always 0
 */
int mm_space_clone(mm_space_t dst_pml4, const mm_space_t src_pml4, uint32_t flags) {
    // Cloning the userspace part is not available through this function
    if (flags & MM_CLONE_FLG_USER) {
        panic("NYI\n");
    }

    // Without the kernel flag there is nothing left to do
    if (!(flags & MM_CLONE_FLG_KERNEL)) {
        return 0;
    }

    // Kernel tables are guaranteed to be shared across all the spaces, so it
    // is enough to copy the top-level references verbatim. No PDPT/PD/PT has
    // to be allocated for the destination.
    size_t slot = AMD64_PML4I_USER_END;
    while (slot < MM_PTE_COUNT) {
        dst_pml4[slot] = src_pml4[slot];
        ++slot;
    }

    return 0;
}
|
|
|
|
int mm_space_fork(struct process *dst, const struct process *src, uint32_t flags) {
|
|
mm_pml4_t dst_pml4 = dst->space;
|
|
const mm_pml4_t src_pml4 = src->space;
|
|
|
|
if (flags & MM_CLONE_FLG_USER) {
|
|
for (size_t pml4i = 0; pml4i < AMD64_PML4I_USER_END; ++pml4i) {
|
|
if (!(src_pml4[pml4i] & MM_PAGE_PRESENT)) {
|
|
continue;
|
|
}
|
|
|
|
if (src_pml4[pml4i] & MM_PAGE_HUGE) {
|
|
panic("PML4 page has PS bit set\n");
|
|
}
|
|
|
|
mm_pdpt_t src_pdpt = (mm_pdpt_t) MM_VIRTUALIZE(src_pml4[pml4i] & MM_PTE_MASK);
|
|
// Make sure we've got a clean table
|
|
_assert(!(dst_pml4[pml4i] & MM_PAGE_PRESENT));
|
|
mm_pdpt_t dst_pdpt = (mm_pdpt_t) amd64_mm_pool_alloc();
|
|
_assert(dst_pdpt);
|
|
dst_pml4[pml4i] = MM_PHYS(dst_pdpt) | (src_pml4[pml4i] & MM_PTE_FLAGS_MASK);
|
|
|
|
for (size_t pdpti = 0; pdpti < MM_PTE_COUNT; ++pdpti) {
|
|
if (!(src_pdpt[pdpti] & MM_PAGE_PRESENT)) {
|
|
continue;
|
|
}
|
|
|
|
if (src_pdpt[pdpti] & MM_PAGE_HUGE) {
|
|
// Not allowed in U/S
|
|
panic("1GiB pages not supported in userspace\n");
|
|
}
|
|
|
|
mm_pagedir_t src_pd = (mm_pagedir_t) MM_VIRTUALIZE(src_pdpt[pdpti] & MM_PTE_MASK);
|
|
mm_pagedir_t dst_pd = (mm_pagedir_t) amd64_mm_pool_alloc();
|
|
_assert(dst_pd);
|
|
dst_pdpt[pdpti] = MM_PHYS(dst_pd) | (src_pdpt[pdpti] & MM_PTE_FLAGS_MASK);
|
|
|
|
for (size_t pdi = 0; pdi < MM_PTE_COUNT; ++pdi) {
|
|
if (!(src_pd[pdi] & MM_PAGE_PRESENT)) {
|
|
continue;
|
|
}
|
|
|
|
if (src_pd[pdi] & MM_PAGE_HUGE) {
|
|
panic("2MiB pages not supported in userspace\n");
|
|
}
|
|
|
|
mm_pagetab_t src_pt = (mm_pagetab_t) MM_VIRTUALIZE(src_pd[pdi] & MM_PTE_MASK);
|
|
mm_pagetab_t dst_pt = (mm_pagetab_t) amd64_mm_pool_alloc();
|
|
_assert(dst_pt);
|
|
dst_pd[pdi] = MM_PHYS(dst_pt) | (src_pd[pdi] & MM_PTE_FLAGS_MASK);
|
|
|
|
for (size_t pti = 0; pti < MM_PTE_COUNT; ++pti) {
|
|
if (!(src_pt[pti] & MM_PAGE_PRESENT)) {
|
|
continue;
|
|
}
|
|
|
|
uintptr_t src_page_virt = (pml4i << MM_PML4I_SHIFT) |
|
|
(pdpti << MM_PDPTI_SHIFT) |
|
|
(pdi << MM_PDI_SHIFT) |
|
|
(pti << MM_PTI_SHIFT);
|
|
uintptr_t src_page_phys = src_pt[pti] & MM_PTE_MASK;
|
|
struct page *src_page = PHYS2PAGE(src_page_phys);
|
|
|
|
_assert(src_page->refcount);
|
|
|
|
#define MM_USE_COW
|
|
if ((src_pt[pti] & MM_PAGE_WRITE) && src_page->usage == PU_PRIVATE) {
|
|
// Clone the mapping, use CoW
|
|
#if defined(MM_USE_COW)
|
|
uint64_t access = src_pt[pti] & (MM_PTE_FLAGS_MASK & ~MM_PAGE_WRITE);
|
|
dst_pt[pti] = src_page_phys | access;
|
|
src_pt[pti] &= ~MM_PAGE_WRITE;
|
|
asm volatile("invlpg (%0)"::"r"(src_page_virt));
|
|
++src_page->refcount;
|
|
#else
|
|
uint64_t access = src_pt[pti] & MM_PTE_FLAGS_MASK;
|
|
uintptr_t new_page = mm_phys_alloc_page(PU_PRIVATE);
|
|
_assert(new_page != MM_NADDR);
|
|
memcpy((void *) MM_VIRTUALIZE(new_page),
|
|
(const void *) MM_VIRTUALIZE(src_page_phys),
|
|
0x1000);
|
|
dst_pt[pti] = new_page | access;
|
|
++PHYS2PAGE(new_page)->refcount;
|
|
#endif
|
|
} else {
|
|
// Just clone the mapping - it's readonly
|
|
dst_pt[pti] = src_page_phys | (src_pt[pti] & MM_PTE_FLAGS_MASK);
|
|
++src_page->refcount;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
// Kernel pages don't need to be copied - just use mm_space_clone(, , MM_CLONE_FLG_KERNEL)
|
|
return mm_space_clone(dst_pml4, src_pml4, MM_CLONE_FLG_KERNEL & flags);
|
|
}
|
|
|
|
void mm_space_release(struct process *proc) {
|
|
mm_space_t pml4 = proc->space;
|
|
|
|
if (pml4 == mm_kernel) {
|
|
panic("???\n");
|
|
}
|
|
|
|
for (size_t pml4i = 0; pml4i < AMD64_PML4I_USER_END; ++pml4i) {
|
|
if (!(pml4[pml4i] & MM_PAGE_PRESENT)) {
|
|
continue;
|
|
}
|
|
|
|
mm_pdpt_t pdpt = (mm_pdpt_t) MM_VIRTUALIZE(pml4[pml4i] & MM_PTE_MASK);
|
|
|
|
for (size_t pdpti = 0; pdpti < MM_PTE_COUNT; ++pdpti) {
|
|
if (!(pdpt[pdpti] & MM_PAGE_PRESENT)) {
|
|
continue;
|
|
}
|
|
|
|
mm_pagedir_t pd = (mm_pagedir_t) MM_VIRTUALIZE(pdpt[pdpti] & MM_PTE_MASK);
|
|
|
|
for (size_t pdi = 0; pdi < MM_PTE_COUNT; ++pdi) {
|
|
if (!(pd[pdi] & MM_PAGE_PRESENT)) {
|
|
continue;
|
|
}
|
|
|
|
mm_space_t pt = (mm_space_t) MM_VIRTUALIZE(pd[pdi] & MM_PTE_MASK);
|
|
|
|
for (size_t pti = 0; pti < MM_PTE_COUNT; ++pti) {
|
|
if (!(pt[pti] & MM_PAGE_PRESENT)) {
|
|
continue;
|
|
}
|
|
|
|
uintptr_t page_phys = pt[pti] & MM_PTE_MASK;
|
|
struct page *page = PHYS2PAGE(page_phys);
|
|
_assert(page->refcount);
|
|
--page->refcount;
|
|
|
|
// Any page with zero refcount can be released
|
|
if (!page->refcount) {
|
|
mm_phys_free_page(page_phys);
|
|
}
|
|
}
|
|
|
|
amd64_mm_pool_free(pt);
|
|
}
|
|
|
|
amd64_mm_pool_free(pd);
|
|
}
|
|
|
|
amd64_mm_pool_free(pdpt);
|
|
|
|
pml4[pml4i] = 0;
|
|
}
|
|
}
|
|
|
|
void mm_space_free(struct process *proc) {
|
|
mm_space_release(proc);
|
|
amd64_mm_pool_free(proc->space);
|
|
}
|
|
|
|
/**
 * Placeholder for the address space dump: currently it only emits a warning
 * saying that the implementation is missing.
 *
 * @param pml4 Address space to describe (not used at the moment)
 */
void mm_describe(const mm_space_t pml4) {
    // Nothing is read from the address space until this gets reimplemented
    (void) pml4;

    kwarn("mm_describe was removed until I reimplement it properly\n");
}
|