ld: rework dynamic loader, proper pie binaries

This commit is contained in:
Mark Poliakov 2024-11-14 03:08:39 +02:00
parent a9f4a958de
commit e2ef677b4a
11 changed files with 532 additions and 671 deletions

View File

@ -1,33 +0,0 @@
use crate::STATE;
/// Argument record read by `__dl_tls_get_addr`: names a thread-local location
/// as a (module, offset) pair. `#[repr(C)]` keeps the two-word layout fixed.
#[allow(non_camel_case_types)]
#[repr(C)]
pub struct tls_index {
// TLS module identifier; `__dl_tls_get_addr` asserts that it is non-zero.
dtpmod: usize,
// Byte offset added to the base address of the module's TLS block.
dtpoff: usize,
}
pub unsafe extern "C" fn __dl_tls_get_addr(index: *const tls_index) -> usize {
let state = STATE.as_ref().unwrap();
let dtpmod = (*index).dtpmod;
let dtpoff = (*index).dtpoff;
assert_ne!(dtpmod, 0);
state.tls_address(dtpmod).expect("__tls_get_addr: failed") + dtpoff
}
// AArch64-only assembly routine `__dl_tlsdesc_static`.
// It receives a pointer to a two-word record in x0 and returns the second
// word (the 8 bytes at offset 8) in x0.
#[cfg(any(target_arch = "aarch64", rust_analyzer))]
std::arch::global_asm!(
r#"
.global __dl_tlsdesc_static
__dl_tlsdesc_static:
// x0 -- ptr to two words
ldr x0, [x0, #8]
ret
"#
);
// Rust-side declaration of the `__dl_tlsdesc_static` assembly routine, so its
// address can be taken from Rust code.
// NOTE(review): the symbol is only defined by the aarch64 `global_asm!` block;
// confirm nothing references it on other targets.
extern "C" {
pub fn __dl_tlsdesc_static(value: &[usize; 2]) -> usize;
}

View File

@ -1,25 +1,21 @@
use std::{io, path::PathBuf};
use std::io;
#[derive(thiserror::Error, Debug)]
#[derive(Debug, thiserror::Error)]
pub enum Error {
#[error("I/O error: {0}")]
#[error("I/O error")]
Io(#[from] io::Error),
#[error("ELF parse error: {0}")]
#[error("ELF parse error")]
ElfParse(#[from] elf::ParseError),
#[error("Could not map memory ({0} bytes): {1:?}")]
Map(usize, yggdrasil_rt::Error),
#[error("Could not locate library: {0}")]
#[error("Memory mapping error: {0:?}")]
MemoryMap(yggdrasil_rt::Error),
#[error("Object not loaded yet")]
NotLoaded,
#[error("Unsupported ELF type: {0:#06x}")]
UnhandledElfType(u16),
#[error("Unresolved symbols")]
UnresolvedSymbols,
#[error("Library not found: {0:?}")]
LibraryNotFound(String),
#[error("Cannot perform the operation: object is not loaded yet")]
ObjectNotLoaded,
#[error("Undefined reference(s)")]
UndefinedReference,
#[error("Unable to perform relocation")]
CannotRelocate,
#[error("Unsupported relocation type: {0}")]
UnsupportedRelocation(u32),
#[error("{0:?} is missing a dynamic symbol table")]
MissingDynamicSymbolTable(PathBuf),
#[error("No entry point in loaded object: not an executable?")]
NoEntryPoint,
#[error("Object does not have an entry (trying to run a shared library?)")]
NoEntrypoint,
}

View File

@ -1,167 +1,84 @@
#![feature(yggdrasil_os)]
use std::{
collections::HashMap,
env,
path::{Path, PathBuf},
process::ExitCode,
};
#![feature(yggdrasil_os, never_type, map_try_insert, slice_ptr_get)]
use std::{collections::HashMap, env, process::ExitCode};
use error::Error;
use object::Object;
use state::State;
use yggdrasil_rt::process::ProgramArgumentInner;
use crate::object::Object;
pub mod error;
pub mod object;
pub mod mapping;
pub mod state;
pub mod relocation;
pub mod search;
mod builtins;
mod error;
mod object;
mod relocation;
mod state;
/// Loader settings gathered from environment variables (see `Config::from_env`).
struct Config {
// Directories searched, in order, when resolving a library name.
library_path: Vec<PathBuf>,
// When set, the loader lists the objects it would load and exits without running the program.
trace_libraries: bool,
}
impl Config {
    /// Builds the configuration from the process environment.
    ///
    /// * `LD_LIBRARY_PATH` — colon-separated search directories; when the
    ///   variable cannot be read, the search path is just `/lib`.
    /// * `LD_TRACE_LOADED_OBJECTS` — tracing is enabled only when the value is
    ///   exactly `1`.
    ///
    /// The current implementation never returns an error.
    pub fn from_env() -> Result<Self, Error> {
        let library_path = match env::var("LD_LIBRARY_PATH") {
            Ok(paths) => paths.split(':').map(PathBuf::from).collect(),
            Err(_) => vec!["/lib".into()],
        };

        let trace_libraries = matches!(
            env::var("LD_TRACE_LOADED_OBJECTS").as_deref(),
            Ok("1")
        );

        Ok(Self {
            library_path,
            trace_libraries,
        })
    }
}
static mut STATE: Option<State> = None;
fn resolve_library(config: &Config, name: &str) -> Result<PathBuf, Error> {
// TODO temporary hack for hash-tagged libstd
if name.starts_with("libstd-") && name.ends_with(".so") {
return resolve_library(config, "libstd.so");
}
for lib_dir in config.library_path.iter() {
let path = lib_dir.join(name);
if path.exists() {
return Ok(path);
fn run(binary: &str, args: &[String]) -> Result<!, Error> {
let mut state = State::new();
let mut root = Object::open(binary)?;
let mut libraries = HashMap::new();
for needed in root.needed() {
// TODO load needed of needed
let path = search::find_library(needed.as_str())?;
if libraries.contains_key(&path) {
continue;
}
let object = Object::open(&path)?;
libraries.insert(path, object);
}
Err(Error::LibraryNotFound(name.into()))
}
fn run<P: AsRef<Path>>(path: P, args: &[String]) -> Result<(), Error> {
let mut state = State::default();
let mut libs = HashMap::new();
let config = Config::from_env()?;
state.insert_linker_builtins();
// Open and load the main object
let mut main_object = Object::open(path)?;
main_object.load(&mut state)?;
main_object.collect_dependencies(&mut libs, &|name| resolve_library(&config, name))?;
if config.trace_libraries {
println!("Main object: {}", main_object.path.display());
for (item, _) in libs.iter() {
println!("* {}", item.display());
}
return Ok(());
}
// Load the libraries first
for (_, lib) in libs.iter_mut() {
root.load(&mut state)?;
for (_, lib) in libraries.iter_mut() {
lib.load(&mut state)?;
}
root.resolve_symbols(&mut state);
for (_, lib) in libraries.iter_mut() {
lib.resolve_symbols(&mut state);
}
// Relocate the libraries
for (_, lib) in libs.iter_mut() {
if let Err(undefined) = state.no_undefined_symbols() {
eprintln!("Undefined symbols:");
for (path, syms) in undefined {
eprintln!(" in {path:?}");
for sym in syms {
eprintln!(" * {sym:?}");
}
}
return Err(Error::UnresolvedSymbols);
}
root.relocate(&mut state)?;
for (_, lib) in libraries.iter_mut() {
lib.relocate(&mut state)?;
}
// Then relocate the main object
main_object.relocate(&mut state)?;
debug_trace!("Load finished");
if !state.undefined_references.is_empty() {
for item in state.undefined_references.iter() {
eprintln!("Undefined reference to {:?}", item);
}
return Err(Error::UndefinedReference);
}
let entry = root.entry().ok_or(Error::NoEntrypoint)?;
debug_trace!("entry = {:p}", entry);
let args = args.iter().map(|s| s.as_str()).collect::<Vec<_>>();
// TODO pass environment to the program
// TODO
let envs = vec![];
let arg = Box::new(ProgramArgumentInner {
args: &args,
env: &envs,
env: &envs
});
let arg = Box::into_raw(arg).addr();
let entry = main_object.entry()?.ok_or(Error::NoEntryPoint)?;
// Store object as linker's global state and enter the program
unsafe {
STATE = Some(state);
// FIXME TLS relocation and allocation is really broken for aarch64, so this hack ensures
// code doesn't try to add the TLS provided by the kernel to this linker to whatever
// is returned from __dl_tlsdesc_static.
//
// This breaks TLS for the loader itself, but at least allows proper relocations
// against local TLS data to be performed directly without having to go and collect
// all TLS locals into one single blob.
#[cfg(target_arch = "aarch64")]
std::arch::asm!("msr tpidr_el0, xzr");
entry(Box::leak(arg) as *mut _ as usize);
}
entry(arg);
unreachable!()
}
fn main() -> ExitCode {
let args: Vec<_> = env::args().collect();
assert_ne!(args.len(), 0);
let args: Vec<String> = env::args().skip(1).collect();
if args.len() == 1 {
eprintln!(
"{}\n\n{} PROGRAM [ARGS...]",
r#"
This program is the Yggdrasil OS dynamic executable linker/loader.
It is not meant to be called directly without arguments, but to be
used as an interpreter for dynamically-linked programs instead.
If needed, the program can still be invoked like follows:
"#
.trim(),
args[0]
);
return ExitCode::SUCCESS;
};
let args = &args[1..];
let program = PathBuf::from(&args[0]);
match run(program, args) {
// Normal execution doesn't return here, but if LD_TRACE_LOADED_OBJECTS is set,
// the loader will exit after printing everything
Ok(()) => ExitCode::SUCCESS,
Err(error) => {
eprintln!("Error: {}", error);
ExitCode::FAILURE
}
if args.is_empty() {
// Dump help and exit
todo!()
}
let Err(error) = run(&args[0], &args);
eprintln!("Error: {error}");
ExitCode::FAILURE
}

View File

@ -0,0 +1,62 @@
use std::{
ops::{Deref, DerefMut, Range},
ptr::NonNull,
};
use yggdrasil_rt::mem::MappingSource;
use crate::error::Error;
/// An owned anonymous memory mapping: created by `Mapping::new` and released
/// through `unmap_memory` when dropped.
pub struct Mapping {
// Pointer to the start of the mapping together with its page-rounded length in bytes.
data: NonNull<[u8]>,
}
impl Mapping {
pub fn new(size: usize) -> Result<Mapping, Error> {
let size = (size + 0xFFF) & !0xFFF;
let base = unsafe { yggdrasil_rt::sys::map_memory(None, size, &MappingSource::Anonymous) }
.map_err(|e| Error::MemoryMap(e))?;
let base_ptr =
unsafe { NonNull::new_unchecked(core::ptr::with_exposed_provenance_mut(base)) };
let data = NonNull::slice_from_raw_parts(base_ptr, size);
Ok(Self { data })
}
pub fn qword(&mut self, offset: u64) -> NonNull<i64> {
unsafe { self.data.as_non_null_ptr().add(offset as usize).cast() }
}
pub fn base(&self) -> usize {
self.data.addr().into()
}
pub fn range(&self) -> Range<usize> {
self.base()..self.base() + self.len()
}
}
impl Drop for Mapping {
    /// Returns the mapped range to the system. A failure to unmap is ignored.
    fn drop(&mut self) {
        let (base, size) = (self.data.addr().into(), self.data.len());
        let _ = unsafe { yggdrasil_rt::sys::unmap_memory(base, size) };
    }
}
// Read-only byte-slice view over the whole mapping.
impl Deref for Mapping {
type Target = [u8];
fn deref(&self) -> &Self::Target {
// `data` was built in `Mapping::new` from the mapped base and size, and is only unmapped in `Drop`.
unsafe { self.data.as_ref() }
}
}
// Mutable byte-slice view over the whole mapping.
impl DerefMut for Mapping {
fn deref_mut(&mut self) -> &mut Self::Target {
// Exclusive access follows from `&mut self`; `data` stays mapped until `Drop`.
unsafe { self.data.as_mut() }
}
}

View File

@ -1,35 +1,46 @@
use std::{
collections::HashMap,
fs::File,
io::{BufReader, Read, Seek, SeekFrom},
path::{Path, PathBuf},
rc::Rc,
};
use elf::{endian::AnyEndian, symbol::Symbol, ElfStream};
use yggdrasil_rt::mem::MappingSource;
use elf::{
abi::{
DF_1_PIE, DT_FLAGS_1, DT_NEEDED, ET_DYN, ET_EXEC, PT_DYNAMIC, PT_GNU_EH_FRAME,
PT_GNU_RELRO, PT_GNU_STACK, PT_INTERP, PT_LOAD, PT_NOTE, PT_NULL, PT_PHDR, SHN_UNDEF,
SHT_REL, SHT_RELA, STB_GLOBAL, STB_LOCAL, STB_WEAK,
},
endian::AnyEndian,
symbol::Symbol,
ElfStream,
};
use crate::{relocation::RelocationExt, Error, State};
use crate::{
error::Error,
mapping::Mapping,
relocation::{Relocation, RelocationValue},
state::{ExportedSymbol, State},
};
/// Kind of ELF object, derived from `e_type` when the object is opened.
pub enum ElfType {
// ET_EXEC
Static,
// ET_DYN
// The flag records whether `.dynamic` carries DT_FLAGS_1 == DF_1_PIE:
// `true` is treated as a PIE executable, `false` as a shared library.
Relocatable(bool),
}
/// Symbol operand handed to a relocation after lookup.
pub enum ResolvedSymbol<'s> {
// Symbol found in the global table of exported symbols.
Global(&'s ExportedSymbol),
// Named local symbol; `value()` is not implemented for it yet.
Local,
// No symbol (local binding with an empty name); its value is 0.
Null,
}
/// One entry of an object's dynamic symbol table, paired with its name.
#[derive(Debug)]
pub struct DynamicSymbol {
// Initialised to `None` when the object is opened.
pub value: Option<usize>,
// Initialised to `None` when the object is opened.
pub tls_index: Option<usize>,
// The symbol table entry as parsed from the file.
pub raw: Symbol,
// Name read from the dynamic string table at `st_name`.
pub name: Rc<str>,
}
/// TLS block of one object: the backing memory plus a per-symbol offset table.
pub struct ObjectTls {
// Memory holding the TLS data.
pub data: ObjectMapping,
// Maps symbols to their offsets within the TLS data
pub symbol_table: HashMap<Rc<str>, usize>,
}
/// Anonymous memory mapping described by its base address and size;
/// unmapped when dropped.
pub struct ObjectMapping {
// Address returned by `map_memory`.
pub base: usize,
// Size in bytes that was requested from `map_memory`.
pub size: usize,
}
pub struct Object {
pub path: PathBuf,
pub file: BufReader<File>,
@ -37,68 +48,83 @@ pub struct Object {
pub vma_start: usize,
pub vma_end: usize,
mapping: Option<ObjectMapping>,
tls_module_id: Option<usize>,
pub ty: ElfType,
needed: Vec<String>,
mapping: Option<Mapping>,
dynamic_symbol_array: Vec<DynamicSymbol>,
}
impl ObjectTls {
pub fn new(size: usize) -> Result<Self, Error> {
Ok(Self {
data: ObjectMapping::new(size)?,
symbol_table: HashMap::new(),
})
impl ResolvedSymbol<'_> {
    /// Value of the symbol as used in relocation formulas: the exported value
    /// for a global symbol, 0 for the null symbol.
    ///
    /// # Panics
    ///
    /// Local symbols are not supported yet and hit `todo!()`.
    pub fn value(&self) -> i64 {
        match self {
            Self::Null => 0,
            Self::Global(exported) => exported.value as i64,
            Self::Local => todo!(),
        }
    }
}
impl Object {
pub fn open<P: AsRef<Path>>(path: P) -> Result<Self, Error> {
let path = path.as_ref().to_owned();
debug_trace!("Object::open({:?})", path);
let file = BufReader::new(File::open(&path)?);
let mut elf = ElfStream::open_stream(file)?;
let file = BufReader::new(File::open(&path)?);
let (vma_start, vma_end) = object_bounds(&elf);
// TODO check ELF validity
let ty = match elf.ehdr.e_type {
ET_EXEC => ElfType::Static,
ET_DYN => {
// Look for PIE flag in .dynamic
let is_executable = if let Some(dynamic) = elf.dynamic()? {
dynamic
.iter()
.any(|e| e.d_tag == DT_FLAGS_1 && e.d_val() == DF_1_PIE as _)
} else {
// Weird, ET_DYN without .dynamic?
false
};
// Extract info from .dynamic
let mut needed = vec![];
if let Some(dynamic) = elf.dynamic()? {
// TODO use filter() instead?
for entry in dynamic {
if entry.d_tag == elf::abi::DT_NEEDED {
needed.push(entry.d_val() as usize);
}
ElfType::Relocatable(is_executable)
}
}
t => return Err(Error::UnhandledElfType(t)),
};
let needed = if !needed.is_empty() {
// TODO handle
let (_, dynstr) = elf
.dynamic_symbol_table()?
.ok_or_else(|| Error::MissingDynamicSymbolTable(path.clone()))?;
let (vma_start, vma_end) = Self::bounds(&elf);
needed
.into_iter()
.map(|off| dynstr.get(off).map(ToOwned::to_owned))
.collect::<Result<_, _>>()?
// Extract DT_NEEDED entries
let needed = if let Some(dynamic) = elf.dynamic()? {
dynamic
.iter()
.filter_map(|entry| (entry.d_tag == DT_NEEDED).then(|| entry.d_val() as usize))
.collect()
} else {
vec![]
};
let mut dynamic_symbol_array = vec![];
let needed = if !needed.is_empty() {
// TODO error?
let (_, dynstr) = elf.dynamic_symbol_table()?.unwrap();
if let Some((dynsym, dynstr)) = elf.dynamic_symbol_table()? {
for symbol in dynsym {
let name: Rc<str> = dynstr.get(symbol.st_name as usize)?.into();
needed
.into_iter()
.map(|off| dynstr.get(off).map(ToOwned::to_owned))
.collect::<Result<Vec<_>, _>>()?
} else {
vec![]
};
// Extract dynamic symbols
let mut dynamic_symbol_array = vec![];
if let Some((dynsyms, dynstr)) = elf.dynamic_symbol_table()? {
for symbol in dynsyms {
let name = dynstr.get(symbol.st_name as usize)?.into();
dynamic_symbol_array.push(DynamicSymbol {
value: None,
tls_index: None,
raw: symbol.clone(),
name: name.clone(),
name,
});
}
}
@ -108,211 +134,196 @@ impl Object {
file,
elf,
ty,
vma_start,
vma_end,
mapping: None,
tls_module_id: None,
needed,
mapping: None,
dynamic_symbol_array,
})
}
pub fn entry(&self) -> Result<Option<extern "C" fn(usize) -> !>, Error> {
let mapping = self.mapping.as_ref().ok_or(Error::ObjectNotLoaded)?;
if self.elf.ehdr.e_entry == 0 {
return Ok(None);
}
let vma = self.elf.ehdr.e_entry as usize;
debug_trace!("entry = {:#x}", mapping.base + vma - self.vma_start);
Ok(Some(unsafe {
std::mem::transmute(mapping.base + vma - self.vma_start)
}))
}
pub fn load(&mut self, state: &mut State) -> Result<(), Error> {
// Already loaded
if self.mapping.is_some() {
return Ok(());
}
let size = self.vma_end - self.vma_start;
let mut mapping = ObjectMapping::new(size)?;
debug_trace!("Load {:?}", self.path);
debug_trace!("Image range: {:#x?}", self.vma_start..self.vma_end);
let base = mapping.base;
let object_data = mapping.as_slice_mut();
let mapping_size = self.vma_end - self.vma_start;
let mut mapping = match self.ty {
ElfType::Relocatable(_) => Mapping::new(mapping_size)?,
// TODO fixed mapping for this one
ElfType::Static => todo!(),
};
for segment in self.elf.segments() {
if segment.p_type != elf::abi::PT_LOAD {
continue;
}
}
debug_trace!(
"Actual range: {:#x?} ({:+#x})",
mapping.range(),
mapping.base() as i64 - self.vma_start as i64
);
// Load segments
for segment in self.elf.segments() {
let mem_size = segment.p_memsz as usize;
let file_size = segment.p_filesz as usize;
match segment.p_type {
elf::abi::PT_LOAD => {
let rel_offset = segment.p_vaddr as usize - self.vma_start;
PT_NULL => continue,
PT_LOAD => {
// Map offset from image VMA to "real" VMA
let load_offset = segment.p_vaddr as usize - self.vma_start;
let segment_data = &mut mapping[load_offset..load_offset + mem_size];
debug_trace!(
"{:?} {:#x?} -> {:#x?}",
self.path,
segment.p_vaddr..segment.p_vaddr + mem_size as u64,
base + rel_offset..base + rel_offset + mem_size
);
let segment_data = &mut object_data[rel_offset..rel_offset + mem_size];
if file_size > 0 {
// Load segment data
if file_size != 0 {
self.file.seek(SeekFrom::Start(segment.p_offset))?;
self.file.read_exact(&mut segment_data[..file_size])?;
}
if mem_size > file_size {
segment_data[file_size..mem_size].fill(0);
}
segment_data[file_size..].fill(0);
}
elf::abi::PT_TLS => {
let (tls_index, tls) = state.new_tls((mem_size + 0xFFF) & !0xFFF)?;
let tls_data = tls.data.as_slice_mut();
if file_size > 0 {
self.file.seek(SeekFrom::Start(segment.p_offset))?;
self.file.read_exact(&mut tls_data[..file_size])?;
}
if mem_size > file_size {
tls_data[file_size..mem_size].fill(0);
}
self.tls_module_id = Some(tls_index);
}
_ => (),
// TODO handle GNU_STACK
PT_DYNAMIC | PT_GNU_RELRO | PT_GNU_STACK | PT_INTERP | PT_PHDR
| PT_GNU_EH_FRAME | PT_NOTE => (),
_ => todo!("Unhandled segment type"),
}
}
for dynsym in self.dynamic_symbol_array.iter_mut() {
if dynsym.raw.st_shndx == elf::abi::SHN_UNDEF {
// Export dynamic symbols into the linking table
for dynsym in self.dynamic_symbol_array.iter() {
// Don't export undefined symbols
if dynsym.raw.st_shndx == SHN_UNDEF {
continue;
}
state.export_symbol(dynsym, mapping.base, &self.path, self.tls_module_id, self.vma_start);
// TODO: don't export hidden symbols?
// Symbol actual address offset: V + B - S
// Where V: symbol value as defined in the table
// B: "actual" load base
// S: "image" load base
let offset = mapping.base() as isize - self.vma_start as isize;
match dynsym.raw.st_bind() {
STB_GLOBAL => state.export(&self.path, dynsym, offset, false),
STB_WEAK => state.export(&self.path, dynsym, offset, true),
STB_LOCAL => todo!(),
_ => todo!(),
}
}
self.mapping = Some(mapping);
Ok(())
}
pub fn relocate(&mut self, state: &mut State) -> Result<(), Error> {
let mapping = self.mapping.as_mut().ok_or(Error::ObjectNotLoaded)?;
pub fn entry(&self) -> Option<extern "C" fn(usize)> {
let entry = self.elf.ehdr.e_entry as i64;
let offset = match &self.ty {
// Fixed entry
ElfType::Static => 0,
// PIE executable
ElfType::Relocatable(true) => {
let mapping = self.mapping.as_ref()?;
mapping.base() as i64 - self.vma_start as i64
}
// Shared library
ElfType::Relocatable(false) => return None,
};
for dynsym in self.dynamic_symbol_array.iter_mut() {
if dynsym.value.is_none() {
debug_trace!("Resolve {:?}", dynsym.name);
state.resolve_symbol(dynsym);
} else if dynsym.raw.st_shndx == elf::abi::SHN_UNDEF {
state.undefined_references.push(dynsym.name.clone());
let entry: usize = (entry + offset).try_into().unwrap();
Some(unsafe { core::mem::transmute(entry) })
}
/// Iterates over the names collected from this object's DT_NEEDED entries.
pub fn needed(&self) -> impl Iterator<Item = &String> {
self.needed.iter()
}
/// Asks `state` to check every undefined, non-local dynamic symbol of this
/// object, so that symbols nobody exports get recorded against this
/// object's path.
pub fn resolve_symbols(&mut self, state: &mut State) {
    let imported = self
        .dynamic_symbol_array
        .iter()
        // Local symbols are never resolved through the global table
        .filter(|dynsym| dynsym.raw.st_bind() != STB_LOCAL)
        // Only undefined symbols need a definition from another object
        .filter(|dynsym| dynsym.raw.st_shndx == SHN_UNDEF);

    for dynsym in imported {
        state.ensure_resolved(&self.path, dynsym);
    }
}
pub fn relocate(&mut self, state: &mut State) -> Result<(), Error> {
let mapping = self.mapping.as_mut().ok_or(Error::NotLoaded)?;
let image_offset = mapping.base() as isize - self.vma_start as isize;
// TODO x86-64 doesn't seem to have REL sections, but having them handled would still be
// nice
// Perform relocations
let rela_sections = self
.elf
.section_headers()
.iter()
.filter(|s| s.sh_type == elf::abi::SHT_RELA)
.filter(|s| s.sh_type == SHT_RELA)
.cloned()
.collect::<Vec<_>>();
for rela in rela_sections {
let rela = self.elf.section_data_as_relas(&rela)?;
for rela_section in rela_sections {
let rela_section = self.elf.section_data_as_relas(&rela_section)?;
for entry in rela {
if let Some(value) = entry.resolve(
state,
self.tls_module_id,
|idx| Ok(&self.dynamic_symbol_array[idx as usize]),
mapping.base,
self.vma_start
)? {
value.write(mapping, entry.r_offset - self.vma_start as u64);
for rela in rela_section {
let dynsym = &self.dynamic_symbol_array[rela.r_sym as usize];
let sym = match dynsym.raw.st_bind() {
STB_GLOBAL => ResolvedSymbol::Global(state.lookup(dynsym).unwrap()),
STB_LOCAL => {
if dynsym.name.is_empty() {
ResolvedSymbol::Null
} else {
todo!("Relocation against local symbol: {:?}", dynsym.name)
}
}
_ => todo!(),
};
if let Some(value) = rela.resolve(&sym, mapping.base())? {
value.write(mapping, rela.r_offset);
}
}
}
let rel_sections = self
.elf
.section_headers()
.iter()
.filter(|s| s.sh_type == SHT_REL)
.cloned()
.collect::<Vec<_>>();
if !rel_sections.is_empty() {
todo!("SHT_REL is not yet implemented")
}
Ok(())
}
// TODO this only resolves direct dependencies, need recursion
pub fn collect_dependencies<R>(
&mut self,
libs: &mut HashMap<PathBuf, Object>,
resolver: &R,
) -> Result<(), Error>
where
R: Fn(&str) -> Result<PathBuf, Error>,
{
for dep in self.needed.drain(..) {
let path = resolver(&dep)?;
fn bounds<F: Read + Seek>(elf: &ElfStream<AnyEndian, F>) -> (usize, usize) {
let mut start = u64::MAX;
let mut end = u64::MIN;
if libs.contains_key(&path) {
for segment in elf.segments() {
if segment.p_type != PT_LOAD {
continue;
}
let object = Object::open(&path)?;
libs.insert(path, object);
let start_vma = segment.p_vaddr & !(segment.p_align - 1);
let end_vma =
(segment.p_vaddr + segment.p_memsz + segment.p_align - 1) & !(segment.p_align - 1);
if start_vma < start {
start = start_vma;
}
if end_vma > end {
end = end_vma;
}
}
Ok(())
(start.try_into().unwrap(), end.try_into().unwrap())
}
}
impl ObjectMapping {
pub fn new(size: usize) -> Result<Self, Error> {
let base = unsafe { yggdrasil_rt::sys::map_memory(None, size, &MappingSource::Anonymous) }
.map_err(|e| Error::Map(size, e))?;
Ok(Self { base, size })
}
pub fn as_slice_mut(&mut self) -> &mut [u8] {
unsafe { std::slice::from_raw_parts_mut(self.base as *mut u8, self.size) }
}
}
impl Drop for ObjectMapping {
    /// Unmaps the region; an error from the unmap call is ignored.
    fn drop(&mut self) {
        let _ = unsafe { yggdrasil_rt::sys::unmap_memory(self.base, self.size) };
    }
}
/// Computes the virtual address range spanned by the PT_LOAD segments of
/// `elf`, with each segment's start rounded down and end rounded up to the
/// segment's own alignment.
///
/// Returns `(lowest start, highest end)`. If the file has no PT_LOAD segment,
/// the result is `(usize::MAX, usize::MIN)`.
fn object_bounds<F: Read + Seek>(elf: &ElfStream<AnyEndian, F>) -> (usize, usize) {
    let mut vma_min = usize::MAX;
    let mut vma_max = usize::MIN;

    for segment in elf.segments() {
        if segment.p_type != elf::abi::PT_LOAD {
            continue;
        }

        // ELF allows p_align of 0 or 1 to mean "no alignment requirement".
        // Clamping to 1 keeps `p_align - 1` from underflowing on 0.
        let align_mask = segment.p_align.max(1) - 1;

        let start_vma = (segment.p_vaddr & !align_mask) as usize;
        let end_vma = ((segment.p_vaddr + segment.p_memsz + align_mask) & !align_mask) as usize;

        vma_min = vma_min.min(start_vma);
        vma_max = vma_max.max(end_vma);
    }

    (vma_min, vma_max)
}

View File

@ -1,174 +0,0 @@
use elf::relocation::Rela;
use crate::{
object::{DynamicSymbol, ObjectMapping},
Error, State,
};
/// Value produced by resolving a relocation, ready to be written into the image.
pub enum RelValue {
// A single 64-bit word.
QWord(i64),
// Two consecutive 64-bit words.
#[allow(unused)] // unused on x86_64
QDWord(i64, i64),
}
impl RelValue {
    /// Stores the value at `mapping.base + offset` using volatile writes.
    ///
    /// `QWord` writes one 64-bit word; `QDWord` writes its two words into
    /// consecutive 64-bit slots. No bounds check is performed against the
    /// mapping size.
    pub fn write(&self, mapping: &mut ObjectMapping, offset: u64) {
        let slot = (mapping.base + offset as usize) as *mut i64;
        match *self {
            Self::QWord(value) => unsafe {
                slot.write_volatile(value);
            },
            Self::QDWord(word0, word1) => unsafe {
                slot.write_volatile(word0);
                slot.add(1).write_volatile(word1);
            },
        }
    }
}
/// Architecture-specific resolution of a relocation entry into the value to store.
pub trait RelocationExt {
// Parameters:
//   state         - global loader state used for symbol and TLS lookups
//   tls_module_id - TLS module ID of the object being relocated, if any
//   image_symbol  - maps a relocation's symbol index to the object's dynamic symbol
//   image_base    - address at which the object was actually mapped
//   vma_start     - lowest virtual address of the image as stated in the file
// Returns `Ok(None)` when nothing has to be written.
fn resolve<'a, F: Fn(u32) -> Result<&'a DynamicSymbol, Error>>(
&'a self,
state: &mut State,
tls_module_id: Option<usize>,
image_symbol: F,
image_base: usize,
vma_start: usize,
) -> Result<Option<RelValue>, Error>;
}
// x86-64 relocation resolution.
// Works in two steps: first a per-type "base value" is picked for the symbol,
// then the relocation formula is applied to it. `vma_start` is not used here.
#[cfg(any(target_arch = "x86_64", rust_analyzer))]
impl RelocationExt for Rela {
fn resolve<'a, F: Fn(u32) -> Result<&'a DynamicSymbol, Error>>(
&'a self,
state: &mut State,
tls_module_id: Option<usize>,
image_symbol: F,
image_base: usize,
vma_start: usize,
) -> Result<Option<RelValue>, Error> {
let image_base = image_base as i64;
let symbol = image_symbol(self.r_sym)?;
// Step 1: symbol value to plug into the formula
let base_value = match self.r_type {
elf::abi::R_X86_64_JUMP_SLOT => {
// TODO lazy binding could be implemented here
state.lookup_resolved_symbol(&symbol.name)
}
elf::abi::R_X86_64_GLOB_DAT => state.lookup_resolved_glob_dat(&symbol.name),
elf::abi::R_X86_64_64 => state.lookup_resolved_symbol(&symbol.name),
// Retrieve raw symbol value, as this might be an offset into local TLS struct
elf::abi::R_X86_64_DTPMOD64 | elf::abi::R_X86_64_DTPOFF64 => {
symbol.raw.st_value as usize
}
// Other types: use the already-resolved value; an unresolved STT_NOTYPE symbol counts as 0
_ => {
if let Some(value) = symbol.value {
value
} else if symbol.raw.st_symtype() == elf::abi::STT_NOTYPE {
0
} else {
return Err(Error::CannotRelocate);
}
}
} as i64;
// Step 2: apply the relocation formula
match self.r_type {
// Direct 64 bit
elf::abi::R_X86_64_64 => Ok(Some(RelValue::QWord(base_value + self.r_addend))),
elf::abi::R_X86_64_JUMP_SLOT => Ok(Some(RelValue::QWord(base_value))),
elf::abi::R_X86_64_COPY => todo!("{:?}: R_X86_64_COPY", symbol.name),
// GLOB_DAT
elf::abi::R_X86_64_GLOB_DAT => Ok(Some(RelValue::QWord(base_value))),
// Adjust by image base
elf::abi::R_X86_64_RELATIVE => Ok(Some(RelValue::QWord(image_base + self.r_addend))),
// ID of module containing this symbol (if not present assume local symbol)
elf::abi::R_X86_64_DTPMOD64 => {
// TODO I'm not sure if per-object TLS is really needed:
// all TLS segments could just be collected into a single one, simplifying
// __tls_get_addr()
if let Some((module_id, _)) = state.lookup_resolved_tls_symbol(&symbol.name) {
Ok(Some(RelValue::QWord(module_id as _)))
} else {
// Panics if the relocated object itself has no TLS module ID
Ok(Some(RelValue::QWord(tls_module_id.unwrap() as _)))
}
}
// Offset in module's TLS block
elf::abi::R_X86_64_DTPOFF64 => {
if let Some((_, value)) = state.lookup_resolved_tls_symbol(&symbol.name) {
Ok(Some(RelValue::QWord(self.r_addend + value as i64)))
} else {
Ok(Some(RelValue::QWord(self.r_addend + base_value)))
}
}
ty => Err(Error::UnsupportedRelocation(ty)),
}
}
}
// AArch64 relocation resolution.
// Same two-step scheme as the x86-64 variant: pick a per-type base value,
// then apply the relocation formula. `vma_start` is not used here.
#[cfg(any(target_arch = "aarch64", rust_analyzer))]
impl RelocationExt for Rela {
fn resolve<'a, F: Fn(u32) -> Result<&'a DynamicSymbol, Error>>(
&'a self,
state: &mut State,
tls_module_id: Option<usize>,
image_symbol: F,
image_base: usize,
vma_start: usize,
) -> Result<Option<RelValue>, Error> {
let image_base = image_base as i64;
let symbol = image_symbol(self.r_sym)?;
// Step 1: symbol value to plug into the formula
let base_value = match self.r_type {
elf::abi::R_AARCH64_ABS64 => state.lookup_resolved_symbol(&symbol.name),
// TODO lazy binding?
elf::abi::R_AARCH64_JUMP_SLOT => state.lookup_resolved_symbol(&symbol.name),
elf::abi::R_AARCH64_GLOB_DAT => state.lookup_resolved_glob_dat(&symbol.name),
elf::abi::R_AARCH64_TLSDESC => symbol.value.unwrap_or(symbol.raw.st_value as usize),
// Other types: use the already-resolved value; an unresolved STT_NOTYPE symbol counts as 0
_ => {
if let Some(value) = symbol.value {
value
} else if symbol.raw.st_symtype() == elf::abi::STT_NOTYPE {
0
} else {
return Err(Error::CannotRelocate);
}
}
} as i64;
// Step 2: apply the relocation formula
match self.r_type {
// Adjust by image base
elf::abi::R_AARCH64_RELATIVE => Ok(Some(RelValue::QWord(image_base + self.r_addend))),
elf::abi::R_AARCH64_JUMP_SLOT => Ok(Some(RelValue::QWord(base_value))),
// GLOB_DAT
elf::abi::R_AARCH64_GLOB_DAT => Ok(Some(RelValue::QWord(base_value))),
// Direct 64 bit
elf::abi::R_AARCH64_ABS64 => Ok(Some(RelValue::QWord(base_value + self.r_addend))),
// Two-word descriptor: word0 is the address of `__dl_tlsdesc_static`, word1 the value it will return
elf::abi::R_AARCH64_TLSDESC => {
let word0 = crate::builtins::__dl_tlsdesc_static as usize;
// Reads the 64-bit value currently stored at `image_base + r_offset`
let reloc_offset =
unsafe { *((image_base as usize + self.r_offset as usize) as *const i64) };
let word1 = if let Some((module_id, offset)) =
state.lookup_resolved_tls_symbol(&symbol.name)
{
// FIXME not really sure about this code
// NOTE(review): `module_id` indexes `tls_table` directly here, while the branch below subtracts 1 — confirm which is intended
let tls_base = state.tls_table[module_id].data.base;
base_value + reloc_offset + (tls_base + offset) as i64
} else {
// Panics if the relocated object itself has no TLS module ID
let local_tls = state.tls_table[tls_module_id.unwrap() - 1].data.base;
local_tls as i64 + reloc_offset + self.r_addend
};
Ok(Some(RelValue::QDWord(word0 as _, word1)))
}
_ => Err(Error::UnsupportedRelocation(self.r_type)),
}
}
}

View File

@ -0,0 +1,26 @@
use crate::{error::Error, mapping::Mapping, object::ResolvedSymbol};
#[cfg(any(target_arch = "x86_64", rust_analyzer))]
mod x86_64;
/// Value computed for a RELA relocation, to be written into the loaded image.
pub enum RelaValue {
// A single 64-bit word.
QWord(i64),
}
/// A relocation result that knows how to store itself into a mapping.
pub trait RelocationValue {
// Writes the value into `mapping` at byte `offset`.
fn write(&self, mapping: &mut Mapping, offset: u64);
}
/// A relocation entry that can be turned into a value to write.
pub trait Relocation {
// Type of the value produced by `resolve`.
type Value: RelocationValue;
// Computes the value for this entry from the resolved `symbol` and the
// address the object was loaded at. `Ok(None)` means nothing is written.
fn resolve(&self, symbol: &ResolvedSymbol, load_base: usize) -> Result<Option<Self::Value>, Error>;
}
impl RelocationValue for RelaValue {
    /// Stores the 64-bit word into the slot at `offset` within `mapping`,
    /// using an unaligned write.
    fn write(&self, mapping: &mut Mapping, offset: u64) {
        // Irrefutable while `QWord` is the only variant; adding a variant
        // turns this into a compile error, like the exhaustive match would.
        let Self::QWord(value) = *self;
        let slot = mapping.qword(offset);
        unsafe { slot.write_unaligned(value) }
    }
}

View File

@ -0,0 +1,71 @@
use std::path::Path;
use elf::{
abi::{
R_X86_64_64, R_X86_64_DTPMOD64, R_X86_64_DTPOFF64, R_X86_64_GLOB_DAT, R_X86_64_JUMP_SLOT,
R_X86_64_RELATIVE,
},
relocation::Rela,
};
use crate::{error::Error, object::{DynamicSymbol, ResolvedSymbol}, state::State};
use super::{RelaValue, Relocation};
impl Relocation for Rela {
    type Value = RelaValue;

    /// Computes the value to store for an x86-64 RELA entry.
    ///
    /// Notation used below: `S` is the resolved symbol value, `A` the addend
    /// (`r_addend`), and `B` the `load_base` argument.
    ///
    /// # Panics
    ///
    /// Not-yet-implemented cases hit `todo!()`: a zero symbol value for any
    /// type other than `R_X86_64_RELATIVE`, the TLS relocations, and every
    /// relocation type not listed here. `symbol.value()` is also evaluated
    /// unconditionally and is itself unimplemented for local symbols.
    fn resolve(
        &self,
        symbol: &ResolvedSymbol,
        load_base: usize,
    ) -> Result<Option<Self::Value>, Error> {
        let s = symbol.value();
        if s == 0 && self.r_type != R_X86_64_RELATIVE {
            todo!(
                "Relocation type {:#x} against a symbol with value 0",
                self.r_type
            )
        }

        // Address arithmetic is done modulo 2^64, hence the wrapping adds.
        match self.r_type {
            // S
            R_X86_64_JUMP_SLOT | R_X86_64_GLOB_DAT => Ok(Some(RelaValue::QWord(s))),
            // S + A
            R_X86_64_64 => Ok(Some(RelaValue::QWord(s.wrapping_add(self.r_addend)))),
            // B + A
            R_X86_64_RELATIVE => Ok(Some(RelaValue::QWord(
                (load_base as i64).wrapping_add(self.r_addend),
            ))),
            // TLS
            R_X86_64_DTPOFF64 => todo!("R_X86_64_DTPOFF64"),
            R_X86_64_DTPMOD64 => todo!("R_X86_64_DTPMOD64"),
            _ => todo!("Unhandled relocation type: {:#x}", self.r_type),
        }
    }
}

View File

@ -0,0 +1,11 @@
use std::path::{Path, PathBuf};
use crate::error::Error;
/// Locates the shared library `name` by looking for `/lib/<name>`.
///
/// # Errors
///
/// Returns `Error::LibraryNotFound` with the requested name when that path
/// does not exist.
pub fn find_library(name: &str) -> Result<PathBuf, Error> {
    let candidate = Path::new("/lib").join(name);
    if candidate.exists() {
        Ok(candidate)
    } else {
        Err(Error::LibraryNotFound(name.to_owned()))
    }
}

View File

@ -1,129 +1,95 @@
use std::{
collections::HashMap,
collections::{HashMap, HashSet},
path::{Path, PathBuf},
rc::Rc,
};
use crate::{
builtins,
object::{DynamicSymbol, ObjectTls},
Error,
};
use elf::abi::{STT_FUNC, STT_NOTYPE, STT_OBJECT, STT_TLS};
use crate::object::DynamicSymbol;
/// A symbol definition published into the global symbol table.
pub struct ExportedSymbol {
// Path of the object that exported the symbol.
pub source: PathBuf,
// Symbol value after adding the exporting object's load offset.
pub value: usize,
// Whether the definition has weak binding; a weak entry may be replaced by a non-weak one.
pub weak: bool
}
#[derive(Default)]
pub struct State {
pub glob_dat: HashMap<Rc<str>, usize>,
pub symbol_table: HashMap<Rc<str>, (usize, PathBuf)>,
pub tls_table: Vec<ObjectTls>,
pub tls_symbols: HashMap<Rc<str>, (usize, usize)>,
// This list should be empty if everything succeeds
pub undefined_references: Vec<Rc<str>>,
symbol_table: HashMap<Rc<str>, ExportedSymbol>,
undefined_references: HashMap<PathBuf, HashSet<Rc<str>>>
}
impl State {
pub fn new_tls(&mut self, size: usize) -> Result<(usize, &mut ObjectTls), Error> {
let index = self.tls_table.len();
let tls = ObjectTls::new(size)?;
self.tls_table.push(tls);
let value = &mut self.tls_table[index];
// Zero reserved or something
Ok((index + 1, value))
}
pub fn insert_linker_builtins(&mut self) {
let linker = PathBuf::from("/libexec/dyn-loader");
self.symbol_table.insert(
"__tls_get_addr".into(),
(builtins::__dl_tls_get_addr as usize, linker),
);
}
pub fn tls_address(&self, index: usize) -> Option<usize> {
self.tls_table
.get(index - 1)
.as_ref()
.map(|tls| tls.data.base)
}
pub fn export_symbol<P: AsRef<Path>>(
&mut self,
sym: &mut DynamicSymbol,
load_base: usize,
source: P,
tls_index: Option<usize>,
vma_start: usize,
) {
if sym.raw.st_symtype() == elf::abi::STT_TLS {
// If it exports TLS symbols, it has TLS, I guess
let self_module_id = tls_index.expect("TLS symbol in an object without TLS segment");
// TODO sanity checks (that the symbol actually fits within the allocated TLS block)
let (module_id, value) =
if let Some(&(module_id, symbol_value)) = self.tls_symbols.get(&sym.name) {
(module_id, symbol_value)
} else {
// Does not exist, export it
self.tls_symbols.insert(
sym.name.clone(),
(self_module_id, sym.raw.st_value as usize),
);
(self_module_id, sym.raw.st_value as usize)
};
sym.value = Some(value);
sym.tls_index = Some(module_id);
} else {
let sym_value = sym.raw.st_value as usize + load_base - vma_start;
let value = if let Some((value, _)) = self.symbol_table.get(&sym.name) {
*value
} else {
// Does not exist, export it
self.symbol_table
.insert(sym.name.clone(), (sym_value, source.as_ref().to_owned()));
sym_value
};
sym.value = Some(value);
pub fn new() -> Self {
Self {
symbol_table: HashMap::new(),
undefined_references: HashMap::new(),
}
}
pub fn resolve_symbol(&mut self, sym: &mut DynamicSymbol) {
if sym.raw.st_symtype() == elf::abi::STT_TLS {
if let Some(&(module_id, value)) = self.tls_symbols.get(&sym.name) {
sym.value = Some(value);
sym.tls_index = Some(module_id);
} else if !sym.name.is_empty() {
self.undefined_references.push(sym.name.clone());
pub fn export(&mut self, source: impl AsRef<Path>, sym: &DynamicSymbol, offset: isize, weak: bool) {
match sym.raw.st_symtype() {
STT_FUNC | STT_OBJECT | STT_NOTYPE => {
let source = source.as_ref().to_owned();
let value: usize = (isize::try_from(sym.raw.st_value).unwrap() + offset)
.try_into()
.unwrap();
match self.symbol_table.get_mut(&sym.name) {
// Stronger binding exported
Some(export) if export.weak && !weak => {
*export = ExportedSymbol { source, value, weak };
},
// Do nothing, already strong or already weak
Some(_) => (),
None => {
self.symbol_table.insert(sym.name.clone(), ExportedSymbol { source, value, weak });
}
}
}
} else if let Some((value, _)) = self.symbol_table.get(&sym.name) {
sym.value = Some(*value);
} else if !sym.name.is_empty() {
self.undefined_references.push(sym.name.clone());
STT_TLS => todo!(),
_ => todo!(),
}
}
pub fn lookup_resolved_glob_dat(&mut self, name: &Rc<str>) -> usize {
if let Some((value, _)) = self.symbol_table.get(name) {
*value
} else if let Some(value) = self.glob_dat.get(name) {
*value
/// Checks that `sym` has a definition in the global symbol table and, if it
/// does not, records it as undefined for the object at `source`.
///
/// # Panics
///
/// Symbol types other than `STT_FUNC`, `STT_OBJECT` and `STT_NOTYPE`
/// (including `STT_TLS`) are not implemented yet.
pub fn ensure_resolved(&mut self, source: impl AsRef<Path>, sym: &DynamicSymbol) {
    match sym.raw.st_symtype() {
        STT_FUNC | STT_OBJECT | STT_NOTYPE => (),
        STT_TLS => todo!(),
        _ => todo!(),
    }

    if self.symbol_table.contains_key(&sym.name) {
        return;
    }
    self.undefined(source, &sym.name);
}
/// Finds the exported definition matching `sym` by name.
///
/// Returns `None` when no object has exported a symbol with that name.
///
/// # Panics
///
/// Symbol types other than `STT_FUNC`, `STT_OBJECT` and `STT_NOTYPE`
/// (including `STT_TLS`) are not implemented yet.
pub fn lookup(&mut self, sym: &DynamicSymbol) -> Option<&ExportedSymbol> {
    let symtype = sym.raw.st_symtype();
    if !matches!(symtype, STT_FUNC | STT_OBJECT | STT_NOTYPE) {
        todo!()
    }
    self.symbol_table.get(&sym.name)
}
/// Records `sym` as an undefined reference made by the object at `source`.
///
/// Entries are grouped per object and de-duplicated, so reporting the same
/// symbol twice for one object has no further effect. The collected set is
/// what `no_undefined_symbols` hands back to the caller for reporting.
pub fn undefined(&mut self, source: impl AsRef<Path>, sym: &Rc<str>) {
    self.undefined_references
        .entry(source.as_ref().to_owned())
        .or_default()
        .insert(sym.clone());
}
pub fn no_undefined_symbols(&self) -> Result<(), &HashMap<PathBuf, HashSet<Rc<str>>>> {
if self.undefined_references.is_empty() {
Ok(())
} else {
self.undefined_references.push(name.clone());
0
Err(&self.undefined_references)
}
}
/// Returns the address recorded for `name` in the symbol table.
///
/// When the symbol is unknown, `name` is appended to the list of undefined
/// references and 0 is returned instead.
pub fn lookup_resolved_symbol(&mut self, name: &Rc<str>) -> usize {
    match self.symbol_table.get(name) {
        Some((value, _)) => *value,
        None => {
            self.undefined_references.push(name.clone());
            0
        }
    }
}
/// Looks up `name` among the exported TLS symbols and returns its
/// `(module ID, offset)` pair, or `None` if no such TLS symbol was exported.
/// Unlike `lookup_resolved_symbol`, a miss is not recorded as an undefined reference.
pub fn lookup_resolved_tls_symbol(&mut self, name: &Rc<str>) -> Option<(usize, usize)> {
// TODO differentiate between local TLS references and really undefined TLS references?
self.tls_symbols.get(name).copied()
}
}

View File

@ -49,14 +49,14 @@ fn build_test_c_program(
llvm: &Llvm,
install: &mut Vec<(PathBuf, PathBuf)>,
) -> Result<(), Error> {
log::info!("Building a test C program");
log::info!("Building a test C program [PIE]");
let target_dir = &env.userspace_output_dir;
let mut command = llvm.c_clang(env);
command
.args([
"-Bdynamic",
"-fpie",
"-Bdynamic",
"-O0",
"-ggdb",
"-lm",
@ -70,9 +70,17 @@ fn build_test_c_program(
return Err(Error::ExternalCommandFailed);
}
log::info!("Building a test C program [static]");
let mut command = llvm.c_clang(env);
command
.args(["-static", "-O0", "-ggdb", "-fstack-protector-strong", "-lm"])
.args([
"-v",
"-static",
"-O0",
"-ggdb",
"-fstack-protector-strong",
"-lm",
])
.arg("-o")
.arg(target_dir.join("c-test-static"))
.arg(env.workspace_root.join("test.c"));