diff --git a/Cargo.lock b/Cargo.lock
index 7bde5841..de3cb0cc 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -838,6 +838,7 @@ version = "0.1.0"
 dependencies = [
  "aarch64-cpu",
  "bitflags 2.6.0",
+ "cc",
  "device-api",
  "kernel-arch-interface",
  "libk-mm-interface",
diff --git a/etc/aarch64-unknown-qemu.json b/etc/aarch64-unknown-qemu.json
index b4d979e4..37262517 100644
--- a/etc/aarch64-unknown-qemu.json
+++ b/etc/aarch64-unknown-qemu.json
@@ -6,7 +6,7 @@
   "data-layout": "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128",
   "max-atomic-width": 128,
   "target-pointer-width": "64",
-  "features": "+v8a,+strict-align,+neon,+fp-armv8",
+  "features": "+v8a,+strict-align,-neon,-fp-armv8",
 
   "disable-redzone": true,
   "executables": true,
diff --git a/kernel/arch/aarch64/Cargo.toml b/kernel/arch/aarch64/Cargo.toml
index afe3d595..6eec9b9b 100644
--- a/kernel/arch/aarch64/Cargo.toml
+++ b/kernel/arch/aarch64/Cargo.toml
@@ -14,3 +14,6 @@ bitflags = "2.6.0"
 static_assertions = "1.1.0"
 aarch64-cpu = "9.4.0"
 tock-registers = "0.8.1"
+
+[build-dependencies]
+cc = "1.0"
diff --git a/kernel/arch/aarch64/build.rs b/kernel/arch/aarch64/build.rs
new file mode 100644
index 00000000..21c01357
--- /dev/null
+++ b/kernel/arch/aarch64/build.rs
@@ -0,0 +1,21 @@
+use std::env;
+
+/// Assembles `src/fp_context.S` with clang (FP enabled) into a library named `fp_context`.
+fn build_fp_context_obj() {
+    const FP_CONTEXT_S: &str = "src/fp_context.S";
+    let out_dir = env::var("OUT_DIR").unwrap();
+
+    println!("cargo:rerun-if-changed={}", FP_CONTEXT_S); // re-run when the assembly changes
+
+    cc::Build::new()
+        .out_dir(&out_dir)
+        .compiler("clang")
+        .target("aarch64-unknown-none")
+        .flag("-march=armv8-a+fp") // allow FP register instructions in this object
+        .file(FP_CONTEXT_S)
+        .compile("fp_context");
+}
+
+fn main() {
+    build_fp_context_obj();
+}
diff --git a/kernel/arch/aarch64/src/context.S b/kernel/arch/aarch64/src/context.S
index 581cf4cf..37833ef2 100644
--- a/kernel/arch/aarch64/src/context.S
+++ b/kernel/arch/aarch64/src/context.S
@@ -52,9 +52,10 @@ __aarch64_task_enter_kernel:
     eret
 
 __aarch64_task_enter_user:
-    // x0 == sp, x1 == ignored
+    // x0 == sp, x1 == mdscr_el1
     ldp x0, x1, [sp, #16 * 0]
     msr sp_el0, x0
+    msr mdscr_el1, x1
 
     # EL0t, IRQs unmasked
     msr spsr_el1, xzr
diff --git a/kernel/arch/aarch64/src/context.rs b/kernel/arch/aarch64/src/context.rs
index 1b798676..c05f32bc 100644
--- a/kernel/arch/aarch64/src/context.rs
+++ b/kernel/arch/aarch64/src/context.rs
@@ -1,5 +1,5 @@
 //! AArch64-specific task context implementation
-use core::{arch::global_asm, cell::UnsafeCell, fmt, marker::PhantomData};
+use core::{arch::global_asm, cell::UnsafeCell, ffi::c_void, fmt, marker::PhantomData};
 
 use kernel_arch_interface::{
     mem::{KernelTableManager, PhysicalMemoryAllocator},
@@ -29,6 +29,12 @@ struct TaskContextInner {
     sp: usize,
 }
 
+#[repr(align(0x10))] // 16-byte aligned save area
+pub struct FpContext {
+    // q0..q31 (32 x 128 bit), followed by fpsr and fpcr stored as 8 bytes each
+    inner: [u8; 16 * 32 + 16],
+}
+
 /// AArch64 implementation of a task context
 #[allow(unused)]
 pub struct TaskContextImpl<
@@ -36,6 +42,7 @@ pub struct TaskContextImpl<
     PA: PhysicalMemoryAllocator<Address = PhysicalAddress>,
 > {
     inner: UnsafeCell<TaskContextInner>,
+    fp_context: UnsafeCell<FpContext>,
     stack_base_phys: PhysicalAddress,
     stack_size: usize,
 
@@ -45,6 +52,22 @@ pub struct TaskContextImpl<
 
 const COMMON_CONTEXT_SIZE: usize = 8 * 14;
 
+impl FpContext {
+    pub const fn new() -> Self {
+        Self {
+            inner: [0; 16 * 32 + 16], // all-zero register image
+        }
+    }
+    /// Saves the CPU's FP/SIMD registers into `this`, which must be valid for writes.
+    pub unsafe fn store(this: *mut Self) {
+        __aarch64_fp_store_context(this as _)
+    }
+    /// Loads the CPU's FP/SIMD registers from `this`, which must be valid for reads.
+    pub unsafe fn restore(this: *const Self) {
+        __aarch64_fp_restore_context(this as _)
+    }
+}
+
 impl TaskFrame for ExceptionFrame {
     fn store(&self) -> SavedFrame {
         SavedFrame {
@@ -151,6 +174,7 @@ impl<K: KernelTableManager, PA: PhysicalMemoryAllocator<Address = PhysicalAddres
 
         Ok(Self {
             inner: UnsafeCell::new(TaskContextInner { sp }),
+            fp_context: UnsafeCell::new(FpContext::new()),
             stack_base_phys,
             stack_size: KERNEL_TASK_PAGES * 0x1000,
 
@@ -166,9 +190,11 @@ impl<K: KernelTableManager, PA: PhysicalMemoryAllocator<Address = PhysicalAddres
 
         let mut stack = StackBuilder::new(stack_base, USER_TASK_PAGES * 0x1000);
 
+        let mdscr_el1 = if context.single_step { 1 << 0 } else { 0 };
+
         stack.push(context.entry);
         stack.push(context.argument);
-        stack.push(0);
+        stack.push(mdscr_el1);
         stack.push(context.stack_pointer);
 
         setup_common_context(
@@ -182,6 +208,7 @@ impl<K: KernelTableManager, PA: PhysicalMemoryAllocator<Address = PhysicalAddres
 
         Ok(Self {
             inner: UnsafeCell::new(TaskContextInner { sp }),
+            fp_context: UnsafeCell::new(FpContext::new()),
             stack_base_phys,
             stack_size: USER_TASK_PAGES * 0x1000,
 
@@ -191,14 +218,28 @@ impl<K: KernelTableManager, PA: PhysicalMemoryAllocator<Address = PhysicalAddres
     }
 
     unsafe fn enter(&self) -> ! {
+        // Load this task's saved FP/SIMD state before entering it
+        FpContext::restore(self.fp_context.get());
         __aarch64_enter_task(self.inner.get())
     }
 
     unsafe fn switch(&self, from: &Self) {
-        __aarch64_switch_task(self.inner.get(), from.inner.get())
+        let dst = self.inner.get();
+        let src = from.inner.get();
+        // Switching to the context that is already current is a no-op: nothing to save or load
+        if dst != src {
+            // Save the old context
+            FpContext::store(from.fp_context.get());
+            // Load next context
+            FpContext::restore(self.fp_context.get());
+
+            __aarch64_switch_task(dst, src)
+        }
     }
 
     unsafe fn switch_and_drop(&self, thread: *const ()) {
+        // Restore only: the outgoing thread's FP state is not saved on this path
+        FpContext::restore(self.fp_context.get());
         __aarch64_switch_task_and_drop(self.inner.get(), thread);
     }
 }
@@ -241,6 +282,8 @@ extern "C" {
     fn __aarch64_switch_task_and_drop(to: *mut TaskContextInner, thread: *const ()) -> !;
     fn __aarch64_task_enter_kernel();
     fn __aarch64_task_enter_user();
+    fn __aarch64_fp_store_context(to: *mut c_void);
+    fn __aarch64_fp_restore_context(from: *const c_void);
 }
 
 global_asm!(include_str!("context.S"), context_size = const COMMON_CONTEXT_SIZE);
diff --git a/kernel/arch/aarch64/src/fp_context.S b/kernel/arch/aarch64/src/fp_context.S
new file mode 100644
index 00000000..d8ea308b
--- /dev/null
+++ b/kernel/arch/aarch64/src/fp_context.S
@@ -0,0 +1,52 @@
+.section .text
+.global __aarch64_fp_store_context
+.global __aarch64_fp_restore_context
+
+__aarch64_fp_store_context:
+    // x0 - destination: 32 x 16 bytes for q0..q31, then fpsr and fpcr (8 bytes each)
+    stp q0,  q1,  [x0, #16 * 0 ]
+    stp q2,  q3,  [x0, #16 * 2 ]
+    stp q4,  q5,  [x0, #16 * 4 ]
+    stp q6,  q7,  [x0, #16 * 6 ]
+    stp q8,  q9,  [x0, #16 * 8 ]
+    stp q10, q11, [x0, #16 * 10]
+    stp q12, q13, [x0, #16 * 12]
+    stp q14, q15, [x0, #16 * 14]
+    stp q16, q17, [x0, #16 * 16]
+    stp q18, q19, [x0, #16 * 18]
+    stp q20, q21, [x0, #16 * 20]
+    stp q22, q23, [x0, #16 * 22]
+    stp q24, q25, [x0, #16 * 24]
+    stp q26, q27, [x0, #16 * 26]
+    stp q28, q29, [x0, #16 * 28]
+    stp q30, q31, [x0, #16 * 30]!   // pre-index writeback: x0 = base + 16 * 30
+    mrs x1, fpsr
+    str x1, [x0, #16 * 2]           // fpsr lands at base + 16 * 32
+    mrs x1, fpcr
+    str x1, [x0, #16 * 2 + 8]       // fpcr lands at base + 16 * 32 + 8
+
+    ret
+
+__aarch64_fp_restore_context:
+    // x0 - source: same layout as written by __aarch64_fp_store_context
+    ldp q0,  q1,  [x0, #16 * 0 ]
+    ldp q2,  q3,  [x0, #16 * 2 ]
+    ldp q4,  q5,  [x0, #16 * 4 ]
+    ldp q6,  q7,  [x0, #16 * 6 ]
+    ldp q8,  q9,  [x0, #16 * 8 ]
+    ldp q10, q11, [x0, #16 * 10]
+    ldp q12, q13, [x0, #16 * 12]
+    ldp q14, q15, [x0, #16 * 14]
+    ldp q16, q17, [x0, #16 * 16]
+    ldp q18, q19, [x0, #16 * 18]
+    ldp q20, q21, [x0, #16 * 20]
+    ldp q22, q23, [x0, #16 * 22]
+    ldp q24, q25, [x0, #16 * 24]
+    ldp q26, q27, [x0, #16 * 26]
+    ldp q28, q29, [x0, #16 * 28]
+    ldp q30, q31, [x0, #16 * 30]!   // pre-index writeback: x0 = base + 16 * 30
+    ldp x0, x1, [x0, #16 * 2]       // x0 = saved fpsr, x1 = saved fpcr
+    msr fpsr, x0
+    msr fpcr, x1
+
+    ret
diff --git a/kernel/arch/aarch64/src/lib.rs b/kernel/arch/aarch64/src/lib.rs
index 3617a31a..563f4f6b 100644
--- a/kernel/arch/aarch64/src/lib.rs
+++ b/kernel/arch/aarch64/src/lib.rs
@@ -50,6 +50,9 @@ impl ArchitectureImpl {
 impl Architecture for ArchitectureImpl {
     type PerCpuData = PerCpuData;
     type CpuFeatures = ();
+    type BreakpointType = u32;
+
+    const BREAKPOINT_VALUE: Self::BreakpointType = 0x200020D4;
 
     fn cpu_index<S: Scheduler + 'static>() -> u32 {
         (MPIDR_EL1.get() & 0xFF) as u32
diff --git a/kernel/arch/i686/src/context.rs b/kernel/arch/i686/src/context.rs
index 78905562..99193e41 100644
--- a/kernel/arch/i686/src/context.rs
+++ b/kernel/arch/i686/src/context.rs
@@ -101,8 +101,13 @@ impl<K: KernelTableManager, PA: PhysicalMemoryAllocator<Address = PhysicalAddres
         let stack_base = stack_base_phys.raw_virtualize::<K>();
 
         let mut stack = StackBuilder::new(stack_base, USER_TASK_PAGES * 0x1000);
+        let mut flags = 0x200;
 
-        stack.push(0x200);
+        if context.single_step {
+            flags |= 1 << 8;
+        }
+
+        stack.push(flags);
         stack.push(context.entry as _);
         stack.push(context.stack_pointer);
 
@@ -372,7 +377,7 @@ impl TaskFrame for ExceptionFrame {
     }
 
     fn user_ip(&self) -> usize {
-        todo!()
+        self.eip as _
     }
 
     fn argument(&self) -> u64 {
diff --git a/kernel/arch/i686/src/lib.rs b/kernel/arch/i686/src/lib.rs
index 5c472955..fee996d1 100644
--- a/kernel/arch/i686/src/lib.rs
+++ b/kernel/arch/i686/src/lib.rs
@@ -48,6 +48,9 @@ extern "C" fn idle_task(_: usize) -> ! {
 impl Architecture for ArchitectureImpl {
     type PerCpuData = PerCpuData;
     type CpuFeatures = CpuFeatures;
+    type BreakpointType = u8;
+
+    const BREAKPOINT_VALUE: Self::BreakpointType = 0xCC;
 
     unsafe fn init_local_cpu<S: Scheduler + 'static>(id: Option<u32>, data: Self::PerCpuData) {
         use alloc::boxed::Box;
diff --git a/kernel/arch/interface/src/lib.rs b/kernel/arch/interface/src/lib.rs
index 0de9269b..de1b3aea 100644
--- a/kernel/arch/interface/src/lib.rs
+++ b/kernel/arch/interface/src/lib.rs
@@ -26,6 +26,9 @@ pub trait Architecture: Sized {
     type PerCpuData;
     type CpuFeatures: CpuFeatureSet;
 
+    type BreakpointType;
+    const BREAKPOINT_VALUE: Self::BreakpointType;
+
     // Cpu management
 
     /// # Safety
diff --git a/kernel/arch/x86_64/src/context.rs b/kernel/arch/x86_64/src/context.rs
index 2e5d636f..03ba6468 100644
--- a/kernel/arch/x86_64/src/context.rs
+++ b/kernel/arch/x86_64/src/context.rs
@@ -443,13 +443,7 @@ impl<K: KernelTableManager, PA: PhysicalMemoryAllocator<Address = PhysicalAddres
         })
     }
 
-    fn user(
-        context: UserContextInfo, // entry: usize,
-                                  // arg: usize,
-                                  // cr3: u64,
-                                  // user_stack_sp: usize,
-                                  // fs_base: usize,
-    ) -> Result<Self, Error> {
+    fn user(context: UserContextInfo) -> Result<Self, Error> {
         const USER_TASK_PAGES: usize = 8;
 
         let stack_base_phys = PA::allocate_contiguous_pages(USER_TASK_PAGES)?;
@@ -457,7 +451,12 @@ impl<K: KernelTableManager, PA: PhysicalMemoryAllocator<Address = PhysicalAddres
 
         let mut stack = StackBuilder::new(stack_base, USER_TASK_PAGES * 0x1000);
 
-        stack.push(0x200);
+        let mut flags = 0x200;
+        if context.single_step {
+            flags |= 1 << 8;
+        }
+
+        stack.push(flags);
         stack.push(context.entry as _);
         stack.push(context.argument);
         stack.push(context.stack_pointer);
diff --git a/kernel/arch/x86_64/src/lib.rs b/kernel/arch/x86_64/src/lib.rs
index 91fad736..30fada4f 100644
--- a/kernel/arch/x86_64/src/lib.rs
+++ b/kernel/arch/x86_64/src/lib.rs
@@ -89,6 +89,9 @@ impl ArchitectureImpl {
 impl Architecture for ArchitectureImpl {
     type PerCpuData = PerCpuData;
     type CpuFeatures = CpuFeatures;
+    type BreakpointType = u8;
+
+    const BREAKPOINT_VALUE: Self::BreakpointType = 0xCC;
 
     unsafe fn set_local_cpu(cpu: *mut ()) {
         MSR_IA32_KERNEL_GS_BASE.set(cpu as u64);
diff --git a/kernel/libk/src/task/binary/mod.rs b/kernel/libk/src/task/binary/mod.rs
index 72aa52e2..8082d29a 100644
--- a/kernel/libk/src/task/binary/mod.rs
+++ b/kernel/libk/src/task/binary/mod.rs
@@ -37,6 +37,15 @@ pub mod elf;
 
 pub type LoadedProcess = (Arc<Process>, Arc<Thread>);
 
+pub struct LoadOptions<'e, P: AsRef<Path>> {
+    pub parent: Option<Weak<Process>>, // parent for `load`; process to load into for `load_into`
+    pub group_id: ProcessGroupId, // group handed to `Process::new_with_main` by `load`
+    pub path: P, // path of the program to load
+    pub args: &'e [&'e str], // arguments; read by `load` (`load_into` takes its own vector)
+    pub envs: &'e [&'e str], // environment; read by `load` (`load_into` takes its own vector)
+    pub single_step: bool, // copied into the initial user context's `single_step`
+}
+
 struct BufferPlacer<'a> {
     buffer: &'a mut [u8],
     virtual_offset: usize,
@@ -122,12 +131,16 @@ fn setup_program_env(
     Ok(in_user as *const _ as usize)
 }
 
-fn setup_context(
+fn setup_context<P>(
+    options: &LoadOptions<P>,
     space: &ProcessAddressSpace,
     image: &ProcessImage,
     args: &Vec<String>,
     envs: &Vec<String>,
-) -> Result<TaskContextImpl, Error> {
+) -> Result<TaskContextImpl, Error>
+where
+    P: AsRef<Path>,
+{
     const USER_STACK_PAGES: usize = 32;
 
     let virt_stack_base = 0x3000000;
@@ -174,14 +187,13 @@ fn setup_context(
         stack_pointer: ptr.addr(),
         tls: tls_address,
         address_space: space.as_address_with_asid(),
-        single_step: false,
+        single_step: options.single_step,
     })
 }
 
-fn setup_binary<S>(
+fn setup_binary<S, P>(
+    options: &LoadOptions<P>,
     name: S,
-    group_id: ProcessGroupId,
-    parent: Option<Weak<Process>>,
     space: ProcessAddressSpace,
     image: ProcessImage,
     args: &Vec<String>,
@@ -189,12 +201,13 @@ fn setup_binary<S>(
 ) -> Result<LoadedProcess, Error>
 where
     S: Into<String>,
+    P: AsRef<Path>,
 {
-    let context = setup_context(&space, &image, args, envs)?;
+    let context = setup_context(options, &space, &image, args, envs)?;
     let (process, main) = Process::new_with_main(
         name,
-        group_id,
-        parent,
+        options.group_id,
+        options.parent.clone(),
         Arc::new(space),
         context,
         Some(image),
@@ -263,24 +276,16 @@ fn xxx_load_program<P: AsRef<Path>>(
     } else {
         Err(Error::UnrecognizedExecutable)
     }
-
-    // let image = load_binary(head, file, space)?;
-
-    // Ok((image, args, envs))
 }
 
 /// Loads a program from given `path`
 pub fn load<P: AsRef<Path>>(
     ioctx: &mut IoContext,
-    group_id: ProcessGroupId,
-    parent: Option<Weak<Process>>,
-    path: P,
-    args: &[&str],
-    envs: &[&str],
+    options: &LoadOptions<P>,
 ) -> Result<LoadedProcess, Error> {
-    let path = path.as_ref();
-    let args = args.iter().map(|&s| s.to_owned()).collect();
-    let envs = envs.iter().map(|&s| s.to_owned()).collect();
+    let path = options.path.as_ref();
+    let args = options.args.iter().map(|&s| s.to_owned()).collect();
+    let envs = options.envs.iter().map(|&s| s.to_owned()).collect();
 
     let space = ProcessAddressSpace::new()?;
     let (image, args, envs) = xxx_load_program(&space, ioctx, path, args, envs)?;
@@ -289,22 +294,22 @@ pub fn load<P: AsRef<Path>>(
         Some((_, name)) => name,
         None => name,
     };
-    setup_binary(name, group_id, parent, space, image, &args, &envs)
+    setup_binary(options, name, space, image, &args, &envs)
 }
 
 pub fn load_into<P: AsRef<Path>>(
-    process: &Process,
-    path: P,
+    options: &LoadOptions<P>,
     args: Vec<String>,
     envs: Vec<String>,
 ) -> Result<(TaskContextImpl, ProcessImage), Error> {
+    let process = options.parent.as_ref().unwrap().upgrade().unwrap();
     let mut io = process.io.lock();
     // Have to make the Path owned, going to drop the address space from which it came
-    let path = path.as_ref().to_owned();
+    let path = options.path.as_ref().to_owned();
     let space = process.space();
     space.clear()?;
     let (image, args, envs) = xxx_load_program(&space, io.ioctx_mut(), &path, args, envs)?;
-    let context = setup_context(&space, &image, &args, &envs)?;
+    let context = setup_context(options, &space, &image, &args, &envs)?;
 
     Ok((context, image))
 }
diff --git a/kernel/libk/src/task/mem.rs b/kernel/libk/src/task/mem.rs
index 9f674ab0..2081779a 100644
--- a/kernel/libk/src/task/mem.rs
+++ b/kernel/libk/src/task/mem.rs
@@ -27,6 +27,17 @@ pub trait ForeignPointer: Sized {
     /// As this function allows direct memory writes, it is inherently unsafe.
     unsafe fn write_foreign_volatile(self: *mut Self, space: &ProcessAddressSpace, value: Self);
 
+    /// Like `write_foreign_volatile`, but returns `Err` when `space` cannot translate the page.
+    unsafe fn try_write_foreign_volatile(
+        self: *mut Self,
+        space: &ProcessAddressSpace,
+        value: Self,
+    ) -> Result<(), Error>;
+    /// Reads a value through `self` in `space`; `Err` when `space` cannot translate the page.
+    unsafe fn try_read_foreign_volatile(
+        self: *const Self,
+        space: &ProcessAddressSpace,
+    ) -> Result<Self, Error>;
     /// Performs pointer validation for given address space:
     ///
     /// * Checks if the pointer has proper alignment for the type.
@@ -80,6 +91,15 @@ pub trait ForeignPointer: Sized {
 
 impl<T> ForeignPointer for T {
     unsafe fn write_foreign_volatile(self: *mut Self, space: &ProcessAddressSpace, value: T) {
+        self.try_write_foreign_volatile(space, value)
+            .expect("Invalid foreign pointer, could not write")
+    }
+
+    unsafe fn try_write_foreign_volatile(
+        self: *mut Self,
+        space: &ProcessAddressSpace,
+        value: Self,
+    ) -> Result<(), Error> {
         // TODO check align
         let addr = self as usize;
         let start_page = addr & !0xFFF;
@@ -90,12 +110,30 @@ impl<T> ForeignPointer for T {
             todo!("Foreign pointer write crossed a page boundary");
         }
 
-        let phys_page = space
-            .translate(start_page)
-            .expect("Address is not mapped in the target address space");
+        let phys_page = space.translate(start_page)?;
 
         let virt_ptr = phys_page.add(page_offset).virtualize() as *mut T;
-        virt_ptr.write_volatile(value);
+        virt_ptr.write_unaligned(value);
+        Ok(())
+    }
+
+    unsafe fn try_read_foreign_volatile(
+        self: *const Self,
+        space: &ProcessAddressSpace,
+    ) -> Result<Self, Error> {
+        let addr = self as usize;
+        let start_page = addr & !0xFFF;
+        let end_page = (addr + size_of::<T>() - 1) & !0xFFF;
+        let page_offset = addr & 0xFFF;
+        // Only accesses contained in a single 4 KiB page are implemented
+        if start_page != end_page {
+            todo!("Foreign pointer read crossed a page boundary");
+        }
+        // Resolve the page in `space`; a failed translation is returned as `Err`
+        let phys_page = space.translate(start_page)?;
+        let virt_ptr = phys_page.add(page_offset).virtualize() as *const Self;
+        // NOTE: despite the name this is a plain unaligned read, not a volatile one
+        Ok(virt_ptr.read_unaligned())
+    }
 
     unsafe fn validate_user_slice_mut<'a>(
diff --git a/kernel/libk/src/task/thread.rs b/kernel/libk/src/task/thread.rs
index daba8b22..040f501d 100644
--- a/kernel/libk/src/task/thread.rs
+++ b/kernel/libk/src/task/thread.rs
@@ -1,7 +1,7 @@
 use core::{cell::Cell, mem::size_of, ops::Deref};
 
 use alloc::{
-    collections::BTreeMap,
+    collections::{btree_map, BTreeMap, BTreeSet},
     string::String,
     sync::{Arc, Weak},
 };
@@ -9,7 +9,7 @@ use crossbeam_queue::SegQueue;
 use futures_util::task::ArcWake;
 use kernel_arch::{
     task::{Scheduler, TaskContext, TaskFrame},
-    CpuImpl,
+    Architecture, ArchitectureImpl, CpuImpl,
 };
 use libk_mm::process::ProcessAddressSpace;
 use libk_util::{
@@ -24,7 +24,7 @@ use yggdrasil_abi::{
 };
 
 use crate::task::{
-    mem::ForeignPointer,
+    mem::{self, ForeignPointer},
     sched::CpuQueue,
     types::{ThreadAffinity, ThreadId, ThreadState},
     TaskContextImpl,
@@ -32,6 +32,9 @@ use crate::task::{
 
 use super::{debug::ThreadDebugger, process::Process};
 
+type BreakpointType = <ArchitectureImpl as Architecture>::BreakpointType;
+const BREAKPOINT_VALUE: BreakpointType = ArchitectureImpl::BREAKPOINT_VALUE;
+
 /// Provides details about how the thread is scheduled onto CPUs
 pub struct ThreadSchedulingInfo {
     /// Current state
@@ -45,8 +48,10 @@ pub struct ThreadSchedulingInfo {
 
 pub struct ThreadDebuggingInfo {
     pub single_step: bool,
+    pub restore_breakpoint: Option<usize>, // hit breakpoint to re-insert on the next single step
     pub debugger: Option<ThreadDebugger>,
     pub saved_frame: Option<SavedFrame>,
+    pub breakpoints: BTreeMap<usize, BreakpointType>, // address -> original code it replaced
 }
 
 struct SignalEntry {
@@ -115,10 +120,13 @@ impl Thread {
                 in_queue: false,
                 queue: None,
             }),
+            // TODO lazy initialization for debugging info
             debug: IrqSafeSpinlock::new(ThreadDebuggingInfo {
                 single_step: false,
+                restore_breakpoint: None,
                 debugger: None,
                 saved_frame: None,
+                breakpoints: BTreeMap::new(),
             }),
             context: Cell::new(context),
             process,
@@ -243,6 +251,46 @@ impl Thread {
         debug.debugger = Some(debugger);
 
         self.signal_queue.push(Signal::Debug);
+
+        let frame = self
+            .process()
+            .map_image(|img| DebugFrame::Startup {
+                image_base: img.load_base,
+                ip_offset: img.ip_offset,
+                ip: img.entry,
+            })
+            .unwrap();
+
+        debug.debugger.as_mut().unwrap().send(&frame).ok();
+    }
+
+    /// Inserts a breakpoint at `address`; `Error::AlreadyExists` if one is already set there.
+    pub fn set_breakpoint(&self, address: usize) -> Result<(), Error> {
+        log::debug!(
+            "Set breakpoint in {} ({:?}) @ {:#x}",
+            self.id,
+            self.name,
+            address
+        );
+        let mut debug = self.debug.lock();
+        debug.set_breakpoint_inner(self.address_space(), address)
+    }
+    /// Reads `buffer.len()` bytes at `address` from this thread's address space into `buffer`.
+    pub fn read_memory(&self, address: usize, buffer: &mut [u8]) -> Result<(), Error> {
+        log::debug!(
+            "Read memory in {} ({:?}) @ {:#x}",
+            self.id,
+            self.name,
+            address
+        );
+        let space = self.address_space();
+
+        // TODO optimize this later
+        for (offset, byte) in buffer.iter_mut().enumerate() {
+            *byte = unsafe { ((address + offset) as *const u8).try_read_foreign_volatile(space) }?;
+        }
+
+        Ok(())
+    }
 
     pub fn resume(&self, single_step: bool) {
@@ -453,23 +501,77 @@ impl CurrentThread {
     pub fn handle_single_step<F: TaskFrame>(&self, frame: &mut F) {
         {
             let mut debug = self.debug.lock();
+            let space = self.address_space();
 
-            // Single step cleared
-            if !debug.single_step {
-                log::debug!("Clear single step ({} {:?})", self.id, self.name);
-                frame.set_single_step(false);
-                return;
+            if let Some(original) = debug.restore_breakpoint.take() {
+                let brk_range = original..original + size_of::<BreakpointType>();
+                assert!(!brk_range.contains(&frame.user_ip()));
+
+                log::debug!(
+                    "Restore breakpoint, current_ip={:#x}, breakpoint_ip={:#x}",
+                    frame.user_ip(),
+                    original
+                );
+                debug.set_breakpoint_inner(space, original).unwrap();
+            } else {
+                // Single step cleared
+                if !debug.single_step {
+                    log::debug!("Clear single step ({} {:?})", self.id, self.name);
+                    frame.set_single_step(false);
+                    return;
+                }
             }
 
             let frame = frame.store();
             debug.saved_frame = Some(frame.clone());
             // TODO handle cases of detached debugger
             let debugger = debug.debugger.as_ref().unwrap();
-
             debugger.send(&DebugFrame::Step { frame }).ok();
         }
 
-        self.suspend().unwrap();
+        match self.suspend() {
+            Ok(_) | Err(Error::Interrupted) => (),
+            Err(err) => panic!("TODO: handle error in debug suspend: {:?}", err),
+        }
+    }
+
+    /// Handles a possible breakpoint hit; returns `false` if no breakpoint is registered at the IP.
+    pub fn handle_breakpoint<F: TaskFrame>(&self, frame: &mut F) -> bool {
+        let mut debug = self.debug.lock();
+        let ip = frame.user_ip();
+
+        if let Some(value) = debug.breakpoints.remove(&ip) {
+            let space = self.address_space();
+            // Restore original code
+            let pointer = ip as *mut BreakpointType;
+            unsafe { pointer.write_foreign_volatile(space, value) };
+            log::debug!(
+                "Thread {} ({:?}) hit a breakpoint @ {:#x}, step={}",
+                self.id,
+                self.name,
+                ip,
+                debug.single_step
+            );
+
+            // TODO handle cases when no debugger is attached (clear breakpoint and resume)
+            // Step over the restored instruction; `handle_single_step` then re-inserts the breakpoint
+            frame.set_single_step(true);
+
+            let frame = frame.store();
+            debug.restore_breakpoint = Some(ip);
+            debug.saved_frame = Some(frame.clone());
+
+            let debugger = debug.debugger.as_ref().unwrap();
+            debugger.send(&DebugFrame::HitBreakpoint { frame }).ok();
+
+            drop(debug);
+            // An interrupted suspend is not fatal (same handling as `handle_single_step`)
+            match self.suspend() {
+                Ok(_) | Err(Error::Interrupted) => true,
+                Err(err) => panic!("TODO: handle error in debug suspend: {:?}", err),
+            }
+        } else {
+            false
+        }
+    }
 
     /// Sets up a return frame to handle a pending signal, if any is present in the task's queue.
@@ -485,26 +587,18 @@ impl CurrentThread {
 
         if let Some(signal) = self.signal_queue.pop() {
             if signal == Signal::Debug {
-                log::info!("Entered debug signal");
+                frame.set_single_step(true);
+
+                let frame = frame.store();
                 let mut debug = self.debug.lock();
+
                 debug.single_step = true;
+                debug.saved_frame = Some(frame.clone());
+
                 let debugger = debug.debugger.as_ref().unwrap();
                 let process = self.process();
 
-                frame.set_single_step(true);
-
-                // Send initial frame
-                let saved_frame = frame.store();
-                let (image_base, ip_offset) = process
-                    .map_image(|img| (img.load_base, img.ip_offset))
-                    .unwrap();
-                debugger
-                    .send(&DebugFrame::Startup {
-                        image_base,
-                        ip_offset,
-                        frame: saved_frame,
-                    })
-                    .ok();
+                debugger.send(&DebugFrame::Step { frame }).ok();
 
                 return;
             }
@@ -558,3 +652,28 @@ impl Deref for CurrentThread {
         &self.0
     }
 }
+
+impl ThreadDebuggingInfo {
+    /// Saves the code at `address` and overwrites it with `BREAKPOINT_VALUE`.
+    fn set_breakpoint_inner(
+        &mut self,
+        space: &ProcessAddressSpace,
+        address: usize,
+    ) -> Result<(), Error> {
+        match self.breakpoints.entry(address) {
+            btree_map::Entry::Vacant(vacant) => {
+                let pointer = address as *mut BreakpointType;
+                // Read old code from the address space at that location
+                let original =
+                    unsafe { (pointer as *const BreakpointType).try_read_foreign_volatile(space) }?;
+                // Patch in the breakpoint; a failed write is reported instead of panicking
+                unsafe { pointer.try_write_foreign_volatile(space, BREAKPOINT_VALUE) }?;
+
+                vacant.insert(original);
+                Ok(())
+            }
+            // Breakpoint already present: keep the saved original code and report the conflict
+            btree_map::Entry::Occupied(_) => Err(Error::AlreadyExists),
+        }
+    }
+}
diff --git a/kernel/libk/src/vfs/terminal.rs b/kernel/libk/src/vfs/terminal.rs
index 074b6cf4..8514472c 100644
--- a/kernel/libk/src/vfs/terminal.rs
+++ b/kernel/libk/src/vfs/terminal.rs
@@ -43,8 +43,8 @@ pub trait TerminalOutput: Sync {
 
     fn size(&self) -> TerminalSize {
         TerminalSize {
-            rows: 80,
-            columns: 24,
+            rows: 24,
+            columns: 80,
         }
     }
     fn set_size(&self, size: TerminalSize) -> Result<(), Error> {
diff --git a/kernel/modules/test_mod/Cargo.lock b/kernel/modules/test_mod/Cargo.lock
index 9dde9b9b..f400ae32 100644
--- a/kernel/modules/test_mod/Cargo.lock
+++ b/kernel/modules/test_mod/Cargo.lock
@@ -237,6 +237,7 @@ version = "0.1.0"
 dependencies = [
  "aarch64-cpu",
  "bitflags",
+ "cc",
  "device-api",
  "kernel-arch-interface",
  "libk-mm-interface",
diff --git a/kernel/src/arch/i686/exception.rs b/kernel/src/arch/i686/exception.rs
index f1617c24..0a9c9fb1 100644
--- a/kernel/src/arch/i686/exception.rs
+++ b/kernel/src/arch/i686/exception.rs
@@ -149,6 +149,16 @@ impl Entry {
 
 static mut IDT: [Entry; SIZE] = [Entry::NULL; SIZE];
 
+/// Logs the exception kind, current thread, `cs:eip` and `cr3` (and `cr2` for page faults).
+fn dump_user_exception(kind: ExceptionKind, frame: &ExceptionFrame) {
+    let thread = Thread::current();
+    warnln!("{:?} in {} ({:?})", kind, thread.id, thread.name);
+    warnln!("ip = {:02x}:{:08x}", frame.cs, frame.eip);
+    warnln!("cr3 = {:#010x}", CR3.get());
+    if kind == ExceptionKind::PageFault {
+        warnln!("cr2 = {:#010x}", CR2.get());
+    }
+}
 fn kernel_exception_inner(kind: ExceptionKind, frame: &ExceptionFrame) -> ! {
     let cr3 = CR3.get();
 
@@ -165,35 +175,36 @@ fn kernel_exception_inner(kind: ExceptionKind, frame: &ExceptionFrame) -> ! {
 fn user_exception_inner(kind: ExceptionKind, frame: &mut ExceptionFrame) {
     let thread = Thread::current();
 
-    if kind != ExceptionKind::Debug {
-        let cr3 = CR3.get();
-
-        warnln!("{:?} in {} {:?}", kind, thread.id, thread.name);
-        warnln!("CS:EIP = {:#02x}:{:#010x}", frame.cs, frame.eip);
-        warnln!("CR3 = {:#x}", cr3);
-    }
-
-    match kind {
+    let dump = match kind {
         ExceptionKind::PageFault => {
-            let cr2 = CR2.get();
-            warnln!("CR2 = {:#x}", cr2);
-
             thread.raise_signal(Signal::MemoryAccessViolation);
+            true
         }
         ExceptionKind::GeneralProtectionFault => {
-            thread.raise_signal(Signal::MemoryAccessViolation);
+            if thread.handle_breakpoint(frame) {
+                false
+            } else {
+                thread.raise_signal(Signal::MemoryAccessViolation);
+                true
+            }
         }
         ExceptionKind::InvalidOpcode => {
             thread.raise_signal(Signal::Aborted);
+            true
         }
         ExceptionKind::Debug => {
             // TODO check if the thread was really in single-step mode or has debugging related to
             // the address in exception description
             thread.handle_single_step(frame);
+            false
         }
         _ => {
             todo!()
         }
+    };
+
+    if dump {
+        dump_user_exception(kind, frame);
     }
 }
 
diff --git a/kernel/src/arch/x86_64/exception.rs b/kernel/src/arch/x86_64/exception.rs
index f964a5cd..3f5c55fc 100644
--- a/kernel/src/arch/x86_64/exception.rs
+++ b/kernel/src/arch/x86_64/exception.rs
@@ -113,45 +113,58 @@ impl Entry {
 
 static mut IDT: [Entry; SIZE] = [Entry::NULL; SIZE];
 
+/// Logs a user-mode exception: its kind, the current thread's id and name, `cs:rip`, CR3
+/// and, for page faults only, CR2.
+fn dump_user_exception(kind: ExceptionKind, frame: &ExceptionFrame) {
+    let thread = Thread::current();
+    warnln!("{:?} in {} ({:?})", kind, thread.id, thread.name);
+    warnln!("ip = {:02x}:{:08x}", frame.cs, frame.rip);
+    warnln!("cr3 = {:#010x}", CR3.get());
+    if kind == ExceptionKind::PageFault {
+        warnln!("cr2 = {:#010x}", CR2.get());
+    }
+}
+
 fn user_exception_inner(kind: ExceptionKind, frame: &mut ExceptionFrame) {
     let thread = Thread::current();
-    let cr3 = CR3.get();
 
-    if kind != ExceptionKind::Debug {
-        warnln!("{:?} in {} {:?}", kind, thread.id, thread.name);
-        // XXX
-        // frame.dump(debug::LogLevel::Warning);
-        warnln!("CR3 = {:#x}", cr3);
-    }
-
-    log::warn!("{:#x?}", frame);
-
-    match kind {
+    // `dump` is true only on the paths that raise a signal; exceptions consumed by the
+    // single-step or breakpoint handlers are not logged.
+    let dump = match kind {
         ExceptionKind::Debug => {
             // TODO check if the thread was really in single-step mode or has debugging related to
             // the address in exception description
             thread.handle_single_step(frame);
+            false
         }
         ExceptionKind::PageFault => {
-            let cr2 = CR2.get();
-
-            warnln!("CR2 = {:#x}", cr2);
-
             thread.raise_signal(Signal::MemoryAccessViolation);
+            true
         }
         ExceptionKind::GeneralProtectionFault => {
-            thread.raise_signal(Signal::MemoryAccessViolation);
+            if thread.handle_breakpoint(frame) {
+                false
+            } else {
+                thread.raise_signal(Signal::MemoryAccessViolation);
+                true
+            }
         }
         ExceptionKind::FpuException => {
             todo!()
         }
         ExceptionKind::InvalidOpcode => {
+            // TODO handle ud2 as breakpoint? (it's 2 bytes)
             thread.raise_signal(Signal::Aborted);
+            true
         }
         ExceptionKind::Breakpoint => {
             todo!()
         }
         _ => todo!("No handler for exception: {:?}", kind),
+    };
+
+    if dump {
+        dump_user_exception(kind, frame);
     }
 }
 
diff --git a/kernel/src/init.rs b/kernel/src/init.rs
index 329ce38c..70ffa5e3 100644
--- a/kernel/src/init.rs
+++ b/kernel/src/init.rs
@@ -6,7 +6,7 @@ use kernel_fs::devfs;
 use libk::{
     module::load_kernel_symbol_table,
     random,
-    task::{process::Process, runtime, thread::Thread},
+    task::{binary::LoadOptions, process::Process, runtime, thread::Thread},
     vfs::{impls::fn_symlink, IoContext, NodeRef},
 };
 use memfs::MemoryFilesystem;
@@ -75,8 +75,15 @@ pub fn kinit() -> Result<(), Error> {
 
     {
         let group_id = Process::create_group();
-        let (user_init, user_init_main) =
-            proc::load_binary(&mut ioctx, group_id, None, "/init", &["/init", "xxx"], &[])?;
+        let options = LoadOptions {
+            group_id,
+            parent: None,
+            path: "/init",
+            args: &["/init", "xxx"],
+            envs: &[],
+            single_step: false,
+        };
+        let (user_init, user_init_main) = proc::load_binary(&mut ioctx, &options)?;
 
         let mut io = user_init.io.lock();
         io.set_ioctx(ioctx);
diff --git a/kernel/src/proc/mod.rs b/kernel/src/proc/mod.rs
index e5068ae7..763c1bf2 100644
--- a/kernel/src/proc/mod.rs
+++ b/kernel/src/proc/mod.rs
@@ -3,7 +3,7 @@
 use abi::{error::Error, path::Path, process::ProcessGroupId};
 use alloc::sync::{Arc, Weak};
 use libk::{
-    task::{process::Process, thread::Thread},
+    task::{binary::LoadOptions, process::Process, thread::Thread},
     vfs::IoContext,
 };
 
@@ -11,11 +11,8 @@ use libk::{
 #[inline]
 pub fn load_binary<P: AsRef<Path>>(
     ioctx: &mut IoContext,
-    group_id: ProcessGroupId,
-    parent: Option<Weak<Process>>,
-    path: P,
-    args: &[&str],
-    envs: &[&str],
+    options: &LoadOptions<P>,
 ) -> Result<(Arc<Process>, Arc<Thread>), Error> {
-    libk::task::binary::load(ioctx, group_id, parent, path, args, envs)
+    // Thin forwarder: every load parameter travels inside `options`, passed on unchanged.
+    libk::task::binary::load(ioctx, options)
 }
diff --git a/kernel/src/syscall/imp/sys_debug.rs b/kernel/src/syscall/imp/sys_debug.rs
index e0c034e2..ce7a6c9e 100644
--- a/kernel/src/syscall/imp/sys_debug.rs
+++ b/kernel/src/syscall/imp/sys_debug.rs
@@ -16,16 +16,27 @@ pub(crate) fn debug_trace(message: &str) {
     );
 }
 
-pub(crate) fn debug_control(pid: ProcessId, op: &DebugOperation) -> Result<(), Error> {
+/// Executes the debugger request `op` against the process identified by `pid`.
+///
+/// Fails with `Error::DoesNotExist` if there is no such process and panics if
+/// `as_single_thread()` yields nothing. `op` is mutable so `ReadMemory` can hand its
+/// buffer to `read_memory`.
+pub(crate) fn debug_control(pid: ProcessId, op: &mut DebugOperation) -> Result<(), Error> {
     let target = Process::get(pid).ok_or(Error::DoesNotExist)?;
     let target_thread = target.as_single_thread().unwrap();
 
     match op {
-        &DebugOperation::Continue(single_step) => {
+        &mut DebugOperation::Continue(single_step) => {
             // TODO check if it's paused currently
             target_thread.resume(single_step);
             Ok(())
         }
+        &mut DebugOperation::SetBreakpoint(address) => target_thread.set_breakpoint(address),
+        DebugOperation::ReadMemory { address, buffer } => {
+            target_thread.read_memory(*address, buffer)
+        }
+        // Not implemented yet; the fields stay unbound to avoid unused-variable warnings.
+        DebugOperation::WriteMemory { .. } => todo!(),
         _ => todo!(),
     }
 }
diff --git a/kernel/src/syscall/imp/sys_process.rs b/kernel/src/syscall/imp/sys_process.rs
index 2d81bdbd..a49fa9c6 100644
--- a/kernel/src/syscall/imp/sys_process.rs
+++ b/kernel/src/syscall/imp/sys_process.rs
@@ -12,7 +12,10 @@ use abi::{
 use alloc::sync::Arc;
 use libk::{
     block,
-    task::{debug::ThreadDebugger, process::Process, runtime, thread::Thread, ThreadId},
+    task::{
+        binary::LoadOptions, debug::ThreadDebugger, process::Process, runtime, thread::Thread,
+        ThreadId,
+    },
     vfs::IoContext,
 };
 use libk_mm::{
@@ -89,7 +92,6 @@ pub(crate) fn spawn_process(options: &SpawnOptions<'_>) -> Result<ProcessId, Err
     let process = thread.process();
 
     run_with_io(&process, |mut io| {
-        // let mut attach_debugger = None;
         let attach_debugger = options
             .optional
             .iter()
@@ -105,14 +107,15 @@ pub(crate) fn spawn_process(options: &SpawnOptions<'_>) -> Result<ProcessId, Err
             })?;
 
         // Setup a new process from the file
-        let (child_process, child_main) = proc::load_binary(
-            io.ioctx_mut(),
-            process.group_id(),
-            Some(Arc::downgrade(&process)),
-            options.program,
-            options.arguments,
-            options.environment,
-        )?;
+        let load_options = LoadOptions {
+            group_id: process.group_id(),
+            parent: Some(Arc::downgrade(&process)),
+            path: options.program,
+            args: options.arguments,
+            envs: options.environment,
+            single_step: attach_debugger.is_some(),
+        };
+        let (child_process, child_main) = proc::load_binary(io.ioctx_mut(), &load_options)?;
         let pid = child_process.id;
 
         // Inherit group and session from the creator
@@ -166,8 +169,9 @@ pub(crate) fn spawn_process(options: &SpawnOptions<'_>) -> Result<ProcessId, Err
 
         if let Some(debugger) = attach_debugger {
             child_main.attach_debugger(ThreadDebugger::new(debugger));
+        } else {
+            child_main.enqueue();
         }
-        child_main.enqueue();
 
         Ok(pid as _)
     })
diff --git a/lib/abi/def/yggdrasil.abi b/lib/abi/def/yggdrasil.abi
index e9576186..b032ac7f 100644
--- a/lib/abi/def/yggdrasil.abi
+++ b/lib/abi/def/yggdrasil.abi
@@ -138,4 +138,4 @@ syscall execve(options: &ExecveOptions<'_>) -> Result<()>;
 
 // Debugging
 syscall debug_trace(message: &str);
-syscall debug_control(pid: ProcessId, op: &DebugOperation<'_>) -> Result<()>;
+syscall debug_control(pid: ProcessId, op: &mut DebugOperation<'_>) -> Result<()>;
diff --git a/lib/abi/src/arch/aarch64.rs b/lib/abi/src/arch/aarch64.rs
index a57cdca0..134e75d2 100644
--- a/lib/abi/src/arch/aarch64.rs
+++ b/lib/abi/src/arch/aarch64.rs
@@ -1,5 +1,7 @@
 #![allow(missing_docs)]
 
+use super::FrameOps;
+
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Debug, Default)]
 #[repr(C)]
@@ -10,3 +12,14 @@ pub struct SavedFrame {
     pub sp_el0: u64,
     pub mdscr_el1: u64,
 }
+
+// The `FrameOps` accessors map the user instruction pointer onto the `elr_el1` field.
+impl FrameOps for SavedFrame {
+    fn set_user_ip(&mut self, value: usize) {
+        self.elr_el1 = value as _;
+    }
+
+    fn user_ip(&self) -> usize {
+        self.elr_el1 as _
+    }
+}
diff --git a/lib/abi/src/arch/i686.rs b/lib/abi/src/arch/i686.rs
index 4a5b8158..1857a20e 100644
--- a/lib/abi/src/arch/i686.rs
+++ b/lib/abi/src/arch/i686.rs
@@ -1,5 +1,7 @@
 #![allow(missing_docs)]
 
+use super::FrameOps;
+
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Debug, Default)]
 #[repr(C)]
@@ -16,3 +18,14 @@ pub struct SavedFrame {
     pub user_sp: u32,
     pub eflags: u32,
 }
+
+// The `FrameOps` accessors map the user instruction pointer onto the `user_ip` field.
+impl FrameOps for SavedFrame {
+    fn set_user_ip(&mut self, value: usize) {
+        self.user_ip = value as _;
+    }
+
+    fn user_ip(&self) -> usize {
+        self.user_ip as _
+    }
+}
diff --git a/lib/abi/src/arch/mod.rs b/lib/abi/src/arch/mod.rs
index 88c30fb6..9cdefe20 100644
--- a/lib/abi/src/arch/mod.rs
+++ b/lib/abi/src/arch/mod.rs
@@ -16,3 +16,11 @@ pub(crate) mod i686;
 use i686 as arch_impl;
 
 pub use arch_impl::SavedFrame;
+
+/// Architecture-independent access to the user instruction pointer stored in a frame.
+pub trait FrameOps {
+    /// Overwrites the stored user instruction pointer with `value`.
+    fn set_user_ip(&mut self, value: usize);
+    /// Returns the stored user instruction pointer.
+    fn user_ip(&self) -> usize;
+}
diff --git a/lib/abi/src/arch/x86_64.rs b/lib/abi/src/arch/x86_64.rs
index 21750dc9..0297f738 100644
--- a/lib/abi/src/arch/x86_64.rs
+++ b/lib/abi/src/arch/x86_64.rs
@@ -1,5 +1,7 @@
 #![allow(missing_docs)]
 
+use super::FrameOps;
+
 #[cfg_attr(feature = "serde", derive(serde::Serialize, serde::Deserialize))]
 #[derive(Clone, Debug, Default)]
 #[repr(C)]
@@ -25,3 +27,14 @@ pub struct SavedFrame {
     pub user_sp: u64,
     pub rflags: u64,
 }
+
+// The `FrameOps` accessors map the user instruction pointer onto the `user_ip` field.
+impl FrameOps for SavedFrame {
+    fn set_user_ip(&mut self, value: usize) {
+        self.user_ip = value as _;
+    }
+
+    fn user_ip(&self) -> usize {
+        self.user_ip as _
+    }
+}
diff --git a/lib/abi/src/debug.rs b/lib/abi/src/debug.rs
index 87ae9cd8..75a3739a 100644
--- a/lib/abi/src/debug.rs
+++ b/lib/abi/src/debug.rs
@@ -7,6 +7,7 @@ pub enum DebugOperation<'a> {
 
     Interrupt,
     Continue(bool),
+    SetBreakpoint(usize),
 
     ReadMemory {
         address: usize,
@@ -25,11 +26,14 @@ pub enum DebugFrame {
     Startup {
         image_base: usize,
         ip_offset: usize,
-        frame: SavedFrame,
+        ip: usize,
     },
     Step {
         frame: SavedFrame,
     },
+    HitBreakpoint {
+        frame: SavedFrame,
+    },
     // TODO exit status
     Exited,
 }
diff --git a/userspace/Cargo.lock b/userspace/Cargo.lock
index 86fb14d1..9b1fad04 100644
--- a/userspace/Cargo.lock
+++ b/userspace/Cargo.lock
@@ -701,6 +701,7 @@ dependencies = [
  "iced-x86",
  "libterm",
  "rangemap",
+ "rustc-demangle",
  "serde",
  "serde_json",
  "thiserror",
@@ -729,6 +730,12 @@ dependencies = [
  "bitflags 1.3.2",
 ]
 
+[[package]]
+name = "rustc-demangle"
+version = "0.1.24"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "719b953e2095829ee67db738b3bfa9fa368c94900df327b3f07fe6e794d2fe1f"
+
 [[package]]
 name = "rustix"
 version = "0.38.34"
diff --git a/userspace/rdb/Cargo.toml b/userspace/rdb/Cargo.toml
index 77f99e2d..d00203a8 100644
--- a/userspace/rdb/Cargo.toml
+++ b/userspace/rdb/Cargo.toml
@@ -14,6 +14,7 @@ serde = { version = "1.0.193", features = ["derive"], default-features = false }
 thiserror = "1.0.58"
 elf = "0.7.4"
 rangemap = "1.5.1"
+rustc-demangle = "0.1.24"
 
 [target.'cfg(any(target_arch = "x86_64", target_arch = "x86"))'.dependencies]
 iced-x86 = { version = "1.21.0", default-features = false, features = ["gas", "decoder", "std"] }
diff --git a/userspace/rdb/src/aarch64.rs b/userspace/rdb/src/aarch64.rs
index 2eadbe0c..ee239744 100644
--- a/userspace/rdb/src/aarch64.rs
+++ b/userspace/rdb/src/aarch64.rs
@@ -2,6 +2,8 @@ use std::fmt::{self, Display};
 
 use yggdrasil_abi::arch::SavedFrame;
 
+use crate::debugger::SymbolResolver;
+
 use super::{Error, InstructionFormatter, Target};
 
 pub struct TargetImpl;
@@ -38,7 +40,7 @@ impl Target for TargetImpl {
     ) -> Result<Vec<(usize, Self::Instruction)>, Error> {
         Ok(vec![])
     }
-    fn new_instruction_formatter() -> Self::InstructionFormatter {
+    fn new_instruction_formatter(_resolver: SymbolResolver) -> Self::InstructionFormatter {
         Unsupported
     }
 
@@ -46,7 +48,7 @@ impl Target for TargetImpl {
         FlagFormat(frame.spsr_el1)
     }
     fn register_list(frame: &SavedFrame, out: &mut Vec<(String, Self::Register)>) {
-        for i in 0..30 {
+        for i in 0..10 {
             out.push((format!("x{}", i), frame.gp_regs[i]));
         }
         out.push(("pc".into(), frame.elr_el1 as _));
diff --git a/userspace/rdb/src/debugger.rs b/userspace/rdb/src/debugger.rs
index 5c281cfe..c86ba2f8 100644
--- a/userspace/rdb/src/debugger.rs
+++ b/userspace/rdb/src/debugger.rs
@@ -1,4 +1,6 @@
 use std::{
+    cell::{RefCell, RefMut},
+    collections::HashMap,
     fmt::Write,
     fs::File,
     io::{BufReader, Read, Seek, SeekFrom},
@@ -8,9 +10,10 @@ use std::{
     },
     path::Path,
     process::{Child, Command},
+    sync::Arc,
 };
 
-use elf::{endian::AnyEndian, segment::ProgramHeader, ElfStream};
+use elf::{endian::AnyEndian, segment::ProgramHeader, symbol::Symbol, ElfStream};
 use libterm::{Color, Term, TermKey};
 use rangemap::RangeMap;
 use yggdrasil_rt::{debug::DebugFrame, process::ProcessId};
@@ -19,12 +22,37 @@ use crate::state::State;
 use crate::InstructionFormatter;
 use crate::{comm::Comm, Error, Target};
 
+/// Snapshot used to map addresses back to function names of the debugged image.
+pub struct SymbolResolver {
+    /// Shared information about the debugged image.
+    image: Arc<ImageInfo>,
+    /// Offset subtracted from a runtime address to obtain an image address.
+    ip_offset: usize,
+    /// Instruction pointer (taken from `State::current_ip`) when the resolver was created.
+    ip: u64,
+}
+
+/// Data extracted from the debugged ELF image.
+pub struct ImageInfo {
+    /// Symbols keyed by their demangled name.
+    symbol_table: HashMap<String, Symbol>,
+    /// Program headers of the image's `PT_LOAD` segments, looked up by address.
+    segment_headers: RangeMap<usize, ProgramHeader>,
+    /// Segment contents, read from the image file the first time they are needed.
+    segments: RefCell<RangeMap<usize, Vec<u8>>>,
+    /// Address ranges of non-empty function symbols mapped to their demangled names.
+    functions: RangeMap<u64, String>,
+}
+
 pub struct Debugger<T: Target> {
     comm: Comm,
     term: Term,
     file: BufReader<File>,
-    segment_headers: RangeMap<usize, ProgramHeader>,
-    segments: RangeMap<usize, Vec<u8>>,
+
+    image: Arc<ImageInfo>,
+
+    command: Option<String>,
+    status: Option<String>,
 
     term_fd: RawFd,
     comm_fd: RawFd,
@@ -40,7 +59,7 @@ impl<T: Target> Debugger<T> {
     pub fn from_command<P: AsRef<Path>>(image: P, mut command: Command) -> Result<Self, Error> {
         let image = image.as_ref();
 
-        let segment_headers = extract_segments(image)?;
+        let image_info = read_image(image)?;
 
         let file = BufReader::new(File::open(image)?);
         let comm = Comm::open("rdb-1")?;
@@ -64,8 +83,10 @@ impl<T: Target> Debugger<T> {
             comm,
             term,
 
-            segment_headers,
-            segments: RangeMap::new(),
+            image: image_info.into(),
+
+            command: None,
+            status: None,
 
             term_fd,
             comm_fd,
@@ -79,11 +100,20 @@ impl<T: Target> Debugger<T> {
     }
 
     fn handle_frame(&mut self, frame: DebugFrame) -> Result<(), Error> {
+        self.status = None;
+
         match frame {
-            DebugFrame::Startup { image_base, frame, ip_offset } => {
+            DebugFrame::Startup {
+                image_base,
+                ip_offset,
+                ip,
+            } => {
                 let pid = unsafe { ProcessId::from_raw(self.child.id()) };
                 let mut state = State::new(image_base, ip_offset, pid);
-                state.update(&frame, true)?;
+                if ip >= ip_offset {
+                    state.update_ip(ip - ip_offset);
+                }
+                self.status = Some(format!("Attached to #{} @ {:#x}", pid, state.current_ip));
                 self.state = Some(state);
                 Ok(())
             }
@@ -92,6 +122,12 @@ impl<T: Target> Debugger<T> {
                 state.update(&frame, true)?;
                 Ok(())
             }
+            DebugFrame::HitBreakpoint { frame } => {
+                let state = self.state.as_mut().unwrap();
+                state.update(&frame, true)?;
+                self.status = Some(format!("Hit breakpoint @ {:#x}", state.current_ip));
+                Ok(())
+            }
             DebugFrame::Exited => {
                 self.child_exited = true;
                 Ok(())
@@ -100,22 +136,102 @@ impl<T: Target> Debugger<T> {
     }
 
+    /// Handles one key press. While a `:` command is being edited, keys modify the pending
+    /// command line (DEL removes a character, newline runs it); otherwise `:` opens the
+    /// command line, `s` single-steps and `c` continues the debuggee; `q` is unimplemented.
     fn handle_key(&mut self, key: TermKey) -> Result<(), Error> {
-        match key {
-            TermKey::Char('q') => {
-                // TODO send exit to the child
-                // self.child.kill()?;
-                todo!();
+        if let Some(command) = self.command.as_mut() {
+            match key {
+                TermKey::Char('\x7F') => {
+                    if !command.is_empty() {
+                        command.pop();
+                    }
+                    Ok(())
+                }
+                TermKey::Char('\n') => {
+                    let command = command.clone();
+                    self.command = None;
+                    self.run_command(&command)?;
+                    Ok(())
+                }
+                TermKey::Char(ch) if ch.is_ascii_graphic() || ch == ' ' => {
+                    command.push(ch);
+                    Ok(())
+                }
+                _ => Ok(()),
             }
-            TermKey::Char('s') => {
-                // Send resume to the debugee
-                if let Some(state) = self.state.as_mut() {
-                    state.resume(true)?;
+        } else {
+            match key {
+                TermKey::Char(':') => {
+                    self.command = Some(String::new());
+                    Ok(())
+                }
+                TermKey::Char('q') => {
+                    // TODO send exit to the child
+                    // self.child.kill()?;
+                    todo!();
+                }
+                TermKey::Char('s') => {
+                    // Send resume to the debugee
+                    if let Some(state) = self.state.as_mut() {
+                        state.resume(true)?;
+                    }
+                    Ok(())
+                }
+                TermKey::Char('c') => {
+                    if let Some(state) = self.state.as_mut() {
+                        state.resume(false)?;
+                    }
+                    Ok(())
+                }
+                _ => Ok(()),
+            }
+        }
+    }
+
+    fn run_command(&mut self, command: &str) -> Result<(), Error> {
+        let words: Vec<&str> = command.split(' ').collect();
+
+        if words.is_empty() {
+            return Ok(());
+        }
+        let Some(state) = self.state.as_mut() else {
+            return Ok(());
+        };
+
+        match words[0] {
+            "break" | "b" if words.len() == 2 => {
+                let address = Self::parse_location(&self.image, state.ip_offset, words[1])?;
+                if let Err(error) = state.set_breakpoint(address) {
+                    self.status = Some(format!(
+                        "Couldn't set breakpoint @ {:#x}: {:?}",
+                        address, error
+                    ));
                 }
                 Ok(())
             }
-            TermKey::Char('c') => {
-                if let Some(state) = self.state.as_mut() {
-                    state.resume(false)?;
+            "read" | "r" if words.len() == 3 => {
+                let ty = match words[1] {
+                    "b" => 1,
+                    "w" => 2,
+                    "d" => 4,
+                    "q" => 8,
+                    _ => return Ok(()),
+                };
+                let address = convert_address(words[2])?;
+
+                let mut buf = [0; 8];
+                if let Ok(_) = state.read_memory(address, &mut buf[..ty]) {
+                    let value = match ty {
+                        1 => buf[0] as u64,
+                        2 => u16::from_ne_bytes([buf[0], buf[1]]) as u64,
+                        4 => u32::from_ne_bytes([buf[0], buf[1], buf[2], buf[3]]) as u64,
+                        8 => u64::from_ne_bytes(buf),
+                        _ => unreachable!(),
+                    };
+
+                    self.status = Some(format!("*{:#x} -> {:#x}", address, value));
+                } else {
+                    self.status = Some(format!("Could not read memory at {:#x}", address));
                 }
                 Ok(())
             }
@@ -128,10 +241,12 @@ impl<T: Target> Debugger<T> {
             return Ok(vec![]);
         };
 
+        let mut segments = self.image.segments.borrow_mut();
+
         // Find segment
-        let (range, segment) = match self.segments.get_key_value(&state.current_ip) {
+        let (range, segment) = match segments.get_key_value(&state.current_ip) {
             Some(seg) => seg,
-            None if let Some(header) = self.segment_headers.get(&state.current_ip) => {
+            None if let Some(header) = self.image.segment_headers.get(&state.current_ip) => {
                 let start = header.p_vaddr as usize;
                 let end = (header.p_vaddr + header.p_memsz) as usize;
                 let mut buffer = vec![0; header.p_memsz as usize];
@@ -139,18 +254,19 @@ impl<T: Target> Debugger<T> {
                 self.file.seek(SeekFrom::Start(header.p_offset))?;
                 self.file.read_exact(&mut buffer)?;
 
-                self.segments.insert(start..end, buffer);
+                segments.insert(start..end, buffer);
 
-                self.segments.get_key_value(&state.current_ip).unwrap()
+                segments.get_key_value(&state.current_ip).unwrap()
             }
             // Outside of any segments
             None => return Ok(vec![]),
         };
 
         let offset_within_segment = state.current_ip - range.start;
+        let upper_limit = std::cmp::min(segment.len(), offset_within_segment + amount * 8);
 
         T::disassemble(
-            &segment[offset_within_segment..offset_within_segment + amount * 8],
+            &segment[offset_within_segment..upper_limit],
             state.current_ip + state.ip_offset,
             amount,
         )
@@ -163,7 +279,7 @@ impl<T: Target> Debugger<T> {
             return Ok(0);
         };
 
-        let columns = (width + REG_WIDTH - 3) / REG_WIDTH;
+        let columns = core::cmp::max((width - 2) / REG_WIDTH, 1);
         let mut gpregs = vec![];
         T::register_list(&state.last_frame, &mut gpregs);
         let rows = 1 + (gpregs.len() + columns - 1) / columns;
@@ -213,22 +329,31 @@ impl<T: Target> Debugger<T> {
     }
 
     fn redraw(&mut self) -> Result<(), Error> {
+        let resolver = self.symbol_resolver();
         let (width, height) = self.term.size()?;
         self.term.clear(libterm::Clear::All)?;
 
         // Show register block
         let regs_rows = self.print_registers(width)?;
 
-        let disassembly = self.disassembly(height - regs_rows - 1)?;
+        if let Some((symbol, offset)) = resolver
+            .as_ref()
+            .and_then(SymbolResolver::resolve_current_function)
+        {
+            self.term.set_cursor_position(regs_rows, 1).ok();
+            write!(self.term, "<{}> + {}", symbol, offset).ok();
+        }
+
+        let disassembly = self.disassembly(height - regs_rows - 2)?;
 
         if !disassembly.is_empty() {
-            let mut formatter = T::new_instruction_formatter();
+            let mut formatter = T::new_instruction_formatter(resolver.unwrap());
             let mut buffer = String::new();
 
             // Show disassembly block
             for (i, (ip, insn)) in disassembly.into_iter().enumerate() {
                 let is_current = i == 0;
-                self.term.set_cursor_position(i + regs_rows, 0)?;
+                self.term.set_cursor_position(i + regs_rows + 1, 0)?;
                 buffer.clear();
                 formatter.format_instruction(&insn, &mut buffer);
                 if is_current {
@@ -256,7 +381,13 @@ impl<T: Target> Debugger<T> {
             write!(self.term, " Waiting for inferior process").ok();
         }
 
+        // TODO deconflict status and command
         self.term.set_cursor_position(height - 1, 0)?;
+        if let Some(command) = self.command.as_ref() {
+            write!(self.term, ":{}", command).ok();
+        } else if let Some(status) = self.status.as_ref() {
+            write!(self.term, ">> {}", status).ok();
+        }
         self.term.reset_style()?;
         self.term.flush()?;
 
@@ -287,13 +418,84 @@ impl<T: Target> Debugger<T> {
         println!("Program exited with status {:?}", status);
         Ok(())
     }
+
+    /// Builds a [`SymbolResolver`] from the current debuggee state, or returns `None` when
+    /// no state has been set up yet.
+    pub fn symbol_resolver(&self) -> Option<SymbolResolver> {
+        let state = self.state.as_ref()?;
+        Some(SymbolResolver {
+            image: self.image.clone(),
+            ip_offset: state.ip_offset,
+            ip: state.current_ip as _,
+        })
+    }
+
+    /// Turns `string` into an address: a name found in the symbol table maps to the symbol
+    /// value plus `ip_offset`, anything else is handed to `convert_address`.
+    fn parse_location(image: &ImageInfo, ip_offset: usize, string: &str) -> Result<u64, Error> {
+        // TODO validate that the breakpoint is within .text segment (or maybe delegate this
+        //      validation to the kernel?)
+        if let Some(sym) = image.symbol_table.get(string) {
+            Ok(sym.st_value + ip_offset as u64)
+        } else {
+            convert_address(string)
+        }
+    }
 }
 
-fn extract_segments<P: AsRef<Path>>(path: P) -> Result<RangeMap<usize, ProgramHeader>, Error> {
+impl SymbolResolver {
+    /// Finds the function range containing `ip` and returns the function name together
+    /// with the distance of `ip` from the start of that range.
+    fn resolve_symbol_inner(image: &ImageInfo, ip: u64) -> Option<(&str, usize)> {
+        if let Some((range, function)) = image.functions.get_key_value(&ip) {
+            assert!(range.start <= ip);
+            return Some((function.as_str(), (ip - range.start).try_into().unwrap()));
+        }
+
+        None
+    }
+
+    /// Resolves `ip` to `(function name, offset into the function)`.
+    pub fn resolve_symbol(&self, ip: u64) -> Option<(&str, usize)> {
+        Self::resolve_symbol_inner(&self.image, ip)
+    }
+
+    /// Resolves the instruction pointer stored in this resolver.
+    pub fn resolve_current_function(&self) -> Option<(&str, usize)> {
+        Self::resolve_symbol_inner(&self.image, self.ip)
+    }
+
+    /// Subtracts `ip_offset` from `address`; `None` if the subtraction would underflow.
+    pub fn to_image_address(&self, address: u64) -> Option<u64> {
+        address.checked_sub(self.ip_offset as _)
+    }
+}
+
+fn read_image<P: AsRef<Path>>(path: P) -> Result<ImageInfo, Error> {
     let file = BufReader::new(File::open(path)?);
-    let elf = ElfStream::<AnyEndian, _>::open_stream(file).unwrap();
+    let mut elf = ElfStream::<AnyEndian, _>::open_stream(file).unwrap();
 
     let mut ranges = RangeMap::new();
+    let mut symbols = HashMap::new();
+    let mut functions = RangeMap::new();
+
+    let (symtab, strtab) = elf.symbol_table().unwrap().unwrap();
+
+    #[cfg(any(not(target_arch = "aarch64"), rust_analyzer))]
+    for sym in symtab {
+        let raw_name = strtab.get(sym.st_name as _).unwrap();
+        let demangled_name = rustc_demangle::demangle(raw_name).to_string();
+
+        if sym.st_symtype() == elf::abi::STT_FUNC && sym.st_size != 0 {
+            functions.insert(
+                sym.st_value..sym.st_value + sym.st_size,
+                demangled_name.clone(),
+            );
+        }
+
+        symbols.insert(demangled_name, sym);
+    }
+
     for seg in elf.segments() {
         if seg.p_type != elf::abi::PT_LOAD {
             continue;
@@ -305,5 +498,17 @@ fn extract_segments<P: AsRef<Path>>(path: P) -> Result<RangeMap<usize, ProgramHe
         ranges.insert(start..end, *seg);
     }
 
-    Ok(ranges)
+    Ok(ImageInfo {
+        functions,
+        symbol_table: symbols,
+        segment_headers: ranges,
+        segments: RangeMap::new().into(),
+    })
+}
+
+/// Parses a `0x`-prefixed hexadecimal or a plain decimal address.
+/// Malformed input yields `Error::InvalidAddress` instead of panicking the debugger.
+fn convert_address(s: &str) -> Result<u64, Error> {
+    let (digits, radix) = s.strip_prefix("0x").map_or((s, 10), |hex| (hex, 16));
+    u64::from_str_radix(digits, radix).map_err(|_| Error::InvalidAddress(s.into()))
 }
diff --git a/userspace/rdb/src/main.rs b/userspace/rdb/src/main.rs
index abb98d97..2cb390e7 100644
--- a/userspace/rdb/src/main.rs
+++ b/userspace/rdb/src/main.rs
@@ -2,7 +2,7 @@
 use std::{fmt::{LowerHex, Display}, io, path::PathBuf, process::Command};
 
 use clap::Parser;
-use debugger::Debugger;
+use debugger::{Debugger, SymbolResolver};
 use imp::TargetImpl;
 use yggdrasil_abi::arch::SavedFrame;
 
@@ -26,6 +26,8 @@ pub enum Error {
     TermError(#[from] libterm::Error),
     #[error("Debug control error: {0:?}")]
     DebugError(yggdrasil_rt::Error),
+    #[error("Invalid address: {0:?}")]
+    InvalidAddress(String)
 }
 
 pub trait Target {
@@ -38,7 +40,7 @@ pub trait Target {
         ip: usize,
         limit: usize,
     ) -> Result<Vec<(usize, Self::Instruction)>, Error>;
-    fn new_instruction_formatter() -> Self::InstructionFormatter;
+    fn new_instruction_formatter(resolver: SymbolResolver) -> Self::InstructionFormatter;
 
     fn register_list(frame: &SavedFrame, out: &mut Vec<(String, Self::Register)>);
     fn flags_register_as_display(frame: &SavedFrame) -> impl Display;
diff --git a/userspace/rdb/src/state.rs b/userspace/rdb/src/state.rs
index 94a5df90..87686805 100644
--- a/userspace/rdb/src/state.rs
+++ b/userspace/rdb/src/state.rs
@@ -33,15 +33,50 @@ impl<T: Target> State<T> {
 
     pub fn resume(&mut self, step: bool) -> Result<(), Error> {
         unsafe {
-            yggdrasil_rt::sys::debug_control(self.pid, &DebugOperation::Continue(step))
+            yggdrasil_rt::sys::debug_control(self.pid, &mut DebugOperation::Continue(step))
                 .map_err(Error::DebugError)
         }
     }
 
+    /// Converts a 64-bit address to the native pointer width, reporting
+    /// `Error::InvalidAddress` instead of panicking when it does not fit (32-bit targets).
+    fn native_address(address: u64) -> Result<usize, Error> {
+        address
+            .try_into()
+            .map_err(|_| Error::InvalidAddress(format!("{:#x}", address)))
+    }
+
+    /// Requests a breakpoint at `address` in the debugged process through `debug_control`.
+    pub fn set_breakpoint(&mut self, address: u64) -> Result<(), Error> {
+        let mut op = DebugOperation::SetBreakpoint(Self::native_address(address)?);
+        // SAFETY: `op` is a valid, exclusive reference for the whole call.
+        // TODO confirm the syscall wrapper has no further requirements.
+        unsafe {
+            yggdrasil_rt::sys::debug_control(self.pid, &mut op).map_err(Error::DebugError)
+        }
+    }
+
+    /// Requests a read of the debugged process' memory at `address` into `buffer`.
+    pub fn read_memory(&mut self, address: u64, buffer: &mut [u8]) -> Result<(), Error> {
+        let mut op = DebugOperation::ReadMemory {
+            address: Self::native_address(address)?,
+            buffer,
+        };
+        // SAFETY: `op` and the buffer it borrows stay valid for the whole call (TODO confirm).
+        unsafe {
+            yggdrasil_rt::sys::debug_control(self.pid, &mut op).map_err(Error::DebugError)
+        }
+    }
+
+    /// Stores `ip` as the current instruction pointer.
+    pub fn update_ip(&mut self, ip: usize) {
+        self.current_ip = ip;
+    }
+
     pub fn update(&mut self, frame: &SavedFrame, _refresh: bool) -> Result<(), Error> {
         let ip = T::real_ip(frame) - self.ip_offset;
+        self.update_ip(ip);
         self.last_frame = frame.clone();
-        self.current_ip = ip;
 
         Ok(())
     }
diff --git a/userspace/rdb/src/x86.rs b/userspace/rdb/src/x86.rs
index 2bb9646e..3b9521c5 100644
--- a/userspace/rdb/src/x86.rs
+++ b/userspace/rdb/src/x86.rs
@@ -3,7 +3,7 @@ use std::fmt::{self, Display};
 use iced_x86::{Decoder, DecoderOptions, Formatter, GasFormatter, Instruction};
 use yggdrasil_abi::arch::SavedFrame;
 
-use crate::{InstructionFormatter, Target};
+use crate::{debugger::SymbolResolver, InstructionFormatter, Target};
 
 #[cfg(any(target_pointer_width = "32", rust_analyzer))]
 const BITNESS: u32 = 32;
@@ -44,6 +44,30 @@ impl InstructionFormatter<Instruction> for GasFormatter {
     }
 }
 
+/// Lets iced-x86 annotate operand addresses with `<function+offset>` labels.
+impl iced_x86::SymbolResolver for SymbolResolver {
+    fn symbol(
+        &mut self,
+        _instruction: &Instruction,
+        _operand: u32,
+        _instruction_operand: Option<u32>,
+        address: u64,
+        _address_size: u32,
+    ) -> Option<iced_x86::SymbolResult<'_>> {
+        // Function ranges are looked up by image address, so strip the load offset first.
+        let image_address = self.to_image_address(address)?;
+        let (name, displacement) = self.resolve_symbol(image_address)?;
+
+        // An exact hit prints as `<name>`, anything else as `<name+offset>`.
+        let label = match displacement {
+            0 => format!("{:#x} <{}>", address, name),
+            _ => format!("{:#x} <{}+{:#x}>", address, name, displacement),
+        };
+
+        Some(iced_x86::SymbolResult::with_string(address, label))
+    }
+}
+
 impl Target for TargetImpl {
     type Instruction = Instruction;
     type InstructionFormatter = GasFormatter;
@@ -62,15 +84,14 @@ impl Target for TargetImpl {
             instructions.push((insn.ip() as usize, insn));
         }
 
-        debug_trace!("{}", instructions.len());
         Ok(instructions)
     }
 
-    fn new_instruction_formatter() -> Self::InstructionFormatter {
-         let mut formatter = GasFormatter::new();
-         formatter.options_mut().set_uppercase_hex(false);
-         formatter.options_mut().set_branch_leading_zeros(false);
-         formatter
+    fn new_instruction_formatter(resolver: SymbolResolver) -> Self::InstructionFormatter {
+        let mut formatter = GasFormatter::with_options(Some(Box::new(resolver)), None);
+        formatter.options_mut().set_uppercase_hex(false); // hex digits in lowercase
+        formatter.options_mut().set_branch_leading_zeros(false); // no zero padding on branch targets
+        formatter // now owns the boxed resolver handed to `with_options`
     }
 
     fn register_list(frame: &SavedFrame, out: &mut Vec<(String, Self::Register)>) {
diff --git a/userspace/sysutils/Cargo.toml b/userspace/sysutils/Cargo.toml
index be0bdd1f..8bf83110 100644
--- a/userspace/sysutils/Cargo.toml
+++ b/userspace/sysutils/Cargo.toml
@@ -94,3 +94,7 @@ path = "src/chmod.rs"
 [[bin]]
 name = "sysmon"
 path = "src/sysmon.rs"
+
+[[bin]]
+name = "tst"
+path = "src/tst.rs"
diff --git a/userspace/sysutils/src/tst.rs b/userspace/sysutils/src/tst.rs
new file mode 100644
index 00000000..3b90fc03
--- /dev/null
+++ b/userspace/sysutils/src/tst.rs
@@ -0,0 +1,35 @@
+use std::time::Duration;
+
+/// Floating-point stress loop: recomputes `2^11 + v` forever through `black_box`.
+///
+/// With `s` set, prints every result that is not exactly `2050.0`, which is the value
+/// expected for `v == 2.0`. Never returns.
+fn f(v: f64, s: bool) {
+    loop {
+        // black_box asks the optimizer not to fold the arithmetic away at compile time.
+        let mut x = core::hint::black_box(1.0);
+        let y = core::hint::black_box(2.0);
+        for _ in 0..10 {
+            x *= y;
+        }
+        let z = core::hint::black_box(v);
+        let v = core::hint::black_box(core::hint::black_box(x * y) + z);
+        // 1024 * 2 + 2 is exactly representable, so the exact comparison is deliberate;
+        // any other value would mean the arithmetic produced a wrong result.
+        if s && v != 2050.000 {
+            println!("{:.3} ", v);
+        }
+    }
+}
+
+/// Starts six background threads running `f` silently with various addends, then
+/// runs the reporting instance (`v = 2.0`, `s = true`) on the main thread.
+fn main() {
+    // NOTE(review): presumably meant to exercise per-thread FP state — confirm.
+    for addend in [1.5, 3.5, 0.75, 0.75, 0.75, 0.75] {
+        // Join handles are discarded; `f` never returns, so there is nothing to join.
+        std::thread::spawn(move || f(addend, false));
+    }
+
+    f(2.0, true)
+}
diff --git a/xtask/src/build/userspace.rs b/xtask/src/build/userspace.rs
index 6e07fc47..0e47f5ad 100644
--- a/xtask/src/build/userspace.rs
+++ b/xtask/src/build/userspace.rs
@@ -34,6 +34,7 @@ const PROGRAMS: &[(&str, &str)] = &[
     ("chmod", "bin/chmod"),
     //    ("sha256sum", "bin/sha256sum"),
     ("sysmon", "bin/sysmon"),
+    ("tst", "bin/tst"),
     // netutils
     ("netconf", "sbin/netconf"),
     ("dhcp-client", "sbin/dhcp-client"),