use std::{collections::HashMap, fmt}; use crate::{ compile::CompileOptions, error::{EvalError, MachineError, MachineErrorKind}, vm::{ env::Environment, instruction::{ConstantId, Instruction, MathInstruction}, macros::MacroExpand, module::{Module, ModuleConstant, ModuleRef}, prelude, stack::Stack, value::{BytecodeFunction, NativeFunction, TryFromValue, Value}, }, }; #[derive(Debug, Clone, PartialEq)] pub struct InstructionPointer { pub module: ModuleRef, pub address: usize, } #[derive(Debug, Clone, PartialEq)] pub struct CallFrame { pub arguments: Vec, pub return_address: Option, pub event: ExecutionEvent, pub locals: HashMap, } pub struct Machine { pub ip: Option, value_stack: Stack, pub call_stack: Stack, pub trace_instructions: bool, pub trace_calls: bool, pub trace_returns: bool, // Top-level locals locals: HashMap, } #[derive(Debug, Clone, PartialEq)] pub enum ExecutionEvent { ModuleExit(ModuleRef), BytecodeFunctionExit(BytecodeFunction), None, } impl Default for Machine { fn default() -> Self { Self { ip: None, value_stack: Stack::new(1024), call_stack: Stack::new(32), locals: HashMap::new(), trace_instructions: false, trace_calls: false, trace_returns: false, } } } impl Machine { pub fn error_at_ip(&self, kind: MachineErrorKind) -> MachineError { MachineError { ip: self.ip.clone(), error: kind, } } pub fn ip(&self) -> Option { self.ip.clone() } pub fn set_local(&mut self, index: u32, value: Value) { let locals = match self.call_stack.current_mut() { Some(frame) => &mut frame.locals, None => &mut self.locals, }; locals.insert(index, value); } fn get_local(&self, index: u32) -> Option<&Value> { let locals = match self.call_stack.current() { Some(frame) => &frame.locals, None => &self.locals, }; locals.get(&index) } fn pop(&mut self) -> Result { self.value_stack .pop() .ok_or_else(|| self.error_at_ip(MachineErrorKind::ValueStackUnderflow)) } fn push(&mut self, value: Value) -> Result<(), MachineError> { self.value_stack .push(value) .map_err(|_| self.error_at_ip(MachineErrorKind::ValueStackOverflow)) } fn enter_bytecode_function( &mut self, bytecode: BytecodeFunction, arguments: Vec, ) -> Result<(), MachineError> { let source_ip = self.ip.clone(); let BytecodeFunction { module, required_count, address, } = bytecode.clone(); if required_count > arguments.len() { return Err(self.error_at_ip(MachineErrorKind::ArgumentCountMismatch( required_count, arguments.len(), ))); } let frame = CallFrame { arguments, event: ExecutionEvent::BytecodeFunctionExit(bytecode), return_address: source_ip.map(|ip| InstructionPointer { module: ip.module, address: ip.address + 1, }), locals: HashMap::new(), }; let entry_ip = InstructionPointer { module, address }; if self.trace_calls { eprintln!("TRACE: Call bytecode function"); if let Some(source_ip) = self.ip.as_ref() { eprintln!("TRACE: From {source_ip}"); } else { eprintln!("TRACE: From "); } eprintln!("TRACE: To {entry_ip}"); } if self.call_stack.push(frame).is_err() { return Err(self.error_at_ip(MachineErrorKind::CallStackOverflow)); } self.ip = Some(entry_ip); Ok(()) } fn execute_call( &mut self, environment: &mut Environment, count: usize, ) -> Result<(), MachineError> { enum Callee { Bytecode(BytecodeFunction), Native(NativeFunction), } let callee = self.pop()?; let callee = match callee { Value::BytecodeFunction(bytecode) => Callee::Bytecode(bytecode), Value::NativeFunction(native) => Callee::Native(native), _ => return Err(self.error_at_ip(MachineErrorKind::InvalidArgument)), }; let mut arguments = vec![]; for _ in 0..count { arguments.push(self.pop()?); } match callee { Callee::Bytecode(bytecode) => { self.enter_bytecode_function(bytecode, arguments)?; } Callee::Native(native) => { let source_ip = self.ip.clone().unwrap(); let result = native.invoke(self, environment, &arguments)?; self.push(result)?; self.ip = Some(InstructionPointer { module: source_ip.module, address: source_ip.address + 1, }); } } Ok(()) } fn execute_builtin_native( &mut self, environment: &mut Environment, function: F, count: usize, ) -> Result<(), MachineError> where F: Fn(&mut Self, &mut Environment, &[Value]) -> Result, { let mut args = vec![]; for _ in 0..count { args.push(self.pop()?); } let value = function(self, environment, &args)?; self.push(value)?; Ok(()) } fn execute_return(&mut self) -> Result { let ip = self.ip.clone().unwrap(); if self.trace_returns { eprintln!("TRACE: Return"); eprintln!("TRACE: From {ip}"); ip.module.dump(Some(ip.address), 4, 0); } if let Some(frame) = self.call_stack.pop() { if self.trace_returns { if let Some(target_ip) = frame.return_address.as_ref() { eprintln!("TRACE: To {target_ip}"); } else { eprintln!("TRACE: To "); } } self.ip = frame.return_address; Ok(frame.event) } else { if self.trace_returns { eprintln!("TRACE: To "); } self.ip = None; Ok(ExecutionEvent::ModuleExit(ip.module)) } } fn execute_math( &mut self, environment: &mut Environment, math: MathInstruction, count: usize, ) -> Result<(), MachineError> { let function = match math { MathInstruction::Add => prelude::builtin_add, MathInstruction::Sub => prelude::builtin_sub, MathInstruction::Mul => prelude::builtin_mul, MathInstruction::Div => prelude::builtin_div, MathInstruction::Mod => prelude::builtin_mod, MathInstruction::And => prelude::builtin_and, MathInstruction::Or => prelude::builtin_or, MathInstruction::BitwiseAnd => prelude::builtin_bitwise_and, MathInstruction::BitwiseOr => prelude::builtin_bitwise_or, MathInstruction::BitwiseXor => prelude::builtin_bitwise_xor, MathInstruction::Gt => prelude::builtin_cmp_gt, MathInstruction::Lt => prelude::builtin_cmp_lt, MathInstruction::Eq => prelude::builtin_cmp_eq, MathInstruction::Ne => prelude::builtin_cmp_ne, MathInstruction::Ge => prelude::builtin_cmp_ge, MathInstruction::Le => prelude::builtin_cmp_le, }; self.execute_builtin_native(environment, function, count) } fn execute_branch(&mut self, offset: isize) -> Result { let value = self.pop()?; let do_branch = !bool::try_from_value(&value).unwrap_or_default(); if do_branch { self.execute_jump(offset)?; } Ok(!do_branch) } fn execute_jump(&mut self, offset: isize) -> Result<(), MachineError> { let ip = self.ip.clone().unwrap(); self.ip = Some(InstructionPointer { module: ip.module, address: ip.address.wrapping_add_signed(offset), }); Ok(()) } fn execute_push_constant(&mut self, index: ConstantId) -> Result<(), MachineError> { let ip = self.ip.as_ref().unwrap(); let constant = ip.module.constant(index).expect("TODO"); let value = match constant { ModuleConstant::LocalFunction(address, required_count) => { Value::BytecodeFunction(BytecodeFunction { module: ip.module.clone(), required_count, address, }) } ModuleConstant::String(value) => Value::String(value), ModuleConstant::Integer(value) => Value::Integer(value), ModuleConstant::Identifier(identifier) => Value::Identifier(identifier), ModuleConstant::Value(value) => value.as_ref().clone(), }; self.push(value) } fn execute_push_argument(&mut self, index: usize) -> Result<(), MachineError> { let frame = self.call_stack.current().expect("valid call frame"); let argument = frame.arguments.get(index); match argument { Some(arg) => self.push(arg.clone()), None => self.push(Value::Nil), } } fn execute_get_global(&mut self, environment: &mut Environment) -> Result<(), MachineError> { let ident = self.pop()?; match ident { Value::Identifier(ident) => { let value = environment .global_value(&ident) .ok_or_else(|| self.error_at_ip(MachineErrorKind::UnboundIdentifier(ident)))?; self.push(value) } _ => todo!(), } } fn execute_set_global(&mut self, environment: &mut Environment) -> Result<(), MachineError> { let ident = self.pop()?; let value = self.pop()?; let Value::Identifier(ident) = ident else { todo!(); }; environment.set_global_value(ident, value); self.push(Value::Nil)?; Ok(()) } fn execute_get_local(&mut self, index: u32) -> Result<(), MachineError> { let value = self.get_local(index).cloned(); if let Some(value) = value { self.push(value.clone())?; } else { eprintln!(":: Warning: local #{index} referenced before assignment"); self.push(Value::Nil)?; } Ok(()) } fn execute_set_local(&mut self, index: u32) -> Result<(), MachineError> { let value = self.pop()?; self.set_local(index, value); Ok(()) } fn execute_export_macro( &mut self, environment: &mut Environment, index: ConstantId, ) -> Result<(), MachineError> { let ip = self.ip.clone().unwrap(); let ident = self.pop()?; let Value::Identifier(ident) = ident else { return Err(self.error_at_ip(MachineErrorKind::InvalidArgument)); }; let function = ip.module.constant(index).unwrap(); let ModuleConstant::LocalFunction(address, required_count) = function else { return Err(self.error_at_ip(MachineErrorKind::InvalidArgument)); }; let function = BytecodeFunction { module: ip.module.clone(), required_count, address, }; environment.set_global_macro(ident, function); Ok(()) } fn trace_instruction(&self, ip: &InstructionPointer) { let code = ip.module.instruction(ip.address); let Some(code) = code else { eprintln!("{ip}: "); return; }; eprint!("{ip}: {code:08x} "); if let Ok(instruction) = Instruction::try_from(code) { eprint!("{instruction:?}"); match instruction { Instruction::PushConstant(index) => { if let Some(constant) = ip.module.constant(index) { eprint!(" [-> {constant}]"); } else { eprint!(" [-> ]"); } } Instruction::PushArgument(index) => { if let Some(argument) = self .call_stack .current() .and_then(|frame| frame.arguments.get(usize::from(index))) { eprint!(" [-> {argument}]"); } else { eprint!(" [-> ]") } } _ => (), } } else { eprint!(""); } eprintln!(); } fn unwind(&mut self, until: ExecutionEvent) { if self.trace_returns { eprintln!("TRACE: Begin unwind"); if let Some(ip) = self.ip.as_ref() { eprintln!("TRACE: <- {ip}"); } else { eprintln!("TRACE: <- "); } } let mut ip = self.ip.clone(); while let Some(frame) = self.call_stack.pop() { if self.trace_returns { eprintln!("TRACE: Unwind frame:"); if let Some(ip) = frame.return_address.as_ref() { eprintln!("TRACE: -> {ip}"); } else { eprintln!("TRACE: -> "); } } ip = frame.return_address; if frame.event == until { break; } } self.ip = ip; if self.trace_returns { eprintln!("TRACE: Finished unwind"); } } pub fn execute_next( &mut self, environment: &mut Environment, ) -> Result { let ip = self .ip .clone() .ok_or_else(|| self.error_at_ip(MachineErrorKind::UndefinedInstructionPointer))?; if self.trace_instructions { self.trace_instruction(&ip); } let instruction = ip.module.instruction(ip.address).ok_or_else(|| { self.error_at_ip(MachineErrorKind::InstructionOutOfBounds(ip.clone())) })?; let instruction = Instruction::try_from(instruction) .map_err(|e| self.error_at_ip(MachineErrorKind::Instruction(e)))?; let mut advance = true; let mut event = ExecutionEvent::None; match instruction { Instruction::PushNil => { self.push(Value::Nil)?; } Instruction::PushInteger(value) => { self.push(Value::Integer(value.sign_extend_i64()))?; } Instruction::PushBool(value) => { self.push(Value::Boolean(value))?; } Instruction::PushConstant(index) => { self.execute_push_constant(index)?; } Instruction::PushArgument(index) => { self.execute_push_argument(index.into())?; } Instruction::Drop => { self.pop()?; } Instruction::ExportMacro(index) => { self.execute_export_macro(environment, index)?; } Instruction::GetGlobal => { self.execute_get_global(environment)?; } Instruction::SetGlobal => { self.execute_set_global(environment)?; } Instruction::SetLocal(index) => { self.execute_set_local(index.into())?; } Instruction::GetLocal(index) => { self.execute_get_local(index.into())?; } Instruction::Return => { advance = false; event = self.execute_return()?; } Instruction::Call(count) => { advance = false; self.execute_call(environment, count.into())?; } Instruction::Math(math, count) => { self.execute_math(environment, math, count.into())?; } Instruction::Branch(offset) => { advance = self.execute_branch(offset.sign_extend_i64() as isize)?; } Instruction::Jump(offset) => { advance = false; self.execute_jump(offset.sign_extend_i64() as isize)?; } } if advance { self.ip = Some(InstructionPointer { module: ip.module, address: ip.address + 1, }); } Ok(event) } pub fn eval_bytecode_call( &mut self, environment: &mut Environment, function: BytecodeFunction, args: &[Value], ) -> Result { let expect = ExecutionEvent::BytecodeFunctionExit(function.clone()); self.enter_bytecode_function(function, args.into())?; loop { let event = self.execute_next(environment)?; if event == expect { break; } } let value = self.pop()?; Ok(value) } pub fn load_module( &mut self, module: ModuleRef, advance_on_return: bool, ) -> Result { let entry = module.entry(); let entry_ip = InstructionPointer { module: module.clone(), address: entry, }; let entry_frame = CallFrame { arguments: vec![], event: ExecutionEvent::ModuleExit(module.clone()), locals: HashMap::new(), return_address: self.ip.clone().map(|ip| InstructionPointer { module: ip.module, address: ip.address + advance_on_return as usize, }), }; if self.trace_calls { eprintln!("TRACE: Enter module"); if let Some(source_ip) = self.ip.as_ref() { eprintln!("TRACE: From {source_ip}"); } else { eprintln!("TRACE: From "); } eprintln!("TRACE: To {entry_ip}"); } if self.call_stack.push(entry_frame).is_err() { return Err(self.error_at_ip(MachineErrorKind::CallStackOverflow)); } self.ip = Some(entry_ip); Ok(module) } pub fn eval_module( &mut self, environment: &mut Environment, module: ModuleRef, advance_on_return: bool, ) -> Result { let module = match self.load_module(module, advance_on_return) { Ok(module) => module, Err(error) => return Err(EvalError::Machine(error)), }; let expect = ExecutionEvent::ModuleExit(module.clone()); loop { let event = match self.execute_next(environment) { Ok(event) => event, Err(error) => { self.unwind(expect); return Err(EvalError::Machine(error)); } }; if event == expect { break; } } self.pop().map_err(EvalError::Machine) } pub fn eval_value( &mut self, compile_options: CompileOptions, environment: &mut Environment, value: Value, advance_on_return: bool, ) -> Result { let value = value.macro_expand(self, environment, false)?; let module = Module::compile_value(compile_options, &value)?; let module = ModuleRef::from(module); self.eval_module(environment, module, advance_on_return) } } impl fmt::Display for InstructionPointer { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { if let Some(name) = self.module.name.as_ref() { write!(f, "<{} {:p}>:{}", name, self.module, self.address) } else { write!(f, ":{}", self.module, self.address) } } } #[cfg(test)] mod tests { use std::sync::atomic::{AtomicI64, Ordering}; use crate::vm::{ env::Environment, instruction::{Instruction, MathInstruction, U}, machine::{InstructionPointer, Machine}, module::{Module, ModuleBuilder, ModuleConstant, ModuleRef}, value::Value, }; fn execute_all( count: usize, build: F, prepare: G, ) -> (Machine, Vec) { let dummy = ModuleRef::from(Module::dummy()); let mut machine = Machine { ip: Some(InstructionPointer { module: dummy, address: 0, }), ..Default::default() }; let mut env = Environment::default(); prepare(&mut machine, &mut env); let mut values = vec![]; for i in 0..count { let mut builder = ModuleBuilder::new(); builder.entry(0); build(i as u32, &mut builder); builder.add(Instruction::Return); let module = builder.build(); values.push(machine.eval_module(&mut env, module.into(), false).unwrap()); } (machine, values) } #[test] fn test_basic() { let (m, vs) = execute_all( 1, |_, builder| { let c0 = builder.constant(ModuleConstant::Integer(3)); builder.add_all([ Instruction::PushInteger(U::truncate(1)), Instruction::PushInteger(U::truncate(2)), Instruction::Math(MathInstruction::Add, U::truncate(2)), Instruction::PushConstant(c0), Instruction::Math(MathInstruction::Add, U::truncate(2)), ]); }, |_, _| {}, ); assert!(m.value_stack.is_empty()); assert!(m.call_stack.is_empty()); assert_eq!(&vs, &[Value::Integer(6)]); } #[test] fn test_local_function_call() { let (m, vs) = execute_all( 1, |_, builder| { let c0 = builder.constant(ModuleConstant::LocalFunction(4, 1)); builder.add_all([ // main Instruction::PushInteger(U::truncate(34)), Instruction::PushConstant(c0), Instruction::Call(U::truncate(1)), Instruction::Return, // c0 Instruction::PushArgument(U::truncate(0)), Instruction::PushInteger(U::truncate(1200)), Instruction::Math(MathInstruction::Add, U::truncate(2)), Instruction::Return, ]); }, |_, _| {}, ); assert!(m.value_stack.is_empty()); assert!(m.call_stack.is_empty()); assert_eq!(&vs, &[Value::Integer(1234)]); } #[test] fn test_cross_module_call() { static NATIVE_STATE: AtomicI64 = AtomicI64::new(-1); let (m, vs) = execute_all( 2, |id, builder| match id { 1 => { let c0 = builder.constant(ModuleConstant::LocalFunction(4, 1)); let c1 = builder.constant(ModuleConstant::Identifier("extern-function".into())); builder.add_all([ // main: (local 1) Instruction::PushInteger(U::truncate(1)), Instruction::PushConstant(c0), Instruction::Call(U::truncate(1)), Instruction::Return, // module 0 local function // (fn (a) (extern-function a 2)) Instruction::PushInteger(U::truncate(2)), Instruction::PushArgument(U::truncate(0)), Instruction::PushConstant(c1), Instruction::GetGlobal, Instruction::Call(U::truncate(2)), Instruction::Return, ]); } 0 => { let c0 = builder.constant(ModuleConstant::Integer(3)); let c1 = builder.constant(ModuleConstant::Identifier("native".into())); let c2 = builder.constant(ModuleConstant::LocalFunction(4, 2)); let c3 = builder.constant(ModuleConstant::Identifier("extern-function".into())); builder.add_all([ // main Instruction::PushConstant(c2), Instruction::PushConstant(c3), Instruction::SetGlobal, Instruction::Return, // extern-function // (fn (a b) (native 3 b a)) Instruction::PushArgument(U::truncate(0)), Instruction::PushArgument(U::truncate(1)), Instruction::PushConstant(c0), Instruction::PushConstant(c1), Instruction::GetGlobal, Instruction::Call(U::truncate(3)), Instruction::Return, ]); } _ => unreachable!(), }, |_, env| { env.defun_native("native", |_, _, args| { assert_eq!( &args, &[Value::Integer(3), Value::Integer(2), Value::Integer(1)] ); NATIVE_STATE.store(4321, Ordering::Release); Ok(Value::Integer(1234)) }); }, ); assert!(m.value_stack.is_empty()); assert!(m.call_stack.is_empty()); assert_eq!(&vs, &[Value::Nil, Value::Integer(1234)]); assert_eq!(NATIVE_STATE.load(Ordering::Acquire), 4321); } }