Files
lysp/src/vm/machine.rs
T

773 lines
26 KiB
Rust

use std::{collections::HashMap, fmt};
use crate::{
compile::CompileOptions,
error::{EvalError, MachineError, MachineErrorKind},
vm::{
env::Environment,
instruction::{ConstantId, Instruction, MathInstruction},
macros::MacroExpand,
module::{Module, ModuleConstant, ModuleRef},
prelude,
stack::Stack,
value::{BytecodeFunction, NativeFunction, TryFromValue, Value},
},
};
/// Location of a single instruction: a module plus an address inside it.
///
/// The machine fetches the instruction to run via
/// `module.instruction(address)`.
#[derive(Debug, Clone, PartialEq)]
pub struct InstructionPointer {
/// Module whose instructions and constants are being addressed.
pub module: ModuleRef,
/// Index of the instruction within `module`.
pub address: usize,
}
/// One entry of the machine's call stack.
///
/// A frame is pushed when a bytecode function or a module is entered and
/// popped again by the `Return` instruction (or while unwinding).
#[derive(Debug, Clone, PartialEq)]
pub struct CallFrame {
/// Arguments the callee was invoked with; read by `PushArgument`.
pub arguments: Vec<Value>,
/// Instruction pointer restored when this frame is popped. `None` if the
/// machine had no instruction pointer when the frame was created.
pub return_address: Option<InstructionPointer>,
/// Event reported to the driver loop when this frame returns.
pub event: ExecutionEvent,
/// Local variable slots of this frame, keyed by slot index.
pub locals: HashMap<u32, Value>,
}
/// Bytecode interpreter state: instruction pointer, value stack, call stack
/// and tracing switches.
pub struct Machine {
/// Instruction executed by the next `execute_next` call; `None` when the
/// machine is not positioned on any instruction.
pub ip: Option<InstructionPointer>,
/// Operand stack shared by all frames.
value_stack: Stack<Value>,
/// Active call frames, innermost on top.
pub call_stack: Stack<CallFrame>,
/// When set, every executed instruction is printed to stderr.
pub trace_instructions: bool,
/// When set, function calls and module entries are printed to stderr.
pub trace_calls: bool,
/// When set, returns and unwinding are printed to stderr.
pub trace_returns: bool,
// Top-level locals: used by `set_local`/`get_local` whenever the call stack
// has no current frame.
locals: HashMap<u32, Value>,
}
/// Outcome of executing one instruction, as reported by `execute_next`.
#[derive(Debug, Clone, PartialEq)]
pub enum ExecutionEvent {
/// A `Return` popped a frame created by `load_module`, or a `Return` ran
/// with an empty call stack (then the module is the one the instruction
/// pointer was in).
ModuleExit(ModuleRef),
/// A `Return` popped a frame created for the given bytecode function.
BytecodeFunctionExit(BytecodeFunction),
/// The instruction was not a `Return`.
None,
}
impl Default for Machine {
    /// Builds an idle machine: no instruction pointer, empty stacks, no
    /// top-level locals and all tracing switched off.
    fn default() -> Self {
        // NOTE(review): the arguments are presumably stack capacities (pushes
        // can fail with an overflow error) — confirm against `Stack::new`.
        let value_stack = Stack::new(1024);
        let call_stack = Stack::new(32);
        Self {
            ip: None,
            value_stack,
            call_stack,
            trace_instructions: false,
            trace_calls: false,
            trace_returns: false,
            locals: HashMap::new(),
        }
    }
}
impl Machine {
pub fn error_at_ip(&self, kind: MachineErrorKind) -> MachineError {
MachineError {
ip: self.ip.clone(),
error: kind,
}
}
/// Returns a clone of the current instruction pointer, or `None` if it is
/// unset.
pub fn ip(&self) -> Option<InstructionPointer> {
self.ip.clone()
}
/// Stores `value` in local slot `index`.
///
/// The slot belongs to the current call frame if there is one, otherwise to
/// the machine's top-level locals. Any previous value in the slot is
/// replaced.
pub fn set_local(&mut self, index: u32, value: Value) {
    if let Some(frame) = self.call_stack.current_mut() {
        frame.locals.insert(index, value);
    } else {
        self.locals.insert(index, value);
    }
}
/// Looks up local slot `index` in the current call frame, or in the
/// top-level locals when no frame is active. Returns `None` if the slot was
/// never assigned.
fn get_local(&self, index: u32) -> Option<&Value> {
    self.call_stack
        .current()
        .map_or(&self.locals, |frame| &frame.locals)
        .get(&index)
}
/// Pops the top of the value stack.
///
/// # Errors
/// Returns `ValueStackUnderflow` (tagged with the current instruction
/// pointer) when the stack is empty.
fn pop(&mut self) -> Result<Value, MachineError> {
    match self.value_stack.pop() {
        Some(value) => Ok(value),
        None => Err(self.error_at_ip(MachineErrorKind::ValueStackUnderflow)),
    }
}
/// Pushes `value` onto the value stack.
///
/// # Errors
/// Returns `ValueStackOverflow` (tagged with the current instruction
/// pointer) when the underlying stack rejects the push.
fn push(&mut self, value: Value) -> Result<(), MachineError> {
    if self.value_stack.push(value).is_err() {
        return Err(self.error_at_ip(MachineErrorKind::ValueStackOverflow));
    }
    Ok(())
}
/// Pushes a call frame for `bytecode` and moves the instruction pointer to
/// the function's entry address.
///
/// The frame's return address is the instruction after the current one (or
/// `None` when the machine has no instruction pointer), and its exit event
/// is `BytecodeFunctionExit(bytecode)`.
///
/// # Errors
/// * `ArgumentCountMismatch` when fewer than `required_count` arguments
///   were supplied (extra arguments are accepted).
/// * `CallStackOverflow` when the frame cannot be pushed.
fn enter_bytecode_function(
    &mut self,
    bytecode: BytecodeFunction,
    arguments: Vec<Value>,
) -> Result<(), MachineError> {
    let supplied = arguments.len();
    if bytecode.required_count > supplied {
        return Err(self.error_at_ip(MachineErrorKind::ArgumentCountMismatch(
            bytecode.required_count,
            supplied,
        )));
    }

    let entry_ip = InstructionPointer {
        module: bytecode.module.clone(),
        address: bytecode.address,
    };
    // Execution resumes right after the calling instruction.
    let return_address = self.ip.as_ref().map(|caller| InstructionPointer {
        module: caller.module.clone(),
        address: caller.address + 1,
    });
    let frame = CallFrame {
        arguments,
        return_address,
        event: ExecutionEvent::BytecodeFunctionExit(bytecode),
        locals: HashMap::new(),
    };

    if self.trace_calls {
        eprintln!("TRACE: Call bytecode function");
        match self.ip.as_ref() {
            Some(source_ip) => eprintln!("TRACE: From {source_ip}"),
            None => eprintln!("TRACE: From <undefined>"),
        }
        eprintln!("TRACE: To {entry_ip}");
    }

    if self.call_stack.push(frame).is_err() {
        return Err(self.error_at_ip(MachineErrorKind::CallStackOverflow));
    }
    self.ip = Some(entry_ip);
    Ok(())
}
/// Executes a `Call` instruction with `count` arguments.
///
/// Pops the callee first, then `count` arguments (so `arguments[0]` is the
/// value that was pushed last). A bytecode callee gets a new frame; a
/// native callee is invoked immediately, its result is pushed and the
/// instruction pointer is moved to the instruction after the call.
///
/// # Errors
/// `InvalidArgument` when the callee is not a function; stack errors and
/// errors raised by the callee are propagated.
///
/// # Panics
/// Panics on the native path if the instruction pointer is unset.
fn execute_call(
    &mut self,
    environment: &mut Environment,
    count: usize,
) -> Result<(), MachineError> {
    enum Target {
        Bytecode(BytecodeFunction),
        Native(NativeFunction),
    }

    // Validate the callee before touching the arguments.
    let callee = self.pop()?;
    let target = match callee {
        Value::BytecodeFunction(function) => Target::Bytecode(function),
        Value::NativeFunction(function) => Target::Native(function),
        _ => return Err(self.error_at_ip(MachineErrorKind::InvalidArgument)),
    };

    let arguments = (0..count)
        .map(|_| self.pop())
        .collect::<Result<Vec<_>, _>>()?;

    match target {
        Target::Bytecode(function) => self.enter_bytecode_function(function, arguments),
        Target::Native(function) => {
            // Remember the call site: the native may move the instruction pointer.
            let InstructionPointer { module, address } = self.ip.clone().unwrap();
            let result = function.invoke(self, environment, &arguments)?;
            self.push(result)?;
            self.ip = Some(InstructionPointer {
                module,
                address: address + 1,
            });
            Ok(())
        }
    }
}
/// Pops `count` operands, applies `function` to them and pushes the result.
///
/// Operands are handed over in pop order, i.e. the most recently pushed
/// value comes first. Errors from the stack or from `function` are
/// propagated.
fn execute_builtin_native<F>(
    &mut self,
    environment: &mut Environment,
    function: F,
    count: usize,
) -> Result<(), MachineError>
where
    F: Fn(&mut Self, &mut Environment, &[Value]) -> Result<Value, MachineError>,
{
    let operands = (0..count)
        .map(|_| self.pop())
        .collect::<Result<Vec<_>, _>>()?;
    let result = function(self, environment, &operands)?;
    self.push(result)
}
/// Executes a `Return` instruction.
///
/// Pops the innermost call frame, restores its return address as the
/// instruction pointer and yields the frame's exit event. With an empty
/// call stack the instruction pointer is cleared and `ModuleExit` for the
/// current module is yielded instead.
///
/// # Panics
/// Panics if the instruction pointer is unset.
fn execute_return(&mut self) -> Result<ExecutionEvent, MachineError> {
    let ip = self.ip.clone().unwrap();
    let tracing = self.trace_returns;
    if tracing {
        eprintln!("TRACE: Return");
        eprintln!("TRACE: From {ip}");
        ip.module.dump(Some(ip.address), 4, 0);
    }

    let Some(frame) = self.call_stack.pop() else {
        // Nothing left to return to: the top-level module finished.
        if tracing {
            eprintln!("TRACE: To <undefined>");
        }
        self.ip = None;
        return Ok(ExecutionEvent::ModuleExit(ip.module));
    };

    if tracing {
        match frame.return_address.as_ref() {
            Some(target_ip) => eprintln!("TRACE: To {target_ip}"),
            None => eprintln!("TRACE: To <undefined>"),
        }
    }
    self.ip = frame.return_address;
    Ok(frame.event)
}
/// Executes a `Math` instruction by dispatching to the matching prelude
/// builtin with `count` operands popped from the value stack.
fn execute_math(
    &mut self,
    environment: &mut Environment,
    math: MathInstruction,
    count: usize,
) -> Result<(), MachineError> {
    // Every opcode maps onto exactly one prelude builtin.
    let builtin = match math {
        // Arithmetic
        MathInstruction::Add => prelude::builtin_add,
        MathInstruction::Sub => prelude::builtin_sub,
        MathInstruction::Mul => prelude::builtin_mul,
        MathInstruction::Div => prelude::builtin_div,
        MathInstruction::Mod => prelude::builtin_mod,
        // Logical
        MathInstruction::And => prelude::builtin_and,
        MathInstruction::Or => prelude::builtin_or,
        // Bitwise
        MathInstruction::BitwiseAnd => prelude::builtin_bitwise_and,
        MathInstruction::BitwiseOr => prelude::builtin_bitwise_or,
        MathInstruction::BitwiseXor => prelude::builtin_bitwise_xor,
        // Comparison
        MathInstruction::Gt => prelude::builtin_cmp_gt,
        MathInstruction::Lt => prelude::builtin_cmp_lt,
        MathInstruction::Eq => prelude::builtin_cmp_eq,
        MathInstruction::Ne => prelude::builtin_cmp_ne,
        MathInstruction::Ge => prelude::builtin_cmp_ge,
        MathInstruction::Le => prelude::builtin_cmp_le,
    };
    self.execute_builtin_native(environment, builtin, count)
}
/// Executes a `Branch` instruction: pops a condition and jumps by `offset`
/// when it is false.
///
/// A value that cannot be converted to `bool` counts as false. Returns
/// `true` when no jump happened, i.e. when the caller should advance to the
/// next instruction.
fn execute_branch(&mut self, offset: isize) -> Result<bool, MachineError> {
    let value = self.pop()?;
    let condition = bool::try_from_value(&value).unwrap_or_default();
    if condition {
        return Ok(true);
    }
    self.execute_jump(offset)?;
    Ok(false)
}
fn execute_jump(&mut self, offset: isize) -> Result<(), MachineError> {
let ip = self.ip.clone().unwrap();
self.ip = Some(InstructionPointer {
module: ip.module,
address: ip.address.wrapping_add_signed(offset),
});
Ok(())
}
fn execute_push_constant(&mut self, index: ConstantId) -> Result<(), MachineError> {
let ip = self.ip.as_ref().unwrap();
let constant = ip.module.constant(index).expect("TODO");
let value = match constant {
ModuleConstant::LocalFunction(address, required_count) => {
Value::BytecodeFunction(BytecodeFunction {
module: ip.module.clone(),
required_count,
address,
})
}
ModuleConstant::String(value) => Value::String(value),
ModuleConstant::Integer(value) => Value::Integer(value),
ModuleConstant::Identifier(identifier) => Value::Identifier(identifier),
ModuleConstant::Value(value) => value.as_ref().clone(),
};
self.push(value)
}
/// Executes a `PushArgument` instruction: pushes argument `index` of the
/// current call frame, or `Nil` when the frame has no such argument.
///
/// # Panics
/// Panics if there is no current call frame.
fn execute_push_argument(&mut self, index: usize) -> Result<(), MachineError> {
    let frame = self.call_stack.current().expect("valid call frame");
    let value = frame.arguments.get(index).cloned().unwrap_or(Value::Nil);
    self.push(value)
}
/// Executes a `GetGlobal` instruction: pops an identifier and pushes the
/// global value bound to it.
///
/// # Errors
/// * `InvalidArgument` when the popped value is not an identifier (this
///   used to hit `todo!()` and panic).
/// * `UnboundIdentifier` when the environment has no such global.
/// * Stack underflow/overflow errors from the pop and push.
fn execute_get_global(&mut self, environment: &mut Environment) -> Result<(), MachineError> {
    let ident = self.pop()?;
    let Value::Identifier(ident) = ident else {
        return Err(self.error_at_ip(MachineErrorKind::InvalidArgument));
    };
    let value = environment
        .global_value(&ident)
        .ok_or_else(|| self.error_at_ip(MachineErrorKind::UnboundIdentifier(ident)))?;
    self.push(value)
}
fn execute_set_global(&mut self, environment: &mut Environment) -> Result<(), MachineError> {
let ident = self.pop()?;
let value = self.pop()?;
let Value::Identifier(ident) = ident else {
todo!();
};
environment.set_global_value(ident, value);
self.push(Value::Nil)?;
Ok(())
}
/// Executes a `GetLocal` instruction: pushes the value of local slot
/// `index`.
///
/// Reading a slot that was never assigned prints a warning to stderr and
/// pushes `Nil` instead of failing.
fn execute_get_local(&mut self, index: u32) -> Result<(), MachineError> {
    let local = self.get_local(index).cloned();
    match local {
        Some(value) => self.push(value),
        None => {
            eprintln!(":: Warning: local #{index} referenced before assignment");
            self.push(Value::Nil)
        }
    }
}
/// Executes a `SetLocal` instruction: pops a value and stores it in local
/// slot `index`. Nothing is pushed back.
fn execute_set_local(&mut self, index: u32) -> Result<(), MachineError> {
    self.pop().map(|value| self.set_local(index, value))
}
/// Executes an `ExportMacro` instruction: pops an identifier and registers
/// the local function stored in constant `index` as a global macro under
/// that name.
///
/// # Errors
/// * `UndefinedInstructionPointer` when the machine has no instruction
///   pointer (this used to panic).
/// * `InvalidArgument` when the popped value is not an identifier, when
///   the module has no constant `index` (this used to panic), or when that
///   constant is not a local function.
/// * `ValueStackUnderflow` when there is nothing to pop.
fn execute_export_macro(
    &mut self,
    environment: &mut Environment,
    index: ConstantId,
) -> Result<(), MachineError> {
    let module = match self.ip.as_ref() {
        Some(ip) => ip.module.clone(),
        None => return Err(self.error_at_ip(MachineErrorKind::UndefinedInstructionPointer)),
    };
    let ident = self.pop()?;
    let Value::Identifier(ident) = ident else {
        return Err(self.error_at_ip(MachineErrorKind::InvalidArgument));
    };
    // A missing constant is malformed bytecode and is reported the same way
    // as a constant of the wrong kind.
    let Some(ModuleConstant::LocalFunction(address, required_count)) = module.constant(index)
    else {
        return Err(self.error_at_ip(MachineErrorKind::InvalidArgument));
    };
    let function = BytecodeFunction {
        module,
        required_count,
        address,
    };
    environment.set_global_macro(ident, function);
    Ok(())
}
/// Prints a one-line trace of the instruction at `ip` to stderr.
///
/// The line contains the location, the raw instruction word in hex and the
/// decoded instruction. `PushConstant` and `PushArgument` are annotated
/// with the value they refer to; anything that cannot be resolved is shown
/// as `<undefined>`.
fn trace_instruction(&self, ip: &InstructionPointer) {
    let Some(code) = ip.module.instruction(ip.address) else {
        eprintln!("{ip}: <undefined>");
        return;
    };
    eprint!("{ip}: {code:08x} ");
    match Instruction::try_from(code) {
        Ok(instruction) => {
            eprint!("{instruction:?}");
            match instruction {
                Instruction::PushConstant(index) => match ip.module.constant(index) {
                    Some(constant) => eprint!(" [-> {constant}]"),
                    None => eprint!(" [-> <undefined>]"),
                },
                Instruction::PushArgument(index) => {
                    let argument = self
                        .call_stack
                        .current()
                        .and_then(|frame| frame.arguments.get(usize::from(index)));
                    match argument {
                        Some(argument) => eprint!(" [-> {argument}]"),
                        None => eprint!(" [-> <undefined>]"),
                    }
                }
                _ => (),
            }
        }
        Err(_) => eprint!("<undefined>"),
    }
    eprintln!();
}
fn unwind(&mut self, until: ExecutionEvent) {
if self.trace_returns {
eprintln!("TRACE: Begin unwind");
if let Some(ip) = self.ip.as_ref() {
eprintln!("TRACE: <- {ip}");
} else {
eprintln!("TRACE: <- <undefined>");
}
}
let mut ip = self.ip.clone();
while let Some(frame) = self.call_stack.pop() {
if self.trace_returns {
eprintln!("TRACE: Unwind frame:");
if let Some(ip) = frame.return_address.as_ref() {
eprintln!("TRACE: -> {ip}");
} else {
eprintln!("TRACE: -> <undefined>");
}
}
ip = frame.return_address;
if frame.event == until {
break;
}
}
self.ip = ip;
if self.trace_returns {
eprintln!("TRACE: Finished unwind");
}
}
/// Fetches, decodes and executes the instruction at the current
/// instruction pointer.
///
/// Afterwards the instruction pointer is advanced by one, unless the
/// instruction took control of it itself (`Return`, `Call`, `Jump`, or a
/// `Branch` that jumped).
///
/// # Returns
/// The exit event produced by a `Return`, or [`ExecutionEvent::None`] for
/// every other instruction.
///
/// # Errors
/// * `UndefinedInstructionPointer` when there is no instruction pointer.
/// * `InstructionOutOfBounds` when the module has no instruction at the
///   current address.
/// * `Instruction` when the instruction word cannot be decoded.
/// * Any error raised while executing the instruction.
pub fn execute_next(
    &mut self,
    environment: &mut Environment,
) -> Result<ExecutionEvent, MachineError> {
    let Some(ip) = self.ip.clone() else {
        return Err(self.error_at_ip(MachineErrorKind::UndefinedInstructionPointer));
    };
    if self.trace_instructions {
        self.trace_instruction(&ip);
    }

    let Some(code) = ip.module.instruction(ip.address) else {
        return Err(self.error_at_ip(MachineErrorKind::InstructionOutOfBounds(ip)));
    };
    let instruction = match Instruction::try_from(code) {
        Ok(instruction) => instruction,
        Err(e) => return Err(self.error_at_ip(MachineErrorKind::Instruction(e))),
    };

    let mut event = ExecutionEvent::None;
    // Each arm yields whether the instruction pointer still has to be
    // advanced past the instruction that was just executed.
    let advance = match instruction {
        Instruction::PushNil => {
            self.push(Value::Nil)?;
            true
        }
        Instruction::PushInteger(value) => {
            self.push(Value::Integer(value.sign_extend_i64()))?;
            true
        }
        Instruction::PushBool(value) => {
            self.push(Value::Boolean(value))?;
            true
        }
        Instruction::PushConstant(index) => {
            self.execute_push_constant(index)?;
            true
        }
        Instruction::PushArgument(index) => {
            self.execute_push_argument(index.into())?;
            true
        }
        Instruction::Drop => {
            self.pop()?;
            true
        }
        Instruction::ExportMacro(index) => {
            self.execute_export_macro(environment, index)?;
            true
        }
        Instruction::GetGlobal => {
            self.execute_get_global(environment)?;
            true
        }
        Instruction::SetGlobal => {
            self.execute_set_global(environment)?;
            true
        }
        Instruction::SetLocal(index) => {
            self.execute_set_local(index.into())?;
            true
        }
        Instruction::GetLocal(index) => {
            self.execute_get_local(index.into())?;
            true
        }
        Instruction::Return => {
            event = self.execute_return()?;
            false
        }
        Instruction::Call(count) => {
            self.execute_call(environment, count.into())?;
            false
        }
        Instruction::Math(math, count) => {
            self.execute_math(environment, math, count.into())?;
            true
        }
        Instruction::Branch(offset) => self.execute_branch(offset.sign_extend_i64() as isize)?,
        Instruction::Jump(offset) => {
            self.execute_jump(offset.sign_extend_i64() as isize)?;
            false
        }
    };

    if advance {
        self.ip = Some(InstructionPointer {
            module: ip.module,
            address: ip.address + 1,
        });
    }
    Ok(event)
}
/// Calls `function` with `args` and runs the machine until that call
/// returns, yielding the value left on top of the value stack.
///
/// # Errors
/// Propagates any error from entering the function, from executing its
/// instructions, or from popping the result. When execution fails after
/// the function was entered, the call stack is unwound up to and including
/// the frame of this call before the error is returned, so the machine is
/// not left positioned inside a half-executed function. This mirrors the
/// error handling of `eval_module`. The value stack is not restored.
///
/// NOTE(review): completion is detected by comparing exit events, so a
/// nested call to an equal `BytecodeFunction` (e.g. direct recursion) looks
/// like it would end the loop early — confirm against the `PartialEq`
/// implementation of `BytecodeFunction`.
pub fn eval_bytecode_call(
    &mut self,
    environment: &mut Environment,
    function: BytecodeFunction,
    args: &[Value],
) -> Result<Value, MachineError> {
    let expect = ExecutionEvent::BytecodeFunctionExit(function.clone());
    // If entering fails no frame was pushed, so there is nothing to unwind.
    self.enter_bytecode_function(function, args.into())?;
    loop {
        match self.execute_next(environment) {
            Ok(event) if event == expect => break,
            Ok(_) => {}
            Err(error) => {
                // Drop the frames this call is responsible for; otherwise a
                // stale frame and instruction pointer would leak into
                // whatever the machine runs next.
                self.unwind(expect);
                return Err(error);
            }
        }
    }
    self.pop()
}
/// Pushes an entry frame for `module` and points the machine at the
/// module's entry address, without executing anything.
///
/// The frame's return address is the current instruction pointer, moved one
/// instruction forward when `advance_on_return` is set (`None` when the
/// machine has no instruction pointer). Its exit event is
/// `ModuleExit(module)`.
///
/// # Returns
/// The same module reference that was passed in.
///
/// # Errors
/// `CallStackOverflow` when the entry frame cannot be pushed.
pub fn load_module(
    &mut self,
    module: ModuleRef,
    advance_on_return: bool,
) -> Result<ModuleRef, MachineError> {
    let entry_ip = InstructionPointer {
        address: module.entry(),
        module: module.clone(),
    };
    let return_address = self.ip.as_ref().map(|caller| InstructionPointer {
        module: caller.module.clone(),
        address: caller.address + usize::from(advance_on_return),
    });
    let entry_frame = CallFrame {
        arguments: Vec::new(),
        return_address,
        event: ExecutionEvent::ModuleExit(module.clone()),
        locals: HashMap::new(),
    };

    if self.trace_calls {
        eprintln!("TRACE: Enter module");
        match self.ip.as_ref() {
            Some(source_ip) => eprintln!("TRACE: From {source_ip}"),
            None => eprintln!("TRACE: From <undefined>"),
        }
        eprintln!("TRACE: To {entry_ip}");
    }

    if self.call_stack.push(entry_frame).is_err() {
        return Err(self.error_at_ip(MachineErrorKind::CallStackOverflow));
    }
    self.ip = Some(entry_ip);
    Ok(module)
}
/// Loads `module` and runs it until its entry frame returns, yielding the
/// value left on top of the value stack.
///
/// `advance_on_return` is forwarded to `load_module`.
///
/// # Errors
/// All machine errors are wrapped in `EvalError::Machine`. When execution
/// fails, the call stack is unwound up to and including the module's entry
/// frame before the error is returned.
pub fn eval_module(
    &mut self,
    environment: &mut Environment,
    module: ModuleRef,
    advance_on_return: bool,
) -> Result<Value, EvalError> {
    let module = self
        .load_module(module, advance_on_return)
        .map_err(EvalError::Machine)?;
    let expect = ExecutionEvent::ModuleExit(module);
    loop {
        match self.execute_next(environment) {
            Ok(event) if event == expect => break,
            Ok(_) => continue,
            Err(error) => {
                self.unwind(expect);
                return Err(EvalError::Machine(error));
            }
        }
    }
    self.pop().map_err(EvalError::Machine)
}
/// Evaluates `value` as a program: macro-expands it, compiles the result
/// into a fresh module and runs that module via `eval_module`.
///
/// Errors from any of the three stages are propagated.
pub fn eval_value(
    &mut self,
    compile_options: CompileOptions,
    environment: &mut Environment,
    value: Value,
    advance_on_return: bool,
) -> Result<Value, EvalError> {
    let expanded = value.macro_expand(self, environment, false)?;
    let compiled = Module::compile_value(compile_options, &expanded)?;
    self.eval_module(environment, ModuleRef::from(compiled), advance_on_return)
}
}
impl fmt::Display for InstructionPointer {
    /// Formats as `<NAME PTR>:ADDRESS`, using `unnamed` in place of the name
    /// for modules without one. `PTR` is the module reference rendered with
    /// the `{:p}` pointer format.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self.module.name.as_ref() {
            Some(name) => write!(f, "<{} {:p}>:{}", name, self.module, self.address),
            None => write!(f, "<unnamed {:p}>:{}", self.module, self.address),
        }
    }
}
#[cfg(test)]
mod tests {
use std::sync::atomic::{AtomicI64, Ordering};
use crate::vm::{
env::Environment,
instruction::{Instruction, MathInstruction, U},
machine::{InstructionPointer, Machine},
module::{Module, ModuleBuilder, ModuleConstant, ModuleRef},
value::Value,
};
/// Test driver: builds and evaluates `count` modules on one shared machine
/// and environment.
///
/// `prepare` runs once before any module. For module number `i`, `build`
/// receives `i` and a fresh builder whose entry is 0; a trailing `Return`
/// is appended after `build` is done. Returns the machine together with the
/// value each module evaluated to.
///
/// Panics if any module fails to evaluate.
fn execute_all<F, G>(count: usize, build: F, prepare: G) -> (Machine, Vec<Value>)
where
    F: Fn(u32, &mut ModuleBuilder),
    G: FnOnce(&mut Machine, &mut Environment),
{
    // Start from a dummy module so the machine has an instruction pointer.
    let start = InstructionPointer {
        module: ModuleRef::from(Module::dummy()),
        address: 0,
    };
    let mut machine = Machine {
        ip: Some(start),
        ..Default::default()
    };
    let mut env = Environment::default();
    prepare(&mut machine, &mut env);

    let values: Vec<Value> = (0..count)
        .map(|index| {
            let mut builder = ModuleBuilder::new();
            builder.entry(0);
            build(index as u32, &mut builder);
            builder.add(Instruction::Return);
            let module = builder.build();
            machine.eval_module(&mut env, module.into(), false).unwrap()
        })
        .collect();
    (machine, values)
}
/// Straight-line arithmetic: `(1 + 2) + 3` with the 3 loaded from the
/// constant table must evaluate to 6 and leave both stacks empty.
#[test]
fn test_basic() {
let (m, vs) = execute_all(
1,
|_, builder| {
let c0 = builder.constant(ModuleConstant::Integer(3));
builder.add_all([
Instruction::PushInteger(U::truncate(1)),
Instruction::PushInteger(U::truncate(2)),
Instruction::Math(MathInstruction::Add, U::truncate(2)),
Instruction::PushConstant(c0),
Instruction::Math(MathInstruction::Add, U::truncate(2)),
]);
},
|_, _| {},
);
// The module result was popped by `eval_module`; nothing may be left over.
assert!(m.value_stack.is_empty());
assert!(m.call_stack.is_empty());
assert_eq!(&vs, &[Value::Integer(6)]);
}
/// Calls a function defined in the same module: `main` passes 34 to the
/// local function at address 4, which adds 1200 to its argument.
#[test]
fn test_local_function_call() {
let (m, vs) = execute_all(
1,
|_, builder| {
// Local function at address 4 with one required argument.
let c0 = builder.constant(ModuleConstant::LocalFunction(4, 1));
builder.add_all([
// main
Instruction::PushInteger(U::truncate(34)),
Instruction::PushConstant(c0),
Instruction::Call(U::truncate(1)),
Instruction::Return,
// c0
Instruction::PushArgument(U::truncate(0)),
Instruction::PushInteger(U::truncate(1200)),
Instruction::Math(MathInstruction::Add, U::truncate(2)),
Instruction::Return,
]);
},
|_, _| {},
);
assert!(m.value_stack.is_empty());
assert!(m.call_stack.is_empty());
assert_eq!(&vs, &[Value::Integer(1234)]);
}
/// Calls across module boundaries: module 0 exports `extern-function` as a
/// global, module 1 calls it through a local function, and
/// `extern-function` in turn calls a native function.
#[test]
fn test_cross_module_call() {
// Set by the native function so the test can prove it actually ran.
static NATIVE_STATE: AtomicI64 = AtomicI64::new(-1);
let (m, vs) = execute_all(
2,
// Modules are evaluated in id order, so module 0 (the exporter) runs first.
|id, builder| match id {
1 => {
let c0 = builder.constant(ModuleConstant::LocalFunction(4, 1));
let c1 = builder.constant(ModuleConstant::Identifier("extern-function".into()));
builder.add_all([
// main: (local 1)
Instruction::PushInteger(U::truncate(1)),
Instruction::PushConstant(c0),
Instruction::Call(U::truncate(1)),
Instruction::Return,
// module 1 local function
// (fn (a) (extern-function a 2))
Instruction::PushInteger(U::truncate(2)),
Instruction::PushArgument(U::truncate(0)),
Instruction::PushConstant(c1),
Instruction::GetGlobal,
Instruction::Call(U::truncate(2)),
Instruction::Return,
]);
}
0 => {
let c0 = builder.constant(ModuleConstant::Integer(3));
let c1 = builder.constant(ModuleConstant::Identifier("native".into()));
let c2 = builder.constant(ModuleConstant::LocalFunction(4, 2));
let c3 = builder.constant(ModuleConstant::Identifier("extern-function".into()));
builder.add_all([
// main: bind the local function to the global `extern-function`
Instruction::PushConstant(c2),
Instruction::PushConstant(c3),
Instruction::SetGlobal,
Instruction::Return,
// extern-function
// (fn (a b) (native 3 b a))
Instruction::PushArgument(U::truncate(0)),
Instruction::PushArgument(U::truncate(1)),
Instruction::PushConstant(c0),
Instruction::PushConstant(c1),
Instruction::GetGlobal,
Instruction::Call(U::truncate(3)),
Instruction::Return,
]);
}
_ => unreachable!(),
},
|_, env| {
env.defun_native("native", |_, _, args| {
// Arguments arrive in pop order: the last value pushed comes first.
assert_eq!(
&args,
&[Value::Integer(3), Value::Integer(2), Value::Integer(1)]
);
NATIVE_STATE.store(4321, Ordering::Release);
Ok(Value::Integer(1234))
});
},
);
assert!(m.value_stack.is_empty());
assert!(m.call_stack.is_empty());
// Module 0 evaluates to the `Nil` pushed by `SetGlobal`; module 1 to the
// native function's result.
assert_eq!(&vs, &[Value::Nil, Value::Integer(1234)]);
assert_eq!(NATIVE_STATE.load(Ordering::Acquire), 4321);
}
}