// File: lysp/src/vm/machine.rs
// (Rust, 813 lines, 26 KiB)

use crate::{
compile::{CompileContext, CompileOptions},
error::{MachineError, MachineErrorAt, MachineErrorLocation, ValueConversionError},
vm::{
Value,
env::Environment,
instruction::{
ArgumentCount, ConstantId, ImmediateInteger, Instruction, LocalId, ReadEncoded,
},
macros::MacroExpand,
prelude,
stack::Stack,
value::{ClosureValue, IdentifierValue, NumberValue, UpvalueValue},
},
};
/// One activation record on the machine's call stack.
struct CallFrame {
/// Closure whose bytecode this frame is executing.
closure: ClosureValue,
/// Offset of the next instruction byte to fetch from the closure's code.
ip: usize,
/// Data-stack index of the callee slot; local `n` lives at `base_pointer + 1 + n`.
base_pointer: usize,
}
/// Interpreter state for the bytecode VM: an operand stack, a stack of call frames,
/// a single temp register, the upvalue arena and a set of tracing switches.
pub struct Machine {
/// Operand stack; also holds each frame's callee, arguments and locals.
data_stack: Stack<Value>,
/// Active call frames; the head frame is the one currently executing.
call_stack: Stack<CallFrame>,
/// Scratch register written by `SetTemp` and emptied by `GetTemp`.
tmp: Option<Value>,
/// Storage for captured variables; closures refer to entries by index.
upvalue_arena: Vec<UpvalueValue>,
/// When set, every instruction is passed to the function's `disassemble` before it runs.
pub trace_instructions: bool,
/// When set, every `Return` is logged to stderr.
pub trace_returns: bool,
/// When set together with `trace_instructions`, the data stack is dumped to stderr
/// before each instruction.
pub trace_stack: bool,
/// When set, every call (native or closure) is logged to stderr.
pub trace_calls: bool,
}
impl Default for Machine {
    /// Creates a machine with empty stacks, an empty temp register, an empty
    /// upvalue arena and all tracing disabled.
    fn default() -> Self {
        // Presumably the arguments are the stack capacities (pushes can fail
        // with an overflow) — confirm against `Stack::new`.
        let data_stack = Stack::new(1024);
        let call_stack = Stack::new(64);
        Self {
            data_stack,
            call_stack,
            tmp: None,
            upvalue_arena: Vec::new(),
            trace_instructions: false,
            trace_returns: false,
            trace_stack: false,
            trace_calls: false,
        }
    }
}
impl Machine {
/// Reads the byte at the current frame's instruction pointer and advances the
/// pointer by one.
///
/// # Errors
/// * `InstructionPointerUndefined` when there is no active call frame.
/// * `InstructionPointerOutOfBounds` when the closure has no byte at `ip`
///   (the pointer is left unchanged in that case).
pub(crate) fn fetch_byte(&mut self) -> Result<u8, MachineError> {
    let Some(frame) = self.call_stack.head_mut() else {
        return Err(MachineError::InstructionPointerUndefined);
    };
    match frame.closure.instruction_byte(frame.ip) {
        Some(byte) => {
            frame.ip += 1;
            Ok(byte)
        }
        None => Err(MachineError::InstructionPointerOutOfBounds),
    }
}
/// Fetches the next byte and decodes it as an [`Instruction`].
///
/// # Errors
/// Propagates `fetch_byte` failures and the conversion error for a byte that
/// is not a valid opcode.
#[inline]
fn fetch_opcode(&mut self) -> Result<Instruction, MachineError> {
    let raw = self.fetch_byte()?;
    let opcode: Instruction = raw.try_into()?;
    Ok(opcode)
}
/// Pushes `value` onto the data stack.
///
/// # Errors
/// `DataStackOverflow` when the stack rejects the push.
fn push(&mut self, value: Value) -> Result<(), MachineError> {
    match self.data_stack.push(value) {
        Ok(()) => Ok(()),
        Err(_) => Err(MachineError::DataStackOverflow),
    }
}
/// Removes and returns the top value of the data stack.
///
/// # Errors
/// `DataStackUnderflow` when the stack has nothing to pop.
fn pop(&mut self) -> Result<Value, MachineError> {
    match self.data_stack.pop() {
        Some(value) => Ok(value),
        None => Err(MachineError::DataStackUnderflow),
    }
}
/// Describes where the head frame currently is: its function and instruction
/// offset. Returns `None` when no frame is active.
pub fn current_location(&self) -> Option<MachineErrorLocation> {
    let frame = self.call_stack.head()?;
    Some(MachineErrorLocation {
        function: frame.closure.function.clone(),
        offset: frame.ip,
    })
}
/// Resolves local `id` of the head frame to its data-stack slot.
///
/// Locals are laid out directly above the callee slot, so local `n` is at
/// `base_pointer + 1 + n`.
///
/// # Errors
/// * `CallStackUnderflow` when no frame is active.
/// * `UndefinedLocalReference` when the computed slot is not on the stack.
fn local_slot(&mut self, id: LocalId) -> Result<&mut Value, MachineError> {
    let base_pointer = match self.call_stack.head() {
        Some(frame) => frame.base_pointer,
        None => return Err(MachineError::CallStackUnderflow),
    };
    let slot_index = base_pointer + 1 + usize::from(id);
    match self.data_stack.get_mut(slot_index) {
        Some(slot) => Ok(slot),
        None => Err(MachineError::UndefinedLocalReference),
    }
}
/// Resolves upvalue `id` of the head frame's closure to the value it refers to.
///
/// The closure stores an index into the upvalue arena. An open entry points
/// at a live data-stack slot; a closed entry owns its value.
///
/// # Errors
/// * `CallStackUnderflow` when no frame is active.
/// * `UndefinedUpvalueReference` when the closure has no such upvalue, the
///   arena has no such entry, or an open entry points outside the stack.
fn upvalue_slot(&mut self, id: LocalId) -> Result<&mut Value, MachineError> {
    let Some(frame) = self.call_stack.head() else {
        return Err(MachineError::CallStackUnderflow);
    };
    let Some(&arena_index) = frame.closure.upvalues.get(usize::from(id)) else {
        return Err(MachineError::UndefinedUpvalueReference);
    };
    let Some(cell) = self.upvalue_arena.get_mut(arena_index) else {
        return Err(MachineError::UndefinedUpvalueReference);
    };
    let slot: Option<&mut Value> = match cell {
        UpvalueValue::Closed(boxed) => Some(boxed.as_mut()),
        UpvalueValue::Open(sp) => self.data_stack.get_mut(*sp),
    };
    slot.ok_or(MachineError::UndefinedUpvalueReference)
}
/// `GetLocal`: pushes a copy of local `id` onto the data stack.
fn execute_get_local(&mut self, id: LocalId) -> Result<(), MachineError> {
    let slot = self.local_slot(id)?;
    let copy = slot.clone();
    self.push(copy)
}
/// `SetLocal`: pops the top of the stack and stores it in local `id`.
///
/// The value is popped before the slot is resolved, so a bad `id` still
/// consumes the operand.
fn execute_set_local(&mut self, id: LocalId) -> Result<(), MachineError> {
    let new_value = self.pop()?;
    let slot = self.local_slot(id)?;
    *slot = new_value;
    Ok(())
}
/// `GetUpvalue`: pushes a copy of the value behind upvalue `id`.
fn execute_get_upvalue(&mut self, id: LocalId) -> Result<(), MachineError> {
    let slot = self.upvalue_slot(id)?;
    let copy = slot.clone();
    self.push(copy)
}
/// `SetUpvalue`: pops the top of the stack and stores it behind upvalue `id`.
///
/// The value is popped before the upvalue is resolved, so a bad `id` still
/// consumes the operand.
fn execute_set_upvalue(&mut self, id: LocalId) -> Result<(), MachineError> {
    let new_value = self.pop()?;
    let slot = self.upvalue_slot(id)?;
    *slot = new_value;
    Ok(())
}
/// `GetGlobal`: pops an identifier and pushes the global value bound to it.
///
/// # Errors
/// * `InvalidInstructionArgument` when the popped value is not an identifier.
/// * `UnboundIdentifier` when the environment has no such global.
fn execute_get_global(&mut self, env: &mut Environment) -> Result<(), MachineError> {
    let name = match self.pop()? {
        Value::Identifier(name) => name,
        other => {
            return Err(MachineError::InvalidInstructionArgument(
                Instruction::GetGlobal,
                ValueConversionError {
                    expected: "identifier".into(),
                    got: other,
                },
            ));
        }
    };
    match env.global_value(&name) {
        Some(value) => self.push(value),
        None => Err(MachineError::UnboundIdentifier(name)),
    }
}
/// `SetGlobal`: pops an identifier, then a value, and binds the value to the
/// identifier in the environment.
///
/// # Errors
/// `InvalidInstructionArgument` when the first popped value is not an
/// identifier; the value operand is left on the stack in that case.
fn execute_set_global(&mut self, env: &mut Environment) -> Result<(), MachineError> {
    let name = match self.pop()? {
        Value::Identifier(name) => name,
        other => {
            return Err(MachineError::InvalidInstructionArgument(
                Instruction::SetGlobal,
                ValueConversionError {
                    expected: "identifier".into(),
                    got: other,
                },
            ));
        }
    };
    let new_value = self.pop()?;
    env.set_global_value(name, new_value);
    Ok(())
}
/// `Call`: invokes the callee that sits `argument_count` slots below the top
/// of the data stack.
///
/// * Native functions are invoked immediately: the arguments are popped, the
///   function runs, and the callee slot is replaced by the result.
/// * Closures (and bare functions, treated as closures with no upvalues) get
///   a new call frame whose `base_pointer` is the callee slot. Missing
///   optional arguments are padded with `Nil` up to `max_arity`.
///
/// # Errors
/// * `DataStackUnderflow` when the stack is too short for callee + arguments.
/// * `InvalidInstructionArgument` when the callee is not callable.
/// * `DataStackOverflow` / `CallStackOverflow` when a stack is full.
/// * Whatever the native function returns.
///
/// # Panics
/// Arity mismatches and `&rest` functions are not implemented yet and hit
/// `todo!()`.
fn execute_call(
    &mut self,
    env: &mut Environment,
    argument_count: usize,
) -> Result<(), MachineError> {
    // The callee sits directly below its arguments.
    let Some(base_pointer) = self.data_stack.pointer().checked_sub(argument_count + 1) else {
        return Err(MachineError::DataStackUnderflow);
    };
    let closure = match &self.data_stack[base_pointer] {
        Value::Closure(existing) => existing.clone(),
        // A bare function becomes a closure that captures nothing.
        Value::Function(function) => ClosureValue {
            function: function.clone(),
            upvalues: Vec::new(),
        },
        Value::NativeFunction(function) => {
            let function = function.clone();
            // TODO remove argument cloning
            // Arguments come off the stack last-to-first, so restore call order.
            let mut arguments = Vec::new();
            for _ in 0..argument_count {
                arguments.push(self.pop()?);
            }
            arguments.reverse();
            if self.trace_calls {
                eprintln!("TRACE: Call native");
                eprintln!("TRACE: {function}");
                match self.current_location() {
                    Some(location) => eprintln!("TRACE: From {location}"),
                    None => eprintln!("TRACE: From <unknown>"),
                }
                if !arguments.is_empty() {
                    eprintln!("TRACE: With arguments:");
                    for (i, arg) in arguments.iter().enumerate() {
                        eprintln!("TRACE: [{i}] {arg}");
                    }
                }
            }
            let value = function.invoke(self, env, &arguments[..])?;
            // Drop the native function itself, then leave the result in its place.
            self.pop()?;
            return self.push(value);
        }
        other => {
            return Err(MachineError::InvalidInstructionArgument(
                Instruction::Call,
                ValueConversionError {
                    expected: "closure, function or native function".into(),
                    got: other.clone(),
                },
            ));
        }
    };

    let min_arity = closure.function.min_arity;
    let max_arity = closure.function.max_arity;
    if argument_count < min_arity {
        todo!("TODO function called with less arguments than expected")
    }
    if argument_count > max_arity {
        todo!("TODO function called with more arguments than expected")
    }
    if max_arity == usize::MAX {
        todo!("VM support for &rest argument")
    }
    // Optional parameters that were not supplied default to nil.
    for _ in argument_count..max_arity {
        self.push(Value::Nil)?;
    }

    if self.trace_calls {
        eprintln!("TRACE: Call closure");
        eprintln!("TRACE: {closure}");
        match self.current_location() {
            Some(location) => eprintln!("TRACE: From {location}"),
            None => eprintln!("TRACE: From <unknown>"),
        }
        if argument_count != 0 {
            eprintln!("TRACE: With arguments:");
            for i in 0..argument_count {
                match self.data_stack.get(base_pointer + 1 + i) {
                    Some(argument) => eprintln!("TRACE: [{i}] {argument}"),
                    None => eprintln!("TRACE: [{i}] <invalid>"),
                }
            }
        }
    }

    let frame = CallFrame {
        closure,
        ip: 0,
        base_pointer,
    };
    self.call_stack
        .push(frame)
        .map_err(|_| MachineError::CallStackOverflow)
}
/// Returns the arena index of an open upvalue for data-stack slot `sp`.
///
/// An existing open upvalue for the same slot is reused (the most recently
/// added one wins); otherwise a new open entry is appended.
fn capture_stack_upvalue(&mut self, sp: usize) -> Result<usize, MachineError> {
    let existing = self.upvalue_arena.iter().rposition(
        |upvalue| matches!(upvalue, UpvalueValue::Open(target_sp) if *target_sp == sp),
    );
    if let Some(arena_index) = existing {
        return Ok(arena_index);
    }
    self.upvalue_arena.push(UpvalueValue::Open(sp));
    Ok(self.upvalue_arena.len() - 1)
}
fn close_upvalues(&mut self, sp: usize) {
// TODO this is inefficient
for uv in self.upvalue_arena.iter_mut() {
if let UpvalueValue::Open(target_sp) = uv
&& *target_sp >= sp
{
let value = self.data_stack[*target_sp].clone();
*uv = UpvalueValue::Closed(Box::new(value));
}
}
}
/// `MakeClosure`: pops a function and pushes a closure over it, capturing
/// each upvalue the function declares.
///
/// Upvalues marked `is_local` refer to a local of the head frame and are
/// captured as (possibly shared) open upvalues on that stack slot.
///
/// # Errors
/// * `InvalidInstructionArgument` when the popped value is not a function.
/// * `CallStackUnderflow` when a local upvalue must be captured but no frame
///   is active (previously this panicked).
///
/// # Panics
/// Capturing a non-local upvalue is not implemented yet and hits `todo!()`.
fn execute_make_closure(&mut self) -> Result<(), MachineError> {
    let value = self.pop()?;
    let Value::Function(function) = value else {
        return Err(MachineError::InvalidInstructionArgument(
            Instruction::MakeClosure,
            ValueConversionError {
                expected: "function".into(),
                got: value,
            },
        ));
    };
    let mut closure = ClosureValue {
        function,
        upvalues: vec![],
    };
    for upvalue_def in closure.function.upvalues.iter() {
        if upvalue_def.is_local {
            // Report a missing frame as a machine error instead of unwrapping.
            let frame = self
                .call_stack
                .head()
                .ok_or(MachineError::CallStackUnderflow)?;
            // Locals start one slot above the frame's callee slot.
            let sp = frame.base_pointer + 1 + usize::from(upvalue_def.index);
            let arena_index = self.capture_stack_upvalue(sp)?;
            closure.upvalues.push(arena_index);
        } else {
            todo!();
        }
    }
    self.push(Value::Closure(closure))
}
/// `Return`: pops the return value, discards the head frame together with
/// everything from its callee slot upwards, and pushes the return value in
/// the callee's place.
///
/// # Errors
/// * `DataStackUnderflow` when there is no return value to pop.
/// * `CallStackUnderflow` when there is no frame to return from.
/// * `DataStackOverflow` when the result cannot be pushed back.
fn execute_return(&mut self) -> Result<(), MachineError> {
    let return_value = self.pop()?;
    if self.trace_returns {
        eprintln!("TRACE: Function return");
        match self.current_location() {
            Some(location) => eprintln!("TRACE: From {location}"),
            None => eprintln!("TRACE: From <unknown>"),
        }
        // The frame being returned from is at `pointer - 1`; the frame we
        // return *to* is the one below it. The outermost frame has no caller.
        let caller = self
            .call_stack
            .pointer()
            .checked_sub(2)
            .and_then(|index| self.call_stack.get(index));
        match caller {
            Some(frame) => eprintln!("TRACE: To {}+{}", frame.closure, frame.ip),
            None => eprintln!("TRACE: To <unknown>"),
        }
        eprintln!("TRACE: With value {return_value}");
    }
    let frame = self
        .call_stack
        .pop()
        .ok_or(MachineError::CallStackUnderflow)?;
    // NOTE(review): open upvalues pointing into this frame are not closed
    // here — presumably the compiler emits `CloseUpvalue` first; confirm.
    self.data_stack.set_pointer(frame.base_pointer);
    self.push(return_value)
}
/// `DeclareMacro`: pops an identifier, then a function, and registers the
/// function as a bytecode macro under that identifier.
///
/// Both operands are popped before either is validated; the identifier is
/// checked first.
///
/// # Errors
/// `InvalidInstructionArgument` when either operand has the wrong type.
fn execute_declare_macro(&mut self, env: &mut Environment) -> Result<(), MachineError> {
    let name_operand = self.pop()?;
    let body_operand = self.pop()?;
    match (name_operand, body_operand) {
        (Value::Identifier(name), Value::Function(body)) => {
            env.defmacro_bytecode(name, body);
            Ok(())
        }
        (Value::Identifier(_), got) => Err(MachineError::InvalidInstructionArgument(
            Instruction::DeclareMacro,
            ValueConversionError {
                expected: "function".into(),
                got,
            },
        )),
        (got, _) => Err(MachineError::InvalidInstructionArgument(
            Instruction::DeclareMacro,
            ValueConversionError {
                expected: "identifier".into(),
                got,
            },
        )),
    }
}
/// Moves the head frame's instruction pointer by the signed offset `target`.
///
/// With `check_condition` set, a condition is popped first and the jump is
/// taken only when that value is *not* trueish. Without it the jump is
/// unconditional and nothing is popped.
///
/// # Errors
/// * `DataStackUnderflow` when a condition is required but missing.
/// * `CallStackUnderflow` when no frame is active.
/// * `InvalidBranchTarget` when `ip + target` does not fit in `usize`.
fn execute_branch(&mut self, check_condition: bool, target: isize) -> Result<(), MachineError> {
    // A trueish condition means "fall through".
    if check_condition && self.pop()?.is_trueish() {
        return Ok(());
    }
    let Some(frame) = self.call_stack.head_mut() else {
        return Err(MachineError::CallStackUnderflow);
    };
    match frame.ip.checked_add_signed(target) {
        Some(destination) => {
            frame.ip = destination;
            Ok(())
        }
        None => Err(MachineError::InvalidBranchTarget(frame.ip, target)),
    }
}
/// Prints the data stack to stderr on one line, bottom first, values
/// separated by single spaces.
fn trace_stack(&self) {
    eprint!("TRACE: [");
    let mut separator = "";
    for index in 0..self.data_stack.pointer() {
        eprint!("{separator}");
        eprint!("{}", self.data_stack[index]);
        separator = " ";
    }
    eprintln!("]");
}
/// Hands the instruction at the head frame's `ip` to the function's
/// `disassemble`; prints `<undefined>` to stderr when no frame is active.
fn trace_instruction(&self) {
    match self.call_stack.head() {
        Some(frame) => {
            frame.closure.function.disassemble(frame.ip, 0, 0, false);
        }
        None => eprintln!("<undefined>"),
    }
}
/// Fetches, decodes and executes one instruction of the head frame.
///
/// When `trace_instructions` is set the instruction is traced first; the
/// stack dump is emitted only if `trace_stack` is set as well.
///
/// # Errors
/// Any `MachineError` raised while fetching operands or executing the
/// instruction. Malformed bytecode is reported as an error rather than a
/// panic: `CloseUpvalue` on an empty stack yields `DataStackUnderflow`.
///
/// # Panics
/// `DeclareGlobal` is not implemented yet and hits `todo!()`.
fn execute_next(&mut self, env: &mut Environment) -> Result<(), MachineError> {
    if self.trace_instructions {
        if self.trace_stack {
            self.trace_stack();
        }
        self.trace_instruction();
    }
    let opcode = self.fetch_opcode()?;
    match opcode {
        // values
        Instruction::PushNil => self.push(Value::Nil)?,
        Instruction::PushInteger => {
            let value = ImmediateInteger::read_encoded(self)?;
            self.push(Value::Number(NumberValue::Int(value.sign_extend_i64())))?;
        }
        Instruction::PushTrue => self.push(true.into())?,
        Instruction::PushFalse => self.push(false.into())?,
        Instruction::PushConstant => {
            let index = ConstantId::read_encoded(self)?;
            // A frame must exist for the operand read to have succeeded, but
            // report its absence as an error rather than panicking.
            let frame = self
                .call_stack
                .head()
                .ok_or(MachineError::CallStackUnderflow)?;
            let value = frame
                .closure
                .function
                .constants
                .get(usize::from(index))
                .cloned()
                .ok_or(MachineError::UndefinedConstantReference)?;
            self.push(value)?;
        }
        Instruction::Drop => {
            let _ = self.pop()?;
        }
        // binding
        Instruction::SetLocal => {
            let id = LocalId::read_encoded(self)?;
            self.execute_set_local(id)?;
        }
        Instruction::GetLocal => {
            let id = LocalId::read_encoded(self)?;
            self.execute_get_local(id)?;
        }
        Instruction::SetUpvalue => {
            let id = LocalId::read_encoded(self)?;
            self.execute_set_upvalue(id)?;
        }
        Instruction::GetUpvalue => {
            let id = LocalId::read_encoded(self)?;
            self.execute_get_upvalue(id)?;
        }
        Instruction::SetGlobal => self.execute_set_global(env)?,
        Instruction::GetGlobal => self.execute_get_global(env)?,
        Instruction::DeclareMacro => self.execute_declare_macro(env)?,
        // arithmetic
        Instruction::Gt
        | Instruction::Lt
        | Instruction::Eq
        | Instruction::Ge
        | Instruction::Le
        | Instruction::Ne
        | Instruction::Add
        | Instruction::Sub
        | Instruction::Mul
        | Instruction::Div
        | Instruction::Mod
        | Instruction::Not
        | Instruction::Negate => {
            let argument_count = usize::from(ArgumentCount::read_encoded(self)?);
            // Operands pop last-to-first; reverse to restore source order.
            let mut arguments = (0..argument_count)
                .map(|_| self.pop())
                .collect::<Result<Vec<_>, _>>()?;
            arguments.reverse();
            let function = prelude::dispatch_arithmetic(opcode);
            let value = (function)(self, env, &arguments[..])?;
            self.push(value)?;
        }
        // function
        Instruction::Return => {
            self.execute_return()?;
        }
        Instruction::Call => {
            let argument_count = usize::from(ArgumentCount::read_encoded(self)?);
            self.execute_call(env, argument_count)?;
        }
        Instruction::MakeClosure => {
            self.execute_make_closure()?;
        }
        Instruction::SetTemp => {
            self.tmp = Some(self.pop()?);
        }
        Instruction::GetTemp => {
            let tmp = self.tmp.take().ok_or(MachineError::TempRegisterEmpty)?;
            self.push(tmp)?;
        }
        Instruction::CloseUpvalue => {
            // Close upvalues on the top slot, then discard it. An empty stack
            // is an underflow, not an arithmetic overflow.
            let top = self
                .data_stack
                .pointer()
                .checked_sub(1)
                .ok_or(MachineError::DataStackUnderflow)?;
            self.close_upvalues(top);
            self.pop()?;
        }
        Instruction::Branch => {
            // The operand byte is a signed 8-bit offset, applied to the ip
            // after the operand has been consumed.
            let target = self.fetch_byte()? as i8;
            self.execute_branch(true, target as isize)?;
        }
        Instruction::Jump => {
            let target = self.fetch_byte()? as i8;
            self.execute_branch(false, target as isize)?;
        }
        Instruction::DeclareGlobal => todo!(),
    }
    Ok(())
}
/// Pops call frames until the call stack is exactly `depth` frames deep,
/// resetting the data-stack pointer to each popped frame's `base_pointer`.
///
/// # Panics
/// Panics if the call stack runs out of frames before reaching `depth`.
fn unwind_to(&mut self, depth: usize) {
    loop {
        if self.call_stack.pointer() == depth {
            break;
        }
        let discarded = self.call_stack.pop().unwrap();
        // Drop everything the discarded frame left on the data stack.
        self.data_stack.set_pointer(discarded.base_pointer);
    }
}
pub fn evaluate_closure(
&mut self,
env: &mut Environment,
closure: ClosureValue,
argument_count: usize,
) -> Result<Value, MachineErrorAt> {
let unwind_target = self.call_stack.pointer();
self.push(Value::Closure(closure))
.map_err(MachineErrorAt::at_unknown)?;
self.execute_call(env, argument_count)
.map_err(MachineErrorAt::at_unknown)?;
while self.call_stack.pointer() != unwind_target {
let location = self.current_location();
if let Err(error) = self.execute_next(env) {
// Unwind up to entry depth
self.unwind_to(unwind_target);
return Err(error.at(location));
}
}
self.pop().map_err(MachineErrorAt::at_unknown)
}
/// Calls `closure` with `args` and runs the machine until the call returns,
/// yielding the result.
///
/// The closure and the supplied arguments are pushed here. Padding of missing
/// optional arguments with nil is left to `execute_call`, which locates the
/// callee `args.len() + 1` slots below the top of the stack. Padding here as
/// well would shift the callee out of that position and pad twice.
///
/// If an instruction fails, the frames entered since the call are unwound and
/// the error is tagged with the location recorded just before that
/// instruction. Errors raised during setup or the final pop carry no location.
///
/// # Panics
/// Arity mismatches and `&rest` functions are not implemented yet and hit
/// `todo!()`.
pub fn evaluate_closure_args(
    &mut self,
    env: &mut Environment,
    closure: ClosureValue,
    args: &[Value],
) -> Result<Value, MachineErrorAt> {
    let max_arity = closure.function.max_arity;
    if args.len() < closure.function.min_arity {
        todo!()
    }
    if args.len() > max_arity {
        todo!()
    }
    self.push(Value::Closure(closure))
        .map_err(MachineErrorAt::at_unknown)?;
    if max_arity == usize::MAX {
        todo!("VM support for &rest argument")
    }
    for arg in args {
        self.push(arg.clone()).map_err(MachineErrorAt::at_unknown)?;
    }
    // No nil padding here: `execute_call` pads up to `max_arity` itself and
    // needs the callee exactly `args.len() + 1` slots below the top.
    let unwind_target = self.call_stack.pointer();
    self.execute_call(env, args.len())
        .map_err(MachineErrorAt::at_unknown)?;
    while self.call_stack.pointer() != unwind_target {
        // Record the location before executing: the ip moves during execution.
        let location = self.current_location();
        if let Err(error) = self.execute_next(env) {
            // Unwind up to entry depth
            self.unwind_to(unwind_target);
            return Err(error.at(location));
        }
    }
    self.pop().map_err(MachineErrorAt::at_unknown)
}
pub fn evaluate_value(
&mut self,
compile_options: CompileOptions,
chunk_name: Option<IdentifierValue>,
env: &mut Environment,
value: Value,
) -> Result<Value, MachineErrorAt> {
let value = value.macro_expand(self, env, false)?;
let function = CompileContext::compile_value(compile_options, chunk_name, &value)
.map_err(MachineError::Compile)
.map_err(MachineErrorAt::at_unknown)?;
let closure = ClosureValue {
function,
upvalues: vec![],
};
let value = self.evaluate_closure(env, closure, 0)?;
Ok(value)
}
}
#[cfg(test)]
mod tests {
use std::rc::Rc;
use crate::{
error::{MachineError, MachineErrorAt},
vm::{
Value,
env::Environment,
instruction::Instruction,
machine::Machine,
value::{BytecodeFunction, ClosureValue, NumberValue},
},
};
/// Runs `instructions` (with a trailing `Return` appended) as a zero-argument
/// script on a fresh machine and returns the machine together with the
/// outcome, so callers can inspect the stacks afterwards.
fn try_eval(
    env: &mut Environment,
    mut instructions: Vec<u8>,
    constants: Vec<Value>,
) -> (Machine, Result<Value, MachineErrorAt>) {
    instructions.push(Instruction::Return.into());
    let closure = ClosureValue {
        upvalues: vec![],
        function: Rc::new(BytecodeFunction {
            identifier: Some("test-script".into()),
            instructions: instructions.into(),
            constants: constants.into(),
            upvalues: [].into(),
            // NOTE(review): `execute_call` reads `min_arity`/`max_arity` from
            // the function, so the old single `arity` field is assumed to have
            // been replaced by this pair — confirm against `BytecodeFunction`.
            min_arity: 0,
            max_arity: 0,
        }),
    };
    let mut machine = Machine::default();
    let result = machine.evaluate_closure(env, closure, 0);
    (machine, result)
}
/// Like `try_eval`, but panics with "evaluation failed" if the script errors.
fn eval0(
    env: &mut Environment,
    instructions: Vec<u8>,
    constants: Vec<Value>,
) -> (Machine, Value) {
    let (machine, outcome) = try_eval(env, instructions, constants);
    (machine, outcome.expect("evaluation failed"))
}
/// Each single-push script returns the pushed value and leaves both stacks
/// empty.
#[test]
fn test_stack_execution() {
    let mut env = Environment::default();
    let cases: [(Instruction, Value); 3] = [
        (Instruction::PushNil, Value::Nil),
        (Instruction::PushTrue, true.into()),
        (Instruction::PushFalse, false.into()),
    ];
    for (opcode, expected) in cases {
        let (m, v) = eval0(&mut env, vec![opcode.into()], vec![]);
        assert_eq!(v, expected);
        assert!(m.data_stack.is_empty());
        assert!(m.call_stack.is_empty());
    }
}
/// A failing script reports the error location and leaves both stacks empty.
#[test]
fn test_unwind() {
    let mut env = Environment::default();
    // `Drop` removes the only value on the stack, so the appended `Return`
    // (at offset 1) underflows and triggers the unwind.
    let script = vec![Instruction::Drop.into()];
    let (machine, outcome) = try_eval(&mut env, script, vec![]);
    let failure = outcome.unwrap_err();
    assert_eq!(failure.location.map(|a| a.offset), Some(1));
    assert_eq!(failure.error, MachineError::DataStackUnderflow);
    assert!(machine.data_stack.is_empty());
    assert!(machine.call_stack.is_empty());
}
/// Calls a one-parameter function whose body introduces a local and checks
/// the result and that both stacks end up empty.
#[test]
fn test_closure_call_no_upvalues_yes_locals() {
    let mut env = Environment::default();
    // (lambda (y) (let (x 123) (+ x y)))
    let lambda_function = Rc::new(BytecodeFunction {
        identifier: None,
        instructions: [
            // x 123 — becomes local 1, directly above the argument y (local 0)
            Instruction::PushInteger.into(),
            123,
            0,
            Instruction::GetLocal.into(),
            1,
            Instruction::GetLocal.into(),
            0,
            Instruction::Add.into(),
            2,
            // Stash the sum, drop the local x, then restore the sum.
            Instruction::SetTemp.into(),
            Instruction::Drop.into(),
            Instruction::GetTemp.into(),
            Instruction::Return.into(),
        ]
        .into(),
        constants: [].into(),
        upvalues: [].into(),
        // The lambda takes exactly one parameter (y); x is a body local.
        // NOTE(review): assumes `BytecodeFunction` now carries
        // `min_arity`/`max_arity` (as read by `execute_call`) instead of a
        // single `arity` — confirm against the struct definition.
        min_arity: 1,
        max_arity: 1,
    });
    let (m, r) = eval0(
        &mut env,
        vec![
            Instruction::PushConstant.into(),
            0,
            0,
            Instruction::PushInteger.into(),
            65,
            1,
            Instruction::Call.into(),
            1,
        ],
        vec![Value::Function(lambda_function)],
    );
    assert!(m.data_stack.is_empty());
    assert!(m.call_stack.is_empty());
    assert_eq!(r, Value::Number(NumberValue::Int(444)));
}
/// Calls a two-parameter function with two arguments and checks the result
/// and that both stacks end up empty.
#[test]
fn test_closure_call_no_upvalues_no_locals() {
    let mut env = Environment::default();
    // (lambda (x y) (+ x y))
    let lambda_function = Rc::new(BytecodeFunction {
        identifier: None,
        instructions: [
            Instruction::GetLocal.into(),
            0,
            Instruction::GetLocal.into(),
            1,
            Instruction::Add.into(),
            2,
            Instruction::Return.into(),
        ]
        .into(),
        constants: [].into(),
        upvalues: [].into(),
        // Exactly two parameters, both required.
        // NOTE(review): assumes `BytecodeFunction` now carries
        // `min_arity`/`max_arity` (as read by `execute_call`) instead of a
        // single `arity` — confirm against the struct definition.
        min_arity: 2,
        max_arity: 2,
    });
    let (m, r) = eval0(
        &mut env,
        vec![
            Instruction::PushConstant.into(),
            0,
            0,
            Instruction::PushInteger.into(),
            123,
            0,
            Instruction::PushInteger.into(),
            65,
            1,
            Instruction::Call.into(),
            2,
        ],
        vec![Value::Function(lambda_function)],
    );
    assert!(m.data_stack.is_empty());
    assert!(m.call_stack.is_empty());
    assert_eq!(r, Value::Number(NumberValue::Int(444)));
}
}