From 36e6cc836478990d0ee5428ca9f1935924fe7332 Mon Sep 17 00:00:00 2001 From: Mark Poliakov Date: Thu, 30 Apr 2026 16:23:39 +0300 Subject: [PATCH] Initial commit --- .gitignore | 1 + Cargo.lock | 278 ++++++++++++++++++++++++ Cargo.toml | 10 + src/compile/block.rs | 342 ++++++++++++++++++++++++++++++ src/compile/error.rs | 7 + src/compile/function.rs | 40 ++++ src/compile/mod.rs | 11 + src/compile/module.rs | 84 ++++++++ src/compile/syntax.rs | 264 +++++++++++++++++++++++ src/compile/value.rs | 31 +++ src/error.rs | 9 + src/lib.rs | 6 + src/main.rs | 52 +++++ src/parse.rs | 407 +++++++++++++++++++++++++++++++++++ src/vm/instruction.rs | 120 +++++++++++ src/vm/loader.rs | 1 + src/vm/machine.rs | 459 ++++++++++++++++++++++++++++++++++++++++ src/vm/mod.rs | 8 + src/vm/module.rs | 188 ++++++++++++++++ src/vm/pool.rs | 50 +++++ src/vm/prelude.rs | 3 + src/vm/stack.rs | 63 ++++++ src/vm/value.rs | 176 +++++++++++++++ 23 files changed, 2610 insertions(+) create mode 100644 .gitignore create mode 100644 Cargo.lock create mode 100644 Cargo.toml create mode 100644 src/compile/block.rs create mode 100644 src/compile/error.rs create mode 100644 src/compile/function.rs create mode 100644 src/compile/mod.rs create mode 100644 src/compile/module.rs create mode 100644 src/compile/syntax.rs create mode 100644 src/compile/value.rs create mode 100644 src/error.rs create mode 100644 src/lib.rs create mode 100644 src/main.rs create mode 100644 src/parse.rs create mode 100644 src/vm/instruction.rs create mode 100644 src/vm/loader.rs create mode 100644 src/vm/machine.rs create mode 100644 src/vm/mod.rs create mode 100644 src/vm/module.rs create mode 100644 src/vm/pool.rs create mode 100644 src/vm/prelude.rs create mode 100644 src/vm/stack.rs create mode 100644 src/vm/value.rs diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ea8c4bf --- /dev/null +++ b/.gitignore @@ -0,0 +1 @@ +/target diff --git a/Cargo.lock b/Cargo.lock new file mode 100644 index 0000000..1ca0baa 
--- /dev/null +++ b/Cargo.lock @@ -0,0 +1,278 @@ +# This file is automatically @generated by Cargo. +# It is not intended for manual editing. +version = 4 + +[[package]] +name = "anstream" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d" +dependencies = [ + "anstyle", + "anstyle-parse", + "anstyle-query", + "anstyle-wincon", + "colorchoice", + "is_terminal_polyfill", + "utf8parse", +] + +[[package]] +name = "anstyle" +version = "1.0.14" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000" + +[[package]] +name = "anstyle-parse" +version = "1.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e" +dependencies = [ + "utf8parse", +] + +[[package]] +name = "anstyle-query" +version = "1.1.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc" +dependencies = [ + "windows-sys", +] + +[[package]] +name = "anstyle-wincon" +version = "3.0.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d" +dependencies = [ + "anstyle", + "once_cell_polyfill", + "windows-sys", +] + +[[package]] +name = "bitmatch" +version = "0.1.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "6a53e105d41966c9b4594b8e3b7cf8e81ae63cc83664880b049af8a11381a3ad" +dependencies = [ + "boolean_expression", + "proc-macro2", + "quote", + "syn 1.0.109", +] + +[[package]] +name = "boolean_expression" +version = "0.3.11" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7c33ef624481a2d2252fd352266c050e83203343d0884622f7ba09782abbfa83" +dependencies = [ + "itertools", + 
"smallvec", +] + +[[package]] +name = "clap" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51" +dependencies = [ + "clap_builder", + "clap_derive", +] + +[[package]] +name = "clap_builder" +version = "4.6.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f" +dependencies = [ + "anstream", + "anstyle", + "clap_lex", + "strsim", +] + +[[package]] +name = "clap_derive" +version = "4.6.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9" +dependencies = [ + "heck", + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "clap_lex" +version = "1.1.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9" + +[[package]] +name = "colorchoice" +version = "1.0.5" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570" + +[[package]] +name = "either" +version = "1.15.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719" + +[[package]] +name = "heck" +version = "0.5.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea" + +[[package]] +name = "is_terminal_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695" + +[[package]] +name = "itertools" +version = "0.9.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = 
"284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b" +dependencies = [ + "either", +] + +[[package]] +name = "lysp" +version = "0.1.0" +dependencies = [ + "bitmatch", + "clap", + "nom", + "thiserror", +] + +[[package]] +name = "memchr" +version = "2.8.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79" + +[[package]] +name = "nom" +version = "8.0.0" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405" +dependencies = [ + "memchr", +] + +[[package]] +name = "once_cell_polyfill" +version = "1.70.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe" + +[[package]] +name = "proc-macro2" +version = "1.0.106" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934" +dependencies = [ + "unicode-ident", +] + +[[package]] +name = "quote" +version = "1.0.45" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924" +dependencies = [ + "proc-macro2", +] + +[[package]] +name = "smallvec" +version = "1.15.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03" + +[[package]] +name = "strsim" +version = "0.11.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f" + +[[package]] +name = "syn" +version = "1.0.109" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] 
+name = "syn" +version = "2.0.117" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99" +dependencies = [ + "proc-macro2", + "quote", + "unicode-ident", +] + +[[package]] +name = "thiserror" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4" +dependencies = [ + "thiserror-impl", +] + +[[package]] +name = "thiserror-impl" +version = "2.0.18" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.117", +] + +[[package]] +name = "unicode-ident" +version = "1.0.24" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75" + +[[package]] +name = "utf8parse" +version = "0.2.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821" + +[[package]] +name = "windows-link" +version = "0.2.1" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5" + +[[package]] +name = "windows-sys" +version = "0.61.2" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc" +dependencies = [ + "windows-link", +] diff --git a/Cargo.toml b/Cargo.toml new file mode 100644 index 0000000..e18d2c3 --- /dev/null +++ b/Cargo.toml @@ -0,0 +1,10 @@ +[package] +name = "lysp" +version = "0.1.0" +edition = "2024" + +[dependencies] +thiserror = "2.0.18" +clap = { version = "4.6.1", features = ["derive"] } +nom = "8.0.0" +bitmatch = "0.1.1" diff --git a/src/compile/block.rs b/src/compile/block.rs new file mode 
100644 index 0000000..1c0bfd0 --- /dev/null +++ b/src/compile/block.rs @@ -0,0 +1,342 @@ +use std::rc::Rc; + +use crate::{ + compile::{ + error::CompileError, + function::FunctionSignature, + module::CompilationModule, + syntax::{CallExpression, Expression, FunctionBody, LambdaExpression}, + value::{BuiltinFunction, CompileConstant, CompileValue}, + }, + vm::instruction::{Instruction, U}, +}; + +pub struct FunctionBlock { + pub(crate) instructions: Vec, + signature: FunctionSignature, +} + +pub struct LocalBlock<'a> { + // TODO local bindings + function: &'a mut FunctionBlock, + module: &'a mut CompilationModule, + parent: Option<&'a mut LocalBlock<'a>>, +} + +impl FunctionBlock { + pub fn new(signature: FunctionSignature) -> Self { + Self { + instructions: vec![], + signature, + } + } + + pub fn emit(&mut self, instruction: Instruction) { + eprintln!("emit {instruction:?}"); + self.instructions.push(instruction); + } + + pub fn compile_body( + &mut self, + module: &mut CompilationModule, + body: &FunctionBody, + ) -> Result<(), CompileError> { + let mut local = LocalBlock::root(self, module); + for statement in &body.head { + local.compile_statement(statement)?; + } + let value = local.compile_expression(&body.tail)?; + local.compile_return(value)?; + Ok(()) + } +} + +impl<'a> LocalBlock<'a> { + fn root(function: &'a mut FunctionBlock, module: &'a mut CompilationModule) -> Self { + Self { + function, + module, + parent: None, + } + } + + fn compile_push(&mut self, value: CompileValue) -> Result<(), CompileError> { + match value { + CompileValue::Nil => todo!(), + CompileValue::Boolean(_) => todo!(), + CompileValue::Integer(value) => { + // TODO signed/unsigned + if let Some(value) = U::new(value as u32) { + self.function.emit(Instruction::PushInteger(value)); + } else { + todo!() + } + } + CompileValue::Argument(index) => { + let Some(index) = U::new(index as u32) else { + todo!(); + }; + self.function.emit(Instruction::PushArgument(index)); + } + 
CompileValue::LocalFunction(index) => { + let value = self + .module + .constant(CompileConstant::LocalFunction(index))?; + self.function.emit(Instruction::PushConstant(value)); + } + // Already on stack + CompileValue::Stack => (), + } + + Ok(()) + } + + pub fn compile_return(&mut self, value: CompileValue) -> Result<(), CompileError> { + self.compile_push(value)?; + self.function.emit(Instruction::Return); + Ok(()) + } + + pub fn compile_statement(&mut self, expression: &Expression) -> Result<(), CompileError> { + todo!() + } + + fn compile_identifier(&mut self, identifier: &Rc) -> Result { + if let Some(_builtin) = BuiltinFunction::from_identifier(identifier) { + todo!("Illegal"); + } + if let Some(argument) = self.function.signature.argument(identifier) { + return Ok(CompileValue::Argument(argument)); + } + // TODO local bindings + todo!() + } + + fn compile_lambda(&mut self, lambda: &LambdaExpression) -> Result { + let index = self + .module + .compile_function(lambda.signature.clone(), &lambda.body, false)?; + Ok(CompileValue::LocalFunction(index)) + } + + fn compile_builtin_add(&mut self, args: &[Expression]) -> Result { + // TODO optimize literals + for arg in args.iter().rev() { + let arg = self.compile_expression(arg)?; + self.compile_push(arg)?; + } + let Some(count) = U::new(args.len() as u32) else { + todo!() + }; + self.function.emit(Instruction::Add(count)); + Ok(CompileValue::Stack) + } + + fn compile_builtin_sub(&mut self, args: &[Expression]) -> Result { + todo!() + } + + fn compile_call_builtin( + &mut self, + builtin: BuiltinFunction, + args: &[Expression], + ) -> Result { + match builtin { + BuiltinFunction::Add => self.compile_builtin_add(args), + BuiltinFunction::Sub => self.compile_builtin_sub(args), + } + } + + fn compile_call(&mut self, call: &CallExpression) -> Result { + eprintln!("compile_call({:?}, {:?})", &call.callee, &call.arguments); + match call.callee.as_ref() { + Expression::Identifier(identifier) + if let Some(builtin) = 
BuiltinFunction::from_identifier(identifier.as_ref()) => + { + self.compile_call_builtin(builtin, &call.arguments) + } + _ => { + // Push arguments in reverse order + let Some(count) = U::new(call.arguments.len() as u32) else { + todo!(); + }; + for arg in call.arguments.iter().rev() { + let arg = self.compile_expression(arg)?; + self.compile_push(arg)?; + } + let callee = self.compile_expression(&call.callee)?; + self.compile_push(callee)?; + self.function.emit(Instruction::Call(count)); + Ok(CompileValue::Stack) + } + } + } + + pub fn compile_expression( + &mut self, + expression: &Expression, + ) -> Result { + match expression { + Expression::Nil => Ok(CompileValue::Nil), + Expression::BooleanLiteral(value) => Ok(CompileValue::Boolean(*value)), + Expression::IntegerLiteral(value) => Ok(CompileValue::Integer(*value)), + Expression::Identifier(identifier) => self.compile_identifier(identifier), + Expression::Lambda(lambda) => self.compile_lambda(lambda), + Expression::Call(call) => self.compile_call(call), + } + } +} + +#[cfg(test)] +mod tests { + use std::rc::Rc; + + use crate::{ + compile::{ + block::{FunctionBlock, LocalBlock}, + function::FunctionSignature, + module::CompilationModule, + syntax::{CallExpression, Expression, FunctionBody, LambdaExpression}, + value::CompileValue, + }, + vm::instruction::{Instruction, U}, + }; + + fn test_compile(expression: &Expression) -> (CompilationModule, FunctionBlock, CompileValue) { + let mut module = CompilationModule::default(); + let mut function = FunctionBlock { + signature: FunctionSignature { + required_arguments: vec!["arg0".into()], + optional_arguments: vec!["arg1".into()], + rest_argument: Some("arg2".into()), + }, + instructions: vec![], + }; + let mut local = LocalBlock::root(&mut function, &mut module); + let value = local.compile_expression(expression).unwrap(); + (module, function, value) + } + + #[test] + fn test_identity_compile() { + let (_, f, v) = test_compile(&Expression::IntegerLiteral(1)); + 
assert!(f.instructions.is_empty()); + assert_eq!(v, CompileValue::Integer(1)); + } + + #[test] + fn test_compile_lambda_returning_lambda() { + // ( ((lambda () (lambda (a b) (+ a b)))) 1 2 ) + let (m, f, v) = test_compile(&Expression::Call(CallExpression { + callee: Rc::new(Expression::Call(CallExpression { + callee: Rc::new(Expression::Lambda(LambdaExpression { + signature: FunctionSignature { + required_arguments: vec![], + optional_arguments: vec![], + rest_argument: None, + }, + body: FunctionBody { + head: vec![], + tail: Rc::new(Expression::Lambda(LambdaExpression { + signature: FunctionSignature { + required_arguments: vec!["a".into(), "b".into()], + optional_arguments: vec![], + rest_argument: None, + }, + body: FunctionBody { + head: vec![], + tail: Rc::new(Expression::Call(CallExpression { + callee: Rc::new(Expression::Identifier("+".into())), + arguments: vec![ + Expression::Identifier("a".into()), + Expression::Identifier("b".into()), + ], + })), + }, + })), + }, + })), + arguments: vec![], + })), + arguments: vec![Expression::IntegerLiteral(1), Expression::IntegerLiteral(2)], + })); + assert_eq!( + &f.instructions[..], + &[ + Instruction::PushInteger(U::truncate(2)), + Instruction::PushInteger(U::truncate(1)), + Instruction::PushConstant(U::truncate(2)), + Instruction::Call(U::truncate(0)), + Instruction::Call(U::truncate(2)), + ] + ); + assert_eq!( + &m.local_functions.get(&0).unwrap().instructions[..], + &[ + Instruction::PushConstant(U::truncate(1)), + Instruction::Return + ] + ); + assert_eq!( + &m.local_functions.get(&1).unwrap().instructions[..], + &[ + Instruction::PushArgument(U::truncate(1)), + Instruction::PushArgument(U::truncate(0)), + Instruction::Add(U::truncate(2)), + Instruction::Return + ] + ); + assert_eq!(v, CompileValue::Stack); + } + + #[test] + fn test_compile_lambda() { + // (+ ((lambda (a) (+ a 1))) 2) + let (m, f, v) = test_compile(&Expression::Call(CallExpression { + callee: Rc::new(Expression::Identifier("+".into())), + 
arguments: vec![ + Expression::Call(CallExpression { + callee: Expression::Lambda(LambdaExpression { + signature: FunctionSignature { + required_arguments: vec!["a".into()], + optional_arguments: vec![], + rest_argument: None, + }, + body: FunctionBody { + head: vec![], + tail: Rc::new(Expression::Call(CallExpression { + callee: Rc::new(Expression::Identifier("+".into())), + arguments: vec![ + Expression::Identifier("a".into()), + Expression::IntegerLiteral(1), + ], + })), + }, + }) + .into(), + arguments: vec![], + }), + Expression::IntegerLiteral(2), + ], + })); + assert_eq!(v, CompileValue::Stack); + assert_eq!( + &m.local_functions.get(&0).unwrap().instructions[..], + &[ + Instruction::PushInteger(U::truncate(1)), + Instruction::PushArgument(U::truncate(0)), + Instruction::Add(U::truncate(2)), + Instruction::Return + ] + ); + assert_eq!( + &f.instructions[..], + &[ + Instruction::PushInteger(U::truncate(2)), + Instruction::PushConstant(U::truncate(1)), + Instruction::Call(U::truncate(0)), + Instruction::Add(U::truncate(2)) + ] + ); + } +} diff --git a/src/compile/error.rs b/src/compile/error.rs new file mode 100644 index 0000000..b1e56bd --- /dev/null +++ b/src/compile/error.rs @@ -0,0 +1,7 @@ +use crate::compile::syntax::ParseError; + +#[derive(Debug, thiserror::Error)] +pub enum CompileError { + #[error("parse error: {0}")] + Parse(#[from] ParseError), +} diff --git a/src/compile/function.rs b/src/compile/function.rs new file mode 100644 index 0000000..058f008 --- /dev/null +++ b/src/compile/function.rs @@ -0,0 +1,40 @@ +use std::rc::Rc; + +#[derive(Debug, Clone, PartialEq)] +pub struct FunctionSignature { + pub required_arguments: Vec>, + pub optional_arguments: Vec>, + pub rest_argument: Option>, +} + +impl FunctionSignature { + pub const EMPTY: Self = Self { + required_arguments: vec![], + optional_arguments: vec![], + rest_argument: None, + }; + + pub fn argument(&self, name: &str) -> Option { + if let Some(index) = self + .required_arguments + .iter() + 
.position(|a| a.as_ref() == name) + { + Some(index) + } else if let Some(index) = self + .optional_arguments + .iter() + .position(|a| a.as_ref() == name) + { + Some(index + self.required_arguments.len()) + } else if self + .rest_argument + .as_ref() + .is_some_and(|a| a.as_ref() == name) + { + Some(self.required_arguments.len() + self.optional_arguments.len()) + } else { + todo!() + } + } +} diff --git a/src/compile/mod.rs b/src/compile/mod.rs new file mode 100644 index 0000000..12dddd8 --- /dev/null +++ b/src/compile/mod.rs @@ -0,0 +1,11 @@ +mod block; +mod error; +mod function; +mod module; +mod syntax; +mod value; + +pub use error::CompileError; +pub use function::FunctionSignature; +pub use module::CompilationModule; +pub use syntax::{CallExpression, Expression, FunctionBody, LambdaExpression}; diff --git a/src/compile/module.rs b/src/compile/module.rs new file mode 100644 index 0000000..6e70a7a --- /dev/null +++ b/src/compile/module.rs @@ -0,0 +1,84 @@ +use std::collections::HashMap; + +use crate::{ + compile::{ + block::FunctionBlock, error::CompileError, function::FunctionSignature, + syntax::FunctionBody, value::CompileConstant, + }, + vm::{ + instruction::ConstantId, + module::{Module, ModuleConstant}, + pool::Pool, + }, +}; + +#[derive(Default)] +pub struct CompilationModule { + constant_pool: Pool, + pub(crate) local_functions: HashMap, + local_function_index: u32, + root: Option, +} + +impl CompilationModule { + pub fn constant(&mut self, value: CompileConstant) -> Result { + match self.constant_pool.key(value) { + Some(key) => Ok(key), + None => todo!(), + } + } + + pub fn compile_function( + &mut self, + signature: FunctionSignature, + body: &FunctionBody, + root: bool, + ) -> Result { + let index = self.local_function_index; + if root && self.root.is_some() { + todo!() + } + self.local_function_index += 1; + let mut function = FunctionBlock::new(signature); + function.compile_body(self, body)?; + self.local_functions.insert(index, function); + if 
root { + self.root = Some(index); + } + Ok(index) + } + + pub fn compile_module(self) -> Result { + // Emit all function code first + let mut function_offsets = HashMap::new(); + let mut instructions = vec![]; + let root = self.root.unwrap(); + for (index, function) in self.local_functions.into_iter() { + function_offsets.insert(index, instructions.len()); + instructions.extend(function.instructions.into_iter().map(u32::from)); + } + let entry = *function_offsets.get(&root).unwrap(); + let constants = self + .constant_pool + .into_iter() + .map(|(value, key)| { + ( + key, + match value { + CompileConstant::Integer(value) => ModuleConstant::Integer(value), + CompileConstant::LocalFunction(index) => { + let address = *function_offsets.get(&index).unwrap(); + ModuleConstant::LocalFunction(address) + } + }, + ) + }) + .collect(); + + Ok(Module { + constants, + instructions, + entry, + }) + } +} diff --git a/src/compile/syntax.rs b/src/compile/syntax.rs new file mode 100644 index 0000000..1b1e971 --- /dev/null +++ b/src/compile/syntax.rs @@ -0,0 +1,264 @@ +use std::rc::Rc; + +use crate::{ + compile::function::FunctionSignature, + vm::value::{ConsCell, Keyword, Value}, +}; + +#[derive(Debug, thiserror::Error)] +pub enum ParseError { + #[error("Non-expression value")] + NonExpressionValue, + #[error("Expected argument list: {0}")] + ExpectedArgumentList(Value), +} + +#[derive(Debug, PartialEq)] +pub struct FunctionBody { + pub head: Vec, + pub tail: Rc, +} + +#[derive(Debug, PartialEq)] +pub struct LambdaExpression { + pub signature: FunctionSignature, + pub body: FunctionBody, +} + +#[derive(Debug, PartialEq)] +pub struct CallExpression { + pub callee: Rc, + pub arguments: Vec, +} + +#[derive(Debug, PartialEq)] +pub enum Expression { + Nil, + BooleanLiteral(bool), + IntegerLiteral(i64), + Identifier(Rc), + Lambda(LambdaExpression), + Call(CallExpression), +} + +impl FunctionSignature { + fn parse(mut value: &Value) -> Result { + enum Mode { + Required, + Optional, + 
Rest, + } + + let mut required_arguments = vec![]; + let mut optional_arguments = vec![]; + let mut rest_argument = None; + let mut mode = Mode::Required; + + while !value.is_nil() { + let Value::Cons(cons) = value else { + todo!(); + }; + let ConsCell(car, cdr) = cons.as_ref(); + + match (&mode, car) { + (Mode::Required, Value::Identifier(arg)) => { + required_arguments.push(arg.clone()); + } + (Mode::Optional, Value::Identifier(arg)) => { + optional_arguments.push(arg.clone()); + } + (Mode::Rest, Value::Identifier(arg)) => { + if rest_argument.is_some() { + todo!(); + } + rest_argument = Some(arg.clone()); + } + (Mode::Required, Value::Keyword(Keyword::Optional)) => { + mode = Mode::Optional; + } + (Mode::Required, Value::Keyword(Keyword::Rest)) => { + mode = Mode::Rest; + } + (Mode::Optional, Value::Keyword(Keyword::Rest)) => { + if optional_arguments.is_empty() { + todo!(); + } + mode = Mode::Rest; + } + _ => todo!(), + } + + value = cdr; + } + + match mode { + Mode::Required => (), + Mode::Optional => { + if optional_arguments.is_empty() { + todo!() + } + } + Mode::Rest => { + if rest_argument.is_none() { + todo!() + } + } + } + + Ok(Self { + required_arguments, + optional_arguments, + rest_argument, + }) + } +} + +impl FunctionBody { + fn parse(mut value: &Value) -> Result { + let mut expressions = vec![]; + while !value.is_nil() { + let Value::Cons(cons) = value else { todo!() }; + let ConsCell(car, cdr) = cons.as_ref(); + + let expression = Expression::parse(car)?; + expressions.push(expression); + + value = cdr; + } + let Some(tail) = expressions.pop() else { + todo!() + }; + Ok(Self { + head: expressions, + tail: tail.into(), + }) + } +} + +impl LambdaExpression { + fn parse(value: &ConsCell) -> Result { + let ConsCell(car, cdr) = value; + let signature = FunctionSignature::parse(car)?; + let body = FunctionBody::parse(cdr)?; + Ok(Self { signature, body }) + } +} + +impl Expression { + fn parse_cons(value: &ConsCell) -> Result { + let ConsCell(car, cdr) = 
value; + match car { + Value::Keyword(Keyword::Lambda) => { + let Value::Cons(cdr) = cdr else { + todo!(); + }; + LambdaExpression::parse(cdr).map(Self::Lambda) + } + _ => { + let callee = Expression::parse(car)?; + + let mut arguments = vec![]; + let mut list = cdr; + while !list.is_nil() { + let Value::Cons(cons) = list else { + todo!(); + }; + let ConsCell(car, cdr) = cons.as_ref(); + + let expression = Expression::parse(car)?; + arguments.push(expression); + + list = cdr; + } + + Ok(Self::Call(CallExpression { + callee: callee.into(), + arguments, + })) + } + } + } + + pub fn parse(value: &Value) -> Result { + match value { + Value::Nil => Ok(Self::Nil), + Value::Boolean(value) => Ok(Self::BooleanLiteral(*value)), + Value::Integer(value) => Ok(Self::IntegerLiteral(*value)), + Value::Identifier(value) => Ok(Self::Identifier(value.clone())), + Value::Cons(cons) => Self::parse_cons(cons), + _ => Err(ParseError::NonExpressionValue), + } + } +} + +#[cfg(test)] +mod tests { + use crate::{ + compile::{ + function::FunctionSignature, + syntax::{CallExpression, Expression, FunctionBody, LambdaExpression}, + }, + vm::value::{Keyword, Value}, + }; + + #[test] + fn test_parse_basic() { + let v = Value::Nil; + let e = Expression::parse(&v).unwrap(); + assert_eq!(e, Expression::Nil); + + let v = Value::Integer(1234); + let e = Expression::parse(&v).unwrap(); + assert_eq!(e, Expression::IntegerLiteral(1234)); + + let v = Value::Boolean(false); + let e = Expression::parse(&v).unwrap(); + assert_eq!(e, Expression::BooleanLiteral(false)); + + let v = Value::Identifier("a".into()); + let e = Expression::parse(&v).unwrap(); + assert_eq!(e, Expression::Identifier("a".into())); + } + + #[test] + fn test_parse_lambda() { + let args = Value::list_or_nil([ + Value::Identifier("a".into()), + Value::Keyword(Keyword::Optional), + Value::Identifier("b".into()), + Value::Keyword(Keyword::Rest), + Value::Identifier("c".into()), + ]); + let body = Value::list_or_nil([ + 
Value::Identifier("+".into()), + Value::Identifier("a".into()), + Value::Integer(1), + ]); + let lambda = Value::Keyword(Keyword::Lambda).cons(args.cons(body.cons(Value::Nil))); + let expr = Expression::parse(&lambda).unwrap(); + + assert_eq!( + expr, + Expression::Lambda(LambdaExpression { + signature: FunctionSignature { + required_arguments: vec!["a".into()], + optional_arguments: vec!["b".into()], + rest_argument: Some("c".into()) + }, + body: FunctionBody { + head: vec![], + tail: Expression::Call(CallExpression { + callee: Expression::Identifier("+".into()).into(), + arguments: vec![ + Expression::Identifier("a".into()), + Expression::IntegerLiteral(1) + ] + }) + .into() + } + }) + ); + + let lambda = Value::list_or_nil([Value::Keyword(())]); + } +} diff --git a/src/compile/value.rs b/src/compile/value.rs new file mode 100644 index 0000000..95b6cc9 --- /dev/null +++ b/src/compile/value.rs @@ -0,0 +1,31 @@ +#[derive(Debug, PartialEq)] +pub enum CompileValue { + Integer(i64), + Boolean(bool), + LocalFunction(u32), + Argument(usize), + Stack, + Nil, +} + +#[derive(Debug, Hash, PartialEq, Eq)] +pub enum CompileConstant { + Integer(i64), + LocalFunction(u32), +} + +#[derive(Debug, PartialEq, Clone, Copy)] +pub enum BuiltinFunction { + Add, + Sub, +} + +impl BuiltinFunction { + pub fn from_identifier(identifier: &str) -> Option { + match identifier { + "+" => Some(Self::Add), + "-" => Some(Self::Sub), + _ => None, + } + } +} diff --git a/src/error.rs b/src/error.rs new file mode 100644 index 0000000..d55c0b1 --- /dev/null +++ b/src/error.rs @@ -0,0 +1,9 @@ +use crate::{compile::CompileError, vm::machine::MachineError}; + +#[derive(Debug, thiserror::Error)] +pub enum EvalError { + #[error("machine error: {0}")] + Machine(#[from] MachineError), + #[error("compilation error: {0}")] + Compile(#[from] CompileError), +} diff --git a/src/lib.rs b/src/lib.rs new file mode 100644 index 0000000..2e589d9 --- /dev/null +++ b/src/lib.rs @@ -0,0 +1,6 @@ 
+#![feature(coverage_attribute)] + +pub mod compile; +pub mod error; +pub mod parse; +pub mod vm; diff --git a/src/main.rs b/src/main.rs new file mode 100644 index 0000000..0441290 --- /dev/null +++ b/src/main.rs @@ -0,0 +1,52 @@ +use std::io::stdin; + +use lysp::{parse::parse_value, vm::machine::Machine}; + +fn main() { + let mut vm = Machine::default(); + + let mut input = String::new(); + let stdin = stdin(); + loop { + let len = stdin.read_line(&mut input).unwrap(); + if len == 0 { + break; + } + + let mut i = input.trim_start(); + while !i.is_empty() { + let result = parse_value(i); + let (o, value) = match result { + Ok(r) => r, + Err(nom::Err::Incomplete(_)) => { + break; + } + Err(error) => { + eprintln!("Syntax error: {error}"); + i = ""; + break; + } + }; + + let result = vm.eval_value(&value); + let result = match result { + Ok(r) => r, + Err(error) => { + eprintln!("Error in expression:"); + eprintln!(); + eprintln!(" {value}:"); + eprintln!(); + eprintln!(":: {error}"); + i = ""; + break; + } + }; + + println!("{result}"); + + i = o.trim_start(); + } + + input = i.into(); + } +} diff --git a/src/parse.rs b/src/parse.rs new file mode 100644 index 0000000..9338a1f --- /dev/null +++ b/src/parse.rs @@ -0,0 +1,407 @@ +use nom::{ + FindToken, IResult, Input, Parser, + branch::alt, + bytes::streaming::tag, + character::{ + anychar, + streaming::{char, multispace0, one_of}, + }, + combinator::{map, map_res, opt, recognize, value}, + error::{Error, ErrorKind, FromExternalError, ParseError}, + multi::{fold_many1, many0}, + sequence::{delimited, preceded}, +}; + +use crate::vm::value::{Keyword, Value}; + +struct IdentifierHead; +struct IdentifierTail; + +impl FindToken for IdentifierHead { + fn find_token(&self, token: char) -> bool { + token.is_alphabetic() || "~!@$%^&*-=_+<>?/".contains(token) + } +} + +impl FindToken for IdentifierTail { + fn find_token(&self, token: char) -> bool { + token.is_alphanumeric() || "~!@$%^&*-=_+<>?/".contains(token) + } +} + +fn 
parse_sign(input: &str) -> IResult<&str, bool> { + map( + opt(alt((value(true, char('-')), value(false, char('+'))))), + |sign| sign.unwrap_or(false), + ) + .parse(input) +} + +fn parse_hex_digit(input: &str) -> IResult<&str, char> { + let (tail, ch) = anychar(input)?; + match ch { + '0'..='9' | 'a'..='f' | 'A'..='F' => Ok((tail, ch)), + _ if ch.is_alphabetic() => Err(nom::Err::Failure(Error::from_char(input, ch))), + _ => Err(nom::Err::Error(Error::from_char(input, ch))), + } +} + +fn parse_dec_digit(input: &str) -> IResult<&str, char> { + let (tail, ch) = anychar(input)?; + match ch { + '0'..='9' => Ok((tail, ch)), + _ if ch.is_alphabetic() => Err(nom::Err::Failure(Error::from_char(input, ch))), + _ => Err(nom::Err::Error(Error::from_char(input, ch))), + } +} + +fn parse_oct_digit(input: &str) -> IResult<&str, char> { + let (tail, ch) = anychar(input)?; + match ch { + '0'..='7' => Ok((tail, ch)), + _ if ch.is_alphanumeric() => Err(nom::Err::Failure(Error::from_char(input, ch))), + _ => Err(nom::Err::Error(Error::from_char(input, ch))), + } +} + +struct OverflowError; + +fn parse_integer_inner(input: I, prefix: P, radix: u64, digit: D) -> IResult +where + I: Input, + E: ParseError, + E: FromExternalError, + P: Parser, + D: Parser, +{ + #[coverage(off)] + fn digit_to_u8(ch: char) -> u8 { + match ch { + '0'..='9' => ch as u8 - b'0', + 'a'..='f' => ch as u8 - b'a' + 10, + 'A'..='F' => ch as u8 - b'A' + 10, + _ => unreachable!(), + } + } + + let (output, result) = preceded( + prefix, + fold_many1( + digit, + || Ok(0u64), + move |acc, ch| match acc { + Ok(acc) + if let Some(acc) = acc + .checked_mul(radix) + .and_then(|acc| acc.checked_add(digit_to_u8(ch) as u64)) => + { + Ok(acc) + } + Ok(_) => Err(OverflowError), + Err(e) => Err(e), + }, + ), + ) + .parse(input.clone())?; + + match result { + Ok(result) => Ok((output, result)), + Err(OverflowError) => Err(nom::Err::Failure(E::from_external_error( + input, + ErrorKind::Fold, + OverflowError, + ))), + } +} + +fn 
check_decimal(input: &str) -> IResult<&str, ()> { + let (_, ch) = anychar(input)?; + if ch.is_ascii_digit() { + Ok((input, ())) + } else { + Err(nom::Err::Error(Error::from_char(input, ch))) + } +} + +fn parse_integer_oct(input: &str) -> IResult<&str, u64> { + parse_integer_inner(input, alt((tag("0o"), tag("0O"))), 8, parse_oct_digit) +} + +fn parse_integer_dec(input: &str) -> IResult<&str, u64> { + parse_integer_inner(input, check_decimal, 10, parse_dec_digit) +} + +fn parse_integer_hex(input: &str) -> IResult<&str, u64> { + parse_integer_inner(input, alt((tag("0x"), tag("0X"))), 16, parse_hex_digit) +} + +fn parse_integer(input: &str) -> IResult<&str, Value> { + map_res( + ( + parse_sign, + alt((parse_integer_hex, parse_integer_oct, parse_integer_dec)), + ), + |(minus, value)| { + i64::try_from(value).map(|value| Value::Integer(if minus { -value } else { value })) + }, + ) + .parse(input) +} + +fn parse_identifier(input: &str) -> IResult<&str, &str> { + recognize(preceded( + one_of(IdentifierHead), + many0(one_of(IdentifierTail)), + )) + .parse(input) +} + +fn parse_identifier_or_keyword_or_nil(input: &str) -> IResult<&str, Value> { + map(parse_identifier, |ident| match ident { + "NIL" | "nil" => Value::Nil, + "lambda" => Value::Keyword(Keyword::Lambda), + "defun" => Value::Keyword(Keyword::Defun), + "&optional" => Value::Keyword(Keyword::Optional), + "&rest" => Value::Keyword(Keyword::Rest), + _ => Value::Identifier(ident.into()), + }) + .parse(input) +} + +fn parse_list_or_nil(input: &str) -> IResult<&str, Value> { + map( + delimited( + char('('), + many0(preceded(multispace0, parse_value)), + preceded(multispace0, char(')')), + ), + Value::list_or_nil, + ) + .parse(input) +} + +fn parse_boolean(input: &str) -> IResult<&str, Value> { + map( + alt(( + value(true, tag("#t")), + value(true, tag("#T")), + value(false, tag("#f")), + value(false, tag("#F")), + )), + Value::Boolean, + ) + .parse(input) +} + +pub fn parse_value(input: &str) -> IResult<&str, Value> { + 
alt(( + parse_list_or_nil, + parse_boolean, + parse_integer, + parse_identifier_or_keyword_or_nil, + )) + .parse(input) +} + +#[cfg(test)] +mod tests { + use nom::error::{Error, FromExternalError, ParseError}; + + use crate::{ + parse::{ + OverflowError, parse_boolean, parse_identifier, parse_identifier_or_keyword_or_nil, + parse_integer, parse_integer_dec, parse_integer_hex, parse_integer_oct, + parse_list_or_nil, parse_value, + }, + vm::value::{Keyword, Value}, + }; + + #[test] + fn test_integer_dec() { + let (r, v) = parse_integer_dec("1234\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, 1234); + let (r, v) = parse_integer_dec("18446744073709551615\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, u64::MAX); + + // illegal digit + let e = parse_integer_dec("1234A").unwrap_err(); + assert_eq!(e, nom::Err::Failure(Error::from_char("A", 'A'))); + + // overflow + let e = parse_integer_dec("9999999999999999999999999").unwrap_err(); + assert_eq!( + e, + nom::Err::Failure(Error::from_external_error( + "9999999999999999999999999", + nom::error::ErrorKind::Fold, + OverflowError + )) + ); + } + + #[test] + fn test_integer_oct() { + let (r, v) = parse_integer_oct("0o1234\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, 0o1234); + let (r, v) = parse_integer_oct("0O1234\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, 0o1234); + + // illegal digit + let e = parse_integer_oct("0o12349").unwrap_err(); + assert_eq!(e, nom::Err::Failure(Error::from_char("9", '9'))); + } + + #[test] + fn test_integer_hex() { + let (r, v) = parse_integer_hex("0x123Fa4\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, 0x123FA4); + let (r, v) = parse_integer_hex("0X123Fa4\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, 0x123FA4); + + // illegal digit + let e = parse_integer_hex("0x1AG").unwrap_err(); + assert_eq!(e, nom::Err::Failure(Error::from_char("G", 'G'))); + } + + #[test] + fn test_integer() { + // Dec path + let (r, v) = parse_integer("1234\n").unwrap(); + 
assert_eq!(r, "\n"); + assert_eq!(v, Value::Integer(1234)); + let (r, v) = parse_integer("+1234\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Integer(1234)); + let (r, v) = parse_integer("-1234\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Integer(-1234)); + // Oct path + let (r, v) = parse_integer("0o1234\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Integer(0o1234)); + let (r, v) = parse_integer("+0O1234\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Integer(0o1234)); + let (r, v) = parse_integer("-0o1234\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Integer(-0o1234)); + // Hex path + let (r, v) = parse_integer("0x1234AF\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Integer(0x1234AF)); + let (r, v) = parse_integer("+0X1234AF\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Integer(0x1234AF)); + let (r, v) = parse_integer("-0x1234AF\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Integer(-0x1234AF)); + + // Illegal path + let e = parse_integer("0x12V34").unwrap_err(); + assert_eq!(e, nom::Err::Failure(Error::from_char("V34", 'V'))); + let e = parse_integer("-0X12V34").unwrap_err(); + assert_eq!(e, nom::Err::Failure(Error::from_char("V34", 'V'))); + let e = parse_integer("-0o81").unwrap_err(); + assert_eq!(e, nom::Err::Failure(Error::from_char("81", '8'))); + } + + #[test] + fn test_identifier() { + let (r, v) = parse_identifier("+\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, "+"); + let (r, v) = parse_identifier("a-1\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, "a-1"); + } + + #[test] + fn test_identifier_or_keyword_or_nil() { + let (r, v) = parse_identifier_or_keyword_or_nil("+\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Identifier("+".into())); + let (r, v) = parse_identifier_or_keyword_or_nil("lambda\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Keyword(Keyword::Lambda)); + let (r, v) = 
parse_identifier_or_keyword_or_nil("NIL\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Nil); + } + + #[test] + fn test_list_or_nil() { + let (r, v) = parse_list_or_nil("()\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Nil); + let (r, v) = parse_list_or_nil("( )\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Nil); + let (r, v) = parse_list_or_nil("( a b -1 )\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!( + v, + Value::list_or_nil([ + Value::Identifier("a".into()), + Value::Identifier("b".into()), + Value::Integer(-1), + ]) + ); + } + + #[test] + fn test_boolean() { + let (r, v) = parse_boolean("#t\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Boolean(true)); + let (r, v) = parse_boolean("#T\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Boolean(true)); + let (r, v) = parse_boolean("#F\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Boolean(false)); + let (r, v) = parse_boolean("#F\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Boolean(false)); + } + + #[test] + fn test_value() { + let (r, v) = parse_value("+123\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Integer(123)); + let (r, v) = parse_value("-0x123\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Integer(-0x123)); + let (r, v) = parse_value("abcdef-ghijkl\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Identifier("abcdef-ghijkl".into())); + let (r, v) = parse_value("+\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Identifier("+".into())); + let (r, v) = parse_value("+x\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Identifier("+x".into())); + let (r, v) = parse_value("lambda-\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Identifier("lambda-".into())); + let (r, v) = parse_value("lambda\n").unwrap(); + assert_eq!(r, "\n"); + assert_eq!(v, Value::Keyword(Keyword::Lambda)); + let (r, v) = parse_value("(f #T -0x1)\n").unwrap(); + assert_eq!(r, 
"\n"); + assert_eq!( + v, + Value::list_or_nil([ + Value::Identifier("f".into()), + Value::Boolean(true), + Value::Integer(-0x1) + ]) + ); + } +} diff --git a/src/vm/instruction.rs b/src/vm/instruction.rs new file mode 100644 index 0000000..b141b33 --- /dev/null +++ b/src/vm/instruction.rs @@ -0,0 +1,120 @@ +use std::fmt; + +use bitmatch::bitmatch; + +#[derive(Debug, thiserror::Error)] +pub enum InstructionError { + #[error("invalid instruction")] + Invalid, +} + +#[derive(Debug, thiserror::Error)] +pub enum InstructionEncodeError {} + +#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)] +pub struct U(u32); + +#[derive(Debug, Clone, Copy, PartialEq)] +pub enum Instruction { + PushNil, + PushInteger(U<24>), + PushBool(bool), + PushConstant(ConstantId), + PushArgument(U<6>), + SetGlobal, + GetGlobal, + Call(U<6>), + Return, + Add(U<6>), + Sub(U<6>), +} + +pub type ConstantId = U<24>; + +impl U { + pub const BITS: usize = N; + pub const ZERO: Self = Self(0); + + pub fn sign_extend_i64(&self) -> i64 { + if self.0 & (1 << N) != 0 { + todo!() + } else { + self.0 as i64 + } + } + + pub const fn new(value: u32) -> Option { + if value >= (1u32 << N) { + None + } else { + Some(Self(value)) + } + } + + pub const fn truncate(value: u32) -> Self { + Self(value & ((1 << N) - 1)) + } + + pub const unsafe fn new_unchecked(value: u32) -> Self { + Self(value) + } +} + +impl fmt::Debug for U { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Debug::fmt(&self.0, f) + } +} + +impl Into for U { + fn into(self) -> usize { + self.0 as usize + } +} + +impl Into for U { + fn into(self) -> u32 { + self.0 + } +} + +impl From for u32 { + fn from(instruction: Instruction) -> u32 { + match instruction { + Instruction::PushNil => 0b0000_0000_0000_0001, + Instruction::PushBool(value) => 0b0000_0000_0000_0010 | (value as u32), + Instruction::Return => 0b0000_0000_0000_0100, + Instruction::SetGlobal => 0b0000_0000_0000_0101, + Instruction::GetGlobal => 
0b0000_0000_0000_0110, + Instruction::Add(count) => 0b0000_0001_0000_0000 | count.0, + Instruction::Sub(count) => 0b0000_0001_0100_0000 | count.0, + Instruction::Call(count) => 0b0000_0000_0100_0000 | count.0, + Instruction::PushInteger(value) => 0b0001_0000_0000_0000 | value.0, + Instruction::PushConstant(index) => 0b0010_0000_0000_0000 | index.0, + Instruction::PushArgument(index) => 0b0000_0000_1000_0000 | index.0, + } + } +} + +impl TryFrom for Instruction { + type Error = InstructionError; + + #[bitmatch] + fn try_from(value: u32) -> Result { + #[bitmatch] + match value { + "0000_0000_0000_0001" => Ok(Instruction::PushNil), + "0000_0000_0000_001x" => Ok(Instruction::PushBool(x != 0)), + "0000_0000_0000_0100" => Ok(Instruction::Return), + "0000_0000_0000_0101" => Ok(Instruction::SetGlobal), + "0000_0000_0000_0110" => Ok(Instruction::GetGlobal), + "0000_0000_01xx_xxxx" => Ok(Instruction::Call(U(x))), + "0000_0000_10xx_xxxx" => Ok(Instruction::PushArgument(U(x))), + "0000_0001_00xx_xxxx" => Ok(Instruction::Add(U(x))), + "0000_0001_01xx_xxxx" => Ok(Instruction::Sub(U(x))), + "0001_xxxx_xxxx_xxxx" => Ok(Instruction::PushInteger(U(x))), + "0010_xxxx_xxxx_xxxx" => Ok(Instruction::PushConstant(U(x))), + _ => todo!(), + } + } +} diff --git a/src/vm/loader.rs b/src/vm/loader.rs new file mode 100644 index 0000000..8b13789 --- /dev/null +++ b/src/vm/loader.rs @@ -0,0 +1 @@ + diff --git a/src/vm/machine.rs b/src/vm/machine.rs new file mode 100644 index 0000000..f7531df --- /dev/null +++ b/src/vm/machine.rs @@ -0,0 +1,459 @@ +use std::{collections::HashMap, fmt, rc::Rc}; + +use crate::{ + error::EvalError, + vm::{ + instruction::{ConstantId, Instruction, InstructionError}, + module::{Module, ModuleConstant, ModuleRef}, + stack::Stack, + value::{BytecodeFunction, NativeFunction, Value}, + }, +}; + +#[derive(Debug, thiserror::Error)] +pub enum MachineError { + #[error("Instruction error: {0}")] + Instruction(#[from] InstructionError), + #[error("Instruction pointer is 
undefined")] + UndefinedInstructionPointer, + #[error("Data stack underflowed")] + ValueStackUnderflow, + #[error("Data stack overflowed")] + ValueStackOverflow, + #[error("Call stack underflowed")] + CallStackUnderflow, + #[error("Call stack overflowed")] + CallStackOverflow, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct InstructionPointer { + pub module: ModuleRef, + pub address: usize, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct CallFrame { + arguments: Vec, + return_address: InstructionPointer, + event: ExecutionEvent, +} + +pub struct Machine { + globals: HashMap, Value>, + ip: Option, + value_stack: Stack, + call_stack: Stack, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum ExecutionEvent { + ModuleEntry(ModuleRef), + None, +} + +impl Default for Machine { + fn default() -> Self { + Self { + globals: Default::default(), + ip: None, + value_stack: Stack::new(1024), + call_stack: Stack::new(32), + } + } +} + +impl Machine { + fn pop(&mut self) -> Result { + self.value_stack + .pop() + .ok_or(MachineError::ValueStackUnderflow) + } + + fn push(&mut self, value: Value) -> Result<(), MachineError> { + self.value_stack + .push(value) + .map_err(|_| MachineError::ValueStackOverflow) + } + + fn execute_call(&mut self, count: usize) -> Result<(), MachineError> { + enum Callee { + Bytecode(BytecodeFunction), + Native(NativeFunction), + } + + let source_ip = self.ip.clone().unwrap(); + + let callee = self.pop()?; + let callee = match callee { + Value::BytecodeFunction(bytecode) => Callee::Bytecode(bytecode), + Value::NativeFunction(native) => Callee::Native(native), + _ => todo!(), + }; + let mut arguments = vec![]; + for _ in 0..count { + arguments.push(self.pop()?); + } + match callee { + Callee::Bytecode(bytecode) => { + let BytecodeFunction { module, address } = bytecode; + let frame = CallFrame { + arguments, + event: ExecutionEvent::None, + return_address: InstructionPointer { + module: source_ip.module, + address: source_ip.address + 1, + }, + 
}; + if self.call_stack.push(frame).is_err() { + return Err(MachineError::CallStackOverflow); + } + self.ip = Some(InstructionPointer { module, address }); + } + Callee::Native(native) => { + let result = native.apply(self, &arguments).unwrap(); + self.push(result)?; + self.ip = Some(InstructionPointer { + module: source_ip.module, + address: source_ip.address + 1, + }); + } + } + Ok(()) + } + + fn execute_return(&mut self) -> Result { + let ip = self.ip.clone().unwrap(); + if let Some(frame) = self.call_stack.pop() { + self.ip = Some(frame.return_address); + Ok(frame.event) + } else { + self.ip = None; + Ok(ExecutionEvent::ModuleEntry(ip.module)) + } + } + + fn execute_add(&mut self, count: usize) -> Result<(), MachineError> { + let mut accumulator = 0i64; + for _ in 0..count { + let arg = self.pop()?; + match arg { + Value::Integer(value) => { + accumulator = accumulator.wrapping_add(value); + } + _ => todo!("{arg:?}"), + } + } + self.push(Value::Integer(accumulator))?; + Ok(()) + } + + fn execute_sub(&mut self, count: usize) -> Result<(), MachineError> { + todo!() + } + + fn execute_push_constant(&mut self, index: ConstantId) -> Result<(), MachineError> { + let ip = self.ip.as_ref().unwrap(); + let constant = ip.module.constant(index).expect("TODO"); + let value = match constant { + ModuleConstant::LocalFunction(address) => Value::BytecodeFunction(BytecodeFunction { + module: ip.module.clone(), + address, + }), + ModuleConstant::Integer(value) => Value::Integer(value), + ModuleConstant::Identifier(identifier) => Value::Identifier(identifier), + }; + + self.push(value) + } + + fn execute_push_argument(&mut self, index: usize) -> Result<(), MachineError> { + let frame = self.call_stack.current().expect("valid call frame"); + let argument = frame.arguments.get(index); + match argument { + Some(arg) => self.push(arg.clone()), + None => self.push(Value::Nil), + } + } + + fn execute_get_global(&mut self) -> Result<(), MachineError> { + let ident = self.pop()?; + match 
ident { + Value::Identifier(ident) => { + let value = self.globals.get(&ident).cloned().unwrap(); + self.push(value) + } + _ => todo!(), + } + } + + fn execute_set_global(&mut self) -> Result<(), MachineError> { + let ident = self.pop()?; + let value = self.pop()?; + let Value::Identifier(ident) = ident else { + todo!(); + }; + self.globals.insert(ident, value); + self.push(Value::Nil)?; + Ok(()) + } + + pub fn set_global>>(&mut self, identifier: S, value: Value) { + self.globals.insert(identifier.into(), value); + } + + pub fn execute_next(&mut self) -> Result { + let ip = self.ip.clone().unwrap(); + let instruction = ip.module.instruction(ip.address).expect("TODO"); + let instruction = Instruction::try_from(instruction)?; + eprintln!("{ip}: {instruction:?}"); + let mut advance = true; + let mut event = ExecutionEvent::None; + match instruction { + Instruction::PushNil => { + self.push(Value::Nil)?; + } + Instruction::PushInteger(value) => { + self.push(Value::Integer(value.sign_extend_i64()))?; + } + Instruction::PushBool(value) => { + self.push(Value::Boolean(value))?; + } + Instruction::PushConstant(index) => { + self.execute_push_constant(index)?; + } + Instruction::PushArgument(index) => { + self.execute_push_argument(index.into())?; + } + Instruction::GetGlobal => { + self.execute_get_global()?; + } + Instruction::SetGlobal => { + self.execute_set_global()?; + } + Instruction::Return => { + advance = false; + event = self.execute_return()?; + } + Instruction::Call(count) => { + advance = false; + self.execute_call(count.into())?; + } + Instruction::Add(count) => { + self.execute_add(count.into())?; + } + Instruction::Sub(count) => { + self.execute_sub(count.into())?; + } + } + if advance { + self.ip = Some(InstructionPointer { + module: ip.module, + address: ip.address + 1, + }); + } + Ok(event) + } + + pub fn load_module(&mut self, module: Module) -> Result { + let module = ModuleRef::from(module); + let entry = module.entry(); + let entry_ip = 
InstructionPointer { + module: module.clone(), + address: entry, + }; + if let Some(ip) = self.ip.clone() + && self + .call_stack + .push(CallFrame { + arguments: vec![], + return_address: ip, + event: ExecutionEvent::ModuleEntry(module.clone()), + }) + .is_err() + { + return Err(MachineError::CallStackOverflow); + } + self.ip = Some(entry_ip); + Ok(module) + } + + pub fn eval_module(&mut self, module: Module) -> Result { + let module = self.load_module(module)?; + let expect = ExecutionEvent::ModuleEntry(module); + loop { + let event = self.execute_next()?; + if event == expect { + break; + } + } + let value = self.pop()?; + Ok(value) + } + + pub fn eval_value(&mut self, value: &Value) -> Result { + let module = Module::compile_value(value)?; + self.eval_module(module).map_err(EvalError::from) + } +} + +impl fmt::Display for InstructionPointer { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "{:p}:{}", self.module, self.address) + } +} + +#[cfg(test)] +mod tests { + use std::sync::atomic::{AtomicI64, Ordering}; + + use crate::vm::{ + instruction::{Instruction, U}, + machine::{InstructionPointer, Machine}, + module::{Module, ModuleBuilder, ModuleConstant, ModuleRef}, + value::{NativeFunction, Value}, + }; + + fn execute_all( + count: usize, + build: F, + prepare: G, + ) -> (Machine, Vec) { + let dummy = ModuleRef::from(Module::dummy()); + let mut machine = Machine { + ip: Some(InstructionPointer { + module: dummy, + address: 0, + }), + ..Default::default() + }; + prepare(&mut machine); + let mut values = vec![]; + for i in 0..count { + let mut builder = ModuleBuilder::new(); + builder.entry(0); + build(i as u32, &mut builder); + builder.add(Instruction::Return); + let module = builder.build(); + values.push(machine.eval_module(module).unwrap()); + } + (machine, values) + } + + #[test] + fn test_basic() { + let (m, vs) = execute_all( + 1, + |_, builder| { + let c0 = builder.constant(ModuleConstant::Integer(3)); + builder.add_all([ + 
Instruction::PushInteger(U::truncate(1)), + Instruction::PushInteger(U::truncate(2)), + Instruction::Add(U::truncate(2)), + Instruction::PushConstant(c0), + Instruction::Add(U::truncate(2)), + ]); + }, + |_| {}, + ); + assert!(m.value_stack.is_empty()); + assert!(m.call_stack.is_empty()); + assert_eq!(&vs, &[Value::Integer(6)]); + } + + #[test] + fn test_local_function_call() { + let (m, vs) = execute_all( + 1, + |_, builder| { + let c0 = builder.constant(ModuleConstant::LocalFunction(4)); + builder.add_all([ + // main + Instruction::PushInteger(U::truncate(34)), + Instruction::PushConstant(c0), + Instruction::Call(U::truncate(1)), + Instruction::Return, + // c0 + Instruction::PushArgument(U::truncate(0)), + Instruction::PushInteger(U::truncate(1200)), + Instruction::Add(U::truncate(2)), + Instruction::Return, + ]); + }, + |_| {}, + ); + assert!(m.value_stack.is_empty()); + assert!(m.call_stack.is_empty()); + assert_eq!(&vs, &[Value::Integer(1234)]); + } + + #[test] + fn test_cross_module_call() { + static NATIVE_STATE: AtomicI64 = AtomicI64::new(-1); + let (m, vs) = execute_all( + 2, + |id, builder| match id { + 1 => { + let c0 = builder.constant(ModuleConstant::LocalFunction(4)); + let c1 = builder.constant(ModuleConstant::Identifier("extern-function".into())); + builder.add_all([ + // main: (local 1) + Instruction::PushInteger(U::truncate(1)), + Instruction::PushConstant(c0), + Instruction::Call(U::truncate(1)), + Instruction::Return, + // module 0 local function + // (fn (a) (extern-function a 2)) + Instruction::PushInteger(U::truncate(2)), + Instruction::PushArgument(U::truncate(0)), + Instruction::PushConstant(c1), + Instruction::GetGlobal, + Instruction::Call(U::truncate(2)), + Instruction::Return, + ]); + } + 0 => { + let c0 = builder.constant(ModuleConstant::Integer(3)); + let c1 = builder.constant(ModuleConstant::Identifier("native".into())); + let c2 = builder.constant(ModuleConstant::LocalFunction(4)); + let c3 = 
builder.constant(ModuleConstant::Identifier("extern-function".into())); + builder.add_all([ + // main + Instruction::PushConstant(c2), + Instruction::PushConstant(c3), + Instruction::SetGlobal, + Instruction::Return, + // extern-function + // (fn (a b) (native 3 b a)) + Instruction::PushArgument(U::truncate(0)), + Instruction::PushArgument(U::truncate(1)), + Instruction::PushConstant(c0), + Instruction::PushConstant(c1), + Instruction::GetGlobal, + Instruction::Call(U::truncate(3)), + Instruction::Return, + ]); + } + _ => unreachable!(), + }, + |m| { + m.set_global( + "native", + Value::NativeFunction(NativeFunction::new("native", |_, args| { + assert_eq!(args.len(), 3); + assert_eq!( + &args, + &[Value::Integer(3), Value::Integer(2), Value::Integer(1)] + ); + NATIVE_STATE.store(4321, Ordering::Release); + Ok(Value::Integer(1234)) + })), + ); + }, + ); + assert!(m.value_stack.is_empty()); + assert!(m.call_stack.is_empty()); + assert_eq!(&vs, &[Value::Nil, Value::Integer(1234)]); + assert_eq!(NATIVE_STATE.load(Ordering::Acquire), 4321); + } +} diff --git a/src/vm/mod.rs b/src/vm/mod.rs new file mode 100644 index 0000000..7550456 --- /dev/null +++ b/src/vm/mod.rs @@ -0,0 +1,8 @@ +pub mod instruction; +pub mod loader; +pub mod machine; +pub mod module; +pub mod pool; +pub mod prelude; +pub mod stack; +pub mod value; diff --git a/src/vm/module.rs b/src/vm/module.rs new file mode 100644 index 0000000..c3bd1b5 --- /dev/null +++ b/src/vm/module.rs @@ -0,0 +1,188 @@ +use std::{collections::HashMap, fmt, ops::Deref, rc::Rc}; + +use crate::{ + compile::{CompilationModule, CompileError, Expression, FunctionBody, FunctionSignature}, + vm::{ + instruction::{ConstantId, Instruction}, + pool::Pool, + value::Value, + }, +}; + +#[derive(Debug, Clone, PartialEq, Eq, Hash)] +pub enum ModuleConstant { + Integer(i64), + LocalFunction(usize), + Identifier(Rc), +} + +#[derive(Clone)] +pub struct ModuleRef { + inner: Rc, +} + +pub struct Module { + pub constants: HashMap, + pub 
instructions: Vec, + pub entry: usize, +} + +#[derive(Default)] +pub struct ModuleBuilder { + constants: Pool, + instructions: Vec, + entry: Option, +} + +impl ModuleRef { + pub fn ptr_eq(a: &Self, b: &Self) -> bool { + Rc::ptr_eq(&a.inner, &b.inner) + } +} + +impl From for ModuleRef { + fn from(value: Module) -> Self { + Self { + inner: Rc::new(value), + } + } +} + +impl fmt::Debug for ModuleRef { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "Module({:p})", self.inner) + } +} + +impl fmt::Pointer for ModuleRef { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + fmt::Pointer::fmt(&self.inner, f) + } +} + +impl PartialEq for ModuleRef { + fn eq(&self, other: &Self) -> bool { + Rc::ptr_eq(&self.inner, &other.inner) + } +} + +impl Deref for ModuleRef { + type Target = Module; + + fn deref(&self) -> &Self::Target { + &self.inner + } +} + +#[cfg(test)] +impl Module { + pub fn dummy() -> Self { + Self { + constants: HashMap::new(), + instructions: vec![0], + entry: 0, + } + } +} + +impl Module { + pub fn instruction(&self, ip: usize) -> Option { + self.instructions.get(ip).copied() + } + + pub fn constant(&self, id: ConstantId) -> Option { + self.constants.get(&id).cloned() + } + + pub fn len(&self) -> usize { + self.instructions.len() + } + + pub fn is_empty(&self) -> bool { + self.instructions.is_empty() + } + + pub fn entry(&self) -> usize { + self.entry + } + + pub fn compile_value(value: &Value) -> Result { + let expression = Expression::parse(value)?; + let mut module = CompilationModule::default(); + module.compile_function( + FunctionSignature::EMPTY, + &FunctionBody { + head: vec![], + tail: Rc::new(expression), + }, + true, + )?; + module.compile_module() + } +} + +impl ModuleBuilder { + pub fn new() -> Self { + Self::default() + } + + pub fn build(self) -> Module { + Module { + constants: self.constants.into_map(), + instructions: self.instructions, + entry: self.entry.unwrap(), + } + } + + pub fn constant(&mut self, 
value: ModuleConstant) -> ConstantId { + self.constants.key(value).unwrap() + } + + pub fn add_all>(&mut self, insns: I) -> &mut Self { + self.instructions.extend(insns.into_iter().map(u32::from)); + self + } + + pub fn add(&mut self, instruction: Instruction) -> &mut Self { + self.instructions.push(instruction.into()); + self + } + + pub fn entry(&mut self, entry: usize) -> &mut Self { + self.entry = Some(entry); + self + } +} + +#[cfg(test)] +mod tests { + use std::collections::HashMap; + + use crate::vm::{ + instruction::{Instruction, U}, + module::{Module, ModuleConstant}, + value::{Keyword, Value}, + }; + + #[test] + fn test_compile_value_basic() { + let v = Value::list_or_nil([ + Value::Identifier("+".into()), + Value::Integer(1), + Value::Integer(2), + ]); + let m = Module::compile_value(&v).unwrap(); + assert!(m.constants.is_empty()); + let is = [ + Instruction::PushInteger(U::truncate(2)), + Instruction::PushInteger(U::truncate(1)), + Instruction::Add(U::truncate(2)), + Instruction::Return, + ] + .into_iter() + .map(Into::into) + .collect::>(); + assert_eq!(m.instructions, is); + assert_eq!(m.entry, 0); + } +} diff --git a/src/vm/pool.rs b/src/vm/pool.rs new file mode 100644 index 0000000..dd1a28c --- /dev/null +++ b/src/vm/pool.rs @@ -0,0 +1,50 @@ +use std::{ + collections::{HashMap, hash_map}, + hash::Hash, +}; + +use crate::vm::instruction::U; + +pub struct Pool { + map: HashMap>, + index: u32, +} + +impl Pool { + pub fn key(&mut self, value: T) -> Option> { + match self.map.entry(value) { + hash_map::Entry::Vacant(slot) => { + let index = U::new(self.index + 1)?; + self.index += 1; + Some(*slot.insert(index)) + } + hash_map::Entry::Occupied(slot) => Some(*slot.get()), + } + } + + pub fn into_map(mut self) -> HashMap, T> { + let mut result = HashMap::new(); + for (value, key) in self.map.drain() { + result.insert(key, value); + } + result + } +} + +impl IntoIterator for Pool { + type Item = (T, U); + type IntoIter = > as IntoIterator>::IntoIter; + + fn 
/// Fixed-capacity LIFO stack that grows downwards inside a preallocated buffer.
///
/// `pointer` is the index of the top element; `pointer == data.len()` means the
/// stack is empty. Exactly the slots in `data[pointer..]` are initialized.
pub struct Stack<T> {
    data: Box<[MaybeUninit<T>]>,
    pointer: usize,
}

impl<T: fmt::Debug> fmt::Debug for Stack<T> {
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        let data = &self[..];
        f.debug_struct("Stack")
            .field("pointer", &self.pointer)
            .field("data", &data)
            .finish()
    }
}

impl<T> Deref for Stack<T> {
    type Target = [T];

    /// Views the live elements as a slice, top of the stack first.
    fn deref(&self) -> &Self::Target {
        let live = &self.data[self.pointer..];
        // SAFETY: every slot in `data[pointer..]` has been written by `push` and
        // not yet read out by `pop`, and `MaybeUninit<T>` has the same layout as `T`.
        unsafe { std::slice::from_raw_parts(live.as_ptr().cast::<T>(), live.len()) }
    }
}

impl<T> Stack<T> {
    /// Creates an empty stack able to hold `limit` elements.
    ///
    /// # Panics
    ///
    /// Panics if `limit` is zero.
    pub fn new(limit: usize) -> Self {
        assert_ne!(limit, 0);
        Self {
            data: std::iter::repeat_with(MaybeUninit::uninit)
                .take(limit)
                .collect(),
            pointer: limit,
        }
    }

    /// Returns the top element without removing it, or `None` when empty.
    pub fn current(&self) -> Option<&T> {
        let slot = self.data.get(self.pointer)?;
        // SAFETY: `pointer` is in bounds, so it indexes an initialized slot.
        Some(unsafe { slot.assume_init_ref() })
    }

    /// Pushes `value`; hands it back as `Err` when the stack is full.
    pub fn push(&mut self, value: T) -> Result<(), T> {
        if self.pointer > 0 {
            self.pointer -= 1;
            self.data[self.pointer].write(value);
            Ok(())
        } else {
            Err(value)
        }
    }

    /// Removes and returns the top element, or `None` when empty.
    pub fn pop(&mut self) -> Option<T> {
        let slot = self.data.get(self.pointer)?;
        // SAFETY: the slot is initialized; advancing `pointer` right after the
        // read marks it as vacant, so the value is never read or dropped twice.
        let value = unsafe { slot.assume_init_read() };
        self.pointer += 1;
        Some(value)
    }
}

impl<T> Drop for Stack<T> {
    /// Drops the elements still on the stack; `MaybeUninit` never drops its
    /// contents by itself, so without this they would be leaked.
    fn drop(&mut self) {
        for slot in &mut self.data[self.pointer..] {
            // SAFETY: slots at and above `pointer` are initialized and are
            // dropped exactly once here.
            unsafe { slot.assume_init_drop() };
        }
    }
}
ModuleRef}, +}; + +#[derive(Debug)] +pub enum Error {} + +#[derive(Debug, Clone)] +pub struct NativeFunction { + name: Rc, + inner: fn(&mut Machine, &[Value]) -> Result, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct BytecodeFunction { + pub module: ModuleRef, + pub address: usize, +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Value { + // "Expression" values + Nil, + Boolean(bool), + Integer(i64), + Identifier(Rc), + Cons(Rc), + Keyword(Keyword), + // "Runtime" values + BytecodeFunction(BytecodeFunction), + NativeFunction(NativeFunction), +} + +#[derive(Debug, Clone, PartialEq)] +pub enum Keyword { + Lambda, + Defun, + Optional, + Rest, +} + +#[derive(Debug, Clone, PartialEq)] +pub struct ConsCell(pub Value, pub Value); + +impl Value { + pub fn is_nil(&self) -> bool { + matches!(self, Self::Nil) + } + + pub fn cons(self, cdr: Value) -> Self { + Self::Cons(Rc::new(ConsCell(self, cdr))) + } + + pub fn list_or_nil>(items: I) -> Self { + Self::list_or_nil_inner(&mut items.into_iter()) + } + + fn list_or_nil_inner>(items: &mut I) -> Self { + match items.next() { + Some(value) => value.cons(Self::list_or_nil_inner(items)), + None => Self::Nil, + } + } +} + +impl ConsCell { + fn fmt_inner(&self, f: &mut fmt::Formatter<'_>, first: bool) -> fmt::Result { + let Self(car, cdr) = self; + if !first { + write!(f, " ")?; + } + write!(f, "{car}")?; + match cdr { + Value::Nil => Ok(()), + Value::Cons(cons) => cons.fmt_inner(f, false), + _ => { + write!(f, " . 
{cdr}") + } + } + } +} + +impl NativeFunction { + pub fn new>>( + name: S, + inner: fn(&mut Machine, &[Value]) -> Result, + ) -> Self { + Self { + name: name.into(), + inner, + } + } + + pub fn apply(&self, machine: &mut Machine, arguments: &[Value]) -> Result { + (self.inner)(machine, arguments) + } +} + +impl PartialEq for NativeFunction { + fn eq(&self, other: &Self) -> bool { + self.name == other.name + } +} + +impl fmt::Display for ConsCell { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "(")?; + self.fmt_inner(f, true)?; + write!(f, ")") + } +} + +impl fmt::Display for NativeFunction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "", self.name, self.inner) + } +} + +impl fmt::Display for BytecodeFunction { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + write!(f, "", self.module, self.address) + } +} + +impl fmt::Display for Keyword { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + let word = match self { + Self::Lambda => "lambda", + Self::Defun => "defun", + Self::Optional => "&optional", + Self::Rest => "&rest", + }; + write!(f, "{word}") + } +} + +impl fmt::Display for Value { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Self::Nil => write!(f, "NIL"), + Self::Boolean(true) => write!(f, "#T"), + Self::Boolean(false) => write!(f, "#F"), + Self::Integer(value) => write!(f, "{value}"), + Self::Identifier(value) => write!(f, "{value}"), + Self::Keyword(keyword) => write!(f, "{keyword}"), + Self::Cons(cons) => write!(f, "{cons}"), + Self::BytecodeFunction(bytecode) => write!(f, "{bytecode}"), + Self::NativeFunction(native) => write!(f, "{native}"), + } + } +} + +#[cfg(test)] +mod tests { + use crate::vm::value::{Keyword, Value}; + + #[test] + fn test_value_formatting() { + let v = Value::Nil; + assert_eq!(&format!("{v}"), "NIL"); + let v = Value::Integer(1234); + assert_eq!(&format!("{v}"), "1234"); + let v = Value::Boolean(true); + 
assert_eq!(&format!("{v}"), "#T"); + let v = Value::Boolean(false); + assert_eq!(&format!("{v}"), "#F"); + let v = Value::Boolean(false).cons(Value::Integer(1234)); + assert_eq!(&format!("{v}"), "(#F . 1234)"); + let v = Value::Boolean(false) + .cons(Value::Integer(1234).cons(Value::Keyword(Keyword::Lambda).cons(Value::Nil))); + assert_eq!(&format!("{v}"), "(#F 1234 lambda)"); + } +}