Initial commit

This commit is contained in:
2026-04-30 16:23:39 +03:00
commit 36e6cc8364
23 changed files with 2610 additions and 0 deletions
+1
View File
@@ -0,0 +1 @@
/target
Generated
+278
View File
@@ -0,0 +1,278 @@
# This file is automatically @generated by Cargo.
# It is not intended for manual editing.
version = 4
[[package]]
name = "anstream"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "824a212faf96e9acacdbd09febd34438f8f711fb84e09a8916013cd7815ca28d"
dependencies = [
"anstyle",
"anstyle-parse",
"anstyle-query",
"anstyle-wincon",
"colorchoice",
"is_terminal_polyfill",
"utf8parse",
]
[[package]]
name = "anstyle"
version = "1.0.14"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "940b3a0ca603d1eade50a4846a2afffd5ef57a9feac2c0e2ec2e14f9ead76000"
[[package]]
name = "anstyle-parse"
version = "1.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "52ce7f38b242319f7cabaa6813055467063ecdc9d355bbb4ce0c68908cd8130e"
dependencies = [
"utf8parse",
]
[[package]]
name = "anstyle-query"
version = "1.1.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
dependencies = [
"windows-sys",
]
[[package]]
name = "anstyle-wincon"
version = "3.0.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
dependencies = [
"anstyle",
"once_cell_polyfill",
"windows-sys",
]
[[package]]
name = "bitmatch"
version = "0.1.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "6a53e105d41966c9b4594b8e3b7cf8e81ae63cc83664880b049af8a11381a3ad"
dependencies = [
"boolean_expression",
"proc-macro2",
"quote",
"syn 1.0.109",
]
[[package]]
name = "boolean_expression"
version = "0.3.11"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7c33ef624481a2d2252fd352266c050e83203343d0884622f7ba09782abbfa83"
dependencies = [
"itertools",
"smallvec",
]
[[package]]
name = "clap"
version = "4.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1ddb117e43bbf7dacf0a4190fef4d345b9bad68dfc649cb349e7d17d28428e51"
dependencies = [
"clap_builder",
"clap_derive",
]
[[package]]
name = "clap_builder"
version = "4.6.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "714a53001bf66416adb0e2ef5ac857140e7dc3a0c48fb28b2f10762fc4b5069f"
dependencies = [
"anstream",
"anstyle",
"clap_lex",
"strsim",
]
[[package]]
name = "clap_derive"
version = "4.6.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f2ce8604710f6733aa641a2b3731eaa1e8b3d9973d5e3565da11800813f997a9"
dependencies = [
"heck",
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "clap_lex"
version = "1.1.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "c8d4a3bb8b1e0c1050499d1815f5ab16d04f0959b233085fb31653fbfc9d98f9"
[[package]]
name = "colorchoice"
version = "1.0.5"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
[[package]]
name = "either"
version = "1.15.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "48c757948c5ede0e46177b7add2e67155f70e33c07fea8284df6576da70b3719"
[[package]]
name = "heck"
version = "0.5.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
[[package]]
name = "is_terminal_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "a6cb138bb79a146c1bd460005623e142ef0181e3d0219cb493e02f7d08a35695"
[[package]]
name = "itertools"
version = "0.9.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "284f18f85651fe11e8a991b2adb42cb078325c996ed026d994719efcfca1d54b"
dependencies = [
"either",
]
[[package]]
name = "lysp"
version = "0.1.0"
dependencies = [
"bitmatch",
"clap",
"nom",
"thiserror",
]
[[package]]
name = "memchr"
version = "2.8.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
[[package]]
name = "nom"
version = "8.0.0"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "df9761775871bdef83bee530e60050f7e54b1105350d6884eb0fb4f46c2f9405"
dependencies = [
"memchr",
]
[[package]]
name = "once_cell_polyfill"
version = "1.70.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
[[package]]
name = "proc-macro2"
version = "1.0.106"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "8fd00f0bb2e90d81d1044c2b32617f68fcb9fa3bb7640c23e9c748e53fb30934"
dependencies = [
"unicode-ident",
]
[[package]]
name = "quote"
version = "1.0.45"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
dependencies = [
"proc-macro2",
]
[[package]]
name = "smallvec"
version = "1.15.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
[[package]]
name = "strsim"
version = "0.11.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
[[package]]
name = "syn"
version = "1.0.109"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "72b64191b275b66ffe2469e8af2c1cfe3bafa67b529ead792a6d0160888b4237"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "syn"
version = "2.0.117"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e665b8803e7b1d2a727f4023456bbbbe74da67099c585258af0ad9c5013b9b99"
dependencies = [
"proc-macro2",
"quote",
"unicode-ident",
]
[[package]]
name = "thiserror"
version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "4288b5bcbc7920c07a1149a35cf9590a2aa808e0bc1eafaade0b80947865fbc4"
dependencies = [
"thiserror-impl",
]
[[package]]
name = "thiserror-impl"
version = "2.0.18"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ebc4ee7f67670e9b64d05fa4253e753e016c6c95ff35b89b7941d6b856dec1d5"
dependencies = [
"proc-macro2",
"quote",
"syn 2.0.117",
]
[[package]]
name = "unicode-ident"
version = "1.0.24"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "e6e4313cd5fcd3dad5cafa179702e2b244f760991f45397d14d4ebf38247da75"
[[package]]
name = "utf8parse"
version = "0.2.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "06abde3611657adf66d383f00b093d7faecc7fa57071cce2578660c9f1010821"
[[package]]
name = "windows-link"
version = "0.2.1"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
[[package]]
name = "windows-sys"
version = "0.61.2"
source = "registry+https://github.com/rust-lang/crates.io-index"
checksum = "ae137229bcbd6cdf0f7b80a31df61766145077ddf49416a728b02cb3921ff3fc"
dependencies = [
"windows-link",
]
+10
View File
@@ -0,0 +1,10 @@
[package]
name = "lysp"
version = "0.1.0"
edition = "2024"
[dependencies]
thiserror = "2.0.18"
clap = { version = "4.6.1", features = ["derive"] }
nom = "8.0.0"
bitmatch = "0.1.1"
+342
View File
@@ -0,0 +1,342 @@
use std::rc::Rc;
use crate::{
compile::{
error::CompileError,
function::FunctionSignature,
module::CompilationModule,
syntax::{CallExpression, Expression, FunctionBody, LambdaExpression},
value::{BuiltinFunction, CompileConstant, CompileValue},
},
vm::instruction::{Instruction, U},
};
/// Instruction sequence being built for a single function, together with the
/// signature used to resolve argument names.
pub struct FunctionBlock {
/// Instructions emitted so far, in emission order.
pub(crate) instructions: Vec<Instruction>,
/// Signature consulted when an identifier is resolved to an argument index.
signature: FunctionSignature,
}
/// Compilation scope that emits instructions into a borrowed [`FunctionBlock`] and
/// registers constants and nested functions in a borrowed [`CompilationModule`].
pub struct LocalBlock<'a> {
// TODO local bindings
/// Function receiving the emitted instructions.
function: &'a mut FunctionBlock,
/// Module owning the constant pool and the compiled local functions.
module: &'a mut CompilationModule,
/// Enclosing block, if any; the root block of a function has none.
parent: Option<&'a mut LocalBlock<'a>>,
}
impl FunctionBlock {
    /// Creates a block for a function with the given signature and no instructions yet.
    pub fn new(signature: FunctionSignature) -> Self {
        let instructions = Vec::new();
        Self {
            instructions,
            signature,
        }
    }

    /// Logs `instruction` to standard error and appends it to the block.
    pub fn emit(&mut self, instruction: Instruction) {
        eprintln!("emit {instruction:?}");
        self.instructions.push(instruction);
    }

    /// Compiles a whole function body into this block.
    ///
    /// Every expression in `body.head` is compiled as a statement, then `body.tail`
    /// is compiled as an expression and its value is returned from the function.
    ///
    /// # Errors
    ///
    /// Propagates the first [`CompileError`] reported while compiling the body.
    pub fn compile_body(
        &mut self,
        module: &mut CompilationModule,
        body: &FunctionBody,
    ) -> Result<(), CompileError> {
        let mut block = LocalBlock::root(self, module);
        body.head
            .iter()
            .try_for_each(|statement| block.compile_statement(statement))?;
        let result = block.compile_expression(&body.tail)?;
        block.compile_return(result)
    }
}
impl<'a> LocalBlock<'a> {
fn root(function: &'a mut FunctionBlock, module: &'a mut CompilationModule) -> Self {
Self {
function,
module,
parent: None,
}
}
fn compile_push(&mut self, value: CompileValue) -> Result<(), CompileError> {
match value {
CompileValue::Nil => todo!(),
CompileValue::Boolean(_) => todo!(),
CompileValue::Integer(value) => {
// TODO signed/unsigned
if let Some(value) = U::new(value as u32) {
self.function.emit(Instruction::PushInteger(value));
} else {
todo!()
}
}
CompileValue::Argument(index) => {
let Some(index) = U::new(index as u32) else {
todo!();
};
self.function.emit(Instruction::PushArgument(index));
}
CompileValue::LocalFunction(index) => {
let value = self
.module
.constant(CompileConstant::LocalFunction(index))?;
self.function.emit(Instruction::PushConstant(value));
}
// Already on stack
CompileValue::Stack => (),
}
Ok(())
}
pub fn compile_return(&mut self, value: CompileValue) -> Result<(), CompileError> {
self.compile_push(value)?;
self.function.emit(Instruction::Return);
Ok(())
}
pub fn compile_statement(&mut self, expression: &Expression) -> Result<(), CompileError> {
todo!()
}
fn compile_identifier(&mut self, identifier: &Rc<str>) -> Result<CompileValue, CompileError> {
if let Some(_builtin) = BuiltinFunction::from_identifier(identifier) {
todo!("Illegal");
}
if let Some(argument) = self.function.signature.argument(identifier) {
return Ok(CompileValue::Argument(argument));
}
// TODO local bindings
todo!()
}
fn compile_lambda(&mut self, lambda: &LambdaExpression) -> Result<CompileValue, CompileError> {
let index = self
.module
.compile_function(lambda.signature.clone(), &lambda.body, false)?;
Ok(CompileValue::LocalFunction(index))
}
fn compile_builtin_add(&mut self, args: &[Expression]) -> Result<CompileValue, CompileError> {
// TODO optimize literals
for arg in args.iter().rev() {
let arg = self.compile_expression(arg)?;
self.compile_push(arg)?;
}
let Some(count) = U::new(args.len() as u32) else {
todo!()
};
self.function.emit(Instruction::Add(count));
Ok(CompileValue::Stack)
}
fn compile_builtin_sub(&mut self, args: &[Expression]) -> Result<CompileValue, CompileError> {
todo!()
}
fn compile_call_builtin(
&mut self,
builtin: BuiltinFunction,
args: &[Expression],
) -> Result<CompileValue, CompileError> {
match builtin {
BuiltinFunction::Add => self.compile_builtin_add(args),
BuiltinFunction::Sub => self.compile_builtin_sub(args),
}
}
fn compile_call(&mut self, call: &CallExpression) -> Result<CompileValue, CompileError> {
eprintln!("compile_call({:?}, {:?})", &call.callee, &call.arguments);
match call.callee.as_ref() {
Expression::Identifier(identifier)
if let Some(builtin) = BuiltinFunction::from_identifier(identifier.as_ref()) =>
{
self.compile_call_builtin(builtin, &call.arguments)
}
_ => {
// Push arguments in reverse order
let Some(count) = U::new(call.arguments.len() as u32) else {
todo!();
};
for arg in call.arguments.iter().rev() {
let arg = self.compile_expression(arg)?;
self.compile_push(arg)?;
}
let callee = self.compile_expression(&call.callee)?;
self.compile_push(callee)?;
self.function.emit(Instruction::Call(count));
Ok(CompileValue::Stack)
}
}
}
pub fn compile_expression(
&mut self,
expression: &Expression,
) -> Result<CompileValue, CompileError> {
match expression {
Expression::Nil => Ok(CompileValue::Nil),
Expression::BooleanLiteral(value) => Ok(CompileValue::Boolean(*value)),
Expression::IntegerLiteral(value) => Ok(CompileValue::Integer(*value)),
Expression::Identifier(identifier) => self.compile_identifier(identifier),
Expression::Lambda(lambda) => self.compile_lambda(lambda),
Expression::Call(call) => self.compile_call(call),
}
}
}
// Unit tests for expression compilation: they build syntax trees by hand and
// compare the emitted instruction sequences.
#[cfg(test)]
mod tests {
use std::rc::Rc;
use crate::{
compile::{
block::{FunctionBlock, LocalBlock},
function::FunctionSignature,
module::CompilationModule,
syntax::{CallExpression, Expression, FunctionBody, LambdaExpression},
value::CompileValue,
},
vm::instruction::{Instruction, U},
};
// Compiles `expression` in a fresh module, inside a function whose signature has
// one required (`arg0`), one optional (`arg1`) and one rest (`arg2`) argument.
// Returns the module, the function and the value describing the result.
fn test_compile(expression: &Expression) -> (CompilationModule, FunctionBlock, CompileValue) {
let mut module = CompilationModule::default();
let mut function = FunctionBlock {
signature: FunctionSignature {
required_arguments: vec!["arg0".into()],
optional_arguments: vec!["arg1".into()],
rest_argument: Some("arg2".into()),
},
instructions: vec![],
};
let mut local = LocalBlock::root(&mut function, &mut module);
let value = local.compile_expression(expression).unwrap();
(module, function, value)
}
// An integer literal compiles to a compile-time value without emitting code.
#[test]
fn test_identity_compile() {
let (_, f, v) = test_compile(&Expression::IntegerLiteral(1));
assert!(f.instructions.is_empty());
assert_eq!(v, CompileValue::Integer(1));
}
// A lambda returning a lambda: the outer function gets index 0, the inner index 1.
#[test]
fn test_compile_lambda_returning_lambda() {
// ( ((lambda () (lambda (a b) (+ a b)))) 1 2 )
let (m, f, v) = test_compile(&Expression::Call(CallExpression {
callee: Rc::new(Expression::Call(CallExpression {
callee: Rc::new(Expression::Lambda(LambdaExpression {
signature: FunctionSignature {
required_arguments: vec![],
optional_arguments: vec![],
rest_argument: None,
},
body: FunctionBody {
head: vec![],
tail: Rc::new(Expression::Lambda(LambdaExpression {
signature: FunctionSignature {
required_arguments: vec!["a".into(), "b".into()],
optional_arguments: vec![],
rest_argument: None,
},
body: FunctionBody {
head: vec![],
tail: Rc::new(Expression::Call(CallExpression {
callee: Rc::new(Expression::Identifier("+".into())),
arguments: vec![
Expression::Identifier("a".into()),
Expression::Identifier("b".into()),
],
})),
},
})),
},
})),
arguments: vec![],
})),
arguments: vec![Expression::IntegerLiteral(1), Expression::IntegerLiteral(2)],
}));
// Caller: arguments in reverse order, then the callee, then the two calls.
assert_eq!(
&f.instructions[..],
&[
Instruction::PushInteger(U::truncate(2)),
Instruction::PushInteger(U::truncate(1)),
Instruction::PushConstant(U::truncate(2)),
Instruction::Call(U::truncate(0)),
Instruction::Call(U::truncate(2)),
]
);
// Outer lambda: returns the constant referring to the inner lambda.
assert_eq!(
&m.local_functions.get(&0).unwrap().instructions[..],
&[
Instruction::PushConstant(U::truncate(1)),
Instruction::Return
]
);
// Inner lambda: adds its two arguments.
assert_eq!(
&m.local_functions.get(&1).unwrap().instructions[..],
&[
Instruction::PushArgument(U::truncate(1)),
Instruction::PushArgument(U::truncate(0)),
Instruction::Add(U::truncate(2)),
Instruction::Return
]
);
assert_eq!(v, CompileValue::Stack);
}
// The result of calling a lambda is used as an operand of a builtin addition.
#[test]
fn test_compile_lambda() {
// (+ ((lambda (a) (+ a 1))) 2)
let (m, f, v) = test_compile(&Expression::Call(CallExpression {
callee: Rc::new(Expression::Identifier("+".into())),
arguments: vec![
Expression::Call(CallExpression {
callee: Expression::Lambda(LambdaExpression {
signature: FunctionSignature {
required_arguments: vec!["a".into()],
optional_arguments: vec![],
rest_argument: None,
},
body: FunctionBody {
head: vec![],
tail: Rc::new(Expression::Call(CallExpression {
callee: Rc::new(Expression::Identifier("+".into())),
arguments: vec![
Expression::Identifier("a".into()),
Expression::IntegerLiteral(1),
],
})),
},
})
.into(),
arguments: vec![],
}),
Expression::IntegerLiteral(2),
],
}));
assert_eq!(v, CompileValue::Stack);
assert_eq!(
&m.local_functions.get(&0).unwrap().instructions[..],
&[
Instruction::PushInteger(U::truncate(1)),
Instruction::PushArgument(U::truncate(0)),
Instruction::Add(U::truncate(2)),
Instruction::Return
]
);
assert_eq!(
&f.instructions[..],
&[
Instruction::PushInteger(U::truncate(2)),
Instruction::PushConstant(U::truncate(1)),
Instruction::Call(U::truncate(0)),
Instruction::Add(U::truncate(2))
]
);
}
}
+7
View File
@@ -0,0 +1,7 @@
use crate::compile::syntax::ParseError;
/// Errors reported by the compiler.
#[derive(Debug, thiserror::Error)]
pub enum CompileError {
/// A value could not be turned into an expression; wraps the [`ParseError`]
/// and is created from it automatically through `From`.
#[error("parse error: {0}")]
Parse(#[from] ParseError),
}
+40
View File
@@ -0,0 +1,40 @@
use std::rc::Rc;
/// Names of the parameters a function accepts, grouped by kind.
#[derive(Debug, Clone, PartialEq)]
pub struct FunctionSignature {
    /// Parameters that must always be supplied, in declaration order.
    pub required_arguments: Vec<Rc<str>>,
    /// Optional parameters, in declaration order; they follow the required ones.
    pub optional_arguments: Vec<Rc<str>>,
    /// Name of the rest parameter, if the function declares one.
    pub rest_argument: Option<Rc<str>>,
}

impl FunctionSignature {
    /// Signature of a function that takes no parameters at all.
    pub const EMPTY: Self = Self {
        required_arguments: Vec::new(),
        optional_arguments: Vec::new(),
        rest_argument: None,
    };

    /// Returns the positional index of the parameter called `name`.
    ///
    /// Required parameters are numbered first, optional parameters continue the
    /// numbering, and the rest parameter takes the index after them. When the same
    /// name occurs more than once, the first occurrence in that order wins.
    ///
    /// Returns `None` when the signature has no parameter with that name, so the
    /// caller can go on to try other kinds of bindings.
    pub fn argument(&self, name: &str) -> Option<usize> {
        self.required_arguments
            .iter()
            .chain(self.optional_arguments.iter())
            .chain(self.rest_argument.iter())
            .position(|argument| &**argument == name)
    }
}
+11
View File
@@ -0,0 +1,11 @@
mod block;
mod error;
mod function;
mod module;
mod syntax;
mod value;
pub use error::CompileError;
pub use function::FunctionSignature;
pub use module::CompilationModule;
pub use syntax::{CallExpression, Expression, FunctionBody, LambdaExpression};
+84
View File
@@ -0,0 +1,84 @@
use std::collections::HashMap;
use crate::{
compile::{
block::FunctionBlock, error::CompileError, function::FunctionSignature,
syntax::FunctionBody, value::CompileConstant,
},
vm::{
instruction::ConstantId,
module::{Module, ModuleConstant},
pool::Pool,
},
};
/// State accumulated while compiling one module: its constants and its functions.
#[derive(Default)]
pub struct CompilationModule {
/// Pool handing out a `ConstantId` for every compile-time constant.
constant_pool: Pool<CompileConstant, { ConstantId::BITS }>,
/// Compiled functions keyed by their local function index.
pub(crate) local_functions: HashMap<u32, FunctionBlock>,
/// Index that the next compiled function will receive.
local_function_index: u32,
/// Index of the function compiled as root, once there is one.
root: Option<u32>,
}
impl CompilationModule {
    /// Returns the pool key identifying the constant `value`.
    ///
    /// # Panics
    ///
    /// Panics (`todo!`) when the pool does not hand out a key.
    pub fn constant(&mut self, value: CompileConstant) -> Result<ConstantId, CompileError> {
        match self.constant_pool.key(value) {
            Some(key) => Ok(key),
            None => todo!(),
        }
    }

    /// Compiles `body` as a new local function and returns its index.
    ///
    /// The index is reserved before the body is compiled, so functions nested in
    /// the body receive higher indices. With `root` set, the function also becomes
    /// the root function of the module.
    ///
    /// # Errors
    ///
    /// Propagates any [`CompileError`] raised while compiling the body.
    ///
    /// # Panics
    ///
    /// Panics (`todo!`) when `root` is set but the module already has a root.
    pub fn compile_function(
        &mut self,
        signature: FunctionSignature,
        body: &FunctionBody,
        root: bool,
    ) -> Result<u32, CompileError> {
        let index = self.local_function_index;
        if root && self.root.is_some() {
            todo!()
        }
        self.local_function_index += 1;
        let mut function = FunctionBlock::new(signature);
        function.compile_body(self, body)?;
        self.local_functions.insert(index, function);
        if root {
            self.root = Some(index);
        }
        Ok(index)
    }

    /// Links all compiled functions and constants into a [`Module`].
    ///
    /// Function code is concatenated in ascending function-index order, which keeps
    /// the layout (and therefore the addresses stored in function constants)
    /// identical from run to run instead of following `HashMap` iteration order.
    ///
    /// # Panics
    ///
    /// Panics when no root function has been compiled.
    pub fn compile_module(self) -> Result<Module, CompileError> {
        let root = self.root.unwrap();

        // Emit all function code first
        let mut functions: Vec<_> = self.local_functions.into_iter().collect();
        functions.sort_unstable_by_key(|(index, _)| *index);
        let mut function_offsets = HashMap::with_capacity(functions.len());
        let mut instructions = vec![];
        for (index, function) in functions {
            function_offsets.insert(index, instructions.len());
            instructions.extend(function.instructions.into_iter().map(u32::from));
        }
        let entry = *function_offsets.get(&root).unwrap();

        // Function constants are resolved to the offset of the function's first instruction
        let constants = self
            .constant_pool
            .into_iter()
            .map(|(value, key)| {
                (
                    key,
                    match value {
                        CompileConstant::Integer(value) => ModuleConstant::Integer(value),
                        CompileConstant::LocalFunction(index) => {
                            let address = *function_offsets.get(&index).unwrap();
                            ModuleConstant::LocalFunction(address)
                        }
                    },
                )
            })
            .collect();
        Ok(Module {
            constants,
            instructions,
            entry,
        })
    }
}
+264
View File
@@ -0,0 +1,264 @@
use std::rc::Rc;
use crate::{
compile::function::FunctionSignature,
vm::value::{ConsCell, Keyword, Value},
};
/// Errors reported while turning a [`Value`] into an [`Expression`].
#[derive(Debug, thiserror::Error)]
pub enum ParseError {
/// The value is of a kind that cannot be used as an expression.
#[error("Non-expression value")]
NonExpressionValue,
/// An argument list was expected; carries the value reported in the message.
#[error("Expected argument list: {0}")]
ExpectedArgumentList(Value),
}
/// Body of a function: a sequence of expressions of which the last one is kept apart.
#[derive(Debug, PartialEq)]
pub struct FunctionBody {
/// All body expressions except the last, in source order.
pub head: Vec<Expression>,
/// Last expression of the body.
pub tail: Rc<Expression>,
}
/// A `lambda` form: parameter list plus body.
#[derive(Debug, PartialEq)]
pub struct LambdaExpression {
/// Parameters declared by the lambda.
pub signature: FunctionSignature,
/// Expressions making up the lambda.
pub body: FunctionBody,
}
/// A list form interpreted as a call: first element applied to the remaining ones.
#[derive(Debug, PartialEq)]
pub struct CallExpression {
/// Expression in the first position of the list.
pub callee: Rc<Expression>,
/// Remaining list elements, in source order.
pub arguments: Vec<Expression>,
}
/// Syntax tree node produced by [`Expression::parse`].
#[derive(Debug, PartialEq)]
pub enum Expression {
/// The nil value.
Nil,
/// A boolean literal.
BooleanLiteral(bool),
/// An integer literal.
IntegerLiteral(i64),
/// A name.
Identifier(Rc<str>),
/// A `lambda` form.
Lambda(LambdaExpression),
/// Any other list form.
Call(CallExpression),
}
impl FunctionSignature {
    /// Reads a parameter list such as `(a &optional b &rest c)`.
    ///
    /// Identifiers are collected as required parameters until `&optional` or
    /// `&rest` switches the section; `&rest` may also follow a non-empty optional
    /// section and accepts exactly one name.
    ///
    /// # Panics
    ///
    /// Malformed lists are not reported yet and end in `todo!`: an improper list,
    /// an unexpected element, a keyword in the wrong place, a second rest name, or
    /// an `&optional` / `&rest` section left without a name.
    fn parse(mut value: &Value) -> Result<Self, ParseError> {
        #[derive(Clone, Copy)]
        enum Mode {
            Required,
            Optional,
            Rest,
        }

        let mut signature = Self {
            required_arguments: Vec::new(),
            optional_arguments: Vec::new(),
            rest_argument: None,
        };
        let mut mode = Mode::Required;

        while !value.is_nil() {
            let Value::Cons(cell) = value else {
                todo!();
            };
            let ConsCell(item, rest) = cell.as_ref();
            match item {
                // A name goes into whichever section is currently open
                Value::Identifier(name) => match mode {
                    Mode::Required => signature.required_arguments.push(name.clone()),
                    Mode::Optional => signature.optional_arguments.push(name.clone()),
                    Mode::Rest => {
                        if signature.rest_argument.is_some() {
                            todo!();
                        }
                        signature.rest_argument = Some(name.clone());
                    }
                },
                Value::Keyword(Keyword::Optional) => match mode {
                    Mode::Required => mode = Mode::Optional,
                    Mode::Optional | Mode::Rest => todo!(),
                },
                Value::Keyword(Keyword::Rest) => match mode {
                    Mode::Required => mode = Mode::Rest,
                    Mode::Optional => {
                        if signature.optional_arguments.is_empty() {
                            todo!();
                        }
                        mode = Mode::Rest;
                    }
                    Mode::Rest => todo!(),
                },
                _ => todo!(),
            }
            value = rest;
        }

        // The section that was open at the end of the list must not be empty
        let complete = match mode {
            Mode::Required => true,
            Mode::Optional => !signature.optional_arguments.is_empty(),
            Mode::Rest => signature.rest_argument.is_some(),
        };
        if !complete {
            todo!()
        }
        Ok(signature)
    }
}
impl FunctionBody {
fn parse(mut value: &Value) -> Result<Self, ParseError> {
let mut expressions = vec![];
while !value.is_nil() {
let Value::Cons(cons) = value else { todo!() };
let ConsCell(car, cdr) = cons.as_ref();
let expression = Expression::parse(car)?;
expressions.push(expression);
value = cdr;
}
let Some(tail) = expressions.pop() else {
todo!()
};
Ok(Self {
head: expressions,
tail: tail.into(),
})
}
}
impl LambdaExpression {
    /// Parses the part of a lambda form that follows the `lambda` keyword: the
    /// first element is the parameter list, everything after it is the body.
    fn parse(value: &ConsCell) -> Result<Self, ParseError> {
        let ConsCell(parameters, forms) = value;
        Ok(Self {
            signature: FunctionSignature::parse(parameters)?,
            body: FunctionBody::parse(forms)?,
        })
    }
}
impl Expression {
fn parse_cons(value: &ConsCell) -> Result<Self, ParseError> {
let ConsCell(car, cdr) = value;
match car {
Value::Keyword(Keyword::Lambda) => {
let Value::Cons(cdr) = cdr else {
todo!();
};
LambdaExpression::parse(cdr).map(Self::Lambda)
}
_ => {
let callee = Expression::parse(car)?;
let mut arguments = vec![];
let mut list = cdr;
while !list.is_nil() {
let Value::Cons(cons) = list else {
todo!();
};
let ConsCell(car, cdr) = cons.as_ref();
let expression = Expression::parse(car)?;
arguments.push(expression);
list = cdr;
}
Ok(Self::Call(CallExpression {
callee: callee.into(),
arguments,
}))
}
}
}
pub fn parse(value: &Value) -> Result<Self, ParseError> {
match value {
Value::Nil => Ok(Self::Nil),
Value::Boolean(value) => Ok(Self::BooleanLiteral(*value)),
Value::Integer(value) => Ok(Self::IntegerLiteral(*value)),
Value::Identifier(value) => Ok(Self::Identifier(value.clone())),
Value::Cons(cons) => Self::parse_cons(cons),
_ => Err(ParseError::NonExpressionValue),
}
}
}
#[cfg(test)]
mod tests {
    use crate::{
        compile::{
            function::FunctionSignature,
            syntax::{CallExpression, Expression, FunctionBody, LambdaExpression},
        },
        vm::value::{Keyword, Value},
    };

    /// Atoms map one-to-one onto their expression counterparts.
    #[test]
    fn test_parse_basic() {
        let v = Value::Nil;
        let e = Expression::parse(&v).unwrap();
        assert_eq!(e, Expression::Nil);

        let v = Value::Integer(1234);
        let e = Expression::parse(&v).unwrap();
        assert_eq!(e, Expression::IntegerLiteral(1234));

        let v = Value::Boolean(false);
        let e = Expression::parse(&v).unwrap();
        assert_eq!(e, Expression::BooleanLiteral(false));

        let v = Value::Identifier("a".into());
        let e = Expression::parse(&v).unwrap();
        assert_eq!(e, Expression::Identifier("a".into()));
    }

    /// `(lambda (a &optional b &rest c) (+ a 1))` yields a lambda with all three
    /// parameter kinds and a single call as its body.
    #[test]
    fn test_parse_lambda() {
        let args = Value::list_or_nil([
            Value::Identifier("a".into()),
            Value::Keyword(Keyword::Optional),
            Value::Identifier("b".into()),
            Value::Keyword(Keyword::Rest),
            Value::Identifier("c".into()),
        ]);
        let body = Value::list_or_nil([
            Value::Identifier("+".into()),
            Value::Identifier("a".into()),
            Value::Integer(1),
        ]);
        let lambda = Value::Keyword(Keyword::Lambda).cons(args.cons(body.cons(Value::Nil)));
        let expr = Expression::parse(&lambda).unwrap();
        assert_eq!(
            expr,
            Expression::Lambda(LambdaExpression {
                signature: FunctionSignature {
                    required_arguments: vec!["a".into()],
                    optional_arguments: vec!["b".into()],
                    rest_argument: Some("c".into())
                },
                body: FunctionBody {
                    head: vec![],
                    tail: Expression::Call(CallExpression {
                        callee: Expression::Identifier("+".into()).into(),
                        arguments: vec![
                            Expression::Identifier("a".into()),
                            Expression::IntegerLiteral(1)
                        ]
                    })
                    .into()
                }
            })
        );
    }
}
+31
View File
@@ -0,0 +1,31 @@
/// Result of compiling an expression: either a value known at compile time or a
/// description of where the value is found.
#[derive(Debug, PartialEq)]
pub enum CompileValue {
/// Integer known at compile time.
Integer(i64),
/// Boolean known at compile time.
Boolean(bool),
/// Function compiled into the current module, identified by its local index.
LocalFunction(u32),
/// Argument of the enclosing function, identified by its position.
Argument(usize),
/// The value has already been pushed onto the stack by emitted code.
Stack,
/// The nil value.
Nil,
}
/// Constant that can be stored in a module's constant pool during compilation.
#[derive(Debug, Hash, PartialEq, Eq)]
pub enum CompileConstant {
/// Integer constant.
Integer(i64),
/// Reference to a local function by its index.
LocalFunction(u32),
}
/// Functions that the compiler knows by name.
#[derive(Debug, PartialEq, Clone, Copy)]
pub enum BuiltinFunction {
    /// Spelled `+`.
    Add,
    /// Spelled `-`.
    Sub,
}

impl BuiltinFunction {
    /// Looks up the builtin spelled exactly `identifier`; any other name gives `None`.
    pub fn from_identifier(identifier: &str) -> Option<Self> {
        let builtin = match identifier {
            "+" => Self::Add,
            "-" => Self::Sub,
            _ => return None,
        };
        Some(builtin)
    }
}
+9
View File
@@ -0,0 +1,9 @@
use crate::{compile::CompileError, vm::machine::MachineError};
/// Errors that can occur while evaluating a value: compilation or execution failed.
#[derive(Debug, thiserror::Error)]
pub enum EvalError {
/// The virtual machine reported an error; created from it through `From`.
#[error("machine error: {0}")]
Machine(#[from] MachineError),
/// The compiler reported an error; created from it through `From`.
#[error("compilation error: {0}")]
Compile(#[from] CompileError),
}
+6
View File
@@ -0,0 +1,6 @@
#![feature(coverage_attribute)]
pub mod compile;
pub mod error;
pub mod parse;
pub mod vm;
+52
View File
@@ -0,0 +1,52 @@
use std::io::stdin;
use lysp::{parse::parse_value, vm::machine::Machine};
/// Reads source text from standard input line by line, evaluates every complete
/// value and prints each result to standard output.
///
/// Text that does not yet form a complete value is kept and extended with the next
/// line. Syntax and evaluation errors are reported on standard error and discard
/// the rest of the buffered text. A failure to read standard input is reported on
/// standard error and ends the process with exit status 1 instead of panicking.
fn main() {
    let mut vm = Machine::default();
    let mut input = String::new();
    let stdin = stdin();
    loop {
        match stdin.read_line(&mut input) {
            // End of input
            Ok(0) => break,
            Ok(_) => {}
            Err(error) => {
                eprintln!("Failed to read input: {error}");
                std::process::exit(1);
            }
        }
        let mut i = input.trim_start();
        while !i.is_empty() {
            let result = parse_value(i);
            let (o, value) = match result {
                Ok(r) => r,
                // Not enough text for a whole value yet: keep it and read another line
                Err(nom::Err::Incomplete(_)) => {
                    break;
                }
                Err(error) => {
                    eprintln!("Syntax error: {error}");
                    i = "";
                    break;
                }
            };
            let result = vm.eval_value(&value);
            let result = match result {
                Ok(r) => r,
                Err(error) => {
                    eprintln!("Error in expression:");
                    eprintln!();
                    eprintln!("    {value}:");
                    eprintln!();
                    eprintln!(":: {error}");
                    i = "";
                    break;
                }
            };
            println!("{result}");
            i = o.trim_start();
        }
        // Carry the unconsumed text over to the next iteration
        input = i.into();
    }
}
+407
View File
@@ -0,0 +1,407 @@
use nom::{
FindToken, IResult, Input, Parser,
branch::alt,
bytes::streaming::tag,
character::{
anychar,
streaming::{char, multispace0, one_of},
},
combinator::{map, map_res, opt, recognize, value},
error::{Error, ErrorKind, FromExternalError, ParseError},
multi::{fold_many1, many0},
sequence::{delimited, preceded},
};
use crate::vm::value::{Keyword, Value};
/// Character class accepted as the first character of an identifier.
struct IdentifierHead;
/// Character class accepted after the first character of an identifier.
struct IdentifierTail;
impl FindToken<char> for IdentifierHead {
    /// Accepts alphabetic characters and the listed punctuation; digits are excluded.
    fn find_token(&self, token: char) -> bool {
        const PUNCTUATION: &str = "~!@$%^&*-=_+<>?/";
        if token.is_alphabetic() {
            return true;
        }
        PUNCTUATION.contains(token)
    }
}
impl FindToken<char> for IdentifierTail {
    /// Accepts alphanumeric characters and the listed punctuation.
    fn find_token(&self, token: char) -> bool {
        const PUNCTUATION: &str = "~!@$%^&*-=_+<>?/";
        if token.is_alphanumeric() {
            return true;
        }
        PUNCTUATION.contains(token)
    }
}
/// Parses an optional sign character; the output is `true` only for `-`.
///
/// `+` and a missing sign both give `false`, and nothing is consumed when there is
/// no sign.
fn parse_sign(input: &str) -> IResult<&str, bool> {
    let minus = value(true, char('-'));
    let plus = value(false, char('+'));
    map(opt(alt((minus, plus))), |sign| matches!(sign, Some(true))).parse(input)
}
fn parse_hex_digit(input: &str) -> IResult<&str, char> {
let (tail, ch) = anychar(input)?;
match ch {
'0'..='9' | 'a'..='f' | 'A'..='F' => Ok((tail, ch)),
_ if ch.is_alphabetic() => Err(nom::Err::Failure(Error::from_char(input, ch))),
_ => Err(nom::Err::Error(Error::from_char(input, ch))),
}
}
fn parse_dec_digit(input: &str) -> IResult<&str, char> {
let (tail, ch) = anychar(input)?;
match ch {
'0'..='9' => Ok((tail, ch)),
_ if ch.is_alphabetic() => Err(nom::Err::Failure(Error::from_char(input, ch))),
_ => Err(nom::Err::Error(Error::from_char(input, ch))),
}
}
fn parse_oct_digit(input: &str) -> IResult<&str, char> {
let (tail, ch) = anychar(input)?;
match ch {
'0'..='7' => Ok((tail, ch)),
_ if ch.is_alphanumeric() => Err(nom::Err::Failure(Error::from_char(input, ch))),
_ => Err(nom::Err::Error(Error::from_char(input, ch))),
}
}
/// Marker error: an integer literal does not fit into `u64`.
struct OverflowError;
/// Parses `prefix` followed by one or more digits and returns their value in the
/// given `radix`.
///
/// `digit` must only yield characters that are valid hexadecimal digits. The value
/// is accumulated with checked arithmetic; when it overflows `u64` the result is an
/// unrecoverable failure of kind `ErrorKind::Fold` positioned at the original input.
fn parse_integer_inner<I, E, P, D>(input: I, prefix: P, radix: u64, digit: D) -> IResult<I, u64, E>
where
    I: Input,
    E: ParseError<I>,
    E: FromExternalError<I, OverflowError>,
    P: Parser<I, Error = E>,
    D: Parser<I, Output = char, Error = E>,
{
    #[coverage(off)]
    fn digit_to_u8(ch: char) -> u8 {
        match ch {
            '0'..='9' => ch as u8 - b'0',
            'a'..='f' => ch as u8 - b'a' + 10,
            'A'..='F' => ch as u8 - b'A' + 10,
            _ => unreachable!(),
        }
    }

    // Once an overflow has been seen it is carried through the remaining digits
    let accumulate = move |total: Result<u64, OverflowError>, ch: char| {
        total.and_then(|total| {
            total
                .checked_mul(radix)
                .and_then(|scaled| scaled.checked_add(digit_to_u8(ch) as u64))
                .ok_or(OverflowError)
        })
    };
    let digits = fold_many1(digit, || Ok(0u64), accumulate);
    let (rest, total) = preceded(prefix, digits).parse(input.clone())?;

    total.map(|number| (rest, number)).map_err(|overflow| {
        nom::Err::Failure(E::from_external_error(input, ErrorKind::Fold, overflow))
    })
}
fn check_decimal(input: &str) -> IResult<&str, ()> {
let (_, ch) = anychar(input)?;
if ch.is_ascii_digit() {
Ok((input, ()))
} else {
Err(nom::Err::Error(Error::from_char(input, ch)))
}
}
/// Parses an unsigned octal literal introduced by `0o` or `0O`.
fn parse_integer_oct(input: &str) -> IResult<&str, u64> {
    let prefix = alt((tag("0o"), tag("0O")));
    parse_integer_inner(input, prefix, 8, parse_oct_digit)
}
/// Parses an unsigned decimal literal; it must start with a digit.
fn parse_integer_dec(input: &str) -> IResult<&str, u64> {
    let radix = 10;
    parse_integer_inner(input, check_decimal, radix, parse_dec_digit)
}
/// Parses an unsigned hexadecimal literal introduced by `0x` or `0X`.
fn parse_integer_hex(input: &str) -> IResult<&str, u64> {
    let prefix = alt((tag("0x"), tag("0X")));
    parse_integer_inner(input, prefix, 16, parse_hex_digit)
}
/// Parses an optionally signed integer literal in hexadecimal (`0x`), octal (`0o`)
/// or decimal notation into a [`Value::Integer`].
///
/// The sign is applied to the unsigned magnitude with checked arithmetic, so the
/// whole `i64` range is accepted, including `i64::MIN`, whose magnitude does not
/// fit into a positive `i64`. A literal outside that range is a recoverable error.
fn parse_integer(input: &str) -> IResult<&str, Value> {
    map_res(
        (
            parse_sign,
            alt((parse_integer_hex, parse_integer_oct, parse_integer_dec)),
        ),
        |(minus, value): (bool, u64)| {
            let signed = if minus {
                // `0 - value` reaches i64::MIN, which negating an i64 cannot
                0i64.checked_sub_unsigned(value)
            } else {
                i64::try_from(value).ok()
            };
            signed.map(Value::Integer).ok_or(())
        },
    )
    .parse(input)
}
/// Recognizes an identifier: one head character followed by any number of tail
/// characters. The output is the matched slice of the input.
fn parse_identifier(input: &str) -> IResult<&str, &str> {
    let head = one_of(IdentifierHead);
    let tail = many0(one_of(IdentifierTail));
    recognize(preceded(head, tail)).parse(input)
}
/// Parses an identifier-shaped word and classifies it as nil, a keyword or a
/// plain identifier.
fn parse_identifier_or_keyword_or_nil(input: &str) -> IResult<&str, Value> {
    /// Maps reserved words to their dedicated values; everything else is an identifier.
    fn classify(word: &str) -> Value {
        match word {
            "NIL" | "nil" => Value::Nil,
            "lambda" => Value::Keyword(Keyword::Lambda),
            "defun" => Value::Keyword(Keyword::Defun),
            "&optional" => Value::Keyword(Keyword::Optional),
            "&rest" => Value::Keyword(Keyword::Rest),
            other => Value::Identifier(other.into()),
        }
    }

    map(parse_identifier, classify).parse(input)
}
/// Parses a parenthesized sequence of values separated by optional whitespace and
/// hands the collected elements to `Value::list_or_nil`.
fn parse_list_or_nil(input: &str) -> IResult<&str, Value> {
    let open = char('(');
    let elements = many0(preceded(multispace0, parse_value));
    let close = preceded(multispace0, char(')'));
    map(delimited(open, elements, close), Value::list_or_nil).parse(input)
}
/// Parses a boolean literal: `#t` / `#T` for true, `#f` / `#F` for false.
fn parse_boolean(input: &str) -> IResult<&str, Value> {
    let literal = alt((
        value(true, tag("#t")),
        value(true, tag("#T")),
        value(false, tag("#f")),
        value(false, tag("#F")),
    ));
    map(literal, Value::Boolean).parse(input)
}
/// Parses a single value from the start of `input`.
///
/// The alternatives are tried in this order: list, boolean, integer, then
/// identifier / keyword / nil. Integers come before identifiers because a sign
/// character is also a valid identifier start.
pub fn parse_value(input: &str) -> IResult<&str, Value> {
    let mut any_value = alt((
        parse_list_or_nil,
        parse_boolean,
        parse_integer,
        parse_identifier_or_keyword_or_nil,
    ));
    any_value.parse(input)
}
#[cfg(test)]
mod tests {
use nom::error::{Error, FromExternalError, ParseError};
use crate::{
parse::{
OverflowError, parse_boolean, parse_identifier, parse_identifier_or_keyword_or_nil,
parse_integer, parse_integer_dec, parse_integer_hex, parse_integer_oct,
parse_list_or_nil, parse_value,
},
vm::value::{Keyword, Value},
};
#[test]
fn test_integer_dec() {
let (r, v) = parse_integer_dec("1234\n").unwrap();
assert_eq!(r, "\n");
assert_eq!(v, 1234);
let (r, v) = parse_integer_dec("18446744073709551615\n").unwrap();
assert_eq!(r, "\n");
assert_eq!(v, u64::MAX);
// illegal digit
let e = parse_integer_dec("1234A").unwrap_err();
assert_eq!(e, nom::Err::Failure(Error::from_char("A", 'A')));
// overflow
let e = parse_integer_dec("9999999999999999999999999").unwrap_err();
assert_eq!(
e,
nom::Err::Failure(Error::from_external_error(
"9999999999999999999999999",
nom::error::ErrorKind::Fold,
OverflowError
))
);
}
/// Octal literals: both prefix spellings and an illegal digit.
#[test]
fn test_integer_oct() {
    for text in ["0o1234\n", "0O1234\n"] {
        let (rest, parsed) = parse_integer_oct(text).unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(parsed, 0o1234);
    }

    // illegal digit
    let illegal = parse_integer_oct("0o12349").unwrap_err();
    assert_eq!(illegal, nom::Err::Failure(Error::from_char("9", '9')));
}
/// Hexadecimal literals: both prefix spellings, mixed-case digits, and an
/// illegal digit.
#[test]
fn test_integer_hex() {
    for text in ["0x123Fa4\n", "0X123Fa4\n"] {
        let (rest, parsed) = parse_integer_hex(text).unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(parsed, 0x123FA4);
    }

    // illegal digit
    let illegal = parse_integer_hex("0x1AG").unwrap_err();
    assert_eq!(illegal, nom::Err::Failure(Error::from_char("G", 'G')));
}
/// Signed integer literals in every radix, plus malformed inputs.
#[test]
fn test_integer() {
    let accepted = [
        // Dec path
        ("1234\n", 1234),
        ("+1234\n", 1234),
        ("-1234\n", -1234),
        // Oct path
        ("0o1234\n", 0o1234),
        ("+0O1234\n", 0o1234),
        ("-0o1234\n", -0o1234),
        // Hex path
        ("0x1234AF\n", 0x1234AF),
        ("+0X1234AF\n", 0x1234AF),
        ("-0x1234AF\n", -0x1234AF),
    ];
    for (text, expected) in accepted {
        let (rest, parsed) = parse_integer(text).unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(parsed, Value::Integer(expected));
    }

    // Illegal path: (input, remaining text reported by the error, offending char)
    let rejected = [
        ("0x12V34", "V34", 'V'),
        ("-0X12V34", "V34", 'V'),
        ("-0o81", "81", '8'),
    ];
    for (text, at, offending) in rejected {
        let failure = parse_integer(text).unwrap_err();
        assert_eq!(failure, nom::Err::Failure(Error::from_char(at, offending)));
    }
}
/// Identifiers are recognised up to the trailing newline.
#[test]
fn test_identifier() {
    for (text, expected) in [("+\n", "+"), ("a-1\n", "a-1")] {
        let (rest, name) = parse_identifier(text).unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(name, expected);
    }
}
/// Identifier text is classified as identifier, keyword, or `NIL`.
#[test]
fn test_identifier_or_keyword_or_nil() {
    let cases = [
        ("+\n", Value::Identifier("+".into())),
        ("lambda\n", Value::Keyword(Keyword::Lambda)),
        ("NIL\n", Value::Nil),
    ];
    for (text, expected) in cases {
        let (rest, parsed) = parse_identifier_or_keyword_or_nil(text).unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(parsed, expected);
    }
}
/// Empty parentheses parse to `NIL`; non-empty ones parse to a list.
#[test]
fn test_list_or_nil() {
    for empty in ["()\n", "( )\n"] {
        let (rest, parsed) = parse_list_or_nil(empty).unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(parsed, Value::Nil);
    }

    let (rest, parsed) = parse_list_or_nil("( a b -1 )\n").unwrap();
    let expected = Value::list_or_nil([
        Value::Identifier("a".into()),
        Value::Identifier("b".into()),
        Value::Integer(-1),
    ]);
    assert_eq!(rest, "\n");
    assert_eq!(parsed, expected);
}
/// Every spelling of the boolean literals parses to the right value.
///
/// The lowercase `#f` spelling is covered explicitly; the previous version
/// checked `#F` twice and never exercised `#f`.
#[test]
fn test_boolean() {
    let cases = [("#t\n", true), ("#T\n", true), ("#f\n", false), ("#F\n", false)];
    for (text, expected) in cases {
        let (rest, parsed) = parse_boolean(text).unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(parsed, Value::Boolean(expected));
    }
}
/// `parse_value` dispatches to the right sub-parser for each kind of input.
#[test]
fn test_value() {
    let cases = [
        ("+123\n", Value::Integer(123)),
        ("-0x123\n", Value::Integer(-0x123)),
        ("abcdef-ghijkl\n", Value::Identifier("abcdef-ghijkl".into())),
        ("+\n", Value::Identifier("+".into())),
        ("+x\n", Value::Identifier("+x".into())),
        ("lambda-\n", Value::Identifier("lambda-".into())),
        ("lambda\n", Value::Keyword(Keyword::Lambda)),
        (
            "(f #T -0x1)\n",
            Value::list_or_nil([
                Value::Identifier("f".into()),
                Value::Boolean(true),
                Value::Integer(-0x1),
            ]),
        ),
    ];
    for (text, expected) in cases {
        let (rest, parsed) = parse_value(text).unwrap();
        assert_eq!(rest, "\n");
        assert_eq!(parsed, expected);
    }
}
}
+120
View File
@@ -0,0 +1,120 @@
use std::fmt;
use bitmatch::bitmatch;
/// Error type used when converting a raw `u32` word into an [`Instruction`].
#[derive(Debug, thiserror::Error)]
pub enum InstructionError {
    /// The word is not a valid instruction.
    #[error("invalid instruction")]
    Invalid,
}
/// Error type reserved for instruction encoding; it currently has no variants,
/// so no value of this type can exist.
#[derive(Debug, thiserror::Error)]
pub enum InstructionEncodeError {}
/// An integer operand that is `N` bits wide, stored in a `u32`.
///
/// The checked constructors (`new`, `truncate`) only produce values below
/// `2^N`.
#[derive(Clone, Copy, PartialEq, Eq, PartialOrd, Ord, Hash)]
pub struct U<const N: usize>(u32);
/// A decoded virtual-machine instruction.
#[derive(Debug, Clone, Copy, PartialEq)]
pub enum Instruction {
    /// Pushes `NIL` onto the value stack.
    PushNil,
    /// Pushes the sign-extended immediate as an integer.
    PushInteger(U<24>),
    /// Pushes a boolean.
    PushBool(bool),
    /// Pushes the constant with the given id from the executing module.
    PushConstant(ConstantId),
    /// Pushes the argument with the given index from the current call frame
    /// (`NIL` when the index is out of range).
    PushArgument(U<6>),
    /// Pops an identifier and a value, stores the value as a global, pushes `NIL`.
    SetGlobal,
    /// Pops an identifier and pushes the value of that global.
    GetGlobal,
    /// Pops a callee followed by the given number of arguments and calls it.
    Call(U<6>),
    /// Returns to the caller's frame, or finishes the module when there is none.
    Return,
    /// Pops the given number of integers and pushes their wrapping sum.
    Add(U<6>),
    /// Subtraction over the given number of operands (not implemented by the
    /// machine yet).
    Sub(U<6>),
}
/// Identifier of an entry in a module's constant table (24 bits wide).
pub type ConstantId = U<24>;
impl<const N: usize> U<N> {
    /// Width of the value in bits.
    pub const BITS: usize = N;
    /// The zero value.
    pub const ZERO: Self = Self(0);

    /// Mask selecting the low `N` bits. Widths of 32 or more select the whole
    /// word, which avoids the shift overflow `1 << N` would hit.
    const MASK: u32 = if N >= 32 { u32::MAX } else { (1u32 << N) - 1 };

    /// Interprets the value as an `N`-bit two's-complement number and widens
    /// it to `i64`.
    ///
    /// Bit `N - 1` is the sign bit: `U::<24>::truncate(0xFF_FFFF)` yields `-1`.
    /// A zero-width value is always `0`.
    pub fn sign_extend_i64(&self) -> i64 {
        if N == 0 {
            return 0;
        }
        if N >= 32 {
            return self.0 as i32 as i64;
        }
        // Move the sign bit to bit 31, then let the arithmetic right shift
        // replicate it back down.
        let shift = 32 - N as u32;
        (((self.0 << shift) as i32) >> shift) as i64
    }

    /// Wraps `value`, or returns `None` when it does not fit in `N` bits.
    pub const fn new(value: u32) -> Option<Self> {
        if value > Self::MASK {
            None
        } else {
            Some(Self(value))
        }
    }

    /// Wraps the low `N` bits of `value`, discarding the rest.
    pub const fn truncate(value: u32) -> Self {
        Self(value & Self::MASK)
    }

    /// Wraps `value` without checking that it fits in `N` bits.
    ///
    /// # Safety
    ///
    /// The caller must ensure `value` fits in `N` bits; the checked
    /// constructors maintain that invariant and other code may rely on it.
    pub const unsafe fn new_unchecked(value: u32) -> Self {
        Self(value)
    }
}
impl<const N: usize> fmt::Debug for U<N> {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Debug::fmt(&self.0, f)
}
}
impl<const N: usize> Into<usize> for U<N> {
fn into(self) -> usize {
self.0 as usize
}
}
impl<const N: usize> Into<u32> for U<N> {
fn into(self) -> u32 {
self.0
}
}
/// Encodes an instruction as a raw `u32` word.
///
/// Each variant has a fixed opcode bit pattern; operands are OR-ed into the
/// low bits without shifting or masking.
///
/// NOTE(review): `PushInteger` and `PushConstant` carry 24-bit operands, but
/// their opcode bits sit at bits 12 and 13, so an operand of `0x1000` or more
/// overlaps the opcode. The decoder's patterns for these two capture 12
/// operand bits. Confirm the intended word layout.
impl From<Instruction> for u32 {
    fn from(instruction: Instruction) -> u32 {
        match instruction {
            Instruction::PushNil => 0b0000_0000_0000_0001,
            // The boolean occupies the lowest bit.
            Instruction::PushBool(value) => 0b0000_0000_0000_0010 | (value as u32),
            Instruction::Return => 0b0000_0000_0000_0100,
            Instruction::SetGlobal => 0b0000_0000_0000_0101,
            Instruction::GetGlobal => 0b0000_0000_0000_0110,
            Instruction::Add(count) => 0b0000_0001_0000_0000 | count.0,
            Instruction::Sub(count) => 0b0000_0001_0100_0000 | count.0,
            Instruction::Call(count) => 0b0000_0000_0100_0000 | count.0,
            Instruction::PushInteger(value) => 0b0001_0000_0000_0000 | value.0,
            Instruction::PushConstant(index) => 0b0010_0000_0000_0000 | index.0,
            Instruction::PushArgument(index) => 0b0000_0000_1000_0000 | index.0,
        }
    }
}
impl TryFrom<u32> for Instruction {
type Error = InstructionError;
#[bitmatch]
fn try_from(value: u32) -> Result<Self, Self::Error> {
#[bitmatch]
match value {
"0000_0000_0000_0001" => Ok(Instruction::PushNil),
"0000_0000_0000_001x" => Ok(Instruction::PushBool(x != 0)),
"0000_0000_0000_0100" => Ok(Instruction::Return),
"0000_0000_0000_0101" => Ok(Instruction::SetGlobal),
"0000_0000_0000_0110" => Ok(Instruction::GetGlobal),
"0000_0000_01xx_xxxx" => Ok(Instruction::Call(U(x))),
"0000_0000_10xx_xxxx" => Ok(Instruction::PushArgument(U(x))),
"0000_0001_00xx_xxxx" => Ok(Instruction::Add(U(x))),
"0000_0001_01xx_xxxx" => Ok(Instruction::Sub(U(x))),
"0001_xxxx_xxxx_xxxx" => Ok(Instruction::PushInteger(U(x))),
"0010_xxxx_xxxx_xxxx" => Ok(Instruction::PushConstant(U(x))),
_ => todo!(),
}
}
}
+1
View File
@@ -0,0 +1 @@
+459
View File
@@ -0,0 +1,459 @@
use std::{collections::HashMap, fmt, rc::Rc};
use crate::{
error::EvalError,
vm::{
instruction::{ConstantId, Instruction, InstructionError},
module::{Module, ModuleConstant, ModuleRef},
stack::Stack,
value::{BytecodeFunction, NativeFunction, Value},
},
};
/// Errors reported by the [`Machine`] while loading or executing bytecode.
#[derive(Debug, thiserror::Error)]
pub enum MachineError {
    /// A raw word could not be converted into an instruction.
    #[error("Instruction error: {0}")]
    Instruction(#[from] InstructionError),
    /// The machine has no current instruction pointer.
    #[error("Instruction pointer is undefined")]
    UndefinedInstructionPointer,
    /// A value was popped from an empty value stack.
    #[error("Data stack underflowed")]
    ValueStackUnderflow,
    /// A value was pushed onto a full value stack.
    #[error("Data stack overflowed")]
    ValueStackOverflow,
    /// The call stack was empty when a frame was required.
    #[error("Call stack underflowed")]
    CallStackUnderflow,
    /// A frame was pushed onto a full call stack.
    #[error("Call stack overflowed")]
    CallStackOverflow,
}
/// Location of an instruction: a module plus an index into its instruction list.
#[derive(Debug, Clone, PartialEq)]
pub struct InstructionPointer {
    /// Module that owns the instruction.
    pub module: ModuleRef,
    /// Index of the instruction within the module.
    pub address: usize,
}
/// One entry of the machine's call stack.
#[derive(Debug, Clone, PartialEq)]
pub struct CallFrame {
    /// Arguments of the call, in the order they were popped from the value stack.
    arguments: Vec<Value>,
    /// Where execution resumes when the frame is popped by a return.
    return_address: InstructionPointer,
    /// Event reported by the return that pops this frame.
    event: ExecutionEvent,
}
/// The bytecode virtual machine: global bindings, the current instruction
/// pointer, and the value and call stacks.
pub struct Machine {
    /// Global bindings, keyed by identifier.
    globals: HashMap<Rc<str>, Value>,
    /// Instruction to execute next; `None` when nothing is loaded or the
    /// outermost code has returned.
    ip: Option<InstructionPointer>,
    /// Operand stack.
    value_stack: Stack<Value>,
    /// Stack of active call frames.
    call_stack: Stack<CallFrame>,
}
/// Event reported after executing a single instruction.
#[derive(Debug, Clone, PartialEq)]
pub enum ExecutionEvent {
    /// The entry code of the given module has returned.
    ModuleEntry(ModuleRef),
    /// Nothing noteworthy happened.
    None,
}
impl Default for Machine {
fn default() -> Self {
Self {
globals: Default::default(),
ip: None,
value_stack: Stack::new(1024),
call_stack: Stack::new(32),
}
}
}
impl Machine {
/// Pops the top of the value stack, reporting underflow when it is empty.
fn pop(&mut self) -> Result<Value, MachineError> {
    match self.value_stack.pop() {
        Some(value) => Ok(value),
        None => Err(MachineError::ValueStackUnderflow),
    }
}
/// Pushes `value` onto the value stack, reporting overflow when it is full.
fn push(&mut self, value: Value) -> Result<(), MachineError> {
    match self.value_stack.push(value) {
        Ok(()) => Ok(()),
        Err(_rejected) => Err(MachineError::ValueStackOverflow),
    }
}
/// Executes a call with `count` arguments.
///
/// Pops the callee, then pops `count` arguments (the first value popped
/// becomes argument 0). A bytecode callee gets a new call frame and control
/// jumps to its address; a native callee is invoked immediately, its result
/// is pushed, and execution continues after the call instruction.
///
/// # Errors
///
/// * [`MachineError::UndefinedInstructionPointer`] when no instruction pointer
///   is set (previously a panic).
/// * [`MachineError::ValueStackUnderflow`] when the callee or an argument is
///   missing.
/// * [`MachineError::CallStackOverflow`] when the call stack is full.
/// * [`MachineError::ValueStackOverflow`] when a native result cannot be pushed.
///
/// # Panics
///
/// Calling a value that is not a function is not implemented yet.
fn execute_call(&mut self, count: usize) -> Result<(), MachineError> {
    enum Callee {
        Bytecode(BytecodeFunction),
        Native(NativeFunction),
    }

    let source_ip = self
        .ip
        .clone()
        .ok_or(MachineError::UndefinedInstructionPointer)?;
    let callee = match self.pop()? {
        Value::BytecodeFunction(bytecode) => Callee::Bytecode(bytecode),
        Value::NativeFunction(native) => Callee::Native(native),
        other => todo!("call of non-function value {other:?}"),
    };
    let arguments = (0..count)
        .map(|_| self.pop())
        .collect::<Result<Vec<_>, _>>()?;
    // Both kinds of callee resume at the instruction after the call.
    let return_address = InstructionPointer {
        module: source_ip.module,
        address: source_ip.address + 1,
    };

    match callee {
        Callee::Bytecode(BytecodeFunction { module, address }) => {
            let frame = CallFrame {
                arguments,
                event: ExecutionEvent::None,
                return_address,
            };
            self.call_stack
                .push(frame)
                .map_err(|_| MachineError::CallStackOverflow)?;
            self.ip = Some(InstructionPointer { module, address });
        }
        Callee::Native(native) => {
            let result = match native.apply(self, &arguments) {
                Ok(value) => value,
                // The native error type has no variants today; this match
                // stops compiling once one is added, forcing real handling.
                Err(never) => match never {},
            };
            self.push(result)?;
            self.ip = Some(return_address);
        }
    }
    Ok(())
}
fn execute_return(&mut self) -> Result<ExecutionEvent, MachineError> {
let ip = self.ip.clone().unwrap();
if let Some(frame) = self.call_stack.pop() {
self.ip = Some(frame.return_address);
Ok(frame.event)
} else {
self.ip = None;
Ok(ExecutionEvent::ModuleEntry(ip.module))
}
}
/// Pops `count` integers and pushes their sum, wrapping on overflow.
///
/// A `count` of zero pushes `0`. Popping a non-integer operand is not
/// implemented and panics.
fn execute_add(&mut self, count: usize) -> Result<(), MachineError> {
    let mut total: i64 = 0;
    for _ in 0..count {
        match self.pop()? {
            Value::Integer(term) => total = total.wrapping_add(term),
            other => todo!("{other:?}"),
        }
    }
    self.push(Value::Integer(total))
}
/// Subtraction over `count` operands; not implemented yet.
///
/// # Panics
///
/// Always panics via `todo!()`.
fn execute_sub(&mut self, count: usize) -> Result<(), MachineError> {
    todo!()
}
fn execute_push_constant(&mut self, index: ConstantId) -> Result<(), MachineError> {
let ip = self.ip.as_ref().unwrap();
let constant = ip.module.constant(index).expect("TODO");
let value = match constant {
ModuleConstant::LocalFunction(address) => Value::BytecodeFunction(BytecodeFunction {
module: ip.module.clone(),
address,
}),
ModuleConstant::Integer(value) => Value::Integer(value),
ModuleConstant::Identifier(identifier) => Value::Identifier(identifier),
};
self.push(value)
}
fn execute_push_argument(&mut self, index: usize) -> Result<(), MachineError> {
let frame = self.call_stack.current().expect("valid call frame");
let argument = frame.arguments.get(index);
match argument {
Some(arg) => self.push(arg.clone()),
None => self.push(Value::Nil),
}
}
/// Pops an identifier and pushes the value of the global bound to it.
///
/// Panics when the global is not defined; a non-identifier operand is not
/// implemented and also panics.
fn execute_get_global(&mut self) -> Result<(), MachineError> {
    let Value::Identifier(name) = self.pop()? else {
        todo!()
    };
    let bound = self.globals.get(&name).cloned().unwrap();
    self.push(bound)
}
/// Pops an identifier, then a value, binds the value as a global under that
/// identifier and pushes `NIL`.
///
/// A non-identifier name operand is not implemented and panics.
fn execute_set_global(&mut self) -> Result<(), MachineError> {
    let target = self.pop()?;
    let value = self.pop()?;
    match target {
        Value::Identifier(name) => {
            self.globals.insert(name, value);
        }
        _ => todo!(),
    }
    self.push(Value::Nil)
}
/// Binds `value` to the global `identifier`, replacing any previous binding.
pub fn set_global<S: Into<Rc<str>>>(&mut self, identifier: S, value: Value) {
    let key: Rc<str> = identifier.into();
    self.globals.insert(key, value);
}
pub fn execute_next(&mut self) -> Result<ExecutionEvent, MachineError> {
let ip = self.ip.clone().unwrap();
let instruction = ip.module.instruction(ip.address).expect("TODO");
let instruction = Instruction::try_from(instruction)?;
eprintln!("{ip}: {instruction:?}");
let mut advance = true;
let mut event = ExecutionEvent::None;
match instruction {
Instruction::PushNil => {
self.push(Value::Nil)?;
}
Instruction::PushInteger(value) => {
self.push(Value::Integer(value.sign_extend_i64()))?;
}
Instruction::PushBool(value) => {
self.push(Value::Boolean(value))?;
}
Instruction::PushConstant(index) => {
self.execute_push_constant(index)?;
}
Instruction::PushArgument(index) => {
self.execute_push_argument(index.into())?;
}
Instruction::GetGlobal => {
self.execute_get_global()?;
}
Instruction::SetGlobal => {
self.execute_set_global()?;
}
Instruction::Return => {
advance = false;
event = self.execute_return()?;
}
Instruction::Call(count) => {
advance = false;
self.execute_call(count.into())?;
}
Instruction::Add(count) => {
self.execute_add(count.into())?;
}
Instruction::Sub(count) => {
self.execute_sub(count.into())?;
}
}
if advance {
self.ip = Some(InstructionPointer {
module: ip.module,
address: ip.address + 1,
});
}
Ok(event)
}
/// Makes `module` the executing module and returns a shared handle to it.
///
/// If something is already executing, a call frame is pushed so that the
/// module's final return resumes at the interrupted location and reports
/// [`ExecutionEvent::ModuleEntry`] for the new module. The instruction
/// pointer is then set to the module's entry address.
///
/// Returns [`MachineError::CallStackOverflow`] when that frame does not fit.
pub fn load_module(&mut self, module: Module) -> Result<ModuleRef, MachineError> {
    let module = ModuleRef::from(module);
    if let Some(interrupted) = self.ip.clone() {
        let frame = CallFrame {
            arguments: vec![],
            return_address: interrupted,
            event: ExecutionEvent::ModuleEntry(module.clone()),
        };
        if self.call_stack.push(frame).is_err() {
            return Err(MachineError::CallStackOverflow);
        }
    }
    self.ip = Some(InstructionPointer {
        module: module.clone(),
        address: module.entry(),
    });
    Ok(module)
}
/// Loads `module`, runs it until its entry code returns, and pops the result
/// from the value stack.
pub fn eval_module(&mut self, module: Module) -> Result<Value, MachineError> {
    let loaded = self.load_module(module)?;
    let finished = ExecutionEvent::ModuleEntry(loaded);
    // Step until the return that ends this module's entry code is reported.
    while self.execute_next()? != finished {}
    self.pop()
}
/// Compiles `value` into a module and evaluates it, converting compile and
/// machine errors into [`EvalError`].
pub fn eval_value(&mut self, value: &Value) -> Result<Value, EvalError> {
    let compiled = Module::compile_value(value)?;
    match self.eval_module(compiled) {
        Ok(result) => Ok(result),
        Err(failure) => Err(EvalError::from(failure)),
    }
}
}
impl fmt::Display for InstructionPointer {
    /// Renders as `<module pointer>:<address>`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // `self.module` is passed as a field expression on purpose: `{:p}`
        // must use the module handle's own pointer formatting.
        f.write_fmt(format_args!("{:p}:{}", self.module, self.address))
    }
}
#[cfg(test)]
mod tests {
use std::sync::atomic::{AtomicI64, Ordering};
use crate::vm::{
instruction::{Instruction, U},
machine::{InstructionPointer, Machine},
module::{Module, ModuleBuilder, ModuleConstant, ModuleRef},
value::{NativeFunction, Value},
};
/// Test helper: builds and evaluates `count` modules on one machine.
///
/// The machine starts with its instruction pointer on a dummy module, so each
/// evaluated module is entered through a call frame. `prepare` runs once
/// before any module is built. `build` receives the module index and a
/// builder whose entry is already set to 0; a trailing `Return` is appended
/// after it. Returns the machine and the value produced by each module.
fn execute_all<F: Fn(u32, &mut ModuleBuilder), G: FnOnce(&mut Machine)>(
    count: usize,
    build: F,
    prepare: G,
) -> (Machine, Vec<Value>) {
    let dummy = ModuleRef::from(Module::dummy());
    let mut machine = Machine {
        ip: Some(InstructionPointer {
            module: dummy,
            address: 0,
        }),
        ..Default::default()
    };
    prepare(&mut machine);
    let mut values = vec![];
    for i in 0..count {
        let mut builder = ModuleBuilder::new();
        builder.entry(0);
        build(i as u32, &mut builder);
        builder.add(Instruction::Return);
        let module = builder.build();
        values.push(machine.eval_module(module).unwrap());
    }
    (machine, values)
}
/// Adds two immediates, then adds a pooled constant: 1 + 2 + 3 = 6.
#[test]
fn test_basic() {
    let (m, vs) = execute_all(
        1,
        |_, builder| {
            let c0 = builder.constant(ModuleConstant::Integer(3));
            builder.add_all([
                Instruction::PushInteger(U::truncate(1)),
                Instruction::PushInteger(U::truncate(2)),
                Instruction::Add(U::truncate(2)),
                Instruction::PushConstant(c0),
                Instruction::Add(U::truncate(2)),
            ]);
        },
        |_| {},
    );
    // Both stacks must be balanced once the module has returned.
    assert!(m.value_stack.is_empty());
    assert!(m.call_stack.is_empty());
    assert_eq!(&vs, &[Value::Integer(6)]);
}
/// Calls a function defined in the same module (address 4) with one argument:
/// 34 + 1200 = 1234.
#[test]
fn test_local_function_call() {
    let (m, vs) = execute_all(
        1,
        |_, builder| {
            let c0 = builder.constant(ModuleConstant::LocalFunction(4));
            builder.add_all([
                // main
                Instruction::PushInteger(U::truncate(34)),
                Instruction::PushConstant(c0),
                Instruction::Call(U::truncate(1)),
                Instruction::Return,
                // c0
                Instruction::PushArgument(U::truncate(0)),
                Instruction::PushInteger(U::truncate(1200)),
                Instruction::Add(U::truncate(2)),
                Instruction::Return,
            ]);
        },
        |_| {},
    );
    assert!(m.value_stack.is_empty());
    assert!(m.call_stack.is_empty());
    assert_eq!(&vs, &[Value::Integer(1234)]);
}
/// Calls across modules and into a native function.
///
/// Module 0 is evaluated first and publishes `extern-function` as a global.
/// Module 1 then calls its own local function, which calls `extern-function`,
/// which in turn calls the native function installed by the `prepare` closure.
#[test]
fn test_cross_module_call() {
    // Written by the native function so the test can tell it actually ran.
    static NATIVE_STATE: AtomicI64 = AtomicI64::new(-1);
    let (m, vs) = execute_all(
        2,
        |id, builder| match id {
            1 => {
                let c0 = builder.constant(ModuleConstant::LocalFunction(4));
                let c1 = builder.constant(ModuleConstant::Identifier("extern-function".into()));
                builder.add_all([
                    // main: (local 1)
                    Instruction::PushInteger(U::truncate(1)),
                    Instruction::PushConstant(c0),
                    Instruction::Call(U::truncate(1)),
                    Instruction::Return,
                    // local function of this module (id 1), at address 4
                    // (fn (a) (extern-function a 2))
                    Instruction::PushInteger(U::truncate(2)),
                    Instruction::PushArgument(U::truncate(0)),
                    Instruction::PushConstant(c1),
                    Instruction::GetGlobal,
                    Instruction::Call(U::truncate(2)),
                    Instruction::Return,
                ]);
            }
            0 => {
                let c0 = builder.constant(ModuleConstant::Integer(3));
                let c1 = builder.constant(ModuleConstant::Identifier("native".into()));
                let c2 = builder.constant(ModuleConstant::LocalFunction(4));
                let c3 = builder.constant(ModuleConstant::Identifier("extern-function".into()));
                builder.add_all([
                    // main: bind the local function to the global `extern-function`
                    Instruction::PushConstant(c2),
                    Instruction::PushConstant(c3),
                    Instruction::SetGlobal,
                    Instruction::Return,
                    // extern-function
                    // (fn (a b) (native 3 b a))
                    Instruction::PushArgument(U::truncate(0)),
                    Instruction::PushArgument(U::truncate(1)),
                    Instruction::PushConstant(c0),
                    Instruction::PushConstant(c1),
                    Instruction::GetGlobal,
                    Instruction::Call(U::truncate(3)),
                    Instruction::Return,
                ]);
            }
            _ => unreachable!(),
        },
        |m| {
            m.set_global(
                "native",
                Value::NativeFunction(NativeFunction::new("native", |_, args| {
                    assert_eq!(args.len(), 3);
                    assert_eq!(
                        &args,
                        &[Value::Integer(3), Value::Integer(2), Value::Integer(1)]
                    );
                    NATIVE_STATE.store(4321, Ordering::Release);
                    Ok(Value::Integer(1234))
                })),
            );
        },
    );
    assert!(m.value_stack.is_empty());
    assert!(m.call_stack.is_empty());
    // Module 0 yields the `NIL` pushed by SetGlobal; module 1 yields the native result.
    assert_eq!(&vs, &[Value::Nil, Value::Integer(1234)]);
    assert_eq!(NATIVE_STATE.load(Ordering::Acquire), 4321);
}
}
+8
View File
@@ -0,0 +1,8 @@
pub mod instruction;
pub mod loader;
pub mod machine;
pub mod module;
pub mod pool;
pub mod prelude;
pub mod stack;
pub mod value;
+188
View File
@@ -0,0 +1,188 @@
use std::{collections::HashMap, fmt, ops::Deref, rc::Rc};
use crate::{
compile::{CompilationModule, CompileError, Expression, FunctionBody, FunctionSignature},
vm::{
instruction::{ConstantId, Instruction},
pool::Pool,
value::Value,
},
};
/// An entry of a module's constant table.
#[derive(Debug, Clone, PartialEq, Eq, Hash)]
pub enum ModuleConstant {
    /// An integer value.
    Integer(i64),
    /// A function in the same module, identified by the address of its first
    /// instruction.
    LocalFunction(usize),
    /// An identifier.
    Identifier(Rc<str>),
}
/// Shared, reference-counted handle to a [`Module`].
///
/// Equality and the pointer/debug formatting are based on the identity of the
/// shared allocation, not on the module's contents.
#[derive(Clone)]
pub struct ModuleRef {
    inner: Rc<Module>,
}
/// A unit of bytecode: constants, encoded instructions and an entry address.
pub struct Module {
    /// Constant table, keyed by constant id.
    pub constants: HashMap<ConstantId, ModuleConstant>,
    /// Encoded instruction words.
    pub instructions: Vec<u32>,
    /// Address of the first instruction to execute when the module is loaded.
    pub entry: usize,
}
/// Incrementally assembles a [`Module`].
#[derive(Default)]
pub struct ModuleBuilder {
    /// Pool of constants; equal constants share one id.
    constants: Pool<ModuleConstant, { ConstantId::BITS }>,
    /// Encoded instruction words added so far.
    instructions: Vec<u32>,
    /// Entry address; must be set before the module is built.
    entry: Option<usize>,
}
impl ModuleRef {
    /// Returns `true` when both handles refer to the same module allocation.
    pub fn ptr_eq(a: &Self, b: &Self) -> bool {
        let (left, right) = (&a.inner, &b.inner);
        Rc::ptr_eq(left, right)
    }
}
impl From<Module> for ModuleRef {
    /// Moves the module into a new shared allocation.
    fn from(value: Module) -> Self {
        let inner = Rc::new(value);
        Self { inner }
    }
}
impl fmt::Debug for ModuleRef {
    /// Renders as `Module(<address of the shared module>)`.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_fmt(format_args!("Module({:p})", self.inner))
    }
}
impl fmt::Pointer for ModuleRef {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
fmt::Pointer::fmt(&self.inner, f)
}
}
impl PartialEq for ModuleRef {
fn eq(&self, other: &Self) -> bool {
Rc::ptr_eq(&self.inner, &other.inner)
}
}
impl Deref for ModuleRef {
    type Target = Module;

    /// Borrows the shared module.
    fn deref(&self) -> &Self::Target {
        &*self.inner
    }
}
#[cfg(test)]
impl Module {
    /// Test-only placeholder module: no constants, a single `0` instruction
    /// word, and entry address 0.
    pub fn dummy() -> Self {
        let instructions = vec![0];
        Self {
            constants: HashMap::default(),
            instructions,
            entry: 0,
        }
    }
}
impl Module {
    /// Returns the encoded instruction word at `ip`, if there is one.
    pub fn instruction(&self, ip: usize) -> Option<u32> {
        let word = self.instructions.get(ip)?;
        Some(*word)
    }

    /// Returns a copy of the constant with the given id, if there is one.
    pub fn constant(&self, id: ConstantId) -> Option<ModuleConstant> {
        let found = self.constants.get(&id)?;
        Some(found.clone())
    }

    /// Number of instruction words in the module.
    pub fn len(&self) -> usize {
        let Self { instructions, .. } = self;
        instructions.len()
    }

    /// Returns `true` when the module has no instructions.
    pub fn is_empty(&self) -> bool {
        let Self { instructions, .. } = self;
        instructions.is_empty()
    }

    /// Address of the first instruction executed when the module is loaded.
    pub fn entry(&self) -> usize {
        let Self { entry, .. } = self;
        *entry
    }

    /// Compiles a single value into a module.
    ///
    /// The value is parsed into an expression, which is compiled as the tail
    /// of a function with an empty signature and no preceding body
    /// expressions; the compilation module is then finalised.
    pub fn compile_value(value: &Value) -> Result<Self, CompileError> {
        let body = FunctionBody {
            head: vec![],
            tail: Rc::new(Expression::parse(value)?),
        };
        let mut compilation = CompilationModule::default();
        compilation.compile_function(FunctionSignature::EMPTY, &body, true)?;
        compilation.compile_module()
    }
}
impl ModuleBuilder {
    /// Creates an empty builder with no entry address set.
    pub fn new() -> Self {
        Default::default()
    }

    /// Finishes the builder.
    ///
    /// Panics when no entry address has been set.
    pub fn build(self) -> Module {
        let Self {
            constants,
            instructions,
            entry,
        } = self;
        Module {
            constants: constants.into_map(),
            instructions,
            entry: entry.unwrap(),
        }
    }

    /// Returns the id of `value` in the constant pool, adding it if needed.
    ///
    /// Panics when the pool cannot hand out another id.
    pub fn constant(&mut self, value: ModuleConstant) -> ConstantId {
        let id = self.constants.key(value);
        id.unwrap()
    }

    /// Appends every instruction from `insns`, encoded, in order.
    pub fn add_all<I: IntoIterator<Item = Instruction>>(&mut self, insns: I) -> &mut Self {
        for instruction in insns {
            self.instructions.push(u32::from(instruction));
        }
        self
    }

    /// Appends one encoded instruction.
    pub fn add(&mut self, instruction: Instruction) -> &mut Self {
        let word = u32::from(instruction);
        self.instructions.push(word);
        self
    }

    /// Sets the entry address of the module being built.
    pub fn entry(&mut self, entry: usize) -> &mut Self {
        self.entry = Some(entry);
        self
    }
}
#[cfg(test)]
mod tests {
    // Only the names this module uses are imported; the previous list also
    // pulled in `HashMap`, `ModuleConstant` and `Keyword`, which were unused.
    use crate::vm::{
        instruction::{Instruction, U},
        module::Module,
        value::Value,
    };

    /// `(+ 1 2)` compiles to two immediate pushes (last operand first), an
    /// `Add` of two operands and a `Return`, with no pooled constants.
    #[test]
    fn test_compile_value_basic() {
        let v = Value::list_or_nil([
            Value::Identifier("+".into()),
            Value::Integer(1),
            Value::Integer(2),
        ]);
        let m = Module::compile_value(&v).unwrap();
        assert!(m.constants.is_empty());
        let is = [
            Instruction::PushInteger(U::truncate(2)),
            Instruction::PushInteger(U::truncate(1)),
            Instruction::Add(U::truncate(2)),
            Instruction::Return,
        ]
        .into_iter()
        .map(Into::into)
        .collect::<Vec<u32>>();
        assert_eq!(m.instructions, is);
        assert_eq!(m.entry, 0);
    }
}
+50
View File
@@ -0,0 +1,50 @@
use std::{
collections::{HashMap, hash_map},
hash::Hash,
};
use crate::vm::instruction::U;
/// Deduplicating pool that assigns an `N`-bit key to each distinct value.
pub struct Pool<T, const N: usize> {
    /// Key assigned to each pooled value.
    map: HashMap<T, U<N>>,
    /// Most recently assigned key; starts at 0, so the first key handed out is 1.
    index: u32,
}
impl<T: Hash + Eq, const N: usize> Pool<T, N> {
    /// Returns the key for `value`, assigning the next free key on first use.
    ///
    /// Keys are handed out sequentially starting at 1. Returns `None`, without
    /// inserting anything, when the next key would not fit in `N` bits.
    pub fn key(&mut self, value: T) -> Option<U<N>> {
        let vacant = match self.map.entry(value) {
            hash_map::Entry::Occupied(known) => return Some(*known.get()),
            hash_map::Entry::Vacant(vacant) => vacant,
        };
        let fresh = U::new(self.index + 1)?;
        self.index += 1;
        vacant.insert(fresh);
        Some(fresh)
    }

    /// Consumes the pool and returns the inverse mapping from key to value.
    pub fn into_map(self) -> HashMap<U<N>, T> {
        self.map
            .into_iter()
            .map(|(value, key)| (key, value))
            .collect()
    }
}
impl<T, const N: usize> IntoIterator for Pool<T, N> {
    type Item = (T, U<N>);
    type IntoIter = <HashMap<T, U<N>> as IntoIterator>::IntoIter;

    /// Iterates over `(value, key)` pairs in the map's unspecified order.
    fn into_iter(self) -> Self::IntoIter {
        let Self { map, .. } = self;
        map.into_iter()
    }
}
impl<T, const N: usize> Default for Pool<T, N> {
    /// Creates an empty pool whose key counter is at 0.
    fn default() -> Self {
        let map = HashMap::default();
        Self { map, index: 0 }
    }
}
+3
View File
@@ -0,0 +1,3 @@
use crate::vm::machine::Machine;
/// Loading hook that receives the machine to populate.
///
/// NOTE(review): the body is currently empty, so calling this has no effect.
pub fn load(machine: &mut Machine) {}
+63
View File
@@ -0,0 +1,63 @@
use std::{fmt, mem::MaybeUninit, ops::Deref};
/// Fixed-capacity stack that grows downwards inside a preallocated buffer.
///
/// Slots in `data[pointer..]` hold initialised values, with the top of the
/// stack at `data[pointer]`; slots below `pointer` are uninitialised.
pub struct Stack<T> {
    /// Backing storage; its length is the capacity.
    data: Box<[MaybeUninit<T>]>,
    /// Index of the top element, or `data.len()` when the stack is empty.
    pointer: usize,
}
impl<T: fmt::Debug> fmt::Debug for Stack<T> {
    /// Shows the stack pointer and the live elements, top of stack first.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        // Deref coercion yields the slice of initialised elements.
        let live: &[T] = self;
        let mut out = f.debug_struct("Stack");
        out.field("pointer", &self.pointer);
        out.field("data", &live);
        out.finish()
    }
}
impl<T> Deref for Stack<T> {
    type Target = [T];

    /// Views the live elements as a slice, top of stack first.
    fn deref(&self) -> &Self::Target {
        let slice = &self.data[self.pointer..];
        // SAFETY: `push` initialises a slot right after moving `pointer` down
        // onto it and `pop` moves `pointer` up past a slot after reading it
        // out, so every slot in `pointer..` is initialised.
        unsafe { slice.assume_init_ref() }
    }
}
impl<T> Stack<T> {
pub fn new(limit: usize) -> Self {
assert_ne!(limit, 0);
Self {
data: Box::new_uninit_slice(limit),
pointer: limit,
}
}
pub fn current(&self) -> Option<&T> {
if self.pointer >= self.data.len() {
None
} else {
Some(unsafe { self.data[self.pointer].assume_init_ref() })
}
}
pub fn push(&mut self, value: T) -> Result<(), T> {
if self.pointer > 0 {
self.pointer -= 1;
self.data[self.pointer].write(value);
Ok(())
} else {
Err(value)
}
}
pub fn pop(&mut self) -> Option<T> {
if self.pointer >= self.data.len() {
None
} else {
let value = unsafe { self.data[self.pointer].assume_init_read() };
self.pointer += 1;
Some(value)
}
}
}
+176
View File
@@ -0,0 +1,176 @@
use std::{fmt, rc::Rc};
use crate::vm::{
machine::Machine,
module::{Module, ModuleRef},
};
/// Error type returned by native functions; it currently has no variants, so
/// no value of this type can exist.
#[derive(Debug)]
pub enum Error {}
/// A named function implemented in Rust that the machine can call.
#[derive(Debug, Clone)]
pub struct NativeFunction {
    /// Name of the function; also what equality is based on.
    name: Rc<str>,
    /// The implementation: receives the machine and the call arguments.
    inner: fn(&mut Machine, &[Value]) -> Result<Value, Error>,
}
/// A function implemented in bytecode: a module plus the address of its first
/// instruction.
#[derive(Debug, Clone, PartialEq)]
pub struct BytecodeFunction {
    /// Module containing the function's code.
    pub module: ModuleRef,
    /// Address of the function's first instruction within the module.
    pub address: usize,
}
/// A value of the language, used both for parsed expressions and at run time.
#[derive(Debug, Clone, PartialEq)]
pub enum Value {
    // "Expression" values
    /// The empty list / absence of a value; displayed as `NIL`.
    Nil,
    /// A boolean; displayed as `#T` or `#F`.
    Boolean(bool),
    /// A 64-bit signed integer.
    Integer(i64),
    /// An identifier.
    Identifier(Rc<str>),
    /// A shared cons cell (pair).
    Cons(Rc<ConsCell>),
    /// A reserved word.
    Keyword(Keyword),
    // "Runtime" values
    /// A callable function implemented in bytecode.
    BytecodeFunction(BytecodeFunction),
    /// A callable function implemented in Rust.
    NativeFunction(NativeFunction),
}
/// Reserved words recognised by the parser.
#[derive(Debug, Clone, PartialEq)]
pub enum Keyword {
    /// `lambda`
    Lambda,
    /// `defun`
    Defun,
    /// `&optional`
    Optional,
    /// `&rest`
    Rest,
}
/// A pair of values: the first element (car) and the rest (cdr).
#[derive(Debug, Clone, PartialEq)]
pub struct ConsCell(pub Value, pub Value);
impl Value {
    /// Returns `true` when the value is `NIL`.
    pub fn is_nil(&self) -> bool {
        matches!(self, Self::Nil)
    }

    /// Builds a cons cell with `self` as the first element and `cdr` as the rest.
    pub fn cons(self, cdr: Value) -> Self {
        Self::Cons(Rc::new(ConsCell(self, cdr)))
    }

    /// Builds a proper list from `items`; an empty input yields `NIL`.
    pub fn list_or_nil<I: IntoIterator<Item = Self>>(items: I) -> Self {
        Self::list_or_nil_inner(&mut items.into_iter())
    }

    /// Drains `items` and links the elements into a list terminated by `NIL`.
    ///
    /// The list is assembled back to front with a fold rather than by
    /// recursing once per element, so building a long list cannot overflow
    /// the call stack.
    fn list_or_nil_inner<I: Iterator<Item = Self>>(items: &mut I) -> Self {
        let elements: Vec<Self> = items.by_ref().collect();
        elements
            .into_iter()
            .rev()
            .fold(Self::Nil, |tail, head| head.cons(tail))
    }
}
impl ConsCell {
    /// Writes the elements of the list starting at this cell, without the
    /// surrounding parentheses.
    ///
    /// Elements are separated by single spaces; `first` suppresses the space
    /// before this cell's element. A tail that is neither `NIL` nor another
    /// cell is written in dotted form (` . tail`).
    fn fmt_inner(&self, f: &mut fmt::Formatter<'_>, first: bool) -> fmt::Result {
        let mut cell = self;
        let mut leading = first;
        loop {
            let Self(car, cdr) = cell;
            if !leading {
                write!(f, " ")?;
            }
            write!(f, "{car}")?;
            match cdr {
                Value::Nil => return Ok(()),
                Value::Cons(next) => {
                    cell = &**next;
                    leading = false;
                }
                _ => return write!(f, " . {cdr}"),
            }
        }
    }
}
impl NativeFunction {
    /// Wraps the function pointer `inner` under the given name.
    pub fn new<S: Into<Rc<str>>>(
        name: S,
        inner: fn(&mut Machine, &[Value]) -> Result<Value, Error>,
    ) -> Self {
        let name: Rc<str> = name.into();
        Self { name, inner }
    }

    /// Calls the wrapped function with the machine and the given arguments.
    pub fn apply(&self, machine: &mut Machine, arguments: &[Value]) -> Result<Value, Error> {
        let callback = self.inner;
        callback(machine, arguments)
    }
}
impl PartialEq for NativeFunction {
    /// Compares by name only; the wrapped function pointer is not considered.
    fn eq(&self, other: &Self) -> bool {
        self.name == other.name
    }
}
impl fmt::Display for ConsCell {
    /// Writes the list that starts at this cell, wrapped in parentheses.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str("(")?;
        self.fmt_inner(f, true)?;
        f.write_str(")")
    }
}
impl fmt::Display for NativeFunction {
    /// Renders as `<native "name" 0xADDR>`: the name in debug (quoted) form,
    /// followed by the address of the wrapped function pointer.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "<native {:?} {:p}>", self.name, self.inner)
    }
}
impl fmt::Display for BytecodeFunction {
    /// Renders as `<bytecode MODULE:ADDRESS>`, where `MODULE` is the pointer
    /// formatting of the module handle.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        write!(f, "<bytecode {:p}:{}>", self.module, self.address)
    }
}
impl fmt::Display for Keyword {
    /// Writes the keyword's source spelling.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        f.write_str(match self {
            Self::Lambda => "lambda",
            Self::Defun => "defun",
            Self::Optional => "&optional",
            Self::Rest => "&rest",
        })
    }
}
impl fmt::Display for Value {
    /// Writes the printed form of the value: `NIL`, `#T`/`#F`, the integer,
    /// the identifier or keyword text, the list in parentheses, or the
    /// function's own display form.
    fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
        match self {
            Self::Nil => f.write_str("NIL"),
            Self::Boolean(flag) => f.write_str(if *flag { "#T" } else { "#F" }),
            Self::Integer(number) => write!(f, "{number}"),
            Self::Identifier(name) => write!(f, "{name}"),
            Self::Keyword(word) => write!(f, "{word}"),
            Self::Cons(cell) => write!(f, "{cell}"),
            Self::BytecodeFunction(function) => write!(f, "{function}"),
            Self::NativeFunction(function) => write!(f, "{function}"),
        }
    }
}
#[cfg(test)]
mod tests {
    use crate::vm::value::{Keyword, Value};

    /// Printed forms of atoms, a dotted pair and a proper list.
    #[test]
    fn test_value_formatting() {
        let dotted_pair = Value::Boolean(false).cons(Value::Integer(1234));
        let proper_list = Value::Boolean(false)
            .cons(Value::Integer(1234).cons(Value::Keyword(Keyword::Lambda).cons(Value::Nil)));
        let cases = [
            (Value::Nil, "NIL"),
            (Value::Integer(1234), "1234"),
            (Value::Boolean(true), "#T"),
            (Value::Boolean(false), "#F"),
            (dotted_pair, "(#F . 1234)"),
            (proper_list, "(#F 1234 lambda)"),
        ];
        for (value, expected) in cases {
            assert_eq!(value.to_string(), expected);
        }
    }
}