Merge compiler and vm

This commit is contained in:
Mark Poliakov 2021-04-06 17:38:09 +03:00
parent 07a0266fea
commit 46d1de09bf
25 changed files with 1300 additions and 12 deletions

@ -1,17 +1,24 @@
O=build
SRCS=$(shell find src -name "*.c" -type f)
OBJS=$(SRCS:src/%.c=$(O)/%.o)
HDRS=$(shell find include -type f -name "*.h")
SRC_DIRS=$(shell find src -type d)
DIRS=$(SRC_DIRS:src/%=$(O)/%) $(O)
LISP2CC_DIR?=../lisp2cc
CFLAGS=-Iinclude \
-I$(LISP2CC_DIR)/include/core \
VM_OBJS=$(O)/vm/main.o \
$(O)/vm/vmstate.o \
$(O)/vm/vmstring.o \
$(O)/vm/vmval.o \
$(O)/core/vector.o
COMPILER_OBJS=$(O)/compiler/main.o \
$(O)/compiler/compile.o \
$(O)/compiler/node.o \
$(O)/compiler/parse.o \
$(O)/compiler/unit.o \
$(O)/core/vector.o \
$(O)/core/hash.o
CFLAGS=-Icore/include \
-Werror \
-Wall \
-Wextra
HDRS=$(shell find . -type f -name "*.h")
DIRS=$(shell find compiler core vm -type d -printf "$(O)/%p ")
all: $(DIRS) $(O)/lisp2
all: $(DIRS) $(O)/l2vm $(O)/l2c
clean:
rm -rf $(O)
@ -19,8 +26,17 @@ clean:
$(DIRS):
mkdir -p $@
$(O)/lisp2: $(OBJS)
$(CC) $(LDFLAGS) -o $@ $(OBJS)
$(O)/l2vm: $(VM_OBJS)
$(CC) $(LDFLAGS) -o $@ $(VM_OBJS)
$(O)/%.o: src/%.c $(HDRS)
$(O)/l2c: $(COMPILER_OBJS)
$(CC) $(LDFLAGS) -o $@ $(COMPILER_OBJS)
$(O)/vm/%.o: vm/%.c $(HDRS)
$(CC) -Ivm/include -c $(CFLAGS) -o $@ $<
$(O)/compiler/%.o: compiler/%.c $(HDRS)
$(CC) -Icompiler/include -c $(CFLAGS) -o $@ $<
$(O)/%.o: %.c $(HDRS)
$(CC) -c $(CFLAGS) -o $@ $<

307
compiler/compile.c Normal file

@ -0,0 +1,307 @@
#include "compile.h"
#include "unit.h"
#include "node.h"
#include "op.h"
#include <string.h>
#include <stdio.h>
int emit_arg_list(struct function *fn, struct context *ctx, struct node *expr) {
if (!expr) {
return 0;
}
assert(expr->type == N_CONS);
int e = emit_arg_list(fn, ctx, expr->n_cons.cdr);
emit(fn, ctx, expr->n_cons.car);
return e + 1;
}
void emit_function(struct context *ctx, struct function *fn) {
struct node *body = fn->body;
struct unit *root = ctx->root;
assert(ctx->owner == fn);
// Pass 0 - extract global scope
for (struct node *node = body; node; node = cdr(node)) {
struct node *expr = car(node);
if (pair_q(expr)) {
struct node *v0;
struct node *op = car(expr);
if (ident_q(op)) {
if (!strcmp(op->n_ident, "define")) {
v0 = cadr(expr);
if (ident_q(v0)) {
hash_insert(&ctx->vars, v0->n_ident, (void *) ctx->var_counter++);
} else if (cons_q(v0)) {
v0 = car(v0);
assert(ident_q(v0));
hash_insert(&ctx->vars, v0->n_ident, (void *) ctx->var_counter++);
} else {
printf("Unexpected expression: ");
vm_print(expr, 0);
printf("\n");
abort();
}
} else if (!strcmp(op->n_ident, "use")) {
v0 = cadr(expr);
assert(ident_q(v0));
// TODO `use`s should be local to their contexts
if (unit_ext_load(root, v0->n_ident) != 0) {
assert(0 && "Failed to load external unit\n");
}
}
}
}
}
// Pass 1 - emit code
// Emit entry
for (struct node *node = body; node; node = cdr(node)) {
struct node *expr = car(node);
emit(fn, ctx, expr);
}
// Obligatory program return
emit_insn(fn, OP(OP_RET, 0));
}
void emit(struct function *fn, struct context *ctx, struct node *expr) {
if (!expr) {
return;
}
struct node *n0, *n1, *n2, *n3;
int c0;
switch (expr->type) {
case N_CONS:
// Cons chain root - function call
n0 = expr->n_cons.car;
if (!n0) {
fprintf(stderr, "Call to nil\n");
abort();
}
if (n0->type != N_IDENT) {
fprintf(stderr, "TODO non-identifier calls\n");
abort();
}
// Only setq/define use this instruction
if (!strcmp(n0->n_ident, "define")) {
n0 = cadr(expr);
if (ident_q(n0)) {
int kind;
size_t index;
assert(ctx_lookup_name(ctx, n0->n_ident, &kind, &index) == 0);
assert(kind == NAME_GLOBAL);
n1 = caddr(expr);
assert(null_q(cdr(cddr(expr))));
// (define ident value)
// Emit value for n1
emit(fn, ctx, n1);
emit_insn(fn, OP(OP_STG, index));
} else if (cons_q(n0)) {
struct node *name = car(n0);
struct node *args = cdr(n0);
struct node *body = cddr(expr);
assert(ident_q(name));
int kind;
size_t index;
assert(ctx_lookup_name(ctx, name->n_ident, &kind, &index) == 0);
assert(kind == NAME_GLOBAL);
struct function *new_fn = unit_lambda(ctx->root);
struct context new_ctx;
new_fn->args = args;
new_fn->body = body;
ctx_init(&new_ctx, ctx);
new_ctx.owner = new_fn;
emit_function(&new_ctx, new_fn);
// TODO do this at compile time for named functions?
emit_insn(fn, OP(OP_LDF, new_fn->index));
emit_insn(fn, OP(OP_STG, index));
}
return;
} else if (!strcmp(n0->n_ident, "setq")) {
n0 = cadr(expr);
n1 = caddr(expr);
assert(null_q(cdr(cddr(expr))));
int kind;
size_t index;
assert(ctx_lookup_name(ctx, n0->n_ident, &kind, &index) == 0);
assert(kind == NAME_GLOBAL);
emit(fn, ctx, n1);
emit_insn(fn, OP(OP_STG, index));
return;
} else if (!strcmp(n0->n_ident, "use")) {
// Ignore
return;
}
// Check for macros/builtin operators
if (!strcmp(n0->n_ident, "if")) {
n1 = cadr(expr); // condition
n2 = caddr(expr); // yes-expr
n3 = car(cdr(cddr(expr))); // no-expr
assert(null_q(cddr(cddr(expr))));
emit(fn, ctx, n1);
// Will hop back and modify an already emitted jump with a proper
// offset
size_t cond_jmp_loc = fn->bytecode.size;
emit_insn(fn, OP(OP_BF, 0));
// Emit "yes" branch
emit(fn, ctx, n2);
size_t yes_jmp_loc = fn->bytecode.size;
emit_insn(fn, OP(OP_JMP, 0));
// Emit "no" branch
size_t no_loc = fn->bytecode.size;
emit(fn, ctx, n3);
size_t end_loc = fn->bytecode.size;
// Fix up jumps
uint32_t *cond_jmp, *yes_jmp;
cond_jmp = vector_ref(&fn->bytecode, cond_jmp_loc);
yes_jmp = vector_ref(&fn->bytecode, yes_jmp_loc);
*cond_jmp |= (no_loc - cond_jmp_loc) & 0xFFFFFF;
*yes_jmp |= (end_loc - yes_jmp_loc) & 0xFFFFFF;
return;
} else if (!strcmp(n0->n_ident, "while")) {
n1 = cadr(expr); // condition
n2 = cddr(expr); // list of expressions
assert(!null_q(n2));
size_t begin_loc = fn->bytecode.size;
emit(fn, ctx, n1);
size_t cond_jmp_loc = fn->bytecode.size;
emit_insn(fn, OP(OP_BF, 0));
for (struct node *item = n2; item; item = cdr(item)) {
struct node *stmt = car(item);
emit(fn, ctx, stmt);
}
// Jump back to the beginning
size_t end_jmp_loc = fn->bytecode.size;
ssize_t diff = (ssize_t) begin_loc - (ssize_t) end_jmp_loc;
emit_insn(fn, OP(OP_JMP, diff));
size_t end_loc = fn->bytecode.size;
uint32_t *cond_jmp;
cond_jmp = vector_ref(&fn->bytecode, cond_jmp_loc);
*cond_jmp |= (end_loc - cond_jmp_loc) & 0xFFFFFF;
return;
}
c0 = emit_arg_list(fn, ctx, expr->n_cons.cdr);
if (!strcmp(n0->n_ident, "+")) {
for (int i = 1; i < c0; ++i) {
emit_insn(fn, OP(OP_ADD, 0));
}
} else if (!strcmp(n0->n_ident, "-")) {
for (int i = 1; i < c0; ++i) {
emit_insn(fn, OP(OP_SUB, 0));
}
} else if (!strcmp(n0->n_ident, "*")) {
for (int i = 1; i < c0; ++i) {
emit_insn(fn, OP(OP_MUL, 0));
}
} else if (!strcmp(n0->n_ident, "/") && c0 == 2) {
emit_insn(fn, OP(OP_DIV, 0));
} else if (!strcmp(n0->n_ident, "%") && c0 == 2) {
emit_insn(fn, OP(OP_MOD, 0));
} else if (!strcmp(n0->n_ident, "cons") && c0 == 2) {
emit_insn(fn, OP(OP_CONS, 0));
} else if (!strcmp(n0->n_ident, "=") && c0 == 2) {
emit_insn(fn, OP(OP_EQ, 0));
} else if (!strcmp(n0->n_ident, "/=") && c0 == 2) {
emit_insn(fn, OP(OP_NEQ, 0));
} else if (!strcmp(n0->n_ident, ">") && c0 == 2) {
emit_insn(fn, OP(OP_GT, 0));
} else if (!strcmp(n0->n_ident, ">=") && c0 == 2) {
emit_insn(fn, OP(OP_GE, 0));
} else if (!strcmp(n0->n_ident, "<") && c0 == 2) {
emit_insn(fn, OP(OP_LT, 0));
} else if (!strcmp(n0->n_ident, "<=") && c0 == 2) {
emit_insn(fn, OP(OP_LE, 0));
} else if (!strcmp(n0->n_ident, "null?") && c0 == 1) {
emit_insn(fn, OP(OP_TEST, 0));
} else if (!strcmp(n0->n_ident, "not") && c0 == 1) {
emit_insn(fn, OP(OP_NOT, 0));
} else if (!strcmp(n0->n_ident, "car") && c0 == 1) {
emit_insn(fn, OP(OP_CAR, 0));
} else if (!strcmp(n0->n_ident, "cdr") && c0 == 1) {
emit_insn(fn, OP(OP_CDR, 0));
} else {
size_t index;
// TODO lookup unit-local function
// TODO function signature validation?
// Lookup external reference in the list, if already present
if (unit_ext_ref(ctx, n0->n_ident, &index) == 0) {
emit_insn(fn, OP(OP_XCALL, index));
} else {
printf("Undefined reference to function ");
vm_print(n0, 0);
printf("\n");
abort();
}
}
return;
case N_INTEGER:
if (expr->n_integer < 0x7FFFFF && expr->n_integer > -0x7FFFFF) {
emit_insn(fn, OP(OP_LDI, expr->n_integer));
} else {
printf("ldc ...\n");
}
return;
case N_IDENT:
if (!strcmp(expr->n_ident, "nil")) {
emit_insn(fn, OP(OP_LDNIL, 0));
} else if (!strcmp(expr->n_ident, "t")) {
emit_insn(fn, OP(OP_LDI, 1));
} else {
int kind;
size_t index;
if (ctx_lookup_name(ctx, expr->n_ident, &kind, &index) != 0) {
printf("Unresolved reference to: ");
vm_print(expr, 0);
printf("\n");
abort();
}
switch (kind) {
case NAME_GLOBAL:
emit_insn(fn, OP(OP_LDG, index));
break;
case NAME_ARGUMENT:
emit_insn(fn, OP(OP_LDARG, index));
break;
default:
abort();
}
}
return;
default:
printf("Unexpected value: ");
vm_print(expr, 0);
printf("\n");
abort();
}
}

@ -0,0 +1,8 @@
#pragma once
struct context;
struct function;
struct node;
void emit(struct function *fn, struct context *ctx, struct node *expr);
void emit_function(struct context *ctx, struct function *fn);

70
compiler/include/node.h Normal file

@ -0,0 +1,70 @@
#pragma once
#include <stdint.h>
#include <assert.h>
#include <stdlib.h>
enum node_type {
N_CONS,
N_IDENT,
N_INTEGER
};
struct node {
enum node_type type;
union {
struct {
struct node *car, *cdr;
} n_cons;
char *n_ident;
intmax_t n_integer;
};
};
static inline int cons_q(struct node *n) {
return !n || n->type == N_CONS;
}
static inline int null_q(struct node *n) {
return n == NULL;
}
static inline int pair_q(struct node *n) {
return n && n->type == N_CONS;
}
static inline int ident_q(struct node *n) {
return n && n->type == N_IDENT;
}
static inline int integer_q(struct node *n) {
return n && n->type == N_INTEGER;
}
static inline struct node *car(struct node *n) {
assert(pair_q(n));
return n->n_cons.car;
}
static inline struct node *cdr(struct node *n) {
assert(pair_q(n));
return n->n_cons.cdr;
}
static inline struct node *cadr(struct node *n) {
return car(cdr(n));
}
static inline struct node *caddr(struct node *n) {
return car(cdr(cdr(n)));
}
static inline struct node *cddr(struct node *n) {
return cdr(cdr(n));
}
struct node *cons(struct node *a, struct node *b);
struct node *ident(const char *i);
struct node *integer(intmax_t v);
struct node *list(int n, ...);
void vm_print(const struct node *node, int depth);

15
compiler/include/parse.h Normal file

@ -0,0 +1,15 @@
#pragma once
struct node;
struct vm_parser {
int ch;
void *ctx;
int (*peek)(struct vm_parser *p);
int (*pop)(struct vm_parser *p);
};
int vm_parse(struct vm_parser *p, struct node **out);
void vm_str_parser(struct vm_parser *p, const char *expr);
int vm_parse_str(const char *expr, struct node **out);

57
compiler/include/unit.h Normal file

@ -0,0 +1,57 @@
#pragma once
#include "hash.h"
#include "vector.h"
#define NAME_GLOBAL 1
#define NAME_LOCAL 2
#define NAME_EXTERNAL 3
#define NAME_ARGUMENT 4
struct unit;
struct context {
struct unit *root;
struct context *parent;
struct function *owner;
size_t var_counter;
struct hash vars;
};
struct ext_ref {
size_t unit_index;
size_t entry_index;
char name[64];
};
struct ext_unit_entry {
char name[64];
};
struct ext_unit {
size_t entry_count;
struct ext_unit_entry *entries;
char name[64];
};
struct function {
size_t index;
struct node *args, *body;
struct vector bytecode;
};
struct unit {
struct context global;
struct vector functions;
struct vector ext_refs;
struct vector ext_units;
};
void unit_init(struct unit *u);
void ctx_init(struct context *c, struct context *p);
struct function *unit_lambda(struct unit *u);
int unit_ext_ref(struct context *ctx, const char *name, size_t *index);
int unit_ext_load(struct unit *self, const char *unit);
int ctx_lookup_name(struct context *ctx, const char *name, int *kind, size_t *index);
void emit_insn(struct function *fn, uint32_t insn);

126
compiler/main.c Normal file

@ -0,0 +1,126 @@
#include <stdint.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <stdio.h>
#include "compile.h"
#include "binary.h"
#include "parse.h"
#include "hash.h"
#include "unit.h"
#include "node.h"
static struct node *read_program(const char *text) {
struct vm_parser p;
struct node *head, *tail, *expr;
vm_str_parser(&p, text);
head = NULL;
while (vm_parse(&p, &expr) == 0) {
expr = cons(expr, NULL);
if (head) {
tail->n_cons.cdr = expr;
} else {
head = expr;
}
tail = expr;
}
return head;
}
static void write_unit(FILE *fp, struct unit *u) {
struct bin_header hdr;
size_t offset = sizeof(struct bin_header);
// Step 1 generate offsets
hdr.magic = 0xCEBAB123;
hdr.version = 1;
hdr.global_pool_size = u->global.var_counter;
hdr.unit_table_offset = offset;
hdr.unit_table_size = u->ext_units.size;
for (size_t i = 0; i < u->ext_units.size; ++i) {
struct ext_unit *unit = vector_ref(&u->ext_units, i);
offset += sizeof(struct bin_unit_entry) + strlen(unit->name) + 1;
}
hdr.ref_table_offset = offset;
hdr.ref_table_size = u->ext_refs.size;
for (size_t i = 0; i < u->ext_refs.size; ++i) {
struct ext_ref *ref = vector_ref(&u->ext_refs, i);
offset += sizeof(struct bin_ref_entry) + strlen(ref->name) + 1;
}
hdr.export_table_offset = offset;
hdr.export_table_size = 0;
hdr.func_table_offset = offset;
hdr.func_table_size = u->functions.size;
for (size_t i = 0; i < u->functions.size; ++i) {
struct function *fn = vector_ref(&u->functions, i);
offset += sizeof(struct bin_func_entry) + fn->bytecode.size;
}
fwrite(&hdr, 1, sizeof(struct bin_header), fp);
for (size_t i = 0; i < u->ext_units.size; ++i) {
struct ext_unit *unit = vector_ref(&u->ext_units, i);
struct bin_unit_entry bin_unit;
bin_unit.name_len = strlen(unit->name);
fwrite(&bin_unit, 1, sizeof(struct bin_unit_entry), fp);
fwrite(unit->name, 1, bin_unit.name_len + 1, fp);
}
for (size_t i = 0; i < u->ext_refs.size; ++i) {
struct ext_ref *ref = vector_ref(&u->ext_refs, i);
struct bin_ref_entry bin_ref;
bin_ref.unit_index = ref->unit_index;
bin_ref.name_len = strlen(ref->name);
fwrite(&bin_ref, 1, sizeof(struct bin_ref_entry), fp);
fwrite(ref->name, 1, bin_ref.name_len + 1, fp);
}
for (size_t i = 0; i < u->functions.size; ++i) {
struct function *fn = vector_ref(&u->functions, i);
struct bin_func_entry bin_func;
size_t argc = 0;
for (struct node *arg = fn->args; arg; arg = cdr(arg)) {
++argc;
}
bin_func.argc = argc;
bin_func.len = fn->bytecode.size * sizeof(uint32_t);
fwrite(&bin_func, 1, sizeof(struct bin_func_entry), fp);
fwrite(fn->bytecode.data, 1, fn->bytecode.size * sizeof(uint32_t), fp);
}
}
int main(void) {
struct unit unit;
struct node *program;
FILE *fp;
const char *code =
"(use core)"
"(define in (cons 1 (cons 2 (cons 3 (cons 4 nil)))))"
"(while (not (null? in))"
" (print (car in))"
" (setq in (cdr in))"
")"
;
unit_init(&unit);
program = read_program(code);
struct function *main = unit_lambda(&unit);
main->args = NULL;
main->body = program;
unit.global.owner = main;
emit_function(&unit.global, main);
fp = fopen("output.vmx", "wb");
if (!fp) {
perror("output.vmx");
return -1;
}
write_unit(fp, &unit);
fclose(fp);
return 0;
}

80
compiler/node.c Normal file

@ -0,0 +1,80 @@
#include "node.h"
#include <string.h>
#include <stdarg.h>
#include <stdlib.h>
#include <stdio.h>
struct node *cons(struct node *a, struct node *b) {
struct node *n = malloc(sizeof(struct node));
n->type = N_CONS;
n->n_cons.car = a;
n->n_cons.cdr = b;
return n;
}
struct node *ident(const char *text) {
struct node *n = malloc(sizeof(struct node));
n->type = N_IDENT;
n->n_ident = strdup(text);
return n;
}
struct node *integer(intmax_t v) {
struct node *n = malloc(sizeof(struct node));
n->type = N_INTEGER;
n->n_integer = v;
return n;
}
struct node *list(int n, ...) {
struct node *head, *tail, *pair;
va_list ap;
va_start(ap, n);
head = NULL;
for (int i = 0; i < n; ++i) {
pair = cons(va_arg(ap, struct node *), NULL);
if (head) {
tail->n_cons.cdr = pair;
} else {
head = pair;
}
tail = pair;
}
va_end(ap);
return head;
}
void vm_print(const struct node *node, int depth) {
if (!node) {
printf("nil");
return;
}
switch (node->type) {
case N_IDENT:
printf("%s", node->n_ident);
break;
case N_INTEGER:
printf("%zd", node->n_integer);
break;
case N_CONS:
if (!depth) {
printf("(");
}
vm_print(node->n_cons.car, 0);
if (node->n_cons.cdr) {
printf(" ");
if (node->n_cons.cdr->type == N_CONS) {
vm_print(node->n_cons.cdr, depth + 1);
} else {
printf(". ");
vm_print(node->n_cons.cdr, 0);
}
}
if (!depth) {
printf(")");
}
break;
}
}

222
compiler/parse.c Normal file

@ -0,0 +1,222 @@
#include "parse.h"
#include "node.h"
#include <string.h>
#include <assert.h>
#include <stdlib.h>
#include <errno.h>
#include <stdio.h>
static int vm_str_parser_peek(struct vm_parser *p) {
return *(const char *) p->ctx;
}
static int vm_str_parser_pop(struct vm_parser *p) {
char c = *(const char *) p->ctx;
if (c) {
++p->ctx;
}
return c;
}
static inline int is_space(int ch) {
return ch == ' ' || ch == '\t' || ch == '\n';
}
static inline int is_digit(int ch) {
return ch >= '0' && ch <= '9';
}
static inline int is_ident0(int ch) {
return (ch >= 'a' && ch <= 'z') ||
(ch >= 'A' && ch <= 'Z') ||
strchr("+-*&^%$@!~<>/.", ch);
}
static int skip_whitespace(struct vm_parser *in) {
while (is_space(in->peek(in))) {
assert(in->pop(in) > 0);
}
return 0;
}
static int vm_parse_int(struct vm_parser *in, enum node_type *_type, intmax_t *_value) {
int ch;
intmax_t value = 0;
//char type_str[16];
//size_t idx;
enum node_type type;
ch = in->peek(in);
if (ch == '0') {
assert(in->pop(in) == ch);
ch = in->peek(in);
if (ch == 'x') {
assert(in->pop(in) == ch);
assert(0 && "TODO hex\n");
}
} else {
while (1) {
ch = in->peek(in);
if (!ch || (ch >= 'a' && ch <= 'z') || strchr(" \n\t()", ch)) {
break;
}
if (!is_digit(ch)) {
return -EINVAL;
}
assert(in->pop(in) == ch);
value *= 10;
value += ch - '0';
}
}
type = N_INTEGER;
//ch = in->peek(in);
//idx = 0;
//if (ch >= 'a' && ch <= 'z') {
// while ((ch = in->peek(in)) && ((ch >= 'a' && ch <= 'z') || (ch >= '0' && ch <= '9'))) {
// assert(idx < sizeof(type_str) - 1);
// assert(in->pop(in) == ch);
// type_str[idx++] = ch;
// }
// type_str[idx] = 0;
//}
//if (idx) {
// if (!strcmp(type_str, "usize") || !strcmp(type_str, "u")) {
// type = VT_USIZE;
// } else if (!strcmp(type_str, "i8")) {
// type = VT_I8;
// } else if (!strcmp(type_str, "u8")) {
// type = VT_U8;
// } else if (!strcmp(type_str, "i16")) {
// type = VT_I16;
// } else if (!strcmp(type_str, "u16")) {
// type = VT_U16;
// } else if (!strcmp(type_str, "i32")) {
// type = VT_I32;
// } else if (!strcmp(type_str, "u32")) {
// type = VT_U32;
// } else {
// panic("Unknown type suffix: %s\n", type_str);
// }
//} else {
// type = VT_I32;
//}
*_value = value;
*_type = type;
return 0;
}
int vm_parse(struct vm_parser *in, struct node **out) {
(void) out;
int res;
int ch;
if ((res = skip_whitespace(in)) != 0) {
return res;
}
ch = in->peek(in);
if (ch == 0) {
return -1;
} else if (ch == '(') {
struct node *head, *tail, *car, *pair;
assert(in->pop(in) == ch);
head = NULL;
while (1) {
if ((res = skip_whitespace(in)) != 0) {
return res;
}
ch = in->peek(in);
if (ch == ')') {
assert(in->pop(in) == ch);
*out = head;
break;
}
if ((res = vm_parse(in, &car)) != 0) {
return res;
}
pair = cons(car, NULL);
if (head) {
tail->n_cons.cdr = pair;
} else {
head = pair;
}
tail = pair;
}
return 0;
} else if (is_digit(ch)) {
intmax_t value;
enum node_type type;
if ((res = vm_parse_int(in, &type, &value)) != 0) {
return res;
}
(void) type;
*out = integer(value);
return 0;
} else if (ch == '#') {
assert(in->pop(in) == ch);
ch = in->peek(in);
if (ch == '\\') {
assert(in->pop(in) == ch);
ch = in->peek(in);
switch (ch) {
case 'n':
*out = integer('\n');
break;
default:
fprintf(stderr, "Unknown escape sequence: #\\%c\n", ch);
abort();
}
assert(in->pop(in) == ch);
} else if (!ch) {
return -EINVAL;
} else {
assert(in->pop(in) == ch);
*out = integer(ch);
}
return 0;
} else if (is_ident0(ch)) {
char buf[24];
size_t i = 0;
assert(in->pop(in) == ch);
while (1) {
assert(i < sizeof(buf) - 1);
buf[i++] = ch;
ch = in->peek(in);
if (!ch || strchr(" \n\t()", ch)) {
break;
}
assert(in->pop(in) == ch);
}
buf[i] = 0;
*out = ident(buf);
return 0;
}
fprintf(stderr, "Unrecognized character: #%c (%d)\n", ch, ch);
abort();
}
void vm_str_parser(struct vm_parser *p, const char *expr) {
p->ctx = (void *) expr;
p->ch = 0;
p->peek = vm_str_parser_peek;
p->pop = vm_str_parser_pop;
}
int vm_parse_str(const char *expr, struct node **out) {
struct vm_parser p;
vm_str_parser(&p, expr);
return vm_parse(&p, out);
}

129
compiler/unit.c Normal file

@ -0,0 +1,129 @@
#include "unit.h"
#include "node.h"
#include <string.h>
#include <stdlib.h>
#include <assert.h>
#include <errno.h>
#include <stdio.h>
static struct ext_unit_entry u_core_entries[] = {
{ "print" },
{ "file-open" },
{ "file-read" },
{ "file-write" },
{ "file-close" },
{ "file-seek" },
{ "exit" }
};
void unit_init(struct unit *u) {
shash_init(&u->global.vars, 16);
u->global.root = u;
u->global.parent = NULL;
vector_init(&u->functions, sizeof(struct function));
vector_init(&u->ext_refs, sizeof(struct ext_ref));
vector_init(&u->ext_units, sizeof(struct ext_unit));
}
void ctx_init(struct context *ctx, struct context *parent) {
ctx->root = parent->root;
ctx->parent = parent;
shash_init(&ctx->vars, 16);
}
struct function *unit_lambda(struct unit *u) {
size_t index = u->functions.size;
struct function *f = vector_append(&u->functions);
f->index = index;
vector_init(&f->bytecode, sizeof(uint32_t));
return f;
}
void emit_insn(struct function *fn, uint32_t insn) {
uint32_t *p = vector_append(&fn->bytecode);
*p = insn;
}
int unit_ext_ref(struct context *ctx, const char *name, size_t *index) {
(void) ctx;
(void) name;
(void) index;
struct unit *root = ctx->root;
// Try looking up an existing reference entry
for (size_t i = 0; i < root->ext_refs.size; ++i) {
struct ext_ref *ref = vector_ref(&root->ext_refs, i);
if (!strcmp(name, ref->name)) {
*index = i;
return 0;
}
}
// Lookup the name in external units
for (size_t i = 0; i < root->ext_units.size; ++i) {
struct ext_unit *unit = vector_ref(&root->ext_units, i);
for (size_t j = 0; j < unit->entry_count; ++j) {
struct ext_unit_entry *ent = &unit->entries[j];
if (!strcmp(ent->name, name)) {
*index = root->ext_refs.size;
struct ext_ref *ref = vector_append(&root->ext_refs);
ref->unit_index = i;
ref->entry_index = j;
strcpy(ref->name, name);
return 0;
}
}
}
return -ENOENT;
}
// TODO non-builtin units
int unit_ext_load(struct unit *self, const char *name) {
if (!strcmp(name, "core")) {
struct ext_unit *unit = vector_append(&self->ext_units);
unit->entry_count = sizeof(u_core_entries) / sizeof(u_core_entries[0]);
unit->entries = u_core_entries;
strcpy(unit->name, name);
return 0;
}
return -ENOENT;
}
// TODO external references
int ctx_lookup_name(struct context *ctx, const char *name, int *kind, size_t *index) {
struct hash_pair *p;
p = hash_lookup(&ctx->vars, name);
if (p) {
if (ctx->parent) {
printf("TODO local contexts\n");
abort();
}
*kind = NAME_GLOBAL;
*index = (size_t) p->value;
return 0;
}
// Lookup argument
assert(ctx->owner);
size_t arg_i = 0;
for (struct node *node = ctx->owner->args; node; node = cdr(node), ++arg_i) {
struct node *arg = car(node);
assert(ident_q(arg));
if (!strcmp(arg->n_ident, name)) {
*kind = NAME_ARGUMENT;
*index = arg_i;
return 0;
}
}
if (ctx->parent) {
return ctx_lookup_name(ctx->parent, name, kind, index);
}
return -ENOENT;
}

88
core/hash.c Normal file

@ -0,0 +1,88 @@
#include <stddef.h>
#include <stdlib.h>
#include <string.h>
#include <assert.h>
#include <errno.h>
#include "hash.h"
#define HASH_CHECK_DUP
static size_t shash_hash(const void *_s) {
const char *s = _s;
size_t hash = 5381;
int c;
while ((c = *s++)) {
hash = ((hash << 5) + hash) + c;
}
return hash;
}
static struct hash_pair *shash_pair_new(void *key, void *value) {
// Must fit into 128-sized block
assert(strlen(key) < 128 - sizeof(struct hash_pair));
struct hash_pair *res;
res = calloc(sizeof(struct hash_pair), 1);
if (!res) {
return NULL;
}
res->key = (void *) res + sizeof(struct hash_pair);
res->value = value;
strcpy((void *) res + sizeof(struct hash_pair), key);
list_head_init(&res->link);
return res;
}
static void shash_pair_free(struct hash_pair *pair) {
free(pair);
}
int shash_init(struct hash *h, size_t cap) {
h->hash = shash_hash;
h->pair_new = shash_pair_new;
h->pair_free = shash_pair_free;
h->keycmp = (int (*) (const void *, const void *)) strcmp;
h->bucket_count = cap;
h->buckets = malloc(sizeof(struct list_head) * cap);
if (!h->buckets) {
return -ENOMEM;
}
for (size_t i = 0; i < cap; ++i) {
list_head_init(&h->buckets[i]);
}
return 0;
}
int hash_insert(struct hash *h, const void *key, void *value) {
#if defined(HASH_CHECK_DUP)
assert(!hash_lookup(h, key));
#endif
struct hash_pair *pair = h->pair_new((void *) key, value);
if (!pair) {
return -ENOMEM;
}
size_t index = h->hash(key) % h->bucket_count;
list_add(&pair->link, &h->buckets[index]);
return 0;
}
struct hash_pair *hash_lookup(struct hash *h, const void *key) {
size_t index = h->hash(key) % h->bucket_count;
struct hash_pair *pair;
list_for_each_entry(pair, &h->buckets[index], link) {
if (!h->keycmp(pair->key, key)) {
return pair;
}
}
return NULL;
}

34
core/include/binary.h Normal file

@ -0,0 +1,34 @@
#pragma once
#include <stdint.h>
struct bin_header {
uint32_t magic;
uint32_t version;
uint32_t global_pool_size;
uint32_t unit_table_offset;
uint32_t unit_table_size;
uint32_t ref_table_offset;
uint32_t ref_table_size;
uint32_t export_table_offset;
uint32_t export_table_size;
uint32_t func_table_offset;
uint32_t func_table_size;
};
struct bin_unit_entry {
uint32_t name_len;
// TODO move all of this into strtable
char name[0];
};
struct bin_ref_entry {
uint32_t unit_index;
uint32_t name_len;
char name[0];
};
struct bin_func_entry {
uint32_t argc;
uint32_t len;
uint32_t data[0];
};

25
core/include/hash.h Normal file

@ -0,0 +1,25 @@
#pragma once
#include <stdint.h>
#include <stddef.h>
#include "list.h"
struct hash_pair {
void *key, *value;
struct list_head link;
};
#define HASH_DEBUG
struct hash {
size_t (*hash) (const void *);
struct hash_pair *(*pair_new) (void *, void *);
void (*pair_free) (struct hash_pair *);
int (*keycmp) (const void *, const void *);
size_t bucket_count;
struct list_head *buckets;
};
int shash_init(struct hash *h, size_t cap);
int hash_insert(struct hash *h, const void *key, void *value);
struct hash_pair *hash_lookup(struct hash *h, const void *key);

71
core/include/list.h Normal file

@ -0,0 +1,71 @@
#pragma once
struct list_head {
struct list_head *prev, *next;
};
#define LIST_HEAD(name) \
struct list_head name = { &name, &name }
#define list_for_each(pos, head) \
for (pos = (head)->next; pos != (head); pos = pos->next)
#define list_for_each_entry(pos, head, member) \
for (pos = list_entry((head)->next, typeof(*pos), member); \
&pos->member != (head); \
pos = list_entry(pos->member.next, typeof(*pos), member))
#define list_entry(link, type, member) ({ \
const typeof (((type *) 0)->member) *__memb = (link); \
(type *) ((char *) __memb - offsetof(type, member)); \
})
#define list_next_entry(pos, member) \
list_entry((pos)->member.next, typeof(*(pos)), member)
#define list_first_entry(ptr, type, member) \
list_entry((ptr)->next, type, member)
#define list_for_each_safe(pos, n, head) \
for (pos = (head)->next, n = pos->next; pos != (head); \
pos = n, n = pos->next)
static inline void list_head_init(struct list_head *list) {
list->next = list;
list->prev = list;
}
static inline void __list_add(struct list_head *new,
struct list_head *prev,
struct list_head *next) {
next->prev = new;
new->next = next;
new->prev = prev;
prev->next = new;
}
static inline void list_add(struct list_head *new, struct list_head *head) {
__list_add(new, head, head->next);
}
static inline void list_add_tail(struct list_head *new, struct list_head *head) {
__list_add(new, head->prev, head);
}
static inline void __list_del(struct list_head *prev, struct list_head *next) {
next->prev = prev;
prev->next = next;
}
static inline void list_del(struct list_head *entry) {
__list_del(entry->prev, entry->next);
}
static inline int list_empty(const struct list_head *head) {
return head->next == head;
}
static inline void list_del_init(struct list_head *entry) {
__list_del(entry->prev, entry->next);
list_head_init(entry);
}

40
core/include/op.h Normal file

@ -0,0 +1,40 @@
#pragma once
#define OP(x, y) \
(((x) << 24) | ((y) & 0xFFFFFF))
#define OP_ADD 0x04
#define OP_SUB 0x05
#define OP_MUL 0x06
#define OP_DIV 0x07
#define OP_MOD 0x08
#define OP_NOT 0x0C
#define OP_EQ 0x12
#define OP_NEQ 0x13
#define OP_GT 0x14
#define OP_GE 0x15
#define OP_LT 0x16
#define OP_LE 0x17
#define OP_BNORM 0x28
#define OP_LDC 0x30
#define OP_LDNIL 0x31
#define OP_LDI 0x32
#define OP_CAR 0x40
#define OP_CDR 0x41
#define OP_CONS 0x45
#define OP_TEST 0x46
#define OP_LDARG 0x4B
#define OP_LDG 0x4E
#define OP_STG 0x4F
#define OP_LDF 0x50
#define OP_XCALL 0x60
#define OP_LCALL 0x61
#define OP_JMP 0x62
#define OP_BT 0x63
#define OP_BF 0x64
#define OP_RET 0x6F