Support for nested functions, split builtins

This commit is contained in:
2021-04-08 20:47:31 +03:00
parent cffbb0a94d
commit e15327e2e5
6 changed files with 298 additions and 233 deletions
+1
View File
@@ -16,6 +16,7 @@ COMPILER_OBJS=$(O)/compiler/main.o \
$(O)/compiler/node.o \
$(O)/compiler/parse.o \
$(O)/compiler/unit.o \
$(O)/compiler/builtin.o \
$(O)/core/vector.o \
$(O)/core/hash.o
CFLAGS=-Icore/include \
+229
View File
@@ -0,0 +1,229 @@
#include "builtin.h"
#include "compile.h"
#include "vector.h"
#include "unit.h"
#include "node.h"
#include "op.h"
#include <assert.h>
#include <string.h>
#include <stdio.h>
int emit_builtin_syntax(struct function *fn,
struct context *ctx,
const char *name,
struct node *tail) {
struct node *arg0, *arg1, *arg2;
int kind;
size_t index;
// Only setq/define use this instruction
// TODO your defmacro could be here...
if (!strcmp(name, "define")) {
arg0 = car(tail);
if (ident_q(arg0)) {
arg1 = cadr(tail);
assert(null_q(cddr(tail)));
if (ctx_lookup_name(ctx, arg0->n_ident, &kind, &index) != 0) {
fprintf(stderr, "Unresolved reference to %s\n", arg0->n_ident);
abort();
}
// (define ident value)
// Emit value for n1
emit(fn, ctx, arg1);
emit_store(fn, kind, index);
} else if (cons_q(arg0)) {
struct node *name = car(arg0);
struct node *args = cdr(arg0);
struct node *body = cdr(tail);
assert(ident_q(name));
assert(ctx_lookup_name(ctx, name->n_ident, &kind, &index) == 0);
struct function *new_fn = unit_lambda(ctx->root);
struct context new_ctx;
new_fn->args = args;
new_fn->body = body;
new_fn->local_count = 0;
if (ctx->parent) {
ctx_init(&new_ctx, ctx->parent);
} else {
ctx_init(&new_ctx, ctx);
}
new_ctx.var_counter = 0;
new_ctx.owner = new_fn;
emit_function(&new_ctx, new_fn);
emit_insn(fn, OP(OP_LDF, new_fn->index));
emit_store(fn, kind, index);
}
return 0;
} else if (!strcmp(name, "setq")) {
arg0 = car(tail);
arg1 = cadr(tail);
assert(null_q(cddr(tail)));
assert(ident_q(arg0));
assert(ctx_lookup_name(ctx, arg0->n_ident, &kind, &index) == 0);
emit(fn, ctx, arg1);
emit_store(fn, kind, index);
return 0;
} else if (!strcmp(name, "lambda")) {
// (lambda list body...)
arg0 = car(tail);
arg1 = cdr(tail);
assert(cons_q(arg0));
assert(!null_q(arg1));
struct function *new_fn = unit_lambda(ctx->root);
struct context new_ctx;
new_fn->args = arg0;
new_fn->body = arg1;
new_fn->local_count = 0;
assert(!ctx->parent);
ctx_init(&new_ctx, ctx);
new_ctx.var_counter = 0;
new_ctx.owner = new_fn;
emit_function(&new_ctx, new_fn);
emit_insn(fn, OP(OP_LDF, new_fn->index));
return 0;
} else if (!strcmp(name, "begin")) {
assert(!null_q(tail));
for (arg0 = tail; arg0; arg0 = cdr(arg0)) {
emit(fn, ctx, car(arg0));
}
return 0;
} else if (!strcmp(name, "if")) {
arg0 = car(tail);
arg1 = cadr(tail);
arg2 = caddr(tail);
assert(null_q(cdr(cddr(tail))));
emit(fn, ctx, arg0);
// Will hop back and modify an already emitted jump with a proper
// offset
size_t cond_jmp_loc = fn->bytecode.size;
emit_insn(fn, OP(OP_BF, 0));
// Emit "yes" branch
emit(fn, ctx, arg1);
size_t yes_jmp_loc = fn->bytecode.size;
emit_insn(fn, OP(OP_JMP, 0));
// Emit "no" branch
size_t no_loc = fn->bytecode.size;
emit(fn, ctx, arg2);
size_t end_loc = fn->bytecode.size;
// Fix up jumps
uint32_t *cond_jmp, *yes_jmp;
cond_jmp = vector_ref(&fn->bytecode, cond_jmp_loc);
yes_jmp = vector_ref(&fn->bytecode, yes_jmp_loc);
*cond_jmp |= (no_loc - cond_jmp_loc) & 0xFFFFFF;
*yes_jmp |= (end_loc - yes_jmp_loc) & 0xFFFFFF;
return 0;
} else if (!strcmp(name, "while")) {
arg0 = car(tail); // condition
arg1 = cdr(tail); // list of expressions
assert(!null_q(arg1));
size_t begin_loc = fn->bytecode.size;
emit(fn, ctx, arg0);
size_t cond_jmp_loc = fn->bytecode.size;
emit_insn(fn, OP(OP_BF, 0));
for (struct node *item = arg1; item; item = cdr(item)) {
emit(fn, ctx, car(item));
}
// Jump back to the beginning
size_t end_jmp_loc = fn->bytecode.size;
ssize_t diff = (ssize_t) begin_loc - (ssize_t) end_jmp_loc;
emit_insn(fn, OP(OP_JMP, diff));
size_t end_loc = fn->bytecode.size;
uint32_t *cond_jmp;
cond_jmp = vector_ref(&fn->bytecode, cond_jmp_loc);
*cond_jmp |= (end_loc - cond_jmp_loc) & 0xFFFFFF;
return 0;
} else if (!strcmp(name, "use") ||
!strcmp(name, "export")) {
// Ignore
return 0;
}
return -1;
}
int emit_builtin_insn(struct function *fn, const char *name, int argc) {
if (!strcmp(name, "+")) {
for (int i = 1; i < argc; ++i) {
emit_insn(fn, OP(OP_ADD, 0));
}
return 0;
} else if (!strcmp(name, "-")) {
for (int i = 1; i < argc; ++i) {
emit_insn(fn, OP(OP_SUB, 0));
}
return 0;
} else if (!strcmp(name, "*")) {
for (int i = 1; i < argc; ++i) {
emit_insn(fn, OP(OP_MUL, 0));
}
return 0;
} else if (!strcmp(name, "/") && argc == 2) {
emit_insn(fn, OP(OP_DIV, 0));
return 0;
} else if (!strcmp(name, "%") && argc == 2) {
emit_insn(fn, OP(OP_MOD, 0));
return 0;
} else if (!strcmp(name, "cons") && argc == 2) {
emit_insn(fn, OP(OP_CONS, 0));
return 0;
} else if (!strcmp(name, "=") && argc == 2) {
emit_insn(fn, OP(OP_EQ, 0));
return 0;
} else if (!strcmp(name, "/=") && argc == 2) {
emit_insn(fn, OP(OP_NEQ, 0));
return 0;
} else if (!strcmp(name, ">") && argc == 2) {
emit_insn(fn, OP(OP_GT, 0));
return 0;
} else if (!strcmp(name, ">=") && argc == 2) {
emit_insn(fn, OP(OP_GE, 0));
return 0;
} else if (!strcmp(name, "<") && argc == 2) {
emit_insn(fn, OP(OP_LT, 0));
return 0;
} else if (!strcmp(name, "<=") && argc == 2) {
emit_insn(fn, OP(OP_LE, 0));
return 0;
} else if (!strcmp(name, "null?") && argc == 1) {
emit_insn(fn, OP(OP_ISZ, 0));
return 0;
} else if (!strcmp(name, "not") && argc == 1) {
emit_insn(fn, OP(OP_NOT, 0));
return 0;
} else if (!strcmp(name, "car") && argc == 1) {
emit_insn(fn, OP(OP_CAR, 0));
return 0;
} else if (!strcmp(name, "cdr") && argc == 1) {
emit_insn(fn, OP(OP_CDR, 0));
return 0;
} else if (!strcmp(name, "debug/trace") && argc == 1) {
emit_insn(fn, OP(OP_DEBUG, OP_DEBUG_TRACE));
return 0;
} else if (!strcmp(name, "debug/break")) {
emit_insn(fn, OP(OP_DEBUG, OP_DEBUG_BREAKPOINT));
return 0;
}
return -1;
}
+42 -225
View File
@@ -1,3 +1,4 @@
#include "builtin.h"
#include "compile.h"
#include "unit.h"
#include "node.h"
@@ -72,7 +73,7 @@ void emit_function(struct context *ctx, struct function *fn) {
}
}
static void emit_store(struct function *fn, int kind, size_t index) {
void emit_store(struct function *fn, int kind, size_t index) {
switch (kind) {
case NAME_LOCAL:
emit_insn(fn, OP(OP_STL, index));
@@ -88,11 +89,41 @@ static void emit_store(struct function *fn, int kind, size_t index) {
}
}
void emit_call(struct function *fn, struct context *ctx, const char *name) {
size_t index;
int kind;
if (ctx_lookup_name(ctx, name, &kind, &index) != 0) {
printf("Undefined reference to function %s\n", name);
abort();
}
switch (kind) {
case NAME_EXTERNAL:
emit_insn(fn, OP(OP_XCALL, index));
break;
case NAME_LOCAL:
emit_insn(fn, OP(OP_LDL, index));
emit_insn(fn, OP(OP_CALL, 0));
break;
case NAME_ARGUMENT:
emit_insn(fn, OP(OP_LDARG, index));
emit_insn(fn, OP(OP_CALL, 0));
break;
case NAME_GLOBAL:
emit_insn(fn, OP(OP_GCALL, index));
break;
default:
printf("Unknown %d\n", kind);
abort();
}
}
void emit(struct function *fn, struct context *ctx, struct node *expr) {
if (!expr) {
return;
}
struct node *n0, *n1, *n2, *n3;
struct node *n0;
int c0;
switch (expr->type) {
@@ -109,235 +140,21 @@ void emit(struct function *fn, struct context *ctx, struct node *expr) {
abort();
}
// Only setq/define use this instruction
// TODO your defmacro could be here...
if (!strcmp(n0->n_ident, "define")) {
n0 = cadr(expr);
if (ident_q(n0)) {
int kind;
size_t index;
if (ctx_lookup_name(ctx, n0->n_ident, &kind, &index) != 0) {
fprintf(stderr, "Unresolved reference to %s\n", n0->n_ident);
abort();
}
n1 = caddr(expr);
assert(null_q(cdr(cddr(expr))));
// (define ident value)
// Emit value for n1
emit(fn, ctx, n1);
emit_store(fn, kind, index);
} else if (cons_q(n0)) {
struct node *name = car(n0);
struct node *args = cdr(n0);
struct node *body = cddr(expr);
assert(ident_q(name));
int kind;
size_t index;
assert(ctx_lookup_name(ctx, name->n_ident, &kind, &index) == 0);
assert(kind == NAME_GLOBAL);
struct function *new_fn = unit_lambda(ctx->root);
struct context new_ctx;
new_fn->args = args;
new_fn->body = body;
new_fn->local_count = 0;
assert(!ctx->parent);
ctx_init(&new_ctx, ctx);
new_ctx.var_counter = 0;
new_ctx.owner = new_fn;
emit_function(&new_ctx, new_fn);
// TODO do this at compile time for named functions?
emit_insn(fn, OP(OP_LDF, new_fn->index));
emit_insn(fn, OP(OP_STG, index));
}
return;
} else if (!strcmp(n0->n_ident, "setq")) {
n0 = cadr(expr);
n1 = caddr(expr);
assert(null_q(cdr(cddr(expr))));
int kind;
size_t index;
assert(ctx_lookup_name(ctx, n0->n_ident, &kind, &index) == 0);
emit(fn, ctx, n1);
emit_store(fn, kind, index);
return;
} else if (!strcmp(n0->n_ident, "use") || !strcmp(n0->n_ident, "export")) {
// Ignore
return;
}
// Check for macros/builtin operators
if (!strcmp(n0->n_ident, "lambda")) {
// (lambda list body...)
n0 = cadr(expr);
n1 = cddr(expr);
assert(cons_q(n0));
assert(!null_q(n1));
struct function *new_fn = unit_lambda(ctx->root);
struct context new_ctx;
new_fn->args = n0;
new_fn->body = n1;
new_fn->local_count = 0;
assert(!ctx->parent);
ctx_init(&new_ctx, ctx);
new_ctx.var_counter = 0;
new_ctx.owner = new_fn;
emit_function(&new_ctx, new_fn);
emit_insn(fn, OP(OP_LDF, new_fn->index));
return;
} else if (!strcmp(n0->n_ident, "begin")) {
n1 = cdr(expr);
assert(!null_q(n1));
for (; n1; n1 = cdr(n1)) {
n2 = car(n1);
emit(fn, ctx, n2);
}
return;
} else if (!strcmp(n0->n_ident, "if")) {
n1 = cadr(expr); // condition
n2 = caddr(expr); // yes-expr
n3 = car(cdr(cddr(expr))); // no-expr
assert(null_q(cddr(cddr(expr))));
emit(fn, ctx, n1);
// Will hop back and modify an already emitted jump with a proper
// offset
size_t cond_jmp_loc = fn->bytecode.size;
emit_insn(fn, OP(OP_BF, 0));
// Emit "yes" branch
emit(fn, ctx, n2);
size_t yes_jmp_loc = fn->bytecode.size;
emit_insn(fn, OP(OP_JMP, 0));
// Emit "no" branch
size_t no_loc = fn->bytecode.size;
emit(fn, ctx, n3);
size_t end_loc = fn->bytecode.size;
// Fix up jumps
uint32_t *cond_jmp, *yes_jmp;
cond_jmp = vector_ref(&fn->bytecode, cond_jmp_loc);
yes_jmp = vector_ref(&fn->bytecode, yes_jmp_loc);
*cond_jmp |= (no_loc - cond_jmp_loc) & 0xFFFFFF;
*yes_jmp |= (end_loc - yes_jmp_loc) & 0xFFFFFF;
return;
} else if (!strcmp(n0->n_ident, "while")) {
n1 = cadr(expr); // condition
n2 = cddr(expr); // list of expressions
assert(!null_q(n2));
size_t begin_loc = fn->bytecode.size;
emit(fn, ctx, n1);
size_t cond_jmp_loc = fn->bytecode.size;
emit_insn(fn, OP(OP_BF, 0));
for (struct node *item = n2; item; item = cdr(item)) {
struct node *stmt = car(item);
emit(fn, ctx, stmt);
}
// Jump back to the beginning
size_t end_jmp_loc = fn->bytecode.size;
ssize_t diff = (ssize_t) begin_loc - (ssize_t) end_jmp_loc;
emit_insn(fn, OP(OP_JMP, diff));
size_t end_loc = fn->bytecode.size;
uint32_t *cond_jmp;
cond_jmp = vector_ref(&fn->bytecode, cond_jmp_loc);
*cond_jmp |= (end_loc - cond_jmp_loc) & 0xFFFFFF;
// Try builtin syntax construct
if (emit_builtin_syntax(fn, ctx, n0->n_ident, cdr(expr)) == 0) {
return;
}
c0 = emit_arg_list(fn, ctx, expr->n_cons.cdr);
if (!strcmp(n0->n_ident, "+")) {
for (int i = 1; i < c0; ++i) {
emit_insn(fn, OP(OP_ADD, 0));
}
} else if (!strcmp(n0->n_ident, "-")) {
for (int i = 1; i < c0; ++i) {
emit_insn(fn, OP(OP_SUB, 0));
}
} else if (!strcmp(n0->n_ident, "*")) {
for (int i = 1; i < c0; ++i) {
emit_insn(fn, OP(OP_MUL, 0));
}
} else if (!strcmp(n0->n_ident, "/") && c0 == 2) {
emit_insn(fn, OP(OP_DIV, 0));
} else if (!strcmp(n0->n_ident, "%") && c0 == 2) {
emit_insn(fn, OP(OP_MOD, 0));
} else if (!strcmp(n0->n_ident, "cons") && c0 == 2) {
emit_insn(fn, OP(OP_CONS, 0));
} else if (!strcmp(n0->n_ident, "=") && c0 == 2) {
emit_insn(fn, OP(OP_EQ, 0));
} else if (!strcmp(n0->n_ident, "/=") && c0 == 2) {
emit_insn(fn, OP(OP_NEQ, 0));
} else if (!strcmp(n0->n_ident, ">") && c0 == 2) {
emit_insn(fn, OP(OP_GT, 0));
} else if (!strcmp(n0->n_ident, ">=") && c0 == 2) {
emit_insn(fn, OP(OP_GE, 0));
} else if (!strcmp(n0->n_ident, "<") && c0 == 2) {
emit_insn(fn, OP(OP_LT, 0));
} else if (!strcmp(n0->n_ident, "<=") && c0 == 2) {
emit_insn(fn, OP(OP_LE, 0));
} else if (!strcmp(n0->n_ident, "null?") && c0 == 1) {
emit_insn(fn, OP(OP_ISZ, 0));
} else if (!strcmp(n0->n_ident, "not") && c0 == 1) {
emit_insn(fn, OP(OP_NOT, 0));
} else if (!strcmp(n0->n_ident, "car") && c0 == 1) {
emit_insn(fn, OP(OP_CAR, 0));
} else if (!strcmp(n0->n_ident, "cdr") && c0 == 1) {
emit_insn(fn, OP(OP_CDR, 0));
} else if (!strcmp(n0->n_ident, "debug/trace") && c0 == 1) {
emit_insn(fn, OP(OP_DEBUG, OP_DEBUG_TRACE));
} else if (!strcmp(n0->n_ident, "debug/break")) {
emit_insn(fn, OP(OP_DEBUG, OP_DEBUG_BREAKPOINT));
} else {
size_t index;
int kind;
if (ctx_lookup_name(ctx, n0->n_ident, &kind, &index) != 0) {
printf("Undefined reference to function ");
vm_print(n0, 0);
printf("\n");
abort();
}
switch (kind) {
case NAME_EXTERNAL:
emit_insn(fn, OP(OP_XCALL, index));
break;
case NAME_LOCAL:
emit_insn(fn, OP(OP_LCALL, index));
break;
case NAME_ARGUMENT:
emit_insn(fn, OP(OP_LDARG, index));
emit_insn(fn, OP(OP_CALL, 0));
break;
case NAME_GLOBAL:
emit_insn(fn, OP(OP_GCALL, index));
break;
default:
printf("Unknown %d\n", kind);
abort();
}
// Try builtin operator
if (emit_builtin_insn(fn, n0->n_ident, c0) == 0) {
return;
}
// If everything else failed, try to resolve this as a function
// call
emit_call(fn, ctx, n0->n_ident);
return;
case N_INTEGER:
if (expr->n_integer < 0x7FFFFF && expr->n_integer > -0x7FFFFF) {
+13
View File
@@ -0,0 +1,13 @@
#pragma once
struct function;
struct context;
struct node;
int emit_builtin_syntax(struct function *fn,
struct context *ctx,
const char *name,
struct node *tail);
int emit_builtin_insn(struct function *fn,
const char *name,
int argc);
+3
View File
@@ -1,8 +1,11 @@
#pragma once
#include <stddef.h>
struct context;
struct function;
struct node;
void emit_store(struct function *fn, int kind, size_t index);
void emit(struct function *fn, struct context *ctx, struct node *expr);
void emit_function(struct context *ctx, struct function *fn);
+10 -8
View File
@@ -1,13 +1,15 @@
(use mod2)
(use core)
(define a (cons 1 (cons 2 (cons 3 (cons 4 nil)))))
(define (f2 x)
(define (f1 x y)
(define (f0 x y z)
(+ x y z)
)
(define (f x)
(debug/break)
(+ x 1))
(f0 x y 1)
)
(print (f 1))
(f1 x 2)
)
(print (list-ref a 1))
(print (list-ref a 1))
(print (f2 3))