diff --git a/Makefile b/Makefile index 6913818..6a770b4 100644 --- a/Makefile +++ b/Makefile @@ -16,6 +16,7 @@ COMPILER_OBJS=$(O)/compiler/main.o \ $(O)/compiler/node.o \ $(O)/compiler/parse.o \ $(O)/compiler/unit.o \ + $(O)/compiler/builtin.o \ $(O)/core/vector.o \ $(O)/core/hash.o CFLAGS=-Icore/include \ diff --git a/compiler/builtin.c b/compiler/builtin.c new file mode 100644 index 0000000..3b4c4dc --- /dev/null +++ b/compiler/builtin.c @@ -0,0 +1,229 @@ +#include "builtin.h" +#include "compile.h" +#include "vector.h" +#include "unit.h" +#include "node.h" +#include "op.h" + +#include +#include +#include + +int emit_builtin_syntax(struct function *fn, + struct context *ctx, + const char *name, + struct node *tail) { + struct node *arg0, *arg1, *arg2; + int kind; + size_t index; + + // Only setq/define use this instruction + // TODO your defmacro could be here... + if (!strcmp(name, "define")) { + arg0 = car(tail); + + if (ident_q(arg0)) { + arg1 = cadr(tail); + assert(null_q(cddr(tail))); + + if (ctx_lookup_name(ctx, arg0->n_ident, &kind, &index) != 0) { + fprintf(stderr, "Unresolved reference to %s\n", arg0->n_ident); + abort(); + } + + // (define ident value) + // Emit value for n1 + emit(fn, ctx, arg1); + emit_store(fn, kind, index); + } else if (cons_q(arg0)) { + struct node *name = car(arg0); + struct node *args = cdr(arg0); + struct node *body = cdr(tail); + + assert(ident_q(name)); + assert(ctx_lookup_name(ctx, name->n_ident, &kind, &index) == 0); + + struct function *new_fn = unit_lambda(ctx->root); + struct context new_ctx; + new_fn->args = args; + new_fn->body = body; + new_fn->local_count = 0; + if (ctx->parent) { + ctx_init(&new_ctx, ctx->parent); + } else { + ctx_init(&new_ctx, ctx); + } + new_ctx.var_counter = 0; + new_ctx.owner = new_fn; + + emit_function(&new_ctx, new_fn); + emit_insn(fn, OP(OP_LDF, new_fn->index)); + emit_store(fn, kind, index); + } + return 0; + } else if (!strcmp(name, "setq")) { + arg0 = car(tail); + arg1 = cadr(tail); + assert(null_q(cddr(tail))); + + assert(ident_q(arg0)); + assert(ctx_lookup_name(ctx, arg0->n_ident, &kind, &index) == 0); + emit(fn, ctx, arg1); + emit_store(fn, kind, index); + return 0; + } else if (!strcmp(name, "lambda")) { + // (lambda list body...) + arg0 = car(tail); + arg1 = cdr(tail); + assert(cons_q(arg0)); + assert(!null_q(arg1)); + + struct function *new_fn = unit_lambda(ctx->root); + struct context new_ctx; + new_fn->args = arg0; + new_fn->body = arg1; + new_fn->local_count = 0; + assert(!ctx->parent); + ctx_init(&new_ctx, ctx); + new_ctx.var_counter = 0; + new_ctx.owner = new_fn; + + emit_function(&new_ctx, new_fn); + emit_insn(fn, OP(OP_LDF, new_fn->index)); + + return 0; + } else if (!strcmp(name, "begin")) { + assert(!null_q(tail)); + + for (arg0 = tail; arg0; arg0 = cdr(arg0)) { + emit(fn, ctx, car(arg0)); + } + return 0; + } else if (!strcmp(name, "if")) { + arg0 = car(tail); + arg1 = cadr(tail); + arg2 = caddr(tail); + assert(null_q(cdr(cddr(tail)))); + + emit(fn, ctx, arg0); + // Will hop back and modify an already emitted jump with a proper + // offset + size_t cond_jmp_loc = fn->bytecode.size; + emit_insn(fn, OP(OP_BF, 0)); + + // Emit "yes" branch + emit(fn, ctx, arg1); + size_t yes_jmp_loc = fn->bytecode.size; + emit_insn(fn, OP(OP_JMP, 0)); + + // Emit "no" branch + size_t no_loc = fn->bytecode.size; + emit(fn, ctx, arg2); + + size_t end_loc = fn->bytecode.size; + + // Fix up jumps + uint32_t *cond_jmp, *yes_jmp; + cond_jmp = vector_ref(&fn->bytecode, cond_jmp_loc); + yes_jmp = vector_ref(&fn->bytecode, yes_jmp_loc); + + *cond_jmp |= (no_loc - cond_jmp_loc) & 0xFFFFFF; + *yes_jmp |= (end_loc - yes_jmp_loc) & 0xFFFFFF; + return 0; + } else if (!strcmp(name, "while")) { + arg0 = car(tail); // condition + arg1 = cdr(tail); // list of expressions + assert(!null_q(arg1)); + + size_t begin_loc = fn->bytecode.size; + emit(fn, ctx, arg0); + size_t cond_jmp_loc = fn->bytecode.size; + emit_insn(fn, OP(OP_BF, 0)); + + for (struct node *item = arg1; item; item = cdr(item)) { + emit(fn, ctx, car(item)); + } + + // Jump back to the beginning + size_t end_jmp_loc = fn->bytecode.size; + ssize_t diff = (ssize_t) begin_loc - (ssize_t) end_jmp_loc; + emit_insn(fn, OP(OP_JMP, diff)); + + size_t end_loc = fn->bytecode.size; + uint32_t *cond_jmp; + cond_jmp = vector_ref(&fn->bytecode, cond_jmp_loc); + *cond_jmp |= (end_loc - cond_jmp_loc) & 0xFFFFFF; + return 0; + } else if (!strcmp(name, "use") || + !strcmp(name, "export")) { + // Ignore + return 0; + } + + return -1; +} + +int emit_builtin_insn(struct function *fn, const char *name, int argc) { + if (!strcmp(name, "+")) { + for (int i = 1; i < argc; ++i) { + emit_insn(fn, OP(OP_ADD, 0)); + } + return 0; + } else if (!strcmp(name, "-")) { + for (int i = 1; i < argc; ++i) { + emit_insn(fn, OP(OP_SUB, 0)); + } + return 0; + } else if (!strcmp(name, "*")) { + for (int i = 1; i < argc; ++i) { + emit_insn(fn, OP(OP_MUL, 0)); + } + return 0; + } else if (!strcmp(name, "/") && argc == 2) { + emit_insn(fn, OP(OP_DIV, 0)); + return 0; + } else if (!strcmp(name, "%") && argc == 2) { + emit_insn(fn, OP(OP_MOD, 0)); + return 0; + } else if (!strcmp(name, "cons") && argc == 2) { + emit_insn(fn, OP(OP_CONS, 0)); + return 0; + } else if (!strcmp(name, "=") && argc == 2) { + emit_insn(fn, OP(OP_EQ, 0)); + return 0; + } else if (!strcmp(name, "/=") && argc == 2) { + emit_insn(fn, OP(OP_NEQ, 0)); + return 0; + } else if (!strcmp(name, ">") && argc == 2) { + emit_insn(fn, OP(OP_GT, 0)); + return 0; + } else if (!strcmp(name, ">=") && argc == 2) { + emit_insn(fn, OP(OP_GE, 0)); + return 0; + } else if (!strcmp(name, "<") && argc == 2) { + emit_insn(fn, OP(OP_LT, 0)); + return 0; + } else if (!strcmp(name, "<=") && argc == 2) { + emit_insn(fn, OP(OP_LE, 0)); + return 0; + } else if (!strcmp(name, "null?") && argc == 1) { + emit_insn(fn, OP(OP_ISZ, 0)); + return 0; + } else if (!strcmp(name, "not") && argc == 1) { + emit_insn(fn, OP(OP_NOT, 0)); + return 0; + } else if (!strcmp(name, "car") && argc == 1) { + emit_insn(fn, OP(OP_CAR, 0)); + return 0; + } else if (!strcmp(name, "cdr") && argc == 1) { + emit_insn(fn, OP(OP_CDR, 0)); + return 0; + } else if (!strcmp(name, "debug/trace") && argc == 1) { + emit_insn(fn, OP(OP_DEBUG, OP_DEBUG_TRACE)); + return 0; + } else if (!strcmp(name, "debug/break")) { + emit_insn(fn, OP(OP_DEBUG, OP_DEBUG_BREAKPOINT)); + return 0; + } + return -1; +} diff --git a/compiler/compile.c b/compiler/compile.c index f3103ea..40cf471 100644 --- a/compiler/compile.c +++ b/compiler/compile.c @@ -1,3 +1,4 @@ +#include "builtin.h" #include "compile.h" #include "unit.h" #include "node.h" @@ -72,7 +73,7 @@ void emit_function(struct context *ctx, struct function *fn) { } } -static void emit_store(struct function *fn, int kind, size_t index) { +void emit_store(struct function *fn, int kind, size_t index) { switch (kind) { case NAME_LOCAL: emit_insn(fn, OP(OP_STL, index)); @@ -88,11 +89,41 @@ static void emit_store(struct function *fn, int kind, size_t index) { } } +void emit_call(struct function *fn, struct context *ctx, const char *name) { + size_t index; + int kind; + + if (ctx_lookup_name(ctx, name, &kind, &index) != 0) { + printf("Undefined reference to function %s\n", name); + abort(); + } + + switch (kind) { + case NAME_EXTERNAL: + emit_insn(fn, OP(OP_XCALL, index)); + break; + case NAME_LOCAL: + emit_insn(fn, OP(OP_LDL, index)); + emit_insn(fn, OP(OP_CALL, 0)); + break; + case NAME_ARGUMENT: + emit_insn(fn, OP(OP_LDARG, index)); + emit_insn(fn, OP(OP_CALL, 0)); + break; + case NAME_GLOBAL: + emit_insn(fn, OP(OP_GCALL, index)); + break; + default: + printf("Unknown %d\n", kind); + abort(); + } +} + void emit(struct function *fn, struct context *ctx, struct node *expr) { if (!expr) { return; } - struct node *n0, *n1, *n2, *n3; + struct node *n0; int c0; switch (expr->type) { @@ -109,235 +140,21 @@ void emit(struct function *fn, struct context *ctx, struct node *expr) { abort(); } - // Only setq/define use this instruction - // TODO your defmacro could be here... - if (!strcmp(n0->n_ident, "define")) { - n0 = cadr(expr); - if (ident_q(n0)) { - int kind; - size_t index; - - if (ctx_lookup_name(ctx, n0->n_ident, &kind, &index) != 0) { - fprintf(stderr, "Unresolved reference to %s\n", n0->n_ident); - abort(); - } - - n1 = caddr(expr); - assert(null_q(cdr(cddr(expr)))); - // (define ident value) - // Emit value for n1 - emit(fn, ctx, n1); - emit_store(fn, kind, index); - } else if (cons_q(n0)) { - struct node *name = car(n0); - struct node *args = cdr(n0); - struct node *body = cddr(expr); - - assert(ident_q(name)); - - int kind; - size_t index; - - assert(ctx_lookup_name(ctx, name->n_ident, &kind, &index) == 0); - assert(kind == NAME_GLOBAL); - - struct function *new_fn = unit_lambda(ctx->root); - struct context new_ctx; - new_fn->args = args; - new_fn->body = body; - new_fn->local_count = 0; - assert(!ctx->parent); - ctx_init(&new_ctx, ctx); - new_ctx.var_counter = 0; - new_ctx.owner = new_fn; - - emit_function(&new_ctx, new_fn); - // TODO do this at compile time for named functions? - emit_insn(fn, OP(OP_LDF, new_fn->index)); - emit_insn(fn, OP(OP_STG, index)); - } - return; - } else if (!strcmp(n0->n_ident, "setq")) { - n0 = cadr(expr); - n1 = caddr(expr); - assert(null_q(cdr(cddr(expr)))); - int kind; - size_t index; - - assert(ctx_lookup_name(ctx, n0->n_ident, &kind, &index) == 0); - emit(fn, ctx, n1); - - emit_store(fn, kind, index); - return; - } else if (!strcmp(n0->n_ident, "use") || !strcmp(n0->n_ident, "export")) { - // Ignore - return; - } - - // Check for macros/builtin operators - if (!strcmp(n0->n_ident, "lambda")) { - // (lambda list body...) - n0 = cadr(expr); - n1 = cddr(expr); - assert(cons_q(n0)); - assert(!null_q(n1)); - - struct function *new_fn = unit_lambda(ctx->root); - struct context new_ctx; - new_fn->args = n0; - new_fn->body = n1; - new_fn->local_count = 0; - assert(!ctx->parent); - ctx_init(&new_ctx, ctx); - new_ctx.var_counter = 0; - new_ctx.owner = new_fn; - - emit_function(&new_ctx, new_fn); - emit_insn(fn, OP(OP_LDF, new_fn->index)); - - return; - } else if (!strcmp(n0->n_ident, "begin")) { - n1 = cdr(expr); - assert(!null_q(n1)); - - for (; n1; n1 = cdr(n1)) { - n2 = car(n1); - - emit(fn, ctx, n2); - } - return; - } else if (!strcmp(n0->n_ident, "if")) { - n1 = cadr(expr); // condition - n2 = caddr(expr); // yes-expr - n3 = car(cdr(cddr(expr))); // no-expr - assert(null_q(cddr(cddr(expr)))); - - emit(fn, ctx, n1); - // Will hop back and modify an already emitted jump with a proper - // offset - size_t cond_jmp_loc = fn->bytecode.size; - emit_insn(fn, OP(OP_BF, 0)); - - // Emit "yes" branch - emit(fn, ctx, n2); - size_t yes_jmp_loc = fn->bytecode.size; - emit_insn(fn, OP(OP_JMP, 0)); - - // Emit "no" branch - size_t no_loc = fn->bytecode.size; - emit(fn, ctx, n3); - - size_t end_loc = fn->bytecode.size; - - // Fix up jumps - uint32_t *cond_jmp, *yes_jmp; - cond_jmp = vector_ref(&fn->bytecode, cond_jmp_loc); - yes_jmp = vector_ref(&fn->bytecode, yes_jmp_loc); - - *cond_jmp |= (no_loc - cond_jmp_loc) & 0xFFFFFF; - *yes_jmp |= (end_loc - yes_jmp_loc) & 0xFFFFFF; - return; - } else if (!strcmp(n0->n_ident, "while")) { - n1 = cadr(expr); // condition - n2 = cddr(expr); // list of expressions - assert(!null_q(n2)); - - size_t begin_loc = fn->bytecode.size; - emit(fn, ctx, n1); - size_t cond_jmp_loc = fn->bytecode.size; - emit_insn(fn, OP(OP_BF, 0)); - - for (struct node *item = n2; item; item = cdr(item)) { - struct node *stmt = car(item); - emit(fn, ctx, stmt); - } - - // Jump back to the beginning - size_t end_jmp_loc = fn->bytecode.size; - ssize_t diff = (ssize_t) begin_loc - (ssize_t) end_jmp_loc; - emit_insn(fn, OP(OP_JMP, diff)); - - size_t end_loc = fn->bytecode.size; - uint32_t *cond_jmp; - cond_jmp = vector_ref(&fn->bytecode, cond_jmp_loc); - *cond_jmp |= (end_loc - cond_jmp_loc) & 0xFFFFFF; + // Try builtin syntax construct + if (emit_builtin_syntax(fn, ctx, n0->n_ident, cdr(expr)) == 0) { return; } c0 = emit_arg_list(fn, ctx, expr->n_cons.cdr); - if (!strcmp(n0->n_ident, "+")) { - for (int i = 1; i < c0; ++i) { - emit_insn(fn, OP(OP_ADD, 0)); - } - } else if (!strcmp(n0->n_ident, "-")) { - for (int i = 1; i < c0; ++i) { - emit_insn(fn, OP(OP_SUB, 0)); - } - } else if (!strcmp(n0->n_ident, "*")) { - for (int i = 1; i < c0; ++i) { - emit_insn(fn, OP(OP_MUL, 0)); - } - } else if (!strcmp(n0->n_ident, "/") && c0 == 2) { - emit_insn(fn, OP(OP_DIV, 0)); - } else if (!strcmp(n0->n_ident, "%") && c0 == 2) { - emit_insn(fn, OP(OP_MOD, 0)); - } else if (!strcmp(n0->n_ident, "cons") && c0 == 2) { - emit_insn(fn, OP(OP_CONS, 0)); - } else if (!strcmp(n0->n_ident, "=") && c0 == 2) { - emit_insn(fn, OP(OP_EQ, 0)); - } else if (!strcmp(n0->n_ident, "/=") && c0 == 2) { - emit_insn(fn, OP(OP_NEQ, 0)); - } else if (!strcmp(n0->n_ident, ">") && c0 == 2) { - emit_insn(fn, OP(OP_GT, 0)); - } else if (!strcmp(n0->n_ident, ">=") && c0 == 2) { - emit_insn(fn, OP(OP_GE, 0)); - } else if (!strcmp(n0->n_ident, "<") && c0 == 2) { - emit_insn(fn, OP(OP_LT, 0)); - } else if (!strcmp(n0->n_ident, "<=") && c0 == 2) { - emit_insn(fn, OP(OP_LE, 0)); - } else if (!strcmp(n0->n_ident, "null?") && c0 == 1) { - emit_insn(fn, OP(OP_ISZ, 0)); - } else if (!strcmp(n0->n_ident, "not") && c0 == 1) { - emit_insn(fn, OP(OP_NOT, 0)); - } else if (!strcmp(n0->n_ident, "car") && c0 == 1) { - emit_insn(fn, OP(OP_CAR, 0)); - } else if (!strcmp(n0->n_ident, "cdr") && c0 == 1) { - emit_insn(fn, OP(OP_CDR, 0)); - } else if (!strcmp(n0->n_ident, "debug/trace") && c0 == 1) { - emit_insn(fn, OP(OP_DEBUG, OP_DEBUG_TRACE)); - } else if (!strcmp(n0->n_ident, "debug/break")) { - emit_insn(fn, OP(OP_DEBUG, OP_DEBUG_BREAKPOINT)); - } else { - size_t index; - int kind; - - if (ctx_lookup_name(ctx, n0->n_ident, &kind, &index) != 0) { - printf("Undefined reference to function "); - vm_print(n0, 0); - printf("\n"); - abort(); - } - - switch (kind) { - case NAME_EXTERNAL: - emit_insn(fn, OP(OP_XCALL, index)); - break; - case NAME_LOCAL: - emit_insn(fn, OP(OP_LCALL, index)); - break; - case NAME_ARGUMENT: - emit_insn(fn, OP(OP_LDARG, index)); - emit_insn(fn, OP(OP_CALL, 0)); - break; - case NAME_GLOBAL: - emit_insn(fn, OP(OP_GCALL, index)); - break; - default: - printf("Unknown %d\n", kind); - abort(); - } + // Try builtin operator + if (emit_builtin_insn(fn, n0->n_ident, c0) == 0) { + return; } + + // If everything else failed, try to resolve this as a function + // call + emit_call(fn, ctx, n0->n_ident); return; case N_INTEGER: if (expr->n_integer < 0x7FFFFF && expr->n_integer > -0x7FFFFF) { diff --git a/compiler/include/builtin.h b/compiler/include/builtin.h new file mode 100644 index 0000000..7c28f1d --- /dev/null +++ b/compiler/include/builtin.h @@ -0,0 +1,13 @@ +#pragma once + +struct function; +struct context; +struct node; + +int emit_builtin_syntax(struct function *fn, + struct context *ctx, + const char *name, + struct node *tail); +int emit_builtin_insn(struct function *fn, + const char *name, + int argc); diff --git a/compiler/include/compile.h b/compiler/include/compile.h index 1321c15..1519f9d 100644 --- a/compiler/include/compile.h +++ b/compiler/include/compile.h @@ -1,8 +1,11 @@ #pragma once +#include struct context; struct function; struct node; +void emit_store(struct function *fn, int kind, size_t index); + void emit(struct function *fn, struct context *ctx, struct node *expr); void emit_function(struct context *ctx, struct function *fn); diff --git a/mod1.vml b/mod1.vml index 631786e..e4f5599 100644 --- a/mod1.vml +++ b/mod1.vml @@ -1,13 +1,15 @@ -(use mod2) (use core) -(define a (cons 1 (cons 2 (cons 3 (cons 4 nil))))) +(define (f2 x) + (define (f1 x y) + (define (f0 x y z) + (+ x y z) + ) -(define (f x) - (debug/break) - (+ x 1)) + (f0 x y 1) + ) -(print (f 1)) + (f1 x 2) +) -(print (list-ref a 1)) -(print (list-ref a 1)) +(print (f2 3))