From c74ea3b4170658c66c8dfe3497e27b02491c418c Mon Sep 17 00:00:00 2001 From: Mark Poliakov Date: Wed, 7 Apr 2021 22:24:33 +0300 Subject: [PATCH] Refactor l2vm --- Makefile | 10 +- core/hash.c | 15 ++ core/include/binary.h | 2 + core/include/hash.h | 1 + core/include/vector.h | 1 + core/vector.c | 29 +++- mod1.vml | 8 +- mod2.vml | 8 +- vm/error.c | 19 +++ vm/include/error.h | 15 ++ vm/include/load.h | 2 + vm/include/stack.h | 14 ++ vm/include/unit.h | 38 +++++ vm/include/vm.h | 4 - vm/include/vmstate.h | 74 +++------ vm/include/vmstring.h | 2 +- vm/include/vmval.h | 4 +- vm/load.c | 85 +++++++--- vm/main.c | 80 ++++++--- vm/stack.c | 34 ++++ vm/unit.c | 34 ++++ vm/vmstack.c | 39 +++++ vm/vmstate.c | 377 ++++++++++++++++++++++++------------------ vm/vmstring.c | 7 +- vm/vmval.c | 36 +++- 25 files changed, 653 insertions(+), 285 deletions(-) create mode 100644 vm/error.c create mode 100644 vm/include/error.h create mode 100644 vm/include/stack.h create mode 100644 vm/include/unit.h delete mode 100644 vm/include/vm.h create mode 100644 vm/stack.c create mode 100644 vm/unit.c create mode 100644 vm/vmstack.c diff --git a/Makefile b/Makefile index 40f1349..a2636a5 100644 --- a/Makefile +++ b/Makefile @@ -1,9 +1,13 @@ O=build VM_OBJS=$(O)/vm/main.o \ $(O)/vm/vmstate.o \ + $(O)/vm/vmstack.o \ $(O)/vm/vmstring.o \ $(O)/vm/vmval.o \ $(O)/vm/load.o \ + $(O)/vm/unit.o \ + $(O)/vm/stack.o \ + $(O)/vm/error.o \ $(O)/core/vector.o \ $(O)/core/hash.o COMPILER_OBJS=$(O)/compiler/main.o \ @@ -16,7 +20,9 @@ COMPILER_OBJS=$(O)/compiler/main.o \ CFLAGS=-Icore/include \ -Werror \ -Wall \ - -Wextra + -Wextra \ + -ggdb \ + -O0 HDRS=$(shell find . -type f -name "*.h") DIRS=$(shell find compiler core vm -type d -printf "$(O)/%p ") @@ -44,7 +50,7 @@ $(O)/%.o: %.c $(HDRS) $(CC) -c $(CFLAGS) -o $@ $< test: $(DIRS) $(O)/mod1.vmx $(O)/mod2.vmx $(O)/l2vm - cd $(O) && ./l2vm mod2.vmx + cd $(O) && $(L2VM_PREFIX) ./l2vm mod1.vmx $(O)/%.vmx: %.vml $(O)/l2c $(O)/l2c $< $@ diff --git a/core/hash.c b/core/hash.c index 45af280..b253108 100644 --- a/core/hash.c +++ b/core/hash.c @@ -63,6 +63,21 @@ int shash_init(struct hash *h, size_t cap) { return 0; } +void hash_free(struct hash *h) { + struct hash_pair *p; + assert(h->pair_free); + for (size_t i = 0; i < h->bucket_count; ++i) { + while (!list_empty(&h->buckets[i])) { + p = list_first_entry(&h->buckets[i], struct hash_pair, link); + list_del(&p->link); + h->pair_free(p); + } + } + free(h->buckets); + h->buckets = NULL; + h->bucket_count = 0; +} + int hash_insert(struct hash *h, const void *key, void *value) { #if defined(HASH_CHECK_DUP) assert(!hash_lookup(h, key)); diff --git a/core/include/binary.h b/core/include/binary.h index 897a841..106b5ab 100644 --- a/core/include/binary.h +++ b/core/include/binary.h @@ -1,6 +1,8 @@ #pragma once #include +#define IMAGE_MAGIC 0xCEBAB123 + // TODO move all strings/names into strtable struct bin_header { uint32_t magic; diff --git a/core/include/hash.h b/core/include/hash.h index 19c15d7..0695bc2 100644 --- a/core/include/hash.h +++ b/core/include/hash.h @@ -21,5 +21,6 @@ struct hash { }; int shash_init(struct hash *h, size_t cap); +void hash_free(struct hash *h); int hash_insert(struct hash *h, const void *key, void *value); struct hash_pair *hash_lookup(struct hash *h, const void *key); diff --git a/core/include/vector.h b/core/include/vector.h index 28c2d93..5fccd95 100644 --- a/core/include/vector.h +++ b/core/include/vector.h @@ -7,5 +7,6 @@ struct vector { }; void vector_init(struct vector *vec, size_t entsize); +void vector_free(struct vector *vec); void *vector_append(struct vector *vec); void *vector_ref(struct vector *vec, size_t index); diff --git a/core/vector.c b/core/vector.c index bec10f2..a1e9c82 100644 --- a/core/vector.c +++ b/core/vector.c @@ -1,12 +1,19 @@ #include "vector.h" #include +#include #include +#include -static void vector_resize(struct vector *vec, size_t new_cap) { +static int vector_resize(struct vector *vec, size_t new_cap) { vec->data = realloc(vec->data, vec->entsize * new_cap); - assert(vec->data); + if (!vec->data) { + vec->cap = 0; + vec->size = 0; + return -ENOMEM; + } vec->cap = new_cap; + return 0; } void vector_init(struct vector *vec, size_t entsize) { @@ -16,9 +23,25 @@ void vector_init(struct vector *vec, size_t entsize) { vec->entsize = entsize; } +void vector_free(struct vector *vec) { + vec->cap = 0; + vec->size = 0; + vec->entsize = 0; + free(vec->data); + vec->data = NULL; +} + +void vector_release(struct vector *vec) { + memset(vec->data, 0, vec->cap * vec->entsize); + free(vec->data); + vec->cap = 0; +} + void *vector_append(struct vector *vec) { if (vec->size == vec->cap) { - vector_resize(vec, vec->cap + 8); + if (vector_resize(vec, vec->cap + 8) != 0) { + return NULL; + } } return vec->data + vec->entsize * vec->size++; } diff --git a/mod1.vml b/mod1.vml index 1b6154e..48c095c 100644 --- a/mod1.vml +++ b/mod1.vml @@ -1,6 +1,4 @@ -(export - my-add) +(use mod2) +(use core) -(define xxx 1) -(define (my-add x y) - (+ x y)) +(print (f 1 2 3)) diff --git a/mod2.vml b/mod2.vml index 393e857..4349429 100644 --- a/mod2.vml +++ b/mod2.vml @@ -1,4 +1,6 @@ -(use mod1) -(use core) +(export sqr f) -(print (my-add 12 (my-add 2 3))) +(define (sqr x) (* x x)) + +(define (f x y z) + (+ (sqr x) (sqr y) (sqr z))) diff --git a/vm/error.c b/vm/error.c new file mode 100644 index 0000000..b271f93 --- /dev/null +++ b/vm/error.c @@ -0,0 +1,19 @@ +#include "error.h" + +const char *vm_strerror(int e) { + switch (e) { + case -ERR_IMAGE_MAGIC: return "Invalid image magic"; + case -ERR_IMAGE_INDEX: return "Image indices are invalid"; + case -ERR_IMAGE_IO: return "Image I/O error"; + case -ERR_OPCODE_UNDEFINED: return "Undefined opcode"; + case -ERR_RANGE: return "Range error"; + case -ERR_SYMBOL_UNDEFINED: return "Undefined reference to a symbol"; + case -ERR_OUT_OF_MEMORY: return "Out of memory"; + case -ERR_OPERAND_TYPE: return "Operand type error"; + case -ERR_STACK_UNDERFLOW: return "Stack underflow"; + case -ERR_STACK_OVERFLOW: return "Stack overflow"; + case -ERR_NOT_FOUND: return "Unit not found"; + default: + return "Unknown error"; + } +} diff --git a/vm/include/error.h b/vm/include/error.h new file mode 100644 index 0000000..5ccb64a --- /dev/null +++ b/vm/include/error.h @@ -0,0 +1,15 @@ +#pragma once + +#define ERR_IMAGE_MAGIC 1 +#define ERR_IMAGE_INDEX 2 +#define ERR_IMAGE_IO 3 +#define ERR_OPCODE_UNDEFINED 4 +#define ERR_RANGE 5 +#define ERR_SYMBOL_UNDEFINED 6 +#define ERR_OUT_OF_MEMORY 7 +#define ERR_OPERAND_TYPE 8 +#define ERR_STACK_UNDERFLOW 9 +#define ERR_STACK_OVERFLOW 10 +#define ERR_NOT_FOUND 11 + +const char *vm_strerror(int e); diff --git a/vm/include/load.h b/vm/include/load.h index ef6a576..b95a9cb 100644 --- a/vm/include/load.h +++ b/vm/include/load.h @@ -1,6 +1,8 @@ #pragma once #include + #include "list.h" +#include "vector.h" struct vm_state; struct vm_ref_entry; diff --git a/vm/include/stack.h b/vm/include/stack.h new file mode 100644 index 0000000..60c0e27 --- /dev/null +++ b/vm/include/stack.h @@ -0,0 +1,14 @@ +#pragma once +#include +#include + +// uint64_t only +struct stack { + size_t sp, size; + uint64_t *data; +}; + +int stack_init(struct stack *st, size_t size); +void stack_free(struct stack *st); +int stack_push(struct stack *st, uint64_t w); +int stack_pop(struct stack *st, uint64_t *w); diff --git a/vm/include/unit.h b/vm/include/unit.h new file mode 100644 index 0000000..3d66958 --- /dev/null +++ b/vm/include/unit.h @@ -0,0 +1,38 @@ +#pragma once +#include + +#include "vector.h" + +#define MAXARG 12 +#define MAXLOC 64 + +#define REF_NATIVE (1 << 0) +struct vm_ref_entry { + size_t unit_index; + int flags; + union { + size_t ref_index; + uintptr_t ref_native; + }; +}; + +struct vm_func_entry { + size_t argc, local_count; + uint64_t local_regs[MAXLOC]; + uint64_t arg_regs[MAXARG]; + uint32_t *bytecode; +}; + +struct vm_unit { + struct vector ref_table; + struct vector functions; + + uint64_t *global_pool; + size_t global_pool_size; + + int is_loaded; +}; + +struct vm_func_entry *unit_add_function(struct vm_unit *u); +struct vm_ref_entry *unit_add_ref(struct vm_unit *u); +void unit_free(struct vm_unit *u); diff --git a/vm/include/vm.h b/vm/include/vm.h deleted file mode 100644 index d5461b7..0000000 --- a/vm/include/vm.h +++ /dev/null @@ -1,4 +0,0 @@ -#pragma once -#include "op.h" - -#define FLAG_REF (1ULL << 63) diff --git a/vm/include/vmstate.h b/vm/include/vmstate.h index 78fe838..b530500 100644 --- a/vm/include/vmstate.h +++ b/vm/include/vmstate.h @@ -1,76 +1,48 @@ #pragma once #include #include +#include #include "vector.h" +#include "stack.h" -#define LIB_LOCAL ((size_t) -1) - -#define MAXARG 12 -#define MAXLOC 64 +#define FLAG_REF (1ULL << 63) struct vm_value; -#define REF_NATIVE (1 << 0) -struct vm_ref_entry { - size_t unit_index; - int flags; - union { - size_t ref_index; - uintptr_t ref_native; - }; -}; - -struct vm_func_entry { - size_t argc, local_count; - uint64_t local_regs[MAXLOC]; - uint64_t arg_regs[MAXARG]; - uint32_t *bytecode; -}; - -struct vm_unit { - struct vector ref_table; - struct vector functions; - - uint64_t *global_pool; - size_t global_pool_size; - - int is_loaded; -}; +static inline uint64_t encode_ref(struct vm_value *ref) { + uintptr_t addr = (uintptr_t) ref; + assert(!(addr & 1)); + if (sizeof(uintptr_t) == 8) { + return (addr >> 1) | FLAG_REF; + } else { + assert(0 && "TODO"); + } +} struct vm_state { - // Runtime stack - uint64_t *stack; - size_t sp, stack_size; - uint64_t *call_stack; - size_t csp, call_stack_size; + struct stack data_stack; + struct stack call_stack; struct vector units; size_t lp, fp, ip; }; -void vm_state_init(struct vm_state *vm, - size_t stack_size); -struct vm_unit *vm_add_unit(struct vm_state *vm, - size_t stack_size); -struct vm_func_entry *unit_add_function(struct vm_unit *u); -struct vm_ref_entry *unit_add_ref(struct vm_unit *u); +int vm_state_init(struct vm_state *vm, size_t stack_size); +void vm_state_free(struct vm_state *vm); -void vm_call_index(struct vm_state *vm, size_t unit_index, size_t index); -void vm_call_ref(struct vm_state *vm, struct vm_value *ref); +struct vm_unit *vm_add_unit(struct vm_state *vm, size_t stack_size); // Bytecode interpretation int vm_eval_step(struct vm_state *vm); int vm_eval_unit(struct vm_state *vm, size_t index); -// Stack frames -uint64_t vm_get_arg(struct vm_state *vm, size_t index); - // Stack operation -void push(struct vm_state *vm, uint64_t w); -uint64_t pop(struct vm_state *vm); +int vm_pop(struct vm_state *vm, uint64_t *w); +int vm_pop_integer(struct vm_state *vm, int64_t *v); -void push_ref(struct vm_state *vm, struct vm_value *ref); -void push_integer(struct vm_state *vm, int64_t w); +int vm_push_integer(struct vm_state *vm, int64_t v); +int vm_push_bool(struct vm_state *vm, int v); +int vm_push_ref(struct vm_state *vm, struct vm_value *obj); -uint64_t pop_integer(struct vm_state *vm); +uint64_t vm_get_arg(struct vm_state *vm, size_t index); diff --git a/vm/include/vmstring.h b/vm/include/vmstring.h index 38242b9..8f92b65 100644 --- a/vm/include/vmstring.h +++ b/vm/include/vmstring.h @@ -7,7 +7,7 @@ struct vm_string { char *data; }; -void vm_string_init(struct vm_string *s, const char *data); +int vm_string_init(struct vm_string *s, const char *data); void vm_string_empty(struct vm_string *s); const char *vm_cstr(const struct vm_string *s); size_t vm_strlen(const struct vm_string *s); diff --git a/vm/include/vmval.h b/vm/include/vmval.h index eb0a86c..7d9656b 100644 --- a/vm/include/vmval.h +++ b/vm/include/vmval.h @@ -4,7 +4,7 @@ #include #include "vmstring.h" -#include "vm.h" +#include "vmstate.h" enum vm_type { VT_CONS, @@ -55,6 +55,8 @@ static inline int pair_q(uint64_t w) { return ref_q(w) && (!null_q(w) && getref(w)->type == VT_CONS); } +void vm_value_free(struct vm_value *val); + struct vm_value *vm_cons(uint64_t w0, uint64_t w1); struct vm_value *vm_makestr(const char *str); struct vm_value *vm_func(size_t lib_index, size_t fn_index); diff --git a/vm/load.c b/vm/load.c index 8835b54..9a1c419 100644 --- a/vm/load.c +++ b/vm/load.c @@ -1,19 +1,24 @@ #include -#include #include #include #include #include +#include "binary.h" +#include "error.h" +#include "list.h" +#include "load.h" +#include "unit.h" #include "vmstate.h" #include "vmval.h" -#include "binary.h" -#include "list.h" -#include "load.h" - static int c_print(struct vm_state *vm) { - uint64_t w = pop(vm); + uint64_t w; + int res; + + if ((res = vm_pop(vm, &w)) != 0) { + return res; + } vm_print(w); printf("\n"); return 0; @@ -24,9 +29,16 @@ static struct vm_export_entry c_unit_core_exports[] = { }; static int load_core(struct vm_unit_info *info) { + struct vm_export_entry *ent; assert(info); - info->exports.data = c_unit_core_exports; - info->exports.size = sizeof(c_unit_core_exports) / sizeof(c_unit_core_exports[0]); + vector_init(&info->exports, sizeof(struct vm_export_entry)); + for (size_t i = 0; i < sizeof(c_unit_core_exports) / sizeof(struct vm_export_entry); ++i) { + ent = vector_append(&info->exports); + if (!ent) { + return -ERR_OUT_OF_MEMORY; + } + memcpy(ent, &c_unit_core_exports[i], sizeof(struct vm_export_entry)); + } info->unit = NULL; info->index = 0xFFFFFFFF; return 0; @@ -90,14 +102,19 @@ static int vm_load_functions(struct vm_unit *unit, struct bin_header *hdr, FILE for (size_t i = 0; i < hdr->func_table_size; ++i) { fread(&ent, 1, sizeof(struct bin_func_entry), fp); bytecode = malloc(ent.len); - assert(bytecode); + if (!bytecode) { + return -ERR_OUT_OF_MEMORY; + } fread(bytecode, 1, ent.len, fp); func = unit_add_function(unit); - if (i == 0) { - assert(!func->local_count); + if (i == 0 && ent.local_count) { + return -ERR_IMAGE_INDEX; } - assert(ent.local_count <= MAXLOC); + if (ent.local_count > MAXLOC) { + return -ERR_IMAGE_INDEX; + } + func->argc = ent.argc; func->bytecode = bytecode; func->local_count = ent.local_count; @@ -114,8 +131,6 @@ static int vm_load_refs(struct vm_unit *unit, struct bin_unit_entry unit_ent; struct vm_unresolved_ref *ref; char name[64]; - (void) unit; - (void) refs; vector_init(&unit_names, 64); @@ -123,10 +138,15 @@ static int vm_load_refs(struct vm_unit *unit, fseek(fp, hdr->unit_table_offset, SEEK_SET); for (size_t i = 0; i < hdr->unit_table_size; ++i) { fread(&unit_ent, 1, sizeof(struct bin_unit_entry), fp); - assert(unit_ent.name_len < sizeof(name) - 1); + if (unit_ent.name_len >= sizeof(name) - 1) { + return -ERR_IMAGE_INDEX; + } fread(name, 1, unit_ent.name_len + 1, fp); char *dst = vector_append(&unit_names); + if (!dst) { + return -ERR_OUT_OF_MEMORY; + } strcpy(dst, name); } @@ -135,18 +155,30 @@ static int vm_load_refs(struct vm_unit *unit, fseek(fp, hdr->ref_table_offset, SEEK_SET); for (size_t i = 0; i < hdr->ref_table_size; ++i) { fread(&ref_ent, 1, sizeof(struct bin_ref_entry), fp); - assert(ref_ent.name_len < sizeof(name) - 1); + if (ref_ent.name_len >= sizeof(name) - 1) { + return -ERR_IMAGE_INDEX; + } fread(name, 1, ref_ent.name_len + 1, fp); - assert(ref_ent.unit_index < hdr->unit_table_size); + if (ref_ent.unit_index >= hdr->unit_table_size) { + return -ERR_IMAGE_INDEX; + } ref = malloc(sizeof(struct vm_unresolved_ref)); - assert(ref != NULL); + if (!ref) { + return -ERR_OUT_OF_MEMORY; + } ref->entry = unit_add_ref(unit); + if (!ref->entry) { + return -ERR_OUT_OF_MEMORY; + } strcpy(ref->unit_name, vector_ref(&unit_names, ref_ent.unit_index)); strcpy(ref->sym_name, name); list_add(&ref->link, refs); } + + vector_free(&unit_names); + return 0; } @@ -160,10 +192,15 @@ static int vm_load_exports(struct vm_unit_info *info, fseek(fp, hdr->export_table_offset, SEEK_SET); for (size_t i = 0; i < hdr->export_table_size; ++i) { fread(&ent, 1, sizeof(struct bin_export_entry), fp); - assert(ent.name_len < sizeof(name) - 1); + if (ent.name_len >= sizeof(name) - 1) { + return -ERR_IMAGE_INDEX; + } fread(name, 1, ent.name_len + 1, fp); export = vector_append(&info->exports); + if (!export) { + return -ERR_OUT_OF_MEMORY; + } strcpy(export->name, name); export->is_native = 0; export->ex_index = ent.value; @@ -181,16 +218,20 @@ int vm_load_unit_file(struct vm_state *vm, int res; fread(&hdr, 1, sizeof(struct bin_header), fp); - assert(hdr.magic == 0xCEBAB123); + if (hdr.magic != IMAGE_MAGIC) { + return -ERR_IMAGE_MAGIC; + } if (info) { info->index = vm->units.size; } unit = vm_add_unit(vm, hdr.global_pool_size); + if (!unit) { + return -ERR_OUT_OF_MEMORY; + } if ((res = vm_load_refs(unit, refs, &hdr, fp)) != 0) { return res; } - if ((res = vm_load_functions(unit, &hdr, fp)) != 0) { return res; } @@ -222,7 +263,7 @@ int vm_load_unit(struct vm_state *vm, if ((fp = vm_open_unit(name)) == NULL) { fprintf(stderr, "Unit not found: %s\n", name); - return -ENOENT; + return -ERR_NOT_FOUND; } res = vm_load_unit_file(vm, info, refs, fp); fclose(fp); diff --git a/vm/main.c b/vm/main.c index 6efaca2..666d76d 100644 --- a/vm/main.c +++ b/vm/main.c @@ -1,10 +1,11 @@ #include #include #include -#include #include #include "vmstate.h" +#include "unit.h" +#include "error.h" #include "load.h" #include "hash.h" #include "list.h" @@ -19,32 +20,38 @@ static int lookup_sym(struct vm_unit_info *info, return 0; } } - return -ENOENT; + return -ERR_SYMBOL_UNDEFINED; } -int main(int argc, char **argv) { - assert(argc == 2); - - struct hash_pair *pair; - struct vm_unresolved_ref *ref; - struct vm_export_entry *export; - struct list_head unresolved; - struct hash unit_map; - struct vm_state vm; +static int execute_file(const char *filename) { FILE *fp; int res; + struct hash unit_map; + struct hash_pair *pair; + struct list_head unresolved; + struct vm_export_entry *export; + struct vm_ref_entry *ref_entry; + struct vm_state vm; + struct vm_unit_info *info; + struct vm_unresolved_ref *ref; - vm_state_init(&vm, 4096); + if ((res = vm_state_init(&vm, 4096)) != 0) { + return res; + } list_head_init(&unresolved); - shash_init(&unit_map, 16); + if (shash_init(&unit_map, 16) != 0) { + return -ERR_OUT_OF_MEMORY; + } + + if ((fp = fopen(filename, "rb")) == NULL) { + return -ERR_IMAGE_IO; + } // Load main unit - assert((fp = fopen(argv[1], "rb")) != NULL); if ((res = vm_load_unit_file(&vm, NULL, &unresolved, fp)) != 0) { - fprintf(stderr, "%s: failed to load unit\n", argv[1]); fclose(fp); - return -1; + return res; } fclose(fp); @@ -55,20 +62,21 @@ int main(int argc, char **argv) { if (!pair) { info = malloc(sizeof(struct vm_unit_info)); - assert(info); + if (!info) { + return -ERR_OUT_OF_MEMORY; + } if ((res = vm_load_unit(&vm, info, &unresolved, ref->unit_name)) != 0) { - fprintf(stderr, "failed to load unit %s\n", ref->unit_name); return res; } - assert(hash_insert(&unit_map, ref->unit_name, info) == 0); + if (hash_insert(&unit_map, ref->unit_name, info) != 0) { + return -ERR_OUT_OF_MEMORY; + } } } while (!list_empty(&unresolved)) { - struct vm_unit_info *info; - struct vm_ref_entry *ref_entry; ref = list_first_entry(&unresolved, struct vm_unresolved_ref, link); pair = hash_lookup(&unit_map, ref->unit_name); assert(pair); @@ -76,7 +84,6 @@ int main(int argc, char **argv) { // Lookup symbol in that unit if ((res = lookup_sym(info, ref->sym_name, &export)) != 0) { - fprintf(stderr, "Unresolved reference to %s\n", ref->sym_name); return -1; } @@ -92,10 +99,37 @@ int main(int argc, char **argv) { } list_del(&ref->link); + free(ref); } + for (size_t i = 0; i < unit_map.bucket_count; ++i) { + list_for_each_entry(pair, &unit_map.buckets[i], link) { + info = pair->value; + vector_free(&info->exports); + free(info); + } + } + hash_free(&unit_map); + // Start execution - vm_eval_unit(&vm, 0); + if ((res = vm_eval_unit(&vm, 0)) != 0) { + return res; + } + + vm_state_free(&vm); return 0; } + +int main(int argc, char **argv) { + int res; + if (argc != 2) { + fprintf(stderr, "Usage: %s INPUT", argv[0]); + return EXIT_FAILURE; + } + if ((res = execute_file(argv[1])) != 0) { + fprintf(stderr, "%s: %s\n", argv[1], vm_strerror(res)); + return EXIT_FAILURE; + } + return EXIT_SUCCESS; +} diff --git a/vm/stack.c b/vm/stack.c new file mode 100644 index 0000000..4e3ed3a --- /dev/null +++ b/vm/stack.c @@ -0,0 +1,34 @@ +#include "error.h" +#include "stack.h" + +#include + +int stack_init(struct stack *st, size_t size) { + st->data = malloc(sizeof(uint64_t) * size); + if (!st->data) { + return -ERR_OUT_OF_MEMORY; + } + st->size = size; + st->sp = size; + return 0; +} +void stack_free(struct stack *st) { + free(st->data); + st->data = NULL; + st->size = 0; + st->sp = 0; +} +int stack_push(struct stack *st, uint64_t w) { + if (st->sp == 0) { + return -ERR_STACK_OVERFLOW; + } + st->data[--st->sp] = w; + return 0; +} +int stack_pop(struct stack *st, uint64_t *w) { + if (st->sp == st->size) { + return -ERR_STACK_UNDERFLOW; + } + *w = st->data[st->sp++]; + return 0; +} diff --git a/vm/unit.c b/vm/unit.c new file mode 100644 index 0000000..881cd0d --- /dev/null +++ b/vm/unit.c @@ -0,0 +1,34 @@ +#include + +#include "vmval.h" +#include "unit.h" + +struct vm_ref_entry *unit_add_ref(struct vm_unit *u) { + return vector_append(&u->ref_table); +} + +struct vm_func_entry *unit_add_function(struct vm_unit *u) { + return vector_append(&u->functions); +} + +void unit_free(struct vm_unit *u) { + struct vm_func_entry *func; + uint64_t w; + + vector_free(&u->ref_table); + for (size_t i = 0; i < u->functions.size; ++i) { + func = vector_ref(&u->functions, i); + free(func->bytecode); + } + vector_free(&u->functions); + + for (size_t i = 0; i < u->global_pool_size; ++i) { + w = u->global_pool[i]; + if (ref_q(w)) { + vm_value_free(getref(w)); + } + } + + free(u->global_pool); + u->global_pool = NULL; +} diff --git a/vm/vmstack.c b/vm/vmstack.c new file mode 100644 index 0000000..37340b1 --- /dev/null +++ b/vm/vmstack.c @@ -0,0 +1,39 @@ +#include "vmstate.h" + +#include + +int vm_pop(struct vm_state *vm, uint64_t *w) { + return stack_pop(&vm->data_stack, w); +} + +int vm_pop_integer(struct vm_state *vm, int64_t *v) { + int res; + uint64_t w; + if ((res = stack_pop(&vm->data_stack, &w)) != 0) { + return res; + } + if (w & FLAG_REF) { + return -EINVAL; + } + if (w & (1ULL << 62)) { + w |= 1ULL << 63; + } + *v = w; + return 0; +} + +int vm_push_integer(struct vm_state *vm, int64_t v) { + return stack_push(&vm->data_stack, v & ~FLAG_REF); +} + +int vm_push_bool(struct vm_state *vm, int v) { + if (v) { + return stack_push(&vm->data_stack, 1); + } else { + return stack_push(&vm->data_stack, FLAG_REF); + } +} + +int vm_push_ref(struct vm_state *vm, struct vm_value *obj) { + return stack_push(&vm->data_stack, encode_ref(obj)); +} diff --git a/vm/vmstate.c b/vm/vmstate.c index a1dd83a..7af533b 100644 --- a/vm/vmstate.c +++ b/vm/vmstate.c @@ -1,6 +1,8 @@ +#include "error.h" +#include "op.h" +#include "unit.h" #include "vmstate.h" #include "vmval.h" -#include "vm.h" #include #include @@ -14,62 +16,71 @@ static inline int64_t sximm(uint32_t in) { } } -static inline uint64_t encode_ref(struct vm_value *ref) { - uintptr_t addr = (uintptr_t) ref; - assert(!(addr & 1)); - if (sizeof(uintptr_t) == 8) { - return (addr >> 1) | FLAG_REF; - } else { - abort(); +static int vm_push_context(struct vm_state *vm) { + int res; + if ((res = stack_push(&vm->call_stack, vm->lp)) != 0) { + return res; } -} - -uint64_t pop(struct vm_state *vm) { - assert(vm->sp != vm->stack_size); - return vm->stack[vm->sp++]; -} - -void push(struct vm_state *vm, uint64_t w) { - assert(vm->sp > 0); - vm->stack[--vm->sp] = w; -} - -void push_ref(struct vm_state *vm, struct vm_value *ref) { - assert(vm->sp > 0); - vm->stack[--vm->sp] = encode_ref(ref); -} - -void push_integer(struct vm_state *vm, int64_t w) { - push(vm, w & ~FLAG_REF); -} - -uint64_t pop_integer(struct vm_state *vm) { - uint64_t w = pop(vm); - assert(!(w & FLAG_REF)); - if (w & (1ULL << 62)) { - w |= 1ULL << 63; + if ((res = stack_push(&vm->call_stack, vm->fp)) != 0) { + return res; } - return w; + if ((res = stack_push(&vm->call_stack, vm->ip)) != 0) { + return res; + } + return 0; } -void vm_state_init(struct vm_state *vm, size_t stack_size) { - vm->stack = calloc(sizeof(uint64_t), stack_size); - vm->sp = stack_size; - vm->stack_size = stack_size; +static int vm_pop_context(struct vm_state *vm) { + int res; + if ((res = stack_pop(&vm->call_stack, &vm->ip)) != 0) { + return res; + } + if ((res = stack_pop(&vm->call_stack, &vm->fp)) != 0) { + return res; + } + if ((res = stack_pop(&vm->call_stack, &vm->lp)) != 0) { + return res; + } + return 0; +} - vm->call_stack_size = 1024; - vm->call_stack = calloc(sizeof(uint64_t), vm->call_stack_size); - vm->csp = vm->call_stack_size; +int vm_state_init(struct vm_state *vm, size_t stack_size) { + int res; + + if ((res = stack_init(&vm->data_stack, stack_size)) != 0) { + return res; + } + if ((res = stack_init(&vm->call_stack, 1024)) != 0) { + return res; + } vector_init(&vm->units, sizeof(struct vm_unit)); vm->lp = 0; vm->fp = 0; vm->ip = 0; + + return 0; +} + +void vm_state_free(struct vm_state *vm) { + struct vm_unit *unit; + stack_free(&vm->data_stack); + stack_free(&vm->call_stack); + for (size_t i = 0; i < vm->units.size; ++i) { + unit = vector_ref(&vm->units, i); + unit_free(unit); + } + vector_free(&vm->units); } struct vm_unit *vm_add_unit(struct vm_state *vm, size_t global_pool_size) { struct vm_unit *unit = vector_append(&vm->units); + if (!unit) { + return unit; + } + + unit->is_loaded = 0; unit->global_pool = calloc(sizeof(uint64_t), global_pool_size); unit->global_pool_size = global_pool_size; @@ -78,75 +89,86 @@ struct vm_unit *vm_add_unit(struct vm_state *vm, size_t global_pool_size) { return unit; } -struct vm_ref_entry *unit_add_ref(struct vm_unit *u) { - return vector_append(&u->ref_table); -} - -struct vm_func_entry *unit_add_function(struct vm_unit *u) { - return vector_append(&u->functions); -} - // Call an entry from function table of a unit -void vm_call_unit_index(struct vm_state *vm, size_t lib_index, size_t fn_index) { +int vm_call_unit_index(struct vm_state *vm, size_t lib_index, size_t fn_index) { struct vm_func_entry *func; struct vm_unit *unit; - assert(vm->csp >= 3); + int res; unit = vector_ref(&vm->units, lib_index); func = vector_ref(&unit->functions, fn_index); - vm->call_stack[--vm->csp] = vm->lp; - vm->call_stack[--vm->csp] = vm->fp; - vm->call_stack[--vm->csp] = vm->ip; + if (func->argc > MAXARG) { + return -ERR_RANGE; + } + + if ((res = vm_push_context(vm)) != 0) { + return res; + } + vm->lp = lib_index; vm->fp = fn_index; vm->ip = 0; - assert(func->argc <= MAXARG); for (size_t i = 0; i < func->argc; ++i) { - func->arg_regs[i] = pop(vm); + if ((res = stack_pop(&vm->data_stack, &func->arg_regs[i])) != 0) { + return res; + } } + + return 0; } -void vm_call_ref(struct vm_state *vm, struct vm_value *ref) { - assert(ref->type == VT_FUNC); - vm_call_unit_index(vm, ref->v_func.lib_index, ref->v_func.fn_index); +int vm_call_ref(struct vm_state *vm, struct vm_value *ref) { + if (ref->type != VT_FUNC) { + return -ERR_OPERAND_TYPE; + } + return vm_call_unit_index(vm, ref->v_func.lib_index, ref->v_func.fn_index); } -static uint64_t vm_read_ext_ref(struct vm_state *vm, struct vm_ref_entry *ref) { +static int vm_read_ext_ref(struct vm_state *vm, struct vm_ref_entry *ref, uint64_t *w) { struct vm_unit *unit; + int res; unit = vector_ref(&vm->units, ref->unit_index); if (!unit->is_loaded) { // Before trying to access target value, eval the unit first - int res = vm_eval_unit(vm, ref->unit_index); - assert(res == 0); + if ((res = vm_eval_unit(vm, ref->unit_index)) != 0) { + return res; + } } assert(ref->ref_index < unit->global_pool_size); - return unit->global_pool[ref->ref_index]; + if (ref->ref_index >= unit->global_pool_size) { + return -ERR_RANGE; + } + *w = unit->global_pool[ref->ref_index]; + return 0; } static int vm_eval_debug(struct vm_state *vm, struct vm_func_entry *func, uint32_t arg) { (void) func; uint64_t w0; + int res; switch (arg) { case 0x01: - w0 = pop(vm); + if ((res = stack_pop(&vm->data_stack, &w0)) != 0) { + return res; + } printf("trace: "); vm_print(w0); printf("\n"); return 0; default: - fprintf(stderr, "Undefined debug opcode: %02x\n", arg); - abort(); + return -ERR_OPCODE_UNDEFINED; } } int vm_eval_step(struct vm_state *vm) { uint64_t w0, w1; int64_t sw0, sw1; - size_t i0; ssize_t ii0; + size_t i0; struct vm_ref_entry *r0; + int res; assert(vm->lp < vm->units.size); struct vm_unit *unit = vector_ref(&vm->units, vm->lp); @@ -156,164 +178,199 @@ int vm_eval_step(struct vm_state *vm) { switch (opcode >> 24) { case OP_ADD: - sw0 = pop_integer(vm); - sw1 = pop_integer(vm); - push_integer(vm, sw0 + sw1); - return 0; + if ((res = vm_pop_integer(vm, &sw0)) != 0) { + return res; + } + if ((res = vm_pop_integer(vm, &sw1)) != 0) { + return res; + } + return vm_push_integer(vm, sw0 + sw1); + case OP_MUL: + if ((res = vm_pop_integer(vm, &sw0)) != 0) { + return res; + } + if ((res = vm_pop_integer(vm, &sw1)) != 0) { + return res; + } + return vm_push_integer(vm, sw0 * sw1); case OP_NOT: - w0 = pop(vm); - if (null_q(w0)) { - push_integer(vm, 1); - } else { - push_ref(vm, NULL); + if ((res = stack_pop(&vm->data_stack, &w0)) != 0) { + return res; } - return 0; + return vm_push_bool(vm, null_q(w0)); case OP_EQ: - w0 = pop(vm); - w1 = pop(vm); + if ((res = stack_pop(&vm->data_stack, &w0)) != 0) { + return res; + } + if ((res = stack_pop(&vm->data_stack, &w1)) != 0) { + return res; + } if (ref_q(w0) || ref_q(w1)) { - assert(0 && "Ref cmp not implemented yet"); + // TODO what do? + return -ERR_OPERAND_TYPE; } - if (w0 == w1) { - push_integer(vm, 1); - } else { - push_ref(vm, NULL); - } - return 0; + return vm_push_bool(vm, w0 == w1); // case OP_LDNIL: - push_ref(vm, NULL); - return 0; + return stack_push(&vm->data_stack, FLAG_REF); case OP_LDI: - push_integer(vm, sximm(opcode & 0xFFFFFF)); - return 0; + return vm_push_integer(vm, sximm(opcode & 0xFFFFFF)); case OP_CAR: - w0 = pop(vm); - assert(pair_q(w0)); - push(vm, getref(w0)->v_cons.fat_ar); - return 0; + if ((res = stack_pop(&vm->data_stack, &w0)) != 0) { + return res; + } + if (!pair_q(w0)) { + return -ERR_OPERAND_TYPE; + } + return stack_push(&vm->data_stack, getref(w0)->v_cons.fat_ar); case OP_CDR: - w0 = pop(vm); - assert(pair_q(w0)); - push(vm, getref(w0)->v_cons.fat_dr); - return 0; + if ((res = stack_pop(&vm->data_stack, &w0)) != 0) { + return res; + } + if (!pair_q(w0)) { + return -ERR_OPERAND_TYPE; + } + return stack_push(&vm->data_stack, getref(w0)->v_cons.fat_dr); case OP_CONS: - w0 = pop(vm); - w1 = pop(vm); - push_ref(vm, vm_cons(w0, w1)); - return 0; + if ((res = stack_pop(&vm->data_stack, &w0)) != 0) { + return res; + } + if ((res = stack_pop(&vm->data_stack, &w1)) != 0) { + return res; + } + return vm_push_ref(vm, vm_cons(w0, w1)); // case OP_LDARG: i0 = opcode & 0xFFFFFF; - assert(i0 < MAXARG); - push(vm, func->arg_regs[i0]); - return 0; + if (i0 >= MAXARG) { + return -ERR_RANGE; + } + return stack_push(&vm->data_stack, func->arg_regs[i0]); case OP_STARG: i0 = opcode & 0xFFFFFF; - w0 = pop(vm); - assert(i0 < MAXARG); - func->arg_regs[i0] = w0; - return 0; + if (i0 >= MAXARG) { + return -ERR_RANGE; + } + return stack_pop(&vm->data_stack, &func->arg_regs[i0]); case OP_LDG: i0 = opcode & 0xFFFFFF; - assert(i0 < unit->global_pool_size); - push(vm, unit->global_pool[i0]); - return 0; + if (i0 >= unit->global_pool_size) { + return -ERR_RANGE; + } + return stack_push(&vm->data_stack, unit->global_pool[i0]); case OP_STG: - w0 = pop(vm); i0 = opcode & 0xFFFFFF; - assert(i0 < unit->global_pool_size); - unit->global_pool[i0] = w0; - return 0; + if (i0 >= unit->global_pool_size) { + return -ERR_RANGE; + } + return stack_pop(&vm->data_stack, &unit->global_pool[i0]); case OP_LDF: i0 = opcode & 0xFFFFFF; - assert(i0 < unit->functions.size); - push_ref(vm, vm_func(vm->lp, i0)); - return 0; + if (i0 >= unit->functions.size) { + return -ERR_RANGE; + } + return vm_push_ref(vm, vm_func(vm->lp, i0)); case OP_STL: i0 = opcode & 0xFFFFFF; - assert(i0 < func->local_count); - func->local_regs[i0] = pop(vm); - return 0; + if (i0 >= func->local_count) { + return -ERR_RANGE; + } + return stack_pop(&vm->data_stack, &func->local_regs[i0]); case OP_LDL: i0 = opcode & 0xFFFFFF; - assert(i0 < func->local_count); - push(vm, func->local_regs[i0]); - return 0; + if (i0 >= func->local_count) { + return -ERR_RANGE; + } + return stack_push(&vm->data_stack, func->local_regs[i0]); // case OP_ISZ: - w0 = pop(vm); - if (null_q(w0)) { - push_integer(vm, 1); - } else { - push_ref(vm, NULL); + if ((res = stack_pop(&vm->data_stack, &w0)) != 0) { + return res; } - return 0; + return vm_push_bool(vm, null_q(w0)); // case OP_XCALL: i0 = opcode & 0xFFFFFF; + if (i0 >= unit->ref_table.size) { + return -ERR_RANGE; + } r0 = vector_ref(&unit->ref_table, i0); if (r0->flags & REF_NATIVE) { int (*func) (struct vm_state *) = (void *) r0->ref_native; - assert(func); + if (!func) { + return -ERR_OPERAND_TYPE; + } return func(vm); } else { - w0 = vm_read_ext_ref(vm, r0); - assert(func_q(w0)); - vm_call_ref(vm, getref(w0)); - return 0; + if ((res = vm_read_ext_ref(vm, r0, &w0)) != 0) { + return res; + } + if (!func_q(w0)) { + return -ERR_OPERAND_TYPE; + } + return vm_call_ref(vm, getref(w0)); } case OP_GCALL: i0 = opcode & 0xFFFFFF; - assert(i0 < unit->global_pool_size); + if (i0 >= unit->global_pool_size) { + return -ERR_RANGE; + } w0 = unit->global_pool[i0]; - assert(func_q(w0)); - vm_call_ref(vm, getref(w0)); - return 0; + if (!func_q(w0)) { + return -ERR_OPERAND_TYPE; + } + return vm_call_ref(vm, getref(w0)); case OP_CALL: - w0 = pop(vm); - assert(func_q(w0)); - vm_call_ref(vm, getref(w0)); - return 0; + if ((res = stack_pop(&vm->data_stack, &w0)) != 0) { + return res; + } + if (!func_q(w0)) { + return -ERR_OPERAND_TYPE; + } + return vm_call_ref(vm, getref(w0)); case OP_BF: - w0 = pop(vm); + if ((res = stack_pop(&vm->data_stack, &w0)) != 0) { + return res; + } ii0 = sximm(opcode & 0xFFFFFF); if (null_q(w0)) { - assert(ii0 != 0); + if (ii0 == 0) { + return -ERR_RANGE; + } vm->ip += ii0 - 1; } return 0; case OP_JMP: ii0 = sximm(opcode & 0xFFFFFF); - assert(ii0 != 0); + if (ii0 == 0) { + return -ERR_RANGE; + } vm->ip += ii0 - 1; return 0; case OP_RET: - assert(vm->call_stack_size - vm->csp >= 2); - vm->ip = vm->call_stack[vm->csp++]; - vm->fp = vm->call_stack[vm->csp++]; - vm->lp = vm->call_stack[vm->csp++]; - return 0; + return vm_pop_context(vm); // case OP_DEBUG: return vm_eval_debug(vm, func, opcode & 0xFFFFFF); default: - fprintf(stderr, "Undefined opcode: 0x%02hhx\n", opcode >> 24); - abort(); + return -ERR_OPCODE_UNDEFINED; } } int vm_eval_unit(struct vm_state *vm, size_t index) { struct vm_unit *unit; - size_t csp = vm->csp; + size_t csp; + int res; unit = vector_ref(&vm->units, index); + + csp = vm->call_stack.sp; vm_call_unit_index(vm, index, 0); + // Run until return - while (vm->csp != csp) { - int res = vm_eval_step(vm); - if (res == -1) { - return -1; + while (vm->call_stack.sp != csp) { + if ((res = vm_eval_step(vm)) != 0) { + return res; } } unit->is_loaded = 1; diff --git a/vm/vmstring.c b/vm/vmstring.c index 85fce42..06b731f 100644 --- a/vm/vmstring.c +++ b/vm/vmstring.c @@ -1,16 +1,21 @@ +#include "error.h" #include "vmstring.h" #include #include -void vm_string_init(struct vm_string *str, const char *text) { +int vm_string_init(struct vm_string *str, const char *text) { if (text && *text) { str->data = strdup(text); + if (!str->data) { + return -ERR_OUT_OF_MEMORY; + } str->len = strlen(text); str->cap = str->len + 1; } else { vm_string_empty(str); } + return 0; } void vm_string_empty(struct vm_string *str) { diff --git a/vm/vmval.c b/vm/vmval.c index a71bd7b..959c8f8 100644 --- a/vm/vmval.c +++ b/vm/vmval.c @@ -51,27 +51,45 @@ static void vm_print2(uint64_t w, int cdepth) { } } -struct vm_value *vm_cons(uint64_t w0, uint64_t w1) { - struct vm_value *v = malloc(sizeof(struct vm_value)); - v->type = VT_CONS; +static struct vm_value *vm_value_create(enum vm_type type) { + struct vm_value *v = calloc(1, sizeof(struct vm_value)); + if (!v) { + return NULL; + } + v->type = type; v->refcount = 0; + return v; +} + +void vm_value_free(struct vm_value *v) { + assert(v->refcount == 0); + free(v); +} + +struct vm_value *vm_cons(uint64_t w0, uint64_t w1) { + struct vm_value *v = vm_value_create(VT_CONS); + if (!v) { + return NULL; + } v->v_cons.fat_ar = w0; v->v_cons.fat_dr = w1; return v; } struct vm_value *vm_makestr(const char *str) { - struct vm_value *v = malloc(sizeof(struct vm_value)); - v->type = VT_STRING; - v->refcount = 0; + struct vm_value *v = vm_value_create(VT_STRING); + if (!v) { + return NULL; + } vm_string_init(&v->v_string, str); return v; } struct vm_value *vm_func(size_t lib_index, size_t fn_index) { - struct vm_value *v = malloc(sizeof(struct vm_value)); - v->type = VT_FUNC; - v->refcount = 0; + struct vm_value *v = vm_value_create(VT_FUNC); + if (!v) { + return NULL; + } v->v_func.lib_index = lib_index; v->v_func.fn_index = fn_index; return v;