#include "parse.h"
#include "node.h"

#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

/*
 * S-expression reader.
 *
 * A `struct vm_parser` supplies two callbacks, `peek` (look at the next
 * character without consuming it) and `pop` (consume and return it). Both
 * report end of input as 0. This file provides a string-backed and a
 * FILE-backed implementation of those callbacks plus the recursive parser.
 */

int vm_parse(struct vm_parser *in, struct node **out);

/* String source: return the current character, 0 at the terminator. */
static int vm_str_parser_peek(struct vm_parser *p)
{
    return *(const char *) p->ctx;
}

/*
 * String source: return the current character and step past it. The
 * cursor never moves beyond the terminating NUL.
 */
static int vm_str_parser_pop(struct vm_parser *p)
{
    char c = *(const char *) p->ctx;

    if (c) {
        /* Advance through a char view; arithmetic on ctx itself would
         * rely on the GNU void-pointer arithmetic extension. */
        p->ctx = (char *) p->ctx + 1;
    }
    return c;
}

/* True for the whitespace characters skipped between tokens. */
static inline int is_space(int ch)
{
    return ch == ' ' || ch == '\t' || ch == '\n';
}

/* True for ASCII decimal digits. */
static inline int is_digit(int ch)
{
    return ch >= '0' && ch <= '9';
}

/* True for characters that may start an identifier. */
static inline int is_ident0(int ch)
{
    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
        return 1;
    }
    /* strchr() also matches the terminating NUL, so exclude 0 explicitly. */
    return ch != 0 && strchr("+-*&^%$@!~<>/=.", ch) != NULL;
}

/* True at end of input or on a character that ends an integer/identifier. */
static inline int is_token_end(int ch)
{
    return ch == 0 || ch == ' ' || ch == '\n' || ch == '\t'
        || ch == '(' || ch == ')';
}

/*
 * Consume one character and, in debug builds, check that it is the one the
 * caller just peeked. The pop is deliberately kept outside assert() so the
 * character is still consumed when NDEBUG is defined.
 */
static void vm_expect(struct vm_parser *in, int expected)
{
    int got = in->pop(in);

    assert(got == expected);
    (void) got;
    (void) expected;
}

/* Consume any run of whitespace. Always returns 0. */
static int skip_whitespace(struct vm_parser *in)
{
    while (is_space(in->peek(in))) {
        int got = in->pop(in);

        assert(got > 0);
        (void) got;
    }
    return 0;
}

/*
 * Parse a non-negative decimal integer literal whose first digit is the
 * next character of `in`.
 *
 * The literal ends at end of input, whitespace, a parenthesis, or a
 * lowercase letter; the terminating character is not consumed. (The
 * lowercase-letter rule is apparently reserved for a type suffix such as
 * `u8` or `i32`, which is not implemented.) Every literal is reported as
 * N_INTEGER.
 *
 * Returns 0 and stores the result in *_value / *_type on success,
 * -EINVAL if a non-digit appears inside the literal, -ERANGE if the value
 * does not fit in intmax_t. A `0x` prefix is not supported yet and aborts.
 */
static int vm_parse_int(struct vm_parser *in, enum node_type *_type, intmax_t *_value)
{
    intmax_t value = 0;
    int ch = in->peek(in);

    if (ch == '0') {
        vm_expect(in, ch);
        if (in->peek(in) == 'x') {
            vm_expect(in, 'x');
            /* Explicit abort rather than assert(0): the latter vanishes
             * under NDEBUG and would yield a silently wrong value. */
            fprintf(stderr, "TODO hex\n");
            abort();
        }
        /* Fall through: digits after a leading zero belong to the same
         * literal instead of being split into separate tokens. */
    }

    for (;;) {
        int digit;

        ch = in->peek(in);
        if (is_token_end(ch) || (ch >= 'a' && ch <= 'z')) {
            break;
        }
        if (!is_digit(ch)) {
            return -EINVAL;
        }
        digit = ch - '0';
        /* Reject before computing: signed overflow is undefined. */
        if (value > (INTMAX_MAX - digit) / 10) {
            return -ERANGE;
        }
        vm_expect(in, ch);
        value = value * 10 + digit;
    }

    *_value = value;
    *_type = N_INTEGER;
    return 0;
}

/*
 * Parse a parenthesised list; the next character of `in` is '('.
 * Elements are chained with cons(); an empty list yields NULL.
 *
 * NOTE(review): on an error return the pairs built so far are not
 * released; no release function for nodes is visible in this file.
 */
static int parse_list(struct vm_parser *in, struct node **out)
{
    struct node *head = NULL;
    struct node *tail = NULL;
    int res;

    vm_expect(in, '(');
    for (;;) {
        struct node *car;
        struct node *pair;

        if ((res = skip_whitespace(in)) != 0) {
            return res;
        }
        if (in->peek(in) == ')') {
            vm_expect(in, ')');
            break;
        }
        if ((res = vm_parse(in, &car)) != 0) {
            return res;
        }
        pair = cons(car, NULL);
        if (head) {
            tail->n_cons.cdr = pair;
        } else {
            head = pair;
        }
        tail = pair;
    }

    *out = head;
    return 0;
}

/*
 * Parse a character literal; the next character of `in` is '#'.
 * `#c` yields integer(c); `#\n` yields integer('\n'). Any other `#\`
 * escape prints a diagnostic and aborts. Returns -EINVAL if the input
 * ends right after '#'.
 */
static int parse_char(struct vm_parser *in, struct node **out)
{
    int ch;

    vm_expect(in, '#');
    ch = in->peek(in);
    if (!ch) {
        return -EINVAL;
    }
    if (ch != '\\') {
        vm_expect(in, ch);
        *out = integer(ch);
        return 0;
    }

    vm_expect(in, ch);
    ch = in->peek(in);
    switch (ch) {
    case 'n':
        *out = integer('\n');
        break;
    default:
        fprintf(stderr, "Unknown escape sequence: #\\%c\n", ch);
        abort();
    }
    vm_expect(in, ch);
    return 0;
}

/*
 * Parse an identifier whose first character `ch` has been peeked but not
 * yet consumed. The identifier runs until end of input, whitespace or a
 * parenthesis. Returns -EINVAL if it does not fit the local buffer
 * (more than 23 characters).
 */
static int parse_ident(struct vm_parser *in, int ch, struct node **out)
{
    char buf[24];
    size_t len = 0;

    vm_expect(in, ch);
    for (;;) {
        /* Real bounds check: an assert() here would disappear under
         * NDEBUG and let input overrun the stack buffer. */
        if (len >= sizeof buf - 1) {
            return -EINVAL;
        }
        buf[len++] = (char) ch;
        ch = in->peek(in);
        if (is_token_end(ch)) {
            break;
        }
        vm_expect(in, ch);
    }
    buf[len] = '\0';

    *out = ident(buf);
    return 0;
}

/*
 * Parse a double-quoted string; the next character of `in` is '"'.
 * The heap buffer holding the text is handed to string_from_owned().
 * Backslash escapes are not implemented (debug builds assert on them).
 * Returns -ENOMEM on allocation failure and -EINVAL if the input ends
 * before the closing quote; the buffer is freed on both paths.
 */
static int parse_string(struct vm_parser *in, struct node **out)
{
    size_t cap = 32;
    size_t len = 0;
    char *buf = malloc(cap);
    int ch;

    if (!buf) {
        return -ENOMEM;
    }
    vm_expect(in, '"');

    for (;;) {
        ch = in->peek(in);
        if (!ch) {
            free(buf);
            return -EINVAL;
        }
        assert(ch != '\\' && "Not implemented");
        vm_expect(in, ch);
        if (ch == '"') {
            break;
        }
        /* Keep one byte free for the terminator. */
        if (len == cap - 1) {
            char *grown = realloc(buf, cap + 32);

            if (!grown) {
                free(buf);
                return -ENOMEM;
            }
            buf = grown;
            cap += 32;
        }
        buf[len++] = (char) ch;
    }
    buf[len] = '\0';

    *out = string_from_owned(buf);
    return 0;
}

/*
 * Parse one expression from `in` and store it in *out.
 *
 * Recognised forms: `( ... )` lists, decimal integers, `#c` / `#\n`
 * character literals, identifiers and double-quoted strings.
 *
 * Returns 0 on success, -1 if the input is exhausted before any token
 * starts, or a negative errno value (-EINVAL, -ERANGE, -ENOMEM) on
 * malformed input or allocation failure. A character that cannot start
 * any token prints a diagnostic and aborts.
 */
int vm_parse(struct vm_parser *in, struct node **out)
{
    int res;
    int ch;

    if ((res = skip_whitespace(in)) != 0) {
        return res;
    }

    ch = in->peek(in);
    if (ch == 0) {
        return -1;
    }
    if (ch == '(') {
        return parse_list(in, out);
    }
    if (is_digit(ch)) {
        intmax_t value;
        enum node_type type;

        if ((res = vm_parse_int(in, &type, &value)) != 0) {
            return res;
        }
        (void) type;
        *out = integer(value);
        return 0;
    }
    if (ch == '#') {
        return parse_char(in, out);
    }
    if (is_ident0(ch)) {
        return parse_ident(in, ch, out);
    }
    if (ch == '"') {
        return parse_string(in, out);
    }

    fprintf(stderr, "Unrecognized character: #%c (%d)\n", ch, ch);
    abort();
}

/*
 * Initialise `p` to read from the NUL-terminated string `expr`.
 * The string is only read, never written, and must outlive the parser.
 */
void vm_str_parser(struct vm_parser *p, const char *expr)
{
    p->ctx = (void *) expr;
    p->ch = 0;
    p->peek = vm_str_parser_peek;
    p->pop = vm_str_parser_pop;
}

/* Parse the first expression in `expr`; same return values as vm_parse(). */
int vm_parse_str(const char *expr, struct node **out)
{
    struct vm_parser p;

    vm_str_parser(&p, expr);
    return vm_parse(&p, out);
}

/*
 * FILE source: return the next character without consuming it, 0 at end
 * of file. p->ch caches the look-ahead character; EOF means the cache is
 * empty (or the stream is exhausted) and triggers another read.
 */
static int file_peek(struct vm_parser *p)
{
    if (p->ch == EOF) {
        p->ch = fgetc(p->ctx);
    }
    return p->ch == EOF ? 0 : p->ch;
}

/*
 * FILE source: return the next character (0 at end of file) and refill
 * the look-ahead cache with the one after it.
 */
static int file_pop(struct vm_parser *p)
{
    int ch = file_peek(p);

    p->ch = fgetc(p->ctx);
    return ch;
}

/*
 * Parse expressions from `fp` until vm_parse() returns non-zero and
 * return them as a cons list in reading order (NULL if none was parsed).
 * End of file and parse errors are not distinguished: both simply end
 * the list.
 */
struct node *vm_load_file(FILE *fp)
{
    struct node *head = NULL;
    struct node *tail = NULL;
    struct node *expr;
    struct vm_parser p;

    p.ch = EOF;
    p.ctx = fp;
    p.peek = file_peek;
    p.pop = file_pop;

    while (vm_parse(&p, &expr) == 0) {
        expr = cons(expr, NULL);
        if (head) {
            tail->n_cons.cdr = expr;
        } else {
            head = expr;
        }
        tail = expr;
    }
    return head;
}