/*
 * Page header captured together with this listing (not part of the program):
 *   299 lines
 *   6.8 KiB
 *   C
 *   Raw Permalink Normal View History
 *
 *   2021-04-06 17:38:09 +03:00
 */
#include "parse.h"
#include "node.h"

#include <assert.h>
#include <errno.h>
#include <stdint.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
/*
 * peek callback of the string-backed parser.
 *
 * Reads p->ctx as a pointer to char and returns the character it points at
 * without moving the pointer.
 */
static int vm_str_parser_peek(struct vm_parser *p) {
    const char *cursor = (const char *) p->ctx;

    return cursor[0];
}
/*
 * pop callback of the string-backed parser.
 *
 * Returns the character p->ctx currently points at. If that character is not
 * the terminating NUL, the read position is advanced by one; at the NUL the
 * position stays put, so repeated calls keep returning 0.
 */
static int vm_str_parser_pop(struct vm_parser *p) {
    const char *cursor = (const char *) p->ctx;
    char c = *cursor;

    if (c) {
        /*
         * The rest of this file stores both strings and FILE pointers in
         * ctx, i.e. it is an untyped pointer. Incrementing such a pointer
         * directly is a compiler extension, so step through char * instead.
         */
        p->ctx = (char *) p->ctx + 1;
    }
    return c;
}
/* Returns 1 when ch is a space, a tab or a newline, and 0 otherwise. */
static inline int is_space(int ch) {
    switch (ch) {
    case ' ':
    case '\t':
    case '\n':
        return 1;
    default:
        return 0;
    }
}
/* Returns 1 when ch is one of the characters '0'..'9', and 0 otherwise. */
static inline int is_digit(int ch) {
    if (ch < '0') {
        return 0;
    }
    return ch <= '9';
}
/*
 * Returns 1 when ch may start an identifier: an ASCII letter or one of the
 * punctuation characters "+-*&^%$@!~<>/=.". Returns 0 otherwise, including
 * for ch == 0.
 */
static inline int is_ident0(int ch) {
    if ((ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z')) {
        return 1;
    }
    /*
     * strchr() treats the terminating NUL as part of the string, so a bare
     * strchr(set, 0) would report a match; rule out ch == 0 explicitly.
     */
    return ch != 0 && strchr("+-*&^%$@!~<>/=.", ch) != NULL;
}
/*
 * Consumes characters from `in` for as long as the next one is a space, tab
 * or newline. Always returns 0.
 */
static int skip_whitespace(struct vm_parser *in) {
    while (is_space(in->peek(in))) {
        /*
         * The pop must happen outside assert(): when NDEBUG is defined the
         * assert expression is not evaluated, and the loop would never
         * advance.
         */
        int popped = in->pop(in);

        assert(popped > 0);
        (void) popped;
    }
    return 0;
}
/*
 * Parses a non-negative decimal integer literal from `in`.
 *
 * Digits are consumed until the input ends or the next character is a
 * lowercase letter, a space, a tab, a newline, '(' or ')'. The terminating
 * character is left in the stream. Leading zeros are accepted ("012" is 12).
 *
 * A type suffix after the digits (lowercase letters, e.g. "u8") is not
 * parsed yet: the literal simply ends there and the type is always N_INTEGER.
 *
 * Parameters:
 *   in     - character source.
 *   _type  - receives N_INTEGER on success.
 *   _value - receives the parsed value on success.
 *
 * Returns 0 on success, -EINVAL if a character that is neither a digit nor a
 * terminator appears inside the literal, and -ERANGE if the value does not
 * fit in intmax_t. The outputs are only written on success. A "0x" prefix is
 * not supported and aborts the process.
 */
static int vm_parse_int(struct vm_parser *in, enum node_type *_type, intmax_t *_value) {
    intmax_t value = 0;
    int ch;

    ch = in->peek(in);
    if (ch == '0') {
        /* Side effects stay out of assert() so NDEBUG builds still advance. */
        (void) in->pop(in);
        if (in->peek(in) == 'x') {
            /* Fail loudly in every build instead of silently reading "0". */
            fprintf(stderr, "TODO hex\n");
            abort();
        }
    }

    while (1) {
        int digit;

        ch = in->peek(in);
        if (!ch || (ch >= 'a' && ch <= 'z') || strchr(" \n\t()", ch)) {
            break;
        }
        if (!is_digit(ch)) {
            return -EINVAL;
        }
        digit = ch - '0';
        /* Reject the digit before value * 10 + digit could overflow. */
        if (value > (INTMAX_MAX - digit) / 10) {
            return -ERANGE;
        }
        (void) in->pop(in);
        value = value * 10 + digit;
    }

    *_value = value;
    *_type = N_INTEGER;
    return 0;
}
/* Declared here because list parsing recurses into vm_parse(). */
int vm_parse(struct vm_parser *in, struct node **out);

/*
 * Consumes one character from `in`; `expected` is the value in->peek() just
 * reported. The pop is performed outside assert() so that it still happens
 * when NDEBUG is defined.
 */
static void parse_consume(struct vm_parser *in, int expected) {
    int popped = in->pop(in);

    assert(popped == expected);
    (void) popped;
}

/*
 * Parses "( expr* )" starting at the opening parenthesis and stores a chain
 * of cons cells (NULL for an empty list) in *out.
 */
static int parse_list(struct vm_parser *in, struct node **out) {
    struct node *head = NULL;
    struct node *tail = NULL;
    int res;

    parse_consume(in, '(');
    while (1) {
        struct node *car, *pair;

        if ((res = skip_whitespace(in)) != 0) {
            return res;
        }
        if (in->peek(in) == ')') {
            parse_consume(in, ')');
            *out = head;
            return 0;
        }
        /*
         * NOTE(review): on failure the cells already linked into `head` are
         * not released here; no node-release function is visible in this
         * file - confirm against node.h.
         */
        if ((res = vm_parse(in, &car)) != 0) {
            return res;
        }
        pair = cons(car, NULL);
        if (head) {
            tail->n_cons.cdr = pair;
        } else {
            head = pair;
        }
        tail = pair;
    }
}

/*
 * Parses a character literal introduced by '#': "#c" yields integer(c) and
 * "#\n" yields integer('\n'). Input that ends right after '#' or "#\" gives
 * -EINVAL; any other escape aborts the process.
 */
static int parse_char_literal(struct vm_parser *in, struct node **out) {
    int ch;

    parse_consume(in, '#');
    ch = in->peek(in);
    if (!ch) {
        return -EINVAL;
    }
    if (ch != '\\') {
        parse_consume(in, ch);
        *out = integer(ch);
        return 0;
    }

    parse_consume(in, '\\');
    ch = in->peek(in);
    switch (ch) {
    case 0:
        return -EINVAL;
    case 'n':
        *out = integer('\n');
        break;
    default:
        fprintf(stderr, "Unknown escape sequence: #\\%c\n", ch);
        abort();
    }
    parse_consume(in, ch);
    return 0;
}

/*
 * Parses an identifier: characters up to (not including) end of input, a
 * space, tab, newline, '(' or ')'. Identifiers longer than 23 characters do
 * not fit the local buffer and give -EINVAL.
 */
static int parse_identifier(struct vm_parser *in, struct node **out) {
    char buf[24];
    size_t i = 0;
    int ch = in->peek(in);

    parse_consume(in, ch);
    while (1) {
        /* Real bounds check: an assert() would vanish under NDEBUG. */
        if (i >= sizeof(buf) - 1) {
            return -EINVAL;
        }
        buf[i++] = (char) ch;
        ch = in->peek(in);
        if (!ch || strchr(" \n\t()", ch)) {
            break;
        }
        parse_consume(in, ch);
    }
    buf[i] = 0;
    *out = ident(buf);
    return 0;
}

/*
 * Parses a double-quoted string into a heap buffer that is handed over to
 * string_from_owned(). Returns -ENOMEM on allocation failure and -EINVAL if
 * the input ends before the closing quote; the buffer is freed on both
 * paths. Backslash escapes are not implemented and abort the process.
 */
static int parse_string_literal(struct vm_parser *in, struct node **out) {
    size_t cap = 32;
    size_t len = 0;
    char *buf = malloc(cap);
    int ch;

    if (!buf) {
        return -ENOMEM;
    }
    parse_consume(in, '"');
    while (1) {
        ch = in->peek(in);
        if (!ch) {
            free(buf);
            return -EINVAL;
        }
        if (ch == '\\') {
            fprintf(stderr, "String escape sequences: Not implemented\n");
            abort();
        }
        parse_consume(in, ch);
        if (ch == '"') {
            break;
        }
        if (len == cap - 1) {
            /* Grow through a temporary so the old buffer survives failure. */
            char *grown = realloc(buf, cap + 32);

            if (!grown) {
                free(buf);
                return -ENOMEM;
            }
            buf = grown;
            cap += 32;
        }
        buf[len++] = (char) ch;
    }
    buf[len] = 0;
    *out = string_from_owned(buf);
    return 0;
}

/*
 * Parses one expression from `in` and stores the resulting node in *out.
 *
 * Leading whitespace is skipped, then the first character selects the form:
 *   '('            list of expressions up to the matching ')'
 *   digit          decimal integer
 *   '#'            character literal
 *   letter/punct.  identifier (see is_ident0)
 *   '"'            string
 *
 * Returns 0 on success, -1 when the input is exhausted before an expression
 * starts, or a negative errno-style code (-EINVAL, -ENOMEM, or whatever
 * vm_parse_int reports) on malformed input or allocation failure. *out is
 * only written on success. Any other leading character aborts the process.
 */
int vm_parse(struct vm_parser *in, struct node **out) {
    int res;
    int ch;

    if ((res = skip_whitespace(in)) != 0) {
        return res;
    }
    ch = in->peek(in);
    if (ch == 0) {
        return -1;
    }
    if (ch == '(') {
        return parse_list(in, out);
    }
    if (is_digit(ch)) {
        intmax_t value;
        enum node_type type;

        if ((res = vm_parse_int(in, &type, &value)) != 0) {
            return res;
        }
        (void) type;
        *out = integer(value);
        return 0;
    }
    if (ch == '#') {
        return parse_char_literal(in, out);
    }
    if (is_ident0(ch)) {
        return parse_identifier(in, out);
    }
    if (ch == '"') {
        return parse_string_literal(in, out);
    }
    fprintf(stderr, "Unrecognized character: #%c (%d)\n", ch, ch);
    abort();
}
/*
 * Sets up `p` to read characters from the NUL-terminated string `expr`.
 *
 * Installs the string peek/pop callbacks, stores `expr` as the read position
 * in p->ctx and sets p->ch to 0. The string is not copied.
 */
void vm_str_parser(struct vm_parser *p, const char *expr) {
    /* Callbacks first, then the state they operate on. */
    p->pop = vm_str_parser_pop;
    p->peek = vm_str_parser_peek;

    p->ch = 0;
    p->ctx = (void *) expr;
}
int vm_parse_str(const char *expr, struct node **out) {
struct vm_parser p;
vm_str_parser(&p, expr);
return vm_parse(&p, out);
}
/*
 * peek callback of the FILE-backed parser.
 *
 * p->ch holds the buffered character, with -1 meaning "nothing buffered".
 * When nothing is buffered, one character is read from the stream in p->ctx
 * with fgetc() and stored in p->ch. Returns the buffered character, or 0 if
 * it is still -1 after the read.
 */
static int file_peek(struct vm_parser *p) {
    int current = p->ch;

    if (current == -1) {
        current = fgetc(p->ctx);
        p->ch = current;
    }
    return current == -1 ? 0 : current;
}
/*
 * pop callback of the FILE-backed parser.
 *
 * Returns the character file_peek() reports and replaces the buffered
 * character in p->ch with the next one read from the stream in p->ctx.
 * When the buffer is still -1 after peeking (fgetc() had nothing to
 * deliver), returns 0 and does not read from the stream again.
 */
static int file_pop(struct vm_parser *p) {
    int ch = file_peek(p);

    /*
     * file_peek() maps -1 to 0, so its return value can never be -1; test
     * the buffered value instead to detect that nothing is left to pop.
     */
    if (p->ch == -1) {
        return 0;
    }
    p->ch = fgetc(p->ctx);
    return ch;
}
struct node *vm_load_file(FILE *fp) {
struct node *head, *tail, *expr;
struct vm_parser p;
p.ch = -1;
p.ctx = fp;
p.peek = file_peek;
p.pop = file_pop;
head = NULL;
while (vm_parse(&p, &expr) == 0) {
expr = cons(expr, NULL);
if (head) {
tail->n_cons.cdr = expr;
} else {
head = expr;
}
tail = expr;
}
return head;
}