aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Makefile 5
-rw-r--r-- asm.peg 4
-rw-r--r-- main.c 109
-rw-r--r-- minias.h 8
-rw-r--r-- parse.c 128
-rw-r--r-- util.c 16
6 files changed, 143 insertions, 127 deletions
diff --git a/Makefile b/Makefile
index 94f3a7a..ba00754 100644
--- a/Makefile
+++ b/Makefile
@@ -7,6 +7,7 @@ CFLAGS+=-D _GNU_SOURCE
OBJ=\
main.o\
+ parse.o\
util.o
all: minias
@@ -17,8 +18,8 @@ minias: $(OBJ)
asm.peg.inc: asm.peg
leg -o $@ asm.peg
-main.o: asm.peg.inc
-main.o util.o: minias.h
+parse.o: asm.peg.inc
+main.o parse.o util.o: minias.h
check:
sh test/test.sh
diff --git a/asm.peg b/asm.peg
index f181cfe..8d16458 100644
--- a/asm.peg
+++ b/asm.peg
@@ -126,9 +126,9 @@ pop =
call = "call" 'q'? ws (
'*' t:m
- { $$.call = (Call){ .kind = ASM_CALL, .target.indirect=dupv(&t), .indirect=1 } ; }
+ { $$.call = (Call){ .kind = ASM_CALL, .target.indirect=internparsev(&t), .indirect=1 } ; }
| '*' t:r64
- { $$.call = (Call){ .kind = ASM_CALL, .target.indirect=dupv(&t), .indirect=1 } ; }
+ { $$.call = (Call){ .kind = ASM_CALL, .target.indirect=internparsev(&t), .indirect=1 } ; }
| t:value
{ $$.call = (Call){ .kind = ASM_CALL, .target.direct=t.value, .indirect=0 } ; }
)
diff --git a/main.c b/main.c
index cab664a..5a784d1 100644
--- a/main.c
+++ b/main.c
@@ -154,107 +154,6 @@ Relocation *newreloc() {
return &relocs[nrelocs++];
}
-static String decodestring(char *s) {
- int i;
- char *end;
- size_t len = 0;
- size_t cap = 0;
- uint8_t *data = NULL;
- uint8_t c = 0;
-
- /* The string is already validated by the parser so we omit some checks*/
- while (*s) {
- if (*s == '\\') {
- s++;
- if (*s >= '0' && *s <= '7') {
- c = strtoul(s, &end, 8);
- s += 3;
- } else if (*s == 'x') {
- s++;
- c = strtoul(s, &end, 16);
- s = end;
- } else if (*s == 'r') {
- c = '\r';
- } else if (*s == 'n') {
- c = '\n';
- } else if (*s == 't') {
- c = '\t';
- } else {
- unreachable();
- }
- } else {
- c = *s;
- s++;
- }
- if (len == cap) {
- cap = cap ? len * 2 : 8;
- data = realloc(data, cap);
- }
- data[len++] = c;
- }
- return (String){.kind = ASM_STRING, .len = len, .data = data};
-}
-
-static const Parsev *dupv(Parsev *p) {
- Parsev *r = xmalloc(sizeof(Parsev));
- *r = *p;
- return r;
-}
-
-#define INSTR1(V, A1) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = 0, .variant = V, .arg1 = dupv(&A1), .arg2 = NULL, .arg3 = NULL \
- } \
- }
-#define INSTR2(V, A1, A2) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = 0, .variant = V, .arg1 = dupv(&A1), .arg2 = dupv(&A2), \
- .arg3 = NULL \
- } \
- }
-#define INSTR3(V, A1, A2, A3) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = 0, .variant = V, .arg1 = dupv(&A1), .arg2 = dupv(&A2), \
- .arg3 = dupv(&A3) \
- } \
- }
-
-#define REG(K) \
- (Parsev) { .kind = K }
-
-#define YYSTYPE Parsev
-#define YY_CTX_LOCAL
-#define YY_CTX_MEMBERS Parsev v;
-#include "asm.peg.inc"
-
-void parse(void) {
- AsmLine *l, *prevl;
- yycontext ctx;
-
- memset(&ctx, 0, sizeof(yycontext));
- prevl = NULL;
- curlineno = 0;
-
- while (yyparse(&ctx)) {
- curlineno += 1;
- if (ctx.v.kind == ASM_SYNTAX_ERROR)
- lfatal("syntax error\n");
- if (ctx.v.kind == ASM_BLANK)
- continue;
- l = zalloc(sizeof(AsmLine));
- l->v = ctx.v;
- l->lineno = curlineno;
- if (prevl)
- prevl->next = l;
- else
- allasm = l;
- prevl = l;
- }
-}
-
/* Shorthand helpers to write section data. */
static void sb(uint8_t b) { secaddbyte(cursection, b); }
@@ -738,11 +637,13 @@ static void assemble(void) {
AsmLine *l;
cursection = text;
-
+ curlineno = 0;
for (l = allasm; l; l = l->next) {
+ curlineno++;
v = &l->v;
- curlineno = l->lineno;
switch (l->v.kind) {
+ case ASM_BLANK:
+ break;
case ASM_DIR_GLOBL:
sym = getsym(v->globl.name);
sym->global = 1;
@@ -1279,8 +1180,8 @@ static void outelf(void) {
int main(void) {
symbols = mkhtab(256);
outf = stdout;
+ allasm = parse();
initsections();
- parse();
assemble();
fillsymtab();
handlerelocs();
diff --git a/minias.h b/minias.h
index 8d8108c..adf12d6 100644
--- a/minias.h
+++ b/minias.h
@@ -327,6 +327,10 @@ union Parsev {
int64_t i64;
};
+extern size_t curlineno;
+
+/* parse.c */
+
typedef struct AsmLine AsmLine;
struct AsmLine {
int64_t lineno;
@@ -334,7 +338,7 @@ struct AsmLine {
AsmLine *next;
};
-extern size_t curlineno;
+AsmLine *parse(void);
/* util.c */
@@ -349,8 +353,6 @@ char *xmemdup(const char *, size_t);
char *xstrdup(const char *s);
void *zalloc(size_t n);
-const char *internstring(const char *s);
-
struct hashtable {
size_t len, cap;
struct hashtablekey *keys;
diff --git a/parse.c b/parse.c
new file mode 100644
index 0000000..381c3df
--- /dev/null
+++ b/parse.c
@@ -0,0 +1,128 @@
+#include "minias.h"
+
+/* Return an interned copy of *p: the pointer cached for a byte-identical
+   value, or a fresh xmemdup copy that takes over the hash slot. */
+static const Parsev *internparsev(Parsev *p) {
+  /* A direct mapped cache: every slot holds just the last value stored in
+     it, so a collision evicts the old entry and equal values can end up
+     duplicated. Candidates are matched with memcmp over the whole union,
+     not by pointer, so values that differ only in unused bytes also miss. */
+  static const Parsev *cache[4096] = {0};
+  const Parsev *interned;
+  size_t idx;
+  /* Parenthesized so that the hash is reduced modulo the number of slots. */
+  idx = murmurhash64a((char *)p, sizeof(Parsev)) % (sizeof(cache) / sizeof(cache[0]));
+  interned = cache[idx];
+  if (!interned || memcmp(p, interned, sizeof(Parsev)) != 0)
+    interned = cache[idx] = (const Parsev *)xmemdup((char *)p, sizeof(Parsev));
+  return interned;
+}
+
+/* Return an interned copy of s: the pointer cached for an equal string, or a
+   fresh xstrdup copy that replaces whatever occupied the hash slot before.
+   NOTE(review): the prototype was removed from minias.h; if only the grammar
+   actions call this, it could become static like internparsev. TODO confirm. */
+const char *internstring(const char *s) {
+  static const char *cache[4096] = {0};
+  const char *interned;
+  size_t idx;
+  /* Parenthesized so that the hash is reduced modulo the number of slots. */
+  idx = murmurhash64a(s, strlen(s)) % (sizeof(cache) / sizeof(cache[0]));
+  interned = cache[idx];
+  if (!interned || strcmp(s, interned) != 0)
+    interned = cache[idx] = xstrdup(s);
+  return interned;
+}
+
+/* Decode the escape sequences of the NUL terminated string s into bytes.
+   Returns an ASM_STRING holding len bytes in a realloc'd buffer (data stays
+   NULL when s is empty). The parser has validated s already, so any escape
+   other than octal, \x, \r, \n and \t is treated as unreachable. */
+static String decodestring(char *s) {
+  char *end;
+  size_t len = 0;
+  size_t cap = 0;
+  uint8_t *data = NULL;
+  uint8_t c;
+  int n;
+
+  while (*s) {
+    c = *s++;
+    if (c == '\\') {
+      if (*s >= '0' && *s <= '7') {
+        /* At most three octal digits, never stepping over the terminator. */
+        for (c = 0, n = 0; n < 3 && *s >= '0' && *s <= '7'; n++)
+          c = c * 8 + (*s++ - '0');
+      } else if (*s == 'x') {
+        c = strtoul(s + 1, &end, 16);
+        s = end;
+      } else if (*s == 'r' || *s == 'n' || *s == 't') {
+        /* Consume the letter as well, else it is emitted again as text. */
+        c = *s == 'r' ? '\r' : *s == 'n' ? '\n' : '\t';
+        s++;
+      } else {
+        unreachable();
+      }
+    }
+    if (len == cap) {
+      cap = cap ? cap * 2 : 8;
+      data = realloc(data, cap);
+      if (!data)
+        lfatal("out of memory\n");
+    }
+    data[len++] = c;
+  }
+  return (String){.kind = ASM_STRING, .len = len, .data = data};
+}
+/* INSTRn(V, A1..An): a Parsev whose instr has variant V and interned copies of its n operands; unused args are NULL, kind is left 0. */
+#define INSTR1(V, A1) \
+ (Parsev) { \
+ .instr = (Instr) { \
+ .kind = 0, .variant = V, .arg1 = internparsev(&A1), .arg2 = NULL, .arg3 = NULL \
+ } \
+ }
+#define INSTR2(V, A1, A2) \
+ (Parsev) { \
+ .instr = (Instr) { \
+ .kind = 0, .variant = V, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2), \
+ .arg3 = NULL \
+ } \
+ }
+#define INSTR3(V, A1, A2, A3) \
+ (Parsev) { \
+ .instr = (Instr) { \
+ .kind = 0, .variant = V, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2), \
+ .arg3 = internparsev(&A3) \
+ } \
+ }
+/* REG(K): a Parsev with only its kind set, to K. */
+#define REG(K) \
+ (Parsev) { .kind = K }
+/* Settings for the parser that leg generates from asm.peg: Parsev is its value type, and yycontext gains a member v, read by parse() as ctx.v. */
+#define YYSTYPE Parsev
+#define YY_CTX_LOCAL
+#define YY_CTX_MEMBERS Parsev v;
+#include "asm.peg.inc"
+
+/* Call yyparse until it returns 0 and return the parsed lines as a singly
+   linked list in input order, or NULL when nothing was parsed. ASM_BLANK
+   lines keep their node, since assemble() counts nodes to track line numbers.
+   A line parsed as ASM_SYNTAX_ERROR is reported through lfatal. */
+AsmLine *parse(void) {
+  AsmLine *result = NULL, **tail = &result, *l;
+  yycontext ctx;
+
+  memset(&ctx, 0, sizeof(yycontext));
+  curlineno = 0;
+  while (yyparse(&ctx)) {
+    curlineno += 1;
+    if (ctx.v.kind == ASM_SYNTAX_ERROR)
+      lfatal("syntax error\n");
+    /* zalloc is relied on to zero the node, which terminates the list. */
+    l = zalloc(sizeof(AsmLine));
+    l->v = ctx.v;
+    *tail = l;
+    tail = &l->next;
+  }
+  return result;
+}
diff --git a/util.c b/util.c
index 5cede75..9ed1c50 100644
--- a/util.c
+++ b/util.c
@@ -76,22 +76,6 @@ char *xmemdup(const char *s, size_t n) {
char *xstrdup(const char *s) { return xmemdup(s, strlen(s) + 1); }
-const char *internstring(const char *s) {
- size_t idx, len;
- const char *interned;
- static const char *cache[4096] = {0};
-
- len = strlen(s);
- idx = murmurhash64a(s, len) % sizeof(cache)/sizeof(cache[0]);
- interned = cache[idx];
- if (interned && strcmp(s, cache[idx]) == 0) {
- return interned;
- }
- interned = xstrdup(s);
- cache[idx] = interned;
- return interned;
-}
-
void htabkey(struct hashtablekey *k, const char *s, size_t n) {
k->str = s;
k->len = n;