aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andrew Chambers <[email protected]> 2021-10-05 18:10:36 +1300
committer Andrew Chambers <[email protected]> 2021-10-05 18:10:36 +1300
commit 5c0af4055474834542e2813f36f4dd9a16479e53 (patch)
tree 0132bfdfd56bc995b83f2a6f1791dd4f4b8bc2c1
parent dda3e77e5b3c01c52fc38a29cd81d49c52abb26c (diff)
Port lexer to leg.
-rw-r--r-- .gitignore 5
-rw-r--r-- README.md 17
-rw-r--r-- asm.peg 80
-rw-r--r-- asmparser.peg 96
-rw-r--r-- default.do 28
-rw-r--r-- main.c 36
-rw-r--r-- minias.h (renamed from dumbas.h) 0
-rw-r--r-- util.c 2
8 files changed, 146 insertions, 118 deletions
diff --git a/.gitignore b/.gitignore
new file mode 100644
index 0000000..ae79679
--- /dev/null
+++ b/.gitignore
@@ -0,0 +1,5 @@
+.redo
+minias
+*.s
+*.o
+*.inc \ No newline at end of file
diff --git a/README.md b/README.md
index 4eb7c2b..2512215 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,6 @@
-# dumbas
+# minias
-A dumb assembler for x86_64, written for fun and learning.
+A mini assembler for x86_64, written for fun and learning.
Goals:
@@ -14,6 +14,19 @@ Non Goals:
- Assemble other architectures.
- Work as a library.
+# Building
+
+```
+redo minias
+```
+
+or
+
+```
+leg asm.peg > asm.peg.inc
+cc -O2 *.c -o minias
+```
+
# Resources
- [elf spec](https://refspecs.linuxfoundation.org/elf/elf.pdf)
diff --git a/asm.peg b/asm.peg
new file mode 100644
index 0000000..946f6e6
--- /dev/null
+++ b/asm.peg
@@ -0,0 +1,80 @@
+
+line = s:stmt (eol | !. ) { yy->v = s; } # a statement ended by eol or by end of input; the result is stored in the context member v
+ | eol { yy->v.kind = ASM_BLANK; } # a line holding nothing but optional whitespace
+ | . { yy->v.kind = ASM_SYNTAX_ERROR; } # anything else: consume one character and flag a syntax error
+
+stmt = d:directive {$$ = d;} # a statement is a directive, an instruction or a label, tried in this order
+ | i:instr { $$ = i; }
+ | l:label { $$ = l; }
+
+# Assembler directives: symbol visibility, section selection, alignment and raw bytes.
+# The optional "a" lets both the .globl and .global spellings match.
+directive = ".glob" "a"? "l" ws i:ident
+ { $$.globl = (Globl){.kind = ASM_DIR_GLOBL, .name = i.ident.name }; }
+ | ".data" { $$.kind = ASM_DIR_DATA; }
+ | ".text" { $$.kind = ASM_DIR_TEXT; }
+ | ".balign" ws n:number
+ { $$.balign = (Balign){.kind = ASM_DIR_BALIGN, .align = n.number.v }; }
+ # The parsed number is narrowed to a single byte by the cast in the action.
+ | ".byte" ws n:number
+ { $$.byte = (Byte){.kind = ASM_DIR_BYTE, .b = (uint8_t)n.number.v }; }
+
+label = i:ident ':' # an identifier immediately followed by ':'
+ { $$.label = (Label){.kind = ASM_LABEL, .name = i.ident.name}; }
+
+instr = "nop"{ $$.kind = ASM_NOP; } # operand-less mnemonics set only the kind; jmp and add have their own rules
+ | "leave" { $$.kind = ASM_LEAVE; }
+ | "ret" { $$.kind = ASM_RET; }
+ | i:jmp { $$ = i; }
+ | i:add { $$ = i; }
+
+jmp = "jmp" ws i:ident # jump whose target is given as an identifier
+ { $$.jmp = (Jmp){.kind = ASM_JMP, .target = i.ident.name}; }
+
+add = "addq" ws s:r-m64 ws? ',' ws? d:r64 # form 1: register-or-memory source, register destination
+ { $$.add = (Add){ .kind = ASM_ADD, .type = 'q', .src = dupv(&s), .dst = dupv(&d) } }
+ | "addq" ws s:r64 ws? ',' ws? d:r-m64 # form 2: register source, register-or-memory destination
+ { $$.add = (Add){ .kind = ASM_ADD, .type = 'q', .src = dupv(&s), .dst = dupv(&d) } }
+ | "addq" ws s:imm ws? ',' ws? d:r-m64 # form 3: immediate source, register-or-memory destination
+ { $$.add = (Add){ .kind = ASM_ADD, .type = 'q', .src = dupv(&s), .dst = dupv(&d) } }
+
+r-m64 = r:r64 { $$ = r; } # operand that is either a 64-bit register or a memory reference
+ | m:m { $$ = m; }
+
+m = '(' ws? r:r64 ws? ')' # (reg): no displacement, no label
+ { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = NULL, .reg = r.kind } }
+ | <'-'?[0-9]+> ws? '(' ws? r:r64 ws? ')' # disp(reg): the captured decimal text is converted with strtoll
+ { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = strtoll(yytext, NULL, 10), .l = NULL, .reg = r.kind } }
+ | i:ident ws? '(' ws? r:r64 ws? ')' # name(reg): the identifier is stored in .l
+ { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = i.ident.name, .reg = r.kind } }
+
+r64 = "%rax" { $$.kind = ASM_RAX } # each 64-bit register name maps to its own ASM_* kind
+ | "%rcx" { $$.kind = ASM_RCX }
+ | "%rdx" { $$.kind = ASM_RDX }
+ | "%rbx" { $$.kind = ASM_RBX }
+ | "%rsp" { $$.kind = ASM_RSP }
+ | "%rbp" { $$.kind = ASM_RBP }
+ | "%rsi" { $$.kind = ASM_RSI }
+ | "%rdi" { $$.kind = ASM_RDI }
+ | "%r8" { $$.kind = ASM_R8 }
+ | "%r9" { $$.kind = ASM_R9 }
+ | "%r10" { $$.kind = ASM_R10 }
+ | "%r11" { $$.kind = ASM_R11 }
+ | "%r12" { $$.kind = ASM_R12 }
+ | "%r13" { $$.kind = ASM_R13 }
+ | "%r14" { $$.kind = ASM_R14 }
+ | "%r15" { $$.kind = ASM_R15 }
+
+
+imm = '$' i:ident # immediate given as a name: stored in .l with .c = 0
+ { $$.imm = (Imm){.kind = ASM_IMM, .l = i.ident.name, .c = 0 }; }
+ | '$' <'-'?[0-9]+> # immediate given as a decimal constant: stored in .c with .l = NULL
+ { $$.imm = (Imm){.kind = ASM_IMM, .l = NULL, .c = strtoll(yytext, NULL, 10) }; }
+
+ident = <[_a-zA-Z][_a-zA-Z0-9]*> # letter or underscore, then letters, digits or underscores; the text is copied with xstrdup
+ { $$.ident = (Ident){ .kind = ASM_IDENT, .name = xstrdup(yytext) }; }
+
+number = <'-'?[0-9]+> # optionally negative decimal integer, converted with strtoll base 10
+ { $$.number = (Number){ .kind = ASM_NUMBER, .v = strtoll(yytext, NULL, 10) }; }
+
+ws = [ \t]+ # one or more spaces or tabs
+
+eol = ws? "\n" # optional trailing whitespace followed by a newline
+
diff --git a/asmparser.peg b/asmparser.peg
deleted file mode 100644
index 5a625ae..0000000
--- a/asmparser.peg
+++ /dev/null
@@ -1,96 +0,0 @@
-%prefix "asmparser"
-
-%value "Parsev"
-
-line <- s:stmt eol { $$ = s; }
- / eol { $$.kind = ASM_BLANK; }
- / . { $$.kind = ASM_SYNTAX_ERROR; }
-
-stmt <- d:directive {$$ = d;}
- / i:instr { $$ = i; }
- / l:label { $$ = l; }
-
-directive <- ".glob" "o"? "l" ws i:ident
- { $$.globl = (Globl){.kind = ASM_DIR_GLOBL, .name = i.ident.name }; }
- / ".data" { $$.kind = ASM_DIR_DATA; }
- / ".text" { $$.kind = ASM_DIR_TEXT; }
- / ".balign" ws n:number
- { $$.balign = (Balign){.kind = ASM_DIR_BALIGN, .align = n.number.v }; }
- / ".byte" ws n:number
- { $$.byte = (Byte){.kind = ASM_DIR_BYTE, .b = (uint8_t)n.number.v }; }
-
-label <- i:ident ':'
- { $$.label = (Label){.kind = ASM_LABEL, .name = i.ident.name}; }
-
-instr <- "nop"{ $$.kind = ASM_NOP; }
- / "leave" { $$.kind = ASM_LEAVE; }
- / "ret" { $$.kind = ASM_RET; }
- / i:jmp { $$ = i; }
- / i:add { $$ = i; }
-
-jmp <- "jmp" ws i:ident
- { $$.jmp = (Jmp){.kind = ASM_JMP, .target = i.ident.name}; }
-
-add <- "add" 'q'? ws s:m ws? ',' ws? d:r64
- { $$.add = mkadd('q', s, d); }
- / "add" 'q'? ws s:imm ws? ',' ws? d:r64
- { $$.add = mkadd('q', s, d); }
- / "add" 'q'? ws s:r64 ws? ',' ws? d:m
- { $$.add = mkadd('q', s, d); }
- / "add" 'q'? ws s:r64 ws? ',' ws? d:r64
- { $$.add = mkadd('q', s, d); }
- / "addq" ws s:imm ws? ',' ws? d:m
- { $$.add = mkadd('q', s, d); }
-
-m <- '(' ws? r:r64 ws? ')'
- { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = NULL, .reg = r.kind }; }
- / <'-'?[0-9]+> ws? '(' ws? r:r64 ws? ')'
- { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = strtoll($1, NULL, 10), .l = NULL, .reg = r.kind }; }
- / i:ident ws? '(' ws? r:r64 ws? ')'
- { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = i.ident.name, .reg = r.kind }; }
-
-r64 <- "%rax" { $$.kind = ASM_RAX; }
- / "%rcx" { $$.kind = ASM_RCX; }
- / "%rdx" { $$.kind = ASM_RDX; }
- / "%rbx" { $$.kind = ASM_RBX; }
- / "%rsp" { $$.kind = ASM_RSP; }
- / "%rbp" { $$.kind = ASM_RBP; }
- / "%rsi" { $$.kind = ASM_RSI; }
- / "%rdi" { $$.kind = ASM_RDI; }
- / "%r8" { $$.kind = ASM_R8; }
- / "%r9" { $$.kind = ASM_R9; }
- / "%r10" { $$.kind = ASM_R10; }
- / "%r11" { $$.kind = ASM_R11; }
- / "%r12" { $$.kind = ASM_R12; }
- / "%r13" { $$.kind = ASM_R13; }
- / "%r14" { $$.kind = ASM_R14; }
- / "%r15" { $$.kind = ASM_R15; }
-
-imm <- '$' i:ident
- { $$.imm = (Imm){.kind = ASM_IMM, .l = i.ident.name, .c = 0 }; }
- / '$' <'-'?[0-9]+>
- { $$.imm = (Imm){.kind = ASM_IMM, .l = NULL, .c = strtoll($1, NULL, 10) }; }
-
-ident <- <[_a-zA-Z][_a-zA-Z0-9]*>
- { $$.ident = (Ident){ .kind = ASM_IDENT, .name = xstrdup($1) }; }
-
-number <- <'-'?[0-9]+>
- { $$.number = (Number){ .kind = ASM_NUMBER, .v = strtoll($1, NULL, 10) }; }
-
-ws <- [ \t]+
-
-eol <- ws? ("\n" / (! .))
-
-%source {
-
-static Parsev *dupv(Parsev *p) {
- Parsev *r = xmalloc(sizeof(Parsev));
- *r = *p;
- return r;
-}
-
-static Add mkadd(char t, Parsev s, Parsev d) {
- return (Add){ .kind = ASM_ADD, .type = 'q', .src = dupv(&s), .dst = dupv(&d) };
-}
-
-} \ No newline at end of file
diff --git a/default.do b/default.do
new file mode 100644
index 0000000..6787b8d
--- /dev/null
+++ b/default.do
@@ -0,0 +1,28 @@
+#!/bin/sh
+# Build rules for minias. "$1" names the requested target and is matched
+# against the patterns below; "$3" is the path the built output is written to.
+set -eu
+
+case "$1" in
+  *.o)
+    # main.c #includes the generated parser, so it must exist before compiling.
+    test "$1" = "main.o" && redo-ifchange asm.peg.inc
+    redo-ifchange "${1%.o}.c" minias.h
+    set -x
+    ${CC:- cc} ${CFLAGS:- -O -Og} -c -o "$3" "${1%.o}.c"
+    ;;
+  asm.peg.inc)
+    # Generate the C parser source from the leg grammar.
+    redo-ifchange asm.peg
+    set -x
+    leg asm.peg > "$3"
+    ;;
+  minias)
+    # Link the final binary; $obj is left unquoted on purpose so it splits
+    # into one argument per object file.
+    obj="main.o util.o "
+    redo-ifchange $obj
+    ${CC:- cc} ${LDFLAGS:-} -o "$3" $obj
+    ;;
+  fmt)
+    clang-format -i main.c util.c >&2
+    ;;
+  *)
+    # Diagnostics belong on stderr, matching the fmt rule above.
+    echo "don't know how to build $1" >&2
+    exit 1
+    ;;
+esac \ No newline at end of file
diff --git a/main.c b/main.c
index 4806722..69de674 100644
--- a/main.c
+++ b/main.c
@@ -1,4 +1,4 @@
-#include "dumbas.h"
+#include "minias.h"
static AsmLine *allasm = NULL;
@@ -113,45 +113,43 @@ static void initsections(void) {
text->hdr.sh_addralign = 4;
}
-static const char *dbg_str[] = {"Evaluating rule", "Matched rule",
- "Abandoning rule"};
-#define PCC_DEBUG(event, rule, level, pos, buffer, length) \
- fprintf(stderr, "%*s%s %s @%zu [%.*s]\n", (int)((level)*2), "", \
- dbg_str[event], rule, pos, (int)(length), buffer)
+/* Return a copy of *p placed in storage obtained from xmalloc. */
+static Parsev *dupv(Parsev *p) {
+  Parsev *copy = xmalloc(sizeof *copy);
+
+  *copy = *p;
+  return copy;
+}
-#include "asmparser.c" // XXX resolve dependency cycle.
+#define YYSTYPE Parsev
+#define YY_CTX_LOCAL
+#define YY_CTX_MEMBERS Parsev v;
+#include "asm.peg.inc"
/* Run the generated parser repeatedly and append every non-blank result, tagged with its line number, to the allasm list. */
void parse(void) {
- int more;
uint64_t lineno;
- Parsev v;
AsmLine *l, *prevl;
- asmparser_context_t *ctx;
+ yycontext ctx;
- ctx = asmparser_create(NULL);
+ memset(&ctx, 0, sizeof(yycontext)); /* start from an all-zero parser context */
prevl = NULL;
lineno = 0;
- do {
- more = asmparser_parse(ctx, &v);
+ while (yyparse(&ctx)) { /* the result of each successful call is read from ctx.v below */
lineno += 1;
- if (v.kind == ASM_SYNTAX_ERROR) {
+ if (ctx.v.kind == ASM_SYNTAX_ERROR) {
fprintf(stderr, "<stdin>:%lu: syntax error\n", lineno); /* NOTE(review): lineno is uint64_t; %lu only matches where unsigned long is 64-bit */
exit(1);
}
- if (v.kind == ASM_BLANK)
+ if (ctx.v.kind == ASM_BLANK)
continue; /* blank lines advance lineno but are not stored */
l = zalloc(sizeof(AsmLine));
- l->v = v;
+ l->v = ctx.v;
l->lineno = lineno;
if (prevl)
prevl->next = l;
else
allasm = l; /* the first stored line becomes the list head */
prevl = l;
- } while (more);
-
- asmparser_destroy(ctx);
+ }
}
/* Shorthand helpers to write section bytes. */
diff --git a/dumbas.h b/minias.h
index 9889733..9889733 100644
--- a/dumbas.h
+++ b/minias.h
diff --git a/util.c b/util.c
index 6e09683..5ab9c48 100644
--- a/util.c
+++ b/util.c
@@ -1,4 +1,4 @@
-#include "dumbas.h"
+#include "minias.h"
static void vwarn(const char *fmt, va_list ap) {
vfprintf(stderr, fmt, ap);