diff options
| author | Andrew Chambers <[email protected]> | 2021-10-05 18:10:36 +1300 |
|---|---|---|
| committer | Andrew Chambers <[email protected]> | 2021-10-05 18:10:36 +1300 |
| commit | 5c0af4055474834542e2813f36f4dd9a16479e53 (patch) | |
| tree | 0132bfdfd56bc995b83f2a6f1791dd4f4b8bc2c1 | |
| parent | dda3e77e5b3c01c52fc38a29cd81d49c52abb26c (diff) | |
Port lexer to leg.
| -rw-r--r-- | .gitignore | 5 | ||||
| -rw-r--r-- | README.md | 17 | ||||
| -rw-r--r-- | asm.peg | 80 | ||||
| -rw-r--r-- | asmparser.peg | 96 | ||||
| -rw-r--r-- | default.do | 28 | ||||
| -rw-r--r-- | main.c | 36 | ||||
| -rw-r--r-- | minias.h (renamed from dumbas.h) | 0 | ||||
| -rw-r--r-- | util.c | 2 |
8 files changed, 146 insertions, 118 deletions
diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..ae79679 --- /dev/null +++ b/.gitignore @@ -0,0 +1,5 @@ +.redo +minias +*.s +*.o +*.inc
\ No newline at end of file @@ -1,6 +1,6 @@ -# dumbas +# minias -A dumb assembler for x86_64, written for fun and learning. +A mini assembler for x86_64, written for fun and learning. Goals: @@ -14,6 +14,19 @@ Non Goals: - Assemble other architectures. - Work as a library. +# Building + +``` +redo minias +``` + +or + +``` +leg asm.peg > asm.peg.inc +cc -O2 *.c -o minias +``` + # Resources - [elf spec](https://refspecs.linuxfoundation.org/elf/elf.pdf) @@ -0,0 +1,80 @@ + +line = s:stmt (eol | !. ) { yy->v = s; } + | eol { yy->v.kind = ASM_BLANK; } + | . { yy->v.kind = ASM_SYNTAX_ERROR; } + +stmt = d:directive {$$ = d;} + | i:instr { $$ = i; } + | l:label { $$ = l; } + +directive = ".glob" "o"? "l" ws i:ident + { $$.globl = (Globl){.kind = ASM_DIR_GLOBL, .name = i.ident.name }; } + | ".data" { $$.kind = ASM_DIR_DATA; } + | ".text" { $$.kind = ASM_DIR_TEXT; } + | ".balign" ws n:number + { $$.balign = (Balign){.kind = ASM_DIR_BALIGN, .align = n.number.v }; } + | ".byte" ws n:number + { $$.byte = (Byte){.kind = ASM_DIR_BYTE, .b = (uint8_t)n.number.v }; } + +label = i:ident ':' + { $$.label = (Label){.kind = ASM_LABEL, .name = i.ident.name}; } + +instr = "nop"{ $$.kind = ASM_NOP; } + | "leave" { $$.kind = ASM_LEAVE; } + | "ret" { $$.kind = ASM_RET; } + | i:jmp { $$ = i; } + | i:add { $$ = i; } + +jmp = "jmp" ws i:ident + { $$.jmp = (Jmp){.kind = ASM_JMP, .target = i.ident.name}; } + +add = "addq" ws s:r-m64 ws? ',' ws? d:r64 + { $$.add = (Add){ .kind = ASM_ADD, .type = 'q', .src = dupv(&s), .dst = dupv(&d) } } + | "addq" ws s:r64 ws? ',' ws? d:r-m64 + { $$.add = (Add){ .kind = ASM_ADD, .type = 'q', .src = dupv(&s), .dst = dupv(&d) } } + | "addq" ws s:imm ws? ',' ws? d:r-m64 + { $$.add = (Add){ .kind = ASM_ADD, .type = 'q', .src = dupv(&s), .dst = dupv(&d) } } + +r-m64 = r:r64 { $$ = r; } + | m:m { $$ = m; } + +m = '(' ws? r:r64 ws? ')' + { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = NULL, .reg = r.kind } } + | <'-'?[0-9]+> ws? '(' ws? r:r64 ws? ')' + { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = strtoll(yytext, NULL, 10), .l = NULL, .reg = r.kind } } + | i:ident ws? '(' ws? r:r64 ws? ')' + { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = i.ident.name, .reg = r.kind } } + +r64 = "%rax" { $$.kind = ASM_RAX } + | "%rcx" { $$.kind = ASM_RCX } + | "%rdx" { $$.kind = ASM_RDX } + | "%rbx" { $$.kind = ASM_RBX } + | "%rsp" { $$.kind = ASM_RSP } + | "%rbp" { $$.kind = ASM_RBP } + | "%rsi" { $$.kind = ASM_RSI } + | "%rdi" { $$.kind = ASM_RDI } + | "%r8" { $$.kind = ASM_R8 } + | "%r9" { $$.kind = ASM_R9 } + | "%r10" { $$.kind = ASM_R10 } + | "%r11" { $$.kind = ASM_R11 } + | "%r12" { $$.kind = ASM_R12 } + | "%r13" { $$.kind = ASM_R13 } + | "%r14" { $$.kind = ASM_R14 } + | "%r15" { $$.kind = ASM_R15 } + + +imm = '$' i:ident + { $$.imm = (Imm){.kind = ASM_IMM, .l = i.ident.name, .c = 0 }; } + | '$' <'-'?[0-9]+> + { $$.imm = (Imm){.kind = ASM_IMM, .l = NULL, .c = strtoll(yytext, NULL, 10) }; } + +ident = <[_a-zA-Z][_a-zA-Z0-9]*> + { $$.ident = (Ident){ .kind = ASM_IDENT, .name = xstrdup(yytext) }; } + +number = <'-'?[0-9]+> + { $$.number = (Number){ .kind = ASM_NUMBER, .v = strtoll(yytext, NULL, 10) }; } + +ws = [ \t]+ + +eol = ws? "\n" + diff --git a/asmparser.peg b/asmparser.peg deleted file mode 100644 index 5a625ae..0000000 --- a/asmparser.peg +++ /dev/null @@ -1,96 +0,0 @@ -%prefix "asmparser" - -%value "Parsev" - -line <- s:stmt eol { $$ = s; } - / eol { $$.kind = ASM_BLANK; } - / . { $$.kind = ASM_SYNTAX_ERROR; } - -stmt <- d:directive {$$ = d;} - / i:instr { $$ = i; } - / l:label { $$ = l; } - -directive <- ".glob" "o"? "l" ws i:ident - { $$.globl = (Globl){.kind = ASM_DIR_GLOBL, .name = i.ident.name }; } - / ".data" { $$.kind = ASM_DIR_DATA; } - / ".text" { $$.kind = ASM_DIR_TEXT; } - / ".balign" ws n:number - { $$.balign = (Balign){.kind = ASM_DIR_BALIGN, .align = n.number.v }; } - / ".byte" ws n:number - { $$.byte = (Byte){.kind = ASM_DIR_BYTE, .b = (uint8_t)n.number.v }; } - -label <- i:ident ':' - { $$.label = (Label){.kind = ASM_LABEL, .name = i.ident.name}; } - -instr <- "nop"{ $$.kind = ASM_NOP; } - / "leave" { $$.kind = ASM_LEAVE; } - / "ret" { $$.kind = ASM_RET; } - / i:jmp { $$ = i; } - / i:add { $$ = i; } - -jmp <- "jmp" ws i:ident - { $$.jmp = (Jmp){.kind = ASM_JMP, .target = i.ident.name}; } - -add <- "add" 'q'? ws s:m ws? ',' ws? d:r64 - { $$.add = mkadd('q', s, d); } - / "add" 'q'? ws s:imm ws? ',' ws? d:r64 - { $$.add = mkadd('q', s, d); } - / "add" 'q'? ws s:r64 ws? ',' ws? d:m - { $$.add = mkadd('q', s, d); } - / "add" 'q'? ws s:r64 ws? ',' ws? d:r64 - { $$.add = mkadd('q', s, d); } - / "addq" ws s:imm ws? ',' ws? d:m - { $$.add = mkadd('q', s, d); } - -m <- '(' ws? r:r64 ws? ')' - { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = NULL, .reg = r.kind }; } - / <'-'?[0-9]+> ws? '(' ws? r:r64 ws? ')' - { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = strtoll($1, NULL, 10), .l = NULL, .reg = r.kind }; } - / i:ident ws? '(' ws? r:r64 ws? ')' - { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = i.ident.name, .reg = r.kind }; } - -r64 <- "%rax" { $$.kind = ASM_RAX; } - / "%rcx" { $$.kind = ASM_RCX; } - / "%rdx" { $$.kind = ASM_RDX; } - / "%rbx" { $$.kind = ASM_RBX; } - / "%rsp" { $$.kind = ASM_RSP; } - / "%rbp" { $$.kind = ASM_RBP; } - / "%rsi" { $$.kind = ASM_RSI; } - / "%rdi" { $$.kind = ASM_RDI; } - / "%r8" { $$.kind = ASM_R8; } - / "%r9" { $$.kind = ASM_R9; } - / "%r10" { $$.kind = ASM_R10; } - / "%r11" { $$.kind = ASM_R11; } - / "%r12" { $$.kind = ASM_R12; } - / "%r13" { $$.kind = ASM_R13; } - / "%r14" { $$.kind = ASM_R14; } - / "%r15" { $$.kind = ASM_R15; } - -imm <- '$' i:ident - { $$.imm = (Imm){.kind = ASM_IMM, .l = i.ident.name, .c = 0 }; } - / '$' <'-'?[0-9]+> - { $$.imm = (Imm){.kind = ASM_IMM, .l = NULL, .c = strtoll($1, NULL, 10) }; } - -ident <- <[_a-zA-Z][_a-zA-Z0-9]*> - { $$.ident = (Ident){ .kind = ASM_IDENT, .name = xstrdup($1) }; } - -number <- <'-'?[0-9]+> - { $$.number = (Number){ .kind = ASM_NUMBER, .v = strtoll($1, NULL, 10) }; } - -ws <- [ \t]+ - -eol <- ws? ("\n" / (! .)) - -%source { - -static Parsev *dupv(Parsev *p) { - Parsev *r = xmalloc(sizeof(Parsev)); - *r = *p; - return r; -} - -static Add mkadd(char t, Parsev s, Parsev d) { - return (Add){ .kind = ASM_ADD, .type = 'q', .src = dupv(&s), .dst = dupv(&d) }; -} - -}
\ No newline at end of file diff --git a/default.do b/default.do new file mode 100644 index 0000000..6787b8d --- /dev/null +++ b/default.do @@ -0,0 +1,28 @@ +#!/bin/sh +set -eu + +case "$1" in + *.o) + test "$1" = "main.o" && redo-ifchange asm.peg.inc + redo-ifchange "${1%.o}.c" minias.h + set -x + ${CC:- cc} ${CFLAGS:- -O -Og} -c -o "$3" "${1%.o}.c" + ;; + asm.peg.inc) + redo-ifchange asm.peg + set -x + leg asm.peg > "$3" + ;; + minias) + obj="main.o util.o " + redo-ifchange $obj + ${CC:- cc} ${LDFLAGS:-} -o "$3" $obj + ;; + fmt) + clang-format -i main.c util.c >&2 + ;; + *) + echo "don't know how to build $1" 2>&1 + exit 1 + ;; +esac
\ No newline at end of file @@ -1,4 +1,4 @@ -#include "dumbas.h" +#include "minias.h" static AsmLine *allasm = NULL; @@ -113,45 +113,43 @@ static void initsections(void) { text->hdr.sh_addralign = 4; } -static const char *dbg_str[] = {"Evaluating rule", "Matched rule", - "Abandoning rule"}; -#define PCC_DEBUG(event, rule, level, pos, buffer, length) \ - fprintf(stderr, "%*s%s %s @%zu [%.*s]\n", (int)((level)*2), "", \ - dbg_str[event], rule, pos, (int)(length), buffer) +static Parsev *dupv(Parsev *p) { + Parsev *r = xmalloc(sizeof(Parsev)); + *r = *p; + return r; +} -#include "asmparser.c" // XXX resolve dependency cycle. +#define YYSTYPE Parsev +#define YY_CTX_LOCAL +#define YY_CTX_MEMBERS Parsev v; +#include "asm.peg.inc" void parse(void) { - int more; uint64_t lineno; - Parsev v; AsmLine *l, *prevl; - asmparser_context_t *ctx; + yycontext ctx; - ctx = asmparser_create(NULL); + memset(&ctx, 0, sizeof(yycontext)); prevl = NULL; lineno = 0; - do { - more = asmparser_parse(ctx, &v); + while (yyparse(&ctx)) { lineno += 1; - if (v.kind == ASM_SYNTAX_ERROR) { + if (ctx.v.kind == ASM_SYNTAX_ERROR) { fprintf(stderr, "<stdin>:%lu: syntax error\n", lineno); exit(1); } - if (v.kind == ASM_BLANK) + if (ctx.v.kind == ASM_BLANK) continue; l = zalloc(sizeof(AsmLine)); - l->v = v; + l->v = ctx.v; l->lineno = lineno; if (prevl) prevl->next = l; else allasm = l; prevl = l; - } while (more); - - asmparser_destroy(ctx); + } } /* Shorthand helpers to write section bytes. */ @@ -1,4 +1,4 @@ -#include "dumbas.h" +#include "minias.h" static void vwarn(const char *fmt, va_list ap) { vfprintf(stderr, fmt, ap); |
