diff options
| author | Andrew Chambers <[email protected]> | 2021-10-03 21:35:28 +1300 |
|---|---|---|
| committer | Andrew Chambers <[email protected]> | 2021-10-03 21:35:28 +1300 |
| commit | 7102258076d80765df63d47f2117d2cd9fa3aa41 (patch) | |
| tree | 40005bb113225beff16e4b702ade488ef4331b2b | |
| parent | 204f57da606af1aa788335504f3d1f126a432d96 (diff) | |
More instructions.
| -rw-r--r-- | asmparser.peg | 56 | ||||
| -rw-r--r-- | dumbas.h | 88 | ||||
| -rw-r--r-- | main.c | 74 |
3 files changed, 181 insertions, 37 deletions
diff --git a/asmparser.peg b/asmparser.peg index 2accd8f..8f9eb92 100644 --- a/asmparser.peg +++ b/asmparser.peg @@ -2,6 +2,7 @@ %value "Parsev" + line <- s:stmt eol { $$ = s; } / eol { $$.kind = ASM_BLANK; } / . { $$.kind = ASM_SYNTAX_ERROR; } @@ -11,23 +12,54 @@ stmt <- d:directive {$$ = d;} / l:label { $$ = l; } directive <- ".glob" "o"? "l" ws i:ident - { $$.globl = (Globl){.kind = ASM_DIR_GLOBL, .name = i.ident }; } - / ".data" - { $$.kind = ASM_DIR_DATA; } - / ".text" - { $$.kind = ASM_DIR_TEXT; } + { $$.globl = (Globl){.kind = ASM_DIR_GLOBL, .name = i.ident.name }; } + / ".data" { $$.kind = ASM_DIR_DATA; } + / ".text" { $$.kind = ASM_DIR_TEXT; } / ".byte" ws n:number - { $$.byte = (Byte){.kind = ASM_DIR_BYTE, .b = (uint8_t)n.number }; } + { $$.byte = (Byte){.kind = ASM_DIR_BYTE, .b = (uint8_t)n.number.value }; } -instr <- "nop" { $$.instr = (Instr){.kind = ASM_NOP }; } - / "ret" { $$.instr = (Instr){.kind = ASM_RET }; } +instr <- "nop"{ $$.kind = ASM_NOP; } + / "leave" { $$.kind = ASM_LEAVE; } + / "ret" { $$.kind = ASM_RET; } / "jmp" ws i:ident - { $$.instr = (Instr){.kind = ASM_JMP, .jmp.target = i.ident}; } + { $$.jmp = (Jmp){.kind = ASM_JMP, .target = i.ident.name}; } + / "pushq" ws v:r64 + { $$.pushq = (Pushq){.kind = ASM_PUSHQ, .arg = dupv(&v)}; } + / "pushq" ws v:imm + { $$.pushq = (Pushq){.kind = ASM_PUSHQ, .arg = dupv(&v)}; } + / "movq" ws s:r64 ws? "," ws? d:r64 + { $$.movq = (Movq){.kind = ASM_MOVQ, .src = dupv(&s), .dst = dupv(&d)}; } label <- i:ident ':' - { $$.label = (Label){.kind = ASM_LABEL, .name = i.ident}; } + { $$.label = (Label){.kind = ASM_LABEL, .name = i.ident.name}; } + +imm <- '$' n:number { $$ = n; } + / '$' i:ident { $$ = i; } + +r64 <- "%rax" { $$.kind = ASM_RAX; } + / "%rcx" { $$.kind = ASM_RCX; } + / "%rdx" { $$.kind = ASM_RDX; } + / "%rbx" { $$.kind = ASM_RBX; } + / "%rsp" { $$.kind = ASM_RSP; } + / "%rbp" { $$.kind = ASM_RBP; } + / "%rsi" { $$.kind = ASM_RSI; } + / "%rdi" { $$.kind = ASM_RDI; } + + +number <- <[0-9]+> + { $$.number = (Number){ .kind = ASM_NUMBER, .value = strtoll($1, NULL, 10) }; } + +ident <- <[_a-zA-Z][_a-zA-Z0-9]*> + { $$.ident = (Ident){ .kind = ASM_IDENT, .name = xstrdup($1) }; } ws <- [ \t]+ + eol <- ws? ("\n" / (! .)) -number <- <[0-9]+> { $$.number = strtoll($1, NULL, 10); } -ident <- <[_a-zA-Z][_a-zA-Z0-9]*> { $$.ident = xstrdup($1); } + +%source { + Parsev *dupv(Parsev *p) { + Parsev *r = xmalloc(sizeof(Parsev)); + *r = *p; + return r; + } +}
\ No newline at end of file @@ -28,52 +28,100 @@ typedef struct { Section *section; } Symbol; -enum AsmKind { +typedef enum { + // Misc ASM_SYNTAX_ERROR, ASM_BLANK, + ASM_LABEL, + ASM_IDENT, + ASM_NUMBER, + // Directives ASM_DIR_GLOBL, ASM_DIR_DATA, ASM_DIR_TEXT, ASM_DIR_BYTE, - ASM_LABEL, + // Instructions ASM_NOP, ASM_RET, - ASM_JMP -}; + ASM_JMP, + ASM_LEAVE, + ASM_PUSHQ, + ASM_MOVQ, + // Registers, order matters. + ASM_RAX, + ASM_RCX, + ASM_RDX, + ASM_RBX, + ASM_RSP, + ASM_RBP, + ASM_RSI, + ASM_RDI, + +} AsmKind; + +static int isr64kind(AsmKind k) { + return k >= ASM_RAX && k <= ASM_RDI; +} + +typedef union Parsev Parsev; + +typedef struct { + AsmKind kind; + const char *target; +} Jmp; + +typedef struct { + AsmKind kind; + Parsev *arg; +} Pushq; typedef struct { - enum AsmKind kind; - union { - struct { - const char *target; - } jmp; - }; -} Instr; + AsmKind kind; + Parsev *src; + Parsev *dst; +} Movq; typedef struct { - enum AsmKind kind; + AsmKind kind; const char *name; } Label; typedef struct { - enum AsmKind kind; + AsmKind kind; const char *name; } Globl; typedef struct { - enum AsmKind kind; + AsmKind kind; uint8_t b; } Byte; -typedef union { - enum AsmKind kind; - Instr instr; +typedef struct { + AsmKind kind; + int64_t imm; +} Imm; + +typedef struct { + AsmKind kind; + const char *name; +} Ident; + +typedef struct { + AsmKind kind; + int64_t value; +} Number; + +union Parsev { + AsmKind kind; Label label; Globl globl; + Jmp jmp; + Pushq pushq; + Movq movq; Byte byte; - const char *ident; - int64_t number; -} Parsev; + Ident ident; + Number number; +}; typedef struct AsmLine AsmLine; struct AsmLine { @@ -230,6 +230,20 @@ static void prepass(void) { case ASM_RET: cursection->wco += 1; break; + case ASM_MOVQ: + if (isr64kind(v->movq.src->kind) && isr64kind(v->movq.dst->kind)) { + cursection->wco += 2; + } else { + cursection->wco += 16; // XXX likely wrong. + } + break; + case ASM_PUSHQ: + if (isr64kind(v->pushq.arg->kind)) { + cursection->wco += 2; + } else { + cursection->wco += 9; // XXX very pessimistic. + } + break; case ASM_JMP: cursection->wco += 5; break; @@ -275,6 +289,18 @@ static void fillsymtab(void) { } } +#define MODREGI 0x3 +#define REX_W 0x48 + +static uint8_t kindr64bits(AsmKind k) { + return (k - ASM_RAX) & 0xff; +} + +static uint8_t composemodrm(uint8_t mod, uint8_t regop, uint8_t rm) { + return (mod<<6) + (regop<<3) + rm; +} + + static void assemble() { Symbol *sym; Parsev *v; @@ -309,17 +335,55 @@ static void assemble() { case ASM_RET: secaddbyte(cursection, 0xc3); break; + case ASM_PUSHQ: { + Parsev *arg; + + arg = v->pushq.arg; + + if (isr64kind(arg->kind)) { + uint8_t ibuf[2] = {0x50, kindr64bits(arg->kind)}; + secaddbytes(cursection, ibuf, sizeof(ibuf)); + } else if (arg->kind == ASM_NUMBER) { + fatal("TODO"); + } else if (arg->kind == ASM_IDENT) { + fatal("TODO"); + } else { + fatal("BUG: unexpected pushq arg"); + } + + break; + } + + case ASM_MOVQ: { + Parsev *src, *dst; + + src = v->movq.src; + dst = v->movq.dst; + + if (isr64kind(src->kind) && isr64kind(dst->kind)) { + uint8_t ibuf[3] = { + REX_W, + 0x89, + composemodrm(MODREGI, kindr64bits(src->kind), kindr64bits(dst->kind)), + }; + secaddbytes(cursection, ibuf, sizeof(ibuf)); + } else { + fatal("TODO"); + } + break; + } + case ASM_JMP: { - sym = getsym(v->instr.jmp.target); + sym = getsym(v->jmp.target); if (sym->section && (sym->section == cursection)) { int64_t distance; distance = sym->wco - cursection->wco; if (distance <= 128 && distance >= -127) { - uint8_t jbuf[2] = {0xeb, 0x00}; - secaddbytes(cursection, jbuf, sizeof(jbuf)); + uint8_t ibuf[2] = {0xeb, 0x00}; + secaddbytes(cursection, ibuf, sizeof(ibuf)); } else { - uint8_t jbuf[5] = {0xe9, 0x00, 0x00, 0x00, 0x00}; - secaddbytes(cursection, jbuf, sizeof(jbuf)); + uint8_t ibuf[5] = {0xe9, 0x00, 0x00, 0x00, 0x00}; + secaddbytes(cursection, ibuf, sizeof(ibuf)); } } else { fatal("TODO, jmp to undefined symbol"); |
