diff options
| author | Andrew Chambers <[email protected]> | 2021-10-05 03:02:10 +1300 |
|---|---|---|
| committer | Andrew Chambers <[email protected]> | 2021-10-05 03:02:10 +1300 |
| commit | d8085a24dd755bf7b63d74be716efbfff6dd772d (patch) | |
| tree | 5c3a99000f0bdf3ccb6b4e5d73238f4dadd429c5 | |
| parent | 3cd70232589097fee7d05a647f473d77269d8e97 (diff) | |
Work on assembler.
| -rw-r--r-- | asmparser.peg | 77 | ||||
| -rw-r--r-- | dumbas.h | 71 | ||||
| -rw-r--r-- | main.c | 363 |
3 files changed, 226 insertions, 285 deletions
diff --git a/asmparser.peg b/asmparser.peg index 5d3d756..11dec46 100644 --- a/asmparser.peg +++ b/asmparser.peg @@ -2,7 +2,6 @@ %value "Parsev" - line <- s:stmt eol { $$ = s; } / eol { $$.kind = ASM_BLANK; } / . { $$.kind = ASM_SYNTAX_ERROR; } @@ -16,29 +15,39 @@ directive <- ".glob" "o"? "l" ws i:ident / ".data" { $$.kind = ASM_DIR_DATA; } / ".text" { $$.kind = ASM_DIR_TEXT; } / ".balign" ws n:number - { $$.balign = (Balign){.kind = ASM_DIR_BALIGN, .align = n.number.value }; } + { $$.balign = (Balign){.kind = ASM_DIR_BALIGN, .align = n.number.v }; } / ".byte" ws n:number - { $$.byte = (Byte){.kind = ASM_DIR_BYTE, .b = (uint8_t)n.number.value }; } + { $$.byte = (Byte){.kind = ASM_DIR_BYTE, .b = (uint8_t)n.number.v }; } + +label <- i:ident ':' + { $$.label = (Label){.kind = ASM_LABEL, .name = i.ident.name}; } instr <- "nop"{ $$.kind = ASM_NOP; } / "leave" { $$.kind = ASM_LEAVE; } / "ret" { $$.kind = ASM_RET; } - / "jmp" ws i:ident - { $$.jmp = (Jmp){.kind = ASM_JMP, .target = i.ident.name}; } - / "pushq" ws v:r64 - { $$.pushq = (Pushq){.kind = ASM_PUSHQ, .arg = dupv(&v)}; } - / "pushq" ws v:imm - { $$.pushq = (Pushq){.kind = ASM_PUSHQ, .arg = dupv(&v)}; } - / "movq" ws s:r64 ws? "," ws? d:r64 - { $$.movq = (Movq){.kind = ASM_MOVQ, .src = dupv(&s), .dst = dupv(&d)}; } - / "xorl" ws s:r32 ws? "," ws? d:r32 - { $$.xorl = (Xorl){.kind = ASM_XORL, .src = dupv(&s), .dst = dupv(&d)}; } + / i:jmp { $$ = i; } + / i:add { $$ = i; } -label <- i:ident ':' - { $$.label = (Label){.kind = ASM_LABEL, .name = i.ident.name}; } +jmp <- "jmp" ws i:ident + { $$.jmp = (Jmp){.kind = ASM_JMP, .target = i.ident.name}; } -imm <- '$' n:number { $$ = n; } - / '$' i:ident { $$ = i; } +add <- "add" 'q'? ws s:m ws? ',' ws? d:r64 + { $$.add = mkadd('q', s, d); } + / "add" 'q'? ws s:imm ws? ',' ws? d:r64 + { $$.add = mkadd('q', s, d); } + / "add" 'q'? ws s:r64 ws? ',' ws? d:m + { $$.add = mkadd('q', s, d); } + / "add" 'q'? ws s:r64 ws? ',' ws? 
d:r64 + { $$.add = mkadd('q', s, d); } + / "addq" ws s:imm ws? ',' ws? d:m + { $$.add = mkadd('q', s, d); } + +m <- '(' ws? r:r64 ws? ')' + { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = NULL, .reg = r.kind }; } + / <'-'?[0-9]+> ws? '(' ws? r:r64 ws? ')' + { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = strtoll($1, NULL, 10), .l = NULL, .reg = r.kind }; } + / i:ident ws? '(' ws? r:r64 ws? ')' + { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = i.ident.name, .reg = r.kind }; } r64 <- "%rax" { $$.kind = ASM_RAX; } / "%rcx" { $$.kind = ASM_RCX; } @@ -49,29 +58,31 @@ r64 <- "%rax" { $$.kind = ASM_RAX; } / "%rsi" { $$.kind = ASM_RSI; } / "%rdi" { $$.kind = ASM_RDI; } -r32 <- "%eax" { $$.kind = ASM_EAX; } - / "%ecx" { $$.kind = ASM_ECX; } - / "%edx" { $$.kind = ASM_EDX; } - / "%ebx" { $$.kind = ASM_EBX; } - / "%esp" { $$.kind = ASM_ESP; } - / "%ebp" { $$.kind = ASM_EBP; } - / "%esi" { $$.kind = ASM_ESI; } - / "%edi" { $$.kind = ASM_EDI; } - -number <- <[0-9]+> - { $$.number = (Number){ .kind = ASM_NUMBER, .value = strtoll($1, NULL, 10) }; } +imm <- '$' i:ident + { $$.imm = (Imm){.kind = ASM_IMM, .l = i.ident.name, .c = 0 }; } + / '$' <'-'?[0-9]+> + { $$.imm = (Imm){.kind = ASM_IMM, .l = NULL, .c = strtoll($1, NULL, 10) }; } ident <- <[_a-zA-Z][_a-zA-Z0-9]*> { $$.ident = (Ident){ .kind = ASM_IDENT, .name = xstrdup($1) }; } +number <- <'-'?[0-9]+> + { $$.number = (Number){ .kind = ASM_NUMBER, .v = strtoll($1, NULL, 10) }; } + ws <- [ \t]+ eol <- ws? ("\n" / (! .)) %source { - Parsev *dupv(Parsev *p) { - Parsev *r = xmalloc(sizeof(Parsev)); - *r = *p; - return r; - } + +static Parsev *dupv(Parsev *p) { + Parsev *r = xmalloc(sizeof(Parsev)); + *r = *p; + return r; +} + +static Add mkadd(char t, Parsev s, Parsev d) { + return (Add){ .kind = ASM_ADD, .type = 'q', .src = dupv(&s), .dst = dupv(&d) }; +} + }
\ No newline at end of file @@ -21,10 +21,9 @@ typedef struct { typedef struct { const char *name; - int64_t wco; int64_t offset; int64_t size; - int global : 1; + int global; Section *section; } Symbol; @@ -33,8 +32,10 @@ typedef enum { ASM_SYNTAX_ERROR, ASM_BLANK, ASM_LABEL, + ASM_IMM, ASM_IDENT, ASM_NUMBER, + ASM_MEMARG, // Directives ASM_DIR_GLOBL, ASM_DIR_DATA, @@ -46,9 +47,7 @@ typedef enum { ASM_RET, ASM_JMP, ASM_LEAVE, - ASM_PUSHQ, - ASM_MOVQ, - ASM_XORL, + ASM_ADD, // Registers, order matters. ASM_EAX, ASM_ECX, @@ -68,40 +67,10 @@ typedef enum { ASM_RDI, } AsmKind; -static int isr64kind(AsmKind k) { - return k >= ASM_RAX && k <= ASM_RDI; -} - -static int isr32kind(AsmKind k) { - return k >= ASM_EAX && k <= ASM_EDI; -} - typedef union Parsev Parsev; typedef struct { AsmKind kind; - const char *target; -} Jmp; - -typedef struct { - AsmKind kind; - Parsev *arg; -} Pushq; - -typedef struct { - AsmKind kind; - Parsev *src; - Parsev *dst; -} Movq; - -typedef struct { - AsmKind kind; - Parsev *src; - Parsev *dst; -} Xorl; - -typedef struct { - AsmKind kind; const char *name; } Label; @@ -122,29 +91,49 @@ typedef struct { typedef struct { AsmKind kind; - int64_t imm; + const char *l; /* label */ + int64_t c; /* constant */ } Imm; typedef struct { AsmKind kind; + AsmKind reg; + const char *l; /* label */ + int64_t c; /* constant */ +} Memarg; + +typedef struct { + AsmKind kind; const char *name; } Ident; typedef struct { AsmKind kind; - int64_t value; + int64_t v; } Number; +typedef struct { + AsmKind kind; + const char *target; +} Jmp; + +typedef struct { + AsmKind kind; + char type; + Parsev *src; + Parsev *dst; +} Add; + union Parsev { AsmKind kind; Label label; Globl globl; Balign balign; + Memarg memarg; + Add add; Jmp jmp; - Pushq pushq; - Movq movq; - Xorl xorl; Byte byte; + Imm imm; Ident ident; Number number; }; @@ -152,7 +141,6 @@ union Parsev { typedef struct AsmLine AsmLine; struct AsmLine { int64_t lineno; - int64_t wco; // Worst case offset Parsev v; 
AsmLine *next; }; @@ -168,7 +156,6 @@ char *xmemdup(const char *, size_t); char *xstrdup(const char *s); void *zalloc(size_t n); - struct hashtable { size_t len, cap; struct hashtablekey *keys; @@ -1,15 +1,16 @@ #include "dumbas.h" -AsmLine *allasm = NULL; +static AsmLine *allasm = NULL; // Symbols in memory before // writing out the symtab section. -struct hashtable *symbols = NULL; +static struct hashtable *symbols = NULL; #define MAXSECTIONS 32 static Section sections[MAXSECTIONS]; static size_t nsections = 1; // first is reserved. +static Section *cursection; static Section *shstrtab = NULL; static Section *strtab = NULL; static Section *symtab = NULL; @@ -112,47 +113,11 @@ static void initsections(void) { text->hdr.sh_addralign = 4; } -static void out(uint8_t *buf, size_t n) { - if (write(STDOUT_FILENO, buf, n) != n) - fatal("io error"); -} - -static void outelf(void) { - size_t i; - uint64_t offset; - Elf64_Ehdr ehdr = {0}; - - ehdr.e_ident[0] = 0x7f; - ehdr.e_ident[1] = 'E'; - ehdr.e_ident[2] = 'L'; - ehdr.e_ident[3] = 'F'; - ehdr.e_ident[4] = ELFCLASS64; - ehdr.e_ident[5] = ELFDATA2LSB; - ehdr.e_ident[6] = 1; - ehdr.e_type = ET_REL; - ehdr.e_machine = EM_X86_64; - ehdr.e_flags = 0; - ehdr.e_version = 1; - ehdr.e_ehsize = sizeof(Elf64_Ehdr); - ehdr.e_shoff = sizeof(Elf64_Ehdr); - ehdr.e_shentsize = sizeof(Elf64_Shdr); - ehdr.e_shnum = nsections; - ehdr.e_shstrndx = 1; - - out((uint8_t *)&ehdr, sizeof(ehdr)); - offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Shdr) * nsections; - - for (i = 0; i < nsections; i++) { - sections[i].hdr.sh_offset = offset; - out((uint8_t *)§ions[i].hdr, sizeof(Elf64_Shdr)); - offset += sections[i].hdr.sh_size; - } - for (i = 0; i < nsections; i++) { - if (sections[i].hdr.sh_type == SHT_NOBITS) - continue; - out(sections[i].data, sections[i].hdr.sh_size); - } -} +static const char *dbg_str[] = {"Evaluating rule", "Matched rule", + "Abandoning rule"}; +#define PCC_DEBUG(event, rule, level, pos, buffer, length) \ + fprintf(stderr, "%*s%s 
%s @%zu [%.*s]\n", (int)((level)*2), "", \ + dbg_str[event], rule, pos, (int)(length), buffer) #include "asmparser.c" // XXX resolve dependency cycle. @@ -189,25 +154,56 @@ void parse(void) { asmparser_destroy(ctx); } -/* - First pass deals with finding the symbol information - and computing the worst case offsets for each instruction - and symbol. -*/ -static void prepass(void) { +/* Shorthand helpers to write section bytes. */ + +static void sb(uint8_t b) { secaddbyte(cursection, b); } + +static void sb2(uint8_t b1, uint8_t b2) { + uint8_t buf[2] = {b1, b2}; + secaddbytes(cursection, buf, sizeof(buf)); +} + +static void sb3(uint8_t b1, uint8_t b2, uint8_t b3) { + uint8_t buf[3] = {b1, b2, b3}; + secaddbytes(cursection, buf, sizeof(buf)); +} + +static void sb4(uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4) { + uint8_t buf[4] = {b1, b2, b3, b4}; + secaddbytes(cursection, buf, sizeof(buf)); +} + +static void sw(uint32_t w) { + uint8_t buf[4] = {w & 0xff, (w & 0xff00) >> 8, (w & 0xff0000) >> 16, + (w & 0xff0000) >> 24}; + secaddbytes(cursection, buf, sizeof(buf)); +} + +/* Compose a ModR/M byte. 
*/ +static uint8_t modrm(uint8_t mod, uint8_t regop, uint8_t rm) { + return (mod << 6) + (regop << 3) + rm; +} + +/* Convert an ASM_KIND to register bits */ +static uint8_t r64bits(AsmKind k) { return (k - ASM_RAX) & 0xff; } + +static uint8_t r32bits(AsmKind k) { return (k - ASM_EAX) & 0xff; } + +#define REX(W, R, X, B) \ + ((1 << 6) | ((W) << 3) | ((R) << 2) | ((X) << 1) | ((B) << 0)) +#define REX_W REX(1, 0, 0, 0) + +static void assemble() { Symbol *sym; Parsev *v; AsmLine *l; - Section *cursection; const char *label; - struct hashtablekey htk; cursection = text; for (l = allasm; l; l = l->next) { v = &l->v; - l->wco = cursection->wco; - switch (v->kind) { + switch (l->v.kind) { case ASM_DIR_GLOBL: label = v->globl.name; sym = getsym(label); @@ -219,47 +215,86 @@ static void prepass(void) { case ASM_DIR_TEXT: cursection = text; break; - case ASM_DIR_BALIGN: - cursection->wco += v->balign.align - 1; + case ASM_DIR_BALIGN: { + int64_t i, rem, amnt; + amnt = 0; + rem = cursection->hdr.sh_size % v->balign.align; + if (rem) + amnt = v->balign.align - rem; + for (i = 0; i < amnt; i++) { + sb(0x00); + } + break; + } + case ASM_DIR_BYTE: + sb(v->byte.b); break; case ASM_LABEL: label = v->label.name; sym = getsym(label); - sym->section = cursection; - sym->wco = cursection->wco; + sym->offset = cursection->hdr.sh_size; break; - case ASM_DIR_BYTE: case ASM_NOP: + sb(0x90); + break; case ASM_LEAVE: + sb(0xc9); + break; case ASM_RET: - cursection->wco += 1; + sb(0xc3); + break; + case ASM_ADD: { + Add *add = &v->add; + + switch (add->type) { + case 'q': + switch (add->src->kind) { + case ASM_IMM: + fatal("TODO"); + break; + case ASM_MEMARG: + fatal("TODO"); + break; + default: + switch (add->src->kind) { + case ASM_IMM: + fatal("TODO"); + break; + case ASM_MEMARG: + fatal("TODO"); + break; + default: + sb3(REX_W, 0x03, + modrm(0x03, r64bits(add->dst->kind), r64bits(add->src->kind))); + break; + } + break; + } + break; + default: + fatal("unknown type"); + } break; - case 
ASM_XORL: - if (isr32kind(v->movq.src->kind) && isr32kind(v->movq.dst->kind)) { - cursection->wco += 2; - } else { - cursection->wco += 15; // XXX pessimistic. } - break; - case ASM_MOVQ: - if (isr64kind(v->movq.src->kind) && isr64kind(v->movq.dst->kind)) { - cursection->wco += 3; + case ASM_JMP: { + int64_t distance; + + sym = getsym(v->jmp.target); + if (sym->section && (sym->section == cursection)) { + distance = sym->offset - cursection->hdr.sh_size; } else { - cursection->wco += 15; // XXX pessimistic. + distance = 0x7fffffff; // XXX } - break; - case ASM_PUSHQ: - if (isr64kind(v->pushq.arg->kind)) { - cursection->wco += 2; + if (distance <= 128 && distance >= -127) { + sb2(0xeb, (uint8_t)distance); } else { - cursection->wco += 9; // XXX pessimistic. + sb(0xe9); + sw((uint32_t)distance); } break; - case ASM_JMP: - cursection->wco += 5; - break; + } default: - fatal("prepass: unexpected kind: %d", v->kind); + fatal("assemble: unexpected kind: %d", l->v.kind); } } } @@ -300,153 +335,61 @@ static void fillsymtab(void) { } } -#define MODREGI 0x3 -#define REX_W 0x48 - -static uint8_t kindr64bits(AsmKind k) { - return (k - ASM_RAX) & 0xff; -} - -static uint8_t kindr32bits(AsmKind k) { - return (k - ASM_EAX) & 0xff; -} +FILE *outf = NULL; -static uint8_t composemodrm(uint8_t mod, uint8_t regop, uint8_t rm) { - return (mod<<6) + (regop<<3) + rm; +static void out(uint8_t *buf, size_t n) { + fwrite(buf, 1, n, outf); + if (ferror(outf)) + fatal("fwrite:"); } +static void outelf(void) { + size_t i; + uint64_t offset; + Elf64_Ehdr ehdr = {0}; -static void assemble() { - Symbol *sym; - Parsev *v; - AsmLine *l; - Section *cursection; - const char *label; - - cursection = text; + ehdr.e_ident[0] = 0x7f; + ehdr.e_ident[1] = 'E'; + ehdr.e_ident[2] = 'L'; + ehdr.e_ident[3] = 'F'; + ehdr.e_ident[4] = ELFCLASS64; + ehdr.e_ident[5] = ELFDATA2LSB; + ehdr.e_ident[6] = 1; + ehdr.e_type = ET_REL; + ehdr.e_machine = EM_X86_64; + ehdr.e_flags = 0; + ehdr.e_version = 1; + ehdr.e_ehsize = 
sizeof(Elf64_Ehdr); + ehdr.e_shoff = sizeof(Elf64_Ehdr); + ehdr.e_shentsize = sizeof(Elf64_Shdr); + ehdr.e_shnum = nsections; + ehdr.e_shstrndx = 1; - for (l = allasm; l; l = l->next) { - v = &l->v; - switch (l->v.kind) { - case ASM_DIR_GLOBL: - break; - case ASM_DIR_DATA: - cursection = data; - break; - case ASM_DIR_TEXT: - cursection = text; - break; - case ASM_DIR_BALIGN: { - int64_t i, rem, amnt; - amnt = 0; - rem = cursection->hdr.sh_size % v->balign.align; - if (rem) - amnt = v->balign.align - rem; - for (i = 0; i < amnt; i++) { - secaddbyte(cursection, 0x00); - } - break; - } - case ASM_DIR_BYTE: - secaddbyte(cursection, v->byte.b); - break; - case ASM_LABEL: - label = v->label.name; - sym = getsym(label); - sym->offset = cursection->hdr.sh_size; - break; - case ASM_NOP: - secaddbyte(cursection, 0x90); - break; - case ASM_LEAVE: - secaddbyte(cursection, 0xc9); - break; - case ASM_RET: - secaddbyte(cursection, 0xc3); - break; - case ASM_XORL: { - Parsev *src, *dst; - - src = v->movq.src; - dst = v->movq.dst; - - if (isr32kind(src->kind) && isr32kind(dst->kind)) { - uint8_t ibuf[2] = { - 0x31, - composemodrm(MODREGI, kindr32bits(src->kind), kindr32bits(dst->kind)), - }; - secaddbytes(cursection, ibuf, sizeof(ibuf)); - } else { - fatal("TODO"); - } - break; - } - case ASM_MOVQ: { - Parsev *src, *dst; - - src = v->movq.src; - dst = v->movq.dst; - - if (isr64kind(src->kind) && isr64kind(dst->kind)) { - uint8_t ibuf[3] = { - REX_W, - 0x89, - composemodrm(MODREGI, kindr64bits(src->kind), kindr64bits(dst->kind)), - }; - secaddbytes(cursection, ibuf, sizeof(ibuf)); - } else { - fatal("TODO"); - } - break; - } - case ASM_PUSHQ: { - Parsev *arg; - - arg = v->pushq.arg; - - if (isr64kind(arg->kind)) { - uint8_t ibuf[2] = {0x50, kindr64bits(arg->kind)}; - secaddbytes(cursection, ibuf, sizeof(ibuf)); - } else if (arg->kind == ASM_NUMBER) { - fatal("TODO"); - } else if (arg->kind == ASM_IDENT) { - fatal("TODO"); - } else { - fatal("BUG: unexpected pushq arg"); - } + 
out((uint8_t *)&ehdr, sizeof(ehdr)); + offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Shdr) * nsections; - break; - } - case ASM_JMP: { - sym = getsym(v->jmp.target); - if (sym->section && (sym->section == cursection)) { - int64_t distance; - distance = sym->wco - cursection->wco; - if (distance <= 128 && distance >= -127) { - uint8_t ibuf[2] = {0xeb, 0x00}; - secaddbytes(cursection, ibuf, sizeof(ibuf)); - } else { - uint8_t ibuf[5] = {0xe9, 0x00, 0x00, 0x00, 0x00}; - secaddbytes(cursection, ibuf, sizeof(ibuf)); - } - } else { - fatal("TODO, jmp to undefined symbol"); - } - break; - } - default: - fatal("assemble: unexpected kind: %d", l->v.kind); - } + for (i = 0; i < nsections; i++) { + sections[i].hdr.sh_offset = offset; + out((uint8_t *)&sections[i].hdr, sizeof(Elf64_Shdr)); + offset += sections[i].hdr.sh_size; + } + for (i = 0; i < nsections; i++) { + if (sections[i].hdr.sh_type == SHT_NOBITS) + continue; + out(sections[i].data, sections[i].hdr.sh_size); } } int main(void) { symbols = mkhtab(256); + outf = stdout; + initsections(); parse(); - prepass(); assemble(); fillsymtab(); outelf(); + if (fflush(outf) != 0) + fatal("fflush:"); return 0; }
\ No newline at end of file |
