aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Andrew Chambers <[email protected]> 2021-10-05 03:02:10 +1300
committer Andrew Chambers <[email protected]> 2021-10-05 03:02:10 +1300
commit d8085a24dd755bf7b63d74be716efbfff6dd772d (patch)
tree 5c3a99000f0bdf3ccb6b4e5d73238f4dadd429c5
parent 3cd70232589097fee7d05a647f473d77269d8e97 (diff)
Work on assembler.
-rw-r--r-- asmparser.peg 77
-rw-r--r-- dumbas.h 71
-rw-r--r-- main.c 363
3 files changed, 226 insertions, 285 deletions
diff --git a/asmparser.peg b/asmparser.peg
index 5d3d756..11dec46 100644
--- a/asmparser.peg
+++ b/asmparser.peg
@@ -2,7 +2,6 @@
%value "Parsev"
-
line <- s:stmt eol { $$ = s; }
/ eol { $$.kind = ASM_BLANK; }
/ . { $$.kind = ASM_SYNTAX_ERROR; }
@@ -16,29 +15,39 @@ directive <- ".glob" "o"? "l" ws i:ident
/ ".data" { $$.kind = ASM_DIR_DATA; }
/ ".text" { $$.kind = ASM_DIR_TEXT; }
/ ".balign" ws n:number
- { $$.balign = (Balign){.kind = ASM_DIR_BALIGN, .align = n.number.value }; }
+ { $$.balign = (Balign){.kind = ASM_DIR_BALIGN, .align = n.number.v }; }
/ ".byte" ws n:number
- { $$.byte = (Byte){.kind = ASM_DIR_BYTE, .b = (uint8_t)n.number.value }; }
+ { $$.byte = (Byte){.kind = ASM_DIR_BYTE, .b = (uint8_t)n.number.v }; }
+
+label <- i:ident ':' # an identifier followed by ':' yields an ASM_LABEL carrying the identifier's name
+ { $$.label = (Label){.kind = ASM_LABEL, .name = i.ident.name}; }
instr <- "nop"{ $$.kind = ASM_NOP; }
/ "leave" { $$.kind = ASM_LEAVE; }
/ "ret" { $$.kind = ASM_RET; }
- / "jmp" ws i:ident
- { $$.jmp = (Jmp){.kind = ASM_JMP, .target = i.ident.name}; }
- / "pushq" ws v:r64
- { $$.pushq = (Pushq){.kind = ASM_PUSHQ, .arg = dupv(&v)}; }
- / "pushq" ws v:imm
- { $$.pushq = (Pushq){.kind = ASM_PUSHQ, .arg = dupv(&v)}; }
- / "movq" ws s:r64 ws? "," ws? d:r64
- { $$.movq = (Movq){.kind = ASM_MOVQ, .src = dupv(&s), .dst = dupv(&d)}; }
- / "xorl" ws s:r32 ws? "," ws? d:r32
- { $$.xorl = (Xorl){.kind = ASM_XORL, .src = dupv(&s), .dst = dupv(&d)}; }
+ / i:jmp { $$ = i; }
+ / i:add { $$ = i; }
-label <- i:ident ':'
- { $$.label = (Label){.kind = ASM_LABEL, .name = i.ident.name}; }
+jmp <- "jmp" ws i:ident # jmp <label>; the target is stored by name
+ { $$.jmp = (Jmp){.kind = ASM_JMP, .target = i.ident.name}; }
-imm <- '$' n:number { $$ = n; }
- / '$' i:ident { $$ = i; }
+add <- "add" 'q'? ws s:m ws? ',' ws? d:r64 # memory source, 64-bit register destination
+ { $$.add = mkadd('q', s, d); }
+ / "add" 'q'? ws s:imm ws? ',' ws? d:r64 # immediate source, 64-bit register destination
+ { $$.add = mkadd('q', s, d); }
+ / "add" 'q'? ws s:r64 ws? ',' ws? d:m # 64-bit register source, memory destination
+ { $$.add = mkadd('q', s, d); }
+ / "add" 'q'? ws s:r64 ws? ',' ws? d:r64 # register to register
+ { $$.add = mkadd('q', s, d); }
+ / "addq" ws s:imm ws? ',' ws? d:m # immediate source, memory destination; only the explicit "addq" spelling is accepted here
+ { $$.add = mkadd('q', s, d); }
+
+m <- '(' ws? r:r64 ws? ')' # (reg): no constant, no label
+ { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = NULL, .reg = r.kind }; }
+ / <'-'?[0-9]+> ws? '(' ws? r:r64 ws? ')' # const(reg): decimal constant with optional leading '-'
+ { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = strtoll($1, NULL, 10), .l = NULL, .reg = r.kind }; }
+ / i:ident ws? '(' ws? r:r64 ws? ')' # label(reg): the label is kept by name, constant is 0
+ { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = i.ident.name, .reg = r.kind }; }
r64 <- "%rax" { $$.kind = ASM_RAX; }
/ "%rcx" { $$.kind = ASM_RCX; }
@@ -49,29 +58,31 @@ r64 <- "%rax" { $$.kind = ASM_RAX; }
/ "%rsi" { $$.kind = ASM_RSI; }
/ "%rdi" { $$.kind = ASM_RDI; }
-r32 <- "%eax" { $$.kind = ASM_EAX; }
- / "%ecx" { $$.kind = ASM_ECX; }
- / "%edx" { $$.kind = ASM_EDX; }
- / "%ebx" { $$.kind = ASM_EBX; }
- / "%esp" { $$.kind = ASM_ESP; }
- / "%ebp" { $$.kind = ASM_EBP; }
- / "%esi" { $$.kind = ASM_ESI; }
- / "%edi" { $$.kind = ASM_EDI; }
-
-number <- <[0-9]+>
- { $$.number = (Number){ .kind = ASM_NUMBER, .value = strtoll($1, NULL, 10) }; }
+imm <- '$' i:ident # $label: label kept by name, constant is 0
+ { $$.imm = (Imm){.kind = ASM_IMM, .l = i.ident.name, .c = 0 }; }
+ / '$' <'-'?[0-9]+> # $const: decimal constant with optional leading '-', no label
+ { $$.imm = (Imm){.kind = ASM_IMM, .l = NULL, .c = strtoll($1, NULL, 10) }; }
ident <- <[_a-zA-Z][_a-zA-Z0-9]*>
{ $$.ident = (Ident){ .kind = ASM_IDENT, .name = xstrdup($1) }; }
+number <- <'-'?[0-9]+> # decimal integer with optional leading '-', converted with strtoll
+ { $$.number = (Number){ .kind = ASM_NUMBER, .v = strtoll($1, NULL, 10) }; }
+
ws <- [ \t]+
eol <- ws? ("\n" / (! .))
%source {
- Parsev *dupv(Parsev *p) {
- Parsev *r = xmalloc(sizeof(Parsev));
- *r = *p;
- return r;
- }
+
+/* Return a newly xmalloc'ed Parsev holding a shallow copy of *p. */
+static Parsev *dupv(Parsev *p) {
+  Parsev *copy = xmalloc(sizeof *copy);
+
+  *copy = *p;
+  return copy;
+}
+
+/*
+ * Build an ASM_ADD node.
+ *
+ * t    operand-size suffix recorded in the node's type field.
+ * s, d source and destination operands; each is copied to the heap with
+ *      dupv() because the node stores pointers, not values.
+ */
+static Add mkadd(char t, Parsev s, Parsev d) {
+  /* Record the caller's suffix rather than a fixed 'q', so the parameter
+     actually selects the node type. */
+  return (Add){ .kind = ASM_ADD, .type = t, .src = dupv(&s), .dst = dupv(&d) };
+}
+
} \ No newline at end of file
diff --git a/dumbas.h b/dumbas.h
index 7bb489d..8fd023c 100644
--- a/dumbas.h
+++ b/dumbas.h
@@ -21,10 +21,9 @@ typedef struct {
typedef struct {
const char *name;
- int64_t wco;
int64_t offset;
int64_t size;
- int global : 1;
+ int global;
Section *section;
} Symbol;
@@ -33,8 +32,10 @@ typedef enum {
ASM_SYNTAX_ERROR,
ASM_BLANK,
ASM_LABEL,
+ ASM_IMM,
ASM_IDENT,
ASM_NUMBER,
+ ASM_MEMARG,
// Directives
ASM_DIR_GLOBL,
ASM_DIR_DATA,
@@ -46,9 +47,7 @@ typedef enum {
ASM_RET,
ASM_JMP,
ASM_LEAVE,
- ASM_PUSHQ,
- ASM_MOVQ,
- ASM_XORL,
+ ASM_ADD,
// Registers, order matters.
ASM_EAX,
ASM_ECX,
@@ -68,40 +67,10 @@ typedef enum {
ASM_RDI,
} AsmKind;
-static int isr64kind(AsmKind k) {
- return k >= ASM_RAX && k <= ASM_RDI;
-}
-
-static int isr32kind(AsmKind k) {
- return k >= ASM_EAX && k <= ASM_EDI;
-}
-
typedef union Parsev Parsev;
typedef struct {
AsmKind kind;
- const char *target;
-} Jmp;
-
-typedef struct {
- AsmKind kind;
- Parsev *arg;
-} Pushq;
-
-typedef struct {
- AsmKind kind;
- Parsev *src;
- Parsev *dst;
-} Movq;
-
-typedef struct {
- AsmKind kind;
- Parsev *src;
- Parsev *dst;
-} Xorl;
-
-typedef struct {
- AsmKind kind;
const char *name;
} Label;
@@ -122,29 +91,49 @@ typedef struct {
typedef struct {
AsmKind kind;
- int64_t imm;
+ const char *l; /* label */
+ int64_t c; /* constant */
} Imm;
typedef struct {
AsmKind kind;
+ AsmKind reg;
+ const char *l; /* label */
+ int64_t c; /* constant */
+} Memarg;
+
+typedef struct {
+ AsmKind kind;
const char *name;
} Ident;
typedef struct {
AsmKind kind;
- int64_t value;
+ int64_t v;
} Number;
+typedef struct {
+ AsmKind kind; /* ASM_JMP */
+ const char *target; /* name of the label to jump to, as parsed from the operand */
+} Jmp;
+
+typedef struct {
+ AsmKind kind; /* ASM_ADD */
+ char type; /* operand-size suffix; the grammar currently only produces 'q' */
+ Parsev *src; /* source operand, heap copy made by dupv() */
+ Parsev *dst; /* destination operand, heap copy made by dupv() */
+} Add;
+
union Parsev {
AsmKind kind;
Label label;
Globl globl;
Balign balign;
+ Memarg memarg;
+ Add add;
Jmp jmp;
- Pushq pushq;
- Movq movq;
- Xorl xorl;
Byte byte;
+ Imm imm;
Ident ident;
Number number;
};
@@ -152,7 +141,6 @@ union Parsev {
typedef struct AsmLine AsmLine;
struct AsmLine {
int64_t lineno;
- int64_t wco; // Worst case offset
Parsev v;
AsmLine *next;
};
@@ -168,7 +156,6 @@ char *xmemdup(const char *, size_t);
char *xstrdup(const char *s);
void *zalloc(size_t n);
-
struct hashtable {
size_t len, cap;
struct hashtablekey *keys;
diff --git a/main.c b/main.c
index 68b5bdd..c3fdd29 100644
--- a/main.c
+++ b/main.c
@@ -1,15 +1,16 @@
#include "dumbas.h"
-AsmLine *allasm = NULL;
+static AsmLine *allasm = NULL;
// Symbols in memory before
// writing out the symtab section.
-struct hashtable *symbols = NULL;
+static struct hashtable *symbols = NULL;
#define MAXSECTIONS 32
static Section sections[MAXSECTIONS];
static size_t nsections = 1; // first is reserved.
+static Section *cursection;
static Section *shstrtab = NULL;
static Section *strtab = NULL;
static Section *symtab = NULL;
@@ -112,47 +113,11 @@ static void initsections(void) {
text->hdr.sh_addralign = 4;
}
-static void out(uint8_t *buf, size_t n) {
- if (write(STDOUT_FILENO, buf, n) != n)
- fatal("io error");
-}
-
-static void outelf(void) {
- size_t i;
- uint64_t offset;
- Elf64_Ehdr ehdr = {0};
-
- ehdr.e_ident[0] = 0x7f;
- ehdr.e_ident[1] = 'E';
- ehdr.e_ident[2] = 'L';
- ehdr.e_ident[3] = 'F';
- ehdr.e_ident[4] = ELFCLASS64;
- ehdr.e_ident[5] = ELFDATA2LSB;
- ehdr.e_ident[6] = 1;
- ehdr.e_type = ET_REL;
- ehdr.e_machine = EM_X86_64;
- ehdr.e_flags = 0;
- ehdr.e_version = 1;
- ehdr.e_ehsize = sizeof(Elf64_Ehdr);
- ehdr.e_shoff = sizeof(Elf64_Ehdr);
- ehdr.e_shentsize = sizeof(Elf64_Shdr);
- ehdr.e_shnum = nsections;
- ehdr.e_shstrndx = 1;
-
- out((uint8_t *)&ehdr, sizeof(ehdr));
- offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Shdr) * nsections;
-
- for (i = 0; i < nsections; i++) {
- sections[i].hdr.sh_offset = offset;
- out((uint8_t *)&sections[i].hdr, sizeof(Elf64_Shdr));
- offset += sections[i].hdr.sh_size;
- }
- for (i = 0; i < nsections; i++) {
- if (sections[i].hdr.sh_type == SHT_NOBITS)
- continue;
- out(sections[i].data, sections[i].hdr.sh_size);
- }
-}
+static const char *dbg_str[] = {"Evaluating rule", "Matched rule",
+ "Abandoning rule"}; /* indexed by the event argument of PCC_DEBUG */
+#define PCC_DEBUG(event, rule, level, pos, buffer, length) /* one trace line on stderr per event, indented two columns per level; presumably picked up by the generated parser included below -- confirm */ \
+ fprintf(stderr, "%*s%s %s @%zu [%.*s]\n", (int)((level)*2), "", \
+ dbg_str[event], rule, pos, (int)(length), buffer)
#include "asmparser.c" // XXX resolve dependency cycle.
@@ -189,25 +154,56 @@ void parse(void) {
asmparser_destroy(ctx);
}
-/*
- First pass deals with finding the symbol information
- and computing the worst case offsets for each instruction
- and symbol.
-*/
-static void prepass(void) {
+/* Shorthand helpers to write section bytes. */
+
+/* Append a single byte to the section currently being assembled. */
+static void sb(uint8_t b) {
+  secaddbyte(cursection, b);
+}
+
+/* Append two bytes, in argument order, to the current section. */
+static void sb2(uint8_t b1, uint8_t b2) {
+  uint8_t bytes[] = {b1, b2};
+
+  secaddbytes(cursection, bytes, sizeof bytes);
+}
+
+/* Append three bytes, in argument order, to the current section. */
+static void sb3(uint8_t b1, uint8_t b2, uint8_t b3) {
+  uint8_t bytes[] = {b1, b2, b3};
+
+  secaddbytes(cursection, bytes, sizeof bytes);
+}
+
+/* Append four bytes, in argument order, to the current section. */
+static void sb4(uint8_t b1, uint8_t b2, uint8_t b3, uint8_t b4) {
+  uint8_t bytes[] = {b1, b2, b3, b4};
+
+  secaddbytes(cursection, bytes, sizeof bytes);
+}
+
+/*
+ * Append the 32-bit word w to the current section, least significant byte
+ * first (the byte order used for x86 immediates and displacements).
+ */
+static void sw(uint32_t w) {
+  /* Shift each byte down before masking so that all four bytes of w,
+     including bits 31-24, reach the output. */
+  uint8_t buf[4] = {w & 0xff, (w >> 8) & 0xff, (w >> 16) & 0xff,
+                    (w >> 24) & 0xff};
+  secaddbytes(cursection, buf, sizeof(buf));
+}
+
+/*
+ * Compose a ModR/M byte: mod lands in bits 7-6, regop in bits 5-3 and rm in
+ * bits 2-0.  The fields are added together without masking, so callers are
+ * expected to pass values that fit their field.
+ */
+static uint8_t modrm(uint8_t mod, uint8_t regop, uint8_t rm) {
+  int byte = rm;
+
+  byte += regop << 3;
+  byte += mod << 6;
+  return byte;
+}
+
+/*
+ * Convert a 64-bit register AsmKind to its register bits: the distance from
+ * ASM_RAX, kept to one byte.  This depends on the order of the register
+ * entries in AsmKind.
+ */
+static uint8_t r64bits(AsmKind k) {
+  return (uint8_t)((k - ASM_RAX) & 0xff);
+}
+
+/*
+ * Convert a 32-bit register AsmKind to its register bits: the distance from
+ * ASM_EAX, kept to one byte.  This depends on the order of the register
+ * entries in AsmKind.
+ */
+static uint8_t r32bits(AsmKind k) {
+  return (uint8_t)((k - ASM_EAX) & 0xff);
+}
+
+#define REX(W, R, X, B) /* prefix byte laid out as 0100WRXB: bit 6 always set, W/R/X/B in bits 3..0 */ \
+ ((1 << 6) | ((W) << 3) | ((R) << 2) | ((X) << 1) | ((B) << 0))
+#define REX_W REX(1, 0, 0, 0) /* 0x48: only the W bit set */
+
+static void assemble() {
Symbol *sym;
Parsev *v;
AsmLine *l;
- Section *cursection;
const char *label;
- struct hashtablekey htk;
cursection = text;
for (l = allasm; l; l = l->next) {
v = &l->v;
- l->wco = cursection->wco;
- switch (v->kind) {
+ switch (l->v.kind) {
case ASM_DIR_GLOBL:
label = v->globl.name;
sym = getsym(label);
@@ -219,47 +215,86 @@ static void prepass(void) {
case ASM_DIR_TEXT:
cursection = text;
break;
- case ASM_DIR_BALIGN:
- cursection->wco += v->balign.align - 1;
+ case ASM_DIR_BALIGN: {
+ int64_t i, rem, amnt;
+ amnt = 0;
+ rem = cursection->hdr.sh_size % v->balign.align;
+ if (rem)
+ amnt = v->balign.align - rem;
+ for (i = 0; i < amnt; i++) {
+ sb(0x00);
+ }
+ break;
+ }
+ case ASM_DIR_BYTE:
+ sb(v->byte.b);
break;
case ASM_LABEL:
label = v->label.name;
sym = getsym(label);
- sym->section = cursection;
- sym->wco = cursection->wco;
+ sym->offset = cursection->hdr.sh_size;
break;
- case ASM_DIR_BYTE:
case ASM_NOP:
+ sb(0x90);
+ break;
case ASM_LEAVE:
+ sb(0xc9);
+ break;
case ASM_RET:
- cursection->wco += 1;
+ sb(0xc3);
+ break;
+ case ASM_ADD: {
+ Add *add = &v->add;
+
+ switch (add->type) {
+ case 'q':
+ switch (add->src->kind) {
+ case ASM_IMM:
+ fatal("TODO");
+ break;
+ case ASM_MEMARG:
+ fatal("TODO");
+ break;
+ default:
+ switch (add->src->kind) {
+ case ASM_IMM:
+ fatal("TODO");
+ break;
+ case ASM_MEMARG:
+ fatal("TODO");
+ break;
+ default:
+ sb3(REX_W, 0x03,
+ modrm(0x03, r64bits(add->dst->kind), r64bits(add->src->kind)));
+ break;
+ }
+ break;
+ }
+ break;
+ default:
+ fatal("unknown type");
+ }
break;
- case ASM_XORL:
- if (isr32kind(v->movq.src->kind) && isr32kind(v->movq.dst->kind)) {
- cursection->wco += 2;
- } else {
- cursection->wco += 15; // XXX pessimistic.
}
- break;
- case ASM_MOVQ:
- if (isr64kind(v->movq.src->kind) && isr64kind(v->movq.dst->kind)) {
- cursection->wco += 3;
+ case ASM_JMP: {
+ int64_t distance;
+
+ sym = getsym(v->jmp.target);
+ if (sym->section && (sym->section == cursection)) {
+ distance = sym->offset - cursection->hdr.sh_size;
} else {
- cursection->wco += 15; // XXX pessimistic.
+ distance = 0x7fffffff; // XXX
}
- break;
- case ASM_PUSHQ:
- if (isr64kind(v->pushq.arg->kind)) {
- cursection->wco += 2;
+ if (distance <= 128 && distance >= -127) {
+ sb2(0xeb, (uint8_t)distance);
} else {
- cursection->wco += 9; // XXX pessimistic.
+ sb(0xe9);
+ sw((uint32_t)distance);
}
break;
- case ASM_JMP:
- cursection->wco += 5;
- break;
+ }
default:
- fatal("prepass: unexpected kind: %d", v->kind);
+ fatal("assemble: unexpected kind: %d", l->v.kind);
}
}
}
@@ -300,153 +335,61 @@ static void fillsymtab(void) {
}
}
-#define MODREGI 0x3
-#define REX_W 0x48
-
-static uint8_t kindr64bits(AsmKind k) {
- return (k - ASM_RAX) & 0xff;
-}
-
-static uint8_t kindr32bits(AsmKind k) {
- return (k - ASM_EAX) & 0xff;
-}
+FILE *outf = NULL;
-static uint8_t composemodrm(uint8_t mod, uint8_t regop, uint8_t rm) {
- return (mod<<6) + (regop<<3) + rm;
+static void out(uint8_t *buf, size_t n) {
+ fwrite(buf, 1, n, outf);
+ if (ferror(outf))
+ fatal("fwrite:");
}
+static void outelf(void) {
+ size_t i;
+ uint64_t offset;
+ Elf64_Ehdr ehdr = {0};
-static void assemble() {
- Symbol *sym;
- Parsev *v;
- AsmLine *l;
- Section *cursection;
- const char *label;
-
- cursection = text;
+ ehdr.e_ident[0] = 0x7f;
+ ehdr.e_ident[1] = 'E';
+ ehdr.e_ident[2] = 'L';
+ ehdr.e_ident[3] = 'F';
+ ehdr.e_ident[4] = ELFCLASS64;
+ ehdr.e_ident[5] = ELFDATA2LSB;
+ ehdr.e_ident[6] = 1;
+ ehdr.e_type = ET_REL;
+ ehdr.e_machine = EM_X86_64;
+ ehdr.e_flags = 0;
+ ehdr.e_version = 1;
+ ehdr.e_ehsize = sizeof(Elf64_Ehdr);
+ ehdr.e_shoff = sizeof(Elf64_Ehdr);
+ ehdr.e_shentsize = sizeof(Elf64_Shdr);
+ ehdr.e_shnum = nsections;
+ ehdr.e_shstrndx = 1;
- for (l = allasm; l; l = l->next) {
- v = &l->v;
- switch (l->v.kind) {
- case ASM_DIR_GLOBL:
- break;
- case ASM_DIR_DATA:
- cursection = data;
- break;
- case ASM_DIR_TEXT:
- cursection = text;
- break;
- case ASM_DIR_BALIGN: {
- int64_t i, rem, amnt;
- amnt = 0;
- rem = cursection->hdr.sh_size % v->balign.align;
- if (rem)
- amnt = v->balign.align - rem;
- for (i = 0; i < amnt; i++) {
- secaddbyte(cursection, 0x00);
- }
- break;
- }
- case ASM_DIR_BYTE:
- secaddbyte(cursection, v->byte.b);
- break;
- case ASM_LABEL:
- label = v->label.name;
- sym = getsym(label);
- sym->offset = cursection->hdr.sh_size;
- break;
- case ASM_NOP:
- secaddbyte(cursection, 0x90);
- break;
- case ASM_LEAVE:
- secaddbyte(cursection, 0xc9);
- break;
- case ASM_RET:
- secaddbyte(cursection, 0xc3);
- break;
- case ASM_XORL: {
- Parsev *src, *dst;
-
- src = v->movq.src;
- dst = v->movq.dst;
-
- if (isr32kind(src->kind) && isr32kind(dst->kind)) {
- uint8_t ibuf[2] = {
- 0x31,
- composemodrm(MODREGI, kindr32bits(src->kind), kindr32bits(dst->kind)),
- };
- secaddbytes(cursection, ibuf, sizeof(ibuf));
- } else {
- fatal("TODO");
- }
- break;
- }
- case ASM_MOVQ: {
- Parsev *src, *dst;
-
- src = v->movq.src;
- dst = v->movq.dst;
-
- if (isr64kind(src->kind) && isr64kind(dst->kind)) {
- uint8_t ibuf[3] = {
- REX_W,
- 0x89,
- composemodrm(MODREGI, kindr64bits(src->kind), kindr64bits(dst->kind)),
- };
- secaddbytes(cursection, ibuf, sizeof(ibuf));
- } else {
- fatal("TODO");
- }
- break;
- }
- case ASM_PUSHQ: {
- Parsev *arg;
-
- arg = v->pushq.arg;
-
- if (isr64kind(arg->kind)) {
- uint8_t ibuf[2] = {0x50, kindr64bits(arg->kind)};
- secaddbytes(cursection, ibuf, sizeof(ibuf));
- } else if (arg->kind == ASM_NUMBER) {
- fatal("TODO");
- } else if (arg->kind == ASM_IDENT) {
- fatal("TODO");
- } else {
- fatal("BUG: unexpected pushq arg");
- }
+ out((uint8_t *)&ehdr, sizeof(ehdr));
+ offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Shdr) * nsections;
- break;
- }
- case ASM_JMP: {
- sym = getsym(v->jmp.target);
- if (sym->section && (sym->section == cursection)) {
- int64_t distance;
- distance = sym->wco - cursection->wco;
- if (distance <= 128 && distance >= -127) {
- uint8_t ibuf[2] = {0xeb, 0x00};
- secaddbytes(cursection, ibuf, sizeof(ibuf));
- } else {
- uint8_t ibuf[5] = {0xe9, 0x00, 0x00, 0x00, 0x00};
- secaddbytes(cursection, ibuf, sizeof(ibuf));
- }
- } else {
- fatal("TODO, jmp to undefined symbol");
- }
- break;
- }
- default:
- fatal("assemble: unexpected kind: %d", l->v.kind);
- }
+ for (i = 0; i < nsections; i++) {
+ sections[i].hdr.sh_offset = offset;
+ out((uint8_t *)&sections[i].hdr, sizeof(Elf64_Shdr));
+ offset += sections[i].hdr.sh_size;
+ }
+ for (i = 0; i < nsections; i++) {
+ if (sections[i].hdr.sh_type == SHT_NOBITS)
+ continue;
+ out(sections[i].data, sections[i].hdr.sh_size);
}
}
int main(void) {
symbols = mkhtab(256);
+ outf = stdout;
+
initsections();
parse();
- prepass();
assemble();
fillsymtab();
outelf();
+ if (fflush(outf) != 0)
+ fatal("fflush:");
return 0;
} \ No newline at end of file