about summary refs log tree commit diff
diff options
context:
space:
mode:
author Andrew Chambers <[email protected]> 2021-10-03 21:35:28 +1300
committer Andrew Chambers <[email protected]> 2021-10-03 21:35:28 +1300
commit 7102258076d80765df63d47f2117d2cd9fa3aa41 (patch)
tree 40005bb113225beff16e4b702ade488ef4331b2b
parent 204f57da606af1aa788335504f3d1f126a432d96 (diff)
More instructions.
-rw-r--r-- asmparser.peg 56
-rw-r--r-- dumbas.h 88
-rw-r--r-- main.c 74
3 files changed, 181 insertions, 37 deletions
diff --git a/asmparser.peg b/asmparser.peg
index 2accd8f..8f9eb92 100644
--- a/asmparser.peg
+++ b/asmparser.peg
@@ -2,6 +2,7 @@
%value "Parsev"
+
line <- s:stmt eol { $$ = s; }
/ eol { $$.kind = ASM_BLANK; }
/ . { $$.kind = ASM_SYNTAX_ERROR; }
@@ -11,23 +12,54 @@ stmt <- d:directive {$$ = d;}
/ l:label { $$ = l; }
directive <- ".glob" "o"? "l" ws i:ident
- { $$.globl = (Globl){.kind = ASM_DIR_GLOBL, .name = i.ident }; }
- / ".data"
- { $$.kind = ASM_DIR_DATA; }
- / ".text"
- { $$.kind = ASM_DIR_TEXT; }
+ { $$.globl = (Globl){.kind = ASM_DIR_GLOBL, .name = i.ident.name }; }
+ / ".data" { $$.kind = ASM_DIR_DATA; }
+ / ".text" { $$.kind = ASM_DIR_TEXT; }
/ ".byte" ws n:number
- { $$.byte = (Byte){.kind = ASM_DIR_BYTE, .b = (uint8_t)n.number }; }
+ { $$.byte = (Byte){.kind = ASM_DIR_BYTE, .b = (uint8_t)n.number.value }; }
-instr <- "nop" { $$.instr = (Instr){.kind = ASM_NOP }; }
- / "ret" { $$.instr = (Instr){.kind = ASM_RET }; }
+instr <- "nop"{ $$.kind = ASM_NOP; }
+ / "leave" { $$.kind = ASM_LEAVE; }
+ / "ret" { $$.kind = ASM_RET; }
/ "jmp" ws i:ident
- { $$.instr = (Instr){.kind = ASM_JMP, .jmp.target = i.ident}; }
+ { $$.jmp = (Jmp){.kind = ASM_JMP, .target = i.ident.name}; }
+ / "pushq" ws v:r64
+ { $$.pushq = (Pushq){.kind = ASM_PUSHQ, .arg = dupv(&v)}; }
+ / "pushq" ws v:imm
+ { $$.pushq = (Pushq){.kind = ASM_PUSHQ, .arg = dupv(&v)}; }
+ / "movq" ws s:r64 ws? "," ws? d:r64
+ { $$.movq = (Movq){.kind = ASM_MOVQ, .src = dupv(&s), .dst = dupv(&d)}; }
label <- i:ident ':'
- { $$.label = (Label){.kind = ASM_LABEL, .name = i.ident}; }
+ { $$.label = (Label){.kind = ASM_LABEL, .name = i.ident.name}; }
+
+imm <- '$' n:number { $$ = n; }
+ / '$' i:ident { $$ = i; }
+
+r64 <- "%rax" { $$.kind = ASM_RAX; }
+ / "%rcx" { $$.kind = ASM_RCX; }
+ / "%rdx" { $$.kind = ASM_RDX; }
+ / "%rbx" { $$.kind = ASM_RBX; }
+ / "%rsp" { $$.kind = ASM_RSP; }
+ / "%rbp" { $$.kind = ASM_RBP; }
+ / "%rsi" { $$.kind = ASM_RSI; }
+ / "%rdi" { $$.kind = ASM_RDI; }
+
+
+number <- <[0-9]+>
+ { $$.number = (Number){ .kind = ASM_NUMBER, .value = strtoll($1, NULL, 10) }; }
+
+ident <- <[_a-zA-Z][_a-zA-Z0-9]*>
+ { $$.ident = (Ident){ .kind = ASM_IDENT, .name = xstrdup($1) }; }
ws <- [ \t]+
+
eol <- ws? ("\n" / (! .))
-number <- <[0-9]+> { $$.number = strtoll($1, NULL, 10); }
-ident <- <[_a-zA-Z][_a-zA-Z0-9]*> { $$.ident = xstrdup($1); }
+
+%source {
+ Parsev *dupv(Parsev *p) { // Returns a freshly xmalloc'd Parsev holding a copy of *p.
+ Parsev *r = xmalloc(sizeof(Parsev));
+ *r = *p; // Shallow copy: pointer members of the union are copied as pointers, not duplicated.
+ return r; // NOTE(review): nothing in this view frees the copy — confirm who owns it.
+ }
+}
\ No newline at end of file
diff --git a/dumbas.h b/dumbas.h
index 7dce54d..e05a452 100644
--- a/dumbas.h
+++ b/dumbas.h
@@ -28,52 +28,100 @@ typedef struct {
Section *section;
} Symbol;
-enum AsmKind {
+typedef enum {
+ // Misc
ASM_SYNTAX_ERROR,
ASM_BLANK,
+ ASM_LABEL,
+ ASM_IDENT,
+ ASM_NUMBER,
+ // Directives
ASM_DIR_GLOBL,
ASM_DIR_DATA,
ASM_DIR_TEXT,
ASM_DIR_BYTE,
- ASM_LABEL,
+ // Instructions
ASM_NOP,
ASM_RET,
- ASM_JMP
-};
+ ASM_JMP,
+ ASM_LEAVE,
+ ASM_PUSHQ,
+ ASM_MOVQ,
+ // Registers, order matters.
+ ASM_RAX,
+ ASM_RCX,
+ ASM_RDX,
+ ASM_RBX,
+ ASM_RSP,
+ ASM_RBP,
+ ASM_RSI,
+ ASM_RDI,
+
+} AsmKind;
+
+static int isr64kind(AsmKind k) { // Nonzero iff k lies in the ASM_RAX..ASM_RDI enumerator range.
+ return k >= ASM_RAX && k <= ASM_RDI; // Inclusive on both ends; only valid while the register enumerators stay contiguous.
+}
+
+typedef union Parsev Parsev;
+
+typedef struct {
+ AsmKind kind;
+ const char *target;
+} Jmp;
+
+typedef struct {
+ AsmKind kind;
+ Parsev *arg;
+} Pushq;
typedef struct {
- enum AsmKind kind;
- union {
- struct {
- const char *target;
- } jmp;
- };
-} Instr;
+ AsmKind kind;
+ Parsev *src;
+ Parsev *dst;
+} Movq;
typedef struct {
- enum AsmKind kind;
+ AsmKind kind;
const char *name;
} Label;
typedef struct {
- enum AsmKind kind;
+ AsmKind kind;
const char *name;
} Globl;
typedef struct {
- enum AsmKind kind;
+ AsmKind kind;
uint8_t b;
} Byte;
-typedef union {
- enum AsmKind kind;
- Instr instr;
+typedef struct {
+ AsmKind kind;
+ int64_t imm;
+} Imm;
+
+typedef struct {
+ AsmKind kind;
+ const char *name;
+} Ident;
+
+typedef struct {
+ AsmKind kind;
+ int64_t value;
+} Number;
+
+union Parsev {
+ AsmKind kind;
Label label;
Globl globl;
+ Jmp jmp;
+ Pushq pushq;
+ Movq movq;
Byte byte;
- const char *ident;
- int64_t number;
-} Parsev;
+ Ident ident;
+ Number number;
+};
typedef struct AsmLine AsmLine;
struct AsmLine {
diff --git a/main.c b/main.c
index 7dffafb..369925f 100644
--- a/main.c
+++ b/main.c
@@ -230,6 +230,20 @@ static void prepass(void) {
case ASM_RET:
cursection->wco += 1;
break;
+ case ASM_MOVQ:
+ if (isr64kind(v->movq.src->kind) && isr64kind(v->movq.dst->kind)) {
+ cursection->wco += 2;
+ } else {
+ cursection->wco += 16; // XXX likely wrong.
+ }
+ break;
+ case ASM_PUSHQ:
+ if (isr64kind(v->pushq.arg->kind)) {
+ cursection->wco += 2;
+ } else {
+ cursection->wco += 9; // XXX very pessimistic.
+ }
+ break;
case ASM_JMP:
cursection->wco += 5;
break;
@@ -275,6 +289,18 @@ static void fillsymtab(void) {
}
}
+#define MODREGI 0x3 // ModRM mod value passed to composemodrm() by the register-to-register movq encoding.
+#define REX_W 0x48 // First byte emitted, ahead of opcode 0x89, for the register-to-register movq.
+
+static uint8_t kindr64bits(AsmKind k) { // Register number for k: its offset from ASM_RAX, truncated to 8 bits.
+ return (k - ASM_RAX) & 0xff; // No range check here; the callers visible in this diff test isr64kind(k) first.
+}
+
+static uint8_t composemodrm(uint8_t mod, uint8_t regop, uint8_t rm) { // Packs mod into bits 7-6, regop into bits 5-3 and rm into the low bits of one byte.
+ return (mod<<6) + (regop<<3) + rm; // Fields are added unmasked, so an oversized regop or rm spills into the neighbouring field.
+}
+
+
static void assemble() {
Symbol *sym;
Parsev *v;
@@ -309,17 +335,55 @@ static void assemble() {
case ASM_RET:
secaddbyte(cursection, 0xc3);
break;
+ case ASM_PUSHQ: {
+ // Emit "pushq <operand>". Only the 64-bit register form is encoded so
+ // far; immediate and symbol operands still abort with TODO.
+ Parsev *arg = v->pushq.arg;
+
+ if (isr64kind(arg->kind)) {
+ // push r64 is a single byte: opcode 0x50 with the register number
+ // added into its low three bits (0x50 + rd). Emitting the register
+ // number as a separate second byte would encode "push %rax"
+ // followed by a stray byte.
+ // NOTE(review): prepass() counts 2 bytes of wco for this form;
+ // confirm that estimate is meant as an upper bound.
+ secaddbyte(cursection, 0x50 + kindr64bits(arg->kind));
+ } else if (arg->kind == ASM_NUMBER) {
+ fatal("TODO");
+ } else if (arg->kind == ASM_IDENT) {
+ fatal("TODO");
+ } else {
+ fatal("BUG: unexpected pushq arg");
+ }
+
+ break;
+ }
+
+ case ASM_MOVQ: {
+ Parsev *src, *dst;
+
+ src = v->movq.src;
+ dst = v->movq.dst;
+
+ if (isr64kind(src->kind) && isr64kind(dst->kind)) {
+ uint8_t ibuf[3] = {
+ REX_W,
+ 0x89,
+ composemodrm(MODREGI, kindr64bits(src->kind), kindr64bits(dst->kind)),
+ };
+ secaddbytes(cursection, ibuf, sizeof(ibuf));
+ } else {
+ fatal("TODO");
+ }
+ break;
+ }
+
case ASM_JMP: {
- sym = getsym(v->instr.jmp.target);
+ sym = getsym(v->jmp.target);
if (sym->section && (sym->section == cursection)) {
int64_t distance;
distance = sym->wco - cursection->wco;
if (distance <= 128 && distance >= -127) {
- uint8_t jbuf[2] = {0xeb, 0x00};
- secaddbytes(cursection, jbuf, sizeof(jbuf));
+ uint8_t ibuf[2] = {0xeb, 0x00};
+ secaddbytes(cursection, ibuf, sizeof(ibuf));
} else {
- uint8_t jbuf[5] = {0xe9, 0x00, 0x00, 0x00, 0x00};
- secaddbytes(cursection, jbuf, sizeof(jbuf));
+ uint8_t ibuf[5] = {0xe9, 0x00, 0x00, 0x00, 0x00};
+ secaddbytes(cursection, ibuf, sizeof(ibuf));
}
} else {
fatal("TODO, jmp to undefined symbol");