aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--asm.peg300
-rw-r--r--main.c304
-rw-r--r--minias.h30
-rw-r--r--test/test.sh106
-rw-r--r--util.c4
5 files changed, 366 insertions, 378 deletions
diff --git a/asm.peg b/asm.peg
index 1a834cc..c5abf3a 100644
--- a/asm.peg
+++ b/asm.peg
@@ -32,100 +32,64 @@ instr =
"nop" { $$.kind = ASM_NOP; }
| "leave" { $$.kind = ASM_LEAVE; }
| "ret" { $$.kind = ASM_RET; }
- | i:jmp { $$ = i; }
- | i:lea { $$ = i; }
- | i:movzx { $$ = i; }
- | i:mod-rm-binop { $$ = i; }
-
-jmp = "jmp" ws i:ident
- { $$.jmp = (Jmp){.kind = ASM_JMP, .target = i.ident.name}; }
-
-movzx =
- "movzx"
- (
- (
- 'b' ws s:m ws? ',' ws? d:r16
- | 'b' ws s:m ws? ',' ws? d:r32
- | 'b' ws s:m ws? ',' ws? d:r64
- | 'b'? ws s:r8 ws? ',' ws? d:r16
- | 'b'? ws s:r8 ws? ',' ws? d:r32
- | 'b'? ws s:r8 ws? ',' ws? d:r64
- ) { $$.movzx.type = 'b'; }
- |
- (
- 'w' ws s:m ws? ',' ws? d:r32
- | 'w' ws s:m ws? ',' ws? d:r64
- | 'w'? ws s:r16 ws? ',' ws? d:r32
- | 'w'? ws s:r16 ws? ',' ws? d:r64
- ) { $$.movzx.type = 'w'; }
- ) { $$.movzx.kind = ASM_MOVZX;
- $$.movzx.src = dupv(&s);
- $$.movzx.dst = dupv(&d); }
-
-lea =
- "lea"
- (
- 'w'? ws s:m ws? ',' ws? d:r16
- { $$.lea.type = 'w'; }
- | 'l'? ws s:m ws? ',' ws? d:r32
- { $$.lea.type = 'l'; }
- | 'q'? ws s:m ws? ',' ws? d:r64
- { $$.lea.type = 'q'; }
-
- ) { $$.lea.kind = ASM_LEA;
- $$.lea.src = dupv(&s);
- $$.lea.dst = dupv(&d); }
-
-mod-rm-binop =
- "add" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_ADD; }
- | "and" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_AND; }
- | "mov" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_MOV; }
- | "or" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_OR; }
- | "sub" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_SUB; }
- | "xchg" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_XCHG; }
- | "xor" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_XOR; }
-
-
-mod-rm-binop-args =
- (
- 'b' ws s:r-m8 ws? ',' ws? d:r8
- | 'b' ws s:r8 ws? ',' ws? d:r-m8
- | 'b' ws s:imm ws? ',' ws? d:r-m8
- | ws s:r8 ws? ',' ws? d:r8
- | ws s:m ws? ',' ws? d:r8
- | ws s:imm ws? ',' ws? d:r8
- )
- { $$.modrmbinop = (ModRMBinop){ .type = 'b', .src = dupv(&s), .dst = dupv(&d) }; }
- |
- (
- 'w' ws s:r-m16 ws? ',' ws? d:r16
- | 'w' ws s:r16 ws? ',' ws? d:r-m16
- | 'w' ws s:imm ws? ',' ws? d:r-m16
- | ws s:r16 ws? ',' ws? d:r16
- | ws s:m ws? ',' ws? d:r16
- | ws s:imm ws? ',' ws? d:r16
- )
- { $$.modrmbinop = (ModRMBinop){ .type = 'w', .src = dupv(&s), .dst = dupv(&d) }; }
- |
- (
- 'l' ws s:r-m32 ws? ',' ws? d:r32
- | 'l' ws s:r32 ws? ',' ws? d:r-m32
- | 'l' ws s:imm ws? ',' ws? d:r-m32
- | ws s:r32 ws? ',' ws? d:r32
- | ws s:m ws? ',' ws? d:r32
- | ws s:imm ws? ',' ws? d:r32
- )
- { $$.modrmbinop = (ModRMBinop){ .type = 'l', .src = dupv(&s), .dst = dupv(&d) }; }
- |
- (
- 'q' ws s:r-m64 ws? ',' ws? d:r64
- | 'q' ws s:r64 ws? ',' ws? d:r-m64
- | 'q' ws s:imm ws? ',' ws? d:r-m64
- | ws s:r64 ws? ',' ws? d:r64
- | ws s:m ws? ',' ws? d:r64
- | ws s:imm ws? ',' ws? d:r64
- ) { $$.modrmbinop = (ModRMBinop){ .type = 'q', .src = dupv(&s), .dst = dupv(&d) }; }
-
+ | i:xchg { $$ = i; }
+ | i:add { $$ = i; }
+ | i:and { $$ = i; }
+ | i:or { $$ = i; }
+ | i:sub { $$ = i; }
+ | i:xor { $$ = i; }
+
+# xchg: AT&T operand order, source first and destination second.
+# The number passed to INSTR records which alternative matched:
+#   0-5   one operand is %ax/%eax/%rax and the other a register of that width
+#   6-13  register <-> register-or-memory forms
+# The accumulator forms come first, so ordered choice tries them before the
+# general forms.
+xchg =
+ 'xchg' (
+ 'w'? ws s:ax ws? ',' ws? d:r16 { $$ = INSTR(0, s, d); }
+ | 'w'? ws s:r16 ws? ',' ws? d:ax { $$ = INSTR(1, s, d); }
+ | 'l'? ws s:eax ws? ',' ws? d:r32 { $$ = INSTR(2, s, d); }
+ | 'l'? ws s:r32 ws? ',' ws? d:eax { $$ = INSTR(3, s, d); }
+ | 'q'? ws s:rax ws? ',' ws? d:r64 { $$ = INSTR(4, s, d); }
+ | 'q'? ws s:r64 ws? ',' ws? d:rax { $$ = INSTR(5, s, d); }
+ | 'b'? ws s:r-m8 ws? ',' ws? d:r8 { $$ = INSTR(6, s, d); }
+ | 'b'? ws s:r8 ws? ',' ws? d:r-m8 { $$ = INSTR(7, s, d); }
+ | 'w'? ws s:r16 ws? ',' ws? d:r-m16 { $$ = INSTR(8, s, d); }
+ | 'w'? ws s:r-m16 ws? ',' ws? d:r16 { $$ = INSTR(9, s, d); }
+ | 'l'? ws s:r32 ws? ',' ws? d:r-m32 { $$ = INSTR(10, s, d); }
+ | 'l'? ws s:r-m32 ws? ',' ws? d:r32 { $$ = INSTR(11, s, d); }
+ | 'q'? ws s:r64 ws? ',' ws? d:r-m64 { $$ = INSTR(12, s, d); }
+ | 'q'? ws s:r-m64 ws? ',' ws? d:r64 { $$ = INSTR(13, s, d); }
+ ) { $$.instr.kind = ASM_XCHG; }
+
+# The "type$n-args" rule names carry no meaning of their own; each one just
+# names an operand-format pattern that several instructions share.
+# Every rule below parses its operands with type1-args and then stamps the
+# instruction kind onto the result.
+
+add = "add" a:type1-args { a.instr.kind = ASM_ADD; $$ = a; }
+and = "and" a:type1-args { a.instr.kind = ASM_AND; $$ = a; }
+or = "or" a:type1-args { a.instr.kind = ASM_OR; $$ = a; }
+sub = "sub" a:type1-args { a.instr.kind = ASM_SUB; $$ = a; }
+xor = "xor" a:type1-args { a.instr.kind = ASM_XOR; $$ = a; }
+
+# Operand patterns shared by add/and/or/sub/xor. The number passed to INSTR
+# identifies the alternative that matched:
+#   0-3    imm -> %al/%ax/%eax/%rax
+#   4-7    imm -> memory (size suffix is mandatory)
+#   8-11   imm -> register
+#   12-15  memory -> register
+#   16-19  register -> memory
+#   20-23  register -> register
+# The alternatives have to be separated by '|': without it the lines form one
+# long sequence that no single instruction can match.
+type1-args =
+ 'b'? ws s:imm ws? ',' ws? d:al { $$ = INSTR(0, s, d); }
+ | 'w'? ws s:imm ws? ',' ws? d:ax { $$ = INSTR(1, s, d); }
+ | 'l'? ws s:imm ws? ',' ws? d:eax { $$ = INSTR(2, s, d); }
+ | 'q'? ws s:imm ws? ',' ws? d:rax { $$ = INSTR(3, s, d); }
+ | 'b' ws s:imm ws? ',' ws? d:m { $$ = INSTR(4, s, d); }
+ | 'w' ws s:imm ws? ',' ws? d:m { $$ = INSTR(5, s, d); }
+ | 'l' ws s:imm ws? ',' ws? d:m { $$ = INSTR(6, s, d); }
+ | 'q' ws s:imm ws? ',' ws? d:m { $$ = INSTR(7, s, d); }
+ | 'b'? ws s:imm ws? ',' ws? d:r8 { $$ = INSTR(8, s, d); }
+ | 'w'? ws s:imm ws? ',' ws? d:r16 { $$ = INSTR(9, s, d); }
+ | 'l'? ws s:imm ws? ',' ws? d:r32 { $$ = INSTR(10, s, d); }
+ | 'q'? ws s:imm ws? ',' ws? d:r64 { $$ = INSTR(11, s, d); }
+ | 'b'? ws s:m ws? ',' ws? d:r8 { $$ = INSTR(12, s, d); }
+ | 'w'? ws s:m ws? ',' ws? d:r16 { $$ = INSTR(13, s, d); }
+ | 'l'? ws s:m ws? ',' ws? d:r32 { $$ = INSTR(14, s, d); }
+ | 'q'? ws s:m ws? ',' ws? d:r64 { $$ = INSTR(15, s, d); }
+ | 'b'? ws s:r8 ws? ',' ws? d:m { $$ = INSTR(16, s, d); }
+ | 'w'? ws s:r16 ws? ',' ws? d:m { $$ = INSTR(17, s, d); }
+ | 'l'? ws s:r32 ws? ',' ws? d:m { $$ = INSTR(18, s, d); }
+ | 'q'? ws s:r64 ws? ',' ws? d:m { $$ = INSTR(19, s, d); }
+ | 'b'? ws s:r8 ws? ',' ws? d:r8 { $$ = INSTR(20, s, d); }
+ | 'w'? ws s:r16 ws? ',' ws? d:r16 { $$ = INSTR(21, s, d); }
+ | 'l'? ws s:r32 ws? ',' ws? d:r32 { $$ = INSTR(22, s, d); }
+ | 'q'? ws s:r64 ws? ',' ws? d:r64 { $$ = INSTR(23, s, d); }
r-m8 =
r:r8 { $$ = r; }
@@ -151,84 +115,86 @@ m =
| i:ident ws? '(' ws? r:r64 ws? ')'
{ $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = i.ident.name, .reg = r.kind }; }
+# Immediate operand: '$', optional whitespace, then an optionally negative
+# decimal integer. The captured text is converted with strtoll (base 10) and
+# the label field is left NULL.
+imm =
+ '$' ws? <'-'?[0-9]+>
+ { $$.imm = (Imm){ .kind = ASM_IMM, .c = strtoll(yytext, NULL, 10), .l = NULL}; }
+
+# Rules that match exactly one accumulator register each, for the operand
+# patterns in xchg and type1-args that single out %al/%ax/%eax/%rax.
+al = "%al" { $$ = REG(ASM_AL); }
+ax = "%ax" { $$ = REG(ASM_AX); }
+eax = "%eax" { $$ = REG(ASM_EAX); }
+rax = "%rax" { $$ = REG(ASM_RAX); }
+
r8 =
- "%al" { $$.kind = ASM_AL; }
- | "%cl" { $$.kind = ASM_CL; }
- | "%dl" { $$.kind = ASM_DL; }
- | "%bl" { $$.kind = ASM_BL; }
- | "%spl" { $$.kind = ASM_SPL; }
- | "%bpl" { $$.kind = ASM_BPL; }
- | "%sil" { $$.kind = ASM_SIL; }
- | "%dil" { $$.kind = ASM_DIL; }
- | "%r8b" { $$.kind = ASM_R8B; }
- | "%r9b" { $$.kind = ASM_R9B; }
- | "%r10b" { $$.kind = ASM_R10B; }
- | "%r11b" { $$.kind = ASM_R11B; }
- | "%r12b" { $$.kind = ASM_R12B; }
- | "%r13b" { $$.kind = ASM_R13B; }
- | "%r14b" { $$.kind = ASM_R14B; }
- | "%r15b" { $$.kind = ASM_R15B; }
+ "%al" { $$ = REG(ASM_AL); }
+ | "%cl" { $$ = REG(ASM_CL); }
+ | "%dl" { $$ = REG(ASM_DL); }
+ | "%bl" { $$ = REG(ASM_BL); }
+ | "%spl" { $$ = REG(ASM_SPL); }
+ | "%bpl" { $$ = REG(ASM_BPL); }
+ | "%sil" { $$ = REG(ASM_SIL); }
+ | "%dil" { $$ = REG(ASM_DIL); }
+ | "%r8b" { $$ = REG(ASM_R8B); }
+ | "%r9b" { $$ = REG(ASM_R9B); }
+ | "%r10b" { $$ = REG(ASM_R10B); }
+ | "%r11b" { $$ = REG(ASM_R11B); }
+ | "%r12b" { $$ = REG(ASM_R12B); }
+ | "%r13b" { $$ = REG(ASM_R13B); }
+ | "%r14b" { $$ = REG(ASM_R14B); }
+ | "%r15b" { $$ = REG(ASM_R15B); }
r16 =
- "%ax" { $$.kind = ASM_AX; }
- | "%cx" { $$.kind = ASM_CX; }
- | "%dx" { $$.kind = ASM_DX; }
- | "%bx" { $$.kind = ASM_BX; }
- | "%sp" { $$.kind = ASM_SP; }
- | "%bp" { $$.kind = ASM_BP; }
- | "%si" { $$.kind = ASM_SI; }
- | "%di" { $$.kind = ASM_DI; }
- | "%r8w" { $$.kind = ASM_R8W; }
- | "%r9w" { $$.kind = ASM_R9W; }
- | "%r10w" { $$.kind = ASM_R10W; }
- | "%r11w" { $$.kind = ASM_R11W; }
- | "%r12w" { $$.kind = ASM_R12W; }
- | "%r13w" { $$.kind = ASM_R13W; }
- | "%r14w" { $$.kind = ASM_R14W; }
- | "%r15w" { $$.kind = ASM_R15W; }
+ "%ax" { $$ = REG(ASM_AX); }
+ | "%cx" { $$ = REG(ASM_CX); }
+ | "%dx" { $$ = REG(ASM_DX); }
+ | "%bx" { $$ = REG(ASM_BX); }
+ | "%sp" { $$ = REG(ASM_SP); }
+ | "%bp" { $$ = REG(ASM_BP); }
+ | "%si" { $$ = REG(ASM_SI); }
+ | "%di" { $$ = REG(ASM_DI); }
+ | "%r8w" { $$ = REG(ASM_R8W); }
+ | "%r9w" { $$ = REG(ASM_R9W); }
+ | "%r10w" { $$ = REG(ASM_R10W); }
+ | "%r11w" { $$ = REG(ASM_R11W); }
+ | "%r12w" { $$ = REG(ASM_R12W); }
+ | "%r13w" { $$ = REG(ASM_R13W); }
+ | "%r14w" { $$ = REG(ASM_R14W); }
+ | "%r15w" { $$ = REG(ASM_R15W); }
r32 =
- "%eax" { $$.kind = ASM_EAX; }
- | "%ecx" { $$.kind = ASM_ECX; }
- | "%edx" { $$.kind = ASM_EDX; }
- | "%ebx" { $$.kind = ASM_EBX; }
- | "%esp" { $$.kind = ASM_ESP; }
- | "%ebp" { $$.kind = ASM_EBP; }
- | "%esi" { $$.kind = ASM_ESI; }
- | "%edi" { $$.kind = ASM_EDI; }
- | "%r8d" { $$.kind = ASM_R8D; }
- | "%r9d" { $$.kind = ASM_R9D; }
- | "%r10d" { $$.kind = ASM_R10D; }
- | "%r11d" { $$.kind = ASM_R11D; }
- | "%r12d" { $$.kind = ASM_R12D; }
- | "%r13d" { $$.kind = ASM_R13D; }
- | "%r14d" { $$.kind = ASM_R14D; }
- | "%r15d" { $$.kind = ASM_R15D; }
-
-r64 = (
- "%rax" { $$.kind = ASM_RAX; }
- | "%rcx" { $$.kind = ASM_RCX; }
- | "%rdx" { $$.kind = ASM_RDX; }
- | "%rbx" { $$.kind = ASM_RBX; }
- | "%rsp" { $$.kind = ASM_RSP; }
- | "%rbp" { $$.kind = ASM_RBP; }
- | "%rsi" { $$.kind = ASM_RSI; }
- | "%rdi" { $$.kind = ASM_RDI; }
- | "%r8" ![lwb] { $$.kind = ASM_R8; }
- | "%r9" ![lwb] { $$.kind = ASM_R9; }
- | "%r10" ![lwb] { $$.kind = ASM_R10; }
- | "%r11" ![lwb] { $$.kind = ASM_R11; }
- | "%r12" ![lwb] { $$.kind = ASM_R12; }
- | "%r13" ![lwb] { $$.kind = ASM_R13; }
- | "%r14" ![lwb] { $$.kind = ASM_R14; }
- | "%r15" ![lwb] { $$.kind = ASM_R15; }
-)
-
-imm =
- '$' i:ident
- { $$.imm = (Imm){.kind = ASM_IMM, .l = i.ident.name, .c = 0 }; }
- | '$' <'-'?[0-9]+>
- { $$.imm = (Imm){.kind = ASM_IMM, .l = NULL, .c = strtoll(yytext, NULL, 10) }; }
+ "%eax" { $$ = REG(ASM_EAX); }
+ | "%ecx" { $$ = REG(ASM_ECX); }
+ | "%edx" { $$ = REG(ASM_EDX); }
+ | "%ebx" { $$ = REG(ASM_EBX); }
+ | "%esp" { $$ = REG(ASM_ESP); }
+ | "%ebp" { $$ = REG(ASM_EBP); }
+ | "%esi" { $$ = REG(ASM_ESI); }
+ | "%edi" { $$ = REG(ASM_EDI); }
+ | "%r8d" { $$ = REG(ASM_R8D); }
+ | "%r9d" { $$ = REG(ASM_R9D); }
+ | "%r10d" { $$ = REG(ASM_R10D); }
+ | "%r11d" { $$ = REG(ASM_R11D); }
+ | "%r12d" { $$ = REG(ASM_R12D); }
+ | "%r13d" { $$ = REG(ASM_R13D); }
+ | "%r14d" { $$ = REG(ASM_R14D); }
+ | "%r15d" { $$ = REG(ASM_R15D); }
+
+# 64-bit registers. The names %r8..%r15 are prefixes of their narrower
+# variants (%r8d, %r8w, %r8b, ...), so a negative lookahead rejects any of
+# those size suffixes; 'd' is included so the 32-bit names cannot match here.
+r64 =
+ "%rax" { $$ = REG(ASM_RAX); }
+ | "%rcx" { $$ = REG(ASM_RCX); }
+ | "%rdx" { $$ = REG(ASM_RDX); }
+ | "%rbx" { $$ = REG(ASM_RBX); }
+ | "%rsp" { $$ = REG(ASM_RSP); }
+ | "%rbp" { $$ = REG(ASM_RBP); }
+ | "%rsi" { $$ = REG(ASM_RSI); }
+ | "%rdi" { $$ = REG(ASM_RDI); }
+ | "%r8" ![dlwb] { $$ = REG(ASM_R8); }
+ | "%r9" ![dlwb] { $$ = REG(ASM_R9); }
+ | "%r10" ![dlwb] { $$ = REG(ASM_R10); }
+ | "%r11" ![dlwb] { $$ = REG(ASM_R11); }
+ | "%r12" ![dlwb] { $$ = REG(ASM_R12); }
+ | "%r13" ![dlwb] { $$ = REG(ASM_R13); }
+ | "%r14" ![dlwb] { $$ = REG(ASM_R14); }
+ | "%r15" ![dlwb] { $$ = REG(ASM_R15); }
ident =
<[_a-zA-Z][_a-zA-Z0-9]*>
diff --git a/main.c b/main.c
index e469bcd..38d816c 100644
--- a/main.c
+++ b/main.c
@@ -122,6 +122,15 @@ static Parsev *dupv(Parsev *p) {
return r;
}
+/*
+ * Build a Parsev holding an Instr with variant number V whose src/dst are the
+ * pointers dupv() returns for operands S and D. The kind is left as 0 for the
+ * grammar action to fill in afterwards. Arguments are parenthesized so any
+ * expression can be passed safely.
+ */
+#define INSTR(V, S, D) \
+ (Parsev) { \
+ .instr = (Instr) { \
+ .kind = 0, .variant = (V), .src = dupv(&(S)), .dst = dupv(&(D)) \
+ } \
+ }
+/* Build a Parsev whose kind is the register kind K. */
+#define REG(K) \
+ (Parsev) { .kind = (K) }
+
#define YYSTYPE Parsev
#define YY_CTX_LOCAL
#define YY_CTX_MEMBERS Parsev v;
@@ -137,9 +146,8 @@ void parse(void) {
while (yyparse(&ctx)) {
curlineno += 1;
- if (ctx.v.kind == ASM_SYNTAX_ERROR) {
+ if (ctx.v.kind == ASM_SYNTAX_ERROR)
lfatal("syntax error\n");
- }
if (ctx.v.kind == ASM_BLANK)
continue;
l = zalloc(sizeof(AsmLine));
@@ -178,46 +186,129 @@ static void sw(uint32_t w) {
secaddbytes(cursection, buf, sizeof(buf));
}
-static int isregkind(AsmKind k) { return k > ASM_REG_BEGIN && k < ASM_REG_END; }
-
/* Convert an AsmKind to register bits in reg/rm style. */
-static uint8_t modrmregbits(AsmKind k) {
- return (k - (ASM_REG_BEGIN+1)) % 16;
+static uint8_t regbits(AsmKind k) { return (k - (ASM_REG_BEGIN + 1)) % 16; }
+
+/* Is a register, i.e. k lies strictly between ASM_REG_BEGIN and ASM_REG_END. */
+static uint8_t isreg(AsmKind k) { return k > ASM_REG_BEGIN && k < ASM_REG_END; }
+
+/* Is an 8-bit register: k in ASM_AL..ASM_R15B (assumes the enum keeps them contiguous). */
+static uint8_t isreg8(AsmKind k) { return k >= ASM_AL && k <= ASM_R15B; }
+
+/* Is a 16-bit register: k in ASM_AX..ASM_R15W (assumes the enum keeps them contiguous). */
+static uint8_t isreg16(AsmKind k) { return k >= ASM_AX && k <= ASM_R15W; }
+
+/* Is a 64-bit register: k in ASM_RAX..ASM_R15 (assumes the enum keeps them contiguous). */
+static uint8_t isreg64(AsmKind k) { return k >= ASM_RAX && k <= ASM_R15; }
+
+/*
+ * Is an r$n style register variant: true when bit 3 of the register number
+ * from regbits() is set. NOTE(review): presumably the %r8-%r15 family in every
+ * width; this depends on the AsmKind enum order - confirm against minias.h.
+ */
+static uint8_t isregr(AsmKind k) { return !!(regbits(k) & (1 << 3)); }
+
+/*
+ * Compose a rex prefix - See intel manual.
+ * The byte is laid out as 0100WRXB: bit 6 is always set and each argument is
+ * reduced to a single bit, so any non-zero value counts as 1.
+ */
+static uint8_t rexbyte(uint8_t w, uint8_t r, uint8_t x, uint8_t b) {
+ return ((1 << 6) | ((!!w) << 3) | ((!!r) << 2) | ((!!x) << 1) | (!!b));
+}
+
+/*
+ * Compose a mod/reg/rm byte - See intel manual.
+ * mod lands in bits 7-6, reg in bits 5-3 and rm in bits 2-0; any higher bits
+ * of the arguments are masked off.
+ */
+static uint8_t modregrm(uint8_t mod, uint8_t reg, uint8_t rm) {
+ return (((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7));
+}
+
+/*
+ * Assemble case op +rw | op + rd.
+ * The low three bits of the register number are OR-ed into the opcode byte.
+ * A 0x66 prefix is emitted first for 16-bit registers, and a REX prefix when
+ * reg is a 64-bit register or its register number has bit 3 set.
+ * The Instr argument is currently unused.
+ */
+static void assembleplusr(Instr *i, uint8_t opcode, AsmKind reg) {
+ uint8_t bits = regbits(reg);
+ /* REX.W for 64-bit operands, REX.B for the high bit of the register number. */
+ uint8_t rex = rexbyte(isreg64(reg), 0, 0, bits & (1 << 3));
+ if (isreg16(reg))
+ sb(0x66);
+ /* A REX byte with no flag bits set is omitted. */
+ if (rex != rexbyte(0, 0, 0, 0))
+ sb(rex);
+ sb(opcode | (bits & 7));
}
-static uint8_t modrmregopcode(AsmKind k, char t) {
- uint8_t opcode;
- if (k == ASM_ADD) {
- opcode = 0x00;
- } else if (k == ASM_AND) {
- opcode = 0x20;
- } else if (k == ASM_MOV) {
- opcode = 0x88;
- } else if (k == ASM_OR) {
- opcode = 0x08;
- } else if (k == ASM_SUB) {
- opcode = 0x28;
- } else if (k == ASM_XCHG) {
- opcode = 0x86;
- } else if (k == ASM_XOR) {
- opcode = 0x30;
+/*
+ * Assemble case + r <-> r/m.
+ *
+ * Emits the optional 0x66 and REX prefixes, the opcode and a ModRM byte for an
+ * instruction with one register operand and one register-or-memory operand.
+ * opcode is the 8-bit form; opcode + 1 is emitted when the register operand
+ * is not an 8-bit register. The register operand goes in the reg field (the
+ * source, when both operands are registers) and the other operand in r/m.
+ * Only plain "(reg)" memory operands are handled so far: a non-zero offset or
+ * a label ends in lfatal("TODO").
+ */
+static void assemblerrm(Instr *i, uint8_t opcode) {
+
+ Memarg *memarg;
+ AsmKind regarg;
+ uint8_t rex, mod, reg, rm, sib;
+ int wantsib;
+ int64_t disp;
+ int dispsz;
+ int64_t imm;
+ int immsz;
+
+ mod = 0x03;
+ wantsib = 0;
+ sib = 0;
+ disp = 0;
+ dispsz = 0;
+ imm = 0;
+ immsz = 0;
+
+ if (i->src->kind == ASM_MEMARG) {
+ memarg = &i->src->memarg;
+ regarg = i->dst->kind;
+ reg = regbits(i->dst->kind);
+ } else if (i->dst->kind == ASM_MEMARG) {
+ memarg = &i->dst->memarg;
+ regarg = i->src->kind;
+ reg = regbits(i->src->kind);
+ } else {
+ memarg = NULL;
+ regarg = i->src->kind;
+ reg = regbits(i->src->kind);
+ rm = regbits(i->dst->kind);
+ }
+
+ if (memarg) {
+ rm = regbits(memarg->reg);
+ /* We cannot address ESP/RSP/... */
+ if ((rm & 7) == 4)
+ lfatal("addressing mode unrepresentable");
+ if (memarg->c == 0 && memarg->l == NULL) {
+ if ((rm & 7) == 5) {
+ /* BP style registers need displacement: mod=00 with rm=101 has a
+ * different meaning, so use mod=01 with a zero disp8. No SIB byte
+ * is involved here; only rm=100 introduces one. */
+ mod = 0x01;
+ disp = 0;
+ dispsz = 1;
+ } else {
+ mod = 0x00;
+ }
} else {
- unreachable();
+ lfatal("TODO");
}
- if (t != 'b')
- opcode += 1;
- return opcode;
-}
+ }
+ /* Operand-size prefix selects 16-bit operands. */
+ if (isreg16(regarg))
+ sb(0x66);
-static uint8_t modrmmemopcode(AsmKind k, char t) {
- if (k == ASM_LEA)
- return 0x8d;
- return modrmregopcode(k, t) + 2;
-}
+ rex = rexbyte(isreg64(regarg), reg & (1 << 3), 0, rm & (1 << 3));
+ /* A REX byte with no flag bits set is omitted. */
+ if (rex != rexbyte(0, 0, 0, 0))
+ sb(rex);
+
+ sb2(isreg8(regarg) ? opcode : opcode + 1, modregrm(mod, reg, rm));
+
+ if (wantsib)
+ sb(sib);
-#define REX(W, R, X, B) \
- ((1 << 6) | (!!(W) << 3) | (!!(R) << 2) | (!!(X) << 1) | (!!(B) << 0))
+ switch (dispsz) {
+ case 1:
+ sb((uint8_t)disp);
+ break;
+ case 4:
+ sw((uint32_t)disp);
+ break;
+ }
+ switch (immsz) {
+ case 1:
+ sb((uint8_t)imm);
+ break;
+ case 4:
+ sw((uint32_t)imm);
+ break;
+ }
+}
static void assemble() {
Symbol *sym;
@@ -270,127 +361,42 @@ static void assemble() {
case ASM_RET:
sb(0xc3);
break;
+ /*
case ASM_MOVZX:
- case ASM_MOVSX: {
- fatal("TODO");
- }
+ case ASM_MOVSX:
+ case ASM_LEA:
+ */
case ASM_ADD:
case ASM_AND:
- case ASM_LEA:
case ASM_MOV:
case ASM_OR:
- case ASM_SUB:
- case ASM_XCHG:
- case ASM_XOR: {
-
- ModRMBinop *op;
- Memarg *memarg;
- uint8_t opcode;
- uint8_t rex, mod, reg, rm, sib;
- int wantsib;
- int64_t disp;
- int dispsz;
- int64_t imm;
- int immsz;
-
- op = &v->modrmbinop;
- memarg = NULL;
- mod = 0x03;
- wantsib = 0;
- sib = 0;
- dispsz = 0;
- immsz = 0;
-
- if (op->src->kind == ASM_MEMARG) {
- memarg = &op->src->memarg;
- } else if (op->dst->kind == ASM_MEMARG) {
- memarg = &op->dst->memarg;
- }
-
- if (memarg) {
- rm = modrmregbits(memarg->reg);
-
- /* We cannot address ESP/RSP/... */
- if ((rm & 7) == 4)
- lfatal("addressing mode unrepresentable");
-
- if (memarg->c == 0 && memarg->l == NULL) {
- if ((rm & 7) == 5) { // BP style registers need displacement
- mod = 0x01;
- wantsib = 1;
- sib = 0;
- disp = 0;
- dispsz = 1;
- } else {
- mod = 0x00;
- }
- } else {
- unreachable();
- }
- }
-
- if (isregkind(op->dst->kind)) {
- rm = modrmregbits(op->dst->kind);
- }
-
- if (isregkind(op->src->kind)) {
- opcode = modrmregopcode(op->kind, op->type);
- reg = modrmregbits(op->src->kind);
- } else if (op->src->kind == ASM_MEMARG) {
- opcode = modrmmemopcode(op->kind, op->type);
- reg = modrmregbits(op->dst->kind);
- } else if (op->src->kind == ASM_IMM) {
- opcode = 0x81;
- reg = 0x00;
- if (memarg) {
- rm = modrmregbits(memarg->reg);
- } else {
- rm = modrmregbits(op->dst->kind);
+ case ASM_XOR:
+ case ASM_SUB: {
+ Instr *instr = &v->instr;
+ /* The base opcodes below are the "r -> r/m" forms. A memory source needs
+ * the "r/m -> r" form instead, which is the base opcode plus 2. */
+ uint8_t dir = (instr->src->kind == ASM_MEMARG) ? 2 : 0;
+ if (instr->variant < 12) {
+ /* Variants 0-11 take an immediate source, which is not encoded yet. */
+ lfatal("todo");
+ } else {
+ switch (v->kind) {
+ // clang-format off
+ case ASM_ADD: assemblerrm(instr, 0x00 + dir); break;
+ case ASM_AND: assemblerrm(instr, 0x20 + dir); break;
+ case ASM_MOV: assemblerrm(instr, 0x88 + dir); break;
+ case ASM_OR: assemblerrm(instr, 0x08 + dir); break;
+ case ASM_XOR: assemblerrm(instr, 0x30 + dir); break;
+ case ASM_SUB: assemblerrm(instr, 0x28 + dir); break;
+ default: unreachable();
+ // clang-format on
}
}
-
- if (op->type == 'w') {
- sb(0x66);
- }
-
- rex = REX(op->type == 'q', reg & (1 << 3), 0, rm & (1 << 3));
-
- if (rex != REX(0, 0, 0, 0)) {
- sb(rex);
- }
-
- sb2(opcode, ((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7));
-
- if (wantsib) {
- sb(sib);
- }
-
- switch (dispsz) {
- case 0:
- break;
- case 1:
- sb((uint8_t)disp);
- break;
- case 4:
- sw((uint32_t)disp);
- break;
- default:
- unreachable();
- }
-
- switch (immsz) {
- case 0:
- break;
- case 1:
- sb((uint8_t)imm);
- break;
- case 4:
- sw((uint32_t)imm);
- break;
- default:
- unreachable();
- }
-
+ break;
+ }
+ case ASM_XCHG: {
+ Instr *xchg = &v->instr;
+ /* Variants 0-5 pair the accumulator with a register and use the short
+ * 0x90+r form; odd variants carry the other register in src, even ones
+ * in dst. "xchg %eax, %eax" must not take that path: it would come out
+ * as a bare 0x90, which is nop, so it uses the ModRM form (87 c0). */
+ if (xchg->variant <= 5 &&
+ !(xchg->src->kind == ASM_EAX && xchg->dst->kind == ASM_EAX))
+ assembleplusr(xchg, 0x90,
+ (xchg->variant % 2) ? xchg->src->kind : xchg->dst->kind);
+ else
+ assemblerrm(xchg, 0x86);
break;
}
case ASM_JMP: {
@@ -502,7 +508,7 @@ int main(void) {
initsections();
parse();
assemble();
- fillsymtab();
+ // fillsymtab();
outelf();
if (fflush(outf) != 0)
fatal("fflush:");
diff --git a/minias.h b/minias.h
index bf0b857..5767587 100644
--- a/minias.h
+++ b/minias.h
@@ -183,19 +183,22 @@ typedef struct {
+/* A parsed two-operand instruction: its kind, the matched grammar variant and
+ * pointers to the source and destination operands. */
typedef struct {
AsmKind kind;
- char type;
+ /* Number of the grammar alternative that matched (the V given to INSTR). */
+ uint8_t variant;
Parsev *src;
Parsev *dst;
-} ModRMBinop;
+} Instr;
-typedef ModRMBinop Add;
-typedef ModRMBinop And;
-typedef ModRMBinop Lea;
-typedef ModRMBinop Mov;
-typedef ModRMBinop Movzx;
-typedef ModRMBinop Or;
-typedef ModRMBinop Sub;
-typedef ModRMBinop Xor;
+/*
+typedef Instr Add;
+typedef Instr And;
+typedef Instr Lea;
+typedef Instr Mov;
+typedef Instr Movzx;
+typedef Instr Or;
+typedef Instr Sub;
+typedef Instr Xor;
+*/
+typedef Instr Xchg;
union Parsev {
AsmKind kind;
@@ -203,15 +206,20 @@ union Parsev {
Globl globl;
Balign balign;
Memarg memarg;
- ModRMBinop modrmbinop;
+ Instr instr;
+ /*
Add add;
And and;
Lea lea;
Mov mov;
Movzx movzx;
Or or;
+ */
+ Xchg xchg;
+ /*
Xor xor;
Sub sub;
+ */
Jmp jmp;
Byte byte;
Imm imm;
diff --git a/test/test.sh b/test/test.sh
index 0d92e4a..9622963 100644
--- a/test/test.sh
+++ b/test/test.sh
@@ -12,7 +12,11 @@ t () {
clang -Wno-everything -c -s "$tmps" -o "$tmpo"
objcopy -j ".text" -O binary "$tmpo" "$tmpb"
want="$(xxd -ps "$tmpb" | head -n 1 | cut -d ' ' -f 2-)"
- ./minias < "$tmps" > "$tmpo"
+ if ! ./minias < "$tmps" > "$tmpo"
+ then
+ echo "failed to assemble: $1"
+ exit 1
+ fi
objcopy -j ".text" -O binary "$tmpo" "$tmpb"
got="$(xxd -ps "$tmpb" | head -n 1 | cut -d ' ' -f 2-)"
if test "$got" != "$want"
@@ -27,10 +31,21 @@ t () {
# TODO Tidy and be more systematic, we could just loop
-t "xchg %rax, %rax"
-t "xchg %eax, %eax"
-t "xchg %ax, %ax"
t "xchg %al, %al"
+t "xchg %ax, %ax"
+t "xchg %ax, %r9w"
+t "xchg %r9w, %ax"
+t "xchg %ax, %bx"
+t "xchg %bx, %ax"
+#t "xchg %eax, %eax" # XXX We encode this as nop, but clang does not.
+t "xchg %eax, %r9d"
+t "xchg %r9d, %eax"
+t "xchg %eax, %ebx"
+t "xchg %ebx, %eax"
+t "xchg %rax, %r9"
+t "xchg %r9, %rax"
+t "xchg %rax, %rbx"
+t "xchg %rbx, %rax"
t "xchg %rax, (%rax)"
t "xchg %eax, (%rax)"
t "xchg %ax, (%rax)"
@@ -40,32 +55,55 @@ t "xchg (%rax), %eax"
t "xchg (%rax), %ax"
t "xchg (%rax), %al"
-exit 0
-
t "addb %al, %al"
-t "addb (%rax), %al"
t "addb %al, (%rax)"
-
+t "addb (%rax), %al"
+t "addl %eax, %eax"
+t "addl %eax, %ebx"
+t "addl %eax, (%r9)"
+t "addl %eax, (%rax)"
+t "addl %ebx, (%r9)"
+t "addq %rax, %rax"
+t "addq %rax, %rbx"
+t "addq %rax, (%rax)"
+t "addq %rax, (%rbx)"
+t "addq (%rax), %rax"
+t "andb %al, %al"
+t "andb %al, (%rax)"
+t "andb (%rax), %al"
+t "andl %eax, %eax"
+t "andq %rax, %rax"
+t "andq %rax, (%rax)"
+t "andq (%rax), %rax"
+t "leave"
+t "nop"
+t "orb %al, %al"
+t "orb %al, (%rax)"
+t "orb (%rax), %al"
+t "orl %eax, %eax"
+t "orq (%rax), %rax"
+t "orq %rax, %rax"
+t "orq %rax, (%rax)"
+t "ret"
t "subb %al, %al"
-t "subb (%rax), %al"
t "subb %al, (%rax)"
-
+t "subb (%rax), %al"
+t "subl %eax, %eax"
+t "subq %rax, %rax"
+t "subq (%rax), %rax"
t "xorb %al, %al"
-t "xorb (%rax), %al"
t "xorb %al, (%rax)"
+t "xorb (%rax), %al"
+t "xorl %eax, %eax"
+t "xorq %rax, %rax"
+t "xorq %rax, (%rax)"
+t "xorq (%rax), %rax"
-t "orb %al, %al"
-t "orb (%rax), %al"
-t "orb %al, (%rax)"
-
-t "andb %al, %al"
-t "andb (%rax), %al"
-t "andb %al, (%rax)"
+exit 0
t "movb %al, %al"
t "movb (%rax), %al"
t "movb %al, (%rax)"
-
t "movw %ax, %r9w"
t "movw %ax, %ax"
t "addw %ax, %ax"
@@ -77,32 +115,4 @@ t "movl (%rax), %eax"
t "movl %eax, (%rax)"
t "leaw (%rax), %ax"
t "leaq (%rax), %rax"
-t "leal (%rax), %eax"
-t "addq (%rax), %rax"
-t "andq (%rax), %rax"
-t "orq (%rax), %rax"
-t "subq (%rax), %rax"
-t "xorq (%rax), %rax"
-t "nop"
-t "ret"
-t "leave"
-t "addq %rax, %rax"
-t "addl %eax, %eax"
-t "subq %rax, %rax"
-t "subl %eax, %eax"
-t "addq %rax, %rbx"
-t "addl %eax, %ebx"
-t "addq %rax, (%rax)"
-t "addq %rax, (%rbx)"
-t "addl %eax, (%rax)"
-t "addl %eax, (%r9)"
-t "addl %ebx, (%r9)"
-t "orq %rax, %rax"
-t "orq %rax, (%rax)"
-t "orl %eax, %eax"
-t "xorq %rax, %rax"
-t "xorq %rax, (%rax)"
-t "xorl %eax, %eax"
-t "andq %rax, %rax"
-t "andq %rax, (%rax)"
-t "andl %eax, %eax"
+t "leal (%rax), %eax" \ No newline at end of file
diff --git a/util.c b/util.c
index 43c3d8b..9ed1c50 100644
--- a/util.c
+++ b/util.c
@@ -27,9 +27,7 @@ void fatal(const char *fmt, ...) {
exit(1);
}
-void unreachable(void) {
- lfatal("BUG: unexpected internal condition");
-}
+/* Report an internal bug through lfatal(); for code paths that should never run. */
+void unreachable(void) { lfatal("BUG: unexpected internal condition"); }
void *xmalloc(size_t n) {
void *p;