diff options
| -rw-r--r-- | asm.peg | 300 | ||||
| -rw-r--r-- | main.c | 304 | ||||
| -rw-r--r-- | minias.h | 30 | ||||
| -rw-r--r-- | test/test.sh | 106 | ||||
| -rw-r--r-- | util.c | 4 |
5 files changed, 366 insertions, 378 deletions
@@ -32,100 +32,64 @@ instr = "nop" { $$.kind = ASM_NOP; } | "leave" { $$.kind = ASM_LEAVE; } | "ret" { $$.kind = ASM_RET; } - | i:jmp { $$ = i; } - | i:lea { $$ = i; } - | i:movzx { $$ = i; } - | i:mod-rm-binop { $$ = i; } - -jmp = "jmp" ws i:ident - { $$.jmp = (Jmp){.kind = ASM_JMP, .target = i.ident.name}; } - -movzx = - "movzx" - ( - ( - 'b' ws s:m ws? ',' ws? d:r16 - | 'b' ws s:m ws? ',' ws? d:r32 - | 'b' ws s:m ws? ',' ws? d:r64 - | 'b'? ws s:r8 ws? ',' ws? d:r16 - | 'b'? ws s:r8 ws? ',' ws? d:r32 - | 'b'? ws s:r8 ws? ',' ws? d:r64 - ) { $$.movzx.type = 'b'; } - | - ( - 'w' ws s:m ws? ',' ws? d:r32 - | 'w' ws s:m ws? ',' ws? d:r64 - | 'w'? ws s:r16 ws? ',' ws? d:r32 - | 'w'? ws s:r16 ws? ',' ws? d:r64 - ) { $$.movzx.type = 'w'; } - ) { $$.movzx.kind = ASM_MOVZX; - $$.movzx.src = dupv(&s); - $$.movzx.dst = dupv(&d); } - -lea = - "lea" - ( - 'w'? ws s:m ws? ',' ws? d:r16 - { $$.lea.type = 'w'; } - | 'l'? ws s:m ws? ',' ws? d:r32 - { $$.lea.type = 'l'; } - | 'q'? ws s:m ws? ',' ws? d:r64 - { $$.lea.type = 'q'; } - - ) { $$.lea.kind = ASM_LEA; - $$.lea.src = dupv(&s); - $$.lea.dst = dupv(&d); } - -mod-rm-binop = - "add" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_ADD; } - | "and" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_AND; } - | "mov" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_MOV; } - | "or" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_OR; } - | "sub" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_SUB; } - | "xchg" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_XCHG; } - | "xor" a:mod-rm-binop-args { $$ = a; $$.kind = ASM_XOR; } - - -mod-rm-binop-args = - ( - 'b' ws s:r-m8 ws? ',' ws? d:r8 - | 'b' ws s:r8 ws? ',' ws? d:r-m8 - | 'b' ws s:imm ws? ',' ws? d:r-m8 - | ws s:r8 ws? ',' ws? d:r8 - | ws s:m ws? ',' ws? d:r8 - | ws s:imm ws? ',' ws? d:r8 - ) - { $$.modrmbinop = (ModRMBinop){ .type = 'b', .src = dupv(&s), .dst = dupv(&d) }; } - | - ( - 'w' ws s:r-m16 ws? ',' ws? d:r16 - | 'w' ws s:r16 ws? ',' ws? d:r-m16 - | 'w' ws s:imm ws? ',' ws? d:r-m16 - | ws s:r16 ws? 
',' ws? d:r16 - | ws s:m ws? ',' ws? d:r16 - | ws s:imm ws? ',' ws? d:r16 - ) - { $$.modrmbinop = (ModRMBinop){ .type = 'w', .src = dupv(&s), .dst = dupv(&d) }; } - | - ( - 'l' ws s:r-m32 ws? ',' ws? d:r32 - | 'l' ws s:r32 ws? ',' ws? d:r-m32 - | 'l' ws s:imm ws? ',' ws? d:r-m32 - | ws s:r32 ws? ',' ws? d:r32 - | ws s:m ws? ',' ws? d:r32 - | ws s:imm ws? ',' ws? d:r32 - ) - { $$.modrmbinop = (ModRMBinop){ .type = 'l', .src = dupv(&s), .dst = dupv(&d) }; } - | - ( - 'q' ws s:r-m64 ws? ',' ws? d:r64 - | 'q' ws s:r64 ws? ',' ws? d:r-m64 - | 'q' ws s:imm ws? ',' ws? d:r-m64 - | ws s:r64 ws? ',' ws? d:r64 - | ws s:m ws? ',' ws? d:r64 - | ws s:imm ws? ',' ws? d:r64 - ) { $$.modrmbinop = (ModRMBinop){ .type = 'q', .src = dupv(&s), .dst = dupv(&d) }; } - + | i:xchg { $$ = i; } + | i:add { $$ = i; } + | i:and { $$ = i; } + | i:or { $$ = i; } + | i:sub { $$ = i; } + | i:xor { $$ = i; } + +xchg = + 'xchg' ( + 'w'? ws s:ax ws? ',' ws? d:r16 { $$ = INSTR(0, s, d); } + | 'w'? ws s:r16 ws? ',' ws? d:ax { $$ = INSTR(1, s, d); } + | 'l'? ws s:eax ws? ',' ws? d:r32 { $$ = INSTR(2, s, d); } + | 'l'? ws s:r32 ws? ',' ws? d:eax { $$ = INSTR(3, s, d); } + | 'q'? ws s:rax ws? ',' ws? d:r64 { $$ = INSTR(4, s, d); } + | 'q'? ws s:r64 ws? ',' ws? d:rax { $$ = INSTR(5, s, d); } + | 'b'? ws s:r-m8 ws? ',' ws? d:r8 { $$ = INSTR(6, s, d); } + | 'b'? ws s:r8 ws? ',' ws? d:r-m8 { $$ = INSTR(7, s, d); } + | 'w'? ws s:r16 ws? ',' ws? d:r-m16 { $$ = INSTR(8, s, d); } + | 'w'? ws s:r-m16 ws? ',' ws? d:r16 { $$ = INSTR(9, s, d); } + | 'l'? ws s:r32 ws? ',' ws? d:r-m32 { $$ = INSTR(10, s, d); } + | 'l'? ws s:r-m32 ws? ',' ws? d:r32 { $$ = INSTR(11, s, d); } + | 'q'? ws s:r64 ws? ',' ws? d:r-m64 { $$ = INSTR(12, s, d); } + | 'q'? ws s:r-m64 ws? ',' ws? 
d:r64 { $$ = INSTR(13, s, d); } + ) { $$.instr.kind = ASM_XCHG; } + +# type$n-args has no meaning other than a pattern in the arg format + +add = "add" a:type1-args { a.instr.kind = ASM_ADD; $$ = a; } +and = "and" a:type1-args { a.instr.kind = ASM_AND; $$ = a; } +or = "or" a:type1-args { a.instr.kind = ASM_OR; $$ = a; } +sub = "sub" a:type1-args { a.instr.kind = ASM_SUB; $$ = a; } +xor = "xor" a:type1-args { a.instr.kind = ASM_XOR; $$ = a; } + +type1-args = + 'b'? ws s:imm ws? ',' ws? d:al { $$ = INSTR(0, s, d); } + | 'w'? ws s:imm ws? ',' ws? d:ax { $$ = INSTR(1, s, d); } + | 'l'? ws s:imm ws? ',' ws? d:eax { $$ = INSTR(2, s, d); } + | 'q'? ws s:imm ws? ',' ws? d:rax { $$ = INSTR(3, s, d); } + | 'b' ws s:imm ws? ',' ws? d:m { $$ = INSTR(4, s, d); } + | 'w' ws s:imm ws? ',' ws? d:m { $$ = INSTR(5, s, d); } + | 'l' ws s:imm ws? ',' ws? d:m { $$ = INSTR(6, s, d); } + | 'q' ws s:imm ws? ',' ws? d:m { $$ = INSTR(7, s, d); } + | 'b'? ws s:imm ws? ',' ws? d:r8 { $$ = INSTR(8, s, d); } + | 'w'? ws s:imm ws? ',' ws? d:r16 { $$ = INSTR(9, s, d); } + | 'l'? ws s:imm ws? ',' ws? d:r32 { $$ = INSTR(10, s, d); } + | 'q'? ws s:imm ws? ',' ws? d:r64 { $$ = INSTR(11, s, d); } + | 'b'? ws s:m ws? ',' ws? d:r8 { $$ = INSTR(12, s, d); } + | 'w'? ws s:m ws? ',' ws? d:r16 { $$ = INSTR(13, s, d); } + | 'l'? ws s:m ws? ',' ws? d:r32 { $$ = INSTR(14, s, d); } + | 'q'? ws s:m ws? ',' ws? d:r64 { $$ = INSTR(15, s, d); } + | 'b'? ws s:r8 ws? ',' ws? d:m { $$ = INSTR(16, s, d); } + | 'w'? ws s:r16 ws? ',' ws? d:m { $$ = INSTR(17, s, d); } + | 'l'? ws s:r32 ws? ',' ws? d:m { $$ = INSTR(18, s, d); } + | 'q'? ws s:r64 ws? ',' ws? d:m { $$ = INSTR(19, s, d); } + | 'b'? ws s:r8 ws? ',' ws? d:r8 { $$ = INSTR(20, s, d); } + | 'w'? ws s:r16 ws? ',' ws? d:r16 { $$ = INSTR(21, s, d); } + | 'l'? ws s:r32 ws? ',' ws? d:r32 { $$ = INSTR(22, s, d); } + | 'q'? ws s:r64 ws? ',' ws? d:r64 { $$ = INSTR(23, s, d); } r-m8 = r:r8 { $$ = r; } @@ -151,84 +115,86 @@ m = | i:ident ws? '(' ws? r:r64 ws?
')' { $$.memarg = (Memarg){ .kind = ASM_MEMARG, .c = 0, .l = i.ident.name, .reg = r.kind }; } +imm = + '$' ws? <'-'?[0-9]+> + { $$.imm = (Imm){ .kind = ASM_IMM, .c = strtoll(yytext, NULL, 10), .l = NULL}; } + +al = "%al" { $$ = REG(ASM_AL); } +ax = "%ax" { $$ = REG(ASM_AX); } +eax = "%eax" { $$ = REG(ASM_EAX); } +rax = "%rax" { $$ = REG(ASM_RAX); } + r8 = - "%al" { $$.kind = ASM_AL; } - | "%cl" { $$.kind = ASM_CL; } - | "%dl" { $$.kind = ASM_DL; } - | "%bl" { $$.kind = ASM_BL; } - | "%spl" { $$.kind = ASM_SPL; } - | "%bpl" { $$.kind = ASM_BPL; } - | "%sil" { $$.kind = ASM_SIL; } - | "%dil" { $$.kind = ASM_DIL; } - | "%r8b" { $$.kind = ASM_R8B; } - | "%r9b" { $$.kind = ASM_R9B; } - | "%r10b" { $$.kind = ASM_R10B; } - | "%r11b" { $$.kind = ASM_R11B; } - | "%r12b" { $$.kind = ASM_R12B; } - | "%r13b" { $$.kind = ASM_R13B; } - | "%r14b" { $$.kind = ASM_R14B; } - | "%r15b" { $$.kind = ASM_R15B; } + "%al" { $$ = REG(ASM_AL); } + | "%cl" { $$ = REG(ASM_CL); } + | "%dl" { $$ = REG(ASM_DL); } + | "%bl" { $$ = REG(ASM_BL); } + | "%spl" { $$ = REG(ASM_SPL); } + | "%bpl" { $$ = REG(ASM_BPL); } + | "%sil" { $$ = REG(ASM_SIL); } + | "%dil" { $$ = REG(ASM_DIL); } + | "%r8b" { $$ = REG(ASM_R8B); } + | "%r9b" { $$ = REG(ASM_R9B); } + | "%r10b" { $$ = REG(ASM_R10B); } + | "%r11b" { $$ = REG(ASM_R11B); } + | "%r12b" { $$ = REG(ASM_R12B); } + | "%r13b" { $$ = REG(ASM_R13B); } + | "%r14b" { $$ = REG(ASM_R14B); } + | "%r15b" { $$ = REG(ASM_R15B); } r16 = - "%ax" { $$.kind = ASM_AX; } - | "%cx" { $$.kind = ASM_CX; } - | "%dx" { $$.kind = ASM_DX; } - | "%bx" { $$.kind = ASM_BX; } - | "%sp" { $$.kind = ASM_SP; } - | "%bp" { $$.kind = ASM_BP; } - | "%si" { $$.kind = ASM_SI; } - | "%di" { $$.kind = ASM_DI; } - | "%r8w" { $$.kind = ASM_R8W; } - | "%r9w" { $$.kind = ASM_R9W; } - | "%r10w" { $$.kind = ASM_R10W; } - | "%r11w" { $$.kind = ASM_R11W; } - | "%r12w" { $$.kind = ASM_R12W; } - | "%r13w" { $$.kind = ASM_R13W; } - | "%r14w" { $$.kind = ASM_R14W; } - | "%r15w" { $$.kind = ASM_R15W; } + 
"%ax" { $$ = REG(ASM_AX); } + | "%cx" { $$ = REG(ASM_CX); } + | "%dx" { $$ = REG(ASM_DX); } + | "%bx" { $$ = REG(ASM_BX); } + | "%sp" { $$ = REG(ASM_SP); } + | "%bp" { $$ = REG(ASM_BP); } + | "%si" { $$ = REG(ASM_SI); } + | "%di" { $$ = REG(ASM_DI); } + | "%r8w" { $$ = REG(ASM_R8W); } + | "%r9w" { $$ = REG(ASM_R9W); } + | "%r10w" { $$ = REG(ASM_R10W); } + | "%r11w" { $$ = REG(ASM_R11W); } + | "%r12w" { $$ = REG(ASM_R12W); } + | "%r13w" { $$ = REG(ASM_R13W); } + | "%r14w" { $$ = REG(ASM_R14W); } + | "%r15w" { $$ = REG(ASM_R15W); } r32 = - "%eax" { $$.kind = ASM_EAX; } - | "%ecx" { $$.kind = ASM_ECX; } - | "%edx" { $$.kind = ASM_EDX; } - | "%ebx" { $$.kind = ASM_EBX; } - | "%esp" { $$.kind = ASM_ESP; } - | "%ebp" { $$.kind = ASM_EBP; } - | "%esi" { $$.kind = ASM_ESI; } - | "%edi" { $$.kind = ASM_EDI; } - | "%r8d" { $$.kind = ASM_R8D; } - | "%r9d" { $$.kind = ASM_R9D; } - | "%r10d" { $$.kind = ASM_R10D; } - | "%r11d" { $$.kind = ASM_R11D; } - | "%r12d" { $$.kind = ASM_R12D; } - | "%r13d" { $$.kind = ASM_R13D; } - | "%r14d" { $$.kind = ASM_R14D; } - | "%r15d" { $$.kind = ASM_R15D; } - -r64 = ( - "%rax" { $$.kind = ASM_RAX; } - | "%rcx" { $$.kind = ASM_RCX; } - | "%rdx" { $$.kind = ASM_RDX; } - | "%rbx" { $$.kind = ASM_RBX; } - | "%rsp" { $$.kind = ASM_RSP; } - | "%rbp" { $$.kind = ASM_RBP; } - | "%rsi" { $$.kind = ASM_RSI; } - | "%rdi" { $$.kind = ASM_RDI; } - | "%r8" ![lwb] { $$.kind = ASM_R8; } - | "%r9" ![lwb] { $$.kind = ASM_R9; } - | "%r10" ![lwb] { $$.kind = ASM_R10; } - | "%r11" ![lwb] { $$.kind = ASM_R11; } - | "%r12" ![lwb] { $$.kind = ASM_R12; } - | "%r13" ![lwb] { $$.kind = ASM_R13; } - | "%r14" ![lwb] { $$.kind = ASM_R14; } - | "%r15" ![lwb] { $$.kind = ASM_R15; } -) - -imm = - '$' i:ident - { $$.imm = (Imm){.kind = ASM_IMM, .l = i.ident.name, .c = 0 }; } - | '$' <'-'?[0-9]+> - { $$.imm = (Imm){.kind = ASM_IMM, .l = NULL, .c = strtoll(yytext, NULL, 10) }; } + "%eax" { $$ = REG(ASM_EAX); } + | "%ecx" { $$ = REG(ASM_ECX); } + | "%edx" { $$ = REG(ASM_EDX); } + 
| "%ebx" { $$ = REG(ASM_EBX); } + | "%esp" { $$ = REG(ASM_ESP); } + | "%ebp" { $$ = REG(ASM_EBP); } + | "%esi" { $$ = REG(ASM_ESI); } + | "%edi" { $$ = REG(ASM_EDI); } + | "%r8d" { $$ = REG(ASM_R8D); } + | "%r9d" { $$ = REG(ASM_R9D); } + | "%r10d" { $$ = REG(ASM_R10D); } + | "%r11d" { $$ = REG(ASM_R11D); } + | "%r12d" { $$ = REG(ASM_R12D); } + | "%r13d" { $$ = REG(ASM_R13D); } + | "%r14d" { $$ = REG(ASM_R14D); } + | "%r15d" { $$ = REG(ASM_R15D); } + +r64 = + "%rax" { $$ = REG(ASM_RAX); } + | "%rcx" { $$ = REG(ASM_RCX); } + | "%rdx" { $$ = REG(ASM_RDX); } + | "%rbx" { $$ = REG(ASM_RBX); } + | "%rsp" { $$ = REG(ASM_RSP); } + | "%rbp" { $$ = REG(ASM_RBP); } + | "%rsi" { $$ = REG(ASM_RSI); } + | "%rdi" { $$ = REG(ASM_RDI); } + | "%r8" ![lwb] { $$ = REG(ASM_R8); } + | "%r9" ![lwb] { $$ = REG(ASM_R9); } + | "%r10" ![lwb] { $$ = REG(ASM_R10); } + | "%r11" ![lwb] { $$ = REG(ASM_R11); } + | "%r12" ![lwb] { $$ = REG(ASM_R12); } + | "%r13" ![lwb] { $$ = REG(ASM_R13); } + | "%r14" ![lwb] { $$ = REG(ASM_R14); } + | "%r15" ![lwb] { $$ = REG(ASM_R15); } ident = <[_a-zA-Z][_a-zA-Z0-9]*> @@ -122,6 +122,15 @@ static Parsev *dupv(Parsev *p) { return r; } +#define INSTR(V, S, D) \ + (Parsev) { \ + .instr = (Instr) { \ + .kind = 0, .variant = V, .src = dupv(&S), .dst = dupv(&D) \ + } \ + } +#define REG(K) \ + (Parsev) { .kind = K } + #define YYSTYPE Parsev #define YY_CTX_LOCAL #define YY_CTX_MEMBERS Parsev v; @@ -137,9 +146,8 @@ void parse(void) { while (yyparse(&ctx)) { curlineno += 1; - if (ctx.v.kind == ASM_SYNTAX_ERROR) { + if (ctx.v.kind == ASM_SYNTAX_ERROR) lfatal("syntax error\n"); - } if (ctx.v.kind == ASM_BLANK) continue; l = zalloc(sizeof(AsmLine)); @@ -178,46 +186,129 @@ static void sw(uint32_t w) { secaddbytes(cursection, buf, sizeof(buf)); } -static int isregkind(AsmKind k) { return k > ASM_REG_BEGIN && k < ASM_REG_END; } - /* Convert an AsmKind to register bits in reg/rm style. 
*/ -static uint8_t modrmregbits(AsmKind k) { - return (k - (ASM_REG_BEGIN+1)) % 16; +static uint8_t regbits(AsmKind k) { return (k - (ASM_REG_BEGIN + 1)) % 16; } + +/* Is a register. */ +static uint8_t isreg(AsmKind k) { return k > ASM_REG_BEGIN && k < ASM_REG_END; } + +/* Is an 8 bit register. */ +static uint8_t isreg8(AsmKind k) { return k >= ASM_AL && k <= ASM_R15B; } + +/* Is a 16 bit register. */ +static uint8_t isreg16(AsmKind k) { return k >= ASM_AX && k <= ASM_R15W; } + +/* Is a 64 bit register. */ +static uint8_t isreg64(AsmKind k) { return k >= ASM_RAX && k <= ASM_R15; } + +/* Is an extended (%r8..%r15 family) register, i.e. one that needs REX.B/REX.R. */ +static uint8_t isregr(AsmKind k) { return !!(regbits(k) & (1 << 3)); } + +/* Compose a rex prefix - See intel manual. */ +static uint8_t rexbyte(uint8_t w, uint8_t r, uint8_t x, uint8_t b) { + return ((1 << 6) | ((!!w) << 3) | ((!!r) << 2) | ((!!x) << 1) | (!!b)); +} + +/* Compose a mod/reg/rm byte - See intel manual. */ +static uint8_t modregrm(uint8_t mod, uint8_t reg, uint8_t rm) { + return (((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7)); +} + +/* Assemble case op +rw | op + rd. */ +static void assembleplusr(Instr *i, uint8_t opcode, AsmKind reg) { + uint8_t bits = regbits(reg); + uint8_t rex = rexbyte(isreg64(reg), 0, 0, bits & (1 << 3)); + if (isreg16(reg)) + sb(0x66); + if (rex != rexbyte(0, 0, 0, 0)) + sb(rex); + sb(opcode | (bits & 7)); } -static uint8_t modrmregopcode(AsmKind k, char t) { - uint8_t opcode; - if (k == ASM_ADD) { - opcode = 0x00; - } else if (k == ASM_AND) { - opcode = 0x20; - } else if (k == ASM_MOV) { - opcode = 0x88; - } else if (k == ASM_OR) { - opcode = 0x08; - } else if (k == ASM_SUB) { - opcode = 0x28; - } else if (k == ASM_XCHG) { - opcode = 0x86; - } else if (k == ASM_XOR) { - opcode = 0x30; +/* Assemble case + r <-> r/m.
*/ +static void assemblerrm(Instr *i, uint8_t opcode) { + + Memarg *memarg; + AsmKind regarg; + uint8_t rex, mod, reg, rm, sib; + int wantsib; + int64_t disp; + int dispsz; + int64_t imm; + int immsz; + + mod = 0x03; + wantsib = 0; + sib = 0; + dispsz = 0; + immsz = 0; + + if (i->src->kind == ASM_MEMARG) { + memarg = &i->src->memarg; + regarg = i->dst->kind; + reg = regbits(i->dst->kind); + } else if (i->dst->kind == ASM_MEMARG) { + memarg = &i->dst->memarg; + regarg = i->src->kind; + reg = regbits(i->src->kind); + } else { + memarg = NULL; + regarg = i->src->kind; + reg = regbits(i->src->kind); + rm = regbits(i->dst->kind); + } + + if (memarg) { + rm = regbits(memarg->reg); + /* We cannot address ESP/RSP/... */ + if ((rm & 7) == 4) + lfatal("addressing mode unrepresentable"); + if (memarg->c == 0 && memarg->l == NULL) { + if ((rm & 7) == 5) { // BP style registers need displacement + mod = 0x01; + wantsib = 1; + sib = 0; + disp = 0; + dispsz = 1; + } else { + mod = 0x00; + } } else { - unreachable(); + lfatal("TODO"); } - if (t != 'b') - opcode += 1; - return opcode; -} + } + if (isreg16(regarg)) + sb(0x66); -static uint8_t modrmmemopcode(AsmKind k, char t) { - if (k == ASM_LEA) - return 0x8d; - return modrmregopcode(k, t) + 2; -} + rex = rexbyte(isreg64(regarg), reg & (1 << 3), 0, rm & (1 << 3)); + if (rex != rexbyte(0, 0, 0, 0)) + sb(rex); + + // fprintf(stderr, "%d %02x %02x", regarg - ASM_REG_BEGIN, regbits(regarg), + // mod); + sb2(isreg8(regarg) ? 
opcode : opcode + 1, modregrm(mod, reg, rm)); + + if (wantsib) + sb(sib); -#define REX(W, R, X, B) \ - ((1 << 6) | (!!(W) << 3) | (!!(R) << 2) | (!!(X) << 1) | (!!(B) << 0)) + switch (dispsz) { + case 1: + sb((uint8_t)disp); + break; + case 4: + sw((uint32_t)disp); + break; + } + switch (immsz) { + case 1: + sb((uint8_t)imm); + break; + case 4: + sw((uint32_t)imm); + break; + } +} static void assemble() { Symbol *sym; @@ -270,127 +361,42 @@ static void assemble() { case ASM_RET: sb(0xc3); break; + /* case ASM_MOVZX: - case ASM_MOVSX: { - fatal("TODO"); - } + case ASM_MOVSX: + case ASM_LEA: + */ case ASM_ADD: case ASM_AND: - case ASM_LEA: case ASM_MOV: case ASM_OR: - case ASM_SUB: - case ASM_XCHG: - case ASM_XOR: { - - ModRMBinop *op; - Memarg *memarg; - uint8_t opcode; - uint8_t rex, mod, reg, rm, sib; - int wantsib; - int64_t disp; - int dispsz; - int64_t imm; - int immsz; - - op = &v->modrmbinop; - memarg = NULL; - mod = 0x03; - wantsib = 0; - sib = 0; - dispsz = 0; - immsz = 0; - - if (op->src->kind == ASM_MEMARG) { - memarg = &op->src->memarg; - } else if (op->dst->kind == ASM_MEMARG) { - memarg = &op->dst->memarg; - } - - if (memarg) { - rm = modrmregbits(memarg->reg); - - /* We cannot address ESP/RSP/... 
*/ - if ((rm & 7) == 4) - lfatal("addressing mode unrepresentable"); - - if (memarg->c == 0 && memarg->l == NULL) { - if ((rm & 7) == 5) { // BP style registers need displacement - mod = 0x01; - wantsib = 1; - sib = 0; - disp = 0; - dispsz = 1; - } else { - mod = 0x00; - } - } else { - unreachable(); - } - } - - if (isregkind(op->dst->kind)) { - rm = modrmregbits(op->dst->kind); - } - - if (isregkind(op->src->kind)) { - opcode = modrmregopcode(op->kind, op->type); - reg = modrmregbits(op->src->kind); - } else if (op->src->kind == ASM_MEMARG) { - opcode = modrmmemopcode(op->kind, op->type); - reg = modrmregbits(op->dst->kind); - } else if (op->src->kind == ASM_IMM) { - opcode = 0x81; - reg = 0x00; - if (memarg) { - rm = modrmregbits(memarg->reg); - } else { - rm = modrmregbits(op->dst->kind); + case ASM_XOR: + case ASM_SUB: { + Instr *instr = &v->instr; + if (instr->variant < 12) { + lfatal("todo"); + } else { + switch (v->kind) { + // clang-format off + case ASM_ADD: assemblerrm(instr, 0x00); break; + case ASM_AND: assemblerrm(instr, 0x20); break; + case ASM_MOV: assemblerrm(instr, 0x88); break; + case ASM_OR: assemblerrm(instr, 0x08); break; + case ASM_XOR: assemblerrm(instr, 0x30); break; + case ASM_SUB: assemblerrm(instr, 0x28); break; + default: unreachable(); + // clang-format on } } - - if (op->type == 'w') { - sb(0x66); - } - - rex = REX(op->type == 'q', reg & (1 << 3), 0, rm & (1 << 3)); - - if (rex != REX(0, 0, 0, 0)) { - sb(rex); - } - - sb2(opcode, ((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7)); - - if (wantsib) { - sb(sib); - } - - switch (dispsz) { - case 0: - break; - case 1: - sb((uint8_t)disp); - break; - case 4: - sw((uint32_t)disp); - break; - default: - unreachable(); - } - - switch (immsz) { - case 0: - break; - case 1: - sb((uint8_t)imm); - break; - case 4: - sw((uint32_t)imm); - break; - default: - unreachable(); - } - + break; + } + case ASM_XCHG: { + Instr *xchg = &v->instr; + if (xchg->variant <= 5) + assembleplusr(xchg, 0x90, +
(xchg->variant % 2) ? xchg->src->kind : xchg->dst->kind); + else + assemblerrm(xchg, 0x86); break; } case ASM_JMP: { @@ -502,7 +508,7 @@ int main(void) { initsections(); parse(); assemble(); - fillsymtab(); + // fillsymtab(); outelf(); if (fflush(outf) != 0) fatal("fflush:"); @@ -183,19 +183,22 @@ typedef struct { typedef struct { AsmKind kind; - char type; + uint8_t variant; Parsev *src; Parsev *dst; -} ModRMBinop; +} Instr; -typedef ModRMBinop Add; -typedef ModRMBinop And; -typedef ModRMBinop Lea; -typedef ModRMBinop Mov; -typedef ModRMBinop Movzx; -typedef ModRMBinop Or; -typedef ModRMBinop Sub; -typedef ModRMBinop Xor; +/* +typedef Instr Add; +typedef Instr And; +typedef Instr Lea; +typedef Instr Mov; +typedef Instr Movzx; +typedef Instr Or; +typedef Instr Sub; +typedef Instr Xor; +*/ +typedef Instr Xchg; union Parsev { AsmKind kind; @@ -203,15 +206,20 @@ union Parsev { Globl globl; Balign balign; Memarg memarg; - ModRMBinop modrmbinop; + Instr instr; + /* Add add; And and; Lea lea; Mov mov; Movzx movzx; Or or; + */ + Xchg xchg; + /* Xor xor; Sub sub; + */ Jmp jmp; Byte byte; Imm imm; diff --git a/test/test.sh b/test/test.sh index 0d92e4a..9622963 100644 --- a/test/test.sh +++ b/test/test.sh @@ -12,7 +12,11 @@ t () { clang -Wno-everything -c -s "$tmps" -o "$tmpo" objcopy -j ".text" -O binary "$tmpo" "$tmpb" want="$(xxd -ps "$tmpb" | head -n 1 | cut -d ' ' -f 2-)" - ./minias < "$tmps" > "$tmpo" + if ! ./minias < "$tmps" > "$tmpo" + then + echo "failed to assemble: $1" + exit 1 + fi objcopy -j ".text" -O binary "$tmpo" "$tmpb" got="$(xxd -ps "$tmpb" | head -n 1 | cut -d ' ' -f 2-)" if test "$got" != "$want" @@ -27,10 +31,21 @@ t () { # TODO Tidy and be more systematic, we could just loop -t "xchg %rax, %rax" -t "xchg %eax, %eax" -t "xchg %ax, %ax" t "xchg %al, %al" +t "xchg %ax, %ax" +t "xchg %ax, %r9w" +t "xchg %r9w, %ax" +t "xchg %ax, %bx" +t "xchg %bx, %ax" +#t "xchg %eax, %eax" # XXX We encode this as nop, but clang does not. 
+t "xchg %eax, %r9d" +t "xchg %r9d, %eax" +t "xchg %eax, %ebx" +t "xchg %ebx, %eax" +t "xchg %rax, %r9" +t "xchg %r9, %rax" +t "xchg %rax, %rbx" +t "xchg %rbx, %rax" t "xchg %rax, (%rax)" t "xchg %eax, (%rax)" t "xchg %ax, (%rax)" @@ -40,32 +55,55 @@ t "xchg (%rax), %eax" t "xchg (%rax), %ax" t "xchg (%rax), %al" -exit 0 - t "addb %al, %al" -t "addb (%rax), %al" t "addb %al, (%rax)" - +t "addb (%rax), %al" +t "addl %eax, %eax" +t "addl %eax, %ebx" +t "addl %eax, (%r9)" +t "addl %eax, (%rax)" +t "addl %ebx, (%r9)" +t "addq %rax, %rax" +t "addq %rax, %rbx" +t "addq %rax, (%rax)" +t "addq %rax, (%rbx)" +t "addq (%rax), %rax" +t "andb %al, %al" +t "andb %al, (%rax)" +t "andb (%rax), %al" +t "andl %eax, %eax" +t "andq %rax, %rax" +t "andq %rax, (%rax)" +t "andq (%rax), %rax" +t "leave" +t "nop" +t "orb %al, %al" +t "orb %al, (%rax)" +t "orb (%rax), %al" +t "orl %eax, %eax" +t "orq (%rax), %rax" +t "orq %rax, %rax" +t "orq %rax, (%rax)" +t "ret" t "subb %al, %al" -t "subb (%rax), %al" t "subb %al, (%rax)" - +t "subb (%rax), %al" +t "subl %eax, %eax" +t "subq %rax, %rax" +t "subq (%rax), %rax" t "xorb %al, %al" -t "xorb (%rax), %al" t "xorb %al, (%rax)" +t "xorb (%rax), %al" +t "xorl %eax, %eax" +t "xorq %rax, %rax" +t "xorq %rax, (%rax)" +t "xorq (%rax), %rax" -t "orb %al, %al" -t "orb (%rax), %al" -t "orb %al, (%rax)" - -t "andb %al, %al" -t "andb (%rax), %al" -t "andb %al, (%rax)" +exit 0 t "movb %al, %al" t "movb (%rax), %al" t "movb %al, (%rax)" - t "movw %ax, %r9w" t "movw %ax, %ax" t "addw %ax, %ax" @@ -77,32 +115,4 @@ t "movl (%rax), %eax" t "movl %eax, (%rax)" t "leaw (%rax), %ax" t "leaq (%rax), %rax" -t "leal (%rax), %eax" -t "addq (%rax), %rax" -t "andq (%rax), %rax" -t "orq (%rax), %rax" -t "subq (%rax), %rax" -t "xorq (%rax), %rax" -t "nop" -t "ret" -t "leave" -t "addq %rax, %rax" -t "addl %eax, %eax" -t "subq %rax, %rax" -t "subl %eax, %eax" -t "addq %rax, %rbx" -t "addl %eax, %ebx" -t "addq %rax, (%rax)" -t "addq %rax, (%rbx)" -t "addl %eax, (%rax)" -t 
"addl %eax, (%r9)" -t "addl %ebx, (%r9)" -t "orq %rax, %rax" -t "orq %rax, (%rax)" -t "orl %eax, %eax" -t "xorq %rax, %rax" -t "xorq %rax, (%rax)" -t "xorl %eax, %eax" -t "andq %rax, %rax" -t "andq %rax, (%rax)" -t "andl %eax, %eax" +t "leal (%rax), %eax"
\ No newline at end of file @@ -27,9 +27,7 @@ void fatal(const char *fmt, ...) { exit(1); } -void unreachable(void) { - lfatal("BUG: unexpected internal condition"); -} +void unreachable(void) { lfatal("BUG: unexpected internal condition"); } void *xmalloc(size_t n) { void *p; |
