-rw-r--r--  LICENSE       |  14
-rw-r--r--  README.md     |  54
-rw-r--r--  asm.peg       | 190
-rw-r--r--  main.c        | 218
-rw-r--r--  minias.h      |   1
-rw-r--r--  parse.c       |   9
-rw-r--r--  test/test.sh  |  38
7 files changed, 325 insertions, 199 deletions
diff --git a/LICENSE b/LICENSE
@@ -0,0 +1,14 @@
+Copyright © 2021 Andrew Chambers
+Copyright © 2017-2020 Michael Forney
+
+Permission to use, copy, modify, and/or distribute this software for any purpose
+with or without fee is hereby granted, provided that the above copyright notice
+and this permission notice appear in all copies.
+
+THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES WITH
+REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND
+FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY SPECIAL, DIRECT,
+INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER RESULTING FROM LOSS
+OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, NEGLIGENCE OR OTHER
+TORTIOUS ACTION, ARISING OUT OF OR IN CONNECTION WITH THE USE OR PERFORMANCE OF
+THIS SOFTWARE.
\ No newline at end of file
diff --git a/README.md b/README.md
@@ -1,11 +1,14 @@
 # minias
 
-A mini assembler for x86_64, written for fun and learning.
+A mini assembler for x86-64, written for fun and learning.
 
-Goals:
+Minias can assemble large amounts of real world software after it has been compiled with the [cproc C compiler](https://sr.ht/~mcf/cproc/). It can also
+assemble self hosted cproc, meaning it can indirectly assemble itself.
+
+Project Goals:
 
 - A simple, tiny, fast implementation (in that order).
-- Assemble the output of [cproc](https://github.com/michaelforney/cproc)/[qbe](https://c9x.me/compile/) and [chibicc](https://github.com/rui314/chibicc).
+- Assemble the output of [cproc](https://sr.ht/~mcf/cproc/)/[qbe](https://c9x.me/compile/) and [chibicc](https://github.com/rui314/chibicc).
 - Relocatable elf output.
 
 Non Goals:
@@ -29,26 +32,53 @@ leg asm.peg > asm.peg.inc
 cc -O2 *.c -o minias
 ```
 
+# Roadmap
+
+Essential features:
+
+- [x] Self host with cproc.
+- [ ] Self host with chibicc.
+
+Bonus features:
+
+- [ ] A man page explaining what is supported.
+- [x] Two pass jump relaxing.
+- [ ] Immediate relaxing.
+- [ ] Simple immediate expressions.
+- [ ] Assemble a libc.
+- [ ] Test every opcode with all variants in our test suite.
+- [ ] Parser that doesn't depend on peg/leg.
+
 # Notes
 
-- Minias deliberately does not free data as it all is
-  freed by the OS at the end of execution. In the future
-  we one ould use an arena allocator for minias and still
-  avoid manual calls to free.
+- The implementation deliberately does not free allocated memory as it all is
+  freed by the OS at the end of execution. Memory usage is still
+  quite light as it uses string and value interning. In the future
+  we could use an arena allocator for minias and still avoid manual calls to free.
+
+- The implementation deliberately uses global variables in a style similar to class members in C++.
+  This is a more traditional unix style where the unit of data encapsulation is a
+  small program. This choice makes sense given we don't aim to build a library.
 
-- Minias deliberately kept the peg grammar quite repetitive
-  and simple, please keep it this.
+- Minias deliberately keeps the peg grammar quite repetitive
+  and simple, please keep it this way.
 
-- Our performance is quite fast, but with the current design
-  it is limited by the parser, it would be interesting
-  to see if we can improve the parser generator upstream.
+- Performance is limited by the parser, it would be interesting
+  to see if we can improve the parser generator upstream. That being said,
+  performance is often better than gnu as and much better than the clang assembler.
-
   One day it would be nice to write a 'minipeg' in a single .c file that can be bundled in projects.
 
+# Contact
+
+Ask questions on the [mailing list](https://lists.sr.ht/~ach/minias).
+Submit bugs to the [ticket system](https://todo.sr.ht/~ach/minias).
+
 # Resources
 
 - [intel reference](https://software.intel.com/content/dam/develop/external/us/en/documents-tps/325383-sdm-vol-2abcd.pdf) - Specifically chapter 2.1 and chapter 3.1.
 - [elf spec](https://refspecs.linuxfoundation.org/elf/elf.pdf)
+- [osdev wiki](https://wiki.osdev.org/X86-64_Instruction_Encoding)
 - [goas](https://github.com/DQNEO/goas)
 - [neatas](https://repo.or.cz/neatas.git)
diff --git a/asm.peg b/asm.peg
@@ -5,13 +5,10 @@ line =
   | . { yy->v.kind = ASM_SYNTAX_ERROR; }
 
 ws = ([ \t]+ | comment)+
-
 comment = "/*" ( ! "\n" ! "*/" . )* "*/"
 
 # No support for multiline comments for now as they break our line numbers.
-
 eolcomment = '#' (! "\n" .)+
 
 eol = ws? eolcomment? "\n"
-
 stmt =
   '.' d:directive eol {$$ = d;}
   | i:instr eol { $$ = i; }
@@ -66,25 +63,50 @@ label =
   { $$.label = (Label){.kind = ASM_LABEL, .name = i.charptr}; }
 
 instr =
-  # Ordered by relative frequency for performance.
-
-  # Movs are very common, so they come first.
+  # Ordered by instruction frequency for performance.
+  # e.g. movs are very common, so they come first.
+  # The & operator means check without consuming input.
   (& 'm'
     (
-    i:mov { $$ = i; }
-    | i:movsx { $$ = i; }
-    | i:movzx { $$ = i; }
-    | i:mul { $$ = i; }
-    # Less common, but we have already checked for 'm'
-    | i:movaps { $$ = i; }
-    | i:movq { $$ = i; }
-    | i:movsd { $$ = i; }
-    | i:movss { $$ = i; }
-    | i:mulsd { $$ = i; }
-    | i:mulss { $$ = i; }))
-  | i:add { $$ = i; }
-  | i:and { $$ = i; }
-  | i:cmp { $$ = i; }
+      i:mov { $$ = i; }
+      | i:movsx { $$ = i; }
+      | i:movzx { $$ = i; }
+      | i:mul { $$ = i; }
+      | i:movaps { $$ = i; }
+      | i:movq { $$ = i; }
+      | i:movsd { $$ = i; }
+      | i:movss { $$ = i; }
+      | i:mulsd { $$ = i; }
+      | i:mulss { $$ = i; }))
+  | (& 'a'
+    (
+      i:add { $$ = i; }
+      | i:and { $$ = i; }
+      | i:addss { $$ = i; }
+      | i:addsd { $$ = i; } ))
+  | (& 'c'
+    (
+      i:cmp { $$ = i; }
+      | i:call { $$ = i; }
+      | i:cvtsi2sd { $$ = i; }
+      | i:cvtsi2ss { $$ = i; }
+      | i:cvtss2sd { $$ = i; }
+      | i:cvtsd2ss { $$ = i; }
+      | i:cvttsd2si { $$ = i; }
+      | i:cvttss2si { $$ = i; }
+      | i:cltd { $$ = i; }
+      | i:cqto { $$ = i; }))
+  | (& 's'
+    (
+      i:set { $$ = i; }
+      | i:sub { $$ = i; }
+      | i:sal { $$ = i; }
+      | i:sar { $$ = i; }
+      | i:shl { $$ = i; }
+      | i:shr { $$ = i; }
+      | i:subsd { $$ = i; }
+      | i:subss { $$ = i; }))
+  | i:or { $$ = i; }
   | i:leave { $$ = i; }
   | i:ret { $$ = i; }
   | i:push { $$ = i; }
@@ -95,44 +117,18 @@ instr =
   | i:lea { $$ = i; }
   | i:imul { $$ = i; }
   | i:neg { $$ = i; }
-  | i:or { $$ = i; }
-  | (& 's'
-    (
-    i:set { $$ = i; }
-    | i:sub { $$ = i; }
-    | i:sal { $$ = i; }
-    | i:sar { $$ = i; }
-    | i:shl { $$ = i; }
-    | i:shr { $$ = i; }))
   | i:test { $$ = i; }
   | i:xchg { $$ = i; }
   | i:xor { $$ = i; }
-  | i:call { $$ = i; }
-  # Misc
-  | i:cltd { $$ = i; }
-  | i:cqto { $$ = i; }
-  | i:nop { $$ = i; }
   # Floating point is less common, so check last.
-  | i:addss { $$ = i; }
-  | i:addsd { $$ = i; }
   | i:divss { $$ = i; }
   | i:divsd { $$ = i; }
   | i:pxor { $$ = i; }
   | i:xorpd { $$ = i; }
   | i:xorps { $$ = i; }
-  | i:subsd { $$ = i; }
-  | i:subss { $$ = i; }
   | i:ucomisd { $$ = i; }
   | i:ucomiss { $$ = i; }
-  | (& 'c'
-    (
-    i:cvtsi2sd { $$ = i; }
-    | i:cvtsi2ss { $$ = i; }
-    | i:cvtss2sd { $$ = i; }
-    | i:cvtsd2ss { $$ = i; }
-    | i:cvttsd2si { $$ = i; }
-    | i:cvttss2si { $$ = i; }))
-
+  | i:nop { $$ = i; }
 
 cltd = "cltd" { $$ = (Parsev){ .kind=ASM_CLTD }; }
 cqto = "cqto" { $$ = (Parsev){ .kind=ASM_CQTO }; }
@@ -140,17 +136,15 @@ leave = "leave" { $$ = (Parsev){ .kind=ASM_LEAVE }; }
 nop = "nop" { $$ = (Parsev){ .kind=ASM_NOP }; }
 ret = "ret" { $$ = (Parsev){ .kind=ASM_RET }; }
 
-push =
-  "push" (
-    'q'? ws s:r64 { $$ = INSTR1(0, s); }
-    | 'q' ws s:mem { $$ = INSTR1(1, s); }
-  ) { $$.instr.kind = ASM_PUSH; }
+push = "push" (
+  'q'? ws s:r64 { $$ = INSTR1(0, s); }
+  | 'q' ws s:mem { $$ = INSTR1(1, s); }
+) { $$.instr.kind = ASM_PUSH; }
 
-pop =
-  "pop" (
-    'q'? ws d:r64 { $$ = INSTR1(0, d); }
-    | 'q' ws d:mem { $$ = INSTR1(1, d); }
-  ) { $$.instr.kind = ASM_POP; }
+pop = "pop" (
+  'q'? ws d:r64 { $$ = INSTR1(0, d); }
+  | 'q' ws d:mem { $$ = INSTR1(1, d); }
+) { $$.instr.kind = ASM_POP; }
 
 call = "call" 'q'?
   ws (
     '*' t:mem
@@ -175,20 +169,22 @@ condition-code =
   | "pe" { $$.i64 = 3; }
   | "p" { $$.i64 = 4; }
   | "o" { $$.i64 = 5; }
-  | "nz" { $$.i64 = 6; }
-  | "ns" { $$.i64 = 7; }
-  | "np" { $$.i64 = 8; }
-  | "no" { $$.i64 = 9; }
-  | "nle" { $$.i64 = 10; }
-  | "nl" { $$.i64 = 11; }
-  | "nge" { $$.i64 = 12; }
-  | "ng" { $$.i64 = 13; }
-  | "ne" { $$.i64 = 14; }
-  | "nc" { $$.i64 = 15; }
-  | "nbe" { $$.i64 = 16; }
-  | "nb" { $$.i64 = 17; }
-  | "nae" { $$.i64 = 18; }
-  | "na" { $$.i64 = 19; }
+  | ("n"
+    (
+      "z" { $$.i64 = 6; }
+      | "s" { $$.i64 = 7; }
+      | "p" { $$.i64 = 8; }
+      | "o" { $$.i64 = 9; }
+      | "le" { $$.i64 = 10; }
+      | "l" { $$.i64 = 11; }
+      | "ge" { $$.i64 = 12; }
+      | "g" { $$.i64 = 13; }
+      | "e" { $$.i64 = 14; }
+      | "c" { $$.i64 = 15; }
+      | "be" { $$.i64 = 16; }
+      | "b" { $$.i64 = 17; }
+      | "ae" { $$.i64 = 18; }
+      | "a" { $$.i64 = 19; }))
   | "le" { $$.i64 = 20; }
   | "l" { $$.i64 = 21; }
   | "ge" { $$.i64 = 22; }
@@ -429,22 +425,22 @@ test = "test" (
 
 addsd = "addsd" (
   ws s:xmm ws? ',' ws? d:xmm { $$ = INSTR2(0, s, d); }
-  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
+  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
 ) { $$.instr.kind = ASM_ADDSD; }
 
 addss = "addss" (
   ws s:xmm ws? ',' ws? d:xmm { $$ = INSTR2(0, s, d); }
-  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
+  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
 ) { $$.instr.kind = ASM_ADDSS; }
 
 subsd = "subsd" (
   ws s:xmm ws? ',' ws? d:xmm { $$ = INSTR2(0, s, d); }
-  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
+  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
 ) { $$.instr.kind = ASM_SUBSD; }
 
 subss = "subss" (
   ws s:xmm ws? ',' ws? d:xmm { $$ = INSTR2(0, s, d); }
-  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
+  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
 ) { $$.instr.kind = ASM_SUBSS; }
 
 cvtsi2sd = "cvtsi2sd" (
@@ -463,12 +459,12 @@ cvtsi2ss = "cvtsi2ss" (
 
 cvtss2sd = "cvtss2sd" (
   ws s:xmm ws? ',' ws? d:xmm { $$ = INSTR2(0, s, d); }
-  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
+  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
 ) { $$.instr.kind = ASM_CVTSS2SD; }
 
 cvtsd2ss = "cvtsd2ss" (
   ws s:xmm ws? ',' ws? d:xmm { $$ = INSTR2(0, s, d); }
-  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
+  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
 ) { $$.instr.kind = ASM_CVTSD2SS; }
 
 cvttss2si = "cvttss2si" (
@@ -492,7 +488,7 @@ divsd = "divsd" (
 
 divss = "divss" (
   ws s:xmm ws? ',' ws? d:xmm { $$ = INSTR2(0, s, d); }
-  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
+  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
 ) { $$.instr.kind = ASM_DIVSS; }
 
 movaps = "movaps" (
@@ -503,12 +499,12 @@ movaps = "movaps" (
 
 mulsd = "mulsd" (
   ws s:xmm ws? ',' ws? d:xmm { $$ = INSTR2(0, s, d); }
-  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
+  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
 ) { $$.instr.kind = ASM_MULSD; }
 
 mulss = "mulss" (
   ws s:xmm ws? ',' ws? d:xmm { $$ = INSTR2(0, s, d); }
-  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
+  | ws s:mem ws? ',' ws? d:xmm { $$ = INSTR2(1, s, d); }
 ) { $$.instr.kind = ASM_MULSS; }
 
 movss = "movss" (
@@ -667,24 +663,24 @@ r64 = "%r" (
   | "15" ![lwb] { $$ = REG(R15); }
 )
 
-xmm = "%x" (
+xmm = "%xmm" (
   # Reverse order due to peg ordering.
-  "mm15" { $$ = REG(XMM15); }
-  | "mm14" { $$ = REG(XMM14); }
-  | "mm13" { $$ = REG(XMM13); }
-  | "mm12" { $$ = REG(XMM12); }
-  | "mm11" { $$ = REG(XMM11); }
-  | "mm10" { $$ = REG(XMM10); }
-  | "mm9" { $$ = REG(XMM7); }
-  | "mm8" { $$ = REG(XMM7); }
-  | "mm7" { $$ = REG(XMM7); }
-  | "mm6" { $$ = REG(XMM6); }
-  | "mm5" { $$ = REG(XMM5); }
-  | "mm4" { $$ = REG(XMM4); }
-  | "mm3" { $$ = REG(XMM3); }
-  | "mm2" { $$ = REG(XMM2); }
-  | "mm1" { $$ = REG(XMM1); }
-  | "mm0" { $$ = REG(XMM0); }
+  "15" { $$ = REG(XMM15); }
+  | "14" { $$ = REG(XMM14); }
+  | "13" { $$ = REG(XMM13); }
+  | "12" { $$ = REG(XMM12); }
+  | "11" { $$ = REG(XMM11); }
+  | "10" { $$ = REG(XMM10); }
+  | "9" { $$ = REG(XMM7); }
+  | "8" { $$ = REG(XMM7); }
+  | "7" { $$ = REG(XMM7); }
+  | "6" { $$ = REG(XMM6); }
+  | "5" { $$ = REG(XMM5); }
+  | "4" { $$ = REG(XMM4); }
+  | "3" { $$ = REG(XMM3); }
+  | "2" { $$ = REG(XMM2); }
+  | "1" { $$ = REG(XMM1); }
+  | "0" { $$ = REG(XMM0); }
 )
 
 # We disallow newlines in our strings, it is simpler for lineno tracking.
diff --git a/main.c b/main.c
@@ -1,9 +1,11 @@
 #include "minias.h"
-#include <getopt.h>
 
 /* Parsed assembly */
 static AsmLine *allasm = NULL;
 
+/* Number of assembly relaxation passes. */
+static int nrelax = 1;
+
 /* Symbol table. */
 static struct hashtable *symbols = NULL;
@@ -29,7 +31,7 @@ static Section *datarel = NULL;
 static char *infilename = "<stdin>";
 static size_t curlineno = 0;
 
-void lfatal(const char *fmt, ...) {
+static void lfatal(const char *fmt, ...) {
   va_list ap;
   fprintf(stderr, "%s:%ld: ", infilename, curlineno);
   va_start(ap, fmt);
@@ -45,8 +47,11 @@ static Symbol *getsym(const char *name) {
   htabkey(&htk, name, strlen(name));
   ps = (Symbol **)htabput(symbols, &htk);
   if (!*ps) {
-    *ps = zalloc(sizeof(Symbol));
-    (*ps)->name = name;
+    *ps = xmalloc(sizeof(Symbol));
+    **ps = (Symbol){
+        .name = name,
+        .wco = -1,
+    };
   }
   s = *ps;
   return s;
@@ -155,7 +160,7 @@ static void initsections(void) {
   datarel->hdr.sh_entsize = sizeof(Elf64_Rela);
 }
 
-Relocation *newreloc() {
+static Relocation *newreloc() {
   if (nrelocs == reloccap) {
     reloccap = nrelocs ? nrelocs * 2 : 64;
     relocs = xreallocarray(relocs, reloccap, sizeof(Relocation));
@@ -208,15 +213,7 @@ static uint8_t regbits(AsmKind k) { return (k - (ASM_REG_BEGIN + 1)) % 16; }
 
 static uint8_t isreg64(AsmKind k) { return k >= ASM_RAX && k <= ASM_R15; }
 
-/* Register that requires the use of a rex prefix. */
-static uint8_t isrexreg(AsmKind k) {
-  return k > ASM_REG_BEGIN && k < ASM_REG_END &&
-         (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL ||
-          k == ASM_SIL || k == ASM_DIL);
-}
-
-/* Compose a rex prefix - See intel manual. */
-
+/* Rex opcode prefix. */
 typedef struct Rex {
   uint8_t required : 1;
   uint8_t w : 1;
@@ -225,6 +222,13 @@ typedef struct Rex {
   uint8_t b : 1;
 } Rex;
 
+/* Register that requires the use of a rex prefix. */
+static uint8_t isrexreg(AsmKind k) {
+  return k > ASM_REG_BEGIN && k < ASM_REG_END &&
+         (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL ||
+          k == ASM_SIL || k == ASM_DIL);
+}
+
 static uint8_t rexbyte(Rex rex) {
   return ((1 << 6) | (rex.w << 3) | (rex.r << 2) | (rex.x << 1) | rex.b);
}
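rexbyte() above packs the REX.W/R/X/B flags under the fixed 0100 pattern from the Intel manual referenced in the README. As a quick sanity check of that bit layout, here is a small standalone sketch (an illustration only, not part of the project's code) that recomputes the prefix for `addq %r9, %rax`: W is set for the 64-bit operation and R is set because %r9 sits in the ModRM reg field, giving 0x4c (gas encodes the full instruction as `4c 01 c8`).

```
#include <stdint.h>
#include <stdio.h>

/* Recompute a REX prefix with the same bit layout as rexbyte():
   0100 W R X B. Standalone illustration, independent of minias. */
static uint8_t rex(uint8_t w, uint8_t r, uint8_t x, uint8_t b) {
  return (1 << 6) | (w << 3) | (r << 2) | (x << 1) | b;
}

int main(void) {
  /* addq %r9, %rax: 64-bit operand (W=1), reg field holds %r9 (R=1),
     no SIB index (X=0), rm field holds %rax (B=0). */
  printf("0x%02x\n", rex(1, 1, 0, 0)); /* prints 0x4c */
  return 0;
}
```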
@@ -379,7 +383,7 @@ static void assemblemem(const Memarg *memarg, Rex rex, VarBytes prefix,
   assemblemodregrm(rex, prefix, opcode, mod, reg, rm);
 
   if (mod == 1) {
-    assemblereloc(memarg->disp.l, memarg->disp.c, 1, R_X86_64_8);
+    assembleconstant(memarg->disp.c, 1);
   } else if (mod == 2) {
     assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
   }
@@ -439,7 +443,7 @@ static void assemblemem(const Memarg *memarg, Rex rex, VarBytes prefix,
   sb(sibbyte(scale, index, base));
 
   if (mod == 1) {
-    assemblereloc(memarg->disp.l, memarg->disp.c, 1, R_X86_64_8);
+    assembleconstant(memarg->disp.c, 1);
   } else if (mod == 2) {
     assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
   }
@@ -559,7 +563,7 @@ static void assemblexchg(const Instr *xchg) {
     rex = (Rex){
         .required = isrexreg(xchg->arg1->kind) || isrexreg(xchg->arg2->kind),
         .w = isreg64(xchg->arg1->kind) || isreg64(xchg->arg2->kind),
-        .r = !!(regbits(reg) & (1 << 3)),
+        .b = !!(regbits(reg) & (1 << 3)),
     };
     assembleplusr(rex, prefix, opcode, regbits(reg));
   } else {
@@ -740,6 +744,82 @@ static void assembleset(const Instr *instr) {
   }
 }
 
+static void assemblecall(const Call *call) {
+  Rex rex;
+  uint8_t rm;
+
+  if (call->indirect) {
+    if (call->target.indirect->kind == ASM_MEMARG) {
+      rex = (Rex){0};
+      assemblemem(&call->target.indirect->memarg, rex, -1, 0xff, 0x02);
+    } else {
+      rm = regbits(call->target.indirect->kind);
+      rex = (Rex){.b = !!(rm & (1 << 3))};
+      assemblemodregrm(rex, -1, 0xff, 0x03, 0x02, rm);
+    }
+  } else {
+    sb(0xe8);
+    assemblereloc(call->target.direct.l, call->target.direct.c - 4, 4,
+                  R_X86_64_PC32);
+  }
+}
+
+static void assembleimul(const Instr *instr) {
+  VarBytes prefix, opcode;
+
+  if (instr->variant < 8) {
+    assembledivmulneg(instr, 0x05);
+  } else if (instr->variant < 14) {
+    opcode = 0x01000faf;
+    prefix = ((instr->variant - 8) % 3) == 0 ? 0x66 : -1;
+    assemblerrm(instr, prefix, opcode, 1);
+  } else {
+    const Imm *imm;
+    imm = &instr->arg3->imm;
+    opcode = 0x69;
+    prefix = ((instr->variant - 14) % 3) == 0 ? 0x66 : -1;
+    assemblerrm(instr, prefix, opcode, 1);
+    assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32);
+  }
+}
+
+static void assemblejmp(const Jmp *j) {
+  int jmpsize;
+  int64_t distance;
+  Symbol *target;
+
+  static uint8_t variant2op[31] = {
+      0xe9, 0x84, 0x88, 0x8b, 0x8a, 0x8a, 0x80, 0x85, 0x89, 0x8b, 0x81,
+      0x8f, 0x8d, 0x8c, 0x8e, 0x85, 0x83, 0x87, 0x83, 0x82, 0x86, 0x8e,
+      0x8c, 0x8d, 0x8f, 0x84, 0x82, 0x86, 0x82, 0x83, 0x87,
+  };
+
+  jmpsize = 4;
+  target = getsym(j->target);
+  if (cursection == target->section && (target->defined || target->wco != -1)) {
+    if (target->defined) {
+      distance = target->offset - cursection->hdr.sh_size;
+    } else {
+      distance = target->wco - cursection->hdr.sh_size;
+    }
+    if ((distance - 1) >= -128 && (distance - 1) <= 127) {
+      jmpsize = 1;
+    } else {
+      jmpsize = 4;
+    }
+  }
+
+  if (jmpsize == 4) {
+    if (j->variant)
+      sb(0x0f);
+    sb(variant2op[j->variant]);
+    assemblereloc(j->target, -4, 4, R_X86_64_PC32);
+  } else {
+    sb(variant2op[j->variant] + (j->variant ? -16 : 2));
+    assemblereloc(j->target, -1, 1, R_X86_64_PC8);
+  }
+}
+
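assemblejmp() above chooses between the short and near jump forms: when the target is in the same section and its offset (or worst case offset from a previous pass) is known, the displacement is emitted as a 1-byte rel8 if it fits in a signed byte, otherwise as a 4-byte rel32 with the 0x0f-prefixed opcode from variant2op. The sketch below isolates just that size decision; jmpsize() here is a hypothetical helper, not a function in main.c.

```
#include <stdint.h>
#include <stdio.h>

/* Rel8-vs-rel32 sizing for a jump. `distance` is the target offset minus
   the current section size, and the -1 margin mirrors the check in
   assemblejmp(). */
static int jmpsize(int64_t distance, int target_known) {
  if (!target_known)
    return 4; /* unknown target: assume the long form */
  if ((distance - 1) >= -128 && (distance - 1) <= 127)
    return 1; /* fits in a signed byte: short jump */
  return 4;
}

int main(void) {
  /* A jump over ~20 bytes relaxes to the short form; 1000 bytes does not. */
  printf("%d %d\n", jmpsize(-20, 1), jmpsize(1000, 1)); /* prints 1 4 */
  return 0;
}
```

The short-form opcode then comes from the same table: the long conditional form for `jne` is 0x0f followed by 0x85, and subtracting 16 gives the short opcode 0x75, while the unconditional 0xe9 becomes 0xeb by adding 2, matching the `variant ? -16 : 2` adjustment above.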
 static void assemble(void) {
   Symbol *sym;
   AsmLine *l;
@@ -847,38 +927,12 @@ static void assemble(void) {
         lfatal("%s already defined", sym->name);
       sym->defined = 1;
       break;
-    case ASM_CALL: {
-      Rex rex;
-      uint8_t rm;
-
-      if (v->call.indirect) {
-        if (v->call.target.indirect->kind == ASM_MEMARG) {
-          rex = (Rex){0};
-          assemblemem(&v->call.target.indirect->memarg, rex, -1, 0xff, 0x02);
-        } else {
-          rm = regbits(v->call.target.indirect->kind);
-          rex = (Rex){.b = !!(rm & (1 << 3))};
-          assemblemodregrm(rex, -1, 0xff, 0x03, 0x02, rm);
-        }
-      } else {
-        sb(0xe8);
-        assemblereloc(v->call.target.direct.l, v->call.target.direct.c - 4, 4,
-                      R_X86_64_PC32);
-      }
+    case ASM_CALL:
+      assemblecall(&v->call);
       break;
-    }
-    case ASM_JMP: {
-      static uint8_t variant2op[31] = {
-          0xe9, 0x84, 0x88, 0x8b, 0x8a, 0x8a, 0x80, 0x85, 0x89, 0x8b, 0x81,
-          0x8f, 0x8d, 0x8c, 0x8e, 0x85, 0x83, 0x87, 0x83, 0x82, 0x86, 0x8e,
-          0x8c, 0x8d, 0x8f, 0x84, 0x82, 0x86, 0x82, 0x83, 0x87,
-      };
-      if (v->jmp.variant)
-        sb(0x0f);
-      sb(variant2op[v->jmp.variant]);
-      assemblereloc(v->jmp.target, -4, 4, R_X86_64_PC32);
+    case ASM_JMP:
+      assemblejmp(&v->jmp);
       break;
-    }
     case ASM_PUSH: {
       Rex rex;
       uint8_t reg;
@@ -1054,25 +1108,9 @@ static void assemble(void) {
     case ASM_MULSS:
       assemblerrm(&v->instr, 0xf3, 0x01000f59, 1);
       break;
-    case ASM_IMUL: {
-      VarBytes prefix, opcode;
-
-      if (v->instr.variant < 8) {
-        assembledivmulneg(&v->instr, 0x05);
-      } else if (v->instr.variant < 14) {
-        opcode = 0x01000faf;
-        prefix = ((v->instr.variant - 8) % 3) == 0 ? 0x66 : -1;
-        assemblerrm(&v->instr, prefix, opcode, 1);
-      } else {
-        const Imm *imm;
-        imm = &v->instr.arg3->imm;
-        opcode = 0x69;
-        prefix = ((v->instr.variant - 14) % 3) == 0 ? 0x66 : -1;
-        assemblerrm(&v->instr, prefix, opcode, 1);
-        assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32);
-      }
+    case ASM_IMUL:
+      assembleimul(&v->instr);
       break;
-    }
     case ASM_NEG:
       assembledivmulneg(&v->instr, 0x03);
       break;
@@ -1085,10 +1123,9 @@ static void assemble(void) {
       assemblebasicop(&v->instr, variant2op[v->instr.variant], 0x01);
       break;
     }
-    case ASM_PXOR: {
+    case ASM_PXOR:
      assemblerrm(&v->instr, 0x66, 0x01000fef, 1);
      break;
-    }
     case ASM_SET:
       assembleset(&v->instr);
       break;
@@ -1152,6 +1189,32 @@ static void assemble(void) {
   }
 }
 
+/* Reset while remembering symbol offsets so we can size jumps. */
+static void relaxreset(void) {
+  Symbol *sym;
+  Section *sec;
+  size_t i;
+
+  /* Reset relocations and section data but retain capacity. */
+  nrelocs = 0;
+
+  for (i = 0; i < nsections; i++) {
+    sec = &sections[i];
+    if (sec == shstrtab)
+      continue;
+    sec->hdr.sh_size = 0;
+  }
+
+  /* Reset symbols, saving the worst case offset for the second pass. */
+  for (i = 0; i < symbols->cap; i++) {
+    if (!symbols->keys[i].str)
+      continue;
+    sym = symbols->vals[i];
+    *sym = (Symbol){
+        .name = sym->name, .section = sym->section, .wco = sym->offset};
+  }
+}
+
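relaxreset() is what turns the "two pass jump relaxing" roadmap item into practice: the first assemble() pass sizes every jump pessimistically, relaxreset() records each symbol's offset from that pass as its worst case offset (wco) and clears the section sizes, and the next pass can emit short jumps wherever even the worst-case distance fits in a byte. A toy model of that reasoning, with made-up numbers rather than real sections:

```
#include <stdint.h>
#include <stdio.h>

/* Toy model: one forward jump over `gap` bytes of padding to a label.
   Pass 1 assumes the 5-byte jmp rel32; pass 2 may shrink it to the
   2-byte jmp rel8 when the worst case offset already fits. */
static int64_t labeloffset(int64_t gap, int jmpbytes) {
  return jmpbytes + gap;
}

int main(void) {
  int64_t gap = 100;
  int64_t wco = labeloffset(gap, 5);              /* pass 1: worst case */
  int fits = (wco - 1) >= -128 && (wco - 1) <= 127;
  int64_t final = labeloffset(gap, fits ? 2 : 5); /* pass 2 */
  printf("wco=%lld final=%lld\n", (long long)wco, (long long)final);
  /* prints wco=105 final=102 */
  return 0;
}
```

Because relaxation only shrinks instructions, offsets never grow between passes, so a jump that fits under the worst-case layout still fits in the final one; that is why the single extra pass used by default is already safe.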
 static void addtosymtab(Symbol *sym) {
   Elf64_Sym elfsym;
   int stype;
@@ -1216,10 +1279,14 @@ static int resolvereloc(Relocation *reloc) {
     return 0;
 
   switch (reloc->type) {
-  case R_X86_64_8:
   case R_X86_64_32:
   case R_X86_64_64:
     return 0;
+  case R_X86_64_PC8:
+    rdata = &reloc->section->data[reloc->offset];
+    value = sym->offset - reloc->offset + reloc->addend;
+    rdata[0] = ((uint8_t)value & 0xff);
+    return 1;
   case R_X86_64_PC32:
     rdata = &reloc->section->data[reloc->offset];
     value = sym->offset - reloc->offset + reloc->addend;
@@ -1255,7 +1322,6 @@ static void appendreloc(Relocation *reloc) {
   case R_X86_64_PC32:
   case R_X86_64_32:
   case R_X86_64_64:
-  case R_X86_64_8:
     elfrel.r_info = ELF64_R_INFO(sym->idx, reloc->type);
     elfrel.r_offset = reloc->offset;
     elfrel.r_addend = reloc->addend;
@@ -1325,8 +1391,11 @@ static void outelf(void) {
 }
 
 static void usage(char *argv0) {
-  fprintf(stderr, "minias - a mini assembler.");
-  fprintf(stderr, "usage: %s [-o out] [input]\n", argv0);
+  fprintf(stderr, "minias - a mini x86-64 assembler.\n\n");
+  fprintf(stderr, "usage: %s [-r passes] [-o out] [input]\n", argv0);
+  fprintf(stderr, "\n");
+  fprintf(stderr, "  -r passes  Jump relaxation iterations (default 1).\n");
+  fprintf(stderr, "  -o out     Output file to write (default stdout).\n");
   exit(2);
 }
 
@@ -1344,6 +1413,9 @@ static void parseargs(int argc, char *argv[]) {
     case 'h':
       usage(argv0);
       break;
+    case 'r':
+      nrelax = atoi(*++argv);
+      break;
     case 'o':
       if (argv[1] == NULL)
         usage(argv0);
@@ -1372,6 +1444,10 @@ int main(int argc, char *argv[]) {
   allasm = parseasm();
   initsections();
   assemble();
+  while (nrelax-- > 0) {
+    relaxreset();
+    assemble();
+  }
   fillsymtab();
   handlerelocs();
   outelf();
diff --git a/minias.h b/minias.h
@@ -24,6 +24,7 @@ typedef struct {
   const char *name;
   int32_t idx;
   int64_t offset;
+  int64_t wco; /* worst case offset */
   int64_t size;
   int global;
   int defined;
diff --git a/parse.c b/parse.c
@@ -8,11 +8,10 @@ static const Parsev *internparsev(Parsev *p) {
      for equality, even on pointer values, this works because the pointers
      themselves are also interned.
 
-     This simplicity somes with one big cost - Parsev variants with padding
-     can trigger a false positive on valgrind. It should still safe
-     because reading these undefined bytes do not change the behavior of the
-     program. The best fix is still to avoid the padding bytes in the Parsev
-     variant layout using a tool such as 'pahole'.
+     This simplicity comes with one big cost - Parsev variants with padding
+     can trigger a false positive on valgrind. It should still be safe,
+     but the best fix is still to avoid the padding bytes in the Parsev
+     variants.
   */
   size_t idx;
   const Parsev *interned;
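The rewritten comment in parse.c above is worth unpacking: interning lets parsed values be compared with a single memcmp (and interned pointers compare equal by plain pointer equality), but memcmp also reads any padding bytes the compiler inserted between struct members, which is where the valgrind false positive comes from. A minimal reproduction with a hypothetical struct (not the real Parsev):

```
#include <stdint.h>
#include <stdio.h>
#include <string.h>

/* On typical 64-bit ABIs there are 7 padding bytes after `kind`.
   memcmp() reads them even though they were never assigned, which is
   harmless but is exactly what valgrind reports as a use of
   uninitialised memory. */
typedef struct {
  uint8_t kind;
  uint64_t value;
} Variant;

int main(void) {
  Variant a, b;
  a.kind = 1; a.value = 42; /* padding stays uninitialised */
  b.kind = 1; b.value = 42;
  /* May print 0 or 1: equal members, possibly different padding bytes. */
  printf("%d\n", memcmp(&a, &b, sizeof a) == 0);
  return 0;
}
```

Zero-initialising whole values before filling them in would also silence the warning; the comment instead points at removing the padding from the Parsev variants as the eventual fix.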
diff --git a/test/test.sh b/test/test.sh
index 7ee3855..7b0e41c 100644
--- a/test/test.sh
+++ b/test/test.sh
@@ -8,7 +8,7 @@ tmpb="$(mktemp)"
 trap "rm -f \"$tmps\" \"$tmpo\" \"$tmpb\"" EXIT
 
 t () {
-  echo "$1" > "$tmps"
+  echo -e "$1" > "$tmps"
   clang -Wno-everything -c -x assembler "$tmps" -o "$tmpo"
   objcopy -j ".text" -O binary "$tmpo" "$tmpb"
   want="$(xxd -ps "$tmpb" | head -n 1 | cut -d ' ' -f 2-)"
@@ -30,6 +30,18 @@ t () {
   echo -n "."
 }
 
+# Various regression tests first.
+t "xchgq %r13, %rax"
+t "movl \$1000, %r8d"
+t "movb %sil, (%rdi)"
+t "movsbq (%rax), %rbx"
+t "movq $-4132994306676758123, %rcx"
+t "mov \$17293822569102704639, %rax"
+t "callq *%rax"
+t "callq *%r10"
+t "movb %r11b, (%rsi, %r12, 1)"
+
+
 for r in rax r10
 do
 for x in xmm0 xmm13
@@ -41,19 +53,6 @@ do
 done
 done
 
-t "movl \$1000, %r8d"
-
-t "movb %sil, (%rdi)"
-
-t "movsbq (%rax), %rbx"
-
-t "movq $-4132994306676758123, %rcx"
-t "mov \$17293822569102704639, %rax"
-
-t "callq *%rax"
-t "callq *%r10"
-
-t "movb %r11b, (%rsi, %r12, 1)"
 
 t "cvttsd2si %xmm1, %rax"
 t "cvttsd2si %xmm10, %rax"
@@ -153,6 +152,17 @@ conditioncodes="
 o p pe po s z
 "
 
+for fill in 0 1 129
+do
+  t "l:\n .fill $fill, 1, 0x00 \njmp l"
+  t "jmp l\n .fill $fill, 1, 0x00 \nl:"
+  for cc in $conditioncodes
+  do
+    t "l:\n .fill $fill, 1, 0x00 \nj${cc} l"
+    t "j${cc} l\n .fill $fill, 1, 0x00 \nl:"
+  done
+done
+
 for cc in $conditioncodes
 do
   t "set${cc} %al"
