diff options
Diffstat (limited to 'main.c')
| -rw-r--r-- | main.c | 1916 |
1 files changed, 1000 insertions, 916 deletions
@@ -1,16 +1,16 @@ #include "minias.h" /* Parsed assembly */ -static AsmLine *allasm = NULL; +static AsmLine* allasm = NULL; /* Number of assembly relaxation passes. */ static int nrelax = 1; /* Symbols before writing to symtab section. */ -static struct hashtable *symbols = NULL; +static struct hashtable* symbols = NULL; /* Array of all relocations before adding to the rel section. */ -static Relocation *relocs = NULL; +static Relocation* relocs = NULL; static size_t nrelocs = 0; static size_t reloccap = 0; @@ -18,232 +18,279 @@ static size_t reloccap = 0; static Section sections[MAXSECTIONS]; static size_t nsections = 1; // first is reserved. -static Section *cursection = NULL; -static Section *shstrtab = NULL; -static Section *strtab = NULL; -static Section *symtab = NULL; -static Section *bss = NULL; -static Section *text = NULL; -static Section *data = NULL; -static Section *textrel = NULL; -static Section *datarel = NULL; - -static char *infilename = "<stdin>"; +static Section* cursection = NULL; +static Section* shstrtab = NULL; +static Section* strtab = NULL; +static Section* symtab = NULL; +static Section* bss = NULL; +static Section* text = NULL; +static Section* data = NULL; +static Section* textrel = NULL; +static Section* datarel = NULL; + +static char* infilename = "<stdin>"; static size_t curlineno = 0; -static void lfatal(const char *fmt, ...) { - va_list ap; - fprintf(stderr, "%s:%ld: ", infilename, curlineno); - va_start(ap, fmt); - vwarn(fmt, ap); - va_end(ap); - exit(1); -} - -static Symbol *getsym(const char *name) { - Symbol **ps, *s; - struct hashtablekey htk; - - htabkey(&htk, name, strlen(name)); - ps = (Symbol **)htabput(symbols, &htk); - if (!*ps) { - *ps = xmalloc(sizeof(Symbol)); - **ps = (Symbol){ - .name = name, - .wco = -1, - }; - } - s = *ps; - return s; +static void +lfatal(const char* fmt, ...) 
+{ + va_list ap; + fprintf(stderr, "%s:%ld: ", infilename, curlineno); + va_start(ap, fmt); + vwarn(fmt, ap); + va_end(ap); + exit(1); +} + +static Symbol* +getsym(const char* name) +{ + Symbol **ps, *s; + struct hashtablekey htk; + + htabkey(&htk, name, strlen(name)); + ps = (Symbol**)htabput(symbols, &htk); + if (!*ps) { + *ps = xmalloc(sizeof(Symbol)); + **ps = (Symbol){ + .name = name, + .wco = -1, + }; + } + s = *ps; + return s; } -static void secaddbytes(Section *s, const void *bytes, size_t n) { +static void +secaddbytes(Section* s, const void* bytes, size_t n) +{ + + if (s->hdr.sh_type == SHT_NOBITS) { + s->hdr.sh_size += n; + return; + } + + while (s->capacity < s->hdr.sh_size + n) { + s->capacity = s->capacity ? (s->capacity * 2) : 512; + s->data = xrealloc(s->data, s->capacity); + } + memcpy(s->data + s->hdr.sh_size, bytes, n); - if (s->hdr.sh_type == SHT_NOBITS) { s->hdr.sh_size += n; - return; - } - - while (s->capacity < s->hdr.sh_size + n) { - s->capacity = s->capacity ? (s->capacity * 2) : 512; - s->data = xrealloc(s->data, s->capacity); - } - memcpy(s->data + s->hdr.sh_size, bytes, n); - - s->hdr.sh_size += n; -} - -static void secaddbyte(Section *s, uint8_t b) { secaddbytes(s, &b, 1); } - -static Elf64_Word elfstr(Section *sec, const char *s) { - Elf64_Word i = sec->hdr.sh_size; - secaddbytes(sec, s, strlen(s) + 1); - return i; -} - -static Section *newsection() { - Section *s; - if (nsections >= MAXSECTIONS) - fatal("too many sections"); - s = &sections[nsections]; - s->idx = nsections; - nsections += 1; - return s; -} - -static Section *getsection(const char *name) { - size_t i; - char *secname; - Section *s; - - for (i = 0; i < nsections; i++) { - secname = (char *)shstrtab->data + sections[i].hdr.sh_name; - if (strcmp(secname, name) == 0) - return &sections[i]; - } - s = newsection(); - s->hdr.sh_name = elfstr(shstrtab, name); - return s; -} - -static void initsections(void) { - Elf64_Sym elfsym; - - shstrtab = newsection(); - secaddbyte(shstrtab, 0); 
- shstrtab->hdr.sh_name = elfstr(shstrtab, ".shstrtab"); - shstrtab->hdr.sh_type = SHT_STRTAB; - - strtab = newsection(); - secaddbyte(strtab, 0); - strtab->hdr.sh_name = elfstr(shstrtab, ".strtab"); - strtab->hdr.sh_type = SHT_STRTAB; - - symtab = newsection(); - symtab->hdr.sh_name = elfstr(shstrtab, ".symtab"); - symtab->hdr.sh_type = SHT_SYMTAB; - symtab->hdr.sh_link = strtab->idx; - symtab->hdr.sh_entsize = sizeof(Elf64_Sym); - memset(&elfsym, 0, sizeof(elfsym)); - secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym)); - - bss = newsection(); - bss->hdr.sh_name = elfstr(shstrtab, ".bss"); - bss->hdr.sh_type = SHT_NOBITS; - bss->hdr.sh_flags = SHF_ALLOC | SHF_WRITE; - bss->hdr.sh_addralign = 16; // XXX right value? - - data = newsection(); - data->hdr.sh_name = elfstr(shstrtab, ".data"); - data->hdr.sh_type = SHT_PROGBITS; - data->hdr.sh_flags = SHF_ALLOC | SHF_WRITE; - data->hdr.sh_addralign = 16; // XXX right value? - - text = newsection(); - text->hdr.sh_name = elfstr(shstrtab, ".text"); - text->hdr.sh_type = SHT_PROGBITS; - text->hdr.sh_flags = SHF_EXECINSTR | SHF_ALLOC; - text->hdr.sh_addralign = 4; - - textrel = newsection(); - textrel->hdr.sh_name = elfstr(shstrtab, ".rela.text"); - textrel->hdr.sh_type = SHT_RELA; - textrel->hdr.sh_info = text->idx; - textrel->hdr.sh_link = symtab->idx; - textrel->hdr.sh_entsize = sizeof(Elf64_Rela); - - datarel = newsection(); - datarel->hdr.sh_name = elfstr(shstrtab, ".rela.data"); - datarel->hdr.sh_type = SHT_RELA; - datarel->hdr.sh_info = data->idx; - datarel->hdr.sh_link = symtab->idx; - datarel->hdr.sh_entsize = sizeof(Elf64_Rela); -} - -static Relocation *newreloc() { - if (nrelocs == reloccap) { - reloccap = nrelocs ? 
nrelocs * 2 : 64; - relocs = xreallocarray(relocs, reloccap, sizeof(Relocation)); - } - return &relocs[nrelocs++]; +} + +static void +secaddbyte(Section* s, uint8_t b) +{ + secaddbytes(s, &b, 1); +} + +static Elf64_Word +elfstr(Section* sec, const char* s) +{ + Elf64_Word i = sec->hdr.sh_size; + secaddbytes(sec, s, strlen(s) + 1); + return i; +} + +static Section* +newsection() +{ + Section* s; + if (nsections >= MAXSECTIONS) + fatal("too many sections"); + s = &sections[nsections]; + s->idx = nsections; + nsections += 1; + return s; +} + +static Section* +getsection(const char* name) +{ + size_t i; + char* secname; + Section* s; + + for (i = 0; i < nsections; i++) { + secname = (char*)shstrtab->data + sections[i].hdr.sh_name; + if (strcmp(secname, name) == 0) + return &sections[i]; + } + s = newsection(); + s->hdr.sh_name = elfstr(shstrtab, name); + return s; +} + +static void +initsections(void) +{ + Elf64_Sym elfsym; + + shstrtab = newsection(); + secaddbyte(shstrtab, 0); + shstrtab->hdr.sh_name = elfstr(shstrtab, ".shstrtab"); + shstrtab->hdr.sh_type = SHT_STRTAB; + + strtab = newsection(); + secaddbyte(strtab, 0); + strtab->hdr.sh_name = elfstr(shstrtab, ".strtab"); + strtab->hdr.sh_type = SHT_STRTAB; + + symtab = newsection(); + symtab->hdr.sh_name = elfstr(shstrtab, ".symtab"); + symtab->hdr.sh_type = SHT_SYMTAB; + symtab->hdr.sh_link = strtab->idx; + symtab->hdr.sh_entsize = sizeof(Elf64_Sym); + memset(&elfsym, 0, sizeof(elfsym)); + secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym)); + + bss = newsection(); + bss->hdr.sh_name = elfstr(shstrtab, ".bss"); + bss->hdr.sh_type = SHT_NOBITS; + bss->hdr.sh_flags = SHF_ALLOC | SHF_WRITE; + bss->hdr.sh_addralign = 16; // XXX right value? + + data = newsection(); + data->hdr.sh_name = elfstr(shstrtab, ".data"); + data->hdr.sh_type = SHT_PROGBITS; + data->hdr.sh_flags = SHF_ALLOC | SHF_WRITE; + data->hdr.sh_addralign = 16; // XXX right value? 
+ + text = newsection(); + text->hdr.sh_name = elfstr(shstrtab, ".text"); + text->hdr.sh_type = SHT_PROGBITS; + text->hdr.sh_flags = SHF_EXECINSTR | SHF_ALLOC; + text->hdr.sh_addralign = 4; + + textrel = newsection(); + textrel->hdr.sh_name = elfstr(shstrtab, ".rela.text"); + textrel->hdr.sh_type = SHT_RELA; + textrel->hdr.sh_info = text->idx; + textrel->hdr.sh_link = symtab->idx; + textrel->hdr.sh_entsize = sizeof(Elf64_Rela); + + datarel = newsection(); + datarel->hdr.sh_name = elfstr(shstrtab, ".rela.data"); + datarel->hdr.sh_type = SHT_RELA; + datarel->hdr.sh_info = data->idx; + datarel->hdr.sh_link = symtab->idx; + datarel->hdr.sh_entsize = sizeof(Elf64_Rela); +} + +static Relocation* +newreloc() +{ + if (nrelocs == reloccap) { + reloccap = nrelocs ? nrelocs * 2 : 64; + relocs = xreallocarray(relocs, reloccap, sizeof(Relocation)); + } + return &relocs[nrelocs++]; } /* Shorthand helpers to write section data. */ -static void sb(uint8_t b) { secaddbyte(cursection, b); } +static void +sb(uint8_t b) +{ + secaddbyte(cursection, b); +} -static void sbn(uint8_t *bytes, size_t n) { secaddbytes(cursection, bytes, n); } +static void +sbn(uint8_t* bytes, size_t n) +{ + secaddbytes(cursection, bytes, n); +} -static void su16(uint16_t w) { - uint8_t buf[2] = {w & 0xff, (w & 0xff00) >> 8}; - secaddbytes(cursection, buf, sizeof(buf)); +static void +su16(uint16_t w) +{ + uint8_t buf[2] = { w & 0xff, (w & 0xff00) >> 8 }; + secaddbytes(cursection, buf, sizeof(buf)); } -static void su32(uint32_t l) { - uint8_t buf[4] = { - l & 0xff, - (l & 0xff00) >> 8, - (l & 0xff0000) >> 16, - (l & 0xff000000) >> 24, - }; - secaddbytes(cursection, buf, sizeof(buf)); +static void +su32(uint32_t l) +{ + uint8_t buf[4] = { + l & 0xff, + (l & 0xff00) >> 8, + (l & 0xff0000) >> 16, + (l & 0xff000000) >> 24, + }; + secaddbytes(cursection, buf, sizeof(buf)); } -static void su64(uint64_t l) { - uint8_t buf[8] = { - l & 0xff, - (l & 0xff00) >> 8, - (l & 0xff0000) >> 16, - (l & 0xff000000) >> 24, - (l & 
0xff00000000) >> 32, - (l & 0xff0000000000) >> 40, - (l & 0xff000000000000) >> 48, - (l & 0xff00000000000000) >> 56, - }; - secaddbytes(cursection, buf, sizeof(buf)); +static void +su64(uint64_t l) +{ + uint8_t buf[8] = { + l & 0xff, + (l & 0xff00) >> 8, + (l & 0xff0000) >> 16, + (l & 0xff000000) >> 24, + (l & 0xff00000000) >> 32, + (l & 0xff0000000000) >> 40, + (l & 0xff000000000000) >> 48, + (l & 0xff00000000000000) >> 56, + }; + secaddbytes(cursection, buf, sizeof(buf)); } /* Convert an AsmKind to register bits in reg/rm style. */ -static uint8_t regbits(AsmKind k) { return (k - (ASM_REG_BEGIN + 1)) % 16; } +static uint8_t +regbits(AsmKind k) +{ + return (k - (ASM_REG_BEGIN + 1)) % 16; +} /* Register that requires the use of a rex prefix. */ -static uint8_t isrexreg(AsmKind k) { - return k > ASM_REG_BEGIN && k < ASM_REG_END && - (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL || - k == ASM_SIL || k == ASM_DIL); +static uint8_t +isrexreg(AsmKind k) +{ + return k > ASM_REG_BEGIN && k < ASM_REG_END + && (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL || k == ASM_SIL || k == ASM_DIL); } -static uint8_t rexbyte(Rex rex) { - return ((1 << 6) | (rex.w << 3) | (rex.r << 2) | (rex.x << 1) | rex.b); +static uint8_t +rexbyte(Rex rex) +{ + return ((1 << 6) | (rex.w << 3) | (rex.r << 2) | (rex.x << 1) | rex.b); } /* Compose a mod/reg/rm byte - See intel manual. */ -static uint8_t modregrmbyte(uint8_t mod, uint8_t reg, uint8_t rm) { - return (((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7)); +static uint8_t +modregrmbyte(uint8_t mod, uint8_t reg, uint8_t rm) +{ + return (((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7)); } /* Compose an sib byte - See intel manual. 
*/ -static uint8_t sibbyte(uint8_t ss, uint8_t idx, uint8_t base) { - return (((ss & 3) << 6) | ((idx & 7) << 3) | (base & 7)); -} - -void assembleconstant(int64_t c, int nbytes) { - switch (nbytes) { - case 1: - sb((uint8_t)c); - break; - case 2: - su16((uint16_t)c); - break; - case 4: - su32((uint32_t)c); - break; - case 8: - su64((uint64_t)c); - break; - default: - unreachable(); - } +static uint8_t +sibbyte(uint8_t ss, uint8_t idx, uint8_t base) +{ + return (((ss & 3) << 6) | ((idx & 7) << 3) | (base & 7)); +} + +void +assembleconstant(int64_t c, int nbytes) +{ + switch (nbytes) { + case 1: + sb((uint8_t)c); + break; + case 2: + su16((uint16_t)c); + break; + case 4: + su32((uint32_t)c); + break; + case 8: + su64((uint64_t)c); + break; + default: + unreachable(); + } } /* The VarBytes type encodes a variadic number of bytes. @@ -256,766 +303,803 @@ void assembleconstant(int64_t c, int nbytes) { */ typedef int32_t VarBytes; -static void assemblevbytes(VarBytes bytes) { - int i, n; - uint8_t b, shift; - - n = (int8_t)(uint8_t)((bytes & 0xff000000) >> 24); - for (i = n; i >= 0; i--) { - shift = i * 8; - b = (bytes & (0xff << shift)) >> shift; - sb(b); - } +static void +assemblevbytes(VarBytes bytes) +{ + int i, n; + uint8_t b, shift; + + n = (int8_t)(uint8_t)((bytes & 0xff000000) >> 24); + for (i = n; i >= 0; i--) { + shift = i * 8; + b = (bytes & (0xff << shift)) >> shift; + sb(b); + } } -static void assemblerex(Rex rex) { - if (rex.required || rex.w || rex.r || rex.x || rex.b) - sb(rexbyte(rex)); +static void +assemblerex(Rex rex) +{ + if (rex.required || rex.w || rex.r || rex.x || rex.b) + sb(rexbyte(rex)); } /* Assemble a symbolic value. 
*/ -static void assemblereloc(const char *l, int64_t c, int nbytes, int type) { - Relocation *reloc; - Symbol *sym; - - if (l != NULL) { - reloc = newreloc(); - sym = getsym(l); - reloc->type = type; - reloc->section = cursection; - reloc->sym = sym; - reloc->offset = cursection->hdr.sh_size; - reloc->addend = c; - c = 0; - } - assembleconstant(c, nbytes); +static void +assemblereloc(const char* l, int64_t c, int nbytes, int type) +{ + Relocation* reloc; + Symbol* sym; + + if (l != NULL) { + reloc = newreloc(); + sym = getsym(l); + reloc->type = type; + reloc->section = cursection; + reloc->sym = sym; + reloc->offset = cursection->hdr.sh_size; + reloc->addend = c; + c = 0; + } + assembleconstant(c, nbytes); } /* Assemble a r <-> mem operation. */ -static void assemblemem(const Memarg *memarg, Rex rex, VarBytes prefix, - VarBytes opcode, uint8_t reg, int32_t nexti) { +static void +assemblemem( + const Memarg* memarg, Rex rex, VarBytes prefix, VarBytes opcode, uint8_t reg, int32_t nexti) +{ + + uint8_t mod, rm, scale, index, base; + + /* Rip relative addressing. */ + if (memarg->base == ASM_RIP) { + rm = 0x05; + assemblevbytes(prefix); + assemblerex(rex); + assemblevbytes(opcode); + sb(modregrmbyte(0x00, reg, rm)); + + if (memarg->disp.l) { + assemblereloc(memarg->disp.l, memarg->disp.c - 4 + nexti, 4, R_X86_64_PC32); + } else { + assembleconstant(memarg->disp.c, 4); + } + return; + } - uint8_t mod, rm, scale, index, base; + /* Direct memory access */ + if (memarg->base == ASM_NO_REG) { + mod = 0; + rm = 4; + + assemblevbytes(prefix); + assemblerex(rex); + assemblevbytes(opcode); + sb(modregrmbyte(mod, reg, rm)); + + sb(sibbyte(0, 4, 5)); + if (memarg->disp.l) { + assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32); + } else { + assembleconstant(memarg->disp.c, 4); + } + return; + } - /* Rip relative addressing. 
*/ - if (memarg->base == ASM_RIP) { - rm = 0x05; - assemblevbytes(prefix); - assemblerex(rex); - assemblevbytes(opcode); - sb(modregrmbyte(0x00, reg, rm)); + rm = regbits(memarg->base); + rex.b = !!(rm & (1 << 3)); - if (memarg->disp.l) { - assemblereloc(memarg->disp.l, memarg->disp.c - 4 + nexti, 4, - R_X86_64_PC32); - } else { - assembleconstant(memarg->disp.c, 4); + /* Case when we don't need sib */ + if (memarg->index == ASM_NO_REG && memarg->scale == 0 && ((rm & 7) != 4)) { + + if (memarg->disp.l == 0 && memarg->disp.c == 0) { + if ((rm & 7) == 5) { + mod = 1; + } else { + mod = 0; + } + } else { + mod = 2; + } + + assemblevbytes(prefix); + assemblerex(rex); + assemblevbytes(opcode); + sb(modregrmbyte(mod, reg, rm)); + + if (mod == 1) { + assembleconstant(memarg->disp.c, 1); + } else if (mod == 2) { + assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32); + } + return; } - return; - } - /* Direct memory access */ - if (memarg->base == ASM_NO_REG) { - mod = 0; + /* Setup sib indexing. */ + base = rm; rm = 4; - assemblevbytes(prefix); - assemblerex(rex); - assemblevbytes(opcode); - sb(modregrmbyte(mod, reg, rm)); + if (memarg->disp.c == 0 && memarg->disp.l == 0 && ((base & 7) != 5)) { + mod = 0; /* +0 */ + } else { + if (memarg->disp.l == NULL && memarg->disp.c >= -128 && memarg->disp.c <= 127) { + mod = 1; /* +disp8 */ + } else { + mod = 2; /* +disp32 */ + } + } - sb(sibbyte(0, 4, 5)); - if (memarg->disp.l) { - assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32); + if (memarg->index == ASM_NO_REG) { + index = 4; } else { - assembleconstant(memarg->disp.c, 4); + if (memarg->index == ASM_RSP) + lfatal("rsp cannot be used as an index"); + index = regbits(memarg->index); } - return; - } - rm = regbits(memarg->base); - rex.b = !!(rm & (1 << 3)); + /* If our base is a bp register, we must use the index instead. 
*/ + if ((base & 7) == 5 && memarg->index == ASM_NO_REG) { + index = base; + } - /* Case when we don't need sib */ - if (memarg->index == ASM_NO_REG && memarg->scale == 0 && ((rm & 7) != 4)) { + rex.x = !!(index & (1 << 3)); - if (memarg->disp.l == 0 && memarg->disp.c == 0) { - if ((rm & 7) == 5) { - mod = 1; - } else { - mod = 0; - } - } else { - mod = 2; + switch (memarg->scale) { + case 0: + case 1: + scale = 0; + break; + case 2: + scale = 1; + break; + case 4: + scale = 2; + break; + case 8: + scale = 3; + break; + default: + lfatal("invalid addressing scale"); + return; } assemblevbytes(prefix); assemblerex(rex); assemblevbytes(opcode); sb(modregrmbyte(mod, reg, rm)); + sb(sibbyte(scale, index, base)); if (mod == 1) { - assembleconstant(memarg->disp.c, 1); + assembleconstant(memarg->disp.c, 1); } else if (mod == 2) { - assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32); + assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32); } - return; - } - - /* Setup sib indexing. */ - base = rm; - rm = 4; +} - if (memarg->disp.c == 0 && memarg->disp.l == 0 && ((base & 7) != 5)) { - mod = 0; /* +0 */ - } else { - if (memarg->disp.l == NULL && memarg->disp.c >= -128 && - memarg->disp.c <= 127) { - mod = 1; /* +disp8 */ - } else { - mod = 2; /* +disp32 */ - } - } - - if (memarg->index == ASM_NO_REG) { - index = 4; - } else { - if (memarg->index == ASM_RSP) - lfatal("rsp cannot be used as an index"); - index = regbits(memarg->index); - } - - /* If our base is a bp register, we must use the index instead. 
*/ - if ((base & 7) == 5 && memarg->index == ASM_NO_REG) { - index = base; - } - - rex.x = !!(index & (1 << 3)); - - switch (memarg->scale) { - case 0: - case 1: - scale = 0; - break; - case 2: - scale = 1; - break; - case 4: - scale = 2; - break; - case 8: - scale = 3; - break; - default: - lfatal("invalid addressing scale"); - return; - } - - assemblevbytes(prefix); - assemblerex(rex); - assemblevbytes(opcode); - sb(modregrmbyte(mod, reg, rm)); - sb(sibbyte(scale, index, base)); - - if (mod == 1) { - assembleconstant(memarg->disp.c, 1); - } else if (mod == 2) { - assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32); - } -} - -static void assemblecall(const Call *call) { - Rex rex; - uint8_t rm; - - if (call->indirect) { - if (call->target.indirect->kind == ASM_MEMARG) { - rex = (Rex){0}; - abort(); // assemblemem(&call->target.indirect->memarg, rex, -1, 0xff, - // 0x02); - } else { - rm = regbits(call->target.indirect->kind); - rex = (Rex){.b = !!(rm & (1 << 3))}; - assemblerex(rex); - assemblevbytes(0xff); - sb(modregrmbyte(0x03, 0x02, rm)); - } - } else { - sb(0xe8); - assemblereloc(call->target.direct.l, call->target.direct.c - 4, 4, - R_X86_64_PC32); - } -} - -static void assemblejmp(const Jmp *j) { - int jmpsize; - int64_t distance; - Symbol *target; - - static uint8_t cc2op[31] = { - 0xe9, 0x84, 0x88, 0x8b, 0x8a, 0x8a, 0x80, 0x85, 0x89, 0x8b, 0x81, - 0x8f, 0x8d, 0x8c, 0x8e, 0x85, 0x83, 0x87, 0x83, 0x82, 0x86, 0x8e, - 0x8c, 0x8d, 0x8f, 0x84, 0x82, 0x86, 0x82, 0x83, 0x87, - }; - - jmpsize = 4; - target = getsym(j->target); - if (cursection == target->section && (target->defined || target->wco != -1)) { - if (target->defined) { - distance = target->offset - cursection->hdr.sh_size; +static void +assemblecall(const Call* call) +{ + Rex rex; + uint8_t rm; + + if (call->indirect) { + if (call->target.indirect->kind == ASM_MEMARG) { + rex = (Rex){ 0 }; + abort(); // assemblemem(&call->target.indirect->memarg, rex, -1, 0xff, + // 0x02); + } else { + rm = 
regbits(call->target.indirect->kind); + rex = (Rex){ .b = !!(rm & (1 << 3)) }; + assemblerex(rex); + assemblevbytes(0xff); + sb(modregrmbyte(0x03, 0x02, rm)); + } } else { - distance = target->wco - cursection->hdr.sh_size; + sb(0xe8); + assemblereloc(call->target.direct.l, call->target.direct.c - 4, 4, R_X86_64_PC32); } - if ((distance - 1) >= -128 && (distance - 1) <= 127) { - jmpsize = 1; - } else { - jmpsize = 4; - } - } - - if (jmpsize == 4) { - if (j->cc) - sb(0x0f); - sb(cc2op[j->cc]); - assemblereloc(j->target, -4, 4, R_X86_64_PC32); - } else { - sb(cc2op[j->cc] + (j->cc ? -16 : 2)); - assemblereloc(j->target, -1, 1, R_X86_64_PC8); - } -} - -static void assembleabsimm(const Imm *imm) { - if (imm->nbytes == 1) - assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_8); - else if (imm->nbytes == 2) - assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_16); - else if (imm->nbytes == 4) - assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32); - else if (imm->nbytes == 8) - assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_64); - else - unreachable(); -} - -static void assembleinstr(const Instr *instr) { - Rex rex; - const Memarg *memarg; - const Imm *imm; - uint8_t reg, rm; - - switch (instr->encoder) { - case ENCODER_OP: - assemblevbytes(instr->opcode); - break; - case ENCODER_OPREG: - rm = regbits(instr->arg1->kind); - rex = instr->rex; - rex.required = isrexreg(instr->arg1->kind); - rex.b = !!(rm & (1 << 3)); - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode); - sb(modregrmbyte(0x03, instr->fixedreg, rm)); - break; - case ENCODER_OPMEM: - memarg = &instr->arg1->memarg; - rex = instr->rex; - assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, 0); - break; - case ENCODER_R: - reg = regbits(instr->arg1->kind); - rex = instr->rex; - rex.required = isrexreg(instr->arg1->kind); - rex.b = !!(reg & (1 << 3)); - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode | 
(reg & 7)); - break; - case ENCODER_RIMM: - imm = &instr->arg1->imm; - reg = regbits(instr->arg2->kind); - rex = instr->rex; - rex.required = isrexreg(instr->arg2->kind); - rex.b = !!(reg & (1 << 3)); - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode | (reg & 7)); - assembleabsimm(imm); - break; - case ENCODER_IMM: - imm = &instr->arg1->imm; - rex = instr->rex; - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode); - assembleabsimm(imm); - break; - case ENCODER_IMMREG: - imm = &instr->arg1->imm; - reg = instr->fixedreg; - rm = regbits(instr->arg2->kind); - rex = instr->rex; - rex.required = isrexreg(instr->arg2->kind); - rex.b = !!(rm & (1 << 3)); - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode); - sb(modregrmbyte(0x03, reg, rm)); - assembleabsimm(imm); - break; - case ENCODER_IMMMEM: - imm = &instr->arg1->imm; - memarg = &instr->arg2->memarg; - reg = instr->fixedreg; - rex = instr->rex; - assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, - imm->nbytes); - assembleabsimm(imm); - break; - case ENCODER_REGMEM: - case ENCODER_MEMREG: - if (instr->encoder == ENCODER_MEMREG) { - memarg = &instr->arg1->memarg; - reg = regbits(instr->arg2->kind); - } else { - memarg = &instr->arg2->memarg; - reg = regbits(instr->arg1->kind); - } - rex = instr->rex; - rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); - rex.r = !!(reg & (1 << 3)); - assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, 0); - break; - case ENCODER_REGREG: - case ENCODER_REGREG2: - if (instr->encoder == ENCODER_REGREG) { - reg = regbits(instr->arg1->kind); - rm = regbits(instr->arg2->kind); +} + +static void +assemblejmp(const Jmp* j) +{ + int jmpsize; + int64_t distance; + Symbol* target; + + // clang-format off + static uint8_t cc2op[31] = { + 0xe9, 0x84, 0x88, 0x8b, 0x8a, 0x8a, 0x80, 0x85, + 0x89, 0x8b, 0x81, 0x8f, 0x8d, 0x8c, 0x8e, 0x85, + 0x83, 0x87, 
0x83, 0x82, 0x86, 0x8e, 0x8c, 0x8d, + 0x8f, 0x84, 0x82, 0x86, 0x82, 0x83, 0x87, + }; + // clang-format on + + jmpsize = 4; + target = getsym(j->target); + if (cursection == target->section && (target->defined || target->wco != -1)) { + if (target->defined) { + distance = target->offset - cursection->hdr.sh_size; + } else { + distance = target->wco - cursection->hdr.sh_size; + } + if ((distance - 1) >= -128 && (distance - 1) <= 127) { + jmpsize = 1; + } else { + jmpsize = 4; + } + } + + if (jmpsize == 4) { + if (j->cc) + sb(0x0f); + sb(cc2op[j->cc]); + assemblereloc(j->target, -4, 4, R_X86_64_PC32); } else { - reg = regbits(instr->arg2->kind); - rm = regbits(instr->arg1->kind); + sb(cc2op[j->cc] + (j->cc ? -16 : 2)); + assemblereloc(j->target, -1, 1, R_X86_64_PC8); } - rex = instr->rex; - rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); - rex.r = !!(reg & (1 << 3)); - rex.b = !!(rm & (1 << 3)); - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode); - sb(modregrmbyte(0x03, reg, rm)); - break; - case ENCODER_IMMREGREG2: - imm = &instr->arg1->imm; - reg = regbits(instr->arg3->kind); - rm = regbits(instr->arg2->kind); - rex = instr->rex; - rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); - rex.r = !!(reg & (1 << 3)); - rex.b = !!(rm & (1 << 3)); - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode); - sb(modregrmbyte(0x03, reg, rm)); - assembleabsimm(imm); - break; - case ENCODER_IMMMEMREG: - imm = &instr->arg1->imm; - memarg = &instr->arg2->memarg; - reg = regbits(instr->arg3->kind); - rex = instr->rex; - rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); - rex.r = !!(reg & (1 << 3)); - assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, imm->nbytes); - assembleabsimm(imm); - break; - default: - unreachable(); - } -} - -static void assemble(void) { - Symbol *sym; - AsmLine *l; - const Parsev *v; - - cursection = text; - 
curlineno = 0; - for (l = allasm; l; l = l->next) { - curlineno++; - v = l->v; - switch (v->kind) { - case ASM_SYNTAX_ERROR: - lfatal("syntax error"); - break; - case ASM_BLANK: - break; - case ASM_DIR_GLOBL: - sym = getsym(v->globl.name); - sym->global = 1; - break; - case ASM_DIR_SECTION: { - const char *fp; - Section *s; - - s = getsection(v->section.name); - s->hdr.sh_type = v->section.type; - fp = v->section.flags; - while (fp && *fp) { - switch (*(fp++)) { - case 'a': - s->hdr.sh_flags |= SHF_ALLOC; - break; - case 'w': - s->hdr.sh_flags |= SHF_WRITE; - break; - case 'x': - s->hdr.sh_flags |= SHF_EXECINSTR; - break; - default: - unreachable(); +} + +static void +assembleabsimm(const Imm* imm) +{ + if (imm->nbytes == 1) + assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_8); + else if (imm->nbytes == 2) + assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_16); + else if (imm->nbytes == 4) + assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32); + else if (imm->nbytes == 8) + assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_64); + else + unreachable(); +} + +static void +assembleinstr(const Instr* instr) +{ + Rex rex; + const Memarg* memarg; + const Imm* imm; + uint8_t reg, rm; + + switch (instr->encoder) { + case ENCODER_OP: + assemblevbytes(instr->opcode); + break; + case ENCODER_OPREG: + rm = regbits(instr->arg1->kind); + rex = instr->rex; + rex.required = isrexreg(instr->arg1->kind); + rex.b = !!(rm & (1 << 3)); + assemblevbytes(instr->prefix); + assemblerex(rex); + assemblevbytes(instr->opcode); + sb(modregrmbyte(0x03, instr->fixedreg, rm)); + break; + case ENCODER_OPMEM: + memarg = &instr->arg1->memarg; + rex = instr->rex; + assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, 0); + break; + case ENCODER_R: + reg = regbits(instr->arg1->kind); + rex = instr->rex; + rex.required = isrexreg(instr->arg1->kind); + rex.b = !!(reg & (1 << 3)); + assemblevbytes(instr->prefix); + assemblerex(rex); + 
assemblevbytes(instr->opcode | (reg & 7)); + break; + case ENCODER_RIMM: + imm = &instr->arg1->imm; + reg = regbits(instr->arg2->kind); + rex = instr->rex; + rex.required = isrexreg(instr->arg2->kind); + rex.b = !!(reg & (1 << 3)); + assemblevbytes(instr->prefix); + assemblerex(rex); + assemblevbytes(instr->opcode | (reg & 7)); + assembleabsimm(imm); + break; + case ENCODER_IMM: + imm = &instr->arg1->imm; + rex = instr->rex; + assemblevbytes(instr->prefix); + assemblerex(rex); + assemblevbytes(instr->opcode); + assembleabsimm(imm); + break; + case ENCODER_IMMREG: + imm = &instr->arg1->imm; + reg = instr->fixedreg; + rm = regbits(instr->arg2->kind); + rex = instr->rex; + rex.required = isrexreg(instr->arg2->kind); + rex.b = !!(rm & (1 << 3)); + assemblevbytes(instr->prefix); + assemblerex(rex); + assemblevbytes(instr->opcode); + sb(modregrmbyte(0x03, reg, rm)); + assembleabsimm(imm); + break; + case ENCODER_IMMMEM: + imm = &instr->arg1->imm; + memarg = &instr->arg2->memarg; + reg = instr->fixedreg; + rex = instr->rex; + assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, imm->nbytes); + assembleabsimm(imm); + break; + case ENCODER_REGMEM: + case ENCODER_MEMREG: + if (instr->encoder == ENCODER_MEMREG) { + memarg = &instr->arg1->memarg; + reg = regbits(instr->arg2->kind); + } else { + memarg = &instr->arg2->memarg; + reg = regbits(instr->arg1->kind); } - } - cursection = s; - break; - } - case ASM_DIR_DATA: - cursection = data; - break; - case ASM_DIR_TEXT: - cursection = text; - break; - case ASM_DIR_ASCII: - sbn(v->ascii.data, v->ascii.len); - break; - case ASM_DIR_ASCIIZ: - sbn(v->asciiz.data, v->asciiz.len); - sb(0x00); - break; - case ASM_DIR_BALIGN: { - int64_t offset, i, rem, amnt; - amnt = 0; - offset = cursection->hdr.sh_addralign + cursection->hdr.sh_size; - rem = offset % v->balign.align; - if (rem) - amnt = v->balign.align - rem; - for (i = 0; i < amnt; i++) { - sb(0x00); - } - break; - } - case ASM_DIR_FILL: { - ssize_t i = 0; - - for 
(i = 0; i < v->fill.repeat; i++) { - switch (v->fill.size) { - case 1: - case 2: - case 4: - case 8: - assembleconstant(v->fill.value, v->fill.size); - break; - default: - lfatal("unsupported fill size '%d'", v->fill.size); + rex = instr->rex; + rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); + rex.r = !!(reg & (1 << 3)); + assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, 0); + break; + case ENCODER_REGREG: + case ENCODER_REGREG2: + if (instr->encoder == ENCODER_REGREG) { + reg = regbits(instr->arg1->kind); + rm = regbits(instr->arg2->kind); + } else { + reg = regbits(instr->arg2->kind); + rm = regbits(instr->arg1->kind); } - } - break; - } - case ASM_DIR_BYTE: - assemblereloc(v->dirbyte.value.l, v->dirbyte.value.c, 1, R_X86_64_32); - break; - case ASM_DIR_SHORT: - assemblereloc(v->dirshort.value.l, v->dirshort.value.c, 2, R_X86_64_32); - break; - case ASM_DIR_INT: - assemblereloc(v->dirint.value.l, v->dirint.value.c, 4, R_X86_64_32); - break; - case ASM_DIR_QUAD: - assemblereloc(v->dirquad.value.l, v->dirquad.value.c, 8, R_X86_64_64); - break; - case ASM_LABEL: - sym = getsym(v->label.name); - sym->section = cursection; - sym->offset = cursection->hdr.sh_size; - if (sym->defined) - lfatal("%s already defined", sym->name); - sym->defined = 1; - break; - case ASM_INSTR: - assembleinstr(&v->instr); - break; - case ASM_CALL: - assemblecall(&v->call); - break; - case ASM_JMP: - assemblejmp(&v->jmp); - break; + rex = instr->rex; + rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); + rex.r = !!(reg & (1 << 3)); + rex.b = !!(rm & (1 << 3)); + assemblevbytes(instr->prefix); + assemblerex(rex); + assemblevbytes(instr->opcode); + sb(modregrmbyte(0x03, reg, rm)); + break; + case ENCODER_IMMREGREG2: + imm = &instr->arg1->imm; + reg = regbits(instr->arg3->kind); + rm = regbits(instr->arg2->kind); + rex = instr->rex; + rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); + rex.r = !!(reg & (1 << 
3)); + rex.b = !!(rm & (1 << 3)); + assemblevbytes(instr->prefix); + assemblerex(rex); + assemblevbytes(instr->opcode); + sb(modregrmbyte(0x03, reg, rm)); + assembleabsimm(imm); + break; + case ENCODER_IMMMEMREG: + imm = &instr->arg1->imm; + memarg = &instr->arg2->memarg; + reg = regbits(instr->arg3->kind); + rex = instr->rex; + rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); + rex.r = !!(reg & (1 << 3)); + assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, imm->nbytes); + assembleabsimm(imm); + break; default: - lfatal("assemble: unexpected kind: %d", v->kind); + unreachable(); + } +} + +static void +assemble(void) +{ + Symbol* sym; + AsmLine* l; + const Parsev* v; + + cursection = text; + curlineno = 0; + for (l = allasm; l; l = l->next) { + curlineno++; + v = l->v; + switch (v->kind) { + case ASM_SYNTAX_ERROR: + lfatal("syntax error"); + break; + case ASM_BLANK: + break; + case ASM_DIR_GLOBL: + sym = getsym(v->globl.name); + sym->global = 1; + break; + case ASM_DIR_SECTION: { + const char* fp; + Section* s; + + s = getsection(v->section.name); + s->hdr.sh_type = v->section.type; + fp = v->section.flags; + while (fp && *fp) { + switch (*(fp++)) { + case 'a': + s->hdr.sh_flags |= SHF_ALLOC; + break; + case 'w': + s->hdr.sh_flags |= SHF_WRITE; + break; + case 'x': + s->hdr.sh_flags |= SHF_EXECINSTR; + break; + default: + unreachable(); + } + } + cursection = s; + break; + } + case ASM_DIR_DATA: + cursection = data; + break; + case ASM_DIR_TEXT: + cursection = text; + break; + case ASM_DIR_ASCII: + sbn(v->ascii.data, v->ascii.len); + break; + case ASM_DIR_ASCIIZ: + sbn(v->asciiz.data, v->asciiz.len); + sb(0x00); + break; + case ASM_DIR_BALIGN: { + int64_t offset, i, rem, amnt; + amnt = 0; + offset = cursection->hdr.sh_addralign + cursection->hdr.sh_size; + rem = offset % v->balign.align; + if (rem) + amnt = v->balign.align - rem; + for (i = 0; i < amnt; i++) { + sb(0x00); + } + break; + } + case ASM_DIR_FILL: { + ssize_t i = 0; + 
+ for (i = 0; i < v->fill.repeat; i++) { + switch (v->fill.size) { + case 1: + case 2: + case 4: + case 8: + assembleconstant(v->fill.value, v->fill.size); + break; + default: + lfatal("unsupported fill size '%d'", v->fill.size); + } + } + break; + } + case ASM_DIR_BYTE: + assemblereloc(v->dirbyte.value.l, v->dirbyte.value.c, 1, R_X86_64_32); + break; + case ASM_DIR_SHORT: + assemblereloc(v->dirshort.value.l, v->dirshort.value.c, 2, R_X86_64_32); + break; + case ASM_DIR_INT: + assemblereloc(v->dirint.value.l, v->dirint.value.c, 4, R_X86_64_32); + break; + case ASM_DIR_QUAD: + assemblereloc(v->dirquad.value.l, v->dirquad.value.c, 8, R_X86_64_64); + break; + case ASM_LABEL: + sym = getsym(v->label.name); + sym->section = cursection; + sym->offset = cursection->hdr.sh_size; + if (sym->defined) + lfatal("%s already defined", sym->name); + sym->defined = 1; + break; + case ASM_INSTR: + assembleinstr(&v->instr); + break; + case ASM_CALL: + assemblecall(&v->call); + break; + case ASM_JMP: + assemblejmp(&v->jmp); + break; + default: + lfatal("assemble: unexpected kind: %d", v->kind); + } } - } } /* Reset while remembering symbol offsets so we can size jumps. */ -static void relaxreset(void) { - Symbol *sym; - Section *sec; - size_t i; - - /* Reset relocations and section data but retain capacity. */ - nrelocs = 0; - - for (i = 0; i < nsections; i++) { - sec = &sections[i]; - if (sec == shstrtab) - continue; - sec->hdr.sh_size = 0; - } - - /* Reset symbols, saving the worst case offset for the second pass. */ - for (i = 0; i < symbols->cap; i++) { - if (!symbols->keys[i].str) - continue; - sym = symbols->vals[i]; - *sym = (Symbol){ - .name = sym->name, .section = sym->section, .wco = sym->offset}; - } -} - -static void addtosymtab(Symbol *sym) { - Elf64_Sym elfsym; - int stype; - int sbind; - - stype = 0; - if (sym->defined) { - sbind = sym->global ? 
STB_GLOBAL : STB_LOCAL; - } else { - sbind = STB_GLOBAL; - } - - sym->idx = symtab->hdr.sh_size / symtab->hdr.sh_entsize; - - elfsym.st_name = elfstr(strtab, sym->name); - elfsym.st_value = sym->offset; - elfsym.st_size = sym->size; - elfsym.st_info = ELF64_ST_INFO(sbind, stype); - elfsym.st_shndx = sym->section ? sym->section->idx : SHN_UNDEF; - elfsym.st_other = 0; - secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym)); -} - -static void fillsymtab(void) { - Symbol *sym; - size_t i; - - // Local symbols - for (i = 0; i < symbols->cap; i++) { - if (!symbols->keys[i].str) - continue; - sym = symbols->vals[i]; - if (!sym->defined || sym->global) - continue; - addtosymtab(sym); - } - - // Global symbols - - // Set start of global symbols. - symtab->hdr.sh_info = symtab->hdr.sh_size / symtab->hdr.sh_entsize; - - for (i = 0; i < symbols->cap; i++) { - if (!symbols->keys[i].str) - continue; - sym = symbols->vals[i]; - - if (sym->defined && !sym->global) - continue; - addtosymtab(sym); - } -} - -static int resolvereloc(Relocation *reloc) { - Symbol *sym; - uint8_t *rdata; - int64_t value; - - sym = reloc->sym; - - if (sym->section != reloc->section) - return 0; +static void +relaxreset(void) +{ + Symbol* sym; + Section* sec; + size_t i; + + /* Reset relocations and section data but retain capacity. 
*/ + nrelocs = 0; + + for (i = 0; i < nsections; i++) { + sec = &sections[i]; + if (sec == shstrtab) + continue; + sec->hdr.sh_size = 0; + } - switch (reloc->type) { - case R_X86_64_32: - case R_X86_64_64: - return 0; - case R_X86_64_PC8: - rdata = &reloc->section->data[reloc->offset]; - value = sym->offset - reloc->offset + reloc->addend; - rdata[0] = ((uint8_t)value & 0xff); - return 1; - case R_X86_64_PC32: - rdata = &reloc->section->data[reloc->offset]; - value = sym->offset - reloc->offset + reloc->addend; - rdata[0] = ((uint32_t)value & 0xff); - rdata[1] = ((uint32_t)value & 0xff00) >> 8; - rdata[2] = ((uint32_t)value & 0xff0000) >> 16; - rdata[3] = ((uint32_t)value & 0xff000000) >> 24; - return 1; - default: - unreachable(); - return 0; - } -} - -static void appendreloc(Relocation *reloc) { - Symbol *sym; - Section *relsection; - Elf64_Rela elfrel; - - memset(&elfrel, 0, sizeof(elfrel)); - - sym = reloc->sym; - if (reloc->section == text) - relsection = textrel; - else if (reloc->section == data) - relsection = datarel; - else { - fatal("unexpected relocation for symbol '%s'", sym->name); - return; - } - - switch (reloc->type) { - case R_X86_64_PC32: - case R_X86_64_32: - case R_X86_64_64: - elfrel.r_info = ELF64_R_INFO(sym->idx, reloc->type); - elfrel.r_offset = reloc->offset; - elfrel.r_addend = reloc->addend; - break; - default: - unreachable(); - } - - secaddbytes(relsection, &elfrel, sizeof(elfrel)); -} - -static void handlerelocs(void) { - Relocation *reloc; - size_t i; - for (i = 0; i < nrelocs; i++) { - reloc = &relocs[i]; - if (resolvereloc(reloc)) - continue; - appendreloc(reloc); - } -} - -static void out(const void *buf, size_t n) { - fwrite(buf, 1, n, stdout); - if (ferror(stdout)) - fatal("fwrite:"); -} - -static void outelf(void) { - size_t i; - uint64_t offset; - Elf64_Ehdr ehdr; - - memset(&ehdr, 0, sizeof(ehdr)); - ehdr.e_ident[0] = 0x7f; - ehdr.e_ident[1] = 'E'; - ehdr.e_ident[2] = 'L'; - ehdr.e_ident[3] = 'F'; - ehdr.e_ident[4] = ELFCLASS64; 
- ehdr.e_ident[5] = ELFDATA2LSB; - ehdr.e_ident[6] = 1; - ehdr.e_type = ET_REL; - ehdr.e_machine = EM_X86_64; - ehdr.e_flags = 0; - ehdr.e_version = 1; - ehdr.e_ehsize = sizeof(Elf64_Ehdr); - ehdr.e_shoff = sizeof(Elf64_Ehdr); - ehdr.e_shentsize = sizeof(Elf64_Shdr); - ehdr.e_shnum = nsections; - ehdr.e_shstrndx = 1; - - out(&ehdr, sizeof(ehdr)); - offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Shdr) * nsections; - - for (i = 0; i < nsections; i++) { - sections[i].hdr.sh_offset = offset; - out(&sections[i].hdr, sizeof(Elf64_Shdr)); - offset += sections[i].hdr.sh_size; - } - for (i = 0; i < nsections; i++) { - if (sections[i].hdr.sh_type == SHT_NOBITS) - continue; - out(sections[i].data, sections[i].hdr.sh_size); - } - if (fflush(stdout) != 0) - fatal("fflush:"); -} - -static void usage(char *argv0) { - fprintf(stderr, "minias - a mini x86-64 assembler.\n\n"); - fprintf(stderr, "usage: %s [-r iter] [-o out] [input]\n", argv0); - fprintf(stderr, "\n"); - fprintf(stderr, " -r iter Jump relaxation iterations (default 1).\n"); - fprintf(stderr, " -o out Output file to write (default stdout).\n"); - exit(2); -} - -static void parseargs(int argc, char *argv[]) { - char *a, *argv0, *outfname; - - argv0 = argv[0]; - - for (++argv; *argv; argv++) { - if (argv[0][0] != '-') - break; - for (a = &argv[0][1]; *a; a++) { - switch (*a) { - case '-': - case 'h': - usage(argv0); - break; - case 'r': - nrelax = atoi(*++argv); - break; - case 'o': - if (argv[1] == NULL) - usage(argv0); - outfname = *++argv; - if (!freopen(outfname, "w", stdout)) - fatal("unable to open %s:", outfname); + /* Reset symbols, saving the worst case offset for the second pass. 
*/ + for (i = 0; i < symbols->cap; i++) { + if (!symbols->keys[i].str) + continue; + sym = symbols->vals[i]; + *sym = (Symbol){ .name = sym->name, .section = sym->section, .wco = sym->offset }; + } +} + +static void +addtosymtab(Symbol* sym) +{ + Elf64_Sym elfsym; + int stype; + int sbind; + + stype = 0; + if (sym->defined) { + sbind = sym->global ? STB_GLOBAL : STB_LOCAL; + } else { + sbind = STB_GLOBAL; + } + + sym->idx = symtab->hdr.sh_size / symtab->hdr.sh_entsize; + + elfsym.st_name = elfstr(strtab, sym->name); + elfsym.st_value = sym->offset; + elfsym.st_size = sym->size; + elfsym.st_info = ELF64_ST_INFO(sbind, stype); + elfsym.st_shndx = sym->section ? sym->section->idx : SHN_UNDEF; + elfsym.st_other = 0; + secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym)); +} + +static void +fillsymtab(void) +{ + Symbol* sym; + size_t i; + + // Local symbols + for (i = 0; i < symbols->cap; i++) { + if (!symbols->keys[i].str) + continue; + sym = symbols->vals[i]; + if (!sym->defined || sym->global) + continue; + addtosymtab(sym); + } + + // Global symbols + + // Set start of global symbols. 
+ symtab->hdr.sh_info = symtab->hdr.sh_size / symtab->hdr.sh_entsize; + + for (i = 0; i < symbols->cap; i++) { + if (!symbols->keys[i].str) + continue; + + sym = symbols->vals[i]; + if (sym->defined && !sym->global) + continue; + addtosymtab(sym); + } +} + +static int +resolvereloc(Relocation* reloc) +{ + Symbol* sym; + uint8_t* rdata; + int64_t value; + + sym = reloc->sym; + + if (sym->section != reloc->section) + return 0; + + switch (reloc->type) { + case R_X86_64_32: + case R_X86_64_64: + return 0; + case R_X86_64_PC8: + rdata = &reloc->section->data[reloc->offset]; + value = sym->offset - reloc->offset + reloc->addend; + rdata[0] = ((uint8_t)value & 0xff); + return 1; + case R_X86_64_PC32: + rdata = &reloc->section->data[reloc->offset]; + value = sym->offset - reloc->offset + reloc->addend; + rdata[0] = ((uint32_t)value & 0xff); + rdata[1] = ((uint32_t)value & 0xff00) >> 8; + rdata[2] = ((uint32_t)value & 0xff0000) >> 16; + rdata[3] = ((uint32_t)value & 0xff000000) >> 24; + return 1; + default: + unreachable(); + return 0; + } +} + +static void +appendreloc(Relocation* reloc) +{ + Symbol* sym; + Section* relsection; + Elf64_Rela elfrel; + + memset(&elfrel, 0, sizeof(elfrel)); + + sym = reloc->sym; + if (reloc->section == text) + relsection = textrel; + else if (reloc->section == data) + relsection = datarel; + else { + fatal("unexpected relocation for symbol '%s'", sym->name); + return; + } + + switch (reloc->type) { + case R_X86_64_PC32: + case R_X86_64_32: + case R_X86_64_64: + elfrel.r_info = ELF64_R_INFO(sym->idx, reloc->type); + elfrel.r_offset = reloc->offset; + elfrel.r_addend = reloc->addend; break; - default: - usage(argv0); - } - } - } - - if (argv[0]) { - if (argv[1]) - usage(argv0); - infilename = argv[0]; - if (!freopen(infilename, "r", stdin)) - fatal("unable to open %s:", infilename); - } -} - -int main(int argc, char *argv[]) { - symbols = mkhtab(256); - parseargs(argc, argv); - allasm = parseasm(); - initsections(); - assemble(); - while 
(nrelax-- > 0) { - relaxreset(); + default: + unreachable(); + } + + secaddbytes(relsection, &elfrel, sizeof(elfrel)); +} + +static void +handlerelocs(void) +{ + Relocation* reloc; + size_t i; + for (i = 0; i < nrelocs; i++) { + reloc = &relocs[i]; + if (resolvereloc(reloc)) + continue; + appendreloc(reloc); + } +} + +static void +out(const void* buf, size_t n) +{ + fwrite(buf, 1, n, stdout); + if (ferror(stdout)) + fatal("fwrite:"); +} + +static void +outelf(void) +{ + size_t i; + uint64_t offset; + Elf64_Ehdr ehdr; + + memset(&ehdr, 0, sizeof(ehdr)); + ehdr.e_ident[0] = 0x7f; + ehdr.e_ident[1] = 'E'; + ehdr.e_ident[2] = 'L'; + ehdr.e_ident[3] = 'F'; + ehdr.e_ident[4] = ELFCLASS64; + ehdr.e_ident[5] = ELFDATA2LSB; + ehdr.e_ident[6] = 1; + ehdr.e_type = ET_REL; + ehdr.e_machine = EM_X86_64; + ehdr.e_flags = 0; + ehdr.e_version = 1; + ehdr.e_ehsize = sizeof(Elf64_Ehdr); + ehdr.e_shoff = sizeof(Elf64_Ehdr); + ehdr.e_shentsize = sizeof(Elf64_Shdr); + ehdr.e_shnum = nsections; + ehdr.e_shstrndx = 1; + + out(&ehdr, sizeof(ehdr)); + offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Shdr) * nsections; + + for (i = 0; i < nsections; i++) { + sections[i].hdr.sh_offset = offset; + out(&sections[i].hdr, sizeof(Elf64_Shdr)); + offset += sections[i].hdr.sh_size; + } + for (i = 0; i < nsections; i++) { + if (sections[i].hdr.sh_type == SHT_NOBITS) + continue; + out(sections[i].data, sections[i].hdr.sh_size); + } + if (fflush(stdout) != 0) + fatal("fflush:"); +} + +static void +usage(char* argv0) +{ + fprintf(stderr, "minias - a mini x86-64 assembler.\n\n"); + fprintf(stderr, "usage: %s [-r iter] [-o out] [input]\n", argv0); + fprintf(stderr, "\n"); + fprintf(stderr, " -r iter Jump relaxation iterations (default 1).\n"); + fprintf(stderr, " -o out Output file to write (default stdout).\n"); + exit(2); +} + +static void +parseargs(int argc, char* argv[]) +{ + char *a, *argv0, *outfname; + + argv0 = argv[0]; + + for (++argv; *argv; argv++) { + if (argv[0][0] != '-') + break; + a = &argv[0][1]; 
+ switch (*a) { + case '-': + case 'h': + usage(argv0); + break; + case 'r': + nrelax = atoi(*++argv); + break; + case 'o': + if (argv[1] == NULL) + usage(argv0); + outfname = *++argv; + if (!freopen(outfname, "w", stdout)) + fatal("unable to open %s:", outfname); + break; + default: + usage(argv0); + } + } + + if (argv[0]) { + if (argv[1]) + usage(argv0); + infilename = argv[0]; + if (!freopen(infilename, "r", stdin)) + fatal("unable to open %s:", infilename); + } +} + +int +main(int argc, char* argv[]) +{ + symbols = mkhtab(256); + parseargs(argc, argv); + allasm = parseasm(); + initsections(); assemble(); - } - fillsymtab(); - handlerelocs(); - outelf(); - return 0; + while (nrelax-- > 0) { + relaxreset(); + assemble(); + } + fillsymtab(); + handlerelocs(); + outelf(); + return 0; }
\ No newline at end of file |
