aboutsummaryrefslogtreecommitdiff
path: root/main.c
diff options
context:
space:
mode:
Diffstat (limited to 'main.c')
-rw-r--r--main.c1916
1 files changed, 1000 insertions, 916 deletions
diff --git a/main.c b/main.c
index 255b9a0..f9913f8 100644
--- a/main.c
+++ b/main.c
@@ -1,16 +1,16 @@
#include "minias.h"
/* Parsed assembly */
-static AsmLine *allasm = NULL;
+static AsmLine* allasm = NULL;
/* Number of assembly relaxation passes. */
static int nrelax = 1;
/* Symbols before writing to symtab section. */
-static struct hashtable *symbols = NULL;
+static struct hashtable* symbols = NULL;
/* Array of all relocations before adding to the rel section. */
-static Relocation *relocs = NULL;
+static Relocation* relocs = NULL;
static size_t nrelocs = 0;
static size_t reloccap = 0;
@@ -18,232 +18,279 @@ static size_t reloccap = 0;
static Section sections[MAXSECTIONS];
static size_t nsections = 1; // first is reserved.
-static Section *cursection = NULL;
-static Section *shstrtab = NULL;
-static Section *strtab = NULL;
-static Section *symtab = NULL;
-static Section *bss = NULL;
-static Section *text = NULL;
-static Section *data = NULL;
-static Section *textrel = NULL;
-static Section *datarel = NULL;
-
-static char *infilename = "<stdin>";
+static Section* cursection = NULL;
+static Section* shstrtab = NULL;
+static Section* strtab = NULL;
+static Section* symtab = NULL;
+static Section* bss = NULL;
+static Section* text = NULL;
+static Section* data = NULL;
+static Section* textrel = NULL;
+static Section* datarel = NULL;
+
+static char* infilename = "<stdin>";
static size_t curlineno = 0;
-static void lfatal(const char *fmt, ...) {
- va_list ap;
- fprintf(stderr, "%s:%ld: ", infilename, curlineno);
- va_start(ap, fmt);
- vwarn(fmt, ap);
- va_end(ap);
- exit(1);
-}
-
-static Symbol *getsym(const char *name) {
- Symbol **ps, *s;
- struct hashtablekey htk;
-
- htabkey(&htk, name, strlen(name));
- ps = (Symbol **)htabput(symbols, &htk);
- if (!*ps) {
- *ps = xmalloc(sizeof(Symbol));
- **ps = (Symbol){
- .name = name,
- .wco = -1,
- };
- }
- s = *ps;
- return s;
+static void
+lfatal(const char* fmt, ...)
+{
+ va_list ap;
+ fprintf(stderr, "%s:%ld: ", infilename, curlineno);
+ va_start(ap, fmt);
+ vwarn(fmt, ap);
+ va_end(ap);
+ exit(1);
+}
+
+static Symbol*
+getsym(const char* name)
+{
+ Symbol **ps, *s;
+ struct hashtablekey htk;
+
+ htabkey(&htk, name, strlen(name));
+ ps = (Symbol**)htabput(symbols, &htk);
+ if (!*ps) {
+ *ps = xmalloc(sizeof(Symbol));
+ **ps = (Symbol){
+ .name = name,
+ .wco = -1,
+ };
+ }
+ s = *ps;
+ return s;
}
-static void secaddbytes(Section *s, const void *bytes, size_t n) {
+static void
+secaddbytes(Section* s, const void* bytes, size_t n)
+{
+
+ if (s->hdr.sh_type == SHT_NOBITS) {
+ s->hdr.sh_size += n;
+ return;
+ }
+
+ while (s->capacity < s->hdr.sh_size + n) {
+ s->capacity = s->capacity ? (s->capacity * 2) : 512;
+ s->data = xrealloc(s->data, s->capacity);
+ }
+ memcpy(s->data + s->hdr.sh_size, bytes, n);
- if (s->hdr.sh_type == SHT_NOBITS) {
s->hdr.sh_size += n;
- return;
- }
-
- while (s->capacity < s->hdr.sh_size + n) {
- s->capacity = s->capacity ? (s->capacity * 2) : 512;
- s->data = xrealloc(s->data, s->capacity);
- }
- memcpy(s->data + s->hdr.sh_size, bytes, n);
-
- s->hdr.sh_size += n;
-}
-
-static void secaddbyte(Section *s, uint8_t b) { secaddbytes(s, &b, 1); }
-
-static Elf64_Word elfstr(Section *sec, const char *s) {
- Elf64_Word i = sec->hdr.sh_size;
- secaddbytes(sec, s, strlen(s) + 1);
- return i;
-}
-
-static Section *newsection() {
- Section *s;
- if (nsections >= MAXSECTIONS)
- fatal("too many sections");
- s = &sections[nsections];
- s->idx = nsections;
- nsections += 1;
- return s;
-}
-
-static Section *getsection(const char *name) {
- size_t i;
- char *secname;
- Section *s;
-
- for (i = 0; i < nsections; i++) {
- secname = (char *)shstrtab->data + sections[i].hdr.sh_name;
- if (strcmp(secname, name) == 0)
- return &sections[i];
- }
- s = newsection();
- s->hdr.sh_name = elfstr(shstrtab, name);
- return s;
-}
-
-static void initsections(void) {
- Elf64_Sym elfsym;
-
- shstrtab = newsection();
- secaddbyte(shstrtab, 0);
- shstrtab->hdr.sh_name = elfstr(shstrtab, ".shstrtab");
- shstrtab->hdr.sh_type = SHT_STRTAB;
-
- strtab = newsection();
- secaddbyte(strtab, 0);
- strtab->hdr.sh_name = elfstr(shstrtab, ".strtab");
- strtab->hdr.sh_type = SHT_STRTAB;
-
- symtab = newsection();
- symtab->hdr.sh_name = elfstr(shstrtab, ".symtab");
- symtab->hdr.sh_type = SHT_SYMTAB;
- symtab->hdr.sh_link = strtab->idx;
- symtab->hdr.sh_entsize = sizeof(Elf64_Sym);
- memset(&elfsym, 0, sizeof(elfsym));
- secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym));
-
- bss = newsection();
- bss->hdr.sh_name = elfstr(shstrtab, ".bss");
- bss->hdr.sh_type = SHT_NOBITS;
- bss->hdr.sh_flags = SHF_ALLOC | SHF_WRITE;
- bss->hdr.sh_addralign = 16; // XXX right value?
-
- data = newsection();
- data->hdr.sh_name = elfstr(shstrtab, ".data");
- data->hdr.sh_type = SHT_PROGBITS;
- data->hdr.sh_flags = SHF_ALLOC | SHF_WRITE;
- data->hdr.sh_addralign = 16; // XXX right value?
-
- text = newsection();
- text->hdr.sh_name = elfstr(shstrtab, ".text");
- text->hdr.sh_type = SHT_PROGBITS;
- text->hdr.sh_flags = SHF_EXECINSTR | SHF_ALLOC;
- text->hdr.sh_addralign = 4;
-
- textrel = newsection();
- textrel->hdr.sh_name = elfstr(shstrtab, ".rela.text");
- textrel->hdr.sh_type = SHT_RELA;
- textrel->hdr.sh_info = text->idx;
- textrel->hdr.sh_link = symtab->idx;
- textrel->hdr.sh_entsize = sizeof(Elf64_Rela);
-
- datarel = newsection();
- datarel->hdr.sh_name = elfstr(shstrtab, ".rela.data");
- datarel->hdr.sh_type = SHT_RELA;
- datarel->hdr.sh_info = data->idx;
- datarel->hdr.sh_link = symtab->idx;
- datarel->hdr.sh_entsize = sizeof(Elf64_Rela);
-}
-
-static Relocation *newreloc() {
- if (nrelocs == reloccap) {
- reloccap = nrelocs ? nrelocs * 2 : 64;
- relocs = xreallocarray(relocs, reloccap, sizeof(Relocation));
- }
- return &relocs[nrelocs++];
+}
+
+static void
+secaddbyte(Section* s, uint8_t b)
+{
+ secaddbytes(s, &b, 1);
+}
+
+static Elf64_Word
+elfstr(Section* sec, const char* s)
+{
+ Elf64_Word i = sec->hdr.sh_size;
+ secaddbytes(sec, s, strlen(s) + 1);
+ return i;
+}
+
+static Section*
+newsection()
+{
+ Section* s;
+ if (nsections >= MAXSECTIONS)
+ fatal("too many sections");
+ s = &sections[nsections];
+ s->idx = nsections;
+ nsections += 1;
+ return s;
+}
+
+static Section*
+getsection(const char* name)
+{
+ size_t i;
+ char* secname;
+ Section* s;
+
+ for (i = 0; i < nsections; i++) {
+ secname = (char*)shstrtab->data + sections[i].hdr.sh_name;
+ if (strcmp(secname, name) == 0)
+ return &sections[i];
+ }
+ s = newsection();
+ s->hdr.sh_name = elfstr(shstrtab, name);
+ return s;
+}
+
+static void
+initsections(void)
+{
+ Elf64_Sym elfsym;
+
+ shstrtab = newsection();
+ secaddbyte(shstrtab, 0);
+ shstrtab->hdr.sh_name = elfstr(shstrtab, ".shstrtab");
+ shstrtab->hdr.sh_type = SHT_STRTAB;
+
+ strtab = newsection();
+ secaddbyte(strtab, 0);
+ strtab->hdr.sh_name = elfstr(shstrtab, ".strtab");
+ strtab->hdr.sh_type = SHT_STRTAB;
+
+ symtab = newsection();
+ symtab->hdr.sh_name = elfstr(shstrtab, ".symtab");
+ symtab->hdr.sh_type = SHT_SYMTAB;
+ symtab->hdr.sh_link = strtab->idx;
+ symtab->hdr.sh_entsize = sizeof(Elf64_Sym);
+ memset(&elfsym, 0, sizeof(elfsym));
+ secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym));
+
+ bss = newsection();
+ bss->hdr.sh_name = elfstr(shstrtab, ".bss");
+ bss->hdr.sh_type = SHT_NOBITS;
+ bss->hdr.sh_flags = SHF_ALLOC | SHF_WRITE;
+ bss->hdr.sh_addralign = 16; // XXX right value?
+
+ data = newsection();
+ data->hdr.sh_name = elfstr(shstrtab, ".data");
+ data->hdr.sh_type = SHT_PROGBITS;
+ data->hdr.sh_flags = SHF_ALLOC | SHF_WRITE;
+ data->hdr.sh_addralign = 16; // XXX right value?
+
+ text = newsection();
+ text->hdr.sh_name = elfstr(shstrtab, ".text");
+ text->hdr.sh_type = SHT_PROGBITS;
+ text->hdr.sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ text->hdr.sh_addralign = 4;
+
+ textrel = newsection();
+ textrel->hdr.sh_name = elfstr(shstrtab, ".rela.text");
+ textrel->hdr.sh_type = SHT_RELA;
+ textrel->hdr.sh_info = text->idx;
+ textrel->hdr.sh_link = symtab->idx;
+ textrel->hdr.sh_entsize = sizeof(Elf64_Rela);
+
+ datarel = newsection();
+ datarel->hdr.sh_name = elfstr(shstrtab, ".rela.data");
+ datarel->hdr.sh_type = SHT_RELA;
+ datarel->hdr.sh_info = data->idx;
+ datarel->hdr.sh_link = symtab->idx;
+ datarel->hdr.sh_entsize = sizeof(Elf64_Rela);
+}
+
+static Relocation*
+newreloc()
+{
+ if (nrelocs == reloccap) {
+ reloccap = nrelocs ? nrelocs * 2 : 64;
+ relocs = xreallocarray(relocs, reloccap, sizeof(Relocation));
+ }
+ return &relocs[nrelocs++];
}
/* Shorthand helpers to write section data. */
-static void sb(uint8_t b) { secaddbyte(cursection, b); }
+static void
+sb(uint8_t b)
+{
+ secaddbyte(cursection, b);
+}
-static void sbn(uint8_t *bytes, size_t n) { secaddbytes(cursection, bytes, n); }
+static void
+sbn(uint8_t* bytes, size_t n)
+{
+ secaddbytes(cursection, bytes, n);
+}
-static void su16(uint16_t w) {
- uint8_t buf[2] = {w & 0xff, (w & 0xff00) >> 8};
- secaddbytes(cursection, buf, sizeof(buf));
+static void
+su16(uint16_t w)
+{
+ uint8_t buf[2] = { w & 0xff, (w & 0xff00) >> 8 };
+ secaddbytes(cursection, buf, sizeof(buf));
}
-static void su32(uint32_t l) {
- uint8_t buf[4] = {
- l & 0xff,
- (l & 0xff00) >> 8,
- (l & 0xff0000) >> 16,
- (l & 0xff000000) >> 24,
- };
- secaddbytes(cursection, buf, sizeof(buf));
+static void
+su32(uint32_t l)
+{
+ uint8_t buf[4] = {
+ l & 0xff,
+ (l & 0xff00) >> 8,
+ (l & 0xff0000) >> 16,
+ (l & 0xff000000) >> 24,
+ };
+ secaddbytes(cursection, buf, sizeof(buf));
}
-static void su64(uint64_t l) {
- uint8_t buf[8] = {
- l & 0xff,
- (l & 0xff00) >> 8,
- (l & 0xff0000) >> 16,
- (l & 0xff000000) >> 24,
- (l & 0xff00000000) >> 32,
- (l & 0xff0000000000) >> 40,
- (l & 0xff000000000000) >> 48,
- (l & 0xff00000000000000) >> 56,
- };
- secaddbytes(cursection, buf, sizeof(buf));
+static void
+su64(uint64_t l)
+{
+ uint8_t buf[8] = {
+ l & 0xff,
+ (l & 0xff00) >> 8,
+ (l & 0xff0000) >> 16,
+ (l & 0xff000000) >> 24,
+ (l & 0xff00000000) >> 32,
+ (l & 0xff0000000000) >> 40,
+ (l & 0xff000000000000) >> 48,
+ (l & 0xff00000000000000) >> 56,
+ };
+ secaddbytes(cursection, buf, sizeof(buf));
}
/* Convert an AsmKind to register bits in reg/rm style. */
-static uint8_t regbits(AsmKind k) { return (k - (ASM_REG_BEGIN + 1)) % 16; }
+static uint8_t
+regbits(AsmKind k)
+{
+ return (k - (ASM_REG_BEGIN + 1)) % 16;
+}
/* Register that requires the use of a rex prefix. */
-static uint8_t isrexreg(AsmKind k) {
- return k > ASM_REG_BEGIN && k < ASM_REG_END &&
- (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL ||
- k == ASM_SIL || k == ASM_DIL);
+static uint8_t
+isrexreg(AsmKind k)
+{
+ return k > ASM_REG_BEGIN && k < ASM_REG_END
+ && (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL || k == ASM_SIL || k == ASM_DIL);
}
-static uint8_t rexbyte(Rex rex) {
- return ((1 << 6) | (rex.w << 3) | (rex.r << 2) | (rex.x << 1) | rex.b);
+static uint8_t
+rexbyte(Rex rex)
+{
+ return ((1 << 6) | (rex.w << 3) | (rex.r << 2) | (rex.x << 1) | rex.b);
}
/* Compose a mod/reg/rm byte - See intel manual. */
-static uint8_t modregrmbyte(uint8_t mod, uint8_t reg, uint8_t rm) {
- return (((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7));
+static uint8_t
+modregrmbyte(uint8_t mod, uint8_t reg, uint8_t rm)
+{
+ return (((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7));
}
/* Compose an sib byte - See intel manual. */
-static uint8_t sibbyte(uint8_t ss, uint8_t idx, uint8_t base) {
- return (((ss & 3) << 6) | ((idx & 7) << 3) | (base & 7));
-}
-
-void assembleconstant(int64_t c, int nbytes) {
- switch (nbytes) {
- case 1:
- sb((uint8_t)c);
- break;
- case 2:
- su16((uint16_t)c);
- break;
- case 4:
- su32((uint32_t)c);
- break;
- case 8:
- su64((uint64_t)c);
- break;
- default:
- unreachable();
- }
+static uint8_t
+sibbyte(uint8_t ss, uint8_t idx, uint8_t base)
+{
+ return (((ss & 3) << 6) | ((idx & 7) << 3) | (base & 7));
+}
+
+void
+assembleconstant(int64_t c, int nbytes)
+{
+ switch (nbytes) {
+ case 1:
+ sb((uint8_t)c);
+ break;
+ case 2:
+ su16((uint16_t)c);
+ break;
+ case 4:
+ su32((uint32_t)c);
+ break;
+ case 8:
+ su64((uint64_t)c);
+ break;
+ default:
+ unreachable();
+ }
}
/* The VarBytes type encodes a variadic number of bytes.
@@ -256,766 +303,803 @@ void assembleconstant(int64_t c, int nbytes) {
*/
typedef int32_t VarBytes;
-static void assemblevbytes(VarBytes bytes) {
- int i, n;
- uint8_t b, shift;
-
- n = (int8_t)(uint8_t)((bytes & 0xff000000) >> 24);
- for (i = n; i >= 0; i--) {
- shift = i * 8;
- b = (bytes & (0xff << shift)) >> shift;
- sb(b);
- }
+static void
+assemblevbytes(VarBytes bytes)
+{
+ int i, n;
+ uint8_t b, shift;
+
+ n = (int8_t)(uint8_t)((bytes & 0xff000000) >> 24);
+ for (i = n; i >= 0; i--) {
+ shift = i * 8;
+ b = (bytes & (0xff << shift)) >> shift;
+ sb(b);
+ }
}
-static void assemblerex(Rex rex) {
- if (rex.required || rex.w || rex.r || rex.x || rex.b)
- sb(rexbyte(rex));
+static void
+assemblerex(Rex rex)
+{
+ if (rex.required || rex.w || rex.r || rex.x || rex.b)
+ sb(rexbyte(rex));
}
/* Assemble a symbolic value. */
-static void assemblereloc(const char *l, int64_t c, int nbytes, int type) {
- Relocation *reloc;
- Symbol *sym;
-
- if (l != NULL) {
- reloc = newreloc();
- sym = getsym(l);
- reloc->type = type;
- reloc->section = cursection;
- reloc->sym = sym;
- reloc->offset = cursection->hdr.sh_size;
- reloc->addend = c;
- c = 0;
- }
- assembleconstant(c, nbytes);
+static void
+assemblereloc(const char* l, int64_t c, int nbytes, int type)
+{
+ Relocation* reloc;
+ Symbol* sym;
+
+ if (l != NULL) {
+ reloc = newreloc();
+ sym = getsym(l);
+ reloc->type = type;
+ reloc->section = cursection;
+ reloc->sym = sym;
+ reloc->offset = cursection->hdr.sh_size;
+ reloc->addend = c;
+ c = 0;
+ }
+ assembleconstant(c, nbytes);
}
/* Assemble a r <-> mem operation. */
-static void assemblemem(const Memarg *memarg, Rex rex, VarBytes prefix,
- VarBytes opcode, uint8_t reg, int32_t nexti) {
+static void
+assemblemem(
+ const Memarg* memarg, Rex rex, VarBytes prefix, VarBytes opcode, uint8_t reg, int32_t nexti)
+{
+
+ uint8_t mod, rm, scale, index, base;
+
+ /* Rip relative addressing. */
+ if (memarg->base == ASM_RIP) {
+ rm = 0x05;
+ assemblevbytes(prefix);
+ assemblerex(rex);
+ assemblevbytes(opcode);
+ sb(modregrmbyte(0x00, reg, rm));
+
+ if (memarg->disp.l) {
+ assemblereloc(memarg->disp.l, memarg->disp.c - 4 + nexti, 4, R_X86_64_PC32);
+ } else {
+ assembleconstant(memarg->disp.c, 4);
+ }
+ return;
+ }
- uint8_t mod, rm, scale, index, base;
+ /* Direct memory access */
+ if (memarg->base == ASM_NO_REG) {
+ mod = 0;
+ rm = 4;
+
+ assemblevbytes(prefix);
+ assemblerex(rex);
+ assemblevbytes(opcode);
+ sb(modregrmbyte(mod, reg, rm));
+
+ sb(sibbyte(0, 4, 5));
+ if (memarg->disp.l) {
+ assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
+ } else {
+ assembleconstant(memarg->disp.c, 4);
+ }
+ return;
+ }
- /* Rip relative addressing. */
- if (memarg->base == ASM_RIP) {
- rm = 0x05;
- assemblevbytes(prefix);
- assemblerex(rex);
- assemblevbytes(opcode);
- sb(modregrmbyte(0x00, reg, rm));
+ rm = regbits(memarg->base);
+ rex.b = !!(rm & (1 << 3));
- if (memarg->disp.l) {
- assemblereloc(memarg->disp.l, memarg->disp.c - 4 + nexti, 4,
- R_X86_64_PC32);
- } else {
- assembleconstant(memarg->disp.c, 4);
+ /* Case when we don't need sib */
+ if (memarg->index == ASM_NO_REG && memarg->scale == 0 && ((rm & 7) != 4)) {
+
+ if (memarg->disp.l == 0 && memarg->disp.c == 0) {
+ if ((rm & 7) == 5) {
+ mod = 1;
+ } else {
+ mod = 0;
+ }
+ } else {
+ mod = 2;
+ }
+
+ assemblevbytes(prefix);
+ assemblerex(rex);
+ assemblevbytes(opcode);
+ sb(modregrmbyte(mod, reg, rm));
+
+ if (mod == 1) {
+ assembleconstant(memarg->disp.c, 1);
+ } else if (mod == 2) {
+ assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
+ }
+ return;
}
- return;
- }
- /* Direct memory access */
- if (memarg->base == ASM_NO_REG) {
- mod = 0;
+ /* Setup sib indexing. */
+ base = rm;
rm = 4;
- assemblevbytes(prefix);
- assemblerex(rex);
- assemblevbytes(opcode);
- sb(modregrmbyte(mod, reg, rm));
+ if (memarg->disp.c == 0 && memarg->disp.l == 0 && ((base & 7) != 5)) {
+ mod = 0; /* +0 */
+ } else {
+ if (memarg->disp.l == NULL && memarg->disp.c >= -128 && memarg->disp.c <= 127) {
+ mod = 1; /* +disp8 */
+ } else {
+ mod = 2; /* +disp32 */
+ }
+ }
- sb(sibbyte(0, 4, 5));
- if (memarg->disp.l) {
- assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
+ if (memarg->index == ASM_NO_REG) {
+ index = 4;
} else {
- assembleconstant(memarg->disp.c, 4);
+ if (memarg->index == ASM_RSP)
+ lfatal("rsp cannot be used as an index");
+ index = regbits(memarg->index);
}
- return;
- }
- rm = regbits(memarg->base);
- rex.b = !!(rm & (1 << 3));
+ /* If our base is a bp register, we must use the index instead. */
+ if ((base & 7) == 5 && memarg->index == ASM_NO_REG) {
+ index = base;
+ }
- /* Case when we don't need sib */
- if (memarg->index == ASM_NO_REG && memarg->scale == 0 && ((rm & 7) != 4)) {
+ rex.x = !!(index & (1 << 3));
- if (memarg->disp.l == 0 && memarg->disp.c == 0) {
- if ((rm & 7) == 5) {
- mod = 1;
- } else {
- mod = 0;
- }
- } else {
- mod = 2;
+ switch (memarg->scale) {
+ case 0:
+ case 1:
+ scale = 0;
+ break;
+ case 2:
+ scale = 1;
+ break;
+ case 4:
+ scale = 2;
+ break;
+ case 8:
+ scale = 3;
+ break;
+ default:
+ lfatal("invalid addressing scale");
+ return;
}
assemblevbytes(prefix);
assemblerex(rex);
assemblevbytes(opcode);
sb(modregrmbyte(mod, reg, rm));
+ sb(sibbyte(scale, index, base));
if (mod == 1) {
- assembleconstant(memarg->disp.c, 1);
+ assembleconstant(memarg->disp.c, 1);
} else if (mod == 2) {
- assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
+ assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
}
- return;
- }
-
- /* Setup sib indexing. */
- base = rm;
- rm = 4;
+}
- if (memarg->disp.c == 0 && memarg->disp.l == 0 && ((base & 7) != 5)) {
- mod = 0; /* +0 */
- } else {
- if (memarg->disp.l == NULL && memarg->disp.c >= -128 &&
- memarg->disp.c <= 127) {
- mod = 1; /* +disp8 */
- } else {
- mod = 2; /* +disp32 */
- }
- }
-
- if (memarg->index == ASM_NO_REG) {
- index = 4;
- } else {
- if (memarg->index == ASM_RSP)
- lfatal("rsp cannot be used as an index");
- index = regbits(memarg->index);
- }
-
- /* If our base is a bp register, we must use the index instead. */
- if ((base & 7) == 5 && memarg->index == ASM_NO_REG) {
- index = base;
- }
-
- rex.x = !!(index & (1 << 3));
-
- switch (memarg->scale) {
- case 0:
- case 1:
- scale = 0;
- break;
- case 2:
- scale = 1;
- break;
- case 4:
- scale = 2;
- break;
- case 8:
- scale = 3;
- break;
- default:
- lfatal("invalid addressing scale");
- return;
- }
-
- assemblevbytes(prefix);
- assemblerex(rex);
- assemblevbytes(opcode);
- sb(modregrmbyte(mod, reg, rm));
- sb(sibbyte(scale, index, base));
-
- if (mod == 1) {
- assembleconstant(memarg->disp.c, 1);
- } else if (mod == 2) {
- assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
- }
-}
-
-static void assemblecall(const Call *call) {
- Rex rex;
- uint8_t rm;
-
- if (call->indirect) {
- if (call->target.indirect->kind == ASM_MEMARG) {
- rex = (Rex){0};
- abort(); // assemblemem(&call->target.indirect->memarg, rex, -1, 0xff,
- // 0x02);
- } else {
- rm = regbits(call->target.indirect->kind);
- rex = (Rex){.b = !!(rm & (1 << 3))};
- assemblerex(rex);
- assemblevbytes(0xff);
- sb(modregrmbyte(0x03, 0x02, rm));
- }
- } else {
- sb(0xe8);
- assemblereloc(call->target.direct.l, call->target.direct.c - 4, 4,
- R_X86_64_PC32);
- }
-}
-
-static void assemblejmp(const Jmp *j) {
- int jmpsize;
- int64_t distance;
- Symbol *target;
-
- static uint8_t cc2op[31] = {
- 0xe9, 0x84, 0x88, 0x8b, 0x8a, 0x8a, 0x80, 0x85, 0x89, 0x8b, 0x81,
- 0x8f, 0x8d, 0x8c, 0x8e, 0x85, 0x83, 0x87, 0x83, 0x82, 0x86, 0x8e,
- 0x8c, 0x8d, 0x8f, 0x84, 0x82, 0x86, 0x82, 0x83, 0x87,
- };
-
- jmpsize = 4;
- target = getsym(j->target);
- if (cursection == target->section && (target->defined || target->wco != -1)) {
- if (target->defined) {
- distance = target->offset - cursection->hdr.sh_size;
+static void
+assemblecall(const Call* call)
+{
+ Rex rex;
+ uint8_t rm;
+
+ if (call->indirect) {
+ if (call->target.indirect->kind == ASM_MEMARG) {
+ rex = (Rex){ 0 };
+ abort(); // assemblemem(&call->target.indirect->memarg, rex, -1, 0xff,
+ // 0x02);
+ } else {
+ rm = regbits(call->target.indirect->kind);
+ rex = (Rex){ .b = !!(rm & (1 << 3)) };
+ assemblerex(rex);
+ assemblevbytes(0xff);
+ sb(modregrmbyte(0x03, 0x02, rm));
+ }
} else {
- distance = target->wco - cursection->hdr.sh_size;
+ sb(0xe8);
+ assemblereloc(call->target.direct.l, call->target.direct.c - 4, 4, R_X86_64_PC32);
}
- if ((distance - 1) >= -128 && (distance - 1) <= 127) {
- jmpsize = 1;
- } else {
- jmpsize = 4;
- }
- }
-
- if (jmpsize == 4) {
- if (j->cc)
- sb(0x0f);
- sb(cc2op[j->cc]);
- assemblereloc(j->target, -4, 4, R_X86_64_PC32);
- } else {
- sb(cc2op[j->cc] + (j->cc ? -16 : 2));
- assemblereloc(j->target, -1, 1, R_X86_64_PC8);
- }
-}
-
-static void assembleabsimm(const Imm *imm) {
- if (imm->nbytes == 1)
- assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_8);
- else if (imm->nbytes == 2)
- assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_16);
- else if (imm->nbytes == 4)
- assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32);
- else if (imm->nbytes == 8)
- assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_64);
- else
- unreachable();
-}
-
-static void assembleinstr(const Instr *instr) {
- Rex rex;
- const Memarg *memarg;
- const Imm *imm;
- uint8_t reg, rm;
-
- switch (instr->encoder) {
- case ENCODER_OP:
- assemblevbytes(instr->opcode);
- break;
- case ENCODER_OPREG:
- rm = regbits(instr->arg1->kind);
- rex = instr->rex;
- rex.required = isrexreg(instr->arg1->kind);
- rex.b = !!(rm & (1 << 3));
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode);
- sb(modregrmbyte(0x03, instr->fixedreg, rm));
- break;
- case ENCODER_OPMEM:
- memarg = &instr->arg1->memarg;
- rex = instr->rex;
- assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, 0);
- break;
- case ENCODER_R:
- reg = regbits(instr->arg1->kind);
- rex = instr->rex;
- rex.required = isrexreg(instr->arg1->kind);
- rex.b = !!(reg & (1 << 3));
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode | (reg & 7));
- break;
- case ENCODER_RIMM:
- imm = &instr->arg1->imm;
- reg = regbits(instr->arg2->kind);
- rex = instr->rex;
- rex.required = isrexreg(instr->arg2->kind);
- rex.b = !!(reg & (1 << 3));
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode | (reg & 7));
- assembleabsimm(imm);
- break;
- case ENCODER_IMM:
- imm = &instr->arg1->imm;
- rex = instr->rex;
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode);
- assembleabsimm(imm);
- break;
- case ENCODER_IMMREG:
- imm = &instr->arg1->imm;
- reg = instr->fixedreg;
- rm = regbits(instr->arg2->kind);
- rex = instr->rex;
- rex.required = isrexreg(instr->arg2->kind);
- rex.b = !!(rm & (1 << 3));
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode);
- sb(modregrmbyte(0x03, reg, rm));
- assembleabsimm(imm);
- break;
- case ENCODER_IMMMEM:
- imm = &instr->arg1->imm;
- memarg = &instr->arg2->memarg;
- reg = instr->fixedreg;
- rex = instr->rex;
- assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg,
- imm->nbytes);
- assembleabsimm(imm);
- break;
- case ENCODER_REGMEM:
- case ENCODER_MEMREG:
- if (instr->encoder == ENCODER_MEMREG) {
- memarg = &instr->arg1->memarg;
- reg = regbits(instr->arg2->kind);
- } else {
- memarg = &instr->arg2->memarg;
- reg = regbits(instr->arg1->kind);
- }
- rex = instr->rex;
- rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
- rex.r = !!(reg & (1 << 3));
- assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, 0);
- break;
- case ENCODER_REGREG:
- case ENCODER_REGREG2:
- if (instr->encoder == ENCODER_REGREG) {
- reg = regbits(instr->arg1->kind);
- rm = regbits(instr->arg2->kind);
+}
+
+static void
+assemblejmp(const Jmp* j)
+{
+ int jmpsize;
+ int64_t distance;
+ Symbol* target;
+
+ // clang-format off
+ static uint8_t cc2op[31] = {
+ 0xe9, 0x84, 0x88, 0x8b, 0x8a, 0x8a, 0x80, 0x85,
+ 0x89, 0x8b, 0x81, 0x8f, 0x8d, 0x8c, 0x8e, 0x85,
+ 0x83, 0x87, 0x83, 0x82, 0x86, 0x8e, 0x8c, 0x8d,
+ 0x8f, 0x84, 0x82, 0x86, 0x82, 0x83, 0x87,
+ };
+ // clang-format on
+
+ jmpsize = 4;
+ target = getsym(j->target);
+ if (cursection == target->section && (target->defined || target->wco != -1)) {
+ if (target->defined) {
+ distance = target->offset - cursection->hdr.sh_size;
+ } else {
+ distance = target->wco - cursection->hdr.sh_size;
+ }
+ if ((distance - 1) >= -128 && (distance - 1) <= 127) {
+ jmpsize = 1;
+ } else {
+ jmpsize = 4;
+ }
+ }
+
+ if (jmpsize == 4) {
+ if (j->cc)
+ sb(0x0f);
+ sb(cc2op[j->cc]);
+ assemblereloc(j->target, -4, 4, R_X86_64_PC32);
} else {
- reg = regbits(instr->arg2->kind);
- rm = regbits(instr->arg1->kind);
+ sb(cc2op[j->cc] + (j->cc ? -16 : 2));
+ assemblereloc(j->target, -1, 1, R_X86_64_PC8);
}
- rex = instr->rex;
- rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
- rex.r = !!(reg & (1 << 3));
- rex.b = !!(rm & (1 << 3));
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode);
- sb(modregrmbyte(0x03, reg, rm));
- break;
- case ENCODER_IMMREGREG2:
- imm = &instr->arg1->imm;
- reg = regbits(instr->arg3->kind);
- rm = regbits(instr->arg2->kind);
- rex = instr->rex;
- rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
- rex.r = !!(reg & (1 << 3));
- rex.b = !!(rm & (1 << 3));
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode);
- sb(modregrmbyte(0x03, reg, rm));
- assembleabsimm(imm);
- break;
- case ENCODER_IMMMEMREG:
- imm = &instr->arg1->imm;
- memarg = &instr->arg2->memarg;
- reg = regbits(instr->arg3->kind);
- rex = instr->rex;
- rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
- rex.r = !!(reg & (1 << 3));
- assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, imm->nbytes);
- assembleabsimm(imm);
- break;
- default:
- unreachable();
- }
-}
-
-static void assemble(void) {
- Symbol *sym;
- AsmLine *l;
- const Parsev *v;
-
- cursection = text;
- curlineno = 0;
- for (l = allasm; l; l = l->next) {
- curlineno++;
- v = l->v;
- switch (v->kind) {
- case ASM_SYNTAX_ERROR:
- lfatal("syntax error");
- break;
- case ASM_BLANK:
- break;
- case ASM_DIR_GLOBL:
- sym = getsym(v->globl.name);
- sym->global = 1;
- break;
- case ASM_DIR_SECTION: {
- const char *fp;
- Section *s;
-
- s = getsection(v->section.name);
- s->hdr.sh_type = v->section.type;
- fp = v->section.flags;
- while (fp && *fp) {
- switch (*(fp++)) {
- case 'a':
- s->hdr.sh_flags |= SHF_ALLOC;
- break;
- case 'w':
- s->hdr.sh_flags |= SHF_WRITE;
- break;
- case 'x':
- s->hdr.sh_flags |= SHF_EXECINSTR;
- break;
- default:
- unreachable();
+}
+
+static void
+assembleabsimm(const Imm* imm)
+{
+ if (imm->nbytes == 1)
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_8);
+ else if (imm->nbytes == 2)
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_16);
+ else if (imm->nbytes == 4)
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32);
+ else if (imm->nbytes == 8)
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_64);
+ else
+ unreachable();
+}
+
+static void
+assembleinstr(const Instr* instr)
+{
+ Rex rex;
+ const Memarg* memarg;
+ const Imm* imm;
+ uint8_t reg, rm;
+
+ switch (instr->encoder) {
+ case ENCODER_OP:
+ assemblevbytes(instr->opcode);
+ break;
+ case ENCODER_OPREG:
+ rm = regbits(instr->arg1->kind);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg1->kind);
+ rex.b = !!(rm & (1 << 3));
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode);
+ sb(modregrmbyte(0x03, instr->fixedreg, rm));
+ break;
+ case ENCODER_OPMEM:
+ memarg = &instr->arg1->memarg;
+ rex = instr->rex;
+ assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, 0);
+ break;
+ case ENCODER_R:
+ reg = regbits(instr->arg1->kind);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg1->kind);
+ rex.b = !!(reg & (1 << 3));
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode | (reg & 7));
+ break;
+ case ENCODER_RIMM:
+ imm = &instr->arg1->imm;
+ reg = regbits(instr->arg2->kind);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg2->kind);
+ rex.b = !!(reg & (1 << 3));
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode | (reg & 7));
+ assembleabsimm(imm);
+ break;
+ case ENCODER_IMM:
+ imm = &instr->arg1->imm;
+ rex = instr->rex;
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode);
+ assembleabsimm(imm);
+ break;
+ case ENCODER_IMMREG:
+ imm = &instr->arg1->imm;
+ reg = instr->fixedreg;
+ rm = regbits(instr->arg2->kind);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg2->kind);
+ rex.b = !!(rm & (1 << 3));
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode);
+ sb(modregrmbyte(0x03, reg, rm));
+ assembleabsimm(imm);
+ break;
+ case ENCODER_IMMMEM:
+ imm = &instr->arg1->imm;
+ memarg = &instr->arg2->memarg;
+ reg = instr->fixedreg;
+ rex = instr->rex;
+ assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, imm->nbytes);
+ assembleabsimm(imm);
+ break;
+ case ENCODER_REGMEM:
+ case ENCODER_MEMREG:
+ if (instr->encoder == ENCODER_MEMREG) {
+ memarg = &instr->arg1->memarg;
+ reg = regbits(instr->arg2->kind);
+ } else {
+ memarg = &instr->arg2->memarg;
+ reg = regbits(instr->arg1->kind);
}
- }
- cursection = s;
- break;
- }
- case ASM_DIR_DATA:
- cursection = data;
- break;
- case ASM_DIR_TEXT:
- cursection = text;
- break;
- case ASM_DIR_ASCII:
- sbn(v->ascii.data, v->ascii.len);
- break;
- case ASM_DIR_ASCIIZ:
- sbn(v->asciiz.data, v->asciiz.len);
- sb(0x00);
- break;
- case ASM_DIR_BALIGN: {
- int64_t offset, i, rem, amnt;
- amnt = 0;
- offset = cursection->hdr.sh_addralign + cursection->hdr.sh_size;
- rem = offset % v->balign.align;
- if (rem)
- amnt = v->balign.align - rem;
- for (i = 0; i < amnt; i++) {
- sb(0x00);
- }
- break;
- }
- case ASM_DIR_FILL: {
- ssize_t i = 0;
-
- for (i = 0; i < v->fill.repeat; i++) {
- switch (v->fill.size) {
- case 1:
- case 2:
- case 4:
- case 8:
- assembleconstant(v->fill.value, v->fill.size);
- break;
- default:
- lfatal("unsupported fill size '%d'", v->fill.size);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
+ rex.r = !!(reg & (1 << 3));
+ assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, 0);
+ break;
+ case ENCODER_REGREG:
+ case ENCODER_REGREG2:
+ if (instr->encoder == ENCODER_REGREG) {
+ reg = regbits(instr->arg1->kind);
+ rm = regbits(instr->arg2->kind);
+ } else {
+ reg = regbits(instr->arg2->kind);
+ rm = regbits(instr->arg1->kind);
}
- }
- break;
- }
- case ASM_DIR_BYTE:
- assemblereloc(v->dirbyte.value.l, v->dirbyte.value.c, 1, R_X86_64_32);
- break;
- case ASM_DIR_SHORT:
- assemblereloc(v->dirshort.value.l, v->dirshort.value.c, 2, R_X86_64_32);
- break;
- case ASM_DIR_INT:
- assemblereloc(v->dirint.value.l, v->dirint.value.c, 4, R_X86_64_32);
- break;
- case ASM_DIR_QUAD:
- assemblereloc(v->dirquad.value.l, v->dirquad.value.c, 8, R_X86_64_64);
- break;
- case ASM_LABEL:
- sym = getsym(v->label.name);
- sym->section = cursection;
- sym->offset = cursection->hdr.sh_size;
- if (sym->defined)
- lfatal("%s already defined", sym->name);
- sym->defined = 1;
- break;
- case ASM_INSTR:
- assembleinstr(&v->instr);
- break;
- case ASM_CALL:
- assemblecall(&v->call);
- break;
- case ASM_JMP:
- assemblejmp(&v->jmp);
- break;
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
+ rex.r = !!(reg & (1 << 3));
+ rex.b = !!(rm & (1 << 3));
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode);
+ sb(modregrmbyte(0x03, reg, rm));
+ break;
+ case ENCODER_IMMREGREG2:
+ imm = &instr->arg1->imm;
+ reg = regbits(instr->arg3->kind);
+ rm = regbits(instr->arg2->kind);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
+ rex.r = !!(reg & (1 << 3));
+ rex.b = !!(rm & (1 << 3));
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode);
+ sb(modregrmbyte(0x03, reg, rm));
+ assembleabsimm(imm);
+ break;
+ case ENCODER_IMMMEMREG:
+ imm = &instr->arg1->imm;
+ memarg = &instr->arg2->memarg;
+ reg = regbits(instr->arg3->kind);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
+ rex.r = !!(reg & (1 << 3));
+ assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, imm->nbytes);
+ assembleabsimm(imm);
+ break;
default:
- lfatal("assemble: unexpected kind: %d", v->kind);
+ unreachable();
+ }
+}
+
+static void
+assemble(void)
+{
+ Symbol* sym;
+ AsmLine* l;
+ const Parsev* v;
+
+ cursection = text;
+ curlineno = 0;
+ for (l = allasm; l; l = l->next) {
+ curlineno++;
+ v = l->v;
+ switch (v->kind) {
+ case ASM_SYNTAX_ERROR:
+ lfatal("syntax error");
+ break;
+ case ASM_BLANK:
+ break;
+ case ASM_DIR_GLOBL:
+ sym = getsym(v->globl.name);
+ sym->global = 1;
+ break;
+ case ASM_DIR_SECTION: {
+ const char* fp;
+ Section* s;
+
+ s = getsection(v->section.name);
+ s->hdr.sh_type = v->section.type;
+ fp = v->section.flags;
+ while (fp && *fp) {
+ switch (*(fp++)) {
+ case 'a':
+ s->hdr.sh_flags |= SHF_ALLOC;
+ break;
+ case 'w':
+ s->hdr.sh_flags |= SHF_WRITE;
+ break;
+ case 'x':
+ s->hdr.sh_flags |= SHF_EXECINSTR;
+ break;
+ default:
+ unreachable();
+ }
+ }
+ cursection = s;
+ break;
+ }
+ case ASM_DIR_DATA:
+ cursection = data;
+ break;
+ case ASM_DIR_TEXT:
+ cursection = text;
+ break;
+ case ASM_DIR_ASCII:
+ sbn(v->ascii.data, v->ascii.len);
+ break;
+ case ASM_DIR_ASCIIZ:
+ sbn(v->asciiz.data, v->asciiz.len);
+ sb(0x00);
+ break;
+ case ASM_DIR_BALIGN: {
+ int64_t offset, i, rem, amnt;
+ amnt = 0;
+ offset = cursection->hdr.sh_addralign + cursection->hdr.sh_size;
+ rem = offset % v->balign.align;
+ if (rem)
+ amnt = v->balign.align - rem;
+ for (i = 0; i < amnt; i++) {
+ sb(0x00);
+ }
+ break;
+ }
+ case ASM_DIR_FILL: {
+ ssize_t i = 0;
+
+ for (i = 0; i < v->fill.repeat; i++) {
+ switch (v->fill.size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ assembleconstant(v->fill.value, v->fill.size);
+ break;
+ default:
+ lfatal("unsupported fill size '%d'", v->fill.size);
+ }
+ }
+ break;
+ }
+ case ASM_DIR_BYTE:
+ assemblereloc(v->dirbyte.value.l, v->dirbyte.value.c, 1, R_X86_64_32);
+ break;
+ case ASM_DIR_SHORT:
+ assemblereloc(v->dirshort.value.l, v->dirshort.value.c, 2, R_X86_64_32);
+ break;
+ case ASM_DIR_INT:
+ assemblereloc(v->dirint.value.l, v->dirint.value.c, 4, R_X86_64_32);
+ break;
+ case ASM_DIR_QUAD:
+ assemblereloc(v->dirquad.value.l, v->dirquad.value.c, 8, R_X86_64_64);
+ break;
+ case ASM_LABEL:
+ sym = getsym(v->label.name);
+ sym->section = cursection;
+ sym->offset = cursection->hdr.sh_size;
+ if (sym->defined)
+ lfatal("%s already defined", sym->name);
+ sym->defined = 1;
+ break;
+ case ASM_INSTR:
+ assembleinstr(&v->instr);
+ break;
+ case ASM_CALL:
+ assemblecall(&v->call);
+ break;
+ case ASM_JMP:
+ assemblejmp(&v->jmp);
+ break;
+ default:
+ lfatal("assemble: unexpected kind: %d", v->kind);
+ }
}
- }
}
/* Reset while remembering symbol offsets so we can size jumps. */
-static void relaxreset(void) {
- Symbol *sym;
- Section *sec;
- size_t i;
-
- /* Reset relocations and section data but retain capacity. */
- nrelocs = 0;
-
- for (i = 0; i < nsections; i++) {
- sec = &sections[i];
- if (sec == shstrtab)
- continue;
- sec->hdr.sh_size = 0;
- }
-
- /* Reset symbols, saving the worst case offset for the second pass. */
- for (i = 0; i < symbols->cap; i++) {
- if (!symbols->keys[i].str)
- continue;
- sym = symbols->vals[i];
- *sym = (Symbol){
- .name = sym->name, .section = sym->section, .wco = sym->offset};
- }
-}
-
-static void addtosymtab(Symbol *sym) {
- Elf64_Sym elfsym;
- int stype;
- int sbind;
-
- stype = 0;
- if (sym->defined) {
- sbind = sym->global ? STB_GLOBAL : STB_LOCAL;
- } else {
- sbind = STB_GLOBAL;
- }
-
- sym->idx = symtab->hdr.sh_size / symtab->hdr.sh_entsize;
-
- elfsym.st_name = elfstr(strtab, sym->name);
- elfsym.st_value = sym->offset;
- elfsym.st_size = sym->size;
- elfsym.st_info = ELF64_ST_INFO(sbind, stype);
- elfsym.st_shndx = sym->section ? sym->section->idx : SHN_UNDEF;
- elfsym.st_other = 0;
- secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym));
-}
-
-static void fillsymtab(void) {
- Symbol *sym;
- size_t i;
-
- // Local symbols
- for (i = 0; i < symbols->cap; i++) {
- if (!symbols->keys[i].str)
- continue;
- sym = symbols->vals[i];
- if (!sym->defined || sym->global)
- continue;
- addtosymtab(sym);
- }
-
- // Global symbols
-
- // Set start of global symbols.
- symtab->hdr.sh_info = symtab->hdr.sh_size / symtab->hdr.sh_entsize;
-
- for (i = 0; i < symbols->cap; i++) {
- if (!symbols->keys[i].str)
- continue;
- sym = symbols->vals[i];
-
- if (sym->defined && !sym->global)
- continue;
- addtosymtab(sym);
- }
-}
-
-static int resolvereloc(Relocation *reloc) {
- Symbol *sym;
- uint8_t *rdata;
- int64_t value;
-
- sym = reloc->sym;
-
- if (sym->section != reloc->section)
- return 0;
+static void
+relaxreset(void)
+{
+ Symbol* sym;
+ Section* sec;
+ size_t i;
+
+ /* Reset relocations and section data but retain capacity. */
+ nrelocs = 0;
+
+ for (i = 0; i < nsections; i++) {
+ sec = &sections[i];
+ if (sec == shstrtab)
+ continue;
+ sec->hdr.sh_size = 0;
+ }
- switch (reloc->type) {
- case R_X86_64_32:
- case R_X86_64_64:
- return 0;
- case R_X86_64_PC8:
- rdata = &reloc->section->data[reloc->offset];
- value = sym->offset - reloc->offset + reloc->addend;
- rdata[0] = ((uint8_t)value & 0xff);
- return 1;
- case R_X86_64_PC32:
- rdata = &reloc->section->data[reloc->offset];
- value = sym->offset - reloc->offset + reloc->addend;
- rdata[0] = ((uint32_t)value & 0xff);
- rdata[1] = ((uint32_t)value & 0xff00) >> 8;
- rdata[2] = ((uint32_t)value & 0xff0000) >> 16;
- rdata[3] = ((uint32_t)value & 0xff000000) >> 24;
- return 1;
- default:
- unreachable();
- return 0;
- }
-}
-
-static void appendreloc(Relocation *reloc) {
- Symbol *sym;
- Section *relsection;
- Elf64_Rela elfrel;
-
- memset(&elfrel, 0, sizeof(elfrel));
-
- sym = reloc->sym;
- if (reloc->section == text)
- relsection = textrel;
- else if (reloc->section == data)
- relsection = datarel;
- else {
- fatal("unexpected relocation for symbol '%s'", sym->name);
- return;
- }
-
- switch (reloc->type) {
- case R_X86_64_PC32:
- case R_X86_64_32:
- case R_X86_64_64:
- elfrel.r_info = ELF64_R_INFO(sym->idx, reloc->type);
- elfrel.r_offset = reloc->offset;
- elfrel.r_addend = reloc->addend;
- break;
- default:
- unreachable();
- }
-
- secaddbytes(relsection, &elfrel, sizeof(elfrel));
-}
-
-static void handlerelocs(void) {
- Relocation *reloc;
- size_t i;
- for (i = 0; i < nrelocs; i++) {
- reloc = &relocs[i];
- if (resolvereloc(reloc))
- continue;
- appendreloc(reloc);
- }
-}
-
-static void out(const void *buf, size_t n) {
- fwrite(buf, 1, n, stdout);
- if (ferror(stdout))
- fatal("fwrite:");
-}
-
-static void outelf(void) {
- size_t i;
- uint64_t offset;
- Elf64_Ehdr ehdr;
-
- memset(&ehdr, 0, sizeof(ehdr));
- ehdr.e_ident[0] = 0x7f;
- ehdr.e_ident[1] = 'E';
- ehdr.e_ident[2] = 'L';
- ehdr.e_ident[3] = 'F';
- ehdr.e_ident[4] = ELFCLASS64;
- ehdr.e_ident[5] = ELFDATA2LSB;
- ehdr.e_ident[6] = 1;
- ehdr.e_type = ET_REL;
- ehdr.e_machine = EM_X86_64;
- ehdr.e_flags = 0;
- ehdr.e_version = 1;
- ehdr.e_ehsize = sizeof(Elf64_Ehdr);
- ehdr.e_shoff = sizeof(Elf64_Ehdr);
- ehdr.e_shentsize = sizeof(Elf64_Shdr);
- ehdr.e_shnum = nsections;
- ehdr.e_shstrndx = 1;
-
- out(&ehdr, sizeof(ehdr));
- offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Shdr) * nsections;
-
- for (i = 0; i < nsections; i++) {
- sections[i].hdr.sh_offset = offset;
- out(&sections[i].hdr, sizeof(Elf64_Shdr));
- offset += sections[i].hdr.sh_size;
- }
- for (i = 0; i < nsections; i++) {
- if (sections[i].hdr.sh_type == SHT_NOBITS)
- continue;
- out(sections[i].data, sections[i].hdr.sh_size);
- }
- if (fflush(stdout) != 0)
- fatal("fflush:");
-}
-
-static void usage(char *argv0) {
- fprintf(stderr, "minias - a mini x86-64 assembler.\n\n");
- fprintf(stderr, "usage: %s [-r iter] [-o out] [input]\n", argv0);
- fprintf(stderr, "\n");
- fprintf(stderr, " -r iter Jump relaxation iterations (default 1).\n");
- fprintf(stderr, " -o out Output file to write (default stdout).\n");
- exit(2);
-}
-
-static void parseargs(int argc, char *argv[]) {
- char *a, *argv0, *outfname;
-
- argv0 = argv[0];
-
- for (++argv; *argv; argv++) {
- if (argv[0][0] != '-')
- break;
- for (a = &argv[0][1]; *a; a++) {
- switch (*a) {
- case '-':
- case 'h':
- usage(argv0);
- break;
- case 'r':
- nrelax = atoi(*++argv);
- break;
- case 'o':
- if (argv[1] == NULL)
- usage(argv0);
- outfname = *++argv;
- if (!freopen(outfname, "w", stdout))
- fatal("unable to open %s:", outfname);
+ /* Reset symbols, saving the worst case offset for the second pass. */
+ for (i = 0; i < symbols->cap; i++) {
+ if (!symbols->keys[i].str)
+ continue;
+ sym = symbols->vals[i];
+ *sym = (Symbol){ .name = sym->name, .section = sym->section, .wco = sym->offset };
+ }
+}
+
+static void
+addtosymtab(Symbol* sym)
+{
+ Elf64_Sym elfsym;
+ int stype;
+ int sbind;
+
+ stype = 0;
+ if (sym->defined) {
+ sbind = sym->global ? STB_GLOBAL : STB_LOCAL;
+ } else {
+ sbind = STB_GLOBAL;
+ }
+
+ sym->idx = symtab->hdr.sh_size / symtab->hdr.sh_entsize;
+
+ elfsym.st_name = elfstr(strtab, sym->name);
+ elfsym.st_value = sym->offset;
+ elfsym.st_size = sym->size;
+ elfsym.st_info = ELF64_ST_INFO(sbind, stype);
+ elfsym.st_shndx = sym->section ? sym->section->idx : SHN_UNDEF;
+ elfsym.st_other = 0;
+ secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym));
+}
+
+static void
+fillsymtab(void)
+{
+ Symbol* sym;
+ size_t i;
+
+ // Local symbols
+ for (i = 0; i < symbols->cap; i++) {
+ if (!symbols->keys[i].str)
+ continue;
+ sym = symbols->vals[i];
+ if (!sym->defined || sym->global)
+ continue;
+ addtosymtab(sym);
+ }
+
+ // Global symbols
+
+ // Set start of global symbols.
+ symtab->hdr.sh_info = symtab->hdr.sh_size / symtab->hdr.sh_entsize;
+
+ for (i = 0; i < symbols->cap; i++) {
+ if (!symbols->keys[i].str)
+ continue;
+
+ sym = symbols->vals[i];
+ if (sym->defined && !sym->global)
+ continue;
+ addtosymtab(sym);
+ }
+}
+
+static int
+resolvereloc(Relocation* reloc)
+{
+ Symbol* sym;
+ uint8_t* rdata;
+ int64_t value;
+
+ sym = reloc->sym;
+
+ if (sym->section != reloc->section)
+ return 0;
+
+ switch (reloc->type) {
+ case R_X86_64_32:
+ case R_X86_64_64:
+ return 0;
+ case R_X86_64_PC8:
+ rdata = &reloc->section->data[reloc->offset];
+ value = sym->offset - reloc->offset + reloc->addend;
+ rdata[0] = ((uint8_t)value & 0xff);
+ return 1;
+ case R_X86_64_PC32:
+ rdata = &reloc->section->data[reloc->offset];
+ value = sym->offset - reloc->offset + reloc->addend;
+ rdata[0] = ((uint32_t)value & 0xff);
+ rdata[1] = ((uint32_t)value & 0xff00) >> 8;
+ rdata[2] = ((uint32_t)value & 0xff0000) >> 16;
+ rdata[3] = ((uint32_t)value & 0xff000000) >> 24;
+ return 1;
+ default:
+ unreachable();
+ return 0;
+ }
+}
+
+static void
+appendreloc(Relocation* reloc)
+{
+ Symbol* sym;
+ Section* relsection;
+ Elf64_Rela elfrel;
+
+ memset(&elfrel, 0, sizeof(elfrel));
+
+ sym = reloc->sym;
+ if (reloc->section == text)
+ relsection = textrel;
+ else if (reloc->section == data)
+ relsection = datarel;
+ else {
+ fatal("unexpected relocation for symbol '%s'", sym->name);
+ return;
+ }
+
+ switch (reloc->type) {
+ case R_X86_64_PC32:
+ case R_X86_64_32:
+ case R_X86_64_64:
+ elfrel.r_info = ELF64_R_INFO(sym->idx, reloc->type);
+ elfrel.r_offset = reloc->offset;
+ elfrel.r_addend = reloc->addend;
break;
- default:
- usage(argv0);
- }
- }
- }
-
- if (argv[0]) {
- if (argv[1])
- usage(argv0);
- infilename = argv[0];
- if (!freopen(infilename, "r", stdin))
- fatal("unable to open %s:", infilename);
- }
-}
-
-int main(int argc, char *argv[]) {
- symbols = mkhtab(256);
- parseargs(argc, argv);
- allasm = parseasm();
- initsections();
- assemble();
- while (nrelax-- > 0) {
- relaxreset();
+ default:
+ unreachable();
+ }
+
+ secaddbytes(relsection, &elfrel, sizeof(elfrel));
+}
+
+static void
+handlerelocs(void)
+{
+ Relocation* reloc;
+ size_t i;
+ for (i = 0; i < nrelocs; i++) {
+ reloc = &relocs[i];
+ if (resolvereloc(reloc))
+ continue;
+ appendreloc(reloc);
+ }
+}
+
+static void
+out(const void* buf, size_t n)
+{
+ fwrite(buf, 1, n, stdout);
+ if (ferror(stdout))
+ fatal("fwrite:");
+}
+
+static void
+outelf(void)
+{
+ size_t i;
+ uint64_t offset;
+ Elf64_Ehdr ehdr;
+
+ memset(&ehdr, 0, sizeof(ehdr));
+ ehdr.e_ident[0] = 0x7f;
+ ehdr.e_ident[1] = 'E';
+ ehdr.e_ident[2] = 'L';
+ ehdr.e_ident[3] = 'F';
+ ehdr.e_ident[4] = ELFCLASS64;
+ ehdr.e_ident[5] = ELFDATA2LSB;
+ ehdr.e_ident[6] = 1;
+ ehdr.e_type = ET_REL;
+ ehdr.e_machine = EM_X86_64;
+ ehdr.e_flags = 0;
+ ehdr.e_version = 1;
+ ehdr.e_ehsize = sizeof(Elf64_Ehdr);
+ ehdr.e_shoff = sizeof(Elf64_Ehdr);
+ ehdr.e_shentsize = sizeof(Elf64_Shdr);
+ ehdr.e_shnum = nsections;
+ ehdr.e_shstrndx = 1;
+
+ out(&ehdr, sizeof(ehdr));
+ offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Shdr) * nsections;
+
+ for (i = 0; i < nsections; i++) {
+ sections[i].hdr.sh_offset = offset;
+ out(&sections[i].hdr, sizeof(Elf64_Shdr));
+ offset += sections[i].hdr.sh_size;
+ }
+ for (i = 0; i < nsections; i++) {
+ if (sections[i].hdr.sh_type == SHT_NOBITS)
+ continue;
+ out(sections[i].data, sections[i].hdr.sh_size);
+ }
+ if (fflush(stdout) != 0)
+ fatal("fflush:");
+}
+
+static void
+usage(char* argv0)
+{
+ fprintf(stderr, "minias - a mini x86-64 assembler.\n\n");
+ fprintf(stderr, "usage: %s [-r iter] [-o out] [input]\n", argv0);
+ fprintf(stderr, "\n");
+ fprintf(stderr, " -r iter Jump relaxation iterations (default 1).\n");
+ fprintf(stderr, " -o out Output file to write (default stdout).\n");
+ exit(2);
+}
+
+static void
+parseargs(int argc, char* argv[])
+{
+ char *a, *argv0, *outfname;
+
+ argv0 = argv[0];
+
+ for (++argv; *argv; argv++) {
+ if (argv[0][0] != '-')
+ break;
+ a = &argv[0][1];
+ switch (*a) {
+ case '-':
+ case 'h':
+ usage(argv0);
+ break;
+ case 'r':
+ nrelax = atoi(*++argv);
+ break;
+ case 'o':
+ if (argv[1] == NULL)
+ usage(argv0);
+ outfname = *++argv;
+ if (!freopen(outfname, "w", stdout))
+ fatal("unable to open %s:", outfname);
+ break;
+ default:
+ usage(argv0);
+ }
+ }
+
+ if (argv[0]) {
+ if (argv[1])
+ usage(argv0);
+ infilename = argv[0];
+ if (!freopen(infilename, "r", stdin))
+ fatal("unable to open %s:", infilename);
+ }
+}
+
+int
+main(int argc, char* argv[])
+{
+ symbols = mkhtab(256);
+ parseargs(argc, argv);
+ allasm = parseasm();
+ initsections();
assemble();
- }
- fillsymtab();
- handlerelocs();
- outelf();
- return 0;
+ while (nrelax-- > 0) {
+ relaxreset();
+ assemble();
+ }
+ fillsymtab();
+ handlerelocs();
+ outelf();
+ return 0;
} \ No newline at end of file