aboutsummaryrefslogtreecommitdiff
path: root/main.c
diff options
context:
space:
mode:
authorAndrew Chambers <[email protected]>2021-10-17 18:08:14 +1300
committerAndrew Chambers <[email protected]>2021-10-17 18:08:14 +1300
commit8464df80ace17b3decc59c4752bf1f4de9dccae8 (patch)
treedb62cd75918c2f466313d1171fb8573975baa3d4 /main.c
parentfccfcc2043e56306935dff29e66c414fd6c0a6d0 (diff)
parentbcaa994f61c57ca5c5446a4ead7015dd1ead95db (diff)
Merge branch 'master' of github.com:andrewchambers/minias
Diffstat (limited to 'main.c')
-rw-r--r--main.c218
1 files changed, 147 insertions, 71 deletions
diff --git a/main.c b/main.c
index b47fbec..c444cc8 100644
--- a/main.c
+++ b/main.c
@@ -1,9 +1,11 @@
#include "minias.h"
-#include <getopt.h>
/* Parsed assembly */
static AsmLine *allasm = NULL;
+/* Number of assembly the relaxation passes. */
+static int nrelax = 1;
+
/* Symbol table. */
static struct hashtable *symbols = NULL;
@@ -29,7 +31,7 @@ static Section *datarel = NULL;
static char *infilename = "<stdin>";
static size_t curlineno = 0;
-void lfatal(const char *fmt, ...) {
+static void lfatal(const char *fmt, ...) {
va_list ap;
fprintf(stderr, "%s:%ld: ", infilename, curlineno);
va_start(ap, fmt);
@@ -45,8 +47,11 @@ static Symbol *getsym(const char *name) {
htabkey(&htk, name, strlen(name));
ps = (Symbol **)htabput(symbols, &htk);
if (!*ps) {
- *ps = zalloc(sizeof(Symbol));
- (*ps)->name = name;
+ *ps = xmalloc(sizeof(Symbol));
+ **ps = (Symbol){
+ .name = name,
+ .wco = -1,
+ };
}
s = *ps;
return s;
@@ -155,7 +160,7 @@ static void initsections(void) {
datarel->hdr.sh_entsize = sizeof(Elf64_Rela);
}
-Relocation *newreloc() {
+static Relocation *newreloc() {
if (nrelocs == reloccap) {
reloccap = nrelocs ? nrelocs * 2 : 64;
relocs = xreallocarray(relocs, reloccap, sizeof(Relocation));
@@ -208,15 +213,7 @@ static uint8_t regbits(AsmKind k) { return (k - (ASM_REG_BEGIN + 1)) % 16; }
static uint8_t isreg64(AsmKind k) { return k >= ASM_RAX && k <= ASM_R15; }
-/* Register that requires the use of a rex prefix. */
-static uint8_t isrexreg(AsmKind k) {
- return k > ASM_REG_BEGIN && k < ASM_REG_END &&
- (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL ||
- k == ASM_SIL || k == ASM_DIL);
-}
-
-/* Compose a rex prefix - See intel manual. */
-
+/* Rex opcode prefix. */
typedef struct Rex {
uint8_t required : 1;
uint8_t w : 1;
@@ -225,6 +222,13 @@ typedef struct Rex {
uint8_t b : 1;
} Rex;
+/* Register that requires the use of a rex prefix. */
+static uint8_t isrexreg(AsmKind k) {
+ return k > ASM_REG_BEGIN && k < ASM_REG_END &&
+ (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL ||
+ k == ASM_SIL || k == ASM_DIL);
+}
+
static uint8_t rexbyte(Rex rex) {
return ((1 << 6) | (rex.w << 3) | (rex.r << 2) | (rex.x << 1) | rex.b);
}
@@ -379,7 +383,7 @@ static void assemblemem(const Memarg *memarg, Rex rex, VarBytes prefix,
assemblemodregrm(rex, prefix, opcode, mod, reg, rm);
if (mod == 1) {
- assemblereloc(memarg->disp.l, memarg->disp.c, 1, R_X86_64_8);
+ assembleconstant(memarg->disp.c, 1);
} else if (mod == 2) {
assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
}
@@ -439,7 +443,7 @@ static void assemblemem(const Memarg *memarg, Rex rex, VarBytes prefix,
sb(sibbyte(scale, index, base));
if (mod == 1) {
- assemblereloc(memarg->disp.l, memarg->disp.c, 1, R_X86_64_8);
+ assembleconstant(memarg->disp.c, 1);
} else if (mod == 2) {
assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
}
@@ -559,7 +563,7 @@ static void assemblexchg(const Instr *xchg) {
rex = (Rex){
.required = isrexreg(xchg->arg1->kind) || isrexreg(xchg->arg2->kind),
.w = isreg64(xchg->arg1->kind) || isreg64(xchg->arg2->kind),
- .r = !!(regbits(reg) & (1 << 3)),
+ .b = !!(regbits(reg) & (1 << 3)),
};
assembleplusr(rex, prefix, opcode, regbits(reg));
} else {
@@ -740,6 +744,82 @@ static void assembleset(const Instr *instr) {
}
}
+static void assemblecall(const Call *call) {
+ Rex rex;
+ uint8_t rm;
+
+ if (call->indirect) {
+ if (call->target.indirect->kind == ASM_MEMARG) {
+ rex = (Rex){0};
+ assemblemem(&call->target.indirect->memarg, rex, -1, 0xff, 0x02);
+ } else {
+ rm = regbits(call->target.indirect->kind);
+ rex = (Rex){.b = !!(rm & (1 << 3))};
+ assemblemodregrm(rex, -1, 0xff, 0x03, 0x02, rm);
+ }
+ } else {
+ sb(0xe8);
+ assemblereloc(call->target.direct.l, call->target.direct.c - 4, 4,
+ R_X86_64_PC32);
+ }
+}
+
+static void assembleimul(const Instr *instr) {
+ VarBytes prefix, opcode;
+
+ if (instr->variant < 8) {
+ assembledivmulneg(instr, 0x05);
+ } else if (instr->variant < 14) {
+ opcode = 0x01000faf;
+ prefix = ((instr->variant - 8) % 3) == 0 ? 0x66 : -1;
+ assemblerrm(instr, prefix, opcode, 1);
+ } else {
+ const Imm *imm;
+ imm = &instr->arg3->imm;
+ opcode = 0x69;
+ prefix = ((instr->variant - 14) % 3) == 0 ? 0x66 : -1;
+ assemblerrm(instr, prefix, opcode, 1);
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32);
+ }
+}
+
+static void assemblejmp(const Jmp *j) {
+ int jmpsize;
+ int64_t distance;
+ Symbol *target;
+
+ static uint8_t variant2op[31] = {
+ 0xe9, 0x84, 0x88, 0x8b, 0x8a, 0x8a, 0x80, 0x85, 0x89, 0x8b, 0x81,
+ 0x8f, 0x8d, 0x8c, 0x8e, 0x85, 0x83, 0x87, 0x83, 0x82, 0x86, 0x8e,
+ 0x8c, 0x8d, 0x8f, 0x84, 0x82, 0x86, 0x82, 0x83, 0x87,
+ };
+
+ jmpsize = 4;
+ target = getsym(j->target);
+ if (cursection == target->section && (target->defined || target->wco != -1)) {
+ if (target->defined) {
+ distance = target->offset - cursection->hdr.sh_size;
+ } else {
+ distance = target->wco - cursection->hdr.sh_size;
+ }
+ if ((distance - 1) >= -128 && (distance - 1) <= 127) {
+ jmpsize = 1;
+ } else {
+ jmpsize = 4;
+ }
+ }
+
+ if (jmpsize == 4) {
+ if (j->variant)
+ sb(0x0f);
+ sb(variant2op[j->variant]);
+ assemblereloc(j->target, -4, 4, R_X86_64_PC32);
+ } else {
+ sb(variant2op[j->variant] + (j->variant ? -16 : 2));
+ assemblereloc(j->target, -1, 1, R_X86_64_PC8);
+ }
+}
+
static void assemble(void) {
Symbol *sym;
AsmLine *l;
@@ -847,38 +927,12 @@ static void assemble(void) {
lfatal("%s already defined", sym->name);
sym->defined = 1;
break;
- case ASM_CALL: {
- Rex rex;
- uint8_t rm;
-
- if (v->call.indirect) {
- if (v->call.target.indirect->kind == ASM_MEMARG) {
- rex = (Rex){0};
- assemblemem(&v->call.target.indirect->memarg, rex, -1, 0xff, 0x02);
- } else {
- rm = regbits(v->call.target.indirect->kind);
- rex = (Rex){.b = !!(rm & (1 << 3))};
- assemblemodregrm(rex, -1, 0xff, 0x03, 0x02, rm);
- }
- } else {
- sb(0xe8);
- assemblereloc(v->call.target.direct.l, v->call.target.direct.c - 4, 4,
- R_X86_64_PC32);
- }
+ case ASM_CALL:
+ assemblecall(&v->call);
break;
- }
- case ASM_JMP: {
- static uint8_t variant2op[31] = {
- 0xe9, 0x84, 0x88, 0x8b, 0x8a, 0x8a, 0x80, 0x85, 0x89, 0x8b, 0x81,
- 0x8f, 0x8d, 0x8c, 0x8e, 0x85, 0x83, 0x87, 0x83, 0x82, 0x86, 0x8e,
- 0x8c, 0x8d, 0x8f, 0x84, 0x82, 0x86, 0x82, 0x83, 0x87,
- };
- if (v->jmp.variant)
- sb(0x0f);
- sb(variant2op[v->jmp.variant]);
- assemblereloc(v->jmp.target, -4, 4, R_X86_64_PC32);
+ case ASM_JMP:
+ assemblejmp(&v->jmp);
break;
- }
case ASM_PUSH: {
Rex rex;
uint8_t reg;
@@ -1054,25 +1108,9 @@ static void assemble(void) {
case ASM_MULSS:
assemblerrm(&v->instr, 0xf3, 0x01000f59, 1);
break;
- case ASM_IMUL: {
- VarBytes prefix, opcode;
-
- if (v->instr.variant < 8) {
- assembledivmulneg(&v->instr, 0x05);
- } else if (v->instr.variant < 14) {
- opcode = 0x01000faf;
- prefix = ((v->instr.variant - 8) % 3) == 0 ? 0x66 : -1;
- assemblerrm(&v->instr, prefix, opcode, 1);
- } else {
- const Imm *imm;
- imm = &v->instr.arg3->imm;
- opcode = 0x69;
- prefix = ((v->instr.variant - 14) % 3) == 0 ? 0x66 : -1;
- assemblerrm(&v->instr, prefix, opcode, 1);
- assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32);
- }
+ case ASM_IMUL:
+ assembleimul(&v->instr);
break;
- }
case ASM_NEG:
assembledivmulneg(&v->instr, 0x03);
break;
@@ -1085,10 +1123,9 @@ static void assemble(void) {
assemblebasicop(&v->instr, variant2op[v->instr.variant], 0x01);
break;
}
- case ASM_PXOR: {
+ case ASM_PXOR:
assemblerrm(&v->instr, 0x66, 0x01000fef, 1);
break;
- }
case ASM_SET:
assembleset(&v->instr);
break;
@@ -1152,6 +1189,32 @@ static void assemble(void) {
}
}
+/* Reset while remembering symbol offsets so we can size jumps. */
+static void relaxreset(void) {
+ Symbol *sym;
+ Section *sec;
+ size_t i;
+
+ /* Reset relocations and section data but retain capacity. */
+ nrelocs = 0;
+
+ for (i = 0; i < nsections; i++) {
+ sec = &sections[i];
+ if (sec == shstrtab)
+ continue;
+ sec->hdr.sh_size = 0;
+ }
+
+ /* Reset symbols, saving the worst case offset for the second pass. */
+ for (i = 0; i < symbols->cap; i++) {
+ if (!symbols->keys[i].str)
+ continue;
+ sym = symbols->vals[i];
+ *sym = (Symbol){
+ .name = sym->name, .section = sym->section, .wco = sym->offset};
+ }
+}
+
static void addtosymtab(Symbol *sym) {
Elf64_Sym elfsym;
int stype;
@@ -1216,10 +1279,14 @@ static int resolvereloc(Relocation *reloc) {
return 0;
switch (reloc->type) {
- case R_X86_64_8:
case R_X86_64_32:
case R_X86_64_64:
return 0;
+ case R_X86_64_PC8:
+ rdata = &reloc->section->data[reloc->offset];
+ value = sym->offset - reloc->offset + reloc->addend;
+ rdata[0] = ((uint8_t)value & 0xff);
+ return 1;
case R_X86_64_PC32:
rdata = &reloc->section->data[reloc->offset];
value = sym->offset - reloc->offset + reloc->addend;
@@ -1255,7 +1322,6 @@ static void appendreloc(Relocation *reloc) {
case R_X86_64_PC32:
case R_X86_64_32:
case R_X86_64_64:
- case R_X86_64_8:
elfrel.r_info = ELF64_R_INFO(sym->idx, reloc->type);
elfrel.r_offset = reloc->offset;
elfrel.r_addend = reloc->addend;
@@ -1325,8 +1391,11 @@ static void outelf(void) {
}
static void usage(char *argv0) {
- fprintf(stderr, "minias - a mini assembler.");
- fprintf(stderr, "usage: %s [-o out] [input]\n", argv0);
+ fprintf(stderr, "minias - a mini x86-64 assembler.\n\n");
+ fprintf(stderr, "usage: %s [-r passes] [-o out] [input]\n", argv0);
+ fprintf(stderr, "\n");
+ fprintf(stderr, " -r passes Jump relaxation iterations (default 1).\n");
+ fprintf(stderr, " -o out Output file to write (default stdout).\n");
exit(2);
}
@@ -1344,6 +1413,9 @@ static void parseargs(int argc, char *argv[]) {
case 'h':
usage(argv0);
break;
+ case 'r':
+ nrelax = atoi(*++argv);
+ break;
case 'o':
if (argv[1] == NULL)
usage(argv0);
@@ -1372,6 +1444,10 @@ int main(int argc, char *argv[]) {
allasm = parseasm();
initsections();
assemble();
+ while (nrelax-- > 0) {
+ relaxreset();
+ assemble();
+ }
fillsymtab();
handlerelocs();
outelf();