aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Chambers <[email protected]>2021-10-19 15:47:22 +1300
committerAndrew Chambers <[email protected]>2021-10-19 15:47:22 +1300
commit66269a6c54613908a3c1a5851437044717b21d7e (patch)
tree86a52d9993b6e609a07e4481c04cad22ce42e309
parent61a23eb3fd34408ade4f4724fd992a5877d857e1 (diff)
Change formatting style.
-rw-r--r--Makefile4
-rw-r--r--main.c1916
-rw-r--r--minias.h514
-rw-r--r--parse.c460
-rw-r--r--util.c366
5 files changed, 1706 insertions, 1554 deletions
diff --git a/Makefile b/Makefile
index a615f8a..1970b64 100644
--- a/Makefile
+++ b/Makefile
@@ -24,7 +24,9 @@ parse.o: asm.peg.inc
main.o parse.o util.o: minias.h
fmt:
- clang-format -i *.c *.h
+ clang-format \
+ -style='{BasedOnStyle: WebKit, AlwaysBreakAfterReturnType: TopLevelDefinitions, ColumnLimit: 100}'\
+ -i *.c *.h
check:
sh test/test.sh
diff --git a/main.c b/main.c
index 255b9a0..f9913f8 100644
--- a/main.c
+++ b/main.c
@@ -1,16 +1,16 @@
#include "minias.h"
/* Parsed assembly */
-static AsmLine *allasm = NULL;
+static AsmLine* allasm = NULL;
/* Number of assembly relaxation passes. */
static int nrelax = 1;
/* Symbols before writing to symtab section. */
-static struct hashtable *symbols = NULL;
+static struct hashtable* symbols = NULL;
/* Array of all relocations before adding to the rel section. */
-static Relocation *relocs = NULL;
+static Relocation* relocs = NULL;
static size_t nrelocs = 0;
static size_t reloccap = 0;
@@ -18,232 +18,279 @@ static size_t reloccap = 0;
static Section sections[MAXSECTIONS];
static size_t nsections = 1; // first is reserved.
-static Section *cursection = NULL;
-static Section *shstrtab = NULL;
-static Section *strtab = NULL;
-static Section *symtab = NULL;
-static Section *bss = NULL;
-static Section *text = NULL;
-static Section *data = NULL;
-static Section *textrel = NULL;
-static Section *datarel = NULL;
-
-static char *infilename = "<stdin>";
+static Section* cursection = NULL;
+static Section* shstrtab = NULL;
+static Section* strtab = NULL;
+static Section* symtab = NULL;
+static Section* bss = NULL;
+static Section* text = NULL;
+static Section* data = NULL;
+static Section* textrel = NULL;
+static Section* datarel = NULL;
+
+static char* infilename = "<stdin>";
static size_t curlineno = 0;
-static void lfatal(const char *fmt, ...) {
- va_list ap;
- fprintf(stderr, "%s:%ld: ", infilename, curlineno);
- va_start(ap, fmt);
- vwarn(fmt, ap);
- va_end(ap);
- exit(1);
-}
-
-static Symbol *getsym(const char *name) {
- Symbol **ps, *s;
- struct hashtablekey htk;
-
- htabkey(&htk, name, strlen(name));
- ps = (Symbol **)htabput(symbols, &htk);
- if (!*ps) {
- *ps = xmalloc(sizeof(Symbol));
- **ps = (Symbol){
- .name = name,
- .wco = -1,
- };
- }
- s = *ps;
- return s;
+static void
+lfatal(const char* fmt, ...)
+{
+ va_list ap;
+ fprintf(stderr, "%s:%ld: ", infilename, curlineno);
+ va_start(ap, fmt);
+ vwarn(fmt, ap);
+ va_end(ap);
+ exit(1);
+}
+
+static Symbol*
+getsym(const char* name)
+{
+ Symbol **ps, *s;
+ struct hashtablekey htk;
+
+ htabkey(&htk, name, strlen(name));
+ ps = (Symbol**)htabput(symbols, &htk);
+ if (!*ps) {
+ *ps = xmalloc(sizeof(Symbol));
+ **ps = (Symbol){
+ .name = name,
+ .wco = -1,
+ };
+ }
+ s = *ps;
+ return s;
}
-static void secaddbytes(Section *s, const void *bytes, size_t n) {
+static void
+secaddbytes(Section* s, const void* bytes, size_t n)
+{
+
+ if (s->hdr.sh_type == SHT_NOBITS) {
+ s->hdr.sh_size += n;
+ return;
+ }
+
+ while (s->capacity < s->hdr.sh_size + n) {
+ s->capacity = s->capacity ? (s->capacity * 2) : 512;
+ s->data = xrealloc(s->data, s->capacity);
+ }
+ memcpy(s->data + s->hdr.sh_size, bytes, n);
- if (s->hdr.sh_type == SHT_NOBITS) {
s->hdr.sh_size += n;
- return;
- }
-
- while (s->capacity < s->hdr.sh_size + n) {
- s->capacity = s->capacity ? (s->capacity * 2) : 512;
- s->data = xrealloc(s->data, s->capacity);
- }
- memcpy(s->data + s->hdr.sh_size, bytes, n);
-
- s->hdr.sh_size += n;
-}
-
-static void secaddbyte(Section *s, uint8_t b) { secaddbytes(s, &b, 1); }
-
-static Elf64_Word elfstr(Section *sec, const char *s) {
- Elf64_Word i = sec->hdr.sh_size;
- secaddbytes(sec, s, strlen(s) + 1);
- return i;
-}
-
-static Section *newsection() {
- Section *s;
- if (nsections >= MAXSECTIONS)
- fatal("too many sections");
- s = &sections[nsections];
- s->idx = nsections;
- nsections += 1;
- return s;
-}
-
-static Section *getsection(const char *name) {
- size_t i;
- char *secname;
- Section *s;
-
- for (i = 0; i < nsections; i++) {
- secname = (char *)shstrtab->data + sections[i].hdr.sh_name;
- if (strcmp(secname, name) == 0)
- return &sections[i];
- }
- s = newsection();
- s->hdr.sh_name = elfstr(shstrtab, name);
- return s;
-}
-
-static void initsections(void) {
- Elf64_Sym elfsym;
-
- shstrtab = newsection();
- secaddbyte(shstrtab, 0);
- shstrtab->hdr.sh_name = elfstr(shstrtab, ".shstrtab");
- shstrtab->hdr.sh_type = SHT_STRTAB;
-
- strtab = newsection();
- secaddbyte(strtab, 0);
- strtab->hdr.sh_name = elfstr(shstrtab, ".strtab");
- strtab->hdr.sh_type = SHT_STRTAB;
-
- symtab = newsection();
- symtab->hdr.sh_name = elfstr(shstrtab, ".symtab");
- symtab->hdr.sh_type = SHT_SYMTAB;
- symtab->hdr.sh_link = strtab->idx;
- symtab->hdr.sh_entsize = sizeof(Elf64_Sym);
- memset(&elfsym, 0, sizeof(elfsym));
- secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym));
-
- bss = newsection();
- bss->hdr.sh_name = elfstr(shstrtab, ".bss");
- bss->hdr.sh_type = SHT_NOBITS;
- bss->hdr.sh_flags = SHF_ALLOC | SHF_WRITE;
- bss->hdr.sh_addralign = 16; // XXX right value?
-
- data = newsection();
- data->hdr.sh_name = elfstr(shstrtab, ".data");
- data->hdr.sh_type = SHT_PROGBITS;
- data->hdr.sh_flags = SHF_ALLOC | SHF_WRITE;
- data->hdr.sh_addralign = 16; // XXX right value?
-
- text = newsection();
- text->hdr.sh_name = elfstr(shstrtab, ".text");
- text->hdr.sh_type = SHT_PROGBITS;
- text->hdr.sh_flags = SHF_EXECINSTR | SHF_ALLOC;
- text->hdr.sh_addralign = 4;
-
- textrel = newsection();
- textrel->hdr.sh_name = elfstr(shstrtab, ".rela.text");
- textrel->hdr.sh_type = SHT_RELA;
- textrel->hdr.sh_info = text->idx;
- textrel->hdr.sh_link = symtab->idx;
- textrel->hdr.sh_entsize = sizeof(Elf64_Rela);
-
- datarel = newsection();
- datarel->hdr.sh_name = elfstr(shstrtab, ".rela.data");
- datarel->hdr.sh_type = SHT_RELA;
- datarel->hdr.sh_info = data->idx;
- datarel->hdr.sh_link = symtab->idx;
- datarel->hdr.sh_entsize = sizeof(Elf64_Rela);
-}
-
-static Relocation *newreloc() {
- if (nrelocs == reloccap) {
- reloccap = nrelocs ? nrelocs * 2 : 64;
- relocs = xreallocarray(relocs, reloccap, sizeof(Relocation));
- }
- return &relocs[nrelocs++];
+}
+
+static void
+secaddbyte(Section* s, uint8_t b)
+{
+ secaddbytes(s, &b, 1);
+}
+
+static Elf64_Word
+elfstr(Section* sec, const char* s)
+{
+ Elf64_Word i = sec->hdr.sh_size;
+ secaddbytes(sec, s, strlen(s) + 1);
+ return i;
+}
+
+static Section*
+newsection()
+{
+ Section* s;
+ if (nsections >= MAXSECTIONS)
+ fatal("too many sections");
+ s = &sections[nsections];
+ s->idx = nsections;
+ nsections += 1;
+ return s;
+}
+
+static Section*
+getsection(const char* name)
+{
+ size_t i;
+ char* secname;
+ Section* s;
+
+ for (i = 0; i < nsections; i++) {
+ secname = (char*)shstrtab->data + sections[i].hdr.sh_name;
+ if (strcmp(secname, name) == 0)
+ return &sections[i];
+ }
+ s = newsection();
+ s->hdr.sh_name = elfstr(shstrtab, name);
+ return s;
+}
+
+static void
+initsections(void)
+{
+ Elf64_Sym elfsym;
+
+ shstrtab = newsection();
+ secaddbyte(shstrtab, 0);
+ shstrtab->hdr.sh_name = elfstr(shstrtab, ".shstrtab");
+ shstrtab->hdr.sh_type = SHT_STRTAB;
+
+ strtab = newsection();
+ secaddbyte(strtab, 0);
+ strtab->hdr.sh_name = elfstr(shstrtab, ".strtab");
+ strtab->hdr.sh_type = SHT_STRTAB;
+
+ symtab = newsection();
+ symtab->hdr.sh_name = elfstr(shstrtab, ".symtab");
+ symtab->hdr.sh_type = SHT_SYMTAB;
+ symtab->hdr.sh_link = strtab->idx;
+ symtab->hdr.sh_entsize = sizeof(Elf64_Sym);
+ memset(&elfsym, 0, sizeof(elfsym));
+ secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym));
+
+ bss = newsection();
+ bss->hdr.sh_name = elfstr(shstrtab, ".bss");
+ bss->hdr.sh_type = SHT_NOBITS;
+ bss->hdr.sh_flags = SHF_ALLOC | SHF_WRITE;
+ bss->hdr.sh_addralign = 16; // XXX right value?
+
+ data = newsection();
+ data->hdr.sh_name = elfstr(shstrtab, ".data");
+ data->hdr.sh_type = SHT_PROGBITS;
+ data->hdr.sh_flags = SHF_ALLOC | SHF_WRITE;
+ data->hdr.sh_addralign = 16; // XXX right value?
+
+ text = newsection();
+ text->hdr.sh_name = elfstr(shstrtab, ".text");
+ text->hdr.sh_type = SHT_PROGBITS;
+ text->hdr.sh_flags = SHF_EXECINSTR | SHF_ALLOC;
+ text->hdr.sh_addralign = 4;
+
+ textrel = newsection();
+ textrel->hdr.sh_name = elfstr(shstrtab, ".rela.text");
+ textrel->hdr.sh_type = SHT_RELA;
+ textrel->hdr.sh_info = text->idx;
+ textrel->hdr.sh_link = symtab->idx;
+ textrel->hdr.sh_entsize = sizeof(Elf64_Rela);
+
+ datarel = newsection();
+ datarel->hdr.sh_name = elfstr(shstrtab, ".rela.data");
+ datarel->hdr.sh_type = SHT_RELA;
+ datarel->hdr.sh_info = data->idx;
+ datarel->hdr.sh_link = symtab->idx;
+ datarel->hdr.sh_entsize = sizeof(Elf64_Rela);
+}
+
+static Relocation*
+newreloc()
+{
+ if (nrelocs == reloccap) {
+ reloccap = nrelocs ? nrelocs * 2 : 64;
+ relocs = xreallocarray(relocs, reloccap, sizeof(Relocation));
+ }
+ return &relocs[nrelocs++];
}
/* Shorthand helpers to write section data. */
-static void sb(uint8_t b) { secaddbyte(cursection, b); }
+static void
+sb(uint8_t b)
+{
+ secaddbyte(cursection, b);
+}
-static void sbn(uint8_t *bytes, size_t n) { secaddbytes(cursection, bytes, n); }
+static void
+sbn(uint8_t* bytes, size_t n)
+{
+ secaddbytes(cursection, bytes, n);
+}
-static void su16(uint16_t w) {
- uint8_t buf[2] = {w & 0xff, (w & 0xff00) >> 8};
- secaddbytes(cursection, buf, sizeof(buf));
+static void
+su16(uint16_t w)
+{
+ uint8_t buf[2] = { w & 0xff, (w & 0xff00) >> 8 };
+ secaddbytes(cursection, buf, sizeof(buf));
}
-static void su32(uint32_t l) {
- uint8_t buf[4] = {
- l & 0xff,
- (l & 0xff00) >> 8,
- (l & 0xff0000) >> 16,
- (l & 0xff000000) >> 24,
- };
- secaddbytes(cursection, buf, sizeof(buf));
+static void
+su32(uint32_t l)
+{
+ uint8_t buf[4] = {
+ l & 0xff,
+ (l & 0xff00) >> 8,
+ (l & 0xff0000) >> 16,
+ (l & 0xff000000) >> 24,
+ };
+ secaddbytes(cursection, buf, sizeof(buf));
}
-static void su64(uint64_t l) {
- uint8_t buf[8] = {
- l & 0xff,
- (l & 0xff00) >> 8,
- (l & 0xff0000) >> 16,
- (l & 0xff000000) >> 24,
- (l & 0xff00000000) >> 32,
- (l & 0xff0000000000) >> 40,
- (l & 0xff000000000000) >> 48,
- (l & 0xff00000000000000) >> 56,
- };
- secaddbytes(cursection, buf, sizeof(buf));
+static void
+su64(uint64_t l)
+{
+ uint8_t buf[8] = {
+ l & 0xff,
+ (l & 0xff00) >> 8,
+ (l & 0xff0000) >> 16,
+ (l & 0xff000000) >> 24,
+ (l & 0xff00000000) >> 32,
+ (l & 0xff0000000000) >> 40,
+ (l & 0xff000000000000) >> 48,
+ (l & 0xff00000000000000) >> 56,
+ };
+ secaddbytes(cursection, buf, sizeof(buf));
}
/* Convert an AsmKind to register bits in reg/rm style. */
-static uint8_t regbits(AsmKind k) { return (k - (ASM_REG_BEGIN + 1)) % 16; }
+static uint8_t
+regbits(AsmKind k)
+{
+ return (k - (ASM_REG_BEGIN + 1)) % 16;
+}
/* Register that requires the use of a rex prefix. */
-static uint8_t isrexreg(AsmKind k) {
- return k > ASM_REG_BEGIN && k < ASM_REG_END &&
- (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL ||
- k == ASM_SIL || k == ASM_DIL);
+static uint8_t
+isrexreg(AsmKind k)
+{
+ return k > ASM_REG_BEGIN && k < ASM_REG_END
+ && (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL || k == ASM_SIL || k == ASM_DIL);
}
-static uint8_t rexbyte(Rex rex) {
- return ((1 << 6) | (rex.w << 3) | (rex.r << 2) | (rex.x << 1) | rex.b);
+static uint8_t
+rexbyte(Rex rex)
+{
+ return ((1 << 6) | (rex.w << 3) | (rex.r << 2) | (rex.x << 1) | rex.b);
}
/* Compose a mod/reg/rm byte - See intel manual. */
-static uint8_t modregrmbyte(uint8_t mod, uint8_t reg, uint8_t rm) {
- return (((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7));
+static uint8_t
+modregrmbyte(uint8_t mod, uint8_t reg, uint8_t rm)
+{
+ return (((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7));
}
/* Compose an sib byte - See intel manual. */
-static uint8_t sibbyte(uint8_t ss, uint8_t idx, uint8_t base) {
- return (((ss & 3) << 6) | ((idx & 7) << 3) | (base & 7));
-}
-
-void assembleconstant(int64_t c, int nbytes) {
- switch (nbytes) {
- case 1:
- sb((uint8_t)c);
- break;
- case 2:
- su16((uint16_t)c);
- break;
- case 4:
- su32((uint32_t)c);
- break;
- case 8:
- su64((uint64_t)c);
- break;
- default:
- unreachable();
- }
+static uint8_t
+sibbyte(uint8_t ss, uint8_t idx, uint8_t base)
+{
+ return (((ss & 3) << 6) | ((idx & 7) << 3) | (base & 7));
+}
+
+void
+assembleconstant(int64_t c, int nbytes)
+{
+ switch (nbytes) {
+ case 1:
+ sb((uint8_t)c);
+ break;
+ case 2:
+ su16((uint16_t)c);
+ break;
+ case 4:
+ su32((uint32_t)c);
+ break;
+ case 8:
+ su64((uint64_t)c);
+ break;
+ default:
+ unreachable();
+ }
}
/* The VarBytes type encodes a variadic number of bytes.
@@ -256,766 +303,803 @@ void assembleconstant(int64_t c, int nbytes) {
*/
typedef int32_t VarBytes;
-static void assemblevbytes(VarBytes bytes) {
- int i, n;
- uint8_t b, shift;
-
- n = (int8_t)(uint8_t)((bytes & 0xff000000) >> 24);
- for (i = n; i >= 0; i--) {
- shift = i * 8;
- b = (bytes & (0xff << shift)) >> shift;
- sb(b);
- }
+static void
+assemblevbytes(VarBytes bytes)
+{
+ int i, n;
+ uint8_t b, shift;
+
+ n = (int8_t)(uint8_t)((bytes & 0xff000000) >> 24);
+ for (i = n; i >= 0; i--) {
+ shift = i * 8;
+ b = (bytes & (0xff << shift)) >> shift;
+ sb(b);
+ }
}
-static void assemblerex(Rex rex) {
- if (rex.required || rex.w || rex.r || rex.x || rex.b)
- sb(rexbyte(rex));
+static void
+assemblerex(Rex rex)
+{
+ if (rex.required || rex.w || rex.r || rex.x || rex.b)
+ sb(rexbyte(rex));
}
/* Assemble a symbolic value. */
-static void assemblereloc(const char *l, int64_t c, int nbytes, int type) {
- Relocation *reloc;
- Symbol *sym;
-
- if (l != NULL) {
- reloc = newreloc();
- sym = getsym(l);
- reloc->type = type;
- reloc->section = cursection;
- reloc->sym = sym;
- reloc->offset = cursection->hdr.sh_size;
- reloc->addend = c;
- c = 0;
- }
- assembleconstant(c, nbytes);
+static void
+assemblereloc(const char* l, int64_t c, int nbytes, int type)
+{
+ Relocation* reloc;
+ Symbol* sym;
+
+ if (l != NULL) {
+ reloc = newreloc();
+ sym = getsym(l);
+ reloc->type = type;
+ reloc->section = cursection;
+ reloc->sym = sym;
+ reloc->offset = cursection->hdr.sh_size;
+ reloc->addend = c;
+ c = 0;
+ }
+ assembleconstant(c, nbytes);
}
/* Assemble a r <-> mem operation. */
-static void assemblemem(const Memarg *memarg, Rex rex, VarBytes prefix,
- VarBytes opcode, uint8_t reg, int32_t nexti) {
+static void
+assemblemem(
+ const Memarg* memarg, Rex rex, VarBytes prefix, VarBytes opcode, uint8_t reg, int32_t nexti)
+{
+
+ uint8_t mod, rm, scale, index, base;
+
+ /* Rip relative addressing. */
+ if (memarg->base == ASM_RIP) {
+ rm = 0x05;
+ assemblevbytes(prefix);
+ assemblerex(rex);
+ assemblevbytes(opcode);
+ sb(modregrmbyte(0x00, reg, rm));
+
+ if (memarg->disp.l) {
+ assemblereloc(memarg->disp.l, memarg->disp.c - 4 + nexti, 4, R_X86_64_PC32);
+ } else {
+ assembleconstant(memarg->disp.c, 4);
+ }
+ return;
+ }
- uint8_t mod, rm, scale, index, base;
+ /* Direct memory access */
+ if (memarg->base == ASM_NO_REG) {
+ mod = 0;
+ rm = 4;
+
+ assemblevbytes(prefix);
+ assemblerex(rex);
+ assemblevbytes(opcode);
+ sb(modregrmbyte(mod, reg, rm));
+
+ sb(sibbyte(0, 4, 5));
+ if (memarg->disp.l) {
+ assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
+ } else {
+ assembleconstant(memarg->disp.c, 4);
+ }
+ return;
+ }
- /* Rip relative addressing. */
- if (memarg->base == ASM_RIP) {
- rm = 0x05;
- assemblevbytes(prefix);
- assemblerex(rex);
- assemblevbytes(opcode);
- sb(modregrmbyte(0x00, reg, rm));
+ rm = regbits(memarg->base);
+ rex.b = !!(rm & (1 << 3));
- if (memarg->disp.l) {
- assemblereloc(memarg->disp.l, memarg->disp.c - 4 + nexti, 4,
- R_X86_64_PC32);
- } else {
- assembleconstant(memarg->disp.c, 4);
+ /* Case when we don't need sib */
+ if (memarg->index == ASM_NO_REG && memarg->scale == 0 && ((rm & 7) != 4)) {
+
+ if (memarg->disp.l == 0 && memarg->disp.c == 0) {
+ if ((rm & 7) == 5) {
+ mod = 1;
+ } else {
+ mod = 0;
+ }
+ } else {
+ mod = 2;
+ }
+
+ assemblevbytes(prefix);
+ assemblerex(rex);
+ assemblevbytes(opcode);
+ sb(modregrmbyte(mod, reg, rm));
+
+ if (mod == 1) {
+ assembleconstant(memarg->disp.c, 1);
+ } else if (mod == 2) {
+ assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
+ }
+ return;
}
- return;
- }
- /* Direct memory access */
- if (memarg->base == ASM_NO_REG) {
- mod = 0;
+ /* Setup sib indexing. */
+ base = rm;
rm = 4;
- assemblevbytes(prefix);
- assemblerex(rex);
- assemblevbytes(opcode);
- sb(modregrmbyte(mod, reg, rm));
+ if (memarg->disp.c == 0 && memarg->disp.l == 0 && ((base & 7) != 5)) {
+ mod = 0; /* +0 */
+ } else {
+ if (memarg->disp.l == NULL && memarg->disp.c >= -128 && memarg->disp.c <= 127) {
+ mod = 1; /* +disp8 */
+ } else {
+ mod = 2; /* +disp32 */
+ }
+ }
- sb(sibbyte(0, 4, 5));
- if (memarg->disp.l) {
- assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
+ if (memarg->index == ASM_NO_REG) {
+ index = 4;
} else {
- assembleconstant(memarg->disp.c, 4);
+ if (memarg->index == ASM_RSP)
+ lfatal("rsp cannot be used as an index");
+ index = regbits(memarg->index);
}
- return;
- }
- rm = regbits(memarg->base);
- rex.b = !!(rm & (1 << 3));
+ /* If our base is a bp register, we must use the index instead. */
+ if ((base & 7) == 5 && memarg->index == ASM_NO_REG) {
+ index = base;
+ }
- /* Case when we don't need sib */
- if (memarg->index == ASM_NO_REG && memarg->scale == 0 && ((rm & 7) != 4)) {
+ rex.x = !!(index & (1 << 3));
- if (memarg->disp.l == 0 && memarg->disp.c == 0) {
- if ((rm & 7) == 5) {
- mod = 1;
- } else {
- mod = 0;
- }
- } else {
- mod = 2;
+ switch (memarg->scale) {
+ case 0:
+ case 1:
+ scale = 0;
+ break;
+ case 2:
+ scale = 1;
+ break;
+ case 4:
+ scale = 2;
+ break;
+ case 8:
+ scale = 3;
+ break;
+ default:
+ lfatal("invalid addressing scale");
+ return;
}
assemblevbytes(prefix);
assemblerex(rex);
assemblevbytes(opcode);
sb(modregrmbyte(mod, reg, rm));
+ sb(sibbyte(scale, index, base));
if (mod == 1) {
- assembleconstant(memarg->disp.c, 1);
+ assembleconstant(memarg->disp.c, 1);
} else if (mod == 2) {
- assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
+ assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
}
- return;
- }
-
- /* Setup sib indexing. */
- base = rm;
- rm = 4;
+}
- if (memarg->disp.c == 0 && memarg->disp.l == 0 && ((base & 7) != 5)) {
- mod = 0; /* +0 */
- } else {
- if (memarg->disp.l == NULL && memarg->disp.c >= -128 &&
- memarg->disp.c <= 127) {
- mod = 1; /* +disp8 */
- } else {
- mod = 2; /* +disp32 */
- }
- }
-
- if (memarg->index == ASM_NO_REG) {
- index = 4;
- } else {
- if (memarg->index == ASM_RSP)
- lfatal("rsp cannot be used as an index");
- index = regbits(memarg->index);
- }
-
- /* If our base is a bp register, we must use the index instead. */
- if ((base & 7) == 5 && memarg->index == ASM_NO_REG) {
- index = base;
- }
-
- rex.x = !!(index & (1 << 3));
-
- switch (memarg->scale) {
- case 0:
- case 1:
- scale = 0;
- break;
- case 2:
- scale = 1;
- break;
- case 4:
- scale = 2;
- break;
- case 8:
- scale = 3;
- break;
- default:
- lfatal("invalid addressing scale");
- return;
- }
-
- assemblevbytes(prefix);
- assemblerex(rex);
- assemblevbytes(opcode);
- sb(modregrmbyte(mod, reg, rm));
- sb(sibbyte(scale, index, base));
-
- if (mod == 1) {
- assembleconstant(memarg->disp.c, 1);
- } else if (mod == 2) {
- assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
- }
-}
-
-static void assemblecall(const Call *call) {
- Rex rex;
- uint8_t rm;
-
- if (call->indirect) {
- if (call->target.indirect->kind == ASM_MEMARG) {
- rex = (Rex){0};
- abort(); // assemblemem(&call->target.indirect->memarg, rex, -1, 0xff,
- // 0x02);
- } else {
- rm = regbits(call->target.indirect->kind);
- rex = (Rex){.b = !!(rm & (1 << 3))};
- assemblerex(rex);
- assemblevbytes(0xff);
- sb(modregrmbyte(0x03, 0x02, rm));
- }
- } else {
- sb(0xe8);
- assemblereloc(call->target.direct.l, call->target.direct.c - 4, 4,
- R_X86_64_PC32);
- }
-}
-
-static void assemblejmp(const Jmp *j) {
- int jmpsize;
- int64_t distance;
- Symbol *target;
-
- static uint8_t cc2op[31] = {
- 0xe9, 0x84, 0x88, 0x8b, 0x8a, 0x8a, 0x80, 0x85, 0x89, 0x8b, 0x81,
- 0x8f, 0x8d, 0x8c, 0x8e, 0x85, 0x83, 0x87, 0x83, 0x82, 0x86, 0x8e,
- 0x8c, 0x8d, 0x8f, 0x84, 0x82, 0x86, 0x82, 0x83, 0x87,
- };
-
- jmpsize = 4;
- target = getsym(j->target);
- if (cursection == target->section && (target->defined || target->wco != -1)) {
- if (target->defined) {
- distance = target->offset - cursection->hdr.sh_size;
+static void
+assemblecall(const Call* call)
+{
+ Rex rex;
+ uint8_t rm;
+
+ if (call->indirect) {
+ if (call->target.indirect->kind == ASM_MEMARG) {
+ rex = (Rex){ 0 };
+ abort(); // assemblemem(&call->target.indirect->memarg, rex, -1, 0xff,
+ // 0x02);
+ } else {
+ rm = regbits(call->target.indirect->kind);
+ rex = (Rex){ .b = !!(rm & (1 << 3)) };
+ assemblerex(rex);
+ assemblevbytes(0xff);
+ sb(modregrmbyte(0x03, 0x02, rm));
+ }
} else {
- distance = target->wco - cursection->hdr.sh_size;
+ sb(0xe8);
+ assemblereloc(call->target.direct.l, call->target.direct.c - 4, 4, R_X86_64_PC32);
}
- if ((distance - 1) >= -128 && (distance - 1) <= 127) {
- jmpsize = 1;
- } else {
- jmpsize = 4;
- }
- }
-
- if (jmpsize == 4) {
- if (j->cc)
- sb(0x0f);
- sb(cc2op[j->cc]);
- assemblereloc(j->target, -4, 4, R_X86_64_PC32);
- } else {
- sb(cc2op[j->cc] + (j->cc ? -16 : 2));
- assemblereloc(j->target, -1, 1, R_X86_64_PC8);
- }
-}
-
-static void assembleabsimm(const Imm *imm) {
- if (imm->nbytes == 1)
- assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_8);
- else if (imm->nbytes == 2)
- assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_16);
- else if (imm->nbytes == 4)
- assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32);
- else if (imm->nbytes == 8)
- assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_64);
- else
- unreachable();
-}
-
-static void assembleinstr(const Instr *instr) {
- Rex rex;
- const Memarg *memarg;
- const Imm *imm;
- uint8_t reg, rm;
-
- switch (instr->encoder) {
- case ENCODER_OP:
- assemblevbytes(instr->opcode);
- break;
- case ENCODER_OPREG:
- rm = regbits(instr->arg1->kind);
- rex = instr->rex;
- rex.required = isrexreg(instr->arg1->kind);
- rex.b = !!(rm & (1 << 3));
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode);
- sb(modregrmbyte(0x03, instr->fixedreg, rm));
- break;
- case ENCODER_OPMEM:
- memarg = &instr->arg1->memarg;
- rex = instr->rex;
- assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, 0);
- break;
- case ENCODER_R:
- reg = regbits(instr->arg1->kind);
- rex = instr->rex;
- rex.required = isrexreg(instr->arg1->kind);
- rex.b = !!(reg & (1 << 3));
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode | (reg & 7));
- break;
- case ENCODER_RIMM:
- imm = &instr->arg1->imm;
- reg = regbits(instr->arg2->kind);
- rex = instr->rex;
- rex.required = isrexreg(instr->arg2->kind);
- rex.b = !!(reg & (1 << 3));
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode | (reg & 7));
- assembleabsimm(imm);
- break;
- case ENCODER_IMM:
- imm = &instr->arg1->imm;
- rex = instr->rex;
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode);
- assembleabsimm(imm);
- break;
- case ENCODER_IMMREG:
- imm = &instr->arg1->imm;
- reg = instr->fixedreg;
- rm = regbits(instr->arg2->kind);
- rex = instr->rex;
- rex.required = isrexreg(instr->arg2->kind);
- rex.b = !!(rm & (1 << 3));
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode);
- sb(modregrmbyte(0x03, reg, rm));
- assembleabsimm(imm);
- break;
- case ENCODER_IMMMEM:
- imm = &instr->arg1->imm;
- memarg = &instr->arg2->memarg;
- reg = instr->fixedreg;
- rex = instr->rex;
- assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg,
- imm->nbytes);
- assembleabsimm(imm);
- break;
- case ENCODER_REGMEM:
- case ENCODER_MEMREG:
- if (instr->encoder == ENCODER_MEMREG) {
- memarg = &instr->arg1->memarg;
- reg = regbits(instr->arg2->kind);
- } else {
- memarg = &instr->arg2->memarg;
- reg = regbits(instr->arg1->kind);
- }
- rex = instr->rex;
- rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
- rex.r = !!(reg & (1 << 3));
- assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, 0);
- break;
- case ENCODER_REGREG:
- case ENCODER_REGREG2:
- if (instr->encoder == ENCODER_REGREG) {
- reg = regbits(instr->arg1->kind);
- rm = regbits(instr->arg2->kind);
+}
+
+static void
+assemblejmp(const Jmp* j)
+{
+ int jmpsize;
+ int64_t distance;
+ Symbol* target;
+
+ // clang-format off
+ static uint8_t cc2op[31] = {
+ 0xe9, 0x84, 0x88, 0x8b, 0x8a, 0x8a, 0x80, 0x85,
+ 0x89, 0x8b, 0x81, 0x8f, 0x8d, 0x8c, 0x8e, 0x85,
+ 0x83, 0x87, 0x83, 0x82, 0x86, 0x8e, 0x8c, 0x8d,
+ 0x8f, 0x84, 0x82, 0x86, 0x82, 0x83, 0x87,
+ };
+ // clang-format on
+
+ jmpsize = 4;
+ target = getsym(j->target);
+ if (cursection == target->section && (target->defined || target->wco != -1)) {
+ if (target->defined) {
+ distance = target->offset - cursection->hdr.sh_size;
+ } else {
+ distance = target->wco - cursection->hdr.sh_size;
+ }
+ if ((distance - 1) >= -128 && (distance - 1) <= 127) {
+ jmpsize = 1;
+ } else {
+ jmpsize = 4;
+ }
+ }
+
+ if (jmpsize == 4) {
+ if (j->cc)
+ sb(0x0f);
+ sb(cc2op[j->cc]);
+ assemblereloc(j->target, -4, 4, R_X86_64_PC32);
} else {
- reg = regbits(instr->arg2->kind);
- rm = regbits(instr->arg1->kind);
+ sb(cc2op[j->cc] + (j->cc ? -16 : 2));
+ assemblereloc(j->target, -1, 1, R_X86_64_PC8);
}
- rex = instr->rex;
- rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
- rex.r = !!(reg & (1 << 3));
- rex.b = !!(rm & (1 << 3));
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode);
- sb(modregrmbyte(0x03, reg, rm));
- break;
- case ENCODER_IMMREGREG2:
- imm = &instr->arg1->imm;
- reg = regbits(instr->arg3->kind);
- rm = regbits(instr->arg2->kind);
- rex = instr->rex;
- rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
- rex.r = !!(reg & (1 << 3));
- rex.b = !!(rm & (1 << 3));
- assemblevbytes(instr->prefix);
- assemblerex(rex);
- assemblevbytes(instr->opcode);
- sb(modregrmbyte(0x03, reg, rm));
- assembleabsimm(imm);
- break;
- case ENCODER_IMMMEMREG:
- imm = &instr->arg1->imm;
- memarg = &instr->arg2->memarg;
- reg = regbits(instr->arg3->kind);
- rex = instr->rex;
- rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
- rex.r = !!(reg & (1 << 3));
- assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, imm->nbytes);
- assembleabsimm(imm);
- break;
- default:
- unreachable();
- }
-}
-
-static void assemble(void) {
- Symbol *sym;
- AsmLine *l;
- const Parsev *v;
-
- cursection = text;
- curlineno = 0;
- for (l = allasm; l; l = l->next) {
- curlineno++;
- v = l->v;
- switch (v->kind) {
- case ASM_SYNTAX_ERROR:
- lfatal("syntax error");
- break;
- case ASM_BLANK:
- break;
- case ASM_DIR_GLOBL:
- sym = getsym(v->globl.name);
- sym->global = 1;
- break;
- case ASM_DIR_SECTION: {
- const char *fp;
- Section *s;
-
- s = getsection(v->section.name);
- s->hdr.sh_type = v->section.type;
- fp = v->section.flags;
- while (fp && *fp) {
- switch (*(fp++)) {
- case 'a':
- s->hdr.sh_flags |= SHF_ALLOC;
- break;
- case 'w':
- s->hdr.sh_flags |= SHF_WRITE;
- break;
- case 'x':
- s->hdr.sh_flags |= SHF_EXECINSTR;
- break;
- default:
- unreachable();
+}
+
+static void
+assembleabsimm(const Imm* imm)
+{
+ if (imm->nbytes == 1)
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_8);
+ else if (imm->nbytes == 2)
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_16);
+ else if (imm->nbytes == 4)
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32);
+ else if (imm->nbytes == 8)
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_64);
+ else
+ unreachable();
+}
+
+static void
+assembleinstr(const Instr* instr)
+{
+ Rex rex;
+ const Memarg* memarg;
+ const Imm* imm;
+ uint8_t reg, rm;
+
+ switch (instr->encoder) {
+ case ENCODER_OP:
+ assemblevbytes(instr->opcode);
+ break;
+ case ENCODER_OPREG:
+ rm = regbits(instr->arg1->kind);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg1->kind);
+ rex.b = !!(rm & (1 << 3));
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode);
+ sb(modregrmbyte(0x03, instr->fixedreg, rm));
+ break;
+ case ENCODER_OPMEM:
+ memarg = &instr->arg1->memarg;
+ rex = instr->rex;
+ assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, 0);
+ break;
+ case ENCODER_R:
+ reg = regbits(instr->arg1->kind);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg1->kind);
+ rex.b = !!(reg & (1 << 3));
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode | (reg & 7));
+ break;
+ case ENCODER_RIMM:
+ imm = &instr->arg1->imm;
+ reg = regbits(instr->arg2->kind);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg2->kind);
+ rex.b = !!(reg & (1 << 3));
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode | (reg & 7));
+ assembleabsimm(imm);
+ break;
+ case ENCODER_IMM:
+ imm = &instr->arg1->imm;
+ rex = instr->rex;
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode);
+ assembleabsimm(imm);
+ break;
+ case ENCODER_IMMREG:
+ imm = &instr->arg1->imm;
+ reg = instr->fixedreg;
+ rm = regbits(instr->arg2->kind);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg2->kind);
+ rex.b = !!(rm & (1 << 3));
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode);
+ sb(modregrmbyte(0x03, reg, rm));
+ assembleabsimm(imm);
+ break;
+ case ENCODER_IMMMEM:
+ imm = &instr->arg1->imm;
+ memarg = &instr->arg2->memarg;
+ reg = instr->fixedreg;
+ rex = instr->rex;
+ assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, imm->nbytes);
+ assembleabsimm(imm);
+ break;
+ case ENCODER_REGMEM:
+ case ENCODER_MEMREG:
+ if (instr->encoder == ENCODER_MEMREG) {
+ memarg = &instr->arg1->memarg;
+ reg = regbits(instr->arg2->kind);
+ } else {
+ memarg = &instr->arg2->memarg;
+ reg = regbits(instr->arg1->kind);
}
- }
- cursection = s;
- break;
- }
- case ASM_DIR_DATA:
- cursection = data;
- break;
- case ASM_DIR_TEXT:
- cursection = text;
- break;
- case ASM_DIR_ASCII:
- sbn(v->ascii.data, v->ascii.len);
- break;
- case ASM_DIR_ASCIIZ:
- sbn(v->asciiz.data, v->asciiz.len);
- sb(0x00);
- break;
- case ASM_DIR_BALIGN: {
- int64_t offset, i, rem, amnt;
- amnt = 0;
- offset = cursection->hdr.sh_addralign + cursection->hdr.sh_size;
- rem = offset % v->balign.align;
- if (rem)
- amnt = v->balign.align - rem;
- for (i = 0; i < amnt; i++) {
- sb(0x00);
- }
- break;
- }
- case ASM_DIR_FILL: {
- ssize_t i = 0;
-
- for (i = 0; i < v->fill.repeat; i++) {
- switch (v->fill.size) {
- case 1:
- case 2:
- case 4:
- case 8:
- assembleconstant(v->fill.value, v->fill.size);
- break;
- default:
- lfatal("unsupported fill size '%d'", v->fill.size);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
+ rex.r = !!(reg & (1 << 3));
+ assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, 0);
+ break;
+ case ENCODER_REGREG:
+ case ENCODER_REGREG2:
+ if (instr->encoder == ENCODER_REGREG) {
+ reg = regbits(instr->arg1->kind);
+ rm = regbits(instr->arg2->kind);
+ } else {
+ reg = regbits(instr->arg2->kind);
+ rm = regbits(instr->arg1->kind);
}
- }
- break;
- }
- case ASM_DIR_BYTE:
- assemblereloc(v->dirbyte.value.l, v->dirbyte.value.c, 1, R_X86_64_32);
- break;
- case ASM_DIR_SHORT:
- assemblereloc(v->dirshort.value.l, v->dirshort.value.c, 2, R_X86_64_32);
- break;
- case ASM_DIR_INT:
- assemblereloc(v->dirint.value.l, v->dirint.value.c, 4, R_X86_64_32);
- break;
- case ASM_DIR_QUAD:
- assemblereloc(v->dirquad.value.l, v->dirquad.value.c, 8, R_X86_64_64);
- break;
- case ASM_LABEL:
- sym = getsym(v->label.name);
- sym->section = cursection;
- sym->offset = cursection->hdr.sh_size;
- if (sym->defined)
- lfatal("%s already defined", sym->name);
- sym->defined = 1;
- break;
- case ASM_INSTR:
- assembleinstr(&v->instr);
- break;
- case ASM_CALL:
- assemblecall(&v->call);
- break;
- case ASM_JMP:
- assemblejmp(&v->jmp);
- break;
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
+ rex.r = !!(reg & (1 << 3));
+ rex.b = !!(rm & (1 << 3));
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode);
+ sb(modregrmbyte(0x03, reg, rm));
+ break;
+ case ENCODER_IMMREGREG2:
+ imm = &instr->arg1->imm;
+ reg = regbits(instr->arg3->kind);
+ rm = regbits(instr->arg2->kind);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
+ rex.r = !!(reg & (1 << 3));
+ rex.b = !!(rm & (1 << 3));
+ assemblevbytes(instr->prefix);
+ assemblerex(rex);
+ assemblevbytes(instr->opcode);
+ sb(modregrmbyte(0x03, reg, rm));
+ assembleabsimm(imm);
+ break;
+ case ENCODER_IMMMEMREG:
+ imm = &instr->arg1->imm;
+ memarg = &instr->arg2->memarg;
+ reg = regbits(instr->arg3->kind);
+ rex = instr->rex;
+ rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind);
+ rex.r = !!(reg & (1 << 3));
+ assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, imm->nbytes);
+ assembleabsimm(imm);
+ break;
default:
- lfatal("assemble: unexpected kind: %d", v->kind);
+ unreachable();
+ }
+}
+
+/*
+ * Run one assembly pass over the parsed line list.
+ *
+ * The pass starts in the text section with curlineno reset to zero and then
+ * dispatches on the kind of every line: directives switch the current
+ * section or emit data, labels record the current section and offset in
+ * their symbol, and instructions, calls and jumps are handed to their
+ * encoders.  Any other kind is reported through lfatal().
+ */
+static void
+assemble(void)
+{
+    Symbol* sym;
+    AsmLine* l;
+    const Parsev* v;
+
+    cursection = text;
+    curlineno = 0;
+    for (l = allasm; l; l = l->next) {
+        /* NOTE(review): this counts list nodes; AsmLine also has a lineno field - confirm they agree. */
+        curlineno++;
+        v = l->v;
+        switch (v->kind) {
+        case ASM_SYNTAX_ERROR:
+            lfatal("syntax error");
+            break;
+        case ASM_BLANK:
+            break;
+        case ASM_DIR_GLOBL:
+            sym = getsym(v->globl.name);
+            sym->global = 1;
+            break;
+        case ASM_DIR_SECTION: {
+            const char* fp;
+            Section* s;
+
+            s = getsection(v->section.name);
+            s->hdr.sh_type = v->section.type;
+            /* Each flag character adds one SHF_* bit; flags may be NULL. */
+            fp = v->section.flags;
+            while (fp && *fp) {
+                switch (*(fp++)) {
+                case 'a':
+                    s->hdr.sh_flags |= SHF_ALLOC;
+                    break;
+                case 'w':
+                    s->hdr.sh_flags |= SHF_WRITE;
+                    break;
+                case 'x':
+                    s->hdr.sh_flags |= SHF_EXECINSTR;
+                    break;
+                default:
+                    unreachable();
+                }
+            }
+            cursection = s;
+            break;
+        }
+        case ASM_DIR_DATA:
+            cursection = data;
+            break;
+        case ASM_DIR_TEXT:
+            cursection = text;
+            break;
+        case ASM_DIR_ASCII:
+            sbn(v->ascii.data, v->ascii.len);
+            break;
+        case ASM_DIR_ASCIIZ:
+            /* Same bytes as .ascii followed by a terminating NUL. */
+            sbn(v->asciiz.data, v->asciiz.len);
+            sb(0x00);
+            break;
+        case ASM_DIR_BALIGN: {
+            int64_t offset, i, rem, amnt;
+
+            /* Nothing to pad for a zero alignment; this also keeps the modulo below defined. */
+            if (v->balign.align == 0)
+                break;
+            amnt = 0;
+            /* NOTE(review): sh_addralign is added to the size before the modulo - confirm this is intended. */
+            offset = cursection->hdr.sh_addralign + cursection->hdr.sh_size;
+            rem = offset % v->balign.align;
+            if (rem)
+                amnt = v->balign.align - rem;
+            for (i = 0; i < amnt; i++) {
+                sb(0x00);
+            }
+            break;
+        }
+        case ASM_DIR_FILL: {
+            ssize_t i = 0;
+
+            /* The size is validated per repetition, so a zero repeat count never reaches lfatal(). */
+            for (i = 0; i < v->fill.repeat; i++) {
+                switch (v->fill.size) {
+                case 1:
+                case 2:
+                case 4:
+                case 8:
+                    assembleconstant(v->fill.value, v->fill.size);
+                    break;
+                default:
+                    lfatal("unsupported fill size '%d'", v->fill.size);
+                }
+            }
+            break;
+        }
+        case ASM_DIR_BYTE:
+            assemblereloc(v->dirbyte.value.l, v->dirbyte.value.c, 1, R_X86_64_32);
+            break;
+        case ASM_DIR_SHORT:
+            assemblereloc(v->dirshort.value.l, v->dirshort.value.c, 2, R_X86_64_32);
+            break;
+        case ASM_DIR_INT:
+            assemblereloc(v->dirint.value.l, v->dirint.value.c, 4, R_X86_64_32);
+            break;
+        case ASM_DIR_QUAD:
+            assemblereloc(v->dirquad.value.l, v->dirquad.value.c, 8, R_X86_64_64);
+            break;
+        case ASM_LABEL:
+            /* The section and offset are stored before the duplicate check runs. */
+            sym = getsym(v->label.name);
+            sym->section = cursection;
+            sym->offset = cursection->hdr.sh_size;
+            if (sym->defined)
+                lfatal("%s already defined", sym->name);
+            sym->defined = 1;
+            break;
+        case ASM_INSTR:
+            assembleinstr(&v->instr);
+            break;
+        case ASM_CALL:
+            assemblecall(&v->call);
+            break;
+        case ASM_JMP:
+            assemblejmp(&v->jmp);
+            break;
+        default:
+            lfatal("assemble: unexpected kind: %d", v->kind);
+        }
+    }
- }
}
/* Reset while remembering symbol offsets so we can size jumps. */
-static void relaxreset(void) {
- Symbol *sym;
- Section *sec;
- size_t i;
-
- /* Reset relocations and section data but retain capacity. */
- nrelocs = 0;
-
- for (i = 0; i < nsections; i++) {
- sec = &sections[i];
- if (sec == shstrtab)
- continue;
- sec->hdr.sh_size = 0;
- }
-
- /* Reset symbols, saving the worst case offset for the second pass. */
- for (i = 0; i < symbols->cap; i++) {
- if (!symbols->keys[i].str)
- continue;
- sym = symbols->vals[i];
- *sym = (Symbol){
- .name = sym->name, .section = sym->section, .wco = sym->offset};
- }
-}
-
-static void addtosymtab(Symbol *sym) {
- Elf64_Sym elfsym;
- int stype;
- int sbind;
-
- stype = 0;
- if (sym->defined) {
- sbind = sym->global ? STB_GLOBAL : STB_LOCAL;
- } else {
- sbind = STB_GLOBAL;
- }
-
- sym->idx = symtab->hdr.sh_size / symtab->hdr.sh_entsize;
-
- elfsym.st_name = elfstr(strtab, sym->name);
- elfsym.st_value = sym->offset;
- elfsym.st_size = sym->size;
- elfsym.st_info = ELF64_ST_INFO(sbind, stype);
- elfsym.st_shndx = sym->section ? sym->section->idx : SHN_UNDEF;
- elfsym.st_other = 0;
- secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym));
-}
-
-static void fillsymtab(void) {
- Symbol *sym;
- size_t i;
-
- // Local symbols
- for (i = 0; i < symbols->cap; i++) {
- if (!symbols->keys[i].str)
- continue;
- sym = symbols->vals[i];
- if (!sym->defined || sym->global)
- continue;
- addtosymtab(sym);
- }
-
- // Global symbols
-
- // Set start of global symbols.
- symtab->hdr.sh_info = symtab->hdr.sh_size / symtab->hdr.sh_entsize;
-
- for (i = 0; i < symbols->cap; i++) {
- if (!symbols->keys[i].str)
- continue;
- sym = symbols->vals[i];
-
- if (sym->defined && !sym->global)
- continue;
- addtosymtab(sym);
- }
-}
-
-static int resolvereloc(Relocation *reloc) {
- Symbol *sym;
- uint8_t *rdata;
- int64_t value;
-
- sym = reloc->sym;
-
- if (sym->section != reloc->section)
- return 0;
+static void
+relaxreset(void)
+{
+ Symbol* sym;
+ Section* sec;
+ size_t i;
+
+ /* Reset relocations and section data but retain capacity. */
+ nrelocs = 0;
+
+ for (i = 0; i < nsections; i++) {
+ sec = &sections[i];
+ if (sec == shstrtab)
+ continue;
+ sec->hdr.sh_size = 0;
+ }
- switch (reloc->type) {
- case R_X86_64_32:
- case R_X86_64_64:
- return 0;
- case R_X86_64_PC8:
- rdata = &reloc->section->data[reloc->offset];
- value = sym->offset - reloc->offset + reloc->addend;
- rdata[0] = ((uint8_t)value & 0xff);
- return 1;
- case R_X86_64_PC32:
- rdata = &reloc->section->data[reloc->offset];
- value = sym->offset - reloc->offset + reloc->addend;
- rdata[0] = ((uint32_t)value & 0xff);
- rdata[1] = ((uint32_t)value & 0xff00) >> 8;
- rdata[2] = ((uint32_t)value & 0xff0000) >> 16;
- rdata[3] = ((uint32_t)value & 0xff000000) >> 24;
- return 1;
- default:
- unreachable();
- return 0;
- }
-}
-
-static void appendreloc(Relocation *reloc) {
- Symbol *sym;
- Section *relsection;
- Elf64_Rela elfrel;
-
- memset(&elfrel, 0, sizeof(elfrel));
-
- sym = reloc->sym;
- if (reloc->section == text)
- relsection = textrel;
- else if (reloc->section == data)
- relsection = datarel;
- else {
- fatal("unexpected relocation for symbol '%s'", sym->name);
- return;
- }
-
- switch (reloc->type) {
- case R_X86_64_PC32:
- case R_X86_64_32:
- case R_X86_64_64:
- elfrel.r_info = ELF64_R_INFO(sym->idx, reloc->type);
- elfrel.r_offset = reloc->offset;
- elfrel.r_addend = reloc->addend;
- break;
- default:
- unreachable();
- }
-
- secaddbytes(relsection, &elfrel, sizeof(elfrel));
-}
-
-static void handlerelocs(void) {
- Relocation *reloc;
- size_t i;
- for (i = 0; i < nrelocs; i++) {
- reloc = &relocs[i];
- if (resolvereloc(reloc))
- continue;
- appendreloc(reloc);
- }
-}
-
-static void out(const void *buf, size_t n) {
- fwrite(buf, 1, n, stdout);
- if (ferror(stdout))
- fatal("fwrite:");
-}
-
-static void outelf(void) {
- size_t i;
- uint64_t offset;
- Elf64_Ehdr ehdr;
-
- memset(&ehdr, 0, sizeof(ehdr));
- ehdr.e_ident[0] = 0x7f;
- ehdr.e_ident[1] = 'E';
- ehdr.e_ident[2] = 'L';
- ehdr.e_ident[3] = 'F';
- ehdr.e_ident[4] = ELFCLASS64;
- ehdr.e_ident[5] = ELFDATA2LSB;
- ehdr.e_ident[6] = 1;
- ehdr.e_type = ET_REL;
- ehdr.e_machine = EM_X86_64;
- ehdr.e_flags = 0;
- ehdr.e_version = 1;
- ehdr.e_ehsize = sizeof(Elf64_Ehdr);
- ehdr.e_shoff = sizeof(Elf64_Ehdr);
- ehdr.e_shentsize = sizeof(Elf64_Shdr);
- ehdr.e_shnum = nsections;
- ehdr.e_shstrndx = 1;
-
- out(&ehdr, sizeof(ehdr));
- offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Shdr) * nsections;
-
- for (i = 0; i < nsections; i++) {
- sections[i].hdr.sh_offset = offset;
- out(&sections[i].hdr, sizeof(Elf64_Shdr));
- offset += sections[i].hdr.sh_size;
- }
- for (i = 0; i < nsections; i++) {
- if (sections[i].hdr.sh_type == SHT_NOBITS)
- continue;
- out(sections[i].data, sections[i].hdr.sh_size);
- }
- if (fflush(stdout) != 0)
- fatal("fflush:");
-}
-
-static void usage(char *argv0) {
- fprintf(stderr, "minias - a mini x86-64 assembler.\n\n");
- fprintf(stderr, "usage: %s [-r iter] [-o out] [input]\n", argv0);
- fprintf(stderr, "\n");
- fprintf(stderr, " -r iter Jump relaxation iterations (default 1).\n");
- fprintf(stderr, " -o out Output file to write (default stdout).\n");
- exit(2);
-}
-
-static void parseargs(int argc, char *argv[]) {
- char *a, *argv0, *outfname;
-
- argv0 = argv[0];
-
- for (++argv; *argv; argv++) {
- if (argv[0][0] != '-')
- break;
- for (a = &argv[0][1]; *a; a++) {
- switch (*a) {
- case '-':
- case 'h':
- usage(argv0);
- break;
- case 'r':
- nrelax = atoi(*++argv);
- break;
- case 'o':
- if (argv[1] == NULL)
- usage(argv0);
- outfname = *++argv;
- if (!freopen(outfname, "w", stdout))
- fatal("unable to open %s:", outfname);
+ /* Reset symbols, saving the worst case offset for the second pass. */
+ for (i = 0; i < symbols->cap; i++) {
+ if (!symbols->keys[i].str)
+ continue;
+ sym = symbols->vals[i];
+ *sym = (Symbol){ .name = sym->name, .section = sym->section, .wco = sym->offset };
+ }
+}
+
+/*
+ * Append one Elf64_Sym entry describing sym to the symtab section and
+ * store the entry's index in sym->idx.
+ */
+static void
+addtosymtab(Symbol* sym)
+{
+    Elf64_Sym entry;
+    int binding;
+
+    /* Only a defined symbol that was not marked global gets local binding. */
+    binding = (sym->defined && !sym->global) ? STB_LOCAL : STB_GLOBAL;
+
+    /* The index is the number of entries already stored in symtab. */
+    sym->idx = symtab->hdr.sh_size / symtab->hdr.sh_entsize;
+
+    entry.st_name = elfstr(strtab, sym->name);
+    entry.st_value = sym->offset;
+    entry.st_size = sym->size;
+    entry.st_info = ELF64_ST_INFO(binding, 0);
+    /* A symbol without a section is emitted as undefined. */
+    if (sym->section)
+        entry.st_shndx = sym->section->idx;
+    else
+        entry.st_shndx = SHN_UNDEF;
+    entry.st_other = 0;
+    secaddbytes(symtab, &entry, sizeof(entry));
+}
+
+/*
+ * Copy every symbol from the hash table into symtab in two passes:
+ * defined non-global symbols first, then all remaining symbols.
+ * symtab's sh_info is set to the index of the first entry of the second pass.
+ */
+static void
+fillsymtab(void)
+{
+    size_t slot;
+    int pass;
+
+    for (pass = 0; pass < 2; pass++) {
+        if (pass == 1) {
+            /* Set start of global symbols. */
+            symtab->hdr.sh_info = symtab->hdr.sh_size / symtab->hdr.sh_entsize;
+        }
+        for (slot = 0; slot < symbols->cap; slot++) {
+            Symbol* sym;
+            int islocal;
+
+            /* Empty hash table slots have no key string. */
+            if (symbols->keys[slot].str == NULL)
+                continue;
+            sym = symbols->vals[slot];
+            islocal = sym->defined && !sym->global;
+            /* Pass 0 takes the local symbols, pass 1 takes everything else. */
+            if (islocal == (pass == 0))
+                addtosymtab(sym);
+        }
+    }
+}
+
+/*
+ * Try to resolve a relocation inside the assembler.
+ *
+ * Only PC-relative relocations whose symbol lives in the same section as
+ * the relocation are patched: the displacement is written little-endian
+ * into the section data.  Returns 1 when the bytes were patched and 0 when
+ * the relocation still has to be emitted.
+ */
+static int
+resolvereloc(Relocation* reloc)
+{
+    Symbol* target;
+    uint8_t* patch;
+    int64_t delta;
+    int nbytes, i;
+
+    target = reloc->sym;
+    if (target->section != reloc->section)
+        return 0;
+
+    switch (reloc->type) {
+    case R_X86_64_32:
+    case R_X86_64_64:
+        /* Absolute relocations are never resolved here. */
+        return 0;
+    case R_X86_64_PC8:
+        nbytes = 1;
+        break;
+    case R_X86_64_PC32:
+        nbytes = 4;
+        break;
+    default:
+        unreachable();
+        return 0;
+    }
+
+    patch = &reloc->section->data[reloc->offset];
+    delta = target->offset - reloc->offset + reloc->addend;
+    /* Store the low nbytes of the displacement, least significant byte first. */
+    for (i = 0; i < nbytes; i++)
+        patch[i] = (uint8_t)((uint64_t)delta >> (8 * i));
+    return 1;
+}
+
+static void
+appendreloc(Relocation* reloc)
+{
+ Symbol* sym;
+ Section* relsection;
+ Elf64_Rela elfrel;
+
+ memset(&elfrel, 0, sizeof(elfrel));
+
+ sym = reloc->sym;
+ if (reloc->section == text)
+ relsection = textrel;
+ else if (reloc->section == data)
+ relsection = datarel;
+ else {
+ fatal("unexpected relocation for symbol '%s'", sym->name);
+ return;
+ }
+
+ switch (reloc->type) {
+ case R_X86_64_PC32:
+ case R_X86_64_32:
+ case R_X86_64_64:
+ elfrel.r_info = ELF64_R_INFO(sym->idx, reloc->type);
+ elfrel.r_offset = reloc->offset;
+ elfrel.r_addend = reloc->addend;
break;
- default:
- usage(argv0);
- }
- }
- }
-
- if (argv[0]) {
- if (argv[1])
- usage(argv0);
- infilename = argv[0];
- if (!freopen(infilename, "r", stdin))
- fatal("unable to open %s:", infilename);
- }
-}
-
-int main(int argc, char *argv[]) {
- symbols = mkhtab(256);
- parseargs(argc, argv);
- allasm = parseasm();
- initsections();
- assemble();
- while (nrelax-- > 0) {
- relaxreset();
+ default:
+ unreachable();
+ }
+
+ secaddbytes(relsection, &elfrel, sizeof(elfrel));
+}
+
+/*
+ * Walk all recorded relocations: each one is either patched in place by
+ * resolvereloc() or, when that returns 0, passed on to appendreloc().
+ */
+static void
+handlerelocs(void)
+{
+    size_t n;
+
+    for (n = 0; n < nrelocs; n++) {
+        if (!resolvereloc(&relocs[n]))
+            appendreloc(&relocs[n]);
+    }
+}
+
+/* Write n bytes from buf to stdout and report a stream error through fatal(). */
+static void
+out(const void* buf, size_t n)
+{
+    fwrite(buf, 1, n, stdout);
+    if (!ferror(stdout))
+        return;
+    fatal("fwrite:");
+}
+
+/*
+ * Write the relocatable ELF object to stdout: the file header, then one
+ * section header per entry of sections[], then the section contents in the
+ * same order.  The section header table directly follows the file header
+ * (e_shoff) and section 1 is recorded as the section name string table
+ * (e_shstrndx).
+ */
+static void
+outelf(void)
+{
+    size_t i;
+    uint64_t offset;
+    Elf64_Ehdr ehdr;
+
+    memset(&ehdr, 0, sizeof(ehdr));
+    ehdr.e_ident[0] = 0x7f;
+    ehdr.e_ident[1] = 'E';
+    ehdr.e_ident[2] = 'L';
+    ehdr.e_ident[3] = 'F';
+    ehdr.e_ident[4] = ELFCLASS64;
+    ehdr.e_ident[5] = ELFDATA2LSB;
+    ehdr.e_ident[6] = 1;
+    ehdr.e_type = ET_REL;
+    ehdr.e_machine = EM_X86_64;
+    ehdr.e_flags = 0;
+    ehdr.e_version = 1;
+    ehdr.e_ehsize = sizeof(Elf64_Ehdr);
+    ehdr.e_shoff = sizeof(Elf64_Ehdr);
+    ehdr.e_shentsize = sizeof(Elf64_Shdr);
+    ehdr.e_shnum = nsections;
+    ehdr.e_shstrndx = 1;
+
+    out(&ehdr, sizeof(ehdr));
+    /* Section contents begin after the file header and the section header table. */
+    offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Shdr) * nsections;
+
+    for (i = 0; i < nsections; i++) {
+        sections[i].hdr.sh_offset = offset;
+        out(&sections[i].hdr, sizeof(Elf64_Shdr));
+        /*
+         * SHT_NOBITS sections are skipped by the data loop below, so they
+         * must not advance the file offset either; otherwise every section
+         * after a non-empty one would point past its real contents.
+         */
+        if (sections[i].hdr.sh_type != SHT_NOBITS)
+            offset += sections[i].hdr.sh_size;
+    }
+    for (i = 0; i < nsections; i++) {
+        if (sections[i].hdr.sh_type == SHT_NOBITS)
+            continue;
+        out(sections[i].data, sections[i].hdr.sh_size);
+    }
+    if (fflush(stdout) != 0)
+        fatal("fflush:");
+}
+
+/* Print the command line help to stderr and exit with status 2. */
+static void
+usage(char* argv0)
+{
+    fputs("minias - a mini x86-64 assembler.\n\n", stderr);
+    fprintf(stderr, "usage: %s [-r iter] [-o out] [input]\n", argv0);
+    fputs("\n", stderr);
+    fputs(" -r iter Jump relaxation iterations (default 1).\n", stderr);
+    fputs(" -o out Output file to write (default stdout).\n", stderr);
+    exit(2);
+}
+
+/*
+ * Process the command line.
+ *
+ * Leading arguments that start with '-' are options; only the character
+ * directly after the '-' is examined:
+ *   -h, --   print the usage text and exit
+ *   -r iter  store atoi(iter) in nrelax
+ *   -o out   reopen stdout on the named file
+ * Any other option prints the usage text.  At most one argument may follow
+ * the options; it is recorded in infilename and reopened as stdin.
+ */
+static void
+parseargs(int argc, char* argv[])
+{
+    char *a, *argv0, *outfname;
+
+    (void)argc; /* The NULL terminator of argv is used instead of the count. */
+    argv0 = argv[0];
+
+    for (++argv; *argv; argv++) {
+        if (argv[0][0] != '-')
+            break;
+        a = &argv[0][1];
+        switch (*a) {
+        case '-':
+        case 'h':
+            usage(argv0);
+            break;
+        case 'r':
+            /* Reject a trailing "-r": its operand would be NULL, which atoi() must not receive. */
+            if (argv[1] == NULL)
+                usage(argv0);
+            nrelax = atoi(*++argv);
+            break;
+        case 'o':
+            if (argv[1] == NULL)
+                usage(argv0);
+            outfname = *++argv;
+            if (!freopen(outfname, "w", stdout))
+                fatal("unable to open %s:", outfname);
+            break;
+        default:
+            usage(argv0);
+        }
+    }
+
+    /* Whatever remains is the optional input file; a second leftover argument is an error. */
+    if (argv[0]) {
+        if (argv[1])
+            usage(argv0);
+        infilename = argv[0];
+        if (!freopen(infilename, "r", stdin))
+            fatal("unable to open %s:", infilename);
+    }
+}
+
+int
+main(int argc, char* argv[])
+{
+ symbols = mkhtab(256);
+ parseargs(argc, argv);
+ allasm = parseasm();
+ initsections();
assemble();
- }
- fillsymtab();
- handlerelocs();
- outelf();
- return 0;
+ while (nrelax-- > 0) {
+ relaxreset();
+ assemble();
+ }
+ fillsymtab();
+ handlerelocs();
+ outelf();
+ return 0;
} \ No newline at end of file
diff --git a/minias.h b/minias.h
index c731ec7..ec8f7cc 100644
--- a/minias.h
+++ b/minias.h
@@ -12,353 +12,353 @@
#include <unistd.h>
typedef struct {
- Elf64_Shdr hdr;
- int16_t idx;
- int64_t wco;
- int64_t offset;
- size_t capacity;
- uint8_t *data;
+ Elf64_Shdr hdr;
+ int16_t idx;
+ int64_t wco;
+ int64_t offset;
+ size_t capacity;
+ uint8_t* data;
} Section;
typedef struct {
- const char *name;
- int32_t idx;
- int64_t offset;
- int64_t wco; /* worst case offset */
- int64_t size;
- int global;
- int defined;
- Section *section;
+ const char* name;
+ int32_t idx;
+ int64_t offset;
+ int64_t wco; /* worst case offset */
+ int64_t size;
+ int global;
+ int defined;
+ Section* section;
} Symbol;
typedef struct {
- Section *section;
- Symbol *sym;
- int type;
- int64_t offset;
- int64_t addend;
+ Section* section;
+ Symbol* sym;
+ int type;
+ int64_t offset;
+ int64_t addend;
} Relocation;
typedef enum {
- // Misc
- ASM_SYNTAX_ERROR,
- ASM_BLANK,
- ASM_LABEL,
- ASM_IMM,
- ASM_STRING,
- ASM_MEMARG,
- // Directives.
- ASM_DIR_GLOBL,
- ASM_DIR_SECTION,
- ASM_DIR_ASCII,
- ASM_DIR_ASCIIZ,
- ASM_DIR_DATA,
- ASM_DIR_TEXT,
- ASM_DIR_FILL,
- ASM_DIR_BYTE,
- ASM_DIR_SHORT,
- ASM_DIR_INT,
- ASM_DIR_QUAD,
- ASM_DIR_BALIGN,
- // Instructions.
- ASM_CALL,
- ASM_JMP,
- ASM_INSTR,
- // Registers, order matters.
- ASM_REG_BEGIN,
-
- ASM_AL,
- ASM_CL,
- ASM_DL,
- ASM_BL,
- ASM_SPL,
- ASM_BPL,
- ASM_SIL,
- ASM_DIL,
- ASM_R8B,
- ASM_R9B,
- ASM_R10B,
- ASM_R11B,
- ASM_R12B,
- ASM_R13B,
- ASM_R14B,
- ASM_R15B,
-
- ASM_AX,
- ASM_CX,
- ASM_DX,
- ASM_BX,
- ASM_SP,
- ASM_BP,
- ASM_SI,
- ASM_DI,
- ASM_R8W,
- ASM_R9W,
- ASM_R10W,
- ASM_R11W,
- ASM_R12W,
- ASM_R13W,
- ASM_R14W,
- ASM_R15W,
-
- ASM_EAX,
- ASM_ECX,
- ASM_EDX,
- ASM_EBX,
- ASM_ESP,
- ASM_EBP,
- ASM_ESI,
- ASM_EDI,
- ASM_R8D,
- ASM_R9D,
- ASM_R10D,
- ASM_R11D,
- ASM_R12D,
- ASM_R13D,
- ASM_R14D,
- ASM_R15D,
-
- ASM_RAX,
- ASM_RCX,
- ASM_RDX,
- ASM_RBX,
- ASM_RSP,
- ASM_RBP,
- ASM_RSI,
- ASM_RDI,
- ASM_R8,
- ASM_R9,
- ASM_R10,
- ASM_R11,
- ASM_R12,
- ASM_R13,
- ASM_R14,
- ASM_R15,
-
- ASM_XMM0,
- ASM_XMM1,
- ASM_XMM2,
- ASM_XMM3,
- ASM_XMM4,
- ASM_XMM5,
- ASM_XMM6,
- ASM_XMM7,
- ASM_XMM8,
- ASM_XMM9,
- ASM_XMM10,
- ASM_XMM11,
- ASM_XMM12,
- ASM_XMM13,
- ASM_XMM14,
- ASM_XMM15,
-
- /* RIP is in a special class of its own. */
- ASM_RIP,
- ASM_NO_REG,
-
- ASM_REG_END,
+ // Misc
+ ASM_SYNTAX_ERROR,
+ ASM_BLANK,
+ ASM_LABEL,
+ ASM_IMM,
+ ASM_STRING,
+ ASM_MEMARG,
+ // Directives.
+ ASM_DIR_GLOBL,
+ ASM_DIR_SECTION,
+ ASM_DIR_ASCII,
+ ASM_DIR_ASCIIZ,
+ ASM_DIR_DATA,
+ ASM_DIR_TEXT,
+ ASM_DIR_FILL,
+ ASM_DIR_BYTE,
+ ASM_DIR_SHORT,
+ ASM_DIR_INT,
+ ASM_DIR_QUAD,
+ ASM_DIR_BALIGN,
+ // Instructions.
+ ASM_CALL,
+ ASM_JMP,
+ ASM_INSTR,
+ // Registers, order matters.
+ ASM_REG_BEGIN,
+
+ ASM_AL,
+ ASM_CL,
+ ASM_DL,
+ ASM_BL,
+ ASM_SPL,
+ ASM_BPL,
+ ASM_SIL,
+ ASM_DIL,
+ ASM_R8B,
+ ASM_R9B,
+ ASM_R10B,
+ ASM_R11B,
+ ASM_R12B,
+ ASM_R13B,
+ ASM_R14B,
+ ASM_R15B,
+
+ ASM_AX,
+ ASM_CX,
+ ASM_DX,
+ ASM_BX,
+ ASM_SP,
+ ASM_BP,
+ ASM_SI,
+ ASM_DI,
+ ASM_R8W,
+ ASM_R9W,
+ ASM_R10W,
+ ASM_R11W,
+ ASM_R12W,
+ ASM_R13W,
+ ASM_R14W,
+ ASM_R15W,
+
+ ASM_EAX,
+ ASM_ECX,
+ ASM_EDX,
+ ASM_EBX,
+ ASM_ESP,
+ ASM_EBP,
+ ASM_ESI,
+ ASM_EDI,
+ ASM_R8D,
+ ASM_R9D,
+ ASM_R10D,
+ ASM_R11D,
+ ASM_R12D,
+ ASM_R13D,
+ ASM_R14D,
+ ASM_R15D,
+
+ ASM_RAX,
+ ASM_RCX,
+ ASM_RDX,
+ ASM_RBX,
+ ASM_RSP,
+ ASM_RBP,
+ ASM_RSI,
+ ASM_RDI,
+ ASM_R8,
+ ASM_R9,
+ ASM_R10,
+ ASM_R11,
+ ASM_R12,
+ ASM_R13,
+ ASM_R14,
+ ASM_R15,
+
+ ASM_XMM0,
+ ASM_XMM1,
+ ASM_XMM2,
+ ASM_XMM3,
+ ASM_XMM4,
+ ASM_XMM5,
+ ASM_XMM6,
+ ASM_XMM7,
+ ASM_XMM8,
+ ASM_XMM9,
+ ASM_XMM10,
+ ASM_XMM11,
+ ASM_XMM12,
+ ASM_XMM13,
+ ASM_XMM14,
+ ASM_XMM15,
+
+ /* RIP is in a special class of its own. */
+ ASM_RIP,
+ ASM_NO_REG,
+
+ ASM_REG_END,
} AsmKind;
typedef union Parsev Parsev;
typedef struct Label {
- AsmKind kind;
- const char *name;
+ AsmKind kind;
+ const char* name;
} Label;
typedef struct Globl {
- AsmKind kind;
- const char *name;
+ AsmKind kind;
+ const char* name;
} Globl;
typedef struct DirSection {
- AsmKind kind;
- int32_t type;
- const char *name;
- const char *flags;
+ AsmKind kind;
+ int32_t type;
+ const char* name;
+ const char* flags;
} DirSection;
typedef struct {
- int64_t c;
- const char *l;
+ int64_t c;
+ const char* l;
} Value;
typedef struct Byte {
- AsmKind kind;
- Value value;
+ AsmKind kind;
+ Value value;
} Byte;
typedef struct Short {
- AsmKind kind;
- Value value;
+ AsmKind kind;
+ Value value;
} Short;
typedef struct Int {
- AsmKind kind;
- Value value;
+ AsmKind kind;
+ Value value;
} Int;
typedef struct Quad {
- AsmKind kind;
- Value value;
+ AsmKind kind;
+ Value value;
} Quad;
typedef struct Balign {
- AsmKind kind;
- uint64_t align;
+ AsmKind kind;
+ uint64_t align;
} Balign;
typedef struct Fill {
- AsmKind kind;
- int32_t size;
- int32_t repeat;
- int64_t value;
+ AsmKind kind;
+ int32_t size;
+ int32_t repeat;
+ int64_t value;
} Fill;
typedef struct Imm {
- AsmKind kind;
- uint32_t nbytes;
- Value v;
+ AsmKind kind;
+ uint32_t nbytes;
+ Value v;
} Imm;
typedef struct Memarg {
- AsmKind kind;
- AsmKind base;
- AsmKind index;
- uint32_t scale;
- Value disp;
+ AsmKind kind;
+ AsmKind base;
+ AsmKind index;
+ uint32_t scale;
+ Value disp;
} Memarg;
typedef struct String {
- AsmKind kind;
- size_t len;
- uint8_t *data;
+ AsmKind kind;
+ size_t len;
+ uint8_t* data;
} String;
typedef String Ascii;
typedef String Asciiz;
typedef struct Call {
- AsmKind kind;
- uint32_t indirect;
- union {
- const Parsev *indirect;
- Value direct;
- } target;
+ AsmKind kind;
+ uint32_t indirect;
+ union {
+ const Parsev* indirect;
+ Value direct;
+ } target;
} Call;
typedef struct Jmp {
- AsmKind kind;
- uint32_t cc; /* 0 means unconditional. */
- const char *target;
+ AsmKind kind;
+ uint32_t cc; /* 0 means unconditional. */
+ const char* target;
} Jmp;
/* Rex opcode prefix. */
typedef struct Rex {
- uint8_t required : 1;
- uint8_t w : 1;
- uint8_t r : 1;
- uint8_t x : 1;
- uint8_t b : 1;
+ uint8_t required : 1;
+ uint8_t w : 1;
+ uint8_t r : 1;
+ uint8_t x : 1;
+ uint8_t b : 1;
} Rex;
/* Various classes of instruction encoding.
The *2 variants just have operands swapped. */
typedef enum Encoder {
- ENCODER_OP,
- ENCODER_OPREG,
- ENCODER_OPMEM,
- ENCODER_R,
- ENCODER_RIMM,
- ENCODER_IMM,
- ENCODER_IMMMEM,
- ENCODER_IMMREG,
- ENCODER_MEMREG,
- ENCODER_MEMREG2,
- ENCODER_REGMEM,
- ENCODER_REGMEM2,
- ENCODER_REGREG,
- ENCODER_REGREG2,
- ENCODER_IMMREGREG2,
- ENCODER_IMMMEMREG,
+ ENCODER_OP,
+ ENCODER_OPREG,
+ ENCODER_OPMEM,
+ ENCODER_R,
+ ENCODER_RIMM,
+ ENCODER_IMM,
+ ENCODER_IMMMEM,
+ ENCODER_IMMREG,
+ ENCODER_MEMREG,
+ ENCODER_MEMREG2,
+ ENCODER_REGMEM,
+ ENCODER_REGMEM2,
+ ENCODER_REGREG,
+ ENCODER_REGREG2,
+ ENCODER_IMMREGREG2,
+ ENCODER_IMMMEMREG,
} Encoder;
typedef struct Instr {
- AsmKind kind;
- Encoder encoder;
- Rex rex;
- uint8_t pad[3]; /* Avoid undefined padding - see internparsev. */
- uint32_t fixedreg;
- int32_t opcode;
- int32_t prefix;
- const Parsev *arg1;
- const Parsev *arg2;
- const Parsev *arg3;
+ AsmKind kind;
+ Encoder encoder;
+ Rex rex;
+ uint8_t pad[3]; /* Avoid undefined padding - see internparsev. */
+ uint32_t fixedreg;
+ int32_t opcode;
+ int32_t prefix;
+ const Parsev* arg1;
+ const Parsev* arg2;
+ const Parsev* arg3;
} Instr;
union Parsev {
- AsmKind kind;
- Label label;
- Globl globl;
- DirSection section;
- Balign balign;
- Ascii ascii;
- Asciiz asciiz;
- Memarg memarg;
- Instr instr;
- Call call;
- Jmp jmp;
- Fill fill;
- Byte dirbyte;
- Short dirshort;
- Int dirint;
- Quad dirquad;
- Imm imm;
- String string;
- // Temporary values.
- Value value;
- const char *charptr;
- int64_t i64;
+ AsmKind kind;
+ Label label;
+ Globl globl;
+ DirSection section;
+ Balign balign;
+ Ascii ascii;
+ Asciiz asciiz;
+ Memarg memarg;
+ Instr instr;
+ Call call;
+ Jmp jmp;
+ Fill fill;
+ Byte dirbyte;
+ Short dirshort;
+ Int dirint;
+ Quad dirquad;
+ Imm imm;
+ String string;
+ // Temporary values.
+ Value value;
+ const char* charptr;
+ int64_t i64;
};
/* parse.c */
typedef struct AsmLine AsmLine;
struct AsmLine {
- int64_t lineno;
- const Parsev *v;
- AsmLine *next;
+ int64_t lineno;
+ const Parsev* v;
+ AsmLine* next;
};
-AsmLine *parseasm(void);
+AsmLine* parseasm(void);
/* util.c */
-void vwarn(const char *fmt, va_list ap);
-void fatal(const char *fmt, ...);
+void vwarn(const char* fmt, va_list ap);
+void fatal(const char* fmt, ...);
void unreachable(void);
-void *xmalloc(size_t);
-void *xrealloc(void *, size_t);
-void *xreallocarray(void *, size_t, size_t);
-char *xmemdup(const char *, size_t);
-char *xstrdup(const char *s);
-void *zalloc(size_t n);
+void* xmalloc(size_t);
+void* xrealloc(void*, size_t);
+void* xreallocarray(void*, size_t, size_t);
+char* xmemdup(const char*, size_t);
+char* xstrdup(const char* s);
+void* zalloc(size_t n);
struct hashtable {
- size_t len, cap;
- struct hashtablekey *keys;
- void **vals;
+ size_t len, cap;
+ struct hashtablekey* keys;
+ void** vals;
};
struct hashtablekey {
- uint64_t hash;
- const char *str;
- size_t len;
+ uint64_t hash;
+ const char* str;
+ size_t len;
};
-void htabkey(struct hashtablekey *, const char *, size_t);
-struct hashtable *mkhtab(size_t);
-void delhtab(struct hashtable *, void(void *));
-void **htabput(struct hashtable *, struct hashtablekey *);
-void *htabget(struct hashtable *, struct hashtablekey *);
-uint64_t murmurhash64a(const void *, size_t); \ No newline at end of file
+void htabkey(struct hashtablekey*, const char*, size_t);
+struct hashtable* mkhtab(size_t);
+void delhtab(struct hashtable*, void(void*));
+void** htabput(struct hashtable*, struct hashtablekey*);
+void* htabget(struct hashtable*, struct hashtablekey*);
+uint64_t murmurhash64a(const void*, size_t); \ No newline at end of file
diff --git a/parse.c b/parse.c
index 67ac99e..3e60b62 100644
--- a/parse.c
+++ b/parse.c
@@ -1,8 +1,10 @@
#include "minias.h"
/* Cache of Parsev* by value. */
-static const Parsev *internparsev(Parsev *p) {
- /*
+static const Parsev*
+internparsev(Parsev* p)
+{
+ /*
A simple direct mapped cache that prevents our parser
from allocating duplicate values. Note that it uses memcmp
for equality, even on pointer values, this works because the
@@ -13,238 +15,264 @@ static const Parsev *internparsev(Parsev *p) {
but the best fix is still to avoid the padding bytes in the Parsev
variants.
*/
- size_t idx;
- const Parsev *interned;
- static const Parsev *cache[4096] = {0};
-
- idx = murmurhash64a((char *)p, sizeof(Parsev)) % sizeof(cache) /
- sizeof(cache[0]);
- interned = cache[idx];
- if (interned && memcmp(p, interned, sizeof(Parsev)) == 0)
+ size_t idx;
+ const Parsev* interned;
+ static const Parsev* cache[4096] = { 0 };
+
+ idx = murmurhash64a((char*)p, sizeof(Parsev)) % sizeof(cache) / sizeof(cache[0]);
+ interned = cache[idx];
+ if (interned && memcmp(p, interned, sizeof(Parsev)) == 0)
+ return interned;
+ interned = (const Parsev*)xmemdup((char*)p, sizeof(Parsev));
+ cache[idx] = interned;
return interned;
- interned = (const Parsev *)xmemdup((char *)p, sizeof(Parsev));
- cache[idx] = interned;
- return interned;
}
/* Cache of char* by value. */
-const char *internstring(const char *s) {
- size_t idx, len;
- const char *interned;
- static const char *cache[4096] = {0};
-
- len = strlen(s);
- idx = murmurhash64a(s, len) % sizeof(cache) / sizeof(cache[0]);
- interned = cache[idx];
- if (interned && strcmp(s, cache[idx]) == 0)
+const char*
+internstring(const char* s)
+{
+ size_t idx, len;
+ const char* interned;
+ static const char* cache[4096] = { 0 };
+
+ len = strlen(s);
+ idx = murmurhash64a(s, len) % sizeof(cache) / sizeof(cache[0]);
+ interned = cache[idx];
+ if (interned && strcmp(s, cache[idx]) == 0)
+ return interned;
+ interned = xstrdup(s);
+ cache[idx] = interned;
return interned;
- interned = xstrdup(s);
- cache[idx] = interned;
- return interned;
}
-static String decodestring(char *s) {
- char *end;
- size_t len = 0;
- size_t cap = 0;
- uint8_t *data = NULL;
- uint8_t c = 0;
-
- /* The string is already validated by the parser so we omit some checks*/
- while (*s) {
- if (*s == '\\') {
- s++;
- if (*s >= '0' && *s <= '7') {
- c = strtoul(s, &end, 8);
- s += 2;
- } else if (*s == 'x') {
+static String
+decodestring(char* s)
+{
+ char* end;
+ size_t len = 0;
+ size_t cap = 0;
+ uint8_t* data = NULL;
+ uint8_t c = 0;
+
+ /* The string is already validated by the parser so we omit some checks*/
+ while (*s) {
+ if (*s == '\\') {
+ s++;
+ if (*s >= '0' && *s <= '7') {
+ c = strtoul(s, &end, 8);
+ s += 2;
+ } else if (*s == 'x') {
+ s++;
+ c = strtoul(s, &end, 16);
+ s = end - 1;
+ } else if (*s == 'r') {
+ c = '\r';
+ } else if (*s == 'n') {
+ c = '\n';
+ } else if (*s == 't') {
+ c = '\t';
+ } else if (*s == '\\') {
+ c = '\\';
+ } else {
+ unreachable();
+ }
+ } else {
+ c = *s;
+ }
s++;
- c = strtoul(s, &end, 16);
- s = end - 1;
- } else if (*s == 'r') {
- c = '\r';
- } else if (*s == 'n') {
- c = '\n';
- } else if (*s == 't') {
- c = '\t';
- } else if (*s == '\\') {
- c = '\\';
- } else {
- unreachable();
- }
- } else {
- c = *s;
- }
- s++;
- if (len == cap) {
- cap = cap ? len * 2 : 8;
- data = realloc(data, cap);
- }
- data[len++] = c;
- }
- return (String){.kind = ASM_STRING, .len = len, .data = data};
+ if (len == cap) {
+ cap = cap ? len * 2 : 8;
+ data = realloc(data, cap);
+ }
+ data[len++] = c;
+ }
+ return (String){ .kind = ASM_STRING, .len = len, .data = data };
}
-static int needsmovabs(Imm *imm) {
- int64_t mask, maskedc;
+static int
+needsmovabs(Imm* imm)
+{
+ int64_t mask, maskedc;
- if (imm->v.l)
- return 1;
+ if (imm->v.l)
+ return 1;
- mask = 0xffffffff80000000;
- maskedc = (uint64_t)imm->v.c & mask;
- return (maskedc != mask && maskedc != 0);
+ mask = 0xffffffff80000000;
+ maskedc = (uint64_t)imm->v.c & mask;
+ return (maskedc != mask && maskedc != 0);
}
-#define OP(OPCODE) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_OP, .prefix = -1, \
- .opcode = OPCODE, \
- } \
- }
-
-#define OPREG(REX, PREFIX, OPCODE, REG, A1) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_OPREG, .prefix = PREFIX, \
- .rex = (Rex)REX, .fixedreg = REG, .opcode = OPCODE, \
- .arg1 = internparsev(&A1) \
- } \
- }
-
-#define OPMEM(REX, PREFIX, OPCODE, REG, A1) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_OPMEM, .prefix = PREFIX, \
- .rex = (Rex)REX, .fixedreg = REG, .opcode = OPCODE, \
- .arg1 = internparsev(&A1) \
- } \
- }
-
-#define R(REX, PREFIX, OPCODE, A1) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_R, .prefix = PREFIX, \
- .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \
- } \
- }
-
-#define IMM(REX, PREFIX, OPCODE, A1, A2) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_IMM, .prefix = PREFIX, \
- .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \
- .arg2 = internparsev(&A2) \
- } \
- }
-
-#define RIMM(REX, PREFIX, OPCODE, A1, A2) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_RIMM, .prefix = PREFIX, \
- .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \
- .arg2 = internparsev(&A2) \
- } \
- }
-
-#define IMMREG(REX, PREFIX, OPCODE, IMMREG, A1, A2) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_IMMREG, .prefix = PREFIX, \
- .opcode = OPCODE, .rex = (Rex)REX, .fixedreg = IMMREG, \
- .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \
- } \
- }
-
-#define IMMMEM(REX, PREFIX, OPCODE, IMMREG, A1, A2) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_IMMMEM, .prefix = PREFIX, \
- .opcode = OPCODE, .rex = (Rex)REX, .fixedreg = IMMREG, \
- .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \
- } \
- }
-
-#define REGMEM(REX, PREFIX, OPCODE, A1, A2) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_REGMEM, .prefix = PREFIX, \
- .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \
- .arg2 = internparsev(&A2) \
- } \
- }
-
-#define MEMREG(REX, PREFIX, OPCODE, A1, A2) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_MEMREG, .prefix = PREFIX, \
- .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \
- .arg2 = internparsev(&A2) \
- } \
- }
-
-#define REGREG(REX, PREFIX, OPCODE, A1, A2) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_REGREG, .prefix = PREFIX, \
- .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \
- .arg2 = internparsev(&A2) \
- } \
- }
-
-#define REGREG2(REX, PREFIX, OPCODE, A1, A2) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_REGREG2, .prefix = PREFIX, \
- .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \
- .arg2 = internparsev(&A2) \
- } \
- }
-
-#define IMMREGREG2(REX, PREFIX, OPCODE, A1, A2, A3) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_IMMREGREG2, .prefix = PREFIX, \
- .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \
- .arg2 = internparsev(&A2), .arg3 = internparsev(&A3) \
- } \
- }
-
-#define IMMMEMREG(REX, PREFIX, OPCODE, A1, A2, A3) \
- (Parsev) { \
- .instr = (Instr) { \
- .kind = ASM_INSTR, .encoder = ENCODER_IMMMEMREG, .prefix = PREFIX, \
- .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \
- .arg2 = internparsev(&A2), .arg3 = internparsev(&A3) \
- } \
- }
-
-#define REG(K) \
- (Parsev) { .kind = ASM_##K }
+#define OP(OPCODE) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_OP, .prefix = -1, .opcode = OPCODE, \
+ } \
+ }
+
+#define OPREG(REX, PREFIX, OPCODE, REG, A1) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_OPREG, .prefix = PREFIX, .rex = (Rex)REX, \
+ .fixedreg = REG, .opcode = OPCODE, .arg1 = internparsev(&A1) \
+ } \
+ }
+
+#define OPMEM(REX, PREFIX, OPCODE, REG, A1) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_OPMEM, .prefix = PREFIX, .rex = (Rex)REX, \
+ .fixedreg = REG, .opcode = OPCODE, .arg1 = internparsev(&A1) \
+ } \
+ }
+
+#define R(REX, PREFIX, OPCODE, A1) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_R, .prefix = PREFIX, .opcode = OPCODE, \
+ .rex = (Rex)REX, .arg1 = internparsev(&A1), \
+ } \
+ }
+
+#define IMM(REX, PREFIX, OPCODE, A1, A2) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_IMM, .prefix = PREFIX, .opcode = OPCODE, \
+ .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \
+ } \
+ }
+
+#define RIMM(REX, PREFIX, OPCODE, A1, A2) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_RIMM, .prefix = PREFIX, .opcode = OPCODE, \
+ .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \
+ } \
+ }
+
+#define IMMREG(REX, PREFIX, OPCODE, IMMREG, A1, A2) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_IMMREG, .prefix = PREFIX, .opcode = OPCODE, \
+ .rex = (Rex)REX, .fixedreg = IMMREG, .arg1 = internparsev(&A1), \
+ .arg2 = internparsev(&A2) \
+ } \
+ }
+
+#define IMMMEM(REX, PREFIX, OPCODE, IMMREG, A1, A2) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_IMMMEM, .prefix = PREFIX, .opcode = OPCODE, \
+ .rex = (Rex)REX, .fixedreg = IMMREG, .arg1 = internparsev(&A1), \
+ .arg2 = internparsev(&A2) \
+ } \
+ }
+
+#define REGMEM(REX, PREFIX, OPCODE, A1, A2) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_REGMEM, .prefix = PREFIX, .opcode = OPCODE, \
+ .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \
+ } \
+ }
+
+#define MEMREG(REX, PREFIX, OPCODE, A1, A2) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_MEMREG, .prefix = PREFIX, .opcode = OPCODE, \
+ .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \
+ } \
+ }
+
+#define REGREG(REX, PREFIX, OPCODE, A1, A2) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_REGREG, .prefix = PREFIX, .opcode = OPCODE, \
+ .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \
+ } \
+ }
+
+#define REGREG2(REX, PREFIX, OPCODE, A1, A2) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_REGREG2, .prefix = PREFIX, .opcode = OPCODE, \
+ .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \
+ } \
+ }
+
+#define IMMREGREG2(REX, PREFIX, OPCODE, A1, A2, A3) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_IMMREGREG2, .prefix = PREFIX, .opcode = OPCODE, \
+ .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2), \
+ .arg3 = internparsev(&A3) \
+ } \
+ }
+
+#define IMMMEMREG(REX, PREFIX, OPCODE, A1, A2, A3) \
+ (Parsev) \
+ { \
+ .instr = (Instr) \
+ { \
+ .kind = ASM_INSTR, .encoder = ENCODER_IMMMEMREG, .prefix = PREFIX, .opcode = OPCODE, \
+ .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2), \
+ .arg3 = internparsev(&A3) \
+ } \
+ }
+
+#define REG(K) \
+ (Parsev) { .kind = ASM_##K }
#define YYSTYPE Parsev
#define YY_CTX_LOCAL
#define YY_CTX_MEMBERS Parsev v;
#include "asm.peg.inc"
-AsmLine *parseasm(void) {
- AsmLine *result, *l, *prevl;
- yycontext ctx;
-
- memset(&ctx, 0, sizeof(yycontext));
- result = NULL;
- prevl = NULL;
-
- while (yyparse(&ctx)) {
- l = zalloc(sizeof(AsmLine));
- l->v = internparsev(&ctx.v);
- if (prevl)
- prevl->next = l;
- else
- result = l;
- prevl = l;
- }
-
- return result;
+AsmLine*
+parseasm(void)
+{
+ AsmLine *result, *l, *prevl;
+ yycontext ctx;
+
+ memset(&ctx, 0, sizeof(yycontext));
+ result = NULL;
+ prevl = NULL;
+
+ while (yyparse(&ctx)) {
+ l = zalloc(sizeof(AsmLine));
+ l->v = internparsev(&ctx.v);
+ if (prevl)
+ prevl->next = l;
+ else
+ result = l;
+ prevl = l;
+ }
+
+ return result;
}
diff --git a/util.c b/util.c
index ab1fa19..68b0b01 100644
--- a/util.c
+++ b/util.c
@@ -1,209 +1,247 @@
#include "minias.h"
-void vwarn(const char *fmt, va_list ap) {
- vfprintf(stderr, fmt, ap);
- if (fmt[0] && fmt[strlen(fmt) - 1] == ':') {
- putc(' ', stderr);
- perror(NULL);
- } else {
- putc('\n', stderr);
- }
+void
+vwarn(const char* fmt, va_list ap)
+{
+ vfprintf(stderr, fmt, ap);
+ if (fmt[0] && fmt[strlen(fmt) - 1] == ':') {
+ putc(' ', stderr);
+ perror(NULL);
+ } else {
+ putc('\n', stderr);
+ }
}
-void fatal(const char *fmt, ...) {
- va_list ap;
- va_start(ap, fmt);
- vwarn(fmt, ap);
- va_end(ap);
- exit(1);
+void
+fatal(const char* fmt, ...)
+{
+ va_list ap;
+ va_start(ap, fmt);
+ vwarn(fmt, ap);
+ va_end(ap);
+ exit(1);
}
-void unreachable(void) { fatal("BUG: unexpected internal condition"); }
+void
+unreachable(void)
+{
+ fatal("BUG: unexpected internal condition");
+}
-void *xmalloc(size_t n) {
- void *p;
+void*
+xmalloc(size_t n)
+{
+ void* p;
- p = malloc(n);
- if (!p)
- fatal("malloc:");
+ p = malloc(n);
+ if (!p)
+ fatal("malloc:");
- return p;
+ return p;
}
-void *zalloc(size_t n) {
- void *p;
+void*
+zalloc(size_t n)
+{
+ void* p;
- p = malloc(n);
- if (!p)
- fatal("malloc:");
- memset(p, 0, n);
- return p;
+ p = malloc(n);
+ if (!p)
+ fatal("malloc:");
+ memset(p, 0, n);
+ return p;
}
-void *xrealloc(void *p, size_t n) {
- p = realloc(p, n);
- if (!p)
- fatal("realloc:");
+void*
+xrealloc(void* p, size_t n)
+{
+ p = realloc(p, n);
+ if (!p)
+ fatal("realloc:");
- return p;
+ return p;
}
-void *xreallocarray(void *p, size_t n, size_t m) {
- p = reallocarray(p, n, m);
- if (!p)
- fatal("reallocarray:");
+void*
+xreallocarray(void* p, size_t n, size_t m)
+{
+ p = reallocarray(p, n, m);
+ if (!p)
+ fatal("reallocarray:");
- return p;
+ return p;
}
-char *xmemdup(const char *s, size_t n) {
- char *p;
+char*
+xmemdup(const char* s, size_t n)
+{
+ char* p;
- p = xmalloc(n);
- memcpy(p, s, n);
+ p = xmalloc(n);
+ memcpy(p, s, n);
- return p;
+ return p;
}
-char *xstrdup(const char *s) { return xmemdup(s, strlen(s) + 1); }
+char*
+xstrdup(const char* s)
+{
+ return xmemdup(s, strlen(s) + 1);
+}
-void htabkey(struct hashtablekey *k, const char *s, size_t n) {
- k->str = s;
- k->len = n;
- k->hash = murmurhash64a(s, n);
+void
+htabkey(struct hashtablekey* k, const char* s, size_t n)
+{
+ k->str = s;
+ k->len = n;
+ k->hash = murmurhash64a(s, n);
}
-struct hashtable *mkhtab(size_t cap) {
- struct hashtable *h;
- size_t i;
+struct hashtable*
+mkhtab(size_t cap)
+{
+ struct hashtable* h;
+ size_t i;
+
+ assert(!(cap & (cap - 1)));
+ h = xmalloc(sizeof(*h));
+ h->len = 0;
+ h->cap = cap;
+ h->keys = xreallocarray(NULL, cap, sizeof(h->keys[0]));
+ h->vals = xreallocarray(NULL, cap, sizeof(h->vals[0]));
+ for (i = 0; i < cap; ++i)
+ h->keys[i].str = NULL;
+
+ return h;
+}
- assert(!(cap & (cap - 1)));
- h = xmalloc(sizeof(*h));
- h->len = 0;
- h->cap = cap;
- h->keys = xreallocarray(NULL, cap, sizeof(h->keys[0]));
- h->vals = xreallocarray(NULL, cap, sizeof(h->vals[0]));
- for (i = 0; i < cap; ++i)
- h->keys[i].str = NULL;
+void
+delhtab(struct hashtable* h, void del(void*))
+{
+ size_t i;
+
+ if (!h)
+ return;
+ if (del) {
+ for (i = 0; i < h->cap; ++i) {
+ if (h->keys[i].str)
+ del(h->vals[i]);
+ }
+ }
+ free(h->keys);
+ free(h->vals);
+ free(h);
+}
- return h;
+static bool
+keyequal(struct hashtablekey* k1, struct hashtablekey* k2)
+{
+ if (k1->hash != k2->hash || k1->len != k2->len)
+ return false;
+ return memcmp(k1->str, k2->str, k1->len) == 0;
}
-void delhtab(struct hashtable *h, void del(void *)) {
- size_t i;
+static size_t
+keyindex(struct hashtable* h, struct hashtablekey* k)
+{
+ size_t i;
- if (!h)
- return;
- if (del) {
- for (i = 0; i < h->cap; ++i) {
- if (h->keys[i].str)
- del(h->vals[i]);
+ i = k->hash & (h->cap - 1);
+ while (h->keys[i].str && !keyequal(&h->keys[i], k))
+ i = (i + 1) & (h->cap - 1);
+ return i;
+}
+
+void**
+htabput(struct hashtable* h, struct hashtablekey* k)
+{
+ struct hashtablekey* oldkeys;
+ void** oldvals;
+ size_t i, j, oldcap;
+
+ if (h->cap / 2 < h->len) {
+ oldkeys = h->keys;
+ oldvals = h->vals;
+ oldcap = h->cap;
+ h->cap *= 2;
+ h->keys = xreallocarray(NULL, h->cap, sizeof(h->keys[0]));
+ h->vals = xreallocarray(NULL, h->cap, sizeof(h->vals[0]));
+ for (i = 0; i < h->cap; ++i)
+ h->keys[i].str = NULL;
+ for (i = 0; i < oldcap; ++i) {
+ if (oldkeys[i].str) {
+ j = keyindex(h, &oldkeys[i]);
+ h->keys[j] = oldkeys[i];
+ h->vals[j] = oldvals[i];
+ }
+ }
+ free(oldkeys);
+ free(oldvals);
}
- }
- free(h->keys);
- free(h->vals);
- free(h);
-}
-
-static bool keyequal(struct hashtablekey *k1, struct hashtablekey *k2) {
- if (k1->hash != k2->hash || k1->len != k2->len)
- return false;
- return memcmp(k1->str, k2->str, k1->len) == 0;
-}
-
-static size_t keyindex(struct hashtable *h, struct hashtablekey *k) {
- size_t i;
-
- i = k->hash & (h->cap - 1);
- while (h->keys[i].str && !keyequal(&h->keys[i], k))
- i = (i + 1) & (h->cap - 1);
- return i;
-}
-
-void **htabput(struct hashtable *h, struct hashtablekey *k) {
- struct hashtablekey *oldkeys;
- void **oldvals;
- size_t i, j, oldcap;
-
- if (h->cap / 2 < h->len) {
- oldkeys = h->keys;
- oldvals = h->vals;
- oldcap = h->cap;
- h->cap *= 2;
- h->keys = xreallocarray(NULL, h->cap, sizeof(h->keys[0]));
- h->vals = xreallocarray(NULL, h->cap, sizeof(h->vals[0]));
- for (i = 0; i < h->cap; ++i)
- h->keys[i].str = NULL;
- for (i = 0; i < oldcap; ++i) {
- if (oldkeys[i].str) {
- j = keyindex(h, &oldkeys[i]);
- h->keys[j] = oldkeys[i];
- h->vals[j] = oldvals[i];
- }
+ i = keyindex(h, k);
+ if (!h->keys[i].str) {
+ h->keys[i] = *k;
+ h->vals[i] = NULL;
+ ++h->len;
}
- free(oldkeys);
- free(oldvals);
- }
- i = keyindex(h, k);
- if (!h->keys[i].str) {
- h->keys[i] = *k;
- h->vals[i] = NULL;
- ++h->len;
- }
- return &h->vals[i];
+ return &h->vals[i];
}
-void *htabget(struct hashtable *h, struct hashtablekey *k) {
- size_t i;
+void*
+htabget(struct hashtable* h, struct hashtablekey* k)
+{
+ size_t i;
- i = keyindex(h, k);
- return h->keys[i].str ? h->vals[i] : NULL;
+ i = keyindex(h, k);
+ return h->keys[i].str ? h->vals[i] : NULL;
}
-uint64_t murmurhash64a(const void *ptr, size_t len) {
- const uint64_t seed = 0xdecafbaddecafbadull;
- const uint64_t m = 0xc6a4a7935bd1e995ull;
- uint64_t h, k, n;
- const uint8_t *p, *end;
- int r = 47;
-
- h = seed ^ (len * m);
- n = len & ~0x7ull;
- end = ptr;
- end += n;
- for (p = ptr; p != end; p += 8) {
- memcpy(&k, p, sizeof(k));
+uint64_t
+murmurhash64a(const void* ptr, size_t len)
+{
+ const uint64_t seed = 0xdecafbaddecafbadull;
+ const uint64_t m = 0xc6a4a7935bd1e995ull;
+ uint64_t h, k, n;
+ const uint8_t *p, *end;
+ int r = 47;
+
+ h = seed ^ (len * m);
+ n = len & ~0x7ull;
+ end = ptr;
+ end += n;
+ for (p = ptr; p != end; p += 8) {
+ memcpy(&k, p, sizeof(k));
+
+ k *= m;
+ k ^= k >> r;
+ k *= m;
+
+ h ^= k;
+ h *= m;
+ }
- k *= m;
- k ^= k >> r;
- k *= m;
+ switch (len & 0x7) {
+ case 7:
+ h ^= (uint64_t)p[6] << 48; /* fallthrough */
+ case 6:
+ h ^= (uint64_t)p[5] << 40; /* fallthrough */
+ case 5:
+ h ^= (uint64_t)p[4] << 32; /* fallthrough */
+ case 4:
+ h ^= (uint64_t)p[3] << 24; /* fallthrough */
+ case 3:
+ h ^= (uint64_t)p[2] << 16; /* fallthrough */
+ case 2:
+ h ^= (uint64_t)p[1] << 8; /* fallthrough */
+ case 1:
+ h ^= (uint64_t)p[0];
+ h *= m;
+ }
- h ^= k;
- h *= m;
- }
-
- switch (len & 0x7) {
- case 7:
- h ^= (uint64_t)p[6] << 48; /* fallthrough */
- case 6:
- h ^= (uint64_t)p[5] << 40; /* fallthrough */
- case 5:
- h ^= (uint64_t)p[4] << 32; /* fallthrough */
- case 4:
- h ^= (uint64_t)p[3] << 24; /* fallthrough */
- case 3:
- h ^= (uint64_t)p[2] << 16; /* fallthrough */
- case 2:
- h ^= (uint64_t)p[1] << 8; /* fallthrough */
- case 1:
- h ^= (uint64_t)p[0];
+ h ^= h >> r;
h *= m;
- }
-
- h ^= h >> r;
- h *= m;
- h ^= h >> r;
+ h ^= h >> r;
- return h;
+ return h;
}