diff options
| author | Andrew Chambers <[email protected]> | 2021-10-19 15:47:22 +1300 |
|---|---|---|
| committer | Andrew Chambers <[email protected]> | 2021-10-19 15:47:22 +1300 |
| commit | 66269a6c54613908a3c1a5851437044717b21d7e (patch) | |
| tree | 86a52d9993b6e609a07e4481c04cad22ce42e309 | |
| parent | 61a23eb3fd34408ade4f4724fd992a5877d857e1 (diff) | |
Change formatting style.
| -rw-r--r-- | Makefile | 4 | ||||
| -rw-r--r-- | main.c | 1916 | ||||
| -rw-r--r-- | minias.h | 514 | ||||
| -rw-r--r-- | parse.c | 460 | ||||
| -rw-r--r-- | util.c | 366 |
5 files changed, 1706 insertions, 1554 deletions
@@ -24,7 +24,9 @@ parse.o: asm.peg.inc main.o parse.o util.o: minias.h fmt: - clang-format -i *.c *.h + clang-format \ + -style='{BasedOnStyle: WebKit, AlwaysBreakAfterReturnType: TopLevelDefinitions, ColumnLimit: 100}'\ + -i *.c *.h check: sh test/test.sh @@ -1,16 +1,16 @@ #include "minias.h" /* Parsed assembly */ -static AsmLine *allasm = NULL; +static AsmLine* allasm = NULL; /* Number of assembly relaxation passes. */ static int nrelax = 1; /* Symbols before writing to symtab section. */ -static struct hashtable *symbols = NULL; +static struct hashtable* symbols = NULL; /* Array of all relocations before adding to the rel section. */ -static Relocation *relocs = NULL; +static Relocation* relocs = NULL; static size_t nrelocs = 0; static size_t reloccap = 0; @@ -18,232 +18,279 @@ static size_t reloccap = 0; static Section sections[MAXSECTIONS]; static size_t nsections = 1; // first is reserved. -static Section *cursection = NULL; -static Section *shstrtab = NULL; -static Section *strtab = NULL; -static Section *symtab = NULL; -static Section *bss = NULL; -static Section *text = NULL; -static Section *data = NULL; -static Section *textrel = NULL; -static Section *datarel = NULL; - -static char *infilename = "<stdin>"; +static Section* cursection = NULL; +static Section* shstrtab = NULL; +static Section* strtab = NULL; +static Section* symtab = NULL; +static Section* bss = NULL; +static Section* text = NULL; +static Section* data = NULL; +static Section* textrel = NULL; +static Section* datarel = NULL; + +static char* infilename = "<stdin>"; static size_t curlineno = 0; -static void lfatal(const char *fmt, ...) 
{ - va_list ap; - fprintf(stderr, "%s:%ld: ", infilename, curlineno); - va_start(ap, fmt); - vwarn(fmt, ap); - va_end(ap); - exit(1); -} - -static Symbol *getsym(const char *name) { - Symbol **ps, *s; - struct hashtablekey htk; - - htabkey(&htk, name, strlen(name)); - ps = (Symbol **)htabput(symbols, &htk); - if (!*ps) { - *ps = xmalloc(sizeof(Symbol)); - **ps = (Symbol){ - .name = name, - .wco = -1, - }; - } - s = *ps; - return s; +static void +lfatal(const char* fmt, ...) +{ + va_list ap; + fprintf(stderr, "%s:%ld: ", infilename, curlineno); + va_start(ap, fmt); + vwarn(fmt, ap); + va_end(ap); + exit(1); +} + +static Symbol* +getsym(const char* name) +{ + Symbol **ps, *s; + struct hashtablekey htk; + + htabkey(&htk, name, strlen(name)); + ps = (Symbol**)htabput(symbols, &htk); + if (!*ps) { + *ps = xmalloc(sizeof(Symbol)); + **ps = (Symbol){ + .name = name, + .wco = -1, + }; + } + s = *ps; + return s; } -static void secaddbytes(Section *s, const void *bytes, size_t n) { +static void +secaddbytes(Section* s, const void* bytes, size_t n) +{ + + if (s->hdr.sh_type == SHT_NOBITS) { + s->hdr.sh_size += n; + return; + } + + while (s->capacity < s->hdr.sh_size + n) { + s->capacity = s->capacity ? (s->capacity * 2) : 512; + s->data = xrealloc(s->data, s->capacity); + } + memcpy(s->data + s->hdr.sh_size, bytes, n); - if (s->hdr.sh_type == SHT_NOBITS) { s->hdr.sh_size += n; - return; - } - - while (s->capacity < s->hdr.sh_size + n) { - s->capacity = s->capacity ? 
(s->capacity * 2) : 512; - s->data = xrealloc(s->data, s->capacity); - } - memcpy(s->data + s->hdr.sh_size, bytes, n); - - s->hdr.sh_size += n; -} - -static void secaddbyte(Section *s, uint8_t b) { secaddbytes(s, &b, 1); } - -static Elf64_Word elfstr(Section *sec, const char *s) { - Elf64_Word i = sec->hdr.sh_size; - secaddbytes(sec, s, strlen(s) + 1); - return i; -} - -static Section *newsection() { - Section *s; - if (nsections >= MAXSECTIONS) - fatal("too many sections"); - s = &sections[nsections]; - s->idx = nsections; - nsections += 1; - return s; -} - -static Section *getsection(const char *name) { - size_t i; - char *secname; - Section *s; - - for (i = 0; i < nsections; i++) { - secname = (char *)shstrtab->data + sections[i].hdr.sh_name; - if (strcmp(secname, name) == 0) - return &sections[i]; - } - s = newsection(); - s->hdr.sh_name = elfstr(shstrtab, name); - return s; -} - -static void initsections(void) { - Elf64_Sym elfsym; - - shstrtab = newsection(); - secaddbyte(shstrtab, 0); - shstrtab->hdr.sh_name = elfstr(shstrtab, ".shstrtab"); - shstrtab->hdr.sh_type = SHT_STRTAB; - - strtab = newsection(); - secaddbyte(strtab, 0); - strtab->hdr.sh_name = elfstr(shstrtab, ".strtab"); - strtab->hdr.sh_type = SHT_STRTAB; - - symtab = newsection(); - symtab->hdr.sh_name = elfstr(shstrtab, ".symtab"); - symtab->hdr.sh_type = SHT_SYMTAB; - symtab->hdr.sh_link = strtab->idx; - symtab->hdr.sh_entsize = sizeof(Elf64_Sym); - memset(&elfsym, 0, sizeof(elfsym)); - secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym)); - - bss = newsection(); - bss->hdr.sh_name = elfstr(shstrtab, ".bss"); - bss->hdr.sh_type = SHT_NOBITS; - bss->hdr.sh_flags = SHF_ALLOC | SHF_WRITE; - bss->hdr.sh_addralign = 16; // XXX right value? - - data = newsection(); - data->hdr.sh_name = elfstr(shstrtab, ".data"); - data->hdr.sh_type = SHT_PROGBITS; - data->hdr.sh_flags = SHF_ALLOC | SHF_WRITE; - data->hdr.sh_addralign = 16; // XXX right value? 
- - text = newsection(); - text->hdr.sh_name = elfstr(shstrtab, ".text"); - text->hdr.sh_type = SHT_PROGBITS; - text->hdr.sh_flags = SHF_EXECINSTR | SHF_ALLOC; - text->hdr.sh_addralign = 4; - - textrel = newsection(); - textrel->hdr.sh_name = elfstr(shstrtab, ".rela.text"); - textrel->hdr.sh_type = SHT_RELA; - textrel->hdr.sh_info = text->idx; - textrel->hdr.sh_link = symtab->idx; - textrel->hdr.sh_entsize = sizeof(Elf64_Rela); - - datarel = newsection(); - datarel->hdr.sh_name = elfstr(shstrtab, ".rela.data"); - datarel->hdr.sh_type = SHT_RELA; - datarel->hdr.sh_info = data->idx; - datarel->hdr.sh_link = symtab->idx; - datarel->hdr.sh_entsize = sizeof(Elf64_Rela); -} - -static Relocation *newreloc() { - if (nrelocs == reloccap) { - reloccap = nrelocs ? nrelocs * 2 : 64; - relocs = xreallocarray(relocs, reloccap, sizeof(Relocation)); - } - return &relocs[nrelocs++]; +} + +static void +secaddbyte(Section* s, uint8_t b) +{ + secaddbytes(s, &b, 1); +} + +static Elf64_Word +elfstr(Section* sec, const char* s) +{ + Elf64_Word i = sec->hdr.sh_size; + secaddbytes(sec, s, strlen(s) + 1); + return i; +} + +static Section* +newsection() +{ + Section* s; + if (nsections >= MAXSECTIONS) + fatal("too many sections"); + s = &sections[nsections]; + s->idx = nsections; + nsections += 1; + return s; +} + +static Section* +getsection(const char* name) +{ + size_t i; + char* secname; + Section* s; + + for (i = 0; i < nsections; i++) { + secname = (char*)shstrtab->data + sections[i].hdr.sh_name; + if (strcmp(secname, name) == 0) + return &sections[i]; + } + s = newsection(); + s->hdr.sh_name = elfstr(shstrtab, name); + return s; +} + +static void +initsections(void) +{ + Elf64_Sym elfsym; + + shstrtab = newsection(); + secaddbyte(shstrtab, 0); + shstrtab->hdr.sh_name = elfstr(shstrtab, ".shstrtab"); + shstrtab->hdr.sh_type = SHT_STRTAB; + + strtab = newsection(); + secaddbyte(strtab, 0); + strtab->hdr.sh_name = elfstr(shstrtab, ".strtab"); + strtab->hdr.sh_type = SHT_STRTAB; + + symtab = 
newsection(); + symtab->hdr.sh_name = elfstr(shstrtab, ".symtab"); + symtab->hdr.sh_type = SHT_SYMTAB; + symtab->hdr.sh_link = strtab->idx; + symtab->hdr.sh_entsize = sizeof(Elf64_Sym); + memset(&elfsym, 0, sizeof(elfsym)); + secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym)); + + bss = newsection(); + bss->hdr.sh_name = elfstr(shstrtab, ".bss"); + bss->hdr.sh_type = SHT_NOBITS; + bss->hdr.sh_flags = SHF_ALLOC | SHF_WRITE; + bss->hdr.sh_addralign = 16; // XXX right value? + + data = newsection(); + data->hdr.sh_name = elfstr(shstrtab, ".data"); + data->hdr.sh_type = SHT_PROGBITS; + data->hdr.sh_flags = SHF_ALLOC | SHF_WRITE; + data->hdr.sh_addralign = 16; // XXX right value? + + text = newsection(); + text->hdr.sh_name = elfstr(shstrtab, ".text"); + text->hdr.sh_type = SHT_PROGBITS; + text->hdr.sh_flags = SHF_EXECINSTR | SHF_ALLOC; + text->hdr.sh_addralign = 4; + + textrel = newsection(); + textrel->hdr.sh_name = elfstr(shstrtab, ".rela.text"); + textrel->hdr.sh_type = SHT_RELA; + textrel->hdr.sh_info = text->idx; + textrel->hdr.sh_link = symtab->idx; + textrel->hdr.sh_entsize = sizeof(Elf64_Rela); + + datarel = newsection(); + datarel->hdr.sh_name = elfstr(shstrtab, ".rela.data"); + datarel->hdr.sh_type = SHT_RELA; + datarel->hdr.sh_info = data->idx; + datarel->hdr.sh_link = symtab->idx; + datarel->hdr.sh_entsize = sizeof(Elf64_Rela); +} + +static Relocation* +newreloc() +{ + if (nrelocs == reloccap) { + reloccap = nrelocs ? nrelocs * 2 : 64; + relocs = xreallocarray(relocs, reloccap, sizeof(Relocation)); + } + return &relocs[nrelocs++]; } /* Shorthand helpers to write section data. 
*/ -static void sb(uint8_t b) { secaddbyte(cursection, b); } +static void +sb(uint8_t b) +{ + secaddbyte(cursection, b); +} -static void sbn(uint8_t *bytes, size_t n) { secaddbytes(cursection, bytes, n); } +static void +sbn(uint8_t* bytes, size_t n) +{ + secaddbytes(cursection, bytes, n); +} -static void su16(uint16_t w) { - uint8_t buf[2] = {w & 0xff, (w & 0xff00) >> 8}; - secaddbytes(cursection, buf, sizeof(buf)); +static void +su16(uint16_t w) +{ + uint8_t buf[2] = { w & 0xff, (w & 0xff00) >> 8 }; + secaddbytes(cursection, buf, sizeof(buf)); } -static void su32(uint32_t l) { - uint8_t buf[4] = { - l & 0xff, - (l & 0xff00) >> 8, - (l & 0xff0000) >> 16, - (l & 0xff000000) >> 24, - }; - secaddbytes(cursection, buf, sizeof(buf)); +static void +su32(uint32_t l) +{ + uint8_t buf[4] = { + l & 0xff, + (l & 0xff00) >> 8, + (l & 0xff0000) >> 16, + (l & 0xff000000) >> 24, + }; + secaddbytes(cursection, buf, sizeof(buf)); } -static void su64(uint64_t l) { - uint8_t buf[8] = { - l & 0xff, - (l & 0xff00) >> 8, - (l & 0xff0000) >> 16, - (l & 0xff000000) >> 24, - (l & 0xff00000000) >> 32, - (l & 0xff0000000000) >> 40, - (l & 0xff000000000000) >> 48, - (l & 0xff00000000000000) >> 56, - }; - secaddbytes(cursection, buf, sizeof(buf)); +static void +su64(uint64_t l) +{ + uint8_t buf[8] = { + l & 0xff, + (l & 0xff00) >> 8, + (l & 0xff0000) >> 16, + (l & 0xff000000) >> 24, + (l & 0xff00000000) >> 32, + (l & 0xff0000000000) >> 40, + (l & 0xff000000000000) >> 48, + (l & 0xff00000000000000) >> 56, + }; + secaddbytes(cursection, buf, sizeof(buf)); } /* Convert an AsmKind to register bits in reg/rm style. */ -static uint8_t regbits(AsmKind k) { return (k - (ASM_REG_BEGIN + 1)) % 16; } +static uint8_t +regbits(AsmKind k) +{ + return (k - (ASM_REG_BEGIN + 1)) % 16; +} /* Register that requires the use of a rex prefix. 
*/ -static uint8_t isrexreg(AsmKind k) { - return k > ASM_REG_BEGIN && k < ASM_REG_END && - (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL || - k == ASM_SIL || k == ASM_DIL); +static uint8_t +isrexreg(AsmKind k) +{ + return k > ASM_REG_BEGIN && k < ASM_REG_END + && (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL || k == ASM_SIL || k == ASM_DIL); } -static uint8_t rexbyte(Rex rex) { - return ((1 << 6) | (rex.w << 3) | (rex.r << 2) | (rex.x << 1) | rex.b); +static uint8_t +rexbyte(Rex rex) +{ + return ((1 << 6) | (rex.w << 3) | (rex.r << 2) | (rex.x << 1) | rex.b); } /* Compose a mod/reg/rm byte - See intel manual. */ -static uint8_t modregrmbyte(uint8_t mod, uint8_t reg, uint8_t rm) { - return (((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7)); +static uint8_t +modregrmbyte(uint8_t mod, uint8_t reg, uint8_t rm) +{ + return (((mod & 3) << 6) | ((reg & 7) << 3) | (rm & 7)); } /* Compose an sib byte - See intel manual. */ -static uint8_t sibbyte(uint8_t ss, uint8_t idx, uint8_t base) { - return (((ss & 3) << 6) | ((idx & 7) << 3) | (base & 7)); -} - -void assembleconstant(int64_t c, int nbytes) { - switch (nbytes) { - case 1: - sb((uint8_t)c); - break; - case 2: - su16((uint16_t)c); - break; - case 4: - su32((uint32_t)c); - break; - case 8: - su64((uint64_t)c); - break; - default: - unreachable(); - } +static uint8_t +sibbyte(uint8_t ss, uint8_t idx, uint8_t base) +{ + return (((ss & 3) << 6) | ((idx & 7) << 3) | (base & 7)); +} + +void +assembleconstant(int64_t c, int nbytes) +{ + switch (nbytes) { + case 1: + sb((uint8_t)c); + break; + case 2: + su16((uint16_t)c); + break; + case 4: + su32((uint32_t)c); + break; + case 8: + su64((uint64_t)c); + break; + default: + unreachable(); + } } /* The VarBytes type encodes a variadic number of bytes. 
@@ -256,766 +303,803 @@ void assembleconstant(int64_t c, int nbytes) { */ typedef int32_t VarBytes; -static void assemblevbytes(VarBytes bytes) { - int i, n; - uint8_t b, shift; - - n = (int8_t)(uint8_t)((bytes & 0xff000000) >> 24); - for (i = n; i >= 0; i--) { - shift = i * 8; - b = (bytes & (0xff << shift)) >> shift; - sb(b); - } +static void +assemblevbytes(VarBytes bytes) +{ + int i, n; + uint8_t b, shift; + + n = (int8_t)(uint8_t)((bytes & 0xff000000) >> 24); + for (i = n; i >= 0; i--) { + shift = i * 8; + b = (bytes & (0xff << shift)) >> shift; + sb(b); + } } -static void assemblerex(Rex rex) { - if (rex.required || rex.w || rex.r || rex.x || rex.b) - sb(rexbyte(rex)); +static void +assemblerex(Rex rex) +{ + if (rex.required || rex.w || rex.r || rex.x || rex.b) + sb(rexbyte(rex)); } /* Assemble a symbolic value. */ -static void assemblereloc(const char *l, int64_t c, int nbytes, int type) { - Relocation *reloc; - Symbol *sym; - - if (l != NULL) { - reloc = newreloc(); - sym = getsym(l); - reloc->type = type; - reloc->section = cursection; - reloc->sym = sym; - reloc->offset = cursection->hdr.sh_size; - reloc->addend = c; - c = 0; - } - assembleconstant(c, nbytes); +static void +assemblereloc(const char* l, int64_t c, int nbytes, int type) +{ + Relocation* reloc; + Symbol* sym; + + if (l != NULL) { + reloc = newreloc(); + sym = getsym(l); + reloc->type = type; + reloc->section = cursection; + reloc->sym = sym; + reloc->offset = cursection->hdr.sh_size; + reloc->addend = c; + c = 0; + } + assembleconstant(c, nbytes); } /* Assemble a r <-> mem operation. */ -static void assemblemem(const Memarg *memarg, Rex rex, VarBytes prefix, - VarBytes opcode, uint8_t reg, int32_t nexti) { +static void +assemblemem( + const Memarg* memarg, Rex rex, VarBytes prefix, VarBytes opcode, uint8_t reg, int32_t nexti) +{ + + uint8_t mod, rm, scale, index, base; + + /* Rip relative addressing. 
*/ + if (memarg->base == ASM_RIP) { + rm = 0x05; + assemblevbytes(prefix); + assemblerex(rex); + assemblevbytes(opcode); + sb(modregrmbyte(0x00, reg, rm)); + + if (memarg->disp.l) { + assemblereloc(memarg->disp.l, memarg->disp.c - 4 + nexti, 4, R_X86_64_PC32); + } else { + assembleconstant(memarg->disp.c, 4); + } + return; + } - uint8_t mod, rm, scale, index, base; + /* Direct memory access */ + if (memarg->base == ASM_NO_REG) { + mod = 0; + rm = 4; + + assemblevbytes(prefix); + assemblerex(rex); + assemblevbytes(opcode); + sb(modregrmbyte(mod, reg, rm)); + + sb(sibbyte(0, 4, 5)); + if (memarg->disp.l) { + assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32); + } else { + assembleconstant(memarg->disp.c, 4); + } + return; + } - /* Rip relative addressing. */ - if (memarg->base == ASM_RIP) { - rm = 0x05; - assemblevbytes(prefix); - assemblerex(rex); - assemblevbytes(opcode); - sb(modregrmbyte(0x00, reg, rm)); + rm = regbits(memarg->base); + rex.b = !!(rm & (1 << 3)); - if (memarg->disp.l) { - assemblereloc(memarg->disp.l, memarg->disp.c - 4 + nexti, 4, - R_X86_64_PC32); - } else { - assembleconstant(memarg->disp.c, 4); + /* Case when we don't need sib */ + if (memarg->index == ASM_NO_REG && memarg->scale == 0 && ((rm & 7) != 4)) { + + if (memarg->disp.l == 0 && memarg->disp.c == 0) { + if ((rm & 7) == 5) { + mod = 1; + } else { + mod = 0; + } + } else { + mod = 2; + } + + assemblevbytes(prefix); + assemblerex(rex); + assemblevbytes(opcode); + sb(modregrmbyte(mod, reg, rm)); + + if (mod == 1) { + assembleconstant(memarg->disp.c, 1); + } else if (mod == 2) { + assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32); + } + return; } - return; - } - /* Direct memory access */ - if (memarg->base == ASM_NO_REG) { - mod = 0; + /* Setup sib indexing. 
*/ + base = rm; rm = 4; - assemblevbytes(prefix); - assemblerex(rex); - assemblevbytes(opcode); - sb(modregrmbyte(mod, reg, rm)); + if (memarg->disp.c == 0 && memarg->disp.l == 0 && ((base & 7) != 5)) { + mod = 0; /* +0 */ + } else { + if (memarg->disp.l == NULL && memarg->disp.c >= -128 && memarg->disp.c <= 127) { + mod = 1; /* +disp8 */ + } else { + mod = 2; /* +disp32 */ + } + } - sb(sibbyte(0, 4, 5)); - if (memarg->disp.l) { - assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32); + if (memarg->index == ASM_NO_REG) { + index = 4; } else { - assembleconstant(memarg->disp.c, 4); + if (memarg->index == ASM_RSP) + lfatal("rsp cannot be used as an index"); + index = regbits(memarg->index); } - return; - } - rm = regbits(memarg->base); - rex.b = !!(rm & (1 << 3)); + /* If our base is a bp register, we must use the index instead. */ + if ((base & 7) == 5 && memarg->index == ASM_NO_REG) { + index = base; + } - /* Case when we don't need sib */ - if (memarg->index == ASM_NO_REG && memarg->scale == 0 && ((rm & 7) != 4)) { + rex.x = !!(index & (1 << 3)); - if (memarg->disp.l == 0 && memarg->disp.c == 0) { - if ((rm & 7) == 5) { - mod = 1; - } else { - mod = 0; - } - } else { - mod = 2; + switch (memarg->scale) { + case 0: + case 1: + scale = 0; + break; + case 2: + scale = 1; + break; + case 4: + scale = 2; + break; + case 8: + scale = 3; + break; + default: + lfatal("invalid addressing scale"); + return; } assemblevbytes(prefix); assemblerex(rex); assemblevbytes(opcode); sb(modregrmbyte(mod, reg, rm)); + sb(sibbyte(scale, index, base)); if (mod == 1) { - assembleconstant(memarg->disp.c, 1); + assembleconstant(memarg->disp.c, 1); } else if (mod == 2) { - assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32); + assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32); } - return; - } - - /* Setup sib indexing. 
*/ - base = rm; - rm = 4; +} - if (memarg->disp.c == 0 && memarg->disp.l == 0 && ((base & 7) != 5)) { - mod = 0; /* +0 */ - } else { - if (memarg->disp.l == NULL && memarg->disp.c >= -128 && - memarg->disp.c <= 127) { - mod = 1; /* +disp8 */ - } else { - mod = 2; /* +disp32 */ - } - } - - if (memarg->index == ASM_NO_REG) { - index = 4; - } else { - if (memarg->index == ASM_RSP) - lfatal("rsp cannot be used as an index"); - index = regbits(memarg->index); - } - - /* If our base is a bp register, we must use the index instead. */ - if ((base & 7) == 5 && memarg->index == ASM_NO_REG) { - index = base; - } - - rex.x = !!(index & (1 << 3)); - - switch (memarg->scale) { - case 0: - case 1: - scale = 0; - break; - case 2: - scale = 1; - break; - case 4: - scale = 2; - break; - case 8: - scale = 3; - break; - default: - lfatal("invalid addressing scale"); - return; - } - - assemblevbytes(prefix); - assemblerex(rex); - assemblevbytes(opcode); - sb(modregrmbyte(mod, reg, rm)); - sb(sibbyte(scale, index, base)); - - if (mod == 1) { - assembleconstant(memarg->disp.c, 1); - } else if (mod == 2) { - assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32); - } -} - -static void assemblecall(const Call *call) { - Rex rex; - uint8_t rm; - - if (call->indirect) { - if (call->target.indirect->kind == ASM_MEMARG) { - rex = (Rex){0}; - abort(); // assemblemem(&call->target.indirect->memarg, rex, -1, 0xff, - // 0x02); - } else { - rm = regbits(call->target.indirect->kind); - rex = (Rex){.b = !!(rm & (1 << 3))}; - assemblerex(rex); - assemblevbytes(0xff); - sb(modregrmbyte(0x03, 0x02, rm)); - } - } else { - sb(0xe8); - assemblereloc(call->target.direct.l, call->target.direct.c - 4, 4, - R_X86_64_PC32); - } -} - -static void assemblejmp(const Jmp *j) { - int jmpsize; - int64_t distance; - Symbol *target; - - static uint8_t cc2op[31] = { - 0xe9, 0x84, 0x88, 0x8b, 0x8a, 0x8a, 0x80, 0x85, 0x89, 0x8b, 0x81, - 0x8f, 0x8d, 0x8c, 0x8e, 0x85, 0x83, 0x87, 0x83, 0x82, 0x86, 0x8e, - 0x8c, 
0x8d, 0x8f, 0x84, 0x82, 0x86, 0x82, 0x83, 0x87, - }; - - jmpsize = 4; - target = getsym(j->target); - if (cursection == target->section && (target->defined || target->wco != -1)) { - if (target->defined) { - distance = target->offset - cursection->hdr.sh_size; +static void +assemblecall(const Call* call) +{ + Rex rex; + uint8_t rm; + + if (call->indirect) { + if (call->target.indirect->kind == ASM_MEMARG) { + rex = (Rex){ 0 }; + abort(); // assemblemem(&call->target.indirect->memarg, rex, -1, 0xff, + // 0x02); + } else { + rm = regbits(call->target.indirect->kind); + rex = (Rex){ .b = !!(rm & (1 << 3)) }; + assemblerex(rex); + assemblevbytes(0xff); + sb(modregrmbyte(0x03, 0x02, rm)); + } } else { - distance = target->wco - cursection->hdr.sh_size; + sb(0xe8); + assemblereloc(call->target.direct.l, call->target.direct.c - 4, 4, R_X86_64_PC32); } - if ((distance - 1) >= -128 && (distance - 1) <= 127) { - jmpsize = 1; - } else { - jmpsize = 4; - } - } - - if (jmpsize == 4) { - if (j->cc) - sb(0x0f); - sb(cc2op[j->cc]); - assemblereloc(j->target, -4, 4, R_X86_64_PC32); - } else { - sb(cc2op[j->cc] + (j->cc ? 
-16 : 2)); - assemblereloc(j->target, -1, 1, R_X86_64_PC8); - } -} - -static void assembleabsimm(const Imm *imm) { - if (imm->nbytes == 1) - assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_8); - else if (imm->nbytes == 2) - assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_16); - else if (imm->nbytes == 4) - assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32); - else if (imm->nbytes == 8) - assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_64); - else - unreachable(); -} - -static void assembleinstr(const Instr *instr) { - Rex rex; - const Memarg *memarg; - const Imm *imm; - uint8_t reg, rm; - - switch (instr->encoder) { - case ENCODER_OP: - assemblevbytes(instr->opcode); - break; - case ENCODER_OPREG: - rm = regbits(instr->arg1->kind); - rex = instr->rex; - rex.required = isrexreg(instr->arg1->kind); - rex.b = !!(rm & (1 << 3)); - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode); - sb(modregrmbyte(0x03, instr->fixedreg, rm)); - break; - case ENCODER_OPMEM: - memarg = &instr->arg1->memarg; - rex = instr->rex; - assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, 0); - break; - case ENCODER_R: - reg = regbits(instr->arg1->kind); - rex = instr->rex; - rex.required = isrexreg(instr->arg1->kind); - rex.b = !!(reg & (1 << 3)); - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode | (reg & 7)); - break; - case ENCODER_RIMM: - imm = &instr->arg1->imm; - reg = regbits(instr->arg2->kind); - rex = instr->rex; - rex.required = isrexreg(instr->arg2->kind); - rex.b = !!(reg & (1 << 3)); - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode | (reg & 7)); - assembleabsimm(imm); - break; - case ENCODER_IMM: - imm = &instr->arg1->imm; - rex = instr->rex; - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode); - assembleabsimm(imm); - break; - case ENCODER_IMMREG: - imm = &instr->arg1->imm; - reg = 
instr->fixedreg; - rm = regbits(instr->arg2->kind); - rex = instr->rex; - rex.required = isrexreg(instr->arg2->kind); - rex.b = !!(rm & (1 << 3)); - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode); - sb(modregrmbyte(0x03, reg, rm)); - assembleabsimm(imm); - break; - case ENCODER_IMMMEM: - imm = &instr->arg1->imm; - memarg = &instr->arg2->memarg; - reg = instr->fixedreg; - rex = instr->rex; - assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, - imm->nbytes); - assembleabsimm(imm); - break; - case ENCODER_REGMEM: - case ENCODER_MEMREG: - if (instr->encoder == ENCODER_MEMREG) { - memarg = &instr->arg1->memarg; - reg = regbits(instr->arg2->kind); - } else { - memarg = &instr->arg2->memarg; - reg = regbits(instr->arg1->kind); - } - rex = instr->rex; - rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); - rex.r = !!(reg & (1 << 3)); - assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, 0); - break; - case ENCODER_REGREG: - case ENCODER_REGREG2: - if (instr->encoder == ENCODER_REGREG) { - reg = regbits(instr->arg1->kind); - rm = regbits(instr->arg2->kind); +} + +static void +assemblejmp(const Jmp* j) +{ + int jmpsize; + int64_t distance; + Symbol* target; + + // clang-format off + static uint8_t cc2op[31] = { + 0xe9, 0x84, 0x88, 0x8b, 0x8a, 0x8a, 0x80, 0x85, + 0x89, 0x8b, 0x81, 0x8f, 0x8d, 0x8c, 0x8e, 0x85, + 0x83, 0x87, 0x83, 0x82, 0x86, 0x8e, 0x8c, 0x8d, + 0x8f, 0x84, 0x82, 0x86, 0x82, 0x83, 0x87, + }; + // clang-format on + + jmpsize = 4; + target = getsym(j->target); + if (cursection == target->section && (target->defined || target->wco != -1)) { + if (target->defined) { + distance = target->offset - cursection->hdr.sh_size; + } else { + distance = target->wco - cursection->hdr.sh_size; + } + if ((distance - 1) >= -128 && (distance - 1) <= 127) { + jmpsize = 1; + } else { + jmpsize = 4; + } + } + + if (jmpsize == 4) { + if (j->cc) + sb(0x0f); + sb(cc2op[j->cc]); + 
assemblereloc(j->target, -4, 4, R_X86_64_PC32); } else { - reg = regbits(instr->arg2->kind); - rm = regbits(instr->arg1->kind); + sb(cc2op[j->cc] + (j->cc ? -16 : 2)); + assemblereloc(j->target, -1, 1, R_X86_64_PC8); } - rex = instr->rex; - rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); - rex.r = !!(reg & (1 << 3)); - rex.b = !!(rm & (1 << 3)); - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode); - sb(modregrmbyte(0x03, reg, rm)); - break; - case ENCODER_IMMREGREG2: - imm = &instr->arg1->imm; - reg = regbits(instr->arg3->kind); - rm = regbits(instr->arg2->kind); - rex = instr->rex; - rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); - rex.r = !!(reg & (1 << 3)); - rex.b = !!(rm & (1 << 3)); - assemblevbytes(instr->prefix); - assemblerex(rex); - assemblevbytes(instr->opcode); - sb(modregrmbyte(0x03, reg, rm)); - assembleabsimm(imm); - break; - case ENCODER_IMMMEMREG: - imm = &instr->arg1->imm; - memarg = &instr->arg2->memarg; - reg = regbits(instr->arg3->kind); - rex = instr->rex; - rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); - rex.r = !!(reg & (1 << 3)); - assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, imm->nbytes); - assembleabsimm(imm); - break; - default: - unreachable(); - } -} - -static void assemble(void) { - Symbol *sym; - AsmLine *l; - const Parsev *v; - - cursection = text; - curlineno = 0; - for (l = allasm; l; l = l->next) { - curlineno++; - v = l->v; - switch (v->kind) { - case ASM_SYNTAX_ERROR: - lfatal("syntax error"); - break; - case ASM_BLANK: - break; - case ASM_DIR_GLOBL: - sym = getsym(v->globl.name); - sym->global = 1; - break; - case ASM_DIR_SECTION: { - const char *fp; - Section *s; - - s = getsection(v->section.name); - s->hdr.sh_type = v->section.type; - fp = v->section.flags; - while (fp && *fp) { - switch (*(fp++)) { - case 'a': - s->hdr.sh_flags |= SHF_ALLOC; - break; - case 'w': - s->hdr.sh_flags |= SHF_WRITE; 
- break; - case 'x': - s->hdr.sh_flags |= SHF_EXECINSTR; - break; - default: - unreachable(); +} + +static void +assembleabsimm(const Imm* imm) +{ + if (imm->nbytes == 1) + assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_8); + else if (imm->nbytes == 2) + assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_16); + else if (imm->nbytes == 4) + assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32); + else if (imm->nbytes == 8) + assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_64); + else + unreachable(); +} + +static void +assembleinstr(const Instr* instr) +{ + Rex rex; + const Memarg* memarg; + const Imm* imm; + uint8_t reg, rm; + + switch (instr->encoder) { + case ENCODER_OP: + assemblevbytes(instr->opcode); + break; + case ENCODER_OPREG: + rm = regbits(instr->arg1->kind); + rex = instr->rex; + rex.required = isrexreg(instr->arg1->kind); + rex.b = !!(rm & (1 << 3)); + assemblevbytes(instr->prefix); + assemblerex(rex); + assemblevbytes(instr->opcode); + sb(modregrmbyte(0x03, instr->fixedreg, rm)); + break; + case ENCODER_OPMEM: + memarg = &instr->arg1->memarg; + rex = instr->rex; + assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, 0); + break; + case ENCODER_R: + reg = regbits(instr->arg1->kind); + rex = instr->rex; + rex.required = isrexreg(instr->arg1->kind); + rex.b = !!(reg & (1 << 3)); + assemblevbytes(instr->prefix); + assemblerex(rex); + assemblevbytes(instr->opcode | (reg & 7)); + break; + case ENCODER_RIMM: + imm = &instr->arg1->imm; + reg = regbits(instr->arg2->kind); + rex = instr->rex; + rex.required = isrexreg(instr->arg2->kind); + rex.b = !!(reg & (1 << 3)); + assemblevbytes(instr->prefix); + assemblerex(rex); + assemblevbytes(instr->opcode | (reg & 7)); + assembleabsimm(imm); + break; + case ENCODER_IMM: + imm = &instr->arg1->imm; + rex = instr->rex; + assemblevbytes(instr->prefix); + assemblerex(rex); + assemblevbytes(instr->opcode); + assembleabsimm(imm); + break; + case ENCODER_IMMREG: + imm = 
&instr->arg1->imm; + reg = instr->fixedreg; + rm = regbits(instr->arg2->kind); + rex = instr->rex; + rex.required = isrexreg(instr->arg2->kind); + rex.b = !!(rm & (1 << 3)); + assemblevbytes(instr->prefix); + assemblerex(rex); + assemblevbytes(instr->opcode); + sb(modregrmbyte(0x03, reg, rm)); + assembleabsimm(imm); + break; + case ENCODER_IMMMEM: + imm = &instr->arg1->imm; + memarg = &instr->arg2->memarg; + reg = instr->fixedreg; + rex = instr->rex; + assemblemem(memarg, rex, instr->prefix, instr->opcode, instr->fixedreg, imm->nbytes); + assembleabsimm(imm); + break; + case ENCODER_REGMEM: + case ENCODER_MEMREG: + if (instr->encoder == ENCODER_MEMREG) { + memarg = &instr->arg1->memarg; + reg = regbits(instr->arg2->kind); + } else { + memarg = &instr->arg2->memarg; + reg = regbits(instr->arg1->kind); } - } - cursection = s; - break; - } - case ASM_DIR_DATA: - cursection = data; - break; - case ASM_DIR_TEXT: - cursection = text; - break; - case ASM_DIR_ASCII: - sbn(v->ascii.data, v->ascii.len); - break; - case ASM_DIR_ASCIIZ: - sbn(v->asciiz.data, v->asciiz.len); - sb(0x00); - break; - case ASM_DIR_BALIGN: { - int64_t offset, i, rem, amnt; - amnt = 0; - offset = cursection->hdr.sh_addralign + cursection->hdr.sh_size; - rem = offset % v->balign.align; - if (rem) - amnt = v->balign.align - rem; - for (i = 0; i < amnt; i++) { - sb(0x00); - } - break; - } - case ASM_DIR_FILL: { - ssize_t i = 0; - - for (i = 0; i < v->fill.repeat; i++) { - switch (v->fill.size) { - case 1: - case 2: - case 4: - case 8: - assembleconstant(v->fill.value, v->fill.size); - break; - default: - lfatal("unsupported fill size '%d'", v->fill.size); + rex = instr->rex; + rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); + rex.r = !!(reg & (1 << 3)); + assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, 0); + break; + case ENCODER_REGREG: + case ENCODER_REGREG2: + if (instr->encoder == ENCODER_REGREG) { + reg = regbits(instr->arg1->kind); + rm = 
regbits(instr->arg2->kind); + } else { + reg = regbits(instr->arg2->kind); + rm = regbits(instr->arg1->kind); } - } - break; - } - case ASM_DIR_BYTE: - assemblereloc(v->dirbyte.value.l, v->dirbyte.value.c, 1, R_X86_64_32); - break; - case ASM_DIR_SHORT: - assemblereloc(v->dirshort.value.l, v->dirshort.value.c, 2, R_X86_64_32); - break; - case ASM_DIR_INT: - assemblereloc(v->dirint.value.l, v->dirint.value.c, 4, R_X86_64_32); - break; - case ASM_DIR_QUAD: - assemblereloc(v->dirquad.value.l, v->dirquad.value.c, 8, R_X86_64_64); - break; - case ASM_LABEL: - sym = getsym(v->label.name); - sym->section = cursection; - sym->offset = cursection->hdr.sh_size; - if (sym->defined) - lfatal("%s already defined", sym->name); - sym->defined = 1; - break; - case ASM_INSTR: - assembleinstr(&v->instr); - break; - case ASM_CALL: - assemblecall(&v->call); - break; - case ASM_JMP: - assemblejmp(&v->jmp); - break; + rex = instr->rex; + rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); + rex.r = !!(reg & (1 << 3)); + rex.b = !!(rm & (1 << 3)); + assemblevbytes(instr->prefix); + assemblerex(rex); + assemblevbytes(instr->opcode); + sb(modregrmbyte(0x03, reg, rm)); + break; + case ENCODER_IMMREGREG2: + imm = &instr->arg1->imm; + reg = regbits(instr->arg3->kind); + rm = regbits(instr->arg2->kind); + rex = instr->rex; + rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); + rex.r = !!(reg & (1 << 3)); + rex.b = !!(rm & (1 << 3)); + assemblevbytes(instr->prefix); + assemblerex(rex); + assemblevbytes(instr->opcode); + sb(modregrmbyte(0x03, reg, rm)); + assembleabsimm(imm); + break; + case ENCODER_IMMMEMREG: + imm = &instr->arg1->imm; + memarg = &instr->arg2->memarg; + reg = regbits(instr->arg3->kind); + rex = instr->rex; + rex.required = isrexreg(instr->arg1->kind) || isrexreg(instr->arg2->kind); + rex.r = !!(reg & (1 << 3)); + assemblemem(memarg, rex, instr->prefix, instr->opcode, reg, imm->nbytes); + assembleabsimm(imm); + break; default: - 
lfatal("assemble: unexpected kind: %d", v->kind); + unreachable(); + } +} + +static void +assemble(void) +{ + Symbol* sym; + AsmLine* l; + const Parsev* v; + + cursection = text; + curlineno = 0; + for (l = allasm; l; l = l->next) { + curlineno++; + v = l->v; + switch (v->kind) { + case ASM_SYNTAX_ERROR: + lfatal("syntax error"); + break; + case ASM_BLANK: + break; + case ASM_DIR_GLOBL: + sym = getsym(v->globl.name); + sym->global = 1; + break; + case ASM_DIR_SECTION: { + const char* fp; + Section* s; + + s = getsection(v->section.name); + s->hdr.sh_type = v->section.type; + fp = v->section.flags; + while (fp && *fp) { + switch (*(fp++)) { + case 'a': + s->hdr.sh_flags |= SHF_ALLOC; + break; + case 'w': + s->hdr.sh_flags |= SHF_WRITE; + break; + case 'x': + s->hdr.sh_flags |= SHF_EXECINSTR; + break; + default: + unreachable(); + } + } + cursection = s; + break; + } + case ASM_DIR_DATA: + cursection = data; + break; + case ASM_DIR_TEXT: + cursection = text; + break; + case ASM_DIR_ASCII: + sbn(v->ascii.data, v->ascii.len); + break; + case ASM_DIR_ASCIIZ: + sbn(v->asciiz.data, v->asciiz.len); + sb(0x00); + break; + case ASM_DIR_BALIGN: { + int64_t offset, i, rem, amnt; + amnt = 0; + offset = cursection->hdr.sh_addralign + cursection->hdr.sh_size; + rem = offset % v->balign.align; + if (rem) + amnt = v->balign.align - rem; + for (i = 0; i < amnt; i++) { + sb(0x00); + } + break; + } + case ASM_DIR_FILL: { + ssize_t i = 0; + + for (i = 0; i < v->fill.repeat; i++) { + switch (v->fill.size) { + case 1: + case 2: + case 4: + case 8: + assembleconstant(v->fill.value, v->fill.size); + break; + default: + lfatal("unsupported fill size '%d'", v->fill.size); + } + } + break; + } + case ASM_DIR_BYTE: + assemblereloc(v->dirbyte.value.l, v->dirbyte.value.c, 1, R_X86_64_32); + break; + case ASM_DIR_SHORT: + assemblereloc(v->dirshort.value.l, v->dirshort.value.c, 2, R_X86_64_32); + break; + case ASM_DIR_INT: + assemblereloc(v->dirint.value.l, v->dirint.value.c, 4, R_X86_64_32); + 
break; + case ASM_DIR_QUAD: + assemblereloc(v->dirquad.value.l, v->dirquad.value.c, 8, R_X86_64_64); + break; + case ASM_LABEL: + sym = getsym(v->label.name); + sym->section = cursection; + sym->offset = cursection->hdr.sh_size; + if (sym->defined) + lfatal("%s already defined", sym->name); + sym->defined = 1; + break; + case ASM_INSTR: + assembleinstr(&v->instr); + break; + case ASM_CALL: + assemblecall(&v->call); + break; + case ASM_JMP: + assemblejmp(&v->jmp); + break; + default: + lfatal("assemble: unexpected kind: %d", v->kind); + } } - } } /* Reset while remembering symbol offsets so we can size jumps. */ -static void relaxreset(void) { - Symbol *sym; - Section *sec; - size_t i; - - /* Reset relocations and section data but retain capacity. */ - nrelocs = 0; - - for (i = 0; i < nsections; i++) { - sec = &sections[i]; - if (sec == shstrtab) - continue; - sec->hdr.sh_size = 0; - } - - /* Reset symbols, saving the worst case offset for the second pass. */ - for (i = 0; i < symbols->cap; i++) { - if (!symbols->keys[i].str) - continue; - sym = symbols->vals[i]; - *sym = (Symbol){ - .name = sym->name, .section = sym->section, .wco = sym->offset}; - } -} - -static void addtosymtab(Symbol *sym) { - Elf64_Sym elfsym; - int stype; - int sbind; - - stype = 0; - if (sym->defined) { - sbind = sym->global ? STB_GLOBAL : STB_LOCAL; - } else { - sbind = STB_GLOBAL; - } - - sym->idx = symtab->hdr.sh_size / symtab->hdr.sh_entsize; - - elfsym.st_name = elfstr(strtab, sym->name); - elfsym.st_value = sym->offset; - elfsym.st_size = sym->size; - elfsym.st_info = ELF64_ST_INFO(sbind, stype); - elfsym.st_shndx = sym->section ? 
sym->section->idx : SHN_UNDEF; - elfsym.st_other = 0; - secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym)); -} - -static void fillsymtab(void) { - Symbol *sym; - size_t i; - - // Local symbols - for (i = 0; i < symbols->cap; i++) { - if (!symbols->keys[i].str) - continue; - sym = symbols->vals[i]; - if (!sym->defined || sym->global) - continue; - addtosymtab(sym); - } - - // Global symbols - - // Set start of global symbols. - symtab->hdr.sh_info = symtab->hdr.sh_size / symtab->hdr.sh_entsize; - - for (i = 0; i < symbols->cap; i++) { - if (!symbols->keys[i].str) - continue; - sym = symbols->vals[i]; - - if (sym->defined && !sym->global) - continue; - addtosymtab(sym); - } -} - -static int resolvereloc(Relocation *reloc) { - Symbol *sym; - uint8_t *rdata; - int64_t value; - - sym = reloc->sym; - - if (sym->section != reloc->section) - return 0; +static void +relaxreset(void) +{ + Symbol* sym; + Section* sec; + size_t i; + + /* Reset relocations and section data but retain capacity. */ + nrelocs = 0; + + for (i = 0; i < nsections; i++) { + sec = &sections[i]; + if (sec == shstrtab) + continue; + sec->hdr.sh_size = 0; + } - switch (reloc->type) { - case R_X86_64_32: - case R_X86_64_64: - return 0; - case R_X86_64_PC8: - rdata = &reloc->section->data[reloc->offset]; - value = sym->offset - reloc->offset + reloc->addend; - rdata[0] = ((uint8_t)value & 0xff); - return 1; - case R_X86_64_PC32: - rdata = &reloc->section->data[reloc->offset]; - value = sym->offset - reloc->offset + reloc->addend; - rdata[0] = ((uint32_t)value & 0xff); - rdata[1] = ((uint32_t)value & 0xff00) >> 8; - rdata[2] = ((uint32_t)value & 0xff0000) >> 16; - rdata[3] = ((uint32_t)value & 0xff000000) >> 24; - return 1; - default: - unreachable(); - return 0; - } -} - -static void appendreloc(Relocation *reloc) { - Symbol *sym; - Section *relsection; - Elf64_Rela elfrel; - - memset(&elfrel, 0, sizeof(elfrel)); - - sym = reloc->sym; - if (reloc->section == text) - relsection = textrel; - else if (reloc->section 
== data) - relsection = datarel; - else { - fatal("unexpected relocation for symbol '%s'", sym->name); - return; - } - - switch (reloc->type) { - case R_X86_64_PC32: - case R_X86_64_32: - case R_X86_64_64: - elfrel.r_info = ELF64_R_INFO(sym->idx, reloc->type); - elfrel.r_offset = reloc->offset; - elfrel.r_addend = reloc->addend; - break; - default: - unreachable(); - } - - secaddbytes(relsection, &elfrel, sizeof(elfrel)); -} - -static void handlerelocs(void) { - Relocation *reloc; - size_t i; - for (i = 0; i < nrelocs; i++) { - reloc = &relocs[i]; - if (resolvereloc(reloc)) - continue; - appendreloc(reloc); - } -} - -static void out(const void *buf, size_t n) { - fwrite(buf, 1, n, stdout); - if (ferror(stdout)) - fatal("fwrite:"); -} - -static void outelf(void) { - size_t i; - uint64_t offset; - Elf64_Ehdr ehdr; - - memset(&ehdr, 0, sizeof(ehdr)); - ehdr.e_ident[0] = 0x7f; - ehdr.e_ident[1] = 'E'; - ehdr.e_ident[2] = 'L'; - ehdr.e_ident[3] = 'F'; - ehdr.e_ident[4] = ELFCLASS64; - ehdr.e_ident[5] = ELFDATA2LSB; - ehdr.e_ident[6] = 1; - ehdr.e_type = ET_REL; - ehdr.e_machine = EM_X86_64; - ehdr.e_flags = 0; - ehdr.e_version = 1; - ehdr.e_ehsize = sizeof(Elf64_Ehdr); - ehdr.e_shoff = sizeof(Elf64_Ehdr); - ehdr.e_shentsize = sizeof(Elf64_Shdr); - ehdr.e_shnum = nsections; - ehdr.e_shstrndx = 1; - - out(&ehdr, sizeof(ehdr)); - offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Shdr) * nsections; - - for (i = 0; i < nsections; i++) { - sections[i].hdr.sh_offset = offset; - out(&sections[i].hdr, sizeof(Elf64_Shdr)); - offset += sections[i].hdr.sh_size; - } - for (i = 0; i < nsections; i++) { - if (sections[i].hdr.sh_type == SHT_NOBITS) - continue; - out(sections[i].data, sections[i].hdr.sh_size); - } - if (fflush(stdout) != 0) - fatal("fflush:"); -} - -static void usage(char *argv0) { - fprintf(stderr, "minias - a mini x86-64 assembler.\n\n"); - fprintf(stderr, "usage: %s [-r iter] [-o out] [input]\n", argv0); - fprintf(stderr, "\n"); - fprintf(stderr, " -r iter Jump relaxation 
iterations (default 1).\n"); - fprintf(stderr, " -o out Output file to write (default stdout).\n"); - exit(2); -} - -static void parseargs(int argc, char *argv[]) { - char *a, *argv0, *outfname; - - argv0 = argv[0]; - - for (++argv; *argv; argv++) { - if (argv[0][0] != '-') - break; - for (a = &argv[0][1]; *a; a++) { - switch (*a) { - case '-': - case 'h': - usage(argv0); - break; - case 'r': - nrelax = atoi(*++argv); - break; - case 'o': - if (argv[1] == NULL) - usage(argv0); - outfname = *++argv; - if (!freopen(outfname, "w", stdout)) - fatal("unable to open %s:", outfname); + /* Reset symbols, saving the worst case offset for the second pass. */ + for (i = 0; i < symbols->cap; i++) { + if (!symbols->keys[i].str) + continue; + sym = symbols->vals[i]; + *sym = (Symbol){ .name = sym->name, .section = sym->section, .wco = sym->offset }; + } +} + +static void +addtosymtab(Symbol* sym) +{ + Elf64_Sym elfsym; + int stype; + int sbind; + + stype = 0; + if (sym->defined) { + sbind = sym->global ? STB_GLOBAL : STB_LOCAL; + } else { + sbind = STB_GLOBAL; + } + + sym->idx = symtab->hdr.sh_size / symtab->hdr.sh_entsize; + + elfsym.st_name = elfstr(strtab, sym->name); + elfsym.st_value = sym->offset; + elfsym.st_size = sym->size; + elfsym.st_info = ELF64_ST_INFO(sbind, stype); + elfsym.st_shndx = sym->section ? sym->section->idx : SHN_UNDEF; + elfsym.st_other = 0; + secaddbytes(symtab, &elfsym, sizeof(Elf64_Sym)); +} + +static void +fillsymtab(void) +{ + Symbol* sym; + size_t i; + + // Local symbols + for (i = 0; i < symbols->cap; i++) { + if (!symbols->keys[i].str) + continue; + sym = symbols->vals[i]; + if (!sym->defined || sym->global) + continue; + addtosymtab(sym); + } + + // Global symbols + + // Set start of global symbols. 
+ symtab->hdr.sh_info = symtab->hdr.sh_size / symtab->hdr.sh_entsize; + + for (i = 0; i < symbols->cap; i++) { + if (!symbols->keys[i].str) + continue; + + sym = symbols->vals[i]; + if (sym->defined && !sym->global) + continue; + addtosymtab(sym); + } +} + +static int +resolvereloc(Relocation* reloc) +{ + Symbol* sym; + uint8_t* rdata; + int64_t value; + + sym = reloc->sym; + + if (sym->section != reloc->section) + return 0; + + switch (reloc->type) { + case R_X86_64_32: + case R_X86_64_64: + return 0; + case R_X86_64_PC8: + rdata = &reloc->section->data[reloc->offset]; + value = sym->offset - reloc->offset + reloc->addend; + rdata[0] = ((uint8_t)value & 0xff); + return 1; + case R_X86_64_PC32: + rdata = &reloc->section->data[reloc->offset]; + value = sym->offset - reloc->offset + reloc->addend; + rdata[0] = ((uint32_t)value & 0xff); + rdata[1] = ((uint32_t)value & 0xff00) >> 8; + rdata[2] = ((uint32_t)value & 0xff0000) >> 16; + rdata[3] = ((uint32_t)value & 0xff000000) >> 24; + return 1; + default: + unreachable(); + return 0; + } +} + +static void +appendreloc(Relocation* reloc) +{ + Symbol* sym; + Section* relsection; + Elf64_Rela elfrel; + + memset(&elfrel, 0, sizeof(elfrel)); + + sym = reloc->sym; + if (reloc->section == text) + relsection = textrel; + else if (reloc->section == data) + relsection = datarel; + else { + fatal("unexpected relocation for symbol '%s'", sym->name); + return; + } + + switch (reloc->type) { + case R_X86_64_PC32: + case R_X86_64_32: + case R_X86_64_64: + elfrel.r_info = ELF64_R_INFO(sym->idx, reloc->type); + elfrel.r_offset = reloc->offset; + elfrel.r_addend = reloc->addend; break; - default: - usage(argv0); - } - } - } - - if (argv[0]) { - if (argv[1]) - usage(argv0); - infilename = argv[0]; - if (!freopen(infilename, "r", stdin)) - fatal("unable to open %s:", infilename); - } -} - -int main(int argc, char *argv[]) { - symbols = mkhtab(256); - parseargs(argc, argv); - allasm = parseasm(); - initsections(); - assemble(); - while 
(nrelax-- > 0) { - relaxreset(); + default: + unreachable(); + } + + secaddbytes(relsection, &elfrel, sizeof(elfrel)); +} + +static void +handlerelocs(void) +{ + Relocation* reloc; + size_t i; + for (i = 0; i < nrelocs; i++) { + reloc = &relocs[i]; + if (resolvereloc(reloc)) + continue; + appendreloc(reloc); + } +} + +static void +out(const void* buf, size_t n) +{ + fwrite(buf, 1, n, stdout); + if (ferror(stdout)) + fatal("fwrite:"); +} + +static void +outelf(void) +{ + size_t i; + uint64_t offset; + Elf64_Ehdr ehdr; + + memset(&ehdr, 0, sizeof(ehdr)); + ehdr.e_ident[0] = 0x7f; + ehdr.e_ident[1] = 'E'; + ehdr.e_ident[2] = 'L'; + ehdr.e_ident[3] = 'F'; + ehdr.e_ident[4] = ELFCLASS64; + ehdr.e_ident[5] = ELFDATA2LSB; + ehdr.e_ident[6] = 1; + ehdr.e_type = ET_REL; + ehdr.e_machine = EM_X86_64; + ehdr.e_flags = 0; + ehdr.e_version = 1; + ehdr.e_ehsize = sizeof(Elf64_Ehdr); + ehdr.e_shoff = sizeof(Elf64_Ehdr); + ehdr.e_shentsize = sizeof(Elf64_Shdr); + ehdr.e_shnum = nsections; + ehdr.e_shstrndx = 1; + + out(&ehdr, sizeof(ehdr)); + offset = sizeof(Elf64_Ehdr) + sizeof(Elf64_Shdr) * nsections; + + for (i = 0; i < nsections; i++) { + sections[i].hdr.sh_offset = offset; + out(&sections[i].hdr, sizeof(Elf64_Shdr)); + offset += sections[i].hdr.sh_size; + } + for (i = 0; i < nsections; i++) { + if (sections[i].hdr.sh_type == SHT_NOBITS) + continue; + out(sections[i].data, sections[i].hdr.sh_size); + } + if (fflush(stdout) != 0) + fatal("fflush:"); +} + +static void +usage(char* argv0) +{ + fprintf(stderr, "minias - a mini x86-64 assembler.\n\n"); + fprintf(stderr, "usage: %s [-r iter] [-o out] [input]\n", argv0); + fprintf(stderr, "\n"); + fprintf(stderr, " -r iter Jump relaxation iterations (default 1).\n"); + fprintf(stderr, " -o out Output file to write (default stdout).\n"); + exit(2); +} + +static void +parseargs(int argc, char* argv[]) +{ + char *a, *argv0, *outfname; + + argv0 = argv[0]; + + for (++argv; *argv; argv++) { + if (argv[0][0] != '-') + break; + a = &argv[0][1]; 
+ switch (*a) { + case '-': + case 'h': + usage(argv0); + break; + case 'r': + nrelax = atoi(*++argv); + break; + case 'o': + if (argv[1] == NULL) + usage(argv0); + outfname = *++argv; + if (!freopen(outfname, "w", stdout)) + fatal("unable to open %s:", outfname); + break; + default: + usage(argv0); + } + } + + if (argv[0]) { + if (argv[1]) + usage(argv0); + infilename = argv[0]; + if (!freopen(infilename, "r", stdin)) + fatal("unable to open %s:", infilename); + } +} + +int +main(int argc, char* argv[]) +{ + symbols = mkhtab(256); + parseargs(argc, argv); + allasm = parseasm(); + initsections(); assemble(); - } - fillsymtab(); - handlerelocs(); - outelf(); - return 0; + while (nrelax-- > 0) { + relaxreset(); + assemble(); + } + fillsymtab(); + handlerelocs(); + outelf(); + return 0; }
\ No newline at end of file @@ -12,353 +12,353 @@ #include <unistd.h> typedef struct { - Elf64_Shdr hdr; - int16_t idx; - int64_t wco; - int64_t offset; - size_t capacity; - uint8_t *data; + Elf64_Shdr hdr; + int16_t idx; + int64_t wco; + int64_t offset; + size_t capacity; + uint8_t* data; } Section; typedef struct { - const char *name; - int32_t idx; - int64_t offset; - int64_t wco; /* worst case offset */ - int64_t size; - int global; - int defined; - Section *section; + const char* name; + int32_t idx; + int64_t offset; + int64_t wco; /* worst case offset */ + int64_t size; + int global; + int defined; + Section* section; } Symbol; typedef struct { - Section *section; - Symbol *sym; - int type; - int64_t offset; - int64_t addend; + Section* section; + Symbol* sym; + int type; + int64_t offset; + int64_t addend; } Relocation; typedef enum { - // Misc - ASM_SYNTAX_ERROR, - ASM_BLANK, - ASM_LABEL, - ASM_IMM, - ASM_STRING, - ASM_MEMARG, - // Directives. - ASM_DIR_GLOBL, - ASM_DIR_SECTION, - ASM_DIR_ASCII, - ASM_DIR_ASCIIZ, - ASM_DIR_DATA, - ASM_DIR_TEXT, - ASM_DIR_FILL, - ASM_DIR_BYTE, - ASM_DIR_SHORT, - ASM_DIR_INT, - ASM_DIR_QUAD, - ASM_DIR_BALIGN, - // Instructions. - ASM_CALL, - ASM_JMP, - ASM_INSTR, - // Registers, order matters. 
- ASM_REG_BEGIN, - - ASM_AL, - ASM_CL, - ASM_DL, - ASM_BL, - ASM_SPL, - ASM_BPL, - ASM_SIL, - ASM_DIL, - ASM_R8B, - ASM_R9B, - ASM_R10B, - ASM_R11B, - ASM_R12B, - ASM_R13B, - ASM_R14B, - ASM_R15B, - - ASM_AX, - ASM_CX, - ASM_DX, - ASM_BX, - ASM_SP, - ASM_BP, - ASM_SI, - ASM_DI, - ASM_R8W, - ASM_R9W, - ASM_R10W, - ASM_R11W, - ASM_R12W, - ASM_R13W, - ASM_R14W, - ASM_R15W, - - ASM_EAX, - ASM_ECX, - ASM_EDX, - ASM_EBX, - ASM_ESP, - ASM_EBP, - ASM_ESI, - ASM_EDI, - ASM_R8D, - ASM_R9D, - ASM_R10D, - ASM_R11D, - ASM_R12D, - ASM_R13D, - ASM_R14D, - ASM_R15D, - - ASM_RAX, - ASM_RCX, - ASM_RDX, - ASM_RBX, - ASM_RSP, - ASM_RBP, - ASM_RSI, - ASM_RDI, - ASM_R8, - ASM_R9, - ASM_R10, - ASM_R11, - ASM_R12, - ASM_R13, - ASM_R14, - ASM_R15, - - ASM_XMM0, - ASM_XMM1, - ASM_XMM2, - ASM_XMM3, - ASM_XMM4, - ASM_XMM5, - ASM_XMM6, - ASM_XMM7, - ASM_XMM8, - ASM_XMM9, - ASM_XMM10, - ASM_XMM11, - ASM_XMM12, - ASM_XMM13, - ASM_XMM14, - ASM_XMM15, - - /* RIP is in a special class of its own. */ - ASM_RIP, - ASM_NO_REG, - - ASM_REG_END, + // Misc + ASM_SYNTAX_ERROR, + ASM_BLANK, + ASM_LABEL, + ASM_IMM, + ASM_STRING, + ASM_MEMARG, + // Directives. + ASM_DIR_GLOBL, + ASM_DIR_SECTION, + ASM_DIR_ASCII, + ASM_DIR_ASCIIZ, + ASM_DIR_DATA, + ASM_DIR_TEXT, + ASM_DIR_FILL, + ASM_DIR_BYTE, + ASM_DIR_SHORT, + ASM_DIR_INT, + ASM_DIR_QUAD, + ASM_DIR_BALIGN, + // Instructions. + ASM_CALL, + ASM_JMP, + ASM_INSTR, + // Registers, order matters. 
+ ASM_REG_BEGIN, + + ASM_AL, + ASM_CL, + ASM_DL, + ASM_BL, + ASM_SPL, + ASM_BPL, + ASM_SIL, + ASM_DIL, + ASM_R8B, + ASM_R9B, + ASM_R10B, + ASM_R11B, + ASM_R12B, + ASM_R13B, + ASM_R14B, + ASM_R15B, + + ASM_AX, + ASM_CX, + ASM_DX, + ASM_BX, + ASM_SP, + ASM_BP, + ASM_SI, + ASM_DI, + ASM_R8W, + ASM_R9W, + ASM_R10W, + ASM_R11W, + ASM_R12W, + ASM_R13W, + ASM_R14W, + ASM_R15W, + + ASM_EAX, + ASM_ECX, + ASM_EDX, + ASM_EBX, + ASM_ESP, + ASM_EBP, + ASM_ESI, + ASM_EDI, + ASM_R8D, + ASM_R9D, + ASM_R10D, + ASM_R11D, + ASM_R12D, + ASM_R13D, + ASM_R14D, + ASM_R15D, + + ASM_RAX, + ASM_RCX, + ASM_RDX, + ASM_RBX, + ASM_RSP, + ASM_RBP, + ASM_RSI, + ASM_RDI, + ASM_R8, + ASM_R9, + ASM_R10, + ASM_R11, + ASM_R12, + ASM_R13, + ASM_R14, + ASM_R15, + + ASM_XMM0, + ASM_XMM1, + ASM_XMM2, + ASM_XMM3, + ASM_XMM4, + ASM_XMM5, + ASM_XMM6, + ASM_XMM7, + ASM_XMM8, + ASM_XMM9, + ASM_XMM10, + ASM_XMM11, + ASM_XMM12, + ASM_XMM13, + ASM_XMM14, + ASM_XMM15, + + /* RIP is in a special class of its own. */ + ASM_RIP, + ASM_NO_REG, + + ASM_REG_END, } AsmKind; typedef union Parsev Parsev; typedef struct Label { - AsmKind kind; - const char *name; + AsmKind kind; + const char* name; } Label; typedef struct Globl { - AsmKind kind; - const char *name; + AsmKind kind; + const char* name; } Globl; typedef struct DirSection { - AsmKind kind; - int32_t type; - const char *name; - const char *flags; + AsmKind kind; + int32_t type; + const char* name; + const char* flags; } DirSection; typedef struct { - int64_t c; - const char *l; + int64_t c; + const char* l; } Value; typedef struct Byte { - AsmKind kind; - Value value; + AsmKind kind; + Value value; } Byte; typedef struct Short { - AsmKind kind; - Value value; + AsmKind kind; + Value value; } Short; typedef struct Int { - AsmKind kind; - Value value; + AsmKind kind; + Value value; } Int; typedef struct Quad { - AsmKind kind; - Value value; + AsmKind kind; + Value value; } Quad; typedef struct Balign { - AsmKind kind; - uint64_t align; + AsmKind kind; + uint64_t 
align; } Balign; typedef struct Fill { - AsmKind kind; - int32_t size; - int32_t repeat; - int64_t value; + AsmKind kind; + int32_t size; + int32_t repeat; + int64_t value; } Fill; typedef struct Imm { - AsmKind kind; - uint32_t nbytes; - Value v; + AsmKind kind; + uint32_t nbytes; + Value v; } Imm; typedef struct Memarg { - AsmKind kind; - AsmKind base; - AsmKind index; - uint32_t scale; - Value disp; + AsmKind kind; + AsmKind base; + AsmKind index; + uint32_t scale; + Value disp; } Memarg; typedef struct String { - AsmKind kind; - size_t len; - uint8_t *data; + AsmKind kind; + size_t len; + uint8_t* data; } String; typedef String Ascii; typedef String Asciiz; typedef struct Call { - AsmKind kind; - uint32_t indirect; - union { - const Parsev *indirect; - Value direct; - } target; + AsmKind kind; + uint32_t indirect; + union { + const Parsev* indirect; + Value direct; + } target; } Call; typedef struct Jmp { - AsmKind kind; - uint32_t cc; /* 0 means unconditional. */ - const char *target; + AsmKind kind; + uint32_t cc; /* 0 means unconditional. */ + const char* target; } Jmp; /* Rex opcode prefix. */ typedef struct Rex { - uint8_t required : 1; - uint8_t w : 1; - uint8_t r : 1; - uint8_t x : 1; - uint8_t b : 1; + uint8_t required : 1; + uint8_t w : 1; + uint8_t r : 1; + uint8_t x : 1; + uint8_t b : 1; } Rex; /* Various classes of instruction encoding. The *2 variants just have operands swapped. 
*/ typedef enum Encoder { - ENCODER_OP, - ENCODER_OPREG, - ENCODER_OPMEM, - ENCODER_R, - ENCODER_RIMM, - ENCODER_IMM, - ENCODER_IMMMEM, - ENCODER_IMMREG, - ENCODER_MEMREG, - ENCODER_MEMREG2, - ENCODER_REGMEM, - ENCODER_REGMEM2, - ENCODER_REGREG, - ENCODER_REGREG2, - ENCODER_IMMREGREG2, - ENCODER_IMMMEMREG, + ENCODER_OP, + ENCODER_OPREG, + ENCODER_OPMEM, + ENCODER_R, + ENCODER_RIMM, + ENCODER_IMM, + ENCODER_IMMMEM, + ENCODER_IMMREG, + ENCODER_MEMREG, + ENCODER_MEMREG2, + ENCODER_REGMEM, + ENCODER_REGMEM2, + ENCODER_REGREG, + ENCODER_REGREG2, + ENCODER_IMMREGREG2, + ENCODER_IMMMEMREG, } Encoder; typedef struct Instr { - AsmKind kind; - Encoder encoder; - Rex rex; - uint8_t pad[3]; /* Avoid undefined padding - see internparsev. */ - uint32_t fixedreg; - int32_t opcode; - int32_t prefix; - const Parsev *arg1; - const Parsev *arg2; - const Parsev *arg3; + AsmKind kind; + Encoder encoder; + Rex rex; + uint8_t pad[3]; /* Avoid undefined padding - see internparsev. */ + uint32_t fixedreg; + int32_t opcode; + int32_t prefix; + const Parsev* arg1; + const Parsev* arg2; + const Parsev* arg3; } Instr; union Parsev { - AsmKind kind; - Label label; - Globl globl; - DirSection section; - Balign balign; - Ascii ascii; - Asciiz asciiz; - Memarg memarg; - Instr instr; - Call call; - Jmp jmp; - Fill fill; - Byte dirbyte; - Short dirshort; - Int dirint; - Quad dirquad; - Imm imm; - String string; - // Temporary values. - Value value; - const char *charptr; - int64_t i64; + AsmKind kind; + Label label; + Globl globl; + DirSection section; + Balign balign; + Ascii ascii; + Asciiz asciiz; + Memarg memarg; + Instr instr; + Call call; + Jmp jmp; + Fill fill; + Byte dirbyte; + Short dirshort; + Int dirint; + Quad dirquad; + Imm imm; + String string; + // Temporary values. 
+ Value value; + const char* charptr; + int64_t i64; }; /* parse.c */ typedef struct AsmLine AsmLine; struct AsmLine { - int64_t lineno; - const Parsev *v; - AsmLine *next; + int64_t lineno; + const Parsev* v; + AsmLine* next; }; -AsmLine *parseasm(void); +AsmLine* parseasm(void); /* util.c */ -void vwarn(const char *fmt, va_list ap); -void fatal(const char *fmt, ...); +void vwarn(const char* fmt, va_list ap); +void fatal(const char* fmt, ...); void unreachable(void); -void *xmalloc(size_t); -void *xrealloc(void *, size_t); -void *xreallocarray(void *, size_t, size_t); -char *xmemdup(const char *, size_t); -char *xstrdup(const char *s); -void *zalloc(size_t n); +void* xmalloc(size_t); +void* xrealloc(void*, size_t); +void* xreallocarray(void*, size_t, size_t); +char* xmemdup(const char*, size_t); +char* xstrdup(const char* s); +void* zalloc(size_t n); struct hashtable { - size_t len, cap; - struct hashtablekey *keys; - void **vals; + size_t len, cap; + struct hashtablekey* keys; + void** vals; }; struct hashtablekey { - uint64_t hash; - const char *str; - size_t len; + uint64_t hash; + const char* str; + size_t len; }; -void htabkey(struct hashtablekey *, const char *, size_t); -struct hashtable *mkhtab(size_t); -void delhtab(struct hashtable *, void(void *)); -void **htabput(struct hashtable *, struct hashtablekey *); -void *htabget(struct hashtable *, struct hashtablekey *); -uint64_t murmurhash64a(const void *, size_t);
\ No newline at end of file +void htabkey(struct hashtablekey*, const char*, size_t); +struct hashtable* mkhtab(size_t); +void delhtab(struct hashtable*, void(void*)); +void** htabput(struct hashtable*, struct hashtablekey*); +void* htabget(struct hashtable*, struct hashtablekey*); +uint64_t murmurhash64a(const void*, size_t);
\ No newline at end of file @@ -1,8 +1,10 @@ #include "minias.h" /* Cache of Parsev* by value. */ -static const Parsev *internparsev(Parsev *p) { - /* +static const Parsev* +internparsev(Parsev* p) +{ + /* A simple direct mapped cache that prevents our parser from allocating duplicate values. Note that it uses memcmp for equality, even on pointer values, this works because the @@ -13,238 +15,264 @@ static const Parsev *internparsev(Parsev *p) { but the best fix is still to avoid the padding bytes in the Parsev variants. */ - size_t idx; - const Parsev *interned; - static const Parsev *cache[4096] = {0}; - - idx = murmurhash64a((char *)p, sizeof(Parsev)) % sizeof(cache) / - sizeof(cache[0]); - interned = cache[idx]; - if (interned && memcmp(p, interned, sizeof(Parsev)) == 0) + size_t idx; + const Parsev* interned; + static const Parsev* cache[4096] = { 0 }; + + idx = murmurhash64a((char*)p, sizeof(Parsev)) % sizeof(cache) / sizeof(cache[0]); + interned = cache[idx]; + if (interned && memcmp(p, interned, sizeof(Parsev)) == 0) + return interned; + interned = (const Parsev*)xmemdup((char*)p, sizeof(Parsev)); + cache[idx] = interned; return interned; - interned = (const Parsev *)xmemdup((char *)p, sizeof(Parsev)); - cache[idx] = interned; - return interned; } /* Cache of char* by value. 
*/ -const char *internstring(const char *s) { - size_t idx, len; - const char *interned; - static const char *cache[4096] = {0}; - - len = strlen(s); - idx = murmurhash64a(s, len) % sizeof(cache) / sizeof(cache[0]); - interned = cache[idx]; - if (interned && strcmp(s, cache[idx]) == 0) +const char* +internstring(const char* s) +{ + size_t idx, len; + const char* interned; + static const char* cache[4096] = { 0 }; + + len = strlen(s); + idx = murmurhash64a(s, len) % sizeof(cache) / sizeof(cache[0]); + interned = cache[idx]; + if (interned && strcmp(s, cache[idx]) == 0) + return interned; + interned = xstrdup(s); + cache[idx] = interned; return interned; - interned = xstrdup(s); - cache[idx] = interned; - return interned; } -static String decodestring(char *s) { - char *end; - size_t len = 0; - size_t cap = 0; - uint8_t *data = NULL; - uint8_t c = 0; - - /* The string is already validated by the parser so we omit some checks*/ - while (*s) { - if (*s == '\\') { - s++; - if (*s >= '0' && *s <= '7') { - c = strtoul(s, &end, 8); - s += 2; - } else if (*s == 'x') { +static String +decodestring(char* s) +{ + char* end; + size_t len = 0; + size_t cap = 0; + uint8_t* data = NULL; + uint8_t c = 0; + + /* The string is already validated by the parser so we omit some checks*/ + while (*s) { + if (*s == '\\') { + s++; + if (*s >= '0' && *s <= '7') { + c = strtoul(s, &end, 8); + s += 2; + } else if (*s == 'x') { + s++; + c = strtoul(s, &end, 16); + s = end - 1; + } else if (*s == 'r') { + c = '\r'; + } else if (*s == 'n') { + c = '\n'; + } else if (*s == 't') { + c = '\t'; + } else if (*s == '\\') { + c = '\\'; + } else { + unreachable(); + } + } else { + c = *s; + } s++; - c = strtoul(s, &end, 16); - s = end - 1; - } else if (*s == 'r') { - c = '\r'; - } else if (*s == 'n') { - c = '\n'; - } else if (*s == 't') { - c = '\t'; - } else if (*s == '\\') { - c = '\\'; - } else { - unreachable(); - } - } else { - c = *s; - } - s++; - if (len == cap) { - cap = cap ? 
len * 2 : 8; - data = realloc(data, cap); - } - data[len++] = c; - } - return (String){.kind = ASM_STRING, .len = len, .data = data}; + if (len == cap) { + cap = cap ? len * 2 : 8; + data = realloc(data, cap); + } + data[len++] = c; + } + return (String){ .kind = ASM_STRING, .len = len, .data = data }; } -static int needsmovabs(Imm *imm) { - int64_t mask, maskedc; +static int +needsmovabs(Imm* imm) +{ + int64_t mask, maskedc; - if (imm->v.l) - return 1; + if (imm->v.l) + return 1; - mask = 0xffffffff80000000; - maskedc = (uint64_t)imm->v.c & mask; - return (maskedc != mask && maskedc != 0); + mask = 0xffffffff80000000; + maskedc = (uint64_t)imm->v.c & mask; + return (maskedc != mask && maskedc != 0); } -#define OP(OPCODE) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_OP, .prefix = -1, \ - .opcode = OPCODE, \ - } \ - } - -#define OPREG(REX, PREFIX, OPCODE, REG, A1) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_OPREG, .prefix = PREFIX, \ - .rex = (Rex)REX, .fixedreg = REG, .opcode = OPCODE, \ - .arg1 = internparsev(&A1) \ - } \ - } - -#define OPMEM(REX, PREFIX, OPCODE, REG, A1) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_OPMEM, .prefix = PREFIX, \ - .rex = (Rex)REX, .fixedreg = REG, .opcode = OPCODE, \ - .arg1 = internparsev(&A1) \ - } \ - } - -#define R(REX, PREFIX, OPCODE, A1) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_R, .prefix = PREFIX, \ - .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \ - } \ - } - -#define IMM(REX, PREFIX, OPCODE, A1, A2) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_IMM, .prefix = PREFIX, \ - .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \ - .arg2 = internparsev(&A2) \ - } \ - } - -#define RIMM(REX, PREFIX, OPCODE, A1, A2) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_RIMM, .prefix = PREFIX, \ - .opcode = 
OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \ - .arg2 = internparsev(&A2) \ - } \ - } - -#define IMMREG(REX, PREFIX, OPCODE, IMMREG, A1, A2) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_IMMREG, .prefix = PREFIX, \ - .opcode = OPCODE, .rex = (Rex)REX, .fixedreg = IMMREG, \ - .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \ - } \ - } - -#define IMMMEM(REX, PREFIX, OPCODE, IMMREG, A1, A2) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_IMMMEM, .prefix = PREFIX, \ - .opcode = OPCODE, .rex = (Rex)REX, .fixedreg = IMMREG, \ - .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \ - } \ - } - -#define REGMEM(REX, PREFIX, OPCODE, A1, A2) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_REGMEM, .prefix = PREFIX, \ - .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \ - .arg2 = internparsev(&A2) \ - } \ - } - -#define MEMREG(REX, PREFIX, OPCODE, A1, A2) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_MEMREG, .prefix = PREFIX, \ - .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \ - .arg2 = internparsev(&A2) \ - } \ - } - -#define REGREG(REX, PREFIX, OPCODE, A1, A2) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_REGREG, .prefix = PREFIX, \ - .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \ - .arg2 = internparsev(&A2) \ - } \ - } - -#define REGREG2(REX, PREFIX, OPCODE, A1, A2) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_REGREG2, .prefix = PREFIX, \ - .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \ - .arg2 = internparsev(&A2) \ - } \ - } - -#define IMMREGREG2(REX, PREFIX, OPCODE, A1, A2, A3) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_IMMREGREG2, .prefix = PREFIX, \ - .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \ - .arg2 = internparsev(&A2), .arg3 = 
internparsev(&A3) \ - } \ - } - -#define IMMMEMREG(REX, PREFIX, OPCODE, A1, A2, A3) \ - (Parsev) { \ - .instr = (Instr) { \ - .kind = ASM_INSTR, .encoder = ENCODER_IMMMEMREG, .prefix = PREFIX, \ - .opcode = OPCODE, .rex = (Rex)REX, .arg1 = internparsev(&A1), \ - .arg2 = internparsev(&A2), .arg3 = internparsev(&A3) \ - } \ - } - -#define REG(K) \ - (Parsev) { .kind = ASM_##K } +#define OP(OPCODE) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_OP, .prefix = -1, .opcode = OPCODE, \ + } \ + } + +#define OPREG(REX, PREFIX, OPCODE, REG, A1) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_OPREG, .prefix = PREFIX, .rex = (Rex)REX, \ + .fixedreg = REG, .opcode = OPCODE, .arg1 = internparsev(&A1) \ + } \ + } + +#define OPMEM(REX, PREFIX, OPCODE, REG, A1) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_OPMEM, .prefix = PREFIX, .rex = (Rex)REX, \ + .fixedreg = REG, .opcode = OPCODE, .arg1 = internparsev(&A1) \ + } \ + } + +#define R(REX, PREFIX, OPCODE, A1) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_R, .prefix = PREFIX, .opcode = OPCODE, \ + .rex = (Rex)REX, .arg1 = internparsev(&A1), \ + } \ + } + +#define IMM(REX, PREFIX, OPCODE, A1, A2) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_IMM, .prefix = PREFIX, .opcode = OPCODE, \ + .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \ + } \ + } + +#define RIMM(REX, PREFIX, OPCODE, A1, A2) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_RIMM, .prefix = PREFIX, .opcode = OPCODE, \ + .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \ + } \ + } + +#define IMMREG(REX, PREFIX, OPCODE, IMMREG, A1, A2) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_IMMREG, .prefix = PREFIX, .opcode = OPCODE, \ + .rex = (Rex)REX, 
.fixedreg = IMMREG, .arg1 = internparsev(&A1), \ + .arg2 = internparsev(&A2) \ + } \ + } + +#define IMMMEM(REX, PREFIX, OPCODE, IMMREG, A1, A2) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_IMMMEM, .prefix = PREFIX, .opcode = OPCODE, \ + .rex = (Rex)REX, .fixedreg = IMMREG, .arg1 = internparsev(&A1), \ + .arg2 = internparsev(&A2) \ + } \ + } + +#define REGMEM(REX, PREFIX, OPCODE, A1, A2) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_REGMEM, .prefix = PREFIX, .opcode = OPCODE, \ + .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \ + } \ + } + +#define MEMREG(REX, PREFIX, OPCODE, A1, A2) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_MEMREG, .prefix = PREFIX, .opcode = OPCODE, \ + .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \ + } \ + } + +#define REGREG(REX, PREFIX, OPCODE, A1, A2) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_REGREG, .prefix = PREFIX, .opcode = OPCODE, \ + .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \ + } \ + } + +#define REGREG2(REX, PREFIX, OPCODE, A1, A2) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_REGREG2, .prefix = PREFIX, .opcode = OPCODE, \ + .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2) \ + } \ + } + +#define IMMREGREG2(REX, PREFIX, OPCODE, A1, A2, A3) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_IMMREGREG2, .prefix = PREFIX, .opcode = OPCODE, \ + .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 = internparsev(&A2), \ + .arg3 = internparsev(&A3) \ + } \ + } + +#define IMMMEMREG(REX, PREFIX, OPCODE, A1, A2, A3) \ + (Parsev) \ + { \ + .instr = (Instr) \ + { \ + .kind = ASM_INSTR, .encoder = ENCODER_IMMMEMREG, .prefix = PREFIX, .opcode = OPCODE, \ + .rex = (Rex)REX, .arg1 = internparsev(&A1), .arg2 
= internparsev(&A2), \ + .arg3 = internparsev(&A3) \ + } \ + } + +#define REG(K) \ + (Parsev) { .kind = ASM_##K } #define YYSTYPE Parsev #define YY_CTX_LOCAL #define YY_CTX_MEMBERS Parsev v; #include "asm.peg.inc" -AsmLine *parseasm(void) { - AsmLine *result, *l, *prevl; - yycontext ctx; - - memset(&ctx, 0, sizeof(yycontext)); - result = NULL; - prevl = NULL; - - while (yyparse(&ctx)) { - l = zalloc(sizeof(AsmLine)); - l->v = internparsev(&ctx.v); - if (prevl) - prevl->next = l; - else - result = l; - prevl = l; - } - - return result; +AsmLine* +parseasm(void) +{ + AsmLine *result, *l, *prevl; + yycontext ctx; + + memset(&ctx, 0, sizeof(yycontext)); + result = NULL; + prevl = NULL; + + while (yyparse(&ctx)) { + l = zalloc(sizeof(AsmLine)); + l->v = internparsev(&ctx.v); + if (prevl) + prevl->next = l; + else + result = l; + prevl = l; + } + + return result; } @@ -1,209 +1,247 @@ #include "minias.h" -void vwarn(const char *fmt, va_list ap) { - vfprintf(stderr, fmt, ap); - if (fmt[0] && fmt[strlen(fmt) - 1] == ':') { - putc(' ', stderr); - perror(NULL); - } else { - putc('\n', stderr); - } +void +vwarn(const char* fmt, va_list ap) +{ + vfprintf(stderr, fmt, ap); + if (fmt[0] && fmt[strlen(fmt) - 1] == ':') { + putc(' ', stderr); + perror(NULL); + } else { + putc('\n', stderr); + } } -void fatal(const char *fmt, ...) { - va_list ap; - va_start(ap, fmt); - vwarn(fmt, ap); - va_end(ap); - exit(1); +void +fatal(const char* fmt, ...) 
+{ + va_list ap; + va_start(ap, fmt); + vwarn(fmt, ap); + va_end(ap); + exit(1); } -void unreachable(void) { fatal("BUG: unexpected internal condition"); } +void +unreachable(void) +{ + fatal("BUG: unexpected internal condition"); +} -void *xmalloc(size_t n) { - void *p; +void* +xmalloc(size_t n) +{ + void* p; - p = malloc(n); - if (!p) - fatal("malloc:"); + p = malloc(n); + if (!p) + fatal("malloc:"); - return p; + return p; } -void *zalloc(size_t n) { - void *p; +void* +zalloc(size_t n) +{ + void* p; - p = malloc(n); - if (!p) - fatal("malloc:"); - memset(p, 0, n); - return p; + p = malloc(n); + if (!p) + fatal("malloc:"); + memset(p, 0, n); + return p; } -void *xrealloc(void *p, size_t n) { - p = realloc(p, n); - if (!p) - fatal("realloc:"); +void* +xrealloc(void* p, size_t n) +{ + p = realloc(p, n); + if (!p) + fatal("realloc:"); - return p; + return p; } -void *xreallocarray(void *p, size_t n, size_t m) { - p = reallocarray(p, n, m); - if (!p) - fatal("reallocarray:"); +void* +xreallocarray(void* p, size_t n, size_t m) +{ + p = reallocarray(p, n, m); + if (!p) + fatal("reallocarray:"); - return p; + return p; } -char *xmemdup(const char *s, size_t n) { - char *p; +char* +xmemdup(const char* s, size_t n) +{ + char* p; - p = xmalloc(n); - memcpy(p, s, n); + p = xmalloc(n); + memcpy(p, s, n); - return p; + return p; } -char *xstrdup(const char *s) { return xmemdup(s, strlen(s) + 1); } +char* +xstrdup(const char* s) +{ + return xmemdup(s, strlen(s) + 1); +} -void htabkey(struct hashtablekey *k, const char *s, size_t n) { - k->str = s; - k->len = n; - k->hash = murmurhash64a(s, n); +void +htabkey(struct hashtablekey* k, const char* s, size_t n) +{ + k->str = s; + k->len = n; + k->hash = murmurhash64a(s, n); } -struct hashtable *mkhtab(size_t cap) { - struct hashtable *h; - size_t i; +struct hashtable* +mkhtab(size_t cap) +{ + struct hashtable* h; + size_t i; + + assert(!(cap & (cap - 1))); + h = xmalloc(sizeof(*h)); + h->len = 0; + h->cap = cap; + h->keys = 
xreallocarray(NULL, cap, sizeof(h->keys[0])); + h->vals = xreallocarray(NULL, cap, sizeof(h->vals[0])); + for (i = 0; i < cap; ++i) + h->keys[i].str = NULL; + + return h; +} - assert(!(cap & (cap - 1))); - h = xmalloc(sizeof(*h)); - h->len = 0; - h->cap = cap; - h->keys = xreallocarray(NULL, cap, sizeof(h->keys[0])); - h->vals = xreallocarray(NULL, cap, sizeof(h->vals[0])); - for (i = 0; i < cap; ++i) - h->keys[i].str = NULL; +void +delhtab(struct hashtable* h, void del(void*)) +{ + size_t i; + + if (!h) + return; + if (del) { + for (i = 0; i < h->cap; ++i) { + if (h->keys[i].str) + del(h->vals[i]); + } + } + free(h->keys); + free(h->vals); + free(h); +} - return h; +static bool +keyequal(struct hashtablekey* k1, struct hashtablekey* k2) +{ + if (k1->hash != k2->hash || k1->len != k2->len) + return false; + return memcmp(k1->str, k2->str, k1->len) == 0; } -void delhtab(struct hashtable *h, void del(void *)) { - size_t i; +static size_t +keyindex(struct hashtable* h, struct hashtablekey* k) +{ + size_t i; - if (!h) - return; - if (del) { - for (i = 0; i < h->cap; ++i) { - if (h->keys[i].str) - del(h->vals[i]); + i = k->hash & (h->cap - 1); + while (h->keys[i].str && !keyequal(&h->keys[i], k)) + i = (i + 1) & (h->cap - 1); + return i; +} + +void** +htabput(struct hashtable* h, struct hashtablekey* k) +{ + struct hashtablekey* oldkeys; + void** oldvals; + size_t i, j, oldcap; + + if (h->cap / 2 < h->len) { + oldkeys = h->keys; + oldvals = h->vals; + oldcap = h->cap; + h->cap *= 2; + h->keys = xreallocarray(NULL, h->cap, sizeof(h->keys[0])); + h->vals = xreallocarray(NULL, h->cap, sizeof(h->vals[0])); + for (i = 0; i < h->cap; ++i) + h->keys[i].str = NULL; + for (i = 0; i < oldcap; ++i) { + if (oldkeys[i].str) { + j = keyindex(h, &oldkeys[i]); + h->keys[j] = oldkeys[i]; + h->vals[j] = oldvals[i]; + } + } + free(oldkeys); + free(oldvals); } - } - free(h->keys); - free(h->vals); - free(h); -} - -static bool keyequal(struct hashtablekey *k1, struct hashtablekey *k2) { - 
if (k1->hash != k2->hash || k1->len != k2->len) - return false; - return memcmp(k1->str, k2->str, k1->len) == 0; -} - -static size_t keyindex(struct hashtable *h, struct hashtablekey *k) { - size_t i; - - i = k->hash & (h->cap - 1); - while (h->keys[i].str && !keyequal(&h->keys[i], k)) - i = (i + 1) & (h->cap - 1); - return i; -} - -void **htabput(struct hashtable *h, struct hashtablekey *k) { - struct hashtablekey *oldkeys; - void **oldvals; - size_t i, j, oldcap; - - if (h->cap / 2 < h->len) { - oldkeys = h->keys; - oldvals = h->vals; - oldcap = h->cap; - h->cap *= 2; - h->keys = xreallocarray(NULL, h->cap, sizeof(h->keys[0])); - h->vals = xreallocarray(NULL, h->cap, sizeof(h->vals[0])); - for (i = 0; i < h->cap; ++i) - h->keys[i].str = NULL; - for (i = 0; i < oldcap; ++i) { - if (oldkeys[i].str) { - j = keyindex(h, &oldkeys[i]); - h->keys[j] = oldkeys[i]; - h->vals[j] = oldvals[i]; - } + i = keyindex(h, k); + if (!h->keys[i].str) { + h->keys[i] = *k; + h->vals[i] = NULL; + ++h->len; } - free(oldkeys); - free(oldvals); - } - i = keyindex(h, k); - if (!h->keys[i].str) { - h->keys[i] = *k; - h->vals[i] = NULL; - ++h->len; - } - return &h->vals[i]; + return &h->vals[i]; } -void *htabget(struct hashtable *h, struct hashtablekey *k) { - size_t i; +void* +htabget(struct hashtable* h, struct hashtablekey* k) +{ + size_t i; - i = keyindex(h, k); - return h->keys[i].str ? h->vals[i] : NULL; + i = keyindex(h, k); + return h->keys[i].str ? 
h->vals[i] : NULL; } -uint64_t murmurhash64a(const void *ptr, size_t len) { - const uint64_t seed = 0xdecafbaddecafbadull; - const uint64_t m = 0xc6a4a7935bd1e995ull; - uint64_t h, k, n; - const uint8_t *p, *end; - int r = 47; - - h = seed ^ (len * m); - n = len & ~0x7ull; - end = ptr; - end += n; - for (p = ptr; p != end; p += 8) { - memcpy(&k, p, sizeof(k)); +uint64_t +murmurhash64a(const void* ptr, size_t len) +{ + const uint64_t seed = 0xdecafbaddecafbadull; + const uint64_t m = 0xc6a4a7935bd1e995ull; + uint64_t h, k, n; + const uint8_t *p, *end; + int r = 47; + + h = seed ^ (len * m); + n = len & ~0x7ull; + end = ptr; + end += n; + for (p = ptr; p != end; p += 8) { + memcpy(&k, p, sizeof(k)); + + k *= m; + k ^= k >> r; + k *= m; + + h ^= k; + h *= m; + } - k *= m; - k ^= k >> r; - k *= m; + switch (len & 0x7) { + case 7: + h ^= (uint64_t)p[6] << 48; /* fallthrough */ + case 6: + h ^= (uint64_t)p[5] << 40; /* fallthrough */ + case 5: + h ^= (uint64_t)p[4] << 32; /* fallthrough */ + case 4: + h ^= (uint64_t)p[3] << 24; /* fallthrough */ + case 3: + h ^= (uint64_t)p[2] << 16; /* fallthrough */ + case 2: + h ^= (uint64_t)p[1] << 8; /* fallthrough */ + case 1: + h ^= (uint64_t)p[0]; + h *= m; + } - h ^= k; - h *= m; - } - - switch (len & 0x7) { - case 7: - h ^= (uint64_t)p[6] << 48; /* fallthrough */ - case 6: - h ^= (uint64_t)p[5] << 40; /* fallthrough */ - case 5: - h ^= (uint64_t)p[4] << 32; /* fallthrough */ - case 4: - h ^= (uint64_t)p[3] << 24; /* fallthrough */ - case 3: - h ^= (uint64_t)p[2] << 16; /* fallthrough */ - case 2: - h ^= (uint64_t)p[1] << 8; /* fallthrough */ - case 1: - h ^= (uint64_t)p[0]; + h ^= h >> r; h *= m; - } - - h ^= h >> r; - h *= m; - h ^= h >> r; + h ^= h >> r; - return h; + return h; } |
