From 6b9b596fcd588d22e15f259b572ae8845fbddacb Mon Sep 17 00:00:00 2001 From: Andrew Chambers Date: Sat, 23 Oct 2021 23:57:24 +1300 Subject: Implement the .set directive. --- asm.peg | 2 + main.c | 99 +++++++++++++++++++++++++++++++++++----------- minias.h | 20 +++++++--- parse.c | 2 +- test/execute/0002-setsym.s | 17 ++++++++ test/test.sh | 2 +- 6 files changed, 110 insertions(+), 32 deletions(-) create mode 100644 test/execute/0002-setsym.s diff --git a/asm.peg b/asm.peg index 3bc49d1..596852b 100644 --- a/asm.peg +++ b/asm.peg @@ -33,6 +33,8 @@ directive = { $$.dirint = (Int){.kind = ASM_DIR_INT, .value = v.value }; } | "quad" ws v:value { $$.dirquad = (Quad){.kind = ASM_DIR_QUAD, .value = v.value }; } + | "set" ws s:ident ws? ',' ws? v:value + { $$.set = (Set){.kind = ASM_DIR_SET, .sym = s.charptr, .value = v.value }; } | fd:fill-directive { $$ = fd; } | sd:section-directive diff --git a/main.c b/main.c index 36eb930..38ad138 100644 --- a/main.c +++ b/main.c @@ -52,7 +52,7 @@ getsym(const char *name) ps = (Symbol **)htabput(symbols, &htk); if (!*ps) { *ps = xmalloc(sizeof(Symbol)); - **ps = (Symbol){ + **ps = (Symbol) { .name = name, .wco = -1, }; @@ -250,7 +250,7 @@ isrexreg(AsmKind k) { return k > ASM_REG_BEGIN && k < ASM_REG_END && (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL - || k == ASM_SIL || k == ASM_DIL); + || k == ASM_SIL || k == ASM_DIL); } static uint8_t @@ -486,9 +486,9 @@ assemblemem(const Memarg *memarg, Rex rex, VarBytes prefix, VarBytes opcode, static void assemblejmp(const Jmp *j) { - int jmpsize; - int64_t distance; Symbol *target; + int64_t distance; + int jmpsize; // clang-format off static uint8_t cc2op[31] = { @@ -501,13 +501,9 @@ assemblejmp(const Jmp *j) jmpsize = 4; target = getsym(j->target); - if (cursection == target->section - && (target->defined || target->wco != -1)) { - if (target->defined) { - distance = target->offset - cursection->hdr.sh_size; - } else { - distance = target->wco - cursection->hdr.sh_size; - } + + if (cursection == target->section && target->wco != -1) { + distance = target->wco - cursection->hdr.sh_size; if (distance - 2 >= INT8_MIN && distance - (j->cc ? 6 : 5) <= INT8_MAX) { jmpsize = 1; @@ -540,8 +536,10 @@ assembleabsimm(const Imm *imm) reltype = R_X86_64_32; else if (imm->nbytes == 8) reltype = R_X86_64_64; - else + else { unreachable(); + return; + } assemblereloc(imm->v.l, imm->v.c, imm->nbytes, reltype); } @@ -802,13 +800,18 @@ assemble(void) assemblereloc( v->dirquad.value.l, v->dirquad.value.c, 8, R_X86_64_64); break; + case ASM_DIR_SET: + sym = getsym(v->set.sym); + sym->value = v->set.value; + break; case ASM_LABEL: sym = getsym(v->label.name); - sym->section = cursection; - sym->offset = cursection->hdr.sh_size; if (sym->defined) lfatal("%s already defined", sym->name); sym->defined = 1; + sym->section = cursection; + sym->value.c = cursection->hdr.sh_size; + sym->wco = sym->value.c; break; case ASM_INSTR: assembleinstr(&v->instr); @@ -845,12 +848,59 @@ relaxreset(void) if (!symbols->keys[i].str) continue; sym = symbols->vals[i]; - *sym = (Symbol){ - .name = sym->name, .section = sym->section, .wco = sym->offset + *sym = (Symbol) { + .name = sym->name, .section = sym->section, .wco = sym->wco }; } } +/* Try to resolve the address of a symbol, this will recursively look + for the symbol address if it is defined relative to another symbol. */ +static int +resolvesymrecurse(Symbol *sym, int n) +{ + Symbol *indirect; + + if (n > 64) + fatal("recursion limit hit when resolving symbol location"); + + if (sym->value.l) { + indirect = getsym(sym->value.l); + if (!resolvesymrecurse(indirect, n + 1)) + return 0; + sym->section = indirect->section; + sym->value.l = NULL; + sym->value.c += indirect->value.c; + sym->wco = sym->value.c; + sym->defined = 1; + return 1; + } + + if (!sym->defined) + return 0; + + return 1; +} + +static int +resolvesym(Symbol *sym) +{ + return resolvesymrecurse(sym, 0); +} + +/* Resolve all symbols to their final location if we can. */ +static void +resolvesymbols(void) +{ + size_t i; + + for (i = 0; i < symbols->cap; i++) { + if (!symbols->keys[i].str) + continue; + resolvesym(symbols->vals[i]); + } +} + static void addtosymtab(Symbol *sym) { @@ -868,7 +918,7 @@ addtosymtab(Symbol *sym) sym->idx = symtab->hdr.sh_size / symtab->hdr.sh_entsize; elfsym.st_name = elfstr(strtab, sym->name); - elfsym.st_value = sym->offset; + elfsym.st_value = sym->value.c; elfsym.st_size = sym->size; elfsym.st_info = ELF64_ST_INFO(sbind, stype); elfsym.st_shndx = sym->section ? sym->section->idx : SHN_UNDEF; @@ -882,7 +932,7 @@ fillsymtab(void) Symbol *sym; size_t i; - // Local symbols + /* Local symbols come first. */ for (i = 0; i < symbols->cap; i++) { if (!symbols->keys[i].str) continue; @@ -892,11 +942,10 @@ fillsymtab(void) addtosymtab(sym); } - // Global symbols - - // Set start of global symbols. + /* Set start of global symbols. */ symtab->hdr.sh_info = symtab->hdr.sh_size / symtab->hdr.sh_entsize; + /* Global symbols. */ for (i = 0; i < symbols->cap; i++) { if (!symbols->keys[i].str) continue; @@ -912,8 +961,8 @@ static int resolvereloc(Relocation *reloc) { Symbol *sym; - uint8_t *rdata; int64_t value; + uint8_t *rdata; sym = reloc->sym; @@ -926,14 +975,14 @@ resolvereloc(Relocation *reloc) return 0; case R_X86_64_PC8: rdata = &reloc->section->data[reloc->offset]; - value = sym->offset - reloc->offset + reloc->addend; + value = sym->value.c - reloc->offset + reloc->addend; if (value > INT8_MAX || value < INT8_MIN) fatal("R_X86_64_PC8 relocation overflow"); rdata[0] = value; return 1; case R_X86_64_PC32: rdata = &reloc->section->data[reloc->offset]; - value = sym->offset - reloc->offset + reloc->addend; + value = sym->value.c - reloc->offset + reloc->addend; if (value > INT32_MAX || value < INT32_MIN) fatal("R_X86_64_PC32 relocation overflow"); rdata[0] = ((uint32_t)value & 0xff); @@ -1103,9 +1152,11 @@ main(int argc, char *argv[]) allasm = parseasm(); initsections(); assemble(); + resolvesymbols(); while (nrelax-- > 0) { relaxreset(); assemble(); + resolvesymbols(); } fillsymtab(); handlerelocs(); diff --git a/minias.h b/minias.h index f211116..ccef65b 100644 --- a/minias.h +++ b/minias.h @@ -20,10 +20,15 @@ typedef struct { uint8_t *data; } Section; +typedef struct { + int64_t c; + const char *l; +} Value; + typedef struct { const char *name; int32_t idx; - int64_t offset; + Value value; int64_t wco; /* worst case offset */ int64_t size; int global; @@ -52,6 +57,7 @@ typedef enum { ASM_DIR_SECTION, ASM_DIR_ASCII, ASM_DIR_ASCIIZ, + ASM_DIR_SET, ASM_DIR_DATA, ASM_DIR_TEXT, ASM_DIR_FILL, @@ -170,6 +176,12 @@ typedef struct Globl { const char *name; } Globl; +typedef struct Set { + AsmKind kind; + const char *sym; + Value value; +} Set; + typedef struct DirSection { AsmKind kind; int32_t type; @@ -177,11 +189,6 @@ typedef struct DirSection { const char *flags; } DirSection; -typedef struct { - int64_t c; - const char *l; -} Value; - typedef struct Byte { AsmKind kind; Value value; @@ -298,6 +305,7 @@ union Parsev { Instr instr; Jmp jmp; Fill fill; + Set set; Byte dirbyte; Short dirshort; Int dirint; diff --git a/parse.c b/parse.c index eaa9c67..3d63fbe 100644 --- a/parse.c +++ b/parse.c @@ -88,7 +88,7 @@ decodestring(char *s) } data[len++] = c; } - return (String){ .kind = ASM_STRING, .len = len, .data = data }; + return (String) { .kind = ASM_STRING, .len = len, .data = data }; } static int diff --git a/test/execute/0002-setsym.s b/test/execute/0002-setsym.s new file mode 100644 index 0000000..6ce09bc --- /dev/null +++ b/test/execute/0002-setsym.s @@ -0,0 +1,17 @@ +.text +.globl main +main: + jmp z + # x + nop + # y + nop + # z + xorl %eax, %eax + ret + +.set z, y+1 +.set x, main+2 +.set y, x+1 + +.section .note.GNU-stack,"",@progbits diff --git a/test/test.sh b/test/test.sh index 3510fa4..cf673d7 100644 --- a/test/test.sh +++ b/test/test.sh @@ -361,7 +361,7 @@ t () { echo -n "." } -for tc in $(echo test/execute/*) +for tc in $(echo test/execute/*.s) do t "$tc" done \ No newline at end of file -- cgit v1.2.3