about summary refs log tree commit diff
diff options
context:
space:
mode:
author Andrew Chambers <[email protected]> 2021-10-23 23:57:24 +1300
committer Andrew Chambers <[email protected]> 2021-10-23 23:57:24 +1300
commit 6b9b596fcd588d22e15f259b572ae8845fbddacb (patch)
tree 1cf1758d7a65e90e8a9d40a91887bd5501506d27
parent 1e9bf39a1b2c47920def55f29c4b9b93c402e1d4 (diff)
Implement the .set directive.
-rw-r--r-- asm.peg 2
-rw-r--r-- main.c 99
-rw-r--r-- minias.h 20
-rw-r--r-- parse.c 2
-rw-r--r-- test/execute/0002-setsym.s 17
-rw-r--r-- test/test.sh 2
6 files changed, 110 insertions, 32 deletions
diff --git a/asm.peg b/asm.peg
index 3bc49d1..596852b 100644
--- a/asm.peg
+++ b/asm.peg
@@ -33,6 +33,8 @@ directive =
{ $$.dirint = (Int){.kind = ASM_DIR_INT, .value = v.value }; }
| "quad" ws v:value
{ $$.dirquad = (Quad){.kind = ASM_DIR_QUAD, .value = v.value }; }
+ | "set" ws s:ident ws? ',' ws? v:value
+ { $$.set = (Set){.kind = ASM_DIR_SET, .sym = s.charptr, .value = v.value }; }
| fd:fill-directive
{ $$ = fd; }
| sd:section-directive
diff --git a/main.c b/main.c
index 36eb930..38ad138 100644
--- a/main.c
+++ b/main.c
@@ -52,7 +52,7 @@ getsym(const char *name)
ps = (Symbol **)htabput(symbols, &htk);
if (!*ps) {
*ps = xmalloc(sizeof(Symbol));
- **ps = (Symbol){
+ **ps = (Symbol) {
.name = name,
.wco = -1,
};
@@ -250,7 +250,7 @@ isrexreg(AsmKind k)
{
return k > ASM_REG_BEGIN && k < ASM_REG_END
&& (regbits(k) & (1 << 3) || k == ASM_SPL || k == ASM_BPL
- || k == ASM_SIL || k == ASM_DIL);
+ || k == ASM_SIL || k == ASM_DIL);
}
static uint8_t
@@ -486,9 +486,9 @@ assemblemem(const Memarg *memarg, Rex rex, VarBytes prefix, VarBytes opcode,
static void
assemblejmp(const Jmp *j)
{
- int jmpsize;
- int64_t distance;
Symbol *target;
+ int64_t distance;
+ int jmpsize;
// clang-format off
static uint8_t cc2op[31] = {
@@ -501,13 +501,9 @@ assemblejmp(const Jmp *j)
jmpsize = 4;
target = getsym(j->target);
- if (cursection == target->section
- && (target->defined || target->wco != -1)) {
- if (target->defined) {
- distance = target->offset - cursection->hdr.sh_size;
- } else {
- distance = target->wco - cursection->hdr.sh_size;
- }
+
+ if (cursection == target->section && target->wco != -1) {
+ distance = target->wco - cursection->hdr.sh_size;
if (distance - 2 >= INT8_MIN
&& distance - (j->cc ? 6 : 5) <= INT8_MAX) {
jmpsize = 1;
@@ -540,8 +536,10 @@ assembleabsimm(const Imm *imm)
reltype = R_X86_64_32;
else if (imm->nbytes == 8)
reltype = R_X86_64_64;
- else
+ else {
unreachable();
+ return;
+ }
assemblereloc(imm->v.l, imm->v.c, imm->nbytes, reltype);
}
@@ -802,13 +800,18 @@ assemble(void)
assemblereloc(
v->dirquad.value.l, v->dirquad.value.c, 8, R_X86_64_64);
break;
+ case ASM_DIR_SET:
+ sym = getsym(v->set.sym);
+ sym->value = v->set.value;
+ break;
case ASM_LABEL:
sym = getsym(v->label.name);
- sym->section = cursection;
- sym->offset = cursection->hdr.sh_size;
if (sym->defined)
lfatal("%s already defined", sym->name);
sym->defined = 1;
+ sym->section = cursection;
+ sym->value.c = cursection->hdr.sh_size;
+ sym->wco = sym->value.c;
break;
case ASM_INSTR:
assembleinstr(&v->instr);
@@ -845,12 +848,59 @@ relaxreset(void)
if (!symbols->keys[i].str)
continue;
sym = symbols->vals[i];
- *sym = (Symbol){
- .name = sym->name, .section = sym->section, .wco = sym->offset
+ *sym = (Symbol) {
+ .name = sym->name, .section = sym->section, .wco = sym->wco
};
}
}
+/* Try to resolve the address of a symbol, this will recursively look
+ for the symbol address if it is defined relative to another symbol. */
+static int
+resolvesymrecurse(Symbol *sym, int n)
+{
+ Symbol *indirect;
+
+ if (n > 64)
+ fatal("recursion limit hit when resolving symbol location");
+
+ if (sym->value.l) {
+ indirect = getsym(sym->value.l);
+ if (!resolvesymrecurse(indirect, n + 1))
+ return 0;
+ sym->section = indirect->section;
+ sym->value.l = NULL;
+ sym->value.c += indirect->value.c;
+ sym->wco = sym->value.c;
+ sym->defined = 1;
+ return 1;
+ }
+
+ if (!sym->defined)
+ return 0;
+
+ return 1;
+}
+
+static int
+resolvesym(Symbol *sym)
+{
+ return resolvesymrecurse(sym, 0);
+}
+
+/* Resolve all symbols to their final location if we can. */
+static void
+resolvesymbols(void)
+{
+ size_t i;
+
+ for (i = 0; i < symbols->cap; i++) {
+ if (!symbols->keys[i].str)
+ continue;
+ resolvesym(symbols->vals[i]);
+ }
+}
+
static void
addtosymtab(Symbol *sym)
{
@@ -868,7 +918,7 @@ addtosymtab(Symbol *sym)
sym->idx = symtab->hdr.sh_size / symtab->hdr.sh_entsize;
elfsym.st_name = elfstr(strtab, sym->name);
- elfsym.st_value = sym->offset;
+ elfsym.st_value = sym->value.c;
elfsym.st_size = sym->size;
elfsym.st_info = ELF64_ST_INFO(sbind, stype);
elfsym.st_shndx = sym->section ? sym->section->idx : SHN_UNDEF;
@@ -882,7 +932,7 @@ fillsymtab(void)
Symbol *sym;
size_t i;
- // Local symbols
+ /* Local symbols come first. */
for (i = 0; i < symbols->cap; i++) {
if (!symbols->keys[i].str)
continue;
@@ -892,11 +942,10 @@ fillsymtab(void)
addtosymtab(sym);
}
- // Global symbols
-
- // Set start of global symbols.
+ /* Set start of global symbols. */
symtab->hdr.sh_info = symtab->hdr.sh_size / symtab->hdr.sh_entsize;
+ /* Global symbols. */
for (i = 0; i < symbols->cap; i++) {
if (!symbols->keys[i].str)
continue;
@@ -912,8 +961,8 @@ static int
resolvereloc(Relocation *reloc)
{
Symbol *sym;
- uint8_t *rdata;
int64_t value;
+ uint8_t *rdata;
sym = reloc->sym;
@@ -926,14 +975,14 @@ resolvereloc(Relocation *reloc)
return 0;
case R_X86_64_PC8:
rdata = &reloc->section->data[reloc->offset];
- value = sym->offset - reloc->offset + reloc->addend;
+ value = sym->value.c - reloc->offset + reloc->addend;
if (value > INT8_MAX || value < INT8_MIN)
fatal("R_X86_64_PC8 relocation overflow");
rdata[0] = value;
return 1;
case R_X86_64_PC32:
rdata = &reloc->section->data[reloc->offset];
- value = sym->offset - reloc->offset + reloc->addend;
+ value = sym->value.c - reloc->offset + reloc->addend;
if (value > INT32_MAX || value < INT32_MIN)
fatal("R_X86_64_PC32 relocation overflow");
rdata[0] = ((uint32_t)value & 0xff);
@@ -1103,9 +1152,11 @@ main(int argc, char *argv[])
allasm = parseasm();
initsections();
assemble();
+ resolvesymbols();
while (nrelax-- > 0) {
relaxreset();
assemble();
+ resolvesymbols();
}
fillsymtab();
handlerelocs();
diff --git a/minias.h b/minias.h
index f211116..ccef65b 100644
--- a/minias.h
+++ b/minias.h
@@ -21,9 +21,14 @@ typedef struct {
} Section;
typedef struct {
+ int64_t c;
+ const char *l;
+} Value;
+
+typedef struct {
const char *name;
int32_t idx;
- int64_t offset;
+ Value value;
int64_t wco; /* worst case offset */
int64_t size;
int global;
@@ -52,6 +57,7 @@ typedef enum {
ASM_DIR_SECTION,
ASM_DIR_ASCII,
ASM_DIR_ASCIIZ,
+ ASM_DIR_SET,
ASM_DIR_DATA,
ASM_DIR_TEXT,
ASM_DIR_FILL,
@@ -170,6 +176,12 @@ typedef struct Globl {
const char *name;
} Globl;
+typedef struct Set {
+ AsmKind kind;
+ const char *sym;
+ Value value;
+} Set;
+
typedef struct DirSection {
AsmKind kind;
int32_t type;
@@ -177,11 +189,6 @@ typedef struct DirSection {
const char *flags;
} DirSection;
-typedef struct {
- int64_t c;
- const char *l;
-} Value;
-
typedef struct Byte {
AsmKind kind;
Value value;
@@ -298,6 +305,7 @@ union Parsev {
Instr instr;
Jmp jmp;
Fill fill;
+ Set set;
Byte dirbyte;
Short dirshort;
Int dirint;
diff --git a/parse.c b/parse.c
index eaa9c67..3d63fbe 100644
--- a/parse.c
+++ b/parse.c
@@ -88,7 +88,7 @@ decodestring(char *s)
}
data[len++] = c;
}
- return (String){ .kind = ASM_STRING, .len = len, .data = data };
+ return (String) { .kind = ASM_STRING, .len = len, .data = data };
}
static int
diff --git a/test/execute/0002-setsym.s b/test/execute/0002-setsym.s
new file mode 100644
index 0000000..6ce09bc
--- /dev/null
+++ b/test/execute/0002-setsym.s
@@ -0,0 +1,17 @@
+.text
+.globl main
+main:
+ jmp z
+ # x
+ nop
+ # y
+ nop
+ # z
+ xorl %eax, %eax
+ ret
+
+.set z, y+1
+.set x, main+2
+.set y, x+1
+
+.section .note.GNU-stack,"",@progbits
diff --git a/test/test.sh b/test/test.sh
index 3510fa4..cf673d7 100644
--- a/test/test.sh
+++ b/test/test.sh
@@ -361,7 +361,7 @@ t () {
echo -n "."
}
-for tc in $(echo test/execute/*)
+for tc in $(echo test/execute/*.s)
do
t "$tc"
done
\ No newline at end of file