about summary refs log tree commit diff
diff options
context:
space:
mode:
author Andrew Chambers <[email protected]> 2021-10-12 17:27:34 +1300
committer Andrew Chambers <[email protected]> 2021-10-12 17:27:34 +1300
commit 71ba2cdd14a4462829a2e5d859ea776b934d4322 (patch)
tree 33d27272b743bdf0ffb80eb6f56ddb9553ba36be
parent dc55c00a34a9c5b0d2374d31e00daeb502863626 (diff)
Work on assembling sqlite3.
-rw-r--r-- asm.peg 67
-rw-r--r-- main.c 54
-rw-r--r-- minias.h 27
3 files changed, 97 insertions, 51 deletions
diff --git a/asm.peg b/asm.peg
index aaad38f..4ee8d78 100644
--- a/asm.peg
+++ b/asm.peg
@@ -26,13 +26,20 @@ directive =
{ $$.kind = ASM_DIR_TEXT; }
| "balign" ws n:number
{ $$.balign = (Balign){.kind = ASM_DIR_BALIGN, .align = n.i64 }; }
- | "byte" ws n:number
- { $$.dirbyte = (Byte){.kind = ASM_DIR_BYTE, .v = n.i64 }; }
- | "int" ws n:number
- { $$.dirint = (Int){.kind = ASM_DIR_INT, .v = n.i64 }; }
- | "quad" ws n:number
- { $$.dirquad = (Quad){.kind = ASM_DIR_QUAD, .v = n.i64 }; }
- | sd:section-directive { $$ = sd; }
+ | "byte" ws v:value
+ { $$.dirbyte = (Byte){.kind = ASM_DIR_BYTE, .value = v.value }; }
+ | "int" ws v:value
+ { $$.dirint = (Int){.kind = ASM_DIR_INT, .value = v.value }; }
+ | "quad" ws v:value
+ { $$.dirquad = (Quad){.kind = ASM_DIR_QUAD, .value = v.value }; }
+ | fd:fill-directive
+ { $$ = fd; }
+ | sd:section-directive
+ { $$ = sd; }
+
+fill-directive =
+ "fill" ws r:number ws? "," ws? s:number ws? "," ws? v:number
+ { $$.fill = (Fill){ .kind=ASM_DIR_FILL, .repeat = r.i64, .size = s.i64, .value = v.i64 }; }
section-directive =
"section" ws? n:section-name (
@@ -243,12 +250,12 @@ imul = "imul" (
| 'w'? ws s:r16 ws? ',' ws? d:r16 { $$ = INSTR2(11, s, d); }
| 'l'? ws s:r32 ws? ',' ws? d:r32 { $$ = INSTR2(12, s, d); }
| 'q'? ws s:r64 ws? ',' ws? d:r64 { $$ = INSTR2(13, s, d); }
- | 'w'? ws i:imm16 ',' ws? s:m ws? ',' ws? d:r16 { $$ = INSTR3(14, s, d, i); }
- | 'l'? ws i:imm32 ',' ws? s:m ws? ',' ws? d:r32 { $$ = INSTR3(15, s, d, i); }
- | 'q'? ws i:imm32 ',' ws? s:m ws? ',' ws? d:r64 { $$ = INSTR3(16, s, d, i); }
- | 'w'? ws i:imm16 ',' ws? s:r16 ws? ',' ws? d:r16 { $$ = INSTR3(17, s, d, i); }
- | 'l'? ws i:imm32 ',' ws? s:r32 ws? ',' ws? d:r32 { $$ = INSTR3(18, s, d, i); }
- | 'q'? ws i:imm32 ',' ws? s:r64 ws? ',' ws? d:r64 { $$ = INSTR3(19, s, d, i); }
+ | 'w'? ws i:imm16 ws? ',' ws? s:m ws? ',' ws? d:r16 { $$ = INSTR3(14, s, d, i); }
+ | 'l'? ws i:imm32 ws? ',' ws? s:m ws? ',' ws? d:r32 { $$ = INSTR3(15, s, d, i); }
+ | 'q'? ws i:imm32 ws? ',' ws? s:m ws? ',' ws? d:r64 { $$ = INSTR3(16, s, d, i); }
+ | 'w'? ws i:imm16 ws? ',' ws? s:r16 ws? ',' ws? d:r16 { $$ = INSTR3(17, s, d, i); }
+ | 'l'? ws i:imm32 ws? ',' ws? s:r32 ws? ',' ws? d:r32 { $$ = INSTR3(18, s, d, i); }
+ | 'q'? ws i:imm32 ws? ',' ws? s:r64 ws? ',' ws? d:r64 { $$ = INSTR3(19, s, d, i); }
)
# Must come last due to peg ordering.
| args:m-opargs { $$ = args; }
@@ -403,28 +410,25 @@ r64-or-rip = (
scale-index-base =
'(' ws? b:r64 ws? ',' ws? i:r64 ws? ',' ws? s:number ws? ')'
- { $$.memarg = (Memarg){.kind=ASM_MEMARG, .scale = s.i64, .index=i.kind, .base = b.kind, .c = 0, .l = NULL }; }
+ { $$.memarg = (Memarg){.kind=ASM_MEMARG, .scale = s.i64, .index=i.kind, .base = b.kind, .disp = (Value){ .c = 0, .l = NULL } }; }
| '(' ws? b:r64 ws? ',' ws? i:r64 ')'
- { $$.memarg = (Memarg){.kind=ASM_MEMARG, .scale = 1, .index=i.kind, .base = b.kind, .c = 0, .l = NULL }; }
+ { $$.memarg = (Memarg){.kind=ASM_MEMARG, .scale = 1, .index=i.kind, .base = b.kind, .disp = (Value){ .c = 0, .l = NULL } }; }
| '(' ws? b:r64-or-rip ws? ')'
- { $$.memarg = (Memarg){.kind=ASM_MEMARG, .scale = 0, .index=ASM_NO_REG, .base = b.kind, .c = 0, .l = NULL }; }
+ { $$.memarg = (Memarg){.kind=ASM_MEMARG, .scale = 0, .index=ASM_NO_REG, .base = b.kind, .disp = (Value){ .c = 0, .l = NULL } }; }
-# XXX There are more addressing modes.
m =
sib:scale-index-base
{ $$ = sib; }
- | disp:number ws? sib:scale-index-base
- { sib.memarg.c = disp.i64; $$ = sib; }
- | i:ident ws? sib:scale-index-base
- { sib.memarg.l = i.charptr; $$ = sib; }
+ | d:value ws? sib:scale-index-base
+ { sib.memarg.disp = d.value; $$ = sib; }
imm8 = i:imm { i.imm.nbytes = 1; $$ = i; }
imm16 = i:imm { i.imm.nbytes = 2; $$ = i; }
imm32 = i:imm { i.imm.nbytes = 4; $$ = i; }
-imm =
- '$' ws? n:number
- { $$.imm = (Imm){ .kind = ASM_IMM, .c = n.i64, .l = NULL, .nbytes = 0}; }
+imm =
+ '$' ws? val:value
+ { $$.imm = (Imm){ .kind = ASM_IMM, .v = val.value, .nbytes = 0}; }
al = "%al" { $$ = REG(ASM_AL); }
cl = "%cl" { $$ = REG(ASM_CL); }
@@ -536,10 +540,21 @@ string-escape = '\\' (
| [0-7][0-7][0-7]
)
+value =
+ n:number { $$.value = (Value){ .l = NULL, .c = n.i64 }; }
+ |
+ i:ident ws? (
+ '+' ws? n:number { $$.value = (Value){ .c = n.i64 }; }
+ | &'-' n:number { $$.value = (Value){ .c = n.i64 }; }
+ | { $$.value = (Value){ .c = 0 }; }
+ ) { $$.value.l = i.charptr; }
+
ident =
<[._a-zA-Z][._a-zA-Z0-9]*>
{ $$.charptr = xstrdup(yytext); }
number =
- <'-'?[0-9]+>
- { $$.i64 = strtoll(yytext, NULL, 10); }
+ <'-'? ws? [0-9]+>
+ { $$.i64 = strtoll(yytext, NULL, 10); }
+ | <[0-9]+>
+ { $$.i64 = (int64_t)strtoull(yytext, NULL, 10); }
diff --git a/main.c b/main.c
index 6944694..ffd925f 100644
--- a/main.c
+++ b/main.c
@@ -431,10 +431,10 @@ static void assemblemem(Memarg *memarg, uint8_t rexw, VarBytes prefix,
rm = 0x05;
rex = rexbyte(rexw, reg & (1 << 3), 0, rm & (1 << 3));
assemblemodregrm(rex, prefix, opcode, 0x00, reg, rm);
- if (memarg->l) {
- assemblereloc(memarg->l, memarg->c - 4, 4, R_X86_64_PC32);
+ if (memarg->disp.l) {
+ assemblereloc(memarg->disp.l, memarg->disp.c - 4, 4, R_X86_64_PC32);
} else {
- assembleconstant(memarg->c, 4);
+ assembleconstant(memarg->disp.c, 4);
}
return;
}
@@ -444,7 +444,7 @@ static void assemblemem(Memarg *memarg, uint8_t rexw, VarBytes prefix,
/* Case when we don't need sib */
if (memarg->index == ASM_NO_REG && memarg->scale == 0 && ((rm & 7) != 4)) {
- if (memarg->l == 0 && memarg->c == 0) {
+ if (memarg->disp.l == 0 && memarg->disp.c == 0) {
if ((rm & 7) == 5) {
mod = 1;
} else {
@@ -458,9 +458,9 @@ static void assemblemem(Memarg *memarg, uint8_t rexw, VarBytes prefix,
assemblemodregrm(rex, prefix, opcode, mod, reg, rm);
if (mod == 1) {
- assemblereloc(memarg->l, memarg->c, 1, R_X86_64_32);
+ assemblereloc(memarg->disp.l, memarg->disp.c, 1, R_X86_64_32);
} else if (mod == 2) {
- assemblereloc(memarg->l, memarg->c, 4, R_X86_64_32);
+ assemblereloc(memarg->disp.l, memarg->disp.c, 4, R_X86_64_32);
}
return;
}
@@ -470,10 +470,10 @@ static void assemblemem(Memarg *memarg, uint8_t rexw, VarBytes prefix,
rm = 4;
// TODO: if our disp fits in a +disp8, use that instead.
- if (memarg->c == 0 && memarg->l == 0 && ((base & 7) != 5)) {
+ if (memarg->disp.c == 0 && memarg->disp.l == 0 && ((base & 7) != 5)) {
mod = 0; /* +0 */
} else {
- if (memarg->c == 0 && memarg->l == 0) {
+ if (memarg->disp.c == 0 && memarg->disp.l == 0) {
mod = 1; /* +disp8 */
} else {
mod = 2; /* +disp32 */
@@ -516,7 +516,8 @@ static void assemblemem(Memarg *memarg, uint8_t rexw, VarBytes prefix,
sb(sibbyte(scale, index, base));
if (mod)
- assemblereloc(memarg->l, memarg->c, (mod == 2) ? 4 : 1, R_X86_64_32);
+ assemblereloc(memarg->disp.l, memarg->disp.c, (mod == 2) ? 4 : 1,
+ R_X86_64_32);
}
/* Assemble op + imm -> r/m. */
@@ -534,7 +535,7 @@ static void assembleimmrm(Instr *instr, uint8_t rexw, VarBytes prefix,
rex = rexbyte(rexw, immreg & (1 << 3), 0, rm & (1 << 3));
assemblemodregrm(rex, prefix, opcode, 0x03, immreg, rm);
}
- assemblereloc(imm->l, imm->c, imm->nbytes, R_X86_64_32);
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32);
}
/* Assemble op + r <-> r/m. */
@@ -575,7 +576,7 @@ static void assemblebasicop(Instr *instr, VarBytes opcode, uint8_t immreg) {
if (rexw)
sb(rexbyte(1, 0, 0, 0));
assemblevbytes(opcode);
- assemblereloc(imm->l, imm->c, imm->nbytes, R_X86_64_32);
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32);
} else if (instr->variant < 12) {
assembleimmrm(instr, rexw, prefix, opcode, immreg);
} else {
@@ -615,7 +616,7 @@ static void assemblemov(Instr *mov) {
if (mov->variant >= 4 && mov->variant <= 6) {
imm = &mov->arg1->imm;
assembleplusr(isreg64(mov->arg2->kind), prefix, opcode, mov->arg2->kind);
- assemblereloc(imm->l, imm->c, imm->nbytes, R_X86_64_32);
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32);
} else if (mov->variant == 7 || mov->variant < 4) {
rexw = ((mov->variant % 4) == 3);
assembleimmrm(mov, rexw, prefix, opcode, 0x00);
@@ -646,7 +647,7 @@ static void assembledivmulneg(Instr *instr, uint8_t reg) {
if (instr->arg1->kind == ASM_MEMARG) {
assemblemem(&instr->arg1->memarg, rexw, prefix, opcode, reg);
} else {
- rex = rexbyte(rexw, reg & (1 << 3), 0, rm & (1 << 3));
+ rex = rexbyte(rexw, reg & (1 << 3), 0, 0);
rm = regbits(instr->arg1->kind);
assemblemodregrm(rex, prefix, opcode, 0x03, reg, rm);
}
@@ -712,7 +713,7 @@ static void assembletest(Instr *instr) {
sb(rexbyte(1, 0, 0, 0));
assemblevbytes(byteop ? 0xa8 : 0xa9);
imm = &instr->arg1->imm;
- assemblereloc(imm->l, imm->c, imm->nbytes, R_X86_64_32);
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32);
} else if (instr->variant < 12) {
assembleimmrm(instr, rexw, prefix, byteop ? 0xf6 : 0xf7, 0);
} else {
@@ -785,14 +786,31 @@ static void assemble(void) {
}
break;
}
+ case ASM_DIR_FILL: {
+ ssize_t i = 0;
+
+ for (i = 0; i < v->fill.repeat; i++) {
+ switch (v->fill.size) {
+ case 1:
+ case 2:
+ case 4:
+ case 8:
+ assembleconstant(v->fill.value, v->fill.size);
+ break;
+ default:
+ lfatal("unsupported fill size '%d'", v->fill.size);
+ }
+ }
+ break;
+ }
case ASM_DIR_BYTE:
- sb((uint8_t)v->dirbyte.v);
+ assemblereloc(v->dirbyte.value.l, v->dirbyte.value.c, 1, R_X86_64_32);
break;
case ASM_DIR_INT:
- su32((uint32_t)v->dirint.v);
+ assemblereloc(v->dirint.value.l, v->dirint.value.c, 4, R_X86_64_32);
break;
case ASM_DIR_QUAD:
- su64((uint64_t)v->dirquad.v);
+ assemblereloc(v->dirquad.value.l, v->dirquad.value.c, 8, R_X86_64_32);
break;
case ASM_LABEL:
sym = getsym(v->label.name);
@@ -953,7 +971,7 @@ static void assemble(void) {
opcode = 0x69;
prefix = ((v->instr.variant - 14) % 3) == 0 ? 0x66 : EMPTY_VBYTES;
assemblerrm(&v->instr, prefix, opcode);
- assemblereloc(imm->l, imm->c, imm->nbytes, R_X86_64_32);
+ assemblereloc(imm->v.l, imm->v.c, imm->nbytes, R_X86_64_32);
}
break;
}
diff --git a/minias.h b/minias.h
index 46a9779..38f1f1d 100644
--- a/minias.h
+++ b/minias.h
@@ -53,6 +53,7 @@ typedef enum {
ASM_DIR_ASCIIZ,
ASM_DIR_DATA,
ASM_DIR_TEXT,
+ ASM_DIR_FILL,
ASM_DIR_BYTE,
ASM_DIR_INT,
ASM_DIR_QUAD,
@@ -214,18 +215,23 @@ typedef struct {
} DirSection;
typedef struct {
+ int64_t c;
+ const char *l;
+} Value;
+
+typedef struct {
AsmKind kind;
- int64_t v;
+ Value value;
} Byte;
typedef struct {
AsmKind kind;
- int64_t v;
+ Value value;
} Int;
typedef struct {
AsmKind kind;
- int64_t v;
+ Value value;
} Quad;
typedef struct {
@@ -235,9 +241,15 @@ typedef struct {
typedef struct {
AsmKind kind;
+ int32_t size;
+ int32_t repeat;
+ int64_t value;
+} Fill;
+
+typedef struct {
+ AsmKind kind;
uint8_t nbytes;
- const char *l; /* label */
- int64_t c; /* constant */
+ Value v;
} Imm;
typedef struct {
@@ -245,8 +257,7 @@ typedef struct {
AsmKind base;
AsmKind index;
uint8_t scale;
- const char *l; /* label */
- int64_t c; /* constant */
+ Value disp;
} Memarg;
typedef struct {
@@ -288,12 +299,14 @@ union Parsev {
Instr instr;
Call call;
Jmp jmp;
+ Fill fill;
Byte dirbyte;
Int dirint;
Quad dirquad;
Imm imm;
String string;
// Temporary values.
+ Value value;
const char *charptr;
int64_t i64;
};