aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAndrew Chambers <[email protected]>2021-10-08 01:36:55 +1300
committerAndrew Chambers <[email protected]>2021-10-08 01:36:55 +1300
commit6526cddc058df5a1a84d7f4081e3f4a499642733 (patch)
tree42d31d6df783fc97dd14e2911fbfa7d2c380c3bb
parent035c38a46e86ae24269ff24f1011776200050e7f (diff)
More tests.
-rw-r--r--asm.peg2
-rw-r--r--main.c193
-rw-r--r--test/test.sh20
3 files changed, 128 insertions, 87 deletions
diff --git a/asm.peg b/asm.peg
index a49dfe7..0718aef 100644
--- a/asm.peg
+++ b/asm.peg
@@ -48,6 +48,8 @@ lea =
| 'q'? ws s:m ws? ',' ws? d:r64 { $$ = INSTR(2, s, d); }
) { $$.instr.kind = ASM_LEA; }
+#XXX Some of these rules can probably be collapsed.
+
mov = "mov" (
'b'? ws s:imm8 ws? ',' ws? d:r8 { $$ = INSTR(0, s, d); }
| 'w'? ws s:imm16 ws? ',' ws? d:r16 { $$ = INSTR(1, s, d); }
diff --git a/main.c b/main.c
index 75efa72..1481e58 100644
--- a/main.c
+++ b/main.c
@@ -234,36 +234,100 @@ static void assembleplusr(uint8_t opcode, AsmKind reg) {
sb(opcode | (bits & 7));
}
-/* Assemble + r <-> r/m. */
-static void assemblerrm(Instr *i, uint8_t opcode) {
+static void assembleimm(Imm *imm) {
+ switch (imm->nbytes) {
+ case 1:
+ sb((uint8_t)imm->c);
+ break;
+ case 2:
+ sw((uint16_t)imm->c);
+ break;
+ case 4:
+ sl((uint32_t)imm->c);
+ break;
+ case 8:
+ fatal("TODO 8 byte imm");
+ break;
+ default:
+ unreachable();
+ }
+}
+
+/* Assemble op + imm -> r/m. */
+static void assembleimmrm(Instr *instr, uint8_t opcode, uint8_t immreg,
+ uint8_t opsz) {
+
+ Memarg *memarg;
+ uint8_t rex, rexw, mod, rm, sib;
+ int wantsib;
+
+ wantsib = 0;
+
+ if (instr->dst->kind == ASM_MEMARG) {
+ memarg = &instr->dst->memarg;
+ rexw = opsz == 8;
+ rm = regbits(memarg->reg);
+ /* We cannot address ESP/RSP/... */
+ if ((rm & 7) == 4)
+ lfatal("addressing mode unrepresentable");
+ if (memarg->c == 0 && memarg->l == NULL) {
+ if ((rm & 7) == 5) { // BP style registers need sib
+ mod = 0x01;
+ wantsib = 1;
+ sib = 0;
+ } else {
+ mod = 0x00;
+ }
+ } else {
+ lfatal("TODO X");
+ }
+
+ } else {
+ memarg = NULL;
+ mod = 0x3;
+ rexw = isreg64(instr->dst->kind);
+ rm = regbits(instr->dst->kind);
+ }
+
+ if (opsz == 2)
+ sb(0x66);
+
+ rex = rexbyte(rexw, 0, 0, rm & (1 << 3));
+ if (rex != rexbyte(0, 0, 0, 0))
+ sb(rex);
+
+ sb2(opcode, modregrm(mod, immreg, rm));
+
+ if (wantsib)
+ sb(sib);
+
+ assembleimm(&instr->src->imm);
+}
+
+/* Assemble op + r <-> r/m. */
+static void assemblerrm(Instr *instr, uint8_t opcode) {
Memarg *memarg;
AsmKind regarg;
uint8_t rex, mod, reg, rm, sib;
int wantsib;
- int64_t disp;
- int dispsz;
- int64_t imm;
- int immsz;
- mod = 0x03;
wantsib = 0;
- dispsz = 0;
- immsz = 0;
-
- if (i->src->kind == ASM_MEMARG) {
- memarg = &i->src->memarg;
- regarg = i->dst->kind;
- reg = regbits(i->dst->kind);
- } else if (i->dst->kind == ASM_MEMARG) {
- memarg = &i->dst->memarg;
- regarg = i->src->kind;
- reg = regbits(i->src->kind);
+
+ if (instr->src->kind == ASM_MEMARG) {
+ memarg = &instr->src->memarg;
+ regarg = instr->dst->kind;
+ reg = regbits(instr->dst->kind);
+ } else if (instr->dst->kind == ASM_MEMARG) {
+ memarg = &instr->dst->memarg;
+ regarg = instr->src->kind;
+ reg = regbits(instr->src->kind);
} else {
+ mod = 0x03;
memarg = NULL;
- regarg = i->src->kind;
- reg = regbits(i->src->kind);
- rm = regbits(i->dst->kind);
+ regarg = instr->src->kind;
+ reg = regbits(instr->src->kind);
+ rm = regbits(instr->dst->kind);
}
if (memarg) {
@@ -295,46 +359,10 @@ static void assemblerrm(Instr *i, uint8_t opcode) {
if (wantsib)
sb(sib);
-
- switch (dispsz) {
- case 1:
- sb((uint8_t)disp);
- break;
- case 4:
- sw((uint32_t)disp);
- break;
- }
- switch (immsz) {
- case 1:
- sb((uint8_t)imm);
- break;
- case 4:
- sw((uint32_t)imm);
- break;
- }
-}
-
-static void assembleimm(Imm *imm) {
- switch (imm->nbytes) {
- case 1:
- sb((uint8_t)imm->c);
- break;
- case 2:
- sw((uint16_t)imm->c);
- break;
- case 4:
- sl((uint32_t)imm->c);
- break;
- case 8:
- fatal("TODO 8 byte imm");
- break;
- default:
- unreachable();
- }
}
/* Assemble a 'basic op' which is just a repeated op pattern we have named. */
-static void assemblebasicop(Instr *instr, uint8_t opcode) {
+static void assemblebasicop(Instr *instr, uint8_t opcode, uint8_t immreg) {
if (instr->variant < 4) {
if (isreg16(instr->dst->kind))
sb(0x66);
@@ -342,6 +370,10 @@ static void assemblebasicop(Instr *instr, uint8_t opcode) {
sb(rexbyte(1, 0, 0, 0));
sb(opcode);
assembleimm(&instr->src->imm);
+ } else if (instr->variant < 12) {
+ // Note, uses a pattern in the variant array.
+ uint8_t opsize = 1 << (instr->variant % 4); // 1 2 4 8
+ assembleimmrm(instr, opcode, immreg, opsize);
} else {
assemblerrm(instr, opcode);
}
@@ -371,23 +403,16 @@ static void assemblemov(Instr *mov) {
if (mov->variant >= 8) {
assemblerrm(mov, opcode);
} else if (mov->variant >= 3) {
- // c7 /0 i[bwd] encoding.
- // dest in rm.
- rm = regbits(mov->dst->kind);
- rex = rexbyte(isreg64(mov->dst->kind), 0, 0, rm & (1 << 3));
- if (isreg16(mov->dst->kind))
- sb(0x66);
- if (rex != rexbyte(0, 0, 0, 0))
- sb(rex);
- sb2(opcode, modregrm(3, 0, rm));
- assembleimm(&mov->src->imm);
+ // Note, uses a pattern in the variant array.
+ uint8_t opsize = 1 << (mov->variant % 4);
+ assembleimmrm(mov, opcode, 0x00, opsize);
} else {
assembleplusr(opcode, mov->dst->kind);
assembleimm(&mov->src->imm);
}
}
-static void assemble() {
+static void assemble(void) {
Symbol *sym;
Parsev *v;
AsmLine *l;
@@ -447,47 +472,47 @@ static void assemble() {
}
case ASM_ADD: {
static uint8_t variant2op[24] = {
- 0x04, 0x05, 0x05, 0x05, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x02, 0x03, 0x03, 0x03,
+ 0x04, 0x05, 0x05, 0x05, 0x80, 0x81, 0x81, 0x81,
+ 0x80, 0x81, 0x81, 0x81, 0x02, 0x03, 0x03, 0x03,
0x00, 0x01, 0x01, 0x01, 0x00, 0x01, 0x01, 0x01,
};
- assemblebasicop(&v->instr, variant2op[v->instr.variant]);
+ assemblebasicop(&v->instr, variant2op[v->instr.variant], 0x00);
break;
}
case ASM_AND: {
static uint8_t variant2op[24] = {
- 0x24, 0x25, 0x25, 0x25, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x22, 0x23, 0x23, 0x23,
+ 0x24, 0x25, 0x25, 0x25, 0x80, 0x81, 0x81, 0x81,
+ 0x80, 0x81, 0x81, 0x81, 0x22, 0x23, 0x23, 0x23,
0x20, 0x21, 0x21, 0x21, 0x20, 0x21, 0x21, 0x21,
};
- assemblebasicop(&v->instr, variant2op[v->instr.variant]);
+ assemblebasicop(&v->instr, variant2op[v->instr.variant], 0x04);
break;
}
case ASM_OR: {
static uint8_t variant2op[24] = {
- 0x0c, 0x0d, 0x0d, 0x0d, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x0a, 0x0b, 0x0b, 0x0b,
+ 0x0c, 0x0d, 0x0d, 0x0d, 0x80, 0x81, 0x81, 0x81,
+ 0x80, 0x81, 0x81, 0x81, 0x0a, 0x0b, 0x0b, 0x0b,
0x08, 0x09, 0x09, 0x09, 0x08, 0x09, 0x09, 0x09,
};
- assemblebasicop(&v->instr, variant2op[v->instr.variant]);
+ assemblebasicop(&v->instr, variant2op[v->instr.variant], 0x01);
break;
}
case ASM_SUB: {
static uint8_t variant2op[24] = {
- 0x2c, 0x2d, 0x2d, 0x2d, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x2a, 0x2b, 0x2b, 0x2b,
+ 0x2c, 0x2d, 0x2d, 0x2d, 0x80, 0x81, 0x81, 0x81,
+ 0x80, 0x81, 0x81, 0x81, 0x2a, 0x2b, 0x2b, 0x2b,
0x28, 0x29, 0x29, 0x29, 0x28, 0x29, 0x29, 0x29,
};
- assemblebasicop(&v->instr, variant2op[v->instr.variant]);
+ assemblebasicop(&v->instr, variant2op[v->instr.variant], 0x05);
break;
}
case ASM_XOR: {
static uint8_t variant2op[24] = {
- 0x34, 0x35, 0x35, 0x35, 0x00, 0x00, 0x00, 0x00,
- 0x00, 0x00, 0x00, 0x00, 0x32, 0x33, 0x33, 0x33,
+ 0x34, 0x35, 0x35, 0x35, 0x80, 0x81, 0x81, 0x81,
+ 0x80, 0x81, 0x81, 0x81, 0x32, 0x33, 0x33, 0x33,
0x30, 0x31, 0x31, 0x31, 0x30, 0x31, 0x31, 0x31,
};
- assemblebasicop(&v->instr, variant2op[v->instr.variant]);
+ assemblebasicop(&v->instr, variant2op[v->instr.variant], 0x06);
break;
}
case ASM_XCHG: {
diff --git a/test/test.sh b/test/test.sh
index 8d790d8..9c5e1d8 100644
--- a/test/test.sh
+++ b/test/test.sh
@@ -30,12 +30,26 @@ t () {
echo -n "."
}
+
for op in mov add and or sub xor
do
+	# Special-case the accumulator ('a') register variants.
t "${op}b \$127, %al"
- t "${op}w \$32767, %ax" # clang disagrees
- t "${op}l \$2147483647, %eax" # clang disagrees
- t "${op}q \$2147483647, %rax" # clang disagrees
+ t "${op}w \$32767, %ax"
+ t "${op}l \$2147483647, %eax"
+ t "${op}q \$2147483647, %rax"
+
+ # immediate variants.
+ t "${op}b \$127, (%rbx)"
+ t "${op}w \$32767, (%rbx)"
+ t "${op}l \$2147483647, (%rbx)"
+ t "${op}q \$2147483647, (%rbx)"
+ t "${op}b \$127, %bl"
+ t "${op}w \$32767, %bx"
+ t "${op}l \$2147483647, %ebx"
+ t "${op}q \$2147483647, %rbx"
+
+	# r <-> r/m variants
t "${op}b (%rax), %al"
t "${op}w (%rax), %ax"
t "${op}l (%rax), %eax"