diff --git a/arch/amd64/disasm/front.c b/arch/amd64/disasm/front.c new file mode 100644 index 0000000..fc1cc0a --- /dev/null +++ b/arch/amd64/disasm/front.c @@ -0,0 +1,156 @@ +#include "arch/amd64/disasm/x86.h" + +#include +#include +#include +#include + +// TODO: operand sizes +static const char *reg_name(int is_rex, uint8_t reg) { + _assert(reg < 16); + static const char *reg_names[] = { + "rax", "rcx", "rdx", "rbx", "rsp", "rbp", "rsi", "rdi", + "r8", "r9", "r10", "r11", "r12", "r13", "r14", "r15", + "eax", "ecx", "edx", "ebx", "esp", "ebp", "esi", "edi", + "", "", "", "", "", "", "", "", + }; + return reg_names[reg + 16 * !is_rex]; +} + +static const char *insn_name(enum opt opt) { + switch (opt) { + case OPT_ADD: + return "add"; + case OPT_SUB: + return "sub"; + case OPT_DEC: + return "dec"; + case OPT_XOR: + return "xor"; + case OPT_SHR: + return "shr"; + + case OPT_TEST: + return "test"; + case OPT_CMP: + return "cmp"; + + case OPT_CALL: + return "call"; + case OPT_JC: + return "jc"; + case OPT_JZ: + return "jz"; + case OPT_JMP: + return "jmp"; + case OPT_RET: + return "ret"; + + case OPT_PUSH: + return "push"; + case OPT_POP: + return "pop"; + case OPT_MOV: + return "mov"; + + case OPT_SWAPGS: + return "swapgs"; + case OPT_UD2: + return "ud2"; + case OPT_IRET: + return "iret"; + + default: + return "???"; + } +} + +static void dump_op(uintptr_t off, size_t op_size, const struct op *op) { + struct op_info *info = op->info; + const struct rm_operand *arg; + struct op_arg_info *ainf; + + debugf(DEBUG_DEFAULT, " %04x: ", off); + if (!info) { + debugs(DEBUG_DEFAULT, "BAD\n"); + return; + } + + debugs(DEBUG_DEFAULT, insn_name(info->opt)); + + // TODO: print operand sizes in certain cases + // (imm+mem, mem, imm) + + for (int i = 0; i < info->argc; ++i) { + arg = &op->argv[i]; + if (i) { + debugc(DEBUG_DEFAULT, ','); + } + debugc(DEBUG_DEFAULT, ' '); + int rex = op->prefices & PREF_REX; + + switch (arg->type) { + case RMOP_MSD: + debugf(DEBUG_DEFAULT, 
"[%s+%s+%ld]", reg_name(rex, arg->reg), reg_name(rex, arg->reg2), arg->imm); + break; + case RMOP_MRD: + debugf(DEBUG_DEFAULT, "[%s+%ld]", reg_name(rex, arg->reg), arg->imm); + break; + case RMOP_R: + debugf(DEBUG_DEFAULT, "%s", reg_name(rex, arg->reg)); + break; + case RMOP_XR: + switch (arg->reg2) { + case RMX_TYPE_CR: + debugf(DEBUG_DEFAULT, "cr%u", arg->reg); + break; + default: + debugf(DEBUG_DEFAULT, "??? (xr, reg2=%d)", arg->reg2); + break; + } + break; + case RMOP_MD: + if (op->prefices & PREF_SEG_DS) { + debugs(DEBUG_DEFAULT, "ds:"); + } + if (op->prefices & PREF_SEG_ES) { + debugs(DEBUG_DEFAULT, "es:"); + } + if (op->prefices & PREF_SEG_FS) { + debugs(DEBUG_DEFAULT, "es:"); + } + if (op->prefices & PREF_SEG_GS) { + debugs(DEBUG_DEFAULT, "gs:"); + } + debugf(DEBUG_DEFAULT, "[%ld]", arg->imm); + break; + case RMOP_CD: + debugf(DEBUG_DEFAULT, "<%%rip+%ld> (0x%08x)", arg->imm, off + op_size + arg->imm); + break; + case RMOP_I: + debugf(DEBUG_DEFAULT, "#%ld", arg->imm); + break; + default: + debugf(DEBUG_DEFAULT, "??? 
(%d)", arg->type); + break; + } + } + debugc(DEBUG_DEFAULT, '\n'); +} + +void dump_segment(const uint8_t *bytes, uintptr_t base, size_t size) { + struct op op; + ssize_t op_size; + size_t off = 0; + + x86_set_mode(MODE_LONG); + + while (off < size) { + op_size = read_op(bytes + off, base + off, &op); + if (op_size <= 0) { + panic("Failed to read op\n"); + } + dump_op(base + off, op_size, &op); + off += op_size; + } +} diff --git a/arch/amd64/disasm/x86.c b/arch/amd64/disasm/x86.c new file mode 100644 index 0000000..25b3621 --- /dev/null +++ b/arch/amd64/disasm/x86.c @@ -0,0 +1,178 @@ +#include "arch/amd64/disasm/x86.h" +#include "arch/amd64/disasm/util.h" + +#include "sys/assert.h" +#include "sys/debug.h" +//#include +//#include +//#include +//#include +//#include + +static uint32_t processor_mode = MODE_REAL; + +static ssize_t x86_read_prefices(const uint8_t *data, uint32_t *prefices) { + uint8_t byte; + uint32_t p = 0; + size_t pos = 0; + + while (1) { + int is_prefix = 1; + byte = data[pos]; + switch (byte) { + // Grp1 + case 0xF0: + p |= PREF_LOCK; + break; + case 0xF2: + break; + case 0xF3: + p |= PREF_REP; + break; + + // Grp2 + case 0x26: + p |= PREF_SEG_ES; + break; + case 0x2E: + p |= PREF_SEG_CS; + break; + case 0x3E: + p |= PREF_SEG_DS; + break; + case 0x64: + p |= PREF_SEG_FS; + break; + case 0x65: + p |= PREF_SEG_GS; + break; + + // Grp3 + case 0x66: + p |= PREF_OP_SIZE; + break; + + // Grp4 + case 0x67: + p |= PREF_AD_SIZE; + break; + + default: + is_prefix = 0; + break; + } + + if (is_prefix) { + ++pos; + } else { + break; + } + } + + *prefices = p; + return pos; +} + +static void x86_sizes(uint32_t prefices, uint8_t rex, uint32_t insn_flags, size_t *operand_size, size_t *address_size) { + switch (processor_mode) { + case MODE_REAL: + if (prefices & PREF_AD_SIZE) { + *address_size = 32; + } else { + *address_size = 16; + } + if (prefices & PREF_OP_SIZE) { + *operand_size = 32; + } else { + *operand_size = 16; + } + break; + case MODE_LONG: + if 
(prefices & PREF_AD_SIZE) { + *address_size = 32; + } else { + *address_size = 64; + } + if ((rex & REX_W) || (insn_flags & OP_DEF64)) { + _assert(!(prefices & PREF_OP_SIZE)); + *operand_size = 64; + } else { + *operand_size = 32; + + if (prefices & PREF_OP_SIZE) { + *operand_size = 16; + } + } + break; + default: + panic("Unhandled CPU mode\n"); + } + + if (insn_flags & OP_DEF8) { + *operand_size = 8; + } +} + +ssize_t read_op(const uint8_t *data, uint64_t base_addr, struct op *op) { + // Read prefixes + size_t pos = 0; + uint8_t byte; + ssize_t op_len; + uint32_t prefices = 0; + uint8_t rex = 0; + int has_modrm = 0; + uint8_t modrm; + struct op_info *info = NULL; + + // Read prefices + if ((op_len = x86_read_prefices(data, &prefices)) < 0) { + return -1; + } + + pos += op_len; + byte = data[pos]; + op->rex = 0; + + if (processor_mode != MODE_REAL) { + // Check if it's REX + if ((byte & 0xF0) == 0x40) { + rex = byte; + prefices |= PREF_REX; + + ++pos; + + if ((processor_mode != MODE_LONG) && (rex & REX_W)) { + kerror("REX.W in legacy modes\n"); + return -1; + } + } + } + + if ((op_len = x86_match_op(data + pos, &info, &modrm)) > 0) { + op->rex = rex; + op->info = info; + op->prefices = prefices; + pos += op_len; + + x86_sizes(prefices, rex, info->flags, &op->operand_size, &op->address_size); + + // This means x86_match_op has already read ModR/M for us + if (info->flags & OP_RMEXT) { + has_modrm = 1; + } + + if ((op_len = x86_read_operands(data + pos, op, has_modrm ? 
&modrm : NULL)) < 0) { + return -1; + } + pos += op_len; + + return pos; + } else { + op->info = NULL; + return pos + 1; + } +} + +void x86_set_mode(int mode) { + processor_mode = mode; +} diff --git a/arch/amd64/disasm/x86_arg.c b/arch/amd64/disasm/x86_arg.c new file mode 100644 index 0000000..8b6c063 --- /dev/null +++ b/arch/amd64/disasm/x86_arg.c @@ -0,0 +1,427 @@ +#include "arch/amd64/disasm/x86.h" +#include "arch/amd64/disasm/util.h" +#include "sys/assert.h" +#include "sys/string.h" +#include "sys/debug.h" +//#include +//#include <_assert.h> +//#include +//#include + +struct operand_bytes { + int has_imm0; + int has_imm1; + int has_sib; + int has_disp; + int has_modrm; + + uint8_t sib; + int64_t disp; + int64_t imm0; + int64_t imm1; + uint8_t modrm; +}; + +static void x86_read_imm(const uint8_t *data, size_t imm_size, int64_t *v) { + switch (imm_size) { + case 8: + *v = qmovsx8(data[0]); + break; + case 16: + *v = qmovsx16( + ((uint16_t) data[1] << 8) | + data[0] + ); + break; + case 32: + *v = qmovsx32( + ((uint32_t) data[3] << 24) | + ((uint32_t) data[2] << 16) | + ((uint32_t) data[1] << 8) | + data[0] + ); + break; + default: + panic("Immediate size not implemented: %zu\n", imm_size); + break; + } +} + +static ssize_t x86_read_operand_bytes(const uint8_t *data, const struct op *op, struct operand_bytes *bytes) { + // Operand bytes: + // ... | ModR/M | SIB | Disp | Imm + size_t pos = 0; + const struct op_info *info = op->info; + + bytes->has_sib = 0; + bytes->has_disp = 0; + bytes->has_imm0 = 0; + bytes->has_imm1 = 0; + + // 1. ModR/M + if (!bytes->has_modrm) { + for (int i = 0; i < info->argc; ++i) { + if (info->argv[i].source == ARG_SRC_MODRM + || info->argv[i].source == ARG_SRC_MODRM_CR + || info->argv[i].source == ARG_SRC_SREG + || info->argv[i].source == ARG_SRC_SSE) { + bytes->has_modrm = 1; + bytes->modrm = data[pos++]; + break; + } + } + } + // 2. 
SIB/Disp + if (bytes->has_modrm) { + uint8_t mod = bytes->modrm >> 6; + uint8_t rm = bytes->modrm & 7; + + if (op->address_size > 16) { + if (op->rex & REX_B) { + rm |= 1 << 3; + } + + if (mod != 3 && (rm == 4 || rm == 12)) { + bytes->has_sib = 1; + bytes->sib = data[pos]; + ++pos; + } + if (mod == 1) { + bytes->has_disp = 1; + bytes->disp = qmovsx8(data[pos]); + ++pos; + } else if (mod == 2) { + bytes->has_disp = 1; + bytes->disp = qmovsx32( + ((uint32_t) data[pos + 3] << 24) | + ((uint32_t) data[pos + 2] << 16) | + ((uint32_t) data[pos + 1] << 8) | + data[pos] + ); + pos += 4; + } else if (mod == 0 && (rm == 5 || rm == 13)) { + bytes->has_disp = 1; + bytes->disp = qmovsx32( + ((uint32_t) data[pos + 3] << 24) | + ((uint32_t) data[pos + 2] << 16) | + ((uint32_t) data[pos + 1] << 8) | + data[pos] + ); + pos += 4; + } else if (bytes->has_sib) { + uint8_t base = bytes->sib & 0x7; + + if (mod == 0 && base == 5) { + bytes->has_disp = 1; + bytes->disp = qmovsx32( + ((uint32_t) data[pos + 3] << 24) | + ((uint32_t) data[pos + 2] << 16) | + ((uint32_t) data[pos + 1] << 8) | + data[pos] + ); + pos += 4; + } else { + // TODO + panic("???\n"); + } + } + } else { + if (mod == 1) { + bytes->has_disp = 1; + bytes->disp = data[pos]; + ++pos; + } else if (mod == 2) { + bytes->has_disp = 1; + bytes->disp = qmovsx16(((uint16_t) data[pos + 1] << 8) | data[pos]); + pos += 2; + } else if (mod == 0 && rm == 6) { + bytes->has_disp = 1; + bytes->disp = ((uint16_t) data[pos + 1] << 8) | data[pos]; + pos += 2; + } + } + } + // 3. Imm0 + for (int i = 0; i < info->argc; ++i) { + if (info->argv[i].source == ARG_SRC_IMM0 || + info->argv[i].source == ARG_SRC_IMMREL || + info->argv[i].source == ARG_SRC_MOFFS) { + size_t imm_size = (info->argv[i].source == ARG_SRC_IMM0) ? op->operand_size : op->address_size; + imm_size = MIN(imm_size, info->argv[i].imm_size); + bytes->has_imm0 = 1; + x86_read_imm(data + pos, imm_size, &bytes->imm0); + pos += imm_size >> 3; + break; + } + } + // 4. 
Imm1 + for (int i = 0; i < info->argc; ++i) { + if (info->argv[i].source == ARG_SRC_IMM1) { + size_t imm_size = op->operand_size; + imm_size = MIN(imm_size, info->argv[i].imm_size); + bytes->has_imm1 = 1; + x86_read_imm(data + pos, imm_size, &bytes->imm1); + pos += imm_size >> 3; + break; + } + } + + return pos; +} + +ssize_t x86_read_operands(const uint8_t *data, struct op *op, const uint8_t *modrm) { + struct op_arg_info *info; + struct operand_bytes bytes; + uint8_t mod, rm, r; + bytes.has_modrm = !!modrm; + if (modrm) { + bytes.modrm = *modrm; + } + + ssize_t res = x86_read_operand_bytes(data, op, &bytes); + + // Process operand bytes + for (int i = 0; i < op->info->argc; ++i) { + info = &op->info->argv[i]; + + switch (info->source) { + case ARG_SRC_MODRM: + _assert(bytes.has_modrm); + mod = bytes.modrm >> 6; + rm = bytes.modrm & 7; + r = (bytes.modrm >> 3) & 7; + + if (op->rex & REX_B) { + rm |= 1 << 3; + } + if (op->rex & REX_R) { + r |= 1 << 3; + } + + if (info->rm == 0) { + // Just reg + op->argv[i].type = RMOP_R; + op->argv[i].reg = r; + break; + } + + op->argv[i].imm = 0; + if (op->address_size > 16) { + if (mod == 3) { + // Just reg + op->argv[i].type = RMOP_R; + op->argv[i].reg = rm; + } else { + if (rm == 4 || rm == 12) { + uint8_t base; + uint8_t index; + // [SIB + disp0/8/32] + _assert(bytes.has_sib); + op->argv[i].type = RMOP_MSD; + + base = bytes.sib & 0x7; + index = (bytes.sib >> 3) & 7; + + if (mod == 0) { + if (base == 5) { + _assert(bytes.has_disp); + op->argv[i].imm = bytes.disp; + } else { + op->argv[i].imm = 0; + } + + switch (index) { + case 0: + case 1: + case 2: + case 3: + op->argv[i].type = RMOP_MSD; + op->argv[i].reg = base | ((op->rex & REX_B) << 3); + op->argv[i].reg2 = index | (!!(op->rex & REX_X) << 3); + break; + case 4: + _assert(bytes.has_disp); + op->argv[i].type = RMOP_MD; + break; + default: + panic("TODO: handle sib, mod=0, index=%d\n", index); + } + } else if (mod == 1) { + if (index < 4) { + panic("TODO: handle sib, 
mod=%d, index=%d\n", mod, index); + } else if (index == 4) { + _assert(bytes.has_disp); + op->argv[i].type = RMOP_MRD; + op->argv[i].imm = bytes.disp; + op->argv[i].reg = base | ((op->rex & REX_B) << 3); + break; + } else { + panic("TODO: handle sib, mod=%d, index=%d\n", mod, index); + } + } else { + // TODO + panic("TODO: handle sib, mod=%d, index=%d\n", mod, index); + //_assert(bytes.has_disp); + //op->argv[i].imm = bytes.disp; + } + + op->argv[i].reg = index; + if (op->rex & REX_X) { + op->argv[i].reg |= 1 << 3; + } + + op->argv[i].reg2 = base; + if (op->rex & REX_B) { + op->argv[i].reg2 |= 1 << 3; + } + } else if (mod == 0 && (rm == 5 || rm == 13)) { + // [RIP + disp32] + _assert(bytes.has_disp); + op->argv[i].type = RMOP_MCD; + op->argv[i].imm = bytes.disp; + } else { + op->argv[i].type = RMOP_MRD; + if (mod == 0) { + // [REG] + op->argv[i].imm = 0; + } else { + // [REG + disp] + _assert(bytes.has_disp); + op->argv[i].imm = bytes.disp; + } + + op->argv[i].reg = rm; + } + } + } else { + if (mod == 3) { + op->argv[i].type = RMOP_R; + op->argv[i].reg = rm; + } else { + if (mod == 0 && rm == 6) { + // [disp] + op->argv[i].type = RMOP_MD; + } else if (rm < 4) { + // [REG + REG2 + disp] + op->argv[i].type = RMOP_MRRD; + } else { + // [REG + disp] + op->argv[i].type = RMOP_MRD; + } + + if (mod > 0 || rm == 6) { + _assert(bytes.has_disp); + op->argv[i].imm = bytes.disp; + } else { + op->argv[i].imm = 0; + } + + switch (rm) { + case 0: + // [BX + SI + disp0/8/16] + op->argv[i].reg = RM_BX; + op->argv[i].reg2 = RM_SI; + break; + case 1: + // [BX + DI + disp0/8/16] + op->argv[i].reg = RM_BX; + op->argv[i].reg2 = RM_DI; + break; + case 2: + // [BP + SI + disp0/8/16] + op->argv[i].reg = RM_BP; + op->argv[i].reg2 = RM_SI; + break; + case 3: + // [BP + DI + disp0/8/16] + op->argv[i].reg = RM_BP; + op->argv[i].reg2 = RM_DI; + break; + case 4: + // [SI + disp0/8/16] + op->argv[i].reg = RM_SI; + break; + case 5: + // [DI + disp0/8/16] + op->argv[i].reg = RM_DI; + break; + case 
6: + // [BP + disp] or [disp] + if (mod) { + op->argv[i].reg = RM_BP; + } + break; + case 7: + // [BX + disp0/8/16] + op->argv[i].reg = RM_BX; + break; + default: + panic("Unsupported addressing mode: Real, MOD = %u, RM = %u\n", mod, rm); + } + } + } + break; + case ARG_SRC_OPXREG: + op->argv[i].type = RMOP_XR; + op->argv[i].reg = info->reg; + op->argv[i].reg2 = info->ext; + break; + case ARG_SRC_SSE: + op->argv[i].type = RMOP_XR; + op->argv[i].reg2 = RMX_TYPE_XMM; + _assert(bytes.has_modrm); + if (info->rm) { + op->argv[i].reg = bytes.modrm & 7; + } else { + op->argv[i].reg = (bytes.modrm >> 3) & 7; + } + break; + case ARG_SRC_OPREG: + op->argv[i].type = RMOP_R; + op->argv[i].reg = info->reg; + break; + case ARG_SRC_IMM0: + _assert(bytes.has_imm0); + op->argv[i].type = RMOP_I; + op->argv[i].imm = bytes.imm0; + break; + case ARG_SRC_IMM1: + _assert(bytes.has_imm1); + op->argv[i].type = RMOP_I; + op->argv[i].imm = bytes.imm1; + break; + case ARG_SRC_IMMREL: + _assert(bytes.has_imm0); + op->argv[i].type = RMOP_CD; + op->argv[i].imm = bytes.imm0; + break; + case ARG_SRC_SREG: + op->argv[i].type = RMOP_XR; + op->argv[i].reg = (bytes.modrm >> 3) & 7; + if (op->rex & REX_R) { + op->argv[i].reg |= 1 << 3; + } + op->argv[i].reg2 = 0; + break; + case ARG_SRC_MOFFS: + _assert(bytes.has_imm0); + op->argv[i].type = RMOP_MD; + op->argv[i].imm = bytes.imm0; + break; + case ARG_SRC_MODRM_CR: + _assert(bytes.has_modrm); + op->argv[i].type = RMOP_XR; + op->argv[i].reg2 = RMX_TYPE_CR; + if (info->rm) { + op->argv[i].reg = bytes.modrm & 7; + } else { + op->argv[i].reg = (bytes.modrm >> 3) & 7; + } + break; + default: + panic("Unhandled operand: %d\n", info->source); + } + } + + return res; +} diff --git a/arch/amd64/disasm/x86_op.c b/arch/amd64/disasm/x86_op.c new file mode 100644 index 0000000..cfb7a8c --- /dev/null +++ b/arch/amd64/disasm/x86_op.c @@ -0,0 +1,840 @@ +#include "arch/amd64/disasm/x86.h" +#include "sys/assert.h" +#include "sys/debug.h" +//#include +//#include 
+//#include + +//#define PRINT_OPCODES + +// insn REG/MEM, REG, IMM +#define OP_ARG3_RRMI(sz) \ + { \ + { 1, ARG_SRC_MODRM, .rm = 1 }, \ + { 0, ARG_SRC_MODRM, .rm = 0 }, \ + { 0, ARG_SRC_IMM0, .imm_size = sz } \ + } + +// insn REG/MEM, REG +#define OP_ARG2_RRM_DEF \ + { \ + { 1, ARG_SRC_MODRM, .rm = 1 }, \ + { 0, ARG_SRC_MODRM, .rm = 0 } \ + } + +// insn REG/MEM, REG +#define OP_ARG2_RMC_DEF(b) \ + { \ + { 1, ARG_SRC_MODRM, .rm = 1 }, \ + { 0, ARG_SRC_OPREG, .reg = b } \ + } + +// insn REG/MEM, IMM +#define OP_ARG2_RMI_DEF(sz) \ + { \ + { 0, ARG_SRC_MODRM, .rm = 1 }, \ + { 1, ARG_SRC_IMM0, .imm_size = sz } \ + } + +// insn REG, IMM +#define OP_ARG2_CI_DEF(sz, b) \ + { \ + { 1, ARG_SRC_OPREG, .reg = b }, \ + { 0, ARG_SRC_IMM0, .imm_size = sz } \ + } + +// insn MOFFS, REG +#define OP_ARG2_TC_DEF(sz, b) \ + { \ + { 1, ARG_SRC_MOFFS, .imm_size = sz }, \ + { 0, ARG_SRC_OPREG, .reg = b }, \ + } + +// insn REG, MOFFS +#define OP_ARG2_CT_DEF(sz, b) \ + { \ + { 1, ARG_SRC_OPREG, .reg = b }, \ + { 0, ARG_SRC_MOFFS, .imm_size = sz }, \ + } + +// insn REG, REG/MEM +#define OP_ARG2_RMR_DEF \ + { \ + { 1, ARG_SRC_MODRM, .rm = 0 }, \ + { 0, ARG_SRC_MODRM, .rm = 1 } \ + } + +// insn SREG, REG/MEM +#define OP_ARG2_RMS_DEF \ + { \ + { 1, ARG_SRC_SREG }, \ + { 0, ARG_SRC_MODRM, .rm = 1 } \ + } + +// insn IMM, IMM +#define OP_ARG2_II_DEF(sz0, sz1) \ + { \ + { 0, ARG_SRC_IMM0, .imm_size = sz0 }, \ + { 0, ARG_SRC_IMM1, .imm_size = sz1 } \ + } + +// insn REG +#define OP_ARG1_C_DEF(b) \ + { \ + { 0, ARG_SRC_OPREG, .reg = b } \ + } + +// insn REG/MEM +#define OP_ARG1_RM_DEF \ + { \ + { 0, ARG_SRC_MODRM, .rm = 1 } \ + } + +// insn IMM +#define OP_ARG1_I_DEF(sz) \ + { \ + { 0, ARG_SRC_IMM0, .imm_size = sz } \ + } + +// insn xREG +#define OP_ARG1_EXT_C_DEF(t, r) \ + { \ + { 0, ARG_SRC_OPXREG, .ext = t, .reg = r } \ + } + +// insn REL +#define OP_ARG1_REL_DEF(sz) \ + { \ + { 0, ARG_SRC_IMMREL, .imm_size = sz } \ + } + + +// SSE +#define OP_ARG2_SSE_RMR_DEF \ + { \ + { 1, ARG_SRC_SSE, .rm = 0 
}, \ + { 0, ARG_SRC_SSE, .rm = 1 } \ + } + +#pragma GCC diagnostic push +#pragma GCC diagnostic ignored "-Wmissing-braces" +#pragma GCC diagnostic ignored "-Wmissing-field-initializers" +static struct op_info ops_1[0x800] = { +// // 8-bit instructions +// // 00 /r ADD r/m8, r8 + [0x000] = { OPT_ADD, OP_PRESENT | OP_DEF8, 2, OP_ARG2_RRM_DEF }, +// // 02 /r ADD r8, r/m8 +// [0x002] = { "add", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RMR_DEF }, +// // 04 /r ADD AL, imm8 +// [0x004] = { "add", OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 0) }, +// // 08 /r OR r/m8, r8 +// [0x008] = { "or", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RRM_DEF }, +// // 0C ib OR AL, imm8 +// [0x00C] = { "or", OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 0) }, +// // 0A /r OR r8, r/m8 +// [0x00A] = { "or", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RMR_DEF }, +// // 10 /r ADC r/m8, r8 +// [0x010] = { "adc", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RRM_DEF }, +// // 18 /r SBB r/m8, r8 +// [0x018] = { "sbb", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RRM_DEF }, +// // 1A /r SBB r8, r/m8 +// [0x01A] = { "sbb", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RMR_DEF }, +// // 1C /r SBB AL, imm8 +// [0x01C] = { "sbb", OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 0) }, +// // 20 /r AND r/m8, r8 +// [0x020] = { "and", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RRM_DEF }, +// // 2C ib SUB AL, imm8 +// [0x02C] = { "sub", OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 0) }, +// // 28 /r SUB r/m8, r8 +// [0x028] = { "sub", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RRM_DEF }, +// // 30 /r XOR r/m8, r8 +// [0x030] = { "xor", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RRM_DEF }, +// // 32 /r XOR r8, r/m8 +// [0x032] = { "xor", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RMR_DEF }, +// // 34 ib XOR AL, imm8 +// [0x034] = { "xor", OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 0) }, +// // 38 /r CMP r/m8, r8 +// [0x038] = { "cmp", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RRM_DEF }, +// // 3A /r CMP r8, r/m8 +// [0x03A] = { "cmp", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RMR_DEF }, +// // 3C ib CMP AL, imm8 +// [0x03C] = { "cmp", 
OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 0) }, +// +// // 80 /0 ib ADD r/m8, imm8 +// [0x080] = { "add", OP_PRESENT | OP_DEF8 | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, +// // 80 /1 ib OR r/m8, imm8 +// [0x180] = { "or", OP_PRESENT | OP_DEF8 | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, +// // 80 /4 ib AND r/m8, imm8 +// [0x480] = { "and", OP_PRESENT | OP_DEF8 | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, +// // 80 /7 ib CMP r/m8, imm8 +// [0x780] = { "cmp", OP_PRESENT | OP_DEF8 | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, +// +// // F6 /0 ib TEST r/m8, imm8 +// [0x0F6] = { "test", OP_PRESENT | OP_DEF8 | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, +// +// // 86 /r XCHG r8, r/m8 +// [0x086] = { "xchg", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RMR_DEF }, +// +// // C6 /0 ib MOV r/m8, imm8 +// [0x0C6] = { "mov", OP_PRESENT | OP_DEF8 | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, +// + // A0 MOV AL, moffs8 + [0x0A0] = { OPT_MOV, OP_PRESENT | OP_DEF8, 2, OP_ARG2_CT_DEF(64, 0) }, + // A2 MOV moffs8, AL + [0x0A2] = { OPT_MOV, OP_PRESENT | OP_DEF8, 2, OP_ARG2_TC_DEF(64, 0) }, + +// // 84 /r TEST r/m8, r8 +// [0x084] = { "test", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RRM_DEF }, + // 88 /r MOV r/m8, r8 + [0x088] = { OPT_MOV, OP_PRESENT | OP_DEF8, 2, OP_ARG2_RRM_DEF }, + // 8A /r MOV r8, r/m8 + [0x08A] = { OPT_MOV, OP_PRESENT | OP_DEF8, 2, OP_ARG2_RMR_DEF }, +// // A4 +// [0x0A4] = { "movs", OP_PRESENT | OP_DEF8, 0 }, +// // AA STOS m8 + [0x0AA] = { OPT_STOSB, OP_PRESENT | OP_DEF8, 0 }, +// // AC LODS m8 +// [0x0AC] = { "lods", OP_PRESENT | OP_DEF8, 0 }, +// +// // FE /0 INC r/m8 +// [0x0FE] = { "inc", OP_PRESENT | OP_DEF8 | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// // FE /1 DEC r/m8 +// [0x1FE] = { "dec", OP_PRESENT | OP_DEF8 | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// +// // A8 ib TEST AL, imm8 +// [0x0A8] = { "test", OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 0) }, +// +// // C0 /0 ib ROL r/m8, imm8 +// [0x0C0] = { "rol", OP_PRESENT | OP_DEF8 | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, +// // C0 /5 ib SHR r/m8, imm8 +// [0x5C0] = { "shr", OP_PRESENT | 
OP_DEF8 | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, +// +// // D0 /0 ROL r/m8 +// [0x0D0] = { "rol", OP_PRESENT | OP_DEF8 | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// // D0 /5 SHR r/m8 +// [0x5D0] = { "shr", OP_PRESENT | OP_DEF8 | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// +// // EC IN AL, DX +// // AL is implied +// [0x0EC] = { "in", OP_PRESENT | OP_DEF8, 1, OP_ARG1_C_DEF(2) }, +// // EE OUT DX, AL +// // AL is implied +// [0x0EE] = { "out", OP_PRESENT | OP_DEF8, 1, OP_ARG1_C_DEF(2) }, +// +// // 6C INS m8, DX +// // DX is implied +// // ES:(E)DI is implied +// [0x06C] = { "ins", OP_PRESENT | OP_DEF8, 0 }, +// // 6E OUTS DX, m8 +// // DX is implied +// // DS:(E)SI is implied +// [0x06E] = { "outs", OP_PRESENT | OP_DEF8, 0 }, +// +// +// // "Simple" instructions +// // 01 /r ADD r/m16, r16 +// // 01 /r ADD r/m32, r32 +// [0x001] = { "add", OP_PRESENT, 2, OP_ARG2_RRM_DEF }, +// // 03 /r ADD r16, r/m16 +// // 03 /r ADD r32, r/m32 +// [0x003] = { "add", OP_PRESENT, 2, OP_ARG2_RMR_DEF }, +// // 05 /r ADD AX, imm16 +// // 05 /r ADD EAX, imm32 +// [0x005] = { "add", OP_PRESENT, 2, OP_ARG2_CI_DEF(32, 0) }, +// // 09 /r OR r/m16, r16 +// // 09 /r OR r/m32, r32 +// [0x009] = { "or", OP_PRESENT, 2, OP_ARG2_RRM_DEF }, + // 29 /r SUB r/m16, r16 + // 29 /r SUB r/m32, r32 + [0x029] = { OPT_SUB, OP_PRESENT, 2, OP_ARG2_RRM_DEF }, + // 31 /r XOR r/m16, r16 + // 31 /r XOR r/m32, r32 + [0x031] = { OPT_XOR, OP_PRESENT, 2, OP_ARG2_RRM_DEF }, + // 39 /r CMP r/m16, r16 + // 39 /r CMP r/m32, r32 + [0x039] = { OPT_CMP, OP_PRESENT, 2, OP_ARG2_RRM_DEF }, + // 3B /r CMP r16, r/m16 + // 3B /r CMP r32, r/m32 + [0x03B] = { OPT_CMP, OP_PRESENT, 2, OP_ARG2_RMR_DEF }, +// +// // C8 iw ib ENTER imm16, imm8 +// [0x0C8] = { "enter", OP_PRESENT, 2, OP_ARG2_II_DEF(16, 8) }, +// +// // 90 NOP +// [0x090] = { "nop", OP_PRESENT, 0 }, +// // 61 POPA +// // 61 POPAD +// [0x061] = { "popa", OP_PRESENT, 0 }, +// // 62 /r BOUND r16, m16&16 +// // 62 /r BOUND r32, m32&32 +// [0x062] = { "bound", OP_PRESENT, 2, OP_ARG2_RMR_DEF }, +// 
// 63 /r ARPL r/m16, r16 +// [0x063] = { "arpl", OP_PRESENT, 2, OP_ARG2_RRM_DEF }, +// +// // 06 PUSH ES +// [0x006] = { "push", OP_PRESENT, 1, OP_ARG1_EXT_C_DEF(RMX_TYPE_SEG, RMX_ES) }, +// // 07 POP ES +// [0x007] = { "pop", OP_PRESENT, 1, OP_ARG1_EXT_C_DEF(RMX_TYPE_SEG, RMX_ES) }, +// // 0E PUSH CS +// [0x00E] = { "push", OP_PRESENT, 1, OP_ARG1_EXT_C_DEF(RMX_TYPE_SEG, RMX_CS) }, +// // 1E PUSH DS +// [0x01E] = { "push", OP_PRESENT, 1, OP_ARG1_EXT_C_DEF(RMX_TYPE_SEG, RMX_DS) }, +// // 1F POP DS +// [0x01F] = { "pop", OP_PRESENT, 1, OP_ARG1_EXT_C_DEF(RMX_TYPE_SEG, RMX_DS) }, +// +// // 0D iw OR AX, imm16 +// // 0D id OR EAX, imm32 +// [0x00D] = { "or", OP_PRESENT, 2, OP_ARG2_CI_DEF(32, 0) }, +// // 25 iw AND AX, imm16 +// // 25 id AND EAX, imm32 +// [0x025] = { "and", OP_PRESENT, 2, OP_ARG2_CI_DEF(32, 0) }, +// + // 70 cb JO rel8 + [0x070] = { OPT_JO, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 71 cb JNO rel8 + [0x071] = { OPT_JNO, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 72 cb JC rel8 + [0x072] = { OPT_JC, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 73 cb JNC rel8 + [0x073] = { OPT_JNC, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 74 cb JZ rel8 + [0x074] = { OPT_JZ, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 75 cb JNZ rel8 + [0x075] = { OPT_JNZ, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 76 cb JNA rel8 + [0x076] = { OPT_JNA, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 77 cb JA rel8 + [0x077] = { OPT_JA, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 78 cb JS rel8 + [0x078] = { OPT_JS, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 79 cb JNS rel8 + [0x079] = { OPT_JNS, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 7A cb JP rel8 + [0x07A] = { OPT_JP, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 7C cb JL rel8 + [0x07C] = { OPT_JL, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 7D cb JNL rel8 + [0x07D] = { OPT_JNL, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 7E cb JNG rel8 + [0x07E] = { OPT_JNG, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 7F cb JG rel8 + [0x07F] = { OPT_JG, OP_PRESENT, 1, 
OP_ARG1_REL_DEF(8) }, + // E2 cb LOOP rel8 + [0x0E2] = { OPT_LOOP, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // E1 cb LOOPZ rel8 + [0x0E1] = { OPT_LOOPZ, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // E0 cb LOOPNZ rel8 + [0x0E0] = { OPT_LOOPNZ, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // E3 cb JCXZ rel8 + [0x0E3] = { OPT_JCXZ, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, +// +// // 3D iw CMP AX, imm16 +// // 3D id CMP EAX, imm32 +// [0x03D] = { "cmp", OP_PRESENT, 2, OP_ARG2_CI_DEF(32, 0) }, +// +// // AB STOS m16 +// // AB STOS m32 +// [0x0AB] = { "stos", OP_PRESENT, 0 }, +// // AF SCAS m16 +// // AF SCAS m32 +// [0x0AF] = { "scas", OP_PRESENT, 0 }, +// +// // CB RETF +// [0x0CB] = { "retf", OP_PRESENT, 0 }, + // CF IRET + [0x0CF] = { OPT_IRET, OP_PRESENT, 0 }, +// +// // 11 /r ADC r/m16, r16 +// // 11 /r ADC r/m32, r32 +// [0x011] = { "adc", OP_PRESENT, 2, OP_ARG2_RRM_DEF }, +// // 13 /r ADC r16, r/m16 +// // 13 /r ADC r32, r/m32 +// [0x013] = { "adc", OP_PRESENT, 2, OP_ARG2_RMR_DEF }, +// // E9 cw JMP rel16 +// // E9 cd JMP rel32 + [0x0E9] = { OPT_JMP, OP_PRESENT, 1, OP_ARG1_REL_DEF(32) }, +// // EB cb JMP rel8 + [0x0EB] = { OPT_JMP, OP_PRESENT, 1, OP_ARG1_REL_DEF(8) }, + // 85 /r TEST r/m16, r16 + // 85 /r TEST r/m32, r32 + [0x085] = { OPT_TEST, OP_PRESENT, 2, OP_ARG2_RRM_DEF }, + // 89 /r MOV r/m16, r16 + // 89 /r MOV r/m32, r32 + [0x089] = { OPT_MOV, OP_PRESENT, 2, OP_ARG2_RRM_DEF }, + // 8B /r MOV r16, r/m16 + // 8B /r MOV r32, r/m32 + [0x08B] = { OPT_MOV, OP_PRESENT, 2, OP_ARG2_RMR_DEF }, + // 8E /r MOV Sreg, r/m16 + [0x08E] = { OPT_MOV, OP_PRESENT, 2, OP_ARG2_RMS_DEF }, + // 8C /r MOV r/m16, Sreg + [0x08C] = { OPT_MOV, OP_PRESENT, 2, OP_ARG2_RRM_DEF }, + // 9C PUSHF +// [0x09C] = { "pushf", OP_PRESENT, 0 }, +// // 9D POPF +// [0x09D] = { "popf", OP_PRESENT, 0 }, +// // C2 iw RET imm16 +// [0x0C2] = { "ret", OP_PRESENT, 1, OP_ARG1_I_DEF(16) }, +// // F9 STC +// [0x0F9] = { "stc", OP_PRESENT, 0 }, +// // FA CLI +// [0x0FA] = { "cli", OP_PRESENT, 0 }, +// // F8 NOP +// [0x0F8] 
= { "clc", OP_PRESENT, 0 }, +// // FC +// [0x0FC] = { "cld", OP_PRESENT, 0 }, +// // CD ib INT imm8 +// [0x0CD] = { "int", OP_PRESENT, 1, OP_ARG1_I_DEF(8) }, +// // 8D /r LEA r16, m +// // 8D /r LEA r32, m +// [0x08D] = { "lea", OP_PRESENT, 2, OP_ARG2_RMR_DEF }, +// +// // 69 /r iw IMUL r16, r/m16, imm16 +// // 69 /r id IMUL r32, r/m32, imm32 +// [0x069] = { "imul", OP_PRESENT, 3, OP_ARG3_RRMI(32) }, +// // 6B /r ib IMUL r16, r/m16, imm8 +// // 6B /r ib IMUL r32, r/m32, imm8 +// [0x06B] = { "imul", OP_PRESENT, 3, OP_ARG3_RRMI(8) }, +// +// // 0B /r OR r16, r/m16 +// // 0B /r OR r32, r/m32 +// [0x00B] = { "or", OP_PRESENT, 2, OP_ARG2_RMR_DEF }, +// // 33 /r XOR r16, r/m16 +// // 33 /r XOR r32, r/m32 +// [0x033] = { "xor", OP_PRESENT, 2, OP_ARG2_RMR_DEF }, +// +// // 19 /r SBB r/m16, r16 +// // 19 /r SBB r/m32, r32 +// [0x019] = { "sbb", OP_PRESENT, 2, OP_ARG2_RRM_DEF }, +// + // C3 RET + [0x0C3] = { OPT_RET, OP_PRESENT, 0 }, +// // 99 CWD +// // 99 CDQ +// // REX.W + 99 CQO +// [0x099] = { "cdq/cwd/cqo", OP_PRESENT, 0 }, +// +// // E8 cw CALL rel16 +// // E8 cd CALL rel32 +// [0x0E8] = { "call", OP_PRESENT, 1, OP_ARG1_REL_DEF(32) }, +// +// // 21 /r AND r/m16, r16 +// // 21 /r AND r/m32, r32 +// [0x021] = { "and", OP_PRESENT, 2, OP_ARG2_RRM_DEF }, +// // 23 /r AND r16, r/m16 +// // 23 /r AND r32, r/m32 +// [0x023] = { "and", OP_PRESENT, 2, OP_ARG2_RMR_DEF }, +// +// // ED IN AX, DX +// // ED IN EAX, DX +// // (E)AX is implied +// [0x0ED] = { "in", OP_PRESENT, 1, OP_ARG1_C_DEF(2) }, +// // EF OUT DX, AX +// // EF OUT DX, EAX +// // AL/AX/EAX implied, so only 1 argument +// [0x0EF] = { "out", OP_PRESENT, 1, OP_ARG1_C_DEF(2) }, +// // 6F OUTS DX, m16 +// // 6F OUTS DX, m32 +// // DX is implied +// // DS:(E)SI is implied +// [0x06F] = { "outs", OP_PRESENT, 0 }, +// // 6D INS m16, DX +// // 6D INS m32, DX +// // DX is implied +// // ES:(E)DI is implied +// [0x06D] = { "ins", OP_PRESENT, 0 }, +// +// // A1 MOV AX, moffs16 +// // A1 MOV EAX, moffs32 + [0x0A1] = { OPT_MOV, 
OP_PRESENT, 2, OP_ARG2_CT_DEF(64, 0) }, +// // A3 MOV moffs16, AX +// // A3 MOV moffs32, EAX + [0x0A3] = { OPT_MOV, OP_PRESENT, 2, OP_ARG2_TC_DEF(64, 0) }, +// +// // 6A ib PUSH imm8 + [0x06A] = { OPT_PUSH, OP_PRESENT, 1, OP_ARG1_I_DEF(8) }, +// // 68 iw PUSH imm16 +// // 68 id PUSH imm32 + [0x068] = { OPT_PUSH, OP_PRESENT, 1, OP_ARG1_I_DEF(32) }, +// +// // Instructions with in-opcode register +// // 50 + rw PUSH r16 +// // 50 + rd PUSH r32 +// // REX.W + 50 + rd PUSH r64 + [0x050] = { OPT_PUSH, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(0) }, + [0x051] = { OPT_PUSH, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(1) }, + [0x052] = { OPT_PUSH, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(2) }, + [0x053] = { OPT_PUSH, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(3) }, + [0x054] = { OPT_PUSH, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(4) }, + [0x055] = { OPT_PUSH, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(5) }, + [0x056] = { OPT_PUSH, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(6) }, + [0x057] = { OPT_PUSH, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(7) }, + // 58 + rw POP r16 + // 58 + rd POP r32 + // REX.W + 58 + rd POP r64 + [0x058] = { OPT_POP, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(0) }, + [0x059] = { OPT_POP, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(1) }, + [0x05A] = { OPT_POP, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(2) }, + [0x05B] = { OPT_POP, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(3) }, + [0x05C] = { OPT_POP, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(4) }, + [0x05D] = { OPT_POP, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(5) }, + [0x05E] = { OPT_POP, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(6) }, + [0x05F] = { OPT_POP, OP_PRESENT | OP_DEF64, 1, OP_ARG1_C_DEF(7) }, + // B0 + rb ib MOV r8, imm8 + [0x0B0] = { OPT_MOV, OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 0) }, + [0x0B1] = { OPT_MOV, OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 1) }, + [0x0B2] = { OPT_MOV, OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 2) }, + [0x0B3] = { OPT_MOV, OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 3) }, + [0x0B4] = { 
OPT_MOV, OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 4) }, + [0x0B5] = { OPT_MOV, OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 5) }, + [0x0B6] = { OPT_MOV, OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 6) }, + [0x0B7] = { OPT_MOV, OP_PRESENT | OP_DEF8, 2, OP_ARG2_CI_DEF(8, 7) }, + // B8 + rw iw MOV r16, imm16 + // B8 + rd id MOV r32, imm32 + // REX.W + B8 + rd io MOV r64, imm64 + [0x0B8] = { OPT_MOV, OP_PRESENT, 2, OP_ARG2_CI_DEF(64, 0) }, + [0x0B9] = { OPT_MOV, OP_PRESENT, 2, OP_ARG2_CI_DEF(64, 1) }, + [0x0BA] = { OPT_MOV, OP_PRESENT, 2, OP_ARG2_CI_DEF(64, 2) }, + [0x0BB] = { OPT_MOV, OP_PRESENT, 2, OP_ARG2_CI_DEF(64, 3) }, + [0x0BC] = { OPT_MOV, OP_PRESENT, 2, OP_ARG2_CI_DEF(64, 4) }, + [0x0BD] = { OPT_MOV, OP_PRESENT, 2, OP_ARG2_CI_DEF(64, 5) }, + [0x0BE] = { OPT_MOV, OP_PRESENT, 2, OP_ARG2_CI_DEF(64, 6) }, + [0x0BF] = { OPT_MOV, OP_PRESENT, 2, OP_ARG2_CI_DEF(64, 7) }, +// +// +// // Instructions with ModR/M extension +// // DC /0 FADD m64fp +// [0x0DC] = { "fadd", OP_PRESENT | OP_RMEXT | OP_DEF64, 1, OP_ARG1_RM_DEF }, +// // DC /5 FSUBR m64fp +// [0x5DC] = { "fsubr", OP_PRESENT | OP_RMEXT | OP_DEF64, 1, OP_ARG1_RM_DEF }, +// + // FF /0 INC r/m16 + // FF /0 INC r/m32 + [0x0FF] = { OPT_INC, OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, + // FF /2 CALL r/m16 + // FF /2 CALL r/m32 + [0x2FF] = { OPT_CALL, OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// // FF /1 DEC r/m16 +// // FF /1 DEC r/m32 +// [0x1FF] = { "dec", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, + // FF /3 CALL m16:16 + // FF /3 CALL m16:32 + [0x3FF] = { OPT_CALL, OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// // FF /4 JMP r/m16 +// // FF /4 JMP r/m32 +// [0x4FF] = { "jmp", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// // FF /6 PUSH r/m16 +// // FF /6 PUSH r/m32 +// [0x6FF] = { "push", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// +// // 8F /0 POP r/m16 +// // 8F /0 POP r/m32 +// [0x08F] = { "pop", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// + // C1 /0 ib ROL r/m16, imm8 + // C1 /0 ib ROL r/m32, imm8 
+ [0x0C1] = { OPT_ROL, OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, + // C1 /4 ib SAL r/m16, imm8 + // C1 /4 ib SAL r/m32, imm8 + [0x4C1] = { OPT_SAL, OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, + // C1 /5 ib SHR r/m16, imm8 + // C1 /5 ib SHR r/m32, imm8 + [0x5C1] = { OPT_SHR, OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, + // C1 /7 ib SAR r/m16, imm8 + // C1 /7 ib SAR r/m32, imm8 + [0x7C1] = { OPT_SAR, OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, + + // C7 /0 iw MOV r/m16, imm16 + // C7 /0 id MOV r/m32, imm32 + // REX.W + C7 /0 id MOV r/m64, imm32 + [0x0C7] = { OPT_MOV, OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(32) }, +// +// // TODO: OP_ARG2_RM1_DEF +// // D1 /0 ROL r/m16, 1 +// // D1 /0 ROL r/m32, 1 +// [0x0D1] = { "rol", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// // D1 /4 SHL r/m16, 1 +// // D1 /4 SHL r/m32, 1 +// [0x4D1] = { "shl", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// // D1 /5 SHR r/m16, 1 +// // D1 /5 SHR r/m32, 1 +// [0x5D1] = { "shr", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// // D1 /7 SAR r/m16, 1 +// // D1 /7 SAR r/m32, 1 +// [0x7D1] = { "sar", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// +// // D3 /0 ROL r/m16, CL +// // D3 /0 ROL r/m32, CL +// [0x0D3] = { "rol", OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMC_DEF(1), }, +// // D3 /4 SHL r/m16, CL +// // D3 /4 SHL r/m32, CL +// [0x4D3] = { "shl", OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMC_DEF(1), }, +// // D3 /5 SHR r/m16, CL +// // D3 /5 SHR r/m32, CL +// [0x5D3] = { "shr", OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMC_DEF(1), }, +// // D3 /7 SAR r/m16, CL +// // D3 /7 SAR r/m32, CL +// [0x7D3] = { "sar", OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMC_DEF(1), }, + + // 81 /0 iw ADD r/m16, imm16 + // 81 /0 id ADD r/m32, imm32 + [0x081] = { OPT_ADD, OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(32) }, + // 81 /1 iw OR r/m16, imm16 + // 81 /1 id OR r/m32, imm32 + [0x181] = { OPT_OR, OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(32) }, + // 81 /4 iw AND r/m16, imm16 + // 81 /4 id AND r/m32, imm32 + [0x481] = { 
OPT_AND, OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(32) }, + // 81 /5 iw SUB r/m16, imm16 + // 81 /5 id SUB r/m32, imm32 + [0x581] = { OPT_SUB, OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(32) }, + // 81 /7 iw CMP r/m16, imm16 + // 81 /7 id CMP r/m32, imm32 + [0x781] = { OPT_CMP, OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(32) }, + + // 83 /0 iw ADD r/m16, imm8 + // 83 /0 id ADD r/m32, imm8 + [0x083] = { OPT_ADD, OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, +// // 83 /1 iw OR r/m16, imm8 +// // 83 /1 id OR r/m32, imm8 +// [0x183] = { "or", OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, +// // 83 /4 iw AND r/m16, imm8 +// // 83 /4 id AND r/m32, imm8 +// [0x483] = { "and", OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, + // 83 /5 iw SUB r/m16, imm8 + // 83 /5 id SUB r/m32, imm8 + [0x583] = { OPT_SUB, OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, +// // 83 /7 iw CMP r/m16, imm8 +// // 83 /7 id CMP r/m32, imm8 +// [0x783] = { "cmp", OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(8) }, +// +// // F7 /0 iw TEST r/m16, imm16 +// // F7 /0 id TEST r/m32, imm32 +// [0x0F7] = { "test", OP_PRESENT | OP_RMEXT, 2, OP_ARG2_RMI_DEF(32) }, +// // F7 /2 NOT r/m16 +// // F7 /2 NOT r/m32 +// [0x2F7] = { "not", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// // F7 /3 NEG r/m16 +// // F7 /3 NEG r/m32 +// [0x3F7] = { "neg", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// // F7 /6 DIV r/m16 +// // F7 /6 DIV r/m32 +// [0x6F7] = { "div", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// // F7 /7 IDIV r/m16 +// // F7 /7 IDIV r/m32 +// [0x7F7] = { "idiv", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// +// + // 16-bit only opcodes (overlap REX prefix) + // 40 + rw INC r16 + [0x040] = { OPT_INC, OP_PRESENT, 1, OP_ARG1_C_DEF(0) }, + [0x041] = { OPT_INC, OP_PRESENT, 1, OP_ARG1_C_DEF(1) }, + [0x042] = { OPT_INC, OP_PRESENT, 1, OP_ARG1_C_DEF(2) }, + [0x043] = { OPT_INC, OP_PRESENT, 1, OP_ARG1_C_DEF(3) }, + [0x044] = { OPT_INC, OP_PRESENT, 1, OP_ARG1_C_DEF(4) }, + [0x045] = { OPT_INC, OP_PRESENT, 1, 
OP_ARG1_C_DEF(5) }, + [0x046] = { OPT_INC, OP_PRESENT, 1, OP_ARG1_C_DEF(6) }, + [0x047] = { OPT_INC, OP_PRESENT, 1, OP_ARG1_C_DEF(7) }, + + // 48 + rw DEC r16 + [0x048] = { OPT_DEC, OP_PRESENT, 1, OP_ARG1_C_DEF(0) }, + [0x049] = { OPT_DEC, OP_PRESENT, 1, OP_ARG1_C_DEF(1) }, + [0x04A] = { OPT_DEC, OP_PRESENT, 1, OP_ARG1_C_DEF(2) }, + [0x04B] = { OPT_DEC, OP_PRESENT, 1, OP_ARG1_C_DEF(3) }, + [0x04C] = { OPT_DEC, OP_PRESENT, 1, OP_ARG1_C_DEF(4) }, + [0x04D] = { OPT_DEC, OP_PRESENT, 1, OP_ARG1_C_DEF(5) }, + [0x04E] = { OPT_DEC, OP_PRESENT, 1, OP_ARG1_C_DEF(6) }, + [0x04F] = { OPT_DEC, OP_PRESENT, 1, OP_ARG1_C_DEF(7) }, +}; + +static struct op_info ops_0f[0x800] = { +// // 8-bit operand +// // 0F 94 cw SETZ r/m8 +// [0x094] = { "setz", OP_PRESENT | OP_DEF8, 1, OP_ARG1_RM_DEF }, +// // 0F 95 cw SETNZ r/m8 +// [0x095] = { "setnz", OP_PRESENT | OP_DEF8, 1, OP_ARG1_RM_DEF }, +// // 0F 9C cw SETL r/m8 +// [0x09C] = { "setl", OP_PRESENT | OP_DEF8, 1, OP_ARG1_RM_DEF }, +// +// // 0F B6 /r MOVZX r16, r/m8 +// // 0F B6 /r MOVZX r32, r/m8 +// [0x0B6] = { "movzx", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RRM_DEF }, +// +// // 0F BE /r MOVSX r16, r/m8 +// // 0F BE /r MOVSX r32, r/m8 +// [0x0BE] = { "movsx", OP_PRESENT | OP_DEF8, 2, OP_ARG2_RRM_DEF }, +// +// // Everything else +// // 0F 00 /0 SLDT r/m16 +// [0x000] = { "sldt", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// // 0F 00 /3 LTR r/m16 +// [0x300] = { "ltr", OP_PRESENT | OP_RMEXT, 1, OP_ARG1_RM_DEF }, +// + // 0F 0B UD2 + [0x00B] = { OPT_UD2, OP_PRESENT, 0 }, +// // 0F 82 cw JC rel16 +// // 0F 82 cd JC rel32 +// [0x082] = { "jc", OP_PRESENT, 1, OP_ARG1_REL_DEF(32) }, +// // 0F 83 cw JNC rel16 +// // 0F 83 cd JNC rel32 +// [0x083] = { "jnc", OP_PRESENT, 1, OP_ARG1_REL_DEF(32) }, +// // 0F 84 cw JZ rel16 +// // 0F 84 cd JZ rel32 +// [0x084] = { "jz", OP_PRESENT, 1, OP_ARG1_REL_DEF(32) }, +// // 0F 85 cw JNZ rel16 +// // 0F 85 cd JNZ rel32 +// [0x085] = { "jnz", OP_PRESENT, 1, OP_ARG1_REL_DEF(32) }, +// // 0F 86 cw JNA rel16 +// // 
0F 86 cd JNA rel32 +// [0x086] = { "jna", OP_PRESENT, 1, OP_ARG1_REL_DEF(32) }, +// // 0F 87 cw JA rel16 +// // 0F 87 cd JA rel32 +// [0x087] = { "ja", OP_PRESENT, 1, OP_ARG1_REL_DEF(32) }, +// // 0F 88 cw JS rel16 +// // 0F 88 cd JS rel32 +// [0x088] = { "js", OP_PRESENT, 1, OP_ARG1_REL_DEF(32) }, +// // 0F 8D cw JNL rel16 +// // 0F 8D cd JNL rel32 +// [0x08D] = { "jnl", OP_PRESENT, 1, OP_ARG1_REL_DEF(32) }, +// // 0F 8E cw JNG rel16 +// // 0F 8E cd JNG rel32 +// [0x08E] = { "jng", OP_PRESENT, 1, OP_ARG1_REL_DEF(32) }, +// + // TODO: this instruction's operand size depends on CPU operating mode + // 0F 20 /r MOV r32, CR0-CR7 + // 0F 20 /r MOV r64, CR0-CR7 + [0x020] = { OPT_MOV, OP_PRESENT, 2, { + { 1, ARG_SRC_MODRM, .rm = 1 }, + { 0, ARG_SRC_MODRM_CR, .rm = 0 } + }}, + // 0F 22 /r MOV CR0-CR7, r32 + // 0F 22 /r MOV CR0-CR7, r64 + [0x022] = { OPT_MOV, OP_PRESENT, 2, { + { 1, ARG_SRC_MODRM_CR, .rm = 0 }, + { 0, ARG_SRC_MODRM, .rm = 1 } + }}, +// +// // 0F 41 /r CMOVNO r16, r/m16 +// [0x041] = { "cmovno", OP_PRESENT, 2, OP_ARG2_RMR_DEF }, +// +// // 0F A4 /r ib SHLD r/m16, r16, imm8 +// // 0F A4 /r ib SHLD r/m32, r32, imm8 +// [0x0A4] = { "shld", OP_PRESENT, 3, OP_ARG3_RRMI(8) }, +// +// // 0F B7 /r MOVZX r32, r/m16 +// // REX.W + 0F B7 /r MOVZX r64, r/m32 +// [0x0B7] = { "movzx", OP_PRESENT, 2, OP_ARG2_RRM_DEF }, +// +// // 0F 08 INVD +// [0x008] = { "invd", OP_PRESENT, 0 }, +// +// // 0F AF /r IMUL r16, r/m16 +// // 0F AF /r IMUL r32, r/m32 +// // REX.W + 0F AF /r IMUL r64, r/m64 +// [0x0AF] = { "imul", OP_PRESENT, 2, OP_ARG2_RRM_DEF }, +// +// // 0F FF /r UD0 r32, r/m32 +// [0x0FF] = { "ud0", OP_PRESENT, 2, OP_ARG2_RMR_DEF }, +// +// // 0F 05 SYSCALL +// [0x005] = { "syscall", OP_PRESENT, 0 }, +// +// // SSE shit that appeared in BIOS I've tried to disassemble +// // 0F 10 /r MOVUPS xmm1, xmm2/m128 +// [0x010] = { "movups", OP_PRESENT | OP_SSE, 2, OP_ARG2_SSE_RMR_DEF }, +}; + +static struct op_info ops_0f01_swapgs = { + OPT_SWAPGS, OP_PRESENT, 0 +}; +#pragma GCC 
diagnostic pop + +ssize_t x86_match_op(const uint8_t *code, struct op_info **info, uint8_t *modrm) { + size_t pos = 0; + + if (code[0] == 0x0F) { + if (code[1] == 0x38) { + return -1; + } else if (code[1] == 0x3A) { + return -1; + } else { + // Omit 0x0F + ++pos; +#ifdef PRINT_OPCODES + printf("0F "); + printf("%02hhX ", code[pos]); +#endif + uint8_t l2_op = code[pos++]; + + if (l2_op == 0x01) { + // Special? + uint8_t l3_op = code[pos++]; + + switch (l3_op) { + case 0xF8: + *info = &ops_0f01_swapgs; + *modrm = 0; + return 3; + default: + return -1; + } + + return -1; + } + + if (ops_0f[l2_op].flags & OP_PRESENT) { + // Check if opcode has a ModR/M extension + *info = &ops_0f[l2_op]; + + if ((*info)->flags & OP_RMEXT) { + uint8_t r; + *modrm = code[pos++]; + + r = ((*modrm) >> 3) & 0x7; + *info = &ops_0f[(((uint16_t) r) << 8) | l2_op]; + + if (!((*info)->flags & OP_PRESENT)) { +#ifdef PRINT_OPCODES + printf("%02hhX ", *modrm); + printf(" (R = %u) ", r); +#endif + return -1; + } + } + + return pos; + } + + return -1; + } + } + +#ifdef PRINT_OPCODES + printf("%02hhX ", code[0]); +#endif + + uint8_t l1_op = code[pos++]; + + if (ops_1[l1_op].flags & OP_PRESENT) { + // Check if opcode has a ModR/M extension + *info = &ops_1[l1_op]; + + if ((*info)->flags & OP_RMEXT) { + uint8_t r; + *modrm = code[pos++]; + + r = ((*modrm) >> 3) & 0x7; + *info = &ops_1[(((uint16_t) r) << 8) | l1_op]; + if (!((*info)->flags & OP_PRESENT)) { +#ifdef PRINT_OPCODES + printf("%02hhX ", *modrm); + printf(" (R = %u) ", r); +#endif + return -1; + } + } + + return pos; + } + + return -1; +} diff --git a/arch/amd64/hw/exc.c b/arch/amd64/hw/exc.c index 3d34aa2..f3516f7 100644 --- a/arch/amd64/hw/exc.c +++ b/arch/amd64/hw/exc.c @@ -2,6 +2,7 @@ #include "arch/amd64/smp/ipi.h" #include "arch/amd64/smp/smp.h" #endif +#include "arch/amd64/disasm/front.h" #include "arch/amd64/cpu.h" #include "sys/mem/phys.h" #include "sys/thread.h" @@ -108,6 +109,40 @@ int do_pfault(struct amd64_exception_frame *frame, 
uintptr_t cr2, uintptr_t cr3) } } +static int exc_safe_read_byte(mm_space_t space, uintptr_t addr, uint8_t *byte) { + uintptr_t page; + page = mm_map_get(space, addr & ~0xFFF, NULL); + if (page == MM_NADDR) { + return -1; + } + *byte = *(uint8_t *) MM_VIRTUALIZE(page + (addr & 0xFFF)); + return 0; +} + +static void exc_dump_code(int level, uintptr_t rip) { +#define DISASM_BYTES 72 + uintptr_t cr3; + mm_space_t space; + uint8_t bytes[DISASM_BYTES]; + size_t count; + + asm volatile ("movq %%cr3, %0":"=r"(cr3)); + space = (mm_space_t) MM_VIRTUALIZE(cr3); + + count = 0; + for (size_t i = 0; i < sizeof(bytes); ++i) { + // TODO: overflow check here + if (exc_safe_read_byte(space, rip + i, &bytes[i]) != 0) { + break; + } + ++count; + } + + dump_segment(bytes, rip, count); + debug_dump(level, bytes, count); +#undef DISASM_BYTES +} + static void exc_dump(int level, struct amd64_exception_frame *frame) { uintptr_t cr2, cr3; @@ -193,6 +228,9 @@ static void exc_dump(int level, struct amd64_exception_frame *frame) { } else { debugf(level, "%rip is in unknown location\n"); } + + exc_dump_code(level, frame->rip); + debugs(level, "\033[0m"); } @@ -231,6 +269,7 @@ void amd64_exception(struct amd64_exception_frame *frame) { case X86_EXCEPTION_PF: kerror("SIGSEGV in %d\n", thread_self->proc->pid); exc_dump(DEBUG_DEFAULT, frame); + while (1); thread_signal(thread_self, SIGSEGV); return; } diff --git a/arch/amd64/mm/mm.c b/arch/amd64/mm/mm.c index 3be51d1..91c0adb 100644 --- a/arch/amd64/mm/mm.c +++ b/arch/amd64/mm/mm.c @@ -20,9 +20,13 @@ __attribute__((aligned(0x1000))) uint64_t kernel_pd_res[6 * 512]; extern int _kernel_end; void userptr_check(const void *ptr) { - // TODO: "hardened" check - also check that the address is mapped + uintptr_t cr3; + asm volatile ("mov %%cr3, %0":"=r"(cr3)); + mm_space_t space = (mm_space_t) MM_VIRTUALIZE(cr3); + assert(ptr, "invalid userptr: NULL\n"); assert((uintptr_t) ptr < KERNEL_VIRT_BASE, "invalid userptr: in kernel space (%p)\n", ptr); + 
// ---- include/arch/amd64/disasm/front.h ----

// Disassembler front end entry point; exc_dump_code() passes it the bytes
// read at the faulting rip, the rip itself as `base`, and the byte count.
void dump_segment(const uint8_t *bytes, uintptr_t base, size_t size);

// ---- include/arch/amd64/disasm/util.h ----
//
// Sign-extension helpers. Each one widens `src` first and then subtracts
// 2^N when the top bit of the N-bit input is set, so the result is computed
// entirely in the signed domain (no out-of-range unsigned-to-signed
// conversion is involved).

// Sign-extend a 16-bit value to 32 bits
static inline int32_t movsx16(uint16_t src) {
    int32_t value = src;
    return (value & 0x8000) ? value - 0x10000 : value;
}

// Sign-extend an 8-bit value to 32 bits
static inline int32_t movsx8(uint8_t src) {
    int32_t value = src;
    return (value & 0x80) ? value - 0x100 : value;
}

// Sign-extend a 32-bit value to 64 bits
static inline int64_t qmovsx32(uint32_t src) {
    int64_t value = src;
    return (value & 0x80000000LL) ? value - 0x100000000LL : value;
}

// Sign-extend a 16-bit value to 64 bits
static inline int64_t qmovsx16(uint16_t src) {
    int64_t value = src;
    return (value & 0x8000) ? value - 0x10000 : value;
}

// Sign-extend an 8-bit value to 64 bits
static inline int64_t qmovsx8(uint8_t src) {
    int64_t value = src;
    return (value & 0x80) ? value - 0x100 : value;
}

// Truncate `src` to its low 16 bits when `sz` is 16; every other size
// returns `src` unchanged.
// NOTE(review): only the 16-bit case is masked - presumably wider values
// must keep their sign extension; confirm before adding more sizes.
static inline int64_t bmask(int64_t src, size_t sz) {
    return sz == 16 ? (src & 0xFFFF) : src;
}

// ---- include/arch/amd64/disasm/x86.h (constants) ----

// struct op_info.flags bits
// Present bit
#define OP_PRESENT          (1 << 0)
// The operand size for the instruction defaults to 64 bits in long mode
#define OP_DEF64            (1 << 2)
// Instruction uses 8 bit operands
#define OP_DEF8             (1 << 3)
// Instruction uses the reg field of ModR/M as an extension of the opcode
// (x86_match_op() puts that field into bits 8..10 of the table index)
#define OP_RMEXT            (1 << 4)
// SSE
#define OP_SSE              (1 << 5)

#define MODE_REAL           0
#define MODE_LEGACY         1
#define MODE_LONG           2

// struct op_arg_info.source values
#define ARG_SRC_MODRM       0
#define ARG_SRC_IMM0        1
#define ARG_SRC_OPREG       2
#define ARG_SRC_IMMREL      3
#define ARG_SRC_SREG        4
#define ARG_SRC_MOFFS       5
#define ARG_SRC_SSE         6
#define ARG_SRC_IMM1        7
#define ARG_SRC_OPXREG      8
#define ARG_SRC_MODRM_CR    9

// struct rm_operand.type values
// reg
#define RMOP_R              0
// [reg + disp]
#define RMOP_MRD            1
// [reg + reg + disp]
#define RMOP_MRRD           2
// [sib + disp]
#define RMOP_MSD            3
// [disp]
#define RMOP_MD             4
// Non-general-purpose register: reg2 holds the register class
// (RMX_TYPE_*, 0 - Sreg), reg the register number
#define RMOP_XR             5
// [ip + disp]
#define RMOP_MCD            6
// imm
#define RMOP_I              7
// ip + disp
#define RMOP_CD             8

// 16-bit and higher
#define RM_AX               0
#define RM_CX               1
#define RM_DX               2
#define RM_BX               3
#define RM_SP               4
#define RM_BP               5
#define RM_SI               6
#define RM_DI               7

// 8-bit operations
#define RM8_AL              0
#define RM8_CL              1
#define RM8_DL              2
#define RM8_BL              3
#define RM8_AH              4
#define RM8_CH              5
#define RM8_DH              6
#define RM8_BH              7

// Segment registers
#define RMX_ES              0
#define RMX_CS              1
#define RMX_SS              2
#define RMX_DS              3
#define RMX_FS              4
#define RMX_GS              5

// Register classes for RMOP_XR operands
#define RMX_TYPE_SEG        0
#define RMX_TYPE_FPU        1
#define RMX_TYPE_MMX        2
#define RMX_TYPE_XMM        3
#define RMX_TYPE_YMM        4
#define RMX_TYPE_CR         5
// ---- include/arch/amd64/disasm/x86_op.h ----

// Instruction identifiers stored in struct op_info.opt.
// Numbering starts at 1, so 0 does not name any instruction.
enum opt {
    OPT_PUSH = 1,
    OPT_POP,
    OPT_MOV,
    OPT_STOSB,

    OPT_INC,
    OPT_DEC,
    OPT_ADD,
    OPT_SUB,
    OPT_XOR,
    OPT_OR,
    OPT_AND,
    OPT_ROL,
    OPT_SAL,
    OPT_SHR,
    OPT_SAR,

    OPT_TEST,
    OPT_CMP,

    OPT_CALL,
    OPT_JMP,
    OPT_JZ,
    OPT_JA,
    OPT_JNA,
    OPT_JNZ,
    OPT_JL,
    OPT_JNL,
    OPT_JP,
    OPT_JS,
    OPT_JNS,
    OPT_JG,
    OPT_JNG,
    OPT_JO,
    OPT_JNO,
    OPT_JC,
    OPT_JNC,
    OPT_LOOP,
    OPT_LOOPZ,
    OPT_LOOPNZ,
    OPT_JCXZ,
    OPT_RET,

    OPT_IRET,
    OPT_SWAPGS,
    OPT_UD2,
};

// ---- include/arch/amd64/disasm/x86.h (continued) ----

#define RMX_TYPE_DEBUG      6

// struct op.prefices bits
#define PREF_OP_SIZE        (1 << 0)
#define PREF_AD_SIZE        (1 << 1)
// Bit 31 must be built from an unsigned 1: (1 << 31) shifts into the sign
// bit of int, which is undefined behaviour and gives a negative mask
#define PREF_LOCK           (1U << 31)
#define PREF_REX            (1 << 30)
#define PREF_REP            (1 << 2)
#define PREF_SEG_DS         (1 << 3)
#define PREF_SEG_ES         (1 << 4)
#define PREF_SEG_FS         (1 << 5)
#define PREF_SEG_GS         (1 << 6)
#define PREF_SEG_SS         (1 << 7)
#define PREF_SEG_CS         (1 << 8)

#define REX_W               (1 << 3)
#define REX_R               (1 << 2)
#define REX_X               (1 << 1)
#define REX_B               (1 << 0)

// One decoded operand
struct rm_operand {
    // Operand form, one of RMOP_*
    uint8_t type;

    uint8_t mod;
    // Register number
    uint8_t reg;
    // Second register for [reg + reg + disp] style operands;
    // register class (RMX_TYPE_*) for RMOP_XR operands
    uint8_t reg2;
    // Immediate or displacement value
    int64_t imm;
    uint8_t seg;
};

// Static description of one operand of an opcode table entry
struct op_arg_info {
    // Direction:
    // 0 - Source operand
    // 1 - Destination operand
    uint8_t dir;
    // Where the operand is taken from, one of ARG_SRC_*
    uint8_t source;
    union {
        // Size of the immediate operand in bits (maximum size)
        uint8_t imm_size;
        // 0 - the argument is R part of ModR/M
        // 1 - the argument is RM part of ModR/M
        uint8_t rm;
        // Register number
        uint8_t reg;
    };
    uint8_t ext;
};

// Opcode table entry
struct op_info {
    // Which instruction this opcode encodes
    enum opt opt;
    // OP_* bits
    uint8_t flags;
    // Number of entries used in argv
    int argc;
    struct op_arg_info argv[8];
};

// One decoded instruction
struct op {
    // Matching opcode table entry (NULL is printed as "BAD" by the dumper)
    struct op_info *info;
    uint64_t addr;
    uintptr_t pos;
    size_t len;

    // PREF_* bits
    uint32_t prefices;
    uint8_t rex;
    size_t operand_size;
    size_t address_size;
    size_t argc;
    struct rm_operand argv[8];
};

void x86_set_mode(int mode);
ssize_t x86_read_operands(const uint8_t *data, struct op *op, const uint8_t *modrm);
ssize_t x86_match_op(const uint8_t *data, struct op_info **info, uint8_t *modrm);
ssize_t read_op(const uint8_t *data, uint64_t base_addr, struct op *op);