//先判断opcode后面的字节有没有ModRM字节 for (int icount = 0; icount <3;icount++) { switch (decode_struct.AddressingMethod[icount]) { case C: case D: case G: case P: case S: case V: case E: case M: case N: case Q: case R: case U: case W: ModRM_Flag = true; if (0x4== RM(*(POpcode + 1)) && 0x3 != MOD(*(POpcode + 1)))//当ModRM.r/m == 100(0x4)且Mod ≠11b时,存在SIB字节 SIB_Flag = true; if (0x0 == MOD(*(POpcode + 1)) && 0x5 == RM(*(POpcode + 1)))//当ModRM.r/m == 101(0x5)且MOD == 0时,disp为4字节 idispcount = 4; if (0x1 == MOD(*(POpcode + 1)))//当ModRM.MOD == 1时,disp为1字节 idispcount = 1; if (0x2 == MOD(*(POpcode + 1)))//当ModRM.MOD == 2时,disp为4字节 idispcount = 4; if (true == SIB_Flag && 0x5 == BASE(*(POpcode + 2)) && 0x0 == MOD(*(POpcode + 1)))//有SIB且base域为5,当ModRM.MOD == 0时,disp为4字节 idispcount = 4; if (true == SIB_Flag && 0x5 == BASE(*(POpcode + 2)) && 0x1 == MOD(*(POpcode + 1)))//有SIB且base域为5,当ModRM.MOD == 1时,disp为1字节 idispcount = 1; if (true == SIB_Flag && 0x5 == BASE(*(POpcode + 2)) && 0x2 == MOD(*(POpcode + 1)))//有SIB且base域为5,当ModRM.MOD == 2时,disp为4字节 idispcount = 4; break; default: break; } }
确定一条指令包含什么部分:
先确定有没有ModRM字节-->然后确定有没有SIB字节
|
|-->确定有没有Disp字节(有几字节?):根据3个操作数的寻址方式和类型,确认后面还有什么字节
再确定有没有 immediate 字节(有几字节)这个可以到最后解码immediate字节的时候再做。
一、先解码寄存器操作数
// General-purpose register names for legacy and compatibility modes, and for
// 64-bit mode when no REX prefix is present.
// Indexed as [operand size][register number].
const char *GPRs32[3][8] =
{
    /* 8-bit  */ { "AL",  "CL",  "DL",  "BL",  "AH",  "CH",  "DH",  "BH"  },
    /* 16-bit */ { "AX",  "CX",  "DX",  "BX",  "SP",  "BP",  "SI",  "DI"  },
    /* 32-bit */ { "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI" }
};
// General-purpose register names for 64-bit mode with a REX prefix.
// Indexed as [operand size][register number 0..15].
const char *GPRs64[4][16] =
{
    /* 8-bit  */
    { "AL",  "CL",  "DL",  "BL",  "SPL", "BPL", "SIL", "DIL",
      "R8B", "R9B", "R10B", "R11B", "R12B", "R13B", "R14B", "R15B" },
    /* 16-bit */
    { "AX",  "CX",  "DX",  "BX",  "SP",  "BP",  "SI",  "DI",
      "R8W", "R9W", "R10W", "R11W", "R12W", "R13W", "R14W", "R15W" },
    /* 32-bit */
    { "EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI",
      "R8D", "R9D", "R10D", "R11D", "R12D", "R13D", "R14D", "R15D" },
    /* 64-bit */
    { "RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI",
      "R8",  "R9",  "R10",  "R11",  "R12",  "R13",  "R14",  "R15" }
};
switch (decode_struct.AddressingMethod[icount])
{
case rAX:
case rCX:
case rDX:
case rBX:
case rSP:
case rBP:
case rSI:
case rDI:
case r8:
case r9:
case r10:
case r11:
case r12:
case r13:
case r14:
case r15: // Register operand named directly by the opcode-table entry
{
    // NOTE(review): the addressing-method value is used directly as the
    // column index into the name tables - confirm rAX..r15 map to 0..15.
    const int RegIndex = decode_struct.AddressingMethod[icount];
    const char *RegName;

    if (UNCON != decode_struct.OperandType[icount])
    {
        // The entry carries an explicit size: use it as the row index as-is.
        RegName = GPRs64[decode_struct.OperandType[icount]][RegIndex];
    }
    else if (MODE32 == Mode)
    {
        // No size given, 32-bit mode: the operand-size prefix selects 16-bit.
        RegName = GPRs32[OPSizeFlag ? _16BIT : _32BIT][RegIndex];
    }
    else if (REX_W_Flag || REX_B_Flag)
    {
        // No size given, 64-bit mode: REX.W or REX.B selects the 64-bit names.
        RegName = GPRs64[_64BIT][RegIndex];
    }
    else
    {
        // No size given, 64-bit mode, neither REX.W nor REX.B set.
        RegName = GPRs64[OPSizeFlag ? _16BIT : _32BIT][RegIndex];
    }

    Result = Result + RegName;
    break;
}
寄存器的解码
1、根据当前的模式还有就是有没有操作数大小覆盖前缀来确定(没有大小描述的情况)
2、直接解码返回(有大小描述的情况)
二、解码Mod.reg域
//当不存在REX时,REG域数组,[具体的选项][REG] const char *ModRM_REG0[0xA][0x8] = { {"AL", "CL", "DL", "BL", "AH", "CH", "DH", "BH" },//reg8 {"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI" },//reg16 {"EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI"},//reg32 {"RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI"},//reg64 {"MMX0","MMX1","MMX2","MMX3","MMX4","MMX5","MMX6","MMX7"},//mmx {"XMM0","XMM1","XMM2","XMM3","XMM4","XMM5","XMM6","XMM7"},//xmm {"YMM0","YMM1","YMM2","YMM3","YMM4","YMM5","YMM6","YMM7"},//ymm {"ES", "CS", "SS", "DS", "FS", "GS", "invalid","invalid"},//sReg {"CR0", "CR1", "CR2", "CR3", "CR4", "CR5", "CR6", "CR7" },//cReg {"DR0", "DR1", "DR2", "DR3", "DR4", "DR5", "DR6", "DR7" } //dReg }; //当存在REX且REX.R为0时,REG域数组,[具体的选项][REG] const char *ModRM_REG10[0xA][0x8] = { {"AL", "CL", "DL", "BL", "SPL", "BPL", "SIL", "DIL"},//reg8 {"AX", "CX", "DX", "BX", "SP", "BP", "SI", "DI" },//reg16 {"EAX", "ECX", "EDX", "EBX", "ESP", "EBP", "ESI", "EDI"},//reg32 {"RAX", "RCX", "RDX", "RBX", "RSP", "RBP", "RSI", "RDI"},//reg64 {"MMX0","MMX1","MMX2","MMX3","MMX4","MMX5","MMX6","MMX7"},//mmx {"XMM0","XMM1","XMM2","XMM3","XMM4","XMM5","XMM6","XMM7"},//xmm {"YMM0","YMM1","YMM2","YMM3","YMM4","YMM5","YMM6","YMM7"},//ymm {"ES", "CS", "SS", "DS", "FS", "GS","invalid","invalid"},//sReg {"CR0", "CR1", "CR2", "CR3", "CR4", "CR5", "CR6", "CR7"},//cReg {"DR0", "DR1", "DR2", "DR3", "DR4", "DR5", "DR6", "DR7"} //dReg }; //当存在REX且REX.R为1时,REG域数组,[具体的选项][REG] const char *ModRM_REG11[0xA][0x8] = { {"R8B", "R9B", "R10B", "R11B", "R12B", "R13B", "R14B", "R15B"},//reg8 {"R8W", "R9W", "R10W", "R11W", "R12W", "R13W", "R14W", "R15W"},//reg16 {"R8D", "R9D", "R10D", "R11D", "R12D", "R13D", "R14D", "R15D"},//reg32 {"R8", "R9", "R10", "R11", "R12", "R13", "R14", "R15"},//reg64 {"MMX0","MMX1","MMX2", "MMX3", "MMX4", "MMX5", "MMX6", "MMX7"},//mmx {"XMM8","XMM9","XMM10","XMM11","XMM12","XMM13","XMM14","XMM15"},//xmm {"YMM8","YMM9","YMM10","YMM11","YMM12","YMM13","YMM14","YMM15"},//ymm {"ES", "CS", "SS", "DS", 
"FS", "GS", "invalid","invalid"}, //sReg {"CR8", "CR9", "CR10", "CR11", "CR12", "CR13", "CR14", "CR15" },//cReg {"DR8", "DR9", "DR10", "DR11", "DR12", "DR13", "DR14", "DR15" } //dReg }; //解码Mod.REG域函数 static void DecodeREG(BYTE* PModRM, CString &Result, bool REX_Flag, bool REX_W_Flag, bool REX_R_Flag, int Mode, bool OPSizeFlag, int DecodeType) { if (MODE32 == Mode) { if (GPRS != DecodeType) Result = Result + " " + ModRM_REG0[DecodeType][REG(*PModRM)] + " "; else { if (OPSizeFlag) Result = Result + " " + ModRM_REG0[REG16][REG(*PModRM)] + " "; else Result = Result + " " + ModRM_REG0[REG32][REG(*PModRM)] + " "; } } else { if (REX_Flag) { if (REX_R_Flag) { if (GPRS != DecodeType) Result = Result + " " + ModRM_REG11[DecodeType][REG(*PModRM)] + " "; else { if (REX_W_Flag) Result = Result + " " + ModRM_REG11[REG64][REG(*PModRM)] + " "; else { if (OPSizeFlag) Result = Result + " " + ModRM_REG11[REG16][REG(*PModRM)] + " "; else Result = Result + " " + ModRM_REG11[REG32][REG(*PModRM)] + " "; } } } else { if (GPRS != DecodeType) Result = Result + " " + ModRM_REG10[DecodeType][REG(*PModRM)] + " "; else { if (REX_W_Flag) Result = Result + " " + ModRM_REG10[REG64][REG(*PModRM)] + " "; else { if (OPSizeFlag) Result = Result + " " + ModRM_REG10[REG16][REG(*PModRM)] + " "; else Result = Result + " " + ModRM_REG10[REG32][REG(*PModRM)] + " "; } } } } else { if (GPRS != DecodeType) Result = Result + " " + ModRM_REG0[DecodeType][REG(*PModRM)] + " "; else { if (REX_W_Flag) Result = Result + " " + ModRM_REG0[REG64][REG(*PModRM)] + " "; else { if (OPSizeFlag) Result = Result + " " + ModRM_REG0[REG16][REG(*PModRM)] + " "; else Result = Result + " " + ModRM_REG0[REG32][REG(*PModRM)] + " "; } } } } }
使用三个二维数组表示[具体的选项][REG]
具体的选项包括reg8、reg16、reg32、reg64、mmx、xmm、ymm,其中通用寄存器需要确定大小(reg8~reg64)
使用一个函数解码,需要用到一个指向二维数组的指针(因为数组大小都是一样的)。
三、解码Mod.R/M域
// ModRM r/m field encoding, 32-bit and 64-bit addressing.
// An empty string marks an encoding that does not name a plain base register.

// 32-bit addressing, Mod != 11b. Indexed as [Mod][r/m].
const char *ModRM_RM32[3][8] =
{
    { "EAX", "ECX", "EDX", "EBX", "", "",    "ESI", "EDI" },
    { "EAX", "ECX", "EDX", "EBX", "", "EBP", "ESI", "EDI" },
    { "EAX", "ECX", "EDX", "EBX", "", "EBP", "ESI", "EDI" }
};

// 64-bit addressing, REX.B = 0, Mod != 11b. Indexed as [Mod][r/m].
const char *ModRM_RM64_0[3][8] =
{
    { "RAX", "RCX", "RDX", "RBX", "", "",    "RSI", "RDI" },
    { "RAX", "RCX", "RDX", "RBX", "", "RBP", "RSI", "RDI" },
    { "RAX", "RCX", "RDX", "RBX", "", "RBP", "RSI", "RDI" }
};

// 64-bit addressing, REX.B = 1, Mod != 11b. Indexed as [Mod][r/m].
const char *ModRM_RM64_1[3][8] =
{
    { "R8", "R9", "R10", "R11", "", "",    "R14", "R15" },
    { "R8", "R9", "R10", "R11", "", "R13", "R14", "R15" },
    { "R8", "R9", "R10", "R11", "", "R13", "R14", "R15" }
};

// 32-bit mode, Mod = 11b. Indexed as [register class][r/m].
// NOTE(review): this table has no reg64/ymm rows, so its row numbering differs
// from the 64-bit Mod = 11b tables below - confirm the caller accounts for it.
const char *ModRM_RM32_11[5][8] =
{
    { "AL",   "CL",   "DL",   "BL",   "AH",   "CH",   "DH",   "BH"   }, // reg8
    { "AX",   "CX",   "DX",   "BX",   "SP",   "BP",   "SI",   "DI"   }, // reg16
    { "EAX",  "ECX",  "EDX",  "EBX",  "ESP",  "EBP",  "ESI",  "EDI"  }, // reg32
    { "MMX0", "MMX1", "MMX2", "MMX3", "MMX4", "MMX5", "MMX6", "MMX7" }, // mmx
    { "XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7" }  // xmm
};

// 64-bit mode, REX.B = 0, Mod = 11b. Indexed as [register class][r/m].
const char *ModRM_RM64_110[7][8] =
{
    { "AL",   "CL",   "DL",   "BL",   "SPL",  "BPL",  "SIL",  "DIL"  }, // reg8
    { "AX",   "CX",   "DX",   "BX",   "SP",   "BP",   "SI",   "DI"   }, // reg16
    { "EAX",  "ECX",  "EDX",  "EBX",  "ESP",  "EBP",  "ESI",  "EDI"  }, // reg32
    { "RAX",  "RCX",  "RDX",  "RBX",  "RSP",  "RBP",  "RSI",  "RDI"  }, // reg64
    { "MMX0", "MMX1", "MMX2", "MMX3", "MMX4", "MMX5", "MMX6", "MMX7" }, // mmx
    { "XMM0", "XMM1", "XMM2", "XMM3", "XMM4", "XMM5", "XMM6", "XMM7" }, // xmm
    { "YMM0", "YMM1", "YMM2", "YMM3", "YMM4", "YMM5", "YMM6", "YMM7" }  // ymm
};

// 64-bit mode, REX.B = 1, Mod = 11b. Indexed as [register class][r/m].
const char *ModRM_RM64_111[7][8] =
{
    { "R8B",  "R9B",  "R10B",  "R11B",  "R12B",  "R13B",  "R14B",  "R15B"  }, // reg8
    { "R8W",  "R9W",  "R10W",  "R11W",  "R12W",  "R13W",  "R14W",  "R15W"  }, // reg16
    { "R8D",  "R9D",  "R10D",  "R11D",  "R12D",  "R13D",  "R14D",  "R15D"  }, // reg32
    { "R8",   "R9",   "R10",   "R11",   "R12",   "R13",   "R14",   "R15"   }, // reg64
    { "MMX0", "MMX1", "MMX2",  "MMX3",  "MMX4",  "MMX5",  "MMX6",  "MMX7"  }, // mmx
    { "XMM8", "XMM9", "XMM10", "XMM11", "XMM12", "XMM13", "XMM14", "XMM15" }, // xmm
    { "YMM8", "YMM9", "YMM10", "YMM11", "YMM12", "YMM13", "YMM14", "YMM15" }  // ymm
};
// The decoding code itself is too long to paste here; see the DecodeModRM
// function in the attached source code.
Mod为00b~10b时使用三个二维数组[Mod][R/M](分32位、64位REX.B = 0、64位REX.B = 1三种情况,其中Mod为01b、10b时还需要解析出disp字节);Mod为11b时单独使用另外三个二维数组[具体的选项][R/M](同样分32位、64位REX.B = 0、64位REX.B = 1三种情况)。根据具体的情况传入[具体选项]与[R/M],返回字符串。
注意有SIB字节的情况,这种情况单独处理
处理函数的步骤:
先通过位数(模式)、REX.B和Mod来确定二维数组
1、Mod ≠ 11b,直接通过[Mod][R/M]就可以返回,通过Mod与R/M确定有没有SIB需要特殊处理,有Disp就解析出来。
2、Mod = 11b,通用寄存器需要确定大小(reg8~reg64)作为[具体的选项],mmx xmm ymm不需要。不需要解析SIB和Disp
// SIB scaled-index name tables, indexed as [scale][index].
// Row n corresponds to scale factor 2^n (1, 2, 4, 8). An empty string marks
// index encoding 100b without REX.X, where the table supplies no index register.

// 32-bit addressing.
const char *ScaledIndex32[4][8] =
{
    { "EAX",   "ECX",   "EDX",   "EBX",   "", "EBP",   "ESI",   "EDI"   },
    { "EAX*2", "ECX*2", "EDX*2", "EBX*2", "", "EBP*2", "ESI*2", "EDI*2" },
    { "EAX*4", "ECX*4", "EDX*4", "EBX*4", "", "EBP*4", "ESI*4", "EDI*4" },
    { "EAX*8", "ECX*8", "EDX*8", "EBX*8", "", "EBP*8", "ESI*8", "EDI*8" }
};

// 64-bit addressing, REX.X = 0.
const char *ScaledIndex64_0[4][8] =
{
    { "RAX",   "RCX",   "RDX",   "RBX",   "", "RBP",   "RSI",   "RDI"   },
    { "RAX*2", "RCX*2", "RDX*2", "RBX*2", "", "RBP*2", "RSI*2", "RDI*2" },
    { "RAX*4", "RCX*4", "RDX*4", "RBX*4", "", "RBP*4", "RSI*4", "RDI*4" },
    { "RAX*8", "RCX*8", "RDX*8", "RBX*8", "", "RBP*8", "RSI*8", "RDI*8" }
};

// 64-bit addressing, REX.X = 1.
// Index 6 is R14; the original table misspelled it as "RR14" in every row.
const char *ScaledIndex64_1[4][8] =
{
    { "R8",   "R9",   "R10",   "R11",   "R12",   "R13",   "R14",   "R15"   },
    { "R8*2", "R9*2", "R10*2", "R11*2", "R12*2", "R13*2", "R14*2", "R15*2" },
    { "R8*4", "R9*4", "R10*4", "R11*4", "R12*4", "R13*4", "R14*4", "R15*4" },
    { "R8*8", "R9*8", "R10*8", "R11*8", "R12*8", "R13*8", "R14*8", "R15*8" }
};
SIB 字节由 ModRM.r/m = 100且Mod ≠ 11 引导出来
解码SIB字节
三个[scale][index]二维数组(32位,64位的当REX.X = 0与REX.X = 1,三种情况)
SIB解析数组[scale][index]确定索引和比例。64位的base域有两种情况,即REX.B = 0 或 REX.B = 1。当base域为101b时,其含义与Mod域有关联。
函数参数(指向ModR/M的指针,用来返回结果的CString对象的引用,REX_W_Flag,REX_X_Flag,REX_B_Flag,ModRM_Flag,SIB_Flag,idispcount(用来确定disp),Mode,OPSizeFlag,ADSizeFlag(用来确定操作数大小),
[具体选项](这个只是很少的情况需要))
四、解码immediate字节
根据前面判断的位置和大小取出(先转换指针类型,然后解指针)。
case E:
{
    // Mod != 11b: the E operand is decoded as a memory operand; Mod == 11b
    // means it is a register.
    const bool IsMemoryOperand = (0x3 != MOD(*(POpcode + 1)));

    // Emit the prefix string (when one is set) only in front of a memory operand.
    if (IsMemoryOperand && "" != StrPrefix1)
        Result = Result + " " + StrPrefix1 + ":";

    switch (decode_struct.OperandType[icount])
    {
    case v:
        // Memory form passes 0; register form lets DecodeModRM pick the GPR width.
        DecodeModRM((POpcode + 1), Result, REX_W_Flag, REX_X_Flag, REX_B_Flag,
                    ModRM_Flag, SIB_Flag, idispcount, Mode, OPSizeFlag, ADSizeFlag,
                    IsMemoryOperand ? 0 : GPRS);
        break;
    case b:
        // Byte operand: _BYTEP for the memory form, REG8 for the register form.
        DecodeModRM((POpcode + 1), Result, REX_W_Flag, REX_X_Flag, REX_B_Flag,
                    ModRM_Flag, SIB_Flag, idispcount, Mode, OPSizeFlag, ADSizeFlag,
                    IsMemoryOperand ? _BYTEP : REG8);
        break;
    default:
        break;
    }
    break;
}
我只是做了Primary Opcode Map一个表,所以会出现问题。不过这个主要是用来理解指令的,所以就这样子了。不想去打磨了,生命无价,不要浪费时间在造轮子上。理解原理就够了。(intel的文档我就不上传了,自己去官网下),还有就是源代码我是使用VS2017构建的。那个压缩文件就是源代码,还有测试代码