花指令是一种对抗静态分析的最基础手段(对动态调试基本不起作用),我们可以通过在程序的代码中添加一些不影响程序运行的垃圾机器码,干扰反汇编结果的准确性,从而达到保护程序的目的。
1、可执行花指令
见字知其意,即花指令在程序正常运行的时候会被执行,但执行前后通用寄存器的值不发生改变,也不影响程序原有功能的执行。
2、不可执行花指令
见字知其意,即花指令在程序正常运行的时候不会被执行,不影响程序原有的功能。
原则:保持堆栈的平衡
汇编小知识:
mov eax, 1 eax赋值为1
pop eax 将栈顶的值弹出赋给寄存器eax(注意:pop的操作数只能是寄存器或内存,不存在pop 1这种以立即数为操作数的写法)
pop ebp 将栈顶的值弹出赋给寄存器ebp
push 1 将1压入栈中
push ebp 将ebp的值压入栈中
sub eax, 1 eax的值减1
add eax, 1 eax的值加1
inc eax eax的值加1
dec eax eax的值减1
call x 调用地址为x的函数,近相对调用(call rel32)对应的硬编码为0xE8;而call [x]这类通过内存间接调用的形式,硬编码为0xFF 0x15
jmp x 跳转到x地址处,近跳转(jmp rel32)对应的硬编码为0xE9,短跳转(jmp short rel8)对应的硬编码为0xEB
nop 不做任何事情,相当于python中的pass,对应的硬编码为0x90
_emit 相当于db,byte类型,1字节
以下方式均通过内联汇编实现
1、单节方式
#include "stdafx.h"
/*
 * Demo 1: a single junk-instruction site in front of the real code.
 * jz and jnz both target Label, so whatever the state of ZF one of the two
 * jumps is taken and the emitted 0xE8 byte is never executed. A linear
 * disassembler, however, decodes 0xE8 as the opcode of a call and consumes
 * the following real instruction bytes as its operand.
 */
void Test(){
int a[3] = {1, 2, 3};
_asm{
jz Label;
jnz Label;
_emit 0xE8;
}
/* Real code resumes here; the function prints a[2], i.e. 6. */
Label:
a[0] = 2;
a[1] = 5;
a[2] = 6;
printf("%d\n", a[2]);
}
/* Entry point: runs the demo once and returns 0. argc/argv are unused. */
int main(int argc, char* argv[]){
Test();
return 0;
}
使用IDA打开,可以看到标红的地方就是花指令,因为call指令的存在,使得后面的4字节数据被错误识别成函数地址,进而导致接下来的分析出错。
人工Patch花指令的方式很简单:
选中call指令所在行,点击Edit选项>Patch program>Change byte。
将call的硬编码E8改为0x90(nop指令)
2、多节方式
#include "stdafx.h"
/*
 * Demo 2: two independent junk-instruction sites inside one function.
 * Each site uses a jz/jnz pair aimed at the same label, so the emitted byte
 * behind the pair is always skipped at run time.
 */
void Test(){
int a[3] = {1, 2, 3};
/* Site 1: the never-executed byte is 0xE9. */
_asm{
jz Label1;
jnz Label1;
_emit 0xE9;
}
Label1:
a[0] = 5;
a[1] = 6;
a[2] = 7;
/* Site 2: the never-executed byte is 0xE8. */
_asm{
jz Label2;
jnz Label2;
_emit 0xE8;
}
Label2:
/* a[1] = 5 + 7 = 12, then a[2] = 12 + 5 = 17, which is the value printed. */
a[1] = a[0] + a[2];
a[2] = a[1] + a[0];
printf("%d\n", a[2]);
}
/* Entry point: runs the demo once and returns 0. argc/argv are unused. */
int main(int argc, char* argv[])
{
Test();
return 0;
}
3、多层乱序(疯狂套娃)
#include "stdafx.h"
/*
 * Demo 3: chained junk-instruction sites whose source order differs from
 * their execution order. Every site is a jz/jnz pair to one label followed
 * by a never-executed emitted byte. Following the jump targets, control flows
 * entry -> Label3 -> Label1 -> Label2 -> Label4.
 */
void Test(){
/* arr is initialised but not referenced again in this function. */
int arr[3] = {1, 2, 3};
/* Entry site: always continues at Label3. */
_asm{
jz Label3;
jnz Label3;
_emit 0xE8;
}
/* Third hop: always continues at Label4, where the real code lives. */
Label2:
_asm{
jz Label4;
jnz Label4;
_emit 0xE8;
}
/* First hop: always continues at Label1. */
Label3:
_asm{
jz Label1;
jnz Label1;
_emit 0xE9;
}
/* Second hop: always continues at Label2. */
Label1:
_asm{
jz Label2;
jnz Label2;
_emit 0xE9;
}
/* Real code: prints 10. */
Label4:
int a = 10;
printf("%d\n",a);
}
/* Entry point: runs the demo once and returns 0. argc/argv are unused. */
int main(int argc, char* argv[])
{
Test();
return 0;
}
4.开辟堆栈的花指令
; Junk sequence that imitates a function prologue/epilogue.
; Every push is matched by a pop or an esp adjustment, so esp is the same
; before and after the sequence.
push 1              ; dummy value, released by "add esp, 0x4" below
push ebp
mov ebp, esp
sub esp, 0x8        ; fake local-variable area
push eax
push ecx
pop ecx
pop eax
add esp, 0x8        ; release the fake local-variable area
pop ebp
add esp, 0x4        ; discard the dummy "push 1" to keep the stack balanced
je xxx              ; je/jne share one target, so the jump is always taken
jne xxx
花指令多的情况就需要自己写个IDA python脚本进行去除。
5.花指令免杀
一些反病毒软件依靠特征码来判断文件是否有毒,其识别引擎在文件镜像(filebuffer)一定的偏移范围内进行扫描,比如在0x00001000~0x00006000之间,我们在其中加入一些花指令,使恶意代码偏离引擎识别的偏移范围,再使用工具修改程序入口(OEP),就可以逃避这种方式的特征码识别。
[2020CISCN初赛]hyperthreading
多线程反调试花指令
只有第一个创建的线程有操作
第二个单纯和标志位相加,在调试时标志位不为0
所以直接忽视
第三个创建的就是个死循环检测调试也不用管
第一个有指令重叠,调试即可
花指令不用管只需要调试
先
input[i] = (input[i] >> 2) ^ (input[i] << 6)(结果按字节截断,相当于循环右移2位)
然后xor 0x23
然后sleep让线程切换直接忽视
然后+ 0x23
最后比较
# Undo the per-byte transform in reverse order: subtract 0x23, xor with 0x23,
# then rotate the byte left by 2 (the inverse of the rotate-right-by-2 step).
flag = ''
hape = [221, 91, 158, 29, 32, 158, 144, 145, 144, 144, 145, 146, 222, 139, 17, 209, 30, 158, 139, 81, 17, 80, 81, 139, 158, 93, 93, 17, 139, 144, 18, 145, 80, 18, 210, 145, 146, 30, 158, 144, 210, 159]
for i in range(len(hape)):
    hape[i] -= 0x23
    hape[i] &= 0xff  # keep the subtraction modulo 256
    hape[i] ^= 0x23
    hape[i] = ((hape[i] << 2) ^ (hape[i] >> 6)) & 0xff
    flag += chr(hape[i])
print(flag)
#flag{a959951b-76ca-4784-add7-93583251ca92}
// Decompiled entry point: reads the flag into byte_40336C, starts three
// threads, waits for the first two, then compares 42 bytes of byte_40336C
// with the constant table byte_402150.
int __cdecl main(int argc, const char **argv, const char **envp)
{
int i; // eax
HANDLE Handles[2]; // [esp+8h] [ebp-Ch] BYREF
sub_401020("plz input your flag:");
// "%42s" caps the input at 42 characters; sub_401050 is presumably a scanf-style helper.
sub_401050("%42s", byte_40336C);
Handles[0] = CreateThread(0, 0, StartAddress, 0, 0, 0);
Handles[1] = CreateThread(0, 0, loc_401200, 0, 0, 0);
// The handle of the third thread is not stored, so it is not waited on.
CreateThread(0, 0, sub_401240, 0, 0, 0);
// Wait for both stored handles (third argument 1 = wait for all) with no timeout.
WaitForMultipleObjects(2u, Handles, 1, 0xFFFFFFFF);
for ( i = 0; i < 42; ++i )
{
// Any mismatching byte ends the process after printing "error".
if ( byte_40336C[i] != byte_402150[i] )
{
sub_401020("error");
exit(0);
}
}
sub_401020("win");
getchar();
return 0;
}
# Decode the expected bytes by inverting the transform step by step:
# minus 0x23 (mod 256), xor 0x23, rotate left by 2.
flag = ''
hape = [221, 91, 158, 29, 32, 158, 144, 145, 144, 144, 145, 146, 222, 139, 17, 209, 30, 158, 139, 81, 17, 80, 81, 139, 158, 93, 93, 17, 139, 144, 18, 145, 80, 18, 210, 145, 146, 30, 158, 144, 210, 159]
for i, value in enumerate(hape):
    value = (value - 0x23) & 0xff
    value ^= 0x23
    value = ((value << 2) ^ (value >> 6)) & 0xff
    hape[i] = value  # the list ends up holding the decoded bytes
    flag += chr(value)
print(flag)
具体分析:
逻辑分析
IDA打开,shift+F12查看strings窗口。
// Decompiled entry point (second decompilation of the same routine): reads the
// flag into byte_40336C, starts three threads, waits for the first two, then
// compares 42 bytes of byte_40336C with the constant table byte_402150.
int __cdecl main(int argc, const char **argv, const char **envp)
{
int i; // eax
char v5; // [esp+0h] [ebp-14h]
HANDLE Handles[2]; // [esp+8h] [ebp-Ch] BYREF
// NOTE(review): v5 is never assigned; the extra argument is presumably a
// decompiler artifact of a variadic print helper — confirm.
sub_401020("plz input your flag:", v5);
// "%42s" caps the input at 42 characters.
sub_401050("%42s", (char)byte_40336C);
Handles[0] = CreateThread(0, 0, StartAddress, 0, 0, 0);
Handles[1] = CreateThread(0, 0, loc_401200, 0, 0, 0);
// The handle of the third thread is not stored, so it is not waited on.
CreateThread(0, 0, sub_401240, 0, 0, 0);
// Wait for both stored handles (third argument 1 = wait for all) with no timeout.
WaitForMultipleObjects(2u, Handles, 1, 0xFFFFFFFF);
for ( i = 0; i < 42; ++i )
{
// Any mismatching byte ends the process after printing "error".
if ( byte_40336C[i] != byte_402150[i] )
{
sub_401020("error", (char)Handles[0]);
exit(0);
}
}
sub_401020("win", (char)Handles[0]);
getchar();
return 0;
}
可以看出输入的flag存储在byte_40336C数组,最后将其和数组byte_402150比较。可以猜测是将输入的flag加密了。加密结果就是byte_402150数组。数组byte_402150已经给出。
盲猜线性加密
# Decode by table lookup: table[k] is the encrypted byte recorded for the
# plaintext character string[k].
string = 'abcdefghijklmnopqrstuvwxyz{}123456789-'
table = [0x9E,0xDE,0x1E,0x5D,0x9D,0xDD,0x1D,0x5C,0x9C,0xDC,0x1C,0x5B,0x9B,0xDB,0x1B,0x62,
         0xA2,0xE2,0x22,0x61,0xA1,0xE1,0x21,0x60,0xA0,0xE0,0x20,0x9F,0x92,0xD2,0x12,0x51,
         0x91,0xD1,0x11,0x50,0x90,0x8B]
result = [0xDD,0x5B,0x9E,0x1D,0x20,0x9E,0x90,0x91,0x90,0x90,
          0x91,0x92,0x0DE,0x8B,0x11,0x0D1,0x1E,0x9E,0x8B,0x51,
          0x11,0x50,0x51,0x8B,0x9E,0x5d,0x5d,0x11,0x8B,0x90,
          0x12,0x91,0x50,0x12,0x0D2,0x91,0x92,0x1E,0x9E,0x90,0x0D2,
          0x9F]
# Invert the table once. setdefault keeps the first position of a repeated
# value, which matches a left-to-right search that stops at the first hit.
decode = {}
for pos, enc in enumerate(table):
    decode.setdefault(enc, string[pos])
# Bytes that are missing from the table contribute nothing to the output.
flag = ''.join(decode.get(byte, '') for byte in result)
print(flag, end = '')
常规去花指令分析
// Thread routine: busy-waits for as long as IsDebuggerPresent() returns zero.
// Once it returns nonzero, passes "debug!\n" to sub_401020 (presumably a
// printf-style helper) and terminates the process. lpThreadParameter is unused.
void __stdcall __noreturn sub_401240(LPVOID lpThreadParameter)
{
while ( !IsDebuggerPresent() )
;
sub_401020("debug!\n");
exit(0);
}
loc_401200: ; DATA XREF: sub_401270+4B↓o
.text:00401200 push ebp
.text:00401201 mov ebp, esp
.text:00401203 push ecx
.text:00401204 push ebx
.text:00401205 push esi
.text:00401206 push edi
.text:00401207 mov eax, large fs:30h
.text:0040120D
.text:0040120D loc_40120D: ; CODE XREF: .text:loc_40120D↑j
.text:0040120D jmp short near ptr loc_40120D+1
.text:0040120F ; ---------------------------------------------------------------------------
.text:0040120F ror byte ptr [eax+0Fh], 0B6h
.text:00401213 inc eax
.text:00401214 add cl, [ecx-49F003BBh]
.text:0040121A dec ebp
.text:0040121B cld
.text:0040121C xor eax, eax
.text:0040121E imul ebx, ecx, 64h
.text:00401221
.text:00401221 loc_401221: ; CODE XREF: .text:0040122D↓j
.text:00401221 mov dl, bl
.text:00401223 add byte_40336C[eax], dl
.text:00401229 inc eax
.text:0040122A cmp eax, 2Ah
.text:0040122D jl short loc_401221
.text:0040122F pop edi
.text:00401230 pop esi
.text:00401231 xor eax, eax
.text:00401233 pop ebx
.text:00401234 mov esp, ebp
.text:00401236 pop ebp
.text:00401237 retn 4
// Thread routine: adds the same 8-bit constant to each of the first 42 bytes
// of byte_40336C and returns 0. lpThreadParameter is unused.
DWORD __stdcall sub_401200(LPVOID lpThreadParameter)
{
signed int v1; // eax
char v2; // bl
v1 = 0;
// v2 is 100 times the byte at offset 2 of the structure pointed to by fs:[0x30].
// NOTE(review): that byte is presumably PEB.BeingDebugged, which would make
// v2 zero when no debugger is attached — confirm.
v2 = 100 * *(_BYTE *)(__readfsdword(0x30u) + 2);
do
byte_40336C[v1++] += v2;
while ( v1 < 42 );
return 0;
}
.text:00401120 ; DWORD __stdcall StartAddress(LPVOID lpThreadParameter)
.text:00401120 StartAddress: ; DATA XREF: sub_401270+3A↓o
.text:00401120 push ebp
.text:00401121 mov ebp, esp
.text:00401123 sub esp, 0Ch
.text:00401126 push ebx
.text:00401127 push esi
.text:00401128 push edi
.text:00401129 push 0
.text:0040112B push 0
.text:0040112D push 0
.text:0040112F push offset sub_4010E0
.text:00401134 push 0
.text:00401136 push 0
.text:00401138 call ds:CreateThread
.text:0040113E mov [ebp-8], eax
.text:00401141 push 0FFFFFFFFh
.text:00401143 push offset sub_4010E0
.text:00401148 call ds:WaitForSingleObject
.text:0040114E mov [ebp-0Ch], eax
.text:00401151
.text:00401151 loc_401151: ; CODE XREF: .text:loc_401151↑j
.text:00401151 jmp short near ptr loc_401151+1
.text:00401151 ; ---------------------------------------------------------------------------
.text:00401153 db 0C0h
.text:00401154 dd 0FC45C748h, 0
.text:0040115C dd 458B09EBh, 1C083FCh, 83FC4589h, 0F2AFC7Dh, 808Dh, 0FC4D8B00h
.text:00401174 db 0Fh, 0B6h, 91h
.text:00401177 dd offset byte_40336C
.text:0040117B db 0C1h
.text:0040117C dd 458B02FAh, 88B60FFCh
.text:00401184 dd offset byte_40336C
.text:00401188 dd 3306E1C1h, 0FC458BD1h
.text:00401190 db 88h, 90h
.text:00401192 dd offset byte_40336C
.text:00401196 dw 4D8Bh
.text:00401198 dd 91B60FFCh
.text:0040119C dd offset byte_40336C
.text:004011A0 dd 8B23F283h, 9088FC45h
.text:004011A8 dd offset byte_40336C
.text:004011AC dd 15FF066Ah
.text:004011B0 dd offset Sleep
.text:004011B4 dd 301D8B64h, 0F000000h, 33025BB6h, 0E80575C0h, 1, 0C38358E8h
.text:004011B4 dd 50C30309h, 0FFEB90C3h, 4D8B48C0h, 91B60FFCh
.text:004011DC dd offset byte_40336C
.text:004011E0 dd 8B23C283h, 9088FC45h
.text:004011E8 dd offset byte_40336C
.text:004011EC dd 0FFFF6DE9h, 5FC033FFh, 0E58B5B5Eh, 4C25Dh, 0CCCCCCCCh
.text:00401200
; DWORD __stdcall StartAddress(LPVOID lpThreadParameter)
.text:00401120 StartAddress: ; DATA XREF: sub_401270+3A↓o
.text:00401120 push ebp
.text:00401121 mov ebp, esp
.text:00401123 sub esp, 0Ch
.text:00401126 push ebx
.text:00401127 push esi
.text:00401128 push edi
.text:00401129 push 0
.text:0040112B push 0
.text:0040112D push 0
.text:0040112F push offset sub_4010E0
.text:00401134 push 0
.text:00401136 push 0
.text:00401138 call ds:CreateThread
.text:0040113E mov [ebp-8], eax
.text:00401141 push 0FFFFFFFFh
.text:00401143 push offset sub_4010E0
.text:00401148 call ds:WaitForSingleObject
.text:0040114E mov [ebp-0Ch], eax
.text:00401151 nop
.text:00401152 inc eax
.text:00401154 dec eax
.text:00401155 mov dword ptr [ebp-4], 0
.text:0040115C jmp short loc_401167
.text:0040115E ; ---------------------------------------------------------------------------
.text:0040115E
.text:0040115E loc_40115E: ; CODE XREF: .text:004011EC↓j
.text:0040115E mov eax, [ebp-4]
.text:00401161 add eax, 1
.text:00401164 mov [ebp-4], eax
.text:00401167
.text:00401167 loc_401167: ; CODE XREF: .text:0040115C↑j
.text:00401167 cmp dword ptr [ebp-4], 2Ah
.text:0040116B jge loc_4011F1
.text:00401171 mov ecx, [ebp-4]
.text:00401174 movzx edx, byte_40336C[ecx]
.text:0040117B sar edx, 2
.text:0040117E mov eax, [ebp-4]
.text:00401181 movzx ecx, byte_40336C[eax]
.text:00401188 shl ecx, 6
.text:0040118B xor edx, ecx
.text:0040118D mov eax, [ebp-4]
.text:00401190 mov byte_40336C[eax], dl
.text:00401196 mov ecx, [ebp-4]
.text:00401199 movzx edx, byte_40336C[ecx]
.text:004011A0 xor edx, 23h
.text:004011A3 mov eax, [ebp-4]
.text:004011A6 mov byte_40336C[eax], dl
.text:004011AC push 6
.text:004011AE call ds:Sleep
.text:004011B4 mov ebx, large fs:30h
.text:004011BB movzx ebx, byte ptr [ebx+2]
.text:004011BF xor eax, eax
.text:004011C1 jnz short loc_4011C8
.text:004011C3 call near ptr loc_4011C8+1
.text:004011C8
.text:004011C8 loc_4011C8: ; CODE XREF: .text:004011C1↑j
.text:004011C8 ; .text:004011C3↑p
.text:004011C8 call near ptr 0A039525h
.text:004011CD add eax, ebx
.text:004011CF push eax
.text:004011D0 retn
.text:004011D1 ; ---------------------------------------------------------------------------
.text:004011D1 nop
.text:004011D2
.text:004011D2 loc_4011D2: ; CODE XREF: .text:loc_4011D2↑j
.text:004011D2 jmp short near ptr loc_4011D2+1
.text:004011D4 ; ---------------------------------------------------------------------------
.text:004011D4 ror byte ptr [eax-75h], 4Dh
.text:004011D8 cld
.text:004011D9 movzx edx, byte_40336C[ecx]
.text:004011E0 add edx, 23h
.text:004011E3 mov eax, [ebp-4]
.text:004011E6 mov byte_40336C[eax], dl
.text:004011EC jmp loc_40115E
.text:004011F1 ; ---------------------------------------------------------------------------
.text:004011F1
.text:004011F1 loc_4011F1: ; CODE XREF: .text:0040116B↑j
.text:004011F1 xor eax, eax
.text:004011F3 pop edi
.text:004011F4 pop esi
.text:004011F5 pop ebx
.text:004011F6 mov esp, ebp
.text:004011F8 pop ebp
.text:004011F9 retn 4
.text:004011F9 ; ---------------------------------------------------------------------------
.text:004011FC db 0CCh
.text:004011FD db 0CCh
.text:004011FE db 0CCh
.text:004011FF db 0CCh
观察11C1处的跳转可知,由于前一条xor eax, eax使ZF=1,jnz必不跳转,于是继续执行下一行的call near ptr loc_4011C8+1(即4011C9)。而11C8处call了一个不存在的地址,说明此处是花指令,将其第一个字节nop掉。再观察发现上面的call和其后的retn构成一个函数,相当于在函数里又写了一个函数,会影响Create Function,导致IDA无法正确识别函数(会报sp不平衡,且后面的代码不能被识别),因此将该call与retn两条指令nop掉(其他不用管)。再将其后的一个jmp花指令按同样的方法修改,然后Create Function,F5生成伪代码。
// Thread routine that transforms the first 42 bytes of byte_40336C in place
// and returns 0. lpThreadParameter is unused.
DWORD __stdcall StartAddress(LPVOID lpThreadParameter)
{
int v2; // [esp+0h] [ebp-18h]
signed int i; // [esp+14h] [ebp-4h]
// The handle returned by CreateThread is not kept in this pseudocode.
CreateThread(0, 0, sub_4010E0, 0, 0, 0);
// NOTE(review): the first argument is the address of sub_4010E0 rather than
// a thread handle, exactly as decompiled — confirm the call's actual effect.
WaitForSingleObject(sub_4010E0, 0xFFFFFFFF);
for ( i = 0; i < 42; ++i )
{
// Step 1: (b << 6) ^ (b >> 2) on the zero-extended byte; for an 8-bit
// element this amounts to a rotate right by 2.
byte_40336C[i] = (byte_40336C[i] << 6) ^ ((signed int)(unsigned __int8)byte_40336C[i] >> 2);
// Step 2: xor with 0x23.
byte_40336C[i] ^= 0x23u;
Sleep(6u);
// v2 has no initial value and is not read again in this function, so this
// statement does not influence the bytes written to byte_40336C.
v2 += *(unsigned __int8 *)(__readfsdword(0x30u) + 2) + 9;
// Step 3: add 35 (0x23).
byte_40336C[i] += 35;
}
return 0;
}
// Thread routine: reads the byte at offset 2 of the structure pointed to by
// fs:[0x30] and calls loc_401080 when that byte equals 1 (v1 == 10 after
// adding 9). Otherwise it returns 0. lpThreadParameter is unused.
// NOTE(review): that byte is presumably PEB.BeingDebugged — confirm.
DWORD __stdcall sub_4010E0(LPVOID lpThreadParameter)
{
int v1; // ebx
int v3; // [esp+0h] [ebp-Ch]
int v4; // [esp+0h] [ebp-Ch]
v1 = *(unsigned __int8 *)(__readfsdword(0x30u) + 2) + 9;
// v3 is read without having been assigned in this pseudocode.
v4 = v1 + v3;
if ( v1 == 10 )
((void (__cdecl *)(int))loc_401080)(v4);
return 0;
}
// Sleeps for 0x14 ms, calls sub_401020(&unk_402118) five times while decoding
// one XOR-obfuscated byte per iteration, then terminates the process.
void __noreturn sub_401080()
{
signed int v0; // esi
int v1; // ST04_4
int v2; // [esp+Ch] [ebp-8h]
int v3; // [esp+10h] [ebp-4h]
Sleep(0x14u);
v3 = *(unsigned __int8 *)(__readfsdword(0x30u) + 2);
v2 = 0xF3ECF7F0;
v0 = 0;
// Overwrites the low byte of v3 with 0xA2 (-94 as a signed char).
LOBYTE(v3) = -94;
do
{
// Reads byte v0 starting at &v2. Per the stack annotations v3 sits directly
// after v2, so the fifth byte read is the low byte of v3.
// NOTE(review): on a little-endian layout F0 F7 EC F3 A2 xor 0x83 spells
// "stop!"; v1 is presumably the argument consumed by the format at
// unk_402118 — confirm.
v1 = *((unsigned __int8 *)&v2 + v0) ^ 0x83;
sub_401020(&unk_402118);
++v0;
}
while ( v0 < 5 );
exit(0);
}
# Brute-force decoder: for every expected byte, push each 7-bit candidate
# through the forward transform ((rotate right 2) xor 0x23) + 0x23, modulo 256,
# and keep the candidates that reproduce the expected byte.
result = [0xDD,0x5B,0x9E,0x1D,0x20,0x9E,0x90,0x91,0x90,0x90,
          0x91,0x92,0x0DE,0x8B,0x11,0x0D1,0x1E,0x9E,0x8B,0x51,
          0x11,0x50,0x51,0x8B,0x9E,0x5d,0x5d,0x11,0x8B,0x90,
          0x12,0x91,0x50,0x12,0x0D2,0x91,0x92,0x1E,0x9E,0x90,0x0D2,
          0x9F]
flag = ''
for j in range(len(result)):
    for i in range(128):
        # Only the low byte matters, so masking once at the end is sufficient.
        if result[j] == ((((i << 6) ^ (i >> 2)) ^ 0x23) + 0x23) & 0xff:
            flag += chr(i)
print(flag)
flag{a959951b-76ca-4784-add7-93583251ca92}
搜索字符串找到关键函数:
int sub_401270()
{
signed int v0; // eax
HANDLE Handles; // [esp+8h] [ebp-Ch]
HANDLE v3; // [esp+Ch] [ebp-8h]
printf("plz input your flag:");
scanf("%42s", byte_40336C);
Handles = CreateThread(0, 0, StartAddress, 0, 0, 0);
v3 = CreateThread(0, 0, loc_401200, 0, 0, 0);
CreateThread(0, 0, sub_401240, 0, 0, 0);
WaitForMultipleObjects(2u, &Handles, 1, 0xFFFFFFFF);
v0 = 0;
do
{
if ( byte_40336C[v0] != byte_402150[v0] )
{
printf("error");
exit(0);
}
++v0;
}
while ( v0 < 42 );
printf("win");
getchar();
return 0;
看到关键句:byte_40336C[v0] != byte_402150[v0]
找到加密后的字符串
这里创建了一堆线程:
跟进sub_401240
发现,这里用到了IsDebuggerPresent
反调试函数
void __stdcall __noreturn sub_401240(LPVOID lpThreadParameter)
{
while ( !IsDebuggerPresent() )
;
printf("debug!\n");
exit(0);
007F1296
,此时要求输入flag,随便输点东西WaitForMultipleObjects
,往下程序就会卡在这个位置,因为前面判断了程序处于调试模式,所以线程会一直占用,出不来。WaitForMultipleObject
这个函数,上面那个判断反调试的线程没有占用资源了dd 7f336c
dd 7f2150
通过比较后,发现7f2150开始的位置就是byte_402150
而7f336c就是我们输入的flag经过处理后的结果。
经过多次尝试发现,对于每个字符,每一次的加密结果是固定的,那么就可以通过遍历有可能是flag的字符,查看每个字符加密后的结果,与真正flag加密后的结果进行逐一比较,最终可以得到flag
# Decode with a plaintext -> ciphertext dictionary: each expected byte is
# mapped back to the character(s) that encrypt to it.
encrypt = [0xDD,0x5B,0x9E,0x1D,0x20,0x9E,0x90,0x91,0x90,0x90,0x91,0x92,0xDE,0x8B,0x11,0xD1,0x1E,0x9E,0x8B,0x51,0x11,0x50,0x51,0x8B,0x9E,0x5D,0x5D,0x11,0x8B,0x90,0x12,0x91,0x50,0x12,0xD2,0x91,0x92,0x1E,0x9E,0x90,0xD2,0x9F]
# 'y' encrypts to 0xA0: rotating 0x79 right by 2 gives 0x5E, xor 0x23 gives
# 0x7D, plus 0x23 gives 0xA0.
key_dic = {'a':0x9E,'b':0xDE,'c':0x1E,'d':0x5D,'e':0x9D,'f':0xDD,'g':0x1D,'h':0x5C,'i':0x9C,'j':0xDC,'k':0x1C,'l':0x5B,'m':0x9B,'n':0xDB,'o':0x1B,'p':0x62,'q':0xA2,'r':0xE2,'s':0x22,'t':0x61,'u':0xA1,'v':0xE1,'w':0x21,'x':0x60,'y':0xA0,'z':0xE0,'_':0x17,'{':0x20,'}':0x9F,'0':0x52,'1':0x92,'2':0xD2,'3':0x12,'4':0x51,'5':0x91,'6':0xD1,'7':0x11,'8':0x50,'9':0x90,'-':0x8B}
# Group characters by ciphertext byte so each expected byte is resolved with
# one lookup; a byte with no entry contributes nothing.
by_cipher = {}
for ch, enc in key_dic.items():
    by_cipher[enc] = by_cipher.get(enc, '') + ch
flag = ''
for i in encrypt:
    flag += by_cipher.get(i, '')
print(flag)
# flag{a959951b-76ca-4784-add7-93583251ca92}
这里可以发现,当EIP处于WaitForMultipleObjects
之前时,0x0087336c
的值还是我们输入的字符串,并没有进行处理。(这里是我之后补充的,所以基地址不同)