公众号:https://mp.weixin.qq.com/s/dMkrp5Wi8VDIa7b57I8ikw

或许我们的公众号会有更多你感兴趣的内容

img

花指令(junk code)是一种专门用来迷惑反汇编器和反编译器的指令片段。这些指令片段不会影响程序的原有功能,但会使反汇编器的结果出现偏差,从而使破解者分析失败。比较经典的花指令技巧是利用 jmp、call、ret 等指令改变执行流,使反汇编器解析出与运行时不相符的错误代码。

这里使用这个网站进行汇编到机器码的快速查询

https://defuse.ca/online-x86-assembler.htm#disassembly

如何添加花指令

要知道如何去除,首先就要想到如何添加

1. x32 手动添加

从简单的开始,从x86 (32位)开始,因为在windows上还支持在32位中进行汇编内联

这里先用个简单的吧

jmp $+2
#0: eb 00 jmp 2 <_main+0x2>

这条汇编长度是2,跳转到当前地址+2的地方,也就是说没有执行任何操作

但是MSVC的内联汇编不能这样写,得改成跳转到紧随其后的标签:

// MSVC inline assembly: a jump whose target is the label placed directly
// after it, so no other instruction is skipped or executed.
_asm {
jmp next1;
next1:
}

完整代码如下

#include <iostream>

/// Checks a NUL-terminated password against the hard-coded target value.
///
/// Every character `c` updates the running value with
/// `sum *= (sum + c) % 13 + 1`; the final value is compared with 0x1234567.
/// The `_asm` blocks are junk code: each one only jumps to the label that
/// immediately follows it.
///
/// @param passwd NUL-terminated input string.
/// @return true when the folded value equals 0x1234567, false otherwise.
bool verify(char* passwd) {
    // Seed with 1: the update is purely multiplicative, so a seed of 0 would
    // stay 0 for every input and the comparison below could never succeed.
    size_t sum = 1;
    _asm {
        jmp next1;
    next1:
    }
    for (size_t i = 0; passwd[i]; i++)
    {
        _asm {
            jmp next2;
        next2:
        }
        sum *= (sum + passwd[i]) % 13 + 1;
        _asm {
            jmp next3;
        next3:
        }
    }
    return sum == 0x1234567;
}

/// Prompts for a password, reads it into a 16-byte buffer and prints whether
/// verify() accepted it.
int main()
{
    char buffer[0x10] = {};
    // Junk code: jump to the label that directly follows.
    _asm {
        jmp next;
    next:
    }
    std::cout << "input password: ";
    scanf_s("%s", buffer, 0x10);

    const char* verdict = verify(buffer) ? "OK!\n" : "NO!\n";
    std::cout << verdict;
}

image-20260413214527037

image-20260413214544539

稍微进阶一点点的就是使用宏定义

#include <iostream>
#include <stdio.h>

// Junk snippets for MSVC x86 inline assembly.
// JUNK1: moves edx onto itself.
#define JUNK1 __asm { mov edx, edx }
// JUNK2: pushes eax and pops it straight back.
#define JUNK2 __asm { push eax } __asm { pop eax }
// JUNK3: zeroes eax, then adds and subtracts 1. Unlike the other snippets
// this one does overwrite eax (it ends up as 0).
#define JUNK3 __asm { xor eax, eax } __asm { add eax, 1 } __asm { sub eax, 1 }
// JUNK4: moves eax onto itself.
#define JUNK4 __asm { mov eax, eax }

// Token pasting with one extra expansion level, so that macro arguments such
// as __COUNTER__ are expanded before they are pasted.
#define CONCAT(a, b) a##b
#define CONCAT_EXPAND(a, b) CONCAT(a, b)

// JUNK5: jumps to a label `skip<x>` that is defined right behind the jump.
// The label name is built from __COUNTER__ so every use gets its own label.
#define JUNK5_IMPL(x) __asm { jmp CONCAT_EXPAND(skip, x) } CONCAT_EXPAND(skip, x) :
#define JUNK5 JUNK5_IMPL(__COUNTER__)

// Lookup table: JUNK_<n> selects one of the five snippets above.
// Only n = 0..9 is defined.
#define JUNK_0 JUNK1
#define JUNK_1 JUNK2
#define JUNK_2 JUNK3
#define JUNK_3 JUNK4
#define JUNK_4 JUNK5
#define JUNK_5 JUNK1
#define JUNK_6 JUNK2
#define JUNK_7 JUNK3
#define JUNK_8 JUNK4
#define JUNK_9 JUNK5

// JUNKFUNC(): expands to JUNK_<current __COUNTER__ value>, so successive uses
// pick successive table entries. JUNK5 reads __COUNTER__ as well, and a
// counter value above 9 names a JUNK_<n> macro that does not exist.
#define JUNK_EXPAND(x) JUNK_EXPAND2(x)
#define JUNK_EXPAND2(x) JUNK_##x
#define JUNKFUNC() JUNK_EXPAND(__COUNTER__)

/// Folds a NUL-terminated password into a running product and compares the
/// result with 0x1234567. JUNKFUNC() invocations insert junk code only.
///
/// @param passwd NUL-terminated input string.
/// @return true when the folded value equals 0x1234567.
bool verify(char* passwd) {
    size_t acc = 1;

    JUNKFUNC();

    for (size_t pos = 0; passwd[pos] != '\0'; ++pos)
    {
        JUNKFUNC();
        // Each character contributes a factor in the range 1..13.
        const size_t factor = (acc + passwd[pos]) % 13 + 1;
        acc *= factor;
        JUNKFUNC();
    }

    JUNKFUNC();

    if (acc != 0x1234567)
        return false;
    return true;
}

/// Reads at most 15 characters into a 16-byte buffer and prints whether
/// verify() accepted them. JUNKFUNC() invocations insert junk code only.
int main()
{
    char buffer[0x10] = {};

    JUNKFUNC();

    std::cout << "input password: ";
    scanf_s("%15s", buffer, (unsigned)_countof(buffer));

    JUNKFUNC();

    const bool accepted = verify(buffer);
    std::cout << (accepted ? "OK!\n" : "NO!\n");

    JUNKFUNC();
}

2. x32 简易shellcode混淆

这个方法在 x64 下同样适用。我个人使用 HDE32(Hacker Disassembler Engine 32)这个反汇编引擎来做 shellcode 的混淆,你可以在 MinHook 的源码中或者 GitHub 上的存档仓库里找到这个项目

首先是找到所有相对跳转的汇编指令,然后标记他们的起点和目的绝对跳转地址

#include "include/jumper.hpp"

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <cstring>
#include <unordered_map>
#include <vector>

/// Returns true when `opcode` is a one-byte short conditional jump,
/// i.e. lies in the range 0x70..0x7F.
bool is_jcc(uint8_t opcode)
{
    // All sixteen opcodes of that range share the high nibble 7.
    return (opcode & 0xF0) == 0x70;
}

/// Returns true when `opcode2` (the byte following a 0x0F escape) lies in the
/// range 0x80..0x8F, the second opcode byte of the near conditional jumps.
bool is_jcc_0f(uint8_t opcode2)
{
    // All sixteen opcodes of that range share the high nibble 8.
    return (opcode2 & 0xF0) == 0x80;
}


/// Linearly disassembles `shellcode` with HDE32 and reports every control
/// transfer instruction it recognises.
///
/// Recognised encodings:
///   - E8              CALL rel                    -> JT_CALL
///   - E9 / EB         JMP rel                     -> JT_JMP
///   - 70..7F          Jcc rel8                    -> JT_JCC
///   - 0F 80..8F       Jcc rel                     -> JT_JCC
///   - E0..E3          LOOPNE/LOOPE/LOOP/JECXZ     -> JT_JCC
///   - FF /2 /3 /4 /5  indirect or far CALL/JMP    -> JT_ABSOLUTE
///
/// For relative forms `to` is the target offset inside the buffer
/// (`from + length + displacement`). For JT_ABSOLUTE the target cannot be
/// derived from the instruction bytes and `to` is left at 0.
///
/// @param shellcode Start of the code buffer.
/// @param size      Number of bytes in the buffer.
/// @return One JumpInfo {from, to, type} per recognised instruction, in
///         address order. Scanning stops at the first instruction HDE32
///         rejects or that would extend past `size`.
std::vector<JumpInfo> find_jumps(unsigned char* shellcode, size_t size)
{
    std::vector<JumpInfo> result;

    size_t offset = 0;

    while (offset < size)
    {
        hde32s hs;
        uint32_t len = hde32_disasm(shellcode + offset, &hs);

        // Stop on undecodable bytes, and on an instruction whose reported
        // length reaches beyond the end of the buffer.
        if ((hs.flags & F_ERROR) || len == 0 || len > size - offset)
            break;

        uint8_t op = hs.opcode;
        JumpType type = JT_NONE;
        int32_t rel = 0;
        bool isRelative = false;

        if (op == 0xE8) // CALL rel
        {
            type = JT_CALL;
            isRelative = true;
        }
        else if (op == 0xE9 || op == 0xEB) // JMP rel
        {
            type = JT_JMP;
            isRelative = true;
        }
        else if (is_jcc(op)) // Jcc rel8
        {
            type = JT_JCC;
            isRelative = true;
        }
        else if (op == 0x0F && is_jcc_0f(hs.opcode2)) // 0F 8x: Jcc near
        {
            type = JT_JCC;
            isRelative = true;
        }
        else if (op >= 0xE0 && op <= 0xE3) // LOOPNE / LOOPE / LOOP / JECXZ rel8
        {
            type = JT_JCC;
            isRelative = true;
        }
        else if (op == 0xFF) // group 5: the ModRM reg field selects the operation
        {
            uint8_t reg = (hs.modrm >> 3) & 7;

            // /2 CALL r/m, /3 CALL far m, /4 JMP r/m, /5 JMP far m.
            // The remaining values (INC, DEC, PUSH) are not control transfers.
            if (reg >= 2 && reg <= 5)
                type = JT_ABSOLUTE;
        }

        if (type != JT_NONE)
        {
            uint32_t from = (uint32_t)offset;
            uint32_t to = 0;

            if (isRelative)
            {
                // The displacement is signed and counted from the end of the
                // instruction.
                if (hs.flags & F_IMM8)
                    rel = (int8_t)hs.imm.imm8;
                else if (hs.flags & F_IMM16)
                    rel = (int16_t)hs.imm.imm16;
                else if (hs.flags & F_IMM32)
                    rel = (int32_t)hs.imm.imm32;

                to = (uint32_t)(offset + len + rel);
            }

            result.push_back({ from, to, type });
        }

        offset += len;
    }

    return result;
}

image-20260414141155282

然后就是解析长度后随机位置插入花指令然后修复相对位置的偏移

std::vector<uint8_t> remapper(unsigned char* shellcode, size_t size) {
std::vector<Instruction> instructions;

size_t offset = 0;

while (offset < size)
{
hde32s hs;
uint32_t len = hde32_disasm(shellcode + offset, &hs);

if (hs.flags & F_ERROR || len == 0)
break;

Instruction inst;
inst.old_offset = offset;
inst.len = len;
inst.bytes.assign(shellcode + offset, shellcode + offset + len);
inst.hs = hs;

instructions.push_back(inst);

offset += len;
}

std::vector<uint8_t> new_code;
std::unordered_map<uint32_t, uint32_t> offset_map;

for (auto& inst : instructions)
{
// 记录映射
offset_map[inst.old_offset] = new_code.size();

// 写入原指令
new_code.insert(new_code.end(), inst.bytes.begin(), inst.bytes.end());

// 随机插入 junk
if (rand() % 5 == 0)
{
std::vector<uint8_t> junk = {
0x90, // nop
0x50, 0x58 // push eax; pop eax
};

new_code.insert(new_code.end(), junk.begin(), junk.end());
}
else if (rand() % 3 == 0)
{
std::vector<uint8_t> junk = { 0x89, 0xD2 };//mov edx, edx

new_code.insert(new_code.end(), junk.begin(), junk.end());
}
else if (rand() % 6 == 0)
{
std::vector<uint8_t> junk = {
0x83, 0xC0, 0x01, // add eax,0x1
0x83, 0xE8, 0x01 // sub eax,0x1
};

new_code.insert(new_code.end(), junk.begin(), junk.end());
}
}

for (auto& inst : instructions)
{
auto& hs = inst.hs;

// 只处理相对跳转
if (!(hs.flags & F_RELATIVE))
continue;

uint32_t old_from = inst.old_offset;
uint32_t new_from = offset_map[old_from];

int32_t rel = 0;

if (hs.flags & F_IMM8)
rel = (int8_t)hs.imm.imm8;
else if (hs.flags & F_IMM32)
rel = (int32_t)hs.imm.imm32;

uint32_t old_target = old_from + inst.len + rel;

// 找新地址
if (offset_map.find(old_target) == offset_map.end())
continue; // 跳到外部,跳过

uint32_t new_target = offset_map[old_target];

// 计算新的相对偏移
int32_t new_rel = (int32_t)(new_target - (new_from + inst.len));

// 写回 new_code
uint32_t write_pos = new_from + inst.len;

if (hs.flags & F_IMM8)
*(int8_t*)(&new_code[write_pos - 1]) = (int8_t)new_rel;
else if (hs.flags & F_IMM32)
*(int32_t*)(&new_code[write_pos - 4]) = new_rel;
}
return new_code;
}

进行测试

#include <iostream>
#include "include/jumper.hpp"
int main()
{
unsigned char shellcode[] =
"\xFC\x33\xD2\xB2\x30\x64\xFF\x32\x5A\x8B"
"\x52\x0C\x8B\x52\x14\x8B\x72\x28\x33\xC9"
"\xB1\x18\x33\xFF\x33\xC0\xAC\x3C\x61\x7C"
"\x02\x2C\x20\xC1\xCF\x0D\x03\xF8\xE2\xF0"
"\x81\xFF\x5B\xBC\x4A\x6A\x8B\x5A\x10\x8B"
"\x12\x75\xDA\x8B\x53\x3C\x03\xD3\xFF\x72"
"\x34\x8B\x52\x78\x03\xD3\x8B\x72\x20\x03"
"\xF3\x33\xC9\x41\xAD\x03\xC3\x81\x38\x47"
"\x65\x74\x50\x75\xF4\x81\x78\x04\x72\x6F"
"\x63\x41\x75\xEB\x81\x78\x08\x64\x64\x72"
"\x65\x75\xE2\x49\x8B\x72\x24\x03\xF3\x66"
"\x8B\x0C\x4E\x8B\x72\x1C\x03\xF3\x8B\x14"
"\x8E\x03\xD3\x52\x33\xFF\x57\x68\x61\x72"
"\x79\x41\x68\x4C\x69\x62\x72\x68\x4C\x6F"
"\x61\x64\x54\x53\xFF\xD2\x68\x33\x32\x01"
"\x01\x66\x89\x7C\x24\x02\x68\x75\x73\x65"
"\x72\x54\xFF\xD0\x68\x6F\x78\x41\x01\x8B"
"\xDF\x88\x5C\x24\x03\x68\x61\x67\x65\x42"
"\x68\x4D\x65\x73\x73\x54\x50\xFF\x54\x24"
"\x2C\x57\x68\x4F\x5F\x6F\x21\x8B\xDC\x57"
"\x53\x53\x57\xFF\xD0\x68\x65\x73\x73\x01"
"\x8B\xDF\x88\x5C\x24\x03\x68\x50\x72\x6F"
"\x63\x68\x45\x78\x69\x74\x54\xFF\x74\x24"
"\x40\xFF\x54\x24\x40\x57\xFF\xD0";
auto jumps = find_jumps(shellcode, sizeof(shellcode));

for (auto& j : jumps)
{
std::cout << "from: 0x" << std::hex << j.from
<< " -> to: 0x" << j.to
<< " type: " << j.type << std::endl;
}
std::vector<uint8_t> newcode = remapper(shellcode, sizeof(shellcode));
jumps = find_jumps(&newcode[0], newcode.size());

for (auto& j : jumps)
{
std::cout << "[new]from: 0x" << std::hex << j.from
<< " -> to: 0x" << j.to
<< " type: " << j.type << std::endl;
}
PVOID lpAddr = VirtualAlloc(nullptr, newcode.size(), MEM_COMMIT | MEM_RESERVE, PAGE_EXECUTE_READWRITE);
RtlCopyMemory(lpAddr, &newcode[0], newcode.size());
HANDLE hThread = CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)lpAddr, NULL, 0, NULL);
WaitForSingleObject(hThread, -1);

return 0;
}

image-20260414143834191

在尝试对CobaltStrike的shellcode进行混淆,很明显是不能成功的,大致来说就是cs的shellcode包含有SMC自解码、大量的跳转和自定义的hash API等。具体的shellcode分析可以见【免杀】Cobaltstrike Stager Payload分析

3. x64 手动添加

相较于 x32,x64 更麻烦的地方在于 MSVC 不支持 x64 内联汇编,所以尝试将核心代码使用汇编编写,直接在汇编中添加花指令,然后在主程序中调用。

我主要使用的是cmake进行编写,在CMakeLists.txt

# Enable MASM next to C++ so that .asm sources are assembled by the project.
project(jumper LANGUAGES CXX ASM_MASM)
# ...
# Assemble the hand-written routine into an object library.
add_library(cr4 OBJECT src/cr4.asm)
# Link the resulting object file into the final target.
target_link_libraries(${PROJECT_NAME} PRIVATE hde $<TARGET_OBJECTS:cr4>)

在Visual Studio的方法可以参考syswhispers的方法

OPTION CASEMAP:NONE
.code

PUBLIC CR4Enc

; void CR4Enc(uint8_t* plain, uint8_t* cipher, uint64_t size, uint64_t key)
;
; Transforms `size` bytes from `plain` into `cipher`, one byte at a time:
;     out = rol(in xor low_byte(key), 3) + 55h
; After every byte the key becomes ror(key, 1) + 1337h.
; rbx, rsi and rdi are saved on entry and restored before returning.
; Several junk sequences are interleaved with the real work; they are marked
; "junk" below and do not contribute to the result.

CR4Enc PROC
; RCX = plain
; RDX = cipher
; R8 = size
; R9 = key

push rbx
push rsi
push rdi

; junk: push/pop pair that leaves rax as it was
push rax
pop rax

mov rsi, rcx ; plain

mov rdi, rdx ; cipher
mov rcx, r8 ; loop counter
mov rbx, r9 ; key

; junk: transfer control through push/ret to break IDA's analysis.
; r11d = 5653D986h xor 5653D99Ch = 1Ah, so the address pushed is ret_junk + 1Ah
; and `ret` continues there.
; NOTE(review): 1Ah is presumably the byte length of this sequence, so that
; execution resumes at `test rcx, rcx` -- this depends on the encodings the
; assembler picks; confirm in a disassembly after any edit here.
ret_junk:
lea r10, ret_junk
mov r11d, 5653D986h
xor r11d, 5653d99Ch
add r10, r11
push r10
ret

; nothing to do for size == 0
test rcx, rcx
jz done

loop_start:
mov al, byte ptr [rsi]

; --- simple CR4Enc-like scrambling ---
xor al, bl ; XOR with the low byte of the key
rol al, 3 ; rotate left by 3 bits
add al, 55h ; add a constant

mov byte ptr [rdi], al

; junk: jump to the very next instruction, then two identical XORs that
; cancel each other (al has already been stored at this point)
jmp short skip_junk
skip_junk:
xor rax, 0DEADBEEFh
xor rax, 0DEADBEEFh

; key evolution (similar to a stream cipher)
ror rbx, 1
add rbx, 1337h

; junk: both conditional jumps target the same label right below
jz junk
jnz junk
junk:

; advance both pointers; loop until the counter reaches zero
inc rsi
inc rdi
dec rcx
jnz loop_start

done:
pop rdi
pop rsi
pop rbx
ret

CR4Enc ENDP

END
#include <iostream>

extern "C" void CR4Enc(uint8_t* plain, uint8_t* cipher, uint64_t size, uint64_t key);

/// Runs CR4Enc over the text "HelloWorld" with the key 0x12345678 and prints
/// every output byte as two upper-case hex digits followed by a space.
int main()
{
    uint8_t data[] = "HelloWorld";
    uint8_t out[sizeof(data)] = { 0 };

    // Number of payload bytes: the terminating NUL added by the string
    // literal is not processed.
    const size_t length = sizeof(data) - 1;

    CR4Enc(data, out, length, 0x12345678);

    // The index is size_t so that it has the same (unsigned) type as the bound.
    for (size_t i = 0; i < length; i++)
        printf("%02X ", out[i]);

    return 0;
}

现在就可以在汇编中手动添加花指令了,具体方法和x32手动添加类似

image-20260414160717562

4. 编译中添加

笔者之所以想起这个方法,是突然回忆起了 AFL-fuzz 的插桩方法,这里简单提及

所以,AFL的代码插桩,就是在将源文件编译为汇编代码后,通过afl-as完成。开始重写汇编指令,准备在分支处插入代码

https://joe1sn.eu.org/2023/07/22/afl-source/

/* 32-bit instrumentation snippet, stored as assembly text.
 *
 * The text is written as a format string (presumably printf-style -- the
 * name and the doubled "%%" suggest so): every "%%" stands for a literal '%'
 * and the single "%08x" conversion supplies the immediate that is loaded
 * into %ecx right before the call to __afl_maybe_log.
 *
 * The emitted code moves %esp down by 16 bytes, stores %edi, %edx, %ecx and
 * %eax in that area, performs the call, reloads the four registers and
 * moves %esp back up. */
static const u8* trampoline_fmt_32 =
"\n"
"/* --- AFL TRAMPOLINE (32-BIT) --- */\n"
"\n"
".align 4\n"
"\n"
"leal -16(%%esp), %%esp\n"
"movl %%edi, 0(%%esp)\n"
"movl %%edx, 4(%%esp)\n"
"movl %%ecx, 8(%%esp)\n"
"movl %%eax, 12(%%esp)\n"
"movl $0x%08x, %%ecx\n"
"call __afl_maybe_log\n"
"movl 12(%%esp), %%eax\n"
"movl 8(%%esp), %%ecx\n"
"movl 4(%%esp), %%edx\n"
"movl 0(%%esp), %%edi\n"
"leal 16(%%esp), %%esp\n"
"\n"
"/* --- END --- */\n"
"\n";

AFL 相当于魔改了编译器,这种方式更可能贴近 OLLVM 这种爆改编译器的做法,所以这里也只是提一嘴。

使用IDA去除花指令

1. 手动patch

就是识别到花指令,然后直接修改汇编代码,没什么好说的,例如这里的

image-20260414161653518

直接jmp,我这里是通过修改byte而不是assembly得到的

image-20260414162649768

2. 使用 IDC 脚本自动清除

依旧使用之前的x64 手动添加的例子,我们可以编写idc脚本进行去除

image-20260414165241191

主要思想就是找到特征码,然后清除

https://docs.hex-rays.com/9.1/developer-guide/idc

#include<idc.idc>
// Returns 1 when the bytes at `ea` look like the complete 26-byte fake-return
// sequence, 0 otherwise. Checking more than the first three bytes keeps an
// ordinary `lea r10, [rip+x]` from being patched by mistake.
//
//   +0   4C 8D 15 <rel32>   lea  r10, [rip+rel32]
//   +7   41 BB <imm32>      mov  r11d, imm32
//   +13  41 81 F3 <imm32>   xor  r11d, imm32
//   +20  <3 bytes>          add  r10, r11   (two valid encodings, not checked)
//   +23  41 52              push r10
//   +25  C3                 ret
static IsFakeReturn(ea)
{
    if (Byte(ea) != 0x4C || Byte(ea+1) != 0x8D || Byte(ea+2) != 0x15)
        return 0;
    if (Byte(ea+7) != 0x41 || Byte(ea+8) != 0xBB)
        return 0;
    if (Byte(ea+13) != 0x41 || Byte(ea+14) != 0x81 || Byte(ea+15) != 0xF3)
        return 0;
    if (Byte(ea+23) != 0x41 || Byte(ea+24) != 0x52)
        return 0;
    if (Byte(ea+25) != 0xC3)
        return 0;
    return 1;
}

// Scans [StartVa, StopVa) for the fake-return sequence and overwrites its
// first three bytes with `jmp short +0x18; nop`, i.e. a direct jump over the
// remaining 24 bytes of the sequence.
static main()
{
    auto ea, StartVa, StopVa;
    StartVa=0x1400015E0;
    StopVa=0x14000161D;
    for (ea=StartVa; ea<StopVa; ea++){
        if (!IsFakeReturn(ea))
            continue;
        PatchByte(ea, 0xEB);    // jmp short
        PatchByte(ea+1, 0x18);  // displacement: 26-byte sequence minus this 2-byte jump
        PatchByte(ea+2, 0x90);  // nop
        MakeCode(ea);
        Message("Find Fakereturn Opcode!!\n");
    }
    Message("Clear Fakereturn Opcode Ok\n");
}

image-20260414165121495

run了过后

image-20260414165150161

引用

https://mp.weixin.qq.com/s/UsPTeRZvlLFUG-EjCkVqTQ

https://singlehorn.github.io/2026/02/04/VNCTF2026出题笔记/

https://joe1sn.eu.org/2023/07/22/afl-source/