引子:最近想写一些文章,但是每每写到中间就放弃了。我觉得写技术文章不难,难的是把自己脑子里想的用文字描述出来并让人看明白,两个字:“缺练”。
回归正题,给大家整两个小题目,供大家把玩。
1.汇编翻译为C语言
这是一段函数的汇编代码,请把它翻译为C代码。
1 00411A20 push ebp
2 00411A21 mov ebp,esp
3 00411A23 sub esp,0E8h
4 00411A29 push ebx
5 00411A2A push esi
6 00411A2B push edi
7 00411A2C lea edi,[ebp-0E8h]
8 00411A32 mov ecx,3Ah
9 00411A37 mov eax,0CCCCCCCCh
10 00411A3C rep stos dword ptr [edi]
11 00411A3E mov eax,dword ptr [a]
12 00411A41 add eax,dword ptr [b]
13 00411A44 mov dword ptr [d],eax
14 00411A47 mov dword ptr [i], 1
15 00411A4E mov dword ptr [c], 0
16 00411A55 cmp dword ptr [c],64h
17 00411A59 jge myfunction+46h (411A66h)
18 00411A5B mov eax,dword ptr [c]
19 00411A5E add eax,dword ptr [i]
20 00411A61 mov dword ptr [c],eax
21 00411A64 jmp myfunction+35h (411A55h)
22 00411A66 mov eax,dword ptr [c]
23 00411A69 mov dword ptr [ebp-0E8h],eax
24 00411A6F cmp dword ptr [ebp-0E8h], 0
25 00411A76 je myfunction+63h (411A83h)
26 00411A78 cmp dword ptr [ebp-0E8h], 1
27 00411A7F je myfunction+6Ah (411A8Ah)
28 00411A81 jmp myfunction+72h (411A92h)
29 00411A83 mov dword ptr [d], 1
30 00411A8A mov eax,dword ptr [c]
31 00411A8D mov dword ptr [d],eax
32 00411A90 jmp myfunction+79h (411A99h)
33 00411A92 mov dword ptr [d], 0
34 00411A99 mov eax,dword ptr [d]
35 00411A9C pop edi
36 00411A9D pop esi
37 00411A9E pop ebx
38 00411A9F mov esp,ebp
39 00411AA1 pop ebp
40 00411AA2 ret
41
我翻译成C语言是:
/*
 * Hand-written C reconstruction of the disassembled routine shown earlier.
 *
 * a, b: summed into d as its initial value; every switch arm below
 *       overwrites d again before it is returned.
 * Returns d. The loop always leaves c == 100, so the default arm runs
 * and the function returns 0.
 */
int myfunction( int a, int b)
{
int d=a+b ;
int i= 1 ;
int c= 0 ;
/* Counts c up in steps of i (always 1) until it reaches 100. */
while(c< 100 ) {c=c+i ; }
switch(c)
{
/* No break here: deliberately falls through into case 1, just as the
   disassembly stores 1 into d and then immediately copies c into d. */
case 0 :d= 1 ;
case 1 :d=c ; break;
default: d= 0 ;
}
return d ;
}
再用VS2008编译上边的C代码,查看其反汇编:
1 int myfunction( int a, int b)
2 {
3 00251BD0 push ebp
4 00251BD1 mov ebp,esp
5 00251BD3 sub esp,0E8h
6 00251BD9 push ebx
7 00251BDA push esi
8 00251BDB push edi
9 00251BDC lea edi,[ebp-0E8h]
10 00251BE2 mov ecx,3Ah
11 00251BE7 mov eax,0CCCCCCCCh
12 00251BEC rep stos dword ptr es: [edi]
13 int d=a+b ;
14 00251BEE mov eax,dword ptr [a]
15 00251BF1 add eax,dword ptr [b]
16 00251BF4 mov dword ptr [d],eax
17 int i= 1 ;
18 00251BF7 mov dword ptr [i], 1
19 int c= 0 ;
20 00251BFE mov dword ptr [c], 0
21 while(c< 100 ) {c=c+i ; }
22 00251C05 cmp dword ptr [c],64h
23 00251C09 jge myfunction+46h (251C16h)
24 00251C0B mov eax,dword ptr [c]
25 00251C0E add eax,dword ptr [i]
26 00251C11 mov dword ptr [c],eax
27 00251C14 jmp myfunction+35h (251C05h)
28 switch(c)
29 00251C16 mov eax,dword ptr [c]
30 00251C19 mov dword ptr [ebp-0E8h],eax
31 00251C1F cmp dword ptr [ebp-0E8h], 0
32 00251C26 je myfunction+63h (251C33h)
33 00251C28 cmp dword ptr [ebp-0E8h], 1
34 00251C2F je myfunction+6Ah (251C3Ah)
35 00251C31 jmp myfunction+72h (251C42h)
36 {
37 case 0 :d= 1 ;
38 00251C33 mov dword ptr [d], 1
39 case 1 :d=c ; break;
40 00251C3A mov eax,dword ptr [c]
41 00251C3D mov dword ptr [d],eax
42 00251C40 jmp myfunction+79h (251C49h)
43 default: d= 0 ;
44 00251C42 mov dword ptr [d], 0
45 }
46
47 return d ;
48 00251C49 mov eax,dword ptr [d]
49 }
50 00251C4C pop edi
51 00251C4D pop esi
52 00251C4E pop ebx
53 00251C4F mov esp,ebp
54 00251C51 pop ebp
55 00251C52 ret
56
您猜对了吗?

2.高阶C
#include "stdio.h"
/*
 * Pointer puzzle: prints two values in hex, one read through a pointer
 * derived from &a+1 and one read through a pointer built from integer
 * arithmetic on the array's address.
 */
int main()
{
int a[5]={7,8,9,10,11};
/* &a has type int (*)[5], so &a+1 steps over the whole array;
   ptr1 therefore points just past a[4] and ptr1[-1] is a[4]. */
int *ptr1=(int *)(&a+1);
/* (int)a turns the address into an integer, so +1 adds one BYTE, not one
   element: ptr2 refers to the second byte of a[0].
   NOTE(review): non-portable. The cast truncates the address where pointers
   are wider than int, and the resulting pointer is misaligned for int, so
   dereferencing it is undefined behaviour in standard C. */
int *ptr2=(int *)((int )a+1);
/* ptr1[-1] prints as b (a[4] == 11). The value of *ptr2 depends on byte
   order; the article reports 8000000 on 32-bit little-endian x86 / VS2008. */
printf("%x,%x",ptr1[-1],*ptr2);
return 0;
}
将会打印什么?
在我的vista 32位和VS2008打印的是:
b,8000000
分析
到底为什么会打印出这个结果?下面上汇编代码:
int a[5]={7,8,9,10,11};
009A13BE mov dword ptr [a],7 ;&a :0x0018fed4
009A13C5 mov dword ptr [ebp-14h],8
009A13CC mov dword ptr [ebp-10h],9
009A13D3 mov dword ptr [ebp-0Ch],0Ah
009A13DA mov dword ptr [ebp-8],0Bh
int *ptr1=(int *)(&a+1);
009A13E1 lea eax,[ebp-4]
009A13E4 mov dword ptr [ptr1],eax
int *ptr2=(int *)((int )a+1);
009A13E7 lea eax,[ebp-17h]
009A13EA mov dword ptr [ptr2],eax
头5行很好分析,就是给一片内存区域赋值,依次是7,8,9,10,11。
下面一句int *ptr1=(int *)(&a+1); 将[ebp-4]的地址写入ptr1,寄存器和相关内存dump如下:
EAX = CCCCCCCC EBX = 7FFDB000 ECX = 00000000 EDX = 00000001 ESI = 00000000 EDI = 0018FEEC EIP = 009A13E1 ESP = 0018FDEC EBP = 0018FEEC EFL = 00000202
address value symbol
0x0018FED4 07 00 00 00
.
0x0018FED8 08 00 00 00
.
0x0018FEDC 09 00 00 00
.
0x0018FEE0 0a 00 00 00
.
0x0018FEE4 0b 00 00 00
.
0x0018FEE8 cc cc cc cc ????
当前EBP是 0018FEEC,ebp-4就是0018FEE8,该处的内容是cccccccc,刚好紧跟在数组a之后。这就说明了&a+1是以整个数组为单位前进一步,其地址相当于a的首地址加上sizeof(a)个字节,即0x0018FED4+0x14(20)=0x0018FEE8。
继续!!
下一个语句把[ebp-17h]的地址传给ptr2,ebp-17h是0018FED5,刚好是数组a第一个元素a[0]的第2个字节的位置。从这里开始读取一个int,这时候出现了错位,读到的4个字节依次是00 00 00 08;我的机器是intel,是小端,实际代表的数字是08000000。实际的语句是(int)a+1,在我的32位机器上,(int)a+1就是把数组a的首地址当作整数加1,也就是首地址偏移1个字节后的地址。
10: printf("%x,%x",ptr1[-1],*ptr2);
009A13ED mov esi,esp
009A13EF mov eax,dword ptr [ptr2]
009A13F2 mov ecx,dword ptr [eax]
009A13F4 push ecx
009A13F5 mov edx,dword ptr [ptr1]
009A13F8 mov eax,dword ptr [edx-4]
009A13FB push eax
009A13FC push offset string "%x,%x" (9A573Ch)
009A1401 call dword ptr [__imp__printf (9A82BCh)]
009A1407 add esp,0Ch
009A140A cmp esi,esp
009A140C call @ILT+310(__RTC_CheckEsp) (9A113Bh)
接下来就调用printf函数。参数从右向左入栈:*ptr2的值就是08000000,先压入堆栈;而ptr1[-1],则先将ptr1读入edx,然后将[edx-4]的内容读入eax,再压入堆栈。ptr1[-1]等同于*(ptr1-1),根据上面的分析,ptr1指向数组a之后,所以*(ptr1-1)的值就是a[4],也就是0x0018FEE4处的0b 00 00 00,即0000000b。
完毕。
这里ptr1是平台无关的;但是ptr2是平台相关的:
CPU的大端和小端
平台上指针的长度和int的长度:比如在64位机器上,指针是64位长度(8字节),如果int只有4字节,(int)a就会截断指针、丢失高位地址,所以不同系统上编译出来的ptr2的值会不同。
不对之处,请大家斧正。