本系列文章是读《coredump问题原理探究》的读书笔记。
基本数据类型
char
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#include <stdio.h>
// Demo program: three char locals and a char pointer. The accompanying
// disassembly uses it to show the byte-sized store (movb) emitted for char
// initialisation and the lea instruction emitted when an address is taken.
int main() {
char c1 = 'a';
char c2 = 'b';
char c3 = 'c';
// NOTE(review): %x expects an unsigned int but char* values are passed here;
// "%p" with a (void*) cast is the portable way to print an address.
printf("addresses of c1 to c3 are (%x, %x, %x)\n", &c1, &c2, &c3);
char* p = &c1;
// Parses as *(p++): the pointer is advanced and the loaded value is
// discarded, so c1 itself is not modified.
*p++;
p = &c2;
// c2 goes from 'b' to 'd'.
(*p) += 2;
p = &c3;
// c3 goes from 'c' to 'f'.
(*p) += 3;
// Prints the address of the pointer variable itself (&p) and the char it
// currently points at (c3).
printf("address and value of p is ( %x, %c )\n", &p, *p);
return 0;
}
1
2
3
4
5
6
7
8
9
10
11
(gdb) disassemble main
Dump of assembler code for function main:
0x00000000004005f0 <+0>: push %rbp
0x00000000004005f1 <+1>: mov %rsp,%rbp
0x00000000004005f4 <+4>: sub $0x10,%rsp
0x00000000004005f8 <+8>: movb $0x61,-0x1(%rbp)
0x00000000004005fc <+12>: movb $0x62,-0x2(%rbp)
0x0000000000400600 <+16>: movb $0x63,-0x3(%rbp)
0x0000000000400604 <+20>: lea -0x3(%rbp),%rcx
0x0000000000400608 <+24>: lea -0x2(%rbp),%rdx
0x000000000040060c <+28>: lea -0x1(%rbp),%rax
char 占一个字节,因此 movb 是 char 类型的特征指令。lea 指令用于获取内存单元的地址,为指针的特征指令。
short
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
#include <stdio.h>
// Same demo as the char version, but with short locals. The accompanying
// disassembly shows 2-byte stores (movw) and a 2-byte spacing between the
// variables on the stack.
int main() {
short c1 = 'a';
short c2 = 'b';
short c3 = 'c';
// NOTE(review): %x expects an unsigned int but short* values are passed here;
// "%p" with a (void*) cast is the portable way to print an address.
printf("addresses of c1 to c3 are (%x, %x, %x)\n", &c1, &c2, &c3);
short* p = &c1;
// Parses as *(p++): the pointer is advanced and the loaded value is
// discarded, so c1 itself is not modified.
*p++;
p = &c2;
// c2 goes from 98 ('b') to 100.
(*p) += 2;
p = &c3;
// c3 goes from 99 ('c') to 102.
(*p) += 3;
// Prints the address of the pointer variable itself (&p) and the short it
// currently points at (c3), formatted as a decimal integer.
printf("address and value of p is ( %x, %d )\n", &p, *p);
return 0;
}
1
2
3
4
5
6
7
8
(gdb) disassemble main
Dump of assembler code for function main:
0x00000000004005f0 <+0>: push %rbp
0x00000000004005f1 <+1>: mov %rsp,%rbp
0x00000000004005f4 <+4>: sub $0x10,%rsp
0x00000000004005f8 <+8>: movw $0x61,-0x2(%rbp)
0x00000000004005fe <+14>: movw $0x62,-0x4(%rbp)
0x0000000000400604 <+20>: movw $0x63,-0x6(%rbp)
short是两个字节的,因此 movw 就是其特征指令。
下面表格是特征指令的总结:
类型 | 特征指令 |
char | movb |
short | movw |
int | movl |
long | movl (32-bit) movq (64-bit) |
指针 | lea |
char 数组
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
#include <stdio.h>
// Demo program: fills a 16-byte char array with consecutive letters so the
// disassembly shows how element addresses are computed as base + index * 1.
int main() {
char buf[16];
char c = 'a';
// NOTE(review): %x is used with pointer arguments; "%p" with a (void*) cast
// is the portable form. The format string also has no trailing newline.
printf("head of array:%x, tail of array:%x", buf, &buf[15]);
// Writes 'a'..'p' into buf[0]..buf[15]; i and c advance together.
for (int i = 0; i < 16; i++, c++) {
buf[i] = c;
}
// Overwrite the last element with a terminator so buf is a valid C string
// ("abcdefghijklmno") before printing it.
buf[15] = '\0';
printf( "%s\n", buf );
return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
(gdb) disassemble main
Dump of assembler code for function main:
0x0000000000400640 <+0>: push %rbp
0x0000000000400641 <+1>: mov %rsp,%rbp
0x0000000000400644 <+4>: sub $0x20,%rsp
0x0000000000400648 <+8>: movb $0x61,-0x1(%rbp)
0x000000000040064c <+12>: lea -0x20(%rbp),%rax
0x0000000000400650 <+16>: lea 0xf(%rax),%rdx
0x0000000000400654 <+20>: lea -0x20(%rbp),%rax
0x0000000000400658 <+24>: mov %rax,%rsi
0x000000000040065b <+27>: mov $0x400740,%edi
0x0000000000400660 <+32>: mov $0x0,%eax
0x0000000000400665 <+37>: callq 0x400510 <printf@plt>
0x000000000040066a <+42>: movl $0x0,-0x8(%rbp)
0x0000000000400671 <+49>: jmp 0x40068e <main+78>
0x0000000000400673 <+51>: mov -0x8(%rbp),%eax
0x0000000000400676 <+54>: cltq
0x0000000000400678 <+56>: movzbl -0x1(%rbp),%edx
0x000000000040067c <+60>: mov %dl,-0x20(%rbp,%rax,1)
0x0000000000400680 <+64>: addl $0x1,-0x8(%rbp)
0x0000000000400684 <+68>: movzbl -0x1(%rbp),%eax
0x0000000000400688 <+72>: add $0x1,%eax
0x000000000040068b <+75>: mov %al,-0x1(%rbp)
0x000000000040068e <+78>: cmpl $0xf,-0x8(%rbp)
0x0000000000400692 <+82>: jle 0x400673 <main+51>
0x0000000000400694 <+84>: movb $0x0,-0x11(%rbp)
0x0000000000400698 <+88>: lea -0x20(%rbp),%rax
0x000000000040069c <+92>: mov %rax,%rdi
0x000000000040069f <+95>: callq 0x400530 <puts@plt>
0x00000000004006a4 <+100>: mov $0x0,%eax
0x00000000004006a9 <+105>: leaveq
0x00000000004006aa <+106>: retq
End of assembler dump.
从
1
2
0x000000000040064c <+12>: lea -0x20(%rbp),%rax
0x0000000000400650 <+16>: lea 0xf(%rax),%rdx
可以看出数组 buf 的基址为 -0x20(%rbp),buf[15] 的地址则由 0xf(%rax) 得出,即基址加上 0xf,刚好为 基址 + 15 * sizeof(char)。
short 数组
1
2
3
4
5
6
7
8
9
10
11
12
13
#include <stdio.h>
// Demo program: fills a 16-element short array so the disassembly shows
// element addresses computed as base + index * 2.
int main() {
short buf[16];
short s = 'a';
// NOTE(review): %x is used with pointer arguments; "%p" with a (void*) cast
// is the portable form. The format string also has no trailing newline.
printf( "head of array:%x, tail of array:%x", buf, &buf[15] );
// Writes 97..112 ('a'..'p') into buf[0]..buf[15]; i and s advance together.
for ( int i = 0; i < 16; i++, s++ ) {
buf[i] = s;
}
// The last element (112) is returned from main, which forces the compiler
// to emit a load of buf[15] that is visible in the disassembly.
return buf[15];
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
(gdb) disassemble main
Dump of assembler code for function main:
0x00000000004005f0 <+0>: push %rbp
0x00000000004005f1 <+1>: mov %rsp,%rbp
0x00000000004005f4 <+4>: sub $0x30,%rsp
0x00000000004005f8 <+8>: movw $0x61,-0x2(%rbp)
0x00000000004005fe <+14>: lea -0x30(%rbp),%rax
0x0000000000400602 <+18>: lea 0x1e(%rax),%rdx
0x0000000000400606 <+22>: lea -0x30(%rbp),%rax
0x000000000040060a <+26>: mov %rax,%rsi
0x000000000040060d <+29>: mov $0x4006e0,%edi
0x0000000000400612 <+34>: mov $0x0,%eax
0x0000000000400617 <+39>: callq 0x4004d0 <printf@plt>
0x000000000040061c <+44>: movl $0x0,-0x8(%rbp)
0x0000000000400623 <+51>: jmp 0x400642 <main+82>
0x0000000000400625 <+53>: mov -0x8(%rbp),%eax
0x0000000000400628 <+56>: cltq
0x000000000040062a <+58>: movzwl -0x2(%rbp),%edx
0x000000000040062e <+62>: mov %dx,-0x30(%rbp,%rax,2)
0x0000000000400633 <+67>: addl $0x1,-0x8(%rbp)
0x0000000000400637 <+71>: movzwl -0x2(%rbp),%eax
0x000000000040063b <+75>: add $0x1,%eax
0x000000000040063e <+78>: mov %ax,-0x2(%rbp)
0x0000000000400642 <+82>: cmpl $0xf,-0x8(%rbp)
0x0000000000400646 <+86>: jle 0x400625 <main+53>
0x0000000000400648 <+88>: movzwl -0x12(%rbp),%eax
0x000000000040064c <+92>: cwtl
0x000000000040064d <+93>: leaveq
0x000000000040064e <+94>: retq
End of assembler dump.
由
1
2
0x00000000004005fe <+14>: lea -0x30(%rbp),%rax
0x0000000000400602 <+18>: lea 0x1e(%rax),%rdx
得知 buf 数组的基址为 -0x30(%rbp),buf[15] 的地址为基址加上 0x1e,即 15 * sizeof(short) = 30(short 类型的大小是 2 个字节)。
由
1
2
3
0x000000000040061c <+44>: movl $0x0,-0x8(%rbp)
0x0000000000400625 <+53>: mov -0x8(%rbp),%eax
0x000000000040062e <+62>: mov %dx,-0x30(%rbp,%rax,2)
得出变量 i 存放在 -0x8(%rbp),每次对数组元素的引用 buf[i] 为基址 -0x30(%rbp) + sizeof(short) * i 即 -0x30(%rbp) + 2 * i。
coredump 分析
执行下面这段代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
#include <stdlib.h>
#include <string.h>
// Deliberately crashing demo for the core-dump walkthrough: only every other
// slot of a pointer array is allocated, but every slot is passed to memset.
int main() {
// The first element is set to NULL explicitly; the remaining three are
// zero-initialised as well, so all four slots start out as null pointers.
int* ptrArray[4] = { NULL, };
// Step of 2: only ptrArray[0] and ptrArray[2] receive a buffer;
// ptrArray[1] and ptrArray[3] stay NULL.
for (int i = 0; i < 4; i += 2) {
ptrArray[i] = new int[8];
}
// Visits every index, including the unallocated ones. At c == 1 a null
// pointer is handed to memset, which is the crash analysed in the article.
// The buffers are never released with delete[].
for (char c = 0; c < 4; c++) {
memset(ptrArray[c], c, 8 * sizeof(int));
}
return 0;
}
产生的 coredump 文件:
1
2
3
(gdb) bt
#0 0x00007fb3cf8e972c in __memset_sse2 () from /lib64/libc.so.6
#1 0x00000000004006e4 in main ()
先用 bt 查看调用栈,再切换到 main 所在的栈帧,执行 disassemble 查看汇编:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
(gdb) bt
#0 0x00007f53cf4c972c in __memset_sse2 () from /lib64/libc.so.6
#1 0x00000000004006e4 in main ()
(gdb) frame 1
#1 0x00000000004006e4 in main ()
(gdb) disassemble
Dump of assembler code for function main:
0x0000000000400670 <+0>: push %rbp
0x0000000000400671 <+1>: mov %rsp,%rbp
0x0000000000400674 <+4>: sub $0x30,%rsp
0x0000000000400678 <+8>: movq $0x0,-0x30(%rbp)
0x0000000000400680 <+16>: movq $0x0,-0x28(%rbp)
0x0000000000400688 <+24>: movq $0x0,-0x20(%rbp)
0x0000000000400690 <+32>: movq $0x0,-0x18(%rbp)
0x0000000000400698 <+40>: movl $0x0,-0x4(%rbp)
0x000000000040069f <+47>: jmp 0x4006ba <main+74>
0x00000000004006a1 <+49>: mov $0x20,%edi
0x00000000004006a6 <+54>: callq 0x400560 <_Znam@plt>
0x00000000004006ab <+59>: mov -0x4(%rbp),%edx
0x00000000004006ae <+62>: movslq %edx,%rdx
0x00000000004006b1 <+65>: mov %rax,-0x30(%rbp,%rdx,8)
0x00000000004006b6 <+70>: addl $0x2,-0x4(%rbp)
0x00000000004006ba <+74>: cmpl $0x3,-0x4(%rbp)
0x00000000004006be <+78>: jle 0x4006a1 <main+49>
0x00000000004006c0 <+80>: movb $0x0,-0x5(%rbp)
0x00000000004006c4 <+84>: jmp 0x4006ee <main+126>
0x00000000004006c6 <+86>: mov $0x20,%edx
0x00000000004006cb <+91>: movsbl -0x5(%rbp),%ecx
0x00000000004006cf <+95>: movsbl -0x5(%rbp),%eax
0x00000000004006d3 <+99>: cltq
0x00000000004006d5 <+101>: mov -0x30(%rbp,%rax,8),%rax
0x00000000004006da <+106>: mov %ecx,%esi
0x00000000004006dc <+108>: mov %rax,%rdi
0x00000000004006df <+111>: callq 0x400540 <memset@plt>
=> 0x00000000004006e4 <+116>: movzbl -0x5(%rbp),%eax
0x00000000004006e8 <+120>: add $0x1,%eax
0x00000000004006eb <+123>: mov %al,-0x5(%rbp)
0x00000000004006ee <+126>: cmpb $0x3,-0x5(%rbp)
0x00000000004006f2 <+130>: jle 0x4006c6 <main+86>
0x00000000004006f4 <+132>: mov $0x0,%eax
0x00000000004006f9 <+137>: leaveq
0x00000000004006fa <+138>: retq
End of assembler dump.
可以看出是在调用 memset
的过程中 core 的(栈顶帧位于 __memset_sse2 内,=> 标记的是 memset 返回后将要执行的指令)。通过汇编推测出循环变量 c 存放于 $rbp-0x5,查看崩溃时 c 的值:
1
2
(gdb) x /c $rbp-0x5
0x7ffec6da164b: 1 '\001'
崩溃时 c 等于 1,正好对应数组 ptrArray 的第二个元素 ptrArray[1]。由
1
0x00000000004006b1 <+65>: mov %rax,-0x30(%rbp,%rdx,8)
推断出数组的基址为 -0x30(%rbp), 由数组元素作为 memset
的参数,可见数组的元素是指针, 由步长为 8 推断出是 64-bit 的机器。
查看 ptrArray[1] 的值:
1
2
(gdb) x /2x $rbp-0x30+8
0x7ffd641fbcb8: 0x00000000 0x00000000
确定是空指针传入 memset
导致 core。
coredump 分析 II
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
Program terminated with signal 11, Segmentation fault.
#0 0x0000000000000000 in ?? ()
Missing separate debuginfos, use: debuginfo-install glibc-2.17-157.tl2.2.x86_64 libgcc-4.8.5-4.el7.x86_64 libstdc++-4.8.5-4.el7.x86_64
(gdb) bt
#0 0x0000000000000000 in ?? ()
#1 0x0000000000400619 in result(xuzhina_dump_c05_s3_ex*, int) ()
#2 0x000000000040086d in main ()
(gdb) disassemble result
Dump of assembler code for function _Z6resultP22xuzhina_dump_c05_s3_exi:
0x00000000004005b0 <+0>: push %rbp
0x00000000004005b1 <+1>: mov %rsp,%rbp
0x00000000004005b4 <+4>: sub $0x20,%rsp
0x00000000004005b8 <+8>: mov %rdi,-0x18(%rbp)
0x00000000004005bc <+12>: mov %esi,-0x1c(%rbp)
0x00000000004005bf <+15>: movl $0x0,-0x4(%rbp)
0x00000000004005c6 <+22>: movl $0x0,-0x8(%rbp)
0x00000000004005cd <+29>: jmp 0x400620 <_Z6resultP22xuzhina_dump_c05_s3_exi+112>
0x00000000004005cf <+31>: mov -0x8(%rbp),%eax
0x00000000004005d2 <+34>: cltq
0x00000000004005d4 <+36>: shl $0x4,%rax
0x00000000004005d8 <+40>: mov %rax,%rdx
0x00000000004005db <+43>: mov -0x18(%rbp),%rax
0x00000000004005df <+47>: add %rdx,%rax
0x00000000004005e2 <+50>: mov 0x8(%rax),%rax
0x00000000004005e6 <+54>: mov -0x8(%rbp),%edx
0x00000000004005e9 <+57>: movslq %edx,%rdx
0x00000000004005ec <+60>: mov %rdx,%rcx
0x00000000004005ef <+63>: shl $0x4,%rcx
0x00000000004005f3 <+67>: mov -0x18(%rbp),%rdx
0x00000000004005f7 <+71>: add %rcx,%rdx
0x00000000004005fa <+74>: mov 0x4(%rdx),%ecx
0x00000000004005fd <+77>: mov -0x8(%rbp),%edx
0x0000000000400600 <+80>: movslq %edx,%rdx
0x0000000000400603 <+83>: mov %rdx,%rsi
0x0000000000400606 <+86>: shl $0x4,%rsi
0x000000000040060a <+90>: mov -0x18(%rbp),%rdx
0x000000000040060e <+94>: add %rsi,%rdx
0x0000000000400611 <+97>: mov (%rdx),%edx
0x0000000000400613 <+99>: mov %ecx,%esi
0x0000000000400615 <+101>: mov %edx,%edi
0x0000000000400617 <+103>: callq *%rax
0x0000000000400619 <+105>: add %eax,-0x4(%rbp)
0x000000000040061c <+108>: addl $0x1,-0x8(%rbp)
0x0000000000400620 <+112>: mov -0x8(%rbp),%eax
0x0000000000400623 <+115>: cmp -0x1c(%rbp),%eax
0x0000000000400626 <+118>: jl 0x4005cf <_Z6resultP22xuzhina_dump_c05_s3_exi+31>
0x0000000000400628 <+120>: mov -0x4(%rbp),%eax
0x000000000040062b <+123>: leaveq
0x000000000040062c <+124>: retq
End of assembler dump.
(gdb) i r rip
rip 0x0 0x0
可以看到调用堆栈顶层的地址为空,rip 也为 0。这种情况只可能是调用了地址为 0 的函数指针。
由
1
2
3
4
5
6
7
0x00000000004005bc <+12>: mov %esi,-0x1c(%rbp)
0x00000000004005c6 <+22>: movl $0x0,-0x8(%rbp)
...
0x000000000040061c <+108>: addl $0x1,-0x8(%rbp)
0x0000000000400620 <+112>: mov -0x8(%rbp),%eax
0x0000000000400623 <+115>: cmp -0x1c(%rbp),%eax
0x0000000000400626 <+118>: jl 0x4005cf <_Z6resultP22xuzhina_dump_c05_s3_exi+31>
推测是循环遍历,循环变量 i 存在 -0x8(%rbp),每次递增 1,然后与变量 -0x1c(%rbp) 比较。
由
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
0x00000000004005b8 <+8>: mov %rdi,-0x18(%rbp)
...
0x00000000004005cf <+31>: mov -0x8(%rbp),%eax
0x00000000004005d2 <+34>: cltq
0x00000000004005d4 <+36>: shl $0x4,%rax
0x00000000004005d8 <+40>: mov %rax,%rdx
0x00000000004005db <+43>: mov -0x18(%rbp),%rax
0x00000000004005df <+47>: add %rdx,%rax
0x00000000004005e2 <+50>: mov 0x8(%rax),%rax
0x00000000004005e6 <+54>: mov -0x8(%rbp),%edx
0x00000000004005e9 <+57>: movslq %edx,%rdx
0x00000000004005ec <+60>: mov %rdx,%rcx
0x00000000004005ef <+63>: shl $0x4,%rcx
0x00000000004005f3 <+67>: mov -0x18(%rbp),%rdx
0x00000000004005f7 <+71>: add %rcx,%rdx
0x00000000004005fa <+74>: mov 0x4(%rdx),%ecx
0x00000000004005fd <+77>: mov -0x8(%rbp),%edx
0x0000000000400600 <+80>: movslq %edx,%rdx
0x0000000000400603 <+83>: mov %rdx,%rsi
0x0000000000400606 <+86>: shl $0x4,%rsi
0x000000000040060a <+90>: mov -0x18(%rbp),%rdx
0x000000000040060e <+94>: add %rsi,%rdx
0x0000000000400611 <+97>: mov (%rdx),%edx
这三段基本一致的汇编代码我们推测是在遍历数组,数组的基址为 -0x18(%rbp)。由每段最后一句汇编推测数组元素为结构体,每段取结构体中的不同元素。
从0x00000000004005d4 <+36>: shl $0x4,%rax
得知每次遍历数组的偏移值为 16(左移 4 位 即乘以 16),说明数组元素的大小为 16 字节。
看看崩溃时 i 的值:
1
2
(gdb) x /x $rbp-0x8
0x7ffecfe8e358: 0x00000003
crash 时 i 等于 3,即第 4 个数组元素。
由
1
2
3
4
5
6
7
0x00000000004005d4 <+36>: shl $0x4,%rax
0x00000000004005d8 <+40>: mov %rax,%rdx
0x00000000004005db <+43>: mov -0x18(%rbp),%rax
0x00000000004005df <+47>: add %rdx,%rax
0x00000000004005e2 <+50>: mov 0x8(%rax),%rax
...
0x0000000000400617 <+103>: callq *%rax
得知函数指针存放于结构体的 0x8 偏移的字段,我们打印一下数组第四个元素(下标为 3)的内存值,其地址为数组基址(保存在 -0x18(%rbp) 中)+ 3 * 16,即基址 + 0x30:
1
2
3
4
(gdb) x /2x $rbp-0x18
0x7ffecfe8e348: 0xcfe8e370 0x00007ffe
(gdb) x /3x 0x00007ffecfe8e370+0x30
0x7ffecfe8e3a0: 0x00000003 0x00000003 0x00000000
可以看出该数组元素的第一和第二个字段值均为 3,第三个字段(函数指针,这里只显示了它的低 4 字节)为 0,正好是空指针。对比一下下面的源码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
// Signature shared by the binary integer callbacks stored in each record.
typedef int (*operation)(int a, int b);
// One record of the demo table: two operands plus the callback to apply to
// them. In the x86-64 build disassembled in the article the fields sit at
// offsets 0x0 (a), 0x4 (b) and 0x8 (oper), and each record occupies 16 bytes.
struct xuzhina_dump_c05_s3_ex {
int a;
int b;
operation oper;
};
// Applies each record's callback to its own operands and returns the sum of
// the results over test[0..num).
// There is no check that oper is non-null: a record whose oper is 0 makes
// this perform an indirect call to address 0, which is the crash analysed in
// the article.
int result(struct xuzhina_dump_c05_s3_ex test[], int num) {
int res = 0;
for (int i = 0; i < num; i++) {
res += test[i].oper(test[i].a, test[i].b);
}
return res;
}
// Callback: returns the sum of its two operands.
int add(int a, int b) {
    const int sum = a + b;
    return sum;
}
// Callback: returns the first operand minus the second.
int sub(int a, int b) {
    const int difference = a - b;
    return difference;
}
// Callback: returns the product of its two operands.
int mul(int a, int b) {
    const int product = a * b;
    return product;
}
// Populates test[0..num) with a repeating four-record pattern selected by
// the record's index modulo 4:
//   remainder 0 -> operands (index / 4, 0),          callback add
//   remainder 1 -> operands (index / 4, index % 4),  callback mul
//   remainder 2 -> operands (index % 4, index / 4),  callback sub
//   otherwise   -> operands (index, index % 4),      callback left null
// The last case intentionally stores a null callback; it is what makes
// result() crash in the article's walkthrough.
void init(struct xuzhina_dump_c05_s3_ex test[], int num) {
    for (int index = 0; index < num; ++index) {
        const int quotient = index / 4;
        const int remainder = index % 4;
        struct xuzhina_dump_c05_s3_ex& entry = test[index];

        if (remainder == 0) {
            entry.a = quotient;
            entry.b = 0;
            entry.oper = add;
        } else if (remainder == 1) {
            entry.a = quotient;
            entry.b = remainder;
            entry.oper = mul;
        } else if (remainder == 2) {
            entry.a = remainder;
            entry.b = quotient;
            entry.oper = sub;
        } else {
            entry.a = index;
            entry.b = remainder;
            entry.oper = 0;
        }
    }
}
// Builds a 15-record table and evaluates it. init() leaves oper null for
// every index with index % 4 == 3, so result() faults on the first such
// record (index 3) — the crash examined in the article.
int main() {
struct xuzhina_dump_c05_s3_ex test[15];
init(test, 15);
return result(test, 15);
}
数组第四个元素(下标为 3)的字段分别为 3、3、0,和我们的推测相符。
类
类成员函数
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
#include <stdio.h>
// Class with no data members, used to show that member functions receive the
// object's address as a hidden first argument.
class xuzhina_dump_c06_s1 {
public:
// Prints a fixed greeting; does not touch the object.
void hello() {
printf("hello\n");
}
// Prints the value of the this pointer so it can be compared with the
// object's address printed by the caller.
void print() {
printf("this:%p\n", this);
}
};
// Prints the address of a stack object and then calls two member functions
// on it; in the accompanying disassembly the same address is loaded into
// %rdi before each member call.
int main() {
xuzhina_dump_c06_s1 test;
printf("address of test:%p\n", &test);
test.print();
test.hello();
return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
(gdb) disassemble main
Dump of assembler code for function main:
0x0000000000400640 <+0>: push %rbp
0x0000000000400641 <+1>: mov %rsp,%rbp
0x0000000000400644 <+4>: sub $0x10,%rsp
0x0000000000400648 <+8>: lea -0x1(%rbp),%rax
0x000000000040064c <+12>: mov %rax,%rsi
0x000000000040064f <+15>: mov $0x40075f,%edi
0x0000000000400654 <+20>: mov $0x0,%eax
0x0000000000400659 <+25>: callq 0x400510 <printf@plt>
0x000000000040065e <+30>: lea -0x1(%rbp),%rax
0x0000000000400662 <+34>: mov %rax,%rdi
0x0000000000400665 <+37>: callq 0x400696 <_ZN19xuzhina_dump_c06_s15printEv>
0x000000000040066a <+42>: lea -0x1(%rbp),%rax
0x000000000040066e <+46>: mov %rax,%rdi
0x0000000000400671 <+49>: callq 0x40067e <_ZN19xuzhina_dump_c06_s15helloEv>
0x0000000000400676 <+54>: mov $0x0,%eax
0x000000000040067b <+59>: leaveq
0x000000000040067c <+60>: retq
End of assembler dump.
调用 print()
和 hello()
的汇编指令如下:
1
2
3
4
5
6
0x000000000040065e <+30>: lea -0x1(%rbp),%rax
0x0000000000400662 <+34>: mov %rax,%rdi
0x0000000000400665 <+37>: callq 0x400696 <_ZN19xuzhina_dump_c06_s15printEv>
0x000000000040066a <+42>: lea -0x1(%rbp),%rax
0x000000000040066e <+46>: mov %rax,%rdi
0x0000000000400671 <+49>: callq 0x40067e <_ZN19xuzhina_dump_c06_s15helloEv>
可以看到,调用类成员函数时会隐式传入 this 指针作为第一个参数。
类成员变量
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#include <stdio.h>
// Class with three data members of different sizes, used to show how members
// are laid out in memory. In the gdb dump that accompanies this listing m_c
// is at offset 0, m_d at offset 2 and m_e at offset 4.
class xuzhina_dump_c06_s2 {
private:
short m_c;
char m_d;
int m_e;
public:
// Stores a + b (truncated to short) in m_c, the letter 'd' in m_d and
// a - b in m_e.
xuzhina_dump_c06_s2(int a, int b) {
m_c = (short)(a + b);
m_d = 'd';
m_e = a - b;
}
// Prints the three members in declaration order.
void print() {
printf("member %d, %c, %d\n", m_c, m_d, m_e);
}
};
// Constructs the object with (2, 3), which yields m_c = 5, m_d = 'd' and
// m_e = -1 — the values inspected in the gdb memory dump — then prints them.
int main() {
xuzhina_dump_c06_s2 test(2, 3);
test.print();
return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
(gdb) disassemble main
Dump of assembler code for function main:
0x00000000004005f0 <+0>: push %rbp
0x00000000004005f1 <+1>: mov %rsp,%rbp
0x00000000004005f4 <+4>: sub $0x10,%rsp
0x00000000004005f8 <+8>: lea -0x10(%rbp),%rax
0x00000000004005fc <+12>: mov $0x3,%edx
0x0000000000400601 <+17>: mov $0x2,%esi
0x0000000000400606 <+22>: mov %rax,%rdi
0x0000000000400609 <+25>: callq 0x400622 <_ZN19xuzhina_dump_c06_s2C2Eii>
0x000000000040060e <+30>: lea -0x10(%rbp),%rax
0x0000000000400612 <+34>: mov %rax,%rdi
0x0000000000400615 <+37>: callq 0x40065c <_ZN19xuzhina_dump_c06_s25printEv>
0x000000000040061a <+42>: mov $0x0,%eax
0x000000000040061f <+47>: leaveq
0x0000000000400620 <+48>: retq
End of assembler dump.
打个断点:
1
2
3
(gdb) tbreak *0x000000000040060e
Temporary breakpoint 1 at 0x40060e
(gdb) r
查看对象是如何存放的:
1
2
3
4
5
(gdb) x /8x $rbp-0x10
0x7fffffffe460: 0xff640005 0xffffffff 0x00000000 0x00000000
0x7fffffffe470: 0x00000000 0x00000000 0xf721cb35 0x00007fff
(gdb) x /c 0x7fffffffe460+0x2
0x7fffffffe462: 100 'd'
对象由低到高分别存储 5(0x0005)、'd'(0x64,紧随其后的 0xff 是为了让 m_e 按 4 字节对齐而留下的填充字节)、-1(0xffffffff)。可见类的成员布局和结构体一样。
coredump 分析 III
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
(gdb) bt
#0 0x0000000000400711 in xuzhina_dump_c06_s2_ex::print() ()
#1 0x00000000004007fd in main ()
(gdb) disassemble
Dump of assembler code for function _ZN22xuzhina_dump_c06_s2_ex5printEv:
0x00000000004006d8 <+0>: push %rbp
0x00000000004006d9 <+1>: mov %rsp,%rbp
0x00000000004006dc <+4>: sub $0x20,%rsp
0x00000000004006e0 <+8>: mov %rdi,-0x18(%rbp)
0x00000000004006e4 <+12>: movl $0x0,-0x4(%rbp)
0x00000000004006eb <+19>: jmpq 0x400781 <_ZN22xuzhina_dump_c06_s2_ex5printEv+169>
0x00000000004006f0 <+24>: mov -0x18(%rbp),%rax
0x00000000004006f4 <+28>: movzwl (%rax),%eax
0x00000000004006f7 <+31>: cwtl
0x00000000004006f8 <+32>: test %eax,%eax
0x00000000004006fa <+34>: je 0x400703 <_ZN22xuzhina_dump_c06_s2_ex5printEv+43>
0x00000000004006fc <+36>: cmp $0x1,%eax
0x00000000004006ff <+39>: je 0x40072a <_ZN22xuzhina_dump_c06_s2_ex5printEv+82>
0x0000000000400701 <+41>: jmp 0x400757 <_ZN22xuzhina_dump_c06_s2_ex5printEv+127>
0x0000000000400703 <+43>: mov -0x18(%rbp),%rax
0x0000000000400707 <+47>: mov 0x18(%rax),%rdx
0x000000000040070b <+51>: mov -0x4(%rbp),%eax
0x000000000040070e <+54>: add %rdx,%rax
=> 0x0000000000400711 <+57>: movzbl (%rax),%eax
0x0000000000400714 <+60>: movsbl %al,%eax
0x0000000000400717 <+63>: mov %eax,%esi
0x0000000000400719 <+65>: mov $0x4008a0,%edi
0x000000000040071e <+70>: mov $0x0,%eax
0x0000000000400723 <+75>: callq 0x400540 <printf@plt>
0x0000000000400728 <+80>: jmp 0x40077d <_ZN22xuzhina_dump_c06_s2_ex5printEv+165>
0x000000000040072a <+82>: mov -0x18(%rbp),%rax
0x000000000040072e <+86>: mov 0x18(%rax),%rax
0x0000000000400732 <+90>: mov -0x4(%rbp),%edx
0x0000000000400735 <+93>: shl $0x2,%rdx
0x0000000000400739 <+97>: add %rdx,%rax
0x000000000040073c <+100>: movss (%rax),%xmm0
0x0000000000400740 <+104>: unpcklps %xmm0,%xmm0
0x0000000000400743 <+107>: cvtps2pd %xmm0,%xmm0
0x0000000000400746 <+110>: mov $0x4008a4,%edi
0x000000000040074b <+115>: mov $0x1,%eax
0x0000000000400750 <+120>: callq 0x400540 <printf@plt>
0x0000000000400755 <+125>: jmp 0x40077d <_ZN22xuzhina_dump_c06_s2_ex5printEv+165>
0x0000000000400757 <+127>: mov -0x18(%rbp),%rax
0x000000000040075b <+131>: mov 0x18(%rax),%rax
0x000000000040075f <+135>: mov -0x4(%rbp),%edx
0x0000000000400762 <+138>: shl $0x2,%rdx
0x0000000000400766 <+142>: add %rdx,%rax
0x0000000000400769 <+145>: mov (%rax),%eax
0x000000000040076b <+147>: mov %eax,%esi
0x000000000040076d <+149>: mov $0x4008a8,%edi
0x0000000000400772 <+154>: mov $0x0,%eax
0x0000000000400777 <+159>: callq 0x400540 <printf@plt>
0x000000000040077c <+164>: nop
0x000000000040077d <+165>: addl $0x1,-0x4(%rbp)
0x0000000000400781 <+169>: mov -0x18(%rbp),%rax
0x0000000000400785 <+173>: mov 0x20(%rax),%eax
0x0000000000400788 <+176>: cmp -0x4(%rbp),%eax
0x000000000040078b <+179>: ja 0x4006f0 <_ZN22xuzhina_dump_c06_s2_ex5printEv+24>
0x0000000000400791 <+185>: leaveq
0x0000000000400792 <+186>: retq
End of assembler dump.
由 xuzhina_dump_c06_s2_ex::print()
我们知道 print()
是类成员函数。因此第一个参数是 this 指针,从 0x00000000004006e0 <+8>: mov %rdi,-0x18(%rbp)
得知 this 指针存放于 -0x18(%rbp)。
由 core 附近的语句:
1
2
3
4
5
6
7
8
0x00000000004006e4 <+12>: movl $0x0,-0x4(%rbp)
...
0x0000000000400701 <+41>: jmp 0x400757 <_ZN22xuzhina_dump_c06_s2_ex5printEv+127>
0x0000000000400703 <+43>: mov -0x18(%rbp),%rax
0x0000000000400707 <+47>: mov 0x18(%rax),%rdx
0x000000000040070b <+51>: mov -0x4(%rbp),%eax
0x000000000040070e <+54>: add %rdx,%rax
=> 0x0000000000400711 <+57>: movzbl (%rax),%eax
可以推测出 this 指针 +0x18 偏移的成员变量是一个指向数组的指针(<+47> 先把该成员的值取到 %rdx,与索引相加之后才解引用),数组索引是保存在 -0x4(%rbp) 的变量。由 movzbl
指令知道数组元素类型是 char。
通过
1
2
3
4
5
6
7
8
0x00000000004006f0 <+24>: mov -0x18(%rbp),%rax
0x00000000004006f4 <+28>: movzwl (%rax),%eax
0x00000000004006f7 <+31>: cwtl
0x00000000004006f8 <+32>: test %eax,%eax
0x00000000004006fa <+34>: je 0x400703 <_ZN22xuzhina_dump_c06_s2_ex5printEv+43> // 等于 0 则跳转到 <+43>
0x00000000004006fc <+36>: cmp $0x1,%eax
0x00000000004006ff <+39>: je 0x40072a <_ZN22xuzhina_dump_c06_s2_ex5printEv+82> // 等于 1 则跳转到 <+82>
0x0000000000400701 <+41>: jmp 0x400757 <_ZN22xuzhina_dump_c06_s2_ex5printEv+127>
我们知道当 this 对象的第一个成员变量(其地址就是 this 指针所指向的地址,所以是第一个成员变量)为 0 时才会执行到崩溃的代码,由 movzwl
指令知道该成员为 short 类型。
接下来看看 -0x4(%rbp) 变量的逻辑:
1
2
3
4
5
6
7
0x00000000004006e4 <+12>: movl $0x0,-0x4(%rbp)
...
0x000000000040077d <+165>: addl $0x1,-0x4(%rbp)
0x0000000000400781 <+169>: mov -0x18(%rbp),%rax
0x0000000000400785 <+173>: mov 0x20(%rax),%eax
0x0000000000400788 <+176>: cmp -0x4(%rbp),%eax
0x000000000040078b <+179>: ja 0x4006f0 <_ZN22xuzhina_dump_c06_s2_ex5printEv+24>
可以看到 -0x4(%rbp) 为计数变量,每次与 this 指针 +0x20 偏移的成员变量做比较,代码框架大致如下:
1
2
3
4
5
6
7
for (int i = 0; i < this->count; i++) {
if (this->flag == 0) {
// core 了
} else if (this->flag == 1) {
...
}
}
由
1
2
0x0000000000400703 <+43>: mov -0x18(%rbp),%rax
0x0000000000400707 <+47>: mov 0x18(%rax),%rdx
core 之前访问了偏移为 0x18 的成员变量。前面分析得知该成员变量是指向 char 数组的指针,用 gdb 查看一下该变量:
1
2
3
4
5
6
7
8
(gdb) x /2x $rbp-0x18 // this 指针
0x7ffd988dfe98: 0x988dfed0 0x00007ffd
(gdb) x /2x 0x00007ffd988dfed0 // *this
0x7ffd988dfed0: 0x45480000 0x45454545
(gdb) x /s 0x00007ffd988dfed0+0x2 // 第一个成员为 short 类型 因此第二个成员为 *this + 0x2
0x7ffd988dfed2: "H", 'E' <repeats 45 times>
(gdb) x /2x 0x00007ffd988dfed0+0x18 // *this + 0x18 成员变量
0x7ffd988dfee8: 0x45 0x45
可以看到 +0x18 成员变量指向的地址不是有效的地址,被第二个成员一大串字符串 “H”, ‘E’ <repeats 45 times> 覆盖了。
接下来看看 main
函数,分析下是在哪里被覆盖的:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
(gdb) set print asm-demangle
(gdb) disassemble main
Dump of assembler code for function main:
0x0000000000400793 <+0>: push %rbp
0x0000000000400794 <+1>: mov %rsp,%rbp
0x0000000000400797 <+4>: sub $0x40,%rsp
0x000000000040079b <+8>: mov %edi,-0x34(%rbp)
0x000000000040079e <+11>: mov %rsi,-0x40(%rbp)
0x00000000004007a2 <+15>: cmpl $0x2,-0x34(%rbp)
0x00000000004007a6 <+19>: jg 0x4007af <main+28>
0x00000000004007a8 <+21>: mov $0xffffffff,%eax
0x00000000004007ad <+26>: jmp 0x400802 <main+111>
0x00000000004007af <+28>: mov -0x40(%rbp),%rax
0x00000000004007b3 <+32>: add $0x10,%rax
0x00000000004007b7 <+36>: mov (%rax),%rax
0x00000000004007ba <+39>: mov %rax,%rdi
0x00000000004007bd <+42>: callq 0x400570 <strlen@plt>
0x00000000004007c2 <+47>: mov %eax,%ecx
0x00000000004007c4 <+49>: mov -0x40(%rbp),%rax
0x00000000004007c8 <+53>: add $0x10,%rax
0x00000000004007cc <+57>: mov (%rax),%rdx
0x00000000004007cf <+60>: mov -0x40(%rbp),%rax
0x00000000004007d3 <+64>: add $0x8,%rax
0x00000000004007d7 <+68>: mov (%rax),%rsi
0x00000000004007da <+71>: lea -0x30(%rbp),%rax
0x00000000004007de <+75>: mov %ecx,%r8d
0x00000000004007e1 <+78>: mov %rdx,%rcx
0x00000000004007e4 <+81>: mov $0x0,%edx
0x00000000004007e9 <+86>: mov %rax,%rdi
0x00000000004007ec <+89>: callq 0x400680 <xuzhina_dump_c06_s2_ex::xuzhina_dump_c06_s2_ex(char*, short, void*, unsigned int)>
0x00000000004007f1 <+94>: lea -0x30(%rbp),%rax
0x00000000004007f5 <+98>: mov %rax,%rdi
0x00000000004007f8 <+101>: callq 0x4006d8 <xuzhina_dump_c06_s2_ex::print()>
0x00000000004007fd <+106>: mov $0x0,%eax
0x0000000000400802 <+111>: leaveq
0x0000000000400803 <+112>: retq
End of assembler dump.
从 main 的汇编代码发现只调用了 strlen
、xuzhina_dump_c06_s2_ex::xuzhina_dump_c06_s2_ex(char*, short, void*, unsigned int)
、xuzhina_dump_c06_s2_ex::print()
这三个函数。先看看构造函数:
注:x86-64(System V ABI)下,整型/指针参数自左向右依次传入寄存器 rdi、rsi、rdx、rcx、r8、r9;对于成员函数,this 指针作为隐含的第一个参数占用 rdi。
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
(gdb) disassemble 0x400680
Dump of assembler code for function _ZN22xuzhina_dump_c06_s2_exC2EPcsPvj:
0x0000000000400680 <+0>: push %rbp
0x0000000000400681 <+1>: mov %rsp,%rbp
0x0000000000400684 <+4>: sub $0x20,%rsp
0x0000000000400688 <+8>: mov %rdi,-0x8(%rbp) // this 指针
0x000000000040068c <+12>: mov %rsi,-0x10(%rbp) // char*
0x0000000000400690 <+16>: mov %edx,%eax // short
0x0000000000400692 <+18>: mov %rcx,-0x20(%rbp) // void*
0x0000000000400696 <+22>: mov %r8d,-0x18(%rbp) // unsigned int
0x000000000040069a <+26>: mov %ax,-0x14(%rbp) // short
0x000000000040069e <+30>: mov -0x8(%rbp),%rax
0x00000000004006a2 <+34>: mov -0x20(%rbp),%rdx
0x00000000004006a6 <+38>: mov %rdx,0x18(%rax)
0x00000000004006aa <+42>: mov -0x8(%rbp),%rax
0x00000000004006ae <+46>: mov -0x18(%rbp),%edx
0x00000000004006b1 <+49>: mov %edx,0x20(%rax)
0x00000000004006b4 <+52>: mov -0x8(%rbp),%rax
0x00000000004006b8 <+56>: lea 0x2(%rax),%rdx
0x00000000004006bc <+60>: mov -0x10(%rbp),%rax
0x00000000004006c0 <+64>: mov %rax,%rsi
0x00000000004006c3 <+67>: mov %rdx,%rdi
0x00000000004006c6 <+70>: callq 0x400580 <strcpy@plt>
0x00000000004006cb <+75>: mov -0x8(%rbp),%rax
0x00000000004006cf <+79>: movzwl -0x14(%rbp),%edx
0x00000000004006d3 <+83>: mov %dx,(%rax)
0x00000000004006d6 <+86>: leaveq
0x00000000004006d7 <+87>: retq
End of assembler dump.
由
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
0x0000000000400688 <+8>: mov %rdi,-0x8(%rbp)
...
0x000000000040069e <+30>: mov -0x8(%rbp),%rax
0x00000000004006a2 <+34>: mov -0x20(%rbp),%rdx
0x00000000004006a6 <+38>: mov %rdx,0x18(%rax) // void* 指针 赋值给 0x18偏移 即类对象的第三个字段
...
0x00000000004006aa <+42>: mov -0x8(%rbp),%rax
0x00000000004006ae <+46>: mov -0x18(%rbp),%edx // unsigned int
0x00000000004006b1 <+49>: mov %edx,0x20(%rax) // unsigned int 赋值给 0x20偏移 即类对象的第四个字段
0x00000000004006b4 <+52>: mov -0x8(%rbp),%rax
0x00000000004006b8 <+56>: lea 0x2(%rax),%rdx // 0x2偏移字段
0x00000000004006bc <+60>: mov -0x10(%rbp),%rax // char*
0x00000000004006c0 <+64>: mov %rax,%rsi // char* 作为 strcpy 的第二个参数
0x00000000004006c3 <+67>: mov %rdx,%rdi // 0x2偏移字段 即类对象的第二个字段 作为 strcpy 的第一个参数
0x00000000004006c6 <+70>: callq 0x400580 <strcpy@plt>
0x00000000004006cb <+75>: mov -0x8(%rbp),%rax
0x00000000004006cf <+79>: movzwl -0x14(%rbp),%edx
0x00000000004006d3 <+83>: mov %dx,(%rax) // 把 short 赋值给 0x0偏移 即类对象的第一个字段
得知类对象有四个成员变量,类结构体如下:
1
2
3
4
5
6
class xuzhina_dump_c06_s2_ex {
short flag; // 0x0
char arr[??]; // 0x2
void* ptr; // 0x18
unsigned int count; // 0x20
}
发现高危函数 strcpy
,猜测是 strcpy
向第二个字段拷贝的时候越界,把第三个字段给覆盖了。strcpy
第二个参数是 main
调用 xuzhina_dump_c06_s2_ex::xuzhina_dump_c06_s2_ex
函数的 char* 参数。切换到 frame 1,查看 main
中传给 xuzhina_dump_c06_s2_ex::xuzhina_dump_c06_s2_ex
函数的 char* 参数:
1
2
(gdb) f 1
#1 0x00000000004007fd in main ()
由 main
汇编代码中的:
1
2
3
4
5
6
7
0x000000000040079e <+11>: mov %rsi,-0x40(%rbp)
...
0x00000000004007cf <+60>: mov -0x40(%rbp),%rax
0x00000000004007d3 <+64>: add $0x8,%rax
0x00000000004007d7 <+68>: mov (%rax),%rsi // 第二个参数值存于 rsi, 也就是 char*
...
0x00000000004007ec <+89>: callq 0x400680 <xuzhina_dump_c06_s2_ex::xuzhina_dump_c06_s2_ex(char*, short, void*, unsigned int)>
1
2
3
4
5
6
(gdb) x /2x $rbp-0x40
0x7ffd988dfec0: 0x988dffe8 0x00007ffd
(gdb) x /2x 0x00007ffd988dffe8 + 0x8
0x7ffd988dfff0: 0x988e0793 0x00007ffd
(gdb) x /s 0x00007ffd988e0793
0x7ffd988e0793: "H", 'E' <repeats 45 times>
可见,字符串长度为 46(1 个 'H' 加 45 个 'E'),超过了第二个成员(偏移 0x2)到第三个成员(偏移 0x18)之间的 0x16 个字节,导致第三个成员变量被覆盖。
源码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
#include <string.h>
#include <stdio.h>
// Tagged view over an externally supplied buffer: a type tag, a fixed-size
// name, a pointer to the data and an element count. In the x86-64 build
// disassembled in the article the members sit at offsets 0x0 (m_type),
// 0x2 (m_name), 0x18 (m_ptr) and 0x20 (m_len).
class xuzhina_dump_c06_s2_ex {
private:
// Selects how print() interprets the elements behind m_ptr.
short m_type;
// Fixed 16-byte name buffer; it is filled by an unbounded strcpy in the
// constructor.
char m_name[16];
// Start of the data to print; the object only stores the pointer.
void* m_ptr;
// Number of elements print() reads through m_ptr.
unsigned int m_len;
public:
xuzhina_dump_c06_s2_ex(char* name, short type, void* data, unsigned int len);
void print();
};
// Stores the data pointer and length, copies the name, then stores the type.
// The name is copied with strcpy and no length check. Because m_ptr and
// m_len are assigned before the copy, a name longer than the m_name buffer
// overwrites them afterwards; this is the corruption traced in the article.
xuzhina_dump_c06_s2_ex::xuzhina_dump_c06_s2_ex(char* name, short type, void* data, unsigned int len) {
m_ptr = data;
m_len = len;
strcpy( m_name, name );
// Assigned last, so m_type keeps the caller's value even when the copy
// above ran past the end of m_name.
m_type = type;
}
// Prints m_len elements starting at m_ptr, each followed by a space.
// m_type chooses the element type: 0 reads chars, 1 reads floats, any other
// value reads ints. m_ptr is dereferenced without validation, so a corrupted
// pointer faults on the first element read.
void xuzhina_dump_c06_s2_ex::print() {
for (unsigned int i = 0; i < m_len; i++) {
switch(m_type) {
case 0:
printf( "%c ", *((char*)m_ptr + i ) );
break;
case 1:
printf( "%f ", *((float*)m_ptr + i ) );
break;
default:
printf( "%d ", *((int*)m_ptr + i ) );
break;
}
}
}
// Usage: <program> <name> <data>
// Returns -1 when fewer than two arguments are supplied. Otherwise it wraps
// argv[2] as char data (type 0, length strlen(argv[2])) under the name
// argv[1] and prints it. argv[1] is copied into a 16-byte member without a
// length check, so a long first argument triggers the crash analysed in the
// article.
int main(int argc, char* argv[]) {
if (argc < 3) {
return -1;
}
xuzhina_dump_c06_s2_ex test(argv[1], 0, argv[2], strlen(argv[2]));
test.print();
return 0;
}
执行 ./test HEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEEE 1
触发 coredump。