本系列文章是读《coredump问题原理探究》的读书笔记。
多继承
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
#include <stdio.h>
// Second base class of the multiple-inheritance demo.
// The class is polymorphic (it declares virtual functions) and holds two int
// fields. The member and virtual-function order below is what the article's
// disassembly refers to, so it is kept exactly as written.
class xuzhina_dump_c06_s5_mother {
private:
int m_age;
int m_beauty;
public:
// Prints "mother" followed by a newline; the child class overrides this.
virtual void print() {
printf("mother\n");
}
// Stores (age - 5) in m_age and beauty in m_beauty.
virtual void setBeauty(int age, int beauty) {
m_age = age - 5;
m_beauty = beauty;
}
};
// First base class of the multiple-inheritance demo.
// The class is polymorphic and holds two int fields; in the x86-64 build
// disassembled in this article its object size is 0x10 bytes.
class xuzhina_dump_c06_s5_father {
private:
int m_strong;
int m_age;
public:
// Prints "father" followed by a newline; the child class overrides this.
virtual void print() {
printf("father\n");
}
// Stores strong in m_strong and age in m_age.
virtual void setStrong(int strong, int age) {
m_strong = strong;
m_age = age;
}
};
// Derived class that inherits from father first and mother second.
// In the build disassembled in this article the father sub-object sits at
// offset 0 and the mother sub-object at offset 0x10; the constructor stores a
// separate vtable pointer at each of those offsets.
class xuzhina_dump_c06_s5_child: public xuzhina_dump_c06_s5_father,
public xuzhina_dump_c06_s5_mother {
private:
bool m_newMind;
public:
// Overrides print() of both bases; prints "child" followed by a newline.
virtual void print() {
printf("child\n");
}
// Always sets m_newMind to true, then calls the mother's setBeauty(10, 10)
// when gender is true, or the father's setStrong(20, 20) otherwise.
virtual void setGender(bool gender) {
m_newMind = true;
if (gender) {
setBeauty(10, 10);
} else {
setStrong(20,20);
}
}
};
// Demo driver: creates one child object and calls print() through a child
// pointer, a father pointer and a mother pointer. All three calls dispatch to
// child::print().
int main() {
// NOTE: the object is never deleted; the program returns right after the calls.
xuzhina_dump_c06_s5_child* child = new xuzhina_dump_c06_s5_child;
child->setGender( false );
child->print();
// father is the first base, so in the disassembly this conversion copies the
// pointer value unchanged.
xuzhina_dump_c06_s5_father* f = child;
f->print();
// mother is the second base: in the disassembly this conversion adds 0x10 to
// a non-null child pointer (a null pointer stays null).
xuzhina_dump_c06_s5_mother* m = child;
m->print();
return 0;
}
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
(gdb) disassemble main
Dump of assembler code for function main:
0x0000000000400740 <+0>: push %rbp
0x0000000000400741 <+1>: mov %rsp,%rbp
0x0000000000400744 <+4>: push %rbx
0x0000000000400745 <+5>: sub $0x28,%rsp
0x0000000000400749 <+9>: mov $0x28,%edi
0x000000000040074e <+14>: callq 0x400640 <_Znwm@plt>
0x0000000000400753 <+19>: mov %rax,%rbx
0x0000000000400756 <+22>: mov %rbx,%rdi
0x0000000000400759 <+25>: callq 0x400916 <_ZN25xuzhina_dump_c06_s5_childC2Ev>
0x000000000040075e <+30>: mov %rbx,-0x18(%rbp) // child 指针存放于 -0x18(%rbp)
0x0000000000400762 <+34>: mov -0x18(%rbp),%rax
0x0000000000400766 <+38>: mov (%rax),%rax
0x0000000000400769 <+41>: add $0x10,%rax
0x000000000040076d <+45>: mov (%rax),%rax // setGender 函数地址
0x0000000000400770 <+48>: mov -0x18(%rbp),%rdx
0x0000000000400774 <+52>: mov $0x0,%esi
0x0000000000400779 <+57>: mov %rdx,%rdi
0x000000000040077c <+60>: callq *%rax // child->setGender( false );
0x000000000040077e <+62>: mov -0x18(%rbp),%rax
0x0000000000400782 <+66>: mov (%rax),%rax
0x0000000000400785 <+69>: mov (%rax),%rax
0x0000000000400788 <+72>: mov -0x18(%rbp),%rdx
0x000000000040078c <+76>: mov %rdx,%rdi
0x000000000040078f <+79>: callq *%rax // child->print();
0x0000000000400791 <+81>: mov -0x18(%rbp),%rax // 取 child 地址
0x0000000000400795 <+85>: mov %rax,-0x20(%rbp) // f = child; f 存储于 -0x20(%rbp)
0x0000000000400799 <+89>: mov -0x20(%rbp),%rax
0x000000000040079d <+93>: mov (%rax),%rax
0x00000000004007a0 <+96>: mov (%rax),%rax
0x00000000004007a3 <+99>: mov -0x20(%rbp),%rdx
0x00000000004007a7 <+103>: mov %rdx,%rdi
0x00000000004007aa <+106>: callq *%rax // f->print();
0x00000000004007ac <+108>: cmpq $0x0,-0x18(%rbp)
0x00000000004007b1 <+113>: je 0x4007bd <main+125>
0x00000000004007b3 <+115>: mov -0x18(%rbp),%rax
0x00000000004007b7 <+119>: add $0x10,%rax
0x00000000004007bb <+123>: jmp 0x4007c2 <main+130>
0x00000000004007bd <+125>: mov $0x0,%eax
0x00000000004007c2 <+130>: mov %rax,-0x28(%rbp)
0x00000000004007c6 <+134>: mov -0x28(%rbp),%rax
0x00000000004007ca <+138>: mov (%rax),%rax
0x00000000004007cd <+141>: mov (%rax),%rax
0x00000000004007d0 <+144>: mov -0x28(%rbp),%rdx
0x00000000004007d4 <+148>: mov %rdx,%rdi
0x00000000004007d7 <+151>: callq *%rax // m->print();
0x00000000004007d9 <+153>: mov $0x0,%eax
0x00000000004007de <+158>: add $0x28,%rsp
0x00000000004007e2 <+162>: pop %rbx
0x00000000004007e3 <+163>: pop %rbp
0x00000000004007e4 <+164>: retq
End of assembler dump.
关注这几行,对应的代码是 xuzhina_dump_c06_s5_mother* m = child;
1
2
3
4
5
6
7
0x00000000004007ac <+108>: cmpq $0x0,-0x18(%rbp)
0x00000000004007b1 <+113>: je 0x4007bd <main+125>
0x00000000004007b3 <+115>: mov -0x18(%rbp),%rax
0x00000000004007b7 <+119>: add $0x10,%rax // child 偏移 0x10
0x00000000004007bb <+123>: jmp 0x4007c2 <main+130>
0x00000000004007bd <+125>: mov $0x0,%eax
0x00000000004007c2 <+130>: mov %rax,-0x28(%rbp) // 偏移后赋值给 m
为什么这里不能像 xuzhina_dump_c06_s5_father* f = child; 那样,直接把 -0x18(%rbp) 中保存的 child 指针赋给 m,而是需要先加上 0x10 的偏移呢?看看子类的构造函数:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
(gdb) disassemble 0x400916
Dump of assembler code for function _ZN25xuzhina_dump_c06_s5_childC2Ev:
0x0000000000400916 <+0>: push %rbp
0x0000000000400917 <+1>: mov %rsp,%rbp
0x000000000040091a <+4>: sub $0x10,%rsp
0x000000000040091e <+8>: mov %rdi,-0x8(%rbp)
0x0000000000400922 <+12>: mov -0x8(%rbp),%rax
0x0000000000400926 <+16>: mov %rax,%rdi
0x0000000000400929 <+19>: callq 0x4008ea <_ZN26xuzhina_dump_c06_s5_fatherC2Ev>
0x000000000040092e <+24>: mov -0x8(%rbp),%rax
0x0000000000400932 <+28>: add $0x10,%rax
0x0000000000400936 <+32>: mov %rax,%rdi
0x0000000000400939 <+35>: callq 0x400900 <_ZN26xuzhina_dump_c06_s5_motherC2Ev>
0x000000000040093e <+40>: mov -0x8(%rbp),%rax
0x0000000000400942 <+44>: movq $0x400a30,(%rax)
0x0000000000400949 <+51>: mov -0x8(%rbp),%rax
0x000000000040094d <+55>: movq $0x400a58,0x10(%rax)
0x0000000000400955 <+63>: leaveq
0x0000000000400956 <+64>: retq
End of assembler dump.
可见子类 child 为两个父类 father 和 mother 的组合。从
1
2
3
4
5
0x0000000000400929 <+19>: callq 0x4008ea <_ZN26xuzhina_dump_c06_s5_fatherC2Ev>
0x000000000040092e <+24>: mov -0x8(%rbp),%rax
0x0000000000400932 <+28>: add $0x10,%rax // 偏移 0x10
0x0000000000400936 <+32>: mov %rax,%rdi
0x0000000000400939 <+35>: callq 0x400900 <_ZN26xuzhina_dump_c06_s5_motherC2Ev>
我们知道 father 对象大小为 0x10。
1
2
3
4
5
6
7
8
9
10
(gdb) x /4x 0x400a58
0x400a58 <_ZTV25xuzhina_dump_c06_s5_child+56>: 0x0040087a 0x00000000 0x004007fe 0x00000000
(gdb) x /4x 0x400a30
0x400a30 <_ZTV25xuzhina_dump_c06_s5_child+16>: 0x00400862 0x00000000 0x0040083e 0x00000000
(gdb) info symbol 0x0040087a
non-virtual thunk to xuzhina_dump_c06_s5_child::print() in section .text
(gdb) info symbol 0x004007fe
xuzhina_dump_c06_s5_mother::setBeauty(int, int) in section .text
(gdb) info symbol 0x00400862
xuzhina_dump_c06_s5_child::print() in section .text
结论:多继承时,将子类对象指针转换为基类指针,实际上是把子类对象中对应基类子对象的地址赋给基类指针。第一个基类(father)的子对象位于偏移 0,指针值不变;其后的基类(mother)的子对象位于偏移 0x10,因此转换时需要加上该偏移(空指针除外,保持为 0)。
coredump 分析
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
(gdb) bt
#0 0x0000000000400821 in xuzhina_dump_c06_s5_ex_child::inheritFrom(char*, int) ()
#1 0x000000000040079d in main ()
(gdb) disassemble
Dump of assembler code for function _ZN28xuzhina_dump_c06_s5_ex_child11inheritFromEPci:
0x00000000004007ec <+0>: push %rbp
0x00000000004007ed <+1>: mov %rsp,%rbp
0x00000000004007f0 <+4>: sub $0x20,%rsp
0x00000000004007f4 <+8>: mov %rdi,-0x8(%rbp)
0x00000000004007f8 <+12>: mov %rsi,-0x10(%rbp)
0x00000000004007fc <+16>: mov %edx,-0x14(%rbp)
0x00000000004007ff <+19>: mov -0x8(%rbp),%rax
0x0000000000400803 <+23>: mov (%rax),%rax
0x0000000000400806 <+26>: mov (%rax),%rax
0x0000000000400809 <+29>: mov -0x8(%rbp),%rdx
0x000000000040080d <+33>: mov -0x10(%rbp),%rcx
0x0000000000400811 <+37>: mov %rcx,%rsi
0x0000000000400814 <+40>: mov %rdx,%rdi
0x0000000000400817 <+43>: callq *%rax
0x0000000000400819 <+45>: mov -0x8(%rbp),%rax
0x000000000040081d <+49>: mov 0x10(%rax),%rax
=> 0x0000000000400821 <+53>: mov (%rax),%rax
0x0000000000400824 <+56>: mov -0x8(%rbp),%rdx
0x0000000000400828 <+60>: lea 0x10(%rdx),%rcx
0x000000000040082c <+64>: mov -0x14(%rbp),%edx
0x000000000040082f <+67>: mov %edx,%esi
0x0000000000400831 <+69>: mov %rcx,%rdi
0x0000000000400834 <+72>: callq *%rax
0x0000000000400836 <+74>: mov -0x8(%rbp),%rax
0x000000000040083a <+78>: movl $0x1,0x1c(%rax)
0x0000000000400841 <+85>: leaveq
0x0000000000400842 <+86>: retq
End of assembler dump.
this 指针存放在 -0x8(%rbp)。查看 this 指针的值以及 this+0x10 处的内存:
1
2
3
4
5
6
(gdb) x /4x $rbp-0x8
0x7ffca5ea45c8: 0x01070010 0x00000000 0xa5ea4610 0x00007ffc
(gdb) x /4x 0x01070010+0x10
0x1070020: 0x6548646c 0x576f6c6c 0x646c726f 0x00000000 // 每个字节都小于0x80 猜测是 ascii 码
(gdb) x /s 0x01070010+0x10
0x1070020: "ldHelloWorld"
分析 core 之后的指令:
1
2
3
4
5
6
7
8
9
0x0000000000400819 <+45>: mov -0x8(%rbp),%rax
0x000000000040081d <+49>: mov 0x10(%rax),%rax
=> 0x0000000000400821 <+53>: mov (%rax),%rax
0x0000000000400824 <+56>: mov -0x8(%rbp),%rdx
0x0000000000400828 <+60>: lea 0x10(%rdx),%rcx
0x000000000040082c <+64>: mov -0x14(%rbp),%edx
0x000000000040082f <+67>: mov %edx,%esi
0x0000000000400831 <+69>: mov %rcx,%rdi
0x0000000000400834 <+72>: callq *%rax
这里 rax 应该保存的是从 this+0x10 处取出的虚函数表指针(即 mother 子对象的 vptr),然而上面发现 this+0x10 处的内容是字符串 ldHelloWorld,意味着该虚函数表指针被字符串覆盖;对这个无效地址执行 mov (%rax),%rax 时触发了 core。
core 之前有调用虚函数的汇编代码:
1
2
3
4
5
6
7
8
0x00000000004007ff <+19>: mov -0x8(%rbp),%rax
0x0000000000400803 <+23>: mov (%rax),%rax
0x0000000000400806 <+26>: mov (%rax),%rax
0x0000000000400809 <+29>: mov -0x8(%rbp),%rdx
0x000000000040080d <+33>: mov -0x10(%rbp),%rcx
0x0000000000400811 <+37>: mov %rcx,%rsi
0x0000000000400814 <+40>: mov %rdx,%rdi
0x0000000000400817 <+43>: callq *%rax
猜测是调用该虚函数时发生了溢出,覆盖了 this+0x10 处的虚函数表指针导致 core 的,看看调用了哪个虚函数:
1
2
3
4
5
6
7
8
(gdb) x /4x $rbp-0x8
0x7ffca5ea45c8: 0x01070010 0x00000000 0xa5ea4610 0x00007ffc
(gdb) x /4wx 0x01070010
0x1070010: 0x00400970 0x00000000 0x6c6c6548 0x726f576f
(gdb) x /4wx 0x00400970
0x400970 <_ZTV28xuzhina_dump_c06_s5_ex_child+16>: 0x004007aa 0x00000000 0x004007ec 0x00000000
(gdb) info symbol 0x004007aa
xuzhina_dump_c06_s5_ex_father::setName(char*) in section .text of /data/home/tmp/test
查看下 setName 的汇编代码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
(gdb) disassemble 0x004007aa
Dump of assembler code for function _ZN29xuzhina_dump_c06_s5_ex_father7setNameEPc:
0x00000000004007aa <+0>: push %rbp
0x00000000004007ab <+1>: mov %rsp,%rbp
0x00000000004007ae <+4>: sub $0x10,%rsp
0x00000000004007b2 <+8>: mov %rdi,-0x8(%rbp) // this 指针
0x00000000004007b6 <+12>: mov %rsi,-0x10(%rbp) // char* 参数
0x00000000004007ba <+16>: mov -0x8(%rbp),%rax
0x00000000004007be <+20>: lea 0x8(%rax),%rdx // this 指针 +0x8 偏移的成员变量
0x00000000004007c2 <+24>: mov -0x10(%rbp),%rax
0x00000000004007c6 <+28>: mov %rax,%rsi
0x00000000004007c9 <+31>: mov %rdx,%rdi
0x00000000004007cc <+34>: callq 0x400630 <strcpy@plt> // 把 char* strcpy 到 this + 0x8 的成员变量
0x00000000004007d1 <+39>: leaveq
0x00000000004007d2 <+40>: retq
End of assembler dump.
发现高危函数 strcpy:setName 会把 char* 指向的字符串拷贝到 this 指针 +0x8 偏移处的成员变量。推断是因为字符串长度超过了该成员变量的大小,导致覆盖了 this+0x10 处的虚函数表指针。
查下 setName 的参数 char* 的值:
1
2
3
4
5
6
0x00000000004007f8 <+12>: mov %rsi,-0x10(%rbp) // xuzhina_dump_c06_s5_ex_child::inheritFrom 的第一个参数 存放在 -0x10(%rbp)
...
0x000000000040080d <+33>: mov -0x10(%rbp),%rcx
0x0000000000400811 <+37>: mov %rcx,%rsi // -0x10(%rbp) 作为第一个参数传给 setName
0x0000000000400814 <+40>: mov %rdx,%rdi
0x0000000000400817 <+43>: callq *%rax
可以看出 char* 是 main 传给 xuzhina_dump_c06_s5_ex_child::inheritFrom 的第一个参数,切换到 main 所在的栈帧(f 1)看看:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
(gdb) f 1
#1 0x000000000040079d in main ()
(gdb) disassemble
Dump of assembler code for function main:
0x0000000000400740 <+0>: push %rbp
0x0000000000400741 <+1>: mov %rsp,%rbp
0x0000000000400744 <+4>: push %rbx
0x0000000000400745 <+5>: sub $0x28,%rsp
0x0000000000400749 <+9>: mov %edi,-0x24(%rbp)
0x000000000040074c <+12>: mov %rsi,-0x30(%rbp)
0x0000000000400750 <+16>: cmpl $0x1,-0x24(%rbp)
0x0000000000400754 <+20>: jg 0x40075d <main+29>
0x0000000000400756 <+22>: mov $0xffffffff,%eax
0x000000000040075b <+27>: jmp 0x4007a2 <main+98>
0x000000000040075d <+29>: mov $0x20,%edi
0x0000000000400762 <+34>: callq 0x400640 <_Znwm@plt>
0x0000000000400767 <+39>: mov %rax,%rbx
0x000000000040076a <+42>: mov %rbx,%rdi
0x000000000040076d <+45>: callq 0x400870 <_ZN28xuzhina_dump_c06_s5_ex_childC2Ev>
0x0000000000400772 <+50>: mov %rbx,-0x18(%rbp)
0x0000000000400776 <+54>: mov -0x18(%rbp),%rax
0x000000000040077a <+58>: mov (%rax),%rax
0x000000000040077d <+61>: add $0x8,%rax
0x0000000000400781 <+65>: mov (%rax),%rax
0x0000000000400784 <+68>: mov -0x30(%rbp),%rdx
0x0000000000400788 <+72>: add $0x8,%rdx
0x000000000040078c <+76>: mov (%rdx),%rsi
0x000000000040078f <+79>: mov -0x18(%rbp),%rcx
0x0000000000400793 <+83>: mov $0x1,%edx
0x0000000000400798 <+88>: mov %rcx,%rdi
0x000000000040079b <+91>: callq *%rax
=> 0x000000000040079d <+93>: mov $0x0,%eax
0x00000000004007a2 <+98>: add $0x28,%rsp
0x00000000004007a6 <+102>: pop %rbx
0x00000000004007a7 <+103>: pop %rbp
0x00000000004007a8 <+104>: retq
End of assembler dump.
从
1
2
3
4
5
6
7
0x0000000000400784 <+68>: mov -0x30(%rbp),%rdx
0x0000000000400788 <+72>: add $0x8,%rdx
0x000000000040078c <+76>: mov (%rdx),%rsi // char* 的地址为 -0x30(%rbp) + 0x8
0x000000000040078f <+79>: mov -0x18(%rbp),%rcx
0x0000000000400793 <+83>: mov $0x1,%edx
0x0000000000400798 <+88>: mov %rcx,%rdi // this 指针
0x000000000040079b <+91>: callq *%rax
确定 -0x30(%rbp) 保存的是 argv,char* 即 argv[1],存放在 argv + 0x8 处,gdb 打印下:
1
2
3
4
5
6
(gdb) x /2wx $rbp-0x30
0x7ffca5ea45e0: 0xa5ea46f8 0x00007ffc
(gdb) x /2wx 0x00007ffca5ea46f8+0x8
0x7ffca5ea4700: 0xa5ea67af 0x00007ffc
(gdb) x /s 0x00007ffca5ea67af
0x7ffca5ea67af: "HelloWorldHelloWorld"
得出 char* 的值为 HelloWorldHelloWorld,长度为 20(加上结尾的 '\0' 共 21 字节)。strcpy 从 this+0x8 开始写入,会一直写到 this+0x1c,很明显超过了 0x10,导致 this+0x10 处的虚函数表指针被字符串覆盖。
源码:
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
#include <string.h>
// First base class of the coredump example. Holds an 8-byte name buffer,
// which the disassembly shows at offset 0x8 of the object.
class xuzhina_dump_c06_s5_ex_father {
private:
char m_name[8];
public:
// Copies name into m_name with strcpy.
// WARNING: strcpy does no bounds checking, so a name longer than 7
// characters plus the terminating NUL writes past the end of m_name. This
// is the deliberate defect the article analyses: in the child object the
// overflow lands on the vtable pointer stored at offset 0x10.
virtual void setName(char* name) {
strcpy(m_name, name);
}
};
// Second base class of the coredump example; a polymorphic class with a
// single int field.
class xuzhina_dump_c06_s5_ex_mother {
private:
int m_nature;
public:
// Stores nature in m_nature.
virtual void setNature(int nature) {
m_nature = nature;
}
};
// Derived class of the coredump example; inherits from ex_father first and
// ex_mother second.
class xuzhina_dump_c06_s5_ex_child: public xuzhina_dump_c06_s5_ex_father,
public xuzhina_dump_c06_s5_ex_mother {
private:
int m_sweet;
public:
// Calls setName(lastName), then setNature(nature), then sets m_sweet to 1.
// In the core analysed in this article the crash occurs while dispatching
// setNature: the preceding setName call overflowed m_name and overwrote the
// vtable pointer at offset 0x10 that the setNature call reads.
virtual void inheritFrom(char* lastName, int nature) {
setName(lastName);
setNature(nature);
m_sweet = 1;
}
};
// Driver for the coredump example.
// Returns -1 when no command-line argument is given; otherwise creates a child
// object and passes argv[1] unchecked to inheritFrom() with nature = 1, then
// returns 0. The article triggers the crash by passing a 20-character argument.
int main(int argc, char* argv[])
{
if (argc < 2) {
return -1;
}
// NOTE: the object is never deleted before the program returns.
xuzhina_dump_c06_s5_ex_child* child = new xuzhina_dump_c06_s5_ex_child;
child->inheritFrom(argv[1], 1);
return 0;
}
运行 ./test HelloWorldHelloWorld 即可触发 coredump。