0%

C++对象模型 Function Semantics

NRV(Named Return Value)优化: c++ - What are copy elision and return value optimization? - Stack Overflow

调用方式

non static member function

经过编译器对名称mangling,然后进行一些返回值优化,this作为member function的参数

virtual member function

1
2
3
ptr->normalize();
// to
(*ptr->vptr[1])(ptr);

可以通过显式指定类名(作用域限定)的调用方式来抑制virtual机制,此时调用在编译期就被决议,不再经过vtable

1
register float mag = ptr->Point3d::magnitude();

static function

取一个static member function的地址,得到的类型是一个普通的函数指针(而不是指向member function的指针)

1
2
3
4
5
6
7
8
9
10
11
12
13
// Demo type with one static and one non-static member function. Each prints
// its own qualified name so the two call paths can be told apart.
class Foo {
public:
    // Static member function: the demo's main() takes its address as a plain
    // function pointer, void(*)().
    static void f()
    {
        std::cout << "Foo::f()\n";
    }

    // Non-static member function: its address is a pointer-to-member-function,
    // void (Foo::*)().
    void g()
    {
        std::cout << "Foo::g()\n";
    }
};

int main() {
auto f = &Foo::f; // void(*)()
f();
auto g = &Foo::g; // void (Foo::*)()
Foo foo;
(foo.*g)();
}

virtual member function

单继承


在调用virtual function时

  • 并不知道ptr所指对象的真正类型,然而经由ptr可以获得该对象的virtual table
  • 并不知道哪一个z()实例会被调用,但是每个z()实例都放在slot 4中

多继承

以Derived继承自Base1和Base2为例:

1
2
3
Base2 *pbase2 = new Derived;

delete pbase2; // invoke derived class's destructor

如果是指针offset调整的方式,当调用delete的时候需要将指针调整回来(重新指向Derived对象,因为调用的是Derived虚析构函数)

解决方法是增加一个thunk:虚表中对应的slot不直接指向函数,而是指向这个thunk,在thunk中调整指针位置,使this重新指向Derived,然后再跳转到真正的函数

单继承、多继承、菱形继承的虚函数表 | Albert World
c++ - What is a ‘thunk’? - Stack Overflow

至于代码:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
#include <iostream>
#include <memory>

// First polymorphic base of the multiple-inheritance example.
// Declares two virtual functions, each printing its qualified name.
class Base1 {
public:
    virtual void f()
    {
        std::cout << "Base1::f()\n";
    }

    virtual void g()
    {
        std::cout << "Base1::g()\n";
    }

private:
    // Never read or written by this class; it only gives the class a data member.
    int base1_data;
};

// Second polymorphic base of the multiple-inheritance example.
// Declares two virtual functions, each printing its qualified name.
class Base2 {
public:
    virtual void g()
    {
        std::cout << "Base2::g()\n";
    }

    virtual void h()
    {
        std::cout << "Base2::h()\n";
    }

private:
    // Never read or written by this class; it only gives the class a data member.
    int base2_data;
};

class Derived : public Base1, public Base2 {
public:
virtual void f() { std::cout << "Derived::f()\n"; }
virtual void g() { std::cout << "Derived::g()\n"; }
};

// Calls the virtual functions of a Derived object through a pointer to each of
// its two bases, first with ordinary virtual calls and then with explicitly
// qualified calls.
int main() {
Derived d;
Base1 *b1 = &d;
Base2 *b2 = &d; // the address of d is adjusted by +16 here (see the `add rax, 16` in the listing for this line)
// Ordinary virtual calls through the base pointers.
b1->f();
b1->g();
b2->g();
b2->h();
// Explicitly qualified calls: these name the base-class version directly.
b1->Base1::f();
b1->Base1::g();
b2->Base2::g();
b2->Base2::h();
}

在这个程序中,先来看下derived的vtable和构造函数

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
vtable for Derived:
.quad 0
.quad typeinfo for Derived
.quad Derived::f()
.quad Derived::g()
.quad -16
.quad typeinfo for Derived
.quad non-virtual thunk to Derived::g()
.quad Base2::h()

Derived::Derived() [base object constructor]: # @Derived::Derived() [base object constructor]
push rbp
mov rbp, rsp
sub rsp, 16
mov qword ptr [rbp - 8], rdi
mov rdi, qword ptr [rbp - 8]
mov qword ptr [rbp - 16], rdi # 8-byte Spill
call Base1::Base1() [base object constructor]
mov rdi, qword ptr [rbp - 16] # 8-byte Reload
add rdi, 16
call Base2::Base2() [base object constructor]
mov rax, qword ptr [rbp - 16] # 8-byte Reload
lea rcx, [rip + vtable for Derived]
add rcx, 16
mov qword ptr [rax], rcx
lea rcx, [rip + vtable for Derived]
add rcx, 48
mov qword ptr [rax + 16], rcx
add rsp, 16
pop rbp
ret

Derived的对象布局如下,32bytes:

1
2
3
4
5
6
7
8
9
+------
|0 | vtable+16 // +16后指向函数
|8 | base1_data
|12| padding
+------
|16| vtable+48 // +48后指向Base2的虚函数
|24| base2_data
|28| padding
+------

vtable中,有个non-virtual thunk to Derived::g():

1
2
3
4
5
6
7
8
non-virtual thunk to Derived::g():                  # @non-virtual thunk to Derived::g()
push rbp
mov rbp, rsp
mov qword ptr [rbp - 8], rdi
mov rdi, qword ptr [rbp - 8]
add rdi, -16
pop rbp
jmp Derived::g() # TAILCALL

可以看到作用是将this指针减去16(Base1的大小),将类型为Base2的指针重新指回去Derived
在Derived转换为Base2指针的时候:

1
2
3
4
5
6
7
8
9
10
11
12
// Base2 *b2 = &d; // 这里将d加上了16
xor eax, eax
lea rcx, [rbp - 40]
cmp rcx, 0
mov qword ptr [rbp - 64], rax # 8-byte Spill
je .LBB1_2
lea rax, [rbp - 40]
add rax, 16
mov qword ptr [rbp - 64], rax # 8-byte Spill
.LBB1_2:
mov rax, qword ptr [rbp - 64] # 8-byte Reload
mov qword ptr [rbp - 56], rax

通过Base2的指针调用的时候,此时this指针已经是加上16的,所以之后调用没有override的Base2::h()的时候不需要任何操作(vtable中最后一项)

虚继承下的virtual function

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
#include <iostream>

// Polymorphic 2D point; used as the virtual base class in the
// virtual-inheritance example.
class Point2d {
public:
    // Both coordinates default to 0.
    Point2d(float x = 0.0, float y = 0.0) : x_(x), y_(y) {}
    virtual ~Point2d() {}

    // Does nothing in this class.
    virtual void mumble() {}

    // A 2D point has no z coordinate, so report 0. The function must return a
    // value: flowing off the end of a non-void function is undefined behaviour.
    virtual float z() { return 0.0f; }

protected:
    float x_, y_;
};

class Point3d : virtual Point2d {
public:
Point3d(float x = 0.0, float y = 0.0, float z = 0.0) : Point2d(x, y), z_(z) {};
~Point3d(){};

float z() { return z_; };

protected:
float z_;
};

// Constructs a Point3d and records the sizes of the two classes.
int main() {
// x, y and z are all initialised to 1.
Point3d p(1, 1, 1);
// The sizeof expressions are evaluated and discarded; the trailing comments
// record the sizes the author observed (presumably on the x86-64 target the
// listings in these notes come from; confirm on other ABIs).
sizeof(Point3d); // == 32
sizeof(Point2d); // == 16
}

Point3d的对象布局如下,32bytes:

1
2
3
4
5
6
7
8
9
+------
|0 | vtable+24 // +24后指向Point3d的虚函数
|8 | z_
|12| padding
+------
|16| vtable+88 // +88后指向Point2d(虚基类)的虚函数
|24| x_
|28| y_
+------

vtable:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
vtable for Point3d:
.quad 16 # 虚基类offset
.quad 0
.quad typeinfo for Point3d
.quad Point3d::~Point3d() [complete object destructor]
.quad Point3d::~Point3d() [deleting destructor]
.quad Point3d::z()
.quad -16
.quad 0
.quad -16
.quad -16
.quad typeinfo for Point3d
.quad virtual thunk to Point3d::~Point3d() [complete object destructor]
.quad virtual thunk to Point3d::~Point3d() [deleting destructor]
.quad Point2d::mumble()
.quad virtual thunk to Point3d::z()

然后当我们访问虚基类的member时候:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
// p.x_
mov rax, qword ptr [rbp - 40]
mov rax, qword ptr [rax - 24]
movss xmm0, dword ptr [rbp + rax - 32] # xmm0 = mem[0],zero,zero,zero 为什么是32,在下边解释
movss dword ptr [rbp - 64], xmm0
// Point2d *p2 = &p;
xor eax, eax
lea rcx, [rbp - 40]
cmp rcx, 0
mov qword ptr [rbp - 72], rax # 8-byte Spill
je .LBB1_2
mov rcx, qword ptr [rbp - 40]
lea rax, [rbp - 40]
add rax, qword ptr [rcx - 24]
mov qword ptr [rbp - 72], rax # 8-byte Spill
.LBB1_2:
mov rax, qword ptr [rbp - 72] # 8-byte Reload
mov qword ptr [rbp - 48], rax

rbp-40是Point3d对象的地址,从对象开头取出vptr(指向vtable)放到rax
rax-24是vtable中的第一项,也就是16,表示虚基类的offset是16
把对象地址加上这个offset,赋值给Point2d的指针
访问x_的时候,还要跳过Point2d子对象开头的vptr(8 bytes),即-40+8=-32,所以是32

vtable中还有这四项非常奇怪

1
2
3
4
.quad   -16
.quad 0
.quad -16
.quad -16

这四项分别对应了(对称的)

1
2
3
4
.quad   virtual thunk to Point3d::~Point3d() [complete object destructor]
.quad virtual thunk to Point3d::~Point3d() [deleting destructor]
.quad Point2d::mumble()
.quad virtual thunk to Point3d::z()

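调用这些函数时,virtual thunk会从vtable中取出对应的offset来调整this指针:如果函数被Point3d override了,offset就是-16(从Point2d子对象调整回Point3d),如果没有override(如mumble),offset就是0。严格来说(按Itanium C++ ABI),这四项并不是和四个slot一一对称的:前三项是z()、mumble()、析构函数的vcall offset(按声明的逆序排列,两个析构函数slot共用一项),第四项-16是offset-to-top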
thunk长这样

1
2
3
4
5
6
7
8
9
10
virtual thunk to Point3d::~Point3d() [deleting destructor]:                # @virtual thunk to Point3d::~Point3d() [deleting destructor]
push rbp
mov rbp, rsp
mov qword ptr [rbp - 8], rdi # rdi是Point3d的Point2d部分
mov rdi, qword ptr [rbp - 8]
mov rax, qword ptr [rdi]
mov rax, qword ptr [rax - 24] # 获取vtable中对应项的offset
add rdi, rax
pop rbp
jmp Point3d::~Point3d() [deleting destructor] # TAILCALL

指向Member Function的指针

  • non-virtual member function:指向函数的地址
  • virtual member function:记录vtable的offset

直接来看代码吧

1
2
3
4
5
6
// Fragment: takes pointers to member functions of Point2d and calls two of
// them on a Point3d object.
// NOTE(review): foo, bar and baz are not declared in the Point2d shown earlier
// in these notes; this fragment presumably uses a modified class. Confirm.
Point3d p(1, 1, 1);
auto vptr_1 = &Point2d::foo; // virtual function #1; the listing below stores it as the pair (17, 0)
auto vptr_2 = &Point2d::bar; // virtual function #2; the listing below stores it as the pair (25, 0)
auto mptr = &Point2d::baz; // non-virtual member function
(p.*vptr_1)();
(p.*mptr)();
1
2
3
4
5
6
// auto vptr_1 = &Point2d::foo;
mov qword ptr [rbp - 40], 0
mov qword ptr [rbp - 48], 17
// auto vptr_2 = &Point2d::bar;
mov qword ptr [rbp - 56], 0
mov qword ptr [rbp - 64], 25
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
vtable for Point3d:
.quad 0
.quad typeinfo for Point3d
.quad Point3d::~Point3d() [base object destructor]
.quad Point3d::~Point3d() [deleting destructor]
.quad Point2d::foo()
.quad Point2d::bar()
.quad Point3d::three()
// (p.*ptr)();
mov rax, qword ptr [rbp - 48]
mov qword ptr [rbp - 128], rax # 8-byte Spill
mov rdx, qword ptr [rbp - 40]
lea rcx, [rbp - 32] # rcx是对象p的地址
add rcx, rdx # rdx是指针的高8位,做什么用的?
mov qword ptr [rbp - 120], rcx # 8-byte Spill
and rax, 1 # 函数指针是否是奇数
cmp rax, 0
je .LBB1_2 # 如果不是奇数则不是虚函数
mov rcx, qword ptr [rbp - 128] # 8-byte Reload
mov rax, qword ptr [rbp - 120] # 8-byte Reload
mov rax, qword ptr [rax] # 取出来vtable
sub rcx, 1
mov rax, qword ptr [rax + rcx] # 取出来对应的函数(vtable+17-1),第二项
mov qword ptr [rbp - 136], rax # 8-byte Spill, 函数地址放到[rbp - 136]
jmp .LBB1_3
.LBB1_2:
mov rax, qword ptr [rbp - 128] # 8-byte Reload
mov qword ptr [rbp - 136], rax # 8-byte Spill, 如果是成员函数,直接将低8位作为函数地址放到[rbp - 136]
.LBB1_3:
mov rdi, qword ptr [rbp - 120] # 8-byte Reload, 对象地址
mov rax, qword ptr [rbp - 136] # 8-byte Reload, 函数地址
call rax
jmp .LBB1_4
  • Spill和Reload就是变量太多reg放不下,暂时放到栈中
  • 这里指向虚函数的成员函数指针的值都是奇数(vtable内的偏移+1),用来区别于指向非虚成员函数的指针(函数地址是对齐的,最低位为0)
  • 成员函数指针调用都是相同的方式,所以会有很多次的访存和跳转开销
  • 成员函数指针一共16字节,高8字节是一个对对象地址(this)的offset调整量

问题

  1. 为什么[[#虚继承下的virtual function]]vtable中offset和对应的函数要对称排列,如果不是对称的话一个thunk就可以解决了
  2. 在[[Chap.4 Function Semantics#多继承]]的例子中,thunk是直接减去了固定值,而不是像这个还有一次访存
  3. [[#指向Member Function的指针]]中,成员函数指针的高8字节是做什么用的

TODO

  1. 把Chap.3的更新下汇编