fishhook源码分析

没什么特别要说的,详细看代码,可以结合之前的两篇文章,更好的理解这部分内容。

mach-o格式分析

mach-o延时绑定

源码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
// Copyright (c) 2013, Facebook, Inc.
// All rights reserved.
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
// * Redistributions of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
// * Neither the name Facebook nor the names of its contributors may be used to
// endorse or promote products derived from this software without specific
// prior written permission.
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
// DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE LIABLE
// FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
// DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
// SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER
// CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY,
// OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#import "fishhook.h"

#import <dlfcn.h>
#import <stdlib.h>
#import <string.h>
#import <sys/types.h>
#import <mach-o/dyld.h>
#import <mach-o/loader.h>
#import <mach-o/nlist.h>

#ifdef __LP64__
typedef struct mach_header_64 mach_header_t;
typedef struct segment_command_64 segment_command_t;
typedef struct section_64 section_t;
typedef struct nlist_64 nlist_t;
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT_64
#else
typedef struct mach_header mach_header_t;
typedef struct segment_command segment_command_t;
typedef struct section section_t;
typedef struct nlist nlist_t;
#define LC_SEGMENT_ARCH_DEPENDENT LC_SEGMENT
#endif

#ifndef SEG_DATA_CONST
#define SEG_DATA_CONST "__DATA_CONST"
#endif

struct rebindings_entry {
struct rebinding *rebindings;
size_t rebindings_nel;
struct rebindings_entry *next;
};

static struct rebindings_entry *_rebindings_head;

static int prepend_rebindings(struct rebindings_entry **rebindings_head,
struct rebinding rebindings[],
size_t nel)
{

struct rebindings_entry *new_entry = malloc(sizeof(struct rebindings_entry));
if (!new_entry) {
return -1;
}
new_entry->rebindings = malloc(sizeof(struct rebinding) * nel);
if (!new_entry->rebindings) {
free(new_entry);
return -1;
}
memcpy(new_entry->rebindings, rebindings, sizeof(struct rebinding) * nel);
new_entry->rebindings_nel = nel;
new_entry->next = *rebindings_head;
*rebindings_head = new_entry;
return 0;
}

static void perform_rebinding_with_section(struct rebindings_entry *rebindings,
section_t *section,
intptr_t slide,
nlist_t *symtab,
char *strtab,
uint32_t *indirect_symtab)
{

//__la_symbol_ptr的reserved1字段标识了section描述的符号在符号表中开始的index
//动态符号表中第一个需要解析的符号 开始地址
uint32_t *indirect_symbol_indices = indirect_symtab + section->reserved1;

void **indirect_symbol_bindings = (void **)((uintptr_t)slide + section->addr);

for (uint i = 0; i < section->size / sizeof(void *); i++) {
uint32_t symtab_index = indirect_symbol_indices[i];
if (symtab_index == INDIRECT_SYMBOL_ABS || symtab_index == INDIRECT_SYMBOL_LOCAL ||
symtab_index == (INDIRECT_SYMBOL_LOCAL | INDIRECT_SYMBOL_ABS)) {
continue;
}
//获取每一个需要动态解析的符号在符号表中的地址
uint32_t strtab_offset = symtab[symtab_index].n_un.n_strx;

//通过符号表中每一个导入符号的字符串表偏移量获取符号对应的字符串(符号的名字)
char *symbol_name = strtab + strtab_offset;
struct rebindings_entry *cur = rebindings;
while (cur) {
for (uint j = 0; j < cur->rebindings_nel; j++) {
if (strlen(symbol_name) > 1 &&
strcmp(&symbol_name[1], cur->rebindings[j].name) == 0) {
//找到相同的函数替换指针
if (cur->rebindings[j].replaced != NULL &&
indirect_symbol_bindings[i] != cur->rebindings[j].replacement) {
*(cur->rebindings[j].replaced) = indirect_symbol_bindings[i];
}
indirect_symbol_bindings[i] = cur->rebindings[j].replacement;
goto symbol_loop;
}
}
cur = cur->next;
}
symbol_loop:;
}
}


//typedef struct {
// const char *dli_fname; /* Pathname of shared object that
// contains address */
// void *dli_fbase; /* Address at which shared object
// is loaded */
// const char *dli_sname; /* Name of nearest symbol with address
// lower than addr */
// void *dli_saddr; /* Exact address of symbol named
// in dli_sname */
//} Dl_info;

static void rebind_symbols_for_image(struct rebindings_entry *rebindings,
const struct mach_header *header,
intptr_t slide)
{

//获取Dl_info
Dl_info info;
if (dladdr(header, &info) == 0) {
return;
}


segment_command_t *cur_seg_cmd;
segment_command_t *linkedit_segment = NULL;
struct symtab_command* symtab_cmd = NULL;
struct dysymtab_command* dysymtab_cmd = NULL;

uintptr_t cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_LINKEDIT) == 0) {
//在lc_segment中遍历寻找__LINKEDIT的section
linkedit_segment = cur_seg_cmd;
}
} else if (cur_seg_cmd->cmd == LC_SYMTAB) {
//遍历寻找lc_symtab
symtab_cmd = (struct symtab_command*)cur_seg_cmd;
} else if (cur_seg_cmd->cmd == LC_DYSYMTAB) {
//遍历寻找lc_dysymtab
dysymtab_cmd = (struct dysymtab_command*)cur_seg_cmd;
}
}

//检测必要的数据结构是否都存在
/*
LC_SYMTAB这个LoadCommand主要提供了两个信息
Symbol Table的偏移量与Symbol Table中元素的个数
String Table的偏移量与String Table的长度
LC_DYSYMTAB
提供了动态符号表的位移和元素个数,还有一些其他的表格索引
LC_SEGMENT.__LINKEDIT
含有为动态链接库使用的原始数据
*/

if (!symtab_cmd || !dysymtab_cmd || !linkedit_segment ||
!dysymtab_cmd->nindirectsyms) {
return;
}

// Find base symbol/string table addresses
// 获取链接时程序的基址
// 基址 = __LINKEDIT.VM_Address - __LINK.File_Offset + silde的改变值
// machoview随便看一个程序的__LINKEDIT
// offset | data | Description | value
// ...
// 0x0000390 0x0000000100002000 VM Address 4294975488
// 0x0000398 0x0000000000003000 VM Size 12288
// 0x00003A0 0x0000000000002000 File Offset 8192
// 0x00003A8 0x0000000000002690 File Size 9872

// base = 100002000-2000 + slide = 0x0000000100000000 + slide
// 这应该是一个公式
uintptr_t linkedit_base = (uintptr_t)slide + linkedit_segment->vmaddr - linkedit_segment->fileoff;

// 符号表的地址 = 基址 + 符号表偏移量
nlist_t *symtab = (nlist_t *)(linkedit_base + symtab_cmd->symoff);
// 字符串表的地址 = 基址 + 字符串表偏移量
char *strtab = (char *)(linkedit_base + symtab_cmd->stroff);

// Get indirect symbol table (array of uint32_t indices into symbol table)
// 动态符号表地址 = 基址 + 动态符号表偏移量
uint32_t *indirect_symtab = (uint32_t *)(linkedit_base + dysymtab_cmd->indirectsymoff);

//再一次遍历loadcommands
cur = (uintptr_t)header + sizeof(mach_header_t);
for (uint i = 0; i < header->ncmds; i++, cur += cur_seg_cmd->cmdsize) {
cur_seg_cmd = (segment_command_t *)cur;
if (cur_seg_cmd->cmd == LC_SEGMENT_ARCH_DEPENDENT) {
if (strcmp(cur_seg_cmd->segname, SEG_DATA) != 0 &&
strcmp(cur_seg_cmd->segname, SEG_DATA_CONST) != 0) {
continue;
}
//找到__DATA和__DATA_CONST的section
//__DATA section里面保存的是符号跳转的函数指针表
// 对__nl_symbol_ptr以及__la_symbol_ptr进行rebind
for (uint j = 0; j < cur_seg_cmd->nsects; j++) {
section_t *sect =
(section_t *)(cur + sizeof(segment_command_t)) + j;
if ((sect->flags & SECTION_TYPE) == S_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
if ((sect->flags & SECTION_TYPE) == S_NON_LAZY_SYMBOL_POINTERS) {
perform_rebinding_with_section(rebindings, sect, slide, symtab, strtab, indirect_symtab);
}
}
}
}
}

static void _rebind_symbols_for_image(const struct mach_header *header,
intptr_t slide) {
rebind_symbols_for_image(_rebindings_head, header, slide);
}

int rebind_symbols_image(void *header,
intptr_t slide,
struct rebinding rebindings[],
size_t rebindings_nel)
{

struct rebindings_entry *rebindings_head = NULL;
int retval = prepend_rebindings(&rebindings_head, rebindings, rebindings_nel);
rebind_symbols_for_image(rebindings_head, header, slide);
free(rebindings_head);
return retval;
}

int rebind_symbols(struct rebinding rebindings[], size_t rebindings_nel) {
int retval = prepend_rebindings(&_rebindings_head, rebindings, rebindings_nel);
if (retval < 0) {
return retval;
}
// If this was the first call, register callback for image additions (which is also invoked for
// existing images, otherwise, just run on existing images
if (!_rebindings_head->next) {
_dyld_register_func_for_add_image(_rebind_symbols_for_image);
} else {
uint32_t c = _dyld_image_count();
for (uint32_t i = 0; i < c; i++) {
_rebind_symbols_for_image(_dyld_get_image_header(i), _dyld_get_image_vmaddr_slide(i));
}
}
return retval;
}

小结

整个流程中最复杂的部分是perform_rebinding_with_section实现的功能。

简单的概括流程:

  • 通过__la_symbol_ptrreserved1字段找到第一个需要动态绑定的符号在动态符号表中的位置。
  • 通过__la_symbol_ptr中提供的数据找到在symbol table中的数据。

图1

图上可以看到reserved1被machoview工具修改成了Indirect Sym Index,他的值是3.

图2

可以看到第四个就是需要延时绑定的符号开始的地方。上面的值是3

因为0,1,2,3….

所以找到了他在符号表中的偏移量

图3

后面的逻辑就比较好理解了,找到目标函数,替换目标函数。

PS

github可以获取注释后的代码。

https://github.com/turingH/fishhook_analysis

文章目录
  1. 1. 源码
  2. 2. 小结
  3. 3. PS
,