IO_FILE

重写了IO FILE这一部分的知识,因为发现之前接触得太浅了(拿到题目自己都不能构造fake_io)。

这一次源码级调试一下,看一下各种各样的io结构的原理,笔者这里照着这个师傅调试的

IO FILE

怎么源码级调试?这里笔者直接用的pwndbg里面的dir将glibc的源码带到了调试里。dir /glibc-2.23/libio/,需要注意的是启动gdb的时候需要这样:gdb 文件名,这样就可以愉快的调试啦。

需要先理解一下什么是IO FILE。进程中的FILE结构会通过_chain域链接形成一个链表,链表头部用全局变量_IO_list_all表示。一个程序启动时有三个文件流是默认打开的:stderr,stdout,stdin。这三个文件流位于libc.so的数据段,_IO_FILE结构外包裹着另一种结构_IO_FILE_plus(在_IO_FILE之后多了一个vtable指针)。如下

1
2
3
4
5
6
7
8
9
                          stderr             stdout             stdin
                          _______            _______            _______
_IO_list_all -----------> |flag |     |----> |flag |     |----> |flag |
                          |_____|     |      |_____|     |      |_____|
                          |chain|-----|      |chain|-----|      |chain|
                          |_____|            |_____|            |_____|
                          |_____|            |_____|            |_____|


fopen

一个简单的fopen程序,使用的是2.23版本的libc。

1
2
3
4
5
6
7
8
#include <stdio.h>
#include <stdlib.h>

/*
 * Minimal fopen demo used as a debugging target for stepping into fopen.
 *
 * Opens "test" for binary writing, performs one small heap allocation after
 * the stream has been created, then releases both resources.
 *
 * Returns 0 on success, 1 if the file cannot be opened or closed.
 */
int main(void){
    FILE *fp = fopen("test", "wb");
    if (fp == NULL) {
        perror("fopen");
        return 1;
    }

    /* Allocation made right after fopen; kept from the original demo. */
    char *ptr = malloc(0x20);

    /* free(NULL) is a no-op, so the allocation result needs no guard here. */
    free(ptr);

    /* fclose flushes a write stream, so its result is checked. */
    if (fclose(fp) != 0) {
        perror("fclose");
        return 1;
    }
    return 0;
}

gdb启动完成之后s进去就可以看到fopen实际上是_IO_new_fopen函数。

1
2
3
4
5
   94 _IO_FILE *
95 _IO_new_fopen (const char *filename, const char *mode)
96 {
97 return __fopen_internal (filename, mode, 1);
98 }

从上面的源码中可以很清楚的看到又调用了__fopen_internal函数。看一下源码

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
/* Allocate and initialise a new stream object and open FILENAME on it.

   FILENAME, MODE and IS32 are passed through unchanged to _IO_file_fopen.

   Returns the value of __fopen_maybe_mmap applied to the new stream when
   _IO_file_fopen succeeds.  Returns NULL when the allocation fails, or when
   _IO_file_fopen returns NULL; in the latter case the stream is unlinked
   and freed again before returning.  */
_IO_FILE *
__fopen_internal (const char *filename, const char *mode, int is32)
{
  /* The stream, its lock (only when _IO_MTSAFE_IO is defined) and its
     wide-data area are obtained with a single malloc.  */
  struct locked_FILE
  {
    struct _IO_FILE_plus fp;
#ifdef _IO_MTSAFE_IO
    _IO_lock_t lock;
#endif
    struct _IO_wide_data wd;
  } *new_f = (struct locked_FILE *) malloc (sizeof (struct locked_FILE));

  if (new_f == NULL)
    return NULL;
#ifdef _IO_MTSAFE_IO
  /* Point the stream at the lock embedded in the same allocation.  */
  new_f->fp.file._lock = &new_f->lock;
#endif
#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
  _IO_no_init (&new_f->fp.file, 0, 0, &new_f->wd, &_IO_wfile_jumps);
#else
  _IO_no_init (&new_f->fp.file, 1, 0, NULL, NULL);
#endif
  /* Install the file jump table, then run _IO_file_init on the stream.  */
  _IO_JUMPS (&new_f->fp) = &_IO_file_jumps;
  _IO_file_init (&new_f->fp);
#if  !_IO_UNIFIED_JUMPTABLES
  new_f->fp.vtable = NULL;
#endif
  if (_IO_file_fopen ((_IO_FILE *) new_f, filename, mode, is32) != NULL)
    return __fopen_maybe_mmap (&new_f->fp.file);

  /* Opening failed: undo the linking done above and release the memory.  */
  _IO_un_link (&new_f->fp);
  free (new_f);
  return NULL;
}
  1. malloc分配内存空间。
  2. _IO_no_init 对file结构体进行null初始化。
  3. _IO_file_init将结构体链接进_IO_list_all链表。
  4. _IO_file_fopen执行系统调用打开文件

malloc分配内存空间

首先malloc了一个struct locked_FILE大小的结构体,这个结构体内有_IO_FILE_plus、_IO_lock_t、_IO_wide_data这三个结构,其中_IO_FILE_plus为使用的IO_FILE结构体。malloc之后会发现内容都为0,从chunk头的size字段0x231可以很明显地看到这个chunk的大小为0x230(最低位的1是PREV_INUSE标志位)。

1
2
3
4
5
6
7
8
9
10
11
12
13
pwndbg> p new_f
$2 = (struct locked_FILE *) 0x55555555b010
pwndbg> x/20gx 0x55555555b010 - 0x10
0x55555555b000: 0x0000000000000000 0x0000000000000231
0x55555555b010: 0x0000000000000000 0x0000000000000000
0x55555555b020: 0x0000000000000000 0x0000000000000000
0x55555555b030: 0x0000000000000000 0x0000000000000000
0x55555555b040: 0x0000000000000000 0x0000000000000000
0x55555555b050: 0x0000000000000000 0x0000000000000000
0x55555555b060: 0x0000000000000000 0x0000000000000000
0x55555555b070: 0x0000000000000000 0x0000000000000000
0x55555555b080: 0x0000000000000000 0x0000000000000000
0x55555555b090: 0x0000000000000000 0x0000000000000000

_IO_no_init对file结构体进行初始化操作

继续往下走会调用_IO_no_init这个函数对上面的结构体进行初始化操作,这个函数在libio/genops.c里。跟着源码还可以看到它又利用了_IO_old_init这个函数对_flags等字段进行初始化。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
/* Initialise the stream FP without linking it anywhere.

   Runs _IO_old_init (FP, FLAGS), stores ORIENTATION in FP->_mode and,
   when wide-character support is compiled in, sets up FP->_wide_data:
   for a non-negative ORIENTATION the area WD is attached, all of its
   buffer pointers are cleared and JMP becomes its _wide_vtable;
   otherwise _wide_data is set to an invalid pointer.  Finally
   FP->_freeres_list is cleared.  */
void
_IO_no_init (_IO_FILE *fp, int flags, int orientation,
struct _IO_wide_data *wd, const struct _IO_jump_t *jmp)
{
/* Byte-stream fields are initialised by _IO_old_init.  */
_IO_old_init (fp, flags);
fp->_mode = orientation;
#if defined _LIBC || defined _GLIBCPP_USE_WCHAR_T
if (orientation >= 0)
{
/* Attach the caller-supplied wide-data area and reset every pointer in it.  */
fp->_wide_data = wd;
fp->_wide_data->_IO_buf_base = NULL;
fp->_wide_data->_IO_buf_end = NULL;
fp->_wide_data->_IO_read_base = NULL;
fp->_wide_data->_IO_read_ptr = NULL;
fp->_wide_data->_IO_read_end = NULL;
fp->_wide_data->_IO_write_base = NULL;
fp->_wide_data->_IO_write_ptr = NULL;
fp->_wide_data->_IO_write_end = NULL;
fp->_wide_data->_IO_save_base = NULL;
fp->_wide_data->_IO_backup_base = NULL;
fp->_wide_data->_IO_save_end = NULL;

/* JMP is the jump table used for wide-character operations.  */
fp->_wide_data->_wide_vtable = jmp;
}
else
/* Cause predictable crash when a wide function is called on a byte
stream. */
fp->_wide_data = (struct _IO_wide_data *) -1L;
#endif
fp->_freeres_list = NULL;
}

/* Reset the byte-stream part of FP.

   Sets _flags to _IO_MAGIC combined with FLAGS, zeroes _flags2 and
   _cur_column, and clears every buffer, save-area, marker and chain
   pointer.  When _IO_MTSAFE_IO is defined and FP already has a lock
   attached, that lock is initialised as well.  */
void
_IO_old_init (_IO_FILE *fp, int flags)
{
  fp->_flags = _IO_MAGIC|flags;
  fp->_flags2 = 0;
  fp->_IO_buf_base = NULL;
  fp->_IO_buf_end = NULL;
  fp->_IO_read_base = NULL;
  fp->_IO_read_ptr = NULL;
  fp->_IO_read_end = NULL;
  fp->_IO_write_base = NULL;
  fp->_IO_write_ptr = NULL;
  fp->_IO_write_end = NULL;
  fp->_chain = NULL; /* Not necessary. */

  fp->_IO_save_base = NULL;
  fp->_IO_backup_base = NULL;
  fp->_IO_save_end = NULL;
  fp->_markers = NULL;
  fp->_cur_column = 0;
#if _IO_JUMPS_OFFSET
  fp->_vtable_offset = 0;
#endif
#ifdef _IO_MTSAFE_IO
  /* Only initialise a lock that the caller has already attached.  */
  if (fp->_lock != NULL)
    _IO_lock_init (*fp->_lock);
#endif
}

看一下最后的file结构被初始化成什么样子了。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
pwndbg> p new_f->fp
$4 = {
file = {
_flags = -72548352,
_IO_read_ptr = 0x0,
_IO_read_end = 0x0,
_IO_read_base = 0x0,
_IO_write_base = 0x0,
_IO_write_ptr = 0x0,
_IO_write_end = 0x0,
_IO_buf_base = 0x0,
_IO_buf_end = 0x0,
_IO_save_base = 0x0,
_IO_backup_base = 0x0,
_IO_save_end = 0x0,
_markers = 0x0,
_chain = 0x0,
_fileno = 0,
_flags2 = 0,
_old_offset = 0,
_cur_column = 0,
_vtable_offset = 0 '\000',
_shortbuf = "",
_lock = 0x55555555b0f0,
_offset = 0,
_codecvt = 0x0,
_wide_data = 0x55555555b100,
_freeres_list = 0x0,
_freeres_buf = 0x0,
__pad5 = 0,
_mode = 0,
_unused2 = '\000' <repeats 19 times>
},
vtable = 0x0
}

_IO_file_init将结构体链接到_IO_list_all

结束_IO_no_init之后我们可以看到回到了__fopen_internal并继续执行_IO_file_init这个函数,跟进看一下是干什么的,跟进之后是/libio/fileops.c这个文件里的_IO_new_file_init

1
2
3
4
5
6
7
8
9
10
11
/* Prepare the freshly initialised stream FP for use as a file stream.

   Marks the cached offset as unknown, ORs CLOSED_FILEBUF_FLAGS into the
   stream flags, links the stream in via _IO_link_in and finally sets the
   descriptor to -1.  */
void
_IO_new_file_init (struct _IO_FILE_plus *fp)
{
  /* POSIX.1 allows another file handle to be used to change the position
     of our file descriptor.  Hence we actually don't know the actual
     position before we do the first fseek (and until a following fflush). */
  fp->file._offset = _IO_pos_BAD;
  fp->file._IO_file_flags |= CLOSED_FILEBUF_FLAGS;

  _IO_link_in (fp);
  /* No descriptor is associated with the stream yet.  */
  fp->file._fileno = -1;
}

这个函数主要调用了_IO_link_in这个函数,继续跟进这个函数,libio/genops.c里面的_IO_link_in

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
/* Insert FP at the head of the list rooted at _IO_list_all.

   Does nothing when FP already carries the _IO_LINKED flag.  Otherwise the
   flag is set, FP->file._chain takes the previous list head, _IO_list_all
   is pointed at FP and _IO_list_all_stamp is incremented.  When
   _IO_MTSAFE_IO is defined the update runs with list_all_lock and the
   stream's own lock held.  */
void
_IO_link_in (struct _IO_FILE_plus *fp)
{
/* A stream that is already flagged as linked is left untouched.  */
if ((fp->file._flags & _IO_LINKED) == 0)
{
fp->file._flags |= _IO_LINKED;
#ifdef _IO_MTSAFE_IO
_IO_cleanup_region_start_noarg (flush_cleanup);
_IO_lock_lock (list_all_lock);
run_fp = (_IO_FILE *) fp;
_IO_flockfile ((_IO_FILE *) fp);
#endif
/* Head insertion: the old head becomes FP's successor.  */
fp->file._chain = (_IO_FILE *) _IO_list_all;
_IO_list_all = fp;
++_IO_list_all_stamp;
#ifdef _IO_MTSAFE_IO
/* Release in the reverse order of acquisition.  */
_IO_funlockfile ((_IO_FILE *) fp);
run_fp = NULL;
_IO_lock_unlock (list_all_lock);
_IO_cleanup_region_end (0);
#endif
}
}

首先这个if判断_flags中是否设置了_IO_LINKED标志位,这个有什么用呢?FILE结构体是通过_IO_list_all这个单链表进行管理的,如果没有设置_IO_LINKED就说明这个结构体还没有链接进_IO_list_all,于是先设置该标志位,再把结构体的_chain字段设置为链表原来的头部,并让_IO_list_all指向这个结构体(头插法);如果已经设置了_IO_LINKED则直接返回。所以_IO_file_init的主要功能是将FILE结构体链接进_IO_list_all链表。

1
2
pwndbg> p _IO_list_all
$6 = (struct _IO_FILE_plus *) 0x7ffff7dd2540 <_IO_2_1_stderr_>

在没有执行下面的操作之前可以看到_IO_list_all链接的是_IO_2_1_stderr_,执行完之后_IO_list_all就指向的是申请出来的结构体。

1
2
pwndbg> p _IO_list_all
$7 = (struct _IO_FILE_plus *) 0x55555555b010

同时此时的_chain字段也指向了_IO_2_1_stderr_这里。

_IO_file_fopen打开文件句柄

设置好了_IO_LINKED这些东西之后又会回到__fopen_internal这里,接下来会执行_IO_file_fopen这个函数,跟进后发现它是位于libio/fileops.c里面的_IO_new_file_fopen。这个_IO_new_file_fopen函数有点长,这里就放一部分比较重要的。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/* Abridged excerpt of _IO_new_file_fopen.

   Parts of the original function are left out and marked with "[...]"
   comments, so the declarations of omode, oflags, oprot, read_write and
   result are not shown here.  The visible steps are: bail out if FP is
   already open, derive the open mode from the first character of MODE,
   then open the file through _IO_file_open.  */
_IO_FILE *
_IO_new_file_fopen (_IO_FILE *fp, const char *filename, const char *mode,
                    int is32not64)
{
  /* [...] */

  /* If the stream is already open, return 0 without doing anything.  */
  if (_IO_file_is_open (fp))
    return 0;

  /* Select the open mode from the first character of MODE.  */
  switch (*mode)
    {
    case 'r':
      omode = O_RDONLY;
      read_write = _IO_NO_WRITES;
      break;
    /* [...] */
    }

  /* [...] */

  /* Open the file by calling _IO_file_open.  */
  result = _IO_file_open (fp, filename, omode|oflags, oprot, read_write,
                          is32not64);

  /* [...] */
}
libc_hidden_ver (_IO_new_file_fopen, _IO_file_fopen)

会先检查文件是否打开,然后设置打开模式,最后调用了_IO_file_open这个函数跟进它。位于libio/fileops.c这个文件中的_IO_file_open

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
/* Open FILENAME and attach the resulting descriptor to the stream FP.

   POSIX_MODE and PROT are handed to open; under _LIBC, O_LARGEFILE is
   added to the mode when IS32NOT64 is zero.  READ_WRITE supplies the
   _IO_NO_READS / _IO_NO_WRITES / _IO_IS_APPENDING bits written into the
   stream flags.

   Returns FP on success.  Returns NULL when open fails, or when the
   seek-to-end performed for append mode fails with an errno other than
   ESPIPE (the descriptor is closed first in that case).  */
_IO_FILE *
_IO_file_open (_IO_FILE *fp, const char *filename, int posix_mode, int prot,
int read_write, int is32not64)
{
int fdesc;
#ifdef _LIBC
/* Streams flagged _IO_FLAGS2_NOTCANCEL use the open_not_cancel variant.  */
if (__glibc_unlikely (fp->_flags2 & _IO_FLAGS2_NOTCANCEL))
fdesc = open_not_cancel (filename,
posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
else
fdesc = open (filename, posix_mode | (is32not64 ? 0 : O_LARGEFILE), prot);
#else
fdesc = open (filename, posix_mode, prot);
#endif
if (fdesc < 0)
return NULL;
/* Record the descriptor and apply the read/write/append bits to the flags.  */
fp->_fileno = fdesc;
_IO_mask_flags (fp, read_write,_IO_NO_READS+_IO_NO_WRITES+_IO_IS_APPENDING);
/* For append mode, send the file offset to the end of the file. Don't
update the offset cache though, since the file handle is not active. */
if ((read_write & (_IO_IS_APPENDING | _IO_NO_READS))
== (_IO_IS_APPENDING | _IO_NO_READS))
{
_IO_off64_t new_pos = _IO_SYSSEEK (fp, 0, _IO_seek_end);
/* A failed seek is tolerated only when errno is ESPIPE.  */
if (new_pos == _IO_pos_BAD && errno != ESPIPE)
{
close_not_cancel (fdesc);
return NULL;
}
}
/* _IO_link_in is called again here on the opened stream.  */
_IO_link_in ((struct _IO_FILE_plus *) fp);
return fp;
}

这个函数就是调用open系统调用打开文件,将文件描述符赋值给FILE结构体的_fileno 字段,最后再次调用_IO_link_in函数,确保该结构体被链接进入_IO_list_all链表。查看new_f->fp就可以看到_fileno被设置为0x3。

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
pwndbg> p new_f->fp
$9 = {
file = {
_flags = -72539004,
_IO_read_ptr = 0x0,
_IO_read_end = 0x0,
_IO_read_base = 0x0,
_IO_write_base = 0x0,
_IO_write_ptr = 0x0,
_IO_write_end = 0x0,
_IO_buf_base = 0x0,
_IO_buf_end = 0x0,
_IO_save_base = 0x0,
_IO_backup_base = 0x0,
_IO_save_end = 0x0,
_markers = 0x0,
_chain = 0x7ffff7dd2540 <_IO_2_1_stderr_>,
_fileno = 3,
_flags2 = 0,
_old_offset = 0,
_cur_column = 0,
_vtable_offset = 0 '\000',
_shortbuf = "",
_lock = 0x55555555b0f0,
_offset = -1,
_codecvt = 0x0,
_wide_data = 0x55555555b100,
_freeres_list = 0x0,
_freeres_buf = 0x0,
__pad5 = 0,
_mode = 0,
_unused2 = '\000' <repeats 19 times>
},
vtable = 0x7ffff7dd06e0 <_IO_file_jumps>
}

执行完成之后返回FILE结构体指针。至此对fopen的调试结束。

fread

和fopen一样用个程序来,这里还是用那位师傅的程序,笔者只跟着动态调试。

1
2
3
4
5
6
7
8
#include <stdio.h>

/*
 * Minimal fread demo used as a debugging target for stepping into fread.
 *
 * Opens "test" for binary reading and reads up to sizeof data bytes into a
 * stack buffer.  The file must exist beforehand.
 *
 * Returns 0 on success, 1 if the file cannot be opened or a read error
 * occurs.  A short read caused by end-of-file is not treated as an error.
 */
int main(void){
    char data[20];
    FILE *fp = fopen("test", "rb");
    if (fp == NULL) {
        /* Without this check fread would be handed a NULL stream. */
        perror("fopen");
        return 1;
    }

    int rc = 0;
    size_t got = fread(data, 1, sizeof data, fp);
    if (got < sizeof data && ferror(fp)) {
        perror("fread");
        rc = 1;
    }

    fclose(fp);
    return rc;
}

需要先创建一个test文件并写入一些内容。gdb启动调试,断点下在fread这里就会看到调用了_IO_fread,看一下FILE结构体fp的内容。可以看到此时的_IO_read_ptr、_IO_buf_base等指针都还是空的

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
pwndbg> p *_IO_list_all
$1 = {
file = {
_flags = -72539000,
_IO_read_ptr = 0x0,
_IO_read_end = 0x0,
_IO_read_base = 0x0,
_IO_write_base = 0x0,
_IO_write_ptr = 0x0,
_IO_write_end = 0x0,
_IO_buf_base = 0x0,
_IO_buf_end = 0x0,
_IO_save_base = 0x0,
_IO_backup_base = 0x0,
_IO_save_end = 0x0,
_markers = 0x0,
_chain = 0x7ffff7f995e0 <_IO_2_1_stderr_>,
_fileno = 3,
_flags2 = 0,
_old_offset = 0,
_cur_column = 0,
_vtable_offset = 0 '\000',
_shortbuf = "",
_lock = 0x555555559380,
_offset = -1,
_codecvt = 0x0,
_wide_data = 0x555555559390,
_freeres_list = 0x0,
_freeres_buf = 0x0,
__pad5 = 0,
_mode = 0,
_unused2 = '\000' <repeats 19 times>
},
vtable = 0x7ffff7f9a4a0 <_IO_file_jumps>
}

vtable中的指针内容如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
pwndbg> p *_IO_list_all->vtable
$2 = {
__dummy = 0,
__dummy2 = 0,
__finish = 0x7ffff7e4a440 <_IO_new_file_finish>,
__overflow = 0x7ffff7e4aea0 <_IO_new_file_overflow>,
__underflow = 0x7ffff7e4ab50 <_IO_new_file_underflow>,
__uflow = 0x7ffff7e4bf10 <__GI__IO_default_uflow>,
__pbackfail = 0x7ffff7e4d2d0 <__GI__IO_default_pbackfail>,
__xsputn = 0x7ffff7e4a030 <_IO_new_file_xsputn>,
__xsgetn = 0x7ffff7e49c10 <__GI__IO_file_xsgetn>,
__seekoff = 0x7ffff7e49470 <_IO_new_file_seekoff>,
__seekpos = 0x7ffff7e4c2a0 <_IO_default_seekpos>,
__setbuf = 0x7ffff7e48d30 <_IO_new_file_setbuf>,
__sync = 0x7ffff7e48bc0 <_IO_new_file_sync>,
__doallocate = 0x7ffff7e3d8d0 <__GI__IO_file_doallocate>,
__read = 0x7ffff7e4a210 <__GI__IO_file_read>,
__write = 0x7ffff7e49a70 <_IO_new_file_write>,
__seek = 0x7ffff7e491a0 <__GI__IO_file_seek>,
__close = 0x7ffff7e48d20 <__GI__IO_file_close>,
__stat = 0x7ffff7e49a60 <__GI__IO_file_stat>,
__showmanyc = 0x7ffff7e4d460 <_IO_default_showmanyc>,
__imbue = 0x7ffff7e4d470 <_IO_default_imbue>
}

fread实际上是_IO_fread函数,文件目录为/libio/iofread.c

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/* Read up to COUNT items of SIZE bytes each from FP into BUF.

   Returns 0 immediately when SIZE * COUNT is zero.  Otherwise the read is
   performed by _IO_sgetn while the stream lock is held; the result is
   COUNT when every requested byte was read, and the number of bytes read
   divided by SIZE otherwise.  */
_IO_size_t
_IO_fread (void *buf, _IO_size_t size, _IO_size_t count, _IO_FILE *fp)
{
  _IO_size_t bytes_requested = size * count;
  _IO_size_t bytes_read;
  CHECK_FILE (fp, 0);
  if (bytes_requested == 0)
    return 0;
  _IO_acquire_lock (fp);
  /* The actual reading is delegated to _IO_sgetn.  */
  bytes_read = _IO_sgetn (fp, (char *) buf, bytes_requested);
  _IO_release_lock (fp);
  /* SIZE is non-zero here because bytes_requested is non-zero.  */
  return bytes_requested == bytes_read ? count : bytes_read / size;
}

源码中可以看到又调用了_IO_sgetn函数,跟进它。

1
2
3
4
5
6
  463 _IO_size_t
464 _IO_sgetn (_IO_FILE *fp, void *data, _IO_size_t n)
465 {
466 /* FIXME handle putback buffer here! */
467 return _IO_XSGETN (fp, data, n);
468 }

又调用了_IO_XSGETN,它是一个宏:#define _IO_XSGETN(FP, DATA, N) JUMP2 (__xsgetn, FP, DATA, N)。继续跟进就可以发现最终调用了_IO_file_xsgetn,它实际上就是FILE结构体中vtable的__xsgetn函数,位于libio/fileops.c。下面贴一下比较重要的部分

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
_IO_size_t
_IO_file_xsgetn (_IO_FILE *fp, void *data, _IO_size_t n)
{
_IO_size_t want, have;
_IO_ssize_t count;
char *s = data;

want = n;

if (fp->_IO_buf_base == NULL)
{
...
# 第一部分,如果fp->_IO_buf_base为空的话则调用`_IO_doallocbuf`
_IO_doallocbuf (fp);
}

while (want > 0)
{

have = fp->_IO_read_end - fp->_IO_read_ptr;
## 第二部分,输入缓冲区里已经有足够的字符,则直接把缓冲区里的字符给目标buff
if (want <= have)
{
memcpy (s, fp->_IO_read_ptr, want);
fp->_IO_read_ptr += want;
want = 0;
}
else
{
# 第二部分,输入缓冲区里有部分字符,但是没有达到fread的size需求,先把已有的拷贝至目标buff
if (have > 0)
{
...
memcpy (s, fp->_IO_read_ptr, have);
s += have;

want -= have;
fp->_IO_read_ptr += have;
}


if (fp->_IO_buf_base
&& want < (size_t) (fp->_IO_buf_end - fp->_IO_buf_base))
{
## 第三部分,输入缓冲区里不能满足需求,调用__underflow读入数据
if (__underflow (fp) == EOF)
break;

continue;
}
...
return n - want;
}
libc_hidden_def (_IO_file_xsgetn)