ptmalloc小记

本篇记录下学习ptmalloc过程中遇到的疑惑以及编译大佬的文章~

疑惑

  1. lastreminder
    它总是指向切割后的部分,可能在largebin里切割,也可能在topchunk,或者其他,哪里有切割它就被更新指向该处。
  2. unsortedbins
    它会倒着遍历,除非精确匹配,否则没遍历一个,就将其放入对应bin。

ptmalloc fanzine

这几篇文章介绍了一些典型的ptmalloc meta-data攻击技术(来源[2]),编译记录如下,注意平台版本不同可能实现也不同:

munmap madness

分配mmap chunks

使用mmap分配的情形:

  1. 使用非主分配区
  2. 所需chunksize达到mmap分配阈值

释放mmap chunks

thread local caching in glibc malloc

tcache介绍

它被集成到了libc2.26中,它对每个线程增加一个bin缓存,这样能显著地提高性能,默认情况下,每个线程有64个bins,以16(8)递增,mensize从24(12)到1032(516):

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
#if USE_TCACHE
/* We want 64 entries. This is an arbitrary limit, which tunables can reduce. */
# define TCACHE_MAX_BINS 64
# define MAX_TCACHE_SIZE tidx2usize (TCACHE_MAX_BINS-1)

/* Only used to pre-fill the tunables. */
# define tidx2usize(idx) (((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)

/* When "x" is from chunksize(). */
# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
/* When "x" is a user-provided size. */
# define usize2tidx(x) csize2tidx (request2size (x))

/* With rounding and alignment, the bins are...
idx 0 bytes 0..24 (64-bit) or 0..12 (32-bit)
idx 1 bytes 25..40 or 13..20
idx 2 bytes 41..56 or 21..28
etc. */

/* This is another arbitrary limit, which tunables can change. Each
tcache bin will hold at most this number of chunks. */
# define TCACHE_FILL_COUNT 7
#endif

每个bin是单链表结构,整个tcache为tcache_perthread_struct

1
2
3
4
5
6
7
8
9
10
11
12
13
14
/* We overlay this structure on the user-data portion of a chunk when the chunk is stored in the per-thread cache.  */
typedef struct tcache_entry
{
struct tcache_entry *next;
} tcache_entry;

/* There is one of these for each thread, which contains the per-thread cache (hence "tcache_perthread_struct"). Keeping overall size low is mildly important. Note that COUNTS and ENTRIES are redundant (we could have just counted the linked list each time), this is for performance reasons. */
typedef struct tcache_perthread_struct
{
char counts[TCACHE_MAX_BINS];
tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

static __thread tcache_perthread_struct *tcache = NULL;
tcache使用

chunk进入tcache:

  1. 释放时,_int_free中在检查了size合法后,放入fastbin之前,它先尝试将其放入tcache:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    #if USE_TCACHE
    {
    size_t tc_idx = csize2tidx (size);

    if (tcache
    && tc_idx < mp_.tcache_bins //大小合适
    && tcache->counts[tc_idx] < mp_.tcache_count) //未满
    {
    tcache_put (p, tc_idx);
    return;
    }
    }
    #endif
    static void tcache_put (mchunkptr chunk, size_t tc_idx)
    {
    tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
    assert (tc_idx < TCACHE_MAX_BINS);
    e->next = tcache->entries[tc_idx];
    tcache->entries[tc_idx] = e; //和fastbin一样,单链表FILO
    ++(tcache->counts[tc_idx]);
    }
  2. _int_malloc中,若fastbins中取出块则将对应bin中其余chunk填入tcache对应项直到填满(smallbins中也是如此):
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
    31
    32
    33
    34
    35
    36
    37
    38
    39
    40
    41
      if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
    {
    idx = fastbin_index (nb);
    mfastbinptr *fb = &fastbin (av, idx);
    mchunkptr pp = *fb;
    REMOVE_FB (fb, victim, pp);
    if (victim != 0)
    {
    if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
    {
    errstr = "malloc(): memory corruption (fast)";
    errout:
    malloc_printerr (check_action, errstr, chunk2mem (victim), av);
    return NULL;
    }
    check_remalloced_chunk (av, victim, nb);
    #if USE_TCACHE
    /* While we're here, if we see other chunks of the same size,
    stash them in the tcache. */
    size_t tc_idx = csize2tidx (nb);
    if (tcache && tc_idx < mp_.tcache_bins)
    {
    mchunkptr tc_victim;

    /* While bin not empty and tcache not full, copy chunks over. */
    while (tcache->counts[tc_idx] < mp_.tcache_count
    && (pp = *fb) != NULL)
    {
    REMOVE_FB (fb, tc_victim, pp);
    if (tc_victim != 0)
    {
    tcache_put (tc_victim, tc_idx);
    }
    }
    }
    #endif
    void *p = chunk2mem (victim);
    alloc_perturb (p, bytes);
    return p;
    }
    }
  3. 当进入unsorted bin中找到精确的大小时,并不是直接返回而是先加入tcache中,直到填满:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    28
    29
    30
        while(遍历unsorted bin){
    ....
    //当精确匹配
    if (size == nb)
    {
    set_inuse_bit_at_offset (victim, size);
    if (av != &main_arena)
    set_non_main_arena (victim);
    #if USE_TCACHE
    /* Fill cache first, return to user only if cache fills.
    We may return one of these chunks later. */
    if (tcache_nb
    && tcache->counts[tc_idx] < mp_.tcache_count)
    {
    tcache_put (victim, tc_idx);
    return_cached = 1;
    continue;
    }
    else
    {
    #endif //满了才直接返回
    check_malloced_chunk (av, victim, nb);
    void *p = chunk2mem (victim);
    alloc_perturb (p, bytes);
    return p;
    #if USE_TCACHE
    }
    #endif
    }
    }

从tcache获取chunk:

  1. __libc_malloc_int_malloc之前
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
    21
    22
    23
    24
    25
    26
    27
    void * __libc_malloc (size_t bytes)
    {
    mstate ar_ptr;
    void *victim;

    //hook();
    #if USE_TCACHE
    /* int_free also calls request2size, be careful to not pad twice. */
    size_t tbytes = request2size (bytes);
    size_t tc_idx = csize2tidx (tbytes);

    MAYBE_INIT_TCACHE ();

    DIAG_PUSH_NEEDS_COMMENT;
    if (tc_idx < mp_.tcache_bins
    /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
    && tcache
    && tcache->entries[tc_idx] != NULL)
    {
    return tcache_get (tc_idx);
    }
    DIAG_POP_NEEDS_COMMENT;
    #endif

    arena_get (ar_ptr, bytes);

    victim = _int_malloc (ar_ptr, bytes);
  2. 在遍历完unsorted bin之后,若是有缓存则取出并返回:
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
          while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
    {
    //遍历~~~~~
    }

    #if USE_TCACHE
    /* If all the small chunks we found ended up cached, return one now. */
    if (return_cached)
    {
    return tcache_get (tc_idx);
    }
    #endif
  3. 在遍历unsorted bin时,大小不匹配的chunk将会被放入对应的bins,若达到tcache_unsorted_limit限制且之前已经存入过chunk则在此时取出(默认无限制):
    1
    2
    3
    4
    5
    6
    7
    8
    9
    10
    11
    12
    13
    14
    15
    16
    17
    18
    19
    20
          while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
    {
    //遍历~~~~~
    //大小不等时,放入对应bin~
    #if USE_TCACHE
    /* If we've processed as many chunks as we're allowed while
    filling the cache, return one of the cached ones. */
    ++tcache_unsorted_count;
    if (return_cached
    && mp_.tcache_unsorted_limit > 0
    && tcache_unsorted_count > mp_.tcache_unsorted_limit)
    {
    return tcache_get (tc_idx);
    }
    #endif

    #define MAX_ITERS 10000
    if (++iters >= MAX_ITERS)
    break;
    }

对旧的利用技术的影响

由上可知tcache发生在比较前面,在它之前只有size等很少的完整性校验(只有存入前有size >= MINSIZE && aligned_OK (size) && !misaligned_chunk (p) && (uintptr_t) p <= (uintptr_t) -size),而它本身并没有什么完整性校验,于是利用它进行攻击会简单很多。

house of spirit

在之前,这种利用手段主要用在fastbin,因为它的检查要少很多,包括要释放的chunk大小正确且属于fastbin,下一个chunk大小要适中,不能小于2*SIZE_SZ且不能大于已分配内存。small的检查更多,但是若使用tcache,就只需要关心当前chunk的地址及大小,大小也覆盖到了更多smallbin的范围了。

double free

和fastbin一样,但是fastbin不能连续释放同一个chunk,而它无这种限制而且它适用更大的chunk。

Overlapping chunks

若能更改指定chunk的size,增加其值,那么在释放后再分配对应大小就能控制其后的chunk了。

tcache poisoning

对于已经在tcache里面的chunk,更改它的fd值即可在malloc时分配任意地址!

Smallbin cache filling bck write

一直存在,在unsorted bin的unlink中,它是最原始的unlink未进行其他检查(如bck->fd != victim),这样又可以进行DWORD SHOOT啦:

1
2
unsorted_chunks (av)->bk = bck;
bck->fd = unsorted_chunks (av);

参考

[1]华庭- 《Ptmalloc2 源代码分析》
[2]andigena-《ptmalloc fanzine
[3]glibc wiki malloc internals