ptmalloc小记

本篇记录下学习ptmalloc过程中遇到的疑惑以及编译大佬的文章~

疑惑

lastreminder
它总是指向切割后的部分，可能在largebin里切割，也可能在topchunk，或者其他，哪里有切割它就被更新指向该处。
unsortedbins
它会倒着遍历，除非精确匹配，否则没遍历一个，就将其放入对应bin。

ptmalloc fanzine

这几篇文章介绍了一些典型的ptmalloc meta-data攻击技术(来源[2])，编译记录如下，注意平台版本不同可能实现也不同：

munmap madness

分配mmap chunks

使用mmap分配的情形：

使用非主分配区
所需chunksize达到mmap分配阈值

释放mmap chunks

thread local caching in glibc malloc

tcache介绍

它被集成到了libc2.26中，它对每个线程增加一个bin缓存，这样能显著地提高性能，默认情况下，每个线程有64个bins，以16(8)递增，mensize从24(12)到1032(516)：

#if USE_TCACHE
/* We want 64 entries.  This is an arbitrary limit, which tunables can reduce.  */
# define TCACHE_MAX_BINS		64
# define MAX_TCACHE_SIZE	tidx2usize (TCACHE_MAX_BINS-1)

/* Only used to pre-fill the tunables.  */
# define tidx2usize(idx)	(((size_t) idx) * MALLOC_ALIGNMENT + MINSIZE - SIZE_SZ)

/* When "x" is from chunksize().  */
# define csize2tidx(x) (((x) - MINSIZE + MALLOC_ALIGNMENT - 1) / MALLOC_ALIGNMENT)
/* When "x" is a user-provided size.  */
# define usize2tidx(x) csize2tidx (request2size (x))

/* With rounding and alignment, the bins are...
   idx 0   bytes 0..24 (64-bit) or 0..12 (32-bit)
   idx 1   bytes 25..40 or 13..20
   idx 2   bytes 41..56 or 21..28
   etc.  */

/* This is another arbitrary limit, which tunables can change.  Each
   tcache bin will hold at most this number of chunks.  */
# define TCACHE_FILL_COUNT 7
#endif

每个bin是单链表结构，整个tcache为tcache_perthread_struct：

/* We overlay this structure on the user-data portion of a chunk when the chunk is stored in the per-thread cache.  */
typedef struct tcache_entry
{
  struct tcache_entry *next;
} tcache_entry;

/* There is one of these for each thread, which contains the per-thread cache (hence "tcache_perthread_struct").  Keeping overall size low is mildly important.  Note that COUNTS and ENTRIES are redundant (we could have just counted the linked list each time), this is for performance reasons.  */
typedef struct tcache_perthread_struct
{
  char counts[TCACHE_MAX_BINS];
  tcache_entry *entries[TCACHE_MAX_BINS];
} tcache_perthread_struct;

static __thread tcache_perthread_struct *tcache = NULL;

tcache使用

chunk进入tcache：

释放时，_int_free中在检查了size合法后，放入fastbin之前，它先尝试将其放入tcache:

#if USE_TCACHE
  {
    size_t tc_idx = csize2tidx (size);

    if (tcache
	&& tc_idx < mp_.tcache_bins                     //大小合适
	&& tcache->counts[tc_idx] < mp_.tcache_count)   //未满
      {
	tcache_put (p, tc_idx);
	return;
      }
  }
#endif
static void tcache_put (mchunkptr chunk, size_t tc_idx)
{
  tcache_entry *e = (tcache_entry *) chunk2mem (chunk);
  assert (tc_idx < TCACHE_MAX_BINS);
  e->next = tcache->entries[tc_idx];
  tcache->entries[tc_idx] = e;          //和fastbin一样，单链表FILO
  ++(tcache->counts[tc_idx]);
}

在_int_malloc中，若fastbins中取出块则将对应bin中其余chunk填入tcache对应项直到填满（smallbins中也是如此）：

  if ((unsigned long) (nb) <= (unsigned long) (get_max_fast ()))
    {
      idx = fastbin_index (nb);
      mfastbinptr *fb = &fastbin (av, idx);
      mchunkptr pp = *fb;
      REMOVE_FB (fb, victim, pp);
      if (victim != 0)
        {
          if (__builtin_expect (fastbin_index (chunksize (victim)) != idx, 0))
            {
              errstr = "malloc(): memory corruption (fast)";
            errout:
              malloc_printerr (check_action, errstr, chunk2mem (victim), av);
              return NULL;
            }
          check_remalloced_chunk (av, victim, nb);
#if USE_TCACHE
	  /* While we're here, if we see other chunks of the same size,
	     stash them in the tcache.  */
	  size_t tc_idx = csize2tidx (nb);
	  if (tcache && tc_idx < mp_.tcache_bins)
	    {
	      mchunkptr tc_victim;

	      /* While bin not empty and tcache not full, copy chunks over.  */
	      while (tcache->counts[tc_idx] < mp_.tcache_count
		     && (pp = *fb) != NULL)
		{
		  REMOVE_FB (fb, tc_victim, pp);
		  if (tc_victim != 0)
		    {
		      tcache_put (tc_victim, tc_idx);
	            }
		}
	    }
#endif
          void *p = chunk2mem (victim);
          alloc_perturb (p, bytes);
          return p;
        }
    }

当进入unsorted bin中找到精确的大小时，并不是直接返回而是先加入tcache中，直到填满：

    while(遍历unsorted bin){
        ....
        //当精确匹配
          if (size == nb)
            {
              set_inuse_bit_at_offset (victim, size);
              if (av != &main_arena)
		set_non_main_arena (victim);
#if USE_TCACHE
	      /* Fill cache first, return to user only if cache fills.
		 We may return one of these chunks later.  */
	      if (tcache_nb
		  && tcache->counts[tc_idx] < mp_.tcache_count)
		{
		  tcache_put (victim, tc_idx);
		  return_cached = 1;
		  continue;
		}
	      else
		{
#endif        //满了才直接返回
              check_malloced_chunk (av, victim, nb);
              void *p = chunk2mem (victim);
              alloc_perturb (p, bytes);
              return p;
#if USE_TCACHE
		}
#endif
          }
    }

从tcache获取chunk：

在__libc_malloc，_int_malloc之前

void * __libc_malloc (size_t bytes)
{
  mstate ar_ptr;
  void *victim;

  //hook();
#if USE_TCACHE
  /* int_free also calls request2size, be careful to not pad twice.  */
  size_t tbytes = request2size (bytes);
  size_t tc_idx = csize2tidx (tbytes);

  MAYBE_INIT_TCACHE ();

  DIAG_PUSH_NEEDS_COMMENT;
  if (tc_idx < mp_.tcache_bins
      /*&& tc_idx < TCACHE_MAX_BINS*/ /* to appease gcc */
      && tcache
      && tcache->entries[tc_idx] != NULL)
    {
      return tcache_get (tc_idx);
    }
  DIAG_POP_NEEDS_COMMENT;
#endif

  arena_get (ar_ptr, bytes);

  victim = _int_malloc (ar_ptr, bytes);

在遍历完unsorted bin之后，若是有缓存则取出并返回：

      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
      {
        //遍历~~~~~
      }

#if USE_TCACHE
      /* If all the small chunks we found ended up cached, return one now.  */
      if (return_cached)
	{
	  return tcache_get (tc_idx);
	}
#endif

在遍历unsorted bin时，大小不匹配的chunk将会被放入对应的bins，若达到tcache_unsorted_limit限制且之前已经存入过chunk则在此时取出（默认无限制）：

      while ((victim = unsorted_chunks (av)->bk) != unsorted_chunks (av))
      {
        //遍历~~~~~
        //大小不等时，放入对应bin~
#if USE_TCACHE
      /* If we've processed as many chunks as we're allowed while
	 filling the cache, return one of the cached ones.  */
      ++tcache_unsorted_count;
      if (return_cached
	  && mp_.tcache_unsorted_limit > 0
	  && tcache_unsorted_count > mp_.tcache_unsorted_limit)
	{
	  return tcache_get (tc_idx);
	}
#endif

#define MAX_ITERS       10000
          if (++iters >= MAX_ITERS)
            break;
        }

对旧的利用技术的影响

由上可知tcache发生在比较前面，在它之前只有size等很少的完整性校验(只有存入前有size >= MINSIZE && aligned_OK (size) && !misaligned_chunk (p) && (uintptr_t) p <= (uintptr_t) -size)，而它本身并没有什么完整性校验，于是利用它进行攻击会简单很多。

house of spirit

在之前，这种利用手段主要用在fastbin，因为它的检查要少很多，包括要释放的chunk大小正确且属于fastbin，下一个chunk大小要适中，不能小于2*SIZE_SZ且不能大于已分配内存。small的检查更多，但是若使用tcache，就只需要关心当前chunk的地址及大小，大小也覆盖到了更多smallbin的范围了。

double free

和fastbin一样，但是fastbin不能连续释放同一个chunk，而它无这种限制而且它适用更大的chunk。

Overlapping chunks

若能更改指定chunk的size，增加其值，那么在释放后再分配对应大小就能控制其后的chunk了。

tcache poisoning

对于已经在tcache里面的chunk，更改它的fd值即可在malloc时分配任意地址！

Smallbin cache filling bck write

一直存在，在unsorted bin的unlink中，它是最原始的unlink未进行其他检查(如bck->fd != victim)，这样又可以进行DWORD SHOOT啦：

1 2	unsorted_chunks (av)->bk = bck; bck->fd = unsorted_chunks (av);

参考

[1]华庭- 《Ptmalloc2 源代码分析》
[2]andigena-《ptmalloc fanzine》
[3]glibc wiki malloc internals