CSAPP - malloc Lab
实现一个空间分配器
隐式空闲链表
按照书中第九章虚拟内存的隐式空闲链表(带边界标记的)进行设计。查找空闲块的方法是首次适配算法。其他的宏定义和函数与课本上一致。
在实验官网上没有给出traces的文件夹,从github上找到并使用svn下载traces文件夹。
首次适配的算法
// First fit algorithm
static void *find_fit(size_t asize) {
void *bp;
for (bp = heap_listp; GET_SIZE(HDRP(bp)) != 0; bp = NEXT_BLKP(bp)) {
if (!GET_ALLOC(HDRP(bp)) && GET_SIZE(HDRP(bp)) >= asize) {
return bp;
}
}
return NULL;
}
static void place(void *bp, size_t asize) {
size_t blk_size = GET_SIZE(HDRP(bp));
size_t remain_size = blk_size - asize;
size_t should_split = (remain_size >= 2 * DSIZE); // 剩余部分大于或等于最小块(4+4+1 => 16 bytes)的大小时进行分割
if (should_split) {
PUT(HDRP(bp), PACK(asize, 1));
PUT(FTRP(bp), PACK(asize, 1));
PUT(HDRP(NEXT_BLKP(bp)), PACK(remain_size, 0));
PUT(FTRP(NEXT_BLKP(bp)), PACK(remain_size, 0));
} else {
PUT(HDRP(bp), PACK(blk_size, 1));
PUT(FTRP(bp), PACK(blk_size, 1));
}
}
mm_realloc函数
/*
* mm_realloc - Implemented simply in terms of mm_malloc and mm_free
*/
void *mm_realloc(void *ptr, size_t size)
{
if (!ptr) {
return mm_malloc(size);
}
if (size == 0) {
mm_free(ptr);
return 0;
}
size_t old_size = GET_SIZE(HDRP(ptr));
void* newptr = mm_malloc(size);
if(!newptr) {
return 0;
}
if (size < old_size) {
old_size = size;
}
memcpy(newptr, ptr, old_size);
mm_free(ptr);
return newptr;
}
性能:
$ ./mdriver -V -t traces
Team Name:Avenger
Results for mm malloc:
trace valid util ops secs Kops
0 yes 99% 5694 0.010277 554
1 yes 99% 5848 0.009609 609
2 yes 99% 6648 0.015698 423
3 yes 100% 5380 0.011530 467
4 yes 66% 14400 0.000578 24913
5 yes 91% 4800 0.009993 480
6 yes 92% 4800 0.009340 514
7 yes 55% 12000 0.124230 97
8 yes 51% 24000 0.404152 59
9 yes 27% 14401 0.109136 132
10 yes 34% 14401 0.003691 3902
Total 74% 112372 0.708234 159
Perf index = 44 (util) + 11 (thru) = 55/100
显式空闲链表
使用双向链表而不是隐式空闲链表,使首次适配的分配时间从块总数的线性时间减少到了空闲块数量的线性时间
在空闲块中的增加两个指针记录pred和succ,指向前一个和后一个空闲块,方便在使用首次适配算法时查找空闲块。但这样也会导致最小块的大小增加,如果是8Bytes(双字)对齐的,最小块为4 + 4 + 4 + 4 + 1 -> 24Bytes
在书中提到了两种维护链表的方式:LIFO(后进先出)顺序、地址顺序。
LIFO每次把新释放的空闲块放置在链表的开头,使得释放一个块可以在常数时间内完成。而按照地址顺序组织链表,在释放时需要查找合适的链表插入位置,但内存利用率高。
LIFO实现
实现过程中遇到很多Segmentation fault,主要是指针操作的错误。需要用gdb调试,在gcc编译时增加-g选项,使用gdb的backtrace命令查看段错误时的调用栈来定位错误。
定义新的宏来实现对空闲块的指针操作,以及显示空闲链表的头指针
// 显式空闲链表
// Given a free block pointer bp, compute the address of pred and succeed pointer in this free block
#define PRED(bp) (bp)
#define SUCC(bp) ((char *)bp + WSIZE)
// Given a free block pointer bp, compute the address of pred-free and succeed-free blocks of bp
#define PRED_FBLKP(bp) ((char *)GET(bp))
#define SUCC_FBLKP(bp) ((char *)GET(SUCC(bp))) // 需要转换成char类型指针,保证增加的地址是以字节为单位的
// 显式空闲链表
static char *free_list_head = 0;
两个对链表进行头插法和删除指定空闲块的函数
// 头插法
static void insert_head(void *new_free_bp) {
PUT(SUCC(new_free_bp), free_list_head);
PUT(PRED(new_free_bp), 0);
if (free_list_head != 0) {
PUT(PRED(free_list_head), new_free_bp);
}
free_list_head = new_free_bp;
}
// 从空闲链表上删除一个空闲块
static void remove_block(void *bp) {
if (PRED_FBLKP(bp) == 0 && SUCC_FBLKP(bp) == 0) { // 单个头节点
free_list_head = 0;
} else if (PRED_FBLKP(bp) == 0) { // bp是头节点
free_list_head = SUCC_FBLKP(bp);
PUT(PRED(SUCC_FBLKP(bp)), 0);
PUT(PRED(bp), 0);
PUT(SUCC(bp), 0);
} else if (SUCC_FBLKP(bp) == 0) {
PUT(SUCC(PRED_FBLKP(bp)), 0);
PUT(PRED(bp), 0);
PUT(SUCC(bp), 0);
} else {
PUT(SUCC(PRED_FBLKP(bp)), SUCC_FBLKP(bp));
PUT(PRED(SUCC_FBLKP(bp)), PRED_FBLKP(bp));
PUT(PRED(bp), 0);
PUT(SUCC(bp), 0);
}
}
更新后的函数(其他函数和书本上的一样)
int mm_init(void)
{
if ((heap_listp = mem_sbrk(4 * WSIZE)) == (void *)-1)
return -1;
PUT(heap_listp, 0); // alignment padding
PUT(heap_listp + (1*WSIZE), PACK(DSIZE, 1)); // prologue header
PUT(heap_listp + (2*WSIZE), PACK(DSIZE, 1)); // prologue footer
PUT(heap_listp + (3*WSIZE), PACK(0, 1)); // Epilogue header
heap_listp += (2 * WSIZE);
if ((free_list_head = extend_heap(CHUNKSIZE / WSIZE)) == NULL) {
return -1;
}
PUT(PRED(free_list_head), 0);
PUT(SUCC(free_list_head), 0);
return 0;
}
void *mm_malloc(size_t size)
{
Dprintf("Mallocing: %lu bytes.\n", size);
size_t asize; // Adjusted block size
size_t extendsize; // Amount to extend heap if no fit
char *bp;
if (size <= 0) {
return NULL;
}
if (size <= DSIZE) {
asize = 2 * DSIZE;
} else {
// 有效载荷大小 首尾
asize = DSIZE * ((size + DSIZE + DSIZE-1) / DSIZE); // 双字对齐
}
if ((bp = find_fit(asize)) != NULL) {
place(bp, asize);
// mm_check(1);
return bp;
}
Dprint("Not enough space, extend Heap.\n");
extendsize = MAX(asize, CHUNKSIZE);
if ((bp = extend_heap(extendsize/WSIZE)) == NULL) {
return NULL;
}
remove_block(bp);
place(bp, asize);
if (DEBUG) {
mm_check(1);
}
return bp;
}
static void *find_fit(size_t asize) {
void *bp;
for (bp = free_list_head; bp != 0; bp = SUCC_FBLKP(bp)) {
// debug
// print_block(bp);
if (GET_SIZE(HDRP(bp)) >= asize) {
// printf("Find free block, remove it from list.\n");
remove_block(bp);
return bp;
}
}
return NULL;
}
static void place(void *bp, size_t asize) {
size_t blk_size = GET_SIZE(HDRP(bp));
size_t remain_size = blk_size - asize;
size_t should_split = (remain_size >= 3 * DSIZE); // 剩余部分大于或等于最小块(4+4+4+4+1 => 24 bytes)的大小时进行分割
if (should_split) {
// Important! remove the free block first, called before place()
PUT(HDRP(bp), PACK(asize, 1));
PUT(FTRP(bp), PACK(asize, 1));
PUT(HDRP(NEXT_BLKP(bp)), PACK(remain_size, 0));
PUT(FTRP(NEXT_BLKP(bp)), PACK(remain_size, 0));
// insert the new free block to head
void* new_free_block = NEXT_BLKP(bp);
PUT(PRED(new_free_block), 0);
PUT(SUCC(new_free_block), 0);
insert_head(new_free_block);
} else {
PUT(HDRP(bp), PACK(blk_size, 1));
PUT(FTRP(bp), PACK(blk_size, 1));
}
}
static void *coalesce(void *bp) {
size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp)));
size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));
size_t size = GET_SIZE(HDRP(bp));
if (prev_alloc && next_alloc) {
Dprint("single block coalesce.\n");
insert_head(bp);
return bp;
} else if(prev_alloc && !next_alloc) {
Dprint("Next block is free\n");
remove_block(NEXT_BLKP(bp));
size += GET_SIZE(HDRP(NEXT_BLKP(bp)));
PUT(HDRP(bp), PACK(size, 0));
PUT(FTRP(bp), PACK(size, 0));
insert_head(bp);
} else if (!prev_alloc && next_alloc) {
Dprint("Prev block is free\n");
remove_block(PREV_BLKP(bp));
size += GET_SIZE(HDRP(PREV_BLKP(bp)));
PUT(FTRP(bp), PACK(size, 0));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
bp = PREV_BLKP(bp);
insert_head(bp);
} else {
remove_block(PREV_BLKP(bp));
remove_block(NEXT_BLKP(bp));
size += GET_SIZE(HDRP(PREV_BLKP(bp))) +
GET_SIZE(HDRP(NEXT_BLKP(bp)));
PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
bp = PREV_BLKP(bp);
insert_head(bp);
}
Dprint("\nAfter coalease.\n");
if (DEBUG) {
mm_check(1);
}
return bp;
}
测试性能,可以看到对比之前的隐式链表有很大的性能提升
$ ./mdriver -V -t traces
Team Name:Avenger
Results for mm malloc:
trace valid util ops secs Kops
0 yes 89% 5694 0.000274 20773
1 yes 92% 5848 0.000195 30005
2 yes 94% 6648 0.000377 17629
3 yes 96% 5380 0.000300 17957
4 yes 66% 14400 0.000225 64028
5 yes 87% 4800 0.000595 8067
6 yes 85% 4800 0.000602 7976
7 yes 55% 12000 0.003013 3983
8 yes 51% 24000 0.003050 7868
9 yes 26% 14401 0.122659 117
10 yes 34% 14401 0.003157 4562
Total 70% 112372 0.134446 836
Perf index = 42 (util) + 40 (thru) = 82/100
地址顺序组织实现
为了提升内存利用率,使用按照地址顺序来组织空闲链表的排序。付出的代价是释放一个块时的线性搜索时间代价。
增加了一个insert_orderly的函数,之后将LIFO实现中的insert_block都替换为insert_orderly函数。
static void insert_orderly(void *new_free_bp) {
if (free_list_head == 0) {
PUT(PRED(new_free_bp), 0);
PUT(SUCC(new_free_bp), 0);
free_list_head = new_free_bp;
return;
}
void *bp, *tail;
for (bp = free_list_head; bp != 0; tail = bp, bp = SUCC_FBLKP(bp)) {
if (new_free_bp < bp) {
if (PRED_FBLKP(bp)) { // 有前缀块
PUT(SUCC(PRED_FBLKP(bp)), new_free_bp);
} else {
free_list_head = new_free_bp;
}
PUT(PRED(new_free_bp), PRED_FBLKP(bp));
PUT(PRED(bp), new_free_bp);
PUT(SUCC(new_free_bp), bp);
return;
}
}
PUT(SUCC(tail), new_free_bp);
PUT(PRED(new_free_bp), tail);
PUT(SUCC(new_free_bp), 0);
}
时间性能下降了非常多,也可能是coalesce函数中的插入查找次数过多
Results for mm malloc:
trace valid util ops secs Kops
0 yes 99% 5694 0.000267 21342
1 yes 99% 5848 0.000226 25899
2 yes 99% 6648 0.000314 21145
3 yes 99% 5380 0.000286 18785
4 yes 66% 14400 0.000227 63464
5 yes 91% 4800 0.004698 1022
6 yes 92% 4800 0.004342 1106
7 yes 55% 12000 0.060981 197
8 yes 51% 24000 0.231590 104
9 yes 27% 14401 0.152743 94
10 yes 34% 14401 0.003200 4500
Total 74% 112372 0.458874 245
Perf index = 44 (util) + 16 (thru) = 61/100
todo:改进