CSAPP - malloc Lab

实现一个空间分配器

隐式空闲链表

按照书中第九章虚拟内存的隐式空闲链表(带边界标记的)进行设计。查找空闲块的方法是首次适配算法。其他的宏定义和函数与课本上一致。

在实验官网上没有给出traces的文件夹,从github上找到并使用svn下载traces文件夹。

首次适配的算法

// First fit algorithm
static void *find_fit(size_t asize) {
    void *bp;
    for (bp = heap_listp; GET_SIZE(HDRP(bp)) != 0; bp = NEXT_BLKP(bp)) {
        if (!GET_ALLOC(HDRP(bp)) && GET_SIZE(HDRP(bp)) >= asize) {
            return bp;
        }
    }
    return NULL;
}


static void place(void *bp, size_t asize) {
    size_t blk_size = GET_SIZE(HDRP(bp));
    size_t remain_size = blk_size - asize;
    size_t should_split = (remain_size >= 2 * DSIZE); // 剩余部分大于或等于最小块(4+4+1 => 16 bytes)的大小时进行分割

    if (should_split) {
        PUT(HDRP(bp), PACK(asize, 1));
        PUT(FTRP(bp), PACK(asize, 1));
        PUT(HDRP(NEXT_BLKP(bp)), PACK(remain_size, 0));
        PUT(FTRP(NEXT_BLKP(bp)), PACK(remain_size, 0));
    } else {
        PUT(HDRP(bp), PACK(blk_size, 1));
        PUT(FTRP(bp), PACK(blk_size, 1));
    }
}

mm_realloc函数

/*
 * mm_realloc - Implemented simply in terms of mm_malloc and mm_free
 */
void *mm_realloc(void *ptr, size_t size)
{
    if (!ptr) {
        return mm_malloc(size);
    }
    if (size == 0) {
        mm_free(ptr);
        return 0;
    }
    size_t old_size = GET_SIZE(HDRP(ptr));
    void* newptr = mm_malloc(size);

    if(!newptr) {
        return 0;
    }

    if (size < old_size) {
        old_size = size;
    }
    memcpy(newptr, ptr, old_size);

    mm_free(ptr);

    return newptr;
}

性能:

$ ./mdriver -V -t traces
Team Name:Avenger

Results for mm malloc:
trace  valid  util     ops      secs  Kops
 0       yes   99%    5694  0.010277   554
 1       yes   99%    5848  0.009609   609
 2       yes   99%    6648  0.015698   423
 3       yes  100%    5380  0.011530   467
 4       yes   66%   14400  0.000578 24913
 5       yes   91%    4800  0.009993   480
 6       yes   92%    4800  0.009340   514
 7       yes   55%   12000  0.124230    97
 8       yes   51%   24000  0.404152    59
 9       yes   27%   14401  0.109136   132
10       yes   34%   14401  0.003691  3902
Total          74%  112372  0.708234   159

Perf index = 44 (util) + 11 (thru) = 55/100

显式空闲链表

使用双向链表而不是隐式空闲链表,使首次适配的分配时间从块总数的线性时间减少到了空闲块数量的线性时间

在空闲块中的增加两个指针记录pred和succ,指向前一个和后一个空闲块,方便在使用首次适配算法时查找空闲块。但这样也会导致最小块的大小增加,如果是8Bytes(双字)对齐的,最小块为4 + 4 + 4 + 4 + 1 -> 24Bytes

在书中提到了两种维护链表的方式:LIFO(后进先出)顺序、地址顺序。

LIFO每次把新释放的空闲块放置在链表的开头,使得释放一个块可以在常数时间内完成。而按照地址顺序组织链表,在释放时需要查找合适的链表插入位置,但内存利用率高。

LIFO实现

实现过程中遇到很多Segmentation fault,主要是指针操作的错误。需要用gdb调试,在gcc编译时增加-g选项,使用gdb的backtrace命令查看段错误时的调用栈来定位错误。

定义新的宏来实现对空闲块的指针操作,以及显示空闲链表的头指针

// 显式空闲链表
// Given a free block pointer bp, compute the address of pred and succeed pointer in this free block
#define PRED(bp) (bp)
#define SUCC(bp) ((char *)bp + WSIZE)
// Given a free block pointer bp, compute the address of pred-free and succeed-free blocks of bp
#define PRED_FBLKP(bp) ((char *)GET(bp))
#define SUCC_FBLKP(bp) ((char *)GET(SUCC(bp)))  // 需要转换成char类型指针,保证增加的地址是以字节为单位的
// 显式空闲链表
static char *free_list_head = 0;

两个对链表进行头插法和删除指定空闲块的函数

// 头插法
static void insert_head(void *new_free_bp) {
    PUT(SUCC(new_free_bp), free_list_head);
    PUT(PRED(new_free_bp), 0);
    if (free_list_head != 0) {
        PUT(PRED(free_list_head), new_free_bp);
    }
    free_list_head = new_free_bp;
}

// 从空闲链表上删除一个空闲块
static void remove_block(void *bp) {
    if (PRED_FBLKP(bp) == 0 && SUCC_FBLKP(bp) == 0) { // 单个头节点
        free_list_head = 0;
    } else if (PRED_FBLKP(bp) == 0) { // bp是头节点
        free_list_head = SUCC_FBLKP(bp);
        PUT(PRED(SUCC_FBLKP(bp)), 0);
        PUT(PRED(bp), 0);
        PUT(SUCC(bp), 0);
    } else if (SUCC_FBLKP(bp) == 0) {
        PUT(SUCC(PRED_FBLKP(bp)), 0);
        PUT(PRED(bp), 0);
        PUT(SUCC(bp), 0);
    } else {
        PUT(SUCC(PRED_FBLKP(bp)), SUCC_FBLKP(bp));
        PUT(PRED(SUCC_FBLKP(bp)), PRED_FBLKP(bp));
        PUT(PRED(bp), 0);
        PUT(SUCC(bp), 0);
    }
}

更新后的函数(其他函数和书本上的一样)

int mm_init(void)
{
    if ((heap_listp = mem_sbrk(4 * WSIZE)) == (void *)-1)
        return -1;
    PUT(heap_listp, 0);  // alignment padding
    PUT(heap_listp + (1*WSIZE), PACK(DSIZE, 1)); // prologue header
    PUT(heap_listp + (2*WSIZE), PACK(DSIZE, 1)); // prologue footer
    PUT(heap_listp + (3*WSIZE), PACK(0, 1)); // Epilogue header
    heap_listp += (2 * WSIZE);

    if ((free_list_head = extend_heap(CHUNKSIZE / WSIZE)) == NULL) {
        return -1;
    }

    PUT(PRED(free_list_head), 0);
    PUT(SUCC(free_list_head), 0);
    return 0;
}

void *mm_malloc(size_t size)
{
    Dprintf("Mallocing: %lu bytes.\n", size);
    size_t asize;  // Adjusted block size
    size_t extendsize; // Amount to extend heap if no fit
    char *bp;

    if (size <= 0) {
        return NULL;
    }

    if (size <= DSIZE) {
        asize = 2 * DSIZE;
    } else {
        //           有效载荷大小  首尾
        asize = DSIZE * ((size + DSIZE + DSIZE-1) / DSIZE);  // 双字对齐
    }

    if ((bp = find_fit(asize)) != NULL) {
        place(bp, asize);
//        mm_check(1);
        return bp;
    }

    Dprint("Not enough space, extend Heap.\n");

    extendsize = MAX(asize, CHUNKSIZE);
    if ((bp = extend_heap(extendsize/WSIZE)) == NULL) {
        return NULL;
    }
    remove_block(bp);
    place(bp, asize);

    if (DEBUG) {
        mm_check(1);
    }
    return bp;
}

static void *find_fit(size_t asize) {
    void *bp;
    for (bp = free_list_head; bp != 0; bp = SUCC_FBLKP(bp)) {
        // debug
//        print_block(bp);
        if (GET_SIZE(HDRP(bp)) >= asize) {
//            printf("Find free block, remove it from list.\n");
            remove_block(bp);
            return bp;
        }
    }
    return NULL;
}


static void place(void *bp, size_t asize) {
    size_t blk_size = GET_SIZE(HDRP(bp));
    size_t remain_size = blk_size - asize;
    size_t should_split = (remain_size >= 3 * DSIZE); // 剩余部分大于或等于最小块(4+4+4+4+1 => 24 bytes)的大小时进行分割

    if (should_split) {
        // Important! remove the free block first, called before place()
        PUT(HDRP(bp), PACK(asize, 1));
        PUT(FTRP(bp), PACK(asize, 1));
        PUT(HDRP(NEXT_BLKP(bp)), PACK(remain_size, 0));
        PUT(FTRP(NEXT_BLKP(bp)), PACK(remain_size, 0));
        // insert the new free block to head
        void* new_free_block = NEXT_BLKP(bp);
        PUT(PRED(new_free_block), 0);
        PUT(SUCC(new_free_block), 0);
        insert_head(new_free_block);
    } else {
        PUT(HDRP(bp), PACK(blk_size, 1));
        PUT(FTRP(bp), PACK(blk_size, 1));
    }
}
static void *coalesce(void *bp) {
    size_t prev_alloc = GET_ALLOC(FTRP(PREV_BLKP(bp)));
    size_t next_alloc = GET_ALLOC(HDRP(NEXT_BLKP(bp)));
    size_t size = GET_SIZE(HDRP(bp));

    if (prev_alloc && next_alloc) {
        Dprint("single block coalesce.\n");
        insert_head(bp);
        return bp;
    } else if(prev_alloc && !next_alloc) {
        Dprint("Next block is free\n");
        remove_block(NEXT_BLKP(bp));
        size += GET_SIZE(HDRP(NEXT_BLKP(bp)));
        PUT(HDRP(bp), PACK(size, 0));
        PUT(FTRP(bp), PACK(size, 0));
        insert_head(bp);
    } else if (!prev_alloc && next_alloc) {
        Dprint("Prev block is free\n");
        remove_block(PREV_BLKP(bp));
        size += GET_SIZE(HDRP(PREV_BLKP(bp)));
        PUT(FTRP(bp), PACK(size, 0));
        PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
        bp = PREV_BLKP(bp);
        insert_head(bp);
    } else {
        remove_block(PREV_BLKP(bp));
        remove_block(NEXT_BLKP(bp));
        size += GET_SIZE(HDRP(PREV_BLKP(bp))) +
                GET_SIZE(HDRP(NEXT_BLKP(bp)));
        PUT(HDRP(PREV_BLKP(bp)), PACK(size, 0));
        PUT(FTRP(NEXT_BLKP(bp)), PACK(size, 0));
        bp = PREV_BLKP(bp);
        insert_head(bp);
    }
    Dprint("\nAfter coalease.\n");
    if (DEBUG) {
        mm_check(1);
    }
    return bp;
}

测试性能,可以看到对比之前的隐式链表有很大的性能提升

$ ./mdriver -V -t traces
Team Name:Avenger

Results for mm malloc:
trace  valid  util     ops      secs  Kops
 0       yes   89%    5694  0.000274 20773
 1       yes   92%    5848  0.000195 30005
 2       yes   94%    6648  0.000377 17629
 3       yes   96%    5380  0.000300 17957
 4       yes   66%   14400  0.000225 64028
 5       yes   87%    4800  0.000595  8067
 6       yes   85%    4800  0.000602  7976
 7       yes   55%   12000  0.003013  3983
 8       yes   51%   24000  0.003050  7868
 9       yes   26%   14401  0.122659   117
10       yes   34%   14401  0.003157  4562
Total          70%  112372  0.134446   836

Perf index = 42 (util) + 40 (thru) = 82/100

地址顺序组织实现

为了提升内存利用率,使用按照地址顺序来组织空闲链表的排序。付出的代价是释放一个块时的线性搜索时间代价。

增加了一个insert_orderly的函数,之后将LIFO实现中的insert_block都替换为insert_orderly函数。

static void insert_orderly(void *new_free_bp) {
    if (free_list_head == 0) {
        PUT(PRED(new_free_bp), 0);
        PUT(SUCC(new_free_bp), 0);
        free_list_head = new_free_bp;
        return;
    }
    void *bp, *tail;
    for (bp = free_list_head; bp != 0; tail = bp, bp = SUCC_FBLKP(bp)) {
        if (new_free_bp < bp) {
            if (PRED_FBLKP(bp)) { // 有前缀块
                PUT(SUCC(PRED_FBLKP(bp)), new_free_bp);
            } else {
                free_list_head = new_free_bp;
            }
            PUT(PRED(new_free_bp), PRED_FBLKP(bp));
            PUT(PRED(bp), new_free_bp);
            PUT(SUCC(new_free_bp), bp);
            return;
        }
    }
    PUT(SUCC(tail), new_free_bp);
    PUT(PRED(new_free_bp), tail);
    PUT(SUCC(new_free_bp), 0);
}

时间性能下降了非常多,也可能是coalesce函数中的插入查找次数过多

Results for mm malloc:
trace  valid  util     ops      secs  Kops
 0       yes   99%    5694  0.000267 21342
 1       yes   99%    5848  0.000226 25899
 2       yes   99%    6648  0.000314 21145
 3       yes   99%    5380  0.000286 18785
 4       yes   66%   14400  0.000227 63464
 5       yes   91%    4800  0.004698  1022
 6       yes   92%    4800  0.004342  1106
 7       yes   55%   12000  0.060981   197
 8       yes   51%   24000  0.231590   104
 9       yes   27%   14401  0.152743    94
10       yes   34%   14401  0.003200  4500
Total          74%  112372  0.458874   245

Perf index = 44 (util) + 16 (thru) = 61/100

todo:改进