天天看點

Linux記憶體管理之實體頁面分配

目錄:概述 · 實體頁面分配接口 · 實體頁面釋放接口 · 分配掩碼 · zone水位

概述

頁面分配器是Linux核心記憶體管理中最基本的分配器,基於夥伴系統演算法(buddy)和zone-based的設計理念。

實體頁面分配接口

alloc_pages

alloc_pages接口最終會調用到__alloc_pages_nodemask。

/*
 * __alloc_pages_nodemask - allocation entry point that alloc_pages() ends up
 * calling (per the surrounding article).
 *
 * @gfp_mask: allocation flags; filtered through gfp_allowed_mask before use
 * @order:    allocation order, passed unchanged to the freelist and slowpath
 *            helpers (presumably 2^order contiguous pages - TODO confirm)
 * @zonelist: candidate zones; an empty list makes the call return NULL
 * @nodemask: optional node restriction; when NULL the zone scan uses
 *            cpuset_current_mems_allowed instead
 *
 * Flow: after the early-exit checks, one attempt is made through
 * get_page_from_freelist() with ALLOC_WMARK_LOW|ALLOC_CPUSET.  If that
 * yields nothing, __alloc_pages_slowpath() is tried with the gfp mask
 * adjusted by memalloc_noio_flags().  If no page was obtained and
 * put_mems_allowed() reports that the cpuset cookie is stale, the whole
 * attempt is repeated from retry_cpuset.
 *
 * Returns the allocated page, or NULL on failure.
 */
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
            struct zonelist *zonelist, nodemask_t *nodemask)
{
    enum zone_type high_zoneidx = gfp_zone(gfp_mask);
    struct zone *preferred_zone;
    struct page *page = NULL;
    int migratetype = allocflags_to_migratetype(gfp_mask);
    unsigned int cpuset_mems_cookie;
    int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
    struct mem_cgroup *memcg = NULL;

    gfp_mask &= gfp_allowed_mask;

    lockdep_trace_alloc(gfp_mask);

    might_sleep_if(gfp_mask & __GFP_WAIT); //may sleep when __GFP_WAIT is set

    if (should_fail_alloc_page(gfp_mask, order))
        return NULL;

    /*
     * Check the zones suitable for the gfp_mask contain at least one
     * valid zone. It's possible to have an empty zonelist as a result
     * of GFP_THISNODE and a memoryless node
     */
    if (unlikely(!zonelist->_zonerefs->zone))
        return NULL;

    /*
     * Will only have any effect when __GFP_KMEMCG is set.  This is
     * verified in the (always inline) callee
     */
    if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
        return NULL;

retry_cpuset:
    cpuset_mems_cookie = get_mems_allowed();

    /* The preferred zone is used for statistics later */
    first_zones_zonelist(zonelist, high_zoneidx,  //scan the zonelist for the preferred zone; article note: ZONE_HIGHMEM is tried first (TODO confirm)
                nodemask ? : &cpuset_current_mems_allowed,
                &preferred_zone);
    if (!preferred_zone)
        goto out;

    /* First allocation attempt */
    page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
            zonelist, high_zoneidx, alloc_flags,
            preferred_zone, migratetype);
    if (unlikely(!page)) {
        /*
         * Runtime PM, block IO and its error handling path
         * can deadlock because I/O on the device might not
         * complete.
         */
        gfp_mask = memalloc_noio_flags(gfp_mask);
        page = __alloc_pages_slowpath(gfp_mask, order,
                zonelist, high_zoneidx, nodemask,
                preferred_zone, migratetype);
    }

    trace_mm_page_alloc(page, order, gfp_mask, migratetype);

out:
    /*
     * When updating a task's mems_allowed, it is possible to race with
     * parallel threads in such a way that an allocation can fail while
     * the mask is being updated. If a page allocation is about to fail,
     * check if the cpuset changed during allocation and if so, retry.
     */
    if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
        goto retry_cpuset;

    memcg_kmem_commit_charge(page, memcg, order);

    return page; //hand the page (or NULL) back to the caller
}
           

__get_free_pages

/*
 * __get_free_pages - allocate pages and return their address instead of
 * the struct page.
 *
 * @gfp_mask: allocation flags; must not contain __GFP_HIGHMEM
 * @order:    allocation order, forwarded to alloc_pages()
 *
 * Thin wrapper around alloc_pages().  Returns page_address() of the
 * allocated page cast to unsigned long, or 0 when the allocation fails.
 */
unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
{
    struct page *page;

    /*
     * __get_free_pages() returns a 32-bit address, which cannot represent
     * a highmem page
     */
    VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);  /* highmem requests are not allowed here */

    page = alloc_pages(gfp_mask, order); /* still built on top of alloc_pages() */
    if (!page)
        return 0;
    /* The caller gets the linear address of the page, not the struct page. */
    return (unsigned long) page_address(page);
}
           

實體頁面釋放接口

free_pages

free_pages最終會調用到__free_pages。

/*
 * __free_pages - release a page block obtained from the page allocator.
 *
 * @page:  first page of the block
 * @order: order the block was allocated with
 *
 * Nothing is freed unless put_page_testzero() returns true (presumably it
 * drops one reference and reports whether the count reached zero - confirm
 * against its definition).  Order-0 pages go through free_hot_cold_page();
 * larger blocks go through __free_pages_ok().
 */
void __free_pages(struct page *page, unsigned int order)
{
    if (put_page_testzero(page)) {
        if (order == 0) /* single pages take a separate path */
            free_hot_cold_page(page, 0); /* second argument is the "cold" flag in v3.10; 0 = not cold */
        else
            __free_pages_ok(page, order);
    }
}
           

分配掩碼

gfp.h中定義了常用的分配掩碼。這些掩碼會改變分配的行為。

/*
 * Commonly used allocation masks, composed from the individual __GFP_* bits.
 * Each composite mask is written as the next-smaller mask plus the bit(s)
 * it adds; every macro evaluates to the same bit set as listing all of its
 * flags explicitly.
 */
#define GFP_ATOMIC              (__GFP_HIGH)    /* no __GFP_WAIT bit */
#define GFP_NOIO                (__GFP_WAIT)
#define GFP_NOFS                (GFP_NOIO | __GFP_IO)
#define GFP_KERNEL              (GFP_NOFS | __GFP_FS)
#define GFP_TEMPORARY           (GFP_KERNEL | __GFP_RECLAIMABLE)
#define GFP_USER                (GFP_KERNEL | __GFP_HARDWALL)
#define GFP_HIGHUSER            (GFP_USER | __GFP_HIGHMEM)
#define GFP_HIGHUSER_MOVABLE    (GFP_HIGHUSER | __GFP_MOVABLE)
/* Only the IO and FS bits, without __GFP_WAIT. */
#define GFP_IOFS                (__GFP_IO | __GFP_FS)
           

zone水位

分配記憶體時會進行水位檢測。

/*
 * __zone_watermark_ok - decide whether a zone has enough free pages for an
 * allocation of the given order.
 *
 * @z:             zone being checked
 * @order:         order of the requested allocation
 * @mark:          watermark (in pages) to test against
 * @classzone_idx: index into z->lowmem_reserve[] selecting the reserve to keep
 * @alloc_flags:   ALLOC_HIGH / ALLOC_HARDER lower the effective watermark
 * @free_pages:    caller-supplied count of free pages in the zone
 *
 * Returns true when the zone passes the watermark check, false otherwise.
 */
static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
              int classzone_idx, int alloc_flags, long free_pages)
{
    /* free_pages may go negative - that's OK */
    long min = mark;
    long lowmem_reserve = z->lowmem_reserve[classzone_idx];
    int o;

    /* Account for the pages this request would take, less one. */
    free_pages -= (1 << order) - 1;
    if (alloc_flags & ALLOC_HIGH)
        min -= min / 2;
    if (alloc_flags & ALLOC_HARDER)
        min -= min / 4; /* try harder: lower the bar by a further quarter */
#ifdef CONFIG_CMA
    /*
     * We don't want to regard the pages on CMA region as free
     * on watermark checking, since they cannot be used for
     * unmovable/reclaimable allocation and they can suddenly
     * vanish through CMA allocation
     */
    if (IS_ENABLED(CONFIG_CMA) && z->managed_cma_pages)
        free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
#endif

    /* Free pages must exceed the (possibly lowered) watermark plus the reserve. */
    if (free_pages <= min + lowmem_reserve)
        return false;
    for (o = 0; o < order; o++) {
        /* At the next order, this order's pages become unavailable */
        free_pages -= z->free_area[o].nr_free << o;

        /* Require fewer higher order pages to be free */
        min >>= min_free_order_shift;

        if (free_pages <= min)
            return false;
    }
    return true;
}
           

繼續閱讀