概述
頁面配置設定器是Linux核心記憶體管理中最基本的配置設定器,基于夥伴系統算法(buddy)和zone-based的設計理念。
實體頁面配置設定接口
alloc_pages
alloc_pages接口最終會調用到__alloc_pages_nodemask。
/*
 * __alloc_pages_nodemask - allocate 2^order pages according to gfp_mask.
 *
 * @gfp_mask:  allocation flags; masked with gfp_allowed_mask before use.
 * @order:     allocation order passed through to the freelist/slowpath helpers.
 * @zonelist:  candidate zones; an empty list makes the call return NULL.
 * @nodemask:  optional node restriction; when NULL,
 *             cpuset_current_mems_allowed is used for the zone lookup.
 *
 * Flow visible here: a first attempt via get_page_from_freelist() using
 * ALLOC_WMARK_LOW|ALLOC_CPUSET, then a fallback to __alloc_pages_slowpath()
 * if that attempt yields no page. If no page was obtained and
 * put_mems_allowed() returns false, the whole attempt restarts from
 * retry_cpuset.
 *
 * Returns the page, or NULL on failure.
 */
struct page *
__alloc_pages_nodemask(gfp_t gfp_mask, unsigned int order,
struct zonelist *zonelist, nodemask_t *nodemask)
{
enum zone_type high_zoneidx = gfp_zone(gfp_mask);
struct zone *preferred_zone;
struct page *page = NULL;
int migratetype = allocflags_to_migratetype(gfp_mask);
unsigned int cpuset_mems_cookie;
int alloc_flags = ALLOC_WMARK_LOW|ALLOC_CPUSET;
struct mem_cgroup *memcg = NULL;
gfp_mask &= gfp_allowed_mask;
lockdep_trace_alloc(gfp_mask);
might_sleep_if(gfp_mask & __GFP_WAIT); // the caller may sleep when __GFP_WAIT is set
if (should_fail_alloc_page(gfp_mask, order))
return NULL;
/*
* Check the zones suitable for the gfp_mask contain at least one
* valid zone. It's possible to have an empty zonelist as a result
* of GFP_THISNODE and a memoryless node
*/
if (unlikely(!zonelist->_zonerefs->zone))
return NULL;
/*
* Will only have any effect when __GFP_KMEMCG is set. This is
* verified in the (always inline) callee
*/
if (!memcg_kmem_newpage_charge(gfp_mask, &memcg, order))
return NULL;
retry_cpuset:
cpuset_mems_cookie = get_mems_allowed();
/* The preferred zone is used for statistics later */
first_zones_zonelist(zonelist, high_zoneidx, // select preferred_zone from zonelist given high_zoneidx and the node mask
nodemask ? : &cpuset_current_mems_allowed,
&preferred_zone);
if (!preferred_zone)
goto out;
/* First allocation attempt */
page = get_page_from_freelist(gfp_mask|__GFP_HARDWALL, nodemask, order,
zonelist, high_zoneidx, alloc_flags,
preferred_zone, migratetype);
if (unlikely(!page)) {
/*
* Runtime PM, block IO and its error handling path
* can deadlock because I/O on the device might not
* complete.
*/
gfp_mask = memalloc_noio_flags(gfp_mask);
page = __alloc_pages_slowpath(gfp_mask, order,
zonelist, high_zoneidx, nodemask,
preferred_zone, migratetype);
}
trace_mm_page_alloc(page, order, gfp_mask, migratetype);
out:
/*
* When updating a task's mems_allowed, it is possible to race with
* parallel threads in such a way that an allocation can fail while
* the mask is being updated. If a page allocation is about to fail,
* check if the cpuset changed during allocation and if so, retry.
*/
if (unlikely(!put_mems_allowed(cpuset_mems_cookie) && !page))
goto retry_cpuset;
memcg_kmem_commit_charge(page, memcg, order);
return page; // the allocated page, or NULL if both attempts failed
}
__get_free_pages
/*
 * __get_free_pages - allocate pages and return their address as an integer.
 *
 * @gfp_mask: allocation flags; must not include __GFP_HIGHMEM.
 * @order:    allocation order, forwarded to alloc_pages().
 *
 * Thin wrapper around alloc_pages(): the resulting struct page is converted
 * with page_address() and returned as an unsigned long rather than as a
 * struct page pointer.
 *
 * Returns the address of the allocation, or 0 if alloc_pages() failed.
 */
unsigned long __get_free_pages(gfp_t gfp_mask, unsigned int order)
{
	struct page *page;

	/*
	 * __get_free_pages() returns a 32-bit address, which cannot represent
	 * a highmem page
	 */
	VM_BUG_ON((gfp_mask & __GFP_HIGHMEM) != 0);	/* highmem requests are not allowed here */

	page = alloc_pages(gfp_mask, order);		/* the real work is still done by alloc_pages() */
	if (!page)
		return 0;

	/* Hand back the address from page_address(), not the struct page itself. */
	return (unsigned long) page_address(page);
}
實體頁面釋放接口
free_pages
free_pages最終會調用到__free_pages
/*
 * __free_pages - drop a reference on a page block and free it when unused.
 *
 * @page:  first page of the block being released.
 * @order: order of the block.
 *
 * Nothing is freed unless put_page_testzero() reports true for @page.
 * Order-0 pages go through free_hot_cold_page(); larger blocks go through
 * __free_pages_ok().
 */
void __free_pages(struct page *page, unsigned int order)
{
	if (put_page_testzero(page)) {
		if (order == 0)		/* single pages take a separate path */
			free_hot_cold_page(page, 0);
		else
			__free_pages_ok(page, order);
	}
}
配置設定掩碼
gfp.h中定義了常用的配置設定掩碼。這些掩碼會改變配置設定的行為。
/*
 * Commonly used allocation masks, each built by OR-ing individual __GFP_*
 * flag bits. The notes below only restate which bits each mask contains.
 */
/* GFP_ATOMIC: __GFP_HIGH only; does not include __GFP_WAIT. */
#define GFP_ATOMIC (__GFP_HIGH)
/* GFP_NOIO: __GFP_WAIT without __GFP_IO or __GFP_FS. */
#define GFP_NOIO (__GFP_WAIT)
/* GFP_NOFS: __GFP_WAIT and __GFP_IO, without __GFP_FS. */
#define GFP_NOFS (__GFP_WAIT | __GFP_IO)
/* GFP_KERNEL: __GFP_WAIT, __GFP_IO and __GFP_FS. */
#define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS)
/* GFP_TEMPORARY: the GFP_KERNEL bits plus __GFP_RECLAIMABLE. */
#define GFP_TEMPORARY (__GFP_WAIT | __GFP_IO | __GFP_FS | \
__GFP_RECLAIMABLE)
/* GFP_USER: the GFP_KERNEL bits plus __GFP_HARDWALL. */
#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL)
/* GFP_HIGHUSER: the GFP_USER bits plus __GFP_HIGHMEM. */
#define GFP_HIGHUSER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL | \
__GFP_HIGHMEM)
/* GFP_HIGHUSER_MOVABLE: the GFP_HIGHUSER bits plus __GFP_MOVABLE. */
#define GFP_HIGHUSER_MOVABLE (__GFP_WAIT | __GFP_IO | __GFP_FS | \
__GFP_HARDWALL | __GFP_HIGHMEM | \
__GFP_MOVABLE)
/* GFP_IOFS: __GFP_IO and __GFP_FS, without __GFP_WAIT. */
#define GFP_IOFS (__GFP_IO | __GFP_FS)
zone水位
配置設定記憶體時會進行水位檢測。
/*
 * __zone_watermark_ok - check whether a zone has enough free pages for an
 * allocation of the given order.
 *
 * @z:             zone being checked.
 * @order:         order of the allocation being attempted.
 * @mark:          watermark to test against; used as the starting "min".
 * @classzone_idx: index into z->lowmem_reserve[] selecting the reserve to add.
 * @alloc_flags:   ALLOC_HIGH and ALLOC_HARDER each lower the required minimum.
 * @free_pages:    free page count to evaluate (adjusted locally).
 *
 * Returns true if the zone passes the check, false otherwise.
 */
static bool __zone_watermark_ok(struct zone *z, int order, unsigned long mark,
		      int classzone_idx, int alloc_flags, long free_pages)
{
	/* free_pages may go negative - that's OK */
	long min = mark;
	long lowmem_reserve = z->lowmem_reserve[classzone_idx];
	int o;

	/* Discount the pages this request would consume, less one. */
	free_pages -= (1 << order) - 1;
	if (alloc_flags & ALLOC_HIGH)
		min -= min / 2;
	if (alloc_flags & ALLOC_HARDER)
		min -= min / 4;	/* try harder: lower the bar further */
#ifdef CONFIG_CMA
	/*
	 * We don't want to regard the pages on CMA region as free
	 * on watermark checking, since they cannot be used for
	 * unmovable/reclaimable allocation and they can suddenly
	 * vanish through CMA allocation
	 */
	if (IS_ENABLED(CONFIG_CMA) && z->managed_cma_pages)
		free_pages -= zone_page_state(z, NR_FREE_CMA_PAGES);
#endif

	/* Free pages must exceed the adjusted minimum plus the lowmem reserve. */
	if (free_pages <= min + lowmem_reserve)
		return false;

	for (o = 0; o < order; o++) {
		/* At the next order, this order's pages become unavailable */
		free_pages -= z->free_area[o].nr_free << o;

		/* Require fewer higher order pages to be free */
		min >>= min_free_order_shift;

		if (free_pages <= min)
			return false;
	}
	return true;
}