天天看點

Linux頁表機制初始化

Linux啟動并建立一套完整的頁表機制要經過以下幾個步驟:

1.臨時核心頁表的初始化(head_32.S)

2.啟動分頁機制(head_32.S)

3.建立低端記憶體和高端記憶體固定映射區的頁表( init_memory_mapping())

4.建立高端記憶體永久映射區的頁表并擷取固定映射區的臨時映射區頁表(paging_init())

下面主要介紹3和4

一、低端記憶體頁表的建立

在setup_arch()中核心通過調用init_memory_mapping()來建立低端記憶體頁表

/*
 * Excerpt of setup_arch() (unrelated parts elided with "..."): the range
 * from 0 up to max_low_pfn << PAGE_SHIFT is handed to
 * init_memory_mapping(), and the value it returns is recorded in both
 * max_low_pfn_mapped and max_pfn_mapped.
 */
void __init setup_arch(char **cmdline_p)
{
    ...
    ...
    /* max_pfn_mapped is updated here */  
    max_low_pfn_mapped = init_memory_mapping(0, max_low_pfn<<PAGE_SHIFT); 
    max_pfn_mapped = max_low_pfn_mapped;  
    ...
    ... 
}
           

核心将低端記憶體的起始位址(0),和低端記憶體的結束位址(max_low_pfn<<PAGE_SHIFT)傳遞給init_memory_mapping(),下面來看init_memory_mapping()的具體實作,簡單起見,隻分析32位系統的情況

/*
 * Setup the direct mapping of the physical memory at PAGE_OFFSET.
 * This runs before bootmem is initialized and gets pages directly from
 * the physical memory. To access them they are temporarily mapped.
 *
 * The range [start, end) is split into at most NR_RANGE_MR sub-ranges
 * (small-page head, large-page middle, small-page tail); adjacent
 * sub-ranges with the same page size are merged, and every resulting
 * range is passed to kernel_physical_mapping_init().
 *
 * start: address at which the mapping begins
 * end:   address at which the mapping stops (exclusive)
 *
 * Returns a page frame number: end >> PAGE_SHIFT on 32-bit; on 64-bit the
 * value returned by the last kernel_physical_mapping_init() call, shifted
 * right by PAGE_SHIFT.
 */
unsigned long __init_refok init_memory_mapping(unsigned long start,
					       unsigned long end)
{
	unsigned long page_size_mask = 0;
	unsigned long start_pfn, end_pfn;
	unsigned long ret = 0;
	unsigned long pos;

	struct map_range mr[NR_RANGE_MR];
	int nr_range, i;
	int use_pse, use_gbpages;

	printk(KERN_INFO "init_memory_mapping: %016lx-%016lx\n", start, end);

#if defined(CONFIG_DEBUG_PAGEALLOC) || defined(CONFIG_KMEMCHECK)
	/*
	 * For CONFIG_DEBUG_PAGEALLOC, identity mapping will use small pages.
	 * This will simplify cpa(), which otherwise needs to support splitting
	 * large pages into small in interrupt context, etc.
	 */
	use_pse = use_gbpages = 0;
#else
	use_pse = cpu_has_pse;
	use_gbpages = direct_gbpages;
#endif

	set_nx();
	if (nx_enabled)
		printk(KERN_INFO "NX (Execute Disable) protection: active\n");

	/* Enable PSE if available */
	if (cpu_has_pse)
		set_in_cr4(X86_CR4_PSE);

	/* Enable PGE if available */
	if (cpu_has_pge) {
		set_in_cr4(X86_CR4_PGE);
		__supported_pte_mask |= _PAGE_GLOBAL;
	}

	/* Decide which large page sizes (1G / 2M) may be used */
	if (use_gbpages)
		page_size_mask |= 1 << PG_LEVEL_1G;
	if (use_pse)
		page_size_mask |= 1 << PG_LEVEL_2M;

	memset(mr, 0, sizeof(mr));
	nr_range = 0;

	/* head if not big page alignment ? */
	start_pfn = start >> PAGE_SHIFT;/* page frame number of the first page */
	pos = start_pfn << PAGE_SHIFT;   /* start address rounded down to a page boundary */
#ifdef CONFIG_X86_32
	/*
	 * Don't use a large page for the first 2/4MB of memory
	 * because there are often fixed size MTRRs in there
	 * and overlapping MTRRs into large pages can cause
	 * slowdowns.
	 */
	/*
	 * Starting at address 0: the small-page head covers one whole
	 * PMD-sized region, i.e. 1 << (PMD_SHIFT - PAGE_SHIFT) page frames.
	 * Otherwise the head ends at the next PMD boundary above pos.
	 */
	if (pos == 0)
		end_pfn = 1<<(PMD_SHIFT - PAGE_SHIFT);
	else
		end_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
				 << (PMD_SHIFT - PAGE_SHIFT);
#else /* CONFIG_X86_64 */
	end_pfn = ((pos + (PMD_SIZE - 1)) >> PMD_SHIFT)
			<< (PMD_SHIFT - PAGE_SHIFT);
#endif
	if (end_pfn > (end >> PAGE_SHIFT))/* clamp end_pfn so the head does not run past the requested end */
		end_pfn = end >> PAGE_SHIFT;
	if (start_pfn < end_pfn) {        /* non-empty head: record it as a small-page (4K) range */
		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);
		pos = end_pfn << PAGE_SHIFT;  /* continue from the end of the head */
	}

	/* big page (2M) range */
	start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)/* pos rounded up to a PMD boundary, as a pfn */
			 << (PMD_SHIFT - PAGE_SHIFT);
#ifdef CONFIG_X86_32
	end_pfn = (end>>PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);/* end rounded down to a PMD boundary, as a pfn */
#else /* CONFIG_X86_64 */
	end_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
			 << (PUD_SHIFT - PAGE_SHIFT);
	if (end_pfn > ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT)))
		end_pfn = ((end>>PMD_SHIFT)<<(PMD_SHIFT - PAGE_SHIFT));
#endif

	if (start_pfn < end_pfn) {/* record the range only if it is non-empty */
		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
				page_size_mask & (1<<PG_LEVEL_2M));/* large (2M) pages are used only if PG_LEVEL_2M is set in page_size_mask */
		pos = end_pfn << PAGE_SHIFT;/* continue from the end of this range */
	}

#ifdef CONFIG_X86_64
	/* big page (1G) range */
	start_pfn = ((pos + (PUD_SIZE - 1))>>PUD_SHIFT)
			 << (PUD_SHIFT - PAGE_SHIFT);
	end_pfn = (end >> PUD_SHIFT) << (PUD_SHIFT - PAGE_SHIFT);
	if (start_pfn < end_pfn) {
		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
				page_size_mask &
				 ((1<<PG_LEVEL_2M)|(1<<PG_LEVEL_1G)));
		pos = end_pfn << PAGE_SHIFT;
	}

	/* tail is not big page (1G) alignment */
	start_pfn = ((pos + (PMD_SIZE - 1))>>PMD_SHIFT)
			 << (PMD_SHIFT - PAGE_SHIFT);
	end_pfn = (end >> PMD_SHIFT) << (PMD_SHIFT - PAGE_SHIFT);
	if (start_pfn < end_pfn) {
		nr_range = save_mr(mr, nr_range, start_pfn, end_pfn,
				page_size_mask & (1<<PG_LEVEL_2M));
		pos = end_pfn << PAGE_SHIFT;
	}
#endif

	/* tail is not big page (2M) alignment */
	/* record the unaligned tail as a small-page (4K) range; save_mr() ignores it if empty */
	start_pfn = pos>>PAGE_SHIFT;
	end_pfn = end>>PAGE_SHIFT;
	nr_range = save_mr(mr, nr_range, start_pfn, end_pfn, 0);

	/* try to merge same page size and continuous */
	for (i = 0; nr_range > 1 && i < nr_range - 1; i++) {
		unsigned long old_start;
		if (mr[i].end != mr[i+1].start ||  /* skip unless the two ranges are adjacent and use the same page size */
		    mr[i].page_size_mask != mr[i+1].page_size_mask)
			continue;
		/* move it */
		old_start = mr[i].start;
		memmove(&mr[i], &mr[i+1],
			(nr_range - 1 - i) * sizeof(struct map_range));
		mr[i--].start = old_start;
		nr_range--;
	}

	for (i = 0; i < nr_range; i++)
		printk(KERN_DEBUG " %010lx - %010lx page %s\n",
				mr[i].start, mr[i].end,
			(mr[i].page_size_mask & (1<<PG_LEVEL_1G))?"1G":(
			 (mr[i].page_size_mask & (1<<PG_LEVEL_2M))?"2M":"4k"));

	/*
	 * Find space for the kernel direct mapping tables.
	 *
	 * Later we should allocate these tables in the local node of the
	 * memory mapped. Unfortunately this is done currently before the
	 * nodes are discovered.
	 */
	 /*
	  * The bootmem allocator is not up yet, so space for the kernel page
	  * tables is looked up by find_early_table_space() (presumably from
	  * the e820 memory map -- see the e820_table_* reservation below).
	  */
	if (!after_bootmem)
		find_early_table_space(end, use_pse, use_gbpages);

#ifdef CONFIG_X86_32
	for (i = 0; i < nr_range; i++)/* build the page tables for every recorded range */
		kernel_physical_mapping_init(mr[i].start, mr[i].end,
					     mr[i].page_size_mask);
	ret = end;
#else /* CONFIG_X86_64 */
	for (i = 0; i < nr_range; i++)
		ret = kernel_physical_mapping_init(mr[i].start, mr[i].end,
						   mr[i].page_size_mask);
#endif

#ifdef CONFIG_X86_32
	early_ioremap_page_table_range_init();/* set up the page tables of the kernel fixmap area */

	load_cr3(swapper_pg_dir);
#endif

#ifdef CONFIG_X86_64
	if (!after_bootmem && !start) {
		pud_t *pud;
		pmd_t *pmd;

		mmu_cr4_features = read_cr4();

		/*
		 * _brk_end cannot change anymore, but it and _end may be
		 * located on different 2M pages. cleanup_highmap(), however,
		 * can only consider _end when it runs, so destroy any
		 * mappings beyond _brk_end here.
		 */
		pud = pud_offset(pgd_offset_k(_brk_end), _brk_end);
		pmd = pmd_offset(pud, _brk_end - 1);
		while (++pmd <= pmd_offset(pud, (unsigned long)_end - 1))
			pmd_clear(pmd);
	}
#endif
	__flush_tlb_all();

	if (!after_bootmem && e820_table_end > e820_table_start)
		reserve_early(e820_table_start << PAGE_SHIFT,
				 e820_table_end << PAGE_SHIFT, "PGTABLE");

	if (!after_bootmem)
		early_memtest(start, end);

	return ret >> PAGE_SHIFT;
}
           

将init_memory_mapping()中的幾個比較關鍵的部分提取出來進行分析

用來儲存記憶體段資訊的struct map_range結構體的定義如下

/*
 * One contiguous range to be mapped with a single page size.
 * Filled in by save_mr() and consumed by init_memory_mapping().
 */
struct map_range {
	unsigned long start;	/* first byte address of the range (pfn << PAGE_SHIFT) */
	unsigned long end;	/* byte address just past the range (pfn << PAGE_SHIFT) */
	unsigned page_size_mask;	/* (1 << PG_LEVEL_*) bits; 0 means small (4K) pages */
};
           

其包含了一個段的起始位址,結束位址,以及該段是按多大的頁面進行分頁(4K,2M,1G)

而save_mr()函數的工作就是初始化這三項

/*
 * Append the page-frame range [start_pfn, end_pfn) to the mr[] table.
 *
 * mr:             table of NR_RANGE_MR entries being filled in
 * nr_range:       number of entries already in use
 * start_pfn:      first page frame of the range
 * end_pfn:        page frame just past the range
 * page_size_mask: page-size bits to store with the range
 *
 * An empty range (start_pfn >= end_pfn) is ignored.  Panics if the table
 * is already full.  Returns the new number of entries in use.
 */
static int __meminit save_mr(struct map_range *mr, int nr_range,
			     unsigned long start_pfn, unsigned long end_pfn,
			     unsigned long page_size_mask)
{
	struct map_range *slot;

	/* Nothing to record for an empty range. */
	if (start_pfn >= end_pfn)
		return nr_range;

	if (nr_range >= NR_RANGE_MR)
		panic("run out of range for init_memory_mapping\n");

	/* Ranges are stored as byte addresses, not page frame numbers. */
	slot = &mr[nr_range];
	slot->start = start_pfn<<PAGE_SHIFT;
	slot->end = end_pfn<<PAGE_SHIFT;
	slot->page_size_mask = page_size_mask;

	return nr_range + 1;
}
           

完成分段工作後,核心對每個段調用關鍵函數kernel_physical_mapping_init()來完成虛拟位址到實體位址的映射,在看這個函數之前先介紹一些關于LINUX分頁的基本概念和一些關鍵的宏

在2.6.11後,Linux采用四級分頁模型,這四級頁目錄分别為

  • 頁全局目錄(Page Global Directory)
  • 頁上級目錄(Page Upper Directory)
  • 頁中間目錄(Page Middle Directory)
  • 頁表(Page Table)

對于沒有啟動PAE的32位系統,Linux雖然也采用四級分頁模型,但本質上隻用到了兩級分頁。Linux通過将“頁上級目錄”位域和“頁中間目錄”位域全設為0來達到使用兩級分頁的目的,但為了保證程式在32位和64位系統上都能運作,核心保留了頁上級目錄和頁中間目錄在指針序列中的位置,它們的頁目錄項數都被核心置為1,并把這2個頁目錄項映射到适合的全局目錄項。

PAGE_SHIFT,PMD_SHIFT,PUD_SHIFT,PGDIR_SHIFT

對應相應的頁目錄所能映射的區域大小的位數,如PAGE_SHIFT為12,即頁面大小為4k,PMD_SHIFT為線性位址的offset和table字段的總位數,未啟用PAE的32位系統下,為22.

PTRS_PER_PTE,   PTRS_PER_PMD,   PTRS_PER_PUD,   PTRS_PER_PGD

對應相應頁目錄中的表項數。32位系統下,當PAE被禁止時,他們的值分别為1024,1,1和1024,也就是說隻使用兩級分頁。

pgd_index(addr),  pud_index(addr),  pmd_index(addr),  pte_index(addr)

取addr在該目錄中的索引。 

pte_index(addr)的實作:

/*
 * Return the index, within its page table, of the pte that maps
 * 'address': the page number of the address reduced modulo the number of
 * entries in a page table (PTRS_PER_PTE is used as a power-of-two mask).
 */
static inline unsigned long pte_index(unsigned long address)
{
	unsigned long page_nr = address >> PAGE_SHIFT;

	return page_nr & (PTRS_PER_PTE - 1);
}
           

pud_offset(pgd,addr),   pmd_offset(pud,addr),   pte_offset(pmd,addr)

以pmd_offset為例,它傳回線性位址addr對應的pmd表項在pud所指向的pmd表中的位址。在兩級分頁系統中,pmd_offset和pud_offset都傳回頁全局目錄項的位址;在三級分頁系統中,隻有pud_offset傳回頁全局目錄項的位址。

/*
 * This maps the physical memory to kernel virtual address space, a total
 * of max_low_pfn pages, by creating page tables starting from address
 * PAGE_OFFSET:
 *
 * start, end:     byte addresses delimiting the range; they are converted
 *                 to page frame numbers with >> PAGE_SHIFT
 * page_size_mask: large (PSE) pages are used only when this equals
 *                 (1 << PG_LEVEL_2M) and the CPU has PSE
 *
 * The page tables are written in two passes (see the comment in the
 * body).  Always returns 0.
 */
unsigned long __init
kernel_physical_mapping_init(unsigned long start,
			     unsigned long end,
			     unsigned long page_size_mask)
{
	int use_pse = page_size_mask == (1<<PG_LEVEL_2M);
	unsigned long start_pfn, end_pfn;
	pgd_t *pgd_base = swapper_pg_dir;
	int pgd_idx, pmd_idx, pte_ofs;
	unsigned long pfn;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte;
	unsigned pages_2m, pages_4k;
	int mapping_iter;

	start_pfn = start >> PAGE_SHIFT;
	end_pfn = end >> PAGE_SHIFT;

	/*
	 * First iteration will setup identity mapping using large/small pages
	 * based on use_pse, with other attributes same as set by
	 * the early code in head_32.S
	 *
	 * Second iteration will setup the appropriate attributes (NX, GLOBAL..)
	 * as desired for the kernel identity mapping.
	 *
	 * This two pass mechanism conforms to the TLB app note which says:
	 *
	 *     "Software should not write to a paging-structure entry in a way
	 *      that would change, for any linear address, both the page size
	 *      and either the page frame or attributes."
	 */
	mapping_iter = 1;

	if (!cpu_has_pse)
		use_pse = 0;

repeat:
	pages_2m = pages_4k = 0;
	pfn = start_pfn;         /* pfn walks the range, starting at the first page frame */
	pgd_idx = pgd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);/* pgd index of the kernel virtual address that maps pfn */
	pgd = pgd_base + pgd_idx;/* pgd entry covering the first page frame */

	/* walk the page global directory starting from that entry */
	for (; pgd_idx < PTRS_PER_PGD; pgd++, pgd_idx++) {
		pmd = one_md_table_init(pgd);/* obtain the pmd table behind this pgd entry */

		if (pfn >= end_pfn)
			continue;
#ifdef CONFIG_X86_PAE
		pmd_idx = pmd_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
		pmd += pmd_idx;
#else
		pmd_idx = 0;
#endif
		/* walk the pmd table; PTRS_PER_PMD is 1 on 32-bit without PAE and 512 with PAE */
		for (; pmd_idx < PTRS_PER_PMD && pfn < end_pfn;
		     pmd++, pmd_idx++) {
			unsigned int addr = pfn * PAGE_SIZE + PAGE_OFFSET;

			/*
			 * Map with big pages if possible, otherwise
			 * create normal page tables:
			 */
			if (use_pse) {
				unsigned int addr2;
				pgprot_t prot = PAGE_KERNEL_LARGE;
				/*
				 * first pass will use the same initial
				 * identity mapping attribute + _PAGE_PSE.
				 */
				pgprot_t init_prot =
					__pgprot(PTE_IDENT_ATTR |
						 _PAGE_PSE);

				addr2 = (pfn + PTRS_PER_PTE-1) * PAGE_SIZE +
					PAGE_OFFSET + PAGE_SIZE-1;

				if (is_kernel_text(addr) ||
				    is_kernel_text(addr2))
					prot = PAGE_KERNEL_LARGE_EXEC;

				pages_2m++;
				if (mapping_iter == 1)
					set_pmd(pmd, pfn_pmd(pfn, init_prot));
				else
					set_pmd(pmd, pfn_pmd(pfn, prot));

				pfn += PTRS_PER_PTE;
				continue;
			}
			pte = one_page_table_init(pmd);/* obtain the page table behind this pmd entry */

			/* index of pfn within the page table; advance to that pte */
			pte_ofs = pte_index((pfn<<PAGE_SHIFT) + PAGE_OFFSET);
			pte += pte_ofs;
			/* walk the page table starting from that pte */
			for (; pte_ofs < PTRS_PER_PTE && pfn < end_pfn;
			     pte++, pfn++, pte_ofs++, addr += PAGE_SIZE) {
				pgprot_t prot = PAGE_KERNEL;
				/*
				 * first pass will use the same initial
				 * identity mapping attribute.
				 */
				pgprot_t init_prot = __pgprot(PTE_IDENT_ATTR);

				if (is_kernel_text(addr)) /* kernel text gets executable permissions */
					prot = PAGE_KERNEL_EXEC;

				pages_4k++;
				/* install the pte that maps pfn */
				
				if (mapping_iter == 1)/* first pass: use the initial attributes (init_prot) */
					set_pte(pte, pfn_pte(pfn, init_prot));
				else                 /* second pass: use the final attributes (prot) */
					set_pte(pte, pfn_pte(pfn, prot));
			}
		}
	}
	if (mapping_iter == 1) {
		/*
		 * update direct mapping page count only in the first
		 * iteration.
		 */
		update_page_count(PG_LEVEL_2M, pages_2m);
		update_page_count(PG_LEVEL_4K, pages_4k);

		/*
		 * local global flush tlb, which will flush the previous
		 * mappings present in both small and large page TLB's.
		 */
		__flush_tlb_all();

		/*
		 * Second iteration will set the actual desired PTE attributes.
		 */
		mapping_iter = 2;
		goto repeat;
	}
	return 0;
}
           

至此,低端記憶體的實體位址和虛拟位址之間的映射關系已全部建立起來!

二、高端記憶體固定映射區的建立

       在init_memory_mapping()中完成了低端記憶體映射後,其将調用early_ioremap_page_table_range_init()來建立高端記憶體的固定映射區頁表。與低端記憶體的頁表初始化不同的是,固定映射區的頁表隻是被配置設定,相應的PTE項并未初始化,這個工作交由後面的各個固定映射區部分的相關代碼調用set_fixmap()來将相關的固定映射區頁表與實體記憶體關聯。

           每個固定映射區索引都以枚舉類型的形式定義在enum fixed_addresses中

/*
 * Indices of the fixed-mapping (fixmap) slots.  Which entries exist
 * depends on the kernel configuration (see the #ifdef blocks).  The
 * permanent slots end at __end_of_permanent_fixed_addresses; the
 * boot-time early_ioremap() slots (FIX_BTMAP_*) follow, starting at the
 * next multiple of 256.  __end_of_fixed_addresses is one past the last
 * index.
 */
enum fixed_addresses {
#ifdef CONFIG_X86_32
	FIX_HOLE,
	FIX_VDSO,
#else
	VSYSCALL_LAST_PAGE,
	VSYSCALL_FIRST_PAGE = VSYSCALL_LAST_PAGE
			    + ((VSYSCALL_END-VSYSCALL_START) >> PAGE_SHIFT) - 1,
	VSYSCALL_HPET,
#endif
	FIX_DBGP_BASE,
	FIX_EARLYCON_MEM_BASE,
#ifdef CONFIG_PROVIDE_OHCI1394_DMA_INIT
	FIX_OHCI1394_BASE,
#endif
#ifdef CONFIG_X86_LOCAL_APIC
	FIX_APIC_BASE,	/* local (CPU) APIC) -- required for SMP or not */
#endif
#ifdef CONFIG_X86_IO_APIC
	FIX_IO_APIC_BASE_0,
	FIX_IO_APIC_BASE_END = FIX_IO_APIC_BASE_0 + MAX_IO_APICS - 1,
#endif
#ifdef CONFIG_X86_VISWS_APIC
	FIX_CO_CPU,	/* Cobalt timer */
	FIX_CO_APIC,	/* Cobalt APIC Redirection Table */
	FIX_LI_PCIA,	/* Lithium PCI Bridge A */
	FIX_LI_PCIB,	/* Lithium PCI Bridge B */
#endif
#ifdef CONFIG_X86_F00F_BUG
	FIX_F00F_IDT,	/* Virtual mapping for IDT */
#endif
#ifdef CONFIG_X86_CYCLONE_TIMER
	FIX_CYCLONE_TIMER, /*cyclone timer register*/
#endif
#ifdef CONFIG_X86_32
	FIX_KMAP_BEGIN,	/* reserved pte's for temporary kernel mappings */
	FIX_KMAP_END = FIX_KMAP_BEGIN+(KM_TYPE_NR*NR_CPUS)-1,
#ifdef CONFIG_PCI_MMCONFIG
	FIX_PCIE_MCFG,
#endif
#endif
#ifdef CONFIG_PARAVIRT
	FIX_PARAVIRT_BOOTMAP,
#endif
	FIX_TEXT_POKE1,	/* reserve 2 pages for text_poke() */
	FIX_TEXT_POKE0, /* first page is last, because allocation is backward */
	__end_of_permanent_fixed_addresses,
	/*
	 * 256 temporary boot-time mappings, used by early_ioremap(),
	 * before ioremap() is functional.
	 *
	 * We round it up to the next 256 pages boundary so that we
	 * can have a single pgd entry and a single pte table:
	 */
#define NR_FIX_BTMAPS		64
#define FIX_BTMAPS_SLOTS	4
	FIX_BTMAP_END = __end_of_permanent_fixed_addresses + 256 -
			(__end_of_permanent_fixed_addresses & 255),
	FIX_BTMAP_BEGIN = FIX_BTMAP_END + NR_FIX_BTMAPS*FIX_BTMAPS_SLOTS - 1,
#ifdef CONFIG_X86_32
	FIX_WP_TEST,
#endif
#ifdef CONFIG_INTEL_TXT
	FIX_TBOOT_BASE,
#endif
	__end_of_fixed_addresses
};
           

一個索引對應一個4KB的頁框,固定映射區的結束位址為FIXADDR_TOP,即0xfffff000(4G-4K),固定映射區是反向生長的,也就是說第一個索引對應的位址離FIXADDR_TOP最近。宏__fix_to_virt(idx)通過索引來計算相應的固定映射區域的線性位址

/*
 * Create the page-table structure for the whole fixmap area in
 * swapper_pg_dir, then reset the early ioremap state.  No mappings are
 * installed here; set_fixmap() does that later.
 */
void __init early_ioremap_page_table_range_init(void)
{
	unsigned long fixmap_lo, fixmap_hi;

	/*
	 * PMD-aligned bounds of the fixmap area: the address of the last
	 * fixmap index rounded down, and FIXADDR_TOP rounded up.
	 */
	fixmap_lo = __fix_to_virt(__end_of_fixed_addresses - 1) & PMD_MASK;
	fixmap_hi = (FIXADDR_TOP + PMD_SIZE - 1) & PMD_MASK;

	page_table_range_init(fixmap_lo, fixmap_hi, swapper_pg_dir);
	early_ioremap_reset();
}
           
/*
 * This function initializes a certain range of kernel virtual memory
 * with new bootmem page tables, everywhere page tables are missing in
 * the given range.
 *
 * NOTE: The pagetables are allocated contiguous on the physical space
 * so we can cache the place of the first one and move around without
 * checking the pgd every time.
 *
 * start:    first virtual address of the range
 * end:      virtual address at which the walk stops; the loops test
 *           vaddr != end while advancing in PMD_SIZE steps
 * pgd_base: page global directory to populate
 */
static void __init
page_table_range_init(unsigned long start, unsigned long end, pgd_t *pgd_base)
{
	int pgd_idx, pmd_idx;
	unsigned long vaddr;
	pgd_t *pgd;
	pmd_t *pmd;
	pte_t *pte = NULL;

	vaddr = start;
	pgd_idx = pgd_index(vaddr);/* pgd index for vaddr */
	pmd_idx = pmd_index(vaddr);/* pmd index for vaddr */
	pgd = pgd_base + pgd_idx;  /* pgd entry for vaddr */

	for ( ; (pgd_idx < PTRS_PER_PGD) && (vaddr != end); pgd++, pgd_idx++) {
		pmd = one_md_table_init(pgd); /* first entry of the pmd table behind this pgd entry */
		pmd = pmd + pmd_index(vaddr); /* advance to the pmd entry for vaddr */
		for (; (pmd_idx < PTRS_PER_PMD) && (vaddr != end);
							pmd++, pmd_idx++) {
			/*
			 * Set up the pte page for this pmd entry.
			 * page_table_kmap_check() may replace it with a
			 * newly allocated pte page when vaddr lies in the
			 * temporary-kmap part of the fixmap area.
			 */
			pte = page_table_kmap_check(one_page_table_init(pmd),
			                            pmd, vaddr, pte);

			vaddr += PMD_SIZE;
		}
		pmd_idx = 0;
	}
}

           
/*
 * Check the pte page just set up for 'pmd' and, with CONFIG_HIGHMEM,
 * relocate it to a freshly allocated page when it belongs to the
 * temporary-kmap fixmap range but does not lie inside the early page-table
 * area [e820_table_start, e820_table_end).
 *
 * pte:     pte page currently attached to pmd
 * pmd:     pmd entry being processed
 * vaddr:   virtual address covered by pmd
 * lastpte: pte page returned for the previous pmd entry (NULL on the
 *          first call); used only by the final contiguity BUG_ON
 *
 * Returns the pte page now attached to pmd (the new one if relocated).
 * Without CONFIG_HIGHMEM, pte is returned unchanged.
 */
static pte_t *__init page_table_kmap_check(pte_t *pte, pmd_t *pmd,
					   unsigned long vaddr, pte_t *lastpte)
{
#ifdef CONFIG_HIGHMEM
	/*
	 * Something (early fixmap) may already have put a pte
	 * page here, which causes the page table allocation
	 * to become nonlinear. Attempt to fix it, and if it
	 * is still nonlinear then we have to bug.
	 */

	/*
	 * PMD-granular indices (address >> PMD_SHIFT) of the two ends of the
	 * temporary-kmap fixmap range, FIX_KMAP_END and FIX_KMAP_BEGIN.
	 */
	int pmd_idx_kmap_begin = fix_to_virt(FIX_KMAP_END) >> PMD_SHIFT;
	int pmd_idx_kmap_end = fix_to_virt(FIX_KMAP_BEGIN) >> PMD_SHIFT;

	/*
	 * Relocate the pte page only if:
	 *  1. the kmap range spans more than one pmd entry
	 *     (its begin and end indices differ),
	 *  2. vaddr falls inside the kmap range, and
	 *  3. the pte page lies outside the early page-table area
	 *     [e820_table_start, e820_table_end).
	 */
	if (pmd_idx_kmap_begin != pmd_idx_kmap_end
	    && (vaddr >> PMD_SHIFT) >= pmd_idx_kmap_begin
	    && (vaddr >> PMD_SHIFT) <= pmd_idx_kmap_end
	    && ((__pa(pte) >> PAGE_SHIFT) < e820_table_start
		|| (__pa(pte) >> PAGE_SHIFT) >= e820_table_end)) {
		pte_t *newpte;
		int i;

		BUG_ON(after_bootmem);
		newpte = alloc_low_page();  /* fresh page to serve as the pte page, to keep the tables contiguous */
		for (i = 0; i < PTRS_PER_PTE; i++) /* copy every entry of the old pte page into the new one */
			set_pte(newpte + i, pte[i]);

		paravirt_alloc_pte(&init_mm, __pa(newpte) >> PAGE_SHIFT);
		set_pmd(pmd, __pmd(__pa(newpte)|_PAGE_TABLE));/* point the pmd entry at the new pte page */
		BUG_ON(newpte != pte_offset_kernel(pmd, 0));
		__flush_tlb_all();

		paravirt_release_pte(__pa(pte) >> PAGE_SHIFT);
		pte = newpte;
	}
	BUG_ON(vaddr < fix_to_virt(FIX_KMAP_BEGIN - 1)
	       && vaddr > fix_to_virt(FIX_KMAP_END)
	       && lastpte && lastpte + PTRS_PER_PTE != pte);
#endif
	return pte;
}

           

至此高端記憶體的固定映射區的頁表配置設定完成!

三、高端記憶體永久映射區頁表的建立和臨時映射區頁表的擷取

paging_init()負責完成剩下的頁表建立工作

/*
 * Finish the kernel page-table setup: pagetable_init(), a full TLB
 * flush, kmap_init(), and finally sparse_init() and zone_sizes_init().
 */
void __init paging_init(void)
{
	pagetable_init();

	__flush_tlb_all();

	kmap_init();

	/*
	 * NOTE: at this point the bootmem allocator is fully available.
	 */
	sparse_init();
	zone_sizes_init();
}
           
/*
 * Set up the permanent kernel mapping page tables in the kernel page
 * directory, swapper_pg_dir.
 */
static void __init pagetable_init(void)
{
	permanent_kmaps_init(swapper_pg_dir);
}
           
/*
 * Create the page tables covering the permanent kernel mapping window,
 * LAST_PKMAP pages starting at PKMAP_BASE, and cache the pte that maps
 * PKMAP_BASE in pkmap_page_table.
 *
 * pgd_base: page global directory handed to page_table_range_init()
 */
static void __init permanent_kmaps_init(pgd_t *pgd_base)
{
	const unsigned long pkmap_start = PKMAP_BASE;
	const unsigned long pkmap_end = pkmap_start + PAGE_SIZE*LAST_PKMAP;
	pgd_t *pgd_entry;
	pud_t *pud_entry;
	pmd_t *pmd_entry;

	/* Set up the page tables for the whole pkmap window. */
	page_table_range_init(pkmap_start, pkmap_end, pgd_base);

	/* Walk pgd -> pud -> pmd -> pte for the first pkmap address. */
	pgd_entry = swapper_pg_dir + pgd_index(pkmap_start);
	pud_entry = pud_offset(pgd_entry, pkmap_start);
	pmd_entry = pmd_offset(pud_entry, pkmap_start);
	pkmap_page_table = pte_offset_kernel(pmd_entry, pkmap_start);
}
           

permanent_kmaps_init()完成了永久映射區的頁表配置設定

/*
 * Cache the pte of the first temporary-kmap fixmap slot (FIX_KMAP_BEGIN)
 * in kmap_pte, and set the protection used for kmap mappings to
 * PAGE_KERNEL.
 */
static void __init kmap_init(void)
{
	/* Virtual address of the first temporary-kmap fixmap slot. */
	unsigned long first_kmap_vaddr = __fix_to_virt(FIX_KMAP_BEGIN);

	kmap_pte = kmap_get_fixmap_pte(first_kmap_vaddr);
	kmap_prot = PAGE_KERNEL;
}
           

kmap_init()擷取了之前已經配置設定了的臨時映射區的頁表,把起始頁表項儲存在kmap_pte中。

繼續閱讀