天天看點

linux程序位址空間(2) 缺頁異常詳解(3)寫時複制COW詳解

接上一篇

現在分析寫時複制COW。對于寫時複制,首先要把握的一點是隻有寫操作才有可能觸發寫時複制,是以總要先判斷異常flag是否含有标志FAULT_FLAG_WRITE,然後判斷二級頁表條目值是否含有L_PTE_WRITE标志,這個标志表示該實體頁是否可寫,如果不可寫則說明應該進入寫時複制流程,調用處理函數do_wp_page;

可見,COW的應用場合就是所通路的映射頁不可寫,它包括兩種情況,第一種是fork導緻,第二種是如malloc後第一次對它進行讀操作,擷取到的是zero_pfn零頁,當再次寫時需要寫時複制,兩者的共同特點都是虛拟位址的二級頁表映射内容在記憶體中,但是對應的頁不可寫,在函數do_wp_page中對于這兩種情況的處理基本相似;

另外一個應該知道的是,如果該頁隻有一個程序在用,那麼就直接把這個頁修改為可寫就行了,不要搞COW,總之,不到萬不得已的情況下是不會進行COW的,這也是核心對于COW使用的原則,就是盡量不使用;

函數do_wp_page源碼如下:

/*
 * do_wp_page - handle a write fault on a PTE whose page is present but
 * not writable (the write-protect case described in the article text).
 *
 * The function ends in one of two ways:
 *   - "reuse": the existing PTE is made young/dirty/(maybe) writable in
 *     place and no page is allocated;
 *   - copy: a new page is allocated (zero-filled when orig_pte refers to
 *     the zero pfn, otherwise filled by cow_user_page() from old_page),
 *     and a PTE for the new page replaces the old one.
 *
 * @mm:         address space the fault belongs to
 * @vma:        VMA covering @address
 * @address:    faulting virtual address
 * @page_table: PTE pointer for @address
 * @pmd:        PMD entry, used to re-map and re-lock the PTE after the
 *              lock has been dropped
 * @ptl:        PTE spinlock; re-assigned by pte_offset_map_lock()
 * @orig_pte:   PTE value sampled by the caller; every re-lock compares
 *              the live PTE against it with pte_same()
 *
 * Return: VM_FAULT_WRITE or'ed into ret when the PTE was made writable
 * or replaced; 0 when the PTE changed while unlocked or the page lost
 * its mapping; the page_mkwrite() result when it reports
 * VM_FAULT_ERROR or VM_FAULT_NOPAGE; VM_FAULT_OOM when
 * anon_vma_prepare(), page allocation or the memcg charge fails.
 *
 * Locking: every return path has called pte_unmap_unlock() before
 * returning. NOTE(review): this implies the caller enters with the PTE
 * mapped and ptl held -- confirm against the caller.
 */
static int do_wp_page(struct mm_struct *mm, struct vm_area_struct *vma,

                   unsigned long address, pte_t *page_table, pmd_t *pmd,

                   spinlock_t *ptl, pte_t orig_pte)

{

         struct page *old_page, *new_page;

         pte_t entry;

         int reuse = 0, ret = 0;

         /* Set to 1 only after vm_ops->page_mkwrite() succeeded and old_page is locked. */
         int page_mkwrite = 0;

         /* Non-NULL only on the shared-writable path; dirtied after the PTE lock is dropped. */
         struct page *dirty_page = NULL;

         old_page = vm_normal_page(vma, address, orig_pte);

         /*
          * No struct page behind this PTE. NOTE(review): presumably a raw
          * pfn / special mapping -- confirm against vm_normal_page().
          */
         if (!old_page) {

                   /* Shared writable VMA: make the PTE writable in place, no copy. */
                   if ((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==

                                          (VM_WRITE|VM_SHARED))

                            goto reuse;

                   /* Otherwise copy; old_page stays NULL on the 'gotten' path. */
                   goto gotten;

         }

         /* Anonymous, non-KSM page: decide via reuse_swap_page() under the page lock. */
         if (PageAnon(old_page) && !PageKsm(old_page)) {

                   if (!trylock_page(old_page)) {

                            /*
                             * Could not get the page lock immediately: pin the
                             * page, drop the PTE lock, take the page lock with
                             * lock_page(), then re-take the PTE lock.
                             */
                            page_cache_get(old_page);

                            pte_unmap_unlock(page_table, ptl);

                            lock_page(old_page);

                            page_table = pte_offset_map_lock(mm, pmd, address,

                                                                  &ptl);

                            /* PTE changed while unlocked: undo and return ret (still 0). */
                            if (!pte_same(*page_table, orig_pte)) {

                                     unlock_page(old_page);

                                     page_cache_release(old_page);

                                     goto unlock;

                            }

                            /* Drop the temporary pin taken above. */
                            page_cache_release(old_page);

                   }

                   /*
                    * NOTE(review): per the article text a page with a single
                    * user is simply made writable; reuse_swap_page() is
                    * presumably that check -- confirm its exact semantics.
                    */
                   reuse = reuse_swap_page(old_page);

                   unlock_page(old_page);

         }

    /* Not handled above, and the VMA is both writable and shared. */
    else if (unlikely((vma->vm_flags & (VM_WRITE|VM_SHARED)) ==

                                               (VM_WRITE|VM_SHARED))) {

                   /* The mapping provides a page_mkwrite() hook: consult it first. */
                   if (vma->vm_ops && vma->vm_ops->page_mkwrite) {

                            struct vm_fault vmf;

                            int tmp;

                            /* Page-aligned faulting address. */
                            vmf.virtual_address = (void __user *)(address &

                                                                           PAGE_MASK);

                            vmf.pgoff = old_page->index;

                            vmf.flags = FAULT_FLAG_WRITE|FAULT_FLAG_MKWRITE;

                            vmf.page = old_page;

                            /*
                             * Pin the page and drop the PTE lock before calling
                             * the hook, so the hook runs without the PTE lock held.
                             */
                            page_cache_get(old_page);

                            pte_unmap_unlock(page_table, ptl);

                            tmp = vma->vm_ops->page_mkwrite(vma, &vmf);

                            /* Hook reported an error or NOPAGE: return its result. */
                            if (unlikely(tmp &

                                               (VM_FAULT_ERROR | VM_FAULT_NOPAGE))) {

                                     ret = tmp;

                                     goto unwritable_page;

                            }

                            /* Hook did not return the page locked: lock it here. */
                            if (unlikely(!(tmp & VM_FAULT_LOCKED))) {

                                     lock_page(old_page);

                                     /*
                                      * Page no longer has a mapping: return 0.
                                      * NOTE(review): presumably so the fault is
                                      * simply retried -- confirm.
                                      */
                                     if (!old_page->mapping) {

                                               ret = 0;

                                               unlock_page(old_page);

                                               goto unwritable_page;

                                     }

                            } else

                                     /* Hook claimed VM_FAULT_LOCKED: verify it. */
                                     VM_BUG_ON(!PageLocked(old_page));

                            /* Re-take the PTE lock and check the PTE is unchanged. */
                            page_table = pte_offset_map_lock(mm, pmd, address,

                                                                  &ptl);

                            if (!pte_same(*page_table, orig_pte)) {

                                     unlock_page(old_page);

                                     page_cache_release(old_page);

                                     goto unlock;

                            }

                            /*
                             * From here old_page is locked and carries the pin
                             * taken before the hook; both are released in the
                             * page_mkwrite branch after 'unlock'.
                             */
                            page_mkwrite = 1;

                   }

                   /* Remember the page for dirtying after the PTE lock is dropped. */
                   dirty_page = old_page;

                   get_page(dirty_page);

                   /* This branch always reuses the page; it never copies. */
                   reuse = 1;

         }

         if (reuse) {

/* Also entered directly by 'goto reuse' from the !old_page case above. */
reuse:

                   flush_cache_page(vma, address, pte_pfn(orig_pte));

                   /* Rebuild the PTE as young + dirty, writable if maybe_mkwrite() allows. */
                   entry = pte_mkyoung(orig_pte);

                   entry = maybe_mkwrite(pte_mkdirty(entry), vma);

                   /* update_mmu_cache() is called only when ptep_set_access_flags() returns non-zero. */
                   if (ptep_set_access_flags(vma, address, page_table, entry,1))

                            update_mmu_cache(vma, address, entry);

                   ret |= VM_FAULT_WRITE;

                   goto unlock;

         }

         /*
          * Copy path. old_page is non-NULL here (the NULL case jumps straight
          * to 'gotten'); pin it across the unlocked section below.
          */
         page_cache_get(old_page);

gotten:

         /* Drop the PTE lock before preparing the anon_vma and allocating. */
         pte_unmap_unlock(page_table, ptl);

         if (unlikely(anon_vma_prepare(vma)))

                   goto oom;

         /* Source is the zero pfn: a freshly zeroed page is enough, nothing to copy. */
         if (is_zero_pfn(pte_pfn(orig_pte))) {

                   new_page = alloc_zeroed_user_highpage_movable(vma, address);

                   if (!new_page)

                            goto oom;

         }

    /* Otherwise allocate a page and fill it from old_page. */
    else {

                   new_page = alloc_page_vma(GFP_HIGHUSER_MOVABLE, vma, address);

                   if (!new_page)

                            goto oom;

                   cow_user_page(new_page, old_page, address, vma);

         }

         __SetPageUptodate(new_page);

         /* VM_LOCKED VMA with an old page: clear its mlock state under the page lock. */
         if ((vma->vm_flags & VM_LOCKED) && old_page) {

                   lock_page(old_page);     

                   clear_page_mlock(old_page);

                   unlock_page(old_page);

         }

         /* Charge the new page to the memory cgroup; non-zero result is treated as OOM. */
         if (mem_cgroup_newpage_charge(new_page, mm, GFP_KERNEL))

                   goto oom_free_new;

         /* Re-take the PTE lock; install the new page only if the PTE is unchanged. */
         page_table = pte_offset_map_lock(mm, pmd, address, &ptl);

         if (likely(pte_same(*page_table, orig_pte))) {

                   /*
                    * RSS accounting: a non-anon old page moves one count from
                    * file_rss to anon_rss; no old page adds one to anon_rss;
                    * an anon old page leaves both counters untouched.
                    */
                   if (old_page) {

                            if (!PageAnon(old_page)) {

                                     dec_mm_counter(mm, file_rss);

                                     inc_mm_counter(mm, anon_rss);

                            }

                   } else

                            inc_mm_counter(mm, anon_rss);

                   flush_cache_page(vma, address, pte_pfn(orig_pte));

                   /* Build a dirty (and, if the VMA allows, writable) PTE for new_page. */
                   entry = mk_pte(new_page, vma->vm_page_prot);

                   entry = maybe_mkwrite(pte_mkdirty(entry), vma);

                   /* Clear and flush the old PTE before installing the new one. */
                   ptep_clear_flush(vma, address, page_table);

                   page_add_new_anon_rmap(new_page, vma, address);

                   set_pte_at_notify(mm, address, page_table, entry);

                   update_mmu_cache(vma, address, entry);

                   if (old_page) {

                            page_remove_rmap(old_page);

                   }

                   /*
                    * Redirect new_page to old_page so both releases below hit
                    * old_page and the real new page keeps its reference. One
                    * release balances the pin taken before 'gotten'.
                    * NOTE(review): the other presumably drops the reference
                    * held by the old mapping -- confirm.
                    */
                   new_page = old_page;

                   ret |= VM_FAULT_WRITE;

         }

    /* PTE changed while unlocked: undo the charge; new_page is released below. */
    else

                   mem_cgroup_uncharge_page(new_page);

         if (new_page)

                   page_cache_release(new_page);

         if (old_page)

                   page_cache_release(old_page);

unlock:

         pte_unmap_unlock(page_table, ptl);

         /* Shared-writable path only: dirty the page now that the PTE lock is dropped. */
         if (dirty_page) {

                   /* No page_mkwrite hook ran: wait for the page lock holder, then dirty. */
                   if (!page_mkwrite) {

                            wait_on_page_locked(dirty_page);

                            set_page_dirty_balance(dirty_page, page_mkwrite);

                   }

                   /* Balances the get_page(dirty_page) taken when dirty_page was set. */
                   put_page(dirty_page);

                   if (page_mkwrite) {

                            /* Sample ->mapping while the page is still locked. */
                            struct address_space *mapping = dirty_page->mapping;

                            set_page_dirty(dirty_page);

                            /* Release the page lock and the pin taken before the hook. */
                            unlock_page(dirty_page);

                            page_cache_release(dirty_page);

                            /* mapping may be NULL; balance only when it is set. */
                            if (mapping)      {

                                     balance_dirty_pages_ratelimited(mapping);

                            }

                   }

                   /* Done after the page has been unlocked. */
                   if (vma->vm_file)

                            file_update_time(vma->vm_file);

         }

         return ret;

oom_free_new:

         /* The memcg charge failed after allocation: free the new page. */
         page_cache_release(new_page);

oom:

         if (old_page) {

                   /*
                    * NOTE(review): in the code shown, page_mkwrite == 1 always
                    * comes with reuse == 1, which jumps to 'unlock' and never
                    * reaches 'gotten'; this inner branch looks unreachable --
                    * confirm.
                    */
                   if (page_mkwrite) {

                            unlock_page(old_page);

                            page_cache_release(old_page);

                   }

                   /* Balances the pin taken before 'gotten'. */
                   page_cache_release(old_page);

         }

         return VM_FAULT_OOM;

unwritable_page:

         /* Reached with the PTE lock already dropped; release the pre-hook pin. */
         page_cache_release(old_page);

         return ret;

}

一級一級傳回,最終傳回到函數__do_page_fault,會根據傳回值fault累計task的相應異常類型次數(maj_flt或min_flt),并最終把fault傳回給函數do_page_fault,釋放信号量mmap_sem,正常情況下就傳回0,缺頁異常處理完畢。

繼續閱讀