天天看點

轉載 linux 2.6線程建立源碼分析

linux 2.6線程建立源碼分析

 上一章講到了線程,現在來分析線程建立的代碼流程,一步一步揭開它神秘的面紗。

 linux核心建立線程函數 kernel_thread(),最終會調用do_fork().

 前面談到線程也是用task_struct結構表示它擁有的資訊,隻是它共享所屬程序的資源。

 根據clone_flags标志,來調用clone()建立"線程",表示共享記憶體、共享檔案系統通路計數、共享檔案描述符表,以及共享信号處理方式。

 kernel_thread定義在arch/i386/kernel/process.c中:

/*
 * kernel_thread - create a kernel thread that runs fn(arg).
 *
 * Builds a zeroed register frame on the stack, stores fn in ebx and arg in
 * edx, points eip at kernel_thread_helper, and hands the frame to do_fork()
 * with CLONE_VM | CLONE_UNTRACED OR-ed into the caller's flags.
 *
 * @fn:    function the new thread should execute
 * @arg:   opaque argument passed through to fn
 * @flags: additional clone flags supplied by the caller
 *
 * Returns the value returned by do_fork().
 */
int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)

{

       struct pt_regs regs;   

       memset(&regs, 0, sizeof(regs));         // zero every field of the register frame

       regs.ebx = (unsigned long) fn;         // kernel_thread_helper calls through %ebx

       regs.edx = (unsigned long) arg;        // kernel_thread_helper pushes %edx as the argument

       regs.xds = __USER_DS;

       regs.xes = __USER_DS;

       regs.xfs = __KERNEL_PERCPU;

       regs.orig_eax = -1;

       regs.eip = (unsigned long) kernel_thread_helper;      // instruction pointer stored in the frame

       regs.xcs = __KERNEL_CS | get_kernel_rpl();

       regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;

       // stack_start, stack_size and both tid pointers are passed as 0/NULL
       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);

}

其中__USER_DS、__KERNEL_PERCPU、__KERNEL_CS都是宏定義,位于include/asm-i386/segment.h中。

/*
 * kernel_thread_helper - entry stub for threads created by kernel_thread().
 *
 * kernel_thread() stores the thread function in ebx and its argument in edx
 * before calling do_fork(). This stub pushes the argument, calls the function
 * through %ebx, then pushes the function's return value (%eax) and calls
 * do_exit() with it.
 *
 * Each instruction must be terminated with a real newline escape ("\n",
 * optionally followed by "\t"); otherwise the assembler receives the whole
 * stub as a single malformed line.
 */
extern void kernel_thread_helper(void);

__asm__(".section .text\n"
    ".align 4\n"
    "kernel_thread_helper:\n\t"
    "movl %edx,%eax\n\t"    /* copy the argument into %eax as well */
    "pushl %edx\n\t"        /* argument for fn on the stack */
    "call *%ebx\n\t"        /* fn(arg) */
    "pushl %eax\n\t"        /* fn's return value becomes do_exit's argument */
    "call do_exit\n"
    ".previous");

在kernel_thread中調用了do_fork,讓我們揭開do_fork()的面紗.

/*
 * do_fork - common entry point for creating a new process or thread (excerpt).
 *
 * This listing is abridged: the "..." lines stand for code the article
 * omitted, including the declarations of p and pid. The only step shown is
 * the call to copy_process(), which receives every do_fork() argument
 * unchanged plus pid.
 *
 * @clone_flags:   clone flags selecting what the child shares with its creator
 * @stack_start:   forwarded to copy_process()
 * @regs:          register frame forwarded to copy_process()
 * @stack_size:    forwarded to copy_process()
 * @parent_tidptr: user-space pointer forwarded to copy_process()
 * @child_tidptr:  user-space pointer forwarded to copy_process()
 */
long do_fork(unsigned long clone_flags,

          unsigned long stack_start,

          struct pt_regs *regs,

          unsigned long stack_size,

          int __user *parent_tidptr,

          int __user *child_tidptr)

{

    ...

    ...

    p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);

    ...

    ...

}

接着分析do_fork():copy_process()是它的核心函數,下面重點分析一下:

/*
 * copy_process - build the task_struct for a new process or thread (excerpt).
 *
 * This listing is abridged: the "……" lines stand for code the article
 * omitted. The omitted parts include the error-unwind labels targeted by the
 * goto statements below and, presumably, the lock acquisitions that pair with
 * the unlock calls near the end.
 *
 * Steps visible here:
 *   1. reject invalid clone_flags combinations;
 *   2. duplicate the current task's task_struct;
 *   3. enforce the per-user process limit and the global max_threads limit;
 *   4. initialise per-task fields (pid, lists, pending signals, CPU times);
 *   5. run the copy_*() helpers for the remaining resources, then copy_thread();
 *   6. link the task into the parent/pid structures and bump the counters.
 *
 * Returns the new task on success, or ERR_PTR(-EINVAL) for an invalid flag
 * combination. Other failures jump to unwind labels that are not shown.
 */
static struct task_struct *copy_process(unsigned long clone_flags,

                       unsigned long stack_start,

                       struct pt_regs *regs,

                       unsigned long stack_size,

                       int __user *parent_tidptr,

                       int __user *child_tidptr,

                       struct pid *pid)

{

     int retval;

     struct task_struct *p = NULL;

     // Validate clone_flags.

     // CLONE_NEWNS and CLONE_FS must not both be set.

     if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))

         return ERR_PTR(-EINVAL);

 // CLONE_THREAD requires CLONE_SIGHAND.

     if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))

         return ERR_PTR(-EINVAL);

 // CLONE_SIGHAND requires CLONE_VM.

     if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))

         return ERR_PTR(-EINVAL);

     retval = security_task_create(clone_flags);

     if (retval)

         goto fork_out;

     retval = -ENOMEM;

     // Duplicate the current task's task_struct for the child.

     p = dup_task_struct(current);

     if (!p)

         goto fork_out;

     rt_mutex_init_task(p);

#ifdef CONFIG_TRACE_IRQFLAGS

     DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);

     DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);

#endif

     retval = -EAGAIN;

     // The user's process count has reached the RLIMIT_NPROC soft limit:
     // fail unless the caller has CAP_SYS_ADMIN or CAP_SYS_RESOURCE, or the
     // user is the namespace's root_user.

     if (atomic_read(&p->user->processes) >=

              p->signal->rlim[RLIMIT_NPROC].rlim_cur) {

         if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&

             p->user != current->nsproxy->user_ns->root_user)

              goto bad_fork_free;

     }

     // Bump the owning user's reference count and process count.

     atomic_inc(&p->user->__count);

     atomic_inc(&p->user->processes);

     get_group_info(p->group_info);

      // Refuse if the current thread count has reached max_threads.

     if (nr_threads >= max_threads)

         goto bad_fork_cleanup_count;

     // try_module_get() on the exec-domain module and, if a binfmt is set,
     // on the binfmt module; bail out if either call fails.

     if (!try_module_get(task_thread_info(p)->exec_domain->module))

         goto bad_fork_cleanup_count;

     if (p->binfmt && !try_module_get(p->binfmt->module))

         goto bad_fork_cleanup_put_domain;

     // The child is still being set up and has not called execve.

     p->did_exec = 0;

     delayacct_tsk_init(p);

     // Per the original author: copy_flags() copies all of the parent's flags
     // except PF_SUPERPRIV and sets PF_FORKNOEXEC on the child (the helper's
     // body is not shown here).

     copy_flags(clone_flags, p);

     // Assign the child's pid.

     p->pid = pid_nr(pid);

     retval = -EFAULT;

     // CLONE_PARENT_SETTID: store the new pid at the user pointer parent_tidptr.

     if (clone_flags & CLONE_PARENT_SETTID)

         if (put_user(p->pid, parent_tidptr))

              goto bad_fork_cleanup_delays_binfmt;

     // Initialise the child's list heads.

     INIT_LIST_HEAD(&p->children);

     INIT_LIST_HEAD(&p->sibling);

     p->vfork_done = NULL;

     spin_lock_init(&p->alloc_lock);

     // Per the original author: the parent's TIF_SIGPENDING flag (signals
     // awaiting handling) was copied into the child, which does not need it,

     // so it is cleared and the child's pending set is re-initialised.

     clear_tsk_thread_flag(p, TIF_SIGPENDING);

     init_sigpending(&p->pending);

     // Reset the child's CPU time fields.

     p->utime = cputime_zero;

     p->stime = cputime_zero;

     p->prev_utime = cputime_zero;

……

……

// tgid defaults to the child's own pid; with CLONE_THREAD it is current's tgid.

     p->tgid = p->pid;

     if (clone_flags & CLONE_THREAD)

         p->tgid = current->tgid;

     // Run the copy_*() helpers for the parent's remaining resources (open
     // files, fs, signal handlers, signals, mm, keys, namespaces). Each
     // failure jumps to a bad_fork_cleanup_* label not shown in this excerpt.

     if ((retval = security_task_alloc(p)))

          goto bad_fork_cleanup_policy;

     if ((retval = audit_alloc(p)))

         goto bad_fork_cleanup_security;

     if ((retval = copy_semundo(clone_flags, p)))

         goto bad_fork_cleanup_audit;

     if ((retval = copy_files(clone_flags, p)))

         goto bad_fork_cleanup_semundo;

     if ((retval = copy_fs(clone_flags, p)))

         goto bad_fork_cleanup_files;

     if ((retval = copy_sighand(clone_flags, p)))

         goto bad_fork_cleanup_fs;

     if ((retval = copy_signal(clone_flags, p)))

         goto bad_fork_cleanup_sighand;

     if ((retval = copy_mm(clone_flags, p)))

         goto bad_fork_cleanup_signal;

     if ((retval = copy_keys(clone_flags, p)))

         goto bad_fork_cleanup_mm;

     if ((retval = copy_namespaces(clone_flags, p)))

         goto bad_fork_cleanup_keys;

     // Set up the child's saved register frame and thread state.
     retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);

     if (retval)

         goto bad_fork_cleanup_namespaces;

     // Record child_tidptr only when the matching clone flag is set.
     p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;

     p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;

     p->robust_list = NULL;

#ifdef CONFIG_COMPAT

     p->compat_robust_list = NULL;

#endif

     INIT_LIST_HEAD(&p->pi_state_list);

     p->pi_state_cache = NULL;

     // CLONE_VM without CLONE_VFORK: clear the sas_ss_sp and sas_ss_size fields.
     if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)

         p->sas_ss_sp = p->sas_ss_size = 0;

     clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);

#ifdef TIF_SYSCALL_EMU

     clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);

#endif

     p->parent_exec_id = p->self_exec_id;

     // exit_signal: signal sent to the parent when the child exits; -1 for a
     // CLONE_THREAD child, otherwise the CSIGNAL bits of clone_flags.

     p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);

     // pdeath_signal is cleared for the new task.

     p->pdeath_signal = 0;

     p->exit_state = 0;

     ……

     ……

     if (likely(p->pid)) {

         add_parent(p);

         if (unlikely(p->ptrace & PT_PTRACED))

              __ptrace_link(p, current->parent);

         // A thread-group leader takes current's tty, process group and
         // session, and is appended to the init_task.tasks list.
         if (thread_group_leader(p)) {

              p->signal->tty = current->signal->tty;

              p->signal->pgrp = process_group(current);

              set_signal_session(p->signal, process_session(current));

              attach_pid(p, PIDTYPE_PGID, task_pgrp(current));

              attach_pid(p, PIDTYPE_SID, task_session(current));

              list_add_tail_rcu(&p->tasks, &init_task.tasks);

              __get_cpu_var(process_counts)++;

         }

         attach_pid(p, PIDTYPE_PID, pid);

         // Increment the current thread count.

         nr_threads++;

     }

     // Increment the running total of forks.

     total_forks++;

     spin_unlock(&current->sighand->siglock);

     write_unlock_irq(&tasklist_lock);

     proc_fork_connector(p);

     return p;

……

……

}

參考:深入了解linux核心

到這裡為止,程序的運作空間已經設定好了。但子程序怎麼傳回到使用者空間呢?這是在copy_process() —> copy_thread()中完成的。

/*
 * copy_thread - set up the child's saved register frame and thread state.
 *
 * Copies the caller-supplied register frame into the child's pt_regs area,
 * forces the child's eax to 0, records where the child resumes
 * (ret_from_fork), duplicates the creator's I/O bitmap when it has one, and
 * installs a TLS descriptor when CLONE_SETTLS is requested.
 *
 * @nr:          not used in this function
 * @clone_flags: clone flags; only CLONE_SETTLS is examined here
 * @esp:         stack pointer value stored in the child's register frame
 * @unused:      not used
 * @p:           the child task being set up
 * @regs:        register frame to copy into the child
 *
 * Returns 0 on success, -ENOMEM if the I/O bitmap cannot be duplicated,
 * -EFAULT if the TLS descriptor cannot be read from user space, or -EINVAL
 * if the descriptor is empty or its entry number is out of range.
 */
int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,

     unsigned long unused,

     struct task_struct * p, struct pt_regs * regs)

{

     struct pt_regs * childregs;

     struct task_struct *tsk;

     int err;

     // Locate the child's pt_regs frame (per the original author, the start
     // of the child's kernel stack).

     childregs = task_pt_regs(p);

     // Copy the regs argument into the child's frame.

     // Per the original author, regs holds the parent's register values as
     // saved on entry to the kernel.

     *childregs = *regs;

     // eax carries the return value; zeroing it makes the child see 0.

     childregs->eax = 0;

     // esp: stack pointer the child will use (the user-stack pointer, per the
     // original author).

     childregs->esp = esp;

     // thread.esp points at the child's register frame.

     p->thread.esp = (unsigned long) childregs;

     // thread.esp0 points just past the register frame.

     p->thread.esp0 = (unsigned long) (childregs+1);

     // Next instruction for the child: ret_from_fork (per the original author,
     // the path back from kernel space to user space).

     p->thread.eip = (unsigned long) ret_from_fork;

     savesegment(gs,p->thread.gs);

     tsk = current;

     // If the current task has an I/O bitmap, give the child its own copy.
     if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {

         p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,

                            IO_BITMAP_BYTES, GFP_KERNEL);

         if (!p->thread.io_bitmap_ptr) {

              p->thread.io_bitmap_max = 0;

              return -ENOMEM;

         }

         set_tsk_thread_flag(p, TIF_IO_BITMAP);

     }

     // CLONE_SETTLS: read a user_desc from the user pointer held in the
     // child's saved esi and install it in the child's TLS array.
     if (clone_flags & CLONE_SETTLS) {

         struct desc_struct *desc;

         struct user_desc info;

         int idx;

         err = -EFAULT;

         if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info)))

              goto out;

         err = -EINVAL;

         if (LDT_empty(&info))

              goto out;

         idx = info.entry_number;

         // Only entries inside the GDT TLS range are accepted.
         if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)

              goto out;

         desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;

         desc->a = LDT_entry_a(&info);

         desc->b = LDT_entry_b(&info);

     }

     err = 0;

 out:

     // On failure, release the I/O bitmap copy made above (if any).
     if (err && p->thread.io_bitmap_ptr) {

         kfree(p->thread.io_bitmap_ptr);

         p->thread.io_bitmap_max = 0;

     }

     return err;

}

在這裡把ret_from_fork的位址指派給p->thread.eip,p->thread.eip表示當程序下一次排程時的指令開始位址,

是以當線程建立後被排程時,是從ret_from_fork位址處開始的.

到這裡說明,新的線程已産生了.

/*
 * ret_from_fork - address a newly created task resumes at (copy_thread()
 * stores it in p->thread.eip). Abridged listing: "..." marks omitted code.
 */
ENTRY(ret_from_fork)

    /* keep %eax on the stack across the call */
    pushl %eax

    call schedule_tail

    GET_THREAD_INFO(%ebp)

    /* restore %eax */
    popl %eax

    jmp syscall_exit

syscall_exit:

...

work_resched:

    call schedule

...

當它從ret_from_fork退出時,會從堆棧中彈出原來儲存的eip,而該eip指向kernel_thread_helper,

至此kernel_thread_helper被調用,它先運作我們指定的函數fn,待fn傳回後再調用do_exit()結束線程。

從核心空間傳回到使用者空間。

繼續閱讀