天天看点

转载 linux 2.6线程创建源码分析

linux 2.6线程创建源码分析

 上一章讲到了线程,现在分析一下线程创建的代码流程,一步一步揭开它神秘的面纱。

 linux内核创建线程函数 kernel_thread(),最终会调用do_fork().

 前面谈到,线程同样用task_struct结构来表示它拥有的信息,只是它共享所属进程的资源。

 根据clone_flags标志,来调用clone()创建"线程",表示共享内存、共享文件系统访问计数、共享文件描述符表,以及共享信号处理方式。

 kernel_thread定义在arch/i386/kernel/process.c

/*
 * kernel_thread() - start a kernel thread by handing a hand-built register
 * frame to do_fork().
 *
 * fn:    entry function of the new thread; stored in regs.ebx.
 * arg:   argument for fn; stored in regs.edx.
 * flags: clone flags from the caller; CLONE_VM and CLONE_UNTRACED are always
 *        OR-ed in before the call to do_fork().
 *
 * Returns the value returned by do_fork().
 */
int kernel_thread(int (*fn)(void *), void * arg, unsigned long flags)

{

       struct pt_regs regs;   

       memset(&regs, 0, sizeof(regs));         // zero every field of the frame first

       // kernel_thread_helper (the stub regs.eip points at) reads fn from
       // %ebx and arg from %edx
       regs.ebx = (unsigned long) fn;         

       regs.edx = (unsigned long) arg;        

       regs.xds = __USER_DS;

       regs.xes = __USER_DS;

       regs.xfs = __KERNEL_PERCPU;

       regs.orig_eax = -1;

       // saved instruction pointer: the new thread resumes in the helper stub
       regs.eip = (unsigned long) kernel_thread_helper;      

       // kernel code segment selector, combined with get_kernel_rpl()
       regs.xcs = __KERNEL_CS | get_kernel_rpl();

       // NOTE(review): 0x2 is presumably the always-set reserved EFLAGS bit - confirm
       regs.eflags = X86_EFLAGS_IF | X86_EFLAGS_SF | X86_EFLAGS_PF | 0x2;

       // stack_start and stack_size are 0, both tid pointers are NULL
       return do_fork(flags | CLONE_VM | CLONE_UNTRACED, 0, &regs, 0, NULL, NULL);

}

其中__USER_DS,__KERNEL_PERCPU,__KERNEL_CS都是一些宏定义。在/linux/include/asm-i386/segment.h

/*
 * kernel_thread_helper - assembly entry stub for kernel threads.
 *
 * kernel_thread() stores this address in regs.eip, the thread function in
 * regs.ebx and its argument in regs.edx, so on entry %ebx holds fn and %edx
 * holds arg. The stub calls fn(arg) and passes fn's return value to do_exit().
 *
 * The escape sequences inside the strings must be "\n" and "\t": every
 * assembler statement has to end with a newline, otherwise the concatenated
 * literals form a single unparsable line.
 */
extern void kernel_thread_helper(void);

__asm__(".section .text\n"
    ".align 4\n"
    "kernel_thread_helper:\n\t"
    "movl %edx,%eax\n\t"        /* copy arg into %eax as well */
    "pushl %edx\n\t"            /* push arg as fn's stack argument */
    "call *%ebx\n\t"            /* indirect call: fn(arg) */
    "pushl %eax\n\t"            /* fn's return value becomes do_exit's argument */
    "call do_exit\n"
    ".previous");

在kernel_thread中调用了do_fork,让我们揭开do_fork()的面纱.

/*
 * do_fork() - excerpt; only the call into copy_process() is shown.
 *
 * The "..." lines stand for source the article leaves out, which includes
 * the declarations of p and pid and everything done with the result.
 * All six parameters are forwarded unchanged to copy_process().
 */
long do_fork(unsigned long clone_flags,

          unsigned long stack_start,

          struct pt_regs *regs,

          unsigned long stack_size,

          int __user *parent_tidptr,

          int __user *child_tidptr)

{

    ...

    ...

    // build the new task; pid comes from the omitted lines above
    p = copy_process(clone_flags, stack_start, regs, stack_size, parent_tidptr, child_tidptr, pid);

    ...

    ...

}

接着分析do_fork(),copy_process()是它的核心函数。重点分析一下:

/*
 * copy_process() - build the task_struct for a new task (excerpt).
 *
 * Validates clone_flags, duplicates the current task with dup_task_struct(),
 * checks the per-user and system-wide task limits, copies or shares the
 * per-task resources through the copy_*() helpers, sets up the register
 * state via copy_thread(), and links the new task into the pid and task
 * lists.
 *
 * Returns the new task_struct on success. The three clone_flags sanity checks
 * return ERR_PTR(-EINVAL) directly; every later failure jumps to a cleanup
 * label (fork_out, bad_fork_*). Those labels, and some other statements, lie
 * in the parts of the function the article omits, marked by the "……" lines.
 */
static struct task_struct *copy_process(unsigned long clone_flags,

                       unsigned long stack_start,

                       struct pt_regs *regs,

                       unsigned long stack_size,

                       int __user *parent_tidptr,

                       int __user *child_tidptr,

                       struct pid *pid)

{

     int retval;

     struct task_struct *p = NULL;

     // sanity-check the clone_flags combination

     // CLONE_NEWNS and CLONE_FS must not both be set

     if ((clone_flags & (CLONE_NEWNS|CLONE_FS)) == (CLONE_NEWNS|CLONE_FS))

         return ERR_PTR(-EINVAL);

     // CLONE_THREAD requires CLONE_SIGHAND

     if ((clone_flags & CLONE_THREAD) && !(clone_flags & CLONE_SIGHAND))

         return ERR_PTR(-EINVAL);

     // CLONE_SIGHAND requires CLONE_VM

     if ((clone_flags & CLONE_SIGHAND) && !(clone_flags & CLONE_VM))

         return ERR_PTR(-EINVAL);

     retval = security_task_create(clone_flags);

     if (retval)

         goto fork_out;

     retval = -ENOMEM;

     // duplicate the task_struct of the current (creating) task

     p = dup_task_struct(current);

     if (!p)

         goto fork_out;

     rt_mutex_init_task(p);

#ifdef CONFIG_TRACE_IRQFLAGS

     DEBUG_LOCKS_WARN_ON(!p->hardirqs_enabled);

     DEBUG_LOCKS_WARN_ON(!p->softirqs_enabled);

#endif

     retval = -EAGAIN;

     // the owning user has reached the RLIMIT_NPROC soft limit (rlim_cur):
     // fail unless the caller has CAP_SYS_ADMIN or CAP_SYS_RESOURCE, or the
     // user is the namespace's root_user

     if (atomic_read(&p->user->processes) >=

              p->signal->rlim[RLIMIT_NPROC].rlim_cur) {

         if (!capable(CAP_SYS_ADMIN) && !capable(CAP_SYS_RESOURCE) &&

             p->user != current->nsproxy->user_ns->root_user)

              goto bad_fork_free;

     }

     // bump the per-user reference count and process count, and take a
     // reference on the group info

     atomic_inc(&p->user->__count);

     atomic_inc(&p->user->processes);

     get_group_info(p->group_info);

     // refuse if the system-wide thread count has reached max_threads

     if (nr_threads >= max_threads)

         goto bad_fork_cleanup_count;

     // take module references on the exec domain and, if one is set, on the
     // binary format handler

     if (!try_module_get(task_thread_info(p)->exec_domain->module))

         goto bad_fork_cleanup_count;

     if (p->binfmt && !try_module_get(p->binfmt->module))

         goto bad_fork_cleanup_put_domain;

     // the child is still being set up and has not exec'd

     p->did_exec = 0;

     delayacct_tsk_init(p);

     // copy_flags() is defined elsewhere. Per the article's original note it
     // copies the parent's flags except PF_SUPERPRIV and sets PF_FORKNOEXEC
     // on the child - confirm against its definition.

     copy_flags(clone_flags, p);

     // numeric pid of the child, derived from the struct pid passed in

     p->pid = pid_nr(pid);

     retval = -EFAULT;

     // CLONE_PARENT_SETTID: store the child's pid at the user address
     // parent_tidptr; a failed put_user() aborts with -EFAULT
     if (clone_flags & CLONE_PARENT_SETTID)

         if (put_user(p->pid, parent_tidptr))

              goto bad_fork_cleanup_delays_binfmt;

     // the child starts with empty children and sibling lists

     INIT_LIST_HEAD(&p->children);

     INIT_LIST_HEAD(&p->sibling);

     p->vfork_done = NULL;

     spin_lock_init(&p->alloc_lock);

     // clear TIF_SIGPENDING on the child and reset its pending-signal set.
     // Per the original note the flag (meaning "unhandled signals exist") was
     // inherited with the copied task state and is not wanted in the child.

     clear_tsk_thread_flag(p, TIF_SIGPENDING);

     init_sigpending(&p->pending);

     // reset the child's CPU time accounting

     p->utime = cputime_zero;

     p->stime = cputime_zero;

     p->prev_utime = cputime_zero;

……

……

// tgid defaults to the child's own pid; with CLONE_THREAD it is the creating
// task's tgid instead

     p->tgid = p->pid;

     if (clone_flags & CLONE_THREAD)

         p->tgid = current->tgid;

     // set up the remaining per-task resources (security and audit state,
     // semaphore undo, open files, fs info, signal handlers, signal state,
     // mm, keys, namespaces). Each step that fails jumps to the cleanup
     // label that undoes the steps before it.

     if ((retval = security_task_alloc(p)))

          goto bad_fork_cleanup_policy;

     if ((retval = audit_alloc(p)))

         goto bad_fork_cleanup_security;

     if ((retval = copy_semundo(clone_flags, p)))

         goto bad_fork_cleanup_audit;

     if ((retval = copy_files(clone_flags, p)))

         goto bad_fork_cleanup_semundo;

     if ((retval = copy_fs(clone_flags, p)))

         goto bad_fork_cleanup_files;

     if ((retval = copy_sighand(clone_flags, p)))

         goto bad_fork_cleanup_fs;

     if ((retval = copy_signal(clone_flags, p)))

         goto bad_fork_cleanup_sighand;

     if ((retval = copy_mm(clone_flags, p)))

         goto bad_fork_cleanup_signal;

     if ((retval = copy_keys(clone_flags, p)))

         goto bad_fork_cleanup_mm;

     if ((retval = copy_namespaces(clone_flags, p)))

         goto bad_fork_cleanup_keys;

     // architecture-specific register and stack setup for the child
     retval = copy_thread(0, clone_flags, stack_start, stack_size, p, regs);

     if (retval)

         goto bad_fork_cleanup_namespaces;

     // remember child_tidptr only for the flags that ask for it
     p->set_child_tid = (clone_flags & CLONE_CHILD_SETTID) ? child_tidptr : NULL;

     p->clear_child_tid = (clone_flags & CLONE_CHILD_CLEARTID) ? child_tidptr: NULL;

     p->robust_list = NULL;

#ifdef CONFIG_COMPAT

     p->compat_robust_list = NULL;

#endif

     INIT_LIST_HEAD(&p->pi_state_list);

     p->pi_state_cache = NULL;

     // CLONE_VM set without CLONE_VFORK: reset the sas_ss_sp / sas_ss_size
     // fields in the child
     if ((clone_flags & (CLONE_VM|CLONE_VFORK)) == CLONE_VM)

         p->sas_ss_sp = p->sas_ss_size = 0;

     clear_tsk_thread_flag(p, TIF_SYSCALL_TRACE);

#ifdef TIF_SYSCALL_EMU

     clear_tsk_thread_flag(p, TIF_SYSCALL_EMU);

#endif

     p->parent_exec_id = p->self_exec_id;

     // exit_signal is -1 for CLONE_THREAD tasks, otherwise the CSIGNAL bits
     // of clone_flags. Per the original note this is the signal sent to the
     // parent when the child exits.

     p->exit_signal = (clone_flags & CLONE_THREAD) ? -1 : (clone_flags & CSIGNAL);

     // pdeath_signal starts out as 0 for the new task.
     // NOTE(review): the original note describes it as a signal sent to this
     // task's children on exit - verify against the field's definition.

     p->pdeath_signal = 0;

     p->exit_state = 0;

     ……

     ……

     if (likely(p->pid)) {

         add_parent(p);

         if (unlikely(p->ptrace & PT_PTRACED))

              __ptrace_link(p, current->parent);

         // only a thread-group leader takes over the creator's tty, process
         // group and session and is added to the init_task.tasks list
         if (thread_group_leader(p)) {

              p->signal->tty = current->signal->tty;

              p->signal->pgrp = process_group(current);

              set_signal_session(p->signal, process_session(current));

              attach_pid(p, PIDTYPE_PGID, task_pgrp(current));

              attach_pid(p, PIDTYPE_SID, task_session(current));

              list_add_tail_rcu(&p->tasks, &init_task.tasks);

              __get_cpu_var(process_counts)++;

         }

         attach_pid(p, PIDTYPE_PID, pid);

         // one more thread in the system-wide count

         nr_threads++;

     }

     // running total of forks

     total_forks++;

     // NOTE(review): the matching lock acquisitions are presumably in the
     // omitted part of this excerpt
     spin_unlock(&current->sighand->siglock);

     write_unlock_irq(&tasklist_lock);

     proc_fork_connector(p);

     return p;

……

……

}

参考:深入理解linux内核

到这里为止,进程的运行空间已经设置好了。但子进程怎么返回到用户空间呢?这是在copy_process()—> copy_thread()中完成的。

/*
 * copy_thread() - set up the register frame and thread state of a new task.
 *
 * nr:          not used in this function.
 * clone_flags: only CLONE_SETTLS is examined here.
 * esp:         value stored as the child's saved esp.
 * unused:      not used.
 * p:           the new task being set up.
 * regs:        register frame copied into the child's pt_regs.
 *
 * Returns 0 on success, -ENOMEM if the I/O bitmap cannot be duplicated,
 * -EFAULT if the TLS descriptor cannot be read from user memory, and -EINVAL
 * if that descriptor is empty or its entry number is outside the TLS range.
 */
int copy_thread(int nr, unsigned long clone_flags, unsigned long esp,

     unsigned long unused,

     struct task_struct * p, struct pt_regs * regs)

{

     struct pt_regs * childregs;

     struct task_struct *tsk;

     int err;

     // the child's pt_regs area (per the original note: the start of the
     // child's kernel stack)

     childregs = task_pt_regs(p);

     // copy the supplied register frame into the child's pt_regs.

     // Per the original note, regs holds the parent's register values as
     // saved when it entered the kernel.

     *childregs = *regs;

     // eax carries the return value: zeroed so the child sees 0

     childregs->eax = 0;

     // the esp argument becomes the child's saved stack pointer (the user
     // stack pointer, per the original note)

     childregs->esp = esp;

     // thread.esp points at the child's pt_regs

     p->thread.esp = (unsigned long) childregs;

     // thread.esp0 points just past the pt_regs

     p->thread.esp0 = (unsigned long) (childregs+1);

     // the child begins executing at ret_from_fork

     p->thread.eip = (unsigned long) ret_from_fork;

     savesegment(gs,p->thread.gs);

     tsk = current;

     // if the current task has TIF_IO_BITMAP set, give the child its own
     // copy of the I/O bitmap
     if (unlikely(test_tsk_thread_flag(tsk, TIF_IO_BITMAP))) {

         p->thread.io_bitmap_ptr = kmemdup(tsk->thread.io_bitmap_ptr,

                            IO_BITMAP_BYTES, GFP_KERNEL);

         if (!p->thread.io_bitmap_ptr) {

              p->thread.io_bitmap_max = 0;

              return -ENOMEM;

         }

         set_tsk_thread_flag(p, TIF_IO_BITMAP);

     }

     // CLONE_SETTLS: read a struct user_desc from the user address held in
     // the child's saved esi and install it in the child's tls_array
     if (clone_flags & CLONE_SETTLS) {

         struct desc_struct *desc;

         struct user_desc info;

         int idx;

         err = -EFAULT;

         if (copy_from_user(&info, (void __user *)childregs->esi, sizeof(info)))

              goto out;

         err = -EINVAL;

         if (LDT_empty(&info))

              goto out;

         idx = info.entry_number;

         // the entry must lie inside the TLS range
         if (idx < GDT_ENTRY_TLS_MIN || idx > GDT_ENTRY_TLS_MAX)

              goto out;

         desc = p->thread.tls_array + idx - GDT_ENTRY_TLS_MIN;

         desc->a = LDT_entry_a(&info);

         desc->b = LDT_entry_b(&info);

     }

     err = 0;

 out:

     // on failure, release the I/O bitmap copy made above, if there is one
     if (err && p->thread.io_bitmap_ptr) {

         kfree(p->thread.io_bitmap_ptr);

         p->thread.io_bitmap_max = 0;

     }

     return err;

}

在这里把ret_from_fork的地址赋值给p->thread.eip,p->thread.eip表示当进程下一次调度时的指令开始地址,

所以当线程创建后被调度时,是从ret_from_fork地址处开始的.

到这里说明,新的线程已产生了.

/*
 * ret_from_fork excerpt ("..." marks assembly the article omits).
 * copy_thread() stores this address in p->thread.eip, so a newly created
 * task starts executing here.
 */
ENTRY(ret_from_fork)

    pushl %eax                  /* pushed here, popped back after the call */

    call schedule_tail

    GET_THREAD_INFO(%ebp)       /* macro defined elsewhere; presumably loads the thread_info pointer into %ebp */

    popl %eax                   /* pop the value pushed above */

    jmp syscall_exit            /* continue on the syscall exit path */

syscall_exit:

...

work_resched:

    call schedule

...

当它从ret_from_fork退出时,会从堆栈中弹出原来保存的eip,而这个eip指向kernel_thread_helper,

至此kernel_thread_helper被调用,它先通过call *%ebx运行我们指定的函数fn,等fn返回后再调用do_exit()结束该线程。

对普通的用户进程来说,这条路径则是从内核空间返回到用户空间;而内核线程始终运行在内核空间。

继续阅读