执行完/bin/echo之后,会调用do_exit,销毁子进程:
我们还是先从系统调用exit()说起。先来看exit()的实现:进入内核态后执行的是sys_exit,它再调用do_exit。
/*
 * sys_exit - kernel-side entry point of the exit() system call.
 *
 * Keeps the low eight bits of error_code, moves them into bits 8-15
 * and passes the result on to do_exit().
 */
asmlinkage long sys_exit(int error_code)
{
	long status = (error_code & 0xff) << 8;

	do_exit(status);
}

/*
 * do_exit - terminate the calling task.
 *
 * Panics when called from interrupt context or on behalf of pid 0 or
 * pid 1.  Otherwise it flags the task as exiting, stops its real timer,
 * releases its address space, open files, fs context and signal
 * handlers, records the exit code, notifies the parent through
 * exit_notify() and finally calls schedule().
 *
 * @code: value stored in tsk->exit_code.
 */
NORET_TYPE void do_exit(long code)
{
	struct task_struct *tsk = current;

	if (in_interrupt())	/* an interrupt handler must never be killed */
		panic("Aiee, killing interrupt handler!");
	if (!tsk->pid)		/* pid 0, the idle task, is not allowed to exit */
		panic("Attempted to kill the idle task!");
	if (tsk->pid == 1)	/* pid 1, init, is not allowed to exit */
		panic("Attempted to kill init!");
	tsk->flags |= PF_EXITING;
	del_timer_sync(&tsk->real_timer);

fake_volatile:
#ifdef CONFIG_BSD_PROCESS_ACCT
	acct_process(code);
#endif
	/*
	 * Drop our reference on the address space (mm->mm_users).  If we
	 * were the last user, the vm areas, the page tables and the
	 * mm_struct itself are released as well.
	 */
	__exit_mm(tsk);

	lock_kernel();
	/*
	 * NOTE(review): the original annotation calls this "signal related";
	 * the name suggests semaphore cleanup instead - confirm.
	 */
	sem_exit();
	/* Drop our reference on files_struct (files->count); the last holder frees it. */
	__exit_files(tsk);
	/* Drop our reference on fs_struct (fs->count); the last holder frees it. */
	__exit_fs(tsk);
	/* Detach tsk->sig (freed when sig->count reaches zero) and flush pending signals. */
	exit_sighand(tsk);
	/* NOTE(review): annotated as an empty function in the original walkthrough - confirm. */
	exit_thread();

	if (current->leader)
		disassociate_ctty(1);

	put_exec_domain(tsk->exec_domain);
	if (tsk->binfmt && tsk->binfmt->module)
		__MOD_DEC_USE_COUNT(tsk->binfmt->module);

	tsk->exit_code = code;
	/*
	 * Mark ourselves TASK_ZOMBIE, signal the parent and hand our own
	 * children over to the reaper.
	 */
	exit_notify();
	schedule();
	BUG();
/*
 * In order to get rid of the "volatile function does return" message
 * I did this little loop that confuses gcc to think do_exit really
 * is volatile. In fact it's schedule() that is volatile in some
 * circumstances: when current->state = ZOMBIE, schedule() never
 * returns.
 *
 * In fact the natural way to do all this is to have the label and the
 * goto right after each other, but I put the fake_volatile label at
 * the start of the function just in case something /really/ bad
 * happens, and the schedule returns. This way we can try again. I'm
 * not paranoid: it's just that everybody is out to get me.
 */
	goto fake_volatile;
}

/*
 * __exit_mm - detach a task from its user address space.
 *
 * Calls mm_release(), and if the task has an mm: takes an extra
 * mm_count reference, clears tsk->mm under the task lock, enters
 * lazy-TLB mode and drops the user reference with mmput().
 */
static inline void __exit_mm(struct task_struct * tsk)
{
	struct mm_struct * mm = tsk->mm;

	mm_release();
	if (mm) {
		/*
		 * NOTE(review): presumably this extra mm_count reference keeps
		 * the mm_struct alive as active_mm once tsk->mm is cleared -
		 * confirm.
		 */
		atomic_inc(&mm->mm_count);
		if (mm != tsk->active_mm) BUG();
		/* more a memory barrier than a real lock */
		task_lock(tsk);
		tsk->mm = NULL;
		task_unlock(tsk);
		enter_lazy_tlb(mm, current, smp_processor_id());
		mmput(mm);	/* the key step: drop the mm_users reference */
	}
}

/*
 * mmput - drop one user reference (mm_users) on an address space.
 *
 * When the last user goes away the mm is unlinked from the mm list,
 * its mappings are torn down by exit_mmap() and one mm_count reference
 * is dropped through mmdrop().
 */
void mmput(struct mm_struct *mm)
{
	/*
	 * Other users remain: only the counter changed.  (In the article's
	 * walkthrough mm_users is 1 at this point, so we carry on.)
	 */
	if (!atomic_dec_and_lock(&mm->mm_users, &mmlist_lock))
		return;

	list_del(&mm->mmlist);
	spin_unlock(&mmlist_lock);
	/* Free the vm_area_structs and clear the user page tables. */
	exit_mmap(mm);
	/* Drop mm_count; at zero the page directory and mm_struct are freed. */
	mmdrop(mm);
}

/*
 * exit_mmap - release every mapping of an address space.
 *
 * Detaches the vma list from the mm under page_table_lock, resets the
 * rss/total_vm/locked_vm counters, then walks the detached list: each
 * vma is closed through its vm_ops (if any), unlinked, its range is
 * zapped, its file reference dropped and the vma freed.  Finally the
 * user part of the page tables is cleared.
 */
void exit_mmap(struct mm_struct * mm)
{
	struct vm_area_struct * mpnt;

	release_segments(mm);
	/* Take the whole vma list private before walking it. */
	spin_lock(&mm->page_table_lock);
	mpnt = mm->mmap;
	mm->mmap = mm->mmap_avl = mm->mmap_cache = NULL;
	spin_unlock(&mm->page_table_lock);
	mm->rss = 0;
	mm->total_vm = 0;
	mm->locked_vm = 0;
	while (mpnt) {
		/* Read everything we need before mpnt is freed below. */
		struct vm_area_struct * next = mpnt->vm_next;
		unsigned long start = mpnt->vm_start;
		unsigned long end = mpnt->vm_end;
		unsigned long size = end - start;

		if (mpnt->vm_ops) {
			if (mpnt->vm_ops->close)
				mpnt->vm_ops->close(mpnt);
		}
		mm->map_count--;
		remove_shared_vm_struct(mpnt);
		flush_cache_range(mm, start, end);
		zap_page_range(mm, start, size);
		if (mpnt->vm_file)
			fput(mpnt->vm_file);
		kmem_cache_free(vm_area_cachep, mpnt);
		mpnt = next;
	}

	/* This is just debugging */
	if (mm->map_count)
		printk("exit_mmap: map count is %d\n", mm->map_count);

	clear_page_tables(mm, FIRST_USER_PGD_NR, USER_PTRS_PER_PGD);
}

/*
 * mmdrop - drop one mm_count reference on an mm_struct.
 *
 * Calls __mmdrop() when the counter reaches zero.
 */
static inline void mmdrop(struct mm_struct * mm)
{
	/* Somebody else still holds a reference. */
	if (!atomic_dec_and_test(&mm->mm_count))
		return;

	__mmdrop(mm);
}

/*
 * __mmdrop - final teardown of an mm_struct.
 *
 * Frees the page directory, destroys the context and frees the
 * mm_struct.  Being called on init_mm is treated as a bug.
 */
inline void __mmdrop(struct mm_struct *mm)
{
	if (&init_mm == mm) {
		BUG();
	}

	pgd_free(mm->pgd);
	destroy_context(mm);
	free_mm(mm);
}

/*
 * __exit_files - detach a task from its open-file table.
 *
 * Clears tsk->files under the task lock and then drops the reference
 * with put_files_struct().  Does nothing when the task has no table.
 */
static inline void __exit_files(struct task_struct *tsk)
{
	struct files_struct *old_files = tsk->files;

	if (!old_files)
		return;

	task_lock(tsk);
	tsk->files = NULL;
	task_unlock(tsk);

	put_files_struct(old_files);
}

/*
 * put_files_struct - drop one reference on an open-file table.
 *
 * When the count reaches zero every file is closed, any expanded fd
 * array / fd sets are freed and the files_struct goes back to its slab
 * cache.
 */
void put_files_struct(struct files_struct *files)
{
	/*
	 * Still shared: only the counter changed.  (In the article's
	 * walkthrough files->count is 1 here, so the teardown below runs.)
	 */
	if (!atomic_dec_and_test(&files->count))
		return;

	close_files(files);

	/*
	 * Free the fd and fdset arrays if we expanded them.
	 */
	if (files->fd != &files->fd_array[0])
		free_fd_array(files->fd, files->max_fds);

	if (files->max_fdset > __FD_SETSIZE) {
		free_fdset(files->open_fds, files->max_fdset);
		free_fdset(files->close_on_exec, files->max_fdset);
	}

	kmem_cache_free(files_cachep, files);
}

/*
 * __exit_fs - detach a task from its fs_struct.
 *
 * Clears tsk->fs under the task lock and then drops the reference with
 * __put_fs_struct().  Does nothing when the task has no fs_struct.
 */
static inline void __exit_fs(struct task_struct *tsk)
{
	struct fs_struct *old_fs = tsk->fs;

	if (!old_fs)
		return;

	task_lock(tsk);
	tsk->fs = NULL;
	task_unlock(tsk);

	__put_fs_struct(old_fs);
}

/*
 * __put_fs_struct - drop one reference on an fs_struct.
 *
 * When the count reaches zero the root, pwd and (if set) altroot
 * dentry/mount references are released and the fs_struct is returned
 * to its slab cache.
 */
static inline void __put_fs_struct(struct fs_struct *fs)
{
	/*
	 * No need to hold fs->lock if we are killing it.  (In the article's
	 * walkthrough fs->count is 1 here, so the teardown below runs.)
	 */
	if (!atomic_dec_and_test(&fs->count))
		return;

	dput(fs->root);
	mntput(fs->rootmnt);

	dput(fs->pwd);
	mntput(fs->pwdmnt);

	if (fs->altroot) {
		dput(fs->altroot);
		mntput(fs->altrootmnt);
	}

	kmem_cache_free(fs_cachep, fs);
}

/*
 * exit_sighand - detach a task from its signal-handler table.
 *
 * With sigmask_lock held and interrupts disabled: clears tsk->sig,
 * frees the signal_struct when its count drops to zero, resets
 * tsk->sigpending and flushes the task's pending-signal queue.
 */
void exit_sighand(struct task_struct *tsk)
{
	struct signal_struct * sig = tsk->sig;

	spin_lock_irq(&tsk->sigmask_lock);
	if (sig) {
		tsk->sig = NULL;
		/* In the article's walkthrough sig->count is 1, so the struct is freed. */
		if (atomic_dec_and_test(&sig->count))
			kmem_cache_free(sigact_cachep, sig);
	}
	tsk->sigpending = 0;
	flush_sigqueue(&tsk->pending);
	spin_unlock_irq(&tsk->sigmask_lock);
}

/*
 * exit_notify - announce the death of the current task.
 *
 * Re-parents the current task's children (forget_original_parent),
 * performs the orphaned-process-group checks, puts the current task
 * into TASK_ZOMBIE, notifies its parent and moves every child onto the
 * child list of its new parent.
 */
static void exit_notify(void)
{
	struct task_struct * p, *t;

	/* Point p_opptr of each of our children at the reaper. */
	forget_original_parent(current);
	/*
	 * Check to see if any process groups have become orphaned
	 * as a result of our exiting, and if they have any stopped
	 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
	 *
	 * Case i: Our father is in a different pgrp than we are
	 * and we were the only connection outside, so our pgrp
	 * is about to become orphaned.
	 */

	/*
	 * t is our p_pptr, the parent that gets notified below;
	 * forget_original_parent() only rewrote the p_opptr field of our
	 * children.
	 */
	t = current->p_pptr;

	if ((t->pgrp != current->pgrp) &&
	    (t->session == current->session) &&
	    will_become_orphaned_pgrp(current->pgrp, current) &&
	    has_stopped_jobs(current->pgrp)) {
		kill_pg(current->pgrp,SIGHUP,1);
		kill_pg(current->pgrp,SIGCONT,1);
	}

	/* Let father know we died
	 *
	 * Thread signals are configurable, but you aren't going to use
	 * that to send signals to arbitrary processes.
	 * That stops right now.
	 *
	 * If the parent exec id doesn't match the exec id we saved
	 * when we started then we know the parent has changed security
	 * domain.
	 *
	 * If our self_exec id doesn't match our parent_exec_id then
	 * we have changed execution domain as these two values started
	 * the same after a fork.
	 *
	 */

	if(current->exit_signal != SIGCHLD &&
	    ( current->parent_exec_id != t->self_exec_id  ||
	      current->self_exec_id != current->parent_exec_id)
	    && !capable(CAP_KILL))
		current->exit_signal = SIGCHLD;	/* force the signal sent to the parent to SIGCHLD */

	/*
	 * This loop does two things:
	 *
	 * A. Make init inherit all the child processes
	 * B. Check to see if any process groups have become orphaned
	 * as a result of our exiting, and if they have any stopped
	 * jobs, send them a SIGHUP and then a SIGCONT. (POSIX 3.2.2.2)
	 */

	write_lock_irq(&tasklist_lock);
	current->state = TASK_ZOMBIE;	/* the current task becomes a zombie */
	do_notify_parent(current, current->exit_signal);	/* signal our parent */
	while (current->p_cptr != NULL) {
		/* Unlink the child from our child list ... */
		p = current->p_cptr;
		current->p_cptr = p->p_osptr;
		p->p_ysptr = NULL;
		p->ptrace = 0;

		/*
		 * ... and push it onto the list of its new parent.  From here
		 * on p_pptr and p_opptr agree: both are the reaper chosen in
		 * forget_original_parent().
		 */
		p->p_pptr = p->p_opptr;
		p->p_osptr = p->p_pptr->p_cptr;
		if (p->p_osptr)
			p->p_osptr->p_ysptr = p;
		p->p_pptr->p_cptr = p;
		/* A child that is already a zombie is reported to its new parent. */
		if (p->state == TASK_ZOMBIE)
			do_notify_parent(p, p->exit_signal);
		/*
		 * process group orphan check
		 * Case ii: Our child is in a different pgrp
		 * than we are, and it was the only connection
		 * outside, so the child pgrp is now orphaned.
		 */
		if ((p->pgrp != current->pgrp) &&
		    (p->session == current->session)) {
			int pgrp = p->pgrp;

			/* tasklist_lock is released around the check and the kill_pg() calls. */
			write_unlock_irq(&tasklist_lock);
			if (is_orphaned_pgrp(pgrp) && has_stopped_jobs(pgrp)) {
				kill_pg(pgrp,SIGHUP,1);
				kill_pg(pgrp,SIGCONT,1);
			}
			write_lock_irq(&tasklist_lock);
		}
	}
	write_unlock_irq(&tasklist_lock);
}

/*
 * forget_original_parent - give the children of an exiting task a new
 * original parent.
 *
 * The new parent ("reaper") is the next thread in father's thread
 * group, or child_reaper when father is the only thread.  For every
 * task whose p_opptr is father: exit_signal is forced to SIGCHLD,
 * self_exec_id is bumped, p_opptr is redirected to the reaper and the
 * task's pdeath_signal, if set, is sent to it.
 *
 * Only p_opptr is changed here; p_pptr is left untouched.
 */
static inline void forget_original_parent(struct task_struct * father)
{
	struct task_struct * p, *reaper;

	read_lock(&tasklist_lock);

	/* Next in our thread group */
	reaper = next_thread(father);
	if (reaper == father)
		reaper = child_reaper;	/* NOTE(review): annotated as the init process in the original walkthrough */

	for_each_task(p) {
		if (p->p_opptr == father) {
			/* We dont want people slaying init */
			p->exit_signal = SIGCHLD;
			p->self_exec_id++;
			p->p_opptr = reaper;	/* re-parent: only p_opptr is set here */
			if (p->pdeath_signal) send_sig(p->pdeath_signal, p, 0);
		}
	}
	read_unlock(&tasklist_lock);
}

/*
 * do_notify_parent - tell a task's parent that the task changed state.
 *
 * Fills a siginfo with the task's pid, uid, user/system times, a reason
 * code and a status derived from tsk->state and tsk->exit_code, sends
 * it as signal sig to tsk->p_pptr and then wakes that parent up.
 *
 * @tsk: the task being reported.
 * @sig: signal to deliver (SIGCHLD in the article's walkthrough).
 */
void do_notify_parent(struct task_struct *tsk, int sig)
{
	struct siginfo info;
	int why, status;

	info.si_signo = sig;
	info.si_errno = 0;
	info.si_pid = tsk->pid;
	info.si_uid = tsk->uid;

	/* FIXME: find out whether or not this is supposed to be c*time. */
	info.si_utime = tsk->times.tms_utime;
	info.si_stime = tsk->times.tms_stime;

	/* Default status: the low seven bits of the exit code. */
	status = tsk->exit_code & 0x7f;
	why = SI_KERNEL;	/* shouldn't happen */
	switch (tsk->state) {
	case TASK_STOPPED:
		/* FIXME -- can we deduce CLD_TRAPPED or CLD_CONTINUED? */
		if (tsk->ptrace & PT_PTRACED)
			why = CLD_TRAPPED;
		else
			why = CLD_STOPPED;
		break;

	default:
		/* Bit 7 set: dumped; low seven bits set: killed; otherwise a plain exit. */
		if (tsk->exit_code & 0x80)
			why = CLD_DUMPED;
		else if (tsk->exit_code & 0x7f)
			why = CLD_KILLED;
		else {
			why = CLD_EXITED;
			/* For a plain exit the status is taken from bits 8 and up. */
			status = tsk->exit_code >> 8;
		}
		break;
	}
	info.si_code = why;
	info.si_status = status;

	/* Deliver the signal to the parent. */
	send_sig_info(sig, &info, tsk->p_pptr);
	/*
	 * Wake the parent.  NOTE(review): the original annotation says the
	 * parent sleeps in TASK_INTERRUPTIBLE inside wait and becomes
	 * TASK_RUNNING here - not verifiable from this function.
	 */
	wake_up_parent(tsk->p_pptr);
}
至此,进程的基本资源都已经释放了,但是当前进程的残骸仍旧占用着最低限度的资源,包括其task_struct数据结构和系统空间堆栈所在的两个页面。当前进程自己不释放这两个页面,就像人们自己并不在临终注销自己的户口一样,而是通知其父进程,让父进程料理后事。此时当前进程的状态已是TASK_ZOMBIE,因此调用schedule()之后,它再也不会被调度运行(相当于被无限期推迟)。下面是do_exit的最后一步:执行schedule()。假设系统中只有父进程和子进程,而父进程的状态已经被置为TASK_RUNNING,于是切换到父进程继续执行。进程切换由switch_to宏完成:
/*
 * switch_to - switch the CPU from task `prev` to task `next` (i386).
 *
 * Steps, in order:
 *   1. push esi, edi and ebp onto prev's stack;
 *   2. store esp in prev->thread.esp and load esp from next->thread.esp;
 *      from this point on we run on next's stack;
 *   3. store the address of label 1 in prev->thread.eip, so that prev
 *      resumes at label 1 the next time it is switched in;
 *   4. push next->thread.eip and jmp (not call) to __switch_to.  A jmp
 *      pushes no return address, so - assuming __switch_to ends with a
 *      plain ret - the value pushed here is where execution continues;
 *   5. at label 1, pop ebp, edi and esi.  The stack has already been
 *      switched, so these are the values the resumed task pushed in
 *      step 1 when it was switched out.
 *
 * For a task that was itself switched out through this macro,
 * next->thread.eip is therefore label 1.  NOTE(review): the original
 * walkthrough says a freshly forked child resumes elsewhere - confirm.
 *
 * `last` receives the content of ebx after the switch.
 *
 * Keep every note in a comment placed before the trailing backslash:
 * any text after the backslash stops the line from being continued and
 * ends the macro definition there.
 */
#define switch_to(prev,next,last) do {					\
	asm volatile("pushl %%esi\n\t"	/* save esi on prev's stack */	\
		     "pushl %%edi\n\t"	/* save edi on prev's stack */	\
		     "pushl %%ebp\n\t"	/* save ebp on prev's stack */	\
		     "movl %%esp,%0\n\t"	/* save ESP */		\
		     "movl %3,%%esp\n\t"	/* restore ESP */	\
		     "movl $1f,%1\n\t"		/* save EIP */		\
		     "pushl %4\n\t"		/* restore EIP */	\
		     "jmp __switch_to\n"				\
		     "1:\t"						\
		     "popl %%ebp\n\t"					\
		     "popl %%edi\n\t"					\
		     "popl %%esi\n\t"					\
		     :"=m" (prev->thread.esp),"=m" (prev->thread.eip),	\
		      "=b" (last)					\
		     :"m" (next->thread.esp),"m" (next->thread.eip),	\
		      "a" (prev), "d" (next),				\
		      "b" (prev));					\
} while (0)
父进程此前在sys_wait4中睡眠等待;被唤醒并调度到之后,它从标号"1:"处恢复执行,随后回到sys_wait4函数中继续执行。

原文:http://blog.csdn.net/jltxgcy/article/details/44513299