/* Source revision: a08aa2c6477ec12dfb189595fc5e735619e34568 */
/*
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  'fork.c' contains the help-routines for the 'fork' system call
 *  (see also system_call.s).
 *  Fork is rather simple, once you get the hang of it, but the memory
 *  management can be a bitch. See 'mm/mm.c': 'copy_page_tables()'
 */

#include <linux/init.h>
#include <linux/errno.h>
#include <linux/sched.h>
#include <linux/kernel.h>
#include <linux/slab.h>
#include <linux/unistd.h>
#include <linux/ptrace.h>
#include <linux/malloc.h>
#include <linux/smp.h>
#include <linux/smp_lock.h>
#include <linux/module.h>

#include <asm/system.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/uaccess.h>

unsigned long int total_forks = 0;	/* Handle normal Linux uptimes. */

/* SLAB cache for mm_struct's. */
kmem_cache_t *mm_cachep;

/* SLAB cache for files structs */
kmem_cache_t *files_cachep;

struct task_struct *pidhash[PIDHASH_SZ];

struct task_struct **tarray_freelist = NULL;
spinlock_t taskslot_lock = SPIN_LOCK_UNLOCKED;

/*
 * UID task count cache, to prevent walking entire process list every
 * single fork() operation.
 */
#define UIDHASH_SZ	(PIDHASH_SZ >> 2)

static struct uid_taskcount {
        struct uid_taskcount *next, **pprev;
        unsigned short uid;
        int task_count;
} *uidhash[UIDHASH_SZ];

static spinlock_t uidhash_lock = SPIN_LOCK_UNLOCKED;

kmem_cache_t *uid_cachep;
#define uidhashfn(uid)	(((uid >> 8) ^ uid) & (UIDHASH_SZ - 1))

static inline void uid_hash_insert(struct uid_taskcount *up, unsigned int hashent)
{
        spin_lock(&uidhash_lock);
        if((up->next = uidhash[hashent]) != NULL)
                uidhash[hashent]->pprev = &up->next;
        up->pprev = &uidhash[hashent];
        uidhash[hashent] = up;
        spin_unlock(&uidhash_lock);
}

static inline void uid_hash_remove(struct uid_taskcount *up)
{
        spin_lock(&uidhash_lock);
        if(up->next)
                up->next->pprev = up->pprev;
        *up->pprev = up->next;
        spin_unlock(&uidhash_lock);
}

static inline struct uid_taskcount *uid_find(unsigned short uid, unsigned int hashent)
{
        struct uid_taskcount *up;

        spin_lock(&uidhash_lock);
        for(up = uidhash[hashent]; (up && up->uid != uid); up = up->next)
                ;
        spin_unlock(&uidhash_lock);
        return up;
}
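/*
 * Each chain of the uid hash above is doubly linked: 'next' points at the
 * following entry and 'pprev' points back at whatever 'next' field (or hash
 * bucket slot) currently refers to this entry, so uid_hash_remove() can
 * unlink an entry in O(1) without rescanning the chain.
 */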
int charge_uid(struct task_struct *p, int count)
{
        unsigned int hashent = uidhashfn(p->uid);
        struct uid_taskcount *up = uid_find(p->uid, hashent);

        if(up) {
                int limit = p->rlim[RLIMIT_NPROC].rlim_cur;
                int newcnt = up->task_count + count;

                if(newcnt > limit)
                        return -EAGAIN;
                else if(newcnt == 0) {
                        uid_hash_remove(up);
                        kmem_cache_free(uid_cachep, up);
                        return 0;
                }
        } else {
                up = kmem_cache_alloc(uid_cachep, SLAB_KERNEL);
                if(!up)
                        return -EAGAIN;
                up->uid = p->uid;
                up->task_count = 0;
                uid_hash_insert(up, hashent);
        }
        up->task_count += count;
        return 0;
}
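/*
 * charge_uid() is where RLIMIT_NPROC is accounted: a per-uid entry in the
 * hash above carries a task count, so fork() can check the limit and adjust
 * the count with a single hash lookup instead of walking the whole task
 * list -- which is exactly what the comment at the top of the hash promises.
 */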
=kmem_cache_create("uid_cache",sizeof(struct uid_taskcount
), 131 SLAB_HWCACHE_ALIGN
, NULL
, NULL
); 133 panic("Cannot create uid taskcount SLAB cache\n"); 135 for(i
=0; i
< UIDHASH_SZ
; i
++) 139 staticinlineintfind_empty_process(void) 141 struct task_struct
static inline int find_empty_process(void)
{
        struct task_struct **tslot;

        if(current->uid) {
                int error;

                if(nr_tasks >= NR_TASKS - MIN_TASKS_LEFT_FOR_ROOT)
                        return -EAGAIN;
                if((error = charge_uid(current, 1)) < 0)
                        return error;
        }
        tslot = get_free_taskslot();
        if(tslot)
                return tslot - &task[0];
        return -EAGAIN;
}
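/*
 * The NR_TASKS test above keeps MIN_TASKS_LEFT_FOR_ROOT task slots in
 * reserve: it applies only to ordinary users, so root can still fork to
 * clean things up after a non-root user has filled the task array.
 */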
/* Protects next_safe and last_pid. */
static spinlock_t lastpid_lock = SPIN_LOCK_UNLOCKED;

static int get_pid(unsigned long flags)
{
        static int next_safe = PID_MAX;
        struct task_struct *p;

        if (flags & CLONE_PID)
                return current->pid;

        spin_lock(&lastpid_lock);
        if((++last_pid) & 0xffff8000) {
                last_pid = 300;		/* Skip daemons etc. */
                goto inside;
        }
        if(last_pid >= next_safe) {
inside:
                next_safe = PID_MAX;
                read_lock(&tasklist_lock);
        repeat:
                for_each_task(p) {
                        if(p->pid == last_pid	||
                           p->pgrp == last_pid	||
                           p->session == last_pid) {
                                if(++last_pid >= next_safe) {
                                        if(last_pid & 0xffff8000)
                                                last_pid = 300;
                                        next_safe = PID_MAX;
                                }
                                goto repeat;
                        }
                        if(p->pid > last_pid && next_safe > p->pid)
                                next_safe = p->pid;
                        if(p->pgrp > last_pid && next_safe > p->pgrp)
                                next_safe = p->pgrp;
                        if(p->session > last_pid && next_safe > p->session)
                                next_safe = p->session;
                }
                read_unlock(&tasklist_lock);
        }
        spin_unlock(&lastpid_lock);

        return last_pid;
}
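/*
 * get_pid() keeps the task-list scan above off the common path: next_safe
 * records the lowest pid/pgrp/session value still in use beyond last_pid,
 * so the list only has to be rescanned once last_pid catches up with it.
 * The 0xffff8000 test restarts allocation at 300 ("skip daemons") whenever
 * the counter would leave the 15-bit pid range.
 */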
static inline int dup_mmap(struct mm_struct * mm)
{
        struct vm_area_struct * mpnt, *tmp, **pprev;
        int retval;

        flush_cache_mm(current->mm);
        pprev = &mm->mmap;
        for (mpnt = current->mm->mmap ; mpnt ; mpnt = mpnt->vm_next) {
                struct file *file;

                retval = -ENOMEM;
                tmp = kmem_cache_alloc(vm_area_cachep, SLAB_KERNEL);
                if (!tmp)
                        goto fail_nomem;
                *tmp = *mpnt;
                tmp->vm_flags &= ~VM_LOCKED;
                tmp->vm_mm = mm;
                file = tmp->vm_file;
                if (file) {
                        file->f_count++;
                        if (tmp->vm_flags & VM_DENYWRITE)
                                file->f_dentry->d_inode->i_writecount--;

                        /* insert tmp into the share list, just after mpnt */
                        if((tmp->vm_next_share = mpnt->vm_next_share) != NULL)
                                mpnt->vm_next_share->vm_pprev_share =
                                        &tmp->vm_next_share;
                        mpnt->vm_next_share = tmp;
                        tmp->vm_pprev_share = &mpnt->vm_next_share;
                }

                /* Copy the pages, but defer checking for errors */
                retval = copy_page_range(mm, current->mm, tmp);
                if (!retval && tmp->vm_ops && tmp->vm_ops->open)
                        tmp->vm_ops->open(tmp);

                /*
                 * Link in the new vma even if an error occurred,
                 * so that exit_mmap() can clean up the mess.
                 */
                if((tmp->vm_next = *pprev) != NULL)
                        (*pprev)->vm_pprev = &tmp->vm_next;
                *pprev = tmp;
                tmp->vm_pprev = pprev;

                pprev = &tmp->vm_next;
                if (retval)
                        goto fail_nomem;
        }
        retval = 0;

fail_nomem:
        flush_tlb_mm(current->mm);
        return retval;
}
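/*
 * Even when copy_page_range() fails, the half-built vma has already been
 * linked into the new mm, so the caller can simply drop the mm and let
 * exit_mmap() tear down whatever was copied; dup_mmap() itself never has to
 * undo a partial copy.
 */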
/*
 * Allocate and initialize an mm_struct.
 *
 * NOTE! The mm mutex will be locked until the
 * caller decides that all systems are go..
 */
struct mm_struct * mm_alloc(void)
{
        struct mm_struct * mm;

        mm = kmem_cache_alloc(mm_cachep, SLAB_KERNEL);
        if (mm) {
                *mm = *current->mm;
                init_new_context(mm);
                atomic_set(&mm->count, 1);
                mm->mmap_sem = MUTEX_LOCKED;
                /*
                 * Leave mm->pgd set to the parent's pgd
                 * so that pgd_offset() is always valid.
                 */
                mm->mmap = mm->mmap_cache = NULL;

                /* It has not run yet, so cannot be present in anyone's
                 * cache or tlb.
                 */
                mm->cpu_vm_mask = 0;
        }
        return mm;
}
/*
 * Decrement the use count and release all resources for an mm.
 */
void mmput(struct mm_struct *mm)
{
        if (atomic_dec_and_test(&mm->count)) {
                exit_mmap(mm);
                free_page_tables(mm);
                kmem_cache_free(mm_cachep, mm);
        }
}
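/*
 * mm_alloc() and mmput() bracket the lifetime of a forked mm: the mm comes
 * back with mmap_sem already held (see the NOTE above), and copy_mm() below
 * only releases it once the page tables and the vma list have been
 * duplicated.
 */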
static inline int copy_mm(unsigned long clone_flags, struct task_struct * tsk)
{
        struct mm_struct * mm;
        int retval;

        if (clone_flags & CLONE_VM) {
                mmget(current->mm);
                SET_PAGE_DIR(tsk, current->mm->pgd);
                return 0;
        }

        retval = -ENOMEM;
        mm = mm_alloc();
        if (!mm)
                goto fail_nomem;

        tsk->mm = mm;
        tsk->min_flt = tsk->maj_flt = 0;
        tsk->cmin_flt = tsk->cmaj_flt = 0;
        tsk->nswap = tsk->cnswap = 0;
        retval = new_page_tables(tsk);
        if (retval)
                goto free_mm;
        retval = dup_mmap(mm);
        if (retval)
                goto free_pt;
        up(&mm->mmap_sem);
        return 0;

free_mm:
        tsk->mm = NULL;
        kmem_cache_free(mm_cachep, mm);
        goto fail_nomem;
free_pt:
        tsk->mm = NULL;
        mmput(mm);
fail_nomem:
        return retval;
}
static inline int copy_fs(unsigned long clone_flags, struct task_struct * tsk)
{
        if (clone_flags & CLONE_FS) {
                current->fs->count++;
                return 0;
        }
        tsk->fs = kmalloc(sizeof(*tsk->fs), GFP_KERNEL);
        if (!tsk->fs)
                return -1;
        tsk->fs->count = 1;
        tsk->fs->umask = current->fs->umask;
        tsk->fs->root = dget(current->fs->root);
        tsk->fs->pwd = dget(current->fs->pwd);
        return 0;
}
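/*
 * Like the other copy_* helpers, copy_fs() either bumps a reference count
 * (CLONE_FS) or allocates a private copy; the new fs_struct starts out as a
 * clone of the parent's umask, root and pwd, with dget() taking the extra
 * dentry references.
 */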
/* return value is only accurate by +-sizeof(long)*8 fds */
/* XXX make this architecture specific */
static inline int __copy_fdset(unsigned long *d, unsigned long *src)
{
        int i;
        unsigned long *p = src;
        unsigned long *max = src;

        for (i = __FDSET_LONGS; i; --i) {
                if ((*d++ = *p++) != 0)
                        max = p;
        }
        return (max - src)*sizeof(long)*8;
}

static inline int copy_fdset(fd_set *dst, fd_set *src)
{
        return __copy_fdset(dst->fds_bits, src->fds_bits);
}
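/*
 * __copy_fdset() copies the whole bitmap but remembers the last nonzero
 * word it saw, so its return value is an upper bound on the highest open
 * fd, rounded to a multiple of bits-per-long -- good enough for
 * copy_files() below, which only uses it to bound the fd array walk.
 */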
static int copy_files(unsigned long clone_flags, struct task_struct * tsk)
{
        struct files_struct *oldf, *newf;
        struct file **old_fds, **new_fds;
        int size, i, error = 0;

        /*
         * A background process may not have any files ...
         */
        oldf = current->files;
        if (!oldf)
                goto out;

        if (clone_flags & CLONE_FILES) {
                oldf->count++;
                goto out;
        }

        tsk->files = NULL;
        error = -ENOMEM;
        newf = kmem_cache_alloc(files_cachep, SLAB_KERNEL);
        if (!newf)
                goto out;

        /*
         * Allocate the fd array, using get_free_page() if possible.
         * Eventually we want to make the array size variable ...
         */
        size = NR_OPEN * sizeof(struct file *);
        if (size == PAGE_SIZE)
                new_fds = (struct file **) __get_free_page(GFP_KERNEL);
        else
                new_fds = (struct file **) kmalloc(size, GFP_KERNEL);
        if (!new_fds)
                goto out_release;
        memset((void *) new_fds, 0, size);

        newf->count = 1;
        newf->max_fds = NR_OPEN;
        newf->fd = new_fds;
        newf->close_on_exec = oldf->close_on_exec;
        i = copy_fdset(&newf->open_fds, &oldf->open_fds);

        old_fds = oldf->fd;
        for (; i != 0; i--) {
                struct file * f = *old_fds;
                old_fds++;
                *new_fds = f;
                if (f)
                        f->f_count++;
                new_fds++;
        }
        tsk->files = newf;
        error = 0;
out:
        return error;

out_release:
        kmem_cache_free(files_cachep, newf);
        goto out;
}
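/*
 * copy_files() mirrors the fd table: with CLONE_FILES the parent's table is
 * shared by bumping its count; otherwise a new files_struct and fd array
 * are allocated, the close-on-exec and open-fd bitmaps are copied, and each
 * inherited struct file has its f_count incremented.
 */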
static inline int copy_sighand(unsigned long clone_flags, struct task_struct * tsk)
{
        if (clone_flags & CLONE_SIGHAND) {
                atomic_inc(&current->sig->count);
                return 0;
        }
        tsk->sig = kmalloc(sizeof(*tsk->sig), GFP_KERNEL);
        if (!tsk->sig)
                return -1;
        spin_lock_init(&tsk->sig->siglock);
        atomic_set(&tsk->sig->count, 1);
        memcpy(tsk->sig->action, current->sig->action, sizeof(tsk->sig->action));
        return 0;
}
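/*
 * Signal handlers follow the same pattern: CLONE_SIGHAND shares the
 * parent's signal structure by reference count, otherwise the action table
 * is duplicated into a freshly allocated one with its own siglock.
 */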
/*
 *  Ok, this is the main fork-routine. It copies the system process
 * information (task[nr]) and sets up the necessary registers. It
 * also copies the data segment in its entirety.
 */
int do_fork(unsigned long clone_flags, unsigned long usp, struct pt_regs *regs)
{
        int nr;
        int error = -ENOMEM;
        struct task_struct *p;

        p = alloc_task_struct();
        if (!p)
                goto bad_fork;

        error = -EAGAIN;
        nr = find_empty_process();
        if (nr < 0)
                goto bad_fork_free;

        *p = *current;

        if (p->exec_domain && p->exec_domain->module)
                __MOD_INC_USE_COUNT(p->exec_domain->module);
        if (p->binfmt && p->binfmt->module)
                __MOD_INC_USE_COUNT(p->binfmt->module);

        p->did_exec = 0;
        p->swappable = 0;
        p->state = TASK_UNINTERRUPTIBLE;
        p->flags &= ~(PF_PTRACED|PF_TRACESYS|PF_SUPERPRIV);
        p->flags |= PF_FORKNOEXEC;
        p->pid = get_pid(clone_flags);

        p->next_run = NULL;
        p->prev_run = NULL;
        p->p_pptr = p->p_opptr = current;
        p->p_cptr = NULL;
        init_waitqueue(&p->wait_chldexit);
        sigemptyset(&p->signal);
        p->sigqueue = NULL;
        p->sigqueue_tail = &p->sigqueue;
        p->it_real_value = p->it_virt_value = p->it_prof_value = 0;
        p->it_real_incr = p->it_virt_incr = p->it_prof_incr = 0;
        init_timer(&p->real_timer);
        p->real_timer.data = (unsigned long) p;
        p->leader = 0;		/* session leadership doesn't inherit */
        p->times.tms_utime = p->times.tms_stime = 0;
        p->times.tms_cutime = p->times.tms_cstime = 0;
#ifdef __SMP__
        {
                int i;
                p->processor = NO_PROC_ID;
                /* ?? should we just memset this ?? */
                for(i = 0; i < smp_num_cpus; i++)
                        p->per_cpu_utime[i] = p->per_cpu_stime[i] = 0;
        }
#endif
        p->start_time = jiffies;
        p->tarray_ptr = &task[nr];
        *p->tarray_ptr = p;

        {
                unsigned long flags;
                write_lock_irqsave(&tasklist_lock, flags);
                SET_LINKS(p);
                hash_pid(p);
                write_unlock_irqrestore(&tasklist_lock, flags);
        }

        nr_tasks++;

        error = -ENOMEM;
        /* copy all the process information */
        if (copy_files(clone_flags, p))
                goto bad_fork_cleanup;
        if (copy_fs(clone_flags, p))
                goto bad_fork_cleanup_files;
        if (copy_sighand(clone_flags, p))
                goto bad_fork_cleanup_fs;
        if (copy_mm(clone_flags, p))
                goto bad_fork_cleanup_sighand;
        error = copy_thread(nr, clone_flags, usp, p, regs);
        if (error)
                goto bad_fork_cleanup_sighand;
        p->semundo = NULL;

        /* ok, now we should be set up.. */
        p->swappable = 1;
        p->exit_signal = clone_flags & CSIGNAL;

        /*
         * "share" dynamic priority between parent and child, thus the
         * total amount of dynamic priorities in the system doesnt change,
         * more scheduling fairness. This is only important in the first
         * timeslice, on the long run the scheduling behaviour is unchanged.
         */
        current->counter >>= 1;
        p->counter = current->counter;

        if (!(clone_flags & CLONE_PID))
                wake_up_process(p);		/* do this last, just in case */
        else {
                p->state = TASK_RUNNING;
                p->next_run = p->prev_run = p;
        }

        ++total_forks;
        error = p->pid;
bad_fork:
        return error;

bad_fork_cleanup_sighand:
        exit_sighand(p);
bad_fork_cleanup_fs:
        exit_fs(p);		/* blocking */
bad_fork_cleanup_files:
        exit_files(p);		/* blocking */
bad_fork_cleanup:
        charge_uid(current, -1);
        if (p->exec_domain && p->exec_domain->module)
                __MOD_DEC_USE_COUNT(p->exec_domain->module);
        if (p->binfmt && p->binfmt->module)
                __MOD_DEC_USE_COUNT(p->binfmt->module);
        add_free_taskslot(p->tarray_ptr);

        {
                unsigned long flags;
                write_lock_irqsave(&tasklist_lock, flags);
                unhash_pid(p);
                REMOVE_LINKS(p);
                write_unlock_irqrestore(&tasklist_lock, flags);
        }

        nr_tasks--;
bad_fork_free:
        free_task_struct(p);
        goto bad_fork;
}
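/*
 * The bad_fork_cleanup_* ladder above undoes do_fork() in reverse order of
 * construction: whichever copy_* step fails jumps to the label that tears
 * down only what had already been set up (signal handlers, then fs, then
 * files, then the uid charge, task slot and task list links).
 */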
static void files_ctor(void *fp, kmem_cache_t *cachep, unsigned long flags)
{
        struct files_struct *f = fp;

        memset(f, 0, sizeof(*f));
}

__initfunc(void filescache_init(void))
{
        files_cachep = kmem_cache_create("files_cache",
                                         sizeof(struct files_struct),
                                         0,
                                         SLAB_HWCACHE_ALIGN, files_ctor, NULL);
        if (!files_cachep)
                panic("Cannot create files cache");
}
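/*
 * files_ctor() runs as the slab constructor registered above, so every
 * files_struct handed out by files_cachep starts out fully zeroed before
 * copy_files() fills it in.
 */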