/*
 *  linux/kernel/sched.c
 *
 *  Copyright (C) 1991, 1992  Linus Torvalds
 *
 *  1996-12-23  Modified by Dave Grothe to fix bugs in semaphores and
 *              make semaphores SMP safe
 *  1997-01-28  Modified by Finn Arne Gangstad to make timers scale better.
 *  1997-09-10  Updated NTP code according to technical memorandum Jan '96
 *              "A Kernel Model for Precision Timekeeping" by Dave Mills
 *  1998-11-19  Implemented schedule_timeout() and related stuff
 *  1998-12-24  Fixed a xtime SMP race (we need the xtime_lock rw spinlock to
 *              serialize accesses to xtime/lost_ticks).
 *                              Copyright (C) 1998  Andrea Arcangeli
 *  1998-12-28  Implemented better SMP scheduling by Ingo Molnar
 *  1999-03-10  Improved NTP compatibility by Ulrich Windl
 */

/*
 * 'sched.c' is the main kernel file. It contains scheduling primitives
 * (sleep_on, wakeup, schedule etc) as well as a number of simple system
 * call functions (type getpid()), which just extract a field from
 * the current task.
 */

#include <linux/mm.h>
#include <linux/kernel_stat.h>
#include <linux/fdreg.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/smp_lock.h>
#include <linux/init.h>

#include <asm/io.h>
#include <asm/uaccess.h>
#include <asm/pgtable.h>
#include <asm/mmu_context.h>
#include <asm/semaphore-helper.h>

#include <linux/timex.h>

/*
 * kernel variables
 */

unsigned securebits = SECUREBITS_DEFAULT;	/* systemwide security settings */

long tick = (1000000 + HZ/2) / HZ;		/* timer interrupt period */

/* The current time */
volatile struct timeval xtime __attribute__ ((aligned (16)));

/* Don't completely fail for HZ > 500.  */
int tickadj = 500/HZ ? : 1;			/* microsecs */

DECLARE_TASK_QUEUE(tq_timer);
DECLARE_TASK_QUEUE(tq_immediate);
DECLARE_TASK_QUEUE(tq_scheduler);

/*
 * phase-lock loop variables
 */
/* TIME_ERROR prevents overwriting the CMOS clock */
int time_state = TIME_OK;		/* clock synchronization status	*/
int time_status = STA_UNSYNC;		/* clock status bits		*/
long time_offset = 0;			/* time adjustment (us)		*/
long time_constant = 2;			/* pll time constant		*/
long time_tolerance = MAXFREQ;		/* frequency tolerance (ppm)	*/
long time_precision = 1;		/* clock precision (us)		*/
long time_maxerror = NTP_PHASE_LIMIT;	/* maximum error (us)		*/
long time_esterror = NTP_PHASE_LIMIT;	/* estimated error (us)		*/
long time_phase = 0;			/* phase offset (scaled us)	*/
long time_freq = ((1000000 + HZ/2) % HZ - HZ/2) << SHIFT_USEC;
					/* frequency offset (scaled ppm) */
long time_adj = 0;			/* tick adjust (scaled 1 / HZ)	*/
long time_reftime = 0;			/* time at last adjustment (s)	*/

long time_adjust = 0;
long time_adjust_step = 0;

unsigned long event = 0;

extern int do_setitimer(int, struct itimerval *, struct itimerval *);
unsigned int * prof_buffer = NULL;
unsigned long prof_len = 0;
unsigned long prof_shift = 0;

extern void mem_use(void);

unsigned long volatile jiffies = 0;

/*
 *	Init task must be ok at boot for the ix86 as we will check its signals
 *	via the SMP irq return path.
 */

struct task_struct * init_tasks[NR_CPUS] = {&init_task, };
/*
 * The tasklist_lock protects the linked list of processes.
 *
 * The scheduler lock is protecting against multiple entry
 * into the scheduling code, and doesn't need to worry
 * about interrupts (because interrupts cannot call the
 * scheduler).
 *
 * The run-queue lock locks the parts that actually access
 * and change the run-queues, and have to be interrupt-safe.
 */
spinlock_t runqueue_lock = SPIN_LOCK_UNLOCKED;	/* second */
rwlock_t tasklist_lock = RW_LOCK_UNLOCKED;	/* third */

static LIST_HEAD(runqueue_head);

/*
 * We align per-CPU scheduling data on cacheline boundaries,
 * to prevent cacheline ping-pong.
 */
static union {
	struct schedule_data {
		struct task_struct * curr;
		cycles_t last_schedule;
	} schedule_data;
	char __pad [SMP_CACHE_BYTES];
} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};

#define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr

struct kernel_stat kstat = { 0 };

#ifdef __SMP__

#define idle_task(cpu) (init_tasks[cpu_number_map[(cpu)]])
#define can_schedule(p)	(!(p)->has_cpu)

#else

#define idle_task(cpu) (&init_task)
#define can_schedule(p) (1)

#endif

void scheduling_functions_start_here(void) { }

/*
 * This is the function that decides how desirable a process is..
 * You can weigh different processes against each other depending
 * on what CPU they've run on lately etc to try to handle cache
 * and TLB miss penalties.
 *
 * Return values:
 *	 -1000: never select this
 *	     0: out of time, recalculate counters (but it might still be
 *		selected)
 *	   +ve: "goodness" value (the larger, the better)
 *	 +1000: realtime process, select this.
 */
static inline int goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
{
	int weight;

	/*
	 * Realtime process, select the first one on the
	 * runqueue (taking priorities within processes
	 * into account).
	 */
	if (p->policy != SCHED_OTHER) {
		weight = 1000 + p->rt_priority;
		goto out;
	}

	/*
	 * Give the process a first-approximation goodness value
	 * according to the number of clock-ticks it has left.
	 *
	 * Don't do any other calculations if the time slice is
	 * over..
	 */
	weight = p->counter;
	if (!weight)
		goto out;

#ifdef __SMP__
	/* Give a largish advantage to the same processor...   */
	/* (this is equivalent to penalizing other processors) */
	if (p->processor == this_cpu)
		weight += PROC_CHANGE_PENALTY;
#endif

	/* .. and a slight advantage to the current MM */
	if (p->mm == this_mm || !p->mm)
		weight += 1;
	weight += p->priority;

out:
	return weight;
}
/*
 * subtle. We want to discard a yielded process only if it's being
 * considered for a reschedule. Wakeup-time 'queries' of the scheduling
 * state do not count. Another optimization we do: sched_yield()-ed
 * processes are runnable (and thus will be considered for scheduling)
 * right when they are calling schedule(). So the only place we need
 * to care about SCHED_YIELD is when we calculate the previous process'
 * goodness ...
 */
static inline int prev_goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
{
	if (p->policy & SCHED_YIELD) {
		p->policy &= ~SCHED_YIELD;
		return 0;
	}
	return goodness(p, this_cpu, this_mm);
}
/*
 * the 'goodness value' of replacing a process on a given CPU.
 * positive value means 'replace', zero or negative means 'dont'.
 */
static inline int preemption_goodness(struct task_struct * prev, struct task_struct * p, int cpu)
{
	return goodness(p, cpu, prev->mm) - goodness(prev, cpu, prev->mm);
}

/*
 * If there is a dependency between p1 and p2,
 * don't be too eager to go into the slow schedule.
 * In particular, if p1 and p2 both want the kernel
 * lock, there is no point in trying to make them
 * extremely parallel..
 *
 * (No lock - lock_depth < 0)
 *
 * There are two additional metrics here:
 *
 * first, a 'cutoff' interval, currently 0-200 usecs on
 * x86 CPUs, depending on the size of the 'SMP-local cache'.
 * If the current process has longer average timeslices than
 * this, then we utilize the idle CPU.
 *
 * second, if the wakeup comes from a process context,
 * then the two processes are 'related'. (they form a
 * 'gang')
 *
 * An idle CPU is almost always a bad thing, thus we skip
 * the idle-CPU utilization only if both these conditions
 * are true. (ie. a 'process-gang' rescheduling with rather
 * high frequency should stay on the same CPU).
 *
 * [We can switch to something more finegrained in 2.3.]
 *
 * do not 'guess' if the to-be-scheduled task is RT.
 */
#define related(p1,p2) (((p1)->lock_depth >= 0) && (p2)->lock_depth >= 0) && \
	(((p2)->policy == SCHED_OTHER) && ((p1)->avg_slice < cacheflush_time))
static inline void reschedule_idle_slow(struct task_struct * p)
{
#ifdef __SMP__
/*
 * (see reschedule_idle() for an explanation first ...)
 *
 * Pass #2
 *
 * We try to find another (idle) CPU for this woken-up process.
 *
 * On SMP, we mostly try to see if the CPU the task used
 * to run on is idle.. but we will use another idle CPU too,
 * at this point we already know that this CPU is not
 * willing to reschedule in the near future.
 *
 * An idle CPU is definitely wasted, especially if this CPU is
 * running long-timeslice processes. The following algorithm is
 * pretty good at finding the best idle CPU to send this process
 * to.
 *
 * [We can try to preempt low-priority processes on other CPUs in
 * 2.3. Also we can try to use the avg_slice value to predict
 * 'likely reschedule' events even on other CPUs.]
 */
	int this_cpu = smp_processor_id(), target_cpu;
	struct task_struct *tsk, *target_tsk;
	int cpu, best_cpu, weight, best_weight, i;
	unsigned long flags;

	best_weight = 0; /* prevents negative weight */

	spin_lock_irqsave(&runqueue_lock, flags);

	/*
	 * shortcut if the woken up task's last CPU is
	 * idle now.
	 */
	best_cpu = p->processor;
	target_tsk = idle_task(best_cpu);
	if (cpu_curr(best_cpu) == target_tsk)
		goto send_now;

	target_tsk = NULL;
	for (i = 0; i < smp_num_cpus; i++) {
		cpu = cpu_logical_map(i);
		tsk = cpu_curr(cpu);
		if (related(tsk, p))
			goto out_no_target;
		weight = preemption_goodness(tsk, p, cpu);
		if (weight > best_weight) {
			best_weight = weight;
			target_tsk = tsk;
		}
	}

	/*
	 * found any suitable CPU?
	 */
	if (!target_tsk)
		goto out_no_target;

send_now:
	target_cpu = target_tsk->processor;
	target_tsk->need_resched = 1;
	spin_unlock_irqrestore(&runqueue_lock, flags);
	/*
	 * the APIC stuff can go outside of the lock because
	 * it uses no task information, only CPU#.
	 */
	if (target_cpu != this_cpu)
		smp_send_reschedule(target_cpu);
	return;
out_no_target:
	spin_unlock_irqrestore(&runqueue_lock, flags);
	return;
#else /* UP */
	int this_cpu = smp_processor_id();
	struct task_struct *tsk;

	tsk = cpu_curr(this_cpu);
	if (preemption_goodness(tsk, p, this_cpu) > 0)
		tsk->need_resched = 1;
#endif
}
static void reschedule_idle(struct task_struct * p)
{
#ifdef __SMP__
	int cpu = smp_processor_id();
	/*
	 * ("wakeup()" should not be called before we've initialized
	 * SMP completely.
	 * Basically a not-yet initialized SMP subsystem can be
	 * considered as a not-yet working scheduler, simply dont use
	 * it before it's up and running ...)
	 *
	 * SMP rescheduling is done in 2 passes:
	 *  - pass #1: faster: 'quick decisions'
	 *  - pass #2: slower: 'lets try and find a suitable CPU'
	 */

	/*
	 * Pass #1. (subtle. We might be in the middle of __switch_to, so
	 * to preserve scheduling atomicity we have to use cpu_curr)
	 */
	if ((p->processor == cpu) && related(cpu_curr(cpu), p))
		return;
#endif /* __SMP__ */
	/*
	 * Pass #2
	 */
	reschedule_idle_slow(p);
}
/*
 * This has to add the process to the _beginning_ of the
 * run-queue, not the end. See the comment about "This is
 * subtle" in the scheduler proper..
 */
static inline void add_to_runqueue(struct task_struct * p)
{
	list_add(&p->run_list, &runqueue_head);
	nr_running++;
}

static inline void move_last_runqueue(struct task_struct * p)
{
	list_del(&p->run_list);
	list_add_tail(&p->run_list, &runqueue_head);
}

static inline void move_first_runqueue(struct task_struct * p)
{
	list_del(&p->run_list);
	list_add(&p->run_list, &runqueue_head);
}

/*
 * Wake up a process. Put it on the run-queue if it's not
 * already there.  The "current" process is always on the
 * run-queue (except when the actual re-schedule is in
 * progress), and as such you're allowed to do the simpler
 * "current->state = TASK_RUNNING" to mark yourself runnable
 * without the overhead of this.
 */
void wake_up_process(struct task_struct * p)
{
	unsigned long flags;

	/*
	 * We want the common case fall through straight, thus the goto.
	 */
	spin_lock_irqsave(&runqueue_lock, flags);
	p->state = TASK_RUNNING;
	if (task_on_runqueue(p))
		goto out;
	add_to_runqueue(p);
	spin_unlock_irqrestore(&runqueue_lock, flags);

	reschedule_idle(p);
	return;
out:
	spin_unlock_irqrestore(&runqueue_lock, flags);
}
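/*
 * Illustrative sketch (not part of the original file): the classic
 * pattern that pairs with wake_up_process().  The sleeper marks itself
 * TASK_INTERRUPTIBLE *before* re-testing the condition, so a waker that
 * sets the condition and then calls wake_up_process() cannot be missed.
 * The names "example_done", "example_sleeper" and "example_waker" are
 * made up for this sketch.
 */
#if 0
static volatile int example_done;

static void example_sleeper(void)
{
	while (!example_done) {
		current->state = TASK_INTERRUPTIBLE;
		if (example_done)
			break;
		schedule();
	}
	current->state = TASK_RUNNING;
}

static void example_waker(struct task_struct *sleeper)
{
	example_done = 1;
	wake_up_process(sleeper);	/* puts the sleeper back on the runqueue */
}
#endif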
static void process_timeout(unsigned long __data)
{
	struct task_struct * p = (struct task_struct *) __data;

	wake_up_process(p);
}

/*
 * Event timer code
 */
#define TVN_BITS 6
#define TVR_BITS 8
#define TVN_SIZE (1 << TVN_BITS)
#define TVR_SIZE (1 << TVR_BITS)
#define TVN_MASK (TVN_SIZE - 1)
#define TVR_MASK (TVR_SIZE - 1)

struct timer_vec {
	int index;
	struct timer_list *vec[TVN_SIZE];
};

struct timer_vec_root {
	int index;
	struct timer_list *vec[TVR_SIZE];
};

static struct timer_vec tv5 = { 0 };
static struct timer_vec tv4 = { 0 };
static struct timer_vec tv3 = { 0 };
static struct timer_vec tv2 = { 0 };
static struct timer_vec_root tv1 = { 0 };

static struct timer_vec * const tvecs[] = {
	(struct timer_vec *)&tv1, &tv2, &tv3, &tv4, &tv5
};

#define NOOF_TVECS (sizeof(tvecs) / sizeof(tvecs[0]))

static unsigned long timer_jiffies = 0;
static inline void insert_timer(struct timer_list *timer,
				struct timer_list **vec, int idx)
{
	if ((timer->next = vec[idx]))
		vec[idx]->prev = timer;
	vec[idx] = timer;
	timer->prev = (struct timer_list *)&vec[idx];
}

static inline void internal_add_timer(struct timer_list *timer)
{
	/*
	 * must be cli-ed when calling this
	 */
	unsigned long expires = timer->expires;
	unsigned long idx = expires - timer_jiffies;

	if (idx < TVR_SIZE) {
		int i = expires & TVR_MASK;
		insert_timer(timer, tv1.vec, i);
	} else if (idx < 1 << (TVR_BITS + TVN_BITS)) {
		int i = (expires >> TVR_BITS) & TVN_MASK;
		insert_timer(timer, tv2.vec, i);
	} else if (idx < 1 << (TVR_BITS + 2 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + TVN_BITS)) & TVN_MASK;
		insert_timer(timer, tv3.vec, i);
	} else if (idx < 1 << (TVR_BITS + 3 * TVN_BITS)) {
		int i = (expires >> (TVR_BITS + 2 * TVN_BITS)) & TVN_MASK;
		insert_timer(timer, tv4.vec, i);
	} else if ((signed long) idx < 0) {
		/* can happen if you add a timer with expires == jiffies,
		 * or you set a timer to go off in the past
		 */
		insert_timer(timer, tv1.vec, tv1.index);
	} else if (idx <= 0xffffffffUL) {
		int i = (expires >> (TVR_BITS + 3 * TVN_BITS)) & TVN_MASK;
		insert_timer(timer, tv5.vec, i);
	} else {
		/* Can only get here on architectures with 64-bit jiffies */
		timer->next = timer->prev = timer;
	}
}
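/*
 * Worked example (illustrative, assuming the TVR_BITS == 8 / TVN_BITS == 6
 * values defined above): a timer set 1000 ticks into the future has
 * idx == 1000, which is >= TVR_SIZE (256) but < 1 << 14, so it lands in
 * tv2 at slot (expires >> 8) & 63.  Only when tv1 wraps around does
 * cascade_timers() pull it down into a precise tv1 slot.
 */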
spinlock_t timerlist_lock = SPIN_LOCK_UNLOCKED;

void add_timer(struct timer_list *timer)
{
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	if (timer->prev)
		goto bug;
	internal_add_timer(timer);
out:
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return;

bug:
	printk("bug: kernel timer added twice at %p.\n",
			__builtin_return_address(0));
	goto out;
}

static inline int detach_timer(struct timer_list *timer)
{
	struct timer_list *prev = timer->prev;
	if (prev) {
		struct timer_list *next = timer->next;
		prev->next = next;
		if (next)
			next->prev = prev;
		return 1;
	}
	return 0;
}

void mod_timer(struct timer_list *timer, unsigned long expires)
{
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	timer->expires = expires;
	detach_timer(timer);
	internal_add_timer(timer);
	spin_unlock_irqrestore(&timerlist_lock, flags);
}

int del_timer(struct timer_list * timer)
{
	int ret;
	unsigned long flags;

	spin_lock_irqsave(&timerlist_lock, flags);
	ret = detach_timer(timer);
	timer->next = timer->prev = 0;
	spin_unlock_irqrestore(&timerlist_lock, flags);
	return ret;
}
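/*
 * Usage sketch (illustrative, not from the original file): a typical
 * one-shot kernel timer.  "my_timer", "my_timeout_handler",
 * "my_start_timer" and "my_stop_timer" are made-up names; the
 * init_timer()/add_timer()/del_timer() calls are the API declared in
 * <linux/timer.h>.
 */
#if 0
static void my_timeout_handler(unsigned long data)
{
	printk("timer fired, data=%lu\n", data);
}

static struct timer_list my_timer;

static void my_start_timer(void)
{
	init_timer(&my_timer);
	my_timer.expires = jiffies + 2*HZ;	/* roughly two seconds from now */
	my_timer.data = 42;
	my_timer.function = my_timeout_handler;
	add_timer(&my_timer);
}

static void my_stop_timer(void)
{
	del_timer(&my_timer);	/* safe even if the timer already fired */
}
#endif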
signed long schedule_timeout(signed long timeout)
{
	struct timer_list timer;
	unsigned long expire;

	switch (timeout)
	{
	case MAX_SCHEDULE_TIMEOUT:
		/*
		 * These two special cases are useful to be comfortable
		 * in the caller. Nothing more. We could take
		 * MAX_SCHEDULE_TIMEOUT from one of the negative value
		 * but I' d like to return a valid offset (>=0) to allow
		 * the caller to do everything it want with the retval.
		 */
		schedule();
		goto out;
	default:
		/*
		 * Another bit of PARANOID. Note that the retval will be
		 * 0 since no piece of kernel is supposed to do a check
		 * for a negative retval of schedule_timeout() (since it
		 * should never happens anyway). You just have the printk()
		 * that will tell you if something is gone wrong and where.
		 */
		if (timeout < 0)
		{
			printk(KERN_ERR "schedule_timeout: wrong timeout "
			       "value %lx from %p\n", timeout,
			       __builtin_return_address(0));
			current->state = TASK_RUNNING;
			goto out;
		}
	}

	expire = timeout + jiffies;

	init_timer(&timer);
	timer.expires = expire;
	timer.data = (unsigned long) current;
	timer.function = process_timeout;

	add_timer(&timer);
	schedule();
	del_timer(&timer);

	timeout = expire - jiffies;

out:
	return timeout < 0 ? 0 : timeout;
}
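/*
 * Usage sketch (illustrative): sleeping for a bounded time.  The caller
 * must set current->state first; schedule_timeout() returns the number of
 * jiffies left if the task was woken early (e.g. by a signal), 0 if the
 * timeout fully expired.  "example_wait_a_second" is a made-up name.
 */
#if 0
static long example_wait_a_second(void)
{
	current->state = TASK_INTERRUPTIBLE;
	return schedule_timeout(HZ);	/* ~1 second; 0 means it expired */
}
#endif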
/*
 * schedule_tail() is getting called from the fork return path. This
 * cleans up all remaining scheduler things, without impacting the
 * common case.
 */
static inline void __schedule_tail(struct task_struct *prev)
{
#ifdef __SMP__
	if ((prev->state == TASK_RUNNING) &&
			(prev != idle_task(smp_processor_id())))
		reschedule_idle(prev);
	wmb();
	prev->has_cpu = 0;
#endif /* __SMP__ */
}

void schedule_tail(struct task_struct *prev)
{
	__schedule_tail(prev);
}
/*
 *  'schedule()' is the scheduler function. It's a very simple and nice
 * scheduler: it's not perfect, but certainly works for most things.
 *
 * The goto is "interesting".
 *
 *   NOTE!!  Task 0 is the 'idle' task, which gets called when no other
 * tasks can run. It can not be killed, and it cannot sleep. The 'state'
 * information in task[0] is never used.
 */
asmlinkage void schedule(void)
{
	struct schedule_data * sched_data;
	struct task_struct *prev, *next, *p;
	struct list_head *tmp;
	int this_cpu, c;

	if (!current->active_mm) BUG();
	if (tq_scheduler)
		goto handle_tq_scheduler;
tq_scheduler_back:

	prev = current;
	this_cpu = prev->processor;

	if (in_interrupt())
		goto scheduling_in_interrupt;

	release_kernel_lock(prev, this_cpu);

	/* Do "administrative" work here while we don't hold any locks */
	if (bh_mask & bh_active)
		goto handle_bh;
handle_bh_back:

	/*
	 * 'sched_data' is protected by the fact that we can run
	 * only one process per CPU.
	 */
	sched_data = & aligned_data[this_cpu].schedule_data;

	spin_lock_irq(&runqueue_lock);

	/* move an exhausted RR process to be last.. */
	if (prev->policy == SCHED_RR)
		goto move_rr_last;
move_rr_back:

	switch (prev->state) {
		case TASK_INTERRUPTIBLE:
			if (signal_pending(prev)) {
				prev->state = TASK_RUNNING;
				break;
			}
		default:
			del_from_runqueue(prev);
		case TASK_RUNNING:
	}
	prev->need_resched = 0;

	/*
	 * this is the scheduler proper:
	 */

repeat_schedule:
	/*
	 * Default process to select..
	 */
	next = idle_task(this_cpu);
	c = -1000;
	if (prev->state == TASK_RUNNING)
		goto still_running;
still_running_back:

	tmp = runqueue_head.next;
	while (tmp != &runqueue_head) {
		p = list_entry(tmp, struct task_struct, run_list);
		if (can_schedule(p)) {
			int weight = goodness(p, this_cpu, prev->active_mm);
			if (weight > c)
				c = weight, next = p;
		}
		tmp = tmp->next;
	}

	/* Do we need to re-calculate counters? */
	if (!c)
		goto recalculate;
	/*
	 * from this point on nothing can prevent us from
	 * switching to the next task, save this fact in
	 * sched_data.
	 */
	sched_data->curr = next;
#ifdef __SMP__
	next->has_cpu = 1;
	next->processor = this_cpu;
#endif
	spin_unlock_irq(&runqueue_lock);

	if (prev == next)
		goto same_process;

#ifdef __SMP__
	/*
	 * maintain the per-process 'average timeslice' value.
	 * (this has to be recalculated even if we reschedule to
	 * the same process) Currently this is only used on SMP,
	 * and it's approximate, so we do not have to maintain
	 * it while holding the runqueue spinlock.
	 */
	{
		cycles_t t, this_slice;

		t = get_cycles();
		this_slice = t - sched_data->last_schedule;
		sched_data->last_schedule = t;

		/*
		 * Exponentially fading average calculation, with
		 * some weight so it doesnt get fooled easily by
		 * smaller irregularities.
		 */
		prev->avg_slice = (this_slice*1 + prev->avg_slice*1)/2;
	}

	/*
	 * We drop the scheduler lock early (it's a global spinlock),
	 * thus we have to lock the previous process from getting
	 * rescheduled during switch_to().
	 */

#endif /* __SMP__ */

	kstat.context_swtch++;
	/*
	 * there are 3 processes which are affected by a context switch:
	 *
	 * prev == .... ==> (last => next)
	 *
	 * It's the 'much more previous' 'prev' that is on next's stack,
	 * but prev is set to (the just run) 'last' process by switch_to().
	 * This might sound slightly confusing but makes tons of sense.
	 */
	prepare_to_switch();
	{
		struct mm_struct *mm = next->mm;
		struct mm_struct *oldmm = prev->active_mm;
		if (!mm) {
			if (next->active_mm) BUG();
			next->active_mm = oldmm;
			atomic_inc(&oldmm->mm_count);
		} else {
			if (next->active_mm != mm) BUG();
			switch_mm(oldmm, mm, this_cpu);
		}

		if (!prev->mm) {
			prev->active_mm = NULL;
			mmdrop(oldmm);
		}
	}

	/*
	 * This just switches the register state and the
	 * stack.
	 */
	switch_to(prev, next, prev);
	__schedule_tail(prev);

same_process:
	reacquire_kernel_lock(current);
	return;

recalculate:
	{
		struct task_struct *p;
		spin_unlock_irq(&runqueue_lock);
		read_lock(&tasklist_lock);
		for_each_task(p)
			p->counter = (p->counter >> 1) + p->priority;
		read_unlock(&tasklist_lock);
		spin_lock_irq(&runqueue_lock);
		goto repeat_schedule;
	}

still_running:
	c = prev_goodness(prev, this_cpu, prev->active_mm);
	next = prev;
	goto still_running_back;

handle_bh:
	do_bottom_half();
	goto handle_bh_back;

handle_tq_scheduler:
	run_task_queue(&tq_scheduler);
	goto tq_scheduler_back;

move_rr_last:
	if (!prev->counter) {
		prev->counter = prev->priority;
		move_last_runqueue(prev);
	}
	goto move_rr_back;

scheduling_in_interrupt:
	printk("Scheduling in interrupt\n");
	*(int *)0 = 0;
	return;
}
void __wake_up(wait_queue_head_t *q, unsigned int mode)
{
	struct list_head *tmp, *head;
	struct task_struct *p;
	unsigned long flags;

	if (!q)
		goto out;

	wq_write_lock_irqsave(&q->lock, flags);

#if WAITQUEUE_DEBUG
	CHECK_MAGIC_WQHEAD(q);
#endif

	head = &q->task_list;
#if WAITQUEUE_DEBUG
	if (!head->next || !head->prev)
		WQ_BUG();
#endif
	tmp = head->next;
	while (tmp != head) {
		unsigned int state;
		wait_queue_t *curr = list_entry(tmp, wait_queue_t, task_list);

		tmp = tmp->next;

#if WAITQUEUE_DEBUG
		CHECK_MAGIC(curr->__magic);
#endif
		p = curr->task;
		state = p->state;
		if (state & mode) {
#if WAITQUEUE_DEBUG
			curr->__waker = (long)__builtin_return_address(0);
#endif
			wake_up_process(p);
			if (state & TASK_EXCLUSIVE)
				break;
		}
	}
	wq_write_unlock_irqrestore(&q->lock, flags);
out:
	return;
}
/*
 * Semaphores are implemented using a two-way counter:
 * The "count" variable is decremented for each process
 * that tries to sleep, while the "waking" variable is
 * incremented when the "up()" code goes to wake up waiting
 * processes.
 *
 * Notably, the inline "up()" and "down()" functions can
 * efficiently test if they need to do any extra work (up
 * needs to do something only if count was negative before
 * the increment operation.
 *
 * waking_non_zero() (from asm/semaphore.h) must execute
 * atomically.
 *
 * When __up() is called, the count was negative before
 * incrementing it, and we need to wake up somebody.
 *
 * This routine adds one to the count of processes that need to
 * wake up and exit.  ALL waiting processes actually wake up but
 * only the one that gets to the "waking" field first will gate
 * through and acquire the semaphore.  The others will go back
 * to sleep.
 *
 * Note that these functions are only called when there is
 * contention on the lock, and as such all this is the
 * "non-critical" part of the whole semaphore business. The
 * critical part is the inline stuff in <asm/semaphore.h>
 * where we want to avoid any extra jumps and calls.
 */
void __up(struct semaphore *sem)
{
	wake_one_more(sem);
	wake_up(&sem->wait);
}

/*
 * Perform the "down" function.  Return zero for semaphore acquired,
 * return negative for signalled out of the function.
 *
 * If called from __down, the return is ignored and the wait loop is
 * not interruptible.  This means that a task waiting on a semaphore
 * using "down()" cannot be killed until someone does an "up()" on
 * the semaphore.
 *
 * If called from __down_interruptible, the return value gets checked
 * upon return.  If the return value is negative then the task continues
 * with the negative value in the return register (it can be tested by
 * the caller).
 *
 * Either form may be used in conjunction with "up()".
 */

#define DOWN_VAR				\
	struct task_struct *tsk = current;	\
	wait_queue_t wait;			\
	init_waitqueue_entry(&wait, tsk);

#define DOWN_HEAD(task_state)						\
									\
									\
	tsk->state = (task_state);					\
	add_wait_queue(&sem->wait, &wait);				\
									\
	/*								\
	 * Ok, we're set up.  sem->count is known to be less than zero	\
	 * so we must wait.						\
	 *								\
	 * We can let go the lock for purposes of waiting.		\
	 * We re-acquire it after awaking so as to protect		\
	 * all semaphore operations.					\
	 *								\
	 * If "up()" is called before we call waking_non_zero() then	\
	 * we will catch it right away.  If it is called later then	\
	 * we will have to go through a wakeup cycle to catch it.	\
	 *								\
	 * Multiple waiters contend for the semaphore lock to see	\
	 * who gets to gate through and who has to wait some more.	\
	 */								\
	for (;;) {

#define DOWN_TAIL(task_state)			\
		tsk->state = (task_state);	\
	}					\
	tsk->state = TASK_RUNNING;		\
	remove_wait_queue(&sem->wait, &wait);

void __down(struct semaphore * sem)
{
	DOWN_VAR
	DOWN_HEAD(TASK_UNINTERRUPTIBLE)
	if (waking_non_zero(sem))
		break;
	schedule();
	DOWN_TAIL(TASK_UNINTERRUPTIBLE)
}

int __down_interruptible(struct semaphore * sem)
{
	int ret = 0;
	DOWN_VAR
	DOWN_HEAD(TASK_INTERRUPTIBLE)

	ret = waking_non_zero_interruptible(sem, tsk);
	if (ret)
	{
		if (ret == 1)
			/* ret != 0 only if we get interrupted -arca */
			ret = 0;
		break;
	}
	schedule();
	DOWN_TAIL(TASK_INTERRUPTIBLE)
	return ret;
}

int __down_trylock(struct semaphore * sem)
{
	return waking_non_zero_trylock(sem);
}
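/*
 * Usage sketch (illustrative, not from the original file): the inline
 * down()/up() fast paths live in <asm/semaphore.h>; the functions above
 * are only the contended slow paths.  "example_sem" and
 * "example_critical_section" are made-up names, and the semaphore is
 * assumed to have been initialized to 1 elsewhere (the initialization
 * macro varies between kernel versions).
 */
#if 0
static struct semaphore example_sem;	/* assume initialized to 1 elsewhere */

static int example_critical_section(void)
{
	if (down_interruptible(&example_sem))
		return -EINTR;		/* interrupted while waiting */
	/* ... touch the data protected by example_sem ... */
	up(&example_sem);
	return 0;
}
#endif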
#define	SLEEP_ON_VAR				\
	unsigned long flags;			\
	wait_queue_t wait;			\
	init_waitqueue_entry(&wait, current);

#define	SLEEP_ON_HEAD					\
	wq_write_lock_irqsave(&q->lock,flags);		\
	__add_wait_queue(q, &wait);			\
	wq_write_unlock(&q->lock);

#define	SLEEP_ON_TAIL						\
	wq_write_lock_irq(&q->lock);				\
	__remove_wait_queue(q, &wait);				\
	wq_write_unlock_irqrestore(&q->lock,flags);

void interruptible_sleep_on(wait_queue_head_t *q)
{
	SLEEP_ON_VAR

	current->state = TASK_INTERRUPTIBLE;

	SLEEP_ON_HEAD
	schedule();
	SLEEP_ON_TAIL
}

long interruptible_sleep_on_timeout(wait_queue_head_t *q, long timeout)
{
	SLEEP_ON_VAR

	current->state = TASK_INTERRUPTIBLE;

	SLEEP_ON_HEAD
	timeout = schedule_timeout(timeout);
	SLEEP_ON_TAIL

	return timeout;
}

void sleep_on(wait_queue_head_t *q)
{
	SLEEP_ON_VAR

	current->state = TASK_UNINTERRUPTIBLE;

	SLEEP_ON_HEAD
	schedule();
	SLEEP_ON_TAIL
}

long sleep_on_timeout(wait_queue_head_t *q, long timeout)
{
	SLEEP_ON_VAR

	current->state = TASK_UNINTERRUPTIBLE;

	SLEEP_ON_HEAD
	timeout = schedule_timeout(timeout);
	SLEEP_ON_TAIL

	return timeout;
}

void scheduling_functions_end_here(void) { }
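/*
 * Usage sketch (illustrative): a wait queue head shared by a sleeper and
 * a waker.  interruptible_sleep_on_timeout() is defined above; wake_up()
 * is the <linux/sched.h> wrapper around __wake_up().  "example_wq",
 * "example_ready", "example_consumer" and "example_producer" are made-up
 * names.  (Note the classic sleep_on race: the condition is re-tested in
 * a loop because the wakeup may arrive before the sleeper is queued.)
 */
#if 0
static DECLARE_WAIT_QUEUE_HEAD(example_wq);
static int example_ready;

static void example_consumer(void)
{
	while (!example_ready)
		interruptible_sleep_on_timeout(&example_wq, HZ);
}

static void example_producer(void)
{
	example_ready = 1;
	wake_up(&example_wq);
}
#endif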
static inline void cascade_timers(struct timer_vec *tv)
{
	/* cascade all the timers from tv up one level */
	struct timer_list *timer;
	timer = tv->vec[tv->index];
	/*
	 * We are removing _all_ timers from the list, so we don't have to
	 * detach them individually, just clear the list afterwards.
	 */
	while (timer) {
		struct timer_list *tmp = timer;
		timer = timer->next;
		internal_add_timer(tmp);
	}
	tv->vec[tv->index] = NULL;
	tv->index = (tv->index + 1) & TVN_MASK;
}

static inline void run_timer_list(void)
{
	spin_lock_irq(&timerlist_lock);
	while ((long)(jiffies - timer_jiffies) >= 0) {
		struct timer_list *timer;
		if (!tv1.index) {
			int n = 1;
			do {
				cascade_timers(tvecs[n]);
			} while (tvecs[n]->index == 1 && ++n < NOOF_TVECS);
		}
		while ((timer = tv1.vec[tv1.index])) {
			void (*fn)(unsigned long) = timer->function;
			unsigned long data = timer->data;
			detach_timer(timer);
			timer->next = timer->prev = NULL;
			spin_unlock_irq(&timerlist_lock);
			fn(data);
			spin_lock_irq(&timerlist_lock);
		}
		++timer_jiffies;
		tv1.index = (tv1.index + 1) & TVR_MASK;
	}
	spin_unlock_irq(&timerlist_lock);
}
static inline void run_old_timers(void)
{
	struct timer_struct *tp;
	unsigned long mask;

	for (mask = 1, tp = timer_table+0 ; mask ; tp++,mask += mask) {
		if (mask > timer_active)
			break;
		if (!(mask & timer_active))
			continue;
		if (time_after(tp->expires, jiffies))
			continue;
		timer_active &= ~mask;
		tp->fn();
		sti();
	}
}

spinlock_t tqueue_lock;

void tqueue_bh(void)
{
	run_task_queue(&tq_timer);
}

void immediate_bh(void)
{
	run_task_queue(&tq_immediate);
}

unsigned long timer_active = 0;
struct timer_struct timer_table[32];
/*
 * Hmm.. Changed this, as the GNU make sources (load.c) seems to
 * imply that avenrun[] is the standard name for this kind of thing.
 * Nothing else seems to be standardized: the fractional size etc
 * all seem to differ on different machines.
 */
unsigned long avenrun[3] = { 0,0,0 };

/*
 * Nr of active tasks - counted in fixed-point numbers
 */
static unsigned long count_active_tasks(void)
{
	struct task_struct *p;
	unsigned long nr = 0;

	read_lock(&tasklist_lock);
	for_each_task(p) {
		if ((p->state == TASK_RUNNING ||
		     (p->state & TASK_UNINTERRUPTIBLE) ||
		     (p->state & TASK_SWAPPING)))
			nr += FIXED_1;
	}
	read_unlock(&tasklist_lock);
	return nr;
}

static inline void calc_load(unsigned long ticks)
{
	unsigned long active_tasks; /* fixed-point */
	static int count = LOAD_FREQ;

	count -= ticks;
	if (count < 0) {
		count += LOAD_FREQ;
		active_tasks = count_active_tasks();
		CALC_LOAD(avenrun[0], EXP_1, active_tasks);
		CALC_LOAD(avenrun[1], EXP_5, active_tasks);
		CALC_LOAD(avenrun[2], EXP_15, active_tasks);
	}
}
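/*
 * Illustrative note (not from the original file): avenrun[] is kept in
 * FSHIFT-bit fixed point (FSHIFT/FIXED_1 come from <linux/sched.h>),
 * which is how /proc/loadavg-style code turns it back into a decimal.
 * "example_print_loadavg" is a made-up name.
 */
#if 0
static void example_print_loadavg(void)
{
	unsigned long a = avenrun[0] + (FIXED_1/200);	/* round to 2 digits */

	printk("load: %lu.%02lu\n",
	       a >> FSHIFT, ((a & (FIXED_1 - 1)) * 100) >> FSHIFT);
}
#endif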
/*
 * this routine handles the overflow of the microsecond field
 *
 * The tricky bits of code to handle the accurate clock support
 * were provided by Dave Mills (Mills@UDEL.EDU) of NTP fame.
 * They were originally developed for SUN and DEC kernels.
 * All the kudos should go to Dave for this stuff.
 *
 */
static void second_overflow(void)
{
	long ltemp;

	/* Bump the maxerror field */
	time_maxerror += time_tolerance >> SHIFT_USEC;
	if ( time_maxerror > NTP_PHASE_LIMIT ) {
		time_maxerror = NTP_PHASE_LIMIT;
		time_status |= STA_UNSYNC;
	}

	/*
	 * Leap second processing. If in leap-insert state at
	 * the end of the day, the system clock is set back one
	 * second; if in leap-delete state, the system clock is
	 * set ahead one second. The microtime() routine or
	 * external clock driver will insure that reported time
	 * is always monotonic. The ugly divides should be
	 * replaced.
	 */
	switch (time_state) {

	case TIME_OK:
		if (time_status & STA_INS)
			time_state = TIME_INS;
		else if (time_status & STA_DEL)
			time_state = TIME_DEL;
		break;

	case TIME_INS:
		if (xtime.tv_sec % 86400 == 0) {
			xtime.tv_sec--;
			time_state = TIME_OOP;
			printk(KERN_NOTICE "Clock: inserting leap second 23:59:60 UTC\n");
		}
		break;

	case TIME_DEL:
		if ((xtime.tv_sec + 1) % 86400 == 0) {
			xtime.tv_sec++;
			time_state = TIME_WAIT;
			printk(KERN_NOTICE "Clock: deleting leap second 23:59:59 UTC\n");
		}
		break;

	case TIME_OOP:
		time_state = TIME_WAIT;
		break;

	case TIME_WAIT:
		if (!(time_status & (STA_INS | STA_DEL)))
			time_state = TIME_OK;
	}

	/*
	 * Compute the phase adjustment for the next second. In
	 * PLL mode, the offset is reduced by a fixed factor
	 * times the time constant. In FLL mode the offset is
	 * used directly. In either mode, the maximum phase
	 * adjustment for each second is clamped so as to spread
	 * the adjustment over not more than the number of
	 * seconds between updates.
	 */
	if (time_offset < 0) {
		ltemp = -time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset += ltemp;
		time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	} else {
		ltemp = time_offset;
		if (!(time_status & STA_FLL))
			ltemp >>= SHIFT_KG + time_constant;
		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
		time_offset -= ltemp;
		time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
	}

	/*
	 * Compute the frequency estimate and additional phase
	 * adjustment due to frequency error for the next
	 * second. When the PPS signal is engaged, gnaw on the
	 * watchdog counter and update the frequency computed by
	 * the pll and the PPS signal.
	 */
	pps_valid++;
	if (pps_valid == PPS_VALID) {	/* PPS signal lost */
		pps_jitter = MAXTIME;
		pps_stabil = MAXFREQ;
		time_status &= ~(STA_PPSSIGNAL | STA_PPSJITTER |
				 STA_PPSWANDER | STA_PPSERROR);
	}
	ltemp = time_freq + pps_freq;
	if (ltemp < 0)
		time_adj -= -ltemp >>
			(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
	else
		time_adj += ltemp >>
			(SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);

#if HZ == 100
	/* Compensate for (HZ==100) != (1 << SHIFT_HZ).
	 * Add 25% and 3.125% to get 128.125; => only 0.125% error (p. 14)
	 */
	if (time_adj < 0)
		time_adj -= (-time_adj >> 2) + (-time_adj >> 5);
	else
		time_adj += (time_adj >> 2) + (time_adj >> 5);
#endif
}
/* in the NTP reference this is called "hardclock()" */
static void update_wall_time_one_tick(void)
{
	if ( (time_adjust_step = time_adjust) != 0 ) {
		/* We are doing an adjtime thing.
		 *
		 * Prepare time_adjust_step to be within bounds.
		 * Note that a positive time_adjust means we want the clock
		 * to run faster.
		 *
		 * Limit the amount of the step to be in the range
		 * -tickadj .. +tickadj
		 */
		if (time_adjust > tickadj)
			time_adjust_step = tickadj;
		else if (time_adjust < -tickadj)
			time_adjust_step = -tickadj;

		/* Reduce by this step the amount of time left  */
		time_adjust -= time_adjust_step;
	}
	xtime.tv_usec += tick + time_adjust_step;
	/*
	 * Advance the phase, once it gets to one microsecond, then
	 * advance the tick more.
	 */
	time_phase += time_adj;
	if (time_phase <= -FINEUSEC) {
		long ltemp = -time_phase >> SHIFT_SCALE;
		time_phase += ltemp << SHIFT_SCALE;
		xtime.tv_usec -= ltemp;
	}
	else if (time_phase >= FINEUSEC) {
		long ltemp = time_phase >> SHIFT_SCALE;
		time_phase -= ltemp << SHIFT_SCALE;
		xtime.tv_usec += ltemp;
	}
}

/*
 * Using a loop looks inefficient, but "ticks" is
 * usually just one (we shouldn't be losing ticks,
 * we're doing this this way mainly for interrupt
 * latency reasons, not because we think we'll
 * have lots of lost timer ticks
 */
static void update_wall_time(unsigned long ticks)
{
	do {
		ticks--;
		update_wall_time_one_tick();
	} while (ticks);

	if (xtime.tv_usec >= 1000000) {
		xtime.tv_usec -= 1000000;
		xtime.tv_sec++;
		second_overflow();
	}
}
static inline void do_process_times(struct task_struct *p,
	unsigned long user, unsigned long system)
{
	unsigned long psecs;

	psecs = (p->times.tms_utime += user);
	psecs += (p->times.tms_stime += system);
	if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_cur) {
		/* Send SIGXCPU every second.. */
		if (!(psecs % HZ))
			send_sig(SIGXCPU, p, 1);
		/* and SIGKILL when we go over max.. */
		if (psecs / HZ > p->rlim[RLIMIT_CPU].rlim_max)
			send_sig(SIGKILL, p, 1);
	}
}

static inline void do_it_virt(struct task_struct * p, unsigned long ticks)
{
	unsigned long it_virt = p->it_virt_value;

	if (it_virt) {
		if (it_virt <= ticks) {
			it_virt = ticks + p->it_virt_incr;
			send_sig(SIGVTALRM, p, 1);
		}
		p->it_virt_value = it_virt - ticks;
	}
}

static inline void do_it_prof(struct task_struct * p, unsigned long ticks)
{
	unsigned long it_prof = p->it_prof_value;

	if (it_prof) {
		if (it_prof <= ticks) {
			it_prof = ticks + p->it_prof_incr;
			send_sig(SIGPROF, p, 1);
		}
		p->it_prof_value = it_prof - ticks;
	}
}

void update_one_process(struct task_struct *p,
	unsigned long ticks, unsigned long user, unsigned long system, int cpu)
{
	p->per_cpu_utime[cpu] += user;
	p->per_cpu_stime[cpu] += system;
	do_process_times(p, user, system);
	do_it_virt(p, user);
	do_it_prof(p, ticks);
}

static void update_process_times(unsigned long ticks, unsigned long system)
{
/*
 * SMP does this on a per-CPU basis elsewhere
 */
#ifndef __SMP__
	struct task_struct * p = current;
	unsigned long user = ticks - system;
	if (p->pid) {
		p->counter -= ticks;
		if (p->counter <= 0) {
			p->counter = 0;
			p->need_resched = 1;
		}
		if (p->priority < DEF_PRIORITY)
			kstat.cpu_nice += user;
		else
			kstat.cpu_user += user;
		kstat.cpu_system += system;
	}
	update_one_process(p, ticks, user, system, 0);
#endif
}
volatile unsigned long lost_ticks = 0;
static unsigned long lost_ticks_system = 0;

/*
 * This spinlock protect us from races in SMP while playing with xtime. -arca
 */
rwlock_t xtime_lock = RW_LOCK_UNLOCKED;

static inline void update_times(void)
{
	unsigned long ticks;

	/*
	 * update_times() is run from the raw timer_bh handler so we
	 * just know that the irqs are locally enabled and so we don't
	 * need to save/restore the flags of the local CPU here. -arca
	 */
	write_lock_irq(&xtime_lock);

	ticks = lost_ticks;
	lost_ticks = 0;

	if (ticks) {
		unsigned long system;
		system = xchg(&lost_ticks_system, 0);

		calc_load(ticks);
		update_wall_time(ticks);
		write_unlock_irq(&xtime_lock);

		update_process_times(ticks, system);

	} else
		write_unlock_irq(&xtime_lock);
}

static void timer_bh(void)
{
	update_times();
	run_old_timers();
	run_timer_list();
}

void do_timer(struct pt_regs * regs)
{
	(*(unsigned long *)&jiffies)++;
	lost_ticks++;
	mark_bh(TIMER_BH);
	if (!user_mode(regs))
		lost_ticks_system++;
	if (tq_timer)
		mark_bh(TQUEUE_BH);
}
#ifndef __alpha__

/*
 * For backwards compatibility?  This can be done in libc so Alpha
 * and all newer ports shouldn't need it.
 */
asmlinkage unsigned int sys_alarm(unsigned int seconds)
{
	struct itimerval it_new, it_old;
	unsigned int oldalarm;

	it_new.it_interval.tv_sec = it_new.it_interval.tv_usec = 0;
	it_new.it_value.tv_sec = seconds;
	it_new.it_value.tv_usec = 0;
	do_setitimer(ITIMER_REAL, &it_new, &it_old);
	oldalarm = it_old.it_value.tv_sec;
	/* ehhh.. We can't return 0 if we have an alarm pending.. */
	/* And we'd better return too much than too little anyway */
	if (it_old.it_value.tv_usec)
		oldalarm++;
	return oldalarm;
}
/*
 * The Alpha uses getxpid, getxuid, and getxgid instead.  Maybe this
 * should be moved into arch/i386 instead?
 */
asmlinkage int sys_getpid(void)
{
	/* This is SMP safe - current->pid doesn't change */
	return current->pid;
}

/*
 * This is not strictly SMP safe: p_opptr could change
 * from under us. However, rather than getting any lock
 * we can use an optimistic algorithm: get the parent
 * pid, and go back and check that the parent is still
 * the same. If it has changed (which is extremely unlikely
 * indeed), we just try again..
 *
 * NOTE! This depends on the fact that even if we _do_
 * get an old value of "parent", we can happily dereference
 * the pointer: we just can't necessarily trust the result
 * until we know that the parent pointer is valid.
 *
 * The "mb()" macro is a memory barrier - a synchronizing
 * event. It also makes sure that gcc doesn't optimize
 * away the necessary memory references.. The barrier doesn't
 * have to have all that strong semantics: on x86 we don't
 * really require a synchronizing instruction, for example.
 * The barrier is more important for code generation than
 * for any real memory ordering semantics (even if there is
 * a small window for a race, using the old pointer is
 * harmless for a while).
 */
asmlinkage int sys_getppid(void)
{
	int pid;
	struct task_struct * me = current;
	struct task_struct * parent;

	parent = me->p_opptr;
	for (;;) {
		pid = parent->pid;
#if __SMP__
{
		struct task_struct *old = parent;
		mb();
		parent = me->p_opptr;
		if (old != parent)
			continue;
}
#endif
		break;
	}
	return pid;
}

asmlinkage int sys_getuid(void)
{
	/* Only we change this so SMP safe */
	return current->uid;
}

asmlinkage int sys_geteuid(void)
{
	/* Only we change this so SMP safe */
	return current->euid;
}

asmlinkage int sys_getgid(void)
{
	/* Only we change this so SMP safe */
	return current->gid;
}

asmlinkage int sys_getegid(void)
{
	/* Only we change this so SMP safe */
	return current->egid;
}
/*
 * This has been replaced by sys_setpriority.  Maybe it should be
 * moved into the arch dependent tree for those ports that require
 * it for backward compatibility?
 */

asmlinkage int sys_nice(int increment)
{
	unsigned long newprio;
	int increase = 0;

	/*
	 *	Setpriority might change our priority at the same moment.
	 *	We don't have to worry. Conceptually one call occurs first
	 *	and we have a single winner.
	 */

	newprio = increment;
	if (increment < 0) {
		if (!capable(CAP_SYS_NICE))
			return -EPERM;
		newprio = -increment;
		increase = 1;
	}

	if (newprio > 40)
		newprio = 40;
	/*
	 * do a "normalization" of the priority (traditionally
	 * Unix nice values are -20 to 20; Linux doesn't really
	 * use that kind of thing, but uses the length of the
	 * timeslice instead (default 200 ms). The rounding is
	 * why we want to avoid negative values.
	 */
	newprio = (newprio * DEF_PRIORITY + 10) / 20;
	increment = newprio;
	if (increase)
		increment = -increment;
	/*
	 *	Current->priority can change between this point
	 *	and the assignment. We are assigning not doing add/subs
	 *	so thats ok. Conceptually a process might just instantaneously
	 *	read the value we stomp over. I don't think that is an issue
	 *	unless posix makes it one. If so we can loop on changes
	 *	to current->priority.
	 */
	newprio = current->priority - increment;
	if ((signed) newprio < 1)
		newprio = 1;
	if (newprio > DEF_PRIORITY*2)
		newprio = DEF_PRIORITY*2;
	current->priority = newprio;
	return 0;
}

#endif
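/*
 * Worked example (illustrative, not from the original file): with
 * DEF_PRIORITY == 20 (the usual value at HZ == 100), nice(10) gives
 * newprio = (10*20 + 10)/20 = 10, so the caller's priority (time slice)
 * drops by 10 ticks; nice(-10) needs CAP_SYS_NICE and raises it by the
 * same amount, with the result clamped to 1..DEF_PRIORITY*2.
 */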
static inline struct task_struct *find_process_by_pid(pid_t pid)
{
	struct task_struct *tsk = current;

	if (pid)
		tsk = find_task_by_pid(pid);
	return tsk;
}

static int setscheduler(pid_t pid, int policy,
			struct sched_param *param)
{
	struct sched_param lp;
	struct task_struct *p;
	int retval;

	retval = -EINVAL;
	if (!param || pid < 0)
		goto out_nounlock;

	retval = -EFAULT;
	if (copy_from_user(&lp, param, sizeof(struct sched_param)))
		goto out_nounlock;

	/*
	 * We play safe to avoid deadlocks.
	 */
	spin_lock_irq(&runqueue_lock);
	read_lock(&tasklist_lock);

	p = find_process_by_pid(pid);

	retval = -ESRCH;
	if (!p)
		goto out_unlock;

	if (policy < 0)
		policy = p->policy;
	else {
		retval = -EINVAL;
		if (policy != SCHED_FIFO && policy != SCHED_RR &&
				policy != SCHED_OTHER)
			goto out_unlock;
	}

	/*
	 * Valid priorities for SCHED_FIFO and SCHED_RR are 1..99, valid
	 * priority for SCHED_OTHER is 0.
	 */
	retval = -EINVAL;
	if (lp.sched_priority < 0 || lp.sched_priority > 99)
		goto out_unlock;
	if ((policy == SCHED_OTHER) != (lp.sched_priority == 0))
		goto out_unlock;

	retval = -EPERM;
	if ((policy == SCHED_FIFO || policy == SCHED_RR) &&
	    !capable(CAP_SYS_NICE))
		goto out_unlock;
	if ((current->euid != p->euid) && (current->euid != p->uid) &&
	    !capable(CAP_SYS_NICE))
		goto out_unlock;

	retval = 0;
	p->policy = policy;
	p->rt_priority = lp.sched_priority;
	if (task_on_runqueue(p))
		move_first_runqueue(p);

	current->need_resched = 1;

out_unlock:
	read_unlock(&tasklist_lock);
	spin_unlock_irq(&runqueue_lock);

out_nounlock:
	return retval;
}
asmlinkage int sys_sched_setscheduler(pid_t pid, int policy,
				      struct sched_param *param)
{
	return setscheduler(pid, policy, param);
}

asmlinkage int sys_sched_setparam(pid_t pid, struct sched_param *param)
{
	return setscheduler(pid, -1, param);
}

asmlinkage int sys_sched_getscheduler(pid_t pid)
{
	struct task_struct *p;
	int retval;

	retval = -EINVAL;
	if (pid < 0)
		goto out_nounlock;

	read_lock(&tasklist_lock);

	retval = -ESRCH;
	p = find_process_by_pid(pid);
	if (!p)
		goto out_unlock;

	retval = p->policy;

out_unlock:
	read_unlock(&tasklist_lock);

out_nounlock:
	return retval;
}

asmlinkage int sys_sched_getparam(pid_t pid, struct sched_param *param)
{
	struct task_struct *p;
	struct sched_param lp;
	int retval;

	retval = -EINVAL;
	if (!param || pid < 0)
		goto out_nounlock;

	read_lock(&tasklist_lock);
	p = find_process_by_pid(pid);
	retval = -ESRCH;
	if (!p)
		goto out_unlock;
	lp.sched_priority = p->rt_priority;
	read_unlock(&tasklist_lock);

	/*
	 * This one might sleep, we cannot do it with a spinlock held ...
	 */
	retval = copy_to_user(param, &lp, sizeof(*param)) ? -EFAULT : 0;

out_nounlock:
	return retval;

out_unlock:
	read_unlock(&tasklist_lock);
	return retval;
}

asmlinkage int sys_sched_yield(void)
{
	spin_lock_irq(&runqueue_lock);
	if (current->policy == SCHED_OTHER)
		current->policy |= SCHED_YIELD;
	current->need_resched = 1;
	move_last_runqueue(current);
	spin_unlock_irq(&runqueue_lock);
	return 0;
}

asmlinkage int sys_sched_get_priority_max(int policy)
{
	int ret = -EINVAL;

	switch (policy) {
	case SCHED_FIFO:
	case SCHED_RR:
		ret = 99;
		break;
	case SCHED_OTHER:
		ret = 0;
		break;
	}
	return ret;
}

asmlinkage int sys_sched_get_priority_min(int policy)
{
	int ret = -EINVAL;

	switch (policy) {
	case SCHED_FIFO:
	case SCHED_RR:
		ret = 1;
		break;
	case SCHED_OTHER:
		ret = 0;
	}
	return ret;
}

asmlinkage int sys_sched_rr_get_interval(pid_t pid, struct timespec *interval)
{
	struct timespec t;

	t.tv_sec = 0;
	t.tv_nsec = 150000;
	if (copy_to_user(interval, &t, sizeof(struct timespec)))
		return -EFAULT;
	return 0;
}
asmlinkage int sys_nanosleep(struct timespec *rqtp, struct timespec *rmtp)
{
	struct timespec t;
	unsigned long expire;

	if (copy_from_user(&t, rqtp, sizeof(struct timespec)))
		return -EFAULT;

	if (t.tv_nsec >= 1000000000L || t.tv_nsec < 0 || t.tv_sec < 0)
		return -EINVAL;

	if (t.tv_sec == 0 && t.tv_nsec <= 2000000L &&
	    current->policy != SCHED_OTHER)
	{
		/*
		 * Short delay requests up to 2 ms will be handled with
		 * high precision by a busy wait for all real-time processes.
		 *
		 * Its important on SMP not to do this holding locks.
		 */
		udelay((t.tv_nsec + 999) / 1000);
		return 0;
	}

	expire = timespec_to_jiffies(&t) + (t.tv_sec || t.tv_nsec);

	current->state = TASK_INTERRUPTIBLE;
	expire = schedule_timeout(expire);

	if (expire) {
		if (rmtp) {
			jiffies_to_timespec(expire, &t);
			if (copy_to_user(rmtp, &t, sizeof(struct timespec)))
				return -EFAULT;
		}
		return -EINTR;
	}
	return 0;
}
static void show_task(struct task_struct * p)
{
	unsigned long free = 0;
	int state;
	static const char * stat_nam[] = { "R", "S", "D", "Z", "T", "W" };

	printk("%-8s ", p->comm);
	state = p->state ? ffz(~p->state) + 1 : 0;
	if (((unsigned) state) < sizeof(stat_nam)/sizeof(char *))
		printk(stat_nam[state]);
	else
		printk(" ");
#if (BITS_PER_LONG == 32)
	if (p == current)
		printk(" current  ");
	else
		printk(" %08lX ", thread_saved_pc(&p->thread));
#else
	if (p == current)
		printk("   current task   ");
	else
		printk(" %016lx ", thread_saved_pc(&p->thread));
#endif
	{
		unsigned long * n = (unsigned long *) (p+1);
		while (!*n)
			n++;
		free = (unsigned long) n - (unsigned long)(p+1);
	}
	printk("%5lu %5d %6d ", free, p->pid, p->p_pptr->pid);
	if (p->p_cptr)
		printk("%5d ", p->p_cptr->pid);
	else
		printk("      ");
	if (!p->mm)
		printk(" (L-TLB) ");
	else
		printk(" (NOTLB) ");
	if (p->p_ysptr)
		printk("%7d", p->p_ysptr->pid);
	else
		printk("       ");
	if (p->p_osptr)
		printk(" %5d\n", p->p_osptr->pid);
	else
		printk("\n");

	{
		struct signal_queue *q;
		char s[sizeof(sigset_t)*2+1], b[sizeof(sigset_t)*2+1];

		render_sigset_t(&p->signal, s);
		render_sigset_t(&p->blocked, b);
		printk("   sig: %d %s %s :", signal_pending(p), s, b);
		for (q = p->sigqueue; q ; q = q->next)
			printk(" %d", q->info.si_signo);
		printk(" X\n");
	}
}

char * render_sigset_t(sigset_t *set, char *buffer)
{
	int i = _NSIG, x;
	do {
		i -= 4, x = 0;
		if (sigismember(set, i+1)) x |= 1;
		if (sigismember(set, i+2)) x |= 2;
		if (sigismember(set, i+3)) x |= 4;
		if (sigismember(set, i+4)) x |= 8;
		*buffer++ = (x < 10 ? '0' : 'a' - 10) + x;
	} while (i >= 4);
	*buffer = 0;
	return buffer;
}

void show_state(void)
{
	struct task_struct *p;

#if (BITS_PER_LONG == 32)
	printk("\n"
	       "                         free                        sibling\n");
	printk("  task             PC    stack   pid father child younger older\n");
#else
	printk("\n"
	       "                                 free                        sibling\n");
	printk("  task                 PC        stack   pid father child younger older\n");
#endif
	read_lock(&tasklist_lock);
	for_each_task(p)
		show_task(p);
	read_unlock(&tasklist_lock);
}
void __init init_idle(void)
{
	cycles_t t;
	struct schedule_data * sched_data;
	sched_data = &aligned_data[smp_processor_id()].schedule_data;

	if (current != &init_task && task_on_runqueue(current)) {
		printk("UGH! (%d:%d) was on the runqueue, removing.\n",
			smp_processor_id(), current->pid);
		del_from_runqueue(current);
	}
	t = get_cycles();
	sched_data->curr = current;
	sched_data->last_schedule = t;
}

void __init sched_init(void)
{
	/*
	 * We have to do a little magic to get the first
	 * process right in SMP mode.
	 */
	int cpu = hard_smp_processor_id();
	int nr;

	init_task.processor = cpu;

	for (nr = 0; nr < PIDHASH_SZ; nr++)
		pidhash[nr] = NULL;

	init_bh(TIMER_BH, timer_bh);
	init_bh(TQUEUE_BH, tqueue_bh);
	init_bh(IMMEDIATE_BH, immediate_bh);

	/*
	 * The boot idle thread does lazy MMU switching as well:
	 */
	atomic_inc(&init_mm.mm_count);