--- sched.c.orig	Tue Mar 27 17:30:58 2001
+++ sched.c	Tue Apr 3 16:45:21 2001
@@ -34,6 +34,7 @@
 extern void timer_bh(void);
 extern void tqueue_bh(void);
 extern void immediate_bh(void);
+static inline void hop_queues(struct task_struct *, int);
 /*
  * scheduler variables
  */
@@ -90,7 +91,8 @@
 spinlock_t runqueue_lock __cacheline_aligned = SPIN_LOCK_UNLOCKED;  /* inner */
 rwlock_t tasklist_lock __cacheline_aligned = RW_LOCK_UNLOCKED;	/* outer */
 
-static LIST_HEAD(runqueue_head);
+static struct list_head runqueue_head[NR_CPUS] = { LIST_HEAD_INIT((runqueue_head[0]))};
+static LIST_HEAD(rt_queue_head);
 
 /*
  * We align per-CPU scheduling data on cacheline boundaries,
@@ -100,12 +102,15 @@
 	struct schedule_data {
 		struct task_struct * curr;
 		cycles_t last_schedule;
+		struct list_head runqueue_head;
 	} schedule_data;
 	char __pad [SMP_CACHE_BYTES];
-} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0}}};
+} aligned_data [NR_CPUS] __cacheline_aligned = { {{&init_task,0,
+	LIST_HEAD_INIT((aligned_data[0].schedule_data.runqueue_head))}}};
 
 #define cpu_curr(cpu) aligned_data[(cpu)].schedule_data.curr
 #define last_schedule(cpu) aligned_data[(cpu)].schedule_data.last_schedule
+#define cpu_rq(cpu) (aligned_data[(cpu)].schedule_data.runqueue_head)
 
 struct kernel_stat kstat;
@@ -199,6 +204,33 @@
 	return goodness(p, cpu, prev->active_mm) - goodness(prev, cpu, prev->active_mm);
 }
 
+
+static inline int other_goodness(struct task_struct * p, int this_cpu, struct mm_struct *this_mm)
+{
+	int weight;
+
+	/*
+	 * select the current process after every other
+	 * runnable process, but before the idle thread.
+	 * Also, don't trigger a counter recalculation.
+	 *
+	 * Give the process a first-approximation goodness value
+	 * according to the number of clock-ticks it has left.
+	 *
+	 * Don't do any other calculations if the time slice is
+	 * over..
+	 */
+	weight = p->counter;
+	if (!weight)
+		goto out2;
+
+	/* .. and a slight advantage to the current MM */
+	if (p->mm == this_mm || !p->mm)
+		weight += 1;
+	weight += 20 - p->nice;
+out2:
+	return weight;
+}
 /*
  * This is ugly, but reschedule_idle() is very timing-critical.
  * We are called with the runqueue spinlock held and we must
@@ -266,6 +298,10 @@
 	} else {
 		if (oldest_idle == -1ULL) {
 			int prio = preemption_goodness(tsk, p, cpu);
+			/*
+			 * this will never be true for < 400 HZ non
+			 * realtime. optimize this?  SAR
+			 */
 
 			if (prio > max_prio) {
 				max_prio = prio;
@@ -277,6 +313,10 @@
 	tsk = target_tsk;
 	if (tsk) {
 		if (oldest_idle != -1ULL) {
+			/* push onto best queue */
+			if (p->policy == SCHED_OTHER){
+				hop_queues(p, tsk->processor);
+			}
 			best_cpu = tsk->processor;
 			goto send_now_idle;
 		}
@@ -306,20 +346,28 @@
  */
 static inline void add_to_runqueue(struct task_struct * p)
 {
-	list_add(&p->run_list, &runqueue_head);
+	if (p->policy == SCHED_OTHER){
+		list_add(&p->run_list, &cpu_rq(p->processor));
+	} else list_add(&p->run_list, &rt_queue_head);
 	nr_running++;
 }
 
-static inline void move_last_runqueue(struct task_struct * p)
+static inline void move_last_rt_queue(struct task_struct * p)
+{
+	list_del(&p->run_list);
+	list_add_tail(&p->run_list, &rt_queue_head);
+}
+
+static inline void move_first_rt_queue(struct task_struct * p)
 {
 	list_del(&p->run_list);
-	list_add_tail(&p->run_list, &runqueue_head);
+	list_add(&p->run_list, &rt_queue_head);
 }
 
-static inline void move_first_runqueue(struct task_struct * p)
+static inline void hop_queues(struct task_struct * p, int this_cpu)
 {
 	list_del(&p->run_list);
-	list_add(&p->run_list, &runqueue_head);
+	list_add(&p->run_list, &cpu_rq(this_cpu));
 }
 
 /*
@@ -343,6 +391,7 @@
 	if (task_on_runqueue(p))
 		goto out;
 	add_to_runqueue(p);
+	/* LATER : make an effort to choose rq before add ? */
 	if (!synchronous || !(p->cpus_allowed & (1 << smp_processor_id())))
 		reschedule_idle(p);
 	success = 1;
@@ -531,9 +580,9 @@
 asmlinkage void schedule(void)
 {
 	struct schedule_data * sched_data;
-	struct task_struct *prev, *next, *p;
+	struct task_struct *prev, *next, *p = NULL; /* LATER fix this */
 	struct list_head *tmp;
-	int this_cpu, c;
+	int this_cpu, c, i;
 
 	spin_lock_prefetch(&runqueue_lock);
 
@@ -592,18 +641,63 @@
 		goto still_running;
 
 still_running_back:
-	list_for_each(tmp, &runqueue_head) {
+	/*
+	 * We unrolled the original combined runqueue scan into separate
+	 * passes: first the real-time queue, then the SCHED_OTHER queue
+	 * for our own CPU, and finally theft from another CPU.
+	 */
+	list_for_each(tmp, &rt_queue_head) {
 		p = list_entry(tmp, struct task_struct, run_list);
-		if (can_schedule(p, this_cpu)) {
-			int weight = goodness(p, this_cpu, prev->active_mm);
+		if (can_schedule(p, this_cpu) && !(p->policy & SCHED_YIELD)) {
+			int weight = 1000 + p->rt_priority;
 			if (weight > c)
 				c = weight, next = p;
 		}
 	}
+	if (c >= 1000)
+		goto choice_made;
+
+	list_for_each(tmp, &cpu_rq(this_cpu)) {
+		p = list_entry(tmp, struct task_struct, run_list);
+		if (can_schedule(p, this_cpu) && !(p->policy & SCHED_YIELD)) {
+			int weight = other_goodness(p, this_cpu, prev->active_mm);
+			if (weight > c)
+				c = weight, next = p;
+		}
+	}
+
+#ifdef CONFIG_SMP
+	if (c > 0)
+		goto choice_made;
+
+	/*
+	 * Try to steal from another CPU's queue. Since we don't have to
+	 * worry about real time or CPU preference, pick anything available.
+	 * This is an area for detailed policy.
+	 */
+	for (i = 0; i < smp_num_cpus; i++) {
+		int cpu = cpu_logical_map(i);
+		if (cpu == this_cpu)
+			continue;
+
+		list_for_each(tmp, &cpu_rq(cpu)) {
+			p = list_entry(tmp, struct task_struct, run_list);
+			if (can_schedule(p, this_cpu) && !(p->policy & SCHED_YIELD)) {
+				int weight = other_goodness(p, cpu, prev->active_mm);
+				if (weight > c)
+					c = weight, next = p;
+			}
+		}
+	}
+
+	if (c > 0)
+		hop_queues(next, this_cpu); /* pull onto mine */
+#endif
 
 	/* Do we need to re-calculate counters? */
 	if (!c)
 		goto recalculate;
+choice_made:
 	/*
 	 * from this point on nothing can prevent us from
 	 * switching to the next task, save this fact in
@@ -705,7 +799,7 @@
 move_rr_last:
 	if (!prev->counter) {
 		prev->counter = NICE_TO_TICKS(prev->nice);
-		move_last_runqueue(prev);
+		move_last_rt_queue(prev);
 	}
 	goto move_rr_back;
 
@@ -938,8 +1032,14 @@
 	retval = 0;
 	p->policy = policy;
 	p->rt_priority = lp.sched_priority;
-	if (task_on_runqueue(p))
-		move_first_runqueue(p);
+	if (task_on_runqueue(p)){
+		if (policy != SCHED_OTHER)
+			move_first_rt_queue(p);
+		else {
+			/* push onto appropriate non-rt queue */
+			hop_queues(p, p->processor);
+		}
+	}
 
 	current->need_resched = 1;
 
@@ -1251,9 +1351,12 @@
 	 * process right in SMP mode.
 	 */
 	int cpu = smp_processor_id();
-	int nr;
+	int nr, i;
 
 	init_task.processor = cpu;
+	for (i=1; i<NR_CPUS; i++){
+		INIT_LIST_HEAD(&cpu_rq(i));
+	}
 
 	for(nr = 0; nr < PIDHASH_SZ; nr++)
 		pidhash[nr] = NULL;
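
For readers following the scheduling change rather than the diff mechanics, here is a small standalone sketch of the selection order the patched schedule() performs. It is a userspace toy, not kernel code: the task names, weights, and the array-based stand-in for the list_head queues are illustrative assumptions, and the +1 "same mm" bonus of other_goodness() is left out. It only mirrors the three passes: the global real-time queue first (weight 1000 + rt_priority), then this CPU's SCHED_OTHER queue, and theft from another CPU's queue only when nothing runnable was found locally.

/*
 * Userspace toy, not kernel code: mirrors only the selection order of
 * the patched schedule().  Arrays stand in for the list_head queues.
 */
#include <stdio.h>

#define NCPU  2
#define NTASK 5

struct toy_task {
	const char *name;
	int rt;       /* nonzero => real-time; weight becomes 1000 + rt */
	int counter;  /* remaining ticks; 0 means "time slice over" */
	int nice;     /* -20..19, lower is better */
	int cpu;      /* owning per-CPU queue, or -1 for the RT queue */
};

static int toy_weight(const struct toy_task *t)
{
	if (t->rt)
		return 1000 + t->rt;
	if (!t->counter)
		return 0;
	return t->counter + 20 - t->nice;
}

int main(void)
{
	struct toy_task tasks[NTASK] = {
		{ "rt_logger", 5, 0,  0, -1 },
		{ "editor",    0, 6,  0,  0 },
		{ "compiler",  0, 4, 10,  0 },
		{ "daemon",    0, 7,  5,  1 },
		{ "sleeper",   0, 0,  0,  1 },
	};
	const struct toy_task *next = NULL;
	int this_cpu = 0, c = -1, i, cpu;

	/* pass 1: the global real-time queue */
	for (i = 0; i < NTASK; i++)
		if (tasks[i].cpu == -1 && toy_weight(&tasks[i]) > c) {
			c = toy_weight(&tasks[i]);
			next = &tasks[i];
		}

	/* pass 2: this CPU's SCHED_OTHER queue, unless an RT task won */
	if (c < 1000)
		for (i = 0; i < NTASK; i++)
			if (tasks[i].cpu == this_cpu && toy_weight(&tasks[i]) > c) {
				c = toy_weight(&tasks[i]);
				next = &tasks[i];
			}

	/* pass 3: steal from the other CPUs only if nothing runnable was found */
	if (c <= 0)
		for (cpu = 0; cpu < NCPU; cpu++) {
			if (cpu == this_cpu)
				continue;
			for (i = 0; i < NTASK; i++)
				if (tasks[i].cpu == cpu && toy_weight(&tasks[i]) > c) {
					c = toy_weight(&tasks[i]);
					next = &tasks[i];
				}
		}

	printf("cpu %d picks %s (weight %d)\n",
	       this_cpu, next ? next->name : "idle", c);
	return 0;
}

Built with any C compiler, it prints the task CPU 0 would pick; with the sample values the real-time entry wins because its weight starts above 1000, which is why the patch can short-circuit with the c >= 1000 check before ever touching the per-CPU queues.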