From: Andrea Arcangeli <and...@suse.de>
Subject: patch oom 2.3.x
Date: 2000/01/08
Message-ID: <fa.iob4c8v.1b56k3f@ifi.uio.no>
X-Deja-AN: 570031124
Original-Date: Sat, 8 Jan 2000 17:17:09 +0100 (CET)
Sender: owner-linux-ker...@vger.rutgers.edu
Original-Message-ID: <Pine.LNX.4.21.0001081708430.6128-100000@alpha.random>
X-PGP-Key-URL: http://e-mind.com/~andrea/aa.asc
To: Linus Torvalds <torva...@transmeta.com>
X-Sender: and...@alpha.random
Content-Type: TEXT/PLAIN; charset=US-ASCII
X-Orcpt: rfc822;linux-kernel-outgoing-dig
Organization: Internet mailing list
MIME-Version: 1.0
X-GnuPG-Key-URL: http://e-mind.com/~andrea/aa.gnupg.asc
Newsgroups: fa.linux.kernel
X-Loop: majord...@vger.rutgers.edu

o	fixes the init-gets-sigsegv during oom.
o	avoids sending SIGKILL to iopl'ed tasks and sends them a SIGTERM
	instead, so X won't screw up the console due to oom on IA32.
o	makes the signal code react better to oom.
o	updates a few places that don't know about the new handle_mm_fault
	interface yet (btw in ptrace David prefers to send the SIGKILL
	due to oom to the traced `tsk` and not to the `current` tracer task;
	I left it to him to send an incremental patch if he wants, actually I
	prefer the way it is done in my patch because it is obviously safe).
o	removes the deprecated oom function.
o	prevents the swap algorithm from reassigning swap_cnt more than once
	per try.

diff -urN 2.3.36pre5/arch/alpha/kernel/signal.c 2.3.36pre5-oom/arch/alpha/kernel/signal.c
--- 2.3.36pre5/arch/alpha/kernel/signal.c	Wed Nov 24 18:22:03 1999
+++ 2.3.36pre5-oom/arch/alpha/kernel/signal.c	Mon Jan  3 19:00:05 2000
@@ -437,6 +437,8 @@
 		err |= __copy_to_user(frame->extramask, &set->sig[1], 
 				      sizeof(frame->extramask));
 	}
+	if (err)
+		goto give_sigsegv;
 
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
@@ -499,6 +501,8 @@
 	err |= setup_sigcontext(&frame->uc.uc_mcontext, regs, sw,
 				set->sig[0], oldsp);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+	if (err)
+		goto give_sigsegv;
 
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
diff -urN 2.3.36pre5/arch/alpha/mm/fault.c 2.3.36pre5-oom/arch/alpha/mm/fault.c
--- 2.3.36pre5/arch/alpha/mm/fault.c	Wed Nov 24 18:22:03 1999
+++ 2.3.36pre5-oom/arch/alpha/mm/fault.c	Mon Jan  3 19:00:05 2000
@@ -130,13 +130,13 @@
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
+survive:
 	fault = handle_mm_fault(current, vma, address, cause > 0);
-	up(&mm->mmap_sem);
-
 	if (fault < 0)
 		goto out_of_memory;
 	if (fault == 0)
 		goto do_sigbus;
+	up(&mm->mmap_sem);
 
 	return;
 
@@ -177,13 +177,23 @@
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
-	printk(KERN_ALERT "VM: killing process %s(%d)\n",
-	       current->comm, current->pid);
-	if (!user_mode(regs))
-		goto no_context;
-	do_exit(SIGKILL);
+	if (current->pid == 1)
+	{
+		current->policy |= SCHED_YIELD;
+		schedule();
+		goto survive;
+	}
+	up(&mm->mmap_sem);
+	if (user_mode(regs))
+	{
+		printk(KERN_ALERT "VM: killing process %s(%d)\n",
+		       current->comm, current->pid);
+		do_exit(SIGKILL);
+	}
+	goto no_context;
 
 do_sigbus:
+	up(&mm->mmap_sem);
 	/*
 	 * Send a sigbus, regardless of whether we were in kernel
 	 * or user mode.
diff -urN 2.3.36pre5/arch/i386/kernel/signal.c 2.3.36pre5-oom/arch/i386/kernel/signal.c
--- 2.3.36pre5/arch/i386/kernel/signal.c	Wed Nov 24 18:22:03 1999
+++ 2.3.36pre5-oom/arch/i386/kernel/signal.c	Mon Jan  3 19:00:05 2000
@@ -419,13 +419,19 @@
 		           ? current->exec_domain->signal_invmap[sig]
 		           : sig),
 		          &frame->sig);
+	if (err)
+		goto give_sigsegv;
 
 	err |= setup_sigcontext(&frame->sc, &frame->fpstate, regs, set->sig[0]);
+	if (err)
+		goto give_sigsegv;
 
 	if (_NSIG_WORDS > 1) {
 		err |= __copy_to_user(frame->extramask, &set->sig[1],
 				      sizeof(frame->extramask));
 	}
+	if (err)
+		goto give_sigsegv;
 
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
@@ -486,6 +492,8 @@
 	err |= __put_user(&frame->info, &frame->pinfo);
 	err |= __put_user(&frame->uc, &frame->puc);
 	err |= __copy_to_user(&frame->info, info, sizeof(*info));
+	if (err)
+		goto give_sigsegv;
 
 	/* Create the ucontext.  */
 	err |= __put_user(0, &frame->uc.uc_flags);
@@ -497,6 +505,8 @@
 	err |= setup_sigcontext(&frame->uc.uc_mcontext, &frame->fpstate,
 			        regs, set->sig[0]);
 	err |= __copy_to_user(&frame->uc.uc_sigmask, set, sizeof(*set));
+	if (err)
+		goto give_sigsegv;
 
 	/* Set up to return from userspace.  If provided, use a stub
 	   already in userspace.  */
diff -urN 2.3.36pre5/arch/i386/mm/fault.c 2.3.36pre5-oom/arch/i386/mm/fault.c
--- 2.3.36pre5/arch/i386/mm/fault.c	Wed Nov 24 18:22:04 1999
+++ 2.3.36pre5-oom/arch/i386/mm/fault.c	Mon Jan  3 19:00:05 2000
@@ -31,6 +31,7 @@
 {
 	struct vm_area_struct * vma;
 	unsigned long start = (unsigned long) addr;
+	int fault;
 
 	if (!size)
 		return 1;
@@ -50,8 +51,12 @@
 	start &= PAGE_MASK;
 
 	for (;;) {
-		if (handle_mm_fault(current, vma, start, 1) <= 0)
-			goto bad_area;
+survive:
+		fault =  handle_mm_fault(current, vma, start, 1);
+		if (!fault)
+			goto do_sigbus;
+		if (fault < 0)
+			goto out_of_memory;
 		if (!size)
 			break;
 		size--;
@@ -74,6 +79,19 @@
 
 bad_area:
 	return 0;
+
+do_sigbus:
+	force_sig(SIGBUS, current);
+	goto bad_area;
+
+out_of_memory:
+	if (current->pid == 1)
+	{
+		current->policy |= SCHED_YIELD;
+		schedule();
+		goto survive;
+	}
+	goto bad_area;
 }
 
 static inline void handle_wp_test (void)
@@ -188,6 +206,7 @@
 	 * make sure we exit gracefully rather than endlessly redo
 	 * the fault.
 	 */
+survive:
 	{
 		int fault = handle_mm_fault(tsk, vma, address, write);
 		if (fault < 0)
@@ -280,10 +299,33 @@
  * us unable to handle the page fault gracefully.
  */
 out_of_memory:
+	if (tsk->pid == 1)
+	{
+		tsk->policy |= SCHED_YIELD;
+		schedule();
+		goto survive;
+	}
 	up(&mm->mmap_sem);
-	printk("VM: killing process %s\n", tsk->comm);
 	if (error_code & 4)
-		do_exit(SIGKILL);
+	{
+		if (!((regs->eflags >> 12) & 3))
+		{
+			printk(KERN_ALERT "VM: killing process %s\n",
+			       tsk->comm);
+			do_exit(SIGKILL);
+		}
+		else
+		{
+			/*
+			 * The task is running with privilegies and so we
+			 * trust it and we give it a chance to die gracefully.
+			 */
+			printk(KERN_ALERT "VM: terminating process %s\n",
+			       tsk->comm);
+			force_sig(SIGTERM, current);
+			return;
+		}
+	}
 	goto no_context;
 
 do_sigbus:
diff -urN 2.3.36pre5/fs/exec.c 2.3.36pre5-oom/fs/exec.c
--- 2.3.36pre5/fs/exec.c	Mon Jan  3 18:56:24 2000
+++ 2.3.36pre5-oom/fs/exec.c	Mon Jan  3 19:02:11 2000
@@ -277,13 +277,13 @@
 	pmd = pmd_alloc(pgd, address);
 	if (!pmd) {
 		__free_page(page);
-		oom(tsk);
+		force_sig(SIGKILL, tsk);
 		return;
 	}
 	pte = pte_alloc(pmd, address);
 	if (!pte) {
 		__free_page(page);
-		oom(tsk);
+		force_sig(SIGKILL, tsk);
 		return;
 	}
 	if (!pte_none(*pte)) {
diff -urN 2.3.36pre5/include/linux/mm.h 2.3.36pre5-oom/include/linux/mm.h
--- 2.3.36pre5/include/linux/mm.h	Mon Jan  3 18:56:25 2000
+++ 2.3.36pre5-oom/include/linux/mm.h	Mon Jan  3 19:00:05 2000
@@ -400,7 +400,6 @@
 		unsigned int * zones_size, unsigned long zone_start_paddr);
 extern void mem_init(void);
 extern void show_mem(void);
-extern void oom(struct task_struct * tsk);
 extern void si_meminfo(struct sysinfo * val);
 extern void swapin_readahead(swp_entry_t);
 
diff -urN 2.3.36pre5/kernel/ptrace.c 2.3.36pre5-oom/kernel/ptrace.c
--- 2.3.36pre5/kernel/ptrace.c	Sun Nov 21 03:20:20 1999
+++ 2.3.36pre5-oom/kernel/ptrace.c	Mon Jan  3 19:01:02 2000
@@ -26,6 +26,7 @@
 	unsigned long mapnr;
 	unsigned long maddr; 
 	struct page *page;
+	int fault;
 
 repeat:
 	pgdir = pgd_offset(vma->vm_mm, addr);
@@ -64,8 +65,12 @@
 
 fault_in_page:
 	/* -1: out of memory. 0 - unmapped page */
-	if (handle_mm_fault(tsk, vma, addr, write) > 0)
+	fault = handle_mm_fault(tsk, vma, addr, write);
+	if (fault > 0)
 		goto repeat;
+	if (fault < 0)
+		/* the out of memory is been triggered by the current task. */
+		force_sig(SIGKILL, current);
 	return 0;
 
 bad_pgd:
diff -urN 2.3.36pre5/mm/memory.c 2.3.36pre5-oom/mm/memory.c
--- 2.3.36pre5/mm/memory.c	Mon Jan  3 18:56:25 2000
+++ 2.3.36pre5-oom/mm/memory.c	Mon Jan  3 19:00:05 2000
@@ -70,16 +70,6 @@
 mem_map_t * mem_map = NULL;
 
 /*
- * oom() prints a message (so that the user knows why the process died),
- * and gives the process an untrappable SIGKILL.
- */
-void oom(struct task_struct * task)
-{
-	printk("\nOut of memory for %s.\n", task->comm);
-	force_sig(SIGKILL, task);
-}
-
-/*
  * Note: this doesn't free the actual pages themselves. That
  * has been handled earlier when unmapping all the memory regions.
  */
diff -urN 2.3.36pre5/mm/vmscan.c 2.3.36pre5-oom/mm/vmscan.c
--- 2.3.36pre5/mm/vmscan.c	Fri Dec 31 16:33:05 1999
+++ 2.3.36pre5-oom/mm/vmscan.c	Mon Jan  3 19:00:05 2000
@@ -328,6 +328,7 @@
 	struct task_struct * p;
 	int counter;
 	int __ret = 0;
+	int assign = 0;
 
 	lock_kernel();
 	/* 
@@ -347,12 +348,9 @@
 	counter = nr_threads / (priority+1);
 	if (counter < 1)
 		counter = 1;
-	if (counter > nr_threads)
-		counter = nr_threads;
 
 	for (; counter >= 0; counter--) {
-		int assign = 0;
-		int max_cnt = 0;
+		unsigned long max_cnt = 0;
 		struct mm_struct *best = NULL;
 		int pid = 0;
 	select:
@@ -365,7 +363,7 @@
 	 		if (mm->rss <= 0)
 				continue;
 			/* Refresh swap_cnt? */
-			if (assign)
+			if (assign == 1)
 				mm->swap_cnt = mm->rss;
 			if (mm->swap_cnt > max_cnt) {
 				max_cnt = mm->swap_cnt;
@@ -374,6 +372,8 @@
 			}
 		}
 		read_unlock(&tasklist_lock);
+		if (assign == 1)
+			assign = 2;
 		if (!best) {
 			if (!assign) {
 				assign = 1;

Andrea


-
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majord...@vger.rutgers.edu
Please read the FAQ at http://www.tux.org/lkml/