fs/exec.c | 2 include/asm-alpha/rmap.h | 7 include/asm-arm/proc-armv/rmap.h | 49 ++++ include/asm-arm/rmap.h | 6 include/asm-cris/rmap.h | 7 include/asm-generic/rmap.h | 52 ++++ include/asm-i386/rmap.h | 7 include/asm-ia64/rmap.h | 7 include/asm-m68k/rmap.h | 7 include/asm-mips/rmap.h | 7 include/asm-mips64/rmap.h | 7 include/asm-parisc/rmap.h | 7 include/asm-ppc/rmap.h | 9 include/asm-s390/rmap.h | 7 include/asm-s390x/rmap.h | 7 include/asm-sh/rmap.h | 7 include/asm-sparc/rmap.h | 7 include/asm-sparc64/rmap.h | 7 include/linux/mm.h | 5 include/linux/page-flags.h | 28 ++ include/linux/swap.h | 14 + kernel/fork.c | 4 mm/Makefile | 2 mm/filemap.c | 9 mm/memory.c | 27 +- mm/mremap.c | 8 mm/page_alloc.c | 1 mm/rmap.c | 394 ++++++++++++++++++++++++++++++++++ mm/swap_state.c | 63 +++++ mm/swapfile.c | 1 mm/vmscan.c | 447 +++++++-------------------------------- 31 files changed, 832 insertions(+), 380 deletions(-) --- 2.5.26/fs/exec.c~rmap Tue Jul 16 21:46:26 2002 +++ 2.5.26-akpm/fs/exec.c Tue Jul 16 21:46:26 2002 @@ -36,6 +36,7 @@ #include <linux/spinlock.h> #include <linux/personality.h> #include <linux/binfmts.h> +#include <linux/swap.h> #define __NO_VERSION__ #include <linux/module.h> #include <linux/namei.h> @@ -283,6 +284,7 @@ void put_dirty_page(struct task_struct * flush_dcache_page(page); flush_page_to_ram(page); set_pte(pte, pte_mkdirty(pte_mkwrite(mk_pte(page, PAGE_COPY)))); + page_add_rmap(page, pte); pte_unmap(pte); tsk->mm->rss++; spin_unlock(&tsk->mm->page_table_lock); --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-alpha/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,7 @@ +#ifndef _ALPHA_RMAP_H +#define _ALPHA_RMAP_H + +/* nothing to see, move along */ +#include <asm-generic/rmap.h> + +#endif --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-arm/proc-armv/rmap.h Tue Jul 16 21:59:40 2002 @@ -0,0 +1,49 @@ +#ifndef _ARMV_RMAP_H +#define _ARMV_RMAP_H +/* + * linux/include/asm-arm/proc-armv/rmap.h + * + * Architecture dependant parts of the reverse mapping code, + * + * ARM is different since hardware page tables are smaller than + * the page size and Linux uses a "duplicate" one with extra info. + * For rmap this means that the first 2 kB of a page are the hardware + * page tables and the last 2 kB are the software page tables. 
+ */ + +static inline void pgtable_add_rmap(pte_t * ptep, struct mm_struct * mm, unsigned long address) +{ + struct page * page = virt_to_page(ptep); + + page->mm = mm; + page->index = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1); +} + +static inline void pgtable_remove_rmap(pte_t * ptep) +{ + struct page * page = virt_to_page(ptep); + + page->mm = NULL; + page->index = 0; +} + +static inline struct mm_struct * ptep_to_mm(pte_t * ptep) +{ + struct page * page = virt_to_page(ptep); + + return page->mm; +} + +/* The page table takes half of the page */ +#define PTE_MASK ((PAGE_SIZE / 2) - 1) + +static inline unsigned long ptep_to_address(pte_t * ptep) +{ + struct page * page = virt_to_page(ptep); + unsigned long low_bits; + + low_bits = ((unsigned long)ptep & PTE_MASK) * PTRS_PER_PTE; + return page->index + low_bits; +} + +#endif /* _ARMV_RMAP_H */ --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-arm/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,6 @@ +#ifndef _ARM_RMAP_H +#define _ARM_RMAP_H + +#include <asm/proc/rmap.h> + +#endif /* _ARM_RMAP_H */ --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-cris/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,7 @@ +#ifndef _CRIS_RMAP_H +#define _CRIS_RMAP_H + +/* nothing to see, move along :) */ +#include <asm-generic/rmap.h> + +#endif --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-generic/rmap.h Tue Jul 16 21:59:40 2002 @@ -0,0 +1,52 @@ +#ifndef _GENERIC_RMAP_H +#define _GENERIC_RMAP_H +/* + * linux/include/asm-generic/rmap.h + * + * Architecture dependant parts of the reverse mapping code, + * this version should work for most architectures with a + * 'normal' page table layout. + * + * We use the struct page of the page table page to find out + * the process and full address of a page table entry: + * - page->mapping points to the process' mm_struct + * - page->index has the high bits of the address + * - the lower bits of the address are calculated from the + * offset of the page table entry within the page table page + */ +#include <linux/mm.h> + +static inline void pgtable_add_rmap(struct page * page, struct mm_struct * mm, unsigned long address) +{ +#ifdef BROKEN_PPC_PTE_ALLOC_ONE + /* OK, so PPC calls pte_alloc() before mem_map[] is setup ... 
;( */ + extern int mem_init_done; + + if (!mem_init_done) + return; +#endif + page->mapping = (void *)mm; + page->index = address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1); +} + +static inline void pgtable_remove_rmap(struct page * page) +{ + page->mapping = NULL; + page->index = 0; +} + +static inline struct mm_struct * ptep_to_mm(pte_t * ptep) +{ + struct page * page = virt_to_page(ptep); + return (struct mm_struct *) page->mapping; +} + +static inline unsigned long ptep_to_address(pte_t * ptep) +{ + struct page * page = virt_to_page(ptep); + unsigned long low_bits; + low_bits = ((unsigned long)ptep & ~PAGE_MASK) * PTRS_PER_PTE; + return page->index + low_bits; +} + +#endif /* _GENERIC_RMAP_H */ --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-i386/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,7 @@ +#ifndef _I386_RMAP_H +#define _I386_RMAP_H + +/* nothing to see, move along */ +#include <asm-generic/rmap.h> + +#endif --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-ia64/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,7 @@ +#ifndef _IA64_RMAP_H +#define _IA64_RMAP_H + +/* nothing to see, move along */ +#include <asm-generic/rmap.h> + +#endif --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-m68k/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,7 @@ +#ifndef _M68K_RMAP_H +#define _M68K_RMAP_H + +/* nothing to see, move along */ +#include <asm-generic/rmap.h> + +#endif --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-mips64/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,7 @@ +#ifndef _MIPS64_RMAP_H +#define _MIPS64_RMAP_H + +/* nothing to see, move along */ +#include <asm-generic/rmap.h> + +#endif --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-mips/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,7 @@ +#ifndef _MIPS_RMAP_H +#define _MIPS_RMAP_H + +/* nothing to see, move along */ +#include <asm-generic/rmap.h> + +#endif --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-parisc/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,7 @@ +#ifndef _PARISC_RMAP_H +#define _PARISC_RMAP_H + +/* nothing to see, move along */ +#include <asm-generic/rmap.h> + +#endif --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-ppc/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,9 @@ +#ifndef _PPC_RMAP_H +#define _PPC_RMAP_H + +/* PPC calls pte_alloc() before mem_map[] is setup ... 
*/ +#define BROKEN_PPC_PTE_ALLOC_ONE + +#include <asm-generic/rmap.h> + +#endif --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-s390/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,7 @@ +#ifndef _S390_RMAP_H +#define _S390_RMAP_H + +/* nothing to see, move along */ +#include <asm-generic/rmap.h> + +#endif --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-s390x/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,7 @@ +#ifndef _S390X_RMAP_H +#define _S390X_RMAP_H + +/* nothing to see, move along */ +#include <asm-generic/rmap.h> + +#endif --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-sh/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,7 @@ +#ifndef _SH_RMAP_H +#define _SH_RMAP_H + +/* nothing to see, move along */ +#include <asm-generic/rmap.h> + +#endif --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-sparc64/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,7 @@ +#ifndef _SPARC64_RMAP_H +#define _SPARC64_RMAP_H + +/* nothing to see, move along */ +#include <asm-generic/rmap.h> + +#endif --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/include/asm-sparc/rmap.h Tue Jul 16 21:46:26 2002 @@ -0,0 +1,7 @@ +#ifndef _SPARC_RMAP_H +#define _SPARC_RMAP_H + +/* nothing to see, move along */ +#include <asm-generic/rmap.h> + +#endif --- 2.5.26/include/linux/mm.h~rmap Tue Jul 16 21:46:26 2002 +++ 2.5.26-akpm/include/linux/mm.h Tue Jul 16 21:59:41 2002 @@ -130,6 +130,9 @@ struct vm_operations_struct { struct page * (*nopage)(struct vm_area_struct * area, unsigned long address, int unused); }; +/* forward declaration; pte_chain is meant to be internal to rmap.c */ +struct pte_chain; + /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -154,6 +157,8 @@ struct page { updated asynchronously */ struct list_head lru; /* Pageout list, eg. active_list; protected by pagemap_lru_lock !! */ + struct pte_chain * pte_chain; /* Reverse pte mapping pointer. + * protected by PG_chainlock */ unsigned long private; /* mapping-private opaque data */ /* --- 2.5.26/include/linux/page-flags.h~rmap Tue Jul 16 21:46:26 2002 +++ 2.5.26-akpm/include/linux/page-flags.h Tue Jul 16 21:59:41 2002 @@ -47,7 +47,7 @@ * locked- and dirty-page accounting. The top eight bits of page->flags are * used for page->zone, so putting flag bits there doesn't work. */ -#define PG_locked 0 /* Page is locked. Don't touch. */ +#define PG_locked 0 /* Page is locked. Don't touch. */ #define PG_error 1 #define PG_referenced 2 #define PG_uptodate 3 @@ -65,6 +65,7 @@ #define PG_private 12 /* Has something at ->private */ #define PG_writeback 13 /* Page is under writeback */ #define PG_nosave 15 /* Used for system suspend/resume */ +#define PG_chainlock 16 /* lock bit for ->pte_chain */ /* * Global page accounting. One instance per CPU. @@ -217,6 +218,31 @@ extern void get_page_state(struct page_s #define TestClearPageNosave(page) test_and_clear_bit(PG_nosave, &(page)->flags) /* + * inlines for acquisition and release of PG_chainlock + */ +static inline void pte_chain_lock(struct page *page) +{ + /* + * Assuming the lock is uncontended, this never enters + * the body of the outer loop. If it is contended, then + * within the inner loop a non-atomic test is used to + * busywait with less bus contention for a good time to + * attempt to acquire the lock bit. 
+ */ + preempt_disable(); + while (test_and_set_bit(PG_chainlock, &page->flags)) { + while (test_bit(PG_chainlock, &page->flags)) + cpu_relax(); + } +} + +static inline void pte_chain_unlock(struct page *page) +{ + clear_bit(PG_chainlock, &page->flags); + preempt_enable(); +} + +/* * The PageSwapCache predicate doesn't use a PG_flag at this time, * but it may again do so one day. */ --- 2.5.26/include/linux/swap.h~rmap Tue Jul 16 21:46:26 2002 +++ 2.5.26-akpm/include/linux/swap.h Tue Jul 16 21:46:26 2002 @@ -142,6 +142,19 @@ struct sysinfo; struct address_space; struct zone_t; +/* linux/mm/rmap.c */ +extern int FASTCALL(page_referenced(struct page *)); +extern void FASTCALL(page_add_rmap(struct page *, pte_t *)); +extern void FASTCALL(page_remove_rmap(struct page *, pte_t *)); +extern int FASTCALL(try_to_unmap(struct page *)); +extern int FASTCALL(page_over_rsslimit(struct page *)); + +/* return values of try_to_unmap */ +#define SWAP_SUCCESS 0 +#define SWAP_AGAIN 1 +#define SWAP_FAIL 2 +#define SWAP_ERROR 3 + /* linux/mm/swap.c */ extern void FASTCALL(lru_cache_add(struct page *)); extern void FASTCALL(__lru_cache_del(struct page *)); @@ -168,6 +181,7 @@ int rw_swap_page_sync(int rw, swp_entry_ extern void show_swap_cache_info(void); #endif extern int add_to_swap_cache(struct page *, swp_entry_t); +extern int add_to_swap(struct page *); extern void __delete_from_swap_cache(struct page *page); extern void delete_from_swap_cache(struct page *page); extern int move_to_swap_cache(struct page *page, swp_entry_t entry); --- 2.5.26/kernel/fork.c~rmap Tue Jul 16 21:46:26 2002 +++ 2.5.26-akpm/kernel/fork.c Tue Jul 16 21:46:26 2002 @@ -189,7 +189,6 @@ static inline int dup_mmap(struct mm_str mm->map_count = 0; mm->rss = 0; mm->cpu_vm_mask = 0; - mm->swap_address = 0; pprev = &mm->mmap; /* @@ -308,9 +307,6 @@ inline void __mmdrop(struct mm_struct *m void mmput(struct mm_struct *mm) { if (atomic_dec_and_lock(&mm->mm_users, &mmlist_lock)) { - extern struct mm_struct *swap_mm; - if (swap_mm == mm) - swap_mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist); list_del(&mm->mmlist); mmlist_nr--; spin_unlock(&mmlist_lock); --- 2.5.26/mm/filemap.c~rmap Tue Jul 16 21:46:26 2002 +++ 2.5.26-akpm/mm/filemap.c Tue Jul 16 21:59:40 2002 @@ -176,14 +176,13 @@ static inline void truncate_partial_page */ static void truncate_complete_page(struct page *page) { - /* Leave it on the LRU if it gets converted into anonymous buffers */ - if (!PagePrivate(page) || do_invalidatepage(page, 0)) { - lru_cache_del(page); - } else { + /* Drop fs-specific data so the page might become freeable. */ + if (PagePrivate(page) && !do_invalidatepage(page, 0)) { if (current->flags & PF_INVALIDATE) printk("%s: buffer heads were leaked\n", current->comm); } + ClearPageDirty(page); ClearPageUptodate(page); remove_inode_page(page); @@ -660,7 +659,7 @@ EXPORT_SYMBOL(wait_on_page_bit); * But that's OK - sleepers in wait_on_page_writeback() just go back to sleep. * * The first mb is necessary to safely close the critical section opened by the - * TryLockPage(), the second mb is necessary to enforce ordering between + * TestSetPageLocked(), the second mb is necessary to enforce ordering between * the clear_bit and the read of the waitqueue (to avoid SMP races with a * parallel wait_on_page_locked()). 
*/ --- 2.5.26/mm/Makefile~rmap Tue Jul 16 21:46:26 2002 +++ 2.5.26-akpm/mm/Makefile Tue Jul 16 21:46:26 2002 @@ -16,6 +16,6 @@ obj-y := memory.o mmap.o filemap.o mpro vmalloc.o slab.o bootmem.o swap.o vmscan.o page_io.o \ page_alloc.o swap_state.o swapfile.o numa.o oom_kill.o \ shmem.o highmem.o mempool.o msync.o mincore.o readahead.o \ - pdflush.o page-writeback.o + pdflush.o page-writeback.o rmap.o include $(TOPDIR)/Rules.make --- 2.5.26/mm/memory.c~rmap Tue Jul 16 21:46:26 2002 +++ 2.5.26-akpm/mm/memory.c Tue Jul 16 21:59:40 2002 @@ -46,6 +46,7 @@ #include <linux/pagemap.h> #include <asm/pgalloc.h> +#include <asm/rmap.h> #include <asm/uaccess.h> #include <asm/tlb.h> #include <asm/tlbflush.h> @@ -79,7 +80,7 @@ struct page *mem_map; */ static inline void free_one_pmd(mmu_gather_t *tlb, pmd_t * dir) { - struct page *pte; + struct page *page; if (pmd_none(*dir)) return; @@ -88,9 +89,10 @@ static inline void free_one_pmd(mmu_gath pmd_clear(dir); return; } - pte = pmd_page(*dir); + page = pmd_page(*dir); pmd_clear(dir); - pte_free_tlb(tlb, pte); + pgtable_remove_rmap(page); + pte_free_tlb(tlb, page); } static inline void free_one_pgd(mmu_gather_t *tlb, pgd_t * dir) @@ -150,6 +152,7 @@ pte_t * pte_alloc_map(struct mm_struct * pte_free(new); goto out; } + pgtable_add_rmap(new, mm, address); pmd_populate(mm, pmd, new); } out: @@ -177,6 +180,7 @@ pte_t * pte_alloc_kernel(struct mm_struc pte_free_kernel(new); goto out; } + pgtable_add_rmap(virt_to_page(new), mm, address); pmd_populate_kernel(mm, pmd, new); } out: @@ -260,10 +264,13 @@ skip_copy_pte_range: address = (address if (pte_none(pte)) goto cont_copy_pte_range_noset; + /* pte contains position in swap, so copy. */ if (!pte_present(pte)) { swap_duplicate(pte_to_swp_entry(pte)); - goto cont_copy_pte_range; + set_pte(dst_pte, pte); + goto cont_copy_pte_range_noset; } + ptepage = pte_page(pte); pfn = pte_pfn(pte); if (!pfn_valid(pfn)) goto cont_copy_pte_range; @@ -272,7 +279,7 @@ skip_copy_pte_range: address = (address goto cont_copy_pte_range; /* If it's a COW mapping, write protect it both in the parent and the child */ - if (cow && pte_write(pte)) { + if (cow) { ptep_set_wrprotect(src_pte); pte = *src_pte; } @@ -285,6 +292,7 @@ skip_copy_pte_range: address = (address dst->rss++; cont_copy_pte_range: set_pte(dst_pte, pte); + page_add_rmap(ptepage, dst_pte); cont_copy_pte_range_noset: address += PAGE_SIZE; if (address >= end) { pte_unmap_nested(src_pte); @@ -342,6 +350,7 @@ static void zap_pte_range(mmu_gather_t * if (pte_dirty(pte)) set_page_dirty(page); tlb->freed++; + page_remove_rmap(page, ptep); tlb_remove_page(tlb, page); } } @@ -992,7 +1001,9 @@ static int do_wp_page(struct mm_struct * if (pte_same(*page_table, pte)) { if (PageReserved(old_page)) ++mm->rss; + page_remove_rmap(old_page, page_table); break_cow(vma, new_page, address, page_table); + page_add_rmap(new_page, page_table); lru_cache_add(new_page); /* Free the old page.. */ @@ -1199,6 +1210,7 @@ static int do_swap_page(struct mm_struct flush_page_to_ram(page); flush_icache_page(vma, page); set_pte(page_table, pte); + page_add_rmap(page, page_table); /* No need to invalidate - it was non-present before */ update_mmu_cache(vma, address, pte); @@ -1215,14 +1227,13 @@ static int do_swap_page(struct mm_struct static int do_anonymous_page(struct mm_struct * mm, struct vm_area_struct * vma, pte_t *page_table, pmd_t *pmd, int write_access, unsigned long addr) { pte_t entry; + struct page * page = ZERO_PAGE(addr); /* Read-only mapping of ZERO_PAGE. 
*/ entry = pte_wrprotect(mk_pte(ZERO_PAGE(addr), vma->vm_page_prot)); /* ..except if it's a write access */ if (write_access) { - struct page *page; - /* Allocate our own private page. */ pte_unmap(page_table); spin_unlock(&mm->page_table_lock); @@ -1248,6 +1259,7 @@ static int do_anonymous_page(struct mm_s } set_pte(page_table, entry); + page_add_rmap(page, page_table); /* ignores ZERO_PAGE */ pte_unmap(page_table); /* No need to invalidate - it was non-present before */ @@ -1327,6 +1339,7 @@ static int do_no_page(struct mm_struct * if (write_access) entry = pte_mkwrite(pte_mkdirty(entry)); set_pte(page_table, entry); + page_add_rmap(new_page, page_table); pte_unmap(page_table); } else { /* One of our sibling threads was faster, back out. */ --- 2.5.26/mm/mremap.c~rmap Tue Jul 16 21:46:26 2002 +++ 2.5.26-akpm/mm/mremap.c Tue Jul 16 21:46:26 2002 @@ -68,8 +68,14 @@ static inline int copy_one_pte(struct mm { int error = 0; pte_t pte; + struct page * page = NULL; + + if (pte_present(*src)) + page = pte_page(*src); if (!pte_none(*src)) { + if (page) + page_remove_rmap(page, src); pte = ptep_get_and_clear(src); if (!dst) { /* No dest? We must put it back. */ @@ -77,6 +83,8 @@ static inline int copy_one_pte(struct mm error++; } set_pte(dst, pte); + if (page) + page_add_rmap(page, dst); } return error; } --- 2.5.26/mm/page_alloc.c~rmap Tue Jul 16 21:46:26 2002 +++ 2.5.26-akpm/mm/page_alloc.c Tue Jul 16 21:59:41 2002 @@ -92,6 +92,7 @@ static void __free_pages_ok (struct page BUG_ON(PageLRU(page)); BUG_ON(PageActive(page)); BUG_ON(PageWriteback(page)); + BUG_ON(page->pte_chain != NULL); if (PageDirty(page)) ClearPageDirty(page); BUG_ON(page_count(page) != 0); --- /dev/null Thu Aug 30 13:30:55 2001 +++ 2.5.26-akpm/mm/rmap.c Tue Jul 16 21:59:41 2002 @@ -0,0 +1,394 @@ +/* + * mm/rmap.c - physical to virtual reverse mappings + * + * Copyright 2001, Rik van Riel <r...@conectiva.com.br> + * Released under the General Public License (GPL). + * + * + * Simple, low overhead pte-based reverse mapping scheme. + * This is kept modular because we may want to experiment + * with object-based reverse mapping schemes. Please try + * to keep this thing as modular as possible. + */ + +/* + * Locking: + * - the page->pte_chain is protected by the PG_chainlock bit, + * which nests within the pagemap_lru_lock, then the + * mm->page_table_lock, and then the page lock. + * - because swapout locking is opposite to the locking order + * in the page fault path, the swapout path uses trylocks + * on the mm->page_table_lock + */ +#include <linux/mm.h> +#include <linux/pagemap.h> +#include <linux/swapops.h> + +#include <asm/pgalloc.h> +#include <asm/rmap.h> +#include <asm/smplock.h> +#include <asm/tlb.h> +#include <asm/tlbflush.h> + +/* #define DEBUG_RMAP */ + +/* + * Shared pages have a chain of pte_chain structures, used to locate + * all the mappings to this page. We only need a pointer to the pte + * here, the page struct for the page table page contains the process + * it belongs to and the offset within that process. + * + * A singly linked list should be fine for most, if not all, workloads. + * On fork-after-exec the mapping we'll be removing will still be near + * the start of the list, on mixed application systems the short-lived + * processes will have their mappings near the start of the list and + * in systems with long-lived applications the relative overhead of + * exit() will be lower since the applications are long-lived. 
+ */ +struct pte_chain { + struct pte_chain * next; + pte_t * ptep; +}; + +static inline struct pte_chain * pte_chain_alloc(void); +static inline void pte_chain_free(struct pte_chain *, struct pte_chain *, + struct page *); +static void alloc_new_pte_chains(void); + +/** + * page_referenced - test if the page was referenced + * @page: the page to test + * + * Quick test_and_clear_referenced for all mappings to a page, + * returns the number of processes which referenced the page. + * Caller needs to hold the pte_chain_lock. + */ +int page_referenced(struct page * page) +{ + struct pte_chain * pc; + int referenced = 0; + + if (TestClearPageReferenced(page)) + referenced++; + + /* Check all the page tables mapping this page. */ + for (pc = page->pte_chain; pc; pc = pc->next) { + if (ptep_test_and_clear_young(pc->ptep)) + referenced++; + } + return referenced; +} + +/** + * page_add_rmap - add reverse mapping entry to a page + * @page: the page to add the mapping to + * @ptep: the page table entry mapping this page + * + * Add a new pte reverse mapping to a page. + * The caller needs to hold the mm->page_table_lock. + */ +void page_add_rmap(struct page * page, pte_t * ptep) +{ + struct pte_chain * pte_chain; + unsigned long pfn = pte_pfn(*ptep); + +#ifdef DEBUG_RMAP + if (!page || !ptep) + BUG(); + if (!pte_present(*ptep)) + BUG(); + if (!ptep_to_mm(ptep)) + BUG(); +#endif + + if (!pfn_valid(pfn) || PageReserved(page)) + return; + +#ifdef DEBUG_RMAP + pte_chain_lock(page); + { + struct pte_chain * pc; + for (pc = page->pte_chain; pc; pc = pc->next) { + if (pc->ptep == ptep) + BUG(); + } + } + pte_chain_unlock(page); +#endif + + pte_chain = pte_chain_alloc(); + + pte_chain_lock(page); + + /* Hook up the pte_chain to the page. */ + pte_chain->ptep = ptep; + pte_chain->next = page->pte_chain; + page->pte_chain = pte_chain; + + pte_chain_unlock(page); +} + +/** + * page_remove_rmap - take down reverse mapping to a page + * @page: page to remove mapping from + * @ptep: page table entry to remove + * + * Removes the reverse mapping from the pte_chain of the page, + * after that the caller can clear the page table entry and free + * the page. + * Caller needs to hold the mm->page_table_lock. + */ +void page_remove_rmap(struct page * page, pte_t * ptep) +{ + struct pte_chain * pc, * prev_pc = NULL; + unsigned long pfn = pte_pfn(*ptep); + + if (!page || !ptep) + BUG(); + if (!pfn_valid(pfn) || PageReserved(page)) + return; + + pte_chain_lock(page); + for (pc = page->pte_chain; pc; prev_pc = pc, pc = pc->next) { + if (pc->ptep == ptep) { + pte_chain_free(pc, prev_pc, page); + goto out; + } + } +#ifdef DEBUG_RMAP + /* Not found. This should NEVER happen! */ + printk(KERN_ERR "page_remove_rmap: pte_chain %p not present.\n", ptep); + printk(KERN_ERR "page_remove_rmap: only found: "); + for (pc = page->pte_chain; pc; pc = pc->next) + printk("%p ", pc->ptep); + printk("\n"); + printk(KERN_ERR "page_remove_rmap: driver cleared PG_reserved ?\n"); +#endif + +out: + pte_chain_unlock(page); + return; + +} + +/** + * try_to_unmap_one - worker function for try_to_unmap + * @page: page to unmap + * @ptep: page table entry to unmap from page + * + * Internal helper function for try_to_unmap, called for each page + * table entry mapping a page. Because locking order here is opposite + * to the locking order used by the page fault path, we use trylocks. 
+ * Locking: + * pagemap_lru_lock page_launder() + * page lock page_launder(), trylock + * pte_chain_lock page_launder() + * mm->page_table_lock try_to_unmap_one(), trylock + */ +static int FASTCALL(try_to_unmap_one(struct page *, pte_t *)); +static int try_to_unmap_one(struct page * page, pte_t * ptep) +{ + unsigned long address = ptep_to_address(ptep); + struct mm_struct * mm = ptep_to_mm(ptep); + struct vm_area_struct * vma; + pte_t pte; + int ret; + + if (!mm) + BUG(); + + /* + * We need the page_table_lock to protect us from page faults, + * munmap, fork, etc... + */ + if (!spin_trylock(&mm->page_table_lock)) + return SWAP_AGAIN; + + /* During mremap, it's possible pages are not in a VMA. */ + vma = find_vma(mm, address); + if (!vma) { + ret = SWAP_FAIL; + goto out_unlock; + } + + /* The page is mlock()d, we cannot swap it out. */ + if (vma->vm_flags & VM_LOCKED) { + ret = SWAP_FAIL; + goto out_unlock; + } + + /* Nuke the page table entry. */ + pte = ptep_get_and_clear(ptep); + flush_tlb_page(vma, address); + flush_cache_page(vma, address); + + /* Store the swap location in the pte. See handle_pte_fault() ... */ + if (PageSwapCache(page)) { + swp_entry_t entry; + entry.val = page->index; + swap_duplicate(entry); + set_pte(ptep, swp_entry_to_pte(entry)); + } + + /* Move the dirty bit to the physical page now the pte is gone. */ + if (pte_dirty(pte)) + set_page_dirty(page); + + mm->rss--; + page_cache_release(page); + ret = SWAP_SUCCESS; + +out_unlock: + spin_unlock(&mm->page_table_lock); + return ret; +} + +/** + * try_to_unmap - try to remove all page table mappings to a page + * @page: the page to get unmapped + * + * Tries to remove all the page table entries which are mapping this + * page, used in the pageout path. Caller must hold pagemap_lru_lock + * and the page lock. Return values are: + * + * SWAP_SUCCESS - we succeeded in removing all mappings + * SWAP_AGAIN - we missed a trylock, try again later + * SWAP_FAIL - the page is unswappable + * SWAP_ERROR - an error occurred + */ +int try_to_unmap(struct page * page) +{ + struct pte_chain * pc, * next_pc, * prev_pc = NULL; + int ret = SWAP_SUCCESS; + + /* This page should not be on the pageout lists. */ + if (PageReserved(page)) + BUG(); + if (!PageLocked(page)) + BUG(); + /* We need backing store to swap out a page. */ + if (!page->mapping) + BUG(); + + for (pc = page->pte_chain; pc; pc = next_pc) { + next_pc = pc->next; + switch (try_to_unmap_one(page, pc->ptep)) { + case SWAP_SUCCESS: + /* Free the pte_chain struct. */ + pte_chain_free(pc, prev_pc, page); + break; + case SWAP_AGAIN: + /* Skip this pte, remembering status. */ + prev_pc = pc; + ret = SWAP_AGAIN; + continue; + case SWAP_FAIL: + return SWAP_FAIL; + case SWAP_ERROR: + return SWAP_ERROR; + } + } + + return ret; +} + +/** + ** No more VM stuff below this comment, only pte_chain helper + ** functions. + **/ + +struct pte_chain * pte_chain_freelist; +spinlock_t pte_chain_freelist_lock = SPIN_LOCK_UNLOCKED; + +/* Maybe we should have standard ops for singly linked lists ... 
- Rik */ +static inline void pte_chain_push(struct pte_chain * pte_chain) +{ + pte_chain->ptep = NULL; + pte_chain->next = pte_chain_freelist; + pte_chain_freelist = pte_chain; +} + +static inline struct pte_chain * pte_chain_pop(void) +{ + struct pte_chain *pte_chain; + + pte_chain = pte_chain_freelist; + pte_chain_freelist = pte_chain->next; + pte_chain->next = NULL; + + return pte_chain; +} + +/** + * pte_chain_free - free pte_chain structure + * @pte_chain: pte_chain struct to free + * @prev_pte_chain: previous pte_chain on the list (may be NULL) + * @page: page this pte_chain hangs off (may be NULL) + * + * This function unlinks pte_chain from the singly linked list it + * may be on and adds the pte_chain to the free list. May also be + * called for new pte_chain structures which aren't on any list yet. + * Caller needs to hold the pte_chain_lock if the page is non-NULL. + */ +static inline void pte_chain_free(struct pte_chain * pte_chain, + struct pte_chain * prev_pte_chain, struct page * page) +{ + if (prev_pte_chain) + prev_pte_chain->next = pte_chain->next; + else if (page) + page->pte_chain = pte_chain->next; + + spin_lock(&pte_chain_freelist_lock); + pte_chain_push(pte_chain); + spin_unlock(&pte_chain_freelist_lock); +} + +/** + * pte_chain_alloc - allocate a pte_chain struct + * + * Returns a pointer to a fresh pte_chain structure. Allocates new + * pte_chain structures as required. + * Caller needs to hold the page's pte_chain_lock. + */ +static inline struct pte_chain * pte_chain_alloc() +{ + struct pte_chain * pte_chain; + + spin_lock(&pte_chain_freelist_lock); + + /* Allocate new pte_chain structs as needed. */ + if (!pte_chain_freelist) + alloc_new_pte_chains(); + + /* Grab the first pte_chain from the freelist. */ + pte_chain = pte_chain_pop(); + + spin_unlock(&pte_chain_freelist_lock); + + return pte_chain; +} + +/** + * alloc_new_pte_chains - convert a free page to pte_chain structures + * + * Grabs a free page and converts it to pte_chain structures. We really + * should pre-allocate these earlier in the pagefault path or come up + * with some other trick. + * + * Note that we cannot use the slab cache because the pte_chain structure + * is way smaller than the minimum size of a slab cache allocation. + * Caller needs to hold the pte_chain_freelist_lock + */ +static void alloc_new_pte_chains() +{ + struct pte_chain * pte_chain = (void *) get_zeroed_page(GFP_ATOMIC); + int i = PAGE_SIZE / sizeof(struct pte_chain); + + if (pte_chain) { + for (; i-- > 0; pte_chain++) + pte_chain_push(pte_chain); + } else { + /* Yeah yeah, I'll fix the pte_chain allocation ... */ + panic("Fix pte_chain allocation, you lazy bastard!\n"); + } +} --- 2.5.26/mm/swapfile.c~rmap Tue Jul 16 21:46:26 2002 +++ 2.5.26-akpm/mm/swapfile.c Tue Jul 16 21:46:26 2002 @@ -383,6 +383,7 @@ static inline void unuse_pte(struct vm_a return; get_page(page); set_pte(dir, pte_mkold(mk_pte(page, vma->vm_page_prot))); + page_add_rmap(page, dir); swap_free(entry); ++vma->vm_mm->rss; } --- 2.5.26/mm/swap_state.c~rmap Tue Jul 16 21:46:26 2002 +++ 2.5.26-akpm/mm/swap_state.c Tue Jul 16 21:59:37 2002 @@ -105,6 +105,69 @@ void __delete_from_swap_cache(struct pag INC_CACHE_INFO(del_total); } +/** + * add_to_swap - allocate swap space for a page + * @page: page we want to move to swap + * + * Allocate swap space for the page and add the page to the + * swap cache. Caller needs to hold the page lock. 
+ */ +int add_to_swap(struct page * page) +{ + swp_entry_t entry; + int flags; + + if (!PageLocked(page)) + BUG(); + + for (;;) { + entry = get_swap_page(); + if (!entry.val) + return 0; + + /* Radix-tree node allocations are performing + * GFP_ATOMIC allocations under PF_MEMALLOC. + * They can completely exhaust the page allocator. + * + * So PF_MEMALLOC is dropped here. This causes the slab + * allocations to fail earlier, so radix-tree nodes will + * then be allocated from the mempool reserves. + * + * We're still using __GFP_HIGH for radix-tree node + * allocations, so some of the emergency pools are available, + * just not all of them. + */ + + flags = current->flags; + current->flags &= ~PF_MEMALLOC; + current->flags |= PF_NOWARN; + ClearPageUptodate(page); /* why? */ + + /* + * Add it to the swap cache and mark it dirty + * (adding to the page cache will clear the dirty + * and uptodate bits, so we need to do it again) + */ + switch (add_to_swap_cache(page, entry)) { + case 0: /* Success */ + current->flags = flags; + SetPageUptodate(page); + set_page_dirty(page); + swap_free(entry); + return 1; + case -ENOMEM: /* radix-tree allocation */ + current->flags = flags; + swap_free(entry); + return 0; + default: /* ENOENT: raced */ + break; + } + /* Raced with "speculative" read_swap_cache_async */ + current->flags = flags; + swap_free(entry); + } +} + /* * This must be called only on pages that have * been verified to be in the swap cache and locked. --- 2.5.26/mm/vmscan.c~rmap Tue Jul 16 21:46:26 2002 +++ 2.5.26-akpm/mm/vmscan.c Tue Jul 16 21:59:41 2002 @@ -42,348 +42,24 @@ static inline int is_page_cache_freeable return page_count(page) - !!PagePrivate(page) == 1; } -/* - * On the swap_out path, the radix-tree node allocations are performing - * GFP_ATOMIC allocations under PF_MEMALLOC. They can completely - * exhaust the page allocator. This is bad; some pages should be left - * available for the I/O system to start sending the swapcache contents - * to disk. - * - * So PF_MEMALLOC is dropped here. This causes the slab allocations to fail - * earlier, so radix-tree nodes will then be allocated from the mempool - * reserves. - * - * We're still using __GFP_HIGH for radix-tree node allocations, so some of - * the emergency pools are available - just not all of them. - */ -static inline int -swap_out_add_to_swap_cache(struct page *page, swp_entry_t entry) -{ - int flags = current->flags; - int ret; - - current->flags &= ~PF_MEMALLOC; - current->flags |= PF_NOWARN; - ClearPageUptodate(page); /* why? */ - ClearPageReferenced(page); /* why? */ - ret = add_to_swap_cache(page, entry); - current->flags = flags; - return ret; -} - -/* - * The swap-out function returns 1 if it successfully - * scanned all the pages it was asked to (`count'). - * It returns zero if it couldn't do anything, - * - * rss may decrease because pages are shared, but this - * doesn't count as having freed a page. - */ - -/* mm->page_table_lock is held. mmap_sem is not held */ -static inline int try_to_swap_out(struct mm_struct * mm, struct vm_area_struct* vma, unsigned long address, pte_t * page_table, struct page *page, zone_t * classzone) +/* Must be called with page's pte_chain_lock held. */ +static inline int page_mapping_inuse(struct page * page) { - pte_t pte; - swp_entry_t entry; - - /* Don't look at this pte if it's been accessed recently. 
*/ - if ((vma->vm_flags & VM_LOCKED) || ptep_test_and_clear_young(page_table)) { - mark_page_accessed(page); - return 0; - } - - /* Don't bother unmapping pages that are active */ - if (PageActive(page)) - return 0; + struct address_space *mapping = page->mapping; - /* Don't bother replenishing zones not under pressure.. */ - if (!memclass(page_zone(page), classzone)) - return 0; + /* Page is in somebody's page tables. */ + if (page->pte_chain) + return 1; - if (TestSetPageLocked(page)) + /* XXX: does this happen ? */ + if (!mapping) return 0; - if (PageWriteback(page)) - goto out_unlock; - - /* From this point on, the odds are that we're going to - * nuke this pte, so read and clear the pte. This hook - * is needed on CPUs which update the accessed and dirty - * bits in hardware. - */ - flush_cache_page(vma, address); - pte = ptep_get_and_clear(page_table); - flush_tlb_page(vma, address); - - if (pte_dirty(pte)) - set_page_dirty(page); - - /* - * Is the page already in the swap cache? If so, then - * we can just drop our reference to it without doing - * any IO - it's already up-to-date on disk. - */ - if (PageSwapCache(page)) { - entry.val = page->index; - swap_duplicate(entry); -set_swap_pte: - set_pte(page_table, swp_entry_to_pte(entry)); -drop_pte: - mm->rss--; - unlock_page(page); - { - int freeable = page_count(page) - - !!PagePrivate(page) <= 2; - page_cache_release(page); - return freeable; - } - } - - /* - * Is it a clean page? Then it must be recoverable - * by just paging it in again, and we can just drop - * it.. or if it's dirty but has backing store, - * just mark the page dirty and drop it. - * - * However, this won't actually free any real - * memory, as the page will just be in the page cache - * somewhere, and as such we should just continue - * our scan. - * - * Basically, this just makes it possible for us to do - * some real work in the future in "refill_inactive()". - */ - if (page->mapping) - goto drop_pte; - if (!PageDirty(page)) - goto drop_pte; - - /* - * Anonymous buffercache pages can be left behind by - * concurrent truncate and pagefault. - */ - if (PagePrivate(page)) - goto preserve; - - /* - * This is a dirty, swappable page. First of all, - * get a suitable swap entry for it, and make sure - * we have the swap cache set up to associate the - * page with that swap entry. - */ - for (;;) { - entry = get_swap_page(); - if (!entry.val) - break; - /* Add it to the swap cache and mark it dirty - * (adding to the page cache will clear the dirty - * and uptodate bits, so we need to do it again) - */ - switch (swap_out_add_to_swap_cache(page, entry)) { - case 0: /* Success */ - SetPageUptodate(page); - set_page_dirty(page); - goto set_swap_pte; - case -ENOMEM: /* radix-tree allocation */ - swap_free(entry); - goto preserve; - default: /* ENOENT: raced */ - break; - } - /* Raced with "speculative" read_swap_cache_async */ - swap_free(entry); - } - - /* No swap space left */ -preserve: - set_pte(page_table, pte); -out_unlock: - unlock_page(page); - return 0; -} - -/* mm->page_table_lock is held. 
mmap_sem is not held */ -static inline int swap_out_pmd(struct mm_struct * mm, struct vm_area_struct * vma, pmd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone) -{ - pte_t * pte; - unsigned long pmd_end; - - if (pmd_none(*dir)) - return count; - if (pmd_bad(*dir)) { - pmd_ERROR(*dir); - pmd_clear(dir); - return count; - } - - pte = pte_offset_map(dir, address); - - pmd_end = (address + PMD_SIZE) & PMD_MASK; - if (end > pmd_end) - end = pmd_end; - - do { - if (pte_present(*pte)) { - unsigned long pfn = pte_pfn(*pte); - struct page *page = pfn_to_page(pfn); - - if (pfn_valid(pfn) && !PageReserved(page)) { - count -= try_to_swap_out(mm, vma, address, pte, page, classzone); - if (!count) { - address += PAGE_SIZE; - pte++; - break; - } - } - } - address += PAGE_SIZE; - pte++; - } while (address && (address < end)); - pte_unmap(pte - 1); - mm->swap_address = address; - return count; -} - -/* mm->page_table_lock is held. mmap_sem is not held */ -static inline int swap_out_pgd(struct mm_struct * mm, struct vm_area_struct * vma, pgd_t *dir, unsigned long address, unsigned long end, int count, zone_t * classzone) -{ - pmd_t * pmd; - unsigned long pgd_end; - - if (pgd_none(*dir)) - return count; - if (pgd_bad(*dir)) { - pgd_ERROR(*dir); - pgd_clear(dir); - return count; - } - - pmd = pmd_offset(dir, address); - - pgd_end = (address + PGDIR_SIZE) & PGDIR_MASK; - if (pgd_end && (end > pgd_end)) - end = pgd_end; - - do { - count = swap_out_pmd(mm, vma, pmd, address, end, count, classzone); - if (!count) - break; - address = (address + PMD_SIZE) & PMD_MASK; - pmd++; - } while (address && (address < end)); - return count; -} - -/* mm->page_table_lock is held. mmap_sem is not held */ -static inline int swap_out_vma(struct mm_struct * mm, struct vm_area_struct * vma, unsigned long address, int count, zone_t * classzone) -{ - pgd_t *pgdir; - unsigned long end; - - /* Don't swap out areas which are reserved */ - if (vma->vm_flags & VM_RESERVED) - return count; - - pgdir = pgd_offset(mm, address); - - end = vma->vm_end; - if (address >= end) - BUG(); - do { - count = swap_out_pgd(mm, vma, pgdir, address, end, count, classzone); - if (!count) - break; - address = (address + PGDIR_SIZE) & PGDIR_MASK; - pgdir++; - } while (address && (address < end)); - return count; -} - -/* Placeholder for swap_out(): may be updated by fork.c:mmput() */ -struct mm_struct *swap_mm = &init_mm; - -/* - * Returns remaining count of pages to be swapped out by followup call. - */ -static inline int swap_out_mm(struct mm_struct * mm, int count, int * mmcounter, zone_t * classzone) -{ - unsigned long address; - struct vm_area_struct* vma; - - /* - * Find the proper vm-area after freezing the vma chain - * and ptes. 
- */ - spin_lock(&mm->page_table_lock); - address = mm->swap_address; - if (address == TASK_SIZE || swap_mm != mm) { - /* We raced: don't count this mm but try again */ - ++*mmcounter; - goto out_unlock; - } - vma = find_vma(mm, address); - if (vma) { - if (address < vma->vm_start) - address = vma->vm_start; - - for (;;) { - count = swap_out_vma(mm, vma, address, count, classzone); - vma = vma->vm_next; - if (!vma) - break; - if (!count) - goto out_unlock; - address = vma->vm_start; - } - } - /* Indicate that we reached the end of address space */ - mm->swap_address = TASK_SIZE; - -out_unlock: - spin_unlock(&mm->page_table_lock); - return count; -} - -static int FASTCALL(swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone)); -static int swap_out(unsigned int priority, unsigned int gfp_mask, zone_t * classzone) -{ - int counter, nr_pages = SWAP_CLUSTER_MAX; - struct mm_struct *mm; - - counter = mmlist_nr; - do { - if (need_resched()) { - __set_current_state(TASK_RUNNING); - schedule(); - } - - spin_lock(&mmlist_lock); - mm = swap_mm; - while (mm->swap_address == TASK_SIZE || mm == &init_mm) { - mm->swap_address = 0; - mm = list_entry(mm->mmlist.next, struct mm_struct, mmlist); - if (mm == swap_mm) - goto empty; - swap_mm = mm; - } - - /* Make sure the mm doesn't disappear when we drop the lock.. */ - atomic_inc(&mm->mm_users); - spin_unlock(&mmlist_lock); - - nr_pages = swap_out_mm(mm, nr_pages, &counter, classzone); - - mmput(mm); - - if (!nr_pages) - return 1; - } while (--counter >= 0); + /* File is mmap'd by somebody. */ + if (!list_empty(&mapping->i_mmap) || !list_empty(&mapping->i_mmap_shared)) + return 1; return 0; - -empty: - spin_unlock(&mmlist_lock); - return 0; } static int @@ -392,7 +68,6 @@ shrink_cache(int nr_pages, zone_t *class { struct list_head * entry; struct address_space *mapping; - int max_mapped = nr_pages << (9 - priority); spin_lock(&pagemap_lru_lock); while (--max_scan >= 0 && @@ -428,10 +103,6 @@ shrink_cache(int nr_pages, zone_t *class if (!memclass(page_zone(page), classzone)) continue; - /* Racy check to avoid trylocking when not worthwhile */ - if (!PagePrivate(page) && (page_count(page) != 1 || !page->mapping)) - goto page_mapped; - /* * swap activity never enters the filesystem and is safe * for GFP_NOFS allocations. @@ -461,6 +132,59 @@ shrink_cache(int nr_pages, zone_t *class continue; } + /* + * The page is in active use or really unfreeable. Move to + * the active list. + */ + pte_chain_lock(page); + if (page_referenced(page) && page_mapping_inuse(page)) { + del_page_from_inactive_list(page); + add_page_to_active_list(page); + pte_chain_unlock(page); + unlock_page(page); + continue; + } + + /* + * Anonymous process memory without backing store. Try to + * allocate it some swap space here. + * + * XXX: implement swap clustering ? + */ + if (page->pte_chain && !page->mapping && !PagePrivate(page)) { + page_cache_get(page); + pte_chain_unlock(page); + spin_unlock(&pagemap_lru_lock); + if (!add_to_swap(page)) { + activate_page(page); + unlock_page(page); + page_cache_release(page); + spin_lock(&pagemap_lru_lock); + continue; + } + page_cache_release(page); + spin_lock(&pagemap_lru_lock); + pte_chain_lock(page); + } + + /* + * The page is mapped into the page tables of one or more + * processes. Try to unmap it here. 
+ */ + if (page->pte_chain) { + switch (try_to_unmap(page)) { + case SWAP_ERROR: + case SWAP_FAIL: + goto page_active; + case SWAP_AGAIN: + pte_chain_unlock(page); + unlock_page(page); + continue; + case SWAP_SUCCESS: + ; /* try to free the page below */ + } + } + pte_chain_unlock(page); mapping = page->mapping; if (PageDirty(page) && is_page_cache_freeable(page) && @@ -469,7 +193,7 @@ shrink_cache(int nr_pages, zone_t *class * It is not critical here to write it only if * the page is unmapped beause any direct writer * like O_DIRECT would set the page's dirty bitflag - * on the phisical page after having successfully + * on the physical page after having successfully * pinned it and after the I/O to the page is finished, * so the direct writes to the page cannot get lost. */ @@ -557,18 +281,7 @@ shrink_cache(int nr_pages, zone_t *class write_unlock(&mapping->page_lock); } unlock_page(page); -page_mapped: - if (--max_mapped >= 0) - continue; - - /* - * Alert! We've found too many mapped pages on the - * inactive list, so we start swapping out now! - */ - spin_unlock(&pagemap_lru_lock); - swap_out(priority, gfp_mask, classzone); - return nr_pages; - + continue; page_freeable: /* * It is critical to check PageDirty _after_ we made sure @@ -597,13 +310,21 @@ page_freeable: /* effectively free the page here */ page_cache_release(page); - if (--nr_pages) continue; - break; + goto out; +page_active: + /* + * OK, we don't know what to do with the page. + * It's no use keeping it here, so we move it to + * the active list. + */ + del_page_from_inactive_list(page); + add_page_to_active_list(page); + pte_chain_unlock(page); + unlock_page(page); } - spin_unlock(&pagemap_lru_lock); - +out: spin_unlock(&pagemap_lru_lock); return nr_pages; } @@ -611,8 +332,8 @@ page_freeable: * This moves pages from the active list to * the inactive list. * - * We move them the other way when we see the - * reference bit on the page. + * We move them the other way if the page is + * referenced by one or more processes, from rmap */ static void refill_inactive(int nr_pages) { @@ -625,15 +346,17 @@ static void refill_inactive(int nr_pages page = list_entry(entry, struct page, lru); entry = entry->prev; - if (TestClearPageReferenced(page)) { + + pte_chain_lock(page); + if (page->pte_chain && page_referenced(page)) { list_del(&page->lru); list_add(&page->lru, &active_list); + pte_chain_unlock(page); continue; } - del_page_from_active_list(page); add_page_to_inactive_list(page); - SetPageReferenced(page); + pte_chain_unlock(page); } spin_unlock(&pagemap_lru_lock); }
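
A note on the ptep_to_address() arithmetic in asm-generic/rmap.h above: pgtable_add_rmap() stores only the high bits of the mapped address in page->index, and the low bits are recovered from the pte pointer's byte offset within its page table page. Multiplying that byte offset by PTRS_PER_PTE works because PAGE_SIZE == PTRS_PER_PTE * sizeof(pte_t). The standalone sketch below (not part of the patch) walks through the arithmetic with assumed i386-style constants; fake_page_index(), fake_low_bits() and the sample addresses are made up purely for illustration.

/*
 * Standalone sketch of the ptep_to_address() arithmetic from
 * asm-generic/rmap.h, using assumed i386-style constants
 * (4 kB pages, 1024 four-byte ptes per page table page).
 * Illustration only; not part of the patch.
 */
#include <assert.h>
#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(~(PAGE_SIZE - 1))
#define PTRS_PER_PTE	1024UL
#define PTE_SIZE	(PAGE_SIZE / PTRS_PER_PTE)	/* 4 bytes per pte */

/* High bits of the mapped address, as stored in page->index by
 * pgtable_add_rmap(): address & ~(PTRS_PER_PTE * PAGE_SIZE - 1). */
static unsigned long fake_page_index(unsigned long address)
{
	return address & ~((PTRS_PER_PTE * PAGE_SIZE) - 1);
}

/* Low bits: the pte's byte offset inside its page table page, scaled
 * so that one pte slot accounts for PAGE_SIZE of address space.
 * Multiplying the byte offset by PTRS_PER_PTE equals
 * (offset / PTE_SIZE) * PAGE_SIZE because
 * PAGE_SIZE == PTRS_PER_PTE * PTE_SIZE. */
static unsigned long fake_low_bits(unsigned long ptep)
{
	return (ptep & ~PAGE_MASK) * PTRS_PER_PTE;
}

int main(void)
{
	/* Pretend the pte for user address 0x0804a000 lives at slot
	 * 0x4a (byte offset 0x128) of a page-aligned pte page. */
	unsigned long address = 0x0804a000UL;
	unsigned long pte_page = 0xc1234000UL;
	unsigned long ptep = pte_page +
		((address / PAGE_SIZE) % PTRS_PER_PTE) * PTE_SIZE;

	unsigned long reconstructed = fake_page_index(address) +
				      fake_low_bits(ptep);

	printf("original %#lx, reconstructed %#lx\n", address, reconstructed);
	assert(reconstructed == address);
	return 0;
}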
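
For readers unfamiliar with the PG_chainlock trick in page-flags.h above: the flag bit doubles as a per-page spinlock, acquired with an atomic test_and_set_bit and, under contention, spun on with a plain test_bit so the busy-wait does not keep issuing bus-locked operations. The minimal userspace analogue below is an assumption-laden sketch, not kernel code: C11 atomics stand in for the kernel bitops, and it omits the preempt_disable()/preempt_enable() pairing done by the real inlines.

/*
 * Userspace analogue of the PG_chainlock bit-spinlock protecting
 * page->pte_chain.  Illustrates only the test_and_set / plain-test
 * spin pattern described in the pte_chain_lock() comment.
 */
#include <stdatomic.h>
#include <stdio.h>

#define PG_chainlock 16

struct fake_page {
	atomic_ulong flags;
};

static void fake_pte_chain_lock(struct fake_page *page)
{
	/* Fast path: uncontended, one atomic RMW and we own the bit. */
	while (atomic_fetch_or(&page->flags, 1UL << PG_chainlock) &
	       (1UL << PG_chainlock)) {
		/* Contended: spin with plain loads (no locked RMW) until
		 * the bit looks clear, then retry the atomic operation. */
		while (atomic_load(&page->flags) & (1UL << PG_chainlock))
			; /* cpu_relax() in the kernel */
	}
}

static void fake_pte_chain_unlock(struct fake_page *page)
{
	atomic_fetch_and(&page->flags, ~(1UL << PG_chainlock));
}

int main(void)
{
	struct fake_page page;

	atomic_init(&page.flags, 0);

	fake_pte_chain_lock(&page);
	printf("flags while held:  %#lx\n",
	       (unsigned long)atomic_load(&page.flags));
	fake_pte_chain_unlock(&page);
	printf("flags after unlock: %#lx\n",
	       (unsigned long)atomic_load(&page.flags));
	return 0;
}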