From: Hugh Dickins <hugh@veritas.com>

A recent update removed the page_table_lock from objrmap.  That was
simply wrong: the lock is needed to guard mm->rss (in try_to_unmap),
and to ensure the page tables are not freed beneath us (in which case
any modification of the pte, by page_referenced or by try_to_unmap,
could corrupt a freed and reused page).
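
Concretely, the pattern restored here (shown schematically; it
mirrors the page_referenced_obj_one hunk below) is to take the lock
with a trylock, so reclaim never blocks on a busy mm: if the lock
cannot be had, report the page referenced and move on, rather than
touch a pte in a page table that may be freed beneath us:

	if (!spin_trylock(&mm->page_table_lock))
		return 1;	/* assume referenced; retry later */

	pte = find_pte(vma, page, NULL);
	if (pte) {
		if (ptep_test_and_clear_young(pte))
			referenced++;
		pte_unmap(pte);
	}

	spin_unlock(&mm->page_table_lock);
	return referenced;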

Then restore the original SWAP_AGAIN semantics in try_to_unmap:
simplest for the inner levels to return SWAP_AGAIN or SWAP_FAIL,
with the outer level deciding SWAP_SUCCESS once every pte mapping
the page has been removed.  Stop searching once all have been
unmapped.
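
Schematically (mirroring the try_to_unmap_obj and try_to_unmap hunks
below): the per-vma level only ever returns SWAP_AGAIN or SWAP_FAIL,
the vma loops bail out as soon as no mapping of the page remains, and
only the outermost try_to_unmap promotes the result to SWAP_SUCCESS:

	list_for_each_entry(vma, &mapping->i_mmap, shared) {
		ret = try_to_unmap_obj_one(vma, page);
		if (ret == SWAP_FAIL || !page->pte.mapcount)
			goto out;	/* failed, or fully unmapped */
	}
	...
	/* at the end of try_to_unmap itself */
	if (!page_mapped(page)) {
		dec_page_state(nr_mapped);
		ret = SWAP_SUCCESS;
	}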

page_convert_anon is left for the moment with FIXMEs rather than
page_table_lock.  I believe it can be done without page_table_lock
(which would be problematic there), but I have noticed several other
bugs in it; more patches will follow at a later date (though I'd much
rather use anobjrmap, which handles all this so much more cleanly).



 25-akpm/mm/rmap.c |   28 +++++++++++++++++++---------
 1 files changed, 19 insertions(+), 9 deletions(-)

diff -puN mm/rmap.c~hugh-07-objrmap-page_table_lock mm/rmap.c
--- 25/mm/rmap.c~hugh-07-objrmap-page_table_lock	Tue Mar 25 18:34:52 2003
+++ 25-akpm/mm/rmap.c	Tue Mar 25 18:34:52 2003
@@ -144,9 +144,13 @@ out:
 static int
 page_referenced_obj_one(struct vm_area_struct *vma, struct page *page)
 {
+	struct mm_struct *mm = vma->vm_mm;
 	pte_t *pte;
 	int referenced = 0;
 
+	if (!spin_trylock(&mm->page_table_lock))
+		return 1;
+
 	pte = find_pte(vma, page, NULL);
 	if (pte) {
 		if (ptep_test_and_clear_young(pte))
@@ -154,6 +158,7 @@ page_referenced_obj_one(struct vm_area_s
 		pte_unmap(pte);
 	}
 
+	spin_unlock(&mm->page_table_lock);
 	return referenced;
 }
 
@@ -490,7 +495,10 @@ try_to_unmap_obj_one(struct vm_area_stru
 	unsigned long address;
 	pte_t *pte;
 	pte_t pteval;
-	int ret = SWAP_SUCCESS;
+	int ret = SWAP_AGAIN;
+
+	if (!spin_trylock(&mm->page_table_lock))
+		return ret;
 
 	pte = find_pte(vma, page, &address);
 	if (!pte)
@@ -519,6 +527,7 @@ out_unmap:
 	pte_unmap(pte);
 
 out:
+	spin_unlock(&mm->page_table_lock);
 	return ret;
 }
 
@@ -539,7 +548,7 @@ try_to_unmap_obj(struct page *page)
 {
 	struct address_space *mapping = page->mapping;
 	struct vm_area_struct *vma;
-	int ret = SWAP_SUCCESS;
+	int ret = SWAP_AGAIN;
 
 	if (!mapping)
 		BUG();
@@ -548,23 +557,20 @@ try_to_unmap_obj(struct page *page)
 		BUG();
 
 	if (down_trylock(&mapping->i_shared_sem))
-		return SWAP_AGAIN;
+		return ret;
 	
 	list_for_each_entry(vma, &mapping->i_mmap, shared) {
 		ret = try_to_unmap_obj_one(vma, page);
-		if (ret != SWAP_SUCCESS)
+		if (ret == SWAP_FAIL || !page->pte.mapcount)
 			goto out;
 	}
 
 	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
 		ret = try_to_unmap_obj_one(vma, page);
-		if (ret != SWAP_SUCCESS)
+		if (ret == SWAP_FAIL || !page->pte.mapcount)
 			goto out;
 	}
 
-	if (page->pte.mapcount)
-		BUG();
-
 out:
 	up(&mapping->i_shared_sem);
 	return ret;
@@ -754,8 +760,10 @@ int try_to_unmap(struct page * page)
 		}
 	}
 out:
-	if (!page_mapped(page))
+	if (!page_mapped(page)) {
 		dec_page_state(nr_mapped);
+		ret = SWAP_SUCCESS;
+	}
 	return ret;
 }
 
@@ -840,6 +848,7 @@ retry:
 
 	index = NRPTE-1;
 	list_for_each_entry(vma, &mapping->i_mmap, shared) {
+		/* FIXME: unsafe without page_table_lock */
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
 			pte_paddr = ptep_to_paddr(pte);
@@ -856,6 +865,7 @@ retry:
 		}
 	}
 	list_for_each_entry(vma, &mapping->i_mmap_shared, shared) {
+		/* FIXME: unsafe without page_table_lock */
 		pte = find_pte(vma, page, NULL);
 		if (pte) {
 			pte_paddr = ptep_to_paddr(pte);

_