vm_enough_memory() is called a lot - on every pagefault.  The atomic_add()
it performs against a global counter hurts on large SMP machines.

The patch simply reduces the rate at which that atomic operation is
performed, by accumulating a per-cpu count which is spilled into the global
counter whenever the local count exceeds a threshold.

It trades a little accuracy for efficiency.
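
For illustration, here is a minimal standalone sketch of the batching idea
(a toy model, not the kernel code: NCPUS, acct_memory() and the counter
names are made-up stand-ins; the real patch uses DEFINE_PER_CPU,
preempt_disable() and atomic_add(), as in the diff below):

/*
 * Toy userspace model of the per-cpu batching scheme.  Each CPU accumulates
 * deltas in a private counter and only folds them into the shared global
 * counter once the local value crosses a threshold.
 */
#include <stdio.h>

#define NCPUS		4
#define ACCT_THRESHOLD	(16 > NCPUS * 2 ? 16 : NCPUS * 2)

static long global_committed;		/* plays the role of vm_committed_space */
static long local_committed[NCPUS];	/* one private counter per CPU */

static void acct_memory(int cpu, long pages)
{
	long *local = &local_committed[cpu];

	*local += pages;
	if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
		global_committed += *local;	/* atomic_add() in the real patch */
		*local = 0;
	}
}

int main(void)
{
	int i;

	for (i = 0; i < 100; i++)
		acct_memory(i % NCPUS, 1);	/* account one page per call */
	printf("global=%ld, remainder still parked in per-cpu counters\n",
	       global_committed);
	return 0;
}

The expensive shared-counter update now happens roughly once every
ACCT_THRESHOLD accountings per CPU instead of on every call.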

I tried various implementations involving kmalloc_percpu() and open-coded
per-cpu arrays in a generic "per-cpu counter" thing.  They were all
surprisingly sucky - the additional cache misses incurred in walking the more
complex data structures really showed up.



 fs/proc/proc_misc.c  |    2 --
 include/linux/mman.h |   19 ++++++++++++++++++-
 mm/mmap.c            |    7 +------
 mm/swap.c            |   27 +++++++++++++++++++++++++++
 4 files changed, 46 insertions(+), 9 deletions(-)

diff -puN mm/mmap.c~vm_enough_memory-speedup mm/mmap.c
--- 25/mm/mmap.c~vm_enough_memory-speedup	2003-03-18 01:44:12.000000000 -0800
+++ 25-akpm/mm/mmap.c	2003-03-18 03:24:48.000000000 -0800
@@ -53,11 +53,6 @@ int sysctl_overcommit_memory = 0;	/* def
 int sysctl_overcommit_ratio = 50;	/* default is 50% */
 atomic_t vm_committed_space = ATOMIC_INIT(0);
 
-inline void vm_unacct_memory(long pages)
-{	
-	atomic_sub(pages, &vm_committed_space);
-}
-
 /*
  * Check that a process has enough memory to allocate a new virtual
  * mapping. 1 means there is enough memory for the allocation to
@@ -73,7 +68,7 @@ int vm_enough_memory(long pages)
 {
 	unsigned long free, allowed;
 
-	atomic_add(pages, &vm_committed_space);
+	vm_acct_memory(pages);
 
         /*
 	 * Sometimes we want to use more memory than we have
diff -puN include/linux/mman.h~vm_enough_memory-speedup include/linux/mman.h
--- 25/include/linux/mman.h~vm_enough_memory-speedup	2003-03-18 01:44:12.000000000 -0800
+++ 25-akpm/include/linux/mman.h	2003-03-18 01:44:12.000000000 -0800
@@ -1,12 +1,29 @@
 #ifndef _LINUX_MMAN_H
 #define _LINUX_MMAN_H
 
+#include <linux/config.h>
+
+#include <asm/atomic.h>
 #include <asm/mman.h>
 
 #define MREMAP_MAYMOVE	1
 #define MREMAP_FIXED	2
 
 extern int vm_enough_memory(long pages);
-extern void vm_unacct_memory(long pages);
+extern atomic_t vm_committed_space;
+
+#ifdef CONFIG_SMP
+extern void vm_acct_memory(long pages);
+#else
+static inline void vm_acct_memory(long pages)
+{
+	atomic_add(pages, &vm_committed_space);
+}
+#endif
+
+static inline void vm_unacct_memory(long pages)
+{
+	vm_acct_memory(-pages);
+}
 
 #endif /* _LINUX_MMAN_H */
diff -puN mm/swap.c~vm_enough_memory-speedup mm/swap.c
--- 25/mm/swap.c~vm_enough_memory-speedup	2003-03-18 01:44:12.000000000 -0800
+++ 25-akpm/mm/swap.c	2003-03-18 01:44:12.000000000 -0800
@@ -16,6 +16,7 @@
 #include <linux/mm.h>
 #include <linux/kernel_stat.h>
 #include <linux/swap.h>
+#include <linux/mman.h>
 #include <linux/pagemap.h>
 #include <linux/pagevec.h>
 #include <linux/init.h>
@@ -347,6 +348,32 @@ unsigned int pagevec_lookup(struct pagev
 	return pagevec_count(pvec);
 }
 
+
+#ifdef CONFIG_SMP
+/*
+ * We tolerate a little inaccuracy to avoid ping-ponging the counter between
+ * CPUs
+ */
+#define ACCT_THRESHOLD	max(16, NR_CPUS * 2)
+
+static DEFINE_PER_CPU(long, committed_space) = 0;
+
+void vm_acct_memory(long pages)
+{
+	long *local;
+
+	preempt_disable();
+	local = &__get_cpu_var(committed_space);
+	*local += pages;
+	if (*local > ACCT_THRESHOLD || *local < -ACCT_THRESHOLD) {
+		atomic_add(*local, &vm_committed_space);
+		*local = 0;
+	}
+	preempt_enable();
+}
+#endif
+
+
 /*
  * Perform any setup for the swap system
  */
diff -puN fs/proc/proc_misc.c~vm_enough_memory-speedup fs/proc/proc_misc.c
--- 25/fs/proc/proc_misc.c~vm_enough_memory-speedup	2003-03-18 01:44:12.000000000 -0800
+++ 25-akpm/fs/proc/proc_misc.c	2003-03-18 01:44:12.000000000 -0800
@@ -134,8 +134,6 @@ static int uptime_read_proc(char *page, 
 	return proc_calc_metrics(page, start, off, count, eof, len);
 }
 
-extern atomic_t vm_committed_space;
-
 static int meminfo_read_proc(char *page, char **start, off_t off,
 				 int count, int *eof, void *data)
 {
