From: Rusty Russell <rusty@rustcorp.com.au>

Several tweaks to the kmalloc_percpu()/kfree_percpu() interface, to
allow future implementations to be more flexible, and make easier to
use now we can see how it's actually being used.

1) No flags argument: GFP_ATOMIC doesn't make much sense,

2) Explicit alignment argument, so we don't have to give SMP_CACHE_BYTES
   alignment always,

3) Zeros memory, since most callers want that and it's not entirely
   trivial,

4) Convenient type-safe wrapper which takes a typename, and

5) Rename to alloc_percpu/__alloc_percpu, since usage no longer matches
   kmalloc.



 include/linux/genhd.h  |    5 +---
 include/linux/percpu.h |   26 ++++++++++++++++--------
 include/net/ipv6.h     |    2 -
 kernel/ksyms.c         |    4 +--
 mm/slab.c              |   29 ++++++++++-----------------
 net/ipv4/af_inet.c     |   52 +++++++++----------------------------------------
 net/ipv4/route.c       |   11 +---------
 net/ipv6/af_inet6.c    |   28 +++++++++++---------------
 net/ipv6/proc.c        |    3 +-
 net/sctp/protocol.c    |   22 ++++----------------
 10 files changed, 64 insertions(+), 118 deletions(-)

diff -puN include/linux/genhd.h~kmalloc_percpu-interface-change include/linux/genhd.h
--- 25/include/linux/genhd.h~kmalloc_percpu-interface-change	2003-05-28 19:00:29.000000000 -0700
+++ 25-akpm/include/linux/genhd.h	2003-05-28 19:00:29.000000000 -0700
@@ -160,16 +160,15 @@ static inline void disk_stat_set_all(str
 #ifdef  CONFIG_SMP
 static inline int init_disk_stats(struct gendisk *disk)
 {
-	disk->dkstats = kmalloc_percpu(sizeof (struct disk_stats), GFP_KERNEL);
+	disk->dkstats = alloc_percpu(struct disk_stats);
 	if (!disk->dkstats)
 		return 0;
-	disk_stat_set_all(disk, 0);
 	return 1;
 }
 
 static inline void free_disk_stats(struct gendisk *disk)
 {
-	kfree_percpu(disk->dkstats);
+	free_percpu(disk->dkstats);
 }
 #else	/* CONFIG_SMP */
 static inline int init_disk_stats(struct gendisk *disk)
diff -puN include/linux/percpu.h~kmalloc_percpu-interface-change include/linux/percpu.h
--- 25/include/linux/percpu.h~kmalloc_percpu-interface-change	2003-05-28 19:00:29.000000000 -0700
+++ 25-akpm/include/linux/percpu.h	2003-05-28 19:00:29.000000000 -0700
@@ -1,7 +1,8 @@
 #ifndef __LINUX_PERCPU_H
 #define __LINUX_PERCPU_H
 #include <linux/spinlock.h> /* For preempt_disable() */
-#include <linux/slab.h> /* For kmalloc_percpu() */
+#include <linux/slab.h> /* For kmalloc() */
+#include <linux/string.h> /* For memset() */
 #include <asm/percpu.h>
 
 /* Must be an lvalue. */
@@ -17,7 +18,7 @@ struct percpu_data {
 
 /* 
  * Use this to get to a cpu's version of the per-cpu object allocated using
- * kmalloc_percpu.  If you want to get "this cpu's version", maybe you want
+ * alloc_percpu.  If you want to get "this cpu's version", maybe you want
  * to use get_cpu_ptr... 
  */ 
 #define per_cpu_ptr(ptr, cpu)                   \
@@ -26,19 +27,22 @@ struct percpu_data {
         (__typeof__(ptr))__p->ptrs[(cpu)];	\
 })
 
-extern void *kmalloc_percpu(size_t size, int flags);
-extern void kfree_percpu(const void *);
+extern void *__alloc_percpu(size_t size, size_t align);
+extern void free_percpu(const void *);
 extern void kmalloc_percpu_init(void);
 
 #else /* CONFIG_SMP */
 
 #define per_cpu_ptr(ptr, cpu) (ptr)
 
-static inline void *kmalloc_percpu(size_t size, int flags)
+static inline void *__alloc_percpu(size_t size, size_t align)
 {
-	return(kmalloc(size, flags));
+	void *ret = kmalloc(size, GFP_KERNEL);
+	if (ret)
+		memset(ret, 0, size);
+	return ret;
 }
-static inline void kfree_percpu(const void *ptr)
+static inline void free_percpu(const void *ptr)
 {	
 	kfree(ptr);
 }
@@ -46,9 +50,13 @@ static inline void kmalloc_percpu_init(v
 
 #endif /* CONFIG_SMP */
 
+/* Simple wrapper for the common case: zeros memory. */
+#define alloc_percpu(type) \
+	((type *)(__alloc_percpu(sizeof(type), __alignof__(type))))
+
 /* 
- * Use these with kmalloc_percpu. If
- * 1. You want to operate on memory allocated by kmalloc_percpu (dereference
+ * Use these with alloc_percpu. If
+ * 1. You want to operate on memory allocated by alloc_percpu (dereference
  *    and read/modify/write)  AND 
  * 2. You want "this cpu's version" of the object AND 
  * 3. You want to do this safely since:
diff -puN include/net/ipv6.h~kmalloc_percpu-interface-change include/net/ipv6.h
--- 25/include/net/ipv6.h~kmalloc_percpu-interface-change	2003-05-28 19:00:29.000000000 -0700
+++ 25-akpm/include/net/ipv6.h	2003-05-28 19:00:29.000000000 -0700
@@ -145,7 +145,7 @@ extern atomic_t			inet6_sock_nr;
 
 int snmp6_register_dev(struct inet6_dev *idev);
 int snmp6_unregister_dev(struct inet6_dev *idev);
-int snmp6_mib_init(void *ptr[2], size_t mibsize);
+int snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign);
 void snmp6_mib_free(void *ptr[2]);
 
 struct ip6_ra_chain
diff -puN kernel/ksyms.c~kmalloc_percpu-interface-change kernel/ksyms.c
--- 25/kernel/ksyms.c~kmalloc_percpu-interface-change	2003-05-28 19:00:29.000000000 -0700
+++ 25-akpm/kernel/ksyms.c	2003-05-28 19:00:29.000000000 -0700
@@ -98,8 +98,8 @@ EXPORT_SYMBOL(remove_shrinker);
 EXPORT_SYMBOL(kmalloc);
 EXPORT_SYMBOL(kfree);
 #ifdef CONFIG_SMP
-EXPORT_SYMBOL(kmalloc_percpu);
-EXPORT_SYMBOL(kfree_percpu);
+EXPORT_SYMBOL(__alloc_percpu);
+EXPORT_SYMBOL(free_percpu);
 EXPORT_SYMBOL(percpu_counter_mod);
 #endif
 EXPORT_SYMBOL(vfree);
diff -puN mm/slab.c~kmalloc_percpu-interface-change mm/slab.c
--- 25/mm/slab.c~kmalloc_percpu-interface-change	2003-05-28 19:00:29.000000000 -0700
+++ 25-akpm/mm/slab.c	2003-05-28 19:00:29.000000000 -0700
@@ -1953,26 +1953,18 @@ void * kmalloc (size_t size, int flags)
 
 #ifdef CONFIG_SMP
 /**
- * kmalloc_percpu - allocate one copy of the object for every present
- * cpu in the system.
+ * __alloc_percpu - allocate one copy of the object for every present
+ * cpu in the system, zeroing them.
  * Objects should be dereferenced using per_cpu_ptr/get_cpu_ptr
  * macros only.
  *
  * @size: how many bytes of memory are required.
- * @flags: the type of memory to allocate.
- * The @flags argument may be one of:
- *
- * %GFP_USER - Allocate memory on behalf of user.  May sleep.
- *
- * %GFP_KERNEL - Allocate normal kernel ram.  May sleep.
- *
- * %GFP_ATOMIC - Allocation will not sleep.  Use inside interrupt handlers.
+ * @align: the alignment, which can't be greater than SMP_CACHE_BYTES.
  */
-void *
-kmalloc_percpu(size_t size, int flags)
+void *__alloc_percpu(size_t size, size_t align)
 {
 	int i;
-	struct percpu_data *pdata = kmalloc(sizeof (*pdata), flags);
+	struct percpu_data *pdata = kmalloc(sizeof (*pdata), GFP_KERNEL);
 
 	if (!pdata)
 		return NULL;
@@ -1980,9 +1972,10 @@ kmalloc_percpu(size_t size, int flags)
 	for (i = 0; i < NR_CPUS; i++) {
 		if (!cpu_possible(i))
 			continue;
-		pdata->ptrs[i] = kmalloc(size, flags);
+		pdata->ptrs[i] = kmalloc(size, GFP_KERNEL);
 		if (!pdata->ptrs[i])
 			goto unwind_oom;
+		memset(pdata->ptrs[i], 0, size);
 	}
 
 	/* Catch derefs w/o wrappers */
@@ -2039,14 +2032,14 @@ void kfree (const void *objp)
 
 #ifdef CONFIG_SMP
 /**
- * kfree_percpu - free previously allocated percpu memory
- * @objp: pointer returned by kmalloc_percpu.
+ * free_percpu - free previously allocated percpu memory
+ * @objp: pointer returned by alloc_percpu.
  *
- * Don't free memory not originally allocated by kmalloc_percpu()
+ * Don't free memory not originally allocated by alloc_percpu()
  * The complemented objp is to check for that.
  */
 void
-kfree_percpu(const void *objp)
+free_percpu(const void *objp)
 {
 	int i;
 	struct percpu_data *p = (struct percpu_data *) (~(unsigned long) objp);
diff -puN net/ipv4/af_inet.c~kmalloc_percpu-interface-change net/ipv4/af_inet.c
--- 25/net/ipv4/af_inet.c~kmalloc_percpu-interface-change	2003-05-28 19:00:29.000000000 -0700
+++ 25-akpm/net/ipv4/af_inet.c	2003-05-28 19:00:29.000000000 -0700
@@ -1068,54 +1068,22 @@ static struct inet_protocol icmp_protoco
 
 static int __init init_ipv4_mibs(void)
 {
-	int i;
-
-	net_statistics[0] =
-	    kmalloc_percpu(sizeof (struct linux_mib), GFP_KERNEL);
-	net_statistics[1] =
-	    kmalloc_percpu(sizeof (struct linux_mib), GFP_KERNEL);
-	ip_statistics[0] = kmalloc_percpu(sizeof (struct ip_mib), GFP_KERNEL);
-	ip_statistics[1] = kmalloc_percpu(sizeof (struct ip_mib), GFP_KERNEL);
-	icmp_statistics[0] =
-	    kmalloc_percpu(sizeof (struct icmp_mib), GFP_KERNEL);
-	icmp_statistics[1] =
-	    kmalloc_percpu(sizeof (struct icmp_mib), GFP_KERNEL);
-	tcp_statistics[0] = kmalloc_percpu(sizeof (struct tcp_mib), GFP_KERNEL);
-	tcp_statistics[1] = kmalloc_percpu(sizeof (struct tcp_mib), GFP_KERNEL);
-	udp_statistics[0] = kmalloc_percpu(sizeof (struct udp_mib), GFP_KERNEL);
-	udp_statistics[1] = kmalloc_percpu(sizeof (struct udp_mib), GFP_KERNEL);
+	net_statistics[0] = alloc_percpu(struct linux_mib);
+	net_statistics[1] = alloc_percpu(struct linux_mib);
+	ip_statistics[0] = alloc_percpu(struct ip_mib);
+	ip_statistics[1] = alloc_percpu(struct ip_mib);
+	icmp_statistics[0] = alloc_percpu(struct icmp_mib);
+	icmp_statistics[1] = alloc_percpu(struct icmp_mib);
+	tcp_statistics[0] = alloc_percpu(struct tcp_mib);
+	tcp_statistics[1] = alloc_percpu(struct tcp_mib);
+	udp_statistics[0] = alloc_percpu(struct udp_mib);
+	udp_statistics[1] = alloc_percpu(struct udp_mib);
 	if (!
 	    (net_statistics[0] && net_statistics[1] && ip_statistics[0]
 	     && ip_statistics[1] && tcp_statistics[0] && tcp_statistics[1]
 	     && udp_statistics[0] && udp_statistics[1]))
 		return -ENOMEM;
 
-	/* Set all the per cpu copies of the mibs to zero */
-	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_possible(i)) {
-			memset(per_cpu_ptr(net_statistics[0], i), 0,
-			       sizeof (struct linux_mib));
-			memset(per_cpu_ptr(net_statistics[1], i), 0,
-			       sizeof (struct linux_mib));
-			memset(per_cpu_ptr(ip_statistics[0], i), 0,
-			       sizeof (struct ip_mib));
-			memset(per_cpu_ptr(ip_statistics[1], i), 0,
-			       sizeof (struct ip_mib));
-			memset(per_cpu_ptr(icmp_statistics[0], i), 0,
-			       sizeof (struct icmp_mib));
-			memset(per_cpu_ptr(icmp_statistics[1], i), 0,
-			       sizeof (struct icmp_mib));
-			memset(per_cpu_ptr(tcp_statistics[0], i), 0,
-			       sizeof (struct tcp_mib));
-			memset(per_cpu_ptr(tcp_statistics[1], i), 0,
-			       sizeof (struct tcp_mib));
-			memset(per_cpu_ptr(udp_statistics[0], i), 0,
-			       sizeof (struct udp_mib));
-			memset(per_cpu_ptr(udp_statistics[1], i), 0,
-			       sizeof (struct udp_mib));
-		}
-	}
-
 	(void) tcp_mib_init();
 
 	return 0;
diff -puN net/ipv4/route.c~kmalloc_percpu-interface-change net/ipv4/route.c
--- 25/net/ipv4/route.c~kmalloc_percpu-interface-change	2003-05-28 19:00:29.000000000 -0700
+++ 25-akpm/net/ipv4/route.c	2003-05-28 19:00:29.000000000 -0700
@@ -2694,16 +2694,9 @@ int __init ip_rt_init(void)
 	ipv4_dst_ops.gc_thresh = (rt_hash_mask + 1);
 	ip_rt_max_size = (rt_hash_mask + 1) * 16;
 
-	rt_cache_stat = kmalloc_percpu(sizeof (struct rt_cache_stat),
-					GFP_KERNEL);
+	rt_cache_stat = alloc_percpu(struct rt_cache_stat);
 	if (!rt_cache_stat) 
 		goto out_enomem1;
-	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_possible(i)) {
-			memset(per_cpu_ptr(rt_cache_stat, i), 0,
-			       sizeof (struct rt_cache_stat));
-		}
-	}
 
 	devinet_init();
 	ip_fib_init();
@@ -2739,7 +2732,7 @@ int __init ip_rt_init(void)
 out:
 	return rc;
 out_enomem:
-	kfree_percpu(rt_cache_stat);
+	free_percpu(rt_cache_stat);
 out_enomem1:
 	rc = -ENOMEM;
 	goto out;
diff -puN net/ipv6/af_inet6.c~kmalloc_percpu-interface-change net/ipv6/af_inet6.c
--- 25/net/ipv6/af_inet6.c~kmalloc_percpu-interface-change	2003-05-28 19:00:29.000000000 -0700
+++ 25-akpm/net/ipv6/af_inet6.c	2003-05-28 19:00:29.000000000 -0700
@@ -640,32 +640,25 @@ inet6_unregister_protosw(struct inet_pro
 }
 
 int
-snmp6_mib_init(void *ptr[2], size_t mibsize)
+snmp6_mib_init(void *ptr[2], size_t mibsize, size_t mibalign)
 {
 	int i;
 
 	if (ptr == NULL)
 		return -EINVAL;
 
-	ptr[0] = kmalloc_percpu(mibsize, GFP_KERNEL);
+	ptr[0] = __alloc_percpu(mibsize, mibalign);
 	if (!ptr[0])
 		goto err0;
 
-	ptr[1] = kmalloc_percpu(mibsize, GFP_KERNEL);
+	ptr[1] = __alloc_percpu(mibsize, mibalign);
 	if (!ptr[1])
 		goto err1;
 
-	/* Zero percpu version of the mibs */
-	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_possible(i)) {
-			memset(per_cpu_ptr(ptr[0], i), 0, mibsize);
-			memset(per_cpu_ptr(ptr[1], i), 0, mibsize);
-		}
-	}
 	return 0;
 
 err1:
-	kfree_percpu(ptr[0]);
+	free_percpu(ptr[0]);
 	ptr[0] = NULL;
 err0:
 	return -ENOMEM;
@@ -676,18 +669,21 @@ snmp6_mib_free(void *ptr[2])
 {
 	if (ptr == NULL)
 		return;
-	kfree_percpu(ptr[0]);
-	kfree_percpu(ptr[1]);
+	free_percpu(ptr[0]);
+	free_percpu(ptr[1]);
 	ptr[0] = ptr[1] = NULL;
 }
 
 static int __init init_ipv6_mibs(void)
 {
-	if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipv6_mib)) < 0)
+	if (snmp6_mib_init((void **)ipv6_statistics, sizeof (struct ipv6_mib),
+			   __alignof__(struct ipv6_mib)) < 0)
 		goto err_ip_mib;
-	if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib)) < 0)
+	if (snmp6_mib_init((void **)icmpv6_statistics, sizeof (struct icmpv6_mib),
+			   __alignof__(struct ipv6_mib)) < 0)
 		goto err_icmp_mib;
-	if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib)) < 0)
+	if (snmp6_mib_init((void **)udp_stats_in6, sizeof (struct udp_mib),
+			   __alignof__(struct ipv6_mib)) < 0)
 		goto err_udp_mib;
 	return 0;
 
diff -puN net/ipv6/proc.c~kmalloc_percpu-interface-change net/ipv6/proc.c
--- 25/net/ipv6/proc.c~kmalloc_percpu-interface-change	2003-05-28 19:00:29.000000000 -0700
+++ 25-akpm/net/ipv6/proc.c	2003-05-28 19:00:29.000000000 -0700
@@ -228,7 +228,8 @@ int snmp6_register_dev(struct inet6_dev 
 	if (!idev || !idev->dev)
 		return -EINVAL;
 
-	if (snmp6_mib_init((void **)idev->stats.icmpv6, sizeof(struct icmpv6_mib)) < 0)
+	if (snmp6_mib_init((void **)idev->stats.icmpv6, sizeof(struct icmpv6_mib),
+			   __alignof__(struct ipv6_mib)) < 0)
 		goto err_icmp;
 
 #ifdef CONFIG_PROC_FS
diff -puN net/sctp/protocol.c~kmalloc_percpu-interface-change net/sctp/protocol.c
--- 25/net/sctp/protocol.c~kmalloc_percpu-interface-change	2003-05-28 19:00:29.000000000 -0700
+++ 25-akpm/net/sctp/protocol.c	2003-05-28 19:00:29.000000000 -0700
@@ -880,34 +880,22 @@ static int __init init_sctp_mibs(void)
 {
 	int i;
 
-	sctp_statistics[0] = kmalloc_percpu(sizeof (struct sctp_mib),
-					    GFP_KERNEL);
+	sctp_statistics[0] = alloc_percpu(struct sctp_mib);
 	if (!sctp_statistics[0])
 		return -ENOMEM;
-	sctp_statistics[1] = kmalloc_percpu(sizeof (struct sctp_mib),
-					    GFP_KERNEL);
+	sctp_statistics[1] = alloc_percpu(struct sctp_mib);
 	if (!sctp_statistics[1]) {
-		kfree_percpu(sctp_statistics[0]);
+		free_percpu(sctp_statistics[0]);
 		return -ENOMEM;
 	}
-
-	/* Zero all percpu versions of the mibs */
-	for (i = 0; i < NR_CPUS; i++) {
-		if (cpu_possible(i)) {
-			memset(per_cpu_ptr(sctp_statistics[0], i), 0,
-					sizeof (struct sctp_mib));
-			memset(per_cpu_ptr(sctp_statistics[1], i), 0,
-					sizeof (struct sctp_mib));
-		}
-	}
 	return 0;
 
 }
 
 static void cleanup_sctp_mibs(void)
 {
-	kfree_percpu(sctp_statistics[0]);
-	kfree_percpu(sctp_statistics[1]);
+	free_percpu(sctp_statistics[0]);
+	free_percpu(sctp_statistics[1]);
 }
 
 /* Initialize the universe into something sensible.  */

_