From: Daniel McNeil <daniel@osdl.org>

This adds i_size_seqcount to the inode structure (on 32-bit SMP builds) and
then uses i_size_read() and i_size_write() to provide atomic access to
i_size.  This is a port of Andrea Arcangeli's i_size atomic access patch from
2.4.  This only uses the generic reader/writer consistency mechanism (a
seqcount).

Before:
mnm:/usr/src/25> size vmlinux 
   text    data     bss     dec     hex filename
2229582 1027683  162436 3419701  342e35 vmlinux

After:
mnm:/usr/src/25> size vmlinux
   text    data     bss     dec     hex filename
2225642 1027655  162436 3415733  341eb5 vmlinux

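3.9k more text, a lot of it fastpath :(

(Note: as listed above, `text` is 3940 bytes larger in the "Before" output
than in the "After" output, so the two labels appear to be swapped -- the
larger figure presumably belongs to the patched kernel.)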



 drivers/block/loop.c |    2 -
 fs/attr.c            |    2 -
 fs/binfmt_aout.c     |    4 +--
 fs/block_dev.c       |    6 ++--
 fs/buffer.c          |   17 ++++++++-----
 fs/direct-io.c       |    8 ++++--
 fs/ext3/inode.c      |    4 +--
 fs/inode.c           |    1 
 fs/ioctl.c           |    2 -
 fs/libfs.c           |    6 ++++
 fs/locks.c           |    4 +--
 fs/mpage.c           |    8 +++---
 fs/nfs/inode.c       |    8 ++++--
 fs/nfs/write.c       |   21 ++++++++---------
 fs/open.c            |    2 -
 fs/quota_v1.c        |    8 ++++--
 fs/read_write.c      |    4 +--
 fs/stat.c            |    2 -
 include/linux/fs.h   |   63 +++++++++++++++++++++++++++++++++++++++++++++++++++
 ipc/shm.c            |    2 -
 mm/filemap.c         |   32 ++++++++++++++-----------
 mm/memory.c          |    4 +--
 mm/nommu.c           |    4 +--
 mm/readahead.c       |    5 ++--
 mm/shmem.c           |   18 ++++++++------
 mm/swapfile.c        |    4 +--
 26 files changed, 164 insertions(+), 77 deletions(-)

diff -puN drivers/block/loop.c~i_size-atomic-access drivers/block/loop.c
--- 25/drivers/block/loop.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/drivers/block/loop.c	2003-06-19 23:37:08.000000000 -0700
@@ -146,7 +146,7 @@ struct loop_func_table *xfer_funcs[MAX_L
 
 static int figure_loop_size(struct loop_device *lo)
 {
-	loff_t size = lo->lo_backing_file->f_dentry->d_inode->i_mapping->host->i_size;
+	loff_t size = i_size_read(lo->lo_backing_file->f_dentry->d_inode->i_mapping->host);
 	sector_t x;
 	/*
 	 * Unfortunately, if we want to do I/O on the device,
diff -puN fs/attr.c~i_size-atomic-access fs/attr.c
--- 25/fs/attr.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/attr.c	2003-06-19 23:37:08.000000000 -0700
@@ -68,7 +68,7 @@ int inode_setattr(struct inode * inode, 
 	int error = 0;
 
 	if (ia_valid & ATTR_SIZE) {
-		if (attr->ia_size != inode->i_size) {
+		if (attr->ia_size != i_size_read(inode)) {
 			error = vmtruncate(inode, attr->ia_size);
 			if (error || (ia_valid == ATTR_SIZE))
 				goto out;
diff -puN fs/binfmt_aout.c~i_size-atomic-access fs/binfmt_aout.c
--- 25/fs/binfmt_aout.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/binfmt_aout.c	2003-06-19 23:37:08.000000000 -0700
@@ -269,7 +269,7 @@ static int load_aout_binary(struct linux
 	if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != OMAGIC &&
 	     N_MAGIC(ex) != QMAGIC && N_MAGIC(ex) != NMAGIC) ||
 	    N_TRSIZE(ex) || N_DRSIZE(ex) ||
-	    bprm->file->f_dentry->d_inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
+	    i_size_read(bprm->file->f_dentry->d_inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
 		return -ENOEXEC;
 	}
 
@@ -454,7 +454,7 @@ static int load_aout_library(struct file
 	/* We come in here for the regular a.out style of shared libraries */
 	if ((N_MAGIC(ex) != ZMAGIC && N_MAGIC(ex) != QMAGIC) || N_TRSIZE(ex) ||
 	    N_DRSIZE(ex) || ((ex.a_entry & 0xfff) && N_MAGIC(ex) == ZMAGIC) ||
-	    inode->i_size < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
+	    i_size_read(inode) < ex.a_text+ex.a_data+N_SYMSIZE(ex)+N_TXTOFF(ex)) {
 		goto out;
 	}
 
diff -puN fs/block_dev.c~i_size-atomic-access fs/block_dev.c
--- 25/fs/block_dev.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/block_dev.c	2003-06-19 23:37:08.000000000 -0700
@@ -29,7 +29,7 @@
 static sector_t max_block(struct block_device *bdev)
 {
 	sector_t retval = ~((sector_t)0);
-	loff_t sz = bdev->bd_inode->i_size;
+	loff_t sz = i_size_read(bdev->bd_inode);
 
 	if (sz) {
 		unsigned int size = block_size(bdev);
@@ -156,7 +156,7 @@ static int blkdev_commit_write(struct fi
 static loff_t block_llseek(struct file *file, loff_t offset, int origin)
 {
 	/* ewww */
-	loff_t size = file->f_dentry->d_inode->i_bdev->bd_inode->i_size;
+	loff_t size = i_size_read(file->f_dentry->d_inode->i_bdev->bd_inode);
 	loff_t retval;
 
 	lock_kernel();
@@ -485,7 +485,7 @@ int check_disk_change(struct block_devic
 static void bd_set_size(struct block_device *bdev, loff_t size)
 {
 	unsigned bsize = bdev_hardsect_size(bdev);
-	bdev->bd_inode->i_size = size;
+	i_size_write(bdev->bd_inode, size);
 	while (bsize < PAGE_CACHE_SIZE) {
 		if (size & bsize)
 			break;
diff -puN fs/buffer.c~i_size-atomic-access fs/buffer.c
--- 25/fs/buffer.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/buffer.c	2003-06-19 23:37:08.000000000 -0700
@@ -1758,7 +1758,7 @@ static int __block_write_full_page(struc
 
 	BUG_ON(!PageLocked(page));
 
-	last_block = (inode->i_size - 1) >> inode->i_blkbits;
+	last_block = (i_size_read(inode) - 1) >> inode->i_blkbits;
 
 	if (!page_has_buffers(page)) {
 		if (!PageUptodate(page))
@@ -2095,7 +2095,7 @@ int block_read_full_page(struct page *pa
 	head = page_buffers(page);
 
 	iblock = (sector_t)page->index << (PAGE_CACHE_SHIFT - inode->i_blkbits);
-	lblock = (inode->i_size+blocksize-1) >> inode->i_blkbits;
+	lblock = (i_size_read(inode)+blocksize-1) >> inode->i_blkbits;
 	bh = head;
 	nr = 0;
 	i = 0;
@@ -2320,8 +2320,12 @@ int generic_commit_write(struct file *fi
 	struct inode *inode = page->mapping->host;
 	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 	__block_commit_write(inode,page,from,to);
+	/*
+	 * No need to use i_size_read() here, the i_size
+	 * cannot change under us because we hold i_sem.
+	 */
 	if (pos > inode->i_size) {
-		inode->i_size = pos;
+		i_size_write(inode, pos);
 		mark_inode_dirty(inode);
 	}
 	return 0;
@@ -2473,7 +2477,7 @@ int nobh_commit_write(struct file *file,
 
 	set_page_dirty(page);
 	if (pos > inode->i_size) {
-		inode->i_size = pos;
+		i_size_write(inode, pos);
 		mark_inode_dirty(inode);
 	}
 	return 0;
@@ -2603,7 +2607,8 @@ int block_write_full_page(struct page *p
 			struct writeback_control *wbc)
 {
 	struct inode * const inode = page->mapping->host;
-	const unsigned long end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	loff_t i_size = i_size_read(inode);
+	const unsigned long end_index = i_size >> PAGE_CACHE_SHIFT;
 	unsigned offset;
 	void *kaddr;
 
@@ -2612,7 +2617,7 @@ int block_write_full_page(struct page *p
 		return __block_write_full_page(inode, page, get_block, wbc);
 
 	/* Is the page fully outside i_size? (truncate in progress) */
-	offset = inode->i_size & (PAGE_CACHE_SIZE-1);
+	offset = i_size & (PAGE_CACHE_SIZE-1);
 	if (page->index >= end_index+1 || !offset) {
 		/*
 		 * The page may have dirty, unmapped buffers.  For example,
diff -puN fs/direct-io.c~i_size-atomic-access fs/direct-io.c
--- 25/fs/direct-io.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/direct-io.c	2003-06-19 23:37:09.000000000 -0700
@@ -757,7 +757,7 @@ do_holes:
 				char *kaddr;
 
 				if (dio->block_in_file >=
-						dio->inode->i_size>>blkbits) {
+					i_size_read(dio->inode)>>blkbits) {
 					/* We hit eof */
 					page_cache_release(page);
 					goto out;
@@ -943,13 +943,15 @@ direct_io_worker(int rw, struct kiocb *i
 		if (ret == 0)
 			ret = dio->page_errors;
 		if (ret == 0 && dio->result) {
+			loff_t i_size = i_size_read(inode);
+
 			ret = dio->result;
 			/*
 			 * Adjust the return value if the read crossed a
 			 * non-block-aligned EOF.
 			 */
-			if (rw == READ && (offset + ret > inode->i_size))
-				ret = inode->i_size - offset;
+			if (rw == READ && (offset + ret > i_size))
+				ret = i_size - offset;
 		}
 		kfree(dio);
 	}
diff -puN fs/ext3/inode.c~i_size-atomic-access fs/ext3/inode.c
--- 25/fs/ext3/inode.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/ext3/inode.c	2003-06-19 23:37:08.000000000 -0700
@@ -1200,7 +1200,7 @@ static int ext3_journalled_commit_write(
 	if (!partial)
 		SetPageUptodate(page);
 	if (pos > inode->i_size)
-		inode->i_size = pos;
+		i_size_write(inode, pos);
 	EXT3_I(inode)->i_state |= EXT3_STATE_JDATA;
 	if (inode->i_size > EXT3_I(inode)->i_disksize) {
 		EXT3_I(inode)->i_disksize = inode->i_size;
@@ -1574,7 +1574,7 @@ out_stop:
 			loff_t end = offset + ret;
 			if (end > inode->i_size) {
 				ei->i_disksize = end;
-				inode->i_size = end;
+				i_size_write(inode, end);
 				err = ext3_mark_inode_dirty(handle, inode);
 				if (!ret) 
 					ret = err;
diff -puN fs/inode.c~i_size-atomic-access fs/inode.c
--- 25/fs/inode.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/inode.c	2003-06-19 23:37:08.000000000 -0700
@@ -190,6 +190,7 @@ void inode_init_once(struct inode *inode
 	INIT_LIST_HEAD(&inode->i_data.i_mmap);
 	INIT_LIST_HEAD(&inode->i_data.i_mmap_shared);
 	spin_lock_init(&inode->i_lock);
+	i_size_ordered_init(inode);
 }
 
 static void init_once(void * foo, kmem_cache_t * cachep, unsigned long flags)
diff -puN fs/ioctl.c~i_size-atomic-access fs/ioctl.c
--- 25/fs/ioctl.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/ioctl.c	2003-06-19 23:37:08.000000000 -0700
@@ -40,7 +40,7 @@ static int file_ioctl(struct file *filp,
 				return -EBADF;
 			return put_user(inode->i_sb->s_blocksize, (int *) arg);
 		case FIONREAD:
-			return put_user(inode->i_size - filp->f_pos, (int *) arg);
+			return put_user(i_size_read(inode) - filp->f_pos, (int *) arg);
 	}
 	if (filp->f_op && filp->f_op->ioctl)
 		return filp->f_op->ioctl(inode, filp, cmd, arg);
diff -puN fs/libfs.c~i_size-atomic-access fs/libfs.c
--- 25/fs/libfs.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/libfs.c	2003-06-19 23:37:08.000000000 -0700
@@ -328,8 +328,12 @@ int simple_commit_write(struct file *fil
 	struct inode *inode = page->mapping->host;
 	loff_t pos = ((loff_t)page->index << PAGE_CACHE_SHIFT) + to;
 
+	/*
+	 * No need to use i_size_read() here, the i_size
+	 * cannot change under us because we hold the i_sem.
+	 */
 	if (pos > inode->i_size)
-		inode->i_size = pos;
+		i_size_write(inode, pos);
 	set_page_dirty(page);
 	return 0;
 }
diff -puN fs/locks.c~i_size-atomic-access fs/locks.c
--- 25/fs/locks.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/locks.c	2003-06-19 23:37:08.000000000 -0700
@@ -285,7 +285,7 @@ static int flock_to_posix_lock(struct fi
 		start = filp->f_pos;
 		break;
 	case 2: /*SEEK_END*/
-		start = filp->f_dentry->d_inode->i_size;
+		start = i_size_read(filp->f_dentry->d_inode);
 		break;
 	default:
 		return -EINVAL;
@@ -335,7 +335,7 @@ static int flock64_to_posix_lock(struct 
 		start = filp->f_pos;
 		break;
 	case 2: /*SEEK_END*/
-		start = filp->f_dentry->d_inode->i_size;
+		start = i_size_read(filp->f_dentry->d_inode);
 		break;
 	default:
 		return -EINVAL;
diff -puN fs/mpage.c~i_size-atomic-access fs/mpage.c
--- 25/fs/mpage.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/mpage.c	2003-06-19 23:37:08.000000000 -0700
@@ -227,7 +227,7 @@ do_mpage_readpage(struct bio *bio, struc
 		goto confused;
 
 	block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
-	last_block = (inode->i_size + blocksize - 1) >> blkbits;
+	last_block = (i_size_read(inode) + blocksize - 1) >> blkbits;
 
 	bh.b_page = page;
 	for (page_block = 0; page_block < blocks_per_page;
@@ -459,7 +459,7 @@ mpage_writepage(struct bio *bio, struct 
 	 */
 	BUG_ON(!PageUptodate(page));
 	block_in_file = page->index << (PAGE_CACHE_SHIFT - blkbits);
-	last_block = (inode->i_size - 1) >> blkbits;
+	last_block = (i_size_read(inode) - 1) >> blkbits;
 	map_bh.b_page = page;
 	for (page_block = 0; page_block < blocks_per_page; ) {
 
@@ -489,9 +489,9 @@ mpage_writepage(struct bio *bio, struct 
 
 	first_unmapped = page_block;
 
-	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	end_index = i_size_read(inode) >> PAGE_CACHE_SHIFT;
 	if (page->index >= end_index) {
-		unsigned offset = inode->i_size & (PAGE_CACHE_SIZE - 1);
+		unsigned offset = i_size_read(inode) & (PAGE_CACHE_SIZE - 1);
 		char *kaddr;
 
 		if (page->index > end_index || !offset)
diff -puN fs/nfs/inode.c~i_size-atomic-access fs/nfs/inode.c
--- 25/fs/nfs/inode.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/nfs/inode.c	2003-06-19 23:37:09.000000000 -0700
@@ -1001,6 +1001,7 @@ __nfs_refresh_inode(struct inode *inode,
 	loff_t		new_isize;
 	int		invalid = 0;
 	int		mtime_update = 0;
+	loff_t		cur_isize;
 
 	dfprintk(VFS, "NFS: refresh_inode(%s/%ld ct=%d info=0x%x)\n",
 			inode->i_sb->s_id, inode->i_ino,
@@ -1087,8 +1088,9 @@ __nfs_refresh_inode(struct inode *inode,
 	 * If we have pending writebacks, things can get
 	 * messy.
 	 */
-	if (nfs_have_writebacks(inode) && new_isize < inode->i_size)
-		new_isize = inode->i_size;
+	cur_isize = i_size_read(inode);
+	if (nfs_have_writebacks(inode) && new_isize < cur_isize)
+		new_isize = cur_isize;
 
 	nfsi->read_cache_ctime = fattr->ctime;
 	inode->i_ctime = fattr->ctime;
@@ -1102,7 +1104,7 @@ __nfs_refresh_inode(struct inode *inode,
 	}
 
 	nfsi->read_cache_isize = new_size;
-	inode->i_size = new_isize;
+	i_size_write(inode, new_isize);
 
 	if (inode->i_mode != fattr->mode ||
 	    inode->i_uid != fattr->uid ||
diff -puN fs/nfs/write.c~i_size-atomic-access fs/nfs/write.c
--- 25/fs/nfs/write.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/nfs/write.c	2003-06-19 23:37:09.000000000 -0700
@@ -187,8 +187,8 @@ nfs_writepage_sync(struct file *file, st
 		 * If we've extended the file, update the inode
 		 * now so we don't invalidate the cache.
 		 */
-		if (wdata.args.offset > inode->i_size)
-			inode->i_size = wdata.args.offset;
+		if (wdata.args.offset > i_size_read(inode))
+			i_size_write(inode, wdata.args.offset);
 	} while (count);
 
 	if (PageError(page))
@@ -218,8 +218,8 @@ nfs_writepage_async(struct file *file, s
 	nfs_unlock_request(req);
 	nfs_strategy(inode);
 	end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
-	if (inode->i_size < end)
-		inode->i_size = end;
+	if (i_size_read(inode) < end)
+		i_size_write(inode, end);
 
  out:
 	return status;
@@ -234,9 +234,10 @@ nfs_writepage(struct page *page, struct 
 	struct inode *inode = page->mapping->host;
 	unsigned long end_index;
 	unsigned offset = PAGE_CACHE_SIZE;
+	loff_t i_size = i_size_read(inode);
 	int err;
 
-	end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+	end_index = i_size >> PAGE_CACHE_SHIFT;
 
 	/* Ensure we've flushed out any previous writes */
 	nfs_wb_page(inode,page);
@@ -245,7 +246,7 @@ nfs_writepage(struct page *page, struct 
 	if (page->index < end_index)
 		goto do_it;
 	/* things got complicated... */
-	offset = inode->i_size & (PAGE_CACHE_SIZE-1);
+	offset = i_size & (PAGE_CACHE_SIZE-1);
 
 	/* OK, are we completely out? */
 	err = -EIO;
@@ -708,8 +709,8 @@ nfs_updatepage(struct file *file, struct
 
 	status = 0;
 	end = ((loff_t)page->index<<PAGE_CACHE_SHIFT) + (loff_t)(offset + count);
-	if (inode->i_size < end)
-		inode->i_size = end;
+	if (i_size_read(inode) < end)
+		i_size_write(inode, end);
 
 	/* If we wrote past the end of the page.
 	 * Call the strategy routine so it can send out a bunch
@@ -723,7 +724,7 @@ nfs_updatepage(struct file *file, struct
 		nfs_unlock_request(req);
 done:
         dprintk("NFS:      nfs_updatepage returns %d (isize %Ld)\n",
-                                                status, (long long)inode->i_size);
+			status, (long long)i_size_read(inode));
 	if (status < 0)
 		ClearPageUptodate(page);
 	return status;
@@ -958,7 +959,7 @@ nfs_commit_rpcsetup(struct list_head *he
 	end = req_offset(last) + last->wb_bytes;
 	len = end - start;
 	/* If 'len' is not a 32-bit quantity, pass '0' in the COMMIT call */
-	if (end >= inode->i_size || len < 0 || len > (~((u32)0) >> 1))
+	if (end >= i_size_read(inode) || len < 0 || len > (~((u32)0) >> 1))
 		len = 0;
 
 	data->inode	  = inode;
diff -puN fs/open.c~i_size-atomic-access fs/open.c
--- 25/fs/open.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/open.c	2003-06-19 23:37:08.000000000 -0700
@@ -1011,7 +1011,7 @@ asmlinkage long sys_vhangup(void)
  */
 int generic_file_open(struct inode * inode, struct file * filp)
 {
-	if (!(filp->f_flags & O_LARGEFILE) && inode->i_size > MAX_NON_LFS)
+	if (!(filp->f_flags & O_LARGEFILE) && i_size_read(inode) > MAX_NON_LFS)
 		return -EFBIG;
 	return 0;
 }
diff -puN fs/quota_v1.c~i_size-atomic-access fs/quota_v1.c
--- 25/fs/quota_v1.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/quota_v1.c	2003-06-19 23:37:08.000000000 -0700
@@ -132,12 +132,14 @@ static int v1_check_quota_file(struct su
 	mm_segment_t fs;
 	ssize_t size;
 	loff_t offset = 0;
+	loff_t isize;
 	static const uint quota_magics[] = V2_INITQMAGICS;
 
-	if (!inode->i_size)
+	isize = i_size_read(inode);
+	if (!isize)
 		return 0;
-	blocks = inode->i_size >> BLOCK_SIZE_BITS;
-	off = inode->i_size & (BLOCK_SIZE - 1);
+	blocks = isize >> BLOCK_SIZE_BITS;
+	off = isize & (BLOCK_SIZE - 1);
 	if ((blocks % sizeof(struct v1_disk_dqblk) * BLOCK_SIZE + off) % sizeof(struct v1_disk_dqblk))
 		return 0;
 	/* Doublecheck whether we didn't get file with new format - with old quotactl() this could happen */
diff -puN fs/read_write.c~i_size-atomic-access fs/read_write.c
--- 25/fs/read_write.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/read_write.c	2003-06-19 23:37:09.000000000 -0700
@@ -55,7 +55,7 @@ loff_t remote_llseek(struct file *file, 
 	lock_kernel();
 	switch (origin) {
 		case 2:
-			offset += file->f_dentry->d_inode->i_size;
+			offset += i_size_read(file->f_dentry->d_inode);
 			break;
 		case 1:
 			offset += file->f_pos;
@@ -84,7 +84,7 @@ loff_t default_llseek(struct file *file,
 	lock_kernel();
 	switch (origin) {
 		case 2:
-			offset += file->f_dentry->d_inode->i_size;
+			offset += i_size_read(file->f_dentry->d_inode);
 			break;
 		case 1:
 			offset += file->f_pos;
diff -puN fs/stat.c~i_size-atomic-access fs/stat.c
--- 25/fs/stat.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/fs/stat.c	2003-06-19 23:37:08.000000000 -0700
@@ -28,7 +28,7 @@ void generic_fillattr(struct inode *inod
 	stat->atime = inode->i_atime;
 	stat->mtime = inode->i_mtime;
 	stat->ctime = inode->i_ctime;
-	stat->size = inode->i_size;
+	stat->size = i_size_read(inode);
 	stat->blocks = inode->i_blocks;
 	stat->blksize = inode->i_blksize;
 }
diff -puN include/linux/fs.h~i_size-atomic-access include/linux/fs.h
--- 25/include/linux/fs.h~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/include/linux/fs.h	2003-06-19 23:37:08.000000000 -0700
@@ -350,6 +350,17 @@ struct block_device {
 	struct gendisk *	bd_disk;
 };
 
+/*
+ * Use sequence counter to get consistent i_size on 32-bit processors.
+ */
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+#include <linux/seqlock.h>
+#define __NEED_I_SIZE_ORDERED
+#define i_size_ordered_init(inode) seqcount_init(&inode->i_size_seqcount)
+#else
+#define i_size_ordered_init(inode) do { } while (0)
+#endif
+
 struct inode {
 	struct hlist_node	i_hash;
 	struct list_head	i_list;
@@ -400,8 +411,60 @@ struct inode {
 	union {
 		void		*generic_ip;
 	} u;
+#ifdef __NEED_I_SIZE_ORDERED
+	seqcount_t		i_size_seqcount;
+#endif
 };
 
+/*
+ * NOTE: in a 32bit arch with a preemptable kernel and
+ * an UP compile the i_size_read/write must be atomic
+ * with respect to the local cpu (unlike with preempt disabled),
+ * but they don't need to be atomic with respect to other cpus like in
+ * true SMP (so they need to either locally disable irq around
+ * the read or for example on x86 they can be still implemented as a
+ * cmpxchg8b without the need of the lock prefix). For SMP compiles
+ * and 64bit archs it makes no difference if preempt is enabled or not.
+ */
+static inline loff_t i_size_read(struct inode *inode)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+	loff_t i_size;
+	unsigned int seq;
+
+	do {
+		seq = read_seqcount_begin(&inode->i_size_seqcount);
+		i_size = inode->i_size;
+	} while (read_seqcount_retry(&inode->i_size_seqcount, seq));
+	return i_size;
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+	loff_t i_size;
+
+	preempt_disable();
+	i_size = inode->i_size;
+	preempt_enable();
+	return i_size;
+#else
+	return inode->i_size;
+#endif
+}
+
+
+static inline void i_size_write(struct inode *inode, loff_t i_size)
+{
+#if BITS_PER_LONG==32 && defined(CONFIG_SMP)
+	write_seqcount_begin(&inode->i_size_seqcount);
+	inode->i_size = i_size;
+	write_seqcount_end(&inode->i_size_seqcount);
+#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT)
+	preempt_disable();
+	inode->i_size = i_size;
+	preempt_enable();
+#else
+	inode->i_size = i_size;
+#endif
+}
+
 struct fown_struct {
 	rwlock_t lock;          /* protects pid, uid, euid fields */
 	int pid;		/* pid or -pgrp where SIGIO should be sent */
diff -puN ipc/shm.c~i_size-atomic-access ipc/shm.c
--- 25/ipc/shm.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/ipc/shm.c	2003-06-19 23:37:08.000000000 -0700
@@ -703,7 +703,7 @@ long sys_shmat(int shmid, char __user *s
 	}
 		
 	file = shp->shm_file;
-	size = file->f_dentry->d_inode->i_size;
+	size = i_size_read(file->f_dentry->d_inode);
 	shp->shm_nattch++;
 	shm_unlock(shp);
 
diff -puN mm/filemap.c~i_size-atomic-access mm/filemap.c
--- 25/mm/filemap.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/mm/filemap.c	2003-06-19 23:37:08.000000000 -0700
@@ -612,14 +612,15 @@ void do_generic_mapping_read(struct addr
 	for (;;) {
 		struct page *page;
 		unsigned long end_index, nr, ret;
+		loff_t isize = i_size_read(inode);
 
-		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+		end_index = isize >> PAGE_CACHE_SHIFT;
 			
 		if (index > end_index)
 			break;
 		nr = PAGE_CACHE_SIZE;
 		if (index == end_index) {
-			nr = inode->i_size & ~PAGE_CACHE_MASK;
+			nr = isize & ~PAGE_CACHE_MASK;
 			if (nr <= offset)
 				break;
 		}
@@ -836,7 +837,7 @@ __generic_file_aio_read(struct kiocb *io
 		retval = 0;
 		if (!count)
 			goto out; /* skip atime */
-		size = inode->i_size;
+		size = i_size_read(inode);
 		if (pos < size) {
 			retval = generic_file_direct_IO(READ, iocb,
 						iov, pos, nr_segs);
@@ -1025,7 +1026,7 @@ retry_all:
 	 * An external ptracer can access pages that normally aren't
 	 * accessible..
 	 */
-	size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if ((pgoff >= size) && (area->vm_mm == current->mm))
 		return NULL;
 
@@ -1292,7 +1293,7 @@ static int filemap_populate(struct vm_ar
 					pgoff, len >> PAGE_CACHE_SHIFT);
 
 repeat:
-	size = (inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+	size = (i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 	if (pgoff + (len >> PAGE_CACHE_SHIFT) > size)
 		return -EINVAL;
 
@@ -1605,7 +1606,7 @@ inline int generic_write_checks(struct i
 	if (!isblk) {
 		/* FIXME: this is for backwards compatibility with 2.4 */
 		if (file->f_flags & O_APPEND)
-                        *pos = inode->i_size;
+                        *pos = i_size_read(inode);
 
 		if (limit != RLIM_INFINITY) {
 			if (*pos >= limit) {
@@ -1651,15 +1652,17 @@ inline int generic_write_checks(struct i
 		if (unlikely(*pos + *count > inode->i_sb->s_maxbytes))
 			*count = inode->i_sb->s_maxbytes - *pos;
 	} else {
+		loff_t isize;
 		if (bdev_read_only(inode->i_bdev))
 			return -EPERM;
-		if (*pos >= inode->i_size) {
-			if (*count || *pos > inode->i_size)
+		isize = i_size_read(inode);
+		if (*pos >= isize) {
+			if (*count || *pos > isize)
 				return -ENOSPC;
 		}
 
-		if (*pos + *count > inode->i_size)
-			*count = inode->i_size - *pos;
+		if (*pos + *count > isize)
+			*count = isize - *pos;
 	}
 	return 0;
 }
@@ -1745,8 +1748,8 @@ generic_file_aio_write_nolock(struct kio
 					iov, pos, nr_segs);
 		if (written > 0) {
 			loff_t end = pos + written;
-			if (end > inode->i_size && !isblk) {
-				inode->i_size = end;
+			if (end > i_size_read(inode) && !isblk) {
+				i_size_write(inode,  end);
 				mark_inode_dirty(inode);
 			}
 			*ppos = end;
@@ -1794,14 +1797,15 @@ generic_file_aio_write_nolock(struct kio
 
 		status = a_ops->prepare_write(file, page, offset, offset+bytes);
 		if (unlikely(status)) {
+			loff_t isize = i_size_read(inode);
 			/*
 			 * prepare_write() may have instantiated a few blocks
 			 * outside i_size.  Trim these off again.
 			 */
 			unlock_page(page);
 			page_cache_release(page);
-			if (pos + bytes > inode->i_size)
-				vmtruncate(inode, inode->i_size);
+			if (pos + bytes > isize)
+				vmtruncate(inode, isize);
 			break;
 		}
 		if (likely(nr_segs == 1))
diff -puN mm/memory.c~i_size-atomic-access mm/memory.c
--- 25/mm/memory.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/mm/memory.c	2003-06-19 23:37:08.000000000 -0700
@@ -1162,7 +1162,7 @@ int vmtruncate(struct inode * inode, lof
 
 	if (inode->i_size < offset)
 		goto do_expand;
-	inode->i_size = offset;
+	i_size_write(inode, offset);
 	invalidate_mmap_range(mapping, offset + PAGE_SIZE - 1, 0);
 	truncate_inode_pages(mapping, offset);
 	goto out_truncate;
@@ -1173,7 +1173,7 @@ do_expand:
 		goto out_sig;
 	if (offset > inode->i_sb->s_maxbytes)
 		goto out;
-	inode->i_size = offset;
+	i_size_write(inode, offset);
 
 out_truncate:
 	if (inode->i_op && inode->i_op->truncate)
diff -puN mm/nommu.c~i_size-atomic-access mm/nommu.c
--- 25/mm/nommu.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/mm/nommu.c	2003-06-19 23:37:08.000000000 -0700
@@ -48,7 +48,7 @@ int vmtruncate(struct inode *inode, loff
 
 	if (inode->i_size < offset)
 		goto do_expand;
-	inode->i_size = offset;
+	i_size_write(inode, offset);
 
 	truncate_inode_pages(mapping, offset);
 	goto out_truncate;
@@ -59,7 +59,7 @@ do_expand:
 		goto out_sig;
 	if (offset > inode->i_sb->s_maxbytes)
 		goto out;
-	inode->i_size = offset;
+	i_size_write(inode, offset);
 
 out_truncate:
 	if (inode->i_op && inode->i_op->truncate) {
diff -puN mm/readahead.c~i_size-atomic-access mm/readahead.c
--- 25/mm/readahead.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/mm/readahead.c	2003-06-19 23:37:08.000000000 -0700
@@ -208,11 +208,12 @@ __do_page_cache_readahead(struct address
 	LIST_HEAD(page_pool);
 	int page_idx;
 	int ret = 0;
+	loff_t isize = i_size_read(inode);
 
-	if (inode->i_size == 0)
+	if (isize == 0)
 		goto out;
 
- 	end_index = ((inode->i_size - 1) >> PAGE_CACHE_SHIFT);
+ 	end_index = ((isize - 1) >> PAGE_CACHE_SHIFT);
 
 	/*
 	 * Preallocate as many pages as we will need.
diff -puN mm/shmem.c~i_size-atomic-access mm/shmem.c
--- 25/mm/shmem.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/mm/shmem.c	2003-06-19 23:37:08.000000000 -0700
@@ -298,7 +298,7 @@ static swp_entry_t *shmem_swp_alloc(stru
 	static const swp_entry_t unswapped = {0};
 
 	if (sgp != SGP_WRITE &&
-	    ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size)
+	    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode))
 		return ERR_PTR(-EINVAL);
 
 	while (!(entry = shmem_swp_entry(info, index, &page))) {
@@ -331,7 +331,7 @@ static swp_entry_t *shmem_swp_alloc(stru
 			return ERR_PTR(-ENOMEM);
 		}
 		if (sgp != SGP_WRITE &&
-		    ((loff_t) index << PAGE_CACHE_SHIFT) >= inode->i_size) {
+		    ((loff_t) index << PAGE_CACHE_SHIFT) >= i_size_read(inode)) {
 			entry = ERR_PTR(-EINVAL);
 			break;
 		}
@@ -640,7 +640,7 @@ found:
 
 	/* Racing against delete or truncate? Must leave out of page cache */
 	limit = (inode->i_state & I_FREEING)? 0:
-		(inode->i_size + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
+		(i_size_read(inode) + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
 
 	if (idx >= limit ||
 	    move_from_swap_cache(page, idx, inode->i_mapping) == 0)
@@ -963,7 +963,7 @@ static int shmem_populate(struct vm_area
 	enum sgp_type sgp = nonblock? SGP_QUICK: SGP_CACHE;
 	unsigned long size;
 
-	size = (inode->i_size + PAGE_SIZE - 1) >> PAGE_SHIFT;
+	size = (i_size_read(inode) + PAGE_SIZE - 1) >> PAGE_SHIFT;
 	if (pgoff >= size || pgoff + (len >> PAGE_SHIFT) > size)
 		return -EINVAL;
 
@@ -1238,12 +1238,13 @@ static void do_shmem_file_read(struct fi
 	for (;;) {
 		struct page *page = NULL;
 		unsigned long end_index, nr, ret;
+		loff_t i_size = i_size_read(inode);
 
-		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+		end_index = i_size >> PAGE_CACHE_SHIFT;
 		if (index > end_index)
 			break;
 		if (index == end_index) {
-			nr = inode->i_size & ~PAGE_CACHE_MASK;
+			nr = i_size & ~PAGE_CACHE_MASK;
 			if (nr <= offset)
 				break;
 		}
@@ -1260,9 +1261,10 @@ static void do_shmem_file_read(struct fi
 		 * are called without i_sem protection against truncate
 		 */
 		nr = PAGE_CACHE_SIZE;
-		end_index = inode->i_size >> PAGE_CACHE_SHIFT;
+		i_size = i_size_read(inode);
+		end_index = i_size >> PAGE_CACHE_SHIFT;
 		if (index == end_index) {
-			nr = inode->i_size & ~PAGE_CACHE_MASK;
+			nr = i_size & ~PAGE_CACHE_MASK;
 			if (nr <= offset) {
 				page_cache_release(page);
 				break;
diff -puN mm/swapfile.c~i_size-atomic-access mm/swapfile.c
--- 25/mm/swapfile.c~i_size-atomic-access	2003-06-19 23:37:08.000000000 -0700
+++ 25-akpm/mm/swapfile.c	2003-06-19 23:37:09.000000000 -0700
@@ -922,7 +922,7 @@ static int setup_swap_extents(struct swa
 	 */
 	probe_block = 0;
 	page_no = 0;
-	last_block = inode->i_size >> blkbits;
+	last_block = i_size_read(inode) >> blkbits;
 	while ((probe_block + blocks_per_page) <= last_block &&
 			page_no < sis->max) {
 		unsigned block_in_page;
@@ -1308,7 +1308,7 @@ asmlinkage long sys_swapon(const char __
 		goto bad_swap;
 	}
 
-	swapfilesize = mapping->host->i_size >> PAGE_SHIFT;
+	swapfilesize = i_size_read(mapping->host) >> PAGE_SHIFT;
 
 	/*
 	 * Read the swap header.

_