From: Alex Tomas <bzzz@tmi.comex.ru>

hi!

this time, concurrent block/inode allocation for EXT3 against 2.5.65.
should be applied with ext2-concurrent-balloc because of ext2_set_bit_atomic()
and ext2_clear_bit_atomic().

to see real improvement, you should use 2.5.65-mm4, in which Andrew Morton pushed
the BKL down into JBD.

1) each group has its own spinlock, which is used for group counter
   modifications
2) sb->s_free_blocks_count isn't used any more.  ext3_statfs() and
   find_group_orlov() loop over groups to count free blocks
3) sb->s_free_blocks_count is recalculated at mount/umount/sync_super time
   in order to check consistency and to avoid fsck warnings
4) reserved blocks are distributed over last groups
5) ext3_new_block() first tries to use non-reserved blocks and, if that
   fails, falls back to reserved blocks
6) ext3_new_block() and ext3_free_blocks() do not modify sb->s_free_blocks_count,
   therefore they do not call mark_buffer_dirty() for the superblock's
   buffer_head.  This should reduce I/O a bit




 fs/ext3/balloc.c           |  238 +++++++++++++++++++++++----------------------
 fs/ext3/ialloc.c           |   70 +++++--------
 fs/ext3/super.c            |   59 +++++++++--
 include/linux/ext3_fs.h    |    2 
 include/linux/ext3_fs_sb.h |   10 +
 5 files changed, 212 insertions(+), 167 deletions(-)

diff -puN fs/ext3/balloc.c~ext3-concurrent-block-inode-allocation fs/ext3/balloc.c
--- 25/fs/ext3/balloc.c~ext3-concurrent-block-inode-allocation	2003-04-28 23:38:54.000000000 -0700
+++ 25-akpm/fs/ext3/balloc.c	2003-04-28 23:38:54.000000000 -0700
@@ -118,7 +118,6 @@ void ext3_free_blocks (handle_t *handle,
 		printk ("ext3_free_blocks: nonexistent device");
 		return;
 	}
-	lock_super (sb);
 	es = EXT3_SB(sb)->s_es;
 	if (block < le32_to_cpu(es->s_first_data_block) ||
 	    block + count < block ||
@@ -184,11 +183,6 @@ do_more:
 	if (err)
 		goto error_return;
 
-	BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
-	err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
-	if (err)
-		goto error_return;
-
 	for (i = 0; i < count; i++) {
 		/*
 		 * An HJ special.  This is expensive...
@@ -208,18 +202,15 @@ do_more:
 		}
 #endif
 		BUFFER_TRACE(bitmap_bh, "clear bit");
-		if (!ext3_clear_bit (bit + i, bitmap_bh->b_data)) {
+		if (!ext3_clear_bit_atomic (&EXT3_SB(sb)->s_bgi[block_group].bg_balloc_lock,
+						bit + i, bitmap_bh->b_data)) {
 			ext3_error (sb, __FUNCTION__,
 				      "bit already cleared for block %lu", 
 				      block + i);
 			BUFFER_TRACE(bitmap_bh, "bit already cleared");
-		} else {
+		} else 
 			dquot_freed_blocks++;
-			gdp->bg_free_blocks_count =
-			  cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count)+1);
-			es->s_free_blocks_count =
-			  cpu_to_le32(le32_to_cpu(es->s_free_blocks_count)+1);
-		}
+
 		/* @@@ This prevents newly-allocated data from being
 		 * freed and then reallocated within the same
 		 * transaction. 
@@ -241,9 +232,15 @@ do_more:
 		BUFFER_TRACE(bitmap_bh, "clear in b_committed_data");
 		J_ASSERT_BH(bitmap_bh,
 				bh2jh(bitmap_bh)->b_committed_data != NULL);
-		ext3_set_bit(bit + i, bh2jh(bitmap_bh)->b_committed_data);
+		ext3_set_bit_atomic(&EXT3_SB(sb)->s_bgi[group].bg_balloc_lock,
+				bit + i, bh2jh(bitmap_bh)->b_committed_data);
 	}
 
+	spin_lock(&EXT3_SB(sb)->s_bgi[block_group].bg_balloc_lock);
+	gdp->bg_free_blocks_count =
+		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + dquot_freed_blocks);
+	spin_unlock(&EXT3_SB(sb)->s_bgi[block_group].bg_balloc_lock);
+
 	/* We dirtied the bitmap block */
 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
 	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
@@ -253,11 +250,6 @@ do_more:
 	ret = ext3_journal_dirty_metadata(handle, gd_bh);
 	if (!err) err = ret;
 
-	/* And the superblock */
-	BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "dirtied superblock");
-	ret = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
-	if (!err) err = ret;
-
 	if (overflow && !err) {
 		block += count;
 		count = overflow;
@@ -267,7 +259,6 @@ do_more:
 error_return:
 	brelse(bitmap_bh);
 	ext3_std_error(sb, err);
-	unlock_super(sb);
 	if (dquot_freed_blocks)
 		DQUOT_FREE_BLOCK(inode, dquot_freed_blocks);
 	return;
@@ -367,6 +358,59 @@ static int find_next_usable_block(int st
 	return -1;
 }
 
+static int
+ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
+		struct buffer_head *bitmap_bh, int goal, int *errp)
+{
+	int i, fatal = 0;
+
+	*errp = 0;
+
+	if (goal >= 0 && ext3_test_allocatable(goal, bitmap_bh))
+		goto got;
+
+repeat:
+	goal = find_next_usable_block(goal, bitmap_bh,
+				EXT3_BLOCKS_PER_GROUP(sb));
+	if (goal < 0) 
+		return -1;
+
+	for (i = 0;
+		i < 7 && goal > 0 && ext3_test_allocatable(goal - 1, bitmap_bh);
+		i++, goal--);
+
+got:
+	/* Make sure we use undo access for the bitmap, because it is
+	 * critical that we do the frozen_data COW on bitmap buffers in
+	 * all cases even if the buffer is in BJ_Forget state in the
+	 * committing transaction.  */
+	BUFFER_TRACE(bitmap_bh, "get undo access for marking new block");
+	fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
+	if (fatal) {
+		*errp = fatal;
+		return -1;
+	}
+
+	if (ext3_set_bit_atomic(&EXT3_SB(sb)->s_bgi[group].bg_balloc_lock,
+				goal, bitmap_bh->b_data)) {
+		/* already allocated by concurrent thread -bzzz */
+		goal++;
+		if (goal >= EXT3_BLOCKS_PER_GROUP(sb))
+			return -1;
+		goto repeat;
+	}
+
+	BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for bitmap block");
+	fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
+	if (fatal) {
+		*errp = fatal;
+		return -1;
+	}
+
+	return goal;
+}
+
+
 /*
  * ext3_new_block uses a goal block to assist allocation.  If the goal is
  * free, or there is a free block within 32 blocks of the goal, that block
@@ -383,10 +427,12 @@ ext3_new_block(handle_t *handle, struct 
 	struct buffer_head *gdp_bh;		/* bh2 */
 	int group_no;				/* i */
 	int ret_block;				/* j */
-	int bit;				/* k */
+	int bgi;				/* blockgroup iteration index */
 	int target_block;			/* tmp */
 	int fatal = 0, err;
 	int performed_allocation = 0;
+	int free;
+	int use_reserve = 0;
 	struct super_block *sb;
 	struct ext3_group_desc *gdp;
 	struct ext3_super_block *es;
@@ -408,16 +454,7 @@ ext3_new_block(handle_t *handle, struct 
 		return 0;
 	}
 
-	lock_super(sb);
 	es = EXT3_SB(sb)->s_es;
-	if (le32_to_cpu(es->s_free_blocks_count) <=
-			le32_to_cpu(es->s_r_blocks_count) &&
-	    ((EXT3_SB(sb)->s_resuid != current->fsuid) &&
-	     (EXT3_SB(sb)->s_resgid == 0 ||
-	      !in_group_p(EXT3_SB(sb)->s_resgid)) && 
-	     !capable(CAP_SYS_RESOURCE)))
-		goto out;
-
 	ext3_debug("goal=%lu.\n", goal);
 
 	/*
@@ -432,40 +469,28 @@ ext3_new_block(handle_t *handle, struct 
 	if (!gdp)
 		goto io_error;
 
-	if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
+	free = le16_to_cpu(gdp->bg_free_blocks_count);
+	free -= EXT3_SB(sb)->s_bgi[group_no].bg_reserved;
+	if (free > 0) {
 		ret_block = ((goal - le32_to_cpu(es->s_first_data_block)) %
 				EXT3_BLOCKS_PER_GROUP(sb));
-#ifdef EXT3FS_DEBUG
-		if (ret_block)
-			goal_attempts++;
-#endif
 		bitmap_bh = read_block_bitmap(sb, group_no);
 		if (!bitmap_bh)
-			goto io_error;
-
-		ext3_debug("goal is at %d:%d.\n", group_no, ret_block);
-
-		if (ext3_test_allocatable(ret_block, bitmap_bh)) {
-#ifdef EXT3FS_DEBUG
-			goal_hits++;
-			ext3_debug("goal bit allocated.\n");
-#endif
-			goto got_block;
-		}
-
-		ret_block = find_next_usable_block(ret_block, bitmap_bh,
-				EXT3_BLOCKS_PER_GROUP(sb));
+			goto io_error;	
+		ret_block = ext3_try_to_allocate(sb, handle, group_no,
+					bitmap_bh, ret_block, &fatal);
+		if (fatal)
+			goto out;
 		if (ret_block >= 0)
-			goto search_back;
+			goto allocated;
 	}
-
-	ext3_debug("Bit not found in block group %d.\n", group_no);
-
+	
 	/*
 	 * Now search the rest of the groups.  We assume that 
 	 * i and gdp correctly point to the last group visited.
 	 */
-	for (bit = 0; bit < EXT3_SB(sb)->s_groups_count; bit++) {
+repeat:
+	for (bgi = 0; bgi < EXT3_SB(sb)->s_groups_count; bgi++) {
 		group_no++;
 		if (group_no >= EXT3_SB(sb)->s_groups_count)
 			group_no = 0;
@@ -474,57 +499,47 @@ ext3_new_block(handle_t *handle, struct 
 			*errp = -EIO;
 			goto out;
 		}
-		if (le16_to_cpu(gdp->bg_free_blocks_count) > 0) {
-			brelse(bitmap_bh);
-			bitmap_bh = read_block_bitmap(sb, group_no);
-			if (!bitmap_bh)
-				goto io_error;
-			ret_block = find_next_usable_block(-1, bitmap_bh, 
-						   EXT3_BLOCKS_PER_GROUP(sb));
-			if (ret_block >= 0) 
-				goto search_back;
-		}
+		free = le16_to_cpu(gdp->bg_free_blocks_count);
+		if (!use_reserve) 
+			free -= EXT3_SB(sb)->s_bgi[group_no].bg_reserved;
+		if (free <= 0)
+			continue;
+
+		brelse(bitmap_bh);
+		bitmap_bh = read_block_bitmap(sb, group_no);
+		if (!bitmap_bh)
+			goto io_error;
+		ret_block = ext3_try_to_allocate(sb, handle, group_no,
+						bitmap_bh, -1, &fatal);
+		if (fatal)
+			goto out;
+		if (ret_block >= 0) 
+			goto allocated;
+	}
+
+	if (!use_reserve &&
+		(EXT3_SB(sb)->s_resuid == current->fsuid ||
+		  (EXT3_SB(sb)->s_resgid != 0 && in_group_p(EXT3_SB(sb)->s_resgid)) ||
+		  capable(CAP_SYS_RESOURCE))) {
+		use_reserve = 1;
+		group_no = 0;
+		goto repeat;
 	}
 
 	/* No space left on the device */
+	*errp = -ENOSPC;
 	goto out;
 
-search_back:
-	/* 
-	 * We have succeeded in finding a free byte in the block
-	 * bitmap.  Now search backwards up to 7 bits to find the
-	 * start of this group of free blocks.
-	 */
-	for (	bit = 0;
-		bit < 7 && ret_block > 0 &&
-			ext3_test_allocatable(ret_block - 1, bitmap_bh);
-		bit++, ret_block--)
-		;
-	
-got_block:
+allocated:
 
 	ext3_debug("using block group %d(%d)\n",
 			group_no, gdp->bg_free_blocks_count);
 
-	/* Make sure we use undo access for the bitmap, because it is
-           critical that we do the frozen_data COW on bitmap buffers in
-           all cases even if the buffer is in BJ_Forget state in the
-           committing transaction.  */
-	BUFFER_TRACE(bitmap_bh, "get undo access for marking new block");
-	fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
-	if (fatal)
-		goto out;
-	
 	BUFFER_TRACE(gdp_bh, "get_write_access");
 	fatal = ext3_journal_get_write_access(handle, gdp_bh);
 	if (fatal)
 		goto out;
 
-	BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
-	fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
-	if (fatal)
-		goto out;
-
 	target_block = ret_block + group_no * EXT3_BLOCKS_PER_GROUP(sb)
 				+ le32_to_cpu(es->s_first_data_block);
 
@@ -536,11 +551,6 @@ got_block:
 			    "Allocating block in system zone - "
 			    "block = %u", target_block);
 
-	/* The superblock lock should guard against anybody else beating
-	 * us to this point! */
-	J_ASSERT_BH(bitmap_bh, !ext3_test_bit(ret_block, bitmap_bh->b_data));
-	BUFFER_TRACE(bitmap_bh, "setting bitmap bit");
-	ext3_set_bit(ret_block, bitmap_bh->b_data);
 	performed_allocation = 1;
 
 #ifdef CONFIG_JBD_DEBUG
@@ -556,20 +566,17 @@ got_block:
 		}
 	}
 #endif
+	spin_lock(&EXT3_SB(sb)->s_bgi[group_no].bg_balloc_lock);
 	if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data)
 		J_ASSERT_BH(bitmap_bh,
 			!ext3_test_bit(ret_block,
 					bh2jh(bitmap_bh)->b_committed_data));
 	ext3_debug("found bit %d\n", ret_block);
+	spin_unlock(&EXT3_SB(sb)->s_bgi[group_no].bg_balloc_lock);
 
 	/* ret_block was blockgroup-relative.  Now it becomes fs-relative */
 	ret_block = target_block;
 
-	BUFFER_TRACE(bitmap_bh, "journal_dirty_metadata for bitmap block");
-	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
-	if (!fatal)
-		fatal = err;
-	
 	if (ret_block >= le32_to_cpu(es->s_blocks_count)) {
 		ext3_error(sb, "ext3_new_block",
 			    "block(%d) >= blocks count(%d) - "
@@ -586,27 +593,20 @@ got_block:
 	ext3_debug("allocating block %d. Goal hits %d of %d.\n",
 			ret_block, goal_hits, goal_attempts);
 
+	spin_lock(&EXT3_SB(sb)->s_bgi[group_no].bg_balloc_lock);
 	gdp->bg_free_blocks_count =
 			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
-	es->s_free_blocks_count =
-			cpu_to_le32(le32_to_cpu(es->s_free_blocks_count) - 1);
+	spin_unlock(&EXT3_SB(sb)->s_bgi[group_no].bg_balloc_lock);
 
 	BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
 	err = ext3_journal_dirty_metadata(handle, gdp_bh);
 	if (!fatal)
 		fatal = err;
 
-	BUFFER_TRACE(EXT3_SB(sb)->s_sbh,
-			"journal_dirty_metadata for superblock");
-	err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
-	if (!fatal)
-		fatal = err;
-
 	sb->s_dirt = 1;
 	if (fatal)
 		goto out;
 
-	unlock_super(sb);
 	*errp = 0;
 	brelse(bitmap_bh);
 	return ret_block;
@@ -618,7 +618,6 @@ out:
 		*errp = fatal;
 		ext3_std_error(sb, fatal);
 	}
-	unlock_super(sb);
 	/*
 	 * Undo the block allocation
 	 */
@@ -631,12 +630,13 @@ out:
 
 unsigned long ext3_count_free_blocks(struct super_block *sb)
 {
+	unsigned long desc_count;
+	struct ext3_group_desc *gdp;
+	int i;
 #ifdef EXT3FS_DEBUG
 	struct ext3_super_block *es;
-	unsigned long desc_count, bitmap_count, x;
+	unsigned long bitmap_count, x;
 	struct buffer_head *bitmap_bh = NULL;
-	struct ext3_group_desc *gdp;
-	int i;
 	
 	lock_super(sb);
 	es = EXT3_SB(sb)->s_es;
@@ -664,7 +664,15 @@ unsigned long ext3_count_free_blocks(str
 	unlock_super(sb);
 	return bitmap_count;
 #else
-	return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count);
+	desc_count = 0;
+	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
+		gdp = ext3_get_group_desc(sb, i, NULL);
+		if (!gdp)
+			continue;
+		desc_count += le16_to_cpu(gdp->bg_free_blocks_count);
+	}
+
+	return desc_count;
 #endif
 }
 
diff -puN fs/ext3/ialloc.c~ext3-concurrent-block-inode-allocation fs/ext3/ialloc.c
--- 25/fs/ext3/ialloc.c~ext3-concurrent-block-inode-allocation	2003-04-28 23:38:54.000000000 -0700
+++ 25-akpm/fs/ext3/ialloc.c	2003-04-28 23:38:54.000000000 -0700
@@ -131,7 +131,6 @@ void ext3_free_inode (handle_t *handle, 
 	/* Do this BEFORE marking the inode not in use or returning an error */
 	clear_inode (inode);
 
-	lock_super (sb);
 	es = EXT3_SB(sb)->s_es;
 	if (ino < EXT3_FIRST_INO(sb) || ino > le32_to_cpu(es->s_inodes_count)) {
 		ext3_error (sb, "ext3_free_inode",
@@ -150,7 +149,8 @@ void ext3_free_inode (handle_t *handle, 
 		goto error_return;
 
 	/* Ok, now we can actually update the inode bitmaps.. */
-	if (!ext3_clear_bit(bit, bitmap_bh->b_data))
+	if (!ext3_clear_bit_atomic(sb_bgl_lock(sbi, block_group),
+					bit, bitmap_bh->b_data))
 		ext3_error (sb, "ext3_free_inode",
 			      "bit already cleared for inode %lu", ino);
 	else {
@@ -160,28 +160,18 @@ void ext3_free_inode (handle_t *handle, 
 		fatal = ext3_journal_get_write_access(handle, bh2);
 		if (fatal) goto error_return;
 
-		BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get write access");
-		fatal = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
-		if (fatal) goto error_return;
-
 		if (gdp) {
+			spin_lock(&EXT3_SB(sb)->s_bgi[block_group].bg_ialloc_lock);
 			gdp->bg_free_inodes_count = cpu_to_le16(
 				le16_to_cpu(gdp->bg_free_inodes_count) + 1);
-			if (is_directory) {
+			if (is_directory)
 				gdp->bg_used_dirs_count = cpu_to_le16(
 				  le16_to_cpu(gdp->bg_used_dirs_count) - 1);
-				EXT3_SB(sb)->s_dir_count--;
-			}
+			spin_unlock(&EXT3_SB(sb)->s_bgi[block_group].bg_ialloc_lock);
 		}
 		BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
 		err = ext3_journal_dirty_metadata(handle, bh2);
 		if (!fatal) fatal = err;
-		es->s_free_inodes_count =
-			cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) + 1);
-		BUFFER_TRACE(EXT3_SB(sb)->s_sbh,
-					"call ext3_journal_dirty_metadata");
-		err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
-		if (!fatal) fatal = err;
 	}
 	BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
 	err = ext3_journal_dirty_metadata(handle, bitmap_bh);
@@ -191,7 +181,6 @@ void ext3_free_inode (handle_t *handle, 
 error_return:
 	brelse(bitmap_bh);
 	ext3_std_error(sb, fatal);
-	unlock_super(sb);
 }
 
 /*
@@ -206,9 +195,8 @@ error_return:
  */
 static int find_group_dir(struct super_block *sb, struct inode *parent)
 {
-	struct ext3_super_block * es = EXT3_SB(sb)->s_es;
 	int ngroups = EXT3_SB(sb)->s_groups_count;
-	int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
+	int avefreei = ext3_count_free_inodes(sb) / ngroups;
 	struct ext3_group_desc *desc, *best_desc = NULL;
 	struct buffer_head *bh;
 	int group, best_group = -1;
@@ -264,10 +252,12 @@ static int find_group_orlov(struct super
 	struct ext3_super_block *es = sbi->s_es;
 	int ngroups = sbi->s_groups_count;
 	int inodes_per_group = EXT3_INODES_PER_GROUP(sb);
-	int avefreei = le32_to_cpu(es->s_free_inodes_count) / ngroups;
-	int avefreeb = le32_to_cpu(es->s_free_blocks_count) / ngroups;
+	int freei = ext3_count_free_inodes(sb);
+	int avefreei = freei / ngroups;
+	int freeb = ext3_count_free_blocks(sb);
+	int avefreeb = freeb / ngroups;
 	int blocks_per_dir;
-	int ndirs = sbi->s_dir_count;
+	int ndirs = ext3_count_dirs(sb);
 	int max_debt, max_dirs, min_blocks, min_inodes;
 	int group = -1, i;
 	struct ext3_group_desc *desc;
@@ -319,7 +309,7 @@ static int find_group_orlov(struct super
 		desc = ext3_get_group_desc (sb, group, &bh);
 		if (!desc || !desc->bg_free_inodes_count)
 			continue;
-		if (sbi->s_debts[group] >= max_debt)
+		if (sbi->s_bgi[group].bg_debts >= max_debt)
 			continue;
 		if (le16_to_cpu(desc->bg_used_dirs_count) >= max_dirs)
 			continue;
@@ -435,7 +425,6 @@ struct inode *ext3_new_inode(handle_t *h
 		return ERR_PTR(-ENOMEM);
 	ei = EXT3_I(inode);
 
-	lock_super (sb);
 	es = EXT3_SB(sb)->s_es;
 repeat:
 	if (S_ISDIR(mode)) {
@@ -464,11 +453,9 @@ repeat:
 		err = ext3_journal_get_write_access(handle, bitmap_bh);
 		if (err) goto fail;
 
-		if (ext3_set_bit(ino, bitmap_bh->b_data)) {
-			ext3_error (sb, "ext3_new_inode",
-				      "bit already set for inode %lu", ino);
+		if (ext3_set_bit_atomic(sb_bgl_lock(sbi, group),
+						ino, bitmap_bh->b_data)) 
 			goto repeat;
-		}
 		BUFFER_TRACE(bitmap_bh, "call ext3_journal_dirty_metadata");
 		err = ext3_journal_dirty_metadata(handle, bitmap_bh);
 		if (err) goto fail;
@@ -504,26 +491,19 @@ repeat:
 	BUFFER_TRACE(bh2, "get_write_access");
 	err = ext3_journal_get_write_access(handle, bh2);
 	if (err) goto fail;
+	spin_lock(&EXT3_SB(sb)->s_bgi[group].bg_ialloc_lock);
 	gdp->bg_free_inodes_count =
 		cpu_to_le16(le16_to_cpu(gdp->bg_free_inodes_count) - 1);
 	if (S_ISDIR(mode)) {
 		gdp->bg_used_dirs_count =
 			cpu_to_le16(le16_to_cpu(gdp->bg_used_dirs_count) + 1);
-		EXT3_SB(sb)->s_dir_count++;
 	}
+	spin_unlock(&EXT3_SB(sb)->s_bgi[group].bg_ialloc_lock);
 	BUFFER_TRACE(bh2, "call ext3_journal_dirty_metadata");
 	err = ext3_journal_dirty_metadata(handle, bh2);
 	if (err) goto fail;
 	
-	BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "get_write_access");
-	err = ext3_journal_get_write_access(handle, EXT3_SB(sb)->s_sbh);
-	if (err) goto fail;
-	es->s_free_inodes_count =
-		cpu_to_le32(le32_to_cpu(es->s_free_inodes_count) - 1);
-	BUFFER_TRACE(EXT3_SB(sb)->s_sbh, "call ext3_journal_dirty_metadata");
-	err = ext3_journal_dirty_metadata(handle, EXT3_SB(sb)->s_sbh);
 	sb->s_dirt = 1;
-	if (err) goto fail;
 
 	inode->i_uid = current->fsuid;
 	if (test_opt (sb, GRPID))
@@ -576,7 +556,6 @@ repeat:
 
 	ei->i_state = EXT3_STATE_NEW;
 
-	unlock_super(sb);
 	ret = inode;
 	if(DQUOT_ALLOC_INODE(inode)) {
 		DQUOT_DROP(inode);
@@ -600,7 +579,6 @@ repeat:
 fail:
 	ext3_std_error(sb, err);
 out:
-	unlock_super(sb);
 	iput(inode);
 	ret = ERR_PTR(err);
 really_out:
@@ -673,12 +651,13 @@ out:
 
 unsigned long ext3_count_free_inodes (struct super_block * sb)
 {
+	unsigned long desc_count;
+	struct ext3_group_desc *gdp;
+	int i;
 #ifdef EXT3FS_DEBUG
 	struct ext3_super_block *es;
-	unsigned long desc_count, bitmap_count, x;
-	struct ext3_group_desc *gdp;
+	unsigned long bitmap_count, x;
 	struct buffer_head *bitmap_bh = NULL;
-	int i;
 
 	lock_super (sb);
 	es = EXT3_SB(sb)->s_es;
@@ -706,7 +685,14 @@ unsigned long ext3_count_free_inodes (st
 	unlock_super(sb);
 	return desc_count;
 #else
-	return le32_to_cpu(EXT3_SB(sb)->s_es->s_free_inodes_count);
+	desc_count = 0;
+	for (i = 0; i < EXT3_SB(sb)->s_groups_count; i++) {
+		gdp = ext3_get_group_desc (sb, i, NULL);
+		if (!gdp)
+			continue;
+		desc_count += le16_to_cpu(gdp->bg_free_inodes_count);
+	}
+	return desc_count;
 #endif
 }
 
diff -puN fs/ext3/super.c~ext3-concurrent-block-inode-allocation fs/ext3/super.c
--- 25/fs/ext3/super.c~ext3-concurrent-block-inode-allocation	2003-04-28 23:38:54.000000000 -0700
+++ 25-akpm/fs/ext3/super.c	2003-04-28 23:38:54.000000000 -0700
@@ -460,7 +460,7 @@ void ext3_put_super (struct super_block 
 	for (i = 0; i < sbi->s_gdb_count; i++)
 		brelse(sbi->s_group_desc[i]);
 	kfree(sbi->s_group_desc);
-	kfree(sbi->s_debts);
+	kfree(sbi->s_bgi);
 	brelse(sbi->s_sbh);
 
 	/* Debugging code just in case the in-memory inode orphan list
@@ -901,6 +901,8 @@ static int ext3_check_descriptors (struc
 	struct ext3_sb_info *sbi = EXT3_SB(sb);
 	unsigned long block = le32_to_cpu(sbi->s_es->s_first_data_block);
 	struct ext3_group_desc * gdp = NULL;
+	unsigned long total_free;
+	unsigned int reserved = le32_to_cpu(sbi->s_es->s_r_blocks_count);
 	int desc_block = 0;
 	int i;
 
@@ -947,6 +949,43 @@ static int ext3_check_descriptors (struc
 		block += EXT3_BLOCKS_PER_GROUP(sb);
 		gdp++;
 	}
+
+	total_free = ext3_count_free_blocks(sb);
+	if (total_free != le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count)) {
+		printk("EXT3-fs: invalid s_free_blocks_count %u (real %lu)\n",
+				le32_to_cpu(EXT3_SB(sb)->s_es->s_free_blocks_count),
+				total_free);
+		EXT3_SB(sb)->s_es->s_free_blocks_count = cpu_to_le32(total_free);
+	}
+
+	/* distribute reserved blocks over groups -bzzz */
+	for(i = sbi->s_groups_count - 1; reserved && total_free && i >= 0; i--) {
+		int free;
+
+		gdp = ext3_get_group_desc (sb, i, NULL);
+		if (!gdp) {
+			ext3_error (sb, "ext3_check_descriptors",
+					"cant get descriptor for group %d", i);
+			return 0;
+		}
+
+		free = le16_to_cpu(gdp->bg_free_blocks_count);
+		if (free > reserved)
+			free = reserved;
+		sbi->s_bgi[i].bg_reserved = free;
+		reserved -= free;
+		total_free -= free;
+	}
+
+	total_free = ext3_count_free_inodes(sb);
+	if (total_free != le32_to_cpu(EXT3_SB(sb)->s_es->s_free_inodes_count)) {
+		printk("EXT3-fs: invalid s_free_inodes_count %u (real %lu)\n",
+			le32_to_cpu(EXT3_SB(sb)->s_es->s_free_inodes_count),
+			total_free);
+		EXT3_SB(sb)->s_es->s_free_inodes_count = cpu_to_le32(total_free);
+	}
+
+
 	return 1;
 }
 
@@ -1307,13 +1346,17 @@ static int ext3_fill_super (struct super
 		printk (KERN_ERR "EXT3-fs: not enough memory\n");
 		goto failed_mount;
 	}
-	sbi->s_debts = kmalloc(sbi->s_groups_count * sizeof(*sbi->s_debts),
+	sbi->s_bgi = kmalloc(sbi->s_groups_count * sizeof(struct ext3_bg_info),
 			GFP_KERNEL);
-	if (!sbi->s_debts) {
-		printk ("EXT3-fs: not enough memory\n");
+	if (!sbi->s_bgi) {
+		printk("EXT3-fs: not enough memory to allocate s_bgi\n");
 		goto failed_mount2;
 	}
-	memset(sbi->s_debts, 0, sbi->s_groups_count * sizeof(*sbi->s_debts));
+	memset(sbi->s_bgi, 0,  sbi->s_groups_count * sizeof(struct ext3_bg_info));
+	for (i = 0; i < sbi->s_groups_count; i++) {
+		spin_lock_init(&sbi->s_bgi[i].bg_balloc_lock);
+		spin_lock_init(&sbi->s_bgi[i].bg_ialloc_lock);
+	}
 	for (i = 0; i < db_count; i++) {
 		block = descriptor_loc(sb, logic_sb_block, i);
 		sbi->s_group_desc[i] = sb_bread(sb, block);
@@ -1329,7 +1372,6 @@ static int ext3_fill_super (struct super
 		goto failed_mount2;
 	}
 	sbi->s_gdb_count = db_count;
-	sbi->s_dir_count = ext3_count_dirs(sb);
 	/*
 	 * set up enough so that it can read an inode
 	 */
@@ -1432,8 +1474,7 @@ static int ext3_fill_super (struct super
 failed_mount3:
 	journal_destroy(sbi->s_journal);
 failed_mount2:
-	if (sbi->s_debts)
-		kfree(sbi->s_debts);
+	kfree(sbi->s_bgi);
 	for (i = 0; i < db_count; i++)
 		brelse(sbi->s_group_desc[i]);
 	kfree(sbi->s_group_desc);
@@ -1702,6 +1743,8 @@ static void ext3_commit_super (struct su
 	if (!sbh)
 		return;
 	es->s_wtime = cpu_to_le32(get_seconds());
+	es->s_free_blocks_count = cpu_to_le32(ext3_count_free_blocks(sb));
+	es->s_free_inodes_count = cpu_to_le32(ext3_count_free_inodes(sb));
 	BUFFER_TRACE(sbh, "marking dirty");
 	mark_buffer_dirty(sbh);
 	if (sync)
diff -puN include/linux/ext3_fs.h~ext3-concurrent-block-inode-allocation include/linux/ext3_fs.h
--- 25/include/linux/ext3_fs.h~ext3-concurrent-block-inode-allocation	2003-04-28 23:38:54.000000000 -0700
+++ 25-akpm/include/linux/ext3_fs.h	2003-04-28 23:38:54.000000000 -0700
@@ -344,7 +344,9 @@ struct ext3_inode {
 #endif
 
 #define ext3_set_bit			ext2_set_bit
+#define ext3_set_bit_atomic		ext2_set_bit_atomic
 #define ext3_clear_bit			ext2_clear_bit
+#define ext3_clear_bit_atomic		ext2_clear_bit_atomic
 #define ext3_test_bit			ext2_test_bit
 #define ext3_find_first_zero_bit	ext2_find_first_zero_bit
 #define ext3_find_next_zero_bit		ext2_find_next_zero_bit
diff -puN include/linux/ext3_fs_sb.h~ext3-concurrent-block-inode-allocation include/linux/ext3_fs_sb.h
--- 25/include/linux/ext3_fs_sb.h~ext3-concurrent-block-inode-allocation	2003-04-28 23:38:54.000000000 -0700
+++ 25-akpm/include/linux/ext3_fs_sb.h	2003-04-28 23:38:54.000000000 -0700
@@ -21,6 +21,13 @@
 #include <linux/wait.h>
 #endif
 
+struct ext3_bg_info {
+	u8 bg_debts;
+	spinlock_t bg_balloc_lock;
+	spinlock_t bg_ialloc_lock;
+	unsigned long bg_reserved;
+} ____cacheline_aligned_in_smp;
+
 /*
  * third extended-fs super-block data in memory
  */
@@ -50,8 +57,7 @@ struct ext3_sb_info {
 	u32 s_next_generation;
 	u32 s_hash_seed[4];
 	int s_def_hash_version;
-	unsigned long s_dir_count;
-	u8 *s_debts;
+	struct ext3_bg_info *s_bgi;
 
 	/* Journaling */
 	struct inode * s_journal_inode;

_