fs/ext3/balloc.c         |  123 +++++++++++++++++++++++++++++++++--------------
 fs/jbd/transaction.c     |    2 
 include/linux/ext3_jbd.h |    6 ++
 include/linux/jbd.h      |    0 
 4 files changed, 93 insertions(+), 38 deletions(-)

diff -puN fs/ext3/balloc.c~ext3-concurrent-block-allocation-fix-1 fs/ext3/balloc.c
--- 25/fs/ext3/balloc.c~ext3-concurrent-block-allocation-fix-1	2003-03-24 23:05:11.000000000 -0800
+++ 25-akpm/fs/ext3/balloc.c	2003-03-25 01:19:16.000000000 -0800
@@ -21,6 +21,11 @@
 #include <linux/buffer_head.h>
 
 /*
+ * Helper to get at the spinlock corresponding with this blockgroup
+ */
+#define bg_lock(sb, group) (&EXT3_SB(sb)->s_bgi[group].bg_balloc_lock)
+
+/*
  * balloc.c contains the blocks allocation and deallocation routines
  */
 
@@ -201,16 +206,6 @@ do_more:
 			}
 		}
 #endif
-		BUFFER_TRACE(bitmap_bh, "clear bit");
-		if (!ext3_clear_bit_atomic (&EXT3_SB(sb)->s_bgi[block_group].bg_balloc_lock,
-						bit + i, bitmap_bh->b_data)) {
-			ext3_error (sb, __FUNCTION__,
-				      "bit already cleared for block %lu", 
-				      block + i);
-			BUFFER_TRACE(bitmap_bh, "bit already cleared");
-		} else 
-			dquot_freed_blocks++;
-
 		/* @@@ This prevents newly-allocated data from being
 		 * freed and then reallocated within the same
 		 * transaction. 
@@ -229,17 +224,34 @@ do_more:
 		 * activity on the buffer any more and so it is safe to
 		 * reallocate it.  
 		 */
-		BUFFER_TRACE(bitmap_bh, "clear in b_committed_data");
+		BUFFER_TRACE(bitmap_bh, "set in b_committed_data");
 		J_ASSERT_BH(bitmap_bh,
 				bh2jh(bitmap_bh)->b_committed_data != NULL);
-		ext3_set_bit_atomic(&EXT3_SB(sb)->s_bgi[group].bg_balloc_lock,
-				bit + i, bh2jh(bitmap_bh)->b_committed_data);
+		ext3_set_bit_atomic(bg_lock(sb, block_group), bit + i,
+				bh2jh(bitmap_bh)->b_committed_data);
+
+		/*
+		 * We clear the bit in the bitmap after setting the committed
+		 * data bit, because this is the reverse order to that which
+		 * the allocator uses.
+		 */
+		BUFFER_TRACE(bitmap_bh, "clear bit");
+		if (!ext3_clear_bit_atomic(bg_lock(sb, block_group),
+						bit + i, bitmap_bh->b_data)) {
+			ext3_error (sb, __FUNCTION__,
+				      "bit already cleared for block %lu", 
+				      block + i);
+			BUFFER_TRACE(bitmap_bh, "bit already cleared");
+		} else {
+			dquot_freed_blocks++;
+		}
 	}
 
-	spin_lock(&EXT3_SB(sb)->s_bgi[block_group].bg_balloc_lock);
+	spin_lock(bg_lock(sb, block_group));
 	gdp->bg_free_blocks_count =
-		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) + dquot_freed_blocks);
-	spin_unlock(&EXT3_SB(sb)->s_bgi[block_group].bg_balloc_lock);
+		cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) +
+			dquot_freed_blocks);
+	spin_unlock(bg_lock(sb, block_group));
 
 	/* We dirtied the bitmap block */
 	BUFFER_TRACE(bitmap_bh, "dirtied bitmap block");
@@ -358,11 +370,37 @@ static int find_next_usable_block(int st
 	return -1;
 }
 
+/*
+ * We think we can allocate this block in this bitmap.  Try to set the bit.
+ * If that succeeds then check that nobody has allocated and then freed the
+ * block since we saw that it was not marked in b_committed_data.  If it _was_
+ * allocated and freed then clear the bit in the bitmap again and return
+ * zero (failure).
+ */
+static inline int
+claim_block(spinlock_t *lock, int block, struct buffer_head *bh)
+{
+	if (ext3_set_bit_atomic(lock, block, bh->b_data))
+		return 0;
+	if (buffer_jbd(bh) && bh2jh(bh)->b_committed_data &&
+			ext3_test_bit(block, bh2jh(bh)->b_committed_data)) {
+		ext3_clear_bit_atomic(lock, block, bh->b_data);
+		return 0;
+	}
+	return 1;
+}
+
+/*
+ * If we failed to allocate the desired block then we may end up crossing to a
+ * new bitmap.  In that case we must release write access to the old one via
+ * ext3_journal_release_buffer(), else we'll run out of credits.
+ */
 static int
 ext3_try_to_allocate(struct super_block *sb, handle_t *handle, int group,
 		struct buffer_head *bitmap_bh, int goal, int *errp)
 {
 	int i, fatal = 0;
+	int have_access = 0;
 
 	*errp = 0;
 
@@ -372,31 +410,38 @@ ext3_try_to_allocate(struct super_block 
 repeat:
 	goal = find_next_usable_block(goal, bitmap_bh,
 				EXT3_BLOCKS_PER_GROUP(sb));
-	if (goal < 0) 
-		return -1;
+	if (goal < 0)
+		goto fail;
 
 	for (i = 0;
 		i < 7 && goal > 0 && ext3_test_allocatable(goal - 1, bitmap_bh);
 		i++, goal--);
 
 got:
-	/* Make sure we use undo access for the bitmap, because it is
-	 * critical that we do the frozen_data COW on bitmap buffers in
-	 * all cases even if the buffer is in BJ_Forget state in the
-	 * committing transaction.  */
-	BUFFER_TRACE(bitmap_bh, "get undo access for marking new block");
-	fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
-	if (fatal) {
-		*errp = fatal;
-		return -1;
+	if (!have_access) {
+		/*
+		 * Make sure we use undo access for the bitmap, because it is
+	 	 * critical that we do the frozen_data COW on bitmap buffers in
+	 	 * all cases even if the buffer is in BJ_Forget state in the
+	 	 * committing transaction.
+		 */
+		BUFFER_TRACE(bitmap_bh, "get undo access for new block");
+		fatal = ext3_journal_get_undo_access(handle, bitmap_bh);
+		if (fatal) {
+			*errp = fatal;
+			goto fail;
+		}
+		have_access = 1;
 	}
 
-	if (ext3_set_bit_atomic(&EXT3_SB(sb)->s_bgi[group].bg_balloc_lock,
-				goal, bitmap_bh->b_data)) {
-		/* already allocated by concurrent thread -bzzz */
+	if (!claim_block(bg_lock(sb, group), goal, bitmap_bh)) {
+		/*
+		 * The block was allocated by another thread, or it was
+		 * allocated and then freed by another thread
+		 */
 		goal++;
 		if (goal >= EXT3_BLOCKS_PER_GROUP(sb))
-			return -1;
+			goto fail;
 		goto repeat;
 	}
 
@@ -404,10 +449,16 @@ got:
 	fatal = ext3_journal_dirty_metadata(handle, bitmap_bh);
 	if (fatal) {
 		*errp = fatal;
-		return -1;
+		goto fail;
 	}
 
 	return goal;
+fail:
+	if (have_access) {
+		BUFFER_TRACE(bitmap_bh, "journal_release_buffer");
+		ext3_journal_release_buffer(handle, bitmap_bh);
+	}
+	return -1;
 }
 
 
@@ -566,13 +617,13 @@ allocated:
 		}
 	}
 #endif
-	spin_lock(&EXT3_SB(sb)->s_bgi[group_no].bg_balloc_lock);
+	spin_lock(bg_lock(sb, group_no));
 	if (buffer_jbd(bitmap_bh) && bh2jh(bitmap_bh)->b_committed_data)
 		J_ASSERT_BH(bitmap_bh,
 			!ext3_test_bit(ret_block,
 					bh2jh(bitmap_bh)->b_committed_data));
 	ext3_debug("found bit %d\n", ret_block);
-	spin_unlock(&EXT3_SB(sb)->s_bgi[group_no].bg_balloc_lock);
+	spin_unlock(bg_lock(sb, group_no));
 
 	/* ret_block was blockgroup-relative.  Now it becomes fs-relative */
 	ret_block = target_block;
@@ -593,10 +644,10 @@ allocated:
 	ext3_debug("allocating block %d. Goal hits %d of %d.\n",
 			ret_block, goal_hits, goal_attempts);
 
-	spin_lock(&EXT3_SB(sb)->s_bgi[group_no].bg_balloc_lock);
+	spin_lock(bg_lock(sb, group_no));
 	gdp->bg_free_blocks_count =
 			cpu_to_le16(le16_to_cpu(gdp->bg_free_blocks_count) - 1);
-	spin_unlock(&EXT3_SB(sb)->s_bgi[group_no].bg_balloc_lock);
+	spin_unlock(bg_lock(sb, group_no));
 
 	BUFFER_TRACE(gdp_bh, "journal_dirty_metadata for group descriptor");
 	err = ext3_journal_dirty_metadata(handle, gdp_bh);
diff -puN fs/jbd/transaction.c~ext3-concurrent-block-allocation-fix-1 fs/jbd/transaction.c
--- 25/fs/jbd/transaction.c~ext3-concurrent-block-allocation-fix-1	2003-03-24 23:05:49.000000000 -0800
+++ 25-akpm/fs/jbd/transaction.c	2003-03-24 23:06:16.000000000 -0800
@@ -1121,7 +1121,6 @@ out:
 	return 0;
 }
 
-#if 0
 /* 
  * journal_release_buffer: undo a get_write_access without any buffer
  * updates, if the update decided in the end that it didn't need access.
@@ -1155,7 +1154,6 @@ void journal_release_buffer (handle_t *h
 	JBUFFER_TRACE(jh, "exit");
 	unlock_journal(journal);
 }
-#endif
 
 /** 
  * void journal_forget() - bforget() for potentially-journaled buffers.
diff -puN include/linux/jbd.h~ext3-concurrent-block-allocation-fix-1 include/linux/jbd.h
diff -puN include/linux/ext3_jbd.h~ext3-concurrent-block-allocation-fix-1 include/linux/ext3_jbd.h
--- 25/include/linux/ext3_jbd.h~ext3-concurrent-block-allocation-fix-1	2003-03-24 23:07:14.000000000 -0800
+++ 25-akpm/include/linux/ext3_jbd.h	2003-03-24 23:08:14.000000000 -0800
@@ -117,6 +117,12 @@ __ext3_journal_get_write_access(const ch
 }
 
 static inline void
+ext3_journal_release_buffer(handle_t *handle, struct buffer_head *bh)
+{
+	journal_release_buffer(handle, bh);
+}
+
+static inline void
 ext3_journal_forget(handle_t *handle, struct buffer_head *bh)
 {
 	journal_forget(handle, bh);

_