There are various places in which JBD is starting a commit against a
transaction without sufficient locking in place to ensure that that
transaction is still alive.

Change it so that log_start_commit() takes a transaction ID instead.  Make
the caller take a copy of that ID inside the appropriate locks.



 fs/ext3/super.c          |    9 +++--
 fs/jbd/checkpoint.c      |   12 +------
 fs/jbd/journal.c         |   75 ++++++++++++++++++++++++-----------------------
 fs/jbd/transaction.c     |   13 +++++---
 include/linux/ext3_jbd.h |   11 ------
 include/linux/jbd.h      |   11 +++---
 6 files changed, 62 insertions(+), 69 deletions(-)

diff -puN fs/ext3/super.c~jbd-670-log_start_commit-locking-fix fs/ext3/super.c
--- 25/fs/ext3/super.c~jbd-670-log_start_commit-locking-fix	2003-06-10 20:35:32.000000000 -0700
+++ 25-akpm/fs/ext3/super.c	2003-06-10 20:35:32.000000000 -0700
@@ -1811,7 +1811,7 @@ void ext3_write_super (struct super_bloc
 	if (down_trylock(&sb->s_lock) == 0)
 		BUG();
 	sb->s_dirt = 0;
-	log_start_commit(EXT3_SB(sb)->s_journal, NULL);
+	journal_start_commit(EXT3_SB(sb)->s_journal, NULL);
 }
 
 static int ext3_sync_fs(struct super_block *sb, int wait)
@@ -1819,9 +1819,10 @@ static int ext3_sync_fs(struct super_blo
 	tid_t target;
 
 	sb->s_dirt = 0;
-	target = log_start_commit(EXT3_SB(sb)->s_journal, NULL);
-	if (wait)
-		log_wait_commit(EXT3_SB(sb)->s_journal, target);
+	if (journal_start_commit(EXT3_SB(sb)->s_journal, &target)) {
+		if (wait)
+			log_wait_commit(EXT3_SB(sb)->s_journal, target);
+	}
 	return 0;
 }
 
diff -puN fs/jbd/checkpoint.c~jbd-670-log_start_commit-locking-fix fs/jbd/checkpoint.c
--- 25/fs/jbd/checkpoint.c~jbd-670-log_start_commit-locking-fix	2003-06-10 20:35:32.000000000 -0700
+++ 25-akpm/fs/jbd/checkpoint.c	2003-06-10 20:35:32.000000000 -0700
@@ -124,7 +124,6 @@ static void jbd_sync_bh(journal_t *journ
  * checkpoint.  (journal_remove_checkpoint() deletes the transaction when
  * the last checkpoint buffer is cleansed)
  *
- * Called with the journal locked.
  * Called with j_list_lock held.
  */
 static int __cleanup_transaction(journal_t *journal, transaction_t *transaction)
@@ -167,17 +166,12 @@ static int __cleanup_transaction(journal
 
 			spin_unlock(&journal->j_list_lock);
 			jbd_unlock_bh_state(bh);
-			log_start_commit(journal, t);
+			log_start_commit(journal, tid);
 			log_wait_commit(journal, tid);
 			goto out_return_1;
 		}
 
 		/*
-		 * We used to test for (jh->b_list != BUF_CLEAN) here.
-		 * But unmap_underlying_metadata() can place buffer onto
-		 * BUF_CLEAN.
-		 */
-		/*
 		 * AKPM: I think the buffer_jbddirty test is redundant - it
 		 * shouldn't have NULL b_transaction?
 		 */
@@ -322,7 +316,7 @@ int log_do_checkpoint(journal_t *journal
 		int cleanup_ret, retry = 0;
 		tid_t this_tid;
 
-		transaction = journal->j_checkpoint_transactions->t_cpprev;
+		transaction = journal->j_checkpoint_transactions->t_cpnext;
 		this_tid = transaction->t_tid;
 		jh = transaction->t_checkpoint_list;
 		last_jh = jh->b_cpprev;
@@ -359,7 +353,7 @@ int log_do_checkpoint(journal_t *journal
 		 * If someone cleaned up this transaction while we slept, we're
 		 * done
 		 */
-		if (journal->j_checkpoint_transactions->t_cpprev != transaction)
+		if (journal->j_checkpoint_transactions->t_cpnext != transaction)
 			continue;
 		/*
 		 * Maybe it's a new transaction, but it fell at the same
diff -puN fs/jbd/journal.c~jbd-670-log_start_commit-locking-fix fs/jbd/journal.c
--- 25/fs/jbd/journal.c~jbd-670-log_start_commit-locking-fix	2003-06-10 20:35:32.000000000 -0700
+++ 25-akpm/fs/jbd/journal.c	2003-06-10 20:35:32.000000000 -0700
@@ -73,7 +73,7 @@ EXPORT_SYMBOL(journal_errno);
 EXPORT_SYMBOL(journal_ack_err);
 EXPORT_SYMBOL(journal_clear_err);
 EXPORT_SYMBOL(log_wait_commit);
-EXPORT_SYMBOL(log_start_commit);
+EXPORT_SYMBOL(journal_start_commit);
 EXPORT_SYMBOL(journal_wipe);
 EXPORT_SYMBOL(journal_blocks_per_page);
 EXPORT_SYMBOL(journal_invalidatepage);
@@ -433,52 +433,55 @@ int __log_space_left(journal_t *journal)
 }
 
 /*
- * Called under j_state_lock.
+ * Called under j_state_lock.  Returns true if a transaction was started.
  */
-tid_t __log_start_commit(journal_t *journal, transaction_t *transaction)
+int __log_start_commit(journal_t *journal, tid_t target)
 {
-	tid_t target = journal->j_commit_request;
-
-	/*
-	 * A NULL transaction asks us to commit the currently running
-	 * transaction, if there is one.  
-	 */
-	if (transaction)
-		target = transaction->t_tid;
-	else {
-		transaction = journal->j_running_transaction;
-		if (!transaction)
-			goto out;
-		target = transaction->t_tid;
-	}
-		
 	/*
 	 * Are we already doing a recent enough commit?
 	 */
-	if (tid_geq(journal->j_commit_request, target))
-		goto out;
+	if (!tid_geq(journal->j_commit_request, target)) {
+		/*
+		 * We want a new commit: OK, mark the request and wakup the
+		 * commit thread.  We do _not_ do the commit ourselves.
+		 */
 
-	/*
-	 * We want a new commit: OK, mark the request and wakup the
-	 * commit thread.  We do _not_ do the commit ourselves.
-	 */
+		journal->j_commit_request = target;
+		jbd_debug(1, "JBD: requesting commit %d/%d\n",
+			  journal->j_commit_request,
+			  journal->j_commit_sequence);
+		wake_up(&journal->j_wait_commit);
+		return 1;
+	}
+	return 0;
+}
 
-	journal->j_commit_request = target;
-	jbd_debug(1, "JBD: requesting commit %d/%d\n",
-		  journal->j_commit_request,
-		  journal->j_commit_sequence);
-	wake_up(&journal->j_wait_commit);
+int log_start_commit(journal_t *journal, tid_t tid)
+{
+	int ret;
 
-out:
-	return target;
+	spin_lock(&journal->j_state_lock);
+	ret = __log_start_commit(journal, tid);
+	spin_unlock(&journal->j_state_lock);
+	return ret;
 }
 
-tid_t log_start_commit(journal_t *journal, transaction_t *transaction)
+/*
+ * Start a commit of the current running transaction (if any).  Returns true
+ * if a transaction was started, and fills its tid in at *ptid
+ */
+int journal_start_commit(journal_t *journal, tid_t *ptid)
 {
-	tid_t ret;
+	int ret = 0;
 
 	spin_lock(&journal->j_state_lock);
-	ret = __log_start_commit(journal, transaction);
+	if (journal->j_running_transaction) {
+		tid_t tid = journal->j_running_transaction->t_tid;
+
+		ret = __log_start_commit(journal, tid);
+		if (ret && ptid)
+			*ptid = tid;
+	}
 	spin_unlock(&journal->j_state_lock);
 	return ret;
 }
@@ -1243,7 +1246,7 @@ int journal_flush(journal_t *journal)
 	/* Force everything buffered to the log... */
 	if (journal->j_running_transaction) {
 		transaction = journal->j_running_transaction;
-		__log_start_commit(journal, transaction);
+		__log_start_commit(journal, transaction->t_tid);
 	} else if (journal->j_committing_transaction)
 		transaction = journal->j_committing_transaction;
 
@@ -1374,7 +1377,7 @@ void __journal_abort_hard(journal_t *jou
 	journal->j_flags |= JFS_ABORT;
 	transaction = journal->j_running_transaction;
 	if (transaction)
-		__log_start_commit(journal, transaction);
+		__log_start_commit(journal, transaction->t_tid);
 	spin_unlock(&journal->j_state_lock);
 }
 
diff -puN fs/jbd/transaction.c~jbd-670-log_start_commit-locking-fix fs/jbd/transaction.c
--- 25/fs/jbd/transaction.c~jbd-670-log_start_commit-locking-fix	2003-06-10 20:35:32.000000000 -0700
+++ 25-akpm/fs/jbd/transaction.c	2003-06-10 20:35:32.000000000 -0700
@@ -173,7 +173,7 @@ repeat:
 		spin_unlock(&transaction->t_handle_lock);
 		prepare_to_wait(&journal->j_wait_transaction_locked, &wait,
 				TASK_UNINTERRUPTIBLE);
-		__log_start_commit(journal, transaction);
+		__log_start_commit(journal, transaction->t_tid);
 		spin_unlock(&journal->j_state_lock);
 		schedule();
 		finish_wait(&journal->j_wait_transaction_locked, &wait);
@@ -396,6 +396,7 @@ int journal_restart(handle_t *handle, in
 	J_ASSERT(transaction->t_updates > 0);
 	J_ASSERT(journal_current_handle() == handle);
 
+	spin_lock(&journal->j_state_lock);
 	spin_lock(&transaction->t_handle_lock);
 	transaction->t_outstanding_credits -= handle->h_buffer_credits;
 	transaction->t_updates--;
@@ -405,7 +406,8 @@ int journal_restart(handle_t *handle, in
 	spin_unlock(&transaction->t_handle_lock);
 
 	jbd_debug(2, "restarting handle %p\n", handle);
-	log_start_commit(journal, transaction);
+	__log_start_commit(journal, transaction->t_tid);
+	spin_unlock(&journal->j_state_lock);
 
 	handle->h_buffer_credits = nblocks;
 	ret = start_this_handle(journal, handle);
@@ -1382,6 +1384,7 @@ int journal_stop(handle_t *handle)
 	}
 
 	current->journal_info = NULL;
+	spin_lock(&journal->j_state_lock);
 	spin_lock(&transaction->t_handle_lock);
 	transaction->t_outstanding_credits -= handle->h_buffer_credits;
 	transaction->t_updates--;
@@ -1415,8 +1418,9 @@ int journal_stop(handle_t *handle)
 		jbd_debug(2, "transaction too old, requesting commit for "
 					"handle %p\n", handle);
 		/* This is non-blocking */
-		log_start_commit(journal, transaction);
-		
+		__log_start_commit(journal, transaction->t_tid);
+		spin_unlock(&journal->j_state_lock);
+
 		/*
 		 * Special case: JFS_SYNC synchronous updates require us
 		 * to wait for the commit to complete.  
@@ -1425,6 +1429,7 @@ int journal_stop(handle_t *handle)
 			err = log_wait_commit(journal, tid);
 	} else {
 		spin_unlock(&transaction->t_handle_lock);
+		spin_unlock(&journal->j_state_lock);
 	}
 
 	jbd_free_handle(handle);
diff -puN include/linux/ext3_jbd.h~jbd-670-log_start_commit-locking-fix include/linux/ext3_jbd.h
--- 25/include/linux/ext3_jbd.h~jbd-670-log_start_commit-locking-fix	2003-06-10 20:35:32.000000000 -0700
+++ 25-akpm/include/linux/ext3_jbd.h	2003-06-10 20:35:32.000000000 -0700
@@ -184,17 +184,6 @@ static inline handle_t *ext3_journal_cur
 	return journal_current_handle();
 }
 
-static inline void
-ext3_log_start_commit(journal_t *journal, transaction_t *transaction)
-{
-	log_start_commit(journal, transaction);
-}
-
-static inline void ext3_log_wait_commit(journal_t *journal, tid_t tid)
-{
-	log_wait_commit(journal, tid);
-}
-
 static inline int ext3_journal_extend(handle_t *handle, int nblocks)
 {
 	return journal_extend(handle, nblocks);
diff -puN include/linux/jbd.h~jbd-670-log_start_commit-locking-fix include/linux/jbd.h
--- 25/include/linux/jbd.h~jbd-670-log_start_commit-locking-fix	2003-06-10 20:35:32.000000000 -0700
+++ 25-akpm/include/linux/jbd.h	2003-06-10 20:35:32.000000000 -0700
@@ -988,12 +988,13 @@ extern void	journal_switch_revoke_table(
  */
 
 int __log_space_left(journal_t *); /* Called with journal locked */
-extern tid_t	log_start_commit (journal_t *, transaction_t *);
-extern tid_t	__log_start_commit(journal_t *, transaction_t *);
-extern int	log_wait_commit (journal_t *, tid_t);
-extern int	log_do_checkpoint (journal_t *, int);
+int log_start_commit(journal_t *journal, tid_t tid);
+int __log_start_commit(journal_t *journal, tid_t tid);
+int journal_start_commit(journal_t *journal, tid_t *tid);
+int log_wait_commit(journal_t *journal, tid_t tid);
+int log_do_checkpoint(journal_t *journal, int nblocks);
 
-void __log_wait_for_space(journal_t *, int nblocks);
+void __log_wait_for_space(journal_t *journal, int nblocks);
 extern void	__journal_drop_transaction(journal_t *, transaction_t *);
 extern int	cleanup_journal_tail(journal_t *);
 

_