From: Russell Miller <rmiller@duskglow.com>

A BUG or an oops will often leave a machine in a useless state.  There is no
way to remotely recover the machine from that state.

The patch adds a /proc/sys/kernel/panic_on_oops sysctl which, when set, will
cause the x86 kernel to call panic() at the end of the oops handler.  If the
user has also set /proc/sys/kernel/panic then a reboot will occur.

The implementation will try to sleep for a while before panicing so the oops
info has a chance of hitting the logs.

The implementation is designed so that other architectures can easily do this
in their oops handlers.



 Documentation/sysctl/kernel.txt |   12 ++++++++++++
 arch/i386/kernel/traps.c        |    9 +++++++++
 include/linux/kernel.h          |    1 +
 include/linux/sysctl.h          |    1 +
 kernel/panic.c                  |    7 +++----
 kernel/sysctl.c                 |    2 ++
 6 files changed, 28 insertions(+), 4 deletions(-)

diff -puN arch/i386/kernel/traps.c~panic-on-oops arch/i386/kernel/traps.c
--- 25/arch/i386/kernel/traps.c~panic-on-oops	2003-04-02 21:50:15.000000000 -0800
+++ 25-akpm/arch/i386/kernel/traps.c	2003-04-02 21:57:44.000000000 -0800
@@ -302,6 +302,15 @@ void die(const char * str, struct pt_reg
 	show_registers(regs);
 	bust_spinlocks(0);
 	spin_unlock_irq(&die_lock);
+	if (in_interrupt())
+		panic("Fatal exception in interrupt");
+
+	if (panic_on_oops) {
+		printk(KERN_EMERG "Fatal exception: panic in 5 seconds\n");
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		schedule_timeout(5 * HZ);
+		panic("Fatal exception");
+	}
 	do_exit(SIGSEGV);
 }
 
diff -puN include/linux/kernel.h~panic-on-oops include/linux/kernel.h
--- 25/include/linux/kernel.h~panic-on-oops	2003-04-02 21:50:15.000000000 -0800
+++ 25-akpm/include/linux/kernel.h	2003-04-02 21:50:15.000000000 -0800
@@ -104,6 +104,7 @@ static inline void console_verbose(void)
 
 extern void bust_spinlocks(int yes);
 extern int oops_in_progress;		/* If set, an oops, panic(), BUG() or die() is in progress */
+extern int panic_on_oops;
 
 extern int tainted;
 extern const char *print_tainted(void);
diff -puN include/linux/sysctl.h~panic-on-oops include/linux/sysctl.h
--- 25/include/linux/sysctl.h~panic-on-oops	2003-04-02 21:50:15.000000000 -0800
+++ 25-akpm/include/linux/sysctl.h	2003-04-02 21:50:15.000000000 -0800
@@ -130,6 +130,7 @@ enum
 	KERN_CADPID=54,		/* int: PID of the process to notify on CAD */
 	KERN_PIDMAX=55,		/* int: PID # limit */
   	KERN_CORE_PATTERN=56,	/* string: pattern for core-file names */
+	KERN_PANIC_ON_OOPS=57,  /* int: whether we will panic on an oops */
 };
 
 
diff -puN kernel/panic.c~panic-on-oops kernel/panic.c
--- 25/kernel/panic.c~panic-on-oops	2003-04-02 21:50:15.000000000 -0800
+++ 25-akpm/kernel/panic.c	2003-04-02 21:50:15.000000000 -0800
@@ -20,6 +20,8 @@
 asmlinkage void sys_sync(void);	/* it's really int */
 
 int panic_timeout;
+int panic_on_oops;
+int tainted;
 
 struct notifier_block *panic_notifier_list;
 
@@ -28,7 +30,6 @@ static int __init panic_setup(char *str)
 	panic_timeout = simple_strtoul(str, NULL, 0);
 	return 1;
 }
-
 __setup("panic=", panic_setup);
 
 /**
@@ -51,7 +52,7 @@ NORET_TYPE void panic(const char * fmt, 
 
 	bust_spinlocks(1);
 	va_start(args, fmt);
-	vsprintf(buf, fmt, args);
+	vsnprintf(buf, sizeof(buf), fmt, args);
 	va_end(args);
 	printk(KERN_EMERG "Kernel panic: %s\n",buf);
 	if (in_interrupt())
@@ -123,5 +124,3 @@ const char *print_tainted()
 		snprintf(buf, sizeof(buf), "Not tainted");
 	return(buf);
 }
-
-int tainted = 0;
diff -puN kernel/sysctl.c~panic-on-oops kernel/sysctl.c
--- 25/kernel/sysctl.c~panic-on-oops	2003-04-02 21:50:15.000000000 -0800
+++ 25-akpm/kernel/sysctl.c	2003-04-02 21:50:15.000000000 -0800
@@ -275,6 +275,8 @@ static ctl_table kern_table[] = {
 #endif
 	{KERN_PIDMAX, "pid_max", &pid_max, sizeof (int),
 	 0600, NULL, &proc_dointvec},
+	{KERN_PANIC_ON_OOPS,"panic_on_oops",
+	 &panic_on_oops,sizeof(int),0644,NULL,&proc_dointvec},
 	{0}
 };
 
diff -puN Documentation/sysctl/kernel.txt~panic-on-oops Documentation/sysctl/kernel.txt
--- 25/Documentation/sysctl/kernel.txt~panic-on-oops	2003-04-02 21:50:15.000000000 -0800
+++ 25-akpm/Documentation/sysctl/kernel.txt	2003-04-02 21:50:15.000000000 -0800
@@ -204,6 +204,18 @@ software watchdog, the recommended setti
 
 ==============================================================
 
+panic_on_oops:
+
+Controls the kernel's behaviour when an oops or BUG is encountered.
+
+0: try to continue operation
+
+1: delay a few seconds (to give klogd time to record the oops output) and
+   then panic.  If the `panic' sysctl is also non-zero then the machine will
+   be rebooted.
+
+==============================================================
+
 pid_max:
 
 PID allocation wrap value.  When the kenrel's next PID value

_