commit 6aceb53be44ed55a2374c20a62e3aef9d3919e8d Author: Vincent Li Date: Mon Dec 14 17:58:49 2009 -0800 mm/vmscan: change comment generic_file_write to __generic_file_aio_write Commit 543ade1fc9 ("Streamline generic_file_* interfaces and filemap cleanups") removed generic_file_write() in filemap. Change the comment in vmscan pageout() to __generic_file_aio_write(). Signed-off-by: Vincent Li Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit cba8aafe1e07dfc8bae5ba78be8e02883bd34d31 Author: Vincent Li Date: Tue Sep 22 16:45:38 2009 -0700 fs/proc/base.c: fix proc_fault_inject_write() input sanity check Remove obfuscated zero-length input check and return -EINVAL instead of -EIO error to make the error message clear to user. Add whitespace stripping. No functionality changes. The old code: echo 1 > /proc/pid/make-it-fail (ok) echo 1foo > /proc/pid/make-it-fail (-bash: echo: write error: Input/output error) The new code: echo 1 > /proc/pid/make-it-fail (ok) echo 1foo > /proc/pid/make-it-fail (-bash: echo: write error: Invalid argument) This patch is conservative in its changes so as not to break existing scripts/applications. Signed-off-by: Vincent Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit fb92a4b068be96799da3748c11cbd69760e44d7b Author: Vincent Li Date: Tue Sep 22 16:45:36 2009 -0700 fs/proc/task_mmu.c v1: fix clear_refs_write() input sanity check Andrew Morton pointed out similar string hacking and obfuscated check for zero-length input at the end of the function, David Rientjes suggested to use strict_strtol to replace simple_strtol, this patch covers the above suggestions and adds removal of leading and trailing whitespace from user input. It does not change function behaviour. 
Signed-off-by: Vincent Li Acked-by: David Rientjes Cc: Matt Mackall Cc: Amerigo Wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit f168e1b6390e2d79cf57e48e6ae6d9b0a9e2851a Author: Vincent Li Date: Mon Sep 21 17:03:12 2009 -0700 mm/vmscan: remove page_queue_congested() comment Commit 084f71ae5c(kill page_queue_congested()) removed page_queue_congested(). Remove the page_queue_congested() comment in vmscan pageout() too. Signed-off-by: Vincent Li Reviewed-by: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds commit 0b21767637c3c99890a248fe47ac414e51cf5eb7 Author: Vincent Li Date: Mon Sep 21 17:03:09 2009 -0700 mm/vmscan: rename zone_nr_pages() to zone_nr_lru_pages() The name `zone_nr_pages' can be mis-read as zone's (total) number of pages, but it actually returns the number of pages on the zone's LRU lists. Signed-off-by: Vincent Li Cc: KOSAKI Motohiro Cc: Rik van Riel Cc: KAMEZAWA Hiroyuki Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds List: linux-kernel Subject: [PATCH] Convert software watchdog softdog to use basic watchdog core framework From: Vincent Li Date: 2011-08-14 20:25:03 Message-ID: 1313353503-26094-1-git-send-email-vincent.mc.li () gmail ! com [Download message RAW] Convert software watchdog softdog to use basic watchdog core framework to reduce duplicate code according to commit 43316044d watchdog: WatchDog Timer Driver Core - Add basic framework Compiled and tested against v3.1.0-rc1. Signed-off-by: Vincent Li --- drivers/watchdog/Kconfig | 1 + drivers/watchdog/softdog.c | 154 ++++++++++--------------------------------- 2 files changed, 37 insertions(+), 118 deletions(-) diff --git a/drivers/watchdog/Kconfig b/drivers/watchdog/Kconfig index 86b0735..b563c1d 100644 --- a/drivers/watchdog/Kconfig +++ b/drivers/watchdog/Kconfig @@ -55,6 +55,7 @@ comment "Watchdog Device Drivers" config SOFT_WATCHDOG tristate "Software watchdog" + select WATCHDOG_CORE help A software monitoring watchdog. 
This will fail to reboot your system from some situations that the hardware watchdog will recover diff --git a/drivers/watchdog/softdog.c b/drivers/watchdog/softdog.c index bf16ffb..0fb793a 100644 --- a/drivers/watchdog/softdog.c +++ b/drivers/watchdog/softdog.c @@ -42,12 +42,10 @@ #include #include #include -#include #include #include #include #include -#include #include #define PFX "SoftDog: " @@ -90,7 +88,6 @@ static void watchdog_fire(unsigned long); static struct timer_list watchdog_ticktock = TIMER_INITIALIZER(watchdog_fire, 0, 0); static unsigned long driver_open, orphan_timer; -static char expect_close; /* @@ -118,32 +115,28 @@ static void watchdog_fire(unsigned long data) * Softdog operations */ -static int softdog_keepalive(void) +static int softdog_keepalive(struct watchdog_device *wdd) { mod_timer(&watchdog_ticktock, jiffies+(soft_margin*HZ)); return 0; } -static int softdog_stop(void) +static int softdog_stop(struct watchdog_device *wdd) { del_timer(&watchdog_ticktock); return 0; } -static int softdog_set_heartbeat(int t) +static int softdog_set_heartbeat(struct watchdog_device *wdd, unsigned int t) { if ((t < 0x0001) || (t > 0xFFFF)) return -EINVAL; - soft_margin = t; + wdd->timeout = t; return 0; } -/* - * /dev/watchdog handling - */ - -static int softdog_open(struct inode *inode, struct file *file) +static int softdog_start(struct watchdog_device *wdd) { if (test_and_set_bit(0, &driver_open)) return -EBUSY; @@ -152,92 +145,35 @@ static int softdog_open(struct inode *inode, struct file *file) /* * Activate timer */ - softdog_keepalive(); - return nonseekable_open(inode, file); -} - -static int softdog_release(struct inode *inode, struct file *file) -{ - /* - * Shut off the timer. 
- * Lock it in if it's a module and we set nowayout - */ - if (expect_close == 42) { - softdog_stop(); - module_put(THIS_MODULE); - } else { - printk(KERN_CRIT PFX - "Unexpected close, not stopping watchdog!\n"); - set_bit(0, &orphan_timer); - softdog_keepalive(); - } - clear_bit(0, &driver_open); - expect_close = 0; + softdog_keepalive(wdd); return 0; } -static ssize_t softdog_write(struct file *file, const char __user *data, - size_t len, loff_t *ppos) -{ - /* - * Refresh the timer. - */ - if (len) { - if (!nowayout) { - size_t i; - - /* In case it was set long ago */ - expect_close = 0; - - for (i = 0; i != len; i++) { - char c; - - if (get_user(c, data + i)) - return -EFAULT; - if (c == 'V') - expect_close = 42; - } - } - softdog_keepalive(); - } - return len; -} +/* + * Kernel Interfaces + */ -static long softdog_ioctl(struct file *file, unsigned int cmd, - unsigned long arg) -{ - void __user *argp = (void __user *)arg; - int __user *p = argp; - int new_margin; - static const struct watchdog_info ident = { - .options = WDIOF_SETTIMEOUT | - WDIOF_KEEPALIVEPING | - WDIOF_MAGICCLOSE, - .firmware_version = 0, - .identity = "Software Watchdog", - }; - switch (cmd) { - case WDIOC_GETSUPPORT: - return copy_to_user(argp, &ident, sizeof(ident)) ? 
-EFAULT : 0; - case WDIOC_GETSTATUS: - case WDIOC_GETBOOTSTATUS: - return put_user(0, p); - case WDIOC_KEEPALIVE: - softdog_keepalive(); - return 0; - case WDIOC_SETTIMEOUT: - if (get_user(new_margin, p)) - return -EFAULT; - if (softdog_set_heartbeat(new_margin)) - return -EINVAL; - softdog_keepalive(); - /* Fall */ - case WDIOC_GETTIMEOUT: - return put_user(soft_margin, p); - default: - return -ENOTTY; - } -} +static const struct watchdog_ops softdog_ops = { + .owner = THIS_MODULE, + .start = softdog_start, + .stop = softdog_stop, + .ping = softdog_keepalive, + .set_timeout = softdog_set_heartbeat, +}; + +static const struct watchdog_info softdog_ident = { + .options = WDIOF_MAGICCLOSE | + WDIOF_KEEPALIVEPING | + WDIOF_SETTIMEOUT, + .identity = "Softdog Watchdog", +}; + +static struct watchdog_device softdog_wdd = { + .info = &softdog_ident, + .ops = &softdog_ops, + .min_timeout = 0x0001, + .max_timeout = 0xFFFF, +}; /* * Notifier for system down @@ -248,29 +184,10 @@ static int softdog_notify_sys(struct notifier_block *this, unsigned long code, { if (code == SYS_DOWN || code == SYS_HALT) /* Turn the WDT off */ - softdog_stop(); + softdog_stop(&softdog_wdd); return NOTIFY_DONE; } -/* - * Kernel Interfaces - */ - -static const struct file_operations softdog_fops = { - .owner = THIS_MODULE, - .llseek = no_llseek, - .write = softdog_write, - .unlocked_ioctl = softdog_ioctl, - .open = softdog_open, - .release = softdog_release, -}; - -static struct miscdevice softdog_miscdev = { - .minor = WATCHDOG_MINOR, - .name = "watchdog", - .fops = &softdog_fops, -}; - static struct notifier_block softdog_notifier = { .notifier_call = softdog_notify_sys, }; @@ -285,8 +202,8 @@ static int __init watchdog_init(void) /* Check that the soft_margin value is within it's range; if not reset to the default */ - if (softdog_set_heartbeat(soft_margin)) { - softdog_set_heartbeat(TIMER_MARGIN); + if (softdog_set_heartbeat(&softdog_wdd, soft_margin)) { + softdog_set_heartbeat(&softdog_wdd, 
TIMER_MARGIN); printk(KERN_INFO PFX "soft_margin must be 0 < soft_margin < 65536, using %d\n", TIMER_MARGIN); @@ -299,7 +216,7 @@ static int __init watchdog_init(void) return ret; } - ret = misc_register(&softdog_miscdev); + ret = watchdog_register_device(&softdog_wdd); if (ret) { printk(KERN_ERR PFX "cannot register miscdev on minor=%d (err=%d)\n", @@ -315,7 +232,8 @@ static int __init watchdog_init(void) static void __exit watchdog_exit(void) { - misc_deregister(&softdog_miscdev); + softdog_stop(&softdog_wdd); + watchdog_unregister_device(&softdog_wdd); unregister_reboot_notifier(&softdog_notifier); } -- 1.7.0.4 From 5631d69b3fb37ac6740dd5b82f964d31c0133a17 Mon Sep 17 00:00:00 2001 From: Vincent Li Date: Sun, 11 Dec 2011 14:38:50 -0800 Subject: [PATCH] Softlockup (out of cpu) killer In kernel, there is out of memory (OOM) killer, why not make an out of cpu (OOC) killer? I tested the following patch by running a user-space cpu hogging process and the softlockup detector killed the process successfully. Softlockup could be caused by user-space process hogging cpu, add softlockup_kill kernel config to allow kernel to kill the user space cpu hogging process. This feature is useful for high availability systems that have uptime guarantees and where a softlockup must be resolved ASAP echo 1 > /proc/sys/kernel/softlockup_kill to enable cpu hog process killer echo 0 > /proc/sys/kernel/softlockup_kill to disable cpu hog process killer Signed-off-by: Vincent Li --- Documentation/kernel-parameters.txt | 4 ++++ include/linux/sched.h | 1 + kernel/sysctl.c | 9 +++++++++ kernel/watchdog.c | 18 ++++++++++++++++++ lib/Kconfig.debug | 21 +++++++++++++++++++++ 5 files changed, 53 insertions(+), 0 deletions(-) diff --git a/Documentation/kernel-parameters.txt b/Documentation/kernel-parameters.txt index 81c287f..1609387 100644 --- a/Documentation/kernel-parameters.txt +++ b/Documentation/kernel-parameters.txt @@ -2418,6 +2418,10 @@ bytes respectively. 
Such letter suffixes can also be entirely omitted. [KNL] Should the soft-lockup detector generate panics. Format: + softlockup_kill= + [KNL] Should the soft-lockup detector kill cpu hog process. + Format: + sonypi.*= [HW] Sony Programmable I/O Control Device driver See Documentation/laptops/sonypi.txt diff --git a/include/linux/sched.h b/include/linux/sched.h index 1c4f3e9..4783fac 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -315,6 +315,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; +extern unsigned int softlockup_kill; void lockup_detector_init(void); #else static inline void touch_softlockup_watchdog(void) diff --git a/kernel/sysctl.c b/kernel/sysctl.c index ae27196..e79ea9c 100644 --- a/kernel/sysctl.c +++ b/kernel/sysctl.c @@ -770,6 +770,15 @@ static struct ctl_table kern_table[] = { .extra2 = &one, }, { + .procname = "softlockup_kill", + .data = &softlockup_kill, + .maxlen = sizeof(int), + .mode = 0644, + .proc_handler = proc_dointvec_minmax, + .extra1 = &zero, + .extra2 = &one, + }, + { .procname = "nmi_watchdog", .data = &watchdog_enabled, .maxlen = sizeof (int), diff --git a/kernel/watchdog.c b/kernel/watchdog.c index 1d7bca7..5832a90 100644 --- a/kernel/watchdog.c +++ b/kernel/watchdog.c @@ -75,6 +75,17 @@ static int __init softlockup_panic_setup(char *str) } __setup("softlockup_panic=", softlockup_panic_setup); +unsigned int __read_mostly softlockup_kill = + CONFIG_BOOTPARAM_SOFTLOCKUP_KILL_VALUE; + +static int __init softlockup_kill_setup(char *str) +{ + softlockup_kill = simple_strtoul(str, NULL, 0); + + return 1; +} +__setup("softlockup_kill=", softlockup_kill_setup); + static int __init nowatchdog_setup(char *str) { watchdog_enabled = 0; @@ -306,6 +317,13 @@ static enum hrtimer_restart watchdog_timer_fn(struct hrtimer *hrtimer) else dump_stack(); + if (softlockup_kill) { + printk(KERN_ERR "Kill softlockup process 
[%s:%d] on CPU#%d\n", + current->comm, task_pid_nr(current), + smp_processor_id()); + force_sig(SIGKILL, current); + } + if (softlockup_panic) panic("softlockup: hung tasks"); __this_cpu_write(soft_watchdog_warn, true); diff --git a/lib/Kconfig.debug b/lib/Kconfig.debug index 82928f5..e4afc98 100644 --- a/lib/Kconfig.debug +++ b/lib/Kconfig.debug @@ -224,6 +224,27 @@ config BOOTPARAM_SOFTLOCKUP_PANIC_VALUE default 0 if !BOOTPARAM_SOFTLOCKUP_PANIC default 1 if BOOTPARAM_SOFTLOCKUP_PANIC +config BOOTPARAM_SOFTLOCKUP_KILL + bool "Kill (cpu hog process) On Soft Lockups" + depends on LOCKUP_DETECTOR + help + Say Y here to enable the kernel to kill cpu hog process on + "soft lockups", which are bugs that cause the kernel to + loop in kernel mode for more than 60 seconds, without giving + other tasks a chance to run. + + This feature is useful for high-availability systems that + have uptime guarantees and where a lockup must be resolved ASAP. + + Say N if unsure. + +config BOOTPARAM_SOFTLOCKUP_KILL_VALUE + int + depends on LOCKUP_DETECTOR + range 0 1 + default 0 if !BOOTPARAM_SOFTLOCKUP_KILL + default 1 if BOOTPARAM_SOFTLOCKUP_KILL + config DETECT_HUNG_TASK bool "Detect Hung Tasks" depends on DEBUG_KERNEL -- 1.7.0.4