Prev: [RESEND PATCH] module param_call: fix potential NULL pointer dereference
Next: [ANNOUNCE] util-linux-ng v2.17.1
From: Ingo Molnar on 22 Feb 2010 05:40 * Russ Anderson <rja(a)sgi.com> wrote: > Enable NMI on all cpus in UV system and add an NMI handler > to dump_stack on each cpu. > > Signed-off-by: Russ Anderson <rja(a)sgi.com> > > --- > > By default on x86 all the cpus except the boot cpu have NMI > masked off. This patch enables NMI on all cpus in UV system > and adds an NMI handler to dump_stack on each cpu. This > way if a system hangs we can NMI the machine and get a > backtrace from all the cpus. > > > arch/x86/include/asm/uv/uv.h | 1 > arch/x86/kernel/apic/x2apic_uv_x.c | 49 +++++++++++++++++++++++++++++++++++++ > arch/x86/kernel/smpboot.c | 2 + > 3 files changed, 52 insertions(+) > > Index: linux/arch/x86/kernel/apic/x2apic_uv_x.c > =================================================================== > --- linux.orig/arch/x86/kernel/apic/x2apic_uv_x.c 2010-02-17 10:21:55.000000000 -0600 > +++ linux/arch/x86/kernel/apic/x2apic_uv_x.c 2010-02-17 10:32:20.000000000 -0600 > @@ -20,6 +20,7 @@ > #include <linux/cpu.h> > #include <linux/init.h> > #include <linux/io.h> > +#include <linux/kdebug.h> > > #include <asm/uv/uv_mmrs.h> > #include <asm/uv/uv_hub.h> > @@ -39,6 +40,53 @@ static u64 gru_start_paddr, gru_end_padd > int uv_min_hub_revision_id; > EXPORT_SYMBOL_GPL(uv_min_hub_revision_id); > > +int uv_handle_nmi(struct notifier_block *self, > + unsigned long reason, void *data) > +{ > + unsigned long flags; > + static DEFINE_SPINLOCK(uv_nmi_lock); > + > + if (reason != DIE_NMI_IPI) > + return NOTIFY_OK; > + /* > + * Use a lock so only one cpu prints at a time > + * to prevent intermixed output. > + */ > + spin_lock_irqsave(&uv_nmi_lock, flags); > + printk(KERN_INFO "NMI stack dump cpu %u:\n", > + smp_processor_id()); > + dump_stack(); > + spin_unlock_irqrestore(&uv_nmi_lock, flags); > + > + return NOTIFY_STOP; > +} > + > +static struct notifier_block uv_dump_stack_nmi_nb = { > + .notifier_call = uv_handle_nmi, > + .next = NULL, > + .priority = 0 > +}; > + > +void uv_register_nmi_notifier(void) > +{ > + if (register_die_notifier(&uv_dump_stack_nmi_nb)) > + printk(KERN_WARNING "UV NMI handler failed to register\n"); > +} > + > +/* > + * Called on each cpu to unmask NMI. > + */ > +void __cpuinit uv_nmi_init(void) > +{ > + unsigned int value; > + > + /* > + * Unmask NMI on all cpus > + */ > + value = apic_read(APIC_LVT1) | APIC_DM_NMI; > + value &= ~APIC_LVT_MASKED; > + apic_write(APIC_LVT1, value); > +} > > static int is_GRU_range(u64 start, u64 end) > { > @@ -718,5 +766,6 @@ void __init uv_system_init(void) > > uv_cpu_init(); > uv_scir_register_cpu_notifier(); > + uv_register_nmi_notifier(); > proc_mkdir("sgi_uv", NULL); > } > Index: linux/arch/x86/include/asm/uv/uv.h > =================================================================== > --- linux.orig/arch/x86/include/asm/uv/uv.h 2010-02-17 10:21:55.000000000 -0600 > +++ linux/arch/x86/include/asm/uv/uv.h 2010-02-17 10:32:20.000000000 -0600 > @@ -11,6 +11,7 @@ struct mm_struct; > extern enum uv_system_type get_uv_system_type(void); > extern int is_uv_system(void); > extern void uv_cpu_init(void); > +extern void uv_nmi_init(void); > extern void uv_system_init(void); > extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, > struct mm_struct *mm, > Index: linux/arch/x86/kernel/smpboot.c > =================================================================== > --- linux.orig/arch/x86/kernel/smpboot.c 2010-02-17 10:21:55.000000000 -0600 > +++ linux/arch/x86/kernel/smpboot.c 2010-02-17 10:32:20.000000000 -0600 > @@ -320,6 +320,8 @@ notrace static void __cpuinit start_seco > unlock_vector_lock(); > ipi_call_unlock(); > per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; > + if (is_uv_system()) > + uv_nmi_init(); Instead of cramming it into the init sequence open-coded, shouldnt this be done via the x86_platform driver mechanism? Thanks, Ingo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Russ Anderson on 22 Feb 2010 17:30 On Mon, Feb 22, 2010 at 11:38:53AM +0100, Ingo Molnar wrote: > > * Russ Anderson <rja(a)sgi.com> wrote: > > > Index: linux/arch/x86/kernel/smpboot.c > > =================================================================== > > --- linux.orig/arch/x86/kernel/smpboot.c 2010-02-17 10:21:55.000000000 -0600 > > +++ linux/arch/x86/kernel/smpboot.c 2010-02-17 10:32:20.000000000 -0600 > > @@ -320,6 +320,8 @@ notrace static void __cpuinit start_seco > > unlock_vector_lock(); > > ipi_call_unlock(); > > per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; > > + if (is_uv_system()) > > + uv_nmi_init(); > > Instead of cramming it into the init sequence open-coded, shouldnt this be > done via the x86_platform driver mechanism? OK, I'm working on it. > Thanks, > > Ingo Thanks, -- Russ Anderson, OS RAS/Partitioning Project Lead SGI - Silicon Graphics Inc rja(a)sgi.com -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Russ Anderson on 25 Feb 2010 12:30 On Mon, Feb 22, 2010 at 11:38:53AM +0100, Ingo Molnar wrote: > > * Russ Anderson <rja(a)sgi.com> wrote: > > > Enable NMI on all cpus in UV system and add an NMI handler > > to dump_stack on each cpu. > > > > Signed-off-by: Russ Anderson <rja(a)sgi.com> [...] > > struct mm_struct *mm, > > Index: linux/arch/x86/kernel/smpboot.c > > =================================================================== > > --- linux.orig/arch/x86/kernel/smpboot.c 2010-02-17 10:21:55.000000000 -0600 > > +++ linux/arch/x86/kernel/smpboot.c 2010-02-17 10:32:20.000000000 -0600 > > @@ -320,6 +320,8 @@ notrace static void __cpuinit start_seco > > unlock_vector_lock(); > > ipi_call_unlock(); > > per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; > > + if (is_uv_system()) > > + uv_nmi_init(); > > Instead of cramming it into the init sequence open-coded, shouldnt this be > done via the x86_platform driver mechanism? Attached is the updated patch with the x86_platform driver mechanism used for uv_nmi_init. > Thanks, > > Ingo -- Russ Anderson, OS RAS/Partitioning Project Lead SGI - Silicon Graphics Inc rja(a)sgi.com
From: Ingo Molnar on 26 Feb 2010 04:30 * Russ Anderson <rja(a)sgi.com> wrote: > On Mon, Feb 22, 2010 at 11:38:53AM +0100, Ingo Molnar wrote: > > > > * Russ Anderson <rja(a)sgi.com> wrote: > > > > > Enable NMI on all cpus in UV system and add an NMI handler > > > to dump_stack on each cpu. > > > > > > Signed-off-by: Russ Anderson <rja(a)sgi.com> > [...] > > > struct mm_struct *mm, > > > Index: linux/arch/x86/kernel/smpboot.c > > > =================================================================== > > > --- linux.orig/arch/x86/kernel/smpboot.c 2010-02-17 10:21:55.000000000 -0600 > > > +++ linux/arch/x86/kernel/smpboot.c 2010-02-17 10:32:20.000000000 -0600 > > > @@ -320,6 +320,8 @@ notrace static void __cpuinit start_seco > > > unlock_vector_lock(); > > > ipi_call_unlock(); > > > per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; > > > + if (is_uv_system()) > > > + uv_nmi_init(); > > > > Instead of cramming it into the init sequence open-coded, shouldnt this be > > done via the x86_platform driver mechanism? > > Attached is the updated patch with the x86_platform driver mechanism > used for uv_nmi_init. ok, this looks far cleaner. A few nits: > + * When NMI is received, print a stack trace. > + */ > +int uv_handle_nmi(struct notifier_block *self, > + unsigned long reason, void *data) Please put prototypes on a single line if it's still below 100 cols or so. > +{ > + unsigned long flags; > + static DEFINE_SPINLOCK(uv_nmi_lock); Please dont hide locks amongst local variables. (even if they are only used by that function) > + > + if (reason != DIE_NMI_IPI) > + return NOTIFY_OK; > + /* > + * Use a lock so only one cpu prints at a time > + * to prevent intermixed output. > + */ > + spin_lock_irqsave(&uv_nmi_lock, flags); > + printk(KERN_INFO "NMI stack dump cpu %u:\n", > + smp_processor_id()); Can be on a single line too. Can use pr_info(). Should use a raw spinlock - this is NMI context. > + dump_stack(); > + spin_unlock_irqrestore(&uv_nmi_lock, flags); > + > + return NOTIFY_STOP; > +} > + > +static struct notifier_block uv_dump_stack_nmi_nb = { > + .notifier_call = uv_handle_nmi, > + .next = NULL, > + .priority = 0 > +}; Please align structure initializations vertically. Plus no need to open-code the setting of priority to 0 i guess. > + > +void uv_register_nmi_notifier(void) > +{ > + if (register_die_notifier(&uv_dump_stack_nmi_nb)) > + printk(KERN_WARNING "UV NMI handler failed to register\n"); > +} > + > +void uv_nmi_init(void) > +{ > + unsigned int value; > + > + /* > + * Unmask NMI on all cpus > + */ > + value = apic_read(APIC_LVT1) | APIC_DM_NMI; > + value &= ~APIC_LVT_MASKED; > + apic_write(APIC_LVT1, value); > +} > > void __init uv_system_init(void) > { > @@ -690,5 +739,6 @@ void __init uv_system_init(void) > > uv_cpu_init(); > uv_scir_register_cpu_notifier(); > + uv_register_nmi_notifier(); > proc_mkdir("sgi_uv", NULL); > } > Index: linux/arch/x86/include/asm/uv/uv.h > =================================================================== > --- linux.orig/arch/x86/include/asm/uv/uv.h 2010-02-25 11:01:48.131694174 -0600 > +++ linux/arch/x86/include/asm/uv/uv.h 2010-02-25 11:02:17.622069711 -0600 > @@ -11,6 +11,7 @@ struct mm_struct; > extern enum uv_system_type get_uv_system_type(void); > extern int is_uv_system(void); > extern void uv_cpu_init(void); > +extern void uv_nmi_init(void); > extern void uv_system_init(void); > extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, > struct mm_struct *mm, > Index: linux/arch/x86/kernel/smpboot.c > =================================================================== > --- linux.orig/arch/x86/kernel/smpboot.c 2010-02-25 11:01:50.929770347 -0600 > +++ linux/arch/x86/kernel/smpboot.c 2010-02-25 11:02:17.664720876 -0600 > @@ -263,6 +263,11 @@ static void __cpuinit smp_callin(void) > cpumask_set_cpu(cpuid, cpu_callin_mask); > } > > +void default_nmi_init(void) > +{ > + return; > +} That return is not needed. > + > /* > * Activate a secondary processor. > */ > @@ -320,6 +325,7 @@ notrace static void __cpuinit start_seco > unlock_vector_lock(); > ipi_call_unlock(); > per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; > + x86_platform.nmi_init(); > > /* enable local interrupts */ > local_irq_enable(); > Index: linux/arch/x86/include/asm/x86_init.h > =================================================================== > --- linux.orig/arch/x86/include/asm/x86_init.h 2010-02-25 11:01:48.131694174 -0600 > +++ linux/arch/x86/include/asm/x86_init.h 2010-02-25 11:02:17.696714616 -0600 > @@ -126,6 +126,7 @@ struct x86_cpuinit_ops { > * @get_wallclock: get time from HW clock like RTC etc. > * @set_wallclock: set time back to HW clock > * @is_untracked_pat_range exclude from PAT logic > + * @nmi_init enable NMI on cpus > */ > struct x86_platform_ops { > unsigned long (*calibrate_tsc)(void); > @@ -133,6 +134,7 @@ struct x86_platform_ops { > int (*set_wallclock)(unsigned long nowtime); > void (*iommu_shutdown)(void); > bool (*is_untracked_pat_range)(u64 start, u64 end); > + void (*nmi_init)(void); > }; > > extern struct x86_init_ops x86_init; > Index: linux/arch/x86/kernel/x86_init.c > =================================================================== > --- linux.orig/arch/x86/kernel/x86_init.c 2010-02-25 11:01:47.848760574 -0600 > +++ linux/arch/x86/kernel/x86_init.c 2010-02-25 11:02:17.732996103 -0600 > @@ -13,6 +13,7 @@ > #include <asm/e820.h> > #include <asm/time.h> > #include <asm/irq.h> > +#include <asm/nmi.h> > #include <asm/pat.h> > #include <asm/tsc.h> > #include <asm/iommu.h> > @@ -82,4 +83,5 @@ struct x86_platform_ops x86_platform = { > .set_wallclock = mach_set_rtc_mmss, > .iommu_shutdown = iommu_shutdown_noop, > .is_untracked_pat_range = is_ISA_range, > + .nmi_init = default_nmi_init > }; the default can be in this file too - then you can also make it 'static' and save some space and not touch smpboot.c. > Index: linux/arch/x86/include/asm/nmi.h > =================================================================== > --- linux.orig/arch/x86/include/asm/nmi.h 2010-02-25 11:01:48.131694174 -0600 > +++ linux/arch/x86/include/asm/nmi.h 2010-02-25 11:02:17.756721420 -0600 > @@ -24,6 +24,7 @@ extern int reserve_perfctr_nmi(unsigned > extern void release_perfctr_nmi(unsigned int); > extern int reserve_evntsel_nmi(unsigned int); > extern void release_evntsel_nmi(unsigned int); > +extern void default_nmi_init(void); > > extern void setup_apic_nmi_watchdog(void *); > extern void stop_apic_nmi_watchdog(void *); This will go away in that case as well. Thanks, Ingo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Russ Anderson on 26 Feb 2010 11:50
Attached is a patch that cleans up Ingo's nits. On Fri, Feb 26, 2010 at 10:22:52AM +0100, Ingo Molnar wrote: > > * Russ Anderson <rja(a)sgi.com> wrote: > > > On Mon, Feb 22, 2010 at 11:38:53AM +0100, Ingo Molnar wrote: > > > > > > * Russ Anderson <rja(a)sgi.com> wrote: > > > > > > > Enable NMI on all cpus in UV system and add an NMI handler > > > > to dump_stack on each cpu. > > > > > > > > Signed-off-by: Russ Anderson <rja(a)sgi.com> > > [...] > > > > struct mm_struct *mm, > > > > Index: linux/arch/x86/kernel/smpboot.c > > > > =================================================================== > > > > --- linux.orig/arch/x86/kernel/smpboot.c 2010-02-17 10:21:55.000000000 -0600 > > > > +++ linux/arch/x86/kernel/smpboot.c 2010-02-17 10:32:20.000000000 -0600 > > > > @@ -320,6 +320,8 @@ notrace static void __cpuinit start_seco > > > > unlock_vector_lock(); > > > > ipi_call_unlock(); > > > > per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; > > > > + if (is_uv_system()) > > > > + uv_nmi_init(); > > > > > > Instead of cramming it into the init sequence open-coded, shouldnt this be > > > done via the x86_platform driver mechanism? > > > > Attached is the updated patch with the x86_platform driver mechanism > > used for uv_nmi_init. > > ok, this looks far cleaner. A few nits: > > > + * When NMI is received, print a stack trace. > > + */ > > +int uv_handle_nmi(struct notifier_block *self, > > + unsigned long reason, void *data) > > Please put prototypes on a single line if it's still below 100 cols or so. Done. > > +{ > > + unsigned long flags; > > + static DEFINE_SPINLOCK(uv_nmi_lock); > > Please dont hide locks amongst local variables. (even if they are only used by > that function) Done. > > + > > + if (reason != DIE_NMI_IPI) > > + return NOTIFY_OK; > > + /* > > + * Use a lock so only one cpu prints at a time > > + * to prevent intermixed output. > > + */ > > + spin_lock_irqsave(&uv_nmi_lock, flags); > > + printk(KERN_INFO "NMI stack dump cpu %u:\n", > > + smp_processor_id()); > > Can be on a single line too. > > Can use pr_info(). > > Should use a raw spinlock - this is NMI context. Done. > > + dump_stack(); > > + spin_unlock_irqrestore(&uv_nmi_lock, flags); > > + > > + return NOTIFY_STOP; > > +} > > + > > +static struct notifier_block uv_dump_stack_nmi_nb = { > > + .notifier_call = uv_handle_nmi, > > + .next = NULL, > > + .priority = 0 > > +}; > > Please align structure initializations vertically. > > Plus no need to open-code the setting of priority to 0 i guess. Done. > > + > > +void uv_register_nmi_notifier(void) > > +{ > > + if (register_die_notifier(&uv_dump_stack_nmi_nb)) > > + printk(KERN_WARNING "UV NMI handler failed to register\n"); > > +} > > + > > +void uv_nmi_init(void) > > +{ > > + unsigned int value; > > + > > + /* > > + * Unmask NMI on all cpus > > + */ > > + value = apic_read(APIC_LVT1) | APIC_DM_NMI; > > + value &= ~APIC_LVT_MASKED; > > + apic_write(APIC_LVT1, value); > > +} > > > > void __init uv_system_init(void) > > { > > @@ -690,5 +739,6 @@ void __init uv_system_init(void) > > > > uv_cpu_init(); > > uv_scir_register_cpu_notifier(); > > + uv_register_nmi_notifier(); > > proc_mkdir("sgi_uv", NULL); > > } > > Index: linux/arch/x86/include/asm/uv/uv.h > > =================================================================== > > --- linux.orig/arch/x86/include/asm/uv/uv.h 2010-02-25 11:01:48.131694174 -0600 > > +++ linux/arch/x86/include/asm/uv/uv.h 2010-02-25 11:02:17.622069711 -0600 > > @@ -11,6 +11,7 @@ struct mm_struct; > > extern enum uv_system_type get_uv_system_type(void); > > extern int is_uv_system(void); > > extern void uv_cpu_init(void); > > +extern void uv_nmi_init(void); > > extern void uv_system_init(void); > > extern const struct cpumask *uv_flush_tlb_others(const struct cpumask *cpumask, > > struct mm_struct *mm, > > Index: linux/arch/x86/kernel/smpboot.c > > =================================================================== > > --- linux.orig/arch/x86/kernel/smpboot.c 2010-02-25 11:01:50.929770347 -0600 > > +++ linux/arch/x86/kernel/smpboot.c 2010-02-25 11:02:17.664720876 -0600 > > @@ -263,6 +263,11 @@ static void __cpuinit smp_callin(void) > > cpumask_set_cpu(cpuid, cpu_callin_mask); > > } > > > > +void default_nmi_init(void) > > +{ > > + return; > > +} > > That return is not needed. Done and moved to x86_init.c. > > + > > /* > > * Activate a secondary processor. > > */ > > @@ -320,6 +325,7 @@ notrace static void __cpuinit start_seco > > unlock_vector_lock(); > > ipi_call_unlock(); > > per_cpu(cpu_state, smp_processor_id()) = CPU_ONLINE; > > + x86_platform.nmi_init(); > > > > /* enable local interrupts */ > > local_irq_enable(); > > Index: linux/arch/x86/include/asm/x86_init.h > > =================================================================== > > --- linux.orig/arch/x86/include/asm/x86_init.h 2010-02-25 11:01:48.131694174 -0600 > > +++ linux/arch/x86/include/asm/x86_init.h 2010-02-25 11:02:17.696714616 -0600 > > @@ -126,6 +126,7 @@ struct x86_cpuinit_ops { > > * @get_wallclock: get time from HW clock like RTC etc. > > * @set_wallclock: set time back to HW clock > > * @is_untracked_pat_range exclude from PAT logic > > + * @nmi_init enable NMI on cpus > > */ > > struct x86_platform_ops { > > unsigned long (*calibrate_tsc)(void); > > @@ -133,6 +134,7 @@ struct x86_platform_ops { > > int (*set_wallclock)(unsigned long nowtime); > > void (*iommu_shutdown)(void); > > bool (*is_untracked_pat_range)(u64 start, u64 end); > > + void (*nmi_init)(void); > > }; > > > > extern struct x86_init_ops x86_init; > > Index: linux/arch/x86/kernel/x86_init.c > > =================================================================== > > --- linux.orig/arch/x86/kernel/x86_init.c 2010-02-25 11:01:47.848760574 -0600 > > +++ linux/arch/x86/kernel/x86_init.c 2010-02-25 11:02:17.732996103 -0600 > > @@ -13,6 +13,7 @@ > > #include <asm/e820.h> > > #include <asm/time.h> > > #include <asm/irq.h> > > +#include <asm/nmi.h> > > #include <asm/pat.h> > > #include <asm/tsc.h> > > #include <asm/iommu.h> > > @@ -82,4 +83,5 @@ struct x86_platform_ops x86_platform = { > > .set_wallclock = mach_set_rtc_mmss, > > .iommu_shutdown = iommu_shutdown_noop, > > .is_untracked_pat_range = is_ISA_range, > > + .nmi_init = default_nmi_init > > }; > > the default can be in this file too - then you can also make it 'static' and > save some space and not touch smpboot.c. Done. > > Index: linux/arch/x86/include/asm/nmi.h > > =================================================================== > > --- linux.orig/arch/x86/include/asm/nmi.h 2010-02-25 11:01:48.131694174 -0600 > > +++ linux/arch/x86/include/asm/nmi.h 2010-02-25 11:02:17.756721420 -0600 > > @@ -24,6 +24,7 @@ extern int reserve_perfctr_nmi(unsigned > > extern void release_perfctr_nmi(unsigned int); > > extern int reserve_evntsel_nmi(unsigned int); > > extern void release_evntsel_nmi(unsigned int); > > +extern void default_nmi_init(void); > > > > extern void setup_apic_nmi_watchdog(void *); > > extern void stop_apic_nmi_watchdog(void *); > > This will go away in that case as well. Removed. > Thanks, > > Ingo -- Russ Anderson, OS RAS/Partitioning Project Lead SGI - Silicon Graphics Inc rja(a)sgi.com |