Prev: Add TAINT_HARDWARE_UNSUPPORTED flag
Next: scsi: megaraid_sas - Online Controller Reset (OCR): Add and initialize the OCR related variable
From: Cliff Wickman on 17 Jun 2010 12:20 kdump is failing on an SGI UV system because it depends on /sys/devices/system/cpu/cpuN/crash_notes. And these files contain bad addresses for cpus beyond cpu 0. This occurs using 2.6.35-rc3 code. But the same problem looks present in 2.6.33. The problem traces to per_cpu_ptr_to_phys() -> pcpu_addr_to_page() -> vmalloc_to_page() for per-cpu addresses not in the first per-cpu 'chunk', but not in the VMALLOC_START/VMALLOC_END range. I wonder why this shows up on UV but not other x86_64's? I've included a patch that solves this for me. But I defer to the authors for a proper solution. This is where per_cpu_ptr_to_phys() is called for this /sys file: static ssize_t show_crash_notes(struct sys_device *dev, struct ... .... addr = per_cpu_ptr_to_phys(per_cpu_ptr(crash_notes, cpunum)); rc = sprintf(buf, "%Lx\n", addr); return rc; } The problem, without the below patch: (but a couple of printk's) uv3-sys:/tmp/cpw # cat /sys/devices/system/cpu/cpu0/crash_notes 1c1b040 uv3-sys:/tmp/cpw # cat /sys/devices/system/cpu/cpu1/crash_notes db74000000000000 uv3-sys:/tmp/cpw # cat /sys/devices/system/cpu/cpu2/crash_notes db74000000000000 uv3-sys:/tmp/cpw # dmesg | tail -n 6 [ 133.883009] cpw: per_cpu_ptr_to_phys addr ffff880001c1b040 [ 133.883012] cpw: per_cpu_ptr_to_phys returning 0x1c1b040 [ 136.910178] cpw: per_cpu_ptr_to_phys addr ffff880001c3b040 [ 136.910181] cpw: per_cpu_ptr_to_phys returning 0xdb74000000000000 [ 140.304825] cpw: per_cpu_ptr_to_phys addr ffff880001c5b040 [ 140.304828] cpw: per_cpu_ptr_to_phys returning 0xdb74000000000000 With the below patch: (plus a couple of printk's) uv3-sys: # cat /sys/devices/system/cpu/cpu0/crash_notes 1c1b040 uv3-sys: # cat /sys/devices/system/cpu/cpu1/crash_notes 1c3b040 uv3-sys: # cat /sys/devices/system/cpu/cpu2/crash_notes 1c5b040 uv3-sys: # dmesg | tail -n 6 [ 130.411358] cpw: per_cpu_ptr_to_phys addr ffff880001c1b040 [ 130.411361] cpw: per_cpu_ptr_to_phys returning 0x1c1b040 [ 135.420702] 
cpw: per_cpu_ptr_to_phys addr ffff880001c3b040 [ 135.420705] cpw: per_cpu_ptr_to_phys returning 0x1c3b040 [ 139.514014] cpw: per_cpu_ptr_to_phys addr ffff880001c5b040 [ 139.514016] cpw: per_cpu_ptr_to_phys returning 0x1c5b040 Diffed against 2.6.35-rc5 Signed-off-by: Cliff Wickman <cpw(a)sgi.com> --- mm/percpu.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) Index: linux-2.6.35-rc3/mm/percpu.c =================================================================== --- linux-2.6.35-rc3.orig/mm/percpu.c +++ linux-2.6.35-rc3/mm/percpu.c @@ -978,12 +978,11 @@ bool is_kernel_percpu_address(unsigned l */ phys_addr_t per_cpu_ptr_to_phys(void *addr) { + if ((unsigned long)addr < VMALLOC_START || + (unsigned long)addr >= VMALLOC_END) + return __pa(addr); if (pcpu_addr_in_first_chunk(addr)) { - if ((unsigned long)addr < VMALLOC_START || - (unsigned long)addr >= VMALLOC_END) - return __pa(addr); - else - return page_to_phys(vmalloc_to_page(addr)); + return page_to_phys(vmalloc_to_page(addr)); } else return page_to_phys(pcpu_addr_to_page(addr)); } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Tejun Heo on 17 Jun 2010 13:10 Hello, On 06/17/2010 06:20 PM, Cliff Wickman wrote: > phys_addr_t per_cpu_ptr_to_phys(void *addr) > { > + if ((unsigned long)addr < VMALLOC_START || > + (unsigned long)addr >= VMALLOC_END) > + return __pa(addr); > if (pcpu_addr_in_first_chunk(addr)) { > - if ((unsigned long)addr < VMALLOC_START || > - (unsigned long)addr >= VMALLOC_END) > - return __pa(addr); > - else > - return page_to_phys(vmalloc_to_page(addr)); > + return page_to_phys(vmalloc_to_page(addr)); > } else > return page_to_phys(pcpu_addr_to_page(addr)); > } (scratching head...) So, that means it's given an address for which !pcpu_addr_in_first_chunk() but outside of vmalloc area. Strange. I'll find out what's going on. Thanks. -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Tejun Heo on 17 Jun 2010 13:40 On 06/17/2010 07:08 PM, Tejun Heo wrote: > (scratching head...) So, that means it's given an address for which > !pcpu_addr_in_first_chunk() but outside of vmalloc area. Strange. > I'll find out what's going on. Does the following patch work? The original patch assumed that @addr would be the address of the base cpu which isn't true. I only compile tested the patch so it might be broken (sorry, I gotta go somewhere now) but this should be the right direction. Thanks. diff --git a/mm/percpu.c b/mm/percpu.c index 46485e1..8956155 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -978,14 +978,23 @@ bool is_kernel_percpu_address(unsigned long addr) */ phys_addr_t per_cpu_ptr_to_phys(void *addr) { - if (pcpu_addr_in_first_chunk(addr)) { - if ((unsigned long)addr < VMALLOC_START || - (unsigned long)addr >= VMALLOC_END) - return __pa(addr); - else - return page_to_phys(vmalloc_to_page(addr)); - } else - return page_to_phys(pcpu_addr_to_page(addr)); + void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); + unsigned int cpu; + + for_each_possible_cpu(cpu) { + void *start = per_cpu_ptr(base, cpu); + + if (addr >= start && addr < start + pcpu_unit_size) { + /* in the first chunk */ + if ((unsigned long)addr < VMALLOC_START || + (unsigned long)addr >= VMALLOC_END) + return __pa(addr); + else + return page_to_phys(vmalloc_to_page(addr)); + } + } + /* in one of the other chunks */ + return page_to_phys(pcpu_addr_to_page(addr)); } static inline size_t pcpu_calc_fc_sizes(size_t static_size, -- tejun -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Cliff Wickman on 17 Jun 2010 14:30
On Thu, Jun 17, 2010 at 07:35:16PM +0200, Tejun Heo wrote: > On 06/17/2010 07:08 PM, Tejun Heo wrote: > > (scratching head...) So, that means it's given an address for which > > !pcpu_addr_in_first_chunk() but outside of vmalloc area. Strange. > > I'll find out what's going on. > > Does the following patch work? The original patch assumed that @addr > would be the address of the base cpu which isn't true. I only compile > tested the patch so it might be broken (sorry, I gotta go somewhere > now) but this should be the right direction. Yes, your patch works. I tested it on a 32p UV system. -Cliff > diff --git a/mm/percpu.c b/mm/percpu.c > index 46485e1..8956155 100644 > --- a/mm/percpu.c > +++ b/mm/percpu.c > @@ -978,14 +978,23 @@ bool is_kernel_percpu_address(unsigned long addr) > */ > phys_addr_t per_cpu_ptr_to_phys(void *addr) > { > - if (pcpu_addr_in_first_chunk(addr)) { > - if ((unsigned long)addr < VMALLOC_START || > - (unsigned long)addr >= VMALLOC_END) > - return __pa(addr); > - else > - return page_to_phys(vmalloc_to_page(addr)); > - } else > - return page_to_phys(pcpu_addr_to_page(addr)); > + void __percpu *base = __addr_to_pcpu_ptr(pcpu_base_addr); > + unsigned int cpu; > + > + for_each_possible_cpu(cpu) { > + void *start = per_cpu_ptr(base, cpu); > + > + if (addr >= start && addr < start + pcpu_unit_size) { > + /* in the first chunk */ > + if ((unsigned long)addr < VMALLOC_START || > + (unsigned long)addr >= VMALLOC_END) > + return __pa(addr); > + else > + return page_to_phys(vmalloc_to_page(addr)); > + } > + } > + /* in one of the other chunks */ > + return page_to_phys(pcpu_addr_to_page(addr)); > } > > static inline size_t pcpu_calc_fc_sizes(size_t static_size, > > > -- > tejun -- Cliff Wickman SGI cpw(a)sgi.com (651) 683-3824 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at 
http://www.tux.org/lkml/