Prev: linux-next: failed import of the usb quilt series
Next: [PATCH] ipmi: Fix memleaking for add_smi when duplicating happen
From: Daniel Kiper on 26 Jul 2010 20:50 Hi, There is now a fully working version. It has been tested on Xen Ver. 4.0.0 in PV guests (i386/x86_64) with Linux kernel Ver. 2.6.32.16 and Ver. 2.6.34.1. This patch applies cleanly to Ver. 2.6.34.1 (it is also included as an attachment because I received some reports that my patches are mangled). All bugs found so far have been fixed (sorry, but I am sure that some hidden ones still exist :-(((). This patch enables two modes of operation: - enabled by the CONFIG_XEN_MEMORY_HOTPLUG config option: - set the memory limit for the chosen domU from dom0: xm mem-max <domU> <new_memory_size_limit> - add memory in the chosen domU: echo <unused_address> > \ /sys/devices/system/memory/probe; memory is added in sections whose sizes differ from arch to arch (i386: 512 MiB, x86_64: 128 MiB; the size can be checked with cat /sys/devices/system/memory/block_size_bytes; this value is in HEX); it is preferred to choose an address at a section boundary, - online memory in the chosen domU: echo online > \ /sys/devices/system/memory/memory<section_number>/state; <section_number> can be computed as follows: (int)(<unused_address> / <section_size>) - enabled by the CONFIG_XEN_BALLOON_MEMORY_HOTPLUG config option: - set the memory limit for the chosen domU from dom0: xm mem-max <domU> <new_memory_size_limit> - add memory for the chosen domU from dom0: xm mem-set <domU> <new_memory_size> If you have any questions, please drop me a line. 
Daniel Signed-off-by: Daniel Kiper <dkiper(a)net-space.pl> --- arch/x86/Kconfig | 2 +- drivers/base/memory.c | 23 +++++ drivers/xen/Kconfig | 10 ++ drivers/xen/balloon.c | 196 +++++++++++++++++++++++++++++++++++++++- include/linux/memory_hotplug.h | 8 ++ include/xen/balloon.h | 6 ++ mm/Kconfig | 9 ++ mm/memory_hotplug.c | 140 ++++++++++++++++++++++++++++ 8 files changed, 390 insertions(+), 4 deletions(-) diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index 9458685..38434da 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -1273,7 +1273,7 @@ config ARCH_SELECT_MEMORY_MODEL depends on ARCH_SPARSEMEM_ENABLE config ARCH_MEMORY_PROBE - def_bool X86_64 + def_bool y depends on MEMORY_HOTPLUG config ILLEGAL_POINTER_VALUE diff --git a/drivers/base/memory.c b/drivers/base/memory.c index 933442f..709457b 100644 --- a/drivers/base/memory.c +++ b/drivers/base/memory.c @@ -27,6 +27,14 @@ #include <asm/atomic.h> #include <asm/uaccess.h> +#ifdef CONFIG_XEN_MEMORY_HOTPLUG +#include <xen/xen.h> +#endif + +#if defined(CONFIG_XEN_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON) +#include <xen/balloon.h> +#endif + #define MEMORY_CLASS_NAME "memory" static struct sysdev_class memory_sysdev_class = { @@ -215,6 +223,10 @@ memory_block_action(struct memory_block *mem, unsigned long action) case MEM_ONLINE: start_pfn = page_to_pfn(first_page); ret = online_pages(start_pfn, PAGES_PER_SECTION); +#if defined(CONFIG_XEN_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON) + if (xen_domain() && !ret) + balloon_update_stats(PAGES_PER_SECTION); +#endif break; case MEM_OFFLINE: mem->state = MEM_GOING_OFFLINE; @@ -225,6 +237,10 @@ memory_block_action(struct memory_block *mem, unsigned long action) mem->state = old_state; break; } +#if defined(CONFIG_XEN_MEMORY_HOTPLUG) && defined(CONFIG_XEN_BALLOON) + if (xen_domain()) + balloon_update_stats(-PAGES_PER_SECTION); +#endif break; default: WARN(1, KERN_WARNING "%s(%p, %ld) unknown action: %ld\n", @@ -341,6 +357,13 @@ memory_probe_store(struct class 
*class, struct class_attribute *attr, phys_addr = simple_strtoull(buf, NULL, 0); +#ifdef CONFIG_XEN_MEMORY_HOTPLUG + if (xen_domain()) { + ret = xen_memory_probe(phys_addr); + return ret ? ret : count; + } +#endif + nid = memory_add_physaddr_to_nid(phys_addr); ret = add_memory(nid, phys_addr, PAGES_PER_SECTION << PAGE_SHIFT); diff --git a/drivers/xen/Kconfig b/drivers/xen/Kconfig index fad3df2..9713048 100644 --- a/drivers/xen/Kconfig +++ b/drivers/xen/Kconfig @@ -9,6 +9,16 @@ config XEN_BALLOON the system to expand the domain's memory allocation, or alternatively return unneeded memory to the system. +config XEN_BALLOON_MEMORY_HOTPLUG + bool "Xen memory balloon driver with memory hotplug support" + depends on EXPERIMENTAL && XEN_BALLOON && MEMORY_HOTPLUG + default n + help + Xen memory balloon driver with memory hotplug support allows expanding + memory available for the system above limit declared at system startup. + It is very useful on critical systems which require long run without + rebooting. + config XEN_SCRUB_PAGES bool "Scrub pages before returning them to system" depends on XEN_BALLOON diff --git a/drivers/xen/balloon.c b/drivers/xen/balloon.c index 1a0d8c2..f80bba0 100644 --- a/drivers/xen/balloon.c +++ b/drivers/xen/balloon.c @@ -6,6 +6,7 @@ * Copyright (c) 2003, B Dragovic * Copyright (c) 2003-2004, M Williamson, K Fraser * Copyright (c) 2005 Dan M. Smith, IBM Corporation + * Copyright (c) 2010 Daniel Kiper * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License version 2 @@ -61,6 +62,10 @@ #include <xen/features.h> #include <xen/page.h> +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +#include <linux/memory.h> +#endif + #define PAGES2KB(_p) ((_p)<<(PAGE_SHIFT-10)) #define BALLOON_CLASS_NAME "xen_memory" @@ -77,6 +82,11 @@ struct balloon_stats { /* Number of pages in high- and low-memory balloons. 
*/ unsigned long balloon_low; unsigned long balloon_high; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + unsigned long boot_max_pfn; + u64 hotplug_start_paddr; + u64 hotplug_size; +#endif }; static DEFINE_MUTEX(balloon_mutex); @@ -184,6 +194,12 @@ static void balloon_alarm(unsigned long unused) schedule_work(&balloon_worker); } +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +static inline unsigned long current_target(void) +{ + return balloon_stats.target_pages; +} +#else static unsigned long current_target(void) { unsigned long target = balloon_stats.target_pages; @@ -195,11 +211,12 @@ static unsigned long current_target(void) return target; } +#endif static int increase_reservation(unsigned long nr_pages) { - unsigned long pfn, i, flags; - struct page *page; + unsigned long uninitialized_var(pfn), i, flags; + struct page *uninitialized_var(page); long rc; struct xen_memory_reservation reservation = { .address_bits = 0, @@ -207,11 +224,63 @@ static int increase_reservation(unsigned long nr_pages) .domid = DOMID_SELF }; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + resource_size_t r_min, r_size; + struct resource *r; +#endif + if (nr_pages > ARRAY_SIZE(frame_list)) nr_pages = ARRAY_SIZE(frame_list); spin_lock_irqsave(&balloon_lock, flags); +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + if (!balloon_stats.balloon_low && !balloon_stats.balloon_high) { + if (!balloon_stats.hotplug_start_paddr) { + + /* + * Look for first unused memory region starting + * at page boundary. Skip last memory section created + * at boot time becuase it may contains unused memory + * pages with PG_reserved bit not set (online_pages + * require PG_reserved bit set). 
+ */ + + r = kzalloc(sizeof(struct resource), GFP_KERNEL); + + if (!r) { + rc = -ENOMEM; + goto out; + } + + r->name = "System RAM"; + r->flags = IORESOURCE_MEM | IORESOURCE_BUSY; + r_min = PFN_PHYS(section_nr_to_pfn(pfn_to_section_nr(balloon_stats.boot_max_pfn) + 1)); + r_size = (balloon_stats.target_pages - balloon_stats.current_pages) << PAGE_SHIFT; + + rc = allocate_resource(&iomem_resource, r, + r_size, r_min, ULONG_MAX, + PAGE_SIZE, NULL, NULL); + + if (rc < 0) { + kfree(r); + goto out; + } + + balloon_stats.hotplug_start_paddr = r->start; + } + + pfn = PFN_DOWN(balloon_stats.hotplug_start_paddr + + balloon_stats.hotplug_size); + + for (i = 0; i < nr_pages; ++i, ++pfn) + frame_list[i] = pfn; + + pfn -= nr_pages + 1; + goto populate_physmap; + } +#endif + page = balloon_first_page(); for (i = 0; i < nr_pages; i++) { BUG_ON(page == NULL); @@ -219,6 +288,9 @@ static int increase_reservation(unsigned long nr_pages) page = balloon_next_page(page); } +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +populate_physmap: +#endif set_xen_guest_handle(reservation.extent_start, frame_list); reservation.nr_extents = nr_pages; rc = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); @@ -226,17 +298,33 @@ static int increase_reservation(unsigned long nr_pages) goto out; for (i = 0; i < rc; i++) { +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + if (balloon_stats.hotplug_start_paddr) { + ++pfn; + goto set_p2m; + } +#endif + page = balloon_retrieve(); BUG_ON(page == NULL); pfn = page_to_pfn(page); + +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG +set_p2m: +#endif BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && phys_to_machine_mapping_valid(pfn)); set_phys_to_machine(pfn, frame_list[i]); +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + if (balloon_stats.hotplug_start_paddr) + continue; +#endif + /* Link back into the page tables if not highmem. 
*/ - if (pfn < max_low_pfn) { + if (!PageHighMem(page)) { int ret; ret = HYPERVISOR_update_va_mapping( (unsigned long)__va(pfn << PAGE_SHIFT), @@ -251,6 +339,11 @@ static int increase_reservation(unsigned long nr_pages) __free_page(page); } +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + if (balloon_stats.hotplug_start_paddr) + balloon_stats.hotplug_size += rc << PAGE_SHIFT; +#endif + balloon_stats.current_pages += rc; out: @@ -331,6 +424,12 @@ static void balloon_process(struct work_struct *work) int need_sleep = 0; long credit; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + int nid, ret; + struct memory_block *mem; + unsigned long pfn, pfn_limit; +#endif + mutex_lock(&balloon_mutex); do { @@ -349,10 +448,93 @@ static void balloon_process(struct work_struct *work) /* Schedule more work if there is some still to be done. */ if (current_target() != balloon_stats.current_pages) mod_timer(&balloon_timer, jiffies + HZ); +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + else if (balloon_stats.hotplug_start_paddr) { + nid = memory_add_physaddr_to_nid(balloon_stats.hotplug_start_paddr); + + ret = xen_add_memory(nid, balloon_stats.hotplug_start_paddr, + balloon_stats.hotplug_size); + + if (ret) { + printk(KERN_ERR "%s: xen_add_memory: " + "Memory hotplug failed: %i\n", + __func__, ret); + goto error; + } + + pfn = PFN_DOWN(balloon_stats.hotplug_start_paddr); + pfn_limit = pfn + (balloon_stats.hotplug_size >> PAGE_SHIFT); + + for (; pfn < pfn_limit; ++pfn) + if (!PageHighMem(pfn_to_page(pfn))) + BUG_ON(HYPERVISOR_update_va_mapping( + (unsigned long)__va(pfn << PAGE_SHIFT), + mfn_pte(pfn_to_mfn(pfn), PAGE_KERNEL), 0)); + + ret = online_pages(PFN_DOWN(balloon_stats.hotplug_start_paddr), + balloon_stats.hotplug_size >> PAGE_SHIFT); + + if (ret) { + printk(KERN_ERR "%s: online_pages: Failed: %i\n", + __func__, ret); + goto error; + } + + pfn = PFN_DOWN(balloon_stats.hotplug_start_paddr); + pfn_limit = pfn + (balloon_stats.hotplug_size >> PAGE_SHIFT); + + for (; pfn < pfn_limit; pfn += 
PAGES_PER_SECTION) { + mem = find_memory_block(__pfn_to_section(pfn)); + BUG_ON(!mem); + BUG_ON(!present_section_nr(mem->phys_index)); + mutex_lock(&mem->state_mutex); + mem->state = MEM_ONLINE; + mutex_unlock(&mem->state_mutex); + } + + goto out; + +error: + balloon_stats.current_pages -= balloon_stats.hotplug_size >> PAGE_SHIFT; + balloon_stats.target_pages -= balloon_stats.hotplug_size >> PAGE_SHIFT; + +out: + balloon_stats.hotplug_start_paddr = 0; + balloon_stats.hotplug_size = 0; + } +#endif mutex_unlock(&balloon_mutex); } +#ifdef CONFIG_XEN_MEMORY_HOTPLUG + +/* Resets the Xen limit, sets new target, and kicks off processing. */ +static void balloon_set_new_target(unsigned long target) +{ + mutex_lock(&balloon_mutex); + balloon_stats.target_pages = target; + mutex_unlock(&balloon_mutex); + + schedule_work(&balloon_worker); +} + +void balloon_update_stats(long nr_pages) +{ + mutex_lock(&balloon_mutex); + + balloon_stats.current_pages += nr_pages; + balloon_stats.target_pages += nr_pages; + + xenbus_printf(XBT_NIL, "memory", "target", "%llu", + (unsigned long long)balloon_stats.target_pages << (PAGE_SHIFT - 10)); + + mutex_unlock(&balloon_mutex); +} +EXPORT_SYMBOL_GPL(balloon_update_stats); + +#else + /* Resets the Xen limit, sets new target, and kicks off processing. 
*/ static void balloon_set_new_target(unsigned long target) { @@ -361,6 +543,8 @@ static void balloon_set_new_target(unsigned long target) schedule_work(&balloon_worker); } +#endif + static struct xenbus_watch target_watch = { .node = "memory/target" @@ -416,6 +600,12 @@ static int __init balloon_init(void) balloon_stats.balloon_high = 0; balloon_stats.driver_pages = 0UL; +#ifdef CONFIG_XEN_BALLOON_MEMORY_HOTPLUG + balloon_stats.boot_max_pfn = max_pfn; + balloon_stats.hotplug_start_paddr = 0; + balloon_stats.hotplug_size = 0; +#endif + init_timer(&balloon_timer); balloon_timer.data = 0; balloon_timer.function = balloon_alarm; diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 35b07b7..04e67b8 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -211,4 +211,12 @@ extern void sparse_remove_one_section(struct zone *zone, struct mem_section *ms) extern struct page *sparse_decode_mem_map(unsigned long coded_mem_map, unsigned long pnum); +#if defined(CONFIG_XEN_MEMORY_HOTPLUG) || defined(CONFIG_XEN_BALLOON_MEMORY_HOTPLUG) +extern int xen_add_memory(int nid, u64 start, u64 size); +#endif + +#ifdef CONFIG_XEN_MEMORY_HOTPLUG +extern int xen_memory_probe(u64 phys_addr); +#endif + #endif /* __LINUX_MEMORY_HOTPLUG_H */ diff --git a/include/xen/balloon.h b/include/xen/balloon.h new file mode 100644 index 0000000..84b17b7 --- /dev/null +++ b/include/xen/balloon.h @@ -0,0 +1,6 @@ +#ifndef _XEN_BALLOON_H +#define _XEN_BALLOON_H + +extern void balloon_update_stats(long nr_pages); + +#endif /* _XEN_BALLOON_H */ diff --git a/mm/Kconfig b/mm/Kconfig index 9c61158..b04f3a8 100644 --- a/mm/Kconfig +++ b/mm/Kconfig @@ -144,6 +144,15 @@ config MEMORY_HOTREMOVE depends on MEMORY_HOTPLUG && ARCH_ENABLE_MEMORY_HOTREMOVE depends on MIGRATION +config XEN_MEMORY_HOTPLUG + bool "Allow for memory hot-add in Xen guests" + depends on EXPERIMENTAL && ARCH_MEMORY_PROBE && XEN + default n + help + Memory hotplug allows expanding memory 
available for the system + above limit declared at system startup. It is very useful on critical + systems which require long run without rebooting. + # # If we have space for more page flags then we can enable additional # optimizations and functionality. diff --git a/mm/memory_hotplug.c b/mm/memory_hotplug.c index be211a5..1c73703 100644 --- a/mm/memory_hotplug.c +++ b/mm/memory_hotplug.c @@ -32,6 +32,14 @@ #include <asm/tlbflush.h> +#ifdef CONFIG_XEN_MEMORY_HOTPLUG +#include <asm/xen/hypercall.h> +#include <xen/interface/xen.h> +#include <xen/interface/memory.h> +#include <xen/features.h> +#include <xen/page.h> +#endif + #include "internal.h" /* add this memory to iomem resource */ @@ -542,6 +550,138 @@ out: } EXPORT_SYMBOL_GPL(add_memory); +#if defined(CONFIG_XEN_MEMORY_HOTPLUG) || defined(CONFIG_XEN_BALLOON_MEMORY_HOTPLUG) +/* we are OK calling __meminit stuff here - we have CONFIG_MEMORY_HOTPLUG */ +int __ref xen_add_memory(int nid, u64 start, u64 size) +{ + pg_data_t *pgdat = NULL; + int new_pgdat = 0, ret; + + lock_system_sleep(); + + if (!node_online(nid)) { + pgdat = hotadd_new_pgdat(nid, start); + ret = -ENOMEM; + if (!pgdat) + goto out; + new_pgdat = 1; + } + + /* call arch's memory hotadd */ + ret = arch_add_memory(nid, start, size); + + if (ret < 0) + goto error; + + /* we online node here. we can't roll back from here. */ + node_set_online(nid); + + if (new_pgdat) { + ret = register_one_node(nid); + /* + * If sysfs file of new node can't create, cpu on the node + * can't be hot-added. There is no rollback way now. + * So, check by BUG_ON() to catch it reluctantly.. 
+ */ + BUG_ON(ret); + } + + goto out; + +error: + /* rollback pgdat allocation */ + if (new_pgdat) + rollback_node_hotadd(nid, pgdat); + +out: + unlock_system_sleep(); + return ret; +} +EXPORT_SYMBOL_GPL(xen_add_memory); +#endif + +#ifdef CONFIG_XEN_MEMORY_HOTPLUG +int xen_memory_probe(u64 phys_addr) +{ + int nr_pages, ret; + struct resource *r; + struct xen_memory_reservation reservation = { + .address_bits = 0, + .extent_order = 0, + .domid = DOMID_SELF, + .nr_extents = PAGES_PER_SECTION + }; + unsigned long *frame_list, i, pfn; + + r = register_memory_resource(phys_addr, PAGES_PER_SECTION << PAGE_SHIFT); + + if (!r) + return -EEXIST; + + frame_list = vmalloc(PAGES_PER_SECTION * sizeof(unsigned long)); + + if (!frame_list) { + printk(KERN_ERR "%s: vmalloc: Out of memory\n", __func__); + ret = -ENOMEM; + goto error; + } + + set_xen_guest_handle(reservation.extent_start, frame_list); + for (i = 0, pfn = PFN_DOWN(phys_addr); i < PAGES_PER_SECTION; ++i, ++pfn) + frame_list[i] = pfn; + + ret = HYPERVISOR_memory_op(XENMEM_populate_physmap, &reservation); + + if (ret < PAGES_PER_SECTION) { + if (ret > 0) { + printk(KERN_ERR "%s: PHYSMAP is not fully " + "populated: %i/%lu\n", __func__, + ret, PAGES_PER_SECTION); + reservation.nr_extents = nr_pages = ret; + ret = HYPERVISOR_memory_op(XENMEM_decrease_reservation, &reservation); + BUG_ON(ret != nr_pages); + ret = -ENOMEM; + } else { + ret = (ret < 0) ? 
ret : -ENOMEM; + printk(KERN_ERR "%s: Can't populate PHYSMAP: %i\n", __func__, ret); + } + goto error; + } + + for (i = 0, pfn = PFN_DOWN(phys_addr); i < PAGES_PER_SECTION; ++i, ++pfn) { + BUG_ON(!xen_feature(XENFEAT_auto_translated_physmap) && + phys_to_machine_mapping_valid(pfn)); + set_phys_to_machine(pfn, frame_list[i]); + } + + ret = xen_add_memory(memory_add_physaddr_to_nid(phys_addr), phys_addr, + PAGES_PER_SECTION << PAGE_SHIFT); + + if (ret) { + printk(KERN_ERR "%s: xen_add_memory: Memory hotplug " + "failed: %i\n", __func__, ret); + goto out; + } + + for (i = 0, pfn = PFN_DOWN(phys_addr); i < PAGES_PER_SECTION; ++i, ++pfn) + if (!PageHighMem(pfn_to_page(pfn))) + BUG_ON(HYPERVISOR_update_va_mapping( + (unsigned long)__va(pfn << PAGE_SHIFT), + mfn_pte(frame_list[i], PAGE_KERNEL), 0)); + + goto out; + +error: + release_memory_resource(r); + +out: + vfree(frame_list); + + return (ret < 0) ? ret : 0; +} +EXPORT_SYMBOL_GPL(xen_memory_probe); +#endif + #ifdef CONFIG_MEMORY_HOTREMOVE /* * A free page on the buddy free lists (not the per-cpu lists) has PageBuddy |