Prev: HID:HID-NTRIG update ntrig_event function
Next: power_meter: acpi_device_class "power_meter_resource" too long
From: Yinghai Lu on 23 Mar 2010 02:10 please check [PATCH 01/20] x86: add find_e820_area_node [RFC PATCH] x86: use lmb to replace early_res still keep kernel/early_res.c for the extension. should move those file to lib/lmb.c later? Signed-off-by: Yinghai Lu <yinghai(a)kernel.org> --- arch/x86/Kconfig | 1 arch/x86/include/asm/e820.h | 38 +- arch/x86/include/asm/lmb.h | 8 arch/x86/kernel/e820.c | 163 +---------- arch/x86/kernel/head.c | 2 arch/x86/kernel/head32.c | 4 arch/x86/kernel/head64.c | 2 arch/x86/kernel/setup.c | 2 arch/x86/kernel/setup_percpu.c | 6 include/linux/early_res.h | 9 include/linux/lmb.h | 5 kernel/early_res.c | 594 ++++++++++++++++------------------------- lib/lmb.c | 9 mm/page_alloc.c | 2 mm/sparse-vmemmap.c | 4 15 files changed, 321 insertions(+), 528 deletions(-) Index: linux-2.6/arch/x86/Kconfig =================================================================== --- linux-2.6.orig/arch/x86/Kconfig +++ linux-2.6/arch/x86/Kconfig @@ -27,6 +27,7 @@ config X86 select HAVE_PERF_EVENTS if (!M386 && !M486) select HAVE_IOREMAP_PROT select HAVE_KPROBES + select HAVE_LMB select ARCH_WANT_OPTIONAL_GPIOLIB select ARCH_WANT_FRAME_POINTERS select HAVE_DMA_ATTRS Index: linux-2.6/arch/x86/include/asm/e820.h =================================================================== --- linux-2.6.orig/arch/x86/include/asm/e820.h +++ linux-2.6/arch/x86/include/asm/e820.h @@ -113,22 +113,36 @@ static inline void early_memtest(unsigne extern unsigned long end_user_pfn; -extern u64 find_e820_area(u64 start, u64 end, u64 size, u64 align); -extern u64 find_e820_area_size(u64 start, u64 *sizep, u64 align); -u64 find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align); -extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); #include <linux/early_res.h> +static inline u64 find_e820_area(u64 start, u64 end, u64 size, u64 align) +{ + return find_lmb_area(start, end, size, align); +} +static inline u64 find_e820_area_size(u64 start, u64 *sizep, u64 align) 
+{ + return find_lmb_area_size(start, sizep, align); +} +static inline u64 +find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align) +{ + return find_lmb_area_node(nid, start, end, size, align); +} +extern u64 early_reserve_e820(u64 startt, u64 sizet, u64 align); extern unsigned long e820_end_of_ram_pfn(void); extern unsigned long e820_end_of_low_ram_pfn(void); -extern int e820_find_active_region(const struct e820entry *ei, - unsigned long start_pfn, - unsigned long last_pfn, - unsigned long *ei_startpfn, - unsigned long *ei_endpfn); -extern void e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long end_pfn); -extern u64 e820_hole_size(u64 start, u64 end); +static inline void e820_register_active_regions(int nid, + unsigned long start_pfn, + unsigned long end_pfn) +{ + lmb_register_active_regions(nid, start_pfn, end_pfn); +} +static inline u64 e820_hole_size(u64 start, u64 end) +{ + return lmb_hole_size(start, end); +} +void init_lmb_memory(void); +void fill_lmb_memory(void); extern void finish_e820_parsing(void); extern void e820_reserve_resources(void); extern void e820_reserve_resources_late(void); Index: linux-2.6/arch/x86/kernel/e820.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/e820.c +++ linux-2.6/arch/x86/kernel/e820.c @@ -15,6 +15,7 @@ #include <linux/pfn.h> #include <linux/suspend.h> #include <linux/firmware-map.h> +#include <linux/lmb.h> #include <asm/e820.h> #include <asm/proto.h> @@ -727,37 +728,6 @@ static int __init e820_mark_nvs_memory(v core_initcall(e820_mark_nvs_memory); #endif -/* - * Find a free area with specified alignment in a specific range. 
- */ -u64 __init find_e820_area(u64 start, u64 end, u64 size, u64 align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 addr; - u64 ei_start, ei_last; - - if (ei->type != E820_RAM) - continue; - - ei_last = ei->addr + ei->size; - ei_start = ei->addr; - addr = find_early_area(ei_start, ei_last, start, end, - size, align); - - if (addr != -1ULL) - return addr; - } - return -1ULL; -} - -u64 __init find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align) -{ - return find_e820_area(start, end, size, align); -} - u64 __init get_max_mapped(void) { u64 end = max_pfn_mapped; @@ -766,47 +736,6 @@ u64 __init get_max_mapped(void) return end; } -/* - * Find next free range after *start - */ -u64 __init find_e820_area_size(u64 start, u64 *sizep, u64 align) -{ - int i; - - for (i = 0; i < e820.nr_map; i++) { - struct e820entry *ei = &e820.map[i]; - u64 addr; - u64 ei_start, ei_last; - - if (ei->type != E820_RAM) - continue; - - ei_last = ei->addr + ei->size; - ei_start = ei->addr; - addr = find_early_area_size(ei_start, ei_last, start, - sizep, align); - - if (addr != -1ULL) - return addr; - } - - return -1ULL; -} - -u64 __init find_e820_area_node(int nid, u64 start, u64 end, u64 size, u64 align) -{ - u64 addr; - /* - * need to call this function after e820_register_active_regions - * so early_node_map[] is set - */ - addr = find_memory_core_early(nid, size, align, start, end); - if (addr != -1ULL) - return addr; - - /* fallback, should already have start end in the node range */ - return find_e820_area(start, end, size, align); -} /* * pre allocated 4k and reserved it in e820 @@ -900,74 +829,6 @@ unsigned long __init e820_end_of_low_ram { return e820_end_pfn(1UL<<(32 - PAGE_SHIFT), E820_RAM); } -/* - * Finds an active region in the address range from start_pfn to last_pfn and - * returns its range in ei_startpfn and ei_endpfn for the e820 entry. 
- */ -int __init e820_find_active_region(const struct e820entry *ei, - unsigned long start_pfn, - unsigned long last_pfn, - unsigned long *ei_startpfn, - unsigned long *ei_endpfn) -{ - u64 align = PAGE_SIZE; - - *ei_startpfn = round_up(ei->addr, align) >> PAGE_SHIFT; - *ei_endpfn = round_down(ei->addr + ei->size, align) >> PAGE_SHIFT; - - /* Skip map entries smaller than a page */ - if (*ei_startpfn >= *ei_endpfn) - return 0; - - /* Skip if map is outside the node */ - if (ei->type != E820_RAM || *ei_endpfn <= start_pfn || - *ei_startpfn >= last_pfn) - return 0; - - /* Check for overlaps */ - if (*ei_startpfn < start_pfn) - *ei_startpfn = start_pfn; - if (*ei_endpfn > last_pfn) - *ei_endpfn = last_pfn; - - return 1; -} - -/* Walk the e820 map and register active regions within a node */ -void __init e820_register_active_regions(int nid, unsigned long start_pfn, - unsigned long last_pfn) -{ - unsigned long ei_startpfn; - unsigned long ei_endpfn; - int i; - - for (i = 0; i < e820.nr_map; i++) - if (e820_find_active_region(&e820.map[i], - start_pfn, last_pfn, - &ei_startpfn, &ei_endpfn)) - add_active_range(nid, ei_startpfn, ei_endpfn); -} - -/* - * Find the hole size (in bytes) in the memory range. 
- * @start: starting address of the memory range to scan - * @end: ending address of the memory range to scan - */ -u64 __init e820_hole_size(u64 start, u64 end) -{ - unsigned long start_pfn = start >> PAGE_SHIFT; - unsigned long last_pfn = end >> PAGE_SHIFT; - unsigned long ei_startpfn, ei_endpfn, ram = 0; - int i; - - for (i = 0; i < e820.nr_map; i++) { - if (e820_find_active_region(&e820.map[i], - start_pfn, last_pfn, - &ei_startpfn, &ei_endpfn)) - ram += ei_endpfn - ei_startpfn; - } - return end - start - ((u64)ram << PAGE_SHIFT); -} static void early_panic(char *msg) { @@ -1058,6 +919,28 @@ void __init finish_e820_parsing(void) } } +void __init init_lmb_memory(void) +{ + lmb_init(); +} + +void __init fill_lmb_memory(void) +{ + int i; + + for (i = 0; i < e820.nr_map; i++) { + struct e820entry *ei = &e820.map[i]; + + if (ei->type != E820_RAM) + continue; + lmb_add(ei->addr, ei->size); + } + + lmb_analyze(); + + lmb_dump_all(); +} + static inline const char *e820_type_to_string(int e820_type) { switch (e820_type) { Index: linux-2.6/arch/x86/kernel/head.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/head.c +++ linux-2.6/arch/x86/kernel/head.c @@ -51,5 +51,5 @@ void __init reserve_ebda_region(void) lowmem = 0x9f000; /* reserve all memory between lowmem and the 1MB mark */ - reserve_early_overlap_ok(lowmem, 0x100000, "BIOS reserved"); + reserve_early(lowmem, 0x100000, "BIOS reserved"); } Index: linux-2.6/arch/x86/kernel/head32.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/head32.c +++ linux-2.6/arch/x86/kernel/head32.c @@ -29,13 +29,15 @@ static void __init i386_default_early_se void __init i386_start_kernel(void) { + + init_lmb_memory(); #ifdef CONFIG_X86_TRAMPOLINE /* * But first pinch a few for the stack/trampoline stuff * FIXME: Don't need the extra page at 4K, but need to fix * trampoline before removing it. 
(see the GDT stuff) */ - reserve_early_overlap_ok(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, + reserve_early(PAGE_SIZE, PAGE_SIZE + PAGE_SIZE, "EX TRAMPOLINE"); #endif Index: linux-2.6/arch/x86/kernel/head64.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/head64.c +++ linux-2.6/arch/x86/kernel/head64.c @@ -96,6 +96,8 @@ void __init x86_64_start_kernel(char * r void __init x86_64_start_reservations(char *real_mode_data) { + init_lmb_memory(); + copy_bootdata(__va(real_mode_data)); reserve_early(__pa_symbol(&_text), __pa_symbol(&__bss_stop), "TEXT DATA BSS"); Index: linux-2.6/arch/x86/kernel/setup.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/setup.c +++ linux-2.6/arch/x86/kernel/setup.c @@ -892,6 +892,8 @@ void __init setup_arch(char **cmdline_p) max_pfn_mapped = KERNEL_IMAGE_SIZE >> PAGE_SHIFT; #endif + fill_lmb_memory(); + #ifdef CONFIG_X86_CHECK_BIOS_CORRUPTION setup_bios_corruption_check(); #endif Index: linux-2.6/arch/x86/kernel/setup_percpu.c =================================================================== --- linux-2.6.orig/arch/x86/kernel/setup_percpu.c +++ linux-2.6/arch/x86/kernel/setup_percpu.c @@ -137,13 +137,7 @@ static void * __init pcpu_fc_alloc(unsig static void __init pcpu_fc_free(void *ptr, size_t size) { -#ifdef CONFIG_NO_BOOTMEM - u64 start = __pa(ptr); - u64 end = start + size; - free_early_partial(start, end); -#else free_bootmem(__pa(ptr), size); -#endif } static int __init pcpu_cpu_distance(unsigned int from, unsigned int to) Index: linux-2.6/include/linux/early_res.h =================================================================== --- linux-2.6.orig/include/linux/early_res.h +++ linux-2.6/include/linux/early_res.h @@ -5,15 +5,18 @@ extern void reserve_early(u64 start, u64 end, char *name); extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); extern void free_early(u64 start, u64 end); -void free_early_partial(u64 
start, u64 end); extern void early_res_to_bootmem(u64 start, u64 end); -void reserve_early_without_check(u64 start, u64 end, char *name); u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, u64 size, u64 align); u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start, u64 *sizep, u64 align); -u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align); +u64 find_lmb_area(u64 start, u64 end, u64 size, u64 align); +u64 find_lmb_area_size(u64 start, u64 *sizep, u64 align); +u64 find_lmb_area_node(int nid, u64 start, u64 end, u64 size, u64 align); +void lmb_register_active_regions(int nid, unsigned long start_pfn, + unsigned long last_pfn); +u64 lmb_hole_size(u64 start, u64 end); u64 get_max_mapped(void); #include <linux/range.h> int get_free_all_memory_range(struct range **rangep, int nodeid); Index: linux-2.6/include/linux/lmb.h =================================================================== --- linux-2.6.orig/include/linux/lmb.h +++ linux-2.6/include/linux/lmb.h @@ -26,7 +26,8 @@ struct lmb_property { struct lmb_region { unsigned long cnt; u64 size; - struct lmb_property region[MAX_LMB_REGIONS+1]; + struct lmb_property *region; + unsigned long region_array_size; }; struct lmb { @@ -37,6 +38,8 @@ struct lmb { }; extern struct lmb lmb; +extern struct lmb_property lmb_memory_region[MAX_LMB_REGIONS + 1]; +extern struct lmb_property lmb_reserved_region[MAX_LMB_REGIONS + 1]; extern void __init lmb_init(void); extern void __init lmb_analyze(void); Index: linux-2.6/kernel/early_res.c =================================================================== --- linux-2.6.orig/kernel/early_res.c +++ linux-2.6/kernel/early_res.c @@ -6,284 +6,65 @@ #include <linux/init.h> #include <linux/bootmem.h> #include <linux/mm.h> +#include <linux/lmb.h> #include <linux/early_res.h> /* * Early reserved memory areas. 
*/ -/* - * need to make sure this one is bigger enough before - * find_fw_memmap_area could be used - */ -#define MAX_EARLY_RES_X 32 - -struct early_res { - u64 start, end; - char name[15]; - char overlap_ok; -}; -static struct early_res early_res_x[MAX_EARLY_RES_X] __initdata; - -static int max_early_res __initdata = MAX_EARLY_RES_X; -static struct early_res *early_res __initdata = &early_res_x[0]; -static int early_res_count __initdata; - -static int __init find_overlapped_early(u64 start, u64 end) -{ - int i; - struct early_res *r; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - if (end > r->start && start < r->end) - break; - } - - return i; -} - -/* - * Drop the i-th range from the early reservation map, - * by copying any higher ranges down one over it, and - * clearing what had been the last slot. - */ -static void __init drop_range(int i) -{ - int j; - - for (j = i + 1; j < max_early_res && early_res[j].end; j++) - ; - - memmove(&early_res[i], &early_res[i + 1], - (j - 1 - i) * sizeof(struct early_res)); - - early_res[j - 1].end = 0; - early_res_count--; -} - -static void __init drop_range_partial(int i, u64 start, u64 end) -{ - u64 common_start, common_end; - u64 old_start, old_end; - - old_start = early_res[i].start; - old_end = early_res[i].end; - common_start = max(old_start, start); - common_end = min(old_end, end); - - /* no overlap ? */ - if (common_start >= common_end) - return; - - if (old_start < common_start) { - /* make head segment */ - early_res[i].end = common_start; - if (old_end > common_end) { - char name[15]; - - /* - * Save a local copy of the name, since the - * early_res array could get resized inside - * reserve_early_without_check() -> - * __check_and_double_early_res(), which would - * make the current name pointer invalid. 
- */ - strncpy(name, early_res[i].name, - sizeof(early_res[i].name) - 1); - /* add another for left over on tail */ - reserve_early_without_check(common_end, old_end, name); - } - return; - } else { - if (old_end > common_end) { - /* reuse the entry for tail left */ - early_res[i].start = common_end; - return; - } - /* all covered */ - drop_range(i); - } -} - -/* - * Split any existing ranges that: - * 1) are marked 'overlap_ok', and - * 2) overlap with the stated range [start, end) - * into whatever portion (if any) of the existing range is entirely - * below or entirely above the stated range. Drop the portion - * of the existing range that overlaps with the stated range, - * which will allow the caller of this routine to then add that - * stated range without conflicting with any existing range. - */ -static void __init drop_overlaps_that_are_ok(u64 start, u64 end) -{ - int i; - struct early_res *r; - u64 lower_start, lower_end; - u64 upper_start, upper_end; - char name[15]; - - for (i = 0; i < max_early_res && early_res[i].end; i++) { - r = &early_res[i]; - - /* Continue past non-overlapping ranges */ - if (end <= r->start || start >= r->end) - continue; - - /* - * Leave non-ok overlaps as is; let caller - * panic "Overlapping early reservations" - * when it hits this overlap. - */ - if (!r->overlap_ok) - return; - - /* - * We have an ok overlap. We will drop it from the early - * reservation map, and add back in any non-overlapping - * portions (lower or upper) as separate, overlap_ok, - * non-overlapping ranges. - */ - - /* 1. Note any non-overlapping (lower or upper) ranges. */ - strncpy(name, r->name, sizeof(name) - 1); - - lower_start = lower_end = 0; - upper_start = upper_end = 0; - if (r->start < start) { - lower_start = r->start; - lower_end = start; - } - if (r->end > end) { - upper_start = end; - upper_end = r->end; - } - - /* 2. Drop the original ok overlapping range */ - drop_range(i); - - i--; /* resume for-loop on copied down entry */ - - /* 3. 
Add back in any non-overlapping ranges. */ - if (lower_end) - reserve_early_overlap_ok(lower_start, lower_end, name); - if (upper_end) - reserve_early_overlap_ok(upper_start, upper_end, name); - } -} - -static void __init __reserve_early(u64 start, u64 end, char *name, - int overlap_ok) -{ - int i; - struct early_res *r; - - i = find_overlapped_early(start, end); - if (i >= max_early_res) - panic("Too many early reservations"); - r = &early_res[i]; - if (r->end) - panic("Overlapping early reservations " - "%llx-%llx %s to %llx-%llx %s\n", - start, end - 1, name ? name : "", r->start, - r->end - 1, r->name); - r->start = start; - r->end = end; - r->overlap_ok = overlap_ok; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; -} - -/* - * A few early reservtations come here. - * - * The 'overlap_ok' in the name of this routine does -not- mean it - * is ok for these reservations to overlap an earlier reservation. - * Rather it means that it is ok for subsequent reservations to - * overlap this one. - * - * Use this entry point to reserve early ranges when you are doing - * so out of "Paranoia", reserving perhaps more memory than you need, - * just in case, and don't mind a subsequent overlapping reservation - * that is known to be needed. - * - * The drop_overlaps_that_are_ok() call here isn't really needed. - * It would be needed if we had two colliding 'overlap_ok' - * reservations, so that the second such would not panic on the - * overlap with the first. We don't have any such as of this - * writing, but might as well tolerate such if it happens in - * the future. 
- */ -void __init reserve_early_overlap_ok(u64 start, u64 end, char *name) -{ - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 1); -} static void __init __check_and_double_early_res(u64 ex_start, u64 ex_end) { u64 start, end, size, mem; - struct early_res *new; + struct lmb_property *new, *old; + struct lmb_region *type = &lmb.reserved; /* do we have enough slots left ? */ - if ((max_early_res - early_res_count) > max(max_early_res/8, 2)) + if ((type->region_array_size - type->cnt) > + max_t(unsigned long, type->region_array_size/8, 2)) return; + old = type->region; /* double it */ mem = -1ULL; - size = sizeof(struct early_res) * max_early_res * 2; - if (early_res == early_res_x) + size = sizeof(struct lmb_property) * type->region_array_size * 2; + if (old == lmb_reserved_region) start = 0; else - start = early_res[0].end; + start = __pa(old) + sizeof(struct lmb_property) * + type->region_array_size; end = ex_start; if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); + mem = find_lmb_area(start, end, size, + sizeof(struct lmb_property)); if (mem == -1ULL) { start = ex_end; end = get_max_mapped(); if (start + size < end) - mem = find_fw_memmap_area(start, end, size, - sizeof(struct early_res)); + mem = find_lmb_area(start, end, size, + sizeof(struct lmb_property)); } if (mem == -1ULL) - panic("can not find more space for early_res array"); + panic("can not find more space for lmb.reserved.region array"); new = __va(mem); - /* save the first one for own */ - new[0].start = mem; - new[0].end = mem + size; - new[0].overlap_ok = 0; /* copy old to new */ - if (early_res == early_res_x) { - memcpy(&new[1], &early_res[0], - sizeof(struct early_res) * max_early_res); - memset(&new[max_early_res+1], 0, - sizeof(struct early_res) * (max_early_res - 1)); - early_res_count++; - } else { - memcpy(&new[1], &early_res[1], - sizeof(struct early_res) * (max_early_res - 1)); - memset(&new[max_early_res], 0, - 
sizeof(struct early_res) * max_early_res); - } - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = new; - max_early_res *= 2; - printk(KERN_DEBUG "early_res array is doubled to %d at [%llx - %llx]\n", - max_early_res, mem, mem + size - 1); + memcpy(&new[0], &old[0], + sizeof(struct lmb_property) * type->region_array_size); + memset(&new[type->region_array_size], 0, + sizeof(struct lmb_property) * type->region_array_size); + + memset(type->region, 0, + sizeof(struct lmb_property) * type->region_array_size); + type->region = new; + type->region_array_size *= 2; + printk(KERN_DEBUG "lmb.reserved.region array is doubled to %ld at [%llx - %llx]\n", + type->region_array_size, mem, mem + size - 1); + if (old != lmb_reserved_region) + lmb_free(__pa(old), + sizeof(struct lmb_property) * type->region_array_size/2); } -/* - * Most early reservations come here. - * - * We first have drop_overlaps_that_are_ok() drop any pre-existing - * 'overlap_ok' ranges, so that we can then reserve this memory - * range without risk of panic'ing on an overlapping overlap_ok - * early reservation. 
- */ void __init reserve_early(u64 start, u64 end, char *name) { if (start >= end) @@ -291,68 +72,18 @@ void __init reserve_early(u64 start, u64 __check_and_double_early_res(start, end); - drop_overlaps_that_are_ok(start, end); - __reserve_early(start, end, name, 0); -} - -void __init reserve_early_without_check(u64 start, u64 end, char *name) -{ - struct early_res *r; - - if (start >= end) - return; - - __check_and_double_early_res(start, end); - - r = &early_res[early_res_count]; - - r->start = start; - r->end = end; - r->overlap_ok = 0; - if (name) - strncpy(r->name, name, sizeof(r->name) - 1); - early_res_count++; + lmb_reserve(start, end - start); } void __init free_early(u64 start, u64 end) { - struct early_res *r; - int i; - - i = find_overlapped_early(start, end); - r = &early_res[i]; - if (i >= max_early_res || r->end != end || r->start != start) - panic("free_early on not reserved area: %llx-%llx!", - start, end - 1); - - drop_range(i); -} - -void __init free_early_partial(u64 start, u64 end) -{ - struct early_res *r; - int i; - if (start == end) return; - if (WARN_ONCE(start > end, "free_early_partial: wrong range [%#llx, %#llx]\n", start, end)) + if (WARN_ONCE(start > end, "free_early: wrong range [%#llx, %#llx]\n", start, end)) return; -try_next: - i = find_overlapped_early(start, end); - if (i >= max_early_res) - return; - - r = &early_res[i]; - /* hole ? 
*/ - if (r->end >= end && r->start <= start) { - drop_range_partial(i, start, end); - return; - } - - drop_range_partial(i, start, end); - goto try_next; + lmb_free(start, end - start); } #ifdef CONFIG_NO_BOOTMEM @@ -360,48 +91,45 @@ static void __init subtract_early_res(st { int i, count; u64 final_start, final_end; - int idx = 0; - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; + count = lmb.reserved.cnt; + + if (lmb.reserved.region != lmb_reserved_region) { + /*take out table it self */ + lmb_free(__pa(lmb.reserved.region), + sizeof(struct lmb_property) * + lmb.reserved.region_array_size); + } #define DEBUG_PRINT_EARLY_RES 1 #if DEBUG_PRINT_EARLY_RES printk(KERN_INFO "Subtract (%d early reservations)\n", count); #endif - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; + for (i = 0; i < count; i++) { + struct lmb_property *r = &lmb.reserved.region[i]; #if DEBUG_PRINT_EARLY_RES - printk(KERN_INFO " #%d [%010llx - %010llx] %15s\n", i, - r->start, r->end, r->name); + printk(KERN_INFO " #%d [%010llx - %010llx]\n", i, + r->base, r->base + r->size); #endif - final_start = PFN_DOWN(r->start); - final_end = PFN_UP(r->end); + final_start = PFN_DOWN(r->base); + final_end = PFN_UP(r->base + r->size); if (final_start >= final_end) continue; subtract_range(range, az, final_start, final_end); } - } int __init get_free_all_memory_range(struct range **rangep, int nodeid) { - int i, count; + int count; u64 start = 0, end; u64 size; u64 mem; struct range *range; int nr_range; - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; + count = lmb.reserved.region_array_size; count *= 2; @@ -411,12 +139,15 @@ int __init get_free_all_memory_range(str if (end > (MAX_DMA32_PFN << PAGE_SHIFT)) start = MAX_DMA32_PFN << PAGE_SHIFT; #endif - mem = find_fw_memmap_area(start, end, size, sizeof(struct range)); + mem = find_lmb_area(start, 
end, size, sizeof(struct range)); if (mem == -1ULL) panic("can not find more space for range free"); range = __va(mem); - /* use early_node_map[] and early_res to get range array at first */ + /* + * use early_node_map[] and lmb.reserved.region to get range array + * at first + */ memset(range, 0, size); nr_range = 0; @@ -430,10 +161,11 @@ int __init get_free_all_memory_range(str /* need to clear it ? */ if (nodeid == MAX_NUMNODES) { - memset(&early_res[0], 0, - sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; + memset(&lmb.reserved.region[0], 0, + sizeof(struct lmb_property) * lmb.reserved.region_array_size); + lmb.reserved.region = NULL; + lmb.reserved.region_array_size = 0; + lmb.reserved.cnt = 0; } *rangep = range; @@ -444,24 +176,24 @@ void __init early_res_to_bootmem(u64 sta { int i, count; u64 final_start, final_end; - int idx = 0; - count = 0; - for (i = 0; i < max_early_res && early_res[i].end; i++) - count++; - - /* need to skip first one ?*/ - if (early_res != early_res_x) - idx = 1; + count = lmb.reserved.cnt; + + if (lmb.reserved.region != lmb_reserved_region) { + /*take out table it self */ + lmb_free(__pa(lmb.reserved.region), + sizeof(struct lmb_property) * + lmb.reserved.region_array_size); + } printk(KERN_INFO "(%d/%d early reservations) ==> bootmem [%010llx - %010llx]\n", - count - idx, max_early_res, start, end); - for (i = idx; i < count; i++) { - struct early_res *r = &early_res[i]; + count, lmb.reserved.cnt, start, end); + for (i = 0; i < count; i++) { + struct lmb_property *r = &lmb.reserved.region[i]; printk(KERN_INFO " #%d [%010llx - %010llx] %16s", i, - r->start, r->end, r->name); - final_start = max(start, r->start); - final_end = min(end, r->end); + r->base, r->base + r->size); + final_start = max(start, r->base); + final_end = min(end, r->base + r->size); if (final_start >= final_end) { printk(KERN_CONT "\n"); continue; @@ -472,25 +204,43 @@ void __init early_res_to_bootmem(u64 sta BOOTMEM_DEFAULT); } 
/* clear them */ - memset(&early_res[0], 0, sizeof(struct early_res) * max_early_res); - early_res = NULL; - max_early_res = 0; - early_res_count = 0; + memset(&lmb.reserved.region[0], 0, + sizeof(struct lmb_property) * lmb.reserved.region_array_size); + lmb.reserved.region = NULL; + lmb.reserved.region_array_size = 0; + lmb.reserved.cnt = 0; } #endif + +/* following code is for early_res converting */ + +static int __init find_overlapped_early(u64 start, u64 end) +{ + int i; + struct lmb_property *r; + + for (i = 0; i < lmb.reserved.cnt && lmb.reserved.region[i].size; i++) { + r = &lmb.reserved.region[i]; + if (end > r->base && start < (r->base + r->size)) + break; + } + + return i; +} + /* Check for already reserved areas */ static inline int __init bad_addr(u64 *addrp, u64 size, u64 align) { int i; u64 addr = *addrp; int changed = 0; - struct early_res *r; + struct lmb_property *r; again: i = find_overlapped_early(addr, addr + size); - r = &early_res[i]; - if (i < max_early_res && r->end) { - *addrp = addr = round_up(r->end, align); + r = &lmb.reserved.region[i]; + if (i < lmb.reserved.cnt && r->size) { + *addrp = addr = round_up(r->base + r->size, align); changed = 1; goto again; } @@ -506,20 +256,20 @@ static inline int __init bad_addr_size(u int changed = 0; again: last = addr + size; - for (i = 0; i < max_early_res && early_res[i].end; i++) { - struct early_res *r = &early_res[i]; - if (last > r->start && addr < r->start) { - size = r->start - addr; + for (i = 0; i < lmb.reserved.cnt && lmb.reserved.region[i].size; i++) { + struct lmb_property *r = &lmb.reserved.region[i]; + if (last > r->base && addr < r->base) { + size = r->base - addr; changed = 1; goto again; } - if (last > r->end && addr < r->end) { - addr = round_up(r->end, align); + if (last > (r->base + r->size) && addr < (r->base + r->size)) { + addr = round_up(r->base + r->size, align); size = last - addr; changed = 1; goto again; } - if (last <= r->end && addr >= r->start) { + if (last <= (r->base 
+ r->size) && addr >= r->base) { (*sizep)++; return 0; } @@ -531,13 +281,8 @@ again: return changed; } -/* - * Find a free area with specified alignment in a specific range. - * only with the area.between start to end is active range from early_node_map - * so they are good as RAM - */ u64 __init find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, - u64 size, u64 align) + u64 size, u64 align) { u64 addr, last; @@ -582,3 +327,130 @@ u64 __init find_early_area_size(u64 ei_s out: return -1ULL; } + +/* + * Find a free area with specified alignment in a specific range. + */ +u64 __init find_lmb_area(u64 start, u64 end, u64 size, u64 align) +{ + int i; + + for (i = 0; i < lmb.memory.cnt; i++) { + u64 ei_start = lmb.memory.region[i].base; + u64 ei_last = ei_start + lmb.memory.region[i].size; + u64 addr; + + addr = find_early_area(ei_start, ei_last, start, end, + size, align); + + if (addr != -1ULL) + return addr; + } + return -1ULL; +} + +/* + * Find next free range after *start + */ +u64 __init find_lmb_area_size(u64 start, u64 *sizep, u64 align) +{ + int i; + + for (i = 0; i < lmb.memory.cnt; i++) { + u64 ei_start = lmb.memory.region[i].base; + u64 ei_last = ei_start + lmb.memory.region[i].size; + u64 addr; + + addr = find_early_area_size(ei_start, ei_last, start, + sizep, align); + + if (addr != -1ULL) + return addr; + } + + return -1ULL; +} + +u64 __init find_lmb_area_node(int nid, u64 start, u64 end, u64 size, u64 align) +{ + u64 addr; + /* + * need to call this function after e820_register_active_regions + * so early_node_map[] is set + */ + addr = find_memory_core_early(nid, size, align, start, end); + if (addr != -1ULL) + return addr; + + /* fallback, should already have start end in the node range */ + return find_lmb_area(start, end, size, align); +} + +/* + * Finds an active region in the address range from start_pfn to last_pfn and + * returns its range in ei_startpfn and ei_endpfn for the lmb entry. 
+ */ +static int __init lmb_find_active_region(const struct lmb_property *ei, + unsigned long start_pfn, + unsigned long last_pfn, + unsigned long *ei_startpfn, + unsigned long *ei_endpfn) +{ + u64 align = PAGE_SIZE; + + *ei_startpfn = round_up(ei->base, align) >> PAGE_SHIFT; + *ei_endpfn = round_down(ei->base + ei->size, align) >> PAGE_SHIFT; + + /* Skip map entries smaller than a page */ + if (*ei_startpfn >= *ei_endpfn) + return 0; + + /* Skip if map is outside the node */ + if (*ei_endpfn <= start_pfn || *ei_startpfn >= last_pfn) + return 0; + + /* Check for overlaps */ + if (*ei_startpfn < start_pfn) + *ei_startpfn = start_pfn; + if (*ei_endpfn > last_pfn) + *ei_endpfn = last_pfn; + + return 1; +} + +/* Walk the lmb.memory map and register active regions within a node */ +void __init lmb_register_active_regions(int nid, unsigned long start_pfn, + unsigned long last_pfn) +{ + unsigned long ei_startpfn; + unsigned long ei_endpfn; + int i; + + for (i = 0; i < lmb.memory.cnt; i++) + if (lmb_find_active_region(&lmb.memory.region[i], + start_pfn, last_pfn, + &ei_startpfn, &ei_endpfn)) + add_active_range(nid, ei_startpfn, ei_endpfn); +} + +/* + * Find the hole size (in bytes) in the memory range. 
+ * @start: starting address of the memory range to scan + * @end: ending address of the memory range to scan + */ +u64 __init lmb_hole_size(u64 start, u64 end) +{ + unsigned long start_pfn = start >> PAGE_SHIFT; + unsigned long last_pfn = end >> PAGE_SHIFT; + unsigned long ei_startpfn, ei_endpfn, ram = 0; + int i; + + for (i = 0; i < lmb.memory.cnt; i++) { + if (lmb_find_active_region(&lmb.memory.region[i], + start_pfn, last_pfn, + &ei_startpfn, &ei_endpfn)) + ram += ei_endpfn - ei_startpfn; + } + return end - start - ((u64)ram << PAGE_SHIFT); +} + Index: linux-2.6/lib/lmb.c =================================================================== --- linux-2.6.orig/lib/lmb.c +++ linux-2.6/lib/lmb.c @@ -18,6 +18,8 @@ #define LMB_ALLOC_ANYWHERE 0 struct lmb lmb; +struct lmb_property lmb_memory_region[MAX_LMB_REGIONS + 1]; +struct lmb_property lmb_reserved_region[MAX_LMB_REGIONS + 1]; static int lmb_debug; @@ -106,6 +108,11 @@ static void lmb_coalesce_regions(struct void __init lmb_init(void) { + lmb.memory.region = lmb_memory_region; + lmb.memory.region_array_size = ARRAY_SIZE(lmb_memory_region); + lmb.reserved.region = lmb_reserved_region; + lmb.reserved.region_array_size = ARRAY_SIZE(lmb_reserved_region); + /* Create a dummy zero size LMB which will get coalesced away later. * This simplifies the lmb_add() code below... 
*/ @@ -539,3 +546,5 @@ int lmb_find(struct lmb_property *res) } return -1; } + + Index: linux-2.6/mm/page_alloc.c =================================================================== --- linux-2.6.orig/mm/page_alloc.c +++ linux-2.6/mm/page_alloc.c @@ -3457,7 +3457,7 @@ void * __init __alloc_memory_core_early( ptr = phys_to_virt(addr); memset(ptr, 0, size); - reserve_early_without_check(addr, addr + size, "BOOTMEM"); + reserve_early(addr, addr + size, "BOOTMEM"); return ptr; } #endif Index: linux-2.6/mm/sparse-vmemmap.c =================================================================== --- linux-2.6.orig/mm/sparse-vmemmap.c +++ linux-2.6/mm/sparse-vmemmap.c @@ -229,8 +229,8 @@ void __init sparse_mem_maps_populate_nod char name[15]; snprintf(name, sizeof(name), "MEMMAP %d", nodeid); - reserve_early_without_check(__pa(vmemmap_buf_start), - __pa(vmemmap_buf), name); + reserve_early(__pa(vmemmap_buf_start), + __pa(vmemmap_buf), name); } #else free_bootmem(__pa(vmemmap_buf), vmemmap_buf_end - vmemmap_buf); Index: linux-2.6/arch/x86/include/asm/lmb.h =================================================================== --- /dev/null +++ linux-2.6/arch/x86/include/asm/lmb.h @@ -0,0 +1,8 @@ +#ifndef _X86_LMB_H +#define _X86_LMB_H + +#define LMB_DBG(fmt...) printk(fmt) + +#define LMB_REAL_LIMIT 0 + +#endif -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Ingo Molnar on 23 Mar 2010 04:10 * Yinghai Lu <yinghai(a)kernel.org> wrote: > please check > > [PATCH 01/20] x86: add find_e820_area_node > > > [RFC PATCH] x86: use lmb to replace early_res > > still keep kernel/early_res.c for the extension. > > should move those file to lib/lmb.c later? > > Signed-off-by: Yinghai Lu <yinghai(a)kernel.org> > > --- > arch/x86/Kconfig | 1 > arch/x86/include/asm/e820.h | 38 +- > arch/x86/include/asm/lmb.h | 8 > arch/x86/kernel/e820.c | 163 +---------- > arch/x86/kernel/head.c | 2 > arch/x86/kernel/head32.c | 4 > arch/x86/kernel/head64.c | 2 > arch/x86/kernel/setup.c | 2 > arch/x86/kernel/setup_percpu.c | 6 > include/linux/early_res.h | 9 > include/linux/lmb.h | 5 > kernel/early_res.c | 594 ++++++++++++++++------------------------- > lib/lmb.c | 9 > mm/page_alloc.c | 2 > mm/sparse-vmemmap.c | 4 > 15 files changed, 321 insertions(+), 528 deletions(-) That looks like a very promising direction! There's several things to do to make the approach fully clean: 1) I think we want to shape this as a series of simpler (and bisectable) patches. 
2) I think we also need to concentrate the changes back into LMB: > Index: linux-2.6/include/linux/early_res.h > =================================================================== > --- linux-2.6.orig/include/linux/early_res.h > +++ linux-2.6/include/linux/early_res.h > @@ -5,15 +5,18 @@ > extern void reserve_early(u64 start, u64 end, char *name); > extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); > extern void free_early(u64 start, u64 end); > -void free_early_partial(u64 start, u64 end); > extern void early_res_to_bootmem(u64 start, u64 end); > > -void reserve_early_without_check(u64 start, u64 end, char *name); > u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, > u64 size, u64 align); > u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start, > u64 *sizep, u64 align); > -u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align); > +u64 find_lmb_area(u64 start, u64 end, u64 size, u64 align); > +u64 find_lmb_area_size(u64 start, u64 *sizep, u64 align); > +u64 find_lmb_area_node(int nid, u64 start, u64 end, u64 size, u64 align); > +void lmb_register_active_regions(int nid, unsigned long start_pfn, > + unsigned long last_pfn); > +u64 lmb_hole_size(u64 start, u64 end); > u64 get_max_mapped(void); > #include <linux/range.h> > int get_free_all_memory_range(struct range **rangep, int nodeid); those new lmb_*() APIs should go into lmb.h. 3) Furthermore, i think all of early_res.c should move into lmb.c as well and we should eliminate kernel/early_res.c. early_res.h will go away as well and all the new APIs will be in lmb.h. 4) Also, we should move lib/lmb.c to mm/lmb.c, as now it's not just some optional library but _the_ main early-reserve memory subsystem used by the biggest Linux architectures. 5) Could we perhaps also try to eliminate e820_*() method uses in arch/x86/, and replace them by lmb_*() API uses? 
(that too should be a step by step method, for bisectability) > +++ linux-2.6/include/linux/lmb.h > @@ -26,7 +26,8 @@ struct lmb_property { > struct lmb_region { > unsigned long cnt; > u64 size; > - struct lmb_property region[MAX_LMB_REGIONS+1]; > + struct lmb_property *region; > + unsigned long region_array_size; > }; I suspect this should keep current LMB architectures still working, right? Ingo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Yinghai Lu on 23 Mar 2010 05:10 On 03/23/2010 01:02 AM, Ingo Molnar wrote: > > * Yinghai Lu <yinghai(a)kernel.org> wrote: > >> please check >> >> [PATCH 01/20] x86: add find_e820_area_node >> >> >> [RFC PATCH] x86: use lmb to replace early_res >> >> still keep kernel/early_res.c for the extension. >> >> should move those file to lib/lmb.c later? >> >> Signed-off-by: Yinghai Lu <yinghai(a)kernel.org> >> >> --- >> arch/x86/Kconfig | 1 >> arch/x86/include/asm/e820.h | 38 +- >> arch/x86/include/asm/lmb.h | 8 >> arch/x86/kernel/e820.c | 163 +---------- >> arch/x86/kernel/head.c | 2 >> arch/x86/kernel/head32.c | 4 >> arch/x86/kernel/head64.c | 2 >> arch/x86/kernel/setup.c | 2 >> arch/x86/kernel/setup_percpu.c | 6 >> include/linux/early_res.h | 9 >> include/linux/lmb.h | 5 >> kernel/early_res.c | 594 ++++++++++++++++------------------------- >> lib/lmb.c | 9 >> mm/page_alloc.c | 2 >> mm/sparse-vmemmap.c | 4 >> 15 files changed, 321 insertions(+), 528 deletions(-) > > That looks like a very promising direction! > > There's several things to do to make the approach fully clean: > > 1) > > I think we want to shape this as a series of simpler (and bisectable) patches. will check it. at least change include/linux/lmb.h and lib/lmb.c change could be separated. other looks a little bit hard. > > 2) > > I think we also need to concentrate the changes back into LMB: yes. put them in kernel/early_res.c and move them to lmb.c if lmb guys are happy with the change.
> >> Index: linux-2.6/include/linux/early_res.h >> =================================================================== >> --- linux-2.6.orig/include/linux/early_res.h >> +++ linux-2.6/include/linux/early_res.h >> @@ -5,15 +5,18 @@ >> extern void reserve_early(u64 start, u64 end, char *name); >> extern void reserve_early_overlap_ok(u64 start, u64 end, char *name); >> extern void free_early(u64 start, u64 end); >> -void free_early_partial(u64 start, u64 end); >> extern void early_res_to_bootmem(u64 start, u64 end); >> >> -void reserve_early_without_check(u64 start, u64 end, char *name); >> u64 find_early_area(u64 ei_start, u64 ei_last, u64 start, u64 end, >> u64 size, u64 align); >> u64 find_early_area_size(u64 ei_start, u64 ei_last, u64 start, >> u64 *sizep, u64 align); >> -u64 find_fw_memmap_area(u64 start, u64 end, u64 size, u64 align); >> +u64 find_lmb_area(u64 start, u64 end, u64 size, u64 align); >> +u64 find_lmb_area_size(u64 start, u64 *sizep, u64 align); >> +u64 find_lmb_area_node(int nid, u64 start, u64 end, u64 size, u64 align); >> +void lmb_register_active_regions(int nid, unsigned long start_pfn, >> + unsigned long last_pfn); >> +u64 lmb_hole_size(u64 start, u64 end); >> u64 get_max_mapped(void); >> #include <linux/range.h> >> int get_free_all_memory_range(struct range **rangep, int nodeid); > > those new lmb_*() APIs should go into lmb.h. next version > > 3) > > Furthermore, i think all of early_res.c should move into lmb.c as well and we > should eliminate kernel/early_res.c. > > early_res.h will go away as well and all the new APIs will be in lmb.h. current have three levels a. old lmb users b. x86 with bootmem c. x86 with no-bootmem some functions later could be moved to new bootmem.c > > 4) > > Also, we should move lib/lmb.c to mm/lmb.c, as now it's not just some optional > library but _the_ main early-reserve memory subsystem used by the biggest > Linux architectures. 
yes > > 5) > > Could we perhaps also try to eliminate e820_*() method uses in arch/x86/, and > replace them by lmb_*() API uses? (that too should be a step by step method, > for bisectability) yes. except e820_any_mapped(,,E820_RESERVED) others should not be used after fill_lmb_memory() > >> +++ linux-2.6/include/linux/lmb.h >> @@ -26,7 +26,8 @@ struct lmb_property { >> struct lmb_region { >> unsigned long cnt; >> u64 size; >> - struct lmb_property region[MAX_LMB_REGIONS+1]; >> + struct lmb_property *region; >> + unsigned long region_array_size; >> }; > > I suspect this should keep current LMB architectures still working, right? they are still working. lmb_init will connect the pointers. Index: linux-2.6/lib/lmb.c =================================================================== --- linux-2.6.orig/lib/lmb.c +++ linux-2.6/lib/lmb.c @@ -18,6 +18,8 @@ #define LMB_ALLOC_ANYWHERE 0 struct lmb lmb; +struct lmb_property lmb_memory_region[MAX_LMB_REGIONS + 1]; +struct lmb_property lmb_reserved_region[MAX_LMB_REGIONS + 1]; static int lmb_debug; @@ -106,6 +108,11 @@ static void lmb_coalesce_regions(struct void __init lmb_init(void) { + lmb.memory.region = lmb_memory_region; + lmb.memory.region_array_size = ARRAY_SIZE(lmb_memory_region); + lmb.reserved.region = lmb_reserved_region; + lmb.reserved.region_array_size = ARRAY_SIZE(lmb_reserved_region); + /* Create a dummy zero size LMB which will get coalesced away later. * This simplifies the lmb_add() code below... */ @@ -169,7 +176,7 @@ static long lmb_add_region(struct lmb_re if (coalesced) return coalesced; - if (rgn->cnt >= MAX_LMB_REGIONS) + if (rgn->cnt >= (rgn->region_array_size - 1)) return -1; Thanks Yinghai -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Ingo Molnar on 23 Mar 2010 05:50 * Yinghai Lu <yinghai(a)kernel.org> wrote: > > 2) > > > > I think we also need to concentrate the changes back into LMB: > > yes. put them in kernel/early_res.c and move them to lmb.c if lmb guys are > happy with the change. Yes, they seemed OK with changing it to accommodate x86, as long as current behavior stays compatible and as long as the changes are squeaky-clean. Both of which are highly reasonable expectations ;-) > > early_res.h will go away as well and all the new APIs will be in lmb.h. > > current have three levels > a. old lmb users > b. x86 with bootmem > c. x86 with no-bootmem > > some functions later could be moved to new bootmem.c I think we want to work towards the end result where we don't have bootmem.c anymore. I.e. a modern LMB architecture should generally not make use of bootmem at all. We could do that switch on x86 straight away, and make CONFIG_NO_BOOTMEM a default-y option, hm? We could also hide the interactivity behind CONFIG_DEBUG_VM or so - and eliminate it altogether later on. We should also switch around the flag and turn it into CONFIG_BOOTMEM. Hm? Ingo -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Yinghai Lu on 24 Mar 2010 02:00 On 03/23/2010 09:44 PM, Benjamin Herrenschmidt wrote: > >> I thought one possibility would be to have LMB regions become more lists >> than arrays, so that the static storage only needs to cover as much as >> is needed during really early boot (and we could probably still move the >> BSS top point on some archs to dynamically make more ... actually we >> could be smart arses and use LMB to allocate more LMB list heads if we >> are reaching the table limit :-) > > Actually what about that: > > LMB entries are linked-listed. The array is just storage for those entry > "heads". > > The initial static array only needs to be big enough for very very early > platform specific kernel bits and pieces, so it could even be sized by a > Kconfig option. Or it could just use a klimit moving trick to pick up a > page right after the BSS but that may need to be arch specific. > > lmb_init() queues all the entries from the initial array in a freelist > > lmb_alloc() and lmb_reserve() just pop entries from that freelist to > populate the two main linked lists (memory and reserved). > > When something tries to dequeue up the last freelist entry, then under > the hood, LMB uses it instead to allocate a new block of LMB entries > that gets added to the freelist. > > We never free blocks of LMB entries. > > That way, we can fine-tune the static array to be as small as we can > realistically make it be, and we have no boundary limitations on the > amount of entries in either the memory list or the reserved list. > > I'm a bit too flat out right now to write code, but if there's no > objection, I might give that a go either later this week or next week, > see if I can replace bootmem on powerpc. > if the array can be doubled and have old one copied to new one. then we don't change lmb.c too much. new early_res.c extend lmb. and another half already works with x86 to replace bootmem.
will check if i can produce one patch to make powerpc to reuse early_res/nobootmem Thanks Yinghai -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
First
|
Prev
|
Next
|
Last
Pages: 1 2 3 4 Prev: HID:HID-NTRIG update ntrig_event function Next: power_meter: acpi_device_class "power_meter_resource" too long |