Prev: powerpc: Use IRQF_NO_SUSPEND not IRQF_TIMER for non-timer interrupts
Next: [PATCH 3/9] staging: otus: check kmalloc() return value
From: KAMEZAWA Hiroyuki on 1 Aug 2010 20:30 On Sat, 31 Jul 2010 14:41:26 +1000 Bojan Smojver <bojan(a)rexursive.com> wrote: > On Sat, 2010-07-31 at 11:33 +1000, Bojan Smojver wrote: > > I can go back to that easily. > > So, here is that whole enchilada one more time (it includes sync_read > removal patch as well). > > I did 3 hibernate/thaw cycles with it. Images varied from about 850 MB, > 1.1 GB to 750 MB. I was getting 156/141 MB/s, 121/118 MBs and 141/130 > MBs speeds. Obviously, these things depend on compression ratios > achieved etc. > > I guess the number of pages (i.e. LZO_UNC_PAGES) could be made > configurable as well. > > PS. Inline, as requested. > I'm sorry if I miss something. > + wrk = vmalloc(LZO1X_1_MEM_COMPRESS); > + if (!wrk) { > + printk(KERN_ERR "PM: Failed to allocate LZO workspace\n"); > + free_page((unsigned long)page); > + return -ENOMEM; > + } > + > + unc = vmalloc(LZO_UNC_SIZE); > + if (!unc) { > + printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); > + vfree(wrk); > + free_page((unsigned long)page); > + return -ENOMEM; > + } Now, vmallc() is used here. Then, following will happen. 1. vmalloc() -> vmalloc adds vmap objects and set page table entries. 2. saving image -> At taking snapshot of memory to the disk, above vmalloc() area is saved to disk as it is. .... 3. At restore Because you dont't remember which vmalloc() area was used for creating snapshot, you can't free it at swsusp_free(). memory leak ? Thanks, -Kame > + > + cmp = vmalloc(LZO_CMP_SIZE); > + if (!cmp) { > + printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); > + vfree(unc); > + vfree(wrk); > + free_page((unsigned long)page); > + return -ENOMEM; > + } > > printk(KERN_INFO "PM: Saving image data pages (%u pages) ... 
", > nr_to_write); > @@ -382,16 +422,48 @@ static int save_image(struct swap_map_handle *handle, > bio = NULL; > do_gettimeofday(&start); > while (1) { > - ret = snapshot_read_next(snapshot); > - if (ret <= 0) > + for (ul = 0; ul < LZO_UNC_SIZE; ul += PAGE_SIZE) { > + ret = snapshot_read_next(snapshot); > + if (ret < 0) > + goto out_finish; > + > + if (ret == 0) > + break; > + > + memcpy(unc + ul, data_of(*snapshot), PAGE_SIZE); > + > + if (!(nr_pages % m)) > + printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); > + nr_pages++; > + } > + > + if (ul == 0) > + break; > + > + ret = lzo1x_1_compress(unc, ul, cmp + LZO_HEADER, &cl, wrk); > + if (ret < 0) { > + printk(KERN_ERR "PM: LZO compression failed\n"); > break; > - ret = swap_write_page(handle, data_of(*snapshot), &bio); > - if (ret) > + } > + > + if (unlikely(cl == 0 || LZO_HEADER + cl > LZO_CMP_SIZE)) { > + printk(KERN_ERR "PM: Invalid LZO length\n"); > + ret = -1; > break; > - if (!(nr_pages % m)) > - printk(KERN_CONT "\b\b\b\b%3d%%", nr_pages / m); > - nr_pages++; > + } > + > + *(size_t *)cmp = cl; > + > + for (ul = 0; ul < LZO_HEADER + cl; ul += PAGE_SIZE) { > + memcpy(page, cmp + ul, PAGE_SIZE); > + > + ret = swap_write_page(handle, page, &bio); > + if (ret) > + goto out_finish; > + } > } > + > +out_finish: > err2 = hib_wait_on_bio_chain(&bio); > do_gettimeofday(&stop); > if (!ret) > @@ -401,6 +473,12 @@ static int save_image(struct swap_map_handle *handle, > else > printk(KERN_CONT "\n"); > swsusp_show_speed(&start, &stop, nr_to_write, "Wrote"); > + > + vfree(cmp); > + vfree(unc); > + vfree(wrk); > + free_page((unsigned long)page); > + > return ret; > } > > @@ -416,7 +494,8 @@ static int enough_swap(unsigned int nr_pages) > unsigned int free_swap = count_swap_pages(root_swap, 1); > > pr_debug("PM: Free swap pages: %u\n", free_swap); > - return free_swap > nr_pages + PAGES_FOR_IO; > + return free_swap > > + (nr_pages * LZO_CMP_PAGES) / LZO_UNC_PAGES + PAGES_FOR_IO; > } > > /** > @@ -547,9 +626,30 @@ static int 
load_image(struct swap_map_handle *handle, > int error = 0; > struct timeval start; > struct timeval stop; > - struct bio *bio; > - int err2; > unsigned nr_pages; > + size_t ul, cl; > + unsigned char *unc, *cmp, *page; > + > + page = (void *)__get_free_page(__GFP_WAIT | __GFP_HIGH); > + if (!page) { > + printk(KERN_ERR "PM: Failed to allocate LZO page\n"); > + return -ENOMEM; > + } > + > + unc = vmalloc(LZO_UNC_SIZE); > + if (!unc) { > + printk(KERN_ERR "PM: Failed to allocate LZO uncompressed\n"); > + free_page((unsigned long)page); > + return -ENOMEM; > + } > + > + cmp = vmalloc(LZO_CMP_SIZE); > + if (!cmp) { > + printk(KERN_ERR "PM: Failed to allocate LZO compressed\n"); > + vfree(unc); > + free_page((unsigned long)page); > + return -ENOMEM; > + } > > printk(KERN_INFO "PM: Loading image data pages (%u pages) ... ", > nr_to_read); > @@ -557,27 +657,60 @@ static int load_image(struct swap_map_handle *handle, > if (!m) > m = 1; > nr_pages = 0; > - bio = NULL; > do_gettimeofday(&start); > + > + error = snapshot_write_next(snapshot); > + if (error <= 0) > + goto out_finish; > + > for ( ; ; ) { > - error = snapshot_write_next(snapshot); > - if (error <= 0) > - break; > - error = swap_read_page(handle, data_of(*snapshot), &bio); > + error = swap_read_page(handle, page, NULL); /* sync */ > if (error) > break; > - if (snapshot->sync_read) > - error = hib_wait_on_bio_chain(&bio); > - if (error) > + memcpy(cmp, page, PAGE_SIZE); > + > + cl = *(size_t *)cmp; > + if (unlikely(cl == 0 || LZO_HEADER + cl > LZO_CMP_SIZE)) { > + printk(KERN_ERR "PM: Invalid LZO length\n"); > + error = -1; > + break; > + } > + > + for (ul = PAGE_SIZE; ul < LZO_HEADER + cl; ul += PAGE_SIZE) { > + error = swap_read_page(handle, page, NULL); /* sync */ > + if (error) > + goto out_finish; > + memcpy(cmp + ul, page, PAGE_SIZE); > + } > + > + ul = LZO_UNC_SIZE; > + error = lzo1x_decompress_safe(cmp + LZO_HEADER, cl, unc, &ul); > + if (error < 0) { > + printk(KERN_ERR "PM: LZO decompression failed\n"); 
> break; > - if (!(nr_pages % m)) > - printk("\b\b\b\b%3d%%", nr_pages / m); > - nr_pages++; > + } > + > + if (unlikely(ul == 0 || ul > LZO_UNC_SIZE)) { > + printk(KERN_ERR "PM: Invalid LZO length\n"); > + error = -1; > + break; > + } > + > + for (cl = 0; cl < ul; cl += PAGE_SIZE) { > + memcpy(data_of(*snapshot), unc + cl, PAGE_SIZE); > + > + if (!(nr_pages % m)) > + printk("\b\b\b\b%3d%%", nr_pages / m); > + nr_pages++; > + > + error = snapshot_write_next(snapshot); > + if (error <= 0) > + goto out_finish; > + } > } > - err2 = hib_wait_on_bio_chain(&bio); > + > +out_finish: > do_gettimeofday(&stop); > - if (!error) > - error = err2; > if (!error) { > printk("\b\b\b\bdone\n"); > snapshot_write_finalize(snapshot); > @@ -586,6 +719,11 @@ static int load_image(struct swap_map_handle *handle, > } else > printk("\n"); > swsusp_show_speed(&start, &stop, nr_to_read, "Read"); > + > + vfree(cmp); > + vfree(unc); > + free_page((unsigned long)page); > + > return error; > } > > > -- > Bojan > > -- > To unsubscribe from this list: send the line "unsubscribe linux-kernel" in > the body of a message to majordomo(a)vger.kernel.org > More majordomo info at http://vger.kernel.org/majordomo-info.html > Please read the FAQ at http://www.tux.org/lkml/ > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Bojan Smojver on 1 Aug 2010 21:00 On Mon, 2010-08-02 at 09:17 +0900, KAMEZAWA Hiroyuki wrote: > Now, vmallc() is used here. Then, following will happen. > > 1. vmalloc() > -> vmalloc adds vmap objects and set page table entries. > > 2. saving image > -> At taking snapshot of memory to the disk, above vmalloc() area > is > saved to disk as it is. > ... > 3. At restore > Because you dont't remember which vmalloc() area was used for > creating > snapshot, you can't free it at swsusp_free(). > > memory leak ? To be honest, I'm not sure. However, I thought that by the time save_image() is called, the snapshot has already been taken, no? ------------------ error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); if (error) goto Thaw; if (in_suspend) { unsigned int flags = 0; if (hibernation_mode == HIBERNATION_PLATFORM) flags |= SF_PLATFORM_MODE; pr_debug("PM: writing image.\n"); error = swsusp_write(flags); <--- this calls save_image() ------------------ So, methinks that these allocations will not be in the snapshot image. PS. Take everything I say with a grain (or two) of salt. I'm just a regular Linux user trying to make my Fedora hibernate/thaw process suck less. -- Bojan -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: KAMEZAWA Hiroyuki on 1 Aug 2010 21:20 On Mon, 02 Aug 2010 10:54:13 +1000 Bojan Smojver <bojan(a)rexursive.com> wrote: > On Mon, 2010-08-02 at 09:17 +0900, KAMEZAWA Hiroyuki wrote: > > Now, vmallc() is used here. Then, following will happen. > > > > 1. vmalloc() > > -> vmalloc adds vmap objects and set page table entries. > > > > 2. saving image > > -> At taking snapshot of memory to the disk, above vmalloc() area > > is > > saved to disk as it is. > > ... > > 3. At restore > > Because you dont't remember which vmalloc() area was used for > > creating > > snapshot, you can't free it at swsusp_free(). > > > > memory leak ? > > To be honest, I'm not sure. > > However, I thought that by the time save_image() is called, snapshot has > already been taken, no? > ------------------ > error = hibernation_snapshot(hibernation_mode == HIBERNATION_PLATFORM); > if (error) > goto Thaw; > > if (in_suspend) { > unsigned int flags = 0; > > if (hibernation_mode == HIBERNATION_PLATFORM) > flags |= SF_PLATFORM_MODE; > pr_debug("PM: writing image.\n"); > error = swsusp_write(flags); <--- this calls save_image() > ------------------ > > So, me thinks that these allocations will not be in the snapshot image. > I'm very new to the snapshot code ... (I'm studying it now because I got a report that my patch corrupts it.) So, don't trust my words. Looking into swsusp_write(): == swsusp_write() -> save_image() -> while () { snapshot_read_next() swap_write_page() } == This routine writes a buffer obtained from snapshot_read_next() to the disk. Then, what snapshot_read_next() passes is: == } else { struct page *page; page = pfn_to_page(memory_bm_next_pfn(&copy_bm)); if (PageHighMem(page)) { /* Highmem pages are copied to the buffer, * because we can't return with a kmapped * highmem page (we may not be called again). 
*/ void *kaddr; kaddr = kmap_atomic(page, KM_USER0); memcpy(buffer, kaddr, PAGE_SIZE); kunmap_atomic(kaddr, KM_USER0); handle->buffer = buffer; } else { handle->buffer = page_address(page); } } == That is, the physical memory address of a page to be saved. So, I thought the "system memory image" itself is not a snapshot; it keeps changing while the system runs. The reason swsusp can avoid a memory leak is that it records, in the bitmap, which pages should be freed after resume, and that bitmap will be saved to the image header(?). And, even if this snapshot saves the image of the buddy allocator, the save routine itself uses a fixed buffer which can be freed after restore. Thanks, -Kame -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Bojan Smojver on 1 Aug 2010 21:30 On Mon, 2010-08-02 at 10:10 +0900, KAMEZAWA Hiroyuki wrote: > Why swsusp can avoid memory leak is that it records which > pages should be freed after resume in the bitmap, which will be saved > to image header(?) Right. So, are you saying that all allocations in save_image() should be done using __get_free_page() or __get_free_pages() and not with vmalloc()? -- Bojan -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: KAMEZAWA Hiroyuki on 1 Aug 2010 21:40
On Mon, 02 Aug 2010 11:21:08 +1000 Bojan Smojver <bojan(a)rexursive.com> wrote: > On Mon, 2010-08-02 at 10:10 +0900, KAMEZAWA Hiroyuki wrote: > > Why swsusp can avoid memory leak is that it records which > > pages should be freed after resume in the bitmap, which will be saved > > to image header(?) > > Right. So, are you saying that all allocations in save_image() should be > done using __get_free_page() or __get_free_pages() and not with > vmalloc()? > I'm not saying that, but the following points need to be considered. (And it would be good to write "we're safe because..." as a comment.) 1. Information about the pointers used for vmalloc is saved into the image. 2. The information from (1) is properly recovered after resume, so we can free the buffers. 3. No more allocations will happen once we start writing to the disk. Then, the information about the vmalloc() area itself will be saved as "this vmalloc area is used" and, at resume, recovered as "this vmalloc area is used". Then, you can free it because you remember the pointers. So, you should make @@ -372,6 +380,38 @@ static int save_image(struct swap_map_handle *handle, struct bio *bio; struct timeval start; struct timeval stop; + size_t ul, cl; + unsigned char *unc, *cmp, *wrk, *page; global variables. Because global variables will be saved as they are, you can find them after resume and free the used vmalloc() buffers. Maybe freeing them at swsusp_free() would be clean. Thanks, -Kame -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ |