Prev: [RFC][PATCH 07/11] blkiocg async: Pass bio to elevator_ops functions
Next: [RFC][PATCH 11/11] blkiocg async: Workload timeslice adjustment for async queues
From: Munehiro Ikeda on 8 Jul 2010 23:30 When an mmap(2)'d page is written back, which means the page doesn't have buffer_heads, ext4 prepares buffer_heads and calls block_commit_write() from ext4_writepage(). This results in a call to mark_buffer_dirty(), and the page's dirty flag is set. In this case, the current process marking the page dirty is (almost always) the flush kernel thread, so the original info about the process which dirtied this page is lost. To prevent this issue, this patch introduces block_commit_write_noiotrack(), which is the same as block_commit_write() but runs through a code path that does not record the current process info. The portion calling block_commit_write() in ext4 will be modified in the following patch. Signed-off-by: Munehiro "Muuhh" Ikeda <m-ikeda(a)ds.jp.nec.com> --- fs/buffer.c | 70 ++++++++++++++++++++++++++++++++----------- include/linux/buffer_head.h | 2 + 2 files changed, 54 insertions(+), 18 deletions(-) diff --git a/fs/buffer.c b/fs/buffer.c index c418fdf..61ebf94 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -660,15 +660,17 @@ EXPORT_SYMBOL(mark_buffer_dirty_inode); * * If warn is true, then emit a warning if the page is not uptodate and has * not been truncated. + * If track is true, dirtying process info is recorded for iotrack. */ static void __set_page_dirty(struct page *page, - struct address_space *mapping, int warn) + struct address_space *mapping, int warn, int track) { spin_lock_irq(&mapping->tree_lock); if (page->mapping) { /* Race with truncate? 
*/ WARN_ON_ONCE(warn && !PageUptodate(page)); account_page_dirtied(page, mapping); - blk_iotrack_reset_owner_pagedirty(page, current->mm); + if (track) + blk_iotrack_reset_owner_pagedirty(page, current->mm); radix_tree_tag_set(&mapping->page_tree, page_index(page), PAGECACHE_TAG_DIRTY); } @@ -723,7 +725,7 @@ int __set_page_dirty_buffers(struct page *page) spin_unlock(&mapping->private_lock); if (newly_dirty) - __set_page_dirty(page, mapping, 1); + __set_page_dirty(page, mapping, 1, 1); return newly_dirty; } EXPORT_SYMBOL(__set_page_dirty_buffers); @@ -1137,18 +1139,11 @@ __getblk_slow(struct block_device *bdev, sector_t block, int size) */ /** - * mark_buffer_dirty - mark a buffer_head as needing writeout + * __mark_buffer_dirty - helper function for mark_buffer_dirty* * @bh: the buffer_head to mark dirty - * - * mark_buffer_dirty() will set the dirty bit against the buffer, then set its - * backing page dirty, then tag the page as dirty in its address_space's radix - * tree and then attach the address_space's inode to its superblock's dirty - * inode list. - * - * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, - * mapping->tree_lock and the global inode_lock. 
+ * @track: if true, dirtying process info will be recorded for iotrack */ -void mark_buffer_dirty(struct buffer_head *bh) +static void __mark_buffer_dirty(struct buffer_head *bh, int track) { WARN_ON_ONCE(!buffer_uptodate(bh)); @@ -1169,12 +1164,40 @@ void mark_buffer_dirty(struct buffer_head *bh) if (!TestSetPageDirty(page)) { struct address_space *mapping = page_mapping(page); if (mapping) - __set_page_dirty(page, mapping, 0); + __set_page_dirty(page, mapping, 0, track); } } } + +/** + * mark_buffer_dirty - mark a buffer_head as needing writeout + * @bh: the buffer_head to mark dirty + * + * mark_buffer_dirty() will set the dirty bit against the buffer, then set its + * backing page dirty, then tag the page as dirty in its address_space's radix + * tree and then attach the address_space's inode to its superblock's dirty + * inode list. + * + * mark_buffer_dirty() is atomic. It takes bh->b_page->mapping->private_lock, + * mapping->tree_lock and the global inode_lock. + */ +void mark_buffer_dirty(struct buffer_head *bh) +{ + __mark_buffer_dirty(bh, 1); +} EXPORT_SYMBOL(mark_buffer_dirty); +/** + * mark_buffer_dirty_noiotrack + * - same as mark_buffer_dirty but doesn't record dirtying process info + * @bh: the buffer_head to mark dirty + */ +void mark_buffer_dirty_noiotrack(struct buffer_head *bh) +{ + __mark_buffer_dirty(bh, 0); +} +EXPORT_SYMBOL(mark_buffer_dirty_noiotrack); + /* * Decrement a buffer_head's reference count. 
If all buffers against a page * have zero reference count, are clean and unlocked, and if the page is clean @@ -1916,7 +1939,7 @@ static int __block_prepare_write(struct inode *inode, struct page *page, } static int __block_commit_write(struct inode *inode, struct page *page, - unsigned from, unsigned to) + unsigned from, unsigned to, int track) { unsigned block_start, block_end; int partial = 0; @@ -1934,7 +1957,10 @@ static int __block_commit_write(struct inode *inode, struct page *page, partial = 1; } else { set_buffer_uptodate(bh); - mark_buffer_dirty(bh); + if (track) + mark_buffer_dirty(bh); + else + mark_buffer_dirty_noiotrack(bh); } clear_buffer_new(bh); } @@ -2067,7 +2093,7 @@ int block_write_end(struct file *file, struct address_space *mapping, flush_dcache_page(page); /* This could be a short (even 0-length) commit */ - __block_commit_write(inode, page, start, start+copied); + __block_commit_write(inode, page, start, start+copied, 1); return copied; } @@ -2414,11 +2440,19 @@ EXPORT_SYMBOL(block_prepare_write); int block_commit_write(struct page *page, unsigned from, unsigned to) { struct inode *inode = page->mapping->host; - __block_commit_write(inode,page,from,to); + __block_commit_write(inode, page, from, to, 1); return 0; } EXPORT_SYMBOL(block_commit_write); +int block_commit_write_noiotrack(struct page *page, unsigned from, unsigned to) +{ + struct inode *inode = page->mapping->host; + __block_commit_write(inode, page, from, to, 0); + return 0; +} +EXPORT_SYMBOL(block_commit_write_noiotrack); + /* * block_page_mkwrite() is not allowed to change the file size as it gets * called from a page fault handler when a page is first dirtied. 
Hence we must diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 1b9ba19..9d7e0b0 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -145,6 +145,7 @@ BUFFER_FNS(Unwritten, unwritten) */ void mark_buffer_dirty(struct buffer_head *bh); +void mark_buffer_dirty_noiotrack(struct buffer_head *bh); void init_buffer(struct buffer_head *, bh_end_io_t *, void *); void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); @@ -225,6 +226,7 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); +int block_commit_write_noiotrack(struct page *page, unsigned from, unsigned to); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); void block_sync_page(struct page *); -- 1.6.2.5 -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ |