From: Pekka Enberg on 18 Jul 2010 04:20

Nitin Gupta wrote:
> +/*
> + * Individual percpu values can go negative but the sum across all CPUs
> + * must always be positive (we store various counts). So, return sum as
> + * unsigned value.
> + */
> +static u64 zcache_get_stat(struct zcache_pool *zpool,
> +		enum zcache_pool_stats_index idx)
> +{
> +	int cpu;
> +	s64 val = 0;
> +
> +	for_each_possible_cpu(cpu) {
> +		unsigned int start;
> +		struct zcache_pool_stats_cpu *stats;
> +
> +		stats = per_cpu_ptr(zpool->stats, cpu);
> +		do {
> +			start = u64_stats_fetch_begin(&stats->syncp);
> +			val += stats->count[idx];
> +		} while (u64_stats_fetch_retry(&stats->syncp, start));

Can we use 'struct percpu_counter' for this? OTOH, the warning on top of
include/linux/percpu_counter.h makes me think not.

> +	}
> +
> +	BUG_ON(val < 0);

BUG_ON() seems overly aggressive. How about

	if (WARN_ON(val < 0))
		return 0;

> +	return val;
> +}
> +
> +static void zcache_add_stat(struct zcache_pool *zpool,
> +		enum zcache_pool_stats_index idx, s64 val)
> +{
> +	struct zcache_pool_stats_cpu *stats;
> +
> +	preempt_disable();
> +	stats = __this_cpu_ptr(zpool->stats);
> +	u64_stats_update_begin(&stats->syncp);
> +	stats->count[idx] += val;
> +	u64_stats_update_end(&stats->syncp);
> +	preempt_enable();

What is the preempt_disable/preempt_enable trying to do here?

> +static void zcache_destroy_pool(struct zcache_pool *zpool)
> +{
> +	int i;
> +
> +	if (!zpool)
> +		return;
> +
> +	spin_lock(&zcache->pool_lock);
> +	zcache->num_pools--;
> +	for (i = 0; i < MAX_ZCACHE_POOLS; i++)
> +		if (zcache->pools[i] == zpool)
> +			break;
> +	zcache->pools[i] = NULL;
> +	spin_unlock(&zcache->pool_lock);
> +
> +	if (!RB_EMPTY_ROOT(&zpool->inode_tree)) {

Use WARN_ON here to get a stack trace?

> +		pr_warn("Memory leak detected. Freeing non-empty pool!\n");
> +		zcache_dump_stats(zpool);
> +	}
> +
> +	free_percpu(zpool->stats);
> +	kfree(zpool);
> +}
> +
> +/*
> + * Allocate a new zcache pool and set default memlimit.
> + *
> + * Returns pool_id on success, negative error code otherwise.
> + */
> +int zcache_create_pool(void)
> +{
> +	int ret;
> +	u64 memlimit;
> +	struct zcache_pool *zpool = NULL;
> +
> +	spin_lock(&zcache->pool_lock);
> +	if (zcache->num_pools == MAX_ZCACHE_POOLS) {
> +		spin_unlock(&zcache->pool_lock);
> +		pr_info("Cannot create new pool (limit: %u)\n",
> +			MAX_ZCACHE_POOLS);
> +		ret = -EPERM;
> +		goto out;
> +	}
> +	zcache->num_pools++;
> +	spin_unlock(&zcache->pool_lock);
> +
> +	zpool = kzalloc(sizeof(*zpool), GFP_KERNEL);
> +	if (!zpool) {
> +		spin_lock(&zcache->pool_lock);
> +		zcache->num_pools--;
> +		spin_unlock(&zcache->pool_lock);
> +		ret = -ENOMEM;
> +		goto out;
> +	}

Why not kmalloc() a new struct zcache_pool object first and then take
zcache->pool_lock and check for MAX_ZCACHE_POOLS? That should make the
locking a little less confusing here.

> +
> +	zpool->stats = alloc_percpu(struct zcache_pool_stats_cpu);
> +	if (!zpool->stats) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +
> +	rwlock_init(&zpool->tree_lock);
> +	seqlock_init(&zpool->memlimit_lock);
> +	zpool->inode_tree = RB_ROOT;
> +
> +	memlimit = zcache_pool_default_memlimit_perc_ram *
> +			((totalram_pages << PAGE_SHIFT) / 100);
> +	memlimit &= PAGE_MASK;
> +	zcache_set_memlimit(zpool, memlimit);
> +
> +	/* Add to pool list */
> +	spin_lock(&zcache->pool_lock);
> +	for (ret = 0; ret < MAX_ZCACHE_POOLS; ret++)
> +		if (!zcache->pools[ret])
> +			break;
> +	zcache->pools[ret] = zpool;
> +	spin_unlock(&zcache->pool_lock);
> +
> +out:
> +	if (ret < 0)
> +		zcache_destroy_pool(zpool);
> +
> +	return ret;
> +}
> +
> +/*
> + * Allocate memory for storing the given page and insert
> + * it in the given node's page tree at location 'index'.
> + *
> + * Returns 0 on success, negative error code on failure.
> + */
> +static int zcache_store_page(struct zcache_inode_rb *znode,
> +		pgoff_t index, struct page *page)
> +{
> +	int ret;
> +	unsigned long flags;
> +	struct page *zpage;
> +	void *src_data, *dest_data;
> +
> +	zpage = alloc_page(GFP_NOWAIT);
> +	if (!zpage) {
> +		ret = -ENOMEM;
> +		goto out;
> +	}
> +	zpage->index = index;
> +
> +	src_data = kmap_atomic(page, KM_USER0);
> +	dest_data = kmap_atomic(zpage, KM_USER1);
> +	memcpy(dest_data, src_data, PAGE_SIZE);
> +	kunmap_atomic(src_data, KM_USER0);
> +	kunmap_atomic(dest_data, KM_USER1);

copy_highpage()

> +
> +	spin_lock_irqsave(&znode->tree_lock, flags);
> +	ret = radix_tree_insert(&znode->page_tree, index, zpage);
> +	spin_unlock_irqrestore(&znode->tree_lock, flags);
> +	if (unlikely(ret))
> +		__free_page(zpage);
> +
> +out:
> +	return ret;
> +}
> +
> +/*
> + * cleancache_ops.get_page
> + *
> + * Locates stored zcache page using <pool_id, inode_no, index>.
> + * If found, copies it to the given output page 'page' and frees
> + * zcache copy of the same.
> + *
> + * Returns 0 if requested page found, -1 otherwise.
> + */
> +static int zcache_get_page(int pool_id, ino_t inode_no,
> +		pgoff_t index, struct page *page)
> +{
> +	int ret = -1;
> +	unsigned long flags;
> +	struct page *src_page;
> +	void *src_data, *dest_data;
> +	struct zcache_inode_rb *znode;
> +	struct zcache_pool *zpool = zcache->pools[pool_id];
> +
> +	znode = zcache_find_inode(zpool, inode_no);
> +	if (!znode)
> +		goto out;
> +
> +	BUG_ON(znode->inode_no != inode_no);

Maybe use WARN_ON here and return -1?

> +
> +	spin_lock_irqsave(&znode->tree_lock, flags);
> +	src_page = radix_tree_delete(&znode->page_tree, index);
> +	if (zcache_inode_is_empty(znode))
> +		zcache_inode_isolate(znode);
> +	spin_unlock_irqrestore(&znode->tree_lock, flags);
> +
> +	kref_put(&znode->refcount, zcache_inode_release);
> +
> +	if (!src_page)
> +		goto out;
> +
> +	src_data = kmap_atomic(src_page, KM_USER0);
> +	dest_data = kmap_atomic(page, KM_USER1);
> +	memcpy(dest_data, src_data, PAGE_SIZE);
> +	kunmap_atomic(src_data, KM_USER0);
> +	kunmap_atomic(dest_data, KM_USER1);

The above sequence can be replaced with copy_highpage().

> +
> +	flush_dcache_page(page);
> +
> +	__free_page(src_page);
> +
> +	zcache_dec_stat(zpool, ZPOOL_STAT_PAGES_STORED);
> +	ret = 0;	/* success */
> +
> +out:
> +	return ret;
> +}
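For reference, the copy_highpage() replacement suggested above is a one-liner; copy_highpage() takes the destination page first and performs the kmap_atomic/kunmap_atomic pairs internally. A minimal sketch for the store path (illustration only, not code from the posted patch):

	/* in zcache_store_page(), replacing the open-coded copy */
	copy_highpage(zpage, page);

The same call, with source and destination swapped appropriately, covers the open-coded copy in zcache_get_page() as well.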
From: Pekka Enberg on 18 Jul 2010 04:30

Nitin Gupta wrote:
> +static void zcache_add_stat(struct zcache_pool *zpool,
> +		enum zcache_pool_stats_index idx, s64 val)
> +{
> +	struct zcache_pool_stats_cpu *stats;
> +
> +	preempt_disable();
> +	stats = __this_cpu_ptr(zpool->stats);
> +	u64_stats_update_begin(&stats->syncp);
> +	stats->count[idx] += val;
> +	u64_stats_update_end(&stats->syncp);
> +	preempt_enable();
> +
> +}

You should probably use this_cpu_inc() here.
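For a counter that did not need the u64_stats seqcount, the suggestion would collapse the function to a single call, roughly like this (a sketch only; as Nitin notes in his reply below, the stats->syncp update still needs a stable per-cpu pointer here):

	this_cpu_add(zpool->stats->count[idx], val);

this_cpu_add() performs the per-cpu dereference and the addition as one preemption-safe operation, which is why the explicit preempt_disable()/preempt_enable() pair would no longer be needed.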
From: Eric Dumazet on 18 Jul 2010 04:50

On Friday 16 July 2010 at 18:07 +0530, Nitin Gupta wrote:

> This particular patch implements basic functionality only:
> +static u64 zcache_get_stat(struct zcache_pool *zpool,
> +		enum zcache_pool_stats_index idx)
> +{
> +	int cpu;
> +	s64 val = 0;
> +
> +	for_each_possible_cpu(cpu) {
> +		unsigned int start;
> +		struct zcache_pool_stats_cpu *stats;
> +
> +		stats = per_cpu_ptr(zpool->stats, cpu);
> +		do {
> +			start = u64_stats_fetch_begin(&stats->syncp);
> +			val += stats->count[idx];
> +		} while (u64_stats_fetch_retry(&stats->syncp, start));
> +	}
> +
> +	BUG_ON(val < 0);
> +	return val;
> +}

Sorry, this is wrong.

Inside the fetch/retry block you should not do the addition to val, only
a read of the value into a temporary variable, since the block might be
executed several times.

You want something like:

static u64 zcache_get_stat(struct zcache_pool *zpool,
		enum zcache_pool_stats_index idx)
{
	int cpu;
	s64 temp, val = 0;

	for_each_possible_cpu(cpu) {
		unsigned int start;
		struct zcache_pool_stats_cpu *stats;

		stats = per_cpu_ptr(zpool->stats, cpu);
		do {
			start = u64_stats_fetch_begin(&stats->syncp);
			temp = stats->count[idx];
		} while (u64_stats_fetch_retry(&stats->syncp, start));
		val += temp;
	}

	...
}
From: Nitin Gupta on 18 Jul 2010 05:50

On 07/18/2010 01:44 PM, Pekka Enberg wrote:
> Nitin Gupta wrote:
>> +/*
>> + * Individual percpu values can go negative but the sum across all CPUs
>> + * must always be positive (we store various counts). So, return sum as
>> + * unsigned value.
>> + */
>> +static u64 zcache_get_stat(struct zcache_pool *zpool,
>> +		enum zcache_pool_stats_index idx)
>> +{
>> +	int cpu;
>> +	s64 val = 0;
>> +
>> +	for_each_possible_cpu(cpu) {
>> +		unsigned int start;
>> +		struct zcache_pool_stats_cpu *stats;
>> +
>> +		stats = per_cpu_ptr(zpool->stats, cpu);
>> +		do {
>> +			start = u64_stats_fetch_begin(&stats->syncp);
>> +			val += stats->count[idx];
>> +		} while (u64_stats_fetch_retry(&stats->syncp, start));
>
> Can we use 'struct percpu_counter' for this? OTOH, the warning on top of
> include/linux/percpu_counter.h makes me think not.

Yes, it was that warning that scared me off :)

>> +	}
>> +
>> +	BUG_ON(val < 0);
>
> BUG_ON() seems overly aggressive. How about
>
> 	if (WARN_ON(val < 0))
> 		return 0;

Yes, this sounds better. I will change it.

>> +	return val;
>> +}
>> +
>> +static void zcache_add_stat(struct zcache_pool *zpool,
>> +		enum zcache_pool_stats_index idx, s64 val)
>> +{
>> +	struct zcache_pool_stats_cpu *stats;
>> +
>> +	preempt_disable();
>> +	stats = __this_cpu_ptr(zpool->stats);
>> +	u64_stats_update_begin(&stats->syncp);
>> +	stats->count[idx] += val;
>> +	u64_stats_update_end(&stats->syncp);
>> +	preempt_enable();
>
> What is the preempt_disable/preempt_enable trying to do here?

On 32-bit there is no seqlock protecting this value. So, if we get
preempted after __this_cpu_ptr(), two CPUs can end up doing racy writes
to the same variable. I think this_cpu_add() does its final increment
with preemption disabled for the same reason.

Also, I think we shouldn't use this_cpu_add() (as you suggested in
another mail) since we have to do this_cpu_ptr() first anyway to get at
the seqlock (stats->syncp). So a simple increment on the per-cpu pointer
thus obtained should be okay.

>> +static void zcache_destroy_pool(struct zcache_pool *zpool)
>> +{
>> +	int i;
>> +
>> +	if (!zpool)
>> +		return;
>> +
>> +	spin_lock(&zcache->pool_lock);
>> +	zcache->num_pools--;
>> +	for (i = 0; i < MAX_ZCACHE_POOLS; i++)
>> +		if (zcache->pools[i] == zpool)
>> +			break;
>> +	zcache->pools[i] = NULL;
>> +	spin_unlock(&zcache->pool_lock);
>> +
>> +	if (!RB_EMPTY_ROOT(&zpool->inode_tree)) {
>
> Use WARN_ON here to get a stack trace?

This sounds better, will change it.

>> +		pr_warn("Memory leak detected. Freeing non-empty pool!\n");
>> +		zcache_dump_stats(zpool);
>> +	}
>> +
>> +	free_percpu(zpool->stats);
>> +	kfree(zpool);
>> +}
>> +
>> +/*
>> + * Allocate a new zcache pool and set default memlimit.
>> + *
>> + * Returns pool_id on success, negative error code otherwise.
>> + */
>> +int zcache_create_pool(void)
>> +{
>> +	int ret;
>> +	u64 memlimit;
>> +	struct zcache_pool *zpool = NULL;
>> +
>> +	spin_lock(&zcache->pool_lock);
>> +	if (zcache->num_pools == MAX_ZCACHE_POOLS) {
>> +		spin_unlock(&zcache->pool_lock);
>> +		pr_info("Cannot create new pool (limit: %u)\n",
>> +			MAX_ZCACHE_POOLS);
>> +		ret = -EPERM;
>> +		goto out;
>> +	}
>> +	zcache->num_pools++;
>> +	spin_unlock(&zcache->pool_lock);
>> +
>> +	zpool = kzalloc(sizeof(*zpool), GFP_KERNEL);
>> +	if (!zpool) {
>> +		spin_lock(&zcache->pool_lock);
>> +		zcache->num_pools--;
>> +		spin_unlock(&zcache->pool_lock);
>> +		ret = -ENOMEM;
>> +		goto out;
>> +	}
>
> Why not kmalloc() a new struct zcache_pool object first and then take
> zcache->pool_lock and check for MAX_ZCACHE_POOLS? That should make the
> locking a little less confusing here.

Doing the kmalloc() before this check should be better. It also avoids
the unnecessary num_pools decrement later if the kmalloc() fails.

>> +
>> +	src_data = kmap_atomic(page, KM_USER0);
>> +	dest_data = kmap_atomic(zpage, KM_USER1);
>> +	memcpy(dest_data, src_data, PAGE_SIZE);
>> +	kunmap_atomic(src_data, KM_USER0);
>> +	kunmap_atomic(dest_data, KM_USER1);
>
> copy_highpage()

OK, but we will have to open-code this memcpy() again when we start
using xvmalloc (patch 7/8). The same applies to the other instance you
pointed out.

>> +static int zcache_get_page(int pool_id, ino_t inode_no,
>> +		pgoff_t index, struct page *page)
>> +{
>> +	int ret = -1;
>> +	unsigned long flags;
>> +	struct page *src_page;
>> +	void *src_data, *dest_data;
>> +	struct zcache_inode_rb *znode;
>> +	struct zcache_pool *zpool = zcache->pools[pool_id];
>> +
>> +	znode = zcache_find_inode(zpool, inode_no);
>> +	if (!znode)
>> +		goto out;
>> +
>> +	BUG_ON(znode->inode_no != inode_no);
>
> Maybe use WARN_ON here and return -1?

Okay.

Thanks for the review.
Nitin
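To make the agreed-upon reordering concrete, a sketch of how zcache_create_pool() might look with the allocations pulled in front of the pool_lock section (an illustration of the review suggestion, not code from the posted series):

	int zcache_create_pool(void)
	{
		int i;
		u64 memlimit;
		struct zcache_pool *zpool;

		/* Allocate everything before touching shared state */
		zpool = kzalloc(sizeof(*zpool), GFP_KERNEL);
		if (!zpool)
			return -ENOMEM;

		zpool->stats = alloc_percpu(struct zcache_pool_stats_cpu);
		if (!zpool->stats) {
			kfree(zpool);
			return -ENOMEM;
		}

		rwlock_init(&zpool->tree_lock);
		seqlock_init(&zpool->memlimit_lock);
		zpool->inode_tree = RB_ROOT;

		memlimit = zcache_pool_default_memlimit_perc_ram *
				((totalram_pages << PAGE_SHIFT) / 100);
		zcache_set_memlimit(zpool, memlimit & PAGE_MASK);

		/* One locked section: check the limit and claim a slot */
		spin_lock(&zcache->pool_lock);
		if (zcache->num_pools == MAX_ZCACHE_POOLS) {
			spin_unlock(&zcache->pool_lock);
			pr_info("Cannot create new pool (limit: %u)\n",
					MAX_ZCACHE_POOLS);
			free_percpu(zpool->stats);
			kfree(zpool);
			return -EPERM;
		}
		for (i = 0; i < MAX_ZCACHE_POOLS; i++)
			if (!zcache->pools[i])
				break;
		zcache->pools[i] = zpool;
		zcache->num_pools++;
		spin_unlock(&zcache->pool_lock);

		return i;	/* pool_id */
	}

With this shape, num_pools is only ever incremented once the pool is actually installed, so no error path needs to undo it.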
From: Nitin Gupta on 18 Jul 2010 06:00
On 07/18/2010 02:14 PM, Eric Dumazet wrote:
> On Friday 16 July 2010 at 18:07 +0530, Nitin Gupta wrote:
>
>> This particular patch implements basic functionality only:
>> +static u64 zcache_get_stat(struct zcache_pool *zpool,
>> +		enum zcache_pool_stats_index idx)
>> +{
>> +	int cpu;
>> +	s64 val = 0;
>> +
>> +	for_each_possible_cpu(cpu) {
>> +		unsigned int start;
>> +		struct zcache_pool_stats_cpu *stats;
>> +
>> +		stats = per_cpu_ptr(zpool->stats, cpu);
>> +		do {
>> +			start = u64_stats_fetch_begin(&stats->syncp);
>> +			val += stats->count[idx];
>> +		} while (u64_stats_fetch_retry(&stats->syncp, start));
>> +	}
>> +
>> +	BUG_ON(val < 0);
>> +	return val;
>> +}
>
> Sorry, this is wrong.
>
> Inside the fetch/retry block you should not do the addition to val, only
> a read of the value into a temporary variable, since the block might be
> executed several times. You want something like:
>
> static u64 zcache_get_stat(struct zcache_pool *zpool,
> 		enum zcache_pool_stats_index idx)
> {
> 	int cpu;
> 	s64 temp, val = 0;
>
> 	for_each_possible_cpu(cpu) {
> 		unsigned int start;
> 		struct zcache_pool_stats_cpu *stats;
>
> 		stats = per_cpu_ptr(zpool->stats, cpu);
> 		do {
> 			start = u64_stats_fetch_begin(&stats->syncp);
> 			temp = stats->count[idx];
> 		} while (u64_stats_fetch_retry(&stats->syncp, start));
> 		val += temp;
> 	}
>
> 	...
> }

Oh, my bad. Thanks for the fix!

On a side note: u64_stats_* should probably be renamed stats64_* since
the helpers apply equally to s64.

Thanks,
Nitin
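Folding together the two fixes agreed on in this thread (Eric's temporary variable inside the fetch/retry loop, and Pekka's WARN_ON in place of BUG_ON), the reader would end up looking roughly like this (a sketch of the expected next revision, not code actually posted here):

	static u64 zcache_get_stat(struct zcache_pool *zpool,
			enum zcache_pool_stats_index idx)
	{
		int cpu;
		s64 temp, val = 0;

		for_each_possible_cpu(cpu) {
			unsigned int start;
			struct zcache_pool_stats_cpu *stats;

			stats = per_cpu_ptr(zpool->stats, cpu);
			do {
				/* read one CPU's count; retry if a writer raced us */
				start = u64_stats_fetch_begin(&stats->syncp);
				temp = stats->count[idx];
			} while (u64_stats_fetch_retry(&stats->syncp, start));
			/* accumulate outside the retry loop, exactly once per CPU */
			val += temp;
		}

		/* per-CPU values may be negative, but the sum must not be */
		if (WARN_ON(val < 0))
			return 0;
		return val;
	}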