From: Lai Jiangshan on 5 Apr 2010 06:40

The current get_online_cpus() acquires a mutex lock and then releases it.
This does not scale, and it hurts the cache. This patch rewrites it.

1) get_online_cpus() must be allowed to be called recursively, so a
per-task counter, get_online_cpus_nest, is added for the new code.
This patch allows get_online_cpus() to be called recursively; when the
call is not nested, get_online_cpus() waits until the CPU hotplug
operation has finished, so potential starvation is avoided. The
livelock described in cpu_hotplug_begin() is also avoided, so that
comment is removed.

2) The new code uses per-CPU counters protected by RCU. These counters
act like the reference count of a module. (Actually, all of this code
is adapted from module.c: try_refcount_get() from try_module_get(),
put_online_cpus() from module_put()...) On the writer side,
cpu_hotplug_begin() sets cpu_hotplug_task and then calls
synchronize_sched(); since try_refcount_get() runs with preemption
disabled, every reader that could have missed the flag has finished
before the writer starts summing the per-CPU counters. After this
patch, get_online_cpus() is very lightweight and scalable while no CPU
hotplug operation is running: it just disables preemption, increments
the per-CPU counter, and enables preemption again.

3) Since we have try_refcount_get(), a new API, try_get_online_cpus(),
is added. (A caller-side usage sketch follows the diffstat.)

Signed-off-by: Lai Jiangshan <laijs(a)cn.fujitsu.com>
---
 include/linux/cpu.h   |    2
 include/linux/sched.h |    3 +
 kernel/cpu.c          |  131 ++++++++++++++++++++++++++++++++------------------
 kernel/fork.c         |    3 +
 4 files changed, 94 insertions(+), 45 deletions(-)
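As a usage sketch, not part of the patch itself: a caller that must not
sleep can back off instead of blocking. The function name below is a
hypothetical example; only try_get_online_cpus() and put_online_cpus()
come from this patch.

#include <linux/cpu.h>
#include <linux/cpumask.h>

static void example_touch_online_cpus(void)
{
        int cpu;

        if (!try_get_online_cpus())
                return; /* a hotplug operation is in flight; try later */

        for_each_online_cpu(cpu) {
                /* 'cpu' cannot be unplugged while the reference is held */
        }

        put_online_cpus();
}

A successful try_get_online_cpus() is paired with put_online_cpus()
exactly like get_online_cpus(), since both paths bump
current->get_online_cpus_nest.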
diff --git a/include/linux/cpu.h b/include/linux/cpu.h
index e287863..a32809c 100644
--- a/include/linux/cpu.h
+++ b/include/linux/cpu.h
@@ -112,6 +112,7 @@ extern struct sysdev_class cpu_sysdev_class;
 
 extern void get_online_cpus(void);
 extern void put_online_cpus(void);
+extern int try_get_online_cpus(void);
 #define hotcpu_notifier(fn, pri)	cpu_notifier(fn, pri)
 #define register_hotcpu_notifier(nb)	register_cpu_notifier(nb)
 #define unregister_hotcpu_notifier(nb)	unregister_cpu_notifier(nb)
@@ -134,6 +135,7 @@ static inline void cpu_hotplug_driver_unlock(void)
 
 #define get_online_cpus()	do { } while (0)
 #define put_online_cpus()	do { } while (0)
+#define try_get_online_cpus()	(1)
 #define hotcpu_notifier(fn, pri)	do { (void)(fn); } while (0)
 /* These aren't inline functions due to a GCC bug. */
 #define register_hotcpu_notifier(nb)	({ (void)(nb); 0; })
diff --git a/include/linux/sched.h b/include/linux/sched.h
index c46b6e5..0422ea3 100644
--- a/include/linux/sched.h
+++ b/include/linux/sched.h
@@ -1501,6 +1501,9 @@ struct task_struct {
 		unsigned long memsw_bytes; /* uncharged mem+swap usage */
 	} memcg_batch;
 #endif
+#ifdef CONFIG_HOTPLUG_CPU
+	int get_online_cpus_nest;
+#endif
 };
 
 /* Future-safe accessor for struct task_struct's cpus_allowed. */
diff --git a/kernel/cpu.c b/kernel/cpu.c
index bc1e3d5..ede02c6 100644
--- a/kernel/cpu.c
+++ b/kernel/cpu.c
@@ -14,6 +14,7 @@
 #include <linux/kthread.h>
 #include <linux/stop_machine.h>
 #include <linux/mutex.h>
+#include <linux/percpu.h>
 
 #ifdef CONFIG_SMP
 /* Serializes the updates to cpu_online_mask, cpu_present_mask */
@@ -42,41 +43,82 @@ static int cpu_hotplug_disabled;
 
 #ifdef CONFIG_HOTPLUG_CPU
 
-static struct {
-	struct task_struct *active_writer;
-	struct mutex lock; /* Synchronizes accesses to refcount, */
-	/*
-	 * Also blocks the new readers during
-	 * an ongoing cpu hotplug operation.
-	 */
-	int refcount;
-} cpu_hotplug = {
-	.active_writer = NULL,
-	.lock = __MUTEX_INITIALIZER(cpu_hotplug.lock),
-	.refcount = 0,
-};
+DEFINE_MUTEX(cpu_hotplug_lock);
+struct task_struct *cpu_hotplug_task;
+DEFINE_PER_CPU(int, refcount);
+
+static int try_refcount_get(void)
+{
+	preempt_disable();
+
+	if (likely(!cpu_hotplug_task)) {
+		__get_cpu_var(refcount)++;
+		preempt_enable();
+		return 1;
+	}
+
+	preempt_enable();
+	return 0;
+}
+
+int try_get_online_cpus(void)
+{
+	if (cpu_hotplug_task == current)
+		return 1;
+
+	if (current->get_online_cpus_nest || try_refcount_get()) {
+		current->get_online_cpus_nest++;
+		return 1;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(try_get_online_cpus);
 
 void get_online_cpus(void)
 {
 	might_sleep();
-	if (cpu_hotplug.active_writer == current)
+	if (cpu_hotplug_task == current)
+		return;
+
+	if (current->get_online_cpus_nest++)
+		return;
+
+	if (likely(try_refcount_get()))
 		return;
-	mutex_lock(&cpu_hotplug.lock);
-	cpu_hotplug.refcount++;
-	mutex_unlock(&cpu_hotplug.lock);
+
+	mutex_lock(&cpu_hotplug_lock);
+	percpu_add(refcount, 1);
+	mutex_unlock(&cpu_hotplug_lock);
 }
 EXPORT_SYMBOL_GPL(get_online_cpus);
 
+static unsigned int refcount_sum(void)
+{
+	unsigned int total = 0;
+	int cpu;
+
+	for_each_possible_cpu(cpu)
+		total += per_cpu(refcount, cpu);
+
+	return total;
+}
+
 void put_online_cpus(void)
 {
-	if (cpu_hotplug.active_writer == current)
+	if (cpu_hotplug_task == current)
+		return;
+
+	if (WARN_ON(!current->get_online_cpus_nest))
 		return;
-	mutex_lock(&cpu_hotplug.lock);
-	if (!--cpu_hotplug.refcount && unlikely(cpu_hotplug.active_writer))
-		wake_up_process(cpu_hotplug.active_writer);
-	mutex_unlock(&cpu_hotplug.lock);
+
+	if (!--current->get_online_cpus_nest) {
+		preempt_disable();
+		__get_cpu_var(refcount)--;
+		if (cpu_hotplug_task)
+			wake_up_process(cpu_hotplug_task);
+		preempt_enable();
+	}
 }
 EXPORT_SYMBOL_GPL(put_online_cpus);
 
@@ -85,41 +127,40 @@ EXPORT_SYMBOL_GPL(put_online_cpus);
  * refcount goes to zero.
  *
  * Note that during a cpu-hotplug operation, the new readers, if any,
- * will be blocked by the cpu_hotplug.lock
- *
- * Since cpu_hotplug_begin() is always called after invoking
- * cpu_maps_update_begin(), we can be sure that only one writer is active.
- *
- * Note that theoretically, there is a possibility of a livelock:
- * - Refcount goes to zero, last reader wakes up the sleeping
- *   writer.
- * - Last reader unlocks the cpu_hotplug.lock.
- * - A new reader arrives at this moment, bumps up the refcount.
- * - The writer acquires the cpu_hotplug.lock finds the refcount
- *   non zero and goes to sleep again.
- *
- * However, this is very difficult to achieve in practice since
- * get_online_cpus() not an api which is called all that often.
- *
+ * will be blocked by the cpu_hotplug_lock
  */
 static void cpu_hotplug_begin(void)
 {
-	cpu_hotplug.active_writer = current;
+	mutex_lock(&cpu_hotplug_lock);
+
+	/*
+	 * Set cpu_hotplug_task, then wait until all in-flight
+	 * try_refcount_get() calls have finished and their effects are seen.
+	 */
+	cpu_hotplug_task = current;
+	synchronize_sched();
 
+	/* Wait for zero refcount */
 	for (;;) {
-		mutex_lock(&cpu_hotplug.lock);
-		if (likely(!cpu_hotplug.refcount))
+		set_current_state(TASK_UNINTERRUPTIBLE);
+		if (!refcount_sum())
 			break;
 
-		__set_current_state(TASK_UNINTERRUPTIBLE);
-		mutex_unlock(&cpu_hotplug.lock);
 		schedule();
 	}
+
+	__set_current_state(TASK_RUNNING);
 }
 
 static void cpu_hotplug_done(void)
 {
-	cpu_hotplug.active_writer = NULL;
-	mutex_unlock(&cpu_hotplug.lock);
+	/*
+	 * Ensure try_refcount_get() sees the preceding hotplug
+	 * changes once it sees cpu_hotplug_task == NULL.
+	 */
+	smp_mb();
+
+	cpu_hotplug_task = NULL;
+	mutex_unlock(&cpu_hotplug_lock);
 }
 
 #else /* #if CONFIG_HOTPLUG_CPU */
diff --git a/kernel/fork.c b/kernel/fork.c
index d67f1db..b162014 100644
--- a/kernel/fork.c
+++ b/kernel/fork.c
@@ -1109,6 +1109,9 @@ static struct task_struct *copy_process(unsigned long clone_flags,
 	p->memcg_batch.memcg = NULL;
 #endif
 	p->stack_start = stack_start;
+#ifdef CONFIG_HOTPLUG_CPU
+	p->get_online_cpus_nest = 0;
+#endif
 
 	/* Perform scheduler related setup. Assign this task to a CPU. */
 	sched_fork(p, clone_flags);
--
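The synchronization pattern above, distilled into a standalone sketch.
This is illustrative only, not code from the patch; every name below
(example_ref, example_blocker, example_get, example_block) is
hypothetical.

#include <linux/percpu.h>
#include <linux/preempt.h>
#include <linux/rcupdate.h>
#include <linux/sched.h>

static DEFINE_PER_CPU(int, example_ref);
static struct task_struct *example_blocker;

/*
 * Reader fast path: a preempt-disabled region is an RCU-sched
 * read-side critical section, so the writer can flush all in-flight
 * readers with synchronize_sched().
 */
static int example_get(void)
{
        int ok = 0;

        preempt_disable();
        if (likely(!example_blocker)) {
                __get_cpu_var(example_ref)++;   /* CPU-local, no shared lock */
                ok = 1;
        }
        preempt_enable();
        return ok;
}

/*
 * Writer: publish the flag, then wait out every reader that might not
 * have seen it. After synchronize_sched() returns, no new increments
 * can arrive, so the per-CPU sum can only shrink.
 */
static void example_block(void)
{
        example_blocker = current;
        synchronize_sched();
        /*
         * ...now poll the per-CPU counters until they sum to zero,
         * as cpu_hotplug_begin() does with refcount_sum().
         */
}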