Prev: [PATCH 2/4] panic: Add taint flag TAINT_FIRMWARE_WORKAROUND ('I')
Next: [PATCH] raw: fsync method is now required
From: Dominik Brodowski on 6 Apr 2010 16:50 On Mon, Apr 05, 2010 at 03:31:30PM -0700, Paul E. McKenney wrote: > > > 5) powertop and hrtimer_start_range_ns (tick_sched_timer) on a SMP kernel > > > booted with "nosmp": > > > > > > Wakeups-from-idle per second : 9.9 interval: 15.0s > > > ... > > > 48.5% ( 9.4) <kernel core> : hrtimer_start (tick_sched_timer) > > > 26.1% ( 5.1) <kernel core> : cursor_timer_handler > > > (cursor_timer_handle > > > 20.6% ( 4.0) <kernel core> : usb_hcd_poll_rh_status (rh_timer_func) > > > 1.0% ( 0.2) <kernel core> : arm_supers_timer > > > (sync_supers_timer_fn) > > > 0.7% ( 0.1) <interrupt> : ata_piix > > > ... > > > > > > Accoding to http://www.linuxpowertop.org , the count in the brackets is > > > how > > > many wakeups per seconds were caused by one source. Adding all _except_ > > > 48.5% ( 9.4) <kernel core> : hrtimer_start (tick_sched_timer) > > > up leads to the 9.9. > > OK, so you further instrumented the hrtimer_sched_timer (or was it > tick_sched_timer?) to find the number that you were attributing to > rcu_needs_cpu()? That's what I did -- to tick_nohz_stop_sched_tick(), to see why the tick_sched_timer did not get stopped. Or why powertop thinks it did not get stopped... Patch below (works only for 1 or 2 CPUs) for all interested parties, _NOT_ intended for submission, though ;) 0x0001: CPU #1 0x0002: inidle 0x0004: not inidle 0x0008: INACTIVE 0x0010: need_resched 0x0020: softirq 0x0040: rcu_needs_ 0x0080: printk_needs_ 0x0100: arch_needs_ 0x0200: off-by-one 0x0400: skip 0x0800: loadbalance: 0x1000: cancel timer (SUCCESS) 0x2000: timer was already in the past 0x4000: hrtimer_start() (SUCCESS) 0x8000: stop_tick (SUCCESS) Fixing USB-autosuspend and disabling the fbcon cursor, I'm down to ~0.9 wakeups-per-second booting with "nosmp". diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c index f992762..6ba7bae 100644 --- a/kernel/time/tick-sched.c +++ b/kernel/time/tick-sched.c @@ -210,19 +210,35 @@ void tick_nohz_stop_sched_tick(int inidle) struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; u64 time_delta; int cpu; + static int testrun_counter = 0; + int testrun = 0; + u64 debugdata = 0; + + testrun_counter++; + + if ((testrun_counter % 123) == 0) + testrun = 1; local_irq_save(flags); cpu = smp_processor_id(); ts = &per_cpu(tick_cpu_sched, cpu); + debugdata = cpu; + + if (inidle) + debugdata |= 0x2; + + /* * Call to tick_nohz_start_idle stops the last_update_time from being * updated. Thus, it must not be called in the event we are called from * irq_exit() with the prior state different than idle. */ - if (!inidle && !ts->inidle) + if (!inidle && !ts->inidle) { + debugdata |= 0x4; goto end; + } /* * Set ts->inidle unconditionally. Even if the system did not @@ -245,11 +261,15 @@ void tick_nohz_stop_sched_tick(int inidle) tick_do_timer_cpu = TICK_DO_TIMER_NONE; } - if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) + if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) { + debugdata |= 0x8; goto end; + } - if (need_resched()) + if (need_resched()) { + debugdata |= 0x10; goto end; + } if (unlikely(local_softirq_pending() && cpu_online(cpu))) { static int ratelimit; @@ -259,6 +279,7 @@ void tick_nohz_stop_sched_tick(int inidle) (unsigned int) local_softirq_pending()); ratelimit++; } + debugdata |= 0x20; goto end; } @@ -275,6 +296,12 @@ void tick_nohz_stop_sched_tick(int inidle) arch_needs_cpu(cpu)) { next_jiffies = last_jiffies + 1; delta_jiffies = 1; + if (rcu_needs_cpu(cpu)) + debugdata |= 0x40; + if (printk_needs_cpu(cpu)) + debugdata |= 0x80; + if (arch_needs_cpu(cpu)) + debugdata |= 0x100; } else { /* Get the next timer wheel timer */ next_jiffies = get_next_timer_interrupt(last_jiffies); @@ -284,8 +311,10 @@ void tick_nohz_stop_sched_tick(int inidle) * Do not stop the tick, if we are only one off * or if the cpu is required for rcu */ - if (!ts->tick_stopped && delta_jiffies == 1) + if (!ts->tick_stopped && delta_jiffies == 1) { + debugdata |= 0x200; goto out; + } /* Schedule the tick, if we are at least one jiffie off */ if ((long)delta_jiffies >= 1) { @@ -341,8 +370,10 @@ void tick_nohz_stop_sched_tick(int inidle) cpumask_set_cpu(cpu, nohz_cpu_mask); /* Skip reprogram of event if its not changed */ - if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) + if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) { + debugdata |= 0x400; goto out; + } /* * nohz_stop_sched_tick can be called several times before @@ -357,8 +388,10 @@ void tick_nohz_stop_sched_tick(int inidle) * sched tick not stopped! */ cpumask_clear_cpu(cpu, nohz_cpu_mask); + debugdata |= 0x800; goto out; } + debugdata |= 0x8000; ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); ts->tick_stopped = 1; @@ -375,7 +408,8 @@ void tick_nohz_stop_sched_tick(int inidle) * If the expiration time == KTIME_MAX, then * in this case we simply stop the tick timer. */ - if (unlikely(expires.tv64 == KTIME_MAX)) { + if (unlikely(expires.tv64 == KTIME_MAX)) { + debugdata |= 0x1000; if (ts->nohz_mode == NOHZ_MODE_HIGHRES) hrtimer_cancel(&ts->sched_timer); goto out; @@ -384,9 +418,13 @@ void tick_nohz_stop_sched_tick(int inidle) if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { hrtimer_start(&ts->sched_timer, expires, HRTIMER_MODE_ABS_PINNED); + debugdata |= 0x2000; /* Check, if the timer was already in the past */ - if (hrtimer_active(&ts->sched_timer)) + if (hrtimer_active(&ts->sched_timer)) { + debugdata &= ~(0x2000); + debugdata |= 0x4000; goto out; + } } else if (!tick_program_event(expires, 0)) goto out; /* @@ -403,6 +441,8 @@ out: ts->last_jiffies = last_jiffies; ts->sleep_length = ktime_sub(dev->next_event, now); end: + if (testrun) + printk(KERN_DEBUG "0x%0llx\n", debugdata); local_irq_restore(flags); } -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Paul E. McKenney on 6 Apr 2010 17:00 On Tue, Apr 06, 2010 at 10:45:37PM +0200, Dominik Brodowski wrote: > On Mon, Apr 05, 2010 at 03:31:30PM -0700, Paul E. McKenney wrote: > > > > 5) powertop and hrtimer_start_range_ns (tick_sched_timer) on a SMP kernel > > > > booted with "nosmp": > > > > > > > > Wakeups-from-idle per second : 9.9 interval: 15.0s > > > > ... > > > > 48.5% ( 9.4) <kernel core> : hrtimer_start (tick_sched_timer) > > > > 26.1% ( 5.1) <kernel core> : cursor_timer_handler > > > > (cursor_timer_handle > > > > 20.6% ( 4.0) <kernel core> : usb_hcd_poll_rh_status (rh_timer_func) > > > > 1.0% ( 0.2) <kernel core> : arm_supers_timer > > > > (sync_supers_timer_fn) > > > > 0.7% ( 0.1) <interrupt> : ata_piix > > > > ... > > > > > > > > Accoding to http://www.linuxpowertop.org , the count in the brackets is > > > > how > > > > many wakeups per seconds were caused by one source. Adding all _except_ > > > > 48.5% ( 9.4) <kernel core> : hrtimer_start (tick_sched_timer) > > > > up leads to the 9.9. > > > > OK, so you further instrumented the hrtimer_sched_timer (or was it > > tick_sched_timer?) to find the number that you were attributing to > > rcu_needs_cpu()? > > That's what I did -- to tick_nohz_stop_sched_tick(), to see why the > tick_sched_timer did not get stopped. Or why powertop thinks it did not > get stopped... > > Patch below (works only for 1 or 2 CPUs) for all interested parties, > _NOT_ intended for submission, though ;) Cool! If you ever do want to submit such a patch, I should give you multiple non-zero return values from rcu_needs_cpu() to allow distinguishing between the following: 1. RCU needs the CPU, but is trying to get done with it quickly. 2. RCU needs the CPU, and has given up on trying to get done with it quickly. Unless you tell me otherwise, I will assume that you do -not- need this. Thanx, Paul > 0x0001: CPU #1 > 0x0002: inidle > 0x0004: not inidle > 0x0008: INACTIVE > 0x0010: need_resched > 0x0020: softirq > 0x0040: rcu_needs_ > 0x0080: printk_needs_ > 0x0100: arch_needs_ > 0x0200: off-by-one > 0x0400: skip > 0x0800: loadbalance: > 0x1000: cancel timer (SUCCESS) > 0x2000: timer was already in the past > 0x4000: hrtimer_start() (SUCCESS) > 0x8000: stop_tick (SUCCESS) > > Fixing USB-autosuspend and disabling the fbcon cursor, I'm down to > ~0.9 wakeups-per-second booting with "nosmp". > > diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c > index f992762..6ba7bae 100644 > --- a/kernel/time/tick-sched.c > +++ b/kernel/time/tick-sched.c > @@ -210,19 +210,35 @@ void tick_nohz_stop_sched_tick(int inidle) > struct clock_event_device *dev = __get_cpu_var(tick_cpu_device).evtdev; > u64 time_delta; > int cpu; > + static int testrun_counter = 0; > + int testrun = 0; > + u64 debugdata = 0; > + > + testrun_counter++; > + > + if ((testrun_counter % 123) == 0) > + testrun = 1; > > local_irq_save(flags); > > cpu = smp_processor_id(); > ts = &per_cpu(tick_cpu_sched, cpu); > > + debugdata = cpu; > + > + if (inidle) > + debugdata |= 0x2; > + > + > /* > * Call to tick_nohz_start_idle stops the last_update_time from being > * updated. Thus, it must not be called in the event we are called from > * irq_exit() with the prior state different than idle. > */ > - if (!inidle && !ts->inidle) > + if (!inidle && !ts->inidle) { > + debugdata |= 0x4; > goto end; > + } > > /* > * Set ts->inidle unconditionally. Even if the system did not > @@ -245,11 +261,15 @@ void tick_nohz_stop_sched_tick(int inidle) > tick_do_timer_cpu = TICK_DO_TIMER_NONE; > } > > - if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) > + if (unlikely(ts->nohz_mode == NOHZ_MODE_INACTIVE)) { > + debugdata |= 0x8; > goto end; > + } > > - if (need_resched()) > + if (need_resched()) { > + debugdata |= 0x10; > goto end; > + } > > if (unlikely(local_softirq_pending() && cpu_online(cpu))) { > static int ratelimit; > @@ -259,6 +279,7 @@ void tick_nohz_stop_sched_tick(int inidle) > (unsigned int) local_softirq_pending()); > ratelimit++; > } > + debugdata |= 0x20; > goto end; > } > > @@ -275,6 +296,12 @@ void tick_nohz_stop_sched_tick(int inidle) > arch_needs_cpu(cpu)) { > next_jiffies = last_jiffies + 1; > delta_jiffies = 1; > + if (rcu_needs_cpu(cpu)) > + debugdata |= 0x40; > + if (printk_needs_cpu(cpu)) > + debugdata |= 0x80; > + if (arch_needs_cpu(cpu)) > + debugdata |= 0x100; > } else { > /* Get the next timer wheel timer */ > next_jiffies = get_next_timer_interrupt(last_jiffies); > @@ -284,8 +311,10 @@ void tick_nohz_stop_sched_tick(int inidle) > * Do not stop the tick, if we are only one off > * or if the cpu is required for rcu > */ > - if (!ts->tick_stopped && delta_jiffies == 1) > + if (!ts->tick_stopped && delta_jiffies == 1) { > + debugdata |= 0x200; > goto out; > + } > > /* Schedule the tick, if we are at least one jiffie off */ > if ((long)delta_jiffies >= 1) { > @@ -341,8 +370,10 @@ void tick_nohz_stop_sched_tick(int inidle) > cpumask_set_cpu(cpu, nohz_cpu_mask); > > /* Skip reprogram of event if its not changed */ > - if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) > + if (ts->tick_stopped && ktime_equal(expires, dev->next_event)) { > + debugdata |= 0x400; > goto out; > + } > > /* > * nohz_stop_sched_tick can be called several times before > @@ -357,8 +388,10 @@ void tick_nohz_stop_sched_tick(int inidle) > * sched tick not stopped! > */ > cpumask_clear_cpu(cpu, nohz_cpu_mask); > + debugdata |= 0x800; > goto out; > } > + debugdata |= 0x8000; > > ts->idle_tick = hrtimer_get_expires(&ts->sched_timer); > ts->tick_stopped = 1; > @@ -375,7 +408,8 @@ void tick_nohz_stop_sched_tick(int inidle) > * If the expiration time == KTIME_MAX, then > * in this case we simply stop the tick timer. > */ > - if (unlikely(expires.tv64 == KTIME_MAX)) { > + if (unlikely(expires.tv64 == KTIME_MAX)) { > + debugdata |= 0x1000; > if (ts->nohz_mode == NOHZ_MODE_HIGHRES) > hrtimer_cancel(&ts->sched_timer); > goto out; > @@ -384,9 +418,13 @@ void tick_nohz_stop_sched_tick(int inidle) > if (ts->nohz_mode == NOHZ_MODE_HIGHRES) { > hrtimer_start(&ts->sched_timer, expires, > HRTIMER_MODE_ABS_PINNED); > + debugdata |= 0x2000; > /* Check, if the timer was already in the past */ > - if (hrtimer_active(&ts->sched_timer)) > + if (hrtimer_active(&ts->sched_timer)) { > + debugdata &= ~(0x2000); > + debugdata |= 0x4000; > goto out; > + } > } else if (!tick_program_event(expires, 0)) > goto out; > /* > @@ -403,6 +441,8 @@ out: > ts->last_jiffies = last_jiffies; > ts->sleep_length = ktime_sub(dev->next_event, now); > end: > + if (testrun) > + printk(KERN_DEBUG "0x%0llx\n", debugdata); > local_irq_restore(flags); > } > -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
First
|
Prev
|
Pages: 1 2 3 4 5 6 Prev: [PATCH 2/4] panic: Add taint flag TAINT_FIRMWARE_WORKAROUND ('I') Next: [PATCH] raw: fsync method is now required |