Prev: [PATCH 2/2] x86, mce: Make MCE tracepoint persistent event
Next: [2.6.34-git8][regression] massive polling problems with udevd and other processes
From: Borislav Petkov on 22 May 2010 15:20 From: Borislav Petkov <bp(a)amd64.org> Date: Sat, May 22, 2010 at 09:04:47PM +0200 > Register and enable events marked as persistent right after perf events > has initialized. > > Not-yet-signed-off-by: Borislav Petkov <bp(a)alien8.de> > --- > include/linux/ftrace_event.h | 10 +++++++ > include/linux/perf_event.h | 1 + > kernel/perf_event.c | 59 +++++++++++++++++++++++++++++++++++++---- > kernel/trace/trace.h | 1 - > 4 files changed, 64 insertions(+), 7 deletions(-) > > diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h > index c0f4b36..b40d637 100644 > --- a/include/linux/ftrace_event.h > +++ b/include/linux/ftrace_event.h > @@ -13,6 +13,8 @@ struct dentry; > > DECLARE_PER_CPU(struct trace_seq, ftrace_event_seq); > > +extern struct list_head ftrace_events; > + > struct trace_print_flags { > unsigned long mask; > const char *name; > @@ -134,6 +136,7 @@ struct ftrace_event_call { > int perf_refcount; > int (*perf_event_enable)(struct ftrace_event_call *); > void (*perf_event_disable)(struct ftrace_event_call *); > + unsigned int type; > }; > > #define PERF_MAX_TRACE_SIZE 2048 > @@ -155,6 +158,13 @@ enum { > FILTER_PTR_STRING, > }; > > +enum event_type_t { > + EVENT_FLEXIBLE = 0x1, > + EVENT_PINNED = 0x2, > + EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, > + EVENT_PERSISTENT = 0x3, > +}; Doh, I meant enum event_type_t { EVENT_FLEXIBLE = 0x1, EVENT_PINNED = 0x2, EVENT_PERSISTENT = 0x4, EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED | EVENT_PERSISTENT, }; here. 
> + > extern int trace_event_raw_init(struct ftrace_event_call *call); > extern int trace_define_field(struct ftrace_event_call *call, const char *type, > const char *name, int offset, int size, > diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h > index c8e3754..aa62c97 100644 > --- a/include/linux/perf_event.h > +++ b/include/linux/perf_event.h > @@ -579,6 +579,7 @@ struct perf_event { > struct list_head group_entry; > struct list_head event_entry; > struct list_head sibling_list; > + struct list_head pevent_entry; > int nr_siblings; > int group_flags; > struct perf_event *group_leader; > diff --git a/kernel/perf_event.c b/kernel/perf_event.c > index 3d1552d..84f2f36 100644 > --- a/kernel/perf_event.c > +++ b/kernel/perf_event.c > @@ -72,6 +72,11 @@ static atomic64_t perf_event_id; > static DEFINE_SPINLOCK(perf_resource_lock); > > /* > + * persistent events which are always on > + */ > +DEFINE_PER_CPU(struct list_head, persistent_events); > + > +/* > * Architecture provided APIs - weak aliases: > */ > extern __weak const struct pmu *hw_perf_event_init(struct perf_event *event) > @@ -1017,12 +1022,6 @@ static int perf_event_refresh(struct perf_event *event, int refresh) > return 0; > } > > -enum event_type_t { > - EVENT_FLEXIBLE = 0x1, > - EVENT_PINNED = 0x2, > - EVENT_ALL = EVENT_FLEXIBLE | EVENT_PINNED, > -}; > - > static void ctx_sched_out(struct perf_event_context *ctx, > struct perf_cpu_context *cpuctx, > enum event_type_t event_type) > @@ -5385,6 +5384,8 @@ static void __init perf_event_init_all_cpus(void) > for_each_possible_cpu(cpu) { > cpuctx = &per_cpu(perf_cpu_context, cpu); > __perf_event_init_context(&cpuctx->ctx, NULL); > + > + INIT_LIST_HEAD(&per_cpu(persistent_events, cpu)); > } > } > > @@ -5405,12 +5406,16 @@ static void __perf_event_exit_cpu(void *info) > struct perf_cpu_context *cpuctx = &__get_cpu_var(perf_cpu_context); > struct perf_event_context *ctx = &cpuctx->ctx; > struct perf_event *event, *tmp; > + struct list_head 
*pers_events_list = &__get_cpu_var(persistent_events); > > list_for_each_entry_safe(event, tmp, &ctx->pinned_groups, group_entry) > __perf_event_remove_from_context(event); > list_for_each_entry_safe(event, tmp, &ctx->flexible_groups, group_entry) > __perf_event_remove_from_context(event); > + list_for_each_entry_safe(event, tmp, pers_events_list, pevent_entry) > + __perf_event_remove_from_context(event); > } > + > static void perf_event_exit_cpu(int cpu) > { > struct perf_cpu_context *cpuctx = &per_cpu(perf_cpu_context, cpu); > @@ -5456,6 +5461,46 @@ static struct notifier_block __cpuinitdata perf_cpu_nb = { > .priority = 20, > }; > > +static void __init perf_init_persistent_events(void) > +{ > + > + struct ftrace_event_call *call; > + struct perf_event_attr attr; > + struct perf_event *event; > + int cpu; > + > + list_for_each_entry(call, &ftrace_events, list) { > + > + if (call->type != EVENT_PERSISTENT) > + continue; > + > + attr.type = PERF_TYPE_TRACEPOINT, > + attr.config = call->id, > + attr.size = sizeof(attr), > + > + get_online_cpus(); > + > + for_each_online_cpu(cpu) { > + struct list_head *list; > + > + event = perf_event_create_kernel_counter(&attr, cpu, -1, NULL); > + if (IS_ERR(event)) { > + printk(KERN_ERR "Error initializing persistent " > + "event %s on cpu %d\n", > + call->name, cpu); > + break; > + } > + > + list = &per_cpu(persistent_events, cpu); > + list_add(&event->pevent_entry, list); > + > + perf_event_enable(event); > + > + } > + put_online_cpus(); > + } > +} > + > void __init perf_event_init(void) > { > perf_event_init_all_cpus(); > @@ -5464,6 +5509,8 @@ void __init perf_event_init(void) > perf_cpu_notify(&perf_cpu_nb, (unsigned long)CPU_ONLINE, > (void *)(long)smp_processor_id()); > register_cpu_notifier(&perf_cpu_nb); > + > + perf_init_persistent_events(); > } > > static ssize_t perf_show_reserve_percpu(struct sysdev_class *class, > diff --git a/kernel/trace/trace.h b/kernel/trace/trace.h > index 2825ef2..95f5611 100644 > --- 
a/kernel/trace/trace.h > +++ b/kernel/trace/trace.h > @@ -786,7 +786,6 @@ filter_check_discard(struct ftrace_event_call *call, void *rec, > } > > extern struct mutex event_mutex; > -extern struct list_head ftrace_events; > > extern const char *__start___trace_bprintk_fmt[]; > extern const char *__stop___trace_bprintk_fmt[]; > -- > 1.7.1 > > -- Regards/Gruss, Boris. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Peter Zijlstra on 23 May 2010 14:20 On Sat, 2010-05-22 at 21:00 +0200, Borislav Petkov wrote: > Register and enable events marked as persistent right after perf events > has initialized. > > Not-yet-signed-off-by: Borislav Petkov <bp(a)alien8.de> Nah, this is totally wrong. A persistent event would simply be a regular event, but created by the kernel and not tied to a file-desc's lifetime. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Borislav Petkov on 23 May 2010 14:40 From: Peter Zijlstra <peterz(a)infradead.org> Date: Sun, May 23, 2010 at 08:15:13PM +0200 > On Sat, 2010-05-22 at 21:00 +0200, Borislav Petkov wrote: > > Register and enable events marked as persistent right after perf events > > has initialized. > > > > Not-yet-signed-off-by: Borislav Petkov <bp(a)alien8.de> > > Nah, this is totally wrong. > > A persistent event would simply be a regular event, but created by the > kernel and not tied to a file-desc's lifetime. So you're saying the trace_mce_record() tracepoint for example should be created completely internally in the kernel and cease to be a tracepoint? Will it still be able to be selected by perf -e? Please elaborate. -- Regards/Gruss, Boris. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Peter Zijlstra on 23 May 2010 14:50 On Sun, 2010-05-23 at 20:33 +0200, Borislav Petkov wrote: > From: Peter Zijlstra <peterz(a)infradead.org> > Date: Sun, May 23, 2010 at 08:15:13PM +0200 > > > On Sat, 2010-05-22 at 21:00 +0200, Borislav Petkov wrote: > > > Register and enable events marked as persistent right after perf events > > > has initialized. > > > > > > Not-yet-signed-off-by: Borislav Petkov <bp(a)alien8.de> > > > > Nah, this is totally wrong. > > > > A persistent event would simply be a regular event, but created by the > > kernel and not tied to a file-desc's lifetime. > > So you're saying the trace_mce_record() tracepoint for example should > be created completely internally in the kernel and cease to be a > tracepoint? Will it still be able to be selected by perf -e? No, it should be a regular tracepoint as far as tracepoints are concerned. But the only thing persistence should add is an instance of a perf_event; it should not modify either the perf_event or the tracepoint code. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Borislav Petkov on 23 May 2010 15:00
From: Peter Zijlstra <peterz(a)infradead.org> Date: Sun, May 23, 2010 at 08:40:47PM +0200 > > > A persistent event would simply be a regular event, but created by the > > > kernel and not tied to a file-desc's lifetime. > > > > So you're saying the trace_mce_record() tracepoint for example should > > be created completely internally in the kernel and cease to be a > > tracepoint? Will it still be able to be selected by perf -e? > > No, it should be a regular tracepoint as far as tracepoints are > concerned. > > But the only thing persistence should add is an instance of a > perf_event, it should not modify either the perf_event nor the > tracepoint code. which means that subsystems which initialize earlier than perf (mce, for example) would have to be notified when perf is ready so that they can register a persistent event. How does that sound? -- Regards/Gruss, Boris. -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/