From: Cyrill Gorcunov on 8 Feb 2010 17:00

On Mon, Feb 08, 2010 at 09:45:04PM +0300, Cyrill Gorcunov wrote:
> Hi all,
> ....
> All in one -- if you have some spare minutes, please take a glance. I can't
> say I like this code -- it's overcomplicated and I fear hard to understand,
> and still a bit buggy. Complaints are welcome!
> ....

Just updated some snippets; here is an interdiff on top of the previous post
(to avoid reposting too much). The key point is to pass the cpu into the new
x86_pmu.schedule_events routine. This allows us to detect whether
hw_perf_event::config was configured on a different logical cpu when HT is
turned on; on a non-HT machine it has no effect. As a result we should swap
the thread-specific ESCR+CCCR flags, I think.

	-- Cyrill
---
diff -u linux-2.6.git/arch/x86/include/asm/perf_p4.h linux-2.6.git/arch/x86/include/asm/perf_p4.h
--- linux-2.6.git/arch/x86/include/asm/perf_p4.h
+++ linux-2.6.git/arch/x86/include/asm/perf_p4.h
@@ -120,7 +120,7 @@
 	return !!((P4_EVENT_UNPACK_SELECTOR(config)) & P4_CCCR_CASCADE);
 }
 
-static inline bool p4_is_ht_slave(u64 config)
+static inline int p4_is_ht_slave(u64 config)
 {
 	return !!(config & P4_CONFIG_HT);
 }
@@ -146,7 +146,8 @@
 static inline int p4_ht_thread(int cpu)
 {
 #ifdef CONFIG_SMP
-	return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
+	if (smp_num_siblings == 2)
+		return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map));
 #endif
 	return 0;
 }
diff -u linux-2.6.git/arch/x86/kernel/cpu/perf_event.c linux-2.6.git/arch/x86/kernel/cpu/perf_event.c
--- linux-2.6.git/arch/x86/kernel/cpu/perf_event.c
+++ linux-2.6.git/arch/x86/kernel/cpu/perf_event.c
@@ -141,7 +141,7 @@
 	u64		max_period;
 	u64		intel_ctrl;
 	int		(*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc);
-	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign);
+	int		(*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign, int cpu);
 	void		(*enable_bts)(u64 config);
 	void		(*disable_bts)(void);
@@ -1236,7 +1236,7 @@
 }
 
 /*
- * offset: 0 - BSP thread, 1 - secondary thread
+ * offset: 0,1 - HT threads
  * used in HT enabled cpu
  */
 struct p4_event_template {
@@ -1254,19 +1254,6 @@
 } p4_pmu_config;
 
 /*
- * Netburst is heavily constrained :(
- */
-#if 0
-#define P4_EVENT_CONSTRAINT(c, n)	\
-	EVENT_CONSTRAINT(c, n, (P4_EVNTSEL_MASK | P4_CCCR_MASK))
-
-static struct event_constraint p4_event_constraints[] =
-{
-	EVENT_CONSTRAINT_END
-};
-#endif
-
-/*
  * CCCR1 doesn't have a working enable bit so do not use it ever
  *
  * Also as only we start to support raw events we will need to
@@ -1382,6 +1369,11 @@
 	return config;
 }
 
+/*
+ * note-to-self: this could be a bottleneck and we may need some hash structure
+ * based on "packed" event CRC, currently even if we may almost ideal
+ * hashing we will still have 5 intersected opcodes, introduce kind of salt?
+ */
 static struct p4_event_template *p4_pmu_template_lookup(u64 config)
 {
 	u32 opcode = p4_config_unpack_opcode(config);
@@ -1411,6 +1403,12 @@
 {
 	int cpu = smp_processor_id();
 
+	/*
+	 * the reason we use cpu that early is that if we get scheduled
+	 * first time on the same cpu -- we will not need swap thread
+	 * specific flags in config which will save some cycles
+	 */
+
 	/* CCCR by default */
 	hwc->config = p4_config_pack_cccr(p4_default_cccr_conf(cpu));
@@ -1522,15 +1520,26 @@
 	return handled;
 }
 
+/* swap some thread specific flags in config */
+static u64 p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu)
+{
+	u64 conf = hwc->config;
+
+	if ((p4_is_ht_slave(hwc->config) ^ p4_ht_thread(cpu))) {
+		/* FIXME: swap them here */
+	}
+
+	return conf;
+}
+
 /*
  * Netburst has a quite constrained architecture
  */
-static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign, int cpu)
 {
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
 	struct p4_event_template *tpl;
 	struct hw_perf_event *hwc;
-	int cpu = smp_processor_id();
 	int thread;
 	int i, j, num = 0;
@@ -1678,7 +1687,8 @@
 	return event->pmu == &pmu;
 }
 
-static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign)
+/* we don't use cpu argument here at all */
+static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign, int cpu)
 {
 	struct event_constraint *c, *constraints[X86_PMC_IDX_MAX];
 	unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)];
@@ -2143,7 +2153,7 @@
 	if (n < 0)
 		return n;
 
-	ret = x86_schedule_events(cpuc, n, assign);
+	ret = x86_pmu.schedule_events(cpuc, n, assign, 0);
 	if (ret)
 		return ret;
 	/*
@@ -2660,7 +2670,7 @@
 	if (n0 < 0)
 		return n0;
 
-	ret = x86_pmu.schedule_events(cpuc, n0, assign);
+	ret = x86_pmu.schedule_events(cpuc, n0, assign, cpu);
 	if (ret)
 		return ret;
@@ -3070,6 +3080,7 @@
 {
 	struct perf_event *leader = event->group_leader;
 	struct cpu_hw_events *fake_cpuc;
+	int cpu = smp_processor_id();
 	int ret, n;
 
 	ret = -ENOMEM;
@@ -3095,7 +3106,7 @@
 
 	fake_cpuc->n_events = n;
 
-	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL);
+	ret = x86_pmu.schedule_events(fake_cpuc, n, NULL, cpu);
 
 out_free:
 	kfree(fake_cpuc);
--
To unsubscribe from this list: send the line "unsubscribe linux-kernel" in
the body of a message to majordomo(a)vger.kernel.org
More majordomo info at http://vger.kernel.org/majordomo-info.html
Please read the FAQ at http://www.tux.org/lkml/
From: Ingo Molnar on 8 Feb 2010 23:20

* Cyrill Gorcunov <gorcunov(a)gmail.com> wrote:

> Hi all,
>
> first of all the patches are NOT for any kind of inclusion. It's not ready
> yet. More likely I'm asking for glance review, ideas, criticism.

A quick question: does the code produce something on a real P4? (Possibly
only running with a single event -- but even that would be enough.)

> The main problem in implementing P4 PMU is that it has much more
> restrictions for event to MSR mapping. [...]

One possibly simpler approach might be to represent the P4 PMU via a
maximum of _two_ generic events only.

Last I looked at the many P4 events, I noticed that generally you can
create any two events (with a few exceptions). Once you start trying to
take advantage of the more than a dozen seemingly separate counters,
additional non-trivial constraints apply.

So if we only allowed a maximum of _two_ generic events (like, say, a
simple Core2 has, so it's not a big restriction at all), we wouldn't have
to map all the constraints -- we'd only have to encode the specific
event-to-MSR details (which alone is quite a bit of work as well).

We could also use the new constraints code to map them all, of course --
it will certainly be more complex to implement.

Hm?

	Ingo
From: Paul Mackerras on 8 Feb 2010 23:30

On Mon, Feb 08, 2010 at 09:45:04PM +0300, Cyrill Gorcunov wrote:

> The main problem in implementing P4 PMU is that it has much more
> restrictions for event to MSR mapping. So to fit into current
> perf_events model I made the following:

Is there somewhere accessible on the web where I can read about the P4
PMU? I'm interested to see if the constraint representation and search I
used on the POWER processors would be applicable.

Paul.
From: Cyrill Gorcunov on 9 Feb 2010 02:00

On 2/9/10, Ingo Molnar <mingo(a)elte.hu> wrote:
>
> * Cyrill Gorcunov <gorcunov(a)gmail.com> wrote:
>
>> Hi all,
>>
>> first of all the patches are NOT for any kind of inclusion. It's not
>> ready yet. More likely I'm asking for glance review, ideas, criticism.
>
> A quick question: does the code produce something on a real P4? (possibly
> only running with a single event - but even that would be enough.)

Not yet; a few code snippets need to be added to the scheduling routine.
I hope to finish this today evening.

>> The main problem in implementing P4 PMU is that it has much more
>> restrictions for event to MSR mapping. [...]
>
> One possibly simpler approach might be to represent the P4 PMU via a
> maximum _two_ generic events only.

Yeah, good idea!

> Last i looked at the many P4 events, i've noticed that generally you can
> create any two events. (with a few exceptions) Once you start trying to
> take advantage of the more than a dozen seemingly separate counters,
> additional non-trivial constraints apply.
>
> So if we only allowed a maximum of _two_ generic events (like say a simple
> Core2 has, so it's not a big restriction at all), we wouldnt have to map
> all the constraints, we'd only have to encode the specific event-to-MSR
> details. (which alone is quite a bit of work as well.)
>
> We could also use the new constraints code to map them all, of course - it
> will certainly be more complex to implement.

I thought about it, but I didn't like the resulting code ;) Will think
about it.

> Hm?
>
> Ingo
From: Cyrill Gorcunov on 9 Feb 2010 02:00
On 2/9/10, Paul Mackerras <paulus(a)samba.org> wrote:
> On Mon, Feb 08, 2010 at 09:45:04PM +0300, Cyrill Gorcunov wrote:
>
>> The main problem in implementing P4 PMU is that it has much more
>> restrictions for event to MSR mapping. So to fit into current
>> perf_events model I made the following:
>
> Is there somewhere accessible on the web where I can read about the P4
> PMU? I'm interested to see if the constraint representation and
> search I used on the POWER processors would be applicable.
>
> Paul.

I've been using the Intel SDM, from intel.com.