From: Eric Dumazet on 30 Sep 2009 11:30 Arjan van de Ven a écrit : > On Tue, 29 Sep 2009 14:56:28 -0700 (PDT) > Linus Torvalds <torvalds(a)linux-foundation.org> wrote: > >> >> On Tue, 29 Sep 2009, Arjan van de Ven wrote: >>> can't we use alternatives() for this, to patch cmpxchg64 in ? >>> I mean.. it'll be commonly supported nowadays.. the fallback to it >>> not being supported could be a bit slower by now... >> Yes, we could. It would limit us to some fixed address format, >> probably >> >> cmpxchg8b (%esi) >> >> or something. Use something like this as a starting point, perhaps? >> >> NOTE! Totally untested! And you'd actually need to implement the >> "cmpxchg8b_emu" function that takes its arguments in %eax:%edx, >> %ebx:%ecx and %esi and doesn't trash any other registers.. > > so I debugged this guy (had a few bugs ;-) > > patch, including a new cmpxchg8b_emu below: > > From 5a76986c5dd272ea16a9b8abb7349ff3d6791c2b Mon Sep 17 00:00:00 2001 > From: Arjan van de Ven <arjan(a)linux.intel.com> > Date: Wed, 30 Sep 2009 17:04:35 +0200 > Subject: [PATCH] x86: Provide an alternative() based cmpxchg64() > > Based on Linus' patch, this patch provides cmpxchg64() using > the alternative() infrastructure. > > Note: the fallback is NOT smp safe, just like the current fallback > is not SMP safe. 
> > Signed-off-by: Arjan van de Ven <arjan(a)linux.intel.com> > --- > arch/x86/include/asm/cmpxchg_32.h | 29 ++++++++++-------- > arch/x86/kernel/i386_ksyms_32.c | 3 ++ > arch/x86/lib/Makefile | 2 +- > arch/x86/lib/cmpxchg8b_emu.S | 61 +++++++++++++++++++++++++++++++++++++ > 4 files changed, 81 insertions(+), 14 deletions(-) > create mode 100644 arch/x86/lib/cmpxchg8b_emu.S > > diff --git a/arch/x86/include/asm/cmpxchg_32.h b/arch/x86/include/asm/cmpxchg_32.h > index 82ceb78..3b21afa 100644 > --- a/arch/x86/include/asm/cmpxchg_32.h > +++ b/arch/x86/include/asm/cmpxchg_32.h > @@ -312,19 +312,22 @@ static inline unsigned long cmpxchg_386(volatile void *ptr, unsigned long old, > > extern unsigned long long cmpxchg_486_u64(volatile void *, u64, u64); > > -#define cmpxchg64(ptr, o, n) \ > -({ \ > - __typeof__(*(ptr)) __ret; \ > - if (likely(boot_cpu_data.x86 > 4)) \ > - __ret = (__typeof__(*(ptr)))__cmpxchg64((ptr), \ > - (unsigned long long)(o), \ > - (unsigned long long)(n)); \ > - else \ > - __ret = (__typeof__(*(ptr)))cmpxchg_486_u64((ptr), \ > - (unsigned long long)(o), \ > - (unsigned long long)(n)); \ > - __ret; \ > -}) > +#define cmpxchg64(ptr, o, n) \ > +({ \ > + __typeof__(*(ptr)) __ret; \ > + __typeof__(*(ptr)) __old = (o); \ > + __typeof__(*(ptr)) __new = (n); \ > + alternative_io("call cmpxchg8b_emu", \ > + "lock; cmpxchg8b (%%esi)" , \ > + X86_FEATURE_CX8, \ > + "=A" (__ret), \ > + "S" ((ptr)), "0" (__old), \ > + "b" ((unsigned int)__new), \ > + "c" ((unsigned int)(__new>>32))); \ > + __ret; }) > + > + > + > #define cmpxchg64_local(ptr, o, n) \ > ({ \ > __typeof__(*(ptr)) __ret; \ > diff --git a/arch/x86/kernel/i386_ksyms_32.c b/arch/x86/kernel/i386_ksyms_32.c > index 43cec6b..f17930e 100644 > --- a/arch/x86/kernel/i386_ksyms_32.c > +++ b/arch/x86/kernel/i386_ksyms_32.c > @@ -10,6 +10,9 @@ > EXPORT_SYMBOL(mcount); > #endif > > +extern void cmpxchg8b_emu(void); /* dummy proto */ > +EXPORT_SYMBOL(cmpxchg8b_emu); > + > /* Networking helper routines. 
*/ > EXPORT_SYMBOL(csum_partial_copy_generic); > > diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile > index 9e60920..3e549b8 100644 > --- a/arch/x86/lib/Makefile > +++ b/arch/x86/lib/Makefile > @@ -15,7 +15,7 @@ ifeq ($(CONFIG_X86_32),y) > obj-y += atomic64_32.o > lib-y += checksum_32.o > lib-y += strstr_32.o > - lib-y += semaphore_32.o string_32.o > + lib-y += semaphore_32.o string_32.o cmpxchg8b_emu.o > > lib-$(CONFIG_X86_USE_3DNOW) += mmx_32.o > else > diff --git a/arch/x86/lib/cmpxchg8b_emu.S b/arch/x86/lib/cmpxchg8b_emu.S > new file mode 100644 > index 0000000..b8af4c7 > --- /dev/null > +++ b/arch/x86/lib/cmpxchg8b_emu.S > @@ -0,0 +1,61 @@ > +/* > + * This program is free software; you can redistribute it and/or > + * modify it under the terms of the GNU General Public License > + * as published by the Free Software Foundation; version 2 > + * of the License. > + * > + */ > + > +#include <linux/linkage.h> > +#include <asm/alternative-asm.h> > +#include <asm/frame.h> > +#include <asm/dwarf2.h> > + > + > +.text > + > +/* > + * Inputs: > + * %esi : memory location to compare > + * %eax : low 32 bits of old value > + * %edx : high 32 bits of old value > + * %ebx : low 32 bits of new value > + * %ecx : high 32 bits of new value > + */ > +ENTRY(cmpxchg8b_emu) > + CFI_STARTPROC > + > + push %edi > + push %ebx > + push %ecx > + /* disable interrupts */ > + pushf > + pop %edi Why do you pop flags in edi, to later re-push them ? > + cli > + > + cmpl %edx, 4(%esi) > + jne 1f > + cmpl %eax, (%esi) > + jne 1f > + So we dont have irq fro this cpu, ok. And other cpus allowed, and xchg implies lock prefix, ok. > + xchg (%esi), %ebx > + xchg 4(%esi), %ecx How this sequence is guaranteed to be atomic with other cpus ? If it is a !SMP implementation, then you could replace xchg by mov instructions. 
mov %ebx,(%esi) mov %ecx,4(%esi) > + mov %ebx, %eax > + mov %ecx, %edx and avoid these of course > + > +2: > + /* restore interrupts */ > + push %edi > + popf > + > + pop %ecx > + pop %ebx > + pop %edi > + ret > +1: > + mov (%esi), %eax > + mov 4(%esi), %edx > + jmp 2b > + CFI_ENDPROC > +ENDPROC(cmpxchg8b_emu) > + So I suggest : ENTRY(cmpxchg8b_emu) CFI_STARTPROC /* disable interrupts */ pushf cli cmpl %eax,(%esi) jne 1f cmpl %edx,4(%esi) jne 2f mov %ebx,(%esi) mov %ecx,4(%esi) /* restore interrupts */ popf ret 1: mov (%esi), %eax 2: mov 4(%esi), %edx popf ret CFI_ENDPROC ENDPROC(cmpxchg8b_emu) -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Arjan van de Ven on 30 Sep 2009 11:40 On Wed, 30 Sep 2009 17:27:05 +0200 Eric Dumazet <eric.dumazet(a)gmail.com> wrote: > > > + pop %edi > Why do you pop flags in edi, to later re-push them ? > > > + cli because here I disable interrupts (basically this is local_irq_save() ) > > > > + xchg (%esi), %ebx > > + xchg 4(%esi), %ecx > How this sequence is guaranteed to be atomic with other cpus ? it is not. this is the 486 implementation which is !SMP (just like the current cmpxchg64() fallback) > > If it is a !SMP implementation, then you could replace xchg by mov > instructions. that is not equivalent. I need to also store the old values and return them.... > > So I suggest : > > > ENTRY(cmpxchg8b_emu) > CFI_STARTPROC > > /* disable interrupts */ > pushf > cli > > cmpl %eax,(%esi) > jne 1f > cmpl %edx,4(%esi) > jne 2f > > mov %ebx,(%esi) > mov %ecx,4(%esi) this is not equivalent since you don't return the "prev" value. -- Arjan van de Ven Intel Open Source Technology Centre For development, discussion and tips for power savings, visit http://www.lesswatts.org -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Eric Dumazet on 30 Sep 2009 12:00 Arjan van de Ven a écrit : > On Tue, 29 Sep 2009 14:56:28 -0700 (PDT) > Linus Torvalds <torvalds(a)linux-foundation.org> wrote: > >> >> On Tue, 29 Sep 2009, Arjan van de Ven wrote: >>> can't we use alternatives() for this, to patch cmpxchg64 in ? >>> I mean.. it'll be commonly supported nowadays.. the fallback to it >>> not being supported could be a bit slower by now... >> Yes, we could. It would limit us to some fixed address format, >> probably >> >> cmpxchg8b (%esi) >> >> or something. Use something like this as a starting point, perhaps? >> >> NOTE! Totally untested! And you'd actually need to implement the >> "cmpxchg8b_emu" function that takes its arguments in %eax:%edx, >> %ebx:%ecx and %esi and doesn't trash any other registers.. > > so I debugged this guy (had a few bugs ;-) > > patch, including a new cmpxchg8b_emu below: > >>From 5a76986c5dd272ea16a9b8abb7349ff3d6791c2b Mon Sep 17 00:00:00 2001 > From: Arjan van de Ven <arjan(a)linux.intel.com> > Date: Wed, 30 Sep 2009 17:04:35 +0200 > Subject: [PATCH] x86: Provide an alternative() based cmpxchg64() > > Based on Linus' patch, this patch provides cmpxchg64() using > the alternative() infrastructure. > > Note: the fallback is NOT smp safe, just like the current fallback > is not SMP safe. > > Signed-off-by: Arjan van de Ven <arjan(a)linux.intel.com> > +#define cmpxchg64(ptr, o, n) \ > +({ \ > + __typeof__(*(ptr)) __ret; \ > + __typeof__(*(ptr)) __old = (o); \ > + __typeof__(*(ptr)) __new = (n); \ > + alternative_io("call cmpxchg8b_emu", \ > + "lock; cmpxchg8b (%%esi)" , \ > + X86_FEATURE_CX8, \ > + "=A" (__ret), \ > + "S" ((ptr)), "0" (__old), \ > + "b" ((unsigned int)__new), \ > + "c" ((unsigned int)(__new>>32))); \ Note: lock; cmpxchg8b (%%esi) gives 4 bytes opcode : f0 0f c7 0e Because alternative (call cmpxchg8b_emu) uses 5 bytes, a nop will be added. Choosing ".byte 0xf0, 0x0f, 0xc7, 0x4e, 0x00" aka "lock cmpxchg8b 0x0(%esi)" is a little bit better ? 
-- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Linus Torvalds on 30 Sep 2009 12:20 On Wed, 30 Sep 2009, Eric Dumazet wrote: > > lock; cmpxchg8b (%%esi) > > gives 4 bytes opcode : f0 0f c7 0e > Because alternative (call cmpxchg8b_emu) uses 5 bytes, a nop will be added. > > Choosing ".byte 0xf0, 0x0f, 0xc7, 0x4e, 0x00" aka "lock cmpxchg8b 0x0(%esi)" is a little bit better ? And if you want to be really clever, you would want to handle the non-SMP case too, where you have just "cmpxchg8b (%%esi)" (three bytes) without the lock prefix. However, at this point I think Arjan's patch is already way superior to what we have now (feel free to take a look at what we generate on 32-bit without PAE today - just have a barf-bag handy), so all I'd really want is a few "tested-by"s to make me feel happier about it, and a few more people looking at the emulation routine to all say "ok, looks sane, ACK". And at that point we can then either make "cmpxchg()" just do the 8-byte case natively, or just take your patch to change sched_clock.c to now use the no-longer-entirely-disgusting cmpxchg64(). Ingo - I suspect both those patches should just go through you. You do both x86 and scheduler, so I'll happily pull the end result. Linus -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Cyrill Gorcunov on 30 Sep 2009 12:20
[Eric Dumazet - Wed, Sep 30, 2009 at 05:57:25PM +0200] .... | | > +#define cmpxchg64(ptr, o, n) \ | > +({ \ | > + __typeof__(*(ptr)) __ret; \ | > + __typeof__(*(ptr)) __old = (o); \ | > + __typeof__(*(ptr)) __new = (n); \ | > + alternative_io("call cmpxchg8b_emu", \ | > + "lock; cmpxchg8b (%%esi)" , \ | > + X86_FEATURE_CX8, \ | > + "=A" (__ret), \ | > + "S" ((ptr)), "0" (__old), \ | > + "b" ((unsigned int)__new), \ | > + "c" ((unsigned int)(__new>>32))); \ | | | Note: | | lock; cmpxchg8b (%%esi) | | gives 4 bytes opcode : f0 0f c7 0e | Because alternative (call cmpxchg8b_emu) uses 5 bytes, a nop will be added. | | Choosing ".byte 0xf0, 0x0f, 0xc7, 0x4e, 0x00" aka "lock cmpxchg8b 0x0(%esi)" is a little bit better ? | Just curious why not "nop; lock; cmpxchg8b (%esi)"? lock itself is destructive instruction which causes write buffers to flush data back and NOP itself will be discarded by cpu internals so I suppose this form should be better. Though I could miss something, and OTOH it's not a big deal. But still curious :) -- Cyrill -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ |