Prev: [PATCH 3/4, v2] x86: enlightenment for ticket spin locks - eliminate NOPs introduced by first patch
Next: [PATCH 1/4, v2] x86: enlightenment for ticket spin locks - base implementation
From: Jan Beulich on 29 Jun 2010 10:40 Use the (alternative instructions based) callout hooks to the ticket spinlock code to enlighten ticket locks when running fully virtualized on Xen. Ultimately, this code might also be a candidate to be used when running para-virtualized. Signed-off-by: Jan Beulich <jbeulich(a)novell.com> Cc: Jeremy Fitzhardinge <jeremy.fitzhardinge(a)citrix.com> Cc: KY Srinivasan <ksrinivasan(a)novell.com> --- arch/x86/include/asm/hypervisor.h | 1 arch/x86/include/asm/spinlock_types.h | 17 +- arch/x86/include/asm/xen/cpuid.h | 68 ++++++++ arch/x86/kernel/cpu/Makefile | 2 arch/x86/kernel/cpu/hypervisor.c | 1 arch/x86/kernel/cpu/xen.c | 269 ++++++++++++++++++++++++++++++++++ 6 files changed, 355 insertions(+), 3 deletions(-) --- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/hypervisor.h +++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/hypervisor.h @@ -45,5 +45,6 @@ extern const struct hypervisor_x86 *x86_ /* Recognized hypervisors */ extern const struct hypervisor_x86 x86_hyper_vmware; extern const struct hypervisor_x86 x86_hyper_ms_hyperv; +extern const struct hypervisor_x86 x86_hyper_xen; #endif --- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/include/asm/spinlock_types.h +++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/spinlock_types.h @@ -5,11 +5,24 @@ # error "please don't include this file directly" #endif +#include <asm/types.h> + typedef struct arch_spinlock { - unsigned int slock; + union { + unsigned int slock; +#ifdef CONFIG_ENLIGHTEN_SPINLOCKS + struct { +# if CONFIG_NR_CPUS < 256 + u8 cur, seq; +# else + u16 cur, seq; +# endif + }; +#endif + }; } arch_spinlock_t; -#define __ARCH_SPIN_LOCK_UNLOCKED { 0 } +#define __ARCH_SPIN_LOCK_UNLOCKED { { 0 } } typedef struct { unsigned int lock; --- /dev/null +++ 2.6.35-rc3-virt-spinlocks/arch/x86/include/asm/xen/cpuid.h @@ -0,0 +1,68 @@ +/****************************************************************************** + * arch-x86/cpuid.h + * + * CPUID interface to Xen. 
+ * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING + * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER + * DEALINGS IN THE SOFTWARE. + * + * Copyright (c) 2007 Citrix Systems, Inc. + * + * Authors: + * Keir Fraser <keir.fraser(a)citrix.com> + */ + +#ifndef __XEN_PUBLIC_ARCH_X86_CPUID_H__ +#define __XEN_PUBLIC_ARCH_X86_CPUID_H__ + +/* Xen identification leaves start at 0x40000000. */ +#define XEN_CPUID_FIRST_LEAF 0x40000000 +#define XEN_CPUID_LEAF(i) (XEN_CPUID_FIRST_LEAF + (i)) + +/* + * Leaf 1 (0x40000000) + * EAX: Largest Xen-information leaf. All leaves up to and including @EAX + * are supported by the Xen host. + * EBX-EDX: "XenVMMXenVMM" signature, allowing positive identification + * of a Xen host. + */ +#define XEN_CPUID_SIGNATURE_EBX 0x566e6558 /* "XenV" */ +#define XEN_CPUID_SIGNATURE_ECX 0x65584d4d /* "MMXe" */ +#define XEN_CPUID_SIGNATURE_EDX 0x4d4d566e /* "nVMM" */ + +/* + * Leaf 2 (0x40000001) + * EAX[31:16]: Xen major version. + * EAX[15: 0]: Xen minor version. + * EBX-EDX: Reserved (currently all zeroes). 
+ */ + +/* + * Leaf 3 (0x40000002) + * EAX: Number of hypercall transfer pages. This register is always guaranteed + * to specify one hypercall page. + * EBX: Base address of Xen-specific MSRs. + * ECX: Features 1. Unused bits are set to zero. + * EDX: Features 2. Unused bits are set to zero. + */ + +/* Does the host support MMU_PT_UPDATE_PRESERVE_AD for this guest? */ +#define _XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD 0 +#define XEN_CPUID_FEAT1_MMU_PT_UPDATE_PRESERVE_AD (1u<<0) + +#endif /* __XEN_PUBLIC_ARCH_X86_CPUID_H__ */ --- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/kernel/cpu/Makefile +++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/cpu/Makefile @@ -14,7 +14,7 @@ CFLAGS_common.o := $(nostackp) obj-y := intel_cacheinfo.o addon_cpuid_features.o obj-y += proc.o capflags.o powerflags.o common.o -obj-y += vmware.o hypervisor.o sched.o mshyperv.o +obj-y += vmware.o xen.o hypervisor.o sched.o mshyperv.o obj-$(CONFIG_X86_32) += bugs.o cmpxchg.o obj-$(CONFIG_X86_64) += bugs_64.o --- 2.6.35-rc3-virt-spinlocks.orig/arch/x86/kernel/cpu/hypervisor.c +++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/cpu/hypervisor.c @@ -43,6 +43,7 @@ static const __initconst struct hypervis { &x86_hyper_vmware, &x86_hyper_ms_hyperv, + &x86_hyper_xen, }; const struct hypervisor_x86 *x86_hyper; --- /dev/null +++ 2.6.35-rc3-virt-spinlocks/arch/x86/kernel/cpu/xen.c @@ -0,0 +1,269 @@ +#define __XEN_INTERFACE_VERSION__ 0x00030207 +#include <linux/bootmem.h> +#include <linux/init.h> +#include <linux/module.h> +#include <linux/percpu.h> +#include <linux/slab.h> +#include <linux/smp.h> +#include <linux/spinlock.h> +#include <linux/stringify.h> +#include <asm/sync_bitops.h> +#include <asm/hypervisor.h> +#include <asm/xen/cpuid.h> +#include <asm/xen/hypercall.h> +#include <xen/interface/event_channel.h> +#include <xen/interface/memory.h> +#include <xen/interface/vcpu.h> + +#ifdef CONFIG_ENLIGHTEN_SPINLOCKS +struct spinning { + struct arch_spinlock *lock; + unsigned int ticket; + struct spinning *prev; +}; + 
+static struct shared_info *__read_mostly xen_shared_info; +EXPORT_SYMBOL_GPL(xen_shared_info); + +static DEFINE_PER_CPU(struct vcpu_runstate_info, runstate); +static DEFINE_PER_CPU(evtchn_port_t, poll_evtchn); +static DEFINE_PER_CPU(struct spinning *, _spinning); +/* + * Protect removal of objects: Insertion can be done lockless, and even + * removal itself doesn't need protection - what needs to be prevented is + * removed objects going out of scope (as they're living on the stack). + */ +static DEFINE_PER_CPU(arch_rwlock_t, spinning_rm_lock) = __ARCH_RW_LOCK_UNLOCKED; + +static unsigned int __read_mostly spin_count = 1000; +static int __init setup_spin_count(char *s) +{ + if (!s) + return -EINVAL; + spin_count = simple_strtoul(s, &s, 0); + return !*s ? 0 : -EINVAL; +} +early_param("spin_count", setup_spin_count); + +#ifndef CONFIG_XEN +__asm__(".pushsection .text, \"ax\", @progbits\n" + ".p2align " __stringify(PAGE_SHIFT) "\n" + "hypercall_page:\n" + ".skip 1 << " __stringify(PAGE_SHIFT) "\n" + ".popsection"); +#endif + +static void xen_set_cpu_features(struct cpuinfo_x86 *); + +static void xen_spin_lock(struct arch_spinlock *lock, unsigned int token) +{ + arch_rwlock_t *rm_lock; + unsigned long flags; + unsigned int count; + struct spinning spinning; + + if (unlikely(percpu_read(runstate.state) != RUNSTATE_running)) + xen_set_cpu_features(&__get_cpu_var(cpu_info)); + +#if TICKET_SHIFT == 8 + token >>= TICKET_SHIFT; +#endif + spinning.ticket = token; + spinning.lock = lock; + spinning.prev = percpu_read(_spinning); + smp_wmb(); + percpu_write(_spinning, &spinning); + + sync_clear_bit(percpu_read(poll_evtchn), + xen_shared_info->evtchn_pending); + + for (count = spin_count; ({ barrier(); lock->cur != token; }); ) + if (likely(cpu_online(raw_smp_processor_id())) + && unlikely(!--count)) { + struct sched_poll sched_poll; + + set_xen_guest_handle(sched_poll.ports, + &__get_cpu_var(poll_evtchn)); + sched_poll.nr_ports = 1; + sched_poll.timeout = 0; + 
HYPERVISOR_sched_op(SCHEDOP_poll, &sched_poll); + count = spin_count; + } else + cpu_relax(); + + /* + * If we interrupted another spinlock while it was blocking, make + * sure it doesn't block (again) without re-checking the lock. + */ + if (spinning.prev) + sync_set_bit(percpu_read(poll_evtchn), + xen_shared_info->evtchn_pending); + + percpu_write(_spinning, spinning.prev); + rm_lock = &__get_cpu_var(spinning_rm_lock); + raw_local_irq_save(flags); + arch_write_lock(rm_lock); + arch_write_unlock(rm_lock); + raw_local_irq_restore(flags); +} + +static void xen_spin_unlock(struct arch_spinlock *lock, unsigned int token) +{ + unsigned int cpu; + + token &= (1U << TICKET_SHIFT) - 1; + for_each_online_cpu(cpu) { + arch_rwlock_t *rm_lock; + unsigned long flags; + struct spinning *spinning; + + if (cpu == raw_smp_processor_id()) + continue; + + rm_lock = &per_cpu(spinning_rm_lock, cpu); + raw_local_irq_save(flags); + arch_read_lock(rm_lock); + + spinning = per_cpu(_spinning, cpu); + smp_rmb(); + if (spinning + && (spinning->lock != lock || spinning->ticket != token)) + spinning = NULL; + + arch_read_unlock(rm_lock); + raw_local_irq_restore(flags); + + if (unlikely(spinning)) { + struct evtchn_send send; + + send.port = per_cpu(poll_evtchn, cpu); + HYPERVISOR_event_channel_op(EVTCHNOP_send, &send); + return; + } + } +} + +static void __init _prepare_shared_info_page(void) +{ + struct xen_add_to_physmap xatp; + + xen_shared_info = slab_is_available() + ? (void *)get_zeroed_page(GFP_KERNEL) + : alloc_bootmem_pages(PAGE_SIZE); + + xatp.domid = DOMID_SELF; + xatp.idx = 0; + xatp.space = XENMAPSPACE_shared_info; + xatp.gpfn = __pa(xen_shared_info) >> PAGE_SHIFT; + if (HYPERVISOR_memory_op(XENMEM_add_to_physmap, &xatp)) + BUG(); +} + +static void __ref prepare_shared_info_page(void) +{ + _prepare_shared_info_page(); +} +#endif + +static bool __cpuinit xen_platform(void) +{ + unsigned int first = XEN_CPUID_FIRST_LEAF; + +#if 0 /* So far, Xen sets this only for PV guests. 
*/ + if (!cpu_has_hypervisor) + return false; +#endif + + while (first < XEN_CPUID_LEAF(0x10000)) { + unsigned int eax, ebx, ecx, edx; + + cpuid(first, &eax, &ebx, &ecx, &edx); + if (ebx == XEN_CPUID_SIGNATURE_EBX + && ecx == XEN_CPUID_SIGNATURE_ECX + && edx == XEN_CPUID_SIGNATURE_EDX) { + if (!smp_processor_id()) { + cpuid(first + 1, &eax, &ebx, &ecx, &edx); + printk(KERN_INFO "Running on Xen %u.%u\n", + eax >> 16, eax & 0xffff); + } + return true; + } + first += 0x100; + } + + return false; +} + +static void xen_set_cpu_features(struct cpuinfo_x86 *c) +{ +#ifdef CONFIG_ENLIGHTEN_SPINLOCKS + unsigned int msr, eax, ebx, ecx, edx; + unsigned int first = XEN_CPUID_FIRST_LEAF; + int ret; + struct vcpu_register_runstate_memory_area vrrma; + + if (num_possible_cpus() <= 1 + || !spin_count + || (c != &boot_cpu_data + && !boot_cpu_has(X86_FEATURE_SPINLOCK_YIELD))) + return; + + while (first < XEN_CPUID_LEAF(0x10000)) { + cpuid(first, &eax, &ebx, &ecx, &edx); + if (ebx == XEN_CPUID_SIGNATURE_EBX + && ecx == XEN_CPUID_SIGNATURE_ECX + && edx == XEN_CPUID_SIGNATURE_EDX) + break; + first += 0x100; + } + BUG_ON(first >= XEN_CPUID_LEAF(0x10000)); + + cpuid(first + 2, &eax, &msr, &ecx, &edx); + BUG_ON(!eax); + wrmsrl(msr, __pa_symbol(hypercall_page)); + + if (!xen_shared_info) + prepare_shared_info_page(); + + memset(&vrrma, 0, sizeof(vrrma)); + set_xen_guest_handle(vrrma.addr.h, &__get_cpu_var(runstate)); + ret = HYPERVISOR_vcpu_op(VCPUOP_register_runstate_memory_area, + c->cpu_index, &vrrma); + if (ret) { + printk(KERN_WARNING + "Could not register runstate area for CPU%u: %d\n", + c->cpu_index, ret); + BUG_ON(boot_cpu_has(X86_FEATURE_SPINLOCK_YIELD)); + return; + } + + if (c != &boot_cpu_data || !percpu_read(poll_evtchn)) { + struct evtchn_bind_ipi bind_ipi; + + bind_ipi.vcpu = c->cpu_index; + ret = HYPERVISOR_event_channel_op(EVTCHNOP_bind_ipi, + &bind_ipi); + if (ret) { + printk(KERN_WARNING + "Could not bind event channel for CPU%u: %d\n", + c->cpu_index, ret); + 
BUG_ON(boot_cpu_has(X86_FEATURE_SPINLOCK_YIELD)); + return; + } + sync_set_bit(bind_ipi.port, xen_shared_info->evtchn_mask); + percpu_write(poll_evtchn, bind_ipi.port); + printk(KERN_INFO "CPU%u spinlock poll event channel: %u\n", + c->cpu_index, bind_ipi.port); + } + + virt_spin_lock = xen_spin_lock; + virt_spin_unlock = xen_spin_unlock; + set_cpu_cap(c, X86_FEATURE_SPINLOCK_YIELD); +#endif +} + +const __refconst struct hypervisor_x86 x86_hyper_xen = { + .name = "Xen", + .detect = xen_platform, + .set_cpu_features = xen_set_cpu_features +}; -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ |