Prev: [RFC PATCH] init: boot to device-mapper targets without an initr*
Next: [PATCH 1/2] lib/btree: Kill unused MAX macro
From: Mel Gorman on 12 May 2010 17:10 On Wed, May 12, 2010 at 01:41:11PM -0400, Rik van Riel wrote: > Subject: extend KSM refcounts to the anon_vma root > > KSM reference counts can cause an anon_vma to exist after the processe > it belongs to have already exited. Because the anon_vma lock now lives > in the root anon_vma, we need to ensure that the root anon_vma stays > around until after all the "child" anon_vmas have been freed. > > The obvious way to do this is to have a "child" anon_vma take a > reference to the root in anon_vma_fork. When the anon_vma is freed > at munmap or process exit, we drop the refcount in anon_vma_unlink > and possibly free the root anon_vma. > > The KSM anon_vma reference count function also needs to be modified > to deal with the possibility of freeing 2 levels of anon_vma. The > easiest way to do this is to break out the KSM magic and make it > generic. > > When compiling without CONFIG_KSM, this code is compiled out. > > Signed-off-by: Rik van Riel <riel(a)redhat.com> > --- > include/linux/rmap.h | 12 ++++++++++++ > mm/ksm.c | 17 ++++++----------- > mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- > 3 files changed, 62 insertions(+), 12 deletions(-) > > diff --git a/include/linux/rmap.h b/include/linux/rmap.h > index 33ffe14..387d40c 100644 > --- a/include/linux/rmap.h > +++ b/include/linux/rmap.h > @@ -126,6 +126,18 @@ int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *); > void __anon_vma_link(struct vm_area_struct *); > void anon_vma_free(struct anon_vma *); > > +#ifdef CONFIG_KSM > +static inline void get_anon_vma(struct anon_vma *anon_vma) > +{ > + atomic_inc(&anon_vma->ksm_refcount); > +} > + > +void drop_anon_vma(struct anon_vma *); > +#else > +#define get_anon_vma(x) do {} while(0) > +#define drop_anon_vma(x) do {} while(0) > +#endif > + > static inline void anon_vma_merge(struct vm_area_struct *vma, > struct vm_area_struct *next) > { > diff --git a/mm/ksm.c b/mm/ksm.c > index 7ca0dd7..9f2acc9 100644 > --- a/mm/ksm.c > +++ b/mm/ksm.c > @@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_item *rmap_item, > struct anon_vma *anon_vma) > { > rmap_item->anon_vma = anon_vma; > - atomic_inc(&anon_vma->ksm_refcount); > + get_anon_vma(anon_vma); > } I'm not quite getting this. Here, we get the local anon_vma so we increment its reference count and later we drop it but without a refcount taken on the root anon_vma, why is it guaranteed to stay around? > > -static void drop_anon_vma(struct rmap_item *rmap_item) > +static void ksm_drop_anon_vma(struct rmap_item *rmap_item) > { > struct anon_vma *anon_vma = rmap_item->anon_vma; > > - if (atomic_dec_and_lock(&anon_vma->ksm_refcount, &anon_vma->root->lock)) { > - int empty = list_empty(&anon_vma->head); > - anon_vma_unlock(anon_vma); > - if (empty) > - anon_vma_free(anon_vma); > - } > + drop_anon_vma(anon_vma); > } > > /* > @@ -415,7 +410,7 @@ static void break_cow(struct rmap_item *rmap_item) > * It is not an accident that whenever we want to break COW > * to undo, we also need to drop a reference to the anon_vma. > */ > - drop_anon_vma(rmap_item); > + ksm_drop_anon_vma(rmap_item); > > down_read(&mm->mmap_sem); > if (ksm_test_exit(mm)) > @@ -470,7 +465,7 @@ static void remove_node_from_stable_tree(struct stable_node *stable_node) > ksm_pages_sharing--; > else > ksm_pages_shared--; > - drop_anon_vma(rmap_item); > + ksm_drop_anon_vma(rmap_item); > rmap_item->address &= PAGE_MASK; > cond_resched(); > } > @@ -558,7 +553,7 @@ static void remove_rmap_item_from_tree(struct rmap_item *rmap_item) > else > ksm_pages_shared--; > > - drop_anon_vma(rmap_item); > + ksm_drop_anon_vma(rmap_item); > rmap_item->address &= PAGE_MASK; > > } else if (rmap_item->address & UNSTABLE_FLAG) { > diff --git a/mm/rmap.c b/mm/rmap.c > index f0ba648..d63cd91 100644 > --- a/mm/rmap.c > +++ b/mm/rmap.c > @@ -238,6 +238,12 @@ int anon_vma_fork(struct vm_area_struct *vma, struct vm_area_struct *pvma) > */ > root_avc = list_entry(vma->anon_vma_chain.prev, struct anon_vma_chain, same_vma); > anon_vma->root = root_avc->anon_vma; > + /* > + * With KSM refcounts, an anon_vma can stay around longer than the > + * process it belongs to. The root anon_vma needs to be pinned, > + * because that is where the lock lives. > + */ > + get_anon_vma(anon_vma->root); > /* Mark this anon_vma as the one where our new (COWed) pages go. */ > vma->anon_vma = anon_vma; > anon_vma_chain_link(vma, avc, anon_vma); > @@ -267,8 +273,11 @@ static void anon_vma_unlink(struct anon_vma_chain *anon_vma_chain) > empty = list_empty(&anon_vma->head) && !ksm_refcount(anon_vma); > anon_vma_unlock(anon_vma); > > - if (empty) > + if (empty) { > + /* We no longer need the root anon_vma */ > + drop_anon_vma(anon_vma->root); > anon_vma_free(anon_vma); > + } > } > > void unlink_anon_vmas(struct vm_area_struct *vma) > @@ -1355,6 +1364,40 @@ int try_to_munlock(struct page *page) > return try_to_unmap_file(page, TTU_MUNLOCK); > } > > +#ifdef CONFIG_KSM > +/* > + * Drop an anon_vma refcount, freeing the anon_vma and anon_vma->root > + * if necessary. Be careful to do all the tests under the lock. Once > + * we know we are the last user, nobody else can get a reference and we > + * can do the freeing without the lock. > + */ > +void drop_anon_vma(struct anon_vma *anon_vma) > +{ > + if (atomic_dec_and_lock(&anon_vma->ksm_refcount, &anon_vma->root->lock)) { > + struct anon_vma *root = anon_vma->root; > + int empty list_empty(&anon_vma->head); > + int last_root_user = 0; > + int root_empty = 0; > + > + /* > + * The refcount on a non-root anon_vma got dropped. Drop > + * the refcount on the root and check if we need to free it. > + */ > + if (empty && anon_vma != root) { > + last_root_user = atomic_dec_and_test(&root->ksm_refcount); > + root_empty = list_empty(&root->head); > + } > + anon_vma_unlock(anon_vma); > + > + if (empty) { > + anon_vma_free(anon_vma); > + if (root_empty && last_root_user) > + anon_vma_free(root); > + } > + } > +} > +#endif > + > #ifdef CONFIG_MIGRATION > /* > * rmap_walk() and its helpers rmap_walk_anon() and rmap_walk_file(): > -- Mel Gorman Part-time Phd Student Linux Technology Center University of Limerick IBM Dublin Software Lab -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Rik van Riel on 12 May 2010 17:20 On 05/12/2010 05:07 PM, Mel Gorman wrote: > On Wed, May 12, 2010 at 01:41:11PM -0400, Rik van Riel wrote: >> Subject: extend KSM refcounts to the anon_vma root >> >> KSM reference counts can cause an anon_vma to exist after the processe >> it belongs to have already exited. Because the anon_vma lock now lives >> in the root anon_vma, we need to ensure that the root anon_vma stays >> around until after all the "child" anon_vmas have been freed. >> >> The obvious way to do this is to have a "child" anon_vma take a >> reference to the root in anon_vma_fork. When the anon_vma is freed >> at munmap or process exit, we drop the refcount in anon_vma_unlink >> and possibly free the root anon_vma. >> >> The KSM anon_vma reference count function also needs to be modified >> to deal with the possibility of freeing 2 levels of anon_vma. The >> easiest way to do this is to break out the KSM magic and make it >> generic. >> >> When compiling without CONFIG_KSM, this code is compiled out. >> >> Signed-off-by: Rik van Riel<riel(a)redhat.com> >> --- >> include/linux/rmap.h | 12 ++++++++++++ >> mm/ksm.c | 17 ++++++----------- >> mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- >> 3 files changed, 62 insertions(+), 12 deletions(-) >> >> diff --git a/include/linux/rmap.h b/include/linux/rmap.h >> index 33ffe14..387d40c 100644 >> --- a/include/linux/rmap.h >> +++ b/include/linux/rmap.h >> @@ -126,6 +126,18 @@ int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *); >> void __anon_vma_link(struct vm_area_struct *); >> void anon_vma_free(struct anon_vma *); >> >> +#ifdef CONFIG_KSM >> +static inline void get_anon_vma(struct anon_vma *anon_vma) >> +{ >> + atomic_inc(&anon_vma->ksm_refcount); >> +} >> + >> +void drop_anon_vma(struct anon_vma *); >> +#else >> +#define get_anon_vma(x) do {} while(0) >> +#define drop_anon_vma(x) do {} while(0) >> +#endif >> + >> static inline void anon_vma_merge(struct vm_area_struct *vma, >> struct vm_area_struct *next) >> { >> diff --git a/mm/ksm.c b/mm/ksm.c >> index 7ca0dd7..9f2acc9 100644 >> --- a/mm/ksm.c >> +++ b/mm/ksm.c >> @@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_item *rmap_item, >> struct anon_vma *anon_vma) >> { >> rmap_item->anon_vma = anon_vma; >> - atomic_inc(&anon_vma->ksm_refcount); >> + get_anon_vma(anon_vma); >> } > > I'm not quite getting this. Here, we get the local anon_vma so we > increment its reference count and later we drop it but without a > refcount taken on the root anon_vma, why is it guaranteed to stay > around? Because anon_vma_fork takes a reference count on the root anon_vma, the VMA we take a refcount on will either have a refcount on the root, or it is the root. -- All rights reversed -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Mel Gorman on 13 May 2010 07:30 On Wed, May 12, 2010 at 05:09:18PM -0400, Rik van Riel wrote: > On 05/12/2010 05:07 PM, Mel Gorman wrote: >> On Wed, May 12, 2010 at 01:41:11PM -0400, Rik van Riel wrote: >>> Subject: extend KSM refcounts to the anon_vma root >>> >>> KSM reference counts can cause an anon_vma to exist after the processe >>> it belongs to have already exited. Because the anon_vma lock now lives >>> in the root anon_vma, we need to ensure that the root anon_vma stays >>> around until after all the "child" anon_vmas have been freed. >>> >>> The obvious way to do this is to have a "child" anon_vma take a >>> reference to the root in anon_vma_fork. When the anon_vma is freed >>> at munmap or process exit, we drop the refcount in anon_vma_unlink >>> and possibly free the root anon_vma. >>> >>> The KSM anon_vma reference count function also needs to be modified >>> to deal with the possibility of freeing 2 levels of anon_vma. The >>> easiest way to do this is to break out the KSM magic and make it >>> generic. >>> >>> When compiling without CONFIG_KSM, this code is compiled out. >>> >>> Signed-off-by: Rik van Riel<riel(a)redhat.com> >>> --- >>> include/linux/rmap.h | 12 ++++++++++++ >>> mm/ksm.c | 17 ++++++----------- >>> mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- >>> 3 files changed, 62 insertions(+), 12 deletions(-) >>> >>> diff --git a/include/linux/rmap.h b/include/linux/rmap.h >>> index 33ffe14..387d40c 100644 >>> --- a/include/linux/rmap.h >>> +++ b/include/linux/rmap.h >>> @@ -126,6 +126,18 @@ int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *); >>> void __anon_vma_link(struct vm_area_struct *); >>> void anon_vma_free(struct anon_vma *); >>> >>> +#ifdef CONFIG_KSM >>> +static inline void get_anon_vma(struct anon_vma *anon_vma) >>> +{ >>> + atomic_inc(&anon_vma->ksm_refcount); >>> +} >>> + >>> +void drop_anon_vma(struct anon_vma *); >>> +#else >>> +#define get_anon_vma(x) do {} while(0) >>> +#define drop_anon_vma(x) do {} while(0) >>> +#endif >>> + >>> static inline void anon_vma_merge(struct vm_area_struct *vma, >>> struct vm_area_struct *next) >>> { >>> diff --git a/mm/ksm.c b/mm/ksm.c >>> index 7ca0dd7..9f2acc9 100644 >>> --- a/mm/ksm.c >>> +++ b/mm/ksm.c >>> @@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_item *rmap_item, >>> struct anon_vma *anon_vma) >>> { >>> rmap_item->anon_vma = anon_vma; >>> - atomic_inc(&anon_vma->ksm_refcount); >>> + get_anon_vma(anon_vma); >>> } >> >> I'm not quite getting this. Here, we get the local anon_vma so we >> increment its reference count and later we drop it but without a >> refcount taken on the root anon_vma, why is it guaranteed to stay >> around? > > Because anon_vma_fork takes a reference count on the root anon_vma, > the VMA we take a refcount on will either have a refcount on the > root, or it is the root. > Sorry, I'm still not getting it. anon_vma_fork keeps the refcount around during fork but what about during exit? Lets say anon_vma_unlink is called on the following arrangement; root_anon_vma->refcounted_anon_vma We walk the list but the root_anon_vma doesn't have a refcount so it gets freed. drop_anon_vma gets called on refcounted_anon_vma which does if (atomic_dec_and_lock(&anon_vma->ksm_refcount, &anon_vma->root->lock)) but the root anon_vma is now gone. Are you depending on the lifecycle of anon_vma's within KSM for this to work? If so, then the migration-related fixes in mmotm that take a refcount on anon_vma during migration will also need to take a refcount on the root. -- Mel Gorman Part-time Phd Student Linux Technology Center University of Limerick IBM Dublin Software Lab -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Rik van Riel on 13 May 2010 09:20 On 05/13/2010 07:26 AM, Mel Gorman wrote: > On Wed, May 12, 2010 at 05:09:18PM -0400, Rik van Riel wrote: >> On 05/12/2010 05:07 PM, Mel Gorman wrote: >>> On Wed, May 12, 2010 at 01:41:11PM -0400, Rik van Riel wrote: >>>> Subject: extend KSM refcounts to the anon_vma root >>>> >>>> KSM reference counts can cause an anon_vma to exist after the processe >>>> it belongs to have already exited. Because the anon_vma lock now lives >>>> in the root anon_vma, we need to ensure that the root anon_vma stays >>>> around until after all the "child" anon_vmas have been freed. >>>> >>>> The obvious way to do this is to have a "child" anon_vma take a >>>> reference to the root in anon_vma_fork. When the anon_vma is freed >>>> at munmap or process exit, we drop the refcount in anon_vma_unlink >>>> and possibly free the root anon_vma. >>>> >>>> The KSM anon_vma reference count function also needs to be modified >>>> to deal with the possibility of freeing 2 levels of anon_vma. The >>>> easiest way to do this is to break out the KSM magic and make it >>>> generic. >>>> >>>> When compiling without CONFIG_KSM, this code is compiled out. >>>> >>>> Signed-off-by: Rik van Riel<riel(a)redhat.com> >>>> --- >>>> include/linux/rmap.h | 12 ++++++++++++ >>>> mm/ksm.c | 17 ++++++----------- >>>> mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- >>>> 3 files changed, 62 insertions(+), 12 deletions(-) >>>> >>>> diff --git a/include/linux/rmap.h b/include/linux/rmap.h >>>> index 33ffe14..387d40c 100644 >>>> --- a/include/linux/rmap.h >>>> +++ b/include/linux/rmap.h >>>> @@ -126,6 +126,18 @@ int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *); >>>> void __anon_vma_link(struct vm_area_struct *); >>>> void anon_vma_free(struct anon_vma *); >>>> >>>> +#ifdef CONFIG_KSM >>>> +static inline void get_anon_vma(struct anon_vma *anon_vma) >>>> +{ >>>> + atomic_inc(&anon_vma->ksm_refcount); >>>> +} >>>> + >>>> +void drop_anon_vma(struct anon_vma *); >>>> +#else >>>> +#define get_anon_vma(x) do {} while(0) >>>> +#define drop_anon_vma(x) do {} while(0) >>>> +#endif >>>> + >>>> static inline void anon_vma_merge(struct vm_area_struct *vma, >>>> struct vm_area_struct *next) >>>> { >>>> diff --git a/mm/ksm.c b/mm/ksm.c >>>> index 7ca0dd7..9f2acc9 100644 >>>> --- a/mm/ksm.c >>>> +++ b/mm/ksm.c >>>> @@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_item *rmap_item, >>>> struct anon_vma *anon_vma) >>>> { >>>> rmap_item->anon_vma = anon_vma; >>>> - atomic_inc(&anon_vma->ksm_refcount); >>>> + get_anon_vma(anon_vma); >>>> } >>> >>> I'm not quite getting this. Here, we get the local anon_vma so we >>> increment its reference count and later we drop it but without a >>> refcount taken on the root anon_vma, why is it guaranteed to stay >>> around? >> >> Because anon_vma_fork takes a reference count on the root anon_vma, >> the VMA we take a refcount on will either have a refcount on the >> root, or it is the root. >> > > Sorry, I'm still not getting it. anon_vma_fork keeps the refcount around > during fork but what about during exit? It is kept around all the way from fork until exit. -- All rights reversed -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/
From: Mel Gorman on 13 May 2010 09:30
On Thu, May 13, 2010 at 09:11:30AM -0400, Rik van Riel wrote: > On 05/13/2010 07:26 AM, Mel Gorman wrote: >> On Wed, May 12, 2010 at 05:09:18PM -0400, Rik van Riel wrote: >>> On 05/12/2010 05:07 PM, Mel Gorman wrote: >>>> On Wed, May 12, 2010 at 01:41:11PM -0400, Rik van Riel wrote: >>>>> Subject: extend KSM refcounts to the anon_vma root >>>>> >>>>> KSM reference counts can cause an anon_vma to exist after the processe >>>>> it belongs to have already exited. Because the anon_vma lock now lives >>>>> in the root anon_vma, we need to ensure that the root anon_vma stays >>>>> around until after all the "child" anon_vmas have been freed. >>>>> >>>>> The obvious way to do this is to have a "child" anon_vma take a >>>>> reference to the root in anon_vma_fork. When the anon_vma is freed >>>>> at munmap or process exit, we drop the refcount in anon_vma_unlink >>>>> and possibly free the root anon_vma. >>>>> >>>>> The KSM anon_vma reference count function also needs to be modified >>>>> to deal with the possibility of freeing 2 levels of anon_vma. The >>>>> easiest way to do this is to break out the KSM magic and make it >>>>> generic. >>>>> >>>>> When compiling without CONFIG_KSM, this code is compiled out. >>>>> >>>>> Signed-off-by: Rik van Riel<riel(a)redhat.com> >>>>> --- >>>>> include/linux/rmap.h | 12 ++++++++++++ >>>>> mm/ksm.c | 17 ++++++----------- >>>>> mm/rmap.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- >>>>> 3 files changed, 62 insertions(+), 12 deletions(-) >>>>> >>>>> diff --git a/include/linux/rmap.h b/include/linux/rmap.h >>>>> index 33ffe14..387d40c 100644 >>>>> --- a/include/linux/rmap.h >>>>> +++ b/include/linux/rmap.h >>>>> @@ -126,6 +126,18 @@ int anon_vma_fork(struct vm_area_struct *, struct vm_area_struct *); >>>>> void __anon_vma_link(struct vm_area_struct *); >>>>> void anon_vma_free(struct anon_vma *); >>>>> >>>>> +#ifdef CONFIG_KSM >>>>> +static inline void get_anon_vma(struct anon_vma *anon_vma) >>>>> +{ >>>>> + atomic_inc(&anon_vma->ksm_refcount); >>>>> +} >>>>> + >>>>> +void drop_anon_vma(struct anon_vma *); >>>>> +#else >>>>> +#define get_anon_vma(x) do {} while(0) >>>>> +#define drop_anon_vma(x) do {} while(0) >>>>> +#endif >>>>> + >>>>> static inline void anon_vma_merge(struct vm_area_struct *vma, >>>>> struct vm_area_struct *next) >>>>> { >>>>> diff --git a/mm/ksm.c b/mm/ksm.c >>>>> index 7ca0dd7..9f2acc9 100644 >>>>> --- a/mm/ksm.c >>>>> +++ b/mm/ksm.c >>>>> @@ -318,19 +318,14 @@ static void hold_anon_vma(struct rmap_item *rmap_item, >>>>> struct anon_vma *anon_vma) >>>>> { >>>>> rmap_item->anon_vma = anon_vma; >>>>> - atomic_inc(&anon_vma->ksm_refcount); >>>>> + get_anon_vma(anon_vma); >>>>> } >>>> >>>> I'm not quite getting this. Here, we get the local anon_vma so we >>>> increment its reference count and later we drop it but without a >>>> refcount taken on the root anon_vma, why is it guaranteed to stay >>>> around? >>> >>> Because anon_vma_fork takes a reference count on the root anon_vma, >>> the VMA we take a refcount on will either have a refcount on the >>> root, or it is the root. >>> >> >> Sorry, I'm still not getting it. anon_vma_fork keeps the refcount around >> during fork but what about during exit? > > It is kept around all the way from fork until exit. > Ok, now I get it. I was thinking in terms of a reference count being for the duration of a specific operation. In this case, the root anon_vma has an elevated reference count for the lifetime of the anon_vma forest. Thanks. -- Mel Gorman Part-time Phd Student Linux Technology Center University of Limerick IBM Dublin Software Lab -- To unsubscribe from this list: send the line "unsubscribe linux-kernel" in the body of a message to majordomo(a)vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html Please read the FAQ at http://www.tux.org/lkml/ |