@@ -210,44 +210,12 @@ static void move_ptes(struct vm_area_struct *vma, pmd_t *old_pmd,
         drop_rmap_locks(vma);
 }
 
-#ifdef CONFIG_SPECULATIVE_PAGE_FAULT
-static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
-{
-        /*
-         * If we have the only reference, swap the refcount to -1. This
-         * will prevent other concurrent references by get_vma() for SPFs.
-         */
-        return atomic_cmpxchg_acquire(&vma->vm_ref_count, 1, -1) == 1;
-}
-
-/*
- * Restore the VMA reference count to 1 after a fast mremap.
- */
-static inline void unlock_vma_ref_count(struct vm_area_struct *vma)
-{
-        int old = atomic_xchg_release(&vma->vm_ref_count, 1);
-
-        /*
-         * This should only be called after a corresponding,
-         * successful trylock_vma_ref_count().
-         */
-        VM_BUG_ON_VMA(old != -1, vma);
-}
-#else /* !CONFIG_SPECULATIVE_PAGE_FAULT */
-static inline bool trylock_vma_ref_count(struct vm_area_struct *vma)
-{
-        return true;
-}
-static inline void unlock_vma_ref_count(struct vm_area_struct *vma)
-{
-}
-#endif /* CONFIG_SPECULATIVE_PAGE_FAULT */
-
 #ifdef CONFIG_HAVE_MOVE_PMD
 static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
                 unsigned long new_addr, pmd_t *old_pmd, pmd_t *new_pmd)
 {
-        spinlock_t *old_ptl, *new_ptl;
+        spinlock_t *old_ptl, *new_ptl, *old_pte_ptl;
+
         struct mm_struct *mm = vma->vm_mm;
         pmd_t pmd;
 
@@ -277,14 +245,6 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
         if (WARN_ON_ONCE(!pmd_none(*new_pmd)))
                 return false;
 
-        /*
-         * We hold both exclusive mmap_lock and rmap_lock at this point and
-         * cannot block. If we cannot immediately take exclusive ownership
-         * of the VMA fallback to the move_ptes().
-         */
-        if (!trylock_vma_ref_count(vma))
-                return false;
-
         /*
         * We don't have to worry about the ordering of src and dst
         * ptlocks because exclusive mmap_lock prevents deadlock.
@@ -294,6 +254,24 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
         if (new_ptl != old_ptl)
                 spin_lock_nested(new_ptl, SINGLE_DEPTH_NESTING);
 
+        /*
+         * If SPF is enabled, take the ptl lock on the source page table
+         * page to prevent the entire pmd from being moved under a
+         * concurrent SPF.
+         *
+         * There is no need to take the destination ptl lock since mremap
+         * has already created a hole at the destination and freed the
+         * corresponding page tables in the process.
+         *
+         * NOTE: If USE_SPLIT_PTE_PTLOCKS is false, then old_ptl, new_ptl,
+         * and old_pte_ptl are all the same lock (mm->page_table_lock).
+         * Check that the locks are different to avoid a deadlock.
+         */
+        old_pte_ptl = pte_lockptr(mm, old_pmd);
+        if (IS_ENABLED(CONFIG_SPECULATIVE_PAGE_FAULT) && old_pte_ptl != old_ptl)
+                spin_lock(old_pte_ptl);
+
+
         /* Clear the pmd */
         pmd = *old_pmd;
         pmd_clear(old_pmd);
@@ -303,11 +281,13 @@ static bool move_normal_pmd(struct vm_area_struct *vma, unsigned long old_addr,
         /* Set the new pmd */
         set_pmd_at(mm, new_addr, new_pmd, pmd);
         flush_tlb_range(vma, old_addr, old_addr + PMD_SIZE);
+
+        if (IS_ENABLED(CONFIG_SPECULATIVE_PAGE_FAULT) && old_pte_ptl != old_ptl)
+                spin_unlock(old_pte_ptl);
         if (new_ptl != old_ptl)
                 spin_unlock(new_ptl);
         spin_unlock(old_ptl);
 
-        unlock_vma_ref_count(vma);
         return true;
 }
 #else
@@ -319,7 +299,8 @@ static inline bool move_normal_pmd(struct vm_area_struct *vma,
319299}
320300#endif
321301
322- #ifdef CONFIG_HAVE_MOVE_PUD
302+ #if CONFIG_PGTABLE_LEVELS > 2 && defined(CONFIG_HAVE_MOVE_PUD ) && \
303+ !defined(CONFIG_SPECULATIVE_PAGE_FAULT )
323304static bool move_normal_pud (struct vm_area_struct * vma , unsigned long old_addr ,
324305 unsigned long new_addr , pud_t * old_pud , pud_t * new_pud )
325306{
@@ -334,14 +315,6 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
         if (WARN_ON_ONCE(!pud_none(*new_pud)))
                 return false;
 
-        /*
-         * We hold both exclusive mmap_lock and rmap_lock at this point and
-         * cannot block. If we cannot immediately take exclusive ownership
-         * of the VMA fallback to the move_ptes().
-         */
-        if (!trylock_vma_ref_count(vma))
-                return false;
-
         /*
         * We don't have to worry about the ordering of src and dst
         * ptlocks because exclusive mmap_lock prevents deadlock.
@@ -364,7 +337,6 @@ static bool move_normal_pud(struct vm_area_struct *vma, unsigned long old_addr,
                 spin_unlock(new_ptl);
         spin_unlock(old_ptl);
 
-        unlock_vma_ref_count(vma);
         return true;
 }
 #else
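
The trylock_vma_ref_count()/unlock_vma_ref_count() pair removed above is a try-lock built from a reference count: exclusive ownership is claimed only when the count is exactly 1, by swapping it to a -1 sentinel that concurrent get_vma() lookups then refuse to take a reference against. A minimal userspace sketch of the same idiom, assuming C11 atomics (struct obj, trylock_ref_count(), and unlock_ref_count() are illustrative stand-ins, not kernel APIs):

#include <stdatomic.h>
#include <stdbool.h>
#include <stdio.h>

struct obj {
        atomic_int ref_count;   /* 1 == sole reference, -1 == exclusively owned */
};

/* Claim exclusive ownership only if we hold the sole reference, by
 * swapping the count from 1 to the -1 sentinel (cf. atomic_cmpxchg_acquire). */
static bool trylock_ref_count(struct obj *o)
{
        int expected = 1;

        return atomic_compare_exchange_strong_explicit(&o->ref_count,
                        &expected, -1,
                        memory_order_acquire, memory_order_relaxed);
}

/* Restore the count to 1; pairs with a successful trylock_ref_count()
 * (cf. atomic_xchg_release and the VM_BUG_ON_VMA check). */
static void unlock_ref_count(struct obj *o)
{
        int old = atomic_exchange_explicit(&o->ref_count, 1,
                                           memory_order_release);

        if (old != -1)
                fprintf(stderr, "unlock without a matching trylock\n");
}

int main(void)
{
        struct obj o = { .ref_count = 1 };

        if (trylock_ref_count(&o)) {
                /* Exclusive section: concurrent lookups see -1 and back off. */
                unlock_ref_count(&o);
        }
        return 0;
}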
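The replacement locking in move_normal_pmd() takes the PTE-level ptlock only when it is a distinct lock from old_ptl: with USE_SPLIT_PTE_PTLOCKS disabled, old_ptl, new_ptl, and old_pte_ptl all resolve to mm->page_table_lock, and a second spin_lock() on the same lock would self-deadlock. A minimal userspace sketch of that conditional-locking pattern, assuming POSIX threads (pte_lockptr_sim(), move_pmd_sim(), and the two mutexes are hypothetical stand-ins):

#include <pthread.h>
#include <stdbool.h>
#include <stdio.h>

/* Coarse lock standing in for mm->page_table_lock. */
static pthread_mutex_t coarse_lock = PTHREAD_MUTEX_INITIALIZER;
/* Finer lock standing in for a split per-page PTE ptlock. */
static pthread_mutex_t split_lock = PTHREAD_MUTEX_INITIALIZER;

/* Stand-in for pte_lockptr(): with split ptlocks the PTE lock is its own
 * object; without them it aliases the coarse lock. */
static pthread_mutex_t *pte_lockptr_sim(bool split_ptlocks)
{
        return split_ptlocks ? &split_lock : &coarse_lock;
}

static void move_pmd_sim(bool split_ptlocks)
{
        pthread_mutex_t *old_ptl = &coarse_lock;
        pthread_mutex_t *old_pte_ptl = pte_lockptr_sim(split_ptlocks);

        pthread_mutex_lock(old_ptl);

        /* Take the finer lock only if it is a different lock object;
         * otherwise we already hold it and would deadlock on ourselves. */
        if (old_pte_ptl != old_ptl)
                pthread_mutex_lock(old_pte_ptl);

        printf("pmd moved (split ptlocks: %d)\n", split_ptlocks);

        /* Release in reverse order, with the same aliasing check. */
        if (old_pte_ptl != old_ptl)
                pthread_mutex_unlock(old_pte_ptl);
        pthread_mutex_unlock(old_ptl);
}

int main(void)
{
        move_pmd_sim(true);     /* two distinct locks taken */
        move_pmd_sim(false);    /* one lock; the aliasing check avoids deadlock */
        return 0;
}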