diff options
Diffstat (limited to 'mm/khugepaged.c')
-rw-r--r-- | mm/khugepaged.c | 39 |
1 files changed, 29 insertions, 10 deletions
diff --git a/mm/khugepaged.c b/mm/khugepaged.c index 5cb401aa2b9d..a26a28e3738c 100644 --- a/mm/khugepaged.c +++ b/mm/khugepaged.c @@ -847,6 +847,10 @@ static int hugepage_vma_revalidate(struct mm_struct *mm, unsigned long address, return SCAN_SUCCEED; } +/* + * See pmd_trans_unstable() for how the result may change out from + * underneath us, even if we hold mmap_lock in read. + */ static int find_pmd_or_thp_or_none(struct mm_struct *mm, unsigned long address, pmd_t **pmd) @@ -865,8 +869,12 @@ static int find_pmd_or_thp_or_none(struct mm_struct *mm, #endif if (pmd_none(pmde)) return SCAN_PMD_NONE; + if (!pmd_present(pmde)) + return SCAN_PMD_NULL; if (pmd_trans_huge(pmde)) return SCAN_PMD_MAPPED; + if (pmd_devmap(pmde)) + return SCAN_PMD_NULL; if (pmd_bad(pmde)) return SCAN_PMD_NULL; return SCAN_SUCCEED; @@ -1460,14 +1468,6 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, if (!hugepage_vma_check(vma, vma->vm_flags, false, false, false)) return SCAN_VMA_CHECK; - /* - * Symmetry with retract_page_tables(): Exclude MAP_PRIVATE mappings - * that got written to. Without this, we'd have to also lock the - * anon_vma if one exists. - */ - if (vma->anon_vma) - return SCAN_VMA_CHECK; - /* Keep pmd pgtable for uffd-wp; see comment in retract_page_tables() */ if (userfaultfd_wp(vma)) return SCAN_PTE_UFFD_WP; @@ -1567,8 +1567,14 @@ int collapse_pte_mapped_thp(struct mm_struct *mm, unsigned long addr, } /* step 4: remove pte entries */ + /* we make no change to anon, but protect concurrent anon page lookup */ + if (vma->anon_vma) + anon_vma_lock_write(vma->anon_vma); + collapse_and_free_pmd(mm, vma, haddr, pmd); + if (vma->anon_vma) + anon_vma_unlock_write(vma->anon_vma); i_mmap_unlock_write(vma->vm_file->f_mapping); maybe_install_pmd: @@ -1644,7 +1650,7 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff, * has higher cost too. It would also probably require locking * the anon_vma. */ - if (vma->anon_vma) { + if (READ_ONCE(vma->anon_vma)) { result = SCAN_PAGE_ANON; goto next; } @@ -1673,6 +1679,18 @@ static int retract_page_tables(struct address_space *mapping, pgoff_t pgoff, if ((cc->is_khugepaged || is_target) && mmap_write_trylock(mm)) { /* + * Re-check whether we have an ->anon_vma, because + * collapse_and_free_pmd() requires that either no + * ->anon_vma exists or the anon_vma is locked. + * We already checked ->anon_vma above, but that check + * is racy because ->anon_vma can be populated under the + * mmap lock in read mode. + */ + if (vma->anon_vma) { + result = SCAN_PAGE_ANON; + goto unlock_next; + } + /* * When a vma is registered with uffd-wp, we can't * recycle the pmd pgtable because there can be pte * markers installed. Skip it only, so the rest mm/vma @@ -2593,6 +2611,7 @@ static int madvise_collapse_errno(enum scan_result r) case SCAN_CGROUP_CHARGE_FAIL: return -EBUSY; /* Resource temporary unavailable - trying again might succeed */ + case SCAN_PAGE_COUNT: case SCAN_PAGE_LOCK: case SCAN_PAGE_LRU: case SCAN_DEL_PAGE_LRU: @@ -2649,7 +2668,7 @@ int madvise_collapse(struct vm_area_struct *vma, struct vm_area_struct **prev, goto out_nolock; } - hend = vma->vm_end & HPAGE_PMD_MASK; + hend = min(hend, vma->vm_end & HPAGE_PMD_MASK); } mmap_assert_locked(mm); memset(cc->node_load, 0, sizeof(cc->node_load)); |