aboutsummaryrefslogtreecommitdiffstats
path: root/fs/userfaultfd.c
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2023-06-28 10:28:11 -0700
committerLinus Torvalds <torvalds@linux-foundation.org>2023-06-28 10:28:11 -0700
commit6e17c6de3ddf3073741d9c91a796ee696914d8a0 (patch)
tree2c425707f78642625dbe2c824c7fded2021e3dc7 /fs/userfaultfd.c
parent6aeadf7896bff4ca230702daba8788455e6b866e (diff)
parentacc72d59c7509540c27c49625cb4b5a8db1f1a84 (diff)
downloadlinux-6e17c6de3ddf3073741d9c91a796ee696914d8a0.tar.gz
Merge tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull mm updates from Andrew Morton: - Yosry Ahmed brought back some cgroup v1 stats in OOM logs - Yosry has also eliminated cgroup's atomic rstat flushing - Nhat Pham adds the new cachestat() syscall. It provides userspace with the ability to query pagecache status - a similar concept to mincore() but more powerful and with improved usability - Mel Gorman provides more optimizations for compaction, reducing the prevalence of page rescanning - Lorenzo Stoakes has done some maintenance work on the get_user_pages() interface - Liam Howlett continues with cleanups and maintenance work to the maple tree code. Peng Zhang also does some work on maple tree - Johannes Weiner has done some cleanup work on the compaction code - David Hildenbrand has contributed additional selftests for get_user_pages() - Thomas Gleixner has contributed some maintenance and optimization work for the vmalloc code - Baolin Wang has provided some compaction cleanups - SeongJae Park continues maintenance work on the DAMON code - Huang Ying has done some maintenance on the swap code's usage of device refcounting - Christoph Hellwig has some cleanups for the filemap/directio code - Ryan Roberts provides two patch series which yield some rationalization of the kernel's access to pte entries - use the provided APIs rather than open-coding accesses - Lorenzo Stoakes has some fixes to the interaction between pagecache and directio access to file mappings - John Hubbard has a series of fixes to the MM selftesting code - ZhangPeng continues the folio conversion campaign - Hugh Dickins has been working on the pagetable handling code, mainly with a view to reducing the load on the mmap_lock - Catalin Marinas has reduced the arm64 kmalloc() minimum alignment from 128 to 8 - Domenico Cerasuolo has improved the zswap reclaim mechanism by reorganizing the LRU management - Matthew Wilcox provides some fixups to make gfs2 work better with the buffer_head code - Vishal Moola also has done some folio conversion 
work - Matthew Wilcox has removed the remnants of the pagevec code - their functionality is migrated over to struct folio_batch * tag 'mm-stable-2023-06-24-19-15' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (380 commits) mm/hugetlb: remove hugetlb_set_page_subpool() mm: nommu: correct the range of mmap_sem_read_lock in task_mem() hugetlb: revert use of page_cache_next_miss() Revert "page cache: fix page_cache_next/prev_miss off by one" mm/vmscan: fix root proactive reclaim unthrottling unbalanced node mm: memcg: rename and document global_reclaim() mm: kill [add|del]_page_to_lru_list() mm: compaction: convert to use a folio in isolate_migratepages_block() mm: zswap: fix double invalidate with exclusive loads mm: remove unnecessary pagevec includes mm: remove references to pagevec mm: rename invalidate_mapping_pagevec to mapping_try_invalidate mm: remove struct pagevec net: convert sunrpc from pagevec to folio_batch i915: convert i915_gpu_error to use a folio_batch pagevec: rename fbatch_count() mm: remove check_move_unevictable_pages() drm: convert drm_gem_put_pages() to use a folio_batch i915: convert shmem_sg_free_table() to use a folio_batch scatterlist: add sg_set_folio() ...
Diffstat (limited to 'fs/userfaultfd.c')
-rw-r--r--fs/userfaultfd.c62
1 files changed, 26 insertions, 36 deletions
diff --git a/fs/userfaultfd.c b/fs/userfaultfd.c
index 4e800bb7d2ab61..7cecd49e078b3d 100644
--- a/fs/userfaultfd.c
+++ b/fs/userfaultfd.c
@@ -335,6 +335,7 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
pud_t *pud;
pmd_t *pmd, _pmd;
pte_t *pte;
+ pte_t ptent;
bool ret = true;
mmap_assert_locked(mm);
@@ -349,20 +350,13 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
if (!pud_present(*pud))
goto out;
pmd = pmd_offset(pud, address);
- /*
- * READ_ONCE must function as a barrier with narrower scope
- * and it must be equivalent to:
- * _pmd = *pmd; barrier();
- *
- * This is to deal with the instability (as in
- * pmd_trans_unstable) of the pmd.
- */
- _pmd = READ_ONCE(*pmd);
+again:
+ _pmd = pmdp_get_lockless(pmd);
if (pmd_none(_pmd))
goto out;
ret = false;
- if (!pmd_present(_pmd))
+ if (!pmd_present(_pmd) || pmd_devmap(_pmd))
goto out;
if (pmd_trans_huge(_pmd)) {
@@ -371,19 +365,20 @@ static inline bool userfaultfd_must_wait(struct userfaultfd_ctx *ctx,
goto out;
}
- /*
- * the pmd is stable (as in !pmd_trans_unstable) so we can re-read it
- * and use the standard pte_offset_map() instead of parsing _pmd.
- */
pte = pte_offset_map(pmd, address);
+ if (!pte) {
+ ret = true;
+ goto again;
+ }
/*
* Lockless access: we're in a wait_event so it's ok if it
* changes under us. PTE markers should be handled the same as none
* ptes here.
*/
- if (pte_none_mostly(*pte))
+ ptent = ptep_get(pte);
+ if (pte_none_mostly(ptent))
ret = true;
- if (!pte_write(*pte) && (reason & VM_UFFD_WP))
+ if (!pte_write(ptent) && (reason & VM_UFFD_WP))
ret = true;
pte_unmap(pte);
@@ -857,31 +852,26 @@ static bool has_unmap_ctx(struct userfaultfd_ctx *ctx, struct list_head *unmaps,
return false;
}
-int userfaultfd_unmap_prep(struct mm_struct *mm, unsigned long start,
+int userfaultfd_unmap_prep(struct vm_area_struct *vma, unsigned long start,
unsigned long end, struct list_head *unmaps)
{
- VMA_ITERATOR(vmi, mm, start);
- struct vm_area_struct *vma;
-
- for_each_vma_range(vmi, vma, end) {
- struct userfaultfd_unmap_ctx *unmap_ctx;
- struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
+ struct userfaultfd_unmap_ctx *unmap_ctx;
+ struct userfaultfd_ctx *ctx = vma->vm_userfaultfd_ctx.ctx;
- if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
- has_unmap_ctx(ctx, unmaps, start, end))
- continue;
+ if (!ctx || !(ctx->features & UFFD_FEATURE_EVENT_UNMAP) ||
+ has_unmap_ctx(ctx, unmaps, start, end))
+ return 0;
- unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
- if (!unmap_ctx)
- return -ENOMEM;
+ unmap_ctx = kzalloc(sizeof(*unmap_ctx), GFP_KERNEL);
+ if (!unmap_ctx)
+ return -ENOMEM;
- userfaultfd_ctx_get(ctx);
- atomic_inc(&ctx->mmap_changing);
- unmap_ctx->ctx = ctx;
- unmap_ctx->start = start;
- unmap_ctx->end = end;
- list_add_tail(&unmap_ctx->list, unmaps);
- }
+ userfaultfd_ctx_get(ctx);
+ atomic_inc(&ctx->mmap_changing);
+ unmap_ctx->ctx = ctx;
+ unmap_ctx->start = start;
+ unmap_ctx->end = end;
+ list_add_tail(&unmap_ctx->list, unmaps);
return 0;
}