diff options
| author | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-13 19:29:45 -0800 |
|---|---|---|
| committer | Linus Torvalds <torvalds@linux-foundation.org> | 2022-12-13 19:29:45 -0800 |
| commit | e2ca6ba6ba0152361aa4fcbf6067db71b2c7a770 (patch) | |
| tree | f7ed7753a2e66486a4ffe0fbbf98404ec4ba2212 /mm/mprotect.c | |
| parent | 7e68dd7d07a28faa2e6574dd6b9dbd90cdeaae91 (diff) | |
| parent | c45bc55a99957b20e4e0333bcd42e12d1833a7f5 (diff) | |
| download | net-e2ca6ba6ba0152361aa4fcbf6067db71b2c7a770.tar.gz | |
Merge tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm
Pull MM updates from Andrew Morton:
- More userfaultfd work from Peter Xu
- Several convert-to-folios series from Sidhartha Kumar and Huang Ying
- Some filemap cleanups from Vishal Moola
- David Hildenbrand added the ability to selftest anon memory COW
handling
- Some cpuset simplifications from Liu Shixin
- Addition of vmalloc tracing support by Uladzislau Rezki
- Some pagecache folioifications and simplifications from Matthew
Wilcox
- A pagemap cleanup from Kefeng Wang: we have VM_ACCESS_FLAGS, so use
it
- Miguel Ojeda contributed some cleanups for our use of the
__no_sanitize_thread__ gcc keyword.
This series should have been in the non-MM tree, my bad
- Naoya Horiguchi improved the interaction between memory poisoning and
memory section removal for huge pages
- DAMON cleanups and tuneups from SeongJae Park
- Tony Luck fixed the handling of COW faults against poisoned pages
- Peter Xu utilized the PTE marker code for handling swapin errors
- Hugh Dickins reworked compound page mapcount handling, simplifying it
and making it more efficient
- Removal of the autonuma savedwrite infrastructure from Nadav Amit and
David Hildenbrand
- zram support for multiple compression streams from Sergey Senozhatsky
- David Hildenbrand reworked the GUP code's R/O long-term pinning so
that drivers no longer need to use the FOLL_FORCE workaround which
didn't work very well anyway
- Mel Gorman altered the page allocator so that local IRQs can remain
enabled during per-cpu page allocations
- Vishal Moola removed the try_to_release_page() wrapper
- Stefan Roesch added some per-BDI sysfs tunables which are used to
prevent network block devices from dirtying excessive amounts of
pagecache
- David Hildenbrand did some cleanup and repair work on KSM COW
breaking
- Nhat Pham and Johannes Weiner have implemented writeback in zswap's
zsmalloc backend
- Brian Foster has fixed a longstanding corner-case oddity in
file[map]_write_and_wait_range()
- sparse-vmemmap changes for MIPS, LoongArch and NIOS2 from Feiyang
Chen
- Shiyang Ruan has done some work on fsdax, to make its reflink mode
work better under xfstests. Better, but still not perfect
- Christoph Hellwig has removed the .writepage() method from several
filesystems. They only need .writepages()
- Yosry Ahmed wrote a series which fixes the memcg reclaim target
beancounting
- David Hildenbrand has fixed some of our MM selftests for 32-bit
machines
- Many singleton patches, as usual
* tag 'mm-stable-2022-12-13' of git://git.kernel.org/pub/scm/linux/kernel/git/akpm/mm: (313 commits)
mm/hugetlb: set head flag before setting compound_order in __prep_compound_gigantic_folio
mm: mmu_gather: allow more than one batch of delayed rmaps
mm: fix typo in struct pglist_data code comment
kmsan: fix memcpy tests
mm: add cond_resched() in swapin_walk_pmd_entry()
mm: do not show fs mm pc for VM_LOCKONFAULT pages
selftests/vm: ksm_functional_tests: fixes for 32bit
selftests/vm: cow: fix compile warning on 32bit
selftests/vm: madv_populate: fix missing MADV_POPULATE_(READ|WRITE) definitions
mm/gup_test: fix PIN_LONGTERM_TEST_READ with highmem
mm,thp,rmap: fix races between updates of subpages_mapcount
mm: memcg: fix swapcached stat accounting
mm: add nodes= arg to memory.reclaim
mm: disable top-tier fallback to reclaim on proactive reclaim
selftests: cgroup: make sure reclaim target memcg is unprotected
selftests: cgroup: refactor proactive reclaim code to reclaim_until()
mm: memcg: fix stale protection of reclaim target memcg
mm/mmap: properly unaccount memory on mas_preallocate() failure
omfs: remove ->writepage
jfs: remove ->writepage
...
Diffstat (limited to 'mm/mprotect.c')
| -rw-r--r-- | mm/mprotect.c | 55 |
1 files changed, 24 insertions, 31 deletions
diff --git a/mm/mprotect.c b/mm/mprotect.c index 668bfaa6ed2aeb..093cb50f2fc464 100644 --- a/mm/mprotect.c +++ b/mm/mprotect.c @@ -39,14 +39,16 @@ #include "internal.h" -static inline bool can_change_pte_writable(struct vm_area_struct *vma, - unsigned long addr, pte_t pte) +bool can_change_pte_writable(struct vm_area_struct *vma, unsigned long addr, + pte_t pte) { struct page *page; - VM_BUG_ON(!(vma->vm_flags & VM_WRITE) || pte_write(pte)); + if (WARN_ON_ONCE(!(vma->vm_flags & VM_WRITE))) + return false; - if (pte_protnone(pte) || !pte_dirty(pte)) + /* Don't touch entries that are not even readable. */ + if (pte_protnone(pte)) return false; /* Do we need write faults for softdirty tracking? */ @@ -59,17 +61,23 @@ static inline bool can_change_pte_writable(struct vm_area_struct *vma, if (!(vma->vm_flags & VM_SHARED)) { /* - * We can only special-case on exclusive anonymous pages, - * because we know that our write-fault handler similarly would - * map them writable without any additional checks while holding - * the PT lock. + * Writable MAP_PRIVATE mapping: We can only special-case on + * exclusive anonymous pages, because we know that our + * write-fault handler similarly would map them writable without + * any additional checks while holding the PT lock. */ page = vm_normal_page(vma, addr, pte); - if (!page || !PageAnon(page) || !PageAnonExclusive(page)) - return false; + return page && PageAnon(page) && PageAnonExclusive(page); } - return true; + /* + * Writable MAP_SHARED mapping: "clean" might indicate that the FS still + * needs a real write-fault for writenotify + * (see vma_wants_writenotify()). If "dirty", the assumption is that the + * FS was already notified and we can simply mark the PTE writable + * just like the write-fault handler would do. 
+ */ + return pte_dirty(pte); } static unsigned long change_pte_range(struct mmu_gather *tlb, @@ -113,7 +121,6 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, oldpte = *pte; if (pte_present(oldpte)) { pte_t ptent; - bool preserve_write = prot_numa && pte_write(oldpte); /* * Avoid trapping faults against the zero or KSM @@ -169,8 +176,6 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, oldpte = ptep_modify_prot_start(vma, addr, pte); ptent = pte_modify(oldpte, newprot); - if (preserve_write) - ptent = pte_mk_savedwrite(ptent); if (uffd_wp) { ptent = pte_wrprotect(ptent); @@ -267,7 +272,6 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, } else { /* It must be an none page, or what else?.. */ WARN_ON_ONCE(!pte_none(oldpte)); -#ifdef CONFIG_PTE_MARKER_UFFD_WP if (unlikely(uffd_wp && !vma_is_anonymous(vma))) { /* * For file-backed mem, we need to be able to @@ -279,7 +283,6 @@ static unsigned long change_pte_range(struct mmu_gather *tlb, make_pte_marker(PTE_MARKER_UFFD_WP)); pages++; } -#endif } } while (pte++, addr += PAGE_SIZE, addr != end); arch_leave_lazy_mmu_mode(); @@ -552,8 +555,8 @@ mprotect_fixup(struct mmu_gather *tlb, struct vm_area_struct *vma, struct mm_struct *mm = vma->vm_mm; unsigned long oldflags = vma->vm_flags; long nrpages = (end - start) >> PAGE_SHIFT; + unsigned int mm_cp_flags = 0; unsigned long charged = 0; - bool try_change_writable; pgoff_t pgoff; int error; @@ -631,20 +634,11 @@ success: * held in write mode. */ vma->vm_flags = newflags; - /* - * We want to check manually if we can change individual PTEs writable - * if we can't do that automatically for all PTEs in a mapping. For - * private mappings, that's always the case when we have write - * permissions as we properly have to handle COW. 
- */ - if (vma->vm_flags & VM_SHARED) - try_change_writable = vma_wants_writenotify(vma, vma->vm_page_prot); - else - try_change_writable = !!(vma->vm_flags & VM_WRITE); + if (vma_wants_manual_pte_write_upgrade(vma)) + mm_cp_flags |= MM_CP_TRY_CHANGE_WRITABLE; vma_set_page_prot(vma); - change_protection(tlb, vma, start, end, vma->vm_page_prot, - try_change_writable ? MM_CP_TRY_CHANGE_WRITABLE : 0); + change_protection(tlb, vma, start, end, vma->vm_page_prot, mm_cp_flags); /* * Private VM_LOCKED VMA becoming writable: trigger COW to avoid major @@ -756,8 +750,7 @@ static int do_mprotect_pkey(unsigned long start, size_t len, * If a permission is not passed to mprotect(), it must be * cleared from the VMA. */ - mask_off_old_flags = VM_READ | VM_WRITE | VM_EXEC | - VM_FLAGS_CLEAR; + mask_off_old_flags = VM_ACCESS_FLAGS | VM_FLAGS_CLEAR; new_vma_pkey = arch_override_mprotect_pkey(vma, prot, pkey); newflags = calc_vm_prot_bits(prot, new_vma_pkey); |
