diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2021-07-04 23:05:31 -0700 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2021-07-04 23:05:31 -0700 |
| commit | 818b26588994d9d95743fca0a427f08ec6c1c41d (patch) | |
| tree | 870d9abed0e43b82257350a93e6517816815cd6c /mm | |
| parent | 45a4b68354ffccbc9ca71027bd34754ca24f5183 (diff) | |
| parent | f8f84af5da9ee04ef1d271528656dac42a090d00 (diff) | |
| download | net-818b26588994d9d95743fca0a427f08ec6c1c41d.tar.gz | |
Merge branch 'next' into for-linus
Prepare input updates for 5.14 merge window.
Diffstat (limited to 'mm')
| -rw-r--r-- | mm/filemap.c | 31 | ||||
| -rw-r--r-- | mm/gup.c | 4 | ||||
| -rw-r--r-- | mm/highmem.c | 4 | ||||
| -rw-r--r-- | mm/hugetlb.c | 41 | ||||
| -rw-r--r-- | mm/hugetlb_cgroup.c | 10 | ||||
| -rw-r--r-- | mm/internal.h | 20 | ||||
| -rw-r--r-- | mm/kasan/common.c | 2 | ||||
| -rw-r--r-- | mm/kasan/kasan.h | 2 | ||||
| -rw-r--r-- | mm/kasan/report_generic.c | 2 | ||||
| -rw-r--r-- | mm/kfence/core.c | 9 | ||||
| -rw-r--r-- | mm/kmemleak.c | 3 | ||||
| -rw-r--r-- | mm/mapping_dirty_helpers.c | 2 | ||||
| -rw-r--r-- | mm/memory.c | 2 | ||||
| -rw-r--r-- | mm/mmu_gather.c | 29 | ||||
| -rw-r--r-- | mm/mmu_notifier.c | 23 | ||||
| -rw-r--r-- | mm/oom_kill.c | 2 | ||||
| -rw-r--r-- | mm/page-writeback.c | 16 | ||||
| -rw-r--r-- | mm/page_poison.c | 4 | ||||
| -rw-r--r-- | mm/percpu-internal.h | 2 | ||||
| -rw-r--r-- | mm/percpu-stats.c | 9 | ||||
| -rw-r--r-- | mm/percpu.c | 14 | ||||
| -rw-r--r-- | mm/ptdump.c | 2 | ||||
| -rw-r--r-- | mm/shuffle.c | 4 | ||||
| -rw-r--r-- | mm/z3fold.c | 16 |
24 files changed, 202 insertions, 51 deletions
diff --git a/mm/filemap.c b/mm/filemap.c index 43700480d897d4..6ce832dc59e738 100644 --- a/mm/filemap.c +++ b/mm/filemap.c @@ -1969,8 +1969,14 @@ unlock: put: put_page(page); next: - if (!xa_is_value(page) && PageTransHuge(page)) - xas_set(&xas, page->index + thp_nr_pages(page)); + if (!xa_is_value(page) && PageTransHuge(page)) { + unsigned int nr_pages = thp_nr_pages(page); + + /* Final THP may cross MAX_LFS_FILESIZE on 32-bit */ + xas_set(&xas, page->index + nr_pages); + if (xas.xa_index < nr_pages) + break; + } } rcu_read_unlock(); @@ -2672,7 +2678,7 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, loff_t end, int whence) { XA_STATE(xas, &mapping->i_pages, start >> PAGE_SHIFT); - pgoff_t max = (end - 1) / PAGE_SIZE; + pgoff_t max = (end - 1) >> PAGE_SHIFT; bool seek_data = (whence == SEEK_DATA); struct page *page; @@ -2681,7 +2687,8 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, rcu_read_lock(); while ((page = find_get_entry(&xas, max, XA_PRESENT))) { - loff_t pos = xas.xa_index * PAGE_SIZE; + loff_t pos = (u64)xas.xa_index << PAGE_SHIFT; + unsigned int seek_size; if (start < pos) { if (!seek_data) @@ -2689,25 +2696,25 @@ loff_t mapping_seek_hole_data(struct address_space *mapping, loff_t start, start = pos; } - pos += seek_page_size(&xas, page); + seek_size = seek_page_size(&xas, page); + pos = round_up(pos + 1, seek_size); start = page_seek_hole_data(&xas, mapping, page, start, pos, seek_data); if (start < pos) goto unlock; + if (start >= end) + break; + if (seek_size > PAGE_SIZE) + xas_set(&xas, pos >> PAGE_SHIFT); if (!xa_is_value(page)) put_page(page); } - rcu_read_unlock(); - if (seek_data) - return -ENXIO; - goto out; - + start = -ENXIO; unlock: rcu_read_unlock(); - if (!xa_is_value(page)) + if (page && !xa_is_value(page)) put_page(page); -out: if (start > end) return end; return start; diff --git a/mm/gup.c b/mm/gup.c index e40579624f1078..ef7d2da9f03ff1 100644 --- a/mm/gup.c +++ b/mm/gup.c @@ -1535,6 +1535,10 @@ struct page *get_dump_page(unsigned long addr) FOLL_FORCE | FOLL_DUMP | FOLL_GET); if (locked) mmap_read_unlock(mm); + + if (ret == 1 && is_page_poisoned(page)) + return NULL; + return (ret == 1) ? page : NULL; } #endif /* CONFIG_ELF_CORE */ diff --git a/mm/highmem.c b/mm/highmem.c index 86f2b9495f9cfd..6ef8f5e05e7e5d 100644 --- a/mm/highmem.c +++ b/mm/highmem.c @@ -618,7 +618,7 @@ void __kmap_local_sched_out(void) int idx; /* With debug all even slots are unmapped and act as guard */ - if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) { + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { WARN_ON_ONCE(!pte_none(pteval)); continue; } @@ -654,7 +654,7 @@ void __kmap_local_sched_in(void) int idx; /* With debug all even slots are unmapped and act as guard */ - if (IS_ENABLED(CONFIG_DEBUG_HIGHMEM) && !(i & 0x01)) { + if (IS_ENABLED(CONFIG_DEBUG_KMAP_LOCAL) && !(i & 0x01)) { WARN_ON_ONCE(!pte_none(pteval)); continue; } diff --git a/mm/hugetlb.c b/mm/hugetlb.c index 5b1ab1f427c57c..a86a58ef132d55 100644 --- a/mm/hugetlb.c +++ b/mm/hugetlb.c @@ -280,6 +280,17 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg, nrg->reservation_counter = &h_cg->rsvd_hugepage[hstate_index(h)]; nrg->css = &h_cg->css; + /* + * The caller will hold exactly one h_cg->css reference for the + * whole contiguous reservation region. But this area might be + * scattered when there are already some file_regions reside in + * it. As a result, many file_regions may share only one css + * reference. In order to ensure that one file_region must hold + * exactly one h_cg->css reference, we should do css_get for + * each file_region and leave the reference held by caller + * untouched. + */ + css_get(&h_cg->css); if (!resv->pages_per_hpage) resv->pages_per_hpage = pages_per_huge_page(h); /* pages_per_hpage should be the same for all entries in @@ -293,6 +304,14 @@ static void record_hugetlb_cgroup_uncharge_info(struct hugetlb_cgroup *h_cg, #endif } +static void put_uncharge_info(struct file_region *rg) +{ +#ifdef CONFIG_CGROUP_HUGETLB + if (rg->css) + css_put(rg->css); +#endif +} + static bool has_same_uncharge_info(struct file_region *rg, struct file_region *org) { @@ -316,6 +335,7 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg) prg->to = rg->to; list_del(&rg->link); + put_uncharge_info(rg); kfree(rg); rg = prg; @@ -327,6 +347,7 @@ static void coalesce_file_region(struct resv_map *resv, struct file_region *rg) nrg->from = rg->from; list_del(&rg->link); + put_uncharge_info(rg); kfree(rg); } } @@ -662,7 +683,7 @@ retry: del += t - f; hugetlb_cgroup_uncharge_file_region( - resv, rg, t - f); + resv, rg, t - f, false); /* New entry for end of split region */ nrg->from = t; @@ -683,7 +704,7 @@ retry: if (f <= rg->from && t >= rg->to) { /* Remove entire region */ del += rg->to - rg->from; hugetlb_cgroup_uncharge_file_region(resv, rg, - rg->to - rg->from); + rg->to - rg->from, true); list_del(&rg->link); kfree(rg); continue; @@ -691,13 +712,13 @@ retry: if (f <= rg->from) { /* Trim beginning of region */ hugetlb_cgroup_uncharge_file_region(resv, rg, - t - rg->from); + t - rg->from, false); del += t - rg->from; rg->from = t; } else { /* Trim end of region */ hugetlb_cgroup_uncharge_file_region(resv, rg, - rg->to - f); + rg->to - f, false); del += rg->to - f; rg->to = f; @@ -5187,6 +5208,10 @@ bool hugetlb_reserve_pages(struct inode *inode, */ long rsv_adjust; + /* + * hugetlb_cgroup_uncharge_cgroup_rsvd() will put the + * reference to h_cg->css. See comment below for detail. + */ hugetlb_cgroup_uncharge_cgroup_rsvd( hstate_index(h), (chg - add) * pages_per_huge_page(h), h_cg); @@ -5194,6 +5219,14 @@ bool hugetlb_reserve_pages(struct inode *inode, rsv_adjust = hugepage_subpool_put_pages(spool, chg - add); hugetlb_acct_memory(h, -rsv_adjust); + } else if (h_cg) { + /* + * The file_regions will hold their own reference to + * h_cg->css. So we should release the reference held + * via hugetlb_cgroup_charge_cgroup_rsvd() when we are + * done. + */ + hugetlb_cgroup_put_rsvd_cgroup(h_cg); } } return true; diff --git a/mm/hugetlb_cgroup.c b/mm/hugetlb_cgroup.c index f68b51fcda3d13..603a131e262dc7 100644 --- a/mm/hugetlb_cgroup.c +++ b/mm/hugetlb_cgroup.c @@ -391,7 +391,8 @@ void hugetlb_cgroup_uncharge_counter(struct resv_map *resv, unsigned long start, void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, struct file_region *rg, - unsigned long nr_pages) + unsigned long nr_pages, + bool region_del) { if (hugetlb_cgroup_disabled() || !resv || !rg || !nr_pages) return; @@ -400,7 +401,12 @@ void hugetlb_cgroup_uncharge_file_region(struct resv_map *resv, !resv->reservation_counter) { page_counter_uncharge(rg->reservation_counter, nr_pages * resv->pages_per_hpage); - css_put(rg->css); + /* + * Only do css_put(rg->css) when we delete the entire region + * because one file_region must hold exactly one css reference. + */ + if (region_del) + css_put(rg->css); } } diff --git a/mm/internal.h b/mm/internal.h index 1432feec62df09..cb3c5e0a7799f5 100644 --- a/mm/internal.h +++ b/mm/internal.h @@ -97,6 +97,26 @@ static inline void set_page_refcounted(struct page *page) set_page_count(page, 1); } +/* + * When kernel touch the user page, the user page may be have been marked + * poison but still mapped in user space, if without this page, the kernel + * can guarantee the data integrity and operation success, the kernel is + * better to check the posion status and avoid touching it, be good not to + * panic, coredump for process fatal signal is a sample case matching this + * scenario. Or if kernel can't guarantee the data integrity, it's better + * not to call this function, let kernel touch the poison page and get to + * panic. + */ +static inline bool is_page_poisoned(struct page *page) +{ + if (PageHWPoison(page)) + return true; + else if (PageHuge(page) && PageHWPoison(compound_head(page))) + return true; + + return false; +} + extern unsigned long highest_memmap_pfn; /* diff --git a/mm/kasan/common.c b/mm/kasan/common.c index b5e08d4cefecc0..7b53291dafa175 100644 --- a/mm/kasan/common.c +++ b/mm/kasan/common.c @@ -63,7 +63,7 @@ void __kasan_unpoison_range(const void *address, size_t size) kasan_unpoison(address, size); } -#if CONFIG_KASAN_STACK +#ifdef CONFIG_KASAN_STACK /* Unpoison the entire stack for a task. */ void kasan_unpoison_task_stack(struct task_struct *task) { diff --git a/mm/kasan/kasan.h b/mm/kasan/kasan.h index 8c55634d6edd92..3436c6bf7c0c7c 100644 --- a/mm/kasan/kasan.h +++ b/mm/kasan/kasan.h @@ -231,7 +231,7 @@ void *kasan_find_first_bad_addr(void *addr, size_t size); const char *kasan_get_bug_type(struct kasan_access_info *info); void kasan_metadata_fetch_row(char *buffer, void *row); -#if defined(CONFIG_KASAN_GENERIC) && CONFIG_KASAN_STACK +#if defined(CONFIG_KASAN_GENERIC) && defined(CONFIG_KASAN_STACK) void kasan_print_address_stack_frame(const void *addr); #else static inline void kasan_print_address_stack_frame(const void *addr) { } diff --git a/mm/kasan/report_generic.c b/mm/kasan/report_generic.c index 41f3745851444f..de732bc341c5c7 100644 --- a/mm/kasan/report_generic.c +++ b/mm/kasan/report_generic.c @@ -128,7 +128,7 @@ void kasan_metadata_fetch_row(char *buffer, void *row) memcpy(buffer, kasan_mem_to_shadow(row), META_BYTES_PER_ROW); } -#if CONFIG_KASAN_STACK +#ifdef CONFIG_KASAN_STACK static bool __must_check tokenize_frame_descr(const char **frame_descr, char *token, size_t max_tok_len, unsigned long *value) diff --git a/mm/kfence/core.c b/mm/kfence/core.c index 3b8ec938470a2a..d53c91f881a41d 100644 --- a/mm/kfence/core.c +++ b/mm/kfence/core.c @@ -12,6 +12,7 @@ #include <linux/debugfs.h> #include <linux/kcsan-checks.h> #include <linux/kfence.h> +#include <linux/kmemleak.h> #include <linux/list.h> #include <linux/lockdep.h> #include <linux/memblock.h> @@ -480,6 +481,14 @@ static bool __init kfence_init_pool(void) addr += 2 * PAGE_SIZE; } + /* + * The pool is live and will never be deallocated from this point on. + * Remove the pool object from the kmemleak object tree, as it would + * otherwise overlap with allocations returned by kfence_alloc(), which + * are registered with kmemleak through the slab post-alloc hook. + */ + kmemleak_free(__kfence_pool); + return true; err: diff --git a/mm/kmemleak.c b/mm/kmemleak.c index c0014d3b91c10f..fe6e3ae8e8c671 100644 --- a/mm/kmemleak.c +++ b/mm/kmemleak.c @@ -97,6 +97,7 @@ #include <linux/atomic.h> #include <linux/kasan.h> +#include <linux/kfence.h> #include <linux/kmemleak.h> #include <linux/memory_hotplug.h> @@ -589,7 +590,7 @@ static struct kmemleak_object *create_object(unsigned long ptr, size_t size, atomic_set(&object->use_count, 1); object->flags = OBJECT_ALLOCATED; object->pointer = ptr; - object->size = size; + object->size = kfence_ksize((void *)ptr) ?: size; object->excess_ref = 0; object->min_count = min_count; object->count = 0; /* white color initially */ diff --git a/mm/mapping_dirty_helpers.c b/mm/mapping_dirty_helpers.c index b59054ef2e107e..b890854ec76165 100644 --- a/mm/mapping_dirty_helpers.c +++ b/mm/mapping_dirty_helpers.c @@ -165,10 +165,12 @@ static int wp_clean_pud_entry(pud_t *pud, unsigned long addr, unsigned long end, return 0; } +#ifdef CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD /* Huge pud */ walk->action = ACTION_CONTINUE; if (pud_trans_huge(pudval) || pud_devmap(pudval)) WARN_ON(pud_write(pudval) || pud_dirty(pudval)); +#endif return 0; } diff --git a/mm/memory.c b/mm/memory.c index 5efa07fb6cdc18..550405fc3b5e63 100644 --- a/mm/memory.c +++ b/mm/memory.c @@ -166,7 +166,7 @@ static int __init init_zero_pfn(void) zero_pfn = page_to_pfn(ZERO_PAGE(0)); return 0; } -core_initcall(init_zero_pfn); +early_initcall(init_zero_pfn); void mm_trace_rss_stat(struct mm_struct *mm, int member, long count) { diff --git a/mm/mmu_gather.c b/mm/mmu_gather.c index 0dc7149b0c6153..1b9837419bf9cd 100644 --- a/mm/mmu_gather.c +++ b/mm/mmu_gather.c @@ -249,16 +249,6 @@ void tlb_flush_mmu(struct mmu_gather *tlb) tlb_flush_mmu_free(tlb); } -/** - * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down - * @tlb: the mmu_gather structure to initialize - * @mm: the mm_struct of the target address space - * @fullmm: @mm is without users and we're going to destroy the full address - * space (exit/execve) - * - * Called to initialize an (on-stack) mmu_gather structure for page-table - * tear-down from @mm. - */ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, bool fullmm) { @@ -283,11 +273,30 @@ static void __tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm, inc_tlb_flush_pending(tlb->mm); } +/** + * tlb_gather_mmu - initialize an mmu_gather structure for page-table tear-down + * @tlb: the mmu_gather structure to initialize + * @mm: the mm_struct of the target address space + * + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. + */ void tlb_gather_mmu(struct mmu_gather *tlb, struct mm_struct *mm) { __tlb_gather_mmu(tlb, mm, false); } +/** + * tlb_gather_mmu_fullmm - initialize an mmu_gather structure for page-table tear-down + * @tlb: the mmu_gather structure to initialize + * @mm: the mm_struct of the target address space + * + * In this case, @mm is without users and we're going to destroy the + * full address space (exit/execve). + * + * Called to initialize an (on-stack) mmu_gather structure for page-table + * tear-down from @mm. + */ void tlb_gather_mmu_fullmm(struct mmu_gather *tlb, struct mm_struct *mm) { __tlb_gather_mmu(tlb, mm, true); diff --git a/mm/mmu_notifier.c b/mm/mmu_notifier.c index 61ee40ed804ee5..459d195d2ff64b 100644 --- a/mm/mmu_notifier.c +++ b/mm/mmu_notifier.c @@ -501,10 +501,33 @@ static int mn_hlist_invalidate_range_start( ""); WARN_ON(mmu_notifier_range_blockable(range) || _ret != -EAGAIN); + /* + * We call all the notifiers on any EAGAIN, + * there is no way for a notifier to know if + * its start method failed, thus a start that + * does EAGAIN can't also do end. + */ + WARN_ON(ops->invalidate_range_end); ret = _ret; } } } + + if (ret) { + /* + * Must be non-blocking to get here. If there are multiple + * notifiers and one or more failed start, any that succeeded + * start are expecting their end to be called. Do so now. + */ + hlist_for_each_entry_rcu(subscription, &subscriptions->list, + hlist, srcu_read_lock_held(&srcu)) { + if (!subscription->ops->invalidate_range_end) + continue; + + subscription->ops->invalidate_range_end(subscription, + range); + } + } srcu_read_unlock(&srcu, id); return ret; diff --git a/mm/oom_kill.c b/mm/oom_kill.c index 9efaf430cfd375..fa1cf18bac9784 100644 --- a/mm/oom_kill.c +++ b/mm/oom_kill.c @@ -170,7 +170,7 @@ static bool oom_unkillable_task(struct task_struct *p) return false; } -/** +/* * Check whether unreclaimable slab amount is greater than * all user memory(LRU pages). * dump_unreclaimable_slab() could help in the case that diff --git a/mm/page-writeback.c b/mm/page-writeback.c index eb34d204d4ee71..9e35b636a39375 100644 --- a/mm/page-writeback.c +++ b/mm/page-writeback.c @@ -2833,6 +2833,22 @@ void wait_on_page_writeback(struct page *page) } EXPORT_SYMBOL_GPL(wait_on_page_writeback); +/* + * Wait for a page to complete writeback. Returns -EINTR if we get a + * fatal signal while waiting. + */ +int wait_on_page_writeback_killable(struct page *page) +{ + while (PageWriteback(page)) { + trace_wait_on_page_writeback(page, page_mapping(page)); + if (wait_on_page_bit_killable(page, PG_writeback)) + return -EINTR; + } + + return 0; +} +EXPORT_SYMBOL_GPL(wait_on_page_writeback_killable); + /** * wait_for_stable_page() - wait for writeback to finish, if necessary. * @page: The page to wait on. diff --git a/mm/page_poison.c b/mm/page_poison.c index 65cdf844c8adbb..655dc589560439 100644 --- a/mm/page_poison.c +++ b/mm/page_poison.c @@ -77,12 +77,14 @@ static void unpoison_page(struct page *page) void *addr; addr = kmap_atomic(page); + kasan_disable_current(); /* * Page poisoning when enabled poisons each and every page * that is freed to buddy. Thus no extra check is done to * see if a page was poisoned. */ - check_poison_mem(addr, PAGE_SIZE); + check_poison_mem(kasan_reset_tag(addr), PAGE_SIZE); + kasan_enable_current(); kunmap_atomic(addr); } diff --git a/mm/percpu-internal.h b/mm/percpu-internal.h index 18b768ac7dcae2..095d7eaa0db426 100644 --- a/mm/percpu-internal.h +++ b/mm/percpu-internal.h @@ -87,7 +87,7 @@ extern spinlock_t pcpu_lock; extern struct list_head *pcpu_chunk_lists; extern int pcpu_nr_slots; -extern int pcpu_nr_empty_pop_pages; +extern int pcpu_nr_empty_pop_pages[]; extern struct pcpu_chunk *pcpu_first_chunk; extern struct pcpu_chunk *pcpu_reserved_chunk; diff --git a/mm/percpu-stats.c b/mm/percpu-stats.c index c8400a2adbc2b2..f6026dbcdf6b3b 100644 --- a/mm/percpu-stats.c +++ b/mm/percpu-stats.c @@ -145,6 +145,7 @@ static int percpu_stats_show(struct seq_file *m, void *v) int slot, max_nr_alloc; int *buffer; enum pcpu_chunk_type type; + int nr_empty_pop_pages; alloc_buffer: spin_lock_irq(&pcpu_lock); @@ -165,7 +166,11 @@ alloc_buffer: goto alloc_buffer; } -#define PL(X) \ + nr_empty_pop_pages = 0; + for (type = 0; type < PCPU_NR_CHUNK_TYPES; type++) + nr_empty_pop_pages += pcpu_nr_empty_pop_pages[type]; + +#define PL(X) \ seq_printf(m, " %-20s: %12lld\n", #X, (long long int)pcpu_stats_ai.X) seq_printf(m, @@ -196,7 +201,7 @@ alloc_buffer: PU(nr_max_chunks); PU(min_alloc_size); PU(max_alloc_size); - P("empty_pop_pages", pcpu_nr_empty_pop_pages); + P("empty_pop_pages", nr_empty_pop_pages); seq_putc(m, '\n'); #undef PU diff --git a/mm/percpu.c b/mm/percpu.c index 6596a0a4286e7d..23308113a5ff0b 100644 --- a/mm/percpu.c +++ b/mm/percpu.c @@ -173,10 +173,10 @@ struct list_head *pcpu_chunk_lists __ro_after_init; /* chunk list slots */ static LIST_HEAD(pcpu_map_extend_chunks); /* - * The number of empty populated pages, protected by pcpu_lock. The - * reserved chunk doesn't contribute to the count. + * The number of empty populated pages by chunk type, protected by pcpu_lock. + * The reserved chunk doesn't contribute to the count. */ -int pcpu_nr_empty_pop_pages; +int pcpu_nr_empty_pop_pages[PCPU_NR_CHUNK_TYPES]; /* * The number of populated pages in use by the allocator, protected by @@ -556,7 +556,7 @@ static inline void pcpu_update_empty_pages(struct pcpu_chunk *chunk, int nr) { chunk->nr_empty_pop_pages += nr; if (chunk != pcpu_reserved_chunk) - pcpu_nr_empty_pop_pages += nr; + pcpu_nr_empty_pop_pages[pcpu_chunk_type(chunk)] += nr; } /* @@ -1832,7 +1832,7 @@ area_found: mutex_unlock(&pcpu_alloc_mutex); } - if (pcpu_nr_empty_pop_pages < PCPU_EMPTY_POP_PAGES_LOW) + if (pcpu_nr_empty_pop_pages[type] < PCPU_EMPTY_POP_PAGES_LOW) pcpu_schedule_balance_work(); /* clear the areas and return address relative to base address */ @@ -2000,7 +2000,7 @@ retry_pop: pcpu_atomic_alloc_failed = false; } else { nr_to_pop = clamp(PCPU_EMPTY_POP_PAGES_HIGH - - pcpu_nr_empty_pop_pages, + pcpu_nr_empty_pop_pages[type], 0, PCPU_EMPTY_POP_PAGES_HIGH); } @@ -2580,7 +2580,7 @@ void __init pcpu_setup_first_chunk(const struct pcpu_alloc_info *ai, /* link the first chunk in */ pcpu_first_chunk = chunk; - pcpu_nr_empty_pop_pages = pcpu_first_chunk->nr_empty_pop_pages; + pcpu_nr_empty_pop_pages[PCPU_CHUNK_ROOT] = pcpu_first_chunk->nr_empty_pop_pages; pcpu_chunk_relocate(pcpu_first_chunk, -1); /* include all regions of the first chunk */ diff --git a/mm/ptdump.c b/mm/ptdump.c index 4354c1422d57c9..da751448d0e4ef 100644 --- a/mm/ptdump.c +++ b/mm/ptdump.c @@ -111,7 +111,7 @@ static int ptdump_pte_entry(pte_t *pte, unsigned long addr, unsigned long next, struct mm_walk *walk) { struct ptdump_state *st = walk->private; - pte_t val = READ_ONCE(*pte); + pte_t val = ptep_get(pte); if (st->effective_prot) st->effective_prot(st, 4, pte_val(val)); diff --git a/mm/shuffle.c b/mm/shuffle.c index 9c2e145a747aff..c13c33b247e874 100644 --- a/mm/shuffle.c +++ b/mm/shuffle.c @@ -147,8 +147,8 @@ void __meminit __shuffle_zone(struct zone *z) spin_unlock_irqrestore(&z->lock, flags); } -/** - * shuffle_free_memory - reduce the predictability of the page allocator +/* + * __shuffle_free_memory - reduce the predictability of the page allocator * @pgdat: node page data */ void __meminit __shuffle_free_memory(pg_data_t *pgdat) diff --git a/mm/z3fold.c b/mm/z3fold.c index b5dafa7e44e429..9d889ad2bb8693 100644 --- a/mm/z3fold.c +++ b/mm/z3fold.c @@ -1346,8 +1346,22 @@ static int z3fold_reclaim_page(struct z3fold_pool *pool, unsigned int retries) page = list_entry(pos, struct page, lru); zhdr = page_address(page); - if (test_bit(PAGE_HEADLESS, &page->private)) + if (test_bit(PAGE_HEADLESS, &page->private)) { + /* + * For non-headless pages, we wait to do this + * until we have the page lock to avoid racing + * with __z3fold_alloc(). Headless pages don't + * have a lock (and __z3fold_alloc() will never + * see them), but we still need to test and set + * PAGE_CLAIMED to avoid racing with + * z3fold_free(), so just do it now before + * leaving the loop. + */ + if (test_and_set_bit(PAGE_CLAIMED, &page->private)) + continue; + break; + } if (kref_get_unless_zero(&zhdr->refcount) == 0) { zhdr = NULL; |
