34 files changed, 442 insertions, 213 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 6066f64967b3cc..e8920178b50f73 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -100,7 +100,7 @@ struct kioctx {
 
 	unsigned long		user_id;
 
-	struct __percpu kioctx_cpu *cpu;
+	struct kioctx_cpu __percpu *cpu;
 
 	/*
 	 * For percpu reqs_available, number of slots we move to/from global
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 8c1d587b3eefa4..77c7991d89aace 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -62,6 +62,7 @@ struct autofs_info {
 	struct list_head expiring;
 
 	struct autofs_sb_info *sbi;
+	unsigned long exp_timeout;
 	unsigned long last_used;
 	int count;
 
@@ -81,6 +82,9 @@ struct autofs_info {
 					*/
 #define AUTOFS_INF_PENDING	(1<<2) /* dentry pending mount */
 
+#define AUTOFS_INF_EXPIRE_SET	(1<<3) /* per-dentry expire timeout set for
+					  this mount point.
+					*/
 struct autofs_wait_queue {
 	wait_queue_head_t queue;
 	struct autofs_wait_queue *next;
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index 5bf781ea6d6762..f011e026358ebc 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -128,7 +128,13 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
 			goto out;
 		}
 
+		/* Setting the per-dentry expire timeout requires a trailing
+		 * path component, ie. no '/', so invert the logic of the
+		 * check_name() return for AUTOFS_DEV_IOCTL_TIMEOUT_CMD.
+		 */
 		err = check_name(param->path);
+		if (cmd == AUTOFS_DEV_IOCTL_TIMEOUT_CMD)
+			err = err ? 0 : -EINVAL;
 		if (err) {
 			pr_warn("invalid path supplied for cmd(0x%08x)\n",
 				cmd);
@@ -396,16 +402,97 @@ static int autofs_dev_ioctl_catatonic(struct file *fp,
 	return 0;
 }
 
-/* Set the autofs mount timeout */
+/*
+ * Set the autofs mount expire timeout.
+ *
+ * There are two places an expire timeout can be set, in the autofs
+ * super block info. (this is all that's needed for direct and offset
+ * mounts because there's a distinct mount corresponding to each of
+ * these) and per-dentry within within the dentry info. If a per-dentry
+ * timeout is set it will override the expire timeout set in the parent
+ * autofs super block info.
+ *
+ * If setting the autofs super block expire timeout the autofs_dev_ioctl
+ * size field will be equal to the autofs_dev_ioctl structure size. If
+ * setting the per-dentry expire timeout the mount point name is passed
+ * in the autofs_dev_ioctl path field and the size field updated to
+ * reflect this.
+ *
+ * Setting the autofs mount expire timeout sets the timeout in the super
+ * block info. struct. Setting the per-dentry timeout does a little more.
+ * If the timeout is equal to -1 the per-dentry timeout (and flag) is
+ * cleared which reverts to using the super block timeout, otherwise if
+ * timeout is 0 the timeout is set to this value and the flag is left
+ * set which disables expiration for the mount point, lastly the flag
+ * and the timeout are set enabling the dentry to use this timeout.
+ */
 static int autofs_dev_ioctl_timeout(struct file *fp,
 				    struct autofs_sb_info *sbi,
 				    struct autofs_dev_ioctl *param)
 {
-	unsigned long timeout;
+	unsigned long timeout = param->timeout.timeout;
+
+	/* If setting the expire timeout for an individual indirect
+	 * mount point dentry the mount trailing component path is
+	 * placed in param->path and param->size adjusted to account
+	 * for it otherwise param->size it is set to the structure
+	 * size.
+	 */
+	if (param->size == AUTOFS_DEV_IOCTL_SIZE) {
+		param->timeout.timeout = sbi->exp_timeout / HZ;
+		sbi->exp_timeout = timeout * HZ;
+	} else {
+		struct dentry *base = fp->f_path.dentry;
+		struct inode *inode = base->d_inode;
+		int path_len = param->size - AUTOFS_DEV_IOCTL_SIZE - 1;
+		struct dentry *dentry;
+		struct autofs_info *ino;
+
+		if (!autofs_type_indirect(sbi->type))
+			return -EINVAL;
+
+		/* An expire timeout greater than the superblock timeout
+		 * could be a problem at shutdown but the super block
+		 * timeout itself can change so all we can really do is
+		 * warn the user.
+		 */
+		if (timeout >= sbi->exp_timeout)
+			pr_warn("per-mount expire timeout is greater than "
+				"the parent autofs mount timeout which could "
+				"prevent shutdown\n");
+
+		inode_lock_shared(inode);
+		dentry = try_lookup_one_len(param->path, base, path_len);
+		inode_unlock_shared(inode);
+		if (IS_ERR_OR_NULL(dentry))
+			return dentry ? PTR_ERR(dentry) : -ENOENT;
+		ino = autofs_dentry_ino(dentry);
+		if (!ino) {
+			dput(dentry);
+			return -ENOENT;
+		}
+
+		if (ino->exp_timeout && ino->flags & AUTOFS_INF_EXPIRE_SET)
+			param->timeout.timeout = ino->exp_timeout / HZ;
+		else
+			param->timeout.timeout = sbi->exp_timeout / HZ;
+
+		if (timeout == -1) {
+			/* Revert to using the super block timeout */
+			ino->flags &= ~AUTOFS_INF_EXPIRE_SET;
+			ino->exp_timeout = 0;
+		} else {
+			/* Set the dentry expire flag and timeout.
+			 *
+			 * If timeout is 0 it will prevent the expire
+			 * of this particular automount.
+			 */
+			ino->flags |= AUTOFS_INF_EXPIRE_SET;
+			ino->exp_timeout = timeout * HZ;
+		}
+		dput(dentry);
+	}
 
-	timeout = param->timeout.timeout;
-	param->timeout.timeout = sbi->exp_timeout / HZ;
-	sbi->exp_timeout = timeout * HZ;
 	return 0;
 }
 
diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c
index 39d8c84c16f4e5..5c2d459e1e484e 100644
--- a/fs/autofs/expire.c
+++ b/fs/autofs/expire.c
@@ -429,8 +429,6 @@ static struct dentry *autofs_expire_indirect(struct super_block *sb,
 	if (!root)
 		return NULL;
 
-	timeout = sbi->exp_timeout;
-
 	dentry = NULL;
 	while ((dentry = get_next_positive_subdir(dentry, root))) {
 		spin_lock(&sbi->fs_lock);
@@ -441,6 +439,11 @@ static struct dentry *autofs_expire_indirect(struct super_block *sb,
 		}
 		spin_unlock(&sbi->fs_lock);
 
+		if (ino->flags & AUTOFS_INF_EXPIRE_SET)
+			timeout = ino->exp_timeout;
+		else
+			timeout = sbi->exp_timeout;
+
 		expired = should_expire(dentry, mnt, timeout, how);
 		if (!expired)
 			continue;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index cf792d4de4f1b3..ee2edccaef70c7 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -19,6 +19,7 @@ struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi)
 		INIT_LIST_HEAD(&ino->expiring);
 		ino->last_used = jiffies;
 		ino->sbi = sbi;
+		ino->exp_timeout = -1;
 		ino->count = 1;
 	}
 	return ino;
@@ -28,6 +29,7 @@ void autofs_clean_ino(struct autofs_info *ino)
 {
 	ino->uid = GLOBAL_ROOT_UID;
 	ino->gid = GLOBAL_ROOT_GID;
+	ino->exp_timeout = -1;
 	ino->last_used = jiffies;
 }
 
@@ -172,8 +174,7 @@ static int autofs_parse_fd(struct fs_context *fc, struct autofs_sb_info *sbi,
 	ret = autofs_check_pipe(pipe);
 	if (ret < 0) {
 		errorf(fc, "Invalid/unusable pipe");
-		if (param->type != fs_value_is_file)
-			fput(pipe);
+		fput(pipe);
 		return -EBADF;
 	}
 
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 257f07656e5f5a..011817afc3adcf 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1652,14 +1652,16 @@ again:
 				break;
 			}
 		} else if (clean_pass && this_pass_clean) {
-			wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
-			DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
+			struct wait_bit_queue_entry wqe;
+			struct wait_queue_head *wq_head;
 
-			prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+			wq_head = inode_bit_waitqueue(&wqe, &inode->v, __I_NEW);
+			prepare_to_wait_event(wq_head, &wqe.wq_entry,
+					      TASK_UNINTERRUPTIBLE);
 			mutex_unlock(&c->vfs_inodes_lock);
 
 			schedule();
-			finish_wait(wq, &wait.wq_entry);
+			finish_wait(wq_head, &wqe.wq_entry);
 			goto again;
 		}
 	}
diff --git a/fs/buffer.c b/fs/buffer.c
index e55ad471c53060..74f4eb5c70871d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -774,12 +774,11 @@ EXPORT_SYMBOL(block_dirty_folio);
 static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
 {
 	struct buffer_head *bh;
-	struct list_head tmp;
 	struct address_space *mapping;
 	int err = 0, err2;
 	struct blk_plug plug;
+	LIST_HEAD(tmp);
 
-	INIT_LIST_HEAD(&tmp);
 	blk_start_plug(&plug);
 
 	spin_lock(lock);
@@ -958,12 +957,9 @@ no_grow:
 }
 EXPORT_SYMBOL_GPL(folio_alloc_buffers);
 
-struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
-				       bool retry)
+struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size)
 {
 	gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
-	if (retry)
-		gfp |= __GFP_NOFAIL;
 
 	return folio_alloc_buffers(page_folio(page), size, gfp);
 }
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 6898dc621011cf..6896fce122e186 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -119,31 +119,43 @@ static const struct fs_parameter_spec coda_param_specs[] = {
 	{}
 };
 
-static int coda_parse_fd(struct fs_context *fc, int fd)
+static int coda_set_idx(struct fs_context *fc, struct file *file)
 {
 	struct coda_fs_context *ctx = fc->fs_private;
-	struct fd f;
 	struct inode *inode;
 	int idx;
 
-	f = fdget(fd);
-	if (!f.file)
-		return -EBADF;
-	inode = file_inode(f.file);
+	inode = file_inode(file);
 	if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) {
-		fdput(f);
-		return invalf(fc, "code: Not coda psdev");
+		return invalf(fc, "coda: Not coda psdev");
 	}
-
 	idx = iminor(inode);
-	fdput(f);
-
 	if (idx < 0 || idx >= MAX_CODADEVS)
 		return invalf(fc, "coda: Bad minor number");
 	ctx->idx = idx;
 	return 0;
 }
 
+static int coda_parse_fd(struct fs_context *fc, struct fs_parameter *param,
+			 struct fs_parse_result *result)
+{
+	struct file *file;
+	int err;
+
+	if (param->type == fs_value_is_file) {
+		file = param->file;
+		param->file = NULL;
+	} else {
+		file = fget(result->uint_32);
+	}
+	if (!file)
+		return -EBADF;
+
+	err = coda_set_idx(fc, file);
+	fput(file);
+	return err;
+}
+
 static int coda_parse_param(struct fs_context *fc, struct fs_parameter *param)
 {
 	struct fs_parse_result result;
@@ -155,7 +167,7 @@ static int coda_parse_param(struct fs_context *fc, struct fs_parameter *param)
 
 	switch (opt) {
 	case Opt_fd:
-		return coda_parse_fd(fc, result.uint_32);
+		return coda_parse_fd(fc, param, &result);
 	}
 
 	return 0;
@@ -167,6 +179,7 @@ static int coda_parse_param(struct fs_context *fc, struct fs_parameter *param)
  */
 static int coda_parse_monolithic(struct fs_context *fc, void *_data)
 {
+	struct file *file;
 	struct coda_mount_data *data = _data;
 
 	if (!data)
@@ -175,7 +188,11 @@ static int coda_parse_monolithic(struct fs_context *fc, void *_data)
 	if (data->version != CODA_MOUNT_VERSION)
 		return invalf(fc, "coda: Bad mount version");
 
-	coda_parse_fd(fc, data->fd);
+	file = fget(data->fd);
+	if (file) {
+		coda_set_idx(fc, file);
+		fput(file);
+	}
 	return 0;
 }
 
diff --git a/fs/dcache.c b/fs/dcache.c
index 6386b9b625ddbc..0f6b16ba30d082 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1913,8 +1913,13 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
 	__d_instantiate(entry, inode);
 	WARN_ON(!(inode->i_state & I_NEW));
 	inode->i_state &= ~I_NEW & ~I_CREATING;
+	/*
+	 * Pairs with the barrier in prepare_to_wait_event() to make sure
+	 * ___wait_var_event() either sees the bit cleared or
+	 * waitqueue_active() check in wake_up_var() sees the waiter.
+	 */
 	smp_mb();
-	wake_up_bit(&inode->i_state, __I_NEW);
+	inode_wake_up_bit(inode, __I_NEW);
 	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(d_instantiate_new);
@@ -2168,9 +2173,6 @@ seqretry:
  * without taking d_lock and checking d_seq sequence count against @seq
  * returned here.
  *
- * A refcount may be taken on the found dentry with the d_rcu_to_refcount
- * function.
- *
  * Alternatively, __d_lookup_rcu may be called again to look up the child of
  * the returned dentry, so long as its parent's seqlock is checked after the
  * child is looked up. Thus, an interlocking stepping of sequence lock checks
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 91521576f5003e..66d9b3b4c5881d 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -89,12 +89,14 @@ enum {
 	Opt_uid,
 	Opt_gid,
 	Opt_mode,
+	Opt_source,
 };
 
 static const struct fs_parameter_spec debugfs_param_specs[] = {
 	fsparam_gid	("gid",		Opt_gid),
 	fsparam_u32oct	("mode",	Opt_mode),
 	fsparam_uid	("uid",		Opt_uid),
+	fsparam_string	("source",	Opt_source),
 	{}
 };
 
@@ -126,6 +128,12 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param
 	case Opt_mode:
 		opts->mode = result.uint_32 & S_IALLUGO;
 		break;
+	case Opt_source:
+		if (fc->source)
+			return invalfc(fc, "Multiple sources specified");
+		fc->source = param->string;
+		param->string = NULL;
+		break;
 	/*
 	 * We might like to report bad mount options here;
 	 * but traditionally debugfs has ignored all mount options
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b0aafe640fa428..bbd05f1a21453b 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -37,7 +37,6 @@
 #include <linux/rwsem.h>
 #include <linux/uio.h>
 #include <linux/atomic.h>
-#include <linux/prefetch.h>
 
 #include "internal.h"
 
@@ -1121,11 +1120,6 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
 	struct blk_plug plug;
 	unsigned long align = offset | iov_iter_alignment(iter);
 
-	/*
-	 * Avoid references to bdev if not absolutely needed to give
-	 * the early prefetch in the caller enough time.
-	 */
-
 	/* watch out for a 0 len io from a tricksy fs */
 	if (iov_iter_rw(iter) == READ && !count)
 		return 0;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f53ca4f7fceddd..145f5349c612e2 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -420,7 +420,7 @@ static bool busy_loop_ep_timeout(unsigned long start_time,
 
 static bool ep_busy_loop_on(struct eventpoll *ep)
 {
-	return !!ep->busy_poll_usecs || net_busy_loop_on();
+	return !!READ_ONCE(ep->busy_poll_usecs) || net_busy_loop_on();
 }
 
 static bool ep_busy_loop_end(void *p, unsigned long start_time)
@@ -2200,11 +2200,6 @@ static int do_epoll_create(int flags)
 		error = PTR_ERR(file);
 		goto out_free_fd;
 	}
-#ifdef CONFIG_NET_RX_BUSY_POLL
-	ep->busy_poll_usecs = 0;
-	ep->busy_poll_budget = 0;
-	ep->prefer_busy_poll = false;
-#endif
 	ep->file = file;
 	fd_install(fd, file);
 	return fd;
diff --git a/fs/exec.c b/fs/exec.c
index 50e76cc633c4ba..caae051c5a956d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -145,13 +145,11 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
 		goto out;
 
 	/*
-	 * may_open() has already checked for this, so it should be
-	 * impossible to trip now. But we need to be extra cautious
-	 * and check again at the very end too.
+	 * Check do_open_execat() for an explanation.
 	 */
 	error = -EACCES;
-	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
-			 path_noexec(&file->f_path)))
+	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
+	    path_noexec(&file->f_path))
 		goto exit;
 
 	error = -ENOEXEC;
@@ -954,7 +952,6 @@ EXPORT_SYMBOL(transfer_args_to_stack);
 static struct file *do_open_execat(int fd, struct filename *name, int flags)
 {
 	struct file *file;
-	int err;
 	struct open_flags open_exec_flags = {
 		.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
 		.acc_mode = MAY_EXEC,
@@ -971,24 +968,20 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
 
 	file = do_filp_open(fd, name, &open_exec_flags);
 	if (IS_ERR(file))
-		goto out;
+		return file;
 
 	/*
-	 * may_open() has already checked for this, so it should be
-	 * impossible to trip now. But we need to be extra cautious
-	 * and check again at the very end too.
+	 * In the past the regular type check was here. It moved to may_open() in
+	 * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is
+	 * an invariant that all non-regular files error out before we get here.
 	 */
-	err = -EACCES;
-	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
-			 path_noexec(&file->f_path)))
-		goto exit;
+	if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
+	    path_noexec(&file->f_path)) {
+		fput(file);
+		return ERR_PTR(-EACCES);
+	}
 
-out:
 	return file;
-
-exit:
-	fput(file);
-	return ERR_PTR(err);
 }
 
 /**
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 300e5d9ad913b5..22ec683ad8f8b4 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -343,6 +343,12 @@ static long f_dupfd_query(int fd, struct file *filp)
 	return f.file == filp;
 }
 
+/* Let the caller figure out whether a given file was just created. */
+static long f_created_query(const struct file *filp)
+{
+	return !!(filp->f_mode & FMODE_CREATED);
+}
+
 static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 		struct file *filp)
 {
@@ -352,6 +358,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 	long err = -EINVAL;
 
 	switch (cmd) {
+	case F_CREATED_QUERY:
+		err = f_created_query(filp);
+		break;
 	case F_DUPFD:
 		err = f_dupfd(argi, filp, 0);
 		break;
@@ -463,6 +472,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
 static int check_fcntl_cmd(unsigned cmd)
 {
 	switch (cmd) {
+	case F_CREATED_QUERY:
 	case F_DUPFD:
 	case F_DUPFD_CLOEXEC:
 	case F_DUPFD_QUERY:
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 6e8cea16790ef2..8cb665629f4a89 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -16,7 +16,8 @@
 
 static long do_sys_name_to_handle(const struct path *path,
 				  struct file_handle __user *ufh,
-				  int __user *mnt_id, int fh_flags)
+				  void __user *mnt_id, bool unique_mntid,
+				  int fh_flags)
 {
 	long retval;
 	struct file_handle f_handle;
@@ -69,9 +70,19 @@ static long do_sys_name_to_handle(const struct path *path,
 	} else
 		retval = 0;
 	/* copy the mount id */
-	if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
-	    copy_to_user(ufh, handle,
-			 struct_size(handle, f_handle, handle_bytes)))
+	if (unique_mntid) {
+		if (put_user(real_mount(path->mnt)->mnt_id_unique,
+			     (u64 __user *) mnt_id))
+			retval = -EFAULT;
+	} else {
+		if (put_user(real_mount(path->mnt)->mnt_id,
+			     (int __user *) mnt_id))
+			retval = -EFAULT;
+	}
+	/* copy the handle */
+	if (retval != -EFAULT &&
+		copy_to_user(ufh, handle,
+			     struct_size(handle, f_handle, handle_bytes)))
 		retval = -EFAULT;
 	kfree(handle);
 	return retval;
@@ -83,6 +94,7 @@ static long do_sys_name_to_handle(const struct path *path,
  * @name: name that should be converted to handle.
  * @handle: resulting file handle
  * @mnt_id: mount id of the file system containing the file
+ *          (u64 if AT_HANDLE_MNT_ID_UNIQUE, otherwise int)
  * @flag: flag value to indicate whether to follow symlink or not
  *        and whether a decodable file handle is required.
  *
@@ -92,7 +104,7 @@ static long do_sys_name_to_handle(const struct path *path,
  * value required.
  */
 SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
-		struct file_handle __user *, handle, int __user *, mnt_id,
+		struct file_handle __user *, handle, void __user *, mnt_id,
 		int, flag)
 {
 	struct path path;
@@ -100,7 +112,8 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
 	int fh_flags;
 	int err;
 
-	if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID))
+	if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID |
+		     AT_HANDLE_MNT_ID_UNIQUE))
 		return -EINVAL;
 
 	lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
@@ -109,7 +122,9 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
 		lookup_flags |= LOOKUP_EMPTY;
 	err = user_path_at(dfd, name, lookup_flags, &path);
 	if (!err) {
-		err = do_sys_name_to_handle(&path, handle, mnt_id, fh_flags);
+		err = do_sys_name_to_handle(&path, handle, mnt_id,
+					    flag & AT_HANDLE_MNT_ID_UNIQUE,
+					    fh_flags);
 		path_put(&path);
 	}
 	return err;
diff --git a/fs/file.c b/fs/file.c
index 655338effe9c72..976ecd4ce2c6c8 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -672,7 +672,7 @@ int close_fd(unsigned fd)
 
 	return filp_close(file, files);
 }
-EXPORT_SYMBOL(close_fd); /* for ksys_close() */
+EXPORT_SYMBOL(close_fd);
 
 /**
  * last_fd - return last valid index into fd table
diff --git a/fs/file_table.c b/fs/file_table.c
index ca7843dde56dd9..35f2d5d9ca7621 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -136,6 +136,7 @@ static int __init init_fs_stat_sysctls(void)
 	register_sysctl_init("fs", fs_stat_sysctls);
 	if (IS_ENABLED(CONFIG_BINFMT_MISC)) {
 		struct ctl_table_header *hdr;
+
 		hdr = register_sysctl_mount_point("fs/binfmt_misc");
 		kmemleak_not_leak(hdr);
 	}
@@ -383,7 +384,9 @@ EXPORT_SYMBOL_GPL(alloc_file_pseudo_noaccount);
 struct file *alloc_file_clone(struct file *base, int flags,
 				const struct file_operations *fops)
 {
-	struct file *f = alloc_file(&base->f_path, flags, fops);
+	struct file *f;
+
+	f = alloc_file(&base->f_path, flags, fops);
 	if (!IS_ERR(f)) {
 		path_get(&f->f_path);
 		f->f_mapping = base->f_mapping;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b865a3fa52f336..d8bec3c1bb1fa7 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1132,6 +1132,7 @@ out_bdi_put:
 
 /**
  * cgroup_writeback_umount - flush inode wb switches for umount
+ * @sb: target super_block
  *
  * This function is called when a super_block is about to be destroyed and
  * flushes in-flight inode wb switches.  An inode wb switch goes through
@@ -1140,8 +1141,12 @@ out_bdi_put:
  * rare occurrences and synchronize_rcu() can take a while, perform
  * flushing iff wb switches are in flight.
  */
-void cgroup_writeback_umount(void)
+void cgroup_writeback_umount(struct super_block *sb)
 {
+
+	if (!(sb->s_bdi->capabilities & BDI_CAP_WRITEBACK))
+		return;
+
 	/*
 	 * SB_ACTIVE should be reliably cleared before checking
 	 * isw_nr_in_flight, see generic_shutdown_super().
@@ -1381,12 +1386,13 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
 
 static void inode_sync_complete(struct inode *inode)
 {
+	assert_spin_locked(&inode->i_lock);
+
 	inode->i_state &= ~I_SYNC;
 	/* If inode is clean an unused, put it into LRU now... */
 	inode_add_lru(inode);
-	/* Waiters must see I_SYNC cleared before being woken up */
-	smp_mb();
-	wake_up_bit(&inode->i_state, __I_SYNC);
+	/* Called with inode->i_lock which ensures memory ordering. */
+	inode_wake_up_bit(inode, __I_SYNC);
 }
 
 static bool inode_dirtied_after(struct inode *inode, unsigned long t)
@@ -1505,30 +1511,27 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
  * Wait for writeback on an inode to complete. Called with i_lock held.
  * Caller must make sure inode cannot go away when we drop i_lock.
  */
-static void __inode_wait_for_writeback(struct inode *inode)
-	__releases(inode->i_lock)
-	__acquires(inode->i_lock)
+void inode_wait_for_writeback(struct inode *inode)
 {
-	DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
-	wait_queue_head_t *wqh;
+	struct wait_bit_queue_entry wqe;
+	struct wait_queue_head *wq_head;
+
+	assert_spin_locked(&inode->i_lock);
 
-	wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
-	while (inode->i_state & I_SYNC) {
+	if (!(inode->i_state & I_SYNC))
+		return;
+
+	wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
+	for (;;) {
+		prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+		/* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
+		if (!(inode->i_state & I_SYNC))
+			break;
 		spin_unlock(&inode->i_lock);
-		__wait_on_bit(wqh, &wq, bit_wait,
-			      TASK_UNINTERRUPTIBLE);
+		schedule();
 		spin_lock(&inode->i_lock);
 	}
-}
-
-/*
- * Wait for writeback on an inode to complete. Caller must have inode pinned.
- */
-void inode_wait_for_writeback(struct inode *inode)
-{
-	spin_lock(&inode->i_lock);
-	__inode_wait_for_writeback(inode);
-	spin_unlock(&inode->i_lock);
+	finish_wait(wq_head, &wqe.wq_entry);
 }
 
 /*
@@ -1539,16 +1542,20 @@ void inode_wait_for_writeback(struct inode *inode)
 static void inode_sleep_on_writeback(struct inode *inode)
 	__releases(inode->i_lock)
 {
-	DEFINE_WAIT(wait);
-	wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
-	int sleep;
+	struct wait_bit_queue_entry wqe;
+	struct wait_queue_head *wq_head;
+	bool sleep;
+
+	assert_spin_locked(&inode->i_lock);
 
-	prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
-	sleep = inode->i_state & I_SYNC;
+	wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
+	prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+	/* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
+	sleep = !!(inode->i_state & I_SYNC);
 	spin_unlock(&inode->i_lock);
 	if (sleep)
 		schedule();
-	finish_wait(wqh, &wait);
+	finish_wait(wq_head, &wqe.wq_entry);
 }
 
 /*
@@ -1752,7 +1759,7 @@ static int writeback_single_inode(struct inode *inode,
 		 */
 		if (wbc->sync_mode != WB_SYNC_ALL)
 			goto out;
-		__inode_wait_for_writeback(inode);
+		inode_wait_for_writeback(inode);
 	}
 	WARN_ON(inode->i_state & I_SYNC);
 	/*
diff --git a/fs/inode.c b/fs/inode.c
index 10c4619faeef8c..af78f515403f3d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -472,6 +472,17 @@ static void __inode_add_lru(struct inode *inode, bool rotate)
 		inode->i_state |= I_REFERENCED;
 }
 
+struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
+					    struct inode *inode, u32 bit)
+{
+        void *bit_address;
+
+        bit_address = inode_state_wait_address(inode, bit);
+        init_wait_var_entry(wqe, bit_address, 0);
+        return __var_waitqueue(bit_address);
+}
+EXPORT_SYMBOL(inode_bit_waitqueue);
+
 /*
  * Add inode to LRU if needed (inode is unused and clean).
  *
@@ -500,25 +511,35 @@ static void inode_unpin_lru_isolating(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
 	inode->i_state &= ~I_LRU_ISOLATING;
-	smp_mb();
-	wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
+	/* Called with inode->i_lock which ensures memory ordering. */
+	inode_wake_up_bit(inode, __I_LRU_ISOLATING);
 	spin_unlock(&inode->i_lock);
 }
 
 static void inode_wait_for_lru_isolating(struct inode *inode)
 {
-	spin_lock(&inode->i_lock);
-	if (inode->i_state & I_LRU_ISOLATING) {
-		DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
-		wait_queue_head_t *wqh;
+	struct wait_bit_queue_entry wqe;
+	struct wait_queue_head *wq_head;
+
+	lockdep_assert_held(&inode->i_lock);
+	if (!(inode->i_state & I_LRU_ISOLATING))
+		return;
 
-		wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
+	wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING);
+	for (;;) {
+		prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+		/*
+		 * Checking I_LRU_ISOLATING with inode->i_lock guarantees
+		 * memory ordering.
+		 */
+		if (!(inode->i_state & I_LRU_ISOLATING))
+			break;
 		spin_unlock(&inode->i_lock);
-		__wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
+		schedule();
 		spin_lock(&inode->i_lock);
-		WARN_ON(inode->i_state & I_LRU_ISOLATING);
 	}
-	spin_unlock(&inode->i_lock);
+	finish_wait(wq_head, &wqe.wq_entry);
+	WARN_ON(inode->i_state & I_LRU_ISOLATING);
 }
 
 /**
@@ -595,6 +616,7 @@ void dump_mapping(const struct address_space *mapping)
 	struct hlist_node *dentry_first;
 	struct dentry *dentry_ptr;
 	struct dentry dentry;
+	char fname[64] = {};
 	unsigned long ino;
 
 	/*
@@ -631,11 +653,14 @@ void dump_mapping(const struct address_space *mapping)
 		return;
 	}
 
+	if (strncpy_from_kernel_nofault(fname, dentry.d_name.name, 63) < 0)
+		strscpy(fname, "<invalid>");
 	/*
-	 * if dentry is corrupted, the %pd handler may still crash,
-	 * but it's unlikely that we reach here with a corrupt mapping
+	 * Even if strncpy_from_kernel_nofault() succeeded,
+	 * the fname could be unreliable
 	 */
-	pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry);
+	pr_warn("aops:%ps ino:%lx dentry name(?):\"%s\"\n",
+		a_ops, ino, fname);
 }
 
 void clear_inode(struct inode *inode)
@@ -690,6 +715,7 @@ static void evict(struct inode *inode)
 
 	inode_sb_list_del(inode);
 
+	spin_lock(&inode->i_lock);
 	inode_wait_for_lru_isolating(inode);
 
 	/*
@@ -699,6 +725,7 @@ static void evict(struct inode *inode)
 	 * the inode.  We just have to wait for running writeback to finish.
 	 */
 	inode_wait_for_writeback(inode);
+	spin_unlock(&inode->i_lock);
 
 	if (op->evict_inode) {
 		op->evict_inode(inode);
@@ -722,7 +749,13 @@ static void evict(struct inode *inode)
 	 * used as an indicator whether blocking on it is safe.
 	 */
 	spin_lock(&inode->i_lock);
-	wake_up_bit(&inode->i_state, __I_NEW);
+	/*
+	 * Pairs with the barrier in prepare_to_wait_event() to make sure
+	 * ___wait_var_event() either sees the bit cleared or
+	 * waitqueue_active() check in wake_up_var() sees the waiter.
+	 */
+	smp_mb();
+	inode_wake_up_bit(inode, __I_NEW);
 	BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
 	spin_unlock(&inode->i_lock);
 
@@ -770,6 +803,10 @@ again:
 			continue;
 
 		spin_lock(&inode->i_lock);
+		if (atomic_read(&inode->i_count)) {
+			spin_unlock(&inode->i_lock);
+			continue;
+		}
 		if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
 			spin_unlock(&inode->i_lock);
 			continue;
@@ -1130,8 +1167,13 @@ void unlock_new_inode(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode->i_state & I_NEW));
 	inode->i_state &= ~I_NEW & ~I_CREATING;
+	/*
+	 * Pairs with the barrier in prepare_to_wait_event() to make sure
+	 * ___wait_var_event() either sees the bit cleared or
+	 * waitqueue_active() check in wake_up_var() sees the waiter.
+	 */
 	smp_mb();
-	wake_up_bit(&inode->i_state, __I_NEW);
+	inode_wake_up_bit(inode, __I_NEW);
 	spin_unlock(&inode->i_lock);
 }
 EXPORT_SYMBOL(unlock_new_inode);
@@ -1142,8 +1184,13 @@ void discard_new_inode(struct inode *inode)
 	spin_lock(&inode->i_lock);
 	WARN_ON(!(inode->i_state & I_NEW));
 	inode->i_state &= ~I_NEW;
+	/*
+	 * Pairs with the barrier in prepare_to_wait_event() to make sure
+	 * ___wait_var_event() either sees the bit cleared or
+	 * waitqueue_active() check in wake_up_var() sees the waiter.
+	 */
 	smp_mb();
-	wake_up_bit(&inode->i_state, __I_NEW);
+	inode_wake_up_bit(inode, __I_NEW);
 	spin_unlock(&inode->i_lock);
 	iput(inode);
 }
@@ -1570,9 +1617,7 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
 	struct hlist_head *head = inode_hashtable + hash(sb, ino);
 	struct inode *inode;
 again:
-	spin_lock(&inode_hash_lock);
-	inode = find_inode_fast(sb, head, ino, true);
-	spin_unlock(&inode_hash_lock);
+	inode = find_inode_fast(sb, head, ino, false);
 
 	if (inode) {
 		if (IS_ERR(inode))
@@ -2334,8 +2379,8 @@ EXPORT_SYMBOL(inode_needs_sync);
  */
 static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
 {
-	wait_queue_head_t *wq;
-	DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
+	struct wait_bit_queue_entry wqe;
+	struct wait_queue_head *wq_head;
 
 	/*
 	 * Handle racing against evict(), see that routine for more details.
@@ -2346,14 +2391,14 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
 		return;
 	}
 
-	wq = bit_waitqueue(&inode->i_state, __I_NEW);
-	prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+	wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
+	prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
 	spin_unlock(&inode->i_lock);
 	rcu_read_unlock();
 	if (is_inode_hash_locked)
 		spin_unlock(&inode_hash_lock);
 	schedule();
-	finish_wait(wq, &wait.wq_entry);
+	finish_wait(wq_head, &wqe.wq_entry);
 	if (is_inode_hash_locked)
 		spin_lock(&inode_hash_lock);
 	rcu_read_lock();
@@ -2502,18 +2547,11 @@ EXPORT_SYMBOL(inode_owner_or_capable);
 /*
  * Direct i/o helper functions
  */
-static void __inode_dio_wait(struct inode *inode)
+bool inode_dio_finished(const struct inode *inode)
 {
-	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
-	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
-
-	do {
-		prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
-		if (atomic_read(&inode->i_dio_count))
-			schedule();
-	} while (atomic_read(&inode->i_dio_count));
-	finish_wait(wq, &q.wq_entry);
+	return atomic_read(&inode->i_dio_count) == 0;
 }
+EXPORT_SYMBOL(inode_dio_finished);
 
 /**
  * inode_dio_wait - wait for outstanding DIO requests to finish
@@ -2527,11 +2565,17 @@ static void __inode_dio_wait(struct inode *inode)
  */
 void inode_dio_wait(struct inode *inode)
 {
-	if (atomic_read(&inode->i_dio_count))
-		__inode_dio_wait(inode);
+	wait_var_event(&inode->i_dio_count, inode_dio_finished(inode));
 }
 EXPORT_SYMBOL(inode_dio_wait);
 
+void inode_dio_wait_interruptible(struct inode *inode)
+{
+	wait_var_event_interruptible(&inode->i_dio_count,
+				     inode_dio_finished(inode));
+}
+EXPORT_SYMBOL(inode_dio_wait_interruptible);
+
 /*
  * inode_set_flags - atomically set some inode flags
  *
diff --git a/fs/libfs.c b/fs/libfs.c
index b64b4c44cfea87..0e1b999238027e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -2003,13 +2003,19 @@ bool inode_maybe_inc_iversion(struct inode *inode, bool force)
 	 * information, but the legacy inode_inc_iversion code used a spinlock
 	 * to serialize increments.
 	 *
-	 * Here, we add full memory barriers to ensure that any de-facto
-	 * ordering with other info is preserved.
+	 * We add a full memory barrier to ensure that any de facto ordering
+	 * with other state is preserved (either implicitly coming from cmpxchg
+	 * or explicitly from smp_mb if we don't know upfront if we will execute
+	 * the former).
 	 *
-	 * This barrier pairs with the barrier in inode_query_iversion()
+	 * These barriers pair with inode_query_iversion().
 	 */
-	smp_mb();
 	cur = inode_peek_iversion_raw(inode);
+	if (!force && !(cur & I_VERSION_QUERIED)) {
+		smp_mb();
+		cur = inode_peek_iversion_raw(inode);
+	}
+
 	do {
 		/* If flag is clear then we needn't do anything */
 		if (!force && !(cur & I_VERSION_QUERIED))
@@ -2038,20 +2044,22 @@ EXPORT_SYMBOL(inode_maybe_inc_iversion);
 u64 inode_query_iversion(struct inode *inode)
 {
 	u64 cur, new;
+	bool fenced = false;
 
+	/*
+	 * Memory barriers (implicit in cmpxchg, explicit in smp_mb) pair with
+	 * inode_maybe_inc_iversion(), see that routine for more details.
+	 */
 	cur = inode_peek_iversion_raw(inode);
 	do {
 		/* If flag is already set, then no need to swap */
 		if (cur & I_VERSION_QUERIED) {
-			/*
-			 * This barrier (and the implicit barrier in the
-			 * cmpxchg below) pairs with the barrier in
-			 * inode_maybe_inc_iversion().
-			 */
-			smp_mb();
+			if (!fenced)
+				smp_mb();
 			break;
 		}
 
+		fenced = true;
 		new = cur | I_VERSION_QUERIED;
 	} while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new));
 	return cur >> I_VERSION_QUERIED_SHIFT;
diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
index 3c60f1eaca615a..79491663dbc0cf 100644
--- a/fs/mnt_idmapping.c
+++ b/fs/mnt_idmapping.c
@@ -228,15 +228,15 @@ static int copy_mnt_idmap(struct uid_gid_map *map_from,
 		return 0;
 	}
 
-	forward = kmemdup(map_from->forward,
-			  nr_extents * sizeof(struct uid_gid_extent),
-			  GFP_KERNEL_ACCOUNT);
+	forward = kmemdup_array(map_from->forward, nr_extents,
+				sizeof(struct uid_gid_extent),
+				GFP_KERNEL_ACCOUNT);
 	if (!forward)
 		return -ENOMEM;
 
-	reverse = kmemdup(map_from->reverse,
-			  nr_extents * sizeof(struct uid_gid_extent),
-			  GFP_KERNEL_ACCOUNT);
+	reverse = kmemdup_array(map_from->reverse, nr_extents,
+				sizeof(struct uid_gid_extent),
+				GFP_KERNEL_ACCOUNT);
 	if (!reverse) {
 		kfree(forward);
 		return -ENOMEM;
diff --git a/fs/mount.h b/fs/mount.h
index ad4b1ddebb5433..0a78f85cf73703 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -153,5 +153,4 @@ static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
 	list_add_tail(&mnt->mnt_list, dt_list);
 }
 
-extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
 bool has_locked_children(struct mount *mnt, struct dentry *dentry);
diff --git a/fs/namei.c b/fs/namei.c
index 5512cb10fa8971..2699601bf8e9e7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1639,6 +1639,20 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
 }
 EXPORT_SYMBOL(lookup_one_qstr_excl);
 
+/**
+ * lookup_fast - do fast lockless (but racy) lookup of a dentry
+ * @nd: current nameidata
+ *
+ * Do a fast, but racy lookup in the dcache for the given dentry, and
+ * revalidate it. Returns a valid dentry pointer or NULL if one wasn't
+ * found. On error, an ERR_PTR will be returned.
+ *
+ * If this function returns a valid dentry and the walk is no longer
+ * lazy, the dentry will carry a reference that must later be put. If
+ * RCU mode is still in force, then this is not the case and the dentry
+ * must be legitimized before use. If this returns NULL, then the walk
+ * will no longer be in RCU mode.
+ */
 static struct dentry *lookup_fast(struct nameidata *nd)
 {
 	struct dentry *dentry, *parent = nd->path.dentry;
@@ -3521,6 +3535,9 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
 		return dentry;
 	}
 
+	if (open_flag & O_CREAT)
+		audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
+
 	/*
 	 * Checking write permission is tricky, bacuse we don't know if we are
 	 * going to actually need it: O_CREAT opens should work as long as the
@@ -3591,6 +3608,42 @@ out_dput:
 	return ERR_PTR(error);
 }
 
+static inline bool trailing_slashes(struct nameidata *nd)
+{
+	return (bool)nd->last.name[nd->last.len];
+}
+
+static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag)
+{
+	struct dentry *dentry;
+
+	if (open_flag & O_CREAT) {
+		if (trailing_slashes(nd))
+			return ERR_PTR(-EISDIR);
+
+		/* Don't bother on an O_EXCL create */
+		if (open_flag & O_EXCL)
+			return NULL;
+	}
+
+	if (trailing_slashes(nd))
+		nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+
+	dentry = lookup_fast(nd);
+	if (IS_ERR_OR_NULL(dentry))
+		return dentry;
+
+	if (open_flag & O_CREAT) {
+		/* Discard negative dentries. Need inode_lock to do the create */
+		if (!dentry->d_inode) {
+			if (!(nd->flags & LOOKUP_RCU))
+				dput(dentry);
+			dentry = NULL;
+		}
+	}
+	return dentry;
+}
+
 static const char *open_last_lookups(struct nameidata *nd,
 		   struct file *file, const struct open_flags *op)
 {
@@ -3608,28 +3661,22 @@ static const char *open_last_lookups(struct nameidata *nd,
 		return handle_dots(nd, nd->last_type);
 	}
 
-	if (!(open_flag & O_CREAT)) {
-		if (nd->last.name[nd->last.len])
-			nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
-		/* we _can_ be in RCU mode here */
-		dentry = lookup_fast(nd);
-		if (IS_ERR(dentry))
-			return ERR_CAST(dentry);
-		if (likely(dentry))
-			goto finish_lookup;
+	/* We _can_ be in RCU mode here */
+	dentry = lookup_fast_for_open(nd, open_flag);
+	if (IS_ERR(dentry))
+		return ERR_CAST(dentry);
+
+	if (likely(dentry))
+		goto finish_lookup;
 
+	if (!(open_flag & O_CREAT)) {
 		if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU))
 			return ERR_PTR(-ECHILD);
 	} else {
-		/* create side of things */
 		if (nd->flags & LOOKUP_RCU) {
 			if (!try_to_unlazy(nd))
 				return ERR_PTR(-ECHILD);
 		}
-		audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
-		/* trailing slashes? */
-		if (unlikely(nd->last.name[nd->last.len]))
-			return ERR_PTR(-EISDIR);
 	}
 
 	if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
diff --git a/fs/namespace.c b/fs/namespace.c
index 328087a4df8a6e..5f2dddee007458 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1774,7 +1774,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
 		list_del_init(&p->mnt_child);
 	}
 
-	/* Add propogated mounts to the tmp_list */
+	/* Add propagated mounts to the tmp_list */
 	if (how & UMOUNT_PROPAGATE)
 		propagate_umount(&tmp_list);
 
@@ -2921,8 +2921,15 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
 	if (!__mnt_is_readonly(mnt) &&
 	   (!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) &&
 	   (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) {
-		char *buf = (char *)__get_free_page(GFP_KERNEL);
-		char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM);
+		char *buf, *mntpath;
+
+		buf = (char *)__get_free_page(GFP_KERNEL);
+		if (buf)
+			mntpath = d_path(mountpoint, buf, PAGE_SIZE);
+		else
+			mntpath = ERR_PTR(-ENOMEM);
+		if (IS_ERR(mntpath))
+			mntpath = "(unknown)";
 
 		pr_warn("%s filesystem being %s at %s supports timestamps until %ptTd (0x%llx)\n",
 			sb->s_type->name,
@@ -2930,8 +2937,9 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
 			mntpath, &sb->s_time_max,
 			(unsigned long long)sb->s_time_max);
 
-		free_page((unsigned long)buf);
 		sb->s_iflags |= SB_I_TS_EXPIRY_WARNED;
+		if (buf)
+			free_page((unsigned long)buf);
 	}
 }
 
@@ -5605,7 +5613,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
 			/* Only worry about locked mounts */
 			if (!(child->mnt.mnt_flags & MNT_LOCKED))
 				continue;
-			/* Is the directory permanetly empty? */
+			/* Is the directory permanently empty? */
 			if (!is_empty_dir_inode(inode))
 				goto next;
 		}
diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c
index 75dc52a49b3a46..21eab56ee2f94f 100644
--- a/fs/netfs/locking.c
+++ b/fs/netfs/locking.c
@@ -19,25 +19,13 @@
  * Must be called under a lock that serializes taking new references
  * to i_dio_count, usually by inode->i_mutex.
  */
-static int inode_dio_wait_interruptible(struct inode *inode)
+static int netfs_inode_dio_wait_interruptible(struct inode *inode)
 {
-	if (!atomic_read(&inode->i_dio_count))
+	if (inode_dio_finished(inode))
 		return 0;
 
-	wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
-	DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
-
-	for (;;) {
-		prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE);
-		if (!atomic_read(&inode->i_dio_count))
-			break;
-		if (signal_pending(current))
-			break;
-		schedule();
-	}
-	finish_wait(wq, &q.wq_entry);
-
-	return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0;
+	inode_dio_wait_interruptible(inode);
+	return !inode_dio_finished(inode) ? -ERESTARTSYS : 0;
 }
 
 /* Call with exclusively locked inode->i_rwsem */
@@ -46,7 +34,7 @@ static int netfs_block_o_direct(struct netfs_inode *ictx)
 	if (!test_bit(NETFS_ICTX_ODIRECT, &ictx->flags))
 		return 0;
 	clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags);
-	return inode_dio_wait_interruptible(&ictx->inode);
+	return netfs_inode_dio_wait_interruptible(&ictx->inode);
 }
 
 /**
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 5f0f438e5d211d..9d6b49dc66945f 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -142,7 +142,7 @@ static int __init netfs_init(void)
 
 error_fscache:
 error_procfile:
-	remove_proc_entry("fs/netfs", NULL);
+	remove_proc_subtree("fs/netfs", NULL);
 error_proc:
 	mempool_exit(&netfs_subrequest_pool);
 error_subreqpool:
@@ -159,7 +159,7 @@ fs_initcall(netfs_init);
 static void __exit netfs_exit(void)
 {
 	fscache_exit();
-	remove_proc_entry("fs/netfs", NULL);
+	remove_proc_subtree("fs/netfs", NULL);
 	mempool_exit(&netfs_subrequest_pool);
 	kmem_cache_destroy(netfs_subrequest_slab);
 	mempool_exit(&netfs_request_pool);
diff --git a/fs/pipe.c b/fs/pipe.c
index 7dff2aa50a6ddb..9a6dfe39f01270 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1427,7 +1427,7 @@ static const struct super_operations pipefs_ops = {
 
 /*
  * pipefs should _never_ be mounted by userland - too much of security hassle,
- * no real gain from having the whole whorehouse mounted. So we don't need
+ * no real gain from having the whole file system mounted. So we don't need
  * any operations on the root directory. However, we need a non-trivial
  * d_name - pipe: will go nicely and kill the special-casing in procfs.
  */
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 3f87297dbfdb70..6c66a37522d001 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -715,8 +715,8 @@ int posix_acl_update_mode(struct mnt_idmap *idmap,
 		return error;
 	if (error == 0)
 		*acl = NULL;
-	if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) &&
-	    !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
+	if (!in_group_or_capable(idmap, inode,
+				 i_gid_into_vfsgid(idmap, inode)))
 		mode &= ~S_ISGID;
 	*mode_p = mode;
 	return 0;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 72a1acd03675cc..1409d1003101cf 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -827,12 +827,9 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
 
 static int mem_open(struct inode *inode, struct file *file)
 {
-	int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);
-
-	/* OK to pass negative loff_t, we can catch out-of-range */
-	file->f_mode |= FMODE_UNSIGNED_OFFSET;
-
-	return ret;
+	if (WARN_ON_ONCE(!(file->f_op->fop_flags & FOP_UNSIGNED_OFFSET)))
+		return -EINVAL;
+	return __mem_open(inode, file, PTRACE_MODE_ATTACH);
 }
 
 static ssize_t mem_rw(struct file *file, char __user *buf,
@@ -932,6 +929,7 @@ static const struct file_operations proc_mem_operations = {
 	.write		= mem_write,
 	.open		= mem_open,
 	.release	= mem_release,
+	.fop_flags	= FOP_UNSIGNED_OFFSET,
 };
 
 static int environ_open(struct inode *inode, struct file *file)
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 586bbc84ca0430..7baafb1eba1306 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -59,7 +59,7 @@ static int seq_show(struct seq_file *m, void *v)
 		   real_mount(file->f_path.mnt)->mnt_id,
 		   file_inode(file)->i_ino);
 
-	/* show_fd_locks() never deferences files so a stale value is safe */
+	/* show_fd_locks() never dereferences files, so a stale value is safe */
 	show_fd_locks(m, file, files);
 	if (seq_has_overflowed(m))
 		goto out;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 8e08a9a1b7ed57..7d0acdad74e2ff 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -235,7 +235,7 @@ static int kcore_ram_list(struct list_head *list)
 	int nid, ret;
 	unsigned long end_pfn;
 
-	/* Not inialized....update now */
+	/* Not initialized....update now */
 	/* find out "max pfn" */
 	end_pfn = 0;
 	for_each_node_state(nid, N_MEMORY) {
diff --git a/fs/read_write.c b/fs/read_write.c
index 90e283b31ca181..89d4af0e3b93d3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL(generic_ro_fops);
 
 static inline bool unsigned_offsets(struct file *file)
 {
-	return file->f_mode & FMODE_UNSIGNED_OFFSET;
+	return file->f_op->fop_flags & FOP_UNSIGNED_OFFSET;
 }
 
 /**
diff --git a/fs/select.c b/fs/select.c
index 9515c3fa1a03e8..1a4849e2afb975 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -840,7 +840,7 @@ SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
 struct poll_list {
 	struct poll_list *next;
 	unsigned int len;
-	struct pollfd entries[];
+	struct pollfd entries[] __counted_by(len);
 };
 
 #define POLLFD_PER_PAGE  ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
diff --git a/fs/super.c b/fs/super.c
index b7913b55debc1f..1db230432960dc 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -621,7 +621,7 @@ void generic_shutdown_super(struct super_block *sb)
 		sync_filesystem(sb);
 		sb->s_flags &= ~SB_ACTIVE;
 
-		cgroup_writeback_umount();
+		cgroup_writeback_umount(sb);
 
 		/* Evict all inodes with zero refcount. */
 		evict_inodes(sb);
@@ -1905,7 +1905,7 @@ static void lockdep_sb_freeze_release(struct super_block *sb)
 	int level;
 
 	for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
-		percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+		percpu_rwsem_release(sb->s_writers.rw_sem + level, _THIS_IP_);
 }
 
 /*