aboutsummaryrefslogtreecommitdiffstats
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/aio.c2
-rw-r--r--fs/autofs/autofs_i.h4
-rw-r--r--fs/autofs/dev-ioctl.c97
-rw-r--r--fs/autofs/expire.c7
-rw-r--r--fs/autofs/inode.c5
-rw-r--r--fs/bcachefs/fs.c10
-rw-r--r--fs/buffer.c8
-rw-r--r--fs/coda/inode.c43
-rw-r--r--fs/dcache.c10
-rw-r--r--fs/debugfs/inode.c8
-rw-r--r--fs/direct-io.c6
-rw-r--r--fs/eventpoll.c7
-rw-r--r--fs/exec.c31
-rw-r--r--fs/fcntl.c10
-rw-r--r--fs/fhandle.c29
-rw-r--r--fs/file.c2
-rw-r--r--fs/file_table.c5
-rw-r--r--fs/fs-writeback.c67
-rw-r--r--fs/inode.c116
-rw-r--r--fs/libfs.c28
-rw-r--r--fs/mnt_idmapping.c12
-rw-r--r--fs/mount.h1
-rw-r--r--fs/namei.c75
-rw-r--r--fs/namespace.c18
-rw-r--r--fs/netfs/locking.c22
-rw-r--r--fs/netfs/main.c4
-rw-r--r--fs/pipe.c2
-rw-r--r--fs/posix_acl.c4
-rw-r--r--fs/proc/base.c10
-rw-r--r--fs/proc/fd.c2
-rw-r--r--fs/proc/kcore.c2
-rw-r--r--fs/read_write.c2
-rw-r--r--fs/select.c2
-rw-r--r--fs/super.c4
34 files changed, 442 insertions, 213 deletions
diff --git a/fs/aio.c b/fs/aio.c
index 6066f64967b3cc..e8920178b50f73 100644
--- a/fs/aio.c
+++ b/fs/aio.c
@@ -100,7 +100,7 @@ struct kioctx {
unsigned long user_id;
- struct __percpu kioctx_cpu *cpu;
+ struct kioctx_cpu __percpu *cpu;
/*
* For percpu reqs_available, number of slots we move to/from global
diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h
index 8c1d587b3eefa4..77c7991d89aace 100644
--- a/fs/autofs/autofs_i.h
+++ b/fs/autofs/autofs_i.h
@@ -62,6 +62,7 @@ struct autofs_info {
struct list_head expiring;
struct autofs_sb_info *sbi;
+ unsigned long exp_timeout;
unsigned long last_used;
int count;
@@ -81,6 +82,9 @@ struct autofs_info {
*/
#define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */
+#define AUTOFS_INF_EXPIRE_SET (1<<3) /* per-dentry expire timeout set for
+ this mount point.
+ */
struct autofs_wait_queue {
wait_queue_head_t queue;
struct autofs_wait_queue *next;
diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c
index 5bf781ea6d6762..f011e026358ebc 100644
--- a/fs/autofs/dev-ioctl.c
+++ b/fs/autofs/dev-ioctl.c
@@ -128,7 +128,13 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param)
goto out;
}
+ /* Setting the per-dentry expire timeout requires a trailing
+ * path component, ie. no '/', so invert the logic of the
+ * check_name() return for AUTOFS_DEV_IOCTL_TIMEOUT_CMD.
+ */
err = check_name(param->path);
+ if (cmd == AUTOFS_DEV_IOCTL_TIMEOUT_CMD)
+ err = err ? 0 : -EINVAL;
if (err) {
pr_warn("invalid path supplied for cmd(0x%08x)\n",
cmd);
@@ -396,16 +402,97 @@ static int autofs_dev_ioctl_catatonic(struct file *fp,
return 0;
}
-/* Set the autofs mount timeout */
+/*
+ * Set the autofs mount expire timeout.
+ *
+ * There are two places an expire timeout can be set, in the autofs
+ * super block info. (this is all that's needed for direct and offset
+ * mounts because there's a distinct mount corresponding to each of
+ * these) and per-dentry within within the dentry info. If a per-dentry
+ * timeout is set it will override the expire timeout set in the parent
+ * autofs super block info.
+ *
+ * If setting the autofs super block expire timeout the autofs_dev_ioctl
+ * size field will be equal to the autofs_dev_ioctl structure size. If
+ * setting the per-dentry expire timeout the mount point name is passed
+ * in the autofs_dev_ioctl path field and the size field updated to
+ * reflect this.
+ *
+ * Setting the autofs mount expire timeout sets the timeout in the super
+ * block info. struct. Setting the per-dentry timeout does a little more.
+ * If the timeout is equal to -1 the per-dentry timeout (and flag) is
+ * cleared which reverts to using the super block timeout, otherwise if
+ * timeout is 0 the timeout is set to this value and the flag is left
+ * set which disables expiration for the mount point, lastly the flag
+ * and the timeout are set enabling the dentry to use this timeout.
+ */
static int autofs_dev_ioctl_timeout(struct file *fp,
struct autofs_sb_info *sbi,
struct autofs_dev_ioctl *param)
{
- unsigned long timeout;
+ unsigned long timeout = param->timeout.timeout;
+
+ /* If setting the expire timeout for an individual indirect
+ * mount point dentry the mount trailing component path is
+ * placed in param->path and param->size adjusted to account
+ * for it otherwise param->size it is set to the structure
+ * size.
+ */
+ if (param->size == AUTOFS_DEV_IOCTL_SIZE) {
+ param->timeout.timeout = sbi->exp_timeout / HZ;
+ sbi->exp_timeout = timeout * HZ;
+ } else {
+ struct dentry *base = fp->f_path.dentry;
+ struct inode *inode = base->d_inode;
+ int path_len = param->size - AUTOFS_DEV_IOCTL_SIZE - 1;
+ struct dentry *dentry;
+ struct autofs_info *ino;
+
+ if (!autofs_type_indirect(sbi->type))
+ return -EINVAL;
+
+ /* An expire timeout greater than the superblock timeout
+ * could be a problem at shutdown but the super block
+ * timeout itself can change so all we can really do is
+ * warn the user.
+ */
+ if (timeout >= sbi->exp_timeout)
+ pr_warn("per-mount expire timeout is greater than "
+ "the parent autofs mount timeout which could "
+ "prevent shutdown\n");
+
+ inode_lock_shared(inode);
+ dentry = try_lookup_one_len(param->path, base, path_len);
+ inode_unlock_shared(inode);
+ if (IS_ERR_OR_NULL(dentry))
+ return dentry ? PTR_ERR(dentry) : -ENOENT;
+ ino = autofs_dentry_ino(dentry);
+ if (!ino) {
+ dput(dentry);
+ return -ENOENT;
+ }
+
+ if (ino->exp_timeout && ino->flags & AUTOFS_INF_EXPIRE_SET)
+ param->timeout.timeout = ino->exp_timeout / HZ;
+ else
+ param->timeout.timeout = sbi->exp_timeout / HZ;
+
+ if (timeout == -1) {
+ /* Revert to using the super block timeout */
+ ino->flags &= ~AUTOFS_INF_EXPIRE_SET;
+ ino->exp_timeout = 0;
+ } else {
+ /* Set the dentry expire flag and timeout.
+ *
+ * If timeout is 0 it will prevent the expire
+ * of this particular automount.
+ */
+ ino->flags |= AUTOFS_INF_EXPIRE_SET;
+ ino->exp_timeout = timeout * HZ;
+ }
+ dput(dentry);
+ }
- timeout = param->timeout.timeout;
- param->timeout.timeout = sbi->exp_timeout / HZ;
- sbi->exp_timeout = timeout * HZ;
return 0;
}
diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c
index 39d8c84c16f4e5..5c2d459e1e484e 100644
--- a/fs/autofs/expire.c
+++ b/fs/autofs/expire.c
@@ -429,8 +429,6 @@ static struct dentry *autofs_expire_indirect(struct super_block *sb,
if (!root)
return NULL;
- timeout = sbi->exp_timeout;
-
dentry = NULL;
while ((dentry = get_next_positive_subdir(dentry, root))) {
spin_lock(&sbi->fs_lock);
@@ -441,6 +439,11 @@ static struct dentry *autofs_expire_indirect(struct super_block *sb,
}
spin_unlock(&sbi->fs_lock);
+ if (ino->flags & AUTOFS_INF_EXPIRE_SET)
+ timeout = ino->exp_timeout;
+ else
+ timeout = sbi->exp_timeout;
+
expired = should_expire(dentry, mnt, timeout, how);
if (!expired)
continue;
diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c
index cf792d4de4f1b3..ee2edccaef70c7 100644
--- a/fs/autofs/inode.c
+++ b/fs/autofs/inode.c
@@ -19,6 +19,7 @@ struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi)
INIT_LIST_HEAD(&ino->expiring);
ino->last_used = jiffies;
ino->sbi = sbi;
+ ino->exp_timeout = -1;
ino->count = 1;
}
return ino;
@@ -28,6 +29,7 @@ void autofs_clean_ino(struct autofs_info *ino)
{
ino->uid = GLOBAL_ROOT_UID;
ino->gid = GLOBAL_ROOT_GID;
+ ino->exp_timeout = -1;
ino->last_used = jiffies;
}
@@ -172,8 +174,7 @@ static int autofs_parse_fd(struct fs_context *fc, struct autofs_sb_info *sbi,
ret = autofs_check_pipe(pipe);
if (ret < 0) {
errorf(fc, "Invalid/unusable pipe");
- if (param->type != fs_value_is_file)
- fput(pipe);
+ fput(pipe);
return -EBADF;
}
diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c
index 257f07656e5f5a..011817afc3adcf 100644
--- a/fs/bcachefs/fs.c
+++ b/fs/bcachefs/fs.c
@@ -1652,14 +1652,16 @@ again:
break;
}
} else if (clean_pass && this_pass_clean) {
- wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW);
- DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW);
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ wq_head = inode_bit_waitqueue(&wqe, &inode->v, __I_NEW);
+ prepare_to_wait_event(wq_head, &wqe.wq_entry,
+ TASK_UNINTERRUPTIBLE);
mutex_unlock(&c->vfs_inodes_lock);
schedule();
- finish_wait(wq, &wait.wq_entry);
+ finish_wait(wq_head, &wqe.wq_entry);
goto again;
}
}
diff --git a/fs/buffer.c b/fs/buffer.c
index e55ad471c53060..74f4eb5c70871d 100644
--- a/fs/buffer.c
+++ b/fs/buffer.c
@@ -774,12 +774,11 @@ EXPORT_SYMBOL(block_dirty_folio);
static int fsync_buffers_list(spinlock_t *lock, struct list_head *list)
{
struct buffer_head *bh;
- struct list_head tmp;
struct address_space *mapping;
int err = 0, err2;
struct blk_plug plug;
+ LIST_HEAD(tmp);
- INIT_LIST_HEAD(&tmp);
blk_start_plug(&plug);
spin_lock(lock);
@@ -958,12 +957,9 @@ no_grow:
}
EXPORT_SYMBOL_GPL(folio_alloc_buffers);
-struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size,
- bool retry)
+struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size)
{
gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT;
- if (retry)
- gfp |= __GFP_NOFAIL;
return folio_alloc_buffers(page_folio(page), size, gfp);
}
diff --git a/fs/coda/inode.c b/fs/coda/inode.c
index 6898dc621011cf..6896fce122e186 100644
--- a/fs/coda/inode.c
+++ b/fs/coda/inode.c
@@ -119,31 +119,43 @@ static const struct fs_parameter_spec coda_param_specs[] = {
{}
};
-static int coda_parse_fd(struct fs_context *fc, int fd)
+static int coda_set_idx(struct fs_context *fc, struct file *file)
{
struct coda_fs_context *ctx = fc->fs_private;
- struct fd f;
struct inode *inode;
int idx;
- f = fdget(fd);
- if (!f.file)
- return -EBADF;
- inode = file_inode(f.file);
+ inode = file_inode(file);
if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) {
- fdput(f);
- return invalf(fc, "code: Not coda psdev");
+ return invalf(fc, "coda: Not coda psdev");
}
-
idx = iminor(inode);
- fdput(f);
-
if (idx < 0 || idx >= MAX_CODADEVS)
return invalf(fc, "coda: Bad minor number");
ctx->idx = idx;
return 0;
}
+static int coda_parse_fd(struct fs_context *fc, struct fs_parameter *param,
+ struct fs_parse_result *result)
+{
+ struct file *file;
+ int err;
+
+ if (param->type == fs_value_is_file) {
+ file = param->file;
+ param->file = NULL;
+ } else {
+ file = fget(result->uint_32);
+ }
+ if (!file)
+ return -EBADF;
+
+ err = coda_set_idx(fc, file);
+ fput(file);
+ return err;
+}
+
static int coda_parse_param(struct fs_context *fc, struct fs_parameter *param)
{
struct fs_parse_result result;
@@ -155,7 +167,7 @@ static int coda_parse_param(struct fs_context *fc, struct fs_parameter *param)
switch (opt) {
case Opt_fd:
- return coda_parse_fd(fc, result.uint_32);
+ return coda_parse_fd(fc, param, &result);
}
return 0;
@@ -167,6 +179,7 @@ static int coda_parse_param(struct fs_context *fc, struct fs_parameter *param)
*/
static int coda_parse_monolithic(struct fs_context *fc, void *_data)
{
+ struct file *file;
struct coda_mount_data *data = _data;
if (!data)
@@ -175,7 +188,11 @@ static int coda_parse_monolithic(struct fs_context *fc, void *_data)
if (data->version != CODA_MOUNT_VERSION)
return invalf(fc, "coda: Bad mount version");
- coda_parse_fd(fc, data->fd);
+ file = fget(data->fd);
+ if (file) {
+ coda_set_idx(fc, file);
+ fput(file);
+ }
return 0;
}
diff --git a/fs/dcache.c b/fs/dcache.c
index 6386b9b625ddbc..0f6b16ba30d082 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -1913,8 +1913,13 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode)
__d_instantiate(entry, inode);
WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW & ~I_CREATING;
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
+ inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(d_instantiate_new);
@@ -2168,9 +2173,6 @@ seqretry:
* without taking d_lock and checking d_seq sequence count against @seq
* returned here.
*
- * A refcount may be taken on the found dentry with the d_rcu_to_refcount
- * function.
- *
* Alternatively, __d_lookup_rcu may be called again to look up the child of
* the returned dentry, so long as its parent's seqlock is checked after the
* child is looked up. Thus, an interlocking stepping of sequence lock checks
diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c
index 91521576f5003e..66d9b3b4c5881d 100644
--- a/fs/debugfs/inode.c
+++ b/fs/debugfs/inode.c
@@ -89,12 +89,14 @@ enum {
Opt_uid,
Opt_gid,
Opt_mode,
+ Opt_source,
};
static const struct fs_parameter_spec debugfs_param_specs[] = {
fsparam_gid ("gid", Opt_gid),
fsparam_u32oct ("mode", Opt_mode),
fsparam_uid ("uid", Opt_uid),
+ fsparam_string ("source", Opt_source),
{}
};
@@ -126,6 +128,12 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param
case Opt_mode:
opts->mode = result.uint_32 & S_IALLUGO;
break;
+ case Opt_source:
+ if (fc->source)
+ return invalfc(fc, "Multiple sources specified");
+ fc->source = param->string;
+ param->string = NULL;
+ break;
/*
* We might like to report bad mount options here;
* but traditionally debugfs has ignored all mount options
diff --git a/fs/direct-io.c b/fs/direct-io.c
index b0aafe640fa428..bbd05f1a21453b 100644
--- a/fs/direct-io.c
+++ b/fs/direct-io.c
@@ -37,7 +37,6 @@
#include <linux/rwsem.h>
#include <linux/uio.h>
#include <linux/atomic.h>
-#include <linux/prefetch.h>
#include "internal.h"
@@ -1121,11 +1120,6 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode,
struct blk_plug plug;
unsigned long align = offset | iov_iter_alignment(iter);
- /*
- * Avoid references to bdev if not absolutely needed to give
- * the early prefetch in the caller enough time.
- */
-
/* watch out for a 0 len io from a tricksy fs */
if (iov_iter_rw(iter) == READ && !count)
return 0;
diff --git a/fs/eventpoll.c b/fs/eventpoll.c
index f53ca4f7fceddd..145f5349c612e2 100644
--- a/fs/eventpoll.c
+++ b/fs/eventpoll.c
@@ -420,7 +420,7 @@ static bool busy_loop_ep_timeout(unsigned long start_time,
static bool ep_busy_loop_on(struct eventpoll *ep)
{
- return !!ep->busy_poll_usecs || net_busy_loop_on();
+ return !!READ_ONCE(ep->busy_poll_usecs) || net_busy_loop_on();
}
static bool ep_busy_loop_end(void *p, unsigned long start_time)
@@ -2200,11 +2200,6 @@ static int do_epoll_create(int flags)
error = PTR_ERR(file);
goto out_free_fd;
}
-#ifdef CONFIG_NET_RX_BUSY_POLL
- ep->busy_poll_usecs = 0;
- ep->busy_poll_budget = 0;
- ep->prefer_busy_poll = false;
-#endif
ep->file = file;
fd_install(fd, file);
return fd;
diff --git a/fs/exec.c b/fs/exec.c
index 50e76cc633c4ba..caae051c5a956d 100644
--- a/fs/exec.c
+++ b/fs/exec.c
@@ -145,13 +145,11 @@ SYSCALL_DEFINE1(uselib, const char __user *, library)
goto out;
/*
- * may_open() has already checked for this, so it should be
- * impossible to trip now. But we need to be extra cautious
- * and check again at the very end too.
+ * Check do_open_execat() for an explanation.
*/
error = -EACCES;
- if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
- path_noexec(&file->f_path)))
+ if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
+ path_noexec(&file->f_path))
goto exit;
error = -ENOEXEC;
@@ -954,7 +952,6 @@ EXPORT_SYMBOL(transfer_args_to_stack);
static struct file *do_open_execat(int fd, struct filename *name, int flags)
{
struct file *file;
- int err;
struct open_flags open_exec_flags = {
.open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC,
.acc_mode = MAY_EXEC,
@@ -971,24 +968,20 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags)
file = do_filp_open(fd, name, &open_exec_flags);
if (IS_ERR(file))
- goto out;
+ return file;
/*
- * may_open() has already checked for this, so it should be
- * impossible to trip now. But we need to be extra cautious
- * and check again at the very end too.
+ * In the past the regular type check was here. It moved to may_open() in
+ * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is
+ * an invariant that all non-regular files error out before we get here.
*/
- err = -EACCES;
- if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) ||
- path_noexec(&file->f_path)))
- goto exit;
+ if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) ||
+ path_noexec(&file->f_path)) {
+ fput(file);
+ return ERR_PTR(-EACCES);
+ }
-out:
return file;
-
-exit:
- fput(file);
- return ERR_PTR(err);
}
/**
diff --git a/fs/fcntl.c b/fs/fcntl.c
index 300e5d9ad913b5..22ec683ad8f8b4 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -343,6 +343,12 @@ static long f_dupfd_query(int fd, struct file *filp)
return f.file == filp;
}
+/* Let the caller figure out whether a given file was just created. */
+static long f_created_query(const struct file *filp)
+{
+ return !!(filp->f_mode & FMODE_CREATED);
+}
+
static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
struct file *filp)
{
@@ -352,6 +358,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
long err = -EINVAL;
switch (cmd) {
+ case F_CREATED_QUERY:
+ err = f_created_query(filp);
+ break;
case F_DUPFD:
err = f_dupfd(argi, filp, 0);
break;
@@ -463,6 +472,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
static int check_fcntl_cmd(unsigned cmd)
{
switch (cmd) {
+ case F_CREATED_QUERY:
case F_DUPFD:
case F_DUPFD_CLOEXEC:
case F_DUPFD_QUERY:
diff --git a/fs/fhandle.c b/fs/fhandle.c
index 6e8cea16790ef2..8cb665629f4a89 100644
--- a/fs/fhandle.c
+++ b/fs/fhandle.c
@@ -16,7 +16,8 @@
static long do_sys_name_to_handle(const struct path *path,
struct file_handle __user *ufh,
- int __user *mnt_id, int fh_flags)
+ void __user *mnt_id, bool unique_mntid,
+ int fh_flags)
{
long retval;
struct file_handle f_handle;
@@ -69,9 +70,19 @@ static long do_sys_name_to_handle(const struct path *path,
} else
retval = 0;
/* copy the mount id */
- if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) ||
- copy_to_user(ufh, handle,
- struct_size(handle, f_handle, handle_bytes)))
+ if (unique_mntid) {
+ if (put_user(real_mount(path->mnt)->mnt_id_unique,
+ (u64 __user *) mnt_id))
+ retval = -EFAULT;
+ } else {
+ if (put_user(real_mount(path->mnt)->mnt_id,
+ (int __user *) mnt_id))
+ retval = -EFAULT;
+ }
+ /* copy the handle */
+ if (retval != -EFAULT &&
+ copy_to_user(ufh, handle,
+ struct_size(handle, f_handle, handle_bytes)))
retval = -EFAULT;
kfree(handle);
return retval;
@@ -83,6 +94,7 @@ static long do_sys_name_to_handle(const struct path *path,
* @name: name that should be converted to handle.
* @handle: resulting file handle
* @mnt_id: mount id of the file system containing the file
+ * (u64 if AT_HANDLE_MNT_ID_UNIQUE, otherwise int)
* @flag: flag value to indicate whether to follow symlink or not
* and whether a decodable file handle is required.
*
@@ -92,7 +104,7 @@ static long do_sys_name_to_handle(const struct path *path,
* value required.
*/
SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
- struct file_handle __user *, handle, int __user *, mnt_id,
+ struct file_handle __user *, handle, void __user *, mnt_id,
int, flag)
{
struct path path;
@@ -100,7 +112,8 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
int fh_flags;
int err;
- if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID))
+ if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID |
+ AT_HANDLE_MNT_ID_UNIQUE))
return -EINVAL;
lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0;
@@ -109,7 +122,9 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name,
lookup_flags |= LOOKUP_EMPTY;
err = user_path_at(dfd, name, lookup_flags, &path);
if (!err) {
- err = do_sys_name_to_handle(&path, handle, mnt_id, fh_flags);
+ err = do_sys_name_to_handle(&path, handle, mnt_id,
+ flag & AT_HANDLE_MNT_ID_UNIQUE,
+ fh_flags);
path_put(&path);
}
return err;
diff --git a/fs/file.c b/fs/file.c
index 655338effe9c72..976ecd4ce2c6c8 100644
--- a/fs/file.c
+++ b/fs/file.c
@@ -672,7 +672,7 @@ int close_fd(unsigned fd)
return filp_close(file, files);
}
-EXPORT_SYMBOL(close_fd); /* for ksys_close() */
+EXPORT_SYMBOL(close_fd);
/**
* last_fd - return last valid index into fd table
diff --git a/fs/file_table.c b/fs/file_table.c
index ca7843dde56dd9..35f2d5d9ca7621 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -136,6 +136,7 @@ static int __init init_fs_stat_sysctls(void)
register_sysctl_init("fs", fs_stat_sysctls);
if (IS_ENABLED(CONFIG_BINFMT_MISC)) {
struct ctl_table_header *hdr;
+
hdr = register_sysctl_mount_point("fs/binfmt_misc");
kmemleak_not_leak(hdr);
}
@@ -383,7 +384,9 @@ EXPORT_SYMBOL_GPL(alloc_file_pseudo_noaccount);
struct file *alloc_file_clone(struct file *base, int flags,
const struct file_operations *fops)
{
- struct file *f = alloc_file(&base->f_path, flags, fops);
+ struct file *f;
+
+ f = alloc_file(&base->f_path, flags, fops);
if (!IS_ERR(f)) {
path_get(&f->f_path);
f->f_mapping = base->f_mapping;
diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c
index b865a3fa52f336..d8bec3c1bb1fa7 100644
--- a/fs/fs-writeback.c
+++ b/fs/fs-writeback.c
@@ -1132,6 +1132,7 @@ out_bdi_put:
/**
* cgroup_writeback_umount - flush inode wb switches for umount
+ * @sb: target super_block
*
* This function is called when a super_block is about to be destroyed and
* flushes in-flight inode wb switches. An inode wb switch goes through
@@ -1140,8 +1141,12 @@ out_bdi_put:
* rare occurrences and synchronize_rcu() can take a while, perform
* flushing iff wb switches are in flight.
*/
-void cgroup_writeback_umount(void)
+void cgroup_writeback_umount(struct super_block *sb)
{
+
+ if (!(sb->s_bdi->capabilities & BDI_CAP_WRITEBACK))
+ return;
+
/*
* SB_ACTIVE should be reliably cleared before checking
* isw_nr_in_flight, see generic_shutdown_super().
@@ -1381,12 +1386,13 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb)
static void inode_sync_complete(struct inode *inode)
{
+ assert_spin_locked(&inode->i_lock);
+
inode->i_state &= ~I_SYNC;
/* If inode is clean an unused, put it into LRU now... */
inode_add_lru(inode);
- /* Waiters must see I_SYNC cleared before being woken up */
- smp_mb();
- wake_up_bit(&inode->i_state, __I_SYNC);
+ /* Called with inode->i_lock which ensures memory ordering. */
+ inode_wake_up_bit(inode, __I_SYNC);
}
static bool inode_dirtied_after(struct inode *inode, unsigned long t)
@@ -1505,30 +1511,27 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc)
* Wait for writeback on an inode to complete. Called with i_lock held.
* Caller must make sure inode cannot go away when we drop i_lock.
*/
-static void __inode_wait_for_writeback(struct inode *inode)
- __releases(inode->i_lock)
- __acquires(inode->i_lock)
+void inode_wait_for_writeback(struct inode *inode)
{
- DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC);
- wait_queue_head_t *wqh;
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
+
+ assert_spin_locked(&inode->i_lock);
- wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
- while (inode->i_state & I_SYNC) {
+ if (!(inode->i_state & I_SYNC))
+ return;
+
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
+ for (;;) {
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+ /* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
+ if (!(inode->i_state & I_SYNC))
+ break;
spin_unlock(&inode->i_lock);
- __wait_on_bit(wqh, &wq, bit_wait,
- TASK_UNINTERRUPTIBLE);
+ schedule();
spin_lock(&inode->i_lock);
}
-}
-
-/*
- * Wait for writeback on an inode to complete. Caller must have inode pinned.
- */
-void inode_wait_for_writeback(struct inode *inode)
-{
- spin_lock(&inode->i_lock);
- __inode_wait_for_writeback(inode);
- spin_unlock(&inode->i_lock);
+ finish_wait(wq_head, &wqe.wq_entry);
}
/*
@@ -1539,16 +1542,20 @@ void inode_wait_for_writeback(struct inode *inode)
static void inode_sleep_on_writeback(struct inode *inode)
__releases(inode->i_lock)
{
- DEFINE_WAIT(wait);
- wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC);
- int sleep;
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
+ bool sleep;
+
+ assert_spin_locked(&inode->i_lock);
- prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE);
- sleep = inode->i_state & I_SYNC;
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC);
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+ /* Checking I_SYNC with inode->i_lock guarantees memory ordering. */
+ sleep = !!(inode->i_state & I_SYNC);
spin_unlock(&inode->i_lock);
if (sleep)
schedule();
- finish_wait(wqh, &wait);
+ finish_wait(wq_head, &wqe.wq_entry);
}
/*
@@ -1752,7 +1759,7 @@ static int writeback_single_inode(struct inode *inode,
*/
if (wbc->sync_mode != WB_SYNC_ALL)
goto out;
- __inode_wait_for_writeback(inode);
+ inode_wait_for_writeback(inode);
}
WARN_ON(inode->i_state & I_SYNC);
/*
diff --git a/fs/inode.c b/fs/inode.c
index 10c4619faeef8c..af78f515403f3d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -472,6 +472,17 @@ static void __inode_add_lru(struct inode *inode, bool rotate)
inode->i_state |= I_REFERENCED;
}
+struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe,
+ struct inode *inode, u32 bit)
+{
+ void *bit_address;
+
+ bit_address = inode_state_wait_address(inode, bit);
+ init_wait_var_entry(wqe, bit_address, 0);
+ return __var_waitqueue(bit_address);
+}
+EXPORT_SYMBOL(inode_bit_waitqueue);
+
/*
* Add inode to LRU if needed (inode is unused and clean).
*
@@ -500,25 +511,35 @@ static void inode_unpin_lru_isolating(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_LRU_ISOLATING));
inode->i_state &= ~I_LRU_ISOLATING;
- smp_mb();
- wake_up_bit(&inode->i_state, __I_LRU_ISOLATING);
+ /* Called with inode->i_lock which ensures memory ordering. */
+ inode_wake_up_bit(inode, __I_LRU_ISOLATING);
spin_unlock(&inode->i_lock);
}
static void inode_wait_for_lru_isolating(struct inode *inode)
{
- spin_lock(&inode->i_lock);
- if (inode->i_state & I_LRU_ISOLATING) {
- DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING);
- wait_queue_head_t *wqh;
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
+
+ lockdep_assert_held(&inode->i_lock);
+ if (!(inode->i_state & I_LRU_ISOLATING))
+ return;
- wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING);
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING);
+ for (;;) {
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
+ /*
+ * Checking I_LRU_ISOLATING with inode->i_lock guarantees
+ * memory ordering.
+ */
+ if (!(inode->i_state & I_LRU_ISOLATING))
+ break;
spin_unlock(&inode->i_lock);
- __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE);
+ schedule();
spin_lock(&inode->i_lock);
- WARN_ON(inode->i_state & I_LRU_ISOLATING);
}
- spin_unlock(&inode->i_lock);
+ finish_wait(wq_head, &wqe.wq_entry);
+ WARN_ON(inode->i_state & I_LRU_ISOLATING);
}
/**
@@ -595,6 +616,7 @@ void dump_mapping(const struct address_space *mapping)
struct hlist_node *dentry_first;
struct dentry *dentry_ptr;
struct dentry dentry;
+ char fname[64] = {};
unsigned long ino;
/*
@@ -631,11 +653,14 @@ void dump_mapping(const struct address_space *mapping)
return;
}
+ if (strncpy_from_kernel_nofault(fname, dentry.d_name.name, 63) < 0)
+ strscpy(fname, "<invalid>");
/*
- * if dentry is corrupted, the %pd handler may still crash,
- * but it's unlikely that we reach here with a corrupt mapping
+ * Even if strncpy_from_kernel_nofault() succeeded,
+ * the fname could be unreliable
*/
- pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry);
+ pr_warn("aops:%ps ino:%lx dentry name(?):\"%s\"\n",
+ a_ops, ino, fname);
}
void clear_inode(struct inode *inode)
@@ -690,6 +715,7 @@ static void evict(struct inode *inode)
inode_sb_list_del(inode);
+ spin_lock(&inode->i_lock);
inode_wait_for_lru_isolating(inode);
/*
@@ -699,6 +725,7 @@ static void evict(struct inode *inode)
* the inode. We just have to wait for running writeback to finish.
*/
inode_wait_for_writeback(inode);
+ spin_unlock(&inode->i_lock);
if (op->evict_inode) {
op->evict_inode(inode);
@@ -722,7 +749,13 @@ static void evict(struct inode *inode)
* used as an indicator whether blocking on it is safe.
*/
spin_lock(&inode->i_lock);
- wake_up_bit(&inode->i_state, __I_NEW);
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
+ smp_mb();
+ inode_wake_up_bit(inode, __I_NEW);
BUG_ON(inode->i_state != (I_FREEING | I_CLEAR));
spin_unlock(&inode->i_lock);
@@ -770,6 +803,10 @@ again:
continue;
spin_lock(&inode->i_lock);
+ if (atomic_read(&inode->i_count)) {
+ spin_unlock(&inode->i_lock);
+ continue;
+ }
if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) {
spin_unlock(&inode->i_lock);
continue;
@@ -1130,8 +1167,13 @@ void unlock_new_inode(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW & ~I_CREATING;
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
+ inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
}
EXPORT_SYMBOL(unlock_new_inode);
@@ -1142,8 +1184,13 @@ void discard_new_inode(struct inode *inode)
spin_lock(&inode->i_lock);
WARN_ON(!(inode->i_state & I_NEW));
inode->i_state &= ~I_NEW;
+ /*
+ * Pairs with the barrier in prepare_to_wait_event() to make sure
+ * ___wait_var_event() either sees the bit cleared or
+ * waitqueue_active() check in wake_up_var() sees the waiter.
+ */
smp_mb();
- wake_up_bit(&inode->i_state, __I_NEW);
+ inode_wake_up_bit(inode, __I_NEW);
spin_unlock(&inode->i_lock);
iput(inode);
}
@@ -1570,9 +1617,7 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino)
struct hlist_head *head = inode_hashtable + hash(sb, ino);
struct inode *inode;
again:
- spin_lock(&inode_hash_lock);
- inode = find_inode_fast(sb, head, ino, true);
- spin_unlock(&inode_hash_lock);
+ inode = find_inode_fast(sb, head, ino, false);
if (inode) {
if (IS_ERR(inode))
@@ -2334,8 +2379,8 @@ EXPORT_SYMBOL(inode_needs_sync);
*/
static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked)
{
- wait_queue_head_t *wq;
- DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW);
+ struct wait_bit_queue_entry wqe;
+ struct wait_queue_head *wq_head;
/*
* Handle racing against evict(), see that routine for more details.
@@ -2346,14 +2391,14 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock
return;
}
- wq = bit_waitqueue(&inode->i_state, __I_NEW);
- prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE);
+ wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW);
+ prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE);
spin_unlock(&inode->i_lock);
rcu_read_unlock();
if (is_inode_hash_locked)
spin_unlock(&inode_hash_lock);
schedule();
- finish_wait(wq, &wait.wq_entry);
+ finish_wait(wq_head, &wqe.wq_entry);
if (is_inode_hash_locked)
spin_lock(&inode_hash_lock);
rcu_read_lock();
@@ -2502,18 +2547,11 @@ EXPORT_SYMBOL(inode_owner_or_capable);
/*
* Direct i/o helper functions
*/
-static void __inode_dio_wait(struct inode *inode)
+bool inode_dio_finished(const struct inode *inode)
{
- wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
- DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
-
- do {
- prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE);
- if (atomic_read(&inode->i_dio_count))
- schedule();
- } while (atomic_read(&inode->i_dio_count));
- finish_wait(wq, &q.wq_entry);
+ return atomic_read(&inode->i_dio_count) == 0;
}
+EXPORT_SYMBOL(inode_dio_finished);
/**
* inode_dio_wait - wait for outstanding DIO requests to finish
@@ -2527,11 +2565,17 @@ static void __inode_dio_wait(struct inode *inode)
*/
void inode_dio_wait(struct inode *inode)
{
- if (atomic_read(&inode->i_dio_count))
- __inode_dio_wait(inode);
+ wait_var_event(&inode->i_dio_count, inode_dio_finished(inode));
}
EXPORT_SYMBOL(inode_dio_wait);
+void inode_dio_wait_interruptible(struct inode *inode)
+{
+ wait_var_event_interruptible(&inode->i_dio_count,
+ inode_dio_finished(inode));
+}
+EXPORT_SYMBOL(inode_dio_wait_interruptible);
+
/*
* inode_set_flags - atomically set some inode flags
*
diff --git a/fs/libfs.c b/fs/libfs.c
index b64b4c44cfea87..0e1b999238027e 100644
--- a/fs/libfs.c
+++ b/fs/libfs.c
@@ -2003,13 +2003,19 @@ bool inode_maybe_inc_iversion(struct inode *inode, bool force)
* information, but the legacy inode_inc_iversion code used a spinlock
* to serialize increments.
*
- * Here, we add full memory barriers to ensure that any de-facto
- * ordering with other info is preserved.
+ * We add a full memory barrier to ensure that any de facto ordering
+ * with other state is preserved (either implicitly coming from cmpxchg
+ * or explicitly from smp_mb if we don't know upfront if we will execute
+ * the former).
*
- * This barrier pairs with the barrier in inode_query_iversion()
+ * These barriers pair with inode_query_iversion().
*/
- smp_mb();
cur = inode_peek_iversion_raw(inode);
+ if (!force && !(cur & I_VERSION_QUERIED)) {
+ smp_mb();
+ cur = inode_peek_iversion_raw(inode);
+ }
+
do {
/* If flag is clear then we needn't do anything */
if (!force && !(cur & I_VERSION_QUERIED))
@@ -2038,20 +2044,22 @@ EXPORT_SYMBOL(inode_maybe_inc_iversion);
u64 inode_query_iversion(struct inode *inode)
{
u64 cur, new;
+ bool fenced = false;
+ /*
+ * Memory barriers (implicit in cmpxchg, explicit in smp_mb) pair with
+ * inode_maybe_inc_iversion(), see that routine for more details.
+ */
cur = inode_peek_iversion_raw(inode);
do {
/* If flag is already set, then no need to swap */
if (cur & I_VERSION_QUERIED) {
- /*
- * This barrier (and the implicit barrier in the
- * cmpxchg below) pairs with the barrier in
- * inode_maybe_inc_iversion().
- */
- smp_mb();
+ if (!fenced)
+ smp_mb();
break;
}
+ fenced = true;
new = cur | I_VERSION_QUERIED;
} while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new));
return cur >> I_VERSION_QUERIED_SHIFT;
diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c
index 3c60f1eaca615a..79491663dbc0cf 100644
--- a/fs/mnt_idmapping.c
+++ b/fs/mnt_idmapping.c
@@ -228,15 +228,15 @@ static int copy_mnt_idmap(struct uid_gid_map *map_from,
return 0;
}
- forward = kmemdup(map_from->forward,
- nr_extents * sizeof(struct uid_gid_extent),
- GFP_KERNEL_ACCOUNT);
+ forward = kmemdup_array(map_from->forward, nr_extents,
+ sizeof(struct uid_gid_extent),
+ GFP_KERNEL_ACCOUNT);
if (!forward)
return -ENOMEM;
- reverse = kmemdup(map_from->reverse,
- nr_extents * sizeof(struct uid_gid_extent),
- GFP_KERNEL_ACCOUNT);
+ reverse = kmemdup_array(map_from->reverse, nr_extents,
+ sizeof(struct uid_gid_extent),
+ GFP_KERNEL_ACCOUNT);
if (!reverse) {
kfree(forward);
return -ENOMEM;
diff --git a/fs/mount.h b/fs/mount.h
index ad4b1ddebb5433..0a78f85cf73703 100644
--- a/fs/mount.h
+++ b/fs/mount.h
@@ -153,5 +153,4 @@ static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list)
list_add_tail(&mnt->mnt_list, dt_list);
}
-extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor);
bool has_locked_children(struct mount *mnt, struct dentry *dentry);
diff --git a/fs/namei.c b/fs/namei.c
index 5512cb10fa8971..2699601bf8e9e7 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -1639,6 +1639,20 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name,
}
EXPORT_SYMBOL(lookup_one_qstr_excl);
+/**
+ * lookup_fast - do fast lockless (but racy) lookup of a dentry
+ * @nd: current nameidata
+ *
+ * Do a fast, but racy lookup in the dcache for the given dentry, and
+ * revalidate it. Returns a valid dentry pointer or NULL if one wasn't
+ * found. On error, an ERR_PTR will be returned.
+ *
+ * If this function returns a valid dentry and the walk is no longer
+ * lazy, the dentry will carry a reference that must later be put. If
+ * RCU mode is still in force, then this is not the case and the dentry
+ * must be legitimized before use. If this returns NULL, then the walk
+ * will no longer be in RCU mode.
+ */
static struct dentry *lookup_fast(struct nameidata *nd)
{
struct dentry *dentry, *parent = nd->path.dentry;
@@ -3521,6 +3535,9 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
return dentry;
}
+ if (open_flag & O_CREAT)
+ audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
+
/*
* Checking write permission is tricky, bacuse we don't know if we are
* going to actually need it: O_CREAT opens should work as long as the
@@ -3591,6 +3608,42 @@ out_dput:
return ERR_PTR(error);
}
+static inline bool trailing_slashes(struct nameidata *nd)
+{
+ return (bool)nd->last.name[nd->last.len];
+}
+
+static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag)
+{
+ struct dentry *dentry;
+
+ if (open_flag & O_CREAT) {
+ if (trailing_slashes(nd))
+ return ERR_PTR(-EISDIR);
+
+ /* Don't bother on an O_EXCL create */
+ if (open_flag & O_EXCL)
+ return NULL;
+ }
+
+ if (trailing_slashes(nd))
+ nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
+
+ dentry = lookup_fast(nd);
+ if (IS_ERR_OR_NULL(dentry))
+ return dentry;
+
+ if (open_flag & O_CREAT) {
+ /* Discard negative dentries. Need inode_lock to do the create */
+ if (!dentry->d_inode) {
+ if (!(nd->flags & LOOKUP_RCU))
+ dput(dentry);
+ dentry = NULL;
+ }
+ }
+ return dentry;
+}
+
static const char *open_last_lookups(struct nameidata *nd,
struct file *file, const struct open_flags *op)
{
@@ -3608,28 +3661,22 @@ static const char *open_last_lookups(struct nameidata *nd,
return handle_dots(nd, nd->last_type);
}
- if (!(open_flag & O_CREAT)) {
- if (nd->last.name[nd->last.len])
- nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY;
- /* we _can_ be in RCU mode here */
- dentry = lookup_fast(nd);
- if (IS_ERR(dentry))
- return ERR_CAST(dentry);
- if (likely(dentry))
- goto finish_lookup;
+ /* We _can_ be in RCU mode here */
+ dentry = lookup_fast_for_open(nd, open_flag);
+ if (IS_ERR(dentry))
+ return ERR_CAST(dentry);
+
+ if (likely(dentry))
+ goto finish_lookup;
+ if (!(open_flag & O_CREAT)) {
if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU))
return ERR_PTR(-ECHILD);
} else {
- /* create side of things */
if (nd->flags & LOOKUP_RCU) {
if (!try_to_unlazy(nd))
return ERR_PTR(-ECHILD);
}
- audit_inode(nd->name, dir, AUDIT_INODE_PARENT);
- /* trailing slashes? */
- if (unlikely(nd->last.name[nd->last.len]))
- return ERR_PTR(-EISDIR);
}
if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) {
diff --git a/fs/namespace.c b/fs/namespace.c
index 328087a4df8a6e..5f2dddee007458 100644
--- a/fs/namespace.c
+++ b/fs/namespace.c
@@ -1774,7 +1774,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how)
list_del_init(&p->mnt_child);
}
- /* Add propogated mounts to the tmp_list */
+ /* Add propagated mounts to the tmp_list */
if (how & UMOUNT_PROPAGATE)
propagate_umount(&tmp_list);
@@ -2921,8 +2921,15 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
if (!__mnt_is_readonly(mnt) &&
(!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) &&
(ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) {
- char *buf = (char *)__get_free_page(GFP_KERNEL);
- char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM);
+ char *buf, *mntpath;
+
+ buf = (char *)__get_free_page(GFP_KERNEL);
+ if (buf)
+ mntpath = d_path(mountpoint, buf, PAGE_SIZE);
+ else
+ mntpath = ERR_PTR(-ENOMEM);
+ if (IS_ERR(mntpath))
+ mntpath = "(unknown)";
pr_warn("%s filesystem being %s at %s supports timestamps until %ptTd (0x%llx)\n",
sb->s_type->name,
@@ -2930,8 +2937,9 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount *
mntpath, &sb->s_time_max,
(unsigned long long)sb->s_time_max);
- free_page((unsigned long)buf);
sb->s_iflags |= SB_I_TS_EXPIRY_WARNED;
+ if (buf)
+ free_page((unsigned long)buf);
}
}
@@ -5605,7 +5613,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns,
/* Only worry about locked mounts */
if (!(child->mnt.mnt_flags & MNT_LOCKED))
continue;
- /* Is the directory permanetly empty? */
+ /* Is the directory permanently empty? */
if (!is_empty_dir_inode(inode))
goto next;
}
diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c
index 75dc52a49b3a46..21eab56ee2f94f 100644
--- a/fs/netfs/locking.c
+++ b/fs/netfs/locking.c
@@ -19,25 +19,13 @@
* Must be called under a lock that serializes taking new references
* to i_dio_count, usually by inode->i_mutex.
*/
-static int inode_dio_wait_interruptible(struct inode *inode)
+static int netfs_inode_dio_wait_interruptible(struct inode *inode)
{
- if (!atomic_read(&inode->i_dio_count))
+ if (inode_dio_finished(inode))
return 0;
- wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP);
- DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP);
-
- for (;;) {
- prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE);
- if (!atomic_read(&inode->i_dio_count))
- break;
- if (signal_pending(current))
- break;
- schedule();
- }
- finish_wait(wq, &q.wq_entry);
-
- return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0;
+ inode_dio_wait_interruptible(inode);
+ return !inode_dio_finished(inode) ? -ERESTARTSYS : 0;
}
/* Call with exclusively locked inode->i_rwsem */
@@ -46,7 +34,7 @@ static int netfs_block_o_direct(struct netfs_inode *ictx)
if (!test_bit(NETFS_ICTX_ODIRECT, &ictx->flags))
return 0;
clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags);
- return inode_dio_wait_interruptible(&ictx->inode);
+ return netfs_inode_dio_wait_interruptible(&ictx->inode);
}
/**
diff --git a/fs/netfs/main.c b/fs/netfs/main.c
index 5f0f438e5d211d..9d6b49dc66945f 100644
--- a/fs/netfs/main.c
+++ b/fs/netfs/main.c
@@ -142,7 +142,7 @@ static int __init netfs_init(void)
error_fscache:
error_procfile:
- remove_proc_entry("fs/netfs", NULL);
+ remove_proc_subtree("fs/netfs", NULL);
error_proc:
mempool_exit(&netfs_subrequest_pool);
error_subreqpool:
@@ -159,7 +159,7 @@ fs_initcall(netfs_init);
static void __exit netfs_exit(void)
{
fscache_exit();
- remove_proc_entry("fs/netfs", NULL);
+ remove_proc_subtree("fs/netfs", NULL);
mempool_exit(&netfs_subrequest_pool);
kmem_cache_destroy(netfs_subrequest_slab);
mempool_exit(&netfs_request_pool);
diff --git a/fs/pipe.c b/fs/pipe.c
index 7dff2aa50a6ddb..9a6dfe39f01270 100644
--- a/fs/pipe.c
+++ b/fs/pipe.c
@@ -1427,7 +1427,7 @@ static const struct super_operations pipefs_ops = {
/*
* pipefs should _never_ be mounted by userland - too much of security hassle,
- * no real gain from having the whole whorehouse mounted. So we don't need
+ * no real gain from having the whole file system mounted. So we don't need
* any operations on the root directory. However, we need a non-trivial
* d_name - pipe: will go nicely and kill the special-casing in procfs.
*/
diff --git a/fs/posix_acl.c b/fs/posix_acl.c
index 3f87297dbfdb70..6c66a37522d001 100644
--- a/fs/posix_acl.c
+++ b/fs/posix_acl.c
@@ -715,8 +715,8 @@ int posix_acl_update_mode(struct mnt_idmap *idmap,
return error;
if (error == 0)
*acl = NULL;
- if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) &&
- !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID))
+ if (!in_group_or_capable(idmap, inode,
+ i_gid_into_vfsgid(idmap, inode)))
mode &= ~S_ISGID;
*mode_p = mode;
return 0;
diff --git a/fs/proc/base.c b/fs/proc/base.c
index 72a1acd03675cc..1409d1003101cf 100644
--- a/fs/proc/base.c
+++ b/fs/proc/base.c
@@ -827,12 +827,9 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode)
static int mem_open(struct inode *inode, struct file *file)
{
- int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH);
-
- /* OK to pass negative loff_t, we can catch out-of-range */
- file->f_mode |= FMODE_UNSIGNED_OFFSET;
-
- return ret;
+ if (WARN_ON_ONCE(!(file->f_op->fop_flags & FOP_UNSIGNED_OFFSET)))
+ return -EINVAL;
+ return __mem_open(inode, file, PTRACE_MODE_ATTACH);
}
static ssize_t mem_rw(struct file *file, char __user *buf,
@@ -932,6 +929,7 @@ static const struct file_operations proc_mem_operations = {
.write = mem_write,
.open = mem_open,
.release = mem_release,
+ .fop_flags = FOP_UNSIGNED_OFFSET,
};
static int environ_open(struct inode *inode, struct file *file)
diff --git a/fs/proc/fd.c b/fs/proc/fd.c
index 586bbc84ca0430..7baafb1eba1306 100644
--- a/fs/proc/fd.c
+++ b/fs/proc/fd.c
@@ -59,7 +59,7 @@ static int seq_show(struct seq_file *m, void *v)
real_mount(file->f_path.mnt)->mnt_id,
file_inode(file)->i_ino);
- /* show_fd_locks() never deferences files so a stale value is safe */
+ /* show_fd_locks() never dereferences files, so a stale value is safe */
show_fd_locks(m, file, files);
if (seq_has_overflowed(m))
goto out;
diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c
index 8e08a9a1b7ed57..7d0acdad74e2ff 100644
--- a/fs/proc/kcore.c
+++ b/fs/proc/kcore.c
@@ -235,7 +235,7 @@ static int kcore_ram_list(struct list_head *list)
int nid, ret;
unsigned long end_pfn;
- /* Not inialized....update now */
+ /* Not initialized....update now */
/* find out "max pfn" */
end_pfn = 0;
for_each_node_state(nid, N_MEMORY) {
diff --git a/fs/read_write.c b/fs/read_write.c
index 90e283b31ca181..89d4af0e3b93d3 100644
--- a/fs/read_write.c
+++ b/fs/read_write.c
@@ -36,7 +36,7 @@ EXPORT_SYMBOL(generic_ro_fops);
static inline bool unsigned_offsets(struct file *file)
{
- return file->f_mode & FMODE_UNSIGNED_OFFSET;
+ return file->f_op->fop_flags & FOP_UNSIGNED_OFFSET;
}
/**
diff --git a/fs/select.c b/fs/select.c
index 9515c3fa1a03e8..1a4849e2afb975 100644
--- a/fs/select.c
+++ b/fs/select.c
@@ -840,7 +840,7 @@ SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg)
struct poll_list {
struct poll_list *next;
unsigned int len;
- struct pollfd entries[];
+ struct pollfd entries[] __counted_by(len);
};
#define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd))
diff --git a/fs/super.c b/fs/super.c
index b7913b55debc1f..1db230432960dc 100644
--- a/fs/super.c
+++ b/fs/super.c
@@ -621,7 +621,7 @@ void generic_shutdown_super(struct super_block *sb)
sync_filesystem(sb);
sb->s_flags &= ~SB_ACTIVE;
- cgroup_writeback_umount();
+ cgroup_writeback_umount(sb);
/* Evict all inodes with zero refcount. */
evict_inodes(sb);
@@ -1905,7 +1905,7 @@ static void lockdep_sb_freeze_release(struct super_block *sb)
int level;
for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--)
- percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_);
+ percpu_rwsem_release(sb->s_writers.rw_sem + level, _THIS_IP_);
}
/*