diff options
Diffstat (limited to 'fs')
| -rw-r--r-- | fs/aio.c | 2 | ||||
| -rw-r--r-- | fs/autofs/autofs_i.h | 4 | ||||
| -rw-r--r-- | fs/autofs/dev-ioctl.c | 97 | ||||
| -rw-r--r-- | fs/autofs/expire.c | 7 | ||||
| -rw-r--r-- | fs/autofs/inode.c | 5 | ||||
| -rw-r--r-- | fs/bcachefs/fs.c | 10 | ||||
| -rw-r--r-- | fs/buffer.c | 8 | ||||
| -rw-r--r-- | fs/coda/inode.c | 43 | ||||
| -rw-r--r-- | fs/dcache.c | 10 | ||||
| -rw-r--r-- | fs/debugfs/inode.c | 8 | ||||
| -rw-r--r-- | fs/direct-io.c | 6 | ||||
| -rw-r--r-- | fs/eventpoll.c | 7 | ||||
| -rw-r--r-- | fs/exec.c | 31 | ||||
| -rw-r--r-- | fs/fcntl.c | 10 | ||||
| -rw-r--r-- | fs/fhandle.c | 29 | ||||
| -rw-r--r-- | fs/file.c | 2 | ||||
| -rw-r--r-- | fs/file_table.c | 5 | ||||
| -rw-r--r-- | fs/fs-writeback.c | 67 | ||||
| -rw-r--r-- | fs/inode.c | 116 | ||||
| -rw-r--r-- | fs/libfs.c | 28 | ||||
| -rw-r--r-- | fs/mnt_idmapping.c | 12 | ||||
| -rw-r--r-- | fs/mount.h | 1 | ||||
| -rw-r--r-- | fs/namei.c | 75 | ||||
| -rw-r--r-- | fs/namespace.c | 18 | ||||
| -rw-r--r-- | fs/netfs/locking.c | 22 | ||||
| -rw-r--r-- | fs/netfs/main.c | 4 | ||||
| -rw-r--r-- | fs/pipe.c | 2 | ||||
| -rw-r--r-- | fs/posix_acl.c | 4 | ||||
| -rw-r--r-- | fs/proc/base.c | 10 | ||||
| -rw-r--r-- | fs/proc/fd.c | 2 | ||||
| -rw-r--r-- | fs/proc/kcore.c | 2 | ||||
| -rw-r--r-- | fs/read_write.c | 2 | ||||
| -rw-r--r-- | fs/select.c | 2 | ||||
| -rw-r--r-- | fs/super.c | 4 |
34 files changed, 442 insertions, 213 deletions
diff --git a/fs/aio.c b/fs/aio.c index 6066f64967b3cc..e8920178b50f73 100644 --- a/fs/aio.c +++ b/fs/aio.c @@ -100,7 +100,7 @@ struct kioctx { unsigned long user_id; - struct __percpu kioctx_cpu *cpu; + struct kioctx_cpu __percpu *cpu; /* * For percpu reqs_available, number of slots we move to/from global diff --git a/fs/autofs/autofs_i.h b/fs/autofs/autofs_i.h index 8c1d587b3eefa4..77c7991d89aace 100644 --- a/fs/autofs/autofs_i.h +++ b/fs/autofs/autofs_i.h @@ -62,6 +62,7 @@ struct autofs_info { struct list_head expiring; struct autofs_sb_info *sbi; + unsigned long exp_timeout; unsigned long last_used; int count; @@ -81,6 +82,9 @@ struct autofs_info { */ #define AUTOFS_INF_PENDING (1<<2) /* dentry pending mount */ +#define AUTOFS_INF_EXPIRE_SET (1<<3) /* per-dentry expire timeout set for + this mount point. + */ struct autofs_wait_queue { wait_queue_head_t queue; struct autofs_wait_queue *next; diff --git a/fs/autofs/dev-ioctl.c b/fs/autofs/dev-ioctl.c index 5bf781ea6d6762..f011e026358ebc 100644 --- a/fs/autofs/dev-ioctl.c +++ b/fs/autofs/dev-ioctl.c @@ -128,7 +128,13 @@ static int validate_dev_ioctl(int cmd, struct autofs_dev_ioctl *param) goto out; } + /* Setting the per-dentry expire timeout requires a trailing + * path component, ie. no '/', so invert the logic of the + * check_name() return for AUTOFS_DEV_IOCTL_TIMEOUT_CMD. + */ err = check_name(param->path); + if (cmd == AUTOFS_DEV_IOCTL_TIMEOUT_CMD) + err = err ? 0 : -EINVAL; if (err) { pr_warn("invalid path supplied for cmd(0x%08x)\n", cmd); @@ -396,16 +402,97 @@ static int autofs_dev_ioctl_catatonic(struct file *fp, return 0; } -/* Set the autofs mount timeout */ +/* + * Set the autofs mount expire timeout. + * + * There are two places an expire timeout can be set, in the autofs + * super block info. (this is all that's needed for direct and offset + * mounts because there's a distinct mount corresponding to each of + * these) and per-dentry within within the dentry info. If a per-dentry + * timeout is set it will override the expire timeout set in the parent + * autofs super block info. + * + * If setting the autofs super block expire timeout the autofs_dev_ioctl + * size field will be equal to the autofs_dev_ioctl structure size. If + * setting the per-dentry expire timeout the mount point name is passed + * in the autofs_dev_ioctl path field and the size field updated to + * reflect this. + * + * Setting the autofs mount expire timeout sets the timeout in the super + * block info. struct. Setting the per-dentry timeout does a little more. + * If the timeout is equal to -1 the per-dentry timeout (and flag) is + * cleared which reverts to using the super block timeout, otherwise if + * timeout is 0 the timeout is set to this value and the flag is left + * set which disables expiration for the mount point, lastly the flag + * and the timeout are set enabling the dentry to use this timeout. + */ static int autofs_dev_ioctl_timeout(struct file *fp, struct autofs_sb_info *sbi, struct autofs_dev_ioctl *param) { - unsigned long timeout; + unsigned long timeout = param->timeout.timeout; + + /* If setting the expire timeout for an individual indirect + * mount point dentry the mount trailing component path is + * placed in param->path and param->size adjusted to account + * for it otherwise param->size it is set to the structure + * size. + */ + if (param->size == AUTOFS_DEV_IOCTL_SIZE) { + param->timeout.timeout = sbi->exp_timeout / HZ; + sbi->exp_timeout = timeout * HZ; + } else { + struct dentry *base = fp->f_path.dentry; + struct inode *inode = base->d_inode; + int path_len = param->size - AUTOFS_DEV_IOCTL_SIZE - 1; + struct dentry *dentry; + struct autofs_info *ino; + + if (!autofs_type_indirect(sbi->type)) + return -EINVAL; + + /* An expire timeout greater than the superblock timeout + * could be a problem at shutdown but the super block + * timeout itself can change so all we can really do is + * warn the user. + */ + if (timeout >= sbi->exp_timeout) + pr_warn("per-mount expire timeout is greater than " + "the parent autofs mount timeout which could " + "prevent shutdown\n"); + + inode_lock_shared(inode); + dentry = try_lookup_one_len(param->path, base, path_len); + inode_unlock_shared(inode); + if (IS_ERR_OR_NULL(dentry)) + return dentry ? PTR_ERR(dentry) : -ENOENT; + ino = autofs_dentry_ino(dentry); + if (!ino) { + dput(dentry); + return -ENOENT; + } + + if (ino->exp_timeout && ino->flags & AUTOFS_INF_EXPIRE_SET) + param->timeout.timeout = ino->exp_timeout / HZ; + else + param->timeout.timeout = sbi->exp_timeout / HZ; + + if (timeout == -1) { + /* Revert to using the super block timeout */ + ino->flags &= ~AUTOFS_INF_EXPIRE_SET; + ino->exp_timeout = 0; + } else { + /* Set the dentry expire flag and timeout. + * + * If timeout is 0 it will prevent the expire + * of this particular automount. + */ + ino->flags |= AUTOFS_INF_EXPIRE_SET; + ino->exp_timeout = timeout * HZ; + } + dput(dentry); + } - timeout = param->timeout.timeout; - param->timeout.timeout = sbi->exp_timeout / HZ; - sbi->exp_timeout = timeout * HZ; return 0; } diff --git a/fs/autofs/expire.c b/fs/autofs/expire.c index 39d8c84c16f4e5..5c2d459e1e484e 100644 --- a/fs/autofs/expire.c +++ b/fs/autofs/expire.c @@ -429,8 +429,6 @@ static struct dentry *autofs_expire_indirect(struct super_block *sb, if (!root) return NULL; - timeout = sbi->exp_timeout; - dentry = NULL; while ((dentry = get_next_positive_subdir(dentry, root))) { spin_lock(&sbi->fs_lock); @@ -441,6 +439,11 @@ static struct dentry *autofs_expire_indirect(struct super_block *sb, } spin_unlock(&sbi->fs_lock); + if (ino->flags & AUTOFS_INF_EXPIRE_SET) + timeout = ino->exp_timeout; + else + timeout = sbi->exp_timeout; + expired = should_expire(dentry, mnt, timeout, how); if (!expired) continue; diff --git a/fs/autofs/inode.c b/fs/autofs/inode.c index cf792d4de4f1b3..ee2edccaef70c7 100644 --- a/fs/autofs/inode.c +++ b/fs/autofs/inode.c @@ -19,6 +19,7 @@ struct autofs_info *autofs_new_ino(struct autofs_sb_info *sbi) INIT_LIST_HEAD(&ino->expiring); ino->last_used = jiffies; ino->sbi = sbi; + ino->exp_timeout = -1; ino->count = 1; } return ino; @@ -28,6 +29,7 @@ void autofs_clean_ino(struct autofs_info *ino) { ino->uid = GLOBAL_ROOT_UID; ino->gid = GLOBAL_ROOT_GID; + ino->exp_timeout = -1; ino->last_used = jiffies; } @@ -172,8 +174,7 @@ static int autofs_parse_fd(struct fs_context *fc, struct autofs_sb_info *sbi, ret = autofs_check_pipe(pipe); if (ret < 0) { errorf(fc, "Invalid/unusable pipe"); - if (param->type != fs_value_is_file) - fput(pipe); + fput(pipe); return -EBADF; } diff --git a/fs/bcachefs/fs.c b/fs/bcachefs/fs.c index 257f07656e5f5a..011817afc3adcf 100644 --- a/fs/bcachefs/fs.c +++ b/fs/bcachefs/fs.c @@ -1652,14 +1652,16 @@ again: break; } } else if (clean_pass && this_pass_clean) { - wait_queue_head_t *wq = bit_waitqueue(&inode->v.i_state, __I_NEW); - DEFINE_WAIT_BIT(wait, &inode->v.i_state, __I_NEW); + struct wait_bit_queue_entry wqe; + struct wait_queue_head *wq_head; - prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); + wq_head = inode_bit_waitqueue(&wqe, &inode->v, __I_NEW); + prepare_to_wait_event(wq_head, &wqe.wq_entry, + TASK_UNINTERRUPTIBLE); mutex_unlock(&c->vfs_inodes_lock); schedule(); - finish_wait(wq, &wait.wq_entry); + finish_wait(wq_head, &wqe.wq_entry); goto again; } } diff --git a/fs/buffer.c b/fs/buffer.c index e55ad471c53060..74f4eb5c70871d 100644 --- a/fs/buffer.c +++ b/fs/buffer.c @@ -774,12 +774,11 @@ EXPORT_SYMBOL(block_dirty_folio); static int fsync_buffers_list(spinlock_t *lock, struct list_head *list) { struct buffer_head *bh; - struct list_head tmp; struct address_space *mapping; int err = 0, err2; struct blk_plug plug; + LIST_HEAD(tmp); - INIT_LIST_HEAD(&tmp); blk_start_plug(&plug); spin_lock(lock); @@ -958,12 +957,9 @@ no_grow: } EXPORT_SYMBOL_GPL(folio_alloc_buffers); -struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, - bool retry) +struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size) { gfp_t gfp = GFP_NOFS | __GFP_ACCOUNT; - if (retry) - gfp |= __GFP_NOFAIL; return folio_alloc_buffers(page_folio(page), size, gfp); } diff --git a/fs/coda/inode.c b/fs/coda/inode.c index 6898dc621011cf..6896fce122e186 100644 --- a/fs/coda/inode.c +++ b/fs/coda/inode.c @@ -119,31 +119,43 @@ static const struct fs_parameter_spec coda_param_specs[] = { {} }; -static int coda_parse_fd(struct fs_context *fc, int fd) +static int coda_set_idx(struct fs_context *fc, struct file *file) { struct coda_fs_context *ctx = fc->fs_private; - struct fd f; struct inode *inode; int idx; - f = fdget(fd); - if (!f.file) - return -EBADF; - inode = file_inode(f.file); + inode = file_inode(file); if (!S_ISCHR(inode->i_mode) || imajor(inode) != CODA_PSDEV_MAJOR) { - fdput(f); - return invalf(fc, "code: Not coda psdev"); + return invalf(fc, "coda: Not coda psdev"); } - idx = iminor(inode); - fdput(f); - if (idx < 0 || idx >= MAX_CODADEVS) return invalf(fc, "coda: Bad minor number"); ctx->idx = idx; return 0; } +static int coda_parse_fd(struct fs_context *fc, struct fs_parameter *param, + struct fs_parse_result *result) +{ + struct file *file; + int err; + + if (param->type == fs_value_is_file) { + file = param->file; + param->file = NULL; + } else { + file = fget(result->uint_32); + } + if (!file) + return -EBADF; + + err = coda_set_idx(fc, file); + fput(file); + return err; +} + static int coda_parse_param(struct fs_context *fc, struct fs_parameter *param) { struct fs_parse_result result; @@ -155,7 +167,7 @@ static int coda_parse_param(struct fs_context *fc, struct fs_parameter *param) switch (opt) { case Opt_fd: - return coda_parse_fd(fc, result.uint_32); + return coda_parse_fd(fc, param, &result); } return 0; @@ -167,6 +179,7 @@ static int coda_parse_param(struct fs_context *fc, struct fs_parameter *param) */ static int coda_parse_monolithic(struct fs_context *fc, void *_data) { + struct file *file; struct coda_mount_data *data = _data; if (!data) @@ -175,7 +188,11 @@ static int coda_parse_monolithic(struct fs_context *fc, void *_data) if (data->version != CODA_MOUNT_VERSION) return invalf(fc, "coda: Bad mount version"); - coda_parse_fd(fc, data->fd); + file = fget(data->fd); + if (file) { + coda_set_idx(fc, file); + fput(file); + } return 0; } diff --git a/fs/dcache.c b/fs/dcache.c index 6386b9b625ddbc..0f6b16ba30d082 100644 --- a/fs/dcache.c +++ b/fs/dcache.c @@ -1913,8 +1913,13 @@ void d_instantiate_new(struct dentry *entry, struct inode *inode) __d_instantiate(entry, inode); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW & ~I_CREATING; + /* + * Pairs with the barrier in prepare_to_wait_event() to make sure + * ___wait_var_event() either sees the bit cleared or + * waitqueue_active() check in wake_up_var() sees the waiter. + */ smp_mb(); - wake_up_bit(&inode->i_state, __I_NEW); + inode_wake_up_bit(inode, __I_NEW); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(d_instantiate_new); @@ -2168,9 +2173,6 @@ seqretry: * without taking d_lock and checking d_seq sequence count against @seq * returned here. * - * A refcount may be taken on the found dentry with the d_rcu_to_refcount - * function. - * * Alternatively, __d_lookup_rcu may be called again to look up the child of * the returned dentry, so long as its parent's seqlock is checked after the * child is looked up. Thus, an interlocking stepping of sequence lock checks diff --git a/fs/debugfs/inode.c b/fs/debugfs/inode.c index 91521576f5003e..66d9b3b4c5881d 100644 --- a/fs/debugfs/inode.c +++ b/fs/debugfs/inode.c @@ -89,12 +89,14 @@ enum { Opt_uid, Opt_gid, Opt_mode, + Opt_source, }; static const struct fs_parameter_spec debugfs_param_specs[] = { fsparam_gid ("gid", Opt_gid), fsparam_u32oct ("mode", Opt_mode), fsparam_uid ("uid", Opt_uid), + fsparam_string ("source", Opt_source), {} }; @@ -126,6 +128,12 @@ static int debugfs_parse_param(struct fs_context *fc, struct fs_parameter *param case Opt_mode: opts->mode = result.uint_32 & S_IALLUGO; break; + case Opt_source: + if (fc->source) + return invalfc(fc, "Multiple sources specified"); + fc->source = param->string; + param->string = NULL; + break; /* * We might like to report bad mount options here; * but traditionally debugfs has ignored all mount options diff --git a/fs/direct-io.c b/fs/direct-io.c index b0aafe640fa428..bbd05f1a21453b 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -37,7 +37,6 @@ #include <linux/rwsem.h> #include <linux/uio.h> #include <linux/atomic.h> -#include <linux/prefetch.h> #include "internal.h" @@ -1121,11 +1120,6 @@ ssize_t __blockdev_direct_IO(struct kiocb *iocb, struct inode *inode, struct blk_plug plug; unsigned long align = offset | iov_iter_alignment(iter); - /* - * Avoid references to bdev if not absolutely needed to give - * the early prefetch in the caller enough time. - */ - /* watch out for a 0 len io from a tricksy fs */ if (iov_iter_rw(iter) == READ && !count) return 0; diff --git a/fs/eventpoll.c b/fs/eventpoll.c index f53ca4f7fceddd..145f5349c612e2 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -420,7 +420,7 @@ static bool busy_loop_ep_timeout(unsigned long start_time, static bool ep_busy_loop_on(struct eventpoll *ep) { - return !!ep->busy_poll_usecs || net_busy_loop_on(); + return !!READ_ONCE(ep->busy_poll_usecs) || net_busy_loop_on(); } static bool ep_busy_loop_end(void *p, unsigned long start_time) @@ -2200,11 +2200,6 @@ static int do_epoll_create(int flags) error = PTR_ERR(file); goto out_free_fd; } -#ifdef CONFIG_NET_RX_BUSY_POLL - ep->busy_poll_usecs = 0; - ep->busy_poll_budget = 0; - ep->prefer_busy_poll = false; -#endif ep->file = file; fd_install(fd, file); return fd; diff --git a/fs/exec.c b/fs/exec.c index 50e76cc633c4ba..caae051c5a956d 100644 --- a/fs/exec.c +++ b/fs/exec.c @@ -145,13 +145,11 @@ SYSCALL_DEFINE1(uselib, const char __user *, library) goto out; /* - * may_open() has already checked for this, so it should be - * impossible to trip now. But we need to be extra cautious - * and check again at the very end too. + * Check do_open_execat() for an explanation. */ error = -EACCES; - if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) || - path_noexec(&file->f_path))) + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) || + path_noexec(&file->f_path)) goto exit; error = -ENOEXEC; @@ -954,7 +952,6 @@ EXPORT_SYMBOL(transfer_args_to_stack); static struct file *do_open_execat(int fd, struct filename *name, int flags) { struct file *file; - int err; struct open_flags open_exec_flags = { .open_flag = O_LARGEFILE | O_RDONLY | __FMODE_EXEC, .acc_mode = MAY_EXEC, @@ -971,24 +968,20 @@ static struct file *do_open_execat(int fd, struct filename *name, int flags) file = do_filp_open(fd, name, &open_exec_flags); if (IS_ERR(file)) - goto out; + return file; /* - * may_open() has already checked for this, so it should be - * impossible to trip now. But we need to be extra cautious - * and check again at the very end too. + * In the past the regular type check was here. It moved to may_open() in + * 633fb6ac3980 ("exec: move S_ISREG() check earlier"). Since then it is + * an invariant that all non-regular files error out before we get here. */ - err = -EACCES; - if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode) || - path_noexec(&file->f_path))) - goto exit; + if (WARN_ON_ONCE(!S_ISREG(file_inode(file)->i_mode)) || + path_noexec(&file->f_path)) { + fput(file); + return ERR_PTR(-EACCES); + } -out: return file; - -exit: - fput(file); - return ERR_PTR(err); } /** diff --git a/fs/fcntl.c b/fs/fcntl.c index 300e5d9ad913b5..22ec683ad8f8b4 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -343,6 +343,12 @@ static long f_dupfd_query(int fd, struct file *filp) return f.file == filp; } +/* Let the caller figure out whether a given file was just created. */ +static long f_created_query(const struct file *filp) +{ + return !!(filp->f_mode & FMODE_CREATED); +} + static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, struct file *filp) { @@ -352,6 +358,9 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, long err = -EINVAL; switch (cmd) { + case F_CREATED_QUERY: + err = f_created_query(filp); + break; case F_DUPFD: err = f_dupfd(argi, filp, 0); break; @@ -463,6 +472,7 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, static int check_fcntl_cmd(unsigned cmd) { switch (cmd) { + case F_CREATED_QUERY: case F_DUPFD: case F_DUPFD_CLOEXEC: case F_DUPFD_QUERY: diff --git a/fs/fhandle.c b/fs/fhandle.c index 6e8cea16790ef2..8cb665629f4a89 100644 --- a/fs/fhandle.c +++ b/fs/fhandle.c @@ -16,7 +16,8 @@ static long do_sys_name_to_handle(const struct path *path, struct file_handle __user *ufh, - int __user *mnt_id, int fh_flags) + void __user *mnt_id, bool unique_mntid, + int fh_flags) { long retval; struct file_handle f_handle; @@ -69,9 +70,19 @@ static long do_sys_name_to_handle(const struct path *path, } else retval = 0; /* copy the mount id */ - if (put_user(real_mount(path->mnt)->mnt_id, mnt_id) || - copy_to_user(ufh, handle, - struct_size(handle, f_handle, handle_bytes))) + if (unique_mntid) { + if (put_user(real_mount(path->mnt)->mnt_id_unique, + (u64 __user *) mnt_id)) + retval = -EFAULT; + } else { + if (put_user(real_mount(path->mnt)->mnt_id, + (int __user *) mnt_id)) + retval = -EFAULT; + } + /* copy the handle */ + if (retval != -EFAULT && + copy_to_user(ufh, handle, + struct_size(handle, f_handle, handle_bytes))) retval = -EFAULT; kfree(handle); return retval; @@ -83,6 +94,7 @@ static long do_sys_name_to_handle(const struct path *path, * @name: name that should be converted to handle. * @handle: resulting file handle * @mnt_id: mount id of the file system containing the file + * (u64 if AT_HANDLE_MNT_ID_UNIQUE, otherwise int) * @flag: flag value to indicate whether to follow symlink or not * and whether a decodable file handle is required. * @@ -92,7 +104,7 @@ static long do_sys_name_to_handle(const struct path *path, * value required. */ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, - struct file_handle __user *, handle, int __user *, mnt_id, + struct file_handle __user *, handle, void __user *, mnt_id, int, flag) { struct path path; @@ -100,7 +112,8 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, int fh_flags; int err; - if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID)) + if (flag & ~(AT_SYMLINK_FOLLOW | AT_EMPTY_PATH | AT_HANDLE_FID | + AT_HANDLE_MNT_ID_UNIQUE)) return -EINVAL; lookup_flags = (flag & AT_SYMLINK_FOLLOW) ? LOOKUP_FOLLOW : 0; @@ -109,7 +122,9 @@ SYSCALL_DEFINE5(name_to_handle_at, int, dfd, const char __user *, name, lookup_flags |= LOOKUP_EMPTY; err = user_path_at(dfd, name, lookup_flags, &path); if (!err) { - err = do_sys_name_to_handle(&path, handle, mnt_id, fh_flags); + err = do_sys_name_to_handle(&path, handle, mnt_id, + flag & AT_HANDLE_MNT_ID_UNIQUE, + fh_flags); path_put(&path); } return err; diff --git a/fs/file.c b/fs/file.c index 655338effe9c72..976ecd4ce2c6c8 100644 --- a/fs/file.c +++ b/fs/file.c @@ -672,7 +672,7 @@ int close_fd(unsigned fd) return filp_close(file, files); } -EXPORT_SYMBOL(close_fd); /* for ksys_close() */ +EXPORT_SYMBOL(close_fd); /** * last_fd - return last valid index into fd table diff --git a/fs/file_table.c b/fs/file_table.c index ca7843dde56dd9..35f2d5d9ca7621 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -136,6 +136,7 @@ static int __init init_fs_stat_sysctls(void) register_sysctl_init("fs", fs_stat_sysctls); if (IS_ENABLED(CONFIG_BINFMT_MISC)) { struct ctl_table_header *hdr; + hdr = register_sysctl_mount_point("fs/binfmt_misc"); kmemleak_not_leak(hdr); } @@ -383,7 +384,9 @@ EXPORT_SYMBOL_GPL(alloc_file_pseudo_noaccount); struct file *alloc_file_clone(struct file *base, int flags, const struct file_operations *fops) { - struct file *f = alloc_file(&base->f_path, flags, fops); + struct file *f; + + f = alloc_file(&base->f_path, flags, fops); if (!IS_ERR(f)) { path_get(&f->f_path); f->f_mapping = base->f_mapping; diff --git a/fs/fs-writeback.c b/fs/fs-writeback.c index b865a3fa52f336..d8bec3c1bb1fa7 100644 --- a/fs/fs-writeback.c +++ b/fs/fs-writeback.c @@ -1132,6 +1132,7 @@ out_bdi_put: /** * cgroup_writeback_umount - flush inode wb switches for umount + * @sb: target super_block * * This function is called when a super_block is about to be destroyed and * flushes in-flight inode wb switches. An inode wb switch goes through @@ -1140,8 +1141,12 @@ out_bdi_put: * rare occurrences and synchronize_rcu() can take a while, perform * flushing iff wb switches are in flight. */ -void cgroup_writeback_umount(void) +void cgroup_writeback_umount(struct super_block *sb) { + + if (!(sb->s_bdi->capabilities & BDI_CAP_WRITEBACK)) + return; + /* * SB_ACTIVE should be reliably cleared before checking * isw_nr_in_flight, see generic_shutdown_super(). @@ -1381,12 +1386,13 @@ static void requeue_io(struct inode *inode, struct bdi_writeback *wb) static void inode_sync_complete(struct inode *inode) { + assert_spin_locked(&inode->i_lock); + inode->i_state &= ~I_SYNC; /* If inode is clean an unused, put it into LRU now... */ inode_add_lru(inode); - /* Waiters must see I_SYNC cleared before being woken up */ - smp_mb(); - wake_up_bit(&inode->i_state, __I_SYNC); + /* Called with inode->i_lock which ensures memory ordering. */ + inode_wake_up_bit(inode, __I_SYNC); } static bool inode_dirtied_after(struct inode *inode, unsigned long t) @@ -1505,30 +1511,27 @@ static int write_inode(struct inode *inode, struct writeback_control *wbc) * Wait for writeback on an inode to complete. Called with i_lock held. * Caller must make sure inode cannot go away when we drop i_lock. */ -static void __inode_wait_for_writeback(struct inode *inode) - __releases(inode->i_lock) - __acquires(inode->i_lock) +void inode_wait_for_writeback(struct inode *inode) { - DEFINE_WAIT_BIT(wq, &inode->i_state, __I_SYNC); - wait_queue_head_t *wqh; + struct wait_bit_queue_entry wqe; + struct wait_queue_head *wq_head; + + assert_spin_locked(&inode->i_lock); - wqh = bit_waitqueue(&inode->i_state, __I_SYNC); - while (inode->i_state & I_SYNC) { + if (!(inode->i_state & I_SYNC)) + return; + + wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC); + for (;;) { + prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE); + /* Checking I_SYNC with inode->i_lock guarantees memory ordering. */ + if (!(inode->i_state & I_SYNC)) + break; spin_unlock(&inode->i_lock); - __wait_on_bit(wqh, &wq, bit_wait, - TASK_UNINTERRUPTIBLE); + schedule(); spin_lock(&inode->i_lock); } -} - -/* - * Wait for writeback on an inode to complete. Caller must have inode pinned. - */ -void inode_wait_for_writeback(struct inode *inode) -{ - spin_lock(&inode->i_lock); - __inode_wait_for_writeback(inode); - spin_unlock(&inode->i_lock); + finish_wait(wq_head, &wqe.wq_entry); } /* @@ -1539,16 +1542,20 @@ void inode_wait_for_writeback(struct inode *inode) static void inode_sleep_on_writeback(struct inode *inode) __releases(inode->i_lock) { - DEFINE_WAIT(wait); - wait_queue_head_t *wqh = bit_waitqueue(&inode->i_state, __I_SYNC); - int sleep; + struct wait_bit_queue_entry wqe; + struct wait_queue_head *wq_head; + bool sleep; + + assert_spin_locked(&inode->i_lock); - prepare_to_wait(wqh, &wait, TASK_UNINTERRUPTIBLE); - sleep = inode->i_state & I_SYNC; + wq_head = inode_bit_waitqueue(&wqe, inode, __I_SYNC); + prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE); + /* Checking I_SYNC with inode->i_lock guarantees memory ordering. */ + sleep = !!(inode->i_state & I_SYNC); spin_unlock(&inode->i_lock); if (sleep) schedule(); - finish_wait(wqh, &wait); + finish_wait(wq_head, &wqe.wq_entry); } /* @@ -1752,7 +1759,7 @@ static int writeback_single_inode(struct inode *inode, */ if (wbc->sync_mode != WB_SYNC_ALL) goto out; - __inode_wait_for_writeback(inode); + inode_wait_for_writeback(inode); } WARN_ON(inode->i_state & I_SYNC); /* diff --git a/fs/inode.c b/fs/inode.c index 10c4619faeef8c..af78f515403f3d 100644 --- a/fs/inode.c +++ b/fs/inode.c @@ -472,6 +472,17 @@ static void __inode_add_lru(struct inode *inode, bool rotate) inode->i_state |= I_REFERENCED; } +struct wait_queue_head *inode_bit_waitqueue(struct wait_bit_queue_entry *wqe, + struct inode *inode, u32 bit) +{ + void *bit_address; + + bit_address = inode_state_wait_address(inode, bit); + init_wait_var_entry(wqe, bit_address, 0); + return __var_waitqueue(bit_address); +} +EXPORT_SYMBOL(inode_bit_waitqueue); + /* * Add inode to LRU if needed (inode is unused and clean). * @@ -500,25 +511,35 @@ static void inode_unpin_lru_isolating(struct inode *inode) spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_LRU_ISOLATING)); inode->i_state &= ~I_LRU_ISOLATING; - smp_mb(); - wake_up_bit(&inode->i_state, __I_LRU_ISOLATING); + /* Called with inode->i_lock which ensures memory ordering. */ + inode_wake_up_bit(inode, __I_LRU_ISOLATING); spin_unlock(&inode->i_lock); } static void inode_wait_for_lru_isolating(struct inode *inode) { - spin_lock(&inode->i_lock); - if (inode->i_state & I_LRU_ISOLATING) { - DEFINE_WAIT_BIT(wq, &inode->i_state, __I_LRU_ISOLATING); - wait_queue_head_t *wqh; + struct wait_bit_queue_entry wqe; + struct wait_queue_head *wq_head; + + lockdep_assert_held(&inode->i_lock); + if (!(inode->i_state & I_LRU_ISOLATING)) + return; - wqh = bit_waitqueue(&inode->i_state, __I_LRU_ISOLATING); + wq_head = inode_bit_waitqueue(&wqe, inode, __I_LRU_ISOLATING); + for (;;) { + prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE); + /* + * Checking I_LRU_ISOLATING with inode->i_lock guarantees + * memory ordering. + */ + if (!(inode->i_state & I_LRU_ISOLATING)) + break; spin_unlock(&inode->i_lock); - __wait_on_bit(wqh, &wq, bit_wait, TASK_UNINTERRUPTIBLE); + schedule(); spin_lock(&inode->i_lock); - WARN_ON(inode->i_state & I_LRU_ISOLATING); } - spin_unlock(&inode->i_lock); + finish_wait(wq_head, &wqe.wq_entry); + WARN_ON(inode->i_state & I_LRU_ISOLATING); } /** @@ -595,6 +616,7 @@ void dump_mapping(const struct address_space *mapping) struct hlist_node *dentry_first; struct dentry *dentry_ptr; struct dentry dentry; + char fname[64] = {}; unsigned long ino; /* @@ -631,11 +653,14 @@ void dump_mapping(const struct address_space *mapping) return; } + if (strncpy_from_kernel_nofault(fname, dentry.d_name.name, 63) < 0) + strscpy(fname, "<invalid>"); /* - * if dentry is corrupted, the %pd handler may still crash, - * but it's unlikely that we reach here with a corrupt mapping + * Even if strncpy_from_kernel_nofault() succeeded, + * the fname could be unreliable */ - pr_warn("aops:%ps ino:%lx dentry name:\"%pd\"\n", a_ops, ino, &dentry); + pr_warn("aops:%ps ino:%lx dentry name(?):\"%s\"\n", + a_ops, ino, fname); } void clear_inode(struct inode *inode) @@ -690,6 +715,7 @@ static void evict(struct inode *inode) inode_sb_list_del(inode); + spin_lock(&inode->i_lock); inode_wait_for_lru_isolating(inode); /* @@ -699,6 +725,7 @@ static void evict(struct inode *inode) * the inode. We just have to wait for running writeback to finish. */ inode_wait_for_writeback(inode); + spin_unlock(&inode->i_lock); if (op->evict_inode) { op->evict_inode(inode); @@ -722,7 +749,13 @@ static void evict(struct inode *inode) * used as an indicator whether blocking on it is safe. */ spin_lock(&inode->i_lock); - wake_up_bit(&inode->i_state, __I_NEW); + /* + * Pairs with the barrier in prepare_to_wait_event() to make sure + * ___wait_var_event() either sees the bit cleared or + * waitqueue_active() check in wake_up_var() sees the waiter. + */ + smp_mb(); + inode_wake_up_bit(inode, __I_NEW); BUG_ON(inode->i_state != (I_FREEING | I_CLEAR)); spin_unlock(&inode->i_lock); @@ -770,6 +803,10 @@ again: continue; spin_lock(&inode->i_lock); + if (atomic_read(&inode->i_count)) { + spin_unlock(&inode->i_lock); + continue; + } if (inode->i_state & (I_NEW | I_FREEING | I_WILL_FREE)) { spin_unlock(&inode->i_lock); continue; @@ -1130,8 +1167,13 @@ void unlock_new_inode(struct inode *inode) spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW & ~I_CREATING; + /* + * Pairs with the barrier in prepare_to_wait_event() to make sure + * ___wait_var_event() either sees the bit cleared or + * waitqueue_active() check in wake_up_var() sees the waiter. + */ smp_mb(); - wake_up_bit(&inode->i_state, __I_NEW); + inode_wake_up_bit(inode, __I_NEW); spin_unlock(&inode->i_lock); } EXPORT_SYMBOL(unlock_new_inode); @@ -1142,8 +1184,13 @@ void discard_new_inode(struct inode *inode) spin_lock(&inode->i_lock); WARN_ON(!(inode->i_state & I_NEW)); inode->i_state &= ~I_NEW; + /* + * Pairs with the barrier in prepare_to_wait_event() to make sure + * ___wait_var_event() either sees the bit cleared or + * waitqueue_active() check in wake_up_var() sees the waiter. + */ smp_mb(); - wake_up_bit(&inode->i_state, __I_NEW); + inode_wake_up_bit(inode, __I_NEW); spin_unlock(&inode->i_lock); iput(inode); } @@ -1570,9 +1617,7 @@ struct inode *ilookup(struct super_block *sb, unsigned long ino) struct hlist_head *head = inode_hashtable + hash(sb, ino); struct inode *inode; again: - spin_lock(&inode_hash_lock); - inode = find_inode_fast(sb, head, ino, true); - spin_unlock(&inode_hash_lock); + inode = find_inode_fast(sb, head, ino, false); if (inode) { if (IS_ERR(inode)) @@ -2334,8 +2379,8 @@ EXPORT_SYMBOL(inode_needs_sync); */ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_locked) { - wait_queue_head_t *wq; - DEFINE_WAIT_BIT(wait, &inode->i_state, __I_NEW); + struct wait_bit_queue_entry wqe; + struct wait_queue_head *wq_head; /* * Handle racing against evict(), see that routine for more details. @@ -2346,14 +2391,14 @@ static void __wait_on_freeing_inode(struct inode *inode, bool is_inode_hash_lock return; } - wq = bit_waitqueue(&inode->i_state, __I_NEW); - prepare_to_wait(wq, &wait.wq_entry, TASK_UNINTERRUPTIBLE); + wq_head = inode_bit_waitqueue(&wqe, inode, __I_NEW); + prepare_to_wait_event(wq_head, &wqe.wq_entry, TASK_UNINTERRUPTIBLE); spin_unlock(&inode->i_lock); rcu_read_unlock(); if (is_inode_hash_locked) spin_unlock(&inode_hash_lock); schedule(); - finish_wait(wq, &wait.wq_entry); + finish_wait(wq_head, &wqe.wq_entry); if (is_inode_hash_locked) spin_lock(&inode_hash_lock); rcu_read_lock(); @@ -2502,18 +2547,11 @@ EXPORT_SYMBOL(inode_owner_or_capable); /* * Direct i/o helper functions */ -static void __inode_dio_wait(struct inode *inode) +bool inode_dio_finished(const struct inode *inode) { - wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); - DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); - - do { - prepare_to_wait(wq, &q.wq_entry, TASK_UNINTERRUPTIBLE); - if (atomic_read(&inode->i_dio_count)) - schedule(); - } while (atomic_read(&inode->i_dio_count)); - finish_wait(wq, &q.wq_entry); + return atomic_read(&inode->i_dio_count) == 0; } +EXPORT_SYMBOL(inode_dio_finished); /** * inode_dio_wait - wait for outstanding DIO requests to finish @@ -2527,11 +2565,17 @@ static void __inode_dio_wait(struct inode *inode) */ void inode_dio_wait(struct inode *inode) { - if (atomic_read(&inode->i_dio_count)) - __inode_dio_wait(inode); + wait_var_event(&inode->i_dio_count, inode_dio_finished(inode)); } EXPORT_SYMBOL(inode_dio_wait); +void inode_dio_wait_interruptible(struct inode *inode) +{ + wait_var_event_interruptible(&inode->i_dio_count, + inode_dio_finished(inode)); +} +EXPORT_SYMBOL(inode_dio_wait_interruptible); + /* * inode_set_flags - atomically set some inode flags * diff --git a/fs/libfs.c b/fs/libfs.c index b64b4c44cfea87..0e1b999238027e 100644 --- a/fs/libfs.c +++ b/fs/libfs.c @@ -2003,13 +2003,19 @@ bool inode_maybe_inc_iversion(struct inode *inode, bool force) * information, but the legacy inode_inc_iversion code used a spinlock * to serialize increments. * - * Here, we add full memory barriers to ensure that any de-facto - * ordering with other info is preserved. + * We add a full memory barrier to ensure that any de facto ordering + * with other state is preserved (either implicitly coming from cmpxchg + * or explicitly from smp_mb if we don't know upfront if we will execute + * the former). * - * This barrier pairs with the barrier in inode_query_iversion() + * These barriers pair with inode_query_iversion(). */ - smp_mb(); cur = inode_peek_iversion_raw(inode); + if (!force && !(cur & I_VERSION_QUERIED)) { + smp_mb(); + cur = inode_peek_iversion_raw(inode); + } + do { /* If flag is clear then we needn't do anything */ if (!force && !(cur & I_VERSION_QUERIED)) @@ -2038,20 +2044,22 @@ EXPORT_SYMBOL(inode_maybe_inc_iversion); u64 inode_query_iversion(struct inode *inode) { u64 cur, new; + bool fenced = false; + /* + * Memory barriers (implicit in cmpxchg, explicit in smp_mb) pair with + * inode_maybe_inc_iversion(), see that routine for more details. + */ cur = inode_peek_iversion_raw(inode); do { /* If flag is already set, then no need to swap */ if (cur & I_VERSION_QUERIED) { - /* - * This barrier (and the implicit barrier in the - * cmpxchg below) pairs with the barrier in - * inode_maybe_inc_iversion(). - */ - smp_mb(); + if (!fenced) + smp_mb(); break; } + fenced = true; new = cur | I_VERSION_QUERIED; } while (!atomic64_try_cmpxchg(&inode->i_version, &cur, new)); return cur >> I_VERSION_QUERIED_SHIFT; diff --git a/fs/mnt_idmapping.c b/fs/mnt_idmapping.c index 3c60f1eaca615a..79491663dbc0cf 100644 --- a/fs/mnt_idmapping.c +++ b/fs/mnt_idmapping.c @@ -228,15 +228,15 @@ static int copy_mnt_idmap(struct uid_gid_map *map_from, return 0; } - forward = kmemdup(map_from->forward, - nr_extents * sizeof(struct uid_gid_extent), - GFP_KERNEL_ACCOUNT); + forward = kmemdup_array(map_from->forward, nr_extents, + sizeof(struct uid_gid_extent), + GFP_KERNEL_ACCOUNT); if (!forward) return -ENOMEM; - reverse = kmemdup(map_from->reverse, - nr_extents * sizeof(struct uid_gid_extent), - GFP_KERNEL_ACCOUNT); + reverse = kmemdup_array(map_from->reverse, nr_extents, + sizeof(struct uid_gid_extent), + GFP_KERNEL_ACCOUNT); if (!reverse) { kfree(forward); return -ENOMEM; diff --git a/fs/mount.h b/fs/mount.h index ad4b1ddebb5433..0a78f85cf73703 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -153,5 +153,4 @@ static inline void move_from_ns(struct mount *mnt, struct list_head *dt_list) list_add_tail(&mnt->mnt_list, dt_list); } -extern void mnt_cursor_del(struct mnt_namespace *ns, struct mount *cursor); bool has_locked_children(struct mount *mnt, struct dentry *dentry); diff --git a/fs/namei.c b/fs/namei.c index 5512cb10fa8971..2699601bf8e9e7 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -1639,6 +1639,20 @@ struct dentry *lookup_one_qstr_excl(const struct qstr *name, } EXPORT_SYMBOL(lookup_one_qstr_excl); +/** + * lookup_fast - do fast lockless (but racy) lookup of a dentry + * @nd: current nameidata + * + * Do a fast, but racy lookup in the dcache for the given dentry, and + * revalidate it. Returns a valid dentry pointer or NULL if one wasn't + * found. On error, an ERR_PTR will be returned. + * + * If this function returns a valid dentry and the walk is no longer + * lazy, the dentry will carry a reference that must later be put. If + * RCU mode is still in force, then this is not the case and the dentry + * must be legitimized before use. If this returns NULL, then the walk + * will no longer be in RCU mode. + */ static struct dentry *lookup_fast(struct nameidata *nd) { struct dentry *dentry, *parent = nd->path.dentry; @@ -3521,6 +3535,9 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file, return dentry; } + if (open_flag & O_CREAT) + audit_inode(nd->name, dir, AUDIT_INODE_PARENT); + /* * Checking write permission is tricky, bacuse we don't know if we are * going to actually need it: O_CREAT opens should work as long as the @@ -3591,6 +3608,42 @@ out_dput: return ERR_PTR(error); } +static inline bool trailing_slashes(struct nameidata *nd) +{ + return (bool)nd->last.name[nd->last.len]; +} + +static struct dentry *lookup_fast_for_open(struct nameidata *nd, int open_flag) +{ + struct dentry *dentry; + + if (open_flag & O_CREAT) { + if (trailing_slashes(nd)) + return ERR_PTR(-EISDIR); + + /* Don't bother on an O_EXCL create */ + if (open_flag & O_EXCL) + return NULL; + } + + if (trailing_slashes(nd)) + nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; + + dentry = lookup_fast(nd); + if (IS_ERR_OR_NULL(dentry)) + return dentry; + + if (open_flag & O_CREAT) { + /* Discard negative dentries. Need inode_lock to do the create */ + if (!dentry->d_inode) { + if (!(nd->flags & LOOKUP_RCU)) + dput(dentry); + dentry = NULL; + } + } + return dentry; +} + static const char *open_last_lookups(struct nameidata *nd, struct file *file, const struct open_flags *op) { @@ -3608,28 +3661,22 @@ static const char *open_last_lookups(struct nameidata *nd, return handle_dots(nd, nd->last_type); } - if (!(open_flag & O_CREAT)) { - if (nd->last.name[nd->last.len]) - nd->flags |= LOOKUP_FOLLOW | LOOKUP_DIRECTORY; - /* we _can_ be in RCU mode here */ - dentry = lookup_fast(nd); - if (IS_ERR(dentry)) - return ERR_CAST(dentry); - if (likely(dentry)) - goto finish_lookup; + /* We _can_ be in RCU mode here */ + dentry = lookup_fast_for_open(nd, open_flag); + if (IS_ERR(dentry)) + return ERR_CAST(dentry); + + if (likely(dentry)) + goto finish_lookup; + if (!(open_flag & O_CREAT)) { if (WARN_ON_ONCE(nd->flags & LOOKUP_RCU)) return ERR_PTR(-ECHILD); } else { - /* create side of things */ if (nd->flags & LOOKUP_RCU) { if (!try_to_unlazy(nd)) return ERR_PTR(-ECHILD); } - audit_inode(nd->name, dir, AUDIT_INODE_PARENT); - /* trailing slashes? */ - if (unlikely(nd->last.name[nd->last.len])) - return ERR_PTR(-EISDIR); } if (open_flag & (O_CREAT | O_TRUNC | O_WRONLY | O_RDWR)) { diff --git a/fs/namespace.c b/fs/namespace.c index 328087a4df8a6e..5f2dddee007458 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -1774,7 +1774,7 @@ static void umount_tree(struct mount *mnt, enum umount_tree_flags how) list_del_init(&p->mnt_child); } - /* Add propogated mounts to the tmp_list */ + /* Add propagated mounts to the tmp_list */ if (how & UMOUNT_PROPAGATE) propagate_umount(&tmp_list); @@ -2921,8 +2921,15 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount * if (!__mnt_is_readonly(mnt) && (!(sb->s_iflags & SB_I_TS_EXPIRY_WARNED)) && (ktime_get_real_seconds() + TIME_UPTIME_SEC_MAX > sb->s_time_max)) { - char *buf = (char *)__get_free_page(GFP_KERNEL); - char *mntpath = buf ? d_path(mountpoint, buf, PAGE_SIZE) : ERR_PTR(-ENOMEM); + char *buf, *mntpath; + + buf = (char *)__get_free_page(GFP_KERNEL); + if (buf) + mntpath = d_path(mountpoint, buf, PAGE_SIZE); + else + mntpath = ERR_PTR(-ENOMEM); + if (IS_ERR(mntpath)) + mntpath = "(unknown)"; pr_warn("%s filesystem being %s at %s supports timestamps until %ptTd (0x%llx)\n", sb->s_type->name, @@ -2930,8 +2937,9 @@ static void mnt_warn_timestamp_expiry(struct path *mountpoint, struct vfsmount * mntpath, &sb->s_time_max, (unsigned long long)sb->s_time_max); - free_page((unsigned long)buf); sb->s_iflags |= SB_I_TS_EXPIRY_WARNED; + if (buf) + free_page((unsigned long)buf); } } @@ -5605,7 +5613,7 @@ static bool mnt_already_visible(struct mnt_namespace *ns, /* Only worry about locked mounts */ if (!(child->mnt.mnt_flags & MNT_LOCKED)) continue; - /* Is the directory permanetly empty? */ + /* Is the directory permanently empty? */ if (!is_empty_dir_inode(inode)) goto next; } diff --git a/fs/netfs/locking.c b/fs/netfs/locking.c index 75dc52a49b3a46..21eab56ee2f94f 100644 --- a/fs/netfs/locking.c +++ b/fs/netfs/locking.c @@ -19,25 +19,13 @@ * Must be called under a lock that serializes taking new references * to i_dio_count, usually by inode->i_mutex. */ -static int inode_dio_wait_interruptible(struct inode *inode) +static int netfs_inode_dio_wait_interruptible(struct inode *inode) { - if (!atomic_read(&inode->i_dio_count)) + if (inode_dio_finished(inode)) return 0; - wait_queue_head_t *wq = bit_waitqueue(&inode->i_state, __I_DIO_WAKEUP); - DEFINE_WAIT_BIT(q, &inode->i_state, __I_DIO_WAKEUP); - - for (;;) { - prepare_to_wait(wq, &q.wq_entry, TASK_INTERRUPTIBLE); - if (!atomic_read(&inode->i_dio_count)) - break; - if (signal_pending(current)) - break; - schedule(); - } - finish_wait(wq, &q.wq_entry); - - return atomic_read(&inode->i_dio_count) ? -ERESTARTSYS : 0; + inode_dio_wait_interruptible(inode); + return !inode_dio_finished(inode) ? -ERESTARTSYS : 0; } /* Call with exclusively locked inode->i_rwsem */ @@ -46,7 +34,7 @@ static int netfs_block_o_direct(struct netfs_inode *ictx) if (!test_bit(NETFS_ICTX_ODIRECT, &ictx->flags)) return 0; clear_bit(NETFS_ICTX_ODIRECT, &ictx->flags); - return inode_dio_wait_interruptible(&ictx->inode); + return netfs_inode_dio_wait_interruptible(&ictx->inode); } /** diff --git a/fs/netfs/main.c b/fs/netfs/main.c index 5f0f438e5d211d..9d6b49dc66945f 100644 --- a/fs/netfs/main.c +++ b/fs/netfs/main.c @@ -142,7 +142,7 @@ static int __init netfs_init(void) error_fscache: error_procfile: - remove_proc_entry("fs/netfs", NULL); + remove_proc_subtree("fs/netfs", NULL); error_proc: mempool_exit(&netfs_subrequest_pool); error_subreqpool: @@ -159,7 +159,7 @@ fs_initcall(netfs_init); static void __exit netfs_exit(void) { fscache_exit(); - remove_proc_entry("fs/netfs", NULL); + remove_proc_subtree("fs/netfs", NULL); mempool_exit(&netfs_subrequest_pool); kmem_cache_destroy(netfs_subrequest_slab); mempool_exit(&netfs_request_pool); diff --git a/fs/pipe.c b/fs/pipe.c index 7dff2aa50a6ddb..9a6dfe39f01270 100644 --- a/fs/pipe.c +++ b/fs/pipe.c @@ -1427,7 +1427,7 @@ static const struct super_operations pipefs_ops = { /* * pipefs should _never_ be mounted by userland - too much of security hassle, - * no real gain from having the whole whorehouse mounted. So we don't need + * no real gain from having the whole file system mounted. So we don't need * any operations on the root directory. However, we need a non-trivial * d_name - pipe: will go nicely and kill the special-casing in procfs. */ diff --git a/fs/posix_acl.c b/fs/posix_acl.c index 3f87297dbfdb70..6c66a37522d001 100644 --- a/fs/posix_acl.c +++ b/fs/posix_acl.c @@ -715,8 +715,8 @@ int posix_acl_update_mode(struct mnt_idmap *idmap, return error; if (error == 0) *acl = NULL; - if (!vfsgid_in_group_p(i_gid_into_vfsgid(idmap, inode)) && - !capable_wrt_inode_uidgid(idmap, inode, CAP_FSETID)) + if (!in_group_or_capable(idmap, inode, + i_gid_into_vfsgid(idmap, inode))) mode &= ~S_ISGID; *mode_p = mode; return 0; diff --git a/fs/proc/base.c b/fs/proc/base.c index 72a1acd03675cc..1409d1003101cf 100644 --- a/fs/proc/base.c +++ b/fs/proc/base.c @@ -827,12 +827,9 @@ static int __mem_open(struct inode *inode, struct file *file, unsigned int mode) static int mem_open(struct inode *inode, struct file *file) { - int ret = __mem_open(inode, file, PTRACE_MODE_ATTACH); - - /* OK to pass negative loff_t, we can catch out-of-range */ - file->f_mode |= FMODE_UNSIGNED_OFFSET; - - return ret; + if (WARN_ON_ONCE(!(file->f_op->fop_flags & FOP_UNSIGNED_OFFSET))) + return -EINVAL; + return __mem_open(inode, file, PTRACE_MODE_ATTACH); } static ssize_t mem_rw(struct file *file, char __user *buf, @@ -932,6 +929,7 @@ static const struct file_operations proc_mem_operations = { .write = mem_write, .open = mem_open, .release = mem_release, + .fop_flags = FOP_UNSIGNED_OFFSET, }; static int environ_open(struct inode *inode, struct file *file) diff --git a/fs/proc/fd.c b/fs/proc/fd.c index 586bbc84ca0430..7baafb1eba1306 100644 --- a/fs/proc/fd.c +++ b/fs/proc/fd.c @@ -59,7 +59,7 @@ static int seq_show(struct seq_file *m, void *v) real_mount(file->f_path.mnt)->mnt_id, file_inode(file)->i_ino); - /* show_fd_locks() never deferences files so a stale value is safe */ + /* show_fd_locks() never dereferences files, so a stale value is safe */ show_fd_locks(m, file, files); if (seq_has_overflowed(m)) goto out; diff --git a/fs/proc/kcore.c b/fs/proc/kcore.c index 8e08a9a1b7ed57..7d0acdad74e2ff 100644 --- a/fs/proc/kcore.c +++ b/fs/proc/kcore.c @@ -235,7 +235,7 @@ static int kcore_ram_list(struct list_head *list) int nid, ret; unsigned long end_pfn; - /* Not inialized....update now */ + /* Not initialized....update now */ /* find out "max pfn" */ end_pfn = 0; for_each_node_state(nid, N_MEMORY) { diff --git a/fs/read_write.c b/fs/read_write.c index 90e283b31ca181..89d4af0e3b93d3 100644 --- a/fs/read_write.c +++ b/fs/read_write.c @@ -36,7 +36,7 @@ EXPORT_SYMBOL(generic_ro_fops); static inline bool unsigned_offsets(struct file *file) { - return file->f_mode & FMODE_UNSIGNED_OFFSET; + return file->f_op->fop_flags & FOP_UNSIGNED_OFFSET; } /** diff --git a/fs/select.c b/fs/select.c index 9515c3fa1a03e8..1a4849e2afb975 100644 --- a/fs/select.c +++ b/fs/select.c @@ -840,7 +840,7 @@ SYSCALL_DEFINE1(old_select, struct sel_arg_struct __user *, arg) struct poll_list { struct poll_list *next; unsigned int len; - struct pollfd entries[]; + struct pollfd entries[] __counted_by(len); }; #define POLLFD_PER_PAGE ((PAGE_SIZE-sizeof(struct poll_list)) / sizeof(struct pollfd)) diff --git a/fs/super.c b/fs/super.c index b7913b55debc1f..1db230432960dc 100644 --- a/fs/super.c +++ b/fs/super.c @@ -621,7 +621,7 @@ void generic_shutdown_super(struct super_block *sb) sync_filesystem(sb); sb->s_flags &= ~SB_ACTIVE; - cgroup_writeback_umount(); + cgroup_writeback_umount(sb); /* Evict all inodes with zero refcount. */ evict_inodes(sb); @@ -1905,7 +1905,7 @@ static void lockdep_sb_freeze_release(struct super_block *sb) int level; for (level = SB_FREEZE_LEVELS - 1; level >= 0; level--) - percpu_rwsem_release(sb->s_writers.rw_sem + level, 0, _THIS_IP_); + percpu_rwsem_release(sb->s_writers.rw_sem + level, _THIS_IP_); } /* |
