aboutsummaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorChristian Brauner <brauner@kernel.org>2025-12-05 10:02:18 +0100
committerChristian Brauner <brauner@kernel.org>2025-12-15 14:12:45 +0100
commit1c921baf4212f68fc4eecc4de4ceffc7fb965265 (patch)
treedb8583a5c781c323b0e81a2698416774d067b4fc
parent8f0b4cce4481fb22653697cced8d0d04027cb1e8 (diff)
parent64a989dbd144e0622371396461b11335459692d2 (diff)
downloadvfs-7.0.atomic_open.tar.gz
Merge patch series "Allow knfsd to use atomic_open()"vfs-7.0.atomic_open
Benjamin Coddington <bcodding@hammerspace.com> says: We have workloads that will benefit from allowing knfsd to use atomic_open() in the open/create path. There are two benefits; the first is the original matter of correctness: when knfsd must perform both vfs_create() and vfs_open() in series there can be races or error results that cause the caller to receive unexpected results. The second benefit is that for some network filesystems, we can reduce the number of remote round-trip operations by using a single atomic_open() path which provides a performance benefit. I've implemented this with the simplest possible change - by modifying dentry_create() which has a single user: knfsd. The changes cause us to insert ourselves part-way into the previously closed/static atomic_open() path, so I expect VFS folks to have some good ideas about potentially superior approaches. Previous work on commit fb70bf124b05 ("NFSD: Instantiate a struct file when creating a regular NFSv4 file") addressed most of the atomicity issues, but there are still a few gaps on network filesystems. The problem was noticed on a test that did open O_CREAT with mode 0 which will succeed in creating the file but will return -EACCES from vfs_open() - this specific test is mentioned in 3/3 description. Also, Trond notes that independently of the permissions issues, atomic_open also solves races in open(O_CREAT|O_TRUNC). The NFS client now uses it for both NFSv4 and NFSv3 for that reason. See commit 7c6c5249f061 "NFS: add atomic_open for NFSv3 to handle O_TRUNC correctly." * patches from https://patch.msgid.link/cover.1764259052.git.bcodding@hammerspace.com: VFS/knfsd: Teach dentry_create() to use atomic_open() VFS: Prepare atomic_open() for dentry_create() VFS: move dentry_create() from fs/open.c to fs/namei.c Link: https://patch.msgid.link/cover.1764259052.git.bcodding@hammerspace.com Signed-off-by: Christian Brauner <brauner@kernel.org>
-rw-r--r--fs/namei.c80
-rw-r--r--fs/nfsd/nfs4proc.c11
-rw-r--r--fs/open.c39
-rw-r--r--include/linux/fs.h2
4 files changed, 82 insertions, 50 deletions
diff --git a/fs/namei.c b/fs/namei.c
index bf0f66f0e9b92c..aefb21bc0944e3 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -4279,19 +4279,16 @@ static int may_o_create(struct mnt_idmap *idmap,
*
* Returns an error code otherwise.
*/
-static struct dentry *atomic_open(struct nameidata *nd, struct dentry *dentry,
+static struct dentry *atomic_open(const struct path *path, struct dentry *dentry,
struct file *file,
int open_flag, umode_t mode)
{
struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
- struct inode *dir = nd->path.dentry->d_inode;
+ struct inode *dir = path->dentry->d_inode;
int error;
- if (nd->flags & LOOKUP_DIRECTORY)
- open_flag |= O_DIRECTORY;
-
file->__f_path.dentry = DENTRY_NOT_SET;
- file->__f_path.mnt = nd->path.mnt;
+ file->__f_path.mnt = path->mnt;
error = dir->i_op->atomic_open(dir, dentry, file,
open_to_namei_flags(open_flag), mode);
d_lookup_done(dentry);
@@ -4403,7 +4400,9 @@ static struct dentry *lookup_open(struct nameidata *nd, struct file *file,
if (create_error)
open_flag &= ~O_CREAT;
if (dir_inode->i_op->atomic_open) {
- dentry = atomic_open(nd, dentry, file, open_flag, mode);
+ if (nd->flags & LOOKUP_DIRECTORY)
+ open_flag |= O_DIRECTORY;
+ dentry = atomic_open(&nd->path, dentry, file, open_flag, mode);
if (unlikely(create_error) && dentry == ERR_PTR(-ENOENT))
dentry = ERR_PTR(create_error);
return dentry;
@@ -4937,6 +4936,73 @@ inline struct dentry *start_creating_user_path(
}
EXPORT_SYMBOL(start_creating_user_path);
+/**
+ * dentry_create - Create and open a file
+ * @path: path to create
+ * @flags: O_ flags
+ * @mode: mode bits for new file
+ * @cred: credentials to use
+ *
+ * Caller must hold the parent directory's lock, and have prepared
+ * a negative dentry, placed in @path->dentry, for the new file.
+ *
+ * Caller sets @path->mnt to the vfsmount of the filesystem where
+ * the new file is to be created. The parent directory and the
+ * negative dentry must reside on the same filesystem instance.
+ *
+ * On success, returns a "struct file *". Otherwise a ERR_PTR
+ * is returned.
+ */
+struct file *dentry_create(struct path *path, int flags, umode_t mode,
+ const struct cred *cred)
+{
+ struct file *file __free(fput) = NULL;
+ struct dentry *dentry = path->dentry;
+ struct dentry *dir = dentry->d_parent;
+ struct inode *dir_inode = d_inode(dir);
+ struct mnt_idmap *idmap;
+ int error, create_error;
+
+ file = alloc_empty_file(flags, cred);
+ if (IS_ERR(file))
+ return file;
+
+ idmap = mnt_idmap(path->mnt);
+
+ if (dir_inode->i_op->atomic_open) {
+ path->dentry = dir;
+ mode = vfs_prepare_mode(idmap, dir_inode, mode, S_IALLUGO, S_IFREG);
+
+ create_error = may_o_create(idmap, path, dentry, mode);
+ if (create_error)
+ flags &= ~O_CREAT;
+
+ dentry = atomic_open(path, dentry, file, flags, mode);
+ error = PTR_ERR_OR_ZERO(dentry);
+
+ if (unlikely(create_error) && error == -ENOENT)
+ error = create_error;
+
+ if (!error) {
+ if (file->f_mode & FMODE_CREATED)
+ fsnotify_create(dir->d_inode, dentry);
+ if (file->f_mode & FMODE_OPENED)
+ fsnotify_open(file);
+ }
+
+ path->dentry = dentry;
+
+ } else {
+ error = vfs_create(mnt_idmap(path->mnt), path->dentry, mode, NULL);
+ if (!error)
+ error = vfs_open(path, file);
+ }
+ if (unlikely(error))
+ return ERR_PTR(error);
+
+ return no_free_ptr(file);
+}
+EXPORT_SYMBOL(dentry_create);
/**
* vfs_mknod - create device node or file
diff --git a/fs/nfsd/nfs4proc.c b/fs/nfsd/nfs4proc.c
index b748009175837c..6aa22b3b2f4394 100644
--- a/fs/nfsd/nfs4proc.c
+++ b/fs/nfsd/nfs4proc.c
@@ -194,7 +194,7 @@ static inline bool nfsd4_create_is_exclusive(int createmode)
}
static __be32
-nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child,
+nfsd4_vfs_create(struct svc_fh *fhp, struct dentry **child,
struct nfsd4_open *open)
{
struct file *filp;
@@ -202,6 +202,9 @@ nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child,
int oflags;
oflags = O_CREAT | O_LARGEFILE;
+ if (nfsd4_create_is_exclusive(open->op_createmode))
+ oflags |= O_EXCL;
+
switch (open->op_share_access & NFS4_SHARE_ACCESS_BOTH) {
case NFS4_SHARE_ACCESS_WRITE:
oflags |= O_WRONLY;
@@ -214,9 +217,11 @@ nfsd4_vfs_create(struct svc_fh *fhp, struct dentry *child,
}
path.mnt = fhp->fh_export->ex_path.mnt;
- path.dentry = child;
+ path.dentry = *child;
filp = dentry_create(&path, oflags, open->op_iattr.ia_mode,
current_cred());
+ *child = path.dentry;
+
if (IS_ERR(filp))
return nfserrno(PTR_ERR(filp));
@@ -350,7 +355,7 @@ nfsd4_create_file(struct svc_rqst *rqstp, struct svc_fh *fhp,
status = fh_fill_pre_attrs(fhp);
if (status != nfs_ok)
goto out;
- status = nfsd4_vfs_create(fhp, child, open);
+ status = nfsd4_vfs_create(fhp, &child, open);
if (status != nfs_ok)
goto out;
open->op_created = true;
diff --git a/fs/open.c b/fs/open.c
index f328622061c56c..74c4c1462b3e47 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -1142,45 +1142,6 @@ struct file *dentry_open_nonotify(const struct path *path, int flags,
}
/**
- * dentry_create - Create and open a file
- * @path: path to create
- * @flags: O_ flags
- * @mode: mode bits for new file
- * @cred: credentials to use
- *
- * Caller must hold the parent directory's lock, and have prepared
- * a negative dentry, placed in @path->dentry, for the new file.
- *
- * Caller sets @path->mnt to the vfsmount of the filesystem where
- * the new file is to be created. The parent directory and the
- * negative dentry must reside on the same filesystem instance.
- *
- * On success, returns a "struct file *". Otherwise a ERR_PTR
- * is returned.
- */
-struct file *dentry_create(const struct path *path, int flags, umode_t mode,
- const struct cred *cred)
-{
- struct file *f;
- int error;
-
- f = alloc_empty_file(flags, cred);
- if (IS_ERR(f))
- return f;
-
- error = vfs_create(mnt_idmap(path->mnt), path->dentry, mode, NULL);
- if (!error)
- error = vfs_open(path, f);
-
- if (unlikely(error)) {
- fput(f);
- return ERR_PTR(error);
- }
- return f;
-}
-EXPORT_SYMBOL(dentry_create);
-
-/**
* kernel_file_open - open a file for kernel internal use
* @path: path of the file to open
* @flags: open flags
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 04ceeca12a0d5c..1cb3385ee85202 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -2457,7 +2457,7 @@ struct file *dentry_open(const struct path *path, int flags,
const struct cred *creds);
struct file *dentry_open_nonotify(const struct path *path, int flags,
const struct cred *cred);
-struct file *dentry_create(const struct path *path, int flags, umode_t mode,
+struct file *dentry_create(struct path *path, int flags, umode_t mode,
const struct cred *cred);
const struct path *backing_file_user_path(const struct file *f);