From 0b1765830cf9c652f010e9f5df7d9fa435a2f930 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 3 Nov 2025 16:16:01 +0100 Subject: ns: use NS_COMMON_INIT() for all namespaces Now that we have a common initializer use it for all static namespaces. Signed-off-by: Christian Brauner --- fs/namespace.c | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index d82910f33dc483..7b78dd48b6c356 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5985,11 +5985,8 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, } struct mnt_namespace init_mnt_ns = { - .ns.inum = ns_init_inum(&init_mnt_ns), - .ns.ops = &mntns_operations, + .ns = NS_COMMON_INIT(init_mnt_ns, 1), .user_ns = &init_user_ns, - .ns.__ns_ref = REFCOUNT_INIT(1), - .ns.ns_type = ns_common_type(&init_mnt_ns), .passive = REFCOUNT_INIT(1), .mounts = RB_ROOT, .poll = __WAIT_QUEUE_HEAD_INITIALIZER(init_mnt_ns.poll), -- cgit 1.2.3-korg From 3760342fd6312416491d536144e39297fa5b1950 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 29 Oct 2025 13:20:28 +0100 Subject: nstree: assign fixed ids to the initial namespaces The initial set of namespace comes with fixed inode numbers making it easy for userspace to identify them solely based on that information. This has long preceeded anything here. Similarly, let's assign fixed namespace ids for the initial namespaces. Kill the cookie and use a sequentially increasing number. This has the nice side-effect that the owning user namespace will always have a namespace id that is smaller than any of it's descendant namespaces. Link: https://patch.msgid.link/20251029-work-namespace-nstree-listns-v4-15-2e6f823ebdc0@kernel.org Signed-off-by: Christian Brauner --- fs/namespace.c | 2 +- include/linux/ns_common.h | 13 ++++++++++++- include/linux/nstree.h | 15 +++++++++++---- include/uapi/linux/nsfs.h | 14 ++++++++++++++ kernel/nstree.c | 13 ++++++++----- net/core/net_namespace.c | 2 +- 6 files changed, 47 insertions(+), 12 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index 7b78dd48b6c356..eded33eeb6473e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4094,7 +4094,7 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a return ERR_PTR(ret); } if (!anon) - ns_tree_gen_id(&new_ns->ns); + ns_tree_gen_id(new_ns); refcount_set(&new_ns->passive, 1); new_ns->mounts = RB_ROOT; init_waitqueue_head(&new_ns->poll); diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 7a3c71b3a76f46..009a6dea724f1c 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -173,6 +173,17 @@ static __always_inline bool is_initial_namespace(struct ns_common *ns) struct user_namespace *: &init_user_ns, \ struct uts_namespace *: &init_uts_ns) +#define ns_init_id(__ns) \ + _Generic((__ns), \ + struct cgroup_namespace *: CGROUP_NS_INIT_ID, \ + struct ipc_namespace *: IPC_NS_INIT_ID, \ + struct mnt_namespace *: MNT_NS_INIT_ID, \ + struct net *: NET_NS_INIT_ID, \ + struct pid_namespace *: PID_NS_INIT_ID, \ + struct time_namespace *: TIME_NS_INIT_ID, \ + struct user_namespace *: USER_NS_INIT_ID, \ + struct uts_namespace *: UTS_NS_INIT_ID) + #define to_ns_operations(__ns) \ _Generic((__ns), \ struct cgroup_namespace *: (IS_ENABLED(CONFIG_CGROUPS) ? &cgroupns_operations : NULL), \ @@ -198,7 +209,7 @@ static __always_inline bool is_initial_namespace(struct ns_common *ns) #define NS_COMMON_INIT(nsname, refs) \ { \ .ns_type = ns_common_type(&nsname), \ - .ns_id = 0, \ + .ns_id = ns_init_id(&nsname), \ .inum = ns_init_inum(&nsname), \ .ops = to_ns_operations(&nsname), \ .stashed = NULL, \ diff --git a/include/linux/nstree.h b/include/linux/nstree.h index 43aa262c0ea154..38674c6fa4f787 100644 --- a/include/linux/nstree.h +++ b/include/linux/nstree.h @@ -9,6 +9,7 @@ #include #include #include +#include extern struct ns_tree cgroup_ns_tree; extern struct ns_tree ipc_ns_tree; @@ -30,7 +31,11 @@ extern struct ns_tree uts_ns_tree; struct user_namespace *: &(user_ns_tree), \ struct uts_namespace *: &(uts_ns_tree)) -u64 ns_tree_gen_id(struct ns_common *ns); +#define ns_tree_gen_id(__ns) \ + __ns_tree_gen_id(to_ns_common(__ns), \ + (((__ns) == ns_init_ns(__ns)) ? ns_init_id(__ns) : 0)) + +u64 __ns_tree_gen_id(struct ns_common *ns, u64 id); void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree); void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree); struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type); @@ -38,9 +43,9 @@ struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns, struct ns_tree *ns_tree, bool previous); -static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree) +static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree, u64 id) { - ns_tree_gen_id(ns); + __ns_tree_gen_id(ns, id); __ns_tree_add_raw(ns, ns_tree); } @@ -60,7 +65,9 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree) * This function assigns a new id to the namespace and adds it to the * appropriate namespace tree and list. */ -#define ns_tree_add(__ns) __ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns)) +#define ns_tree_add(__ns) \ + __ns_tree_add(to_ns_common(__ns), to_ns_tree(__ns), \ + (((__ns) == ns_init_ns(__ns)) ? ns_init_id(__ns) : 0)) /** * ns_tree_remove - Remove a namespace from a namespace tree diff --git a/include/uapi/linux/nsfs.h b/include/uapi/linux/nsfs.h index e098759ec917ac..f8bc2aad74d69e 100644 --- a/include/uapi/linux/nsfs.h +++ b/include/uapi/linux/nsfs.h @@ -67,4 +67,18 @@ struct nsfs_file_handle { #define NSFS_FILE_HANDLE_SIZE_VER0 16 /* sizeof first published struct */ #define NSFS_FILE_HANDLE_SIZE_LATEST sizeof(struct nsfs_file_handle) /* sizeof latest published struct */ +enum init_ns_id { + IPC_NS_INIT_ID = 1ULL, + UTS_NS_INIT_ID = 2ULL, + USER_NS_INIT_ID = 3ULL, + PID_NS_INIT_ID = 4ULL, + CGROUP_NS_INIT_ID = 5ULL, + TIME_NS_INIT_ID = 6ULL, + NET_NS_INIT_ID = 7ULL, + MNT_NS_INIT_ID = 8ULL, +#ifdef __KERNEL__ + NS_LAST_INIT_ID = MNT_NS_INIT_ID, +#endif +}; + #endif /* __LINUX_NSFS_H */ diff --git a/kernel/nstree.c b/kernel/nstree.c index bbb34b46b01b69..cf102c5bb849f3 100644 --- a/kernel/nstree.c +++ b/kernel/nstree.c @@ -69,8 +69,6 @@ struct ns_tree time_ns_tree = { .type = CLONE_NEWTIME, }; -DEFINE_COOKIE(namespace_cookie); - static inline struct ns_common *node_to_ns(const struct rb_node *node) { if (!node) @@ -285,15 +283,20 @@ struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns, /** * ns_tree_gen_id - generate a new namespace id * @ns: namespace to generate id for + * @id: if non-zero, this is the initial namespace and this is a fixed id * * Generates a new namespace id and assigns it to the namespace. All * namespaces types share the same id space and thus can be compared * directly. IOW, when two ids of two namespace are equal, they are * identical. */ -u64 ns_tree_gen_id(struct ns_common *ns) +u64 __ns_tree_gen_id(struct ns_common *ns, u64 id) { - guard(preempt)(); - ns->ns_id = gen_cookie_next(&namespace_cookie); + static atomic64_t namespace_cookie = ATOMIC64_INIT(NS_LAST_INIT_ID + 1); + + if (id) + ns->ns_id = id; + else + ns->ns_id = atomic64_inc_return(&namespace_cookie); return ns->ns_id; } diff --git a/net/core/net_namespace.c b/net/core/net_namespace.c index b0e0f22d7b213c..83cbec4afcb3a0 100644 --- a/net/core/net_namespace.c +++ b/net/core/net_namespace.c @@ -439,7 +439,7 @@ static __net_init int setup_net(struct net *net) LIST_HEAD(net_exit_list); int error = 0; - net->net_cookie = ns_tree_gen_id(&net->ns); + net->net_cookie = ns_tree_gen_id(net); list_for_each_entry(ops, &pernet_list, list) { error = ops_init(ops, net); -- cgit 1.2.3-korg From a657bc8a75cf40c3d0814fe6488ba4af56528f42 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 10 Nov 2025 16:08:17 +0100 Subject: nstree: switch to new structures Switch the nstree management to the new combined structures. Link: https://patch.msgid.link/20251110-work-namespace-nstree-fixes-v1-5-e8a9264e0fb9@kernel.org Signed-off-by: Christian Brauner --- fs/namespace.c | 2 +- include/linux/ns/ns_common_types.h | 27 ++--- include/linux/ns/nstree_types.h | 19 ++++ include/linux/ns_common.h | 27 +++-- include/linux/nstree.h | 26 ++--- kernel/nscommon.c | 13 +-- kernel/nstree.c | 199 ++++++++++++++----------------------- 7 files changed, 136 insertions(+), 177 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index eded33eeb6473e..ad19530a13b225 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -138,7 +138,7 @@ static inline struct mnt_namespace *node_to_mnt_ns(const struct rb_node *node) if (!node) return NULL; - ns = rb_entry(node, struct ns_common, ns_tree_node); + ns = rb_entry(node, struct ns_common, ns_tree_node.ns_node); return container_of(ns, struct mnt_namespace, ns); } diff --git a/include/linux/ns/ns_common_types.h b/include/linux/ns/ns_common_types.h index ccd1d1e116f644..b332b019b29cb8 100644 --- a/include/linux/ns/ns_common_types.h +++ b/include/linux/ns/ns_common_types.h @@ -3,6 +3,7 @@ #define _LINUX_NS_COMMON_TYPES_H #include +#include #include #include #include @@ -98,6 +99,13 @@ extern const struct proc_ns_operations utsns_operations; * Initial namespaces: * Boot-time namespaces (init_net, init_pid_ns, etc.) start with * __ns_ref_active = 1 and remain active forever. + * + * @ns_type: type of namespace (e.g., CLONE_NEWNET) + * @stashed: cached dentry to be used by the vfs + * @ops: namespace operations + * @inum: namespace inode number (quickly recycled for non-initial namespaces) + * @__ns_ref: main reference count (do not use directly) + * @ns_tree: namespace tree nodes and active reference count */ struct ns_common { u32 ns_type; @@ -106,24 +114,7 @@ struct ns_common { unsigned int inum; refcount_t __ns_ref; /* do not use directly */ union { - struct { - u64 ns_id; - struct /* global namespace rbtree and list */ { - struct rb_node ns_unified_tree_node; - struct list_head ns_unified_list_node; - }; - struct /* per type rbtree and list */ { - struct rb_node ns_tree_node; - struct list_head ns_list_node; - }; - struct /* namespace ownership rbtree and list */ { - struct rb_root ns_owner_tree; /* rbtree of namespaces owned by this namespace */ - struct list_head ns_owner; /* list of namespaces owned by this namespace */ - struct rb_node ns_owner_tree_node; /* node in the owner namespace's rbtree */ - struct list_head ns_owner_entry; /* node in the owner namespace's ns_owned list */ - }; - atomic_t __ns_ref_active; /* do not use directly */ - }; + struct ns_tree; struct rcu_head ns_rcu; }; }; diff --git a/include/linux/ns/nstree_types.h b/include/linux/ns/nstree_types.h index 6ee0c39686f842..2fb28ee31efbb6 100644 --- a/include/linux/ns/nstree_types.h +++ b/include/linux/ns/nstree_types.h @@ -33,4 +33,23 @@ struct ns_tree_node { struct list_head ns_list_entry; }; +/** + * struct ns_tree - Namespace tree nodes and active reference count + * @ns_id: Unique namespace identifier + * @__ns_ref_active: Active reference count (do not use directly) + * @ns_unified_node: Node in the global namespace tree + * @ns_tree_node: Node in the per-type namespace tree + * @ns_owner_node: Node in the owner namespace's tree of owned namespaces + * @ns_owner_root: Root of the tree of namespaces owned by this namespace + * (only used when this namespace is an owner) + */ +struct ns_tree { + u64 ns_id; + atomic_t __ns_ref_active; + struct ns_tree_node ns_unified_node; + struct ns_tree_node ns_tree_node; + struct ns_tree_node ns_owner_node; + struct ns_tree_root ns_owner_root; +}; + #endif /* _LINUX_NSTREE_TYPES_H */ diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 6a4ca8c3b9c424..f90509ee090075 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -26,20 +26,19 @@ static __always_inline bool is_ns_init_id(const struct ns_common *ns) return ns->ns_id <= NS_LAST_INIT_ID; } - -#define NS_COMMON_INIT(nsname, refs) \ -{ \ - .ns_type = ns_common_type(&nsname), \ - .ns_id = ns_init_id(&nsname), \ - .inum = ns_init_inum(&nsname), \ - .ops = to_ns_operations(&nsname), \ - .stashed = NULL, \ - .__ns_ref = REFCOUNT_INIT(refs), \ - .__ns_ref_active = ATOMIC_INIT(1), \ - .ns_list_node = LIST_HEAD_INIT(nsname.ns.ns_list_node), \ - .ns_owner_entry = LIST_HEAD_INIT(nsname.ns.ns_owner_entry), \ - .ns_owner = LIST_HEAD_INIT(nsname.ns.ns_owner), \ - .ns_unified_list_node = LIST_HEAD_INIT(nsname.ns.ns_unified_list_node), \ +#define NS_COMMON_INIT(nsname, refs) \ +{ \ + .ns_type = ns_common_type(&nsname), \ + .ns_id = ns_init_id(&nsname), \ + .inum = ns_init_inum(&nsname), \ + .ops = to_ns_operations(&nsname), \ + .stashed = NULL, \ + .__ns_ref = REFCOUNT_INIT(refs), \ + .__ns_ref_active = ATOMIC_INIT(1), \ + .ns_unified_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_unified_node.ns_list_entry), \ + .ns_tree_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_tree_node.ns_list_entry), \ + .ns_owner_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_owner_node.ns_list_entry), \ + .ns_owner_root.ns_list_head = LIST_HEAD_INIT(nsname.ns.ns_owner_root.ns_list_head), \ } #define ns_common_init(__ns) \ diff --git a/include/linux/nstree.h b/include/linux/nstree.h index 98b848cf2f1ce7..175e4625bfa6d9 100644 --- a/include/linux/nstree.h +++ b/include/linux/nstree.h @@ -13,14 +13,14 @@ struct ns_common; -extern struct ns_tree cgroup_ns_tree; -extern struct ns_tree ipc_ns_tree; -extern struct ns_tree mnt_ns_tree; -extern struct ns_tree net_ns_tree; -extern struct ns_tree pid_ns_tree; -extern struct ns_tree time_ns_tree; -extern struct ns_tree user_ns_tree; -extern struct ns_tree uts_ns_tree; +extern struct ns_tree_root cgroup_ns_tree; +extern struct ns_tree_root ipc_ns_tree; +extern struct ns_tree_root mnt_ns_tree; +extern struct ns_tree_root net_ns_tree; +extern struct ns_tree_root pid_ns_tree; +extern struct ns_tree_root time_ns_tree; +extern struct ns_tree_root user_ns_tree; +extern struct ns_tree_root uts_ns_tree; void ns_tree_node_init(struct ns_tree_node *node); void ns_tree_root_init(struct ns_tree_root *root); @@ -46,14 +46,14 @@ void ns_tree_node_del(struct ns_tree_node *node, struct ns_tree_root *root); (((__ns) == ns_init_ns(__ns)) ? ns_init_id(__ns) : 0)) u64 __ns_tree_gen_id(struct ns_common *ns, u64 id); -void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree); -void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree); +void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree_root *ns_tree); +void __ns_tree_remove(struct ns_common *ns, struct ns_tree_root *ns_tree); struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type); struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns, - struct ns_tree *ns_tree, + struct ns_tree_root *ns_tree, bool previous); -static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree, u64 id) +static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree_root *ns_tree, u64 id) { __ns_tree_gen_id(ns, id); __ns_tree_add_raw(ns, ns_tree); @@ -91,6 +91,6 @@ static inline void __ns_tree_add(struct ns_common *ns, struct ns_tree *ns_tree, #define ns_tree_adjoined_rcu(__ns, __previous) \ __ns_tree_adjoined_rcu(to_ns_common(__ns), to_ns_tree(__ns), __previous) -#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node)) +#define ns_tree_active(__ns) (!RB_EMPTY_NODE(&to_ns_common(__ns)->ns_tree_node.ns_node)) #endif /* _LINUX_NSTREE_H */ diff --git a/kernel/nscommon.c b/kernel/nscommon.c index c910b979e4336c..88f70baccb75ec 100644 --- a/kernel/nscommon.c +++ b/kernel/nscommon.c @@ -2,6 +2,7 @@ /* Copyright (c) 2025 Christian Brauner */ #include +#include #include #include #include @@ -61,14 +62,10 @@ int __ns_common_init(struct ns_common *ns, u32 ns_type, const struct proc_ns_ope ns->ops = ops; ns->ns_id = 0; ns->ns_type = ns_type; - RB_CLEAR_NODE(&ns->ns_tree_node); - RB_CLEAR_NODE(&ns->ns_unified_tree_node); - RB_CLEAR_NODE(&ns->ns_owner_tree_node); - INIT_LIST_HEAD(&ns->ns_list_node); - INIT_LIST_HEAD(&ns->ns_unified_list_node); - ns->ns_owner_tree = RB_ROOT; - INIT_LIST_HEAD(&ns->ns_owner); - INIT_LIST_HEAD(&ns->ns_owner_entry); + ns_tree_node_init(&ns->ns_tree_node); + ns_tree_node_init(&ns->ns_unified_node); + ns_tree_node_init(&ns->ns_owner_node); + ns_tree_root_init(&ns->ns_owner_root); #ifdef CONFIG_DEBUG_VFS ns_debug(ns, ops); diff --git a/kernel/nstree.c b/kernel/nstree.c index fe71ff943f703a..6c7ec9fbf25f2e 100644 --- a/kernel/nstree.c +++ b/kernel/nstree.c @@ -9,68 +9,51 @@ #include static __cacheline_aligned_in_smp DEFINE_SEQLOCK(ns_tree_lock); -static struct rb_root ns_unified_tree = RB_ROOT; /* protected by ns_tree_lock */ -static LIST_HEAD(ns_unified_list); /* protected by ns_tree_lock */ -/** - * struct ns_tree - Namespace tree - * @ns_tree: Rbtree of namespaces of a particular type - * @ns_list: Sequentially walkable list of all namespaces of this type - * @type: type of namespaces in this tree - */ -struct ns_tree { - struct rb_root ns_tree; - struct list_head ns_list; - int type; +static struct ns_tree_root ns_unified_root = { /* protected by ns_tree_lock */ + .ns_rb = RB_ROOT, + .ns_list_head = LIST_HEAD_INIT(ns_unified_root.ns_list_head), }; -struct ns_tree mnt_ns_tree = { - .ns_tree = RB_ROOT, - .ns_list = LIST_HEAD_INIT(mnt_ns_tree.ns_list), - .type = CLONE_NEWNS, +struct ns_tree_root mnt_ns_tree = { + .ns_rb = RB_ROOT, + .ns_list_head = LIST_HEAD_INIT(mnt_ns_tree.ns_list_head), }; -struct ns_tree net_ns_tree = { - .ns_tree = RB_ROOT, - .ns_list = LIST_HEAD_INIT(net_ns_tree.ns_list), - .type = CLONE_NEWNET, +struct ns_tree_root net_ns_tree = { + .ns_rb = RB_ROOT, + .ns_list_head = LIST_HEAD_INIT(net_ns_tree.ns_list_head), }; EXPORT_SYMBOL_GPL(net_ns_tree); -struct ns_tree uts_ns_tree = { - .ns_tree = RB_ROOT, - .ns_list = LIST_HEAD_INIT(uts_ns_tree.ns_list), - .type = CLONE_NEWUTS, +struct ns_tree_root uts_ns_tree = { + .ns_rb = RB_ROOT, + .ns_list_head = LIST_HEAD_INIT(uts_ns_tree.ns_list_head), }; -struct ns_tree user_ns_tree = { - .ns_tree = RB_ROOT, - .ns_list = LIST_HEAD_INIT(user_ns_tree.ns_list), - .type = CLONE_NEWUSER, +struct ns_tree_root user_ns_tree = { + .ns_rb = RB_ROOT, + .ns_list_head = LIST_HEAD_INIT(user_ns_tree.ns_list_head), }; -struct ns_tree ipc_ns_tree = { - .ns_tree = RB_ROOT, - .ns_list = LIST_HEAD_INIT(ipc_ns_tree.ns_list), - .type = CLONE_NEWIPC, +struct ns_tree_root ipc_ns_tree = { + .ns_rb = RB_ROOT, + .ns_list_head = LIST_HEAD_INIT(ipc_ns_tree.ns_list_head), }; -struct ns_tree pid_ns_tree = { - .ns_tree = RB_ROOT, - .ns_list = LIST_HEAD_INIT(pid_ns_tree.ns_list), - .type = CLONE_NEWPID, +struct ns_tree_root pid_ns_tree = { + .ns_rb = RB_ROOT, + .ns_list_head = LIST_HEAD_INIT(pid_ns_tree.ns_list_head), }; -struct ns_tree cgroup_ns_tree = { - .ns_tree = RB_ROOT, - .ns_list = LIST_HEAD_INIT(cgroup_ns_tree.ns_list), - .type = CLONE_NEWCGROUP, +struct ns_tree_root cgroup_ns_tree = { + .ns_rb = RB_ROOT, + .ns_list_head = LIST_HEAD_INIT(cgroup_ns_tree.ns_list_head), }; -struct ns_tree time_ns_tree = { - .ns_tree = RB_ROOT, - .ns_list = LIST_HEAD_INIT(time_ns_tree.ns_list), - .type = CLONE_NEWTIME, +struct ns_tree_root time_ns_tree = { + .ns_rb = RB_ROOT, + .ns_list_head = LIST_HEAD_INIT(time_ns_tree.ns_list_head), }; /** @@ -162,21 +145,21 @@ static inline struct ns_common *node_to_ns(const struct rb_node *node) { if (!node) return NULL; - return rb_entry(node, struct ns_common, ns_tree_node); + return rb_entry(node, struct ns_common, ns_tree_node.ns_node); } static inline struct ns_common *node_to_ns_unified(const struct rb_node *node) { if (!node) return NULL; - return rb_entry(node, struct ns_common, ns_unified_tree_node); + return rb_entry(node, struct ns_common, ns_unified_node.ns_node); } static inline struct ns_common *node_to_ns_owner(const struct rb_node *node) { if (!node) return NULL; - return rb_entry(node, struct ns_common, ns_owner_tree_node); + return rb_entry(node, struct ns_common, ns_owner_node.ns_node); } static int ns_id_cmp(u64 id_a, u64 id_b) @@ -203,35 +186,22 @@ static int ns_cmp_owner(struct rb_node *a, const struct rb_node *b) return ns_id_cmp(node_to_ns_owner(a)->ns_id, node_to_ns_owner(b)->ns_id); } -void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree) +void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree_root *ns_tree) { - struct rb_node *node, *prev; + struct rb_node *node; const struct proc_ns_operations *ops = ns->ops; VFS_WARN_ON_ONCE(!ns->ns_id); - VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type); write_seqlock(&ns_tree_lock); - node = rb_find_add_rcu(&ns->ns_tree_node, &ns_tree->ns_tree, ns_cmp); - /* - * If there's no previous entry simply add it after the - * head and if there is add it after the previous entry. - */ - prev = rb_prev(&ns->ns_tree_node); - if (!prev) - list_add_rcu(&ns->ns_list_node, &ns_tree->ns_list); - else - list_add_rcu(&ns->ns_list_node, &node_to_ns(prev)->ns_list_node); + /* Add to per-type tree and list */ + node = ns_tree_node_add(&ns->ns_tree_node, ns_tree, ns_cmp); /* Add to unified tree and list */ - rb_find_add_rcu(&ns->ns_unified_tree_node, &ns_unified_tree, ns_cmp_unified); - prev = rb_prev(&ns->ns_unified_tree_node); - if (!prev) - list_add_rcu(&ns->ns_unified_list_node, &ns_unified_list); - else - list_add_rcu(&ns->ns_unified_list_node, &node_to_ns_unified(prev)->ns_unified_list_node); + ns_tree_node_add(&ns->ns_unified_node, &ns_unified_root, ns_cmp_unified); + /* Add to owner's tree if applicable */ if (ops) { struct user_namespace *user_ns; @@ -241,15 +211,8 @@ void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree) struct ns_common *owner = &user_ns->ns; VFS_WARN_ON_ONCE(owner->ns_type != CLONE_NEWUSER); - /* Insert into owner's rbtree */ - rb_find_add_rcu(&ns->ns_owner_tree_node, &owner->ns_owner_tree, ns_cmp_owner); - - /* Insert into owner's list in sorted order */ - prev = rb_prev(&ns->ns_owner_tree_node); - if (!prev) - list_add_rcu(&ns->ns_owner_entry, &owner->ns_owner); - else - list_add_rcu(&ns->ns_owner_entry, &node_to_ns_owner(prev)->ns_owner_entry); + /* Insert into owner's tree and list */ + ns_tree_node_add(&ns->ns_owner_node, &owner->ns_owner_root, ns_cmp_owner); } else { /* Only the initial user namespace doesn't have an owner. */ VFS_WARN_ON_ONCE(ns != to_ns_common(&init_user_ns)); @@ -260,36 +223,29 @@ void __ns_tree_add_raw(struct ns_common *ns, struct ns_tree *ns_tree) VFS_WARN_ON_ONCE(node); } -void __ns_tree_remove(struct ns_common *ns, struct ns_tree *ns_tree) +void __ns_tree_remove(struct ns_common *ns, struct ns_tree_root *ns_tree) { const struct proc_ns_operations *ops = ns->ops; struct user_namespace *user_ns; - VFS_WARN_ON_ONCE(RB_EMPTY_NODE(&ns->ns_tree_node)); - VFS_WARN_ON_ONCE(list_empty(&ns->ns_list_node)); - VFS_WARN_ON_ONCE(ns->ns_type != ns_tree->type); + VFS_WARN_ON_ONCE(ns_tree_node_empty(&ns->ns_tree_node)); + VFS_WARN_ON_ONCE(list_empty(&ns->ns_tree_node.ns_list_entry)); write_seqlock(&ns_tree_lock); - rb_erase(&ns->ns_tree_node, &ns_tree->ns_tree); - RB_CLEAR_NODE(&ns->ns_tree_node); - - list_bidir_del_rcu(&ns->ns_list_node); - rb_erase(&ns->ns_unified_tree_node, &ns_unified_tree); - RB_CLEAR_NODE(&ns->ns_unified_tree_node); + /* Remove from per-type tree and list */ + ns_tree_node_del(&ns->ns_tree_node, ns_tree); - list_bidir_del_rcu(&ns->ns_unified_list_node); + /* Remove from unified tree and list */ + ns_tree_node_del(&ns->ns_unified_node, &ns_unified_root); - /* Remove from owner's rbtree if this namespace has an owner */ + /* Remove from owner's tree if applicable */ if (ops) { user_ns = ops->owner(ns); if (user_ns) { struct ns_common *owner = &user_ns->ns; - rb_erase(&ns->ns_owner_tree_node, &owner->ns_owner_tree); - RB_CLEAR_NODE(&ns->ns_owner_tree_node); + ns_tree_node_del(&ns->ns_owner_node, &owner->ns_owner_root); } - - list_bidir_del_rcu(&ns->ns_owner_entry); } write_sequnlock(&ns_tree_lock); @@ -320,7 +276,7 @@ static int ns_find_unified(const void *key, const struct rb_node *node) return 0; } -static struct ns_tree *ns_tree_from_type(int ns_type) +static struct ns_tree_root *ns_tree_from_type(int ns_type) { switch (ns_type) { case CLONE_NEWCGROUP: @@ -351,7 +307,7 @@ static struct ns_common *__ns_unified_tree_lookup_rcu(u64 ns_id) do { seq = read_seqbegin(&ns_tree_lock); - node = rb_find_rcu(&ns_id, &ns_unified_tree, ns_find_unified); + node = rb_find_rcu(&ns_id, &ns_unified_root.ns_rb, ns_find_unified); if (node) break; } while (read_seqretry(&ns_tree_lock, seq)); @@ -361,7 +317,7 @@ static struct ns_common *__ns_unified_tree_lookup_rcu(u64 ns_id) static struct ns_common *__ns_tree_lookup_rcu(u64 ns_id, int ns_type) { - struct ns_tree *ns_tree; + struct ns_tree_root *ns_tree; struct rb_node *node; unsigned int seq; @@ -371,7 +327,7 @@ static struct ns_common *__ns_tree_lookup_rcu(u64 ns_id, int ns_type) do { seq = read_seqbegin(&ns_tree_lock); - node = rb_find_rcu(&ns_id, &ns_tree->ns_tree, ns_find); + node = rb_find_rcu(&ns_id, &ns_tree->ns_rb, ns_find); if (node) break; } while (read_seqretry(&ns_tree_lock, seq)); @@ -399,22 +355,20 @@ struct ns_common *ns_tree_lookup_rcu(u64 ns_id, int ns_type) * there is no next/previous namespace, -ENOENT is returned. */ struct ns_common *__ns_tree_adjoined_rcu(struct ns_common *ns, - struct ns_tree *ns_tree, bool previous) + struct ns_tree_root *ns_tree, bool previous) { struct list_head *list; RCU_LOCKDEP_WARN(!rcu_read_lock_held(), "suspicious ns_tree_adjoined_rcu() usage"); if (previous) - list = rcu_dereference(list_bidir_prev_rcu(&ns->ns_list_node)); + list = rcu_dereference(list_bidir_prev_rcu(&ns->ns_tree_node.ns_list_entry)); else - list = rcu_dereference(list_next_rcu(&ns->ns_list_node)); - if (list_is_head(list, &ns_tree->ns_list)) + list = rcu_dereference(list_next_rcu(&ns->ns_tree_node.ns_list_entry)); + if (list_is_head(list, &ns_tree->ns_list_head)) return ERR_PTR(-ENOENT); - VFS_WARN_ON_ONCE(list_entry_rcu(list, struct ns_common, ns_list_node)->ns_type != ns_tree->type); - - return list_entry_rcu(list, struct ns_common, ns_list_node); + return list_entry_rcu(list, struct ns_common, ns_tree_node.ns_list_entry); } /** @@ -508,7 +462,7 @@ static struct ns_common *lookup_ns_owner_at(u64 ns_id, struct ns_common *owner) VFS_WARN_ON_ONCE(owner->ns_type != CLONE_NEWUSER); read_seqlock_excl(&ns_tree_lock); - node = owner->ns_owner_tree.rb_node; + node = owner->ns_owner_root.ns_rb.rb_node; while (node) { struct ns_common *ns; @@ -638,16 +592,15 @@ static ssize_t do_listns_userns(struct klistns *kls) } ret = 0; - head = &to_ns_common(kls->user_ns)->ns_owner; + head = &to_ns_common(kls->user_ns)->ns_owner_root.ns_list_head; kls->userns_capable = ns_capable_noaudit(kls->user_ns, CAP_SYS_ADMIN); rcu_read_lock(); if (!first_ns) - first_ns = list_entry_rcu(head->next, typeof(*ns), ns_owner_entry); - - for (ns = first_ns; &ns->ns_owner_entry != head && nr_ns_ids; - ns = list_entry_rcu(ns->ns_owner_entry.next, typeof(*ns), ns_owner_entry)) { + first_ns = list_entry_rcu(head->next, typeof(*ns), ns_owner_node.ns_list_entry); + for (ns = first_ns; &ns->ns_owner_node.ns_list_entry != head && nr_ns_ids; + ns = list_entry_rcu(ns->ns_owner_node.ns_list_entry.next, typeof(*ns), ns_owner_node.ns_list_entry)) { struct ns_common *valid; valid = legitimize_ns(kls, ns); @@ -682,7 +635,7 @@ static ssize_t do_listns_userns(struct klistns *kls) static struct ns_common *lookup_ns_id_at(u64 ns_id, int ns_type) { struct ns_common *ret = NULL; - struct ns_tree *ns_tree = NULL; + struct ns_tree_root *ns_tree = NULL; struct rb_node *node; if (ns_type) { @@ -693,9 +646,9 @@ static struct ns_common *lookup_ns_id_at(u64 ns_id, int ns_type) read_seqlock_excl(&ns_tree_lock); if (ns_tree) - node = ns_tree->ns_tree.rb_node; + node = ns_tree->ns_rb.rb_node; else - node = ns_unified_tree.rb_node; + node = ns_unified_root.ns_rb.rb_node; while (node) { struct ns_common *ns; @@ -725,28 +678,28 @@ static struct ns_common *lookup_ns_id_at(u64 ns_id, int ns_type) } static inline struct ns_common *first_ns_common(const struct list_head *head, - struct ns_tree *ns_tree) + struct ns_tree_root *ns_tree) { if (ns_tree) - return list_entry_rcu(head->next, struct ns_common, ns_list_node); - return list_entry_rcu(head->next, struct ns_common, ns_unified_list_node); + return list_entry_rcu(head->next, struct ns_common, ns_tree_node.ns_list_entry); + return list_entry_rcu(head->next, struct ns_common, ns_unified_node.ns_list_entry); } static inline struct ns_common *next_ns_common(struct ns_common *ns, - struct ns_tree *ns_tree) + struct ns_tree_root *ns_tree) { if (ns_tree) - return list_entry_rcu(ns->ns_list_node.next, struct ns_common, ns_list_node); - return list_entry_rcu(ns->ns_unified_list_node.next, struct ns_common, ns_unified_list_node); + return list_entry_rcu(ns->ns_tree_node.ns_list_entry.next, struct ns_common, ns_tree_node.ns_list_entry); + return list_entry_rcu(ns->ns_unified_node.ns_list_entry.next, struct ns_common, ns_unified_node.ns_list_entry); } static inline bool ns_common_is_head(struct ns_common *ns, const struct list_head *head, - struct ns_tree *ns_tree) + struct ns_tree_root *ns_tree) { if (ns_tree) - return &ns->ns_list_node == head; - return &ns->ns_unified_list_node == head; + return &ns->ns_tree_node.ns_list_entry == head; + return &ns->ns_unified_node.ns_list_entry == head; } static ssize_t do_listns(struct klistns *kls) @@ -754,7 +707,7 @@ static ssize_t do_listns(struct klistns *kls) u64 __user *ns_ids = kls->uns_ids; size_t nr_ns_ids = kls->nr_ns_ids; struct ns_common *ns, *first_ns = NULL, *prev = NULL; - struct ns_tree *ns_tree = NULL; + struct ns_tree_root *ns_tree = NULL; const struct list_head *head; u32 ns_type; ssize_t ret; @@ -779,9 +732,9 @@ static ssize_t do_listns(struct klistns *kls) ret = 0; if (ns_tree) - head = &ns_tree->ns_list; + head = &ns_tree->ns_list_head; else - head = &ns_unified_list; + head = &ns_unified_root.ns_list_head; rcu_read_lock(); -- cgit 1.2.3-korg From d9a44089ac7754e470dfdaf3658c3455b2d7f7dd Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 10 Nov 2025 16:08:22 +0100 Subject: fs: use boolean to indicate anonymous mount namespace Stop playing games with the namespace id and use a boolean instead: * This will remove the special-casing we need to do everywhere for mount namespaces. * It will allow us to use asserts on the namespace id for initial namespaces everywhere. * It will allow us to put anonymous mount namespaces on the namespaces trees in the future and thus make them available to statmount() and listmount(). Link: https://patch.msgid.link/20251110-work-namespace-nstree-fixes-v1-10-e8a9264e0fb9@kernel.org Signed-off-by: Christian Brauner --- fs/mount.h | 3 ++- fs/namespace.c | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/mount.h b/fs/mount.h index f13a28752d0ba2..2d28ef2a3aed67 100644 --- a/fs/mount.h +++ b/fs/mount.h @@ -27,6 +27,7 @@ struct mnt_namespace { unsigned int nr_mounts; /* # of mounts in the namespace */ unsigned int pending_mounts; refcount_t passive; /* number references not pinning @mounts */ + bool is_anon; } __randomize_layout; struct mnt_pcp { @@ -175,7 +176,7 @@ static inline bool is_local_mountpoint(const struct dentry *dentry) static inline bool is_anon_ns(struct mnt_namespace *ns) { - return ns->ns.ns_id == 0; + return ns->is_anon; } static inline bool anon_ns_root(const struct mount *m) diff --git a/fs/namespace.c b/fs/namespace.c index ad19530a13b225..efaff8680eafbd 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -4093,8 +4093,9 @@ static struct mnt_namespace *alloc_mnt_ns(struct user_namespace *user_ns, bool a dec_mnt_namespaces(ucounts); return ERR_PTR(ret); } - if (!anon) - ns_tree_gen_id(new_ns); + ns_tree_gen_id(new_ns); + + new_ns->is_anon = anon; refcount_set(&new_ns->passive, 1); new_ns->mounts = RB_ROOT; init_waitqueue_head(&new_ns->poll); -- cgit 1.2.3-korg From c2bbd2db521b018c59fb0ff8e1cdfa8ee907ba88 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Mon, 10 Nov 2025 16:08:28 +0100 Subject: ns: drop custom reference count initialization for initial namespaces Initial namespaces don't modify their reference count anymore. They remain fixed at one so drop the custom refcount initializations. Link: https://patch.msgid.link/20251110-work-namespace-nstree-fixes-v1-16-e8a9264e0fb9@kernel.org Signed-off-by: Christian Brauner --- fs/namespace.c | 2 +- include/linux/ns_common.h | 4 ++-- init/version-timestamp.c | 2 +- ipc/msgutil.c | 2 +- kernel/cgroup/cgroup.c | 2 +- kernel/pid.c | 2 +- kernel/time/namespace.c | 2 +- kernel/user.c | 2 +- 8 files changed, 9 insertions(+), 9 deletions(-) (limited to 'fs/namespace.c') diff --git a/fs/namespace.c b/fs/namespace.c index efaff8680eafbd..25289b869be15e 100644 --- a/fs/namespace.c +++ b/fs/namespace.c @@ -5986,7 +5986,7 @@ SYSCALL_DEFINE4(listmount, const struct mnt_id_req __user *, req, } struct mnt_namespace init_mnt_ns = { - .ns = NS_COMMON_INIT(init_mnt_ns, 1), + .ns = NS_COMMON_INIT(init_mnt_ns), .user_ns = &init_user_ns, .passive = REFCOUNT_INIT(1), .mounts = RB_ROOT, diff --git a/include/linux/ns_common.h b/include/linux/ns_common.h index 43f709ab846a72..136f6a322e5369 100644 --- a/include/linux/ns_common.h +++ b/include/linux/ns_common.h @@ -26,14 +26,14 @@ static __always_inline bool is_ns_init_id(const struct ns_common *ns) return ns->ns_id <= NS_LAST_INIT_ID; } -#define NS_COMMON_INIT(nsname, refs) \ +#define NS_COMMON_INIT(nsname) \ { \ .ns_type = ns_common_type(&nsname), \ .ns_id = ns_init_id(&nsname), \ .inum = ns_init_inum(&nsname), \ .ops = to_ns_operations(&nsname), \ .stashed = NULL, \ - .__ns_ref = REFCOUNT_INIT(refs), \ + .__ns_ref = REFCOUNT_INIT(1), \ .__ns_ref_active = ATOMIC_INIT(1), \ .ns_unified_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_unified_node.ns_list_entry), \ .ns_tree_node.ns_list_entry = LIST_HEAD_INIT(nsname.ns.ns_tree_node.ns_list_entry), \ diff --git a/init/version-timestamp.c b/init/version-timestamp.c index 56ded64fdfe409..375726e05f690f 100644 --- a/init/version-timestamp.c +++ b/init/version-timestamp.c @@ -8,7 +8,7 @@ #include struct uts_namespace init_uts_ns = { - .ns = NS_COMMON_INIT(init_uts_ns, 2), + .ns = NS_COMMON_INIT(init_uts_ns), .name = { .sysname = UTS_SYSNAME, .nodename = UTS_NODENAME, diff --git a/ipc/msgutil.c b/ipc/msgutil.c index 55a908ec0674c6..e28f0cecb2ec94 100644 --- a/ipc/msgutil.c +++ b/ipc/msgutil.c @@ -27,7 +27,7 @@ DEFINE_SPINLOCK(mq_lock); * and not CONFIG_IPC_NS. */ struct ipc_namespace init_ipc_ns = { - .ns = NS_COMMON_INIT(init_ipc_ns, 1), + .ns = NS_COMMON_INIT(init_ipc_ns), .user_ns = &init_user_ns, }; diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c index 20ab84b2cf4e08..2bf3951ca88f63 100644 --- a/kernel/cgroup/cgroup.c +++ b/kernel/cgroup/cgroup.c @@ -250,7 +250,7 @@ bool cgroup_enable_per_threadgroup_rwsem __read_mostly; /* cgroup namespace for init task */ struct cgroup_namespace init_cgroup_ns = { - .ns = NS_COMMON_INIT(init_cgroup_ns, 2), + .ns = NS_COMMON_INIT(init_cgroup_ns), .user_ns = &init_user_ns, .root_cset = &init_css_set, }; diff --git a/kernel/pid.c b/kernel/pid.c index a5a63dc0a49171..a31771bc89c1a3 100644 --- a/kernel/pid.c +++ b/kernel/pid.c @@ -71,7 +71,7 @@ static int pid_max_max = PID_MAX_LIMIT; * the scheme scales to up to 4 million PIDs, runtime. */ struct pid_namespace init_pid_ns = { - .ns = NS_COMMON_INIT(init_pid_ns, 2), + .ns = NS_COMMON_INIT(init_pid_ns), .idr = IDR_INIT(init_pid_ns.idr), .pid_allocated = PIDNS_ADDING, .level = 0, diff --git a/kernel/time/namespace.c b/kernel/time/namespace.c index 19911f88e2b88d..e76be24b132cf5 100644 --- a/kernel/time/namespace.c +++ b/kernel/time/namespace.c @@ -478,7 +478,7 @@ const struct proc_ns_operations timens_for_children_operations = { }; struct time_namespace init_time_ns = { - .ns = NS_COMMON_INIT(init_time_ns, 3), + .ns = NS_COMMON_INIT(init_time_ns), .user_ns = &init_user_ns, .frozen_offsets = true, }; diff --git a/kernel/user.c b/kernel/user.c index 4b3132e786d943..7aef4e679a6a0c 100644 --- a/kernel/user.c +++ b/kernel/user.c @@ -35,7 +35,7 @@ EXPORT_SYMBOL_GPL(init_binfmt_misc); * and 1 for... ? */ struct user_namespace init_user_ns = { - .ns = NS_COMMON_INIT(init_user_ns, 3), + .ns = NS_COMMON_INIT(init_user_ns), .uid_map = { { .extent[0] = { -- cgit 1.2.3-korg