aboutsummaryrefslogtreecommitdiffstats
path: root/include
diff options
context:
space:
mode:
authorLinus Torvalds <torvalds@linux-foundation.org>2025-12-03 16:54:54 -0800
committerLinus Torvalds <torvalds@linux-foundation.org>2025-12-03 16:54:54 -0800
commit015e7b0b0e8e51f7321ec2aafc1d7fc0a8a5536f (patch)
tree258f719e59946c733dd03198eba404e85f9d0945 /include
parentb6d993310a65b994f37e3347419d9ed398ee37a3 (diff)
parentff34657aa72a4dab9c2fd38e1b31a506951f4b1c (diff)
downloadnet-015e7b0b0e8e51f7321ec2aafc1d7fc0a8a5536f.tar.gz
Merge tag 'bpf-next-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next
Pull bpf updates from Alexei Starovoitov: - Convert selftests/bpf/test_tc_edt and test_tc_tunnel from .sh to test_progs runner (Alexis Lothoré) - Convert selftests/bpf/test_xsk to test_progs runner (Bastien Curutchet) - Replace bpf memory allocator with kmalloc_nolock() in bpf_local_storage (Amery Hung), and in bpf streams and range tree (Puranjay Mohan) - Introduce support for indirect jumps in BPF verifier and x86 JIT (Anton Protopopov) and arm64 JIT (Puranjay Mohan) - Remove runqslower bpf tool (Hoyeon Lee) - Fix corner cases in the verifier to close several syzbot reports (Eduard Zingerman, KaFai Wan) - Several improvements in deadlock detection in rqspinlock (Kumar Kartikeya Dwivedi) - Implement "jmp" mode for BPF trampoline and corresponding DYNAMIC_FTRACE_WITH_JMP. It improves "fexit" program type performance from 80 M/s to 136 M/s. With Steven's Ack. (Menglong Dong) - Add ability to test non-linear skbs in BPF_PROG_TEST_RUN (Paul Chaignon) - Do not let BPF_PROG_TEST_RUN emit invalid GSO types to stack (Daniel Borkmann) - Generalize buildid reader into bpf_dynptr (Mykyta Yatsenko) - Optimize bpf_map_update_elem() for map-in-map types (Ritesh Oedayrajsingh Varma) - Introduce overwrite mode for BPF ring buffer (Xu Kuohai) * tag 'bpf-next-6.19' of git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next: (169 commits) bpf: optimize bpf_map_update_elem() for map-in-map types bpf: make kprobe_multi_link_prog_run always_inline selftests/bpf: do not hardcode target rate in test_tc_edt BPF program selftests/bpf: remove test_tc_edt.sh selftests/bpf: integrate test_tc_edt into test_progs selftests/bpf: rename test_tc_edt.bpf.c section to expose program type selftests/bpf: Add success stats to rqspinlock stress test rqspinlock: Precede non-head waiter queueing with AA check rqspinlock: Disable spinning for trylock fallback rqspinlock: Use trylock fallback when per-CPU rqnode is busy rqspinlock: Perform AA checks immediately rqspinlock: Enclose lock/unlock within lock entry acquisitions bpf: Remove runqslower tool selftests/bpf: Remove usage of lsm/file_alloc_security in selftest bpf: Disable file_alloc_security hook bpf: check for insn arrays in check_ptr_alignment bpf: force BPF_F_RDONLY_PROG on insn array creation bpf: Fix exclusive map memory leak selftests/bpf: Make CS length configurable for rqspinlock stress test selftests/bpf: Add lock wait time stats to rqspinlock stress test ...
Diffstat (limited to 'include')
-rw-r--r--include/asm-generic/rqspinlock.h60
-rw-r--r--include/linux/bpf.h102
-rw-r--r--include/linux/bpf_local_storage.h13
-rw-r--r--include/linux/bpf_types.h1
-rw-r--r--include/linux/bpf_verifier.h30
-rw-r--r--include/linux/buildid.h25
-rw-r--r--include/linux/filter.h12
-rw-r--r--include/linux/ftrace.h33
-rw-r--r--include/uapi/linux/bpf.h33
9 files changed, 242 insertions, 67 deletions
diff --git a/include/asm-generic/rqspinlock.h b/include/asm-generic/rqspinlock.h
index 6d4244d643df32..0f2dcbbfee2f0a 100644
--- a/include/asm-generic/rqspinlock.h
+++ b/include/asm-generic/rqspinlock.h
@@ -129,8 +129,8 @@ dec:
* <error> for lock B
* release_held_lock_entry
*
- * try_cmpxchg_acquire for lock A
* grab_held_lock_entry
+ * try_cmpxchg_acquire for lock A
*
* Lack of any ordering means reordering may occur such that dec, inc
* are done before entry is overwritten. This permits a remote lock
@@ -139,13 +139,8 @@ dec:
* CPU holds a lock it is attempting to acquire, leading to false ABBA
* diagnosis).
*
- * In case of unlock, we will always do a release on the lock word after
- * releasing the entry, ensuring that other CPUs cannot hold the lock
- * (and make conclusions about deadlocks) until the entry has been
- * cleared on the local CPU, preventing any anomalies. Reordering is
- * still possible there, but a remote CPU cannot observe a lock in our
- * table which it is already holding, since visibility entails our
- * release store for the said lock has not retired.
+ * The case of unlock is treated differently due to NMI reentrancy, see
+ * comments in res_spin_unlock.
*
* In theory we don't have a problem if the dec and WRITE_ONCE above get
* reordered with each other, we either notice an empty NULL entry on
@@ -175,10 +170,22 @@ static __always_inline int res_spin_lock(rqspinlock_t *lock)
{
int val = 0;
- if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL))) {
- grab_held_lock_entry(lock);
+ /*
+ * Grab the deadlock detection entry before doing the cmpxchg, so that
+ * reentrancy due to NMIs between the succeeding cmpxchg and creation of
+ * held lock entry can correctly detect an acquisition attempt in the
+ * interrupted context.
+ *
+ * cmpxchg lock A
+ * <NMI>
+ * res_spin_lock(A) --> missed AA, leads to timeout
+ * </NMI>
+ * grab_held_lock_entry(A)
+ */
+ grab_held_lock_entry(lock);
+
+ if (likely(atomic_try_cmpxchg_acquire(&lock->val, &val, _Q_LOCKED_VAL)))
return 0;
- }
return resilient_queued_spin_lock_slowpath(lock, val);
}
@@ -192,28 +199,25 @@ static __always_inline void res_spin_unlock(rqspinlock_t *lock)
{
struct rqspinlock_held *rqh = this_cpu_ptr(&rqspinlock_held_locks);
- if (unlikely(rqh->cnt > RES_NR_HELD))
- goto unlock;
- WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL);
-unlock:
/*
- * Release barrier, ensures correct ordering. See release_held_lock_entry
- * for details. Perform release store instead of queued_spin_unlock,
- * since we use this function for test-and-set fallback as well. When we
- * have CONFIG_QUEUED_SPINLOCKS=n, we clear the full 4-byte lockword.
+ * Release barrier, ensures correct ordering. Perform release store
+ * instead of queued_spin_unlock, since we use this function for the TAS
+ * fallback as well. When we have CONFIG_QUEUED_SPINLOCKS=n, we clear
+ * the full 4-byte lockword.
*
- * Like release_held_lock_entry, we can do the release before the dec.
- * We simply care about not seeing the 'lock' in our table from a remote
- * CPU once the lock has been released, which doesn't rely on the dec.
+ * Perform the smp_store_release before clearing the lock entry so that
+ * NMIs landing in the unlock path can correctly detect AA issues. The
+ * opposite order shown below may lead to missed AA checks:
*
- * Unlike smp_wmb(), release is not a two way fence, hence it is
- * possible for a inc to move up and reorder with our clearing of the
- * entry. This isn't a problem however, as for a misdiagnosis of ABBA,
- * the remote CPU needs to hold this lock, which won't be released until
- * the store below is done, which would ensure the entry is overwritten
- * to NULL, etc.
+ * WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL)
+ * <NMI>
+ * res_spin_lock(A) --> missed AA, leads to timeout
+ * </NMI>
+ * smp_store_release(A->locked, 0)
*/
smp_store_release(&lock->locked, 0);
+ if (likely(rqh->cnt <= RES_NR_HELD))
+ WRITE_ONCE(rqh->locks[rqh->cnt - 1], NULL);
this_cpu_dec(rqspinlock_held_locks.cnt);
}
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index d808253f2e945d..6498be4c44f8c2 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -663,6 +663,16 @@ int map_check_no_btf(const struct bpf_map *map,
bool bpf_map_meta_equal(const struct bpf_map *meta0,
const struct bpf_map *meta1);
+static inline bool bpf_map_has_internal_structs(struct bpf_map *map)
+{
+ return btf_record_has_field(map->record, BPF_TIMER | BPF_WORKQUEUE | BPF_TASK_WORK);
+}
+
+void bpf_map_free_internal_structs(struct bpf_map *map, void *obj);
+
+int bpf_dynptr_from_file_sleepable(struct file *file, u32 flags,
+ struct bpf_dynptr *ptr__uninit);
+
extern const struct bpf_map_ops bpf_map_offload_ops;
/* bpf_type_flag contains a set of flags that are applicable to the values of
@@ -785,12 +795,15 @@ enum bpf_type_flag {
/* DYNPTR points to skb_metadata_end()-skb_metadata_len() */
DYNPTR_TYPE_SKB_META = BIT(19 + BPF_BASE_TYPE_BITS),
+ /* DYNPTR points to file */
+ DYNPTR_TYPE_FILE = BIT(20 + BPF_BASE_TYPE_BITS),
+
__BPF_TYPE_FLAG_MAX,
__BPF_TYPE_LAST_FLAG = __BPF_TYPE_FLAG_MAX - 1,
};
#define DYNPTR_TYPE_FLAG_MASK (DYNPTR_TYPE_LOCAL | DYNPTR_TYPE_RINGBUF | DYNPTR_TYPE_SKB \
- | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META)
+ | DYNPTR_TYPE_XDP | DYNPTR_TYPE_SKB_META | DYNPTR_TYPE_FILE)
/* Max number of base types. */
#define BPF_BASE_TYPE_LIMIT (1UL << BPF_BASE_TYPE_BITS)
@@ -988,6 +1001,7 @@ enum bpf_reg_type {
PTR_TO_ARENA,
PTR_TO_BUF, /* reg points to a read/write buffer */
PTR_TO_FUNC, /* reg points to a bpf program function */
+ PTR_TO_INSN, /* reg points to a bpf program instruction */
CONST_PTR_TO_DYNPTR, /* reg points to a const struct bpf_dynptr */
__BPF_REG_TYPE_MAX,
@@ -1250,6 +1264,18 @@ typedef void (*bpf_trampoline_exit_t)(struct bpf_prog *prog, u64 start,
bpf_trampoline_enter_t bpf_trampoline_enter(const struct bpf_prog *prog);
bpf_trampoline_exit_t bpf_trampoline_exit(const struct bpf_prog *prog);
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
+static inline bool bpf_trampoline_use_jmp(u64 flags)
+{
+ return flags & BPF_TRAMP_F_CALL_ORIG && !(flags & BPF_TRAMP_F_SKIP_FRAME);
+}
+#else
+static inline bool bpf_trampoline_use_jmp(u64 flags)
+{
+ return false;
+}
+#endif
+
struct bpf_ksym {
unsigned long start;
unsigned long end;
@@ -1378,21 +1404,23 @@ enum bpf_dynptr_type {
BPF_DYNPTR_TYPE_XDP,
/* Points to skb_metadata_end()-skb_metadata_len() */
BPF_DYNPTR_TYPE_SKB_META,
+ /* Underlying data is a file */
+ BPF_DYNPTR_TYPE_FILE,
};
-int bpf_dynptr_check_size(u32 size);
-u32 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
-const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u32 len);
-void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u32 len);
+int bpf_dynptr_check_size(u64 size);
+u64 __bpf_dynptr_size(const struct bpf_dynptr_kern *ptr);
+const void *__bpf_dynptr_data(const struct bpf_dynptr_kern *ptr, u64 len);
+void *__bpf_dynptr_data_rw(const struct bpf_dynptr_kern *ptr, u64 len);
bool __bpf_dynptr_is_rdonly(const struct bpf_dynptr_kern *ptr);
-int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u32 offset,
- void *src, u32 len, u64 flags);
-void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u32 offset,
- void *buffer__opt, u32 buffer__szk);
+int __bpf_dynptr_write(const struct bpf_dynptr_kern *dst, u64 offset,
+ void *src, u64 len, u64 flags);
+void *bpf_dynptr_slice_rdwr(const struct bpf_dynptr *p, u64 offset,
+ void *buffer__opt, u64 buffer__szk);
-static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u32 offset, u32 len)
+static inline int bpf_dynptr_check_off_len(const struct bpf_dynptr_kern *ptr, u64 offset, u64 len)
{
- u32 size = __bpf_dynptr_size(ptr);
+ u64 size = __bpf_dynptr_size(ptr);
if (len > size || offset > size - len)
return -E2BIG;
@@ -1616,6 +1644,7 @@ struct bpf_prog_aux {
u32 ctx_arg_info_size;
u32 max_rdonly_access;
u32 max_rdwr_access;
+ u32 subprog_start;
struct btf *attach_btf;
struct bpf_ctx_arg_aux *ctx_arg_info;
void __percpu *priv_stack_ptr;
@@ -1905,12 +1934,14 @@ struct btf_member;
* reason, if this callback is not defined, the check is skipped as
* the struct_ops map will have final verification performed in
* @reg.
- * @type: BTF type.
- * @value_type: Value type.
+ * @cfi_stubs: Pointer to a structure of stub functions for CFI. These stubs
+ * provide the correct Control Flow Integrity hashes for the
+ * trampolines generated by BPF struct_ops.
+ * @owner: The module that owns this struct_ops. Used for module reference
+ * counting to ensure the module providing the struct_ops cannot be
+ * unloaded while in use.
* @name: The name of the struct bpf_struct_ops object.
* @func_models: Func models
- * @type_id: BTF type id.
- * @value_id: BTF value id.
*/
struct bpf_struct_ops {
const struct bpf_verifier_ops *verifier_ops;
@@ -2099,6 +2130,12 @@ struct bpf_array {
};
};
+/*
+ * The bpf_array_get_next_key() function may be used for all array-like
+ * maps, i.e., maps with u32 keys with range [0 ,..., max_entries)
+ */
+int bpf_array_get_next_key(struct bpf_map *map, void *key, void *next_key);
+
#define BPF_COMPLEXITY_LIMIT_INSNS 1000000 /* yes. 1M insns */
#define MAX_TAIL_CALL_CNT 33
@@ -2374,6 +2411,9 @@ bpf_prog_run_array_uprobe(const struct bpf_prog_array *array,
bool bpf_jit_bypass_spec_v1(void);
bool bpf_jit_bypass_spec_v4(void);
+#define bpf_rcu_lock_held() \
+ (rcu_read_lock_held() || rcu_read_lock_trace_held() || rcu_read_lock_bh_held())
+
#ifdef CONFIG_BPF_SYSCALL
DECLARE_PER_CPU(int, bpf_prog_active);
extern struct mutex bpf_stats_enabled_mutex;
@@ -3670,12 +3710,14 @@ static inline u32 bpf_xdp_sock_convert_ctx_access(enum bpf_access_type type,
#endif /* CONFIG_INET */
enum bpf_text_poke_type {
+ BPF_MOD_NOP,
BPF_MOD_CALL,
BPF_MOD_JUMP,
};
-int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type t,
- void *addr1, void *addr2);
+int bpf_arch_text_poke(void *ip, enum bpf_text_poke_type old_t,
+ enum bpf_text_poke_type new_t, void *old_addr,
+ void *new_addr);
void bpf_arch_poke_desc_update(struct bpf_jit_poke_descriptor *poke,
struct bpf_prog *new, struct bpf_prog *old);
@@ -3772,4 +3814,30 @@ int bpf_prog_get_file_line(struct bpf_prog *prog, unsigned long ip, const char *
const char **linep, int *nump);
struct bpf_prog *bpf_prog_find_from_stack(void);
+int bpf_insn_array_init(struct bpf_map *map, const struct bpf_prog *prog);
+int bpf_insn_array_ready(struct bpf_map *map);
+void bpf_insn_array_release(struct bpf_map *map);
+void bpf_insn_array_adjust(struct bpf_map *map, u32 off, u32 len);
+void bpf_insn_array_adjust_after_remove(struct bpf_map *map, u32 off, u32 len);
+
+#ifdef CONFIG_BPF_SYSCALL
+void bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image);
+#else
+static inline void
+bpf_prog_update_insn_ptrs(struct bpf_prog *prog, u32 *offsets, void *image)
+{
+}
+#endif
+
+static inline int bpf_map_check_op_flags(struct bpf_map *map, u64 flags, u64 allowed_flags)
+{
+ if (flags & ~allowed_flags)
+ return -EINVAL;
+
+ if ((flags & BPF_F_LOCK) && !btf_record_has_field(map->record, BPF_SPIN_LOCK))
+ return -EINVAL;
+
+ return 0;
+}
+
#endif /* _LINUX_BPF_H */
diff --git a/include/linux/bpf_local_storage.h b/include/linux/bpf_local_storage.h
index ab7244d8108f60..66432248cd810e 100644
--- a/include/linux/bpf_local_storage.h
+++ b/include/linux/bpf_local_storage.h
@@ -18,9 +18,6 @@
#define BPF_LOCAL_STORAGE_CACHE_SIZE 16
-#define bpf_rcu_lock_held() \
- (rcu_read_lock_held() || rcu_read_lock_trace_held() || \
- rcu_read_lock_bh_held())
struct bpf_local_storage_map_bucket {
struct hlist_head list;
raw_spinlock_t lock;
@@ -56,9 +53,7 @@ struct bpf_local_storage_map {
u32 bucket_log;
u16 elem_size;
u16 cache_idx;
- struct bpf_mem_alloc selem_ma;
- struct bpf_mem_alloc storage_ma;
- bool bpf_ma;
+ bool use_kmalloc_nolock;
};
struct bpf_local_storage_data {
@@ -100,6 +95,7 @@ struct bpf_local_storage {
*/
struct rcu_head rcu;
raw_spinlock_t lock; /* Protect adding/removing from the "list" */
+ bool use_kmalloc_nolock;
};
/* U16_MAX is much more than enough for sk local storage
@@ -133,7 +129,7 @@ int bpf_local_storage_map_alloc_check(union bpf_attr *attr);
struct bpf_map *
bpf_local_storage_map_alloc(union bpf_attr *attr,
struct bpf_local_storage_cache *cache,
- bool bpf_ma);
+ bool use_kmalloc_nolock);
void __bpf_local_storage_insert_cache(struct bpf_local_storage *local_storage,
struct bpf_local_storage_map *smap,
@@ -187,10 +183,9 @@ void bpf_selem_link_map(struct bpf_local_storage_map *smap,
struct bpf_local_storage_elem *
bpf_selem_alloc(struct bpf_local_storage_map *smap, void *owner, void *value,
- bool charge_mem, bool swap_uptrs, gfp_t gfp_flags);
+ bool swap_uptrs, gfp_t gfp_flags);
void bpf_selem_free(struct bpf_local_storage_elem *selem,
- struct bpf_local_storage_map *smap,
bool reuse_now);
int
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index fa78f49d4a9a64..b13de31e163f84 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -133,6 +133,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_BLOOM_FILTER, bloom_filter_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_USER_RINGBUF, user_ringbuf_map_ops)
BPF_MAP_TYPE(BPF_MAP_TYPE_ARENA, arena_map_ops)
+BPF_MAP_TYPE(BPF_MAP_TYPE_INSN_ARRAY, insn_array_map_ops)
BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 4c497e839526a4..130bcbd66f6060 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -416,7 +416,7 @@ struct bpf_verifier_state {
u32 active_irq_id;
u32 active_lock_id;
void *active_lock_ptr;
- bool active_rcu_lock;
+ u32 active_rcu_locks;
bool speculative;
bool in_sleepable;
@@ -509,6 +509,15 @@ struct bpf_map_ptr_state {
#define BPF_ALU_SANITIZE (BPF_ALU_SANITIZE_SRC | \
BPF_ALU_SANITIZE_DST)
+/*
+ * An array of BPF instructions.
+ * Primary usage: return value of bpf_insn_successors.
+ */
+struct bpf_iarray {
+ int cnt;
+ u32 items[];
+};
+
struct bpf_insn_aux_data {
union {
enum bpf_reg_type ptr_type; /* pointer type for load/store insns */
@@ -540,6 +549,7 @@ struct bpf_insn_aux_data {
/* remember the offset of node field within type to rewrite */
u64 insert_off;
};
+ struct bpf_iarray *jt; /* jump table for gotox or bpf_tailcall call instruction */
struct btf_struct_meta *kptr_struct_meta;
u64 map_key_state; /* constant (32 bit) key tracking for maps */
int ctx_field_size; /* the ctx field size for load insn, maybe 0 */
@@ -548,7 +558,7 @@ struct bpf_insn_aux_data {
bool nospec_result; /* result is unsafe under speculation, nospec must follow */
bool zext_dst; /* this insn zero extends dst reg */
bool needs_zext; /* alu op needs to clear upper bits */
- bool storage_get_func_atomic; /* bpf_*_storage_get() with atomic memory alloc */
+ bool non_sleepable; /* helper/kfunc may be called from non-sleepable context */
bool is_iter_next; /* bpf_iter_<type>_next() kfunc call */
bool call_with_percpu_alloc_ptr; /* {this,per}_cpu_ptr() with prog percpu alloc */
u8 alu_state; /* used in combination with alu_limit */
@@ -642,6 +652,7 @@ struct bpf_subprog_info {
u32 start; /* insn idx of function entry point */
u32 linfo_idx; /* The idx to the main_prog->aux->linfo */
u32 postorder_start; /* The idx to the env->cfg.insn_postorder */
+ u32 exit_idx; /* Index of one of the BPF_EXIT instructions in this subprogram */
u16 stack_depth; /* max. stack depth used by this function */
u16 stack_extra;
/* offsets in range [stack_depth .. fastcall_stack_off)
@@ -659,9 +670,9 @@ struct bpf_subprog_info {
bool keep_fastcall_stack: 1;
bool changes_pkt_data: 1;
bool might_sleep: 1;
+ u8 arg_cnt:3;
enum priv_stack_mode priv_stack_mode;
- u8 arg_cnt;
struct bpf_subprog_arg_info args[MAX_BPF_FUNC_REG_ARGS];
};
@@ -745,8 +756,10 @@ struct bpf_verifier_env {
struct list_head free_list; /* list of struct bpf_verifier_state_list */
struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */
struct btf_mod_pair used_btfs[MAX_USED_BTFS]; /* array of BTF's used by BPF program */
+ struct bpf_map *insn_array_maps[MAX_USED_MAPS]; /* array of INSN_ARRAY map's to be relocated */
u32 used_map_cnt; /* number of used maps */
u32 used_btf_cnt; /* number of used BTF objects */
+ u32 insn_array_map_cnt; /* number of used maps of type BPF_MAP_TYPE_INSN_ARRAY */
u32 id_gen; /* used to generate unique reg IDs */
u32 hidden_subprog_cnt; /* number of hidden subprogs */
int exception_callback_subprog;
@@ -828,6 +841,8 @@ struct bpf_verifier_env {
/* array of pointers to bpf_scc_info indexed by SCC id */
struct bpf_scc_info **scc_info;
u32 scc_cnt;
+ struct bpf_iarray *succ;
+ struct bpf_iarray *gotox_tmp_buf;
};
static inline struct bpf_func_info_aux *subprog_aux(struct bpf_verifier_env *env, int subprog)
@@ -1038,6 +1053,13 @@ static inline bool bpf_stack_narrow_access_ok(int off, int fill_size, int spill_
return !(off % BPF_REG_SIZE);
}
+static inline bool insn_is_gotox(struct bpf_insn *insn)
+{
+ return BPF_CLASS(insn->code) == BPF_JMP &&
+ BPF_OP(insn->code) == BPF_JA &&
+ BPF_SRC(insn->code) == BPF_X;
+}
+
const char *reg_type_str(struct bpf_verifier_env *env, enum bpf_reg_type type);
const char *dynptr_type_str(enum bpf_dynptr_type type);
const char *iter_type_str(const struct btf *btf, u32 btf_id);
@@ -1050,7 +1072,7 @@ void print_insn_state(struct bpf_verifier_env *env, const struct bpf_verifier_st
struct bpf_subprog_info *bpf_find_containing_subprog(struct bpf_verifier_env *env, int off);
int bpf_jmp_offset(struct bpf_insn *insn);
-int bpf_insn_successors(struct bpf_prog *prog, u32 idx, u32 succ[2]);
+struct bpf_iarray *bpf_insn_successors(struct bpf_verifier_env *env, u32 idx);
void bpf_fmt_stack_mask(char *buf, ssize_t buf_sz, u64 stack_mask);
bool bpf_calls_callback(struct bpf_verifier_env *env, int insn_idx);
diff --git a/include/linux/buildid.h b/include/linux/buildid.h
index 014a88c410739e..831c1b4b626c87 100644
--- a/include/linux/buildid.h
+++ b/include/linux/buildid.h
@@ -18,4 +18,29 @@ void init_vmlinux_build_id(void);
static inline void init_vmlinux_build_id(void) { }
#endif
+struct freader {
+ void *buf;
+ u32 buf_sz;
+ int err;
+ union {
+ struct {
+ struct file *file;
+ struct folio *folio;
+ void *addr;
+ loff_t folio_off;
+ bool may_fault;
+ };
+ struct {
+ const char *data;
+ u64 data_sz;
+ };
+ };
+};
+
+void freader_init_from_file(struct freader *r, void *buf, u32 buf_sz,
+ struct file *file, bool may_fault);
+void freader_init_from_mem(struct freader *r, const char *data, u64 data_sz);
+const void *freader_fetch(struct freader *r, loff_t file_off, size_t sz);
+void freader_cleanup(struct freader *r);
+
#endif
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 973233b82dc1fd..569de3b14279af 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -712,11 +712,13 @@ static __always_inline u32 __bpf_prog_run(const struct bpf_prog *prog,
ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
duration = sched_clock() - start;
- stats = this_cpu_ptr(prog->stats);
- flags = u64_stats_update_begin_irqsave(&stats->syncp);
- u64_stats_inc(&stats->cnt);
- u64_stats_add(&stats->nsecs, duration);
- u64_stats_update_end_irqrestore(&stats->syncp, flags);
+ if (likely(prog->stats)) {
+ stats = this_cpu_ptr(prog->stats);
+ flags = u64_stats_update_begin_irqsave(&stats->syncp);
+ u64_stats_inc(&stats->cnt);
+ u64_stats_add(&stats->nsecs, duration);
+ u64_stats_update_end_irqrestore(&stats->syncp, flags);
+ }
} else {
ret = dfunc(ctx, prog->insnsi, prog->bpf_func);
}
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index 07f8c309e43273..015dd1049bea05 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -359,6 +359,7 @@ enum {
FTRACE_OPS_FL_DIRECT = BIT(17),
FTRACE_OPS_FL_SUBOP = BIT(18),
FTRACE_OPS_FL_GRAPH = BIT(19),
+ FTRACE_OPS_FL_JMP = BIT(20),
};
#ifndef CONFIG_DYNAMIC_FTRACE_WITH_ARGS
@@ -577,6 +578,38 @@ static inline void arch_ftrace_set_direct_caller(struct ftrace_regs *fregs,
unsigned long addr) { }
#endif /* CONFIG_DYNAMIC_FTRACE_WITH_DIRECT_CALLS */
+#ifdef CONFIG_DYNAMIC_FTRACE_WITH_JMP
+static inline bool ftrace_is_jmp(unsigned long addr)
+{
+ return addr & 1;
+}
+
+static inline unsigned long ftrace_jmp_set(unsigned long addr)
+{
+ return addr | 1UL;
+}
+
+static inline unsigned long ftrace_jmp_get(unsigned long addr)
+{
+ return addr & ~1UL;
+}
+#else
+static inline bool ftrace_is_jmp(unsigned long addr)
+{
+ return false;
+}
+
+static inline unsigned long ftrace_jmp_set(unsigned long addr)
+{
+ return addr;
+}
+
+static inline unsigned long ftrace_jmp_get(unsigned long addr)
+{
+ return addr;
+}
+#endif /* CONFIG_DYNAMIC_FTRACE_WITH_JMP */
+
#ifdef CONFIG_STACK_TRACER
int stack_trace_sysctl(const struct ctl_table *table, int write, void *buffer,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6829936d33f58e..f5713f59ac10a0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1026,6 +1026,7 @@ enum bpf_map_type {
BPF_MAP_TYPE_USER_RINGBUF,
BPF_MAP_TYPE_CGRP_STORAGE,
BPF_MAP_TYPE_ARENA,
+ BPF_MAP_TYPE_INSN_ARRAY,
__MAX_BPF_MAP_TYPE
};
@@ -1430,6 +1431,9 @@ enum {
/* Do not translate kernel bpf_arena pointers to user pointers */
BPF_F_NO_USER_CONV = (1U << 18),
+
+/* Enable BPF ringbuf overwrite mode */
+ BPF_F_RB_OVERWRITE = (1U << 19),
};
/* Flags for BPF_PROG_QUERY. */
@@ -5618,7 +5622,7 @@ union bpf_attr {
* Return
* *sk* if casting is valid, or **NULL** otherwise.
*
- * long bpf_dynptr_from_mem(void *data, u32 size, u64 flags, struct bpf_dynptr *ptr)
+ * long bpf_dynptr_from_mem(void *data, u64 size, u64 flags, struct bpf_dynptr *ptr)
* Description
* Get a dynptr to local memory *data*.
*
@@ -5661,7 +5665,7 @@ union bpf_attr {
* Return
* Nothing. Always succeeds.
*
- * long bpf_dynptr_read(void *dst, u32 len, const struct bpf_dynptr *src, u32 offset, u64 flags)
+ * long bpf_dynptr_read(void *dst, u64 len, const struct bpf_dynptr *src, u64 offset, u64 flags)
* Description
* Read *len* bytes from *src* into *dst*, starting from *offset*
* into *src*.
@@ -5671,7 +5675,7 @@ union bpf_attr {
* of *src*'s data, -EINVAL if *src* is an invalid dynptr or if
* *flags* is not 0.
*
- * long bpf_dynptr_write(const struct bpf_dynptr *dst, u32 offset, void *src, u32 len, u64 flags)
+ * long bpf_dynptr_write(const struct bpf_dynptr *dst, u64 offset, void *src, u64 len, u64 flags)
* Description
* Write *len* bytes from *src* into *dst*, starting from *offset*
* into *dst*.
@@ -5692,7 +5696,7 @@ union bpf_attr {
* is a read-only dynptr or if *flags* is not correct. For skb-type dynptrs,
* other errors correspond to errors returned by **bpf_skb_store_bytes**\ ().
*
- * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u32 offset, u32 len)
+ * void *bpf_dynptr_data(const struct bpf_dynptr *ptr, u64 offset, u64 len)
* Description
* Get a pointer to the underlying dynptr data.
*
@@ -6231,6 +6235,7 @@ enum {
BPF_RB_RING_SIZE = 1,
BPF_RB_CONS_POS = 2,
BPF_RB_PROD_POS = 3,
+ BPF_RB_OVERWRITE_POS = 4,
};
/* BPF ring buffer constants */
@@ -7645,4 +7650,24 @@ enum bpf_kfunc_flags {
BPF_F_PAD_ZEROS = (1ULL << 0),
};
+/*
+ * Values of a BPF_MAP_TYPE_INSN_ARRAY entry must be of this type.
+ *
+ * Before the map is used the orig_off field should point to an
+ * instruction inside the program being loaded. The other fields
+ * must be set to 0.
+ *
+ * After the program is loaded, the xlated_off will be adjusted
+ * by the verifier to point to the index of the original instruction
+ * in the xlated program. If the instruction is deleted, it will
+ * be set to (u32)-1. The jitted_off will be set to the corresponding
+ * offset in the jitted image of the program.
+ */
+struct bpf_insn_array_value {
+ __u32 orig_off;
+ __u32 xlated_off;
+ __u32 jitted_off;
+ __u32 :32;
+};
+
#endif /* _UAPI__LINUX_BPF_H__ */