summaryrefslogtreecommitdiff
path: root/kernel/bpf/verifier.c
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r--kernel/bpf/verifier.c1080
1 files changed, 784 insertions, 296 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fba52d9ec8fc..3923c570070b 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -21,6 +21,7 @@
#include <linux/ctype.h>
#include <linux/error-injection.h>
#include <linux/bpf_lsm.h>
+#include <linux/btf_ids.h>
#include "disasm.h"
@@ -237,7 +238,6 @@ struct bpf_call_arg_meta {
u64 msize_max_value;
int ref_obj_id;
int func_id;
- u32 btf_id;
};
struct btf *btf_vmlinux;
@@ -435,6 +435,15 @@ static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
return type == ARG_PTR_TO_SOCK_COMMON;
}
+static bool arg_type_may_be_null(enum bpf_arg_type type)
+{
+ return type == ARG_PTR_TO_MAP_VALUE_OR_NULL ||
+ type == ARG_PTR_TO_MEM_OR_NULL ||
+ type == ARG_PTR_TO_CTX_OR_NULL ||
+ type == ARG_PTR_TO_SOCKET_OR_NULL ||
+ type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
+}
+
/* Determine whether the function releases some resources allocated by another
* function call. The first reference type argument will be assumed to be
* released by release_reference().
@@ -477,7 +486,12 @@ static bool is_acquire_function(enum bpf_func_id func_id,
static bool is_ptr_cast_function(enum bpf_func_id func_id)
{
return func_id == BPF_FUNC_tcp_sock ||
- func_id == BPF_FUNC_sk_fullsock;
+ func_id == BPF_FUNC_sk_fullsock ||
+ func_id == BPF_FUNC_skc_to_tcp_sock ||
+ func_id == BPF_FUNC_skc_to_tcp6_sock ||
+ func_id == BPF_FUNC_skc_to_udp6_sock ||
+ func_id == BPF_FUNC_skc_to_tcp_timewait_sock ||
+ func_id == BPF_FUNC_skc_to_tcp_request_sock;
}
/* string representation of 'enum bpf_reg_type' */
@@ -1489,6 +1503,13 @@ static int check_subprogs(struct bpf_verifier_env *env)
for (i = 0; i < insn_cnt; i++) {
u8 code = insn[i].code;
+ if (code == (BPF_JMP | BPF_CALL) &&
+ insn[i].imm == BPF_FUNC_tail_call &&
+ insn[i].src_reg != BPF_PSEUDO_CALL)
+ subprog[cur_subprog].has_tail_call = true;
+ if (BPF_CLASS(code) == BPF_LD &&
+ (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND))
+ subprog[cur_subprog].has_ld_abs = true;
if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32)
goto next;
if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL)
@@ -2625,11 +2646,18 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
#define MAX_PACKET_OFF 0xffff
+static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog)
+{
+ return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type;
+}
+
static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
const struct bpf_call_arg_meta *meta,
enum bpf_access_type t)
{
- switch (env->prog->type) {
+ enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
+
+ switch (prog_type) {
/* Program types only with direct read access go here! */
case BPF_PROG_TYPE_LWT_IN:
case BPF_PROG_TYPE_LWT_OUT:
@@ -2970,10 +2998,37 @@ static int check_max_stack_depth(struct bpf_verifier_env *env)
int depth = 0, frame = 0, idx = 0, i = 0, subprog_end;
struct bpf_subprog_info *subprog = env->subprog_info;
struct bpf_insn *insn = env->prog->insnsi;
+ bool tail_call_reachable = false;
int ret_insn[MAX_CALL_FRAMES];
int ret_prog[MAX_CALL_FRAMES];
+ int j;
process_func:
+ /* protect against potential stack overflow that might happen when
+ * bpf2bpf calls get combined with tailcalls. Limit the caller's stack
+ * depth for such case down to 256 so that the worst case scenario
+ * would result in 8k stack size (32 which is tailcall limit * 256 =
+ * 8k).
+ *
+ * To get the idea what might happen, see an example:
+ * func1 -> sub rsp, 128
+ * subfunc1 -> sub rsp, 256
+ * tailcall1 -> add rsp, 256
+ * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320)
+ * subfunc2 -> sub rsp, 64
+ * subfunc22 -> sub rsp, 128
+ * tailcall2 -> add rsp, 128
+ * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416)
+ *
+ * tailcall will unwind the current stack frame but it will not get rid
+ * of caller's stack as shown on the example above.
+ */
+ if (idx && subprog[idx].has_tail_call && depth >= 256) {
+ verbose(env,
+ "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n",
+ depth);
+ return -EACCES;
+ }
/* round up to 32-bytes, since this is granularity
* of interpreter stack size
*/
@@ -3002,6 +3057,10 @@ continue_func:
i);
return -EFAULT;
}
+
+ if (subprog[idx].has_tail_call)
+ tail_call_reachable = true;
+
frame++;
if (frame >= MAX_CALL_FRAMES) {
verbose(env, "the call stack of %d frames is too deep !\n",
@@ -3010,6 +3069,15 @@ continue_func:
}
goto process_func;
}
+ /* if tail call got detected across bpf2bpf calls then mark each of the
+ * currently present subprog frames as tail call reachable subprogs;
+ * this info will be utilized by JIT so that we will be preserving the
+ * tail call counter throughout bpf2bpf calls combined with tailcalls
+ */
+ if (tail_call_reachable)
+ for (j = 0; j < frame; j++)
+ subprog[ret_prog[j]].tail_call_reachable = true;
+
/* end of for() loop means the last insn of the 'subprog'
* was reached. Doesn't matter whether it was JA or EXIT
*/
@@ -3585,18 +3653,6 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
struct bpf_func_state *state = func(env, reg);
int err, min_off, max_off, i, j, slot, spi;
- if (reg->type != PTR_TO_STACK) {
- /* Allow zero-byte read from NULL, regardless of pointer type */
- if (zero_size_allowed && access_size == 0 &&
- register_is_null(reg))
- return 0;
-
- verbose(env, "R%d type=%s expected=%s\n", regno,
- reg_type_str[reg->type],
- reg_type_str[PTR_TO_STACK]);
- return -EACCES;
- }
-
if (tnum_is_const(reg->var_off)) {
min_off = max_off = reg->var_off.value + reg->off;
err = __check_stack_boundary(env, regno, min_off, access_size,
@@ -3741,9 +3797,19 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
access_size, zero_size_allowed,
"rdwr",
&env->prog->aux->max_rdwr_access);
- default: /* scalar_value|ptr_to_stack or invalid ptr */
+ case PTR_TO_STACK:
return check_stack_boundary(env, regno, access_size,
zero_size_allowed, meta);
+ default: /* scalar_value or invalid ptr */
+ /* Allow zero-byte read from NULL, regardless of pointer type */
+ if (zero_size_allowed && access_size == 0 &&
+ register_is_null(reg))
+ return 0;
+
+ verbose(env, "R%d type=%s expected=%s\n", regno,
+ reg_type_str[reg->type],
+ reg_type_str[PTR_TO_STACK]);
+ return -EACCES;
}
}
@@ -3775,10 +3841,6 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno,
struct bpf_map *map = reg->map_ptr;
u64 val = reg->var_off.value;
- if (reg->type != PTR_TO_MAP_VALUE) {
- verbose(env, "R%d is not a pointer to map_value\n", regno);
- return -EINVAL;
- }
if (!is_const) {
verbose(env,
"R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n",
@@ -3845,12 +3907,6 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type)
type == ARG_CONST_SIZE_OR_ZERO;
}
-static bool arg_type_is_alloc_mem_ptr(enum bpf_arg_type type)
-{
- return type == ARG_PTR_TO_ALLOC_MEM ||
- type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;
-}
-
static bool arg_type_is_alloc_size(enum bpf_arg_type type)
{
return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;
@@ -3872,14 +3928,188 @@ static int int_ptr_type_to_size(enum bpf_arg_type type)
return -EINVAL;
}
+static int resolve_map_arg_type(struct bpf_verifier_env *env,
+ const struct bpf_call_arg_meta *meta,
+ enum bpf_arg_type *arg_type)
+{
+ if (!meta->map_ptr) {
+ /* kernel subsystem misconfigured verifier */
+ verbose(env, "invalid map_ptr to access map->type\n");
+ return -EACCES;
+ }
+
+ switch (meta->map_ptr->map_type) {
+ case BPF_MAP_TYPE_SOCKMAP:
+ case BPF_MAP_TYPE_SOCKHASH:
+ if (*arg_type == ARG_PTR_TO_MAP_VALUE) {
+ *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON;
+ } else {
+ verbose(env, "invalid arg_type for sockmap/sockhash\n");
+ return -EINVAL;
+ }
+ break;
+
+ default:
+ break;
+ }
+ return 0;
+}
+
+struct bpf_reg_types {
+ const enum bpf_reg_type types[10];
+ u32 *btf_id;
+};
+
+static const struct bpf_reg_types map_key_value_types = {
+ .types = {
+ PTR_TO_STACK,
+ PTR_TO_PACKET,
+ PTR_TO_PACKET_META,
+ PTR_TO_MAP_VALUE,
+ },
+};
+
+static const struct bpf_reg_types sock_types = {
+ .types = {
+ PTR_TO_SOCK_COMMON,
+ PTR_TO_SOCKET,
+ PTR_TO_TCP_SOCK,
+ PTR_TO_XDP_SOCK,
+ },
+};
+
+static const struct bpf_reg_types btf_id_sock_common_types = {
+ .types = {
+ PTR_TO_SOCK_COMMON,
+ PTR_TO_SOCKET,
+ PTR_TO_TCP_SOCK,
+ PTR_TO_XDP_SOCK,
+ PTR_TO_BTF_ID,
+ },
+ .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON],
+};
+
+static const struct bpf_reg_types mem_types = {
+ .types = {
+ PTR_TO_STACK,
+ PTR_TO_PACKET,
+ PTR_TO_PACKET_META,
+ PTR_TO_MAP_VALUE,
+ PTR_TO_MEM,
+ PTR_TO_RDONLY_BUF,
+ PTR_TO_RDWR_BUF,
+ },
+};
+
+static const struct bpf_reg_types int_ptr_types = {
+ .types = {
+ PTR_TO_STACK,
+ PTR_TO_PACKET,
+ PTR_TO_PACKET_META,
+ PTR_TO_MAP_VALUE,
+ },
+};
+
+static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } };
+static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } };
+static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } };
+static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } };
+static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } };
+static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } };
+static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } };
+
+static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = {
+ [ARG_PTR_TO_MAP_KEY] = &map_key_value_types,
+ [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types,
+ [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types,
+ [ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types,
+ [ARG_CONST_SIZE] = &scalar_types,
+ [ARG_CONST_SIZE_OR_ZERO] = &scalar_types,
+ [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types,
+ [ARG_CONST_MAP_PTR] = &const_map_ptr_types,
+ [ARG_PTR_TO_CTX] = &context_types,
+ [ARG_PTR_TO_CTX_OR_NULL] = &context_types,
+ [ARG_PTR_TO_SOCK_COMMON] = &sock_types,
+ [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types,
+ [ARG_PTR_TO_SOCKET] = &fullsock_types,
+ [ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types,
+ [ARG_PTR_TO_BTF_ID] = &btf_ptr_types,
+ [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types,
+ [ARG_PTR_TO_MEM] = &mem_types,
+ [ARG_PTR_TO_MEM_OR_NULL] = &mem_types,
+ [ARG_PTR_TO_UNINIT_MEM] = &mem_types,
+ [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types,
+ [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types,
+ [ARG_PTR_TO_INT] = &int_ptr_types,
+ [ARG_PTR_TO_LONG] = &int_ptr_types,
+};
+
+static int check_reg_type(struct bpf_verifier_env *env, u32 regno,
+ enum bpf_arg_type arg_type,
+ const u32 *arg_btf_id)
+{
+ struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
+ enum bpf_reg_type expected, type = reg->type;
+ const struct bpf_reg_types *compatible;
+ int i, j;
+
+ compatible = compatible_reg_types[arg_type];
+ if (!compatible) {
+ verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type);
+ return -EFAULT;
+ }
+
+ for (i = 0; i < ARRAY_SIZE(compatible->types); i++) {
+ expected = compatible->types[i];
+ if (expected == NOT_INIT)
+ break;
+
+ if (type == expected)
+ goto found;
+ }
+
+ verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]);
+ for (j = 0; j + 1 < i; j++)
+ verbose(env, "%s, ", reg_type_str[compatible->types[j]]);
+ verbose(env, "%s\n", reg_type_str[compatible->types[j]]);
+ return -EACCES;
+
+found:
+ if (type == PTR_TO_BTF_ID) {
+ if (!arg_btf_id) {
+ if (!compatible->btf_id) {
+ verbose(env, "verifier internal error: missing arg compatible BTF ID\n");
+ return -EFAULT;
+ }
+ arg_btf_id = compatible->btf_id;
+ }
+
+ if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id,
+ *arg_btf_id)) {
+ verbose(env, "R%d is of type %s but %s is expected\n",
+ regno, kernel_type_name(reg->btf_id),
+ kernel_type_name(*arg_btf_id));
+ return -EACCES;
+ }
+
+ if (!tnum_is_const(reg->var_off) || reg->var_off.value) {
+ verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
+ regno);
+ return -EACCES;
+ }
+ }
+
+ return 0;
+}
+
static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
struct bpf_call_arg_meta *meta,
const struct bpf_func_proto *fn)
{
u32 regno = BPF_REG_1 + arg;
struct bpf_reg_state *regs = cur_regs(env), *reg = &regs[regno];
- enum bpf_reg_type expected_type, type = reg->type;
enum bpf_arg_type arg_type = fn->arg_type[arg];
+ enum bpf_reg_type type = reg->type;
int err = 0;
if (arg_type == ARG_DONTCARE)
@@ -3904,120 +4134,32 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EACCES;
}
- if (arg_type == ARG_PTR_TO_MAP_KEY ||
- arg_type == ARG_PTR_TO_MAP_VALUE ||
+ if (arg_type == ARG_PTR_TO_MAP_VALUE ||
arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE ||
arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) {
- expected_type = PTR_TO_STACK;
- if (register_is_null(reg) &&
- arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL)
- /* final test in check_stack_boundary() */;
- else if (!type_is_pkt_pointer(type) &&
- type != PTR_TO_MAP_VALUE &&
- type != expected_type)
- goto err_type;
- } else if (arg_type == ARG_CONST_SIZE ||
- arg_type == ARG_CONST_SIZE_OR_ZERO ||
- arg_type == ARG_CONST_ALLOC_SIZE_OR_ZERO) {
- expected_type = SCALAR_VALUE;
- if (type != expected_type)
- goto err_type;
- } else if (arg_type == ARG_CONST_MAP_PTR) {
- expected_type = CONST_PTR_TO_MAP;
- if (type != expected_type)
- goto err_type;
- } else if (arg_type == ARG_PTR_TO_CTX ||
- arg_type == ARG_PTR_TO_CTX_OR_NULL) {
- expected_type = PTR_TO_CTX;
- if (!(register_is_null(reg) &&
- arg_type == ARG_PTR_TO_CTX_OR_NULL)) {
- if (type != expected_type)
- goto err_type;
- err = check_ctx_reg(env, reg, regno);
- if (err < 0)
- return err;
- }
- } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) {
- expected_type = PTR_TO_SOCK_COMMON;
- /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */
- if (!type_is_sk_pointer(type))
- goto err_type;
- if (reg->ref_obj_id) {
- if (meta->ref_obj_id) {
- verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
- regno, reg->ref_obj_id,
- meta->ref_obj_id);
- return -EFAULT;
- }
- meta->ref_obj_id = reg->ref_obj_id;
- }
- } else if (arg_type == ARG_PTR_TO_SOCKET ||
- arg_type == ARG_PTR_TO_SOCKET_OR_NULL) {
- expected_type = PTR_TO_SOCKET;
- if (!(register_is_null(reg) &&
- arg_type == ARG_PTR_TO_SOCKET_OR_NULL)) {
- if (type != expected_type)
- goto err_type;
- }
- } else if (arg_type == ARG_PTR_TO_BTF_ID) {
- expected_type = PTR_TO_BTF_ID;
- if (type != expected_type)
- goto err_type;
- if (!fn->check_btf_id) {
- if (reg->btf_id != meta->btf_id) {
- verbose(env, "Helper has type %s got %s in R%d\n",
- kernel_type_name(meta->btf_id),
- kernel_type_name(reg->btf_id), regno);
-
- return -EACCES;
- }
- } else if (!fn->check_btf_id(reg->btf_id, arg)) {
- verbose(env, "Helper does not support %s in R%d\n",
- kernel_type_name(reg->btf_id), regno);
+ err = resolve_map_arg_type(env, meta, &arg_type);
+ if (err)
+ return err;
+ }
- return -EACCES;
- }
- if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) {
- verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n",
- regno);
- return -EACCES;
- }
- } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
- if (meta->func_id == BPF_FUNC_spin_lock) {
- if (process_spin_lock(env, regno, true))
- return -EACCES;
- } else if (meta->func_id == BPF_FUNC_spin_unlock) {
- if (process_spin_lock(env, regno, false))
- return -EACCES;
- } else {
- verbose(env, "verifier internal error\n");
- return -EFAULT;
- }
- } else if (arg_type_is_mem_ptr(arg_type)) {
- expected_type = PTR_TO_STACK;
- /* One exception here. In case function allows for NULL to be
- * passed in as argument, it's a SCALAR_VALUE type. Final test
- * happens during stack boundary checking.
+ if (register_is_null(reg) && arg_type_may_be_null(arg_type))
+ /* A NULL register has a SCALAR_VALUE type, so skip
+ * type checking.
*/
- if (register_is_null(reg) &&
- (arg_type == ARG_PTR_TO_MEM_OR_NULL ||
- arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL))
- /* final test in check_stack_boundary() */;
- else if (!type_is_pkt_pointer(type) &&
- type != PTR_TO_MAP_VALUE &&
- type != PTR_TO_MEM &&
- type != PTR_TO_RDONLY_BUF &&
- type != PTR_TO_RDWR_BUF &&
- type != expected_type)
- goto err_type;
- meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
- } else if (arg_type_is_alloc_mem_ptr(arg_type)) {
- expected_type = PTR_TO_MEM;
- if (register_is_null(reg) &&
- arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL)
- /* final test in check_stack_boundary() */;
- else if (type != expected_type)
- goto err_type;
+ goto skip_type_check;
+
+ err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]);
+ if (err)
+ return err;
+
+ if (type == PTR_TO_CTX) {
+ err = check_ctx_reg(env, reg, regno);
+ if (err < 0)
+ return err;
+ }
+
+skip_type_check:
+ if (reg->ref_obj_id) {
if (meta->ref_obj_id) {
verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
regno, reg->ref_obj_id,
@@ -4025,15 +4167,6 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
return -EFAULT;
}
meta->ref_obj_id = reg->ref_obj_id;
- } else if (arg_type_is_int_ptr(arg_type)) {
- expected_type = PTR_TO_STACK;
- if (!type_is_pkt_pointer(type) &&
- type != PTR_TO_MAP_VALUE &&
- type != expected_type)
- goto err_type;
- } else {
- verbose(env, "unsupported arg_type %d\n", arg_type);
- return -EFAULT;
}
if (arg_type == ARG_CONST_MAP_PTR) {
@@ -4072,6 +4205,22 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
err = check_helper_mem_access(env, regno,
meta->map_ptr->value_size, false,
meta);
+ } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) {
+ if (meta->func_id == BPF_FUNC_spin_lock) {
+ if (process_spin_lock(env, regno, true))
+ return -EACCES;
+ } else if (meta->func_id == BPF_FUNC_spin_unlock) {
+ if (process_spin_lock(env, regno, false))
+ return -EACCES;
+ } else {
+ verbose(env, "verifier internal error\n");
+ return -EFAULT;
+ }
+ } else if (arg_type_is_mem_ptr(arg_type)) {
+ /* The access to this pointer is only checked when we hit the
+ * next is_mem_size argument below.
+ */
+ meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM);
} else if (arg_type_is_mem_size(arg_type)) {
bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO);
@@ -4137,10 +4286,43 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg,
}
return err;
-err_type:
- verbose(env, "R%d type=%s expected=%s\n", regno,
- reg_type_str[type], reg_type_str[expected_type]);
- return -EACCES;
+}
+
+static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id)
+{
+ enum bpf_attach_type eatype = env->prog->expected_attach_type;
+ enum bpf_prog_type type = resolve_prog_type(env->prog);
+
+ if (func_id != BPF_FUNC_map_update_elem)
+ return false;
+
+ /* It's not possible to get access to a locked struct sock in these
+ * contexts, so updating is safe.
+ */
+ switch (type) {
+ case BPF_PROG_TYPE_TRACING:
+ if (eatype == BPF_TRACE_ITER)
+ return true;
+ break;
+ case BPF_PROG_TYPE_SOCKET_FILTER:
+ case BPF_PROG_TYPE_SCHED_CLS:
+ case BPF_PROG_TYPE_SCHED_ACT:
+ case BPF_PROG_TYPE_XDP:
+ case BPF_PROG_TYPE_SK_REUSEPORT:
+ case BPF_PROG_TYPE_FLOW_DISSECTOR:
+ case BPF_PROG_TYPE_SK_LOOKUP:
+ return true;
+ default:
+ break;
+ }
+
+ verbose(env, "cannot update sockmap in this context\n");
+ return false;
+}
+
+static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env)
+{
+ return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64);
}
static int check_map_func_compatibility(struct bpf_verifier_env *env,
@@ -4214,7 +4396,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_map &&
func_id != BPF_FUNC_sk_select_reuseport &&
- func_id != BPF_FUNC_map_lookup_elem)
+ func_id != BPF_FUNC_map_lookup_elem &&
+ !may_update_sockmap(env, func_id))
goto error;
break;
case BPF_MAP_TYPE_SOCKHASH:
@@ -4223,7 +4406,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_map_delete_elem &&
func_id != BPF_FUNC_msg_redirect_hash &&
func_id != BPF_FUNC_sk_select_reuseport &&
- func_id != BPF_FUNC_map_lookup_elem)
+ func_id != BPF_FUNC_map_lookup_elem &&
+ !may_update_sockmap(env, func_id))
goto error;
break;
case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
@@ -4242,6 +4426,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
func_id != BPF_FUNC_sk_storage_delete)
goto error;
break;
+ case BPF_MAP_TYPE_INODE_STORAGE:
+ if (func_id != BPF_FUNC_inode_storage_get &&
+ func_id != BPF_FUNC_inode_storage_delete)
+ goto error;
+ break;
default:
break;
}
@@ -4251,8 +4440,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
case BPF_FUNC_tail_call:
if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY)
goto error;
- if (env->subprog_cnt > 1) {
- verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n");
+ if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) {
+ verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
return -EINVAL;
}
break;
@@ -4315,6 +4504,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
if (map->map_type != BPF_MAP_TYPE_SK_STORAGE)
goto error;
break;
+ case BPF_FUNC_inode_storage_get:
+ case BPF_FUNC_inode_storage_delete:
+ if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE)
+ goto error;
+ break;
default:
break;
}
@@ -4402,10 +4596,26 @@ static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
return count <= 1;
}
+static bool check_btf_id_ok(const struct bpf_func_proto *fn)
+{
+ int i;
+
+ for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) {
+ if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i])
+ return false;
+
+ if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i])
+ return false;
+ }
+
+ return true;
+}
+
static int check_func_proto(const struct bpf_func_proto *fn, int func_id)
{
return check_raw_mode_ok(fn) &&
check_arg_pair_ok(fn) &&
+ check_btf_id_ok(fn) &&
check_refcount_ok(fn, func_id) ? 0 : -EINVAL;
}
@@ -4775,6 +4985,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
return -EINVAL;
}
+ if (fn->allowed && !fn->allowed(env->prog)) {
+ verbose(env, "helper call is not allowed in probe\n");
+ return -EINVAL;
+ }
+
/* With LD_ABS/IND some JITs save/restore skb from r1. */
changes_data = bpf_helper_changes_pkt_data(fn->func);
if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) {
@@ -4796,11 +5011,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
meta.func_id = func_id;
/* check args */
for (i = 0; i < 5; i++) {
- if (!fn->check_btf_id) {
- err = btf_resolve_helper_id(&env->log, fn, i);
- if (err > 0)
- meta.btf_id = err;
- }
err = check_func_arg(env, i, &meta, fn);
if (err)
return err;
@@ -5219,6 +5429,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
dst, reg_type_str[ptr_reg->type]);
return -EACCES;
case CONST_PTR_TO_MAP:
+ /* smin_val represents the known value */
+ if (known && smin_val == 0 && opcode == BPF_ADD)
+ break;
+ /* fall-through */
case PTR_TO_PACKET_END:
case PTR_TO_SOCKET:
case PTR_TO_SOCKET_OR_NULL:
@@ -5634,8 +5848,7 @@ static void scalar_min_max_and(struct bpf_reg_state *dst_reg,
u64 umax_val = src_reg->umax_value;
if (src_known && dst_known) {
- __mark_reg_known(dst_reg, dst_reg->var_off.value &
- src_reg->var_off.value);
+ __mark_reg_known(dst_reg, dst_reg->var_off.value);
return;
}
@@ -5705,8 +5918,7 @@ static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
u64 umin_val = src_reg->umin_value;
if (src_known && dst_known) {
- __mark_reg_known(dst_reg, dst_reg->var_off.value |
- src_reg->var_off.value);
+ __mark_reg_known(dst_reg, dst_reg->var_off.value);
return;
}
@@ -5732,6 +5944,67 @@ static void scalar_min_max_or(struct bpf_reg_state *dst_reg,
__update_reg_bounds(dst_reg);
}
+static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg,
+ struct bpf_reg_state *src_reg)
+{
+ bool src_known = tnum_subreg_is_const(src_reg->var_off);
+ bool dst_known = tnum_subreg_is_const(dst_reg->var_off);
+ struct tnum var32_off = tnum_subreg(dst_reg->var_off);
+ s32 smin_val = src_reg->s32_min_value;
+
+ /* Assuming scalar64_min_max_xor will be called so it is safe
+ * to skip updating register for known case.
+ */
+ if (src_known && dst_known)
+ return;
+
+ /* We get both minimum and maximum from the var32_off. */
+ dst_reg->u32_min_value = var32_off.value;
+ dst_reg->u32_max_value = var32_off.value | var32_off.mask;
+
+ if (dst_reg->s32_min_value >= 0 && smin_val >= 0) {
+ /* XORing two positive sign numbers gives a positive,
+ * so safe to cast u32 result into s32.
+ */
+ dst_reg->s32_min_value = dst_reg->u32_min_value;
+ dst_reg->s32_max_value = dst_reg->u32_max_value;
+ } else {
+ dst_reg->s32_min_value = S32_MIN;
+ dst_reg->s32_max_value = S32_MAX;
+ }
+}
+
+static void scalar_min_max_xor(struct bpf_reg_state *dst_reg,
+ struct bpf_reg_state *src_reg)
+{
+ bool src_known = tnum_is_const(src_reg->var_off);
+ bool dst_known = tnum_is_const(dst_reg->var_off);
+ s64 smin_val = src_reg->smin_value;
+
+ if (src_known && dst_known) {
+ /* dst_reg->var_off.value has been updated earlier */
+ __mark_reg_known(dst_reg, dst_reg->var_off.value);
+ return;
+ }
+
+ /* We get both minimum and maximum from the var_off. */
+ dst_reg->umin_value = dst_reg->var_off.value;
+ dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask;
+
+ if (dst_reg->smin_value >= 0 && smin_val >= 0) {
+ /* XORing two positive sign numbers gives a positive,
+ * so safe to cast u64 result into s64.
+ */
+ dst_reg->smin_value = dst_reg->umin_value;
+ dst_reg->smax_value = dst_reg->umax_value;
+ } else {
+ dst_reg->smin_value = S64_MIN;
+ dst_reg->smax_value = S64_MAX;
+ }
+
+ __update_reg_bounds(dst_reg);
+}
+
static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg,
u64 umin_val, u64 umax_val)
{
@@ -6040,6 +6313,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
scalar32_min_max_or(dst_reg, &src_reg);
scalar_min_max_or(dst_reg, &src_reg);
break;
+ case BPF_XOR:
+ dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off);
+ scalar32_min_max_xor(dst_reg, &src_reg);
+ scalar_min_max_xor(dst_reg, &src_reg);
+ break;
case BPF_LSH:
if (umax_val >= insn_bitness) {
/* Shifts greater than 31 or 63 are undefined.
@@ -7287,7 +7565,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
u8 mode = BPF_MODE(insn->code);
int i, err;
- if (!may_access_skb(env->prog->type)) {
+ if (!may_access_skb(resolve_prog_type(env->prog))) {
verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n");
return -EINVAL;
}
@@ -7297,18 +7575,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn)
return -EINVAL;
}
- if (env->subprog_cnt > 1) {
- /* when program has LD_ABS insn JITs and interpreter assume
- * that r1 == ctx == skb which is not the case for callees
- * that can have arbitrary arguments. It's problematic
- * for main prog as well since JITs would need to analyze
- * all functions in order to make proper register save/restore
- * decisions in the main prog. Hence disallow LD_ABS with calls
- */
- verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n");
- return -EINVAL;
- }
-
if (insn->dst_reg != BPF_REG_0 || insn->off != 0 ||
BPF_SIZE(insn->code) == BPF_DW ||
(mode == BPF_ABS && insn->src_reg != BPF_REG_0)) {
@@ -7375,11 +7641,12 @@ static int check_return_code(struct bpf_verifier_env *env)
const struct bpf_prog *prog = env->prog;
struct bpf_reg_state *reg;
struct tnum range = tnum_range(0, 1);
+ enum bpf_prog_type prog_type = resolve_prog_type(env->prog);
int err;
/* LSM and struct_ops func-ptr's return type could be "void" */
- if ((env->prog->type == BPF_PROG_TYPE_STRUCT_OPS ||
- env->prog->type == BPF_PROG_TYPE_LSM) &&
+ if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS ||
+ prog_type == BPF_PROG_TYPE_LSM) &&
!prog->aux->attach_func_proto->type)
return 0;
@@ -7398,7 +7665,7 @@ static int check_return_code(struct bpf_verifier_env *env)
return -EACCES;
}
- switch (env->prog->type) {
+ switch (prog_type) {
case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
@@ -7718,6 +7985,23 @@ err_free:
return ret;
}
+static int check_abnormal_return(struct bpf_verifier_env *env)
+{
+ int i;
+
+ for (i = 1; i < env->subprog_cnt; i++) {
+ if (env->subprog_info[i].has_ld_abs) {
+ verbose(env, "LD_ABS is not allowed in subprogs without BTF\n");
+ return -EINVAL;
+ }
+ if (env->subprog_info[i].has_tail_call) {
+ verbose(env, "tail_call is not allowed in subprogs without BTF\n");
+ return -EINVAL;
+ }
+ }
+ return 0;
+}
+
/* The minimum supported BTF func info size */
#define MIN_BPF_FUNCINFO_SIZE 8
#define MAX_FUNCINFO_REC_SIZE 252
@@ -7726,20 +8010,24 @@ static int check_btf_func(struct bpf_verifier_env *env,
const union bpf_attr *attr,
union bpf_attr __user *uattr)
{
+ const struct btf_type *type, *func_proto, *ret_type;
u32 i, nfuncs, urec_size, min_size;
u32 krec_size = sizeof(struct bpf_func_info);
struct bpf_func_info *krecord;
struct bpf_func_info_aux *info_aux = NULL;
- const struct btf_type *type;
struct bpf_prog *prog;
const struct btf *btf;
void __user *urecord;
u32 prev_offset = 0;
+ bool scalar_return;
int ret = -ENOMEM;
nfuncs = attr->func_info_cnt;
- if (!nfuncs)
+ if (!nfuncs) {
+ if (check_abnormal_return(env))
+ return -EINVAL;
return 0;
+ }
if (nfuncs != env->subprog_cnt) {
verbose(env, "number of funcs in func_info doesn't match number of subprogs\n");
@@ -7787,25 +8075,23 @@ static int check_btf_func(struct bpf_verifier_env *env,
}
/* check insn_off */
+ ret = -EINVAL;
if (i == 0) {
if (krecord[i].insn_off) {
verbose(env,
"nonzero insn_off %u for the first func info record",
krecord[i].insn_off);
- ret = -EINVAL;
goto err_free;
}
} else if (krecord[i].insn_off <= prev_offset) {
verbose(env,
"same or smaller insn offset (%u) than previous func info record (%u)",
krecord[i].insn_off, prev_offset);
- ret = -EINVAL;
goto err_free;
}
if (env->subprog_info[i].start != krecord[i].insn_off) {
verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n");
- ret = -EINVAL;
goto err_free;
}
@@ -7814,10 +8100,26 @@ static int check_btf_func(struct bpf_verifier_env *env,
if (!type || !btf_type_is_func(type)) {
verbose(env, "invalid type id %d in func info",
krecord[i].type_id);
- ret = -EINVAL;
goto err_free;
}
info_aux[i].linkage = BTF_INFO_VLEN(type->info);
+
+ func_proto = btf_type_by_id(btf, type->type);
+ if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto)))
+ /* btf_func_check() already verified it during BTF load */
+ goto err_free;
+ ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL);
+ scalar_return =
+ btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type);
+ if (i && !scalar_return && env->subprog_info[i].has_ld_abs) {
+ verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n");
+ goto err_free;
+ }
+ if (i && !scalar_return && env->subprog_info[i].has_tail_call) {
+ verbose(env, "tail_call is only allowed in functions that return 'int'.\n");
+ goto err_free;
+ }
+
prev_offset = krecord[i].insn_off;
urecord += urec_size;
}
@@ -7978,8 +8280,11 @@ static int check_btf_info(struct bpf_verifier_env *env,
struct btf *btf;
int err;
- if (!attr->func_info_cnt && !attr->line_info_cnt)
+ if (!attr->func_info_cnt && !attr->line_info_cnt) {
+ if (check_abnormal_return(env))
+ return -EINVAL;
return 0;
+ }
btf = btf_get_by_fd(attr->prog_btf_fd);
if (IS_ERR(btf))
@@ -9154,6 +9459,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
struct bpf_prog *prog)
{
+ enum bpf_prog_type prog_type = resolve_prog_type(prog);
/*
* Validate that trace type programs use preallocated hash maps.
*
@@ -9171,8 +9477,8 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
* now, but warnings are emitted so developers are made aware of
* the unsafety and can fix their programs before this is enforced.
*/
- if (is_tracing_prog_type(prog->type) && !is_preallocated_map(map)) {
- if (prog->type == BPF_PROG_TYPE_PERF_EVENT) {
+ if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) {
+ if (prog_type == BPF_PROG_TYPE_PERF_EVENT) {
verbose(env, "perf_event programs can only use preallocated hash map\n");
return -EINVAL;
}
@@ -9184,8 +9490,8 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n");
}
- if ((is_tracing_prog_type(prog->type) ||
- prog->type == BPF_PROG_TYPE_SOCKET_FILTER) &&
+ if ((is_tracing_prog_type(prog_type) ||
+ prog_type == BPF_PROG_TYPE_SOCKET_FILTER) &&
map_value_has_spin_lock(map)) {
verbose(env, "tracing progs cannot use bpf_spin_lock yet\n");
return -EINVAL;
@@ -9202,6 +9508,23 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env,
return -EINVAL;
}
+ if (prog->aux->sleepable)
+ switch (map->map_type) {
+ case BPF_MAP_TYPE_HASH:
+ case BPF_MAP_TYPE_LRU_HASH:
+ case BPF_MAP_TYPE_ARRAY:
+ if (!is_preallocated_map(map)) {
+ verbose(env,
+ "Sleepable programs can only use preallocated hash maps\n");
+ return -EINVAL;
+ }
+ break;
+ default:
+ verbose(env,
+ "Sleepable programs can only use array and hash maps\n");
+ return -EINVAL;
+ }
+
return 0;
}
@@ -9436,6 +9759,18 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len
}
}
+static void adjust_poke_descs(struct bpf_prog *prog, u32 len)
+{
+ struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab;
+ int i, sz = prog->aux->size_poke_tab;
+ struct bpf_jit_poke_descriptor *desc;
+
+ for (i = 0; i < sz; i++) {
+ desc = &tab[i];
+ desc->insn_idx += len - 1;
+ }
+}
+
static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off,
const struct bpf_insn *patch, u32 len)
{
@@ -9452,6 +9787,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of
if (adjust_insn_aux_data(env, new_prog, off, len))
return NULL;
adjust_subprog_starts(env, off, len);
+ adjust_poke_descs(new_prog, len);
return new_prog;
}
@@ -9897,7 +10233,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env)
insn->code = BPF_LDX | BPF_PROBE_MEM |
BPF_SIZE((insn)->code);
env->prog->aux->num_exentries++;
- } else if (env->prog->type != BPF_PROG_TYPE_STRUCT_OPS) {
+ } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) {
verbose(env, "Writes through BTF pointers are not allowed\n");
return -EINVAL;
}
@@ -9982,6 +10318,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
{
struct bpf_prog *prog = env->prog, **func, *tmp;
int i, j, subprog_start, subprog_end = 0, len, subprog;
+ struct bpf_map *map_ptr;
struct bpf_insn *insn;
void *old_bpf_func;
int err, num_exentries;
@@ -10049,6 +10386,31 @@ static int jit_subprogs(struct bpf_verifier_env *env)
func[i]->aux->btf = prog->aux->btf;
func[i]->aux->func_info = prog->aux->func_info;
+ for (j = 0; j < prog->aux->size_poke_tab; j++) {
+ u32 insn_idx = prog->aux->poke_tab[j].insn_idx;
+ int ret;
+
+ if (!(insn_idx >= subprog_start &&
+ insn_idx <= subprog_end))
+ continue;
+
+ ret = bpf_jit_add_poke_descriptor(func[i],
+ &prog->aux->poke_tab[j]);
+ if (ret < 0) {
+ verbose(env, "adding tail call poke descriptor failed\n");
+ goto out_free;
+ }
+
+ func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1;
+
+ map_ptr = func[i]->aux->poke_tab[ret].tail_call.map;
+ ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux);
+ if (ret < 0) {
+ verbose(env, "tracking tail call prog failed\n");
+ goto out_free;
+ }
+ }
+
/* Use bpf_prog_F_tag to indicate functions in stack traces.
* Long term would need debug info to populate names
*/
@@ -10067,6 +10429,7 @@ static int jit_subprogs(struct bpf_verifier_env *env)
num_exentries++;
}
func[i]->aux->num_exentries = num_exentries;
+ func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable;
func[i] = bpf_int_jit_compile(func[i]);
if (!func[i]->jited) {
err = -ENOTSUPP;
@@ -10074,6 +10437,19 @@ static int jit_subprogs(struct bpf_verifier_env *env)
}
cond_resched();
}
+
+ /* Untrack main program's aux structs so that during map_poke_run()
+ * we will not stumble upon the unfilled poke descriptors; each
+ * of the main program's poke descs got distributed across subprogs
+ * and got tracked onto map, so we are sure that none of them will
+ * be missed after the operation below
+ */
+ for (i = 0; i < prog->aux->size_poke_tab; i++) {
+ map_ptr = prog->aux->poke_tab[i].tail_call.map;
+
+ map_ptr->ops->map_poke_untrack(map_ptr, prog->aux);
+ }
+
/* at this point all bpf functions were successfully JITed
* now populate all bpf_calls with correct addresses and
* run last pass of JIT
@@ -10142,9 +10518,16 @@ static int jit_subprogs(struct bpf_verifier_env *env)
bpf_prog_free_unused_jited_linfo(prog);
return 0;
out_free:
- for (i = 0; i < env->subprog_cnt; i++)
- if (func[i])
- bpf_jit_free(func[i]);
+ for (i = 0; i < env->subprog_cnt; i++) {
+ if (!func[i])
+ continue;
+
+ for (j = 0; j < func[i]->aux->size_poke_tab; j++) {
+ map_ptr = func[i]->aux->poke_tab[j].tail_call.map;
+ map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux);
+ }
+ bpf_jit_free(func[i]);
+ }
kfree(func);
out_undo_insn:
/* cleanup main prog to be interpreted */
@@ -10178,6 +10561,13 @@ static int fixup_call_args(struct bpf_verifier_env *env)
return err;
}
#ifndef CONFIG_BPF_JIT_ALWAYS_ON
+ if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) {
+ /* When JIT fails the progs with bpf2bpf calls and tail_calls
+ * have to be rejected, since interpreter doesn't support them yet.
+ */
+ verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n");
+ return -EINVAL;
+ }
for (i = 0; i < prog->len; i++, insn++) {
if (insn->code != (BPF_JMP | BPF_CALL) ||
insn->src_reg != BPF_PSEUDO_CALL)
@@ -10341,8 +10731,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
* the program array.
*/
prog->cb_access = 1;
- env->prog->aux->stack_depth = MAX_BPF_STACK;
- env->prog->aux->max_pkt_offset = MAX_PACKET_OFF;
+ if (!allow_tail_call_in_subprogs(env))
+ prog->aux->stack_depth = MAX_BPF_STACK;
+ prog->aux->max_pkt_offset = MAX_PACKET_OFF;
/* mark bpf_tail_call as different opcode to avoid
* conditional branch in the interpeter for every normal
@@ -10362,6 +10753,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
.reason = BPF_POKE_REASON_TAIL_CALL,
.tail_call.map = BPF_MAP_PTR(aux->map_ptr_state),
.tail_call.key = bpf_map_key_immediate(aux),
+ .insn_idx = i + delta,
};
ret = bpf_jit_add_poke_descriptor(prog, &desc);
@@ -10810,59 +11202,79 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env)
}
#define SECURITY_PREFIX "security_"
-static int check_attach_modify_return(struct bpf_prog *prog, unsigned long addr)
+static int check_attach_modify_return(unsigned long addr, const char *func_name)
{
if (within_error_injection_list(addr) ||
- !strncmp(SECURITY_PREFIX, prog->aux->attach_func_name,
- sizeof(SECURITY_PREFIX) - 1))
+ !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1))
return 0;
return -EINVAL;
}
-static int check_attach_btf_id(struct bpf_verifier_env *env)
+/* non exhaustive list of sleepable bpf_lsm_*() functions */
+BTF_SET_START(btf_sleepable_lsm_hooks)
+#ifdef CONFIG_BPF_LSM
+BTF_ID(func, bpf_lsm_bprm_committed_creds)
+#else
+BTF_ID_UNUSED
+#endif
+BTF_SET_END(btf_sleepable_lsm_hooks)
+
+static int check_sleepable_lsm_hook(u32 btf_id)
+{
+ return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id);
+}
+
+/* list of non-sleepable functions that are otherwise on
+ * ALLOW_ERROR_INJECTION list
+ */
+BTF_SET_START(btf_non_sleepable_error_inject)
+/* Three functions below can be called from sleepable and non-sleepable context.
+ * Assume non-sleepable from bpf safety point of view.
+ */
+BTF_ID(func, __add_to_page_cache_locked)
+BTF_ID(func, should_fail_alloc_page)
+BTF_ID(func, should_failslab)
+BTF_SET_END(btf_non_sleepable_error_inject)
+
+static int check_non_sleepable_error_inject(u32 btf_id)
+{
+ return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id);
+}
+
+int bpf_check_attach_target(struct bpf_verifier_log *log,
+ const struct bpf_prog *prog,
+ const struct bpf_prog *tgt_prog,
+ u32 btf_id,
+ struct bpf_attach_target_info *tgt_info)
{
- struct bpf_prog *prog = env->prog;
bool prog_extension = prog->type == BPF_PROG_TYPE_EXT;
- struct bpf_prog *tgt_prog = prog->aux->linked_prog;
- u32 btf_id = prog->aux->attach_btf_id;
const char prefix[] = "btf_trace_";
- struct btf_func_model fmodel;
int ret = 0, subprog = -1, i;
- struct bpf_trampoline *tr;
const struct btf_type *t;
bool conservative = true;
const char *tname;
struct btf *btf;
- long addr;
- u64 key;
-
- if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
- return check_struct_ops_btf_id(env);
-
- if (prog->type != BPF_PROG_TYPE_TRACING &&
- prog->type != BPF_PROG_TYPE_LSM &&
- !prog_extension)
- return 0;
+ long addr = 0;
if (!btf_id) {
- verbose(env, "Tracing programs must provide btf_id\n");
+ bpf_log(log, "Tracing programs must provide btf_id\n");
return -EINVAL;
}
- btf = bpf_prog_get_target_btf(prog);
+ btf = tgt_prog ? tgt_prog->aux->btf : btf_vmlinux;
if (!btf) {
- verbose(env,
+ bpf_log(log,
"FENTRY/FEXIT program can only be attached to another program annotated with BTF\n");
return -EINVAL;
}
t = btf_type_by_id(btf, btf_id);
if (!t) {
- verbose(env, "attach_btf_id %u is invalid\n", btf_id);
+ bpf_log(log, "attach_btf_id %u is invalid\n", btf_id);
return -EINVAL;
}
tname = btf_name_by_offset(btf, t->name_off);
if (!tname) {
- verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id);
+ bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id);
return -EINVAL;
}
if (tgt_prog) {
@@ -10874,26 +11286,24 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
break;
}
if (subprog == -1) {
- verbose(env, "Subprog %s doesn't exist\n", tname);
+ bpf_log(log, "Subprog %s doesn't exist\n", tname);
return -EINVAL;
}
conservative = aux->func_info_aux[subprog].unreliable;
if (prog_extension) {
if (conservative) {
- verbose(env,
+ bpf_log(log,
"Cannot replace static functions\n");
return -EINVAL;
}
if (!prog->jit_requested) {
- verbose(env,
+ bpf_log(log,
"Extension programs should be JITed\n");
return -EINVAL;
}
- env->ops = bpf_verifier_ops[tgt_prog->type];
- prog->expected_attach_type = tgt_prog->expected_attach_type;
}
if (!tgt_prog->jited) {
- verbose(env, "Can attach to only JITed progs\n");
+ bpf_log(log, "Can attach to only JITed progs\n");
return -EINVAL;
}
if (tgt_prog->type == prog->type) {
@@ -10901,7 +11311,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
* Cannot attach program extension to another extension.
* It's ok to attach fentry/fexit to extension program.
*/
- verbose(env, "Cannot recursively attach\n");
+ bpf_log(log, "Cannot recursively attach\n");
return -EINVAL;
}
if (tgt_prog->type == BPF_PROG_TYPE_TRACING &&
@@ -10923,32 +11333,30 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
* reasonable stack size. Hence extending fentry is not
* allowed.
*/
- verbose(env, "Cannot extend fentry/fexit\n");
+ bpf_log(log, "Cannot extend fentry/fexit\n");
return -EINVAL;
}
- key = ((u64)aux->id) << 32 | btf_id;
} else {
if (prog_extension) {
- verbose(env, "Cannot replace kernel functions\n");
+ bpf_log(log, "Cannot replace kernel functions\n");
return -EINVAL;
}
- key = btf_id;
}
switch (prog->expected_attach_type) {
case BPF_TRACE_RAW_TP:
if (tgt_prog) {
- verbose(env,
+ bpf_log(log,
"Only FENTRY/FEXIT progs are attachable to another BPF prog\n");
return -EINVAL;
}
if (!btf_type_is_typedef(t)) {
- verbose(env, "attach_btf_id %u is not a typedef\n",
+ bpf_log(log, "attach_btf_id %u is not a typedef\n",
btf_id);
return -EINVAL;
}
if (strncmp(prefix, tname, sizeof(prefix) - 1)) {
- verbose(env, "attach_btf_id %u points to wrong type name %s\n",
+ bpf_log(log, "attach_btf_id %u points to wrong type name %s\n",
btf_id, tname);
return -EINVAL;
}
@@ -10962,29 +11370,20 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
/* should never happen in valid vmlinux build */
return -EINVAL;
- /* remember two read only pointers that are valid for
- * the life time of the kernel
- */
- prog->aux->attach_func_name = tname;
- prog->aux->attach_func_proto = t;
- prog->aux->attach_btf_trace = true;
- return 0;
+ break;
case BPF_TRACE_ITER:
if (!btf_type_is_func(t)) {
- verbose(env, "attach_btf_id %u is not a function\n",
+ bpf_log(log, "attach_btf_id %u is not a function\n",
btf_id);
return -EINVAL;
}
t = btf_type_by_id(btf, t->type);
if (!btf_type_is_func_proto(t))
return -EINVAL;
- prog->aux->attach_func_name = tname;
- prog->aux->attach_func_proto = t;
- if (!bpf_iter_prog_supported(prog))
- return -EINVAL;
- ret = btf_distill_func_proto(&env->log, btf, t,
- tname, &fmodel);
- return ret;
+ ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
+ if (ret)
+ return ret;
+ break;
default:
if (!prog_extension)
return -EINVAL;
@@ -10993,42 +11392,30 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
case BPF_LSM_MAC:
case BPF_TRACE_FENTRY:
case BPF_TRACE_FEXIT:
- prog->aux->attach_func_name = tname;
- if (prog->type == BPF_PROG_TYPE_LSM) {
- ret = bpf_lsm_verify_prog(&env->log, prog);
- if (ret < 0)
- return ret;
- }
-
if (!btf_type_is_func(t)) {
- verbose(env, "attach_btf_id %u is not a function\n",
+ bpf_log(log, "attach_btf_id %u is not a function\n",
btf_id);
return -EINVAL;
}
if (prog_extension &&
- btf_check_type_match(env, prog, btf, t))
+ btf_check_type_match(log, prog, btf, t))
return -EINVAL;
t = btf_type_by_id(btf, t->type);
if (!btf_type_is_func_proto(t))
return -EINVAL;
- tr = bpf_trampoline_lookup(key);
- if (!tr)
- return -ENOMEM;
- /* t is either vmlinux type or another program's type */
- prog->aux->attach_func_proto = t;
- mutex_lock(&tr->mutex);
- if (tr->func.addr) {
- prog->aux->trampoline = tr;
- goto out;
- }
- if (tgt_prog && conservative) {
- prog->aux->attach_func_proto = NULL;
+
+ if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) &&
+ (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type ||
+ prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type))
+ return -EINVAL;
+
+ if (tgt_prog && conservative)
t = NULL;
- }
- ret = btf_distill_func_proto(&env->log, btf, t,
- tname, &tr->func.model);
+
+ ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel);
if (ret < 0)
- goto out;
+ return ret;
+
if (tgt_prog) {
if (subprog == 0)
addr = (long) tgt_prog->bpf_func;
@@ -11037,31 +11424,137 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
} else {
addr = kallsyms_lookup_name(tname);
if (!addr) {
- verbose(env,
+ bpf_log(log,
"The address of function %s cannot be found\n",
tname);
- ret = -ENOENT;
- goto out;
+ return -ENOENT;
}
}
- if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
- ret = check_attach_modify_return(prog, addr);
- if (ret)
- verbose(env, "%s() is not modifiable\n",
- prog->aux->attach_func_name);
+ if (prog->aux->sleepable) {
+ ret = -EINVAL;
+ switch (prog->type) {
+ case BPF_PROG_TYPE_TRACING:
+ /* fentry/fexit/fmod_ret progs can be sleepable only if they are
+ * attached to ALLOW_ERROR_INJECTION and are not in denylist.
+ */
+ if (!check_non_sleepable_error_inject(btf_id) &&
+ within_error_injection_list(addr))
+ ret = 0;
+ break;
+ case BPF_PROG_TYPE_LSM:
+ /* LSM progs check that they are attached to bpf_lsm_*() funcs.
+ * Only some of them are sleepable.
+ */
+ if (check_sleepable_lsm_hook(btf_id))
+ ret = 0;
+ break;
+ default:
+ break;
+ }
+ if (ret) {
+ bpf_log(log, "%s is not sleepable\n", tname);
+ return ret;
+ }
+ } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) {
+ if (tgt_prog) {
+ bpf_log(log, "can't modify return codes of BPF programs\n");
+ return -EINVAL;
+ }
+ ret = check_attach_modify_return(addr, tname);
+ if (ret) {
+ bpf_log(log, "%s() is not modifiable\n", tname);
+ return ret;
+ }
}
- if (ret)
- goto out;
- tr->func.addr = (void *)addr;
- prog->aux->trampoline = tr;
-out:
- mutex_unlock(&tr->mutex);
- if (ret)
- bpf_trampoline_put(tr);
+ break;
+ }
+ tgt_info->tgt_addr = addr;
+ tgt_info->tgt_name = tname;
+ tgt_info->tgt_type = t;
+ return 0;
+}
+
+static int check_attach_btf_id(struct bpf_verifier_env *env)
+{
+ struct bpf_prog *prog = env->prog;
+ struct bpf_prog *tgt_prog = prog->aux->dst_prog;
+ struct bpf_attach_target_info tgt_info = {};
+ u32 btf_id = prog->aux->attach_btf_id;
+ struct bpf_trampoline *tr;
+ int ret;
+ u64 key;
+
+ if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING &&
+ prog->type != BPF_PROG_TYPE_LSM) {
+ verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n");
+ return -EINVAL;
+ }
+
+ if (prog->type == BPF_PROG_TYPE_STRUCT_OPS)
+ return check_struct_ops_btf_id(env);
+
+ if (prog->type != BPF_PROG_TYPE_TRACING &&
+ prog->type != BPF_PROG_TYPE_LSM &&
+ prog->type != BPF_PROG_TYPE_EXT)
+ return 0;
+
+ ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info);
+ if (ret)
return ret;
+
+ if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) {
+ /* to make freplace equivalent to their targets, they need to
+ * inherit env->ops and expected_attach_type for the rest of the
+ * verification
+ */
+ env->ops = bpf_verifier_ops[tgt_prog->type];
+ prog->expected_attach_type = tgt_prog->expected_attach_type;
+ }
+
+ /* store info about the attachment target that will be used later */
+ prog->aux->attach_func_proto = tgt_info.tgt_type;
+ prog->aux->attach_func_name = tgt_info.tgt_name;
+
+ if (tgt_prog) {
+ prog->aux->saved_dst_prog_type = tgt_prog->type;
+ prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type;
+ }
+
+ if (prog->expected_attach_type == BPF_TRACE_RAW_TP) {
+ prog->aux->attach_btf_trace = true;
+ return 0;
+ } else if (prog->expected_attach_type == BPF_TRACE_ITER) {
+ if (!bpf_iter_prog_supported(prog))
+ return -EINVAL;
+ return 0;
}
+
+ if (prog->type == BPF_PROG_TYPE_LSM) {
+ ret = bpf_lsm_verify_prog(&env->log, prog);
+ if (ret < 0)
+ return ret;
+ }
+
+ key = bpf_trampoline_compute_key(tgt_prog, btf_id);
+ tr = bpf_trampoline_get(key, &tgt_info);
+ if (!tr)
+ return -ENOMEM;
+
+ prog->aux->dst_trampoline = tr;
+ return 0;
+}
+
+struct btf *bpf_get_btf_vmlinux(void)
+{
+ if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
+ mutex_lock(&bpf_verifier_lock);
+ if (!btf_vmlinux)
+ btf_vmlinux = btf_parse_vmlinux();
+ mutex_unlock(&bpf_verifier_lock);
+ }
+ return btf_vmlinux;
}
int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
@@ -11097,12 +11590,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
env->ops = bpf_verifier_ops[env->prog->type];
is_priv = bpf_capable();
- if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
- mutex_lock(&bpf_verifier_lock);
- if (!btf_vmlinux)
- btf_vmlinux = btf_parse_vmlinux();
- mutex_unlock(&bpf_verifier_lock);
- }
+ bpf_get_btf_vmlinux();
/* grab the mutex to protect few globals used by verifier */
if (!is_priv)