diff options
Diffstat (limited to 'kernel/bpf/verifier.c')
-rw-r--r-- | kernel/bpf/verifier.c | 1080 |
1 files changed, 784 insertions, 296 deletions
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c index fba52d9ec8fc..3923c570070b 100644 --- a/kernel/bpf/verifier.c +++ b/kernel/bpf/verifier.c @@ -21,6 +21,7 @@ #include <linux/ctype.h> #include <linux/error-injection.h> #include <linux/bpf_lsm.h> +#include <linux/btf_ids.h> #include "disasm.h" @@ -237,7 +238,6 @@ struct bpf_call_arg_meta { u64 msize_max_value; int ref_obj_id; int func_id; - u32 btf_id; }; struct btf *btf_vmlinux; @@ -435,6 +435,15 @@ static bool arg_type_may_be_refcounted(enum bpf_arg_type type) return type == ARG_PTR_TO_SOCK_COMMON; } +static bool arg_type_may_be_null(enum bpf_arg_type type) +{ + return type == ARG_PTR_TO_MAP_VALUE_OR_NULL || + type == ARG_PTR_TO_MEM_OR_NULL || + type == ARG_PTR_TO_CTX_OR_NULL || + type == ARG_PTR_TO_SOCKET_OR_NULL || + type == ARG_PTR_TO_ALLOC_MEM_OR_NULL; +} + /* Determine whether the function releases some resources allocated by another * function call. The first reference type argument will be assumed to be * released by release_reference(). @@ -477,7 +486,12 @@ static bool is_acquire_function(enum bpf_func_id func_id, static bool is_ptr_cast_function(enum bpf_func_id func_id) { return func_id == BPF_FUNC_tcp_sock || - func_id == BPF_FUNC_sk_fullsock; + func_id == BPF_FUNC_sk_fullsock || + func_id == BPF_FUNC_skc_to_tcp_sock || + func_id == BPF_FUNC_skc_to_tcp6_sock || + func_id == BPF_FUNC_skc_to_udp6_sock || + func_id == BPF_FUNC_skc_to_tcp_timewait_sock || + func_id == BPF_FUNC_skc_to_tcp_request_sock; } /* string representation of 'enum bpf_reg_type' */ @@ -1489,6 +1503,13 @@ static int check_subprogs(struct bpf_verifier_env *env) for (i = 0; i < insn_cnt; i++) { u8 code = insn[i].code; + if (code == (BPF_JMP | BPF_CALL) && + insn[i].imm == BPF_FUNC_tail_call && + insn[i].src_reg != BPF_PSEUDO_CALL) + subprog[cur_subprog].has_tail_call = true; + if (BPF_CLASS(code) == BPF_LD && + (BPF_MODE(code) == BPF_ABS || BPF_MODE(code) == BPF_IND)) + subprog[cur_subprog].has_ld_abs = true; if (BPF_CLASS(code) != BPF_JMP && BPF_CLASS(code) != BPF_JMP32) goto next; if (BPF_OP(code) == BPF_EXIT || BPF_OP(code) == BPF_CALL) @@ -2625,11 +2646,18 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno, #define MAX_PACKET_OFF 0xffff +static enum bpf_prog_type resolve_prog_type(struct bpf_prog *prog) +{ + return prog->aux->dst_prog ? prog->aux->dst_prog->type : prog->type; +} + static bool may_access_direct_pkt_data(struct bpf_verifier_env *env, const struct bpf_call_arg_meta *meta, enum bpf_access_type t) { - switch (env->prog->type) { + enum bpf_prog_type prog_type = resolve_prog_type(env->prog); + + switch (prog_type) { /* Program types only with direct read access go here! */ case BPF_PROG_TYPE_LWT_IN: case BPF_PROG_TYPE_LWT_OUT: @@ -2970,10 +2998,37 @@ static int check_max_stack_depth(struct bpf_verifier_env *env) int depth = 0, frame = 0, idx = 0, i = 0, subprog_end; struct bpf_subprog_info *subprog = env->subprog_info; struct bpf_insn *insn = env->prog->insnsi; + bool tail_call_reachable = false; int ret_insn[MAX_CALL_FRAMES]; int ret_prog[MAX_CALL_FRAMES]; + int j; process_func: + /* protect against potential stack overflow that might happen when + * bpf2bpf calls get combined with tailcalls. Limit the caller's stack + * depth for such case down to 256 so that the worst case scenario + * would result in 8k stack size (32 which is tailcall limit * 256 = + * 8k). + * + * To get the idea what might happen, see an example: + * func1 -> sub rsp, 128 + * subfunc1 -> sub rsp, 256 + * tailcall1 -> add rsp, 256 + * func2 -> sub rsp, 192 (total stack size = 128 + 192 = 320) + * subfunc2 -> sub rsp, 64 + * subfunc22 -> sub rsp, 128 + * tailcall2 -> add rsp, 128 + * func3 -> sub rsp, 32 (total stack size 128 + 192 + 64 + 32 = 416) + * + * tailcall will unwind the current stack frame but it will not get rid + * of caller's stack as shown on the example above. + */ + if (idx && subprog[idx].has_tail_call && depth >= 256) { + verbose(env, + "tail_calls are not allowed when call stack of previous frames is %d bytes. Too large\n", + depth); + return -EACCES; + } /* round up to 32-bytes, since this is granularity * of interpreter stack size */ @@ -3002,6 +3057,10 @@ continue_func: i); return -EFAULT; } + + if (subprog[idx].has_tail_call) + tail_call_reachable = true; + frame++; if (frame >= MAX_CALL_FRAMES) { verbose(env, "the call stack of %d frames is too deep !\n", @@ -3010,6 +3069,15 @@ continue_func: } goto process_func; } + /* if tail call got detected across bpf2bpf calls then mark each of the + * currently present subprog frames as tail call reachable subprogs; + * this info will be utilized by JIT so that we will be preserving the + * tail call counter throughout bpf2bpf calls combined with tailcalls + */ + if (tail_call_reachable) + for (j = 0; j < frame; j++) + subprog[ret_prog[j]].tail_call_reachable = true; + /* end of for() loop means the last insn of the 'subprog' * was reached. Doesn't matter whether it was JA or EXIT */ @@ -3585,18 +3653,6 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno, struct bpf_func_state *state = func(env, reg); int err, min_off, max_off, i, j, slot, spi; - if (reg->type != PTR_TO_STACK) { - /* Allow zero-byte read from NULL, regardless of pointer type */ - if (zero_size_allowed && access_size == 0 && - register_is_null(reg)) - return 0; - - verbose(env, "R%d type=%s expected=%s\n", regno, - reg_type_str[reg->type], - reg_type_str[PTR_TO_STACK]); - return -EACCES; - } - if (tnum_is_const(reg->var_off)) { min_off = max_off = reg->var_off.value + reg->off; err = __check_stack_boundary(env, regno, min_off, access_size, @@ -3741,9 +3797,19 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno, access_size, zero_size_allowed, "rdwr", &env->prog->aux->max_rdwr_access); - default: /* scalar_value|ptr_to_stack or invalid ptr */ + case PTR_TO_STACK: return check_stack_boundary(env, regno, access_size, zero_size_allowed, meta); + default: /* scalar_value or invalid ptr */ + /* Allow zero-byte read from NULL, regardless of pointer type */ + if (zero_size_allowed && access_size == 0 && + register_is_null(reg)) + return 0; + + verbose(env, "R%d type=%s expected=%s\n", regno, + reg_type_str[reg->type], + reg_type_str[PTR_TO_STACK]); + return -EACCES; } } @@ -3775,10 +3841,6 @@ static int process_spin_lock(struct bpf_verifier_env *env, int regno, struct bpf_map *map = reg->map_ptr; u64 val = reg->var_off.value; - if (reg->type != PTR_TO_MAP_VALUE) { - verbose(env, "R%d is not a pointer to map_value\n", regno); - return -EINVAL; - } if (!is_const) { verbose(env, "R%d doesn't have constant offset. bpf_spin_lock has to be at the constant offset\n", @@ -3845,12 +3907,6 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type) type == ARG_CONST_SIZE_OR_ZERO; } -static bool arg_type_is_alloc_mem_ptr(enum bpf_arg_type type) -{ - return type == ARG_PTR_TO_ALLOC_MEM || - type == ARG_PTR_TO_ALLOC_MEM_OR_NULL; -} - static bool arg_type_is_alloc_size(enum bpf_arg_type type) { return type == ARG_CONST_ALLOC_SIZE_OR_ZERO; @@ -3872,14 +3928,188 @@ static int int_ptr_type_to_size(enum bpf_arg_type type) return -EINVAL; } +static int resolve_map_arg_type(struct bpf_verifier_env *env, + const struct bpf_call_arg_meta *meta, + enum bpf_arg_type *arg_type) +{ + if (!meta->map_ptr) { + /* kernel subsystem misconfigured verifier */ + verbose(env, "invalid map_ptr to access map->type\n"); + return -EACCES; + } + + switch (meta->map_ptr->map_type) { + case BPF_MAP_TYPE_SOCKMAP: + case BPF_MAP_TYPE_SOCKHASH: + if (*arg_type == ARG_PTR_TO_MAP_VALUE) { + *arg_type = ARG_PTR_TO_BTF_ID_SOCK_COMMON; + } else { + verbose(env, "invalid arg_type for sockmap/sockhash\n"); + return -EINVAL; + } + break; + + default: + break; + } + return 0; +} + +struct bpf_reg_types { + const enum bpf_reg_type types[10]; + u32 *btf_id; +}; + +static const struct bpf_reg_types map_key_value_types = { + .types = { + PTR_TO_STACK, + PTR_TO_PACKET, + PTR_TO_PACKET_META, + PTR_TO_MAP_VALUE, + }, +}; + +static const struct bpf_reg_types sock_types = { + .types = { + PTR_TO_SOCK_COMMON, + PTR_TO_SOCKET, + PTR_TO_TCP_SOCK, + PTR_TO_XDP_SOCK, + }, +}; + +static const struct bpf_reg_types btf_id_sock_common_types = { + .types = { + PTR_TO_SOCK_COMMON, + PTR_TO_SOCKET, + PTR_TO_TCP_SOCK, + PTR_TO_XDP_SOCK, + PTR_TO_BTF_ID, + }, + .btf_id = &btf_sock_ids[BTF_SOCK_TYPE_SOCK_COMMON], +}; + +static const struct bpf_reg_types mem_types = { + .types = { + PTR_TO_STACK, + PTR_TO_PACKET, + PTR_TO_PACKET_META, + PTR_TO_MAP_VALUE, + PTR_TO_MEM, + PTR_TO_RDONLY_BUF, + PTR_TO_RDWR_BUF, + }, +}; + +static const struct bpf_reg_types int_ptr_types = { + .types = { + PTR_TO_STACK, + PTR_TO_PACKET, + PTR_TO_PACKET_META, + PTR_TO_MAP_VALUE, + }, +}; + +static const struct bpf_reg_types fullsock_types = { .types = { PTR_TO_SOCKET } }; +static const struct bpf_reg_types scalar_types = { .types = { SCALAR_VALUE } }; +static const struct bpf_reg_types context_types = { .types = { PTR_TO_CTX } }; +static const struct bpf_reg_types alloc_mem_types = { .types = { PTR_TO_MEM } }; +static const struct bpf_reg_types const_map_ptr_types = { .types = { CONST_PTR_TO_MAP } }; +static const struct bpf_reg_types btf_ptr_types = { .types = { PTR_TO_BTF_ID } }; +static const struct bpf_reg_types spin_lock_types = { .types = { PTR_TO_MAP_VALUE } }; + +static const struct bpf_reg_types *compatible_reg_types[__BPF_ARG_TYPE_MAX] = { + [ARG_PTR_TO_MAP_KEY] = &map_key_value_types, + [ARG_PTR_TO_MAP_VALUE] = &map_key_value_types, + [ARG_PTR_TO_UNINIT_MAP_VALUE] = &map_key_value_types, + [ARG_PTR_TO_MAP_VALUE_OR_NULL] = &map_key_value_types, + [ARG_CONST_SIZE] = &scalar_types, + [ARG_CONST_SIZE_OR_ZERO] = &scalar_types, + [ARG_CONST_ALLOC_SIZE_OR_ZERO] = &scalar_types, + [ARG_CONST_MAP_PTR] = &const_map_ptr_types, + [ARG_PTR_TO_CTX] = &context_types, + [ARG_PTR_TO_CTX_OR_NULL] = &context_types, + [ARG_PTR_TO_SOCK_COMMON] = &sock_types, + [ARG_PTR_TO_BTF_ID_SOCK_COMMON] = &btf_id_sock_common_types, + [ARG_PTR_TO_SOCKET] = &fullsock_types, + [ARG_PTR_TO_SOCKET_OR_NULL] = &fullsock_types, + [ARG_PTR_TO_BTF_ID] = &btf_ptr_types, + [ARG_PTR_TO_SPIN_LOCK] = &spin_lock_types, + [ARG_PTR_TO_MEM] = &mem_types, + [ARG_PTR_TO_MEM_OR_NULL] = &mem_types, + [ARG_PTR_TO_UNINIT_MEM] = &mem_types, + [ARG_PTR_TO_ALLOC_MEM] = &alloc_mem_types, + [ARG_PTR_TO_ALLOC_MEM_OR_NULL] = &alloc_mem_types, + [ARG_PTR_TO_INT] = &int_ptr_types, + [ARG_PTR_TO_LONG] = &int_ptr_types, +}; + +static int check_reg_type(struct bpf_verifier_env *env, u32 regno, + enum bpf_arg_type arg_type, + const u32 *arg_btf_id) +{ + struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; + enum bpf_reg_type expected, type = reg->type; + const struct bpf_reg_types *compatible; + int i, j; + + compatible = compatible_reg_types[arg_type]; + if (!compatible) { + verbose(env, "verifier internal error: unsupported arg type %d\n", arg_type); + return -EFAULT; + } + + for (i = 0; i < ARRAY_SIZE(compatible->types); i++) { + expected = compatible->types[i]; + if (expected == NOT_INIT) + break; + + if (type == expected) + goto found; + } + + verbose(env, "R%d type=%s expected=", regno, reg_type_str[type]); + for (j = 0; j + 1 < i; j++) + verbose(env, "%s, ", reg_type_str[compatible->types[j]]); + verbose(env, "%s\n", reg_type_str[compatible->types[j]]); + return -EACCES; + +found: + if (type == PTR_TO_BTF_ID) { + if (!arg_btf_id) { + if (!compatible->btf_id) { + verbose(env, "verifier internal error: missing arg compatible BTF ID\n"); + return -EFAULT; + } + arg_btf_id = compatible->btf_id; + } + + if (!btf_struct_ids_match(&env->log, reg->off, reg->btf_id, + *arg_btf_id)) { + verbose(env, "R%d is of type %s but %s is expected\n", + regno, kernel_type_name(reg->btf_id), + kernel_type_name(*arg_btf_id)); + return -EACCES; + } + + if (!tnum_is_const(reg->var_off) || reg->var_off.value) { + verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n", + regno); + return -EACCES; + } + } + + return 0; +} + static int check_func_arg(struct bpf_verifier_env *env, u32 arg, struct bpf_call_arg_meta *meta, const struct bpf_func_proto *fn) { u32 regno = BPF_REG_1 + arg; struct bpf_reg_state *regs = cur_regs(env), *reg = ®s[regno]; - enum bpf_reg_type expected_type, type = reg->type; enum bpf_arg_type arg_type = fn->arg_type[arg]; + enum bpf_reg_type type = reg->type; int err = 0; if (arg_type == ARG_DONTCARE) @@ -3904,120 +4134,32 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, return -EACCES; } - if (arg_type == ARG_PTR_TO_MAP_KEY || - arg_type == ARG_PTR_TO_MAP_VALUE || + if (arg_type == ARG_PTR_TO_MAP_VALUE || arg_type == ARG_PTR_TO_UNINIT_MAP_VALUE || arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) { - expected_type = PTR_TO_STACK; - if (register_is_null(reg) && - arg_type == ARG_PTR_TO_MAP_VALUE_OR_NULL) - /* final test in check_stack_boundary() */; - else if (!type_is_pkt_pointer(type) && - type != PTR_TO_MAP_VALUE && - type != expected_type) - goto err_type; - } else if (arg_type == ARG_CONST_SIZE || - arg_type == ARG_CONST_SIZE_OR_ZERO || - arg_type == ARG_CONST_ALLOC_SIZE_OR_ZERO) { - expected_type = SCALAR_VALUE; - if (type != expected_type) - goto err_type; - } else if (arg_type == ARG_CONST_MAP_PTR) { - expected_type = CONST_PTR_TO_MAP; - if (type != expected_type) - goto err_type; - } else if (arg_type == ARG_PTR_TO_CTX || - arg_type == ARG_PTR_TO_CTX_OR_NULL) { - expected_type = PTR_TO_CTX; - if (!(register_is_null(reg) && - arg_type == ARG_PTR_TO_CTX_OR_NULL)) { - if (type != expected_type) - goto err_type; - err = check_ctx_reg(env, reg, regno); - if (err < 0) - return err; - } - } else if (arg_type == ARG_PTR_TO_SOCK_COMMON) { - expected_type = PTR_TO_SOCK_COMMON; - /* Any sk pointer can be ARG_PTR_TO_SOCK_COMMON */ - if (!type_is_sk_pointer(type)) - goto err_type; - if (reg->ref_obj_id) { - if (meta->ref_obj_id) { - verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", - regno, reg->ref_obj_id, - meta->ref_obj_id); - return -EFAULT; - } - meta->ref_obj_id = reg->ref_obj_id; - } - } else if (arg_type == ARG_PTR_TO_SOCKET || - arg_type == ARG_PTR_TO_SOCKET_OR_NULL) { - expected_type = PTR_TO_SOCKET; - if (!(register_is_null(reg) && - arg_type == ARG_PTR_TO_SOCKET_OR_NULL)) { - if (type != expected_type) - goto err_type; - } - } else if (arg_type == ARG_PTR_TO_BTF_ID) { - expected_type = PTR_TO_BTF_ID; - if (type != expected_type) - goto err_type; - if (!fn->check_btf_id) { - if (reg->btf_id != meta->btf_id) { - verbose(env, "Helper has type %s got %s in R%d\n", - kernel_type_name(meta->btf_id), - kernel_type_name(reg->btf_id), regno); - - return -EACCES; - } - } else if (!fn->check_btf_id(reg->btf_id, arg)) { - verbose(env, "Helper does not support %s in R%d\n", - kernel_type_name(reg->btf_id), regno); + err = resolve_map_arg_type(env, meta, &arg_type); + if (err) + return err; + } - return -EACCES; - } - if (!tnum_is_const(reg->var_off) || reg->var_off.value || reg->off) { - verbose(env, "R%d is a pointer to in-kernel struct with non-zero offset\n", - regno); - return -EACCES; - } - } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { - if (meta->func_id == BPF_FUNC_spin_lock) { - if (process_spin_lock(env, regno, true)) - return -EACCES; - } else if (meta->func_id == BPF_FUNC_spin_unlock) { - if (process_spin_lock(env, regno, false)) - return -EACCES; - } else { - verbose(env, "verifier internal error\n"); - return -EFAULT; - } - } else if (arg_type_is_mem_ptr(arg_type)) { - expected_type = PTR_TO_STACK; - /* One exception here. In case function allows for NULL to be - * passed in as argument, it's a SCALAR_VALUE type. Final test - * happens during stack boundary checking. + if (register_is_null(reg) && arg_type_may_be_null(arg_type)) + /* A NULL register has a SCALAR_VALUE type, so skip + * type checking. */ - if (register_is_null(reg) && - (arg_type == ARG_PTR_TO_MEM_OR_NULL || - arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL)) - /* final test in check_stack_boundary() */; - else if (!type_is_pkt_pointer(type) && - type != PTR_TO_MAP_VALUE && - type != PTR_TO_MEM && - type != PTR_TO_RDONLY_BUF && - type != PTR_TO_RDWR_BUF && - type != expected_type) - goto err_type; - meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM; - } else if (arg_type_is_alloc_mem_ptr(arg_type)) { - expected_type = PTR_TO_MEM; - if (register_is_null(reg) && - arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL) - /* final test in check_stack_boundary() */; - else if (type != expected_type) - goto err_type; + goto skip_type_check; + + err = check_reg_type(env, regno, arg_type, fn->arg_btf_id[arg]); + if (err) + return err; + + if (type == PTR_TO_CTX) { + err = check_ctx_reg(env, reg, regno); + if (err < 0) + return err; + } + +skip_type_check: + if (reg->ref_obj_id) { if (meta->ref_obj_id) { verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n", regno, reg->ref_obj_id, @@ -4025,15 +4167,6 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, return -EFAULT; } meta->ref_obj_id = reg->ref_obj_id; - } else if (arg_type_is_int_ptr(arg_type)) { - expected_type = PTR_TO_STACK; - if (!type_is_pkt_pointer(type) && - type != PTR_TO_MAP_VALUE && - type != expected_type) - goto err_type; - } else { - verbose(env, "unsupported arg_type %d\n", arg_type); - return -EFAULT; } if (arg_type == ARG_CONST_MAP_PTR) { @@ -4072,6 +4205,22 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, err = check_helper_mem_access(env, regno, meta->map_ptr->value_size, false, meta); + } else if (arg_type == ARG_PTR_TO_SPIN_LOCK) { + if (meta->func_id == BPF_FUNC_spin_lock) { + if (process_spin_lock(env, regno, true)) + return -EACCES; + } else if (meta->func_id == BPF_FUNC_spin_unlock) { + if (process_spin_lock(env, regno, false)) + return -EACCES; + } else { + verbose(env, "verifier internal error\n"); + return -EFAULT; + } + } else if (arg_type_is_mem_ptr(arg_type)) { + /* The access to this pointer is only checked when we hit the + * next is_mem_size argument below. + */ + meta->raw_mode = (arg_type == ARG_PTR_TO_UNINIT_MEM); } else if (arg_type_is_mem_size(arg_type)) { bool zero_size_allowed = (arg_type == ARG_CONST_SIZE_OR_ZERO); @@ -4137,10 +4286,43 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 arg, } return err; -err_type: - verbose(env, "R%d type=%s expected=%s\n", regno, - reg_type_str[type], reg_type_str[expected_type]); - return -EACCES; +} + +static bool may_update_sockmap(struct bpf_verifier_env *env, int func_id) +{ + enum bpf_attach_type eatype = env->prog->expected_attach_type; + enum bpf_prog_type type = resolve_prog_type(env->prog); + + if (func_id != BPF_FUNC_map_update_elem) + return false; + + /* It's not possible to get access to a locked struct sock in these + * contexts, so updating is safe. + */ + switch (type) { + case BPF_PROG_TYPE_TRACING: + if (eatype == BPF_TRACE_ITER) + return true; + break; + case BPF_PROG_TYPE_SOCKET_FILTER: + case BPF_PROG_TYPE_SCHED_CLS: + case BPF_PROG_TYPE_SCHED_ACT: + case BPF_PROG_TYPE_XDP: + case BPF_PROG_TYPE_SK_REUSEPORT: + case BPF_PROG_TYPE_FLOW_DISSECTOR: + case BPF_PROG_TYPE_SK_LOOKUP: + return true; + default: + break; + } + + verbose(env, "cannot update sockmap in this context\n"); + return false; +} + +static bool allow_tail_call_in_subprogs(struct bpf_verifier_env *env) +{ + return env->prog->jit_requested && IS_ENABLED(CONFIG_X86_64); } static int check_map_func_compatibility(struct bpf_verifier_env *env, @@ -4214,7 +4396,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_map && func_id != BPF_FUNC_sk_select_reuseport && - func_id != BPF_FUNC_map_lookup_elem) + func_id != BPF_FUNC_map_lookup_elem && + !may_update_sockmap(env, func_id)) goto error; break; case BPF_MAP_TYPE_SOCKHASH: @@ -4223,7 +4406,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_map_delete_elem && func_id != BPF_FUNC_msg_redirect_hash && func_id != BPF_FUNC_sk_select_reuseport && - func_id != BPF_FUNC_map_lookup_elem) + func_id != BPF_FUNC_map_lookup_elem && + !may_update_sockmap(env, func_id)) goto error; break; case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY: @@ -4242,6 +4426,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, func_id != BPF_FUNC_sk_storage_delete) goto error; break; + case BPF_MAP_TYPE_INODE_STORAGE: + if (func_id != BPF_FUNC_inode_storage_get && + func_id != BPF_FUNC_inode_storage_delete) + goto error; + break; default: break; } @@ -4251,8 +4440,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, case BPF_FUNC_tail_call: if (map->map_type != BPF_MAP_TYPE_PROG_ARRAY) goto error; - if (env->subprog_cnt > 1) { - verbose(env, "tail_calls are not allowed in programs with bpf-to-bpf calls\n"); + if (env->subprog_cnt > 1 && !allow_tail_call_in_subprogs(env)) { + verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n"); return -EINVAL; } break; @@ -4315,6 +4504,11 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env, if (map->map_type != BPF_MAP_TYPE_SK_STORAGE) goto error; break; + case BPF_FUNC_inode_storage_get: + case BPF_FUNC_inode_storage_delete: + if (map->map_type != BPF_MAP_TYPE_INODE_STORAGE) + goto error; + break; default: break; } @@ -4402,10 +4596,26 @@ static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id) return count <= 1; } +static bool check_btf_id_ok(const struct bpf_func_proto *fn) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(fn->arg_type); i++) { + if (fn->arg_type[i] == ARG_PTR_TO_BTF_ID && !fn->arg_btf_id[i]) + return false; + + if (fn->arg_type[i] != ARG_PTR_TO_BTF_ID && fn->arg_btf_id[i]) + return false; + } + + return true; +} + static int check_func_proto(const struct bpf_func_proto *fn, int func_id) { return check_raw_mode_ok(fn) && check_arg_pair_ok(fn) && + check_btf_id_ok(fn) && check_refcount_ok(fn, func_id) ? 0 : -EINVAL; } @@ -4775,6 +4985,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn return -EINVAL; } + if (fn->allowed && !fn->allowed(env->prog)) { + verbose(env, "helper call is not allowed in probe\n"); + return -EINVAL; + } + /* With LD_ABS/IND some JITs save/restore skb from r1. */ changes_data = bpf_helper_changes_pkt_data(fn->func); if (changes_data && fn->arg1_type != ARG_PTR_TO_CTX) { @@ -4796,11 +5011,6 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn meta.func_id = func_id; /* check args */ for (i = 0; i < 5; i++) { - if (!fn->check_btf_id) { - err = btf_resolve_helper_id(&env->log, fn, i); - if (err > 0) - meta.btf_id = err; - } err = check_func_arg(env, i, &meta, fn); if (err) return err; @@ -5219,6 +5429,10 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env, dst, reg_type_str[ptr_reg->type]); return -EACCES; case CONST_PTR_TO_MAP: + /* smin_val represents the known value */ + if (known && smin_val == 0 && opcode == BPF_ADD) + break; + /* fall-through */ case PTR_TO_PACKET_END: case PTR_TO_SOCKET: case PTR_TO_SOCKET_OR_NULL: @@ -5634,8 +5848,7 @@ static void scalar_min_max_and(struct bpf_reg_state *dst_reg, u64 umax_val = src_reg->umax_value; if (src_known && dst_known) { - __mark_reg_known(dst_reg, dst_reg->var_off.value & - src_reg->var_off.value); + __mark_reg_known(dst_reg, dst_reg->var_off.value); return; } @@ -5705,8 +5918,7 @@ static void scalar_min_max_or(struct bpf_reg_state *dst_reg, u64 umin_val = src_reg->umin_value; if (src_known && dst_known) { - __mark_reg_known(dst_reg, dst_reg->var_off.value | - src_reg->var_off.value); + __mark_reg_known(dst_reg, dst_reg->var_off.value); return; } @@ -5732,6 +5944,67 @@ static void scalar_min_max_or(struct bpf_reg_state *dst_reg, __update_reg_bounds(dst_reg); } +static void scalar32_min_max_xor(struct bpf_reg_state *dst_reg, + struct bpf_reg_state *src_reg) +{ + bool src_known = tnum_subreg_is_const(src_reg->var_off); + bool dst_known = tnum_subreg_is_const(dst_reg->var_off); + struct tnum var32_off = tnum_subreg(dst_reg->var_off); + s32 smin_val = src_reg->s32_min_value; + + /* Assuming scalar64_min_max_xor will be called so it is safe + * to skip updating register for known case. + */ + if (src_known && dst_known) + return; + + /* We get both minimum and maximum from the var32_off. */ + dst_reg->u32_min_value = var32_off.value; + dst_reg->u32_max_value = var32_off.value | var32_off.mask; + + if (dst_reg->s32_min_value >= 0 && smin_val >= 0) { + /* XORing two positive sign numbers gives a positive, + * so safe to cast u32 result into s32. + */ + dst_reg->s32_min_value = dst_reg->u32_min_value; + dst_reg->s32_max_value = dst_reg->u32_max_value; + } else { + dst_reg->s32_min_value = S32_MIN; + dst_reg->s32_max_value = S32_MAX; + } +} + +static void scalar_min_max_xor(struct bpf_reg_state *dst_reg, + struct bpf_reg_state *src_reg) +{ + bool src_known = tnum_is_const(src_reg->var_off); + bool dst_known = tnum_is_const(dst_reg->var_off); + s64 smin_val = src_reg->smin_value; + + if (src_known && dst_known) { + /* dst_reg->var_off.value has been updated earlier */ + __mark_reg_known(dst_reg, dst_reg->var_off.value); + return; + } + + /* We get both minimum and maximum from the var_off. */ + dst_reg->umin_value = dst_reg->var_off.value; + dst_reg->umax_value = dst_reg->var_off.value | dst_reg->var_off.mask; + + if (dst_reg->smin_value >= 0 && smin_val >= 0) { + /* XORing two positive sign numbers gives a positive, + * so safe to cast u64 result into s64. + */ + dst_reg->smin_value = dst_reg->umin_value; + dst_reg->smax_value = dst_reg->umax_value; + } else { + dst_reg->smin_value = S64_MIN; + dst_reg->smax_value = S64_MAX; + } + + __update_reg_bounds(dst_reg); +} + static void __scalar32_min_max_lsh(struct bpf_reg_state *dst_reg, u64 umin_val, u64 umax_val) { @@ -6040,6 +6313,11 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env, scalar32_min_max_or(dst_reg, &src_reg); scalar_min_max_or(dst_reg, &src_reg); break; + case BPF_XOR: + dst_reg->var_off = tnum_xor(dst_reg->var_off, src_reg.var_off); + scalar32_min_max_xor(dst_reg, &src_reg); + scalar_min_max_xor(dst_reg, &src_reg); + break; case BPF_LSH: if (umax_val >= insn_bitness) { /* Shifts greater than 31 or 63 are undefined. @@ -7287,7 +7565,7 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) u8 mode = BPF_MODE(insn->code); int i, err; - if (!may_access_skb(env->prog->type)) { + if (!may_access_skb(resolve_prog_type(env->prog))) { verbose(env, "BPF_LD_[ABS|IND] instructions not allowed for this program type\n"); return -EINVAL; } @@ -7297,18 +7575,6 @@ static int check_ld_abs(struct bpf_verifier_env *env, struct bpf_insn *insn) return -EINVAL; } - if (env->subprog_cnt > 1) { - /* when program has LD_ABS insn JITs and interpreter assume - * that r1 == ctx == skb which is not the case for callees - * that can have arbitrary arguments. It's problematic - * for main prog as well since JITs would need to analyze - * all functions in order to make proper register save/restore - * decisions in the main prog. Hence disallow LD_ABS with calls - */ - verbose(env, "BPF_LD_[ABS|IND] instructions cannot be mixed with bpf-to-bpf calls\n"); - return -EINVAL; - } - if (insn->dst_reg != BPF_REG_0 || insn->off != 0 || BPF_SIZE(insn->code) == BPF_DW || (mode == BPF_ABS && insn->src_reg != BPF_REG_0)) { @@ -7375,11 +7641,12 @@ static int check_return_code(struct bpf_verifier_env *env) const struct bpf_prog *prog = env->prog; struct bpf_reg_state *reg; struct tnum range = tnum_range(0, 1); + enum bpf_prog_type prog_type = resolve_prog_type(env->prog); int err; /* LSM and struct_ops func-ptr's return type could be "void" */ - if ((env->prog->type == BPF_PROG_TYPE_STRUCT_OPS || - env->prog->type == BPF_PROG_TYPE_LSM) && + if ((prog_type == BPF_PROG_TYPE_STRUCT_OPS || + prog_type == BPF_PROG_TYPE_LSM) && !prog->aux->attach_func_proto->type) return 0; @@ -7398,7 +7665,7 @@ static int check_return_code(struct bpf_verifier_env *env) return -EACCES; } - switch (env->prog->type) { + switch (prog_type) { case BPF_PROG_TYPE_CGROUP_SOCK_ADDR: if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG || env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG || @@ -7718,6 +7985,23 @@ err_free: return ret; } +static int check_abnormal_return(struct bpf_verifier_env *env) +{ + int i; + + for (i = 1; i < env->subprog_cnt; i++) { + if (env->subprog_info[i].has_ld_abs) { + verbose(env, "LD_ABS is not allowed in subprogs without BTF\n"); + return -EINVAL; + } + if (env->subprog_info[i].has_tail_call) { + verbose(env, "tail_call is not allowed in subprogs without BTF\n"); + return -EINVAL; + } + } + return 0; +} + /* The minimum supported BTF func info size */ #define MIN_BPF_FUNCINFO_SIZE 8 #define MAX_FUNCINFO_REC_SIZE 252 @@ -7726,20 +8010,24 @@ static int check_btf_func(struct bpf_verifier_env *env, const union bpf_attr *attr, union bpf_attr __user *uattr) { + const struct btf_type *type, *func_proto, *ret_type; u32 i, nfuncs, urec_size, min_size; u32 krec_size = sizeof(struct bpf_func_info); struct bpf_func_info *krecord; struct bpf_func_info_aux *info_aux = NULL; - const struct btf_type *type; struct bpf_prog *prog; const struct btf *btf; void __user *urecord; u32 prev_offset = 0; + bool scalar_return; int ret = -ENOMEM; nfuncs = attr->func_info_cnt; - if (!nfuncs) + if (!nfuncs) { + if (check_abnormal_return(env)) + return -EINVAL; return 0; + } if (nfuncs != env->subprog_cnt) { verbose(env, "number of funcs in func_info doesn't match number of subprogs\n"); @@ -7787,25 +8075,23 @@ static int check_btf_func(struct bpf_verifier_env *env, } /* check insn_off */ + ret = -EINVAL; if (i == 0) { if (krecord[i].insn_off) { verbose(env, "nonzero insn_off %u for the first func info record", krecord[i].insn_off); - ret = -EINVAL; goto err_free; } } else if (krecord[i].insn_off <= prev_offset) { verbose(env, "same or smaller insn offset (%u) than previous func info record (%u)", krecord[i].insn_off, prev_offset); - ret = -EINVAL; goto err_free; } if (env->subprog_info[i].start != krecord[i].insn_off) { verbose(env, "func_info BTF section doesn't match subprog layout in BPF program\n"); - ret = -EINVAL; goto err_free; } @@ -7814,10 +8100,26 @@ static int check_btf_func(struct bpf_verifier_env *env, if (!type || !btf_type_is_func(type)) { verbose(env, "invalid type id %d in func info", krecord[i].type_id); - ret = -EINVAL; goto err_free; } info_aux[i].linkage = BTF_INFO_VLEN(type->info); + + func_proto = btf_type_by_id(btf, type->type); + if (unlikely(!func_proto || !btf_type_is_func_proto(func_proto))) + /* btf_func_check() already verified it during BTF load */ + goto err_free; + ret_type = btf_type_skip_modifiers(btf, func_proto->type, NULL); + scalar_return = + btf_type_is_small_int(ret_type) || btf_type_is_enum(ret_type); + if (i && !scalar_return && env->subprog_info[i].has_ld_abs) { + verbose(env, "LD_ABS is only allowed in functions that return 'int'.\n"); + goto err_free; + } + if (i && !scalar_return && env->subprog_info[i].has_tail_call) { + verbose(env, "tail_call is only allowed in functions that return 'int'.\n"); + goto err_free; + } + prev_offset = krecord[i].insn_off; urecord += urec_size; } @@ -7978,8 +8280,11 @@ static int check_btf_info(struct bpf_verifier_env *env, struct btf *btf; int err; - if (!attr->func_info_cnt && !attr->line_info_cnt) + if (!attr->func_info_cnt && !attr->line_info_cnt) { + if (check_abnormal_return(env)) + return -EINVAL; return 0; + } btf = btf_get_by_fd(attr->prog_btf_fd); if (IS_ERR(btf)) @@ -9154,6 +9459,7 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, struct bpf_prog *prog) { + enum bpf_prog_type prog_type = resolve_prog_type(prog); /* * Validate that trace type programs use preallocated hash maps. * @@ -9171,8 +9477,8 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, * now, but warnings are emitted so developers are made aware of * the unsafety and can fix their programs before this is enforced. */ - if (is_tracing_prog_type(prog->type) && !is_preallocated_map(map)) { - if (prog->type == BPF_PROG_TYPE_PERF_EVENT) { + if (is_tracing_prog_type(prog_type) && !is_preallocated_map(map)) { + if (prog_type == BPF_PROG_TYPE_PERF_EVENT) { verbose(env, "perf_event programs can only use preallocated hash map\n"); return -EINVAL; } @@ -9184,8 +9490,8 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, verbose(env, "trace type programs with run-time allocated hash maps are unsafe. Switch to preallocated hash maps.\n"); } - if ((is_tracing_prog_type(prog->type) || - prog->type == BPF_PROG_TYPE_SOCKET_FILTER) && + if ((is_tracing_prog_type(prog_type) || + prog_type == BPF_PROG_TYPE_SOCKET_FILTER) && map_value_has_spin_lock(map)) { verbose(env, "tracing progs cannot use bpf_spin_lock yet\n"); return -EINVAL; @@ -9202,6 +9508,23 @@ static int check_map_prog_compatibility(struct bpf_verifier_env *env, return -EINVAL; } + if (prog->aux->sleepable) + switch (map->map_type) { + case BPF_MAP_TYPE_HASH: + case BPF_MAP_TYPE_LRU_HASH: + case BPF_MAP_TYPE_ARRAY: + if (!is_preallocated_map(map)) { + verbose(env, + "Sleepable programs can only use preallocated hash maps\n"); + return -EINVAL; + } + break; + default: + verbose(env, + "Sleepable programs can only use array and hash maps\n"); + return -EINVAL; + } + return 0; } @@ -9436,6 +9759,18 @@ static void adjust_subprog_starts(struct bpf_verifier_env *env, u32 off, u32 len } } +static void adjust_poke_descs(struct bpf_prog *prog, u32 len) +{ + struct bpf_jit_poke_descriptor *tab = prog->aux->poke_tab; + int i, sz = prog->aux->size_poke_tab; + struct bpf_jit_poke_descriptor *desc; + + for (i = 0; i < sz; i++) { + desc = &tab[i]; + desc->insn_idx += len - 1; + } +} + static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 off, const struct bpf_insn *patch, u32 len) { @@ -9452,6 +9787,7 @@ static struct bpf_prog *bpf_patch_insn_data(struct bpf_verifier_env *env, u32 of if (adjust_insn_aux_data(env, new_prog, off, len)) return NULL; adjust_subprog_starts(env, off, len); + adjust_poke_descs(new_prog, len); return new_prog; } @@ -9897,7 +10233,7 @@ static int convert_ctx_accesses(struct bpf_verifier_env *env) insn->code = BPF_LDX | BPF_PROBE_MEM | BPF_SIZE((insn)->code); env->prog->aux->num_exentries++; - } else if (env->prog->type != BPF_PROG_TYPE_STRUCT_OPS) { + } else if (resolve_prog_type(env->prog) != BPF_PROG_TYPE_STRUCT_OPS) { verbose(env, "Writes through BTF pointers are not allowed\n"); return -EINVAL; } @@ -9982,6 +10318,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) { struct bpf_prog *prog = env->prog, **func, *tmp; int i, j, subprog_start, subprog_end = 0, len, subprog; + struct bpf_map *map_ptr; struct bpf_insn *insn; void *old_bpf_func; int err, num_exentries; @@ -10049,6 +10386,31 @@ static int jit_subprogs(struct bpf_verifier_env *env) func[i]->aux->btf = prog->aux->btf; func[i]->aux->func_info = prog->aux->func_info; + for (j = 0; j < prog->aux->size_poke_tab; j++) { + u32 insn_idx = prog->aux->poke_tab[j].insn_idx; + int ret; + + if (!(insn_idx >= subprog_start && + insn_idx <= subprog_end)) + continue; + + ret = bpf_jit_add_poke_descriptor(func[i], + &prog->aux->poke_tab[j]); + if (ret < 0) { + verbose(env, "adding tail call poke descriptor failed\n"); + goto out_free; + } + + func[i]->insnsi[insn_idx - subprog_start].imm = ret + 1; + + map_ptr = func[i]->aux->poke_tab[ret].tail_call.map; + ret = map_ptr->ops->map_poke_track(map_ptr, func[i]->aux); + if (ret < 0) { + verbose(env, "tracking tail call prog failed\n"); + goto out_free; + } + } + /* Use bpf_prog_F_tag to indicate functions in stack traces. * Long term would need debug info to populate names */ @@ -10067,6 +10429,7 @@ static int jit_subprogs(struct bpf_verifier_env *env) num_exentries++; } func[i]->aux->num_exentries = num_exentries; + func[i]->aux->tail_call_reachable = env->subprog_info[i].tail_call_reachable; func[i] = bpf_int_jit_compile(func[i]); if (!func[i]->jited) { err = -ENOTSUPP; @@ -10074,6 +10437,19 @@ static int jit_subprogs(struct bpf_verifier_env *env) } cond_resched(); } + + /* Untrack main program's aux structs so that during map_poke_run() + * we will not stumble upon the unfilled poke descriptors; each + * of the main program's poke descs got distributed across subprogs + * and got tracked onto map, so we are sure that none of them will + * be missed after the operation below + */ + for (i = 0; i < prog->aux->size_poke_tab; i++) { + map_ptr = prog->aux->poke_tab[i].tail_call.map; + + map_ptr->ops->map_poke_untrack(map_ptr, prog->aux); + } + /* at this point all bpf functions were successfully JITed * now populate all bpf_calls with correct addresses and * run last pass of JIT @@ -10142,9 +10518,16 @@ static int jit_subprogs(struct bpf_verifier_env *env) bpf_prog_free_unused_jited_linfo(prog); return 0; out_free: - for (i = 0; i < env->subprog_cnt; i++) - if (func[i]) - bpf_jit_free(func[i]); + for (i = 0; i < env->subprog_cnt; i++) { + if (!func[i]) + continue; + + for (j = 0; j < func[i]->aux->size_poke_tab; j++) { + map_ptr = func[i]->aux->poke_tab[j].tail_call.map; + map_ptr->ops->map_poke_untrack(map_ptr, func[i]->aux); + } + bpf_jit_free(func[i]); + } kfree(func); out_undo_insn: /* cleanup main prog to be interpreted */ @@ -10178,6 +10561,13 @@ static int fixup_call_args(struct bpf_verifier_env *env) return err; } #ifndef CONFIG_BPF_JIT_ALWAYS_ON + if (env->subprog_cnt > 1 && env->prog->aux->tail_call_reachable) { + /* When JIT fails the progs with bpf2bpf calls and tail_calls + * have to be rejected, since interpreter doesn't support them yet. + */ + verbose(env, "tail_calls are not allowed in non-JITed programs with bpf-to-bpf calls\n"); + return -EINVAL; + } for (i = 0; i < prog->len; i++, insn++) { if (insn->code != (BPF_JMP | BPF_CALL) || insn->src_reg != BPF_PSEUDO_CALL) @@ -10341,8 +10731,9 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) * the program array. */ prog->cb_access = 1; - env->prog->aux->stack_depth = MAX_BPF_STACK; - env->prog->aux->max_pkt_offset = MAX_PACKET_OFF; + if (!allow_tail_call_in_subprogs(env)) + prog->aux->stack_depth = MAX_BPF_STACK; + prog->aux->max_pkt_offset = MAX_PACKET_OFF; /* mark bpf_tail_call as different opcode to avoid * conditional branch in the interpeter for every normal @@ -10362,6 +10753,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env) .reason = BPF_POKE_REASON_TAIL_CALL, .tail_call.map = BPF_MAP_PTR(aux->map_ptr_state), .tail_call.key = bpf_map_key_immediate(aux), + .insn_idx = i + delta, }; ret = bpf_jit_add_poke_descriptor(prog, &desc); @@ -10810,59 +11202,79 @@ static int check_struct_ops_btf_id(struct bpf_verifier_env *env) } #define SECURITY_PREFIX "security_" -static int check_attach_modify_return(struct bpf_prog *prog, unsigned long addr) +static int check_attach_modify_return(unsigned long addr, const char *func_name) { if (within_error_injection_list(addr) || - !strncmp(SECURITY_PREFIX, prog->aux->attach_func_name, - sizeof(SECURITY_PREFIX) - 1)) + !strncmp(SECURITY_PREFIX, func_name, sizeof(SECURITY_PREFIX) - 1)) return 0; return -EINVAL; } -static int check_attach_btf_id(struct bpf_verifier_env *env) +/* non exhaustive list of sleepable bpf_lsm_*() functions */ +BTF_SET_START(btf_sleepable_lsm_hooks) +#ifdef CONFIG_BPF_LSM +BTF_ID(func, bpf_lsm_bprm_committed_creds) +#else +BTF_ID_UNUSED +#endif +BTF_SET_END(btf_sleepable_lsm_hooks) + +static int check_sleepable_lsm_hook(u32 btf_id) +{ + return btf_id_set_contains(&btf_sleepable_lsm_hooks, btf_id); +} + +/* list of non-sleepable functions that are otherwise on + * ALLOW_ERROR_INJECTION list + */ +BTF_SET_START(btf_non_sleepable_error_inject) +/* Three functions below can be called from sleepable and non-sleepable context. + * Assume non-sleepable from bpf safety point of view. + */ +BTF_ID(func, __add_to_page_cache_locked) +BTF_ID(func, should_fail_alloc_page) +BTF_ID(func, should_failslab) +BTF_SET_END(btf_non_sleepable_error_inject) + +static int check_non_sleepable_error_inject(u32 btf_id) +{ + return btf_id_set_contains(&btf_non_sleepable_error_inject, btf_id); +} + +int bpf_check_attach_target(struct bpf_verifier_log *log, + const struct bpf_prog *prog, + const struct bpf_prog *tgt_prog, + u32 btf_id, + struct bpf_attach_target_info *tgt_info) { - struct bpf_prog *prog = env->prog; bool prog_extension = prog->type == BPF_PROG_TYPE_EXT; - struct bpf_prog *tgt_prog = prog->aux->linked_prog; - u32 btf_id = prog->aux->attach_btf_id; const char prefix[] = "btf_trace_"; - struct btf_func_model fmodel; int ret = 0, subprog = -1, i; - struct bpf_trampoline *tr; const struct btf_type *t; bool conservative = true; const char *tname; struct btf *btf; - long addr; - u64 key; - - if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) - return check_struct_ops_btf_id(env); - - if (prog->type != BPF_PROG_TYPE_TRACING && - prog->type != BPF_PROG_TYPE_LSM && - !prog_extension) - return 0; + long addr = 0; if (!btf_id) { - verbose(env, "Tracing programs must provide btf_id\n"); + bpf_log(log, "Tracing programs must provide btf_id\n"); return -EINVAL; } - btf = bpf_prog_get_target_btf(prog); + btf = tgt_prog ? tgt_prog->aux->btf : btf_vmlinux; if (!btf) { - verbose(env, + bpf_log(log, "FENTRY/FEXIT program can only be attached to another program annotated with BTF\n"); return -EINVAL; } t = btf_type_by_id(btf, btf_id); if (!t) { - verbose(env, "attach_btf_id %u is invalid\n", btf_id); + bpf_log(log, "attach_btf_id %u is invalid\n", btf_id); return -EINVAL; } tname = btf_name_by_offset(btf, t->name_off); if (!tname) { - verbose(env, "attach_btf_id %u doesn't have a name\n", btf_id); + bpf_log(log, "attach_btf_id %u doesn't have a name\n", btf_id); return -EINVAL; } if (tgt_prog) { @@ -10874,26 +11286,24 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) break; } if (subprog == -1) { - verbose(env, "Subprog %s doesn't exist\n", tname); + bpf_log(log, "Subprog %s doesn't exist\n", tname); return -EINVAL; } conservative = aux->func_info_aux[subprog].unreliable; if (prog_extension) { if (conservative) { - verbose(env, + bpf_log(log, "Cannot replace static functions\n"); return -EINVAL; } if (!prog->jit_requested) { - verbose(env, + bpf_log(log, "Extension programs should be JITed\n"); return -EINVAL; } - env->ops = bpf_verifier_ops[tgt_prog->type]; - prog->expected_attach_type = tgt_prog->expected_attach_type; } if (!tgt_prog->jited) { - verbose(env, "Can attach to only JITed progs\n"); + bpf_log(log, "Can attach to only JITed progs\n"); return -EINVAL; } if (tgt_prog->type == prog->type) { @@ -10901,7 +11311,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) * Cannot attach program extension to another extension. * It's ok to attach fentry/fexit to extension program. */ - verbose(env, "Cannot recursively attach\n"); + bpf_log(log, "Cannot recursively attach\n"); return -EINVAL; } if (tgt_prog->type == BPF_PROG_TYPE_TRACING && @@ -10923,32 +11333,30 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) * reasonable stack size. Hence extending fentry is not * allowed. */ - verbose(env, "Cannot extend fentry/fexit\n"); + bpf_log(log, "Cannot extend fentry/fexit\n"); return -EINVAL; } - key = ((u64)aux->id) << 32 | btf_id; } else { if (prog_extension) { - verbose(env, "Cannot replace kernel functions\n"); + bpf_log(log, "Cannot replace kernel functions\n"); return -EINVAL; } - key = btf_id; } switch (prog->expected_attach_type) { case BPF_TRACE_RAW_TP: if (tgt_prog) { - verbose(env, + bpf_log(log, "Only FENTRY/FEXIT progs are attachable to another BPF prog\n"); return -EINVAL; } if (!btf_type_is_typedef(t)) { - verbose(env, "attach_btf_id %u is not a typedef\n", + bpf_log(log, "attach_btf_id %u is not a typedef\n", btf_id); return -EINVAL; } if (strncmp(prefix, tname, sizeof(prefix) - 1)) { - verbose(env, "attach_btf_id %u points to wrong type name %s\n", + bpf_log(log, "attach_btf_id %u points to wrong type name %s\n", btf_id, tname); return -EINVAL; } @@ -10962,29 +11370,20 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) /* should never happen in valid vmlinux build */ return -EINVAL; - /* remember two read only pointers that are valid for - * the life time of the kernel - */ - prog->aux->attach_func_name = tname; - prog->aux->attach_func_proto = t; - prog->aux->attach_btf_trace = true; - return 0; + break; case BPF_TRACE_ITER: if (!btf_type_is_func(t)) { - verbose(env, "attach_btf_id %u is not a function\n", + bpf_log(log, "attach_btf_id %u is not a function\n", btf_id); return -EINVAL; } t = btf_type_by_id(btf, t->type); if (!btf_type_is_func_proto(t)) return -EINVAL; - prog->aux->attach_func_name = tname; - prog->aux->attach_func_proto = t; - if (!bpf_iter_prog_supported(prog)) - return -EINVAL; - ret = btf_distill_func_proto(&env->log, btf, t, - tname, &fmodel); - return ret; + ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel); + if (ret) + return ret; + break; default: if (!prog_extension) return -EINVAL; @@ -10993,42 +11392,30 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) case BPF_LSM_MAC: case BPF_TRACE_FENTRY: case BPF_TRACE_FEXIT: - prog->aux->attach_func_name = tname; - if (prog->type == BPF_PROG_TYPE_LSM) { - ret = bpf_lsm_verify_prog(&env->log, prog); - if (ret < 0) - return ret; - } - if (!btf_type_is_func(t)) { - verbose(env, "attach_btf_id %u is not a function\n", + bpf_log(log, "attach_btf_id %u is not a function\n", btf_id); return -EINVAL; } if (prog_extension && - btf_check_type_match(env, prog, btf, t)) + btf_check_type_match(log, prog, btf, t)) return -EINVAL; t = btf_type_by_id(btf, t->type); if (!btf_type_is_func_proto(t)) return -EINVAL; - tr = bpf_trampoline_lookup(key); - if (!tr) - return -ENOMEM; - /* t is either vmlinux type or another program's type */ - prog->aux->attach_func_proto = t; - mutex_lock(&tr->mutex); - if (tr->func.addr) { - prog->aux->trampoline = tr; - goto out; - } - if (tgt_prog && conservative) { - prog->aux->attach_func_proto = NULL; + + if ((prog->aux->saved_dst_prog_type || prog->aux->saved_dst_attach_type) && + (!tgt_prog || prog->aux->saved_dst_prog_type != tgt_prog->type || + prog->aux->saved_dst_attach_type != tgt_prog->expected_attach_type)) + return -EINVAL; + + if (tgt_prog && conservative) t = NULL; - } - ret = btf_distill_func_proto(&env->log, btf, t, - tname, &tr->func.model); + + ret = btf_distill_func_proto(log, btf, t, tname, &tgt_info->fmodel); if (ret < 0) - goto out; + return ret; + if (tgt_prog) { if (subprog == 0) addr = (long) tgt_prog->bpf_func; @@ -11037,31 +11424,137 @@ static int check_attach_btf_id(struct bpf_verifier_env *env) } else { addr = kallsyms_lookup_name(tname); if (!addr) { - verbose(env, + bpf_log(log, "The address of function %s cannot be found\n", tname); - ret = -ENOENT; - goto out; + return -ENOENT; } } - if (prog->expected_attach_type == BPF_MODIFY_RETURN) { - ret = check_attach_modify_return(prog, addr); - if (ret) - verbose(env, "%s() is not modifiable\n", - prog->aux->attach_func_name); + if (prog->aux->sleepable) { + ret = -EINVAL; + switch (prog->type) { + case BPF_PROG_TYPE_TRACING: + /* fentry/fexit/fmod_ret progs can be sleepable only if they are + * attached to ALLOW_ERROR_INJECTION and are not in denylist. + */ + if (!check_non_sleepable_error_inject(btf_id) && + within_error_injection_list(addr)) + ret = 0; + break; + case BPF_PROG_TYPE_LSM: + /* LSM progs check that they are attached to bpf_lsm_*() funcs. + * Only some of them are sleepable. + */ + if (check_sleepable_lsm_hook(btf_id)) + ret = 0; + break; + default: + break; + } + if (ret) { + bpf_log(log, "%s is not sleepable\n", tname); + return ret; + } + } else if (prog->expected_attach_type == BPF_MODIFY_RETURN) { + if (tgt_prog) { + bpf_log(log, "can't modify return codes of BPF programs\n"); + return -EINVAL; + } + ret = check_attach_modify_return(addr, tname); + if (ret) { + bpf_log(log, "%s() is not modifiable\n", tname); + return ret; + } } - if (ret) - goto out; - tr->func.addr = (void *)addr; - prog->aux->trampoline = tr; -out: - mutex_unlock(&tr->mutex); - if (ret) - bpf_trampoline_put(tr); + break; + } + tgt_info->tgt_addr = addr; + tgt_info->tgt_name = tname; + tgt_info->tgt_type = t; + return 0; +} + +static int check_attach_btf_id(struct bpf_verifier_env *env) +{ + struct bpf_prog *prog = env->prog; + struct bpf_prog *tgt_prog = prog->aux->dst_prog; + struct bpf_attach_target_info tgt_info = {}; + u32 btf_id = prog->aux->attach_btf_id; + struct bpf_trampoline *tr; + int ret; + u64 key; + + if (prog->aux->sleepable && prog->type != BPF_PROG_TYPE_TRACING && + prog->type != BPF_PROG_TYPE_LSM) { + verbose(env, "Only fentry/fexit/fmod_ret and lsm programs can be sleepable\n"); + return -EINVAL; + } + + if (prog->type == BPF_PROG_TYPE_STRUCT_OPS) + return check_struct_ops_btf_id(env); + + if (prog->type != BPF_PROG_TYPE_TRACING && + prog->type != BPF_PROG_TYPE_LSM && + prog->type != BPF_PROG_TYPE_EXT) + return 0; + + ret = bpf_check_attach_target(&env->log, prog, tgt_prog, btf_id, &tgt_info); + if (ret) return ret; + + if (tgt_prog && prog->type == BPF_PROG_TYPE_EXT) { + /* to make freplace equivalent to their targets, they need to + * inherit env->ops and expected_attach_type for the rest of the + * verification + */ + env->ops = bpf_verifier_ops[tgt_prog->type]; + prog->expected_attach_type = tgt_prog->expected_attach_type; + } + + /* store info about the attachment target that will be used later */ + prog->aux->attach_func_proto = tgt_info.tgt_type; + prog->aux->attach_func_name = tgt_info.tgt_name; + + if (tgt_prog) { + prog->aux->saved_dst_prog_type = tgt_prog->type; + prog->aux->saved_dst_attach_type = tgt_prog->expected_attach_type; + } + + if (prog->expected_attach_type == BPF_TRACE_RAW_TP) { + prog->aux->attach_btf_trace = true; + return 0; + } else if (prog->expected_attach_type == BPF_TRACE_ITER) { + if (!bpf_iter_prog_supported(prog)) + return -EINVAL; + return 0; } + + if (prog->type == BPF_PROG_TYPE_LSM) { + ret = bpf_lsm_verify_prog(&env->log, prog); + if (ret < 0) + return ret; + } + + key = bpf_trampoline_compute_key(tgt_prog, btf_id); + tr = bpf_trampoline_get(key, &tgt_info); + if (!tr) + return -ENOMEM; + + prog->aux->dst_trampoline = tr; + return 0; +} + +struct btf *bpf_get_btf_vmlinux(void) +{ + if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { + mutex_lock(&bpf_verifier_lock); + if (!btf_vmlinux) + btf_vmlinux = btf_parse_vmlinux(); + mutex_unlock(&bpf_verifier_lock); + } + return btf_vmlinux; } int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, @@ -11097,12 +11590,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr, env->ops = bpf_verifier_ops[env->prog->type]; is_priv = bpf_capable(); - if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) { - mutex_lock(&bpf_verifier_lock); - if (!btf_vmlinux) - btf_vmlinux = btf_parse_vmlinux(); - mutex_unlock(&bpf_verifier_lock); - } + bpf_get_btf_vmlinux(); /* grab the mutex to protect few globals used by verifier */ if (!is_priv) |