diff options
author | Lennart Poettering <lennart@poettering.net> | 2023-04-05 16:03:38 +0200 |
---|---|---|
committer | Lennart Poettering <lennart@poettering.net> | 2023-04-12 15:14:21 +0200 |
commit | d37cdac6cef5856b3839558308f7e7ab27979c83 (patch) | |
tree | 5e678c3400f1f6152b206e86d0ece6bca8a30c6d | |
parent | 790446bd6c09312e11f08395590582870306904c (diff) | |
download | systemd-d37cdac6cef5856b3839558308f7e7ab27979c83.tar.gz |
varlink: implement file descriptor passing
Let's add infrastructure to implement fd passing in varlink, when used
over AF_UNIX.
This will optionally associate one or more fds with a message sent via
varlink and deliver them to the receiving peer together with that message.
-rw-r--r-- | src/shared/varlink.c | 448 | ||||
-rw-r--r-- | src/shared/varlink.h | 12 |
2 files changed, 438 insertions, 22 deletions
diff --git a/src/shared/varlink.c b/src/shared/varlink.c index 220442826f..eb2b3c39b2 100644 --- a/src/shared/varlink.c +++ b/src/shared/varlink.c @@ -81,6 +81,18 @@ typedef enum VarlinkState { VARLINK_PENDING_METHOD, \ VARLINK_PENDING_METHOD_MORE) +typedef struct VarlinkJsonQueueItem VarlinkJsonQueueItem; + +/* A queued message we shall write into the socket, along with the file descriptors to send at the same + * time. This queue item binds them together so that message/fd boundaries are maintained throughout the + * whole pipeline. */ +struct VarlinkJsonQueueItem { + LIST_FIELDS(VarlinkJsonQueueItem, queue); + JsonVariant *data; + size_t n_fds; + int fds[]; +}; + struct Varlink { unsigned n_ref; @@ -125,6 +137,25 @@ struct Varlink { size_t output_buffer_index; size_t output_buffer_size; + int *input_fds; /* file descriptors associated with the data in input_buffer (for fd passing) */ + size_t n_input_fds; + + int *output_fds; /* file descriptors associated with the data in output_buffer (for fd passing) */ + size_t n_output_fds; + + /* Further messages to output not yet formatted into text, and thus not included in output_buffer + * yet. We keep them separate from output_buffer, to not violate fd message boundaries: we want that + * each message that is sent is associated with its fds, and that fds cannot be accidentally associated + * with preceding or following messages. */ + LIST_HEAD(VarlinkJsonQueueItem, output_queue); + VarlinkJsonQueueItem *output_queue_tail; + + /* The fds to associate with the next message that is about to be enqueued. The user first pushes the + * fds it intends to send via varlink_push_fd() into this queue, and then once the message data is + * submitted we'll combine the fds and the message data into one. 
*/ + int *pushed_fds; + size_t n_pushed_fds; + VarlinkReply reply_callback; JsonVariant *current; @@ -137,6 +168,11 @@ struct Varlink { bool prefer_read_write:1; bool got_pollhup:1; + bool allow_fd_passing_input:1; + bool allow_fd_passing_output:1; + + int af; /* address family if socket; AF_UNSPEC if not socket; negative if not known */ + usec_t timestamp; usec_t timeout; @@ -222,6 +258,8 @@ DEFINE_PRIVATE_STRING_TABLE_LOOKUP_TO_STRING(varlink_state, VarlinkState); #define varlink_server_log(s, fmt, ...) \ log_debug("%s: " fmt, varlink_server_description(s), ##__VA_ARGS__) +static int varlink_format_queue(Varlink *v); + static inline const char *varlink_description(Varlink *v) { return (v ? v->description : NULL) ?: "varlink"; } @@ -230,6 +268,36 @@ static inline const char *varlink_server_description(VarlinkServer *s) { return (s ? s->description : NULL) ?: "varlink"; } +static VarlinkJsonQueueItem *varlink_json_queue_item_free(VarlinkJsonQueueItem *q) { + if (!q) + return NULL; + + json_variant_unref(q->data); + close_many(q->fds, q->n_fds); + + return mfree(q); +} + +static VarlinkJsonQueueItem *varlink_json_queue_item_new(JsonVariant *m, const int fds[], size_t n_fds) { + VarlinkJsonQueueItem *q; + + assert(m); + assert(fds || n_fds == 0); + + q = malloc(offsetof(VarlinkJsonQueueItem, fds) + sizeof(int) * n_fds); + if (!q) + return NULL; + + *q = (VarlinkJsonQueueItem) { + .data = json_variant_ref(m), + .n_fds = n_fds, + }; + + memcpy_safe(q->fds, fds, n_fds * sizeof(int)); + + return TAKE_PTR(q); +} + static void varlink_set_state(Varlink *v, VarlinkState state) { assert(v); assert(state >= 0 && state < _VARLINK_STATE_MAX); @@ -264,7 +332,9 @@ static int varlink_new(Varlink **ret) { .ucred = UCRED_INVALID, .timestamp = USEC_INFINITY, - .timeout = VARLINK_DEFAULT_TIMEOUT_USEC + .timeout = VARLINK_DEFAULT_TIMEOUT_USEC, + + .af = -1, }; *ret = v; @@ -288,6 +358,7 @@ int varlink_connect_address(Varlink **ret, const char *address) { return log_debug_errno(errno, 
"Failed to create AF_UNIX socket: %m"); v->fd = fd_move_above_stdio(v->fd); + v->af = AF_UNIX; r = sockaddr_un_set_path(&sockaddr.un, address); if (r < 0) { @@ -338,6 +409,7 @@ int varlink_connect_fd(Varlink **ret, int fd) { return log_debug_errno(r, "Failed to create varlink object: %m"); v->fd = fd; + v->af = -1, varlink_set_state(v, VARLINK_IDLE_CLIENT); /* Note that if this function is called we assume the passed socket (if it is one) is already @@ -365,6 +437,10 @@ static void varlink_clear_current(Varlink *v) { /* Clears the currently processed incoming message */ v->current = json_variant_unref(v->current); + + close_many(v->input_fds, v->n_input_fds); + v->input_fds = mfree(v->input_fds); + v->n_input_fds = 0; } static void varlink_clear(Varlink *v) { @@ -374,11 +450,29 @@ static void varlink_clear(Varlink *v) { v->fd = safe_close(v->fd); + varlink_clear_current(v); + v->input_buffer = mfree(v->input_buffer); v->output_buffer = mfree(v->output_buffer); varlink_clear_current(v); + close_many(v->output_fds, v->n_output_fds); + v->output_fds = mfree(v->output_fds); + v->n_output_fds = 0; + + close_many(v->pushed_fds, v->n_pushed_fds); + v->pushed_fds = mfree(v->pushed_fds); + v->n_pushed_fds = 0; + + while (v->output_queue) { + VarlinkJsonQueueItem *q = v->output_queue; + + LIST_REMOVE(queue, v->output_queue, q); + varlink_json_queue_item_free(q); + } + v->output_queue_tail = NULL; + v->event = sd_event_unref(v->event); } @@ -446,6 +540,7 @@ disconnect: static int varlink_write(Varlink *v) { ssize_t n; + int r; assert(v); @@ -454,25 +549,53 @@ static int varlink_write(Varlink *v) { if (v->connecting) /* Writing while we are still wait for a non-blocking connect() to complete will * result in ENOTCONN, hence exit early here */ return 0; - if (v->output_buffer_size == 0) - return 0; if (v->write_disconnected) return 0; + /* If needed let's convert some output queue json variants into text form */ + r = varlink_format_queue(v); + if (r < 0) + return r; + + if 
(v->output_buffer_size == 0) + return 0; + assert(v->fd >= 0); - /* We generally prefer recv()/send() (mostly because of MSG_NOSIGNAL) but also want to be compatible - * with non-socket IO, hence fall back automatically. - * - * Use a local variable to help gcc figure out that we set 'n' in all cases. */ - bool prefer_write = v->prefer_read_write; - if (!prefer_write) { - n = send(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size, MSG_DONTWAIT|MSG_NOSIGNAL); - if (n < 0 && errno == ENOTSOCK) - prefer_write = v->prefer_read_write = true; + if (v->n_output_fds > 0) { /* If we shall send fds along, we must use sendmsg() */ + struct iovec iov = { + .iov_base = v->output_buffer + v->output_buffer_index, + .iov_len = v->output_buffer_size, + }; + struct msghdr mh = { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_controllen = CMSG_SPACE(sizeof(int) * v->n_output_fds), + }; + + mh.msg_control = alloca0(mh.msg_controllen); + + struct cmsghdr *control = CMSG_FIRSTHDR(&mh); + control->cmsg_len = CMSG_LEN(sizeof(int) * v->n_output_fds); + control->cmsg_level = SOL_SOCKET; + control->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(control), v->output_fds, sizeof(int) * v->n_output_fds); + + n = sendmsg(v->fd, &mh, MSG_DONTWAIT|MSG_NOSIGNAL); + } else { + /* We generally prefer recv()/send() (mostly because of MSG_NOSIGNAL) but also want to be compatible + * with non-socket IO, hence fall back automatically. + * + * Use a local variable to help gcc figure out that we set 'n' in all cases. 
*/ + bool prefer_write = v->prefer_read_write; + if (!prefer_write) { + n = send(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size, MSG_DONTWAIT|MSG_NOSIGNAL); + if (n < 0 && errno == ENOTSOCK) + prefer_write = v->prefer_read_write = true; + } + if (prefer_write) + n = write(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size); } - if (prefer_write) - n = write(v->fd, v->output_buffer + v->output_buffer_index, v->output_buffer_size); if (n < 0) { if (errno == EAGAIN) return 0; @@ -494,13 +617,22 @@ static int varlink_write(Varlink *v) { else v->output_buffer_index += n; + close_many(v->output_fds, v->n_output_fds); + v->n_output_fds = 0; + v->timestamp = now(CLOCK_MONOTONIC); return 1; } +#define VARLINK_FDS_MAX (16U*1024U) + static int varlink_read(Varlink *v) { + CMSG_BUFFER_TYPE(CMSG_SPACE(sizeof(int) * VARLINK_FDS_MAX)) control; + struct iovec iov; + struct msghdr mh; size_t rs; ssize_t n; + void *p; assert(v); @@ -544,16 +676,31 @@ static int varlink_read(Varlink *v) { } } + p = v->input_buffer + v->input_buffer_index + v->input_buffer_size; rs = MALLOC_SIZEOF_SAFE(v->input_buffer) - (v->input_buffer_index + v->input_buffer_size); - bool prefer_read = v->prefer_read_write; - if (!prefer_read) { - n = recv(v->fd, v->input_buffer + v->input_buffer_index + v->input_buffer_size, rs, MSG_DONTWAIT); - if (n < 0 && errno == ENOTSOCK) - prefer_read = v->prefer_read_write = true; + if (v->allow_fd_passing_input) { + iov = (struct iovec) { + .iov_base = p, + .iov_len = rs, + }; + mh = (struct msghdr) { + .msg_iov = &iov, + .msg_iovlen = 1, + .msg_control = &control, + .msg_controllen = sizeof(control), + }; + n = recvmsg_safe(v->fd, &mh, MSG_DONTWAIT|MSG_CMSG_CLOEXEC); + } else { + bool prefer_read = v->prefer_read_write; + if (!prefer_read) { + n = recv(v->fd, p, rs, MSG_DONTWAIT); + if (n < 0 && errno == ENOTSOCK) + prefer_read = v->prefer_read_write = true; + } + if (prefer_read) + n = read(v->fd, p, rs); } - if (prefer_read) 
- n = read(v->fd, v->input_buffer + v->input_buffer_index + v->input_buffer_size, rs); if (n < 0) { if (errno == EAGAIN) return 0; @@ -566,10 +713,44 @@ static int varlink_read(Varlink *v) { return -errno; } if (n == 0) { /* EOF */ + + if (v->allow_fd_passing_input) + cmsg_close_all(&mh); + v->read_disconnected = true; return 1; } + if (v->allow_fd_passing_input) { + struct cmsghdr* cmsg; + + cmsg = cmsg_find(&mh, SOL_SOCKET, SCM_RIGHTS, (socklen_t) -1); + if (cmsg) { + size_t add; + + /* We only allow file descriptors to be passed along with the first byte of a + * message. If they are passed with any other byte this is a protocol violation. */ + if (v->input_buffer_size != 0) { + cmsg_close_all(&mh); + return -EPROTO; + } + + add = (cmsg->cmsg_len - CMSG_LEN(0)) / sizeof(int); + if (add > INT_MAX - v->n_input_fds) { + cmsg_close_all(&mh); + return -EBADF; + } + + if (!GREEDY_REALLOC(v->input_fds, v->n_input_fds + add)) { + cmsg_close_all(&mh); + return -ENOMEM; + } + + memcpy_safe(v->input_fds + v->n_input_fds, CMSG_TYPED_DATA(cmsg, int), add * sizeof(int)); + v->n_input_fds += add; + } + } + v->input_buffer_size += n; v->input_buffer_unscanned += n; @@ -1255,7 +1436,7 @@ Varlink* varlink_flush_close_unref(Varlink *v) { return varlink_close_unref(v); } -static int varlink_enqueue_json(Varlink *v, JsonVariant *m) { +static int varlink_format_json(Varlink *v, JsonVariant *m) { _cleanup_free_ char *text = NULL; int r; @@ -1305,6 +1486,70 @@ static int varlink_enqueue_json(Varlink *v, JsonVariant *m) { return 0; } +static int varlink_enqueue_json(Varlink *v, JsonVariant *m) { + VarlinkJsonQueueItem *q; + + assert(v); + assert(m); + + /* If there are no file descriptors to be queued and no queue entries yet we can shortcut things and + * append this entry directly to the output buffer */ + if (v->n_pushed_fds == 0 && !v->output_queue) + return varlink_format_json(v, m); + + /* Otherwise add a queue entry for this */ + q = varlink_json_queue_item_new(m, v->pushed_fds, 
v->n_pushed_fds); + if (!q) + return -ENOMEM; + + v->n_pushed_fds = 0; /* fds now belong to the queue entry */ + + LIST_INSERT_AFTER(queue, v->output_queue, v->output_queue_tail, q); + v->output_queue_tail = q; + return 0; +} + +static int varlink_format_queue(Varlink *v) { + int r; + + assert(v); + + /* Takes entries out of the output queue and formats them into the output buffer. But only if this + * would not corrupt our fd message boundaries */ + + while (v->output_queue) { + _cleanup_free_ int *array = NULL; + VarlinkJsonQueueItem *q = v->output_queue; + + if (v->n_output_fds > 0) /* unwritten fds? if we'd add more we'd corrupt the fd message boundaries, hence wait */ + return 0; + + if (q->n_fds > 0) { + array = newdup(int, q->fds, q->n_fds); + if (!array) + return -ENOMEM; + } + + r = varlink_format_json(v, q->data); + if (r < 0) + return r; + + /* Take possession of the queue element's fds */ + free(v->output_fds); + v->output_fds = TAKE_PTR(array); + v->n_output_fds = q->n_fds; + q->n_fds = 0; + + LIST_REMOVE(queue, v->output_queue, q); + if (!v->output_queue) + v->output_queue_tail = NULL; + + varlink_json_queue_item_free(q); + } + + return 0; +} + int varlink_send(Varlink *v, const char *method, JsonVariant *parameters) { _cleanup_(json_variant_unrefp) JsonVariant *m = NULL; int r; @@ -1483,6 +1728,8 @@ int varlink_call( assert(v->n_pending == 0); /* n_pending can't be > 0 if we are in VARLINK_IDLE_CLIENT state */ + /* If there was still a reply pinned from a previous call, now it's the time to get rid of it, so + * that we can assign a new reply shortly. */ varlink_clear_current(v); r = varlink_sanitize_parameters(&parameters); @@ -1639,6 +1886,13 @@ int varlink_error(Varlink *v, const char *error_id, JsonVariant *parameters) { VARLINK_PENDING_METHOD, VARLINK_PENDING_METHOD_MORE)) return varlink_log_errno(v, SYNTHETIC_ERRNO(EBUSY), "Connection busy."); + /* Reset the list of pushed file descriptors before sending an error reply. 
We do this here to + * simplify code that puts together a complex reply message with fds, and half-way something + * fails. In that case the pushed fds need to be flushed out again. Under the assumption that it + * never makes sense to send fds along with errors we simply flush them out here beforehand, so that + * the callers don't need to do this explicitly. */ + varlink_reset_fds(v); + r = varlink_sanitize_parameters(&parameters); if (r < 0) return varlink_log_errno(v, r, "Failed to sanitize parameters: %m"); @@ -2028,6 +2282,156 @@ sd_event *varlink_get_event(Varlink *v) { return v->event; } +int varlink_push_fd(Varlink *v, int fd) { + int i; + + assert_return(v, -EINVAL); + assert_return(fd >= 0, -EBADF); + + /* Takes an fd to send along with the *next* varlink message sent via this varlink connection. This + * takes ownership of the specified fd. Use varlink_dup_fd() below to duplicate the fd first. */ + + if (!v->allow_fd_passing_output) + return -EPERM; + + if (v->n_pushed_fds >= INT_MAX) + return -ENOMEM; + + if (!GREEDY_REALLOC(v->pushed_fds, v->n_pushed_fds + 1)) + return -ENOMEM; + + i = (int) v->n_pushed_fds; + v->pushed_fds[v->n_pushed_fds++] = fd; + return i; +} + +int varlink_dup_fd(Varlink *v, int fd) { + _cleanup_close_ int dp = -1; + int r; + + assert_return(v, -EINVAL); + assert_return(fd >= 0, -EBADF); + + /* Like varlink_push_fd() but duplicates the specified fd instead of taking possession of it */ + + dp = fcntl(fd, F_DUPFD_CLOEXEC, 3); + if (dp < 0) + return -errno; + + r = varlink_push_fd(v, dp); + if (r < 0) + return r; + + TAKE_FD(dp); + return r; +} + +int varlink_reset_fds(Varlink *v) { + assert_return(v, -EINVAL); + + /* Closes all currently pending fds to send. This may be used whenever the caller is in the process + * of putting together a message with fds, and then eventually something fails and they need to + * rollback the fds. Note that this is implicitly called whenever an error reply is sent, see above. 
*/ + + close_many(v->output_fds, v->n_output_fds); + v->n_output_fds = 0; + return 0; +} + +int varlink_peek_fd(Varlink *v, size_t i) { + assert_return(v, -EINVAL); + + /* Returns one of the file descriptors that were received along with the current message. This does + * not duplicate the fd nor invalidate it, it hence remains in our possession. */ + + if (!v->allow_fd_passing_input) + return -EPERM; + + if (i >= v->n_input_fds) + return -ENXIO; + + return v->input_fds[i]; +} + +int varlink_take_fd(Varlink *v, size_t i) { + assert_return(v, -EINVAL); + + /* Similar to varlink_peek_fd() but the file descriptor's ownership is passed to the caller, and + * we'll invalidate the reference to it under our possession. If called twice in a row will return + * -EBADF */ + + if (!v->allow_fd_passing_input) + return -EPERM; + + if (i >= v->n_input_fds) + return -ENXIO; + + return TAKE_FD(v->input_fds[i]); +} + +static int verify_unix_socket(Varlink *v) { + assert(v); + + if (v->af < 0) { + struct stat st; + + if (fstat(v->fd, &st) < 0) + return -errno; + if (!S_ISSOCK(st.st_mode)) { + v->af = AF_UNSPEC; + return -ENOTSOCK; + } + + v->af = socket_get_family(v->fd); + if (v->af < 0) + return v->af; + } + + return v->af == AF_UNIX ? 
0 : -ENOMEDIUM; +} + +int varlink_set_allow_fd_passing_input(Varlink *v, bool b) { + int r; + + assert_return(v, -EINVAL); + + if (v->allow_fd_passing_input == b) + return 0; + + if (!b) { + v->allow_fd_passing_input = false; + return 1; + } + + r = verify_unix_socket(v); + if (r < 0) + return r; + + v->allow_fd_passing_input = true; + return 0; +} + +int varlink_set_allow_fd_passing_output(Varlink *v, bool b) { + int r; + + assert_return(v, -EINVAL); + + if (v->allow_fd_passing_output == b) + return 0; + + if (!b) { + v->allow_fd_passing_output = false; + return 1; + } + + r = verify_unix_socket(v); + if (r < 0) + return r; + + v->allow_fd_passing_output = true; + return 0; +} + int varlink_server_new(VarlinkServer **ret, VarlinkServerFlags flags) { VarlinkServer *s; diff --git a/src/shared/varlink.h b/src/shared/varlink.h index ea4e23ff9c..a94f5a7729 100644 --- a/src/shared/varlink.h +++ b/src/shared/varlink.h @@ -107,6 +107,18 @@ int varlink_error_errno(Varlink *v, int error); int varlink_notify(Varlink *v, JsonVariant *parameters); int varlink_notifyb(Varlink *v, ...); +/* Write outgoing fds into the socket (to be associated with the next enqueued message) */ +int varlink_push_fd(Varlink *v, int fd); +int varlink_dup_fd(Varlink *v, int fd); +int varlink_reset_fds(Varlink *v); + +/* Read incoming fds from the socket (associated with the currently handled message) */ +int varlink_peek_fd(Varlink *v, size_t i); +int varlink_take_fd(Varlink *v, size_t i); + +int varlink_set_allow_fd_passing_input(Varlink *v, bool b); +int varlink_set_allow_fd_passing_output(Varlink *v, bool b); + /* Bind a disconnect, reply or timeout callback */ int varlink_bind_reply(Varlink *v, VarlinkReply reply); |