From 64242dcba918ae33dcefed495783bbe8c032f9d2 Mon Sep 17 00:00:00 2001 From: "Frank Ch. Eigler" Date: Tue, 11 Apr 2023 23:35:25 -0400 Subject: PR29472: debuginfod: add metadata query webapi, C api, client This patch extends the debuginfod API with a "metadata query" operation. It allows clients to request an enumeration of file names known to debuginfod servers, returning a JSON response including the matching buildids. This lets clients later download debuginfo for a range of versions of the same named binaries, in case they need to do prospective work (like systemtap-based live-patching). It also lets server operators implement prefetch triggering operations for popular but slow debuginfo slivers like kernel vdso.debug files on fedora. Implementation requires a modern enough json-c library. % debuginfod-find metadata file /bin/ls % debuginfod-find metadata glob "/usr/local/bin/c*" Documentation and testing are included. Signed-off-by: Ryan Goldberg Signed-off-by: Frank Ch. Eigler --- ChangeLog | 6 + NEWS | 4 + configure.ac | 6 + debuginfod/ChangeLog | 23 + debuginfod/Makefile.am | 4 +- debuginfod/debuginfod-client.c | 1193 +++++++++++++++++++++++---------- debuginfod/debuginfod-find.c | 101 +-- debuginfod/debuginfod.cxx | 327 +++++++-- debuginfod/debuginfod.h.in | 18 +- debuginfod/libdebuginfod.map | 3 + doc/ChangeLog | 7 + doc/debuginfod-client-config.7 | 8 + doc/debuginfod-find.1 | 32 + doc/debuginfod.8 | 18 + tests/ChangeLog | 6 + tests/Makefile.am | 10 +- tests/run-debuginfod-find-metadata.sh | 110 +++ 17 files changed, 1413 insertions(+), 463 deletions(-) create mode 100755 tests/run-debuginfod-find-metadata.sh diff --git a/ChangeLog b/ChangeLog index 10c23002..c35a19dd 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,9 @@ +2023-04-12 Ryan Goldberg , Frank Ch. Eigler + + PR29472: debuginfod metadata query + * NEWS: Mention this. + * configure.ac: Look for json-c library. 
+ 2023-03-03 Mark Wielaard * NEWS: Add ELFCOMPRESS_ZSTD support for libelf and elfcompress. diff --git a/NEWS b/NEWS index 16e37eca..1bf38a69 100644 --- a/NEWS +++ b/NEWS @@ -1,3 +1,7 @@ +Version 0.190 + +debuginfod: New API for metadata queries: file name -> buildid. + Version 0.189 "Don't deflate!" configure: eu-nm, eu-addr2line and eu-stack can provide demangled symbols diff --git a/configure.ac b/configure.ac index 4b67c844..b319a011 100644 --- a/configure.ac +++ b/configure.ac @@ -665,6 +665,12 @@ case "$ac_cv_search__obstack_free" in esac AC_SUBST([obstack_LIBS]) +dnl formerly checked for json_object_array_add, but debuginfod needs a newer function +AC_CHECK_LIB(json-c, json_object_from_fd, [ + AC_DEFINE([HAVE_JSON_C], [1], [Define if json-c is on the machine]) + AC_SUBST(jsonc_LIBS, '-ljson-c') +]) + dnl The directories with content. dnl Documentation. diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index f13c28d5..ac3fd6aa 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,26 @@ +2023-04-12 Ryan Goldberg , Frank Ch. Eigler + + PR29472: debuginfod metadata query + * Makefile.am: Add json-c usage. + * debuginfod-client.c (debuginfod_find_metadata): New function. + (handle_data): New fields to hold metadata being received. + (debuginfod_clean_cache): Clean metadata too. + (header_callback): Simplify to realloc only. + (metadata_callback): New function. + (init_server_urls, init_handle, perform_queries, make_cache_path): + New refactored functions. + (debuginfod_query_server_by_buildid): Renamed, refactored. Update + callers. + * debuginfod-find.c (main): Handle metadata queries. + * debuginfod.cxx (DEBUGINFOD_SQLITE_DDL): Add an index or two. + (metadata_maxtime_s, parse_opt): New parameter for load control. + (add_client_federation_headers): New refactored function. + (handle_metadata): New function. + (handler_cb): Call it for /metadata URL. Trace it. + (groom): Tweak sqlite_ps object lifetimes. 
+ * debuginfod.h.in (debuginfod_find_metadata): New decl. + * libdebuginfod.map: Export it under ELFUTILS_0.190. + 2023-03-30 Jan Alexander Steffens (heftig) * debuginfod-client.c (update_atime): New function. diff --git a/debuginfod/Makefile.am b/debuginfod/Makefile.am index 125be97b..73ffe0e5 100644 --- a/debuginfod/Makefile.am +++ b/debuginfod/Makefile.am @@ -70,7 +70,7 @@ bin_PROGRAMS += debuginfod-find endif debuginfod_SOURCES = debuginfod.cxx -debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) $(libmicrohttpd_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) -lpthread -ldl +debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) $(libmicrohttpd_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) $(jsonc_LIBS) $(libcurl_LIBS) -lpthread -ldl debuginfod_find_SOURCES = debuginfod-find.c debuginfod_find_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) @@ -97,7 +97,7 @@ libdebuginfod_so_LIBS = libdebuginfod_pic.a if DUMMY_LIBDEBUGINFOD libdebuginfod_so_LDLIBS = else -libdebuginfod_so_LDLIBS = -lpthread $(libcurl_LIBS) $(fts_LIBS) $(libelf) +libdebuginfod_so_LDLIBS = -lpthread $(libcurl_LIBS) $(fts_LIBS) $(libelf) $(jsonc_LIBS) endif $(LIBDEBUGINFOD_SONAME): $(srcdir)/libdebuginfod.map $(libdebuginfod_so_LIBS) $(AM_V_CCLD)$(LINK) $(dso_LDFLAGS) -o $@ \ diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c index 5bb79524..cbe44851 100644 --- a/debuginfod/debuginfod-client.c +++ b/debuginfod/debuginfod-client.c @@ -1,5 +1,5 @@ /* Retrieve ELF / DWARF / source files from the debuginfod. - Copyright (C) 2019-2021 Red Hat, Inc. + Copyright (C) 2019-2023 Red Hat, Inc. Copyright (C) 2021, 2022 Mark J. Wielaard This file is part of elfutils. 
@@ -60,6 +60,8 @@ int debuginfod_find_source (debuginfod_client *c, const unsigned char *b, int debuginfod_find_section (debuginfod_client *c, const unsigned char *b, int s, const char *scn, char **p) { return -ENOSYS; } +int debuginfod_find_metadata (debuginfod_client *c, + const char *k, const char *v, char **p) { return -ENOSYS; } void debuginfod_set_progressfn(debuginfod_client *c, debuginfod_progressfn_t fn) { } void debuginfod_set_verbose_fd(debuginfod_client *c, int fd) { } @@ -114,6 +116,10 @@ void debuginfod_end (debuginfod_client *c) { } #include +#ifdef HAVE_JSON_C + #include +#endif + static pthread_once_t init_control = PTHREAD_ONCE_INIT; static void @@ -174,6 +180,13 @@ static const char *cache_miss_filename = "cache_miss_s"; static const char *cache_max_unused_age_filename = "max_unused_age_s"; static const long cache_default_max_unused_age_s = 604800; /* 1 week */ +#ifdef HAVE_JSON_C +/* The metadata_retention_default_s file within the debuginfod cache + specifies how long metadata query results should be cached. */ +static const long metadata_retention_default_s = 86400; /* 1 day */ +static const char *metadata_retention_filename = "metadata_retention_s"; +#endif + /* Location of the cache of files downloaded from debuginfods. The default parent directory is $HOME, or '/' if $HOME doesn't exist. 
*/ static const char *cache_default_name = ".debuginfod_client_cache"; @@ -215,6 +228,9 @@ struct handle_data /* Response http headers for this client handle, sent from the server */ char *response_data; size_t response_data_size; + /* Response metadata values for this client handle, sent from the server */ + char *metadata; + size_t metadata_size; }; static size_t @@ -333,7 +349,8 @@ debuginfod_clean_cache(debuginfod_client *c, return -errno; regex_t re; - const char * pattern = ".*/[a-f0-9]+(/debuginfo|/executable|/source.*|)$"; /* include dirs */ + const char * pattern = ".*/(metadata.*|[a-f0-9]+(/debuginfo|/executable|/source.*|))$"; /* include dirs */ + /* NB: also includes .../section/ subdirs */ if (regcomp (&re, pattern, REG_EXTENDED | REG_NOSUB) != 0) return -ENOMEM; @@ -569,18 +586,9 @@ header_callback (char * buffer, size_t size, size_t numitems, void * userdata) } /* Temporary buffer for realloc */ char *temp = NULL; - if (data->response_data == NULL) - { - temp = malloc(numitems); - if (temp == NULL) - return 0; - } - else - { - temp = realloc(data->response_data, data->response_data_size + numitems); - if (temp == NULL) - return 0; - } + temp = realloc(data->response_data, data->response_data_size + numitems); + if (temp == NULL) + return 0; memcpy(temp + data->response_data_size, buffer, numitems-1); data->response_data = temp; @@ -590,6 +598,341 @@ header_callback (char * buffer, size_t size, size_t numitems, void * userdata) return numitems; } + +#ifdef HAVE_JSON_C +static size_t +metadata_callback (char * buffer, size_t size, size_t numitems, void * userdata) +{ + if (size != 1) + return 0; + /* Temporary buffer for realloc */ + char *temp = NULL; + struct handle_data *data = (struct handle_data *) userdata; + temp = realloc(data->metadata, data->metadata_size + numitems + 1); + if (temp == NULL) + return 0; + + memcpy(temp + data->metadata_size, buffer, numitems); + data->metadata = temp; + data->metadata_size += numitems; + 
data->metadata[data->metadata_size] = '\0'; + return numitems; +} +#endif + + +/* This function takes a copy of DEBUGINFOD_URLS, server_urls, and seperates it into an + * array of urls to query. The url_subdir is either 'buildid' or 'metadata', corresponding + * to the query type. Returns 0 on success and -Posix error on faliure. + */ +int +init_server_urls(char* url_subdir, char *server_urls, char ***server_url_list, int *num_urls, int vfd) +{ + /* Initialize the memory to zero */ + char *strtok_saveptr; + char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr); + /* Count number of URLs. */ + int n = 0; + assert(0 == strcmp(url_subdir, "buildid") || 0 == strcmp(url_subdir, "metadata")); + + /* PR 27983: If the url is already set to be used use, skip it */ + while (server_url != NULL) + { + int r; + char *tmp_url; + if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/') + r = asprintf(&tmp_url, "%s%s", server_url, url_subdir); + else + r = asprintf(&tmp_url, "%s/%s", server_url, url_subdir); + + if (r == -1) + { + return -ENOMEM; + } + int url_index; + for (url_index = 0; url_index < n; ++url_index) + { + if(strcmp(tmp_url, (*server_url_list)[url_index]) == 0) + { + url_index = -1; + break; + } + } + if (url_index == -1) + { + if (vfd >= 0) + dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url); + free(tmp_url); + } + else + { + n++; + char ** realloc_ptr; + realloc_ptr = reallocarray(*server_url_list, n, + sizeof(char*)); + if (realloc_ptr == NULL) + { + free (tmp_url); + return -ENOMEM; + } + *server_url_list = realloc_ptr; + (*server_url_list)[n-1] = tmp_url; + } + server_url = strtok_r(NULL, url_delim, &strtok_saveptr); + } + *num_urls = n; + return 0; +} + +/* Some boilerplate for checking curl_easy_setopt. 
*/ +#define curl_easy_setopt_ck(H,O,P) do { \ + CURLcode curl_res = curl_easy_setopt (H,O,P); \ + if (curl_res != CURLE_OK) \ + { \ + if (vfd >= 0) \ + dprintf (vfd, \ + "Bad curl_easy_setopt: %s\n", \ + curl_easy_strerror(curl_res)); \ + return -EINVAL; \ + } \ + } while (0) + + +/* + * This function initializes a CURL handle. It takes optional callbacks for the write + * function and the header function, which if defined will use userdata of type struct handle_data*. + * Specifically the data[i] within an array of struct handle_data's. + * Returns 0 on success and -Posix error on faliure. + */ +int +init_handle(debuginfod_client *client, + size_t (*w_callback)(char *buffer,size_t size,size_t nitems,void *userdata), + size_t (*h_callback)(char *buffer,size_t size,size_t nitems,void *userdata), + struct handle_data *data, int i, long timeout, + int vfd) +{ + data->handle = curl_easy_init(); + if (data->handle == NULL) + { + return -ENETUNREACH; + } + + if (vfd >= 0) + dprintf (vfd, "url %d %s\n", i, data->url); + + /* Only allow http:// + https:// + file:// so we aren't being + redirected to some unsupported protocol. */ +#if CURL_AT_LEAST_VERSION(7, 85, 0) + curl_easy_setopt_ck(data->handle, CURLOPT_PROTOCOLS_STR, "http,https,file"); +#else + curl_easy_setopt_ck(data->handle, CURLOPT_PROTOCOLS, + (CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_FILE)); +#endif + curl_easy_setopt_ck(data->handle, CURLOPT_URL, data->url); + if (vfd >= 0) + curl_easy_setopt_ck(data->handle, CURLOPT_ERRORBUFFER, + data->errbuf); + if(w_callback) { + curl_easy_setopt_ck(data->handle, + CURLOPT_WRITEFUNCTION, w_callback); + curl_easy_setopt_ck(data->handle, CURLOPT_WRITEDATA, data); + } + if (timeout > 0) + { + /* Make sure there is at least some progress, + try to get at least 100K per timeout seconds. 
*/ + curl_easy_setopt_ck (data->handle, CURLOPT_LOW_SPEED_TIME, + timeout); + curl_easy_setopt_ck (data->handle, CURLOPT_LOW_SPEED_LIMIT, + 100 * 1024L); + } + curl_easy_setopt_ck(data->handle, CURLOPT_FILETIME, (long) 1); + curl_easy_setopt_ck(data->handle, CURLOPT_FOLLOWLOCATION, (long) 1); + curl_easy_setopt_ck(data->handle, CURLOPT_FAILONERROR, (long) 1); + curl_easy_setopt_ck(data->handle, CURLOPT_NOSIGNAL, (long) 1); + if(h_callback){ + curl_easy_setopt_ck(data->handle, + CURLOPT_HEADERFUNCTION, h_callback); + curl_easy_setopt_ck(data->handle, CURLOPT_HEADERDATA, data); + } + #if LIBCURL_VERSION_NUM >= 0x072a00 /* 7.42.0 */ + curl_easy_setopt_ck(data->handle, CURLOPT_PATH_AS_IS, (long) 1); + #else + /* On old curl; no big deal, canonicalization here is almost the + same, except perhaps for ? # type decorations at the tail. */ + #endif + curl_easy_setopt_ck(data->handle, CURLOPT_AUTOREFERER, (long) 1); + curl_easy_setopt_ck(data->handle, CURLOPT_ACCEPT_ENCODING, ""); + curl_easy_setopt_ck(data->handle, CURLOPT_HTTPHEADER, client->headers); + + return 0; +} + + +/* + * This function busy-waits on one or more curl queries to complete. This can + * be controled via only_one, which, if true, will find the first winner and exit + * once found. If positive maxtime and maxsize dictate the maximum allowed wait times + * and download sizes respectivly. Returns 0 on success and -Posix error on faliure. 
+ */ +int +perform_queries(CURLM *curlm, CURL **target_handle, struct handle_data *data, debuginfod_client *c, + int num_urls, long maxtime, long maxsize, bool only_one, int vfd) +{ + int still_running = -1; + long loops = 0; + int committed_to = -1; + bool verbose_reported = false; + struct timespec start_time, cur_time; + if (c->winning_headers != NULL) + { + free (c->winning_headers); + c->winning_headers = NULL; + } + if ( maxtime > 0 && clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) == -1) + { + return errno; + } + long delta = 0; + do + { + /* Check to see how long querying is taking. */ + if (maxtime > 0) + { + if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time) == -1) + { + return errno; + } + delta = cur_time.tv_sec - start_time.tv_sec; + if ( delta > maxtime) + { + dprintf(vfd, "Timeout with max time=%lds and transfer time=%lds\n", maxtime, delta ); + return -ETIME; + } + } + /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */ + curl_multi_wait(curlm, NULL, 0, 1000, NULL); + CURLMcode curlm_res = curl_multi_perform(curlm, &still_running); + + if(only_one){ + /* If the target file has been found, abort the other queries. */ + if (target_handle && *target_handle != NULL) + { + for (int i = 0; i < num_urls; i++) + if (data[i].handle != *target_handle) + curl_multi_remove_handle(curlm, data[i].handle); + else + { + committed_to = i; + if (c->winning_headers == NULL) + { + c->winning_headers = data[committed_to].response_data; + if (vfd >= 0 && c->winning_headers != NULL) + dprintf(vfd, "\n%s", c->winning_headers); + data[committed_to].response_data = NULL; + data[committed_to].response_data_size = 0; + } + } + } + + if (vfd >= 0 && !verbose_reported && committed_to >= 0) + { + bool pnl = (c->default_progressfn_printed_p && vfd == STDERR_FILENO); + dprintf (vfd, "%scommitted to url %d\n", pnl ? 
"\n" : "", + committed_to); + if (pnl) + c->default_progressfn_printed_p = 0; + verbose_reported = true; + } + } + + if (curlm_res != CURLM_OK) + { + switch (curlm_res) + { + case CURLM_CALL_MULTI_PERFORM: continue; + case CURLM_OUT_OF_MEMORY: return -ENOMEM; + default: return -ENETUNREACH; + } + } + + long dl_size = -1; + if(only_one && target_handle){ // Only bother with progress functions if we're retrieving exactly 1 file + if (*target_handle && (c->progressfn || maxsize > 0)) + { + /* Get size of file being downloaded. NB: If going through + deflate-compressing proxies, this number is likely to be + unavailable, so -1 may show. */ + CURLcode curl_res; +#if CURL_AT_LEAST_VERSION(7, 55, 0) + curl_off_t cl; + curl_res = curl_easy_getinfo(*target_handle, + CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, + &cl); + if (curl_res == CURLE_OK && cl >= 0) + dl_size = (cl > LONG_MAX ? LONG_MAX : (long)cl); +#else + double cl; + curl_res = curl_easy_getinfo(*target_handle, + CURLINFO_CONTENT_LENGTH_DOWNLOAD, + &cl); + if (curl_res == CURLE_OK && cl >= 0) + dl_size = (cl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)cl); +#endif + /* If Content-Length is -1, try to get the size from + X-Debuginfod-Size */ + if (dl_size == -1 && c->winning_headers != NULL) + { + long xdl; + char *hdr = strcasestr(c->winning_headers, "x-debuginfod-size"); + size_t off = strlen("x-debuginfod-size:"); + + if (hdr != NULL && sscanf(hdr + off, "%ld", &xdl) == 1) + dl_size = xdl; + } + } + + if (c->progressfn) /* inform/check progress callback */ + { + loops ++; + long pa = loops; /* default param for progress callback */ + if (*target_handle) /* we've committed to a server; report its download progress */ + { + CURLcode curl_res; +#if CURL_AT_LEAST_VERSION(7, 55, 0) + curl_off_t dl; + curl_res = curl_easy_getinfo(*target_handle, + CURLINFO_SIZE_DOWNLOAD_T, + &dl); + if (curl_res == 0 && dl >= 0) + pa = (dl > LONG_MAX ? 
LONG_MAX : (long)dl); +#else + double dl; + curl_res = curl_easy_getinfo(*target_handle, + CURLINFO_SIZE_DOWNLOAD, + &dl); + if (curl_res == 0) + pa = (dl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)dl); +#endif + + } + + if ((*c->progressfn) (c, pa, dl_size == -1 ? 0 : dl_size)) + break; + } + } + /* Check to see if we are downloading something which exceeds maxsize, if set.*/ + if (target_handle && *target_handle && dl_size > maxsize && maxsize > 0) + { + if (vfd >=0) + dprintf(vfd, "Content-Length too large.\n"); + return -EFBIG; + } + } while (still_running); + return 0; +} + + /* Copy SRC to DEST, s,/,#,g */ static void @@ -841,6 +1184,85 @@ cache_find_section (const char *scn_name, const char *target_cache_dir, return rc; } + +/* Helper function to create client cache directory. + $XDG_CACHE_HOME takes priority over $HOME/.cache. + $DEBUGINFOD_CACHE_PATH takes priority over $HOME/.cache and $XDG_CACHE_HOME. + + Return resulting path name or NULL on error. Caller must free resulting string. + */ +static char * +make_cache_path(void) +{ + char* cache_path = NULL; + int rc = 0; + /* Determine location of the cache. The path specified by the debuginfod + cache environment variable takes priority. */ + char *cache_var = getenv(DEBUGINFOD_CACHE_PATH_ENV_VAR); + if (cache_var != NULL && strlen (cache_var) > 0) + xalloc_str (cache_path, "%s", cache_var); + else + { + /* If a cache already exists in $HOME ('/' if $HOME isn't set), then use + that. Otherwise use the XDG cache directory naming format. */ + xalloc_str (cache_path, "%s/%s", getenv ("HOME") ?: "/", cache_default_name); + + struct stat st; + if (stat (cache_path, &st) < 0) + { + char cachedir[PATH_MAX]; + char *xdg = getenv ("XDG_CACHE_HOME"); + + if (xdg != NULL && strlen (xdg) > 0) + snprintf (cachedir, PATH_MAX, "%s", xdg); + else + snprintf (cachedir, PATH_MAX, "%s/.cache", getenv ("HOME") ?: "/"); + + /* Create XDG cache directory if it doesn't exist. */ + if (stat (cachedir, &st) == 0) + { + if (! 
S_ISDIR (st.st_mode)) + { + rc = -EEXIST; + goto out1; + } + } + else + { + rc = mkdir (cachedir, 0700); + + /* Also check for EEXIST and S_ISDIR in case another client just + happened to create the cache. */ + if (rc < 0 + && (errno != EEXIST + || stat (cachedir, &st) != 0 + || ! S_ISDIR (st.st_mode))) + { + rc = -errno; + goto out1; + } + } + + free (cache_path); + xalloc_str (cache_path, "%s/%s", cachedir, cache_xdg_name); + } + } + + goto out; + + out1: + (void) rc; + free (cache_path); + cache_path = NULL; + + out: + if (cache_path != NULL) + (void) mkdir (cache_path, 0700); + return cache_path; +} + + + /* Query each of the server URLs found in $DEBUGINFOD_URLS for the file with the specified build-id and type (debuginfo, executable, source or section). If type is source, then type_arg should be a filename. If @@ -849,7 +1271,7 @@ cache_find_section (const char *scn_name, const char *target_cache_dir, for the target if successful, otherwise return an error code. */ static int -debuginfod_query_server (debuginfod_client *c, +debuginfod_query_server_by_buildid (debuginfod_client *c, const unsigned char *build_id, int build_id_len, const char *type, @@ -870,7 +1292,7 @@ debuginfod_query_server (debuginfod_client *c, char suffix[PATH_MAX + 1]; /* +1 for zero terminator. */ char build_id_bytes[MAX_BUILD_ID_BYTES * 2 + 1]; int vfd = c->verbose_fd; - int rc; + int rc, r; c->progressfn_cancel = false; @@ -995,83 +1417,35 @@ debuginfod_query_server (debuginfod_client *c, dprintf (vfd, "suffix %s\n", suffix); /* set paths needed to perform the query - - example format + example format: cache_path: $HOME/.cache target_cache_dir: $HOME/.cache/0123abcd target_cache_path: $HOME/.cache/0123abcd/debuginfo target_cache_path: $HOME/.cache/0123abcd/source#PATH#TO#SOURCE ? - - $XDG_CACHE_HOME takes priority over $HOME/.cache. - $DEBUGINFOD_CACHE_PATH takes priority over $HOME/.cache and $XDG_CACHE_HOME. */ - /* Determine location of the cache. 
The path specified by the debuginfod - cache environment variable takes priority. */ - char *cache_var = getenv(DEBUGINFOD_CACHE_PATH_ENV_VAR); - if (cache_var != NULL && strlen (cache_var) > 0) - xalloc_str (cache_path, "%s", cache_var); - else + cache_path = make_cache_path(); + if (!cache_path) { - /* If a cache already exists in $HOME ('/' if $HOME isn't set), then use - that. Otherwise use the XDG cache directory naming format. */ - xalloc_str (cache_path, "%s/%s", getenv ("HOME") ?: "/", cache_default_name); + rc = -ENOMEM; + goto out; + } + xalloc_str (target_cache_dir, "%s/%s", cache_path, build_id_bytes); + (void) mkdir (target_cache_dir, 0700); // failures with this mkdir would be caught later too - struct stat st; - if (stat (cache_path, &st) < 0) - { - char cachedir[PATH_MAX]; - char *xdg = getenv ("XDG_CACHE_HOME"); + if (section != NULL) + xalloc_str (target_cache_path, "%s/%s-%s", target_cache_dir, type, suffix); + else + xalloc_str (target_cache_path, "%s/%s%s", target_cache_dir, type, suffix); + xalloc_str (target_cache_tmppath, "%s.XXXXXX", target_cache_path); - if (xdg != NULL && strlen (xdg) > 0) - snprintf (cachedir, PATH_MAX, "%s", xdg); - else - snprintf (cachedir, PATH_MAX, "%s/.cache", getenv ("HOME") ?: "/"); + /* XXX combine these */ + xalloc_str (interval_path, "%s/%s", cache_path, cache_clean_interval_filename); + xalloc_str (cache_miss_path, "%s/%s", cache_path, cache_miss_filename); + xalloc_str (maxage_path, "%s/%s", cache_path, cache_max_unused_age_filename); - /* Create XDG cache directory if it doesn't exist. */ - if (stat (cachedir, &st) == 0) - { - if (! S_ISDIR (st.st_mode)) - { - rc = -EEXIST; - goto out; - } - } - else - { - rc = mkdir (cachedir, 0700); - - /* Also check for EEXIST and S_ISDIR in case another client just - happened to create the cache. */ - if (rc < 0 - && (errno != EEXIST - || stat (cachedir, &st) != 0 - || ! 
S_ISDIR (st.st_mode))) - { - rc = -errno; - goto out; - } - } - - free (cache_path); - xalloc_str (cache_path, "%s/%s", cachedir, cache_xdg_name); - } - } - - xalloc_str (target_cache_dir, "%s/%s", cache_path, build_id_bytes); - if (section != NULL) - xalloc_str (target_cache_path, "%s/%s-%s", target_cache_dir, type, suffix); - else - xalloc_str (target_cache_path, "%s/%s%s", target_cache_dir, type, suffix); - xalloc_str (target_cache_tmppath, "%s.XXXXXX", target_cache_path); - - /* XXX combine these */ - xalloc_str (interval_path, "%s/%s", cache_path, cache_clean_interval_filename); - xalloc_str (cache_miss_path, "%s/%s", cache_path, cache_miss_filename); - xalloc_str (maxage_path, "%s/%s", cache_path, cache_max_unused_age_filename); - - if (vfd >= 0) - dprintf (vfd, "checking cache dir %s\n", cache_path); + if (vfd >= 0) + dprintf (vfd, "checking cache dir %s\n", cache_path); /* Make sure cache dir exists. debuginfo_clean_cache will then make sure the interval, cache_miss and maxage files exist. */ @@ -1193,60 +1567,14 @@ debuginfod_query_server (debuginfod_client *c, goto out0; } - /* Initialize the memory to zero */ - char *strtok_saveptr; char **server_url_list = NULL; - char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr); - /* Count number of URLs. 
*/ - int num_urls = 0; - - while (server_url != NULL) - { - /* PR 27983: If the url is already set to be used use, skip it */ - char *slashbuildid; - if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/') - slashbuildid = "buildid"; - else - slashbuildid = "/buildid"; - - char *tmp_url; - if (asprintf(&tmp_url, "%s%s", server_url, slashbuildid) == -1) - { - rc = -ENOMEM; - goto out1; - } - int url_index; - for (url_index = 0; url_index < num_urls; ++url_index) - { - if(strcmp(tmp_url, server_url_list[url_index]) == 0) - { - url_index = -1; - break; - } - } - if (url_index == -1) - { - if (vfd >= 0) - dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url); - free(tmp_url); - } - else - { - num_urls++; - char ** realloc_ptr; - realloc_ptr = reallocarray(server_url_list, num_urls, - sizeof(char*)); - if (realloc_ptr == NULL) - { - free (tmp_url); - rc = -ENOMEM; - goto out1; - } - server_url_list = realloc_ptr; - server_url_list[num_urls-1] = tmp_url; - } - server_url = strtok_r(NULL, url_delim, &strtok_saveptr); - } + char *server_url; + int num_urls; + r = init_server_urls("buildid", server_urls, &server_url_list, &num_urls, vfd); + if(0 != r){ + rc = r; + goto out1; + } int retry_limit = default_retry_limit; const char* retry_limit_envvar = getenv(DEBUGINFOD_RETRY_LIMIT_ENV_VAR); @@ -1318,13 +1646,6 @@ debuginfod_query_server (debuginfod_client *c, data[i].fd = fd; data[i].target_handle = &target_handle; - data[i].handle = curl_easy_init(); - if (data[i].handle == NULL) - { - if (filename) curl_free (escaped_string); - rc = -ENETUNREACH; - goto out2; - } data[i].client = c; if (filename) /* must start with / */ @@ -1338,226 +1659,29 @@ debuginfod_query_server (debuginfod_client *c, build_id_bytes, type, section); else snprintf(data[i].url, PATH_MAX, "%s/%s/%s", server_url, build_id_bytes, type); - if (vfd >= 0) - dprintf (vfd, "url %d %s\n", i, data[i].url); - - /* Some boilerplate for checking curl_easy_setopt. 
*/ -#define curl_easy_setopt_ck(H,O,P) do { \ - CURLcode curl_res = curl_easy_setopt (H,O,P); \ - if (curl_res != CURLE_OK) \ - { \ - if (vfd >= 0) \ - dprintf (vfd, \ - "Bad curl_easy_setopt: %s\n", \ - curl_easy_strerror(curl_res)); \ - rc = -EINVAL; \ - goto out2; \ - } \ - } while (0) - /* Only allow http:// + https:// + file:// so we aren't being - redirected to some unsupported protocol. */ -#if CURL_AT_LEAST_VERSION(7, 85, 0) - curl_easy_setopt_ck(data[i].handle, CURLOPT_PROTOCOLS_STR, - "http,https,file"); -#else - curl_easy_setopt_ck(data[i].handle, CURLOPT_PROTOCOLS, - (CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_FILE)); -#endif - curl_easy_setopt_ck(data[i].handle, CURLOPT_URL, data[i].url); - if (vfd >= 0) - curl_easy_setopt_ck(data[i].handle, CURLOPT_ERRORBUFFER, - data[i].errbuf); - curl_easy_setopt_ck(data[i].handle, - CURLOPT_WRITEFUNCTION, - debuginfod_write_callback); - curl_easy_setopt_ck(data[i].handle, CURLOPT_WRITEDATA, (void*)&data[i]); - if (timeout > 0) - { - /* Make sure there is at least some progress, - try to get at least 100K per timeout seconds. */ - curl_easy_setopt_ck (data[i].handle, CURLOPT_LOW_SPEED_TIME, - timeout); - curl_easy_setopt_ck (data[i].handle, CURLOPT_LOW_SPEED_LIMIT, - 100 * 1024L); - } - curl_easy_setopt_ck(data[i].handle, CURLOPT_FILETIME, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_FOLLOWLOCATION, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_FAILONERROR, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_NOSIGNAL, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_HEADERFUNCTION, - header_callback); - curl_easy_setopt_ck(data[i].handle, CURLOPT_HEADERDATA, - (void *) &(data[i])); -#if LIBCURL_VERSION_NUM >= 0x072a00 /* 7.42.0 */ - curl_easy_setopt_ck(data[i].handle, CURLOPT_PATH_AS_IS, (long) 1); -#else - /* On old curl; no big deal, canonicalization here is almost the - same, except perhaps for ? # type decorations at the tail. 
*/ -#endif - curl_easy_setopt_ck(data[i].handle, CURLOPT_AUTOREFERER, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_ACCEPT_ENCODING, ""); - curl_easy_setopt_ck(data[i].handle, CURLOPT_HTTPHEADER, c->headers); + r = init_handle(c, debuginfod_write_callback, header_callback, &data[i], i, timeout, vfd); + if(0 != r){ + rc = r; + if(filename) curl_free (escaped_string); + goto out2; + } curl_multi_add_handle(curlm, data[i].handle); } if (filename) curl_free(escaped_string); + /* Query servers in parallel. */ if (vfd >= 0) dprintf (vfd, "query %d urls in parallel\n", num_urls); - int still_running; - long loops = 0; - int committed_to = -1; - bool verbose_reported = false; - struct timespec start_time, cur_time; - free (c->winning_headers); - c->winning_headers = NULL; - if ( maxtime > 0 && clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) == -1) + r = perform_queries(curlm, &target_handle,data,c, num_urls, maxtime, maxsize, true, vfd); + if (0 != r) { - rc = -errno; + rc = r; goto out2; } - long delta = 0; - do - { - /* Check to see how long querying is taking. */ - if (maxtime > 0) - { - if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time) == -1) - { - rc = -errno; - goto out2; - } - delta = cur_time.tv_sec - start_time.tv_sec; - if ( delta > maxtime) - { - dprintf(vfd, "Timeout with max time=%lds and transfer time=%lds\n", maxtime, delta ); - rc = -ETIME; - goto out2; - } - } - /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */ - curl_multi_wait(curlm, NULL, 0, 1000, NULL); - CURLMcode curlm_res = curl_multi_perform(curlm, &still_running); - - /* If the target file has been found, abort the other queries. 
*/ - if (target_handle != NULL) - { - for (int i = 0; i < num_urls; i++) - if (data[i].handle != target_handle) - curl_multi_remove_handle(curlm, data[i].handle); - else - { - committed_to = i; - if (c->winning_headers == NULL) - { - c->winning_headers = data[committed_to].response_data; - data[committed_to].response_data = NULL; - data[committed_to].response_data_size = 0; - } - - } - } - - if (vfd >= 0 && !verbose_reported && committed_to >= 0) - { - bool pnl = (c->default_progressfn_printed_p && vfd == STDERR_FILENO); - dprintf (vfd, "%scommitted to url %d\n", pnl ? "\n" : "", - committed_to); - if (pnl) - c->default_progressfn_printed_p = 0; - verbose_reported = true; - } - - if (curlm_res != CURLM_OK) - { - switch (curlm_res) - { - case CURLM_CALL_MULTI_PERFORM: continue; - case CURLM_OUT_OF_MEMORY: rc = -ENOMEM; break; - default: rc = -ENETUNREACH; break; - } - goto out2; - } - - long dl_size = -1; - if (target_handle && (c->progressfn || maxsize > 0)) - { - /* Get size of file being downloaded. NB: If going through - deflate-compressing proxies, this number is likely to be - unavailable, so -1 may show. */ - CURLcode curl_res; -#if CURL_AT_LEAST_VERSION(7, 55, 0) - curl_off_t cl; - curl_res = curl_easy_getinfo(target_handle, - CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, - &cl); - if (curl_res == CURLE_OK && cl >= 0) - dl_size = (cl > LONG_MAX ? LONG_MAX : (long)cl); -#else - double cl; - curl_res = curl_easy_getinfo(target_handle, - CURLINFO_CONTENT_LENGTH_DOWNLOAD, - &cl); - if (curl_res == CURLE_OK && cl >= 0) - dl_size = (cl >= (double)(LONG_MAX+1UL) ? 
LONG_MAX : (long)cl); -#endif - /* If Content-Length is -1, try to get the size from - X-Debuginfod-Size */ - if (dl_size == -1 && c->winning_headers != NULL) - { - long xdl; - char *hdr = strcasestr(c->winning_headers, "x-debuginfod-size"); - size_t off = strlen("x-debuginfod-size:"); - - if (hdr != NULL && sscanf(hdr + off, "%ld", &xdl) == 1) - dl_size = xdl; - } - } - - if (c->progressfn) /* inform/check progress callback */ - { - loops ++; - long pa = loops; /* default param for progress callback */ - if (target_handle) /* we've committed to a server; report its download progress */ - { - CURLcode curl_res; -#if CURL_AT_LEAST_VERSION(7, 55, 0) - curl_off_t dl; - curl_res = curl_easy_getinfo(target_handle, - CURLINFO_SIZE_DOWNLOAD_T, - &dl); - if (curl_res == 0 && dl >= 0) - pa = (dl > LONG_MAX ? LONG_MAX : (long)dl); -#else - double dl; - curl_res = curl_easy_getinfo(target_handle, - CURLINFO_SIZE_DOWNLOAD, - &dl); - if (curl_res == 0) - pa = (dl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)dl); -#endif - - } - - if ((*c->progressfn) (c, pa, dl_size == -1 ? 0 : dl_size)) - { - c->progressfn_cancel = true; - break; - } - } - - /* Check to see if we are downloading something which exceeds maxsize, if set.*/ - if (target_handle && dl_size > maxsize && maxsize > 0) - { - if (vfd >=0) - dprintf(vfd, "Content-Length too large.\n"); - rc = -EFBIG; - goto out2; - } - } while (still_running); /* Check whether a query was successful. If so, assign its handle to verified_handle. 
*/ @@ -1709,6 +1833,7 @@ debuginfod_query_server (debuginfod_client *c, curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */ curl_easy_cleanup (data[i].handle); free(data[i].response_data); + data[i].response_data = NULL; } free(c->winning_headers); c->winning_headers = NULL; @@ -1918,7 +2043,7 @@ debuginfod_find_debuginfo (debuginfod_client *client, const unsigned char *build_id, int build_id_len, char **path) { - return debuginfod_query_server(client, build_id, build_id_len, + return debuginfod_query_server_by_buildid(client, build_id, build_id_len, "debuginfo", NULL, path); } @@ -1929,7 +2054,7 @@ debuginfod_find_executable(debuginfod_client *client, const unsigned char *build_id, int build_id_len, char **path) { - return debuginfod_query_server(client, build_id, build_id_len, + return debuginfod_query_server_by_buildid(client, build_id, build_id_len, "executable", NULL, path); } @@ -1938,7 +2063,7 @@ int debuginfod_find_source(debuginfod_client *client, const unsigned char *build_id, int build_id_len, const char *filename, char **path) { - return debuginfod_query_server(client, build_id, build_id_len, + return debuginfod_query_server_by_buildid(client, build_id, build_id_len, "source", filename, path); } @@ -1947,8 +2072,8 @@ debuginfod_find_section (debuginfod_client *client, const unsigned char *build_id, int build_id_len, const char *section, char **path) { - int rc = debuginfod_query_server(client, build_id, build_id_len, - "section", section, path); + int rc = debuginfod_query_server_by_buildid(client, build_id, build_id_len, + "section", section, path); if (rc != -EINVAL) return rc; @@ -1997,6 +2122,364 @@ debuginfod_find_section (debuginfod_client *client, return rc; } + +int debuginfod_find_metadata (debuginfod_client *client, + const char* key, const char* value, char **path) +{ + (void) client; + (void) key; + (void) value; + (void) path; + +#ifdef HAVE_JSON_C + char *server_urls = NULL; + char *urls_envvar = NULL; + char *cache_path = 
NULL; + char *target_cache_dir = NULL; + char *target_cache_path = NULL; + char *target_cache_tmppath = NULL; + char *key_and_value = NULL; + int rc = 0, r; + int vfd = client->verbose_fd; + struct handle_data *data = NULL; + + json_object *json_metadata = json_object_new_array(); + if(NULL == json_metadata) { + rc = -ENOMEM; + goto out; + } + + if(NULL == value || NULL == key){ + rc = -EINVAL; + goto out; + } + + if (vfd >= 0) + dprintf (vfd, "debuginfod_find_metadata %s %s\n", key, value); + + /* Without query-able URL, we can stop here*/ + urls_envvar = getenv(DEBUGINFOD_URLS_ENV_VAR); + if (vfd >= 0) + dprintf (vfd, "server urls \"%s\"\n", + urls_envvar != NULL ? urls_envvar : ""); + if (urls_envvar == NULL || urls_envvar[0] == '\0') + { + rc = -ENOSYS; + goto out; + } + + /* set paths needed to perform the query + example format: + cache_path: $HOME/.cache + target_cache_dir: $HOME/.cache/metadata + target_cache_path: $HOME/.cache/metadata/key=ENCODED&value=ENCODED + target_cache_tmppath: $HOME/.cache/metadata/key=ENCODED&value=ENCODED.XXXXXX + */ + + // libcurl > 7.62ish has curl_url_set()/etc. to construct these things more properly. + // curl_easy_escape() is older + { + CURL *c = curl_easy_init(); + if (!c) { + rc = -ENOMEM; + goto out; + } + char *key_escaped = curl_easy_escape(c, key, 0); + char *value_escaped = curl_easy_escape(c, value, 0); + + // fallback to unescaped values in unlikely case of error + xalloc_str (key_and_value, "key=%s&value=%s", key_escaped ?: key, value_escaped ?: value); + curl_free(value_escaped); + curl_free(key_escaped); + curl_easy_cleanup(c); + } + + /* Check if we have a recent result already in the cache. */ + cache_path = make_cache_path(); + if (!
cache_path) + goto out; + xalloc_str (target_cache_dir, "%s/metadata", cache_path); + (void) mkdir (target_cache_dir, 0700); + xalloc_str (target_cache_path, "%s/%s", target_cache_dir, key_and_value); + xalloc_str (target_cache_tmppath, "%s/%s.XXXXXX", target_cache_dir, key_and_value); + + int fd = open(target_cache_path, O_RDONLY); + if (fd >= 0) + { + struct stat st; + int metadata_retention = 0; + time_t now = time(NULL); + char *metadata_retention_path = NULL; + + xalloc_str (metadata_retention_path, "%s/%s", cache_path, metadata_retention_filename); + if (metadata_retention_path) + { + rc = debuginfod_config_cache(metadata_retention_path, + metadata_retention_default_s, &st); + free (metadata_retention_path); + if (rc < 0) + rc = 0; + } + else + rc = 0; + metadata_retention = rc; + + if (fstat(fd, &st) != 0) + { + rc = -errno; + close (fd); + goto out; + } + + if (metadata_retention > 0 && (now - st.st_mtime <= metadata_retention)) + { + if (client && client->verbose_fd >= 0) + dprintf (client->verbose_fd, "cached metadata %s", key_and_value); + + if (path != NULL) + { + *path = target_cache_path; // pass over the pointer + target_cache_path = NULL; // prevent free() in our own cleanup + } + + /* Success!!!! */ + rc = fd; + goto out; + } + + /* We don't have to clear the likely-expired cached object here + by unlinking. We will shortly make a new request and save + results right on top. Erasing here could trigger a TOCTOU + race with another thread just finishing a query and passing + its results back. + */ + // (void) unlink (target_cache_path); + + close (fd); + } + + /* No valid cached metadata found: time to make the queries. 
*/ + + /* Clear the client of previous urls*/ + free (client->url); + client->url = NULL; + + long maxtime = 0; + const char *maxtime_envvar; + maxtime_envvar = getenv(DEBUGINFOD_MAXTIME_ENV_VAR); + if (maxtime_envvar != NULL) + maxtime = atol (maxtime_envvar); + if (maxtime && vfd >= 0) + dprintf(vfd, "using max time %lds\n", maxtime); + + long timeout = default_timeout; + const char* timeout_envvar = getenv(DEBUGINFOD_TIMEOUT_ENV_VAR); + if (timeout_envvar != NULL) + timeout = atoi (timeout_envvar); + if (vfd >= 0) + dprintf (vfd, "using timeout %ld\n", timeout); + + add_default_headers(client); + + /* make a copy of the envvar so it can be safely modified. */ + server_urls = strdup(urls_envvar); + if (server_urls == NULL) + { + rc = -ENOMEM; + goto out; + } + + /* thereafter, goto out1 on error*/ + + char **server_url_list = NULL; + char *server_url; + int num_urls = 0; + r = init_server_urls("metadata", server_urls, &server_url_list, &num_urls, vfd); + if(0 != r){ + rc = r; + goto out1; + } + + CURLM *curlm = client->server_mhandle; + assert (curlm != NULL); + + CURL *target_handle = NULL; + data = malloc(sizeof(struct handle_data) * num_urls); + if (data == NULL) + { + rc = -ENOMEM; + goto out1; + } + + /* thereafter, goto out2 on error. 
*/ + + /* Initialize handle_data */ + for (int i = 0; i < num_urls; i++) + { + if ((server_url = server_url_list[i]) == NULL) + break; + if (vfd >= 0) + dprintf (vfd, "init server %d %s\n", i, server_url); + + data[i].errbuf[0] = '\0'; + data[i].target_handle = &target_handle; + data[i].client = client; + data[i].metadata = NULL; + data[i].metadata_size = 0; + data[i].response_data = NULL; + data[i].response_data_size = 0; + + snprintf(data[i].url, PATH_MAX, "%s?%s", server_url, key_and_value); + + r = init_handle(client, metadata_callback, header_callback, &data[i], i, timeout, vfd); + if(0 != r){ + rc = r; + goto out2; + } + curl_multi_add_handle(curlm, data[i].handle); + } + + /* Query servers */ + if (vfd >= 0) + dprintf (vfd, "Starting %d queries\n",num_urls); + r = perform_queries(curlm, NULL, data, client, num_urls, maxtime, 0, false, vfd); + if (0 != r) { + rc = r; + goto out2; + } + + /* NOTE: We don't check the return codes of the curl messages since + a metadata query failing silently is just fine. We want to know what's + available from servers which can be connected with no issues. + If running with additional verbosity, the failure will be noted in stderr */ + + /* Building the new json array from all the upstream data and + cleanup while at it. 
+ */ + for (int i = 0; i < num_urls; i++) + { + curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */ + curl_easy_cleanup (data[i].handle); + free (data[i].response_data); + + if (NULL == data[i].metadata) + { + if (vfd >= 0) + dprintf (vfd, "Query to %s failed with error message:\n\t\"%s\"\n", + data[i].url, data[i].errbuf); + continue; + } + + json_object *upstream_metadata = json_tokener_parse(data[i].metadata); + if(NULL == upstream_metadata) continue; + // Combine the upstream metadata into the json array + for (int j = 0, n = json_object_array_length(upstream_metadata); j < n; j++) { + json_object *entry = json_object_array_get_idx(upstream_metadata, j); + json_object_get(entry); // increment reference count + json_object_array_add(json_metadata, entry); + } + json_object_put(upstream_metadata); + + free (data[i].metadata); + } + + /* Because of race with cache cleanup / rmdir, try to mkdir/mkstemp up to twice. */ + for (int i=0; i<2; i++) { + /* (re)create target directory in cache */ + (void) mkdir(target_cache_dir, 0700); /* files will be 0400 later */ + + /* NB: write to a temporary file first, to avoid race condition of + multiple clients checking the cache, while a partially-written or empty + file is in there, being written from libcurl. */ + fd = mkstemp (target_cache_tmppath); + if (fd >= 0) break; + } + if (fd < 0) /* Still failed after two iterations. */ + { + rc = -errno; + goto out1; + } + + + /* Plop the complete json_metadata object into the cache. 
*/ + const char* json_string = json_object_to_json_string_ext(json_metadata, JSON_C_TO_STRING_PRETTY); + if (json_string == NULL) + { + rc = -ENOMEM; + goto out1; + } + ssize_t res = write_retry (fd, json_string, strlen(json_string)); + (void) lseek(fd, 0, SEEK_SET); // rewind file so client can read it from the top + + /* NB: json_string is auto deleted when json_metadata object is nuked */ + if (res < 0 || (size_t) res != strlen(json_string)) + { + rc = -EIO; + goto out1; + } + /* PR27571: make cache files casually unwriteable; dirs are already 0700 */ + (void) fchmod(fd, 0400); + + /* rename tmp->real */ + rc = rename (target_cache_tmppath, target_cache_path); + if (rc < 0) + { + rc = -errno; + goto out1; + /* Perhaps we need not give up right away; could retry or something ... */ + } + + /* don't close fd - we're returning it */ + /* don't unlink the tmppath; it's already been renamed. */ + if (path != NULL) + *path = strdup(target_cache_path); + + rc = fd; + goto out1; + +/* error exits */ +out2: + /* remove all handles from multi */ + for (int i = 0; i < num_urls; i++) + { + if (data[i].handle != NULL) + { + curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */ + curl_easy_cleanup (data[i].handle); + free (data[i].response_data); + free (data[i].metadata); + } + } + +out1: + free(data); + + for (int i = 0; i < num_urls; ++i) + free(server_url_list[i]); + free(server_url_list); + +out: + free (server_urls); + json_object_put(json_metadata); + /* Reset sent headers */ + curl_slist_free_all (client->headers); + client->headers = NULL; + client->user_agent_set_p = 0; + + free (target_cache_dir); + free (target_cache_path); + free (target_cache_tmppath); + free (key_and_value); + free (cache_path); + + return rc; + +#else /* ! HAVE_JSON_C */ + return -ENOSYS; +#endif +} + + /* Add an outgoing HTTP header. 
*/ int debuginfod_add_http_header (debuginfod_client *client, const char* header) { diff --git a/debuginfod/debuginfod-find.c b/debuginfod/debuginfod-find.c index 30731098..2df6436d 100644 --- a/debuginfod/debuginfod-find.c +++ b/debuginfod/debuginfod-find.c @@ -1,6 +1,6 @@ /* Command-line frontend for retrieving ELF / DWARF / source files from the debuginfod. - Copyright (C) 2019-2020 Red Hat, Inc. + Copyright (C) 2019-2023 Red Hat, Inc. This file is part of elfutils. This file is free software; you can redistribute it and/or modify @@ -31,6 +31,9 @@ #include #include +#ifdef HAVE_JSON_C + #include +#endif /* Name and version of program. */ ARGP_PROGRAM_VERSION_HOOK_DEF = print_version; @@ -50,7 +53,11 @@ static const char args_doc[] = N_("debuginfo BUILDID\n" "source BUILDID /FILENAME\n" "source PATH /FILENAME\n" "section BUILDID SECTION-NAME\n" - "section PATH SECTION-NAME\n"); + "section PATH SECTION-NAME\n" +#ifdef HAVE_JSON_C + "metadata KEY VALUE\n" +#endif + ); /* Definitions of arguments for argp functions. */ @@ -145,44 +152,51 @@ main(int argc, char** argv) /* If we were passed an ELF file name in the BUILDID slot, look in there. */ unsigned char* build_id = (unsigned char*) argv[remaining+1]; int build_id_len = 0; /* assume text */ - - int any_non_hex = 0; - int i; - for (i = 0; build_id[i] != '\0'; i++) - if ((build_id[i] >= '0' && build_id[i] <= '9') || - (build_id[i] >= 'a' && build_id[i] <= 'f')) - ; - else - any_non_hex = 1; - - int fd = -1; Elf* elf = NULL; - if (any_non_hex) /* raw build-id */ - { - fd = open ((char*) build_id, O_RDONLY); - if (fd < 0) - fprintf (stderr, "Cannot open %s: %s\n", build_id, strerror(errno)); - } - if (fd >= 0) - { - elf = dwelf_elf_begin (fd); - if (elf == NULL) - fprintf (stderr, "Cannot open as ELF file %s: %s\n", build_id, - elf_errmsg (-1)); - } - if (elf != NULL) + + /* Process optional buildid given via ELF file name, for some query types only. 
*/ + if (strcmp(argv[remaining], "debuginfo") == 0 + || strcmp(argv[remaining], "executable") == 0 + || strcmp(argv[remaining], "source") == 0 + || strcmp(argv[remaining], "section") == 0) { - const void *extracted_build_id; - ssize_t s = dwelf_elf_gnu_build_id(elf, &extracted_build_id); - if (s > 0) + int any_non_hex = 0; + int i; + for (i = 0; build_id[i] != '\0'; i++) + if ((build_id[i] >= '0' && build_id[i] <= '9') || + (build_id[i] >= 'a' && build_id[i] <= 'f')) + ; + else + any_non_hex = 1; + + int fd = -1; + if (any_non_hex) /* raw build-id */ + { + fd = open ((char*) build_id, O_RDONLY); + if (fd < 0) + fprintf (stderr, "Cannot open %s: %s\n", build_id, strerror(errno)); + } + if (fd >= 0) + { + elf = dwelf_elf_begin (fd); + if (elf == NULL) + fprintf (stderr, "Cannot open as ELF file %s: %s\n", build_id, + elf_errmsg (-1)); + } + if (elf != NULL) { - /* Success: replace the build_id pointer/len with the binary blob - that elfutils is keeping for us. It'll remain valid until elf_end(). */ - build_id = (unsigned char*) extracted_build_id; - build_id_len = s; + const void *extracted_build_id; + ssize_t s = dwelf_elf_gnu_build_id(elf, &extracted_build_id); + if (s > 0) + { + /* Success: replace the build_id pointer/len with the binary blob + that elfutils is keeping for us. It'll remain valid until elf_end(). */ + build_id = (unsigned char*) extracted_build_id; + build_id_len = s; + } + else + fprintf (stderr, "Cannot extract build-id from %s: %s\n", build_id, elf_errmsg(-1)); } - else - fprintf (stderr, "Cannot extract build-id from %s: %s\n", build_id, elf_errmsg(-1)); } char *cache_name; @@ -221,6 +235,19 @@ main(int argc, char** argv) rc = debuginfod_find_section(client, build_id, build_id_len, argv[remaining+2], &cache_name); } +#ifdef HAVE_JSON_C + else if (strcmp(argv[remaining], "metadata") == 0) /* no buildid! 
*/ + { + if (remaining+2 == argc) + { + fprintf(stderr, "Require KEY and VALUE for \"metadata\"\n"); + return 1; + } + + rc = debuginfod_find_metadata (client, argv[remaining+1], argv[remaining+2], + &cache_name); + } +#endif else { argp_help (&argp, stderr, ARGP_HELP_USAGE, argv[0]); @@ -240,8 +267,6 @@ main(int argc, char** argv) debuginfod_end (client); if (elf) elf_end(elf); - if (fd >= 0) - close (fd); if (rc < 0) { diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index 5ef6cc32..000820fe 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -1,5 +1,5 @@ /* Debuginfo-over-http server. - Copyright (C) 2019-2021 Red Hat, Inc. + Copyright (C) 2019-2023 Red Hat, Inc. Copyright (C) 2021, 2022 Mark J. Wielaard This file is part of elfutils. @@ -68,6 +68,7 @@ extern "C" { #include #include #include +#include /* If fts.h is included before config.h, its indirect inclusions may not @@ -127,6 +128,9 @@ using namespace std; #define tid() pthread_self() #endif +#ifdef HAVE_JSON_C + #include +#endif inline bool string_endswith(const string& haystack, const string& needle) @@ -185,7 +189,7 @@ static const char DEBUGINFOD_SQLITE_DDL[] = " foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n" " primary key (buildid, file, mtime)\n" " ) " WITHOUT_ROWID ";\n" - // Index for faster delete by file identifier + // Index for faster delete by file identifier and metadata searches "create index if not exists " BUILDIDS "_f_de_idx on " BUILDIDS "_f_de (file, mtime);\n" "create table if not exists " BUILDIDS "_f_s (\n" " buildid integer not null,\n" @@ -211,6 +215,8 @@ static const char DEBUGINFOD_SQLITE_DDL[] = " ) " WITHOUT_ROWID ";\n" // Index for faster delete by archive file identifier "create index if not exists " BUILDIDS "_r_de_idx on " BUILDIDS "_r_de (file, mtime);\n" + // Index for metadata searches + "create index if not exists " BUILDIDS "_r_de_idx2 on " BUILDIDS "_r_de (content);\n" "create 
table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm " buildid integer not null,\n" " artifactsrc integer not null,\n" @@ -398,6 +404,9 @@ static const struct argp_option options[] = { "passive", ARGP_KEY_PASSIVE, NULL, 0, "Do not scan or groom, read-only database.", 0 }, #define ARGP_KEY_DISABLE_SOURCE_SCAN 0x1009 { "disable-source-scan", ARGP_KEY_DISABLE_SOURCE_SCAN, NULL, 0, "Do not scan dwarf source info.", 0 }, +#define ARGP_KEY_METADATA_MAXTIME 0x100A + { "metadata-maxtime", ARGP_KEY_METADATA_MAXTIME, "SECONDS", 0, + "Number of seconds to limit metadata query run time, 0=unlimited.", 0 }, { NULL, 0, NULL, 0, NULL, 0 }, }; @@ -452,6 +461,8 @@ static unsigned forwarded_ttl_limit = 8; static bool scan_source_info = true; static string tmpdir; static bool passive_p = false; +static unsigned metadata_maxtime_s = 5; + static void set_metric(const string& key, double value); // static void inc_metric(const string& key); @@ -653,6 +664,9 @@ parse_opt (int key, char *arg, case ARGP_KEY_DISABLE_SOURCE_SCAN: scan_source_info = false; break; + case ARGP_KEY_METADATA_MAXTIME: + metadata_maxtime_s = (unsigned) atoi(arg); + break; // case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK); default: return ARGP_ERR_UNKNOWN; } @@ -2040,6 +2054,58 @@ handle_buildid_r_match (bool internal_req_p, return r; } +void +add_client_federation_headers(debuginfod_client *client, MHD_Connection* conn){ + // Transcribe incoming User-Agent: + string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: ""; + string ua_complete = string("User-Agent: ") + ua; + debuginfod_add_http_header (client, ua_complete.c_str()); + + // Compute larger XFF:, for avoiding info loss during + // federation, and for future cyclicity detection. 
+ string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: ""; + if (xff != "") + xff += string(", "); // comma separated list + + unsigned int xff_count = 0; + for (auto&& i : xff){ + if (i == ',') xff_count++; + } + + // if X-Forwarded-For: exceeds N hops, + // do not delegate a local lookup miss to upstream debuginfods. + if (xff_count >= forwarded_ttl_limit) + throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \ +and will not query the upstream servers"); + + // Compute the client's numeric IP address only - so can't merge with conninfo() + const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn, + MHD_CONNECTION_INFO_CLIENT_ADDRESS); + struct sockaddr *so = u ? u->client_addr : 0; + char hostname[256] = ""; // RFC1035 + if (so && so->sa_family == AF_INET) { + (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0, + NI_NUMERICHOST); + } else if (so && so->sa_family == AF_INET6) { + struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so; + if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) { + struct sockaddr_in addr4; + memset (&addr4, 0, sizeof(addr4)); + addr4.sin_family = AF_INET; + addr4.sin_port = addr6->sin6_port; + memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr)); + (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4), + hostname, sizeof (hostname), NULL, 0, + NI_NUMERICHOST); + } else { + (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0, + NI_NUMERICHOST); + } + } + + string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname); + debuginfod_add_http_header (client, xff_complete.c_str()); +} static struct MHD_Response* handle_buildid_match (bool internal_req_p, @@ -2273,58 +2339,8 @@ handle_buildid (MHD_Connection* conn, debuginfod_set_progressfn (client, & debuginfod_find_progress); if (conn) - { - // Transcribe incoming User-Agent: - string ua 
= MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: ""; - string ua_complete = string("User-Agent: ") + ua; - debuginfod_add_http_header (client, ua_complete.c_str()); - - // Compute larger XFF:, for avoiding info loss during - // federation, and for future cyclicity detection. - string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: ""; - if (xff != "") - xff += string(", "); // comma separated list - - unsigned int xff_count = 0; - for (auto&& i : xff){ - if (i == ',') xff_count++; - } + add_client_federation_headers(client, conn); - // if X-Forwarded-For: exceeds N hops, - // do not delegate a local lookup miss to upstream debuginfods. - if (xff_count >= forwarded_ttl_limit) - throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \ -and will not query the upstream servers"); - - // Compute the client's numeric IP address only - so can't merge with conninfo() - const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn, - MHD_CONNECTION_INFO_CLIENT_ADDRESS); - struct sockaddr *so = u ? 
u->client_addr : 0; - char hostname[256] = ""; // RFC1035 - if (so && so->sa_family == AF_INET) { - (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0, - NI_NUMERICHOST); - } else if (so && so->sa_family == AF_INET6) { - struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so; - if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) { - struct sockaddr_in addr4; - memset (&addr4, 0, sizeof(addr4)); - addr4.sin_family = AF_INET; - addr4.sin_port = addr6->sin6_port; - memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr)); - (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4), - hostname, sizeof (hostname), NULL, 0, - NI_NUMERICHOST); - } else { - (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0, - NI_NUMERICHOST); - } - } - - string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname); - debuginfod_add_http_header (client, xff_complete.c_str()); - } - if (artifacttype == "debuginfo") fd = debuginfod_find_debuginfo (client, (const unsigned char*) buildid.c_str(), @@ -2341,7 +2357,7 @@ and will not query the upstream servers"); fd = debuginfod_find_section (client, (const unsigned char*) buildid.c_str(), 0, section.c_str(), NULL); - + if (fd >= 0) { if (conn != 0) @@ -2535,6 +2551,176 @@ handle_metrics (off_t* size) return r; } + +#ifdef HAVE_JSON_C +static struct MHD_Response* +handle_metadata (MHD_Connection* conn, + string key, string value, off_t* size) +{ + MHD_Response* r; + sqlite3 *thisdb = dbq; + + // Query locally for matching e, d files + string op; + if (key == "glob") + op = "glob"; + else if (key == "file") + op = "="; + else + throw reportable_exception("/metadata webapi error, unsupported key"); + + string sql = string( + // explicit query r_de and f_de once here, rather than the query_d and query_e + // separately, because they scan the same tables, so we'd double the work + "select d1.executable_p, d1.debuginfo_p, 0 as 
source_p, b1.hex, f1.name as file, a1.name as archive " + "from " BUILDIDS "_r_de d1, " BUILDIDS "_files f1, " BUILDIDS "_buildids b1, " BUILDIDS "_files a1 " + "where f1.id = d1.content and a1.id = d1.file and d1.buildid = b1.id and f1.name " + op + " ? " + "union all \n" + "select d2.executable_p, d2.debuginfo_p, 0, b2.hex, f2.name, NULL " + "from " BUILDIDS "_f_de d2, " BUILDIDS "_files f2, " BUILDIDS "_buildids b2 " + "where f2.id = d2.file and d2.buildid = b2.id and f2.name " + op + " ? "); + // NB: we could query source file names too, thusly: + // + // select * from " BUILDIDS "_buildids b, " BUILDIDS "_files f1, " BUILDIDS "_r_sref sr + // where b.id = sr.buildid and f1.id = sr.artifactsrc and f1.name " + op + "?" + // UNION ALL something with BUILDIDS "_f_s" + // + // But the first part of this query cannot run fast without the same index temp-created + // during "maxigroom": + // create index " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc); + // and unfortunately this index is HUGE. It's similar to the size of the _r_sref + // table, which is already the largest part of a debuginfod index. Adding that index + // would nearly double the .sqlite db size. 
+ + sqlite_ps *pp = new sqlite_ps (thisdb, "mhd-query-meta-glob", sql); + pp->reset(); + pp->bind(1, value); + pp->bind(2, value); + // pp->bind(3, value); // "source" query clause disabled + unique_ptr ps_closer(pp); // release pp if exception or return + + json_object *metadata = json_object_new_array(); + if (!metadata) + throw libc_exception(ENOMEM, "json allocation"); + + // consume all the rows + struct timespec ts_start; + clock_gettime (CLOCK_MONOTONIC, &ts_start); + + int rc; + while (SQLITE_DONE != (rc = pp->step())) + { + // break out of loop if we have searched too long + struct timespec ts_end; + clock_gettime (CLOCK_MONOTONIC, &ts_end); + double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9; + if (metadata_maxtime_s > 0 && deltas > metadata_maxtime_s) + break; // NB: no particular signal is given to the client about incompleteness + + if (rc != SQLITE_ROW) throw sqlite_exception(rc, "step"); + + int m_executable_p = sqlite3_column_int (*pp, 0); + int m_debuginfo_p = sqlite3_column_int (*pp, 1); + int m_source_p = sqlite3_column_int (*pp, 2); + string m_buildid = (const char*) sqlite3_column_text (*pp, 3) ?: ""; // should always be non-null + string m_file = (const char*) sqlite3_column_text (*pp, 4) ?: ""; + string m_archive = (const char*) sqlite3_column_text (*pp, 5) ?: ""; + + // Confirm that m_file matches in the fnmatch(FNM_PATHNAME) + // sense, since sqlite's GLOB operator is a looser filter. 
+ if (key == "glob" && fnmatch(value.c_str(), m_file.c_str(), FNM_PATHNAME) != 0) + continue; + + auto add_metadata = [metadata, m_buildid, m_file, m_archive](const string& type) { + json_object* entry = json_object_new_object(); + if (NULL == entry) throw libc_exception (ENOMEM, "cannot allocate json"); + defer_dtor entry_d(entry, json_object_put); + + auto add_entry_metadata = [entry](const char* k, string v) { + json_object* s; + if(v != "") { + s = json_object_new_string(v.c_str()); + if (NULL == s) throw libc_exception (ENOMEM, "cannot allocate json"); + json_object_object_add(entry, k, s); + } + }; + + add_entry_metadata("type", type.c_str()); + add_entry_metadata("buildid", m_buildid); + add_entry_metadata("file", m_file); + if (m_archive != "") add_entry_metadata("archive", m_archive); + if (verbose > 3) + obatched(clog) << "metadata found local " + << json_object_to_json_string_ext(entry, + JSON_C_TO_STRING_PRETTY) + << endl; + + // Increase ref count to switch its ownership + json_object_array_add(metadata, json_object_get(entry)); + }; + + if (m_executable_p) add_metadata("executable"); + if (m_debuginfo_p) add_metadata("debuginfo"); + if (m_source_p) add_metadata("source"); + } + pp->reset(); + + unsigned num_local_results = json_object_array_length(metadata); + + // Query upstream as well + debuginfod_client *client = debuginfod_pool_begin(); + if (metadata && client != NULL) + { + add_client_federation_headers(client, conn); + + int upstream_metadata_fd; + upstream_metadata_fd = debuginfod_find_metadata(client, key.c_str(), value.c_str(), NULL); + if (upstream_metadata_fd >= 0) { + json_object *upstream_metadata_json = json_object_from_fd(upstream_metadata_fd); + if (NULL != upstream_metadata_json) + { + for (int i = 0, n = json_object_array_length(upstream_metadata_json); i < n; i++) { + json_object *entry = json_object_array_get_idx(upstream_metadata_json, i); + if (verbose > 3) + obatched(clog) << "metadata found remote " + << 
json_object_to_json_string_ext(entry, + JSON_C_TO_STRING_PRETTY) + << endl; + + json_object_get(entry); // increment reference count + json_object_array_add(metadata, entry); + } + json_object_put(upstream_metadata_json); + } + close(upstream_metadata_fd); + } + debuginfod_pool_end (client); + } + + unsigned num_total_results = json_object_array_length(metadata); + + if (verbose > 2) + obatched(clog) << "metadata found local=" << num_local_results + << " remote=" << (num_total_results-num_local_results) + << " total=" << num_total_results + << endl; + + const char* metadata_str = (metadata != NULL) ? + json_object_to_json_string(metadata) : "[ ]" ; + if (! metadata_str) + throw libc_exception (ENOMEM, "cannot allocate json"); + r = MHD_create_response_from_buffer (strlen(metadata_str), + (void*) metadata_str, + MHD_RESPMEM_MUST_COPY); + *size = strlen(metadata_str); + json_object_put(metadata); + if (r) + add_mhd_response_header(r, "Content-Type", "application/json"); + return r; +} +#endif + + static struct MHD_Response* handle_root (off_t* size) { @@ -2601,6 +2787,7 @@ handler_cb (void * /*cls*/, clock_gettime (CLOCK_MONOTONIC, &ts_start); double afteryou = 0.0; string artifacttype, suffix; + string urlargs; // for logging try { @@ -2669,6 +2856,21 @@ handler_cb (void * /*cls*/, inc_metric("http_requests_total", "type", artifacttype); r = handle_metrics(& http_size); } +#ifdef HAVE_JSON_C + else if (url1 == "/metadata") + { + tmp_inc_metric m ("thread_busy", "role", "http-metadata"); + const char* key = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "key"); + const char* value = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "value"); + if (NULL == value || NULL == key) + throw reportable_exception("/metadata webapi error, need key and value"); + + urlargs = string("?key=") + string(key) + string("&value=") + string(value); // apprx., for logging + artifacttype = "metadata"; + inc_metric("http_requests_total", "type", 
artifacttype); + r = handle_metadata(connection, key, value, &http_size); + } +#endif else if (url1 == "/") { artifacttype = "/"; @@ -2705,7 +2907,7 @@ handler_cb (void * /*cls*/, // afteryou: delay waiting for other client's identical query to complete // deltas: total latency, including afteryou waiting obatched(clog) << conninfo(connection) - << ' ' << method << ' ' << url + << ' ' << method << ' ' << url << urlargs << ' ' << http_code << ' ' << http_size << ' ' << (int)(afteryou*1000) << '+' << (int)((deltas-afteryou)*1000) << "ms" << endl; @@ -3956,12 +4158,13 @@ void groom() if (interrupted) return; // NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G - sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum"); - g1.reset().step_ok_done(); - sqlite_ps g2 (db, "optimize", "pragma optimize"); - g2.reset().step_ok_done(); - sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate"); - g3.reset().step_ok_done(); + { sqlite_ps g (db, "incremental vacuum", "pragma incremental_vacuum"); g.reset().step_ok_done(); } + // https://www.sqlite.org/lang_analyze.html#approx + { sqlite_ps g (db, "analyze setup", "pragma analysis_limit = 1000;\n"); g.reset().step_ok_done(); } + { sqlite_ps g (db, "analyze", "analyze"); g.reset().step_ok_done(); } + { sqlite_ps g (db, "analyze reload", "analyze sqlite_schema"); g.reset().step_ok_done(); } + { sqlite_ps g (db, "optimize", "pragma optimize"); g.reset().step_ok_done(); } + { sqlite_ps g (db, "wal checkpoint", "pragma wal_checkpoint=truncate"); g.reset().step_ok_done(); } database_stats_report(); @@ -4333,6 +4536,8 @@ main (int argc, char *argv[]) if (maxigroom) { obatched(clog) << "maxigrooming database, please wait." << endl; + // NB: this index alone can nearly double the database size! 
+ // NB: this index would be necessary to run source-file metadata searches fast extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);"); extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);"); extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;"); diff --git a/debuginfod/debuginfod.h.in b/debuginfod/debuginfod.h.in index 4a256ba9..3efa877a 100644 --- a/debuginfod/debuginfod.h.in +++ b/debuginfod/debuginfod.h.in @@ -62,9 +62,9 @@ debuginfod_client *debuginfod_begin (void); it is a binary blob of given length. If successful, return a file descriptor to the target, otherwise - return a posix error code. If successful, set *path to a - strdup'd copy of the name of the same file in the cache. - Caller must free() it later. */ + return a negative POSIX error code. If successful, set *path to a + strdup'd copy of the name of the same file in the cache. Caller + must free() it later. */ int debuginfod_find_debuginfo (debuginfod_client *client, const unsigned char *build_id, @@ -88,6 +88,18 @@ int debuginfod_find_section (debuginfod_client *client, const char *section, char **path); +/* Query the urls contained in $DEBUGINFOD_URLS for metadata + with given query key/value. + + If successful, return a file descriptor to the JSON document + describing matches, otherwise return a negative POSIX error code. If + successful, set *path to a strdup'd copy of the name of the same + file in the cache. Caller must free() it later. 
 */ +int debuginfod_find_metadata (debuginfod_client *client, + const char *key, + const char* value, + char **path); + typedef int (*debuginfod_progressfn_t)(debuginfod_client *c, long a, long b); void debuginfod_set_progressfn(debuginfod_client *c, debuginfod_progressfn_t fn); diff --git a/debuginfod/libdebuginfod.map b/debuginfod/libdebuginfod.map index 6334373f..355a89fd 100644 --- a/debuginfod/libdebuginfod.map +++ b/debuginfod/libdebuginfod.map @@ -22,3 +22,6 @@ ELFUTILS_0.188 { debuginfod_get_headers; debuginfod_find_section; } ELFUTILS_0.183; +ELFUTILS_0.190 { + debuginfod_find_metadata; +} ELFUTILS_0.188; diff --git a/doc/ChangeLog b/doc/ChangeLog index 7f2d6ff4..4ed30eb2 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,10 @@ +2023-04-12 Ryan Goldberg , Frank Ch. Eigler + + PR29472: debuginfod metadata query + * debuginfod-find.1: Document metadata query. + * debuginfod-client-config.7: Document metadata cache control setting. + * debuginfod.8: Document new option and webapi. + 2023-02-14 Mark Wielaard * debuginfod.8: Add .TP before -g. diff --git a/doc/debuginfod-client-config.7 b/doc/debuginfod-client-config.7 index 53d82806..bc98d1e6 100644 --- a/doc/debuginfod-client-config.7 +++ b/doc/debuginfod-client-config.7 @@ -134,3 +134,11 @@ are short-circuited (returning an immediate failure instead of sending a new query to servers). This accelerates queries that probably would still fail. The default is 600, 10 minutes. 0 means "forget immediately". + +.TP +.B metadata_retention_s +This control file sets how long to remember the results of a metadata +query. New queries for the same artifacts within this time window are +short-circuited (repeating the same results). This accelerates +queries that would probably have the same results. The +default is 86400, 1 day. 0 means "do not retain". 
diff --git a/doc/debuginfod-find.1 b/doc/debuginfod-find.1 index 7d577bab..6652539c 100644 --- a/doc/debuginfod-find.1 +++ b/doc/debuginfod-find.1 @@ -29,6 +29,8 @@ debuginfod-find \- request debuginfo-related data .B debuginfod-find [\fIOPTION\fP]... source \fIBUILDID\fP \fI/FILENAME\fP .br .B debuginfod-find [\fIOPTION\fP]... source \fIPATH\fP \fI/FILENAME\fP +.br +.B debuginfod-find [\fIOPTION\fP]... metadata \fIKEY\fP \fIVALUE\fP .SH DESCRIPTION \fBdebuginfod-find\fP queries one or more \fBdebuginfod\fP servers for @@ -119,6 +121,36 @@ l l. \../bar/foo.c AT_comp_dir=/zoo/ source BUILDID /zoo//../bar/foo.c .TE +.SS metadata \fIKEY\fP \fIVALUE\fP + +All designated debuginfod servers are queried for metadata about files +in their index. Different search keys may be supported by different +servers. + +.TS +l l l . +KEY VALUE DESCRIPTION + +\fBfile\fP path exact match \fIpath\fP, including in archives +\fBglob\fP pattern glob match \fIpattern\fP, including in archives +.TE + +The results of the search are output to \fBstdout\fP as a JSON array +of objects, supplying metadata about each match. This metadata report +may be cached. It may be incomplete and may contain duplicates. For +each match, the result is a JSON object with these fields. Additional +fields may be present. + +.TS +l l l . +NAME TYPE DESCRIPTION + +\fBbuildid\fP string hexadecimal buildid associated with the file +\fBtype\fP string one of \fBdebuginfo\fP or \fBexecutable\fP +\fBfile\fP string matched file name, outside or inside the archive +\fBarchive\fP string archive containing matched file name, if any +.TE + .SH "OPTIONS" .TP diff --git a/doc/debuginfod.8 b/doc/debuginfod.8 index 07cb01ae..1070d290 100644 --- a/doc/debuginfod.8 +++ b/doc/debuginfod.8 @@ -133,6 +133,14 @@ scanner/groomer server and multiple passive ones, thereby sharing service load. Archive pattern options must still be given, so debuginfod can recognize file name extensions for unpacking. 
+.TP +.B "\-\-metadata\-maxtime=SECONDS" +Impose a limit on the runtime of metadata webapi queries. These +queries, especially broad "glob" wildcards, can take a large amount of +time and produce large results. Public-facing servers may need to +throttle them. The default limit is 5 seconds. Set 0 to disable this +limit. + .TP .B "\-D SQL" "\-\-ddl=SQL" Execute given sqlite statement after the database is opened and @@ -384,6 +392,16 @@ The exact set of metrics and their meanings may change in future versions. Caution: configuration information (path names, versions) may be disclosed. +.SS /metadata?key=\fIKEY\fP&value=\fIVALUE\fP + +This endpoint triggers a search of the files in the index plus any +upstream federated servers, based on given key and value. If +successful, the result is an application/json textual array, listing +metadata for the matched files. See \fIdebuginfod-find(1)\fP for +documentation of the common key/value search parameters, and the +resulting data schema. + + .SH DATA MANAGEMENT debuginfod stores its index in an sqlite database in a densely packed diff --git a/tests/ChangeLog b/tests/ChangeLog index eb3e1118..4ef2248b 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,9 @@ +2023-04-12 Ryan Goldberg , Frank Ch. Eigler + + PR29472: debuginfod metadata query + * run-debuginfod-find-metadata.sh: New test. + * Makefile.am: Run it, dist it. + 2023-02-10 Mark Wielaard * varlocs.c (print_expr): Handle DW_OP_GNU_uninit. 
diff --git a/tests/Makefile.am b/tests/Makefile.am index 7e32f117..df2190f0 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -257,11 +257,12 @@ TESTS += run-debuginfod-dlopen.sh \ run-debuginfod-federation-sqlite.sh \ run-debuginfod-federation-link.sh \ run-debuginfod-percent-escape.sh \ - run-debuginfod-x-forwarded-for.sh \ - run-debuginfod-response-headers.sh \ - run-debuginfod-extraction-passive.sh \ + run-debuginfod-x-forwarded-for.sh \ + run-debuginfod-response-headers.sh \ + run-debuginfod-extraction-passive.sh \ run-debuginfod-webapi-concurrency.sh \ - run-debuginfod-section.sh + run-debuginfod-section.sh \ + run-debuginfod-find-metadata.sh endif if !OLD_LIBMICROHTTPD # Will crash on too old libmicrohttpd @@ -578,6 +579,7 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \ run-debuginfod-extraction-passive.sh \ run-debuginfod-webapi-concurrency.sh \ run-debuginfod-section.sh \ + run-debuginfod-find-metadata.sh \ debuginfod-rpms/fedora30/hello2-1.0-2.src.rpm \ debuginfod-rpms/fedora30/hello2-1.0-2.x86_64.rpm \ debuginfod-rpms/fedora30/hello2-debuginfo-1.0-2.x86_64.rpm \ diff --git a/tests/run-debuginfod-find-metadata.sh b/tests/run-debuginfod-find-metadata.sh new file mode 100755 index 00000000..c378bcdd --- /dev/null +++ b/tests/run-debuginfod-find-metadata.sh @@ -0,0 +1,110 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2022 Red Hat, Inc. +# This file is part of elfutils. +# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# elfutils is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. 
+# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see . + +. $srcdir/debuginfod-subr.sh + +# for test case debugging, uncomment: +set -x +# unset VALGRIND_CMD + +type curl 2>/dev/null || { echo "need curl"; exit 77; } +type jq 2>/dev/null || { echo "need jq"; exit 77; } + +pkg-config json-c libcurl || { echo "one or more libraries are missing (libjson-c, libcurl)"; exit 77; } +env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${VALGRIND_CMD} ${abs_builddir}/../debuginfod/debuginfod-find --help 2>&1 | grep metadata || { echo "compiled without metadata support"; exit 77; } + + +DB=${PWD}/.debuginfod_tmp.sqlite +export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache +tempfiles $DB ${DB}_2 $DB-wal ${DB}_2-wal $DB-shm ${DB}_2-shm + +# This variable is essential and ensures no time-race for claiming ports occurs +# set base to a unique multiple of 100 not used in any other 'run-debuginfod-*' test +base=13100 +get_ports +mkdir R D +cp -rvp ${abs_srcdir}/debuginfod-rpms/rhel7 R +cp -rvp ${abs_srcdir}/debuginfod-debs/*deb D + +env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${VALGRIND_CMD} ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -R \ + -d $DB -p $PORT1 -t0 -g0 R > vlog$PORT1 2>&1 & +PID1=$! +tempfiles vlog$PORT1 +errfiles vlog$PORT1 + +wait_ready $PORT1 'ready' 1 +wait_ready $PORT1 'thread_work_total{role="traverse"}' 1 +wait_ready $PORT1 'thread_work_pending{role="scan"}' 0 +wait_ready $PORT1 'thread_busy{role="scan"}' 0 + +env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS="http://127.0.0.1:$PORT1 https://bad/url.web" ${VALGRIND_CMD} ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -U \ + -d ${DB}_2 -p $PORT2 -t0 -g0 D > vlog$PORT2 2>&1 & +PID2=$! 
+tempfiles vlog$PORT2 +errfiles vlog$PORT2 + +wait_ready $PORT2 'ready' 1 +wait_ready $PORT2 'thread_work_total{role="traverse"}' 1 +wait_ready $PORT2 'thread_work_pending{role="scan"}' 0 +wait_ready $PORT2 'thread_busy{role="scan"}' 0 + +# have clients contact the new server +export DEBUGINFOD_URLS=http://127.0.0.1:$PORT2 + +tempfiles json.txt +# Check that we find correct number of files, both via local and federated links +RESULTF=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod-find metadata glob "/u?r/bin/*"` +cat $RESULTF +N_FOUND=`cat $RESULTF | jq '. | length'` +test $N_FOUND -eq 1 +RESULTF=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod-find metadata glob "/usr/lo?al/bin/*"` +cat $RESULTF +N_FOUND=`cat $RESULTF | jq '. | length'` +test $N_FOUND -eq 2 + + +# Query via the webapi as well +curl http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*' +test `curl -s http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*' | jq '.[0].buildid == "f17a29b5a25bd4960531d82aa6b07c8abe84fa66"'` = 'true' +test `curl -s http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*' | jq '.[0].file == "/usr/bin/hithere"'` = 'true' +test `curl -s http://127.0.0.1:$PORT2'/metadata?key=glob&value=/usr/bin/*hi*' | jq '.[0].archive | test(".*hithere.*deb")'` = 'true' + +# An empty array is returned on server error or if the file DNE +RESULTF=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod-find metadata file "/this/isnt/there"` +cat $RESULTF +test `cat $RESULTF | jq ". == [ ]" ` = 'true' + +kill $PID1 +kill $PID2 +wait $PID1 +wait $PID2 +PID1=0 +PID2=0 + +# check it's still in cache +RESULTF=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod-find metadata file "/usr/bin/hithere"` +cat $RESULTF +test `cat $RESULTF | jq ". 
== [ ]" ` = 'true' + +# invalidate cache, retry previously successful query to now-dead servers +echo 0 > $DEBUGINFOD_CACHE_PATH/metadata_retention_s +RESULTF=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod-find metadata glob "/u?r/bin/*"` +cat $RESULTF +test `cat $RESULTF | jq ". == [ ]" ` = 'true' + +exit 0 -- cgit v1.2.1