diff options
author | Frank Ch. Eigler <fche@redhat.com> | 2022-10-06 12:51:43 -0400 |
---|---|---|
committer | Frank Ch. Eigler <fche@redhat.com> | 2022-10-31 17:41:13 -0400 |
commit | 88430aeb79e42e134db4eae43a204d11941f1df9 (patch) | |
tree | 38138ab9a7683468944b3452817a620a8d27bee5 | |
parent | 5d8cd51a460fc29f35a016836c39fcea6bf59fb0 (diff) | |
download | elfutils-88430aeb79e42e134db4eae43a204d11941f1df9.tar.gz |
Added metadata query of debuginfod servers
Debuginfod servers can be queried by source path and will return JSON
arrays of all the known metadata.
Signed-off-by: Ryan Goldberg <rgoldber@redhat.com>
-rw-r--r-- | ChangeLog | 5 | ||||
-rw-r--r-- | configure.ac | 5 | ||||
-rw-r--r-- | debuginfod/ChangeLog | 24 | ||||
-rw-r--r-- | debuginfod/Makefile.am | 4 | ||||
-rw-r--r-- | debuginfod/debuginfod-client.c | 845 | ||||
-rw-r--r-- | debuginfod/debuginfod-find.c | 33 | ||||
-rw-r--r-- | debuginfod/debuginfod.cxx | 228 | ||||
-rw-r--r-- | debuginfod/debuginfod.h.in | 10 | ||||
-rw-r--r-- | doc/ChangeLog | 7 | ||||
-rw-r--r-- | doc/Makefile.am | 1 | ||||
-rw-r--r-- | doc/debuginfod-find.1 | 17 | ||||
-rw-r--r-- | doc/debuginfod_find_debuginfo.3 | 15 | ||||
-rw-r--r-- | doc/debuginfod_find_metadata.3 | 1 | ||||
-rw-r--r-- | tests/ChangeLog | 6 | ||||
-rw-r--r-- | tests/Makefile.am | 4 | ||||
-rwxr-xr-x | tests/run-debuginfod-find-metadata.sh | 89 |
16 files changed, 960 insertions, 334 deletions
@@ -1,3 +1,8 @@ +2022-10-06 Ryan Goldberg <rgoldber@redhat.com> + + * configure.ac (HAVE_JSON_C): Defined iff libjson-c + is found, and debuginfod metadata querying is thus enabled. + 2022-10-20 Mark Wielaard <mark@klomp.org> * Makefile.am (rpm): Remove --sign. diff --git a/configure.ac b/configure.ac index 1084b469..6077d52a 100644 --- a/configure.ac +++ b/configure.ac @@ -600,6 +600,11 @@ case "$ac_cv_search__obstack_free" in esac AC_SUBST([obstack_LIBS]) +AC_CHECK_LIB(json-c, json_object_array_add, [ + AC_DEFINE([HAVE_JSON_C], [1], [Define if json-c is on the machine]) + AC_SUBST(jsonc_LIBS, '-ljson-c') +]) + dnl The directories with content. dnl Documentation. diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index 1df903fe..79f827d9 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,27 @@ +2022-10-06 Ryan Goldberg <rgoldber@redhat.com> + + * Makefile.am (debuginfod_LDADD): Add jsonc_LIBS. + (libdebuginfod_so_LDLIBS): Likewise. + * debuginfod-find.c (main): Add command line interface for + metadata query by path. + * debuginfod.h.in: Added debuginfod_find_metadata. + * debuginfod.cxx (add_client_federation_headers): New function + created from existing code to remove code duplication. + (handle_buildid_match): Calls new add_client_federation_headers + function. + (handle_metadata): New function which queries local DB and + upstream for metadata. + (handler_cb): New accepted url type, /metadata. + * debuginfod-client.c (struct handle_data): New fields: metadata, + metadata_size, to store incoming metadata. + (metadata_callback): New function called by curl upon reciving + metedata + (init_server_urls, init_handle, perform_queries) : New functions created from + existing code within debuginfod_query_server to reduce code duplication. + (debuginfod_query_server_by_buildid): debuginfod_query_server renamed, and above + functions used in place of identical previously inline code. + (debuginfod_find_metadata): New function. 
+ 2022-10-18 Daniel Thornburgh <dthorn@google.com> * debuginfod-client.c (debuginfod_query_server): Add DEBUGINFOD_HEADERS_FILE diff --git a/debuginfod/Makefile.am b/debuginfod/Makefile.am index 435cb8a6..3d6bc26e 100644 --- a/debuginfod/Makefile.am +++ b/debuginfod/Makefile.am @@ -70,7 +70,7 @@ bin_PROGRAMS += debuginfod-find endif debuginfod_SOURCES = debuginfod.cxx -debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) $(libmicrohttpd_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) -lpthread -ldl +debuginfod_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) $(libmicrohttpd_LIBS) $(sqlite3_LIBS) $(libarchive_LIBS) $(jsonc_LIBS) $(libcurl_LIBS) -lpthread -ldl debuginfod_find_SOURCES = debuginfod-find.c debuginfod_find_LDADD = $(libdw) $(libelf) $(libeu) $(libdebuginfod) $(argp_LDADD) $(fts_LIBS) @@ -97,7 +97,7 @@ libdebuginfod_so_LIBS = libdebuginfod_pic.a if DUMMY_LIBDEBUGINFOD libdebuginfod_so_LDLIBS = else -libdebuginfod_so_LDLIBS = -lpthread $(libcurl_LIBS) $(fts_LIBS) +libdebuginfod_so_LDLIBS = -lpthread $(libcurl_LIBS) $(fts_LIBS) $(jsonc_LIBS) endif $(LIBDEBUGINFOD_SONAME): $(srcdir)/libdebuginfod.map $(libdebuginfod_so_LIBS) $(AM_V_CCLD)$(LINK) $(dso_LDFLAGS) -o $@ \ diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c index 716cb769..2be9a07e 100644 --- a/debuginfod/debuginfod-client.c +++ b/debuginfod/debuginfod-client.c @@ -56,6 +56,8 @@ int debuginfod_find_executable (debuginfod_client *c, const unsigned char *b, int s, char **p) { return -ENOSYS; } int debuginfod_find_source (debuginfod_client *c, const unsigned char *b, int s, const char *f, char **p) { return -ENOSYS; } +int debuginfod_find_metadata (debuginfod_client *c, + const char* p, char** m) { return -ENOSYS; } void debuginfod_set_progressfn(debuginfod_client *c, debuginfod_progressfn_t fn) { } void debuginfod_set_verbose_fd(debuginfod_client *c, int fd) { } @@ -103,6 +105,10 @@ void debuginfod_end 
(debuginfod_client *c) { } #include <pthread.h> +#ifdef HAVE_JSON_C + #include <json-c/json.h> +#endif + static pthread_once_t init_control = PTHREAD_ONCE_INIT; static void @@ -201,6 +207,9 @@ struct handle_data /* Response http headers for this client handle, sent from the server */ char *response_data; size_t response_data_size; + /* Response metadata values for this client handle, sent from the server */ + char *metadata; + size_t metadata_size; }; static size_t @@ -555,18 +564,9 @@ header_callback (char * buffer, size_t size, size_t numitems, void * userdata) } /* Temporary buffer for realloc */ char *temp = NULL; - if (data->response_data == NULL) - { - temp = malloc(numitems); - if (temp == NULL) - return 0; - } - else - { - temp = realloc(data->response_data, data->response_data_size + numitems); - if (temp == NULL) - return 0; - } + temp = realloc(data->response_data, data->response_data_size + numitems); + if (temp == NULL) + return 0; memcpy(temp + data->response_data_size, buffer, numitems-1); data->response_data = temp; @@ -576,13 +576,345 @@ header_callback (char * buffer, size_t size, size_t numitems, void * userdata) return numitems; } +#ifdef HAVE_JSON_C +static size_t +metadata_callback (char * buffer, size_t size, size_t numitems, void * userdata) +{ + if (size != 1) + return 0; + /* Temporary buffer for realloc */ + char *temp = NULL; + struct handle_data *data = (struct handle_data *) userdata; + temp = realloc(data->metadata, data->metadata_size + numitems + 1); + if (temp == NULL) + return 0; + + memcpy(temp + data->metadata_size, buffer, numitems); + data->metadata = temp; + data->metadata_size += numitems; + data->metadata[data->metadata_size] = '\0'; + return numitems; +} +#endif + + +/* This function takes a copy of DEBUGINFOD_URLS, server_urls, and seperates it into an + * array of urls to query. The url_subdir is either 'buildid' or 'metadata', corresponding + * to the query type. Returns 0 on success and -Posix error on faliure. 
+ */ +int +init_server_urls(char* url_subdir, char *server_urls, char ***server_url_list, int *num_urls, int vfd) +{ + /* Initialize the memory to zero */ + char *strtok_saveptr; + char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr); + /* Count number of URLs. */ + int n = 0; + assert(0 == strcmp(url_subdir, "buildid") || 0 == strcmp(url_subdir, "metadata")); + + /* PR 27983: If the url is already set to be used use, skip it */ + while (server_url != NULL) + { + int r; + char *tmp_url; + if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/') + r = asprintf(&tmp_url, "%s%s", server_url, url_subdir); + else + r = asprintf(&tmp_url, "%s/%s", server_url, url_subdir); + + if (r == -1) + { + return -ENOMEM; + } + int url_index; + for (url_index = 0; url_index < n; ++url_index) + { + if(strcmp(tmp_url, (*server_url_list)[url_index]) == 0) + { + url_index = -1; + break; + } + } + if (url_index == -1) + { + if (vfd >= 0) + dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url); + free(tmp_url); + } + else + { + n++; + char ** realloc_ptr; + realloc_ptr = reallocarray(*server_url_list, n, + sizeof(char*)); + if (realloc_ptr == NULL) + { + free (tmp_url); + return -ENOMEM; + } + *server_url_list = realloc_ptr; + (*server_url_list)[n-1] = tmp_url; + } + server_url = strtok_r(NULL, url_delim, &strtok_saveptr); + } + *num_urls = n; + return 0; +} + +/* Some boilerplate for checking curl_easy_setopt. */ +#define curl_easy_setopt_ck(H,O,P) do { \ + CURLcode curl_res = curl_easy_setopt (H,O,P); \ + if (curl_res != CURLE_OK) \ + { \ + if (vfd >= 0) \ + dprintf (vfd, \ + "Bad curl_easy_setopt: %s\n", \ + curl_easy_strerror(curl_res)); \ + return -EINVAL; \ + } \ + } while (0) + + +/* + * This function initializes a CURL handle. It takes optional callbacks for the write + * function and the header function, which if defined will use userdata of type struct handle_data*. + * Specifically the data[i] within an array of struct handle_data's. 
+ * Returns 0 on success and -Posix error on faliure. + */ +int +init_handle(debuginfod_client *client, + size_t (*w_callback)(char *buffer,size_t size,size_t nitems,void *userdata), + size_t (*h_callback)(char *buffer,size_t size,size_t nitems,void *userdata), + struct handle_data *data, int i, long timeout, + int vfd) +{ + data->handle = curl_easy_init(); + if (data->handle == NULL) + { + return -ENETUNREACH; + } + + if (vfd >= 0) + dprintf (vfd, "url %d %s\n", i, data->url); + + /* Only allow http:// + https:// + file:// so we aren't being + redirected to some unsupported protocol. */ + curl_easy_setopt_ck(data->handle, CURLOPT_PROTOCOLS, + (CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_FILE)); + curl_easy_setopt_ck(data->handle, CURLOPT_URL, data->url); + if (vfd >= 0) + curl_easy_setopt_ck(data->handle, CURLOPT_ERRORBUFFER, + data->errbuf); + if(w_callback) { + curl_easy_setopt_ck(data->handle, + CURLOPT_WRITEFUNCTION, w_callback); + curl_easy_setopt_ck(data->handle, CURLOPT_WRITEDATA, data); + } + if (timeout > 0) + { + /* Make sure there is at least some progress, + try to get at least 100K per timeout seconds. 
*/ + curl_easy_setopt_ck (data->handle, CURLOPT_LOW_SPEED_TIME, + timeout); + curl_easy_setopt_ck (data->handle, CURLOPT_LOW_SPEED_LIMIT, + 100 * 1024L); + } + data->response_data = NULL; + data->response_data_size = 0; + curl_easy_setopt_ck(data->handle, CURLOPT_FILETIME, (long) 1); + curl_easy_setopt_ck(data->handle, CURLOPT_FOLLOWLOCATION, (long) 1); + curl_easy_setopt_ck(data->handle, CURLOPT_FAILONERROR, (long) 1); + curl_easy_setopt_ck(data->handle, CURLOPT_NOSIGNAL, (long) 1); + if(h_callback){ + curl_easy_setopt_ck(data->handle, + CURLOPT_HEADERFUNCTION, h_callback); + curl_easy_setopt_ck(data->handle, CURLOPT_HEADERDATA, data); + } + #if LIBCURL_VERSION_NUM >= 0x072a00 /* 7.42.0 */ + curl_easy_setopt_ck(data->handle, CURLOPT_PATH_AS_IS, (long) 1); + #else + /* On old curl; no big deal, canonicalization here is almost the + same, except perhaps for ? # type decorations at the tail. */ + #endif + curl_easy_setopt_ck(data->handle, CURLOPT_AUTOREFERER, (long) 1); + curl_easy_setopt_ck(data->handle, CURLOPT_ACCEPT_ENCODING, ""); + curl_easy_setopt_ck(data->handle, CURLOPT_HTTPHEADER, client->headers); + + return 0; +} + + +/* + * This function busy-waits on one or more curl queries to complete. This can + * be controled via only_one, which, if true, will find the first winner and exit + * once found. If positive maxtime and maxsize dictate the maximum allowed wait times + * and download sizes respectivly. Returns 0 on success and -Posix error on faliure. 
+ */ +int +perform_queries(CURLM *curlm, CURL **target_handle, struct handle_data *data, debuginfod_client *c, + int num_urls, long maxtime, long maxsize, bool only_one, int vfd) +{ + int still_running = -1; + long loops = 0; + int committed_to = -1; + bool verbose_reported = false; + struct timespec start_time, cur_time; + if (c->winning_headers != NULL) + { + free (c->winning_headers); + c->winning_headers = NULL; + } + if ( maxtime > 0 && clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) == -1) + { + return errno; + } + long delta = 0; + do + { + /* Check to see how long querying is taking. */ + if (maxtime > 0) + { + if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time) == -1) + { + return errno; + } + delta = cur_time.tv_sec - start_time.tv_sec; + if ( delta > maxtime) + { + dprintf(vfd, "Timeout with max time=%lds and transfer time=%lds\n", maxtime, delta ); + return -ETIME; + } + } + /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */ + curl_multi_wait(curlm, NULL, 0, 1000, NULL); + CURLMcode curlm_res = curl_multi_perform(curlm, &still_running); + + if(only_one){ + /* If the target file has been found, abort the other queries. */ + if (target_handle && *target_handle != NULL) + { + for (int i = 0; i < num_urls; i++) + if (data[i].handle != *target_handle) + curl_multi_remove_handle(curlm, data[i].handle); + else + { + committed_to = i; + if (c->winning_headers == NULL) + { + c->winning_headers = data[committed_to].response_data; + if (vfd >= 0 && c->winning_headers != NULL) + dprintf(vfd, "\n%s", c->winning_headers); + data[committed_to].response_data = NULL; + data[committed_to].response_data_size = 0; + } + } + } + + if (vfd >= 0 && !verbose_reported && committed_to >= 0) + { + bool pnl = (c->default_progressfn_printed_p && vfd == STDERR_FILENO); + dprintf (vfd, "%scommitted to url %d\n", pnl ? 
"\n" : "", + committed_to); + if (pnl) + c->default_progressfn_printed_p = 0; + verbose_reported = true; + } + } + + if (curlm_res != CURLM_OK) + { + switch (curlm_res) + { + case CURLM_CALL_MULTI_PERFORM: continue; + case CURLM_OUT_OF_MEMORY: return -ENOMEM; + default: return -ENETUNREACH; + } + } + + long dl_size = 0; + if(only_one && target_handle){ // Only bother with progress functions if we're retrieving exactly 1 file + if (*target_handle && (c->progressfn || maxsize > 0)) + { + /* Get size of file being downloaded. NB: If going through + deflate-compressing proxies, this number is likely to be + unavailable, so -1 may show. */ + CURLcode curl_res; +#ifdef CURLINFO_CONTENT_LENGTH_DOWNLOAD_T + curl_off_t cl; + curl_res = curl_easy_getinfo(*target_handle, + CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, + &cl); + if (curl_res == CURLE_OK && cl >= 0) + dl_size = (cl > LONG_MAX ? LONG_MAX : (long)cl); +#else + double cl; + curl_res = curl_easy_getinfo(*target_handle, + CURLINFO_CONTENT_LENGTH_DOWNLOAD, + &cl); + if (curl_res == CURLE_OK) + dl_size = (cl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)cl); +#endif + /* If Content-Length is -1, try to get the size from + X-Debuginfod-Size */ + if (dl_size == -1 && c->winning_headers != NULL) + { + long xdl; + char *hdr = strcasestr(c->winning_headers, "x-debuginfod-size"); + + if (hdr != NULL + && sscanf(hdr, "x-debuginfod-size: %ld", &xdl) == 1) + dl_size = xdl; + } + } + + if (c->progressfn) /* inform/check progress callback */ + { + loops ++; + long pa = loops; /* default param for progress callback */ + if (*target_handle) /* we've committed to a server; report its download progress */ + { + CURLcode curl_res; +#ifdef CURLINFO_SIZE_DOWNLOAD_T + curl_off_t dl; + curl_res = curl_easy_getinfo(*target_handle, + CURLINFO_SIZE_DOWNLOAD_T, + &dl); + if (curl_res == 0 && dl >= 0) + pa = (dl > LONG_MAX ? 
LONG_MAX : (long)dl); +#else + double dl; + curl_res = curl_easy_getinfo(*target_handle, + CURLINFO_SIZE_DOWNLOAD, + &dl); + if (curl_res == 0) + pa = (dl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)dl); +#endif + + } + + if ((*c->progressfn) (c, pa, dl_size)) + break; + } + } + /* Check to see if we are downloading something which exceeds maxsize, if set.*/ + if (target_handle && *target_handle && dl_size > maxsize && maxsize > 0) + { + if (vfd >=0) + dprintf(vfd, "Content-Length too large.\n"); + return -EFBIG; + } + } while (still_running); + return 0; +} + + /* Query each of the server URLs found in $DEBUGINFOD_URLS for the file with the specified build-id, type (debuginfo, executable or source) and filename. filename may be NULL. If found, return a file descriptor for the target, otherwise return an error code. */ static int -debuginfod_query_server (debuginfod_client *c, +debuginfod_query_server_by_buildid (debuginfod_client *c, const unsigned char *build_id, int build_id_len, const char *type, @@ -601,7 +933,7 @@ debuginfod_query_server (debuginfod_client *c, char suffix[PATH_MAX + 1]; /* +1 for zero terminator. */ char build_id_bytes[MAX_BUILD_ID_BYTES * 2 + 1]; int vfd = c->verbose_fd; - int rc; + int rc, r; if (vfd >= 0) { @@ -915,60 +1247,14 @@ debuginfod_query_server (debuginfod_client *c, goto out0; } - /* Initialize the memory to zero */ - char *strtok_saveptr; char **server_url_list = NULL; - char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr); - /* Count number of URLs. 
*/ - int num_urls = 0; - - while (server_url != NULL) - { - /* PR 27983: If the url is already set to be used use, skip it */ - char *slashbuildid; - if (strlen(server_url) > 1 && server_url[strlen(server_url)-1] == '/') - slashbuildid = "buildid"; - else - slashbuildid = "/buildid"; - - char *tmp_url; - if (asprintf(&tmp_url, "%s%s", server_url, slashbuildid) == -1) - { - rc = -ENOMEM; - goto out1; - } - int url_index; - for (url_index = 0; url_index < num_urls; ++url_index) - { - if(strcmp(tmp_url, server_url_list[url_index]) == 0) - { - url_index = -1; - break; - } - } - if (url_index == -1) - { - if (vfd >= 0) - dprintf(vfd, "duplicate url: %s, skipping\n", tmp_url); - free(tmp_url); - } - else - { - num_urls++; - char ** realloc_ptr; - realloc_ptr = reallocarray(server_url_list, num_urls, - sizeof(char*)); - if (realloc_ptr == NULL) - { - free (tmp_url); - rc = -ENOMEM; - goto out1; - } - server_url_list = realloc_ptr; - server_url_list[num_urls-1] = tmp_url; - } - server_url = strtok_r(NULL, url_delim, &strtok_saveptr); - } + char *server_url; + int num_urls; + r = init_server_urls("buildid", server_urls, &server_url_list, &num_urls, vfd); + if(0 != r){ + rc = r; + goto out1; + } int retry_limit = default_retry_limit; const char* retry_limit_envvar = getenv(DEBUGINFOD_RETRY_LIMIT_ENV_VAR); @@ -1038,13 +1324,6 @@ debuginfod_query_server (debuginfod_client *c, data[i].fd = fd; data[i].target_handle = &target_handle; - data[i].handle = curl_easy_init(); - if (data[i].handle == NULL) - { - if (filename) curl_free (escaped_string); - rc = -ENETUNREACH; - goto out2; - } data[i].client = c; if (filename) /* must start with / */ @@ -1055,220 +1334,30 @@ debuginfod_query_server (debuginfod_client *c, } else snprintf(data[i].url, PATH_MAX, "%s/%s/%s", server_url, build_id_bytes, type); - if (vfd >= 0) - dprintf (vfd, "url %d %s\n", i, data[i].url); - /* Some boilerplate for checking curl_easy_setopt. 
*/ -#define curl_easy_setopt_ck(H,O,P) do { \ - CURLcode curl_res = curl_easy_setopt (H,O,P); \ - if (curl_res != CURLE_OK) \ - { \ - if (vfd >= 0) \ - dprintf (vfd, \ - "Bad curl_easy_setopt: %s\n", \ - curl_easy_strerror(curl_res)); \ - rc = -EINVAL; \ - goto out2; \ - } \ - } while (0) - - /* Only allow http:// + https:// + file:// so we aren't being - redirected to some unsupported protocol. */ - curl_easy_setopt_ck(data[i].handle, CURLOPT_PROTOCOLS, - (CURLPROTO_HTTP | CURLPROTO_HTTPS | CURLPROTO_FILE)); - curl_easy_setopt_ck(data[i].handle, CURLOPT_URL, data[i].url); - if (vfd >= 0) - curl_easy_setopt_ck(data[i].handle, CURLOPT_ERRORBUFFER, - data[i].errbuf); - curl_easy_setopt_ck(data[i].handle, - CURLOPT_WRITEFUNCTION, - debuginfod_write_callback); - curl_easy_setopt_ck(data[i].handle, CURLOPT_WRITEDATA, (void*)&data[i]); - if (timeout > 0) - { - /* Make sure there is at least some progress, - try to get at least 100K per timeout seconds. */ - curl_easy_setopt_ck (data[i].handle, CURLOPT_LOW_SPEED_TIME, - timeout); - curl_easy_setopt_ck (data[i].handle, CURLOPT_LOW_SPEED_LIMIT, - 100 * 1024L); - } - data[i].response_data = NULL; - data[i].response_data_size = 0; - curl_easy_setopt_ck(data[i].handle, CURLOPT_FILETIME, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_FOLLOWLOCATION, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_FAILONERROR, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_NOSIGNAL, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_HEADERFUNCTION, - header_callback); - curl_easy_setopt_ck(data[i].handle, CURLOPT_HEADERDATA, - (void *) &(data[i])); -#if LIBCURL_VERSION_NUM >= 0x072a00 /* 7.42.0 */ - curl_easy_setopt_ck(data[i].handle, CURLOPT_PATH_AS_IS, (long) 1); -#else - /* On old curl; no big deal, canonicalization here is almost the - same, except perhaps for ? # type decorations at the tail. 
*/ -#endif - curl_easy_setopt_ck(data[i].handle, CURLOPT_AUTOREFERER, (long) 1); - curl_easy_setopt_ck(data[i].handle, CURLOPT_ACCEPT_ENCODING, ""); - curl_easy_setopt_ck(data[i].handle, CURLOPT_HTTPHEADER, c->headers); + r = init_handle(c, debuginfod_write_callback, header_callback, + data+i,i, timeout, vfd); + if(0 != r){ + rc = r; + if(filename) curl_free (escaped_string); + goto out2; + } curl_multi_add_handle(curlm, data[i].handle); } if (filename) curl_free(escaped_string); + /* Query servers in parallel. */ if (vfd >= 0) dprintf (vfd, "query %d urls in parallel\n", num_urls); - int still_running; - long loops = 0; - int committed_to = -1; - bool verbose_reported = false; - struct timespec start_time, cur_time; - free (c->winning_headers); - c->winning_headers = NULL; - if ( maxtime > 0 && clock_gettime(CLOCK_MONOTONIC_RAW, &start_time) == -1) + r = perform_queries(curlm, &target_handle,data,c, num_urls, maxtime, maxsize, true, vfd); + if (0 != r) { - rc = -errno; + rc = r; goto out2; } - long delta = 0; - do - { - /* Check to see how long querying is taking. */ - if (maxtime > 0) - { - if (clock_gettime(CLOCK_MONOTONIC_RAW, &cur_time) == -1) - { - rc = -errno; - goto out2; - } - delta = cur_time.tv_sec - start_time.tv_sec; - if ( delta > maxtime) - { - dprintf(vfd, "Timeout with max time=%lds and transfer time=%lds\n", maxtime, delta ); - rc = -ETIME; - goto out2; - } - } - /* Wait 1 second, the minimum DEBUGINFOD_TIMEOUT. */ - curl_multi_wait(curlm, NULL, 0, 1000, NULL); - CURLMcode curlm_res = curl_multi_perform(curlm, &still_running); - - /* If the target file has been found, abort the other queries. 
*/ - if (target_handle != NULL) - { - for (int i = 0; i < num_urls; i++) - if (data[i].handle != target_handle) - curl_multi_remove_handle(curlm, data[i].handle); - else - { - committed_to = i; - if (c->winning_headers == NULL) - { - c->winning_headers = data[committed_to].response_data; - data[committed_to].response_data = NULL; - data[committed_to].response_data_size = 0; - } - - } - } - - if (vfd >= 0 && !verbose_reported && committed_to >= 0) - { - bool pnl = (c->default_progressfn_printed_p && vfd == STDERR_FILENO); - dprintf (vfd, "%scommitted to url %d\n", pnl ? "\n" : "", - committed_to); - if (pnl) - c->default_progressfn_printed_p = 0; - verbose_reported = true; - } - - if (curlm_res != CURLM_OK) - { - switch (curlm_res) - { - case CURLM_CALL_MULTI_PERFORM: continue; - case CURLM_OUT_OF_MEMORY: rc = -ENOMEM; break; - default: rc = -ENETUNREACH; break; - } - goto out2; - } - - long dl_size = 0; - if (target_handle && (c->progressfn || maxsize > 0)) - { - /* Get size of file being downloaded. NB: If going through - deflate-compressing proxies, this number is likely to be - unavailable, so -1 may show. */ - CURLcode curl_res; -#ifdef CURLINFO_CONTENT_LENGTH_DOWNLOAD_T - curl_off_t cl; - curl_res = curl_easy_getinfo(target_handle, - CURLINFO_CONTENT_LENGTH_DOWNLOAD_T, - &cl); - if (curl_res == CURLE_OK && cl >= 0) - dl_size = (cl > LONG_MAX ? LONG_MAX : (long)cl); -#else - double cl; - curl_res = curl_easy_getinfo(target_handle, - CURLINFO_CONTENT_LENGTH_DOWNLOAD, - &cl); - if (curl_res == CURLE_OK) - dl_size = (cl >= (double)(LONG_MAX+1UL) ? 
LONG_MAX : (long)cl); -#endif - /* If Content-Length is -1, try to get the size from - X-Debuginfod-Size */ - if (dl_size == -1 && c->winning_headers != NULL) - { - long xdl; - char *hdr = strcasestr(c->winning_headers, "x-debuginfod-size"); - - if (hdr != NULL - && sscanf(hdr, "x-debuginfod-size: %ld", &xdl) == 1) - dl_size = xdl; - } - } - - if (c->progressfn) /* inform/check progress callback */ - { - loops ++; - long pa = loops; /* default param for progress callback */ - if (target_handle) /* we've committed to a server; report its download progress */ - { - CURLcode curl_res; -#ifdef CURLINFO_SIZE_DOWNLOAD_T - curl_off_t dl; - curl_res = curl_easy_getinfo(target_handle, - CURLINFO_SIZE_DOWNLOAD_T, - &dl); - if (curl_res == 0 && dl >= 0) - pa = (dl > LONG_MAX ? LONG_MAX : (long)dl); -#else - double dl; - curl_res = curl_easy_getinfo(target_handle, - CURLINFO_SIZE_DOWNLOAD, - &dl); - if (curl_res == 0) - pa = (dl >= (double)(LONG_MAX+1UL) ? LONG_MAX : (long)dl); -#endif - - } - - if ((*c->progressfn) (c, pa, dl_size)) - break; - } - - /* Check to see if we are downloading something which exceeds maxsize, if set.*/ - if (target_handle && dl_size > maxsize && maxsize > 0) - { - if (vfd >=0) - dprintf(vfd, "Content-Length too large.\n"); - rc = -EFBIG; - goto out2; - } - } while (still_running); /* Check whether a query was successful. If so, assign its handle to verified_handle. 
*/ @@ -1625,7 +1714,7 @@ debuginfod_find_debuginfo (debuginfod_client *client, const unsigned char *build_id, int build_id_len, char **path) { - return debuginfod_query_server(client, build_id, build_id_len, + return debuginfod_query_server_by_buildid(client, build_id, build_id_len, "debuginfo", NULL, path); } @@ -1636,7 +1725,7 @@ debuginfod_find_executable(debuginfod_client *client, const unsigned char *build_id, int build_id_len, char **path) { - return debuginfod_query_server(client, build_id, build_id_len, + return debuginfod_query_server_by_buildid(client, build_id, build_id_len, "executable", NULL, path); } @@ -1645,10 +1734,210 @@ int debuginfod_find_source(debuginfod_client *client, const unsigned char *build_id, int build_id_len, const char *filename, char **path) { - return debuginfod_query_server(client, build_id, build_id_len, + return debuginfod_query_server_by_buildid(client, build_id, build_id_len, "source", filename, path); } +int debuginfod_find_metadata (debuginfod_client *client, + const char* path, char** metadata) +{ + (void) client; + (void) path; + if(NULL == metadata) return EPERM; + *metadata = strdup("[ ]"); // An empty JSON array +#ifdef HAVE_JSON_C + char *server_urls; + char *urls_envvar; + json_object *json_metadata = json_object_new_array(); + int rc = 0, r; + int vfd = client->verbose_fd; + + if(NULL == json_metadata){ + rc = -ENOMEM; + goto out; + } + + if(NULL == path){ + rc = -ENOSYS; + goto out; + } + + if (vfd >= 0) + dprintf (vfd, "debuginfod_find_metadata %s\n", path); + + /* Without query-able URL, we can stop here*/ + urls_envvar = getenv(DEBUGINFOD_URLS_ENV_VAR); + if (vfd >= 0) + dprintf (vfd, "server urls \"%s\"\n", + urls_envvar != NULL ? 
urls_envvar : ""); + if (urls_envvar == NULL || urls_envvar[0] == '\0') + { + rc = -ENOSYS; + goto out; + } + + /* Clear the client of previous urls*/ + free (client->url); + client->url = NULL; + + long maxtime = 0; + const char *maxtime_envvar; + maxtime_envvar = getenv(DEBUGINFOD_MAXTIME_ENV_VAR); + if (maxtime_envvar != NULL) + maxtime = atol (maxtime_envvar); + if (maxtime && vfd >= 0) + dprintf(vfd, "using max time %lds\n", maxtime); + + long timeout = default_timeout; + const char* timeout_envvar = getenv(DEBUGINFOD_TIMEOUT_ENV_VAR); + if (timeout_envvar != NULL) + timeout = atoi (timeout_envvar); + if (vfd >= 0) + dprintf (vfd, "using timeout %ld\n", timeout); + + add_default_headers(client); + + /* make a copy of the envvar so it can be safely modified. */ + server_urls = strdup(urls_envvar); + if (server_urls == NULL) + { + rc = -ENOMEM; + goto out; + } + /* thereafter, goto out1 on error*/ + + char **server_url_list = NULL; + char *server_url; + int num_urls; + r = init_server_urls("metadata", server_urls, &server_url_list, &num_urls, vfd); + if(0 != r){ + rc = r; + goto out1; + } + + CURLM *curlm = client->server_mhandle; + assert (curlm != NULL); + + CURL *target_handle = NULL; + struct handle_data *data = malloc(sizeof(struct handle_data) * num_urls); + if (data == NULL) + { + rc = -ENOMEM; + goto out1; + } + + /* thereafter, goto out2 on error. 
*/ + + + /* Initialize handle_data */ + for (int i = 0; i < num_urls; i++) + { + if ((server_url = server_url_list[i]) == NULL) + break; + if (vfd >= 0) + dprintf (vfd, "init server %d %s\n", i, server_url); + + data[i].errbuf[0] = '\0'; + data[i].target_handle = &target_handle; + data[i].client = client; + data[i].metadata = NULL; + data[i].metadata_size = 0; + + // At the moment only glob path querying is supported, but leave room for + // future expansion + const char *key = "glob"; + snprintf(data[i].url, PATH_MAX, "%s?%s=%s", server_url, key, path); + r = init_handle(client, metadata_callback, header_callback, + data+i, i, timeout, vfd); + if(0 != r){ + rc = r; + goto out2; + } + curl_multi_add_handle(curlm, data[i].handle); + } + + /* Query servers */ + if (vfd >= 0) + dprintf (vfd, "Starting %d queries\n",num_urls); + r = perform_queries(curlm, NULL, data, client, num_urls, maxtime, 0, false, vfd); + if(0 != r){ + rc = r; + goto out2; + } + + /* NOTE: We don't check the return codes of the curl messages since + a metadata query failing silently is just fine. We want to know what's + available from servers which can be connected with no issues. 
+ If running with additional verbosity, the failure will be noted in stderr */ + + /* Building the new json array from all the upstream data + and cleanup while at it + */ + for (int i = 0; i < num_urls; i++) + { + curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */ + if(NULL == data[i].metadata) + { + if (vfd >= 0) + dprintf (vfd, "Query to %s failed with error message:\n\t\"%s\"\n", + data[i].url, data[i].errbuf); + continue; + } + json_object *upstream_metadata = json_tokener_parse(data[i].metadata); + if(NULL == upstream_metadata) continue; + // Combine the upstream metadata into the json array + for (int j = 0, n = json_object_array_length(upstream_metadata); j < n; j++) { + json_object *entry = json_object_array_get_idx(upstream_metadata, j); + json_object_get(entry); // increment reference count + json_object_array_add(json_metadata, entry); + } + json_object_put(upstream_metadata); + + curl_easy_cleanup (data[i].handle); + free (data[i].response_data); + free (data[i].metadata); + } + + free(*metadata); + *metadata = strdup(json_object_to_json_string_ext(json_metadata, JSON_C_TO_STRING_PRETTY)); + + free (data); + goto out1; + +/* error exits */ +out2: + /* remove all handles from multi */ + for (int i = 0; i < num_urls; i++) + { + if (data[i].handle != NULL) + { + curl_multi_remove_handle(curlm, data[i].handle); /* ok to repeat */ + curl_easy_cleanup (data[i].handle); + free (data[i].response_data); + free (data[i].metadata); + } + } + free(data); + +out1: + for (int i = 0; i < num_urls; ++i) + free(server_url_list[i]); + free(server_url_list); + free (server_urls); + +/* general purpose exit */ +out: + json_object_put(json_metadata); + /* Reset sent headers */ + curl_slist_free_all (client->headers); + client->headers = NULL; + client->user_agent_set_p = 0; + + return rc; +#else + return -ENOSYS; +#endif +} /* Add an outgoing HTTP header. 
*/ int debuginfod_add_http_header (debuginfod_client *client, const char* header) diff --git a/debuginfod/debuginfod-find.c b/debuginfod/debuginfod-find.c index 778fb09b..ecaf954e 100644 --- a/debuginfod/debuginfod-find.c +++ b/debuginfod/debuginfod-find.c @@ -31,6 +31,9 @@ #include <gelf.h> #include <libdwelf.h> +#ifdef HAVE_JSON_C + #include <json-c/json.h> +#endif /* Name and version of program. */ ARGP_PROGRAM_VERSION_HOOK_DEF = print_version; @@ -48,7 +51,8 @@ static const char args_doc[] = N_("debuginfo BUILDID\n" "executable BUILDID\n" "executable PATH\n" "source BUILDID /FILENAME\n" - "source PATH /FILENAME\n"); + "source PATH /FILENAME\n" + "metadata GLOB"); /* Definitions of arguments for argp functions. */ @@ -140,6 +144,33 @@ main(int argc, char** argv) return 1; } + if(strcmp(argv[remaining], "metadata") == 0){ + #ifdef HAVE_JSON_C + if (remaining+1 == argc) + { + fprintf(stderr, "If FILETYPE is \"metadata\" then GLOB must be given\n"); + return 1; + } + + char* metadata; + int rc = debuginfod_find_metadata (client, argv[remaining+1], &metadata); + + if (rc < 0) + { + fprintf(stderr, "Server query failed: %s\n", strerror(-rc)); + return 1; + } + // Output the metadata to stdout + printf("%s\n", metadata); + free(metadata); + return 0; + #else + fprintf(stderr, "If FILETYPE is \"metadata\" then libjson-c must be available\n"); + return 1; + #endif + + } + /* If we were passed an ELF file name in the BUILDID slot, look in there. 
*/ unsigned char* build_id = (unsigned char*) argv[remaining+1]; int build_id_len = 0; /* assume text */ diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index 9dc4836b..b9cf15ed 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -115,6 +115,9 @@ using namespace std; #define tid() pthread_self() #endif +#ifdef HAVE_JSON_C + #include <json-c/json.h> +#endif inline bool string_endswith(const string& haystack, const string& needle) @@ -1824,6 +1827,58 @@ handle_buildid_r_match (bool internal_req_p, return r; } +void +add_client_federation_headers(debuginfod_client *client, MHD_Connection* conn){ + // Transcribe incoming User-Agent: + string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: ""; + string ua_complete = string("User-Agent: ") + ua; + debuginfod_add_http_header (client, ua_complete.c_str()); + + // Compute larger XFF:, for avoiding info loss during + // federation, and for future cyclicity detection. + string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: ""; + if (xff != "") + xff += string(", "); // comma separated list + + unsigned int xff_count = 0; + for (auto&& i : xff){ + if (i == ',') xff_count++; + } + + // if X-Forwarded-For: exceeds N hops, + // do not delegate a local lookup miss to upstream debuginfods. + if (xff_count >= forwarded_ttl_limit) + throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \ +and will not query the upstream servers"); + + // Compute the client's numeric IP address only - so can't merge with conninfo() + const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn, + MHD_CONNECTION_INFO_CLIENT_ADDRESS); + struct sockaddr *so = u ? 
u->client_addr : 0; + char hostname[256] = ""; // RFC1035 + if (so && so->sa_family == AF_INET) { + (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0, + NI_NUMERICHOST); + } else if (so && so->sa_family == AF_INET6) { + struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so; + if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) { + struct sockaddr_in addr4; + memset (&addr4, 0, sizeof(addr4)); + addr4.sin_family = AF_INET; + addr4.sin_port = addr6->sin6_port; + memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr)); + (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4), + hostname, sizeof (hostname), NULL, 0, + NI_NUMERICHOST); + } else { + (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0, + NI_NUMERICHOST); + } + } + + string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname); + debuginfod_add_http_header (client, xff_complete.c_str()); +} static struct MHD_Response* handle_buildid_match (bool internal_req_p, @@ -2010,57 +2065,7 @@ handle_buildid (MHD_Connection* conn, debuginfod_set_progressfn (client, & debuginfod_find_progress); if (conn) - { - // Transcribe incoming User-Agent: - string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: ""; - string ua_complete = string("User-Agent: ") + ua; - debuginfod_add_http_header (client, ua_complete.c_str()); - - // Compute larger XFF:, for avoiding info loss during - // federation, and for future cyclicity detection. - string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: ""; - if (xff != "") - xff += string(", "); // comma separated list - - unsigned int xff_count = 0; - for (auto&& i : xff){ - if (i == ',') xff_count++; - } - - // if X-Forwarded-For: exceeds N hops, - // do not delegate a local lookup miss to upstream debuginfods. 
- if (xff_count >= forwarded_ttl_limit) - throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \ -and will not query the upstream servers"); - - // Compute the client's numeric IP address only - so can't merge with conninfo() - const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn, - MHD_CONNECTION_INFO_CLIENT_ADDRESS); - struct sockaddr *so = u ? u->client_addr : 0; - char hostname[256] = ""; // RFC1035 - if (so && so->sa_family == AF_INET) { - (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0, - NI_NUMERICHOST); - } else if (so && so->sa_family == AF_INET6) { - struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so; - if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) { - struct sockaddr_in addr4; - memset (&addr4, 0, sizeof(addr4)); - addr4.sin_family = AF_INET; - addr4.sin_port = addr6->sin6_port; - memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr)); - (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4), - hostname, sizeof (hostname), NULL, 0, - NI_NUMERICHOST); - } else { - (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0, - NI_NUMERICHOST); - } - } - - string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname); - debuginfod_add_http_header (client, xff_complete.c_str()); - } + add_client_federation_headers(client, conn); if (artifacttype == "debuginfo") fd = debuginfod_find_debuginfo (client, @@ -2273,6 +2278,110 @@ handle_metrics (off_t* size) } static struct MHD_Response* +handle_metadata (MHD_Connection* conn, + string path, off_t* size) +{ + MHD_Response* r; + (void) conn; + (void) path; + (void) size; + #ifdef HAVE_JSON_C + sqlite3 *thisdb = dbq; + + json_object *metadata = json_object_new_array(); + // Query locally for matching e, d and s files + if(metadata){ + sqlite_ps *pp = new sqlite_ps (thisdb, "mhd-query-m", + "select * from \n" + "(\n" + "select \"e\" as atype, 
mtime, buildid, sourcetype, source0, source1, null as artifactsrc, null as source0ref " + "from " BUILDIDS "_query_e " + "union all \n" + "select \"d\" as atype, mtime, buildid, sourcetype, source0, source1, null as artifactsrc, null as source0ref " + "from " BUILDIDS "_query_d " + "union all \n" + "select \"s\" as atype, mtime, buildid, sourcetype, source0, source1, artifactsrc, source0ref " + "from " BUILDIDS "_query_s " + "\n" + ")\n" + "where source1 glob ? "); + + pp->reset(); + pp->bind(1, path); + + unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return + + // consume all the rows + int rc; + while (SQLITE_DONE != (rc = pp->step())) + { + if (rc != SQLITE_ROW) throw sqlite_exception(rc, "step"); + + auto get_column_str = [pp](int idx) { return string((const char*) sqlite3_column_text (*pp, idx) ?: ""); }; + + string atype = get_column_str(0); + string mtime = to_string(sqlite3_column_int64 (*pp, 1)); + string buildid = get_column_str(2); + string stype = get_column_str(3); + string source0 = get_column_str(4); + string source1 = get_column_str(5); + string artifactsrc = get_column_str(6); + string source0ref = get_column_str(7); + + json_object *entry = json_object_new_object(); + auto add_entry_metadata = [entry](const char* key, string value) { if(value != "") json_object_object_add(entry, key, json_object_new_string(value.c_str())); }; + + add_entry_metadata("atype", atype); + add_entry_metadata("mtime", mtime); + add_entry_metadata("buildid", buildid); + add_entry_metadata("stype", stype); + add_entry_metadata("source0", source0); + add_entry_metadata("source1", source1); + add_entry_metadata("artifactsrc", artifactsrc); + add_entry_metadata("source0ref", source0ref); + + json_object_array_add(metadata, json_object_get(entry)); // Increase ref count to switch its ownership + json_object_put(entry); + } + pp->reset(); + } + // Query upstream as well + debuginfod_client *client = debuginfod_pool_begin(); + if (metadata && client != NULL) 
+ { + add_client_federation_headers(client, conn); + + char * upstream_metadata; + if(0 == debuginfod_find_metadata(client, path.c_str(), &upstream_metadata)){ + json_object *upstream_metadata_json = json_tokener_parse(upstream_metadata); + if(NULL != upstream_metadata_json) + for (int i = 0, n = json_object_array_length(upstream_metadata_json); i < n; i++) { + json_object *entry = json_object_array_get_idx(upstream_metadata_json, i); + json_object_get(entry); // increment reference count + json_object_array_add(metadata, entry); + } + json_object_put(upstream_metadata_json); + free(upstream_metadata); + } + debuginfod_pool_end (client); + } + + const char* metadata_str = (metadata != NULL) ? + json_object_to_json_string(metadata) : "[ ]" ; + r = MHD_create_response_from_buffer (strlen(metadata_str), + (void*) metadata_str, + MHD_RESPMEM_MUST_COPY); + *size = strlen(metadata_str); + json_object_put(metadata); + #else + throw reportable_exception("webapi error, metadata querying not supported by server"); + #endif + if(r) + add_mhd_response_header(r, "Content-Type", "application/json"); + return r; +} + +static struct MHD_Response* handle_root (off_t* size) { static string version = "debuginfod (" + string (PACKAGE_NAME) + ") " @@ -2406,6 +2515,23 @@ handler_cb (void * /*cls*/, inc_metric("http_requests_total", "type", artifacttype); r = handle_metrics(& http_size); } + else if (url1 == "/metadata") + { + tmp_inc_metric m ("thread_busy", "role", "http-metadata"); + + // At the moment only glob path querying is supported, but leave room for + // future expansion + const char *key = "glob"; + + const char* path = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, key); + if (NULL == path) + throw reportable_exception("/metadata webapi error, need glob"); + + artifacttype = "metadata"; + inc_metric("http_requests_total", "type", artifacttype); + r = handle_metadata(connection, path, &http_size); + + } else if (url1 == "/") { artifacttype = "/"; diff --git 
a/debuginfod/debuginfod.h.in b/debuginfod/debuginfod.h.in index 7d8e4972..076f8869 100644 --- a/debuginfod/debuginfod.h.in +++ b/debuginfod/debuginfod.h.in @@ -79,6 +79,16 @@ int debuginfod_find_source (debuginfod_client *client, const char *filename, char **path); +/* Query the urls contained in $DEBUGINFOD_URLS for metadata + with paths matching the given, glob-supporting path. + + If successful, return 0, otherwise return a posix error code. + If successful, set *metadata to a malloc'd json array + with each entry being a json object of metadata for 1 file. + Caller must free() it later. metadata MUST be non-NULL */ +int debuginfod_find_metadata (debuginfod_client *client, + const char* path, char** metadata); + typedef int (*debuginfod_progressfn_t)(debuginfod_client *c, long a, long b); void debuginfod_set_progressfn(debuginfod_client *c, debuginfod_progressfn_t fn); diff --git a/doc/ChangeLog b/doc/ChangeLog index 269ed06e..7f852824 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,10 @@ +2022-10-06 Ryan Goldberg <rgoldber@redhat.com> + + * debuginfod-find.1: Document metadata query commandline API. + * debuginfod_find_debuginfo.3: Document metadata query C API. + * debuginfod_find_metadata.3: New file. + * Makefile.am (notrans_dist_*_man3): Add it. + 2022-10-28 Arsen Arsenović <arsen@aarsen.me> * readelf.1: Document the --syms alias. 
diff --git a/doc/Makefile.am b/doc/Makefile.am index db2506fd..64ffdaa2 100644 --- a/doc/Makefile.am +++ b/doc/Makefile.am @@ -38,6 +38,7 @@ notrans_dist_man3_MANS += debuginfod_end.3 notrans_dist_man3_MANS += debuginfod_find_debuginfo.3 notrans_dist_man3_MANS += debuginfod_find_executable.3 notrans_dist_man3_MANS += debuginfod_find_source.3 +notrans_dist_man3_MANS += debuginfod_find_metadata.3 notrans_dist_man3_MANS += debuginfod_get_user_data.3 notrans_dist_man3_MANS += debuginfod_get_url.3 notrans_dist_man3_MANS += debuginfod_set_progressfn.3 diff --git a/doc/debuginfod-find.1 b/doc/debuginfod-find.1 index 957ec7e7..caea2462 100644 --- a/doc/debuginfod-find.1 +++ b/doc/debuginfod-find.1 @@ -29,6 +29,8 @@ debuginfod-find \- request debuginfo-related data .B debuginfod-find [\fIOPTION\fP]... source \fIBUILDID\fP \fI/FILENAME\fP .br .B debuginfod-find [\fIOPTION\fP]... source \fIPATH\fP \fI/FILENAME\fP +.br +.B debuginfod-find [\fIOPTION\fP]... metadata \fIGLOB\fP .SH DESCRIPTION \fBdebuginfod-find\fP queries one or more \fBdebuginfod\fP servers for @@ -106,6 +108,21 @@ l l. \../bar/foo.c AT_comp_dir=/zoo/ source BUILDID /zoo//../bar/foo.c .TE +.SS metadata \fIGLOB\fP + +If the given path, which may be globbed, is known to the server, +this request will result in a json string being written to +\fBstdout\fP. The result will consist of a json array, with one +json object for each matching file's metadata. The result can be piped +into \fBjq\fP for easy processing. On failure, an empty array is written +as the output. Unlike finding by buildid, no new file will be created or cached. + +For a given file's metadata, the result is guaranteed to contain \fBatype\fP +(the artifact type), \fBbuildid\fP, \fBmtime\fP (time of last data modification, +seconds since Epoch), \fBstype\fP (the sourcetype), \fBsource0\fP (the path to +the file in the local filesystem), \fBsource1\fP (the path which is being globbed against). +Extra fields and duplicated metadata may appear. 
+ .SH "OPTIONS" .TP diff --git a/doc/debuginfod_find_debuginfo.3 b/doc/debuginfod_find_debuginfo.3 index 3dd83240..a7e8016c 100644 --- a/doc/debuginfod_find_debuginfo.3 +++ b/doc/debuginfod_find_debuginfo.3 @@ -43,6 +43,9 @@ LOOKUP FUNCTIONS .BI " int " build_id_len "," .BI " const char *" filename "," .BI " char ** " path ");" +.BI "int debuginfod_find_metadata(debuginfod_client *" client "," +.BI " const char *" path "," +.BI " char** " metadata ");" OPTIONAL FUNCTIONS @@ -109,6 +112,12 @@ A \fBclient\fP handle should be used from only one thread at a time. A handle may be reused for a series of lookups, which can improve performance due to retention of connections and caches. +.BR debuginfod_find_metadata (), +likewise queries the debuginfod server URLs contained in +.BR $DEBUGINFOD_URLS +but instead retrieves metadata. The given \fIpath\fP may contain +the standard glob characters. + .SH "RETURN VALUE" \fBdebuginfod_begin\fP returns the \fBdebuginfod_client\fP handle to @@ -118,7 +127,11 @@ use with all other calls. On error \fBNULL\fP will be returned and If a find family function is successful, the resulting file is saved to the client cache and a file descriptor to that file is returned. The caller needs to \fBclose\fP() this descriptor. Otherwise, a -negative error code is returned. +negative error code is returned. The one exception is \fBdebuginfod_find_metadata\fP +which likewise returns negative error codes, but on success returns 0 +and sets \fI*metadata\fP to a string-form json array of the found matching +metadata. This should be freed by the caller. See \fIdebuginfod-find(1)\fP for +more information on the metadata being returned. 
.SH "OPTIONAL FUNCTIONS" diff --git a/doc/debuginfod_find_metadata.3 b/doc/debuginfod_find_metadata.3 new file mode 100644 index 00000000..16279936 --- /dev/null +++ b/doc/debuginfod_find_metadata.3 @@ -0,0 +1 @@ +.so man3/debuginfod_find_debuginfo.3 diff --git a/tests/ChangeLog b/tests/ChangeLog index a240a705..79aae920 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,9 @@ +2022-10-06 Ryan Goldberg <rgoldber@redhat.com> + + * run-debuginfod-find-metadata.sh: New test. + * Makefile.am (TESTS): Add run-debuginfod-find-metadata.sh. + (EXTRA_DIST): Likewise. + 2022-09-20 Yonggang Luo <luoyonggang@gmail.com> * Makefile.am (EXTRA_DIST): Remove debuginfod-rpms/hello2.spec. diff --git a/tests/Makefile.am b/tests/Makefile.am index ced4a826..aaa5d35a 100644 --- a/tests/Makefile.am +++ b/tests/Makefile.am @@ -247,7 +247,8 @@ TESTS += run-debuginfod-dlopen.sh \ run-debuginfod-x-forwarded-for.sh \ run-debuginfod-response-headers.sh \ run-debuginfod-extraction-passive.sh \ - run-debuginfod-webapi-concurrency.sh + run-debuginfod-webapi-concurrency.sh \ + run-debuginfod-find-metadata.sh endif if !OLD_LIBMICROHTTPD # Will crash on too old libmicrohttpd @@ -554,6 +555,7 @@ EXTRA_DIST = run-arextract.sh run-arsymtest.sh run-ar.sh \ run-debuginfod-response-headers.sh \ run-debuginfod-extraction-passive.sh \ run-debuginfod-webapi-concurrency.sh \ + run-debuginfod-find-metadata.sh \ debuginfod-rpms/fedora30/hello2-1.0-2.src.rpm \ debuginfod-rpms/fedora30/hello2-1.0-2.x86_64.rpm \ debuginfod-rpms/fedora30/hello2-debuginfo-1.0-2.x86_64.rpm \ diff --git a/tests/run-debuginfod-find-metadata.sh b/tests/run-debuginfod-find-metadata.sh new file mode 100755 index 00000000..18c57067 --- /dev/null +++ b/tests/run-debuginfod-find-metadata.sh @@ -0,0 +1,89 @@ +#!/usr/bin/env bash +# +# Copyright (C) 2019-2021 Red Hat, Inc. +# This file is part of elfutils. 
+# +# This file is free software; you can redistribute it and/or modify +# it under the terms of the GNU General Public License as published by +# the Free Software Foundation; either version 3 of the License, or +# (at your option) any later version. +# +# elfutils is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +# GNU General Public License for more details. +# +# You should have received a copy of the GNU General Public License +# along with this program. If not, see <http://www.gnu.org/licenses/>. + +. $srcdir/debuginfod-subr.sh + +# for test case debugging, uncomment: +set -x +unset VALGRIND_CMD + +type curl 2>/dev/null || { echo "need curl"; exit 77; } +type jq 2>/dev/null || { echo "need jq"; exit 77; } + +pkg-config json-c libcurl || { echo "one or more libraries are missing (libjson-c, libcurl)"; exit 77; } + +DB=${PWD}/.debuginfod_tmp.sqlite +export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache +tempfiles $DB ${DB}_2 + +# This variable is essential and ensures no time-race for claiming ports occurs +# set base to a unique multiple of 100 not used in any other 'run-debuginfod-*' test +base=13100 +get_ports +mkdir R D +cp -rvp ${abs_srcdir}/debuginfod-rpms/rhel7 R +cp -rvp ${abs_srcdir}/debuginfod-debs/*deb D + +env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -R \ + -d $DB -p $PORT1 -t0 -g0 R > vlog$PORT1 2>&1 & +PID1=$! +tempfiles vlog$PORT1 +errfiles vlog$PORT1 + +wait_ready $PORT1 'ready' 1 +wait_ready $PORT1 'thread_work_total{role="traverse"}' 1 +wait_ready $PORT1 'thread_work_pending{role="scan"}' 0 +wait_ready $PORT1 'thread_busy{role="scan"}' 0 + +env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS="http://127.0.0.1:$PORT1 https://bad/url.web" ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -U \ + -d ${DB}_2 -p $PORT2 -t0 -g0 D > vlog$PORT2 2>&1 & +PID2=$! 
+tempfiles vlog$PORT2 +errfiles vlog$PORT2 + +wait_ready $PORT2 'ready' 1 +wait_ready $PORT2 'thread_work_total{role="traverse"}' 1 +wait_ready $PORT2 'thread_work_pending{role="scan"}' 0 +wait_ready $PORT2 'thread_busy{role="scan"}' 0 + +# have clients contact the new server +export DEBUGINFOD_URLS=http://127.0.0.1:$PORT2 + +tempfiles json.txt +# Check that we find 11 files(which means that the local and upstream correctly reply to the query) +N_FOUND=`env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod-find metadata "/?sr*" | jq '. | length'` +test $N_FOUND -eq 11 + +# Query via the webapi as well +MTIME=$(stat -c %Y D/hithere_1.0-1_amd64.deb) +EXPECTED='[ { "atype": "e", "buildid": "f17a29b5a25bd4960531d82aa6b07c8abe84fa66", "mtime": "'$MTIME'", "stype": "R", "source0": "'$PWD'/D/hithere_1.0-1_amd64.deb", "source1": "/usr/bin/hithere"} ]' +test `curl http://127.0.0.1:$PORT2/metadata?glob=/usr/bin/*hi* | jq ". == $EXPECTED" ` = 'true' + +# An empty array is returned on server error or if the file DNE +test `env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod-find metadata "/this/isnt/there" | jq ". == [ ]" ` = 'true' + +kill $PID1 +kill $PID2 +wait $PID1 +wait $PID2 +PID1=0 +PID2=0 + +test `env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod-find metadata "/usr/bin/hithere" | jq ". == [ ]" ` = 'true' + +exit 0 |