author    Frank Ch. Eigler <fche@redhat.com>    2021-04-23 13:04:26 -0400
committer Frank Ch. Eigler <fche@redhat.com>    2021-04-30 21:26:27 -0400
commit    6c8b68b0245c8754997b4c4b0ff4ba24974e3fdd (patch)
tree      9e10760105fe5883e5c8008a7866ff3131b9b842
parent    883f3d0151f61f6eb93c397501570527cd19044a (diff)
download  elfutils-6c8b68b0245c8754997b4c4b0ff4ba24974e3fdd.tar.gz
PR27701: debuginfod client: encourage reused debuginfod_client objects
Client objects now carry long-lived curl handles for outgoing
connections.  This makes multiple sequential queries more efficient,
because the TCP connections and/or TLS state are kept around for a
while, avoiding O(100ms) setup latencies.

debuginfod is adjusted to take advantage of this for federation.
Other clients should gradually do this too, perhaps including
elfutils itself (in the libdwfl->debuginfod_client hooks).

A large gdb session with 117 debuginfo downloads was observed to run
twice as fast (45s vs. 1m30s wall-clock time), just by eliminating
this extra setup latency.  This was tested via a debuginfod
intermediary; it should be even faster once gdb reuses its own
debuginfod_client.

Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
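To illustrate the reuse pattern this patch encourages, here is a
minimal sketch of a caller that resolves several build-ids through a
single long-lived debuginfod_client.  The hex build-id strings are
placeholders; everything else uses the documented libdebuginfod API.

    /* Sketch: one client object amortizes connection/TLS setup
       across sequential queries.  Build-ids below are hypothetical. */
    #include <elfutils/debuginfod.h>
    #include <stdio.h>
    #include <stdlib.h>
    #include <unistd.h>

    int main (void)
    {
      static const char *hex_ids[] = {  /* placeholder build-ids */
        "0123456789abcdef0123456789abcdef01234567",
        "fedcba9876543210fedcba9876543210fedcba98",
      };
      debuginfod_client *c = debuginfod_begin ();
      if (c == NULL)
        return 1;
      for (size_t i = 0; i < sizeof hex_ids / sizeof hex_ids[0]; i++)
        {
          char *path = NULL;
          /* A zero build_id_len means the id is a hex string. */
          int fd = debuginfod_find_debuginfo
            (c, (const unsigned char *) hex_ids[i], 0, &path);
          if (fd >= 0)
            {
              printf ("%s\n", path);
              free (path);
              close (fd);
            }
        }
      debuginfod_end (c);  /* tears down the retained curl handles */
      return 0;
    }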
-rw-r--r--  debuginfod/ChangeLog             |  16
-rw-r--r--  debuginfod/debuginfod-client.c   | 121
-rw-r--r--  debuginfod/debuginfod.cxx        |  61
-rw-r--r--  doc/ChangeLog                    |   6
-rw-r--r--  doc/debuginfod_find_debuginfo.3  |   6
-rw-r--r--  tests/ChangeLog                  |   6
-rwxr-xr-x  tests/run-debuginfod-find.sh     |  33
7 files changed, 204 insertions, 45 deletions
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index ed2f77cf..ad9dbc0a 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,19 @@
+2021-04-23 Frank Ch. Eigler <fche@redhat.com>
+
+ PR27701
+ * debuginfod-client.c (struct debuginfod_client): Add long-lived
+ CURL easy and multi handles.
+ (debuginfod_begin, debuginfod_end): ctor/dtor for these.
+ (debuginfod_query_server): Rework to reuse easy & multi handles.
+ (*_envvar): Removed; use DEBUGINFOD_*_ENV_VAR directly instead.
+
+ * debuginfod.cxx (dc_pool): New pile of reusable debuginfod_client
+ objects for upstream federation connections.
+ (debuginfod_pool_{begin,end,groom}): New functions.
+ (handle_buildid): Use them.
+ (handler_cb): Fix keep-alive given libmicrohttpd convention of multiple
+ callbacks.
+
2021-04-15 Frank Ch. Eigler <fche@redhat.com>
* debuginfod.cxx (groom): Only update database stats once.
diff --git a/debuginfod/debuginfod-client.c b/debuginfod/debuginfod-client.c
index d5e7bbdf..7fdc6c9f 100644
--- a/debuginfod/debuginfod-client.c
+++ b/debuginfod/debuginfod-client.c
@@ -119,6 +119,11 @@ struct debuginfod_client
/* File descriptor to output any verbose messages if > 0. */
int verbose_fd;
+ /* Count DEBUGINFOD_URLS elements and corresponding curl handles. */
+ int num_urls;
+ CURL **server_handles;
+ CURLM *server_mhandle;
+
/* Can contain all other context, like cache_path, server_urls,
timeout or other info gotten from environment variables, the
handle data, etc. So those don't have to be reparsed and
@@ -140,15 +145,12 @@ static const time_t cache_default_max_unused_age_s = 604800; /* 1 week */
The default parent directory is $HOME, or '/' if $HOME doesn't exist. */
static const char *cache_default_name = ".debuginfod_client_cache";
static const char *cache_xdg_name = "debuginfod_client";
-static const char *cache_path_envvar = DEBUGINFOD_CACHE_PATH_ENV_VAR;
/* URLs of debuginfods, separated by url_delim. */
-static const char *server_urls_envvar = DEBUGINFOD_URLS_ENV_VAR;
static const char *url_delim = " ";
static const char url_delim_char = ' ';
/* Timeout for debuginfods, in seconds (to get at least 100K). */
-static const char *server_timeout_envvar = DEBUGINFOD_TIMEOUT_ENV_VAR;
static const long default_timeout = 90;
@@ -523,7 +525,13 @@ debuginfod_query_server (debuginfod_client *c,
/* Is there any server we can query? If not, don't do any work,
just return with ENOSYS. Don't even access the cache. */
- urls_envvar = getenv(server_urls_envvar);
+ if (c->num_urls == 0)
+ {
+ rc = -ENOSYS;
+ goto out;
+ }
+
+ urls_envvar = getenv(DEBUGINFOD_URLS_ENV_VAR);
if (vfd >= 0)
dprintf (vfd, "server urls \"%s\"\n",
urls_envvar != NULL ? urls_envvar : "");
@@ -611,7 +619,7 @@ debuginfod_query_server (debuginfod_client *c,
/* Determine location of the cache. The path specified by the debuginfod
cache environment variable takes priority. */
- char *cache_var = getenv(cache_path_envvar);
+ char *cache_var = getenv(DEBUGINFOD_CACHE_PATH_ENV_VAR);
if (cache_var != NULL && strlen (cache_var) > 0)
xalloc_str (cache_path, "%s", cache_var);
else
@@ -691,7 +699,7 @@ debuginfod_query_server (debuginfod_client *c,
}
long timeout = default_timeout;
- const char* timeout_envvar = getenv(server_timeout_envvar);
+ const char* timeout_envvar = getenv(DEBUGINFOD_TIMEOUT_ENV_VAR);
if (timeout_envvar != NULL)
timeout = atoi (timeout_envvar);
@@ -725,24 +733,13 @@ debuginfod_query_server (debuginfod_client *c,
goto out0;
}
- /* Count number of URLs. */
- int num_urls = 0;
- for (int i = 0; server_urls[i] != '\0'; i++)
- if (server_urls[i] != url_delim_char
- && (i == 0 || server_urls[i - 1] == url_delim_char))
- num_urls++;
-
- CURLM *curlm = curl_multi_init();
- if (curlm == NULL)
- {
- rc = -ENETUNREACH;
- goto out0;
- }
-
+ CURLM *curlm = c->server_mhandle;
+ assert (curlm != NULL);
+
/* Tracks which handle should write to fd. Set to the first
handle that is ready to write the target file to the cache. */
CURL *target_handle = NULL;
- struct handle_data *data = malloc(sizeof(struct handle_data) * num_urls);
+ struct handle_data *data = malloc(sizeof(struct handle_data) * c->num_urls);
if (data == NULL)
{
rc = -ENOMEM;
@@ -752,7 +749,7 @@ debuginfod_query_server (debuginfod_client *c,
/* thereafter, goto out1 on error. */
/* Initialize handle_data with default values. */
- for (int i = 0; i < num_urls; i++)
+ for (int i = 0; i < c->num_urls; i++)
{
data[i].handle = NULL;
data[i].fd = -1;
@@ -763,14 +760,16 @@ debuginfod_query_server (debuginfod_client *c,
char *server_url = strtok_r(server_urls, url_delim, &strtok_saveptr);
/* Initialize each handle. */
- for (int i = 0; i < num_urls && server_url != NULL; i++)
+ for (int i = 0; i < c->num_urls && server_url != NULL; i++)
{
if (vfd >= 0)
dprintf (vfd, "init server %d %s\n", i, server_url);
data[i].fd = fd;
data[i].target_handle = &target_handle;
- data[i].handle = curl_easy_init();
+ data[i].handle = c->server_handles[i];
+ assert (data[i].handle != NULL);
+ curl_easy_reset(data[i].handle); // esp. previously sent http headers
data[i].client = c;
if (data[i].handle == NULL)
@@ -833,7 +832,7 @@ debuginfod_query_server (debuginfod_client *c,
/* Query servers in parallel. */
if (vfd >= 0)
- dprintf (vfd, "query %d urls in parallel\n", num_urls);
+ dprintf (vfd, "query %d urls in parallel\n", c->num_urls);
int still_running;
long loops = 0;
int committed_to = -1;
@@ -846,7 +845,7 @@ debuginfod_query_server (debuginfod_client *c,
/* If the target file has been found, abort the other queries. */
if (target_handle != NULL)
{
- for (int i = 0; i < num_urls; i++)
+ for (int i = 0; i < c->num_urls; i++)
if (data[i].handle != target_handle)
curl_multi_remove_handle(curlm, data[i].handle);
else
@@ -943,7 +942,7 @@ debuginfod_query_server (debuginfod_client *c,
curl_easy_strerror (msg->data.result));
if (pnl)
c->default_progressfn_printed_p = 0;
- for (int i = 0; i < num_urls; i++)
+ for (int i = 0; i < c->num_urls; i++)
if (msg->easy_handle == data[i].handle)
{
if (strlen (data[i].errbuf) > 0)
@@ -1063,11 +1062,8 @@ debuginfod_query_server (debuginfod_client *c,
/* Perhaps we need not give up right away; could retry or something ... */
}
- /* Success!!!! */
- for (int i = 0; i < num_urls; i++)
- curl_easy_cleanup(data[i].handle);
-
- curl_multi_cleanup (curlm);
+ curl_multi_remove_handle(curlm, verified_handle);
+ assert (verified_handle == target_handle);
free (data);
free (server_urls);
@@ -1081,10 +1077,6 @@ debuginfod_query_server (debuginfod_client *c,
/* error exits */
out1:
- for (int i = 0; i < num_urls; i++)
- curl_easy_cleanup(data[i].handle);
-
- curl_multi_cleanup(curlm);
unlink (target_cache_tmppath);
close (fd); /* before the rmdir, otherwise it'll fail */
(void) rmdir (target_cache_dir); /* nop if not empty */
@@ -1095,6 +1087,11 @@ debuginfod_query_server (debuginfod_client *c,
/* general purpose exit */
out:
+ /* Reset sent headers */
+ curl_slist_free_all (c->headers);
+ c->headers = NULL;
+ c->user_agent_set_p = 0;
+
/* Conclude the last \r status line */
/* Another possibility is to use the ANSI CSI n K EL "Erase in Line"
code. That way, the previously printed messages would be erased,
@@ -1127,7 +1124,9 @@ debuginfod_begin (void)
{
debuginfod_client *client;
size_t size = sizeof (struct debuginfod_client);
+ const char* server_urls = NULL;
client = (debuginfod_client *) calloc (1, size);
+
if (client != NULL)
{
if (getenv(DEBUGINFOD_PROGRESS_ENV_VAR))
@@ -1137,6 +1136,51 @@ debuginfod_begin (void)
else
client->verbose_fd = -1;
}
+
+ /* Count the DEBUGINFOD_URLS and create the long-lived curl handles. */
+ client->num_urls = 0;
+ server_urls = getenv (DEBUGINFOD_URLS_ENV_VAR);
+ if (server_urls != NULL)
+ for (int i = 0; server_urls[i] != '\0'; i++)
+ if (server_urls[i] != url_delim_char
+ && (i == 0 || server_urls[i - 1] == url_delim_char))
+ client->num_urls++;
+
+ client->server_handles = calloc (client->num_urls, sizeof(CURL *));
+ if (client->server_handles == NULL)
+ goto out1;
+
+ // allocate N curl easy handles
+ for (int i=0; i<client->num_urls; i++)
+ {
+ client->server_handles[i] = curl_easy_init ();
+ if (client->server_handles[i] == NULL)
+ {
+ for (i--; i >= 0; i--)
+ curl_easy_cleanup (client->server_handles[i]);
+ goto out2;
+ }
+ }
+
+ // allocate 1 curl multi handle
+ client->server_mhandle = curl_multi_init ();
+ if (client->server_mhandle == NULL)
+ goto out3;
+
+ goto out;
+
+ out3:
+ for (int i=0; i<client->num_urls; i++)
+ curl_easy_cleanup (client->server_handles[i]);
+
+ out2:
+ free (client->server_handles);
+
+ out1:
+ free (client);
+ client = NULL;
+
+ out:
return client;
}
@@ -1165,6 +1209,11 @@ debuginfod_end (debuginfod_client *client)
if (client == NULL)
return;
+ // assume that all the easy handles have already been removed from the multi handle
+ for (int i=0; i<client->num_urls; i++)
+ curl_easy_cleanup (client->server_handles[i]);
+ free (client->server_handles);
+ curl_multi_cleanup (client->server_mhandle);
curl_slist_free_all (client->headers);
free (client->url);
free (client);
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 50777f1f..0e6fd13f 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -1573,6 +1573,46 @@ debuginfod_find_progress (debuginfod_client *, long a, long b)
}
+// a little lru pool of debuginfod_client*s for reuse between query threads
+
+mutex dc_pool_lock;
+deque<debuginfod_client*> dc_pool;
+
+debuginfod_client* debuginfod_pool_begin()
+{
+ unique_lock<mutex> lock(dc_pool_lock);
+ if (dc_pool.size() > 0)
+ {
+ inc_metric("dc_pool_op_count","op","begin-reuse");
+ debuginfod_client *c = dc_pool.front();
+ dc_pool.pop_front();
+ return c;
+ }
+ inc_metric("dc_pool_op_count","op","begin-new");
+ return debuginfod_begin();
+}
+
+
+void debuginfod_pool_groom()
+{
+ unique_lock<mutex> lock(dc_pool_lock);
+ while (dc_pool.size() > 0)
+ {
+ inc_metric("dc_pool_op_count","op","end");
+ debuginfod_end(dc_pool.front());
+ dc_pool.pop_front();
+ }
+}
+
+
+void debuginfod_pool_end(debuginfod_client* c)
+{
+ unique_lock<mutex> lock(dc_pool_lock);
+ inc_metric("dc_pool_op_count","op","end-save");
+ dc_pool.push_front(c); // accelerate reuse, vs. push_back
+}
+
+
static struct MHD_Response*
handle_buildid (MHD_Connection* conn,
const string& buildid /* unsafe */,
@@ -1672,7 +1712,7 @@ handle_buildid (MHD_Connection* conn,
// is to defer to other debuginfo servers.
int fd = -1;
- debuginfod_client *client = debuginfod_begin ();
+ debuginfod_client *client = debuginfod_pool_begin ();
if (client != NULL)
{
debuginfod_set_progressfn (client, & debuginfod_find_progress);
@@ -1721,7 +1761,7 @@ handle_buildid (MHD_Connection* conn,
}
else
fd = -errno; /* Set by debuginfod_begin. */
- debuginfod_end (client);
+ debuginfod_pool_end (client);
if (fd >= 0)
{
@@ -1892,11 +1932,25 @@ handler_cb (void * /*cls*/,
const char * /*version*/,
const char * /*upload_data*/,
size_t * /*upload_data_size*/,
- void ** /*con_cls*/)
+ void ** ptr)
{
struct MHD_Response *r = NULL;
string url_copy = url;
+ /* libmicrohttpd always makes (at least) two callbacks: once just
+    past the headers, and once after the request body has been
+    received.  If we processed the request early (on the first
+    callback) and queued a response, libmicrohttpd would suppress
+    http keep-alive (via connection->read_closed = true). */
+ static int aptr; /* just some random object to use as a flag */
+ if (&aptr != *ptr)
+ {
+ /* never respond on the first callback */
+ *ptr = &aptr;
+ return MHD_YES;
+ }
+ *ptr = NULL; /* reset when done */
+
#if MHD_VERSION >= 0x00097002
enum MHD_Result rc;
#else
@@ -3163,6 +3217,7 @@ void groom()
sqlite3_db_release_memory(db); // shrink the process if possible
sqlite3_db_release_memory(dbq); // ... for both connections
+ debuginfod_pool_groom(); // and release any debuginfod_client objects we've been holding onto
fdcache.limit(0,0); // release the fdcache contents
fdcache.limit(fdcache_fds,fdcache_mbs); // restore status quo parameters
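Condensed from the handle_buildid changes above, the intended call
pattern for the pool is the following sketch (the actual federated
lookup is elided):

    debuginfod_client *client = debuginfod_pool_begin ();
    if (client != NULL)
      {
        debuginfod_set_progressfn (client, & debuginfod_find_progress);
        /* ... upstream debuginfod_find_* lookup ... */
        debuginfod_pool_end (client);  /* park the handle for reuse */
      }

Returning handles via push_front means the most recently used client,
the one with the warmest connections, is handed out again first.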
diff --git a/doc/ChangeLog b/doc/ChangeLog
index 5cd4fe15..05fcd23d 100644
--- a/doc/ChangeLog
+++ b/doc/ChangeLog
@@ -1,3 +1,9 @@
+2021-04-23 Frank Ch. Eigler <fche@redhat.com>
+
+ PR27701
+ * debuginfod_find_debuginfo.3: Specify sequential reuse policy of
+ debuginfod_client objects.
+
2021-02-04 Frank Ch. Eigler <fche@redhat.com>
* debuginfod.8: Mention new --fdcache-mintmp option.
diff --git a/doc/debuginfod_find_debuginfo.3 b/doc/debuginfod_find_debuginfo.3
index cfddb542..5ae44a98 100644
--- a/doc/debuginfod_find_debuginfo.3
+++ b/doc/debuginfod_find_debuginfo.3
@@ -105,6 +105,8 @@ as a debuginfod server begins transferring the target file all of the
connections to the other servers are closed.
A \fBclient\fP handle should be used from only one thread at a time.
+A handle may be reused for a series of lookups, which can improve
+performance due to retention of connections and caches.
.SH "RETURN VALUE"
@@ -175,8 +177,8 @@ of the client object.
.SS "HTTP HEADER"
-Before a lookup function is initiated, a client application may
-add HTTP request headers to future downloads.
+Before each lookup function is initiated, a client application may
+add HTTP request headers. These are reset after each lookup function.
.BR \%debuginfod_add_http_header ()
may be called with strings of the form
.BR \%"Header:\~value" .
diff --git a/tests/ChangeLog b/tests/ChangeLog
index ea44d20c..ac196365 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2021-04-23 Frank Ch. Eigler <fche@redhat.com>
+
+ * run-debuginfod-find.sh: Add a tiny test for client object reuse.
+ Add an "errfiles" test construct to ask the framework to print
+ various files in the case of an error.
+
2021-03-30 Frank Ch. Eigler <fche@redhat.com>
* run-debuginfod-find.sh: Add thread comm checks.
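The "errfiles" construct mentioned above registers files whose
contents should be dumped when any test command fails; a sketch of
the usage (the actual definitions appear in the diff below):

    errfiles vlog$PORT1 vlog$PORT2   # register logs for post-mortem
    some_test_command                # on nonzero exit status, the ERR
                                     # trap prints server metrics plus
                                     # every registered file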
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 8213c8a4..11e84983 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -50,6 +50,29 @@ cleanup()
# clean up trash if we were aborted early
trap cleanup 0 1 2 3 5 9 15
+errfiles_list=
+err() {
+ for port in $PORT1 $PORT2
+ do
+ echo $port metrics
+ curl -s http://127.0.0.1:$port/metrics
+ echo
+ done
+ for x in $errfiles_list
+ do
+ echo "$x"
+ cat $x
+ echo
+ done
+}
+trap err ERR
+
+errfiles() {
+ errfiles_list="$errfiles_list $*"
+}
+
+
+
# find an unused port number
while true; do
PORT1=`expr '(' $RANDOM % 1000 ')' + 9000`
@@ -90,10 +113,7 @@ wait_ready()
done;
if [ $timeout -eq 0 ]; then
- echo "metric $what never changed to $value on port $port"
- curl -s http://127.0.0.1:$port/metrics
- echo "logs for debuginfod with port $port"
- cat vlog$port
+ echo "metric $what never changed to $value on port $port"
exit 1;
fi
}
@@ -106,6 +126,7 @@ ln -s R/nothing.rpm R/nothing.rpm
env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 --fdcache-mintmp 0 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog$PORT1 2>&1 &
PID1=$!
tempfiles vlog$PORT1
+errfiles vlog$PORT1
# Server must become ready
wait_ready $PORT1 'ready' 1
export DEBUGINFOD_URLS=http://127.0.0.1:$PORT1/ # or without trailing /
@@ -414,6 +435,7 @@ mkdir -p $DEBUGINFOD_CACHE_PATH
env LD_LIBRARY_PATH=$ldpath ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -U -d ${DB}_2 -p $PORT2 -L L D > vlog$PORT2 2>&1 &
PID2=$!
tempfiles vlog$PORT2
+errfiles vlog$PORT2
tempfiles ${DB}_2
wait_ready $PORT2 'ready' 1
wait_ready $PORT2 'thread_work_total{role="traverse"}' 1
@@ -502,6 +524,9 @@ curl -s http://127.0.0.1:$PORT2/buildid/deadbeef/debuginfo > /dev/null || true
curl -s http://127.0.0.1:$PORT2/buildid/deadbeef/badtype > /dev/null || true
(curl -s http://127.0.0.1:$PORT2/metrics | grep 'badtype') && false
+# Confirm that some debuginfod client pools are being used
+curl -s http://127.0.0.1:$PORT2/metrics | grep 'dc_pool_op.*reuse'
+
########################################################################
# Corrupt the sqlite database and get debuginfod to trip across its errors
curl -s http://127.0.0.1:$PORT1/metrics | grep 'sqlite3.*reset'