Diffstat (limited to 'debuginfod/debuginfod.cxx')
-rw-r--r--  debuginfod/debuginfod.cxx | 327
1 file changed, 266 insertions(+), 61 deletions(-)
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 5ef6cc32..000820fe 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -1,5 +1,5 @@
/* Debuginfo-over-http server.
- Copyright (C) 2019-2021 Red Hat, Inc.
+ Copyright (C) 2019-2023 Red Hat, Inc.
Copyright (C) 2021, 2022 Mark J. Wielaard <mark@klomp.org>
This file is part of elfutils.
@@ -68,6 +68,7 @@ extern "C" {
#include <unistd.h>
#include <fcntl.h>
#include <netdb.h>
+#include <fnmatch.h>
/* If fts.h is included before config.h, its indirect inclusions may not
@@ -127,6 +128,9 @@ using namespace std;
#define tid() pthread_self()
#endif
+#ifdef HAVE_JSON_C
+ #include <json-c/json.h>
+#endif
inline bool
string_endswith(const string& haystack, const string& needle)
@@ -185,7 +189,7 @@ static const char DEBUGINFOD_SQLITE_DDL[] =
" foreign key (buildid) references " BUILDIDS "_buildids(id) on update cascade on delete cascade,\n"
" primary key (buildid, file, mtime)\n"
" ) " WITHOUT_ROWID ";\n"
- // Index for faster delete by file identifier
+ // Index for faster delete by file identifier and metadata searches
"create index if not exists " BUILDIDS "_f_de_idx on " BUILDIDS "_f_de (file, mtime);\n"
"create table if not exists " BUILDIDS "_f_s (\n"
" buildid integer not null,\n"
@@ -211,6 +215,8 @@ static const char DEBUGINFOD_SQLITE_DDL[] =
" ) " WITHOUT_ROWID ";\n"
// Index for faster delete by archive file identifier
"create index if not exists " BUILDIDS "_r_de_idx on " BUILDIDS "_r_de (file, mtime);\n"
+ // Index for metadata searches
+ "create index if not exists " BUILDIDS "_r_de_idx2 on " BUILDIDS "_r_de (content);\n"
"create table if not exists " BUILDIDS "_r_sref (\n" // outgoing dwarf sourcefile references from rpm
" buildid integer not null,\n"
" artifactsrc integer not null,\n"
@@ -398,6 +404,9 @@ static const struct argp_option options[] =
{ "passive", ARGP_KEY_PASSIVE, NULL, 0, "Do not scan or groom, read-only database.", 0 },
#define ARGP_KEY_DISABLE_SOURCE_SCAN 0x1009
{ "disable-source-scan", ARGP_KEY_DISABLE_SOURCE_SCAN, NULL, 0, "Do not scan dwarf source info.", 0 },
+#define ARGP_KEY_METADATA_MAXTIME 0x100A
+ { "metadata-maxtime", ARGP_KEY_METADATA_MAXTIME, "SECONDS", 0,
+ "Number of seconds to limit metadata query run time, 0=unlimited.", 0 },
{ NULL, 0, NULL, 0, NULL, 0 },
};
@@ -452,6 +461,8 @@ static unsigned forwarded_ttl_limit = 8;
static bool scan_source_info = true;
static string tmpdir;
static bool passive_p = false;
+static unsigned metadata_maxtime_s = 5;
+
static void set_metric(const string& key, double value);
// static void inc_metric(const string& key);
@@ -653,6 +664,9 @@ parse_opt (int key, char *arg,
case ARGP_KEY_DISABLE_SOURCE_SCAN:
scan_source_info = false;
break;
+ case ARGP_KEY_METADATA_MAXTIME:
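+ // NB: atoi yields 0 on unparseable input, which here means "unlimited"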
+ metadata_maxtime_s = (unsigned) atoi(arg);
+ break;
// case 'h': argp_state_help (state, stderr, ARGP_HELP_LONG|ARGP_HELP_EXIT_OK);
default: return ARGP_ERR_UNKNOWN;
}
@@ -2040,6 +2054,58 @@ handle_buildid_r_match (bool internal_req_p,
return r;
}
+void
+add_client_federation_headers(debuginfod_client *client, MHD_Connection* conn){
+ // Transcribe incoming User-Agent:
+ string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
+ string ua_complete = string("User-Agent: ") + ua;
+ debuginfod_add_http_header (client, ua_complete.c_str());
+
+ // Compute larger XFF:, for avoiding info loss during
+ // federation, and for future cyclicity detection.
+ string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
+ if (xff != "")
+ xff += string(", "); // comma separated list
+
+ unsigned int xff_count = 0;
+ for (auto&& i : xff){
+ if (i == ',') xff_count++;
+ }
+
+ // if X-Forwarded-For: exceeds N hops,
+ // do not delegate a local lookup miss to upstream debuginfods.
+ if (xff_count >= forwarded_ttl_limit)
+ throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwarded-ttl-limit reached \
+and will not query the upstream servers");
+
+ // Compute the client's numeric IP address only - so can't merge with conninfo()
+ const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
+ MHD_CONNECTION_INFO_CLIENT_ADDRESS);
+ struct sockaddr *so = u ? u->client_addr : 0;
+ char hostname[256] = ""; // RFC1035
+ if (so && so->sa_family == AF_INET) {
+ (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
+ NI_NUMERICHOST);
+ } else if (so && so->sa_family == AF_INET6) {
+ struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
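+ // An IPv4-mapped IPv6 address (::ffff:a.b.c.d) carries the IPv4 address in
+ // its last four bytes; unwrap it so XFF records the plain dotted quad.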
+ if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
+ struct sockaddr_in addr4;
+ memset (&addr4, 0, sizeof(addr4));
+ addr4.sin_family = AF_INET;
+ addr4.sin_port = addr6->sin6_port;
+ memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
+ (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
+ hostname, sizeof (hostname), NULL, 0,
+ NI_NUMERICHOST);
+ } else {
+ (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
+ NI_NUMERICHOST);
+ }
+ }
+
+ string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
+ debuginfod_add_http_header (client, xff_complete.c_str());
+}
static struct MHD_Response*
handle_buildid_match (bool internal_req_p,
@@ -2273,58 +2339,8 @@ handle_buildid (MHD_Connection* conn,
debuginfod_set_progressfn (client, & debuginfod_find_progress);
if (conn)
- {
- // Transcribe incoming User-Agent:
- string ua = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "User-Agent") ?: "";
- string ua_complete = string("User-Agent: ") + ua;
- debuginfod_add_http_header (client, ua_complete.c_str());
-
- // Compute larger XFF:, for avoiding info loss during
- // federation, and for future cyclicity detection.
- string xff = MHD_lookup_connection_value (conn, MHD_HEADER_KIND, "X-Forwarded-For") ?: "";
- if (xff != "")
- xff += string(", "); // comma separated list
-
- unsigned int xff_count = 0;
- for (auto&& i : xff){
- if (i == ',') xff_count++;
- }
+ add_client_federation_headers(client, conn);
- // if X-Forwarded-For: exceeds N hops,
- // do not delegate a local lookup miss to upstream debuginfods.
- if (xff_count >= forwarded_ttl_limit)
- throw reportable_exception(MHD_HTTP_NOT_FOUND, "not found, --forwared-ttl-limit reached \
-and will not query the upstream servers");
-
- // Compute the client's numeric IP address only - so can't merge with conninfo()
- const union MHD_ConnectionInfo *u = MHD_get_connection_info (conn,
- MHD_CONNECTION_INFO_CLIENT_ADDRESS);
- struct sockaddr *so = u ? u->client_addr : 0;
- char hostname[256] = ""; // RFC1035
- if (so && so->sa_family == AF_INET) {
- (void) getnameinfo (so, sizeof (struct sockaddr_in), hostname, sizeof (hostname), NULL, 0,
- NI_NUMERICHOST);
- } else if (so && so->sa_family == AF_INET6) {
- struct sockaddr_in6* addr6 = (struct sockaddr_in6*) so;
- if (IN6_IS_ADDR_V4MAPPED(&addr6->sin6_addr)) {
- struct sockaddr_in addr4;
- memset (&addr4, 0, sizeof(addr4));
- addr4.sin_family = AF_INET;
- addr4.sin_port = addr6->sin6_port;
- memcpy (&addr4.sin_addr.s_addr, addr6->sin6_addr.s6_addr+12, sizeof(addr4.sin_addr.s_addr));
- (void) getnameinfo ((struct sockaddr*) &addr4, sizeof (addr4),
- hostname, sizeof (hostname), NULL, 0,
- NI_NUMERICHOST);
- } else {
- (void) getnameinfo (so, sizeof (struct sockaddr_in6), hostname, sizeof (hostname), NULL, 0,
- NI_NUMERICHOST);
- }
- }
-
- string xff_complete = string("X-Forwarded-For: ")+xff+string(hostname);
- debuginfod_add_http_header (client, xff_complete.c_str());
- }
-
if (artifacttype == "debuginfo")
fd = debuginfod_find_debuginfo (client,
(const unsigned char*) buildid.c_str(),
@@ -2341,7 +2357,7 @@ and will not query the upstream servers");
fd = debuginfod_find_section (client,
(const unsigned char*) buildid.c_str(),
0, section.c_str(), NULL);
-
+
if (fd >= 0)
{
if (conn != 0)
@@ -2535,6 +2551,176 @@ handle_metrics (off_t* size)
return r;
}
+
+#ifdef HAVE_JSON_C
+static struct MHD_Response*
+handle_metadata (MHD_Connection* conn,
+ string key, string value, off_t* size)
+{
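+ // Webapi sketch: GET /metadata?key=glob&value=/usr/bin/c* answers with a
+ // JSON array of entries, each shaped roughly like
+ //   { "type":"executable", "buildid":"...", "file":"/usr/bin/cat",
+ //     "archive":"/.../coreutils.rpm" }
+ // where "archive" appears only for archive-borne files.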
+ MHD_Response* r;
+ sqlite3 *thisdb = dbq;
+
+ // Query locally for matching e, d files
+ string op;
+ if (key == "glob")
+ op = "glob";
+ else if (key == "file")
+ op = "=";
+ else
+ throw reportable_exception("/metadata webapi error, unsupported key");
+
+ string sql = string(
+ // explicitly query r_de and f_de just once here, rather than issuing the d and e
+ // queries separately, which would scan the same tables twice and double the work
+ "select d1.executable_p, d1.debuginfo_p, 0 as source_p, b1.hex, f1.name as file, a1.name as archive "
+ "from " BUILDIDS "_r_de d1, " BUILDIDS "_files f1, " BUILDIDS "_buildids b1, " BUILDIDS "_files a1 "
+ "where f1.id = d1.content and a1.id = d1.file and d1.buildid = b1.id and f1.name " + op + " ? "
+ "union all \n"
+ "select d2.executable_p, d2.debuginfo_p, 0, b2.hex, f2.name, NULL "
+ "from " BUILDIDS "_f_de d2, " BUILDIDS "_files f2, " BUILDIDS "_buildids b2 "
+ "where f2.id = d2.file and d2.buildid = b2.id and f2.name " + op + " ? ");
+ // NB: we could query source file names too, thusly:
+ //
+ // select * from " BUILDIDS "_buildids b, " BUILDIDS "_files f1, " BUILDIDS "_r_sref sr
+ // where b.id = sr.buildid and f1.id = sr.artifactsrc and f1.name " + op + "?"
+ // UNION ALL something with BUILDIDS "_f_s"
+ //
+ // But the first part of this query cannot run fast without the same index temp-created
+ // during "maxigroom":
+ // create index " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);
+ // and unfortunately this index is HUGE. It's similar to the size of the _r_sref
+ // table, which is already the largest part of a debuginfod index. Adding that index
+ // would nearly double the .sqlite db size.
+
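+ // NB: "op" is interpolated into the SQL text, but it is restricted above to
+ // "glob" or "="; the user-supplied value itself is only ever bound as a ?
+ // parameter, so no SQL injection is possible.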
+ sqlite_ps *pp = new sqlite_ps (thisdb, "mhd-query-meta-glob", sql);
+ pp->reset();
+ pp->bind(1, value);
+ pp->bind(2, value);
+ // pp->bind(3, value); // "source" query clause disabled
+ unique_ptr<sqlite_ps> ps_closer(pp); // release pp if exception or return
+
+ json_object *metadata = json_object_new_array();
+ if (!metadata)
+ throw libc_exception(ENOMEM, "json allocation");
+
+ // consume all the rows
+ struct timespec ts_start;
+ clock_gettime (CLOCK_MONOTONIC, &ts_start);
+
+ int rc;
+ while (SQLITE_DONE != (rc = pp->step()))
+ {
+ // break out of loop if we have searched too long
+ struct timespec ts_end;
+ clock_gettime (CLOCK_MONOTONIC, &ts_end);
+ double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9;
+ if (metadata_maxtime_s > 0 && deltas > metadata_maxtime_s)
+ break; // NB: no particular signal is given to the client about incompleteness
+
+ if (rc != SQLITE_ROW) throw sqlite_exception(rc, "step");
+
+ int m_executable_p = sqlite3_column_int (*pp, 0);
+ int m_debuginfo_p = sqlite3_column_int (*pp, 1);
+ int m_source_p = sqlite3_column_int (*pp, 2);
+ string m_buildid = (const char*) sqlite3_column_text (*pp, 3) ?: ""; // should always be non-null
+ string m_file = (const char*) sqlite3_column_text (*pp, 4) ?: "";
+ string m_archive = (const char*) sqlite3_column_text (*pp, 5) ?: "";
+
+ // Confirm that m_file matches in the fnmatch(FNM_PATHNAME)
+ // sense, since sqlite's GLOB operator is a looser filter.
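+ // (e.g. sqlite GLOB lets '*' cross '/', so "/usr/*" matches "/usr/bin/cat",
+ // whereas fnmatch with FNM_PATHNAME rejects that match)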
+ if (key == "glob" && fnmatch(value.c_str(), m_file.c_str(), FNM_PATHNAME) != 0)
+ continue;
+
+ auto add_metadata = [metadata, m_buildid, m_file, m_archive](const string& type) {
+ json_object* entry = json_object_new_object();
+ if (NULL == entry) throw libc_exception (ENOMEM, "cannot allocate json");
+ defer_dtor<json_object*,int> entry_d(entry, json_object_put);
+
+ auto add_entry_metadata = [entry](const char* k, string v) {
+ json_object* s;
+ if(v != "") {
+ s = json_object_new_string(v.c_str());
+ if (NULL == s) throw libc_exception (ENOMEM, "cannot allocate json");
+ json_object_object_add(entry, k, s);
+ }
+ };
+
+ add_entry_metadata("type", type.c_str());
+ add_entry_metadata("buildid", m_buildid);
+ add_entry_metadata("file", m_file);
+ if (m_archive != "") add_entry_metadata("archive", m_archive);
+ if (verbose > 3)
+ obatched(clog) << "metadata found local "
+ << json_object_to_json_string_ext(entry,
+ JSON_C_TO_STRING_PRETTY)
+ << endl;
+
+ // json_object_array_add steals a reference, so take an extra one;
+ // the entry_d defer_dtor releases the original at scope exit
+ json_object_array_add(metadata, json_object_get(entry));
+ };
+
+ if (m_executable_p) add_metadata("executable");
+ if (m_debuginfo_p) add_metadata("debuginfo");
+ if (m_source_p) add_metadata("source");
+ }
+ pp->reset();
+
+ unsigned num_local_results = json_object_array_length(metadata);
+
+ // Query upstream as well
+ debuginfod_client *client = debuginfod_pool_begin();
+ if (metadata && client != NULL)
+ {
+ add_client_federation_headers(client, conn);
+
+ int upstream_metadata_fd;
+ upstream_metadata_fd = debuginfod_find_metadata(client, key.c_str(), value.c_str(), NULL);
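+ // on success this is a file descriptor onto the JSON array returned by
+ // the upstream server, parsed below with json_object_from_fd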
+ if (upstream_metadata_fd >= 0) {
+ json_object *upstream_metadata_json = json_object_from_fd(upstream_metadata_fd);
+ if (NULL != upstream_metadata_json)
+ {
+ for (int i = 0, n = json_object_array_length(upstream_metadata_json); i < n; i++) {
+ json_object *entry = json_object_array_get_idx(upstream_metadata_json, i);
+ if (verbose > 3)
+ obatched(clog) << "metadata found remote "
+ << json_object_to_json_string_ext(entry,
+ JSON_C_TO_STRING_PRETTY)
+ << endl;
+
+ json_object_get(entry); // increment reference count
+ json_object_array_add(metadata, entry);
+ }
+ json_object_put(upstream_metadata_json);
+ }
+ close(upstream_metadata_fd);
+ }
+ debuginfod_pool_end (client);
+ }
+
+ unsigned num_total_results = json_object_array_length(metadata);
+
+ if (verbose > 2)
+ obatched(clog) << "metadata found local=" << num_local_results
+ << " remote=" << (num_total_results-num_local_results)
+ << " total=" << num_total_results
+ << endl;
+
+ const char* metadata_str = (metadata != NULL) ?
+ json_object_to_json_string(metadata) : "[ ]" ;
+ if (! metadata_str)
+ throw libc_exception (ENOMEM, "cannot allocate json");
+ r = MHD_create_response_from_buffer (strlen(metadata_str),
+ (void*) metadata_str,
+ MHD_RESPMEM_MUST_COPY);
+ *size = strlen(metadata_str);
+ json_object_put(metadata);
+ if (r)
+ add_mhd_response_header(r, "Content-Type", "application/json");
+ return r;
+}
+#endif
+
+
static struct MHD_Response*
handle_root (off_t* size)
{
@@ -2601,6 +2787,7 @@ handler_cb (void * /*cls*/,
clock_gettime (CLOCK_MONOTONIC, &ts_start);
double afteryou = 0.0;
string artifacttype, suffix;
+ string urlargs; // for logging
try
{
@@ -2669,6 +2856,21 @@ handler_cb (void * /*cls*/,
inc_metric("http_requests_total", "type", artifacttype);
r = handle_metrics(& http_size);
}
+#ifdef HAVE_JSON_C
+ else if (url1 == "/metadata")
+ {
+ tmp_inc_metric m ("thread_busy", "role", "http-metadata");
+ const char* key = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "key");
+ const char* value = MHD_lookup_connection_value(connection, MHD_GET_ARGUMENT_KIND, "value");
+ if (NULL == value || NULL == key)
+ throw reportable_exception("/metadata webapi error, need key and value");
+
+ urlargs = string("?key=") + string(key) + string("&value=") + string(value); // approximate reconstruction, for logging only
+ artifacttype = "metadata";
+ inc_metric("http_requests_total", "type", artifacttype);
+ r = handle_metadata(connection, key, value, &http_size);
+ }
+#endif
else if (url1 == "/")
{
artifacttype = "/";
@@ -2705,7 +2907,7 @@ handler_cb (void * /*cls*/,
// afteryou: delay waiting for other client's identical query to complete
// deltas: total latency, including afteryou waiting
obatched(clog) << conninfo(connection)
- << ' ' << method << ' ' << url
+ << ' ' << method << ' ' << url << urlargs
<< ' ' << http_code << ' ' << http_size
<< ' ' << (int)(afteryou*1000) << '+' << (int)((deltas-afteryou)*1000) << "ms"
<< endl;
@@ -3956,12 +4158,13 @@ void groom()
if (interrupted) return;
// NB: "vacuum" is too heavy for even daily runs: it rewrites the entire db, so is done as maxigroom -G
- sqlite_ps g1 (db, "incremental vacuum", "pragma incremental_vacuum");
- g1.reset().step_ok_done();
- sqlite_ps g2 (db, "optimize", "pragma optimize");
- g2.reset().step_ok_done();
- sqlite_ps g3 (db, "wal checkpoint", "pragma wal_checkpoint=truncate");
- g3.reset().step_ok_done();
+ { sqlite_ps g (db, "incremental vacuum", "pragma incremental_vacuum"); g.reset().step_ok_done(); }
+ // https://www.sqlite.org/lang_analyze.html#approx
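+ // NB: analysis_limit caps the rows scanned per index, keeping "analyze" cheap
+ // enough for routine grooming; "analyze sqlite_schema" then has this
+ // connection reload the freshened statistics.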
+ { sqlite_ps g (db, "analyze setup", "pragma analysis_limit = 1000;\n"); g.reset().step_ok_done(); }
+ { sqlite_ps g (db, "analyze", "analyze"); g.reset().step_ok_done(); }
+ { sqlite_ps g (db, "analyze reload", "analyze sqlite_schema"); g.reset().step_ok_done(); }
+ { sqlite_ps g (db, "optimize", "pragma optimize"); g.reset().step_ok_done(); }
+ { sqlite_ps g (db, "wal checkpoint", "pragma wal_checkpoint=truncate"); g.reset().step_ok_done(); }
database_stats_report();
@@ -4333,6 +4536,8 @@ main (int argc, char *argv[])
if (maxigroom)
{
obatched(clog) << "maxigrooming database, please wait." << endl;
+ // NB: this index alone can nearly double the database size!
+ // NB: this index would be necessary to run source-file metadata searches fast
extra_ddl.push_back("create index if not exists " BUILDIDS "_r_sref_arc on " BUILDIDS "_r_sref(artifactsrc);");
extra_ddl.push_back("delete from " BUILDIDS "_r_sdef where not exists (select 1 from " BUILDIDS "_r_sref b where " BUILDIDS "_r_sdef.content = b.artifactsrc);");
extra_ddl.push_back("drop index if exists " BUILDIDS "_r_sref_arc;");