diff options
author | Frank Ch. Eigler <fche@redhat.com> | 2021-02-04 20:31:56 -0500 |
---|---|---|
committer | Frank Ch. Eigler <fche@redhat.com> | 2021-02-05 12:38:50 -0500 |
commit | aa121ea7f19e002c5e4738616342cd1d7be1bfd2 (patch) | |
tree | 67c8a093b4871b72900ed9a5c5736f73844b1c97 | |
parent | 6362941714548d93a348239ca8b8038005bd57cd (diff) | |
download | elfutils-aa121ea7f19e002c5e4738616342cd1d7be1bfd2.tar.gz |
PR27092: debuginfod low-memory handling
A couple of closely related pieces of work allow earlier warning
about low storage/memory conditions:
- New prometheus metrics to track filesystem freespace, and more
details about some errors.
- Frequent checking of $TMPDIR freespace, to trigger fdcache
emergency flushes.
- Switch to floating point prometheus metrics, to communicate
fractions - and short time intervals - accurately.
- Fix startup-time pthread-creation error handling.
Testing is smoke-test-level only as it is hard to create
free-space-limited $TMPDIRs. Locally tested against tiny through
medium tmpfs filesystems, with or without sqlite db also there. Shows
a pleasant stream of diagnostics and metrics during shortage but
generally does not fail outright. However, catching an actual
libstdc++- or kernel-level OOM is beyond our ken.
Signed-off-by: Frank Ch. Eigler <fche@redhat.com>
-rw-r--r-- | debuginfod/ChangeLog | 14 | ||||
-rw-r--r-- | debuginfod/debuginfod.cxx | 119 | ||||
-rw-r--r-- | doc/ChangeLog | 4 | ||||
-rw-r--r-- | doc/debuginfod.8 | 11 | ||||
-rw-r--r-- | tests/ChangeLog | 4 | ||||
-rwxr-xr-x | tests/run-debuginfod-find.sh | 2 |
6 files changed, 119 insertions, 35 deletions
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog index 2872d667..8de88522 100644 --- a/debuginfod/ChangeLog +++ b/debuginfod/ChangeLog @@ -1,3 +1,17 @@ +2021-02-04 Frank Ch. Eigler <fche@redhat.com> + + PR27092 low-memory handling + * debuginfod.cxx (fdcache_mintmp): New parameter, with cmd-line option. + (parse_opt): Parse it. + (main): Default it. + (statfs_free_enough_p): New function. + (libarchive_fdcache::*): Call it to trigger emergency fdcache flush. + (thread_main_scanner): Call it to report filesystem fullness metrics. + (groom): Ditto. + (set/add_metric): Take double rather than int64_t values. + (archive_exception): Propagate suberror to metric label. + (main): Detect pthread creation fatal errors properly. + 2021-02-02 Frank Ch. Eigler <fche@redhat.com> PR27323 diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx index c9c0dc9b..b34eacc2 100644 --- a/debuginfod/debuginfod.cxx +++ b/debuginfod/debuginfod.cxx @@ -365,6 +365,8 @@ static const struct argp_option options[] = { "fdcache-mbs", ARGP_KEY_FDCACHE_MBS, "MB", 0, "Maximum total size of archive file fdcache.", 0 }, #define ARGP_KEY_FDCACHE_PREFETCH 0x1003 { "fdcache-prefetch", ARGP_KEY_FDCACHE_PREFETCH, "NUM", 0, "Number of archive files to prefetch into fdcache.", 0 }, +#define ARGP_KEY_FDCACHE_MINTMP 0x1004 + { "fdcache-mintmp", ARGP_KEY_FDCACHE_MINTMP, "NUM", 0, "Minimum free space% on tmpdir.", 0 }, { NULL, 0, NULL, 0, NULL, 0 } }; @@ -408,19 +410,20 @@ static bool traverse_logical; static long fdcache_fds; static long fdcache_mbs; static long fdcache_prefetch; +static long fdcache_mintmp; static string tmpdir; -static void set_metric(const string& key, int64_t value); +static void set_metric(const string& key, double value); // static void inc_metric(const string& key); static void set_metric(const string& metric, const string& lname, const string& lvalue, - int64_t value); + double value); static void inc_metric(const string& metric, const string& lname, const string& 
lvalue); static void add_metric(const string& metric, const string& lname, const string& lvalue, - int64_t value); -// static void add_metric(const string& metric, int64_t value); + double value); +// static void add_metric(const string& metric, double value); class tmp_inc_metric { // a RAII style wrapper for exception-safe scoped increment & decrement string m, n, v; @@ -452,7 +455,7 @@ public: double deltas = (ts_end.tv_sec - ts_start.tv_sec) + (ts_end.tv_nsec - ts_start.tv_nsec)/1.e9; - add_metric (m + "_milliseconds_sum", n, v, (deltas*1000)); + add_metric (m + "_milliseconds_sum", n, v, (deltas*1000.0)); inc_metric (m + "_milliseconds_count", n, v); } }; @@ -539,6 +542,9 @@ parse_opt (int key, char *arg, case ARGP_KEY_FDCACHE_PREFETCH: fdcache_prefetch = atol (arg); break; + case ARGP_KEY_FDCACHE_MINTMP: + fdcache_mintmp = atol (arg); + break; case ARGP_KEY_ARG: source_paths.insert(string(arg)); break; @@ -582,7 +588,7 @@ struct sqlite_exception: public reportable_exception { sqlite_exception(int rc, const string& msg): reportable_exception(string("sqlite3 error: ") + msg + ": " + string(sqlite3_errstr(rc) ?: "?")) { - inc_metric("error_count","sqlite3",sqlite3_errstr(rc)); + inc_metric("error_count","sqlite3",sqlite3_errstr(rc)); } }; @@ -603,7 +609,7 @@ struct archive_exception: public reportable_exception } archive_exception(struct archive* a, const string& msg): reportable_exception(string("libarchive error: ") + msg + ": " + string(archive_error_string(a) ?: "?")) { - inc_metric("error_count","libarchive",msg); + inc_metric("error_count","libarchive",msg + ": " + string(archive_error_string(a) ?: "?")); } }; @@ -1092,6 +1098,23 @@ canon_pathname (const string& input) } +// Estimate available free space for a given filesystem via statfs(2). +// Return true if the free fraction is known to be smaller than the +// given minimum percentage. Also update a related metric. 
+bool statfs_free_enough_p(const string& path, const string& label, long minfree = 0) +{ + struct statfs sfs; + int rc = statfs(path.c_str(), &sfs); + if (rc == 0) + { + double s = (double) sfs.f_bavail / (double) sfs.f_blocks; + set_metric("filesys_free_ratio","purpose",label, s); + return ((s * 100.0) < minfree); + } + return false; +} + + // A map-like class that owns a cache of file descriptors (indexed by // file / content names). @@ -1179,7 +1202,13 @@ public: set_metrics(); // NB: we age the cache at lookup time too - if (front_p) + if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp)) + { + inc_metric("fdcache_op_count","op","emerg-flush"); + obatched(clog) << "fdcache emergency flush for filling tmpdir" << endl; + this->limit(0, 0); // emergency flush + } + else if (front_p) this->limit(max_fds, max_mbs); // age cache if required } @@ -1202,7 +1231,13 @@ public: } } - if (fd >= 0) + if (statfs_free_enough_p(tmpdir, "tmpdir", fdcache_mintmp)) + { + inc_metric("fdcache_op_count","op","emerg-flush"); + obatched(clog) << "fdcache emergency flush for filling tmpdir"; + this->limit(0, 0); // emergency flush + } + else if (fd >= 0) this->limit(max_fds, max_mbs); // age cache if required return fd; @@ -1240,6 +1275,7 @@ public: } } + void limit(long maxfds, long maxmbs, bool metrics_p = true) { if (verbose > 3 && (this->max_fds != maxfds || this->max_mbs != maxmbs)) @@ -1277,10 +1313,11 @@ public: if (metrics_p) set_metrics(); } + ~libarchive_fdcache() { // unlink any fdcache entries in $TMPDIR - // don't update metrics; those globals may be already destroyed + // don't update metrics; those globals may be already destroyed limit(0, 0, false); } }; @@ -1447,7 +1484,7 @@ handle_buildid_r_match (bool internal_req_p, // NB: don't unlink (tmppath), as fdcache will take charge of it. // NB: this can take many uninterruptible seconds for a huge file - rc = archive_read_data_into_fd (a, fd); + rc = archive_read_data_into_fd (a, fd); if (rc != ARCHIVE_OK) // e.g. 
ENOSPC! { close (fd); @@ -1528,7 +1565,7 @@ handle_buildid_match (bool internal_req_p, // Report but swallow libc etc. errors here; let the caller // iterate to other matches of the content. } - + return 0; } @@ -1573,7 +1610,7 @@ handle_buildid (MHD_Connection* conn, // If invoked from the scanner threads, use the scanners' read-write // connection. Otherwise use the web query threads' read-only connection. sqlite3 *thisdb = (conn == 0) ? db : dbq; - + sqlite_ps *pp = 0; if (atype_code == "D") @@ -1729,7 +1766,7 @@ handle_buildid (MHD_Connection* conn, //////////////////////////////////////////////////////////////////////// -static map<string,int64_t> metrics; // arbitrary data for /metrics query +static map<string,double> metrics; // arbitrary data for /metrics query // NB: store int64_t since all our metrics are integers; prometheus accepts double static mutex metrics_lock; // NB: these objects get released during the process exit via global dtors @@ -1758,7 +1795,7 @@ metric_label(const string& name, const string& value) // add prometheus-format metric name + label tuple (if any) + value static void -set_metric(const string& metric, int64_t value) +set_metric(const string& metric, double value) { unique_lock<mutex> lock(metrics_lock); metrics[metric] = value; @@ -1774,7 +1811,7 @@ inc_metric(const string& metric) static void set_metric(const string& metric, const string& lname, const string& lvalue, - int64_t value) + double value) { string key = (metric + "{" + metric_label(lname, lvalue) + "}"); unique_lock<mutex> lock(metrics_lock); @@ -1792,7 +1829,7 @@ inc_metric(const string& metric, static void add_metric(const string& metric, const string& lname, const string& lvalue, - int64_t value) + double value) { string key = (metric + "{" + metric_label(lname, lvalue) + "}"); unique_lock<mutex> lock(metrics_lock); @@ -1801,7 +1838,7 @@ add_metric(const string& metric, #if 0 static void add_metric(const string& metric, - int64_t value) + double value) { 
unique_lock<mutex> lock(metrics_lock); metrics[metric] += value; @@ -1819,7 +1856,11 @@ handle_metrics (off_t* size) { unique_lock<mutex> lock(metrics_lock); for (auto&& i : metrics) - o << i.first << " " << i.second << endl; + o << i.first + << " " + << std::setprecision(std::numeric_limits<double>::digits10 + 1) + << i.second + << endl; } const string& os = o.str(); MHD_Response* r = MHD_create_response_from_buffer (os.size(), @@ -2825,11 +2866,17 @@ thread_main_scanner (void* arg) e.report(cerr); } + if (fts_cached || fts_executable || fts_debuginfo || fts_sourcefiles || fts_sref || fts_sdef) + {} // NB: not just if a successful scan - we might have encountered -ENOSPC & failed + (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size + (void) statfs_free_enough_p(tmpdir, "tmpdir"); // this too, in case of fdcache/tmpfile usage + // finished a scanning step -- not a "loop", because we just // consume the traversal loop's work, whenever inc_metric("thread_work_total","role","scan"); } + add_metric("thread_busy", "role", "scan", -1); return 0; } @@ -2871,12 +2918,12 @@ scan_source_paths() { if (interrupted) break; - if (sigusr2 != forced_groom_count) // stop early if groom triggered + if (sigusr2 != forced_groom_count) // stop early if groom triggered { scanq.clear(); // clear previously issued work for scanner threads break; } - + fts_scanned ++; if (verbose > 2) @@ -2895,7 +2942,7 @@ scan_source_paths() continue; // ignore dangling symlink or such string rps = string(rp); free (rp); - + bool ri = !regexec (&file_include_regex, rps.c_str(), 0, 0, 0); bool rx = !regexec (&file_exclude_regex, rps.c_str(), 0, 0, 0); if (!ri || rx) @@ -2934,7 +2981,7 @@ scan_source_paths() case FTS_D: // ignore inc_metric("traversed_total","type","directory"); break; - + default: // ignore inc_metric("traversed_total","type","other"); break; @@ -3013,7 +3060,7 @@ database_stats_report() if (interrupted) break; if (sigusr1 != forced_rescan_count) // stop 
early if scan triggered break; - + int rc = ps_query.step(); if (rc == SQLITE_DONE) break; if (rc != SQLITE_ROW) @@ -3101,6 +3148,8 @@ void groom() database_stats_report(); + (void) statfs_free_enough_p(db_path, "database"); // report sqlite filesystem size + sqlite3_db_release_memory(db); // shrink the process if possible sqlite3_db_release_memory(dbq); // ... for both connections @@ -3252,6 +3301,7 @@ main (int argc, char *argv[]) fdcache_mbs = 1024; // 1 gigabyte else fdcache_mbs = sfs.f_bavail * sfs.f_bsize / 1024 / 1024 / 4; // 25% of free space + fdcache_mintmp = 25; // emergency flush at 25% remaining (75% full) fdcache_prefetch = 64; // guesstimate storage is this much less costly than re-decompression fdcache_fds = (concurrency + fdcache_prefetch) * 2; @@ -3308,7 +3358,7 @@ main (int argc, char *argv[]) "cannot open %s, consider deleting database: %s", db_path.c_str(), sqlite3_errmsg(dbq)); } - + obatched(clog) << "opened database " << db_path << endl; obatched(clog) << "sqlite version " << sqlite3_version << endl; @@ -3408,6 +3458,7 @@ main (int argc, char *argv[]) obatched(clog) << "fdcache mbs " << fdcache_mbs << endl; obatched(clog) << "fdcache prefetch " << fdcache_prefetch << endl; obatched(clog) << "fdcache tmpdir " << tmpdir << endl; + obatched(clog) << "fdcache tmpdir min% " << fdcache_mintmp << endl; obatched(clog) << "groom time " << groom_s << endl; if (scan_archives.size()>0) { @@ -3425,22 +3476,22 @@ main (int argc, char *argv[]) pthread_t pt; rc = pthread_create (& pt, NULL, thread_main_groom, NULL); - if (rc < 0) - error (0, 0, "warning: cannot spawn thread (%d) to groom database\n", rc); + if (rc) + error (EXIT_FAILURE, rc, "cannot spawn thread to groom database\n"); else all_threads.push_back(pt); if (scan_files || scan_archives.size() > 0) { - pthread_create (& pt, NULL, thread_main_fts_source_paths, NULL); - if (rc < 0) - error (0, 0, "warning: cannot spawn thread (%d) to traverse source paths\n", rc); + rc = pthread_create (& pt, NULL, 
thread_main_fts_source_paths, NULL); + if (rc) + error (EXIT_FAILURE, rc, "cannot spawn thread to traverse source paths\n"); all_threads.push_back(pt); for (unsigned i=0; i<concurrency; i++) { - pthread_create (& pt, NULL, thread_main_scanner, NULL); - if (rc < 0) - error (0, 0, "warning: cannot spawn thread (%d) to scan source files / archives\n", rc); + rc = pthread_create (& pt, NULL, thread_main_scanner, NULL); + if (rc) + error (EXIT_FAILURE, rc, "cannot spawn thread to scan source files / archives\n"); all_threads.push_back(pt); } } @@ -3476,7 +3527,7 @@ main (int argc, char *argv[]) (void) regfree (& file_exclude_regex); sqlite3 *database = db; - sqlite3 *databaseq = dbq; + sqlite3 *databaseq = dbq; db = dbq = 0; // for signal_handler not to freak (void) sqlite3_close (databaseq); (void) sqlite3_close (database); diff --git a/doc/ChangeLog b/doc/ChangeLog index c316047c..5cd4fe15 100644 --- a/doc/ChangeLog +++ b/doc/ChangeLog @@ -1,3 +1,7 @@ +2021-02-04 Frank Ch. Eigler <fche@redhat.com> + + * debuginfod.8: Mention new --fdcache-mintmp option. + 2020-12-11 Dmitry V. Levin <ldv@altlinux.org> * debuginfod.8: Fix spelling typos. diff --git a/doc/debuginfod.8 b/doc/debuginfod.8 index a836718f..c33a4b6b 100644 --- a/doc/debuginfod.8 +++ b/doc/debuginfod.8 @@ -213,6 +213,17 @@ most recently used extracted files are kept. Grooming cleans this cache. .TP +.B "\-\-fdcache\-mintmp=NUM" +Configure a disk space threshold for emergency flushing of the cache. +The filesystem holding the cache is checked periodically. If the +available space falls below the given percentage, the cache is +flushed, and the fdcache will stay disabled until the next groom +cycle. This mechanism, along with a few associated /metrics on the webapi, +is intended to give an operator notice about storage scarcity - which +can translate to RAM scarcity if the disk happens to be on a RAM +virtual disk. The default threshold is 25%. 
+ +.TP .B "\-v" Increase verbosity of logging to the standard error file descriptor. May be repeated to increase details. The default verbosity is 0. diff --git a/tests/ChangeLog b/tests/ChangeLog index c6e9f618..907b6351 100644 --- a/tests/ChangeLog +++ b/tests/ChangeLog @@ -1,3 +1,7 @@ +2021-02-04 Frank Ch. Eigler <fche@redhat.com> + + * run-debuginfod-find.sh: Smoke test --fdcache-mintmp option handling. + 2021-01-31 Sergei Trofimovich <slyfox@gentoo.org> * Makefile.am (TESTS_ENVIRONMENT): export CC variable diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh index 7fd3420a..6340f60e 100755 --- a/tests/run-debuginfod-find.sh +++ b/tests/run-debuginfod-find.sh @@ -100,7 +100,7 @@ wait_ready() # would see an error (running the testsuite under root is NOT encouraged). ln -s R/nothing.rpm R/nothing.rpm -env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 & +env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS= ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -F -R -d $DB -p $PORT1 -t0 -g0 --fdcache-fds 1 --fdcache-mbs 2 --fdcache-mintmp 0 -Z .tar.xz -Z .tar.bz2=bzcat -v R F Z L > vlog4 2>&1 & PID1=$! tempfiles vlog4 # Server must become ready |