summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorNoah <nsanci@redhat.com>2021-07-07 14:40:10 -0400
committerMark Wielaard <mark@klomp.org>2021-07-08 17:22:05 +0200
commit0ae9b791b47cdee92ac7221e3eead79c83a64a40 (patch)
treed3ab5ee9782b1a1a8de7203505e34be7d08f940f
parentcdcd7dc3d20a002abe1ce318f9b9d0895eee1810 (diff)
downloadelfutils-0ae9b791b47cdee92ac7221e3eead79c83a64a40.tar.gz
debuginfod: PR27711 - Use -I/-X regexes during groom phase
The debuginfod -I/-X regexes operate during traversal to identify those files in need of scanning. The regexes are not used during grooming. This means that if from run to run, the regex changes so that formerly indexed files are excluded from traversal, the data is still retained in the index. This is both good and bad. On one hand, if the underlying data is still available, grooming will preserve the data, and let clients ask for it. On the other hand, if the growing index size is a problem, and one wishes to age no-longer-regex-matching index data out, there is no way. Let's add a debuginfod flag to use regexes during grooming. Specifically, in groom(), where the stat() test exists, also check for regex matching as in scan_source_paths(). Treat failure of the regex the same way as though the file didn't exist. Signed-off-by: Noah Sanci <nsanci@redhat.com>
-rw-r--r--debuginfod/ChangeLog8
-rw-r--r--debuginfod/debuginfod.cxx11
-rw-r--r--doc/debuginfod.83
-rw-r--r--tests/ChangeLog6
-rwxr-xr-xtests/run-debuginfod-find.sh39
5 files changed, 59 insertions, 8 deletions
diff --git a/debuginfod/ChangeLog b/debuginfod/ChangeLog
index d9d11737..a4f20a78 100644
--- a/debuginfod/ChangeLog
+++ b/debuginfod/ChangeLog
@@ -1,3 +1,11 @@
+2021-07-01 Noah Sanci <nsanci@redhat.com>
+
+ PR27711
+ * debuginfod.cxx (options): Add --regex-groom, -r option.
+ (regex_groom): New static bool defaults to false.
+ (parse_opt): Handle 'r' option by setting regex_groom to true.
+ (groom): Introduce and use reg_include and reg_exclude.
+
2021-06-18 Mark Wielaard <mark@klomp.org>
* debuginfod-client.c (debuginfod_begin): Don't use client if
diff --git a/debuginfod/debuginfod.cxx b/debuginfod/debuginfod.cxx
index 543044c6..4f7fd2d5 100644
--- a/debuginfod/debuginfod.cxx
+++ b/debuginfod/debuginfod.cxx
@@ -360,6 +360,7 @@ static const struct argp_option options[] =
{ "database", 'd', "FILE", 0, "Path to sqlite database.", 0 },
{ "ddl", 'D', "SQL", 0, "Apply extra sqlite ddl/pragma to connection.", 0 },
{ "verbose", 'v', NULL, 0, "Increase verbosity.", 0 },
+ { "regex-groom", 'r', NULL, 0,"Uses regexes from -I and -X arguments to groom the database.",0},
#define ARGP_KEY_FDCACHE_FDS 0x1001
{ "fdcache-fds", ARGP_KEY_FDCACHE_FDS, "NUM", 0, "Maximum number of archive files to keep in fdcache.", 0 },
#define ARGP_KEY_FDCACHE_MBS 0x1002
@@ -407,6 +408,7 @@ static map<string,string> scan_archives;
static vector<string> extra_ddl;
static regex_t file_include_regex;
static regex_t file_exclude_regex;
+static bool regex_groom = false;
static bool traverse_logical;
static long fdcache_fds;
static long fdcache_mbs;
@@ -527,6 +529,9 @@ parse_opt (int key, char *arg,
if (rc != 0)
argp_failure(state, 1, EINVAL, "regular expression");
break;
+ case 'r':
+ regex_groom = true;
+ break;
case ARGP_KEY_FDCACHE_FDS:
fdcache_fds = atol (arg);
break;
@@ -3249,8 +3254,11 @@ void groom()
int64_t fileid = sqlite3_column_int64 (files, 1);
const char* filename = ((const char*) sqlite3_column_text (files, 2) ?: "");
struct stat s;
+ bool reg_include = !regexec (&file_include_regex, filename, 0, 0, 0);
+ bool reg_exclude = !regexec (&file_exclude_regex, filename, 0, 0, 0);
+
rc = stat(filename, &s);
- if (rc < 0 || (mtime != (int64_t) s.st_mtime))
+ if ( (regex_groom && reg_exclude && !reg_include) || rc < 0 || (mtime != (int64_t) s.st_mtime) )
{
if (verbose > 2)
obatched(clog) << "groom: forgetting file=" << filename << " mtime=" << mtime << endl;
@@ -3261,7 +3269,6 @@ void groom()
}
else
inc_metric("groomed_total", "decision", "fresh");
-
if (sigusr1 != forced_rescan_count) // stop early if scan triggered
break;
}
diff --git a/doc/debuginfod.8 b/doc/debuginfod.8
index 1ba42cf6..1adf703a 100644
--- a/doc/debuginfod.8
+++ b/doc/debuginfod.8
@@ -159,6 +159,9 @@ scan, independent of the rescan time (including if it was zero),
interrupting a groom pass (if any).
.TP
+.B "\-r"
+Apply the -I and -X during groom cycles, so that files excluded by the regexes are removed from the index. These parameters are in addition to what normally qualifies a file for grooming, not a replacement.
+
.B "\-g SECONDS" "\-\-groom\-time=SECONDS"
Set the groom time for the index database. This is the amount of time
the grooming thread will wait after finishing a grooming pass before
diff --git a/tests/ChangeLog b/tests/ChangeLog
index 7b493c99..3a30e06b 100644
--- a/tests/ChangeLog
+++ b/tests/ChangeLog
@@ -1,3 +1,9 @@
+2021-07-01 Noah Sanci <nsanci@redhat.com>
+
+ PR2711
+ * run-debuginfod-find.sh: Added test case for grooming the database
+ using regexes.
+
2021-07-08 Mark Wielaard <mark@klomp.org>
* Makefile.am (EXTRA_DIST): Fix typo testfile-largealign.bz2 was
diff --git a/tests/run-debuginfod-find.sh b/tests/run-debuginfod-find.sh
index 74a5ceff..a4c1a13a 100755
--- a/tests/run-debuginfod-find.sh
+++ b/tests/run-debuginfod-find.sh
@@ -36,13 +36,14 @@ export DEBUGINFOD_CACHE_PATH=${PWD}/.client_cache
PID1=0
PID2=0
PID3=0
+PID4=0
cleanup()
{
- if [ $PID1 -ne 0 ]; then kill $PID1 || true; wait $PID1; fi
- if [ $PID2 -ne 0 ]; then kill $PID2 || true; wait $PID2; fi
- if [ $PID3 -ne 0 ]; then kill $PID3 || true; wait $PID3; fi
-
+ if [ $PID1 -ne 0 ]; then kill $PID1; wait $PID1; fi
+ if [ $PID2 -ne 0 ]; then kill $PID2; wait $PID2; fi
+ if [ $PID3 -ne 0 ]; then kill $PID3; wait $PID3; fi
+ if [ $PID4 -ne 0 ]; then kill $PID4; wait $PID4; fi
rm -rf F R D L Z ${PWD}/foobar ${PWD}/mocktree ${PWD}/.client_cache* ${PWD}/tmp*
exit_cleanup
}
@@ -293,7 +294,8 @@ kill -USR1 $PID1
wait_ready $PORT1 'thread_work_total{role="traverse"}' 3
wait_ready $PORT1 'thread_work_pending{role="scan"}' 0
wait_ready $PORT1 'thread_busy{role="scan"}' 0
-
+cp $DB $DB.backup
+tempfiles $DB.backup
# Rerun same tests for the prog2 binary
filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find -v debuginfo $BUILDID2 2>vlog`
cmp $filename F/prog2
@@ -710,4 +712,29 @@ DEBUGINFOD_URLS="file://${PWD}/mocktree/"
filename=`testrun ${abs_top_builddir}/debuginfod/debuginfod-find source aaaaaaaaaabbbbbbbbbbccccccccccdddddddddd /my/path/main.c`
cmp $filename ${local_dir}/main.c
-exit 0
+########################################################################
+## PR27711
+# Test to ensure that the --include="^$" --exclude=".*" options remove all files from a database backup
+while true; do
+ PORT3=`expr '(' $RANDOM % 1000 ')' + 9000`
+ ss -atn | fgrep ":$PORT3" || break
+done
+env LD_LIBRARY_PATH=$ldpath DEBUGINFOD_URLS="http://127.0.0.1:$PORT3/" ${abs_builddir}/../debuginfod/debuginfod $VERBOSE -p $PORT3 -t0 -g0 --regex-groom --include="^$" --exclude=".*" -d $DB.backup > vlog$PORT3 2>&1 &
+PID4=$!
+wait_ready $PORT3 'ready' 1
+tempfiles vlog$PORT3
+errfiles vlog$PORT3
+
+kill -USR2 $PID4
+wait_ready $PORT3 'thread_work_total{role="groom"}' 1
+wait_ready $PORT3 'groom{statistic="archive d/e"}' 0
+wait_ready $PORT3 'groom{statistic="archive sdef"}' 0
+wait_ready $PORT3 'groom{statistic="archive sref"}' 0
+wait_ready $PORT3 'groom{statistic="buildids"}' 0
+wait_ready $PORT3 'groom{statistic="file d/e"}' 0
+wait_ready $PORT3 'groom{statistic="file s"}' 0
+wait_ready $PORT3 'groom{statistic="files scanned (#)"}' 0
+wait_ready $PORT3 'groom{statistic="files scanned (mb)"}' 0
+
+kill $PID4
+exit 0;