summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSteve Dickson <steved@redhat.com>2016-01-18 12:17:28 -0500
committerSteve Dickson <steved@redhat.com>2016-01-18 12:17:28 -0500
commitf4bfde65f5734d507eef589cebc97a7332599eed (patch)
tree047d3db8175538c9bd7b8d7e7f25ffe79b19b3a8
parentb493ff0b2497a3b01409ef603189e65a6f36eea3 (diff)
downloadnfs-utils-pthreads.tar.gz
gssd: Don't wait for pthread_joinspthreads
To make gssd almost bulletproof against hanging threads in the krb5 libs, only wait a second for the thread to terminate. If the termination takes longer, add the thread id to a global table and set a timeout. When the timeout pops, do the join again. If the thread has still not terminated, set another timeout. Continue this loop until the thread terminates. Signed-off-by: Steve Dickson <steved@redhat.com>
-rw-r--r--utils/gssd/gssd.c81
1 files changed, 81 insertions, 0 deletions
diff --git a/utils/gssd/gssd.c b/utils/gssd/gssd.c
index 844a7ae..0f76a88 100644
--- a/utils/gssd/gssd.c
+++ b/utils/gssd/gssd.c
@@ -362,8 +362,81 @@ gssd_destroy_client(struct clnt_info *clp)
static void gssd_scan(void);
+/*
+ * Block a single signal in the calling thread.
+ *
+ * @sig:  signal number to block
+ * @mask: caller-provided set; on return it contains only @sig, so the
+ *        same set can be handed to gssd_sig_rel() to unblock it again
+ */
+static inline void
+gssd_sig_hold(int sig, sigset_t *mask)
+{
+	(void)sigemptyset(mask);
+	(void)sigaddset(mask, sig);
+	(void)pthread_sigmask(SIG_BLOCK, mask, NULL);
+}
+/*
+ * Unblock, in the calling thread, every signal contained in @mask
+ * (typically the set filled in by gssd_sig_hold()).
+ */
+static inline void
+gssd_sig_rel(sigset_t *mask)
+{
+	(void)pthread_sigmask(SIG_UNBLOCK, mask, NULL);
+}
+
#define JOIN_TIMOUT 1
+#define JOIN_ALARM 5	/* seconds passed to alarm() between join retries */
+
+#define TH_TBL_SZ 50	/* number of slots in th_tbl */
+
+/* One tracked thread: a th_tbl slot that can be linked on thread_list. */
+struct threads {
+	TAILQ_ENTRY(threads) list;
+	pthread_t th;		/* 0 marks the slot as free */
+};
+
+/* Threads queued for a later join attempt by the SIGALRM handler. */
+TAILQ_HEAD(thread_list_head, threads) thread_list;
+
+/* Fixed pool of slots that back the entries of thread_list. */
+struct threads th_tbl[TH_TBL_SZ];
+
+static inline void th_tbl_init(void);
+
+/*
+ * Mark every slot of th_tbl as free (th == 0).
+ *
+ * Declared with (void) so the definition is a real prototype and
+ * matches its forward declaration.
+ */
+static inline void th_tbl_init(void)
+{
+	int i;
+
+	for (i = 0; i < TH_TBL_SZ; i++)
+		th_tbl[i].th = 0;
+}
+
+/*
+ * SIGALRM handler: retry the join of every thread parked on thread_list.
+ *
+ * A thread that has terminated is joined, unlinked from thread_list and
+ * its slot is marked free (th == 0) so th_tbl_insert() can reuse it.
+ * If at least one thread could not be joined, the alarm is re-armed so
+ * the joins are retried JOIN_ALARM seconds later.
+ *
+ * @signo: number of the delivered signal (only logged)
+ *
+ * NOTE(review): printerr() is presumably not async-signal-safe; confirm
+ * that calling it from a signal handler is acceptable.
+ */
+static void
+th_tbl_alarm(int signo)
+{
+	struct threads *thp, *next;
+	int setalarm = 0;
+
+	printerr(2, "th_tbl_alarm signal %d\n", signo);
+
+	/*
+	 * Entries are unlinked while walking the list, so fetch the
+	 * successor before the current entry can be removed.
+	 */
+	for (thp = TAILQ_FIRST(&thread_list); thp != NULL; thp = next) {
+		next = TAILQ_NEXT(thp, list);
+
+		if (pthread_tryjoin_np(thp->th, NULL) != 0) {
+			/* not joined yet: try again on the next alarm */
+			setalarm++;
+			continue;
+		}
+
+		/* pthread_t is printed via unsigned long to match %lx */
+		printerr(2, "th_tbl_alarm found 0x%lx\n",
+			(unsigned long)thp->th);
+
+		/*
+		 * Unlink before freeing the slot: a joined entry left on
+		 * the list would be handed to pthread_tryjoin_np() again
+		 * with th == 0, and reusing its slot would link the same
+		 * element twice.
+		 */
+		TAILQ_REMOVE(&thread_list, thp, list);
+		thp->th = 0;
+	}
+
+	if (setalarm)
+		alarm(JOIN_ALARM);
+}
+
+/*
+ * Park a thread that could not be joined yet so the SIGALRM handler
+ * can retry the join later.
+ *
+ * @th: id of the thread to track
+ *
+ * Returns 1 if @th was stored in a free th_tbl slot and linked on
+ * thread_list (an alarm is then armed for JOIN_ALARM seconds), or 0
+ * if every slot is in use.
+ */
+static int th_tbl_insert(pthread_t th)
+{
+	int i;
+	int status = 0;
+	sigset_t mask;
+
+	/* hold off any pending alarms while the table and list change */
+	gssd_sig_hold(SIGALRM, &mask);
+
+	for (i = 0; i < TH_TBL_SZ; i++) {
+		if (th_tbl[i].th != 0)
+			continue;	/* slot in use */
+
+		th_tbl[i].th = th;
+		TAILQ_INSERT_HEAD(&thread_list, &th_tbl[i], list);
+		status = 1;
+		/* pthread_t is printed via unsigned long to match %lx */
+		printerr(2, "th_tbl_insert set 0x%lx\n", (unsigned long)th);
+		break;
+	}
+	gssd_sig_rel(&mask);
+
+	/* arm the timer only when there is something to retry */
+	if (status)
+		alarm(JOIN_ALARM);
+
+	return status;
+}
/*
* Try to join a terminated thread after a short wait.
* If successful return 0 otherwise return 1 which
@@ -381,6 +454,9 @@ static int gssd_try_join(pthread_t th)
if (pthread_timedjoin_np(th, NULL, &ts) == 0)
return 0;
+ if (th_tbl_insert(th))
+ return 0;
+
printerr(2, "gssd_try_join: no join\n");
return 1;
}
@@ -396,6 +472,7 @@ gssd_clnt_gssd_cb(int UNUSED(fd), short UNUSED(which), void *data)
printerr(0, "pthread_create failed: ret %d: %s\n", ret, strerror(errno));
return;
}
+
if (gssd_try_join(th))
pthread_join(th, NULL);
}
@@ -946,6 +1023,7 @@ main(int argc, char *argv[])
exit(EXIT_FAILURE);
}
+ signal(SIGALRM, th_tbl_alarm);
signal(SIGINT, sig_die);
signal(SIGTERM, sig_die);
signal_set(&sighup_ev, SIGHUP, gssd_scan_cb, NULL);
@@ -953,6 +1031,9 @@ main(int argc, char *argv[])
event_set(&inotify_ev, inotify_fd, EV_READ | EV_PERSIST, gssd_inotify_cb, NULL);
event_add(&inotify_ev, NULL);
+ th_tbl_init();
+ TAILQ_INIT(&thread_list);
+
TAILQ_INIT(&topdir_list);
gssd_scan();
daemon_ready();