summaryrefslogtreecommitdiff
path: root/distcc/src/stats.c
diff options
context:
space:
mode:
Diffstat (limited to 'distcc/src/stats.c')
-rw-r--r--distcc/src/stats.c415
1 files changed, 415 insertions, 0 deletions
diff --git a/distcc/src/stats.c b/distcc/src/stats.c
new file mode 100644
index 0000000..e6a92ba
--- /dev/null
+++ b/distcc/src/stats.c
@@ -0,0 +1,415 @@
+/* -*- c-file-style: "java"; indent-tabs-mode: nil -*-
+ *
+ * distcc -- A simple distributed compiler system
+ *
+ * Copyright (C) 2005 by Google
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU General Public License as
+ * published by the Free Software Foundation; either version 2 of the
+ * License, or (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License
+ * along with this program; if not, write to the Free Software
+ * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307
+ * USA
+ */
+
+// Author: Thomas Kho
+
+#include "config.h"
+
+#include <stdio.h>
+#include <stdlib.h>
+#include <unistd.h>
+#include <errno.h>
+#include <signal.h>
+#include <string.h>
+#include <time.h>
+
+#include <sys/socket.h>
+#include <sys/select.h>
+#include <sys/statvfs.h>
+
+#include "exitcode.h"
+#include "distcc.h"
+#include "trace.h"
+#include "dopt.h"
+#include "stats.h"
+#include "srvnet.h"
+#include "util.h"
+#include "netutil.h"
+#include "fcntl.h"
+#include "daemon.h"
+
+int dcc_statspipe[2];
+
+#define MAX_FILENAME_LEN 1024
+
+/* in prefork.c */
+void dcc_manage_kids(int listen_fd);
+
+struct stats_s {
+ int counters[STATS_ENUM_MAX];
+ int longest_job_time;
+ char longest_job_name[MAX_FILENAME_LEN];
+ char longest_job_compiler[MAX_FILENAME_LEN];
+ int io_rate; /* read/write sectors per second */
+
+ int compile_timeseries[300]; /* 300 3-sec time intervals */
+} dcc_stats;
+
+struct statsdata {
+ enum stats_e type;
+
+ /* used only for STATS_COMPILE_OK */
+ int time;
+ char filename[MAX_FILENAME_LEN];
+ char compiler[MAX_FILENAME_LEN];
+};
+
+const char *stats_text[20] = { "TCP_ACCEPT", "REJ_BAD_REQ", "REJ_OVERLOAD",
+ "COMPILE_OK", "COMPILE_ERROR", "COMPILE_TIMEOUT", "CLI_DISCONN",
+ "OTHER" };
+
+/* Call this to initialize stats */
+int dcc_stats_init() {
+ if (arg_stats) {
+ if (pipe(dcc_statspipe) == -1) {
+ return -1;
+ }
+ }
+ memset(&dcc_stats, 0, sizeof(dcc_stats));
+ return 0;
+}
+
+
+/* In the preforking model call this to initialize stats for forked children */
+void dcc_stats_init_kid() {
+ if (arg_stats) {
+ close(dcc_statspipe[0]);
+ }
+}
+
+
+/**
+ * Logs countable event of type e to stats server
+ **/
+void dcc_stats_event(enum stats_e e) {
+ if (arg_stats) {
+ struct statsdata sd;
+ memset(&sd, 0, sizeof(sd));
+ sd.type = e;
+ write(dcc_statspipe[1], &sd, sizeof(sd));
+ }
+}
+
+
+/**
+ * Logs a completed job to stats server
+ **/
+void dcc_stats_compile_ok(char *compiler, char *filename, int time_usec) {
+ if (arg_stats) {
+ struct statsdata sd;
+ memset(&sd, 0, sizeof(sd));
+
+ sd.type = STATS_COMPILE_OK;
+ /* also send compiler, filename & runtime */
+ sd.time = time_usec;
+ strncpy(sd.filename, filename, MAX_FILENAME_LEN);
+ strncpy(sd.compiler, compiler, MAX_FILENAME_LEN);
+ write(dcc_statspipe[1], &sd, sizeof(sd));
+ }
+}
+
+
+/*
+ * Updates a running total of compiles in the last 1, 5, 15 minutes
+ *
+ * Returns the oldest slot
+ */
+static int dcc_stats_update_running_total(int increment) {
+ static int prev_slot;
+ static time_t last = 0;
+ static int total = 0;
+ int i;
+ int cur_slot;
+ int *const cts = dcc_stats.compile_timeseries;
+ time_t now = time(NULL);
+
+ cur_slot = (now / 3) % 300;
+
+ if (last + 900 < now) {
+ /* reset all stats; last call was >15 min ago */
+ for (i = 0; i < 300; i++)
+ cts[i] = total;
+ prev_slot = cur_slot;
+ }
+
+ if (prev_slot != cur_slot) {
+ /* different timeslot, so set the interval [prev, cur) */
+ for (i = (prev_slot)%300; i != cur_slot; i = (i+1)%300) {
+ cts[i] = total;
+ }
+ prev_slot = cur_slot;
+ }
+
+ total += increment;
+ last = now;
+
+ return cur_slot;
+}
+
+
+static int *dcc_stats_get_compile_totals(void) {
+ int cur_slot;
+ static int ct[3]; /* 1, 5, 15 min compile totals */
+ int *const cts = dcc_stats.compile_timeseries;
+
+ cur_slot = dcc_stats_update_running_total(0);
+
+ ct[0] = cts[(cur_slot + 299)%300] - cts[(cur_slot+280)%300];
+ ct[1] = cts[(cur_slot + 299)%300] - cts[(cur_slot+200)%300];
+ ct[2] = cts[(cur_slot + 299)%300] - cts[(cur_slot+1)%300];
+
+ return ct;
+}
+
+
+/* Sets dcc_stats.io_rate at most 50 secs */
+static void dcc_stats_minutely_update(void) {
+ static int prev_io_tot = -1;
+ static time_t last = 0;
+ int n_reads, n_writes;
+ time_t now = time(NULL);
+
+ if (last + 50 < now) {
+ dcc_get_disk_io_stats(&n_reads, &n_writes);
+
+ if (prev_io_tot == -1)
+ dcc_stats.io_rate = -1;
+ else
+ dcc_stats.io_rate = (n_reads + n_writes - prev_io_tot) / (now - last);
+
+ prev_io_tot = n_reads + n_writes;
+ last = now;
+ }
+}
+
+static long dcc_get_tmpdirinfo(void) {
+ const char *tmp_dir;
+ struct statvfs buf;
+
+ if (dcc_get_tmp_top(&tmp_dir) != 0)
+ return -1;
+
+ if (statvfs(tmp_dir, &buf) != 0)
+ return -1;
+
+ if (buf.f_bsize >= 1024)
+ return buf.f_bavail * (buf.f_bsize / 1024) / 1024;
+ else
+ return (buf.f_bavail * buf.f_bsize) / (1024 * 1024);
+}
+
+/**
+ * Accept a connection on the stats port and send the reply, regardless of data
+ * that the client sends us.
+ **/
+static void dcc_service_stats_request(int http_fd) {
+ int acc_fd;
+ int *ct;
+ int num_D;
+ int max_RSS;
+ char *max_RSS_name;
+ size_t reply_len;
+ char challenge[1024];
+ char reply[2048];
+ struct dcc_sockaddr_storage cli_addr;
+ socklen_t cli_len = sizeof(cli_addr);
+ double loadavg[3];
+ int free_space_mb;
+
+ const char replytemplate[] = "\
+HTTP/1.0 200 OK\n\
+Content-Type: text/plain\n\
+Connection: close\n\n\
+argv /distccd\n\
+<distccstats>\n\
+dcc_tcp_accept %d\n\
+dcc_rej_bad_req %d\n\
+dcc_rej_overload %d\n\
+dcc_compile_ok %d\n\
+dcc_compile_error %d\n\
+dcc_compile_timeout %d\n\
+dcc_cli_disconnect %d\n\
+dcc_other %d\n\
+dcc_longest_job %s\n\
+dcc_longest_job_compiler %s\n\
+dcc_longest_job_time_msecs %d\n\
+dcc_max_kids %d\n\
+dcc_current_load %d\n\
+dcc_load1 %1.2lf\n\
+dcc_load2 %1.2lf\n\
+dcc_load3 %1.2lf\n\
+dcc_num_compiles1 %d\n\
+dcc_num_compiles2 %d\n\
+dcc_num_compiles3 %d\n\
+dcc_num_procstate_D %d\n\
+dcc_max_RSS %d\n\
+dcc_max_RSS_name %s\n\
+dcc_io_rate %d\n\
+dcc_free_space %d MB\n\
+</distccstats>\n";
+
+ dcc_stats_minutely_update(); /* force update to get fresh disk io data */
+ ct = dcc_stats_get_compile_totals();
+ dcc_getloadavg(loadavg);
+
+ free_space_mb = dcc_get_tmpdirinfo();
+ dcc_get_proc_stats(&num_D, &max_RSS, &max_RSS_name);
+
+ if (dcc_stats.longest_job_name[0] == 0)
+ strcpy(dcc_stats.longest_job_name, "none");
+ if (dcc_stats.longest_job_compiler[0] == 0)
+ strcpy(dcc_stats.longest_job_compiler, "none");
+
+ acc_fd = accept(http_fd, (struct sockaddr *) &cli_addr, &cli_len);
+ if (dcc_check_client((struct sockaddr *)&cli_addr,
+ (int) cli_len,
+ opt_allowed) == 0) {
+ reply_len = snprintf(reply, 2048, replytemplate,
+ dcc_stats.counters[STATS_TCP_ACCEPT],
+ dcc_stats.counters[STATS_REJ_BAD_REQ],
+ dcc_stats.counters[STATS_REJ_OVERLOAD],
+ dcc_stats.counters[STATS_COMPILE_OK],
+ dcc_stats.counters[STATS_COMPILE_ERROR],
+ dcc_stats.counters[STATS_COMPILE_TIMEOUT],
+ dcc_stats.counters[STATS_CLI_DISCONN],
+ dcc_stats.counters[STATS_OTHER],
+ dcc_stats.longest_job_name,
+ dcc_stats.longest_job_compiler,
+ dcc_stats.longest_job_time,
+ dcc_max_kids,
+ dcc_getcurrentload(),
+ loadavg[0], loadavg[1], loadavg[2],
+ ct[0], ct[1], ct[2],
+ num_D, max_RSS, max_RSS_name,
+ dcc_stats.io_rate,
+ free_space_mb);
+ dcc_set_nonblocking(acc_fd);
+ read(acc_fd, challenge, 1024); /* empty the receive queue */
+ write(acc_fd, reply, reply_len);
+ }
+
+ /* Don't think we need this to prevent RST anymore, since we read() now */
+ //shutdown(acc_fd, SHUT_WR); /* prevent connection reset */
+ dcc_close(acc_fd);
+}
+
+
+/**
+ * Process a packet of stats data
+ **/
+static void dcc_stats_process(struct statsdata *sd) {
+ if (sd->type > STATS_ENUM_MAX) {
+ /* Got a bad message */
+ return;
+ }
+
+ switch (sd->type) {
+
+ case STATS_TCP_ACCEPT:
+ case STATS_REJ_BAD_REQ:
+ case STATS_REJ_OVERLOAD:
+ break;
+ case STATS_COMPILE_OK:
+ /* Record file with longest runtime */
+ if (dcc_stats.longest_job_time < sd->time) {
+ dcc_stats.longest_job_time = sd->time;
+ strncpy(dcc_stats.longest_job_name, sd->filename,
+ MAX_FILENAME_LEN);
+ strncpy(dcc_stats.longest_job_compiler, sd->compiler,
+ MAX_FILENAME_LEN);
+ }
+ case STATS_COMPILE_ERROR:
+ case STATS_COMPILE_TIMEOUT:
+ case STATS_CLI_DISCONN:
+ /* We want to update the running compile total for all jobs that
+ * non-trivially tax the CPU */
+ dcc_stats_update_running_total(1);
+ default: ;
+ }
+
+ dcc_stats.counters[sd->type]++;
+}
+
+
+/**
+ * Collect runtime statistics from kids and serve them via HTTP
+ * Also, maintains the pool of kids.
+ **/
+int dcc_stats_server(int listen_fd)
+{
+ int http_fd, max_fd;
+ int i, ret;
+ fd_set fds, fds_master;
+ struct statsdata sd;
+ struct timeval timeout;
+
+ /* clear stats data */
+ for (i = 0; i < STATS_ENUM_MAX; i++)
+ dcc_stats.counters[i] = 0;
+ dcc_stats.longest_job_time = -1;
+ dcc_stats.longest_job_name[0] = 0;
+ dcc_stats.io_rate = -1;
+
+ if ((ret = dcc_socket_listen(arg_stats_port, &http_fd,
+ opt_listen_addr)) != 0) {
+ return ret;
+ }
+ rs_log_info("HTTP server started on port %d\n", arg_stats_port);
+
+ /* We don't want children to inherit this FD */
+ fcntl(http_fd, F_SETFD, FD_CLOEXEC);
+
+ max_fd = (http_fd > dcc_statspipe[0]) ? (http_fd + 1)
+ : (dcc_statspipe[0] + 1);
+
+ FD_ZERO(&fds_master);
+ FD_SET(dcc_statspipe[0], &fds_master);
+ FD_SET(http_fd, &fds_master);
+
+ while (1) {
+ dcc_stats_minutely_update();
+
+ timeout.tv_sec = 60;
+ timeout.tv_usec = 0;
+ fds = fds_master;
+ ret = select(max_fd, &fds, NULL, NULL, &timeout);
+ if (ret != -1) {
+ if (FD_ISSET(dcc_statspipe[0], &fds)) {
+ /* Received stats report from a child */
+ if (read(dcc_statspipe[0], &sd, sizeof(sd)) != -1) {
+ dcc_stats_process(&sd);
+ }
+ }
+
+ if (FD_ISSET(http_fd, &fds)) {
+ /* Received request on stats reporting port */
+ dcc_service_stats_request(http_fd);
+ }
+ } else {
+ if (errno == EINTR) {
+ /* Interrupted -- SIGCHLD? */
+ }
+ }
+
+ dcc_manage_kids(listen_fd);
+ }
+}