summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorJim Warner <james.warner@comcast.net>2013-11-08 00:00:00 -0600
committerCraig Small <csmall@enc.com.au>2013-11-25 20:57:32 +1100
commitf12c0d5c6e84f9409ac3a73c066841a8ff5aab0b (patch)
tree41691c1dc45d8ebbfe7400a8176675cd40c16fc0
parentbdb2fe005616bb40f2da53da02b01f91cabf9a87 (diff)
downloadprocps-ng-f12c0d5c6e84f9409ac3a73c066841a8ff5aab0b.tar.gz
top: minimize the statistics overhead for numa support
A recent libnuma potential corruption problem solution has caused me to reevaluate some associated numa logic for efficiency.

Here is a summary of the problems that exist with current libnuma/user possible interactions:

. Whenever the numa library was present, extra overhead would always be incurred in maintaining the node stats, even when the '2' or '3' commands were not being used.

. As part of such overhead, a separate loop was used to reinitialize each cpu/node structure with each display cycle so that prior accumulated totals were preserved. Again, it didn't matter if numa data was really shown.

This commit attempts to refocus on the 'critical path' costs in a running top by optimizing for the occasions when numa node data is not being displayed. Under such conditions, no extra overhead will be incurred whether or not a distribution has the libnuma library present.

To achieve this goal, some additional overhead will be incurred, but only when actually displaying numa data. And all such new costs have been minimized in spite of the gcc inclination to duplicate subscript resolution.

Reference(s): commit 24bd950cb2e1722d459461f0f9c0c30a4b9ffdaa

Signed-off-by: Jim Warner <james.warner@comcast.net>
-rw-r--r--top/top.c46
1 file changed, 23 insertions, 23 deletions
diff --git a/top/top.c b/top/top.c
index e619ddd..9d12693 100644
--- a/top/top.c
+++ b/top/top.c
@@ -2361,18 +2361,9 @@ static CPU_t *cpus_refresh (CPU_t *cpus) {
#endif
#ifndef NUMA_DISABLE
- for (i = 0; i < Numa_node_tot; i++) {
- node = sumSLOT + 1 + i;
- // remember from last time around
- memcpy(&cpus[node].sav, &cpus[node].cur, sizeof(CT_t));
- // initialize current node statistics
- memset(&cpus[node].cur, 0, sizeof(CT_t));
-#ifndef CPU_ZEROTICS
- cpus[node].edge = cpus[sumSLOT].edge;
- // this is for symmetry only, it's not currently required
- cpus[node].cur.tot = cpus[sumSLOT].cur.tot;
-#endif
- }
+ // forget all of the prior node statistics (maybe)
+ if (CHKw(Curwin, View_CPUNOD))
+ memset(&cpus[sumSLOT + 1], 0, Numa_node_tot * sizeof(CPU_t));
#endif
// now value each separate cpu's tics...
@@ -2400,21 +2391,30 @@ static CPU_t *cpus_refresh (CPU_t *cpus) {
cpus[i].id = i;
#endif
#ifndef NUMA_DISABLE
- if (Numa_node_tot
+ /* henceforth, with just a little more arithmetic we can avoid
+ maintaining *any* node stats unless they're actually needed */
+ if (CHKw(Curwin, View_CPUNOD)
+ && Numa_node_tot
&& -1 < (node = Numa_node_of_cpu(cpus[i].id))) {
+ // use our own pointer to avoid gcc subscript bloat
+ CPU_t *nod_ptr = &cpus[sumSLOT + 1 + node];
+ nod_ptr->cur.u += cpus[i].cur.u; nod_ptr->sav.u += cpus[i].sav.u;
+ nod_ptr->cur.n += cpus[i].cur.n; nod_ptr->sav.n += cpus[i].sav.n;
+ nod_ptr->cur.s += cpus[i].cur.s; nod_ptr->sav.s += cpus[i].sav.s;
+ nod_ptr->cur.i += cpus[i].cur.i; nod_ptr->sav.i += cpus[i].sav.i;
+ nod_ptr->cur.w += cpus[i].cur.w; nod_ptr->sav.w += cpus[i].sav.w;
+ nod_ptr->cur.x += cpus[i].cur.x; nod_ptr->sav.x += cpus[i].sav.x;
+ nod_ptr->cur.y += cpus[i].cur.y; nod_ptr->sav.y += cpus[i].sav.y;
+ nod_ptr->cur.z += cpus[i].cur.z; nod_ptr->sav.z += cpus[i].sav.z;
+#ifndef CPU_ZEROTICS
+ /* yep, we re-value this repeatedly for each cpu encountered, but we
+ can then avoid a prior loop to selectively initialize each node */
+ nod_ptr->edge = cpus[sumSLOT].edge;
+#endif
cpus[i].node = node;
- node += (sumSLOT + 1);
- cpus[node].cur.u += cpus[i].cur.u;
- cpus[node].cur.n += cpus[i].cur.n;
- cpus[node].cur.s += cpus[i].cur.s;
- cpus[node].cur.i += cpus[i].cur.i;
- cpus[node].cur.w += cpus[i].cur.w;
- cpus[node].cur.x += cpus[i].cur.x;
- cpus[node].cur.y += cpus[i].cur.y;
- cpus[node].cur.z += cpus[i].cur.z;
}
#endif
- }
+ } // end: for each cpu
Cpu_faux_tot = i; // tolerate cpus taken offline