summaryrefslogtreecommitdiff
path: root/src/mon/PGMap.cc
diff options
context:
space:
mode:
Diffstat (limited to 'src/mon/PGMap.cc')
-rw-r--r--src/mon/PGMap.cc357
1 files changed, 277 insertions, 80 deletions
diff --git a/src/mon/PGMap.cc b/src/mon/PGMap.cc
index e9a35c6b8ab..39cb30f97c8 100644
--- a/src/mon/PGMap.cc
+++ b/src/mon/PGMap.cc
@@ -30,7 +30,7 @@ void PGMap::Incremental::encode(bufferlist &bl, uint64_t features) const
return;
}
- ENCODE_START(6, 5, bl);
+ ENCODE_START(7, 5, bl);
::encode(version, bl);
::encode(pg_stat_updates, bl);
::encode(osd_stat_updates, bl);
@@ -41,6 +41,7 @@ void PGMap::Incremental::encode(bufferlist &bl, uint64_t features) const
::encode(nearfull_ratio, bl);
::encode(pg_remove, bl);
::encode(stamp, bl);
+ ::encode(osd_epochs, bl);
ENCODE_FINISH(bl);
}
@@ -89,6 +90,17 @@ void PGMap::Incremental::decode(bufferlist::iterator &bl)
}
if (struct_v >= 6)
::decode(stamp, bl);
+ if (struct_v >= 7) {
+ ::decode(osd_epochs, bl);
+ } else {
+ for (map<int32_t, osd_stat_t>::iterator i = osd_stat_updates.begin();
+ i != osd_stat_updates.end();
+ ++i) {
+ // This isn't accurate, but will cause trimming to behave like
+ // previously.
+ osd_epochs.insert(make_pair(i->first, osdmap_epoch));
+ }
+ }
DECODE_FINISH(bl);
}
@@ -140,6 +152,7 @@ void PGMap::Incremental::generate_test_instances(list<PGMap::Incremental*>& o)
o.back()->version = 2;
o.back()->pg_stat_updates[pg_t(1,2,3)] = pg_stat_t();
o.back()->osd_stat_updates[5] = osd_stat_t();
+ o.back()->osd_epochs[5] = 12;
o.push_back(new Incremental);
o.back()->version = 3;
o.back()->osdmap_epoch = 1;
@@ -148,6 +161,7 @@ void PGMap::Incremental::generate_test_instances(list<PGMap::Incremental*>& o)
o.back()->nearfull_ratio = .3;
o.back()->pg_stat_updates[pg_t(4,5,6)] = pg_stat_t();
o.back()->osd_stat_updates[6] = osd_stat_t();
+ o.back()->osd_epochs[6] = 12;
o.back()->pg_remove.insert(pg_t(1,2,3));
o.back()->osd_stat_rm.insert(5);
}
@@ -166,6 +180,7 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
stamp = inc.stamp;
pool_stat_t pg_sum_old = pg_sum;
+ hash_map<uint64_t, pool_stat_t> pg_pool_sum_old;
bool ratios_changed = false;
if (inc.full_ratio != full_ratio && inc.full_ratio != -1) {
@@ -185,6 +200,9 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
const pg_t &update_pg(p->first);
const pg_stat_t &update_stat(p->second);
+ if (pg_pool_sum_old.count(update_pg.pool()) == 0)
+ pg_pool_sum_old[update_pg.pool()] = pg_pool_sum[update_pg.pool()];
+
hash_map<pg_t,pg_stat_t>::iterator t = pg_stat.find(update_pg);
if (t == pg_stat.end()) {
hash_map<pg_t,pg_stat_t>::value_type v(update_pg, update_stat);
@@ -195,12 +213,14 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
}
stat_pg_add(update_pg, update_stat);
}
- for (map<int32_t,osd_stat_t>::const_iterator p = inc.osd_stat_updates.begin();
- p != inc.osd_stat_updates.end();
+ assert(osd_stat.size() == osd_epochs.size());
+ for (map<int32_t,osd_stat_t>::const_iterator p =
+ inc.get_osd_stat_updates().begin();
+ p != inc.get_osd_stat_updates().end();
++p) {
int osd = p->first;
const osd_stat_t &new_stats(p->second);
-
+
hash_map<int32_t,osd_stat_t>::iterator t = osd_stat.find(osd);
if (t == osd_stat.end()) {
hash_map<int32_t,osd_stat_t>::value_type v(osd, new_stats);
@@ -209,9 +229,11 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
stat_osd_sub(t->second);
t->second = new_stats;
}
+ assert(inc.get_osd_epochs().find(osd) != inc.get_osd_epochs().end());
+ osd_epochs.insert(*(inc.get_osd_epochs().find(osd)));
stat_osd_add(new_stats);
-
+
// adjust [near]full status
register_nearfull_status(osd, new_stats);
}
@@ -225,9 +247,9 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
pg_stat.erase(s);
}
}
-
- for (set<int>::iterator p = inc.osd_stat_rm.begin();
- p != inc.osd_stat_rm.end();
+
+ for (set<int>::iterator p = inc.get_osd_stat_rm().begin();
+ p != inc.get_osd_stat_rm().end();
++p) {
hash_map<int32_t,osd_stat_t>::iterator t = osd_stat.find(*p);
if (t != osd_stat.end()) {
@@ -252,7 +274,9 @@ void PGMap::apply_incremental(CephContext *cct, const Incremental& inc)
stamp_delta -= pg_sum_deltas.front().second;
pg_sum_deltas.pop_front();
}
-
+
+ update_pool_deltas(cct, inc.stamp, pg_pool_sum_old);
+
if (inc.osdmap_epoch)
last_osdmap_epoch = inc.osdmap_epoch;
if (inc.pg_scan)
@@ -416,6 +440,14 @@ epoch_t PGMap::calc_min_last_epoch_clean() const
if (lec < min)
min = lec;
}
+ // also scan osd epochs
+ // don't trim past the oldest reported osd epoch
+ for (hash_map<int32_t, epoch_t>::const_iterator i = osd_epochs.begin();
+ i != osd_epochs.end();
+ ++i) {
+ if (i->second < min)
+ min = i->second;
+ }
return min;
}
@@ -434,7 +466,7 @@ void PGMap::encode(bufferlist &bl, uint64_t features) const
return;
}
- ENCODE_START(5, 4, bl);
+ ENCODE_START(6, 4, bl);
::encode(version, bl);
::encode(pg_stat, bl);
::encode(osd_stat, bl);
@@ -443,6 +475,7 @@ void PGMap::encode(bufferlist &bl, uint64_t features) const
::encode(full_ratio, bl);
::encode(nearfull_ratio, bl);
::encode(stamp, bl);
+ ::encode(osd_epochs, bl);
ENCODE_FINISH(bl);
}
@@ -472,6 +505,17 @@ void PGMap::decode(bufferlist::iterator &bl)
}
if (struct_v >= 5)
::decode(stamp, bl);
+ if (struct_v >= 6) {
+ ::decode(osd_epochs, bl);
+ } else {
+ for (hash_map<int32_t, osd_stat_t>::iterator i = osd_stat.begin();
+ i != osd_stat.end();
+ ++i) {
+ // This isn't accurate, but will cause trimming to behave like
+ // previously.
+ osd_epochs.insert(make_pair(i->first, last_osdmap_epoch));
+ }
+ }
DECODE_FINISH(bl);
calc_stats();
@@ -488,7 +532,10 @@ void PGMap::dirty_all(Incremental& inc)
inc.pg_stat_updates[p->first] = p->second;
}
for (hash_map<int32_t, osd_stat_t>::const_iterator p = osd_stat.begin(); p != osd_stat.end(); ++p) {
- inc.osd_stat_updates[p->first] = p->second;
+ assert(inc.get_osd_epochs().count(p->first));
+ inc.update_stat(p->first,
+ inc.get_osd_epochs().find(p->first)->second,
+ p->second);
}
}
@@ -701,7 +748,8 @@ void PGMap::dump_stuck_plain(ostream& ss, PGMap::StuckPG type, utime_t cutoff) c
{
hash_map<pg_t, pg_stat_t> stuck_pg_stats;
get_stuck_stats(type, cutoff, stuck_pg_stats);
- dump_pg_stats_plain(ss, stuck_pg_stats);
+ if (!stuck_pg_stats.empty())
+ dump_pg_stats_plain(ss, stuck_pg_stats);
}
void PGMap::dump_osd_perf_stats(Formatter *f) const
@@ -738,54 +786,59 @@ void PGMap::print_osd_perf_stats(std::ostream *ss) const
(*ss) << tab;
}
-void PGMap::recovery_summary(Formatter *f, ostream *out) const
+void PGMap::recovery_summary(Formatter *f, ostream *out,
+ pool_stat_t delta_sum) const
{
bool first = true;
- if (pg_sum.stats.sum.num_objects_degraded) {
- double pc = (double)pg_sum.stats.sum.num_objects_degraded / (double)pg_sum.stats.sum.num_object_copies * (double)100.0;
+ if (delta_sum.stats.sum.num_objects_degraded) {
+ double pc = (double)delta_sum.stats.sum.num_objects_degraded /
+ (double)delta_sum.stats.sum.num_object_copies * (double)100.0;
char b[20];
snprintf(b, sizeof(b), "%.3lf", pc);
if (f) {
- f->dump_unsigned("degraded_objects", pg_sum.stats.sum.num_objects_degraded);
- f->dump_unsigned("degraded_total", pg_sum.stats.sum.num_object_copies);
+ f->dump_unsigned("degraded_objects", delta_sum.stats.sum.num_objects_degraded);
+ f->dump_unsigned("degraded_total", delta_sum.stats.sum.num_object_copies);
f->dump_string("degrated_ratio", b);
} else {
- *out << pg_sum.stats.sum.num_objects_degraded
- << "/" << pg_sum.stats.sum.num_object_copies << " objects degraded (" << b << "%)";
+ *out << delta_sum.stats.sum.num_objects_degraded
+ << "/" << delta_sum.stats.sum.num_object_copies << " objects degraded (" << b << "%)";
}
first = false;
}
- if (pg_sum.stats.sum.num_objects_unfound) {
- double pc = (double)pg_sum.stats.sum.num_objects_unfound / (double)pg_sum.stats.sum.num_objects * (double)100.0;
+ if (delta_sum.stats.sum.num_objects_unfound) {
+ double pc = (double)delta_sum.stats.sum.num_objects_unfound /
+ (double)delta_sum.stats.sum.num_objects * (double)100.0;
char b[20];
snprintf(b, sizeof(b), "%.3lf", pc);
if (f) {
- f->dump_unsigned("unfound_objects", pg_sum.stats.sum.num_objects_unfound);
- f->dump_unsigned("unfound_total", pg_sum.stats.sum.num_objects);
+ f->dump_unsigned("unfound_objects", delta_sum.stats.sum.num_objects_unfound);
+ f->dump_unsigned("unfound_total", delta_sum.stats.sum.num_objects);
f->dump_string("unfound_ratio", b);
} else {
if (!first)
*out << "; ";
- *out << pg_sum.stats.sum.num_objects_unfound
- << "/" << pg_sum.stats.sum.num_objects << " unfound (" << b << "%)";
+ *out << delta_sum.stats.sum.num_objects_unfound
+ << "/" << delta_sum.stats.sum.num_objects << " unfound (" << b << "%)";
}
first = false;
}
}
-void PGMap::recovery_rate_summary(Formatter *f, ostream *out) const
+void PGMap::recovery_rate_summary(Formatter *f, ostream *out,
+ pool_stat_t delta_sum,
+ utime_t delta_stamp) const
{
// make non-negative; we can get negative values if osds send
// uncommitted stats and then "go backward" or if they are just
// buggy/wrong.
- pool_stat_t pos_delta = pg_sum_delta;
+ pool_stat_t pos_delta = delta_sum;
pos_delta.floor(0);
if (pos_delta.stats.sum.num_objects_recovered ||
pos_delta.stats.sum.num_bytes_recovered ||
pos_delta.stats.sum.num_keys_recovered) {
- int64_t objps = pos_delta.stats.sum.num_objects_recovered / (double)stamp_delta;
- int64_t bps = pos_delta.stats.sum.num_bytes_recovered / (double)stamp_delta;
- int64_t kps = pos_delta.stats.sum.num_keys_recovered / (double)stamp_delta;
+ int64_t objps = pos_delta.stats.sum.num_objects_recovered / (double)delta_stamp;
+ int64_t bps = pos_delta.stats.sum.num_bytes_recovered / (double)delta_stamp;
+ int64_t kps = pos_delta.stats.sum.num_keys_recovered / (double)delta_stamp;
if (f) {
f->dump_int("recovering_objects_per_sec", objps);
f->dump_int("recovering_bytes_per_sec", bps);
@@ -799,24 +852,194 @@ void PGMap::recovery_rate_summary(Formatter *f, ostream *out) const
}
}
-void PGMap::update_delta(CephContext *cct, utime_t inc_stamp, pool_stat_t& pg_sum_old)
+void PGMap::overall_recovery_rate_summary(Formatter *f, ostream *out) const
+{
+ recovery_rate_summary(f, out, pg_sum_delta, stamp_delta);
+}
+
+void PGMap::overall_recovery_summary(Formatter *f, ostream *out) const
+{
+ recovery_summary(f, out, pg_sum);
+}
+
+void PGMap::pool_recovery_rate_summary(Formatter *f, ostream *out,
+ uint64_t poolid) const
+{
+ hash_map<uint64_t,pair<pool_stat_t,utime_t> >::const_iterator p =
+ per_pool_sum_delta.find(poolid);
+ if (p == per_pool_sum_delta.end())
+ return;
+ hash_map<uint64_t,utime_t>::const_iterator ts =
+ per_pool_sum_deltas_stamps.find(p->first);
+ assert(ts != per_pool_sum_deltas_stamps.end());
+ recovery_rate_summary(f, out, p->second.first, ts->second);
+}
+
+void PGMap::pool_recovery_summary(Formatter *f, ostream *out,
+ uint64_t poolid) const
+{
+ hash_map<uint64_t,pair<pool_stat_t,utime_t> >::const_iterator p =
+ per_pool_sum_delta.find(poolid);
+ if (p == per_pool_sum_delta.end())
+ return;
+ recovery_summary(f, out, p->second.first);
+}
+
+void PGMap::client_io_rate_summary(Formatter *f, ostream *out,
+ pool_stat_t delta_sum,
+ utime_t delta_stamp) const
{
+ pool_stat_t pos_delta = delta_sum;
+ pos_delta.floor(0);
+ if (pos_delta.stats.sum.num_rd ||
+ pos_delta.stats.sum.num_wr) {
+ if (pos_delta.stats.sum.num_rd) {
+ int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)delta_stamp;
+ if (f) {
+ f->dump_int("read_bytes_sec", rd);
+ } else {
+ *out << pretty_si_t(rd) << "B/s rd, ";
+ }
+ }
+ if (pos_delta.stats.sum.num_wr) {
+ int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)delta_stamp;
+ if (f) {
+ f->dump_int("write_bytes_sec", wr);
+ } else {
+ *out << pretty_si_t(wr) << "B/s wr, ";
+ }
+ }
+ int64_t iops = (pos_delta.stats.sum.num_rd + pos_delta.stats.sum.num_wr) / (double)delta_stamp;
+ if (f) {
+ f->dump_int("op_per_sec", iops);
+ } else {
+ *out << pretty_si_t(iops) << "op/s";
+ }
+ }
+}
+
+void PGMap::overall_client_io_rate_summary(Formatter *f, ostream *out) const
+{
+ client_io_rate_summary(f, out, pg_sum_delta, stamp_delta);
+}
+
+void PGMap::pool_client_io_rate_summary(Formatter *f, ostream *out,
+ uint64_t poolid) const
+{
+ hash_map<uint64_t,pair<pool_stat_t,utime_t> >::const_iterator p =
+ per_pool_sum_delta.find(poolid);
+ if (p == per_pool_sum_delta.end())
+ return;
+ hash_map<uint64_t,utime_t>::const_iterator ts =
+ per_pool_sum_deltas_stamps.find(p->first);
+ assert(ts != per_pool_sum_deltas_stamps.end());
+ client_io_rate_summary(f, out, p->second.first, ts->second);
+}
+
+/**
+ * update aggregated delta
+ *
+ * @param cct ceph context
+ * @param ts Timestamp for the stats being delta'ed
+ * @param old_pool_sum Previous stats sum
+ * @param last_ts Last timestamp for pool
+ * @param result_pool_sum Resulting stats
+ * @param result_ts_delta Resulting timestamp delta
+ * @param delta_avg_list List of last N computed deltas, used to average
+ */
+void PGMap::update_delta(CephContext *cct,
+ const utime_t ts,
+ const pool_stat_t& old_pool_sum,
+ utime_t *last_ts,
+ const pool_stat_t& current_pool_sum,
+ pool_stat_t *result_pool_delta,
+ utime_t *result_ts_delta,
+ list<pair<pool_stat_t,utime_t> > *delta_avg_list)
+{
+ /* @p ts is the timestamp we want to associate with the data
+ * in @p old_pool_sum, and on which we will base ourselves to
+ * calculate the delta, stored in 'delta_t'.
+ */
utime_t delta_t;
- delta_t = inc_stamp;
- delta_t -= stamp;
- stamp = inc_stamp;
+ delta_t = ts; // start with the provided timestamp
+ delta_t -= *last_ts; // take the last timestamp we saw
+ *last_ts = ts; // @p ts becomes the last timestamp we saw
// calculate a delta, and average over the last 2 deltas.
- pool_stat_t d = pg_sum;
- d.stats.sub(pg_sum_old.stats);
- pg_sum_deltas.push_back(make_pair(d, delta_t));
- stamp_delta += delta_t;
+ /* start by taking a copy of our current @p result_pool_sum, and by
+ * taking out the stats from @p old_pool_sum. This generates a stats
+ * delta. Stash this stats delta in @p delta_avg_list, along with the
+ * timestamp delta for these results.
+ */
+ pool_stat_t d = current_pool_sum;
+ d.stats.sub(old_pool_sum.stats);
+ delta_avg_list->push_back(make_pair(d,delta_t));
+ *result_ts_delta += delta_t;
+
+ /* Aggregate current delta, and take out the last seen delta (if any) to
+ * average it out.
+ */
+ result_pool_delta->stats.add(d.stats);
+ size_t s = MAX(1, cct ? cct->_conf->mon_stat_smooth_intervals : 1);
+ if (delta_avg_list->size() > s) {
+ result_pool_delta->stats.sub(delta_avg_list->front().first.stats);
+ *result_ts_delta -= delta_avg_list->front().second;
+ delta_avg_list->pop_front();
+ }
+}
- pg_sum_delta.stats.add(d.stats);
- if (pg_sum_deltas.size() > (std::list< pair<pool_stat_t, utime_t> >::size_type)MAX(1, cct ? cct->_conf->mon_stat_smooth_intervals : 1)) {
- pg_sum_delta.stats.sub(pg_sum_deltas.front().first.stats);
- stamp_delta -= pg_sum_deltas.front().second;
- pg_sum_deltas.pop_front();
+/**
+ * update aggregated delta
+ *
+ * @param cct ceph context
+ * @param ts Timestamp
+ * @param pg_sum_old Old pg_sum
+ */
+void PGMap::update_global_delta(CephContext *cct,
+ const utime_t ts, const pool_stat_t& pg_sum_old)
+{
+ update_delta(cct, ts, pg_sum_old, &stamp, pg_sum, &pg_sum_delta,
+ &stamp_delta, &pg_sum_deltas);
+}
+
+/**
+ * Update a given pool's deltas
+ *
+ * @param cct Ceph Context
+ * @param ts Timestamp for the stats being delta'ed
+ * @param pool Pool's id
+ * @param old_pool_sum Previous stats sum
+ */
+void PGMap::update_one_pool_delta(CephContext *cct,
+ const utime_t ts,
+ const uint64_t pool,
+ const pool_stat_t& old_pool_sum)
+{
+ if (per_pool_sum_deltas.count(pool) == 0) {
+ assert(per_pool_sum_deltas_stamps.count(pool) == 0);
+ assert(per_pool_sum_delta.count(pool) == 0);
+ }
+
+ pair<pool_stat_t,utime_t>& sum_delta = per_pool_sum_delta[pool];
+
+ update_delta(cct, ts, old_pool_sum, &sum_delta.second, pg_pool_sum[pool],
+ &sum_delta.first, &per_pool_sum_deltas_stamps[pool],
+ &per_pool_sum_deltas[pool]);
+}
+
+/**
+ * Update pools' deltas
+ *
+ * @param cct CephContext
+ * @param ts Timestamp for the stats being delta'ed
+ * @param pg_pool_sum_old Map of pool stats for delta calcs.
+ */
+void PGMap::update_pool_deltas(CephContext *cct, const utime_t ts,
+ const hash_map<uint64_t,pool_stat_t>& pg_pool_sum_old)
+{
+ for (hash_map<uint64_t,pool_stat_t>::const_iterator it = pg_pool_sum_old.begin();
+ it != pg_pool_sum_old.end(); ++it) {
+ update_one_pool_delta(cct, ts, it->first, it->second);
}
}
@@ -869,7 +1092,7 @@ void PGMap::print_summary(Formatter *f, ostream *out) const
}
std::stringstream ssr;
- recovery_summary(f, &ssr);
+ overall_recovery_summary(f, &ssr);
if (!f && ssr.str().length())
*out << " " << ssr.str() << "\n";
ssr.clear();
@@ -878,43 +1101,17 @@ void PGMap::print_summary(Formatter *f, ostream *out) const
if (!f)
*out << ss.str(); // pgs by state
- recovery_rate_summary(f, &ssr);
+ overall_recovery_rate_summary(f, &ssr);
if (!f && ssr.str().length())
*out << "recovery io " << ssr.str() << "\n";
- // make non-negative; we can get negative values if osds send
- // uncommitted stats and then "go backward" or if they are just
- // buggy/wrong.
- pool_stat_t pos_delta = pg_sum_delta;
- pos_delta.floor(0);
- if (pos_delta.stats.sum.num_rd ||
- pos_delta.stats.sum.num_wr) {
- if (!f)
- *out << " client io ";
- if (pos_delta.stats.sum.num_rd) {
- int64_t rd = (pos_delta.stats.sum.num_rd_kb << 10) / (double)stamp_delta;
- if (f) {
- f->dump_int("read_bytes_sec", rd);
- } else {
- *out << pretty_si_t(rd) << "B/s rd, ";
- }
- }
- if (pos_delta.stats.sum.num_wr) {
- int64_t wr = (pos_delta.stats.sum.num_wr_kb << 10) / (double)stamp_delta;
- if (f) {
- f->dump_int("write_bytes_sec", wr);
- } else {
- *out << pretty_si_t(wr) << "B/s wr, ";
- }
- }
- int64_t iops = (pos_delta.stats.sum.num_rd + pos_delta.stats.sum.num_wr) / (double)stamp_delta;
- if (f) {
- f->dump_int("op_per_sec", iops);
- } else {
- *out << pretty_si_t(iops) << "op/s";
- *out << "\n";
- }
- }
+ ssr.clear();
+ ssr.str("");
+
+ overall_client_io_rate_summary(f, &ssr);
+ if (!f && ssr.str().length())
+ *out << " client io " << ssr.str() << "\n";
+
}
@@ -960,12 +1157,12 @@ void PGMap::print_oneline_summary(ostream *out) const
}
std::stringstream ssr;
- recovery_summary(NULL, &ssr);
+ overall_recovery_summary(NULL, &ssr);
if (ssr.str().length())
*out << "; " << ssr.str();
ssr.clear();
ssr.str("");
- recovery_rate_summary(NULL, &ssr);
+ overall_recovery_rate_summary(NULL, &ssr);
if (ssr.str().length())
*out << "; " << ssr.str() << " recovering";
}