summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Samuel Just <sam.just@inktank.com> 2013-07-29 09:36:04 -0700
committer Samuel Just <sam.just@inktank.com> 2013-07-29 12:49:16 -0700
commit 1f13d8ac5b879134942cac2f5aca00669f24581f (patch)
tree 370f3e24dff968157948c69d04e3239581389379
parent f1bd4e5bdf4fc9473c1762533aab61ac2dbe64d5 (diff)
download ceph-1f13d8ac5b879134942cac2f5aca00669f24581f.tar.gz
OSD: suspend tp timeout while taking pg lock in OpWQ
If N op_tp threads are configured, and recovery_max_active is set to a sufficiently large number, all N op_tp threads might grab a MOSDPGPush op off of the queue for the same PG. The last thread to get the lock will have waited N*time_to_handle_push before completing its item and pinging the heartbeat timeout. If that time exceeds the timeout and there are enough ops waiting, each thread subsequently will end up exceeding the timeout before completing an item, preventing the OSD from heartbeating indefinitely. We prevent this by suspending the timeout while we try to get the PG lock. Even if we do block for an excessive period of time attempting to get the lock, hopefully, the thread holding the lock will cause the threadpool to time out. Signed-off-by: Samuel Just <sam.just@inktank.com> Reviewed-by: Sage Weil <sage@inktank.com>
-rw-r--r-- src/common/WorkQueue.cc 5
-rw-r--r-- src/common/WorkQueue.h 1
-rw-r--r-- src/osd/OSD.cc 4
-rw-r--r-- src/osd/OSD.h 2
-rw-r--r-- src/osd/PG.cc 7
-rw-r--r-- src/osd/PG.h 2
6 files changed, 18 insertions, 3 deletions
diff --git a/src/common/WorkQueue.cc b/src/common/WorkQueue.cc
index a57c0782030..6b648a78021 100644
--- a/src/common/WorkQueue.cc
+++ b/src/common/WorkQueue.cc
@@ -49,6 +49,11 @@ ThreadPool::ThreadPool(CephContext *cct_, string nm, int n, const char *option)
}
}
+void ThreadPool::TPHandle::suspend_tp_timeout()
+{
+ cct->get_heartbeat_map()->clear_timeout(hb);
+}
+
void ThreadPool::TPHandle::reset_tp_timeout()
{
cct->get_heartbeat_map()->reset_timeout(
diff --git a/src/common/WorkQueue.h b/src/common/WorkQueue.h
index d936d77abef..b2742accdce 100644
--- a/src/common/WorkQueue.h
+++ b/src/common/WorkQueue.h
@@ -49,6 +49,7 @@ public:
: cct(cct), hb(hb), grace(grace), suicide_grace(suicide_grace) {}
public:
void reset_tp_timeout();
+ void suspend_tp_timeout();
};
private:
diff --git a/src/osd/OSD.cc b/src/osd/OSD.cc
index a71c1e9af21..89aa1db34eb 100644
--- a/src/osd/OSD.cc
+++ b/src/osd/OSD.cc
@@ -7050,9 +7050,9 @@ PGRef OSD::OpWQ::_dequeue()
return pg;
}
-void OSD::OpWQ::_process(PGRef pg)
+void OSD::OpWQ::_process(PGRef pg, ThreadPool::TPHandle &handle)
{
- pg->lock();
+ pg->lock_suspend_timeout(handle);
OpRequestRef op;
{
Mutex::Locker l(qlock);
diff --git a/src/osd/OSD.h b/src/osd/OSD.h
index 5bcff7442d7..478f766d145 100644
--- a/src/osd/OSD.h
+++ b/src/osd/OSD.h
@@ -911,7 +911,7 @@ private:
bool _empty() {
return pqueue.empty();
}
- void _process(PGRef pg);
+ void _process(PGRef pg, ThreadPool::TPHandle &handle);
} op_wq;
void enqueue_op(PG *pg, OpRequestRef op);
diff --git a/src/osd/PG.cc b/src/osd/PG.cc
index 9f957b8e054..f731441e8a4 100644
--- a/src/osd/PG.cc
+++ b/src/osd/PG.cc
@@ -193,6 +193,13 @@ PG::~PG()
#endif
}
+void PG::lock_suspend_timeout(ThreadPool::TPHandle &handle)
+{
+ handle.suspend_tp_timeout();
+ lock();
+ handle.reset_tp_timeout();
+}
+
void PG::lock(bool no_lockdep)
{
_lock.Lock(no_lockdep);
diff --git a/src/osd/PG.h b/src/osd/PG.h
index 10e9a2544a9..8f572c75e19 100644
--- a/src/osd/PG.h
+++ b/src/osd/PG.h
@@ -245,6 +245,8 @@ protected:
public:
bool deleting; // true while in removing or OSD is shutting down
+
+ void lock_suspend_timeout(ThreadPool::TPHandle &handle);
void lock(bool no_lockdep = false);
void unlock() {
//generic_dout(0) << this << " " << info.pgid << " unlock" << dendl;