diff options
-rw-r--r-- | src/mds/Locker.cc | 97 | ||||
-rw-r--r-- | src/mds/Locker.h | 6 | ||||
-rw-r--r-- | src/mds/MDCache.cc | 9 | ||||
-rw-r--r-- | src/mds/MDCache.h | 2 | ||||
-rw-r--r-- | src/mds/Server.cc | 63 | ||||
-rw-r--r-- | src/messages/MMDSSlaveRequest.h | 8 |
6 files changed, 160 insertions, 25 deletions
diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc index 1a793484f81..e6fff443c02 100644 --- a/src/mds/Locker.cc +++ b/src/mds/Locker.cc @@ -170,7 +170,8 @@ void Locker::include_snap_rdlocks_wlayout(set<SimpleLock*>& rdlocks, CInode *in, bool Locker::acquire_locks(MDRequest *mdr, set<SimpleLock*> &rdlocks, set<SimpleLock*> &wrlocks, - set<SimpleLock*> &xlocks) + set<SimpleLock*> &xlocks, + map<SimpleLock*,int> *remote_wrlocks) { if (mdr->done_locking && !mdr->is_slave()) { // not on slaves! master requests locks piecemeal. @@ -228,13 +229,24 @@ bool Locker::acquire_locks(MDRequest *mdr, if ((*p)->get_parent()->is_auth()) mustpin.insert(*p); else if (!(*p)->get_parent()->is_auth() && - !(*p)->can_wrlock(client)) { // we might have to request a scatter + !(*p)->can_wrlock(client) && // we might have to request a scatter + !mdr->is_slave()) { // if we are slave (remote_wrlock), the master already authpinned dout(15) << " will also auth_pin " << *(*p)->get_parent() << " in case we need to request a scatter" << dendl; mustpin.insert(*p); } } + // remote_wrlocks + if (remote_wrlocks) { + for (map<SimpleLock*,int>::iterator p = remote_wrlocks->begin(); p != remote_wrlocks->end(); ++p) { + dout(20) << " must remote_wrlock on mds" << p->second << " " + << *p->first << " " << *(p->first)->get_parent() << dendl; + sorted.insert(p->first); + mustpin.insert(p->first); + } + } + // rdlocks for (set<SimpleLock*>::iterator p = rdlocks.begin(); p != rdlocks.end(); @@ -343,15 +355,30 @@ bool Locker::acquire_locks(MDRequest *mdr, // right kind? SimpleLock *have = *existing; existing++; - if (xlocks.count(*p) && mdr->xlocks.count(*p)) + if (xlocks.count(have) && mdr->xlocks.count(have)) { dout(10) << " already xlocked " << *have << " " << *have->get_parent() << dendl; - else if (wrlocks.count(*p) && mdr->wrlocks.count(*p)) + continue; + } + if (wrlocks.count(have) && mdr->wrlocks.count(have)) { dout(10) << " already wrlocked " << *have << " " << *have->get_parent() << dendl; - else if (rdlocks.count(*p) && mdr->rdlocks.count(*p)) + continue; + } + if (remote_wrlocks && remote_wrlocks->count(have) && + mdr->remote_wrlocks.count(have)) { + if (mdr->remote_wrlocks[have] == (*remote_wrlocks)[have]) { + dout(10) << " already remote_wrlocked " << *have << " " << *have->get_parent() << dendl; + continue; + } + dout(10) << " unlocking remote_wrlock on wrong mds" << mdr->remote_wrlocks[have] + << " (want mds" << (*remote_wrlocks)[have] << ") " + << *have << " " << *have->get_parent() << dendl; + remote_wrlock_finish(have, mdr->remote_wrlocks[have], mdr); + // continue... + } + if (rdlocks.count(have) && mdr->rdlocks.count(have)) { dout(10) << " already rdlocked " << *have << " " << *have->get_parent() << dendl; - else - assert(0); - continue; + continue; + } } // hose any stray locks @@ -364,6 +391,8 @@ bool Locker::acquire_locks(MDRequest *mdr, xlock_finish(stray, mdr, &need_issue); else if (mdr->wrlocks.count(stray)) wrlock_finish(stray, mdr, &need_issue); + else if (mdr->remote_wrlocks.count(stray)) + remote_wrlock_finish(stray, mdr->remote_wrlocks[stray], mdr); else rdlock_finish(stray, mdr, &need_issue); if (need_issue) @@ -379,6 +408,9 @@ bool Locker::acquire_locks(MDRequest *mdr, if (!wrlock_start(*p, mdr)) goto out; dout(10) << " got wrlock on " << **p << " " << *(*p)->get_parent() << dendl; + } else if (remote_wrlocks && remote_wrlocks->count(*p)) { + remote_wrlock_start(*p, (*remote_wrlocks)[*p], mdr); + goto out; } else { if (!rdlock_start(*p, mdr)) goto out; @@ -396,6 +428,8 @@ bool Locker::acquire_locks(MDRequest *mdr, xlock_finish(stray, mdr, &need_issue); else if (mdr->wrlocks.count(stray)) wrlock_finish(stray, mdr, &need_issue); + else if (mdr->remote_wrlocks.count(stray)) + remote_wrlock_finish(stray, mdr->remote_wrlocks[stray], mdr); else rdlock_finish(stray, mdr, &need_issue); if (need_issue) @@ -441,7 +475,10 @@ void Locker::drop_locks(Mutation *mut, set<CInode*> *pneed_issue) rdlock_finish(*mut->rdlocks.begin(), mut, &ni); if (ni) pneed_issue->insert((CInode*)p); - } + } + while (!mut->remote_wrlocks.empty()) { + remote_wrlock_finish(mut->remote_wrlocks.begin()->first, mut->remote_wrlocks.begin()->second, mut); + } while (!mut->wrlocks.empty()) { bool ni = false; MDSCacheObject *p = (*mut->wrlocks.begin())->get_parent(); @@ -468,6 +505,9 @@ void Locker::drop_non_rdlocks(Mutation *mut, set<CInode*> *pneed_issue) if (ni) pneed_issue->insert((CInode*)p); } + while (!mut->remote_wrlocks.empty()) { + remote_wrlock_finish(mut->remote_wrlocks.begin()->first, mut->remote_wrlocks.begin()->second, mut); + } while (!mut->wrlocks.empty()) { bool ni = false; MDSCacheObject *p = (*mut->wrlocks.begin())->get_parent(); @@ -1138,6 +1178,45 @@ void Locker::wrlock_finish(SimpleLock *lock, Mutation *mut, bool *pneed_issue) } +// remote wrlock + +void Locker::remote_wrlock_start(SimpleLock *lock, int target, MDRequest *mut) +{ + dout(7) << "remote_wrlock_start mds" << target << " on " << *lock << " on " << *lock->get_parent() << dendl; + + // wait for single auth + if (lock->get_parent()->is_ambiguous_auth()) { + lock->get_parent()->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH, + new C_MDS_RetryRequest(mdcache, mut)); + return; + } + + // send lock request + mut->more()->slaves.insert(target); + MMDSSlaveRequest *r = new MMDSSlaveRequest(mut->reqid, MMDSSlaveRequest::OP_WRLOCK); + r->set_lock_type(lock->get_type()); + lock->get_parent()->set_object_info(r->get_object_info()); + mds->send_message_mds(r, target); + + // wait + lock->add_waiter(SimpleLock::WAIT_REMOTEXLOCK, new C_MDS_RetryRequest(mdcache, mut)); +} + +void Locker::remote_wrlock_finish(SimpleLock *lock, int target, Mutation *mut) +{ + // drop ref + mut->remote_wrlocks.erase(lock); + mut->locks.erase(lock); + + dout(7) << "remote_wrlock_finish releasing remote wrlock on mds" << target + << " " << *lock->get_parent() << dendl; + if (mds->mdsmap->get_state(target) >= MDSMap::STATE_REJOIN) { + MMDSSlaveRequest *slavereq = new MMDSSlaveRequest(mut->reqid, MMDSSlaveRequest::OP_UNWRLOCK); + slavereq->set_lock_type(lock->get_type()); + lock->get_parent()->set_object_info(slavereq->get_object_info()); + mds->send_message_mds(slavereq, target); + } +} // ------------------ diff --git a/src/mds/Locker.h b/src/mds/Locker.h index 68344984acf..39f01f80bf4 100644 --- a/src/mds/Locker.h +++ b/src/mds/Locker.h @@ -85,7 +85,8 @@ public: bool acquire_locks(MDRequest *mdr, set<SimpleLock*> &rdlocks, set<SimpleLock*> &wrlocks, - set<SimpleLock*> &xlocks); + set<SimpleLock*> &xlocks, + map<SimpleLock*,int> *remote_wrlocks=NULL); void drop_locks(Mutation *mut, set<CInode*> *pneed_issue=0); void set_xlocks_done(Mutation *mut); @@ -133,6 +134,9 @@ public: bool wrlock_start(SimpleLock *lock, MDRequest *mut, bool nowait=false); void wrlock_finish(SimpleLock *lock, Mutation *mut, bool *pneed_issue); + void remote_wrlock_start(SimpleLock *lock, int target, MDRequest *mut); + void remote_wrlock_finish(SimpleLock *lock, int target, Mutation *mut); + bool xlock_start(SimpleLock *lock, MDRequest *mut); void xlock_finish(SimpleLock *lock, Mutation *mut, bool *pneed_issue); diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc index 79d9ba68e0f..705d9490feb 100644 --- a/src/mds/MDCache.cc +++ b/src/mds/MDCache.cc @@ -7351,6 +7351,15 @@ void MDCache::request_drop_foreign_locks(MDRequest *mdr) } } + map<SimpleLock*, int>::iterator q = mdr->remote_wrlocks.begin(); + while (q != mdr->remote_wrlocks.end()) { + dout(10) << "request_drop_foreign_locks forgetting remote_wrlock " << *q->first + << " on mds" << q->second + << " on " << *(q->first)->get_parent() << dendl; + mdr->locks.erase(q->first); + mdr->remote_wrlocks.erase(q++); + } + mdr->more()->slaves.clear(); /* we no longer have requests out to them, and * leaving them in can cause double-notifies as * this function can get called more than once */ diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h index 3c8191453a8..0e9f023c333 100644 --- a/src/mds/MDCache.h +++ b/src/mds/MDCache.h @@ -86,6 +86,7 @@ struct Mutation { // held locks set< SimpleLock* > rdlocks; // always local. set< SimpleLock* > wrlocks; // always local. + map< SimpleLock*, int > remote_wrlocks; set< SimpleLock* > xlocks; // local or remote. set< SimpleLock*, SimpleLock::ptr_lt > locks; // full ordering @@ -118,6 +119,7 @@ struct Mutation { assert(xlocks.empty()); assert(rdlocks.empty()); assert(wrlocks.empty()); + assert(remote_wrlocks.empty()); } bool is_master() { return slave_to_mds < 0; } diff --git a/src/mds/Server.cc b/src/mds/Server.cc index 3a86e535791..21fddbabcb1 100644 --- a/src/mds/Server.cc +++ b/src/mds/Server.cc @@ -1253,6 +1253,20 @@ void Server::handle_slave_request(MMDSSlaveRequest *m) } break; + case MMDSSlaveRequest::OP_WRLOCKACK: + { + // identify lock, master request + SimpleLock *lock = mds->locker->get_lock(m->get_lock_type(), + m->get_object_info()); + MDRequest *mdr = mdcache->request_get(m->get_reqid()); + mdr->more()->slaves.insert(from); + dout(10) << "got remote wrlock on " << *lock << " on " << *lock->get_parent() << dendl; + mdr->remote_wrlocks[lock] = from; + mdr->locks.insert(lock); + lock->finish_waiters(SimpleLock::WAIT_REMOTEXLOCK); + } + break; + case MMDSSlaveRequest::OP_AUTHPINACK: { MDRequest *mdr = mdcache->request_get(m->get_reqid()); @@ -1331,37 +1345,48 @@ void Server::dispatch_slave_request(MDRequest *mdr) if (logger) logger->inc(l_mdss_dsreq); - switch (mdr->slave_request->get_op()) { + int op = mdr->slave_request->get_op(); + switch (op) { case MMDSSlaveRequest::OP_XLOCK: + case MMDSSlaveRequest::OP_WRLOCK: { // identify object SimpleLock *lock = mds->locker->get_lock(mdr->slave_request->get_lock_type(), mdr->slave_request->get_object_info()); - if (lock && lock->get_parent()->is_auth()) { - // xlock. + if (!lock) { + dout(10) << "don't have object, dropping" << dendl; + assert(0); // can this happen, if we auth pinned properly. + } + if (op == MMDSSlaveRequest::OP_XLOCK && !lock->get_parent()->is_auth()) { + dout(10) << "not auth for remote xlock attempt, dropping on " + << *lock << " on " << *lock->get_parent() << dendl; + } else { // use acquire_locks so that we get auth_pinning. set<SimpleLock*> rdlocks; - set<SimpleLock*> wrlocks; + set<SimpleLock*> wrlocks = mdr->wrlocks; set<SimpleLock*> xlocks = mdr->xlocks; - xlocks.insert(lock); + + int replycode; + switch (op) { + case MMDSSlaveRequest::OP_XLOCK: + xlocks.insert(lock); + replycode = MMDSSlaveRequest::OP_XLOCKACK; + break; + case MMDSSlaveRequest::OP_WRLOCK: + wrlocks.insert(lock); + replycode = MMDSSlaveRequest::OP_WRLOCKACK; + break; + } if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks)) return; // ack - MMDSSlaveRequest *r = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_XLOCKACK); + MMDSSlaveRequest *r = new MMDSSlaveRequest(mdr->reqid, replycode); r->set_lock_type(lock->get_type()); lock->get_parent()->set_object_info(r->get_object_info()); mds->send_message(r, mdr->slave_request->get_connection()); - } else { - if (lock) { - dout(10) << "not auth for remote xlock attempt, dropping on " - << *lock << " on " << *lock->get_parent() << dendl; - } else { - dout(10) << "don't have object, dropping" << dendl; - assert(0); // can this happen, if we auth pinned properly. - } } // done. @@ -1371,12 +1396,20 @@ void Server::dispatch_slave_request(MDRequest *mdr) break; case MMDSSlaveRequest::OP_UNXLOCK: + case MMDSSlaveRequest::OP_UNWRLOCK: { SimpleLock *lock = mds->locker->get_lock(mdr->slave_request->get_lock_type(), mdr->slave_request->get_object_info()); assert(lock); bool need_issue = false; - mds->locker->xlock_finish(lock, mdr, &need_issue); + switch (op) { + case MMDSSlaveRequest::OP_UNXLOCK: + mds->locker->xlock_finish(lock, mdr, &need_issue); + break; + case MMDSSlaveRequest::OP_UNWRLOCK: + mds->locker->wrlock_finish(lock, mdr, &need_issue); + break; + } if (need_issue) mds->locker->issue_caps((CInode*)lock->get_parent()); diff --git a/src/messages/MMDSSlaveRequest.h b/src/messages/MMDSSlaveRequest.h index bb21d9ebae2..855088755b2 100644 --- a/src/messages/MMDSSlaveRequest.h +++ b/src/messages/MMDSSlaveRequest.h @@ -34,6 +34,10 @@ class MMDSSlaveRequest : public Message { static const int OP_RENAMEPREP = 7; static const int OP_RENAMEPREPACK = -7; + static const int OP_WRLOCK = 8; + static const int OP_WRLOCKACK = -8; + static const int OP_UNWRLOCK = 9; + static const int OP_FINISH = 17; static const int OP_COMMITTED = -18; @@ -59,6 +63,10 @@ class MMDSSlaveRequest : public Message { case OP_FINISH: return "finish"; // commit case OP_COMMITTED: return "committed"; + case OP_WRLOCK: return "wrlock"; + case OP_WRLOCKACK: return "wrlock_ack"; + case OP_UNWRLOCK: return "unwrlock"; + case OP_ABORT: return "abort"; //case OP_COMMIT: return "commit"; |