summary refs log tree commit diff
diff options
context:
space:
mode:
-rw-r--r-- src/mds/Locker.cc 97
-rw-r--r-- src/mds/Locker.h 6
-rw-r--r-- src/mds/MDCache.cc 9
-rw-r--r-- src/mds/MDCache.h 2
-rw-r--r-- src/mds/Server.cc 63
-rw-r--r-- src/messages/MMDSSlaveRequest.h 8
6 files changed, 160 insertions, 25 deletions
diff --git a/src/mds/Locker.cc b/src/mds/Locker.cc
index 1a793484f81..e6fff443c02 100644
--- a/src/mds/Locker.cc
+++ b/src/mds/Locker.cc
@@ -170,7 +170,8 @@ void Locker::include_snap_rdlocks_wlayout(set<SimpleLock*>& rdlocks, CInode *in,
bool Locker::acquire_locks(MDRequest *mdr,
set<SimpleLock*> &rdlocks,
set<SimpleLock*> &wrlocks,
- set<SimpleLock*> &xlocks)
+ set<SimpleLock*> &xlocks,
+ map<SimpleLock*,int> *remote_wrlocks)
{
if (mdr->done_locking &&
!mdr->is_slave()) { // not on slaves! master requests locks piecemeal.
@@ -228,13 +229,24 @@ bool Locker::acquire_locks(MDRequest *mdr,
if ((*p)->get_parent()->is_auth())
mustpin.insert(*p);
else if (!(*p)->get_parent()->is_auth() &&
- !(*p)->can_wrlock(client)) { // we might have to request a scatter
+ !(*p)->can_wrlock(client) && // we might have to request a scatter
+ !mdr->is_slave()) { // if we are slave (remote_wrlock), the master already authpinned
dout(15) << " will also auth_pin " << *(*p)->get_parent()
<< " in case we need to request a scatter" << dendl;
mustpin.insert(*p);
}
}
+ // remote_wrlocks
+ if (remote_wrlocks) {
+ for (map<SimpleLock*,int>::iterator p = remote_wrlocks->begin(); p != remote_wrlocks->end(); ++p) {
+ dout(20) << " must remote_wrlock on mds" << p->second << " "
+ << *p->first << " " << *(p->first)->get_parent() << dendl;
+ sorted.insert(p->first);
+ mustpin.insert(p->first);
+ }
+ }
+
// rdlocks
for (set<SimpleLock*>::iterator p = rdlocks.begin();
p != rdlocks.end();
@@ -343,15 +355,30 @@ bool Locker::acquire_locks(MDRequest *mdr,
// right kind?
SimpleLock *have = *existing;
existing++;
- if (xlocks.count(*p) && mdr->xlocks.count(*p))
+ if (xlocks.count(have) && mdr->xlocks.count(have)) {
dout(10) << " already xlocked " << *have << " " << *have->get_parent() << dendl;
- else if (wrlocks.count(*p) && mdr->wrlocks.count(*p))
+ continue;
+ }
+ if (wrlocks.count(have) && mdr->wrlocks.count(have)) {
dout(10) << " already wrlocked " << *have << " " << *have->get_parent() << dendl;
- else if (rdlocks.count(*p) && mdr->rdlocks.count(*p))
+ continue;
+ }
+ if (remote_wrlocks && remote_wrlocks->count(have) &&
+ mdr->remote_wrlocks.count(have)) {
+ if (mdr->remote_wrlocks[have] == (*remote_wrlocks)[have]) {
+ dout(10) << " already remote_wrlocked " << *have << " " << *have->get_parent() << dendl;
+ continue;
+ }
+ dout(10) << " unlocking remote_wrlock on wrong mds" << mdr->remote_wrlocks[have]
+ << " (want mds" << (*remote_wrlocks)[have] << ") "
+ << *have << " " << *have->get_parent() << dendl;
+ remote_wrlock_finish(have, mdr->remote_wrlocks[have], mdr);
+ // continue...
+ }
+ if (rdlocks.count(have) && mdr->rdlocks.count(have)) {
dout(10) << " already rdlocked " << *have << " " << *have->get_parent() << dendl;
- else
- assert(0);
- continue;
+ continue;
+ }
}
// hose any stray locks
@@ -364,6 +391,8 @@ bool Locker::acquire_locks(MDRequest *mdr,
xlock_finish(stray, mdr, &need_issue);
else if (mdr->wrlocks.count(stray))
wrlock_finish(stray, mdr, &need_issue);
+ else if (mdr->remote_wrlocks.count(stray))
+ remote_wrlock_finish(stray, mdr->remote_wrlocks[stray], mdr);
else
rdlock_finish(stray, mdr, &need_issue);
if (need_issue)
@@ -379,6 +408,9 @@ bool Locker::acquire_locks(MDRequest *mdr,
if (!wrlock_start(*p, mdr))
goto out;
dout(10) << " got wrlock on " << **p << " " << *(*p)->get_parent() << dendl;
+ } else if (remote_wrlocks && remote_wrlocks->count(*p)) {
+ remote_wrlock_start(*p, (*remote_wrlocks)[*p], mdr);
+ goto out;
} else {
if (!rdlock_start(*p, mdr))
goto out;
@@ -396,6 +428,8 @@ bool Locker::acquire_locks(MDRequest *mdr,
xlock_finish(stray, mdr, &need_issue);
else if (mdr->wrlocks.count(stray))
wrlock_finish(stray, mdr, &need_issue);
+ else if (mdr->remote_wrlocks.count(stray))
+ remote_wrlock_finish(stray, mdr->remote_wrlocks[stray], mdr);
else
rdlock_finish(stray, mdr, &need_issue);
if (need_issue)
@@ -441,7 +475,10 @@ void Locker::drop_locks(Mutation *mut, set<CInode*> *pneed_issue)
rdlock_finish(*mut->rdlocks.begin(), mut, &ni);
if (ni)
pneed_issue->insert((CInode*)p);
- }
+ }
+ while (!mut->remote_wrlocks.empty()) {
+ remote_wrlock_finish(mut->remote_wrlocks.begin()->first, mut->remote_wrlocks.begin()->second, mut);
+ }
while (!mut->wrlocks.empty()) {
bool ni = false;
MDSCacheObject *p = (*mut->wrlocks.begin())->get_parent();
@@ -468,6 +505,9 @@ void Locker::drop_non_rdlocks(Mutation *mut, set<CInode*> *pneed_issue)
if (ni)
pneed_issue->insert((CInode*)p);
}
+ while (!mut->remote_wrlocks.empty()) {
+ remote_wrlock_finish(mut->remote_wrlocks.begin()->first, mut->remote_wrlocks.begin()->second, mut);
+ }
while (!mut->wrlocks.empty()) {
bool ni = false;
MDSCacheObject *p = (*mut->wrlocks.begin())->get_parent();
@@ -1138,6 +1178,45 @@ void Locker::wrlock_finish(SimpleLock *lock, Mutation *mut, bool *pneed_issue)
}
+// remote wrlock
+
+void Locker::remote_wrlock_start(SimpleLock *lock, int target, MDRequest *mut)  // ask mds<target> to wrlock 'lock' for request 'mut'
+{  // returns nothing and never records the lock as held: mut->remote_wrlocks is filled in by the OP_WRLOCKACK handler, after which the request is retried
+ dout(7) << "remote_wrlock_start mds" << target << " on " << *lock << " on " << *lock->get_parent() << dendl;
+
+ // if the parent object's auth is ambiguous, send nothing yet: queue a retry of the whole request on WAIT_SINGLEAUTH and bail out
+ if (lock->get_parent()->is_ambiguous_auth()) {
+ lock->get_parent()->add_waiter(MDSCacheObject::WAIT_SINGLEAUTH,
+ new C_MDS_RetryRequest(mdcache, mut));
+ return;
+ }
+
+ // record the target as a slave of this request, then send it an OP_WRLOCK naming the lock by (lock type, parent object info)
+ mut->more()->slaves.insert(target);
+ MMDSSlaveRequest *r = new MMDSSlaveRequest(mut->reqid, MMDSSlaveRequest::OP_WRLOCK);
+ r->set_lock_type(lock->get_type());
+ lock->get_parent()->set_object_info(r->get_object_info());
+ mds->send_message_mds(r, target);
+
+ // queue a retry of the request on the lock; this reuses the WAIT_REMOTEXLOCK wait bit, which the OP_WRLOCKACK handler finishes
+ lock->add_waiter(SimpleLock::WAIT_REMOTEXLOCK, new C_MDS_RetryRequest(mdcache, mut));
+}
+
+void Locker::remote_wrlock_finish(SimpleLock *lock, int target, Mutation *mut)  // release the wrlock that mds<target> holds on 'lock' for 'mut'
+{
+ // forget the lock locally first: remove it from both the remote_wrlocks map and the ordered 'locks' set
+ mut->remote_wrlocks.erase(lock);
+ mut->locks.erase(lock);
+
+ dout(7) << "remote_wrlock_finish releasing remote wrlock on mds" << target
+ << " " << *lock->get_parent() << dendl;
+ if (mds->mdsmap->get_state(target) >= MDSMap::STATE_REJOIN) {  // OP_UNWRLOCK is only sent when the target's mdsmap state is at least REJOIN; otherwise the release is local-only (NOTE(review): presumably an earlier-state target cannot process it -- confirm)
+ MMDSSlaveRequest *slavereq = new MMDSSlaveRequest(mut->reqid, MMDSSlaveRequest::OP_UNWRLOCK);
+ slavereq->set_lock_type(lock->get_type());
+ lock->get_parent()->set_object_info(slavereq->get_object_info());
+ mds->send_message_mds(slavereq, target);
+ }
+}
// ------------------
diff --git a/src/mds/Locker.h b/src/mds/Locker.h
index 68344984acf..39f01f80bf4 100644
--- a/src/mds/Locker.h
+++ b/src/mds/Locker.h
@@ -85,7 +85,8 @@ public:
bool acquire_locks(MDRequest *mdr,
set<SimpleLock*> &rdlocks,
set<SimpleLock*> &wrlocks,
- set<SimpleLock*> &xlocks);
+ set<SimpleLock*> &xlocks,
+ map<SimpleLock*,int> *remote_wrlocks=NULL);
void drop_locks(Mutation *mut, set<CInode*> *pneed_issue=0);
void set_xlocks_done(Mutation *mut);
@@ -133,6 +134,9 @@ public:
bool wrlock_start(SimpleLock *lock, MDRequest *mut, bool nowait=false);
void wrlock_finish(SimpleLock *lock, Mutation *mut, bool *pneed_issue);
+ void remote_wrlock_start(SimpleLock *lock, int target, MDRequest *mut);
+ void remote_wrlock_finish(SimpleLock *lock, int target, Mutation *mut);
+
bool xlock_start(SimpleLock *lock, MDRequest *mut);
void xlock_finish(SimpleLock *lock, Mutation *mut, bool *pneed_issue);
diff --git a/src/mds/MDCache.cc b/src/mds/MDCache.cc
index 79d9ba68e0f..705d9490feb 100644
--- a/src/mds/MDCache.cc
+++ b/src/mds/MDCache.cc
@@ -7351,6 +7351,15 @@ void MDCache::request_drop_foreign_locks(MDRequest *mdr)
}
}
+ map<SimpleLock*, int>::iterator q = mdr->remote_wrlocks.begin();
+ while (q != mdr->remote_wrlocks.end()) {
+ dout(10) << "request_drop_foreign_locks forgetting remote_wrlock " << *q->first
+ << " on mds" << q->second
+ << " on " << *(q->first)->get_parent() << dendl;
+ mdr->locks.erase(q->first);
+ mdr->remote_wrlocks.erase(q++);
+ }
+
mdr->more()->slaves.clear(); /* we no longer have requests out to them, and
* leaving them in can cause double-notifies as
* this function can get called more than once */
diff --git a/src/mds/MDCache.h b/src/mds/MDCache.h
index 3c8191453a8..0e9f023c333 100644
--- a/src/mds/MDCache.h
+++ b/src/mds/MDCache.h
@@ -86,6 +86,7 @@ struct Mutation {
// held locks
set< SimpleLock* > rdlocks; // always local.
set< SimpleLock* > wrlocks; // always local.
+ map< SimpleLock*, int > remote_wrlocks;
set< SimpleLock* > xlocks; // local or remote.
set< SimpleLock*, SimpleLock::ptr_lt > locks; // full ordering
@@ -118,6 +119,7 @@ struct Mutation {
assert(xlocks.empty());
assert(rdlocks.empty());
assert(wrlocks.empty());
+ assert(remote_wrlocks.empty());
}
bool is_master() { return slave_to_mds < 0; }
diff --git a/src/mds/Server.cc b/src/mds/Server.cc
index 3a86e535791..21fddbabcb1 100644
--- a/src/mds/Server.cc
+++ b/src/mds/Server.cc
@@ -1253,6 +1253,20 @@ void Server::handle_slave_request(MMDSSlaveRequest *m)
}
break;
+ case MMDSSlaveRequest::OP_WRLOCKACK:
+ {
+ // identify lock, master request
+ SimpleLock *lock = mds->locker->get_lock(m->get_lock_type(),
+ m->get_object_info());
+ MDRequest *mdr = mdcache->request_get(m->get_reqid());
+ mdr->more()->slaves.insert(from);
+ dout(10) << "got remote wrlock on " << *lock << " on " << *lock->get_parent() << dendl;
+ mdr->remote_wrlocks[lock] = from;
+ mdr->locks.insert(lock);
+ lock->finish_waiters(SimpleLock::WAIT_REMOTEXLOCK);
+ }
+ break;
+
case MMDSSlaveRequest::OP_AUTHPINACK:
{
MDRequest *mdr = mdcache->request_get(m->get_reqid());
@@ -1331,37 +1345,48 @@ void Server::dispatch_slave_request(MDRequest *mdr)
if (logger) logger->inc(l_mdss_dsreq);
- switch (mdr->slave_request->get_op()) {
+ int op = mdr->slave_request->get_op();
+ switch (op) {
case MMDSSlaveRequest::OP_XLOCK:
+ case MMDSSlaveRequest::OP_WRLOCK:
{
// identify object
SimpleLock *lock = mds->locker->get_lock(mdr->slave_request->get_lock_type(),
mdr->slave_request->get_object_info());
- if (lock && lock->get_parent()->is_auth()) {
- // xlock.
+ if (!lock) {
+ dout(10) << "don't have object, dropping" << dendl;
+ assert(0); // can this happen, if we auth pinned properly.
+ }
+ if (op == MMDSSlaveRequest::OP_XLOCK && !lock->get_parent()->is_auth()) {
+ dout(10) << "not auth for remote xlock attempt, dropping on "
+ << *lock << " on " << *lock->get_parent() << dendl;
+ } else {
// use acquire_locks so that we get auth_pinning.
set<SimpleLock*> rdlocks;
- set<SimpleLock*> wrlocks;
+ set<SimpleLock*> wrlocks = mdr->wrlocks;
set<SimpleLock*> xlocks = mdr->xlocks;
- xlocks.insert(lock);
+
+ int replycode;
+ switch (op) {
+ case MMDSSlaveRequest::OP_XLOCK:
+ xlocks.insert(lock);
+ replycode = MMDSSlaveRequest::OP_XLOCKACK;
+ break;
+ case MMDSSlaveRequest::OP_WRLOCK:
+ wrlocks.insert(lock);
+ replycode = MMDSSlaveRequest::OP_WRLOCKACK;
+ break;
+ }
if (!mds->locker->acquire_locks(mdr, rdlocks, wrlocks, xlocks))
return;
// ack
- MMDSSlaveRequest *r = new MMDSSlaveRequest(mdr->reqid, MMDSSlaveRequest::OP_XLOCKACK);
+ MMDSSlaveRequest *r = new MMDSSlaveRequest(mdr->reqid, replycode);
r->set_lock_type(lock->get_type());
lock->get_parent()->set_object_info(r->get_object_info());
mds->send_message(r, mdr->slave_request->get_connection());
- } else {
- if (lock) {
- dout(10) << "not auth for remote xlock attempt, dropping on "
- << *lock << " on " << *lock->get_parent() << dendl;
- } else {
- dout(10) << "don't have object, dropping" << dendl;
- assert(0); // can this happen, if we auth pinned properly.
- }
}
// done.
@@ -1371,12 +1396,20 @@ void Server::dispatch_slave_request(MDRequest *mdr)
break;
case MMDSSlaveRequest::OP_UNXLOCK:
+ case MMDSSlaveRequest::OP_UNWRLOCK:
{
SimpleLock *lock = mds->locker->get_lock(mdr->slave_request->get_lock_type(),
mdr->slave_request->get_object_info());
assert(lock);
bool need_issue = false;
- mds->locker->xlock_finish(lock, mdr, &need_issue);
+ switch (op) {
+ case MMDSSlaveRequest::OP_UNXLOCK:
+ mds->locker->xlock_finish(lock, mdr, &need_issue);
+ break;
+ case MMDSSlaveRequest::OP_UNWRLOCK:
+ mds->locker->wrlock_finish(lock, mdr, &need_issue);
+ break;
+ }
if (need_issue)
mds->locker->issue_caps((CInode*)lock->get_parent());
diff --git a/src/messages/MMDSSlaveRequest.h b/src/messages/MMDSSlaveRequest.h
index bb21d9ebae2..855088755b2 100644
--- a/src/messages/MMDSSlaveRequest.h
+++ b/src/messages/MMDSSlaveRequest.h
@@ -34,6 +34,10 @@ class MMDSSlaveRequest : public Message {
static const int OP_RENAMEPREP = 7;
static const int OP_RENAMEPREPACK = -7;
+ static const int OP_WRLOCK = 8;
+ static const int OP_WRLOCKACK = -8;
+ static const int OP_UNWRLOCK = 9;
+
static const int OP_FINISH = 17;
static const int OP_COMMITTED = -18;
@@ -59,6 +63,10 @@ class MMDSSlaveRequest : public Message {
case OP_FINISH: return "finish"; // commit
case OP_COMMITTED: return "committed";
+ case OP_WRLOCK: return "wrlock";
+ case OP_WRLOCKACK: return "wrlock_ack";
+ case OP_UNWRLOCK: return "unwrlock";
+
case OP_ABORT: return "abort";
//case OP_COMMIT: return "commit";