// -*- mode:C++; tab-width:8; c-basic-offset:2; indent-tabs-mode:t -*- // vim: ts=8 sw=2 smarttab /* * Ceph - scalable distributed file system * * Copyright (C) 2004-2006 Sage Weil * * This is free software; you can redistribute it and/or * modify it under the terms of the GNU Lesser General Public * License version 2.1, as published by the Free Software * Foundation. See file COPYING. * */ /* Object Store Device (OSD) Monitor */ #ifndef CEPH_OSDMONITOR_H #define CEPH_OSDMONITOR_H #include #include using namespace std; #include "include/types.h" #include "msg/Messenger.h" #include "osd/OSDMap.h" #include "PaxosService.h" #include "Session.h" class Monitor; class MOSDBoot; class MMonCommand; class MPoolSnap; class MOSDMap; class OSDMonitor : public PaxosService { public: OSDMap osdmap; private: map > waiting_for_map; // [leader] OSDMap::Incremental pending_inc; multimap > failed_notes; // > map down_pending_out; // osd down -> out map osd_weight; // map thrashing int thrash_map; int thrash_last_up_osd; bool thrash(); // svc public: void create_initial(); private: void update_from_paxos(); void create_pending(); // prepare a new pending void encode_pending(bufferlist &bl); void on_active(); void share_map_with_random_osd(); void update_logger(); void handle_query(PaxosServiceMessage *m); bool preprocess_query(PaxosServiceMessage *m); // true if processed. bool prepare_update(PaxosServiceMessage *m); bool should_propose(double &delay); bool can_mark_down(int o); bool can_mark_up(int o); bool can_mark_out(int o); bool can_mark_in(int o); // ... void send_to_waiting(); // send current map to waiters. MOSDMap *build_latest_full(); MOSDMap *build_incremental(epoch_t first, epoch_t last); void send_full(PaxosServiceMessage *m); void send_incremental(PaxosServiceMessage *m, epoch_t first); void send_incremental(epoch_t first, entity_inst_t& dest, bool onetime); void remove_redundant_pg_temp(); int reweight_by_utilization(int oload, std::string& out_str); bool preprocess_failure(class MOSDFailure *m); bool prepare_failure(class MOSDFailure *m); void _reported_failure(MOSDFailure *m); bool preprocess_boot(class MOSDBoot *m); bool prepare_boot(class MOSDBoot *m); void _booted(MOSDBoot *m, bool logit); bool preprocess_alive(class MOSDAlive *m); bool prepare_alive(class MOSDAlive *m); void _reply_map(PaxosServiceMessage *m, epoch_t e); bool preprocess_pgtemp(class MOSDPGTemp *m); bool prepare_pgtemp(class MOSDPGTemp *m); int _prepare_remove_pool(uint64_t pool); bool preprocess_pool_op ( class MPoolOp *m); bool preprocess_pool_op_create ( class MPoolOp *m); bool prepare_pool_op (MPoolOp *m); bool prepare_pool_op_create (MPoolOp *m); bool prepare_pool_op_delete(MPoolOp *m); bool prepare_pool_op_auid(MPoolOp *m); int prepare_new_pool(string& name, uint64_t auid, int crush_rule, unsigned pg_num, unsigned pgp_num); int prepare_new_pool(MPoolOp *m); bool prepare_set_flag(MMonCommand *m, int flag); bool prepare_unset_flag(MMonCommand *m, int flag); void _pool_op_reply(MPoolOp *m, int ret, epoch_t epoch, bufferlist *blp=NULL); struct C_Booted : public Context { OSDMonitor *cmon; MOSDBoot *m; bool logit; C_Booted(OSDMonitor *cm, MOSDBoot *m_, bool l=true) : cmon(cm), m(m_), logit(l) {} void finish(int r) { if (r >= 0) cmon->_booted(m, logit); else cmon->dispatch((PaxosServiceMessage*)m); } }; struct C_ReplyMap : public Context { OSDMonitor *osdmon; PaxosServiceMessage *m; epoch_t e; C_ReplyMap(OSDMonitor *o, PaxosServiceMessage *mm, epoch_t ee) : osdmon(o), m(mm), e(ee) {} void finish(int r) { osdmon->_reply_map(m, e); } }; struct C_Reported : public Context { OSDMonitor *cmon; MOSDFailure *m; C_Reported(OSDMonitor *cm, MOSDFailure *m_) : cmon(cm), m(m_) {} void finish(int r) { if (r >= 0) cmon->_reported_failure(m); else cmon->dispatch((PaxosServiceMessage*)m); } }; struct C_PoolOp : public Context { OSDMonitor *osdmon; MPoolOp *m; int replyCode; int epoch; bufferlist *reply_data; C_PoolOp(OSDMonitor * osd, MPoolOp *m_, int rc, int e, bufferlist *rd=NULL) : osdmon(osd), m(m_), replyCode(rc), epoch(e), reply_data(rd) {} void finish(int r) { osdmon->_pool_op_reply(m, replyCode, epoch, reply_data); } }; bool preprocess_remove_snaps(class MRemoveSnaps *m); bool prepare_remove_snaps(class MRemoveSnaps *m); public: OSDMonitor(Monitor *mn, Paxos *p); void tick(); // check state, take actions void get_health(list >& summary, list > *detail) const; bool preprocess_command(MMonCommand *m); bool prepare_command(MMonCommand *m); void handle_osd_timeouts(const utime_t &now, std::map &last_osd_report); void mark_all_down(); void send_latest(PaxosServiceMessage *m, epoch_t start=0); void send_latest_now_nodelete(PaxosServiceMessage *m, epoch_t start=0) { send_incremental(m, start); } epoch_t blacklist(entity_addr_t a, utime_t until); void check_subs(); void check_sub(Subscription *sub); void add_flag(int flag) { if (!(osdmap.flags & flag)) { if (pending_inc.new_flags < 0) pending_inc.new_flags = osdmap.flags; pending_inc.new_flags |= flag; } } void remove_flag(int flag) { if(osdmap.flags & flag) { if (pending_inc.new_flags < 0) pending_inc.new_flags = osdmap.flags; pending_inc.new_flags &= ~flag; } } }; #endif