diff options
author | Sage Weil <sage@inktank.com> | 2012-06-08 11:18:59 -0700 |
---|---|---|
committer | Sage Weil <sage@inktank.com> | 2012-06-08 11:18:59 -0700 |
commit | 3876dbfdd7e193373e103b41dcf460d6f3723b25 (patch) | |
tree | c7b1343d892d62c480a34500339c2e2ecca4d391 /src | |
parent | 978d5419a12495c1e800c7d8b8d66c87ca374bef (diff) | |
parent | c0a02a4a49fded719b0c6a53918697b3f7ad2d38 (diff) | |
download | ceph-3876dbfdd7e193373e103b41dcf460d6f3723b25.tar.gz |
Merge branch 'wip-crush-tunables'
Reviewed-by: Sam Just <sam.just@inktank.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/crush/CrushCompiler.cc | 40 |
-rw-r--r-- | src/crush/CrushCompiler.h | 9 |
-rw-r--r-- | src/crush/CrushTester.cc | 22 |
-rw-r--r-- | src/crush/CrushTester.h | 30 |
-rw-r--r-- | src/crush/CrushWrapper.cc | 11 |
-rw-r--r-- | src/crush/CrushWrapper.h | 42 |
-rw-r--r-- | src/crush/builder.c | 5 |
-rw-r--r-- | src/crush/crush.c | 1 |
-rw-r--r-- | src/crush/crush.h | 10 |
-rw-r--r-- | src/crush/grammar.h | 8 |
-rw-r--r-- | src/crush/mapper.c | 18 |
-rw-r--r-- | src/crushtool.cc | 86 |
-rw-r--r-- | src/test/cli/crushtool/help.t | 16 |
13 files changed, 264 insertions, 34 deletions
diff --git a/src/crush/CrushCompiler.cc b/src/crush/CrushCompiler.cc index fc6e6e8856b..ec3ab2a1427 100644 --- a/src/crush/CrushCompiler.cc +++ b/src/crush/CrushCompiler.cc @@ -179,9 +179,17 @@ int CrushCompiler::decompile_bucket(int cur, int CrushCompiler::decompile(ostream &out) { - out << "# begin crush map\n\n"; + out << "# begin crush map\n"; - out << "# devices\n"; + // only dump tunables if they differ from the defaults + if (crush.get_choose_local_tries() != 2) + out << "tunable choose_local_tries " << crush.get_choose_local_tries() << "\n"; + if (crush.get_choose_local_fallback_tries() != 5) + out << "tunable choose_local_fallback_tries " << crush.get_choose_local_fallback_tries() << "\n"; + if (crush.get_choose_total_tries() != 19) + out << "tunable choose_total_tries " << crush.get_choose_total_tries() << "\n"; + + out << "\n# devices\n"; for (int i=0; i<crush.get_max_devices(); i++) { out << "device " << i << " "; print_item_name(out, i, crush); @@ -322,6 +330,31 @@ int CrushCompiler::parse_device(iter_t const& i) return 0; } +int CrushCompiler::parse_tunable(iter_t const& i) +{ + string name = string_node(i->children[1]); + int val = int_node(i->children[2]); + + if (name == "choose_local_tries") + crush.set_choose_local_tries(val); + else if (name == "choose_local_fallback_tries") + crush.set_choose_local_fallback_tries(val); + else if (name == "choose_total_tries") + crush.set_choose_total_tries(val); + else { + err << "tunable " << name << " not recognized" << std::endl; + return -1; + } + + if (!unsafe_tunables) { + err << "tunables are NOT FULLY IMPLEMENTED; enable with --enable-unsafe-tunables to enable this feature" << std::endl; + return -1; + } + + if (verbose) err << "tunable " << name << " " << val << std::endl; + return 0; +} + int CrushCompiler::parse_bucket_type(iter_t const& i) { int id = int_node(i->children[1]); @@ -595,6 +628,9 @@ int CrushCompiler::parse_crush(iter_t const& i) int r = 0; for (iter_t p = i->children.begin(); p != 
i->children.end(); p++) { switch (p->value.id().to_long()) { + case crush_grammar::_tunable: + r = parse_tunable(p); + break; case crush_grammar::_device: r = parse_device(p); break; diff --git a/src/crush/CrushCompiler.h b/src/crush/CrushCompiler.h index 5476f6b2e81..9d07d76afa9 100644 --- a/src/crush/CrushCompiler.h +++ b/src/crush/CrushCompiler.h @@ -15,6 +15,7 @@ class CrushCompiler { CrushWrapper& crush; ostream& err; int verbose; + bool unsafe_tunables; // decompile enum dcb_state_t { @@ -43,6 +44,7 @@ class CrushCompiler { int int_node(node_t &node); float float_node(node_t &node); + int parse_tunable(iter_t const& i); int parse_device(iter_t const& i); int parse_bucket_type(iter_t const& i); int parse_bucket(iter_t const& i); @@ -55,9 +57,14 @@ class CrushCompiler { public: CrushCompiler(CrushWrapper& c, ostream& eo, int verbosity=0) - : crush(c), err(eo), verbose(verbosity) {} + : crush(c), err(eo), verbose(verbosity), + unsafe_tunables(false) {} ~CrushCompiler() {} + void enable_unsafe_tunables() { + unsafe_tunables = true; + } + int decompile(ostream& out); int compile(istream& in, const char *infn=0); }; diff --git a/src/crush/CrushTester.cc b/src/crush/CrushTester.cc index f7d1c64e4e8..a88aa9d869b 100644 --- a/src/crush/CrushTester.cc +++ b/src/crush/CrushTester.cc @@ -90,7 +90,8 @@ int CrushTester::test() if (verbose > 3 ) err << "devices weights (hex): " << hex << weight << dec << std::endl; - + if (output_choose_tries) + crush.start_choose_profile(); for (int r = min_rule; r < crush.get_max_rules() && r <= max_rule; r++) { @@ -223,6 +224,10 @@ int CrushTester::test() batchPer[currentBatch] = temporary_per; sizes[out.size()]++; + + if (output_bad_mappings && out.size() != (unsigned)nr) { + cout << "bad mapping rule " << r << " x " << x << " num_rep " << nr << " result " << out << std::endl; + } } // compute chi squared statistic for device examining the uniformity this batch of placements @@ -308,5 +313,20 @@ int CrushTester::test() #endif } } + + if 
(output_choose_tries) { + __u32 *v; + int n = crush.get_choose_profile(&v); + for (int i=0; i<n; i++) { + cout.setf(std::ios::right); + cout << std::setw(2) + << i << ": " << std::setw(9) << v[i]; + cout.unsetf(std::ios::right); + cout << std::endl; + } + + crush.stop_choose_profile(); + } + return 0; } diff --git a/src/crush/CrushTester.h b/src/crush/CrushTester.h index 1974d10d153..fba36febab2 100644 --- a/src/crush/CrushTester.h +++ b/src/crush/CrushTester.h @@ -25,9 +25,13 @@ class CrushTester { float mark_down_percentage; int mark_down_start; int down_range; + +public: bool output_utilization; bool output_utilization_all; bool output_statistics; + bool output_bad_mappings; + bool output_choose_tries; public: CrushTester(CrushWrapper& c, ostream& eo, int verbosity=0) @@ -42,14 +46,30 @@ public: down_range_marked(false), mark_down_percentage(0.0), mark_down_start(0), - down_range(1) + down_range(1), + output_utilization(false), + output_utilization_all(false), + output_statistics(false), + output_bad_mappings(false), + output_choose_tries(false) { } - void set_output(bool set_output_utilization, bool set_output_utilization_all, bool set_output_statistics) { - output_utilization = set_output_utilization; - output_utilization_all = set_output_utilization_all; - output_statistics = set_output_statistics; + void set_output_utilization(bool b) { + output_utilization = b; + } + void set_output_utilization_all(bool b) { + output_utilization_all = b; + } + void set_output_statistics(bool b) { + output_statistics = b; } + void set_output_bad_mappings(bool b) { + output_bad_mappings = b; + } + void set_output_choose_tries(bool b) { + output_choose_tries = b; + } + void set_batches(int b) { num_batches = b; } diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc index fd86f361861..15d5e140ee1 100644 --- a/src/crush/CrushWrapper.cc +++ b/src/crush/CrushWrapper.cc @@ -346,6 +346,11 @@ void CrushWrapper::encode(bufferlist& bl, bool lean) const ::encode(type_map, 
bl); ::encode(name_map, bl); ::encode(rule_name_map, bl); + + // tunables + ::encode(crush->choose_local_tries, bl); + ::encode(crush->choose_local_fallback_tries, bl); + ::encode(crush->choose_total_tries, bl); } void CrushWrapper::decode(bufferlist::iterator& blp) @@ -393,6 +398,12 @@ void CrushWrapper::decode(bufferlist::iterator& blp) ::decode(rule_name_map, blp); build_rmaps(); + // tunables + if (!blp.end()) { + ::decode(crush->choose_local_tries, blp); + ::decode(crush->choose_local_fallback_tries, blp); + ::decode(crush->choose_total_tries, blp); + } finalize(); } catch (...) { diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h index 27e239d8ac9..82b9e4b2de2 100644 --- a/src/crush/CrushWrapper.h +++ b/src/crush/CrushWrapper.h @@ -91,6 +91,28 @@ public: assert(crush); } + // tunables + int get_choose_local_tries() const { + return crush->choose_local_tries; + } + void set_choose_local_tries(int n) { + crush->choose_local_tries = n; + } + + int get_choose_local_fallback_tries() const { + return crush->choose_local_fallback_tries; + } + void set_choose_local_fallback_tries(int n) { + crush->choose_local_fallback_tries = n; + } + + int get_choose_total_tries() const { + return crush->choose_total_tries; + } + void set_choose_total_tries(int n) { + crush->choose_total_tries = n; + } + // bucket types int get_num_type_names() const { return type_map.size(); @@ -469,6 +491,26 @@ public: crush_finalize(crush); } + void start_choose_profile() { + free(crush->choose_tries); + crush->choose_tries = (__u32 *)malloc(sizeof(*crush->choose_tries) * crush->choose_total_tries); + memset(crush->choose_tries, 0, + sizeof(*crush->choose_tries) * crush->choose_total_tries); + } + void stop_choose_profile() { + free(crush->choose_tries); + crush->choose_tries = 0; + } + + int get_choose_profile(__u32 **vec) { + if (crush->choose_tries) { + *vec = crush->choose_tries; + return crush->choose_total_tries; + } + return 0; + } + + void set_max_devices(int m) { 
crush->max_devices = m; } diff --git a/src/crush/builder.c b/src/crush/builder.c index 9c7d1515faf..0f22b297692 100644 --- a/src/crush/builder.c +++ b/src/crush/builder.c @@ -19,6 +19,11 @@ struct crush_map *crush_create() if (!m) return NULL; memset(m, 0, sizeof(*m)); + + /* initialize legacy tunable values */ + m->choose_local_tries = 2; + m->choose_local_fallback_tries = 5; + m->choose_total_tries = 19; return m; } diff --git a/src/crush/crush.c b/src/crush/crush.c index 83bbbd0dcad..7f800b3404b 100644 --- a/src/crush/crush.c +++ b/src/crush/crush.c @@ -120,6 +120,7 @@ void crush_destroy(struct crush_map *map) kfree(map->rules); } + kfree(map->choose_tries); kfree(map); } diff --git a/src/crush/crush.h b/src/crush/crush.h index 9403696aadd..084c4a97869 100644 --- a/src/crush/crush.h +++ b/src/crush/crush.h @@ -159,6 +159,16 @@ struct crush_map { __s32 max_buckets; __u32 max_rules; __s32 max_devices; + + /* choose local retries before re-descent */ + __u32 choose_local_tries; + /* choose local attempts using a fallback permutation before + * re-descent */ + __u32 choose_local_fallback_tries; + /* choose attempts before giving up */ + __u32 choose_total_tries; + + __u32 *choose_tries; }; diff --git a/src/crush/grammar.h b/src/crush/grammar.h index d673af7f7de..c975e176a89 100644 --- a/src/crush/grammar.h +++ b/src/crush/grammar.h @@ -50,6 +50,7 @@ struct crush_grammar : public grammar<crush_grammar> _step, _crushrule, _crushmap, + _tunable, }; template <typename ScannerT> @@ -60,6 +61,8 @@ struct crush_grammar : public grammar<crush_grammar> rule<ScannerT, parser_context<>, parser_tag<_negint> > negint; rule<ScannerT, parser_context<>, parser_tag<_name> > name; + rule<ScannerT, parser_context<>, parser_tag<_tunable> > tunable; + rule<ScannerT, parser_context<>, parser_tag<_device> > device; rule<ScannerT, parser_context<>, parser_tag<_bucket_type> > bucket_type; @@ -89,6 +92,9 @@ struct crush_grammar : public grammar<crush_grammar> negint = leaf_node_d[ lexeme_d[ 
ch_p('-') >> +digit_p ] ]; name = leaf_node_d[ lexeme_d[ +( alnum_p || ch_p('-') || ch_p('_') || ch_p('.')) ] ]; + // tunables + tunable = str_p("tunable") >> name >> posint; + // devices device = str_p("device") >> posint >> name; @@ -132,7 +138,7 @@ struct crush_grammar : public grammar<crush_grammar> >> '}'; // the whole crush map - crushmap = *(device | bucket_type) >> *bucket >> *crushrule; + crushmap = *(tunable | device | bucket_type) >> *bucket >> *crushrule; } rule<ScannerT, parser_context<>, parser_tag<_crushmap> > const& diff --git a/src/crush/mapper.c b/src/crush/mapper.c index 94d5dd30d72..e38b76e492d 100644 --- a/src/crush/mapper.c +++ b/src/crush/mapper.c @@ -307,7 +307,6 @@ static int crush_choose(const struct crush_map *map, int item = 0; int itemtype; int collide, reject; - const unsigned int orig_tries = 5; /* attempts before we fall back to search */ dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "", bucket->id, x, outpos, numrep); @@ -352,8 +351,9 @@ static int crush_choose(const struct crush_map *map, reject = 1; goto reject; } - if (flocal >= (in->size>>1) && - flocal > orig_tries) + if (map->choose_local_fallback_tries > 0 && + flocal >= (in->size>>1) && + flocal > map->choose_local_fallback_tries) item = bucket_perm_choose(in, x, r); else item = crush_bucket_choose(in, x, r); @@ -392,7 +392,7 @@ static int crush_choose(const struct crush_map *map, } reject = 0; - if (recurse_to_leaf) { + if (!collide && recurse_to_leaf) { if (item < 0) { if (crush_choose(map, map->buckets[-1-item], @@ -423,13 +423,14 @@ reject: ftotal++; flocal++; - if (collide && flocal < 3) + if (collide && flocal <= map->choose_local_tries) /* retry locally a few times */ retry_bucket = 1; - else if (flocal <= in->size + orig_tries) + else if (map->choose_local_fallback_tries > 0 && + flocal <= in->size + map->choose_local_fallback_tries) /* exhaustive bucket search */ retry_bucket = 1; - else if (ftotal < 20) + else if (ftotal <= 
map->choose_total_tries) /* then retry descent */ retry_descent = 1; else @@ -451,6 +452,9 @@ reject: dprintk("CHOOSE got %d\n", item); out[outpos] = item; outpos++; + + if (map->choose_tries) + map->choose_tries[ftotal]++; } dprintk("CHOOSE returns %d\n", outpos); diff --git a/src/crushtool.cc b/src/crushtool.cc index b836cb464c4..809144ccfe1 100644 --- a/src/crushtool.cc +++ b/src/crushtool.cc @@ -75,9 +75,19 @@ void usage() cout << " reweight a given item (and adjust ancestor\n" << " weights as needed)\n"; cout << " -i mapfn --reweight recalculate all bucket weights\n"; - cout << " --output-utilization output OSD usage\n"; - cout << " --output utilization-all include zero weight items\n"; - cout << " --output-statistics output chi squared statistics\n"; + cout << " --show-utilization show OSD usage\n"; + cout << " --show utilization-all\n"; + cout << " include zero weight items\n"; + cout << " --show-statistics show chi squared statistics\n"; + cout << " --show-bad-mappings show bad mappings\n"; + cout << " --show-choose-tries show choose tries histogram\n"; + cout << " --set-choose-local-tries N\n"; + cout << " set choose local retries before re-descent\n"; + cout << " --set-choose-local-fallback-tries N\n"; + cout << " set choose local retries using fallback\n"; + cout << " permutation before re-descent\n"; + cout << " --set-choose-total-tries N\n"; + cout << " set choose total descent attempts\n"; exit(1); } @@ -109,9 +119,7 @@ int main(int argc, const char **argv) bool decompile = false; bool test = false; bool verbose = false; - bool output_utilization = false; - bool output_utilization_all = false; - bool output_statistics = false; + bool unsafe_tunables = false; bool reweight = false; int add_item = -1; @@ -120,10 +128,16 @@ int main(int argc, const char **argv) map<string,string> add_loc; float reweight_weight = 0; + bool adjust = false; + int build = 0; int num_osds =0; vector<layer_t> layers; + int choose_local_tries = -1; + int 
choose_local_fallback_tries = -1; + int choose_total_tries = -1; + CrushWrapper crush; CrushTester tester(crush, cerr, 1); @@ -151,12 +165,16 @@ int main(int argc, const char **argv) outfn = val; } else if (ceph_argparse_flag(args, i, "-v", "--verbose", (char*)NULL)) { verbose = true; - } else if (ceph_argparse_flag(args, i, "--output_utilization", (char*)NULL)) { - output_utilization = true; - } else if (ceph_argparse_flag(args, i, "--output_utilization_all", (char*)NULL)) { - output_utilization_all = true; - } else if (ceph_argparse_flag(args, i, "--output_statistics", (char*)NULL)) { - output_statistics = true; + } else if (ceph_argparse_flag(args, i, "--show_utilization", (char*)NULL)) { + tester.set_output_utilization(true); + } else if (ceph_argparse_flag(args, i, "--show_utilization_all", (char*)NULL)) { + tester.set_output_utilization_all(true); + } else if (ceph_argparse_flag(args, i, "--show_statistics", (char*)NULL)) { + tester.set_output_statistics(true); + } else if (ceph_argparse_flag(args, i, "--show_bad_mappings", (char*)NULL)) { + tester.set_output_bad_mappings(true); + } else if (ceph_argparse_flag(args, i, "--show_choose_tries", (char*)NULL)) { + tester.set_output_choose_tries(true); } else if (ceph_argparse_witharg(args, i, &val, "-c", "--compile", (char*)NULL)) { srcfn = val; compile = true; @@ -164,6 +182,17 @@ int main(int argc, const char **argv) test = true; } else if (ceph_argparse_flag(args, i, "-s", "--simulate", (char*)NULL)) { tester.set_random_placement(); + } else if (ceph_argparse_flag(args, i, "--enable-unsafe-tunables", (char*)NULL)) { + unsafe_tunables = true; + } else if (ceph_argparse_withint(args, i, &choose_local_tries, &err, + "--set_choose_local_tries", (char*)NULL)) { + adjust = true; + } else if (ceph_argparse_withint(args, i, &choose_local_fallback_tries, &err, + "--set_choose_local_fallback_tries", (char*)NULL)) { + adjust = true; + } else if (ceph_argparse_withint(args, i, &choose_total_tries, &err, + 
"--set_choose_total_tries", (char*)NULL)) { + adjust = true; } else if (ceph_argparse_flag(args, i, "--reweight", (char*)NULL)) { reweight = true; } else if (ceph_argparse_withint(args, i, &add_item, &err, "--add_item", (char*)NULL)) { @@ -300,7 +329,8 @@ int main(int argc, const char **argv) if (decompile + compile + build > 1) { usage(); } - if (!compile && !decompile && !build && !test && !reweight && add_item < 0 && + if (!compile && !decompile && !build && !test && !reweight && !adjust && + add_item < 0 && remove_name.empty() && reweight_name.empty()) { usage(); } @@ -371,6 +401,8 @@ int main(int argc, const char **argv) } CrushCompiler cc(crush, cerr); + if (unsafe_tunables) + cc.enable_unsafe_tunables(); int r = cc.compile(in, srcfn.c_str()); if (r < 0) exit(1); @@ -539,6 +571,33 @@ int main(int argc, const char **argv) modified = true; } + const char *scary_tunables_message = + "** tunables are DANGEROUS and NOT YET RECOMMENDED. DO NOT USE without\n" + "** confirming with developers that your use-case is safe and correct."; + if (choose_local_tries >= 0) { + if (!unsafe_tunables) { + cerr << scary_tunables_message << std::endl; + return -1; + } + crush.set_choose_local_tries(choose_local_tries); + modified = true; + } + if (choose_local_fallback_tries >= 0) { + if (!unsafe_tunables) { + cerr << scary_tunables_message << std::endl; + return -1; + } + crush.set_choose_local_fallback_tries(choose_local_fallback_tries); + modified = true; + } + if (choose_total_tries >= 0) { + if (!unsafe_tunables) { + cerr << scary_tunables_message << std::endl; + return -1; + } + crush.set_choose_total_tries(choose_total_tries); + modified = true; + } if (modified) { crush.finalize(); @@ -559,7 +618,6 @@ int main(int argc, const char **argv) } if (test) { - tester.set_output(output_utilization, output_utilization_all, output_statistics); int r = tester.test(); if (r < 0) exit(1); diff --git a/src/test/cli/crushtool/help.t b/src/test/cli/crushtool/help.t index 
26cd17732c5..9987738e12f 100644 --- a/src/test/cli/crushtool/help.t +++ b/src/test/cli/crushtool/help.t @@ -28,7 +28,17 @@ reweight a given item (and adjust ancestor weights as needed) -i mapfn --reweight recalculate all bucket weights - --output-utilization output OSD usage - --output utilization-all include zero weight items - --output-statistics output chi squared statistics + --show-utilization show OSD usage + --show utilization-all + include zero weight items + --show-statistics show chi squared statistics + --show-bad-mappings show bad mappings + --show-choose-tries show choose tries histogram + --set-choose-local-tries N + set choose local retries before re-descent + --set-choose-local-fallback-tries N + set choose local retries using fallback + permutation before re-descent + --set-choose-total-tries N + set choose total descent attempts [1] |