summaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
author Sage Weil <sage@inktank.com> 2012-06-08 11:18:59 -0700
committer Sage Weil <sage@inktank.com> 2012-06-08 11:18:59 -0700
commit 3876dbfdd7e193373e103b41dcf460d6f3723b25 (patch)
tree c7b1343d892d62c480a34500339c2e2ecca4d391 /src
parent 978d5419a12495c1e800c7d8b8d66c87ca374bef (diff)
parent c0a02a4a49fded719b0c6a53918697b3f7ad2d38 (diff)
download ceph-3876dbfdd7e193373e103b41dcf460d6f3723b25.tar.gz
Merge branch 'wip-crush-tunables'
Reviewed-by: Sam Just <sam.just@inktank.com>
Diffstat (limited to 'src')
-rw-r--r-- src/crush/CrushCompiler.cc 40
-rw-r--r-- src/crush/CrushCompiler.h 9
-rw-r--r-- src/crush/CrushTester.cc 22
-rw-r--r-- src/crush/CrushTester.h 30
-rw-r--r-- src/crush/CrushWrapper.cc 11
-rw-r--r-- src/crush/CrushWrapper.h 42
-rw-r--r-- src/crush/builder.c 5
-rw-r--r-- src/crush/crush.c 1
-rw-r--r-- src/crush/crush.h 10
-rw-r--r-- src/crush/grammar.h 8
-rw-r--r-- src/crush/mapper.c 18
-rw-r--r-- src/crushtool.cc 86
-rw-r--r-- src/test/cli/crushtool/help.t 16
13 files changed, 264 insertions, 34 deletions
diff --git a/src/crush/CrushCompiler.cc b/src/crush/CrushCompiler.cc
index fc6e6e8856b..ec3ab2a1427 100644
--- a/src/crush/CrushCompiler.cc
+++ b/src/crush/CrushCompiler.cc
@@ -179,9 +179,17 @@ int CrushCompiler::decompile_bucket(int cur,
int CrushCompiler::decompile(ostream &out)
{
- out << "# begin crush map\n\n";
+ out << "# begin crush map\n";
- out << "# devices\n";
+ // only dump tunables if they differ from the defaults
+ if (crush.get_choose_local_tries() != 2)
+ out << "tunable choose_local_tries " << crush.get_choose_local_tries() << "\n";
+ if (crush.get_choose_local_fallback_tries() != 5)
+ out << "tunable choose_local_fallback_tries " << crush.get_choose_local_fallback_tries() << "\n";
+ if (crush.get_choose_total_tries() != 19)
+ out << "tunable choose_total_tries " << crush.get_choose_total_tries() << "\n";
+
+ out << "\n# devices\n";
for (int i=0; i<crush.get_max_devices(); i++) {
out << "device " << i << " ";
print_item_name(out, i, crush);
@@ -322,6 +330,31 @@ int CrushCompiler::parse_device(iter_t const& i)
return 0;
}
+// Handle one "tunable <name> <value>" statement of the crush map source.
+//
+// children[1] of the parse node is the tunable name and children[2] its
+// integer value.  Returns 0 on success and -1 (after writing a message to
+// err) when the name is unknown or tunables have not been enabled via
+// enable_unsafe_tunables().  The crush map is only modified on success.
+int CrushCompiler::parse_tunable(iter_t const& i)
+{
+  string name = string_node(i->children[1]);
+  int val = int_node(i->children[2]);
+
+  // Validate the name first so that an unknown tunable is still reported
+  // as such, regardless of whether tunables are enabled.
+  if (name != "choose_local_tries" &&
+      name != "choose_local_fallback_tries" &&
+      name != "choose_total_tries") {
+    err << "tunable " << name << " not recognized" << std::endl;
+    return -1;
+  }
+
+  // Check the opt-in gate *before* touching the map: previously the value
+  // was stored first, so a rejected statement still changed the map.
+  if (!unsafe_tunables) {
+    err << "tunables are NOT FULLY IMPLEMENTED; enable with --enable-unsafe-tunables to enable this feature" << std::endl;
+    return -1;
+  }
+
+  if (name == "choose_local_tries")
+    crush.set_choose_local_tries(val);
+  else if (name == "choose_local_fallback_tries")
+    crush.set_choose_local_fallback_tries(val);
+  else  // name was validated above, so this is choose_total_tries
+    crush.set_choose_total_tries(val);
+
+  if (verbose) err << "tunable " << name << " " << val << std::endl;
+  return 0;
+}
+
int CrushCompiler::parse_bucket_type(iter_t const& i)
{
int id = int_node(i->children[1]);
@@ -595,6 +628,9 @@ int CrushCompiler::parse_crush(iter_t const& i)
int r = 0;
for (iter_t p = i->children.begin(); p != i->children.end(); p++) {
switch (p->value.id().to_long()) {
+ case crush_grammar::_tunable:
+ r = parse_tunable(p);
+ break;
case crush_grammar::_device:
r = parse_device(p);
break;
diff --git a/src/crush/CrushCompiler.h b/src/crush/CrushCompiler.h
index 5476f6b2e81..9d07d76afa9 100644
--- a/src/crush/CrushCompiler.h
+++ b/src/crush/CrushCompiler.h
@@ -15,6 +15,7 @@ class CrushCompiler {
CrushWrapper& crush;
ostream& err;
int verbose;
+ bool unsafe_tunables;
// decompile
enum dcb_state_t {
@@ -43,6 +44,7 @@ class CrushCompiler {
int int_node(node_t &node);
float float_node(node_t &node);
+ int parse_tunable(iter_t const& i);
int parse_device(iter_t const& i);
int parse_bucket_type(iter_t const& i);
int parse_bucket(iter_t const& i);
@@ -55,9 +57,14 @@ class CrushCompiler {
public:
CrushCompiler(CrushWrapper& c, ostream& eo, int verbosity=0)
- : crush(c), err(eo), verbose(verbosity) {}
+ : crush(c), err(eo), verbose(verbosity),
+ unsafe_tunables(false) {}
~CrushCompiler() {}
+ void enable_unsafe_tunables() {  // opt-in switch: the flag starts false, and parse_tunable() fails every "tunable" line while it is false
+ unsafe_tunables = true;
+ }
+
int decompile(ostream& out);
int compile(istream& in, const char *infn=0);
};
diff --git a/src/crush/CrushTester.cc b/src/crush/CrushTester.cc
index f7d1c64e4e8..a88aa9d869b 100644
--- a/src/crush/CrushTester.cc
+++ b/src/crush/CrushTester.cc
@@ -90,7 +90,8 @@ int CrushTester::test()
if (verbose > 3 )
err << "devices weights (hex): " << hex << weight << dec << std::endl;
-
+ if (output_choose_tries)
+ crush.start_choose_profile();
for (int r = min_rule; r < crush.get_max_rules() && r <= max_rule; r++) {
@@ -223,6 +224,10 @@ int CrushTester::test()
batchPer[currentBatch] = temporary_per;
sizes[out.size()]++;
+
+ if (output_bad_mappings && out.size() != (unsigned)nr) {
+ cout << "bad mapping rule " << r << " x " << x << " num_rep " << nr << " result " << out << std::endl;
+ }
}
// compute chi squared statistic for device examining the uniformity this batch of placements
@@ -308,5 +313,20 @@ int CrushTester::test()
#endif
}
}
+
+ if (output_choose_tries) {
+ __u32 *v;
+ int n = crush.get_choose_profile(&v);
+ for (int i=0; i<n; i++) {
+ cout.setf(std::ios::right);
+ cout << std::setw(2)
+ << i << ": " << std::setw(9) << v[i];
+ cout.unsetf(std::ios::right);
+ cout << std::endl;
+ }
+
+ crush.stop_choose_profile();
+ }
+
return 0;
}
diff --git a/src/crush/CrushTester.h b/src/crush/CrushTester.h
index 1974d10d153..fba36febab2 100644
--- a/src/crush/CrushTester.h
+++ b/src/crush/CrushTester.h
@@ -25,9 +25,13 @@ class CrushTester {
float mark_down_percentage;
int mark_down_start;
int down_range;
+
+public:
bool output_utilization;
bool output_utilization_all;
bool output_statistics;
+ bool output_bad_mappings;
+ bool output_choose_tries;
public:
CrushTester(CrushWrapper& c, ostream& eo, int verbosity=0)
@@ -42,14 +46,30 @@ public:
down_range_marked(false),
mark_down_percentage(0.0),
mark_down_start(0),
- down_range(1)
+ down_range(1),
+ output_utilization(false),
+ output_utilization_all(false),
+ output_statistics(false),
+ output_bad_mappings(false),
+ output_choose_tries(false)
{ }
- void set_output(bool set_output_utilization, bool set_output_utilization_all, bool set_output_statistics) {
- output_utilization = set_output_utilization;
- output_utilization_all = set_output_utilization_all;
- output_statistics = set_output_statistics;
+ void set_output_utilization(bool b) {  // sets the output_utilization flag; crushtool turns it on for --show_utilization ("show OSD usage")
+ output_utilization = b;
+ }
+ void set_output_utilization_all(bool b) {  // sets the output_utilization_all flag; crushtool turns it on for --show_utilization_all ("include zero weight items")
+ output_utilization_all = b;
+ }
+ void set_output_statistics(bool b) {  // sets the output_statistics flag; crushtool turns it on for --show_statistics ("show chi squared statistics")
+ output_statistics = b;
}
+ void set_output_bad_mappings(bool b) {  // when set, test() prints a "bad mapping" line for every input whose result size differs from num_rep
+ output_bad_mappings = b;
+ }
+ void set_output_choose_tries(bool b) {  // when set, test() starts choose profiling before the rule loop and prints the tries histogram afterwards
+ output_choose_tries = b;
+ }
+
void set_batches(int b) {
num_batches = b;
}
diff --git a/src/crush/CrushWrapper.cc b/src/crush/CrushWrapper.cc
index fd86f361861..15d5e140ee1 100644
--- a/src/crush/CrushWrapper.cc
+++ b/src/crush/CrushWrapper.cc
@@ -346,6 +346,11 @@ void CrushWrapper::encode(bufferlist& bl, bool lean) const
::encode(type_map, bl);
::encode(name_map, bl);
::encode(rule_name_map, bl);
+
+ // tunables
+ ::encode(crush->choose_local_tries, bl);
+ ::encode(crush->choose_local_fallback_tries, bl);
+ ::encode(crush->choose_total_tries, bl);
}
void CrushWrapper::decode(bufferlist::iterator& blp)
@@ -393,6 +398,12 @@ void CrushWrapper::decode(bufferlist::iterator& blp)
::decode(rule_name_map, blp);
build_rmaps();
+ // tunables
+ if (!blp.end()) {
+ ::decode(crush->choose_local_tries, blp);
+ ::decode(crush->choose_local_fallback_tries, blp);
+ ::decode(crush->choose_total_tries, blp);
+ }
finalize();
}
catch (...) {
diff --git a/src/crush/CrushWrapper.h b/src/crush/CrushWrapper.h
index 27e239d8ac9..82b9e4b2de2 100644
--- a/src/crush/CrushWrapper.h
+++ b/src/crush/CrushWrapper.h
@@ -91,6 +91,28 @@ public:
assert(crush);
}
+ // tunables
+ int get_choose_local_tries() const {  // "choose local retries before re-descent" (legacy default 2); the field is __u32, converted to int on return
+ return crush->choose_local_tries;
+ }
+ void set_choose_local_tries(int n) {  // stores n in a __u32 field with no range check; NOTE(review): a negative n would wrap to a huge value, callers should pass n >= 0
+ crush->choose_local_tries = n;
+ }
+
+ int get_choose_local_fallback_tries() const {  // "choose local attempts using a fallback permutation before re-descent" (legacy default 5); __u32 field converted to int
+ return crush->choose_local_fallback_tries;
+ }
+ void set_choose_local_fallback_tries(int n) {  // 0 disables the fallback-permutation search in crush_choose; NOTE(review): no range check, a negative n would wrap in the __u32 field
+ crush->choose_local_fallback_tries = n;
+ }
+
+ int get_choose_total_tries() const {  // "choose attempts before giving up" (legacy default 19); __u32 field converted to int
+ return crush->choose_total_tries;
+ }
+ void set_choose_total_tries(int n) {  // no range check (negative n would wrap in the __u32 field); NOTE(review): an active choose profile keeps its old allocation size if this changes
+ crush->choose_total_tries = n;
+ }
+
// bucket types
int get_num_type_names() const {
return type_map.size();
@@ -469,6 +491,26 @@ public:
crush_finalize(crush);
}
+ // Start (or restart) collection of the choose-tries histogram: any previous
+ // buffer is released and a zeroed array of choose_total_tries counters is
+ // installed in crush->choose_tries.  crush_choose() only counts while that
+ // pointer is non-NULL.
+ void start_choose_profile() {
+   free(crush->choose_tries);
+   // calloc zero-fills the counters and guards the size multiplication.
+   // If the allocation fails the pointer is simply left NULL, i.e. profiling
+   // stays off, instead of handing NULL to memset as the malloc+memset pair did.
+   crush->choose_tries = (__u32 *)calloc(crush->choose_total_tries,
+                                         sizeof(*crush->choose_tries));
+ }
+ void stop_choose_profile() {  // ends profiling: releases the histogram and clears the pointer that crush_choose tests before counting
+ free(crush->choose_tries);
+ crush->choose_tries = 0;
+ }
+
+ int get_choose_profile(__u32 **vec) {  // points *vec at the histogram and returns its entry count; returns 0 and leaves *vec untouched when profiling is off
+ if (crush->choose_tries) {
+ *vec = crush->choose_tries;  // borrowed pointer: the buffer is freed by stop_choose_profile()/start_choose_profile()
+ return crush->choose_total_tries;  // NOTE(review): current tunable value; equals the allocated length only if it has not changed since start_choose_profile()
+ }
+ return 0;
+ }
+
+
void set_max_devices(int m) {
crush->max_devices = m;
}
diff --git a/src/crush/builder.c b/src/crush/builder.c
index 9c7d1515faf..0f22b297692 100644
--- a/src/crush/builder.c
+++ b/src/crush/builder.c
@@ -19,6 +19,11 @@ struct crush_map *crush_create()
if (!m)
return NULL;
memset(m, 0, sizeof(*m));
+
+ /* initialize legacy tunable values */
+ m->choose_local_tries = 2;
+ m->choose_local_fallback_tries = 5;
+ m->choose_total_tries = 19;
return m;
}
diff --git a/src/crush/crush.c b/src/crush/crush.c
index 83bbbd0dcad..7f800b3404b 100644
--- a/src/crush/crush.c
+++ b/src/crush/crush.c
@@ -120,6 +120,7 @@ void crush_destroy(struct crush_map *map)
kfree(map->rules);
}
+ kfree(map->choose_tries);
kfree(map);
}
diff --git a/src/crush/crush.h b/src/crush/crush.h
index 9403696aadd..084c4a97869 100644
--- a/src/crush/crush.h
+++ b/src/crush/crush.h
@@ -159,6 +159,16 @@ struct crush_map {
__s32 max_buckets;
__u32 max_rules;
__s32 max_devices;
+
+ /* choose local retries before re-descent */
+ __u32 choose_local_tries;
+ /* choose local attempts using a fallback permutation before
+ * re-descent */
+ __u32 choose_local_fallback_tries;
+ /* choose attempts before giving up */
+ __u32 choose_total_tries;
+
+ __u32 *choose_tries;
};
diff --git a/src/crush/grammar.h b/src/crush/grammar.h
index d673af7f7de..c975e176a89 100644
--- a/src/crush/grammar.h
+++ b/src/crush/grammar.h
@@ -50,6 +50,7 @@ struct crush_grammar : public grammar<crush_grammar>
_step,
_crushrule,
_crushmap,
+ _tunable,
};
template <typename ScannerT>
@@ -60,6 +61,8 @@ struct crush_grammar : public grammar<crush_grammar>
rule<ScannerT, parser_context<>, parser_tag<_negint> > negint;
rule<ScannerT, parser_context<>, parser_tag<_name> > name;
+ rule<ScannerT, parser_context<>, parser_tag<_tunable> > tunable;
+
rule<ScannerT, parser_context<>, parser_tag<_device> > device;
rule<ScannerT, parser_context<>, parser_tag<_bucket_type> > bucket_type;
@@ -89,6 +92,9 @@ struct crush_grammar : public grammar<crush_grammar>
negint = leaf_node_d[ lexeme_d[ ch_p('-') >> +digit_p ] ];
name = leaf_node_d[ lexeme_d[ +( alnum_p || ch_p('-') || ch_p('_') || ch_p('.')) ] ];
+ // tunables
+ tunable = str_p("tunable") >> name >> posint;
+
// devices
device = str_p("device") >> posint >> name;
@@ -132,7 +138,7 @@ struct crush_grammar : public grammar<crush_grammar>
>> '}';
// the whole crush map
- crushmap = *(device | bucket_type) >> *bucket >> *crushrule;
+ crushmap = *(tunable | device | bucket_type) >> *bucket >> *crushrule;
}
rule<ScannerT, parser_context<>, parser_tag<_crushmap> > const&
diff --git a/src/crush/mapper.c b/src/crush/mapper.c
index 94d5dd30d72..e38b76e492d 100644
--- a/src/crush/mapper.c
+++ b/src/crush/mapper.c
@@ -307,7 +307,6 @@ static int crush_choose(const struct crush_map *map,
int item = 0;
int itemtype;
int collide, reject;
- const unsigned int orig_tries = 5; /* attempts before we fall back to search */
dprintk("CHOOSE%s bucket %d x %d outpos %d numrep %d\n", recurse_to_leaf ? "_LEAF" : "",
bucket->id, x, outpos, numrep);
@@ -352,8 +351,9 @@ static int crush_choose(const struct crush_map *map,
reject = 1;
goto reject;
}
- if (flocal >= (in->size>>1) &&
- flocal > orig_tries)
+ if (map->choose_local_fallback_tries > 0 &&
+ flocal >= (in->size>>1) &&
+ flocal > map->choose_local_fallback_tries)
item = bucket_perm_choose(in, x, r);
else
item = crush_bucket_choose(in, x, r);
@@ -392,7 +392,7 @@ static int crush_choose(const struct crush_map *map,
}
reject = 0;
- if (recurse_to_leaf) {
+ if (!collide && recurse_to_leaf) {
if (item < 0) {
if (crush_choose(map,
map->buckets[-1-item],
@@ -423,13 +423,14 @@ reject:
ftotal++;
flocal++;
- if (collide && flocal < 3)
+ if (collide && flocal <= map->choose_local_tries)
/* retry locally a few times */
retry_bucket = 1;
- else if (flocal <= in->size + orig_tries)
+ else if (map->choose_local_fallback_tries > 0 &&
+ flocal <= in->size + map->choose_local_fallback_tries)
/* exhaustive bucket search */
retry_bucket = 1;
- else if (ftotal < 20)
+ else if (ftotal <= map->choose_total_tries)
/* then retry descent */
retry_descent = 1;
else
@@ -451,6 +452,9 @@ reject:
dprintk("CHOOSE got %d\n", item);
out[outpos] = item;
outpos++;
+
+ /* The profiling histogram is allocated with choose_total_tries
+  * entries (CrushWrapper::start_choose_profile), but descent is
+  * retried while ftotal <= choose_total_tries, so ftotal can be
+  * equal to (or, after further local retries, larger than) the
+  * array length here.  Only count samples that fit. */
+ if (map->choose_tries && ftotal < map->choose_total_tries)
+   map->choose_tries[ftotal]++;
}
dprintk("CHOOSE returns %d\n", outpos);
diff --git a/src/crushtool.cc b/src/crushtool.cc
index b836cb464c4..809144ccfe1 100644
--- a/src/crushtool.cc
+++ b/src/crushtool.cc
@@ -75,9 +75,19 @@ void usage()
cout << " reweight a given item (and adjust ancestor\n"
<< " weights as needed)\n";
cout << " -i mapfn --reweight recalculate all bucket weights\n";
- cout << " --output-utilization output OSD usage\n";
- cout << " --output utilization-all include zero weight items\n";
- cout << " --output-statistics output chi squared statistics\n";
+ cout << " --show-utilization show OSD usage\n";
+ cout << " --show utilization-all\n";
+ cout << " include zero weight items\n";
+ cout << " --show-statistics show chi squared statistics\n";
+ cout << " --show-bad-mappings show bad mappings\n";
+ cout << " --show-choose-tries show choose tries histogram\n";
+ cout << " --set-choose-local-tries N\n";
+ cout << " set choose local retries before re-descent\n";
+ cout << " --set-choose-local-fallback-tries N\n";
+ cout << " set choose local retries using fallback\n";
+ cout << " permutation before re-descent\n";
+ cout << " --set-choose-total-tries N\n";
+ cout << " set choose total descent attempts\n";
exit(1);
}
@@ -109,9 +119,7 @@ int main(int argc, const char **argv)
bool decompile = false;
bool test = false;
bool verbose = false;
- bool output_utilization = false;
- bool output_utilization_all = false;
- bool output_statistics = false;
+ bool unsafe_tunables = false;
bool reweight = false;
int add_item = -1;
@@ -120,10 +128,16 @@ int main(int argc, const char **argv)
map<string,string> add_loc;
float reweight_weight = 0;
+ bool adjust = false;
+
int build = 0;
int num_osds =0;
vector<layer_t> layers;
+ int choose_local_tries = -1;
+ int choose_local_fallback_tries = -1;
+ int choose_total_tries = -1;
+
CrushWrapper crush;
CrushTester tester(crush, cerr, 1);
@@ -151,12 +165,16 @@ int main(int argc, const char **argv)
outfn = val;
} else if (ceph_argparse_flag(args, i, "-v", "--verbose", (char*)NULL)) {
verbose = true;
- } else if (ceph_argparse_flag(args, i, "--output_utilization", (char*)NULL)) {
- output_utilization = true;
- } else if (ceph_argparse_flag(args, i, "--output_utilization_all", (char*)NULL)) {
- output_utilization_all = true;
- } else if (ceph_argparse_flag(args, i, "--output_statistics", (char*)NULL)) {
- output_statistics = true;
+ } else if (ceph_argparse_flag(args, i, "--show_utilization", (char*)NULL)) {
+ tester.set_output_utilization(true);
+ } else if (ceph_argparse_flag(args, i, "--show_utilization_all", (char*)NULL)) {
+ tester.set_output_utilization_all(true);
+ } else if (ceph_argparse_flag(args, i, "--show_statistics", (char*)NULL)) {
+ tester.set_output_statistics(true);
+ } else if (ceph_argparse_flag(args, i, "--show_bad_mappings", (char*)NULL)) {
+ tester.set_output_bad_mappings(true);
+ } else if (ceph_argparse_flag(args, i, "--show_choose_tries", (char*)NULL)) {
+ tester.set_output_choose_tries(true);
} else if (ceph_argparse_witharg(args, i, &val, "-c", "--compile", (char*)NULL)) {
srcfn = val;
compile = true;
@@ -164,6 +182,17 @@ int main(int argc, const char **argv)
test = true;
} else if (ceph_argparse_flag(args, i, "-s", "--simulate", (char*)NULL)) {
tester.set_random_placement();
+ } else if (ceph_argparse_flag(args, i, "--enable-unsafe-tunables", (char*)NULL)) {
+ unsafe_tunables = true;
+ } else if (ceph_argparse_withint(args, i, &choose_local_tries, &err,
+ "--set_choose_local_tries", (char*)NULL)) {
+ adjust = true;
+ } else if (ceph_argparse_withint(args, i, &choose_local_fallback_tries, &err,
+ "--set_choose_local_fallback_tries", (char*)NULL)) {
+ adjust = true;
+ } else if (ceph_argparse_withint(args, i, &choose_total_tries, &err,
+ "--set_choose_total_tries", (char*)NULL)) {
+ adjust = true;
} else if (ceph_argparse_flag(args, i, "--reweight", (char*)NULL)) {
reweight = true;
} else if (ceph_argparse_withint(args, i, &add_item, &err, "--add_item", (char*)NULL)) {
@@ -300,7 +329,8 @@ int main(int argc, const char **argv)
if (decompile + compile + build > 1) {
usage();
}
- if (!compile && !decompile && !build && !test && !reweight && add_item < 0 &&
+ if (!compile && !decompile && !build && !test && !reweight && !adjust &&
+ add_item < 0 &&
remove_name.empty() && reweight_name.empty()) {
usage();
}
@@ -371,6 +401,8 @@ int main(int argc, const char **argv)
}
CrushCompiler cc(crush, cerr);
+ if (unsafe_tunables)
+ cc.enable_unsafe_tunables();
int r = cc.compile(in, srcfn.c_str());
if (r < 0)
exit(1);
@@ -539,6 +571,33 @@ int main(int argc, const char **argv)
modified = true;
}
+ const char *scary_tunables_message =
+ "** tunables are DANGEROUS and NOT YET RECOMMENDED. DO NOT USE without\n"
+ "** confirming with developers that your use-case is safe and correct.";
+ if (choose_local_tries >= 0) {
+ if (!unsafe_tunables) {
+ cerr << scary_tunables_message << std::endl;
+ return -1;
+ }
+ crush.set_choose_local_tries(choose_local_tries);
+ modified = true;
+ }
+ if (choose_local_fallback_tries >= 0) {
+ if (!unsafe_tunables) {
+ cerr << scary_tunables_message << std::endl;
+ return -1;
+ }
+ crush.set_choose_local_fallback_tries(choose_local_fallback_tries);
+ modified = true;
+ }
+ if (choose_total_tries >= 0) {
+ if (!unsafe_tunables) {
+ cerr << scary_tunables_message << std::endl;
+ return -1;
+ }
+ crush.set_choose_total_tries(choose_total_tries);
+ modified = true;
+ }
if (modified) {
crush.finalize();
@@ -559,7 +618,6 @@ int main(int argc, const char **argv)
}
if (test) {
- tester.set_output(output_utilization, output_utilization_all, output_statistics);
int r = tester.test();
if (r < 0)
exit(1);
diff --git a/src/test/cli/crushtool/help.t b/src/test/cli/crushtool/help.t
index 26cd17732c5..9987738e12f 100644
--- a/src/test/cli/crushtool/help.t
+++ b/src/test/cli/crushtool/help.t
@@ -28,7 +28,17 @@
reweight a given item (and adjust ancestor
weights as needed)
-i mapfn --reweight recalculate all bucket weights
- --output-utilization output OSD usage
- --output utilization-all include zero weight items
- --output-statistics output chi squared statistics
+ --show-utilization show OSD usage
+ --show utilization-all
+ include zero weight items
+ --show-statistics show chi squared statistics
+ --show-bad-mappings show bad mappings
+ --show-choose-tries show choose tries histogram
+ --set-choose-local-tries N
+ set choose local retries before re-descent
+ --set-choose-local-fallback-tries N
+ set choose local retries using fallback
+ permutation before re-descent
+ --set-choose-total-tries N
+ set choose total descent attempts
[1]