diff options
author | Paul J. Davis <paul.joseph.davis@gmail.com> | 2018-10-25 16:58:48 -0500 |
---|---|---|
committer | Paul J. Davis <paul.joseph.davis@gmail.com> | 2019-01-18 13:03:28 -0600 |
commit | 05678b93d560bceb63a3e19350d3e068cac70dbf (patch) | |
tree | 33d302b5ed35c9db52aec32cfbb3d0d3c9ed973e /src/mem3 | |
parent | 7bbe8286e3720247e7bea71c7780432d62bdcb60 (diff) | |
download | couchdb-05678b93d560bceb63a3e19350d3e068cac70dbf.tar.gz |
Implement configurable hash functions
This provides the capability for features to specify alternative hash
functions for placing documents in a given shard range. While the
functionality exists with this implementation, it is not yet actually
used.
Diffstat (limited to 'src/mem3')
-rw-r--r-- | src/mem3/src/mem3.erl | 8 | ||||
-rw-r--r-- | src/mem3/src/mem3_hash.erl | 73 | ||||
-rw-r--r-- | src/mem3/src/mem3_shards.erl | 4 | ||||
-rw-r--r-- | src/mem3/src/mem3_util.erl | 21 | ||||
-rw-r--r-- | src/mem3/test/mem3_hash_test.erl | 23 | ||||
-rw-r--r-- | src/mem3/test/mem3_util_test.erl | 8 |
6 files changed, 116 insertions, 21 deletions
diff --git a/src/mem3/src/mem3.erl b/src/mem3/src/mem3.erl index f1af0f796..832c88d54 100644 --- a/src/mem3/src/mem3.erl +++ b/src/mem3/src/mem3.erl @@ -237,15 +237,15 @@ dbname(_) -> %% @doc Determine if DocId belongs in shard (identified by record or filename) belongs(#shard{}=Shard, DocId) when is_binary(DocId) -> [Begin, End] = range(Shard), - belongs(Begin, End, DocId); + belongs(Begin, End, Shard, DocId); belongs(<<"shards/", _/binary>> = ShardName, DocId) when is_binary(DocId) -> [Begin, End] = range(ShardName), - belongs(Begin, End, DocId); + belongs(Begin, End, ShardName, DocId); belongs(DbName, DocId) when is_binary(DbName), is_binary(DocId) -> true. -belongs(Begin, End, DocId) -> - HashKey = mem3_util:hash(DocId), +belongs(Begin, End, Shard, DocId) -> + HashKey = mem3_hash:calculate(Shard, DocId), Begin =< HashKey andalso HashKey =< End. range(#shard{range = Range}) -> diff --git a/src/mem3/src/mem3_hash.erl b/src/mem3/src/mem3_hash.erl new file mode 100644 index 000000000..665c61cb1 --- /dev/null +++ b/src/mem3/src/mem3_hash.erl @@ -0,0 +1,73 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(mem3_hash). + +-export([ + calculate/2, + + get_hash_fun/1, + + crc32/1 +]). + + +-include_lib("mem3/include/mem3.hrl"). 
+ + +calculate(#shard{opts = Opts}, DocId) -> + Props = couch_util:get_value(props, Opts, []), + MFA = get_hash_fun_int(Props), + calculate(MFA, DocId); + +calculate(#ordered_shard{opts = Opts}, DocId) -> + Props = couch_util:get_value(props, Opts, []), + MFA = get_hash_fun_int(Props), + calculate(MFA, DocId); + +calculate(DbName, DocId) when is_binary(DbName) -> + MFA = get_hash_fun(DbName), + calculate(MFA, DocId); + +calculate({Mod, Fun, Args}, DocId) -> + erlang:apply(Mod, Fun, [DocId | Args]). + + +get_hash_fun(#shard{opts = Opts}) -> + get_hash_fun_int(Opts); + +get_hash_fun(#ordered_shard{opts = Opts}) -> + get_hash_fun_int(Opts); + +get_hash_fun(DbName0) when is_binary(DbName0) -> + DbName = mem3:dbname(DbName0), + try + [#shard{opts=Opts} | _] = mem3_shards:for_db(DbName), + get_hash_fun_int(couch_util:get_value(props, Opts, [])) + catch error:database_does_not_exist -> + {?MODULE, crc32, []} + end. + + +crc32(Item) when is_binary(Item) -> + erlang:crc32(Item); +crc32(Item) -> + erlang:crc32(term_to_binary(Item)). + + +get_hash_fun_int(Opts) when is_list(Opts) -> + case lists:keyfind(hash, 1, Opts) of + {hash, [Mod, Fun, Args]} -> + {Mod, Fun, Args}; + _ -> + {?MODULE, crc32, []} + end. diff --git a/src/mem3/src/mem3_shards.erl b/src/mem3/src/mem3_shards.erl index 183f28fef..6afc22f57 100644 --- a/src/mem3/src/mem3_shards.erl +++ b/src/mem3/src/mem3_shards.erl @@ -67,7 +67,7 @@ for_docid(DbName, DocId) -> for_docid(DbName, DocId, []). for_docid(DbName, DocId, Options) -> - HashKey = mem3_util:hash(DocId), + HashKey = mem3_hash:calculate(DbName, DocId), ShardHead = #shard{ dbname = DbName, range = ['$1', '$2'], @@ -397,7 +397,7 @@ load_shards_from_db(ShardDb, DbName) -> load_shards_from_disk(DbName, DocId)-> Shards = load_shards_from_disk(DbName), - HashKey = mem3_util:hash(DocId), + HashKey = mem3_hash:calculate(hd(Shards), DocId), [S || S <- Shards, in_range(S, HashKey)]. 
in_range(Shard, HashKey) -> diff --git a/src/mem3/src/mem3_util.erl b/src/mem3/src/mem3_util.erl index 927607aff..b44ca2332 100644 --- a/src/mem3/src/mem3_util.erl +++ b/src/mem3/src/mem3_util.erl @@ -12,7 +12,7 @@ -module(mem3_util). --export([hash/1, name_shard/2, create_partition_map/5, build_shards/2, +-export([name_shard/2, create_partition_map/5, build_shards/2, n_val/2, q_val/1, to_atom/1, to_integer/1, write_db_doc/1, delete_db_doc/1, shard_info/1, ensure_exists/1, open_db_doc/1]). -export([is_deleted/1, rotate_list/2]). @@ -32,10 +32,6 @@ -include_lib("mem3/include/mem3.hrl"). -include_lib("couch/include/couch_db.hrl"). -hash(Item) when is_binary(Item) -> - erlang:crc32(Item); -hash(Item) -> - erlang:crc32(term_to_binary(Item)). name_shard(Shard) -> name_shard(Shard, ""). @@ -165,7 +161,7 @@ build_shards_by_node(DbName, DocProps) -> dbname = DbName, node = to_atom(Node), range = [Beg, End], - opts = get_engine_opt(DocProps) + opts = get_shard_opts(DocProps) }, Suffix) end, Ranges) end, ByNode). @@ -183,7 +179,7 @@ build_shards_by_range(DbName, DocProps) -> node = to_atom(Node), range = [Beg, End], order = Order, - opts = get_engine_opt(DocProps) + opts = get_shard_opts(DocProps) }, Suffix) end, lists:zip(Nodes, lists:seq(1, length(Nodes)))) end, ByRange). @@ -200,6 +196,9 @@ to_integer(N) when is_binary(N) -> to_integer(N) when is_list(N) -> list_to_integer(N). +get_shard_opts(DocProps) -> + get_engine_opt(DocProps) ++ get_props_opt(DocProps). + get_engine_opt(DocProps) -> case couch_util:get_value(<<"engine">>, DocProps) of Engine when is_binary(Engine) -> @@ -208,6 +207,14 @@ get_engine_opt(DocProps) -> [] end. +get_props_opt(DocProps) -> + case couch_util:get_value(<<"props">>, DocProps) of + {Props} when is_list(Props) -> + [{props, Props}]; + _ -> + [] + end. 
+ n_val(undefined, NodeCount) -> n_val(config:get("cluster", "n", "3"), NodeCount); n_val(N, NodeCount) when is_list(N) -> diff --git a/src/mem3/test/mem3_hash_test.erl b/src/mem3/test/mem3_hash_test.erl new file mode 100644 index 000000000..7a40c5366 --- /dev/null +++ b/src/mem3/test/mem3_hash_test.erl @@ -0,0 +1,23 @@ +% Licensed under the Apache License, Version 2.0 (the "License"); you may not +% use this file except in compliance with the License. You may obtain a copy of +% the License at +% +% http://www.apache.org/licenses/LICENSE-2.0 +% +% Unless required by applicable law or agreed to in writing, software +% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT +% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the +% License for the specific language governing permissions and limitations under +% the License. + +-module(mem3_hash_test). + +-include_lib("eunit/include/eunit.hrl"). + +hash_test() -> + ?assertEqual(1624516141,mem3_hash:crc32(0)), + ?assertEqual(3816901808,mem3_hash:crc32("0")), + ?assertEqual(3523407757,mem3_hash:crc32(<<0>>)), + ?assertEqual(4108050209,mem3_hash:crc32(<<"0">>)), + ?assertEqual(3094724072,mem3_hash:crc32(zero)), + ok. diff --git a/src/mem3/test/mem3_util_test.erl b/src/mem3/test/mem3_util_test.erl index 214217ec4..8b74c4b2b 100644 --- a/src/mem3/test/mem3_util_test.erl +++ b/src/mem3/test/mem3_util_test.erl @@ -15,14 +15,6 @@ -include("mem3.hrl"). -include_lib("eunit/include/eunit.hrl"). -hash_test() -> - ?assertEqual(1624516141,mem3_util:hash(0)), - ?assertEqual(3816901808,mem3_util:hash("0")), - ?assertEqual(3523407757,mem3_util:hash(<<0>>)), - ?assertEqual(4108050209,mem3_util:hash(<<"0">>)), - ?assertEqual(3094724072,mem3_util:hash(zero)), - ok. - name_shard_test() -> Shard1 = #shard{}, ?assertError(function_clause, mem3_util:name_shard(Shard1, ".1234")), |