diff options
Diffstat (limited to 'lib/compiler/src/rec_env.erl')
-rw-r--r-- | lib/compiler/src/rec_env.erl | 640 |
1 files changed, 640 insertions, 0 deletions
diff --git a/lib/compiler/src/rec_env.erl b/lib/compiler/src/rec_env.erl new file mode 100644 index 0000000000..9b73e08ad8 --- /dev/null +++ b/lib/compiler/src/rec_env.erl @@ -0,0 +1,640 @@ +%% +%% %CopyrightBegin% +%% +%% Copyright Ericsson AB 2001-2009. All Rights Reserved. +%% +%% The contents of this file are subject to the Erlang Public License, +%% Version 1.1, (the "License"); you may not use this file except in +%% compliance with the License. You should have received a copy of the +%% Erlang Public License along with this software. If not, it can be +%% retrieved online at http://www.erlang.org/. +%% +%% Software distributed under the License is distributed on an "AS IS" +%% basis, WITHOUT WARRANTY OF ANY KIND, either express or implied. See +%% the License for the specific language governing rights and limitations +%% under the License. +%% +%% %CopyrightEnd% +%% +%% @author Richard Carlsson <richardc@it.uu.se> +%% @copyright 1999-2004 Richard Carlsson +%% @doc Abstract environments, supporting self-referential bindings and +%% automatic new-key generation. + +%% The current implementation is based on Erlang standard library +%% dictionaries. + +%%% -define(DEBUG, true). + +-module(rec_env). + +-export([bind/3, bind_list/3, bind_recursive/4, delete/2, empty/0, + get/2, is_defined/2, is_empty/1, keys/1, lookup/2, new_key/1, + new_key/2, new_keys/2, new_keys/3, size/1, to_list/1]). + +-import(erlang, [max/2]). + +-ifdef(DEBUG). +-export([test/1, test_custom/1, test_custom/2]). +-endif. + +-ifdef(DEBUG). +%% Code for testing: +%%@hidden +test(N) -> + test_0(integer, N). + +%%@hidden +test_custom(N) -> + F = fun (X) -> list_to_atom("X"++integer_to_list(X)) end, + test_custom(F, N). + +%%@hidden +test_custom(F, N) -> + test_0({custom, F}, N). + +test_0(Type, N) -> + put(new_key_calls, 0), + put(new_key_retries, 0), + put(new_key_max, 0), + Env = test_1(Type, N, empty()), + io:fwrite("\ncalls: ~w.\n", [get(new_key_calls)]), + io:fwrite("\nretries: ~w.\n", [get(new_key_retries)]), + io:fwrite("\nmax: ~w.\n", [get(new_key_max)]), + dict:to_list(element(1,Env)). + +test_1(integer = Type, N, Env) when is_integer(N), N > 0 -> + Key = new_key(Env), + test_1(Type, N - 1, bind(Key, value, Env)); +test_1({custom, F} = Type, N, Env) when is_integer(N), N > 0 -> + Key = new_key(F, Env), + test_1(Type, N - 1, bind(Key, value, Env)); +test_1(_,0, Env) -> + Env. +-endif. + + +%% Representation: +%% +%% environment() = [Mapping] +%% +%% Mapping = {map, Dict} | {rec, Dict, Dict} +%% Dict = dict:dictionary() +%% +%% An empty environment is a list containing a single `{map, Dict}' +%% element - empty lists are not valid environments. To find a key in an +%% environment, it is searched for in each mapping in the list, in +%% order, until it the key is found in some mapping, or the end of the +%% list is reached. In a 'rec' mapping, we keep the original dictionary +%% together with a version where entries may have been deleted - this +%% makes it possible to garbage collect the entire 'rec' mapping when +%% all its entries are unused (for example, by being shadowed by later +%% definitions). + + +%% ===================================================================== +%% @type environment(). An abstract environment. + +-type mapping() :: {'map', dict()} | {'rec', dict(), dict()}. +-type environment() :: [mapping(),...]. + +%% ===================================================================== +%% @spec empty() -> environment() +%% +%% @doc Returns an empty environment. + +-spec empty() -> environment(). + +empty() -> + [{map, dict:new()}]. + + +%% ===================================================================== +%% @spec is_empty(Env::environment()) -> boolean() +%% +%% @doc Returns <code>true</code> if the environment is empty, otherwise +%% <code>false</code>. + +-spec is_empty(environment()) -> boolean(). + +is_empty([{map, Dict} | Es]) -> + N = dict:size(Dict), + if N =/= 0 -> false; + Es =:= [] -> true; + true -> is_empty(Es) + end; +is_empty([{rec, Dict, _} | Es]) -> + N = dict:size(Dict), + if N =/= 0 -> false; + Es =:= [] -> true; + true -> is_empty(Es) + end. + + +%% ===================================================================== +%% @spec size(Env::environment()) -> integer() +%% +%% @doc Returns the number of entries in an environment. + +%% (The name 'size' cannot be used in local calls, since there exists a +%% built-in function with the same name.) + +-spec size(environment()) -> non_neg_integer(). + +size(Env) -> + env_size(Env). + +env_size([{map, Dict}]) -> + dict:size(Dict); +env_size([{map, Dict} | Env]) -> + dict:size(Dict) + env_size(Env); +env_size([{rec, Dict, _Dict0} | Env]) -> + dict:size(Dict) + env_size(Env). + + +%% ===================================================================== +%% @spec is_defined(Key, Env) -> boolean() +%% +%% Key = term() +%% Env = environment() +%% +%% @doc Returns <code>true</code> if <code>Key</code> is bound in the +%% environment, otherwise <code>false</code>. + +-spec is_defined(term(), environment()) -> boolean(). + +is_defined(Key, [{map, Dict} | Env]) -> + case dict:is_key(Key, Dict) of + true -> + true; + false when Env =:= [] -> + false; + false -> + is_defined(Key, Env) + end; +is_defined(Key, [{rec, Dict, _Dict0} | Env]) -> + dict:is_key(Key, Dict) orelse is_defined(Key, Env). + + +%% ===================================================================== +%% @spec keys(Env::environment()) -> [term()] +%% +%% @doc Returns the ordered list of all keys in the environment. + +-spec keys(environment()) -> [term()]. + +keys(Env) -> + lists:sort(keys(Env, [])). + +keys([{map, Dict}], S) -> + dict:fetch_keys(Dict) ++ S; +keys([{map, Dict} | Env], S) -> + keys(Env, dict:fetch_keys(Dict) ++ S); +keys([{rec, Dict, _Dict0} | Env], S) -> + keys(Env, dict:fetch_keys(Dict) ++ S). + + +%% ===================================================================== +%% @spec to_list(Env) -> [{Key, Value}] +%% +%% Env = environment() +%% Key = term() +%% Value = term() +%% +%% @doc Returns an ordered list of <code>{Key, Value}</code> pairs for +%% all keys in <code>Env</code>. <code>Value</code> is the same as that +%% returned by {@link get/2}. + +-spec to_list(environment()) -> [{term(), term()}]. + +to_list(Env) -> + lists:sort(to_list(Env, [])). + +to_list([{map, Dict}], S) -> + dict:to_list(Dict) ++ S; +to_list([{map, Dict} | Env], S) -> + to_list(Env, dict:to_list(Dict) ++ S); +to_list([{rec, Dict, _Dict0} | Env], S) -> + to_list(Env, dict:to_list(Dict) ++ S). + + +%% ===================================================================== +%% @spec bind(Key, Value, Env) -> environment() +%% +%% Key = term() +%% Value = term() +%% Env = environment() +%% +%% @doc Make a nonrecursive entry. This binds <code>Key</code> to +%% <code>Value</code>. If the key already existed in the environment, +%% the old entry is replaced. + +%% Note that deletion is done to free old bindings so they can be +%% garbage collected. + +-spec bind(term(), term(), environment()) -> environment(). + +bind(Key, Value, [{map, Dict}]) -> + [{map, dict:store(Key, Value, Dict)}]; +bind(Key, Value, [{map, Dict} | Env]) -> + [{map, dict:store(Key, Value, Dict)} | delete_any(Key, Env)]; +bind(Key, Value, Env) -> + [{map, dict:store(Key, Value, dict:new())} | delete_any(Key, Env)]. + + +%% ===================================================================== +%% @spec bind_list(Keys, Values, Env) -> environment() +%% +%% Keys = [term()] +%% Values = [term()] +%% Env = environment() +%% +%% @doc Make N nonrecursive entries. This binds each key in +%% <code>Keys</code> to the corresponding value in +%% <code>Values</code>. If some key already existed in the environment, +%% the previous entry is replaced. If <code>Keys</code> does not have +%% the same length as <code>Values</code>, an exception is generated. + +-spec bind_list([term()], [term()], environment()) -> environment(). + +bind_list(Ks, Vs, [{map, Dict}]) -> + [{map, store_list(Ks, Vs, Dict)}]; +bind_list(Ks, Vs, [{map, Dict} | Env]) -> + [{map, store_list(Ks, Vs, Dict)} | delete_list(Ks, Env)]; +bind_list(Ks, Vs, Env) -> + [{map, store_list(Ks, Vs, dict:new())} | delete_list(Ks, Env)]. + +store_list([K | Ks], [V | Vs], Dict) -> + store_list(Ks, Vs, dict:store(K, V, Dict)); +store_list([], _, Dict) -> + Dict. + +delete_list([K | Ks], Env) -> + delete_list(Ks, delete_any(K, Env)); +delete_list([], Env) -> + Env. + +%% By not calling `delete' unless we have to, we avoid unnecessary +%% rewriting of the data. + +delete_any(Key, Env) -> + case is_defined(Key, Env) of + true -> + delete(Key, Env); + false -> + Env + end. + +%% ===================================================================== +%% @spec delete(Key, Env) -> environment() +%% +%% Key = term() +%% Env = environment() +%% +%% @doc Delete an entry. This removes <code>Key</code> from the +%% environment. + +-spec delete(term(), environment()) -> environment(). + +delete(Key, [{map, Dict} = E | Env]) -> + case dict:is_key(Key, Dict) of + true -> + [{map, dict:erase(Key, Dict)} | Env]; + false -> + delete_1(Key, Env, E) + end; +delete(Key, [{rec, Dict, Dict0} = E | Env]) -> + case dict:is_key(Key, Dict) of + true -> + %% The Dict0 component must be preserved as it is until all + %% keys in Dict have been deleted. + Dict1 = dict:erase(Key, Dict), + case dict:size(Dict1) of + 0 -> + Env; % the whole {rec,...} is now garbage + _ -> + [{rec, Dict1, Dict0} | Env] + end; + false -> + [E | delete(Key, Env)] + end. + +%% This is just like above, except we pass on the preceding 'map' +%% mapping in the list to enable merging when removing 'rec' mappings. + +delete_1(Key, [{rec, Dict, Dict0} = E | Env], E1) -> + case dict:is_key(Key, Dict) of + true -> + Dict1 = dict:erase(Key, Dict), + case dict:size(Dict1) of + 0 -> + concat(E1, Env); + _ -> + [E1, {rec, Dict1, Dict0} | Env] + end; + false -> + [E1, E | delete(Key, Env)] + end. + +concat({map, D1}, [{map, D2} | Env]) -> + [dict:merge(fun (_K, V1, _V2) -> V1 end, D1, D2) | Env]; +concat(E1, Env) -> + [E1 | Env]. + + +%% ===================================================================== +%% @spec bind_recursive(Keys, Values, Fun, Env) -> NewEnv +%% +%% Keys = [term()] +%% Values = [term()] +%% Fun = (Value, Env) -> term() +%% Env = environment() +%% NewEnv = environment() +%% +%% @doc Make N recursive entries. This binds each key in +%% <code>Keys</code> to the value of <code>Fun(Value, NewEnv)</code> for +%% the corresponding <code>Value</code>. If <code>Keys</code> does not +%% have the same length as <code>Values</code>, an exception is +%% generated. If some key already existed in the environment, the old +%% entry is replaced. +%% +%% <p>Note: the function <code>Fun</code> is evaluated each time one of +%% the stored keys is looked up, but only then.</p> +%% +%% <p>Examples: +%%<pre> +%% NewEnv = bind_recursive([foo, bar], [1, 2], +%% fun (V, E) -> V end, +%% Env)</pre> +%% +%% This does nothing interesting; <code>get(foo, NewEnv)</code> yields +%% <code>1</code> and <code>get(bar, NewEnv)</code> yields +%% <code>2</code>, but there is more overhead than if the {@link +%% bind_list/3} function had been used. +%% +%% <pre> +%% NewEnv = bind_recursive([foo, bar], [1, 2], +%% fun (V, E) -> {V, E} end, +%% Env)</pre> +%% +%% Here, however, <code>get(foo, NewEnv)</code> will yield <code>{1, +%% NewEnv}</code> and <code>get(bar, NewEnv)</code> will yield <code>{2, +%% NewEnv}</code>, i.e., the environment <code>NewEnv</code> contains +%% recursive bindings.</p> + +-spec bind_recursive([term()], [term()], + fun((term(), environment()) -> term()), + environment()) -> environment(). + +bind_recursive([], [], _, Env) -> + Env; +bind_recursive(Ks, Vs, F, Env) -> + F1 = fun (V) -> + fun (Dict) -> F(V, [{rec, Dict, Dict} | Env]) end + end, + Dict = bind_recursive_1(Ks, Vs, F1, dict:new()), + [{rec, Dict, Dict} | Env]. + +bind_recursive_1([K | Ks], [V | Vs], F, Dict) -> + bind_recursive_1(Ks, Vs, F, dict:store(K, F(V), Dict)); +bind_recursive_1([], [], _, Dict) -> + Dict. + + +%% ===================================================================== +%% @spec lookup(Key, Env) -> error | {ok, Value} +%% +%% Key = term() +%% Env = environment() +%% Value = term() +%% +%% @doc Returns <code>{ok, Value}</code> if <code>Key</code> is bound to +%% <code>Value</code> in <code>Env</code>, and <code>error</code> +%% otherwise. + +-spec lookup(term(), environment()) -> 'error' | {'ok', term()}. + +lookup(Key, [{map, Dict} | Env]) -> + case dict:find(Key, Dict) of + {ok, _}=Value -> + Value; + error when Env =:= [] -> + error; + error -> + lookup(Key, Env) + end; +lookup(Key, [{rec, Dict, Dict0} | Env]) -> + case dict:find(Key, Dict) of + {ok, F} -> + {ok, F(Dict0)}; + error -> + lookup(Key, Env) + end. + + +%% ===================================================================== +%% @spec get(Key, Env) -> Value +%% +%% Key = term() +%% Env = environment() +%% Value = term() +%% +%% @doc Returns the value that <code>Key</code> is bound to in +%% <code>Env</code>. Throws <code>{undefined, Key}</code> if the key +%% does not exist in <code>Env</code>. + +-spec get(term(), environment()) -> term(). + +get(Key, Env) -> + case lookup(Key, Env) of + {ok, Value} -> Value; + error -> throw({undefined, Key}) + end. + + +%% ===================================================================== +%% The key-generating algorithm could possibly be further improved. The +%% important thing to keep in mind is, that when we need a new key, we +%% are generally in mid-traversal of a syntax tree, and existing names +%% in the tree may be closely grouped and evenly distributed or even +%% forming a compact range (often having been generated by a "gensym", +%% or by this very algorithm itself). This means that if we generate an +%% identifier whose value is too close to those already seen (i.e., +%% which are in the environment), it is very probable that we will +%% shadow a not-yet-seen identifier further down in the tree, the result +%% being that we induce another later renaming, and end up renaming most +%% of the identifiers, completely contrary to our intention. We need to +%% generate new identifiers in a way that avoids such systematic +%% collisions. +%% +%% One way of getting a new key to try when the previous attempt failed +%% is of course to e.g. add one to the last tried value. However, in +%% general it's a bad idea to try adjacent identifiers: the percentage +%% of retries will typically increase a lot, so you may lose big on the +%% extra lookups while gaining only a little from the quicker +%% computation. +%% +%% We want an initial range that is large enough for most typical cases. +%% If we start with, say, a range of 10, we might quickly use up most of +%% the values in the range 1-10 (or 1-100) for new top-level variables - +%% but as we start traversing the syntax tree, it is quite likely that +%% exactly those variables will be encountered again (this depends on +%% how the names in the tree were created), and will then need to be +%% renamed. If we instead begin with a larger range, it is less likely +%% that any top-level names that we introduce will shadow names that we +%% will find in the tree. Of course we cannot know how large is large +%% enough: for any initial range, there is some syntax tree that uses +%% all the values in that range, and thus any top-level names introduced +%% will shadow names in the tree. The point is to avoid this happening +%% all the time - a range of about 1000 seems enough for most programs. +%% +%% The following values have been shown to work well: + +-define(MINIMUM_RANGE, 1000). +-define(START_RANGE_FACTOR, 50). +-define(MAX_RETRIES, 2). % retries before enlarging range +-define(ENLARGE_FACTOR, 10). % range enlargment factor + +-ifdef(DEBUG). +%% If you want to use these process dictionary counters, make sure to +%% initialise them to zero before you call any of the key-generating +%% functions. +%% +%% new_key_calls total number of calls +%% new_key_retries failed key generation attempts +%% new_key_max maximum generated integer value +%% +-define(measure_calls(), + put(new_key_calls, 1 + get(new_key_calls))). +-define(measure_max_key(N), + case N > get(new_key_max) of + true -> + put(new_key_max, N); + false -> + ok + end). +-define(measure_retries(N), + put(new_key_retries, get(new_key_retries) + N)). +-else. +-define(measure_calls(), ok). +-define(measure_max_key(N), ok). +-define(measure_retries(N), ok). +-endif. + + +%% ===================================================================== +%% @spec new_key(Env::environment()) -> integer() +%% +%% @doc Returns an integer which is not already used as key in the +%% environment. New integers are generated using an algorithm which +%% tries to keep the values randomly distributed within a reasonably +%% small range relative to the number of entries in the environment. +%% +%% <p>This function uses the Erlang standard library module +%% <code>random</code> to generate new keys.</p> +%% +%% <p>Note that only the new key is returned; the environment itself is +%% not updated by this function.</p> + +-spec new_key(environment()) -> integer(). + +new_key(Env) -> + new_key(fun (X) -> X end, Env). + + +%% ===================================================================== +%% @spec new_key(Function, Env) -> term() +%% +%% Function = (integer()) -> term() +%% Env = environment() +%% +%% @doc Returns a term which is not already used as key in the +%% environment. The term is generated by applying <code>Function</code> +%% to an integer generated as in {@link new_key/1}. +%% +%% <p>Note that only the generated term is returned; the environment +%% itself is not updated by this function.</p> + +-spec new_key(fun((integer()) -> term()), environment()) -> term(). + +new_key(F, Env) -> + ?measure_calls(), + R = start_range(Env), + %% io:fwrite("Start range: ~w.\n", [R]), + new_key(R, F, Env). + +new_key(R, F, Env) -> + new_key(generate(R, R), R, 0, F, Env). + +new_key(N, R, T, F, Env) when T < ?MAX_RETRIES -> + A = F(N), + case is_defined(A, Env) of + true -> + %% io:fwrite("CLASH: ~w.\n", [A]), + new_key(generate(N, R), R, T + 1, F, Env); + false -> + ?measure_max_key(N), + ?measure_retries(T), + %% io:fwrite("New: ~w.\n", [N]), + A + end; +new_key(N, R, _T, F, Env) -> + %% Too many retries - enlarge the range and start over. + ?measure_retries((_T + 1)), + R1 = trunc(R * ?ENLARGE_FACTOR), + %% io:fwrite("**NEW RANGE**: ~w.\n", [R1]), + new_key(generate(N, R1), R1, 0, F, Env). + +start_range(Env) -> + max(env_size(Env) * ?START_RANGE_FACTOR, ?MINIMUM_RANGE). + +%% The previous key might or might not be used to compute the next key +%% to be tried. It is currently not used. +%% +%% In order to avoid causing cascading renamings, it is important that +%% this function does not generate values in order, but +%% (pseudo-)randomly distributed over the range. + +generate(_N, Range) -> + random:uniform(Range). % works well + + +%% ===================================================================== +%% @spec new_keys(N, Env) -> [integer()] +%% +%% N = integer() +%% Env = environment() +%% +%% @doc Returns a list of <code>N</code> distinct integers that are not +%% already used as keys in the environment. See {@link new_key/1} for +%% details. + +-spec new_keys(integer(), environment()) -> [integer()]. + +new_keys(N, Env) when is_integer(N) -> + new_keys(N, fun (X) -> X end, Env). + + +%% ===================================================================== +%% @spec new_keys(N, Function, Env) -> [term()] +%% +%% N = integer() +%% Function = (integer()) -> term() +%% Env = environment() +%% +%% @doc Returns a list of <code>N</code> distinct terms that are not +%% already used as keys in the environment. See {@link new_key/3} for +%% details. + +-spec new_keys(integer(), fun((integer()) -> term()), environment()) -> [term()]. + +new_keys(N, F, Env) when is_integer(N) -> + R = start_range(Env), + new_keys(N, [], R, F, Env). + +new_keys(N, Ks, R, F, Env) when N > 0 -> + Key = new_key(R, F, Env), + Env1 = bind(Key, true, Env), % dummy binding + new_keys(N - 1, [Key | Ks], R, F, Env1); +new_keys(0, Ks, _, _, _) -> + Ks. |