%% %% %CopyrightBegin% %% %% Copyright Ericsson AB 2018. All Rights Reserved. %% %% Licensed under the Apache License, Version 2.0 (the "License"); %% you may not use this file except in compliance with the License. %% You may obtain a copy of the License at %% %% http://www.apache.org/licenses/LICENSE-2.0 %% %% Unless required by applicable law or agreed to in writing, software %% distributed under the License is distributed on an "AS IS" BASIS, %% WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. %% See the License for the specific language governing permissions and %% limitations under the License. %% %% %CopyrightEnd% %% %%% %%% This pass optimizes bit syntax matching, and is centered around the concept %%% of "match context reuse" which is best explained through example. To put it %%% shortly we attempt to turn this: %%% %%% <<0,B/bits>> = A, %%% <<1,C/bits>> = B, %%% <> = C, %%% D. %%% %%% ... Into this: %%% %%% <<0,1,D,_/bits>>=A, %%% D. %%% %%% Which is much faster as it avoids the creation of intermediate terms. This %%% is especially noticeable in loops where such garbage is generated on each %%% iteration. %%% %%% The optimization itself is very simple and can be applied whenever there's %%% matching on the tail end of a binary; instead of creating a new binary and %%% starting a new match context on it, we reuse the match context used to %%% extract the tail and avoid the creation of both objects. %%% %%% The catch is that a match context isn't a proper type and nothing outside %%% of bit syntax match operations can handle them. We therefore need to make %%% sure that they never "leak" into other instructions, and most of the pass %%% revolves around getting around that limitation. %%% %%% Unlike most other passes we look at the whole module so we can combine %%% matches across function boundaries, greatly increasing the performance of %%% complex matches and loops. %%% -module(beam_ssa_bsm). -export([module/2, format_error/1]). -include("beam_ssa.hrl"). -include("beam_types.hrl"). -import(lists, [member/2, reverse/1, splitwith/2, map/2, foldl/3, mapfoldl/3, nth/2, max/1, unzip/1]). -spec format_error(term()) -> nonempty_string(). format_error(OptInfo) -> format_opt_info(OptInfo). -spec module(Module, Options) -> Result when Module :: beam_ssa:b_module(), Options :: [compile:option()], Result :: {ok, beam_ssa:b_module(), list()}. -define(PASS(N), {N,fun N/1}). module(#b_module{body=Fs0}=Module, Opts) -> ModInfo = analyze_module(Module), %% combine_matches is repeated after accept_context_args as the control %% flow changes can enable further optimizations, as in the example below: %% %% a(<<0,X/binary>>) -> a(X); %% a(A) when bit_size(A) =:= 52 -> bar; %% a(<<1,X/binary>>) -> X. %% Match context will be reused here when %% %% when repeated. {Fs, _} = compile:run_sub_passes( [?PASS(combine_matches), ?PASS(accept_context_args), ?PASS(combine_matches), ?PASS(allow_context_passthrough), ?PASS(skip_outgoing_tail_extraction), ?PASS(annotate_context_parameters)], {Fs0, ModInfo}), Ws = case proplists:get_bool(bin_opt_info, Opts) of true -> collect_opt_info(Fs); false -> [] end, {ok, Module#b_module{body=Fs}, Ws}. -type module_info() :: #{ func_id() => func_info() }. -type func_id() :: {Name :: atom(), Arity :: non_neg_integer()}. -type func_info() :: #{ has_bsm_ops => boolean(), parameters => [#b_var{}], parameter_info => #{ #b_var{} => param_info() } }. -type param_info() :: suitable_for_reuse | {Problem :: atom(), Where :: term()}. -spec analyze_module(#b_module{}) -> module_info(). analyze_module(#b_module{body=Fs}) -> foldl(fun(#b_function{args=Parameters}=F, I) -> FuncInfo = #{ has_bsm_ops => has_bsm_ops(F), parameters => Parameters, parameter_info => #{} }, FuncId = get_fa(F), I#{ FuncId => FuncInfo } end, #{}, Fs). has_bsm_ops(#b_function{bs=Blocks}) -> hbo_blocks(maps:to_list(Blocks)). hbo_blocks([{_,#b_blk{is=Is}} | Blocks]) -> case hbo_is(Is) of false -> hbo_blocks(Blocks); true -> true end; hbo_blocks([]) -> false. hbo_is([#b_set{op=bs_start_match} | _]) -> true; hbo_is([_I | Is]) -> hbo_is(Is); hbo_is([]) -> false. %% Checks whether it's legal to make a call with the given argument as a match %% context, returning the param_info() of the relevant parameter. -spec check_context_call(#b_set{}, Arg, CtxChain, ModInfo) -> param_info() when Arg :: #b_var{}, CtxChain :: [#b_var{}], ModInfo :: module_info(). check_context_call(#b_set{args=Args}, Arg, CtxChain, ModInfo) -> Aliases = [Arg | CtxChain], ccc_1(Args, Arg, Aliases, ModInfo). ccc_1([#b_local{}=Call | Args], Ctx, Aliases, ModInfo) -> %% Matching operations assume that their context isn't aliased (as in %% pointer aliasing), so we must reject calls whose arguments contain more %% than one reference to the context. %% %% TODO: Try to fall back to passing binaries in these cases. Partial reuse %% is better than nothing. UseCount = foldl(fun(Arg, C) -> case member(Arg, Aliases) of true -> C + 1; false -> C end end, 0, Args), if UseCount =:= 1 -> #b_local{name=#b_literal{val=Name},arity=Arity} = Call, Callee = {Name, Arity}, ParamInfo = funcinfo_get(Callee, parameter_info, ModInfo), Parameters = funcinfo_get(Callee, parameters, ModInfo), Parameter = nth(1 + arg_index(Ctx, Args), Parameters), case maps:find(Parameter, ParamInfo) of {ok, suitable_for_reuse} -> suitable_for_reuse; {ok, Other} -> {unsuitable_call, {Call, Other}}; error -> {no_match_on_entry, Call} end; UseCount > 1 -> {multiple_uses_in_call, Call} end; ccc_1([#b_remote{}=Call | _Args], _Ctx, _CtxChain, _ModInfo) -> {remote_call, Call}; ccc_1([Fun | _Args], _Ctx, _CtxChain, _ModInfo) -> %% TODO: It may be possible to support this in the future for locally %% defined funs, including ones with free variables. {fun_call, Fun}. %% Returns the index of Var in Args. arg_index(Var, Args) -> arg_index_1(Var, Args, 0). arg_index_1(Var, [Var | _Args], Index) -> Index; arg_index_1(Var, [_Arg | Args], Index) -> arg_index_1(Var, Args, Index + 1). is_tail_binary(#b_set{op=bs_match,args=[#b_literal{val=binary} | Rest]}) -> member(#b_literal{val=all}, Rest); is_tail_binary(#b_set{op=bs_get_tail}) -> true; is_tail_binary(_) -> false. is_tail_binary(#b_var{}=Var, Defs) -> case find_match_definition(Var, Defs) of {ok, Def} -> is_tail_binary(Def); _ -> false end; is_tail_binary(_Literal, _Defs) -> false. assert_match_context(#b_var{}=Var, Defs) -> case maps:find(Var, Defs) of {ok, #b_set{op=bs_match,args=[_,#b_var{}=Ctx|_]}} -> assert_match_context(Ctx, Defs); {ok, #b_set{op=bs_start_match}} -> ok end. find_match_definition(#b_var{}=Var, Defs) -> case maps:find(Var, Defs) of {ok, #b_set{op=bs_extract,args=[Ctx]}} -> maps:find(Ctx, Defs); {ok, #b_set{op=bs_get_tail}=Def} -> {ok, Def}; _ -> error end. %% Returns a list of all contexts that were used to extract Var. context_chain_of(#b_var{}=Var, Defs) -> case maps:find(Var, Defs) of {ok, #b_set{op=bs_match,args=[_,#b_var{}=Ctx|_]}} -> [Ctx | context_chain_of(Ctx, Defs)]; {ok, #b_set{op=bs_get_tail,args=[Ctx]}} -> [Ctx | context_chain_of(Ctx, Defs)]; {ok, #b_set{op=bs_extract,args=[Ctx]}} -> [Ctx | context_chain_of(Ctx, Defs)]; _ -> [] end. %% Grabs the match context used to produce the given variable. match_context_of(#b_var{}=Var, Defs) -> Ctx = match_context_of_1(Var, Defs), assert_match_context(Ctx, Defs), Ctx. match_context_of_1(Var, Defs) -> case maps:get(Var, Defs) of #b_set{op=bs_extract,args=[#b_var{}=Ctx0]} -> #b_set{op=bs_match, args=[_,#b_var{}=Ctx|_]} = maps:get(Ctx0, Defs), Ctx; #b_set{op=bs_get_tail,args=[#b_var{}=Ctx]} -> Ctx end. funcinfo_get(#b_function{}=F, Attribute, ModInfo) -> funcinfo_get(get_fa(F), Attribute, ModInfo); funcinfo_get({_,_}=Key, Attribute, ModInfo) -> FuncInfo = maps:get(Key, ModInfo), maps:get(Attribute, FuncInfo). funcinfo_set(#b_function{}=F, Attribute, Value, ModInfo) -> funcinfo_set(get_fa(F), Attribute, Value, ModInfo); funcinfo_set(Key, Attribute, Value, ModInfo) -> FuncInfo = maps:put(Attribute, Value, maps:get(Key, ModInfo, #{})), maps:put(Key, FuncInfo, ModInfo). get_fa(#b_function{ anno = Anno }) -> {_,Name,Arity} = maps:get(func_info, Anno), {Name,Arity}. %% Replaces matched-out binaries with aliases that are lazily converted to %% binary form when used, allowing us to keep the "match path" free of binary %% creation. -spec alias_matched_binaries(Blocks, Counter, AliasMap) -> Result when Blocks :: beam_ssa:block_map(), Counter :: non_neg_integer(), AliasMap :: match_alias_map(), Result :: {Blocks, Counter}. -type match_alias_map() :: #{ Binary :: #b_var{} => { %% Replace all uses of Binary with an alias after this %% label. AliasAfter :: beam_ssa:label(), %% The match context whose tail is equal to Binary. Context :: #b_var{} } }. %% Keeps track of the promotions we need to insert. They're partially keyed by %% location because they may not be valid on all execution paths and we may %% need to add redundant promotions in some cases. -type promotion_map() :: #{ { PromoteAt :: beam_ssa:label(), Variable :: #b_var{} } => Instruction :: #b_set{} }. -record(amb, { dominators :: beam_ssa:dominator_map(), match_aliases :: match_alias_map(), cnt :: non_neg_integer(), promotions = #{} :: promotion_map() }). alias_matched_binaries(Blocks0, Counter, AliasMap) when AliasMap =/= #{} -> {Dominators, _} = beam_ssa:dominators(Blocks0), State0 = #amb{ dominators = Dominators, match_aliases = AliasMap, cnt = Counter }, {Blocks, State} = beam_ssa:mapfold_blocks_rpo(fun amb_1/3, [0], State0, Blocks0), {amb_insert_promotions(Blocks, State), State#amb.cnt}; alias_matched_binaries(Blocks, Counter, _AliasMap) -> {Blocks, Counter}. amb_1(Lbl, #b_blk{is=Is0,last=Last0}=Block, State0) -> {Is, State1} = mapfoldl(fun(I, State) -> amb_assign_set(I, Lbl, State) end, State0, Is0), {Last, State} = amb_assign_last(Last0, Lbl, State1), {Block#b_blk{is=Is,last=Last}, State}. amb_assign_set(#b_set{op=phi,args=Args0}=I, _Lbl, State0) -> %% Phi node aliases are relative to their source block, not their %% containing block. {Args, State} = mapfoldl(fun({Arg0, Lbl}, Acc) -> {Arg, State} = amb_get_alias(Arg0, Lbl, Acc), {{Arg, Lbl}, State} end, State0, Args0), {I#b_set{args=Args}, State}; amb_assign_set(#b_set{args=Args0}=I, Lbl, State0) -> {Args, State} = mapfoldl(fun(Arg0, Acc) -> amb_get_alias(Arg0, Lbl, Acc) end, State0, Args0), {I#b_set{args=Args}, State}. amb_assign_last(#b_ret{arg=Arg0}=T, Lbl, State0) -> {Arg, State} = amb_get_alias(Arg0, Lbl, State0), {T#b_ret{arg=Arg}, State}; amb_assign_last(#b_switch{arg=Arg0}=T, Lbl, State0) -> {Arg, State} = amb_get_alias(Arg0, Lbl, State0), {T#b_switch{arg=Arg}, State}; amb_assign_last(#b_br{bool=Arg0}=T, Lbl, State0) -> {Arg, State} = amb_get_alias(Arg0, Lbl, State0), {T#b_br{bool=Arg}, State}. amb_get_alias(#b_var{}=Arg, Lbl, State) -> case maps:find(Arg, State#amb.match_aliases) of {ok, {AliasAfter, Context}} -> %% Our context may not have been created yet, so we skip assigning %% an alias unless the given block is among our dominators. Dominators = maps:get(Lbl, State#amb.dominators), case member(AliasAfter, Dominators) of true -> amb_create_alias(Arg, Context, Lbl, State); false -> {Arg, State} end; error -> {Arg, State} end; amb_get_alias(#b_remote{mod=Mod0,name=Name0}=Arg0, Lbl, State0) -> {Mod, State1} = amb_get_alias(Mod0, Lbl, State0), {Name, State} = amb_get_alias(Name0, Lbl, State1), Arg = Arg0#b_remote{mod=Mod,name=Name}, {Arg, State}; amb_get_alias(Arg, _Lbl, State) -> {Arg, State}. amb_create_alias(#b_var{}=Arg0, Context, Lbl, State0) -> Dominators = maps:get(Lbl, State0#amb.dominators), Promotions0 = State0#amb.promotions, PrevPromotions = [maps:get({Dom, Arg0}, Promotions0) || Dom <- Dominators, is_map_key({Dom, Arg0}, Promotions0)], case PrevPromotions of [_|_] -> %% We've already created an alias prior to this block, so we'll %% grab the most recent one to minimize stack use. #b_set{dst=Alias} = max(PrevPromotions), {Alias, State0}; [] -> %% If we haven't created an alias we need to do so now. The %% promotion will be inserted later by amb_insert_promotions/2. Counter = State0#amb.cnt, Alias = #b_var{name={'@ssa_bsm_alias', Counter}}, Promotion = #b_set{op=bs_get_tail,dst=Alias,args=[Context]}, Promotions = maps:put({Lbl, Arg0}, Promotion, Promotions0), State = State0#amb{ promotions=Promotions, cnt=Counter+1 }, {Alias, State} end. amb_insert_promotions(Blocks0, State) -> F = fun({Lbl, #b_var{}}, Promotion, Blocks) -> Block = maps:get(Lbl, Blocks), Alias = Promotion#b_set.dst, {Before, After} = splitwith( fun(#b_set{args=Args}) -> not is_var_in_args(Alias, Args) end, Block#b_blk.is), Is = Before ++ [Promotion | After], maps:put(Lbl, Block#b_blk{is=Is}, Blocks) end, maps:fold(F, Blocks0, State#amb.promotions). is_var_in_args(Var, [Var | _]) -> true; is_var_in_args(Var, [#b_remote{name=Var} | _]) -> true; is_var_in_args(Var, [#b_remote{mod=Var} | _]) -> true; is_var_in_args(Var, [_ | Args]) -> is_var_in_args(Var, Args); is_var_in_args(_Var, []) -> false. %%% %%% Subpasses %%% %% Removes superflous chained bs_start_match instructions in the same %% function. When matching on an extracted tail binary, or on a binary we've %% already matched on, we reuse the original match context. %% %% This pass runs first since it makes subsequent optimizations more effective %% by removing spots where promotion would be required. -type prior_match_map() :: #{ Binary :: #b_var{} => [{ %% The context and success label of a previous %% bs_start_match made on this binary. ValidAfter :: beam_ssa:label(), Context :: #b_var{} }] }. -record(cm, { definitions :: beam_ssa:definition_map(), dominators :: beam_ssa:dominator_map(), blocks :: beam_ssa:block_map(), match_aliases = #{} :: match_alias_map(), prior_matches = #{} :: prior_match_map(), renames = #{} :: beam_ssa:rename_map() }). combine_matches({Fs0, ModInfo}) -> Fs = map(fun(F) -> combine_matches(F, ModInfo) end, Fs0), {Fs, ModInfo}. combine_matches(#b_function{bs=Blocks0,cnt=Counter0}=F, ModInfo) -> case funcinfo_get(F, has_bsm_ops, ModInfo) of true -> {Dominators, _} = beam_ssa:dominators(Blocks0), {Blocks1, State} = beam_ssa:mapfold_blocks_rpo( fun(Lbl, #b_blk{is=Is0}=Block0, State0) -> {Is, State} = cm_1(Is0, [], Lbl, State0), {Block0#b_blk{is=Is}, State} end, [0], #cm{ definitions = beam_ssa:definitions(Blocks0), dominators = Dominators, blocks = Blocks0 }, Blocks0), Blocks2 = beam_ssa:rename_vars(State#cm.renames, [0], Blocks1), {Blocks, Counter} = alias_matched_binaries(Blocks2, Counter0, State#cm.match_aliases), F#b_function{ bs=Blocks, cnt=Counter }; false -> F end. cm_1([#b_set{ op=bs_start_match, dst=Ctx, args=[Src] }, #b_set{ op=succeeded, dst=Bool, args=[Ctx] }]=MatchSeq, Acc0, Lbl, State0) -> Acc = reverse(Acc0), case is_tail_binary(Src, State0#cm.definitions) of true -> cm_combine_tail(Src, Ctx, Bool, Acc, State0); false -> cm_handle_priors(Src, Ctx, Bool, Acc, MatchSeq, Lbl, State0) end; cm_1([I | Is], Acc, Lbl, State) -> cm_1(Is, [I | Acc], Lbl, State); cm_1([], Acc, _Lbl, State) -> {reverse(Acc), State}. %% If we're dominated by at least one match on the same source, we can reuse %% the context created by that match. cm_handle_priors(Src, DstCtx, Bool, Acc, MatchSeq, Lbl, State0) -> PriorCtxs = case maps:find(Src, State0#cm.prior_matches) of {ok, Priors} -> %% We've seen other match contexts on this source, but %% we can only consider the ones whose success path %% dominate us. Dominators = maps:get(Lbl, State0#cm.dominators, []), [Ctx || {ValidAfter, Ctx} <- Priors, member(ValidAfter, Dominators)]; error -> [] end, case PriorCtxs of [Ctx|_] -> Renames0 = State0#cm.renames, Renames = Renames0#{ Bool => #b_literal{val=true}, DstCtx => Ctx }, {Acc, State0#cm{ renames = Renames }}; [] -> %% Since we lack a prior match, we need to register this one in %% case we dominate another. State = cm_register_prior(Src, DstCtx, Lbl, State0), {Acc ++ MatchSeq, State} end. cm_register_prior(Src, DstCtx, Lbl, State) -> Block = maps:get(Lbl, State#cm.blocks), #b_br{succ=ValidAfter} = Block#b_blk.last, Priors0 = maps:get(Src, State#cm.prior_matches, []), Priors = [{ValidAfter, DstCtx} | Priors0], PriorMatches = maps:put(Src, Priors, State#cm.prior_matches), State#cm{ prior_matches = PriorMatches }. cm_combine_tail(Src, DstCtx, Bool, Acc, State0) -> SrcCtx = match_context_of(Src, State0#cm.definitions), %% We replace the source with a context alias as it normally won't be used %% on the happy path after being matched, and the added cost of conversion %% is negligible if it is. Aliases = maps:put(Src, {0, SrcCtx}, State0#cm.match_aliases), Renames0 = State0#cm.renames, Renames = Renames0#{ Bool => #b_literal{val=true}, DstCtx => SrcCtx }, State = State0#cm{ match_aliases = Aliases, renames = Renames }, {Acc, State}. %% Lets functions accept match contexts as arguments. The parameter must be %% unused before the bs_start_match instruction, and it must be matched in the %% first block. -record(aca, { unused_parameters :: ordsets:ordset(#b_var{}), counter :: non_neg_integer(), parameter_info = #{} :: #{ #b_var{} => param_info() }, match_aliases = #{} :: match_alias_map() }). accept_context_args({Fs, ModInfo}) -> mapfoldl(fun accept_context_args/2, ModInfo, Fs). accept_context_args(#b_function{bs=Blocks0}=F, ModInfo0) -> case funcinfo_get(F, has_bsm_ops, ModInfo0) of true -> Parameters = ordsets:from_list(funcinfo_get(F, parameters, ModInfo0)), State0 = #aca{ unused_parameters = Parameters, counter = F#b_function.cnt }, {Blocks1, State} = aca_1(Blocks0, State0), {Blocks, Counter} = alias_matched_binaries(Blocks1, State#aca.counter, State#aca.match_aliases), ModInfo = funcinfo_set(F, parameter_info, State#aca.parameter_info, ModInfo0), {F#b_function{bs=Blocks,cnt=Counter}, ModInfo}; false -> {F, ModInfo0} end. aca_1(Blocks, State) -> %% We only handle block 0 as we don't yet support starting a match after a %% test. This is generally good enough as the sys_core_bsm pass makes the %% match instruction come first if possible, and it's rare for a function %% to binary-match several parameters at once. EntryBlock = maps:get(0, Blocks), aca_enable_reuse(EntryBlock#b_blk.is, EntryBlock, Blocks, [], State). aca_enable_reuse([#b_set{op=bs_start_match,args=[Src]}=I0 | Rest], EntryBlock, Blocks0, Acc, State0) -> case aca_is_reuse_safe(Src, State0) of true -> {I, Last, Blocks1, State} = aca_reuse_context(I0, EntryBlock, Blocks0, State0), Is = reverse([I|Acc]) ++ Rest, Blocks = maps:put(0, EntryBlock#b_blk{is=Is,last=Last}, Blocks1), {Blocks, State}; false -> {Blocks0, State0} end; aca_enable_reuse([I | Is], EntryBlock, Blocks, Acc, State0) -> UnusedParams0 = State0#aca.unused_parameters, case ordsets:intersection(UnusedParams0, beam_ssa:used(I)) of [] -> aca_enable_reuse(Is, EntryBlock, Blocks, [I | Acc], State0); PrematureUses -> UnusedParams = ordsets:subtract(UnusedParams0, PrematureUses), %% Mark the offending parameters as unsuitable for context reuse. ParamInfo = foldl(fun(A, Ps) -> maps:put(A, {used_before_match, I}, Ps) end, State0#aca.parameter_info, PrematureUses), State = State0#aca{ unused_parameters = UnusedParams, parameter_info = ParamInfo }, aca_enable_reuse(Is, EntryBlock, Blocks, [I | Acc], State) end; aca_enable_reuse([], _EntryBlock, Blocks, _Acc, State) -> {Blocks, State}. aca_is_reuse_safe(Src, State) -> %% Context reuse is unsafe unless all uses are dominated by the start_match %% instruction. Since we only process block 0 it's enough to check if %% they're unused so far. ordsets:is_element(Src, State#aca.unused_parameters). aca_reuse_context(#b_set{dst=Dst, args=[Src]}=I0, Block, Blocks0, State0) -> %% When matching fails on a reused context it needs to be converted back %% to a binary. We only need to do this on the success path since it can't %% be a context on the type failure path, but it's very common for these %% to converge which requires special handling. {State1, Last, Blocks} = aca_handle_convergence(Src, State0, Block#b_blk.last, Blocks0), Aliases = maps:put(Src, {Last#b_br.succ, Dst}, State1#aca.match_aliases), ParamInfo = maps:put(Src, suitable_for_reuse, State1#aca.parameter_info), State = State1#aca{ match_aliases = Aliases, parameter_info = ParamInfo }, I = beam_ssa:add_anno(accepts_match_contexts, true, I0), {I, Last, Blocks, State}. aca_handle_convergence(Src, State0, Last0, Blocks0) -> #b_br{fail=Fail0,succ=Succ0} = Last0, SuccPath = beam_ssa:rpo([Succ0], Blocks0), FailPath = beam_ssa:rpo([Fail0], Blocks0), %% The promotion logic in alias_matched_binaries breaks down if the source %% is used after the fail/success paths converge, as we have no way to tell %% whether the source is a match context or something else past that point. %% %% We could handle this through clever insertion of phi nodes but it's %% far simpler to copy either branch in its entirety. It doesn't matter %% which one as long as they become disjoint. ConvergedPaths = ordsets:intersection( ordsets:from_list(SuccPath), ordsets:from_list(FailPath)), case maps:is_key(Src, beam_ssa:uses(ConvergedPaths, Blocks0)) of true -> case shortest(SuccPath, FailPath) of left -> {Succ, Blocks, Counter} = aca_copy_successors(Succ0, Blocks0, State0#aca.counter), State = State0#aca{ counter = Counter }, {State, Last0#b_br{succ=Succ}, Blocks}; right -> {Fail, Blocks, Counter} = aca_copy_successors(Fail0, Blocks0, State0#aca.counter), State = State0#aca{ counter = Counter }, {State, Last0#b_br{fail=Fail}, Blocks} end; false -> {State0, Last0, Blocks0} end. shortest([_|As], [_|Bs]) -> shortest(As, Bs); shortest([], _) -> left; shortest(_, []) -> right. %% Copies all successor blocks of Lbl, returning the label to the entry block %% of this copy. Since the copied blocks aren't referenced anywhere else, they %% are all guaranteed to be dominated by Lbl. aca_copy_successors(Lbl0, Blocks0, Counter0) -> %% Building the block rename map up front greatly simplifies phi node %% handling. Path = beam_ssa:rpo([Lbl0], Blocks0), {BRs, Counter1} = aca_cs_build_brs(Path, Counter0, #{}), {Blocks, Counter} = aca_cs_1(Path, Blocks0, Counter1, #{}, BRs, #{}), Lbl = maps:get(Lbl0, BRs), {Lbl, Blocks, Counter}. aca_cs_build_brs([?EXCEPTION_BLOCK=Lbl | Path], Counter, Acc) -> %% ?EXCEPTION_BLOCK is a marker and not an actual block, so renaming it %% will break exception handling. aca_cs_build_brs(Path, Counter, Acc#{ Lbl => Lbl }); aca_cs_build_brs([Lbl | Path], Counter0, Acc) -> aca_cs_build_brs(Path, Counter0 + 1, Acc#{ Lbl => Counter0 }); aca_cs_build_brs([], Counter, Acc) -> {Acc, Counter}. aca_cs_1([Lbl0 | Path], Blocks, Counter0, VRs0, BRs, Acc0) -> Block0 = maps:get(Lbl0, Blocks), Lbl = maps:get(Lbl0, BRs), {VRs, Block, Counter} = aca_cs_block(Block0, Counter0, VRs0, BRs), Acc = maps:put(Lbl, Block, Acc0), aca_cs_1(Path, Blocks, Counter, VRs, BRs, Acc); aca_cs_1([], Blocks, Counter, _VRs, _BRs, Acc) -> {maps:merge(Blocks, Acc), Counter}. aca_cs_block(#b_blk{is=Is0,last=Last0}=Block0, Counter0, VRs0, BRs) -> {VRs, Is, Counter} = aca_cs_is(Is0, Counter0, VRs0, BRs, []), Last = aca_cs_last(Last0, VRs, BRs), Block = Block0#b_blk{is=Is,last=Last}, {VRs, Block, Counter}. aca_cs_is([#b_set{op=Op, dst=Dst0, args=Args0}=I0 | Is], Counter0, VRs0, BRs, Acc) -> Args = case Op of phi -> aca_cs_args_phi(Args0, VRs0, BRs); _ -> aca_cs_args(Args0, VRs0) end, Counter = Counter0 + 1, Dst = #b_var{name={'@ssa_bsm_aca',Counter}}, I = I0#b_set{dst=Dst,args=Args}, VRs = maps:put(Dst0, Dst, VRs0), aca_cs_is(Is, Counter, VRs, BRs, [I | Acc]); aca_cs_is([], Counter, VRs, _BRs, Acc) -> {VRs, reverse(Acc), Counter}. aca_cs_last(#b_switch{arg=Arg0,list=Switch0,fail=Fail0}=Sw, VRs, BRs) -> Switch = [{Literal, maps:get(Lbl, BRs)} || {Literal, Lbl} <- Switch0], Sw#b_switch{arg=aca_cs_arg(Arg0, VRs), fail=maps:get(Fail0, BRs), list=Switch}; aca_cs_last(#b_br{bool=Arg0,succ=Succ0,fail=Fail0}=Br, VRs, BRs) -> Br#b_br{bool=aca_cs_arg(Arg0, VRs), succ=maps:get(Succ0, BRs), fail=maps:get(Fail0, BRs)}; aca_cs_last(#b_ret{arg=Arg0}=Ret, VRs, _BRs) -> Ret#b_ret{arg=aca_cs_arg(Arg0, VRs)}. aca_cs_args_phi([{Arg, Lbl} | Args], VRs, BRs) -> case BRs of #{ Lbl := New } -> [{aca_cs_arg(Arg, VRs), New} | aca_cs_args_phi(Args, VRs, BRs)]; #{} -> aca_cs_args_phi(Args, VRs, BRs) end; aca_cs_args_phi([], _VRs, _BRs) -> []. aca_cs_args([Arg | Args], VRs) -> [aca_cs_arg(Arg, VRs) | aca_cs_args(Args, VRs)]; aca_cs_args([], _VRs) -> []. aca_cs_arg(#b_remote{mod=Mod0,name=Name0}=Rem, VRs) -> Mod = aca_cs_arg(Mod0, VRs), Name = aca_cs_arg(Name0, VRs), Rem#b_remote{mod=Mod,name=Name}; aca_cs_arg(Arg, VRs) -> case VRs of #{ Arg := New } -> New; #{} -> Arg end. %% Allows contexts to pass through "wrapper functions" where the context is %% passed directly to a function that accepts match contexts (including other %% wrappers). %% %% This does not alter the function in any way, it only changes parameter info %% so that skip_outgoing_tail_extraction is aware that it's safe to pass %% contexts to us. allow_context_passthrough({Fs, ModInfo0}) -> ModInfo = acp_forward_params([{F, beam_ssa:uses(F#b_function.bs)} || F <- Fs], ModInfo0), {Fs, ModInfo}. acp_forward_params(FsUses, ModInfo0) -> F = fun({#b_function{args=Parameters}=Func, UseMap}, ModInfo) -> ParamInfo = foldl(fun(Param, ParamInfo) -> Uses = maps:get(Param, UseMap, []), acp_1(Param, Uses, ModInfo, ParamInfo) end, funcinfo_get(Func, parameter_info, ModInfo), Parameters), funcinfo_set(Func, parameter_info, ParamInfo, ModInfo) end, %% Allowing context passthrough on one function may make it possible to %% enable it on another, so it needs to be repeated for maximum effect. case foldl(F, ModInfo0, FsUses) of ModInfo0 -> ModInfo0; Changed -> acp_forward_params(FsUses, Changed) end. %% We have no way to know if an argument is a context, so it's only safe to %% forward them if they're passed exactly once in the first block. Any other %% uses are unsafe, including function_clause errors. acp_1(Param, [{0, #b_set{op=call}=I}], ModInfo, ParamInfo) -> %% We don't need to provide a context chain as our callers make sure that %% multiple arguments never reference the same context. case check_context_call(I, Param, [], ModInfo) of {no_match_on_entry, _} -> ParamInfo; Other -> maps:put(Param, Other, ParamInfo) end; acp_1(_Param, _Uses, _ModInfo, ParamInfo) -> ParamInfo. %% This is conceptually similar to combine_matches but operates across %% functions. Whenever a tail binary is passed to a parameter that accepts %% match contexts we'll pass the context instead, improving performance by %% avoiding the creation of a new match context in the callee. %% %% We also create an alias to delay extraction until it's needed as an actual %% binary, which is often rare on the happy path. The cost of being wrong is %% negligible (`bs_test_unit + bs_get_tail` vs `bs_get_binary`) so we're %% applying it unconditionally to keep things simple. -record(sote, { definitions :: beam_ssa:definition_map(), mod_info :: module_info(), match_aliases = #{} :: match_alias_map() }). skip_outgoing_tail_extraction({Fs0, ModInfo}) -> Fs = map(fun(F) -> skip_outgoing_tail_extraction(F, ModInfo) end, Fs0), {Fs, ModInfo}. skip_outgoing_tail_extraction(#b_function{bs=Blocks0}=F, ModInfo) -> case funcinfo_get(F, has_bsm_ops, ModInfo) of true -> State0 = #sote{ definitions = beam_ssa:definitions(Blocks0), mod_info = ModInfo }, {Blocks1, State} = beam_ssa:mapfold_instrs_rpo( fun sote_rewrite_calls/2, [0], State0, Blocks0), {Blocks, Counter} = alias_matched_binaries(Blocks1, F#b_function.cnt, State#sote.match_aliases), F#b_function{bs=Blocks,cnt=Counter}; false -> F end. sote_rewrite_calls(#b_set{op=call,args=Args}=Call, State) -> sote_rewrite_call(Call, Args, [], State); sote_rewrite_calls(I, State) -> {I, State}. sote_rewrite_call(Call, [], ArgsOut, State) -> {Call#b_set{args=reverse(ArgsOut)}, State}; sote_rewrite_call(Call0, [Arg | ArgsIn], ArgsOut, State0) -> case is_tail_binary(Arg, State0#sote.definitions) of true -> CtxChain = context_chain_of(Arg, State0#sote.definitions), case check_context_call(Call0, Arg, CtxChain, State0#sote.mod_info) of suitable_for_reuse -> Ctx = match_context_of(Arg, State0#sote.definitions), MatchAliases0 = State0#sote.match_aliases, MatchAliases = maps:put(Arg, {0, Ctx}, MatchAliases0), State = State0#sote{ match_aliases = MatchAliases }, Call = beam_ssa:add_anno(bsm_info, context_reused, Call0), sote_rewrite_call(Call, ArgsIn, [Ctx | ArgsOut], State); Other -> Call = beam_ssa:add_anno(bsm_info, Other, Call0), sote_rewrite_call(Call, ArgsIn, [Arg | ArgsOut], State0) end; false -> sote_rewrite_call(Call0, ArgsIn, [Arg | ArgsOut], State0) end. %% Adds parameter_type_info annotations to help the validator determine whether %% our optimizations were safe. annotate_context_parameters({Fs, ModInfo}) -> mapfoldl(fun annotate_context_parameters/2, ModInfo, Fs). annotate_context_parameters(F, ModInfo) -> ParamInfo = funcinfo_get(F, parameter_info, ModInfo), TypeAnno0 = beam_ssa:get_anno(parameter_type_info, F, #{}), TypeAnno = maps:fold(fun(K, _V, Acc) when is_map_key(K, Acc) -> %% Assertion. error(conflicting_parameter_types); (K, suitable_for_reuse, Acc) -> Acc#{ K => #t_bs_context{} }; (_K, _V, Acc) -> Acc end, TypeAnno0, ParamInfo), {beam_ssa:add_anno(parameter_type_info, TypeAnno, F), ModInfo}. %%% %%% +bin_opt_info %%% collect_opt_info(Fs) -> foldl(fun(#b_function{bs=Blocks}=F, Acc0) -> UseMap = beam_ssa:uses(Blocks), Where = beam_ssa:get_anno(location, F, []), beam_ssa:fold_instrs_rpo( fun(I, Acc) -> collect_opt_info_1(I, Where, UseMap, Acc) end, [0], Acc0, Blocks) end, [], Fs). collect_opt_info_1(#b_set{op=Op,anno=Anno,dst=Dst}=I, Where, UseMap, Acc0) -> case is_tail_binary(I) of true when Op =:= bs_match -> %% The uses include when the context is passed raw, so we discard %% everything but the bs_extract instruction to limit warnings to %% unoptimized uses. Uses0 = maps:get(Dst, UseMap, []), case [E || {_, #b_set{op=bs_extract}=E} <- Uses0] of [Use] -> add_unopt_binary_info(Use, false, Where, UseMap, Acc0); [] -> Acc0 end; true -> %% Add a warning for each use. Note that we don't do anything %% special if unused as a later pass will remove this instruction %% anyway. Uses = maps:get(Dst, UseMap, []), foldl(fun({_Lbl, Use}, Acc) -> add_unopt_binary_info(Use, false, Where, UseMap, Acc) end, Acc0, Uses); false -> add_opt_info(Anno, Where, Acc0) end; collect_opt_info_1(#b_ret{anno=Anno}, Where, _UseMap, Acc) -> add_opt_info(Anno, Where, Acc); collect_opt_info_1(_I, _Where, _Uses, Acc) -> Acc. add_opt_info(Anno, Where, Acc) -> case maps:find(bsm_info, Anno) of {ok, Term} -> [make_warning(Term, Anno, Where) | Acc]; error -> Acc end. %% When an alias is promoted we need to figure out where it goes to ignore %% warnings for compiler-generated things, and provide more useful warnings in %% general. %% %% We track whether the binary has been used to build another term because it %% can be helpful when there's no line information. add_unopt_binary_info(#b_set{op=Follow,dst=Dst}, _Nested, Where, UseMap, Acc0) when Follow =:= put_tuple; Follow =:= put_list; Follow =:= put_map -> %% Term-building instructions. {_, Uses} = unzip(maps:get(Dst, UseMap, [])), foldl(fun(Use, Acc) -> add_unopt_binary_info(Use, true, Where, UseMap, Acc) end, Acc0, Uses); add_unopt_binary_info(#b_set{op=Follow,dst=Dst}, Nested, Where, UseMap, Acc0) when Follow =:= bs_extract; Follow =:= phi -> %% Non-building instructions that need to be followed. {_, Uses} = unzip(maps:get(Dst, UseMap, [])), foldl(fun(Use, Acc) -> add_unopt_binary_info(Use, Nested, Where, UseMap, Acc) end, Acc0, Uses); add_unopt_binary_info(#b_set{op=call, args=[#b_remote{mod=#b_literal{val=erlang}, name=#b_literal{val=error}} | _Ignored]}, _Nested, _Where, _UseMap, Acc) -> %% There's no nice way to tell compiler-generated exceptions apart from %% user ones so we ignore them all. I doubt anyone cares. Acc; add_unopt_binary_info(#b_switch{anno=Anno}=I, Nested, Where, _UseMap, Acc) -> [make_promotion_warning(I, Nested, Anno, Where) | Acc]; add_unopt_binary_info(#b_set{anno=Anno}=I, Nested, Where, _UseMap, Acc) -> [make_promotion_warning(I, Nested, Anno, Where) | Acc]; add_unopt_binary_info(#b_ret{anno=Anno}=I, Nested, Where, _UseMap, Acc) -> [make_promotion_warning(I, Nested, Anno, Where) | Acc]; add_unopt_binary_info(#b_br{anno=Anno}=I, Nested, Where, _UseMap, Acc) -> [make_promotion_warning(I, Nested, Anno, Where) | Acc]. make_promotion_warning(I, Nested, Anno, Where) -> make_warning({binary_created, I, Nested}, Anno, Where). make_warning(Term, Anno, Where) -> {File, Line} = maps:get(location, Anno, Where), {File,[{Line,?MODULE,Term}]}. format_opt_info(context_reused) -> "OPTIMIZED: match context reused"; format_opt_info({binary_created, _, _}=Promotion) -> io_lib:format("BINARY CREATED: ~s", [format_opt_info_1(Promotion)]); format_opt_info(Other) -> io_lib:format("NOT OPTIMIZED: ~s", [format_opt_info_1(Other)]). format_opt_info_1({binary_created, #b_set{op=call,args=[Call|_]}, false}) -> io_lib:format("binary is used in call to ~s which doesn't support " "context reuse", [format_call(Call)]); format_opt_info_1({binary_created, #b_set{op=call,args=[Call|_]}, true}) -> io_lib:format("binary is used in term passed to ~s", [format_call(Call)]); format_opt_info_1({binary_created, #b_set{op={bif, BIF},args=Args}, false}) -> io_lib:format("binary is used in ~p/~p which doesn't support context " "reuse", [BIF, length(Args)]); format_opt_info_1({binary_created, #b_set{op={bif, BIF},args=Args}, true}) -> io_lib:format("binary is used in term passed to ~p/~p", [BIF, length(Args)]); format_opt_info_1({binary_created, #b_set{op=Op}, false}) -> io_lib:format("binary is used in '~p' which doesn't support context " "reuse", [Op]); format_opt_info_1({binary_created, #b_set{op=Op}, true}) -> io_lib:format("binary is used in term passed to '~p'", [Op]); format_opt_info_1({binary_created, #b_ret{}, false}) -> io_lib:format("binary is returned from the function", []); format_opt_info_1({binary_created, #b_ret{}, true}) -> io_lib:format("binary is used in a term that is returned from the " "function", []); format_opt_info_1({unsuitable_call, {Call, Inner}}) -> io_lib:format("binary used in call to ~s, where ~s", [format_call(Call), format_opt_info_1(Inner)]); format_opt_info_1({remote_call, Call}) -> io_lib:format("binary is used in remote call to ~s", [format_call(Call)]); format_opt_info_1({fun_call, Call}) -> io_lib:format("binary is used in fun call (~s)", [format_call(Call)]); format_opt_info_1({multiple_uses_in_call, Call}) -> io_lib:format("binary is passed as multiple arguments to ~s", [format_call(Call)]); format_opt_info_1({no_match_on_entry, Call}) -> io_lib:format("binary is used in call to ~s which does not begin with a " "suitable binary match", [format_call(Call)]); format_opt_info_1({used_before_match, #b_set{op=call,args=[Call|_]}}) -> io_lib:format("binary is used in call to ~s before being matched", [format_call(Call)]); format_opt_info_1({used_before_match, #b_set{op={bif, BIF},args=Args}}) -> io_lib:format("binary is used in ~p/~p before being matched", [BIF, length(Args)]); format_opt_info_1({used_before_match, #b_set{op=phi}}) -> io_lib:format("binary is returned from an expression before being " "matched", []); format_opt_info_1({used_before_match, #b_set{op=Op}}) -> io_lib:format("binary is used in '~p' before being matched",[Op]); format_opt_info_1(Term) -> io_lib:format("~w", [Term]). format_call(#b_local{name=#b_literal{val=F},arity=A}) -> io_lib:format("~p/~p", [F, A]); format_call(#b_remote{mod=#b_literal{val=M},name=#b_literal{val=F},arity=A}) -> io_lib:format("~p:~p/~p", [M, F, A]); format_call(Fun) -> io_lib:format("~p", [Fun]).