diff options
Diffstat (limited to 'lib/compiler/src/v3_kernel.erl')
-rw-r--r-- | lib/compiler/src/v3_kernel.erl | 291 |
1 files changed, 209 insertions, 82 deletions
diff --git a/lib/compiler/src/v3_kernel.erl b/lib/compiler/src/v3_kernel.erl index e2b8787224..49fb66126f 100644 --- a/lib/compiler/src/v3_kernel.erl +++ b/lib/compiler/src/v3_kernel.erl @@ -81,13 +81,17 @@ -export([module/2,format_error/1]). --import(lists, [map/2,foldl/3,foldr/3,mapfoldl/3,splitwith/2,member/2, - keyfind/3,partition/2,droplast/1,last/1,sort/1,reverse/1]). +-import(lists, [droplast/1,flatten/1,foldl/3,foldr/3, + map/2,mapfoldl/3,member/2, + keyfind/3,keyreplace/4, + last/1,partition/2,reverse/1, + splitwith/2,sort/1]). -import(ordsets, [add_element/2,del_element/2,union/2,union/1,subtract/2]). -import(cerl, [c_tuple/1]). -include("core_parse.hrl"). -include("v3_kernel.hrl"). +-define(EXPAND_MAX_SIZE_SEGMENT, 1024). %% These are not defined in v3_kernel.hrl. get_kanno(Kthing) -> element(2, Kthing). @@ -120,15 +124,19 @@ copy_anno(Kdst, Ksrc) -> funs=[], %Fun functions free=#{}, %Free variables ws=[] :: [warning()], %Warnings. - guard_refc=0}). %> 0 means in guard + guard_refc=0, %> 0 means in guard + no_shared_fun_wrappers=false :: boolean() + }). -spec module(cerl:c_module(), [compile:option()]) -> {'ok', #k_mdef{}, [warning()]}. -module(#c_module{anno=A,name=M,exports=Es,attrs=As,defs=Fs}, _Options) -> +module(#c_module{anno=A,name=M,exports=Es,attrs=As,defs=Fs}, Options) -> Kas = attributes(As), Kes = map(fun (#c_var{name={_,_}=Fname}) -> Fname end, Es), - St0 = #kern{}, + NoSharedFunWrappers = proplists:get_bool(no_shared_fun_wrappers, + Options), + St0 = #kern{no_shared_fun_wrappers=NoSharedFunWrappers}, {Kfs,St} = mapfoldl(fun function/2, St0, Fs), {ok,#k_mdef{anno=A,name=M#c_literal.val,exports=Kes,attributes=Kas, body=Kfs ++ St#kern.funs},lists:sort(St#kern.ws)}. @@ -716,16 +724,27 @@ gexpr_test_add(Ke, St0) -> %% expr(Cexpr, Sub, State) -> {Kexpr,[PreKexpr],State}. %% Convert a Core expression, flattening it at the same time. -expr(#c_var{anno=A,name={_Name,Arity}}=Fname, Sub, St) -> - %% A local in an expression. - %% For now, these are wrapped into a fun by reverse - %% eta-conversion, but really, there should be exactly one - %% such "lambda function" for each escaping local name, - %% instead of one for each occurrence as done now. +expr(#c_var{anno=A0,name={Name,Arity}}=Fname, Sub, St) -> Vs = [#c_var{name=list_to_atom("V" ++ integer_to_list(V))} || - V <- integers(1, Arity)], - Fun = #c_fun{anno=A,vars=Vs,body=#c_apply{anno=A,op=Fname,args=Vs}}, - expr(Fun, Sub, St); + V <- integers(1, Arity)], + case St#kern.no_shared_fun_wrappers of + false -> + %% Generate a (possibly shared) wrapper function for calling + %% this function. + Wrapper0 = ["-fun.",atom_to_list(Name),"/",integer_to_list(Arity),"-"], + Wrapper = list_to_atom(flatten(Wrapper0)), + Id = {id,{0,0,Wrapper}}, + A = keyreplace(id, 1, A0, Id), + Fun = #c_fun{anno=A,vars=Vs,body=#c_apply{anno=A,op=Fname,args=Vs}}, + expr(Fun, Sub, St); + true -> + %% For backward compatibility with OTP 22 and earlier, + %% use the pre-generated name for the fun wrapper. + %% There will be one wrapper function for each occurrence + %% of `fun F/A`. + Fun = #c_fun{anno=A0,vars=Vs,body=#c_apply{anno=A0,op=Fname,args=Vs}}, + expr(Fun, Sub, St) + end; expr(#c_var{anno=A,name=V}, Sub, St) -> {#k_var{anno=A,name=get_vsub(V, Sub)},[],St}; expr(#c_literal{anno=A,val=V}, _Sub, St) -> @@ -1152,7 +1171,7 @@ validate_bin_element_size(#k_int{val=V}) when V >= 0 -> ok; validate_bin_element_size(#k_atom{val=all}) -> ok; validate_bin_element_size(#k_atom{val=undefined}) -> ok; validate_bin_element_size(_) -> throw(bad_element_size). - + %% atomic_list([Cexpr], Sub, State) -> {[Kexpr],[PreKexpr],State}. atomic_list(Ces, Sub, St) -> @@ -1278,14 +1297,63 @@ pattern_bin_1([#c_bitstr{anno=A,val=E0,size=S0,unit=U,type=T,flags=Fs}|Es0], _ -> Isub0 end, {Es,{Isub,Osub},St3} = pattern_bin_1(Es0, Isub1, Osub1, St2), - {#k_bin_seg{anno=A,size=S, - unit=U0, - type=cerl:concrete(T), - flags=Fs0, - seg=E,next=Es}, - {Isub,Osub},St3}; + {build_bin_seg(A, S, U0, cerl:concrete(T), Fs0, E, Es),{Isub,Osub},St3}; pattern_bin_1([], Isub, Osub, St) -> {#k_bin_end{},{Isub,Osub},St}. +%% build_bin_seg(Anno, Size, Unit, Type, Flags, Seg, Next) -> #k_bin_seg{}. +%% This function normalizes literal integers with size > 8 and literal +%% utf8 segments into integers with size = 8 (and potentially an integer +%% with size less than 8 at the end). This is so further optimizations +%% have a normalized view of literal integers, allowing us to generate +%% more literals and group more clauses. Those integers may be "squeezed" +%% later into the largest integer possible. +%% +build_bin_seg(A, #k_int{val=Bits} = Sz, U, integer=Type, [unsigned,big]=Flags, #k_literal{val=Int}=Seg, Next) -> + Size = Bits * U, + case integer_fits_and_is_expandable(Int, Size) of + true -> build_bin_seg_integer_recur(A, Size, Int, Next); + false -> #k_bin_seg{anno=A,size=Sz,unit=U,type=Type,flags=Flags,seg=Seg,next=Next} + end; +build_bin_seg(A, Sz, U, utf8=Type, [unsigned,big]=Flags, #k_literal{val=Utf8} = Seg, Next) -> + case utf8_fits(Utf8) of + {Int, Bits} -> build_bin_seg_integer_recur(A, Bits, Int, Next); + error -> #k_bin_seg{anno=A,size=Sz,unit=U,type=Type,flags=Flags,seg=Seg,next=Next} + end; +build_bin_seg(A, Sz, U, Type, Flags, Seg, Next) -> + #k_bin_seg{anno=A,size=Sz,unit=U,type=Type,flags=Flags,seg=Seg,next=Next}. + +build_bin_seg_integer_recur(A, Bits, Val, Next) when Bits > 8 -> + NextBits = Bits - 8, + NextVal = Val band ((1 bsl NextBits) - 1), + Last = build_bin_seg_integer_recur(A, NextBits, NextVal, Next), + build_bin_seg_integer(A, 8, Val bsr NextBits, Last); + +build_bin_seg_integer_recur(A, Bits, Val, Next) -> + build_bin_seg_integer(A, Bits, Val, Next). + +build_bin_seg_integer(A, Bits, Val, Next) -> + Sz = #k_int{anno=A,val=Bits}, + Seg = #k_literal{anno=A,val=Val}, + #k_bin_seg{anno=A,size=Sz,unit=1,type=integer,flags=[unsigned,big],seg=Seg,next=Next}. + +integer_fits_and_is_expandable(Int, Size) when 0 < Size, Size =< ?EXPAND_MAX_SIZE_SEGMENT -> + case <<Int:Size>> of + <<Int:Size>> -> true; + _ -> false + end; +integer_fits_and_is_expandable(_Int, _Size) -> + false. + +utf8_fits(Utf8) -> + try + Bin = <<Utf8/utf8>>, + Bits = bit_size(Bin), + <<Int:Bits>> = Bin, + {Int, Bits} + catch + _:_ -> error + end. + %% pattern_list([Cexpr], Sub, State) -> {[Kexpr],Sub,State}. pattern_list(Ces, Sub, St) -> @@ -1535,7 +1603,7 @@ maybe_add_warning(Ke, MatchAnno, St) -> get_line([Line|_]) when is_integer(Line) -> Line; get_line([_|T]) -> get_line(T); get_line([]) -> none. - + get_file([{file,File}|_]) -> File; get_file([_|T]) -> get_file(T); get_file([]) -> "no_file". % should not happen @@ -1743,27 +1811,10 @@ do_combine_lit_pat(#k_tuple{anno=A,es=Es0}) -> do_combine_lit_pat(_) -> throw(not_possible). -combine_bin_segs(#k_bin_seg{size=Size0,unit=Unit,type=integer, - flags=[unsigned,big],seg=Seg,next=Next}) -> - #k_literal{val=Size1} = do_combine_lit_pat(Size0), - #k_literal{val=Int} = do_combine_lit_pat(Seg), - Size = Size1 * Unit, - if - 0 < Size, Size < 64 -> - Bin = <<Int:Size>>, - case Bin of - <<Int:Size>> -> - NextBin = combine_bin_segs(Next), - <<Bin/bits,NextBin/bits>>; - _ -> - %% The integer Int does not fit in the segment, - %% thus it will not match. - throw(not_possible) - end; - true -> - %% Avoid creating huge binary literals. - throw(not_possible) - end; +combine_bin_segs(#k_bin_seg{size=#k_int{val=8},unit=1,type=integer, + flags=[unsigned,big],seg=#k_literal{val=Int},next=Next}) + when is_integer(Int), 0 =< Int, Int =< 255 -> + <<Int,(combine_bin_segs(Next))/bits>>; combine_bin_segs(#k_bin_end{}) -> <<>>; combine_bin_segs(_) -> @@ -1833,11 +1884,10 @@ handle_bin_con_not_possible([]) -> []. select_bin_int([#iclause{pats=[#k_bin_seg{anno=A,type=integer, size=#k_int{val=Bits0}=Sz,unit=U, flags=Fl,seg=#k_literal{val=Val}, - next=N}|Ps]}=C|Cs0]) - when is_integer(Val) -> + next=N}|Ps]}=C|Cs0]) -> Bits = U * Bits0, if - Bits > 1024 -> throw(not_possible); %Expands the code too much. + Bits > ?EXPAND_MAX_SIZE_SEGMENT -> throw(not_possible); %Expands the code too much. true -> ok end, select_assert_match_possible(Bits, Val, Fl), @@ -1848,16 +1898,6 @@ select_bin_int([#iclause{pats=[#k_bin_seg{anno=A,type=integer, end, Cs = select_bin_int_1(Cs0, Bits, Fl, Val), [{k_bin_int,[C#iclause{pats=[P|Ps]}|Cs]}]; -select_bin_int([#iclause{pats=[#k_bin_seg{anno=A,type=utf8, - flags=[unsigned,big]=Fl, - seg=#k_literal{val=Val0}, - next=N}|Ps]}=C|Cs0]) - when is_integer(Val0) -> - {Val,Bits} = select_utf8(Val0), - P = #k_bin_int{anno=A,size=#k_int{val=Bits},unit=1, - flags=Fl,val=Val,next=N}, - Cs = select_bin_int_1(Cs0, Bits, Fl, Val), - [{k_bin_int,[C#iclause{pats=[P|Ps]}|Cs]}]; select_bin_int(_) -> throw(not_possible). select_bin_int_1([#iclause{pats=[#k_bin_seg{anno=A,type=integer, @@ -1872,18 +1912,6 @@ select_bin_int_1([#iclause{pats=[#k_bin_seg{anno=A,type=integer, end, P = #k_bin_int{anno=A,size=Sz,unit=U,flags=Fl,val=Val,next=N}, [C#iclause{pats=[P|Ps]}|select_bin_int_1(Cs, Bits, Fl, Val)]; -select_bin_int_1([#iclause{pats=[#k_bin_seg{anno=A,type=utf8, - flags=Fl, - seg=#k_literal{val=Val0}, - next=N}|Ps]}=C|Cs], - Bits, Fl, Val) when is_integer(Val0) -> - case select_utf8(Val0) of - {Val,Bits} -> ok; - {_,_} -> throw(not_possible) - end, - P = #k_bin_int{anno=A,size=#k_int{val=Bits},unit=1, - flags=[unsigned,big],val=Val,next=N}, - [C#iclause{pats=[P|Ps]}|select_bin_int_1(Cs, Bits, Fl, Val)]; select_bin_int_1([], _, _, _) -> []; select_bin_int_1(_, _, _, _) -> throw(not_possible). @@ -1909,17 +1937,6 @@ match_fun(Val) -> {match,Bs} end. -select_utf8(Val0) -> - try - Bin = <<Val0/utf8>>, - Size = bit_size(Bin), - <<Val:Size>> = Bin, - {Val,Size} - catch - error:_ -> - throw(not_possible) - end. - %% match_value([Var], Con, [Clause], Default, State) -> {SelectExpr,State}. %% At this point all the clauses have the same constructor, we must %% now separate them according to value. @@ -2039,7 +2056,8 @@ match_clause([U|Us], [C|_]=Cs0, Def, St0) -> {Match0,Vs,St1} = get_match(get_con(Cs0), St0), Match = sub_size_var(Match0, Cs0), {Cs1,St2} = new_clauses(Cs0, U, St1), - {B,St3} = match(Vs ++ Us, Cs1, Def, St2), + Cs2 = squeeze_clauses_by_bin_integer_count(Cs1, []), + {B,St3} = match(Vs ++ Us, Cs2, Def, St2), {#k_val_clause{anno=Anno,val=Match,body=B},St3}. sub_size_var(#k_bin_seg{size=#k_var{name=Name}=Kvar}=BinSeg, [#iclause{isub=Sub}|_]) -> @@ -2109,6 +2127,102 @@ new_clauses(Cs0, U, St) -> end, Cs0), {Cs1,St}. +%% group and squeeze +%% The goal of those functions is to group subsequent integer k_bin_seg +%% literals by count so we can leverage bs_get_integer_16 whenever possible. +%% +%% The priority is to create large groups. So if we have three clauses matching +%% on 16-bits/16-bits/8-bits, we will first have a single 8-bits match for all +%% three clauses instead of clauses (one with 16 and another with 8). But note +%% the algorithm is recursive, so the remaining 8-bits for the first two clauses +%% will be grouped next. +%% +%% We also try to not create too large groups. If we have too many clauses, +%% it is preferrable to match on 8-bits, select a branch, then match on the +%% next 8-bits, rather than match on 16-bits which would force us to have +%% to select to many values at the same time, which would not be efficient. +%% +%% Another restriction is that we create groups only if the end of the +%% group is a variadic clause or the end of the binary. That's because +%% if we have 16-bits/16-bits/catch-all, breaking it into a 16-bits lookup +%% will make the catch-all more expensive. +%% +%% Clauses are grouped in reverse when squeezing and then flattened and +%% re-reversed at the end. +squeeze_clauses_by_bin_integer_count([Clause | Clauses], Acc) -> + case clause_count_bin_integer_segments(Clause) of + {literal, N} -> squeeze_clauses_by_bin_integer_count(Clauses, N, 1, [Clause], Acc); + _ -> squeeze_clauses_by_bin_integer_count(Clauses, [[Clause] | Acc]) + end; +squeeze_clauses_by_bin_integer_count(_, Acc) -> + flat_reverse(Acc, []). + +squeeze_clauses_by_bin_integer_count([], N, Count, GroupAcc, Acc) -> + Squeezed = squeeze_clauses(GroupAcc, fix_count_without_variadic_segment(N), Count), + flat_reverse([Squeezed | Acc], []); +squeeze_clauses_by_bin_integer_count([#iclause{pats=[#k_bin_end{} | _]} = Clause], N, Count, GroupAcc, Acc) -> + Squeezed = squeeze_clauses(GroupAcc, fix_count_without_variadic_segment(N), Count), + flat_reverse([[Clause | Squeezed] | Acc], []); +squeeze_clauses_by_bin_integer_count([Clause | Clauses], N, Count, GroupAcc, Acc) -> + case clause_count_bin_integer_segments(Clause) of + {literal, NewN} -> + squeeze_clauses_by_bin_integer_count(Clauses, min(N, NewN), Count + 1, [Clause | GroupAcc], Acc); + + {variadic, NewN} when NewN =< N -> + Squeezed = squeeze_clauses(GroupAcc, NewN, Count), + squeeze_clauses_by_bin_integer_count(Clauses, [[Clause | Squeezed] | Acc]); + + _ -> + squeeze_clauses_by_bin_integer_count(Clauses, [[Clause | GroupAcc] | Acc]) + end. + +clause_count_bin_integer_segments(#iclause{pats=[#k_bin_seg{seg=#k_literal{}} = BinSeg | _]}) -> + count_bin_integer_segments(BinSeg, 0); +clause_count_bin_integer_segments(#iclause{pats=[#k_bin_seg{size=#k_int{val=Size},unit=Unit, + type=integer,flags=[unsigned,big], seg=#k_var{}} | _]}) + when ((Size * Unit) rem 8) =:= 0 -> + {variadic, (Size * Unit) div 8}; +clause_count_bin_integer_segments(_) -> + error. + +count_bin_integer_segments(#k_bin_seg{size=#k_int{val=8},unit=1,type=integer,flags=[unsigned,big], + seg=#k_literal{val=Int},next=Next}, Count) when is_integer(Int), 0 =< Int, Int =< 255 -> + count_bin_integer_segments(Next, Count + 1); +count_bin_integer_segments(_, Count) when Count > 0 -> + {literal, Count}; +count_bin_integer_segments(_, _Count) -> + error. + +%% Since 4 bytes in on 32-bits systems are bignums, we convert +%% anything more than 3 into 2 bytes lookup. The goal is to convert +%% any multi-clause segment into 2-byte lookups with a potential +%% 3 byte lookup at the end. +fix_count_without_variadic_segment(N) when N > 3 -> 2; +fix_count_without_variadic_segment(N) -> N. + +%% If we have more than 16 clauses, then it is better +%% to branch multiple times than getting a large integer. +%% We also abort if we have nothing to squeeze. +squeeze_clauses(Clauses, Size, Count) when Count >= 16; Size == 1 -> Clauses; +squeeze_clauses(Clauses, Size, _Count) -> squeeze_clauses(Clauses, Size). + +squeeze_clauses([#iclause{pats=[#k_bin_seg{seg=#k_literal{}} = BinSeg | Pats]} = Clause | Clauses], Size) -> + [Clause#iclause{pats=[squeeze_segments(BinSeg, 0, 0, Size) | Pats]} | + squeeze_clauses(Clauses, Size)]; +squeeze_clauses([], _Size) -> + []. + +squeeze_segments(#k_bin_seg{size=Sz, seg=#k_literal{val=Val}=Lit} = BinSeg, Acc, Size, 1) -> + BinSeg#k_bin_seg{size=Sz#k_int{val=Size + 8}, seg=Lit#k_literal{val=(Acc bsl 8) bor Val}}; +squeeze_segments(#k_bin_seg{seg=#k_literal{val=Val},next=Next}, Acc, Size, Count) -> + squeeze_segments(Next, (Acc bsl 8) bor Val, Size + 8, Count - 1). + +flat_reverse([Head | Tail], Acc) -> flat_reverse(Tail, flat_reverse_1(Head, Acc)); +flat_reverse([], Acc) -> Acc. + +flat_reverse_1([Head | Tail], Acc) -> flat_reverse_1(Tail, [Head | Acc]); +flat_reverse_1([], Acc) -> Acc. + %% build_guard([GuardClause]) -> GuardExpr. build_guard([]) -> fail; @@ -2446,8 +2560,21 @@ uexpr(Lit, {break,Rs0}, St0) -> {#k_put{anno=#k{us=Used,ns=lit_list_vars(Rs),a=get_kanno(Lit)}, arg=Lit,ret=Rs},Used,St1}. -add_local_function(_, #kern{funs=ignore}=St) -> St; -add_local_function(F, #kern{funs=Funs}=St) -> St#kern{funs=[F|Funs]}. +add_local_function(_, #kern{funs=ignore}=St) -> + St; +add_local_function(#k_fdef{func=Name,arity=Arity}=F, #kern{funs=Funs}=St) -> + case is_defined(Name, Arity, Funs) of + false -> + St#kern{funs=[F|Funs]}; + true -> + St + end. + +is_defined(Name, Arity, [#k_fdef{func=Name,arity=Arity}|_]) -> + true; +is_defined(Name, Arity, [#k_fdef{}|T]) -> + is_defined(Name, Arity, T); +is_defined(_, _, []) -> false. %% Make a #k_fdef{}, making sure that the body is always a #k_match{}. make_fdef(Anno, Name, Arity, Vs, #k_match{}=Body) -> |