authorBjörn Gustavsson <>2021-09-20 09:21:09 +0200
committerBjörn Gustavsson <>2021-10-14 04:59:22 +0200
commit844a90a6b3b8d503b4111c36067e5d1c4a785210 (patch)
parent16c5728f442b51caca42bcd827a2058051ef8733 (diff)
compiler: Add a new instruction for creating binaries
Binary construction using the binary syntax currently uses multiple instructions. That makes it hard to provide good information in the exception if construction fails. It also makes it harder to optimize construction in the JIT. Therefore, introduce the instruction `bs_create_bin` that constructs a binary in one go. To be able to test the new instruction before implementing it in the runtime system, force translation back to the old-style instructions in the `beam_clean` pass.
@@ -34,7 +34,8 @@ module({Mod,Exp,Attr,Fs0,_}, Opts) ->
Fs1 = remove_unused(Order, Used, All),
{Fs2,Lc} = clean_labels(Fs1),
Fs3 = fix_swap(Fs2, Opts),
- Fs = maybe_remove_lines(Fs3, Opts),
+ Fs4 = fix_bs_create_bin(Fs3, Opts),
+ Fs = maybe_remove_lines(Fs4, Opts),
%% Determine the rootset, i.e. exported functions and
@@ -178,6 +179,146 @@ remove_lines_block([I|Is]) ->
remove_lines_block([]) -> [].
+%%% If compatibility with a previous release (OTP 24 or earlier) has
+%%% been requested, eliminate bs_create_bin instructions by translating
+%%% them to the old binary syntax instructions.
+fix_bs_create_bin(Fs, Opts) -> % Entry point: decides whether bs_create_bin instructions in Fs are rewritten.
+ %% Force translation to old instructions before we have actually
+ %% implemented the `bs_create_bin` instruction.
+ case true orelse proplists:get_bool(no_bs_create_bin, Opts) of % `true orelse ...` short-circuits, so the option is not consulted for now
+ false -> Fs; % unreachable while the forced `true` above is in place
+ true -> fold_functions(fun fix_bs_create_bin/1, Fs) % apply the instruction-list rewrite via fold_functions/2
+ end.
+fix_bs_create_bin([{bs_create_bin,Fail,Alloc,Live,Unit,Dst,{list,List}}|Is]) -> % Expand one bs_create_bin into size calculation, an init instruction and put instructions.
+ Tail = fix_bs_create_bin(Is), % rewrite the remaining instructions first
+ Flags = {field_flags,[]},
+ try bs_pre_size_calc(List) of % may throw invalid_size (raised by ensure_valid_size/1)
+ SizeCalc0 ->
+ SizeCalc = fold_size_calc(SizeCalc0, 0, []), % merge the constant sizes into single entries
+ TmpDst = SizeReg = {x,Live}, % {x,Live} is used both as size accumulator and as temporary destination
+ SizeIs0 = bs_size_calc(SizeCalc, Fail, SizeReg, {x,Live+1}), % {x,Live+1} is the scratch register for intermediate sizes
+ SizeIs = [{move,{integer,0},SizeReg}|SizeIs0], % the size accumulator starts at 0
+ RestIs = bs_puts(List, Fail) ++ [{move,TmpDst,Dst}|Tail], % put instructions, then copy the result to the real destination
+ case List of % the type of the first segment selects the init instruction
+ [{atom,append},_,_,_,Src|_] ->
+ SizeIs ++ [{bs_append,Fail,SizeReg,Alloc,Live+1,Unit,Src,Flags,TmpDst}|RestIs];
+ [{atom,private_append},_,_,_,Src|_] ->
+ TestHeap = {test_heap,Alloc,Live+1},
+ SizeIs ++ [TestHeap,{bs_private_append,Fail,SizeReg,Unit,Src,Flags,TmpDst}|RestIs];
+ _ ->
+ SizeIs ++ [{bs_init_bits,Fail,SizeReg,Alloc,Live+1,Flags,TmpDst}|RestIs]
+ end
+ catch
+ throw:invalid_size -> % invalid size: emit a call to erlang:error(badarg) in place of the construction
+ [{move,{atom,badarg},{x,0}},
+ {call_ext_only,1,{extfunc,erlang,error,1}}|Tail]
+ end;
+fix_bs_create_bin([I|Is]) -> % any other instruction is passed through unchanged
+ [I|fix_bs_create_bin(Is)];
+fix_bs_create_bin([]) -> [].
+bs_pre_size_calc([Type,_Seg,Unit,_Flags,Src,Size|Segs]) -> % Consume the flat operand list six elements (one segment) at a time.
+ case Type of
+ {atom,T} when T =:= append; T =:= private_append -> % append segments add no entry to the size list
+ bs_pre_size_calc(Segs);
+ _ ->
+ [bs_pre_size_calc_1(Type, Unit, Src, Size)|bs_pre_size_calc(Segs)] % one {Unit,SizeExpr} pair per remaining segment
+ end;
+bs_pre_size_calc([]) -> [].
+bs_pre_size_calc_1({atom,Type}, Unit, Src, Size) -> % Describe the size of one segment as a {Unit,SizeExpr} pair.
+ case {Unit,Size} of
+ {0,{atom,undefined}} ->
+ %% No size/unit given.
+ {8,case Type of % only the utf8/utf16/utf32 types are handled here; anything else is a case_clause error
+ utf8 -> {{instr,bs_utf8_size},Src};
+ utf16 -> {{instr,bs_utf16_size},Src};
+ utf32 -> {term,{integer,4}} % constant size 4 with unit 8
+ end};
+ {Unit,_} -> % Unit is already bound, so this clause matches every remaining case
+ case {Type,Size} of
+ {binary,{atom,all}} -> % whole binary: its size is computed by a BIF on Src
+ case Unit rem 8 of
+ 0 -> {8,{{bif,byte_size},Src}};
+ _ -> {1,{{bif,bit_size},Src}}
+ end;
+ {_,_} ->
+ ensure_valid_size(Size), % throws invalid_size for an unacceptable size operand
+ {Unit,{term,Size}}
+ end
+ end.
+ensure_valid_size({x,_}) -> ok; % x register operand is accepted
+ensure_valid_size({y,_}) -> ok; % y register operand is accepted
+ensure_valid_size({integer,Size}) when Size >= 0 -> ok; % non-negative integer literal is accepted
+ensure_valid_size(_) -> throw(invalid_size). % everything else is rejected; caught in fix_bs_create_bin/1
+fold_size_calc([{Unit,{term,{integer,Size}}}|T], Bits, Acc) -> % Constant size: add Unit*Size to the running bit count.
+ fold_size_calc(T, Bits + Unit*Size, Acc);
+fold_size_calc([{Unit,{{bif,Bif},{literal,Lit}}}=H|T], Bits, Acc) -> % BIF applied to a literal: try to evaluate it right here
+ try erlang:Bif(Lit) of
+ Result ->
+ fold_size_calc([{Unit,{term,{integer,Result}}}|T], Bits, Acc) % re-process the entry as a constant size
+ catch
+ _:_ ->
+ fold_size_calc(T, Bits, [H|Acc]) % evaluation failed: keep the entry unchanged in Acc
+ end;
+fold_size_calc([{U,_}=H|T], Bits, Acc) when U =:= 1; U =:= 8 -> % non-constant entry with unit 1 or 8 is kept as is
+ fold_size_calc(T, Bits, [H|Acc]);
+fold_size_calc([{U,Var}|T], Bits, Acc) -> % any other unit: rewrite as an explicit multiplication with unit 1
+ fold_size_calc(T, Bits, [{1,{'*',{term,{integer,U}},Var}}|Acc]);
+fold_size_calc([], Bits, Acc) -> % done: split the constant total into whole bytes and leftover bits
+ Bytes = Bits div 8,
+ RemBits = Bits rem 8,
+ Sizes = [{1,{term,{integer,RemBits}}},{8,{term,{integer,Bytes}}}|Acc],
+ [Pair || {_,Sz}=Pair <- Sizes, Sz =/= {term,{integer,0}}]. % drop entries whose size is the constant 0
+bs_size_calc([{Unit,{{bif,Bif},Reg}}|T], Fail, SizeReg, TmpReg) -> % BIF-computed size: gc_bif into TmpReg, then bs_add into SizeReg.
+ Live = element(2, SizeReg) + 1, % register number of SizeReg plus one, passed as the gc_bif live count
+ [{gc_bif,Bif,Fail,Live,[Reg],TmpReg},
+ {bs_add,Fail,[SizeReg,TmpReg,Unit],SizeReg}|bs_size_calc(T, Fail, SizeReg, TmpReg)];
+bs_size_calc([{Unit,{'*',{term,Term1},{term,Term2}}}|T], Fail, SizeReg, TmpReg) -> % explicit multiplication produced by fold_size_calc/3
+ Live = element(2, SizeReg) + 1, % same live count computation as in the clause above
+ [{gc_bif,'*',Fail,Live,[Term1,Term2],TmpReg},
+ {bs_add,Fail,[SizeReg,TmpReg,Unit],SizeReg}|bs_size_calc(T, Fail, SizeReg, TmpReg)];
+bs_size_calc([{Unit,{{instr,Instr},Reg}}|T], Fail, SizeReg, TmpReg) -> % size produced by a dedicated instruction (bs_utf8_size/bs_utf16_size)
+ [{Instr,Fail,Reg,TmpReg},
+ {bs_add,Fail,[SizeReg,TmpReg,Unit],SizeReg}|bs_size_calc(T, Fail, SizeReg, TmpReg)];
+bs_size_calc([{Unit,{term,Term}}|T], Fail, SizeReg, TmpReg) -> % plain operand: added directly, no temporary needed
+ [{bs_add,Fail,[SizeReg,Term,Unit],SizeReg}|bs_size_calc(T, Fail, SizeReg, TmpReg)];
+bs_size_calc([], _Fail, _SizeReg, _TmpReg) -> [].
+bs_puts([{atom,string},_Seg,_Unit,_Flags,{string,_}=Str,{integer,Size}|Is], Fail) -> % Literal string segment becomes a bs_put_string instruction.
+ [{bs_put_string,Size,Str}|bs_puts(Is, Fail)];
+bs_puts([{atom,append},_,_,_,_,_|Is], Fail) -> % append segment emits no put instruction here
+ bs_puts(Is, Fail);
+bs_puts([{atom,private_append},_,_,_,_,_|Is], Fail) -> % private_append segment emits no put instruction here
+ bs_puts(Is, Fail);
+bs_puts([{atom,Type},_Seg,Unit,Flags0,Src,Size|Is], Fail) -> % every other segment type maps to a bs_put_* operation
+ Op = case Type of
+ integer -> bs_put_integer;
+ float -> bs_put_float;
+ binary -> bs_put_binary;
+ utf8 -> bs_put_utf8;
+ utf16 -> bs_put_utf16;
+ utf32 -> bs_put_utf32
+ end,
+ Flags = case Flags0 of % nil stands for "no flags"
+ nil -> [];
+ {literal,Fs} -> Fs
+ end,
+ I = if
+ Unit =:= 0 -> % unit 0: the put takes neither size nor unit
+ {bs_put,Fail,{Op,{field_flags,Flags}},[Src]};
+ true ->
+ {bs_put,Fail,{Op,Unit,{field_flags,Flags}},[Size,Src]}
+ end,
+ [I|bs_puts(Is, Fail)];
+bs_puts([], _Fail) -> [].
%%% Helpers.
@@ -896,6 +896,8 @@ instr_labels({bif,_Name,Lbl,_As,_R}) ->
instr_labels({gc_bif,_Name,Lbl,_Live,_As,_R}) ->
+instr_labels({bs_create_bin,Lbl,_,_,_,_,_}) ->
+ do_instr_labels(Lbl);
instr_labels({bs_init,Lbl,_,_,_,_}) ->
instr_labels({bs_put,Lbl,_,_}) ->
@@ -25,8 +25,8 @@
-import(lists, [all/2,append/1,flatmap/2,foldl/3,
- keysort/2,mapfoldl/3,map/2,member/2,
- reverse/1,reverse/2,sort/1]).
+ keysort/2,mapfoldl/3,member/2,
+ reverse/1,sort/1]).
@@ -1048,167 +1048,54 @@ put_cg_map(LineAnno, Op, SrcMap, Dst, List, St0) ->
cg_binary(Dst, Segs0, FailCtx, Le, St0) ->
- {PutCode0,SzCalc0,St1} = cg_bin_put(Segs0, FailCtx, St0),
+ Segs1 = cg_bin_segments(Segs0, St0),
+ Segs = case Segs1 of
+ [#b_literal{val=binary},UnitFlags,Val,#b_literal{val=all}|Segs2] ->
+ Op = case member(single_use, Le) of
+ true -> private_append;
+ false -> append
+ end,
+ [#b_literal{val=Op},UnitFlags,Val,#b_literal{val=all}|Segs2];
+ _ ->
+ Segs1
+ end,
LineAnno = line_anno(Le),
- Anno = Le,
- case PutCode0 of
- [#b_set{op=bs_put,dst=Bin,args=[_,_,Src,#b_literal{val=all}|_]},
- #b_set{op={succeeded,_},dst=Bool,args=[Bin]},
- #b_br{bool=Bool},
- {label,_}|_] ->
- #k_bin_seg{unit=Unit0,next=Segs} = Segs0,
- Unit = #b_literal{val=Unit0},
- {PutCode,SzCalc1,St2} = cg_bin_put(Segs, FailCtx, St1),
- {_,SzVar,SzCode0,St3} = cg_size_calc(1, SzCalc1, FailCtx, St2),
- SzCode = cg_bin_anno(SzCode0, LineAnno),
- Args = case member(single_use, Anno) of
- true ->
- [#b_literal{val=private_append},Src,SzVar,Unit];
- false ->
- [#b_literal{val=append},Src,SzVar,Unit]
- end,
- BsInit = #b_set{anno=LineAnno,op=bs_init,dst=Dst,args=Args},
- {TestIs,St} = make_succeeded(Dst, FailCtx, St3),
- {SzCode ++ [BsInit] ++ TestIs ++ PutCode,St};
- [#b_set{op=bs_put}|_] ->
- {Unit,SzVar,SzCode0,St2} = cg_size_calc(8, SzCalc0, FailCtx, St1),
- SzCode = cg_bin_anno(SzCode0, LineAnno),
- Args = [#b_literal{val=new},SzVar,Unit],
- BsInit = #b_set{anno=LineAnno,op=bs_init,dst=Dst,args=Args},
- {TestIs,St} = make_succeeded(Dst, FailCtx, St2),
- {SzCode ++ [BsInit] ++ TestIs ++ PutCode0,St}
- end.
-cg_bin_anno([Set|Sets], Anno) ->
- [Set#b_set{anno=Anno}|Sets];
-cg_bin_anno([], _) -> [].
-%% cg_size_calc(PreferredUnit, SzCalc, FailCtx, St0) ->
-%% {ActualUnit,SizeVariable,SizeCode,St}.
-%% Generate size calculation code.
-cg_size_calc(Unit, error, _FailCtx, St) ->
- {#b_literal{val=Unit},#b_literal{val=badarg},[],St};
-cg_size_calc(8, [{1,_}|_]=SzCalc, FailCtx, St) ->
- cg_size_calc(1, SzCalc, FailCtx, St);
-cg_size_calc(8, SzCalc, FailCtx, St0) ->
- {Var,Pre,St} = cg_size_calc_1(SzCalc, FailCtx, St0),
- {#b_literal{val=8},Var,Pre,St};
-cg_size_calc(1, SzCalc0, FailCtx, St0) ->
- SzCalc = map(fun({8,#b_literal{val=Size}}) ->
- {1,#b_literal{val=8*Size}};
- ({8,{{bif,byte_size},Src}}) ->
- {1,{{bif,bit_size},Src}};
- ({8,{_,_}=UtfCalc}) ->
- {1,{'*',#b_literal{val=8},UtfCalc}};
- ({_,_}=Pair) ->
- Pair
- end, SzCalc0),
- {Var,Pre,St} = cg_size_calc_1(SzCalc, FailCtx, St0),
- {#b_literal{val=1},Var,Pre,St}.
-cg_size_calc_1(SzCalc, FailCtx, St0) ->
- cg_size_calc_2(SzCalc, #b_literal{val=0}, FailCtx, St0).
-cg_size_calc_2([{_,{'*',Unit,{_,_}=Bif}}|T], Sum0, FailCtx, St0) ->
- {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, FailCtx, St0),
- {BifDst,Pre1,St2} = cg_size_bif(Bif, FailCtx, St1),
- {Sum,Pre2,St} = cg_size_add(Sum1, BifDst, Unit, FailCtx, St2),
- {Sum,Pre0++Pre1++Pre2,St};
-cg_size_calc_2([{_,#b_literal{}=Sz}|T], Sum0, FailCtx, St0) ->
- {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, FailCtx, St0),
- {Sum,Pre,St} = cg_size_add(Sum1, Sz, #b_literal{val=1}, FailCtx, St1),
- {Sum,Pre0++Pre,St};
-cg_size_calc_2([{_,#b_var{}=Sz}|T], Sum0, FailCtx, St0) ->
- {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, FailCtx, St0),
- {Sum,Pre,St} = cg_size_add(Sum1, Sz, #b_literal{val=1}, FailCtx, St1),
- {Sum,Pre0++Pre,St};
-cg_size_calc_2([{_,{_,_}=Bif}|T], Sum0, FailCtx, St0) ->
- {Sum1,Pre0,St1} = cg_size_calc_2(T, Sum0, FailCtx, St0),
- {BifDst,Pre1,St2} = cg_size_bif(Bif, FailCtx, St1),
- {Sum,Pre2,St} = cg_size_add(Sum1, BifDst, #b_literal{val=1}, FailCtx, St2),
- {Sum,Pre0++Pre1++Pre2,St};
-cg_size_calc_2([], Sum, _FailCtx, St) ->
- {Sum,[],St}.
-cg_size_bif(#b_var{}=Var, _FailCtx, St) ->
- {Var,[],St};
-cg_size_bif({Name,Src}, FailCtx, St0) ->
- {Dst,St1} = new_ssa_var('@ssa_bif', St0),
- Bif = #b_set{op=Name,dst=Dst,args=[Src]},
- {TestIs,St} = make_succeeded(Dst, FailCtx, St1),
- {Dst,[Bif|TestIs],St}.
-cg_size_add(#b_literal{val=0}, Val, #b_literal{val=1}, _FailCtx, St) ->
- {Val,[],St};
-cg_size_add(#b_literal{val=A}, #b_literal{val=B}, #b_literal{val=U}, _FailCtx, St) ->
- {#b_literal{val=A+B*U},[],St};
-cg_size_add(A, B, Unit, FailCtx, St0) ->
- {Dst,St1} = new_ssa_var('@ssa_sum', St0),
- {TestIs,St} = make_succeeded(Dst, FailCtx, St1),
- BsAdd = #b_set{op=bs_add,dst=Dst,args=[A,B,Unit]},
- {Dst,[BsAdd|TestIs],St}.
-cg_bin_put(Seg, FailCtx, St) ->
- cg_bin_put_1(Seg, FailCtx, [], [], St).
- FailCtx, Acc, SzCalcAcc, St0) ->
- [Src,Size] = ssa_args([Src0,Size0], St0),
- NeedSize = bs_need_size(T),
- TypeArg = #b_literal{val=T},
- Flags = #b_literal{val=Fs},
- Unit = #b_literal{val=U},
- Args = case NeedSize of
- true -> [TypeArg,Flags,Src,Size,Unit];
- false -> [TypeArg,Flags,Src]
+ Build = #b_set{anno=LineAnno,op=bs_create_bin,args=Segs,dst=Dst},
+ {TestIs,St} = make_succeeded(Dst, FailCtx, St0),
+ {[Build|TestIs],St}.
+cg_bin_segments(#k_bin_seg{anno=Anno,type=Type,flags=Flags0,seg=Src0,size=Size0,unit=U,next=Next}, St) ->
+ Seg = case lists:keyfind(segment, 1,Anno) of
+ false -> [];
+ {segment,_}=Seg0 -> [Seg0]
+ end,
+ [Src,Size] = ssa_args([Src0,Size0], St),
+ TypeArg = #b_literal{val=Type},
+ Unit = case U of
+ undefined -> 0;
+ _ -> U
- {Dst,St1} = new_ssa_var('@ssa_bs_put', St0),
- {TestIs,St} = make_succeeded(Dst, FailCtx, St1),
- Is = [#b_set{op=bs_put,dst=Dst,args=Args}|TestIs],
- SzCalc = bin_size_calc(T, Src, Size, U),
- cg_bin_put_1(Next, FailCtx, reverse(Is, Acc), [SzCalc|SzCalcAcc], St);
-cg_bin_put_1(#k_bin_end{}, _, Acc, SzCalcAcc, St) ->
- SzCalc = fold_size_calc(SzCalcAcc, 0, []),
- {reverse(Acc),SzCalc,St}.
+ Flags = strip_bs_construct_flags(Flags0),
+ UnitFlags = #b_literal{val=[Unit|Flags++Seg]},
+ [TypeArg,UnitFlags,Src,Size|cg_bin_segments(Next, St)];
+cg_bin_segments(#k_bin_end{}, _St) -> [].
bs_need_size(utf8) -> false;
bs_need_size(utf16) -> false;
bs_need_size(utf32) -> false;
bs_need_size(_) -> true.
-bin_size_calc(utf8, Src, _Size, _Unit) ->
- {8,{bs_utf8_size,Src}};
-bin_size_calc(utf16, Src, _Size, _Unit) ->
- {8,{bs_utf16_size,Src}};
-bin_size_calc(utf32, _Src, _Size, _Unit) ->
- {8,#b_literal{val=4}};
-bin_size_calc(binary, Src, #b_literal{val=all}, Unit) ->
- case Unit rem 8 of
- 0 -> {8,{{bif,byte_size},Src}};
- _ -> {1,{{bif,bit_size},Src}}
- end;
-bin_size_calc(_Type, _Src, Size, Unit) ->
- {Unit,Size}.
-fold_size_calc([{Unit,#b_literal{val=Size}}|T], Bits, Acc) ->
- if
- is_integer(Size) ->
- fold_size_calc(T, Bits + Unit*Size, Acc);
- true ->
- error
- end;
-fold_size_calc([{U,#b_var{}}=H|T], Bits, Acc) when U =:= 1; U =:= 8 ->
- fold_size_calc(T, Bits, [H|Acc]);
-fold_size_calc([{U,#b_var{}=Var}|T], Bits, Acc) ->
- fold_size_calc(T, Bits, [{1,{'*',#b_literal{val=U},Var}}|Acc]);
-fold_size_calc([{_,_}=H|T], Bits, Acc) ->
- fold_size_calc(T, Bits, [H|Acc]);
-fold_size_calc([], Bits, Acc) ->
- Bytes = Bits div 8,
- RemBits = Bits rem 8,
- Sizes = sort([{1,#b_literal{val=RemBits}},{8,#b_literal{val=Bytes}}|Acc]),
- [Pair || {_,Sz}=Pair <- Sizes, Sz =/= #b_literal{val=0}].
+%% Only keep the flags that have a meaning for binary construction and
+%% are distinct from the default value.
+strip_bs_construct_flags(Flags) ->
+ [Flag || Flag <- Flags,
+ case Flag of % no catch-all clause: a flag outside these five raises case_clause
+ little -> true; % kept
+ native -> true; % kept
+ big -> false; % dropped
+ signed -> false; % dropped
+ unsigned -> false % dropped
+ end].
%%% Utilities for creating the SSA types.
@@ -105,10 +105,9 @@
%% To avoid the collapsing, change the value of SET_LIMIT to 50 in the
%% file erl_types.erl in the dialyzer application.
--type prim_op() :: 'bs_add' | 'bs_extract' | 'bs_get_tail' |
- 'bs_init' | 'bs_init_writable' |
- 'bs_match' | 'bs_put' | 'bs_start_match' | 'bs_test_tail' |
- 'bs_utf16_size' | 'bs_utf8_size' | 'build_stacktrace' |
+-type prim_op() :: 'bs_extract' | 'bs_get_tail' | 'bs_init_writable' |
+ 'bs_match' | 'bs_start_match' | 'bs_test_tail' |
+ 'build_stacktrace' |
'call' | 'catch_end' |
'extract' |
'get_hd' | 'get_map_element' | 'get_tl' | 'get_tuple_element' |
@@ -197,17 +196,13 @@ no_side_effect(#b_set{op=Op}) ->
case Op of
{bif,_} -> true;
{float,get} -> true;
- bs_add -> true;
- bs_init -> true;
+ bs_create_bin -> true;
bs_init_writable -> true;
bs_extract -> true;
bs_match -> true;
bs_start_match -> true;
bs_test_tail -> true;
bs_get_tail -> true;
- bs_put -> true;
- bs_utf16_size -> true;
- bs_utf8_size -> true;
build_stacktrace -> true;
extract -> true;
get_hd -> true;
@@ -276,32 +276,13 @@ calc_size_is([I|Is], Bs0) ->
calc_size_is(Is, Bs);
calc_size_is([], Bs) -> Bs.
-calc_size_instr(#b_set{op=bs_add,args=[A,B,U],dst=Dst}, Bs) ->
- %% We must make sure that the value of bs_add only depends on literals
- %% and arguments passed from the function that created the writable
- %% binary.
- case {get_value(A, Bs),get_arg_value(B, Bs)} of
- {#b_literal{}=Lit,Val} ->
- Bs#{Dst => {expr,{{bif,'+'},[Lit,{{bif,'*'},[Val,U]}]}}};
- {{expr,Expr},Val} ->
- Bs#{Dst => {expr,{{bif,'+'},[Expr,{{bif,'*'},[Val,U]}]}}};
- {_,_} ->
- %% The value depends on a variable of which we know nothing.
- Bs#{Dst => any}
- end;
- Writable,Size,Unit],
+ args=[#b_literal{val=private_append},_,Writable,_|Args],
dst=Dst}, Bs) ->
- case get_value(Size, Bs) of
- {arg,SizeOrigin} ->
- Expr = {{bif,'*'},[SizeOrigin,Unit]},
- update_writable(Dst, Writable, Expr, Bs);
- #b_literal{} ->
- Expr = {{bif,'*'},[Size,Unit]},
- update_writable(Dst, Writable, Expr, Bs);
+ case calc_create_bin_size(Args, Bs) of
{expr,Expr} ->
update_writable(Dst, Writable, Expr, Bs);
- _ ->
+ any ->
Bs#{Dst => any}
@@ -347,6 +328,26 @@ calc_size_instr(#b_set{op={succeeded,_},args=[Arg],dst=Dst}, Bs) ->
calc_size_instr(#b_set{dst=Dst}, Bs) ->
Bs#{Dst => any}.
+calc_create_bin_size(Args, Bs) -> % Build a size expression for the given segments, or return `any`.
+ calc_create_bin_size(Args, Bs, #b_literal{val=0}). % the accumulated expression starts as the literal 0
+calc_create_bin_size([_,#b_literal{val=[0|_]},_,_|_], _Bs, _Acc) ->
+ %% Construction without size (utf8/utf16/utf32).
+ any;
+calc_create_bin_size([_,#b_literal{val=[U|_]},_,Size|T], Bs, Acc0) when is_integer(U) -> % four arguments per segment; U is the head of the unit/flags list
+ case get_value(Size, Bs) of
+ #b_literal{val=Val} when is_integer(Val) -> % literal integer size: add the constant U*Val
+ Acc = {{bif,'+'},[Acc0,#b_literal{val=U*Val}]},
+ calc_create_bin_size(T, Bs, Acc);
+ {arg,Var} -> % size reported as {arg,Var}: add Var*U symbolically
+ Acc = {{bif,'+'},[Acc0,{{bif,'*'},[Var,#b_literal{val=U}]}]},
+ calc_create_bin_size(T, Bs, Acc);
+ _ -> % any other result from get_value/2 gives up
+ any
+ end;
+calc_create_bin_size([], _Bs, Acc) -> % all segments processed: return the accumulated expression
+ {expr,Acc}.
update_writable(Dst, Writable, Expr, Bs) ->
case get_value(Writable, Bs) of
{writable,#b_literal{val=0}} ->
@@ -252,7 +252,7 @@ need_heap_blks([], H, Acc) ->
need_heap_is([#cg_alloc{words=Words}=Alloc0|Is], N, Acc) ->
Alloc = Alloc0#cg_alloc{words=add_heap_words(N, Words)},
need_heap_is(Is, #need{}, [Alloc|Acc]);
-need_heap_is([#cg_set{anno=Anno,op=bs_init}=I0|Is], N, Acc) ->
+need_heap_is([#cg_set{anno=Anno,op=bs_create_bin}=I0|Is], N, Acc) ->
Alloc = case need_heap_need(N) of
[#cg_alloc{words=Need}] -> alloc(Need);
[] -> 0
@@ -284,13 +284,11 @@ need_heap_terminator([{_,#cg_blk{is=Is,last=#cg_br{succ=L}}}|_], L, N) ->
[] ->
[_|_]=Alloc ->
- %% If the preceding instructions are a binary construction,
- %% hoist the allocation and incorporate into the bs_init
+ %% If the preceding instruction is a bs_create_bin instruction,
+ %% hoist the allocation and incorporate into the bs_create_bin
%% instruction.
case reverse(Is) of
- [#cg_set{op=succeeded},#cg_set{op=bs_init}|_] ->
- {[],N};
- [#cg_set{op=succeeded},#cg_set{op=bs_put}|_] ->
+ [#cg_set{op=succeeded},#cg_set{op=bs_create_bin}|_] ->
_ ->
%% Not binary construction. Must emit an allocation
@@ -371,20 +369,16 @@ classify_heap_need(Name, _Args) ->
%% Note: Only handle operations in this function that are not handled
%% by classify_heap_need/2.
-classify_heap_need(bs_add) -> gc;
classify_heap_need(bs_get) -> gc;
classify_heap_need(bs_get_tail) -> gc;
-classify_heap_need(bs_init) -> gc;
classify_heap_need(bs_init_writable) -> gc;
classify_heap_need(bs_match_string) -> gc;
-classify_heap_need(bs_put) -> neutral;
+classify_heap_need(bs_create_bin) -> gc;
classify_heap_need(bs_get_position) -> gc;
classify_heap_need(bs_set_position) -> neutral;
classify_heap_need(bs_skip) -> gc;
classify_heap_need(bs_start_match) -> gc;
classify_heap_need(bs_test_tail) -> neutral;
-classify_heap_need(bs_utf16_size) -> neutral;
-classify_heap_need(bs_utf8_size) -> neutral;
classify_heap_need(build_stacktrace) -> gc;
classify_heap_need(call) -> gc;
classify_heap_need(catch_end) -> gc;
@@ -677,8 +671,8 @@ get_live(#cg_set{anno=#{live:=Live}}) ->
need_live_anno(Op) ->
case Op of
{bif,_} -> true;
+ bs_create_bin -> true;
bs_get -> true;
- bs_init -> true;
bs_get_position -> true;
bs_get_tail -> true;
bs_start_match -> true;
@@ -801,7 +795,7 @@ need_y_init(#cg_set{anno=#{clobbers:=Clobbers}}) -> Clobbers;
need_y_init(#cg_set{op=bs_get}) -> true;
need_y_init(#cg_set{op=bs_get_position}) -> true;
need_y_init(#cg_set{op=bs_get_tail}) -> true;
-need_y_init(#cg_set{op=bs_init}) -> true;
+need_y_init(#cg_set{op=bs_create_bin}) -> true;
need_y_init(#cg_set{op=bs_skip,args=[#b_literal{val=Type}|_]}) ->
case Type of
utf8 -> true;
@@ -1108,30 +1102,21 @@ cg_block([#cg_set{anno=Anno,op={bif,Name},dst=Dst0,args=Args0}=I|T],
Is = [{bif,Name,{f,0},Args,Dst}|Is0],
#cg_set{op=succeeded,dst=Bool}], {Bool,Fail0}, St) ->
Fail = bif_fail(Fail0),
Line = line(Anno),
Alloc = map_get(alloc, Anno),
- [#b_literal{val=Kind}|Args1] = Args0,
Live = get_live(I),
- case Kind of
- new ->
- [Dst,Size,{integer,Unit}] = beam_args([Dst0|Args1], St),
- {[Line|cg_bs_init(Dst, Size, Alloc, Unit, Live, Fail)],St};
- private_append ->
- [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St),
- Flags = {field_flags,[]},
- TestHeap = {test_heap,Alloc,Live},
- BsPrivateAppend = {bs_private_append,Fail,Bits,Unit,Src,Flags,Dst},
- Is = [TestHeap,Line,BsPrivateAppend],
- {Is,St};
- append ->
- [Dst,Src,Bits,{integer,Unit}] = beam_args([Dst0|Args1], St),
- Flags = {field_flags,[]},
- Is = [Line,{bs_append,Fail,Bits,Alloc,Live,Unit,Src,Flags,Dst}],
- {Is,St}
- end;
+ [Dst|Args1] = beam_args([Dst0|Args0], St),
+ Args = bs_args(Args1),
+ Unit = case Args of
+ [{atom,append},_Seg,U|_] -> U;
+ [{atom,private_append},_Seg,U|_] -> U;
+ _ -> 1
+ end,
+ Is = [Line,{bs_create_bin,Fail,Alloc,Live,Unit,Dst,{list,Args}}],
+ {Is,St};
@@ -1156,10 +1141,6 @@ cg_block([#cg_set{op=bs_match_string,args=[CtxVar,#b_literal{val=String0}]},
Is = [{test,bs_match_string,Fail,[CtxReg,Bits,{string,String}]}],
- #cg_set{op=succeeded,dst=Bool}], {Bool,Fail}, St) ->
- Args = beam_args(Args0, St),
- {cg_bs_put(bif_fail(Fail), Args),St};
cg_block([#cg_set{dst=Dst0,op=landingpad,args=Args0}|T], Context, St0) ->
[Dst,{atom,Kind},Tag] = beam_args([Dst0|Args0], St0),
case Kind of
@@ -1329,6 +1310,32 @@ cg_block([], {Bool0,Fail}, St) ->
[Bool] = beam_args([Bool0], St),
+ when bit_size(Bs) =:= 0 ->
+ bs_args(Args);
+ when is_bitstring(Bs) ->
+ Bits = bit_size(Bs),
+ Bytes = Bits div 8,
+ case Bits rem 8 of
+ 0 ->
+ [{atom,string},0,8,nil,{string,Bs},{integer,byte_size(Bs)}|bs_args(Args0)];
+ Rem ->
+ <<Binary:Bytes/bytes,Int:Rem>> = Bs,
+ Args = [{atom,binary},UFs,{literal,Binary},{atom,all},
+ {atom,integer},{literal,[1]},{integer,Int},{integer,Rem}|Args0],
+ bs_args(Args)
+ end;
+bs_args([Type,{literal,[Unit|Fs0]},Val,Size|Args]) ->
+ Segment = proplists:get_value(segment, Fs0, 0),
+ Fs1 = proplists:delete(segment, Fs0),
+ Fs = case Fs1 of
+ [] -> nil;
+ [_|_] -> {literal,Fs1}
+ end,
+ [Type,Segment,Unit,Fs,Val,Size|bs_args(Args)];
+bs_args([]) -> [].
cg_copy(T0, St) ->
{Copies,T} = splitwith(fun(#cg_set{op=copy}) -> true;
(_) -> false
@@ -1716,14 +1723,8 @@ cg_instr(remove_message, [], _Dst) ->
cg_instr(resume, [A,B], _Dst) ->
-cg_test(bs_add=Op, Fail, [Src1,Src2,{integer,Unit}], Dst, _I) ->
- [{Op,Fail,[Src1,Src2,Unit],Dst}];
cg_test(bs_skip, Fail, Args, _Dst, I) ->
cg_bs_skip(Fail, Args, I);
-cg_test(bs_utf8_size=Op, Fail, [Src], Dst, _I) ->
- [{Op,Fail,Src,Dst}];
-cg_test(bs_utf16_size=Op, Fail, [Src], Dst, _I) ->
- [{Op,Fail,Src,Dst}];
cg_test({float,convert}, Fail, [Src], Dst, #cg_set{anno=Anno}) ->
{f,0} = Fail, %Assertion.
@@ -1793,34 +1794,6 @@ field_flags(Flags, #cg_set{anno=#{location:={File,Line}}}) ->
field_flags(Flags, _) ->
-cg_bs_put(Fail, [{atom,Type},{literal,Flags}|Args]) ->
- Op = case Type of
- integer -> bs_put_integer;
- float -> bs_put_float;
- binary -> bs_put_binary;
- utf8 -> bs_put_utf8;
- utf16 -> bs_put_utf16;
- utf32 -> bs_put_utf32
- end,
- case Args of
- [Src,Size,{integer,Unit}] ->
- [{Op,Fail,Size,Unit,{field_flags,Flags},Src}];
- [Src] ->
- [{Op,Fail,{field_flags,Flags},Src}]
- end.
-cg_bs_init(Dst, Size0, Alloc, Unit, Live, Fail) ->
- Op = case Unit of
- 1 -> bs_init_bits;
- 8 -> bs_init2
- end,
- Size = cg_bs_init_size(Size0),
- [{Op,Fail,Size,Alloc,Live,{field_flags,[]},Dst}].
-cg_bs_init_size({x,_}=R) -> R;
-cg_bs_init_size({y,_}=R) -> R;
-cg_bs_init_size({integer,Int}) -> Int.
cg_catch(Agg, T0, Context, St0) ->
{Moves,T1} = cg_extract(T0, Agg, St0),
{T,St} = cg_block(T1, Context, St0),
@@ -277,7 +277,7 @@ module_passes(Opts) ->
repeated_passes(Opts) ->
Ps = [?PASS(ssa_opt_live),
- ?PASS(ssa_opt_bs_puts),
+ ?PASS(ssa_opt_bs_create_bin),
@@ -1788,101 +1788,63 @@ bsm_shortcut([], _PosMap) -> [].
%%% If an integer segment or a float segment has a literal size and
%%% a literal value, convert to a binary segment. Coalesce adjacent
%%% literal binary segments. Literal binary segments will be converted
-%%% to bs_put_string instructions in later pass.
+%%% to bs_put_string instructions in a later pass.
-ssa_opt_bs_puts({#opt_st{ssa=Linear0,cnt=Count0}=St, FuncDb}) ->
- {Linear,Count} = opt_bs_puts(Linear0, Count0, []),
- {St#opt_st{ssa=Linear,cnt=Count}, FuncDb}.
-opt_bs_puts([{L,#b_blk{is=Is}=Blk0}|Bs], Count0, Acc0) ->
- case Is of
- [#b_set{op=bs_put},#b_set{op={succeeded,_}}]=Is ->
- case opt_bs_put(L, Is, Blk0, Count0, Acc0) of
- not_possible ->
- opt_bs_puts(Bs, Count0, [{L,Blk0}|Acc0]);
- {Count,Acc1} ->
- Acc = opt_bs_puts_merge(Acc1),
- opt_bs_puts(Bs, Count, Acc)
- end;
- _ ->
- opt_bs_puts(Bs, Count0, [{L,Blk0}|Acc0])
- end;
-opt_bs_puts([], Count, Acc) ->
- {reverse(Acc),Count}.
-opt_bs_puts_merge([{L1,#b_blk{is=Is}=Blk0},{L2,#b_blk{is=AccIs}}=BAcc|Acc]) ->
- case {AccIs,Is} of
- {[#b_set{op=bs_put,
- args=[#b_literal{val=binary},
- #b_literal{},
- #b_literal{val=Bin0},
- #b_literal{val=all},
- #b_literal{val=1}]},
- #b_set{op={succeeded,_}}],
- [#b_set{op=bs_put,
- args=[#b_literal{val=binary},
- #b_literal{},
- #b_literal{val=Bin1},
- #b_literal{val=all},
- #b_literal{val=1}]}=I0,
- #b_set{op={succeeded,_}}=Succeeded]} ->
- %% Coalesce the two segments to one.
- Bin = <<Bin0/bitstring,Bin1/bitstring>>,
- I = I0#b_set{args=bs_put_args(binary, Bin, all)},
- Blk = Blk0#b_blk{is=[I,Succeeded]},
- [{L2,Blk}|Acc];
- {_,_} ->
- [{L1,Blk0},BAcc|Acc]
- end.
+ssa_opt_bs_create_bin({#opt_st{ssa=Linear0}=St, FuncDb}) -> % Pass entry: rewrite the ssa field of St, FuncDb is returned untouched.
+ Linear = opt_create_bin_fs(Linear0), % rewrite bs_create_bin arguments in every block
+ {St#opt_st{ssa=Linear}, FuncDb}.
-opt_bs_put(L, [I0,Succeeded], #b_blk{last=Br0}=Blk0, Count0, Acc) ->
- case opt_bs_put(I0) of
- [Bin] when is_bitstring(Bin) ->
- Args = bs_put_args(binary, Bin, all),
- I = I0#b_set{args=Args},
- Blk = Blk0#b_blk{is=[I,Succeeded]},
- {Count0,[{L,Blk}|Acc]};
- [{int,Int,Size},Bin] when is_bitstring(Bin) ->
- %% Construct a bs_put_integer instruction following
- %% by a bs_put_binary instruction.
- IntArgs = bs_put_args(integer, Int, Size),
- BinArgs = bs_put_args(binary, Bin, all),
- {BinL,BinVarNum,BinBoolNum} = {Count0,Count0+1,Count0+2},
- Count = Count0 + 3,
- BinVar = #b_var{name={'@ssa_bs_put',BinVarNum}},
- BinBool = #b_var{name={'@ssa_bool',BinBoolNum}},
- BinI = I0#b_set{dst=BinVar,args=BinArgs},
- BinSucceeded = Succeeded#b_set{dst=BinBool,args=[BinVar]},
- BinBlk = Blk0#b_blk{is=[BinI,BinSucceeded],
- last=Br0#b_br{bool=BinBool}},
- IntI = I0#b_set{args=IntArgs},
- IntBlk = Blk0#b_blk{is=[IntI,Succeeded],last=Br0#b_br{succ=BinL}},
- {Count,[{BinL,BinBlk},{L,IntBlk}|Acc]};
+opt_create_bin_fs([{L,#b_blk{is=Is0}=Blk0}|Bs]) -> % Walk the {Label,Block} list, rewriting each block's instruction list.
+ Is = opt_create_bin_is(Is0),
+ Blk = Blk0#b_blk{is=Is}, % only the `is` field changes; the rest of the block is kept
+ [{L,Blk}|opt_create_bin_fs(Bs)];
+opt_create_bin_fs([]) -> [].
+opt_create_bin_is([#b_set{op=bs_create_bin,args=Args0}=I0|Is]) -> % Only bs_create_bin instructions get their arguments rewritten.
+ Args = opt_create_bin_args(Args0),
+ I = I0#b_set{args=Args},
+ [I|opt_create_bin_is(Is)];
+opt_create_bin_is([I|Is]) -> % every other instruction is kept as is
+ [I|opt_create_bin_is(Is)];
+opt_create_bin_is([]) -> [].
+ #b_literal{val=Bin0},#b_literal{val=all},
+ #b_literal{val=binary},#b_literal{val=[1|_]},
+ #b_literal{val=Bin1},#b_literal{val=all}|Args0]) ->
+ %% Coalesce two literal binary segments to one.
+ Bin = <<Bin0/bitstring,Bin1/bitstring>>,
+ Args = [#b_literal{val=binary},#b_literal{val=[1]},
+ #b_literal{val=Bin},#b_literal{val=all}|Args0],
+ opt_create_bin_args(Args);
+opt_create_bin_args([#b_literal{val=Type}=Type0,#b_literal{val=UFs}=UFs0,Val,Size|Args0]) ->
+ [Unit|Flags] = UFs,
+ case opt_create_bin_arg(Type, Unit, UFs, Val, Size) of
not_possible ->
- not_possible
- end.
- #b_literal{val=all},#b_literal{val=Unit}]})
- when is_bitstring(Val) ->
- if
- bit_size(Val) rem Unit =:= 0 ->
- [Val];
- true ->
- not_possible
- end;
- #b_literal{val=Val},#b_literal{val=Size},
- #b_literal{val=Unit}]}=I0) when is_integer(Size) ->
+ [Type0,UFs0,Val,Size|opt_create_bin_args(Args0)];
+ [Bin] when is_bitstring(Bin) ->
+ Args = [#b_literal{val=binary},#b_literal{val=[1]},
+ #b_literal{val=Bin},#b_literal{val=all}|Args0],
+ opt_create_bin_args(Args);
+ [{int,Int,IntSize},Bin] when is_bitstring(Bin) ->
+ Args = [#b_literal{val=integer},#b_literal{val=[1|Flags]},
+ #b_literal{val=Int},#b_literal{val=IntSize},
+ #b_literal{val=binary},#b_literal{val=[1]},
+ #b_literal{val=Bin},#b_literal{val=all}|Args0],
+ opt_create_bin_args(Args)
+ end;
+opt_create_bin_args([]) -> [].
+opt_create_bin_arg(binary, Unit, _Flags, #b_literal{val=Val}, #b_literal{val=all})
+ when Unit =/= 1, bit_size(Val) rem Unit =:= 0 ->
+ [Val];
+opt_create_bin_arg(Type, Unit, Flags, #b_literal{val=Val}, #b_literal{val=Size})
+ when is_integer(Size), is_integer(Unit) ->
EffectiveSize = Size * Unit,
EffectiveSize > 0 ->
- case {Type,opt_bs_put_endian(Flags)} of
+ case {Type,opt_create_bin_endian(Flags)} of
{integer,big} when is_integer(Val) ->
EffectiveSize < 64 ->
@@ -1894,9 +1856,8 @@ opt_bs_put(#b_set{args=[#b_literal{val=Type},#b_literal{val=Flags},
%% To avoid an explosion in code size, we only try
%% to optimize relatively small fields.
<<Int:EffectiveSize>> = <<Val:EffectiveSize/little>>,
- Args = bs_put_args(Type, Int, EffectiveSize),
- I = I0#b_set{args=Args},
- opt_bs_put(I);
+ opt_create_bin_arg(Type, 1, [], #b_literal{val=Int},
+ #b_literal{val=EffectiveSize});
{binary,_} when is_bitstring(Val) ->
case Val of
<<Bitstring:EffectiveSize/bits,_/bits>> ->
@@ -1907,8 +1868,14 @@ opt_bs_put(#b_set{args=[#b_literal{val=Type},#b_literal{val=Flags},
{float,Endian} ->
- [opt_bs_put_float(Val, EffectiveSize, Endian)]
- catch error:_ ->
+ case Endian of
+ big ->
+ [<<Val:EffectiveSize/big-float-unit:1>>];
+ little ->
+ [<<Val:EffectiveSize/little-float-unit:1>>]
+ end
+ catch
+ error:_ ->
{_,_} ->
@@ -1917,25 +1884,12 @@ opt_bs_put(#b_set{args=[#b_literal{val=Type},#b_literal{val=Flags},
true ->
-opt_bs_put(#b_set{}) -> not_possible.
-opt_bs_put_float(N, Sz, Endian) ->
- case Endian of
- big -> <<N:Sz/big-float-unit:1>>;
- little -> <<N:Sz/little-float-unit:1>>
- end.
-bs_put_args(Type, Val, Size) ->
- [#b_literal{val=Type},
- #b_literal{val=[unsigned,big]},
- #b_literal{val=Val},
- #b_literal{val=Size},
- #b_literal{val=1}].
+opt_create_bin_arg(_, _, _, _, _) -> not_possible.
-opt_bs_put_endian([big=E|_]) -> E;
-opt_bs_put_endian([little=E|_]) -> E;
-opt_bs_put_endian([native=E|_]) -> E;
-opt_bs_put_endian([_|Fs]) -> opt_bs_put_endian(Fs).
+opt_create_bin_endian([little=E|_]) -> E; % first endianness flag found wins
+opt_create_bin_endian([native=E|_]) -> E;
+opt_create_bin_endian([_|Fs]) -> opt_create_bin_endian(Fs); % skip flags that do not name an endianness
+opt_create_bin_endian([]) -> big. % no little/native flag present: default to big
opt_bs_put_split_int(Int, Size) ->
Pos = opt_bs_put_split_int_1(Int, 0, Size - 1),
@@ -2512,7 +2466,6 @@ unsuitable(Linear, Blocks) ->
unsuitable_1([{L,#b_blk{is=[#b_set{op=Op}=I|_]}}|Bs]) ->
Unsuitable = case Op of
bs_extract -> true;
- bs_put -> true;
{float,_} -> true;
landingpad -> true;
_ -> beam_ssa:is_loop_header(I)
@@ -634,36 +634,7 @@ sanitize_is([#b_set{op=get_map_element,args=Args0}=I0|Is],
I = I0#b_set{args=Args},
sanitize_is(Is, Last, Count0, Values, true, [I|Acc])
- #b_br{bool=Dst}=Last, Count, Values, _Changed, Acc0) ->
- %% We no longer need to distinguish between guard and body checks, so we'll
- %% rewrite this as a plain 'succeeded'.
- case sanitize_arg(Arg0, Values) of
- #b_var{}=Arg ->
- case Acc0 of
- [#b_set{op=call,
- args=[#b_remote{mod=#b_literal{val=erlang},
- name=#b_literal{val=error},
- arity=1},_],
- dst=Arg0}|Acc] ->
- %% This erlang:error/1 is the result from a
- %% sanitized bs_add or bs_init instruction. Calls
- %% to erlang:error/1 in receive is not allowed, so
- %% we will have to rewrite this instruction
- %% sequence to an unconditional branch to the
- %% failure label.
- Fail =,
- Br = #b_br{bool=#b_literal{val=true},succ=Fail,fail=Fail},
- {reverse(Acc), Br, Count, Values};
- _ ->
- I = I0#b_set{op=succeeded,args=[Arg]},
- {reverse(Acc0, [I]), Last, Count, Values}
- end;
- #b_literal{} ->
- Value = #b_literal{val=true},
- {reverse(Acc0), Last, Count, Values#{ Dst => Value }}
- end;
#b_br{bool=Dst}=Last, Count, Values, _Changed, Acc) ->
%% We no longer need to distinguish between guard and body checks, so we'll
%% rewrite this as a plain 'succeeded'.
@@ -803,31 +774,9 @@ sanitize_instr(is_tagged_tuple, [#b_literal{val=Tuple},
true ->
-sanitize_instr(bs_add, [Arg1,Arg2,_|_], I0) ->
- case all(fun(#b_literal{val=Size}) -> is_integer(Size) andalso Size >= 0;
- (#b_var{}) -> true
- end, [Arg1,Arg2]) of
- true -> ok;
- false -> {ok,sanitize_badarg(I0)}
- end;
-sanitize_instr(bs_init, [#b_literal{val=new},#b_literal{val=Sz}|_], I0) ->
- if
- is_integer(Sz), Sz >= 0 -> ok;
- true -> {ok,sanitize_badarg(I0)}
- end;
-sanitize_instr(bs_init, [#b_literal{},_,#b_literal{val=Sz}|_], I0) ->
- if
- is_integer(Sz), Sz >= 0 -> ok;
- true -> {ok,sanitize_badarg(I0)}
- end;
sanitize_instr(_, _, _) ->
-sanitize_badarg(I) ->
- Func = #b_remote{mod=#b_literal{val=erlang},
- name=#b_literal{val=error},arity=1},
- I#b_set{op=call,args=[Func,#b_literal{val=badarg}]}.
remove_unreachable([L|Ls], Blocks, Reachable, Acc) ->
#b_blk{is=Is0} = Blk0 = map_get(L, Blocks),
case split_phis(Is0) of
diff --git a/lib/compiler/src/beam_ssa_type.erl b/lib/compiler/src/beam_ssa_type.erl
--import(lists, [all/2,any/2,duplicate/2,foldl/3,member/2,
+-import(lists, [any/2,duplicate/2,foldl/3,member/2,
%% The maximum number of #b_ret{} terminators a function can have before
@@ -1122,36 +1122,12 @@ will_succeed_1(#b_set{op=get_tuple_element}, _Src, _Ts, _Sub) ->
will_succeed_1(#b_set{op=put_tuple}, _Src, _Ts, _Sub) ->
-%% Remove the success branch from binary operations with invalid
-%% sizes. That will remove subsequent bs_put and bs_match instructions,
-%% which are probably not loadable.
- _Src, _Ts, _Sub) ->
- case all(fun(#b_literal{val=Size}) -> is_integer(Size) andalso Size >= 0;
- (#b_var{}) -> true
- end, [Arg1,Arg2]) of
- true -> maybe;
- false -> no
- end;
- args=[#b_literal{val=new},#b_literal{val=Size},_Unit]},
- _Src, _Ts, _Sub) ->
- if
- is_integer(Size), Size >= 0 ->
- maybe;
- true ->
- no
- end;
- args=[#b_literal{},_,#b_literal{val=Size},_Unit]},
- _Src, _Ts, _Sub) ->
- if
- is_integer(Size), Size >= 0 ->
- maybe;
- true ->
- no
- end;
+will_succeed_1(#b_set{op=bs_create_bin}, _Src, _Ts, _Sub) ->
+ %% Intentionally don't try to determine whether construction will
+ %% fail. Construction is unlikely to fail, and if it fails, the
+ %% instruction in the runtime system will generate an exception with
+ %% better information of what went wrong.
+ maybe;
_Src, _Ts, _Sub) ->
@@ -1741,7 +1717,7 @@ type({bif,Bif}, Args, _Anno, Ts, _Ds) ->
ArgTypes = normalized_types(Args, Ts),
{RetType, _, _} = beam_call_types:types(erlang, Bif, ArgTypes),
-type(bs_init, _Args, _Anno, _Ts, _Ds) ->
+type(bs_create_bin, _Args, _Anno, _Ts, _Ds) ->
type(bs_extract, [Ctx], _Anno, _Ts, Ds) ->
#b_set{op=bs_match,args=Args} = map_get(Ctx, Ds),
diff --git a/lib/compiler/src/beam_trim.erl b/lib/compiler/src/beam_trim.erl
L = [Map(E) || E <- L0],
I = {get_map_elements,Fail,Map(M),{list,L}},
remap(Is, Map, [I|Acc]);
-remap([{bs_init,Fail,Info,Live,Ss0,Dst0}|Is], Map, Acc) ->
- Ss = [Map(Src) || Src <- Ss0],
- Dst = Map(Dst0),
- I = {bs_init,Fail,Info,Live,Ss,Dst},
- remap(Is, Map, [I|Acc]);
-remap([{bs_put=Op,Fail,Info,Ss}|Is], Map, Acc) ->
- I = {Op,Fail,Info,[Map(S) || S <- Ss]},
- remap(Is, Map, [I|Acc]);
remap([{init_yregs,{list,Yregs0}}|Is], Map, Acc) ->
Yregs = sort([Map(Y) || Y <- Yregs0]),
I = {init_yregs,{list,Yregs}},
@@ -418,10 +410,6 @@ frame_size([{test,_,{f,L},_}|Is], Safe) ->
frame_size_branch(L, Is, Safe);
frame_size([{test,_,{f,L},_,_,_}|Is], Safe) ->
frame_size_branch(L, Is, Safe);
-frame_size([{bs_init,{f,L},_,_,_,_}|Is], Safe) ->
- frame_size_branch(L, Is, Safe);
-frame_size([{bs_put,{f,L},_,_}|Is], Safe) ->
- frame_size_branch(L, Is, Safe);
frame_size([{init_yregs,_}|Is], Safe) ->
frame_size(Is, Safe);
frame_size([{make_fun2,_,_,_,_}|Is], Safe) ->
@@ -480,10 +468,6 @@ is_not_used(Y, [{block,Bl}|Is]) ->
is_not_used(Y, [{bs_get_tail,Src,Dst,_}|Is]) ->
is_not_used_ss_dst(Y, [Src], Dst, Is);
-is_not_used(Y, [{bs_init,_,_,_,Ss,Dst}|Is]) ->
- is_not_used_ss_dst(Y, Ss, Dst, Is);
-is_not_used(Y, [{bs_put,{f,_},_,Ss}|Is]) ->
- not member(Y, Ss) andalso is_not_used(Y, Is);
is_not_used(Y, [{bs_start_match4,_Fail,_Live,Src,Dst}|Is]) ->
Y =/= Src andalso Y =/= Dst andalso
is_not_used(Y, Is);
diff --git a/lib/compiler/src/beam_utils.erl b/lib/compiler/src/beam_utils.erl
replace_labels_1(Is, [{make_fun2,{f,label(Lbl, D, Fb)},U1,U2,U3}|Acc], D, Fb);
replace_labels_1([{make_fun3,{f,Lbl},U1,U2,U3,U4}|Is], Acc, D, Fb) ->
replace_labels_1(Is, [{make_fun3,{f,label(Lbl, D, Fb)},U1,U2,U3,U4}|Acc], D, Fb);
+replace_labels_1([{bs_create_bin,{f,Lbl},Alloc,Live,Unit,Dst,{list,List}}|Is], Acc, D, Fb)
+ when Lbl =/= 0 ->
+ replace_labels_1(Is, [{bs_create_bin,{f,label(Lbl, D, Fb)},
+ Alloc,Live,Unit,Dst,{list,List}}|Acc], D, Fb);
replace_labels_1([{bs_init,{f,Lbl},Info,Live,Ss,Dst}|Is], Acc, D, Fb) when Lbl =/= 0 ->
replace_labels_1(Is, [{bs_init,{f,label(Lbl, D, Fb)},Info,Live,Ss,Dst}|Acc], D, Fb);
replace_labels_1([{bs_put,{f,Lbl},Info,Ss}|Is], Acc, D, Fb) when Lbl =/= 0 ->
diff --git a/lib/compiler/src/beam_validator.erl b/lib/compiler/src/beam_validator.erl
@@ -995,6 +995,18 @@ vi(raw_raise=I, Vst0) ->
vi(bs_init_writable=I, Vst) ->
validate_body_call(I, 1, Vst);
+vi({bs_create_bin,{f,Fail},Heap,Live,Unit,Dst,{list,List}}, Vst0) ->
+ verify_live(Live, Vst0),
+ verify_y_init(Vst0),
+ verify_create_bin_list(List, Vst0),
+ Vst = heap_alloc(Heap, Vst0),
+ branch(Fail, Vst,
+ fun(SuccVst0) ->
+ SuccVst1 = update_create_bin_list(List, SuccVst0),
+ SuccVst = prune_x_regs(Live, SuccVst1),
+ create_term(#t_bitstring{size_unit=Unit}, bs_create_bin, [], Dst,
+ SuccVst, SuccVst1)
+ end);
vi({bs_init2,{f,Fail},Sz,Heap,Live,_,Dst}, Vst0) ->
verify_live(Live, Vst0),
@@ -1305,6 +1317,54 @@ pmt_1([Key0, Value0 | List], Vst, Acc0) ->
pmt_1([], _Vst, Acc) ->
+%% Verify the operand list of a bs_create_bin instruction. The list is
+%% flat, with six elements per segment: Type, Seg, Unit, Flags, Val,
+%% and Size. Seg is not checked here. Returns `ok' once the whole list
+%% has been consumed; a list whose length is not a multiple of six
+%% matches no clause.
+verify_create_bin_list([{atom,string},_Seg,Unit,Flags,Val,Size|Args], Vst) ->
+    %% A string segment must carry its value as a {string,Binary}
+    %% operand, so Val is checked structurally instead of as a term.
+    assert_bs_unit({atom,string}, Unit),
+    assert_term(Flags, Vst),
+    case Val of
+        {string,Bs} when is_binary(Bs) -> ok;
+        _ -> error({not_string,Val})
+    end,
+    assert_term(Size, Vst),
+    verify_create_bin_list(Args, Vst);
+verify_create_bin_list([Type,_Seg,Unit,Flags,Val,Size|Args], Vst) ->
+    %% Every other segment: all operands except Seg must be valid terms
+    %% and the unit must be acceptable for the segment type.
+    assert_term(Type, Vst),
+    assert_bs_unit(Type, Unit),
+    assert_term(Flags, Vst),
+    assert_term(Val, Vst),
+    assert_term(Size, Vst),
+    verify_create_bin_list(Args, Vst);
+verify_create_bin_list([], _Vst) -> ok.
+%% Refine the types of the segment values of a bs_create_bin
+%% instruction. The operand list is consumed six elements at a time;
+%% only the segment type and the value are used. String segments are
+%% skipped; for every other segment the value's type is combined
+%% (through meet/2) with the type that update_create_bin_type/1 gives
+%% for that segment type. Returns the updated state.
+update_create_bin_list([{atom,string},_,_,_,_,_|Rest], Vst) ->
+    update_create_bin_list(Rest, Vst);
+update_create_bin_list([{atom,SegType},_,_,_,Src,_|Rest], Vst) ->
+    Narrowed = update_type(fun meet/2, update_create_bin_type(SegType), Src, Vst),
+    update_create_bin_list(Rest, Narrowed);
+update_create_bin_list([], Vst) -> Vst.
+%% Map the type of a bs_create_bin segment to the type its value has
+%% after successful construction: append, private_append and binary
+%% segments take a bitstring, float segments a float, and integer and
+%% UTF segments an integer. Any other segment type matches no clause.
+update_create_bin_type(float) -> #t_float{};
+update_create_bin_type(Op)
+  when Op =:= append; Op =:= private_append; Op =:= binary ->
+    #t_bitstring{};
+update_create_bin_type(Op)
+  when Op =:= integer; Op =:= utf8; Op =:= utf16; Op =:= utf32 ->
+    #t_integer{}.
+%% Check that Unit is acceptable for a segment of the given type.
+%% A unit of 0 is accepted only for the UTF segment types; otherwise
+%% the type must be an {atom,_} operand and the unit an integer in the
+%% range 1 through 256. Returns `ok' or reports the problem through
+%% error/1.
+assert_bs_unit({atom,utf8}, 0) -> ok;
+assert_bs_unit({atom,utf16}, 0) -> ok;
+assert_bs_unit({atom,utf32}, 0) -> ok;
+assert_bs_unit({atom,Type}, 0) ->
+    error({zero_unit_invalid_for_type,Type});
+assert_bs_unit({atom,_}, Unit)
+  when is_integer(Unit), Unit >= 1, Unit =< 256 ->
+    ok;
+assert_bs_unit(_, Unit) ->
+    error({invalid,Unit}).
%% Common code for validating returns, whether naked or as part of a tail call.
diff --git a/lib/compiler/src/beam_z.erl b/lib/compiler/src/beam_z.erl
@@ -87,32 +87,6 @@ undo_renames([{get_hd,Src,Hd},{get_tl,Src,Tl}|Is]) ->
get_list(Src, Hd, Tl, Is);
undo_renames([{get_tl,Src,Tl},{get_hd,Src,Hd}|Is]) ->
get_list(Src, Hd, Tl, Is);
- [{atom,all},{literal,<<>>}]}|Is]) ->
- undo_renames(Is);
- [{atom,all},{literal,BinString}]}|Is0])
- when is_bitstring(BinString)->
- Bits = bit_size(BinString),
- Bytes = Bits div 8,
- case Bits rem 8 of
- 0 ->
- I = {bs_put_string,byte_size(BinString),
- {string,BinString}},
- [undo_rename(I)|undo_renames(Is0)];
- Rem ->
- <<Binary:Bytes/bytes,Int:Rem>> = BinString,
- PutInt = {bs_put_integer,Fail,{integer,Rem},1,
- {field_flags,[unsigned,big]},{integer,Int}},
- Is = [PutInt|undo_renames(Is0)],
- case Binary of
- <<>> ->
- Is;
- _ ->
- [{bs_put_string,byte_size(Binary),
- {string,Binary}}|Is]
- end
- end;
undo_renames([I|Is]) ->
undo_renames([]) -> [].
@@ -272,9 +272,13 @@ expand_opt(no_bsm4, Os) ->
%% that a match instruction won't fail.
expand_opt(no_type_opt, Os);
expand_opt(r22, Os) ->
- expand_opt(r23, [no_shared_fun_wrappers, no_swap | expand_opt(no_bsm4, Os)]);
+ expand_opt(r23, [no_bs_create_bin, no_shared_fun_wrappers,
+ no_swap | expand_opt(no_bsm4, Os)]);
expand_opt(r23, Os) ->
- expand_opt(no_make_fun3, [no_ssa_opt_float, no_recv_opt, no_init_yregs | Os]);
+ expand_opt(no_make_fun3, [no_bs_create_bin, no_ssa_opt_float,
+ no_recv_opt, no_init_yregs | Os]);
+expand_opt(r24, Os) ->
+ [no_bs_create_bin | Os];
expand_opt(no_make_fun3, Os) ->
[no_make_fun3, no_fun_opt | Os];
expand_opt({debug_info_key,_}=O, Os) ->
diff --git a/lib/compiler/src/v3_core.erl b/lib/compiler/src/v3_core.erl
@@ -1162,7 +1162,7 @@ is_iexprs_small_2(_, Threshold) ->
%% record whereas c_literal should not have a wrapped annotation
expr_bin(Es0, Anno, St0) ->
- Es1 = [bin_element(E) || E <- Es0],
+ Es1 = bin_elements(Es0, 1),
case constant_bin(Es1) of
error ->
case expr_bin_1(Es1, St0) of
@@ -1202,12 +1202,12 @@ bitstrs([E0|Es0], St0) ->
bitstrs([], St) ->
-bitstr({bin_element,Line,{string,_,S},{integer,_,8},_}, St) ->
- bitstrs(bin_expand_string(S, Line, 0, 0, []), St);
-bitstr({bin_element,Line,{string,_,[]},Sz0,Ts}, St0) ->
+bitstr({bin_element,{sl,_,Line},{string,_,S},{integer,_,8},_}, St) ->
+ bitstrs(bin_expand_string(S, {sl,0,Line}, 0, 0, []), St);
+bitstr({bin_element,{sl,_,Line},{string,_,[]},Sz0,Ts}, St0) ->
%% Empty string. We must make sure that the type is correct.
{[#c_bitstr{size=Sz}],Eps0,St1} =
- bitstr({bin_element,Line,{char,Line,0},Sz0,Ts}, St0),
+ bitstr({bin_element,{sl,0,Line},{char,Line,0},Sz0,Ts}, St0),
%% At this point, the type is either a correct literal or
%% an expression.
@@ -1234,12 +1234,12 @@ bitstr({bin_element,Line,{string,_,[]},Sz0,Ts}, St0) ->
Eps = Eps0 ++ Eps1,
-bitstr({bin_element,Line,{string,_,S},Sz0,Ts}, St0) ->
- {[Bitstr],Eps,St1} = bitstr({bin_element,Line,{char,Line,0},Sz0,Ts}, St0),
+bitstr({bin_element,{sl,_,Line},{string,_,S},Sz0,Ts}, St0) ->
+ {[Bitstr],Eps,St1} = bitstr({bin_element,{sl,0,Line},{char,Line,0},Sz0,Ts}, St0),
Es = [Bitstr#c_bitstr{val=#c_literal{anno=full_anno(Line, St1),val=C}} ||
C <- S],
-bitstr({bin_element,Line,E0,Size0,[Type,{unit,Unit}|Flags]}, St0) ->
+bitstr({bin_element,{sl,Seg,Line},E0,Size0,[Type,{unit,Unit}|Flags]}, St0) ->
{E1,Eps0,St1} = safe(E0, St0),
{Size1,Eps1,St2} = safe(Size0, St1),
Eps = Eps0 ++ Eps1,
@@ -1263,16 +1263,30 @@ bitstr({bin_element,Line,E0,Size0,[Type,{unit,Unit}|Flags]}, St0) ->
#c_literal{val=all} -> ok;
_ -> throw({bad_binary,Eps,St2})
- {[#c_bitstr{anno=lineno_anno(Line, St2),
+ Anno0 = lineno_anno(Line, St2),
+ %% We will add a 'segment' annotation to segments that could
+ %% fail. There is no need to add it to literal segments of fixed
+ %% size. The annotation will be used by the runtime system to
+ %% provide extended error information if construction of the
+ %% binary fails.
+ Anno = if Seg =:= 0 ->
+ Anno0;
+ true ->
+ [{segment,Seg}|Anno0]
+ end,
+ {[#c_bitstr{anno=Anno,
-bin_element({bin_element,Line,Expr,Size0,Type0}) ->
+bin_elements([{bin_element,Line,Expr,Size0,Type0}|Es], Seg) -> % Normalize every segment's size/type and tag it with its segment number Seg.
{Size,Type} = make_bit_type(Line, Size0, Type0),
- {bin_element,Line,Expr,Size,Type}.
+ [{bin_element,{sl,Seg,Line},Expr,Size,Type}|bin_elements(Es, Seg+1)]; % The line is wrapped as {sl,Seg,Line}; the next segment gets Seg+1.
+bin_elements([], _) -> [].
make_bit_type(Line, default, Type0) ->
case erl_bits:set_bit_type(default, Type0) of
@@ -1392,8 +1406,12 @@ bin_expand_string([H|T], Line, Val, Size, Last) ->
bin_expand_string([], Line, Val, Size, Last) ->
[make_combined(Line, Val, Size) | Last].
-make_combined(Line, Val, Size) ->
- {bin_element,Line,{integer,Line,Val},
+make_combined(SegLine, Val, Size) ->
+ Line = case SegLine of
+ {sl,_,Line0} -> Line0;
+ _ -> SegLine
+ end,
+ {bin_element,SegLine,{integer,Line,Val},
diff --git a/lib/compiler/test/Makefile b/lib/compiler/test/Makefile
@@ -106,6 +106,11 @@ INLINE= \
R23= \
+R24= \
+ bs_construct \
+ bs_utf \
+ bs_bincomp
DIALYZER = bs_match
@@ -128,6 +133,8 @@ INLINE_MODULES= $(INLINE:%=%_inline_SUITE)
R23_MODULES= $(R23:%=%_r23_SUITE)
R23_ERL_FILES= $(R23_MODULES:%=%.erl)
+R24_MODULES= $(R24:%=%_r24_SUITE)
+R24_ERL_FILES= $(R24_MODULES:%=%.erl)
NO_MOD_OPT_MODULES= $(NO_MOD_OPT:%=%_no_module_opt_SUITE)
@@ -169,7 +176,7 @@ DISABLE_SSA_OPT = +no_bool_opt +no_share_opt +no_bsm_opt +no_fun_opt +no_ssa_opt
$(ERL_TOP)/make/make_emakefile $(ERL_COMPILE_FLAGS) -o$(EBIN) $(MODULES) \
$(ERL_TOP)/make/make_emakefile +no_copt $(DISABLE_SSA_OPT) +no_postopt \
@@ -184,6 +191,8 @@ make_emakefile: $(NO_OPT_ERL_FILES) $(POST_OPT_ERL_FILES) $(NO_SSA_OPT_ERL_FILES
$(ERL_TOP)/make/make_emakefile +r23 $(ERL_COMPILE_FLAGS) \
+ $(ERL_TOP)/make/make_emakefile +r24 $(ERL_COMPILE_FLAGS) \
+ -o$(EBIN) $(R24_MODULES) >> $(EMAKEFILE)
$(ERL_TOP)/make/make_emakefile +no_module_opt $(ERL_COMPILE_FLAGS) \
$(ERL_TOP)/make/make_emakefile +from_core $(ERL_COMPILE_FLAGS) \
@@ -225,6 +234,9 @@ docs:
%_r23_SUITE.erl: %_SUITE.erl
sed -e 's;-module($(basename $<));-module($(basename $@));' $< > $@
+%_r24_SUITE.erl: %_SUITE.erl
+ sed -e 's;-module($(basename $<));-module($(basename $@));' $< > $@
%_no_module_opt_SUITE.erl: %_SUITE.erl
sed -e 's;-module($(basename $<));-module($(basename $@));' $< > $@
@@ -246,7 +258,9 @@ release_tests_spec: make_emakefile
$(INSTALL_DATA) compiler.spec compiler.cover \
+ $(R23_ERL_FILES) \
+ $(R24_ERL_FILES) \
diff --git a/lib/compiler/test/bs_bincomp_SUITE.erl b/lib/compiler/test/bs_bincomp_SUITE.erl
@@ -387,6 +387,12 @@ sizes(Config) when is_list(Config) ->
<<>> = Fun6([], 42),
<<42,43:20>> = Fun6([42], 20),
+ Fun7 = fun(B) ->
+ cs_default(<< <<C/utf8>> || C <- B >>)
+ end,
+ <<"Foundation"/utf8>> = Fun7("Foundation"),
+ <<"Основание"/utf8>> = Fun7("Основание"),
%% Binary generators.
Fun10 = fun(Bin) ->
@@ -437,6 +443,12 @@ sizes(Config) when is_list(Config) ->
<<$a:32,$b:32,$c:32,($a bsl 8 bor $b):32>> = Fun14([8,16], <<"abc">>),
<<$a:32,$b:32,$c:32>> = Fun14([8,bad], <<"abc">>),
+ Fun15 = fun(B) ->
+ cs_default(<< <<C/utf8>> || << C:32 >> <= id(B) >>)
+ end,
+ <<"Foundation"/utf8>> = Fun15(<<"Foundation"/utf32>>),
+ <<"Основание"/utf8>> = Fun15(<<"Основание"/utf32>>),
{'EXIT',_} = (catch << <<C:4>> || <<C:8>> <= {1,2,3} >>),
diff --git a/lib/compiler/test/bs_construct_SUITE.erl b/lib/compiler/test/bs_construct_SUITE.erl
@@ -72,10 +72,10 @@ end_per_testcase(Case, Config) when is_atom(Case), is_list(Config) ->
verify_highest_opcode(_Config) ->
case ?MODULE of
- bs_construct_r21_SUITE ->
+ bs_construct_r24_SUITE ->
{ok,Beam} = file:read_file(code:which(?MODULE)),
case test_lib:highest_opcode(Beam) of
- Highest when Highest =< 163 ->
+ Highest when Highest =< 176 ->
TooHigh ->
diff --git a/lib/compiler/test/test_lib.erl b/lib/compiler/test/test_lib.erl
@@ -87,7 +87,7 @@ opt_opts(Mod) ->
(debug_info) -> true;
(dialyzer) -> true;
(inline) -> true;
- (no_bsm3) -> true;
+ (no_bs_create_bin) -> true;
(no_bsm_opt) -> true;
(no_copt) -> true;
(no_fun_opt) -> true;
@@ -95,11 +95,10 @@ opt_opts(Mod) ->
(no_make_fun3) -> true;
(no_module_opt) -> true;
(no_postopt) -> true;
- (no_put_tuple2) -> true;
(no_recv_opt) -> true;
(no_share_opt) -> true;
(no_shared_fun_wrappers) -> true;
- (no_ssa_float) -> true;
+ (no_ssa_opt_float) -> true;
(no_ssa_opt) -> true;
(no_stack_trimming) -> true;
(no_swap) -> true;
@@ -136,7 +135,8 @@ is_cloned_mod_1("_no_copt_SUITE") -> true;
is_cloned_mod_1("_no_ssa_opt_SUITE") -> true;
is_cloned_mod_1("_post_opt_SUITE") -> true;
is_cloned_mod_1("_inline_SUITE") -> true;
-is_cloned_mod_1("_21_SUITE") -> true;
+%% Compatibility clones are named <base>_r23_SUITE and <base>_r24_SUITE,
+%% so the suffix must include the "r". Without it these clauses can
+%% never match, because the character before the digits is "r", not "_".
+is_cloned_mod_1("_r23_SUITE") -> true;
+is_cloned_mod_1("_r24_SUITE") -> true;
is_cloned_mod_1("_no_module_opt_SUITE") -> true;
is_cloned_mod_1([_|T]) -> is_cloned_mod_1(T);
is_cloned_mod_1([]) -> false.