summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorSiri Hansen <siri@erlang.org>2017-09-07 15:56:18 +0200
committerSiri Hansen <siri@erlang.org>2017-09-15 09:57:25 +0200
commitae089c72fb06b069675cbebcec10f0820cf16112 (patch)
treed1ba5bc2e8ec4f230af3b86b6d0578a9d5ce2a1e
parent903a289213aa22b5c9c42ead2599174c2cf15b95 (diff)
downloaderlang-ae089c72fb06b069675cbebcec10f0820cf16112.tar.gz
cdv: Optimize reading of crashdump with many binaries
Earlier, crashdump_viewer stored an index of all binaries in a gb_tree on startup. The binary index was also stored in the cdv_dump_index_table along with all other "=xxx" tags from the dump. The difference between the indices was that the ets table contained the addresses of the binaries as strings (the hex address found after the "=binary:" tag) and in the gb_tree this hex address was instead converted to its integer value. The index in the ets table was only used once - when creating the gb_tree. The gb_tree was used for all later looups (to map integer address to file position). This commit replaces the two storages with one new ets table, cdv_binary_index_table, using the integer value of the hex address as key, and the position in the crashdump file as value. In the case of many binaries, this makes the start of crashdump viewer faster (only one place to write), and the data usage smaller (hex address strings are no longer stored). And it avoids the gc of the gb_tree.
-rw-r--r--lib/observer/src/crashdump_viewer.erl138
-rw-r--r--lib/observer/src/observer_html_lib.erl29
-rw-r--r--lib/observer/test/crashdump_viewer_SUITE.erl125
3 files changed, 193 insertions, 99 deletions
diff --git a/lib/observer/src/crashdump_viewer.erl b/lib/observer/src/crashdump_viewer.erl
index f7c44628cf..0534ead50e 100644
--- a/lib/observer/src/crashdump_viewer.erl
+++ b/lib/observer/src/crashdump_viewer.erl
@@ -36,7 +36,6 @@
%% file: The name of the crashdump currently viewed.
%% dump_vsn: The version number of the crashdump
%% wordsize: 4 | 8, the number of bytes in a word.
-%% binaries: a gb_tree containing binaries or links to binaries in the dump
%%
%% User API
@@ -124,7 +123,7 @@
-define(visible_node,visible_node).
--record(state,{file,dump_vsn,wordsize=4,num_atoms="unknown",binaries}).
+-record(state,{file,dump_vsn,wordsize=4,num_atoms="unknown"}).
%%%-----------------------------------------------------------------
%%% Debugging
@@ -307,6 +306,7 @@ expand_binary(Pos) ->
init([]) ->
ets:new(cdv_dump_index_table,[ordered_set,named_table,public]),
ets:new(cdv_reg_proc_table,[ordered_set,named_table,public]),
+ ets:new(cdv_binary_index_table,[ordered_set,named_table,public]),
{ok, #state{}}.
%%--------------------------------------------------------------------
@@ -350,9 +350,9 @@ handle_call(procs_summary,_From,State=#state{file=File,wordsize=WS}) ->
Procs = procs_summary(File,WS),
{reply,{ok,Procs,TW},State};
handle_call({proc_details,Pid},_From,
- State=#state{file=File,wordsize=WS,dump_vsn=DumpVsn,binaries=B})->
+ State=#state{file=File,wordsize=WS,dump_vsn=DumpVsn})->
Reply =
- case get_proc_details(File,Pid,WS,DumpVsn,B) of
+ case get_proc_details(File,Pid,WS,DumpVsn) of
{ok,Proc,TW} ->
{ok,Proc,TW};
Other ->
@@ -464,9 +464,9 @@ handle_call(schedulers,_From,State=#state{file=File}) ->
%%--------------------------------------------------------------------
handle_cast({read_file,File}, _State) ->
case do_read_file(File) of
- {ok,Binaries,DumpVsn} ->
+ {ok,DumpVsn} ->
observer_lib:report_progress({ok,done}),
- {noreply, #state{file=File,binaries=Binaries,dump_vsn=DumpVsn}};
+ {noreply, #state{file=File,dump_vsn=DumpVsn}};
Error ->
end_progress(Error),
{noreply, #state{}}
@@ -793,18 +793,17 @@ do_read_file(File) ->
{Tag,Id,Rest,N1} = tag(Fd,TagAndRest,1),
case Tag of
?erl_crash_dump ->
- reset_index_table(),
+ reset_tables(),
insert_index(Tag,Id,N1+1),
put_last_tag(Tag,""),
- indexify(Fd,Rest,N1),
+ DumpVsn = [list_to_integer(L) ||
+ L<-string:tokens(Id,".")],
+ AddrAdj = get_bin_addr_adj(DumpVsn),
+ indexify(Fd,AddrAdj,Rest,N1),
end_progress(),
check_if_truncated(),
- [{DumpVsn0,_}] = lookup_index(?erl_crash_dump),
- DumpVsn = [list_to_integer(L) ||
- L<-string:tokens(DumpVsn0,".")],
- Binaries = read_binaries(Fd,DumpVsn),
close(Fd),
- {ok,Binaries,DumpVsn};
+ {ok,DumpVsn};
_Other ->
R = io_lib:format(
"~ts is not an Erlang crash dump~n",
@@ -832,15 +831,26 @@ do_read_file(File) ->
{error,R}
end.
-indexify(Fd,Bin,N) ->
+indexify(Fd,AddrAdj,Bin,N) ->
case binary:match(Bin,<<"\n=">>) of
{Start,Len} ->
Pos = Start+Len,
<<_:Pos/binary,TagAndRest/binary>> = Bin,
{Tag,Id,Rest,N1} = tag(Fd,TagAndRest,N+Pos),
- insert_index(Tag,Id,N1+1), % +1 to get past newline
+ NewPos = N1+1, % +1 to get past newline
+ case Tag of
+ ?binary ->
+ %% Binaries are stored in a separate table in
+ %% order to minimize lookup time. Key is the
+ %% translated address.
+ {HexAddr,_} = get_hex(Id),
+ Addr = HexAddr bor AddrAdj,
+ insert_binary_index(Addr,NewPos);
+ _ ->
+ insert_index(Tag,Id,NewPos)
+ end,
put_last_tag(Tag,Id),
- indexify(Fd,Rest,N1);
+ indexify(Fd,AddrAdj,Rest,N1);
nomatch ->
case progress_read(Fd) of
{ok,Chunk0} when is_binary(Chunk0) ->
@@ -851,7 +861,7 @@ indexify(Fd,Bin,N) ->
_ ->
{Chunk0,N+byte_size(Bin)}
end,
- indexify(Fd,Chunk,N1);
+ indexify(Fd,AddrAdj,Chunk,N1);
eof ->
eof
end
@@ -1040,14 +1050,14 @@ procs_summary(File,WS) ->
%%-----------------------------------------------------------------
%% Page with one process
-get_proc_details(File,Pid,WS,DumpVsn,Binaries) ->
+get_proc_details(File,Pid,WS,DumpVsn) ->
case lookup_index(?proc,Pid) of
[{_,Start}] ->
Fd = open(File),
{{Stack,MsgQ,Dict},TW} =
case truncated_warning([{?proc,Pid}]) of
[] ->
- {expand_memory(Fd,Pid,DumpVsn,Binaries),[]};
+ {expand_memory(Fd,Pid,DumpVsn),[]};
TW0 ->
{{[],[],[]},TW0}
end,
@@ -1365,10 +1375,10 @@ maybe_other_node2(Channel) ->
end.
-expand_memory(Fd,Pid,DumpVsn,Binaries) ->
+expand_memory(Fd,Pid,DumpVsn) ->
BinAddrAdj = get_bin_addr_adj(DumpVsn),
put(fd,Fd),
- Dict = read_heap(Fd,Pid,BinAddrAdj,Binaries),
+ Dict = read_heap(Fd,Pid,BinAddrAdj,gb_trees:empty()),
Expanded = {read_stack_dump(Fd,Pid,BinAddrAdj,Dict),
read_messages(Fd,Pid,BinAddrAdj,Dict),
read_dictionary(Fd,Pid,BinAddrAdj,Dict)},
@@ -1386,25 +1396,6 @@ get_bin_addr_adj(_) ->
0.
%%%
-%%% Read binaries.
-%%%
-read_binaries(Fd,DumpVsn) ->
- AllBinaries = lookup_index(?binary),
- AddrAdj = get_bin_addr_adj(DumpVsn),
- Fun = fun({Addr0,Pos},Dict0) ->
- pos_bof(Fd,Pos),
- {HexAddr,_} = get_hex(Addr0),
- Addr = HexAddr bor AddrAdj,
- Bin =
- case line_head(Fd) of
- {eof,_} -> '#CDVTruncatedBinary';
- _Size -> {'#CDVBin',Pos}
- end,
- gb_trees:enter(Addr,Bin,Dict0)
- end,
- progress_foldl("Processing binaries",Fun,gb_trees:empty(),AllBinaries).
-
-%%%
%%% Read top level section.
%%%
@@ -2564,9 +2555,9 @@ parse_heap_term("Yc"++Line0, Addr, BinAddrAdj, D0) -> %Reference-counted binary.
{Offset,":"++Line2} = get_hex(Line1),
{Sz,Line} = get_hex(Line2),
Binp = Binp0 bor BinAddrAdj,
- Term = case gb_trees:lookup(Binp, D0) of
- {value,Bin} -> cdvbin(Offset,Sz,Bin);
- none -> '#CDVNonexistingBinary'
+ Term = case lookup_binary_index(Binp) of
+ [{_,Start}] -> cdvbin(Offset,Sz,{'#CDVBin',Start});
+ [] -> '#CDVNonexistingBinary'
end,
D = gb_trees:insert(Addr, Term, D0),
{Term,Line,D};
@@ -2575,15 +2566,14 @@ parse_heap_term("Ys"++Line0, Addr, BinAddrAdj, D0) -> %Sub binary.
{Offset,":"++Line2} = get_hex(Line1),
{Sz,Line} = get_hex(Line2),
Binp = Binp0 bor BinAddrAdj,
- Term = case gb_trees:lookup(Binp, D0) of
- {value,Bin} -> cdvbin(Offset,Sz,Bin);
- none when Binp0=/=Binp ->
+ Term = case lookup_binary_index(Binp) of
+ [{_,Start}] -> cdvbin(Offset,Sz,{'#CDVBin',Start});
+ [] ->
%% Might it be on the heap?
- case gb_trees:lookup(Binp0, D0) of
+ case gb_trees:lookup(Binp, D0) of
{value,Bin} -> cdvbin(Offset,Sz,Bin);
none -> '#CDVNonexistingBinary'
- end;
- none -> '#CDVNonexistingBinary'
+ end
end,
D = gb_trees:insert(Addr, Term, D0),
{Term,Line,D}.
@@ -2739,12 +2729,20 @@ get_label([H|T], Acc) ->
get_label(T, [H|Acc]).
get_binary(Line0) ->
- {N,":"++Line} = get_hex(Line0),
- do_get_binary(N, Line, []).
+ case get_hex(Line0) of
+ {N,":"++Line} ->
+ do_get_binary(N, Line, []);
+ _ ->
+ {'#CDVTruncatedBinary',[]}
+ end.
get_binary(Offset,Size,Line0) ->
- {_N,":"++Line} = get_hex(Line0),
- do_get_binary(Size, lists:sublist(Line,(Offset*2)+1,Size*2), []).
+ case get_hex(Line0) of
+ {_N,":"++Line} ->
+ do_get_binary(Size, lists:sublist(Line,(Offset*2)+1,Size*2), []);
+ _ ->
+ {'#CDVTruncatedBinary',[]}
+ end.
do_get_binary(0, Line, Acc) ->
{list_to_binary(lists:reverse(Acc)),Line};
@@ -2759,12 +2757,16 @@ cdvbin(Offset,Size,{'#CDVBin',Pos}) ->
cdvbin(Offset,Size,['#CDVBin',_,_,Pos]) ->
['#CDVBin',Offset,Size,Pos];
cdvbin(_,_,'#CDVTruncatedBinary') ->
- '#CDVTruncatedBinary'.
+ '#CDVTruncatedBinary';
+cdvbin(_,_,'#CDVNonexistingBinary') ->
+ '#CDVNonexistingBinary'.
%%-----------------------------------------------------------------
-%% Functions for accessing the cdv_dump_index_table
-reset_index_table() ->
- ets:delete_all_objects(cdv_dump_index_table).
+%% Functions for accessing tables
+reset_tables() ->
+ ets:delete_all_objects(cdv_dump_index_table),
+ ets:delete_all_objects(cdv_reg_proc_table),
+ ets:delete_all_objects(cdv_binary_index_table).
insert_index(Tag,Id,Pos) ->
ets:insert(cdv_dump_index_table,{{Tag,Pos},Id}).
@@ -2779,6 +2781,11 @@ lookup_index(Tag,Id) ->
count_index(Tag) ->
ets:select_count(cdv_dump_index_table,[{{{Tag,'_'},'_'},[],[true]}]).
+insert_binary_index(Addr,Pos) ->
+ ets:insert(cdv_binary_index_table,{Addr,Pos}).
+
+lookup_binary_index(Addr) ->
+ ets:lookup(cdv_binary_index_table,Addr).
%%-----------------------------------------------------------------
%% Convert tags read from crashdump to atoms used as first part of key
@@ -2849,23 +2856,6 @@ to_value_list(Record) ->
Values.
%%%-----------------------------------------------------------------
-%%% Fold over List and report progress in percent.
-%%% Report is the text to be presented in the progress dialog.
-%%% Acc0 is the initial accumulator and will be passed to Fun as the
-%%% second arguement, i.e. Fun = fun(Item,Acc) -> NewAcc end.
-progress_foldl(Report,Fun,Acc0,List) ->
- init_progress(Report, length(List)),
- progress_foldl1(Fun,Acc0,List).
-
-progress_foldl1(Fun,Acc,[H|T]) ->
- update_progress(),
- progress_foldl1(Fun,Fun(H,Acc),T);
-progress_foldl1(_Fun,Acc,[]) ->
- end_progress(),
- Acc.
-
-
-%%%-----------------------------------------------------------------
%%% Map over List and report progress in percent.
%%% Report is the text to be presented in the progress dialog.
%%% Distribute the load over a number of processes, and File is opened
diff --git a/lib/observer/src/observer_html_lib.erl b/lib/observer/src/observer_html_lib.erl
index 3dfcc42ada..a85808a472 100644
--- a/lib/observer/src/observer_html_lib.erl
+++ b/lib/observer/src/observer_html_lib.erl
@@ -337,17 +337,24 @@ href_proc_bin(From, T, Acc, LTB) ->
Size = list_to_integer(SizeStr),
PreviewSize = min(Size,10),
Id = {list_to_integer(Offset),PreviewSize,list_to_integer(Pos)},
- {ok,PreviewBin} = crashdump_viewer:expand_binary(Id),
- PreviewStr = preview_string(Size, PreviewBin),
- if LTB ->
- href("TARGET=\"expanded\"",
- ["#Binary?offset="++Offset++
- "&size="++SizeStr++
- "&pos="++Pos],
- PreviewStr);
- true ->
- PreviewStr
- end;
+ case crashdump_viewer:expand_binary(Id) of
+ {ok, '#CDVTruncatedBinary'} ->
+ lists:flatten(
+ "<FONT COLOR=\"#FF0000\">"
+ "&lt;&lt;...(Truncated Binary)&gt;&gt;"
+ "</FONT>");
+ {ok, PreviewBin} ->
+ PreviewStr = preview_string(Size, PreviewBin),
+ if LTB ->
+ href("TARGET=\"expanded\"",
+ ["#Binary?offset="++Offset++
+ "&size="++SizeStr++
+ "&pos="++Pos],
+ PreviewStr);
+ true ->
+ PreviewStr
+ end
+ end;
[PreviewIntStr,SizeStr,Md5] when From =:= obs ->
Size = list_to_integer(SizeStr),
PreviewInt = list_to_integer(PreviewIntStr),
diff --git a/lib/observer/test/crashdump_viewer_SUITE.erl b/lib/observer/test/crashdump_viewer_SUITE.erl
index 4b0127bcfb..4449ec54d1 100644
--- a/lib/observer/test/crashdump_viewer_SUITE.erl
+++ b/lib/observer/test/crashdump_viewer_SUITE.erl
@@ -76,7 +76,7 @@ end_per_testcase(Case, Config) ->
end,
ok.
-suite() -> [{ct_hooks,[ts_install_cth]}].
+suite() -> [].
all() ->
[start_stop,
@@ -416,19 +416,90 @@ special(File,Procs) ->
old_attrib=undefined,
old_comp_info=undefined}=Mod2,
ok;
- %% ".strangemodname" ->
- %% {ok,Mods,[]} = crashdump_viewer:loaded_modules(),
- %% lookat_all_mods(Mods),
- %% ok;
- %% ".sort" ->
- %% %% sort ports, atoms and modules ????
- %% ok;
- %% ".trunc" ->
- %% %% ????
- %% ok;
- ".trunc.bytes" ->
+ ".trunc_bin1" ->
+ %% This is 'full_dist' truncated after the first
+ %% "=binary:"
+ %% i.e. no binary exist in the dump
+ [#proc{pid=Pid0}|_Rest] = lists:keysort(#proc.name,Procs),
+ Pid = pid_to_list(Pid0),
+ {ok,ProcDetails=#proc{},[]} = crashdump_viewer:proc_details(Pid),
+ io:format(" process details ok",[]),
+
+ #proc{dict=Dict} = ProcDetails,
+
+ '#CDVNonexistingBinary' = proplists:get_value(bin,Dict),
+ '#CDVNonexistingBinary' = proplists:get_value(sub_bin,Dict),
+
+ io:format(" nonexisting binaries ok",[]),
+ ok;
+ ".trunc_bin2" ->
+ %% This is 'full_dist' truncated after the first
+ %% "=binary:Addr\n
+ %% Size"
+ %% i.e. binaries are truncated
+ [#proc{pid=Pid0}|_Rest] = lists:keysort(#proc.name,Procs),
+ Pid = pid_to_list(Pid0),
+ {ok,ProcDetails=#proc{},[]} = crashdump_viewer:proc_details(Pid),
+ io:format(" process details ok",[]),
+
+ #proc{dict=Dict} = ProcDetails,
+
+ ['#CDVBin',Offset,Size,Pos] = proplists:get_value(bin,Dict),
+ {ok,'#CDVTruncatedBinary'} =
+ crashdump_viewer:expand_binary({Offset,Size,Pos}),
+ ['#CDVBin',SOffset,SSize,SPos] = proplists:get_value(sub_bin,Dict),
+ {ok,'#CDVTruncatedBinary'} =
+ crashdump_viewer:expand_binary({SOffset,SSize,SPos}),
+
+ io:format(" expand truncated binary ok",[]),
+ ok;
+ ".trunc_bin3" ->
+ %% This is 'full_dist' truncated after the first
+ %% "=binary:Addr\n
+ %% Size:"
+ %% i.e. same as 'trunc_bin2', except the colon exists also
+ [#proc{pid=Pid0}|_Rest] = lists:keysort(#proc.name,Procs),
+ Pid = pid_to_list(Pid0),
+ {ok,ProcDetails=#proc{},[]} = crashdump_viewer:proc_details(Pid),
+ io:format(" process details ok",[]),
+
+ #proc{dict=Dict} = ProcDetails,
+
+ ['#CDVBin',Offset,Size,Pos] = proplists:get_value(bin,Dict),
+ {ok,'#CDVTruncatedBinary'} =
+ crashdump_viewer:expand_binary({Offset,Size,Pos}),
+ ['#CDVBin',SOffset,SSize,SPos] = proplists:get_value(sub_bin,Dict),
+ {ok,'#CDVTruncatedBinary'} =
+ crashdump_viewer:expand_binary({SOffset,SSize,SPos}),
+
+ io:format(" expand truncated binary ok",[]),
+ ok;
+ ".trunc_bin4" ->
+ %% This is 'full_dist' truncated after the first
+ %% "=binary:Addr\n
+ %% Size:BinaryMissinOneByte"
+ %% i.e. the full binary is truncated, but the sub binary is complete
+ [#proc{pid=Pid0}|_Rest] = lists:keysort(#proc.name,Procs),
+ Pid = pid_to_list(Pid0),
+ {ok,ProcDetails=#proc{},[]} = crashdump_viewer:proc_details(Pid),
+ io:format(" process details ok",[]),
+
+ #proc{dict=Dict} = ProcDetails,
+
+ ['#CDVBin',Offset,Size,Pos] = proplists:get_value(bin,Dict),
+ {ok,'#CDVTruncatedBinary'} =
+ crashdump_viewer:expand_binary({Offset,Size,Pos}),
+ io:format(" expand truncated binary ok",[]),
+ ['#CDVBin',SOffset,SSize,SPos] = proplists:get_value(sub_bin,Dict),
+ {ok,<<_:SSize/binary>>} =
+ crashdump_viewer:expand_binary({SOffset,SSize,SPos}),
+ io:format(" expand complete sub binary ok",[]),
+
+ ok;
+ ".trunc_bytes" ->
{ok,_,[TW]} = crashdump_viewer:general_info(),
{match,_} = re:run(TW,"CRASH DUMP SIZE LIMIT REACHED"),
+ io:format(" size limit information ok",[]),
ok;
".unicode" ->
#proc{pid=Pid0} =
@@ -504,15 +575,41 @@ do_create_dumps(DataDir,Rel) ->
CD3 = dump_with_args(DataDir,Rel,"instr","+Mim true"),
CD4 = dump_with_strange_module_name(DataDir,Rel,"strangemodname"),
Bytes = rand:uniform(300000) + 100,
- CD5 = dump_with_args(DataDir,Rel,"trunc.bytes",
+ CD5 = dump_with_args(DataDir,Rel,"trunc_bytes",
"-env ERL_CRASH_DUMP_BYTES " ++
integer_to_list(Bytes)),
CD6 = dump_with_unicode_atoms(DataDir,Rel,"unicode"),
- {[CD1,CD2,CD3,CD4,CD5,CD6], DosDump};
+ TruncatedDumps = truncate_dump(CD1),
+ {[CD1,CD2,CD3,CD4,CD5,CD6|TruncatedDumps], DosDump};
_ ->
{[CD1,CD2], DosDump}
end.
+truncate_dump(File) ->
+ {ok,Bin} = file:read_file(File),
+ BinTag = <<"\n=binary:">>,
+ Colon = <<":">>,
+ NewLine = case os:type() of
+ {win32,_} -> <<"\r\n">>;
+ _ -> <<"\n">>
+ end,
+ [StartBin,AfterTag] = binary:split(Bin,BinTag),
+ [AddrAndSize,BinaryAndRest] = binary:split(AfterTag,Colon),
+ [Binary,_Rest] = binary:split(BinaryAndRest,NewLine),
+ TruncSize = byte_size(Binary) - 2,
+ <<TruncBinary:TruncSize/binary,_/binary>> = Binary,
+ TruncName = filename:rootname(File) ++ ".trunc_bin",
+ write_trunc_files(TruncName,StartBin,
+ [BinTag,AddrAndSize,Colon,TruncBinary],1).
+
+write_trunc_files(TruncName0,Bin,[Part|Parts],N) ->
+ TruncName = TruncName0++integer_to_list(N),
+ Bin1 = <<Bin/binary,Part/binary>>,
+ ok = file:write_file(TruncName,Bin1),
+ [TruncName|write_trunc_files(TruncName0,Bin1,Parts,N+1)];
+write_trunc_files(_,_,[],_) ->
+ [].
+
%% Create a dump which has three visible nodes, one hidden and one
%% not connected node, and with monitors and links between nodes.