diff options
author | Maxim Fedorov <maximfca@gmail.com> | 2021-07-28 21:23:31 -0700 |
---|---|---|
committer | Maxim Fedorov <dane@whatsapp.com> | 2021-08-09 18:12:58 -0700 |
commit | 4b145ffbf9cd992d7bc9429796156d792a939ef5 (patch) | |
tree | 9ed418029a72b6167069dee2e730c9cc10bcb2f4 | |
parent | a29aeb16660f2a91aaa24c474d57c6ee7754ee0a (diff) | |
download | erlang-4b145ffbf9cd992d7bc9429796156d792a939ef5.tar.gz |
Fix process_info that can hang waiting for reply
With multiple reusable receive markers, marker that is marked
to remove may be followed by process_info signal, which will
also be handled within the loop in erts_proc_sig_handle_incoming.
It is possible that process_info moves messages up to the next
non-message signal from the middle to inner queue. If there was a
receive marker at the very beginning of the middle queue, it
would set save pointer to sig_qs.cont, which will be overwritten
when linking middle queue head into inner queue. This would make
save pointer to skip all messages that were in between of removed
receive marker and process_info request.
To observe this behaviour using a debugger, run the test case
provided with a breakpoint set to handle_process_info part where
it reassigns c_p->sig_qs.cont to the next non-message signal
(**next_nm_sig)
Test case runs a process that triggers receive marker to enter
its middle queue. Currently it's done with process_info BIF, but
can also be triggered with erlang:cancel_timer (which also
places receive marker). Additional processes are sending
process_info requesting to move messages into inner queue, which
eventually makes middle queue to contain:
- receive marker that is set to be removed but set_save
- reply tuple ({Ref, Dictionary})
- process_info request moving previous message into inner queue
Receive marker gets removed from the middle queue after setting
save pointer to sig_qs.cont, then erts_proc_sig_handle_incoming
proceeds to process_info. Which moves reply tuple into the
inner queue, but save pointer is left untouched pointing to the
head of the middle queue, which is beyond the reply. This
way reply never gets matched (as save pointer has already
moved past it), and process_info hangs forever.
-rw-r--r-- | erts/emulator/beam/erl_proc_sig_queue.c | 2 | ||||
-rw-r--r-- | erts/emulator/test/bif_SUITE.erl | 50 | ||||
-rw-r--r-- | erts/etc/unix/etp-commands.in | 31 |
3 files changed, 77 insertions, 6 deletions
diff --git a/erts/emulator/beam/erl_proc_sig_queue.c b/erts/emulator/beam/erl_proc_sig_queue.c index 652887abe0..97709e7028 100644 --- a/erts/emulator/beam/erl_proc_sig_queue.c +++ b/erts/emulator/beam/erl_proc_sig_queue.c @@ -4120,6 +4120,8 @@ handle_process_info(Process *c_p, ErtsSigRecvTracing *tracing, ASSERT(tracing); if (*next_nm_sig != &c_p->sig_qs.cont) { + if (c_p->sig_qs.save == &c_p->sig_qs.cont) + c_p->sig_qs.save = c_p->sig_qs.last; if (ERTS_SIG_IS_RECV_MARKER(c_p->sig_qs.cont)) { ErtsRecvMarker *markp = (ErtsRecvMarker *) c_p->sig_qs.cont; markp->prev_next = c_p->sig_qs.last; diff --git a/erts/emulator/test/bif_SUITE.erl b/erts/emulator/test/bif_SUITE.erl index 6a6e0930bd..5923038eb6 100644 --- a/erts/emulator/test/bif_SUITE.erl +++ b/erts/emulator/test/bif_SUITE.erl @@ -39,6 +39,7 @@ is_process_alive/1, process_info_blast/1, os_env_case_sensitivity/1, + verify_middle_queue_save/1, test_length/1, fixed_apply_badarg/1, external_fun_apply3/1]). @@ -57,7 +58,8 @@ all() -> error_stacktrace, error_stacktrace_during_call_trace, group_leader_prio, group_leader_prio_dirty, is_process_alive, process_info_blast, os_env_case_sensitivity, - test_length,fixed_apply_badarg,external_fun_apply3]. + verify_middle_queue_save, test_length,fixed_apply_badarg, + external_fun_apply3]. init_per_testcase(guard_bifs_in_erl_bif_types, Config) when is_list(Config) -> skip_missing_erl_bif_types(Config); @@ -1277,6 +1279,52 @@ consume_msgs() -> ok end. + +%% Test that process_info(Pid, [message_queue_len]) works correctly when +%% fetching part of the middle signal queue into inner queue. +verify_middle_queue_save(Config) when is_list(Config) -> + Control = self(), + ProcessToHang = spawn_link( + fun () -> + Single = self(), + put(count, 0), + Doubles = [spawn_link(fun () -> message_queue_len_retrievers(Single, 0) end) || _ <- lists:seq(1, 2)], + Control ! {doubles, Doubles}, + process_that_hangs(Control, 0, Doubles) + end), + ensure_not_hanging(ProcessToHang, [], 50000). + +process_that_hangs(Control, Total, Doubles) -> + put(count, Total), + %% fetch something innocent, like 'priority' of the process + [process_info(Pid, [priority]) || Pid <- Doubles], + Control ! alive, + process_that_hangs(Control, Total + 1, Doubles). + +message_queue_len_retrievers(Control, PrevCount) -> + %% need to fetch dictionary for test reasons, but actual trigger is 'message_queue_len', + %% or 'memory', or 'total_heap_size' - anything that needs to fetch external message queue + %% via ERTS_PI_FLAG_NEED_MSGQ_LEN internal flag to process_info + [_, {dictionary, [{count, Count}]}] = erlang:process_info(Control, [message_queue_len, dictionary]), + Count > PrevCount andalso begin Control ! count end, + message_queue_len_retrievers(Control, Count). + +ensure_not_hanging(Proc, _Doubles, 0) -> + unlink(Proc), + exit(Proc, kill); +ensure_not_hanging(Proc, Doubles, Remaining) -> + receive + alive -> + ensure_not_hanging(Proc, Doubles, Remaining - 1); + {doubles, NewDoubles} -> + ensure_not_hanging(Proc, NewDoubles, Remaining) + after 1000 -> + Reason = {Proc, "hung", erlang:process_info(Proc, backtrace)}, + unlink(Proc), + exit(Proc, kill), + ct:fail(Reason) + end. + %% Test that length/1 returns the correct result after trapping, and %% also that the argument is correct in the stacktrace for a badarg %% exception. diff --git a/erts/etc/unix/etp-commands.in b/erts/etc/unix/etp-commands.in index 06d313dfe0..46cbe38da4 100644 --- a/erts/etc/unix/etp-commands.in +++ b/erts/etc/unix/etp-commands.in @@ -1456,7 +1456,25 @@ define etp-sig-int printf "!DIST_SPAWN_REPLY[%d]", $etp_sig_type else if $etp_sig_op == 15 + printf "!ALIAS[%d]", $etp_sig_type + else + if $etp_sig_op == 16 printf "!RECV_MARKER[%d]", $etp_sig_type + else + if $etp_sig_op == 17 + printf "!UNLINK_ACK[%d]", $etp_sig_type + else + if $etp_sig_op == 18 + printf "!ADJUST_MSGQ[%d]", $etp_sig_type + else + if $etp_sig_op == 255 + printf "->OFFSET_MARKER" + else + printf "UNKNOWN SIGNAL %d [%d]", $etp_sig_op, $etp_sig_type + end + end + end + end end end end @@ -1511,6 +1529,9 @@ define etp-sigq-int end set $etp_sig = $etp_sig_next end + if $etp_sig_save && $etp_sig_save == ($arg2) + printf "\n %% <== SAVE" + end printf "]\n\n" printf " Message signals: %d\n", $etp_sigq_msig_len printf " Non-message signals: %d\n\n", $etp_sigq_nmsig_len @@ -1526,10 +1547,10 @@ define etp-sigq-flags-int printf "delayed-sigq-len " end if ($arg0 & (1 << 5)) - printf "deferred-save " + printf "wait-handle-sig " end if ($arg0 & (1 << 4)) - printf "deferred-saved-last " + printf "handling-sig " end if ($arg0 & (1 << 3)) printf "local-signals-only " @@ -1566,11 +1587,11 @@ define etp-sigqs printf " Msgq Flags: " etp-sigq-flags $proc_int printf " --- Inner signal queue (message queue) ---\n" - etp-sigq-int ($proc_int)->sig_qs.first ($proc_int)->sig_qs.save + etp-sigq-int ($proc_int)->sig_qs.first ($proc_int)->sig_qs.save ($proc_int)->sig_qs.last printf " --- Middle signal queue ---\n" - etp-sigq-int ($proc_int)->sig_qs.cont ($proc_int)->sig_qs.save + etp-sigq-int ($proc_int)->sig_qs.cont ($proc_int)->sig_qs.save ($proc_int)->sig_qs.cont_last printf " --- Outer queue ---\n" - etp-sigq-int ($proc_int)->sig_inq.first ($proc_int)->sig_qs.save + etp-sigq-int ($proc_int)->sig_inq.first ($proc_int)->sig_qs.save ($proc_int)->sig_inq.last end define etp-msgq |