From 4b145ffbf9cd992d7bc9429796156d792a939ef5 Mon Sep 17 00:00:00 2001 From: Maxim Fedorov Date: Wed, 28 Jul 2021 21:23:31 -0700 Subject: Fix process_info that can hang waiting for reply With multiple reusable receive markers, marker that is marked to remove may be followed by process_info signal, which will also be handled within the loop in erts_proc_sig_handle_incoming. It is possible that process_info moves messages up to the next non-message signal from the middle to inner queue. If there was a receive marker at the very beginning of the middle queue, it would set save pointer to sig_qs.cont, which will be overwritten when linking middle queue head into inner queue. This would make save pointer to skip all messages that were in between of removed receive marker and process_info request. To observe this behaviour using a debugger, run the test case provided with a breakpoint set to handle_process_info part where it reassigns c_p->sig_qs.cont to the next non-message signal (**next_nm_sig) Test case runs a process that triggers receive marker to enter its middle queue. Currently it's done with process_info BIF, but can also be triggered with erlang:cancel_timer (which also places receive marker). Additional processes are sending process_info requesting to move messages into inner queue, which eventually makes middle queue to contain: - receive marker that is set to be removed but set_save - reply tuple ({Ref, Dictionary}) - process_info request moving previous message into inner queue Receive marker gets removed from the middle queue after setting save pointer to sig_qs.cont, then erts_proc_sig_handle_incoming proceeds to process_info. Which moves reply tuple into the inner queue, but save pointer is left untouched pointing to the head of the middle queue, which is beyond the reply. This way reply never gets matched (as save pointer has already moved past it), and process_info hangs forever. --- erts/emulator/beam/erl_proc_sig_queue.c | 2 ++ erts/emulator/test/bif_SUITE.erl | 50 ++++++++++++++++++++++++++++++++- erts/etc/unix/etp-commands.in | 31 ++++++++++++++++---- 3 files changed, 77 insertions(+), 6 deletions(-) diff --git a/erts/emulator/beam/erl_proc_sig_queue.c b/erts/emulator/beam/erl_proc_sig_queue.c index 652887abe0..97709e7028 100644 --- a/erts/emulator/beam/erl_proc_sig_queue.c +++ b/erts/emulator/beam/erl_proc_sig_queue.c @@ -4120,6 +4120,8 @@ handle_process_info(Process *c_p, ErtsSigRecvTracing *tracing, ASSERT(tracing); if (*next_nm_sig != &c_p->sig_qs.cont) { + if (c_p->sig_qs.save == &c_p->sig_qs.cont) + c_p->sig_qs.save = c_p->sig_qs.last; if (ERTS_SIG_IS_RECV_MARKER(c_p->sig_qs.cont)) { ErtsRecvMarker *markp = (ErtsRecvMarker *) c_p->sig_qs.cont; markp->prev_next = c_p->sig_qs.last; diff --git a/erts/emulator/test/bif_SUITE.erl b/erts/emulator/test/bif_SUITE.erl index 6a6e0930bd..5923038eb6 100644 --- a/erts/emulator/test/bif_SUITE.erl +++ b/erts/emulator/test/bif_SUITE.erl @@ -39,6 +39,7 @@ is_process_alive/1, process_info_blast/1, os_env_case_sensitivity/1, + verify_middle_queue_save/1, test_length/1, fixed_apply_badarg/1, external_fun_apply3/1]). @@ -57,7 +58,8 @@ all() -> error_stacktrace, error_stacktrace_during_call_trace, group_leader_prio, group_leader_prio_dirty, is_process_alive, process_info_blast, os_env_case_sensitivity, - test_length,fixed_apply_badarg,external_fun_apply3]. + verify_middle_queue_save, test_length,fixed_apply_badarg, + external_fun_apply3]. init_per_testcase(guard_bifs_in_erl_bif_types, Config) when is_list(Config) -> skip_missing_erl_bif_types(Config); @@ -1277,6 +1279,52 @@ consume_msgs() -> ok end. + +%% Test that process_info(Pid, [message_queue_len]) works correctly when +%% fetching part of the middle signal queue into inner queue. +verify_middle_queue_save(Config) when is_list(Config) -> + Control = self(), + ProcessToHang = spawn_link( + fun () -> + Single = self(), + put(count, 0), + Doubles = [spawn_link(fun () -> message_queue_len_retrievers(Single, 0) end) || _ <- lists:seq(1, 2)], + Control ! {doubles, Doubles}, + process_that_hangs(Control, 0, Doubles) + end), + ensure_not_hanging(ProcessToHang, [], 50000). + +process_that_hangs(Control, Total, Doubles) -> + put(count, Total), + %% fetch something innocent, like 'priority' of the process + [process_info(Pid, [priority]) || Pid <- Doubles], + Control ! alive, + process_that_hangs(Control, Total + 1, Doubles). + +message_queue_len_retrievers(Control, PrevCount) -> + %% need to fetch dictionary for test reasons, but actual trigger is 'message_queue_len', + %% or 'memory', or 'total_heap_size' - anything that needs to fetch external message queue + %% via ERTS_PI_FLAG_NEED_MSGQ_LEN internal flag to process_info + [_, {dictionary, [{count, Count}]}] = erlang:process_info(Control, [message_queue_len, dictionary]), + Count > PrevCount andalso begin Control ! count end, + message_queue_len_retrievers(Control, Count). + +ensure_not_hanging(Proc, _Doubles, 0) -> + unlink(Proc), + exit(Proc, kill); +ensure_not_hanging(Proc, Doubles, Remaining) -> + receive + alive -> + ensure_not_hanging(Proc, Doubles, Remaining - 1); + {doubles, NewDoubles} -> + ensure_not_hanging(Proc, NewDoubles, Remaining) + after 1000 -> + Reason = {Proc, "hung", erlang:process_info(Proc, backtrace)}, + unlink(Proc), + exit(Proc, kill), + ct:fail(Reason) + end. + %% Test that length/1 returns the correct result after trapping, and %% also that the argument is correct in the stacktrace for a badarg %% exception. diff --git a/erts/etc/unix/etp-commands.in b/erts/etc/unix/etp-commands.in index 06d313dfe0..46cbe38da4 100644 --- a/erts/etc/unix/etp-commands.in +++ b/erts/etc/unix/etp-commands.in @@ -1456,7 +1456,25 @@ define etp-sig-int printf "!DIST_SPAWN_REPLY[%d]", $etp_sig_type else if $etp_sig_op == 15 + printf "!ALIAS[%d]", $etp_sig_type + else + if $etp_sig_op == 16 printf "!RECV_MARKER[%d]", $etp_sig_type + else + if $etp_sig_op == 17 + printf "!UNLINK_ACK[%d]", $etp_sig_type + else + if $etp_sig_op == 18 + printf "!ADJUST_MSGQ[%d]", $etp_sig_type + else + if $etp_sig_op == 255 + printf "->OFFSET_MARKER" + else + printf "UNKNOWN SIGNAL %d [%d]", $etp_sig_op, $etp_sig_type + end + end + end + end end end end @@ -1511,6 +1529,9 @@ define etp-sigq-int end set $etp_sig = $etp_sig_next end + if $etp_sig_save && $etp_sig_save == ($arg2) + printf "\n %% <== SAVE" + end printf "]\n\n" printf " Message signals: %d\n", $etp_sigq_msig_len printf " Non-message signals: %d\n\n", $etp_sigq_nmsig_len @@ -1526,10 +1547,10 @@ define etp-sigq-flags-int printf "delayed-sigq-len " end if ($arg0 & (1 << 5)) - printf "deferred-save " + printf "wait-handle-sig " end if ($arg0 & (1 << 4)) - printf "deferred-saved-last " + printf "handling-sig " end if ($arg0 & (1 << 3)) printf "local-signals-only " @@ -1566,11 +1587,11 @@ define etp-sigqs printf " Msgq Flags: " etp-sigq-flags $proc_int printf " --- Inner signal queue (message queue) ---\n" - etp-sigq-int ($proc_int)->sig_qs.first ($proc_int)->sig_qs.save + etp-sigq-int ($proc_int)->sig_qs.first ($proc_int)->sig_qs.save ($proc_int)->sig_qs.last printf " --- Middle signal queue ---\n" - etp-sigq-int ($proc_int)->sig_qs.cont ($proc_int)->sig_qs.save + etp-sigq-int ($proc_int)->sig_qs.cont ($proc_int)->sig_qs.save ($proc_int)->sig_qs.cont_last printf " --- Outer queue ---\n" - etp-sigq-int ($proc_int)->sig_inq.first ($proc_int)->sig_qs.save + etp-sigq-int ($proc_int)->sig_inq.first ($proc_int)->sig_qs.save ($proc_int)->sig_inq.last end define etp-msgq -- cgit v1.2.1