summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorWill Holley <will.holley@uk.ibm.com>2023-04-12 07:30:06 +0000
committerWill Holley <will.holley@uk.ibm.com>2023-04-14 07:52:50 +0000
commitb7686a9da8ec3059ce9a8e9df1fee837ad1a1ea3 (patch)
tree70136427dbd1364c8c7bdb3cf36cbe10e72fdffd
parent865d5f8987e6b264aae5bcf2a943dafb521c424e (diff)
downloadcouchdb-b7686a9da8ec3059ce9a8e9df1fee837ad1a1ea3.tar.gz
feat (prometheus): include aggregated couch/index message queues
In #3860 and #3366 we added sharding to `couch_index_server` and `couch_server`. The `_system` endpoint surfaces a "fake" message queue for each of these containing the aggregated queue size across all shards. This commit adds the same for the `_prometheus` endpoint. Originally I had thought to just filter out the per-shard queue lengths as we've not found these to be useful in Cloudant, but I'll leave them in for now for consistency with the `_system` endpoint. Arguably, we should filter in both places if there's agreement that the per-shard queue lengths are just noise.
-rw-r--r--.devcontainer/devcontainer.json2
-rw-r--r--src/chttpd/src/chttpd_node.erl30
-rw-r--r--src/couch_prometheus/src/couch_prometheus_server.erl37
3 files changed, 24 insertions, 45 deletions
diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json
index 5e577d96d..3920cd9dd 100644
--- a/.devcontainer/devcontainer.json
+++ b/.devcontainer/devcontainer.json
@@ -9,7 +9,7 @@
// apache/couchdbci-debian:bullseye-erlang-24.3.4.2
// apache/couchdbci-debian:bullseye-erlang-23.3.4.15
//
- "COUCHDB_IMAGE": "apache/couchdbci-debian:bullseye-erlang-25.0.2"
+ "COUCHDB_IMAGE": "apache/couchdbci-debian:bullseye-erlang-24.3.4.10"
}
},
diff --git a/src/chttpd/src/chttpd_node.erl b/src/chttpd/src/chttpd_node.erl
index a63236db7..bb3cf4798 100644
--- a/src/chttpd/src/chttpd_node.erl
+++ b/src/chttpd/src/chttpd_node.erl
@@ -16,7 +16,9 @@
-export([
handle_node_req/1,
get_stats/0,
- run_queues/0
+ run_queues/0,
+ message_queues/0,
+ db_pid_stats/0
]).
-include_lib("couch/include/couch_db.hrl").
@@ -284,14 +286,13 @@ get_stats() ->
],
{NumberOfGCs, WordsReclaimed, _} = statistics(garbage_collection),
{{input, Input}, {output, Output}} = statistics(io),
+
{CF, CDU} = db_pid_stats(),
- MessageQueues0 = [
+ MessageQueuesHist = [
{couch_file, {CF}},
- {couch_db_updater, {CDU}},
- {couch_server, couch_server:aggregate_queue_len()},
- {index_server, couch_index_server:aggregate_queue_len()}
+ {couch_db_updater, {CDU}}
],
- MessageQueues = MessageQueues0 ++ message_queues(registered()),
+ MessageQueues = message_queues(),
{SQ, DCQ} = run_queues(),
[
{uptime, couch_app:uptime() div 1000},
@@ -309,7 +310,7 @@ get_stats() ->
{stale_proc_count, couch_proc_manager:get_stale_proc_count()},
{process_count, erlang:system_info(process_count)},
{process_limit, erlang:system_info(process_limit)},
- {message_queues, {MessageQueues}},
+ {message_queues, {MessageQueuesHist ++ MessageQueues}},
{internal_replication_jobs, mem3_sync:get_backlog()},
{distribution, {get_distribution_stats()}}
].
@@ -385,15 +386,22 @@ get_distribution_stats() ->
erlang:system_info(dist_ctrl)
).
-message_queues(Registered) ->
- lists:map(
+-spec message_queues() ->
+ [{Name :: atom(), Length :: pos_integer()}].
+message_queues() ->
+ MessageQueuesAgg = [
+ {couch_server, couch_server:aggregate_queue_len()},
+ {index_server, couch_index_server:aggregate_queue_len()}
+ ],
+ MessageQueuesReg = lists:map(
fun(Name) ->
Type = message_queue_len,
{Type, Length} = process_info(whereis(Name), Type),
{Name, Length}
end,
- Registered
- ).
+ registered()
+ ),
+ MessageQueuesAgg ++ MessageQueuesReg.
%% Workaround for https://bugs.erlang.org/browse/ERL-1355
run_queues() ->
diff --git a/src/couch_prometheus/src/couch_prometheus_server.erl b/src/couch_prometheus/src/couch_prometheus_server.erl
index 05cd26265..7699c4fc4 100644
--- a/src/couch_prometheus/src/couch_prometheus_server.erl
+++ b/src/couch_prometheus/src/couch_prometheus_server.erl
@@ -198,8 +198,7 @@ get_io_stats() ->
].
get_message_queue_stats() ->
- QFun = fun(Name) -> {Name, message_queue_len(whereis(Name))} end,
- Queues = lists:map(QFun, registered()),
+ Queues = chttpd_node:message_queues(),
QueueLens = lists:map(fun({_, Len}) -> Len end, Queues),
QueueLenByLabel = lists:map(fun({Name, Len}) -> {[{queue_name, Name}], Len} end, Queues),
[
@@ -221,33 +220,16 @@ get_message_queue_stats() ->
to_prom(erlang_message_queue_size, gauge, "size of message queue", QueueLenByLabel)
].
-message_queue_len(undefined) ->
- 0;
-message_queue_len(Pid) when is_pid(Pid) ->
- case erlang:process_info(Pid, message_queue_len) of
- {message_queue_len, N} ->
- N;
- _ ->
- 0
- end.
-
get_run_queue_stats() ->
%% Workaround for https://bugs.erlang.org/browse/ERL-1355
- {Normal, Dirty} =
- case erlang:system_info(dirty_cpu_schedulers) > 0 of
- false ->
- {statistics(run_queue), 0};
- true ->
- [DCQ | SQs] = lists:reverse(statistics(run_queue_lengths)),
- {lists:sum(SQs), DCQ}
- end,
+ {SQ, DCQ} = chttpd_node:run_queues(),
[
- to_prom(erlang_scheduler_queues, gauge, "the total size of all normal run queues", Normal),
+ to_prom(erlang_scheduler_queues, gauge, "the total size of all normal run queues", SQ),
to_prom(
erlang_dirty_cpu_scheduler_queues,
gauge,
"the total size of all dirty CPU scheduler run queues",
- Dirty
+ DCQ
)
].
@@ -271,17 +253,6 @@ update_refresh_timer() ->
-include_lib("couch/include/couch_eunit.hrl").
-message_queue_len_test() ->
- self() ! refresh,
- ?assert(message_queue_len(self()) >= 1),
- ?assertEqual(0, message_queue_len(undefined)),
- {Pid, Ref} = spawn_monitor(fun() -> ok end),
- receive
- {'DOWN', Ref, process, Pid, _} ->
- ok
- end,
- ?assertEqual(0, message_queue_len(Pid)).
-
drain_refresh_messages_test() ->
self() ! refresh,
{messages, Mq0} = erlang:process_info(self(), messages),